diff --git a/.arcconfig b/.arcconfig index 92f8f458291e..a8e665bd920a 100644 --- a/.arcconfig +++ b/.arcconfig @@ -1,3 +1,4 @@ { + "repository.callsign" : "L", "conduit_uri" : "https://reviews.llvm.org/" } diff --git a/CMakeLists.txt b/CMakeLists.txt index 8cd9d053c63b..1d06bb2f5ec3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,7 +18,7 @@ cmake_policy(SET CMP0056 NEW) cmake_policy(SET CMP0057 NEW) if(NOT DEFINED LLVM_VERSION_MAJOR) - set(LLVM_VERSION_MAJOR 6) + set(LLVM_VERSION_MAJOR 7) endif() if(NOT DEFINED LLVM_VERSION_MINOR) set(LLVM_VERSION_MINOR 0) @@ -110,7 +110,7 @@ endif() # LLVM_EXTERNAL_${project}_SOURCE_DIR using LLVM_ALL_PROJECTS # This allows an easy way of setting up a build directory for llvm and another # one for llvm+clang+... using the same sources. -set(LLVM_ALL_PROJECTS "clang;libcxx;libcxxabi;lldb;compiler-rt;lld;polly") +set(LLVM_ALL_PROJECTS "clang;libcxx;libcxxabi;lldb;compiler-rt;lld;polly;debuginfo-tests") set(LLVM_ENABLE_PROJECTS "" CACHE STRING "Semicolon-separated list of projects to build (${LLVM_ALL_PROJECTS}), or \"all\".") if( LLVM_ENABLE_PROJECTS STREQUAL "all" ) @@ -276,9 +276,9 @@ set(LLVM_LIBDIR_SUFFIX "" CACHE STRING "Define suffix of library directory name set(LLVM_TOOLS_INSTALL_DIR "bin" CACHE STRING "Path for binary subdirectory (defaults to 'bin')") mark_as_advanced(LLVM_TOOLS_INSTALL_DIR) -set(LLVM_UTILS_INSTALL_DIR "bin" CACHE STRING +set(LLVM_UTILS_INSTALL_DIR "${LLVM_TOOLS_INSTALL_DIR}" CACHE STRING "Path to install LLVM utilities (enabled by LLVM_INSTALL_UTILS=ON) (defaults to LLVM_TOOLS_INSTALL_DIR)") -mark_as_advanced(LLVM_TOOLS_INSTALL_DIR) +mark_as_advanced(LLVM_UTILS_INSTALL_DIR) # They are used as destination of target generators. set(LLVM_RUNTIME_OUTPUT_INTDIR ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}/bin) @@ -385,7 +385,7 @@ option(LLVM_ENABLE_LLD "Use lld as C and C++ linker." OFF) option(LLVM_ENABLE_PEDANTIC "Compile with pedantic enabled." ON) option(LLVM_ENABLE_WERROR "Fail and stop if a warning is triggered." OFF) -option(LLVM_ENABLE_DUMP "Enable dump functions in release builds" OFF) +option(LLVM_ENABLE_DUMP "Enable dump functions even when assertions are disabled" OFF) if( NOT uppercase_CMAKE_BUILD_TYPE STREQUAL "DEBUG" ) option(LLVM_ENABLE_ASSERTIONS "Enable assertions" OFF) @@ -393,10 +393,6 @@ else() option(LLVM_ENABLE_ASSERTIONS "Enable assertions" ON) endif() -if( LLVM_ENABLE_ASSERTIONS ) - set(LLVM_ENABLE_DUMP ON) -endif() - option(LLVM_ENABLE_EXPENSIVE_CHECKS "Enable expensive checks" OFF) set(LLVM_ABI_BREAKING_CHECKS "WITH_ASSERTS" CACHE STRING @@ -682,9 +678,13 @@ foreach(t ${LLVM_TARGETS_TO_BUILD}) list(FIND LLVM_ALL_TARGETS ${t} idx) list(FIND LLVM_EXPERIMENTAL_TARGETS_TO_BUILD ${t} idy) + # At this point, LLVMBUILDTOOL already checked all the targets passed in + # LLVM_TARGETS_TO_BUILD and LLVM_EXPERIMENTAL_TARGETS_TO_BUILD, so + # this test just makes sure that any experimental targets were passed via + # LLVM_EXPERIMENTAL_TARGETS_TO_BUILD, not LLVM_TARGETS_TO_BUILD. if( idx LESS 0 AND idy LESS 0 ) - message(FATAL_ERROR "The target `${t}' does not exist. 
- It should be one of\n${LLVM_ALL_TARGETS}") + message(FATAL_ERROR "The target `${t}' is experimental and must be passed " + "via LLVM_EXPERIMENTAL_TARGETS_TO_BUILD.") else() set(LLVM_ENUM_TARGETS "${LLVM_ENUM_TARGETS}LLVM_TARGET(${t})\n") endif() @@ -975,11 +975,8 @@ if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY) set_target_properties(llvm-headers PROPERTIES FOLDER "Misc") if (NOT CMAKE_CONFIGURATION_TYPES) - add_custom_target(install-llvm-headers - DEPENDS llvm-headers - COMMAND "${CMAKE_COMMAND}" - -DCMAKE_INSTALL_COMPONENT=llvm-headers - -P "${CMAKE_BINARY_DIR}/cmake_install.cmake") + add_llvm_install_targets(install-llvm-headers + COMPONENT llvm-headers) endif() endif() @@ -992,6 +989,7 @@ if(LLVM_DISTRIBUTION_COMPONENTS) add_custom_target(distribution) add_custom_target(install-distribution) + add_custom_target(install-distribution-stripped) foreach(target ${LLVM_DISTRIBUTION_COMPONENTS}) if(TARGET ${target}) add_dependencies(distribution ${target}) @@ -1004,11 +1002,19 @@ if(LLVM_DISTRIBUTION_COMPONENTS) else() message(SEND_ERROR "Specified distribution component '${target}' doesn't have an install target") endif() + + if(TARGET install-${target}-stripped) + add_dependencies(install-distribution-stripped install-${target}-stripped) + else() + message(SEND_ERROR "Specified distribution component '${target}' doesn't have an install-stripped target." + " Its installation target creation should be changed to use add_llvm_install_targets," + " or you should manually create the 'install-${target}-stripped' target.") + endif() endforeach() endif() # This allows us to deploy the Universal CRT DLLs by passing -DCMAKE_INSTALL_UCRT_LIBRARIES=ON to CMake -if (MSVC) +if (MSVC AND CMAKE_HOST_SYSTEM_NAME STREQUAL "Windows") include(InstallRequiredSystemLibraries) endif() diff --git a/CREDITS.TXT b/CREDITS.TXT index bd92388cebf2..b8b38f3bd8f2 100644 --- a/CREDITS.TXT +++ b/CREDITS.TXT @@ -269,7 +269,7 @@ D: Release manager (1.7+) N: Sylvestre Ledru E: sylvestre@debian.org W: http://sylvestre.ledru.info/ -W: http://apt.llvm.org/ +W: https://apt.llvm.org/ D: Debian and Ubuntu packaging D: Continuous integration with jenkins diff --git a/bindings/go/llvm/ir_test.go b/bindings/go/llvm/ir_test.go index 325ee4890f4c..fb39955ec10f 100644 --- a/bindings/go/llvm/ir_test.go +++ b/bindings/go/llvm/ir_test.go @@ -142,7 +142,7 @@ func TestSubtypes(t *testing.T) { int_pointer := PointerType(cont.Int32Type(), 0) int_inner := int_pointer.Subtypes() if len(int_inner) != 1 { - t.Errorf("Got size %d, though wanted 1") + t.Errorf("Got size %d, though wanted 1", len(int_inner)) } if int_inner[0] != cont.Int32Type() { t.Errorf("Expected int32 type") @@ -151,7 +151,7 @@ func TestSubtypes(t *testing.T) { st_pointer := cont.StructType([]Type{cont.Int32Type(), cont.Int8Type()}, false) st_inner := st_pointer.Subtypes() if len(st_inner) != 2 { - t.Errorf("Got size %d, though wanted 2") + t.Errorf("Got size %d, though wanted 2", len(int_inner)) } if st_inner[0] != cont.Int32Type() { t.Errorf("Expected first struct field to be int32") diff --git a/cmake/config-ix.cmake b/cmake/config-ix.cmake index 23494fb96c6b..aaf22ff474b7 100644 --- a/cmake/config-ix.cmake +++ b/cmake/config-ix.cmake @@ -17,7 +17,7 @@ include(HandleLLVMStdlib) if( UNIX AND NOT (BEOS OR HAIKU) ) # Used by check_symbol_exists: - set(CMAKE_REQUIRED_LIBRARIES m) + list(APPEND CMAKE_REQUIRED_LIBRARIES "m") endif() # x86_64 FreeBSD 9.2 requires libcxxrt to be specified explicitly. 
if( CMAKE_SYSTEM MATCHES "FreeBSD-9.2-RELEASE" AND @@ -127,45 +127,55 @@ if(HAVE_LIBPTHREAD) set(LLVM_PTHREAD_LIB ${CMAKE_THREAD_LIBS_INIT}) endif() -# Don't look for these libraries on Windows. Also don't look for them if we're -# using MSan, since uninstrumented third party code may call MSan interceptors -# like strlen, leading to false positives. -if( NOT PURE_WINDOWS AND NOT LLVM_USE_SANITIZER MATCHES "Memory.*") - if (LLVM_ENABLE_ZLIB) - check_library_exists(z compress2 "" HAVE_LIBZ) - else() - set(HAVE_LIBZ 0) - endif() - # Skip libedit if using ASan as it contains memory leaks. - if (LLVM_ENABLE_LIBEDIT AND HAVE_HISTEDIT_H AND NOT LLVM_USE_SANITIZER MATCHES ".*Address.*") - check_library_exists(edit el_init "" HAVE_LIBEDIT) - else() - set(HAVE_LIBEDIT 0) - endif() - if(LLVM_ENABLE_TERMINFO) - set(HAVE_TERMINFO 0) - foreach(library tinfo terminfo curses ncurses ncursesw) +# Don't look for these libraries if we're using MSan, since uninstrumented third +# party code may call MSan interceptors like strlen, leading to false positives. +if(NOT LLVM_USE_SANITIZER MATCHES "Memory.*") + set(HAVE_LIBZ 0) + if(LLVM_ENABLE_ZLIB) + foreach(library z zlib_static zlib) string(TOUPPER ${library} library_suffix) - check_library_exists(${library} setupterm "" HAVE_TERMINFO_${library_suffix}) - if(HAVE_TERMINFO_${library_suffix}) - set(HAVE_TERMINFO 1) - set(TERMINFO_LIBS "${library}") + check_library_exists(${library} compress2 "" HAVE_LIBZ_${library_suffix}) + if(HAVE_LIBZ_${library_suffix}) + set(HAVE_LIBZ 1) + set(ZLIB_LIBRARIES "${library}") break() endif() endforeach() - else() - set(HAVE_TERMINFO 0) endif() - find_library(ICONV_LIBRARY_PATH NAMES iconv libiconv libiconv-2 c) - set(LLVM_LIBXML2_ENABLED 0) - set(LIBXML2_FOUND 0) - if((LLVM_ENABLE_LIBXML2) AND ((CMAKE_SYSTEM_NAME MATCHES "Linux") AND (ICONV_LIBRARY_PATH) OR APPLE)) - find_package(LibXml2) - if (LIBXML2_FOUND) - set(LLVM_LIBXML2_ENABLED 1) - include_directories(${LIBXML2_INCLUDE_DIR}) - set(LIBXML2_LIBS "xml2") + # Don't look for these libraries on Windows. + if (NOT PURE_WINDOWS) + # Skip libedit if using ASan as it contains memory leaks. + if (LLVM_ENABLE_LIBEDIT AND HAVE_HISTEDIT_H AND NOT LLVM_USE_SANITIZER MATCHES ".*Address.*") + check_library_exists(edit el_init "" HAVE_LIBEDIT) + else() + set(HAVE_LIBEDIT 0) + endif() + if(LLVM_ENABLE_TERMINFO) + set(HAVE_TERMINFO 0) + foreach(library tinfo terminfo curses ncurses ncursesw) + string(TOUPPER ${library} library_suffix) + check_library_exists(${library} setupterm "" HAVE_TERMINFO_${library_suffix}) + if(HAVE_TERMINFO_${library_suffix}) + set(HAVE_TERMINFO 1) + set(TERMINFO_LIBS "${library}") + break() + endif() + endforeach() + else() + set(HAVE_TERMINFO 0) + endif() + + find_library(ICONV_LIBRARY_PATH NAMES iconv libiconv libiconv-2 c) + set(LLVM_LIBXML2_ENABLED 0) + set(LIBXML2_FOUND 0) + if((LLVM_ENABLE_LIBXML2) AND ((CMAKE_SYSTEM_NAME MATCHES "Linux") AND (ICONV_LIBRARY_PATH) OR APPLE)) + find_package(LibXml2) + if (LIBXML2_FOUND) + set(LLVM_LIBXML2_ENABLED 1) + include_directories(${LIBXML2_INCLUDE_DIR}) + set(LIBXML2_LIBS "xml2") + endif() endif() endif() endif() @@ -628,3 +638,38 @@ else() endif() string(REPLACE " " ";" LLVM_BINDINGS_LIST "${LLVM_BINDINGS}") + +function(find_python_module module) + string(REPLACE "." 
"_" module_name ${module}) + string(TOUPPER ${module_name} module_upper) + set(FOUND_VAR PY_${module_upper}_FOUND) + + execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c" "import ${module}" + RESULT_VARIABLE status + ERROR_QUIET) + + if(status) + set(${FOUND_VAR} 0 PARENT_SCOPE) + message(STATUS "Could NOT find Python module ${module}") + else() + set(${FOUND_VAR} 1 PARENT_SCOPE) + message(STATUS "Found Python module ${module}") + endif() +endfunction() + +set (PYTHON_MODULES + pygments + # Some systems still don't have pygments.lexers.c_cpp which was introduced in + # version 2.0 in 2014... + pygments.lexers.c_cpp + yaml + ) +foreach(module ${PYTHON_MODULES}) + find_python_module(${module}) +endforeach() + +if(PY_PYGMENTS_FOUND AND PY_PYGMENTS_LEXERS_C_CPP_FOUND AND PY_YAML_FOUND) + set (LLVM_HAVE_OPT_VIEWER_MODULES 1) +else() + set (LLVM_HAVE_OPT_VIEWER_MODULES 0) +endif() diff --git a/cmake/modules/AddLLVM.cmake b/cmake/modules/AddLLVM.cmake index 908e7ee51ca8..20166d2cd30c 100644 --- a/cmake/modules/AddLLVM.cmake +++ b/cmake/modules/AddLLVM.cmake @@ -569,6 +569,32 @@ function(llvm_add_library name) endif() endfunction() +function(add_llvm_install_targets target) + cmake_parse_arguments(ARG "" "COMPONENT;PREFIX" "DEPENDS" ${ARGN}) + if(ARG_COMPONENT) + set(component_option -DCMAKE_INSTALL_COMPONENT="${ARG_COMPONENT}") + endif() + if(ARG_PREFIX) + set(prefix_option -DCMAKE_INSTALL_PREFIX="${ARG_PREFIX}") + endif() + + add_custom_target(${target} + DEPENDS ${ARG_DEPENDS} + COMMAND "${CMAKE_COMMAND}" + ${component_option} + ${prefix_option} + -P "${CMAKE_BINARY_DIR}/cmake_install.cmake" + USES_TERMINAL) + add_custom_target(${target}-stripped + DEPENDS ${ARG_DEPENDS} + COMMAND "${CMAKE_COMMAND}" + ${component_option} + ${prefix_option} + -DCMAKE_INSTALL_DO_STRIP=1 + -P "${CMAKE_BINARY_DIR}/cmake_install.cmake" + USES_TERMINAL) +endfunction() + macro(add_llvm_library name) cmake_parse_arguments(ARG "SHARED;BUILDTREE_ONLY" @@ -619,11 +645,9 @@ macro(add_llvm_library name) COMPONENT ${name}) if (NOT CMAKE_CONFIGURATION_TYPES) - add_custom_target(install-${name} - DEPENDS ${name} - COMMAND "${CMAKE_COMMAND}" - -DCMAKE_INSTALL_COMPONENT=${name} - -P "${CMAKE_BINARY_DIR}/cmake_install.cmake") + add_llvm_install_targets(install-${name} + DEPENDS ${name} + COMPONENT ${name}) endif() endif() set_property(GLOBAL APPEND PROPERTY LLVM_EXPORTS ${name}) @@ -744,7 +768,7 @@ macro(add_llvm_executable name) # libpthreads overrides some standard library symbols, so main # executable must be linked with it in order to provide consistent # API for all shared libaries loaded by this executable. 
- target_link_libraries(${name} ${LLVM_PTHREAD_LIB}) + target_link_libraries(${name} PRIVATE ${LLVM_PTHREAD_LIB}) endif() endmacro(add_llvm_executable name) @@ -849,11 +873,9 @@ macro(add_llvm_tool name) COMPONENT ${name}) if (NOT CMAKE_CONFIGURATION_TYPES) - add_custom_target(install-${name} - DEPENDS ${name} - COMMAND "${CMAKE_COMMAND}" - -DCMAKE_INSTALL_COMPONENT=${name} - -P "${CMAKE_BINARY_DIR}/cmake_install.cmake") + add_llvm_install_targets(install-${name} + DEPENDS ${name} + COMPONENT ${name}) endif() endif() endif() @@ -889,11 +911,9 @@ macro(add_llvm_utility name) RUNTIME DESTINATION ${LLVM_UTILS_INSTALL_DIR} COMPONENT ${name}) if (NOT CMAKE_CONFIGURATION_TYPES) - add_custom_target(install-${name} - DEPENDS ${name} - COMMAND "${CMAKE_COMMAND}" - -DCMAKE_INSTALL_COMPONENT=${name} - -P "${CMAKE_BINARY_DIR}/cmake_install.cmake") + add_llvm_install_targets(install-${name} + DEPENDS ${name} + COMPONENT ${name}) endif() endif() endmacro(add_llvm_utility name) @@ -903,7 +923,7 @@ macro(add_llvm_fuzzer name) if( LLVM_LIB_FUZZING_ENGINE ) set(LLVM_OPTIONAL_SOURCES ${ARG_DUMMY_MAIN}) add_llvm_executable(${name} ${ARG_UNPARSED_ARGUMENTS}) - target_link_libraries(${name} ${LLVM_LIB_FUZZING_ENGINE}) + target_link_libraries(${name} PRIVATE ${LLVM_LIB_FUZZING_ENGINE}) set_target_properties(${name} PROPERTIES FOLDER "Fuzzers") elseif( LLVM_USE_SANITIZE_COVERAGE ) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=fuzzer") @@ -1073,7 +1093,7 @@ function(add_unittest test_suite test_name) # libpthreads overrides some standard library symbols, so main # executable must be linked with it in order to provide consistent # API for all shared libaries loaded by this executable. - target_link_libraries(${test_name} gtest_main gtest ${LLVM_PTHREAD_LIB}) + target_link_libraries(${test_name} PRIVATE gtest_main gtest ${LLVM_PTHREAD_LIB}) add_dependencies(${test_suite} ${test_name}) get_target_property(test_suite_folder ${test_suite} FOLDER) @@ -1400,11 +1420,9 @@ function(llvm_install_library_symlink name dest type) COMPONENT ${component}) if (NOT CMAKE_CONFIGURATION_TYPES AND NOT ARG_ALWAYS_GENERATE) - add_custom_target(install-${name} - DEPENDS ${name} ${dest} install-${dest} - COMMAND "${CMAKE_COMMAND}" - -DCMAKE_INSTALL_COMPONENT=${name} - -P "${CMAKE_BINARY_DIR}/cmake_install.cmake") + add_llvm_install_targets(install-${name} + DEPENDS ${name} ${dest} install-${dest} + COMPONENT ${name}) endif() endfunction() @@ -1435,11 +1453,9 @@ function(llvm_install_symlink name dest) COMPONENT ${component}) if (NOT CMAKE_CONFIGURATION_TYPES AND NOT ARG_ALWAYS_GENERATE) - add_custom_target(install-${name} - DEPENDS ${name} ${dest} install-${dest} - COMMAND "${CMAKE_COMMAND}" - -DCMAKE_INSTALL_COMPONENT=${name} - -P "${CMAKE_BINARY_DIR}/cmake_install.cmake") + add_llvm_install_targets(install-${name} + DEPENDS ${name} ${dest} install-${dest} + COMPONENT ${name}) endif() endfunction() @@ -1457,7 +1473,7 @@ function(add_llvm_tool_symlink link_name target) if(NOT ARG_OUTPUT_DIR) # If you're not overriding the OUTPUT_DIR, we can make the link relative in # the same directory. 
- if(UNIX) + if(CMAKE_HOST_UNIX) set(dest_binary "$") endif() if(CMAKE_CONFIGURATION_TYPES) @@ -1483,7 +1499,7 @@ function(add_llvm_tool_symlink link_name target) endif() endif() - if(UNIX) + if(CMAKE_HOST_UNIX) set(LLVM_LINK_OR_COPY create_symlink) else() set(LLVM_LINK_OR_COPY copy) diff --git a/cmake/modules/AddSphinxTarget.cmake b/cmake/modules/AddSphinxTarget.cmake index 4540c5c36c8e..22e3dcb776aa 100644 --- a/cmake/modules/AddSphinxTarget.cmake +++ b/cmake/modules/AddSphinxTarget.cmake @@ -19,7 +19,7 @@ endif() # ``project`` should be the project name function (add_sphinx_target builder project) set(SPHINX_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/${builder}") - set(SPHINX_DOC_TREE_DIR "${CMAKE_CURRENT_BINARY_DIR}/_doctrees-${builder}") + set(SPHINX_DOC_TREE_DIR "${CMAKE_CURRENT_BINARY_DIR}/_doctrees-${project}-${builder}") set(SPHINX_TARGET_NAME docs-${project}-${builder}) if (SPHINX_WARNINGS_AS_ERRORS) diff --git a/cmake/modules/CMakeLists.txt b/cmake/modules/CMakeLists.txt index ac4b0b7c0304..6074e8358594 100644 --- a/cmake/modules/CMakeLists.txt +++ b/cmake/modules/CMakeLists.txt @@ -129,9 +129,7 @@ if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY) if (NOT CMAKE_CONFIGURATION_TYPES) # Add a dummy target so this can be used with LLVM_DISTRIBUTION_COMPONENTS add_custom_target(cmake-exports) - add_custom_target(install-cmake-exports - COMMAND "${CMAKE_COMMAND}" - -DCMAKE_INSTALL_COMPONENT=cmake-exports - -P "${CMAKE_BINARY_DIR}/cmake_install.cmake") + add_llvm_install_targets(install-cmake-exports + COMPONENT cmake-exports) endif() endif() diff --git a/cmake/modules/CheckAtomic.cmake b/cmake/modules/CheckAtomic.cmake index dcf021b8fdda..9a4cdf12a622 100644 --- a/cmake/modules/CheckAtomic.cmake +++ b/cmake/modules/CheckAtomic.cmake @@ -1,13 +1,14 @@ # atomic builtins are required for threading support. INCLUDE(CheckCXXSourceCompiles) +INCLUDE(CheckLibraryExists) # Sometimes linking against libatomic is required for atomic ops, if # the platform doesn't support lock-free atomics. function(check_working_cxx_atomics varname) set(OLD_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS}) - set(CMAKE_REQUIRED_FLAGS "-std=c++11") + set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -std=c++11") CHECK_CXX_SOURCE_COMPILES(" #include std::atomic x; @@ -80,7 +81,6 @@ endif() ## assumes C++11 works. CHECK_CXX_SOURCE_COMPILES(" #ifdef _MSC_VER -#include /* Workaround for PR19898. */ #include #endif int main() { diff --git a/cmake/modules/CheckCompilerVersion.cmake b/cmake/modules/CheckCompilerVersion.cmake index 2e8f5445781c..adf500ad53a7 100644 --- a/cmake/modules/CheckCompilerVersion.cmake +++ b/cmake/modules/CheckCompilerVersion.cmake @@ -28,7 +28,7 @@ if(NOT DEFINED LLVM_COMPILER_CHECKED) # bug in libstdc++4.6 that is fixed in libstdc++4.7. 
set(OLD_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS}) set(OLD_CMAKE_REQUIRED_LIBRARIES ${CMAKE_REQUIRED_LIBRARIES}) - set(CMAKE_REQUIRED_FLAGS "-std=c++0x") + set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -std=c++0x") check_cxx_source_compiles(" #include std::atomic x(0.0f); diff --git a/cmake/modules/CrossCompile.cmake b/cmake/modules/CrossCompile.cmake index ff092b257ab7..b239816c8253 100644 --- a/cmake/modules/CrossCompile.cmake +++ b/cmake/modules/CrossCompile.cmake @@ -7,9 +7,26 @@ function(llvm_create_cross_target_internal target_name toolchain buildtype) endif(NOT DEFINED LLVM_${target_name}_BUILD) if (EXISTS ${LLVM_MAIN_SRC_DIR}/cmake/platforms/${toolchain}.cmake) - set(CROSS_TOOLCHAIN_FLAGS_${target_name} - -DCMAKE_TOOLCHAIN_FILE=\"${LLVM_MAIN_SRC_DIR}/cmake/platforms/${toolchain}.cmake\" - CACHE STRING "Toolchain file for ${target_name}") + set(CROSS_TOOLCHAIN_FLAGS_INIT + -DCMAKE_TOOLCHAIN_FILE=\"${LLVM_MAIN_SRC_DIR}/cmake/platforms/${toolchain}.cmake\") + else() + set(CROSS_TOOLCHAIN_FLAGS_INIT + -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} + -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} + ) + endif() + set(CROSS_TOOLCHAIN_FLAGS_${target_name} ${CROSS_TOOLCHAIN_FLAGS_INIT} + CACHE STRING "Toolchain configuration for ${target_name}") + + if (buildtype) + set(build_type_flags "-DCMAKE_BUILD_TYPE=${buildtype}") + endif() + if (LLVM_USE_LINKER AND NOT CMAKE_CROSSCOMPILING) + set(linker_flag "-DLLVM_USE_LINKER=${LLVM_USE_LINKER}") + endif() + if (LLVM_EXTERNAL_CLANG_SOURCE_DIR) + # Propagate LLVM_EXTERNAL_CLANG_SOURCE_DIR so that clang-tblgen can be built + set(external_clang_dir "-DLLVM_EXTERNAL_CLANG_SOURCE_DIR=${LLVM_EXTERNAL_CLANG_SOURCE_DIR}") endif() add_custom_command(OUTPUT ${LLVM_${target_name}_BUILD} @@ -19,10 +36,23 @@ function(llvm_create_cross_target_internal target_name toolchain buildtype) add_custom_target(CREATE_LLVM_${target_name} DEPENDS ${LLVM_${target_name}_BUILD}) + # Escape semicolons in the targets list so that cmake doesn't expand + # them to spaces. 
+ string(REPLACE ";" "$" targets_to_build_arg + "${LLVM_TARGETS_TO_BUILD}") + string(REPLACE ";" "$" experimental_targets_to_build_arg + "${LLVM_EXPERIMENTAL_TARGETS_TO_BUILD}") + add_custom_command(OUTPUT ${LLVM_${target_name}_BUILD}/CMakeCache.txt COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" + -DCMAKE_MAKE_PROGRAM="${CMAKE_MAKE_PROGRAM}" ${CROSS_TOOLCHAIN_FLAGS_${target_name}} ${CMAKE_SOURCE_DIR} -DLLVM_TARGET_IS_CROSSCOMPILE_HOST=TRUE + -DLLVM_TARGETS_TO_BUILD="${targets_to_build_arg}" + -DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD="${experimental_targets_to_build_arg}" + -DLLVM_DEFAULT_TARGET_TRIPLE="${TARGET_TRIPLE}" + -DLLVM_TARGET_ARCH="${LLVM_TARGET_ARCH}" + ${build_type_flags} ${linker_flag} ${external_clang_dir} WORKING_DIRECTORY ${LLVM_${target_name}_BUILD} DEPENDS CREATE_LLVM_${target_name} COMMENT "Configuring ${target_name} LLVM...") @@ -30,32 +60,6 @@ function(llvm_create_cross_target_internal target_name toolchain buildtype) add_custom_target(CONFIGURE_LLVM_${target_name} DEPENDS ${LLVM_${target_name}_BUILD}/CMakeCache.txt) - set_directory_properties(PROPERTIES ADDITIONAL_MAKE_CLEAN_FILES - ${LLVM_${target_name}_BUILD}) - - if(NOT IS_DIRECTORY ${LLVM_${target_name}_BUILD}) - - - message(STATUS "Configuring ${target_name} build...") - execute_process(COMMAND ${CMAKE_COMMAND} -E make_directory - ${LLVM_${target_name}_BUILD} ) - - message(STATUS "Configuring ${target_name} targets...") - if (buildtype) - set(build_type_flags "-DCMAKE_BUILD_TYPE=${buildtype}") - endif() - if (LLVM_EXTERNAL_CLANG_SOURCE_DIR) - # Propagate LLVM_EXTERNAL_CLANG_SOURCE_DIR so that clang-tblgen can be built - set(external_clang_dir "-DLLVM_EXTERNAL_CLANG_SOURCE_DIR=${LLVM_EXTERNAL_CLANG_SOURCE_DIR}") - endif() - execute_process(COMMAND ${CMAKE_COMMAND} ${build_type_flags} - -G "${CMAKE_GENERATOR}" -DLLVM_TARGETS_TO_BUILD=${LLVM_TARGETS_TO_BUILD} - ${CROSS_TOOLCHAIN_FLAGS_${target_name}} ${CMAKE_SOURCE_DIR} - -DLLVM_TARGET_IS_CROSSCOMPILE_HOST=TRUE - ${external_clang_dir} - WORKING_DIRECTORY ${LLVM_${target_name}_BUILD} ) - endif(NOT IS_DIRECTORY ${LLVM_${target_name}_BUILD}) - endfunction() function(llvm_create_cross_target target_name sysroot) diff --git a/cmake/modules/GetHostTriple.cmake b/cmake/modules/GetHostTriple.cmake index 0cad1db4effe..019188a59cc6 100644 --- a/cmake/modules/GetHostTriple.cmake +++ b/cmake/modules/GetHostTriple.cmake @@ -3,7 +3,7 @@ function( get_host_triple var ) if( MSVC ) - if( CMAKE_CL_64 ) + if( CMAKE_SIZEOF_VOID_P EQUAL 8 ) set( value "x86_64-pc-win32" ) else() set( value "i686-pc-win32" ) diff --git a/cmake/modules/HandleLLVMOptions.cmake b/cmake/modules/HandleLLVMOptions.cmake index b5059a8a60e7..58347fd1fbb1 100644 --- a/cmake/modules/HandleLLVMOptions.cmake +++ b/cmake/modules/HandleLLVMOptions.cmake @@ -458,64 +458,66 @@ elseif( LLVM_COMPILER_IS_GCC_COMPATIBLE ) endif(LLVM_ENABLE_MODULES) endif( MSVC ) -if (MSVC AND NOT CLANG_CL) - set(msvc_warning_flags - # Disabled warnings. 
- -wd4141 # Suppress ''modifier' : used more than once' (because of __forceinline combined with inline) - -wd4146 # Suppress 'unary minus operator applied to unsigned type, result still unsigned' - -wd4180 # Suppress 'qualifier applied to function type has no meaning; ignored' - -wd4244 # Suppress ''argument' : conversion from 'type1' to 'type2', possible loss of data' - -wd4258 # Suppress ''var' : definition from the for loop is ignored; the definition from the enclosing scope is used' - -wd4267 # Suppress ''var' : conversion from 'size_t' to 'type', possible loss of data' - -wd4291 # Suppress ''declaration' : no matching operator delete found; memory will not be freed if initialization throws an exception' - -wd4345 # Suppress 'behavior change: an object of POD type constructed with an initializer of the form () will be default-initialized' - -wd4351 # Suppress 'new behavior: elements of array 'array' will be default initialized' - -wd4355 # Suppress ''this' : used in base member initializer list' - -wd4456 # Suppress 'declaration of 'var' hides local variable' - -wd4457 # Suppress 'declaration of 'var' hides function parameter' - -wd4458 # Suppress 'declaration of 'var' hides class member' - -wd4459 # Suppress 'declaration of 'var' hides global declaration' - -wd4503 # Suppress ''identifier' : decorated name length exceeded, name was truncated' - -wd4624 # Suppress ''derived class' : destructor could not be generated because a base class destructor is inaccessible' - -wd4722 # Suppress 'function' : destructor never returns, potential memory leak - -wd4800 # Suppress ''type' : forcing value to bool 'true' or 'false' (performance warning)' - -wd4100 # Suppress 'unreferenced formal parameter' - -wd4127 # Suppress 'conditional expression is constant' - -wd4512 # Suppress 'assignment operator could not be generated' - -wd4505 # Suppress 'unreferenced local function has been removed' - -wd4610 # Suppress ' can never be instantiated' - -wd4510 # Suppress 'default constructor could not be generated' - -wd4702 # Suppress 'unreachable code' - -wd4245 # Suppress 'signed/unsigned mismatch' - -wd4706 # Suppress 'assignment within conditional expression' - -wd4310 # Suppress 'cast truncates constant value' - -wd4701 # Suppress 'potentially uninitialized local variable' - -wd4703 # Suppress 'potentially uninitialized local pointer variable' - -wd4389 # Suppress 'signed/unsigned mismatch' - -wd4611 # Suppress 'interaction between '_setjmp' and C++ object destruction is non-portable' - -wd4805 # Suppress 'unsafe mix of type and type in operation' - -wd4204 # Suppress 'nonstandard extension used : non-constant aggregate initializer' - -wd4577 # Suppress 'noexcept used with no exception handling mode specified; termination on exception is not guaranteed' - -wd4091 # Suppress 'typedef: ignored on left of '' when no variable is declared' - # C4592 is disabled because of false positives in Visual Studio 2015 - # Update 1. Re-evaluate the usefulness of this diagnostic with Update 2. - -wd4592 # Suppress ''var': symbol will be dynamically initialized (implementation limitation) - -wd4319 # Suppress ''operator' : zero extending 'type' to 'type' of greater size' - - # Ideally, we'd like this warning to be enabled, but MSVC 2013 doesn't - # support the 'aligned' attribute in the way that clang sources requires (for - # any code that uses the LLVM_ALIGNAS macro), so this is must be disabled to - # avoid unwanted alignment warnings. 
- # When we switch to requiring a version of MSVC that supports the 'alignas' - # specifier (MSVC 2015?) this warning can be re-enabled. - -wd4324 # Suppress 'structure was padded due to __declspec(align())' - - # Promoted warnings. - -w14062 # Promote 'enumerator in switch of enum is not handled' to level 1 warning. - - # Promoted warnings to errors. - -we4238 # Promote 'nonstandard extension used : class rvalue used as lvalue' to error. - ) +if (MSVC) + if (NOT CLANG_CL) + set(msvc_warning_flags + # Disabled warnings. + -wd4141 # Suppress ''modifier' : used more than once' (because of __forceinline combined with inline) + -wd4146 # Suppress 'unary minus operator applied to unsigned type, result still unsigned' + -wd4180 # Suppress 'qualifier applied to function type has no meaning; ignored' + -wd4244 # Suppress ''argument' : conversion from 'type1' to 'type2', possible loss of data' + -wd4258 # Suppress ''var' : definition from the for loop is ignored; the definition from the enclosing scope is used' + -wd4267 # Suppress ''var' : conversion from 'size_t' to 'type', possible loss of data' + -wd4291 # Suppress ''declaration' : no matching operator delete found; memory will not be freed if initialization throws an exception' + -wd4345 # Suppress 'behavior change: an object of POD type constructed with an initializer of the form () will be default-initialized' + -wd4351 # Suppress 'new behavior: elements of array 'array' will be default initialized' + -wd4355 # Suppress ''this' : used in base member initializer list' + -wd4456 # Suppress 'declaration of 'var' hides local variable' + -wd4457 # Suppress 'declaration of 'var' hides function parameter' + -wd4458 # Suppress 'declaration of 'var' hides class member' + -wd4459 # Suppress 'declaration of 'var' hides global declaration' + -wd4503 # Suppress ''identifier' : decorated name length exceeded, name was truncated' + -wd4624 # Suppress ''derived class' : destructor could not be generated because a base class destructor is inaccessible' + -wd4722 # Suppress 'function' : destructor never returns, potential memory leak + -wd4800 # Suppress ''type' : forcing value to bool 'true' or 'false' (performance warning)' + -wd4100 # Suppress 'unreferenced formal parameter' + -wd4127 # Suppress 'conditional expression is constant' + -wd4512 # Suppress 'assignment operator could not be generated' + -wd4505 # Suppress 'unreferenced local function has been removed' + -wd4610 # Suppress ' can never be instantiated' + -wd4510 # Suppress 'default constructor could not be generated' + -wd4702 # Suppress 'unreachable code' + -wd4245 # Suppress 'signed/unsigned mismatch' + -wd4706 # Suppress 'assignment within conditional expression' + -wd4310 # Suppress 'cast truncates constant value' + -wd4701 # Suppress 'potentially uninitialized local variable' + -wd4703 # Suppress 'potentially uninitialized local pointer variable' + -wd4389 # Suppress 'signed/unsigned mismatch' + -wd4611 # Suppress 'interaction between '_setjmp' and C++ object destruction is non-portable' + -wd4805 # Suppress 'unsafe mix of type and type in operation' + -wd4204 # Suppress 'nonstandard extension used : non-constant aggregate initializer' + -wd4577 # Suppress 'noexcept used with no exception handling mode specified; termination on exception is not guaranteed' + -wd4091 # Suppress 'typedef: ignored on left of '' when no variable is declared' + # C4592 is disabled because of false positives in Visual Studio 2015 + # Update 1. Re-evaluate the usefulness of this diagnostic with Update 2. 
+ -wd4592 # Suppress ''var': symbol will be dynamically initialized (implementation limitation) + -wd4319 # Suppress ''operator' : zero extending 'type' to 'type' of greater size' + + # Ideally, we'd like this warning to be enabled, but MSVC 2013 doesn't + # support the 'aligned' attribute in the way that clang sources requires (for + # any code that uses the LLVM_ALIGNAS macro), so this is must be disabled to + # avoid unwanted alignment warnings. + # When we switch to requiring a version of MSVC that supports the 'alignas' + # specifier (MSVC 2015?) this warning can be re-enabled. + -wd4324 # Suppress 'structure was padded due to __declspec(align())' + + # Promoted warnings. + -w14062 # Promote 'enumerator in switch of enum is not handled' to level 1 warning. + + # Promoted warnings to errors. + -we4238 # Promote 'nonstandard extension used : class rvalue used as lvalue' to error. + ) + endif(NOT CLANG_CL) # Enable warnings if (LLVM_ENABLE_WARNINGS) @@ -538,10 +540,17 @@ if (MSVC AND NOT CLANG_CL) foreach(flag ${msvc_warning_flags}) append("${flag}" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) endforeach(flag) -endif (MSVC AND NOT CLANG_CL) +endif (MSVC) if (LLVM_ENABLE_WARNINGS AND (LLVM_COMPILER_IS_GCC_COMPATIBLE OR CLANG_CL)) - append("-Wall -W -Wno-unused-parameter -Wwrite-strings" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) + + # Don't add -Wall for clang-cl, because it maps -Wall to -Weverything for + # MSVC compatibility. /W4 is added above instead. + if (NOT CLANG_CL) + append("-Wall" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) + endif() + + append("-W -Wno-unused-parameter -Wwrite-strings" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) append("-Wcast-qual" CMAKE_CXX_FLAGS) # Turn off missing field initializer warnings for gcc to avoid noise from @@ -840,6 +849,13 @@ else() set(LLVM_ENABLE_PLUGINS ON) endif() +set(LLVM_ENABLE_IDE_default OFF) +if (XCODE OR MSVC_IDE OR CMAKE_EXTRA_GENERATOR) + set(LLVM_ENABLE_IDE_default ON) +endif() +option(LLVM_ENABLE_IDE "Generate targets and process sources for use with an IDE" + ${LLVM_ENABLE_IDE_default}) + function(get_compile_definitions) get_directory_property(top_dir_definitions DIRECTORY ${CMAKE_SOURCE_DIR} COMPILE_DEFINITIONS) foreach(definition ${top_dir_definitions}) diff --git a/cmake/modules/LLVM-Config.cmake b/cmake/modules/LLVM-Config.cmake index 2b9ab23c4770..10fd52609274 100644 --- a/cmake/modules/LLVM-Config.cmake +++ b/cmake/modules/LLVM-Config.cmake @@ -87,7 +87,7 @@ macro(llvm_config executable) endif() endif() - target_link_libraries(${executable} LLVM) + target_link_libraries(${executable} PRIVATE LLVM) endif() explicit_llvm_config(${executable} ${link_components}) @@ -99,9 +99,9 @@ function(explicit_llvm_config executable) llvm_map_components_to_libnames(LIBRARIES ${link_components}) get_target_property(t ${executable} TYPE) - if("x${t}" STREQUAL "xSTATIC_LIBRARY") + if(t STREQUAL "STATIC_LIBRARY") target_link_libraries(${executable} INTERFACE ${LIBRARIES}) - elseif("x${t}" STREQUAL "xSHARED_LIBRARY" OR "x${t}" STREQUAL "xMODULE_LIBRARY") + elseif(t STREQUAL "EXECUTABLE" OR t STREQUAL "SHARED_LIBRARY" OR t STREQUAL "MODULE_LIBRARY") target_link_libraries(${executable} PRIVATE ${LIBRARIES}) else() # Use plain form for legacy user. 
diff --git a/cmake/modules/LLVMConfig.cmake.in b/cmake/modules/LLVMConfig.cmake.in index 169fc9987be8..fe4df5278498 100644 --- a/cmake/modules/LLVMConfig.cmake.in +++ b/cmake/modules/LLVMConfig.cmake.in @@ -37,6 +37,8 @@ set(LLVM_ENABLE_THREADS @LLVM_ENABLE_THREADS@) set(LLVM_ENABLE_ZLIB @LLVM_ENABLE_ZLIB@) +set(LLVM_LIBXML2_ENABLED @LLVM_LIBXML2_ENABLED@) + set(LLVM_ENABLE_DIA_SDK @LLVM_ENABLE_DIA_SDK@) set(LLVM_NATIVE_ARCH @LLVM_NATIVE_ARCH@) @@ -72,6 +74,7 @@ set(LLVM_CMAKE_DIR "@LLVM_CONFIG_CMAKE_DIR@") set(LLVM_BINARY_DIR "@LLVM_CONFIG_BINARY_DIR@") set(LLVM_TOOLS_BINARY_DIR "@LLVM_CONFIG_TOOLS_BINARY_DIR@") set(LLVM_TOOLS_INSTALL_DIR "@LLVM_TOOLS_INSTALL_DIR@") +set(LLVM_HAVE_OPT_VIEWER_MODULES @LLVM_HAVE_OPT_VIEWER_MODULES@) if(NOT TARGET LLVMSupport) set(LLVM_EXPORTED_TARGETS "@LLVM_CONFIG_EXPORTS@") diff --git a/cmake/modules/LLVMExternalProjectUtils.cmake b/cmake/modules/LLVMExternalProjectUtils.cmake index 8ecf42acfee1..619550b5943a 100644 --- a/cmake/modules/LLVMExternalProjectUtils.cmake +++ b/cmake/modules/LLVMExternalProjectUtils.cmake @@ -95,14 +95,14 @@ function(llvm_ExternalProject_Add name source_dir) foreach(prefix ${ARG_PASSTHROUGH_PREFIXES}) foreach(variableName ${variableNames}) if(variableName MATCHES "^${prefix}") - string(REPLACE ";" "," value "${${variableName}}") + string(REPLACE ";" "|" value "${${variableName}}") list(APPEND PASSTHROUGH_VARIABLES -D${variableName}=${value}) endif() endforeach() endforeach() - if(ARG_USE_TOOLCHAIN) + if(ARG_USE_TOOLCHAIN AND NOT CMAKE_CROSSCOMPILING) if(CLANG_IN_TOOLCHAIN) set(compiler_args -DCMAKE_C_COMPILER=${LLVM_RUNTIME_OUTPUT_INTDIR}/clang -DCMAKE_CXX_COMPILER=${LLVM_RUNTIME_OUTPUT_INTDIR}/clang++) @@ -132,6 +132,20 @@ function(llvm_ExternalProject_Add name source_dir) set(exclude EXCLUDE_FROM_ALL 1) endif() + if(CMAKE_SYSROOT) + set(sysroot_arg -DCMAKE_SYSROOT=${CMAKE_SYSROOT}) + endif() + + if(CMAKE_CROSSCOMPILING) + set(compiler_args -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} + -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} + -DCMAKE_AR=${CMAKE_AR} + -DCMAKE_RANLIB=${CMAKE_RANLIB}) + set(llvm_config_path ${LLVM_CONFIG_PATH}) + else() + set(llvm_config_path "$") + endif() + ExternalProject_Add(${name} DEPENDS ${ARG_DEPENDS} llvm-config ${name}-clobber @@ -143,11 +157,12 @@ function(llvm_ExternalProject_Add name source_dir) CMAKE_ARGS ${${nameCanon}_CMAKE_ARGS} ${compiler_args} -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX} - -DCMAKE_SYSROOT=${CMAKE_SYSROOT} + ${sysroot_arg} -DLLVM_BINARY_DIR=${PROJECT_BINARY_DIR} - -DLLVM_CONFIG_PATH=$ + -DLLVM_CONFIG_PATH=${llvm_config_path} -DLLVM_ENABLE_WERROR=${LLVM_ENABLE_WERROR} -DLLVM_HOST_TRIPLE=${LLVM_HOST_TRIPLE} + -DLLVM_HAVE_LINK_VERSION_SCRIPT=${LLVM_HAVE_LINK_VERSION_SCRIPT} -DPACKAGE_VERSION=${PACKAGE_VERSION} -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DCMAKE_MAKE_PROGRAM=${CMAKE_MAKE_PROGRAM} @@ -160,7 +175,7 @@ function(llvm_ExternalProject_Add name source_dir) USES_TERMINAL_CONFIGURE 1 USES_TERMINAL_BUILD 1 USES_TERMINAL_INSTALL 1 - LIST_SEPARATOR , + LIST_SEPARATOR | ) if(ARG_USE_TOOLCHAIN) @@ -189,12 +204,9 @@ function(llvm_ExternalProject_Add name source_dir) install(CODE "execute_process\(COMMAND \${CMAKE_COMMAND} -DCMAKE_INSTALL_PREFIX=\${CMAKE_INSTALL_PREFIX} -P ${BINARY_DIR}/cmake_install.cmake \)" COMPONENT ${name}) - add_custom_target(install-${name} - DEPENDS ${name} - COMMAND "${CMAKE_COMMAND}" - -DCMAKE_INSTALL_COMPONENT=${name} - -P "${CMAKE_BINARY_DIR}/cmake_install.cmake" - USES_TERMINAL) + add_llvm_install_targets(install-${name} + DEPENDS ${name} + COMPONENT 
${name}) endif() # Add top-level targets diff --git a/cmake/modules/LLVMInstallSymlink.cmake b/cmake/modules/LLVMInstallSymlink.cmake index 482697b06baf..1a04de931ff7 100644 --- a/cmake/modules/LLVMInstallSymlink.cmake +++ b/cmake/modules/LLVMInstallSymlink.cmake @@ -3,7 +3,7 @@ # See PR8397. function(install_symlink name target outdir) - if(UNIX) + if(CMAKE_HOST_UNIX) set(LINK_OR_COPY create_symlink) set(DESTDIR $ENV{DESTDIR}) else() diff --git a/cmake/modules/LLVMProcessSources.cmake b/cmake/modules/LLVMProcessSources.cmake index 3b4838daed5a..8b7dc65d9497 100644 --- a/cmake/modules/LLVMProcessSources.cmake +++ b/cmake/modules/LLVMProcessSources.cmake @@ -52,7 +52,7 @@ function(llvm_process_sources OUT_VAR) cmake_parse_arguments(ARG "" "" "ADDITIONAL_HEADERS;ADDITIONAL_HEADER_DIRS" ${ARGN}) set(sources ${ARG_UNPARSED_ARGUMENTS}) llvm_check_source_file_list( ${sources} ) - if( MSVC_IDE OR XCODE ) + if( LLVM_ENABLE_IDE ) # This adds .td and .h files to the Visual Studio solution: add_td_sources(sources) find_all_header_files(hdrs "${ARG_ADDITIONAL_HEADER_DIRS}") diff --git a/cmake/platforms/ClangClCMakeCompileRules.cmake b/cmake/platforms/ClangClCMakeCompileRules.cmake new file mode 100644 index 000000000000..a3bcf1c24a91 --- /dev/null +++ b/cmake/platforms/ClangClCMakeCompileRules.cmake @@ -0,0 +1,9 @@ +# macOS paths usually start with /Users/*. Unfortunately, clang-cl interprets +# paths starting with /U as macro undefines, so we need to put a -- before the +# input file path to force it to be treated as a path. CMake's compilation rules +# should be tweaked accordingly, but until that's done, and to support older +# CMake versions, overriding compilation rules works well enough. This file will +# be included by cmake after the default compilation rules have already been set +# up, so we can just modify them instead of duplicating them entirely. +string(REPLACE "-c " "-c -- " CMAKE_C_COMPILE_OBJECT "${CMAKE_C_COMPILE_OBJECT}") +string(REPLACE "-c " "-c -- " CMAKE_CXX_COMPILE_OBJECT "${CMAKE_CXX_COMPILE_OBJECT}") diff --git a/cmake/platforms/WinMsvc.cmake b/cmake/platforms/WinMsvc.cmake index 9ea9ff78a255..a736a4578722 100644 --- a/cmake/platforms/WinMsvc.cmake +++ b/cmake/platforms/WinMsvc.cmake @@ -4,11 +4,15 @@ # Usage: # cmake -G Ninja # -DCMAKE_TOOLCHAIN_FILE=/path/to/this/file +# -DHOST_ARCH=[aarch64|arm64|armv7|arm|i686|x86|x86_64|x64] # -DLLVM_NATIVE_TOOLCHAIN=/path/to/llvm/installation # -DMSVC_BASE=/path/to/MSVC/system/libraries/and/includes # -DWINSDK_BASE=/path/to/windows-sdk # -DWINSDK_VER=windows sdk version folder name # +# HOST_ARCH: +# The architecture to build for. +# # LLVM_NATIVE_TOOLCHAIN: # *Absolute path* to a folder containing the toolchain which will be used to # build. At a minimum, this folder should have a bin directory with a @@ -76,18 +80,9 @@ # # IMPORTANT: In order for this to work, you will need a valid copy of the Windows # SDK and C++ STL headers and libraries on your host. Additionally, since the -# Windows libraries and headers are not case-correct, you will need to have these -# mounted in a case-insensitive mount. This requires one command to set up. -# -# ~/src: mkdir winsdk -# ~/src: mkdir winsdk.icase -# ~/src: ciopfs winsdk/ winsdk.icase -# -# Now copy or otherwise install your headers and libraries to the winsdk.icase folder -# and use *that* folder as the path when configuring CMake. -# -# TODO: We could also provide a CMake option -DUSE_ICASE_VFS_OVERLAY=ON/OFF that would -# make this optional. For now, we require ciopfs. 
+# Windows libraries and headers are not case-correct, this toolchain file sets +# up a VFS overlay for the SDK headers and case-correcting symlinks for the +# libraries when running on a case-sensitive filesystem. # When configuring CMake with a toolchain file against a top-level CMakeLists.txt, @@ -106,16 +101,78 @@ function(init_user_prop prop) endif() endfunction() -# FIXME: We should support target architectures other than x64 +function(generate_winsdk_vfs_overlay winsdk_include_dir output_path) + set(include_dirs) + file(GLOB_RECURSE entries LIST_DIRECTORIES true "${winsdk_include_dir}/*") + foreach(entry ${entries}) + if(IS_DIRECTORY "${entry}") + list(APPEND include_dirs "${entry}") + endif() + endforeach() + + file(WRITE "${output_path}" "version: 0\n") + file(APPEND "${output_path}" "case-sensitive: false\n") + file(APPEND "${output_path}" "roots:\n") + + foreach(dir ${include_dirs}) + file(GLOB headers RELATIVE "${dir}" "${dir}/*.h") + if(NOT headers) + continue() + endif() + + file(APPEND "${output_path}" " - name: \"${dir}\"\n") + file(APPEND "${output_path}" " type: directory\n") + file(APPEND "${output_path}" " contents:\n") + + foreach(header ${headers}) + file(APPEND "${output_path}" " - name: \"${header}\"\n") + file(APPEND "${output_path}" " type: file\n") + file(APPEND "${output_path}" " external-contents: \"${dir}/${header}\"\n") + endforeach() + endforeach() +endfunction() + +function(generate_winsdk_lib_symlinks winsdk_um_lib_dir output_dir) + execute_process(COMMAND "${CMAKE_COMMAND}" -E make_directory "${output_dir}") + file(GLOB libraries RELATIVE "${winsdk_um_lib_dir}" "${winsdk_um_lib_dir}/*") + foreach(library ${libraries}) + string(TOLOWER "${library}" symlink_name) + execute_process(COMMAND "${CMAKE_COMMAND}" + -E create_symlink + "${winsdk_um_lib_dir}/${library}" + "${output_dir}/${symlink_name}") + endforeach() +endfunction() + set(CMAKE_SYSTEM_NAME Windows) set(CMAKE_SYSTEM_VERSION 10.0) set(CMAKE_SYSTEM_PROCESSOR AMD64) +init_user_prop(HOST_ARCH) init_user_prop(LLVM_NATIVE_TOOLCHAIN) init_user_prop(MSVC_BASE) init_user_prop(WINSDK_BASE) init_user_prop(WINSDK_VER) +if(NOT HOST_ARCH) + set(HOST_ARCH x86_64) +endif() +if(HOST_ARCH STREQUAL "aarch64" OR HOST_ARCH STREQUAL "arm64") + set(TRIPLE_ARCH "aarch64") + set(WINSDK_ARCH "arm64") +elseif(HOST_ARCH STREQUAL "armv7" OR HOST_ARCH STREQUAL "arm") + set(TRIPLE_ARCH "armv7") + set(WINSDK_ARCH "arm") +elseif(HOST_ARCH STREQUAL "i686" OR HOST_ARCH STREQUAL "x86") + set(TRIPLE_ARCH "i686") + set(WINSDK_ARCH "x86") +elseif(HOST_ARCH STREQUAL "x86_64" OR HOST_ARCH STREQUAL "x64") + set(TRIPLE_ARCH "x86_64") + set(WINSDK_ARCH "x64") +else() + message(SEND_ERROR "Unknown host architecture ${HOST_ARCH}. 
Must be aarch64 (or arm64), armv7 (or arm), i686 (or x86), or x86_64 (or x64).") +endif() + set(MSVC_INCLUDE "${MSVC_BASE}/include") set(MSVC_LIB "${MSVC_BASE}/lib") set(WINSDK_INCLUDE "${WINSDK_BASE}/Include/${WINSDK_VER}") @@ -147,6 +204,13 @@ if(NOT EXISTS "${WINSDK_BASE}" OR "Windows SDK installation") endif() +if(NOT EXISTS "${WINSDK_INCLUDE}/um/Windows.h") + message(SEND_ERROR "Cannot find Windows.h") +endif() +if(NOT EXISTS "${WINSDK_INCLUDE}/um/WINDOWS.H") + set(case_sensitive_filesystem TRUE) +endif() + set(CMAKE_C_COMPILER "${LLVM_NATIVE_TOOLCHAIN}/bin/clang-cl" CACHE FILEPATH "") set(CMAKE_CXX_COMPILER "${LLVM_NATIVE_TOOLCHAIN}/bin/clang-cl" CACHE FILEPATH "") set(CMAKE_LINKER "${LLVM_NATIVE_TOOLCHAIN}/bin/lld-link" CACHE FILEPATH "") @@ -164,12 +228,26 @@ set(CROSS_TOOLCHAIN_FLAGS_NATIVE "${_CTF_NATIVE_DEFAULT}" CACHE STRING "") set(COMPILE_FLAGS -D_CRT_SECURE_NO_WARNINGS + --target=${TRIPLE_ARCH}-windows-msvc + -fms-compatibility-version=19.11 -imsvc "${MSVC_INCLUDE}" -imsvc "${WINSDK_INCLUDE}/ucrt" -imsvc "${WINSDK_INCLUDE}/shared" -imsvc "${WINSDK_INCLUDE}/um" -imsvc "${WINSDK_INCLUDE}/winrt") +if(case_sensitive_filesystem) + # Ensure all sub-configures use the top-level VFS overlay instead of generating their own. + init_user_prop(winsdk_vfs_overlay_path) + if(NOT winsdk_vfs_overlay_path) + set(winsdk_vfs_overlay_path "${CMAKE_BINARY_DIR}/winsdk_vfs_overlay.yaml") + generate_winsdk_vfs_overlay("${WINSDK_BASE}/Include/${WINSDK_VER}" "${winsdk_vfs_overlay_path}") + init_user_prop(winsdk_vfs_overlay_path) + endif() + list(APPEND COMPILE_FLAGS + -Xclang -ivfsoverlay -Xclang "${winsdk_vfs_overlay_path}") +endif() + string(REPLACE ";" " " COMPILE_FLAGS "${COMPILE_FLAGS}") # We need to preserve any flags that were passed in by the user. However, we @@ -188,10 +266,21 @@ set(LINK_FLAGS # Prevent CMake from attempting to invoke mt.exe. It only recognizes the slashed form and not the dashed form. /manifest:no - # FIXME: We should support target architectures other than x64. - -libpath:"${MSVC_LIB}/x64" - -libpath:"${WINSDK_LIB}/ucrt/x64" - -libpath:"${WINSDK_LIB}/um/x64") + -libpath:"${MSVC_LIB}/${WINSDK_ARCH}" + -libpath:"${WINSDK_LIB}/ucrt/${WINSDK_ARCH}" + -libpath:"${WINSDK_LIB}/um/${WINSDK_ARCH}") + +if(case_sensitive_filesystem) + # Ensure all sub-configures use the top-level symlinks dir instead of generating their own. + init_user_prop(winsdk_lib_symlinks_dir) + if(NOT winsdk_lib_symlinks_dir) + set(winsdk_lib_symlinks_dir "${CMAKE_BINARY_DIR}/winsdk_lib_symlinks") + generate_winsdk_lib_symlinks("${WINSDK_BASE}/Lib/${WINSDK_VER}/um/${WINSDK_ARCH}" "${winsdk_lib_symlinks_dir}") + init_user_prop(winsdk_lib_symlinks_dir) + endif() + list(APPEND LINK_FLAGS + -libpath:"${winsdk_lib_symlinks_dir}") +endif() string(REPLACE ";" " " LINK_FLAGS "${LINK_FLAGS}") @@ -211,9 +300,5 @@ set(CMAKE_SHARED_LINKER_FLAGS "${_CMAKE_SHARED_LINKER_FLAGS_INITIAL} ${LINK_FLAG set(CMAKE_C_STANDARD_LIBRARIES "" CACHE STRING "" FORCE) set(CMAKE_CXX_STANDARD_LIBRARIES "" CACHE STRING "" FORCE) -# CMake's InstallRequiredSystemLibraries module searches for a Visual Studio -# installation in order to determine where to copy the required DLLs. This -# installation won't exist when cross-compiling, of course, so silence the -# resulting warnings about missing libraries. -set(CMAKE_INSTALL_SYSTEM_RUNTIME_LIBS_NO_WARNINGS ON) - +# Allow clang-cl to work with macOS paths. 
+set(CMAKE_USER_MAKE_RULES_OVERRIDE "${CMAKE_CURRENT_LIST_DIR}/ClangClCMakeCompileRules.cmake") diff --git a/docs/AMDGPUUsage.rst b/docs/AMDGPUUsage.rst index 1cf30304dfc8..673974dc7e0c 100644 --- a/docs/AMDGPUUsage.rst +++ b/docs/AMDGPUUsage.rst @@ -84,130 +84,132 @@ names from both the *Processor* and *Alternative Processor* can be used. .. table:: AMDGPU Processors :name: amdgpu-processor-table - =========== =============== ============ ===== ======= ================== - Processor Alternative Target dGPU/ ROCm Example - Processor Triple APU Support Products - Architecture - =========== =============== ============ ===== ======= ================== + =========== =============== ============ ===== ========= ======= ================== + Processor Alternative Target dGPU/ Target ROCm Example + Processor Triple APU Features Support Products + Architecture Supported + [Default] + =========== =============== ============ ===== ========= ======= ================== **Radeon HD 2000/3000 Series (R600)** [AMD-RADEON-HD-2000-3000]_ - ------------------------------------------------------------------------- + ----------------------------------------------------------------------------------- ``r600`` ``r600`` dGPU ``r630`` ``r600`` dGPU ``rs880`` ``r600`` dGPU ``rv670`` ``r600`` dGPU **Radeon HD 4000 Series (R700)** [AMD-RADEON-HD-4000]_ - ------------------------------------------------------------------------- + ----------------------------------------------------------------------------------- ``rv710`` ``r600`` dGPU ``rv730`` ``r600`` dGPU ``rv770`` ``r600`` dGPU **Radeon HD 5000 Series (Evergreen)** [AMD-RADEON-HD-5000]_ - ------------------------------------------------------------------------- + ----------------------------------------------------------------------------------- ``cedar`` ``r600`` dGPU ``redwood`` ``r600`` dGPU ``sumo`` ``r600`` dGPU ``juniper`` ``r600`` dGPU ``cypress`` ``r600`` dGPU **Radeon HD 6000 Series (Northern Islands)** [AMD-RADEON-HD-6000]_ - ------------------------------------------------------------------------- + ----------------------------------------------------------------------------------- ``barts`` ``r600`` dGPU ``turks`` ``r600`` dGPU ``caicos`` ``r600`` dGPU ``cayman`` ``r600`` dGPU **GCN GFX6 (Southern Islands (SI))** [AMD-GCN-GFX6]_ - ------------------------------------------------------------------------- + ----------------------------------------------------------------------------------- ``gfx600`` - ``tahiti`` ``amdgcn`` dGPU ``gfx601`` - ``pitcairn`` ``amdgcn`` dGPU - ``verde`` - ``oland`` - ``hainan`` **GCN GFX7 (Sea Islands (CI))** [AMD-GCN-GFX7]_ - ------------------------------------------------------------------------- - ``gfx700`` - ``bonaire`` ``amdgcn`` dGPU - Radeon HD 7790 - - Radeon HD 8770 - - R7 260 - - R7 260X - \ - ``kaveri`` ``amdgcn`` APU - A6-7000 - - A6 Pro-7050B - - A8-7100 - - A8 Pro-7150B - - A10-7300 - - A10 Pro-7350B - - FX-7500 - - A8-7200P - - A10-7400P - - FX-7600P - ``gfx701`` - ``hawaii`` ``amdgcn`` dGPU ROCm - FirePro W8100 - - FirePro W9100 - - FirePro S9150 - - FirePro S9170 - ``gfx702`` ``amdgcn`` dGPU ROCm - Radeon R9 290 - - Radeon R9 290x - - Radeon R390 - - Radeon R390x - ``gfx703`` - ``kabini`` ``amdgcn`` APU - E1-2100 - - ``mullins`` - E1-2200 - - E1-2500 - - E2-3000 - - E2-3800 - - A4-5000 - - A4-5100 - - A6-5200 - - A4 Pro-3340B + ----------------------------------------------------------------------------------- + ``gfx700`` - ``kaveri`` ``amdgcn`` APU - A6-7000 + - A6 Pro-7050B + - A8-7100 + - A8 
Pro-7150B + - A10-7300 + - A10 Pro-7350B + - FX-7500 + - A8-7200P + - A10-7400P + - FX-7600P + ``gfx701`` - ``hawaii`` ``amdgcn`` dGPU ROCm - FirePro W8100 + - FirePro W9100 + - FirePro S9150 + - FirePro S9170 + ``gfx702`` ``amdgcn`` dGPU ROCm - Radeon R9 290 + - Radeon R9 290x + - Radeon R390 + - Radeon R390x + ``gfx703`` - ``kabini`` ``amdgcn`` APU - E1-2100 + - ``mullins`` - E1-2200 + - E1-2500 + - E2-3000 + - E2-3800 + - A4-5000 + - A4-5100 + - A6-5200 + - A4 Pro-3340B + ``gfx704`` - ``bonaire`` ``amdgcn`` dGPU - Radeon HD 7790 + - Radeon HD 8770 + - R7 260 + - R7 260X **GCN GFX8 (Volcanic Islands (VI))** [AMD-GCN-GFX8]_ - ------------------------------------------------------------------------- - ``gfx800`` - ``iceland`` ``amdgcn`` dGPU - FirePro S7150 - - FirePro S7100 - - FirePro W7100 - - Radeon R285 - - Radeon R9 380 - - Radeon R9 385 - - Mobile FirePro - M7170 - ``gfx801`` - ``carrizo`` ``amdgcn`` APU - A6-8500P - - Pro A6-8500B - - A8-8600P - - Pro A8-8600B - - FX-8800P - - Pro A12-8800B - \ ``amdgcn`` APU ROCm - A10-8700P - - Pro A10-8700B - - A10-8780P - \ ``amdgcn`` APU - A10-9600P - - A10-9630P - - A12-9700P - - A12-9730P - - FX-9800P - - FX-9830P - \ ``amdgcn`` APU - E2-9010 - - A6-9210 - - A9-9410 - ``gfx802`` - ``tonga`` ``amdgcn`` dGPU ROCm Same as gfx800 - ``gfx803`` - ``fiji`` ``amdgcn`` dGPU ROCm - Radeon R9 Nano - - Radeon R9 Fury - - Radeon R9 FuryX - - Radeon Pro Duo - - FirePro S9300x2 - - Radeon Instinct MI8 - \ - ``polaris10`` ``amdgcn`` dGPU ROCm - Radeon RX 470 - - Radeon RX 480 - - Radeon Instinct MI6 - \ - ``polaris11`` ``amdgcn`` dGPU ROCm - Radeon RX 460 - ``gfx810`` - ``stoney`` ``amdgcn`` APU + ----------------------------------------------------------------------------------- + ``gfx801`` - ``carrizo`` ``amdgcn`` APU - xnack - A6-8500P + [on] - Pro A6-8500B + - A8-8600P + - Pro A8-8600B + - FX-8800P + - Pro A12-8800B + \ ``amdgcn`` APU - xnack ROCm - A10-8700P + [on] - Pro A10-8700B + - A10-8780P + \ ``amdgcn`` APU - xnack - A10-9600P + [on] - A10-9630P + - A12-9700P + - A12-9730P + - FX-9800P + - FX-9830P + \ ``amdgcn`` APU - xnack - E2-9010 + [on] - A6-9210 + - A9-9410 + ``gfx802`` - ``tonga`` ``amdgcn`` dGPU - xnack ROCm - FirePro S7150 + - ``iceland`` [off] - FirePro S7100 + - FirePro W7100 + - Radeon R285 + - Radeon R9 380 + - Radeon R9 385 + - Mobile FirePro + M7170 + ``gfx803`` - ``fiji`` ``amdgcn`` dGPU - xnack ROCm - Radeon R9 Nano + [off] - Radeon R9 Fury + - Radeon R9 FuryX + - Radeon Pro Duo + - FirePro S9300x2 + - Radeon Instinct MI8 + \ - ``polaris10`` ``amdgcn`` dGPU - xnack ROCm - Radeon RX 470 + [off] - Radeon RX 480 + - Radeon Instinct MI6 + \ - ``polaris11`` ``amdgcn`` dGPU - xnack ROCm - Radeon RX 460 + [off] + ``gfx810`` - ``stoney`` ``amdgcn`` APU - xnack + [on] **GCN GFX9** [AMD-GCN-GFX9]_ - ------------------------------------------------------------------------- - ``gfx900`` ``amdgcn`` dGPU ROCm - Radeon Vega - Frontier Edition - - Radeon RX Vega 56 - - Radeon RX Vega 64 - - Radeon RX Vega 64 - Liquid - - Radeon Instinct MI25 - ``gfx902`` ``amdgcn`` APU *TBA* - - .. TODO - Add product - names. - =========== =============== ============ ===== ======= ================== + ----------------------------------------------------------------------------------- + ``gfx900`` ``amdgcn`` dGPU - xnack ROCm - Radeon Vega + [off] Frontier Edition + - Radeon RX Vega 56 + - Radeon RX Vega 64 + - Radeon RX Vega 64 + Liquid + - Radeon Instinct MI25 + ``gfx902`` ``amdgcn`` APU - xnack *TBA* + [on] + .. TODO + Add product + names. 
+ =========== =============== ============ ===== ========= ======= ================== .. _amdgpu-target-features: @@ -215,11 +217,15 @@ Target Features --------------- Target features control how code is generated to support certain -features. Not all target features are supported by all processors. The -runtime must ensure that the features supported by the device used to -execute the code match the features enabled when generating the -code. A mismatch of features may result in incorrect execution, or a -reduction in performance. +processor specific features. Not all target features are supported by +all processors. The runtime must ensure that the features supported by +the device used to execute the code match the features enabled when +generating the code. A mismatch of features may result in incorrect +execution, or a reduction in performance. + +The target features supported by each processor, and the default value +used if not specified explicitly, is listed in +:ref:`amdgpu-processor-table`. Use the ``clang -m[no-]`` option to specify the AMD GPU target features. @@ -227,34 +233,31 @@ target features. For example: ``-mxnack`` - Enable the *XNACK* feature. + Enable the ``xnack`` feature. ``-mno-xnack`` - Disable the *XNACK* feature. + Disable the ``xnack`` feature. .. table:: AMDGPU Target Features :name: amdgpu-target-feature-table - ============== ======== ================================================== - Target Feature Default Description - ============== ======== ================================================== - -m[no-]xnack disabled Enable/disable generating code that has - memory clauses that are compatible with - having XNACK replay enabled. - - This is used for demand paging and page - migration. If XNACK replay is enabled in - the device, then if a page fault occurs - the code may execute incorrectly if the - XNACK feature is not enabled. Executing - code that has the feature enabled on a - device that does not have XNACK replay - enabled will execute correctly, but may - be less performant than code with the - feature disabled. - - This feature is supported by the - ``amdgcn`` architecture for GFX8-GFX9. - ============== ======== ================================================== + ============== ================================================== + Target Feature Description + ============== ================================================== + -m[no-]xnack Enable/disable generating code that has + memory clauses that are compatible with + having XNACK replay enabled. + + This is used for demand paging and page + migration. If XNACK replay is enabled in + the device, then if a page fault occurs + the code may execute incorrectly if the + ``xnack`` feature is not enabled. Executing + code that has the feature enabled on a + device that does not have XNACK replay + enabled will execute correctly, but may + be less performant than code with the + feature disabled. + ============== ================================================== .. _amdgpu-address-spaces: @@ -517,6 +520,12 @@ The AMDGPU backend uses the following ELF header: ``EF_AMDGPU_MACH_xxx`` values defined in :ref:`amdgpu-ef-amdgpu-mach-table`. + ``EF_AMDGPU_XNACK`` 0x00000100 Indicates if the ``xnack`` + target feature is + enabled for all code + contained in the code object. + See + :ref:`amdgpu-target-features`. ================================= ========== ============================= .. 
table:: AMDGPU ``EF_AMDGPU_MACH`` Values @@ -551,7 +560,7 @@ The AMDGPU backend uses the following ELF header: ``EF_AMDGPU_MACH_AMDGCN_GFX701`` 35 ``gfx701`` ``EF_AMDGPU_MACH_AMDGCN_GFX702`` 36 ``gfx702`` ``EF_AMDGPU_MACH_AMDGCN_GFX703`` 37 ``gfx703`` - ``EF_AMDGPU_MACH_AMDGCN_GFX800`` 38 ``gfx800`` + ``EF_AMDGPU_MACH_AMDGCN_GFX704`` 38 ``gfx704`` ``EF_AMDGPU_MACH_AMDGCN_GFX801`` 39 ``gfx801`` ``EF_AMDGPU_MACH_AMDGCN_GFX802`` 40 ``gfx802`` ``EF_AMDGPU_MACH_AMDGCN_GFX803`` 41 ``gfx803`` @@ -1290,11 +1299,16 @@ non-AMD key names should be prefixed by "*vendor-name*.". be launched with a matching corresponding work-group size. - "IsXNACKEnabled" boolean Indicates if the - generated machine - code is capable of - supporting XNACK. See - :ref:`amdgpu-target-features`. + "NumSpilledSGPRs" integer Number of stores from + a scalar register to + a register allocator + created spill + location. + "NumSpilledVGPRs" integer Number of stores from + a vector register to + a register allocator + created spill + location. ============================ ============== ========= ===================== .. @@ -1539,7 +1553,7 @@ CP microcode requires the Kernel descritor to be allocated on 64 byte alignment. must be executed with the specified work-group size for Z. - 383:271 14 Reserved, must be 0. + 383:272 14 Reserved, must be 0. bytes 415:384 4 bytes ComputePgmRsrc1 Compute Shader (CS) program settings used by @@ -1579,10 +1593,7 @@ CP microcode requires the Kernel descritor to be allocated on 64 byte alignment. should always be 0. 457 1 bit EnableSGPRGridWorkgroupCountZ Not implemented in CP and should always be 0. - 462:458 5 bits Reserved, must be 0. - 463 1 bit IsXNACKEnabled Indicates if the generated - machine code is capable of - supporting XNACK. + 463:458 6 bits Reserved, must be 0. 511:464 6 Reserved, must be 0. bytes 512 **Total size 64 bytes.** @@ -4131,8 +4142,6 @@ Additional Documentation .. [AMD-GCN-GFX7] `AMD Sea Islands Series ISA `_ .. [AMD-GCN-GFX8] `AMD GCN3 Instruction Set Architecture `__ .. [AMD-GCN-GFX9] `AMD "Vega" Instruction Set Architecture `__ -.. [AMD-OpenCL_Programming-Guide] `AMD Accelerated Parallel Processing OpenCL Programming Guide `_ -.. [AMD-APP-SDK] `AMD Accelerated Parallel Processing APP SDK Documentation `__ .. [AMD-ROCm] `ROCm: Open Platform for Development, Discovery and Education Around GPU Computing `__ .. [AMD-ROCm-github] `ROCm github `__ .. [HSA] `Heterogeneous System Architecture (HSA) Foundation `__ @@ -4141,4 +4150,3 @@ Additional Documentation .. [YAML] `YAML Ain't Markup Language (YAMLâ„¢) Version 1.2 `__ .. [OpenCL] `The OpenCL Specification Version 2.0 `__ .. [HRF] `Heterogeneous-race-free Memory Models `__ -.. [AMD-AMDGPU-Compute-Application-Binary-Interface] `AMDGPU Compute Application Binary Interface `__ diff --git a/docs/BitCodeFormat.rst b/docs/BitCodeFormat.rst index 98a3156e0825..39a7a925e680 100644 --- a/docs/BitCodeFormat.rst +++ b/docs/BitCodeFormat.rst @@ -62,10 +62,12 @@ understanding the encoding. Magic Numbers ------------- -The first two bytes of a bitcode file are 'BC' (``0x42``, ``0x43``). The second -two bytes are an application-specific magic number. Generic bitcode tools can -look at only the first two bytes to verify the file is bitcode, while -application-specific programs will want to look at all four. +The first four bytes of a bitstream are used as an application-specific magic +number. Generic bitcode tools may look at the first four bytes to determine +whether the stream is a known stream type. 
However, these tools should *not* +determine whether a bitstream is valid based on its magic number alone. New +application-specific bitstream formats are being developed all the time; tools +should not reject them just because they have a hitherto unseen magic number. .. _primitives: @@ -496,12 +498,9 @@ LLVM IR Magic Number The magic number for LLVM IR files is: :raw-html:`
<tt><blockquote>`
-[0x0\ :sub:`4`, 0xC\ :sub:`4`, 0xE\ :sub:`4`, 0xD\ :sub:`4`]
+['B'\ :sub:`8`, 'C'\ :sub:`8`, 0x0\ :sub:`4`, 0xC\ :sub:`4`, 0xE\ :sub:`4`, 0xD\ :sub:`4`]
:raw-html:`</blockquote></tt>
` -When combined with the bitcode magic number and viewed as bytes, this is -``"BC 0xC0DE"``. - .. _Signed VBRs: Signed VBRs @@ -904,7 +903,7 @@ PARAMATTR_CODE_ENTRY Record The ``ENTRY`` record (code 2) contains a variable number of values describing a unique set of function parameter attributes. Each *attrgrp* value is used as a -key with which to look up an entry in the the attribute group table described +key with which to look up an entry in the attribute group table described in the ``PARAMATTR_GROUP_BLOCK`` block. .. _PARAMATTR_CODE_ENTRY_OLD: @@ -1052,6 +1051,9 @@ The integer codes are mapped to well-known attributes as follows. * code 50: ``inaccessiblememonly_or_argmemonly`` * code 51: ``allocsize([, ])`` * code 52: ``writeonly`` +* code 53: ``speculatable`` +* code 54: ``strictfp`` +* code 55: ``sanitize_hwaddress`` .. note:: The ``allocsize`` attribute has a special encoding for its arguments. Its two diff --git a/docs/CodeGenerator.rst b/docs/CodeGenerator.rst index bcdc72283566..7329f3d1fe61 100644 --- a/docs/CodeGenerator.rst +++ b/docs/CodeGenerator.rst @@ -1578,6 +1578,17 @@ which lowers MCInst's into machine code bytes and relocations. This is important if you want to support direct .o file emission, or would like to implement an assembler for your target. +Emitting function stack size information +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +A section containing metadata on function stack sizes will be emitted when +``TargetLoweringObjectFile::StackSizesSection`` is not null, and +``TargetOptions::EmitStackSizeSection`` is set (-stack-size-section). The +section will contain an array of pairs of function symbol values (pointer size) +and stack sizes (unsigned LEB128). The stack size values only include the space +allocated in the function prologue. Functions with dynamic stack allocations are +not included. + VLIW Packetizer --------------- diff --git a/docs/CommandGuide/llc.rst b/docs/CommandGuide/llc.rst index 5094259f9f95..11dfc902d20c 100644 --- a/docs/CommandGuide/llc.rst +++ b/docs/CommandGuide/llc.rst @@ -132,6 +132,14 @@ End-user Options Specify which EABI version should conform to. Valid EABI versions are *gnu*, *4* and *5*. Default value (*default*) depends on the triple. +.. option:: -stack-size-section + + Emit the .stack_sizes section which contains stack size metadata. The section + contains an array of pairs of function symbol values (pointer size) and stack + sizes (unsigned LEB128). The stack size values only include the space allocated + in the function prologue. Functions with dynamic stack allocations are not + included. + Tuning/Configuration Options ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/docs/CommandGuide/llvm-cov.rst b/docs/CommandGuide/llvm-cov.rst index 6ee05ee1a0a8..85c8dde64961 100644 --- a/docs/CommandGuide/llvm-cov.rst +++ b/docs/CommandGuide/llvm-cov.rst @@ -361,14 +361,15 @@ EXPORT COMMAND SYNOPSIS ^^^^^^^^ -:program:`llvm-cov export` [*options*] -instr-profile *PROFILE* *BIN* [*-object BIN,...*] [[*-object BIN*]] +:program:`llvm-cov export` [*options*] -instr-profile *PROFILE* *BIN* [*-object BIN,...*] [[*-object BIN*]] [*SOURCES*] DESCRIPTION ^^^^^^^^^^^ The :program:`llvm-cov export` command exports regions, functions, expansions, and summaries of the coverage of the binaries *BIN*,... using the profile data -*PROFILE* as JSON. +*PROFILE* as JSON. It can optionally be filtered to only export the coverage +for the files listed in *SOURCES*. 
For information on compiling programs for coverage and generating profile data, see :ref:`llvm-cov-show`. @@ -382,3 +383,10 @@ OPTIONS It is an error to specify an architecture that is not included in the universal binary or to use an architecture that does not match a non-universal binary. + +.. option:: -summary-only + + Export only summary information for each file in the coverage data. This mode + will not export coverage information for smaller units such as individual + functions or regions. The result will be the same as produced by :program: + `llvm-cov report` command, but presented in JSON format rather than text. diff --git a/docs/Contributing.rst b/docs/Contributing.rst new file mode 100644 index 000000000000..6fcc0820b489 --- /dev/null +++ b/docs/Contributing.rst @@ -0,0 +1,122 @@ +================================== +Contributing to LLVM +================================== + + +Thank you for your interest in contributing to LLVM! There are multiple ways to +contribute, and we appreciate all contributions. In case you +have questions, you can either use the `Developer's List (llvm-dev)`_ +or the #llvm channel on `irc.oftc.net`_. + +If you want to contribute code, please familiarize yourself with the :doc:`DeveloperPolicy`. + +.. contents:: + :local: + + +Ways to Contribute +================== + +Bug Reports +----------- +If you are working with LLVM and run into a bug, we definitely want to know +about it. Please let us know and follow the instructions in +:doc:`HowToSubmitABug` to create a bug report. + +Bug Fixes +--------- +If you are interested in contributing code to LLVM, bugs labeled with the +`beginner keyword`_ in the `bug tracker`_ are a good way to get familiar with +the code base. If you are interested in fixing a bug, please create an account +for the bug tracker and assign it to yourself, to let people know you are working on +it. + +Then try to reproduce and fix the bug with upstream LLVM. Start by building +LLVM from source as described in :doc:`GettingStarted` and +and use the built binaries to reproduce the failure described in the bug. Use +a debug build (`-DCMAKE_BUILD_TYPE=Debug`) or a build with assertions +(`-DLLVM_ENABLE_ASSERTIONS=On`, enabled for Debug builds). + +Bigger Pieces of Work +--------------------- +In case you are interested in taking on a bigger piece of work, a list of +interesting projects is maintained at the `LLVM's Open Projects page`_. In case +you are interested in working on any of these projects, please send a mail to +the `LLVM Developer's mailing list`_, so that we know the project is being +worked on. + + +How to Submit a Patch +===================== +Once you have a patch ready, it is time to submit it. The patch should: + +* include a small unit test +* conform to the :doc:`CodingStandards`. You can use the `clang-format-diff.py`_ or `git-clang-format`_ tools to automatically format your patch properly. +* not contain any unrelated changes +* be an isolated change. Independent changes should be submitted as separate patches as this makes reviewing easier. + +To get a patch accepted, it has to be reviewed by the LLVM community. This can +be done using `LLVM's Phabricator`_ or the llvm-commits mailing list. +Please follow :ref:`Phabricator#requesting-a-review-via-the-web-interface ` +to request a review using Phabricator. + +To make sure the right people see your patch, please select suitable reviewers +and add them to your patch when requesting a review. 
Suitable reviewers are the +code owner (see CODE_OWNERS.txt) and other people doing work in the area your +patch touches. If you are using Phabricator, add them to the `Reviewers` field +when creating a review and if you are using `llvm-commits`, add them to the CC of +your email. + +A reviewer may request changes or ask questions during the review. If you are +uncertain on how to provide test cases, documentation, etc., feel free to ask +for guidance during the review. Please address the feedback and re-post an +updated version of your patch. This cycle continues until all requests and comments +have been addressed and a reviewer accepts the patch with a `Looks good to me` or `LGTM`. +Once that is done the change can be committed. If you do not have commit +access, please let people know during the review and someone should commit it +on your behalf. + +If you have received no comments on your patch for a week, you can request a +review by 'ping'ing a patch by responding to the email thread containing the +patch, or the Phabricator review with "Ping." The common courtesy 'ping' rate +is once a week. Please remember that you are asking for valuable time from other +professional developers. + + +Helpful Information About LLVM +============================== +:doc:`LLVM's documentation ` provides a wealth of information about LLVM's internals as +well as various user guides. The pages listed below should provide a good overview +of LLVM's high-level design, as well as its internals: + +`Intro to LLVM`__ + Book chapter providing a compiler hacker's introduction to LLVM. + + .. __: http://www.aosabook.org/en/llvm.html + +:doc:`GettingStarted` + Discusses how to get up and running quickly with the LLVM infrastructure. + Everything from unpacking and compilation of the distribution to execution + of some tools. + +:doc:`LangRef` + Defines the LLVM intermediate representation. + +:doc:`ProgrammersManual` + Introduction to the general layout of the LLVM sourcebase, important classes + and APIs, and some tips & tricks. + +:ref:`index-subsystem-docs` + A collection of pages documenting various subsystems of LLVM. + + + +.. _Developer's List (llvm-dev): http://lists.llvm.org/mailman/listinfo/llvm-dev +.. _irc.oftc.net: irc://irc.oftc.net/llvm +.. _beginner keyword: https://bugs.llvm.org/buglist.cgi?bug_status=NEW&bug_status=REOPENED&keywords=beginner%2C%20&keywords_type=allwords&list_id=130748&query_format=advanced&resolution=--- +.. _bug tracker: https://bugs.llvm.org +.. _clang-format-diff.py: https://reviews.llvm.org/source/clang/browse/cfe/trunk/tools/clang-format/clang-format-diff.py +.. _git-clang-format: https://reviews.llvm.org/source/clang/browse/cfe/trunk/tools/clang-format/git-clang-format +.. _LLVM's Phabricator: https://reviews.llvm.org/ +.. _LLVM's Open Projects page: https://llvm.org/OpenProjects.html#what +.. _LLVM Developer's mailing list: http://lists.llvm.org/mailman/listinfo/llvm-dev diff --git a/docs/Extensions.rst b/docs/Extensions.rst index 14fea30204b4..32eeadd78ba6 100644 --- a/docs/Extensions.rst +++ b/docs/Extensions.rst @@ -288,3 +288,31 @@ standard stack probe emission. The MSVC environment does not emit code for VLAs currently. +Windows on ARM64 +---------------- + +Stack Probe Emission +^^^^^^^^^^^^^^^^^^^^ + +The reference implementation (Microsoft Visual Studio 2017) emits stack probes +in the following fashion: + +.. code-block:: gas + + mov x15, #constant + bl __chkstk + sub sp, sp, x15, lsl #4 + +However, this has the limitation of 256 MiB (±128MiB). 
In order to accommodate +larger binaries, LLVM supports the use of ``-mcode-model=large`` to allow a 8GiB +(±4GiB) range via a slight deviation. It will generate an indirect jump as +follows: + +.. code-block:: gas + + mov x15, #constant + adrp x16, __chkstk + add x16, x16, :lo12:__chkstk + blr x16 + sub sp, sp, x15, lsl #4 + diff --git a/docs/GettingStarted.rst b/docs/GettingStarted.rst index a90a4b05dd11..ed2e936d1360 100644 --- a/docs/GettingStarted.rst +++ b/docs/GettingStarted.rst @@ -57,7 +57,7 @@ Here's the short story for getting up and running quickly with LLVM: * ``cd where-you-want-llvm-to-live`` * ``cd llvm/tools/clang/tools`` * ``svn co http://llvm.org/svn/llvm-project/clang-tools-extra/trunk extra`` - + #. Checkout LLD linker **[Optional]**: * ``cd where-you-want-llvm-to-live`` @@ -466,34 +466,13 @@ populate it with the LLVM source code, Makefiles, test directories, and local copies of documentation files. If you want to get a specific release (as opposed to the most recent revision), -you can checkout it from the '``tags``' directory (instead of '``trunk``'). The +you can check it out from the '``tags``' directory (instead of '``trunk``'). The following releases are located in the following subdirectories of the '``tags``' directory: -* Release 3.4: **RELEASE_34/final** -* Release 3.3: **RELEASE_33/final** -* Release 3.2: **RELEASE_32/final** -* Release 3.1: **RELEASE_31/final** -* Release 3.0: **RELEASE_30/final** -* Release 2.9: **RELEASE_29/final** -* Release 2.8: **RELEASE_28** -* Release 2.7: **RELEASE_27** -* Release 2.6: **RELEASE_26** -* Release 2.5: **RELEASE_25** -* Release 2.4: **RELEASE_24** -* Release 2.3: **RELEASE_23** -* Release 2.2: **RELEASE_22** -* Release 2.1: **RELEASE_21** -* Release 2.0: **RELEASE_20** -* Release 1.9: **RELEASE_19** -* Release 1.8: **RELEASE_18** -* Release 1.7: **RELEASE_17** -* Release 1.6: **RELEASE_16** -* Release 1.5: **RELEASE_15** -* Release 1.4: **RELEASE_14** -* Release 1.3: **RELEASE_13** -* Release 1.2: **RELEASE_12** -* Release 1.1: **RELEASE_11** +* Release 3.5.0 and later: **RELEASE_350/final** and so on +* Release 2.9 through 3.4: **RELEASE_29/final** and so on +* Release 1.1 through 2.8: **RELEASE_11** and so on * Release 1.0: **RELEASE_1** If you would like to get the LLVM test suite (a separate package as of 1.4), you diff --git a/docs/HowToSubmitABug.rst b/docs/HowToSubmitABug.rst index 25cb2c8c80d3..7881a6e8dcc3 100644 --- a/docs/HowToSubmitABug.rst +++ b/docs/HowToSubmitABug.rst @@ -38,7 +38,7 @@ Crashing Bugs More often than not, bugs in the compiler cause it to crash---often due to an assertion failure of some sort. The most important piece of the puzzle -is to figure out if it is crashing in the GCC front-end or if it is one of +is to figure out if it is crashing in the Clang front-end or if it is one of the LLVM libraries (e.g. the optimizer or code generator) that has problems. diff --git a/docs/LangRef.rst b/docs/LangRef.rst index a091cc1dd2ae..beb0721dab5c 100644 --- a/docs/LangRef.rst +++ b/docs/LangRef.rst @@ -883,8 +883,8 @@ The selection kind must be one of the following: The linker may choose any COMDAT key but the sections must contain the same amount of data. -Note that the Mach-O platform doesn't support COMDATs and ELF only supports -``any`` as a selection kind. +Note that the Mach-O platform doesn't support COMDATs, and ELF and WebAssembly +only support ``any`` as a selection kind. 
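For ELF or WebAssembly targets, where only ``any`` is available, a minimal module might look like the following sketch (the ``$shared`` key and the symbol names are illustrative, not taken from the surrounding text):

.. code-block:: llvm

   $shared = comdat any

   ; Both the variable and the function are keyed on $shared; with the
   ; ``any`` selection kind the linker keeps one copy of the group and
   ; discards duplicate groups from other translation units.
   @g = global i32 0, comdat($shared)

   define void @f() comdat($shared) {
     ret void
   }
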
Here is an example of a COMDAT group where a function will only be selected if the COMDAT key's section is the largest: @@ -1064,6 +1064,8 @@ Currently, only the following parameter attributes are defined: to trap and to be properly aligned. This is not a valid attribute for return values. +.. _attr_align: + ``align `` This indicates that the pointer value may be assumed by the optimizer to have the specified alignment. @@ -1597,6 +1599,10 @@ example: ``sanitize_thread`` This attribute indicates that ThreadSanitizer checks (dynamic thread safety analysis) are enabled for this function. +``sanitize_hwaddress`` + This attribute indicates that HWAddressSanitizer checks + (dynamic address safety analysis based on tagged pointers) are enabled for + this function. ``speculatable`` This function attribute indicates that the function does not have any effects besides calculating its result and does not have undefined behavior. @@ -4490,7 +4496,7 @@ The current supported vocabulary is limited: - ``DW_OP_plus_uconst, 93`` adds ``93`` to the working expression. - ``DW_OP_LLVM_fragment, 16, 8`` specifies the offset and size (``16`` and ``8`` here, respectively) of the variable fragment from the working expression. Note - that contrary to DW_OP_bit_piece, the offset is describing the the location + that contrary to DW_OP_bit_piece, the offset is describing the location within the described source variable. - ``DW_OP_swap`` swaps top two stack entries. - ``DW_OP_xderef`` provides extended dereference mechanism. The entry at the top @@ -6827,10 +6833,12 @@ Both arguments must have identical types. Semantics: """""""""" -This instruction returns the *remainder* of a division. The remainder -has the same sign as the dividend. This instruction can also take any -number of :ref:`fast-math flags `, which are optimization hints -to enable otherwise unsafe floating point optimizations: +Return the same value as a libm '``fmod``' function but without trapping or +setting ``errno``. + +The remainder has the same sign as the dividend. This instruction can also +take any number of :ref:`fast-math flags `, which are optimization +hints to enable otherwise unsafe floating-point optimizations: Example: """""""" @@ -9027,9 +9035,11 @@ This instruction requires several arguments: #. Arguments with the :ref:`inalloca ` attribute are forwarded in place. - Both markers imply that the callee does not access allocas or varargs from - the caller. Calls marked ``musttail`` must obey the following additional - rules: + Both markers imply that the callee does not access allocas from the caller. + The ``tail`` marker additionally implies that the callee does not access + varargs from the caller, while ``musttail`` implies that varargs from the + caller are passed to the callee. Calls marked ``musttail`` must obey the + following additional rules: - The call must immediately precede a :ref:`ret ` instruction, or a pointer bitcast followed by a ret instruction. @@ -10333,9 +10343,9 @@ support all bit widths however. :: declare void @llvm.memcpy.p0i8.p0i8.i32(i8* , i8* , - i32 , i32 , i1 ) + i32 , i1 ) declare void @llvm.memcpy.p0i8.p0i8.i64(i8* , i8* , - i64 , i32 , i1 ) + i64 , i1 ) Overview: """"""""" @@ -10344,7 +10354,7 @@ The '``llvm.memcpy.*``' intrinsics copy a block of memory from the source location to the destination location. 
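As a sketch of the updated form described here, alignment is now conveyed through ``align`` parameter attributes at the call site rather than through a dedicated argument; the function name and the particular alignment values below are illustrative only:

.. code-block:: llvm

   declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1)

   define void @copy32(i8* %dst, i8* %src) {
     ; Copy 32 bytes; the destination is known to be 8-byte aligned and the
     ; source 4-byte aligned, expressed as parameter attributes instead of
     ; the removed alignment argument.
     call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %dst, i8* align 4 %src, i64 32, i1 false)
     ret void
   }
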
Note that, unlike the standard libc function, the ``llvm.memcpy.*`` -intrinsics do not return a value, takes extra alignment/isvolatile +intrinsics do not return a value, takes extra isvolatile arguments and the pointers can be in specified address spaces. Arguments: @@ -10352,13 +10362,11 @@ Arguments: The first argument is a pointer to the destination, the second is a pointer to the source. The third argument is an integer argument -specifying the number of bytes to copy, the fourth argument is the -alignment of the source and destination locations, and the fifth is a +specifying the number of bytes to copy, and the fourth is a boolean indicating a volatile access. -If the call to this intrinsic has an alignment value that is not 0 or 1, -then the caller guarantees that both the source and destination pointers -are aligned to that boundary. +The :ref:`align ` parameter attribute can be provided +for the first and second arguments. If the ``isvolatile`` parameter is ``true``, the ``llvm.memcpy`` call is a :ref:`volatile operation `. The detailed access behavior is not @@ -10388,9 +10396,9 @@ bit widths however. :: declare void @llvm.memmove.p0i8.p0i8.i32(i8* , i8* , - i32 , i32 , i1 ) + i32 , i1 ) declare void @llvm.memmove.p0i8.p0i8.i64(i8* , i8* , - i64 , i32 , i1 ) + i64 , i1 ) Overview: """"""""" @@ -10401,21 +10409,19 @@ source location to the destination location. It is similar to the overlap. Note that, unlike the standard libc function, the ``llvm.memmove.*`` -intrinsics do not return a value, takes extra alignment/isvolatile -arguments and the pointers can be in specified address spaces. +intrinsics do not return a value, takes an extra isvolatile +argument and the pointers can be in specified address spaces. Arguments: """""""""" The first argument is a pointer to the destination, the second is a pointer to the source. The third argument is an integer argument -specifying the number of bytes to copy, the fourth argument is the -alignment of the source and destination locations, and the fifth is a +specifying the number of bytes to copy, and the fourth is a boolean indicating a volatile access. -If the call to this intrinsic has an alignment value that is not 0 or 1, -then the caller guarantees that the source and destination pointers are -aligned to that boundary. +The :ref:`align ` parameter attribute can be provided +for the first and second arguments. If the ``isvolatile`` parameter is ``true``, the ``llvm.memmove`` call is a :ref:`volatile operation `. The detailed access behavior is @@ -10445,9 +10451,9 @@ support all bit widths. :: declare void @llvm.memset.p0i8.i32(i8* , i8 , - i32 , i32 , i1 ) + i32 , i1 ) declare void @llvm.memset.p0i8.i64(i8* , i8 , - i64 , i32 , i1 ) + i64 , i1 ) Overview: """"""""" @@ -10456,8 +10462,8 @@ The '``llvm.memset.*``' intrinsics fill a block of memory with a particular byte value. Note that, unlike the standard libc function, the ``llvm.memset`` -intrinsic does not return a value and takes extra alignment/volatile -arguments. Also, the destination can be in an arbitrary address space. +intrinsic does not return a value and takes an extra volatile +argument. Also, the destination can be in an arbitrary address space. Arguments: """""""""" @@ -10465,11 +10471,10 @@ Arguments: The first argument is a pointer to the destination to fill, the second is the byte value with which to fill it, the third argument is an integer argument specifying the number of bytes to fill, and the fourth -argument is the known alignment of the destination location. 
+is a boolean indicating a volatile access. -If the call to this intrinsic has an alignment value that is not 0 or 1, -then the caller guarantees that the destination pointer is aligned to -that boundary. +The :ref:`align ` parameter attribute can be provided +for the first arguments. If the ``isvolatile`` parameter is ``true``, the ``llvm.memset`` call is a :ref:`volatile operation `. The detailed access behavior is not @@ -10479,9 +10484,7 @@ Semantics: """""""""" The '``llvm.memset.*``' intrinsics fill "len" bytes of memory starting -at the destination location. If the argument is known to be aligned to -some boundary, this can be specified as the fourth argument, otherwise -it should be set to 0 or 1 (both meaning no alignment). +at the destination location. '``llvm.sqrt.*``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -13201,7 +13204,7 @@ Semantics: This function returns the nonnegative square root of the specified value. If the value is less than negative zero, a floating point exception occurs -and the the return value is architecture specific. +and the return value is architecture specific. '``llvm.experimental.constrained.pow``' Intrinsic diff --git a/docs/LibFuzzer.rst b/docs/LibFuzzer.rst index d4e33cb0670e..7a105e5ed129 100644 --- a/docs/LibFuzzer.rst +++ b/docs/LibFuzzer.rst @@ -246,6 +246,10 @@ The most important command line options are: the process is treated as a failure case. The limit is checked in a separate thread every second. If running w/o ASAN/MSAN, you may use 'ulimit -v' instead. +``-malloc_limit_mb`` + If non-zero, the fuzzer will exit if the target tries to allocate this + number of Mb with one malloc call. + If zero (default) same limit as rss_limit_mb is applied. ``-timeout_exitcode`` Exit code (default 77) used if libFuzzer reports a timeout. ``-error_exitcode`` @@ -707,6 +711,8 @@ Trophies * `Linux Kernel's BPF verifier `_ +* `Linux Kernel's Crypto code `_ + * Capstone: `[1] `__ `[2] `__ * file:`[1] `__ `[2] `__ `[3] `__ `[4] `__ diff --git a/docs/MIRLangRef.rst b/docs/MIRLangRef.rst index b4ca8f2347a7..17f64e88b07c 100644 --- a/docs/MIRLangRef.rst +++ b/docs/MIRLangRef.rst @@ -121,6 +121,8 @@ Tests are more accessible and future proof when simplified: contains dummy functions (see above). The .mir loader will create the IR functions automatically in this case. +.. _limitations: + Limitations ----------- @@ -238,6 +240,8 @@ in the block's definition: The block's name should be identical to the name of the IR block that this machine block is based on. +.. _block-references: + Block References ^^^^^^^^^^^^^^^^ @@ -246,13 +250,25 @@ blocks are referenced using the following syntax: .. code-block:: text - %bb.[.] + %bb. -Examples: +Example: .. code-block:: llvm %bb.0 + +The following syntax is also supported, but the former syntax is preferred for +block references: + +.. code-block:: text + + %bb.[.] + +Example: + +.. code-block:: llvm + %bb.1.then Successors @@ -349,14 +365,34 @@ machine instructions. Instruction Flags ^^^^^^^^^^^^^^^^^ -The flag ``frame-setup`` can be specified before the instruction's name: +The flag ``frame-setup`` or ``frame-destroy`` can be specified before the +instruction's name: .. code-block:: text %fp = frame-setup ADDXri %sp, 0, 0 +.. code-block:: text + + %x21, %x20 = frame-destroy LDPXi %sp + .. _registers: +Bundled Instructions +^^^^^^^^^^^^^^^^^^^^ + +The syntax for bundled instructions is the following: + +.. 
code-block:: text + + BUNDLE implicit-def %r0, implicit-def %r1, implicit %r2 { + %r0 = SOME_OP %r2 + %r1 = ANOTHER_OP internal %r0 + } + +The first instruction is often a bundle header. The instructions between ``{`` +and ``}`` are bundled with the first instruction. + Registers --------- @@ -418,7 +454,40 @@ immediate machine operand ``-42``: %eax = MOV32ri -42 -.. TODO: Describe the CIMM (Rare) and FPIMM immediate operands. +An immediate operand is also used to represent a subregister index when the +machine instruction has one of the following opcodes: + +- ``EXTRACT_SUBREG`` + +- ``INSERT_SUBREG`` + +- ``REG_SEQUENCE`` + +- ``SUBREG_TO_REG`` + +In case this is true, the Machine Operand is printed according to the target. + +For example: + +In AArch64RegisterInfo.td: + +.. code-block:: text + + def sub_32 : SubRegIndex<32>; + +If the third operand is an immediate with the value ``15`` (target-dependent +value), based on the instruction's opcode and the operand's index the operand +will be printed as ``%subreg.sub_32``: + +.. code-block:: text + + %1:gpr64 = SUBREG_TO_REG 0, %0, %subreg.sub_32 + +For integers > 64bit, we use a special machine operand, ``MO_CImmediate``, +which stores the immediate in a ``ConstantInt`` using an ``APInt`` (LLVM's +arbitrary precision integers). + +.. TODO: Describe the FPIMM immediate operands. .. _register-operands: @@ -484,6 +553,9 @@ corresponding internal ``llvm::RegState`` representation: * - ``debug-use`` - ``RegState::Debug`` + * - ``renamable`` + - ``RegState::Renamable`` + .. _subregister-indices: Subregister Indices @@ -501,6 +573,53 @@ lower bits from the 32-bit virtual register 0 to the 8-bit virtual register 1: The names of the subregister indices are target specific, and are typically defined in the target's ``*RegisterInfo.td`` file. +Constant Pool Indices +^^^^^^^^^^^^^^^^^^^^^ + +A constant pool index (CPI) operand is printed using its index in the +function's ``MachineConstantPool`` and an offset. + +For example, a CPI with the index 1 and offset 8: + +.. code-block:: text + + %1:gr64 = MOV64ri %const.1 + 8 + +For a CPI with the index 0 and offset -12: + +.. code-block:: text + + %1:gr64 = MOV64ri %const.0 - 12 + +A constant pool entry is bound to a LLVM IR ``Constant`` or a target-specific +``MachineConstantPoolValue``. When serializing all the function's constants the +following format is used: + +.. code-block:: text + + constants: + - id: + value: + alignment: + isTargetSpecific: + +where ```` is a 32-bit unsigned integer, ```` is a `LLVM IR Constant +`_, alignment is a 32-bit +unsigned integer, and ```` is either true or false. + +Example: + +.. code-block:: text + + constants: + - id: 0 + value: 'double 3.250000e+00' + alignment: 8 + - id: 1 + value: 'g-(LPC0+8)' + alignment: 4 + isTargetSpecific: true + Global Value Operands ^^^^^^^^^^^^^^^^^^^^^ @@ -520,24 +639,133 @@ If the identifier doesn't match the regular expression The unnamed global values are represented using an unsigned numeric value with the '@' prefix, like in the following examples: ``@0``, ``@989``. +Target-dependent Index Operands +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +A target index operand is a target-specific index and an offset. The +target-specific index is printed using target-specific names and a positive or +negative offset. + +For example, the ``amdgpu-constdata-start`` is associated with the index ``0`` +in the AMDGPU backend. So if we have a target index operand with the index 0 +and the offset 8: + +.. 
code-block:: text + + %sgpr2 = S_ADD_U32 _, target-index(amdgpu-constdata-start) + 8, implicit-def _, implicit-def _ + +Jump-table Index Operands +^^^^^^^^^^^^^^^^^^^^^^^^^ + +A jump-table index operand with the index 0 is printed as following: + +.. code-block:: text + + tBR_JTr killed %r0, %jump-table.0 + +A machine jump-table entry contains a list of ``MachineBasicBlocks``. When serializing all the function's jump-table entries, the following format is used: + +.. code-block:: text + + jumpTable: + kind: + entries: + - id: + blocks: [ , , ... ] + +where ```` is describing how the jump table is represented and emitted (plain address, relocations, PIC, etc.), and each ```` is a 32-bit unsigned integer and ``blocks`` contains a list of :ref:`machine basic block references `. + +Example: + +.. code-block:: text + + jumpTable: + kind: inline + entries: + - id: 0 + blocks: [ '%bb.3', '%bb.9', '%bb.4.d3' ] + - id: 1 + blocks: [ '%bb.7', '%bb.7', '%bb.4.d3', '%bb.5' ] + +External Symbol Operands +^^^^^^^^^^^^^^^^^^^^^^^^^ + +An external symbol operand is represented using an identifier with the ``$`` +prefix. The identifier is surrounded with ""'s and escaped if it has any +special non-printable characters in it. + +Example: + +.. code-block:: text + + CALL64pcrel32 $__stack_chk_fail, csr_64, implicit %rsp, implicit-def %rsp + +MCSymbol Operands +^^^^^^^^^^^^^^^^^ + +A MCSymbol operand is holding a pointer to a ``MCSymbol``. For the limitations +of this operand in MIR, see :ref:`limitations `. + +The syntax is: + +.. code-block:: text + + EH_LABEL + +CFIIndex Operands +^^^^^^^^^^^^^^^^^ + +A CFI Index operand is holding an index into a per-function side-table, +``MachineFunction::getFrameInstructions()``, which references all the frame +instructions in a ``MachineFunction``. A ``CFI_INSTRUCTION`` may look like it +contains multiple operands, but the only operand it contains is the CFI Index. +The other operands are tracked by the ``MCCFIInstruction`` object. + +The syntax is: + +.. code-block:: text + + CFI_INSTRUCTION offset %w30, -16 + +which may be emitted later in the MC layer as: + +.. code-block:: text + + .cfi_offset w30, -16 + +IntrinsicID Operands +^^^^^^^^^^^^^^^^^^^^ + +An Intrinsic ID operand contains a generic intrinsic ID or a target-specific ID. + +The syntax for the ``returnaddress`` intrinsic is: + +.. code-block:: text + + %x0 = COPY intrinsic(@llvm.returnaddress) + +Predicate Operands +^^^^^^^^^^^^^^^^^^ + +A Predicate operand contains an IR predicate from ``CmpInst::Predicate``, like +``ICMP_EQ``, etc. + +For an int eq predicate ``ICMP_EQ``, the syntax is: + +.. code-block:: text + + %2:gpr(s32) = G_ICMP intpred(eq), %0, %1 + .. TODO: Describe the parsers default behaviour when optional YAML attributes are missing. -.. TODO: Describe the syntax for the bundled instructions. .. TODO: Describe the syntax for virtual register YAML definitions. .. TODO: Describe the machine function's YAML flag attributes. -.. TODO: Describe the syntax for the external symbol and register - mask machine operands. +.. TODO: Describe the syntax for the register mask machine operands. .. TODO: Describe the frame information YAML mapping. .. TODO: Describe the syntax of the stack object machine operands and their YAML definitions. -.. TODO: Describe the syntax of the constant pool machine operands and their - YAML definitions. -.. TODO: Describe the syntax of the jump table machine operands and their - YAML definitions. .. TODO: Describe the syntax of the block address machine operands. -.. 
TODO: Describe the syntax of the CFI index machine operands. .. TODO: Describe the syntax of the metadata machine operands, and the instructions debug location attribute. -.. TODO: Describe the syntax of the target index machine operands. .. TODO: Describe the syntax of the register live out machine operands. .. TODO: Describe the syntax of the machine memory operands. diff --git a/docs/NVPTXUsage.rst b/docs/NVPTXUsage.rst index 159fe078653c..38222afbc63a 100644 --- a/docs/NVPTXUsage.rst +++ b/docs/NVPTXUsage.rst @@ -499,7 +499,7 @@ The output we get from ``llc`` (as of LLVM 3.4): .reg .s32 %r<2>; .reg .s64 %rl<8>; - // BB#0: // %entry + // %bb.0: // %entry ld.param.u64 %rl1, [kernel_param_0]; mov.u32 %r1, %tid.x; mul.wide.s32 %rl2, %r1, 4; @@ -897,7 +897,7 @@ This gives us the following PTX (excerpt): .reg .s32 %r<21>; .reg .s64 %rl<8>; - // BB#0: // %entry + // %bb.0: // %entry ld.param.u64 %rl2, [kernel_param_0]; mov.u32 %r3, %tid.x; ld.param.u64 %rl3, [kernel_param_1]; @@ -921,7 +921,7 @@ This gives us the following PTX (excerpt): abs.f32 %f4, %f1; setp.gtu.f32 %p4, %f4, 0f7F800000; @%p4 bra BB0_4; - // BB#3: // %__nv_isnanf.exit5.i + // %bb.3: // %__nv_isnanf.exit5.i abs.f32 %f5, %f2; setp.le.f32 %p5, %f5, 0f7F800000; @%p5 bra BB0_5; @@ -953,7 +953,7 @@ This gives us the following PTX (excerpt): selp.f32 %f110, 0f7F800000, %f99, %p16; setp.eq.f32 %p17, %f110, 0f7F800000; @%p17 bra BB0_28; - // BB#27: + // %bb.27: fma.rn.f32 %f110, %f110, %f108, %f110; BB0_28: // %__internal_accurate_powf.exit.i setp.lt.f32 %p18, %f1, 0f00000000; diff --git a/docs/PDB/MsfFile.rst b/docs/PDB/MsfFile.rst index bdceca3aeb39..dfbbf9ded7fb 100644 --- a/docs/PDB/MsfFile.rst +++ b/docs/PDB/MsfFile.rst @@ -5,6 +5,44 @@ The MSF File Format .. contents:: :local: +.. _msf_layout: + +File Layout +=========== + +The MSF file format consists of the following components: + +1. :ref:`msf_superblock` +2. :ref:`msf_freeblockmap` (also know as Free Page Map, or FPM) +3. Data + +Each component is stored as an indexed block, the length of which is specified +in ``SuperBlock::BlockSize``. The file consists of 1 or more iterations of the +following pattern (sometimes referred to as an "interval"): + +1. 1 block of data +2. Free Block Map 1 (corresponds to ``SuperBlock::FreeBlockMapBlock`` 1) +3. Free Block Map 2 (corresponds to ``SuperBlock::FreeBlockMapBlock`` 2) +4. ``SuperBlock::BlockSize - 3`` blocks of data + +In the first interval, the first data block is used to store +:ref:`msf_superblock`. + +The following diagram demonstrates the general layout of the file (\| denotes +the end of an interval, and is for visualization purposes only): + ++-------------+-----------------------+------------------+------------------+----------+----+------+------+------+-------------+----+-----+ +| Block Index | 0 | 1 | 2 | 3 - 4095 | \| | 4096 | 4097 | 4098 | 4099 - 8191 | \| | ... | ++=============+=======================+==================+==================+==========+====+======+======+======+=============+====+=====+ +| Meaning | :ref:`msf_superblock` | Free Block Map 1 | Free Block Map 2 | Data | \| | Data | FPM1 | FPM2 | Data | \| | ... | ++-------------+-----------------------+------------------+------------------+----------+----+------+------+------+-------------+----+-----+ + +The file may end after any block, including immediately after a FPM1. + +.. note:: + LLVM only supports 4096 byte blocks (sometimes referred to as the "BigMsf" + variant), so the rest of this document will assume a block size of 4096. + .. 
_msf_superblock: The Superblock @@ -32,14 +70,9 @@ follows: sizes of 4KiB, and all further discussion assumes a block size of 4KiB. - **FreeBlockMapBlock** - The index of a block within the file, at which begins a bitfield representing the set of all blocks within the file which are "free" - (i.e. the data within that block is not used). This bitfield is spread across - the MSF file at ``BlockSize`` intervals. - **Important**: ``FreeBlockMapBlock`` can only be ``1`` or ``2``! This field - is designed to support incremental and atomic updates of the underlying MSF - file. While writing to an MSF file, if the value of this field is `1`, you - can write your new modified bitfield to page 2, and vice versa. Only when - you commit the file to disk do you need to swap the value in the SuperBlock - to point to the new ``FreeBlockMapBlock``. + (i.e. the data within that block is not used). See :ref:`msf_freeblockmap` for + more information. + **Important**: ``FreeBlockMapBlock`` can only be ``1`` or ``2``! - **NumBlocks** - The total number of blocks in the file. ``NumBlocks * BlockSize`` should equal the size of the file on disk. - **NumDirectoryBytes** - The size of the stream directory, in bytes. The stream @@ -53,7 +86,32 @@ follows: contains the list of blocks that the stream directory occupies, and the stream directory itself can be stitched together accordingly. The number of ``ulittle32_t``'s in this array is given by ``ceil(NumDirectoryBytes / BlockSize)``. - + +.. _msf_freeblockmap: + +The Free Block Map +================== + +The Free Block Map (sometimes referred to as the Free Page Map, or FPM) is a +series of blocks which contains a bit flag for every block in the file. The +flag will be set to 0 if the block is in use, and 1 if the block is unused. + +Each file contains two FPMs, one of which is active at any given time. This +feature is designed to support incremental and atomic updates of the underlying +MSF file. While writing to an MSF file, if the active FPM is FPM1, you can +write your new modified bitfield to FPM2, and vice versa. Only when you commit +the file to disk do you need to swap the value in the SuperBlock to point to +the new ``FreeBlockMapBlock``. + +The Free Block Maps are stored as a series of single blocks thoughout the file +at intervals of BlockSize. Because each FPM block is of size ``BlockSize`` +bytes, it contains 8 times as many bits as an interval has blocks. This means +that the first block of each FPM refers to the first 8 intervals of the file +(the first 32768 blocks), the second block of each FPM refers to the next 8 +blocks, and so on. This results in far more FPM blocks being present than are +required, but in order to maintain backwards compatibility the format must stay +this way. + The Stream Directory ==================== The Stream Directory is the root of all access to the other streams in an MSF @@ -66,10 +124,10 @@ file. Beginning at byte 0 of the stream directory is the following structure: ulittle32_t StreamSizes[NumStreams]; ulittle32_t StreamBlocks[NumStreams][]; }; - + And this structure occupies exactly ``SuperBlock->NumDirectoryBytes`` bytes. Note that each of the last two arrays is of variable length, and in particular -that the second array is jagged. +that the second array is jagged. **Example:** Suppose a hypothetical PDB file with a 4KiB block size, and 4 streams of lengths {1000 bytes, 8000 bytes, 16000 bytes, 9000 bytes}. 
@@ -97,7 +155,7 @@ like: {10, 15, 12} }; }; - + In total, this occupies ``15 * 4 = 60`` bytes, so ``SuperBlock->NumDirectoryBytes`` would equal ``60``, and ``SuperBlock->BlockMapAddr`` would be an array of one ``ulittle32_t``, since ``60 <= SuperBlock->BlockSize``. diff --git a/docs/Phabricator.rst b/docs/Phabricator.rst index cc8484cc1e3e..53cb3b5980a9 100644 --- a/docs/Phabricator.rst +++ b/docs/Phabricator.rst @@ -38,6 +38,8 @@ the command line. To get you set up, follow the You can learn more about how to use arc to interact with Phabricator in the `Arcanist User Guide`_. +.. _phabricator-request-review-web: + Requesting a review via the web interface ----------------------------------------- @@ -63,15 +65,16 @@ To upload a new patch: * Click *Differential*. * Click *+ Create Diff*. * Paste the text diff or browse to the patch file. Click *Create Diff*. -* Leave the Repository field blank. +* Leave this first Repository field blank. (We'll fill in the Repository + later, when sending the review.) * Leave the drop down on *Create a new Revision...* and click *Continue*. * Enter a descriptive title and summary. The title and summary are usually in the form of a :ref:`commit message `. -* Add reviewers (see below for advice) and subscribe mailing - lists that you want to be included in the review. If your patch is - for LLVM, add llvm-commits as a Subscriber; if your patch is for Clang, - add cfe-commits. -* Leave the Repository and Project fields blank. +* Add reviewers (see below for advice). (If you set the Repository field + correctly, llvm-commits or cfe-commits will be subscribed automatically; + otherwise, you will have to manually subscribe them.) +* In the Repository field, enter the name of the project (LLVM, Clang, + etc.) to which the review should be sent. * Click *Save*. To submit an updated patch: @@ -81,7 +84,8 @@ To submit an updated patch: * Paste the updated diff or browse to the updated patch file. Click *Create Diff*. * Select the review you want to from the *Attach To* dropdown and click *Continue*. -* Leave the Repository and Project fields blank. +* Leave the Repository field blank. (We previously filled out the Repository + for the review request.) * Add comments about the changes in the new diff. Click *Save*. Choosing reviewers: You typically pick one or two people as initial reviewers. diff --git a/docs/ProgrammersManual.rst b/docs/ProgrammersManual.rst index 719d3997594e..07048a52319e 100644 --- a/docs/ProgrammersManual.rst +++ b/docs/ProgrammersManual.rst @@ -1040,7 +1040,7 @@ line argument: .. code-block:: c++ - DEBUG(errs() << "I am here!\n"); + DEBUG(dbgs() << "I am here!\n"); Then you can run your pass like this: @@ -1076,10 +1076,10 @@ follows: .. code-block:: c++ #define DEBUG_TYPE "foo" - DEBUG(errs() << "'foo' debug type\n"); + DEBUG(dbgs() << "'foo' debug type\n"); #undef DEBUG_TYPE #define DEBUG_TYPE "bar" - DEBUG(errs() << "'bar' debug type\n")); + DEBUG(dbgs() << "'bar' debug type\n"); #undef DEBUG_TYPE Then you can run your pass like this: @@ -1120,8 +1120,8 @@ preceding example could be written as: .. code-block:: c++ - DEBUG_WITH_TYPE("foo", errs() << "'foo' debug type\n"); - DEBUG_WITH_TYPE("bar", errs() << "'bar' debug type\n")); + DEBUG_WITH_TYPE("foo", dbgs() << "'foo' debug type\n"); + DEBUG_WITH_TYPE("bar", dbgs() << "'bar' debug type\n"); .. 
_Statistic: diff --git a/docs/Proposals/VectorizationPlan.rst b/docs/Proposals/VectorizationPlan.rst index f9700d177d23..6d6a38890c06 100644 --- a/docs/Proposals/VectorizationPlan.rst +++ b/docs/Proposals/VectorizationPlan.rst @@ -212,7 +212,7 @@ Related LLVM components Polly [7]_. 3. Loop Vectorizer: the Vectorization Plan aims to upgrade the infrastructure of - the Loop Vectorizer and extend it to handle outer loops [8,9]_. + the Loop Vectorizer and extend it to handle outer loops [8]_, [9]_. References ---------- diff --git a/docs/ReleaseNotes.rst b/docs/ReleaseNotes.rst index 4b6d7931e848..92dfb8e3bbb4 100644 --- a/docs/ReleaseNotes.rst +++ b/docs/ReleaseNotes.rst @@ -1,12 +1,12 @@ ======================== -LLVM 6.0.0 Release Notes +LLVM 7.0.0 Release Notes ======================== .. contents:: :local: .. warning:: - These are in-progress notes for the upcoming LLVM 6 release. + These are in-progress notes for the upcoming LLVM 7 release. Release notes for previous releases can be found on `the Download Page `_. @@ -15,7 +15,7 @@ Introduction ============ This document contains the release notes for the LLVM Compiler Infrastructure, -release 5.0.0. Here we describe the status of LLVM, including major improvements +release 7.0.0. Here we describe the status of LLVM, including major improvements from the previous release, improvements in various subprojects of LLVM, and some of the current users of the code. All LLVM releases may be downloaded from the `LLVM releases web site `_. @@ -40,15 +40,6 @@ Non-comprehensive list of changes in this release functionality, or simply have a lot to talk about), see the `NOTE` below for adding a new subsection. -* The ``Redirects`` argument of ``llvm::sys::ExecuteAndWait`` and - ``llvm::sys::ExecuteNoWait`` was changed to an ``ArrayRef`` of optional - ``StringRef``'s to make it safer and more convenient to use. - -* The backend name was added to the Target Registry to allow run-time - information to be fed back into TableGen. Out-of-tree targets will need to add - the name used in the `def X : Target` definition to the call to - `RegisterTarget`. - * Note.. .. NOTE @@ -108,7 +99,7 @@ Changes to the C API During this release ... -External Open Source Projects Using LLVM 6 +External Open Source Projects Using LLVM 7 ========================================== * A project... diff --git a/docs/ReleaseProcess.rst b/docs/ReleaseProcess.rst index d7f703126019..5822360cd1df 100644 --- a/docs/ReleaseProcess.rst +++ b/docs/ReleaseProcess.rst @@ -9,9 +9,9 @@ How To Validate a New Release Introduction ============ -This document contains information about testing the release candidates that will -ultimately be the next LLVM release. For more information on how to manage the -actual release, please refer to :doc:`HowToReleaseLLVM`. +This document contains information about testing the release candidates that +will ultimately be the next LLVM release. For more information on how to +manage the actual release, please refer to :doc:`HowToReleaseLLVM`. 
Overview of the Release Process ------------------------------- @@ -21,26 +21,28 @@ and it'll be the role of each volunteer to: * Test and benchmark the previous release -* Test and benchmark each release candidate, comparing to the previous release and candidates +* Test and benchmark each release candidate, comparing to the previous release + and candidates * Identify, reduce and report every regression found during tests and benchmarks * Make sure the critical bugs get fixed and merged to the next release candidate Not all bugs or regressions are show-stoppers and it's a bit of a grey area what -should be fixed before the next candidate and what can wait until the next release. +should be fixed before the next candidate and what can wait until the next +release. It'll depend on: -* The severity of the bug, how many people it affects and if it's a regression or a - known bug. Known bugs are "unsupported features" and some bugs can be disabled if - they have been implemented recently. +* The severity of the bug, how many people it affects and if it's a regression + or a known bug. Known bugs are "unsupported features" and some bugs can be + disabled if they have been implemented recently. -* The stage in the release. Less critical bugs should be considered to be fixed between - RC1 and RC2, but not so much at the end of it. +* The stage in the release. Less critical bugs should be considered to be + fixed between RC1 and RC2, but not so much at the end of it. -* If it's a correctness or a performance regression. Performance regression tends to be - taken more lightly than correctness. +* If it's a correctness or a performance regression. Performance regression + tends to be taken more lightly than correctness. .. _scripts: @@ -52,10 +54,12 @@ The scripts are in the ``utils/release`` directory. test-release.sh --------------- -This script will check-out, configure and compile LLVM+Clang (+ most add-ons, like ``compiler-rt``, -``libcxx``, ``libomp`` and ``clang-extra-tools``) in three stages, and will test the final stage. -It'll have installed the final binaries on the Phase3/Releasei(+Asserts) directory, and -that's the one you should use for the test-suite and other external tests. +This script will check-out, configure and compile LLVM+Clang (+ most add-ons, +like ``compiler-rt``, ``libcxx``, ``libomp`` and ``clang-extra-tools``) in +three stages, and will test the final stage. +It'll have installed the final binaries on the Phase3/Releasei(+Asserts) +directory, and that's the one you should use for the test-suite and other +external tests. To run the script on a specific release candidate run:: @@ -66,25 +70,32 @@ To run the script on a specific release candidate run:: -test-asserts \ -no-compare-files -Each system will require different options. For instance, x86_64 will obviously not need -``-no-64bit`` while 32-bit systems will, or the script will fail. +Each system will require different options. For instance, x86_64 will +obviously not need ``-no-64bit`` while 32-bit systems will, or the script will +fail. The important flags to get right are: -* On the pre-release, you should change ``-rc 1`` to ``-final``. On RC2, change it to ``-rc 2`` and so on. +* On the pre-release, you should change ``-rc 1`` to ``-final``. On RC2, + change it to ``-rc 2`` and so on. -* On non-release testing, you can use ``-final`` in conjunction with ``-no-checkout``, but you'll have to - create the ``final`` directory by hand and link the correct source dir to ``final/llvm.src``. 
+* On non-release testing, you can use ``-final`` in conjunction with + ``-no-checkout``, but you'll have to create the ``final`` directory by hand + and link the correct source dir to ``final/llvm.src``. -* For release candidates, you need ``-test-asserts``, or it won't create a "Release+Asserts" directory, - which is needed for release testing and benchmarking. This will take twice as long. +* For release candidates, you need ``-test-asserts``, or it won't create a + "Release+Asserts" directory, which is needed for release testing and + benchmarking. This will take twice as long. -* On the final candidate you just need Release builds, and that's the binary directory you'll have to pack. +* On the final candidate you just need Release builds, and that's the binary + directory you'll have to pack. -This script builds three phases of Clang+LLVM twice each (Release and Release+Asserts), so use -screen or nohup to avoid headaches, since it'll take a long time. +This script builds three phases of Clang+LLVM twice each (Release and +Release+Asserts), so use screen or nohup to avoid headaches, since it'll take +a long time. -Use the ``--help`` option to see all the options and chose it according to your needs. +Use the ``--help`` option to see all the options and chose it according to +your needs. findRegressions-nightly.py @@ -100,9 +111,12 @@ Test Suite .. contents:: :local: -Follow the `LNT Quick Start Guide `__ link on how to set-up the test-suite +Follow the `LNT Quick Start Guide +`__ link on how to set-up the +test-suite -The binary location you'll have to use for testing is inside the ``rcN/Phase3/Release+Asserts/llvmCore-REL-RC.install``. +The binary location you'll have to use for testing is inside the +``rcN/Phase3/Release+Asserts/llvmCore-REL-RC.install``. Link that directory to an easier location and run the test-suite. An example on the run command line, assuming you created a link from the correct @@ -116,13 +130,16 @@ install directory to ``~/devel/llvm/install``:: --cc ~/devel/llvm/install/bin/clang \ --cxx ~/devel/llvm/install/bin/clang++ -It should have no new regressions, compared to the previous release or release candidate. You don't need to fix -all the bugs in the test-suite, since they're not necessarily meant to pass on all architectures all the time. This is -due to the nature of the result checking, which relies on direct comparison, and most of the time, the failures are -related to bad output checking, rather than bad code generation. +It should have no new regressions, compared to the previous release or release +candidate. You don't need to fix all the bugs in the test-suite, since they're +not necessarily meant to pass on all architectures all the time. This is +due to the nature of the result checking, which relies on direct comparison, +and most of the time, the failures are related to bad output checking, rather +than bad code generation. -If the errors are in LLVM itself, please report every single regression found as blocker, and all the other bugs -as important, but not necessarily blocking the release to proceed. They can be set as "known failures" and to be +If the errors are in LLVM itself, please report every single regression found +as blocker, and all the other bugs as important, but not necessarily blocking +the release to proceed. They can be set as "known failures" and to be fix on a future date. .. 
_pre-release-process: @@ -134,23 +151,26 @@ Pre-Release Process :local: When the release process is announced on the mailing list, you should prepare -for the testing, by applying the same testing you'll do on the release candidates, -on the previous release. +for the testing, by applying the same testing you'll do on the release +candidates, on the previous release. You should: -* Download the previous release sources from http://llvm.org/releases/download.html. +* Download the previous release sources from + http://llvm.org/releases/download.html. -* Run the test-release.sh script on ``final`` mode (change ``-rc 1`` to ``-final``). +* Run the test-release.sh script on ``final`` mode (change ``-rc 1`` to + ``-final``). * Once all three stages are done, it'll test the final stage. -* Using the ``Phase3/Release+Asserts/llvmCore-MAJ.MIN-final.install`` base, run the test-suite. +* Using the ``Phase3/Release+Asserts/llvmCore-MAJ.MIN-final.install`` base, + run the test-suite. -If the final phase's ``make check-all`` failed, it's a good idea to also test the -intermediate stages by going on the obj directory and running ``make check-all`` to find -if there's at least one stage that passes (helps when reducing the error for bug report -purposes). +If the final phase's ``make check-all`` failed, it's a good idea to also test +the intermediate stages by going on the obj directory and running +``make check-all`` to find if there's at least one stage that passes (helps +when reducing the error for bug report purposes). .. _release-process: @@ -166,22 +186,23 @@ to them), and run the release test as above. You should: -* Download the current candidate sources from where the release manager points you - (ex. http://llvm.org/pre-releases/3.3/rc1/). +* Download the current candidate sources from where the release manager points + you (ex. http://llvm.org/pre-releases/3.3/rc1/). -* Repeat the steps above with ``-rc 1``, ``-rc 2`` etc modes and run the test-suite - the same way. +* Repeat the steps above with ``-rc 1``, ``-rc 2`` etc modes and run the + test-suite the same way. * Compare the results, report all errors on Bugzilla and publish the binary blob where the release manager can grab it. -Once the release manages announces that the latest candidate is the good one, you -have to pack the ``Release`` (no Asserts) install directory on ``Phase3`` and that -will be the official binary. +Once the release manages announces that the latest candidate is the good one, +you have to pack the ``Release`` (no Asserts) install directory on ``Phase3`` +and that will be the official binary. * Rename (or link) ``clang+llvm-REL-ARCH-ENV`` to the .install directory -* Tar that into the same name with ``.tar.gz`` extensioan from outside the directory +* Tar that into the same name with ``.tar.gz`` extensioan from outside the + directory * Make it available for the release manager to download @@ -196,15 +217,15 @@ Bug Reporting Process If you found regressions or failures when comparing a release candidate with the previous release, follow the rules below: -* Critical bugs on compilation should be fixed as soon as possible, possibly before - releasing the binary blobs. +* Critical bugs on compilation should be fixed as soon as possible, possibly + before releasing the binary blobs. -* Check-all tests should be fixed before the next release candidate, but can wait - until the test-suite run is finished. 
+* Check-all tests should be fixed before the next release candidate, but can
+  wait until the test-suite run is finished.
 
 * Bugs in the test suite or unimportant check-all tests can be fixed in
   between release candidates.
 
-* New features or recent big changes, when close to the release, should have done
-  in a way that it's easy to disable. If they misbehave, prefer disabling them than
-  releasing an unstable (but untested) binary package.
+* New features or recent big changes, when close to the release, should have
+  been done in a way that makes them easy to disable. If they misbehave,
+  prefer disabling them to releasing an unstable (but untested) binary package.
diff --git a/docs/ScudoHardenedAllocator.rst b/docs/ScudoHardenedAllocator.rst
index 562a39144829..c493f0eadacb 100644
--- a/docs/ScudoHardenedAllocator.rst
+++ b/docs/ScudoHardenedAllocator.rst
@@ -26,32 +26,45 @@ meaning Shield in Spanish and Portuguese).
 Design
 ======
 
+Allocator
+---------
+Scudo can be considered a Frontend to the Sanitizers' common allocator (later
+referenced as the Backend). It is split between a Primary allocator, fast and
+efficient, that services smaller allocation sizes, and a Secondary allocator
+that services larger allocation sizes and is backed by the operating system
+memory mapping primitives.
+
+Scudo was designed with security in mind, but aims at striking a good balance
+between security and performance. It is highly tunable and configurable.
+
 Chunk Header
 ------------
 Every chunk of heap memory will be preceded by a chunk header. This has two
 purposes, the first one being to store various information about the chunk,
 the second one being to detect potential heap overflows. In order to achieve
-this, the header will be checksumed, involving the pointer to the chunk itself
+this, the header will be checksummed, involving the pointer to the chunk itself
 and a global secret. Any corruption of the header will be detected when said
 header is accessed, and the process terminated.
 
 The following information is stored in the header:
 
 - the 16-bit checksum;
-- the unused bytes amount for that chunk, which is necessary for computing the
-  size of the chunk;
+- the class ID for that chunk, which is the "bucket" where the chunk resides
+  for Primary backed allocations, or 0 for Secondary backed allocations;
+- the size (Primary) or unused bytes amount (Secondary) for that chunk, which is
+  necessary for computing the size of the chunk;
 - the state of the chunk (available, allocated or quarantined);
 - the allocation type (malloc, new, new[] or memalign), to detect potential
   mismatches in the allocation APIs used;
 - the offset of the chunk, which is the distance in bytes from the beginning of
-  the returned chunk to the beginning of the backend allocation;
-- a 8-bit salt.
+  the returned chunk to the beginning of the Backend allocation;
 
 This header fits within 8 bytes, on all platforms supported.
 
 The checksum is computed as a CRC32 (made faster with hardware support) of the
 global secret, the chunk pointer itself, and the 8 bytes of header with
-the checksum field zeroed out.
+the checksum field zeroed out. It is not intended to be cryptographically
+strong.
 
 The header is atomically loaded and stored to prevent races. This is important
 as two consecutive chunks could belong to different threads. We also want to
@@ -60,9 +73,9 @@ local copies of the header for this purpose.
Delayed Freelist ----------------- -A delayed freelist allows us to not return a chunk directly to the backend, but +A delayed freelist allows us to not return a chunk directly to the Backend, but to keep it aside for a while. Once a criterion is met, the delayed freelist is -emptied, and the quarantined chunks are returned to the backend. This helps +emptied, and the quarantined chunks are returned to the Backend. This helps mitigate use-after-free vulnerabilities by reducing the determinism of the allocation and deallocation patterns. @@ -107,13 +120,21 @@ and then use it with existing binaries as follows: LD_PRELOAD=`pwd`/scudo-allocator.so ./a.out +Clang +----- +With a recent version of Clang (post rL317337), the allocator can be linked with +a binary at compilation using the ``-fsanitize=scudo`` command-line argument, if +the target platform is supported. Currently, the only other Sanitizer Scudo is +compatible with is UBSan (eg: ``-fsanitize=scudo,undefined``). Compiling with +Scudo will also enforce PIE for the output binary. + Options ------- Several aspects of the allocator can be configured through the following ways: - by defining a ``__scudo_default_options`` function in one's program that returns the options string to be parsed. Said function must have the following - prototype: ``extern "C" const char* __scudo_default_options()``. + prototype: ``extern "C" const char* __scudo_default_options(void)``. - through the environment variable SCUDO_OPTIONS, containing the options string to be parsed. Options defined this way will override any definition made diff --git a/docs/WritingAnLLVMBackend.rst b/docs/WritingAnLLVMBackend.rst index 8cffee4b1bbb..5f34c70540b4 100644 --- a/docs/WritingAnLLVMBackend.rst +++ b/docs/WritingAnLLVMBackend.rst @@ -1008,7 +1008,7 @@ Instruction Scheduling ---------------------- Instruction itineraries can be queried using MCDesc::getSchedClass(). The -value can be named by an enumemation in llvm::XXX::Sched namespace generated +value can be named by an enumeration in llvm::XXX::Sched namespace generated by TableGen in XXXGenInstrInfo.inc. The name of the schedule classes are the same as provided in XXXSchedule.td plus a default NoItinerary class. diff --git a/docs/XRay.rst b/docs/XRay.rst index e9ecc13e3b28..ebf025678305 100644 --- a/docs/XRay.rst +++ b/docs/XRay.rst @@ -143,17 +143,30 @@ variable, where we list down the options and their defaults below. | | | | instrumentation points | | | | | before main. | +-------------------+-----------------+---------------+------------------------+ -| xray_naive_log | ``bool`` | ``true`` | Whether to install | -| | | | the naive log | -| | | | implementation. | +| xray_mode | ``const char*`` | ``""`` | Default mode to | +| | | | install and initialize | +| | | | before ``main``. | +-------------------+-----------------+---------------+------------------------+ | xray_logfile_base | ``const char*`` | ``xray-log.`` | Filename base for the | | | | | XRay logfile. | +-------------------+-----------------+---------------+------------------------+ -| xray_fdr_log | ``bool`` | ``false`` | Whether to install the | -| | | | Flight Data Recorder | +| xray_naive_log | ``bool`` | ``false`` | **DEPRECATED:** Use | +| | | | xray_mode=xray-basic | +| | | | instead. Whether to | +| | | | install the basic log | +| | | | the naive log | +| | | | implementation. 
| ++-------------------+-----------------+---------------+------------------------+ +| xray_fdr_log | ``bool`` | ``false`` | **DEPRECATED:** Use | +| | | | xray_mode=xray-fdr | +| | | | instead. Whether to | +| | | | install the Flight | +| | | | Data Recorder | | | | | (FDR) mode. | +-------------------+-----------------+---------------+------------------------+ +| verbosity | ``int`` | ``0`` | Runtime verbosity | +| | | | level. | ++-------------------+-----------------+---------------+------------------------+ If you choose to not use the default logging implementation that comes with the @@ -241,6 +254,14 @@ following API: - ``__xray_set_log_impl(...)``: This function takes a struct of type ``XRayLogImpl``, which is defined in ``xray/xray_log_interface.h``, part of the XRay compiler-rt installation. +- ``__xray_log_register_mode(...)``: Register a logging implementation against + a string Mode. The implementation is an instance of ``XRayLogImpl`` defined + in ``xray/xray_log_interface.h``. +- ``__xray_log_select_mode(...)``: Select the mode to install, associated with + a string Mode. Only implementations registered with + ``__xray_log_register_mode(...)`` can be chosen with this function. When + successful, has the same effects as calling ``__xray_set_log_impl(...)`` with + the registered logging implementation. - ``__xray_log_init(...)``: This function allows for initializing and re-initializing an installed logging implementation. See ``xray/xray_log_interface.h`` for details, part of the XRay compiler-rt @@ -258,8 +279,11 @@ supports the following subcommands: - ``account``: Performs basic function call accounting statistics with various options for sorting, and output formats (supports CSV, YAML, and console-friendly TEXT). -- ``convert``: Converts an XRay log file from one format to another. Currently - only converts to YAML. +- ``convert``: Converts an XRay log file from one format to another. We can + convert from binary XRay traces (both naive and FDR mode) to YAML, + `flame-graph `_ friendly text + formats, as well as `Chrome Trace Viewer (catapult) + ` formats. - ``graph``: Generates a DOT graph of the function call relationships between functions found in an XRay trace. - ``stack``: Reconstructs function call stacks from a timeline of function diff --git a/docs/XRayExample.rst b/docs/XRayExample.rst index 56f17507d82f..f8e7d943fedd 100644 --- a/docs/XRayExample.rst +++ b/docs/XRayExample.rst @@ -60,7 +60,7 @@ to enable XRay at application start. To do this, XRay checks the $ ./bin/llc input.ll # We need to set the XRAY_OPTIONS to enable some features. - $ XRAY_OPTIONS="patch_premain=true" ./bin/llc input.ll + $ XRAY_OPTIONS="patch_premain=true xray_mode=xray-basic verbosity=1" ./bin/llc input.ll ==69819==XRay: Log file in 'xray-log.llc.m35qPB' At this point we now have an XRay trace we can start analysing. diff --git a/docs/YamlIO.rst b/docs/YamlIO.rst index 0b728ed8ec1e..4c07820b6f99 100644 --- a/docs/YamlIO.rst +++ b/docs/YamlIO.rst @@ -466,7 +466,7 @@ looks like: return StringRef(); } // Determine if this scalar needs quotes. - static bool mustQuote(StringRef) { return true; } + static QuotingType mustQuote(StringRef) { return QuotingType::Single; } }; Block Scalars diff --git a/docs/conf.py b/docs/conf.py index 92eb9813ecf9..ce7df14ac3af 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -48,9 +48,9 @@ # built documents. # # The short version. -version = '6' +version = '7' # The full version, including alpha/beta/rc tags. 
-release = '6' +release = '7' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/docs/index.rst b/docs/index.rst index 47c2f0473931..2173f94459dd 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -242,6 +242,8 @@ For developers of applications which use LLVM as a library. :doc:`OptBisect` A command line option for debugging optimization-induced failures. +.. _index-subsystem-docs: + Subsystem Documentation ======================= @@ -431,6 +433,7 @@ Information about LLVM's development process. .. toctree:: :hidden: + Contributing DeveloperPolicy Projects LLVMBuild @@ -439,6 +442,9 @@ Information about LLVM's development process. ReleaseProcess Phabricator +:doc:`Contributing` + An overview on how to contribute to LLVM. + :doc:`DeveloperPolicy` The LLVM project's policy towards developers and their contributions. diff --git a/docs/tutorial/LangImpl04.rst b/docs/tutorial/LangImpl04.rst index 921c4dcc21ad..b8e55b0fb210 100644 --- a/docs/tutorial/LangImpl04.rst +++ b/docs/tutorial/LangImpl04.rst @@ -380,7 +380,7 @@ demonstrates very basic functionality, but can we do more? Function definitions and calls also work, but something went very wrong on that last line. The call looks valid, so what happened? As you may have guessed from -the the API a Module is a unit of allocation for the JIT, and testfunc was part +the API a Module is a unit of allocation for the JIT, and testfunc was part of the same module that contained anonymous expression. When we removed that module from the JIT to free the memory for the anonymous expression, we deleted the definition of ``testfunc`` along with it. Then, when we tried to call diff --git a/docs/tutorial/LangImpl09.rst b/docs/tutorial/LangImpl09.rst index fe5a95a5769e..d81f9fa0001c 100644 --- a/docs/tutorial/LangImpl09.rst +++ b/docs/tutorial/LangImpl09.rst @@ -197,7 +197,7 @@ expressions: if (DblTy) return DblTy; - DblTy = DBuilder->createBasicType("double", 64, 64, dwarf::DW_ATE_float); + DblTy = DBuilder->createBasicType("double", 64, dwarf::DW_ATE_float); return DblTy; } @@ -208,7 +208,8 @@ And then later on in ``main`` when we're constructing our module: DBuilder = new DIBuilder(*TheModule); KSDbgInfo.TheCU = DBuilder->createCompileUnit( - dwarf::DW_LANG_C, "fib.ks", ".", "Kaleidoscope Compiler", 0, "", 0); + dwarf::DW_LANG_C, DBuilder->createFile("fib.ks", "."), + "Kaleidoscope Compiler", 0, "", 0); There are a couple of things to note here. 
First, while we're producing a compile unit for a language called Kaleidoscope we used the language diff --git a/examples/Kaleidoscope/CMakeLists.txt b/examples/Kaleidoscope/CMakeLists.txt index 543b9f73b4fe..3822cdd9e1c4 100644 --- a/examples/Kaleidoscope/CMakeLists.txt +++ b/examples/Kaleidoscope/CMakeLists.txt @@ -14,3 +14,4 @@ add_subdirectory(Chapter5) add_subdirectory(Chapter6) add_subdirectory(Chapter7) add_subdirectory(Chapter8) +add_subdirectory(Chapter9) diff --git a/examples/Kaleidoscope/Chapter9/toy.cpp b/examples/Kaleidoscope/Chapter9/toy.cpp index 1b13e45ec460..821cf4d25a65 100644 --- a/examples/Kaleidoscope/Chapter9/toy.cpp +++ b/examples/Kaleidoscope/Chapter9/toy.cpp @@ -823,7 +823,7 @@ DIType *DebugInfo::getDoubleTy() { if (DblTy) return DblTy; - DblTy = DBuilder->createBasicType("double", 64, 64, dwarf::DW_ATE_float); + DblTy = DBuilder->createBasicType("double", 64, dwarf::DW_ATE_float); return DblTy; } @@ -1436,7 +1436,8 @@ int main() { // Currently down as "fib.ks" as a filename since we're redirecting stdin // but we'd like actual source locations. KSDbgInfo.TheCU = DBuilder->createCompileUnit( - dwarf::DW_LANG_C, "fib.ks", ".", "Kaleidoscope Compiler", 0, "", 0); + dwarf::DW_LANG_C, DBuilder->createFile("fib.ks", "."), + "Kaleidoscope Compiler", 0, "", 0); // Run the main "interpreter loop" now. MainLoop(); diff --git a/examples/ParallelJIT/CMakeLists.txt b/examples/ParallelJIT/CMakeLists.txt index deeee072b33c..c42dfc85c14a 100644 --- a/examples/ParallelJIT/CMakeLists.txt +++ b/examples/ParallelJIT/CMakeLists.txt @@ -11,4 +11,4 @@ add_llvm_example(ParallelJIT ParallelJIT.cpp ) -target_link_libraries(ParallelJIT ${LLVM_PTHREAD_LIB}) +target_link_libraries(ParallelJIT PRIVATE ${LLVM_PTHREAD_LIB}) diff --git a/include/llvm-c/DebugInfo.h b/include/llvm-c/DebugInfo.h index d17c690be4da..a5e5653630c1 100644 --- a/include/llvm-c/DebugInfo.h +++ b/include/llvm-c/DebugInfo.h @@ -52,6 +52,8 @@ typedef enum { LLVMDIFlagBitField = 1 << 19, LLVMDIFlagNoReturn = 1 << 20, LLVMDIFlagMainSubprogram = 1 << 21, + LLVMDIFlagTypePassByValue = 1 << 22, + LLVMDIFlagTypePassByReference = 1 << 23, LLVMDIFlagIndirectVirtualBase = (1 << 2) | (1 << 5), LLVMDIFlagAccessibility = LLVMDIFlagPrivate | LLVMDIFlagProtected | LLVMDIFlagPublic, diff --git a/include/llvm-c/lto.h b/include/llvm-c/lto.h index 8d45b7832041..55f3e46c45ed 100644 --- a/include/llvm-c/lto.h +++ b/include/llvm-c/lto.h @@ -757,17 +757,17 @@ extern void thinlto_codegen_add_cross_referenced_symbol(thinlto_code_gen_t cg, * @ingroup LLVMCTLTO * * These entry points control the ThinLTO cache. The cache is intended to - * support incremental build, and thus needs to be persistent accross build. - * The client enabled the cache by supplying a path to an existing directory. + * support incremental builds, and thus needs to be persistent across builds. + * The client enables the cache by supplying a path to an existing directory. * The code generator will use this to store objects files that may be reused * during a subsequent build. * To avoid filling the disk space, a few knobs are provided: - * - The pruning interval limit the frequency at which the garbage collector - * will try to scan the cache directory to prune it from expired entries. - * Setting to -1 disable the pruning (default). + * - The pruning interval limits the frequency at which the garbage collector + * will try to scan the cache directory to prune expired entries. + * Setting to a negative number disables the pruning. 
* - The pruning expiration time indicates to the garbage collector how old an * entry needs to be to be removed. - * - Finally, the garbage collector can be instructed to prune the cache till + * - Finally, the garbage collector can be instructed to prune the cache until * the occupied space goes below a threshold. * @{ */ @@ -782,7 +782,7 @@ extern void thinlto_codegen_set_cache_dir(thinlto_code_gen_t cg, const char *cache_dir); /** - * Sets the cache pruning interval (in seconds). A negative value disable the + * Sets the cache pruning interval (in seconds). A negative value disables the * pruning. An unspecified default value will be applied, and a value of 0 will * be ignored. * diff --git a/include/llvm/ADT/APInt.h b/include/llvm/ADT/APInt.h index c81363cc16b7..118c62eec87c 100644 --- a/include/llvm/ADT/APInt.h +++ b/include/llvm/ADT/APInt.h @@ -1279,7 +1279,7 @@ class LLVM_NODISCARD APInt { /// \returns true if *this >= RHS when considered unsigned. bool uge(uint64_t RHS) const { return !ult(RHS); } - /// \brief Signed greather or equal comparison + /// \brief Signed greater or equal comparison /// /// Regards both *this and RHS as signed quantities and compares them for /// validity of the greater-or-equal relationship. diff --git a/include/llvm/ADT/Optional.h b/include/llvm/ADT/Optional.h index 2811d5c1e21b..0f073fab2a96 100644 --- a/include/llvm/ADT/Optional.h +++ b/include/llvm/ADT/Optional.h @@ -27,124 +27,166 @@ namespace llvm { -template class Optional { +namespace optional_detail { +/// Storage for any type. +template struct OptionalStorage { AlignedCharArrayUnion storage; bool hasVal = false; -public: - using value_type = T; - - Optional(NoneType) {} - explicit Optional() {} - - Optional(const T &y) : hasVal(true) { new (storage.buffer) T(y); } + OptionalStorage() = default; - Optional(const Optional &O) : hasVal(O.hasVal) { + OptionalStorage(const T &y) : hasVal(true) { new (storage.buffer) T(y); } + OptionalStorage(const OptionalStorage &O) : hasVal(O.hasVal) { if (hasVal) - new (storage.buffer) T(*O); + new (storage.buffer) T(*O.getPointer()); } - - Optional(T &&y) : hasVal(true) { new (storage.buffer) T(std::forward(y)); } - - Optional(Optional &&O) : hasVal(O) { - if (O) { - new (storage.buffer) T(std::move(*O)); + OptionalStorage(T &&y) : hasVal(true) { + new (storage.buffer) T(std::forward(y)); + } + OptionalStorage(OptionalStorage &&O) : hasVal(O.hasVal) { + if (O.hasVal) { + new (storage.buffer) T(std::move(*O.getPointer())); O.reset(); } } - ~Optional() { reset(); } - - Optional &operator=(T &&y) { + OptionalStorage &operator=(T &&y) { if (hasVal) - **this = std::move(y); + *getPointer() = std::move(y); else { new (storage.buffer) T(std::move(y)); hasVal = true; } return *this; } - - Optional &operator=(Optional &&O) { - if (!O) + OptionalStorage &operator=(OptionalStorage &&O) { + if (!O.hasVal) reset(); else { - *this = std::move(*O); + *this = std::move(*O.getPointer()); O.reset(); } return *this; } - /// Create a new object by constructing it in place with the given arguments. - template void emplace(ArgTypes &&... Args) { - reset(); - hasVal = true; - new (storage.buffer) T(std::forward(Args)...); - } - - static inline Optional create(const T *y) { - return y ? 
Optional(*y) : Optional(); - } - // FIXME: these assignments (& the equivalent const T&/const Optional& ctors) // could be made more efficient by passing by value, possibly unifying them // with the rvalue versions above - but this could place a different set of // requirements (notably: the existence of a default ctor) when implemented // in that way. Careful SFINAE to avoid such pitfalls would be required. - Optional &operator=(const T &y) { + OptionalStorage &operator=(const T &y) { if (hasVal) - **this = y; + *getPointer() = y; else { new (storage.buffer) T(y); hasVal = true; } return *this; } - - Optional &operator=(const Optional &O) { - if (!O) + OptionalStorage &operator=(const OptionalStorage &O) { + if (!O.hasVal) reset(); else - *this = *O; + *this = *O.getPointer(); return *this; } + ~OptionalStorage() { reset(); } + void reset() { if (hasVal) { - (**this).~T(); + (*getPointer()).~T(); hasVal = false; } } - const T *getPointer() const { - assert(hasVal); - return reinterpret_cast(storage.buffer); - } T *getPointer() { assert(hasVal); return reinterpret_cast(storage.buffer); } - const T &getValue() const LLVM_LVALUE_FUNCTION { + const T *getPointer() const { assert(hasVal); - return *getPointer(); + return reinterpret_cast(storage.buffer); } - T &getValue() LLVM_LVALUE_FUNCTION { - assert(hasVal); - return *getPointer(); +}; + +#if !defined(__GNUC__) || defined(__clang__) // GCC up to GCC7 miscompiles this. +/// Storage for trivially copyable types only. +template struct OptionalStorage { + AlignedCharArrayUnion storage; + bool hasVal = false; + + OptionalStorage() = default; + + OptionalStorage(const T &y) : hasVal(true) { new (storage.buffer) T(y); } + OptionalStorage &operator=(const T &y) { + *reinterpret_cast(storage.buffer) = y; + hasVal = true; + return *this; } - explicit operator bool() const { return hasVal; } - bool hasValue() const { return hasVal; } - const T *operator->() const { return getPointer(); } - T *operator->() { return getPointer(); } - const T &operator*() const LLVM_LVALUE_FUNCTION { - assert(hasVal); - return *getPointer(); + void reset() { hasVal = false; } +}; +#endif +} // namespace optional_detail + +template class Optional { + optional_detail::OptionalStorage::value> Storage; + +public: + using value_type = T; + + constexpr Optional() {} + constexpr Optional(NoneType) {} + + Optional(const T &y) : Storage(y) {} + Optional(const Optional &O) = default; + + Optional(T &&y) : Storage(std::forward(y)) {} + Optional(Optional &&O) = default; + + Optional &operator=(T &&y) { + Storage = std::move(y); + return *this; } - T &operator*() LLVM_LVALUE_FUNCTION { - assert(hasVal); - return *getPointer(); + Optional &operator=(Optional &&O) = default; + + /// Create a new object by constructing it in place with the given arguments. + template void emplace(ArgTypes &&... Args) { + reset(); + Storage.hasVal = true; + new (getPointer()) T(std::forward(Args)...); + } + + static inline Optional create(const T *y) { + return y ? 
Optional(*y) : Optional(); + } + + Optional &operator=(const T &y) { + Storage = y; + return *this; + } + Optional &operator=(const Optional &O) = default; + + void reset() { Storage.reset(); } + + const T *getPointer() const { + assert(Storage.hasVal); + return reinterpret_cast(Storage.storage.buffer); + } + T *getPointer() { + assert(Storage.hasVal); + return reinterpret_cast(Storage.storage.buffer); } + const T &getValue() const LLVM_LVALUE_FUNCTION { return *getPointer(); } + T &getValue() LLVM_LVALUE_FUNCTION { return *getPointer(); } + + explicit operator bool() const { return Storage.hasVal; } + bool hasValue() const { return Storage.hasVal; } + const T *operator->() const { return getPointer(); } + T *operator->() { return getPointer(); } + const T &operator*() const LLVM_LVALUE_FUNCTION { return *getPointer(); } + T &operator*() LLVM_LVALUE_FUNCTION { return *getPointer(); } template constexpr T getValueOr(U &&value) const LLVM_LVALUE_FUNCTION { @@ -152,14 +194,8 @@ template class Optional { } #if LLVM_HAS_RVALUE_REFERENCE_THIS - T &&getValue() && { - assert(hasVal); - return std::move(*getPointer()); - } - T &&operator*() && { - assert(hasVal); - return std::move(*getPointer()); - } + T &&getValue() && { return std::move(*getPointer()); } + T &&operator*() && { return std::move(*getPointer()); } template T getValueOr(U &&value) && { diff --git a/include/llvm/ADT/STLExtras.h b/include/llvm/ADT/STLExtras.h index bcd992b4a716..be38e5d61980 100644 --- a/include/llvm/ADT/STLExtras.h +++ b/include/llvm/ADT/STLExtras.h @@ -101,6 +101,7 @@ class function_ref { public: function_ref() = default; + function_ref(std::nullptr_t) {} template function_ref(Callable &&callable, @@ -861,6 +862,11 @@ OutputIt copy_if(R &&Range, OutputIt Out, UnaryPredicate P) { return std::copy_if(adl_begin(Range), adl_end(Range), Out, P); } +template +OutputIt copy(R &&Range, OutputIt Out) { + return std::copy(adl_begin(Range), adl_end(Range), Out); +} + /// Wrapper function around std::find to detect if an element exists /// in a container. template diff --git a/include/llvm/ADT/SmallVector.h b/include/llvm/ADT/SmallVector.h index a9ac98d1ad4c..3d17e70bad6d 100644 --- a/include/llvm/ADT/SmallVector.h +++ b/include/llvm/ADT/SmallVector.h @@ -339,9 +339,7 @@ class SmallVectorImpl : public SmallVectorTemplateBase::value> { SmallVectorImpl(const SmallVectorImpl &) = delete; ~SmallVectorImpl() { - // Destroy the constructed elements in the vector. - this->destroy_range(this->begin(), this->end()); - + // Subclass has already destructed this vector's elements. // If this wasn't grown from the inline copy, deallocate the old space. if (!this->isSmall()) free(this->begin()); @@ -868,6 +866,11 @@ class SmallVector : public SmallVectorImpl { public: SmallVector() : SmallVectorImpl(N) {} + ~SmallVector() { + // Destroy the constructed elements in the vector. + this->destroy_range(this->begin(), this->end()); + } + explicit SmallVector(size_t Size, const T &Value = T()) : SmallVectorImpl(N) { this->assign(Size, Value); diff --git a/include/llvm/ADT/StringExtras.h b/include/llvm/ADT/StringExtras.h index a0e0d7d64f27..60652f8c55c5 100644 --- a/include/llvm/ADT/StringExtras.h +++ b/include/llvm/ADT/StringExtras.h @@ -78,6 +78,20 @@ inline bool isAlpha(char C) { /// lowercase letter as classified by "C" locale. inline bool isAlnum(char C) { return isAlpha(C) || isDigit(C); } +/// Returns the corresponding lowercase character if \p x is uppercase. 
+inline char toLower(char x) { + if (x >= 'A' && x <= 'Z') + return x - 'A' + 'a'; + return x; +} + +/// Returns the corresponding uppercase character if \p x is lowercase. +inline char toUpper(char x) { + if (x >= 'a' && x <= 'z') + return x - 'a' + 'A'; + return x; +} + inline std::string utohexstr(uint64_t X, bool LowerCase = false) { char Buffer[17]; char *BufPtr = std::end(Buffer); @@ -254,6 +268,9 @@ inline StringRef getOrdinalSuffix(unsigned Val) { /// it if it is not printable or if it is an escape char. void PrintEscapedString(StringRef Name, raw_ostream &Out); +/// printLowerCase - Print each character as lowercase if it is uppercase. +void printLowerCase(StringRef String, raw_ostream &Out); + namespace detail { template diff --git a/include/llvm/ADT/StringMap.h b/include/llvm/ADT/StringMap.h index 6c2830b44914..d34d5ed7e609 100644 --- a/include/llvm/ADT/StringMap.h +++ b/include/llvm/ADT/StringMap.h @@ -37,12 +37,12 @@ template class StringMapKeyIterator; /// StringMapEntryBase - Shared base class of StringMapEntry instances. class StringMapEntryBase { - unsigned StrLen; + size_t StrLen; public: - explicit StringMapEntryBase(unsigned Len) : StrLen(Len) {} + explicit StringMapEntryBase(size_t Len) : StrLen(Len) {} - unsigned getKeyLength() const { return StrLen; } + size_t getKeyLength() const { return StrLen; } }; /// StringMapImpl - This is the base class of StringMap that is shared among @@ -127,10 +127,10 @@ class StringMapEntry : public StringMapEntryBase { public: ValueTy second; - explicit StringMapEntry(unsigned strLen) + explicit StringMapEntry(size_t strLen) : StringMapEntryBase(strLen), second() {} template - StringMapEntry(unsigned strLen, InitTy &&... InitVals) + StringMapEntry(size_t strLen, InitTy &&... InitVals) : StringMapEntryBase(strLen), second(std::forward(InitVals)...) {} StringMapEntry(StringMapEntry &E) = delete; @@ -155,13 +155,12 @@ class StringMapEntry : public StringMapEntryBase { template static StringMapEntry *Create(StringRef Key, AllocatorTy &Allocator, InitTy &&... InitVals) { - unsigned KeyLength = Key.size(); + size_t KeyLength = Key.size(); // Allocate a new item with space for the string at the end and a null // terminator. - unsigned AllocSize = static_cast(sizeof(StringMapEntry))+ - KeyLength+1; - unsigned Alignment = alignof(StringMapEntry); + size_t AllocSize = sizeof(StringMapEntry) + KeyLength + 1; + size_t Alignment = alignof(StringMapEntry); StringMapEntry *NewItem = static_cast(Allocator.Allocate(AllocSize,Alignment)); @@ -203,8 +202,7 @@ class StringMapEntry : public StringMapEntryBase { template void Destroy(AllocatorTy &Allocator) { // Free memory referenced by the item. 
- unsigned AllocSize = - static_cast(sizeof(StringMapEntry)) + getKeyLength() + 1; + size_t AllocSize = sizeof(StringMapEntry) + getKeyLength() + 1; this->~StringMapEntry(); Allocator.Deallocate(static_cast(this), AllocSize); } diff --git a/include/llvm/ADT/TinyPtrVector.h b/include/llvm/ADT/TinyPtrVector.h index 79740713f75b..73573d65e2b3 100644 --- a/include/llvm/ADT/TinyPtrVector.h +++ b/include/llvm/ADT/TinyPtrVector.h @@ -97,6 +97,7 @@ class TinyPtrVector { if (RHS.Val.template is()) { V->clear(); V->push_back(RHS.front()); + RHS.Val = (EltTy)nullptr; return *this; } delete V; diff --git a/include/llvm/ADT/Triple.h b/include/llvm/ADT/Triple.h index 74fc8eb8ccbf..13b63738db37 100644 --- a/include/llvm/ADT/Triple.h +++ b/include/llvm/ADT/Triple.h @@ -660,9 +660,9 @@ class Triple { return getArch() == Triple::aarch64 || getArch() == Triple::aarch64_be; } - /// Tests wether the target supports comdat + /// Tests whether the target supports comdat bool supportsCOMDAT() const { - return !isOSBinFormatMachO() && !isOSBinFormatWasm(); + return !isOSBinFormatMachO(); } /// @} diff --git a/include/llvm/Analysis/AliasAnalysis.h b/include/llvm/Analysis/AliasAnalysis.h index 41bb03cac07b..362096b08e13 100644 --- a/include/llvm/Analysis/AliasAnalysis.h +++ b/include/llvm/Analysis/AliasAnalysis.h @@ -95,19 +95,101 @@ enum AliasResult { /// /// This is no access at all, a modification, a reference, or both /// a modification and a reference. These are specifically structured such that -/// they form a two bit matrix and bit-tests for 'mod' or 'ref' work with any -/// of the possible values. -enum ModRefInfo { +/// they form a three bit matrix and bit-tests for 'mod' or 'ref' or 'must' +/// work with any of the possible values. +enum class ModRefInfo { + /// Must is provided for completeness, but no routines will return only + /// Must today. See definition of Must below. + Must = 0, + /// The access may reference the value stored in memory, + /// a mustAlias relation was found, and no mayAlias or partialAlias found. + MustRef = 1, + /// The access may modify the value stored in memory, + /// a mustAlias relation was found, and no mayAlias or partialAlias found. + MustMod = 2, + /// The access may reference, modify or both the value stored in memory, + /// a mustAlias relation was found, and no mayAlias or partialAlias found. + MustModRef = MustRef | MustMod, /// The access neither references nor modifies the value stored in memory. - MRI_NoModRef = 0, - /// The access references the value stored in memory. - MRI_Ref = 1, - /// The access modifies the value stored in memory. - MRI_Mod = 2, - /// The access both references and modifies the value stored in memory. - MRI_ModRef = MRI_Ref | MRI_Mod + NoModRef = 4, + /// The access may reference the value stored in memory. + Ref = NoModRef | MustRef, + /// The access may modify the value stored in memory. + Mod = NoModRef | MustMod, + /// The access may reference and may modify the value stored in memory. + ModRef = Ref | Mod, + + /// About Must: + /// Must is set in a best effort manner. + /// We usually do not try our best to infer Must, instead it is merely + /// another piece of "free" information that is presented when available. + /// Must set means there was certainly a MustAlias found. For calls, + /// where multiple arguments are checked (argmemonly), this translates to + /// only MustAlias or NoAlias was found. + /// Must is not set for RAR accesses, even if the two locations must + /// alias. 
The reason is that two read accesses translate to an early return + /// of NoModRef. An additional alias check to set Must may be + /// expensive. Other cases may also not set Must(e.g. callCapturesBefore). + /// We refer to Must being *set* when the most significant bit is *cleared*. + /// Conversely we *clear* Must information by *setting* the Must bit to 1. }; +LLVM_NODISCARD inline bool isNoModRef(const ModRefInfo MRI) { + return (static_cast(MRI) & static_cast(ModRefInfo::MustModRef)) == + static_cast(ModRefInfo::Must); +} +LLVM_NODISCARD inline bool isModOrRefSet(const ModRefInfo MRI) { + return static_cast(MRI) & static_cast(ModRefInfo::MustModRef); +} +LLVM_NODISCARD inline bool isModAndRefSet(const ModRefInfo MRI) { + return (static_cast(MRI) & static_cast(ModRefInfo::MustModRef)) == + static_cast(ModRefInfo::MustModRef); +} +LLVM_NODISCARD inline bool isModSet(const ModRefInfo MRI) { + return static_cast(MRI) & static_cast(ModRefInfo::MustMod); +} +LLVM_NODISCARD inline bool isRefSet(const ModRefInfo MRI) { + return static_cast(MRI) & static_cast(ModRefInfo::MustRef); +} +LLVM_NODISCARD inline bool isMustSet(const ModRefInfo MRI) { + return !(static_cast(MRI) & static_cast(ModRefInfo::NoModRef)); +} + +LLVM_NODISCARD inline ModRefInfo setMod(const ModRefInfo MRI) { + return ModRefInfo(static_cast(MRI) | + static_cast(ModRefInfo::MustMod)); +} +LLVM_NODISCARD inline ModRefInfo setRef(const ModRefInfo MRI) { + return ModRefInfo(static_cast(MRI) | + static_cast(ModRefInfo::MustRef)); +} +LLVM_NODISCARD inline ModRefInfo setMust(const ModRefInfo MRI) { + return ModRefInfo(static_cast(MRI) & + static_cast(ModRefInfo::MustModRef)); +} +LLVM_NODISCARD inline ModRefInfo setModAndRef(const ModRefInfo MRI) { + return ModRefInfo(static_cast(MRI) | + static_cast(ModRefInfo::MustModRef)); +} +LLVM_NODISCARD inline ModRefInfo clearMod(const ModRefInfo MRI) { + return ModRefInfo(static_cast(MRI) & static_cast(ModRefInfo::Ref)); +} +LLVM_NODISCARD inline ModRefInfo clearRef(const ModRefInfo MRI) { + return ModRefInfo(static_cast(MRI) & static_cast(ModRefInfo::Mod)); +} +LLVM_NODISCARD inline ModRefInfo clearMust(const ModRefInfo MRI) { + return ModRefInfo(static_cast(MRI) | + static_cast(ModRefInfo::NoModRef)); +} +LLVM_NODISCARD inline ModRefInfo unionModRef(const ModRefInfo MRI1, + const ModRefInfo MRI2) { + return ModRefInfo(static_cast(MRI1) | static_cast(MRI2)); +} +LLVM_NODISCARD inline ModRefInfo intersectModRef(const ModRefInfo MRI1, + const ModRefInfo MRI2) { + return ModRefInfo(static_cast(MRI1) & static_cast(MRI2)); +} + /// The locations at which a function might access memory. /// /// These are primarily used in conjunction with the \c AccessKind bits to @@ -117,11 +199,11 @@ enum FunctionModRefLocation { /// Base case is no access to memory. FMRL_Nowhere = 0, /// Access to memory via argument pointers. - FMRL_ArgumentPointees = 4, + FMRL_ArgumentPointees = 8, /// Memory that is inaccessible via LLVM IR. - FMRL_InaccessibleMem = 8, + FMRL_InaccessibleMem = 16, /// Access to any memory. - FMRL_Anywhere = 16 | FMRL_InaccessibleMem | FMRL_ArgumentPointees + FMRL_Anywhere = 32 | FMRL_InaccessibleMem | FMRL_ArgumentPointees }; /// Summary of how a function affects memory in the program. @@ -135,27 +217,31 @@ enum FunctionModRefBehavior { /// This property corresponds to the GCC 'const' attribute. /// This property corresponds to the LLVM IR 'readnone' attribute. /// This property corresponds to the IntrNoMem LLVM intrinsic flag. 
- FMRB_DoesNotAccessMemory = FMRL_Nowhere | MRI_NoModRef, + FMRB_DoesNotAccessMemory = + FMRL_Nowhere | static_cast(ModRefInfo::NoModRef), /// The only memory references in this function (if it has any) are /// non-volatile loads from objects pointed to by its pointer-typed /// arguments, with arbitrary offsets. /// /// This property corresponds to the IntrReadArgMem LLVM intrinsic flag. - FMRB_OnlyReadsArgumentPointees = FMRL_ArgumentPointees | MRI_Ref, + FMRB_OnlyReadsArgumentPointees = + FMRL_ArgumentPointees | static_cast(ModRefInfo::Ref), /// The only memory references in this function (if it has any) are /// non-volatile loads and stores from objects pointed to by its /// pointer-typed arguments, with arbitrary offsets. /// /// This property corresponds to the IntrArgMemOnly LLVM intrinsic flag. - FMRB_OnlyAccessesArgumentPointees = FMRL_ArgumentPointees | MRI_ModRef, + FMRB_OnlyAccessesArgumentPointees = + FMRL_ArgumentPointees | static_cast(ModRefInfo::ModRef), /// The only memory references in this function (if it has any) are /// references of memory that is otherwise inaccessible via LLVM IR. /// /// This property corresponds to the LLVM IR inaccessiblememonly attribute. - FMRB_OnlyAccessesInaccessibleMem = FMRL_InaccessibleMem | MRI_ModRef, + FMRB_OnlyAccessesInaccessibleMem = + FMRL_InaccessibleMem | static_cast(ModRefInfo::ModRef), /// The function may perform non-volatile loads and stores of objects /// pointed to by its pointer-typed arguments, with arbitrary offsets, and @@ -165,7 +251,8 @@ enum FunctionModRefBehavior { /// This property corresponds to the LLVM IR /// inaccessiblemem_or_argmemonly attribute. FMRB_OnlyAccessesInaccessibleOrArgMem = FMRL_InaccessibleMem | - FMRL_ArgumentPointees | MRI_ModRef, + FMRL_ArgumentPointees | + static_cast(ModRefInfo::ModRef), /// This function does not perform any non-local stores or volatile loads, /// but may read from any memory location. @@ -173,20 +260,30 @@ enum FunctionModRefBehavior { /// This property corresponds to the GCC 'pure' attribute. /// This property corresponds to the LLVM IR 'readonly' attribute. /// This property corresponds to the IntrReadMem LLVM intrinsic flag. - FMRB_OnlyReadsMemory = FMRL_Anywhere | MRI_Ref, + FMRB_OnlyReadsMemory = FMRL_Anywhere | static_cast(ModRefInfo::Ref), // This function does not read from memory anywhere, but may write to any // memory location. // // This property corresponds to the LLVM IR 'writeonly' attribute. // This property corresponds to the IntrWriteMem LLVM intrinsic flag. - FMRB_DoesNotReadMemory = FMRL_Anywhere | MRI_Mod, + FMRB_DoesNotReadMemory = FMRL_Anywhere | static_cast(ModRefInfo::Mod), /// This indicates that the function could not be classified into one of the /// behaviors above. - FMRB_UnknownModRefBehavior = FMRL_Anywhere | MRI_ModRef + FMRB_UnknownModRefBehavior = + FMRL_Anywhere | static_cast(ModRefInfo::ModRef) }; +// Wrapper method strips bits significant only in FunctionModRefBehavior, +// to obtain a valid ModRefInfo. The benefit of using the wrapper is that if +// ModRefInfo enum changes, the wrapper can be updated to & with the new enum +// entry with all bits set to 1. +LLVM_NODISCARD inline ModRefInfo +createModRefInfo(const FunctionModRefBehavior FMRB) { + return ModRefInfo(FMRB & static_cast(ModRefInfo::ModRef)); +} + class AAResults { public: // Make these results default constructable and movable. We have to spell @@ -286,7 +383,7 @@ class AAResults { /// result's bits are set to indicate the allowed aliasing ModRef kinds. 
Note /// that these bits do not necessarily account for the overall behavior of /// the function, but rather only provide additional per-argument - /// information. + /// information. This never sets ModRefInfo::Must. ModRefInfo getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx); /// Return the behavior of the given call site. @@ -354,13 +451,13 @@ class AAResults { /// Checks if functions with the specified behavior are known to only read /// from non-volatile memory (or not access memory at all). static bool onlyReadsMemory(FunctionModRefBehavior MRB) { - return !(MRB & MRI_Mod); + return !isModSet(createModRefInfo(MRB)); } /// Checks if functions with the specified behavior are known to only write /// memory (or not access memory at all). static bool doesNotReadMemory(FunctionModRefBehavior MRB) { - return !(MRB & MRI_Ref); + return !isRefSet(createModRefInfo(MRB)); } /// Checks if functions with the specified behavior are known to read and @@ -374,7 +471,8 @@ class AAResults { /// read or write from objects pointed to be their pointer-typed arguments /// (with arbitrary offsets). static bool doesAccessArgPointees(FunctionModRefBehavior MRB) { - return (MRB & MRI_ModRef) && (MRB & FMRL_ArgumentPointees); + return isModOrRefSet(createModRefInfo(MRB)) && + (MRB & FMRL_ArgumentPointees); } /// Checks if functions with the specified behavior are known to read and @@ -386,7 +484,7 @@ class AAResults { /// Checks if functions with the specified behavior are known to potentially /// read or write from memory that is inaccessible from LLVM IR. static bool doesAccessInaccessibleMem(FunctionModRefBehavior MRB) { - return (MRB & MRI_ModRef) && (MRB & FMRL_InaccessibleMem); + return isModOrRefSet(createModRefInfo(MRB)) && (MRB & FMRL_InaccessibleMem); } /// Checks if functions with the specified behavior are known to read and @@ -520,14 +618,7 @@ class AAResults { const Optional &OptLoc) { if (OptLoc == None) { if (auto CS = ImmutableCallSite(I)) { - auto MRB = getModRefBehavior(CS); - if ((MRB & MRI_ModRef) == MRI_ModRef) - return MRI_ModRef; - if (MRB & MRI_Ref) - return MRI_Ref; - if (MRB & MRI_Mod) - return MRI_Mod; - return MRI_NoModRef; + return createModRefInfo(getModRefBehavior(CS)); } } @@ -549,7 +640,7 @@ class AAResults { case Instruction::CatchRet: return getModRefInfo((const CatchReturnInst *)I, Loc); default: - return MRI_NoModRef; + return ModRefInfo::NoModRef; } } @@ -570,8 +661,10 @@ class AAResults { /// \brief Return information about whether a particular call site modifies /// or reads the specified memory location \p MemLoc before instruction \p I - /// in a BasicBlock. A ordered basic block \p OBB can be used to speed up + /// in a BasicBlock. An ordered basic block \p OBB can be used to speed up /// instruction ordering queries inside the BasicBlock containing \p I. + /// Early exits in callCapturesBefore may lead to ModRefInfo::Must not being + /// set. 
ModRefInfo callCapturesBefore(const Instruction *I, const MemoryLocation &MemLoc, DominatorTree *DT, OrderedBasicBlock *OBB = nullptr); @@ -850,7 +943,7 @@ template class AAResultBase { } ModRefInfo getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx) { - return MRI_ModRef; + return ModRefInfo::ModRef; } FunctionModRefBehavior getModRefBehavior(ImmutableCallSite CS) { @@ -862,11 +955,11 @@ template class AAResultBase { } ModRefInfo getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc) { - return MRI_ModRef; + return ModRefInfo::ModRef; } ModRefInfo getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2) { - return MRI_ModRef; + return ModRefInfo::ModRef; } }; diff --git a/include/llvm/Analysis/AliasAnalysisEvaluator.h b/include/llvm/Analysis/AliasAnalysisEvaluator.h index 214574852655..cd2f631a01f4 100644 --- a/include/llvm/Analysis/AliasAnalysisEvaluator.h +++ b/include/llvm/Analysis/AliasAnalysisEvaluator.h @@ -35,19 +35,23 @@ class AAEvaluator : public PassInfoMixin { int64_t FunctionCount; int64_t NoAliasCount, MayAliasCount, PartialAliasCount, MustAliasCount; int64_t NoModRefCount, ModCount, RefCount, ModRefCount; + int64_t MustCount, MustRefCount, MustModCount, MustModRefCount; public: AAEvaluator() : FunctionCount(), NoAliasCount(), MayAliasCount(), PartialAliasCount(), MustAliasCount(), NoModRefCount(), ModCount(), RefCount(), - ModRefCount() {} + ModRefCount(), MustCount(), MustRefCount(), MustModCount(), + MustModRefCount() {} AAEvaluator(AAEvaluator &&Arg) : FunctionCount(Arg.FunctionCount), NoAliasCount(Arg.NoAliasCount), MayAliasCount(Arg.MayAliasCount), PartialAliasCount(Arg.PartialAliasCount), MustAliasCount(Arg.MustAliasCount), NoModRefCount(Arg.NoModRefCount), ModCount(Arg.ModCount), RefCount(Arg.RefCount), - ModRefCount(Arg.ModRefCount) { + ModRefCount(Arg.ModRefCount), MustCount(Arg.MustCount), + MustRefCount(Arg.MustRefCount), MustModCount(Arg.MustModCount), + MustModRefCount(Arg.MustModRefCount) { Arg.FunctionCount = 0; } ~AAEvaluator(); diff --git a/include/llvm/Analysis/BlockFrequencyInfoImpl.h b/include/llvm/Analysis/BlockFrequencyInfoImpl.h index 228934cb3013..40c40b80bc89 100644 --- a/include/llvm/Analysis/BlockFrequencyInfoImpl.h +++ b/include/llvm/Analysis/BlockFrequencyInfoImpl.h @@ -1314,9 +1314,12 @@ BlockFrequencyInfoImpl::propagateMassToSuccessors(LoopData *OuterLoop, return false; } else { const BlockT *BB = getBlock(Node); - for (const auto Succ : children(BB)) - if (!addToDist(Dist, OuterLoop, Node, getNode(Succ), - getWeightFromBranchProb(BPI->getEdgeProbability(BB, Succ)))) + for (auto SI = GraphTraits::child_begin(BB), + SE = GraphTraits::child_end(BB); + SI != SE; ++SI) + if (!addToDist( + Dist, OuterLoop, Node, getNode(*SI), + getWeightFromBranchProb(BPI->getEdgeProbability(BB, SI)))) // Irreducible backedge. 
return false; } @@ -1338,7 +1341,7 @@ raw_ostream &BlockFrequencyInfoImpl::print(raw_ostream &OS) const { << ", int = " << getBlockFreq(&BB).getFrequency(); if (Optional ProfileCount = BlockFrequencyInfoImplBase::getBlockProfileCount( - *F->getFunction(), getNode(&BB))) + F->getFunction(), getNode(&BB))) OS << ", count = " << ProfileCount.getValue(); if (Optional IrrLoopHeaderWeight = BB.getIrrLoopHeaderWeight()) diff --git a/include/llvm/Analysis/ConstantFolding.h b/include/llvm/Analysis/ConstantFolding.h index cb314e3766cf..6d4eef412525 100644 --- a/include/llvm/Analysis/ConstantFolding.h +++ b/include/llvm/Analysis/ConstantFolding.h @@ -102,6 +102,13 @@ Constant *ConstantFoldInsertValueInstruction(Constant *Agg, Constant *Val, Constant *ConstantFoldExtractValueInstruction(Constant *Agg, ArrayRef Idxs); +/// \brief Attempt to constant fold an insertelement instruction with the +/// specified operands and indices. The constant result is returned if +/// successful; if not, null is returned. +Constant *ConstantFoldInsertElementInstruction(Constant *Val, + Constant *Elt, + Constant *Idx); + /// \brief Attempt to constant fold an extractelement instruction with the /// specified operands and indices. The constant result is returned if /// successful; if not, null is returned. diff --git a/include/llvm/Analysis/InstructionSimplify.h b/include/llvm/Analysis/InstructionSimplify.h index be0f32ef444a..4f896bddff87 100644 --- a/include/llvm/Analysis/InstructionSimplify.h +++ b/include/llvm/Analysis/InstructionSimplify.h @@ -161,6 +161,10 @@ Value *SimplifyGEPInst(Type *SrcTy, ArrayRef Ops, Value *SimplifyInsertValueInst(Value *Agg, Value *Val, ArrayRef Idxs, const SimplifyQuery &Q); +/// Given operands for an InsertElement, fold the result or return null. +Value *SimplifyInsertElementInst(Value *Vec, Value *Elt, Value *Idx, + const SimplifyQuery &Q); + /// Given operands for an ExtractValueInst, fold the result or return null. Value *SimplifyExtractValueInst(Value *Agg, ArrayRef Idxs, const SimplifyQuery &Q); @@ -193,6 +197,9 @@ Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, Value *SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS, FastMathFlags FMF, const SimplifyQuery &Q); +/// Given a callsite, fold the result or return null. +Value *SimplifyCall(ImmutableCallSite CS, const SimplifyQuery &Q); + /// Given a function and iterators over arguments, fold the result or return /// null. Value *SimplifyCall(ImmutableCallSite CS, Value *V, User::op_iterator ArgBegin, diff --git a/include/llvm/Analysis/ProfileSummaryInfo.h b/include/llvm/Analysis/ProfileSummaryInfo.h index d788665fc10a..293033458429 100644 --- a/include/llvm/Analysis/ProfileSummaryInfo.h +++ b/include/llvm/Analysis/ProfileSummaryInfo.h @@ -92,12 +92,12 @@ class ProfileSummaryInfo { bool hasHugeWorkingSetSize(); /// \brief Returns true if \p F has hot function entry. bool isFunctionEntryHot(const Function *F); - /// Returns true if \p F has hot function entry or hot call edge. - bool isFunctionHotInCallGraph(const Function *F); + /// Returns true if \p F contains hot code. + bool isFunctionHotInCallGraph(const Function *F, BlockFrequencyInfo &BFI); /// \brief Returns true if \p F has cold function entry. bool isFunctionEntryCold(const Function *F); - /// Returns true if \p F has cold function entry or cold call edge. - bool isFunctionColdInCallGraph(const Function *F); + /// Returns true if \p F contains only cold code. 
+ bool isFunctionColdInCallGraph(const Function *F, BlockFrequencyInfo &BFI); /// \brief Returns true if \p F is a hot function. bool isHotCount(uint64_t C); /// \brief Returns true if count \p C is considered cold. @@ -110,6 +110,14 @@ class ProfileSummaryInfo { bool isHotCallSite(const CallSite &CS, BlockFrequencyInfo *BFI); /// \brief Returns true if Callsite \p CS is considered cold. bool isColdCallSite(const CallSite &CS, BlockFrequencyInfo *BFI); + /// \brief Returns HotCountThreshold if set. + uint64_t getHotCountThreshold() { + return HotCountThreshold ? HotCountThreshold.getValue() : 0; + } + /// \brief Returns ColdCountThreshold if set. + uint64_t getColdCountThreshold() { + return ColdCountThreshold ? ColdCountThreshold.getValue() : 0; + } }; /// An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo. diff --git a/include/llvm/Analysis/RegionInfoImpl.h b/include/llvm/Analysis/RegionInfoImpl.h index 6e522354dd9b..eb6baac2d5e4 100644 --- a/include/llvm/Analysis/RegionInfoImpl.h +++ b/include/llvm/Analysis/RegionInfoImpl.h @@ -254,23 +254,23 @@ std::string RegionBase::getNameStr() const { template void RegionBase::verifyBBInRegion(BlockT *BB) const { if (!contains(BB)) - llvm_unreachable("Broken region found: enumerated BB not in region!"); + report_fatal_error("Broken region found: enumerated BB not in region!"); BlockT *entry = getEntry(), *exit = getExit(); for (BlockT *Succ : make_range(BlockTraits::child_begin(BB), BlockTraits::child_end(BB))) { if (!contains(Succ) && exit != Succ) - llvm_unreachable("Broken region found: edges leaving the region must go " - "to the exit node!"); + report_fatal_error("Broken region found: edges leaving the region must go " + "to the exit node!"); } if (entry != BB) { for (BlockT *Pred : make_range(InvBlockTraits::child_begin(BB), InvBlockTraits::child_end(BB))) { if (!contains(Pred)) - llvm_unreachable("Broken region found: edges entering the region must " - "go to the entry node!"); + report_fatal_error("Broken region found: edges entering the region must " + "go to the entry node!"); } } } @@ -557,7 +557,7 @@ void RegionInfoBase::verifyBBMap(const RegionT *R) const { } else { BlockT *BB = Element->template getNodeAs(); if (getRegionFor(BB) != R) - llvm_unreachable("BB map does not match region nesting"); + report_fatal_error("BB map does not match region nesting"); } } } diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h index 96309debd84a..21b72f3e13c2 100644 --- a/include/llvm/Analysis/ScalarEvolution.h +++ b/include/llvm/Analysis/ScalarEvolution.h @@ -1272,9 +1272,6 @@ class ScalarEvolution { /// function as they are computed. DenseMap PredicatedBackedgeTakenCounts; - // Cache the calculated exit limits for the loops. - DenseMap ExitLimits; - /// This map contains entries for all of the PHI instructions that we /// attempt to compute constant evolutions for. This allows us to avoid /// potentially expensive recomputation of these properties. An instruction @@ -1426,9 +1423,6 @@ class ScalarEvolution { ExitLimit computeExitLimit(const Loop *L, BasicBlock *ExitingBlock, bool AllowPredicates = false); - ExitLimit computeExitLimitImpl(const Loop *L, BasicBlock *ExitingBlock, - bool AllowPredicates = false); - /// Compute the number of times the backedge of the specified loop will /// execute if its exit condition were a conditional branch of ExitCond, /// TBB, and FBB. @@ -1668,9 +1662,8 @@ class ScalarEvolution { /// to be a constant. 
Optional computeConstantDifference(const SCEV *LHS, const SCEV *RHS); - /// Drop memoized information computed for S. Only erase Exit Limits info if - /// we expect that the operation we have made is going to change it. - void forgetMemoizedResults(const SCEV *S, bool EraseExitLimit = true); + /// Drop memoized information computed for S. + void forgetMemoizedResults(const SCEV *S); /// Return an existing SCEV for V if there is one, otherwise return nullptr. const SCEV *getExistingSCEV(Value *V); @@ -1891,6 +1884,11 @@ class PredicatedScalarEvolution { /// The printed text is indented by \p Depth. void print(raw_ostream &OS, unsigned Depth) const; + /// Check if \p AR1 and \p AR2 are equal, while taking into account + /// Equal predicates in Preds. + bool areAddRecsEqualWithPreds(const SCEVAddRecExpr *AR1, + const SCEVAddRecExpr *AR2) const; + private: /// Increments the version number of the predicate. This needs to be called /// every time the SCEV predicate changes. diff --git a/include/llvm/Analysis/ScalarEvolutionExpander.h b/include/llvm/Analysis/ScalarEvolutionExpander.h index 4578e0da8ab2..3df04e98bd24 100644 --- a/include/llvm/Analysis/ScalarEvolutionExpander.h +++ b/include/llvm/Analysis/ScalarEvolutionExpander.h @@ -47,7 +47,7 @@ namespace llvm { ScalarEvolution &SE; const DataLayout &DL; - // New instructions receive a name to identifies them with the current pass. + // New instructions receive a name to identify them with the current pass. const char* IVName; // InsertedExpressions caches Values for reuse, so must track RAUW. diff --git a/include/llvm/Analysis/SyntheticCountsUtils.h b/include/llvm/Analysis/SyntheticCountsUtils.h new file mode 100644 index 000000000000..b0848eaee430 --- /dev/null +++ b/include/llvm/Analysis/SyntheticCountsUtils.h @@ -0,0 +1,33 @@ +//===- SyntheticCountsUtils.h - utilities for count propagation--*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines utilities for synthetic counts propagation. 
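As a rough illustration of what such propagation does (a conceptual sketch only, using made-up stand-in types rather than the CallGraph and ScaledNumber based interface this header actually declares), one propagation step over a call graph could look like this:

#include <cstdint>
#include <vector>

// Hypothetical stand-ins for call-graph nodes and call sites; the real
// utilities operate on llvm::CallGraph and llvm::ScaledNumber<uint64_t>.
struct CallSite {
  unsigned CalleeIndex; // index of the called function in the graph
  double RelFreq;       // call-site frequency relative to the caller's entry
};
struct GraphNode {
  uint64_t SyntheticCount = 0; // synthetic entry count of the function
  std::vector<CallSite> Calls; // outgoing call sites
};

// One propagation step: every call site adds the caller's count, scaled by the
// call site's relative frequency, to its callee's synthetic count.
inline void propagateOnce(std::vector<GraphNode> &Graph) {
  for (const GraphNode &Caller : Graph)
    for (const CallSite &CS : Caller.Calls)
      Graph[CS.CalleeIndex].SyntheticCount += static_cast<uint64_t>(
          static_cast<double>(Caller.SyntheticCount) * CS.RelFreq);
}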
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_SYNTHETIC_COUNTS_UTILS_H +#define LLVM_ANALYSIS_SYNTHETIC_COUNTS_UTILS_H + +#include "llvm/ADT/STLExtras.h" +#include "llvm/IR/CallSite.h" +#include "llvm/Support/ScaledNumber.h" + +namespace llvm { + +class CallGraph; +class Function; + +using Scaled64 = ScaledNumber; +void propagateSyntheticCounts( + const CallGraph &CG, function_ref GetCallSiteRelFreq, + function_ref GetCount, + function_ref AddToCount); +} // namespace llvm + +#endif diff --git a/include/llvm/Analysis/TargetLibraryInfo.def b/include/llvm/Analysis/TargetLibraryInfo.def index 9cbe917c146d..a461ed813b9b 100644 --- a/include/llvm/Analysis/TargetLibraryInfo.def +++ b/include/llvm/Analysis/TargetLibraryInfo.def @@ -457,6 +457,15 @@ TLI_DEFINE_STRING_INTERNAL("bcopy") /// void bzero(void *s, size_t n); TLI_DEFINE_ENUM_INTERNAL(bzero) TLI_DEFINE_STRING_INTERNAL("bzero") +/// double cabs(double complex z) +TLI_DEFINE_ENUM_INTERNAL(cabs) +TLI_DEFINE_STRING_INTERNAL("cabs") +/// float cabs(float complex z) +TLI_DEFINE_ENUM_INTERNAL(cabsf) +TLI_DEFINE_STRING_INTERNAL("cabsf") +/// long double cabs(long double complex z) +TLI_DEFINE_ENUM_INTERNAL(cabsl) +TLI_DEFINE_STRING_INTERNAL("cabsl") /// void *calloc(size_t count, size_t size); TLI_DEFINE_ENUM_INTERNAL(calloc) TLI_DEFINE_STRING_INTERNAL("calloc") diff --git a/include/llvm/Analysis/TargetTransformInfo.h b/include/llvm/Analysis/TargetTransformInfo.h index 90b71e93947e..c20f20cfbe4d 100644 --- a/include/llvm/Analysis/TargetTransformInfo.h +++ b/include/llvm/Analysis/TargetTransformInfo.h @@ -862,12 +862,6 @@ class TargetTransformInfo { unsigned SrcAlign, unsigned DestAlign) const; - /// \returns True if we want to test the new memcpy lowering functionality in - /// Transform/Utils. - /// Temporary. Will be removed once we move to the new functionality and - /// remove the old. - bool useWideIRMemcpyLoopLowering() const; - /// \returns True if the two functions have compatible attributes for inlining /// purposes. bool areInlineCompatible(const Function *Caller, diff --git a/include/llvm/Analysis/ValueLattice.h b/include/llvm/Analysis/ValueLattice.h index 18a43aafa8ca..6fb8f79aad85 100644 --- a/include/llvm/Analysis/ValueLattice.h +++ b/include/llvm/Analysis/ValueLattice.h @@ -49,14 +49,73 @@ class ValueLatticeElement { overdefined }; - /// Val: This stores the current lattice value along with the Constant* for - /// the constant if this is a 'constant' or 'notconstant' value. ValueLatticeElementTy Tag; - Constant *Val; - ConstantRange Range; + + /// The union either stores a pointer to a constant or a constant range, + /// associated to the lattice element. We have to ensure that Range is + /// initialized or destroyed when changing state to or from constantrange. + union { + Constant *ConstVal; + ConstantRange Range; + }; public: - ValueLatticeElement() : Tag(undefined), Val(nullptr), Range(1, true) {} + // Const and Range are initialized on-demand. + ValueLatticeElement() : Tag(undefined) {} + + /// Custom destructor to ensure Range is properly destroyed, when the object + /// is deallocated. + ~ValueLatticeElement() { + switch (Tag) { + case overdefined: + case undefined: + case constant: + case notconstant: + break; + case constantrange: + Range.~ConstantRange(); + break; + }; + } + + /// Custom copy constructor, to ensure Range gets initialized when + /// copying a constant range lattice element. 
+ ValueLatticeElement(const ValueLatticeElement &Other) : Tag(undefined) { + *this = Other; + } + + /// Custom assignment operator, to ensure Range gets initialized when + /// assigning a constant range lattice element. + ValueLatticeElement &operator=(const ValueLatticeElement &Other) { + // If we change the state of this from constant range to non-constant range, + // destroy Range. + if (isConstantRange() && !Other.isConstantRange()) + Range.~ConstantRange(); + + // If we change the state of this from a valid ConstVal to another state + // without a valid ConstVal, zero the pointer. + if ((isConstant() || isNotConstant()) && !Other.isConstant() && + !Other.isNotConstant()) + ConstVal = nullptr; + + switch (Other.Tag) { + case constantrange: + if (!isConstantRange()) + new (&Range) ConstantRange(Other.Range); + else + Range = Other.Range; + break; + case constant: + case notconstant: + ConstVal = Other.ConstVal; + break; + case overdefined: + case undefined: + break; + } + Tag = Other.Tag; + return *this; + } static ValueLatticeElement get(Constant *C) { ValueLatticeElement Res; @@ -89,12 +148,12 @@ class ValueLatticeElement { Constant *getConstant() const { assert(isConstant() && "Cannot get the constant of a non-constant!"); - return Val; + return ConstVal; } Constant *getNotConstant() const { assert(isNotConstant() && "Cannot get the constant of a non-notconstant!"); - return Val; + return ConstVal; } const ConstantRange &getConstantRange() const { @@ -104,10 +163,10 @@ class ValueLatticeElement { } Optional asConstantInteger() const { - if (isConstant() && isa(Val)) { - return cast(Val)->getValue(); - } else if (isConstantRange() && Range.isSingleElement()) { - return *Range.getSingleElement(); + if (isConstant() && isa(getConstant())) { + return cast(getConstant())->getValue(); + } else if (isConstantRange() && getConstantRange().isSingleElement()) { + return *getConstantRange().getSingleElement(); } return None; } @@ -116,6 +175,10 @@ class ValueLatticeElement { void markOverdefined() { if (isOverdefined()) return; + if (isConstant() || isNotConstant()) + ConstVal = nullptr; + if (isConstantRange()) + Range.~ConstantRange(); Tag = overdefined; } @@ -132,7 +195,7 @@ class ValueLatticeElement { "Marking constant with different value"); assert(isUndefined()); Tag = constant; - Val = V; + ConstVal = V; } void markNotConstant(Constant *V) { @@ -150,7 +213,7 @@ class ValueLatticeElement { "Marking !constant with different value"); assert(isUndefined() || isConstant()); Tag = notconstant; - Val = V; + ConstVal = V; } void markConstantRange(ConstantRange NewR) { @@ -168,7 +231,7 @@ class ValueLatticeElement { markOverdefined(); else { Tag = constantrange; - Range = std::move(NewR); + new (&Range) ConstantRange(std::move(NewR)); } } @@ -189,14 +252,14 @@ class ValueLatticeElement { } if (isConstant()) { - if (RHS.isConstant() && Val == RHS.Val) + if (RHS.isConstant() && getConstant() == RHS.getConstant()) return false; markOverdefined(); return true; } if (isNotConstant()) { - if (RHS.isNotConstant() && Val == RHS.Val) + if (RHS.isNotConstant() && getNotConstant() == RHS.getNotConstant()) return false; markOverdefined(); return true; @@ -209,7 +272,7 @@ class ValueLatticeElement { markOverdefined(); return true; } - ConstantRange NewR = Range.unionWith(RHS.getConstantRange()); + ConstantRange NewR = getConstantRange().unionWith(RHS.getConstantRange()); if (NewR.isFullSet()) markOverdefined(); else diff --git a/include/llvm/Analysis/ValueTracking.h b/include/llvm/Analysis/ValueTracking.h
index 2fbfd3d2ffcd..1c51523b1573 100644 --- a/include/llvm/Analysis/ValueTracking.h +++ b/include/llvm/Analysis/ValueTracking.h @@ -366,6 +366,10 @@ class Value; /// operands are not memory dependent. bool mayBeMemoryDependent(const Instruction &I); + /// Return true if it is an intrinsic that cannot be speculated but also + /// cannot trap. + bool isAssumeLikeIntrinsic(const Instruction *I); + /// Return true if it is valid to use the assumptions provided by an /// assume intrinsic, I, at the point in the control-flow identified by the /// context instruction, CxtI. diff --git a/include/llvm/BinaryFormat/COFF.h b/include/llvm/BinaryFormat/COFF.h index b395db6eaa83..a55c544dfe90 100644 --- a/include/llvm/BinaryFormat/COFF.h +++ b/include/llvm/BinaryFormat/COFF.h @@ -91,11 +91,11 @@ struct BigObjHeader { uint32_t NumberOfSymbols; }; -enum MachineTypes { +enum MachineTypes : unsigned { MT_Invalid = 0xffff, IMAGE_FILE_MACHINE_UNKNOWN = 0x0, - IMAGE_FILE_MACHINE_AM33 = 0x13, + IMAGE_FILE_MACHINE_AM33 = 0x1D3, IMAGE_FILE_MACHINE_AMD64 = 0x8664, IMAGE_FILE_MACHINE_ARM = 0x1C0, IMAGE_FILE_MACHINE_ARMNT = 0x1C4, @@ -118,7 +118,7 @@ enum MachineTypes { IMAGE_FILE_MACHINE_WCEMIPSV2 = 0x169 }; -enum Characteristics { +enum Characteristics : unsigned { C_Invalid = 0, /// The file does not contain base relocations and must be loaded at its @@ -158,7 +158,7 @@ enum Characteristics { IMAGE_FILE_BYTES_REVERSED_HI = 0x8000 }; -enum ResourceTypeID { +enum ResourceTypeID : unsigned { RID_Cursor = 1, RID_Bitmap = 2, RID_Icon = 3, @@ -234,7 +234,7 @@ enum SymbolStorageClass { IMAGE_SYM_CLASS_CLR_TOKEN = 107 }; -enum SymbolBaseType { +enum SymbolBaseType : unsigned { IMAGE_SYM_TYPE_NULL = 0, ///< No type information or unknown base type. IMAGE_SYM_TYPE_VOID = 1, ///< Used with void pointers and functions. IMAGE_SYM_TYPE_CHAR = 2, ///< A character (signed byte). @@ -253,7 +253,7 @@ enum SymbolBaseType { IMAGE_SYM_TYPE_DWORD = 15 ///< An unsigned 4-byte integer. }; -enum SymbolComplexType { +enum SymbolComplexType : unsigned { IMAGE_SYM_DTYPE_NULL = 0, ///< No complex type; simple scalar variable. IMAGE_SYM_DTYPE_POINTER = 1, ///< A pointer to base type. IMAGE_SYM_DTYPE_FUNCTION = 2, ///< A function that returns a base type. 
@@ -325,7 +325,7 @@ struct relocation { uint16_t Type; }; -enum RelocationTypeI386 { +enum RelocationTypeI386 : unsigned { IMAGE_REL_I386_ABSOLUTE = 0x0000, IMAGE_REL_I386_DIR16 = 0x0001, IMAGE_REL_I386_REL16 = 0x0002, @@ -339,7 +339,7 @@ enum RelocationTypeI386 { IMAGE_REL_I386_REL32 = 0x0014 }; -enum RelocationTypeAMD64 { +enum RelocationTypeAMD64 : unsigned { IMAGE_REL_AMD64_ABSOLUTE = 0x0000, IMAGE_REL_AMD64_ADDR64 = 0x0001, IMAGE_REL_AMD64_ADDR32 = 0x0002, @@ -359,7 +359,7 @@ enum RelocationTypeAMD64 { IMAGE_REL_AMD64_SSPAN32 = 0x0010 }; -enum RelocationTypesARM { +enum RelocationTypesARM : unsigned { IMAGE_REL_ARM_ABSOLUTE = 0x0000, IMAGE_REL_ARM_ADDR32 = 0x0001, IMAGE_REL_ARM_ADDR32NB = 0x0002, @@ -377,7 +377,7 @@ enum RelocationTypesARM { IMAGE_REL_ARM_BLX23T = 0x0015 }; -enum RelocationTypesARM64 { +enum RelocationTypesARM64 : unsigned { IMAGE_REL_ARM64_ABSOLUTE = 0x0000, IMAGE_REL_ARM64_ADDR32 = 0x0001, IMAGE_REL_ARM64_ADDR32NB = 0x0002, @@ -397,7 +397,7 @@ enum RelocationTypesARM64 { IMAGE_REL_ARM64_BRANCH14 = 0x0010, }; -enum COMDATType { +enum COMDATType : unsigned { IMAGE_COMDAT_SELECT_NODUPLICATES = 1, IMAGE_COMDAT_SELECT_ANY, IMAGE_COMDAT_SELECT_SAME_SIZE, @@ -430,7 +430,7 @@ struct AuxiliaryWeakExternal { uint8_t unused[10]; }; -enum WeakExternalCharacteristics { +enum WeakExternalCharacteristics : unsigned { IMAGE_WEAK_EXTERN_SEARCH_NOLIBRARY = 1, IMAGE_WEAK_EXTERN_SEARCH_LIBRARY = 2, IMAGE_WEAK_EXTERN_SEARCH_ALIAS = 3 @@ -572,7 +572,7 @@ struct DataDirectory { uint32_t Size; }; -enum DataDirectoryIndex { +enum DataDirectoryIndex : unsigned { EXPORT_TABLE = 0, IMPORT_TABLE, RESOURCE_TABLE, @@ -592,7 +592,7 @@ enum DataDirectoryIndex { NUM_DATA_DIRECTORIES }; -enum WindowsSubsystem { +enum WindowsSubsystem : unsigned { IMAGE_SUBSYSTEM_UNKNOWN = 0, ///< An unknown subsystem. IMAGE_SUBSYSTEM_NATIVE = 1, ///< Device drivers and native Windows processes IMAGE_SUBSYSTEM_WINDOWS_GUI = 2, ///< The Windows GUI subsystem. @@ -611,7 +611,7 @@ enum WindowsSubsystem { IMAGE_SUBSYSTEM_WINDOWS_BOOT_APPLICATION = 16 ///< A BCD application. }; -enum DLLCharacteristics { +enum DLLCharacteristics : unsigned { /// ASLR with 64 bit address space. IMAGE_DLL_CHARACTERISTICS_HIGH_ENTROPY_VA = 0x0020, /// DLL can be relocated at load time. @@ -637,7 +637,7 @@ enum DLLCharacteristics { IMAGE_DLL_CHARACTERISTICS_TERMINAL_SERVER_AWARE = 0x8000 }; -enum DebugType { +enum DebugType : unsigned { IMAGE_DEBUG_TYPE_UNKNOWN = 0, IMAGE_DEBUG_TYPE_COFF = 1, IMAGE_DEBUG_TYPE_CODEVIEW = 2, @@ -657,7 +657,7 @@ enum DebugType { IMAGE_DEBUG_TYPE_REPRO = 16, }; -enum BaseRelocationType { +enum BaseRelocationType : unsigned { IMAGE_REL_BASED_ABSOLUTE = 0, IMAGE_REL_BASED_HIGH = 1, IMAGE_REL_BASED_LOW = 2, @@ -670,9 +670,13 @@ enum BaseRelocationType { IMAGE_REL_BASED_DIR64 = 10 }; -enum ImportType { IMPORT_CODE = 0, IMPORT_DATA = 1, IMPORT_CONST = 2 }; +enum ImportType : unsigned { + IMPORT_CODE = 0, + IMPORT_DATA = 1, + IMPORT_CONST = 2 +}; -enum ImportNameType { +enum ImportNameType : unsigned { /// Import is by ordinal. This indicates that the value in the Ordinal/Hint /// field of the import header is the import's ordinal. 
If this constant is /// not specified, then the Ordinal/Hint field should always be interpreted @@ -707,6 +711,7 @@ struct ImportHeader { enum CodeViewIdentifiers { DEBUG_SECTION_MAGIC = 0x4, + DEBUG_HASHES_SECTION_MAGIC = 0x133C9C5 }; inline bool isReservedSectionNumber(int32_t SectionNumber) { diff --git a/include/llvm/BinaryFormat/Dwarf.def b/include/llvm/BinaryFormat/Dwarf.def index 3ade3ea0d338..c3e2ed718658 100644 --- a/include/llvm/BinaryFormat/Dwarf.def +++ b/include/llvm/BinaryFormat/Dwarf.def @@ -12,15 +12,15 @@ //===----------------------------------------------------------------------===// // TODO: Add other DW-based macros. -#if !(defined HANDLE_DW_TAG || defined HANDLE_DW_AT || \ - defined HANDLE_DW_FORM || defined HANDLE_DW_OP || \ - defined HANDLE_DW_LANG || defined HANDLE_DW_ATE || \ - defined HANDLE_DW_VIRTUALITY || defined HANDLE_DW_DEFAULTED || \ - defined HANDLE_DW_CC || defined HANDLE_DW_LNS || \ - defined HANDLE_DW_LNE || defined HANDLE_DW_LNCT || \ - defined HANDLE_DW_MACRO || defined HANDLE_DW_RLE || \ - defined HANDLE_DW_CFA || defined HANDLE_DW_APPLE_PROPERTY || \ - defined HANDLE_DW_UT || defined HANDLE_DWARF_SECTION) +#if !( \ + defined HANDLE_DW_TAG || defined HANDLE_DW_AT || defined HANDLE_DW_FORM || \ + defined HANDLE_DW_OP || defined HANDLE_DW_LANG || defined HANDLE_DW_ATE || \ + defined HANDLE_DW_VIRTUALITY || defined HANDLE_DW_DEFAULTED || \ + defined HANDLE_DW_CC || defined HANDLE_DW_LNS || defined HANDLE_DW_LNE || \ + defined HANDLE_DW_LNCT || defined HANDLE_DW_MACRO || \ + defined HANDLE_DW_RLE || defined HANDLE_DW_CFA || \ + defined HANDLE_DW_APPLE_PROPERTY || defined HANDLE_DW_UT || \ + defined HANDLE_DWARF_SECTION || defined HANDLE_DW_IDX) #error "Missing macro definition of HANDLE_DW*" #endif @@ -96,6 +96,10 @@ #define HANDLE_DWARF_SECTION(ENUM_NAME, ELF_NAME, CMDLINE_NAME) #endif +#ifndef HANDLE_DW_IDX +#define HANDLE_DW_IDX(ID, NAME) +#endif + HANDLE_DW_TAG(0x0000, null, 2, DWARF) HANDLE_DW_TAG(0x0001, array_type, 2, DWARF) HANDLE_DW_TAG(0x0002, class_type, 2, DWARF) @@ -839,6 +843,7 @@ HANDLE_DWARF_SECTION(DebugLine, ".debug_line", "debug-line") HANDLE_DWARF_SECTION(DebugLoc, ".debug_loc", "debug-loc") HANDLE_DWARF_SECTION(DebugFrame, ".debug_frame", "debug-frame") HANDLE_DWARF_SECTION(DebugMacro, ".debug_macro", "debug-macro") +HANDLE_DWARF_SECTION(DebugNames, ".debug_names", "debug-names") HANDLE_DWARF_SECTION(DebugRanges, ".debug_ranges", "debug-ranges") HANDLE_DWARF_SECTION(DebugPubnames, ".debug_pubnames", "debug-pubnames") HANDLE_DWARF_SECTION(DebugPubtypes, ".debug_pubtypes", "debug-pubtypes") @@ -855,6 +860,12 @@ HANDLE_DWARF_SECTION(AppleNamespaces, ".apple_namespaces", "apple-namespaces") HANDLE_DWARF_SECTION(AppleObjC, ".apple_objc", "apple-objc") HANDLE_DWARF_SECTION(GdbIndex, ".gdb_index", "gdb-index") +HANDLE_DW_IDX(0x01, compile_unit) +HANDLE_DW_IDX(0x02, type_unit) +HANDLE_DW_IDX(0x03, die_offset) +HANDLE_DW_IDX(0x04, parent) +HANDLE_DW_IDX(0x05, type_hash) + #undef HANDLE_DW_TAG #undef HANDLE_DW_AT @@ -874,3 +885,4 @@ HANDLE_DWARF_SECTION(GdbIndex, ".gdb_index", "gdb-index") #undef HANDLE_DW_APPLE_PROPERTY #undef HANDLE_DW_UT #undef HANDLE_DWARF_SECTION +#undef HANDLE_DW_IDX diff --git a/include/llvm/BinaryFormat/Dwarf.h b/include/llvm/BinaryFormat/Dwarf.h index a0e5367b412c..6e2b2ce093c7 100644 --- a/include/llvm/BinaryFormat/Dwarf.h +++ b/include/llvm/BinaryFormat/Dwarf.h @@ -125,7 +125,7 @@ enum LocationAtom { DW_OP_LLVM_fragment = 0x1000 ///< Only used in LLVM metadata. 
}; -enum TypeKind { +enum TypeKind : uint8_t { #define HANDLE_DW_ATE(ID, NAME, VERSION, VENDOR) DW_ATE_##NAME = ID, #include "llvm/BinaryFormat/Dwarf.def" DW_ATE_lo_user = 0x80, @@ -325,6 +325,13 @@ enum UnitType : unsigned char { DW_UT_hi_user = 0xff }; +enum Index { +#define HANDLE_DW_IDX(ID, NAME) DW_IDX_##NAME = ID, +#include "llvm/BinaryFormat/Dwarf.def" + DW_IDX_lo_user = 0x2000, + DW_IDX_hi_user = 0x3fff +}; + inline bool isUnitType(uint8_t UnitType) { switch (UnitType) { case DW_UT_compile: @@ -420,6 +427,7 @@ StringRef UnitTypeString(unsigned); StringRef AtomTypeString(unsigned Atom); StringRef GDBIndexEntryKindString(GDBIndexEntryKind Kind); StringRef GDBIndexEntryLinkageString(GDBIndexEntryLinkage Linkage); +StringRef IndexString(unsigned Idx); /// @} /// \defgroup DwarfConstantsParsing Dwarf constants parsing functions diff --git a/include/llvm/BinaryFormat/ELF.h b/include/llvm/BinaryFormat/ELF.h index 5cedd99fdc01..c902972d93bd 100644 --- a/include/llvm/BinaryFormat/ELF.h +++ b/include/llvm/BinaryFormat/ELF.h @@ -584,6 +584,7 @@ enum { EF_HEXAGON_MACH_V55 = 0x00000005, // Hexagon V55 EF_HEXAGON_MACH_V60 = 0x00000060, // Hexagon V60 EF_HEXAGON_MACH_V62 = 0x00000062, // Hexagon V62 + EF_HEXAGON_MACH_V65 = 0x00000065, // Hexagon V65 // Highest ISA version flags EF_HEXAGON_ISA_MACH = 0x00000000, // Same as specified in bits[11:0] @@ -595,6 +596,7 @@ enum { EF_HEXAGON_ISA_V55 = 0x00000050, // Hexagon V55 ISA EF_HEXAGON_ISA_V60 = 0x00000060, // Hexagon V60 ISA EF_HEXAGON_ISA_V62 = 0x00000062, // Hexagon V62 ISA + EF_HEXAGON_ISA_V65 = 0x00000065, // Hexagon V65 ISA }; // Hexagon-specific section indexes for common small data diff --git a/include/llvm/BinaryFormat/ELFRelocs/AVR.def b/include/llvm/BinaryFormat/ELFRelocs/AVR.def index 5692d6cb9aa0..696fc60b0f5a 100644 --- a/include/llvm/BinaryFormat/ELFRelocs/AVR.def +++ b/include/llvm/BinaryFormat/ELFRelocs/AVR.def @@ -33,8 +33,9 @@ ELF_RELOC(R_AVR_8, 26) ELF_RELOC(R_AVR_8_LO8, 27) ELF_RELOC(R_AVR_8_HI8, 28) ELF_RELOC(R_AVR_8_HLO8, 29) -ELF_RELOC(R_AVR_SYM_DIFF, 30) -ELF_RELOC(R_AVR_16_LDST, 31) +ELF_RELOC(R_AVR_DIFF8, 30) +ELF_RELOC(R_AVR_DIFF16, 31) +ELF_RELOC(R_AVR_DIFF32, 32) ELF_RELOC(R_AVR_LDS_STS_16, 33) ELF_RELOC(R_AVR_PORT6, 34) ELF_RELOC(R_AVR_PORT5, 35) diff --git a/include/llvm/BinaryFormat/MachO.h b/include/llvm/BinaryFormat/MachO.h index 7293ed78dfd3..060fbe162ad2 100644 --- a/include/llvm/BinaryFormat/MachO.h +++ b/include/llvm/BinaryFormat/MachO.h @@ -481,7 +481,7 @@ enum RelocationInfoType { enum { VM_PROT_READ = 0x1, VM_PROT_WRITE = 0x2, VM_PROT_EXECUTE = 0x4 }; // Values for platform field in build_version_command. 
-enum { +enum PlatformType { PLATFORM_MACOS = 1, PLATFORM_IOS = 2, PLATFORM_TVOS = 3, diff --git a/include/llvm/BinaryFormat/Wasm.h b/include/llvm/BinaryFormat/Wasm.h index 26475c27df38..d2ebe187cea6 100644 --- a/include/llvm/BinaryFormat/Wasm.h +++ b/include/llvm/BinaryFormat/Wasm.h @@ -66,6 +66,7 @@ struct WasmInitExpr { }; struct WasmGlobal { + uint32_t Index; int32_t Type; bool Mutable; WasmInitExpr InitExpr; @@ -89,8 +90,13 @@ struct WasmLocalDecl { }; struct WasmFunction { + uint32_t Index; std::vector Locals; ArrayRef Body; + uint32_t CodeSectionOffset; + uint32_t Size; + StringRef Name; // from the "names" section + StringRef Comdat; // from the "comdat info" section }; struct WasmDataSegment { @@ -100,6 +106,7 @@ struct WasmDataSegment { StringRef Name; uint32_t Alignment; uint32_t Flags; + StringRef Comdat; // from the "comdat info" section }; struct WasmElemSegment { @@ -110,13 +117,24 @@ struct WasmElemSegment { struct WasmRelocation { uint32_t Type; // The type of the relocation. - uint32_t Index; // Index into function to global index space. + uint32_t Index; // Index into function or global index space. uint64_t Offset; // Offset from the start of the section. int64_t Addend; // A value to add to the symbol. }; +struct WasmInitFunc { + uint32_t Priority; + uint32_t FunctionIndex; +}; + +struct WasmFunctionName { + uint32_t Index; + StringRef Name; +}; + struct WasmLinkingData { uint32_t DataSize; + std::vector InitFunctions; }; enum : unsigned { @@ -163,11 +181,6 @@ enum : unsigned { WASM_OPCODE_F64_CONST = 0x44, }; -enum : unsigned { - WASM_NAMES_FUNCTION = 0x1, - WASM_NAMES_LOCAL = 0x2, -}; - enum : unsigned { WASM_LIMITS_FLAG_HAS_MAX = 0x1, }; @@ -180,27 +193,40 @@ enum class ValType { F64 = WASM_TYPE_F64, }; -// Linking metadata kinds. 
+// Kind codes used in the custom "name" section +enum : unsigned { + WASM_NAMES_FUNCTION = 0x1, + WASM_NAMES_LOCAL = 0x2, +}; + +// Kind codes used in the custom "linking" section enum : unsigned { - WASM_STACK_POINTER = 0x1, WASM_SYMBOL_INFO = 0x2, WASM_DATA_SIZE = 0x3, - WASM_DATA_ALIGNMENT = 0x4, WASM_SEGMENT_INFO = 0x5, + WASM_INIT_FUNCS = 0x6, + WASM_COMDAT_INFO = 0x7, }; -const unsigned WASM_SYMBOL_BINDING_MASK = 0x3; - +// Kind codes used in the custom "linking" section in the WASM_COMDAT_INFO enum : unsigned { - WASM_SYMBOL_BINDING_GLOBAL = 0x0, - WASM_SYMBOL_BINDING_WEAK = 0x1, - WASM_SYMBOL_BINDING_LOCAL = 0x2, + WASM_COMDAT_DATA = 0x0, + WASM_COMDAT_FUNCTION = 0x1, }; +const unsigned WASM_SYMBOL_BINDING_MASK = 0x3; +const unsigned WASM_SYMBOL_VISIBILITY_MASK = 0x4; + +const unsigned WASM_SYMBOL_BINDING_GLOBAL = 0x0; +const unsigned WASM_SYMBOL_BINDING_WEAK = 0x1; +const unsigned WASM_SYMBOL_BINDING_LOCAL = 0x2; +const unsigned WASM_SYMBOL_VISIBILITY_DEFAULT = 0x0; +const unsigned WASM_SYMBOL_VISIBILITY_HIDDEN = 0x4; + #define WASM_RELOC(name, value) name = value, enum : unsigned { -#include "WasmRelocs/WebAssembly.def" +#include "WasmRelocs.def" }; #undef WASM_RELOC diff --git a/include/llvm/BinaryFormat/WasmRelocs/WebAssembly.def b/include/llvm/BinaryFormat/WasmRelocs.def similarity index 100% rename from include/llvm/BinaryFormat/WasmRelocs/WebAssembly.def rename to include/llvm/BinaryFormat/WasmRelocs.def diff --git a/include/llvm/Bitcode/LLVMBitCodes.h b/include/llvm/Bitcode/LLVMBitCodes.h index 9f869639399c..70194c043479 100644 --- a/include/llvm/Bitcode/LLVMBitCodes.h +++ b/include/llvm/Bitcode/LLVMBitCodes.h @@ -560,6 +560,7 @@ enum AttributeKindCodes { ATTR_KIND_WRITEONLY = 52, ATTR_KIND_SPECULATABLE = 53, ATTR_KIND_STRICT_FP = 54, + ATTR_KIND_SANITIZE_HWADDRESS = 55, }; enum ComdatSelectionKindCodes { diff --git a/include/llvm/CodeGen/AsmPrinter.h b/include/llvm/CodeGen/AsmPrinter.h index 1d65f703b84d..282d1a626f62 100644 --- a/include/llvm/CodeGen/AsmPrinter.h +++ b/include/llvm/CodeGen/AsmPrinter.h @@ -295,6 +295,8 @@ class AsmPrinter : public MachineFunctionPass { void emitFrameAlloc(const MachineInstr &MI); + void emitStackSizeSection(const MachineFunction &MF); + enum CFIMoveType { CFI_M_None, CFI_M_EH, CFI_M_Debug }; CFIMoveType needsCFIMoves() const; @@ -506,7 +508,12 @@ class AsmPrinter : public MachineFunctionPass { /// When possible, emit a DwarfStringPool section offset without any /// relocations, and without using the symbol. Otherwise, defers to \a /// emitDwarfSymbolReference(). - void emitDwarfStringOffset(DwarfStringPoolEntryRef S) const; + void emitDwarfStringOffset(DwarfStringPoolEntry S) const; + + /// Emit the 4-byte offset of a string from the start of its section. + void emitDwarfStringOffset(DwarfStringPoolEntryRef S) const { + emitDwarfStringOffset(S.getEntry()); + } /// Get the value for DW_AT_APPLE_isa. Zero if no isa encoding specified. virtual unsigned getISAEncoding() { return 0; } diff --git a/include/llvm/CodeGen/BasicTTIImpl.h b/include/llvm/CodeGen/BasicTTIImpl.h index bb5e7f9e8e30..526ddb1b9706 100644 --- a/include/llvm/CodeGen/BasicTTIImpl.h +++ b/include/llvm/CodeGen/BasicTTIImpl.h @@ -302,9 +302,13 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { } unsigned getFPOpCost(Type *Ty) { - // By default, FP instructions are no more expensive since they are - // implemented in HW. Target specific TTI can override this. 
- return TargetTransformInfo::TCC_Basic; + // Check whether FADD is available, as a proxy for floating-point in + // general. + const TargetLoweringBase *TLI = getTLI(); + EVT VT = TLI->getValueType(DL, Ty); + if (TLI->isOperationLegalOrCustomOrPromote(ISD::FADD, VT)) + return TargetTransformInfo::TCC_Basic; + return TargetTransformInfo::TCC_Expensive; } unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) { diff --git a/include/llvm/CodeGen/CallingConvLower.h b/include/llvm/CodeGen/CallingConvLower.h index e1d0b0c77cfb..d30a27328c01 100644 --- a/include/llvm/CodeGen/CallingConvLower.h +++ b/include/llvm/CodeGen/CallingConvLower.h @@ -201,6 +201,7 @@ class CCState { unsigned MaxStackArgAlign; SmallVector UsedRegs; SmallVector PendingLocs; + SmallVector PendingArgFlags; // ByValInfo and SmallVector ByValRegs: // @@ -508,6 +509,11 @@ class CCState { return PendingLocs; } + // Get a list of argflags for pending assignments. + SmallVectorImpl &getPendingArgFlags() { + return PendingArgFlags; + } + /// Compute the remaining unused register parameters that would be used for /// the given value type. This is useful when varargs are passed in the /// registers that normal prototyped parameters would be passed in, or for diff --git a/include/llvm/CodeGen/CommandFlags.def b/include/llvm/CodeGen/CommandFlags.def index 83cbeb0341fa..d7a5c946ad54 100644 --- a/include/llvm/CodeGen/CommandFlags.def +++ b/include/llvm/CodeGen/CommandFlags.def @@ -255,6 +255,10 @@ static cl::opt DebuggerTuningOpt( clEnumValN(DebuggerKind::LLDB, "lldb", "lldb"), clEnumValN(DebuggerKind::SCE, "sce", "SCE targets (e.g. PS4)"))); +static cl::opt EnableStackSizeSection( + "stack-size-section", + cl::desc("Emit a section containing stack size metadata"), cl::init(false)); + // Common utility function tightly tied to the options listed here. Initializes // a TargetOptions object with CodeGen flags and returns it. static TargetOptions InitTargetOptionsFromCodeGenFlags() { @@ -281,6 +285,7 @@ static TargetOptions InitTargetOptionsFromCodeGenFlags() { Options.UniqueSectionNames = UniqueSectionNames; Options.EmulatedTLS = EmulatedTLS; Options.ExceptionModel = ExceptionModel; + Options.EmitStackSizeSection = EnableStackSizeSection; Options.MCOptions = InitMCTargetOptionsFromFlags(); @@ -321,6 +326,26 @@ LLVM_ATTRIBUTE_UNUSED static std::string getFeaturesStr() { return Features.getString(); } +LLVM_ATTRIBUTE_UNUSED static std::vector getFeatureList() { + SubtargetFeatures Features; + + // If user asked for the 'native' CPU, we need to autodetect features. + // This is necessary for x86 where the CPU might not support all the + // features the autodetected CPU name lists in the target. For example, + // not all Sandybridge processors support AVX. + if (MCPU == "native") { + StringMap HostFeatures; + if (sys::getHostCPUFeatures(HostFeatures)) + for (auto &F : HostFeatures) + Features.AddFeature(F.first(), F.second); + } + + for (unsigned i = 0; i != MAttrs.size(); ++i) + Features.AddFeature(MAttrs[i]); + + return Features.getFeatures(); +} + /// \brief Set function attributes of functions in Module M based on CPU, /// Features, and command line flags. 
LLVM_ATTRIBUTE_UNUSED static void diff --git a/include/llvm/CodeGen/DwarfStringPoolEntry.h b/include/llvm/CodeGen/DwarfStringPoolEntry.h index fc2b5ddd2d2c..e6c0483cfc35 100644 --- a/include/llvm/CodeGen/DwarfStringPoolEntry.h +++ b/include/llvm/CodeGen/DwarfStringPoolEntry.h @@ -41,6 +41,8 @@ struct DwarfStringPoolEntryRef { unsigned getOffset() const { return I->second.Offset; } unsigned getIndex() const { return I->second.Index; } StringRef getString() const { return I->first(); } + /// Return the entire string pool entry for convenience. + DwarfStringPoolEntry getEntry() const { return I->getValue(); } bool operator==(const DwarfStringPoolEntryRef &X) const { return I == X.I; } bool operator!=(const DwarfStringPoolEntryRef &X) const { return I != X.I; } diff --git a/include/llvm/CodeGen/ExecutionDepsFix.h b/include/llvm/CodeGen/ExecutionDepsFix.h deleted file mode 100644 index f4db8b7322da..000000000000 --- a/include/llvm/CodeGen/ExecutionDepsFix.h +++ /dev/null @@ -1,230 +0,0 @@ -//==- llvm/CodeGen/ExecutionDepsFix.h - Execution Dependency Fix -*- C++ -*-==// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -/// \file Execution Dependency Fix pass. -/// -/// Some X86 SSE instructions like mov, and, or, xor are available in different -/// variants for different operand types. These variant instructions are -/// equivalent, but on Nehalem and newer cpus there is extra latency -/// transferring data between integer and floating point domains. ARM cores -/// have similar issues when they are configured with both VFP and NEON -/// pipelines. -/// -/// This pass changes the variant instructions to minimize domain crossings. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGEN_EXECUTIONDEPSFIX_H -#define LLVM_CODEGEN_EXECUTIONDEPSFIX_H - -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/iterator_range.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/CodeGen/LivePhysRegs.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/RegisterClassInfo.h" -#include "llvm/Pass.h" -#include "llvm/Support/Allocator.h" -#include "llvm/Support/MathExtras.h" -#include -#include -#include -#include - -namespace llvm { - -class MachineBasicBlock; -class MachineInstr; -class TargetInstrInfo; - -/// A DomainValue is a bit like LiveIntervals' ValNo, but it also keeps track -/// of execution domains. -/// -/// An open DomainValue represents a set of instructions that can still switch -/// execution domain. Multiple registers may refer to the same open -/// DomainValue - they will eventually be collapsed to the same execution -/// domain. -/// -/// A collapsed DomainValue represents a single register that has been forced -/// into one of more execution domains. There is a separate collapsed -/// DomainValue for each register, but it may contain multiple execution -/// domains. A register value is initially created in a single execution -/// domain, but if we were forced to pay the penalty of a domain crossing, we -/// keep track of the fact that the register is now available in multiple -/// domains. -struct DomainValue { - // Basic reference counting. - unsigned Refs = 0; - - // Bitmask of available domains. For an open DomainValue, it is the still - // possible domains for collapsing. 
For a collapsed DomainValue it is the - // domains where the register is available for free. - unsigned AvailableDomains; - - // Pointer to the next DomainValue in a chain. When two DomainValues are - // merged, Victim.Next is set to point to Victor, so old DomainValue - // references can be updated by following the chain. - DomainValue *Next; - - // Twiddleable instructions using or defining these registers. - SmallVector Instrs; - - DomainValue() { clear(); } - - // A collapsed DomainValue has no instructions to twiddle - it simply keeps - // track of the domains where the registers are already available. - bool isCollapsed() const { return Instrs.empty(); } - - // Is domain available? - bool hasDomain(unsigned domain) const { - assert(domain < - static_cast(std::numeric_limits::digits) && - "undefined behavior"); - return AvailableDomains & (1u << domain); - } - - // Mark domain as available. - void addDomain(unsigned domain) { - AvailableDomains |= 1u << domain; - } - - // Restrict to a single domain available. - void setSingleDomain(unsigned domain) { - AvailableDomains = 1u << domain; - } - - // Return bitmask of domains that are available and in mask. - unsigned getCommonDomains(unsigned mask) const { - return AvailableDomains & mask; - } - - // First domain available. - unsigned getFirstDomain() const { - return countTrailingZeros(AvailableDomains); - } - - // Clear this DomainValue and point to next which has all its data. - void clear() { - AvailableDomains = 0; - Next = nullptr; - Instrs.clear(); - } -}; - -/// Information about a live register. -struct LiveReg { - /// Value currently in this register, or NULL when no value is being tracked. - /// This counts as a DomainValue reference. - DomainValue *Value; - - /// Instruction that defined this register, relative to the beginning of the - /// current basic block. When a LiveReg is used to represent a live-out - /// register, this value is relative to the end of the basic block, so it - /// will be a negative number. - int Def; -}; - -class ExecutionDepsFix : public MachineFunctionPass { - SpecificBumpPtrAllocator Allocator; - SmallVector Avail; - - const TargetRegisterClass *const RC; - MachineFunction *MF; - const TargetInstrInfo *TII; - const TargetRegisterInfo *TRI; - RegisterClassInfo RegClassInfo; - std::vector> AliasMap; - const unsigned NumRegs; - LiveReg *LiveRegs; - struct MBBInfo { - // Keeps clearance and domain information for all registers. Note that this - // is different from the usual definition notion of liveness. The CPU - // doesn't care whether or not we consider a register killed. - LiveReg *OutRegs = nullptr; - - // Whether we have gotten to this block in primary processing yet. - bool PrimaryCompleted = false; - - // The number of predecessors for which primary processing has completed - unsigned IncomingProcessed = 0; - - // The value of `IncomingProcessed` at the start of primary processing - unsigned PrimaryIncoming = 0; - - // The number of predecessors for which all processing steps are done. - unsigned IncomingCompleted = 0; - - MBBInfo() = default; - }; - using MBBInfoMap = DenseMap; - MBBInfoMap MBBInfos; - - /// List of undefined register reads in this block in forward order. - std::vector> UndefReads; - - /// Storage for register unit liveness. - LivePhysRegs LiveRegSet; - - /// Current instruction number. - /// The first instruction in each basic block is 0. 
- int CurInstr; - -public: - ExecutionDepsFix(char &PassID, const TargetRegisterClass &RC) - : MachineFunctionPass(PassID), RC(&RC), NumRegs(RC.getNumRegs()) {} - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesAll(); - MachineFunctionPass::getAnalysisUsage(AU); - } - - bool runOnMachineFunction(MachineFunction &MF) override; - - MachineFunctionProperties getRequiredProperties() const override { - return MachineFunctionProperties().set( - MachineFunctionProperties::Property::NoVRegs); - } - -private: - iterator_range::const_iterator> - regIndices(unsigned Reg) const; - // DomainValue allocation. - DomainValue *alloc(int domain = -1); - DomainValue *retain(DomainValue *DV) { - if (DV) ++DV->Refs; - return DV; - } - void release(DomainValue*); - DomainValue *resolve(DomainValue*&); - - // LiveRegs manipulations. - void setLiveReg(int rx, DomainValue *DV); - void kill(int rx); - void force(int rx, unsigned domain); - void collapse(DomainValue *dv, unsigned domain); - bool merge(DomainValue *A, DomainValue *B); - - void enterBasicBlock(MachineBasicBlock*); - void leaveBasicBlock(MachineBasicBlock*); - bool isBlockDone(MachineBasicBlock *); - void processBasicBlock(MachineBasicBlock *MBB, bool PrimaryPass); - bool visitInstr(MachineInstr *); - void processDefs(MachineInstr *, bool breakDependency, bool Kill); - void visitSoftInstr(MachineInstr*, unsigned mask); - void visitHardInstr(MachineInstr*, unsigned domain); - bool pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx, - unsigned Pref); - bool shouldBreakDependence(MachineInstr*, unsigned OpIdx, unsigned Pref); - void processUndefReads(MachineBasicBlock*); -}; - -} // end namepsace llvm - -#endif // LLVM_CODEGEN_EXECUTIONDEPSFIX_H diff --git a/include/llvm/CodeGen/ExecutionDomainFix.h b/include/llvm/CodeGen/ExecutionDomainFix.h new file mode 100644 index 000000000000..338c214dd073 --- /dev/null +++ b/include/llvm/CodeGen/ExecutionDomainFix.h @@ -0,0 +1,213 @@ +//==-- llvm/CodeGen/ExecutionDomainFix.h - Execution Domain Fix -*- C++ -*--==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file Execution Domain Fix pass. +/// +/// Some X86 SSE instructions like mov, and, or, xor are available in different +/// variants for different operand types. These variant instructions are +/// equivalent, but on Nehalem and newer cpus there is extra latency +/// transferring data between integer and floating point domains. ARM cores +/// have similar issues when they are configured with both VFP and NEON +/// pipelines. +/// +/// This pass changes the variant instructions to minimize domain crossings. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_EXECUTIONDOMAINFIX_H +#define LLVM_CODEGEN_EXECUTIONDOMAINFIX_H + +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/LoopTraversal.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/ReachingDefAnalysis.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" + +namespace llvm { + +class MachineBasicBlock; +class MachineInstr; +class TargetInstrInfo; + +/// A DomainValue is a bit like LiveIntervals' ValNo, but it also keeps track +/// of execution domains. +/// +/// An open DomainValue represents a set of instructions that can still switch +/// execution domain. 
Multiple registers may refer to the same open +/// DomainValue - they will eventually be collapsed to the same execution +/// domain. +/// +/// A collapsed DomainValue represents a single register that has been forced +/// into one or more execution domains. There is a separate collapsed +/// DomainValue for each register, but it may contain multiple execution +/// domains. A register value is initially created in a single execution +/// domain, but if we were forced to pay the penalty of a domain crossing, we +/// keep track of the fact that the register is now available in multiple +/// domains. +struct DomainValue { + /// Basic reference counting. + unsigned Refs = 0; + + /// Bitmask of available domains. For an open DomainValue, it is the still + /// possible domains for collapsing. For a collapsed DomainValue it is the + /// domains where the register is available for free. + unsigned AvailableDomains; + + /// Pointer to the next DomainValue in a chain. When two DomainValues are + /// merged, Victim.Next is set to point to Victor, so old DomainValue + /// references can be updated by following the chain. + DomainValue *Next; + + /// Twiddleable instructions using or defining these registers. + SmallVector Instrs; + + DomainValue() { clear(); } + + /// A collapsed DomainValue has no instructions to twiddle - it simply keeps + /// track of the domains where the registers are already available. + bool isCollapsed() const { return Instrs.empty(); } + + /// Is domain available? + bool hasDomain(unsigned domain) const { + assert(domain < + static_cast(std::numeric_limits::digits) && + "undefined behavior"); + return AvailableDomains & (1u << domain); + } + + /// Mark domain as available. + void addDomain(unsigned domain) { AvailableDomains |= 1u << domain; } + + // Restrict to a single domain available. + void setSingleDomain(unsigned domain) { AvailableDomains = 1u << domain; } + + /// Return bitmask of domains that are available and in mask. + unsigned getCommonDomains(unsigned mask) const { + return AvailableDomains & mask; + } + + /// First domain available. + unsigned getFirstDomain() const { + return countTrailingZeros(AvailableDomains); + } + + /// Clear this DomainValue and point to next which has all its data. + void clear() { + AvailableDomains = 0; + Next = nullptr; + Instrs.clear(); + } +}; + +class ExecutionDomainFix : public MachineFunctionPass { + SpecificBumpPtrAllocator Allocator; + SmallVector Avail; + + const TargetRegisterClass *const RC; + MachineFunction *MF; + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + std::vector> AliasMap; + const unsigned NumRegs; + /// Value currently in each register, or NULL when no value is being tracked. + /// This counts as a DomainValue reference. + using LiveRegsDVInfo = std::vector; + LiveRegsDVInfo LiveRegs; + /// Keeps domain information for all registers. Note that this + /// is different from the usual definition notion of liveness. The CPU + /// doesn't care whether or not we consider a register killed.
+ using OutRegsInfoMap = SmallVector; + OutRegsInfoMap MBBOutRegsInfos; + + ReachingDefAnalysis *RDA; + +public: + ExecutionDomainFix(char &PassID, const TargetRegisterClass &RC) + : MachineFunctionPass(PassID), RC(&RC), NumRegs(RC.getNumRegs()) {} + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + AU.addRequired(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoVRegs); + } + +private: + /// Translate TRI register number to a list of indices into our smaller tables + /// of interesting registers. + iterator_range::const_iterator> + regIndices(unsigned Reg) const; + + /// DomainValue allocation. + DomainValue *alloc(int domain = -1); + + /// Add reference to DV. + DomainValue *retain(DomainValue *DV) { + if (DV) + ++DV->Refs; + return DV; + } + + /// Release a reference to DV. When the last reference is released, + /// collapse if needed. + void release(DomainValue *); + + /// Follow the chain of dead DomainValues until a live DomainValue is reached. + /// Update the referenced pointer when necessary. + DomainValue *resolve(DomainValue *&); + + /// Set LiveRegs[rx] = dv, updating reference counts. + void setLiveReg(int rx, DomainValue *DV); + + /// Kill register rx, recycle or collapse any DomainValue. + void kill(int rx); + + /// Force register rx into domain. + void force(int rx, unsigned domain); + + /// Collapse open DomainValue into given domain. If there are multiple + /// registers using dv, they each get a unique collapsed DomainValue. + void collapse(DomainValue *dv, unsigned domain); + + /// All instructions and registers in B are moved to A, and B is released. + bool merge(DomainValue *A, DomainValue *B); + + /// Set up LiveRegs by merging predecessor live-out values. + void enterBasicBlock(const LoopTraversal::TraversedMBBInfo &TraversedMBB); + + /// Update live-out values. + void leaveBasicBlock(const LoopTraversal::TraversedMBBInfo &TraversedMBB); + + /// Process the given basic block. + void processBasicBlock(const LoopTraversal::TraversedMBBInfo &TraversedMBB); + + /// Visit the given instruction. + bool visitInstr(MachineInstr *); + + /// Update def-ages for registers defined by MI. + /// If Kill is set, also kill off DomainValues clobbered by the defs. + void processDefs(MachineInstr *, bool Kill); + + /// A soft instruction can be changed to work in other domains given by mask. + void visitSoftInstr(MachineInstr *, unsigned mask); + + /// A hard instruction only works in one domain. All input registers will be + /// forced into that domain. + void visitHardInstr(MachineInstr *, unsigned domain); +}; + +} // namespace llvm + +#endif // LLVM_CODEGEN_EXECUTIONDOMAINFIX_H diff --git a/include/llvm/CodeGen/GlobalISel/InstructionSelector.h b/include/llvm/CodeGen/GlobalISel/InstructionSelector.h index 550e45a4be2a..ae0055ce6919 100644 --- a/include/llvm/CodeGen/GlobalISel/InstructionSelector.h +++ b/include/llvm/CodeGen/GlobalISel/InstructionSelector.h @@ -111,9 +111,12 @@ enum { /// - InsnID - Instruction ID /// - The predicate to test GIM_CheckAPFloatImmPredicate, - /// Check a memory operation is non-atomic. + /// Check a memory operation has the specified atomic ordering.
/// - InsnID - Instruction ID - GIM_CheckNonAtomic, + /// - Ordering - The AtomicOrdering value + GIM_CheckAtomicOrdering, + GIM_CheckAtomicOrderingOrStrongerThan, + GIM_CheckAtomicOrderingWeakerThan, /// Check the type for the specified operand /// - InsnID - Instruction ID @@ -232,6 +235,11 @@ enum { /// - RendererID - The renderer to call /// - RenderOpID - The suboperand to render. GIR_ComplexSubOperandRenderer, + /// Render operands to the specified instruction using a custom function + /// - InsnID - Instruction ID to modify + /// - OldInsnID - Instruction ID to get the matched operand from + /// - RendererFnID - Custom renderer function to call + GIR_CustomRenderer, /// Render a G_CONSTANT operator as a sign-extended immediate. /// - NewInsnID - Instruction ID to modify @@ -279,10 +287,6 @@ enum { /// Provides the logic to select generic machine instructions. class InstructionSelector { public: - using I64ImmediatePredicateFn = bool (*)(int64_t); - using APIntImmediatePredicateFn = bool (*)(const APInt &); - using APFloatImmediatePredicateFn = bool (*)(const APFloat &); - virtual ~InstructionSelector() = default; /// Select the (possibly generic) instruction \p I to only use target-specific @@ -312,14 +316,13 @@ class InstructionSelector { }; public: - template - struct MatcherInfoTy { + template + struct ISelInfoTy { const LLT *TypeObjects; const PredicateBitset *FeatureBitsets; - const I64ImmediatePredicateFn *I64ImmPredicateFns; - const APIntImmediatePredicateFn *APIntImmPredicateFns; - const APFloatImmediatePredicateFn *APFloatImmPredicateFns; const ComplexMatcherMemFn *ComplexPredicates; + const CustomRendererFn *CustomRenderers; }; protected: @@ -328,15 +331,26 @@ class InstructionSelector { /// Execute a given matcher table and return true if the match was successful /// and false otherwise. template + class ComplexMatcherMemFn, class CustomRendererFn> bool executeMatchTable( TgtInstructionSelector &ISel, NewMIVector &OutMIs, MatcherState &State, - const MatcherInfoTy &MatcherInfo, + const ISelInfoTy + &ISelInfo, const int64_t *MatchTable, const TargetInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI, const PredicateBitset &AvailableFeatures, CodeGenCoverage &CoverageInfo) const; + virtual bool testImmPredicate_I64(unsigned, int64_t) const { + llvm_unreachable("Subclasses must override this to use tablegen"); + } + virtual bool testImmPredicate_APInt(unsigned, const APInt &) const { + llvm_unreachable("Subclasses must override this to use tablegen"); + } + virtual bool testImmPredicate_APFloat(unsigned, const APFloat &) const { + llvm_unreachable("Subclasses must override this to use tablegen"); + } + /// Constrain a register operand of an instruction \p I to a specified /// register class. This could involve inserting COPYs before (for uses) or /// after (for defs) and may replace the operand of \p I. @@ -347,20 +361,6 @@ class InstructionSelector { const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI) const; - /// Mutate the newly-selected instruction \p I to constrain its (possibly - /// generic) virtual register operands to the instruction's register class. - /// This could involve inserting COPYs before (for uses) or after (for defs). - /// This requires the number of operands to match the instruction description. - /// \returns whether operand regclass constraining succeeded. - /// - // FIXME: Not all instructions have the same number of operands. 
We should - // probably expose a constrain helper per operand and let the target selector - // constrain individual registers, like fast-isel. - bool constrainSelectedInstRegOperands(MachineInstr &I, - const TargetInstrInfo &TII, - const TargetRegisterInfo &TRI, - const RegisterBankInfo &RBI) const; - bool isOperandImmEqual(const MachineOperand &MO, int64_t Value, const MachineRegisterInfo &MRI) const; diff --git a/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h b/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h index bf2cf734efef..460bfcca37ab 100644 --- a/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h +++ b/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h @@ -19,6 +19,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -43,10 +44,11 @@ enum { }; template + class ComplexMatcherMemFn, class CustomRendererFn> bool InstructionSelector::executeMatchTable( TgtInstructionSelector &ISel, NewMIVector &OutMIs, MatcherState &State, - const MatcherInfoTy &MatcherInfo, + const ISelInfoTy + &ISelInfo, const int64_t *MatchTable, const TargetInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI, const PredicateBitset &AvailableFeatures, @@ -124,8 +126,8 @@ bool InstructionSelector::executeMatchTable( dbgs() << CurrentIdx << ": GIM_CheckFeatures(ExpectedBitsetID=" << ExpectedBitsetID << ")\n"); - if ((AvailableFeatures & MatcherInfo.FeatureBitsets[ExpectedBitsetID]) != - MatcherInfo.FeatureBitsets[ExpectedBitsetID]) { + if ((AvailableFeatures & ISelInfo.FeatureBitsets[ExpectedBitsetID]) != + ISelInfo.FeatureBitsets[ExpectedBitsetID]) { if (handleReject() == RejectAndGiveUp) return false; } @@ -181,7 +183,7 @@ bool InstructionSelector::executeMatchTable( else llvm_unreachable("Expected Imm or CImm operand"); - if (!MatcherInfo.I64ImmPredicateFns[Predicate](Value)) + if (!testImmPredicate_I64(Predicate, Value)) if (handleReject() == RejectAndGiveUp) return false; break; @@ -202,7 +204,7 @@ bool InstructionSelector::executeMatchTable( else llvm_unreachable("Expected Imm or CImm operand"); - if (!MatcherInfo.APIntImmPredicateFns[Predicate](Value)) + if (!testImmPredicate_APInt(Predicate, Value)) if (handleReject() == RejectAndGiveUp) return false; break; @@ -221,32 +223,67 @@ bool InstructionSelector::executeMatchTable( assert(Predicate > GIPFP_APFloat_Invalid && "Expected a valid predicate"); APFloat Value = State.MIs[InsnID]->getOperand(1).getFPImm()->getValueAPF(); - if (!MatcherInfo.APFloatImmPredicateFns[Predicate](Value)) + if (!testImmPredicate_APFloat(Predicate, Value)) if (handleReject() == RejectAndGiveUp) return false; break; } - case GIM_CheckNonAtomic: { + case GIM_CheckAtomicOrdering: { int64_t InsnID = MatchTable[CurrentIdx++]; + AtomicOrdering Ordering = (AtomicOrdering)MatchTable[CurrentIdx++]; DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), - dbgs() << CurrentIdx << ": GIM_CheckNonAtomic(MIs[" - << InsnID << "])\n"); + dbgs() << CurrentIdx << ": GIM_CheckAtomicOrdering(MIs[" + << InsnID << "], " << (uint64_t)Ordering << ")\n"); + assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + + if (!State.MIs[InsnID]->hasOneMemOperand()) + if (handleReject() == RejectAndGiveUp) + return false; + + for (const auto &MMO : 
State.MIs[InsnID]->memoperands()) + if (MMO->getOrdering() != Ordering) + if (handleReject() == RejectAndGiveUp) + return false; + break; + } + case GIM_CheckAtomicOrderingOrStrongerThan: { + int64_t InsnID = MatchTable[CurrentIdx++]; + AtomicOrdering Ordering = (AtomicOrdering)MatchTable[CurrentIdx++]; + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx + << ": GIM_CheckAtomicOrderingOrStrongerThan(MIs[" + << InsnID << "], " << (uint64_t)Ordering << ")\n"); assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); - assert((State.MIs[InsnID]->getOpcode() == TargetOpcode::G_LOAD || - State.MIs[InsnID]->getOpcode() == TargetOpcode::G_STORE) && - "Expected G_LOAD/G_STORE"); if (!State.MIs[InsnID]->hasOneMemOperand()) if (handleReject() == RejectAndGiveUp) return false; for (const auto &MMO : State.MIs[InsnID]->memoperands()) - if (MMO->getOrdering() != AtomicOrdering::NotAtomic) + if (!isAtLeastOrStrongerThan(MMO->getOrdering(), Ordering)) if (handleReject() == RejectAndGiveUp) return false; break; } + case GIM_CheckAtomicOrderingWeakerThan: { + int64_t InsnID = MatchTable[CurrentIdx++]; + AtomicOrdering Ordering = (AtomicOrdering)MatchTable[CurrentIdx++]; + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx + << ": GIM_CheckAtomicOrderingWeakerThan(MIs[" + << InsnID << "], " << (uint64_t)Ordering << ")\n"); + assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + + if (!State.MIs[InsnID]->hasOneMemOperand()) + if (handleReject() == RejectAndGiveUp) + return false; + for (const auto &MMO : State.MIs[InsnID]->memoperands()) + if (!isStrongerThan(Ordering, MMO->getOrdering())) + if (handleReject() == RejectAndGiveUp) + return false; + break; + } case GIM_CheckType: { int64_t InsnID = MatchTable[CurrentIdx++]; int64_t OpIdx = MatchTable[CurrentIdx++]; @@ -257,7 +294,7 @@ bool InstructionSelector::executeMatchTable( << "), TypeID=" << TypeID << ")\n"); assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); if (MRI.getType(State.MIs[InsnID]->getOperand(OpIdx).getReg()) != - MatcherInfo.TypeObjects[TypeID]) { + ISelInfo.TypeObjects[TypeID]) { if (handleReject() == RejectAndGiveUp) return false; } @@ -321,7 +358,7 @@ bool InstructionSelector::executeMatchTable( assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); // FIXME: Use std::invoke() when it's available. ComplexRendererFns Renderer = - (ISel.*MatcherInfo.ComplexPredicates[ComplexPredicateID])( + (ISel.*ISelInfo.ComplexPredicates[ComplexPredicateID])( State.MIs[InsnID]->getOperand(OpIdx)); if (Renderer.hasValue()) State.Renderers[RendererID] = Renderer.getValue(); @@ -340,6 +377,11 @@ bool InstructionSelector::executeMatchTable( << InsnID << "]->getOperand(" << OpIdx << "), Value=" << Value << ")\n"); assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + + // isOperandImmEqual() will sign-extend to 64-bits, so should we. 
+ LLT Ty = MRI.getType(State.MIs[InsnID]->getOperand(OpIdx).getReg()); + Value = SignExtend64(Value, Ty.getSizeInBits()); + if (!isOperandImmEqual(State.MIs[InsnID]->getOperand(OpIdx), Value, MRI)) { if (handleReject() == RejectAndGiveUp) @@ -609,6 +651,19 @@ bool InstructionSelector::executeMatchTable( break; } + case GIR_CustomRenderer: { + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t OldInsnID = MatchTable[CurrentIdx++]; + int64_t RendererFnID = MatchTable[CurrentIdx++]; + assert(OutMIs[InsnID] && "Attempted to add to undefined instruction"); + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIR_CustomRenderer(OutMIs[" + << InsnID << "], MIs[" << OldInsnID << "], " + << RendererFnID << ")\n"); + (ISel.*ISelInfo.CustomRenderers[RendererFnID])(OutMIs[InsnID], + *State.MIs[OldInsnID]); + break; + } case GIR_ConstrainOperandRC: { int64_t InsnID = MatchTable[CurrentIdx++]; int64_t OpIdx = MatchTable[CurrentIdx++]; @@ -670,7 +725,7 @@ bool InstructionSelector::executeMatchTable( int64_t TypeID = MatchTable[CurrentIdx++]; State.TempRegisters[TempRegID] = - MRI.createGenericVirtualRegister(MatcherInfo.TypeObjects[TypeID]); + MRI.createGenericVirtualRegister(ISelInfo.TypeObjects[TypeID]); DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), dbgs() << CurrentIdx << ": TempRegs[" << TempRegID << "] = GIR_MakeTempReg(" << TypeID << ")\n"); diff --git a/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h b/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h index b6735d538b37..9ee428a93796 100644 --- a/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h +++ b/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h @@ -121,8 +121,8 @@ class LegalizerInfo { } } - typedef std::pair SizeAndAction; - typedef std::vector SizeAndActionsVec; + using SizeAndAction = std::pair; + using SizeAndActionsVec = std::vector; using SizeChangeStrategy = std::function; @@ -441,7 +441,7 @@ class LegalizerInfo { static const int LastOp = TargetOpcode::PRE_ISEL_GENERIC_OPCODE_END; // Data structures used temporarily during construction of legality data: - typedef DenseMap TypeMap; + using TypeMap = DenseMap; SmallVector SpecifiedActions[LastOp - FirstOp + 1]; SmallVector ScalarSizeChangeStrategies[LastOp - FirstOp + 1]; diff --git a/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h index 5fe3137d6d70..aa875c11d86f 100644 --- a/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h +++ b/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h @@ -204,7 +204,7 @@ class MachineIRBuilder { const MDNode *Variable, const MDNode *Expr); - /// Build and insert \p Res = G_FRAME_INDEX \p Idx + /// Build and insert \p Res = G_FRAME_INDEX \p Idx /// /// G_FRAME_INDEX materializes the address of an alloca value or other /// stack-based object. @@ -215,7 +215,7 @@ class MachineIRBuilder { /// \return a MachineInstrBuilder for the newly created instruction. MachineInstrBuilder buildFrameIndex(unsigned Res, int Idx); - /// Build and insert \p Res = G_GLOBAL_VALUE \p GV + /// Build and insert \p Res = G_GLOBAL_VALUE \p GV /// /// G_GLOBAL_VALUE materializes the address of the specified global /// into \p Res. @@ -227,7 +227,7 @@ class MachineIRBuilder { /// \return a MachineInstrBuilder for the newly created instruction. 
MachineInstrBuilder buildGlobalValue(unsigned Res, const GlobalValue *GV); - /// Build and insert \p Res = G_ADD \p Op0, \p Op1 + /// Build and insert \p Res = G_ADD \p Op0, \p Op1 /// /// G_ADD sets \p Res to the sum of integer parameters \p Op0 and \p Op1, /// truncated to their width. @@ -245,7 +245,7 @@ class MachineIRBuilder { return buildAdd(Res, (getRegFromArg(UseArgs))...); } - /// Build and insert \p Res = G_SUB \p Op0, \p Op1 + /// Build and insert \p Res = G_SUB \p Op0, \p Op1 /// /// G_SUB sets \p Res to the sum of integer parameters \p Op0 and \p Op1, /// truncated to their width. @@ -258,7 +258,7 @@ class MachineIRBuilder { MachineInstrBuilder buildSub(unsigned Res, unsigned Op0, unsigned Op1); - /// Build and insert \p Res = G_MUL \p Op0, \p Op1 + /// Build and insert \p Res = G_MUL \p Op0, \p Op1 /// /// G_MUL sets \p Res to the sum of integer parameters \p Op0 and \p Op1, /// truncated to their width. @@ -271,7 +271,7 @@ class MachineIRBuilder { MachineInstrBuilder buildMul(unsigned Res, unsigned Op0, unsigned Op1); - /// Build and insert \p Res = G_GEP \p Op0, \p Op1 + /// Build and insert \p Res = G_GEP \p Op0, \p Op1 /// /// G_GEP adds \p Op1 bytes to the pointer specified by \p Op0, /// storing the resulting pointer in \p Res. @@ -285,7 +285,7 @@ class MachineIRBuilder { MachineInstrBuilder buildGEP(unsigned Res, unsigned Op0, unsigned Op1); - /// Materialize and insert \p Res = G_GEP \p Op0, (G_CONSTANT \p Value) + /// Materialize and insert \p Res = G_GEP \p Op0, (G_CONSTANT \p Value) /// /// G_GEP adds \p Value bytes to the pointer specified by \p Op0, /// storing the resulting pointer in \p Res. If \p Value is zero then no @@ -305,7 +305,7 @@ class MachineIRBuilder { const LLT &ValueTy, uint64_t Value); - /// Build and insert \p Res = G_PTR_MASK \p Op0, \p NumBits + /// Build and insert \p Res = G_PTR_MASK \p Op0, \p NumBits /// /// G_PTR_MASK clears the low bits of a pointer operand without destroying its /// pointer properties. This has the effect of rounding the address *down* to @@ -321,7 +321,7 @@ class MachineIRBuilder { MachineInstrBuilder buildPtrMask(unsigned Res, unsigned Op0, uint32_t NumBits); - /// Build and insert \p Res, \p CarryOut = G_UADDE \p Op0, + /// Build and insert \p Res, \p CarryOut = G_UADDE \p Op0, /// \p Op1, \p CarryIn /// /// G_UADDE sets \p Res to \p Op0 + \p Op1 + \p CarryIn (truncated to the bit @@ -338,7 +338,7 @@ class MachineIRBuilder { MachineInstrBuilder buildUAdde(unsigned Res, unsigned CarryOut, unsigned Op0, unsigned Op1, unsigned CarryIn); - /// Build and insert \p Res = G_AND \p Op0, \p Op1 + /// Build and insert \p Res = G_AND \p Op0, \p Op1 /// /// G_AND sets \p Res to the bitwise and of integer parameters \p Op0 and \p /// Op1. @@ -355,7 +355,7 @@ class MachineIRBuilder { MachineInstrBuilder buildAnd(unsigned Res, unsigned Op0, unsigned Op1); - /// Build and insert \p Res = G_OR \p Op0, \p Op1 + /// Build and insert \p Res = G_OR \p Op0, \p Op1 /// /// G_OR sets \p Res to the bitwise or of integer parameters \p Op0 and \p /// Op1. @@ -367,7 +367,7 @@ class MachineIRBuilder { /// \return a MachineInstrBuilder for the newly created instruction. MachineInstrBuilder buildOr(unsigned Res, unsigned Op0, unsigned Op1); - /// Build and insert \p Res = G_ANYEXT \p Op0 + /// Build and insert \p Res = G_ANYEXT \p Op0 /// /// G_ANYEXT produces a register of the specified width, with bits 0 to /// sizeof(\p Ty) * 8 set to \p Op. 
The remaining bits are unspecified @@ -387,7 +387,7 @@ class MachineIRBuilder { return buildAnyExt(getDestFromArg(Res), getRegFromArg(Arg)); } - /// Build and insert \p Res = G_SEXT \p Op + /// Build and insert \p Res = G_SEXT \p Op /// /// G_SEXT produces a register of the specified width, with bits 0 to /// sizeof(\p Ty) * 8 set to \p Op. The remaining bits are duplicated from the @@ -401,7 +401,7 @@ class MachineIRBuilder { /// \return The newly created instruction. MachineInstrBuilder buildSExt(unsigned Res, unsigned Op); - /// Build and insert \p Res = G_ZEXT \p Op + /// Build and insert \p Res = G_ZEXT \p Op /// /// G_ZEXT produces a register of the specified width, with bits 0 to /// sizeof(\p Ty) * 8 set to \p Op. The remaining bits are 0. For a vector @@ -415,7 +415,7 @@ class MachineIRBuilder { /// \return The newly created instruction. MachineInstrBuilder buildZExt(unsigned Res, unsigned Op); - /// Build and insert \p Res = G_SEXT \p Op, \p Res = G_TRUNC \p Op, or + /// Build and insert \p Res = G_SEXT \p Op, \p Res = G_TRUNC \p Op, or /// \p Res = COPY \p Op depending on the differing sizes of \p Res and \p Op. /// /// /// \pre setBasicBlock or setMI must have been called. @@ -425,7 +425,7 @@ class MachineIRBuilder { /// \return The newly created instruction. MachineInstrBuilder buildSExtOrTrunc(unsigned Res, unsigned Op); - /// Build and insert \p Res = G_ZEXT \p Op, \p Res = G_TRUNC \p Op, or + /// Build and insert \p Res = G_ZEXT \p Op, \p Res = G_TRUNC \p Op, or /// \p Res = COPY \p Op depending on the differing sizes of \p Res and \p Op. /// /// /// \pre setBasicBlock or setMI must have been called. @@ -435,7 +435,7 @@ class MachineIRBuilder { /// \return The newly created instruction. MachineInstrBuilder buildZExtOrTrunc(unsigned Res, unsigned Op); - // Build and insert \p Res = G_ANYEXT \p Op, \p Res = G_TRUNC \p Op, or + // Build and insert \p Res = G_ANYEXT \p Op, \p Res = G_TRUNC \p Op, or /// \p Res = COPY \p Op depending on the differing sizes of \p Res and \p Op. /// /// /// \pre setBasicBlock or setMI must have been called. @@ -449,7 +449,7 @@ class MachineIRBuilder { } MachineInstrBuilder buildAnyExtOrTrunc(unsigned Res, unsigned Op); - /// Build and insert \p Res = \p ExtOpc, \p Res = G_TRUNC \p + /// Build and insert \p Res = \p ExtOpc, \p Res = G_TRUNC \p /// Op, or \p Res = COPY \p Op depending on the differing sizes of \p Res and /// \p Op. /// /// @@ -534,7 +534,7 @@ class MachineIRBuilder { /// \return The newly created instruction. MachineInstrBuilder buildFConstant(unsigned Res, const ConstantFP &Val); - /// Build and insert \p Res = COPY Op + /// Build and insert \p Res = COPY Op /// /// Register-to-register COPY sets \p Res to \p Op. /// @@ -547,7 +547,7 @@ class MachineIRBuilder { return buildCopy(getDestFromArg(Res), getRegFromArg(Src)); } - /// Build and insert `Res = G_LOAD Addr, MMO`. + /// Build and insert `Res = G_LOAD Addr, MMO`. /// /// Loads the value stored at \p Addr. Puts the result in \p Res. /// @@ -571,7 +571,7 @@ class MachineIRBuilder { MachineInstrBuilder buildStore(unsigned Val, unsigned Addr, MachineMemOperand &MMO); - /// Build and insert `Res0, ... = G_EXTRACT Src, Idx0`. + /// Build and insert `Res0, ... = G_EXTRACT Src, Idx0`. /// /// \pre setBasicBlock or setMI must have been called. /// \pre \p Res and \p Src must be generic virtual registers. @@ -598,7 +598,7 @@ class MachineIRBuilder { void buildSequence(unsigned Res, ArrayRef Ops, ArrayRef Indices); - /// Build and insert \p Res = G_MERGE_VALUES \p Op0, ... 
+ /// Build and insert \p Res = G_MERGE_VALUES \p Op0, ... /// /// G_MERGE_VALUES combines the input elements contiguously into a larger /// register. @@ -611,7 +611,7 @@ class MachineIRBuilder { /// \return a MachineInstrBuilder for the newly created instruction. MachineInstrBuilder buildMerge(unsigned Res, ArrayRef Ops); - /// Build and insert \p Res0, ... = G_UNMERGE_VALUES \p Op + /// Build and insert \p Res0, ... = G_UNMERGE_VALUES \p Op /// /// G_UNMERGE_VALUES splits contiguous bits of the input into multiple /// @@ -639,7 +639,7 @@ class MachineIRBuilder { MachineInstrBuilder buildIntrinsic(Intrinsic::ID ID, unsigned Res, bool HasSideEffects); - /// Build and insert \p Res = G_FPTRUNC \p Op + /// Build and insert \p Res = G_FPTRUNC \p Op /// /// G_FPTRUNC converts a floating-point value into one with a smaller type. /// @@ -651,7 +651,7 @@ class MachineIRBuilder { /// \return The newly created instruction. MachineInstrBuilder buildFPTrunc(unsigned Res, unsigned Op); - /// Build and insert \p Res = G_TRUNC \p Op + /// Build and insert \p Res = G_TRUNC \p Op /// /// G_TRUNC extracts the low bits of a type. For a vector type each element is /// truncated independently before being packed into the destination. @@ -711,7 +711,7 @@ class MachineIRBuilder { MachineInstrBuilder buildSelect(unsigned Res, unsigned Tst, unsigned Op0, unsigned Op1); - /// Build and insert \p Res = G_INSERT_VECTOR_ELT \p Val, + /// Build and insert \p Res = G_INSERT_VECTOR_ELT \p Val, /// \p Elt, \p Idx /// /// \pre setBasicBlock or setMI must have been called. @@ -724,7 +724,7 @@ class MachineIRBuilder { MachineInstrBuilder buildInsertVectorElement(unsigned Res, unsigned Val, unsigned Elt, unsigned Idx); - /// Build and insert \p Res = G_EXTRACT_VECTOR_ELT \p Val, \p Idx + /// Build and insert \p Res = G_EXTRACT_VECTOR_ELT \p Val, \p Idx /// /// \pre setBasicBlock or setMI must have been called. /// \pre \p Res must be a generic virtual register with scalar type. @@ -734,6 +734,24 @@ class MachineIRBuilder { /// \return The newly created instruction. MachineInstrBuilder buildExtractVectorElement(unsigned Res, unsigned Val, unsigned Idx); + + /// Build and insert `OldValRes = G_ATOMIC_CMPXCHG Addr, CmpVal, NewVal, + /// MMO`. + /// + /// Atomically replace the value at \p Addr with \p NewVal if it is currently + /// \p CmpVal otherwise leaves it unchanged. Puts the original value from \p + /// Addr in \p Res. + /// + /// \pre setBasicBlock or setMI must have been called. + /// \pre \p OldValRes must be a generic virtual register of scalar type. + /// \pre \p Addr must be a generic virtual register with pointer type. + /// \pre \p OldValRes, \p CmpVal, and \p NewVal must be generic virtual + /// registers of the same type. + /// + /// \return a MachineInstrBuilder for the newly created instruction. + MachineInstrBuilder buildAtomicCmpXchg(unsigned OldValRes, unsigned Addr, + unsigned CmpVal, unsigned NewVal, + MachineMemOperand &MMO); }; } // End namespace llvm. diff --git a/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h b/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h index 02868b220984..82fd7eddb68a 100644 --- a/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h +++ b/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h @@ -622,6 +622,8 @@ class RegisterBankInfo { /// \pre \p Reg is a virtual register that either has a bank or a class. /// \returns The constrained register class, or nullptr if there is none. 
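The buildAtomicCmpXchg helper added above emits OldValRes = G_ATOMIC_CMPXCHG Addr, CmpVal, NewVal with the given memory operand. A minimal calling sketch, assuming the virtual registers and the MachineMemOperand were created by the caller; emitCmpXchg is an illustrative wrapper, not an API from this patch:

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"

// Illustrative wrapper: builds the G_ATOMIC_CMPXCHG at the builder's current
// insertion point and returns the MachineInstrBuilder for further tweaking.
static llvm::MachineInstrBuilder
emitCmpXchg(llvm::MachineIRBuilder &MIRBuilder, unsigned OldValRes,
            unsigned Addr, unsigned CmpVal, unsigned NewVal,
            llvm::MachineMemOperand &MMO) {
  return MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal, MMO);
}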
/// \note This is a generic variant of MachineRegisterInfo::constrainRegClass + /// \note Use MachineRegisterInfo::constrainRegAttrs instead for any non-isel + /// purpose, including non-select passes of GlobalISel static const TargetRegisterClass * constrainGenericRegister(unsigned Reg, const TargetRegisterClass &RC, MachineRegisterInfo &MRI); diff --git a/include/llvm/CodeGen/GlobalISel/Utils.h b/include/llvm/CodeGen/GlobalISel/Utils.h index 5864c15cc8eb..a5859938e5f0 100644 --- a/include/llvm/CodeGen/GlobalISel/Utils.h +++ b/include/llvm/CodeGen/GlobalISel/Utils.h @@ -59,6 +59,19 @@ unsigned constrainOperandRegClass(const MachineFunction &MF, MachineInstr &InsertPt, const MCInstrDesc &II, unsigned Reg, unsigned OpIdx); +/// Mutate the newly-selected instruction \p I to constrain its (possibly +/// generic) virtual register operands to the instruction's register class. +/// This could involve inserting COPYs before (for uses) or after (for defs). +/// This requires the number of operands to match the instruction description. +/// \returns whether operand regclass constraining succeeded. +/// +// FIXME: Not all instructions have the same number of operands. We should +// probably expose a constrain helper per operand and let the target selector +// constrain individual registers, like fast-isel. +bool constrainSelectedInstRegOperands(MachineInstr &I, + const TargetInstrInfo &TII, + const TargetRegisterInfo &TRI, + const RegisterBankInfo &RBI); /// Check whether an instruction \p MI is dead: it only defines dead virtual /// registers, and doesn't have other side effects. bool isTriviallyDead(const MachineInstr &MI, const MachineRegisterInfo &MRI); diff --git a/include/llvm/CodeGen/ISDOpcodes.h b/include/llvm/CodeGen/ISDOpcodes.h index 9e4865ff2c26..d256849be9af 100644 --- a/include/llvm/CodeGen/ISDOpcodes.h +++ b/include/llvm/CodeGen/ISDOpcodes.h @@ -186,7 +186,8 @@ namespace ISD { /// BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways. /// Given two values of the same integer value type, this produces a value /// twice as big. Like EXTRACT_ELEMENT, this can only be used before - /// legalization. + /// legalization. The lower part of the composite value should be in + /// element 0 and the upper part should be in element 1. BUILD_PAIR, /// MERGE_VALUES - This node takes multiple discrete operands and returns diff --git a/include/llvm/CodeGen/IntrinsicLowering.h b/include/llvm/CodeGen/IntrinsicLowering.h index a404b9b70d3a..597d684909c1 100644 --- a/include/llvm/CodeGen/IntrinsicLowering.h +++ b/include/llvm/CodeGen/IntrinsicLowering.h @@ -31,26 +31,22 @@ class IntrinsicLowering { public: explicit IntrinsicLowering(const DataLayout &DL) : DL(DL), Warned(false) {} - /// AddPrototypes - This method, if called, causes all of the prototypes - /// that might be needed by an intrinsic lowering implementation to be - /// inserted into the module specified. + /// Add all of the prototypes that might be needed by an intrinsic lowering + /// implementation to be inserted into the module specified. void AddPrototypes(Module &M); - /// LowerIntrinsicCall - This method replaces a call with the LLVM function - /// which should be used to implement the specified intrinsic function call. + /// Replace a call to the specified intrinsic function. /// If an intrinsic function must be implemented by the code generator /// (such as va_start), this function should print a message and abort. 
/// /// Otherwise, if an intrinsic function call can be lowered, the code to /// implement it (often a call to a non-intrinsic function) is inserted - /// _after_ the call instruction and the call is deleted. The caller must + /// _after_ the call instruction and the call is deleted. The caller must /// be capable of handling this kind of change. - /// void LowerIntrinsicCall(CallInst *CI); - /// LowerToByteSwap - Replace a call instruction into a call to bswap - /// intrinsic. Return false if it has determined the call is not a - /// simple integer bswap. + /// Try to replace a call instruction with a call to a bswap intrinsic. Return + /// false if the call is not a simple integer bswap. static bool LowerToByteSwap(CallInst *CI); }; } diff --git a/include/llvm/CodeGen/LiveIntervalAnalysis.h b/include/llvm/CodeGen/LiveIntervals.h similarity index 98% rename from include/llvm/CodeGen/LiveIntervalAnalysis.h rename to include/llvm/CodeGen/LiveIntervals.h index c744f852fc3b..1150f3c1c47b 100644 --- a/include/llvm/CodeGen/LiveIntervalAnalysis.h +++ b/include/llvm/CodeGen/LiveIntervals.h @@ -1,4 +1,4 @@ -//===- LiveIntervalAnalysis.h - Live Interval Analysis ----------*- C++ -*-===// +//===- LiveIntervals.h - Live Interval Analysis -----------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -17,8 +17,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CODEGEN_LIVEINTERVALANALYSIS_H -#define LLVM_CODEGEN_LIVEINTERVALANALYSIS_H +#ifndef LLVM_CODEGEN_LIVEINTERVALS_H +#define LLVM_CODEGEN_LIVEINTERVALS_H #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/IndexedMap.h" @@ -478,4 +478,4 @@ class VirtRegMap; } // end namespace llvm -#endif // LLVM_CODEGEN_LIVEINTERVALANALYSIS_H +#endif diff --git a/include/llvm/CodeGen/LivePhysRegs.h b/include/llvm/CodeGen/LivePhysRegs.h index 6d54ebf1b784..f9aab0d09e1f 100644 --- a/include/llvm/CodeGen/LivePhysRegs.h +++ b/include/llvm/CodeGen/LivePhysRegs.h @@ -20,11 +20,11 @@ /// register. /// /// X86 Example: -/// %YMM0 = ... -/// %XMM0 = ... (Kills %XMM0, all %XMM0s sub-registers, and %YMM0) +/// %ymm0 = ... +/// %xmm0 = ... (Kills %xmm0, all %xmm0s sub-registers, and %ymm0) /// -/// %YMM0 = ... -/// %XMM0 = ..., %YMM0 (%YMM0 and all its sub-registers are alive) +/// %ymm0 = ... +/// %xmm0 = ..., implicit %ymm0 (%ymm0 and all its sub-registers are alive) //===----------------------------------------------------------------------===// #ifndef LLVM_CODEGEN_LIVEPHYSREGS_H diff --git a/include/llvm/CodeGen/LiveRangeEdit.h b/include/llvm/CodeGen/LiveRangeEdit.h index 84bccde0caa2..82b1f0b0de71 100644 --- a/include/llvm/CodeGen/LiveRangeEdit.h +++ b/include/llvm/CodeGen/LiveRangeEdit.h @@ -121,6 +121,9 @@ class LiveRangeEdit : private MachineRegisterInfo::Delegate { /// main live range of \p LI or in one of the matching subregister ranges. bool useIsKill(const LiveInterval &LI, const MachineOperand &MO) const; + /// Create a new empty interval based on OldReg. + LiveInterval &createEmptyIntervalFrom(unsigned OldReg, bool createSubRanges); + public: /// Create a LiveRangeEdit for breaking down parent into smaller pieces. /// @param parent The register being spilled or split. @@ -174,16 +177,13 @@ class LiveRangeEdit : private MachineRegisterInfo::Delegate { return makeArrayRef(NewRegs).slice(FirstNew); } - /// createEmptyIntervalFrom - Create a new empty interval based on OldReg. - LiveInterval &createEmptyIntervalFrom(unsigned OldReg); - /// createFrom - Create a new virtual register based on OldReg. 
unsigned createFrom(unsigned OldReg); /// create - Create a new register with the same class and original slot as /// parent. LiveInterval &createEmptyInterval() { - return createEmptyIntervalFrom(getReg()); + return createEmptyIntervalFrom(getReg(), true); } unsigned create() { return createFrom(getReg()); } @@ -233,12 +233,6 @@ class LiveRangeEdit : private MachineRegisterInfo::Delegate { return Rematted.count(ParentVNI); } - void markDeadRemat(MachineInstr *inst) { - // DeadRemats is an optional field. - if (DeadRemats) - DeadRemats->insert(inst); - } - /// eraseVirtReg - Notify the delegate that Reg is no longer in use, and try /// to erase it from LIS. void eraseVirtReg(unsigned Reg); diff --git a/include/llvm/CodeGen/LiveStackAnalysis.h b/include/llvm/CodeGen/LiveStacks.h similarity index 94% rename from include/llvm/CodeGen/LiveStackAnalysis.h rename to include/llvm/CodeGen/LiveStacks.h index c90ae7b184f4..44ed785f7b53 100644 --- a/include/llvm/CodeGen/LiveStackAnalysis.h +++ b/include/llvm/CodeGen/LiveStacks.h @@ -1,4 +1,4 @@ -//===- LiveStackAnalysis.h - Live Stack Slot Analysis -----------*- C++ -*-===// +//===- LiveStacks.h - Live Stack Slot Analysis ------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -13,8 +13,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CODEGEN_LIVESTACKANALYSIS_H -#define LLVM_CODEGEN_LIVESTACKANALYSIS_H +#ifndef LLVM_CODEGEN_LIVESTACKS_H +#define LLVM_CODEGEN_LIVESTACKS_H #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -100,4 +100,4 @@ class LiveStacks : public MachineFunctionPass { } // end namespace llvm -#endif // LLVM_CODEGEN_LIVESTACK_ANALYSIS_H +#endif diff --git a/include/llvm/CodeGen/LoopTraversal.h b/include/llvm/CodeGen/LoopTraversal.h new file mode 100644 index 000000000000..a816f6dd07bd --- /dev/null +++ b/include/llvm/CodeGen/LoopTraversal.h @@ -0,0 +1,116 @@ +//==------ llvm/CodeGen/LoopTraversal.h - Loop Traversal -*- C++ -*---------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file Loop Traversal logic. +/// +/// This class provides the basic blocks traversal order used by passes like +/// ReachingDefAnalysis and ExecutionDomainFix. +/// It identifies basic blocks that are part of loops and should be visited +/// twice and returns an efficient traversal order for all the blocks. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_LOOPTRAVERSAL_H +#define LLVM_CODEGEN_LOOPTRAVERSAL_H + +#include "llvm/ADT/SmallVector.h" + +namespace llvm { + +class MachineBasicBlock; +class MachineFunction; + +/// This class provides the basic blocks traversal order used by passes like +/// ReachingDefAnalysis and ExecutionDomainFix. +/// It identifies basic blocks that are part of loops and should be visited +/// twice and returns an efficient traversal order for all the blocks. +/// +/// We want to visit every instruction in every basic block in order to update +/// its execution domain or collect clearance information. However, for the +/// clearance calculation, we need to know clearances from all predecessors +/// (including any backedges), therefore we need to visit some blocks twice. +/// As an example, consider the following loop.
+/// +/// +/// PH -> A -> B (xmm -> xmm) -> C -> D -> EXIT +/// ^ | +/// +----------------------------------+ +/// +/// The iteration order this pass will return is as follows: +/// Optimized: PH A B C A' B' C' D +/// +/// The basic block order is constructed as follows: +/// Once we finish processing some block, we update the counters in MBBInfos +/// and re-process any successors that are now 'done'. +/// We call a block that is ready for its final round of processing `done` +/// (isBlockDone), e.g. when all predecessor information is known. +/// +/// Note that a naive traversal order would be to do two complete passes over +/// all basic blocks/instructions, the first for recording clearances, the +/// second for updating clearance based on backedges. +/// However, for functions without backedges, or functions with a lot of +/// straight-line code, and a small loop, that would be a lot of unnecessary +/// work (since only the BBs that are part of the loop require two passes). +/// +/// E.g., the naive iteration order for the above example is as follows: +/// Naive: PH A B C D A' B' C' D' +/// +/// In the optimized approach we avoid processing D twice, because we +/// can entirely process the predecessors before getting to D. +class LoopTraversal { +private: + struct MBBInfo { + /// Whether we have gotten to this block in primary processing yet. + bool PrimaryCompleted = false; + + /// The number of predecessors for which primary processing has completed. + unsigned IncomingProcessed = 0; + + /// The value of `IncomingProcessed` at the start of primary processing. + unsigned PrimaryIncoming = 0; + + /// The number of predecessors for which all processing steps are done. + unsigned IncomingCompleted = 0; + + MBBInfo() = default; + }; + using MBBInfoMap = SmallVector; + /// Helps keep track if we processed this block and all its predecessors. + MBBInfoMap MBBInfos; + +public: + struct TraversedMBBInfo { + /// The basic block. + MachineBasicBlock *MBB = nullptr; + + /// True if this is the first time we process the basic block. + bool PrimaryPass = true; + + /// True if the block is ready for its final round of processing. + bool IsDone = true; + + TraversedMBBInfo(MachineBasicBlock *BB = nullptr, bool Primary = true, + bool Done = true) + : MBB(BB), PrimaryPass(Primary), IsDone(Done) {} + }; + LoopTraversal() {} + + /// \brief Identifies basic blocks that are part of loops and should be + /// visited twice and returns an efficient traversal order for all the blocks. + typedef SmallVector TraversalOrder; + TraversalOrder traverse(MachineFunction &MF); + +private: + /// Returns true if the block is ready for its final round of processing.
+ bool isBlockDone(MachineBasicBlock *MBB); +}; + +} // namespace llvm + +#endif // LLVM_CODEGEN_LOOPTRAVERSAL_H diff --git a/include/llvm/CodeGen/MIRYamlMapping.h b/include/llvm/CodeGen/MIRYamlMapping.h index a8ea1407a4e7..ba40e522e261 100644 --- a/include/llvm/CodeGen/MIRYamlMapping.h +++ b/include/llvm/CodeGen/MIRYamlMapping.h @@ -56,7 +56,7 @@ template <> struct ScalarTraits { return ""; } - static bool mustQuote(StringRef Scalar) { return needsQuotes(Scalar); } + static QuotingType mustQuote(StringRef S) { return needsQuotes(S); } }; struct FlowStringValue : StringValue { @@ -73,7 +73,7 @@ template <> struct ScalarTraits { return ScalarTraits::input(Scalar, Ctx, S); } - static bool mustQuote(StringRef Scalar) { return needsQuotes(Scalar); } + static QuotingType mustQuote(StringRef S) { return needsQuotes(S); } }; struct BlockStringValue { @@ -120,7 +120,7 @@ template <> struct ScalarTraits { return ScalarTraits::input(Scalar, Ctx, Value.Value); } - static bool mustQuote(StringRef Scalar) { + static QuotingType mustQuote(StringRef Scalar) { return ScalarTraits::mustQuote(Scalar); } }; diff --git a/include/llvm/CodeGen/MachineBasicBlock.h b/include/llvm/CodeGen/MachineBasicBlock.h index 0f5b04d90459..1d6637a27ab4 100644 --- a/include/llvm/CodeGen/MachineBasicBlock.h +++ b/include/llvm/CodeGen/MachineBasicBlock.h @@ -25,6 +25,7 @@ #include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/BranchProbability.h" +#include "llvm/Support/Printable.h" #include #include #include @@ -224,6 +225,14 @@ class MachineBasicBlock return make_range(getFirstTerminator(), end()); } + /// Returns a range that iterates over the phis in the basic block. + inline iterator_range phis() { + return make_range(begin(), getFirstNonPHI()); + } + inline iterator_range phis() const { + return const_cast(this)->phis(); + } + // Machine-CFG iterators using pred_iterator = std::vector::iterator; using const_pred_iterator = std::vector::const_iterator; @@ -701,8 +710,8 @@ class MachineBasicBlock LQR_Unknown ///< Register liveness not decidable from local neighborhood. }; - /// Return whether (physical) register \p Reg has been ined and not - /// ed as of just before \p Before. + /// Return whether (physical) register \p Reg has been defined and not + /// killed as of just before \p Before. /// /// Search is localised to a neighborhood of \p Neighborhood instructions /// before (searching for defs or kills) and \p Neighborhood instructions @@ -716,9 +725,10 @@ class MachineBasicBlock // Debugging methods. void dump() const; - void print(raw_ostream &OS, const SlotIndexes* = nullptr) const; + void print(raw_ostream &OS, const SlotIndexes * = nullptr, + bool IsStandalone = true) const; void print(raw_ostream &OS, ModuleSlotTracker &MST, - const SlotIndexes* = nullptr) const; + const SlotIndexes * = nullptr, bool IsStandalone = true) const; // Printing method used by LoopInfo. void printAsOperand(raw_ostream &OS, bool PrintType = true) const; @@ -771,6 +781,14 @@ class MachineBasicBlock raw_ostream& operator<<(raw_ostream &OS, const MachineBasicBlock &MBB); +/// Prints a machine basic block reference. +/// +/// The format is: +/// %bb.5 - a machine basic block with MBB.getNumber() == 5. +/// +/// Usage: OS << printMBBReference(MBB) << '\n'; +Printable printMBBReference(const MachineBasicBlock &MBB); + // This is useful when building IndexedMaps keyed on basic block pointers. 
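A minimal sketch of how a machine pass might consume the order produced by the new LoopTraversal.h above; visitInTraversalOrder and the actions inside the loop are illustrative placeholders, not part of this patch:

#include "llvm/CodeGen/LoopTraversal.h"
#include "llvm/CodeGen/MachineFunction.h"

// Walk the blocks in the computed order. Blocks that belong to loops appear
// twice: once in the primary pass and once more when every predecessor
// (including backedges) has been processed.
static void visitInTraversalOrder(llvm::MachineFunction &MF) {
  llvm::LoopTraversal Traversal;
  llvm::LoopTraversal::TraversalOrder Order = Traversal.traverse(MF);
  for (const llvm::LoopTraversal::TraversedMBBInfo &TraversedMBB : Order) {
    if (TraversedMBB.PrimaryPass) {
      // First visit of TraversedMBB.MBB: record per-block information,
      // e.g. clearances or execution domains.
    }
    if (TraversedMBB.IsDone) {
      // All predecessor information is now known; finalize TraversedMBB.MBB.
    }
  }
}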
struct MBB2NumberFunctor { using argument_type = const MachineBasicBlock *; diff --git a/include/llvm/CodeGen/MachineCombinerPattern.h b/include/llvm/CodeGen/MachineCombinerPattern.h index 8c54ae925470..586535f771c2 100644 --- a/include/llvm/CodeGen/MachineCombinerPattern.h +++ b/include/llvm/CodeGen/MachineCombinerPattern.h @@ -68,12 +68,18 @@ enum class MachineCombinerPattern { FMLAv4i32_indexed_OP2, FMLSv1i32_indexed_OP2, FMLSv1i64_indexed_OP2, - FMLSv2i32_indexed_OP2, - FMLSv2i64_indexed_OP2, + FMLSv2f32_OP1, FMLSv2f32_OP2, + FMLSv2f64_OP1, FMLSv2f64_OP2, - FMLSv4i32_indexed_OP2, - FMLSv4f32_OP2 + FMLSv2i32_indexed_OP1, + FMLSv2i32_indexed_OP2, + FMLSv2i64_indexed_OP1, + FMLSv2i64_indexed_OP2, + FMLSv4f32_OP1, + FMLSv4f32_OP2, + FMLSv4i32_indexed_OP1, + FMLSv4i32_indexed_OP2 }; } // end namespace llvm diff --git a/include/llvm/CodeGen/MachineFrameInfo.h b/include/llvm/CodeGen/MachineFrameInfo.h index 9521c277988a..f887517217e1 100644 --- a/include/llvm/CodeGen/MachineFrameInfo.h +++ b/include/llvm/CodeGen/MachineFrameInfo.h @@ -115,7 +115,7 @@ class MachineFrameInfo { /// slot can't alias any LLVM IR value. This is very similar to a Spill /// Slot, but is created by statepoint lowering is SelectionDAG, not the /// register allocator. - bool isStatepointSpillSlot; + bool isStatepointSpillSlot = false; /// Identifier for stack memory type analagous to address space. If this is /// non-0, the meaning is target defined. Offsets cannot be directly @@ -131,7 +131,7 @@ class MachineFrameInfo { // If true, the object was mapped into the local frame // block and doesn't need additional handling for allocation beyond that. - bool PreAllocated; + bool PreAllocated = false; // If true, an LLVM IR value might point to this object. // Normally, spill slots and fixed-offset objects don't alias IR-accessible @@ -140,17 +140,17 @@ class MachineFrameInfo { bool isAliased; /// If true, the object has been zero-extended. - bool isZExt; + bool isZExt = false; /// If true, the object has been zero-extended. - bool isSExt; - - StackObject(uint64_t Sz, unsigned Al, int64_t SP, bool IM, - bool isSS, const AllocaInst *Val, bool Aliased, uint8_t ID = 0) - : SPOffset(SP), Size(Sz), Alignment(Al), isImmutable(IM), - isSpillSlot(isSS), isStatepointSpillSlot(false), StackID(ID), - Alloca(Val), - PreAllocated(false), isAliased(Aliased), isZExt(false), isSExt(false) {} + bool isSExt = false; + + StackObject(uint64_t Size, unsigned Alignment, int64_t SPOffset, + bool IsImmutable, bool IsSpillSlot, const AllocaInst *Alloca, + bool IsAliased, uint8_t StackID = 0) + : SPOffset(SPOffset), Size(Size), Alignment(Alignment), + isImmutable(IsImmutable), isSpillSlot(IsSpillSlot), + StackID(StackID), Alloca(Alloca), isAliased(IsAliased) {} }; /// The alignment of the stack. @@ -573,13 +573,13 @@ class MachineFrameInfo { /// All fixed objects should be created before other objects are created for /// efficiency. By default, fixed objects are not pointed to by LLVM IR /// values. This returns an index with a negative value. - int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool Immutable, + int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased = false); /// Create a spill slot at a fixed location on the stack. /// Returns an index with a negative value. int CreateFixedSpillStackObject(uint64_t Size, int64_t SPOffset, - bool Immutable = false); + bool IsImmutable = false); /// Returns true if the specified index corresponds to a fixed stack object. 
bool isFixedObjectIndex(int ObjectIdx) const { @@ -605,10 +605,10 @@ class MachineFrameInfo { } /// Marks the immutability of an object. - void setIsImmutableObjectIndex(int ObjectIdx, bool Immutable) { + void setIsImmutableObjectIndex(int ObjectIdx, bool IsImmutable) { assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() && "Invalid Object Idx!"); - Objects[ObjectIdx+NumFixedObjects].isImmutable = Immutable; + Objects[ObjectIdx+NumFixedObjects].isImmutable = IsImmutable; } /// Returns true if the specified index corresponds to a spill slot. @@ -660,7 +660,7 @@ class MachineFrameInfo { /// Create a new statically sized stack object, returning /// a nonnegative identifier to represent it. - int CreateStackObject(uint64_t Size, unsigned Alignment, bool isSS, + int CreateStackObject(uint64_t Size, unsigned Alignment, bool isSpillSlot, const AllocaInst *Alloca = nullptr, uint8_t ID = 0); /// Create a new statically sized stack object that represents a spill slot, diff --git a/include/llvm/CodeGen/MachineFunction.h b/include/llvm/CodeGen/MachineFunction.h index c6bcca7f7b34..7d8b7ebe8d62 100644 --- a/include/llvm/CodeGen/MachineFunction.h +++ b/include/llvm/CodeGen/MachineFunction.h @@ -223,7 +223,7 @@ struct LandingPadInfo { }; class MachineFunction { - const Function *Fn; + const Function &F; const TargetMachine &Target; const TargetSubtargetInfo *STI; MCContext &Ctx; @@ -359,8 +359,9 @@ class MachineFunction { using VariableDbgInfoMapTy = SmallVector; VariableDbgInfoMapTy VariableDbgInfos; - MachineFunction(const Function *Fn, const TargetMachine &TM, - unsigned FunctionNum, MachineModuleInfo &MMI); + MachineFunction(const Function &F, const TargetMachine &TM, + const TargetSubtargetInfo &STI, unsigned FunctionNum, + MachineModuleInfo &MMI); MachineFunction(const MachineFunction &) = delete; MachineFunction &operator=(const MachineFunction &) = delete; ~MachineFunction(); @@ -379,8 +380,8 @@ class MachineFunction { /// Return the DataLayout attached to the Module associated to this MF. const DataLayout &getDataLayout() const; - /// getFunction - Return the LLVM function that this machine code represents - const Function *getFunction() const { return Fn; } + /// Return the LLVM function that this machine code represents + const Function &getFunction() const { return F; } /// getName - Return the name of the corresponding LLVM function. StringRef getName() const; diff --git a/include/llvm/CodeGen/MachineInstr.h b/include/llvm/CodeGen/MachineInstr.h index 6c899ca7ee09..f9ad367b690a 100644 --- a/include/llvm/CodeGen/MachineInstr.h +++ b/include/llvm/CodeGen/MachineInstr.h @@ -44,6 +44,7 @@ class MachineRegisterInfo; class ModuleSlotTracker; class raw_ostream; template class SmallVectorImpl; +class SmallBitVector; class StringRef; class TargetInstrInfo; class TargetRegisterClass; @@ -67,7 +68,9 @@ class MachineInstr /// otherwise easily derivable from the IR text. /// enum CommentFlag { - ReloadReuse = 0x1 // higher bits are reserved for target dep comments. + ReloadReuse = 0x1, // higher bits are reserved for target dep comments. + NoSchedComment = 0x2, + TAsmComments = 0x4 // Target Asm comments should start from this value. }; enum MIFlag { @@ -1220,17 +1223,30 @@ class MachineInstr /// Debugging support /// @{ + /// Determine the generic type to be printed (if needed) on uses and defs. 
+ LLT getTypeToPrint(unsigned OpIdx, SmallBitVector &PrintedTypes, + const MachineRegisterInfo &MRI) const; + + /// Return true when an instruction has tied register that can't be determined + /// by the instruction's descriptor. This is useful for MIR printing, to + /// determine whether we need to print the ties or not. + bool hasComplexRegisterTies() const; + /// Print this MI to \p OS. + /// Don't print information that can be inferred from other instructions if + /// \p IsStandalone is false. It is usually true when only a fragment of the + /// function is printed. /// Only print the defs and the opcode if \p SkipOpers is true. /// Otherwise, also print operands if \p SkipDebugLoc is true. /// Otherwise, also print the debug loc, with a terminating newline. /// \p TII is used to print the opcode name. If it's not present, but the /// MI is in a function, the opcode will be printed using the function's TII. - void print(raw_ostream &OS, bool SkipOpers = false, bool SkipDebugLoc = false, - const TargetInstrInfo *TII = nullptr) const; - void print(raw_ostream &OS, ModuleSlotTracker &MST, bool SkipOpers = false, + void print(raw_ostream &OS, bool IsStandalone = true, bool SkipOpers = false, bool SkipDebugLoc = false, const TargetInstrInfo *TII = nullptr) const; + void print(raw_ostream &OS, ModuleSlotTracker &MST, bool IsStandalone = true, + bool SkipOpers = false, bool SkipDebugLoc = false, + const TargetInstrInfo *TII = nullptr) const; void dump() const; /// @} diff --git a/include/llvm/CodeGen/MachineInstrBuilder.h b/include/llvm/CodeGen/MachineInstrBuilder.h index 9e0f19a5aea3..2df89b15dd52 100644 --- a/include/llvm/CodeGen/MachineInstrBuilder.h +++ b/include/llvm/CodeGen/MachineInstrBuilder.h @@ -20,11 +20,13 @@ #define LLVM_CODEGEN_MACHINEINSTRBUILDER_H #include "llvm/ADT/ArrayRef.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Intrinsics.h" #include "llvm/Support/ErrorHandling.h" @@ -48,6 +50,7 @@ namespace RegState { EarlyClobber = 0x40, Debug = 0x80, InternalRead = 0x100, + Renamable = 0x200, DefineNoRead = Define | Undef, ImplicitDefine = Implicit | Define, ImplicitKill = Implicit | Kill @@ -91,7 +94,8 @@ class MachineInstrBuilder { flags & RegState::EarlyClobber, SubReg, flags & RegState::Debug, - flags & RegState::InternalRead)); + flags & RegState::InternalRead, + flags & RegState::Renamable)); return *this; } @@ -280,6 +284,12 @@ class MachineInstrBuilder { MI->copyImplicitOps(*MF, OtherMI); return *this; } + + bool constrainAllUses(const TargetInstrInfo &TII, + const TargetRegisterInfo &TRI, + const RegisterBankInfo &RBI) const { + return constrainSelectedInstRegOperands(*MI, TII, TRI, RBI); + } }; /// Builder interface. Specify how to create the initial instruction itself. @@ -443,6 +453,9 @@ inline unsigned getInternalReadRegState(bool B) { inline unsigned getDebugRegState(bool B) { return B ? RegState::Debug : 0; } +inline unsigned getRenamableRegState(bool B) { + return B ? RegState::Renamable : 0; +} /// Get all register state flags from machine operand \p RegOp. 
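The new MachineInstrBuilder::constrainAllUses wraps constrainSelectedInstRegOperands from GlobalISel/Utils.h, so a selector can build an instruction and constrain every register operand in one step. A minimal sketch of that pattern; selectViaBuilder, TargetOpc and the operand wiring are placeholders, not part of this patch:

#include "llvm/CodeGen/MachineInstrBuilder.h"

// Hypothetical selection snippet: emit the target instruction in place of I,
// then constrain all of its register operands to the classes required by its
// MCInstrDesc (COPYs are inserted where needed).
static bool selectViaBuilder(llvm::MachineInstr &I, unsigned TargetOpc,
                             const llvm::TargetInstrInfo &TII,
                             const llvm::TargetRegisterInfo &TRI,
                             const llvm::RegisterBankInfo &RBI) {
  llvm::MachineInstrBuilder MIB =
      llvm::BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(TargetOpc))
          .add(I.getOperand(0))  // def
          .add(I.getOperand(1)); // use
  bool Constrained = MIB.constrainAllUses(TII, TRI, RBI);
  I.eraseFromParent();
  return Constrained;
}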
inline unsigned getRegState(const MachineOperand &RegOp) { @@ -453,7 +466,10 @@ inline unsigned getRegState(const MachineOperand &RegOp) { getDeadRegState(RegOp.isDead()) | getUndefRegState(RegOp.isUndef()) | getInternalReadRegState(RegOp.isInternalRead()) | - getDebugRegState(RegOp.isDebug()); + getDebugRegState(RegOp.isDebug()) | + getRenamableRegState( + TargetRegisterInfo::isPhysicalRegister(RegOp.getReg()) && + RegOp.isRenamable()); } /// Helper class for constructing bundles of MachineInstrs. diff --git a/include/llvm/CodeGen/MachineInstrBundle.h b/include/llvm/CodeGen/MachineInstrBundle.h index 995c7001d928..b5341fd1ae49 100644 --- a/include/llvm/CodeGen/MachineInstrBundle.h +++ b/include/llvm/CodeGen/MachineInstrBundle.h @@ -150,7 +150,7 @@ class MachineOperandIteratorBase { /// struct VirtRegInfo { /// Reads - One of the operands read the virtual register. This does not - /// include or use operands, see MO::readsReg(). + /// include undef or internal use operands, see MO::readsReg(). bool Reads; /// Writes - One of the operands writes the virtual register. diff --git a/include/llvm/CodeGen/MachineJumpTableInfo.h b/include/llvm/CodeGen/MachineJumpTableInfo.h index adcd1d0de63d..25a3e6b556a3 100644 --- a/include/llvm/CodeGen/MachineJumpTableInfo.h +++ b/include/llvm/CodeGen/MachineJumpTableInfo.h @@ -20,6 +20,7 @@ #ifndef LLVM_CODEGEN_MACHINEJUMPTABLEINFO_H #define LLVM_CODEGEN_MACHINEJUMPTABLEINFO_H +#include "llvm/Support/Printable.h" #include #include @@ -125,6 +126,15 @@ class MachineJumpTableInfo { void dump() const; }; + +/// Prints a jump table entry reference. +/// +/// The format is: +/// %jump-table.5 - a jump table entry with index == 5. +/// +/// Usage: OS << printJumpTableEntryReference(Idx) << '\n'; +Printable printJumpTableEntryReference(unsigned Idx); + } // End llvm namespace #endif diff --git a/include/llvm/CodeGen/MachineMemOperand.h b/include/llvm/CodeGen/MachineMemOperand.h index cdec9e79833e..c5b204a79f04 100644 --- a/include/llvm/CodeGen/MachineMemOperand.h +++ b/include/llvm/CodeGen/MachineMemOperand.h @@ -47,17 +47,40 @@ struct MachinePointerInfo { uint8_t StackID; - explicit MachinePointerInfo(const Value *v = nullptr, int64_t offset = 0, + unsigned AddrSpace = 0; + + explicit MachinePointerInfo(const Value *v, int64_t offset = 0, uint8_t ID = 0) - : V(v), Offset(offset), StackID(ID) {} + : V(v), Offset(offset), StackID(ID) { + AddrSpace = v ? v->getType()->getPointerAddressSpace() : 0; + } - explicit MachinePointerInfo(const PseudoSourceValue *v, - int64_t offset = 0, + explicit MachinePointerInfo(const PseudoSourceValue *v, int64_t offset = 0, uint8_t ID = 0) - : V(v), Offset(offset), StackID(ID) {} + : V(v), Offset(offset), StackID(ID) { + AddrSpace = v ? 
v->getAddressSpace() : 0; + } + + explicit MachinePointerInfo(unsigned AddressSpace = 0) + : V((const Value *)nullptr), Offset(0), StackID(0), + AddrSpace(AddressSpace) {} + + explicit MachinePointerInfo( + PointerUnion v, + int64_t offset = 0, + uint8_t ID = 0) + : V(v), Offset(offset), StackID(ID) { + if (V) { + if (const auto *ValPtr = V.dyn_cast()) + AddrSpace = ValPtr->getType()->getPointerAddressSpace(); + else + AddrSpace = V.get()->getAddressSpace(); + } + } MachinePointerInfo getWithOffset(int64_t O) const { - if (V.isNull()) return MachinePointerInfo(); + if (V.isNull()) + return MachinePointerInfo(AddrSpace); if (V.is()) return MachinePointerInfo(V.get(), Offset+O, StackID); return MachinePointerInfo(V.get(), Offset+O, @@ -89,6 +112,9 @@ struct MachinePointerInfo { /// Stack pointer relative access. static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID = 0); + + /// Stack memory without other information. + static MachinePointerInfo getUnknownStack(MachineFunction &MF); }; diff --git a/include/llvm/CodeGen/MachineModuleInfo.h b/include/llvm/CodeGen/MachineModuleInfo.h index 7b57a407b4bc..6be304fa368b 100644 --- a/include/llvm/CodeGen/MachineModuleInfo.h +++ b/include/llvm/CodeGen/MachineModuleInfo.h @@ -155,7 +155,6 @@ class MachineModuleInfo : public ImmutablePass { const MCContext &getContext() const { return Context; } MCContext &getContext() { return Context; } - void setModule(const Module *M) { TheModule = M; } const Module *getModule() const { return TheModule; } /// Returns the MachineFunction constructed for the IR function \p F. diff --git a/include/llvm/CodeGen/MachineOperand.h b/include/llvm/CodeGen/MachineOperand.h index 6693ed22328d..22fd86aaabd3 100644 --- a/include/llvm/CodeGen/MachineOperand.h +++ b/include/llvm/CodeGen/MachineOperand.h @@ -17,6 +17,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/IR/Intrinsics.h" #include "llvm/Support/DataTypes.h" +#include "llvm/Support/LowLevelTypeImpl.h" #include namespace llvm { @@ -28,6 +29,7 @@ class GlobalValue; class MachineBasicBlock; class MachineInstr; class MachineRegisterInfo; +class MCCFIInstruction; class MDNode; class ModuleSlotTracker; class TargetMachine; @@ -72,7 +74,7 @@ class MachineOperand { private: /// OpKind - Specify what kind of operand this is. This discriminates the /// union. - MachineOperandType OpKind : 8; + unsigned OpKind : 8; /// Subregister number for MO_Register. A value of 0 indicates the /// MO_Register has no subReg. @@ -83,26 +85,32 @@ class MachineOperand { /// TiedTo - Non-zero when this register operand is tied to another register /// operand. The encoding of this field is described in the block comment /// before MachineInstr::tieOperands(). - unsigned char TiedTo : 4; - - /// IsDef/IsImp/IsKill/IsDead flags - These are only valid for MO_Register - /// operands. + unsigned TiedTo : 4; /// IsDef - True if this is a def, false if this is a use of the register. + /// This is only valid on register operands. /// - bool IsDef : 1; + unsigned IsDef : 1; /// IsImp - True if this is an implicit def or use, false if it is explicit. + /// This is only valid on register opderands. /// - bool IsImp : 1; - - /// IsKill - True if this instruction is the last use of the register on this - /// path through the function. This is only valid on uses of registers. - bool IsKill : 1; - - /// IsDead - True if this register is never used by a subsequent instruction. - /// This is only valid on definitions of registers. 
- bool IsDead : 1; + unsigned IsImp : 1; + + /// IsDeadOrKill + /// For uses: IsKill - True if this instruction is the last use of the + /// register on this path through the function. + /// For defs: IsDead - True if this register is never used by a subsequent + /// instruction. + /// This is only valid on register operands. + unsigned IsDeadOrKill : 1; + + /// IsRenamable - True if this register may be renamed, i.e. it does not + /// generate a value that is somehow read in a way that is not represented by + /// the Machine IR (e.g. to meet an ABI or ISA requirement). This is only + /// valid on physical register operands. Virtual registers are assumed to + /// always be renamable regardless of the value of this field. + unsigned IsRenamable : 1; /// IsUndef - True if this register operand reads an "undef" value, i.e. the /// read value doesn't matter. This flag can be set on both use and def @@ -116,12 +124,12 @@ class MachineOperand { /// the same register. In that case, the instruction may depend on those /// operands reading the same dont-care value. For example: /// - /// %vreg1 = XOR %vreg2, %vreg2 + /// %1 = XOR undef %2, undef %2 /// - /// Any register can be used for %vreg2, and its value doesn't matter, but + /// Any register can be used for %2, and its value doesn't matter, but /// the two operands must be the same register. /// - bool IsUndef : 1; + unsigned IsUndef : 1; /// IsInternalRead - True if this operand reads a value that was defined /// inside the same instruction or bundle. This flag can be set on both use @@ -132,16 +140,16 @@ class MachineOperand { /// When this flag is set, the instruction bundle must contain at least one /// other def of the register. If multiple instructions in the bundle define /// the register, the meaning is target-defined. - bool IsInternalRead : 1; + unsigned IsInternalRead : 1; /// IsEarlyClobber - True if this MO_Register 'def' operand is written to /// by the MachineInstr before all input registers are read. This is used to /// model the GCC inline asm '&' constraint modifier. - bool IsEarlyClobber : 1; + unsigned IsEarlyClobber : 1; /// IsDebug - True if this MO_Register 'use' operand is in a debug pseudo, /// not a real instruction. Such uses should be ignored during codegen. - bool IsDebug : 1; + unsigned IsDebug : 1; /// SmallContents - This really should be part of the Contents union, but /// lives out here so we can get a better packed struct. @@ -190,7 +198,19 @@ class MachineOperand { } Contents; explicit MachineOperand(MachineOperandType K) - : OpKind(K), SubReg_TargetFlags(0), ParentMI(nullptr) {} + : OpKind(K), SubReg_TargetFlags(0), ParentMI(nullptr) { + // Assert that the layout is what we expect. It's easy to grow this object. + static_assert(alignof(MachineOperand) <= alignof(int64_t), + "MachineOperand shouldn't be more than 8 byte aligned"); + static_assert(sizeof(Contents) <= 2 * sizeof(void *), + "Contents should be at most two pointers"); + static_assert(sizeof(MachineOperand) <= + alignTo(2 * sizeof(unsigned) + + 3 * sizeof(void *)), + "MachineOperand too big. Should be Kind, SmallContents, " + "ParentMI, and Contents"); + } + public: /// getType - Returns the MachineOperandType for this operand. /// @@ -226,11 +246,59 @@ class MachineOperand { /// void clearParent() { ParentMI = nullptr; } + /// Print a subreg index operand. + /// MO_Immediate operands can also be subreg idices. If it's the case, the + /// subreg index name will be printed. MachineInstr::isOperandSubregIdx can be + /// called to check this. 
+ static void printSubRegIdx(raw_ostream &OS, uint64_t Index, + const TargetRegisterInfo *TRI); + + /// Print operand target flags. + static void printTargetFlags(raw_ostream& OS, const MachineOperand &Op); + + /// Print a MCSymbol as an operand. + static void printSymbol(raw_ostream &OS, MCSymbol &Sym); + + /// Print a stack object reference. + static void printStackObjectReference(raw_ostream &OS, unsigned FrameIndex, + bool IsFixed, StringRef Name); + + /// Print the offset with explicit +/- signs. + static void printOperandOffset(raw_ostream &OS, int64_t Offset); + + /// Print an IRSlotNumber. + static void printIRSlotNumber(raw_ostream &OS, int Slot); + + /// Print the MachineOperand to \p os. + /// Providing a valid \p TRI and \p IntrinsicInfo results in a more + /// target-specific printing. If \p TRI and \p IntrinsicInfo are null, the + /// function will try to pick it up from the parent. void print(raw_ostream &os, const TargetRegisterInfo *TRI = nullptr, const TargetIntrinsicInfo *IntrinsicInfo = nullptr) const; - void print(raw_ostream &os, ModuleSlotTracker &MST, - const TargetRegisterInfo *TRI = nullptr, - const TargetIntrinsicInfo *IntrinsicInfo = nullptr) const; + + /// More complex way of printing a MachineOperand. + /// \param TypeToPrint specifies the generic type to be printed on uses and + /// defs. It can be determined using MachineInstr::getTypeToPrint. + /// \param PrintDef - whether we want to print `def` on an operand which + /// isDef. Sometimes, if the operand is printed before '=', we don't print + /// `def`. + /// \param IsStandalone - whether we want a verbose output of the MO. This + /// prints extra information that can be easily inferred when printing the + /// whole function, but not when printing only a fragment of it. + /// \param ShouldPrintRegisterTies - whether we want to print register ties. + /// Sometimes they are easily determined by the instruction's descriptor + /// (MachineInstr::hasComplexRegiterTies can determine if it's needed). + /// \param TiedOperandIdx - if we need to print register ties this needs to + /// provide the index of the tied register. If not, it will be ignored. + /// \param TRI - provide more target-specific information to the printer. + /// Unlike the previous function, this one will not try and get the + /// information from it's parent. + /// \param IntrinsicInfo - same as \p TRI. + void print(raw_ostream &os, ModuleSlotTracker &MST, LLT TypeToPrint, + bool PrintDef, bool IsStandalone, bool ShouldPrintRegisterTies, + unsigned TiedOperandIdx, const TargetRegisterInfo *TRI, + const TargetIntrinsicInfo *IntrinsicInfo) const; + void dump() const; //===--------------------------------------------------------------------===// @@ -303,12 +371,12 @@ class MachineOperand { bool isDead() const { assert(isReg() && "Wrong MachineOperand accessor"); - return IsDead; + return IsDeadOrKill & IsDef; } bool isKill() const { assert(isReg() && "Wrong MachineOperand accessor"); - return IsKill; + return IsDeadOrKill & !IsDef; } bool isUndef() const { @@ -316,6 +384,8 @@ class MachineOperand { return IsUndef; } + bool isRenamable() const; + bool isInternalRead() const { assert(isReg() && "Wrong MachineOperand accessor"); return IsInternalRead; @@ -371,12 +441,13 @@ class MachineOperand { /// substPhysReg - Substitute the current register with the physical register /// Reg, taking any existing SubReg into account. For instance, - /// substPhysReg(%EAX) will change %reg1024:sub_8bit to %AL. 
+ /// substPhysReg(%eax) will change %reg1024:sub_8bit to %al. /// void substPhysReg(unsigned Reg, const TargetRegisterInfo&); void setIsUse(bool Val = true) { setIsDef(!Val); } + /// Change a def to a use, or a use to a def. void setIsDef(bool Val = true); void setImplicit(bool Val = true) { @@ -387,12 +458,12 @@ class MachineOperand { void setIsKill(bool Val = true) { assert(isReg() && !IsDef && "Wrong MachineOperand mutator"); assert((!Val || !isDebug()) && "Marking a debug operation as kill"); - IsKill = Val; + IsDeadOrKill = Val; } void setIsDead(bool Val = true) { assert(isReg() && IsDef && "Wrong MachineOperand mutator"); - IsDead = Val; + IsDeadOrKill = Val; } void setIsUndef(bool Val = true) { @@ -400,6 +471,12 @@ class MachineOperand { IsUndef = Val; } + void setIsRenamable(bool Val = true); + + /// Set IsRenamable to true if there are no extra register allocation + /// requirements placed on this operand by the parent instruction's opcode. + void setIsRenamableIfNoExtraRegAllocReq(); + void setIsInternalRead(bool Val = true) { assert(isReg() && "Wrong MachineOperand mutator"); IsInternalRead = Val; @@ -575,14 +652,16 @@ class MachineOperand { //===--------------------------------------------------------------------===// /// Returns true if this operand is identical to the specified operand except - /// for liveness related flags (isKill, isUndef and isDead). + /// for liveness related flags (isKill, isUndef and isDead). Note that this + /// should stay in sync with the hash_value overload below. bool isIdenticalTo(const MachineOperand &Other) const; /// \brief MachineOperand hash_value overload. /// /// Note that this includes the same information in the hash that /// isIdenticalTo uses for comparison. It is thus suited for use in hash - /// tables which use that function for equality comparisons only. + /// tables which use that function for equality comparisons only. This must + /// stay exactly in sync with isIdenticalTo above. friend hash_code hash_value(const MachineOperand &MO); /// ChangeToImmediate - Replace this operand with a new immediate operand of @@ -641,16 +720,16 @@ class MachineOperand { bool isKill = false, bool isDead = false, bool isUndef = false, bool isEarlyClobber = false, - unsigned SubReg = 0, - bool isDebug = false, - bool isInternalRead = false) { + unsigned SubReg = 0, bool isDebug = false, + bool isInternalRead = false, + bool isRenamable = false) { assert(!(isDead && !isDef) && "Dead flag on non-def"); assert(!(isKill && isDef) && "Kill flag on def"); MachineOperand Op(MachineOperand::MO_Register); Op.IsDef = isDef; Op.IsImp = isImp; - Op.IsKill = isKill; - Op.IsDead = isDead; + Op.IsDeadOrKill = isKill | isDead; + Op.IsRenamable = isRenamable; Op.IsUndef = isUndef; Op.IsInternalRead = isInternalRead; Op.IsEarlyClobber = isEarlyClobber; @@ -690,8 +769,7 @@ class MachineOperand { Op.setTargetFlags(TargetFlags); return Op; } - static MachineOperand CreateJTI(unsigned Idx, - unsigned char TargetFlags = 0) { + static MachineOperand CreateJTI(unsigned Idx, unsigned char TargetFlags = 0) { MachineOperand Op(MachineOperand::MO_JumpTableIndex); Op.setIndex(Idx); Op.setTargetFlags(TargetFlags); @@ -722,12 +800,12 @@ class MachineOperand { return Op; } /// CreateRegMask - Creates a register mask operand referencing Mask. The - /// operand does not take ownership of the memory referenced by Mask, it must - /// remain valid for the lifetime of the operand. 
+ /// operand does not take ownership of the memory referenced by Mask, it + /// must remain valid for the lifetime of the operand. /// - /// A RegMask operand represents a set of non-clobbered physical registers on - /// an instruction that clobbers many registers, typically a call. The bit - /// mask has a bit set for each physreg that is preserved by this + /// A RegMask operand represents a set of non-clobbered physical registers + /// on an instruction that clobbers many registers, typically a call. The + /// bit mask has a bit set for each physreg that is preserved by this /// instruction, as described in the documentation for /// TargetRegisterInfo::getCallPreservedMask(). /// @@ -780,7 +858,10 @@ class MachineOperand { friend class MachineInstr; friend class MachineRegisterInfo; + private: + // If this operand is currently a register operand, and if this is in a + // function, deregister the operand from the register's use/def list. void removeRegFromUses(); /// Artificial kinds for DenseMap usage. @@ -795,9 +876,9 @@ class MachineOperand { // Methods for handling register use/def lists. //===--------------------------------------------------------------------===// - /// isOnRegUseList - Return true if this operand is on a register use/def list - /// or false if not. This can only be called for register operands that are - /// part of a machine instruction. + /// isOnRegUseList - Return true if this operand is on a register use/def + /// list or false if not. This can only be called for register operands + /// that are part of a machine instruction. bool isOnRegUseList() const { assert(isReg() && "Can only add reg operand to use lists"); return Contents.Reg.Prev != nullptr; @@ -826,14 +907,14 @@ template <> struct DenseMapInfo { } }; -inline raw_ostream &operator<<(raw_ostream &OS, const MachineOperand& MO) { - MO.print(OS, nullptr); +inline raw_ostream &operator<<(raw_ostream &OS, const MachineOperand &MO) { + MO.print(OS); return OS; } - // See friend declaration above. This additional declaration is required in - // order to compile LLVM with IBM xlC compiler. - hash_code hash_value(const MachineOperand &MO); -} // End llvm namespace +// See friend declaration above. This additional declaration is required in +// order to compile LLVM with IBM xlC compiler. +hash_code hash_value(const MachineOperand &MO); +} // namespace llvm #endif diff --git a/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h b/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h index 887752b6d389..2fdefbed37ce 100644 --- a/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h +++ b/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h @@ -33,7 +33,7 @@ class DiagnosticInfoMIROptimization : public DiagnosticInfoOptimizationBase { const DiagnosticLocation &Loc, const MachineBasicBlock *MBB) : DiagnosticInfoOptimizationBase(Kind, DS_Remark, PassName, RemarkName, - *MBB->getParent()->getFunction(), Loc), + MBB->getParent()->getFunction(), Loc), MBB(MBB) {} /// MI-specific kinds of diagnostic Arguments. @@ -159,8 +159,8 @@ class MachineOptimizationRemarkEmitter { /// (1) to filter trivial false positives or (2) to provide more context so /// that non-trivial false positives can be quickly detected by the user. 
bool allowExtraAnalysis(StringRef PassName) const { - return (MF.getFunction()->getContext().getDiagnosticsOutputFile() || - MF.getFunction()->getContext() + return (MF.getFunction().getContext().getDiagnosticsOutputFile() || + MF.getFunction().getContext() .getDiagHandlerPtr()->isAnyRemarkEnabled(PassName)); } @@ -172,8 +172,8 @@ class MachineOptimizationRemarkEmitter { // remarks enabled. We can't currently check whether remarks are requested // for the calling pass since that requires actually building the remark. - if (MF.getFunction()->getContext().getDiagnosticsOutputFile() || - MF.getFunction()->getContext().getDiagHandlerPtr()->isAnyRemarkEnabled()) { + if (MF.getFunction().getContext().getDiagnosticsOutputFile() || + MF.getFunction().getContext().getDiagHandlerPtr()->isAnyRemarkEnabled()) { auto R = RemarkBuilder(); emit((DiagnosticInfoOptimizationBase &)R); } diff --git a/include/llvm/CodeGen/MachineRegisterInfo.h b/include/llvm/CodeGen/MachineRegisterInfo.h index be9b89eb77ef..0c1a774f81e7 100644 --- a/include/llvm/CodeGen/MachineRegisterInfo.h +++ b/include/llvm/CodeGen/MachineRegisterInfo.h @@ -84,14 +84,15 @@ class MachineRegisterInfo { /// all registers that were disabled are removed from the list. SmallVector UpdatedCSRs; - /// RegAllocHints - This vector records register allocation hints for virtual - /// registers. For each virtual register, it keeps a register and hint type - /// pair making up the allocation hint. Hint type is target specific except - /// for the value 0 which means the second value of the pair is the preferred - /// register for allocation. For example, if the hint is <0, 1024>, it means - /// the allocator should prefer the physical register allocated to the virtual - /// register of the hint. - IndexedMap, VirtReg2IndexFunctor> RegAllocHints; + /// RegAllocHints - This vector records register allocation hints for + /// virtual registers. For each virtual register, it keeps a pair of hint + /// type and hints vector making up the allocation hints. Only the first + /// hint may be target specific, and in that case this is reflected by the + /// first member of the pair being non-zero. If the hinted register is + /// virtual, it means the allocator should prefer the physical register + /// allocated to it if any. + IndexedMap>, + VirtReg2IndexFunctor> RegAllocHints; /// PhysRegUseDefLists - This is an array of the head of the use/def list for /// physical registers. @@ -547,12 +548,16 @@ class MachineRegisterInfo { /// except that it also changes any definitions of the register as well. /// /// Note that it is usually necessary to first constrain ToReg's register - /// class to match the FromReg constraints using: + /// class and register bank to match the FromReg constraints using one of the + /// methods: /// /// constrainRegClass(ToReg, getRegClass(FromReg)) + /// constrainRegAttrs(ToReg, FromReg) + /// RegisterBankInfo::constrainGenericRegister(ToReg, + /// *MRI.getRegClass(FromReg), MRI) /// - /// That function will return NULL if the virtual registers have incompatible - /// constraints. + /// These functions will return a falsy result if the virtual registers have + /// incompatible constraints. /// /// Note that if ToReg is a physical register the function will replace and /// apply sub registers to ToReg in order to obtain a final/proper physical @@ -652,10 +657,30 @@ class MachineRegisterInfo { /// new register class, or NULL if no such class exists. 
/// This should only be used when the constraint is known to be trivial, like /// GR32 -> GR32_NOSP. Beware of increasing register pressure. + /// + /// \note Assumes that the register has a register class assigned. + /// Use RegisterBankInfo::constrainGenericRegister in GlobalISel's + /// InstructionSelect pass and constrainRegAttrs in every other pass, + /// including non-select passes of GlobalISel, instead. const TargetRegisterClass *constrainRegClass(unsigned Reg, const TargetRegisterClass *RC, unsigned MinNumRegs = 0); + /// Constrain the register class or the register bank of the virtual register + /// \p Reg to be a common subclass and a common bank of both registers + /// provided respectively. Do nothing if any of the attributes (classes, + /// banks, or low-level types) of the registers are deemed incompatible, or if + /// the resulting register will have a class smaller than before and of size + /// less than \p MinNumRegs. Return true if such register attributes exist, + /// false otherwise. + /// + /// \note Assumes that each register has either a low-level type or a class + /// assigned, but not both. Use this method instead of constrainRegClass and + /// RegisterBankInfo::constrainGenericRegister everywhere but SelectionDAG + /// ISel / FastISel and GlobalISel's InstructionSelect pass respectively. + bool constrainRegAttrs(unsigned Reg, unsigned ConstrainingReg, + unsigned MinNumRegs = 0); + /// recomputeRegClass - Try to find a legal super-class of Reg's register /// class that still satisfies the constraints from the instructions using /// Reg. Returns true if Reg was upgraded. @@ -706,35 +731,61 @@ class MachineRegisterInfo { void clearVirtRegs(); /// setRegAllocationHint - Specify a register allocation hint for the - /// specified virtual register. + /// specified virtual register. This is typically used by target, and in case + /// of an earlier hint it will be overwritten. void setRegAllocationHint(unsigned VReg, unsigned Type, unsigned PrefReg) { assert(TargetRegisterInfo::isVirtualRegister(VReg)); RegAllocHints[VReg].first = Type; - RegAllocHints[VReg].second = PrefReg; + RegAllocHints[VReg].second.clear(); + RegAllocHints[VReg].second.push_back(PrefReg); } - /// Specify the preferred register allocation hint for the specified virtual - /// register. + /// addRegAllocationHint - Add a register allocation hint to the hints + /// vector for VReg. + void addRegAllocationHint(unsigned VReg, unsigned PrefReg) { + assert(TargetRegisterInfo::isVirtualRegister(VReg)); + RegAllocHints[VReg].second.push_back(PrefReg); + } + + /// Specify the preferred (target independent) register allocation hint for + /// the specified virtual register. void setSimpleHint(unsigned VReg, unsigned PrefReg) { setRegAllocationHint(VReg, /*Type=*/0, PrefReg); } + void clearSimpleHint(unsigned VReg) { + assert (RegAllocHints[VReg].first == 0 && + "Expected to clear a non-target hint!"); + RegAllocHints[VReg].second.clear(); + } + /// getRegAllocationHint - Return the register allocation hint for the - /// specified virtual register. + /// specified virtual register. If there are many hints, this returns the + /// one with the greatest weight. std::pair getRegAllocationHint(unsigned VReg) const { assert(TargetRegisterInfo::isVirtualRegister(VReg)); - return RegAllocHints[VReg]; + unsigned BestHint = (RegAllocHints[VReg].second.size() ? 
+ RegAllocHints[VReg].second[0] : 0); + return std::pair(RegAllocHints[VReg].first, BestHint); } - /// getSimpleHint - Return the preferred register allocation hint, or 0 if a - /// standard simple hint (Type == 0) is not set. + /// getSimpleHint - same as getRegAllocationHint except it will only return + /// a target independent hint. unsigned getSimpleHint(unsigned VReg) const { assert(TargetRegisterInfo::isVirtualRegister(VReg)); std::pair Hint = getRegAllocationHint(VReg); return Hint.first ? 0 : Hint.second; } + /// getRegAllocationHints - Return a reference to the vector of all + /// register allocation hints for VReg. + const std::pair> + &getRegAllocationHints(unsigned VReg) const { + assert(TargetRegisterInfo::isVirtualRegister(VReg)); + return RegAllocHints[VReg]; + } + /// markUsesInDebugValueAsUndef - Mark every DBG_VALUE referencing the /// specified register as undefined which causes the DBG_VALUE to be /// deleted during LiveDebugVariables analysis. diff --git a/include/llvm/CodeGen/MachineValueType.h b/include/llvm/CodeGen/MachineValueType.h index 0bdb38bfcbec..b452684757f6 100644 --- a/include/llvm/CodeGen/MachineValueType.h +++ b/include/llvm/CodeGen/MachineValueType.h @@ -64,80 +64,81 @@ namespace llvm { v16i1 = 18, // 16 x i1 v32i1 = 19, // 32 x i1 v64i1 = 20, // 64 x i1 - v512i1 = 21, // 512 x i1 - v1024i1 = 22, // 1024 x i1 - - v1i8 = 23, // 1 x i8 - v2i8 = 24, // 2 x i8 - v4i8 = 25, // 4 x i8 - v8i8 = 26, // 8 x i8 - v16i8 = 27, // 16 x i8 - v32i8 = 28, // 32 x i8 - v64i8 = 29, // 64 x i8 - v128i8 = 30, //128 x i8 - v256i8 = 31, //256 x i8 - - v1i16 = 32, // 1 x i16 - v2i16 = 33, // 2 x i16 - v4i16 = 34, // 4 x i16 - v8i16 = 35, // 8 x i16 - v16i16 = 36, // 16 x i16 - v32i16 = 37, // 32 x i16 - v64i16 = 38, // 64 x i16 - v128i16 = 39, //128 x i16 - - v1i32 = 40, // 1 x i32 - v2i32 = 41, // 2 x i32 - v4i32 = 42, // 4 x i32 - v8i32 = 43, // 8 x i32 - v16i32 = 44, // 16 x i32 - v32i32 = 45, // 32 x i32 - v64i32 = 46, // 64 x i32 - - v1i64 = 47, // 1 x i64 - v2i64 = 48, // 2 x i64 - v4i64 = 49, // 4 x i64 - v8i64 = 50, // 8 x i64 - v16i64 = 51, // 16 x i64 - v32i64 = 52, // 32 x i64 - - v1i128 = 53, // 1 x i128 + v128i1 = 21, // 128 x i1 + v512i1 = 22, // 512 x i1 + v1024i1 = 23, // 1024 x i1 + + v1i8 = 24, // 1 x i8 + v2i8 = 25, // 2 x i8 + v4i8 = 26, // 4 x i8 + v8i8 = 27, // 8 x i8 + v16i8 = 28, // 16 x i8 + v32i8 = 29, // 32 x i8 + v64i8 = 30, // 64 x i8 + v128i8 = 31, //128 x i8 + v256i8 = 32, //256 x i8 + + v1i16 = 33, // 1 x i16 + v2i16 = 34, // 2 x i16 + v4i16 = 35, // 4 x i16 + v8i16 = 36, // 8 x i16 + v16i16 = 37, // 16 x i16 + v32i16 = 38, // 32 x i16 + v64i16 = 39, // 64 x i16 + v128i16 = 40, //128 x i16 + + v1i32 = 41, // 1 x i32 + v2i32 = 42, // 2 x i32 + v4i32 = 43, // 4 x i32 + v8i32 = 44, // 8 x i32 + v16i32 = 45, // 16 x i32 + v32i32 = 46, // 32 x i32 + v64i32 = 47, // 64 x i32 + + v1i64 = 48, // 1 x i64 + v2i64 = 49, // 2 x i64 + v4i64 = 50, // 4 x i64 + v8i64 = 51, // 8 x i64 + v16i64 = 52, // 16 x i64 + v32i64 = 53, // 32 x i64 + + v1i128 = 54, // 1 x i128 // Scalable integer types - nxv1i1 = 54, // n x 1 x i1 - nxv2i1 = 55, // n x 2 x i1 - nxv4i1 = 56, // n x 4 x i1 - nxv8i1 = 57, // n x 8 x i1 - nxv16i1 = 58, // n x 16 x i1 - nxv32i1 = 59, // n x 32 x i1 - - nxv1i8 = 60, // n x 1 x i8 - nxv2i8 = 61, // n x 2 x i8 - nxv4i8 = 62, // n x 4 x i8 - nxv8i8 = 63, // n x 8 x i8 - nxv16i8 = 64, // n x 16 x i8 - nxv32i8 = 65, // n x 32 x i8 - - nxv1i16 = 66, // n x 1 x i16 - nxv2i16 = 67, // n x 2 x i16 - nxv4i16 = 68, // n x 4 x i16 - nxv8i16 = 69, // n x 8 
x i16 - nxv16i16 = 70, // n x 16 x i16 - nxv32i16 = 71, // n x 32 x i16 - - nxv1i32 = 72, // n x 1 x i32 - nxv2i32 = 73, // n x 2 x i32 - nxv4i32 = 74, // n x 4 x i32 - nxv8i32 = 75, // n x 8 x i32 - nxv16i32 = 76, // n x 16 x i32 - nxv32i32 = 77, // n x 32 x i32 - - nxv1i64 = 78, // n x 1 x i64 - nxv2i64 = 79, // n x 2 x i64 - nxv4i64 = 80, // n x 4 x i64 - nxv8i64 = 81, // n x 8 x i64 - nxv16i64 = 82, // n x 16 x i64 - nxv32i64 = 83, // n x 32 x i64 + nxv1i1 = 55, // n x 1 x i1 + nxv2i1 = 56, // n x 2 x i1 + nxv4i1 = 57, // n x 4 x i1 + nxv8i1 = 58, // n x 8 x i1 + nxv16i1 = 59, // n x 16 x i1 + nxv32i1 = 60, // n x 32 x i1 + + nxv1i8 = 61, // n x 1 x i8 + nxv2i8 = 62, // n x 2 x i8 + nxv4i8 = 63, // n x 4 x i8 + nxv8i8 = 64, // n x 8 x i8 + nxv16i8 = 65, // n x 16 x i8 + nxv32i8 = 66, // n x 32 x i8 + + nxv1i16 = 67, // n x 1 x i16 + nxv2i16 = 68, // n x 2 x i16 + nxv4i16 = 69, // n x 4 x i16 + nxv8i16 = 70, // n x 8 x i16 + nxv16i16 = 71, // n x 16 x i16 + nxv32i16 = 72, // n x 32 x i16 + + nxv1i32 = 73, // n x 1 x i32 + nxv2i32 = 74, // n x 2 x i32 + nxv4i32 = 75, // n x 4 x i32 + nxv8i32 = 76, // n x 8 x i32 + nxv16i32 = 77, // n x 16 x i32 + nxv32i32 = 78, // n x 32 x i32 + + nxv1i64 = 79, // n x 1 x i64 + nxv2i64 = 80, // n x 2 x i64 + nxv4i64 = 81, // n x 4 x i64 + nxv8i64 = 82, // n x 8 x i64 + nxv16i64 = 83, // n x 16 x i64 + nxv32i64 = 84, // n x 32 x i64 FIRST_INTEGER_VECTOR_VALUETYPE = v1i1, LAST_INTEGER_VECTOR_VALUETYPE = nxv32i64, @@ -145,31 +146,31 @@ namespace llvm { FIRST_INTEGER_SCALABLE_VALUETYPE = nxv1i1, LAST_INTEGER_SCALABLE_VALUETYPE = nxv32i64, - v2f16 = 84, // 2 x f16 - v4f16 = 85, // 4 x f16 - v8f16 = 86, // 8 x f16 - v1f32 = 87, // 1 x f32 - v2f32 = 88, // 2 x f32 - v4f32 = 89, // 4 x f32 - v8f32 = 90, // 8 x f32 - v16f32 = 91, // 16 x f32 - v1f64 = 92, // 1 x f64 - v2f64 = 93, // 2 x f64 - v4f64 = 94, // 4 x f64 - v8f64 = 95, // 8 x f64 - - nxv2f16 = 96, // n x 2 x f16 - nxv4f16 = 97, // n x 4 x f16 - nxv8f16 = 98, // n x 8 x f16 - nxv1f32 = 99, // n x 1 x f32 - nxv2f32 = 100, // n x 2 x f32 - nxv4f32 = 101, // n x 4 x f32 - nxv8f32 = 102, // n x 8 x f32 - nxv16f32 = 103, // n x 16 x f32 - nxv1f64 = 104, // n x 1 x f64 - nxv2f64 = 105, // n x 2 x f64 - nxv4f64 = 106, // n x 4 x f64 - nxv8f64 = 107, // n x 8 x f64 + v2f16 = 85, // 2 x f16 + v4f16 = 86, // 4 x f16 + v8f16 = 87, // 8 x f16 + v1f32 = 88, // 1 x f32 + v2f32 = 89, // 2 x f32 + v4f32 = 90, // 4 x f32 + v8f32 = 91, // 8 x f32 + v16f32 = 92, // 16 x f32 + v1f64 = 93, // 1 x f64 + v2f64 = 94, // 2 x f64 + v4f64 = 95, // 4 x f64 + v8f64 = 96, // 8 x f64 + + nxv2f16 = 97, // n x 2 x f16 + nxv4f16 = 98, // n x 4 x f16 + nxv8f16 = 99, // n x 8 x f16 + nxv1f32 = 100, // n x 1 x f32 + nxv2f32 = 101, // n x 2 x f32 + nxv4f32 = 102, // n x 4 x f32 + nxv8f32 = 103, // n x 8 x f32 + nxv16f32 = 104, // n x 16 x f32 + nxv1f64 = 105, // n x 1 x f64 + nxv2f64 = 106, // n x 2 x f64 + nxv4f64 = 107, // n x 4 x f64 + nxv8f64 = 108, // n x 8 x f64 FIRST_FP_VECTOR_VALUETYPE = v2f16, LAST_FP_VECTOR_VALUETYPE = nxv8f64, @@ -180,18 +181,18 @@ namespace llvm { FIRST_VECTOR_VALUETYPE = v1i1, LAST_VECTOR_VALUETYPE = nxv8f64, - x86mmx = 108, // This is an X86 MMX value + x86mmx = 109, // This is an X86 MMX value - Glue = 109, // This glues nodes together during pre-RA sched + Glue = 110, // This glues nodes together during pre-RA sched - isVoid = 110, // This has no value + isVoid = 111, // This has no value - Untyped = 111, // This value takes a register, but has + Untyped = 112, // This value takes a register, but has // 
unspecified type. The register class // will be determined by the opcode. FIRST_VALUETYPE = 1, // This is always the beginning of the list. - LAST_VALUETYPE = 112, // This always remains at the end of the list. + LAST_VALUETYPE = 113, // This always remains at the end of the list. // This is the current maximum for LAST_VALUETYPE. // MVT::MAX_ALLOWED_VALUETYPE is used for asserts and to size bit vectors @@ -346,10 +347,11 @@ namespace llvm { /// Return true if this is a 128-bit vector type. bool is128BitVector() const { - return (SimpleTy == MVT::v16i8 || SimpleTy == MVT::v8i16 || - SimpleTy == MVT::v4i32 || SimpleTy == MVT::v2i64 || - SimpleTy == MVT::v1i128 || SimpleTy == MVT::v8f16 || - SimpleTy == MVT::v4f32 || SimpleTy == MVT::v2f64); + return (SimpleTy == MVT::v128i1 || SimpleTy == MVT::v16i8 || + SimpleTy == MVT::v8i16 || SimpleTy == MVT::v4i32 || + SimpleTy == MVT::v2i64 || SimpleTy == MVT::v1i128 || + SimpleTy == MVT::v8f16 || SimpleTy == MVT::v4f32 || + SimpleTy == MVT::v2f64); } /// Return true if this is a 256-bit vector type. @@ -420,6 +422,7 @@ namespace llvm { case v16i1: case v32i1: case v64i1: + case v128i1: case v512i1: case v1024i1: case nxv1i1: @@ -517,6 +520,7 @@ namespace llvm { case v1024i1: return 1024; case v512i1: return 512; case v256i8: return 256; + case v128i1: case v128i8: case v128i16: return 128; case v64i1: @@ -690,6 +694,7 @@ namespace llvm { case f128: case ppcf128: case i128: + case v128i1: case v16i8: case v8i16: case v4i32: @@ -828,6 +833,7 @@ namespace llvm { if (NumElements == 16) return MVT::v16i1; if (NumElements == 32) return MVT::v32i1; if (NumElements == 64) return MVT::v64i1; + if (NumElements == 128) return MVT::v128i1; if (NumElements == 512) return MVT::v512i1; if (NumElements == 1024) return MVT::v1024i1; break; diff --git a/include/llvm/CodeGen/Passes.h b/include/llvm/CodeGen/Passes.h index 4370d116e08c..c3ac36cf82eb 100644 --- a/include/llvm/CodeGen/Passes.h +++ b/include/llvm/CodeGen/Passes.h @@ -212,6 +212,10 @@ namespace llvm { /// into tails of their predecessors. extern char &TailDuplicateID; + /// Duplicate blocks with unconditional branches into tails of their + /// predecessors. Variant that works before register allocation. + extern char &EarlyTailDuplicateID; + /// MachineTraceMetrics - This pass computes critical path and CPU resource /// usage in an ensemble of traces. extern char &MachineTraceMetricsID; @@ -269,9 +273,13 @@ namespace llvm { /// memory operations. extern char &ImplicitNullChecksID; - /// MachineLICM - This pass performs LICM on machine instructions. + /// This pass performs loop invariant code motion on machine instructions. extern char &MachineLICMID; + /// This pass performs loop invariant code motion on machine instructions. + /// This variant works before register allocation. \see MachineLICMID. + extern char &EarlyMachineLICMID; + /// MachineSinking - This pass performs sinking on machine instructions. extern char &MachineSinkingID; @@ -417,6 +425,12 @@ namespace llvm { // This pass expands memcmp() to load/stores. FunctionPass *createExpandMemCmpPass(); + /// Creates Break False Dependencies pass. \see BreakFalseDeps.cpp + FunctionPass *createBreakFalseDeps(); + + // This pass expands indirectbr instructions. 
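The new pass declarations in this Passes.h hunk (EarlyTailDuplicateID, EarlyMachineLICMID, createBreakFalseDeps, createExpandMemCmpPass, and createIndirectBrExpandPass, declared just below) can be scheduled from a backend's TargetPassConfig subclass. A minimal sketch, not part of the patch, with a hypothetical MyTargetPassConfig:

#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"

using namespace llvm;

namespace {
class MyTargetPassConfig : public TargetPassConfig {
public:
  using TargetPassConfig::TargetPassConfig;

  void addIRPasses() override {
    TargetPassConfig::addIRPasses();
    // Rewrite indirectbr into a switch over destination blocks before ISel.
    addPass(createIndirectBrExpandPass());
  }

  void addPreRegAlloc() override {
    // Run the pre-RA variants of machine LICM and tail duplication.
    addPass(&EarlyMachineLICMID);
    addPass(&EarlyTailDuplicateID);
  }
};
} // end anonymous namespace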
+  FunctionPass *createIndirectBrExpandPass();
+
 } // End llvm namespace

 #endif
diff --git a/include/llvm/CodeGen/ReachingDefAnalysis.h b/include/llvm/CodeGen/ReachingDefAnalysis.h
new file mode 100644
index 000000000000..3c2a9d03dda5
--- /dev/null
+++ b/include/llvm/CodeGen/ReachingDefAnalysis.h
@@ -0,0 +1,118 @@
+//==--- llvm/CodeGen/ReachingDefAnalysis.h - Reaching Def Analysis -*- C++ -*---==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file Reaching Defs Analysis pass.
+///
+/// This pass tracks for each instruction what is the "closest" reaching def of
+/// a given register. It is used by BreakFalseDeps (for clearance calculation)
+/// and ExecutionDomainFix (for arbitrating conflicting domains).
+///
+/// Note that this is different from the usual notion of liveness.
+/// The CPU doesn't care whether or not we consider a register killed.
+///
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_REACHINGDEFSANALYSIS_H
+#define LLVM_CODEGEN_REACHINGDEFSANALYSIS_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/LoopTraversal.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+namespace llvm {
+
+class MachineBasicBlock;
+class MachineInstr;
+
+/// This class provides the reaching def analysis.
+class ReachingDefAnalysis : public MachineFunctionPass {
+private:
+  MachineFunction *MF;
+  const TargetRegisterInfo *TRI;
+  unsigned NumRegUnits;
+  /// Instruction that defined each register, relative to the beginning of the
+  /// current basic block. When a LiveRegsDefInfo is used to represent a
+  /// live-out register, this value is relative to the end of the basic block,
+  /// so it will be a negative number.
+  using LiveRegsDefInfo = std::vector<int>;
+  LiveRegsDefInfo LiveRegs;
+
+  /// Keeps clearance information for all registers. Note that this is
+  /// different from the usual notion of liveness. The CPU doesn't care
+  /// whether or not we consider a register killed.
+  using OutRegsInfoMap = SmallVector<LiveRegsDefInfo, 4>;
+  OutRegsInfoMap MBBOutRegsInfos;
+
+  /// Current instruction number.
+  /// The first instruction in each basic block is 0.
+  int CurInstr;
+
+  /// Maps instructions to their instruction Ids, relative to the beginning of
+  /// their basic blocks.
+  DenseMap<MachineInstr *, int> InstIds;
+
+  /// All reaching defs of a given RegUnit for a given MBB.
+  using MBBRegUnitDefs = SmallVector<int, 1>;
+  /// All reaching defs of all reg units for a given MBB.
+  using MBBDefsInfo = std::vector<MBBRegUnitDefs>;
+  /// All reaching defs of all reg units for all MBBs.
+  using MBBReachingDefsInfo = SmallVector<MBBDefsInfo, 4>;
+  MBBReachingDefsInfo MBBReachingDefs;
+
+  /// Default values are 'nothing happened a long time ago'.
+  const int ReachingDedDefaultVal = -(1 << 20);
+
+public:
+  static char ID; // Pass identification, replacement for typeid
+
+  ReachingDefAnalysis() : MachineFunctionPass(ID) {
+    initializeReachingDefAnalysisPass(*PassRegistry::getPassRegistry());
+  }
+  void releaseMemory() override;
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesAll();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  MachineFunctionProperties getRequiredProperties() const override {
+    return MachineFunctionProperties().set(
+        MachineFunctionProperties::Property::NoVRegs);
+  }
+
+  /// Provides the instruction id of the closest reaching def instruction of
+  /// PhysReg that reaches MI, relative to the beginning of MI's basic block.
+  int getReachingDef(MachineInstr *MI, int PhysReg);
+
+  /// Provides the clearance - the number of instructions since the closest
+  /// reaching def instruction of PhysReg that reaches MI.
+  int getClearance(MachineInstr *MI, MCPhysReg PhysReg);
+
+private:
+  /// Set up LiveRegs by merging predecessor live-out values.
+  void enterBasicBlock(const LoopTraversal::TraversedMBBInfo &TraversedMBB);
+
+  /// Update live-out values.
+  void leaveBasicBlock(const LoopTraversal::TraversedMBBInfo &TraversedMBB);
+
+  /// Process the given basic block.
+  void processBasicBlock(const LoopTraversal::TraversedMBBInfo &TraversedMBB);
+
+  /// Update def-ages for registers defined by MI.
+  /// Also break dependencies on partial defs and undef uses.
+  void processDefs(MachineInstr *);
+};
+
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_REACHINGDEFSANALYSIS_H
diff --git a/include/llvm/CodeGen/RuntimeLibcalls.def b/include/llvm/CodeGen/RuntimeLibcalls.def
index e042ae982e86..7ed90d959f01 100644
--- a/include/llvm/CodeGen/RuntimeLibcalls.def
+++ b/include/llvm/CodeGen/RuntimeLibcalls.def
@@ -130,26 +130,51 @@ HANDLE_LIBCALL(LOG_F64, "log")
 HANDLE_LIBCALL(LOG_F80, "logl")
 HANDLE_LIBCALL(LOG_F128, "logl")
 HANDLE_LIBCALL(LOG_PPCF128, "logl")
+HANDLE_LIBCALL(LOG_FINITE_F32, "__logf_finite")
+HANDLE_LIBCALL(LOG_FINITE_F64, "__log_finite")
+HANDLE_LIBCALL(LOG_FINITE_F80, "__logl_finite")
+HANDLE_LIBCALL(LOG_FINITE_F128, "__logl_finite")
+HANDLE_LIBCALL(LOG_FINITE_PPCF128, "__logl_finite")
 HANDLE_LIBCALL(LOG2_F32, "log2f")
 HANDLE_LIBCALL(LOG2_F64, "log2")
 HANDLE_LIBCALL(LOG2_F80, "log2l")
 HANDLE_LIBCALL(LOG2_F128, "log2l")
 HANDLE_LIBCALL(LOG2_PPCF128, "log2l")
+HANDLE_LIBCALL(LOG2_FINITE_F32, "__log2f_finite")
+HANDLE_LIBCALL(LOG2_FINITE_F64, "__log2_finite")
+HANDLE_LIBCALL(LOG2_FINITE_F80, "__log2l_finite")
+HANDLE_LIBCALL(LOG2_FINITE_F128, "__log2l_finite")
+HANDLE_LIBCALL(LOG2_FINITE_PPCF128, "__log2l_finite")
 HANDLE_LIBCALL(LOG10_F32, "log10f")
 HANDLE_LIBCALL(LOG10_F64, "log10")
 HANDLE_LIBCALL(LOG10_F80, "log10l")
 HANDLE_LIBCALL(LOG10_F128, "log10l")
 HANDLE_LIBCALL(LOG10_PPCF128, "log10l")
+HANDLE_LIBCALL(LOG10_FINITE_F32, "__log10f_finite")
+HANDLE_LIBCALL(LOG10_FINITE_F64, "__log10_finite")
+HANDLE_LIBCALL(LOG10_FINITE_F80, "__log10l_finite")
+HANDLE_LIBCALL(LOG10_FINITE_F128, "__log10l_finite")
+HANDLE_LIBCALL(LOG10_FINITE_PPCF128, "__log10l_finite")
 HANDLE_LIBCALL(EXP_F32, "expf")
 HANDLE_LIBCALL(EXP_F64, "exp")
 HANDLE_LIBCALL(EXP_F80, "expl")
 HANDLE_LIBCALL(EXP_F128, "expl")
 HANDLE_LIBCALL(EXP_PPCF128, "expl")
+HANDLE_LIBCALL(EXP_FINITE_F32, "__expf_finite")
+HANDLE_LIBCALL(EXP_FINITE_F64, "__exp_finite")
+HANDLE_LIBCALL(EXP_FINITE_F80, "__expl_finite")
+HANDLE_LIBCALL(EXP_FINITE_F128, "__expl_finite")
+HANDLE_LIBCALL(EXP_FINITE_PPCF128, "__expl_finite") HANDLE_LIBCALL(EXP2_F32, "exp2f") HANDLE_LIBCALL(EXP2_F64, "exp2") HANDLE_LIBCALL(EXP2_F80, "exp2l") HANDLE_LIBCALL(EXP2_F128, "exp2l") HANDLE_LIBCALL(EXP2_PPCF128, "exp2l") +HANDLE_LIBCALL(EXP2_FINITE_F32, "__exp2f_finite") +HANDLE_LIBCALL(EXP2_FINITE_F64, "__exp2_finite") +HANDLE_LIBCALL(EXP2_FINITE_F80, "__exp2l_finite") +HANDLE_LIBCALL(EXP2_FINITE_F128, "__exp2l_finite") +HANDLE_LIBCALL(EXP2_FINITE_PPCF128, "__exp2l_finite") HANDLE_LIBCALL(SIN_F32, "sinf") HANDLE_LIBCALL(SIN_F64, "sin") HANDLE_LIBCALL(SIN_F80, "sinl") @@ -165,11 +190,18 @@ HANDLE_LIBCALL(SINCOS_F64, nullptr) HANDLE_LIBCALL(SINCOS_F80, nullptr) HANDLE_LIBCALL(SINCOS_F128, nullptr) HANDLE_LIBCALL(SINCOS_PPCF128, nullptr) +HANDLE_LIBCALL(SINCOS_STRET_F32, nullptr) +HANDLE_LIBCALL(SINCOS_STRET_F64, nullptr) HANDLE_LIBCALL(POW_F32, "powf") HANDLE_LIBCALL(POW_F64, "pow") HANDLE_LIBCALL(POW_F80, "powl") HANDLE_LIBCALL(POW_F128, "powl") HANDLE_LIBCALL(POW_PPCF128, "powl") +HANDLE_LIBCALL(POW_FINITE_F32, "__powf_finite") +HANDLE_LIBCALL(POW_FINITE_F64, "__pow_finite") +HANDLE_LIBCALL(POW_FINITE_F80, "__powl_finite") +HANDLE_LIBCALL(POW_FINITE_F128, "__powl_finite") +HANDLE_LIBCALL(POW_FINITE_PPCF128, "__powl_finite") HANDLE_LIBCALL(CEIL_F32, "ceilf") HANDLE_LIBCALL(CEIL_F64, "ceil") HANDLE_LIBCALL(CEIL_F80, "ceill") @@ -219,6 +251,7 @@ HANDLE_LIBCALL(FMAX_PPCF128, "fmaxl") // Conversion HANDLE_LIBCALL(FPEXT_F32_PPCF128, "__gcc_stoq") HANDLE_LIBCALL(FPEXT_F64_PPCF128, "__gcc_dtoq") +HANDLE_LIBCALL(FPEXT_F80_F128, "__extendxftf2") HANDLE_LIBCALL(FPEXT_F64_F128, "__extenddftf2") HANDLE_LIBCALL(FPEXT_F32_F128, "__extendsftf2") HANDLE_LIBCALL(FPEXT_F32_F64, "__extendsfdf2") @@ -235,6 +268,7 @@ HANDLE_LIBCALL(FPROUND_PPCF128_F32, "__gcc_qtos") HANDLE_LIBCALL(FPROUND_F80_F64, "__truncxfdf2") HANDLE_LIBCALL(FPROUND_F128_F64, "__trunctfdf2") HANDLE_LIBCALL(FPROUND_PPCF128_F64, "__gcc_qtod") +HANDLE_LIBCALL(FPROUND_F128_F80, "__trunctfxf2") HANDLE_LIBCALL(FPTOSINT_F32_I32, "__fixsfsi") HANDLE_LIBCALL(FPTOSINT_F32_I64, "__fixsfdi") HANDLE_LIBCALL(FPTOSINT_F32_I128, "__fixsfti") @@ -334,6 +368,7 @@ HANDLE_LIBCALL(O_PPCF128, "__gcc_qunord") HANDLE_LIBCALL(MEMCPY, "memcpy") HANDLE_LIBCALL(MEMMOVE, "memmove") HANDLE_LIBCALL(MEMSET, "memset") +HANDLE_LIBCALL(BZERO, nullptr) // Element-wise unordered-atomic memory of different sizes HANDLE_LIBCALL(MEMCPY_ELEMENT_UNORDERED_ATOMIC_1, "__llvm_memcpy_element_unordered_atomic_1") diff --git a/include/llvm/CodeGen/SDNodeProperties.td b/include/llvm/CodeGen/SDNodeProperties.td new file mode 100644 index 000000000000..83bbab2fdc8d --- /dev/null +++ b/include/llvm/CodeGen/SDNodeProperties.td @@ -0,0 +1,34 @@ +//===- SDNodeProperties.td - Common code for DAG isels ---*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +class SDNodeProperty; + +// Selection DAG Pattern Operations +class SDPatternOperator { + list Properties = []; +} + +//===----------------------------------------------------------------------===// +// Selection DAG Node Properties. +// +// Note: These are hard coded into tblgen. 
+// +def SDNPCommutative : SDNodeProperty; // X op Y == Y op X +def SDNPAssociative : SDNodeProperty; // (X op Y) op Z == X op (Y op Z) +def SDNPHasChain : SDNodeProperty; // R/W chain operand and result +def SDNPOutGlue : SDNodeProperty; // Write a flag result +def SDNPInGlue : SDNodeProperty; // Read a flag operand +def SDNPOptInGlue : SDNodeProperty; // Optionally read a flag operand +def SDNPMayStore : SDNodeProperty; // May write to memory, sets 'mayStore'. +def SDNPMayLoad : SDNodeProperty; // May read memory, sets 'mayLoad'. +def SDNPSideEffect : SDNodeProperty; // Sets 'HasUnmodelledSideEffects'. +def SDNPMemOperand : SDNodeProperty; // Touches memory, has assoc MemOperand +def SDNPVariadic : SDNodeProperty; // Node has variable arguments. +def SDNPWantRoot : SDNodeProperty; // ComplexPattern gets the root of match +def SDNPWantParent : SDNodeProperty; // ComplexPattern gets the parent diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h index ba3511d52624..28a34597401f 100644 --- a/include/llvm/CodeGen/SelectionDAG.h +++ b/include/llvm/CodeGen/SelectionDAG.h @@ -73,6 +73,7 @@ class OptimizationRemarkEmitter; class SDDbgValue; class SelectionDAG; class SelectionDAGTargetInfo; +class TargetLibraryInfo; class TargetLowering; class TargetMachine; class TargetSubtargetInfo; @@ -210,6 +211,7 @@ class SelectionDAG { const TargetMachine &TM; const SelectionDAGTargetInfo *TSI = nullptr; const TargetLowering *TLI = nullptr; + const TargetLibraryInfo *LibInfo = nullptr; MachineFunction *MF; Pass *SDAGISelPass = nullptr; LLVMContext *Context; @@ -376,7 +378,7 @@ class SelectionDAG { /// Prepare this SelectionDAG to process code in the given MachineFunction. void init(MachineFunction &NewMF, OptimizationRemarkEmitter &NewORE, - Pass *PassPtr); + Pass *PassPtr, const TargetLibraryInfo *LibraryInfo); /// Clear state and free memory necessary to make this /// SelectionDAG ready to process a new block. @@ -389,6 +391,7 @@ class SelectionDAG { const TargetMachine &getTarget() const { return TM; } const TargetSubtargetInfo &getSubtarget() const { return MF->getSubtarget(); } const TargetLowering &getTargetLoweringInfo() const { return *TLI; } + const TargetLibraryInfo &getLibInfo() const { return *LibInfo; } const SelectionDAGTargetInfo &getSelectionDAGInfo() const { return *TSI; } LLVMContext *getContext() const {return Context; } OptimizationRemarkEmitter &getORE() const { return *ORE; } @@ -796,6 +799,24 @@ class SelectionDAG { /// \brief Create a logical NOT operation as (XOR Val, BooleanOne). SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT); + /// \brief Create an add instruction with appropriate flags when used for + /// addressing some offset of an object. i.e. if a load is split into multiple + /// components, create an add nuw from the base pointer to the offset. + SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Op, int64_t Offset) { + EVT VT = Op.getValueType(); + return getObjectPtrOffset(SL, Op, getConstant(Offset, SL, VT)); + } + + SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Op, SDValue Offset) { + EVT VT = Op.getValueType(); + + // The object itself can't wrap around the address space, so it shouldn't be + // possible for the adds of the offsets to the split parts to overflow. 
+ SDNodeFlags Flags; + Flags.setNoUnsignedWrap(true); + return getNode(ISD::ADD, SL, VT, Op, Offset, Flags); + } + /// Return a new CALLSEQ_START node, that starts new call frame, in which /// InSize bytes are set up inside CALLSEQ_START..CALLSEQ_END sequence and /// OutSize specifies part of the frame set up prior to the sequence. @@ -970,11 +991,14 @@ class SelectionDAG { /// result and takes a list of operands. Opcode may be INTRINSIC_VOID, /// INTRINSIC_W_CHAIN, or a target-specific opcode with a value not /// less than FIRST_TARGET_MEMORY_OPCODE. - SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, - ArrayRef Ops, EVT MemVT, - MachinePointerInfo PtrInfo, unsigned Align = 0, - bool Vol = false, bool ReadMem = true, - bool WriteMem = true, unsigned Size = 0); + SDValue getMemIntrinsicNode( + unsigned Opcode, const SDLoc &dl, SDVTList VTList, + ArrayRef Ops, EVT MemVT, + MachinePointerInfo PtrInfo, + unsigned Align = 0, + MachineMemOperand::Flags Flags + = MachineMemOperand::MOLoad | MachineMemOperand::MOStore, + unsigned Size = 0); SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef Ops, EVT MemVT, @@ -1229,7 +1253,7 @@ class SelectionDAG { void ReplaceAllUsesWith(SDNode *From, const SDValue *To); /// Replace any uses of From with To, leaving - /// uses of other values produced by From.Val alone. + /// uses of other values produced by From.getNode() alone. void ReplaceAllUsesOfValueWith(SDValue From, SDValue To); /// Like ReplaceAllUsesOfValueWith, but for multiple values at once. diff --git a/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h b/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h index 18e4c7a83def..580606441a9d 100644 --- a/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h +++ b/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h @@ -56,7 +56,7 @@ class BaseIndexOffset { int64_t &Off); /// Parses tree in Ptr for base, index, offset addresses. - static BaseIndexOffset match(SDValue Ptr, const SelectionDAG &DAG); + static BaseIndexOffset match(LSBaseSDNode *N, const SelectionDAG &DAG); }; } // end namespace llvm diff --git a/include/llvm/CodeGen/SelectionDAGISel.h b/include/llvm/CodeGen/SelectionDAGISel.h index 053ebaa1ddaf..de6849a1eae1 100644 --- a/include/llvm/CodeGen/SelectionDAGISel.h +++ b/include/llvm/CodeGen/SelectionDAGISel.h @@ -276,6 +276,8 @@ class SelectionDAGISel : public MachineFunctionPass { return false; } + bool isOrEquivalentToAdd(const SDNode *N) const; + private: // Calls to these functions are generated by tblgen. diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h index 7de2e766d521..7eb4dbb4e8f5 100644 --- a/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/include/llvm/CodeGen/SelectionDAGNodes.h @@ -189,8 +189,8 @@ class SDValue { inline bool isUndef() const; inline unsigned getMachineOpcode() const; inline const DebugLoc &getDebugLoc() const; - inline void dump() const; - inline void dumpr() const; + inline void dump(const SelectionDAG *G = nullptr) const; + inline void dumpr(const SelectionDAG *G = nullptr) const; /// Return true if this operand (which must be a chain) reaches the /// specified operand without crossing any side-effecting instructions. 
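The getObjectPtrOffset() helpers added to SelectionDAG.h above are intended for exactly the situation their comment describes: deriving the address of one piece of an object that is being split. A minimal sketch, not part of the patch, with an illustrative helper name and a hard-coded 32-bit half size; legality checks, alignment, and chain/replacement handling are omitted:

#include "llvm/CodeGen/SelectionDAG.h"
#include <utility>

using namespace llvm;

// Split a 64-bit integer load into two 32-bit halves.
static std::pair<SDValue, SDValue> splitLoadInHalves(SelectionDAG &DAG,
                                                     LoadSDNode *LD) {
  SDLoc DL(LD);
  SDValue Base = LD->getBasePtr();
  SDValue Lo = DAG.getLoad(MVT::i32, DL, LD->getChain(), Base,
                           LD->getPointerInfo());
  // The high half lies inside the same object as the low half, so the
  // pointer add cannot wrap; getObjectPtrOffset() marks it 'nuw'.
  SDValue HiPtr = DAG.getObjectPtrOffset(DL, Base, 4);
  SDValue Hi = DAG.getLoad(MVT::i32, DL, LD->getChain(), HiPtr,
                           LD->getPointerInfo().getWithOffset(4));
  return {Lo, Hi};
}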
@@ -1089,12 +1089,12 @@ inline const DebugLoc &SDValue::getDebugLoc() const { return Node->getDebugLoc(); } -inline void SDValue::dump() const { - return Node->dump(); +inline void SDValue::dump(const SelectionDAG *G) const { + return Node->dump(G); } -inline void SDValue::dumpr() const { - return Node->dumpr(); +inline void SDValue::dumpr(const SelectionDAG *G) const { + return Node->dumpr(G); } // Define inline functions from the SDUse class. @@ -2120,13 +2120,14 @@ class MaskedGatherScatterSDNode : public MemSDNode { : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {} // In the both nodes address is Op1, mask is Op2: - // MaskedGatherSDNode (Chain, src0, mask, base, index), src0 is a passthru value - // MaskedScatterSDNode (Chain, value, mask, base, index) + // MaskedGatherSDNode (Chain, passthru, mask, base, index, scale) + // MaskedScatterSDNode (Chain, value, mask, base, index, scale) // Mask is a vector of i1 elements const SDValue &getBasePtr() const { return getOperand(3); } const SDValue &getIndex() const { return getOperand(4); } const SDValue &getMask() const { return getOperand(2); } const SDValue &getValue() const { return getOperand(1); } + const SDValue &getScale() const { return getOperand(5); } static bool classof(const SDNode *N) { return N->getOpcode() == ISD::MGATHER || diff --git a/include/llvm/CodeGen/TailDuplicator.h b/include/llvm/CodeGen/TailDuplicator.h index 3c3ba886f4b3..be6562c85f2e 100644 --- a/include/llvm/CodeGen/TailDuplicator.h +++ b/include/llvm/CodeGen/TailDuplicator.h @@ -17,13 +17,9 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/CodeGen/TargetInstrInfo.h" -#include "llvm/CodeGen/TargetSubtargetInfo.h" -#include "llvm/Support/CommandLine.h" #include #include diff --git a/include/llvm/CodeGen/TargetFrameLowering.h b/include/llvm/CodeGen/TargetFrameLowering.h index 53d389d9917a..61f1cf07bcf2 100644 --- a/include/llvm/CodeGen/TargetFrameLowering.h +++ b/include/llvm/CodeGen/TargetFrameLowering.h @@ -330,12 +330,12 @@ class TargetFrameLowering { /// Check if given function is safe for not having callee saved registers. /// This is used when interprocedural register allocation is enabled. - static bool isSafeForNoCSROpt(const Function *F) { - if (!F->hasLocalLinkage() || F->hasAddressTaken() || - !F->hasFnAttribute(Attribute::NoRecurse)) + static bool isSafeForNoCSROpt(const Function &F) { + if (!F.hasLocalLinkage() || F.hasAddressTaken() || + !F.hasFnAttribute(Attribute::NoRecurse)) return false; // Function should not be optimized as tail call. 
- for (const User *U : F->users()) + for (const User *U : F.users()) if (auto CS = ImmutableCallSite(U)) if (CS.isTailCall()) return false; diff --git a/include/llvm/CodeGen/TargetInstrInfo.h b/include/llvm/CodeGen/TargetInstrInfo.h index 6770e503e615..22b0225f7905 100644 --- a/include/llvm/CodeGen/TargetInstrInfo.h +++ b/include/llvm/CodeGen/TargetInstrInfo.h @@ -18,7 +18,6 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/None.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineCombinerPattern.h" #include "llvm/CodeGen/MachineFunction.h" @@ -39,6 +38,7 @@ namespace llvm { class DFAPacketizer; class InstrItineraryData; +class LiveIntervals; class LiveVariables; class MachineMemOperand; class MachineRegisterInfo; @@ -421,11 +421,12 @@ class TargetInstrInfo : public MCInstrInfo { /// Build the equivalent inputs of a REG_SEQUENCE for the given \p MI /// and \p DefIdx. /// \p [out] InputRegs of the equivalent REG_SEQUENCE. Each element of - /// the list is modeled as . - /// E.g., REG_SEQUENCE vreg1:sub1, sub0, vreg2, sub1 would produce + /// the list is modeled as . Operands with the undef + /// flag are not added to this list. + /// E.g., REG_SEQUENCE %1:sub1, sub0, %2, sub1 would produce /// two elements: - /// - vreg1:sub1, sub0 - /// - vreg2<:0>, sub1 + /// - %1:sub1, sub0 + /// - %2<:0>, sub1 /// /// \returns true if it is possible to build such an input sequence /// with the pair \p MI, \p DefIdx. False otherwise. @@ -442,11 +443,12 @@ class TargetInstrInfo : public MCInstrInfo { /// Build the equivalent inputs of a EXTRACT_SUBREG for the given \p MI /// and \p DefIdx. /// \p [out] InputReg of the equivalent EXTRACT_SUBREG. - /// E.g., EXTRACT_SUBREG vreg1:sub1, sub0, sub1 would produce: - /// - vreg1:sub1, sub0 + /// E.g., EXTRACT_SUBREG %1:sub1, sub0, sub1 would produce: + /// - %1:sub1, sub0 /// /// \returns true if it is possible to build such an input sequence - /// with the pair \p MI, \p DefIdx. False otherwise. + /// with the pair \p MI, \p DefIdx and the operand has no undef flag set. + /// False otherwise. /// /// \pre MI.isExtractSubreg() or MI.isExtractSubregLike(). /// @@ -460,12 +462,13 @@ class TargetInstrInfo : public MCInstrInfo { /// and \p DefIdx. /// \p [out] BaseReg and \p [out] InsertedReg contain /// the equivalent inputs of INSERT_SUBREG. - /// E.g., INSERT_SUBREG vreg0:sub0, vreg1:sub1, sub3 would produce: - /// - BaseReg: vreg0:sub0 - /// - InsertedReg: vreg1:sub1, sub3 + /// E.g., INSERT_SUBREG %0:sub0, %1:sub1, sub3 would produce: + /// - BaseReg: %0:sub0 + /// - InsertedReg: %1:sub1, sub3 /// /// \returns true if it is possible to build such an input sequence - /// with the pair \p MI, \p DefIdx. False otherwise. + /// with the pair \p MI, \p DefIdx and the operand has no undef flag set. + /// False otherwise. /// /// \pre MI.isInsertSubreg() or MI.isInsertSubregLike(). /// @@ -547,7 +550,7 @@ class TargetInstrInfo : public MCInstrInfo { /// Represents a predicate at the MachineFunction level. The control flow a /// MachineBranchPredicate represents is: /// - /// Reg = LHS `Predicate` RHS == ConditionDef + /// Reg = LHS `Predicate` RHS == ConditionDef /// if Reg then goto TrueDest else goto FalseDest /// struct MachineBranchPredicate { @@ -1432,7 +1435,7 @@ class TargetInstrInfo : public MCInstrInfo { /// For example, AVX instructions may copy part of a register operand into /// the unused high bits of the destination register. 
/// - /// vcvtsi2sdq %rax, %xmm0, %xmm14 + /// vcvtsi2sdq %rax, undef %xmm0, %xmm14 /// /// In the code above, vcvtsi2sdq copies %xmm0[127:64] into %xmm14 creating a /// false dependence on any previous write to %xmm0. @@ -1607,11 +1610,18 @@ class TargetInstrInfo : public MCInstrInfo { enum MachineOutlinerInstrType { Legal, Illegal, Invisible }; /// Returns how or if \p MI should be outlined. - virtual MachineOutlinerInstrType getOutliningType(MachineInstr &MI) const { + virtual MachineOutlinerInstrType + getOutliningType(MachineBasicBlock::iterator &MIT, unsigned Flags) const { llvm_unreachable( "Target didn't implement TargetInstrInfo::getOutliningType!"); } + /// \brief Returns target-defined flags defining properties of the MBB for + /// the outliner. + virtual unsigned getMachineOutlinerMBBFlags(MachineBasicBlock &MBB) const { + return 0x0; + } + /// Insert a custom epilogue for outlined functions. /// This may be empty, in which case no epilogue or return statement will be /// emitted. diff --git a/include/llvm/CodeGen/TargetLowering.h b/include/llvm/CodeGen/TargetLowering.h index 4210f58ddb03..7e3073ea047f 100644 --- a/include/llvm/CodeGen/TargetLowering.h +++ b/include/llvm/CodeGen/TargetLowering.h @@ -702,15 +702,16 @@ class TargetLoweringBase { struct IntrinsicInfo { unsigned opc = 0; // target opcode EVT memVT; // memory VT - const Value* ptrVal = nullptr; // value representing memory location + + // value representing memory location + PointerUnion ptrVal; + int offset = 0; // offset off of ptrVal unsigned size = 0; // the size of the memory location // (taken from memVT if zero) unsigned align = 1; // alignment - bool vol = false; // is volatile? - bool readMem = false; // reads memory? - bool writeMem = false; // writes memory? + MachineMemOperand::Flags flags = MachineMemOperand::MONone; IntrinsicInfo() = default; }; @@ -719,6 +720,7 @@ class TargetLoweringBase { /// true and store the intrinsic information into the IntrinsicInfo that was /// passed to the function. virtual bool getTgtMemIntrinsic(IntrinsicInfo &, const CallInst &, + MachineFunction &, unsigned /*Intrinsic*/) const { return false; } @@ -798,7 +800,7 @@ class TargetLoweringBase { } /// Return true if lowering to a jump table is allowed. - bool areJTsAllowed(const Function *Fn) const { + virtual bool areJTsAllowed(const Function *Fn) const { if (Fn->getFnAttribute("no-jump-tables").getValueAsString() == "true") return false; @@ -822,8 +824,8 @@ class TargetLoweringBase { /// also combined within this function. Currently, the minimum size check is /// performed in findJumpTable() in SelectionDAGBuiler and /// getEstimatedNumberOfCaseClusters() in BasicTTIImpl. - bool isSuitableForJumpTable(const SwitchInst *SI, uint64_t NumCases, - uint64_t Range) const { + virtual bool isSuitableForJumpTable(const SwitchInst *SI, uint64_t NumCases, + uint64_t Range) const { const bool OptForSize = SI->getParent()->getParent()->optForSize(); const unsigned MinDensity = getMinimumJumpTableDensity(OptForSize); const unsigned MaxJumpTableSize = @@ -1200,6 +1202,18 @@ class TargetLoweringBase { return OptSize ? MaxLoadsPerMemcmpOptSize : MaxLoadsPerMemcmp; } + /// For memcmp expansion when the memcmp result is only compared equal or + /// not-equal to 0, allow up to this number of load pairs per block. 
As an + /// example, this may allow 'memcmp(a, b, 3) == 0' in a single block: + /// a0 = load2bytes &a[0] + /// b0 = load2bytes &b[0] + /// a2 = load1byte &a[2] + /// b2 = load1byte &b[2] + /// r = cmp eq (a0 ^ b0 | a2 ^ b2), 0 + virtual unsigned getMemcmpEqZeroLoadsPerBlock() const { + return 1; + } + /// \brief Get maximum # of store operations permitted for llvm.memmove /// /// This function returns the maximum number of store operations permitted @@ -1274,7 +1288,7 @@ class TargetLoweringBase { } /// Return lower limit for number of blocks in a jump table. - unsigned getMinimumJumpTableEntries() const; + virtual unsigned getMinimumJumpTableEntries() const; /// Return lower limit of the density in a jump table. unsigned getMinimumJumpTableDensity(bool OptForSize) const; @@ -1360,6 +1374,12 @@ class TargetLoweringBase { /// getIRStackGuard returns nullptr. virtual Value *getSDagStackGuard(const Module &M) const; + /// If this function returns true, stack protection checks should XOR the + /// frame pointer (or whichever pointer is used to address locals) into the + /// stack guard value before checking it. getIRStackGuard must return nullptr + /// if this returns true. + virtual bool useStackGuardXorFP() const { return false; } + /// If the target has a standard stack protection check function that /// performs validation and error handling, returns the function. Otherwise, /// returns nullptr. Must be previously inserted by insertSSPDeclarations. @@ -1434,6 +1454,9 @@ class TargetLoweringBase { /// require a more complex expansion. unsigned getMinCmpXchgSizeInBits() const { return MinCmpXchgSizeInBits; } + /// Whether the target supports unaligned atomic operations. + bool supportsUnalignedAtomics() const { return SupportsUnalignedAtomics; } + /// Whether AtomicExpandPass should automatically insert fences and reduce /// ordering for this atomic. This should be true for most architectures with /// weak memory ordering. Defaults to false. @@ -1839,11 +1862,16 @@ class TargetLoweringBase { MaxAtomicSizeInBitsSupported = SizeInBits; } - // Sets the minimum cmpxchg or ll/sc size supported by the backend. + /// Sets the minimum cmpxchg or ll/sc size supported by the backend. void setMinCmpXchgSizeInBits(unsigned SizeInBits) { MinCmpXchgSizeInBits = SizeInBits; } + /// Sets whether unaligned atomic operations are supported. + void setSupportsUnalignedAtomics(bool UnalignedSupported) { + SupportsUnalignedAtomics = UnalignedSupported; + } + public: //===--------------------------------------------------------------------===// // Addressing mode description hooks (used by LSR etc). @@ -2325,6 +2353,9 @@ class TargetLoweringBase { /// backend supports. unsigned MinCmpXchgSizeInBits; + /// This indicates if the target supports unaligned atomic operations. + bool SupportsUnalignedAtomics; + /// If set to a physical register, this specifies the register that /// llvm.savestack/llvm.restorestack should save and restore. unsigned StackPointerRegisterToSaveRestore; @@ -2410,7 +2441,7 @@ class TargetLoweringBase { PromoteToType; /// Stores the name each libcall. - const char *LibcallRoutineNames[RTLIB::UNKNOWN_LIBCALL]; + const char *LibcallRoutineNames[RTLIB::UNKNOWN_LIBCALL + 1]; /// The ISD::CondCode that should be used to test the result of each of the /// comparison libcall against zero. @@ -2419,6 +2450,9 @@ class TargetLoweringBase { /// Stores the CallingConv that should be used for each libcall. 
CallingConv::ID LibcallCallingConvs[RTLIB::UNKNOWN_LIBCALL]; + /// Set default libcall names and calling conventions. + void InitLibcalls(const Triple &TT); + protected: /// Return true if the extension represented by \p I is free. /// \pre \p I is a sign, zero, or fp extension and @@ -3487,6 +3521,11 @@ class TargetLowering : public TargetLoweringBase { return false; } + virtual SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val, + const SDLoc &DL) const { + llvm_unreachable("not implemented for this target"); + } + /// Lower TLS global address SDNode for target independent emulated TLS model. virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const; diff --git a/include/llvm/CodeGen/TargetLoweringObjectFile.h b/include/llvm/CodeGen/TargetLoweringObjectFile.h index fe77c2954129..9877072012d9 100644 --- a/include/llvm/CodeGen/TargetLoweringObjectFile.h +++ b/include/llvm/CodeGen/TargetLoweringObjectFile.h @@ -183,6 +183,9 @@ class TargetLoweringObjectFile : public MCObjectFileInfo { virtual void emitLinkerFlagsForGlobal(raw_ostream &OS, const GlobalValue *GV) const {} + virtual void emitLinkerFlagsForUsed(raw_ostream &OS, + const GlobalValue *GV) const {} + protected: virtual MCSection *SelectSectionForGlobal(const GlobalObject *GO, SectionKind Kind, diff --git a/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h b/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h index c1ba32dd5de5..8ccb51c2af67 100644 --- a/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h +++ b/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h @@ -163,6 +163,9 @@ class TargetLoweringObjectFileCOFF : public TargetLoweringObjectFile { void emitLinkerFlagsForGlobal(raw_ostream &OS, const GlobalValue *GV) const override; + + void emitLinkerFlagsForUsed(raw_ostream &OS, + const GlobalValue *GV) const override; }; class TargetLoweringObjectFileWasm : public TargetLoweringObjectFile { @@ -182,6 +185,10 @@ class TargetLoweringObjectFileWasm : public TargetLoweringObjectFile { const Function &F) const override; void InitializeWasm(); + MCSection *getStaticCtorSection(unsigned Priority, + const MCSymbol *KeySym) const override; + MCSection *getStaticDtorSection(unsigned Priority, + const MCSymbol *KeySym) const override; const MCExpr *lowerRelativeReference(const GlobalValue *LHS, const GlobalValue *RHS, diff --git a/include/llvm/CodeGen/TargetOpcodes.def b/include/llvm/CodeGen/TargetOpcodes.def index 37e2e41b43da..d3e8483798a7 100644 --- a/include/llvm/CodeGen/TargetOpcodes.def +++ b/include/llvm/CodeGen/TargetOpcodes.def @@ -265,6 +265,25 @@ HANDLE_TARGET_OPCODE(G_LOAD) /// Generic store. HANDLE_TARGET_OPCODE(G_STORE) +/// Generic atomic cmpxchg with internal success check. +HANDLE_TARGET_OPCODE(G_ATOMIC_CMPXCHG_WITH_SUCCESS) + +/// Generic atomic cmpxchg. +HANDLE_TARGET_OPCODE(G_ATOMIC_CMPXCHG) + +/// Generic atomicrmw. +HANDLE_TARGET_OPCODE(G_ATOMICRMW_XCHG) +HANDLE_TARGET_OPCODE(G_ATOMICRMW_ADD) +HANDLE_TARGET_OPCODE(G_ATOMICRMW_SUB) +HANDLE_TARGET_OPCODE(G_ATOMICRMW_AND) +HANDLE_TARGET_OPCODE(G_ATOMICRMW_NAND) +HANDLE_TARGET_OPCODE(G_ATOMICRMW_OR) +HANDLE_TARGET_OPCODE(G_ATOMICRMW_XOR) +HANDLE_TARGET_OPCODE(G_ATOMICRMW_MAX) +HANDLE_TARGET_OPCODE(G_ATOMICRMW_MIN) +HANDLE_TARGET_OPCODE(G_ATOMICRMW_UMAX) +HANDLE_TARGET_OPCODE(G_ATOMICRMW_UMIN) + /// Generic conditional branch instruction. 
HANDLE_TARGET_OPCODE(G_BRCOND) diff --git a/include/llvm/CodeGen/TargetPassConfig.h b/include/llvm/CodeGen/TargetPassConfig.h index 1aaa85d77a54..7345107a11b0 100644 --- a/include/llvm/CodeGen/TargetPassConfig.h +++ b/include/llvm/CodeGen/TargetPassConfig.h @@ -84,20 +84,6 @@ template <> struct isPodLike { /// This is an ImmutablePass solely for the purpose of exposing CodeGen options /// to the internals of other CodeGen passes. class TargetPassConfig : public ImmutablePass { -public: - /// Pseudo Pass IDs. These are defined within TargetPassConfig because they - /// are unregistered pass IDs. They are only useful for use with - /// TargetPassConfig APIs to identify multiple occurrences of the same pass. - /// - - /// EarlyTailDuplicate - A clone of the TailDuplicate pass that runs early - /// during codegen, on SSA form. - static char EarlyTailDuplicateID; - - /// PostRAMachineLICM - A clone of the LICM pass that runs during late machine - /// optimization after regalloc. - static char PostRAMachineLICMID; - private: PassManagerBase *PM = nullptr; AnalysisID StartBefore = nullptr; @@ -218,9 +204,6 @@ class TargetPassConfig : public ImmutablePass { /// Return true if the optimized regalloc pipeline is enabled. bool getOptimizeRegAlloc() const; - /// Return true if shrink wrapping is enabled. - bool getEnableShrinkWrap() const; - /// Return true if the default global register allocator is in use and /// has not be overriden on the command line with '-regalloc=...' bool usingDefaultRegAlloc() const; @@ -320,14 +303,10 @@ class TargetPassConfig : public ImmutablePass { /// verification is enabled. void addVerifyPass(const std::string &Banner); - /// Check whether or not GlobalISel should be enabled by default. - /// Fallback/abort behavior is controlled via other methods. - virtual bool isGlobalISelEnabled() const; - /// Check whether or not GlobalISel should abort on error. - /// When this is disable, GlobalISel will fall back on SDISel instead of + /// When this is disabled, GlobalISel will fall back on SDISel instead of /// erroring out. - virtual bool isGlobalISelAbortEnabled() const; + bool isGlobalISelAbortEnabled() const; /// Check whether or not a diagnostic should be emitted when GlobalISel /// uses the fallback path. In other words, it will emit a diagnostic @@ -416,6 +395,13 @@ class TargetPassConfig : public ImmutablePass { /// immediately before machine code is emitted. virtual void addPreEmitPass() { } + /// Targets may add passes immediately before machine code is emitted in this + /// callback. This is called even later than `addPreEmitPass`. + // FIXME: Rename `addPreEmitPass` to something more sensible given its actual + // position and remove the `2` suffix here as this callback is what + // `addPreEmitPass` *should* be but in reality isn't. + virtual void addPreEmitPass2() {} + /// Utilities for targets to add passes to the pass manager. /// diff --git a/include/llvm/CodeGen/TargetRegisterInfo.h b/include/llvm/CodeGen/TargetRegisterInfo.h index 2641a1aea835..81907538fb0b 100644 --- a/include/llvm/CodeGen/TargetRegisterInfo.h +++ b/include/llvm/CodeGen/TargetRegisterInfo.h @@ -785,11 +785,10 @@ class TargetRegisterInfo : public MCRegisterInfo { /// as returned from RegisterClassInfo::getOrder(). The hint registers must /// come from Order, and they must not be reserved. /// - /// The default implementation of this function can resolve - /// target-independent hints provided to MRI::setRegAllocationHint with - /// HintType == 0. 
Targets that override this function should defer to the - /// default implementation if they have no reason to change the allocation - /// order for VirtReg. There may be target-independent hints. + /// The default implementation of this function will only add target + /// independent register allocation hints. Targets that override this + /// function should typically call this default implementation as well and + /// expect to see generic copy hints added. virtual bool getRegAllocationHints(unsigned VirtReg, ArrayRef Order, SmallVectorImpl &Hints, @@ -808,6 +807,13 @@ class TargetRegisterInfo : public MCRegisterInfo { // Do nothing. } + /// The creation of multiple copy hints have been implemented in + /// weightCalcHelper(), but since this affects so many tests for many + /// targets, this is temporarily disabled per default. THIS SHOULD BE + /// "GENERAL GOODNESS" and hopefully all targets will update their tests + /// and enable this soon. This hook should then be removed. + virtual bool enableMultipleCopyHints() const { return false; } + /// Allow the target to reverse allocation order of local live ranges. This /// will generally allocate shorter local live ranges first. For targets with /// many registers, this could reduce regalloc compile time by a large @@ -1138,9 +1144,9 @@ struct VirtReg2IndexFunctor { /// /// The format is: /// %noreg - NoRegister -/// %vreg5 - a virtual register. -/// %vreg5:sub_8bit - a virtual register with sub-register index (with TRI). -/// %EAX - a physical register +/// %5 - a virtual register. +/// %5:sub_8bit - a virtual register with sub-register index (with TRI). +/// %eax - a physical register /// %physreg17 - a physical register when no TRI instance given. /// /// Usage: OS << printReg(Reg, TRI, SubRegIdx) << '\n'; @@ -1151,8 +1157,8 @@ Printable printReg(unsigned Reg, const TargetRegisterInfo *TRI = nullptr, /// /// Register units are named after their root registers: /// -/// AL - Single root. -/// FP0~ST7 - Dual roots. +/// al - Single root. +/// fp0~st7 - Dual roots. /// /// Usage: OS << printRegUnit(Unit, TRI) << '\n'; Printable printRegUnit(unsigned Unit, const TargetRegisterInfo *TRI); @@ -1161,6 +1167,11 @@ Printable printRegUnit(unsigned Unit, const TargetRegisterInfo *TRI); /// registers on a \ref raw_ostream. Printable printVRegOrUnit(unsigned VRegOrUnit, const TargetRegisterInfo *TRI); +/// \brief Create Printable object to print register classes or register banks +/// on a \ref raw_ostream. +Printable printRegClassOrBank(unsigned Reg, const MachineRegisterInfo &RegInfo, + const TargetRegisterInfo *TRI); + } // end namespace llvm #endif // LLVM_CODEGEN_TARGETREGISTERINFO_H diff --git a/include/llvm/CodeGen/TargetSubtargetInfo.h b/include/llvm/CodeGen/TargetSubtargetInfo.h index 576522aef466..5e5faac6cbb1 100644 --- a/include/llvm/CodeGen/TargetSubtargetInfo.h +++ b/include/llvm/CodeGen/TargetSubtargetInfo.h @@ -174,6 +174,9 @@ class TargetSubtargetInfo : public MCSubtargetInfo { /// \brief True if the subtarget should run the atomic expansion pass. virtual bool enableAtomicExpand() const; + /// True if the subtarget should run the indirectbr expansion pass. + virtual bool enableIndirectBrExpand() const; + /// \brief Override generic scheduling policy within a region. 
/// /// This is a convenient way for targets that don't provide any custom @@ -248,6 +251,9 @@ class TargetSubtargetInfo : public MCSubtargetInfo { /// Returns string representation of scheduler comment std::string getSchedInfoStr(const MachineInstr &MI) const override; std::string getSchedInfoStr(MCInst const &MCI) const override; + + /// This is called after a .mir file was loaded. + virtual void mirFileLoaded(MachineFunction &MF) const; }; } // end namespace llvm diff --git a/include/llvm/CodeGen/ValueTypes.td b/include/llvm/CodeGen/ValueTypes.td index b1e62daa5aae..73c7fb4ce4b3 100644 --- a/include/llvm/CodeGen/ValueTypes.td +++ b/include/llvm/CodeGen/ValueTypes.td @@ -40,110 +40,111 @@ def v8i1 : ValueType<8 , 17>; // 8 x i1 vector value def v16i1 : ValueType<16, 18>; // 16 x i1 vector value def v32i1 : ValueType<32 , 19>; // 32 x i1 vector value def v64i1 : ValueType<64 , 20>; // 64 x i1 vector value -def v512i1 : ValueType<512, 21>; // 512 x i1 vector value -def v1024i1: ValueType<1024,22>; //1024 x i1 vector value - -def v1i8 : ValueType<8, 23>; // 1 x i8 vector value -def v2i8 : ValueType<16 , 24>; // 2 x i8 vector value -def v4i8 : ValueType<32 , 25>; // 4 x i8 vector value -def v8i8 : ValueType<64 , 26>; // 8 x i8 vector value -def v16i8 : ValueType<128, 27>; // 16 x i8 vector value -def v32i8 : ValueType<256, 28>; // 32 x i8 vector value -def v64i8 : ValueType<512, 29>; // 64 x i8 vector value -def v128i8 : ValueType<1024,30>; //128 x i8 vector value -def v256i8 : ValueType<2048,31>; //256 x i8 vector value - -def v1i16 : ValueType<16 , 32>; // 1 x i16 vector value -def v2i16 : ValueType<32 , 33>; // 2 x i16 vector value -def v4i16 : ValueType<64 , 34>; // 4 x i16 vector value -def v8i16 : ValueType<128, 35>; // 8 x i16 vector value -def v16i16 : ValueType<256, 36>; // 16 x i16 vector value -def v32i16 : ValueType<512, 37>; // 32 x i16 vector value -def v64i16 : ValueType<1024,38>; // 64 x i16 vector value -def v128i16: ValueType<2048,39>; //128 x i16 vector value - -def v1i32 : ValueType<32 , 40>; // 1 x i32 vector value -def v2i32 : ValueType<64 , 41>; // 2 x i32 vector value -def v4i32 : ValueType<128, 42>; // 4 x i32 vector value -def v8i32 : ValueType<256, 43>; // 8 x i32 vector value -def v16i32 : ValueType<512, 44>; // 16 x i32 vector value -def v32i32 : ValueType<1024,45>; // 32 x i32 vector value -def v64i32 : ValueType<2048,46>; // 32 x i32 vector value - -def v1i64 : ValueType<64 , 47>; // 1 x i64 vector value -def v2i64 : ValueType<128, 48>; // 2 x i64 vector value -def v4i64 : ValueType<256, 49>; // 4 x i64 vector value -def v8i64 : ValueType<512, 50>; // 8 x i64 vector value -def v16i64 : ValueType<1024,51>; // 16 x i64 vector value -def v32i64 : ValueType<2048,52>; // 32 x i64 vector value - -def v1i128 : ValueType<128, 53>; // 1 x i128 vector value - -def nxv1i1 : ValueType<1, 54>; // n x 1 x i1 vector value -def nxv2i1 : ValueType<2, 55>; // n x 2 x i1 vector value -def nxv4i1 : ValueType<4, 56>; // n x 4 x i1 vector value -def nxv8i1 : ValueType<8, 57>; // n x 8 x i1 vector value -def nxv16i1 : ValueType<16, 58>; // n x 16 x i1 vector value -def nxv32i1 : ValueType<32, 59>; // n x 32 x i1 vector value - -def nxv1i8 : ValueType<8, 60>; // n x 1 x i8 vector value -def nxv2i8 : ValueType<16, 61>; // n x 2 x i8 vector value -def nxv4i8 : ValueType<32, 62>; // n x 4 x i8 vector value -def nxv8i8 : ValueType<64, 63>; // n x 8 x i8 vector value -def nxv16i8 : ValueType<128, 64>; // n x 16 x i8 vector value -def nxv32i8 : ValueType<256, 65>; // n x 32 x i8 vector 
value - -def nxv1i16 : ValueType<16, 66>; // n x 1 x i16 vector value -def nxv2i16 : ValueType<32, 67>; // n x 2 x i16 vector value -def nxv4i16 : ValueType<64, 68>; // n x 4 x i16 vector value -def nxv8i16 : ValueType<128, 69>; // n x 8 x i16 vector value -def nxv16i16: ValueType<256, 70>; // n x 16 x i16 vector value -def nxv32i16: ValueType<512, 71>; // n x 32 x i16 vector value - -def nxv1i32 : ValueType<32, 72>; // n x 1 x i32 vector value -def nxv2i32 : ValueType<64, 73>; // n x 2 x i32 vector value -def nxv4i32 : ValueType<128, 74>; // n x 4 x i32 vector value -def nxv8i32 : ValueType<256, 75>; // n x 8 x i32 vector value -def nxv16i32: ValueType<512, 76>; // n x 16 x i32 vector value -def nxv32i32: ValueType<1024,77>; // n x 32 x i32 vector value - -def nxv1i64 : ValueType<64, 78>; // n x 1 x i64 vector value -def nxv2i64 : ValueType<128, 79>; // n x 2 x i64 vector value -def nxv4i64 : ValueType<256, 80>; // n x 4 x i64 vector value -def nxv8i64 : ValueType<512, 81>; // n x 8 x i64 vector value -def nxv16i64: ValueType<1024,82>; // n x 16 x i64 vector value -def nxv32i64: ValueType<2048,83>; // n x 32 x i64 vector value - -def v2f16 : ValueType<32 , 84>; // 2 x f16 vector value -def v4f16 : ValueType<64 , 85>; // 4 x f16 vector value -def v8f16 : ValueType<128, 86>; // 8 x f16 vector value -def v1f32 : ValueType<32 , 87>; // 1 x f32 vector value -def v2f32 : ValueType<64 , 88>; // 2 x f32 vector value -def v4f32 : ValueType<128, 89>; // 4 x f32 vector value -def v8f32 : ValueType<256, 90>; // 8 x f32 vector value -def v16f32 : ValueType<512, 91>; // 16 x f32 vector value -def v1f64 : ValueType<64, 92>; // 1 x f64 vector value -def v2f64 : ValueType<128, 93>; // 2 x f64 vector value -def v4f64 : ValueType<256, 94>; // 4 x f64 vector value -def v8f64 : ValueType<512, 95>; // 8 x f64 vector value - -def nxv2f16 : ValueType<32 , 96>; // n x 2 x f16 vector value -def nxv4f16 : ValueType<64 , 97>; // n x 4 x f16 vector value -def nxv8f16 : ValueType<128, 98>; // n x 8 x f16 vector value -def nxv1f32 : ValueType<32 , 99>; // n x 1 x f32 vector value -def nxv2f32 : ValueType<64 , 100>; // n x 2 x f32 vector value -def nxv4f32 : ValueType<128, 101>; // n x 4 x f32 vector value -def nxv8f32 : ValueType<256, 102>; // n x 8 x f32 vector value -def nxv16f32 : ValueType<512, 103>; // n x 16 x f32 vector value -def nxv1f64 : ValueType<64, 104>; // n x 1 x f64 vector value -def nxv2f64 : ValueType<128, 105>; // n x 2 x f64 vector value -def nxv4f64 : ValueType<256, 106>; // n x 4 x f64 vector value -def nxv8f64 : ValueType<512, 107>; // n x 8 x f64 vector value - -def x86mmx : ValueType<64 , 108>; // X86 MMX value -def FlagVT : ValueType<0 , 109>; // Pre-RA sched glue -def isVoid : ValueType<0 , 110>; // Produces no value -def untyped: ValueType<8 , 111>; // Produces an untyped value +def v128i1 : ValueType<128, 21>; // 128 x i1 vector value +def v512i1 : ValueType<512, 22>; // 512 x i1 vector value +def v1024i1: ValueType<1024,23>; //1024 x i1 vector value + +def v1i8 : ValueType<8, 24>; // 1 x i8 vector value +def v2i8 : ValueType<16 , 25>; // 2 x i8 vector value +def v4i8 : ValueType<32 , 26>; // 4 x i8 vector value +def v8i8 : ValueType<64 , 27>; // 8 x i8 vector value +def v16i8 : ValueType<128, 28>; // 16 x i8 vector value +def v32i8 : ValueType<256, 29>; // 32 x i8 vector value +def v64i8 : ValueType<512, 30>; // 64 x i8 vector value +def v128i8 : ValueType<1024,31>; //128 x i8 vector value +def v256i8 : ValueType<2048,32>; //256 x i8 vector value + +def v1i16 : ValueType<16 , 33>; // 1 x 
i16 vector value +def v2i16 : ValueType<32 , 34>; // 2 x i16 vector value +def v4i16 : ValueType<64 , 35>; // 4 x i16 vector value +def v8i16 : ValueType<128, 36>; // 8 x i16 vector value +def v16i16 : ValueType<256, 37>; // 16 x i16 vector value +def v32i16 : ValueType<512, 38>; // 32 x i16 vector value +def v64i16 : ValueType<1024,39>; // 64 x i16 vector value +def v128i16: ValueType<2048,40>; //128 x i16 vector value + +def v1i32 : ValueType<32 , 41>; // 1 x i32 vector value +def v2i32 : ValueType<64 , 42>; // 2 x i32 vector value +def v4i32 : ValueType<128, 43>; // 4 x i32 vector value +def v8i32 : ValueType<256, 44>; // 8 x i32 vector value +def v16i32 : ValueType<512, 45>; // 16 x i32 vector value +def v32i32 : ValueType<1024,46>; // 32 x i32 vector value +def v64i32 : ValueType<2048,47>; // 32 x i32 vector value + +def v1i64 : ValueType<64 , 48>; // 1 x i64 vector value +def v2i64 : ValueType<128, 49>; // 2 x i64 vector value +def v4i64 : ValueType<256, 50>; // 4 x i64 vector value +def v8i64 : ValueType<512, 51>; // 8 x i64 vector value +def v16i64 : ValueType<1024,52>; // 16 x i64 vector value +def v32i64 : ValueType<2048,53>; // 32 x i64 vector value + +def v1i128 : ValueType<128, 54>; // 1 x i128 vector value + +def nxv1i1 : ValueType<1, 55>; // n x 1 x i1 vector value +def nxv2i1 : ValueType<2, 56>; // n x 2 x i1 vector value +def nxv4i1 : ValueType<4, 57>; // n x 4 x i1 vector value +def nxv8i1 : ValueType<8, 58>; // n x 8 x i1 vector value +def nxv16i1 : ValueType<16, 59>; // n x 16 x i1 vector value +def nxv32i1 : ValueType<32, 60>; // n x 32 x i1 vector value + +def nxv1i8 : ValueType<8, 61>; // n x 1 x i8 vector value +def nxv2i8 : ValueType<16, 62>; // n x 2 x i8 vector value +def nxv4i8 : ValueType<32, 63>; // n x 4 x i8 vector value +def nxv8i8 : ValueType<64, 64>; // n x 8 x i8 vector value +def nxv16i8 : ValueType<128, 65>; // n x 16 x i8 vector value +def nxv32i8 : ValueType<256, 66>; // n x 32 x i8 vector value + +def nxv1i16 : ValueType<16, 67>; // n x 1 x i16 vector value +def nxv2i16 : ValueType<32, 68>; // n x 2 x i16 vector value +def nxv4i16 : ValueType<64, 69>; // n x 4 x i16 vector value +def nxv8i16 : ValueType<128, 70>; // n x 8 x i16 vector value +def nxv16i16: ValueType<256, 71>; // n x 16 x i16 vector value +def nxv32i16: ValueType<512, 72>; // n x 32 x i16 vector value + +def nxv1i32 : ValueType<32, 73>; // n x 1 x i32 vector value +def nxv2i32 : ValueType<64, 74>; // n x 2 x i32 vector value +def nxv4i32 : ValueType<128, 75>; // n x 4 x i32 vector value +def nxv8i32 : ValueType<256, 76>; // n x 8 x i32 vector value +def nxv16i32: ValueType<512, 77>; // n x 16 x i32 vector value +def nxv32i32: ValueType<1024,78>; // n x 32 x i32 vector value + +def nxv1i64 : ValueType<64, 79>; // n x 1 x i64 vector value +def nxv2i64 : ValueType<128, 80>; // n x 2 x i64 vector value +def nxv4i64 : ValueType<256, 81>; // n x 4 x i64 vector value +def nxv8i64 : ValueType<512, 82>; // n x 8 x i64 vector value +def nxv16i64: ValueType<1024,83>; // n x 16 x i64 vector value +def nxv32i64: ValueType<2048,84>; // n x 32 x i64 vector value + +def v2f16 : ValueType<32 , 85>; // 2 x f16 vector value +def v4f16 : ValueType<64 , 86>; // 4 x f16 vector value +def v8f16 : ValueType<128, 87>; // 8 x f16 vector value +def v1f32 : ValueType<32 , 88>; // 1 x f32 vector value +def v2f32 : ValueType<64 , 89>; // 2 x f32 vector value +def v4f32 : ValueType<128, 90>; // 4 x f32 vector value +def v8f32 : ValueType<256, 91>; // 8 x f32 vector value +def v16f32 : ValueType<512, 92>; // 16 x 
f32 vector value +def v1f64 : ValueType<64, 93>; // 1 x f64 vector value +def v2f64 : ValueType<128, 94>; // 2 x f64 vector value +def v4f64 : ValueType<256, 95>; // 4 x f64 vector value +def v8f64 : ValueType<512, 96>; // 8 x f64 vector value + +def nxv2f16 : ValueType<32 , 97>; // n x 2 x f16 vector value +def nxv4f16 : ValueType<64 , 98>; // n x 4 x f16 vector value +def nxv8f16 : ValueType<128, 99>; // n x 8 x f16 vector value +def nxv1f32 : ValueType<32 , 100>; // n x 1 x f32 vector value +def nxv2f32 : ValueType<64 , 101>; // n x 2 x f32 vector value +def nxv4f32 : ValueType<128, 102>; // n x 4 x f32 vector value +def nxv8f32 : ValueType<256, 103>; // n x 8 x f32 vector value +def nxv16f32 : ValueType<512, 104>; // n x 16 x f32 vector value +def nxv1f64 : ValueType<64, 105>; // n x 1 x f64 vector value +def nxv2f64 : ValueType<128, 106>; // n x 2 x f64 vector value +def nxv4f64 : ValueType<256, 107>; // n x 4 x f64 vector value +def nxv8f64 : ValueType<512, 108>; // n x 8 x f64 vector value + +def x86mmx : ValueType<64 , 109>; // X86 MMX value +def FlagVT : ValueType<0 , 110>; // Pre-RA sched glue +def isVoid : ValueType<0 , 111>; // Produces no value +def untyped: ValueType<8 , 112>; // Produces an untyped value def token : ValueType<0 , 248>; // TokenTy def MetadataVT: ValueType<0, 249>; // Metadata diff --git a/include/llvm/Config/config.h.cmake b/include/llvm/Config/config.h.cmake index 038f70a79f9e..940f84203042 100644 --- a/include/llvm/Config/config.h.cmake +++ b/include/llvm/Config/config.h.cmake @@ -359,9 +359,6 @@ /* Has gcc/MSVC atomic intrinsics */ #cmakedefine01 LLVM_HAS_ATOMICS -/* Define if LLVM_ENABLE_DUMP is enabled */ -#cmakedefine LLVM_ENABLE_DUMP - /* Host triple LLVM will be executed on */ #cmakedefine LLVM_HOST_TRIPLE "${LLVM_HOST_TRIPLE}" diff --git a/include/llvm/Config/llvm-config.h.cmake b/include/llvm/Config/llvm-config.h.cmake index 4b0c59460619..4daa00f3bc40 100644 --- a/include/llvm/Config/llvm-config.h.cmake +++ b/include/llvm/Config/llvm-config.h.cmake @@ -14,6 +14,9 @@ #ifndef LLVM_CONFIG_H #define LLVM_CONFIG_H +/* Define if LLVM_ENABLE_DUMP is enabled */ +#cmakedefine LLVM_ENABLE_DUMP + /* Define if we link Polly to the tools */ #cmakedefine LINK_POLLY_INTO_TOOLS diff --git a/include/llvm/DebugInfo/CodeView/AppendingTypeTableBuilder.h b/include/llvm/DebugInfo/CodeView/AppendingTypeTableBuilder.h new file mode 100644 index 000000000000..bd1743511ed4 --- /dev/null +++ b/include/llvm/DebugInfo/CodeView/AppendingTypeTableBuilder.h @@ -0,0 +1,70 @@ +//===- AppendingTypeTableBuilder.h -------------------------------*- C++-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_APPENDINGTYPETABLEBUILDER_H +#define LLVM_DEBUGINFO_CODEVIEW_APPENDINGTYPETABLEBUILDER_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/SimpleTypeSerializer.h" +#include "llvm/DebugInfo/CodeView/TypeCollection.h" +#include "llvm/DebugInfo/CodeView/TypeIndex.h" +#include "llvm/Support/Allocator.h" +#include +#include +#include +#include + +namespace llvm { +namespace codeview { + +class ContinuationRecordBuilder; + +class AppendingTypeTableBuilder : public TypeCollection { + + BumpPtrAllocator &RecordStorage; + SimpleTypeSerializer SimpleSerializer; + + /// Contains a list of all records indexed by TypeIndex.toArrayIndex(). + SmallVector, 2> SeenRecords; + +public: + explicit AppendingTypeTableBuilder(BumpPtrAllocator &Storage); + ~AppendingTypeTableBuilder(); + + // TypeTableCollection overrides + Optional getFirst() override; + Optional getNext(TypeIndex Prev) override; + CVType getType(TypeIndex Index) override; + StringRef getTypeName(TypeIndex Index) override; + bool contains(TypeIndex Index) override; + uint32_t size() override; + uint32_t capacity() override; + + // public interface + void reset(); + TypeIndex nextTypeIndex() const; + + BumpPtrAllocator &getAllocator() { return RecordStorage; } + + ArrayRef> records() const; + TypeIndex insertRecordBytes(ArrayRef &Record); + TypeIndex insertRecord(ContinuationRecordBuilder &Builder); + + template TypeIndex writeLeafType(T &Record) { + ArrayRef Data = SimpleSerializer.serialize(Record); + return insertRecordBytes(Data); + } +}; + +} // end namespace codeview +} // end namespace llvm + +#endif // LLVM_DEBUGINFO_CODEVIEW_TYPETABLEBUILDER_H diff --git a/include/llvm/DebugInfo/CodeView/CVRecord.h b/include/llvm/DebugInfo/CodeView/CVRecord.h index 9f3a753ad1ae..596996d94519 100644 --- a/include/llvm/DebugInfo/CodeView/CVRecord.h +++ b/include/llvm/DebugInfo/CodeView/CVRecord.h @@ -61,6 +61,30 @@ template struct RemappedRecord { SmallVector, 8> Mappings; }; +template +Error forEachCodeViewRecord(ArrayRef StreamBuffer, Func F) { + while (!StreamBuffer.empty()) { + if (StreamBuffer.size() < sizeof(RecordPrefix)) + return make_error(cv_error_code::corrupt_record); + + const RecordPrefix *Prefix = + reinterpret_cast(StreamBuffer.data()); + + uint16_t RealLen = Prefix->RecordLen + 2; + if (StreamBuffer.size() < RealLen) + return make_error(cv_error_code::corrupt_record); + + ArrayRef Data = StreamBuffer.take_front(RealLen); + StreamBuffer = StreamBuffer.drop_front(RealLen); + + Record R(static_cast((uint16_t)Prefix->RecordKind), + Data); + if (auto EC = F(R)) + return EC; + } + return Error::success(); +} + /// Read a complete record from a stream at a random offset. template inline Expected> readCVRecordFromStream(BinaryStreamRef Stream, diff --git a/include/llvm/DebugInfo/CodeView/ContinuationRecordBuilder.h b/include/llvm/DebugInfo/CodeView/ContinuationRecordBuilder.h new file mode 100644 index 000000000000..7f851a2595dc --- /dev/null +++ b/include/llvm/DebugInfo/CodeView/ContinuationRecordBuilder.h @@ -0,0 +1,65 @@ +//===- ContinuationRecordBuilder.h ------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
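A minimal usage sketch of the forEachCodeViewRecord helper added to CVRecord.h above, assuming the angle-bracketed template arguments that were lost in this copy of the patch (the helper is parameterized on the record type and the callback, and takes ArrayRef<uint8_t>); the wrapper function and the `Bytes` buffer are illustrative only:

// Illustrative sketch: walk a buffer of length-prefixed CodeView type records
// and count them. A truncated record prefix makes the helper return a
// cv_error_code::corrupt_record error, which is propagated to the caller.
#include "llvm/ADT/ArrayRef.h"
#include "llvm/DebugInfo/CodeView/CVRecord.h"
#include "llvm/Support/Error.h"

using namespace llvm;
using namespace llvm::codeview;

static Expected<unsigned> countTypeRecords(ArrayRef<uint8_t> Bytes) {
  unsigned Count = 0;
  Error E = forEachCodeViewRecord<CVType>(Bytes, [&](const CVType &) -> Error {
    ++Count;                  // the callback sees one record at a time
    return Error::success();
  });
  if (E)
    return std::move(E);
  return Count;
}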
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_CONTINUATIONRECORDBUILDER_H +#define LLVM_DEBUGINFO_CODEVIEW_CONTINUATIONRECORDBUILDER_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/RecordSerialization.h" +#include "llvm/DebugInfo/CodeView/TypeIndex.h" +#include "llvm/DebugInfo/CodeView/TypeRecord.h" +#include "llvm/DebugInfo/CodeView/TypeRecordMapping.h" +#include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/BinaryByteStream.h" +#include "llvm/Support/BinaryStreamWriter.h" +#include "llvm/Support/Error.h" +#include +#include +#include +#include + +namespace llvm { +namespace codeview { +enum class ContinuationRecordKind { FieldList, MethodOverloadList }; + +class ContinuationRecordBuilder { + SmallVector SegmentOffsets; + Optional Kind; + AppendingBinaryByteStream Buffer; + BinaryStreamWriter SegmentWriter; + TypeRecordMapping Mapping; + ArrayRef InjectedSegmentBytes; + + uint32_t getCurrentSegmentLength() const; + + void insertSegmentEnd(uint32_t Offset); + CVType createSegmentRecord(uint32_t OffBegin, uint32_t OffEnd, + Optional RefersTo); + +public: + ContinuationRecordBuilder(); + ~ContinuationRecordBuilder(); + + void begin(ContinuationRecordKind RecordKind); + + // This template is explicitly instantiated in the implementation file for all + // supported types. The method itself is ugly, so inlining it into the header + // file clutters an otherwise straightforward interface. + template void writeMemberType(RecordType &Record); + + std::vector end(TypeIndex Index); +}; +} // namespace codeview +} // namespace llvm + +#endif \ No newline at end of file diff --git a/include/llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h b/include/llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h new file mode 100644 index 000000000000..d8ac3343c15f --- /dev/null +++ b/include/llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h @@ -0,0 +1,87 @@ +//===- GlobalTypeTableBuilder.h ----------------------------------*- C++-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_GLOBALTYPETABLEBUILDER_H +#define LLVM_DEBUGINFO_CODEVIEW_GLOBALTYPETABLEBUILDER_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/SimpleTypeSerializer.h" +#include "llvm/DebugInfo/CodeView/TypeCollection.h" +#include "llvm/DebugInfo/CodeView/TypeHashing.h" +#include "llvm/DebugInfo/CodeView/TypeIndex.h" +#include "llvm/Support/Allocator.h" +#include +#include +#include +#include + +namespace llvm { +namespace codeview { + +class ContinuationRecordBuilder; + +class GlobalTypeTableBuilder : public TypeCollection { + /// Storage for records. These need to outlive the TypeTableBuilder. + BumpPtrAllocator &RecordStorage; + + /// A serializer that can write non-continuation leaf types. Only used as + /// a convenience function so that we can provide an interface method to + /// write an unserialized record. + SimpleTypeSerializer SimpleSerializer; + + /// Hash table. 
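A hedged sketch of the begin / writeMemberType / end protocol exposed by the ContinuationRecordBuilder added above. The EnumeratorRecord member and the MergingTypeTableBuilder destination are only examples; insertRecord() is the overload declared on the new type table builders in this patch:

// Build an LF_FIELDLIST with a single enumerator; the builder takes care of
// splitting oversized field lists into continuation segments.
#include "llvm/ADT/APSInt.h"
#include "llvm/DebugInfo/CodeView/ContinuationRecordBuilder.h"
#include "llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"

using namespace llvm;
using namespace llvm::codeview;

static TypeIndex buildTinyFieldList(MergingTypeTableBuilder &TypeTable) {
  ContinuationRecordBuilder CRB;
  CRB.begin(ContinuationRecordKind::FieldList);

  // Any member record type works here; an enumerator is just a small one.
  EnumeratorRecord Red(MemberAccess::Public, APSInt::get(1), "Red");
  CRB.writeMemberType(Red);

  // insertRecord() finishes the builder (calling end() with the next free
  // TypeIndex) and copies the resulting segments into the table.
  return TypeTable.insertRecord(CRB);
}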
+  DenseMap<GloballyHashedType, TypeIndex> HashedRecords;
+
+  /// Contains a list of all records indexed by TypeIndex.toArrayIndex().
+  SmallVector<ArrayRef<uint8_t>, 2> SeenRecords;
+
+  /// Contains a list of all hash values indexed by TypeIndex.toArrayIndex().
+  SmallVector<GloballyHashedType, 2> SeenHashes;
+
+public:
+  explicit GlobalTypeTableBuilder(BumpPtrAllocator &Storage);
+  ~GlobalTypeTableBuilder();
+
+  // TypeTableCollection overrides
+  Optional<TypeIndex> getFirst() override;
+  Optional<TypeIndex> getNext(TypeIndex Prev) override;
+  CVType getType(TypeIndex Index) override;
+  StringRef getTypeName(TypeIndex Index) override;
+  bool contains(TypeIndex Index) override;
+  uint32_t size() override;
+  uint32_t capacity() override;
+
+  // public interface
+  void reset();
+  TypeIndex nextTypeIndex() const;
+
+  BumpPtrAllocator &getAllocator() { return RecordStorage; }
+
+  ArrayRef<ArrayRef<uint8_t>> records() const;
+  ArrayRef<GloballyHashedType> hashes() const;
+
+  using CreateRecord = llvm::function_ref<ArrayRef<uint8_t>()>;
+
+  TypeIndex insertRecordAs(GloballyHashedType Hash, CreateRecord Create);
+  TypeIndex insertRecordBytes(ArrayRef<uint8_t> Data);
+  TypeIndex insertRecord(ContinuationRecordBuilder &Builder);
+
+  template <typename T> TypeIndex writeLeafType(T &Record) {
+    ArrayRef<uint8_t> Data = SimpleSerializer.serialize(Record);
+    return insertRecordBytes(Data);
+  }
+};
+
+} // end namespace codeview
+} // end namespace llvm
+
+#endif // LLVM_DEBUGINFO_CODEVIEW_GLOBALTYPETABLEBUILDER_H
diff --git a/include/llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h b/include/llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h
index 1d5117475bb5..16d78692c839 100644
--- a/include/llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h
+++ b/include/llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h
@@ -67,6 +67,7 @@ class LazyRandomTypeCollection : public TypeCollection {
   void reset(ArrayRef<uint8_t> Data, uint32_t RecordCountHint);
   void reset(StringRef Data, uint32_t RecordCountHint);
+  void reset(BinaryStreamReader &Reader, uint32_t RecordCountHint);

   uint32_t getOffsetOfType(TypeIndex Index);

diff --git a/include/llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h b/include/llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h
new file mode 100644
index 000000000000..9030918ebbb3
--- /dev/null
+++ b/include/llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h
@@ -0,0 +1,81 @@
+//===- MergingTypeTableBuilder.h ---------------------------------*- C++-*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_CODEVIEW_MERGINGTYPETABLEBUILDER_H
+#define LLVM_DEBUGINFO_CODEVIEW_MERGINGTYPETABLEBUILDER_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/CodeView/SimpleTypeSerializer.h"
+#include "llvm/DebugInfo/CodeView/TypeCollection.h"
+#include "llvm/DebugInfo/CodeView/TypeHashing.h"
+#include "llvm/DebugInfo/CodeView/TypeIndex.h"
+#include "llvm/Support/Allocator.h"
+#include
+#include
+#include
+#include
+
+namespace llvm {
+namespace codeview {
+
+class ContinuationRecordBuilder;
+
+class MergingTypeTableBuilder : public TypeCollection {
+  /// Storage for records. These need to outlive the TypeTableBuilder.
+  BumpPtrAllocator &RecordStorage;
+
+  /// A serializer that can write non-continuation leaf types. Only used as
+  /// a convenience function so that we can provide an interface method to
+  /// write an unserialized record.
+ SimpleTypeSerializer SimpleSerializer; + + /// Hash table. + DenseMap HashedRecords; + + /// Contains a list of all records indexed by TypeIndex.toArrayIndex(). + SmallVector, 2> SeenRecords; + +public: + explicit MergingTypeTableBuilder(BumpPtrAllocator &Storage); + ~MergingTypeTableBuilder(); + + // TypeTableCollection overrides + Optional getFirst() override; + Optional getNext(TypeIndex Prev) override; + CVType getType(TypeIndex Index) override; + StringRef getTypeName(TypeIndex Index) override; + bool contains(TypeIndex Index) override; + uint32_t size() override; + uint32_t capacity() override; + + // public interface + void reset(); + TypeIndex nextTypeIndex() const; + + BumpPtrAllocator &getAllocator() { return RecordStorage; } + + ArrayRef> records() const; + + TypeIndex insertRecordAs(hash_code Hash, ArrayRef &Record); + TypeIndex insertRecordBytes(ArrayRef &Record); + TypeIndex insertRecord(ContinuationRecordBuilder &Builder); + + template TypeIndex writeLeafType(T &Record) { + ArrayRef Data = SimpleSerializer.serialize(Record); + return insertRecordBytes(Data); + } +}; + +} // end namespace codeview +} // end namespace llvm + +#endif // LLVM_DEBUGINFO_CODEVIEW_MERGINGTYPETABLEBUILDER_H diff --git a/include/llvm/DebugInfo/CodeView/SimpleTypeSerializer.h b/include/llvm/DebugInfo/CodeView/SimpleTypeSerializer.h new file mode 100644 index 000000000000..a85d9270186b --- /dev/null +++ b/include/llvm/DebugInfo/CodeView/SimpleTypeSerializer.h @@ -0,0 +1,53 @@ +//===- SimpleTypeSerializer.h -----------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_SIMPLETYPESERIALIZER_H +#define LLVM_DEBUGINFO_CODEVIEW_SIMPLETYPESERIALIZER_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/RecordSerialization.h" +#include "llvm/DebugInfo/CodeView/TypeIndex.h" +#include "llvm/DebugInfo/CodeView/TypeRecord.h" +#include "llvm/DebugInfo/CodeView/TypeRecordMapping.h" +#include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/BinaryByteStream.h" +#include "llvm/Support/BinaryStreamWriter.h" +#include "llvm/Support/Error.h" +#include +#include +#include +#include + +namespace llvm { +namespace codeview { + +class SimpleTypeSerializer { + std::vector ScratchBuffer; + +public: + SimpleTypeSerializer(); + ~SimpleTypeSerializer(); + + // This template is explicitly instantiated in the implementation file for all + // supported types. The method itself is ugly, so inlining it into the header + // file clutters an otherwise straightforward interface. + template ArrayRef serialize(T &Record); + + // Don't allow serialization of field list records using this interface. 
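writeLeafType in the MergingTypeTableBuilder hunk above is a thin convenience wrapper: it serializes the record with the new SimpleTypeSerializer and forwards the bytes to insertRecordBytes, so identical leaves collapse onto one TypeIndex in the merging builder. A minimal sketch, using a ModifierRecord leaf purely as an example:

#include "llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
#include "llvm/Support/Allocator.h"
#include <cassert>

using namespace llvm;
using namespace llvm::codeview;

static void writeLeafTwice() {
  BumpPtrAllocator Alloc;              // record storage must outlive the table
  MergingTypeTableBuilder Table(Alloc);

  // "const int32_t" as a modifier leaf; writing the identical record twice
  // serializes to the same bytes, so both calls return the same TypeIndex.
  ModifierRecord ConstInt(TypeIndex::Int32(), ModifierOptions::Const);
  TypeIndex First = Table.writeLeafType(ConstInt);
  TypeIndex Second = Table.writeLeafType(ConstInt);
  assert(First == Second);
  (void)First;
  (void)Second;
}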
+ ArrayRef serialize(const FieldListRecord &Record) = delete; +}; + +} // end namespace codeview +} // end namespace llvm + +#endif // LLVM_DEBUGINFO_CODEVIEW_SIMPLETYPESERIALIZER_H diff --git a/include/llvm/DebugInfo/CodeView/TypeCollection.h b/include/llvm/DebugInfo/CodeView/TypeCollection.h index 0f856f57a727..e9fc9b0de8ef 100644 --- a/include/llvm/DebugInfo/CodeView/TypeCollection.h +++ b/include/llvm/DebugInfo/CodeView/TypeCollection.h @@ -31,6 +31,16 @@ class TypeCollection { virtual bool contains(TypeIndex Index) = 0; virtual uint32_t size() = 0; virtual uint32_t capacity() = 0; + + template void ForEachRecord(TFunc Func) { + Optional Next = getFirst(); + + while (Next.hasValue()) { + TypeIndex N = *Next; + Func(N, getType(N)); + Next = getNext(N); + } + } }; } } diff --git a/include/llvm/DebugInfo/CodeView/TypeHashing.h b/include/llvm/DebugInfo/CodeView/TypeHashing.h new file mode 100644 index 000000000000..741337533701 --- /dev/null +++ b/include/llvm/DebugInfo/CodeView/TypeHashing.h @@ -0,0 +1,204 @@ +//===- TypeHashing.h ---------------------------------------------*- C++-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_TYPEHASHING_H +#define LLVM_DEBUGINFO_CODEVIEW_TYPEHASHING_H + +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/Hashing.h" + +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/TypeCollection.h" +#include "llvm/DebugInfo/CodeView/TypeIndex.h" + +#include "llvm/Support/FormatProviders.h" + +#include + +namespace llvm { +namespace codeview { + +/// A locally hashed type represents a straightforward hash code of a serialized +/// record. The record is simply serialized, and then the bytes are hashed by +/// a standard algorithm. This is sufficient for the case of de-duplicating +/// records within a single sequence of types, because if two records both have +/// a back-reference to the same type in the same stream, they will both have +/// the same numeric value for the TypeIndex of the back reference. +struct LocallyHashedType { + hash_code Hash; + ArrayRef RecordData; + + /// Given a type, compute its local hash. + static LocallyHashedType hashType(ArrayRef RecordData); + + /// Given a sequence of types, compute all of the local hashes. + template + static std::vector hashTypes(Range &&Records) { + std::vector Hashes; + Hashes.reserve(std::distance(std::begin(Records), std::end(Records))); + for (const auto &R : Records) + Hashes.push_back(hashType(R)); + + return Hashes; + } + + static std::vector + hashTypeCollection(TypeCollection &Types) { + std::vector Hashes; + Types.ForEachRecord([&Hashes](TypeIndex TI, const CVType &Type) { + Hashes.push_back(hashType(Type.RecordData)); + }); + return Hashes; + } +}; + +enum class GlobalTypeHashAlg : uint16_t { SHA1 = 0 }; + +/// A globally hashed type represents a hash value that is sufficient to +/// uniquely identify a record across multiple type streams or type sequences. +/// This works by, for any given record A which references B, replacing the +/// TypeIndex that refers to B with a previously-computed global hash for B. As +/// this is a recursive algorithm (e.g. 
the global hash of B also depends on the +/// global hashes of the types that B refers to), a global hash can uniquely +/// identify identify that A occurs in another stream that has a completely +/// different graph structure. Although the hash itself is slower to compute, +/// probing is much faster with a globally hashed type, because the hash itself +/// is considered "as good as" the original type. Since type records can be +/// quite large, this makes the equality comparison of the hash much faster than +/// equality comparison of a full record. +struct GloballyHashedType { + GloballyHashedType() = default; + GloballyHashedType(StringRef H) + : GloballyHashedType(ArrayRef(H.bytes_begin(), H.bytes_end())) {} + GloballyHashedType(ArrayRef H) { + assert(H.size() == 20); + ::memcpy(Hash.data(), H.data(), 20); + } + std::array Hash; + + /// Given a sequence of bytes representing a record, compute a global hash for + /// this record. Due to the nature of global hashes incorporating the hashes + /// of referenced records, this function requires a list of types and ids + /// that RecordData might reference, indexable by TypeIndex. + static GloballyHashedType hashType(ArrayRef RecordData, + ArrayRef PreviousTypes, + ArrayRef PreviousIds); + + /// Given a sequence of bytes representing a record, compute a global hash for + /// this record. Due to the nature of global hashes incorporating the hashes + /// of referenced records, this function requires a list of types and ids + /// that RecordData might reference, indexable by TypeIndex. + static GloballyHashedType hashType(CVType Type, + ArrayRef PreviousTypes, + ArrayRef PreviousIds) { + return hashType(Type.RecordData, PreviousTypes, PreviousIds); + } + + /// Given a sequence of combined type and ID records, compute global hashes + /// for each of them, returning the results in a vector of hashed types. + template + static std::vector hashTypes(Range &&Records) { + std::vector Hashes; + for (const auto &R : Records) + Hashes.push_back(hashType(R, Hashes, Hashes)); + + return Hashes; + } + + /// Given a sequence of combined type and ID records, compute global hashes + /// for each of them, returning the results in a vector of hashed types. + template + static std::vector + hashIds(Range &&Records, ArrayRef TypeHashes) { + std::vector IdHashes; + for (const auto &R : Records) + IdHashes.push_back(hashType(R, TypeHashes, IdHashes)); + + return IdHashes; + } + + static std::vector + hashTypeCollection(TypeCollection &Types) { + std::vector Hashes; + Types.ForEachRecord([&Hashes](TypeIndex TI, const CVType &Type) { + Hashes.push_back(hashType(Type.RecordData, Hashes, Hashes)); + }); + return Hashes; + } +}; +#if defined(_MSC_VER) +// is_trivially_copyable is not available in older versions of libc++, but it is +// available in all supported versions of MSVC, so at least this gives us some +// coverage. 
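A short sketch of how the global hashes are meant to be consumed, assuming only the declarations above and the DenseMapInfo specializations that follow; the duplicate-counting function itself is illustrative:

#include "llvm/ADT/DenseSet.h"
#include "llvm/DebugInfo/CodeView/TypeCollection.h"
#include "llvm/DebugInfo/CodeView/TypeHashing.h"

using namespace llvm;
using namespace llvm::codeview;

// Count records in `Types` that are structurally identical to an earlier
// record, comparing 20-byte global hashes instead of full record bytes.
static unsigned countDuplicateTypes(TypeCollection &Types) {
  std::vector<GloballyHashedType> Hashes =
      GloballyHashedType::hashTypeCollection(Types);

  DenseSet<GloballyHashedType> Seen;
  unsigned Duplicates = 0;
  for (const GloballyHashedType &H : Hashes)
    if (!Seen.insert(H).second)
      ++Duplicates;
  return Duplicates;
}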
+static_assert(std::is_trivially_copyable::value, + "GloballyHashedType must be trivially copyable so that we can " + "reinterpret_cast arrays of hash data to arrays of " + "GloballyHashedType"); +#endif +} // namespace codeview + +template <> struct DenseMapInfo { + static codeview::LocallyHashedType Empty; + static codeview::LocallyHashedType Tombstone; + + static codeview::LocallyHashedType getEmptyKey() { return Empty; } + + static codeview::LocallyHashedType getTombstoneKey() { return Tombstone; } + + static unsigned getHashValue(codeview::LocallyHashedType Val) { + return Val.Hash; + } + + static bool isEqual(codeview::LocallyHashedType LHS, + codeview::LocallyHashedType RHS) { + if (LHS.Hash != RHS.Hash) + return false; + return LHS.RecordData == RHS.RecordData; + } +}; + +template <> struct DenseMapInfo { + static codeview::GloballyHashedType Empty; + static codeview::GloballyHashedType Tombstone; + + static codeview::GloballyHashedType getEmptyKey() { return Empty; } + + static codeview::GloballyHashedType getTombstoneKey() { return Tombstone; } + + static unsigned getHashValue(codeview::GloballyHashedType Val) { + return *reinterpret_cast(Val.Hash.data()); + } + + static bool isEqual(codeview::GloballyHashedType LHS, + codeview::GloballyHashedType RHS) { + return LHS.Hash == RHS.Hash; + } +}; + +template <> struct format_provider { +public: + static void format(const codeview::LocallyHashedType &V, + llvm::raw_ostream &Stream, StringRef Style) { + write_hex(Stream, V.Hash, HexPrintStyle::Upper, 8); + } +}; + +template <> struct format_provider { +public: + static void format(const codeview::GloballyHashedType &V, + llvm::raw_ostream &Stream, StringRef Style) { + for (uint8_t B : V.Hash) { + write_hex(Stream, B, HexPrintStyle::Upper, 2); + } + } +}; + +} // namespace llvm + +#endif diff --git a/include/llvm/DebugInfo/CodeView/TypeIndex.h b/include/llvm/DebugInfo/CodeView/TypeIndex.h index e0c2226bdbd7..c71281de7145 100644 --- a/include/llvm/DebugInfo/CodeView/TypeIndex.h +++ b/include/llvm/DebugInfo/CodeView/TypeIndex.h @@ -98,6 +98,7 @@ class TypeIndex { static const uint32_t FirstNonSimpleIndex = 0x1000; static const uint32_t SimpleKindMask = 0x000000ff; static const uint32_t SimpleModeMask = 0x00000700; + static const uint32_t DecoratedItemIdMask = 0x80000000; public: TypeIndex() : Index(static_cast(SimpleTypeKind::None)) {} @@ -110,6 +111,7 @@ class TypeIndex { uint32_t getIndex() const { return Index; } void setIndex(uint32_t I) { Index = I; } bool isSimple() const { return Index < FirstNonSimpleIndex; } + bool isDecoratedItemId() const { return !!(Index & DecoratedItemIdMask); } bool isNoneType() const { return *this == None(); } diff --git a/include/llvm/DebugInfo/CodeView/TypeRecord.h b/include/llvm/DebugInfo/CodeView/TypeRecord.h index a780a49bbbf8..508bdd395f74 100644 --- a/include/llvm/DebugInfo/CodeView/TypeRecord.h +++ b/include/llvm/DebugInfo/CodeView/TypeRecord.h @@ -334,6 +334,11 @@ class PointerRecord : public TypeRecord { uint32_t Attrs; Optional MemberInfo; + void setAttrs(PointerKind PK, PointerMode PM, PointerOptions PO, + uint8_t Size) { + Attrs = calcAttrs(PK, PM, PO, Size); + } + private: static uint32_t calcAttrs(PointerKind PK, PointerMode PM, PointerOptions PO, uint8_t Size) { diff --git a/include/llvm/DebugInfo/CodeView/TypeSerializer.h b/include/llvm/DebugInfo/CodeView/TypeSerializer.h deleted file mode 100644 index 0e734a8170bd..000000000000 --- a/include/llvm/DebugInfo/CodeView/TypeSerializer.h +++ /dev/null @@ -1,159 +0,0 @@ -//===- TypeSerializer.h 
-----------------------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_DEBUGINFO_CODEVIEW_TYPESERIALIZER_H -#define LLVM_DEBUGINFO_CODEVIEW_TYPESERIALIZER_H - -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/Optional.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/DebugInfo/CodeView/CodeView.h" -#include "llvm/DebugInfo/CodeView/RecordSerialization.h" -#include "llvm/DebugInfo/CodeView/TypeIndex.h" -#include "llvm/DebugInfo/CodeView/TypeRecord.h" -#include "llvm/DebugInfo/CodeView/TypeRecordMapping.h" -#include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h" -#include "llvm/Support/Allocator.h" -#include "llvm/Support/BinaryByteStream.h" -#include "llvm/Support/BinaryStreamWriter.h" -#include "llvm/Support/Error.h" -#include -#include -#include -#include - -namespace llvm { -namespace codeview { - -class TypeHasher; - -class TypeSerializer : public TypeVisitorCallbacks { - struct SubRecord { - SubRecord(TypeLeafKind K, uint32_t S) : Kind(K), Size(S) {} - - TypeLeafKind Kind; - uint32_t Size = 0; - }; - struct RecordSegment { - SmallVector SubRecords; - - uint32_t length() const { - uint32_t L = sizeof(RecordPrefix); - for (const auto &R : SubRecords) { - L += R.Size; - } - return L; - } - }; - - using MutableRecordList = SmallVector, 2>; - - static constexpr uint8_t ContinuationLength = 8; - BumpPtrAllocator &RecordStorage; - RecordSegment CurrentSegment; - MutableRecordList FieldListSegments; - - Optional TypeKind; - Optional MemberKind; - std::vector RecordBuffer; - MutableBinaryByteStream Stream; - BinaryStreamWriter Writer; - TypeRecordMapping Mapping; - - /// Private type record hashing implementation details are handled here. - std::unique_ptr Hasher; - - /// Contains a list of all records indexed by TypeIndex.toArrayIndex(). - SmallVector, 2> SeenRecords; - - /// Temporary storage that we use to copy a record's data while re-writing - /// its type indices. 
- SmallVector RemapStorage; - - TypeIndex nextTypeIndex() const; - - bool isInFieldList() const; - MutableArrayRef getCurrentSubRecordData(); - MutableArrayRef getCurrentRecordData(); - Error writeRecordPrefix(TypeLeafKind Kind); - - Expected> - addPadding(MutableArrayRef Record); - -public: - explicit TypeSerializer(BumpPtrAllocator &Storage, bool Hash = true); - ~TypeSerializer() override; - - void reset(); - - BumpPtrAllocator &getAllocator() { return RecordStorage; } - - ArrayRef> records() const; - TypeIndex insertRecordBytes(ArrayRef &Record); - TypeIndex insertRecord(const RemappedType &Record); - Expected visitTypeEndGetIndex(CVType &Record); - - using TypeVisitorCallbacks::visitTypeBegin; - Error visitTypeBegin(CVType &Record) override; - Error visitTypeEnd(CVType &Record) override; - Error visitMemberBegin(CVMemberRecord &Record) override; - Error visitMemberEnd(CVMemberRecord &Record) override; - -#define TYPE_RECORD(EnumName, EnumVal, Name) \ - virtual Error visitKnownRecord(CVType &CVR, Name##Record &Record) override { \ - return visitKnownRecordImpl(CVR, Record); \ - } -#define TYPE_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) -#define MEMBER_RECORD(EnumName, EnumVal, Name) \ - Error visitKnownMember(CVMemberRecord &CVR, Name##Record &Record) override { \ - return visitKnownMemberImpl(CVR, Record); \ - } -#define MEMBER_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) -#include "llvm/DebugInfo/CodeView/CodeViewTypes.def" - -private: - template - Error visitKnownRecordImpl(CVType &CVR, RecordKind &Record) { - return Mapping.visitKnownRecord(CVR, Record); - } - - template - Error visitKnownMemberImpl(CVMemberRecord &CVR, RecordType &Record) { - assert(CVR.Kind == static_cast(Record.getKind())); - - if (auto EC = Writer.writeEnum(CVR.Kind)) - return EC; - - if (auto EC = Mapping.visitKnownMember(CVR, Record)) - return EC; - - // Get all the data that was just written and is yet to be committed to - // the current segment. Then pad it to 4 bytes. - MutableArrayRef ThisRecord = getCurrentSubRecordData(); - auto ExpectedRecord = addPadding(ThisRecord); - if (!ExpectedRecord) - return ExpectedRecord.takeError(); - ThisRecord = *ExpectedRecord; - - CurrentSegment.SubRecords.emplace_back(CVR.Kind, ThisRecord.size()); - CVR.Data = ThisRecord; - - // Both the last subrecord and the total length of this segment should be - // multiples of 4. - assert(ThisRecord.size() % 4 == 0); - assert(CurrentSegment.length() % 4 == 0); - - return Error::success(); - } -}; - -} // end namespace codeview -} // end namespace llvm - -#endif // LLVM_DEBUGINFO_CODEVIEW_TYPESERIALIZER_H diff --git a/include/llvm/DebugInfo/CodeView/TypeStreamMerger.h b/include/llvm/DebugInfo/CodeView/TypeStreamMerger.h index d78fab47db66..59e216abcb11 100644 --- a/include/llvm/DebugInfo/CodeView/TypeStreamMerger.h +++ b/include/llvm/DebugInfo/CodeView/TypeStreamMerger.h @@ -19,7 +19,9 @@ namespace llvm { namespace codeview { class TypeIndex; -class TypeTableBuilder; +struct GloballyHashedType; +class GlobalTypeTableBuilder; +class MergingTypeTableBuilder; /// \brief Merge one set of type records into another. This method assumes /// that all records are type records, and there are no Id records present. @@ -34,7 +36,7 @@ class TypeTableBuilder; /// /// \returns Error::success() if the operation succeeded, otherwise an /// appropriate error code. 
-Error mergeTypeRecords(TypeTableBuilder &Dest, +Error mergeTypeRecords(MergingTypeTableBuilder &Dest, SmallVectorImpl &SourceToDest, const CVTypeArray &Types); @@ -59,7 +61,7 @@ Error mergeTypeRecords(TypeTableBuilder &Dest, /// /// \returns Error::success() if the operation succeeded, otherwise an /// appropriate error code. -Error mergeIdRecords(TypeTableBuilder &Dest, ArrayRef Types, +Error mergeIdRecords(MergingTypeTableBuilder &Dest, ArrayRef Types, SmallVectorImpl &SourceToDest, const CVTypeArray &Ids); @@ -78,11 +80,27 @@ Error mergeIdRecords(TypeTableBuilder &Dest, ArrayRef Types, /// /// \returns Error::success() if the operation succeeded, otherwise an /// appropriate error code. -Error mergeTypeAndIdRecords(TypeTableBuilder &DestIds, - TypeTableBuilder &DestTypes, +Error mergeTypeAndIdRecords(MergingTypeTableBuilder &DestIds, + MergingTypeTableBuilder &DestTypes, SmallVectorImpl &SourceToDest, const CVTypeArray &IdsAndTypes); +Error mergeTypeAndIdRecords(GlobalTypeTableBuilder &DestIds, + GlobalTypeTableBuilder &DestTypes, + SmallVectorImpl &SourceToDest, + const CVTypeArray &IdsAndTypes, + ArrayRef Hashes); + +Error mergeTypeRecords(GlobalTypeTableBuilder &Dest, + SmallVectorImpl &SourceToDest, + const CVTypeArray &Types, + ArrayRef Hashes); + +Error mergeIdRecords(GlobalTypeTableBuilder &Dest, ArrayRef Types, + SmallVectorImpl &SourceToDest, + const CVTypeArray &Ids, + ArrayRef Hashes); + } // end namespace codeview } // end namespace llvm diff --git a/include/llvm/DebugInfo/CodeView/TypeTableBuilder.h b/include/llvm/DebugInfo/CodeView/TypeTableBuilder.h deleted file mode 100644 index 1069dcd45334..000000000000 --- a/include/llvm/DebugInfo/CodeView/TypeTableBuilder.h +++ /dev/null @@ -1,137 +0,0 @@ -//===- TypeTableBuilder.h ---------------------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
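The new GlobalTypeTableBuilder-based overloads above let a consumer hash each input stream once and then merge by hash. A rough sketch of the intended call pattern; the wrapper function and its names are illustrative, while CVTypeArray, SmallVector and the merge entry point come from the headers touched by this patch:

#include "llvm/ADT/SmallVector.h"
#include "llvm/DebugInfo/CodeView/CVRecord.h"
#include "llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h"
#include "llvm/DebugInfo/CodeView/TypeHashing.h"
#include "llvm/DebugInfo/CodeView/TypeStreamMerger.h"

using namespace llvm;
using namespace llvm::codeview;

static Error mergeOneObjectFile(GlobalTypeTableBuilder &DestIds,
                                GlobalTypeTableBuilder &DestTypes,
                                const CVTypeArray &IdsAndTypes) {
  // Hash the whole source stream in one pass; later records may refer to the
  // hashes of earlier ones, which is why the vector is built in order.
  std::vector<GloballyHashedType> Hashes =
      GloballyHashedType::hashTypes(IdsAndTypes);

  // SourceToDest receives the TypeIndex remapping for this source stream.
  SmallVector<TypeIndex, 128> SourceToDest;
  return mergeTypeAndIdRecords(DestIds, DestTypes, SourceToDest, IdsAndTypes,
                               Hashes);
}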
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_DEBUGINFO_CODEVIEW_TYPETABLEBUILDER_H -#define LLVM_DEBUGINFO_CODEVIEW_TYPETABLEBUILDER_H - -#include "llvm/ADT/ArrayRef.h" -#include "llvm/DebugInfo/CodeView/CodeView.h" -#include "llvm/DebugInfo/CodeView/TypeIndex.h" -#include "llvm/DebugInfo/CodeView/TypeRecord.h" -#include "llvm/DebugInfo/CodeView/TypeSerializer.h" -#include "llvm/Support/Allocator.h" -#include "llvm/Support/Error.h" -#include -#include -#include -#include - -namespace llvm { -namespace codeview { - -class TypeTableBuilder { -private: - TypeIndex handleError(Error EC) const { - assert(false && "Couldn't write Type!"); - consumeError(std::move(EC)); - return TypeIndex(); - } - - BumpPtrAllocator &Allocator; - TypeSerializer Serializer; - -public: - explicit TypeTableBuilder(BumpPtrAllocator &Allocator, - bool WriteUnique = true) - : Allocator(Allocator), Serializer(Allocator, WriteUnique) {} - TypeTableBuilder(const TypeTableBuilder &) = delete; - TypeTableBuilder &operator=(const TypeTableBuilder &) = delete; - - bool empty() const { return Serializer.records().empty(); } - - BumpPtrAllocator &getAllocator() const { return Allocator; } - - template TypeIndex writeKnownType(T &Record) { - static_assert(!std::is_same::value, - "Can't serialize FieldList!"); - - CVType Type; - Type.Type = static_cast(Record.getKind()); - if (auto EC = Serializer.visitTypeBegin(Type)) - return handleError(std::move(EC)); - if (auto EC = Serializer.visitKnownRecord(Type, Record)) - return handleError(std::move(EC)); - - auto ExpectedIndex = Serializer.visitTypeEndGetIndex(Type); - if (!ExpectedIndex) - return handleError(ExpectedIndex.takeError()); - - return *ExpectedIndex; - } - - TypeIndex writeSerializedRecord(ArrayRef Record) { - return Serializer.insertRecordBytes(Record); - } - - TypeIndex writeSerializedRecord(const RemappedType &Record) { - return Serializer.insertRecord(Record); - } - - template void ForEachRecord(TFunc Func) { - uint32_t Index = TypeIndex::FirstNonSimpleIndex; - - for (auto Record : Serializer.records()) { - Func(TypeIndex(Index), Record); - ++Index; - } - } - - ArrayRef> records() const { return Serializer.records(); } -}; - -class FieldListRecordBuilder { - TypeTableBuilder &TypeTable; - BumpPtrAllocator Allocator; - TypeSerializer TempSerializer; - CVType Type; - -public: - explicit FieldListRecordBuilder(TypeTableBuilder &TypeTable) - : TypeTable(TypeTable), TempSerializer(Allocator, false) { - Type.Type = TypeLeafKind::LF_FIELDLIST; - } - - void begin() { - TempSerializer.reset(); - - if (auto EC = TempSerializer.visitTypeBegin(Type)) - consumeError(std::move(EC)); - } - - template void writeMemberType(T &Record) { - CVMemberRecord CVMR; - CVMR.Kind = static_cast(Record.getKind()); - if (auto EC = TempSerializer.visitMemberBegin(CVMR)) - consumeError(std::move(EC)); - if (auto EC = TempSerializer.visitKnownMember(CVMR, Record)) - consumeError(std::move(EC)); - if (auto EC = TempSerializer.visitMemberEnd(CVMR)) - consumeError(std::move(EC)); - } - - TypeIndex end(bool Write) { - TypeIndex Index; - if (auto EC = TempSerializer.visitTypeEnd(Type)) { - consumeError(std::move(EC)); - return TypeIndex(); - } - - if (Write) { - for (auto Record : TempSerializer.records()) - Index = TypeTable.writeSerializedRecord(Record); - } - - return Index; - } -}; - -} // end namespace codeview -} // end namespace llvm - -#endif // LLVM_DEBUGINFO_CODEVIEW_TYPETABLEBUILDER_H diff --git a/include/llvm/DebugInfo/DIContext.h 
b/include/llvm/DebugInfo/DIContext.h index 4a368bec85cd..abace9378607 100644 --- a/include/llvm/DebugInfo/DIContext.h +++ b/include/llvm/DebugInfo/DIContext.h @@ -153,6 +153,7 @@ enum DIDumpType : unsigned { struct DIDumpOptions { unsigned DumpType = DIDT_All; unsigned RecurseDepth = -1U; + bool ShowAddresses = true; bool ShowChildren = false; bool ShowParents = false; bool ShowForm = false; diff --git a/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h b/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h index e8abd3151e55..391c72018ae6 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h +++ b/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h @@ -24,7 +24,7 @@ class raw_ostream; /// This implements the Apple accelerator table format, a precursor of the /// DWARF 5 accelerator table format. /// TODO: Factor out a common base class for both formats. -class DWARFAcceleratorTable { +class AppleAcceleratorTable { struct Header { uint32_t Magic; uint16_t Version; @@ -53,7 +53,7 @@ class DWARFAcceleratorTable { /// multiple DWARFFormValues. class ValueIterator : public std::iterator> { - const DWARFAcceleratorTable *AccelTable = nullptr; + const AppleAcceleratorTable *AccelTable = nullptr; SmallVector AtomForms; ///< The decoded data entry. unsigned DataOffset = 0; ///< Offset into the section. @@ -64,7 +64,7 @@ class DWARFAcceleratorTable { void Next(); public: /// Construct a new iterator for the entries at \p DataOffset. - ValueIterator(const DWARFAcceleratorTable &AccelTable, unsigned DataOffset); + ValueIterator(const AppleAcceleratorTable &AccelTable, unsigned DataOffset); /// End marker. ValueIterator() = default; @@ -86,11 +86,11 @@ class DWARFAcceleratorTable { }; - DWARFAcceleratorTable(const DWARFDataExtractor &AccelSection, + AppleAcceleratorTable(const DWARFDataExtractor &AccelSection, DataExtractor StringSection) : AccelSection(AccelSection), StringSection(StringSection) {} - bool extract(); + llvm::Error extract(); uint32_t getNumBuckets(); uint32_t getNumHashes(); uint32_t getSizeHdr(); diff --git a/include/llvm/DebugInfo/DWARF/DWARFContext.h b/include/llvm/DebugInfo/DWARF/DWARFContext.h index 2ddbc4b91ba2..476c0f1bdfe9 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFContext.h +++ b/include/llvm/DebugInfo/DWARF/DWARFContext.h @@ -69,10 +69,10 @@ class DWARFContext : public DIContext { std::unique_ptr DebugFrame; std::unique_ptr EHFrame; std::unique_ptr Macro; - std::unique_ptr AppleNames; - std::unique_ptr AppleTypes; - std::unique_ptr AppleNamespaces; - std::unique_ptr AppleObjC; + std::unique_ptr AppleNames; + std::unique_ptr AppleTypes; + std::unique_ptr AppleNamespaces; + std::unique_ptr AppleObjC; DWARFUnitSection DWOCUs; std::deque> DWOTUs; @@ -243,16 +243,16 @@ class DWARFContext : public DIContext { const DWARFDebugMacro *getDebugMacro(); /// Get a reference to the parsed accelerator table object. - const DWARFAcceleratorTable &getAppleNames(); + const AppleAcceleratorTable &getAppleNames(); /// Get a reference to the parsed accelerator table object. - const DWARFAcceleratorTable &getAppleTypes(); + const AppleAcceleratorTable &getAppleTypes(); /// Get a reference to the parsed accelerator table object. - const DWARFAcceleratorTable &getAppleNamespaces(); + const AppleAcceleratorTable &getAppleNamespaces(); /// Get a reference to the parsed accelerator table object. - const DWARFAcceleratorTable &getAppleObjC(); + const AppleAcceleratorTable &getAppleObjC(); /// Get a pointer to a parsed line table corresponding to a compile unit. 
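Since extract() on the renamed AppleAcceleratorTable now returns llvm::Error rather than bool, callers have to consume the error explicitly. A hedged sketch; the wrapper, the banner string, and the use of dump() are illustrative:

#include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h"
#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
#include "llvm/Support/DataExtractor.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// `AccelSection` and `StrData` stand in for extractors over .apple_names and
// .debug_str that the caller has already set up.
static void dumpAppleNames(const DWARFDataExtractor &AccelSection,
                           DataExtractor StrData, raw_ostream &OS) {
  AppleAcceleratorTable Names(AccelSection, StrData);
  if (Error E = Names.extract()) {
    // An llvm::Error must be handled; silently dropping it asserts in builds
    // with ABI-breaking checks enabled.
    logAllUnhandledErrors(std::move(E), errs(), ".apple_names: ");
    return;
  }
  Names.dump(OS);
}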
const DWARFDebugLine::LineTable *getLineTableForUnit(DWARFUnit *cu); diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h b/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h index dfbbb95076e8..ab46fac39f7c 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h +++ b/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h @@ -43,6 +43,7 @@ class DWARFDebugArangeSet { uint64_t Length; uint64_t getEndAddress() const { return Address + Length; } + void dump(raw_ostream &OS, uint32_t AddressSize) const; }; private: diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h b/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h index 24075817219f..de8ad4e5ef3c 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h +++ b/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h @@ -15,6 +15,7 @@ #include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h" #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" #include "llvm/DebugInfo/DWARF/DWARFRelocMap.h" +#include "llvm/Support/MD5.h" #include #include #include @@ -34,6 +35,7 @@ class DWARFDebugLine { uint64_t DirIdx = 0; uint64_t ModTime = 0; uint64_t Length = 0; + MD5::MD5Result Checksum; }; struct Prologue { @@ -46,11 +48,11 @@ class DWARFDebugLine { /// parameters affect interpretation of forms (used in the directory and /// file tables starting with v5). DWARFFormParams FormParams; - /// In v5, size in bytes of a segment selector. - uint8_t SegSelectorSize; /// The number of bytes following the prologue_length field to the beginning /// of the first byte of the statement program itself. uint64_t PrologueLength; + /// In v5, size in bytes of a segment selector. + uint8_t SegSelectorSize; /// The size in bytes of the smallest target machine instruction. Statement /// program opcodes that alter the address register first multiply their /// operands by this value. @@ -66,6 +68,8 @@ class DWARFDebugLine { uint8_t LineRange; /// The number assigned to the first special opcode. uint8_t OpcodeBase; + /// For v5, whether filename entries provide an MD5 checksum. + bool HasMD5; std::vector StandardOpcodeLengths; std::vector IncludeDirectories; std::vector FileNames; diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h b/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h index f9ec96366a53..8c0011793ff1 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h +++ b/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h @@ -50,6 +50,8 @@ struct DWARFAddressRange { return LowPC <= RHS.HighPC && RHS.HighPC <= HighPC; return false; } + + void dump(raw_ostream &OS, uint32_t AddressSize) const; }; static inline bool operator<(const DWARFAddressRange &LHS, diff --git a/include/llvm/DebugInfo/DWARF/DWARFFormValue.h b/include/llvm/DebugInfo/DWARF/DWARFFormValue.h index d32053519ec4..2c0a942a5a5c 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFFormValue.h +++ b/include/llvm/DebugInfo/DWARF/DWARFFormValue.h @@ -50,6 +50,8 @@ struct DWARFFormParams { } llvm_unreachable("Invalid Format value"); } + + explicit operator bool() const { return Version && AddrSize; } }; class DWARFFormValue { diff --git a/include/llvm/DebugInfo/DWARF/DWARFUnit.h b/include/llvm/DebugInfo/DWARF/DWARFUnit.h index e9178e03fa8a..3cec58383f87 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFUnit.h +++ b/include/llvm/DebugInfo/DWARF/DWARFUnit.h @@ -165,6 +165,29 @@ struct BaseAddress { uint64_t SectionIndex; }; +/// Represents a unit's contribution to the string offsets table. +struct StrOffsetsContributionDescriptor { + uint64_t Base = 0; + uint64_t Size = 0; + /// Format and version. 
+ DWARFFormParams FormParams = {0, 0, dwarf::DwarfFormat::DWARF32}; + + StrOffsetsContributionDescriptor(uint64_t Base, uint64_t Size, + uint8_t Version, dwarf::DwarfFormat Format) + : Base(Base), Size(Size), FormParams({Version, 0, Format}) {} + + uint8_t getVersion() const { return FormParams.Version; } + dwarf::DwarfFormat getFormat() const { return FormParams.Format; } + uint8_t getDwarfOffsetByteSize() const { + return FormParams.getDwarfOffsetByteSize(); + } + /// Determine whether a contribution to the string offsets table is + /// consistent with the relevant section size and that its length is + /// a multiple of the size of one of its entries. + Optional + validateContributionSize(DWARFDataExtractor &DA); +}; + class DWARFUnit { DWARFContext &Context; /// Section containing this DWARFUnit. @@ -176,7 +199,6 @@ class DWARFUnit { const DWARFSection &LineSection; StringRef StringSection; const DWARFSection &StringOffsetSection; - uint64_t StringOffsetSectionBase = 0; const DWARFSection *AddrOffsetSection; uint32_t AddrOffsetSectionBase = 0; bool isLittleEndian; @@ -185,6 +207,9 @@ class DWARFUnit { // Version, address size, and DWARF format. DWARFFormParams FormParams; + /// Start, length, and DWARF format of the unit's contribution to the string + /// offsets table (DWARF v5). + Optional StringOffsetsTableContribution; uint32_t Offset; uint32_t Length; @@ -195,10 +220,40 @@ class DWARFUnit { /// The compile unit debug information entry items. std::vector DieArray; - /// Map from range's start address to end address and corresponding DIE. - /// IntervalMap does not support range removal, as a result, we use the - /// std::map::upper_bound for address range lookup. - std::map> AddrDieMap; + /// The vector of inlined subroutine DIEs that we can map directly to from + /// their subprogram below. + std::vector InlinedSubroutineDIEs; + + /// A type representing a subprogram DIE and a map (built using a sorted + /// vector) into that subprogram's inlined subroutine DIEs. + struct SubprogramDIEAddrInfo { + DWARFDie SubprogramDIE; + + uint64_t SubprogramBasePC; + + /// A vector sorted to allow mapping from a relative PC to the inlined + /// subroutine DIE with the most specific address range covering that PC. + /// + /// The PCs are relative to the `SubprogramBasePC`. + /// + /// The vector is sorted in ascending order of the first int which + /// represents the relative PC for an interval in the map. The second int + /// represents the index into the `InlinedSubroutineDIEs` vector of the DIE + /// that interval maps to. An index of '-1` indicates an empty mapping. The + /// interval covered is from the `.first` relative PC to the next entry's + /// `.first` relative PC. + std::vector> InlinedSubroutineDIEAddrMap; + }; + + /// Vector of the subprogram DIEs and their subroutine address maps. + std::vector SubprogramDIEAddrInfos; + + /// A vector sorted to allow mapping from a PC to the subprogram DIE (and + /// associated addr map) index. Subprograms with overlapping PC ranges aren't + /// supported here. Nothing will crash, but the mapping may be inaccurate. + /// This vector may also contain "empty" ranges marked by an address with + /// a DIE index of '-1'. + std::vector> SubprogramDIEAddrMap; using die_iterator_range = iterator_range::iterator>; @@ -219,6 +274,21 @@ class DWARFUnit { /// Size in bytes of the unit header. virtual uint32_t getHeaderSize() const { return getVersion() <= 4 ? 
11 : 12; } + /// Find the unit's contribution to the string offsets table and determine its + /// length and form. The given offset is expected to be derived from the unit + /// DIE's DW_AT_str_offsets_base attribute. + Optional + determineStringOffsetsTableContribution(DWARFDataExtractor &DA, + uint64_t Offset); + + /// Find the unit's contribution to the string offsets table and determine its + /// length and form. The given offset is expected to be 0 in a dwo file or, + /// in a dwp file, the start of the unit's contribution to the string offsets + /// table section (as determined by the index table). + Optional + determineStringOffsetsTableContributionDWO(DWARFDataExtractor &DA, + uint64_t Offset); + public: DWARFUnit(DWARFContext &Context, const DWARFSection &Section, const DWARFDebugAbbrev *DA, const DWARFSection *RS, StringRef SS, @@ -242,9 +312,6 @@ class DWARFUnit { AddrOffsetSectionBase = Base; } - /// Recursively update address to Die map. - void updateAddressDieMap(DWARFDie Die); - void setRangesSection(const DWARFSection *RS, uint32_t Base) { RangeSection = RS; RangeSectionBase = Base; @@ -272,6 +339,10 @@ class DWARFUnit { uint32_t getNextUnitOffset() const { return Offset + Length + 4; } uint32_t getLength() const { return Length; } + const Optional & + getStringOffsetsTableContribution() const { + return StringOffsetsTableContribution; + } const DWARFFormParams &getFormParams() const { return FormParams; } uint16_t getVersion() const { return FormParams.Version; } dwarf::DwarfFormat getFormat() const { return FormParams.Format; } @@ -281,6 +352,16 @@ class DWARFUnit { return FormParams.getDwarfOffsetByteSize(); } + uint8_t getDwarfStringOffsetsByteSize() const { + assert(StringOffsetsTableContribution); + return StringOffsetsTableContribution->getDwarfOffsetByteSize(); + } + + uint64_t getStringOffsetsBase() const { + assert(StringOffsetsTableContribution); + return StringOffsetsTableContribution->Base; + } + const DWARFAbbreviationDeclarationSet *getAbbreviations() const; uint8_t getUnitType() const { return UnitType; } @@ -426,6 +507,9 @@ class DWARFUnit { /// parseDWO - Parses .dwo file for current compile unit. Returns true if /// it was actually constructed. 
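To make the new string-offsets bookkeeping concrete: once a unit has located its contribution, a DW_FORM_strx-style index is turned into an offset inside the string offsets section using the base and entry size recorded above. A minimal sketch, assuming only the accessors shown in this hunk:

#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
#include <cstdint>

using namespace llvm;

// Byte offset (within the string offsets section) of entry `Index` for unit
// `U`. Real callers first check getStringOffsetsTableContribution(), because
// both accessors assert that a contribution was actually found.
static uint64_t strOffsetsEntryOffset(const DWARFUnit &U, uint64_t Index) {
  return U.getStringOffsetsBase() +
         Index * U.getDwarfStringOffsetsByteSize();
}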
bool parseDWO(); + + void buildSubprogramDIEAddrMap(); + void buildInlinedSubroutineDIEAddrMap(SubprogramDIEAddrInfo &SPInfo); }; } // end namespace llvm diff --git a/include/llvm/DebugInfo/DWARF/DWARFVerifier.h b/include/llvm/DebugInfo/DWARF/DWARFVerifier.h index 0d920abe3231..c427a07ccc14 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFVerifier.h +++ b/include/llvm/DebugInfo/DWARF/DWARFVerifier.h @@ -24,7 +24,6 @@ struct DWARFAttribute; class DWARFContext; class DWARFDie; class DWARFUnit; -class DWARFAcceleratorTable; class DWARFDataExtractor; class DWARFDebugAbbrev; class DataExtractor; @@ -229,8 +228,9 @@ class DWARFVerifier { /// \param SectionName the name of the table we're verifying /// /// \returns The number of errors occured during verification - unsigned verifyAccelTable(const DWARFSection *AccelSection, - DataExtractor *StrData, const char *SectionName); + unsigned verifyAppleAccelTable(const DWARFSection *AccelSection, + DataExtractor *StrData, + const char *SectionName); public: DWARFVerifier(raw_ostream &S, DWARFContext &D, diff --git a/include/llvm/DebugInfo/MSF/MSFCommon.h b/include/llvm/DebugInfo/MSF/MSFCommon.h index f28415d4e603..dd532647b71a 100644 --- a/include/llvm/DebugInfo/MSF/MSFCommon.h +++ b/include/llvm/DebugInfo/MSF/MSFCommon.h @@ -52,6 +52,16 @@ struct SuperBlock { struct MSFLayout { MSFLayout() = default; + uint32_t mainFpmBlock() const { + assert(SB->FreeBlockMapBlock == 1 || SB->FreeBlockMapBlock == 2); + return SB->FreeBlockMapBlock; + } + + uint32_t alternateFpmBlock() const { + // If mainFpmBlock is 1, this is 2. If mainFpmBlock is 2, this is 1. + return 3U - mainFpmBlock(); + } + const SuperBlock *SB = nullptr; BitVector FreePageMap; ArrayRef DirectoryBlocks; @@ -108,14 +118,40 @@ inline uint32_t getFpmIntervalLength(const MSFLayout &L) { return L.SB->BlockSize; } -inline uint32_t getNumFpmIntervals(const MSFLayout &L, - bool IncludeUnusedFpmData = false) { - if (IncludeUnusedFpmData) - return divideCeil(L.SB->NumBlocks, L.SB->BlockSize); +/// Given an MSF with the specified block size and number of blocks, determine +/// how many pieces the specified Fpm is split into. +/// \p BlockSize - the block size of the MSF +/// \p NumBlocks - the total number of blocks in the MSF +/// \p IncludeUnusedFpmData - When true, this will count every block that is +/// both in the file and matches the form of an FPM block, even if some of +/// those FPM blocks are unused (a single FPM block can describe the +/// allocation status of up to 32,767 blocks, although one appears only +/// every 4,096 blocks). So there are 8x as many blocks that match the +/// form as there are blocks that are necessary to describe the allocation +/// status of the file. When this parameter is false, these extraneous +/// trailing blocks are not counted. +inline uint32_t getNumFpmIntervals(uint32_t BlockSize, uint32_t NumBlocks, + bool IncludeUnusedFpmData, int FpmNumber) { + assert(FpmNumber == 1 || FpmNumber == 2); + if (IncludeUnusedFpmData) { + // This calculation determines how many times a number of the form + // BlockSize * k + N appears in the range [0, NumBlocks). We only need to + // do this when unused data is included, since the number of blocks dwarfs + // the number of fpm blocks. + return divideCeil(NumBlocks - FpmNumber, BlockSize); + } // We want the minimum number of intervals required, where each interval can // represent BlockSize * 8 blocks. 
- return divideCeil(L.SB->NumBlocks, 8 * L.SB->BlockSize); + return divideCeil(NumBlocks, 8 * BlockSize); +} + +inline uint32_t getNumFpmIntervals(const MSFLayout &L, + bool IncludeUnusedFpmData = false, + bool AltFpm = false) { + return getNumFpmIntervals(L.SB->BlockSize, L.SB->NumBlocks, + IncludeUnusedFpmData, + AltFpm ? L.alternateFpmBlock() : L.mainFpmBlock()); } Error validateSuperBlock(const SuperBlock &SB); diff --git a/include/llvm/DebugInfo/PDB/PDBSymbolTypeFunctionSig.h b/include/llvm/DebugInfo/PDB/PDBSymbolTypeFunctionSig.h index 8de54e70701d..abd4cf5effa2 100644 --- a/include/llvm/DebugInfo/PDB/PDBSymbolTypeFunctionSig.h +++ b/include/llvm/DebugInfo/PDB/PDBSymbolTypeFunctionSig.h @@ -31,6 +31,8 @@ class PDBSymbolTypeFunctionSig : public PDBSymbol { void dumpRight(PDBSymDumper &Dumper) const override; void dumpArgList(raw_ostream &OS) const; + bool isCVarArgs() const; + FORWARD_SYMBOL_METHOD(getCallingConvention) FORWARD_SYMBOL_ID_METHOD(getClassParent) FORWARD_SYMBOL_ID_METHOD(getUnmodifiedType) diff --git a/include/llvm/DebugInfo/PDB/PDBTypes.h b/include/llvm/DebugInfo/PDB/PDBTypes.h index 6d144a5b8909..a6c6da37d1cc 100644 --- a/include/llvm/DebugInfo/PDB/PDBTypes.h +++ b/include/llvm/DebugInfo/PDB/PDBTypes.h @@ -13,6 +13,7 @@ #include "llvm/DebugInfo/CodeView/CodeView.h" #include "llvm/DebugInfo/PDB/IPDBEnumChildren.h" #include "llvm/DebugInfo/PDB/Native/RawTypes.h" +#include #include #include #include diff --git a/include/llvm/ExecutionEngine/ExecutionEngine.h b/include/llvm/ExecutionEngine/ExecutionEngine.h index 77c23b46d320..7932688290e3 100644 --- a/include/llvm/ExecutionEngine/ExecutionEngine.h +++ b/include/llvm/ExecutionEngine/ExecutionEngine.h @@ -137,17 +137,15 @@ class ExecutionEngine { virtual char *getMemoryForGV(const GlobalVariable *GV); static ExecutionEngine *(*MCJITCtor)( - std::unique_ptr M, - std::string *ErrorStr, - std::shared_ptr MM, - std::shared_ptr SR, - std::unique_ptr TM); + std::unique_ptr M, std::string *ErrorStr, + std::shared_ptr MM, + std::shared_ptr SR, + std::unique_ptr TM); static ExecutionEngine *(*OrcMCJITReplacementCtor)( - std::string *ErrorStr, - std::shared_ptr MM, - std::shared_ptr SR, - std::unique_ptr TM); + std::string *ErrorStr, std::shared_ptr MM, + std::shared_ptr SR, + std::unique_ptr TM); static ExecutionEngine *(*InterpCtor)(std::unique_ptr M, std::string *ErrorStr); @@ -532,7 +530,7 @@ class EngineBuilder { std::string *ErrorStr; CodeGenOpt::Level OptLevel; std::shared_ptr MemMgr; - std::shared_ptr Resolver; + std::shared_ptr Resolver; TargetOptions Options; Optional RelocModel; Optional CMModel; @@ -571,8 +569,7 @@ class EngineBuilder { EngineBuilder& setMemoryManager(std::unique_ptr MM); - EngineBuilder& - setSymbolResolver(std::unique_ptr SR); + EngineBuilder &setSymbolResolver(std::unique_ptr SR); /// setErrorStr - Set the error string to write to on error. This option /// defaults to NULL. 
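Circling back to the getNumFpmIntervals() rework in MSFCommon.h above, here is a self-contained sketch of the two counting modes. divideCeil is re-implemented locally so the snippet stands alone, and the block counts are made-up example values:

#include <cassert>
#include <cstdint>

// Local stand-in for llvm::divideCeil so the sketch compiles outside LLVM.
static uint32_t divideCeil(uint32_t Numerator, uint32_t Denominator) {
  return (Numerator + Denominator - 1) / Denominator;
}

int main() {
  const uint32_t BlockSize = 4096;    // bytes (and bits of FPM coverage) per block
  const uint32_t NumBlocks = 1000000; // hypothetical MSF size in blocks
  const int FpmNumber = 1;            // main FPM starts at block 1

  // Counting every block of the form BlockSize * k + FpmNumber in
  // [0, NumBlocks): one FPM-shaped block per 4096-block stride.
  uint32_t All = divideCeil(NumBlocks - FpmNumber, BlockSize);
  assert(All == 245);

  // Counting only the intervals actually required: each FPM block carries one
  // allocation bit per block, so it covers BlockSize * 8 blocks.
  uint32_t Needed = divideCeil(NumBlocks, 8 * BlockSize);
  assert(Needed == 31);
  return 0;
}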
diff --git a/include/llvm/ExecutionEngine/JITSymbol.h b/include/llvm/ExecutionEngine/JITSymbol.h index 933b3ea8e13d..0ce16dca0b59 100644 --- a/include/llvm/ExecutionEngine/JITSymbol.h +++ b/include/llvm/ExecutionEngine/JITSymbol.h @@ -19,8 +19,11 @@ #include #include #include +#include +#include #include +#include "llvm/ADT/StringRef.h" #include "llvm/Support/Error.h" namespace llvm { @@ -48,9 +51,16 @@ class JITSymbolFlags { Weak = 1U << 1, Common = 1U << 2, Absolute = 1U << 3, - Exported = 1U << 4 + Exported = 1U << 4, + NotMaterialized = 1U << 5, + Materializing = 1U << 6 }; + static JITSymbolFlags stripTransientFlags(JITSymbolFlags Orig) { + return static_cast(Orig.Flags & + ~(NotMaterialized | Materializing)); + } + /// @brief Default-construct a JITSymbolFlags instance. JITSymbolFlags() = default; @@ -67,6 +77,15 @@ class JITSymbolFlags { return (Flags & HasError) == HasError; } + /// @brief Returns true if this symbol has been fully materialized (i.e. is + /// callable). + bool isMaterialized() const { return !(Flags & NotMaterialized); } + + /// @brief Returns true if this symbol is in the process of being + /// materialized. This is generally only of interest as an + /// implementation detail to JIT infrastructure. + bool isMaterializing() const { return Flags & Materializing; } + /// @brief Returns true if the Weak flag is set. bool isWeak() const { return (Flags & Weak) == Weak; @@ -78,7 +97,7 @@ class JITSymbolFlags { } /// @brief Returns true if the symbol isn't weak or common. - bool isStrongDefinition() const { + bool isStrong() const { return !isWeak() && !isCommon(); } @@ -134,6 +153,8 @@ class ARMJITSymbolFlags { /// @brief Represents a symbol that has been evaluated to an address already. class JITEvaluatedSymbol { public: + JITEvaluatedSymbol() = default; + /// @brief Create a 'null' symbol. JITEvaluatedSymbol(std::nullptr_t) {} @@ -256,11 +277,49 @@ class JITSymbol { JITSymbolFlags Flags; }; -/// \brief Symbol resolution. +/// @brief Symbol resolution interface. +/// +/// Allows symbol flags and addresses to be looked up by name. +/// Symbol queries are done in bulk (i.e. you request resolution of a set of +/// symbols, rather than a single one) to reduce IPC overhead in the case of +/// remote JITing, and expose opportunities for parallel compilation. class JITSymbolResolver { public: + using LookupSet = std::set; + using LookupResult = std::map; + using LookupFlagsResult = std::map; + virtual ~JITSymbolResolver() = default; + /// @brief Returns the fully resolved address and flags for each of the given + /// symbols. + /// + /// This method will return an error if any of the given symbols can not be + /// resolved, or if the resolution process itself triggers an error. + virtual Expected lookup(const LookupSet &Symbols) = 0; + + /// @brief Returns the symbol flags for each of the given symbols. + /// + /// This method does NOT return an error if any of the given symbols is + /// missing. Instead, that symbol will be left out of the result map. + virtual Expected lookupFlags(const LookupSet &Symbols) = 0; + +private: + virtual void anchor(); +}; + +/// \brief Legacy symbol resolution interface. +class LegacyJITSymbolResolver : public JITSymbolResolver { +public: + /// @brief Performs lookup by, for each symbol, first calling + /// findSymbolInLogicalDylib and if that fails calling + /// findSymbol. 
+ Expected lookup(const LookupSet &Symbols) final; + + /// @brief Performs flags lookup by calling findSymbolInLogicalDylib and + /// returning the flags value for that symbol. + Expected lookupFlags(const LookupSet &Symbols) final; + /// This method returns the address of the specified symbol if it exists /// within the logical dynamic library represented by this JITSymbolResolver. /// Unlike findSymbol, queries through this interface should return addresses diff --git a/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h b/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h index a961992c2147..3281c354676c 100644 --- a/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h +++ b/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h @@ -183,7 +183,7 @@ class CompileOnDemandLayer { return Error::success(); } - std::shared_ptr ExternalSymbolResolver; + std::shared_ptr ExternalSymbolResolver; std::unique_ptr StubsMgr; StaticGlobalRenamer StaticRenamer; SourceModulesList SourceModules; @@ -223,7 +223,7 @@ class CompileOnDemandLayer { /// @brief Add a module to the compile-on-demand layer. Expected addModule(std::shared_ptr M, - std::shared_ptr Resolver) { + std::shared_ptr Resolver) { LogicalDylibs.push_back(LogicalDylib()); auto &LD = LogicalDylibs.back(); diff --git a/include/llvm/ExecutionEngine/Orc/Core.h b/include/llvm/ExecutionEngine/Orc/Core.h new file mode 100644 index 000000000000..ad7545f63bea --- /dev/null +++ b/include/llvm/ExecutionEngine/Orc/Core.h @@ -0,0 +1,286 @@ +//===------ Core.h -- Core ORC APIs (Layer, JITDylib, etc.) -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Contains core ORC APIs. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_CORE_H +#define LLVM_EXECUTIONENGINE_ORC_CORE_H + +#include "llvm/ExecutionEngine/JITSymbol.h" +#include "llvm/ExecutionEngine/Orc/SymbolStringPool.h" + +#include +#include +#include +#include + +namespace llvm { +namespace orc { + +/// VModuleKey provides a unique identifier (allocated and managed by +/// ExecutionSessions) for a module added to the JIT. +using VModuleKey = uint64_t; + +class VSO; + +/// @brief A set of symbol names (represented by SymbolStringPtrs for +// efficiency). +using SymbolNameSet = std::set; + +/// @brief A map from symbol names (as SymbolStringPtrs) to JITSymbols +/// (address/flags pairs). +using SymbolMap = std::map; + +/// @brief A map from symbol names (as SymbolStringPtrs) to JITSymbolFlags. +using SymbolFlagsMap = std::map; + +/// @brief A symbol query that returns results via a callback when results are +/// ready. +/// +/// makes a callback when all symbols are available. +class AsynchronousSymbolQuery { +public: + /// @brief Callback to notify client that symbols have been resolved. + using SymbolsResolvedCallback = std::function)>; + + /// @brief Callback to notify client that symbols are ready for execution. + using SymbolsReadyCallback = std::function; + + /// @brief Create a query for the given symbols, notify-resolved and + /// notify-ready callbacks. + AsynchronousSymbolQuery(const SymbolNameSet &Symbols, + SymbolsResolvedCallback NotifySymbolsResolved, + SymbolsReadyCallback NotifySymbolsReady); + + /// @brief Notify client that the query failed. 
+ /// + /// If the notify-resolved callback has not been made yet, then it is called + /// with the given error, and the notify-finalized callback is never made. + /// + /// If the notify-resolved callback has already been made then then the + /// notify-finalized callback is called with the given error. + /// + /// It is illegal to call setFailed after both callbacks have been made. + void setFailed(Error Err); + + /// @brief Set the resolved symbol information for the given symbol name. + /// + /// If this symbol was the last one not resolved, this will trigger a call to + /// the notify-finalized callback passing the completed sybol map. + void setDefinition(SymbolStringPtr Name, JITEvaluatedSymbol Sym); + + /// @brief Notify the query that a requested symbol is ready for execution. + /// + /// This decrements the query's internal count of not-yet-ready symbols. If + /// this call to notifySymbolFinalized sets the counter to zero, it will call + /// the notify-finalized callback with Error::success as the value. + void notifySymbolFinalized(); + +private: + SymbolMap Symbols; + size_t OutstandingResolutions = 0; + size_t OutstandingFinalizations = 0; + SymbolsResolvedCallback NotifySymbolsResolved; + SymbolsReadyCallback NotifySymbolsReady; +}; + +/// @brief A SymbolFlagsMap containing flags of found symbols, plus a set of +/// not-found symbols. Shared between SymbolResolver::lookupFlags and +/// VSO::lookupFlags for convenience. +struct LookupFlagsResult { + SymbolFlagsMap SymbolFlags; + SymbolNameSet SymbolsNotFound; +}; + +class SymbolResolver { +public: + virtual ~SymbolResolver() = default; + virtual LookupFlagsResult lookupFlags(const SymbolNameSet &Symbols) = 0; + virtual SymbolNameSet lookup(AsynchronousSymbolQuery &Query, + SymbolNameSet Symbols) = 0; + +private: + virtual void anchor(); +}; + +/// @brief Represents a source of symbol definitions which may be materialized +/// (turned into data / code through some materialization process) or +/// discarded (if the definition is overridden by a stronger one). +/// +/// SymbolSources are used when providing lazy definitions of symbols to VSOs. +/// The VSO will call materialize when the address of a symbol is requested via +/// the lookup method. The VSO will call discard if a stronger definition is +/// added or already present. +class SymbolSource { +public: + virtual ~SymbolSource() {} + + /// @brief Implementations of this method should materialize the given + /// symbols (plus any additional symbols required) by adding a + /// Materializer to the ExecutionSession's MaterializationQueue. + virtual Error materialize(VSO &V, SymbolNameSet Symbols) = 0; + + /// @brief Implementations of this method should discard the given symbol + /// from the source (e.g. if the source is an LLVM IR Module and the + /// symbol is a function, delete the function body or mark it available + /// externally). + virtual void discard(VSO &V, SymbolStringPtr Name) = 0; + +private: + virtual void anchor(); +}; + +/// @brief Represents a dynamic linkage unit in a JIT process. +/// +/// VSO acts as a symbol table (symbol definitions can be set and the dylib +/// queried to find symbol addresses) and as a key for tracking resources +/// (since a VSO's address is fixed). 
+class VSO { + friend class ExecutionSession; + +public: + enum RelativeLinkageStrength { + NewDefinitionIsStronger, + DuplicateDefinition, + ExistingDefinitionIsStronger + }; + + using SetDefinitionsResult = + std::map; + using SourceWorkMap = std::map; + + struct LookupResult { + SourceWorkMap MaterializationWork; + SymbolNameSet UnresolvedSymbols; + }; + + VSO() = default; + + VSO(const VSO &) = delete; + VSO &operator=(const VSO &) = delete; + VSO(VSO &&) = delete; + VSO &operator=(VSO &&) = delete; + + /// @brief Compare new linkage with existing linkage. + static RelativeLinkageStrength + compareLinkage(Optional OldFlags, JITSymbolFlags NewFlags); + + /// @brief Compare new linkage with an existing symbol's linkage. + RelativeLinkageStrength compareLinkage(SymbolStringPtr Name, + JITSymbolFlags NewFlags) const; + + /// @brief Adds the given symbols to the mapping as resolved, finalized + /// symbols. + /// + /// FIXME: We can take this by const-ref once symbol-based laziness is + /// removed. + Error define(SymbolMap NewSymbols); + + /// @brief Adds the given symbols to the mapping as lazy symbols. + Error defineLazy(const SymbolFlagsMap &NewSymbols, SymbolSource &Source); + + /// @brief Add the given symbol/address mappings to the dylib, but do not + /// mark the symbols as finalized yet. + void resolve(SymbolMap SymbolValues); + + /// @brief Finalize the given symbols. + void finalize(SymbolNameSet SymbolsToFinalize); + + /// @brief Look up the flags for the given symbols. + /// + /// Returns the flags for the give symbols, together with the set of symbols + /// not found. + LookupFlagsResult lookupFlags(SymbolNameSet Symbols); + + /// @brief Apply the given query to the given symbols in this VSO. + /// + /// For symbols in this VSO that have already been materialized, their address + /// will be set in the query immediately. + /// + /// For symbols in this VSO that have not been materialized, the query will be + /// recorded and the source for those symbols (plus the set of symbols to be + /// materialized by that source) will be returned as the MaterializationWork + /// field of the LookupResult. + /// + /// Any symbols not found in this VSO will be returned in the + /// UnresolvedSymbols field of the LookupResult. 
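Right before the lookup declaration below, a hedged sketch of how a client might drive this flow for a symbol that is already materialized. The callback signatures are assumed to be void(Expected<SymbolMap>) and void(Error), since the template arguments were dropped in this copy of the patch, and the behavioural comments describe the intent rather than a tested run:

#include "llvm/ExecutionEngine/Orc/Core.h"
#include <cassert>

void lookupExample() {
  llvm::orc::SymbolStringPool SSP;
  llvm::orc::ExecutionSession ES(SSP);
  llvm::orc::VSO Lib;

  auto Name = ES.getSymbolStringPool().intern("foo");

  // Publish an already-resolved, finalized definition for "foo".
  llvm::orc::SymbolMap Defs;
  Defs[Name] = llvm::JITEvaluatedSymbol(0x1000, llvm::JITSymbolFlags::Exported);
  llvm::cantFail(Lib.define(std::move(Defs)));

  // Query it back. Because "foo" is already finalized, no materialization
  // work should be handed back and the callbacks should fire inside lookup().
  llvm::orc::AsynchronousSymbolQuery Q(
      {Name},
      [](llvm::Expected<llvm::orc::SymbolMap> R) { llvm::cantFail(std::move(R)); },
      [](llvm::Error Err) { llvm::cantFail(std::move(Err)); });
  auto Result = Lib.lookup(Q, {Name});
  assert(Result.UnresolvedSymbols.empty());
  (void)Result;
}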
+ LookupResult lookup(AsynchronousSymbolQuery &Query, SymbolNameSet Symbols); + +private: + class MaterializationInfo { + public: + MaterializationInfo(JITSymbolFlags Flags, AsynchronousSymbolQuery &Query); + JITSymbolFlags getFlags() const; + JITTargetAddress getAddress() const; + void query(SymbolStringPtr Name, AsynchronousSymbolQuery &Query); + void resolve(SymbolStringPtr Name, JITEvaluatedSymbol Sym); + void finalize(); + + private: + JITSymbolFlags Flags; + JITTargetAddress Address = 0; + std::vector PendingResolution; + std::vector PendingFinalization; + }; + + class SymbolTableEntry { + public: + SymbolTableEntry(JITSymbolFlags Flags, SymbolSource &Source); + SymbolTableEntry(JITEvaluatedSymbol Sym); + SymbolTableEntry(SymbolTableEntry &&Other); + ~SymbolTableEntry(); + JITSymbolFlags getFlags() const; + void replaceWithSource(VSO &V, SymbolStringPtr Name, JITSymbolFlags Flags, + SymbolSource &NewSource); + SymbolSource *query(SymbolStringPtr Name, AsynchronousSymbolQuery &Query); + void resolve(VSO &V, SymbolStringPtr Name, JITEvaluatedSymbol Sym); + void finalize(); + + private: + JITSymbolFlags Flags; + union { + JITTargetAddress Address; + SymbolSource *Source; + std::unique_ptr MatInfo; + }; + }; + + std::map Symbols; +}; + +/// @brief An ExecutionSession represents a running JIT program. +class ExecutionSession { +public: + /// @brief Construct an ExecutionEngine. + /// + /// SymbolStringPools may be shared between ExecutionSessions. + ExecutionSession(SymbolStringPool &SSP); + + /// @brief Returns the SymbolStringPool for this ExecutionSession. + SymbolStringPool &getSymbolStringPool() const { return SSP; } + + /// @brief Allocate a module key for a new module to add to the JIT. + VModuleKey allocateVModule(); + + /// @brief Return a module key to the ExecutionSession so that it can be + /// re-used. This should only be done once all resources associated + //// with the original key have been released. + void releaseVModule(VModuleKey Key); + +public: + SymbolStringPool &SSP; + VModuleKey LastKey = 0; +}; + +} // End namespace orc +} // End namespace llvm + +#endif // LLVM_EXECUTIONENGINE_ORC_CORE_H diff --git a/include/llvm/ExecutionEngine/Orc/LambdaResolver.h b/include/llvm/ExecutionEngine/Orc/LambdaResolver.h index 228392ae0d4a..7b6f3d2f92ab 100644 --- a/include/llvm/ExecutionEngine/Orc/LambdaResolver.h +++ b/include/llvm/ExecutionEngine/Orc/LambdaResolver.h @@ -23,7 +23,7 @@ namespace llvm { namespace orc { template -class LambdaResolver : public JITSymbolResolver { +class LambdaResolver : public LegacyJITSymbolResolver { public: LambdaResolver(DylibLookupFtorT DylibLookupFtor, ExternalLookupFtorT ExternalLookupFtor) diff --git a/include/llvm/ExecutionEngine/Orc/Legacy.h b/include/llvm/ExecutionEngine/Orc/Legacy.h new file mode 100644 index 000000000000..11143a872a5b --- /dev/null +++ b/include/llvm/ExecutionEngine/Orc/Legacy.h @@ -0,0 +1,38 @@ +//===--- Legacy.h -- Adapters for ExecutionEngine API interop ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Contains core ORC APIs. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_LEGACY_H +#define LLVM_EXECUTIONENGINE_ORC_LEGACY_H + +#include "llvm/ExecutionEngine/JITSymbol.h" +#include "llvm/ExecutionEngine/Orc/Core.h" + +namespace llvm { +namespace orc { + +class JITSymbolResolverAdapter : public JITSymbolResolver { +public: + JITSymbolResolverAdapter(ExecutionSession &ES, SymbolResolver &R); + Expected lookup(const LookupSet &Symbols) override; + Expected lookupFlags(const LookupSet &Symbols) override; + +private: + ExecutionSession &ES; + std::set ResolvedStrings; + SymbolResolver &R; +}; + +} // End namespace orc +} // End namespace llvm + +#endif // LLVM_EXECUTIONENGINE_ORC_LEGACY_H diff --git a/include/llvm/ExecutionEngine/Orc/NullResolver.h b/include/llvm/ExecutionEngine/Orc/NullResolver.h index 957b94912b3f..fc5cb3e7c710 100644 --- a/include/llvm/ExecutionEngine/Orc/NullResolver.h +++ b/include/llvm/ExecutionEngine/Orc/NullResolver.h @@ -22,7 +22,7 @@ namespace orc { /// SymbolResolver impliementation that rejects all resolution requests. /// Useful for clients that have no cross-object fixups. -class NullResolver : public JITSymbolResolver { +class NullResolver : public LegacyJITSymbolResolver { public: JITSymbol findSymbol(const std::string &Name) final; diff --git a/include/llvm/ExecutionEngine/Orc/OrcError.h b/include/llvm/ExecutionEngine/Orc/OrcError.h index e1ac87075ac0..c2ff41e421e7 100644 --- a/include/llvm/ExecutionEngine/Orc/OrcError.h +++ b/include/llvm/ExecutionEngine/Orc/OrcError.h @@ -22,7 +22,8 @@ namespace orc { enum class OrcErrorCode : int { // RPC Errors - JITSymbolNotFound = 1, + DuplicateDefinition = 1, + JITSymbolNotFound, RemoteAllocatorDoesNotExist, RemoteAllocatorIdAlreadyInUse, RemoteMProtectAddrUnrecognized, @@ -39,6 +40,18 @@ enum class OrcErrorCode : int { std::error_code orcError(OrcErrorCode ErrCode); +class DuplicateDefinition : public ErrorInfo { +public: + static char ID; + + DuplicateDefinition(std::string SymbolName); + std::error_code convertToErrorCode() const override; + void log(raw_ostream &OS) const override; + const std::string &getSymbolName() const; +private: + std::string SymbolName; +}; + class JITSymbolNotFound : public ErrorInfo { public: static char ID; diff --git a/include/llvm/ExecutionEngine/Orc/RemoteObjectLayer.h b/include/llvm/ExecutionEngine/Orc/RemoteObjectLayer.h index 17255954a99f..21d0b68a7716 100644 --- a/include/llvm/ExecutionEngine/Orc/RemoteObjectLayer.h +++ b/include/llvm/ExecutionEngine/Orc/RemoteObjectLayer.h @@ -328,7 +328,8 @@ class RemoteObjectClientLayer : public RemoteObjectLayer { /// @return A handle that can be used to refer to the loaded object (for /// symbol searching, finalization, freeing memory, etc.). 
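As a hedged sketch of the split introduced above: new code targets the bulk JITSymbolResolver interface, while existing resolvers (NullResolver, LambdaResolver, RTDyldMemoryManager) keep their per-symbol findSymbol/findSymbolInLogicalDylib methods by deriving from LegacyJITSymbolResolver. The snippet shows a caller using the bulk API for a single name; it assumes LookupSet and LookupResult are keyed by symbol names (the exact template arguments were lost in this copy of the patch):

#include "llvm/ExecutionEngine/JITSymbol.h"

// Hypothetical helper: resolve one symbol through the new bulk interface.
llvm::Expected<llvm::JITTargetAddress>
resolveOne(llvm::JITSymbolResolver &R, llvm::StringRef Name) {
  llvm::JITSymbolResolver::LookupSet Query;
  Query.insert(Name);              // a bulk query with a single element
  auto Result = R.lookup(Query);   // fails if any requested symbol is missing
  if (!Result)
    return Result.takeError();
  return (*Result)[Name].getAddress(); // JITEvaluatedSymbol -> raw address
}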
Expected - addObject(ObjectPtr Object, std::shared_ptr Resolver) { + addObject(ObjectPtr Object, + std::shared_ptr Resolver) { StringRef ObjBuffer = Object->getBinary()->getData(); if (auto HandleOrErr = this->Remote.template callB(ObjBuffer)) { @@ -386,7 +387,8 @@ class RemoteObjectClientLayer : public RemoteObjectLayer { } std::map> Resolvers; + std::shared_ptr> + Resolvers; }; /// RemoteObjectServerLayer acts as a server and handling RPC calls for the diff --git a/include/llvm/ExecutionEngine/Orc/SymbolStringPool.h b/include/llvm/ExecutionEngine/Orc/SymbolStringPool.h new file mode 100644 index 000000000000..da40d1caaabe --- /dev/null +++ b/include/llvm/ExecutionEngine/Orc/SymbolStringPool.h @@ -0,0 +1,137 @@ +//===- SymbolStringPool.h - Multi-threaded pool for JIT symbols -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Contains a multi-threaded string pool suitable for use with ORC. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_SYMBOLSTRINGPOOL_H +#define LLVM_EXECUTIONENGINE_ORC_SYMBOLSTRINGPOOL_H + +#include "llvm/ADT/StringMap.h" +#include +#include + +namespace llvm { +namespace orc { + +class SymbolStringPtr; + +/// @brief String pool for symbol names used by the JIT. +class SymbolStringPool { + friend class SymbolStringPtr; +public: + /// @brief Create a symbol string pointer from the given string. + SymbolStringPtr intern(StringRef S); + + /// @brief Remove from the pool any entries that are no longer referenced. + void clearDeadEntries(); + + /// @brief Returns true if the pool is empty. + bool empty() const; +private: + using RefCountType = std::atomic; + using PoolMap = StringMap; + using PoolMapEntry = StringMapEntry; + mutable std::mutex PoolMutex; + PoolMap Pool; +}; + +/// @brief Pointer to a pooled string representing a symbol name. 
+class SymbolStringPtr { + friend class SymbolStringPool; + friend bool operator==(const SymbolStringPtr &LHS, + const SymbolStringPtr &RHS); + friend bool operator<(const SymbolStringPtr &LHS, const SymbolStringPtr &RHS); + +public: + SymbolStringPtr() = default; + SymbolStringPtr(const SymbolStringPtr &Other) + : S(Other.S) { + if (S) + ++S->getValue(); + } + + SymbolStringPtr& operator=(const SymbolStringPtr &Other) { + if (S) + --S->getValue(); + S = Other.S; + if (S) + ++S->getValue(); + return *this; + } + + SymbolStringPtr(SymbolStringPtr &&Other) : S(nullptr) { + std::swap(S, Other.S); + } + + SymbolStringPtr& operator=(SymbolStringPtr &&Other) { + if (S) + --S->getValue(); + S = nullptr; + std::swap(S, Other.S); + return *this; + } + + ~SymbolStringPtr() { + if (S) + --S->getValue(); + } + + StringRef operator*() const { return S->first(); } + +private: + + SymbolStringPtr(SymbolStringPool::PoolMapEntry *S) + : S(S) { + if (S) + ++S->getValue(); + } + + SymbolStringPool::PoolMapEntry *S = nullptr; +}; + +inline bool operator==(const SymbolStringPtr &LHS, const SymbolStringPtr &RHS) { + return LHS.S == RHS.S; +} + +inline bool operator!=(const SymbolStringPtr &LHS, const SymbolStringPtr &RHS) { + return !(LHS == RHS); +} + +inline bool operator<(const SymbolStringPtr &LHS, const SymbolStringPtr &RHS) { + return LHS.S < RHS.S; +} + +inline SymbolStringPtr SymbolStringPool::intern(StringRef S) { + std::lock_guard Lock(PoolMutex); + PoolMap::iterator I; + bool Added; + std::tie(I, Added) = Pool.try_emplace(S, 0); + return SymbolStringPtr(&*I); +} + +inline void SymbolStringPool::clearDeadEntries() { + std::lock_guard Lock(PoolMutex); + for (auto I = Pool.begin(), E = Pool.end(); I != E;) { + auto Tmp = I++; + if (Tmp->second == 0) + Pool.erase(Tmp); + } +} + +inline bool SymbolStringPool::empty() const { + std::lock_guard Lock(PoolMutex); + return Pool.empty(); +} + +} // end namespace orc +} // end namespace llvm + +#endif // LLVM_EXECUTIONENGINE_ORC_SYMBOLSTRINGPOOL_H diff --git a/include/llvm/ExecutionEngine/RTDyldMemoryManager.h b/include/llvm/ExecutionEngine/RTDyldMemoryManager.h index 0c1862c5c3ea..ee75202d2b62 100644 --- a/include/llvm/ExecutionEngine/RTDyldMemoryManager.h +++ b/include/llvm/ExecutionEngine/RTDyldMemoryManager.h @@ -56,7 +56,7 @@ class MCJITMemoryManager : public RuntimeDyld::MemoryManager { // FIXME: As the RuntimeDyld fills out, additional routines will be needed // for the varying types of objects to be allocated. 
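A small usage sketch for the pool/pointer pair defined above, exercising the reference counting that clearDeadEntries() relies on (self-contained apart from the new header):

#include "llvm/ExecutionEngine/Orc/SymbolStringPool.h"
#include <cassert>

int main() {
  llvm::orc::SymbolStringPool Pool;
  {
    auto A = Pool.intern("foo");
    auto B = Pool.intern("foo");  // same entry, ref-count bumped to 2
    assert(A == B);               // identity of the pooled entry, not a string compare
    assert(*A == "foo");
    Pool.clearDeadEntries();      // "foo" is still referenced, so it survives
    assert(!Pool.empty());
  }
  // Both SymbolStringPtrs are gone; the entry's count is back to zero.
  Pool.clearDeadEntries();
  assert(Pool.empty());
  return 0;
}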
class RTDyldMemoryManager : public MCJITMemoryManager, - public JITSymbolResolver { + public LegacyJITSymbolResolver { public: RTDyldMemoryManager() = default; RTDyldMemoryManager(const RTDyldMemoryManager&) = delete; diff --git a/include/llvm/FuzzMutate/IRMutator.h b/include/llvm/FuzzMutate/IRMutator.h index 65ab871db0ef..9aa9d6d6a4bc 100644 --- a/include/llvm/FuzzMutate/IRMutator.h +++ b/include/llvm/FuzzMutate/IRMutator.h @@ -16,6 +16,7 @@ #ifndef LLVM_FUZZMUTATE_IRMUTATOR_H #define LLVM_FUZZMUTATE_IRMUTATOR_H +#include "llvm/ADT/Optional.h" #include "llvm/FuzzMutate/OpDescriptor.h" #include "llvm/Support/ErrorHandling.h" @@ -74,7 +75,8 @@ class IRMutator { class InjectorIRStrategy : public IRMutationStrategy { std::vector Operations; - fuzzerop::OpDescriptor chooseOperation(Value *Src, RandomIRBuilder &IB); + Optional chooseOperation(Value *Src, + RandomIRBuilder &IB); public: InjectorIRStrategy(std::vector &&Operations) diff --git a/include/llvm/FuzzMutate/OpDescriptor.h b/include/llvm/FuzzMutate/OpDescriptor.h index 322c599dc7ff..dd30fda99bea 100644 --- a/include/llvm/FuzzMutate/OpDescriptor.h +++ b/include/llvm/FuzzMutate/OpDescriptor.h @@ -20,6 +20,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" #include @@ -128,7 +129,7 @@ static inline SourcePred anyFloatType() { static inline SourcePred anyPtrType() { auto Pred = [](ArrayRef, const Value *V) { - return V->getType()->isPointerTy(); + return V->getType()->isPointerTy() && !V->isSwiftError(); }; auto Make = [](ArrayRef, ArrayRef Ts) { std::vector Result; @@ -140,8 +141,37 @@ static inline SourcePred anyPtrType() { return {Pred, Make}; } +static inline SourcePred sizedPtrType() { + auto Pred = [](ArrayRef, const Value *V) { + if (V->isSwiftError()) + return false; + + if (const auto *PtrT = dyn_cast(V->getType())) + return PtrT->getElementType()->isSized(); + return false; + }; + auto Make = [](ArrayRef, ArrayRef Ts) { + std::vector Result; + + for (Type *T : Ts) + if (T->isSized()) + Result.push_back(UndefValue::get(PointerType::getUnqual(T))); + + return Result; + }; + return {Pred, Make}; +} + static inline SourcePred anyAggregateType() { auto Pred = [](ArrayRef, const Value *V) { + // We can't index zero sized arrays. + if (isa(V->getType())) + return V->getType()->getArrayNumElements() > 0; + + // Structs can also be zero sized. I.e opaque types. + if (isa(V->getType())) + return V->getType()->getStructNumElements() > 0; + return V->getType()->isAggregateType(); }; // TODO: For now we only find aggregates in BaseTypes. It might be better to diff --git a/include/llvm/IR/Attributes.td b/include/llvm/IR/Attributes.td index 2cf58b0fa413..ebe5c1985875 100644 --- a/include/llvm/IR/Attributes.td +++ b/include/llvm/IR/Attributes.td @@ -164,6 +164,9 @@ def SanitizeThread : EnumAttr<"sanitize_thread">; /// MemorySanitizer is on. def SanitizeMemory : EnumAttr<"sanitize_memory">; +/// HWAddressSanitizer is on. +def SanitizeHWAddress : EnumAttr<"sanitize_hwaddress">; + /// Argument is swift error. 
def SwiftError : EnumAttr<"swifterror">; @@ -200,6 +203,7 @@ class CompatRule { def : CompatRule<"isEqual">; def : CompatRule<"isEqual">; def : CompatRule<"isEqual">; +def : CompatRule<"isEqual">; def : CompatRule<"isEqual">; class MergeRule { diff --git a/include/llvm/IR/ConstantRange.h b/include/llvm/IR/ConstantRange.h index ff6495e7f075..6889e2658244 100644 --- a/include/llvm/IR/ConstantRange.h +++ b/include/llvm/IR/ConstantRange.h @@ -96,9 +96,9 @@ class LLVM_NODISCARD ConstantRange { /// /// NB! The returned set does *not* contain **all** possible values of X for /// which "X BinOpC Y" does not wrap -- some viable values of X may be - /// missing, so you cannot use this to constrain X's range. E.g. in the last - /// example, "(-2) + 1" is both nsw and nuw (so the "X" could be -2), but (-2) - /// is not in the set returned. + /// missing, so you cannot use this to constrain X's range. E.g. in the + /// fourth example, "(-2) + 1" is both nsw and nuw (so the "X" could be -2), + /// but (-2) is not in the set returned. /// /// Examples: /// typedef OverflowingBinaryOperator OBO; @@ -109,6 +109,10 @@ class LLVM_NODISCARD ConstantRange { /// MGNR(Add, [i8 1, 2), OBO::NoUnsignedWrap | OBO::NoSignedWrap) /// == [0,INT_MAX) /// MGNR(Add, [i8 -1, 6), OBO::NoSignedWrap) == [INT_MIN+1, INT_MAX-4) + /// MGNR(Sub, [i8 1, 2), OBO::NoSignedWrap) == [-127, 128) + /// MGNR(Sub, [i8 1, 2), OBO::NoUnsignedWrap) == [1, 0) + /// MGNR(Sub, [i8 1, 2), OBO::NoUnsignedWrap | OBO::NoSignedWrap) + /// == [1,INT_MAX) static ConstantRange makeGuaranteedNoWrapRegion(Instruction::BinaryOps BinOp, const ConstantRange &Other, unsigned NoWrapKind); @@ -313,6 +317,10 @@ class LLVM_NODISCARD ConstantRange { /// logical right shift of a value in this range and a value in \p Other. ConstantRange lshr(const ConstantRange &Other) const; + /// Return a new range representing the possible values resulting from a + /// arithmetic right shift of a value in this range and a value in \p Other. + ConstantRange ashr(const ConstantRange &Other) const; + /// Return a new range that is the logical not of the current set. ConstantRange inverse() const; diff --git a/include/llvm/IR/DIBuilder.h b/include/llvm/IR/DIBuilder.h index 3c2074dfe788..5244a4978df0 100644 --- a/include/llvm/IR/DIBuilder.h +++ b/include/llvm/IR/DIBuilder.h @@ -90,7 +90,10 @@ namespace llvm { /// /// If \c AllowUnresolved, collect unresolved nodes attached to the module /// in order to resolve cycles during \a finalize(). - explicit DIBuilder(Module &M, bool AllowUnresolved = true); + /// + /// If \p CU is given a value other than nullptr, then set \p CUNode to CU. 
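To make the newly documented Sub case concrete, here is a minimal sketch mirroring the MGNR(Sub, [i8 1, 2), OBO::NoUnsignedWrap) == [1, 0) example from the comment above; whether the matching ConstantRange.cpp change lands in this same patch is not visible from this hunk:

#include "llvm/ADT/APInt.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Operator.h"
#include <cassert>

using namespace llvm;

int main() {
  // [i8 1, 2), i.e. the single value 1, as in the documented examples.
  ConstantRange Other(APInt(8, 1), APInt(8, 2));

  // Any X >= 1 can have 1 subtracted without unsigned wrap, but X == 0 cannot,
  // so the guaranteed-no-wrap region is the wrapped range [1, 0).
  ConstantRange NUW = ConstantRange::makeGuaranteedNoWrapRegion(
      Instruction::Sub, Other, OverflowingBinaryOperator::NoUnsignedWrap);
  assert(NUW.getLower() == 1 && NUW.getUpper() == 0);
  return 0;
}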
+ explicit DIBuilder(Module &M, bool AllowUnresolved = true, + DICompileUnit *CU = nullptr); DIBuilder(const DIBuilder &) = delete; DIBuilder &operator=(const DIBuilder &) = delete; diff --git a/include/llvm/IR/DebugInfoFlags.def b/include/llvm/IR/DebugInfoFlags.def index 7ea6346998fe..96cc3e562851 100644 --- a/include/llvm/IR/DebugInfoFlags.def +++ b/include/llvm/IR/DebugInfoFlags.def @@ -43,6 +43,8 @@ HANDLE_DI_FLAG((1 << 18), IntroducedVirtual) HANDLE_DI_FLAG((1 << 19), BitField) HANDLE_DI_FLAG((1 << 20), NoReturn) HANDLE_DI_FLAG((1 << 21), MainSubprogram) +HANDLE_DI_FLAG((1 << 22), TypePassByValue) +HANDLE_DI_FLAG((1 << 23), TypePassByReference) // To avoid needing a dedicated value for IndirectVirtualBase, we use // the bitwise or of Virtual and FwdDecl, which does not otherwise @@ -52,7 +54,7 @@ HANDLE_DI_FLAG((1 << 2) | (1 << 5), IndirectVirtualBase) #ifdef DI_FLAG_LARGEST_NEEDED // intended to be used with ADT/BitmaskEnum.h // NOTE: always must be equal to largest flag, check this when adding new flag -HANDLE_DI_FLAG((1 << 21), Largest) +HANDLE_DI_FLAG((1 << 23), Largest) #undef DI_FLAG_LARGEST_NEEDED #endif diff --git a/include/llvm/IR/DebugInfoMetadata.h b/include/llvm/IR/DebugInfoMetadata.h index c35b3bede2a1..f58f3df7b74a 100644 --- a/include/llvm/IR/DebugInfoMetadata.h +++ b/include/llvm/IR/DebugInfoMetadata.h @@ -633,6 +633,10 @@ class DIType : public DIScope { bool isStaticMember() const { return getFlags() & FlagStaticMember; } bool isLValueReference() const { return getFlags() & FlagLValueReference; } bool isRValueReference() const { return getFlags() & FlagRValueReference; } + bool isTypePassByValue() const { return getFlags() & FlagTypePassByValue; } + bool isTypePassByReference() const { + return getFlags() & FlagTypePassByReference; + } static bool classof(const Metadata *MD) { switch (MD->getMetadataID()) { @@ -2297,8 +2301,9 @@ class DIExpression : public MDNode { /// Prepend \p DIExpr with a deref and offset operation and optionally turn it /// into a stack value. - static DIExpression *prepend(const DIExpression *DIExpr, bool Deref, - int64_t Offset = 0, bool StackValue = false); + static DIExpression *prepend(const DIExpression *DIExpr, bool DerefBefore, + int64_t Offset = 0, bool DerefAfter = false, + bool StackValue = false); /// Create a DIExpression to describe one part of an aggregate variable that /// is fragmented across multiple Values. The DW_OP_LLVM_fragment operation diff --git a/include/llvm/IR/Dominators.h b/include/llvm/IR/Dominators.h index 6ad99e516fba..c5373376adef 100644 --- a/include/llvm/IR/Dominators.h +++ b/include/llvm/IR/Dominators.h @@ -290,6 +290,90 @@ class DominatorTreeWrapperPass : public FunctionPass { void print(raw_ostream &OS, const Module *M = nullptr) const override; }; +//===------------------------------------- +/// \brief Class to defer updates to a DominatorTree. +/// +/// Definition: Applying updates to every edge insertion and deletion is +/// expensive and not necessary. When one needs the DominatorTree for analysis +/// they can request a flush() to perform a larger batch update. This has the +/// advantage of the DominatorTree inspecting the set of updates to find +/// duplicates or unnecessary subtree updates. +/// +/// The scope of DeferredDominance operates at a Function level. +/// +/// It is not necessary for the user to scrub the updates for duplicates or +/// updates that point to the same block (Delete, BB_A, BB_A). 
Performance +/// can be gained if the caller attempts to batch updates before submitting +/// to applyUpdates(ArrayRef) in cases where duplicate edge requests will +/// occur. +/// +/// It is required for the state of the LLVM IR to be applied *before* +/// submitting updates. The update routines must analyze the current state +/// between a pair of (From, To) basic blocks to determine if the update +/// needs to be queued. +/// Example (good): +/// TerminatorInstructionBB->removeFromParent(); +/// DDT->deleteEdge(BB, Successor); +/// Example (bad): +/// DDT->deleteEdge(BB, Successor); +/// TerminatorInstructionBB->removeFromParent(); +class DeferredDominance { +public: + DeferredDominance(DominatorTree &DT_) : DT(DT_) {} + + /// \brief Queues multiple updates and discards duplicates. + void applyUpdates(ArrayRef Updates); + + /// \brief Helper method for a single edge insertion. It's almost always + /// better to batch updates and call applyUpdates to quickly remove duplicate + /// edges. This is best used when there is only a single insertion needed to + /// update Dominators. + void insertEdge(BasicBlock *From, BasicBlock *To); + + /// \brief Helper method for a single edge deletion. It's almost always better + /// to batch updates and call applyUpdates to quickly remove duplicate edges. + /// This is best used when there is only a single deletion needed to update + /// Dominators. + void deleteEdge(BasicBlock *From, BasicBlock *To); + + /// \brief Delays the deletion of a basic block until a flush() event. + void deleteBB(BasicBlock *DelBB); + + /// \brief Returns true if DelBB is awaiting deletion at a flush() event. + bool pendingDeletedBB(BasicBlock *DelBB); + + /// \brief Flushes all pending updates and block deletions. Returns a + /// correct DominatorTree reference to be used by the caller for analysis. + DominatorTree &flush(); + + /// \brief Drops all internal state and forces a (slow) recalculation of the + /// DominatorTree based on the current state of the LLVM IR in F. This should + /// only be used in corner cases such as the Entry block of F being deleted. + void recalculate(Function &F); + + /// \brief Debug method to help view the state of pending updates. + LLVM_DUMP_METHOD void dump() const; + +private: + DominatorTree &DT; + SmallVector PendUpdates; + SmallPtrSet DeletedBBs; + + /// Apply an update (Kind, From, To) to the internal queued updates. The + /// update is only added when determined to be necessary. Checks for + /// self-domination, unnecessary updates, duplicate requests, and balanced + /// pairs of requests are all performed. Returns true if the update is + /// queued and false if it is discarded. + bool applyUpdate(DominatorTree::UpdateKind Kind, BasicBlock *From, + BasicBlock *To); + + /// Performs all pending basic block deletions. We have to defer the deletion + /// of these blocks until after the DominatorTree updates are applied. The + /// internal workings of the DominatorTree code expect every update's From + /// and To blocks to exist and to be a member of the same Function. 
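Before the flushDelBB declaration that follows, a usage sketch for the class being added here. The helper function is hypothetical; the required IR-before-update ordering is taken from the "good"/"bad" examples in the comment above:

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"

// Hypothetical pass helper: queue the removal of an edge in a
// DeferredDominance instead of updating the DominatorTree eagerly.
void removeEdgeDeferred(llvm::BasicBlock *BB, llvm::BasicBlock *DeadSucc,
                        llvm::DeferredDominance &DDT) {
  // Mutate the IR first; the deferred updater inspects the current CFG to
  // decide whether an update actually needs to be queued. (As in the "good"
  // example above; a real pass would also install a replacement terminator.)
  BB->getTerminator()->removeFromParent();
  DDT.deleteEdge(BB, DeadSucc);
}

// Later, only when dominance information is actually needed:
//   llvm::DominatorTree &DT = DDT.flush();  // applies all pending updates at once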
+ bool flushDelBB(); +}; + } // end namespace llvm #endif // LLVM_IR_DOMINATORS_H diff --git a/include/llvm/IR/Function.h b/include/llvm/IR/Function.h index 574a40e90f8d..9204b77c8ee6 100644 --- a/include/llvm/IR/Function.h +++ b/include/llvm/IR/Function.h @@ -131,7 +131,7 @@ class Function : public GlobalObject, public ilist_node { // This is here to help easily convert from FunctionT * (Function * or // MachineFunction *) in BlockFrequencyInfoImpl to Function * by calling // FunctionT->getFunction(). - const Function *getFunction() const { return this; } + const Function &getFunction() const { return *this; } static Function *Create(FunctionType *Ty, LinkageTypes Linkage, const Twine &N = "", Module *M = nullptr) { @@ -218,6 +218,7 @@ class Function : public GlobalObject, public ilist_node { Attribute::get(getContext(), Kind, Val)); } + /// @brief Add function attributes to this function. void addFnAttr(Attribute Attr) { addAttribute(AttributeList::FunctionIndex, Attr); } @@ -233,20 +234,59 @@ class Function : public GlobalObject, public ilist_node { getContext(), AttributeList::FunctionIndex, Kind)); } + enum ProfileCountType { PCT_Invalid, PCT_Real, PCT_Synthetic }; + + /// Class to represent profile counts. + /// + /// This class represents both real and synthetic profile counts. + class ProfileCount { + private: + uint64_t Count; + ProfileCountType PCT; + static ProfileCount Invalid; + + public: + ProfileCount() : Count(-1), PCT(PCT_Invalid) {} + ProfileCount(uint64_t Count, ProfileCountType PCT) + : Count(Count), PCT(PCT) {} + bool hasValue() const { return PCT != PCT_Invalid; } + uint64_t getCount() const { return Count; } + ProfileCountType getType() const { return PCT; } + bool isSynthetic() const { return PCT == PCT_Synthetic; } + explicit operator bool() { return hasValue(); } + bool operator!() const { return !hasValue(); } + // Update the count retaining the same profile count type. + ProfileCount &setCount(uint64_t C) { + Count = C; + return *this; + } + static ProfileCount getInvalid() { return ProfileCount(-1, PCT_Invalid); } + }; + /// \brief Set the entry count for this function. /// /// Entry count is the number of times this function was executed based on - /// pgo data. \p Imports points to a set of GUIDs that needs to be imported - /// by the function for sample PGO, to enable the same inlines as the - /// profiled optimized binary. - void setEntryCount(uint64_t Count, + /// pgo data. \p Imports points to a set of GUIDs that needs to + /// be imported by the function for sample PGO, to enable the same inlines as + /// the profiled optimized binary. + void setEntryCount(ProfileCount Count, + const DenseSet *Imports = nullptr); + + /// A convenience wrapper for setting entry count + void setEntryCount(uint64_t Count, ProfileCountType Type = PCT_Real, const DenseSet *Imports = nullptr); /// \brief Get the entry count for this function. /// /// Entry count is the number of times the function was executed based on /// pgo data. - Optional getEntryCount() const; + ProfileCount getEntryCount() const; + + /// Return true if the function is annotated with profile data. + /// + /// Presence of entry counts from a profile run implies the function has + /// profile annotations. + bool hasProfileData() const { return getEntryCount().hasValue(); } /// Returns the set of GUIDs that needs to be imported to the function for /// sample PGO, to enable the same inlines as the profiled optimized binary. 
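A short sketch of the new ProfileCount wrapper in use; the function F and the count value are hypothetical:

#include "llvm/IR/Function.h"
#include <cassert>

// Hypothetical: annotate a function with a real (profile-derived) entry count
// and read it back through the new wrapper type.
void annotateEntryCount(llvm::Function &F) {
  F.setEntryCount(1000, llvm::Function::PCT_Real);  // convenience overload

  if (F.hasProfileData()) {                         // true once a count exists
    llvm::Function::ProfileCount Count = F.getEntryCount();
    assert(Count.hasValue() && !Count.isSynthetic());
    assert(Count.getCount() == 1000);
  }
}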
@@ -262,6 +302,8 @@ class Function : public GlobalObject, public ilist_node { bool hasFnAttribute(Attribute::AttrKind Kind) const { return AttributeSets.hasFnAttribute(Kind); } + + /// @brief Return true if the function has the attribute. bool hasFnAttribute(StringRef Kind) const { return AttributeSets.hasFnAttribute(Kind); } @@ -270,6 +312,8 @@ class Function : public GlobalObject, public ilist_node { Attribute getFnAttribute(Attribute::AttrKind Kind) const { return getAttribute(AttributeList::FunctionIndex, Kind); } + + /// @brief Return the attribute for the given attribute kind. Attribute getFnAttribute(StringRef Kind) const { return getAttribute(AttributeList::FunctionIndex, Kind); } @@ -336,10 +380,12 @@ class Function : public GlobalObject, public ilist_node { return getAttributes().hasParamAttribute(ArgNo, Kind); } + /// @brief gets the attribute from the list of attributes. Attribute getAttribute(unsigned i, Attribute::AttrKind Kind) const { return AttributeSets.getAttribute(i, Kind); } + /// @brief gets the attribute from the list of attributes. Attribute getAttribute(unsigned i, StringRef Kind) const { return AttributeSets.getAttribute(i, Kind); } @@ -422,7 +468,7 @@ class Function : public GlobalObject, public ilist_node { } void setOnlyAccessesArgMemory() { addFnAttr(Attribute::ArgMemOnly); } - /// @brief Determine if the function may only access memory that is + /// @brief Determine if the function may only access memory that is /// inaccessible from the IR. bool onlyAccessesInaccessibleMemory() const { return hasFnAttribute(Attribute::InaccessibleMemOnly); @@ -490,7 +536,7 @@ class Function : public GlobalObject, public ilist_node { } void setDoesNotRecurse() { addFnAttr(Attribute::NoRecurse); - } + } /// @brief True if the ABI mandates (or the user requested) that this /// function be in a unwind table. diff --git a/include/llvm/IR/GlobalValue.h b/include/llvm/IR/GlobalValue.h index 1793de7887fc..116b54e0d0f9 100644 --- a/include/llvm/IR/GlobalValue.h +++ b/include/llvm/IR/GlobalValue.h @@ -77,11 +77,12 @@ class GlobalValue : public Constant { GlobalValue(Type *Ty, ValueTy VTy, Use *Ops, unsigned NumOps, LinkageTypes Linkage, const Twine &Name, unsigned AddressSpace) : Constant(PointerType::get(Ty, AddressSpace), VTy, Ops, NumOps), - ValueType(Ty), Linkage(Linkage), Visibility(DefaultVisibility), + ValueType(Ty), Visibility(DefaultVisibility), UnnamedAddrVal(unsigned(UnnamedAddr::None)), DllStorageClass(DefaultStorageClass), ThreadLocal(NotThreadLocal), - HasLLVMReservedName(false), IsDSOLocal(false), - IntID((Intrinsic::ID)0U), Parent(nullptr) { + HasLLVMReservedName(false), IsDSOLocal(false), IntID((Intrinsic::ID)0U), + Parent(nullptr) { + setLinkage(Linkage); setName(Name); } @@ -232,6 +233,8 @@ class GlobalValue : public Constant { assert((!hasLocalLinkage() || V == DefaultVisibility) && "local linkage requires default visibility"); Visibility = V; + if (!hasExternalWeakLinkage() && V != DefaultVisibility) + setDSOLocal(true); } /// If the value is "Thread Local", its value isn't shared by the threads. 
@@ -434,8 +437,10 @@ class GlobalValue : public Constant { } void setLinkage(LinkageTypes LT) { - if (isLocalLinkage(LT)) + if (isLocalLinkage(LT)) { Visibility = DefaultVisibility; + setDSOLocal(true); + } Linkage = LT; } LinkageTypes getLinkage() const { return LinkageTypes(Linkage); } diff --git a/include/llvm/IR/Instruction.h b/include/llvm/IR/Instruction.h index 41f379b87c23..76bc4010d8c8 100644 --- a/include/llvm/IR/Instruction.h +++ b/include/llvm/IR/Instruction.h @@ -34,6 +34,7 @@ namespace llvm { class BasicBlock; class FastMathFlags; class MDNode; +class Module; struct AAMDNodes; template <> struct ilist_alloc_traits { @@ -534,6 +535,14 @@ class Instruction : public User, /// matters, isSafeToSpeculativelyExecute may be more appropriate. bool mayHaveSideEffects() const { return mayWriteToMemory() || mayThrow(); } + /// Return true if the instruction can be removed if the result is unused. + /// + /// When constant folding some instructions cannot be removed even if their + /// results are unused. Specifically terminator instructions and calls that + /// may have side effects cannot be removed without semantically changing the + /// generated program. + bool isSafeToRemove() const; + /// Return true if the instruction is a variety of EH-block. bool isEHPad() const { switch (getOpcode()) { diff --git a/include/llvm/IR/IntrinsicInst.h b/include/llvm/IR/IntrinsicInst.h index 2ca0a24cbae1..9d2b046ca490 100644 --- a/include/llvm/IR/IntrinsicInst.h +++ b/include/llvm/IR/IntrinsicInst.h @@ -243,6 +243,8 @@ namespace llvm { return cast(getRawDest()->getType())->getAddressSpace(); } + unsigned getDestAlignment() const { return getParamAlignment(ARG_DEST); } + /// Set the specified arguments of the instruction. void setDest(Value *Ptr) { assert(getRawDest()->getType() == Ptr->getType() && @@ -250,6 +252,13 @@ namespace llvm { setArgOperand(ARG_DEST, Ptr); } + void setDestAlignment(unsigned Align) { + removeParamAttr(ARG_DEST, Attribute::Alignment); + if (Align > 0) + addParamAttr(ARG_DEST, + Attribute::getWithAlignment(getContext(), Align)); + } + void setLength(Value *L) { assert(getLength()->getType() == L->getType() && "setLength called with value of wrong type!"); @@ -347,12 +356,23 @@ namespace llvm { return cast(getRawSource()->getType())->getAddressSpace(); } + unsigned getSourceAlignment() const { + return getParamAlignment(ARG_SOURCE); + } + void setSource(Value *Ptr) { assert(getRawSource()->getType() == Ptr->getType() && "setSource called with pointer of wrong type!"); setArgOperand(ARG_SOURCE, Ptr); } + void setSourceAlignment(unsigned Align) { + removeParamAttr(ARG_SOURCE, Attribute::Alignment); + if (Align > 0) + addParamAttr(ARG_SOURCE, + Attribute::getWithAlignment(getContext(), Align)); + } + static bool classof(const IntrinsicInst *I) { switch (I->getIntrinsicID()) { case Intrinsic::memcpy_element_unordered_atomic: @@ -394,16 +414,13 @@ namespace llvm { /// This is the common base class for memset/memcpy/memmove. class MemIntrinsic : public MemIntrinsicBase { private: - enum { ARG_ALIGN = 3, ARG_VOLATILE = 4 }; + enum { ARG_VOLATILE = 3 }; public: - ConstantInt *getAlignmentCst() const { - return cast(const_cast(getArgOperand(ARG_ALIGN))); - } - - unsigned getAlignment() const { - return getAlignmentCst()->getZExtValue(); - } + // TODO: Remove this method entirely. + // Interim, for now, during transition from having an alignment + // arg to using alignment attributes. 
+ unsigned getAlignment() const; ConstantInt *getVolatileCst() const { return cast( @@ -414,14 +431,13 @@ namespace llvm { return !getVolatileCst()->isZero(); } - void setAlignment(Constant *A) { setArgOperand(ARG_ALIGN, A); } + // TODO: Remove this method entirely. It is here only during transition + // from having an explicit alignment arg to using alignment attributes. + // For now we always set dest & source alignment attributes to match + void setAlignment(unsigned Align); void setVolatile(Constant *V) { setArgOperand(ARG_VOLATILE, V); } - Type *getAlignmentType() const { - return getArgOperand(ARG_ALIGN)->getType(); - } - // Methods for support type inquiry through isa, cast, and dyn_cast: static bool classof(const IntrinsicInst *I) { switch (I->getIntrinsicID()) { @@ -462,11 +478,14 @@ namespace llvm { /// This class wraps the llvm.memcpy/memmove intrinsics. class MemTransferInst : public MemIntrinsic { + private: + enum { ARG_SOURCE = 1 }; + public: /// Return the arguments to the instruction. - Value *getRawSource() const { return const_cast(getArgOperand(1)); } - const Use &getRawSourceUse() const { return getArgOperandUse(1); } - Use &getRawSourceUse() { return getArgOperandUse(1); } + Value *getRawSource() const { return const_cast(getArgOperand(ARG_SOURCE)); } + const Use &getRawSourceUse() const { return getArgOperandUse(ARG_SOURCE); } + Use &getRawSourceUse() { return getArgOperandUse(ARG_SOURCE); } /// This is just like getRawSource, but it strips off any cast /// instructions that feed it, giving the original input. The returned @@ -477,10 +496,21 @@ namespace llvm { return cast(getRawSource()->getType())->getAddressSpace(); } + unsigned getSourceAlignment() const { + return getParamAlignment(ARG_SOURCE); + } + void setSource(Value *Ptr) { assert(getRawSource()->getType() == Ptr->getType() && "setSource called with pointer of wrong type!"); - setArgOperand(1, Ptr); + setArgOperand(ARG_SOURCE, Ptr); + } + + void setSourceAlignment(unsigned Align) { + removeParamAttr(ARG_SOURCE, Attribute::Alignment); + if (Align > 0) + addParamAttr(ARG_SOURCE, + Attribute::getWithAlignment(getContext(), Align)); } // Methods for support type inquiry through isa, cast, and dyn_cast: @@ -493,6 +523,19 @@ namespace llvm { } }; + inline unsigned MemIntrinsic::getAlignment() const { + if (const auto *MTI = dyn_cast(this)) + return std::min(MTI->getDestAlignment(), MTI->getSourceAlignment()); + else + return getDestAlignment(); + } + + inline void MemIntrinsic::setAlignment(unsigned Align) { + setDestAlignment(Align); + if (auto *MTI = dyn_cast(this)) + MTI->setSourceAlignment(Align); + } + /// This class wraps the llvm.memcpy intrinsic. 
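Before MemCpyInst below, a sketch of how alignment is now handled for the transfer intrinsics: it comes from align parameter attributes on the pointer arguments rather than from the removed i32 operand, and the interim getAlignment()/setAlignment() defined above simply bridge to those attributes. The instruction pointer and new alignment are hypothetical:

#include "llvm/IR/IntrinsicInst.h"
#include <algorithm>

// Hypothetical: inspect and retag the alignment of a memcpy/memmove call.
void retagAlignment(llvm::MemTransferInst *MTI, unsigned NewAlign) {
  unsigned DstAlign = MTI->getDestAlignment();    // align attribute on the dest pointer
  unsigned SrcAlign = MTI->getSourceAlignment();  // align attribute on the source pointer

  // This is what the interim MemIntrinsic::getAlignment() reports now.
  unsigned Known = std::min(DstAlign, SrcAlign);
  (void)Known;

  // The interim setter keeps both attributes in sync, mirroring the old
  // single-operand semantics.
  MTI->setAlignment(NewAlign);
}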
class MemCpyInst : public MemTransferInst { public: @@ -606,12 +649,23 @@ namespace llvm { return cast(getRawSource()->getType())->getAddressSpace(); } + unsigned getSourceAlignment() const { + return getParamAlignment(ARG_SOURCE); + } + void setSource(Value *Ptr) { assert(getRawSource()->getType() == Ptr->getType() && "setSource called with pointer of wrong type!"); setArgOperand(ARG_SOURCE, Ptr); } + void setSourceAlignment(unsigned Align) { + removeParamAttr(ARG_SOURCE, Attribute::Alignment); + if (Align > 0) + addParamAttr(ARG_SOURCE, + Attribute::getWithAlignment(getContext(), Align)); + } + static bool classof(const IntrinsicInst *I) { switch (I->getIntrinsicID()) { case Intrinsic::memcpy: diff --git a/include/llvm/IR/Intrinsics.td b/include/llvm/IR/Intrinsics.td index 07de0568cab0..c8f5c64fadc1 100644 --- a/include/llvm/IR/Intrinsics.td +++ b/include/llvm/IR/Intrinsics.td @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// include "llvm/CodeGen/ValueTypes.td" +include "llvm/CodeGen/SDNodeProperties.td" //===----------------------------------------------------------------------===// // Properties we keep track of for intrinsics. @@ -264,16 +265,17 @@ def llvm_vararg_ty : LLVMType; // this means vararg here // intrinsic. // * Properties can be set to describe the behavior of the intrinsic. // -class SDPatternOperator; class Intrinsic ret_types, list param_types = [], - list properties = [], - string name = ""> : SDPatternOperator { + list intr_properties = [], + string name = "", + list sd_properties = []> : SDPatternOperator { string LLVMName = name; string TargetPrefix = ""; // Set to a prefix for target-specific intrinsics. list RetTypes = ret_types; list ParamTypes = param_types; - list IntrProperties = properties; + list IntrProperties = intr_properties; + let Properties = sd_properties; bit isTarget = 0; } @@ -388,17 +390,17 @@ def int_instrprof_value_profile : Intrinsic<[], def int_memcpy : Intrinsic<[], [llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty, - llvm_i32_ty, llvm_i1_ty], + llvm_i1_ty], [IntrArgMemOnly, NoCapture<0>, NoCapture<1>, WriteOnly<0>, ReadOnly<1>]>; def int_memmove : Intrinsic<[], [llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty, - llvm_i32_ty, llvm_i1_ty], + llvm_i1_ty], [IntrArgMemOnly, NoCapture<0>, NoCapture<1>, ReadOnly<1>]>; def int_memset : Intrinsic<[], [llvm_anyptr_ty, llvm_i8_ty, llvm_anyint_ty, - llvm_i32_ty, llvm_i1_ty], + llvm_i1_ty], [IntrArgMemOnly, NoCapture<0>, WriteOnly<0>]>; // FIXME: Add version of these floating point intrinsics which allow non-default diff --git a/include/llvm/IR/IntrinsicsAArch64.td b/include/llvm/IR/IntrinsicsAArch64.td index 65c9aaab975d..50341338c399 100644 --- a/include/llvm/IR/IntrinsicsAArch64.td +++ b/include/llvm/IR/IntrinsicsAArch64.td @@ -146,6 +146,9 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". 
class AdvSIMD_CvtFPToFx_Intrinsic : Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty, llvm_i32_ty], [IntrNoMem]>; + + class AdvSIMD_1Arg_Intrinsic + : Intrinsic<[llvm_any_ty], [LLVMMatchType<0>], [IntrNoMem]>; } // Arithmetic ops @@ -244,7 +247,7 @@ let TargetPrefix = "aarch64", IntrProperties = [IntrNoMem] in { // Vector Max def int_aarch64_neon_smax : AdvSIMD_2VectorArg_Intrinsic; def int_aarch64_neon_umax : AdvSIMD_2VectorArg_Intrinsic; - def int_aarch64_neon_fmax : AdvSIMD_2VectorArg_Intrinsic; + def int_aarch64_neon_fmax : AdvSIMD_2FloatArg_Intrinsic; def int_aarch64_neon_fmaxnmp : AdvSIMD_2VectorArg_Intrinsic; // Vector Max Across Lanes @@ -256,7 +259,7 @@ let TargetPrefix = "aarch64", IntrProperties = [IntrNoMem] in { // Vector Min def int_aarch64_neon_smin : AdvSIMD_2VectorArg_Intrinsic; def int_aarch64_neon_umin : AdvSIMD_2VectorArg_Intrinsic; - def int_aarch64_neon_fmin : AdvSIMD_2VectorArg_Intrinsic; + def int_aarch64_neon_fmin : AdvSIMD_2FloatArg_Intrinsic; def int_aarch64_neon_fminnmp : AdvSIMD_2VectorArg_Intrinsic; // Vector Min/Max Number @@ -354,7 +357,7 @@ let TargetPrefix = "aarch64", IntrProperties = [IntrNoMem] in { def int_aarch64_neon_sqxtun : AdvSIMD_1VectorArg_Narrow_Intrinsic; // Vector Absolute Value - def int_aarch64_neon_abs : AdvSIMD_1IntArg_Intrinsic; + def int_aarch64_neon_abs : AdvSIMD_1Arg_Intrinsic; // Vector Saturating Absolute Value def int_aarch64_neon_sqabs : AdvSIMD_1IntArg_Intrinsic; diff --git a/include/llvm/IR/IntrinsicsAMDGPU.td b/include/llvm/IR/IntrinsicsAMDGPU.td index f507f9c16689..454b62bdfb6d 100644 --- a/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/include/llvm/IR/IntrinsicsAMDGPU.td @@ -288,13 +288,29 @@ class AMDGPUAtomicIncIntrin : Intrinsic<[llvm_anyint_ty], llvm_i32_ty, // ordering llvm_i32_ty, // scope llvm_i1_ty], // isVolatile - [IntrArgMemOnly, NoCapture<0>] + [IntrArgMemOnly, NoCapture<0>], "", + [SDNPMemOperand] >; def int_amdgcn_atomic_inc : AMDGPUAtomicIncIntrin; def int_amdgcn_atomic_dec : AMDGPUAtomicIncIntrin; -class AMDGPUImageLoad : Intrinsic < +class AMDGPUAtomicF32Intrin : + GCCBuiltin, + Intrinsic<[llvm_float_ty], + [LLVMAnyPointerType, + llvm_float_ty, + llvm_i32_ty, // ordering + llvm_i32_ty, // scope + llvm_i1_ty], // isVolatile + [IntrArgMemOnly, NoCapture<0>] +>; + +def int_amdgcn_atomic_fadd : AMDGPUAtomicF32Intrin<"__builtin_amdgcn_ds_fadd">; +def int_amdgcn_atomic_fmin : AMDGPUAtomicF32Intrin<"__builtin_amdgcn_ds_fmin">; +def int_amdgcn_atomic_fmax : AMDGPUAtomicF32Intrin<"__builtin_amdgcn_ds_fmax">; + +class AMDGPUImageLoad : Intrinsic < [llvm_anyfloat_ty], // vdata(VGPR) [llvm_anyint_ty, // vaddr(VGPR) llvm_anyint_ty, // rsrc(SGPR) @@ -303,11 +319,12 @@ class AMDGPUImageLoad : Intrinsic < llvm_i1_ty, // slc(imm) llvm_i1_ty, // lwe(imm) llvm_i1_ty], // da(imm) - [IntrReadMem]>; + !if(NoMem, [IntrNoMem], [IntrReadMem]), "", + !if(NoMem, [], [SDNPMemOperand])>; def int_amdgcn_image_load : AMDGPUImageLoad; def int_amdgcn_image_load_mip : AMDGPUImageLoad; -def int_amdgcn_image_getresinfo : AMDGPUImageLoad; +def int_amdgcn_image_getresinfo : AMDGPUImageLoad<1>; class AMDGPUImageStore : Intrinsic < [], @@ -319,12 +336,12 @@ class AMDGPUImageStore : Intrinsic < llvm_i1_ty, // slc(imm) llvm_i1_ty, // lwe(imm) llvm_i1_ty], // da(imm) - []>; + [IntrWriteMem], "", [SDNPMemOperand]>; def int_amdgcn_image_store : AMDGPUImageStore; def int_amdgcn_image_store_mip : AMDGPUImageStore; -class AMDGPUImageSample : Intrinsic < +class AMDGPUImageSample : Intrinsic < [llvm_anyfloat_ty], // vdata(VGPR) [llvm_anyfloat_ty, // vaddr(VGPR) 
llvm_anyint_ty, // rsrc(SGPR) @@ -335,7 +352,8 @@ class AMDGPUImageSample : Intrinsic < llvm_i1_ty, // slc(imm) llvm_i1_ty, // lwe(imm) llvm_i1_ty], // da(imm) - [IntrReadMem]>; + !if(NoMem, [IntrNoMem], [IntrReadMem]), "", + !if(NoMem, [], [SDNPMemOperand])>; // Basic sample def int_amdgcn_image_sample : AMDGPUImageSample; @@ -417,7 +435,7 @@ def int_amdgcn_image_gather4_c_b_o : AMDGPUImageSample; def int_amdgcn_image_gather4_c_b_cl_o : AMDGPUImageSample; def int_amdgcn_image_gather4_c_lz_o : AMDGPUImageSample; -def int_amdgcn_image_getlod : AMDGPUImageSample; +def int_amdgcn_image_getlod : AMDGPUImageSample<1>; class AMDGPUImageAtomic : Intrinsic < [llvm_i32_ty], @@ -427,7 +445,7 @@ class AMDGPUImageAtomic : Intrinsic < llvm_i1_ty, // r128(imm) llvm_i1_ty, // da(imm) llvm_i1_ty], // slc(imm) - []>; + [], "", [SDNPMemOperand]>; def int_amdgcn_image_atomic_swap : AMDGPUImageAtomic; def int_amdgcn_image_atomic_add : AMDGPUImageAtomic; @@ -450,7 +468,7 @@ def int_amdgcn_image_atomic_cmpswap : Intrinsic < llvm_i1_ty, // r128(imm) llvm_i1_ty, // da(imm) llvm_i1_ty], // slc(imm) - []>; + [], "", [SDNPMemOperand]>; class AMDGPUBufferLoad : Intrinsic < [llvm_anyfloat_ty], @@ -459,7 +477,7 @@ class AMDGPUBufferLoad : Intrinsic < llvm_i32_ty, // offset(SGPR/VGPR/imm) llvm_i1_ty, // glc(imm) llvm_i1_ty], // slc(imm) - [IntrReadMem]>; + [IntrReadMem], "", [SDNPMemOperand]>; def int_amdgcn_buffer_load_format : AMDGPUBufferLoad; def int_amdgcn_buffer_load : AMDGPUBufferLoad; @@ -471,7 +489,7 @@ class AMDGPUBufferStore : Intrinsic < llvm_i32_ty, // offset(SGPR/VGPR/imm) llvm_i1_ty, // glc(imm) llvm_i1_ty], // slc(imm) - [IntrWriteMem]>; + [IntrWriteMem], "", [SDNPMemOperand]>; def int_amdgcn_buffer_store_format : AMDGPUBufferStore; def int_amdgcn_buffer_store : AMDGPUBufferStore; @@ -486,7 +504,7 @@ def int_amdgcn_tbuffer_load : Intrinsic < llvm_i32_ty, // nfmt(imm) llvm_i1_ty, // glc(imm) llvm_i1_ty], // slc(imm) - []>; + [IntrReadMem], "", [SDNPMemOperand]>; def int_amdgcn_tbuffer_store : Intrinsic < [], @@ -500,7 +518,7 @@ def int_amdgcn_tbuffer_store : Intrinsic < llvm_i32_ty, // nfmt(imm) llvm_i1_ty, // glc(imm) llvm_i1_ty], // slc(imm) - []>; + [IntrWriteMem], "", [SDNPMemOperand]>; class AMDGPUBufferAtomic : Intrinsic < [llvm_i32_ty], @@ -509,7 +527,7 @@ class AMDGPUBufferAtomic : Intrinsic < llvm_i32_ty, // vindex(VGPR) llvm_i32_ty, // offset(SGPR/VGPR/imm) llvm_i1_ty], // slc(imm) - []>; + [], "", [SDNPMemOperand]>; def int_amdgcn_buffer_atomic_swap : AMDGPUBufferAtomic; def int_amdgcn_buffer_atomic_add : AMDGPUBufferAtomic; def int_amdgcn_buffer_atomic_sub : AMDGPUBufferAtomic; @@ -528,7 +546,7 @@ def int_amdgcn_buffer_atomic_cmpswap : Intrinsic< llvm_i32_ty, // vindex(VGPR) llvm_i32_ty, // offset(SGPR/VGPR/imm) llvm_i1_ty], // slc(imm) - []>; + [], "", [SDNPMemOperand]>; // Uses that do not set the done bit should set IntrWriteMem on the // call site. 
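Editorial note: a number of the intrinsic definitions in this patch gain explicit memory-behaviour properties (IntrReadMem, IntrWriteMem, and the SDNPMemOperand SelectionDAG property) where they previously had an empty property list. On the IR side these TableGen properties surface as ordinary attributes on the intrinsic declaration, so passes can query the behaviour generically. A minimal sketch, assuming CalledF is the declared intrinsic Function* (the variable name is illustrative, not from the patch):

    // Sketch only: IntrReadMem maps to readonly, IntrWriteMem to writeonly,
    // and IntrArgMemOnly to argmemonly on the declaration.
    bool ArgMemOnly = CalledF->onlyAccessesArgMemory();
    bool ReadsOnly  = CalledF->onlyReadsMemory();
    bool WritesOnly = CalledF->doesNotReadMemory();

SDNPMemOperand, by contrast, only affects instruction selection: it marks the target node as one that should carry a MachineMemOperand.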
@@ -570,7 +588,7 @@ def int_amdgcn_s_dcache_inv : def int_amdgcn_s_memtime : GCCBuiltin<"__builtin_amdgcn_s_memtime">, - Intrinsic<[llvm_i64_ty], [], []>; + Intrinsic<[llvm_i64_ty], [], [IntrReadMem]>; def int_amdgcn_s_sleep : GCCBuiltin<"__builtin_amdgcn_s_sleep">, @@ -816,7 +834,7 @@ def int_amdgcn_s_dcache_wb_vol : def int_amdgcn_s_memrealtime : GCCBuiltin<"__builtin_amdgcn_s_memrealtime">, - Intrinsic<[llvm_i64_ty], [], []>; + Intrinsic<[llvm_i64_ty], [], [IntrReadMem]>; // llvm.amdgcn.ds.permute def int_amdgcn_ds_permute : diff --git a/include/llvm/IR/IntrinsicsHexagon.td b/include/llvm/IR/IntrinsicsHexagon.td index 098245344725..5c96702bca76 100644 --- a/include/llvm/IR/IntrinsicsHexagon.td +++ b/include/llvm/IR/IntrinsicsHexagon.td @@ -5044,7 +5044,6 @@ def int_hexagon_V6_vassignp_128B : Hexagon_v2048v2048_Intrinsic_T<"HEXAGON_V6_vassignp_128B">; - // // Hexagon_iii_Intrinsic // tag : S6_rol_i_r @@ -5582,54 +5581,6 @@ class Hexagon_v1024i_Intrinsic [llvm_v32i32_ty], [llvm_i32_ty], [IntrNoMem]>; -// -// Hexagon_v512v512LLii_Intrinsic -// tag : V6_vlutb -class Hexagon_v512v512LLii_Intrinsic - : Hexagon_Intrinsic; - -// -// Hexagon_v1024v1024LLii_Intrinsic -// tag : V6_vlutb_128B -class Hexagon_v1024v1024LLii_Intrinsic - : Hexagon_Intrinsic; - -// -// Hexagon_v512v512v512LLii_Intrinsic -// tag : V6_vlutb_acc -class Hexagon_v512v512v512LLii_Intrinsic - : Hexagon_Intrinsic; - -// -// Hexagon_v1024v1024v1024LLii_Intrinsic -// tag : V6_vlutb_acc_128B -class Hexagon_v1024v1024v1024LLii_Intrinsic - : Hexagon_Intrinsic; - -// -// Hexagon_v2048v2048LLii_Intrinsic -// tag : V6_vlutb_dv_128B -class Hexagon_v2048v2048LLii_Intrinsic - : Hexagon_Intrinsic; - -// -// Hexagon_v2048v2048v2048LLii_Intrinsic -// tag : V6_vlutb_dv_acc_128B -class Hexagon_v2048v2048v2048LLii_Intrinsic - : Hexagon_Intrinsic; - // // Hexagon_v512v512v512v512i_Intrinsic // tag : V6_vlutvvb_oracc @@ -9166,54 +9117,6 @@ Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vcombine">; def int_hexagon_V6_vcombine_128B : Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vcombine_128B">; -// -// BUILTIN_INFO(HEXAGON.V6_vlutb,VI_ftype_VIDISI,3) -// tag : V6_vlutb -def int_hexagon_V6_vlutb : -Hexagon_v512v512LLii_Intrinsic<"HEXAGON_V6_vlutb">; - -// -// BUILTIN_INFO(HEXAGON.V6_vlutb_128B,VI_ftype_VIDISI,3) -// tag : V6_vlutb_128B -def int_hexagon_V6_vlutb_128B : -Hexagon_v1024v1024LLii_Intrinsic<"HEXAGON_V6_vlutb_128B">; - -// -// BUILTIN_INFO(HEXAGON.V6_vlutb_acc,VI_ftype_VIVIDISI,4) -// tag : V6_vlutb_acc -def int_hexagon_V6_vlutb_acc : -Hexagon_v512v512v512LLii_Intrinsic<"HEXAGON_V6_vlutb_acc">; - -// -// BUILTIN_INFO(HEXAGON.V6_vlutb_acc_128B,VI_ftype_VIVIDISI,4) -// tag : V6_vlutb_acc_128B -def int_hexagon_V6_vlutb_acc_128B : -Hexagon_v1024v1024v1024LLii_Intrinsic<"HEXAGON_V6_vlutb_acc_128B">; - -// -// BUILTIN_INFO(HEXAGON.V6_vlutb_dv,VD_ftype_VDDISI,3) -// tag : V6_vlutb_dv -def int_hexagon_V6_vlutb_dv : -Hexagon_v1024v1024LLii_Intrinsic<"HEXAGON_V6_vlutb_dv">; - -// -// BUILTIN_INFO(HEXAGON.V6_vlutb_dv_128B,VD_ftype_VDDISI,3) -// tag : V6_vlutb_dv_128B -def int_hexagon_V6_vlutb_dv_128B : -Hexagon_v2048v2048LLii_Intrinsic<"HEXAGON_V6_vlutb_dv_128B">; - -// -// BUILTIN_INFO(HEXAGON.V6_vlutb_dv_acc,VD_ftype_VDVDDISI,4) -// tag : V6_vlutb_dv_acc -def int_hexagon_V6_vlutb_dv_acc : -Hexagon_v1024v1024v1024LLii_Intrinsic<"HEXAGON_V6_vlutb_dv_acc">; - -// -// BUILTIN_INFO(HEXAGON.V6_vlutb_dv_acc_128B,VD_ftype_VDVDDISI,4) -// tag : V6_vlutb_dv_acc_128B -def int_hexagon_V6_vlutb_dv_acc_128B : 
-Hexagon_v2048v2048v2048LLii_Intrinsic<"HEXAGON_V6_vlutb_dv_acc_128B">; - // // BUILTIN_INFO(HEXAGON.V6_vdelta,VI_ftype_VIVI,2) // tag : V6_vdelta @@ -9349,6 +9252,30 @@ Hexagon_v2048v2048v1024v1024i_Intrinsic<"HEXAGON_V6_vlutvwh_oracc_128B">; // // Masked vector stores // +def int_hexagon_V6_vS32b_qpred_ai : +Hexagon_vv64ivmemv512_Intrinsic<"HEXAGON_V6_vS32b_qpred_ai">; + +def int_hexagon_V6_vS32b_nqpred_ai : +Hexagon_vv64ivmemv512_Intrinsic<"HEXAGON_V6_vS32b_nqpred_ai">; + +def int_hexagon_V6_vS32b_nt_qpred_ai : +Hexagon_vv64ivmemv512_Intrinsic<"HEXAGON_V6_vS32b_nt_qpred_ai">; + +def int_hexagon_V6_vS32b_nt_nqpred_ai : +Hexagon_vv64ivmemv512_Intrinsic<"HEXAGON_V6_vS32b_nt_nqpred_ai">; + +def int_hexagon_V6_vS32b_qpred_ai_128B : +Hexagon_vv128ivmemv1024_Intrinsic<"HEXAGON_V6_vS32b_qpred_ai_128B">; + +def int_hexagon_V6_vS32b_nqpred_ai_128B : +Hexagon_vv128ivmemv1024_Intrinsic<"HEXAGON_V6_vS32b_nqpred_ai_128B">; + +def int_hexagon_V6_vS32b_nt_qpred_ai_128B : +Hexagon_vv128ivmemv1024_Intrinsic<"HEXAGON_V6_vS32b_nt_qpred_ai_128B">; + +def int_hexagon_V6_vS32b_nt_nqpred_ai_128B : +Hexagon_vv128ivmemv1024_Intrinsic<"HEXAGON_V6_vS32b_nt_nqpred_ai_128B">; + def int_hexagon_V6_vmaskedstoreq : Hexagon_vv64ivmemv512_Intrinsic<"HEXAGON_V6_vmaskedstoreq">; @@ -9642,6 +9569,20 @@ class Hexagon_V62_v2048v2048v1024v1024i_Intrinsic [llvm_v64i32_ty], [llvm_v64i32_ty,llvm_v32i32_ty,llvm_v32i32_ty,llvm_i32_ty], [IntrNoMem]>; +// Hexagon_v512v64iv512v512v64i_Intrinsic +// tag: V6_vaddcarry +class Hexagon_v512v64iv512v512v64i_Intrinsic + : Hexagon_Intrinsic; + +// Hexagon_v1024v128iv1024v1024v128i_Intrinsic +// tag: V6_vaddcarry_128B +class Hexagon_v1024v128iv1024v1024v128i_Intrinsic + : Hexagon_Intrinsic; + // // BUILTIN_INFO(HEXAGON.M6_vabsdiffb,DI_ftype_DIDI,2) @@ -10213,3 +10154,821 @@ Hexagon_V62_v1024v512v512i_Intrinsic<"HEXAGON_V6_vlutvwh_nm">; def int_hexagon_V6_vlutvwh_nm_128B : Hexagon_V62_v2048v1024v1024i_Intrinsic<"HEXAGON_V6_vlutvwh_nm_128B">; +// +// BUILTIN_INFO(HEXAGON.V6_vaddcarry,VI_ftype_VIVIQV,3) +// tag: V6_vaddcarry +def int_hexagon_V6_vaddcarry : +Hexagon_v512v64iv512v512v64i_Intrinsic<"HEXAGON_v6_vaddcarry">; + +// +// BUILTIN_INFO(HEXAGON.V6_vaddcarry_128B,VI_ftype_VIVIQV,3) +// tag: V6_vaddcarry_128B +def int_hexagon_V6_vaddcarry_128B : +Hexagon_v1024v128iv1024v1024v128i_Intrinsic<"HEXAGON_v6_vaddcarry_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vsubcarry,VI_ftype_VIVIQV,3) +// tag: V6_vsubcarry +def int_hexagon_V6_vsubcarry : +Hexagon_v512v64iv512v512v64i_Intrinsic<"HEXAGON_v6_vsubcarry">; + +// +// BUILTIN_INFO(HEXAGON.V6_vsubcarry_128B,VI_ftype_VIVIQV,3) +// tag: V6_vsubcarry_128B +def int_hexagon_V6_vsubcarry_128B : +Hexagon_v1024v128iv1024v1024v128i_Intrinsic<"HEXAGON_v6_vsubcarry_128B">; + + +/// +/// HexagonV65 intrinsics +/// + +// +// Hexagon_V65_iLLiLLi_Intrinsic +// tag : A6_vcmpbeq_notany +class Hexagon_V65_iLLiLLi_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_V65_v1024v512LLi_Intrinsic +// tag : V6_vrmpyub_rtt +class Hexagon_V65_v1024v512LLi_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_V65_v2048v1024LLi_Intrinsic +// tag : V6_vrmpyub_rtt_128B +class Hexagon_V65_v2048v1024LLi_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_V65_v1024v1024v512LLi_Intrinsic +// tag : V6_vrmpyub_rtt_acc +class Hexagon_V65_v1024v1024v512LLi_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_V65_v2048v2048v1024LLi_Intrinsic +// tag : V6_vrmpyub_rtt_acc_128B +class Hexagon_V65_v2048v2048v1024LLi_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_V65_v512v512v512i_Intrinsic +// tag : 
V6_vasruwuhsat +class Hexagon_V65_v512v512v512i_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_V65_v1024v1024v1024i_Intrinsic +// tag : V6_vasruwuhsat_128B +class Hexagon_V65_v1024v1024v1024i_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_V65_v512v512v512_Intrinsic +// tag : V6_vavguw +class Hexagon_V65_v512v512v512_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_V65_v1024v1024v1024_Intrinsic +// tag : V6_vavguw_128B +class Hexagon_V65_v1024v1024v1024_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_V65_v512v512_Intrinsic +// tag : V6_vabsb +class Hexagon_V65_v512v512_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_V65_v1024v1024_Intrinsic +// tag : V6_vabsb_128B +class Hexagon_V65_v1024v1024_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_V65_v1024v1024i_Intrinsic +// tag : V6_vmpabuu +class Hexagon_V65_v1024v1024i_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_V65_v2048v2048i_Intrinsic +// tag : V6_vmpabuu_128B +class Hexagon_V65_v2048v2048i_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_V65_v2048v2048v2048i_Intrinsic +// tag : V6_vmpabuu_acc_128B +class Hexagon_V65_v2048v2048v2048i_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_V65_v1024v1024v512i_Intrinsic +// tag : V6_vmpyh_acc +class Hexagon_V65_v1024v1024v512i_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_V65_v2048v2048v1024i_Intrinsic +// tag : V6_vmpyh_acc_128B +class Hexagon_V65_v2048v2048v1024i_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_V65_v512v512v512LLi_Intrinsic +// tag : V6_vmpahhsat +class Hexagon_V65_v512v512v512LLi_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_V65_v1024v1024v1024LLi_Intrinsic +// tag : V6_vmpahhsat_128B +class Hexagon_V65_v1024v1024v1024LLi_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_V65_v512v512LLi_Intrinsic +// tag : V6_vlut4 +class Hexagon_V65_v512v512LLi_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_V65_v1024v1024LLi_Intrinsic +// tag : V6_vlut4_128B +class Hexagon_V65_v1024v1024LLi_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_V65_v512v512i_Intrinsic +// tag : V6_vmpyuhe +class Hexagon_V65_v512v512i_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_V65_v512v64i_Intrinsic +// tag : V6_vprefixqb +class Hexagon_V65_v512v64i_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_V65_v1024v128i_Intrinsic +// tag : V6_vprefixqb_128B +class Hexagon_V65_v1024v128i_Intrinsic + : Hexagon_Intrinsic; + +// +// BUILTIN_INFO(HEXAGON.A6_vcmpbeq_notany,QI_ftype_DIDI,2) +// tag : A6_vcmpbeq_notany +def int_hexagon_A6_vcmpbeq_notany : +Hexagon_V65_iLLiLLi_Intrinsic<"HEXAGON_A6_vcmpbeq_notany">; + +// +// BUILTIN_INFO(HEXAGON.A6_vcmpbeq_notany_128B,QI_ftype_DIDI,2) +// tag : A6_vcmpbeq_notany_128B +def int_hexagon_A6_vcmpbeq_notany_128B : +Hexagon_V65_iLLiLLi_Intrinsic<"HEXAGON_A6_vcmpbeq_notany_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vrmpyub_rtt,VD_ftype_VIDI,2) +// tag : V6_vrmpyub_rtt +def int_hexagon_V6_vrmpyub_rtt : +Hexagon_V65_v1024v512LLi_Intrinsic<"HEXAGON_V6_vrmpyub_rtt">; + +// +// BUILTIN_INFO(HEXAGON.V6_vrmpyub_rtt_128B,VD_ftype_VIDI,2) +// tag : V6_vrmpyub_rtt_128B +def int_hexagon_V6_vrmpyub_rtt_128B : +Hexagon_V65_v2048v1024LLi_Intrinsic<"HEXAGON_V6_vrmpyub_rtt_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vrmpyub_rtt_acc,VD_ftype_VDVIDI,3) +// tag : V6_vrmpyub_rtt_acc +def int_hexagon_V6_vrmpyub_rtt_acc : +Hexagon_V65_v1024v1024v512LLi_Intrinsic<"HEXAGON_V6_vrmpyub_rtt_acc">; + +// +// BUILTIN_INFO(HEXAGON.V6_vrmpyub_rtt_acc_128B,VD_ftype_VDVIDI,3) +// tag : V6_vrmpyub_rtt_acc_128B +def int_hexagon_V6_vrmpyub_rtt_acc_128B : 
+Hexagon_V65_v2048v2048v1024LLi_Intrinsic<"HEXAGON_V6_vrmpyub_rtt_acc_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vrmpybub_rtt,VD_ftype_VIDI,2) +// tag : V6_vrmpybub_rtt +def int_hexagon_V6_vrmpybub_rtt : +Hexagon_V65_v1024v512LLi_Intrinsic<"HEXAGON_V6_vrmpybub_rtt">; + +// +// BUILTIN_INFO(HEXAGON.V6_vrmpybub_rtt_128B,VD_ftype_VIDI,2) +// tag : V6_vrmpybub_rtt_128B +def int_hexagon_V6_vrmpybub_rtt_128B : +Hexagon_V65_v2048v1024LLi_Intrinsic<"HEXAGON_V6_vrmpybub_rtt_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vrmpybub_rtt_acc,VD_ftype_VDVIDI,3) +// tag : V6_vrmpybub_rtt_acc +def int_hexagon_V6_vrmpybub_rtt_acc : +Hexagon_V65_v1024v1024v512LLi_Intrinsic<"HEXAGON_V6_vrmpybub_rtt_acc">; + +// +// BUILTIN_INFO(HEXAGON.V6_vrmpybub_rtt_acc_128B,VD_ftype_VDVIDI,3) +// tag : V6_vrmpybub_rtt_acc_128B +def int_hexagon_V6_vrmpybub_rtt_acc_128B : +Hexagon_V65_v2048v2048v1024LLi_Intrinsic<"HEXAGON_V6_vrmpybub_rtt_acc_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vasruwuhsat,VI_ftype_VIVISI,3) +// tag : V6_vasruwuhsat +def int_hexagon_V6_vasruwuhsat : +Hexagon_V65_v512v512v512i_Intrinsic<"HEXAGON_V6_vasruwuhsat">; + +// +// BUILTIN_INFO(HEXAGON.V6_vasruwuhsat_128B,VI_ftype_VIVISI,3) +// tag : V6_vasruwuhsat_128B +def int_hexagon_V6_vasruwuhsat_128B : +Hexagon_V65_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vasruwuhsat_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vasruhubsat,VI_ftype_VIVISI,3) +// tag : V6_vasruhubsat +def int_hexagon_V6_vasruhubsat : +Hexagon_V65_v512v512v512i_Intrinsic<"HEXAGON_V6_vasruhubsat">; + +// +// BUILTIN_INFO(HEXAGON.V6_vasruhubsat_128B,VI_ftype_VIVISI,3) +// tag : V6_vasruhubsat_128B +def int_hexagon_V6_vasruhubsat_128B : +Hexagon_V65_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vasruhubsat_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vasruhubrndsat,VI_ftype_VIVISI,3) +// tag : V6_vasruhubrndsat +def int_hexagon_V6_vasruhubrndsat : +Hexagon_V65_v512v512v512i_Intrinsic<"HEXAGON_V6_vasruhubrndsat">; + +// +// BUILTIN_INFO(HEXAGON.V6_vasruhubrndsat_128B,VI_ftype_VIVISI,3) +// tag : V6_vasruhubrndsat_128B +def int_hexagon_V6_vasruhubrndsat_128B : +Hexagon_V65_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vasruhubrndsat_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vaslh_acc,VI_ftype_VIVISI,3) +// tag : V6_vaslh_acc +def int_hexagon_V6_vaslh_acc : +Hexagon_V65_v512v512v512i_Intrinsic<"HEXAGON_V6_vaslh_acc">; + +// +// BUILTIN_INFO(HEXAGON.V6_vaslh_acc_128B,VI_ftype_VIVISI,3) +// tag : V6_vaslh_acc_128B +def int_hexagon_V6_vaslh_acc_128B : +Hexagon_V65_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vaslh_acc_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vasrh_acc,VI_ftype_VIVISI,3) +// tag : V6_vasrh_acc +def int_hexagon_V6_vasrh_acc : +Hexagon_V65_v512v512v512i_Intrinsic<"HEXAGON_V6_vasrh_acc">; + +// +// BUILTIN_INFO(HEXAGON.V6_vasrh_acc_128B,VI_ftype_VIVISI,3) +// tag : V6_vasrh_acc_128B +def int_hexagon_V6_vasrh_acc_128B : +Hexagon_V65_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vasrh_acc_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vavguw,VI_ftype_VIVI,2) +// tag : V6_vavguw +def int_hexagon_V6_vavguw : +Hexagon_V65_v512v512v512_Intrinsic<"HEXAGON_V6_vavguw">; + +// +// BUILTIN_INFO(HEXAGON.V6_vavguw_128B,VI_ftype_VIVI,2) +// tag : V6_vavguw_128B +def int_hexagon_V6_vavguw_128B : +Hexagon_V65_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vavguw_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vavguwrnd,VI_ftype_VIVI,2) +// tag : V6_vavguwrnd +def int_hexagon_V6_vavguwrnd : +Hexagon_V65_v512v512v512_Intrinsic<"HEXAGON_V6_vavguwrnd">; + +// +// BUILTIN_INFO(HEXAGON.V6_vavguwrnd_128B,VI_ftype_VIVI,2) +// tag : V6_vavguwrnd_128B +def 
int_hexagon_V6_vavguwrnd_128B : +Hexagon_V65_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vavguwrnd_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vavgb,VI_ftype_VIVI,2) +// tag : V6_vavgb +def int_hexagon_V6_vavgb : +Hexagon_V65_v512v512v512_Intrinsic<"HEXAGON_V6_vavgb">; + +// +// BUILTIN_INFO(HEXAGON.V6_vavgb_128B,VI_ftype_VIVI,2) +// tag : V6_vavgb_128B +def int_hexagon_V6_vavgb_128B : +Hexagon_V65_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vavgb_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vavgbrnd,VI_ftype_VIVI,2) +// tag : V6_vavgbrnd +def int_hexagon_V6_vavgbrnd : +Hexagon_V65_v512v512v512_Intrinsic<"HEXAGON_V6_vavgbrnd">; + +// +// BUILTIN_INFO(HEXAGON.V6_vavgbrnd_128B,VI_ftype_VIVI,2) +// tag : V6_vavgbrnd_128B +def int_hexagon_V6_vavgbrnd_128B : +Hexagon_V65_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vavgbrnd_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vnavgb,VI_ftype_VIVI,2) +// tag : V6_vnavgb +def int_hexagon_V6_vnavgb : +Hexagon_V65_v512v512v512_Intrinsic<"HEXAGON_V6_vnavgb">; + +// +// BUILTIN_INFO(HEXAGON.V6_vnavgb_128B,VI_ftype_VIVI,2) +// tag : V6_vnavgb_128B +def int_hexagon_V6_vnavgb_128B : +Hexagon_V65_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vnavgb_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vabsb,VI_ftype_VI,1) +// tag : V6_vabsb +def int_hexagon_V6_vabsb : +Hexagon_V65_v512v512_Intrinsic<"HEXAGON_V6_vabsb">; + +// +// BUILTIN_INFO(HEXAGON.V6_vabsb_128B,VI_ftype_VI,1) +// tag : V6_vabsb_128B +def int_hexagon_V6_vabsb_128B : +Hexagon_V65_v1024v1024_Intrinsic<"HEXAGON_V6_vabsb_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vabsb_sat,VI_ftype_VI,1) +// tag : V6_vabsb_sat +def int_hexagon_V6_vabsb_sat : +Hexagon_V65_v512v512_Intrinsic<"HEXAGON_V6_vabsb_sat">; + +// +// BUILTIN_INFO(HEXAGON.V6_vabsb_sat_128B,VI_ftype_VI,1) +// tag : V6_vabsb_sat_128B +def int_hexagon_V6_vabsb_sat_128B : +Hexagon_V65_v1024v1024_Intrinsic<"HEXAGON_V6_vabsb_sat_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vmpabuu,VD_ftype_VDSI,2) +// tag : V6_vmpabuu +def int_hexagon_V6_vmpabuu : +Hexagon_V65_v1024v1024i_Intrinsic<"HEXAGON_V6_vmpabuu">; + +// +// BUILTIN_INFO(HEXAGON.V6_vmpabuu_128B,VD_ftype_VDSI,2) +// tag : V6_vmpabuu_128B +def int_hexagon_V6_vmpabuu_128B : +Hexagon_V65_v2048v2048i_Intrinsic<"HEXAGON_V6_vmpabuu_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vmpabuu_acc,VD_ftype_VDVDSI,3) +// tag : V6_vmpabuu_acc +def int_hexagon_V6_vmpabuu_acc : +Hexagon_V65_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vmpabuu_acc">; + +// +// BUILTIN_INFO(HEXAGON.V6_vmpabuu_acc_128B,VD_ftype_VDVDSI,3) +// tag : V6_vmpabuu_acc_128B +def int_hexagon_V6_vmpabuu_acc_128B : +Hexagon_V65_v2048v2048v2048i_Intrinsic<"HEXAGON_V6_vmpabuu_acc_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vmpyh_acc,VD_ftype_VDVISI,3) +// tag : V6_vmpyh_acc +def int_hexagon_V6_vmpyh_acc : +Hexagon_V65_v1024v1024v512i_Intrinsic<"HEXAGON_V6_vmpyh_acc">; + +// +// BUILTIN_INFO(HEXAGON.V6_vmpyh_acc_128B,VD_ftype_VDVISI,3) +// tag : V6_vmpyh_acc_128B +def int_hexagon_V6_vmpyh_acc_128B : +Hexagon_V65_v2048v2048v1024i_Intrinsic<"HEXAGON_V6_vmpyh_acc_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vmpahhsat,VI_ftype_VIVIDI,3) +// tag : V6_vmpahhsat +def int_hexagon_V6_vmpahhsat : +Hexagon_V65_v512v512v512LLi_Intrinsic<"HEXAGON_V6_vmpahhsat">; + +// +// BUILTIN_INFO(HEXAGON.V6_vmpahhsat_128B,VI_ftype_VIVIDI,3) +// tag : V6_vmpahhsat_128B +def int_hexagon_V6_vmpahhsat_128B : +Hexagon_V65_v1024v1024v1024LLi_Intrinsic<"HEXAGON_V6_vmpahhsat_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vmpauhuhsat,VI_ftype_VIVIDI,3) +// tag : V6_vmpauhuhsat +def int_hexagon_V6_vmpauhuhsat : 
+Hexagon_V65_v512v512v512LLi_Intrinsic<"HEXAGON_V6_vmpauhuhsat">; + +// +// BUILTIN_INFO(HEXAGON.V6_vmpauhuhsat_128B,VI_ftype_VIVIDI,3) +// tag : V6_vmpauhuhsat_128B +def int_hexagon_V6_vmpauhuhsat_128B : +Hexagon_V65_v1024v1024v1024LLi_Intrinsic<"HEXAGON_V6_vmpauhuhsat_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vmpsuhuhsat,VI_ftype_VIVIDI,3) +// tag : V6_vmpsuhuhsat +def int_hexagon_V6_vmpsuhuhsat : +Hexagon_V65_v512v512v512LLi_Intrinsic<"HEXAGON_V6_vmpsuhuhsat">; + +// +// BUILTIN_INFO(HEXAGON.V6_vmpsuhuhsat_128B,VI_ftype_VIVIDI,3) +// tag : V6_vmpsuhuhsat_128B +def int_hexagon_V6_vmpsuhuhsat_128B : +Hexagon_V65_v1024v1024v1024LLi_Intrinsic<"HEXAGON_V6_vmpsuhuhsat_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vlut4,VI_ftype_VIDI,2) +// tag : V6_vlut4 +def int_hexagon_V6_vlut4 : +Hexagon_V65_v512v512LLi_Intrinsic<"HEXAGON_V6_vlut4">; + +// +// BUILTIN_INFO(HEXAGON.V6_vlut4_128B,VI_ftype_VIDI,2) +// tag : V6_vlut4_128B +def int_hexagon_V6_vlut4_128B : +Hexagon_V65_v1024v1024LLi_Intrinsic<"HEXAGON_V6_vlut4_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vmpyuhe,VI_ftype_VISI,2) +// tag : V6_vmpyuhe +def int_hexagon_V6_vmpyuhe : +Hexagon_V65_v512v512i_Intrinsic<"HEXAGON_V6_vmpyuhe">; + +// +// BUILTIN_INFO(HEXAGON.V6_vmpyuhe_128B,VI_ftype_VISI,2) +// tag : V6_vmpyuhe_128B +def int_hexagon_V6_vmpyuhe_128B : +Hexagon_V65_v1024v1024i_Intrinsic<"HEXAGON_V6_vmpyuhe_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vmpyuhe_acc,VI_ftype_VIVISI,3) +// tag : V6_vmpyuhe_acc +def int_hexagon_V6_vmpyuhe_acc : +Hexagon_V65_v512v512v512i_Intrinsic<"HEXAGON_V6_vmpyuhe_acc">; + +// +// BUILTIN_INFO(HEXAGON.V6_vmpyuhe_acc_128B,VI_ftype_VIVISI,3) +// tag : V6_vmpyuhe_acc_128B +def int_hexagon_V6_vmpyuhe_acc_128B : +Hexagon_V65_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vmpyuhe_acc_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vprefixqb,VI_ftype_QV,1) +// tag : V6_vprefixqb +def int_hexagon_V6_vprefixqb : +Hexagon_V65_v512v64i_Intrinsic<"HEXAGON_V6_vprefixqb">; + +// +// BUILTIN_INFO(HEXAGON.V6_vprefixqb_128B,VI_ftype_QV,1) +// tag : V6_vprefixqb_128B +def int_hexagon_V6_vprefixqb_128B : +Hexagon_V65_v1024v128i_Intrinsic<"HEXAGON_V6_vprefixqb_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vprefixqh,VI_ftype_QV,1) +// tag : V6_vprefixqh +def int_hexagon_V6_vprefixqh : +Hexagon_V65_v512v64i_Intrinsic<"HEXAGON_V6_vprefixqh">; + +// +// BUILTIN_INFO(HEXAGON.V6_vprefixqh_128B,VI_ftype_QV,1) +// tag : V6_vprefixqh_128B +def int_hexagon_V6_vprefixqh_128B : +Hexagon_V65_v1024v128i_Intrinsic<"HEXAGON_V6_vprefixqh_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vprefixqw,VI_ftype_QV,1) +// tag : V6_vprefixqw +def int_hexagon_V6_vprefixqw : +Hexagon_V65_v512v64i_Intrinsic<"HEXAGON_V6_vprefixqw">; + +// +// BUILTIN_INFO(HEXAGON.V6_vprefixqw_128B,VI_ftype_QV,1) +// tag : V6_vprefixqw_128B +def int_hexagon_V6_vprefixqw_128B : +Hexagon_V65_v1024v128i_Intrinsic<"HEXAGON_V6_vprefixqw_128B">; + + +// The scatter/gather ones below will not be generated from iset.py. Make sure +// you don't overwrite these. 
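Editorial note: the HVX gather/scatter definitions that follow are hand-written rather than generated, as the comment above warns. Each def becomes an Intrinsic ID named after the record with the int_ prefix dropped, so a pass or front end can materialize a declaration directly. A small sketch (assuming M is an llvm::Module; the target-defined parameter list is left to the definitions below):

    // Sketch only: these gathers are not overloaded, so no type arguments
    // are needed when looking up the declaration.
    Function *GatherW =
        Intrinsic::getDeclaration(&M, Intrinsic::hexagon_V6_vgathermw);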
+class Hexagon_V65_vvmemiiv512_Intrinsic + : Hexagon_Intrinsic; + +class Hexagon_V65_vvmemiiv1024_Intrinsic + : Hexagon_Intrinsic; + +class Hexagon_V65_vvmemiiv2048_Intrinsic + : Hexagon_Intrinsic; + +class Hexagon_V65_vvmemv64iiiv512_Intrinsic + : Hexagon_Intrinsic; + +class Hexagon_V65_vvmemv128iiiv1024_Intrinsic + : Hexagon_Intrinsic; + +class Hexagon_V65_vvmemv64iiiv1024_Intrinsic + : Hexagon_Intrinsic; + +class Hexagon_V65_vvmemv128iiiv2048_Intrinsic + : Hexagon_Intrinsic; + +def int_hexagon_V6_vgathermw : +Hexagon_V65_vvmemiiv512_Intrinsic<"HEXAGON_V6_vgathermw">; + +def int_hexagon_V6_vgathermw_128B : +Hexagon_V65_vvmemiiv1024_Intrinsic<"HEXAGON_V6_vgathermw_128B">; + +def int_hexagon_V6_vgathermh : +Hexagon_V65_vvmemiiv512_Intrinsic<"HEXAGON_V6_vgathermh">; + +def int_hexagon_V6_vgathermh_128B : +Hexagon_V65_vvmemiiv1024_Intrinsic<"HEXAGON_V6_vgathermh_128B">; + +def int_hexagon_V6_vgathermhw : +Hexagon_V65_vvmemiiv1024_Intrinsic<"HEXAGON_V6_vgathermhw">; + +def int_hexagon_V6_vgathermhw_128B : +Hexagon_V65_vvmemiiv2048_Intrinsic<"HEXAGON_V6_vgathermhw_128B">; + +def int_hexagon_V6_vgathermwq : +Hexagon_V65_vvmemv64iiiv512_Intrinsic<"HEXAGON_V6_vgathermwq">; + +def int_hexagon_V6_vgathermwq_128B : +Hexagon_V65_vvmemv128iiiv1024_Intrinsic<"HEXAGON_V6_vgathermwq_128B">; + +def int_hexagon_V6_vgathermhq : +Hexagon_V65_vvmemv64iiiv512_Intrinsic<"HEXAGON_V6_vgathermhq">; + +def int_hexagon_V6_vgathermhq_128B : +Hexagon_V65_vvmemv128iiiv1024_Intrinsic<"HEXAGON_V6_vgathermhq_128B">; + +def int_hexagon_V6_vgathermhwq : +Hexagon_V65_vvmemv64iiiv1024_Intrinsic<"HEXAGON_V6_vgathermhwq">; + +def int_hexagon_V6_vgathermhwq_128B : +Hexagon_V65_vvmemv128iiiv2048_Intrinsic<"HEXAGON_V6_vgathermhwq_128B">; + +class Hexagon_V65_viiv512v512_Intrinsic + : Hexagon_Intrinsic; + +class Hexagon_V65_viiv1024v1024_Intrinsic + : Hexagon_Intrinsic; + +class Hexagon_V65_vv64iiiv512v512_Intrinsic + : Hexagon_Intrinsic; + +class Hexagon_V65_vv128iiiv1024v1024_Intrinsic + : Hexagon_Intrinsic; + +class Hexagon_V65_viiv1024v512_Intrinsic + : Hexagon_Intrinsic; + +class Hexagon_V65_viiv2048v1024_Intrinsic + : Hexagon_Intrinsic; + +class Hexagon_V65_vv64iiiv1024v512_Intrinsic + : Hexagon_Intrinsic; + +class Hexagon_V65_vv128iiiv2048v1024_Intrinsic + : Hexagon_Intrinsic; + +class Hexagon_V65_v2048_Intrinsic + : Hexagon_Intrinsic; + +// +// BUILTIN_INFO(HEXAGON.V6_vscattermw,v_ftype_SISIVIVI,4) +// tag : V6_vscattermw +def int_hexagon_V6_vscattermw : +Hexagon_V65_viiv512v512_Intrinsic<"HEXAGON_V6_vscattermw">; + +// +// BUILTIN_INFO(HEXAGON.V6_vscattermw_128B,v_ftype_SISIVIVI,4) +// tag : V6_vscattermw_128B +def int_hexagon_V6_vscattermw_128B : +Hexagon_V65_viiv1024v1024_Intrinsic<"HEXAGON_V6_vscattermw_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vscattermh,v_ftype_SISIVIVI,4) +// tag : V6_vscattermh +def int_hexagon_V6_vscattermh : +Hexagon_V65_viiv512v512_Intrinsic<"HEXAGON_V6_vscattermh">; + +// +// BUILTIN_INFO(HEXAGON.V6_vscattermh_128B,v_ftype_SISIVIVI,4) +// tag : V6_vscattermh_128B +def int_hexagon_V6_vscattermh_128B : +Hexagon_V65_viiv1024v1024_Intrinsic<"HEXAGON_V6_vscattermh_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vscattermw_add,v_ftype_SISIVIVI,4) +// tag : V6_vscattermw_add +def int_hexagon_V6_vscattermw_add : +Hexagon_V65_viiv512v512_Intrinsic<"HEXAGON_V6_vscattermw_add">; + +// +// BUILTIN_INFO(HEXAGON.V6_vscattermw_add_128B,v_ftype_SISIVIVI,4) +// tag : V6_vscattermw_add_128B +def int_hexagon_V6_vscattermw_add_128B : +Hexagon_V65_viiv1024v1024_Intrinsic<"HEXAGON_V6_vscattermw_add_128B">; + +// +// 
BUILTIN_INFO(HEXAGON.V6_vscattermh_add,v_ftype_SISIVIVI,4) +// tag : V6_vscattermh_add +def int_hexagon_V6_vscattermh_add : +Hexagon_V65_viiv512v512_Intrinsic<"HEXAGON_V6_vscattermh_add">; + +// +// BUILTIN_INFO(HEXAGON.V6_vscattermh_add_128B,v_ftype_SISIVIVI,4) +// tag : V6_vscattermh_add_128B +def int_hexagon_V6_vscattermh_add_128B : +Hexagon_V65_viiv1024v1024_Intrinsic<"HEXAGON_V6_vscattermh_add_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vscattermwq,v_ftype_QVSISIVIVI,5) +// tag : V6_vscattermwq +def int_hexagon_V6_vscattermwq : +Hexagon_V65_vv64iiiv512v512_Intrinsic<"HEXAGON_V6_vscattermwq">; + +// +// BUILTIN_INFO(HEXAGON.V6_vscattermwq_128B,v_ftype_QVSISIVIVI,5) +// tag : V6_vscattermwq_128B +def int_hexagon_V6_vscattermwq_128B : +Hexagon_V65_vv128iiiv1024v1024_Intrinsic<"HEXAGON_V6_vscattermwq_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vscattermhq,v_ftype_QVSISIVIVI,5) +// tag : V6_vscattermhq +def int_hexagon_V6_vscattermhq : +Hexagon_V65_vv64iiiv512v512_Intrinsic<"HEXAGON_V6_vscattermhq">; + +// +// BUILTIN_INFO(HEXAGON.V6_vscattermhq_128B,v_ftype_QVSISIVIVI,5) +// tag : V6_vscattermhq_128B +def int_hexagon_V6_vscattermhq_128B : +Hexagon_V65_vv128iiiv1024v1024_Intrinsic<"HEXAGON_V6_vscattermhq_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vscattermhw,v_ftype_SISIVDVI,4) +// tag : V6_vscattermhw +def int_hexagon_V6_vscattermhw : +Hexagon_V65_viiv1024v512_Intrinsic<"HEXAGON_V6_vscattermhw">; + +// +// BUILTIN_INFO(HEXAGON.V6_vscattermhw_128B,v_ftype_SISIVDVI,4) +// tag : V6_vscattermhw_128B +def int_hexagon_V6_vscattermhw_128B : +Hexagon_V65_viiv2048v1024_Intrinsic<"HEXAGON_V6_vscattermhw_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vscattermhwq,v_ftype_QVSISIVDVI,5) +// tag : V6_vscattermhwq +def int_hexagon_V6_vscattermhwq : +Hexagon_V65_vv64iiiv1024v512_Intrinsic<"HEXAGON_V6_vscattermhwq">; + +// +// BUILTIN_INFO(HEXAGON.V6_vscattermhwq_128B,v_ftype_QVSISIVDVI,5) +// tag : V6_vscattermhwq_128B +def int_hexagon_V6_vscattermhwq_128B : +Hexagon_V65_vv128iiiv2048v1024_Intrinsic<"HEXAGON_V6_vscattermhwq_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vscattermhw_add,v_ftype_SISIVDVI,4) +// tag : V6_vscattermhw_add +def int_hexagon_V6_vscattermhw_add : +Hexagon_V65_viiv1024v512_Intrinsic<"HEXAGON_V6_vscattermhw_add">; + +// +// BUILTIN_INFO(HEXAGON.V6_vscattermhw_add_128B,v_ftype_SISIVDVI,4) +// tag : V6_vscattermhw_add_128B +def int_hexagon_V6_vscattermhw_add_128B : +Hexagon_V65_viiv2048v1024_Intrinsic<"HEXAGON_V6_vscattermhw_add_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdd0,VD_ftype_,0) +// tag : V6_vdd0 +def int_hexagon_V6_vdd0 : +Hexagon_v1024_Intrinsic<"HEXAGON_V6_vdd0">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdd0_128B,VD_ftype_,0) +// tag : V6_vdd0_128B +def int_hexagon_V6_vdd0_128B : +Hexagon_V65_v2048_Intrinsic<"HEXAGON_V6_vdd0_128B">; diff --git a/include/llvm/IR/IntrinsicsNVVM.td b/include/llvm/IR/IntrinsicsNVVM.td index 6f75e78ff615..73622ce9303f 100644 --- a/include/llvm/IR/IntrinsicsNVVM.td +++ b/include/llvm/IR/IntrinsicsNVVM.td @@ -682,6 +682,11 @@ let TargetPrefix = "nvvm" in { def int_nvvm_bitcast_d2ll : GCCBuiltin<"__nvvm_bitcast_d2ll">, Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>; +// FNS + + def int_nvvm_fns : GCCBuiltin<"__nvvm_fns">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem]>; // Atomics not available as llvm intrinsics. 
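Editorial note: the new llvm.nvvm.fns intrinsic added just above corresponds to the PTX fns ("find n-th set bit") operation and takes three i32 operands, returning an i32. A hedged sketch of emitting it through IRBuilder (Builder, M, Mask, Base and Offset are illustrative names, not from the patch):

    // Sketch only: nvvm_fns is not overloaded, so getDeclaration needs no types.
    Function *Fns = Intrinsic::getDeclaration(&M, Intrinsic::nvvm_fns);
    Value *R = Builder.CreateCall(Fns, {Mask, Base, Offset});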
def int_nvvm_atomic_load_add_f32 : Intrinsic<[llvm_float_ty], diff --git a/include/llvm/IR/IntrinsicsSystemZ.td b/include/llvm/IR/IntrinsicsSystemZ.td index 98065bc51d99..caa2ec209a31 100644 --- a/include/llvm/IR/IntrinsicsSystemZ.td +++ b/include/llvm/IR/IntrinsicsSystemZ.td @@ -198,17 +198,17 @@ multiclass SystemZQuaternaryIntCCBHF { let TargetPrefix = "s390" in { def int_s390_tbegin : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], - [IntrNoDuplicate]>; + [IntrNoDuplicate, IntrWriteMem]>; def int_s390_tbegin_nofloat : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], - [IntrNoDuplicate]>; + [IntrNoDuplicate, IntrWriteMem]>; def int_s390_tbeginc : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], - [IntrNoDuplicate]>; + [IntrNoDuplicate, IntrWriteMem]>; def int_s390_tabort : Intrinsic<[], [llvm_i64_ty], - [IntrNoReturn, Throws]>; + [IntrNoReturn, Throws, IntrWriteMem]>; def int_s390_tend : GCCBuiltin<"__builtin_tend">, Intrinsic<[llvm_i32_ty], []>; @@ -217,7 +217,7 @@ let TargetPrefix = "s390" in { Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>; def int_s390_ntstg : Intrinsic<[], [llvm_i64_ty, llvm_ptr64_ty], - [IntrArgMemOnly]>; + [IntrArgMemOnly, IntrWriteMem]>; def int_s390_ppa_txassist : GCCBuiltin<"__builtin_tx_assist">, Intrinsic<[], [llvm_i32_ty]>; @@ -260,9 +260,7 @@ let TargetPrefix = "s390" in { def int_s390_vstl : GCCBuiltin<"__builtin_s390_vstl">, Intrinsic<[], [llvm_v16i8_ty, llvm_i32_ty, llvm_ptr_ty], - // In fact write-only but there's no property - // for that. - [IntrArgMemOnly]>; + [IntrArgMemOnly, IntrWriteMem]>; defm int_s390_vupl : SystemZUnaryExtBHWF<"vupl">; defm int_s390_vupll : SystemZUnaryExtBHF<"vupll">; @@ -413,9 +411,7 @@ let TargetPrefix = "s390" in { def int_s390_vstrl : GCCBuiltin<"__builtin_s390_vstrl">, Intrinsic<[], [llvm_v16i8_ty, llvm_i32_ty, llvm_ptr_ty], - // In fact write-only but there's no property - // for that. - [IntrArgMemOnly]>; + [IntrArgMemOnly, IntrWriteMem]>; } //===----------------------------------------------------------------------===// diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td index 459463003c72..c1879f65e431 100644 --- a/include/llvm/IR/IntrinsicsX86.td +++ b/include/llvm/IR/IntrinsicsX86.td @@ -63,6 +63,12 @@ let TargetPrefix = "x86" in { Intrinsic<[llvm_i64_ty], [llvm_i32_ty], []>; } +// Read processor ID. +let TargetPrefix = "x86" in { + def int_x86_rdpid : GCCBuiltin<"__builtin_ia32_rdpid">, + Intrinsic<[llvm_i32_ty], [], []>; +} + //===----------------------------------------------------------------------===// // CET SS let TargetPrefix = "x86" in { @@ -3721,32 +3727,23 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
// Mask instructions // 16-bit mask - def int_x86_avx512_kand_w : GCCBuiltin<"__builtin_ia32_kandhi">, + def int_x86_avx512_kand_w : // TODO: remove this intrinsic Intrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty], [IntrNoMem]>; - def int_x86_avx512_kandn_w : GCCBuiltin<"__builtin_ia32_kandnhi">, + def int_x86_avx512_kandn_w : // TODO: remove this intrinsic Intrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty], [IntrNoMem]>; - def int_x86_avx512_knot_w : GCCBuiltin<"__builtin_ia32_knothi">, + def int_x86_avx512_knot_w : // TODO: remove this intrinsic Intrinsic<[llvm_i16_ty], [llvm_i16_ty], [IntrNoMem]>; - def int_x86_avx512_kor_w : GCCBuiltin<"__builtin_ia32_korhi">, - Intrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty], - [IntrNoMem]>; - def int_x86_avx512_kxor_w : GCCBuiltin<"__builtin_ia32_kxorhi">, + def int_x86_avx512_kor_w : // TODO: remove this intrinsic Intrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty], [IntrNoMem]>; - def int_x86_avx512_kxnor_w : GCCBuiltin<"__builtin_ia32_kxnorhi">, + def int_x86_avx512_kxor_w : // TODO: remove this intrinsic Intrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty], [IntrNoMem]>; - def int_x86_avx512_kunpck_bw : GCCBuiltin<"__builtin_ia32_kunpckhi">, + def int_x86_avx512_kxnor_w : // TODO: remove this intrinsic Intrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty], [IntrNoMem]>; - def int_x86_avx512_kunpck_wd : GCCBuiltin<"__builtin_ia32_kunpcksi">, - Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrNoMem]>; - def int_x86_avx512_kunpck_dq : GCCBuiltin<"__builtin_ia32_kunpckdi">, - Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], - [IntrNoMem]>; def int_x86_avx512_kortestz_w : GCCBuiltin<"__builtin_ia32_kortestzhi">, Intrinsic<[llvm_i32_ty], [llvm_i16_ty, llvm_i16_ty], [IntrNoMem]>; @@ -3810,35 +3807,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
def int_x86_avx512_cvtsi2sd64 : GCCBuiltin<"__builtin_ia32_cvtsi2sd64">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_i64_ty, llvm_i32_ty], [IntrNoMem]>; - - def int_x86_avx512_cvtb2mask_128 : GCCBuiltin<"__builtin_ia32_cvtb2mask128">, - Intrinsic<[llvm_i16_ty], [llvm_v16i8_ty], [IntrNoMem]>; - def int_x86_avx512_cvtb2mask_256 : GCCBuiltin<"__builtin_ia32_cvtb2mask256">, - Intrinsic<[llvm_i32_ty], [llvm_v32i8_ty], [IntrNoMem]>; - def int_x86_avx512_cvtb2mask_512 : GCCBuiltin<"__builtin_ia32_cvtb2mask512">, - Intrinsic<[llvm_i64_ty], [llvm_v64i8_ty], [IntrNoMem]>; - - def int_x86_avx512_cvtw2mask_128 : GCCBuiltin<"__builtin_ia32_cvtw2mask128">, - Intrinsic<[llvm_i8_ty], [llvm_v8i16_ty], [IntrNoMem]>; - def int_x86_avx512_cvtw2mask_256 : GCCBuiltin<"__builtin_ia32_cvtw2mask256">, - Intrinsic<[llvm_i16_ty], [llvm_v16i16_ty], [IntrNoMem]>; - def int_x86_avx512_cvtw2mask_512 : GCCBuiltin<"__builtin_ia32_cvtw2mask512">, - Intrinsic<[llvm_i32_ty], [llvm_v32i16_ty], [IntrNoMem]>; - - def int_x86_avx512_cvtd2mask_128 : GCCBuiltin<"__builtin_ia32_cvtd2mask128">, - Intrinsic<[llvm_i8_ty], [llvm_v4i32_ty], [IntrNoMem]>; - def int_x86_avx512_cvtd2mask_256 : GCCBuiltin<"__builtin_ia32_cvtd2mask256">, - Intrinsic<[llvm_i8_ty], [llvm_v8i32_ty], [IntrNoMem]>; - def int_x86_avx512_cvtd2mask_512 : GCCBuiltin<"__builtin_ia32_cvtd2mask512">, - Intrinsic<[llvm_i16_ty], [llvm_v16i32_ty], [IntrNoMem]>; - - def int_x86_avx512_cvtq2mask_128 : GCCBuiltin<"__builtin_ia32_cvtq2mask128">, - Intrinsic<[llvm_i8_ty], [llvm_v2i64_ty], [IntrNoMem]>; - def int_x86_avx512_cvtq2mask_256 : GCCBuiltin<"__builtin_ia32_cvtq2mask256">, - Intrinsic<[llvm_i8_ty], [llvm_v4i64_ty], [IntrNoMem]>; - def int_x86_avx512_cvtq2mask_512 : GCCBuiltin<"__builtin_ia32_cvtq2mask512">, - Intrinsic<[llvm_i8_ty], [llvm_v8i64_ty], [IntrNoMem]>; - } // Pack ops. diff --git a/include/llvm/IR/LLVMContext.h b/include/llvm/IR/LLVMContext.h index a95634d32c21..a9ec1a166335 100644 --- a/include/llvm/IR/LLVMContext.h +++ b/include/llvm/IR/LLVMContext.h @@ -76,7 +76,7 @@ class LLVMContext { // Pinned metadata names, which always have the same value. This is a // compile-time performance optimization, not a correctness optimization. - enum { + enum : unsigned { MD_dbg = 0, // "dbg" MD_tbaa = 1, // "tbaa" MD_prof = 2, // "prof" @@ -108,7 +108,7 @@ class LLVMContext { /// operand bundle tags that LLVM has special knowledge of are listed here. /// Additionally, this scheme allows LLVM to efficiently check for specific /// operand bundle tags without comparing strings. - enum { + enum : unsigned { OB_deopt = 0, // "deopt" OB_funclet = 1, // "funclet" OB_gc_transition = 2, // "gc-transition" diff --git a/include/llvm/IR/MDBuilder.h b/include/llvm/IR/MDBuilder.h index 15c1b9cb60ef..d5218eadc4ab 100644 --- a/include/llvm/IR/MDBuilder.h +++ b/include/llvm/IR/MDBuilder.h @@ -30,6 +30,7 @@ class Constant; class ConstantAsMetadata; class MDNode; class MDString; +class Metadata; class MDBuilder { LLVMContext &Context; @@ -65,10 +66,11 @@ class MDBuilder { /// Return metadata specifying that a branch or switch is unpredictable. 
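Editorial note on the AVX-512 cvt*2mask and kunpck intrinsics deleted above: the mask they produce is just the vector of element sign bits, so the operation is expressible in generic IR, which is presumably why the intrinsics are being dropped. A hedged sketch of an equivalent lowering for the 512-bit dword variant (not the patch's own upgrade code):

    // Sketch only: compare against zero, then reinterpret the <16 x i1>
    // result as the 16-bit mask value.
    Value *lowerCvtD2Mask512(IRBuilder<> &B, Value *V) { // V : <16 x i32>
      Value *Cmp = B.CreateICmpSLT(V, Constant::getNullValue(V->getType()));
      return B.CreateBitCast(Cmp, B.getInt16Ty());
    }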
MDNode *createUnpredictable(); - /// Return metadata containing the entry \p Count for a function, and the + /// Return metadata containing the entry \p Count for a function, a boolean + /// \Synthetic indicating whether the counts were synthetized, and the /// GUIDs stored in \p Imports that need to be imported for sample PGO, to /// enable the same inlines as the profiled optimized binary - MDNode *createFunctionEntryCount(uint64_t Count, + MDNode *createFunctionEntryCount(uint64_t Count, bool Synthetic, const DenseSet *Imports); /// Return metadata containing the section prefix for a function. @@ -149,9 +151,9 @@ class MDBuilder { struct TBAAStructField { uint64_t Offset; uint64_t Size; - MDNode *TBAA; - TBAAStructField(uint64_t Offset, uint64_t Size, MDNode *TBAA) : - Offset(Offset), Size(Size), TBAA(TBAA) {} + MDNode *Type; + TBAAStructField(uint64_t Offset, uint64_t Size, MDNode *Type) : + Offset(Offset), Size(Size), Type(Type) {} }; /// \brief Return metadata for a tbaa.struct node with the given @@ -174,6 +176,24 @@ class MDBuilder { MDNode *createTBAAStructTagNode(MDNode *BaseType, MDNode *AccessType, uint64_t Offset, bool IsConstant = false); + /// \brief Return metadata for a TBAA type node in the TBAA type DAG with the + /// given parent type, size in bytes, type identifier and a list of fields. + MDNode *createTBAATypeNode(MDNode *Parent, uint64_t Size, Metadata *Id, + ArrayRef Fields = + ArrayRef()); + + /// \brief Return metadata for a TBAA access tag with the given base type, + /// final access type, offset of the access relative to the base type, size of + /// the access and flag indicating whether the accessed object can be + /// considered immutable for the purposes of the TBAA analysis. + MDNode *createTBAAAccessTag(MDNode *BaseType, MDNode *AccessType, + uint64_t Offset, uint64_t Size, + bool IsImmutable = false); + + /// \brief Return mutable version of the given mutable or immutable TBAA + /// access tag. + MDNode *createMutableTBAAAccessTag(MDNode *Tag); + /// \brief Return metadata containing an irreducible loop header weight. MDNode *createIrrLoopHeaderWeight(uint64_t Weight); }; diff --git a/include/llvm/IR/Mangler.h b/include/llvm/IR/Mangler.h index 56ee21392ccd..0261c00f524c 100644 --- a/include/llvm/IR/Mangler.h +++ b/include/llvm/IR/Mangler.h @@ -50,6 +50,9 @@ class Mangler { void emitLinkerFlagsForGlobalCOFF(raw_ostream &OS, const GlobalValue *GV, const Triple &TT, Mangler &Mangler); +void emitLinkerFlagsForUsedCOFF(raw_ostream &OS, const GlobalValue *GV, + const Triple &T, Mangler &M); + } // End llvm namespace #endif diff --git a/include/llvm/IR/ModuleSummaryIndex.h b/include/llvm/IR/ModuleSummaryIndex.h index b1e58a2a0d9b..17f8951bf0e9 100644 --- a/include/llvm/IR/ModuleSummaryIndex.h +++ b/include/llvm/IR/ModuleSummaryIndex.h @@ -69,9 +69,27 @@ class GlobalValueSummary; using GlobalValueSummaryList = std::vector>; struct GlobalValueSummaryInfo { - /// The GlobalValue corresponding to this summary. This is only used in - /// per-module summaries. - const GlobalValue *GV = nullptr; + union NameOrGV { + NameOrGV(bool IsAnalysis) { + if (IsAnalysis) + GV = nullptr; + else + Name = ""; + } + + /// The GlobalValue corresponding to this summary. This is only used in + /// per-module summaries, when module analysis is being run. + const GlobalValue *GV; + + /// Summary string representation. This StringRef points to BC module + /// string table and is valid until module data is stored in memory. 
+ /// This is guaranteed to happen until runThinLTOBackend function is + /// called, so it is safe to use this field during thin link. This field + /// is only valid if summary index was loaded from BC file. + StringRef Name; + } U; + + GlobalValueSummaryInfo(bool IsAnalysis) : U(IsAnalysis) {} /// List of global value summary structures for a particular value held /// in the GlobalValueMap. Requires a vector in the case of multiple @@ -91,32 +109,60 @@ using GlobalValueSummaryMapTy = /// Struct that holds a reference to a particular GUID in a global value /// summary. struct ValueInfo { - const GlobalValueSummaryMapTy::value_type *Ref = nullptr; + PointerIntPair + RefAndFlag; ValueInfo() = default; - ValueInfo(const GlobalValueSummaryMapTy::value_type *Ref) : Ref(Ref) {} + ValueInfo(bool IsAnalysis, const GlobalValueSummaryMapTy::value_type *R) { + RefAndFlag.setPointer(R); + RefAndFlag.setInt(IsAnalysis); + } - operator bool() const { return Ref; } + operator bool() const { return getRef(); } - GlobalValue::GUID getGUID() const { return Ref->first; } - const GlobalValue *getValue() const { return Ref->second.GV; } + GlobalValue::GUID getGUID() const { return getRef()->first; } + const GlobalValue *getValue() const { + assert(isFromAnalysis()); + return getRef()->second.U.GV; + } ArrayRef> getSummaryList() const { - return Ref->second.SummaryList; + return getRef()->second.SummaryList; + } + + StringRef name() const { + return isFromAnalysis() ? getRef()->second.U.GV->getName() + : getRef()->second.U.Name; + } + + bool isFromAnalysis() const { return RefAndFlag.getInt(); } + + const GlobalValueSummaryMapTy::value_type *getRef() const { + return RefAndFlag.getPointer(); } }; template <> struct DenseMapInfo { static inline ValueInfo getEmptyKey() { - return ValueInfo((GlobalValueSummaryMapTy::value_type *)-1); + return ValueInfo(false, (GlobalValueSummaryMapTy::value_type *)-8); } static inline ValueInfo getTombstoneKey() { - return ValueInfo((GlobalValueSummaryMapTy::value_type *)-2); + return ValueInfo(false, (GlobalValueSummaryMapTy::value_type *)-16); + } + + static inline bool isSpecialKey(ValueInfo V) { + return V == getTombstoneKey() || V == getEmptyKey(); } - static bool isEqual(ValueInfo L, ValueInfo R) { return L.Ref == R.Ref; } - static unsigned getHashValue(ValueInfo I) { return (uintptr_t)I.Ref; } + static bool isEqual(ValueInfo L, ValueInfo R) { + // We are not supposed to mix ValueInfo(s) with different analysis flag + // in a same container. + assert(isSpecialKey(L) || isSpecialKey(R) || + (L.isFromAnalysis() == R.isFromAnalysis())); + return L.getRef() == R.getRef(); + } + static unsigned getHashValue(ValueInfo I) { return (uintptr_t)I.getRef(); } }; /// \brief Function and variable summary information to aid decisions and @@ -246,6 +292,7 @@ class GlobalValueSummary { /// If this is an alias summary, returns the summary of the aliased object (a /// global variable or function), otherwise returns itself. GlobalValueSummary *getBaseObject(); + const GlobalValueSummary *getBaseObject() const; friend class ModuleSummaryIndex; friend void computeDeadSymbols(class ModuleSummaryIndex &, @@ -255,10 +302,14 @@ class GlobalValueSummary { /// \brief Alias summary information. class AliasSummary : public GlobalValueSummary { GlobalValueSummary *AliaseeSummary; + // AliaseeGUID is only set and accessed when we are building a combined index + // via the BitcodeReader. 
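Editorial note: with the NameOrGV union above, a ValueInfo can identify a global either by its IR GlobalValue (analysis-built index) or by a name taken from the bitcode string table (combined/thin-link index). Consumers that only need the name can stay agnostic of the mode; a minimal sketch:

    // Sketch only: name() is valid in both modes, while getValue() may only
    // be called when isFromAnalysis() returns true.
    static StringRef summaryName(ValueInfo VI) {
      return VI ? VI.name() : StringRef();
    }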
+ GlobalValue::GUID AliaseeGUID; public: AliasSummary(GVFlags Flags) - : GlobalValueSummary(AliasKind, Flags, ArrayRef{}) {} + : GlobalValueSummary(AliasKind, Flags, ArrayRef{}), + AliaseeSummary(nullptr), AliaseeGUID(0) {} /// Check if this is an alias summary. static bool classof(const GlobalValueSummary *GVS) { @@ -266,6 +317,7 @@ class AliasSummary : public GlobalValueSummary { } void setAliasee(GlobalValueSummary *Aliasee) { AliaseeSummary = Aliasee; } + void setAliaseeGUID(GlobalValue::GUID GUID) { AliaseeGUID = GUID; } const GlobalValueSummary &getAliasee() const { assert(AliaseeSummary && "Unexpected missing aliasee summary"); @@ -276,8 +328,18 @@ class AliasSummary : public GlobalValueSummary { return const_cast( static_cast(this)->getAliasee()); } + const GlobalValue::GUID &getAliaseeGUID() const { + assert(AliaseeGUID && "Unexpected missing aliasee GUID"); + return AliaseeGUID; + } }; +const inline GlobalValueSummary *GlobalValueSummary::getBaseObject() const { + if (auto *AS = dyn_cast(this)) + return &AS->getAliasee(); + return this; +} + inline GlobalValueSummary *GlobalValueSummary::getBaseObject() { if (auto *AS = dyn_cast(this)) return &AS->getAliasee(); @@ -603,6 +665,11 @@ class ModuleSummaryIndex { /// considered live. bool WithGlobalValueDeadStripping = false; + /// If true then we're performing analysis of IR module, filling summary + /// accordingly. The value of 'false' means we're reading summary from + /// BC or YAML source. Affects the type of value stored in NameOrGV union + bool IsAnalysis; + std::set CfiFunctionDefs; std::set CfiFunctionDecls; @@ -611,10 +678,16 @@ class ModuleSummaryIndex { GlobalValueSummaryMapTy::value_type * getOrInsertValuePtr(GlobalValue::GUID GUID) { - return &*GlobalValueMap.emplace(GUID, GlobalValueSummaryInfo{}).first; + return &*GlobalValueMap.emplace(GUID, GlobalValueSummaryInfo(IsAnalysis)).first; } public: + // See IsAnalysis variable comment. + ModuleSummaryIndex(bool IsPerformingAnalysis) + : IsAnalysis(IsPerformingAnalysis) {} + + bool isPerformingAnalysis() const { return IsAnalysis; } + gvsummary_iterator begin() { return GlobalValueMap.begin(); } const_gvsummary_iterator begin() const { return GlobalValueMap.begin(); } gvsummary_iterator end() { return GlobalValueMap.end(); } @@ -636,19 +709,28 @@ class ModuleSummaryIndex { /// Return a ValueInfo for GUID if it exists, otherwise return ValueInfo(). ValueInfo getValueInfo(GlobalValue::GUID GUID) const { auto I = GlobalValueMap.find(GUID); - return ValueInfo(I == GlobalValueMap.end() ? nullptr : &*I); + return ValueInfo(IsAnalysis, I == GlobalValueMap.end() ? nullptr : &*I); } /// Return a ValueInfo for \p GUID. ValueInfo getOrInsertValueInfo(GlobalValue::GUID GUID) { - return ValueInfo(getOrInsertValuePtr(GUID)); + return ValueInfo(IsAnalysis, getOrInsertValuePtr(GUID)); + } + + /// Return a ValueInfo for \p GUID setting value \p Name. + ValueInfo getOrInsertValueInfo(GlobalValue::GUID GUID, StringRef Name) { + assert(!IsAnalysis); + auto VP = getOrInsertValuePtr(GUID); + VP->second.U.Name = Name; + return ValueInfo(IsAnalysis, VP); } /// Return a ValueInfo for \p GV and mark it as belonging to GV. ValueInfo getOrInsertValueInfo(const GlobalValue *GV) { + assert(IsAnalysis); auto VP = getOrInsertValuePtr(GV->getGUID()); - VP->second.GV = GV; - return ValueInfo(VP); + VP->second.U.GV = GV; + return ValueInfo(IsAnalysis, VP); } /// Return the GUID for \p OriginalId in the OidGuidMap. 
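Editorial note: because the index now records whether it is being filled from IR analysis or read back from bitcode/YAML, it must be told at construction time; a minimal sketch of the two modes:

    // Sketch only: the flag selects which member of the NameOrGV union is used.
    ModuleSummaryIndex PerModuleIndex(/*IsPerformingAnalysis=*/true);  // built from IR
    ModuleSummaryIndex CombinedIndex(/*IsPerformingAnalysis=*/false);  // loaded summary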
@@ -676,7 +758,7 @@ class ModuleSummaryIndex { addOriginalName(VI.getGUID(), Summary->getOriginalName()); // Here we have a notionally const VI, but the value it points to is owned // by the non-const *this. - const_cast(VI.Ref) + const_cast(VI.getRef()) ->second.SummaryList.push_back(std::move(Summary)); } @@ -807,6 +889,9 @@ class ModuleSummaryIndex { /// Summary). void collectDefinedGVSummariesPerModule( StringMap &ModuleToDefinedGVSummaries) const; + + /// Export summary to dot file for GraphViz. + void exportToDot(raw_ostream& OS) const; }; } // end namespace llvm diff --git a/include/llvm/IR/ModuleSummaryIndexYAML.h b/include/llvm/IR/ModuleSummaryIndexYAML.h index 4687f2d53e7e..241f106a4d4b 100644 --- a/include/llvm/IR/ModuleSummaryIndexYAML.h +++ b/include/llvm/IR/ModuleSummaryIndexYAML.h @@ -207,7 +207,8 @@ template <> struct CustomMappingTraits { io.setError("key not an integer"); return; } - auto &Elem = V[KeyInt]; + auto P = V.emplace(KeyInt, /*IsAnalysis=*/false); + auto &Elem = (*P.first).second; for (auto &FSum : FSums) { Elem.SummaryList.push_back(llvm::make_unique( GlobalValueSummary::GVFlags( diff --git a/include/llvm/IR/PatternMatch.h b/include/llvm/IR/PatternMatch.h index 5124607436f4..245d72fbd16e 100644 --- a/include/llvm/IR/PatternMatch.h +++ b/include/llvm/IR/PatternMatch.h @@ -956,6 +956,26 @@ inline CastClass_match m_FPExt(const OpTy &Op) { return CastClass_match(Op); } +//===----------------------------------------------------------------------===// +// Matcher for LoadInst classes +// + +template struct LoadClass_match { + Op_t Op; + + LoadClass_match(const Op_t &OpMatch) : Op(OpMatch) {} + + template bool match(OpTy *V) { + if (auto *LI = dyn_cast(V)) + return Op.match(LI->getPointerOperand()); + return false; + } +}; + +/// Matches LoadInst. +template inline LoadClass_match m_Load(const OpTy &Op) { + return LoadClass_match(Op); +} //===----------------------------------------------------------------------===// // Matchers for unary operators // diff --git a/include/llvm/IR/Value.h b/include/llvm/IR/Value.h index 12a4877320bc..d848fe921868 100644 --- a/include/llvm/IR/Value.h +++ b/include/llvm/IR/Value.h @@ -330,6 +330,10 @@ class Value { return UseList == nullptr; } + bool materialized_use_empty() const { + return UseList == nullptr; + } + using use_iterator = use_iterator_impl; using const_use_iterator = use_iterator_impl; @@ -566,7 +570,7 @@ class Value { /// /// If CanBeNull is set by this function the pointer can either be null or be /// dereferenceable up to the returned number of bytes. - unsigned getPointerDereferenceableBytes(const DataLayout &DL, + uint64_t getPointerDereferenceableBytes(const DataLayout &DL, bool &CanBeNull) const; /// \brief Returns an alignment of the pointer value. diff --git a/include/llvm/IR/Verifier.h b/include/llvm/IR/Verifier.h index 15e52d9e0742..bc10f330bc8a 100644 --- a/include/llvm/IR/Verifier.h +++ b/include/llvm/IR/Verifier.h @@ -61,11 +61,13 @@ class TBAAVerifier { /// \name Helper functions used by \c visitTBAAMetadata. 
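Editorial note: among the smaller API additions above, PatternMatch gains an m_Load matcher, so instruction-combining style code can now match through loads the same way it matches casts and binary operators. A short sketch:

    // Sketch only: match a zero-extended load of a specific pointer.
    static bool isZExtOfLoadFrom(Value *V, Value *Ptr) {
      using namespace llvm::PatternMatch;
      return match(V, m_ZExt(m_Load(m_Specific(Ptr))));
    }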
/// @{ MDNode *getFieldNodeFromTBAABaseNode(Instruction &I, const MDNode *BaseNode, - APInt &Offset); + APInt &Offset, bool IsNewFormat); TBAAVerifier::TBAABaseNodeSummary verifyTBAABaseNode(Instruction &I, - const MDNode *BaseNode); + const MDNode *BaseNode, + bool IsNewFormat); TBAABaseNodeSummary verifyTBAABaseNodeImpl(Instruction &I, - const MDNode *BaseNode); + const MDNode *BaseNode, + bool IsNewFormat); bool isValidScalarTBAANode(const MDNode *MD); /// @} diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h index 4935ba1a30d1..cfa169e7106d 100644 --- a/include/llvm/InitializePasses.h +++ b/include/llvm/InitializePasses.h @@ -80,6 +80,7 @@ void initializeBranchFolderPassPass(PassRegistry&); void initializeBranchProbabilityInfoWrapperPassPass(PassRegistry&); void initializeBranchRelaxationPass(PassRegistry&); void initializeBreakCriticalEdgesPass(PassRegistry&); +void initializeBreakFalseDepsPass(PassRegistry&); void initializeCallSiteSplittingLegacyPassPass(PassRegistry&); void initializeCFGOnlyPrinterLegacyPassPass(PassRegistry&); void initializeCFGOnlyViewerLegacyPassPass(PassRegistry&); @@ -99,6 +100,8 @@ void initializeConstantMergeLegacyPassPass(PassRegistry&); void initializeConstantPropagationPass(PassRegistry&); void initializeCorrelatedValuePropagationPass(PassRegistry&); void initializeCostModelAnalysisPass(PassRegistry&); +void initializeEarlyMachineLICMPass(PassRegistry&); +void initializeEarlyTailDuplicatePass(PassRegistry&); void initializeEntryExitInstrumenterPass(PassRegistry&); void initializePostInlineEntryExitInstrumenterPass(PassRegistry&); void initializeCrossDSOCFIPass(PassRegistry&); @@ -161,6 +164,7 @@ void initializeIVUsersWrapperPassPass(PassRegistry&); void initializeIfConverterPass(PassRegistry&); void initializeImplicitNullChecksPass(PassRegistry&); void initializeIndVarSimplifyLegacyPassPass(PassRegistry&); +void initializeIndirectBrExpandPassPass(PassRegistry&); void initializeInductiveRangeCheckEliminationPass(PassRegistry&); void initializeInferAddressSpacesPass(PassRegistry&); void initializeInferFunctionAttrsLegacyPassPass(PassRegistry&); @@ -311,6 +315,7 @@ void initializeRegAllocFastPass(PassRegistry&); void initializeRAGreedyPass(PassRegistry&); void initializeReassociateLegacyPassPass(PassRegistry&); void initializeRegBankSelectPass(PassRegistry&); +void initializeReachingDefAnalysisPass(PassRegistry&); void initializeRegToMemPass(PassRegistry&); void initializeRegionInfoPassPass(PassRegistry&); void initializeRegionOnlyPrinterPass(PassRegistry&); @@ -321,7 +326,7 @@ void initializeRegisterCoalescerPass(PassRegistry&); void initializeRenameIndependentSubregsPass(PassRegistry&); void initializeResetMachineFunctionPass(PassRegistry&); void initializeReversePostOrderFunctionAttrsLegacyPassPass(PassRegistry&); -void initializeRewriteStatepointsForGCPass(PassRegistry&); +void initializeRewriteStatepointsForGCLegacyPassPass(PassRegistry &); void initializeRewriteSymbolsLegacyPassPass(PassRegistry&); void initializeSafepointIRVerifierPass(PassRegistry&); void initializeSCCPLegacyPassPass(PassRegistry&); @@ -360,8 +365,9 @@ void initializeStripNonDebugSymbolsPass(PassRegistry&); void initializeStripNonLineTableDebugInfoPass(PassRegistry&); void initializeStripSymbolsPass(PassRegistry&); void initializeStructurizeCFGPass(PassRegistry&); +void initializeHWAddressSanitizerPass(PassRegistry&); void initializeTailCallElimPass(PassRegistry&); -void initializeTailDuplicatePassPass(PassRegistry&); +void 
initializeTailDuplicatePass(PassRegistry&); void initializeTargetLibraryInfoWrapperPassPass(PassRegistry&); void initializeTargetPassConfigPass(PassRegistry&); void initializeTargetTransformInfoWrapperPassPass(PassRegistry&); diff --git a/include/llvm/LTO/LTO.h b/include/llvm/LTO/LTO.h index f784d4997430..2a2b59847281 100644 --- a/include/llvm/LTO/LTO.h +++ b/include/llvm/LTO/LTO.h @@ -279,7 +279,6 @@ class LTO { unsigned ParallelCodeGenParallelismLevel; LTOLLVMContext Ctx; - bool HasModule = false; std::unique_ptr CombinedModule; std::unique_ptr Mover; @@ -372,8 +371,7 @@ class LTO { const SymbolResolution *&ResI, const SymbolResolution *ResE); Error runRegularLTO(AddStreamFn AddStream); - Error runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache, - bool HasRegularLTO); + Error runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache); mutable bool CalledGetMaxTasks = false; }; diff --git a/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h b/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h index 14f0c48266f0..d794535700e5 100644 --- a/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h +++ b/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h @@ -148,10 +148,14 @@ class ThinLTOCodeGenerator { /// incremental build. void setCacheDir(std::string Path) { CacheOptions.Path = std::move(Path); } - /// Cache policy: interval (seconds) between two prune of the cache. Set to a - /// negative value (default) to disable pruning. A value of 0 will be ignored. + /// Cache policy: interval (seconds) between two prunes of the cache. Set to a + /// negative value to disable pruning. A value of 0 will be ignored. void setCachePruningInterval(int Interval) { - if (Interval) + if (Interval == 0) + return; + if(Interval < 0) + CacheOptions.Policy.Interval.reset(); + else CacheOptions.Policy.Interval = std::chrono::seconds(Interval); } diff --git a/include/llvm/MC/LaneBitmask.h b/include/llvm/MC/LaneBitmask.h index a2bdcd4e69c7..8c0b4ecb8fd4 100644 --- a/include/llvm/MC/LaneBitmask.h +++ b/include/llvm/MC/LaneBitmask.h @@ -80,9 +80,9 @@ namespace llvm { return Log2_32(Mask); } - static LaneBitmask getNone() { return LaneBitmask(0); } - static LaneBitmask getAll() { return ~LaneBitmask(0); } - static LaneBitmask getLane(unsigned Lane) { + static constexpr LaneBitmask getNone() { return LaneBitmask(0); } + static constexpr LaneBitmask getAll() { return ~LaneBitmask(0); } + static constexpr LaneBitmask getLane(unsigned Lane) { return LaneBitmask(Type(1) << Lane); } diff --git a/include/llvm/MC/MCAsmInfo.h b/include/llvm/MC/MCAsmInfo.h index 234762f36dd4..c538c46fc072 100644 --- a/include/llvm/MC/MCAsmInfo.h +++ b/include/llvm/MC/MCAsmInfo.h @@ -165,7 +165,8 @@ class MCAsmInfo { const char *ZeroDirective; /// This directive allows emission of an ascii string with the standard C - /// escape characters embedded into it. Defaults to "\t.ascii\t" + /// escape characters embedded into it. If a target doesn't support this, it + /// can be set to null. 
Defaults to "\t.ascii\t" const char *AsciiDirective; /// If not null, this allows for special handling of zero terminated strings diff --git a/include/llvm/MC/MCAssembler.h b/include/llvm/MC/MCAssembler.h index 1ce6b09355d6..034605557d4c 100644 --- a/include/llvm/MC/MCAssembler.h +++ b/include/llvm/MC/MCAssembler.h @@ -16,6 +16,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/iterator.h" #include "llvm/ADT/iterator_range.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/MC/MCDirectives.h" #include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCFixup.h" @@ -84,8 +85,12 @@ class MCAssembler { /// MachO specific deployment target version info. // A Major version of 0 indicates that no version information was supplied // and so the corresponding load command should not be emitted. - using VersionMinInfoType = struct { - MCVersionMinType Kind; + using VersionInfoType = struct { + bool EmitBuildVersion; + union { + MCVersionMinType Type; ///< Used when EmitBuildVersion==false. + MachO::PlatformType Platform; ///< Used when EmitBuildVersion==true. + } TypeOrPlatform; unsigned Major; unsigned Minor; unsigned Update; @@ -145,7 +150,7 @@ class MCAssembler { /// the Streamer and the .o writer MCLOHContainer LOHContainer; - VersionMinInfoType VersionMinInfo; + VersionInfoType VersionInfo; /// Evaluate a fixup to a relocatable expression and the value which should be /// placed into the fixup. @@ -243,13 +248,22 @@ class MCAssembler { void setELFHeaderEFlags(unsigned Flags) { ELFHeaderEFlags = Flags; } /// MachO deployment target version information. - const VersionMinInfoType &getVersionMinInfo() const { return VersionMinInfo; } - void setVersionMinInfo(MCVersionMinType Kind, unsigned Major, unsigned Minor, - unsigned Update) { - VersionMinInfo.Kind = Kind; - VersionMinInfo.Major = Major; - VersionMinInfo.Minor = Minor; - VersionMinInfo.Update = Update; + const VersionInfoType &getVersionInfo() const { return VersionInfo; } + void setVersionMin(MCVersionMinType Type, unsigned Major, unsigned Minor, + unsigned Update) { + VersionInfo.EmitBuildVersion = false; + VersionInfo.TypeOrPlatform.Type = Type; + VersionInfo.Major = Major; + VersionInfo.Minor = Minor; + VersionInfo.Update = Update; + } + void setBuildVersion(MachO::PlatformType Platform, unsigned Major, + unsigned Minor, unsigned Update) { + VersionInfo.EmitBuildVersion = true; + VersionInfo.TypeOrPlatform.Platform = Platform; + VersionInfo.Major = Major; + VersionInfo.Minor = Minor; + VersionInfo.Update = Update; } /// Reuse an assembler instance diff --git a/include/llvm/MC/MCCodeView.h b/include/llvm/MC/MCCodeView.h index e2249f49c86c..c8f14515ed34 100644 --- a/include/llvm/MC/MCCodeView.h +++ b/include/llvm/MC/MCCodeView.h @@ -177,13 +177,7 @@ class CodeViewContext { unsigned IACol); /// Retreive the function info if this is a valid function id, or nullptr. - MCCVFunctionInfo *getCVFunctionInfo(unsigned FuncId) { - if (FuncId >= Functions.size()) - return nullptr; - if (Functions[FuncId].isUnallocatedFunctionInfo()) - return nullptr; - return &Functions[FuncId]; - } + MCCVFunctionInfo *getCVFunctionInfo(unsigned FuncId); /// Saves the information from the currently parsed .cv_loc directive /// and sets CVLocSeen. 
When the next instruction is assembled an entry @@ -199,50 +193,22 @@ class CodeViewContext { CurrentCVLoc.setIsStmt(IsStmt); CVLocSeen = true; } - void clearCVLocSeen() { CVLocSeen = false; } bool getCVLocSeen() { return CVLocSeen; } + void clearCVLocSeen() { CVLocSeen = false; } + const MCCVLoc &getCurrentCVLoc() { return CurrentCVLoc; } bool isValidCVFileNumber(unsigned FileNumber); /// \brief Add a line entry. - void addLineEntry(const MCCVLineEntry &LineEntry) { - size_t Offset = MCCVLines.size(); - auto I = MCCVLineStartStop.insert( - {LineEntry.getFunctionId(), {Offset, Offset + 1}}); - if (!I.second) - I.first->second.second = Offset + 1; - MCCVLines.push_back(LineEntry); - } + void addLineEntry(const MCCVLineEntry &LineEntry); - std::vector getFunctionLineEntries(unsigned FuncId) { - std::vector FilteredLines; + std::vector getFunctionLineEntries(unsigned FuncId); - auto I = MCCVLineStartStop.find(FuncId); - if (I != MCCVLineStartStop.end()) - for (size_t Idx = I->second.first, End = I->second.second; Idx != End; - ++Idx) - if (MCCVLines[Idx].getFunctionId() == FuncId) - FilteredLines.push_back(MCCVLines[Idx]); - return FilteredLines; - } - - std::pair getLineExtent(unsigned FuncId) { - auto I = MCCVLineStartStop.find(FuncId); - // Return an empty extent if there are no cv_locs for this function id. - if (I == MCCVLineStartStop.end()) - return {~0ULL, 0}; - return I->second; - } + std::pair getLineExtent(unsigned FuncId); - ArrayRef getLinesForExtent(size_t L, size_t R) { - if (R <= L) - return None; - if (L >= MCCVLines.size()) - return None; - return makeArrayRef(&MCCVLines[L], R - L); - } + ArrayRef getLinesForExtent(size_t L, size_t R); /// Emits a line table substream. void emitLineTableForFunction(MCObjectStreamer &OS, unsigned FuncId, diff --git a/include/llvm/MC/MCContext.h b/include/llvm/MC/MCContext.h index 432fc0ede072..8b4da7c8db24 100644 --- a/include/llvm/MC/MCContext.h +++ b/include/llvm/MC/MCContext.h @@ -23,6 +23,7 @@ #include "llvm/MC/SectionKind.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/MD5.h" #include "llvm/Support/raw_ostream.h" #include #include @@ -489,7 +490,8 @@ namespace llvm { /// Creates an entry in the dwarf file and directory tables. unsigned getDwarfFile(StringRef Directory, StringRef FileName, - unsigned FileNumber, unsigned CUID); + unsigned FileNumber, MD5::MD5Result *Checksum, + unsigned CUID); bool isValidDwarfFileNumber(unsigned FileNumber, unsigned CUID = 0); diff --git a/include/llvm/MC/MCDwarf.h b/include/llvm/MC/MCDwarf.h index 88ffa04128e6..ec88e11693be 100644 --- a/include/llvm/MC/MCDwarf.h +++ b/include/llvm/MC/MCDwarf.h @@ -20,6 +20,7 @@ #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/MC/MCSection.h" +#include "llvm/Support/MD5.h" #include #include #include @@ -50,6 +51,10 @@ struct MCDwarfFile { // \brief The index into the list of directory names for this file name. unsigned DirIndex; + + /// The MD5 checksum, if there is one. Non-owning pointer to data allocated + /// in MCContext. 
+ MD5::MD5Result *Checksum = nullptr; }; /// \brief Instances of this class represent the information from a @@ -203,11 +208,12 @@ struct MCDwarfLineTableHeader { SmallVector MCDwarfFiles; StringMap SourceIdMap; StringRef CompilationDir; + bool HasMD5 = false; MCDwarfLineTableHeader() = default; unsigned getFile(StringRef &Directory, StringRef &FileName, - unsigned FileNumber = 0); + MD5::MD5Result *Checksum, unsigned FileNumber = 0); std::pair Emit(MCStreamer *MCOS, MCDwarfLineTableParams Params) const; std::pair @@ -223,8 +229,9 @@ class MCDwarfDwoLineTable { Header.CompilationDir = CompilationDir; } - unsigned getFile(StringRef Directory, StringRef FileName) { - return Header.getFile(Directory, FileName); + unsigned getFile(StringRef Directory, StringRef FileName, + MD5::MD5Result *Checksum) { + return Header.getFile(Directory, FileName, Checksum); } void Emit(MCStreamer &MCOS, MCDwarfLineTableParams Params) const; @@ -242,7 +249,7 @@ class MCDwarfLineTable { void EmitCU(MCObjectStreamer *MCOS, MCDwarfLineTableParams Params) const; unsigned getFile(StringRef &Directory, StringRef &FileName, - unsigned FileNumber = 0); + MD5::MD5Result *Checksum, unsigned FileNumber = 0); MCSymbol *getLabel() const { return Header.Label; diff --git a/include/llvm/MC/MCExpr.h b/include/llvm/MC/MCExpr.h index a91a31414bdb..fcbbe650d26f 100644 --- a/include/llvm/MC/MCExpr.h +++ b/include/llvm/MC/MCExpr.h @@ -206,6 +206,14 @@ class MCSymbolRefExpr : public MCExpr { VK_ARM_TLSLDO, // symbol(tlsldo) VK_ARM_TLSDESCSEQ, + VK_AVR_NONE, + VK_AVR_LO8, + VK_AVR_HI8, + VK_AVR_HLO8, + VK_AVR_DIFF8, + VK_AVR_DIFF16, + VK_AVR_DIFF32, + VK_PPC_LO, // symbol@l VK_PPC_HI, // symbol@h VK_PPC_HA, // symbol@ha diff --git a/include/llvm/MC/MCFragment.h b/include/llvm/MC/MCFragment.h index 7ebde03a758c..38c365538e3c 100644 --- a/include/llvm/MC/MCFragment.h +++ b/include/llvm/MC/MCFragment.h @@ -422,14 +422,21 @@ class MCFillFragment : public MCFragment { uint8_t Value; /// The number of bytes to insert. - uint64_t Size; + const MCExpr &Size; + + /// Source location of the directive that this fragment was created for. + SMLoc Loc; public: - MCFillFragment(uint8_t Value, uint64_t Size, MCSection *Sec = nullptr) - : MCFragment(FT_Fill, false, 0, Sec), Value(Value), Size(Size) {} + MCFillFragment(uint8_t Value, const MCExpr &Size, SMLoc Loc, + MCSection *Sec = nullptr) + : MCFragment(FT_Fill, false, 0, Sec), Value(Value), Size(Size), Loc(Loc) { + } uint8_t getValue() const { return Value; } - uint64_t getSize() const { return Size; } + const MCExpr &getSize() const { return Size; } + + SMLoc getLoc() const { return Loc; } static bool classof(const MCFragment *F) { return F->getKind() == MCFragment::FT_Fill; @@ -437,13 +444,13 @@ class MCFillFragment : public MCFragment { }; class MCOrgFragment : public MCFragment { - /// Offset - The offset this fragment should start at. + /// The offset this fragment should start at. const MCExpr *Offset; - /// Value - Value to use for filling bytes. + /// Value to use for filling bytes. int8_t Value; - /// Loc - Source location of the directive that this fragment was created for. + /// Source location of the directive that this fragment was created for. 
SMLoc Loc; public: diff --git a/include/llvm/MC/MCObjectFileInfo.h b/include/llvm/MC/MCObjectFileInfo.h index d95f84d1d816..8f5ca4dee651 100644 --- a/include/llvm/MC/MCObjectFileInfo.h +++ b/include/llvm/MC/MCObjectFileInfo.h @@ -128,6 +128,7 @@ class MCObjectFileInfo { MCSection *COFFDebugSymbolsSection; MCSection *COFFDebugTypesSection; + MCSection *COFFGlobalTypeHashesSection; /// Extra TLS Variable Data section. /// @@ -154,6 +155,9 @@ class MCObjectFileInfo { /// It is initialized on demand so it can be overwritten (with uniquing). MCSection *EHFrameSection; + /// Section containing metadata on function stack sizes. + MCSection *StackSizesSection; + // ELF specific sections. MCSection *DataRelROSection; MCSection *MergeableConst4Section; @@ -192,6 +196,7 @@ class MCObjectFileInfo { MCSection *PDataSection; MCSection *XDataSection; MCSection *SXDataSection; + MCSection *GFIDsSection; public: void InitMCObjectFileInfo(const Triple &TT, bool PIC, MCContext &ctx, @@ -278,7 +283,9 @@ class MCObjectFileInfo { MCSection *getCOFFDebugTypesSection() const { return COFFDebugTypesSection; } - + MCSection *getCOFFGlobalTypeHashesSection() const { + return COFFGlobalTypeHashesSection; + } MCSection *getTLSExtraDataSection() const { return TLSExtraDataSection; } const MCSection *getTLSDataSection() const { return TLSDataSection; } @@ -287,6 +294,8 @@ class MCObjectFileInfo { MCSection *getStackMapSection() const { return StackMapSection; } MCSection *getFaultMapSection() const { return FaultMapSection; } + MCSection *getStackSizesSection() const { return StackSizesSection; } + // ELF specific sections. MCSection *getDataRelROSection() const { return DataRelROSection; } const MCSection *getMergeableConst4Section() const { @@ -341,6 +350,7 @@ class MCObjectFileInfo { MCSection *getPDataSection() const { return PDataSection; } MCSection *getXDataSection() const { return XDataSection; } MCSection *getSXDataSection() const { return SXDataSection; } + MCSection *getGFIDsSection() const { return GFIDsSection; } MCSection *getEHFrameSection() { return EHFrameSection; diff --git a/include/llvm/MC/MCObjectStreamer.h b/include/llvm/MC/MCObjectStreamer.h index a3dbc56ebc10..43ed00b4a7a7 100644 --- a/include/llvm/MC/MCObjectStreamer.h +++ b/include/llvm/MC/MCObjectStreamer.h @@ -161,7 +161,6 @@ class MCObjectStreamer : public MCStreamer { bool EmitRelocDirective(const MCExpr &Offset, StringRef Name, const MCExpr *Expr, SMLoc Loc) override; using MCStreamer::emitFill; - void emitFill(uint64_t NumBytes, uint8_t FillValue) override; void emitFill(const MCExpr &NumBytes, uint64_t FillValue, SMLoc Loc = SMLoc()) override; void emitFill(const MCExpr &NumValues, int64_t Size, int64_t Expr, diff --git a/include/llvm/MC/MCParser/MCTargetAsmParser.h b/include/llvm/MC/MCParser/MCTargetAsmParser.h index 9f8550c3887c..d628794b32e2 100644 --- a/include/llvm/MC/MCParser/MCTargetAsmParser.h +++ b/include/llvm/MC/MCParser/MCTargetAsmParser.h @@ -271,6 +271,7 @@ class MCTargetAsmParser : public MCAsmParserExtension { public: enum MatchResultTy { Match_InvalidOperand, + Match_InvalidTiedOperand, Match_MissingFeature, Match_MnemonicFail, Match_Success, diff --git a/include/llvm/MC/MCRegisterInfo.h b/include/llvm/MC/MCRegisterInfo.h index de98abe0dc46..c57c9ef709da 100644 --- a/include/llvm/MC/MCRegisterInfo.h +++ b/include/llvm/MC/MCRegisterInfo.h @@ -407,6 +407,15 @@ class MCRegisterInfo { /// \brief Map a dwarf register back to a target register. 
int getLLVMRegNum(unsigned RegNum, bool isEH) const; + /// \brief Map a DWARF EH register back to a target register (same as + /// getLLVMRegNum(RegNum, true)) but return -1 if there is no mapping, + /// rather than asserting that there must be one. + int getLLVMRegNumFromEH(unsigned RegNum) const; + + /// \brief Map a target EH register number to an equivalent DWARF register + /// number. + int getDwarfRegNumFromDwarfEHRegNum(unsigned RegNum) const; + /// \brief Map a target register to an equivalent SEH register /// number. Returns LLVM register number if there is no equivalent value. int getSEHRegNum(unsigned RegNum) const; diff --git a/include/llvm/MC/MCStreamer.h b/include/llvm/MC/MCStreamer.h index 58003d7d596c..a4a1b5034a80 100644 --- a/include/llvm/MC/MCStreamer.h +++ b/include/llvm/MC/MCStreamer.h @@ -23,6 +23,7 @@ #include "llvm/MC/MCLinkerOptimizationHint.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCWinEH.h" +#include "llvm/Support/MD5.h" #include "llvm/Support/SMLoc.h" #include "llvm/Support/TargetParser.h" #include @@ -95,6 +96,17 @@ class MCTargetStreamer { virtual void prettyPrintAsm(MCInstPrinter &InstPrinter, raw_ostream &OS, const MCInst &Inst, const MCSubtargetInfo &STI); + virtual void emitDwarfFileDirective(StringRef Directive); + + /// Update streamer for a new active section. + /// + /// This is called by PopSection and SwitchSection, if the current + /// section changes. + virtual void changeSection(const MCSection *CurSection, MCSection *Section, + const MCExpr *SubSection, raw_ostream &OS); + + virtual void emitValue(const MCExpr *Value); + virtual void finish(); }; @@ -421,9 +433,16 @@ class MCStreamer { /// \brief Note in the output the specified region \p Kind. virtual void EmitDataRegion(MCDataRegionType Kind) {} - /// \brief Specify the MachO minimum deployment target version. - virtual void EmitVersionMin(MCVersionMinType, unsigned Major, unsigned Minor, - unsigned Update) {} + /// \brief Specify the Mach-O minimum deployment target version. + virtual void EmitVersionMin(MCVersionMinType Type, unsigned Major, + unsigned Minor, unsigned Update) {} + + /// Emit/Specify Mach-O build version command. + /// \p Platform should be one of MachO::PlatformType. + virtual void EmitBuildVersion(unsigned Platform, unsigned Major, + unsigned Minor, unsigned Update) {} + + void EmitVersionForTarget(const Triple &Target); /// \brief Note in the output that the specified \p Func is a Thumb mode /// function (ARM target only). @@ -481,6 +500,9 @@ class MCStreamer { virtual void EmitCOFFSafeSEH(MCSymbol const *Symbol); + /// \brief Emits the symbol table index of a Symbol into the current section. + virtual void EmitCOFFSymbolIndex(MCSymbol const *Symbol); + /// \brief Emits a COFF section index. /// /// \param Symbol - Symbol the section number relocation should point to. @@ -644,7 +666,7 @@ class MCStreamer { /// \brief Emit NumBytes bytes worth of the value specified by FillValue. /// This implements directives such as '.space'. - virtual void emitFill(uint64_t NumBytes, uint8_t FillValue); + void emitFill(uint64_t NumBytes, uint8_t FillValue); /// \brief Emit \p Size bytes worth of the value specified by \p FillValue. /// @@ -664,7 +686,6 @@ class MCStreamer { /// \param NumValues - The number of copies of \p Size bytes to emit. /// \param Size - The size (in bytes) of each repeated value. /// \param Expr - The expression from which \p Size bytes are used. 
- virtual void emitFill(uint64_t NumValues, int64_t Size, int64_t Expr); virtual void emitFill(const MCExpr &NumValues, int64_t Size, int64_t Expr, SMLoc Loc = SMLoc()); @@ -737,6 +758,7 @@ class MCStreamer { /// implements the DWARF2 '.file 4 "foo.c"' assembler directive. virtual unsigned EmitDwarfFileDirective(unsigned FileNo, StringRef Directory, StringRef Filename, + MD5::MD5Result *Checksum = nullptr, unsigned CUID = 0); /// \brief This implements the DWARF2 '.loc fileno lineno ...' assembler diff --git a/include/llvm/MC/MCSymbol.h b/include/llvm/MC/MCSymbol.h index 9b1cc6e7d7e8..cc8fc02968a5 100644 --- a/include/llvm/MC/MCSymbol.h +++ b/include/llvm/MC/MCSymbol.h @@ -177,8 +177,8 @@ class MCSymbol { llvm_unreachable("Constructor throws?"); } - MCSection *getSectionPtr(bool SetUsed = true) const { - if (MCFragment *F = getFragment(SetUsed)) { + MCSection *getSectionPtr() const { + if (MCFragment *F = getFragment()) { assert(F != AbsolutePseudoFragment); return F->getParent(); } @@ -221,7 +221,6 @@ class MCSymbol { /// isUsed - Check if this is used. bool isUsed() const { return IsUsed; } - void setUsed(bool Value) const { IsUsed |= Value; } /// \brief Check if this symbol is redefinable. bool isRedefinable() const { return IsRedefinable; } @@ -246,28 +245,28 @@ class MCSymbol { /// isDefined - Check if this symbol is defined (i.e., it has an address). /// /// Defined symbols are either absolute or in some section. - bool isDefined(bool SetUsed = true) const { - return getFragment(SetUsed) != nullptr; - } + bool isDefined() const { return !isUndefined(); } /// isInSection - Check if this symbol is defined in some section (i.e., it /// is defined but not absolute). - bool isInSection(bool SetUsed = true) const { - return isDefined(SetUsed) && !isAbsolute(SetUsed); + bool isInSection() const { + return isDefined() && !isAbsolute(); } /// isUndefined - Check if this symbol undefined (i.e., implicitly defined). - bool isUndefined(bool SetUsed = true) const { return !isDefined(SetUsed); } + bool isUndefined(bool SetUsed = true) const { + return getFragment(SetUsed) == nullptr; + } /// isAbsolute - Check if this is an absolute symbol. - bool isAbsolute(bool SetUsed = true) const { - return getFragment(SetUsed) == AbsolutePseudoFragment; + bool isAbsolute() const { + return getFragment() == AbsolutePseudoFragment; } /// Get the section associated with a defined, non-absolute symbol. - MCSection &getSection(bool SetUsed = true) const { - assert(isInSection(SetUsed) && "Invalid accessor!"); - return *getSectionPtr(SetUsed); + MCSection &getSection() const { + assert(isInSection() && "Invalid accessor!"); + return *getSectionPtr(); } /// Mark the symbol as defined in the fragment \p F. 
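The MCAssembler/MCStreamer changes above split Mach-O deployment-target emission into the legacy LC_VERSION_MIN_* path (setVersionMin / EmitVersionMin) and the new LC_BUILD_VERSION path (setBuildVersion / EmitBuildVersion). A minimal sketch of how a streamer client could drive the two entry points; the helper name and the UseBuildVersion flag are illustrative assumptions, and the real selection logic belongs to MCStreamer::EmitVersionForTarget, which is only declared in this patch:

#include "llvm/ADT/Triple.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/MC/MCDirectives.h"
#include "llvm/MC/MCStreamer.h"

// Hypothetical helper, not part of this patch: record the macOS deployment
// target either via the new LC_BUILD_VERSION load command or via the legacy
// LC_VERSION_MIN_MACOSX load command.
static void emitMacOSDeploymentTarget(llvm::MCStreamer &S, const llvm::Triple &T,
                                      bool UseBuildVersion) {
  unsigned Major = 0, Minor = 0, Update = 0;
  T.getMacOSXVersion(Major, Minor, Update);
  if (UseBuildVersion)
    S.EmitBuildVersion(llvm::MachO::PLATFORM_MACOS, Major, Minor, Update);
  else
    S.EmitVersionMin(llvm::MCVM_OSXVersionMin, Major, Minor, Update);
}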
diff --git a/include/llvm/MC/MCSymbolWasm.h b/include/llvm/MC/MCSymbolWasm.h index 9bae6c582faa..dc8d26a88587 100644 --- a/include/llvm/MC/MCSymbolWasm.h +++ b/include/llvm/MC/MCSymbolWasm.h @@ -18,6 +18,8 @@ class MCSymbolWasm : public MCSymbol { private: bool IsFunction = false; bool IsWeak = false; + bool IsHidden = false; + bool IsComdat = false; std::string ModuleName; SmallVector Returns; SmallVector Params; @@ -45,6 +47,12 @@ class MCSymbolWasm : public MCSymbol { bool isWeak() const { return IsWeak; } void setWeak(bool isWeak) { IsWeak = isWeak; } + bool isHidden() const { return IsHidden; } + void setHidden(bool isHidden) { IsHidden = isHidden; } + + bool isComdat() const { return IsComdat; } + void setComdat(bool isComdat) { IsComdat = isComdat; } + const StringRef getModuleName() const { return ModuleName; } const SmallVector &getReturns() const { diff --git a/include/llvm/MC/MCWinCOFFStreamer.h b/include/llvm/MC/MCWinCOFFStreamer.h index a2500c06efa1..60c17cac9f28 100644 --- a/include/llvm/MC/MCWinCOFFStreamer.h +++ b/include/llvm/MC/MCWinCOFFStreamer.h @@ -50,6 +50,7 @@ class MCWinCOFFStreamer : public MCObjectStreamer { void EmitCOFFSymbolType(int Type) override; void EndCOFFSymbolDef() override; void EmitCOFFSafeSEH(MCSymbol const *Symbol) override; + void EmitCOFFSymbolIndex(MCSymbol const *Symbol) override; void EmitCOFFSectionIndex(MCSymbol const *Symbol) override; void EmitCOFFSecRel32(MCSymbol const *Symbol, uint64_t Offset) override; void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, diff --git a/include/llvm/Object/COFF.h b/include/llvm/Object/COFF.h index 2fb896eed97b..b072dd5ba7d9 100644 --- a/include/llvm/Object/COFF.h +++ b/include/llvm/Object/COFF.h @@ -743,6 +743,12 @@ struct coff_resource_dir_table { support::ulittle16_t NumberOfIDEntries; }; +struct debug_h_header { + support::ulittle32_t Magic; + support::ulittle16_t Version; + support::ulittle16_t HashAlgorithm; +}; + class COFFObjectFile : public ObjectFile { private: friend class ImportDirectoryEntryRef; @@ -920,7 +926,7 @@ class COFFObjectFile : public ObjectFile { uint8_t getBytesInAddress() const override; StringRef getFileFormatName() const override; - unsigned getArch() const override; + Triple::ArchType getArch() const override; SubtargetFeatures getFeatures() const override { return SubtargetFeatures(); } import_directory_iterator import_directory_begin() const; diff --git a/include/llvm/Object/COFFImportFile.h b/include/llvm/Object/COFFImportFile.h index 4b284de679b3..7ca416ff1b22 100644 --- a/include/llvm/Object/COFFImportFile.h +++ b/include/llvm/Object/COFFImportFile.h @@ -98,7 +98,8 @@ struct COFFShortExport { Error writeImportLibrary(StringRef ImportName, StringRef Path, ArrayRef Exports, - COFF::MachineTypes Machine, bool MakeWeakAliases); + COFF::MachineTypes Machine, bool MakeWeakAliases, + bool MinGW); } // namespace object } // namespace llvm diff --git a/include/llvm/Object/ELF.h b/include/llvm/Object/ELF.h index c24b6310465e..5f233bf009f0 100644 --- a/include/llvm/Object/ELF.h +++ b/include/llvm/Object/ELF.h @@ -177,10 +177,10 @@ class ELFFile { Expected> getSectionContents(const Elf_Shdr *Sec) const; }; -using ELF32LEFile = ELFFile>; -using ELF64LEFile = ELFFile>; -using ELF32BEFile = ELFFile>; -using ELF64BEFile = ELFFile>; +using ELF32LEFile = ELFFile; +using ELF64LEFile = ELFFile; +using ELF32BEFile = ELFFile; +using ELF64BEFile = ELFFile; template inline Expected @@ -277,6 +277,9 @@ ELFFile::getSectionContentsAsArray(const Elf_Shdr *Sec) const { Offset + Size > Buf.size()) 
return createError("invalid section offset"); + if (Offset % alignof(T)) + return createError("unaligned data"); + const T *Start = reinterpret_cast(base() + Offset); return makeArrayRef(Start, Size / sizeof(T)); } diff --git a/include/llvm/Object/ELFObjectFile.h b/include/llvm/Object/ELFObjectFile.h index 905ce450f7f1..32aabec952ab 100644 --- a/include/llvm/Object/ELFObjectFile.h +++ b/include/llvm/Object/ELFObjectFile.h @@ -200,14 +200,14 @@ template class ELFObjectFile : public ELFObjectFileBase { public: LLVM_ELF_IMPORT_TYPES_ELFT(ELFT) - using uintX_t = typename ELFFile::uintX_t; + using uintX_t = typename ELFT::uint; - using Elf_Sym = typename ELFFile::Elf_Sym; - using Elf_Shdr = typename ELFFile::Elf_Shdr; - using Elf_Ehdr = typename ELFFile::Elf_Ehdr; - using Elf_Rel = typename ELFFile::Elf_Rel; - using Elf_Rela = typename ELFFile::Elf_Rela; - using Elf_Dyn = typename ELFFile::Elf_Dyn; + using Elf_Sym = typename ELFT::Sym; + using Elf_Shdr = typename ELFT::Shdr; + using Elf_Ehdr = typename ELFT::Ehdr; + using Elf_Rel = typename ELFT::Rel; + using Elf_Rela = typename ELFT::Rela; + using Elf_Dyn = typename ELFT::Dyn; private: ELFObjectFile(MemoryBufferRef Object, ELFFile EF, @@ -362,7 +362,7 @@ template class ELFObjectFile : public ELFObjectFileBase { uint8_t getBytesInAddress() const override; StringRef getFileFormatName() const override; - unsigned getArch() const override; + Triple::ArchType getArch() const override; std::error_code getPlatformFlags(unsigned &Result) const override { Result = EF.getHeader()->e_flags; @@ -404,10 +404,10 @@ template class ELFObjectFile : public ELFObjectFileBase { bool isRelocatableObject() const override; }; -using ELF32LEObjectFile = ELFObjectFile>; -using ELF64LEObjectFile = ELFObjectFile>; -using ELF32BEObjectFile = ELFObjectFile>; -using ELF64BEObjectFile = ELFObjectFile>; +using ELF32LEObjectFile = ELFObjectFile; +using ELF64LEObjectFile = ELFObjectFile; +using ELF32BEObjectFile = ELFObjectFile; +using ELF64BEObjectFile = ELFObjectFile; template void ELFObjectFile::moveSymbolNext(DataRefImpl &Sym) const { @@ -1026,8 +1026,7 @@ StringRef ELFObjectFile::getFileFormatName() const { } } -template -unsigned ELFObjectFile::getArch() const { +template Triple::ArchType ELFObjectFile::getArch() const { bool IsLittleEndian = ELFT::TargetEndianness == support::little; switch (EF.getHeader()->e_machine) { case ELF::EM_386: diff --git a/include/llvm/Object/ELFTypes.h b/include/llvm/Object/ELFTypes.h index 83b688548fdc..aa50a8b62fe9 100644 --- a/include/llvm/Object/ELFTypes.h +++ b/include/llvm/Object/ELFTypes.h @@ -44,7 +44,7 @@ template struct Elf_Chdr_Impl; template struct ELFType { private: template - using packed = support::detail::packed_endian_specific_integral; + using packed = support::detail::packed_endian_specific_integral; public: static const endianness TargetEndianness = E; @@ -90,46 +90,7 @@ using ELF64BE = ELFType; // Use an alignment of 2 for the typedefs since that is the worst case for // ELF files in archives. -// Templates to choose Elf_Addr and Elf_Off depending on is64Bits. 
-template struct ELFDataTypeTypedefHelperCommon { - using Elf_Half = support::detail::packed_endian_specific_integral< - uint16_t, target_endianness, 2>; - using Elf_Word = support::detail::packed_endian_specific_integral< - uint32_t, target_endianness, 2>; - using Elf_Sword = support::detail::packed_endian_specific_integral< - int32_t, target_endianness, 2>; - using Elf_Xword = support::detail::packed_endian_specific_integral< - uint64_t, target_endianness, 2>; - using Elf_Sxword = support::detail::packed_endian_specific_integral< - int64_t, target_endianness, 2>; -}; - -template struct ELFDataTypeTypedefHelper; - -/// ELF 32bit types. -template -struct ELFDataTypeTypedefHelper> - : ELFDataTypeTypedefHelperCommon { - using value_type = uint32_t; - using Elf_Addr = support::detail::packed_endian_specific_integral< - value_type, TargetEndianness, 2>; - using Elf_Off = support::detail::packed_endian_specific_integral< - value_type, TargetEndianness, 2>; -}; - -/// ELF 64bit types. -template -struct ELFDataTypeTypedefHelper> - : ELFDataTypeTypedefHelperCommon { - using value_type = uint64_t; - using Elf_Addr = support::detail::packed_endian_specific_integral< - value_type, TargetEndianness, 2>; - using Elf_Off = support::detail::packed_endian_specific_integral< - value_type, TargetEndianness, 2>; -}; - // I really don't like doing this, but the alternative is copypasta. - #define LLVM_ELF_IMPORT_TYPES_ELFT(ELFT) \ using Elf_Addr = typename ELFT::Addr; \ using Elf_Off = typename ELFT::Off; \ @@ -139,9 +100,9 @@ struct ELFDataTypeTypedefHelper> using Elf_Xword = typename ELFT::Xword; \ using Elf_Sxword = typename ELFT::Sxword; -#define LLD_ELF_COMMA , +#define LLVM_ELF_COMMA , #define LLVM_ELF_IMPORT_TYPES(E, W) \ - LLVM_ELF_IMPORT_TYPES_ELFT(ELFType) + LLVM_ELF_IMPORT_TYPES_ELFT(ELFType) // Section header. 
template <class ELFT> struct Elf_Shdr_Base; diff --git a/include/llvm/Object/MachO.h b/include/llvm/Object/MachO.h index 03fd52fb482f..d0cc40da4293 100644 --- a/include/llvm/Object/MachO.h +++ b/include/llvm/Object/MachO.h @@ -329,6 +329,9 @@ class MachOObjectFile : public ObjectFile { return make_range(extrel_begin(), extrel_end()); } + relocation_iterator locrel_begin() const; + relocation_iterator locrel_end() const; + void moveRelocationNext(DataRefImpl &Rel) const override; uint64_t getRelocationOffset(DataRefImpl Rel) const override; symbol_iterator getRelocationSymbol(DataRefImpl Rel) const override; @@ -360,7 +363,7 @@ class MachOObjectFile : public ObjectFile { uint8_t getBytesInAddress() const override; StringRef getFileFormatName() const override; - unsigned getArch() const override; + Triple::ArchType getArch() const override; SubtargetFeatures getFeatures() const override { return SubtargetFeatures(); } Triple getArchTriple(const char **McpuDefault = nullptr) const; diff --git a/include/llvm/Object/ObjectFile.h b/include/llvm/Object/ObjectFile.h index c7943512f0cf..079a59468156 100644 --- a/include/llvm/Object/ObjectFile.h +++ b/include/llvm/Object/ObjectFile.h @@ -15,6 +15,7 @@ #define LLVM_OBJECT_OBJECTFILE_H #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Triple.h" #include "llvm/ADT/iterator_range.h" #include "llvm/BinaryFormat/Magic.h" #include "llvm/MC/SubtargetFeature.h" @@ -279,7 +280,7 @@ class ObjectFile : public SymbolicFile { virtual uint8_t getBytesInAddress() const = 0; virtual StringRef getFileFormatName() const = 0; - virtual /* Triple::ArchType */ unsigned getArch() const = 0; + virtual Triple::ArchType getArch() const = 0; virtual SubtargetFeatures getFeatures() const = 0; virtual void setARMSubArch(Triple &TheTriple) const { } diff --git a/include/llvm/Object/RelocVisitor.h b/include/llvm/Object/RelocVisitor.h index c1e2a82c9f88..2d0e938f06fd 100644 --- a/include/llvm/Object/RelocVisitor.h +++ b/include/llvm/Object/RelocVisitor.h @@ -302,6 +302,8 @@ class RelocVisitor { return Value; } break; + default: + break; } HasError = true; return 0; diff --git a/include/llvm/Object/Wasm.h b/include/llvm/Object/Wasm.h index e138faeed342..22e19a16bc79 100644 --- a/include/llvm/Object/Wasm.h +++ b/include/llvm/Object/Wasm.h @@ -39,13 +39,12 @@ class WasmSymbol { FUNCTION_EXPORT, GLOBAL_IMPORT, GLOBAL_EXPORT, - DEBUG_FUNCTION_NAME, }; WasmSymbol(StringRef Name, SymbolType Type, uint32_t Section, - uint32_t ElementIndex, uint32_t ImportIndex = 0) + uint32_t ElementIndex, uint32_t FunctionType = 0) : Name(Name), Type(Type), Section(Section), ElementIndex(ElementIndex), - ImportIndex(ImportIndex) {} + FunctionType(FunctionType) {} StringRef Name; SymbolType Type; @@ -55,13 +54,22 @@ class WasmSymbol { // Index into either the function or global index space. uint32_t ElementIndex; - // For imports, the index into the import table - uint32_t ImportIndex; + // For functions, the type index + uint32_t FunctionType; + + // Symbols can be both exported and imported (in the case of the weakly + // defined symbol). In this case, the import index is stored as AltIndex. 
+ uint32_t AltIndex = 0; + bool HasAltIndex = false; + + void setAltIndex(uint32_t Index) { + HasAltIndex = true; + AltIndex = Index; + } bool isFunction() const { return Type == WasmSymbol::SymbolType::FUNCTION_IMPORT || - Type == WasmSymbol::SymbolType::FUNCTION_EXPORT || - Type == WasmSymbol::SymbolType::DEBUG_FUNCTION_NAME; + Type == WasmSymbol::SymbolType::FUNCTION_EXPORT; } @@ -81,10 +89,17 @@ class WasmSymbol { return Flags & wasm::WASM_SYMBOL_BINDING_MASK; } + bool isHidden() const { + return getVisibility() == wasm::WASM_SYMBOL_VISIBILITY_HIDDEN; + } + + unsigned getVisibility() const { + return Flags & wasm::WASM_SYMBOL_VISIBILITY_MASK; + } + void print(raw_ostream &Out) const { Out << "Name=" << Name << ", Type=" << static_cast(Type) - << ", Flags=" << Flags << " ElemIndex=" << ElementIndex - << ", ImportIndex=" << ImportIndex; + << ", Flags=" << Flags << " ElemIndex=" << ElementIndex; } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -120,29 +135,20 @@ class WasmObjectFile : public ObjectFile { static bool classof(const Binary *v) { return v->isWasm(); } - const std::vector& types() const { return Signatures; } - const std::vector& functionTypes() const { return FunctionTypes; } - const std::vector& imports() const { return Imports; } - const std::vector& tables() const { return Tables; } - const std::vector& memories() const { return Memories; } - const std::vector& globals() const { return Globals; } - const std::vector& exports() const { return Exports; } + ArrayRef types() const { return Signatures; } + ArrayRef functionTypes() const { return FunctionTypes; } + ArrayRef imports() const { return Imports; } + ArrayRef tables() const { return Tables; } + ArrayRef memories() const { return Memories; } + ArrayRef globals() const { return Globals; } + ArrayRef exports() const { return Exports; } const wasm::WasmLinkingData& linkingData() const { return LinkingData; } - - uint32_t getNumberOfSymbols() const { - return Symbols.size(); - } - - const std::vector& elements() const { - return ElemSegments; - } - - const std::vector& dataSegments() const { - return DataSegments; - } - - const std::vector& functions() const { return Functions; } - const ArrayRef& code() const { return CodeSection; } + uint32_t getNumberOfSymbols() const { return Symbols.size(); } + ArrayRef elements() const { return ElemSegments; } + ArrayRef dataSegments() const { return DataSegments; } + ArrayRef functions() const { return Functions; } + ArrayRef comdats() const { return Comdats; } + ArrayRef debugNames() const { return DebugNames; } uint32_t startFunction() const { return StartFunction; } void moveSymbolNext(DataRefImpl &Symb) const override; @@ -193,11 +199,12 @@ class WasmObjectFile : public ObjectFile { section_iterator section_end() const override; uint8_t getBytesInAddress() const override; StringRef getFileFormatName() const override; - unsigned getArch() const override; + Triple::ArchType getArch() const override; SubtargetFeatures getFeatures() const override; bool isRelocatableObject() const override; private: + bool isValidFunctionIndex(uint32_t Index) const; const WasmSection &getWasmSection(DataRefImpl Ref) const; const wasm::WasmRelocation &getWasmRelocation(DataRefImpl Ref) const; @@ -225,6 +232,7 @@ class WasmObjectFile : public ObjectFile { // Custom section types Error parseNameSection(const uint8_t *Ptr, const uint8_t *End); Error parseLinkingSection(const uint8_t *Ptr, const uint8_t *End); + Error parseLinkingSectionComdat(const uint8_t *&Ptr, const uint8_t *End); Error 
parseRelocSection(StringRef Name, const uint8_t *Ptr, const uint8_t *End); @@ -243,7 +251,8 @@ class WasmObjectFile : public ObjectFile { std::vector DataSegments; std::vector Functions; std::vector Symbols; - ArrayRef CodeSection; + std::vector Comdats; + std::vector DebugNames; uint32_t StartFunction = -1; bool HasLinkingSection = false; wasm::WasmLinkingData LinkingData; diff --git a/include/llvm/ObjectYAML/COFFYAML.h b/include/llvm/ObjectYAML/COFFYAML.h index 1fce46c125f7..8794eaa6d59a 100644 --- a/include/llvm/ObjectYAML/COFFYAML.h +++ b/include/llvm/ObjectYAML/COFFYAML.h @@ -18,6 +18,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/BinaryFormat/COFF.h" #include "llvm/ObjectYAML/CodeViewYAMLDebugSections.h" +#include "llvm/ObjectYAML/CodeViewYAMLTypeHashing.h" #include "llvm/ObjectYAML/CodeViewYAMLTypes.h" #include "llvm/ObjectYAML/YAML.h" #include @@ -66,6 +67,7 @@ struct Section { yaml::BinaryRef SectionData; std::vector DebugS; std::vector DebugT; + Optional DebugH; std::vector Relocations; StringRef Name; diff --git a/include/llvm/ObjectYAML/CodeViewYAMLTypeHashing.h b/include/llvm/ObjectYAML/CodeViewYAMLTypeHashing.h new file mode 100644 index 000000000000..4f0d9efb963b --- /dev/null +++ b/include/llvm/ObjectYAML/CodeViewYAMLTypeHashing.h @@ -0,0 +1,62 @@ +//==- CodeViewYAMLTypeHashing.h - CodeView YAMLIO Type hashing ----*- C++-*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines classes for handling the YAML representation of CodeView +// Debug Info. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_OBJECTYAML_CODEVIEWYAMLTYPEHASHING_H +#define LLVM_OBJECTYAML_CODEVIEWYAMLTYPEHASHING_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/DebugInfo/CodeView/TypeHashing.h" +#include "llvm/ObjectYAML/YAML.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/YAMLTraits.h" +#include +#include +#include + +namespace llvm { + +namespace CodeViewYAML { + +struct GlobalHash { + GlobalHash() = default; + explicit GlobalHash(StringRef S) : Hash(S) { + assert(S.size() == 20 && "Invalid hash size!"); + } + explicit GlobalHash(ArrayRef S) : Hash(S) { + assert(S.size() == 20 && "Invalid hash size!"); + } + yaml::BinaryRef Hash; +}; + +struct DebugHSection { + uint32_t Magic; + uint16_t Version; + uint16_t HashAlgorithm; + std::vector Hashes; +}; + +DebugHSection fromDebugH(ArrayRef DebugT); +ArrayRef toDebugH(const DebugHSection &DebugH, + BumpPtrAllocator &Alloc); + +} // end namespace CodeViewYAML + +} // end namespace llvm + +LLVM_YAML_DECLARE_MAPPING_TRAITS(CodeViewYAML::DebugHSection) +LLVM_YAML_DECLARE_SCALAR_TRAITS(CodeViewYAML::GlobalHash, QuotingType::None) +LLVM_YAML_IS_SEQUENCE_VECTOR(CodeViewYAML::GlobalHash) + +#endif // LLVM_OBJECTYAML_CODEVIEWYAMLTYPES_H diff --git a/include/llvm/ObjectYAML/CodeViewYAMLTypes.h b/include/llvm/ObjectYAML/CodeViewYAMLTypes.h index 88a5668f0a14..bc3b5567c2f9 100644 --- a/include/llvm/ObjectYAML/CodeViewYAMLTypes.h +++ b/include/llvm/ObjectYAML/CodeViewYAMLTypes.h @@ -27,10 +27,8 @@ namespace llvm { namespace codeview { - -class TypeTableBuilder; - -} // end namespace codeview +class AppendingTypeTableBuilder; +} namespace CodeViewYAML { @@ -48,8 +46,8 @@ struct MemberRecord { struct LeafRecord { std::shared_ptr Leaf; - 
codeview::CVType toCodeViewRecord(BumpPtrAllocator &Allocator) const; - codeview::CVType toCodeViewRecord(codeview::TypeTableBuilder &TS) const; + codeview::CVType + toCodeViewRecord(codeview::AppendingTypeTableBuilder &Serializer) const; static Expected fromCodeViewRecord(codeview::CVType Type); }; @@ -60,7 +58,7 @@ ArrayRef toDebugT(ArrayRef, BumpPtrAllocator &Alloc); } // end namespace llvm -LLVM_YAML_DECLARE_SCALAR_TRAITS(codeview::GUID, true) +LLVM_YAML_DECLARE_SCALAR_TRAITS(codeview::GUID, QuotingType::Single) LLVM_YAML_DECLARE_MAPPING_TRAITS(CodeViewYAML::LeafRecord) LLVM_YAML_DECLARE_MAPPING_TRAITS(CodeViewYAML::MemberRecord) diff --git a/include/llvm/ObjectYAML/MachOYAML.h b/include/llvm/ObjectYAML/MachOYAML.h index b84c093cd4ec..1fa8f92e516a 100644 --- a/include/llvm/ObjectYAML/MachOYAML.h +++ b/include/llvm/ObjectYAML/MachOYAML.h @@ -261,7 +261,7 @@ using char_16 = char[16]; template <> struct ScalarTraits { static void output(const char_16 &Val, void *, raw_ostream &Out); static StringRef input(StringRef Scalar, void *, char_16 &Val); - static bool mustQuote(StringRef S); + static QuotingType mustQuote(StringRef S); }; // This trait is used for UUIDs. It reads and writes them matching otool's @@ -271,7 +271,7 @@ using uuid_t = raw_ostream::uuid_t; template <> struct ScalarTraits { static void output(const uuid_t &Val, void *, raw_ostream &Out); static StringRef input(StringRef Scalar, void *, uuid_t &Val); - static bool mustQuote(StringRef S); + static QuotingType mustQuote(StringRef S); }; // Load Command struct mapping traits diff --git a/include/llvm/ObjectYAML/WasmYAML.h b/include/llvm/ObjectYAML/WasmYAML.h index d26faa148623..83040aa99a1c 100644 --- a/include/llvm/ObjectYAML/WasmYAML.h +++ b/include/llvm/ObjectYAML/WasmYAML.h @@ -34,13 +34,17 @@ LLVM_YAML_STRONG_TYPEDEF(int32_t, SignatureForm) LLVM_YAML_STRONG_TYPEDEF(uint32_t, ExportKind) LLVM_YAML_STRONG_TYPEDEF(uint32_t, Opcode) LLVM_YAML_STRONG_TYPEDEF(uint32_t, RelocType) +LLVM_YAML_STRONG_TYPEDEF(uint32_t, SymbolFlags) +LLVM_YAML_STRONG_TYPEDEF(uint32_t, SegmentFlags) +LLVM_YAML_STRONG_TYPEDEF(uint32_t, LimitFlags) +LLVM_YAML_STRONG_TYPEDEF(uint32_t, ComdatKind) struct FileHeader { yaml::Hex32 Version; }; struct Limits { - yaml::Hex32 Flags; + LimitFlags Flags; yaml::Hex32 Initial; yaml::Hex32 Maximum; }; @@ -63,6 +67,7 @@ struct ElemSegment { }; struct Global { + uint32_t Index; ValueType Type; bool Mutable; wasm::WasmInitExpr InitExpr; @@ -86,6 +91,7 @@ struct LocalDecl { }; struct Function { + uint32_t Index; std::vector Locals; yaml::BinaryRef Body; }; @@ -113,7 +119,7 @@ struct SegmentInfo { uint32_t Index; StringRef Name; uint32_t Alignment; - uint32_t Flags; + SegmentFlags Flags; }; struct Signature { @@ -125,7 +131,22 @@ struct Signature { struct SymbolInfo { StringRef Name; - uint32_t Flags; + SymbolFlags Flags; +}; + +struct InitFunction { + uint32_t Priority; + uint32_t FunctionIndex; +}; + +struct ComdatEntry { + ComdatKind Kind; + uint32_t Index; +}; + +struct Comdat { + StringRef Name; + std::vector Entries; }; struct Section { @@ -170,6 +191,8 @@ struct LinkingSection : CustomSection { uint32_t DataSize; std::vector SymbolInfos; std::vector SegmentInfos; + std::vector InitFunctions; + std::vector Comdats; }; struct TypeSection : Section { @@ -306,6 +329,9 @@ LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::Relocation) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::NameEntry) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::SegmentInfo) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::SymbolInfo) 
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::InitFunction) +LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::ComdatEntry) +LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::Comdat) namespace llvm { namespace yaml { @@ -334,6 +360,18 @@ template <> struct MappingTraits { static void mapping(IO &IO, WasmYAML::Global &Global); }; +template <> struct ScalarBitSetTraits { + static void bitset(IO &IO, WasmYAML::LimitFlags &Value); +}; + +template <> struct ScalarBitSetTraits { + static void bitset(IO &IO, WasmYAML::SymbolFlags &Value); +}; + +template <> struct ScalarBitSetTraits { + static void bitset(IO &IO, WasmYAML::SegmentFlags &Value); +}; + template <> struct ScalarEnumerationTraits { static void enumeration(IO &IO, WasmYAML::SectionType &Type); }; @@ -386,6 +424,22 @@ template <> struct MappingTraits { static void mapping(IO &IO, WasmYAML::SymbolInfo &Info); }; +template <> struct MappingTraits { + static void mapping(IO &IO, WasmYAML::InitFunction &Init); +}; + +template <> struct ScalarEnumerationTraits { + static void enumeration(IO &IO, WasmYAML::ComdatKind &Kind); +}; + +template <> struct MappingTraits { + static void mapping(IO &IO, WasmYAML::ComdatEntry &ComdatEntry); +}; + +template <> struct MappingTraits { + static void mapping(IO &IO, WasmYAML::Comdat &Comdat); +}; + template <> struct ScalarEnumerationTraits { static void enumeration(IO &IO, WasmYAML::ValueType &Type); }; diff --git a/include/llvm/ObjectYAML/YAML.h b/include/llvm/ObjectYAML/YAML.h index 29151a269df0..93266dd67f1a 100644 --- a/include/llvm/ObjectYAML/YAML.h +++ b/include/llvm/ObjectYAML/YAML.h @@ -107,7 +107,7 @@ inline bool operator==(const BinaryRef &LHS, const BinaryRef &RHS) { template <> struct ScalarTraits { static void output(const BinaryRef &, void *, raw_ostream &); static StringRef input(StringRef, void *, BinaryRef &); - static bool mustQuote(StringRef S) { return needsQuotes(S); } + static QuotingType mustQuote(StringRef S) { return needsQuotes(S); } }; } // end namespace yaml diff --git a/include/llvm/Option/ArgList.h b/include/llvm/Option/ArgList.h index aaea68bf8e27..a80921fa8421 100644 --- a/include/llvm/Option/ArgList.h +++ b/include/llvm/Option/ArgList.h @@ -390,6 +390,8 @@ class InputArgList final : public ArgList { void releaseMemory(); public: + InputArgList() : NumInputArgStrings(0) {} + InputArgList(const char* const *ArgBegin, const char* const *ArgEnd); InputArgList(InputArgList &&RHS) diff --git a/include/llvm/Option/OptTable.h b/include/llvm/Option/OptTable.h index 57a6954f4878..20b9bba7e25f 100644 --- a/include/llvm/Option/OptTable.h +++ b/include/llvm/Option/OptTable.h @@ -143,6 +143,26 @@ class OptTable { std::vector findByPrefix(StringRef Cur, unsigned short DisableFlags) const; + /// Find the OptTable option that most closely matches the given string. + /// + /// \param [in] Option - A string, such as "-stdlibs=l", that represents user + /// input of an option that may not exist in the OptTable. Note that the + /// string includes prefix dashes "-" as well as values "=l". + /// \param [out] NearestString - The nearest option string found in the + /// OptTable. + /// \param [in] FlagsToInclude - Only find options with any of these flags. + /// Zero is the default, which includes all flags. + /// \param [in] FlagsToExclude - Don't find options with this flag. Zero + /// is the default, and means exclude nothing. + /// \param [in] MinimumLength - Don't find options shorter than this length. + /// For example, a minimum length of 3 prevents "-x" from being considered + /// near to "-S". 
+ /// + /// \return The edit distance of the nearest string found. + unsigned findNearest(StringRef Option, std::string &NearestString, + unsigned FlagsToInclude = 0, unsigned FlagsToExclude = 0, + unsigned MinimumLength = 4) const; + /// Add Values to Option's Values class /// /// \param [in] Option - Prefix + Name of the flag which Values will be diff --git a/include/llvm/Pass.h b/include/llvm/Pass.h index ff6411703509..a29b3771abb4 100644 --- a/include/llvm/Pass.h +++ b/include/llvm/Pass.h @@ -361,6 +361,12 @@ extern bool TimePassesIsEnabled; // @brief Tells if the function IR should be printed by PrinterPass. extern bool isFunctionInPrintList(StringRef FunctionName); +/// forcePrintModuleIR - returns true if IR printing passes should +// be printing module IR (even for local-pass printers e.g. function-pass) +// to provide more context, as enabled by debugging option -print-module-scope +// @brief Tells if IR printer should be printing module IR +extern bool forcePrintModuleIR(); + } // end namespace llvm // Include support files that contain important APIs commonly used by Passes, diff --git a/include/llvm/ProfileData/SampleProf.h b/include/llvm/ProfileData/SampleProf.h index 48cb5016659e..641631cc4ec9 100644 --- a/include/llvm/ProfileData/SampleProf.h +++ b/include/llvm/ProfileData/SampleProf.h @@ -185,7 +185,9 @@ raw_ostream &operator<<(raw_ostream &OS, const SampleRecord &Sample); class FunctionSamples; using BodySampleMap = std::map; -using FunctionSamplesMap = StringMap; +// NOTE: Using a StringMap here makes parsed profiles consume around 17% more +// memory, which is *very* significant for large profiles. +using FunctionSamplesMap = std::map; using CallsiteSampleMap = std::map; /// Representation of the samples collected for a function. @@ -224,8 +226,8 @@ class FunctionSamples { sampleprof_error addCalledTargetSamples(uint32_t LineOffset, uint32_t Discriminator, - const std::string &FName, - uint64_t Num, uint64_t Weight = 1) { + StringRef FName, uint64_t Num, + uint64_t Weight = 1) { return BodySamples[LineLocation(LineOffset, Discriminator)].addCalledTarget( FName, Num, Weight); } @@ -278,7 +280,7 @@ class FunctionSamples { return nullptr; auto FS = iter->second.find(CalleeName); if (FS != iter->second.end()) - return &FS->getValue(); + return &FS->second; // If we cannot find exact match of the callee name, return the FS with // the max total count. uint64_t MaxTotalSamples = 0; @@ -347,7 +349,7 @@ class FunctionSamples { const LineLocation &Loc = I.first; FunctionSamplesMap &FSMap = functionSamplesAt(Loc); for (const auto &Rec : I.second) - MergeResult(Result, FSMap[Rec.first()].merge(Rec.second, Weight)); + MergeResult(Result, FSMap[Rec.first].merge(Rec.second, Weight)); } return Result; } diff --git a/include/llvm/Support/AMDGPUMetadata.h b/include/llvm/Support/AMDGPUMetadata.h index 0c8d02287737..00039a75c51d 100644 --- a/include/llvm/Support/AMDGPUMetadata.h +++ b/include/llvm/Support/AMDGPUMetadata.h @@ -244,6 +244,10 @@ constexpr char MaxFlatWorkGroupSize[] = "MaxFlatWorkGroupSize"; constexpr char IsDynamicCallStack[] = "IsDynamicCallStack"; /// \brief Key for Kernel::CodeProps::Metadata::mIsXNACKEnabled. constexpr char IsXNACKEnabled[] = "IsXNACKEnabled"; +/// \brief Key for Kernel::CodeProps::Metadata::mNumSpilledSGPRs. +constexpr char NumSpilledSGPRs[] = "NumSpilledSGPRs"; +/// \brief Key for Kernel::CodeProps::Metadata::mNumSpilledVGPRs. 
+constexpr char NumSpilledVGPRs[] = "NumSpilledVGPRs"; } // end namespace Key /// \brief In-memory representation of kernel code properties metadata. @@ -275,6 +279,10 @@ struct Metadata final { /// \brief True if the generated machine code is capable of supporting XNACK. /// Optional. bool mIsXNACKEnabled = false; + /// \brief Number of SGPRs spilled by a wavefront. Optional. + uint16_t mNumSpilledSGPRs = 0; + /// \brief Number of VGPRs spilled by a workitem. Optional. + uint16_t mNumSpilledVGPRs = 0; /// \brief Default constructor. Metadata() = default; diff --git a/include/llvm/Support/Allocator.h b/include/llvm/Support/Allocator.h index a94aa8fb1f2a..7f9c39345b43 100644 --- a/include/llvm/Support/Allocator.h +++ b/include/llvm/Support/Allocator.h @@ -24,6 +24,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/ErrorHandling.h" #include #include #include @@ -94,7 +95,11 @@ class MallocAllocator : public AllocatorBase { LLVM_ATTRIBUTE_RETURNS_NONNULL void *Allocate(size_t Size, size_t /*Alignment*/) { - return malloc(Size); + void* memPtr = malloc(Size); + if (memPtr == nullptr) + report_bad_alloc_error("Allocation in MallocAllocator failed."); + + return memPtr; } // Pull in base class overloads. diff --git a/include/llvm/Support/BinaryByteStream.h b/include/llvm/Support/BinaryByteStream.h index a87a9bea0e3c..db1ccba1398b 100644 --- a/include/llvm/Support/BinaryByteStream.h +++ b/include/llvm/Support/BinaryByteStream.h @@ -135,7 +135,7 @@ class MutableBinaryByteStream : public WritableBinaryStream { /// causing the underlying data to grow. This class owns the underlying data. class AppendingBinaryByteStream : public WritableBinaryStream { std::vector Data; - llvm::support::endianness Endian; + llvm::support::endianness Endian = llvm::support::little; public: AppendingBinaryByteStream() = default; @@ -155,6 +155,10 @@ class AppendingBinaryByteStream : public WritableBinaryStream { return Error::success(); } + void insert(uint32_t Offset, ArrayRef Bytes) { + Data.insert(Data.begin() + Offset, Bytes.begin(), Bytes.end()); + } + Error readLongestContiguousChunk(uint32_t Offset, ArrayRef &Buffer) override { if (auto EC = checkOffsetForWrite(Offset, 1)) diff --git a/include/llvm/Support/CachePruning.h b/include/llvm/Support/CachePruning.h index c577e9b8b631..327c7df4570f 100644 --- a/include/llvm/Support/CachePruning.h +++ b/include/llvm/Support/CachePruning.h @@ -27,8 +27,9 @@ template class Expected; struct CachePruningPolicy { /// The pruning interval. This is intended to be used to avoid scanning the /// directory too often. It does not impact the decision of which file to - /// prune. A value of 0 forces the scan to occur. - std::chrono::seconds Interval = std::chrono::seconds(1200); + /// prune. A value of 0 forces the scan to occur. A value of None disables + /// pruning. + llvm::Optional Interval = std::chrono::seconds(1200); /// The expiration for a file. When a file hasn't been accessed for Expiration /// seconds, it is removed from the cache. A value of 0 disables the diff --git a/include/llvm/Support/CommandLine.h b/include/llvm/Support/CommandLine.h index d1901db7c68e..f043c112861b 100644 --- a/include/llvm/Support/CommandLine.h +++ b/include/llvm/Support/CommandLine.h @@ -1862,6 +1862,33 @@ using TokenizerCallback = void (*)(StringRef Source, StringSaver &Saver, SmallVectorImpl &NewArgv, bool MarkEOLs); +/// Tokenizes content of configuration file. 
+/// +/// \param [in] Source The string representing content of config file. +/// \param [in] Saver Delegates back to the caller for saving parsed strings. +/// \param [out] NewArgv All parsed strings are appended to NewArgv. +/// \param [in] MarkEOLs Added for compatibility with TokenizerCallback. +/// +/// It works like TokenizeGNUCommandLine with ability to skip comment lines. +/// +void tokenizeConfigFile(StringRef Source, StringSaver &Saver, + SmallVectorImpl &NewArgv, + bool MarkEOLs = false); + +/// Reads command line options from the given configuration file. +/// +/// \param [in] CfgFileName Path to configuration file. +/// \param [in] Saver Objects that saves allocated strings. +/// \param [out] Argv Array to which the read options are added. +/// \return true if the file was successfully read. +/// +/// It reads content of the specified file, tokenizes it and expands "@file" +/// commands resolving file names in them relative to the directory where +/// CfgFilename resides. +/// +bool readConfigFile(StringRef CfgFileName, StringSaver &Saver, + SmallVectorImpl &Argv); + /// \brief Expand response files on a command line recursively using the given /// StringSaver and tokenization strategy. Argv should contain the command line /// before expansion and will be modified in place. If requested, Argv will diff --git a/include/llvm/Support/ErrorHandling.h b/include/llvm/Support/ErrorHandling.h index b45f6348390e..acd89873328c 100644 --- a/include/llvm/Support/ErrorHandling.h +++ b/include/llvm/Support/ErrorHandling.h @@ -110,7 +110,7 @@ void remove_bad_alloc_error_handler(); /// in the unwind chain. /// /// If no error handler is installed (default), then a bad_alloc exception -/// is thrown if LLVM is compiled with exception support, otherwise an assertion +/// is thrown, if LLVM is compiled with exception support, otherwise an assertion /// is called. void report_bad_alloc_error(const char *Reason, bool GenCrashDiag = true); diff --git a/include/llvm/Support/GenericDomTreeConstruction.h b/include/llvm/Support/GenericDomTreeConstruction.h index 8f801662d0fb..25175fe66aa8 100644 --- a/include/llvm/Support/GenericDomTreeConstruction.h +++ b/include/llvm/Support/GenericDomTreeConstruction.h @@ -628,7 +628,7 @@ struct SemiNCAInfo { DecreasingLevel> Bucket; // Queue of tree nodes sorted by level in descending order. SmallDenseSet Affected; - SmallDenseSet Visited; + SmallDenseMap Visited; SmallVector AffectedQueue; SmallVector VisitedNotAffectedQueue; }; @@ -706,7 +706,7 @@ struct SemiNCAInfo { // algorithm does not really know or use the set of roots and can make a // different (implicit) decision about which nodes within an infinite loop // becomes a root. - if (DT.isVirtualRoot(TN->getIDom())) { + if (TN && !DT.isVirtualRoot(TN->getIDom())) { DEBUG(dbgs() << "Root " << BlockNamePrinter(R) << " is not virtual root's child\n" << "The entire tree needs to be rebuilt\n"); @@ -753,14 +753,16 @@ struct SemiNCAInfo { while (!II.Bucket.empty()) { const TreeNodePtr CurrentNode = II.Bucket.top().second; + const unsigned CurrentLevel = CurrentNode->getLevel(); II.Bucket.pop(); DEBUG(dbgs() << "\tAdding to Visited and AffectedQueue: " << BlockNamePrinter(CurrentNode) << "\n"); - II.Visited.insert(CurrentNode); + + II.Visited.insert({CurrentNode, CurrentLevel}); II.AffectedQueue.push_back(CurrentNode); // Discover and collect affected successors of the current node. 
- VisitInsertion(DT, BUI, CurrentNode, CurrentNode->getLevel(), NCD, II); + VisitInsertion(DT, BUI, CurrentNode, CurrentLevel, NCD, II); } // Finish by updating immediate dominators and levels. @@ -772,13 +774,17 @@ struct SemiNCAInfo { const TreeNodePtr TN, const unsigned RootLevel, const TreeNodePtr NCD, InsertionInfo &II) { const unsigned NCDLevel = NCD->getLevel(); - DEBUG(dbgs() << "Visiting " << BlockNamePrinter(TN) << "\n"); + DEBUG(dbgs() << "Visiting " << BlockNamePrinter(TN) << ", RootLevel " + << RootLevel << "\n"); SmallVector Stack = {TN}; assert(TN->getBlock() && II.Visited.count(TN) && "Preconditions!"); + SmallPtrSet Processed; + do { TreeNodePtr Next = Stack.pop_back_val(); + DEBUG(dbgs() << " Next: " << BlockNamePrinter(Next) << "\n"); for (const NodePtr Succ : ChildrenGetter::Get(Next->getBlock(), BUI)) { @@ -786,19 +792,31 @@ struct SemiNCAInfo { assert(SuccTN && "Unreachable successor found at reachable insertion"); const unsigned SuccLevel = SuccTN->getLevel(); - DEBUG(dbgs() << "\tSuccessor " << BlockNamePrinter(Succ) - << ", level = " << SuccLevel << "\n"); + DEBUG(dbgs() << "\tSuccessor " << BlockNamePrinter(Succ) << ", level = " + << SuccLevel << "\n"); + + // Do not process the same node multiple times. + if (Processed.count(Next) > 0) + continue; // Succ dominated by subtree From -- not affected. // (Based on the lemma 2.5 from the second paper.) if (SuccLevel > RootLevel) { DEBUG(dbgs() << "\t\tDominated by subtree From\n"); - if (II.Visited.count(SuccTN) != 0) - continue; + if (II.Visited.count(SuccTN) != 0) { + DEBUG(dbgs() << "\t\t\talready visited at level " + << II.Visited[SuccTN] << "\n\t\t\tcurrent level " + << RootLevel << ")\n"); + + // A node can be necessary to visit again if we see it again at + // a lower level than before. + if (II.Visited[SuccTN] >= RootLevel) + continue; + } DEBUG(dbgs() << "\t\tMarking visited not affected " << BlockNamePrinter(Succ) << "\n"); - II.Visited.insert(SuccTN); + II.Visited.insert({SuccTN, RootLevel}); II.VisitedNotAffectedQueue.push_back(SuccTN); Stack.push_back(SuccTN); } else if ((SuccLevel > NCDLevel + 1) && @@ -809,6 +827,8 @@ struct SemiNCAInfo { II.Bucket.push({SuccLevel, SuccTN}); } } + + Processed.insert(Next); } while (!Stack.empty()); } @@ -920,21 +940,21 @@ struct SemiNCAInfo { const NodePtr NCDBlock = DT.findNearestCommonDominator(From, To); const TreeNodePtr NCD = DT.getNode(NCDBlock); - // To dominates From -- nothing to do. - if (ToTN == NCD) return; + // If To dominates From -- nothing to do. + if (ToTN != NCD) { + DT.DFSInfoValid = false; - DT.DFSInfoValid = false; - - const TreeNodePtr ToIDom = ToTN->getIDom(); - DEBUG(dbgs() << "\tNCD " << BlockNamePrinter(NCD) << ", ToIDom " - << BlockNamePrinter(ToIDom) << "\n"); + const TreeNodePtr ToIDom = ToTN->getIDom(); + DEBUG(dbgs() << "\tNCD " << BlockNamePrinter(NCD) << ", ToIDom " + << BlockNamePrinter(ToIDom) << "\n"); - // To remains reachable after deletion. - // (Based on the caption under Figure 4. from the second paper.) - if (FromTN != ToIDom || HasProperSupport(DT, BUI, ToTN)) - DeleteReachable(DT, BUI, FromTN, ToTN); - else - DeleteUnreachable(DT, BUI, ToTN); + // To remains reachable after deletion. + // (Based on the caption under Figure 4. from the second paper.) 
+ if (FromTN != ToIDom || HasProperSupport(DT, BUI, ToTN)) + DeleteReachable(DT, BUI, FromTN, ToTN); + else + DeleteUnreachable(DT, BUI, ToTN); + } if (IsPostDom) UpdateRootsAfterUpdate(DT, BUI); } diff --git a/include/llvm/Support/KnownBits.h b/include/llvm/Support/KnownBits.h index 7a4de3e5ff12..97e73b13fca3 100644 --- a/include/llvm/Support/KnownBits.h +++ b/include/llvm/Support/KnownBits.h @@ -100,13 +100,11 @@ struct KnownBits { /// Make this value negative. void makeNegative() { - assert(!isNonNegative() && "Can't make a non-negative value negative"); One.setSignBit(); } /// Make this value negative. void makeNonNegative() { - assert(!isNegative() && "Can't make a negative value non-negative"); Zero.setSignBit(); } diff --git a/include/llvm/Support/LEB128.h b/include/llvm/Support/LEB128.h index 6af6e9f34474..9feb07229225 100644 --- a/include/llvm/Support/LEB128.h +++ b/include/llvm/Support/LEB128.h @@ -19,9 +19,10 @@ namespace llvm { -/// Utility function to encode a SLEB128 value to an output stream. -inline void encodeSLEB128(int64_t Value, raw_ostream &OS, - unsigned PadTo = 0) { +/// Utility function to encode a SLEB128 value to an output stream. Returns +/// the length in bytes of the encoded value. +inline unsigned encodeSLEB128(int64_t Value, raw_ostream &OS, + unsigned PadTo = 0) { bool More; unsigned Count = 0; do { @@ -42,7 +43,9 @@ inline void encodeSLEB128(int64_t Value, raw_ostream &OS, for (; Count < PadTo - 1; ++Count) OS << char(PadValue | 0x80); OS << char(PadValue); + Count++; } + return Count; } /// Utility function to encode a SLEB128 value to a buffer. Returns @@ -73,9 +76,10 @@ inline unsigned encodeSLEB128(int64_t Value, uint8_t *p, unsigned PadTo = 0) { return (unsigned)(p - orig_p); } -/// Utility function to encode a ULEB128 value to an output stream. -inline void encodeULEB128(uint64_t Value, raw_ostream &OS, - unsigned PadTo = 0) { +/// Utility function to encode a ULEB128 value to an output stream. Returns +/// the length in bytes of the encoded value. +inline unsigned encodeULEB128(uint64_t Value, raw_ostream &OS, + unsigned PadTo = 0) { unsigned Count = 0; do { uint8_t Byte = Value & 0x7f; @@ -93,6 +97,7 @@ inline void encodeULEB128(uint64_t Value, raw_ostream &OS, OS << '\x00'; Count++; } + return Count; } /// Utility function to encode a ULEB128 value to a buffer. Returns diff --git a/include/llvm/Support/MemoryBuffer.h b/include/llvm/Support/MemoryBuffer.h index 59c93f15d7b8..9e13715fd9a2 100644 --- a/include/llvm/Support/MemoryBuffer.h +++ b/include/llvm/Support/MemoryBuffer.h @@ -15,6 +15,7 @@ #define LLVM_SUPPORT_MEMORYBUFFER_H #include "llvm-c/Types.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/CBindingWrapping.h" @@ -47,6 +48,9 @@ class MemoryBuffer { void init(const char *BufStart, const char *BufEnd, bool RequiresNullTerminator); + + static constexpr bool Writable = false; + public: MemoryBuffer(const MemoryBuffer &) = delete; MemoryBuffer &operator=(const MemoryBuffer &) = delete; @@ -113,18 +117,6 @@ class MemoryBuffer { static std::unique_ptr getMemBufferCopy(StringRef InputData, const Twine &BufferName = ""); - /// Allocate a new zero-initialized MemoryBuffer of the specified size. Note - /// that the caller need not initialize the memory allocated by this method. - /// The memory is owned by the MemoryBuffer object. 
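Since encodeSLEB128 and encodeULEB128 now report the number of bytes written to the stream, a caller can record encoded sizes without re-measuring the buffer. A small sketch using the classic DWARF example value; the function name is illustrative:

    #include "llvm/ADT/SmallString.h"
    #include "llvm/Support/LEB128.h"
    #include "llvm/Support/raw_ostream.h"
    #include <cassert>

    void emitULEB128Example() {
      llvm::SmallString<16> Buffer;
      llvm::raw_svector_ostream OS(Buffer);

      // 624485 (0x98765) encodes as the three bytes 0xE5 0x8E 0x26.
      unsigned Len = llvm::encodeULEB128(624485, OS);
      assert(Len == 3 && Buffer.size() == 3);
      (void)Len;
    }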
- static std::unique_ptr - getNewMemBuffer(size_t Size, StringRef BufferName = ""); - - /// Allocate a new MemoryBuffer of the specified size that is not initialized. - /// Note that the caller should initialize the memory allocated by this - /// method. The memory is owned by the MemoryBuffer object. - static std::unique_ptr - getNewUninitMemBuffer(size_t Size, const Twine &BufferName = ""); - /// Read all of stdin into a file buffer, and return it. static ErrorOr> getSTDIN(); @@ -156,6 +148,67 @@ class MemoryBuffer { MemoryBufferRef getMemBufferRef() const; }; +/// This class is an extension of MemoryBuffer, which allows writing to the +/// underlying contents. It only supports creation methods that are guaranteed +/// to produce a writable buffer. For example, mapping a file read-only is not +/// supported. +class WritableMemoryBuffer : public MemoryBuffer { +protected: + WritableMemoryBuffer() = default; + + static constexpr bool Writable = true; + +public: + using MemoryBuffer::getBuffer; + using MemoryBuffer::getBufferEnd; + using MemoryBuffer::getBufferStart; + + // const_cast is well-defined here, because the underlying buffer is + // guaranteed to have been initialized with a mutable buffer. + char *getBufferStart() { + return const_cast(MemoryBuffer::getBufferStart()); + } + char *getBufferEnd() { + return const_cast(MemoryBuffer::getBufferEnd()); + } + MutableArrayRef getBuffer() { + return {getBufferStart(), getBufferEnd()}; + } + + static ErrorOr> + getFile(const Twine &Filename, int64_t FileSize = -1, + bool IsVolatile = false); + + /// Map a subrange of the specified file as a WritableMemoryBuffer. + static ErrorOr> + getFileSlice(const Twine &Filename, uint64_t MapSize, uint64_t Offset, + bool IsVolatile = false); + + /// Allocate a new MemoryBuffer of the specified size that is not initialized. + /// Note that the caller should initialize the memory allocated by this + /// method. The memory is owned by the MemoryBuffer object. + static std::unique_ptr + getNewUninitMemBuffer(size_t Size, const Twine &BufferName = ""); + + /// Allocate a new zero-initialized MemoryBuffer of the specified size. Note + /// that the caller need not initialize the memory allocated by this method. + /// The memory is owned by the MemoryBuffer object. + static std::unique_ptr + getNewMemBuffer(size_t Size, const Twine &BufferName = ""); + +private: + // Hide these base class factory function so one can't write + // WritableMemoryBuffer::getXXX() + // and be surprised that he got a read-only Buffer. 
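A short sketch of the new WritableMemoryBuffer class based on the declarations above; the buffer name and size are illustrative:

    #include "llvm/Support/MemoryBuffer.h"
    #include <cstring>
    #include <memory>

    std::unique_ptr<llvm::WritableMemoryBuffer> makeScratchBuffer() {
      // getNewUninitMemBuffer hands back uninitialized memory owned by the
      // buffer object; the caller is expected to fill it.
      auto Buf =
          llvm::WritableMemoryBuffer::getNewUninitMemBuffer(4096, "scratch");

      // Unlike plain MemoryBuffer, the contents can be written through
      // getBuffer(), which returns a MutableArrayRef<char>.
      llvm::MutableArrayRef<char> Bytes = Buf->getBuffer();
      std::memset(Bytes.data(), 0, Bytes.size());
      return Buf;
    }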
+ using MemoryBuffer::getFileAsStream; + using MemoryBuffer::getFileOrSTDIN; + using MemoryBuffer::getMemBuffer; + using MemoryBuffer::getMemBufferCopy; + using MemoryBuffer::getOpenFile; + using MemoryBuffer::getOpenFileSlice; + using MemoryBuffer::getSTDIN; +}; + class MemoryBufferRef { StringRef Buffer; StringRef Identifier; diff --git a/include/llvm/Support/ScopedPrinter.h b/include/llvm/Support/ScopedPrinter.h index 1b6651932212..1c22da693713 100644 --- a/include/llvm/Support/ScopedPrinter.h +++ b/include/llvm/Support/ScopedPrinter.h @@ -261,7 +261,11 @@ class ScopedPrinter { } void printString(StringRef Label, const std::string &Value) { - startLine() << Label << ": " << Value << "\n"; + printString(Label, StringRef(Value)); + } + + void printString(StringRef Label, const char* Value) { + printString(Label, StringRef(Value)); } template diff --git a/include/llvm/Support/Signals.h b/include/llvm/Support/Signals.h index cbd6f686a778..dec5f5804fd9 100644 --- a/include/llvm/Support/Signals.h +++ b/include/llvm/Support/Signals.h @@ -36,7 +36,7 @@ namespace sys { /// signal delivery. void DontRemoveFileOnSignal(StringRef Filename); - /// When an error signal (such as SIBABRT or SIGSEGV) is delivered to the + /// When an error signal (such as SIGABRT or SIGSEGV) is delivered to the /// process, print a stack trace and then exit. /// \brief Print a stack trace if a fatal signal occurs. /// \param Argv0 the current binary name, used to find the symbolizer diff --git a/include/llvm/Support/TarWriter.h b/include/llvm/Support/TarWriter.h index 44bdcaf2c465..639f61b53892 100644 --- a/include/llvm/Support/TarWriter.h +++ b/include/llvm/Support/TarWriter.h @@ -11,6 +11,7 @@ #define LLVM_SUPPORT_TAR_WRITER_H #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSet.h" #include "llvm/Support/Error.h" #include "llvm/Support/raw_ostream.h" @@ -26,6 +27,7 @@ class TarWriter { TarWriter(int FD, StringRef BaseDir); raw_fd_ostream OS; std::string BaseDir; + StringSet<> Files; }; } diff --git a/include/llvm/Support/TargetParser.h b/include/llvm/Support/TargetParser.h index 13b7befb8ce4..2c019e181099 100644 --- a/include/llvm/Support/TargetParser.h +++ b/include/llvm/Support/TargetParser.h @@ -203,7 +203,7 @@ StringRef getDefaultCPU(StringRef Arch); // Parser unsigned parseFPU(StringRef FPU); AArch64::ArchKind parseArch(StringRef Arch); -unsigned parseArchExt(StringRef ArchExt); +ArchExtKind parseArchExt(StringRef ArchExt); ArchKind parseCPUArch(StringRef CPU); ARM::ISAKind parseArchISA(StringRef Arch); ARM::EndianKind parseArchEndian(StringRef Arch); diff --git a/include/llvm/Support/TargetRegistry.h b/include/llvm/Support/TargetRegistry.h index bd096e2f74f6..8a429ab728ed 100644 --- a/include/llvm/Support/TargetRegistry.h +++ b/include/llvm/Support/TargetRegistry.h @@ -123,8 +123,8 @@ class Target { using AsmPrinterCtorTy = AsmPrinter *(*)( TargetMachine &TM, std::unique_ptr &&Streamer); using MCAsmBackendCtorTy = MCAsmBackend *(*)(const Target &T, + const MCSubtargetInfo &STI, const MCRegisterInfo &MRI, - const Triple &TT, StringRef CPU, const MCTargetOptions &Options); using MCAsmParserCtorTy = MCTargetAsmParser *(*)( const MCSubtargetInfo &STI, MCAsmParser &P, const MCInstrInfo &MII, @@ -381,15 +381,12 @@ class Target { } /// createMCAsmBackend - Create a target specific assembly parser. - /// - /// \param TheTriple The target triple string. 
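The new printString overloads simply forward to the StringRef form, so the common string spellings now behave identically; a tiny sketch with illustrative labels and values:

    #include "llvm/Support/ScopedPrinter.h"
    #include <string>

    void printToolInfo(llvm::ScopedPrinter &W) {
      W.printString("Format", llvm::StringRef("ELF64"));  // StringRef overload
      W.printString("Arch", std::string("x86_64"));       // std::string overload
      W.printString("Vendor", "unknown");                 // new const char * overload
    }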
- MCAsmBackend *createMCAsmBackend(const MCRegisterInfo &MRI, - StringRef TheTriple, StringRef CPU, - const MCTargetOptions &Options) - const { + MCAsmBackend *createMCAsmBackend(const MCSubtargetInfo &STI, + const MCRegisterInfo &MRI, + const MCTargetOptions &Options) const { if (!MCAsmBackendCtorFn) return nullptr; - return MCAsmBackendCtorFn(*this, MRI, Triple(TheTriple), CPU, Options); + return MCAsmBackendCtorFn(*this, STI, MRI, Options); } /// createMCAsmParser - Create a target specific assembly parser. @@ -1106,10 +1103,10 @@ template struct RegisterMCAsmBackend { } private: - static MCAsmBackend *Allocator(const Target &T, const MCRegisterInfo &MRI, - const Triple &TheTriple, StringRef CPU, + static MCAsmBackend *Allocator(const Target &T, const MCSubtargetInfo &STI, + const MCRegisterInfo &MRI, const MCTargetOptions &Options) { - return new MCAsmBackendImpl(T, MRI, TheTriple, CPU); + return new MCAsmBackendImpl(T, STI, MRI); } }; diff --git a/include/llvm/Support/YAMLTraits.h b/include/llvm/Support/YAMLTraits.h index 71fdf47f1979..b874ad519416 100644 --- a/include/llvm/Support/YAMLTraits.h +++ b/include/llvm/Support/YAMLTraits.h @@ -12,6 +12,7 @@ #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" @@ -117,6 +118,11 @@ struct ScalarBitSetTraits { // static void bitset(IO &io, T &value); }; +/// Describe which type of quotes should be used when quoting is necessary. +/// Some non-printable characters need to be double-quoted, while some others +/// are fine with simple-quoting, and some don't need any quoting. +enum class QuotingType { None, Single, Double }; + /// This class should be specialized by type that requires custom conversion /// to/from a yaml scalar. For example: /// @@ -131,7 +137,7 @@ struct ScalarBitSetTraits { /// // return empty string on success, or error string /// return StringRef(); /// } -/// static bool mustQuote(StringRef) { return true; } +/// static QuotingType mustQuote(StringRef) { return QuotingType::Single; } /// }; template struct ScalarTraits { @@ -145,7 +151,7 @@ struct ScalarTraits { //static StringRef input(StringRef scalar, void *ctxt, T &value); // // Function to determine if the value should be quoted. - //static bool mustQuote(StringRef); + //static QuotingType mustQuote(StringRef); }; /// This class should be specialized by type that requires custom conversion @@ -270,7 +276,7 @@ struct has_ScalarTraits { using Signature_input = StringRef (*)(StringRef, void*, T&); using Signature_output = void (*)(const T&, void*, raw_ostream&); - using Signature_mustQuote = bool (*)(StringRef); + using Signature_mustQuote = QuotingType (*)(StringRef); template static char test(SameType *, @@ -495,28 +501,71 @@ inline bool isBool(StringRef S) { S.equals("false") || S.equals("False") || S.equals("FALSE"); } -inline bool needsQuotes(StringRef S) { +// 5.1. Character Set +// The allowed character range explicitly excludes the C0 control block #x0-#x1F +// (except for TAB #x9, LF #xA, and CR #xD which are allowed), DEL #x7F, the C1 +// control block #x80-#x9F (except for NEL #x85 which is allowed), the surrogate +// block #xD800-#xDFFF, #xFFFE, and #xFFFF. 
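Call sites of Target::createMCAsmBackend now pass the subtarget instead of a triple/CPU pair; a sketch of the migration, where createBackend is just an illustrative wrapper:

    #include "llvm/MC/MCAsmBackend.h"
    #include "llvm/MC/MCRegisterInfo.h"
    #include "llvm/MC/MCSubtargetInfo.h"
    #include "llvm/MC/MCTargetOptions.h"
    #include "llvm/Support/TargetRegistry.h"

    llvm::MCAsmBackend *createBackend(const llvm::Target &T,
                                      const llvm::MCSubtargetInfo &STI,
                                      const llvm::MCRegisterInfo &MRI,
                                      const llvm::MCTargetOptions &Options) {
      // Previously:
      //   T.createMCAsmBackend(MRI, STI.getTargetTriple().str(), STI.getCPU(),
      //                        Options);
      return T.createMCAsmBackend(STI, MRI, Options);
    }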
+inline QuotingType needsQuotes(StringRef S) { if (S.empty()) - return true; + return QuotingType::Single; if (isspace(S.front()) || isspace(S.back())) - return true; - if (S.front() == ',') - return true; - - static const char ScalarSafeChars[] = - "abcdefghijklmnopqrstuvwxyz" - "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-/^., \t"; - if (S.find_first_not_of(ScalarSafeChars) != StringRef::npos) - return true; - + return QuotingType::Single; if (isNull(S)) - return true; + return QuotingType::Single; if (isBool(S)) - return true; + return QuotingType::Single; if (isNumeric(S)) - return true; + return QuotingType::Single; + + // 7.3.3 Plain Style + // Plain scalars must not begin with most indicators, as this would cause + // ambiguity with other YAML constructs. + static constexpr char Indicators[] = R"(-?:\,[]{}#&*!|>'"%@`)"; + if (S.find_first_of(Indicators) == 0) + return QuotingType::Single; + + QuotingType MaxQuotingNeeded = QuotingType::None; + for (unsigned char C : S) { + // Alphanum is safe. + if (isAlnum(C)) + continue; + + switch (C) { + // Safe scalar characters. + case '_': + case '-': + case '/': + case '^': + case '.': + case ',': + case ' ': + // TAB (0x9), LF (0xA), CR (0xD) and NEL (0x85) are allowed. + case 0x9: + case 0xA: + case 0xD: + case 0x85: + continue; + // DEL (0x7F) are excluded from the allowed character range. + case 0x7F: + return QuotingType::Double; + default: { + // C0 control block (0x0 - 0x1F) is excluded from the allowed character + // range. + if (C <= 0x1F) + return QuotingType::Double; + + // Always double quote UTF-8. + if ((C & 0x80) != 0) + return QuotingType::Double; + + // The character is not safe, at least simple quoting needed. + MaxQuotingNeeded = QuotingType::Single; + } + } + } - return false; + return MaxQuotingNeeded; } template @@ -581,7 +630,7 @@ class IO { virtual bool bitSetMatch(const char*, bool) = 0; virtual void endBitSetScalar() = 0; - virtual void scalarString(StringRef &, bool) = 0; + virtual void scalarString(StringRef &, QuotingType) = 0; virtual void blockScalarString(StringRef &) = 0; virtual void setError(const Twine &) = 0; @@ -911,91 +960,91 @@ template<> struct ScalarTraits { static void output(const bool &, void* , raw_ostream &); static StringRef input(StringRef, void *, bool &); - static bool mustQuote(StringRef) { return false; } + static QuotingType mustQuote(StringRef) { return QuotingType::None; } }; template<> struct ScalarTraits { static void output(const StringRef &, void *, raw_ostream &); static StringRef input(StringRef, void *, StringRef &); - static bool mustQuote(StringRef S) { return needsQuotes(S); } + static QuotingType mustQuote(StringRef S) { return needsQuotes(S); } }; template<> struct ScalarTraits { static void output(const std::string &, void *, raw_ostream &); static StringRef input(StringRef, void *, std::string &); - static bool mustQuote(StringRef S) { return needsQuotes(S); } + static QuotingType mustQuote(StringRef S) { return needsQuotes(S); } }; template<> struct ScalarTraits { static void output(const uint8_t &, void *, raw_ostream &); static StringRef input(StringRef, void *, uint8_t &); - static bool mustQuote(StringRef) { return false; } + static QuotingType mustQuote(StringRef) { return QuotingType::None; } }; template<> struct ScalarTraits { static void output(const uint16_t &, void *, raw_ostream &); static StringRef input(StringRef, void *, uint16_t &); - static bool mustQuote(StringRef) { return false; } + static QuotingType mustQuote(StringRef) { return QuotingType::None; } }; 
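A sketch of what a user-defined ScalarTraits specialization looks like after this change; the Tag type is hypothetical and exists only for illustration:

    #include "llvm/ADT/StringRef.h"
    #include "llvm/Support/YAMLTraits.h"
    #include "llvm/Support/raw_ostream.h"
    #include <string>

    struct Tag { std::string Value; };

    namespace llvm {
    namespace yaml {
    template <> struct ScalarTraits<Tag> {
      static void output(const Tag &T, void *, raw_ostream &OS) { OS << T.Value; }
      static StringRef input(StringRef Scalar, void *, Tag &T) {
        T.Value = Scalar.str();
        return StringRef(); // empty string means success
      }
      // mustQuote now reports *how much* quoting the emitted scalar needs;
      // needsQuotes() above implements the YAML character-set rules.
      static QuotingType mustQuote(StringRef S) { return needsQuotes(S); }
    };
    } // namespace yaml
    } // namespace llvm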
template<> struct ScalarTraits { static void output(const uint32_t &, void *, raw_ostream &); static StringRef input(StringRef, void *, uint32_t &); - static bool mustQuote(StringRef) { return false; } + static QuotingType mustQuote(StringRef) { return QuotingType::None; } }; template<> struct ScalarTraits { static void output(const uint64_t &, void *, raw_ostream &); static StringRef input(StringRef, void *, uint64_t &); - static bool mustQuote(StringRef) { return false; } + static QuotingType mustQuote(StringRef) { return QuotingType::None; } }; template<> struct ScalarTraits { static void output(const int8_t &, void *, raw_ostream &); static StringRef input(StringRef, void *, int8_t &); - static bool mustQuote(StringRef) { return false; } + static QuotingType mustQuote(StringRef) { return QuotingType::None; } }; template<> struct ScalarTraits { static void output(const int16_t &, void *, raw_ostream &); static StringRef input(StringRef, void *, int16_t &); - static bool mustQuote(StringRef) { return false; } + static QuotingType mustQuote(StringRef) { return QuotingType::None; } }; template<> struct ScalarTraits { static void output(const int32_t &, void *, raw_ostream &); static StringRef input(StringRef, void *, int32_t &); - static bool mustQuote(StringRef) { return false; } + static QuotingType mustQuote(StringRef) { return QuotingType::None; } }; template<> struct ScalarTraits { static void output(const int64_t &, void *, raw_ostream &); static StringRef input(StringRef, void *, int64_t &); - static bool mustQuote(StringRef) { return false; } + static QuotingType mustQuote(StringRef) { return QuotingType::None; } }; template<> struct ScalarTraits { static void output(const float &, void *, raw_ostream &); static StringRef input(StringRef, void *, float &); - static bool mustQuote(StringRef) { return false; } + static QuotingType mustQuote(StringRef) { return QuotingType::None; } }; template<> struct ScalarTraits { static void output(const double &, void *, raw_ostream &); static StringRef input(StringRef, void *, double &); - static bool mustQuote(StringRef) { return false; } + static QuotingType mustQuote(StringRef) { return QuotingType::None; } }; // For endian types, we just use the existing ScalarTraits for the underlying @@ -1019,7 +1068,7 @@ struct ScalarTraits::mustQuote(Str); } }; @@ -1148,7 +1197,7 @@ class Input : public IO { bool beginBitSetScalar(bool &) override; bool bitSetMatch(const char *, bool ) override; void endBitSetScalar() override; - void scalarString(StringRef &, bool) override; + void scalarString(StringRef &, QuotingType) override; void blockScalarString(StringRef &) override; void setError(const Twine &message) override; bool canElideEmptySequence() override; @@ -1293,7 +1342,7 @@ class Output : public IO { bool beginBitSetScalar(bool &) override; bool bitSetMatch(const char *, bool ) override; void endBitSetScalar() override; - void scalarString(StringRef &, bool) override; + void scalarString(StringRef &, QuotingType) override; void blockScalarString(StringRef &) override; void setError(const Twine &message) override; bool canElideEmptySequence() override; @@ -1371,28 +1420,28 @@ template<> struct ScalarTraits { static void output(const Hex8 &, void *, raw_ostream &); static StringRef input(StringRef, void *, Hex8 &); - static bool mustQuote(StringRef) { return false; } + static QuotingType mustQuote(StringRef) { return QuotingType::None; } }; template<> struct ScalarTraits { static void output(const Hex16 &, void *, raw_ostream &); static StringRef 
input(StringRef, void *, Hex16 &); - static bool mustQuote(StringRef) { return false; } + static QuotingType mustQuote(StringRef) { return QuotingType::None; } }; template<> struct ScalarTraits { static void output(const Hex32 &, void *, raw_ostream &); static StringRef input(StringRef, void *, Hex32 &); - static bool mustQuote(StringRef) { return false; } + static QuotingType mustQuote(StringRef) { return QuotingType::None; } }; template<> struct ScalarTraits { static void output(const Hex64 &, void *, raw_ostream &); static StringRef input(StringRef, void *, Hex64 &); - static bool mustQuote(StringRef) { return false; } + static QuotingType mustQuote(StringRef) { return QuotingType::None; } }; // Define non-member operator>> so that Input can stream in a document list. @@ -1681,7 +1730,7 @@ template struct StdMapStringCustomMappingTraitsImpl { template <> struct ScalarTraits { \ static void output(const Type &Value, void *ctx, raw_ostream &Out); \ static StringRef input(StringRef Scalar, void *ctxt, Type &Value); \ - static bool mustQuote(StringRef) { return MustQuote; } \ + static QuotingType mustQuote(StringRef) { return MustQuote; } \ }; \ } \ } diff --git a/include/llvm/TableGen/StringMatcher.h b/include/llvm/TableGen/StringMatcher.h index 7c919ffec7b6..09d2092d43b0 100644 --- a/include/llvm/TableGen/StringMatcher.h +++ b/include/llvm/TableGen/StringMatcher.h @@ -43,11 +43,12 @@ class StringMatcher { const std::vector &matches, raw_ostream &os) : StrVariableName(strVariableName), Matches(matches), OS(os) {} - void Emit(unsigned Indent = 0) const; + void Emit(unsigned Indent = 0, bool IgnoreDuplicates = false) const; private: - bool EmitStringMatcherForChar(const std::vector &Matches, - unsigned CharNo, unsigned IndentCount) const; + bool EmitStringMatcherForChar(const std::vector &Matches, + unsigned CharNo, unsigned IndentCount, + bool IgnoreDuplicates) const; }; } // end namespace llvm diff --git a/include/llvm/Target/GenericOpcodes.td b/include/llvm/Target/GenericOpcodes.td index 557217c34562..28c90bf22767 100644 --- a/include/llvm/Target/GenericOpcodes.td +++ b/include/llvm/Target/GenericOpcodes.td @@ -16,9 +16,11 @@ // Unary ops. //------------------------------------------------------------------------------ +class GenericInstruction : StandardPseudoInstruction; + // Extend the underlying scalar type of an operation, leaving the high bits // unspecified. -def G_ANYEXT : Instruction { +def G_ANYEXT : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type1:$src); let hasSideEffects = 0; @@ -26,7 +28,7 @@ def G_ANYEXT : Instruction { // Sign extend the underlying scalar type of an operation, copying the sign bit // into the newly-created space. -def G_SEXT : Instruction { +def G_SEXT : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type1:$src); let hasSideEffects = 0; @@ -34,7 +36,7 @@ def G_SEXT : Instruction { // Zero extend the underlying scalar type of an operation, putting zero bits // into the newly-created space. -def G_ZEXT : Instruction { +def G_ZEXT : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type1:$src); let hasSideEffects = 0; @@ -43,74 +45,74 @@ def G_ZEXT : Instruction { // Truncate the underlying scalar type of an operation. This is equivalent to // G_EXTRACT for scalar types, but acts elementwise on vectors. 
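The extra Emit parameter lets a TableGen backend generate a matcher even when the same key appears more than once; which duplicate is used is unspecified. A usage sketch with made-up match data:

    #include "llvm/Support/raw_ostream.h"
    #include "llvm/TableGen/StringMatcher.h"
    #include <vector>

    void emitNameMatcher(llvm::raw_ostream &OS) {
      std::vector<llvm::StringMatcher::StringPair> Matches = {
          {"add", "return 1;"},
          {"sub", "return 2;"},
          {"add", "return 3;"}, // duplicate key, tolerated when the flag is set
      };
      llvm::StringMatcher("Name", Matches, OS).Emit(0, /*IgnoreDuplicates=*/true);
    }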
-def G_TRUNC : Instruction { +def G_TRUNC : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type1:$src); let hasSideEffects = 0; } -def G_IMPLICIT_DEF : Instruction { +def G_IMPLICIT_DEF : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins); let hasSideEffects = 0; } -def G_PHI : Instruction { +def G_PHI : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins variable_ops); let hasSideEffects = 0; } -def G_FRAME_INDEX : Instruction { +def G_FRAME_INDEX : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins unknown:$src2); let hasSideEffects = 0; } -def G_GLOBAL_VALUE : Instruction { +def G_GLOBAL_VALUE : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins unknown:$src); let hasSideEffects = 0; } -def G_INTTOPTR : Instruction { +def G_INTTOPTR : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type1:$src); let hasSideEffects = 0; } -def G_PTRTOINT : Instruction { +def G_PTRTOINT : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type1:$src); let hasSideEffects = 0; } -def G_BITCAST : Instruction { +def G_BITCAST : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type1:$src); let hasSideEffects = 0; } -def G_CONSTANT : Instruction { +def G_CONSTANT : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins unknown:$imm); let hasSideEffects = 0; } -def G_FCONSTANT : Instruction { +def G_FCONSTANT : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins unknown:$imm); let hasSideEffects = 0; } -def G_VASTART : Instruction { +def G_VASTART : GenericInstruction { let OutOperandList = (outs); let InOperandList = (ins type0:$list); let hasSideEffects = 0; let mayStore = 1; } -def G_VAARG : Instruction { +def G_VAARG : GenericInstruction { let OutOperandList = (outs type0:$val); let InOperandList = (ins type1:$list, unknown:$align); let hasSideEffects = 0; @@ -118,7 +120,7 @@ def G_VAARG : Instruction { let mayStore = 1; } -def G_BSWAP : Instruction { +def G_BSWAP : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src); let hasSideEffects = 0; @@ -129,7 +131,7 @@ def G_BSWAP : Instruction { //------------------------------------------------------------------------------ // Generic addition. -def G_ADD : Instruction { +def G_ADD : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1, type0:$src2); let hasSideEffects = 0; @@ -137,7 +139,7 @@ def G_ADD : Instruction { } // Generic subtraction. -def G_SUB : Instruction { +def G_SUB : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1, type0:$src2); let hasSideEffects = 0; @@ -145,7 +147,7 @@ def G_SUB : Instruction { } // Generic multiplication. -def G_MUL : Instruction { +def G_MUL : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1, type0:$src2); let hasSideEffects = 0; @@ -153,7 +155,7 @@ def G_MUL : Instruction { } // Generic signed division. -def G_SDIV : Instruction { +def G_SDIV : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1, type0:$src2); let hasSideEffects = 0; @@ -161,7 +163,7 @@ def G_SDIV : Instruction { } // Generic unsigned division. 
-def G_UDIV : Instruction { +def G_UDIV : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1, type0:$src2); let hasSideEffects = 0; @@ -169,7 +171,7 @@ def G_UDIV : Instruction { } // Generic signed remainder. -def G_SREM : Instruction { +def G_SREM : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1, type0:$src2); let hasSideEffects = 0; @@ -177,7 +179,7 @@ def G_SREM : Instruction { } // Generic unsigned remainder. -def G_UREM : Instruction { +def G_UREM : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1, type0:$src2); let hasSideEffects = 0; @@ -185,7 +187,7 @@ def G_UREM : Instruction { } // Generic bitwise and. -def G_AND : Instruction { +def G_AND : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1, type0:$src2); let hasSideEffects = 0; @@ -193,7 +195,7 @@ def G_AND : Instruction { } // Generic bitwise or. -def G_OR : Instruction { +def G_OR : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1, type0:$src2); let hasSideEffects = 0; @@ -201,7 +203,7 @@ def G_OR : Instruction { } // Generic bitwise xor. -def G_XOR : Instruction { +def G_XOR : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1, type0:$src2); let hasSideEffects = 0; @@ -209,55 +211,55 @@ def G_XOR : Instruction { } // Generic left-shift. -def G_SHL : Instruction { +def G_SHL : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1, type0:$src2); let hasSideEffects = 0; } // Generic logical right-shift. -def G_LSHR : Instruction { +def G_LSHR : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1, type0:$src2); let hasSideEffects = 0; } // Generic arithmetic right-shift. -def G_ASHR : Instruction { +def G_ASHR : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1, type0:$src2); let hasSideEffects = 0; } // Generic integer comparison. -def G_ICMP : Instruction { +def G_ICMP : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins unknown:$tst, type1:$src1, type1:$src2); let hasSideEffects = 0; } // Generic floating-point comparison. -def G_FCMP : Instruction { +def G_FCMP : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins unknown:$tst, type1:$src1, type1:$src2); let hasSideEffects = 0; } // Generic select -def G_SELECT : Instruction { +def G_SELECT : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type1:$tst, type0:$src1, type0:$src2); let hasSideEffects = 0; } // Generic pointer offset. -def G_GEP : Instruction { +def G_GEP : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1, type1:$src2); let hasSideEffects = 0; } -def G_PTR_MASK : Instruction { +def G_PTR_MASK : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src, unknown:$bits); let hasSideEffects = 0; @@ -268,14 +270,14 @@ def G_PTR_MASK : Instruction { //------------------------------------------------------------------------------ // Generic unsigned addition consuming and producing a carry flag. 
-def G_UADDE : Instruction { +def G_UADDE : GenericInstruction { let OutOperandList = (outs type0:$dst, type1:$carry_out); let InOperandList = (ins type0:$src1, type0:$src2, type1:$carry_in); let hasSideEffects = 0; } // Generic signed addition producing a carry flag. -def G_SADDO : Instruction { +def G_SADDO : GenericInstruction { let OutOperandList = (outs type0:$dst, type1:$carry_out); let InOperandList = (ins type0:$src1, type0:$src2); let hasSideEffects = 0; @@ -283,21 +285,21 @@ def G_SADDO : Instruction { } // Generic unsigned subtraction consuming and producing a carry flag. -def G_USUBE : Instruction { +def G_USUBE : GenericInstruction { let OutOperandList = (outs type0:$dst, type1:$carry_out); let InOperandList = (ins type0:$src1, type0:$src2, type1:$carry_in); let hasSideEffects = 0; } // Generic unsigned subtraction producing a carry flag. -def G_SSUBO : Instruction { +def G_SSUBO : GenericInstruction { let OutOperandList = (outs type0:$dst, type1:$carry_out); let InOperandList = (ins type0:$src1, type0:$src2); let hasSideEffects = 0; } // Generic unsigned multiplication producing a carry flag. -def G_UMULO : Instruction { +def G_UMULO : GenericInstruction { let OutOperandList = (outs type0:$dst, type1:$carry_out); let InOperandList = (ins type0:$src1, type0:$src2); let hasSideEffects = 0; @@ -305,7 +307,7 @@ def G_UMULO : Instruction { } // Generic signed multiplication producing a carry flag. -def G_SMULO : Instruction { +def G_SMULO : GenericInstruction { let OutOperandList = (outs type0:$dst, type1:$carry_out); let InOperandList = (ins type0:$src1, type0:$src2); let hasSideEffects = 0; @@ -314,7 +316,7 @@ def G_SMULO : Instruction { // Multiply two numbers at twice the incoming bit width (unsigned) and return // the high half of the result. -def G_UMULH : Instruction { +def G_UMULH : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1, type0:$src2); let hasSideEffects = 0; @@ -323,7 +325,7 @@ def G_UMULH : Instruction { // Multiply two numbers at twice the incoming bit width (signed) and return // the high half of the result. -def G_SMULH : Instruction { +def G_SMULH : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1, type0:$src2); let hasSideEffects = 0; @@ -334,43 +336,43 @@ def G_SMULH : Instruction { // Floating Point Unary Ops. 
//------------------------------------------------------------------------------ -def G_FNEG : Instruction { +def G_FNEG : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src); let hasSideEffects = 0; } -def G_FPEXT : Instruction { +def G_FPEXT : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type1:$src); let hasSideEffects = 0; } -def G_FPTRUNC : Instruction { +def G_FPTRUNC : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type1:$src); let hasSideEffects = 0; } -def G_FPTOSI : Instruction { +def G_FPTOSI : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type1:$src); let hasSideEffects = 0; } -def G_FPTOUI : Instruction { +def G_FPTOUI : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type1:$src); let hasSideEffects = 0; } -def G_SITOFP : Instruction { +def G_SITOFP : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type1:$src); let hasSideEffects = 0; } -def G_UITOFP : Instruction { +def G_UITOFP : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type1:$src); let hasSideEffects = 0; @@ -381,7 +383,7 @@ def G_UITOFP : Instruction { //------------------------------------------------------------------------------ // Generic FP addition. -def G_FADD : Instruction { +def G_FADD : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1, type0:$src2); let hasSideEffects = 0; @@ -389,7 +391,7 @@ def G_FADD : Instruction { } // Generic FP subtraction. -def G_FSUB : Instruction { +def G_FSUB : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1, type0:$src2); let hasSideEffects = 0; @@ -397,7 +399,7 @@ def G_FSUB : Instruction { } // Generic FP multiplication. -def G_FMUL : Instruction { +def G_FMUL : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1, type0:$src2); let hasSideEffects = 0; @@ -406,7 +408,7 @@ def G_FMUL : Instruction { // Generic fused multiply-add instruction. // Behaves like llvm fma intrinsic ie src1 * src2 + src3 -def G_FMA : Instruction { +def G_FMA : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1, type0:$src2, type0:$src3); let hasSideEffects = 0; @@ -414,49 +416,49 @@ def G_FMA : Instruction { } // Generic FP division. -def G_FDIV : Instruction { +def G_FDIV : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1, type0:$src2); let hasSideEffects = 0; } // Generic FP remainder. -def G_FREM : Instruction { +def G_FREM : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1, type0:$src2); let hasSideEffects = 0; } // Floating point exponentiation. -def G_FPOW : Instruction { +def G_FPOW : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1, type0:$src2); let hasSideEffects = 0; } // Floating point base-e exponential of a value. -def G_FEXP : Instruction { +def G_FEXP : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1); let hasSideEffects = 0; } // Floating point base-2 exponential of a value. 
-def G_FEXP2 : Instruction { +def G_FEXP2 : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1); let hasSideEffects = 0; } // Floating point base-2 logarithm of a value. -def G_FLOG : Instruction { +def G_FLOG : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1); let hasSideEffects = 0; } // Floating point base-2 logarithm of a value. -def G_FLOG2 : Instruction { +def G_FLOG2 : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1); let hasSideEffects = 0; @@ -467,7 +469,7 @@ def G_FLOG2 : Instruction { //------------------------------------------------------------------------------ // Generic load. Expects a MachineMemOperand in addition to explicit operands. -def G_LOAD : Instruction { +def G_LOAD : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins ptype1:$addr); let hasSideEffects = 0; @@ -475,13 +477,55 @@ def G_LOAD : Instruction { } // Generic store. Expects a MachineMemOperand in addition to explicit operands. -def G_STORE : Instruction { +def G_STORE : GenericInstruction { let OutOperandList = (outs); let InOperandList = (ins type0:$src, ptype1:$addr); let hasSideEffects = 0; let mayStore = 1; } +// Generic atomic cmpxchg with internal success check. Expects a +// MachineMemOperand in addition to explicit operands. +def G_ATOMIC_CMPXCHG_WITH_SUCCESS : GenericInstruction { + let OutOperandList = (outs type0:$oldval, type1:$success); + let InOperandList = (ins type2:$addr, type0:$cmpval, type0:$newval); + let hasSideEffects = 0; + let mayLoad = 1; + let mayStore = 1; +} + +// Generic atomic cmpxchg. Expects a MachineMemOperand in addition to explicit +// operands. +def G_ATOMIC_CMPXCHG : GenericInstruction { + let OutOperandList = (outs type0:$oldval); + let InOperandList = (ins ptype1:$addr, type0:$cmpval, type0:$newval); + let hasSideEffects = 0; + let mayLoad = 1; + let mayStore = 1; +} + +// Generic atomicrmw. Expects a MachineMemOperand in addition to explicit +// operands. +class G_ATOMICRMW_OP : GenericInstruction { + let OutOperandList = (outs type0:$oldval); + let InOperandList = (ins ptype1:$addr, type0:$val); + let hasSideEffects = 0; + let mayLoad = 1; + let mayStore = 1; +} + +def G_ATOMICRMW_XCHG : G_ATOMICRMW_OP; +def G_ATOMICRMW_ADD : G_ATOMICRMW_OP; +def G_ATOMICRMW_SUB : G_ATOMICRMW_OP; +def G_ATOMICRMW_AND : G_ATOMICRMW_OP; +def G_ATOMICRMW_NAND : G_ATOMICRMW_OP; +def G_ATOMICRMW_OR : G_ATOMICRMW_OP; +def G_ATOMICRMW_XOR : G_ATOMICRMW_OP; +def G_ATOMICRMW_MAX : G_ATOMICRMW_OP; +def G_ATOMICRMW_MIN : G_ATOMICRMW_OP; +def G_ATOMICRMW_UMAX : G_ATOMICRMW_OP; +def G_ATOMICRMW_UMIN : G_ATOMICRMW_OP; + //------------------------------------------------------------------------------ // Variadic ops //------------------------------------------------------------------------------ @@ -489,7 +533,7 @@ def G_STORE : Instruction { // Extract a register of the specified size, starting from the block given by // index. This will almost certainly be mapped to sub-register COPYs after // register banks have been selected. -def G_EXTRACT : Instruction { +def G_EXTRACT : GenericInstruction { let OutOperandList = (outs type0:$res); let InOperandList = (ins type1:$src, unknown:$offset); let hasSideEffects = 0; @@ -498,35 +542,35 @@ def G_EXTRACT : Instruction { // Extract multiple registers specified size, starting from blocks given by // indexes. 
This will almost certainly be mapped to sub-register COPYs after // register banks have been selected. -def G_UNMERGE_VALUES : Instruction { - let OutOperandList = (outs); - let InOperandList = (ins variable_ops); +def G_UNMERGE_VALUES : GenericInstruction { + let OutOperandList = (outs type0:$dst0, variable_ops); + let InOperandList = (ins type1:$src); let hasSideEffects = 0; } // Insert a smaller register into a larger one at the specified bit-index. -def G_INSERT : Instruction { +def G_INSERT : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src, type1:$op, unknown:$offset); let hasSideEffects = 0; } -/// Concatenante multiple registers of the same size into a wider register. -def G_MERGE_VALUES : Instruction { +/// Concatenate multiple registers of the same size into a wider register. +def G_MERGE_VALUES : GenericInstruction { let OutOperandList = (outs type0:$dst); - let InOperandList = (ins variable_ops); + let InOperandList = (ins type1:$src0, variable_ops); let hasSideEffects = 0; } // Intrinsic without side effects. -def G_INTRINSIC : Instruction { +def G_INTRINSIC : GenericInstruction { let OutOperandList = (outs); let InOperandList = (ins unknown:$intrin, variable_ops); let hasSideEffects = 0; } // Intrinsic with side effects. -def G_INTRINSIC_W_SIDE_EFFECTS : Instruction { +def G_INTRINSIC_W_SIDE_EFFECTS : GenericInstruction { let OutOperandList = (outs); let InOperandList = (ins unknown:$intrin, variable_ops); let hasSideEffects = 1; @@ -539,7 +583,7 @@ def G_INTRINSIC_W_SIDE_EFFECTS : Instruction { //------------------------------------------------------------------------------ // Generic unconditional branch. -def G_BR : Instruction { +def G_BR : GenericInstruction { let OutOperandList = (outs); let InOperandList = (ins unknown:$src1); let hasSideEffects = 0; @@ -549,7 +593,7 @@ def G_BR : Instruction { } // Generic conditional branch. -def G_BRCOND : Instruction { +def G_BRCOND : GenericInstruction { let OutOperandList = (outs); let InOperandList = (ins type0:$tst, unknown:$truebb); let hasSideEffects = 0; @@ -558,7 +602,7 @@ def G_BRCOND : Instruction { } // Generic indirect branch. -def G_BRINDIRECT : Instruction { +def G_BRINDIRECT : GenericInstruction { let OutOperandList = (outs); let InOperandList = (ins type0:$src1); let hasSideEffects = 0; @@ -571,21 +615,21 @@ def G_BRINDIRECT : Instruction { //------------------------------------------------------------------------------ // Generic insertelement. -def G_INSERT_VECTOR_ELT : Instruction { +def G_INSERT_VECTOR_ELT : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src, type1:$elt, type2:$idx); let hasSideEffects = 0; } // Generic extractelement. -def G_EXTRACT_VECTOR_ELT : Instruction { +def G_EXTRACT_VECTOR_ELT : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type1:$src, type2:$idx); let hasSideEffects = 0; } // Generic shufflevector. 
-def G_SHUFFLE_VECTOR: Instruction { +def G_SHUFFLE_VECTOR: GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type1:$v1, type1:$v2, type2:$mask); let hasSideEffects = 0; diff --git a/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/include/llvm/Target/GlobalISel/SelectionDAGCompat.td index c012b20fd7b2..0d3b4a4686e8 100644 --- a/include/llvm/Target/GlobalISel/SelectionDAGCompat.td +++ b/include/llvm/Target/GlobalISel/SelectionDAGCompat.td @@ -94,8 +94,27 @@ def : GINodeEquiv { let CheckMMOIsNonAtomic = 1; } // G_STORE with a non-atomic MachineMemOperand. def : GINodeEquiv { let CheckMMOIsNonAtomic = 1; } +def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; + // Specifies the GlobalISel equivalents for SelectionDAG's ComplexPattern. // Should be used on defs that subclass GIComplexOperandMatcher<>. class GIComplexPatternEquiv { ComplexPattern SelDAGEquivalent = seldag; } + +// Specifies the GlobalISel equivalents for SelectionDAG's SDNodeXForm. +// Should be used on defs that subclass GICustomOperandRenderer<>. +class GISDNodeXFormEquiv { + SDNodeXForm SelDAGEquivalent = seldag; +} diff --git a/include/llvm/Target/GlobalISel/Target.td b/include/llvm/Target/GlobalISel/Target.td index fd2ebca86d60..6740f404a9d3 100644 --- a/include/llvm/Target/GlobalISel/Target.td +++ b/include/llvm/Target/GlobalISel/Target.td @@ -46,3 +46,16 @@ class GIComplexOperandMatcher { // overwritten. string MatcherFn = matcherfn; } + +// Defines a custom renderer. This is analogous to SDNodeXForm from +// SelectionDAG. Unlike SDNodeXForm, this matches a MachineInstr and +// renders directly to the result instruction without an intermediate node. +// +// Definitions that inherit from this may also inherit from GISDNodeXFormEquiv +// to enable the import of SelectionDAG patterns involving those SDNodeXForms. +class GICustomOperandRenderer { + // The function renders the operand(s) of the matched instruction to + // the specified instruction. It should be of the form: + // void render(MachineInstrBuilder &MIB, const MachineInstr &MI) + string RendererFn = rendererfn; +} diff --git a/include/llvm/Target/Target.td b/include/llvm/Target/Target.td index 86fa3c03fb50..82a3be5e63d4 100644 --- a/include/llvm/Target/Target.td +++ b/include/llvm/Target/Target.td @@ -897,21 +897,27 @@ class InstrInfo { // Ensure mayLoad and mayStore have a default value, so as not to break // targets that set guessInstructionProperties=0. Any local definition of // mayLoad/mayStore takes precedence over these default values. -let mayLoad = 0, mayStore = 0, isCodeGenOnly = 1, isPseudo = 1, - hasNoSchedulingInfo = 1, Namespace = "TargetOpcode" in { -def PHI : Instruction { +class StandardPseudoInstruction : Instruction { + let mayLoad = 0; + let mayStore = 0; + let isCodeGenOnly = 1; + let isPseudo = 1; + let hasNoSchedulingInfo = 1; + let Namespace = "TargetOpcode"; +} +def PHI : StandardPseudoInstruction { let OutOperandList = (outs unknown:$dst); let InOperandList = (ins variable_ops); let AsmString = "PHINODE"; let hasSideEffects = 0; } -def INLINEASM : Instruction { +def INLINEASM : StandardPseudoInstruction { let OutOperandList = (outs); let InOperandList = (ins variable_ops); let AsmString = ""; let hasSideEffects = 0; // Note side effect is encoded in an operand. 
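The GICustomOperandRenderer comment above spells out the expected C++ shape of a renderer; a minimal sketch of such a function, where the name and the G_CONSTANT pattern are purely illustrative:

    #include "llvm/CodeGen/MachineInstr.h"
    #include "llvm/CodeGen/MachineInstrBuilder.h"
    #include "llvm/IR/Constants.h"

    // Reads the matched G_CONSTANT (operand 0 is the def, operand 1 the
    // immediate) and renders it as an immediate on the instruction being built.
    static void renderImmFromConstant(llvm::MachineInstrBuilder &MIB,
                                      const llvm::MachineInstr &MI) {
      const llvm::ConstantInt *CI = MI.getOperand(1).getCImm();
      MIB.addImm(CI->getSExtValue());
    }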
} -def CFI_INSTRUCTION : Instruction { +def CFI_INSTRUCTION : StandardPseudoInstruction { let OutOperandList = (outs); let InOperandList = (ins i32imm:$id); let AsmString = ""; @@ -919,7 +925,7 @@ def CFI_INSTRUCTION : Instruction { let hasSideEffects = 0; let isNotDuplicable = 1; } -def EH_LABEL : Instruction { +def EH_LABEL : StandardPseudoInstruction { let OutOperandList = (outs); let InOperandList = (ins i32imm:$id); let AsmString = ""; @@ -927,7 +933,7 @@ def EH_LABEL : Instruction { let hasSideEffects = 0; let isNotDuplicable = 1; } -def GC_LABEL : Instruction { +def GC_LABEL : StandardPseudoInstruction { let OutOperandList = (outs); let InOperandList = (ins i32imm:$id); let AsmString = ""; @@ -935,7 +941,7 @@ def GC_LABEL : Instruction { let hasSideEffects = 0; let isNotDuplicable = 1; } -def ANNOTATION_LABEL : Instruction { +def ANNOTATION_LABEL : StandardPseudoInstruction { let OutOperandList = (outs); let InOperandList = (ins i32imm:$id); let AsmString = ""; @@ -943,26 +949,26 @@ def ANNOTATION_LABEL : Instruction { let hasSideEffects = 0; let isNotDuplicable = 1; } -def KILL : Instruction { +def KILL : StandardPseudoInstruction { let OutOperandList = (outs); let InOperandList = (ins variable_ops); let AsmString = ""; let hasSideEffects = 0; } -def EXTRACT_SUBREG : Instruction { +def EXTRACT_SUBREG : StandardPseudoInstruction { let OutOperandList = (outs unknown:$dst); let InOperandList = (ins unknown:$supersrc, i32imm:$subidx); let AsmString = ""; let hasSideEffects = 0; } -def INSERT_SUBREG : Instruction { +def INSERT_SUBREG : StandardPseudoInstruction { let OutOperandList = (outs unknown:$dst); let InOperandList = (ins unknown:$supersrc, unknown:$subsrc, i32imm:$subidx); let AsmString = ""; let hasSideEffects = 0; let Constraints = "$supersrc = $dst"; } -def IMPLICIT_DEF : Instruction { +def IMPLICIT_DEF : StandardPseudoInstruction { let OutOperandList = (outs unknown:$dst); let InOperandList = (ins); let AsmString = ""; @@ -970,33 +976,33 @@ def IMPLICIT_DEF : Instruction { let isReMaterializable = 1; let isAsCheapAsAMove = 1; } -def SUBREG_TO_REG : Instruction { +def SUBREG_TO_REG : StandardPseudoInstruction { let OutOperandList = (outs unknown:$dst); let InOperandList = (ins unknown:$implsrc, unknown:$subsrc, i32imm:$subidx); let AsmString = ""; let hasSideEffects = 0; } -def COPY_TO_REGCLASS : Instruction { +def COPY_TO_REGCLASS : StandardPseudoInstruction { let OutOperandList = (outs unknown:$dst); let InOperandList = (ins unknown:$src, i32imm:$regclass); let AsmString = ""; let hasSideEffects = 0; let isAsCheapAsAMove = 1; } -def DBG_VALUE : Instruction { +def DBG_VALUE : StandardPseudoInstruction { let OutOperandList = (outs); let InOperandList = (ins variable_ops); let AsmString = "DBG_VALUE"; let hasSideEffects = 0; } -def REG_SEQUENCE : Instruction { +def REG_SEQUENCE : StandardPseudoInstruction { let OutOperandList = (outs unknown:$dst); let InOperandList = (ins unknown:$supersrc, variable_ops); let AsmString = ""; let hasSideEffects = 0; let isAsCheapAsAMove = 1; } -def COPY : Instruction { +def COPY : StandardPseudoInstruction { let OutOperandList = (outs unknown:$dst); let InOperandList = (ins unknown:$src); let AsmString = ""; @@ -1004,25 +1010,25 @@ def COPY : Instruction { let isAsCheapAsAMove = 1; let hasNoSchedulingInfo = 0; } -def BUNDLE : Instruction { +def BUNDLE : StandardPseudoInstruction { let OutOperandList = (outs); let InOperandList = (ins variable_ops); let AsmString = "BUNDLE"; let hasSideEffects = 1; } -def LIFETIME_START : Instruction { +def 
LIFETIME_START : StandardPseudoInstruction { let OutOperandList = (outs); let InOperandList = (ins i32imm:$id); let AsmString = "LIFETIME_START"; let hasSideEffects = 0; } -def LIFETIME_END : Instruction { +def LIFETIME_END : StandardPseudoInstruction { let OutOperandList = (outs); let InOperandList = (ins i32imm:$id); let AsmString = "LIFETIME_END"; let hasSideEffects = 0; } -def STACKMAP : Instruction { +def STACKMAP : StandardPseudoInstruction { let OutOperandList = (outs); let InOperandList = (ins i64imm:$id, i32imm:$nbytes, variable_ops); let hasSideEffects = 1; @@ -1030,7 +1036,7 @@ def STACKMAP : Instruction { let mayLoad = 1; let usesCustomInserter = 1; } -def PATCHPOINT : Instruction { +def PATCHPOINT : StandardPseudoInstruction { let OutOperandList = (outs unknown:$dst); let InOperandList = (ins i64imm:$id, i32imm:$nbytes, unknown:$callee, i32imm:$nargs, i32imm:$cc, variable_ops); @@ -1039,7 +1045,7 @@ def PATCHPOINT : Instruction { let mayLoad = 1; let usesCustomInserter = 1; } -def STATEPOINT : Instruction { +def STATEPOINT : StandardPseudoInstruction { let OutOperandList = (outs); let InOperandList = (ins variable_ops); let usesCustomInserter = 1; @@ -1048,7 +1054,7 @@ def STATEPOINT : Instruction { let hasSideEffects = 1; let isCall = 1; } -def LOAD_STACK_GUARD : Instruction { +def LOAD_STACK_GUARD : StandardPseudoInstruction { let OutOperandList = (outs ptr_rc:$dst); let InOperandList = (ins); let mayLoad = 1; @@ -1056,7 +1062,7 @@ def LOAD_STACK_GUARD : Instruction { let hasSideEffects = 0; bit isPseudo = 1; } -def LOCAL_ESCAPE : Instruction { +def LOCAL_ESCAPE : StandardPseudoInstruction { // This instruction is really just a label. It has to be part of the chain so // that it doesn't get dropped from the DAG, but it produces nothing and has // no side effects. @@ -1065,7 +1071,7 @@ def LOCAL_ESCAPE : Instruction { let hasSideEffects = 0; let hasCtrlDep = 1; } -def FAULTING_OP : Instruction { +def FAULTING_OP : StandardPseudoInstruction { let OutOperandList = (outs unknown:$dst); let InOperandList = (ins variable_ops); let usesCustomInserter = 1; @@ -1075,7 +1081,7 @@ def FAULTING_OP : Instruction { let isTerminator = 1; let isBranch = 1; } -def PATCHABLE_OP : Instruction { +def PATCHABLE_OP : StandardPseudoInstruction { let OutOperandList = (outs unknown:$dst); let InOperandList = (ins variable_ops); let usesCustomInserter = 1; @@ -1083,14 +1089,14 @@ def PATCHABLE_OP : Instruction { let mayStore = 1; let hasSideEffects = 1; } -def PATCHABLE_FUNCTION_ENTER : Instruction { +def PATCHABLE_FUNCTION_ENTER : StandardPseudoInstruction { let OutOperandList = (outs); let InOperandList = (ins); let AsmString = "# XRay Function Enter."; let usesCustomInserter = 1; let hasSideEffects = 0; } -def PATCHABLE_RET : Instruction { +def PATCHABLE_RET : StandardPseudoInstruction { let OutOperandList = (outs unknown:$dst); let InOperandList = (ins variable_ops); let AsmString = "# XRay Function Patchable RET."; @@ -1099,7 +1105,7 @@ def PATCHABLE_RET : Instruction { let isTerminator = 1; let isReturn = 1; } -def PATCHABLE_FUNCTION_EXIT : Instruction { +def PATCHABLE_FUNCTION_EXIT : StandardPseudoInstruction { let OutOperandList = (outs); let InOperandList = (ins); let AsmString = "# XRay Function Exit."; @@ -1107,7 +1113,7 @@ def PATCHABLE_FUNCTION_EXIT : Instruction { let hasSideEffects = 0; // FIXME: is this correct? 
let isReturn = 0; // Original return instruction will follow } -def PATCHABLE_TAIL_CALL : Instruction { +def PATCHABLE_TAIL_CALL : StandardPseudoInstruction { let OutOperandList = (outs unknown:$dst); let InOperandList = (ins variable_ops); let AsmString = "# XRay Tail Call Exit."; @@ -1115,7 +1121,7 @@ def PATCHABLE_TAIL_CALL : Instruction { let hasSideEffects = 1; let isReturn = 1; } -def PATCHABLE_EVENT_CALL : Instruction { +def PATCHABLE_EVENT_CALL : StandardPseudoInstruction { let OutOperandList = (outs); let InOperandList = (ins ptr_rc:$event, i8imm:$size); let AsmString = "# XRay Custom Event Log."; @@ -1125,7 +1131,7 @@ def PATCHABLE_EVENT_CALL : Instruction { let mayStore = 1; let hasSideEffects = 1; } -def FENTRY_CALL : Instruction { +def FENTRY_CALL : StandardPseudoInstruction { let OutOperandList = (outs unknown:$dst); let InOperandList = (ins variable_ops); let AsmString = "# FEntry call"; @@ -1138,8 +1144,6 @@ def FENTRY_CALL : Instruction { // Generic opcodes used in GlobalISel. include "llvm/Target/GenericOpcodes.td" -} - //===----------------------------------------------------------------------===// // AsmParser - This class can be implemented by targets that wish to implement // .s file parsing. @@ -1170,6 +1174,14 @@ class AsmParser { // several registers share the same alias (i.e. not a 1:1 mapping). bit ShouldEmitMatchRegisterAltName = 0; + // Set to true if MatchRegisterName and MatchRegisterAltName functions + // should be generated even if there are duplicate register names. The + // target is responsible for coercing aliased registers as necessary + // (e.g. in validateTargetOperandClass), and there are no guarantees about + // which numeric register identifier will be returned in the case of + // multiple matches. + bit AllowDuplicateRegisterNames = 0; + // HasMnemonicFirst - Set to false if target instructions don't always // start with a mnemonic as the first token. bit HasMnemonicFirst = 1; diff --git a/include/llvm/Target/TargetMachine.h b/include/llvm/Target/TargetMachine.h index f17b1d9d8994..f02eab3c229f 100644 --- a/include/llvm/Target/TargetMachine.h +++ b/include/llvm/Target/TargetMachine.h @@ -24,6 +24,7 @@ namespace llvm { +class Function; class GlobalValue; class MachineModuleInfo; class Mangler; @@ -38,6 +39,7 @@ class PassManagerBuilder; class Target; class TargetIntrinsicInfo; class TargetIRAnalysis; +class TargetTransformInfo; class TargetLoweringObjectFile; class TargetPassConfig; class TargetSubtargetInfo; @@ -182,6 +184,7 @@ class TargetMachine { void setFastISel(bool Enable) { Options.EnableFastISel = Enable; } bool getO0WantsFastISel() { return O0WantsFastISel; } void setO0WantsFastISel(bool Enable) { O0WantsFastISel = Enable; } + void setGlobalISel(bool Enable) { Options.EnableGlobalISel = Enable; } bool shouldPrintMachineCode() const { return Options.PrintMachineCode; } @@ -204,7 +207,13 @@ class TargetMachine { /// This is used to construct the new pass manager's target IR analysis pass, /// set up appropriately for this target machine. Even the old pass manager /// uses this to answer queries about the IR. - virtual TargetIRAnalysis getTargetIRAnalysis(); + TargetIRAnalysis getTargetIRAnalysis(); + + /// \brief Return a TargetTransformInfo for a given function. + /// + /// The returned TargetTransformInfo is specialized to the subtarget + /// corresponding to \p F. + virtual TargetTransformInfo getTargetTransformInfo(const Function &F); /// Allow the target to modify the pass manager, e.g. by calling /// PassManagerBuilder::addExtension. 
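With the new hook, code that holds a TargetMachine can ask for a per-function TTI directly; a small sketch, where prefersWideVectors and the particular query are illustrative:

    #include "llvm/Analysis/TargetTransformInfo.h"
    #include "llvm/IR/Function.h"
    #include "llvm/Target/TargetMachine.h"

    bool prefersWideVectors(const llvm::Function &F, llvm::TargetMachine &TM) {
      // Specialized to the subtarget selected for F (e.g. by target attributes).
      llvm::TargetTransformInfo TTI = TM.getTargetTransformInfo(F);
      return TTI.getRegisterBitWidth(/*Vector=*/true) >= 256;
    }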
@@ -283,11 +292,11 @@ class LLVMTargetMachine : public TargetMachine { void initAsmInfo(); public: - /// \brief Get a TargetIRAnalysis implementation for the target. + /// \brief Get a TargetTransformInfo implementation for the target. /// - /// This analysis will produce a TTI result which uses the common code - /// generator to answer queries about the IR. - TargetIRAnalysis getTargetIRAnalysis() override; + /// The TTI returned uses the common code generator to answer queries about + /// the IR. + TargetTransformInfo getTargetTransformInfo(const Function &F) override; /// Create a pass configuration object to be used by addPassToEmitX methods /// for generating a pipeline of CodeGen passes. diff --git a/include/llvm/Target/TargetOptions.h b/include/llvm/Target/TargetOptions.h index 5c2063880f8b..f21b9792d108 100644 --- a/include/llvm/Target/TargetOptions.h +++ b/include/llvm/Target/TargetOptions.h @@ -104,11 +104,11 @@ namespace llvm { NoSignedZerosFPMath(false), HonorSignDependentRoundingFPMathOption(false), NoZerosInBSS(false), GuaranteedTailCallOpt(false), StackSymbolOrdering(true), - EnableFastISel(false), UseInitArray(false), + EnableFastISel(false), EnableGlobalISel(false), UseInitArray(false), DisableIntegratedAS(false), RelaxELFRelocations(false), FunctionSections(false), DataSections(false), UniqueSectionNames(true), TrapUnreachable(false), EmulatedTLS(false), - EnableIPRA(false) {} + EnableIPRA(false), EmitStackSizeSection(false) {} /// PrintMachineCode - This flag is enabled when the -print-machineinstrs /// option is specified on the command line, and should enable debugging @@ -186,6 +186,9 @@ namespace llvm { /// compile time. unsigned EnableFastISel : 1; + /// EnableGlobalISel - This flag enables global instruction selection. + unsigned EnableGlobalISel : 1; + /// UseInitArray - Use .init_array instead of .ctors for static /// constructors. unsigned UseInitArray : 1; @@ -216,6 +219,9 @@ namespace llvm { /// This flag enables InterProcedural Register Allocation (IPRA). unsigned EnableIPRA : 1; + /// Emit section containing metadata on function stack sizes. + unsigned EmitStackSizeSection : 1; + /// FloatABIType - This setting is set by -float-abi=xxx option is specfied /// on the command line. This setting may either be Default, Soft, or Hard. /// Default selects the target's default behavior. Soft selects the ABI for diff --git a/include/llvm/Target/TargetSelectionDAG.td b/include/llvm/Target/TargetSelectionDAG.td index 98eaeda89c02..f6162377b8b7 100644 --- a/include/llvm/Target/TargetSelectionDAG.td +++ b/include/llvm/Target/TargetSelectionDAG.td @@ -285,32 +285,6 @@ class SDCallSeqStart constraints> : class SDCallSeqEnd constraints> : SDTypeProfile<0, 2, constraints>; -//===----------------------------------------------------------------------===// -// Selection DAG Node Properties. -// -// Note: These are hard coded into tblgen. -// -class SDNodeProperty; -def SDNPCommutative : SDNodeProperty; // X op Y == Y op X -def SDNPAssociative : SDNodeProperty; // (X op Y) op Z == X op (Y op Z) -def SDNPHasChain : SDNodeProperty; // R/W chain operand and result -def SDNPOutGlue : SDNodeProperty; // Write a flag result -def SDNPInGlue : SDNodeProperty; // Read a flag operand -def SDNPOptInGlue : SDNodeProperty; // Optionally read a flag operand -def SDNPMayStore : SDNodeProperty; // May write to memory, sets 'mayStore'. -def SDNPMayLoad : SDNodeProperty; // May read memory, sets 'mayLoad'. -def SDNPSideEffect : SDNodeProperty; // Sets 'HasUnmodelledSideEffects'. 
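Both new flags are plain bit-fields on TargetOptions, so a front end or tool can opt in when it configures code generation; a minimal sketch:

    #include "llvm/Target/TargetOptions.h"

    llvm::TargetOptions makeOptions() {
      llvm::TargetOptions Opts;
      Opts.EnableGlobalISel = true;     // use global instruction selection
      Opts.EmitStackSizeSection = true; // emit per-function stack-size metadata
      return Opts;
    }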
-def SDNPMemOperand : SDNodeProperty; // Touches memory, has assoc MemOperand -def SDNPVariadic : SDNodeProperty; // Node has variable arguments. -def SDNPWantRoot : SDNodeProperty; // ComplexPattern gets the root of match -def SDNPWantParent : SDNodeProperty; // ComplexPattern gets the parent - -//===----------------------------------------------------------------------===// -// Selection DAG Pattern Operations -class SDPatternOperator { - list Properties = []; -} - //===----------------------------------------------------------------------===// // Selection DAG Node definitions. // @@ -689,6 +663,14 @@ class PatFrag(N)->getOrdering() == AtomicOrdering::SequentiallyConsistent bit IsAtomicOrderingSequentiallyConsistent = ?; + // isAcquireOrStronger(cast(N)->getOrdering()) + // !isAcquireOrStronger(cast(N)->getOrdering()) + bit IsAtomicOrderingAcquireOrStronger = ?; + + // isReleaseOrStronger(cast(N)->getOrdering()) + // !isReleaseOrStronger(cast(N)->getOrdering()) + bit IsAtomicOrderingReleaseOrStronger = ?; + // cast(N)->getMemoryVT() == MVT::; // cast(N)->getMemoryVT() == MVT::; ValueType MemoryVT = ?; diff --git a/include/llvm/Testing/Support/Error.h b/include/llvm/Testing/Support/Error.h index f23d289266ad..50889b9c66f5 100644 --- a/include/llvm/Testing/Support/Error.h +++ b/include/llvm/Testing/Support/Error.h @@ -22,17 +22,66 @@ namespace detail { ErrorHolder TakeError(Error Err); template ExpectedHolder TakeExpected(Expected &Exp) { - llvm::detail::ExpectedHolder Result; - auto &EH = static_cast(Result); - EH = TakeError(Exp.takeError()); - if (Result.Success) - Result.Value = &(*Exp); - return Result; + return {TakeError(Exp.takeError()), Exp}; } template ExpectedHolder TakeExpected(Expected &&Exp) { return TakeExpected(Exp); } + +template +class ValueMatchesMono + : public testing::MatcherInterface &> { +public: + explicit ValueMatchesMono(const testing::Matcher &Matcher) + : Matcher(Matcher) {} + + bool MatchAndExplain(const ExpectedHolder &Holder, + testing::MatchResultListener *listener) const override { + if (!Holder.Success) + return false; + + bool result = Matcher.MatchAndExplain(*Holder.Exp, listener); + + if (result) + return result; + *listener << "("; + Matcher.DescribeNegationTo(listener->stream()); + *listener << ")"; + return result; + } + + void DescribeTo(std::ostream *OS) const override { + *OS << "succeeded with value ("; + Matcher.DescribeTo(OS); + *OS << ")"; + } + + void DescribeNegationTo(std::ostream *OS) const override { + *OS << "did not succeed or value ("; + Matcher.DescribeNegationTo(OS); + *OS << ")"; + } + +private: + testing::Matcher Matcher; +}; + +template +class ValueMatchesPoly { +public: + explicit ValueMatchesPoly(const M &Matcher) : Matcher(Matcher) {} + + template + operator testing::Matcher &>() const { + return MakeMatcher( + new ValueMatchesMono(testing::SafeMatcherCast(Matcher))); + } + +private: + M Matcher; +}; + } // namespace detail #define EXPECT_THAT_ERROR(Err, Matcher) \ @@ -48,22 +97,11 @@ template ExpectedHolder TakeExpected(Expected &&Exp) { MATCHER(Succeeded, "") { return arg.Success; } MATCHER(Failed, "") { return !arg.Success; } -MATCHER_P(HasValue, value, - "succeeded with value " + testing::PrintToString(value)) { - if (!arg.Success) { - *result_listener << "operation failed"; - return false; - } - - assert(arg.Value.hasValue()); - if (**arg.Value != value) { - *result_listener << "but \"" + testing::PrintToString(**arg.Value) + - "\" != " + testing::PrintToString(value); - return false; - } - - return true; +template 
+detail::ValueMatchesPoly HasValue(M Matcher) { + return detail::ValueMatchesPoly(Matcher); } + } // namespace llvm #endif diff --git a/include/llvm/Testing/Support/SupportHelpers.h b/include/llvm/Testing/Support/SupportHelpers.h index c4dd414b80db..d7f0c7142b2c 100644 --- a/include/llvm/Testing/Support/SupportHelpers.h +++ b/include/llvm/Testing/Support/SupportHelpers.h @@ -22,7 +22,10 @@ struct ErrorHolder { }; template struct ExpectedHolder : public ErrorHolder { - Optional Value; + ExpectedHolder(ErrorHolder Err, Expected &Exp) + : ErrorHolder(std::move(Err)), Exp(Exp) {} + + Expected &Exp; }; inline void PrintTo(const ErrorHolder &Err, std::ostream *Out) { @@ -35,8 +38,7 @@ inline void PrintTo(const ErrorHolder &Err, std::ostream *Out) { template void PrintTo(const ExpectedHolder &Item, std::ostream *Out) { if (Item.Success) { - *Out << "succeeded with value \"" << ::testing::PrintToString(**Item.Value) - << "\""; + *Out << "succeeded with value " << ::testing::PrintToString(*Item.Exp); } else { PrintTo(static_cast(Item), Out); } diff --git a/include/llvm/Transforms/IPO/AlwaysInliner.h b/include/llvm/Transforms/IPO/AlwaysInliner.h index 15c80357e4a8..b52c0fdbd2c9 100644 --- a/include/llvm/Transforms/IPO/AlwaysInliner.h +++ b/include/llvm/Transforms/IPO/AlwaysInliner.h @@ -27,7 +27,13 @@ namespace llvm { /// be the simplest possible pass to remove always_inline function definitions' /// uses by inlining them. The \c GlobalDCE pass can be used to remove these /// functions once all users are gone. -struct AlwaysInlinerPass : PassInfoMixin { +class AlwaysInlinerPass : public PassInfoMixin { + bool InsertLifetime; + +public: + AlwaysInlinerPass(bool InsertLifetime = true) + : InsertLifetime(InsertLifetime) {} + PreservedAnalyses run(Module &M, ModuleAnalysisManager &); }; diff --git a/include/llvm/Transforms/IPO/ArgumentPromotion.h b/include/llvm/Transforms/IPO/ArgumentPromotion.h index 82ffc69a166e..49ca6cc73393 100644 --- a/include/llvm/Transforms/IPO/ArgumentPromotion.h +++ b/include/llvm/Transforms/IPO/ArgumentPromotion.h @@ -22,7 +22,11 @@ namespace llvm { /// transform it and all of its callers to replace indirect arguments with /// direct (by-value) arguments. class ArgumentPromotionPass : public PassInfoMixin { + unsigned MaxElements; + public: + ArgumentPromotionPass(unsigned MaxElements = 3u) : MaxElements(MaxElements) {} + PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM, LazyCallGraph &CG, CGSCCUpdateResult &UR); }; diff --git a/include/llvm/Transforms/IPO/FunctionImport.h b/include/llvm/Transforms/IPO/FunctionImport.h index 63c73af44e87..39e5b5c8ae6f 100644 --- a/include/llvm/Transforms/IPO/FunctionImport.h +++ b/include/llvm/Transforms/IPO/FunctionImport.h @@ -98,6 +98,15 @@ void ComputeCrossModuleImportForModule( StringRef ModulePath, const ModuleSummaryIndex &Index, FunctionImporter::ImportMapTy &ImportList); +/// Mark all external summaries in \p Index for import into the given module. +/// Used for distributed builds using a distributed index. +/// +/// \p ImportList will be populated with a map that can be passed to +/// FunctionImporter::importFunctions() above (see description there). +void ComputeCrossModuleImportForModuleFromIndex( + StringRef ModulePath, const ModuleSummaryIndex &Index, + FunctionImporter::ImportMapTy &ImportList); + /// Compute all the symbols that are "dead": i.e these that can't be reached /// in the graph from any of the given symbols listed in /// \p GUIDPreservedSymbols. 
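Editor's note on the Testing/Support change above: HasValue now forwards to an arbitrary gmock matcher over the contained value instead of comparing against a single expected value. A usage sketch in the unittest style; it assumes the usual llvm-project gtest setup and an EXPECT_THAT_EXPECTED macro provided alongside EXPECT_THAT_ERROR, and parsePositive is a made-up helper:

#include "llvm/Support/Error.h"
#include "llvm/Testing/Support/Error.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"

using namespace llvm;

static Expected<int> parsePositive(int V) {
  if (V > 0)
    return V;
  return make_error<StringError>("not positive", inconvertibleErrorCode());
}

// EXPECT_THAT_EXPECTED is assumed to be defined in this header next to
// EXPECT_THAT_ERROR.
TEST(HasValueSketch, TakesArbitraryMatchers) {
  // Exact value, as before.
  EXPECT_THAT_EXPECTED(parsePositive(7), HasValue(7));
  // Any gmock matcher over the contained value, which the old MATCHER_P
  // version could not express.
  EXPECT_THAT_EXPECTED(parsePositive(7), HasValue(testing::Gt(3)));
  EXPECT_THAT_EXPECTED(parsePositive(-1), Failed());
}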
diff --git a/include/llvm/Transforms/IPO/SyntheticCountsPropagation.h b/include/llvm/Transforms/IPO/SyntheticCountsPropagation.h new file mode 100644 index 000000000000..0b3ba86bc9e4 --- /dev/null +++ b/include/llvm/Transforms/IPO/SyntheticCountsPropagation.h @@ -0,0 +1,19 @@ +#ifndef LLVM_TRANSFORMS_IPO_SYNTHETIC_COUNTS_PROPAGATION_H +#define LLVM_TRANSFORMS_IPO_SYNTHETIC_COUNTS_PROPAGATION_H + +#include "llvm/ADT/STLExtras.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Support/ScaledNumber.h" + +namespace llvm { +class Function; +class Module; + +class SyntheticCountsPropagation + : public PassInfoMixin { +public: + PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM); +}; +} // namespace llvm +#endif diff --git a/include/llvm/Transforms/Instrumentation.h b/include/llvm/Transforms/Instrumentation.h index 0d76328a2f8d..b1e13f17aef1 100644 --- a/include/llvm/Transforms/Instrumentation.h +++ b/include/llvm/Transforms/Instrumentation.h @@ -77,9 +77,12 @@ ModulePass *createPGOIndirectCallPromotionLegacyPass(bool InLTO = false, bool SamplePGO = false); FunctionPass *createPGOMemOPSizeOptLegacyPass(); -// Helper function to check if it is legal to promote indirect call \p Inst -// to a direct call of function \p F. Stores the reason in \p Reason. -bool isLegalToPromote(Instruction *Inst, Function *F, const char **Reason); +// The pgo-specific indirect call promotion function declared below is used by +// the pgo-driven indirect call promotion and sample profile passes. It's a +// wrapper around llvm::promoteCall, et al. that additionally computes !prof +// metadata. We place it in a pgo namespace so it's not confused with the +// generic utilities. +namespace pgo { // Helper function that transforms Inst (either an indirect-call instruction, or // an invoke instruction , to a conditional call to F. This is like: @@ -98,6 +101,7 @@ Instruction *promoteIndirectCall(Instruction *Inst, Function *F, uint64_t Count, uint64_t TotalCount, bool AttachProfToDirectCall, OptimizationRemarkEmitter *ORE); +} // namespace pgo /// Options for the frontend instrumentation based profiling pass. struct InstrProfOptions { @@ -129,6 +133,8 @@ ModulePass *createAddressSanitizerModulePass(bool CompileKernel = false, FunctionPass *createMemorySanitizerPass(int TrackOrigins = 0, bool Recover = false); +FunctionPass *createHWAddressSanitizerPass(bool Recover = false); + // Insert ThreadSanitizer (race detection) instrumentation FunctionPass *createThreadSanitizerPass(); diff --git a/include/llvm/Transforms/Scalar.h b/include/llvm/Transforms/Scalar.h index 07d3d7fc8f6e..49186bc5cd66 100644 --- a/include/llvm/Transforms/Scalar.h +++ b/include/llvm/Transforms/Scalar.h @@ -267,7 +267,7 @@ FunctionPass *createJumpThreadingPass(int Threshold = -1); // FunctionPass *createCFGSimplificationPass( unsigned Threshold = 1, bool ForwardSwitchCond = false, - bool ConvertSwitch = false, bool KeepLoops = true, + bool ConvertSwitch = false, bool KeepLoops = true, bool SinkCommon = false, std::function Ftor = nullptr); //===----------------------------------------------------------------------===// @@ -521,7 +521,7 @@ FunctionPass *createPlaceSafepointsPass(); // RewriteStatepointsForGC - Rewrite any gc.statepoints which do not yet have // explicit relocations to include explicit relocations. 
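Editor's note on the Instrumentation.h and Scalar.h hunks above: createHWAddressSanitizerPass and the extra SinkCommon parameter on createCFGSimplificationPass are ordinary legacy pass-manager entry points. A small wiring sketch; the pipeline contents and flag values are illustrative only:

#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Scalar.h"

using namespace llvm;

void addExamplePasses(legacy::FunctionPassManager &FPM) {
  // New HWASan instrumentation pass; Recover=false aborts on the first report.
  FPM.add(createHWAddressSanitizerPass(/*Recover=*/false));
  // SimplifyCFG with the newly exposed sinking of common instructions.
  FPM.add(createCFGSimplificationPass(/*Threshold=*/1,
                                      /*ForwardSwitchCond=*/false,
                                      /*ConvertSwitch=*/false,
                                      /*KeepLoops=*/true,
                                      /*SinkCommon=*/true));
}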
// -ModulePass *createRewriteStatepointsForGCPass(); +ModulePass *createRewriteStatepointsForGCLegacyPass(); //===----------------------------------------------------------------------===// // diff --git a/include/llvm/Transforms/Scalar/JumpThreading.h b/include/llvm/Transforms/Scalar/JumpThreading.h index a9466713b8e6..b3493a292498 100644 --- a/include/llvm/Transforms/Scalar/JumpThreading.h +++ b/include/llvm/Transforms/Scalar/JumpThreading.h @@ -34,6 +34,7 @@ class BinaryOperator; class BranchInst; class CmpInst; class Constant; +class DeferredDominance; class Function; class Instruction; class IntrinsicInst; @@ -77,6 +78,7 @@ class JumpThreadingPass : public PassInfoMixin { TargetLibraryInfo *TLI; LazyValueInfo *LVI; AliasAnalysis *AA; + DeferredDominance *DDT; std::unique_ptr BFI; std::unique_ptr BPI; bool HasProfileData = false; @@ -107,8 +109,8 @@ class JumpThreadingPass : public PassInfoMixin { // Glue for old PM. bool runImpl(Function &F, TargetLibraryInfo *TLI_, LazyValueInfo *LVI_, - AliasAnalysis *AA_, bool HasProfileData_, - std::unique_ptr BFI_, + AliasAnalysis *AA_, DeferredDominance *DDT_, + bool HasProfileData_, std::unique_ptr BFI_, std::unique_ptr BPI_); PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); diff --git a/include/llvm/Transforms/Scalar/LoopPassManager.h b/include/llvm/Transforms/Scalar/LoopPassManager.h index 473b97dc7e8d..56a45ed34178 100644 --- a/include/llvm/Transforms/Scalar/LoopPassManager.h +++ b/include/llvm/Transforms/Scalar/LoopPassManager.h @@ -264,7 +264,8 @@ template class FunctionToLoopPassAdaptor : public PassInfoMixin> { public: - explicit FunctionToLoopPassAdaptor(LoopPassT Pass) : Pass(std::move(Pass)) { + explicit FunctionToLoopPassAdaptor(LoopPassT Pass, bool DebugLogging = false) + : Pass(std::move(Pass)), LoopCanonicalizationFPM(DebugLogging) { LoopCanonicalizationFPM.addPass(LoopSimplifyPass()); LoopCanonicalizationFPM.addPass(LCSSAPass()); } @@ -384,8 +385,8 @@ class FunctionToLoopPassAdaptor /// adaptor. template FunctionToLoopPassAdaptor -createFunctionToLoopPassAdaptor(LoopPassT Pass) { - return FunctionToLoopPassAdaptor(std::move(Pass)); +createFunctionToLoopPassAdaptor(LoopPassT Pass, bool DebugLogging = false) { + return FunctionToLoopPassAdaptor(std::move(Pass), DebugLogging); } /// \brief Pass for printing a loop's contents as textual IR. diff --git a/include/llvm/Transforms/Scalar/Reassociate.h b/include/llvm/Transforms/Scalar/Reassociate.h index fa87673e3e47..9997dfa5b6f3 100644 --- a/include/llvm/Transforms/Scalar/Reassociate.h +++ b/include/llvm/Transforms/Scalar/Reassociate.h @@ -72,6 +72,13 @@ class ReassociatePass : public PassInfoMixin { DenseMap RankMap; DenseMap, unsigned> ValueRankMap; SetVector> RedoInsts; + + // Arbitrary, but prevents quadratic behavior. 
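Editor's note on the LoopPassManager.h hunk above: the new DebugLogging flag is forwarded into the adaptor's internal LoopSimplify/LCSSA canonicalization pipeline. A new-PM construction sketch; the particular loop pass chosen here is just an example:

#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Scalar/LoopRotation.h"

using namespace llvm;

FunctionPassManager buildLoopPipeline(bool DebugLogging) {
  FunctionPassManager FPM(DebugLogging);
  // Passing DebugLogging here means the canonicalization passes run inside
  // the adaptor also report their execution, matching the outer manager.
  FPM.addPass(createFunctionToLoopPassAdaptor(LoopRotatePass(), DebugLogging));
  return FPM;
}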
+ static const unsigned GlobalReassociateLimit = 10; + static const unsigned NumBinaryOps = + Instruction::BinaryOpsEnd - Instruction::BinaryOpsBegin; + DenseMap, unsigned> PairMap[NumBinaryOps]; + bool MadeChange; public: @@ -105,6 +112,7 @@ class ReassociatePass : public PassInfoMixin { SetVector> &Insts); void OptimizeInst(Instruction *I); Instruction *canonicalizeNegConstExpr(Instruction *I); + void BuildPairMap(ReversePostOrderTraversal &RPOT); }; } // end namespace llvm diff --git a/include/llvm/Transforms/Scalar/RewriteStatepointsForGC.h b/include/llvm/Transforms/Scalar/RewriteStatepointsForGC.h new file mode 100644 index 000000000000..128f176f4420 --- /dev/null +++ b/include/llvm/Transforms/Scalar/RewriteStatepointsForGC.h @@ -0,0 +1,39 @@ +//===- RewriteStatepointsForGC.h - ------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides interface to "Rewrite Statepoints for GC" pass. +// +// This passe rewrites call/invoke instructions so as to make potential +// relocations performed by the garbage collector explicit in the IR. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_SCALAR_REWRITE_STATEPOINTS_FOR_GC_H +#define LLVM_TRANSFORMS_SCALAR_REWRITE_STATEPOINTS_FOR_GC_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { + +class DominatorTree; +class Function; +class Module; +class TargetTransformInfo; +class TargetLibraryInfo; + +struct RewriteStatepointsForGC : public PassInfoMixin { + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + + bool runOnFunction(Function &F, DominatorTree &, TargetTransformInfo &, + const TargetLibraryInfo &); +}; + +} // namespace llvm + +#endif // LLVM_TRANSFORMS_SCALAR_REWRITE_STATEPOINTS_FOR_GC_H diff --git a/include/llvm/Transforms/Scalar/SimplifyCFG.h b/include/llvm/Transforms/Scalar/SimplifyCFG.h index ed6b1b1853b1..1afb9c7f954f 100644 --- a/include/llvm/Transforms/Scalar/SimplifyCFG.h +++ b/include/llvm/Transforms/Scalar/SimplifyCFG.h @@ -39,7 +39,8 @@ class SimplifyCFGPass : public PassInfoMixin { : SimplifyCFGPass(SimplifyCFGOptions() .forwardSwitchCondToPhi(false) .convertSwitchToLookupTable(false) - .needCanonicalLoops(true)) {} + .needCanonicalLoops(true) + .sinkCommonInsts(false)) {} /// Construct a pass with optional optimizations. diff --git a/include/llvm/Transforms/Utils/BasicBlockUtils.h b/include/llvm/Transforms/Utils/BasicBlockUtils.h index 88873a991d5d..6f0d2deac0a0 100644 --- a/include/llvm/Transforms/Utils/BasicBlockUtils.h +++ b/include/llvm/Transforms/Utils/BasicBlockUtils.h @@ -25,6 +25,9 @@ namespace llvm { +class BlockFrequencyInfo; +class BranchProbabilityInfo; +class DeferredDominance; class DominatorTree; class Function; class Instruction; @@ -36,7 +39,7 @@ class TargetLibraryInfo; class Value; /// Delete the specified block, which must have no predecessors. -void DeleteDeadBlock(BasicBlock *BB); +void DeleteDeadBlock(BasicBlock *BB, DeferredDominance *DDT = nullptr); /// We know that BB has one predecessor. If there are any single-entry PHI nodes /// in it, fold them away. 
This handles the case when all entries to the PHI @@ -283,6 +286,29 @@ void SplitBlockAndInsertIfThenElse(Value *Cond, Instruction *SplitBefore, Value *GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, BasicBlock *&IfFalse); +// Split critical edges where the source of the edge is an indirectbr +// instruction. This isn't always possible, but we can handle some easy cases. +// This is useful because MI is unable to split such critical edges, +// which means it will not be able to sink instructions along those edges. +// This is especially painful for indirect branches with many successors, where +// we end up having to prepare all outgoing values in the origin block. +// +// Our normal algorithm for splitting critical edges requires us to update +// the outgoing edges of the edge origin block, but for an indirectbr this +// is hard, since it would require finding and updating the block addresses +// the indirect branch uses. But if a block only has a single indirectbr +// predecessor, with the others being regular branches, we can do it in a +// different way. +// Say we have A -> D, B -> D, I -> D where only I -> D is an indirectbr. +// We can split D into D0 and D1, where D0 contains only the PHIs from D, +// and D1 is the D block body. We can then duplicate D0 as D0A and D0B, and +// create the following structure: +// A -> D0A, B -> D0A, I -> D0B, D0A -> D1, D0B -> D1 +// If BPI and BFI aren't non-null, BPI/BFI will be updated accordingly. +bool SplitIndirectBrCriticalEdges(Function &F, + BranchProbabilityInfo *BPI = nullptr, + BlockFrequencyInfo *BFI = nullptr); + } // end namespace llvm #endif // LLVM_TRANSFORMS_UTILS_BASICBLOCKUTILS_H diff --git a/include/llvm/Transforms/Utils/BuildLibCalls.h b/include/llvm/Transforms/Utils/BuildLibCalls.h index a067a685b837..3a71559a93fe 100644 --- a/include/llvm/Transforms/Utils/BuildLibCalls.h +++ b/include/llvm/Transforms/Utils/BuildLibCalls.h @@ -15,6 +15,7 @@ #ifndef LLVM_TRANSFORMS_UTILS_BUILDLIBCALLS_H #define LLVM_TRANSFORMS_UTILS_BUILDLIBCALLS_H +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/IRBuilder.h" namespace llvm { @@ -29,6 +30,12 @@ namespace llvm { /// Returns true if any attributes were set and false otherwise. bool inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI); + /// Check whether the overloaded unary floating point function + /// corresponding to \a Ty is available. + bool hasUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty, + LibFunc DoubleFn, LibFunc FloatFn, + LibFunc LongDoubleFn); + /// Return V if it is an i8*, otherwise cast it to i8*. Value *castToCStr(Value *V, IRBuilder<> &B); diff --git a/include/llvm/Transforms/Utils/CallPromotionUtils.h b/include/llvm/Transforms/Utils/CallPromotionUtils.h new file mode 100644 index 000000000000..6e8ece723638 --- /dev/null +++ b/include/llvm/Transforms/Utils/CallPromotionUtils.h @@ -0,0 +1,54 @@ +//===- CallPromotionUtils.h - Utilities for call promotion ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares utilities useful for promoting indirect call sites to +// direct call sites. 
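Editor's note on the BuildLibCalls.h hunk above: hasUnaryFloatFn answers a single question, namely whether the float/double/long double family of a unary libm function is available for a given type. A usage sketch (TLI and Ty are assumed to be in scope; the sin family is just an example):

#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Type.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"

using namespace llvm;

bool canFormSinCall(const TargetLibraryInfo *TLI, Type *Ty) {
  // True only if the variant matching Ty (sinf / sin / sinl) is known to TLI
  // and marked available for the target.
  return hasUnaryFloatFn(TLI, Ty, LibFunc_sin, LibFunc_sinf, LibFunc_sinl);
}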
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_UTILS_CALLPROMOTIONUTILS_H +#define LLVM_TRANSFORMS_UTILS_CALLPROMOTIONUTILS_H + +#include "llvm/IR/CallSite.h" + +namespace llvm { + +/// Return true if the given indirect call site can be made to call \p Callee. +/// +/// This function ensures that the number and type of the call site's arguments +/// and return value match those of the given function. If the types do not +/// match exactly, they must at least be bitcast compatible. If \p FailureReason +/// is non-null and the indirect call cannot be promoted, the failure reason +/// will be stored in it. +bool isLegalToPromote(CallSite CS, Function *Callee, + const char **FailureReason = nullptr); + +/// Promote the given indirect call site to unconditionally call \p Callee. +/// +/// This function promotes the given call site, returning the direct call or +/// invoke instruction. If the function type of the call site doesn't match that +/// of the callee, bitcast instructions are inserted where appropriate. If \p +/// RetBitCast is non-null, it will be used to store the return value bitcast, +/// if created. +Instruction *promoteCall(CallSite CS, Function *Callee, + CastInst **RetBitCast = nullptr); + +/// Promote the given indirect call site to conditionally call \p Callee. +/// +/// This function creates an if-then-else structure at the location of the call +/// site. The original call site is moved into the "else" block. A clone of the +/// indirect call site is promoted, placed in the "then" block, and returned. If +/// \p BranchWeights is non-null, it will be used to set !prof metadata on the +/// new conditional branch. +Instruction *promoteCallWithIfThenElse(CallSite CS, Function *Callee, + MDNode *BranchWeights = nullptr); + +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_UTILS_CALLPROMOTIONUTILS_H diff --git a/include/llvm/Transforms/Utils/Local.h b/include/llvm/Transforms/Utils/Local.h index 8d54ef3436aa..153af700447b 100644 --- a/include/llvm/Transforms/Utils/Local.h +++ b/include/llvm/Transforms/Utils/Local.h @@ -63,16 +63,20 @@ struct SimplifyCFGOptions { bool ForwardSwitchCondToPhi; bool ConvertSwitchToLookupTable; bool NeedCanonicalLoop; + bool SinkCommonInsts; AssumptionCache *AC; SimplifyCFGOptions(unsigned BonusThreshold = 1, bool ForwardSwitchCond = false, bool SwitchToLookup = false, bool CanonicalLoops = true, + bool SinkCommon = false, AssumptionCache *AssumpCache = nullptr) : BonusInstThreshold(BonusThreshold), ForwardSwitchCondToPhi(ForwardSwitchCond), ConvertSwitchToLookupTable(SwitchToLookup), - NeedCanonicalLoop(CanonicalLoops), AC(AssumpCache) {} + NeedCanonicalLoop(CanonicalLoops), + SinkCommonInsts(SinkCommon), + AC(AssumpCache) {} // Support 'builder' pattern to set members by name at construction time. SimplifyCFGOptions &bonusInstThreshold(int I) { @@ -91,6 +95,10 @@ struct SimplifyCFGOptions { NeedCanonicalLoop = B; return *this; } + SimplifyCFGOptions &sinkCommonInsts(bool B) { + SinkCommonInsts = B; + return *this; + } SimplifyCFGOptions &setAssumptionCache(AssumptionCache *Cache) { AC = Cache; return *this; @@ -109,7 +117,8 @@ struct SimplifyCFGOptions { /// conditions and indirectbr addresses this might make dead if /// DeleteDeadConditions is true. 
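Editor's note on the new CallPromotionUtils.h above: the intended flow is a legality check followed by either unconditional or guarded promotion. A usage sketch, assuming the callee and an indirect CallSite are already in hand; the branch weights are made up:

#include "llvm/IR/CallSite.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/Transforms/Utils/CallPromotionUtils.h"

using namespace llvm;

// Returns the promoted direct call, or null if promotion is illegal.
Instruction *tryPromote(CallSite CS, Function *Callee) {
  const char *Reason = nullptr;
  if (!isLegalToPromote(CS, Callee, &Reason))
    return nullptr;   // Reason describes the argument/return mismatch

  // Guard the direct call behind a callee-pointer comparison; the original
  // indirect call stays in the "else" block.
  MDBuilder MDB(Callee->getContext());
  MDNode *Weights = MDB.createBranchWeights(/*TrueWeight=*/2000,
                                            /*FalseWeight=*/1);
  return promoteCallWithIfThenElse(CS, Callee, Weights);
}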
bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions = false, - const TargetLibraryInfo *TLI = nullptr); + const TargetLibraryInfo *TLI = nullptr, + DeferredDominance *DDT = nullptr); //===----------------------------------------------------------------------===// // Local dead code elimination. @@ -163,18 +172,21 @@ bool SimplifyInstructionsInBlock(BasicBlock *BB, /// /// .. and delete the predecessor corresponding to the '1', this will attempt to /// recursively fold the 'and' to 0. -void RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred); +void RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred, + DeferredDominance *DDT = nullptr); /// BB is a block with one predecessor and its predecessor is known to have one /// successor (BB!). Eliminate the edge between them, moving the instructions in /// the predecessor into BB. This deletes the predecessor block. -void MergeBasicBlockIntoOnlyPred(BasicBlock *BB, DominatorTree *DT = nullptr); +void MergeBasicBlockIntoOnlyPred(BasicBlock *BB, DominatorTree *DT = nullptr, + DeferredDominance *DDT = nullptr); /// BB is known to contain an unconditional branch, and contains no instructions /// other than PHI nodes, potential debug intrinsics and the branch. If /// possible, eliminate BB by rewriting all the predecessors to branch to the /// successor block and return true. If we can't transform, return false. -bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB); +bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, + DeferredDominance *DDT = nullptr); /// Check for and eliminate duplicate PHI nodes in this block. This doesn't try /// to be clever about PHI nodes which differ only in the order of the incoming @@ -335,22 +347,27 @@ TinyPtrVector FindDbgAddrUses(Value *V); /// Finds the llvm.dbg.value intrinsics describing a value. void findDbgValues(SmallVectorImpl &DbgValues, Value *V); -/// Replaces llvm.dbg.declare instruction when the address it describes -/// is replaced with a new value. If Deref is true, an additional DW_OP_deref is -/// prepended to the expression. If Offset is non-zero, a constant displacement -/// is added to the expression (after the optional Deref). Offset can be -/// negative. +/// Finds the debug info intrinsics describing a value. +void findDbgUsers(SmallVectorImpl &DbgInsts, Value *V); + +/// Replaces llvm.dbg.declare instruction when the address it +/// describes is replaced with a new value. If Deref is true, an +/// additional DW_OP_deref is prepended to the expression. If Offset +/// is non-zero, a constant displacement is added to the expression +/// (between the optional Deref operations). Offset can be negative. bool replaceDbgDeclare(Value *Address, Value *NewAddress, Instruction *InsertBefore, DIBuilder &Builder, - bool Deref, int Offset); + bool DerefBefore, int Offset, bool DerefAfter); /// Replaces llvm.dbg.declare instruction when the alloca it describes -/// is replaced with a new value. If Deref is true, an additional DW_OP_deref is -/// prepended to the expression. If Offset is non-zero, a constant displacement -/// is added to the expression (after the optional Deref). Offset can be -/// negative. New llvm.dbg.declare is inserted immediately before AI. +/// is replaced with a new value. If Deref is true, an additional +/// DW_OP_deref is prepended to the expression. If Offset is non-zero, +/// a constant displacement is added to the expression (between the +/// optional Deref operations). Offset can be negative. 
The new +/// llvm.dbg.declare is inserted immediately before AI. bool replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress, - DIBuilder &Builder, bool Deref, int Offset = 0); + DIBuilder &Builder, bool DerefBefore, + int Offset, bool DerefAfter); /// Replaces multiple llvm.dbg.value instructions when the alloca it describes /// is replaced with a new value. If Offset is non-zero, a constant displacement @@ -372,7 +389,8 @@ unsigned removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB); /// Insert an unreachable instruction before the specified /// instruction, making it and the rest of the code in the block dead. unsigned changeToUnreachable(Instruction *I, bool UseLLVMTrap, - bool PreserveLCSSA = false); + bool PreserveLCSSA = false, + DeferredDominance *DDT = nullptr); /// Convert the CallInst to InvokeInst with the specified unwind edge basic /// block. This also splits the basic block where CI is located, because @@ -387,12 +405,13 @@ BasicBlock *changeToInvokeAndSplitBasicBlock(CallInst *CI, /// /// \param BB Block whose terminator will be replaced. Its terminator must /// have an unwind successor. -void removeUnwindEdge(BasicBlock *BB); +void removeUnwindEdge(BasicBlock *BB, DeferredDominance *DDT = nullptr); /// Remove all blocks that can not be reached from the function's entry. /// /// Returns true if any basic block was removed. -bool removeUnreachableBlocks(Function &F, LazyValueInfo *LVI = nullptr); +bool removeUnreachableBlocks(Function &F, LazyValueInfo *LVI = nullptr, + DeferredDominance *DDT = nullptr); /// Combine the metadata of two instructions so that K can replace J /// @@ -445,7 +464,7 @@ void copyRangeMetadata(const DataLayout &DL, const LoadInst &OldLI, MDNode *N, // Intrinsic pattern matching // -/// Try and match a bswap or bitreverse idiom. +/// Try to match a bswap or bitreverse idiom. /// /// If an idiom is matched, an intrinsic call is inserted before \c I. Any added /// instructions are returned in \c InsertedInsts. They will all have been added diff --git a/include/llvm/Transforms/Utils/LoopUtils.h b/include/llvm/Transforms/Utils/LoopUtils.h index a59b188f8d6c..750666136507 100644 --- a/include/llvm/Transforms/Utils/LoopUtils.h +++ b/include/llvm/Transforms/Utils/LoopUtils.h @@ -306,10 +306,13 @@ class InductionDescriptor { /// induction, the induction descriptor \p D will contain the data describing /// this induction. If by some other means the caller has a better SCEV /// expression for \p Phi than the one returned by the ScalarEvolution - /// analysis, it can be passed through \p Expr. - static bool isInductionPHI(PHINode *Phi, const Loop* L, ScalarEvolution *SE, - InductionDescriptor &D, - const SCEV *Expr = nullptr); + /// analysis, it can be passed through \p Expr. If the def-use chain + /// associated with the phi includes casts (that we know we can ignore + /// under proper runtime checks), they are passed through \p CastsToIgnore. + static bool + isInductionPHI(PHINode *Phi, const Loop* L, ScalarEvolution *SE, + InductionDescriptor &D, const SCEV *Expr = nullptr, + SmallVectorImpl *CastsToIgnore = nullptr); /// Returns true if \p Phi is a floating point induction in the loop \p L. /// If \p Phi is an induction, the induction descriptor \p D will contain @@ -348,10 +351,18 @@ class InductionDescriptor { Instruction::BinaryOpsEnd; } + /// Returns a reference to the type cast instructions in the induction + /// update chain, that are redundant when guarded with a runtime + /// SCEV overflow check. 
+ const SmallVectorImpl &getCastInsts() const { + return RedundantCasts; + } + private: /// Private constructor - used by \c isInductionPHI. InductionDescriptor(Value *Start, InductionKind K, const SCEV *Step, - BinaryOperator *InductionBinOp = nullptr); + BinaryOperator *InductionBinOp = nullptr, + SmallVectorImpl *Casts = nullptr); /// Start value. TrackingVH StartValue; @@ -361,6 +372,9 @@ class InductionDescriptor { const SCEV *Step = nullptr; // Instruction that advances induction variable. BinaryOperator *InductionBinOp = nullptr; + // Instructions used for type-casts of the induction variable, + // that are redundant when guarded with a runtime SCEV overflow check. + SmallVector RedundantCasts; }; BasicBlock *InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, @@ -422,8 +436,9 @@ bool formLCSSARecursively(Loop &L, DominatorTree &DT, LoopInfo *LI, /// instructions of the loop and loop safety information as /// arguments. Diagnostics is emitted via \p ORE. It returns changed status. bool sinkRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *, - TargetLibraryInfo *, Loop *, AliasSetTracker *, - LoopSafetyInfo *, OptimizationRemarkEmitter *ORE); + TargetLibraryInfo *, TargetTransformInfo *, Loop *, + AliasSetTracker *, LoopSafetyInfo *, + OptimizationRemarkEmitter *ORE); /// \brief Walk the specified region of the CFG (defined by all blocks /// dominated by the specified block, and that are in the current loop) in depth diff --git a/include/llvm/Transforms/Utils/LowerMemIntrinsics.h b/include/llvm/Transforms/Utils/LowerMemIntrinsics.h index 4554b5cbc644..2b7d0f67a324 100644 --- a/include/llvm/Transforms/Utils/LowerMemIntrinsics.h +++ b/include/llvm/Transforms/Utils/LowerMemIntrinsics.h @@ -25,12 +25,6 @@ class MemSetInst; class TargetTransformInfo; class Value; -/// Emit a loop implementing the semantics of llvm.memcpy with the equivalent -/// arguments at \p InsertBefore. -void createMemCpyLoop(Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, - Value *CopyLen, unsigned SrcAlign, unsigned DestAlign, - bool SrcIsVolatile, bool DstIsVolatile); - /// Emit a loop implementing the semantics of llvm.memcpy where the size is not /// a compile-time constant. Loop will be insterted at \p InsertBefore. void createMemCpyLoopUnknownSize(Instruction *InsertBefore, Value *SrcAddr, diff --git a/include/llvm/Transforms/Utils/SSAUpdaterImpl.h b/include/llvm/Transforms/Utils/SSAUpdaterImpl.h index b1611d49a456..3c8bd1724e62 100644 --- a/include/llvm/Transforms/Utils/SSAUpdaterImpl.h +++ b/include/llvm/Transforms/Utils/SSAUpdaterImpl.h @@ -389,12 +389,8 @@ class SSAUpdaterImpl { /// FindExistingPHI - Look through the PHI nodes in a block to see if any of /// them match what is needed. 
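Editor's note on the LoopUtils.h hunks above: isInductionPHI gains an optional out-parameter collecting casts in the induction update chain that become redundant under the SCEV runtime checks, and the descriptor exposes them via getCastInsts(). A usage sketch; it assumes the elided container element type is Instruction * (as suggested by RedundantCasts), and that Phi, L and SE are in scope:

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Utils/LoopUtils.h"

using namespace llvm;

bool classifyInduction(PHINode *Phi, const Loop *L, ScalarEvolution *SE,
                       InductionDescriptor &D) {
  SmallVector<Instruction *, 2> Casts;
  if (!InductionDescriptor::isInductionPHI(Phi, L, SE, D, /*Expr=*/nullptr,
                                           &Casts))
    return false;
  // The same casts are later available through D.getCastInsts(), so clients
  // can skip them when vectorizing under the runtime overflow check.
  return true;
}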
void FindExistingPHI(BlkT *BB, BlockListTy *BlockList) { - for (typename BlkT::iterator BBI = BB->begin(), BBE = BB->end(); - BBI != BBE; ++BBI) { - PhiT *SomePHI = Traits::InstrIsPHI(&*BBI); - if (!SomePHI) - break; - if (CheckIfPHIMatches(SomePHI)) { + for (auto &SomePHI : BB->phis()) { + if (CheckIfPHIMatches(&SomePHI)) { RecordMatchingPHIs(BlockList); break; } diff --git a/include/llvm/Transforms/Utils/SimplifyLibCalls.h b/include/llvm/Transforms/Utils/SimplifyLibCalls.h index c8a373433212..73a62f59203b 100644 --- a/include/llvm/Transforms/Utils/SimplifyLibCalls.h +++ b/include/llvm/Transforms/Utils/SimplifyLibCalls.h @@ -129,6 +129,7 @@ class LibCallSimplifier { Value *optimizeStringMemoryLibCall(CallInst *CI, IRBuilder<> &B); // Math Library Optimizations + Value *optimizeCAbs(CallInst *CI, IRBuilder<> &B); Value *optimizeCos(CallInst *CI, IRBuilder<> &B); Value *optimizePow(CallInst *CI, IRBuilder<> &B); Value *replacePowWithSqrt(CallInst *Pow, IRBuilder<> &B); diff --git a/include/llvm/Transforms/Vectorize/SLPVectorizer.h b/include/llvm/Transforms/Vectorize/SLPVectorizer.h index 06f6fa11a943..781a628a0974 100644 --- a/include/llvm/Transforms/Vectorize/SLPVectorizer.h +++ b/include/llvm/Transforms/Vectorize/SLPVectorizer.h @@ -95,11 +95,8 @@ struct SLPVectorizerPass : public PassInfoMixin { bool tryToVectorizePair(Value *A, Value *B, slpvectorizer::BoUpSLP &R); /// \brief Try to vectorize a list of operands. - /// \@param BuildVector A list of users to ignore for the purpose of - /// scheduling and that don't need extracting. /// \returns true if a value was vectorized. bool tryToVectorizeList(ArrayRef VL, slpvectorizer::BoUpSLP &R, - ArrayRef BuildVector = None, bool AllowReorder = false); /// \brief Try to vectorize a chain that may start at the operands of \p I. diff --git a/include/llvm/module.modulemap b/include/llvm/module.modulemap index 3c94883e7d1a..d8b07c4f54da 100644 --- a/include/llvm/module.modulemap +++ b/include/llvm/module.modulemap @@ -25,6 +25,7 @@ module LLVM_Backend { exclude header "CodeGen/LinkAllCodegenComponents.h" // These are intended for (repeated) textual inclusion. + textual header "CodeGen/CommandFlags.def" textual header "CodeGen/DIEValue.def" textual header "CodeGen/RuntimeLibcalls.def" textual header "CodeGen/TargetOpcodes.def" @@ -60,7 +61,7 @@ module LLVM_BinaryFormat { textual header "BinaryFormat/ELFRelocs/SystemZ.def" textual header "BinaryFormat/ELFRelocs/x86_64.def" textual header "BinaryFormat/ELFRelocs/WebAssembly.def" - textual header "BinaryFormat/WasmRelocs/WebAssembly.def" + textual header "BinaryFormat/WasmRelocs.def" } module LLVM_Config { requires cplusplus umbrella "Config" module * { export * } } @@ -228,8 +229,7 @@ module LLVM_MC { umbrella "MC" module * { export * } - // Exclude this; it's fundamentally non-modular. 
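Editor's note on the SSAUpdaterImpl.h hunk above: the rewritten FindExistingPHI relies on BasicBlock::phis(), which yields exactly the block's leading PHI nodes. A small sketch of the same idiom in ordinary client code:

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// True if any PHI in BB already has an incoming value for Pred.
bool hasIncomingFrom(BasicBlock &BB, BasicBlock *Pred) {
  // phis() stops at the first non-PHI instruction, so no InstrIsPHI-style
  // check or manual early break is needed.
  for (PHINode &PN : BB.phis())
    if (PN.getBasicBlockIndex(Pred) >= 0)
      return true;
  return false;
}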
- exclude header "MC/MCTargetOptionsCommandFlags.h" + textual header "MC/MCTargetOptionsCommandFlags.def" } // Used by llvm-tblgen diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp index 897f89d31148..5f9fdb060e42 100644 --- a/lib/Analysis/AliasAnalysis.cpp +++ b/lib/Analysis/AliasAnalysis.cpp @@ -119,49 +119,50 @@ bool AAResults::pointsToConstantMemory(const MemoryLocation &Loc, } ModRefInfo AAResults::getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx) { - ModRefInfo Result = MRI_ModRef; + ModRefInfo Result = ModRefInfo::ModRef; for (const auto &AA : AAs) { - Result = ModRefInfo(Result & AA->getArgModRefInfo(CS, ArgIdx)); + Result = intersectModRef(Result, AA->getArgModRefInfo(CS, ArgIdx)); // Early-exit the moment we reach the bottom of the lattice. - if (Result == MRI_NoModRef) - return Result; + if (isNoModRef(Result)) + return ModRefInfo::NoModRef; } return Result; } ModRefInfo AAResults::getModRefInfo(Instruction *I, ImmutableCallSite Call) { - // We may have two calls + // We may have two calls. if (auto CS = ImmutableCallSite(I)) { - // Check if the two calls modify the same memory + // Check if the two calls modify the same memory. return getModRefInfo(CS, Call); } else if (I->isFenceLike()) { - // If this is a fence, just return MRI_ModRef. - return MRI_ModRef; + // If this is a fence, just return ModRef. + return ModRefInfo::ModRef; } else { // Otherwise, check if the call modifies or references the // location this memory access defines. The best we can say // is that if the call references what this instruction // defines, it must be clobbered by this location. const MemoryLocation DefLoc = MemoryLocation::get(I); - if (getModRefInfo(Call, DefLoc) != MRI_NoModRef) - return MRI_ModRef; + ModRefInfo MR = getModRefInfo(Call, DefLoc); + if (isModOrRefSet(MR)) + return setModAndRef(MR); } - return MRI_NoModRef; + return ModRefInfo::NoModRef; } ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc) { - ModRefInfo Result = MRI_ModRef; + ModRefInfo Result = ModRefInfo::ModRef; for (const auto &AA : AAs) { - Result = ModRefInfo(Result & AA->getModRefInfo(CS, Loc)); + Result = intersectModRef(Result, AA->getModRefInfo(CS, Loc)); // Early-exit the moment we reach the bottom of the lattice. 
- if (Result == MRI_NoModRef) - return Result; + if (isNoModRef(Result)) + return ModRefInfo::NoModRef; } // Try to refine the mod-ref info further using other API entry points to the @@ -169,16 +170,17 @@ ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS, auto MRB = getModRefBehavior(CS); if (MRB == FMRB_DoesNotAccessMemory || MRB == FMRB_OnlyAccessesInaccessibleMem) - return MRI_NoModRef; + return ModRefInfo::NoModRef; if (onlyReadsMemory(MRB)) - Result = ModRefInfo(Result & MRI_Ref); + Result = clearMod(Result); else if (doesNotReadMemory(MRB)) - Result = ModRefInfo(Result & MRI_Mod); + Result = clearRef(Result); if (onlyAccessesArgPointees(MRB) || onlyAccessesInaccessibleOrArgMem(MRB)) { bool DoesAlias = false; - ModRefInfo AllArgsMask = MRI_NoModRef; + bool IsMustAlias = true; + ModRefInfo AllArgsMask = ModRefInfo::NoModRef; if (doesAccessArgPointees(MRB)) { for (auto AI = CS.arg_begin(), AE = CS.arg_end(); AI != AE; ++AI) { const Value *Arg = *AI; @@ -190,34 +192,39 @@ ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS, if (ArgAlias != NoAlias) { ModRefInfo ArgMask = getArgModRefInfo(CS, ArgIdx); DoesAlias = true; - AllArgsMask = ModRefInfo(AllArgsMask | ArgMask); + AllArgsMask = unionModRef(AllArgsMask, ArgMask); } + // Conservatively clear IsMustAlias unless only MustAlias is found. + IsMustAlias &= (ArgAlias == MustAlias); } } + // Return NoModRef if no alias found with any argument. if (!DoesAlias) - return MRI_NoModRef; - Result = ModRefInfo(Result & AllArgsMask); + return ModRefInfo::NoModRef; + // Logical & between other AA analyses and argument analysis. + Result = intersectModRef(Result, AllArgsMask); + // If only MustAlias found above, set Must bit. + Result = IsMustAlias ? setMust(Result) : clearMust(Result); } // If Loc is a constant memory location, the call definitely could not // modify the memory location. - if ((Result & MRI_Mod) && - pointsToConstantMemory(Loc, /*OrLocal*/ false)) - Result = ModRefInfo(Result & ~MRI_Mod); + if (isModSet(Result) && pointsToConstantMemory(Loc, /*OrLocal*/ false)) + Result = clearMod(Result); return Result; } ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2) { - ModRefInfo Result = MRI_ModRef; + ModRefInfo Result = ModRefInfo::ModRef; for (const auto &AA : AAs) { - Result = ModRefInfo(Result & AA->getModRefInfo(CS1, CS2)); + Result = intersectModRef(Result, AA->getModRefInfo(CS1, CS2)); // Early-exit the moment we reach the bottom of the lattice. - if (Result == MRI_NoModRef) - return Result; + if (isNoModRef(Result)) + return ModRefInfo::NoModRef; } // Try to refine the mod-ref info further using other API entry points to the @@ -226,82 +233,112 @@ ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS1, // If CS1 or CS2 are readnone, they don't interact. auto CS1B = getModRefBehavior(CS1); if (CS1B == FMRB_DoesNotAccessMemory) - return MRI_NoModRef; + return ModRefInfo::NoModRef; auto CS2B = getModRefBehavior(CS2); if (CS2B == FMRB_DoesNotAccessMemory) - return MRI_NoModRef; + return ModRefInfo::NoModRef; // If they both only read from memory, there is no dependence. if (onlyReadsMemory(CS1B) && onlyReadsMemory(CS2B)) - return MRI_NoModRef; + return ModRefInfo::NoModRef; // If CS1 only reads memory, the only dependence on CS2 can be // from CS1 reading memory written by CS2. 
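Editor's note on the AliasAnalysis.cpp rewrite running through this file: the old MRI_* bitmask arithmetic (Result & MRI_Mod, Result == MRI_NoModRef) is replaced by an enum class ModRefInfo and small named helpers (isModSet, isNoModRef, intersectModRef, unionModRef, setMust, clearMust, ...), and a Must bit is tracked that survives only when every contributing alias query returned MustAlias. A self-contained sketch of how such a lattice and its helpers fit together; the bit layout below is illustrative, and only the enumerator names are taken from the patch:

#include <cstdio>

// Illustrative layout: bit 0 = Ref, bit 1 = Mod, bit 2 = "May", i.e. not known
// to come exclusively from MustAlias operands.
enum class ModRefInfo : unsigned {
  Must     = 0, MustRef = 1, MustMod = 2, MustModRef = 3,
  NoModRef = 4, Ref     = 5, Mod     = 6, ModRef     = 7,
};

static unsigned bits(ModRefInfo M) { return static_cast<unsigned>(M); }

inline bool isNoModRef(ModRefInfo M)    { return (bits(M) & 3) == 0; }
inline bool isModOrRefSet(ModRefInfo M) { return (bits(M) & 3) != 0; }
inline bool isModSet(ModRefInfo M)      { return (bits(M) & 2) != 0; }
inline bool isRefSet(ModRefInfo M)      { return (bits(M) & 1) != 0; }
inline bool isMustSet(ModRefInfo M)     { return (bits(M) & 4) == 0; }

// Intersection keeps only the accesses both operands allow; union accumulates
// accesses and drops Must as soon as either side has already dropped it.
inline ModRefInfo intersectModRef(ModRefInfo A, ModRefInfo B) {
  return ModRefInfo(bits(A) & bits(B));
}
inline ModRefInfo unionModRef(ModRefInfo A, ModRefInfo B) {
  return ModRefInfo(bits(A) | bits(B));
}
inline ModRefInfo clearMod(ModRefInfo M)  { return ModRefInfo(bits(M) & ~2u); }
inline ModRefInfo clearRef(ModRefInfo M)  { return ModRefInfo(bits(M) & ~1u); }
inline ModRefInfo setMust(ModRefInfo M)   { return ModRefInfo(bits(M) & ~4u); }
inline ModRefInfo clearMust(ModRefInfo M) { return ModRefInfo(bits(M) | 4u); }

int main() {
  // Refining "may mod or ref" against "only refs" leaves a Ref-only answer.
  ModRefInfo R = intersectModRef(ModRefInfo::ModRef, ModRefInfo::Ref);
  std::printf("mod? %d ref? %d must? %d\n", isModSet(R), isRefSet(R),
              isMustSet(R));
  return 0;
}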
if (onlyReadsMemory(CS1B)) - Result = ModRefInfo(Result & MRI_Ref); + Result = clearMod(Result); else if (doesNotReadMemory(CS1B)) - Result = ModRefInfo(Result & MRI_Mod); + Result = clearRef(Result); // If CS2 only access memory through arguments, accumulate the mod/ref // information from CS1's references to the memory referenced by // CS2's arguments. if (onlyAccessesArgPointees(CS2B)) { - ModRefInfo R = MRI_NoModRef; - if (doesAccessArgPointees(CS2B)) { - for (auto I = CS2.arg_begin(), E = CS2.arg_end(); I != E; ++I) { - const Value *Arg = *I; - if (!Arg->getType()->isPointerTy()) - continue; - unsigned CS2ArgIdx = std::distance(CS2.arg_begin(), I); - auto CS2ArgLoc = MemoryLocation::getForArgument(CS2, CS2ArgIdx, TLI); - - // ArgMask indicates what CS2 might do to CS2ArgLoc, and the dependence - // of CS1 on that location is the inverse. - ModRefInfo ArgMask = getArgModRefInfo(CS2, CS2ArgIdx); - if (ArgMask == MRI_Mod) - ArgMask = MRI_ModRef; - else if (ArgMask == MRI_Ref) - ArgMask = MRI_Mod; - - ArgMask = ModRefInfo(ArgMask & getModRefInfo(CS1, CS2ArgLoc)); - - R = ModRefInfo((R | ArgMask) & Result); - if (R == Result) - break; + if (!doesAccessArgPointees(CS2B)) + return ModRefInfo::NoModRef; + ModRefInfo R = ModRefInfo::NoModRef; + bool IsMustAlias = true; + for (auto I = CS2.arg_begin(), E = CS2.arg_end(); I != E; ++I) { + const Value *Arg = *I; + if (!Arg->getType()->isPointerTy()) + continue; + unsigned CS2ArgIdx = std::distance(CS2.arg_begin(), I); + auto CS2ArgLoc = MemoryLocation::getForArgument(CS2, CS2ArgIdx, TLI); + + // ArgModRefCS2 indicates what CS2 might do to CS2ArgLoc, and the + // dependence of CS1 on that location is the inverse: + // - If CS2 modifies location, dependence exists if CS1 reads or writes. + // - If CS2 only reads location, dependence exists if CS1 writes. + ModRefInfo ArgModRefCS2 = getArgModRefInfo(CS2, CS2ArgIdx); + ModRefInfo ArgMask = ModRefInfo::NoModRef; + if (isModSet(ArgModRefCS2)) + ArgMask = ModRefInfo::ModRef; + else if (isRefSet(ArgModRefCS2)) + ArgMask = ModRefInfo::Mod; + + // ModRefCS1 indicates what CS1 might do to CS2ArgLoc, and we use + // above ArgMask to update dependence info. + ModRefInfo ModRefCS1 = getModRefInfo(CS1, CS2ArgLoc); + ArgMask = intersectModRef(ArgMask, ModRefCS1); + + // Conservatively clear IsMustAlias unless only MustAlias is found. + IsMustAlias &= isMustSet(ModRefCS1); + + R = intersectModRef(unionModRef(R, ArgMask), Result); + if (R == Result) { + // On early exit, not all args were checked, cannot set Must. + if (I + 1 != E) + IsMustAlias = false; + break; } } - return R; + + if (isNoModRef(R)) + return ModRefInfo::NoModRef; + + // If MustAlias found above, set Must bit. + return IsMustAlias ? setMust(R) : clearMust(R); } // If CS1 only accesses memory through arguments, check if CS2 references // any of the memory referenced by CS1's arguments. If not, return NoModRef. if (onlyAccessesArgPointees(CS1B)) { - ModRefInfo R = MRI_NoModRef; - if (doesAccessArgPointees(CS1B)) { - for (auto I = CS1.arg_begin(), E = CS1.arg_end(); I != E; ++I) { - const Value *Arg = *I; - if (!Arg->getType()->isPointerTy()) - continue; - unsigned CS1ArgIdx = std::distance(CS1.arg_begin(), I); - auto CS1ArgLoc = MemoryLocation::getForArgument(CS1, CS1ArgIdx, TLI); - - // ArgMask indicates what CS1 might do to CS1ArgLoc; if CS1 might Mod - // CS1ArgLoc, then we care about either a Mod or a Ref by CS2. If CS1 - // might Ref, then we care only about a Mod by CS2. 
- ModRefInfo ArgMask = getArgModRefInfo(CS1, CS1ArgIdx); - ModRefInfo ArgR = getModRefInfo(CS2, CS1ArgLoc); - if (((ArgMask & MRI_Mod) != MRI_NoModRef && - (ArgR & MRI_ModRef) != MRI_NoModRef) || - ((ArgMask & MRI_Ref) != MRI_NoModRef && - (ArgR & MRI_Mod) != MRI_NoModRef)) - R = ModRefInfo((R | ArgMask) & Result); - - if (R == Result) - break; + if (!doesAccessArgPointees(CS1B)) + return ModRefInfo::NoModRef; + ModRefInfo R = ModRefInfo::NoModRef; + bool IsMustAlias = true; + for (auto I = CS1.arg_begin(), E = CS1.arg_end(); I != E; ++I) { + const Value *Arg = *I; + if (!Arg->getType()->isPointerTy()) + continue; + unsigned CS1ArgIdx = std::distance(CS1.arg_begin(), I); + auto CS1ArgLoc = MemoryLocation::getForArgument(CS1, CS1ArgIdx, TLI); + + // ArgModRefCS1 indicates what CS1 might do to CS1ArgLoc; if CS1 might + // Mod CS1ArgLoc, then we care about either a Mod or a Ref by CS2. If + // CS1 might Ref, then we care only about a Mod by CS2. + ModRefInfo ArgModRefCS1 = getArgModRefInfo(CS1, CS1ArgIdx); + ModRefInfo ModRefCS2 = getModRefInfo(CS2, CS1ArgLoc); + if ((isModSet(ArgModRefCS1) && isModOrRefSet(ModRefCS2)) || + (isRefSet(ArgModRefCS1) && isModSet(ModRefCS2))) + R = intersectModRef(unionModRef(R, ArgModRefCS1), Result); + + // Conservatively clear IsMustAlias unless only MustAlias is found. + IsMustAlias &= isMustSet(ModRefCS2); + + if (R == Result) { + // On early exit, not all args were checked, cannot set Must. + if (I + 1 != E) + IsMustAlias = false; + break; } } - return R; + + if (isNoModRef(R)) + return ModRefInfo::NoModRef; + + // If MustAlias found above, set Must bit. + return IsMustAlias ? setMust(R) : clearMust(R); } return Result; @@ -343,63 +380,77 @@ ModRefInfo AAResults::getModRefInfo(const LoadInst *L, const MemoryLocation &Loc) { // Be conservative in the face of atomic. if (isStrongerThan(L->getOrdering(), AtomicOrdering::Unordered)) - return MRI_ModRef; + return ModRefInfo::ModRef; // If the load address doesn't alias the given address, it doesn't read // or write the specified memory. - if (Loc.Ptr && !alias(MemoryLocation::get(L), Loc)) - return MRI_NoModRef; - + if (Loc.Ptr) { + AliasResult AR = alias(MemoryLocation::get(L), Loc); + if (AR == NoAlias) + return ModRefInfo::NoModRef; + if (AR == MustAlias) + return ModRefInfo::MustRef; + } // Otherwise, a load just reads. - return MRI_Ref; + return ModRefInfo::Ref; } ModRefInfo AAResults::getModRefInfo(const StoreInst *S, const MemoryLocation &Loc) { // Be conservative in the face of atomic. if (isStrongerThan(S->getOrdering(), AtomicOrdering::Unordered)) - return MRI_ModRef; + return ModRefInfo::ModRef; if (Loc.Ptr) { + AliasResult AR = alias(MemoryLocation::get(S), Loc); // If the store address cannot alias the pointer in question, then the // specified memory cannot be modified by the store. - if (!alias(MemoryLocation::get(S), Loc)) - return MRI_NoModRef; + if (AR == NoAlias) + return ModRefInfo::NoModRef; // If the pointer is a pointer to constant memory, then it could not have // been modified by this store. if (pointsToConstantMemory(Loc)) - return MRI_NoModRef; + return ModRefInfo::NoModRef; + + // If the store address aliases the pointer as must alias, set Must. + if (AR == MustAlias) + return ModRefInfo::MustMod; } // Otherwise, a store just writes. - return MRI_Mod; + return ModRefInfo::Mod; } ModRefInfo AAResults::getModRefInfo(const FenceInst *S, const MemoryLocation &Loc) { // If we know that the location is a constant memory location, the fence // cannot modify this location. 
if (Loc.Ptr && pointsToConstantMemory(Loc)) - return MRI_Ref; - return MRI_ModRef; + return ModRefInfo::Ref; + return ModRefInfo::ModRef; } ModRefInfo AAResults::getModRefInfo(const VAArgInst *V, const MemoryLocation &Loc) { if (Loc.Ptr) { + AliasResult AR = alias(MemoryLocation::get(V), Loc); // If the va_arg address cannot alias the pointer in question, then the // specified memory cannot be accessed by the va_arg. - if (!alias(MemoryLocation::get(V), Loc)) - return MRI_NoModRef; + if (AR == NoAlias) + return ModRefInfo::NoModRef; // If the pointer is a pointer to constant memory, then it could not have // been modified by this va_arg. if (pointsToConstantMemory(Loc)) - return MRI_NoModRef; + return ModRefInfo::NoModRef; + + // If the va_arg aliases the pointer as must alias, set Must. + if (AR == MustAlias) + return ModRefInfo::MustModRef; } // Otherwise, a va_arg reads and writes. - return MRI_ModRef; + return ModRefInfo::ModRef; } ModRefInfo AAResults::getModRefInfo(const CatchPadInst *CatchPad, @@ -408,11 +459,11 @@ ModRefInfo AAResults::getModRefInfo(const CatchPadInst *CatchPad, // If the pointer is a pointer to constant memory, // then it could not have been modified by this catchpad. if (pointsToConstantMemory(Loc)) - return MRI_NoModRef; + return ModRefInfo::NoModRef; } // Otherwise, a catchpad reads and writes. - return MRI_ModRef; + return ModRefInfo::ModRef; } ModRefInfo AAResults::getModRefInfo(const CatchReturnInst *CatchRet, @@ -421,42 +472,58 @@ ModRefInfo AAResults::getModRefInfo(const CatchReturnInst *CatchRet, // If the pointer is a pointer to constant memory, // then it could not have been modified by this catchpad. if (pointsToConstantMemory(Loc)) - return MRI_NoModRef; + return ModRefInfo::NoModRef; } // Otherwise, a catchret reads and writes. - return MRI_ModRef; + return ModRefInfo::ModRef; } ModRefInfo AAResults::getModRefInfo(const AtomicCmpXchgInst *CX, const MemoryLocation &Loc) { // Acquire/Release cmpxchg has properties that matter for arbitrary addresses. if (isStrongerThanMonotonic(CX->getSuccessOrdering())) - return MRI_ModRef; + return ModRefInfo::ModRef; - // If the cmpxchg address does not alias the location, it does not access it. - if (Loc.Ptr && !alias(MemoryLocation::get(CX), Loc)) - return MRI_NoModRef; + if (Loc.Ptr) { + AliasResult AR = alias(MemoryLocation::get(CX), Loc); + // If the cmpxchg address does not alias the location, it does not access + // it. + if (AR == NoAlias) + return ModRefInfo::NoModRef; + + // If the cmpxchg address aliases the pointer as must alias, set Must. + if (AR == MustAlias) + return ModRefInfo::MustModRef; + } - return MRI_ModRef; + return ModRefInfo::ModRef; } ModRefInfo AAResults::getModRefInfo(const AtomicRMWInst *RMW, const MemoryLocation &Loc) { // Acquire/Release atomicrmw has properties that matter for arbitrary addresses. if (isStrongerThanMonotonic(RMW->getOrdering())) - return MRI_ModRef; + return ModRefInfo::ModRef; - // If the atomicrmw address does not alias the location, it does not access it. - if (Loc.Ptr && !alias(MemoryLocation::get(RMW), Loc)) - return MRI_NoModRef; + if (Loc.Ptr) { + AliasResult AR = alias(MemoryLocation::get(RMW), Loc); + // If the atomicrmw address does not alias the location, it does not access + // it. + if (AR == NoAlias) + return ModRefInfo::NoModRef; + + // If the atomicrmw address aliases the pointer as must alias, set Must. 
+ if (AR == MustAlias) + return ModRefInfo::MustModRef; + } - return MRI_ModRef; + return ModRefInfo::ModRef; } /// \brief Return information about whether a particular call site modifies /// or reads the specified memory location \p MemLoc before instruction \p I -/// in a BasicBlock. A ordered basic block \p OBB can be used to speed up +/// in a BasicBlock. An ordered basic block \p OBB can be used to speed up /// instruction-ordering queries inside the BasicBlock containing \p I. /// FIXME: this is really just shoring-up a deficiency in alias analysis. /// BasicAA isn't willing to spend linear time determining whether an alloca @@ -467,26 +534,28 @@ ModRefInfo AAResults::callCapturesBefore(const Instruction *I, DominatorTree *DT, OrderedBasicBlock *OBB) { if (!DT) - return MRI_ModRef; + return ModRefInfo::ModRef; const Value *Object = GetUnderlyingObject(MemLoc.Ptr, I->getModule()->getDataLayout()); if (!isIdentifiedObject(Object) || isa(Object) || isa(Object)) - return MRI_ModRef; + return ModRefInfo::ModRef; ImmutableCallSite CS(I); if (!CS.getInstruction() || CS.getInstruction() == Object) - return MRI_ModRef; + return ModRefInfo::ModRef; if (PointerMayBeCapturedBefore(Object, /* ReturnCaptures */ true, /* StoreCaptures */ true, I, DT, /* include Object */ true, /* OrderedBasicBlock */ OBB)) - return MRI_ModRef; + return ModRefInfo::ModRef; unsigned ArgNo = 0; - ModRefInfo R = MRI_NoModRef; + ModRefInfo R = ModRefInfo::NoModRef; + bool MustAlias = true; + // Set flag only if no May found and all operands processed. for (auto CI = CS.data_operands_begin(), CE = CS.data_operands_end(); CI != CE; ++CI, ++ArgNo) { // Only look at the no-capture or byval pointer arguments. If this @@ -497,21 +566,25 @@ ModRefInfo AAResults::callCapturesBefore(const Instruction *I, ArgNo < CS.getNumArgOperands() && !CS.isByValArgument(ArgNo))) continue; + AliasResult AR = alias(MemoryLocation(*CI), MemoryLocation(Object)); // If this is a no-capture pointer argument, see if we can tell that it // is impossible to alias the pointer we're checking. If not, we have to // assume that the call could touch the pointer, even though it doesn't // escape. - if (isNoAlias(MemoryLocation(*CI), MemoryLocation(Object))) + if (AR != MustAlias) + MustAlias = false; + if (AR == NoAlias) continue; if (CS.doesNotAccessMemory(ArgNo)) continue; if (CS.onlyReadsMemory(ArgNo)) { - R = MRI_Ref; + R = ModRefInfo::Ref; continue; } - return MRI_ModRef; + // Not returning MustModRef since we have not seen all the arguments. + return ModRefInfo::ModRef; } - return R; + return MustAlias ? setMust(R) : clearMust(R); } /// canBasicBlockModify - Return true if it is possible for execution of the @@ -519,7 +592,7 @@ ModRefInfo AAResults::callCapturesBefore(const Instruction *I, /// bool AAResults::canBasicBlockModify(const BasicBlock &BB, const MemoryLocation &Loc) { - return canInstructionRangeModRef(BB.front(), BB.back(), Loc, MRI_Mod); + return canInstructionRangeModRef(BB.front(), BB.back(), Loc, ModRefInfo::Mod); } /// canInstructionRangeModRef - Return true if it is possible for the @@ -538,7 +611,7 @@ bool AAResults::canInstructionRangeModRef(const Instruction &I1, ++E; // Convert from inclusive to exclusive range. 
for (; I != E; ++I) // Check every instruction in range - if (getModRefInfo(&*I, Loc) & Mode) + if (isModOrRefSet(intersectModRef(getModRefInfo(&*I, Loc), Mode))) return true; return false; } diff --git a/lib/Analysis/AliasAnalysisEvaluator.cpp b/lib/Analysis/AliasAnalysisEvaluator.cpp index 435c782d97a5..f737cecc43d1 100644 --- a/lib/Analysis/AliasAnalysisEvaluator.cpp +++ b/lib/Analysis/AliasAnalysisEvaluator.cpp @@ -31,9 +31,13 @@ static cl::opt PrintPartialAlias("print-partial-aliases", cl::ReallyHidden static cl::opt PrintMustAlias("print-must-aliases", cl::ReallyHidden); static cl::opt PrintNoModRef("print-no-modref", cl::ReallyHidden); -static cl::opt PrintMod("print-mod", cl::ReallyHidden); static cl::opt PrintRef("print-ref", cl::ReallyHidden); +static cl::opt PrintMod("print-mod", cl::ReallyHidden); static cl::opt PrintModRef("print-modref", cl::ReallyHidden); +static cl::opt PrintMust("print-must", cl::ReallyHidden); +static cl::opt PrintMustRef("print-mustref", cl::ReallyHidden); +static cl::opt PrintMustMod("print-mustmod", cl::ReallyHidden); +static cl::opt PrintMustModRef("print-mustmodref", cl::ReallyHidden); static cl::opt EvalAAMD("evaluate-aa-metadata", cl::ReallyHidden); @@ -244,24 +248,43 @@ void AAEvaluator::runInternal(Function &F, AAResults &AA) { if (ElTy->isSized()) Size = DL.getTypeStoreSize(ElTy); switch (AA.getModRefInfo(C, Pointer, Size)) { - case MRI_NoModRef: + case ModRefInfo::NoModRef: PrintModRefResults("NoModRef", PrintNoModRef, I, Pointer, F.getParent()); ++NoModRefCount; break; - case MRI_Mod: + case ModRefInfo::Mod: PrintModRefResults("Just Mod", PrintMod, I, Pointer, F.getParent()); ++ModCount; break; - case MRI_Ref: + case ModRefInfo::Ref: PrintModRefResults("Just Ref", PrintRef, I, Pointer, F.getParent()); ++RefCount; break; - case MRI_ModRef: + case ModRefInfo::ModRef: PrintModRefResults("Both ModRef", PrintModRef, I, Pointer, F.getParent()); ++ModRefCount; break; + case ModRefInfo::Must: + PrintModRefResults("Must", PrintMust, I, Pointer, F.getParent()); + ++MustCount; + break; + case ModRefInfo::MustMod: + PrintModRefResults("Just Mod (MustAlias)", PrintMustMod, I, Pointer, + F.getParent()); + ++MustModCount; + break; + case ModRefInfo::MustRef: + PrintModRefResults("Just Ref (MustAlias)", PrintMustRef, I, Pointer, + F.getParent()); + ++MustRefCount; + break; + case ModRefInfo::MustModRef: + PrintModRefResults("Both ModRef (MustAlias)", PrintMustModRef, I, + Pointer, F.getParent()); + ++MustModRefCount; + break; } } } @@ -272,22 +295,41 @@ void AAEvaluator::runInternal(Function &F, AAResults &AA) { if (D == C) continue; switch (AA.getModRefInfo(*C, *D)) { - case MRI_NoModRef: + case ModRefInfo::NoModRef: PrintModRefResults("NoModRef", PrintNoModRef, *C, *D, F.getParent()); ++NoModRefCount; break; - case MRI_Mod: + case ModRefInfo::Mod: PrintModRefResults("Just Mod", PrintMod, *C, *D, F.getParent()); ++ModCount; break; - case MRI_Ref: + case ModRefInfo::Ref: PrintModRefResults("Just Ref", PrintRef, *C, *D, F.getParent()); ++RefCount; break; - case MRI_ModRef: + case ModRefInfo::ModRef: PrintModRefResults("Both ModRef", PrintModRef, *C, *D, F.getParent()); ++ModRefCount; break; + case ModRefInfo::Must: + PrintModRefResults("Must", PrintMust, *C, *D, F.getParent()); + ++MustCount; + break; + case ModRefInfo::MustMod: + PrintModRefResults("Just Mod (MustAlias)", PrintMustMod, *C, *D, + F.getParent()); + ++MustModCount; + break; + case ModRefInfo::MustRef: + PrintModRefResults("Just Ref (MustAlias)", PrintMustRef, *C, *D, + F.getParent()); + 
++MustRefCount; + break; + case ModRefInfo::MustModRef: + PrintModRefResults("Both ModRef (MustAlias)", PrintMustModRef, *C, *D, + F.getParent()); + ++MustModRefCount; + break; } } } @@ -325,7 +367,8 @@ AAEvaluator::~AAEvaluator() { } // Display the summary for mod/ref analysis - int64_t ModRefSum = NoModRefCount + ModCount + RefCount + ModRefCount; + int64_t ModRefSum = NoModRefCount + RefCount + ModCount + ModRefCount + + MustCount + MustRefCount + MustModCount + MustModRefCount; if (ModRefSum == 0) { errs() << " Alias Analysis Mod/Ref Evaluator Summary: no " "mod/ref!\n"; @@ -339,10 +382,22 @@ AAEvaluator::~AAEvaluator() { PrintPercent(RefCount, ModRefSum); errs() << " " << ModRefCount << " mod & ref responses "; PrintPercent(ModRefCount, ModRefSum); + errs() << " " << MustCount << " must responses "; + PrintPercent(MustCount, ModRefSum); + errs() << " " << MustModCount << " must mod responses "; + PrintPercent(MustModCount, ModRefSum); + errs() << " " << MustRefCount << " must ref responses "; + PrintPercent(MustRefCount, ModRefSum); + errs() << " " << MustModRefCount << " must mod & ref responses "; + PrintPercent(MustModRefCount, ModRefSum); errs() << " Alias Analysis Evaluator Mod/Ref Summary: " << NoModRefCount * 100 / ModRefSum << "%/" << ModCount * 100 / ModRefSum << "%/" << RefCount * 100 / ModRefSum - << "%/" << ModRefCount * 100 / ModRefSum << "%\n"; + << "%/" << ModRefCount * 100 / ModRefSum << "%/" + << MustCount * 100 / ModRefSum << "%/" + << MustRefCount * 100 / ModRefSum << "%/" + << MustModCount * 100 / ModRefSum << "%/" + << MustModRefCount * 100 / ModRefSum << "%\n"; } } diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp index b575944092a9..c88e0dd7dc44 100644 --- a/lib/Analysis/AliasSetTracker.cpp +++ b/lib/Analysis/AliasSetTracker.cpp @@ -211,8 +211,8 @@ bool AliasSet::aliasesPointer(const Value *Ptr, uint64_t Size, if (!UnknownInsts.empty()) { for (unsigned i = 0, e = UnknownInsts.size(); i != e; ++i) if (auto *Inst = getUnknownInst(i)) - if (AA.getModRefInfo(Inst, MemoryLocation(Ptr, Size, AAInfo)) != - MRI_NoModRef) + if (isModOrRefSet( + AA.getModRefInfo(Inst, MemoryLocation(Ptr, Size, AAInfo)))) return true; } @@ -231,15 +231,15 @@ bool AliasSet::aliasesUnknownInst(const Instruction *Inst, for (unsigned i = 0, e = UnknownInsts.size(); i != e; ++i) { if (auto *UnknownInst = getUnknownInst(i)) { ImmutableCallSite C1(UnknownInst), C2(Inst); - if (!C1 || !C2 || AA.getModRefInfo(C1, C2) != MRI_NoModRef || - AA.getModRefInfo(C2, C1) != MRI_NoModRef) + if (!C1 || !C2 || isModOrRefSet(AA.getModRefInfo(C1, C2)) || + isModOrRefSet(AA.getModRefInfo(C2, C1))) return true; } } for (iterator I = begin(), E = end(); I != E; ++I) - if (AA.getModRefInfo(Inst, MemoryLocation(I.getPointer(), I.getSize(), - I.getAAInfo())) != MRI_NoModRef) + if (isModOrRefSet(AA.getModRefInfo( + Inst, MemoryLocation(I.getPointer(), I.getSize(), I.getAAInfo())))) return true; return false; @@ -572,12 +572,11 @@ AliasSet &AliasSetTracker::mergeAllAliasSets() { AliasAnyAS->AliasAny = true; for (auto Cur : ASVector) { - // If Cur was already forwarding, just forward to the new AS instead. 
AliasSet *FwdTo = Cur->Forward; if (FwdTo) { Cur->Forward = AliasAnyAS; - AliasAnyAS->addRef(); + AliasAnyAS->addRef(); FwdTo->dropRef(*this); continue; } diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp index fb9ece2bd206..142589b68f80 100644 --- a/lib/Analysis/BasicAliasAnalysis.cpp +++ b/lib/Analysis/BasicAliasAnalysis.cpp @@ -285,6 +285,19 @@ static bool isObjectSize(const Value *V, uint64_t Size, const DataLayout &DL, case Instruction::Shl: V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, ZExtBits, SExtBits, DL, Depth + 1, AC, DT, NSW, NUW); + + // We're trying to linearize an expression of the kind: + // shl i8 -128, 36 + // where the shift count exceeds the bitwidth of the type. + // We can't decompose this further (the expression would return + // a poison value). + if (Offset.getBitWidth() < RHS.getLimitedValue() || + Scale.getBitWidth() < RHS.getLimitedValue()) { + Scale = 1; + Offset = 0; + return V; + } + Offset <<= RHS.getLimitedValue(); Scale <<= RHS.getLimitedValue(); // the semantics of nsw and nuw for left shifts don't match those of @@ -490,6 +503,13 @@ bool BasicAAResult::DecomposeGEPExpression(const Value *V, Index = GetLinearExpression(Index, IndexScale, IndexOffset, ZExtBits, SExtBits, DL, 0, AC, DT, NSW, NUW); + // All GEP math happens in the width of the pointer type, + // so we can truncate the value to 64-bits as we don't handle + // currently pointers larger than 64 bits and we would crash + // later. TODO: Make `Scale` an APInt to avoid this problem. + if (IndexScale.getBitWidth() > 64) + IndexScale = IndexScale.sextOrTrunc(64); + // The GEP index scale ("Scale") scales C1*V+C2, yielding (C1*V+C2)*Scale. // This gives us an aggregate computation of (C1*Scale)*V + C2*Scale. Decomposed.OtherOffset += IndexOffset.getSExtValue() * Scale; @@ -687,13 +707,13 @@ ModRefInfo BasicAAResult::getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx) { // Checking for known builtin intrinsics and target library functions. if (isWriteOnlyParam(CS, ArgIdx, TLI)) - return MRI_Mod; + return ModRefInfo::Mod; if (CS.paramHasAttr(ArgIdx, Attribute::ReadOnly)) - return MRI_Ref; + return ModRefInfo::Ref; if (CS.paramHasAttr(ArgIdx, Attribute::ReadNone)) - return MRI_NoModRef; + return ModRefInfo::NoModRef; return AAResultBase::getArgModRefInfo(CS, ArgIdx); } @@ -770,7 +790,7 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS, if (isa(Object)) if (const CallInst *CI = dyn_cast(CS.getInstruction())) if (CI->isTailCall()) - return MRI_NoModRef; + return ModRefInfo::NoModRef; // If the pointer is to a locally allocated object that does not escape, // then the call can not mod/ref the pointer unless the call takes the pointer @@ -780,7 +800,8 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS, // Optimistically assume that call doesn't touch Object and check this // assumption in the following loop. - ModRefInfo Result = MRI_NoModRef; + ModRefInfo Result = ModRefInfo::NoModRef; + bool IsMustAlias = true; unsigned OperandNo = 0; for (auto CI = CS.data_operands_begin(), CE = CS.data_operands_end(); @@ -802,29 +823,40 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS, // is impossible to alias the pointer we're checking. AliasResult AR = getBestAAResults().alias(MemoryLocation(*CI), MemoryLocation(Object)); - + if (AR != MustAlias) + IsMustAlias = false; // Operand doesnt alias 'Object', continue looking for other aliases if (AR == NoAlias) continue; // Operand aliases 'Object', but call doesn't modify it. 
Strengthen // initial assumption and keep looking in case if there are more aliases. if (CS.onlyReadsMemory(OperandNo)) { - Result = static_cast(Result | MRI_Ref); + Result = setRef(Result); continue; } // Operand aliases 'Object' but call only writes into it. if (CS.doesNotReadMemory(OperandNo)) { - Result = static_cast(Result | MRI_Mod); + Result = setMod(Result); continue; } // This operand aliases 'Object' and call reads and writes into it. - Result = MRI_ModRef; + // Setting ModRef will not yield an early return below, MustAlias is not + // used further. + Result = ModRefInfo::ModRef; break; } + // No operand aliases, reset Must bit. Add below if at least one aliases + // and all aliases found are MustAlias. + if (isNoModRef(Result)) + IsMustAlias = false; + // Early return if we improved mod ref information - if (Result != MRI_ModRef) - return Result; + if (!isModAndRefSet(Result)) { + if (isNoModRef(Result)) + return ModRefInfo::NoModRef; + return IsMustAlias ? setMust(Result) : clearMust(Result); + } } // If the CallSite is to malloc or calloc, we can assume that it doesn't @@ -832,13 +864,13 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS, // routines do not read values visible in the IR. TODO: Consider special // casing realloc and strdup routines which access only their arguments as // well. Or alternatively, replace all of this with inaccessiblememonly once - // that's implemented fully. + // that's implemented fully. auto *Inst = CS.getInstruction(); if (isMallocOrCallocLikeFn(Inst, &TLI)) { // Be conservative if the accessed pointer may alias the allocation - // fallback to the generic handling below. if (getBestAAResults().alias(MemoryLocation(Inst), Loc) == NoAlias) - return MRI_NoModRef; + return ModRefInfo::NoModRef; } // The semantics of memcpy intrinsics forbid overlap between their respective @@ -851,18 +883,18 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS, if ((SrcAA = getBestAAResults().alias(MemoryLocation::getForSource(Inst), Loc)) == MustAlias) // Loc is exactly the memcpy source thus disjoint from memcpy dest. - return MRI_Ref; + return ModRefInfo::Ref; if ((DestAA = getBestAAResults().alias(MemoryLocation::getForDest(Inst), Loc)) == MustAlias) // The converse case. - return MRI_Mod; + return ModRefInfo::Mod; // It's also possible for Loc to alias both src and dest, or neither. - ModRefInfo rv = MRI_NoModRef; + ModRefInfo rv = ModRefInfo::NoModRef; if (SrcAA != NoAlias) - rv = static_cast(rv | MRI_Ref); + rv = setRef(rv); if (DestAA != NoAlias) - rv = static_cast(rv | MRI_Mod); + rv = setMod(rv); return rv; } @@ -870,7 +902,7 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS, // proper control dependencies will be maintained, it never aliases any // particular memory location. if (isIntrinsicCall(CS, Intrinsic::assume)) - return MRI_NoModRef; + return ModRefInfo::NoModRef; // Like assumes, guard intrinsics are also marked as arbitrarily writing so // that proper control dependencies are maintained but they never mods any @@ -880,7 +912,7 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS, // heap state at the point the guard is issued needs to be consistent in case // the guard invokes the "deopt" continuation. 
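The memcpy handling above leans entirely on the intrinsic's guarantee that source and destination do not overlap: a location that must-alias one operand is therefore disjoint from the other. A compact standalone restatement of that rule, with stand-in enums rather than the real AliasResult and ModRefInfo types, looks like this:

#include <cstdint>

enum class Alias { No, May, Must };
enum class MR : uint8_t { None = 0, Ref = 1, Mod = 2, ModRef = 3 };

// Mod/ref of a memcpy call with respect to a location Loc, given how Loc
// aliases the source and the destination operands.
MR memcpyModRef(Alias LocVsSrc, Alias LocVsDest) {
  if (LocVsSrc == Alias::Must)
    return MR::Ref;                       // Loc is exactly the source: only read
  if (LocVsDest == Alias::Must)
    return MR::Mod;                       // Loc is exactly the destination: only written
  uint8_t R = 0;
  if (LocVsSrc != Alias::No)
    R |= static_cast<uint8_t>(MR::Ref);   // may overlap the bytes being read
  if (LocVsDest != Alias::No)
    R |= static_cast<uint8_t>(MR::Mod);   // may overlap the bytes being written
  return MR(R);
}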
if (isIntrinsicCall(CS, Intrinsic::experimental_guard)) - return MRI_Ref; + return ModRefInfo::Ref; // Like assumes, invariant.start intrinsics were also marked as arbitrarily // writing so that proper control dependencies are maintained but they never @@ -906,7 +938,7 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS, // rules of invariant.start) and print 40, while the first program always // prints 50. if (isIntrinsicCall(CS, Intrinsic::invariant_start)) - return MRI_Ref; + return ModRefInfo::Ref; // The AAResultBase base class has some smarts, lets use them. return AAResultBase::getModRefInfo(CS, Loc); @@ -919,7 +951,7 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS1, // particular memory location. if (isIntrinsicCall(CS1, Intrinsic::assume) || isIntrinsicCall(CS2, Intrinsic::assume)) - return MRI_NoModRef; + return ModRefInfo::NoModRef; // Like assumes, guard intrinsics are also marked as arbitrarily writing so // that proper control dependencies are maintained but they never mod any @@ -933,10 +965,14 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS1, // possibilities for guard intrinsics. if (isIntrinsicCall(CS1, Intrinsic::experimental_guard)) - return getModRefBehavior(CS2) & MRI_Mod ? MRI_Ref : MRI_NoModRef; + return isModSet(createModRefInfo(getModRefBehavior(CS2))) + ? ModRefInfo::Ref + : ModRefInfo::NoModRef; if (isIntrinsicCall(CS2, Intrinsic::experimental_guard)) - return getModRefBehavior(CS1) & MRI_Mod ? MRI_Mod : MRI_NoModRef; + return isModSet(createModRefInfo(getModRefBehavior(CS1))) + ? ModRefInfo::Mod + : ModRefInfo::NoModRef; // The AAResultBase base class has some smarts, lets use them. return AAResultBase::getModRefInfo(CS1, CS2); diff --git a/lib/Analysis/BlockFrequencyInfoImpl.cpp b/lib/Analysis/BlockFrequencyInfoImpl.cpp index 7e323022d9ce..c9d27a21c958 100644 --- a/lib/Analysis/BlockFrequencyInfoImpl.cpp +++ b/lib/Analysis/BlockFrequencyInfoImpl.cpp @@ -567,7 +567,7 @@ BlockFrequencyInfoImplBase::getProfileCountFromFreq(const Function &F, if (!EntryCount) return None; // Use 128 bit APInt to do the arithmetic to avoid overflow. - APInt BlockCount(128, EntryCount.getValue()); + APInt BlockCount(128, EntryCount.getCount()); APInt BlockFreq(128, Freq); APInt EntryFreq(128, getEntryFreq()); BlockCount *= BlockFreq; diff --git a/lib/Analysis/CFGPrinter.cpp b/lib/Analysis/CFGPrinter.cpp index a85af6c9c93f..fb261755e5d1 100644 --- a/lib/Analysis/CFGPrinter.cpp +++ b/lib/Analysis/CFGPrinter.cpp @@ -82,7 +82,7 @@ PreservedAnalyses CFGOnlyViewerPass::run(Function &F, return PreservedAnalyses::all(); } -static void writeCFGToDotFile(Function &F) { +static void writeCFGToDotFile(Function &F, bool CFGOnly = false) { std::string Filename = ("cfg." 
+ F.getName() + ".dot").str(); errs() << "Writing '" << Filename << "'..."; @@ -90,7 +90,7 @@ static void writeCFGToDotFile(Function &F) { raw_fd_ostream File(Filename, EC, sys::fs::F_Text); if (!EC) - WriteGraph(File, (const Function*)&F); + WriteGraph(File, (const Function*)&F, CFGOnly); else errs() << " error opening file for writing!"; errs() << "\n"; @@ -134,7 +134,7 @@ namespace { } bool runOnFunction(Function &F) override { - writeCFGToDotFile(F); + writeCFGToDotFile(F, /*CFGOnly=*/true); return false; } void print(raw_ostream &OS, const Module* = nullptr) const override {} @@ -152,7 +152,7 @@ INITIALIZE_PASS(CFGOnlyPrinterLegacyPass, "dot-cfg-only", PreservedAnalyses CFGOnlyPrinterPass::run(Function &F, FunctionAnalysisManager &AM) { - writeCFGToDotFile(F); + writeCFGToDotFile(F, /*CFGOnly=*/true); return PreservedAnalyses::all(); } diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt index af2e30db2c12..86f51cc0dff1 100644 --- a/lib/Analysis/CMakeLists.txt +++ b/lib/Analysis/CMakeLists.txt @@ -74,6 +74,7 @@ add_llvm_library(LLVMAnalysis ScalarEvolutionAliasAnalysis.cpp ScalarEvolutionExpander.cpp ScalarEvolutionNormalization.cpp + SyntheticCountsUtils.cpp TargetLibraryInfo.cpp TargetTransformInfo.cpp Trace.cpp diff --git a/lib/Analysis/CodeMetrics.cpp b/lib/Analysis/CodeMetrics.cpp index e4d9292db92d..ac7d14ebdaea 100644 --- a/lib/Analysis/CodeMetrics.cpp +++ b/lib/Analysis/CodeMetrics.cpp @@ -19,7 +19,6 @@ #include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" -#include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/Analysis/CostModel.cpp b/lib/Analysis/CostModel.cpp index 867fa587bd95..3d55bf20bb40 100644 --- a/lib/Analysis/CostModel.cpp +++ b/lib/Analysis/CostModel.cpp @@ -21,7 +21,6 @@ #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Function.h" -#include "llvm/IR/Instructions.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" diff --git a/lib/Analysis/DemandedBits.cpp b/lib/Analysis/DemandedBits.cpp index 7276f2524fed..de7d21f9f133 100644 --- a/lib/Analysis/DemandedBits.cpp +++ b/lib/Analysis/DemandedBits.cpp @@ -385,8 +385,8 @@ bool DemandedBits::isInstructionDead(Instruction *I) { void DemandedBits::print(raw_ostream &OS) { performAnalysis(); for (auto &KV : AliveBits) { - OS << "DemandedBits: 0x" << utohexstr(KV.second.getLimitedValue()) << " for " - << *KV.first << "\n"; + OS << "DemandedBits: 0x" << Twine::utohexstr(KV.second.getLimitedValue()) + << " for " << *KV.first << '\n'; } } diff --git a/lib/Analysis/DivergenceAnalysis.cpp b/lib/Analysis/DivergenceAnalysis.cpp index 2d39a0b02150..ac684ec18466 100644 --- a/lib/Analysis/DivergenceAnalysis.cpp +++ b/lib/Analysis/DivergenceAnalysis.cpp @@ -71,7 +71,6 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Value.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/Analysis/GlobalsModRef.cpp b/lib/Analysis/GlobalsModRef.cpp index 4ef023379bb6..9f33b94b1349 100644 --- a/lib/Analysis/GlobalsModRef.cpp +++ b/lib/Analysis/GlobalsModRef.cpp @@ -84,12 +84,18 @@ class GlobalsAAResult::FunctionInfo { /// The bit that flags that this function may read any global. This is /// chosen to mix together with ModRefInfo bits. 
+ /// FIXME: This assumes ModRefInfo lattice will remain 4 bits! + /// It overlaps with ModRefInfo::Must bit! + /// FunctionInfo.getModRefInfo() masks out everything except ModRef so + /// this remains correct, but the Must info is lost. enum { MayReadAnyGlobal = 4 }; /// Checks to document the invariants of the bit packing here. - static_assert((MayReadAnyGlobal & MRI_ModRef) == 0, + static_assert((MayReadAnyGlobal & static_cast(ModRefInfo::MustModRef)) == + 0, "ModRef and the MayReadAnyGlobal flag bits overlap."); - static_assert(((MayReadAnyGlobal | MRI_ModRef) >> + static_assert(((MayReadAnyGlobal | + static_cast(ModRefInfo::MustModRef)) >> AlignedMapPointerTraits::NumLowBitsAvailable) == 0, "Insufficient low bits to store our flag and ModRef info."); @@ -124,14 +130,22 @@ class GlobalsAAResult::FunctionInfo { return *this; } + /// This method clears MayReadAnyGlobal bit added by GlobalsAAResult to return + /// the corresponding ModRefInfo. It must align in functionality with + /// clearMust(). + ModRefInfo globalClearMayReadAnyGlobal(int I) const { + return ModRefInfo((I & static_cast(ModRefInfo::ModRef)) | + static_cast(ModRefInfo::NoModRef)); + } + /// Returns the \c ModRefInfo info for this function. ModRefInfo getModRefInfo() const { - return ModRefInfo(Info.getInt() & MRI_ModRef); + return globalClearMayReadAnyGlobal(Info.getInt()); } /// Adds new \c ModRefInfo for this function to its state. void addModRefInfo(ModRefInfo NewMRI) { - Info.setInt(Info.getInt() | NewMRI); + Info.setInt(Info.getInt() | static_cast(setMust(NewMRI))); } /// Returns whether this function may read any global variable, and we don't @@ -144,17 +158,18 @@ class GlobalsAAResult::FunctionInfo { /// Returns the \c ModRefInfo info for this function w.r.t. a particular /// global, which may be more precise than the general information above. ModRefInfo getModRefInfoForGlobal(const GlobalValue &GV) const { - ModRefInfo GlobalMRI = mayReadAnyGlobal() ? MRI_Ref : MRI_NoModRef; + ModRefInfo GlobalMRI = + mayReadAnyGlobal() ? ModRefInfo::Ref : ModRefInfo::NoModRef; if (AlignedMap *P = Info.getPointer()) { auto I = P->Map.find(&GV); if (I != P->Map.end()) - GlobalMRI = ModRefInfo(GlobalMRI | I->second); + GlobalMRI = unionModRef(GlobalMRI, I->second); } return GlobalMRI; } /// Add mod/ref info from another function into ours, saturating towards - /// MRI_ModRef. + /// ModRef. void addFunctionInfo(const FunctionInfo &FI) { addModRefInfo(FI.getModRefInfo()); @@ -173,7 +188,7 @@ class GlobalsAAResult::FunctionInfo { Info.setPointer(P); } auto &GlobalMRI = P->Map[&GV]; - GlobalMRI = ModRefInfo(GlobalMRI | NewMRI); + GlobalMRI = unionModRef(GlobalMRI, NewMRI); } /// Clear a global's ModRef info. 
Should be used when a global is being @@ -230,9 +245,9 @@ FunctionModRefBehavior GlobalsAAResult::getModRefBehavior(const Function *F) { FunctionModRefBehavior Min = FMRB_UnknownModRefBehavior; if (FunctionInfo *FI = getFunctionInfo(F)) { - if (FI->getModRefInfo() == MRI_NoModRef) + if (!isModOrRefSet(FI->getModRefInfo())) Min = FMRB_DoesNotAccessMemory; - else if ((FI->getModRefInfo() & MRI_Mod) == 0) + else if (!isModSet(FI->getModRefInfo())) Min = FMRB_OnlyReadsMemory; } @@ -246,9 +261,9 @@ GlobalsAAResult::getModRefBehavior(ImmutableCallSite CS) { if (!CS.hasOperandBundles()) if (const Function *F = CS.getCalledFunction()) if (FunctionInfo *FI = getFunctionInfo(F)) { - if (FI->getModRefInfo() == MRI_NoModRef) + if (!isModOrRefSet(FI->getModRefInfo())) Min = FMRB_DoesNotAccessMemory; - else if ((FI->getModRefInfo() & MRI_Mod) == 0) + else if (!isModSet(FI->getModRefInfo())) Min = FMRB_OnlyReadsMemory; } @@ -297,7 +312,7 @@ void GlobalsAAResult::AnalyzeGlobals(Module &M) { Handles.emplace_front(*this, Reader); Handles.front().I = Handles.begin(); } - FunctionInfos[Reader].addModRefInfoForGlobal(GV, MRI_Ref); + FunctionInfos[Reader].addModRefInfoForGlobal(GV, ModRefInfo::Ref); } if (!GV.isConstant()) // No need to keep track of writers to constants @@ -306,7 +321,7 @@ void GlobalsAAResult::AnalyzeGlobals(Module &M) { Handles.emplace_front(*this, Writer); Handles.front().I = Handles.begin(); } - FunctionInfos[Writer].addModRefInfoForGlobal(GV, MRI_Mod); + FunctionInfos[Writer].addModRefInfoForGlobal(GV, ModRefInfo::Mod); } ++NumNonAddrTakenGlobalVars; @@ -502,13 +517,13 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) { if (F->doesNotAccessMemory()) { // Can't do better than that! } else if (F->onlyReadsMemory()) { - FI.addModRefInfo(MRI_Ref); + FI.addModRefInfo(ModRefInfo::Ref); if (!F->isIntrinsic() && !F->onlyAccessesArgMemory()) // This function might call back into the module and read a global - // consider every global as possibly being read by this function. FI.setMayReadAnyGlobal(); } else { - FI.addModRefInfo(MRI_ModRef); + FI.addModRefInfo(ModRefInfo::ModRef); // Can't say anything useful unless it's an intrinsic - they don't // read or write global variables of the kind considered here. KnowNothing = !F->isIntrinsic(); @@ -544,7 +559,7 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) { // Scan the function bodies for explicit loads or stores. for (auto *Node : SCC) { - if (FI.getModRefInfo() == MRI_ModRef) + if (isModAndRefSet(FI.getModRefInfo())) break; // The mod/ref lattice saturates here. // Don't prove any properties based on the implementation of an optnone @@ -554,7 +569,7 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) { continue; for (Instruction &I : instructions(Node->getFunction())) { - if (FI.getModRefInfo() == MRI_ModRef) + if (isModAndRefSet(FI.getModRefInfo())) break; // The mod/ref lattice saturates here. // We handle calls specially because the graph-relevant aspects are @@ -563,13 +578,17 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) { if (isAllocationFn(&I, &TLI) || isFreeCall(&I, &TLI)) { // FIXME: It is completely unclear why this is necessary and not // handled by the above graph code. - FI.addModRefInfo(MRI_ModRef); + FI.addModRefInfo(ModRefInfo::ModRef); } else if (Function *Callee = CS.getCalledFunction()) { // The callgraph doesn't include intrinsic calls. if (Callee->isIntrinsic()) { + if (isa(I)) + // Don't let dbg intrinsics affect alias info. 
+ continue; + FunctionModRefBehavior Behaviour = AAResultBase::getModRefBehavior(Callee); - FI.addModRefInfo(ModRefInfo(Behaviour & MRI_ModRef)); + FI.addModRefInfo(createModRefInfo(Behaviour)); } } continue; @@ -578,15 +597,15 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) { // All non-call instructions we use the primary predicates for whether // thay read or write memory. if (I.mayReadFromMemory()) - FI.addModRefInfo(MRI_Ref); + FI.addModRefInfo(ModRefInfo::Ref); if (I.mayWriteToMemory()) - FI.addModRefInfo(MRI_Mod); + FI.addModRefInfo(ModRefInfo::Mod); } } - if ((FI.getModRefInfo() & MRI_Mod) == 0) + if (!isModSet(FI.getModRefInfo())) ++NumReadMemFunctions; - if (FI.getModRefInfo() == MRI_NoModRef) + if (!isModOrRefSet(FI.getModRefInfo())) ++NumNoMemFunctions; // Finally, now that we know the full effect on this SCC, clone the @@ -867,8 +886,9 @@ AliasResult GlobalsAAResult::alias(const MemoryLocation &LocA, ModRefInfo GlobalsAAResult::getModRefInfoForArgument(ImmutableCallSite CS, const GlobalValue *GV) { if (CS.doesNotAccessMemory()) - return MRI_NoModRef; - ModRefInfo ConservativeResult = CS.onlyReadsMemory() ? MRI_Ref : MRI_ModRef; + return ModRefInfo::NoModRef; + ModRefInfo ConservativeResult = + CS.onlyReadsMemory() ? ModRefInfo::Ref : ModRefInfo::ModRef; // Iterate through all the arguments to the called function. If any argument // is based on GV, return the conservative result. @@ -889,12 +909,12 @@ ModRefInfo GlobalsAAResult::getModRefInfoForArgument(ImmutableCallSite CS, } // We identified all objects in the argument list, and none of them were GV. - return MRI_NoModRef; + return ModRefInfo::NoModRef; } ModRefInfo GlobalsAAResult::getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc) { - unsigned Known = MRI_ModRef; + ModRefInfo Known = ModRefInfo::ModRef; // If we are asking for mod/ref info of a direct call with a pointer to a // global we are tracking, return information if we have it. 
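The FunctionInfo changes above all revolve around one packing trick: the per-function summary keeps the ModRefInfo bits and the extra MayReadAnyGlobal flag in the same small integer, so every read of the mod/ref state has to mask the flag back out (that is what the new globalClearMayReadAnyGlobal helper and the FIXME are about). A simplified standalone sketch of that layout follows; the field widths and names are illustrative and do not match the PointerIntPair layout GlobalsModRef actually uses.

#include <cstdint>

struct FunctionSummary {
  static constexpr uint8_t ModRefMask       = 0x3; // Ref | Mod
  static constexpr uint8_t MayReadAnyGlobal = 0x4; // must not collide with ModRefMask
  uint8_t Bits = 0;

  void addModRef(uint8_t MRI)   { Bits |= (MRI & ModRefMask); }
  void setMayReadAnyGlobal()    { Bits |= MayReadAnyGlobal; }
  uint8_t getModRef() const     { return Bits & ModRefMask; } // flag masked out on read
  bool mayReadAnyGlobal() const { return Bits & MayReadAnyGlobal; }
};

// The same invariant the patch asserts: the flag bit and the mod/ref bits
// must occupy disjoint positions, otherwise reads would be corrupted.
static_assert((FunctionSummary::MayReadAnyGlobal & FunctionSummary::ModRefMask) == 0,
              "flag must not collide with the mod/ref bits");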
@@ -904,12 +924,12 @@ ModRefInfo GlobalsAAResult::getModRefInfo(ImmutableCallSite CS, if (const Function *F = CS.getCalledFunction()) if (NonAddressTakenGlobals.count(GV)) if (const FunctionInfo *FI = getFunctionInfo(F)) - Known = FI->getModRefInfoForGlobal(*GV) | - getModRefInfoForArgument(CS, GV); + Known = unionModRef(FI->getModRefInfoForGlobal(*GV), + getModRefInfoForArgument(CS, GV)); - if (Known == MRI_NoModRef) - return MRI_NoModRef; // No need to query other mod/ref analyses - return ModRefInfo(Known & AAResultBase::getModRefInfo(CS, Loc)); + if (!isModOrRefSet(Known)) + return ModRefInfo::NoModRef; // No need to query other mod/ref analyses + return intersectModRef(Known, AAResultBase::getModRefInfo(CS, Loc)); } GlobalsAAResult::GlobalsAAResult(const DataLayout &DL, diff --git a/lib/Analysis/IndirectCallPromotionAnalysis.cpp b/lib/Analysis/IndirectCallPromotionAnalysis.cpp index 8f483dfefc41..c11176bbb9c8 100644 --- a/lib/Analysis/IndirectCallPromotionAnalysis.cpp +++ b/lib/Analysis/IndirectCallPromotionAnalysis.cpp @@ -17,7 +17,6 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/IndirectCallSiteVisitor.h" #include "llvm/IR/CallSite.h" -#include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/InstVisitor.h" #include "llvm/IR/Instructions.h" diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp index b7fe884cc22c..0e7be52cae70 100644 --- a/lib/Analysis/InlineCost.cpp +++ b/lib/Analysis/InlineCost.cpp @@ -21,9 +21,11 @@ #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/CFG.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DataLayout.h" @@ -134,6 +136,7 @@ class CallAnalyzer : public InstVisitor { bool HasReturn; bool HasIndirectBr; bool HasFrameEscape; + bool UsesVarArgs; /// Number of bytes allocated statically by the callee. uint64_t AllocatedSize; @@ -163,14 +166,30 @@ class CallAnalyzer : public InstVisitor { /// Keep track of values which map to a pointer base and constant offset. DenseMap> ConstantOffsetPtrs; + /// Keep track of dead blocks due to the constant arguments. + SetVector DeadBlocks; + + /// The mapping of the blocks to their known unique successors due to the + /// constant arguments. + DenseMap KnownSuccessors; + + /// Model the elimination of repeated loads that is expected to happen + /// whenever we simplify away the stores that would otherwise cause them to be + /// loads. + bool EnableLoadElimination; + SmallPtrSet LoadAddrSet; + int LoadEliminationCost; + // Custom simplification helper routines. 
bool isAllocaDerivedArg(Value *V); bool lookupSROAArgAndCost(Value *V, Value *&Arg, DenseMap::iterator &CostIt); void disableSROA(DenseMap::iterator CostIt); void disableSROA(Value *V); + void findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB); void accumulateSROACost(DenseMap::iterator CostIt, int InstructionCost); + void disableLoadElimination(); bool isGEPFree(GetElementPtrInst &GEP); bool canFoldInboundsGEP(GetElementPtrInst &I); bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset); @@ -231,8 +250,6 @@ class CallAnalyzer : public InstVisitor { bool visitCastInst(CastInst &I); bool visitUnaryInstruction(UnaryInstruction &I); bool visitCmpInst(CmpInst &I); - bool visitAnd(BinaryOperator &I); - bool visitOr(BinaryOperator &I); bool visitSub(BinaryOperator &I); bool visitBinaryOperator(BinaryOperator &I); bool visitLoad(LoadInst &I); @@ -264,12 +281,12 @@ class CallAnalyzer : public InstVisitor { IsCallerRecursive(false), IsRecursiveCall(false), ExposesReturnsTwice(false), HasDynamicAlloca(false), ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false), - HasFrameEscape(false), AllocatedSize(0), NumInstructions(0), + HasFrameEscape(false), UsesVarArgs(false), AllocatedSize(0), NumInstructions(0), NumVectorInstructions(0), VectorBonus(0), SingleBBBonus(0), - NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), - NumConstantPtrCmps(0), NumConstantPtrDiffs(0), - NumInstructionsSimplified(0), SROACostSavings(0), - SROACostSavingsLost(0) {} + EnableLoadElimination(true), LoadEliminationCost(0), NumConstantArgs(0), + NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0), + NumConstantPtrDiffs(0), NumInstructionsSimplified(0), + SROACostSavings(0), SROACostSavingsLost(0) {} bool analyzeCall(CallSite CS); @@ -324,6 +341,7 @@ void CallAnalyzer::disableSROA(DenseMap::iterator CostIt) { SROACostSavings -= CostIt->second; SROACostSavingsLost += CostIt->second; SROAArgCosts.erase(CostIt); + disableLoadElimination(); } /// \brief If 'V' maps to a SROA candidate, disable SROA for it. @@ -341,12 +359,20 @@ void CallAnalyzer::accumulateSROACost(DenseMap::iterator CostIt, SROACostSavings += InstructionCost; } +void CallAnalyzer::disableLoadElimination() { + if (EnableLoadElimination) { + Cost += LoadEliminationCost; + LoadEliminationCost = 0; + EnableLoadElimination = false; + } +} + /// \brief Accumulate a constant GEP offset into an APInt if possible. /// /// Returns false if unable to compute the offset for any reason. Respects any /// simplified values known during the analysis of this callsite. bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) { - unsigned IntPtrWidth = DL.getPointerSizeInBits(); + unsigned IntPtrWidth = DL.getPointerTypeSizeInBits(GEP.getType()); assert(IntPtrWidth == Offset.getBitWidth()); for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP); @@ -420,15 +446,98 @@ bool CallAnalyzer::visitAlloca(AllocaInst &I) { } bool CallAnalyzer::visitPHI(PHINode &I) { - // FIXME: We should potentially be tracking values through phi nodes, - // especially when they collapse to a single value due to deleted CFG edges - // during inlining. - // FIXME: We need to propagate SROA *disabling* through phi nodes, even // though we don't want to propagate it's bonuses. The idea is to disable // SROA if it *might* be used in an inappropriate manner. // Phi nodes are always zero-cost. 
+ // FIXME: Pointer sizes may differ between different address spaces, so do we + // need to use correct address space in the call to getPointerSizeInBits here? + // Or could we skip the getPointerSizeInBits call completely? As far as I can + // see the ZeroOffset is used as a dummy value, so we can probably use any + // bit width for the ZeroOffset? + APInt ZeroOffset = APInt::getNullValue(DL.getPointerSizeInBits(0)); + bool CheckSROA = I.getType()->isPointerTy(); + + // Track the constant or pointer with constant offset we've seen so far. + Constant *FirstC = nullptr; + std::pair FirstBaseAndOffset = {nullptr, ZeroOffset}; + Value *FirstV = nullptr; + + for (unsigned i = 0, e = I.getNumIncomingValues(); i != e; ++i) { + BasicBlock *Pred = I.getIncomingBlock(i); + // If the incoming block is dead, skip the incoming block. + if (DeadBlocks.count(Pred)) + continue; + // If the parent block of phi is not the known successor of the incoming + // block, skip the incoming block. + BasicBlock *KnownSuccessor = KnownSuccessors[Pred]; + if (KnownSuccessor && KnownSuccessor != I.getParent()) + continue; + + Value *V = I.getIncomingValue(i); + // If the incoming value is this phi itself, skip the incoming value. + if (&I == V) + continue; + + Constant *C = dyn_cast(V); + if (!C) + C = SimplifiedValues.lookup(V); + + std::pair BaseAndOffset = {nullptr, ZeroOffset}; + if (!C && CheckSROA) + BaseAndOffset = ConstantOffsetPtrs.lookup(V); + + if (!C && !BaseAndOffset.first) + // The incoming value is neither a constant nor a pointer with constant + // offset, exit early. + return true; + + if (FirstC) { + if (FirstC == C) + // If we've seen a constant incoming value before and it is the same + // constant we see this time, continue checking the next incoming value. + continue; + // Otherwise early exit because we either see a different constant or saw + // a constant before but we have a pointer with constant offset this time. + return true; + } + + if (FirstV) { + // The same logic as above, but check pointer with constant offset here. + if (FirstBaseAndOffset == BaseAndOffset) + continue; + return true; + } + + if (C) { + // This is the 1st time we've seen a constant, record it. + FirstC = C; + continue; + } + + // The remaining case is that this is the 1st time we've seen a pointer with + // constant offset, record it. + FirstV = V; + FirstBaseAndOffset = BaseAndOffset; + } + + // Check if we can map phi to a constant. + if (FirstC) { + SimplifiedValues[&I] = FirstC; + return true; + } + + // Check if we can map phi to a pointer with constant offset. + if (FirstBaseAndOffset.first) { + ConstantOffsetPtrs[&I] = FirstBaseAndOffset; + + Value *SROAArg; + DenseMap::iterator CostIt; + if (lookupSROAArgAndCost(FirstV, SROAArg, CostIt)) + SROAArgValues[&I] = SROAArg; + } + return true; } @@ -537,7 +646,8 @@ bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) { // Track base/offset pairs when converted to a plain integer provided the // integer is large enough to represent the pointer. unsigned IntegerSize = I.getType()->getScalarSizeInBits(); - if (IntegerSize >= DL.getPointerSizeInBits()) { + unsigned AS = I.getOperand(0)->getType()->getPointerAddressSpace(); + if (IntegerSize >= DL.getPointerSizeInBits(AS)) { std::pair BaseAndOffset = ConstantOffsetPtrs.lookup(I.getOperand(0)); if (BaseAndOffset.first) @@ -570,7 +680,7 @@ bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) { // modifications provided the integer is not too large. 
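The phi handling added above pays off when inlining with constant arguments: a folded branch kills one incoming edge, and if every remaining live edge feeds the same constant, the phi itself becomes that constant. The sketch below restates just that core idea outside of CallAnalyzer, collapsing the pointer-with-constant-offset tracking and the KnownSuccessors check into a plain set of dead predecessors; block names and int constants are stand-ins for illustration only.

#include <optional>
#include <set>
#include <string>
#include <utility>
#include <vector>

// Given a phi's incoming (predecessor, maybe-constant) pairs and the set of
// predecessors already known dead, return the single constant the phi must
// produce, if all live incoming values agree.
std::optional<int> foldPhiOverLiveEdges(
    const std::vector<std::pair<std::string, std::optional<int>>> &Incoming,
    const std::set<std::string> &DeadPreds) {
  std::optional<int> Common;
  for (const auto &In : Incoming) {
    if (DeadPreds.count(In.first))
      continue;                      // this edge can never be taken after folding
    if (!In.second)
      return std::nullopt;           // a live incoming value is not a known constant
    if (Common && *Common != *In.second)
      return std::nullopt;           // two different live constants: no fold
    Common = In.second;
  }
  return Common;                     // empty if no live constant edge was seen
}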
Value *Op = I.getOperand(0); unsigned IntegerSize = Op->getType()->getScalarSizeInBits(); - if (IntegerSize <= DL.getPointerSizeInBits()) { + if (IntegerSize <= DL.getPointerTypeSizeInBits(I.getType())) { std::pair BaseAndOffset = ConstantOffsetPtrs.lookup(Op); if (BaseAndOffset.first) ConstantOffsetPtrs[&I] = BaseAndOffset; @@ -595,6 +705,22 @@ bool CallAnalyzer::visitCastInst(CastInst &I) { // Disable SROA in the face of arbitrary casts we don't whitelist elsewhere. disableSROA(I.getOperand(0)); + // If this is a floating-point cast, and the target says this operation + // is expensive, this may eventually become a library call. Treat the cost + // as such. + switch (I.getOpcode()) { + case Instruction::FPTrunc: + case Instruction::FPExt: + case Instruction::UIToFP: + case Instruction::SIToFP: + case Instruction::FPToUI: + case Instruction::FPToSI: + if (TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive) + Cost += InlineConstants::CallPenalty; + default: + break; + } + return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I); } @@ -899,34 +1025,6 @@ bool CallAnalyzer::visitCmpInst(CmpInst &I) { return false; } -bool CallAnalyzer::visitOr(BinaryOperator &I) { - // This is necessary because the generic simplify instruction only works if - // both operands are constants. - for (unsigned i = 0; i < 2; ++i) { - if (ConstantInt *C = dyn_cast_or_null( - SimplifiedValues.lookup(I.getOperand(i)))) - if (C->isAllOnesValue()) { - SimplifiedValues[&I] = C; - return true; - } - } - return Base::visitOr(I); -} - -bool CallAnalyzer::visitAnd(BinaryOperator &I) { - // This is necessary because the generic simplify instruction only works if - // both operands are constants. - for (unsigned i = 0; i < 2; ++i) { - if (ConstantInt *C = dyn_cast_or_null( - SimplifiedValues.lookup(I.getOperand(i)))) - if (C->isZero()) { - SimplifiedValues[&I] = C; - return true; - } - } - return Base::visitAnd(I); -} - bool CallAnalyzer::visitSub(BinaryOperator &I) { // Try to handle a special case: we can fold computing the difference of two // constant-related pointers. @@ -956,23 +1054,38 @@ bool CallAnalyzer::visitSub(BinaryOperator &I) { bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) { Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); - auto Evaluate = [&](SmallVectorImpl &COps) { - Value *SimpleV = nullptr; - if (auto FI = dyn_cast(&I)) - SimpleV = SimplifyFPBinOp(I.getOpcode(), COps[0], COps[1], - FI->getFastMathFlags(), DL); - else - SimpleV = SimplifyBinOp(I.getOpcode(), COps[0], COps[1], DL); - return dyn_cast_or_null(SimpleV); - }; + Constant *CLHS = dyn_cast(LHS); + if (!CLHS) + CLHS = SimplifiedValues.lookup(LHS); + Constant *CRHS = dyn_cast(RHS); + if (!CRHS) + CRHS = SimplifiedValues.lookup(RHS); + + Value *SimpleV = nullptr; + if (auto FI = dyn_cast(&I)) + SimpleV = SimplifyFPBinOp(I.getOpcode(), CLHS ? CLHS : LHS, + CRHS ? CRHS : RHS, FI->getFastMathFlags(), DL); + else + SimpleV = + SimplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS, CRHS ? CRHS : RHS, DL); + + if (Constant *C = dyn_cast_or_null(SimpleV)) + SimplifiedValues[&I] = C; - if (simplifyInstruction(I, Evaluate)) + if (SimpleV) return true; // Disable any SROA on arguments to arbitrary, unsimplified binary operators. disableSROA(LHS); disableSROA(RHS); + // If the instruction is floating point, and the target says this operation + // is expensive, this may eventually become a library call. Treat the cost + // as such. 
+ if (I.getType()->isFloatingPointTy() && + TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive) + Cost += InlineConstants::CallPenalty; + return false; } @@ -988,6 +1101,15 @@ bool CallAnalyzer::visitLoad(LoadInst &I) { disableSROA(CostIt); } + // If the data is already loaded from this address and hasn't been clobbered + // by any stores or calls, this load is likely to be redundant and can be + // eliminated. + if (EnableLoadElimination && + !LoadAddrSet.insert(I.getPointerOperand()).second && I.isUnordered()) { + LoadEliminationCost += InlineConstants::InstrCost; + return true; + } + return false; } @@ -1003,6 +1125,15 @@ bool CallAnalyzer::visitStore(StoreInst &I) { disableSROA(CostIt); } + // The store can potentially clobber loads and prevent repeated loads from + // being eliminated. + // FIXME: + // 1. We can probably keep an initial set of eliminatable loads substracted + // from the cost even when we finally see a store. We just need to disable + // *further* accumulation of elimination savings. + // 2. We should probably at some point thread MemorySSA for the callee into + // this and then use that to actually compute *really* precise savings. + disableLoadElimination(); return false; } @@ -1085,6 +1216,8 @@ bool CallAnalyzer::visitCallSite(CallSite CS) { if (IntrinsicInst *II = dyn_cast(CS.getInstruction())) { switch (II->getIntrinsicID()) { default: + if (!CS.onlyReadsMemory() && !isAssumeLikeIntrinsic(II)) + disableLoadElimination(); return Base::visitCallSite(CS); case Intrinsic::load_relative: @@ -1095,15 +1228,20 @@ bool CallAnalyzer::visitCallSite(CallSite CS) { case Intrinsic::memset: case Intrinsic::memcpy: case Intrinsic::memmove: + disableLoadElimination(); // SROA can usually chew through these intrinsics, but they aren't free. return false; case Intrinsic::localescape: HasFrameEscape = true; return false; + case Intrinsic::vastart: + case Intrinsic::vaend: + UsesVarArgs = true; + return false; } } - if (F == CS.getInstruction()->getParent()->getParent()) { + if (F == CS.getInstruction()->getFunction()) { // This flag will fully abort the analysis, so don't bother with anything // else. IsRecursiveCall = true; @@ -1121,6 +1259,8 @@ bool CallAnalyzer::visitCallSite(CallSite CS) { Cost += InlineConstants::CallPenalty; } + if (!CS.onlyReadsMemory()) + disableLoadElimination(); return Base::visitCallSite(CS); } @@ -1135,8 +1275,11 @@ bool CallAnalyzer::visitCallSite(CallSite CS) { // Next, check if this happens to be an indirect function call to a known // function in this inline context. If not, we've done all we can. Function *F = dyn_cast_or_null(SimplifiedValues.lookup(Callee)); - if (!F) + if (!F) { + if (!CS.onlyReadsMemory()) + disableLoadElimination(); return Base::visitCallSite(CS); + } // If we have a constant that we are calling as a function, we can peer // through it and see the function target. This happens not infrequently @@ -1153,6 +1296,8 @@ bool CallAnalyzer::visitCallSite(CallSite CS) { Cost -= std::max(0, CA.getThreshold() - CA.getCost()); } + if (!F->onlyReadsMemory()) + disableLoadElimination(); return Base::visitCallSite(CS); } @@ -1414,17 +1559,6 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB, if (isa(I) || I->getType()->isVectorTy()) ++NumVectorInstructions; - // If the instruction is floating point, and the target says this operation - // is expensive or the function has the "use-soft-float" attribute, this may - // eventually become a library call. Treat the cost as such. 
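The load-elimination bookkeeping introduced in visitLoad and visitStore is an optimistic credit scheme: a repeated unordered load from an address already seen is treated as free, and the first potential clobber pays all of that credit back and switches the optimization off. The sketch below restates the scheme outside of CallAnalyzer, with made-up member names, a placeholder cost constant, and a plain string standing in for the load's pointer operand.

#include <set>
#include <string>

struct LoadElimSketch {
  static constexpr int InstrCost = 5;   // stand-in for the per-instruction cost
  int Cost = 0, Savings = 0;
  bool Enabled = true;
  std::set<std::string> SeenAddrs;      // addresses loaded so far

  void visitLoad(const std::string &Addr, bool IsUnordered) {
    if (Enabled && IsUnordered && !SeenAddrs.insert(Addr).second) {
      Savings += InstrCost;             // repeat of an earlier load: treat as free for now
      return;
    }
    Cost += InstrCost;
  }

  void clobber() {                      // any store, or a call that may write memory
    if (!Enabled)
      return;
    Cost += Savings;                    // pay back every load we optimistically skipped
    Savings = 0;
    Enabled = false;
  }
};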
- if (I->getType()->isFloatingPointTy()) { - // If the function has the "use-soft-float" attribute, mark it as - // expensive. - if (TTI.getFPOpCost(I->getType()) == TargetTransformInfo::TCC_Expensive || - (F.getFnAttribute("use-soft-float").getValueAsString() == "true")) - Cost += InlineConstants::CallPenalty; - } - // If the instruction simplified to a constant, there is no cost to this // instruction. Visit the instructions using our InstVisitor to account for // all of the per-instruction logic. The visit tree returns true if we @@ -1438,7 +1572,7 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB, using namespace ore; // If the visit this instruction detected an uninlinable pattern, abort. if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca || - HasIndirectBr || HasFrameEscape) { + HasIndirectBr || HasFrameEscape || UsesVarArgs) { if (ORE) ORE->emit([&]() { return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", @@ -1484,7 +1618,8 @@ ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) { if (!V->getType()->isPointerTy()) return nullptr; - unsigned IntPtrWidth = DL.getPointerSizeInBits(); + unsigned AS = V->getType()->getPointerAddressSpace(); + unsigned IntPtrWidth = DL.getPointerSizeInBits(AS); APInt Offset = APInt::getNullValue(IntPtrWidth); // Even though we don't look through PHI nodes, we could be called on an @@ -1508,10 +1643,48 @@ ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) { assert(V->getType()->isPointerTy() && "Unexpected operand type!"); } while (Visited.insert(V).second); - Type *IntPtrTy = DL.getIntPtrType(V->getContext()); + Type *IntPtrTy = DL.getIntPtrType(V->getContext(), AS); return cast(ConstantInt::get(IntPtrTy, Offset)); } +/// \brief Find dead blocks due to deleted CFG edges during inlining. +/// +/// If we know the successor of the current block, \p CurrBB, has to be \p +/// NextBB, the other successors of \p CurrBB are dead if these successors have +/// no live incoming CFG edges. If one block is found to be dead, we can +/// continue growing the dead block list by checking the successors of the dead +/// blocks to see if all their incoming edges are dead or not. +void CallAnalyzer::findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB) { + auto IsEdgeDead = [&](BasicBlock *Pred, BasicBlock *Succ) { + // A CFG edge is dead if the predecessor is dead or the predessor has a + // known successor which is not the one under exam. + return (DeadBlocks.count(Pred) || + (KnownSuccessors[Pred] && KnownSuccessors[Pred] != Succ)); + }; + + auto IsNewlyDead = [&](BasicBlock *BB) { + // If all the edges to a block are dead, the block is also dead. + return (!DeadBlocks.count(BB) && + llvm::all_of(predecessors(BB), + [&](BasicBlock *P) { return IsEdgeDead(P, BB); })); + }; + + for (BasicBlock *Succ : successors(CurrBB)) { + if (Succ == NextBB || !IsNewlyDead(Succ)) + continue; + SmallVector NewDead; + NewDead.push_back(Succ); + while (!NewDead.empty()) { + BasicBlock *Dead = NewDead.pop_back_val(); + if (DeadBlocks.insert(Dead)) + // Continue growing the dead block lists. + for (BasicBlock *S : successors(Dead)) + if (IsNewlyDead(S)) + NewDead.push_back(S); + } + } +} + /// \brief Analyze a call site for potential inlining. 
/// /// Returns true if inlining this call is viable, and false if it is not @@ -1559,14 +1732,14 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { if (F.empty()) return true; - Function *Caller = CS.getInstruction()->getParent()->getParent(); + Function *Caller = CS.getInstruction()->getFunction(); // Check if the caller function is recursive itself. for (User *U : Caller->users()) { CallSite Site(U); if (!Site) continue; Instruction *I = Site.getInstruction(); - if (I->getParent()->getParent() == Caller) { + if (I->getFunction() == Caller) { IsCallerRecursive = true; break; } @@ -1649,7 +1822,10 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { Value *Cond = BI->getCondition(); if (ConstantInt *SimpleCond = dyn_cast_or_null(SimplifiedValues.lookup(Cond))) { - BBWorklist.insert(BI->getSuccessor(SimpleCond->isZero() ? 1 : 0)); + BasicBlock *NextBB = BI->getSuccessor(SimpleCond->isZero() ? 1 : 0); + BBWorklist.insert(NextBB); + KnownSuccessors[BB] = NextBB; + findDeadBlocks(BB, NextBB); continue; } } @@ -1657,7 +1833,10 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { Value *Cond = SI->getCondition(); if (ConstantInt *SimpleCond = dyn_cast_or_null(SimplifiedValues.lookup(Cond))) { - BBWorklist.insert(SI->findCaseValue(SimpleCond)->getCaseSuccessor()); + BasicBlock *NextBB = SI->findCaseValue(SimpleCond)->getCaseSuccessor(); + BBWorklist.insert(NextBB); + KnownSuccessors[BB] = NextBB; + findDeadBlocks(BB, NextBB); continue; } } @@ -1711,6 +1890,7 @@ LLVM_DUMP_METHOD void CallAnalyzer::dump() { DEBUG_PRINT_STAT(NumInstructions); DEBUG_PRINT_STAT(SROACostSavings); DEBUG_PRINT_STAT(SROACostSavingsLost); + DEBUG_PRINT_STAT(LoadEliminationCost); DEBUG_PRINT_STAT(ContainsNoDuplicateCall); DEBUG_PRINT_STAT(Cost); DEBUG_PRINT_STAT(Threshold); @@ -1735,7 +1915,8 @@ int llvm::getCallsiteCost(CallSite CS, const DataLayout &DL) { // size of the byval type by the target's pointer size. PointerType *PTy = cast(CS.getArgument(I)->getType()); unsigned TypeSize = DL.getTypeSizeInBits(PTy->getElementType()); - unsigned PointerSize = DL.getPointerSizeInBits(); + unsigned AS = PTy->getAddressSpace(); + unsigned PointerSize = DL.getPointerSizeInBits(AS); // Ceiling division. unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize; @@ -1779,6 +1960,19 @@ InlineCost llvm::getInlineCost( if (!Callee) return llvm::InlineCost::getNever(); + // Never inline calls with byval arguments that does not have the alloca + // address space. Since byval arguments can be replaced with a copy to an + // alloca, the inlined code would need to be adjusted to handle that the + // argument is in the alloca address space (so it is a little bit complicated + // to solve). + unsigned AllocaAS = Callee->getParent()->getDataLayout().getAllocaAddrSpace(); + for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) + if (CS.isByValArgument(I)) { + PointerType *PTy = cast(CS.getArgument(I)->getType()); + if (PTy->getAddressSpace() != AllocaAS) + return llvm::InlineCost::getNever(); + } + // Calls to functions with always-inline attributes should be inlined // whenever possible. 
if (CS.hasFnAttr(Attribute::AlwaysInline)) { diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index 6bed2f3a9010..6c6b1cfe7203 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -27,7 +27,6 @@ #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/Analysis/MemoryBuiltins.h" -#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/ConstantRange.h" @@ -328,7 +327,7 @@ static Value *ThreadBinOpOverSelect(Instruction::BinaryOps Opcode, Value *LHS, // Check that the simplified value has the form "X op Y" where "op" is the // same as the original operation. Instruction *Simplified = dyn_cast(FV ? FV : TV); - if (Simplified && Simplified->getOpcode() == Opcode) { + if (Simplified && Simplified->getOpcode() == unsigned(Opcode)) { // The value that didn't simplify is "UnsimplifiedLHS op UnsimplifiedRHS". // We already know that "op" is the same as for the simplified value. See // if the operands match too. If so, return the simplified value. @@ -827,7 +826,7 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, MaxRecurse)) return V; - // Mul distributes over Add. Try some generic simplifications based on this. + // Mul distributes over Add. Try some generic simplifications based on this. if (Value *V = ExpandBinOp(Instruction::Mul, Op0, Op1, Instruction::Add, Q, MaxRecurse)) return V; @@ -979,18 +978,17 @@ static Value *simplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, bool IsSigned = Opcode == Instruction::SDiv; // (X * Y) / Y -> X if the multiplication does not overflow. - Value *X = nullptr, *Y = nullptr; - if (match(Op0, m_Mul(m_Value(X), m_Value(Y))) && (X == Op1 || Y == Op1)) { - if (Y != Op1) std::swap(X, Y); // Ensure expression is (X * Y) / Y, Y = Op1 - OverflowingBinaryOperator *Mul = cast(Op0); - // If the Mul knows it does not overflow, then we are good to go. + Value *X; + if (match(Op0, m_c_Mul(m_Value(X), m_Specific(Op1)))) { + auto *Mul = cast(Op0); + // If the Mul does not overflow, then we are good to go. if ((IsSigned && Mul->hasNoSignedWrap()) || (!IsSigned && Mul->hasNoUnsignedWrap())) return X; - // If X has the form X = A / Y then X * Y cannot overflow. - if (BinaryOperator *Div = dyn_cast(X)) - if (Div->getOpcode() == Opcode && Div->getOperand(1) == Y) - return X; + // If X has the form X = A / Y, then X * Y cannot overflow. + if ((IsSigned && match(X, m_SDiv(m_Value(), m_Specific(Op1)))) || + (!IsSigned && match(X, m_UDiv(m_Value(), m_Specific(Op1))))) + return X; } // (X rem Y) / Y -> 0 @@ -1414,6 +1412,43 @@ static Value *simplifyAndOrOfICmpsWithConstants(ICmpInst *Cmp0, ICmpInst *Cmp1, return nullptr; } +static Value *simplifyAndOrOfICmpsWithZero(ICmpInst *Cmp0, ICmpInst *Cmp1, + bool IsAnd) { + ICmpInst::Predicate P0 = Cmp0->getPredicate(), P1 = Cmp1->getPredicate(); + if (!match(Cmp0->getOperand(1), m_Zero()) || + !match(Cmp1->getOperand(1), m_Zero()) || P0 != P1) + return nullptr; + + if ((IsAnd && P0 != ICmpInst::ICMP_NE) || (!IsAnd && P1 != ICmpInst::ICMP_EQ)) + return nullptr; + + // We have either "(X == 0 || Y == 0)" or "(X != 0 && Y != 0)". + Value *X = Cmp0->getOperand(0); + Value *Y = Cmp1->getOperand(0); + + // If one of the compares is a masked version of a (not) null check, then + // that compare implies the other, so we eliminate the other. 
Optionally, look + // through a pointer-to-int cast to match a null check of a pointer type. + + // (X == 0) || (([ptrtoint] X & ?) == 0) --> ([ptrtoint] X & ?) == 0 + // (X == 0) || ((? & [ptrtoint] X) == 0) --> (? & [ptrtoint] X) == 0 + // (X != 0) && (([ptrtoint] X & ?) != 0) --> ([ptrtoint] X & ?) != 0 + // (X != 0) && ((? & [ptrtoint] X) != 0) --> (? & [ptrtoint] X) != 0 + if (match(Y, m_c_And(m_Specific(X), m_Value())) || + match(Y, m_c_And(m_PtrToInt(m_Specific(X)), m_Value()))) + return Cmp1; + + // (([ptrtoint] Y & ?) == 0) || (Y == 0) --> ([ptrtoint] Y & ?) == 0 + // ((? & [ptrtoint] Y) == 0) || (Y == 0) --> (? & [ptrtoint] Y) == 0 + // (([ptrtoint] Y & ?) != 0) && (Y != 0) --> ([ptrtoint] Y & ?) != 0 + // ((? & [ptrtoint] Y) != 0) && (Y != 0) --> (? & [ptrtoint] Y) != 0 + if (match(X, m_c_And(m_Specific(Y), m_Value())) || + match(X, m_c_And(m_PtrToInt(m_Specific(Y)), m_Value()))) + return Cmp0; + + return nullptr; +} + static Value *simplifyAndOfICmpsWithAdd(ICmpInst *Op0, ICmpInst *Op1) { // (icmp (add V, C0), C1) & (icmp V, C0) ICmpInst::Predicate Pred0, Pred1; @@ -1474,6 +1509,9 @@ static Value *simplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1) { if (Value *X = simplifyAndOrOfICmpsWithConstants(Op0, Op1, true)) return X; + if (Value *X = simplifyAndOrOfICmpsWithZero(Op0, Op1, true)) + return X; + if (Value *X = simplifyAndOfICmpsWithAdd(Op0, Op1)) return X; if (Value *X = simplifyAndOfICmpsWithAdd(Op1, Op0)) @@ -1542,6 +1580,9 @@ static Value *simplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1) { if (Value *X = simplifyAndOrOfICmpsWithConstants(Op0, Op1, false)) return X; + if (Value *X = simplifyAndOrOfICmpsWithZero(Op0, Op1, false)) + return X; + if (Value *X = simplifyOrOfICmpsWithAdd(Op0, Op1)) return X; if (Value *X = simplifyOrOfICmpsWithAdd(Op1, Op0)) @@ -3378,6 +3419,28 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, default: break; } + } else if (C->isNegative()) { + assert(!C->isNaN() && "Unexpected NaN constant!"); + // TODO: We can catch more cases by using a range check rather than + // relying on CannotBeOrderedLessThanZero. + switch (Pred) { + case FCmpInst::FCMP_UGE: + case FCmpInst::FCMP_UGT: + case FCmpInst::FCMP_UNE: + // (X >= 0) implies (X > C) when (C < 0) + if (CannotBeOrderedLessThanZero(LHS, Q.TLI)) + return getTrue(RetTy); + break; + case FCmpInst::FCMP_OEQ: + case FCmpInst::FCMP_OLE: + case FCmpInst::FCMP_OLT: + // (X >= 0) implies !(X < C) when (C < 0) + if (CannotBeOrderedLessThanZero(LHS, Q.TLI)) + return getFalse(RetTy); + break; + default: + break; + } } } @@ -3805,6 +3868,29 @@ Value *llvm::SimplifyInsertValueInst(Value *Agg, Value *Val, return ::SimplifyInsertValueInst(Agg, Val, Idxs, Q, RecursionLimit); } +Value *llvm::SimplifyInsertElementInst(Value *Vec, Value *Val, Value *Idx, + const SimplifyQuery &Q) { + // Try to constant fold. + auto *VecC = dyn_cast(Vec); + auto *ValC = dyn_cast(Val); + auto *IdxC = dyn_cast(Idx); + if (VecC && ValC && IdxC) + return ConstantFoldInsertElementInstruction(VecC, ValC, IdxC); + + // Fold into undef if index is out of bounds. + if (auto *CI = dyn_cast(Idx)) { + uint64_t NumElements = cast(Vec->getType())->getNumElements(); + if (CI->uge(NumElements)) + return UndefValue::get(Vec->getType()); + } + + // If index is undef, it might be out of bounds (see above case) + if (isa(Idx)) + return UndefValue::get(Vec->getType()); + + return nullptr; +} + /// Given operands for an ExtractValueInst, see if we can fold the result. /// If not, this returns null. 
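The masked null-check fold above only fires for equality against zero under "or" and inequality against zero under "and". A brute-force check over all 8-bit values confirms the underlying equivalence; this is only a sanity-check program for the reader, not part of the change.

#include <cassert>

int main() {
  for (unsigned X = 0; X < 256; ++X) {
    for (unsigned M = 0; M < 256; ++M) {
      bool MaskedEq = ((X & M) == 0);
      bool MaskedNe = ((X & M) != 0);
      // (X == 0) || ((X & M) == 0)  collapses to the masked compare.
      assert(((X == 0) || MaskedEq) == MaskedEq);
      // (X != 0) && ((X & M) != 0)  collapses to the masked compare.
      assert(((X != 0) && MaskedNe) == MaskedNe);
    }
  }
}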
static Value *SimplifyExtractValueInst(Value *Agg, ArrayRef Idxs, @@ -3853,9 +3939,18 @@ static Value *SimplifyExtractElementInst(Value *Vec, Value *Idx, const SimplifyQ // If extracting a specified index from the vector, see if we can recursively // find a previously computed scalar that was inserted into the vector. - if (auto *IdxC = dyn_cast(Idx)) + if (auto *IdxC = dyn_cast(Idx)) { + if (IdxC->getValue().uge(Vec->getType()->getVectorNumElements())) + // definitely out of bounds, thus undefined result + return UndefValue::get(Vec->getType()->getVectorElementType()); if (Value *Elt = findScalarElement(Vec, IdxC->getZExtValue())) return Elt; + } + + // An undef extract index can be arbitrarily chosen to be an out-of-range + // index value, which would result in the instruction being undef. + if (isa(Idx)) + return UndefValue::get(Vec->getType()->getVectorElementType()); return nullptr; } @@ -4440,10 +4535,53 @@ static Value *SimplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd, } } + Value *IIOperand = *ArgBegin; + Value *X; switch (IID) { case Intrinsic::fabs: { - if (SignBitMustBeZero(*ArgBegin, Q.TLI)) - return *ArgBegin; + if (SignBitMustBeZero(IIOperand, Q.TLI)) + return IIOperand; + return nullptr; + } + case Intrinsic::bswap: { + // bswap(bswap(x)) -> x + if (match(IIOperand, m_BSwap(m_Value(X)))) + return X; + return nullptr; + } + case Intrinsic::bitreverse: { + // bitreverse(bitreverse(x)) -> x + if (match(IIOperand, m_BitReverse(m_Value(X)))) + return X; + return nullptr; + } + case Intrinsic::exp: { + // exp(log(x)) -> x + if (Q.CxtI->isFast() && + match(IIOperand, m_Intrinsic(m_Value(X)))) + return X; + return nullptr; + } + case Intrinsic::exp2: { + // exp2(log2(x)) -> x + if (Q.CxtI->isFast() && + match(IIOperand, m_Intrinsic(m_Value(X)))) + return X; + return nullptr; + } + case Intrinsic::log: { + // log(exp(x)) -> x + if (Q.CxtI->isFast() && + match(IIOperand, m_Intrinsic(m_Value(X)))) + return X; + return nullptr; + } + case Intrinsic::log2: { + // log2(exp2(x)) -> x + if (Q.CxtI->isFast() && + match(IIOperand, m_Intrinsic(m_Value(X)))) { + return X; + } return nullptr; } default: @@ -4500,6 +4638,16 @@ static Value *SimplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd, return SimplifyRelativeLoad(C0, C1, Q.DL); return nullptr; } + case Intrinsic::powi: + if (ConstantInt *Power = dyn_cast(RHS)) { + // powi(x, 0) -> 1.0 + if (Power->isZero()) + return ConstantFP::get(LHS->getType(), 1.0); + // powi(x, 1) -> x + if (Power->isOne()) + return LHS; + } + return nullptr; default: return nullptr; } @@ -4568,6 +4716,12 @@ Value *llvm::SimplifyCall(ImmutableCallSite CS, Value *V, return ::SimplifyCall(CS, V, Args.begin(), Args.end(), Q, RecursionLimit); } +Value *llvm::SimplifyCall(ImmutableCallSite ICS, const SimplifyQuery &Q) { + CallSite CS(const_cast(ICS.getInstruction())); + return ::SimplifyCall(CS, CS.getCalledValue(), CS.arg_begin(), CS.arg_end(), + Q, RecursionLimit); +} + /// See if we can compute a simplified version of this instruction. /// If not, this returns null. 
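The new intrinsic folds above come in two groups: algebraic identities that always hold (bswap(bswap(x)) and bitreverse(bitreverse(x)) fold to x, powi(x, 0) to 1.0, powi(x, 1) to x), and exp/log cancellations that are only performed when the call is marked fast, since they are not exact in floating point. The small program below double-checks the two unconditional involution identities on 16-bit values; bswap16 and bitreverse16 are local stand-ins written for this check, not LLVM helpers.

#include <cassert>
#include <cstdint>

static uint16_t bswap16(uint16_t V) { return uint16_t((V << 8) | (V >> 8)); }

static uint16_t bitreverse16(uint16_t V) {
  uint16_t R = 0;
  for (int I = 0; I < 16; ++I)
    R = uint16_t((R << 1) | ((V >> I) & 1));  // bit I of V becomes bit (15 - I) of R
  return R;
}

int main() {
  for (unsigned V = 0; V <= 0xFFFF; ++V) {
    uint16_t X = uint16_t(V);
    assert(bswap16(bswap16(X)) == X);           // byte-swapping twice is the identity
    assert(bitreverse16(bitreverse16(X)) == X); // bit-reversing twice is the identity
  }
}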
@@ -4673,6 +4827,12 @@ Value *llvm::SimplifyInstruction(Instruction *I, const SimplifyQuery &SQ, IV->getIndices(), Q); break; } + case Instruction::InsertElement: { + auto *IE = cast(I); + Result = SimplifyInsertElementInst(IE->getOperand(0), IE->getOperand(1), + IE->getOperand(2), Q); + break; + } case Instruction::ExtractValue: { auto *EVI = cast(I); Result = SimplifyExtractValueInst(EVI->getAggregateOperand(), @@ -4696,8 +4856,7 @@ Value *llvm::SimplifyInstruction(Instruction *I, const SimplifyQuery &SQ, break; case Instruction::Call: { CallSite CS(cast(I)); - Result = SimplifyCall(CS, CS.getCalledValue(), CS.arg_begin(), CS.arg_end(), - Q); + Result = SimplifyCall(CS, Q); break; } #define HANDLE_CAST_INST(num, opc, clas) case Instruction::opc: diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp index 3a9dac5783f7..1982a3bbd774 100644 --- a/lib/Analysis/LazyValueInfo.cpp +++ b/lib/Analysis/LazyValueInfo.cpp @@ -37,7 +37,6 @@ #include "llvm/Support/FormattedStream.h" #include "llvm/Support/raw_ostream.h" #include -#include using namespace llvm; using namespace PatternMatch; @@ -1003,6 +1002,7 @@ bool LazyValueInfoImpl::solveBlockValueBinaryOp(ValueLatticeElement &BBLV, case Instruction::UDiv: case Instruction::Shl: case Instruction::LShr: + case Instruction::AShr: case Instruction::And: case Instruction::Or: // continue into the code below @@ -1830,7 +1830,7 @@ void LazyValueInfoAnnotatedWriter::emitInstructionAnnot( }; printResult(ParentBB); - // Print the LVI analysis results for the the immediate successor blocks, that + // Print the LVI analysis results for the immediate successor blocks, that // are dominated by `ParentBB`. for (auto *BBSucc : successors(ParentBB)) if (DT.dominates(ParentBB, BBSucc)) diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp index 7b792ed0a2e2..0e3f498cb14c 100644 --- a/lib/Analysis/Lint.cpp +++ b/lib/Analysis/Lint.cpp @@ -265,13 +265,21 @@ void Lint::visitCallSite(CallSite CS) { // Check that noalias arguments don't alias other arguments. This is // not fully precise because we don't know the sizes of the dereferenced // memory regions. - if (Formal->hasNoAliasAttr() && Actual->getType()->isPointerTy()) - for (CallSite::arg_iterator BI = CS.arg_begin(); BI != AE; ++BI) + if (Formal->hasNoAliasAttr() && Actual->getType()->isPointerTy()) { + AttributeList PAL = CS.getAttributes(); + unsigned ArgNo = 0; + for (CallSite::arg_iterator BI = CS.arg_begin(); BI != AE; ++BI) { + // Skip ByVal arguments since they will be memcpy'd to the callee's + // stack so we're not really passing the pointer anyway. + if (PAL.hasParamAttribute(ArgNo++, Attribute::ByVal)) + continue; if (AI != BI && (*BI)->getType()->isPointerTy()) { AliasResult Result = AA->alias(*AI, *BI); Assert(Result != MustAlias && Result != PartialAlias, "Unusual: noalias argument aliases another argument", &I); } + } + } // Check that an sret argument points to valid memory. if (Formal->hasStructRetAttr() && Actual->getType()->isPointerTy()) { diff --git a/lib/Analysis/Loads.cpp b/lib/Analysis/Loads.cpp index 78b673be8a0d..834727c9224d 100644 --- a/lib/Analysis/Loads.cpp +++ b/lib/Analysis/Loads.cpp @@ -414,7 +414,7 @@ Value *llvm::FindAvailablePtrLoadStore(Value *Ptr, Type *AccessTy, // If we have alias analysis and it says the store won't modify the loaded // value, ignore the store. 
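The Lint change above skips `byval` parameters in the noalias-argument check because a `byval` argument is materialized as a fresh copy on the callee's stack, so the callee never observes the caller's storage through it. Plain C++ pass-by-value shows the same effect the comment relies on (a toy example with hypothetical names, not the Lint code):

```cpp
#include <cassert>

struct Payload { int data[4]; };

// Pass-by-value: 'p' is a private copy, analogous to an LLVM byval argument
// that is memcpy'd into the callee's frame.
static void mutateCopy(Payload p) { p.data[0] = 42; }

int main() {
  Payload original = {{1, 2, 3, 4}};
  mutateCopy(original);
  // The caller's object is untouched: no pointer to 'original' was really
  // passed, which is why such arguments cannot alias other pointer arguments.
  assert(original.data[0] == 1);
  return 0;
}
```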
- if (AA && (AA->getModRefInfo(SI, StrippedPtr, AccessSize) & MRI_Mod) == 0) + if (AA && !isModSet(AA->getModRefInfo(SI, StrippedPtr, AccessSize))) continue; // Otherwise the store that may or may not alias the pointer, bail out. @@ -426,8 +426,7 @@ Value *llvm::FindAvailablePtrLoadStore(Value *Ptr, Type *AccessTy, if (Inst->mayWriteToMemory()) { // If alias analysis claims that it really won't modify the load, // ignore it. - if (AA && - (AA->getModRefInfo(Inst, StrippedPtr, AccessSize) & MRI_Mod) == 0) + if (AA && !isModSet(AA->getModRefInfo(Inst, StrippedPtr, AccessSize))) continue; // May modify the pointer, bail out. diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp index 9a223df9394c..9e54d60779a0 100644 --- a/lib/Analysis/LoopInfo.cpp +++ b/lib/Analysis/LoopInfo.cpp @@ -47,7 +47,7 @@ bool llvm::VerifyLoopInfo = false; #endif static cl::opt VerifyLoopInfoX("verify-loop-info", cl::location(VerifyLoopInfo), - cl::desc("Verify loop info (time consuming)")); + cl::Hidden, cl::desc("Verify loop info (time consuming)")); //===----------------------------------------------------------------------===// // Loop implementation @@ -731,6 +731,18 @@ PreservedAnalyses LoopPrinterPass::run(Function &F, } void llvm::printLoop(Loop &L, raw_ostream &OS, const std::string &Banner) { + + if (forcePrintModuleIR()) { + // handling -print-module-scope + OS << Banner << " (loop: "; + L.getHeader()->printAsOperand(OS, false); + OS << ")\n"; + + // printing whole module + OS << *L.getHeader()->getModule(); + return; + } + OS << Banner; auto *PreHeader = L.getLoopPreheader(); diff --git a/lib/Analysis/LoopUnrollAnalyzer.cpp b/lib/Analysis/LoopUnrollAnalyzer.cpp index 7bdf3408a581..0da90dae3d9a 100644 --- a/lib/Analysis/LoopUnrollAnalyzer.cpp +++ b/lib/Analysis/LoopUnrollAnalyzer.cpp @@ -14,7 +14,6 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/LoopUnrollAnalyzer.h" -#include "llvm/IR/Dominators.h" using namespace llvm; diff --git a/lib/Analysis/MemDerefPrinter.cpp b/lib/Analysis/MemDerefPrinter.cpp index 4231a78352ce..4a136c5a0c6d 100644 --- a/lib/Analysis/MemDerefPrinter.cpp +++ b/lib/Analysis/MemDerefPrinter.cpp @@ -7,9 +7,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/SetVector.h" #include "llvm/Analysis/Loads.h" -#include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/Passes.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp index ba90f1cf2fbd..05144265487c 100644 --- a/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -119,62 +119,54 @@ static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation &Loc, if (const LoadInst *LI = dyn_cast(Inst)) { if (LI->isUnordered()) { Loc = MemoryLocation::get(LI); - return MRI_Ref; + return ModRefInfo::Ref; } if (LI->getOrdering() == AtomicOrdering::Monotonic) { Loc = MemoryLocation::get(LI); - return MRI_ModRef; + return ModRefInfo::ModRef; } Loc = MemoryLocation(); - return MRI_ModRef; + return ModRefInfo::ModRef; } if (const StoreInst *SI = dyn_cast(Inst)) { if (SI->isUnordered()) { Loc = MemoryLocation::get(SI); - return MRI_Mod; + return ModRefInfo::Mod; } if (SI->getOrdering() == AtomicOrdering::Monotonic) { Loc = MemoryLocation::get(SI); - return MRI_ModRef; + return ModRefInfo::ModRef; } Loc = MemoryLocation(); - return MRI_ModRef; + return 
ModRefInfo::ModRef; } if (const VAArgInst *V = dyn_cast(Inst)) { Loc = MemoryLocation::get(V); - return MRI_ModRef; + return ModRefInfo::ModRef; } if (const CallInst *CI = isFreeCall(Inst, &TLI)) { // calls to free() deallocate the entire structure Loc = MemoryLocation(CI->getArgOperand(0)); - return MRI_Mod; + return ModRefInfo::Mod; } if (const IntrinsicInst *II = dyn_cast(Inst)) { - AAMDNodes AAInfo; - switch (II->getIntrinsicID()) { case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: case Intrinsic::invariant_start: - II->getAAMetadata(AAInfo); - Loc = MemoryLocation( - II->getArgOperand(1), - cast(II->getArgOperand(0))->getZExtValue(), AAInfo); + Loc = MemoryLocation::getForArgument(II, 1, TLI); // These intrinsics don't really modify the memory, but returning Mod // will allow them to be handled conservatively. - return MRI_Mod; + return ModRefInfo::Mod; case Intrinsic::invariant_end: - II->getAAMetadata(AAInfo); - Loc = MemoryLocation( - II->getArgOperand(2), - cast(II->getArgOperand(1))->getZExtValue(), AAInfo); + Loc = MemoryLocation::getForArgument(II, 2, TLI); // These intrinsics don't really modify the memory, but returning Mod // will allow them to be handled conservatively. - return MRI_Mod; + return ModRefInfo::Mod; default: break; } @@ -182,10 +174,10 @@ static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation &Loc, // Otherwise, just do the coarse-grained thing that always works. if (Inst->mayWriteToMemory()) - return MRI_ModRef; + return ModRefInfo::ModRef; if (Inst->mayReadFromMemory()) - return MRI_Ref; - return MRI_NoModRef; + return ModRefInfo::Ref; + return ModRefInfo::NoModRef; } /// Private helper for finding the local dependencies of a call site. @@ -212,32 +204,30 @@ MemDepResult MemoryDependenceResults::getCallSiteDependencyFrom( ModRefInfo MR = GetLocation(Inst, Loc, TLI); if (Loc.Ptr) { // A simple instruction. - if (AA.getModRefInfo(CS, Loc) != MRI_NoModRef) + if (isModOrRefSet(AA.getModRefInfo(CS, Loc))) return MemDepResult::getClobber(Inst); continue; } if (auto InstCS = CallSite(Inst)) { // If these two calls do not interfere, look past it. - switch (AA.getModRefInfo(CS, InstCS)) { - case MRI_NoModRef: + if (isNoModRef(AA.getModRefInfo(CS, InstCS))) { // If the two calls are the same, return InstCS as a Def, so that // CS can be found redundant and eliminated. - if (isReadOnlyCall && !(MR & MRI_Mod) && + if (isReadOnlyCall && !isModSet(MR) && CS.getInstruction()->isIdenticalToWhenDefined(Inst)) return MemDepResult::getDef(Inst); // Otherwise if the two calls don't interact (e.g. InstCS is readnone) // keep scanning. continue; - default: + } else return MemDepResult::getClobber(Inst); - } } // If we could not obtain a pointer for the instruction and the instruction // touches memory then assume that this is a dependency. - if (MR != MRI_NoModRef) + if (isModOrRefSet(MR)) return MemDepResult::getClobber(Inst); } @@ -308,8 +298,10 @@ unsigned MemoryDependenceResults::getLoadLoadClobberFullWidthSize( return 0; if (LIOffs + NewLoadByteSize > MemLocEnd && - LI->getParent()->getParent()->hasFnAttribute( - Attribute::SanitizeAddress)) + (LI->getParent()->getParent()->hasFnAttribute( + Attribute::SanitizeAddress) || + LI->getParent()->getParent()->hasFnAttribute( + Attribute::SanitizeHWAddress))) // We will be reading past the location accessed by the original program. // While this is safe in a regular build, Address Safety analysis tools // may start reporting false warnings. So, don't do widening. 
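Several hunks in this file replace the old `MRI_*` bitmask tests (`MR & MRI_Mod`, `== MRI_NoModRef`) with the scoped `ModRefInfo` enum and predicate helpers such as `isModSet`, `isRefSet`, and `isModOrRefSet`. A minimal sketch of that pattern follows; the concrete encoding here and the omission of the Must bit (which `clearMust` strips in the patch) are simplifications, not the in-tree definitions:

```cpp
#include <cassert>

// Simplified stand-in for llvm::ModRefInfo; the real enum also carries a
// MustAlias bit.
enum class ModRefInfo { NoModRef = 0, Ref = 1, Mod = 2, ModRef = Ref | Mod };

static bool isModSet(ModRefInfo MRI) { return (static_cast<int>(MRI) & 2) != 0; }
static bool isRefSet(ModRefInfo MRI) { return (static_cast<int>(MRI) & 1) != 0; }
static bool isModOrRefSet(ModRefInfo MRI) { return static_cast<int>(MRI) != 0; }
static bool isNoModRef(ModRefInfo MRI) { return static_cast<int>(MRI) == 0; }

int main() {
  // The helper-based spellings used throughout the patch, e.g.
  //   if (AA && !isModSet(AA->getModRefInfo(SI, StrippedPtr, AccessSize)))
  // read the same as the old bitmask tests while keeping the enum scoped.
  assert(isModSet(ModRefInfo::Mod) && isModSet(ModRefInfo::ModRef));
  assert(!isModSet(ModRefInfo::Ref) && !isModOrRefSet(ModRefInfo::NoModRef));
  assert(isNoModRef(ModRefInfo::NoModRef) && isRefSet(ModRefInfo::ModRef));
  return 0;
}
```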
@@ -642,11 +634,12 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom( // If alias analysis can tell that this store is guaranteed to not modify // the query pointer, ignore it. Use getModRefInfo to handle cases where // the query pointer points to constant memory etc. - if (AA.getModRefInfo(SI, MemLoc) == MRI_NoModRef) + if (!isModOrRefSet(AA.getModRefInfo(SI, MemLoc))) continue; // Ok, this store might clobber the query pointer. Check to see if it is // a must alias: in this case, we want to return this as a def. + // FIXME: Use ModRefInfo::Must bit from getModRefInfo call above. MemoryLocation StoreLoc = MemoryLocation::get(SI); // If we found a pointer, check if it could be the same as our pointer. @@ -688,15 +681,15 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom( // See if this instruction (e.g. a call or vaarg) mod/ref's the pointer. ModRefInfo MR = AA.getModRefInfo(Inst, MemLoc); // If necessary, perform additional analysis. - if (MR == MRI_ModRef) + if (isModAndRefSet(MR)) MR = AA.callCapturesBefore(Inst, MemLoc, &DT, &OBB); - switch (MR) { - case MRI_NoModRef: + switch (clearMust(MR)) { + case ModRefInfo::NoModRef: // If the call has no effect on the queried pointer, just ignore it. continue; - case MRI_Mod: + case ModRefInfo::Mod: return MemDepResult::getClobber(Inst); - case MRI_Ref: + case ModRefInfo::Ref: // If the call is known to never store to the pointer, and if this is a // load query, we can safely ignore it (scan past it). if (isLoad) @@ -749,7 +742,7 @@ MemDepResult MemoryDependenceResults::getDependency(Instruction *QueryInst) { ModRefInfo MR = GetLocation(QueryInst, MemLoc, TLI); if (MemLoc.Ptr) { // If we can do a pointer scan, make it happen. - bool isLoad = !(MR & MRI_Mod); + bool isLoad = !isModSet(MR); if (auto *II = dyn_cast(QueryInst)) isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_start; diff --git a/lib/Analysis/MemorySSA.cpp b/lib/Analysis/MemorySSA.cpp index 4af18ccb2af8..6e9368c49d65 100644 --- a/lib/Analysis/MemorySSA.cpp +++ b/lib/Analysis/MemorySSA.cpp @@ -192,8 +192,6 @@ template <> struct DenseMapInfo { } }; -enum class Reorderability { Always, IfNoAlias, Never }; - } // end namespace llvm /// This does one-way checks to see if Use could theoretically be hoisted above @@ -202,22 +200,16 @@ enum class Reorderability { Always, IfNoAlias, Never }; /// This assumes that, for the purposes of MemorySSA, Use comes directly after /// MayClobber, with no potentially clobbering operations in between them. /// (Where potentially clobbering ops are memory barriers, aliased stores, etc.) -static Reorderability getLoadReorderability(const LoadInst *Use, - const LoadInst *MayClobber) { +static bool areLoadsReorderable(const LoadInst *Use, + const LoadInst *MayClobber) { bool VolatileUse = Use->isVolatile(); bool VolatileClobber = MayClobber->isVolatile(); // Volatile operations may never be reordered with other volatile operations. if (VolatileUse && VolatileClobber) - return Reorderability::Never; - - // The lang ref allows reordering of volatile and non-volatile operations. - // Whether an aliasing nonvolatile load and volatile load can be reordered, - // though, is ambiguous. Because it may not be best to exploit this ambiguity, - // we only allow volatile/non-volatile reordering if the volatile and - // non-volatile operations don't alias. - Reorderability Result = VolatileUse || VolatileClobber - ? 
Reorderability::IfNoAlias - : Reorderability::Always; + return false; + // Otherwise, volatile doesn't matter here. From the language reference: + // 'optimizers may change the order of volatile operations relative to + // non-volatile operations.'" // If a load is seq_cst, it cannot be moved above other loads. If its ordering // is weaker, it can be moved above other loads. We just need to be sure that @@ -229,9 +221,7 @@ static Reorderability getLoadReorderability(const LoadInst *Use, bool SeqCstUse = Use->getOrdering() == AtomicOrdering::SequentiallyConsistent; bool MayClobberIsAcquire = isAtLeastOrStrongerThan(MayClobber->getOrdering(), AtomicOrdering::Acquire); - if (SeqCstUse || MayClobberIsAcquire) - return Reorderability::Never; - return Result; + return !(SeqCstUse || MayClobberIsAcquire); } static bool instructionClobbersQuery(MemoryDef *MD, @@ -262,23 +252,14 @@ static bool instructionClobbersQuery(MemoryDef *MD, if (UseCS) { ModRefInfo I = AA.getModRefInfo(DefInst, UseCS); - return I != MRI_NoModRef; + return isModOrRefSet(I); } - if (auto *DefLoad = dyn_cast(DefInst)) { - if (auto *UseLoad = dyn_cast(UseInst)) { - switch (getLoadReorderability(UseLoad, DefLoad)) { - case Reorderability::Always: - return false; - case Reorderability::Never: - return true; - case Reorderability::IfNoAlias: - return !AA.isNoAlias(UseLoc, MemoryLocation::get(DefLoad)); - } - } - } + if (auto *DefLoad = dyn_cast(DefInst)) + if (auto *UseLoad = dyn_cast(UseInst)) + return !areLoadsReorderable(UseLoad, DefLoad); - return AA.getModRefInfo(DefInst, UseLoc) & MRI_Mod; + return isModSet(AA.getModRefInfo(DefInst, UseLoc)); } static bool instructionClobbersQuery(MemoryDef *MD, const MemoryUseOrDef *MU, @@ -1526,8 +1507,8 @@ MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I) { // Separate memory aliasing and ordering into two different chains so that we // can precisely represent both "what memory will this read/write/is clobbered // by" and "what instructions can I move this past". - bool Def = bool(ModRef & MRI_Mod) || isOrdered(I); - bool Use = bool(ModRef & MRI_Ref); + bool Def = isModSet(ModRef) || isOrdered(I); + bool Use = isRefSet(ModRef); // It's possible for an instruction to not modify memory at all. During // construction, we ignore them. 
diff --git a/lib/Analysis/MemorySSAUpdater.cpp b/lib/Analysis/MemorySSAUpdater.cpp index f28f8bd6bce2..f5d89f699a5a 100644 --- a/lib/Analysis/MemorySSAUpdater.cpp +++ b/lib/Analysis/MemorySSAUpdater.cpp @@ -13,13 +13,11 @@ #include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallSet.h" #include "llvm/Analysis/MemorySSA.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRBuilder.h" -#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" diff --git a/lib/Analysis/ModuleDebugInfoPrinter.cpp b/lib/Analysis/ModuleDebugInfoPrinter.cpp index e12cdf9182c7..1e321f17d59f 100644 --- a/lib/Analysis/ModuleDebugInfoPrinter.cpp +++ b/lib/Analysis/ModuleDebugInfoPrinter.cpp @@ -18,7 +18,6 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/Passes.h" #include "llvm/IR/DebugInfo.h" -#include "llvm/IR/Function.h" #include "llvm/Pass.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/Analysis/ModuleSummaryAnalysis.cpp b/lib/Analysis/ModuleSummaryAnalysis.cpp index d54fb700200d..cf2fe7776ddc 100644 --- a/lib/Analysis/ModuleSummaryAnalysis.cpp +++ b/lib/Analysis/ModuleSummaryAnalysis.cpp @@ -306,7 +306,9 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M, NonRenamableLocal || HasInlineAsmMaybeReferencingInternal || // Inliner doesn't handle variadic functions. // FIXME: refactor this to use the same code that inliner is using. - F.isVarArg(); + F.isVarArg() || + // Don't try to import functions with noinline attribute. + F.getAttributes().hasFnAttribute(Attribute::NoInline); GlobalValueSummary::GVFlags Flags(F.getLinkage(), NotEligibleForImport, /* Live = */ false, F.isDSOLocal()); FunctionSummary::FFlags FunFlags{ @@ -370,7 +372,7 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex( std::function GetBFICallback, ProfileSummaryInfo *PSI) { assert(PSI); - ModuleSummaryIndex Index; + ModuleSummaryIndex Index(/*IsPerformingAnalysis=*/true); // Identify the local values in the llvm.used and llvm.compiler.used sets, // which should not be exported as they would then require renaming and @@ -454,7 +456,7 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex( std::unique_ptr BFIPtr; if (GetBFICallback) BFI = GetBFICallback(F); - else if (F.getEntryCount().hasValue()) { + else if (F.hasProfileData()) { LoopInfo LI{DominatorTree(const_cast(F))}; BranchProbabilityInfo BPI{F, LI}; BFIPtr = llvm::make_unique(F, BPI, LI); diff --git a/lib/Analysis/ObjCARCAliasAnalysis.cpp b/lib/Analysis/ObjCARCAliasAnalysis.cpp index ed03406ca8c6..096ea661ecb6 100644 --- a/lib/Analysis/ObjCARCAliasAnalysis.cpp +++ b/lib/Analysis/ObjCARCAliasAnalysis.cpp @@ -123,7 +123,7 @@ ModRefInfo ObjCARCAAResult::getModRefInfo(ImmutableCallSite CS, // These functions don't access any memory visible to the compiler. // Note that this doesn't include objc_retainBlock, because it updates // pointers when it copies block data. - return MRI_NoModRef; + return ModRefInfo::NoModRef; default: break; } diff --git a/lib/Analysis/ObjCARCAnalysisUtils.cpp b/lib/Analysis/ObjCARCAnalysisUtils.cpp index e3e74aa249da..55335f3a7cb0 100644 --- a/lib/Analysis/ObjCARCAnalysisUtils.cpp +++ b/lib/Analysis/ObjCARCAnalysisUtils.cpp @@ -21,8 +21,6 @@ using namespace llvm::objcarc; /// \brief A handy option to enable/disable all ARC Optimizations. 
bool llvm::objcarc::EnableARCOpts; -static cl::opt -EnableARCOptimizations("enable-objc-arc-opts", - cl::desc("enable/disable all ARC Optimizations"), - cl::location(EnableARCOpts), - cl::init(true)); +static cl::opt EnableARCOptimizations( + "enable-objc-arc-opts", cl::desc("enable/disable all ARC Optimizations"), + cl::location(EnableARCOpts), cl::init(true), cl::Hidden); diff --git a/lib/Analysis/OptimizationRemarkEmitter.cpp b/lib/Analysis/OptimizationRemarkEmitter.cpp index cd6a93668010..8ece0a2a3ed3 100644 --- a/lib/Analysis/OptimizationRemarkEmitter.cpp +++ b/lib/Analysis/OptimizationRemarkEmitter.cpp @@ -16,7 +16,6 @@ #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/LazyBlockFrequencyInfo.h" #include "llvm/Analysis/LoopInfo.h" -#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/LLVMContext.h" @@ -75,11 +74,10 @@ void OptimizationRemarkEmitter::emit( DiagnosticInfoOptimizationBase &OptDiagBase) { auto &OptDiag = cast(OptDiagBase); computeHotness(OptDiag); - // If a diagnostic has a hotness value, then only emit it if its hotness - // meets the threshold. - if (OptDiag.getHotness() && - *OptDiag.getHotness() < - F->getContext().getDiagnosticsHotnessThreshold()) { + + // Only emit it if its hotness meets the threshold. + if (OptDiag.getHotness().getValueOr(0) < + F->getContext().getDiagnosticsHotnessThreshold()) { return; } diff --git a/lib/Analysis/ProfileSummaryInfo.cpp b/lib/Analysis/ProfileSummaryInfo.cpp index 671744f93fb8..61c9411b2c44 100644 --- a/lib/Analysis/ProfileSummaryInfo.cpp +++ b/lib/Analysis/ProfileSummaryInfo.cpp @@ -112,45 +112,65 @@ bool ProfileSummaryInfo::isFunctionEntryHot(const Function *F) { // FIXME: The heuristic used below for determining hotness is based on // preliminary SPEC tuning for inliner. This will eventually be a // convenience method that calls isHotCount. - return FunctionCount && isHotCount(FunctionCount.getValue()); + return FunctionCount && isHotCount(FunctionCount.getCount()); } -/// Returns true if the function's entry or total call edge count is hot. +/// Returns true if the function contains hot code. This can include a hot +/// function entry count, hot basic block, or (in the case of Sample PGO) +/// hot total call edge count. /// If it returns false, it either means it is not hot or it is unknown -/// whether it is hot or not (for example, no profile data is available). -bool ProfileSummaryInfo::isFunctionHotInCallGraph(const Function *F) { +/// (for example, no profile data is available). +bool ProfileSummaryInfo::isFunctionHotInCallGraph(const Function *F, + BlockFrequencyInfo &BFI) { if (!F || !computeSummary()) return false; if (auto FunctionCount = F->getEntryCount()) - if (isHotCount(FunctionCount.getValue())) + if (isHotCount(FunctionCount.getCount())) return true; - uint64_t TotalCallCount = 0; + if (hasSampleProfile()) { + uint64_t TotalCallCount = 0; + for (const auto &BB : *F) + for (const auto &I : BB) + if (isa(I) || isa(I)) + if (auto CallCount = getProfileCount(&I, nullptr)) + TotalCallCount += CallCount.getValue(); + if (isHotCount(TotalCallCount)) + return true; + } for (const auto &BB : *F) - for (const auto &I : BB) - if (isa(I) || isa(I)) - if (auto CallCount = getProfileCount(&I, nullptr)) - TotalCallCount += CallCount.getValue(); - return isHotCount(TotalCallCount); + if (isHotBB(&BB, &BFI)) + return true; + return false; } -/// Returns true if the function's entry and total call edge count is cold. 
+/// Returns true if the function only contains cold code. This means that +/// the function entry and blocks are all cold, and (in the case of Sample PGO) +/// the total call edge count is cold. /// If it returns false, it either means it is not cold or it is unknown -/// whether it is cold or not (for example, no profile data is available). -bool ProfileSummaryInfo::isFunctionColdInCallGraph(const Function *F) { +/// (for example, no profile data is available). +bool ProfileSummaryInfo::isFunctionColdInCallGraph(const Function *F, + BlockFrequencyInfo &BFI) { if (!F || !computeSummary()) return false; if (auto FunctionCount = F->getEntryCount()) - if (!isColdCount(FunctionCount.getValue())) + if (!isColdCount(FunctionCount.getCount())) return false; - - uint64_t TotalCallCount = 0; + + if (hasSampleProfile()) { + uint64_t TotalCallCount = 0; + for (const auto &BB : *F) + for (const auto &I : BB) + if (isa(I) || isa(I)) + if (auto CallCount = getProfileCount(&I, nullptr)) + TotalCallCount += CallCount.getValue(); + if (!isColdCount(TotalCallCount)) + return false; + } for (const auto &BB : *F) - for (const auto &I : BB) - if (isa(I) || isa(I)) - if (auto CallCount = getProfileCount(&I, nullptr)) - TotalCallCount += CallCount.getValue(); - return isColdCount(TotalCallCount); + if (!isColdBB(&BB, &BFI)) + return false; + return true; } /// Returns true if the function's entry is a cold. If it returns false, it @@ -167,7 +187,7 @@ bool ProfileSummaryInfo::isFunctionEntryCold(const Function *F) { // FIXME: The heuristic used below for determining coldness is based on // preliminary SPEC tuning for inliner. This will eventually be a // convenience method that calls isHotCount. - return FunctionCount && isColdCount(FunctionCount.getValue()); + return FunctionCount && isColdCount(FunctionCount.getCount()); } /// Compute the hot and cold thresholds. @@ -231,7 +251,7 @@ bool ProfileSummaryInfo::isColdCallSite(const CallSite &CS, // If there is no profile for the caller, and we know the profile is // accurate, we consider the callsite as cold. return (hasSampleProfile() && - (CS.getCaller()->getEntryCount() || ProfileSampleAccurate || + (CS.getCaller()->hasProfileData() || ProfileSampleAccurate || CS.getCaller()->hasFnAttribute("profile-sample-accurate"))); } diff --git a/lib/Analysis/RegionPass.cpp b/lib/Analysis/RegionPass.cpp index b172d42c9709..c5d71b25e022 100644 --- a/lib/Analysis/RegionPass.cpp +++ b/lib/Analysis/RegionPass.cpp @@ -14,7 +14,6 @@ // //===----------------------------------------------------------------------===// #include "llvm/Analysis/RegionPass.h" -#include "llvm/Analysis/RegionIterator.h" #include "llvm/IR/OptBisect.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Timer.h" diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index bc2277475385..7a9fddfd10b1 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -153,11 +153,11 @@ MaxBruteForceIterations("scalar-evolution-max-iterations", cl::ReallyHidden, cl::init(100)); // FIXME: Enable this with EXPENSIVE_CHECKS when the test suite is clean. 
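The ProfileSummaryInfo change above makes `isFunctionColdInCallGraph` require a cold entry count, cold counts for every block (per BFI), and, for sample PGO only, a cold total call-edge count. The decision reduces to a conjunction; a toy sketch over plain data, where `isColdCount` and its threshold are hypothetical stand-ins rather than the real ProfileSummaryInfo API (illustration only, not part of the patch):

```cpp
#include <cstdint>
#include <vector>

struct ToyFunction {
  uint64_t EntryCount;
  std::vector<uint64_t> BlockFreqs; // stand-in for BFI block counts
  uint64_t TotalCallCount;          // only consulted for sample PGO
  bool HasSampleProfile;
};

// Hypothetical threshold check standing in for isColdCount().
static bool isColdCount(uint64_t C) { return C <= 10; }

// Mirrors the shape of the new check: everything observed about the function
// has to be cold before the whole function is treated as cold.
static bool isFunctionColdInCallGraph(const ToyFunction &F) {
  if (!isColdCount(F.EntryCount))
    return false;
  if (F.HasSampleProfile && !isColdCount(F.TotalCallCount))
    return false;
  for (uint64_t Freq : F.BlockFreqs)
    if (!isColdCount(Freq))
      return false; // a single hot block disqualifies the function
  return true;
}

int main() {
  ToyFunction F{/*EntryCount=*/2, /*BlockFreqs=*/{1, 500, 3},
                /*TotalCallCount=*/4, /*HasSampleProfile=*/false};
  // The hot inner block (500) keeps the function from being considered cold.
  return isFunctionColdInCallGraph(F) ? 1 : 0;
}
```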
+static cl::opt VerifySCEV( + "verify-scev", cl::Hidden, + cl::desc("Verify ScalarEvolution's backedge taken counts (slow)")); static cl::opt -VerifySCEV("verify-scev", - cl::desc("Verify ScalarEvolution's backedge taken counts (slow)")); -static cl::opt - VerifySCEVMap("verify-scev-maps", + VerifySCEVMap("verify-scev-maps", cl::Hidden, cl::desc("Verify no dangling value in ScalarEvolution's " "ExprValueMap (slow)")); @@ -549,10 +549,10 @@ bool SCEVUnknown::isOffsetOf(Type *&CTy, Constant *&FieldNo) const { /// Since we do not continue running this routine on expression trees once we /// have seen unequal values, there is no need to track them in the cache. static int -CompareValueComplexity(EquivalenceClasses &EqCache, +CompareValueComplexity(EquivalenceClasses &EqCacheValue, const LoopInfo *const LI, Value *LV, Value *RV, unsigned Depth) { - if (Depth > MaxValueCompareDepth || EqCache.isEquivalent(LV, RV)) + if (Depth > MaxValueCompareDepth || EqCacheValue.isEquivalent(LV, RV)) return 0; // Order pointer values after integer values. This helps SCEVExpander form @@ -612,14 +612,14 @@ CompareValueComplexity(EquivalenceClasses &EqCache, for (unsigned Idx : seq(0u, LNumOps)) { int Result = - CompareValueComplexity(EqCache, LI, LInst->getOperand(Idx), + CompareValueComplexity(EqCacheValue, LI, LInst->getOperand(Idx), RInst->getOperand(Idx), Depth + 1); if (Result != 0) return Result; } } - EqCache.unionSets(LV, RV); + EqCacheValue.unionSets(LV, RV); return 0; } @@ -628,6 +628,7 @@ CompareValueComplexity(EquivalenceClasses &EqCache, // more efficient. static int CompareSCEVComplexity( EquivalenceClasses &EqCacheSCEV, + EquivalenceClasses &EqCacheValue, const LoopInfo *const LI, const SCEV *LHS, const SCEV *RHS, DominatorTree &DT, unsigned Depth = 0) { // Fast-path: SCEVs are uniqued so we can do a quick equality check. @@ -649,9 +650,8 @@ static int CompareSCEVComplexity( const SCEVUnknown *LU = cast(LHS); const SCEVUnknown *RU = cast(RHS); - EquivalenceClasses EqCache; - int X = CompareValueComplexity(EqCache, LI, LU->getValue(), RU->getValue(), - Depth + 1); + int X = CompareValueComplexity(EqCacheValue, LI, LU->getValue(), + RU->getValue(), Depth + 1); if (X == 0) EqCacheSCEV.unionSets(LHS, RHS); return X; @@ -694,10 +694,15 @@ static int CompareSCEVComplexity( if (LNumOps != RNumOps) return (int)LNumOps - (int)RNumOps; + // Compare NoWrap flags. + if (LA->getNoWrapFlags() != RA->getNoWrapFlags()) + return (int)LA->getNoWrapFlags() - (int)RA->getNoWrapFlags(); + // Lexicographically compare. for (unsigned i = 0; i != LNumOps; ++i) { - int X = CompareSCEVComplexity(EqCacheSCEV, LI, LA->getOperand(i), - RA->getOperand(i), DT, Depth + 1); + int X = CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, + LA->getOperand(i), RA->getOperand(i), DT, + Depth + 1); if (X != 0) return X; } @@ -717,11 +722,14 @@ static int CompareSCEVComplexity( if (LNumOps != RNumOps) return (int)LNumOps - (int)RNumOps; + // Compare NoWrap flags. + if (LC->getNoWrapFlags() != RC->getNoWrapFlags()) + return (int)LC->getNoWrapFlags() - (int)RC->getNoWrapFlags(); + for (unsigned i = 0; i != LNumOps; ++i) { - if (i >= RNumOps) - return 1; - int X = CompareSCEVComplexity(EqCacheSCEV, LI, LC->getOperand(i), - RC->getOperand(i), DT, Depth + 1); + int X = CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, + LC->getOperand(i), RC->getOperand(i), DT, + Depth + 1); if (X != 0) return X; } @@ -734,12 +742,12 @@ static int CompareSCEVComplexity( const SCEVUDivExpr *RC = cast(RHS); // Lexicographically compare udiv expressions. 
- int X = CompareSCEVComplexity(EqCacheSCEV, LI, LC->getLHS(), RC->getLHS(), - DT, Depth + 1); + int X = CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, LC->getLHS(), + RC->getLHS(), DT, Depth + 1); if (X != 0) return X; - X = CompareSCEVComplexity(EqCacheSCEV, LI, LC->getRHS(), RC->getRHS(), DT, - Depth + 1); + X = CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, LC->getRHS(), + RC->getRHS(), DT, Depth + 1); if (X == 0) EqCacheSCEV.unionSets(LHS, RHS); return X; @@ -752,8 +760,9 @@ static int CompareSCEVComplexity( const SCEVCastExpr *RC = cast(RHS); // Compare cast expressions by operand. - int X = CompareSCEVComplexity(EqCacheSCEV, LI, LC->getOperand(), - RC->getOperand(), DT, Depth + 1); + int X = CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, + LC->getOperand(), RC->getOperand(), DT, + Depth + 1); if (X == 0) EqCacheSCEV.unionSets(LHS, RHS); return X; @@ -778,21 +787,22 @@ static void GroupByComplexity(SmallVectorImpl &Ops, LoopInfo *LI, DominatorTree &DT) { if (Ops.size() < 2) return; // Noop - EquivalenceClasses EqCache; + EquivalenceClasses EqCacheSCEV; + EquivalenceClasses EqCacheValue; if (Ops.size() == 2) { // This is the common case, which also happens to be trivially simple. // Special case it. const SCEV *&LHS = Ops[0], *&RHS = Ops[1]; - if (CompareSCEVComplexity(EqCache, LI, RHS, LHS, DT) < 0) + if (CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, RHS, LHS, DT) < 0) std::swap(LHS, RHS); return; } // Do the rough sort by complexity. std::stable_sort(Ops.begin(), Ops.end(), - [&EqCache, LI, &DT](const SCEV *LHS, const SCEV *RHS) { - return - CompareSCEVComplexity(EqCache, LI, LHS, RHS, DT) < 0; + [&](const SCEV *LHS, const SCEV *RHS) { + return CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, + LHS, RHS, DT) < 0; }); // Now that we are sorted by complexity, group elements of the same @@ -1258,7 +1268,11 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, } if (!hasTrunc) return getAddExpr(Operands); - UniqueSCEVs.FindNodeOrInsertPos(ID, IP); // Mutates IP, returns NULL. + // In spite we checked in the beginning that ID is not in the cache, + // it is possible that during recursion and different modification + // ID came to cache, so if we found it, just return it. + if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) + return S; } // trunc(x1*x2*...*xN) --> trunc(x1)*trunc(x2)*...*trunc(xN) if we can @@ -1274,7 +1288,11 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, } if (!hasTrunc) return getMulExpr(Operands); - UniqueSCEVs.FindNodeOrInsertPos(ID, IP); // Mutates IP, returns NULL. + // In spite we checked in the beginning that ID is not in the cache, + // it is possible that during recursion and different modification + // ID came to cache, so if we found it, just return it. + if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) + return S; } // If the input value is a chrec scev, truncate the chrec's operands. @@ -2340,7 +2358,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl &Ops, FoundMatch = true; } if (FoundMatch) - return getAddExpr(Ops, Flags); + return getAddExpr(Ops, Flags, Depth + 1); // Check for truncates. If all the operands are truncated from the same // type, see if factoring out the truncate would permit the result to be @@ -3756,6 +3774,24 @@ void ScalarEvolution::eraseValueFromMap(Value *V) { } } +/// Check whether value has nuw/nsw/exact set but SCEV does not. +/// TODO: In reality it is better to check the poison recursevely +/// but this is better than nothing. 
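SCEVLostPoisonFlags (whose body follows) exists because an instruction can carry nuw/nsw/exact flags that its SCEV does not, and facts derived under those flags are not valid for a plain wrapping expression. A standalone illustration of the gap, emulating 8-bit signed wraparound explicitly so the demo itself has no undefined behavior (not part of the patch):

```cpp
#include <cassert>

// Emulate an 8-bit signed add with wraparound, mapping the result back to
// the signed range [-128, 127].
static int wrappingAdd8(int x, int y) {
  int r = (x + y) & 0xFF;        // modular 8-bit result
  return r >= 128 ? r - 256 : r; // reinterpret as two's complement
}

int main() {
  // Under an nsw-style assumption, x + 1 > x holds for every x. With wrapping
  // semantics it fails at the maximum value, which is why facts proved from
  // the flag must not be transferred to a flag-less expression.
  assert(wrappingAdd8(42, 1) > 42);
  assert(!(wrappingAdd8(127, 1) > 127)); // wraps to -128
  return 0;
}
```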
+static bool SCEVLostPoisonFlags(const SCEV *S, const Value *V) { + if (auto *I = dyn_cast(V)) { + if (isa(I)) { + if (auto *NS = dyn_cast(S)) { + if (I->hasNoSignedWrap() && !NS->hasNoSignedWrap()) + return true; + if (I->hasNoUnsignedWrap() && !NS->hasNoUnsignedWrap()) + return true; + } + } else if (isa(I) && I->isExact()) + return true; + } + return false; +} + /// Return an existing SCEV if it exists, otherwise analyze the expression and /// create a new one. const SCEV *ScalarEvolution::getSCEV(Value *V) { @@ -3769,7 +3805,7 @@ const SCEV *ScalarEvolution::getSCEV(Value *V) { // ValueExprMap before insert S->{V, 0} into ExprValueMap. std::pair Pair = ValueExprMap.insert({SCEVCallbackVH(V, this), S}); - if (Pair.second) { + if (Pair.second && !SCEVLostPoisonFlags(S, V)) { ExprValueMap[S].insert({V, nullptr}); // If S == Stripped + Offset, add Stripped -> {V, Offset} into @@ -4358,6 +4394,7 @@ static Optional MatchBinaryOp(Value *V, DominatorTree &DT) { default: break; } + break; } default: @@ -4626,18 +4663,19 @@ ScalarEvolution::createAddRecFromPHIWithCastsImpl(const SCEVUnknown *SymbolicPHI // Construct the extended SCEV: (Ext ix (Trunc iy (Expr) to ix) to iy) // for each of StartVal and Accum - auto GetExtendedExpr = [&](const SCEV *Expr) -> const SCEV * { + auto getExtendedExpr = [&](const SCEV *Expr, + bool CreateSignExtend) -> const SCEV * { assert(isLoopInvariant(Expr, L) && "Expr is expected to be invariant"); const SCEV *TruncatedExpr = getTruncateExpr(Expr, TruncTy); const SCEV *ExtendedExpr = - Signed ? getSignExtendExpr(TruncatedExpr, Expr->getType()) - : getZeroExtendExpr(TruncatedExpr, Expr->getType()); + CreateSignExtend ? getSignExtendExpr(TruncatedExpr, Expr->getType()) + : getZeroExtendExpr(TruncatedExpr, Expr->getType()); return ExtendedExpr; }; // Given: // ExtendedExpr = (Ext ix (Trunc iy (Expr) to ix) to iy - // = GetExtendedExpr(Expr) + // = getExtendedExpr(Expr) // Determine whether the predicate P: Expr == ExtendedExpr // is known to be false at compile time auto PredIsKnownFalse = [&](const SCEV *Expr, @@ -4646,13 +4684,15 @@ ScalarEvolution::createAddRecFromPHIWithCastsImpl(const SCEVUnknown *SymbolicPHI isKnownPredicate(ICmpInst::ICMP_NE, Expr, ExtendedExpr); }; - const SCEV *StartExtended = GetExtendedExpr(StartVal); + const SCEV *StartExtended = getExtendedExpr(StartVal, Signed); if (PredIsKnownFalse(StartVal, StartExtended)) { DEBUG(dbgs() << "P2 is compile-time false\n";); return None; } - const SCEV *AccumExtended = GetExtendedExpr(Accum); + // The Step is always Signed (because the overflow checks are either + // NSSW or NUSW) + const SCEV *AccumExtended = getExtendedExpr(Accum, /*CreateSignExtend=*/true); if (PredIsKnownFalse(Accum, AccumExtended)) { DEBUG(dbgs() << "P3 is compile-time false\n";); return None; @@ -4719,6 +4759,30 @@ ScalarEvolution::createAddRecFromPHIWithCasts(const SCEVUnknown *SymbolicPHI) { return Rewrite; } +// FIXME: This utility is currently required because the Rewriter currently +// does not rewrite this expression: +// {0, +, (sext ix (trunc iy to ix) to iy)} +// into {0, +, %step}, +// even when the following Equal predicate exists: +// "%step == (sext ix (trunc iy to ix) to iy)". 
+bool PredicatedScalarEvolution::areAddRecsEqualWithPreds( + const SCEVAddRecExpr *AR1, const SCEVAddRecExpr *AR2) const { + if (AR1 == AR2) + return true; + + auto areExprsEqual = [&](const SCEV *Expr1, const SCEV *Expr2) -> bool { + if (Expr1 != Expr2 && !Preds.implies(SE.getEqualPredicate(Expr1, Expr2)) && + !Preds.implies(SE.getEqualPredicate(Expr2, Expr1))) + return false; + return true; + }; + + if (!areExprsEqual(AR1->getStart(), AR2->getStart()) || + !areExprsEqual(AR1->getStepRecurrence(SE), AR2->getStepRecurrence(SE))) + return false; + return true; +} + /// A helper function for createAddRecFromPHI to handle simple cases. /// /// This function tries to find an AddRec expression for the simplest (yet most @@ -4861,33 +4925,33 @@ const SCEV *ScalarEvolution::createAddRecFromPHI(PHINode *PN) { // indices form a positive value. if (GEP->isInBounds() && GEP->getOperand(0) == PN) { Flags = setFlags(Flags, SCEV::FlagNW); - + const SCEV *Ptr = getSCEV(GEP->getPointerOperand()); if (isKnownPositive(getMinusSCEV(getSCEV(GEP), Ptr))) Flags = setFlags(Flags, SCEV::FlagNUW); } - + // We cannot transfer nuw and nsw flags from subtraction // operations -- sub nuw X, Y is not the same as add nuw X, -Y // for instance. } - + const SCEV *StartVal = getSCEV(StartValueV); const SCEV *PHISCEV = getAddRecExpr(StartVal, Accum, L, Flags); - + // Okay, for the entire analysis of this edge we assumed the PHI // to be symbolic. We now need to go back and purge all of the // entries for the scalars that use the symbolic expression. forgetSymbolicName(PN, SymbolicName); ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV; - + // We can add Flags to the post-inc expression only if we // know that it is *undefined behavior* for BEValueV to // overflow. if (auto *BEInst = dyn_cast(BEValueV)) if (isLoopInvariant(Accum, L) && isAddRecNeverPoison(BEInst, L)) (void)getAddRecExpr(getAddExpr(StartVal, Accum), Accum, L, Flags); - + return PHISCEV; } } @@ -6356,9 +6420,8 @@ PushLoopPHIs(const Loop *L, SmallVectorImpl &Worklist) { BasicBlock *Header = L->getHeader(); // Push all Loop-header PHIs onto the Worklist stack. - for (BasicBlock::iterator I = Header->begin(); - PHINode *PN = dyn_cast(I); ++I) - Worklist.push_back(PN); + for (PHINode &PN : Header->phis()) + Worklist.push_back(&PN); } const ScalarEvolution::BackedgeTakenInfo & @@ -6416,11 +6479,9 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { SmallVector Worklist; PushLoopPHIs(L, Worklist); - SmallPtrSet Visited; + SmallPtrSet Discovered; while (!Worklist.empty()) { Instruction *I = Worklist.pop_back_val(); - if (!Visited.insert(I).second) - continue; ValueExprMapType::iterator It = ValueExprMap.find_as(static_cast(I)); @@ -6435,13 +6496,37 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { // own when it gets to that point. if (!isa(I) || !isa(Old)) { eraseValueFromMap(It->first); - forgetMemoizedResults(Old, false); + forgetMemoizedResults(Old); } if (PHINode *PN = dyn_cast(I)) ConstantEvolutionLoopExitValue.erase(PN); } - PushDefUseChildren(I, Worklist); + // Since we don't need to invalidate anything for correctness and we're + // only invalidating to make SCEV's results more precise, we get to stop + // early to avoid invalidating too much. This is especially important in + // cases like: + // + // %v = f(pn0, pn1) // pn0 and pn1 used through some other phi node + // loop0: + // %pn0 = phi + // ... + // loop1: + // %pn1 = phi + // ... + // + // where both loop0 and loop1's backedge taken count uses the SCEV + // expression for %v. 
If we don't have the early stop below then in cases + // like the above, getBackedgeTakenInfo(loop1) will clear out the trip + // count for loop0 and getBackedgeTakenInfo(loop0) will clear out the trip + // count for loop1, effectively nullifying SCEV's trip count cache. + for (auto *U : I->users()) + if (auto *I = dyn_cast(U)) { + auto *LoopForUser = LI.getLoopFor(I->getParent()); + if (LoopForUser && L->contains(LoopForUser) && + Discovered.insert(I).second) + Worklist.push_back(I); + } } } @@ -6512,12 +6597,6 @@ void ScalarEvolution::forgetLoop(const Loop *L) { PushDefUseChildren(I, Worklist); } - for (auto I = ExitLimits.begin(); I != ExitLimits.end(); ++I) { - auto &Query = I->first; - if (Query.L == CurrL) - ExitLimits.erase(I); - } - LoopPropertiesCache.erase(CurrL); // Forget all contained loops too, to avoid dangling entries in the // ValuesAtScopes map. @@ -6779,18 +6858,6 @@ ScalarEvolution::computeBackedgeTakenCount(const Loop *L, ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimit(const Loop *L, BasicBlock *ExitingBlock, - bool AllowPredicates) { - ExitLimitQuery Query(L, ExitingBlock, AllowPredicates); - auto MaybeEL = ExitLimits.find(Query); - if (MaybeEL != ExitLimits.end()) - return MaybeEL->second; - ExitLimit EL = computeExitLimitImpl(L, ExitingBlock, AllowPredicates); - ExitLimits.insert({Query, EL}); - return EL; -} - -ScalarEvolution::ExitLimit -ScalarEvolution::computeExitLimitImpl(const Loop *L, BasicBlock *ExitingBlock, bool AllowPredicates) { // Okay, we've chosen an exiting block. See what condition causes us to exit // at this block and remember the exit block and whether all other targets @@ -7055,17 +7122,18 @@ ScalarEvolution::computeExitLimitFromICmp(const Loop *L, bool ControlsExit, bool AllowPredicates) { // If the condition was exit on true, convert the condition to exit on false - ICmpInst::Predicate Cond; + ICmpInst::Predicate Pred; if (!L->contains(FBB)) - Cond = ExitCond->getPredicate(); + Pred = ExitCond->getPredicate(); else - Cond = ExitCond->getInversePredicate(); + Pred = ExitCond->getInversePredicate(); + const ICmpInst::Predicate OriginalPred = Pred; // Handle common loops like: for (X = "string"; *X; ++X) if (LoadInst *LI = dyn_cast(ExitCond->getOperand(0))) if (Constant *RHS = dyn_cast(ExitCond->getOperand(1))) { ExitLimit ItCnt = - computeLoadConstantCompareExitLimit(LI, RHS, L, Cond); + computeLoadConstantCompareExitLimit(LI, RHS, L, Pred); if (ItCnt.hasAnyInfo()) return ItCnt; } @@ -7082,11 +7150,11 @@ ScalarEvolution::computeExitLimitFromICmp(const Loop *L, if (isLoopInvariant(LHS, L) && !isLoopInvariant(RHS, L)) { // If there is a loop-invariant, force it into the RHS. std::swap(LHS, RHS); - Cond = ICmpInst::getSwappedPredicate(Cond); + Pred = ICmpInst::getSwappedPredicate(Pred); } // Simplify the operands before analyzing them. - (void)SimplifyICmpOperands(Cond, LHS, RHS); + (void)SimplifyICmpOperands(Pred, LHS, RHS); // If we have a comparison of a chrec against a constant, try to use value // ranges to answer this query. @@ -7095,13 +7163,13 @@ ScalarEvolution::computeExitLimitFromICmp(const Loop *L, if (AddRec->getLoop() == L) { // Form the constant range. 
ConstantRange CompRange = - ConstantRange::makeExactICmpRegion(Cond, RHSC->getAPInt()); + ConstantRange::makeExactICmpRegion(Pred, RHSC->getAPInt()); const SCEV *Ret = AddRec->getNumIterationsInRange(CompRange, *this); if (!isa(Ret)) return Ret; } - switch (Cond) { + switch (Pred) { case ICmpInst::ICMP_NE: { // while (X != Y) // Convert to: while (X-Y != 0) ExitLimit EL = howFarToZero(getMinusSCEV(LHS, RHS), L, ControlsExit, @@ -7117,7 +7185,7 @@ ScalarEvolution::computeExitLimitFromICmp(const Loop *L, } case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_ULT: { // while (X < Y) - bool IsSigned = Cond == ICmpInst::ICMP_SLT; + bool IsSigned = Pred == ICmpInst::ICMP_SLT; ExitLimit EL = howManyLessThans(LHS, RHS, L, IsSigned, ControlsExit, AllowPredicates); if (EL.hasAnyInfo()) return EL; @@ -7125,7 +7193,7 @@ ScalarEvolution::computeExitLimitFromICmp(const Loop *L, } case ICmpInst::ICMP_SGT: case ICmpInst::ICMP_UGT: { // while (X > Y) - bool IsSigned = Cond == ICmpInst::ICMP_SGT; + bool IsSigned = Pred == ICmpInst::ICMP_SGT; ExitLimit EL = howManyGreaterThans(LHS, RHS, L, IsSigned, ControlsExit, AllowPredicates); @@ -7143,7 +7211,7 @@ ScalarEvolution::computeExitLimitFromICmp(const Loop *L, return ExhaustiveCount; return computeShiftCompareExitLimit(ExitCond->getOperand(0), - ExitCond->getOperand(1), L, Cond); + ExitCond->getOperand(1), L, OriginalPred); } ScalarEvolution::ExitLimit @@ -7587,12 +7655,9 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, if (!Latch) return nullptr; - for (auto &I : *Header) { - PHINode *PHI = dyn_cast(&I); - if (!PHI) break; - auto *StartCST = getOtherIncomingValue(PHI, Latch); - if (!StartCST) continue; - CurrentIterVals[PHI] = StartCST; + for (PHINode &PHI : Header->phis()) { + if (auto *StartCST = getOtherIncomingValue(&PHI, Latch)) + CurrentIterVals[&PHI] = StartCST; } if (!CurrentIterVals.count(PN)) return RetVal = nullptr; @@ -7669,13 +7734,9 @@ const SCEV *ScalarEvolution::computeExitCountExhaustively(const Loop *L, BasicBlock *Latch = L->getLoopLatch(); assert(Latch && "Should follow from NumIncomingValues == 2!"); - for (auto &I : *Header) { - PHINode *PHI = dyn_cast(&I); - if (!PHI) - break; - auto *StartCST = getOtherIncomingValue(PHI, Latch); - if (!StartCST) continue; - CurrentIterVals[PHI] = StartCST; + for (PHINode &PHI : Header->phis()) { + if (auto *StartCST = getOtherIncomingValue(&PHI, Latch)) + CurrentIterVals[&PHI] = StartCST; } if (!CurrentIterVals.count(PN)) return getCouldNotCompute(); @@ -10684,7 +10745,6 @@ ScalarEvolution::ScalarEvolution(ScalarEvolution &&Arg) BackedgeTakenCounts(std::move(Arg.BackedgeTakenCounts)), PredicatedBackedgeTakenCounts( std::move(Arg.PredicatedBackedgeTakenCounts)), - ExitLimits(std::move(Arg.ExitLimits)), ConstantEvolutionLoopExitValue( std::move(Arg.ConstantEvolutionLoopExitValue)), ValuesAtScopes(std::move(Arg.ValuesAtScopes)), @@ -11099,7 +11159,7 @@ bool ScalarEvolution::ExitLimit::hasOperand(const SCEV *S) const { } void -ScalarEvolution::forgetMemoizedResults(const SCEV *S, bool EraseExitLimit) { +ScalarEvolution::forgetMemoizedResults(const SCEV *S) { ValuesAtScopes.erase(S); LoopDispositions.erase(S); BlockDispositions.erase(S); @@ -11132,13 +11192,6 @@ ScalarEvolution::forgetMemoizedResults(const SCEV *S, bool EraseExitLimit) { RemoveSCEVFromBackedgeMap(BackedgeTakenCounts); RemoveSCEVFromBackedgeMap(PredicatedBackedgeTakenCounts); - - // TODO: There is a suspicion that we only need to do it when there is a - // SCEVUnknown somewhere inside S. Need to check this. 
- if (EraseExitLimit) - for (auto I = ExitLimits.begin(), E = ExitLimits.end(); I != E; ++I) - if (I->second.hasOperand(S)) - ExitLimits.erase(I); } void ScalarEvolution::addToLoopUseLists(const SCEV *S) { diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index ee0bc37e3dce..53ce33bacbe9 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -187,8 +187,21 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode, // generated code. if (isa(IP)) ScanLimit++; + + // Conservatively, do not use any instruction which has any of wrap/exact + // flags installed. + // TODO: Instead of simply disable poison instructions we can be clever + // here and match SCEV to this instruction. + auto canGeneratePoison = [](Instruction *I) { + if (isa(I) && + (I->hasNoSignedWrap() || I->hasNoUnsignedWrap())) + return true; + if (isa(I) && I->isExact()) + return true; + return false; + }; if (IP->getOpcode() == (unsigned)Opcode && IP->getOperand(0) == LHS && - IP->getOperand(1) == RHS) + IP->getOperand(1) == RHS && !canGeneratePoison(&*IP)) return &*IP; if (IP == BlockBegin) break; } @@ -878,7 +891,7 @@ bool SCEVExpander::isNormalAddRecExprPHI(PHINode *PN, Instruction *IncV, if (IncV->mayHaveSideEffects()) return false; - if (IncV != PN) + if (IncV == PN) return true; return isNormalAddRecExprPHI(PN, IncV, L); @@ -1141,12 +1154,11 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, IVIncInsertLoop && SE.DT.properlyDominates(LatchBlock, IVIncInsertLoop->getHeader()); - for (auto &I : *L->getHeader()) { - auto *PN = dyn_cast(&I); - if (!PN || !SE.isSCEVable(PN->getType())) + for (PHINode &PN : L->getHeader()->phis()) { + if (!SE.isSCEVable(PN.getType())) continue; - const SCEVAddRecExpr *PhiSCEV = dyn_cast(SE.getSCEV(PN)); + const SCEVAddRecExpr *PhiSCEV = dyn_cast(SE.getSCEV(&PN)); if (!PhiSCEV) continue; @@ -1158,16 +1170,16 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, continue; Instruction *TempIncV = - cast(PN->getIncomingValueForBlock(LatchBlock)); + cast(PN.getIncomingValueForBlock(LatchBlock)); // Check whether we can reuse this PHI node. if (LSRMode) { - if (!isExpandedAddRecExprPHI(PN, TempIncV, L)) + if (!isExpandedAddRecExprPHI(&PN, TempIncV, L)) continue; if (L == IVIncInsertLoop && !hoistIVInc(TempIncV, IVIncInsertPos)) continue; } else { - if (!isNormalAddRecExprPHI(PN, TempIncV, L)) + if (!isNormalAddRecExprPHI(&PN, TempIncV, L)) continue; } @@ -1176,7 +1188,7 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, IncV = TempIncV; TruncTy = nullptr; InvertStep = false; - AddRecPhiMatch = PN; + AddRecPhiMatch = &PN; break; } @@ -1186,7 +1198,7 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, canBeCheaplyTransformed(SE, PhiSCEV, Normalized, InvertStep)) { // Record the phi node. But don't stop we might find an exact match // later. - AddRecPhiMatch = PN; + AddRecPhiMatch = &PN; IncV = TempIncV; TruncTy = SE.getEffectiveSCEVType(Normalized->getType()); } @@ -1728,10 +1740,28 @@ Value *SCEVExpander::expand(const SCEV *S) { InsertPt = &*L->getHeader()->getFirstInsertionPt(); } } else { + // We can move insertion point only if there is no div or rem operations + // otherwise we are risky to move it over the check for zero denominator. 
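The comment above motivates the SafeToHoist restriction that follows: hoisting an expanded expression containing a division above the condition that guards it can introduce a division by zero (PR35406). A small C++ illustration of the hazard with hypothetical names, not the expander code itself:

```cpp
#include <cstdint>

// Guarded form: the division only executes when the denominator is non-zero,
// mirroring code whose loop guard protects a udiv in the trip-count SCEV.
static uint64_t guardedQuotient(uint64_t N, uint64_t D) {
  if (D == 0)
    return 0;
  return N / D; // safe: dominated by the D != 0 check
}

// "Hoisted" form: evaluating N / D before the check is exactly the bug the
// SafeToHoist predicate keeps the expander from introducing.
static uint64_t hoistedQuotient(uint64_t N, uint64_t D) {
  uint64_t Q = N / D; // undefined behavior when D == 0
  return D == 0 ? 0 : Q;
}

int main() {
  volatile uint64_t D = 0;         // runtime zero, as a guard would see it
  (void)guardedQuotient(100, D);   // fine
  (void)hoistedQuotient(100, 5);   // only safe because 5 != 0
  return 0;
}
```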
+ auto SafeToHoist = [](const SCEV *S) { + return !SCEVExprContains(S, [](const SCEV *S) { + if (const auto *D = dyn_cast(S)) { + if (const auto *SC = dyn_cast(D->getRHS())) + // Division by non-zero constants can be hoisted. + return SC->getValue()->isZero(); + // All other divisions should not be moved as they may be + // divisions by zero and should be kept within the + // conditions of the surrounding loops that guard their + // execution (see PR35406). + return true; + } + return false; + }); + }; // If the SCEV is computable at this level, insert it into the header // after the PHIs (and after any other instructions that we've inserted // there) so that it is guaranteed to dominate any user inside the loop. - if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L)) + if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L) && + SafeToHoist(S)) InsertPt = &*L->getHeader()->getFirstInsertionPt(); while (InsertPt->getIterator() != Builder.GetInsertPoint() && (isInsertedInstruction(InsertPt) || @@ -1828,12 +1858,8 @@ SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, const TargetTransformInfo *TTI) { // Find integer phis in order of increasing width. SmallVector Phis; - for (auto &I : *L->getHeader()) { - if (auto *PN = dyn_cast(&I)) - Phis.push_back(PN); - else - break; - } + for (PHINode &PN : L->getHeader()->phis()) + Phis.push_back(&PN); if (TTI) std::sort(Phis.begin(), Phis.end(), [](Value *LHS, Value *RHS) { diff --git a/lib/Analysis/ScopedNoAliasAA.cpp b/lib/Analysis/ScopedNoAliasAA.cpp index ada053cfc165..f12275aff387 100644 --- a/lib/Analysis/ScopedNoAliasAA.cpp +++ b/lib/Analysis/ScopedNoAliasAA.cpp @@ -48,7 +48,7 @@ using namespace llvm; // can also be achieved by stripping the associated metadata tags from IR, but // this option is sometimes more convenient. static cl::opt EnableScopedNoAlias("enable-scoped-noalias", - cl::init(true)); + cl::init(true), cl::Hidden); namespace { @@ -102,12 +102,12 @@ ModRefInfo ScopedNoAliasAAResult::getModRefInfo(ImmutableCallSite CS, if (!mayAliasInScopes(Loc.AATags.Scope, CS.getInstruction()->getMetadata( LLVMContext::MD_noalias))) - return MRI_NoModRef; + return ModRefInfo::NoModRef; if (!mayAliasInScopes( CS.getInstruction()->getMetadata(LLVMContext::MD_alias_scope), Loc.AATags.NoAlias)) - return MRI_NoModRef; + return ModRefInfo::NoModRef; return AAResultBase::getModRefInfo(CS, Loc); } @@ -120,12 +120,12 @@ ModRefInfo ScopedNoAliasAAResult::getModRefInfo(ImmutableCallSite CS1, if (!mayAliasInScopes( CS1.getInstruction()->getMetadata(LLVMContext::MD_alias_scope), CS2.getInstruction()->getMetadata(LLVMContext::MD_noalias))) - return MRI_NoModRef; + return ModRefInfo::NoModRef; if (!mayAliasInScopes( CS2.getInstruction()->getMetadata(LLVMContext::MD_alias_scope), CS1.getInstruction()->getMetadata(LLVMContext::MD_noalias))) - return MRI_NoModRef; + return ModRefInfo::NoModRef; return AAResultBase::getModRefInfo(CS1, CS2); } diff --git a/lib/Analysis/SyntheticCountsUtils.cpp b/lib/Analysis/SyntheticCountsUtils.cpp new file mode 100644 index 000000000000..262299c5f3bb --- /dev/null +++ b/lib/Analysis/SyntheticCountsUtils.cpp @@ -0,0 +1,122 @@ +//===--- SyntheticCountsUtils.cpp - synthetic counts propagation utils ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file defines utilities for propagating synthetic counts. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/SyntheticCountsUtils.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SCCIterator.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instructions.h" + +using namespace llvm; + +// Given a set of functions in an SCC, propagate entry counts to functions +// called by the SCC. +static void +propagateFromSCC(const SmallPtrSetImpl &SCCFunctions, + function_ref GetCallSiteRelFreq, + function_ref GetCount, + function_ref AddToCount) { + + SmallVector CallSites; + + // Gather all callsites in the SCC. + auto GatherCallSites = [&]() { + for (auto *F : SCCFunctions) { + assert(F && !F->isDeclaration()); + for (auto &I : instructions(F)) { + if (auto CS = CallSite(&I)) { + CallSites.push_back(CS); + } + } + } + }; + + GatherCallSites(); + + // Partition callsites so that the callsites that call functions in the same + // SCC come first. + auto Mid = partition(CallSites, [&](CallSite &CS) { + auto *Callee = CS.getCalledFunction(); + if (Callee) + return SCCFunctions.count(Callee); + // FIXME: Use the !callees metadata to propagate counts through indirect + // calls. + return 0U; + }); + + // For functions in the same SCC, update the counts in two steps: + // 1. Compute the additional count for each function by propagating the counts + // along all incoming edges to the function that originate from the same SCC + // and summing them up. + // 2. Add the additional counts to the functions in the SCC. + // This ensures that the order of + // traversal of functions within the SCC doesn't change the final result. + + DenseMap AdditionalCounts; + for (auto It = CallSites.begin(); It != Mid; It++) { + auto &CS = *It; + auto RelFreq = GetCallSiteRelFreq(CS); + Function *Callee = CS.getCalledFunction(); + Function *Caller = CS.getCaller(); + RelFreq *= Scaled64(GetCount(Caller), 0); + uint64_t AdditionalCount = RelFreq.toInt(); + AdditionalCounts[Callee] += AdditionalCount; + } + + // Update the counts for the functions in the SCC. + for (auto &Entry : AdditionalCounts) + AddToCount(Entry.first, Entry.second); + + // Now update the counts for functions not in SCC. + for (auto It = Mid; It != CallSites.end(); It++) { + auto &CS = *It; + auto Weight = GetCallSiteRelFreq(CS); + Function *Callee = CS.getCalledFunction(); + Function *Caller = CS.getCaller(); + Weight *= Scaled64(GetCount(Caller), 0); + AddToCount(Callee, Weight.toInt()); + } +} + +/// Propgate synthetic entry counts on a callgraph. +/// +/// This performs a reverse post-order traversal of the callgraph SCC. For each +/// SCC, it first propagates the entry counts to the functions within the SCC +/// through call edges and updates them in one shot. Then the entry counts are +/// propagated to functions outside the SCC. 
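The doc comment above describes the propagation scheme: visit SCCs of the call graph in reverse post-order and, for each call edge, add caller-count scaled by the call site's relative frequency to the callee. A toy, acyclic version of that update rule over a hypothetical edge list, with the two-phase SCC handling omitted (illustration only, not part of the patch):

```cpp
#include <cstdint>
#include <map>
#include <string>
#include <vector>

struct Edge {
  std::string Caller, Callee;
  double RelFreq; // call-site frequency relative to the caller's entry
};

int main() {
  // Entry counts before propagation; "main" seeds the graph.
  std::map<std::string, uint64_t> Count = {{"main", 1000}, {"foo", 0}, {"bar", 0}};

  // Edges listed so callers appear before their callees, i.e. already in
  // reverse post-order for this acyclic toy graph.
  std::vector<Edge> Edges = {{"main", "foo", 0.5},
                             {"main", "bar", 2.0},
                             {"foo", "bar", 1.0}};

  // Each edge contributes caller count scaled by the call-site frequency.
  for (const Edge &E : Edges)
    Count[E.Callee] += static_cast<uint64_t>(Count[E.Caller] * E.RelFreq);

  // foo ends up with 500, bar with 2000 + 500 = 2500.
  return Count["bar"] == 2500 ? 0 : 1;
}
```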
+void llvm::propagateSyntheticCounts( + const CallGraph &CG, function_ref GetCallSiteRelFreq, + function_ref GetCount, + function_ref AddToCount) { + + SmallVector, 16> SCCs; + for (auto I = scc_begin(&CG); !I.isAtEnd(); ++I) { + auto SCC = *I; + + SmallPtrSet SCCFunctions; + for (auto *Node : SCC) { + Function *F = Node->getFunction(); + if (F && !F->isDeclaration()) { + SCCFunctions.insert(F); + } + } + SCCs.push_back(SCCFunctions); + } + + for (auto &SCCFunctions : reverse(SCCs)) + propagateFromSCC(SCCFunctions, GetCallSiteRelFreq, GetCount, AddToCount); +} diff --git a/lib/Analysis/TargetLibraryInfo.cpp b/lib/Analysis/TargetLibraryInfo.cpp index 47a84bd382a1..f655eaeca524 100644 --- a/lib/Analysis/TargetLibraryInfo.cpp +++ b/lib/Analysis/TargetLibraryInfo.cpp @@ -50,9 +50,9 @@ static bool hasSinCosPiStret(const Triple &T) { return true; } -/// initialize - Initialize the set of available library functions based on the -/// specified target triple. This should be carefully written so that a missing -/// target triple gets a sane set of defaults. +/// Initialize the set of available library functions based on the specified +/// target triple. This should be carefully written so that a missing target +/// triple gets a sane set of defaults. static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, ArrayRef StandardNames) { // Verify that the StandardNames array is in alphabetical order. @@ -182,6 +182,9 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, TLI.setUnavailable(LibFunc_atanh); TLI.setUnavailable(LibFunc_atanhf); TLI.setUnavailable(LibFunc_atanhl); + TLI.setUnavailable(LibFunc_cabs); + TLI.setUnavailable(LibFunc_cabsf); + TLI.setUnavailable(LibFunc_cabsl); TLI.setUnavailable(LibFunc_cbrt); TLI.setUnavailable(LibFunc_cbrtf); TLI.setUnavailable(LibFunc_cbrtl); @@ -242,50 +245,6 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, TLI.setUnavailable(LibFunc_tanhf); } - // These definitions are due to math-finite.h header on Linux - TLI.setUnavailable(LibFunc_acos_finite); - TLI.setUnavailable(LibFunc_acosf_finite); - TLI.setUnavailable(LibFunc_acosl_finite); - TLI.setUnavailable(LibFunc_acosh_finite); - TLI.setUnavailable(LibFunc_acoshf_finite); - TLI.setUnavailable(LibFunc_acoshl_finite); - TLI.setUnavailable(LibFunc_asin_finite); - TLI.setUnavailable(LibFunc_asinf_finite); - TLI.setUnavailable(LibFunc_asinl_finite); - TLI.setUnavailable(LibFunc_atan2_finite); - TLI.setUnavailable(LibFunc_atan2f_finite); - TLI.setUnavailable(LibFunc_atan2l_finite); - TLI.setUnavailable(LibFunc_atanh_finite); - TLI.setUnavailable(LibFunc_atanhf_finite); - TLI.setUnavailable(LibFunc_atanhl_finite); - TLI.setUnavailable(LibFunc_cosh_finite); - TLI.setUnavailable(LibFunc_coshf_finite); - TLI.setUnavailable(LibFunc_coshl_finite); - TLI.setUnavailable(LibFunc_exp10_finite); - TLI.setUnavailable(LibFunc_exp10f_finite); - TLI.setUnavailable(LibFunc_exp10l_finite); - TLI.setUnavailable(LibFunc_exp2_finite); - TLI.setUnavailable(LibFunc_exp2f_finite); - TLI.setUnavailable(LibFunc_exp2l_finite); - TLI.setUnavailable(LibFunc_exp_finite); - TLI.setUnavailable(LibFunc_expf_finite); - TLI.setUnavailable(LibFunc_expl_finite); - TLI.setUnavailable(LibFunc_log10_finite); - TLI.setUnavailable(LibFunc_log10f_finite); - TLI.setUnavailable(LibFunc_log10l_finite); - TLI.setUnavailable(LibFunc_log2_finite); - TLI.setUnavailable(LibFunc_log2f_finite); - TLI.setUnavailable(LibFunc_log2l_finite); - TLI.setUnavailable(LibFunc_log_finite); - 
TLI.setUnavailable(LibFunc_logf_finite); - TLI.setUnavailable(LibFunc_logl_finite); - TLI.setUnavailable(LibFunc_pow_finite); - TLI.setUnavailable(LibFunc_powf_finite); - TLI.setUnavailable(LibFunc_powl_finite); - TLI.setUnavailable(LibFunc_sinh_finite); - TLI.setUnavailable(LibFunc_sinhf_finite); - TLI.setUnavailable(LibFunc_sinhl_finite); - // Win32 does *not* provide provide these functions, but they are // generally available on POSIX-compliant systems: TLI.setUnavailable(LibFunc_access); @@ -457,6 +416,50 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, TLI.setUnavailable(LibFunc_stat64); TLI.setUnavailable(LibFunc_statvfs64); TLI.setUnavailable(LibFunc_tmpfile64); + + // Relaxed math functions are included in math-finite.h on Linux (GLIBC). + TLI.setUnavailable(LibFunc_acos_finite); + TLI.setUnavailable(LibFunc_acosf_finite); + TLI.setUnavailable(LibFunc_acosl_finite); + TLI.setUnavailable(LibFunc_acosh_finite); + TLI.setUnavailable(LibFunc_acoshf_finite); + TLI.setUnavailable(LibFunc_acoshl_finite); + TLI.setUnavailable(LibFunc_asin_finite); + TLI.setUnavailable(LibFunc_asinf_finite); + TLI.setUnavailable(LibFunc_asinl_finite); + TLI.setUnavailable(LibFunc_atan2_finite); + TLI.setUnavailable(LibFunc_atan2f_finite); + TLI.setUnavailable(LibFunc_atan2l_finite); + TLI.setUnavailable(LibFunc_atanh_finite); + TLI.setUnavailable(LibFunc_atanhf_finite); + TLI.setUnavailable(LibFunc_atanhl_finite); + TLI.setUnavailable(LibFunc_cosh_finite); + TLI.setUnavailable(LibFunc_coshf_finite); + TLI.setUnavailable(LibFunc_coshl_finite); + TLI.setUnavailable(LibFunc_exp10_finite); + TLI.setUnavailable(LibFunc_exp10f_finite); + TLI.setUnavailable(LibFunc_exp10l_finite); + TLI.setUnavailable(LibFunc_exp2_finite); + TLI.setUnavailable(LibFunc_exp2f_finite); + TLI.setUnavailable(LibFunc_exp2l_finite); + TLI.setUnavailable(LibFunc_exp_finite); + TLI.setUnavailable(LibFunc_expf_finite); + TLI.setUnavailable(LibFunc_expl_finite); + TLI.setUnavailable(LibFunc_log10_finite); + TLI.setUnavailable(LibFunc_log10f_finite); + TLI.setUnavailable(LibFunc_log10l_finite); + TLI.setUnavailable(LibFunc_log2_finite); + TLI.setUnavailable(LibFunc_log2f_finite); + TLI.setUnavailable(LibFunc_log2l_finite); + TLI.setUnavailable(LibFunc_log_finite); + TLI.setUnavailable(LibFunc_logf_finite); + TLI.setUnavailable(LibFunc_logl_finite); + TLI.setUnavailable(LibFunc_pow_finite); + TLI.setUnavailable(LibFunc_powf_finite); + TLI.setUnavailable(LibFunc_powl_finite); + TLI.setUnavailable(LibFunc_sinh_finite); + TLI.setUnavailable(LibFunc_sinhf_finite); + TLI.setUnavailable(LibFunc_sinhl_finite); } // As currently implemented in clang, NVPTX code has no standard library to @@ -605,7 +608,7 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy, return (NumParams == 3 && FTy.getReturnType()->isPointerTy() && FTy.getParamType(0) == FTy.getReturnType() && FTy.getParamType(1) == FTy.getReturnType() && - FTy.getParamType(2)->isIntegerTy()); + IsSizeTTy(FTy.getParamType(2))); case LibFunc_strcpy_chk: case LibFunc_stpcpy_chk: @@ -630,7 +633,7 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy, return (NumParams == 3 && FTy.getReturnType() == FTy.getParamType(0) && FTy.getParamType(0) == FTy.getParamType(1) && FTy.getParamType(0) == PCharTy && - FTy.getParamType(2)->isIntegerTy()); + IsSizeTTy(FTy.getParamType(2))); case LibFunc_strxfrm: return (NumParams == 3 && FTy.getParamType(0)->isPointerTy() && @@ -645,7 +648,7 @@ bool 
TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy, return (NumParams == 3 && FTy.getReturnType()->isIntegerTy(32) && FTy.getParamType(0)->isPointerTy() && FTy.getParamType(0) == FTy.getParamType(1) && - FTy.getParamType(2)->isIntegerTy()); + IsSizeTTy(FTy.getParamType(2))); case LibFunc_strspn: case LibFunc_strcspn: @@ -1267,6 +1270,25 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy, return (NumParams == 1 && FTy.getParamType(0)->isPointerTy() && FTy.getReturnType()->isIntegerTy()); + case LibFunc_cabs: + case LibFunc_cabsf: + case LibFunc_cabsl: { + Type* RetTy = FTy.getReturnType(); + if (!RetTy->isFloatingPointTy()) + return false; + + // NOTE: These prototypes are target specific and currently support + // "complex" passed as an array or discrete real & imaginary parameters. + // Add other calling conventions to enable libcall optimizations. + if (NumParams == 1) + return (FTy.getParamType(0)->isArrayTy() && + FTy.getParamType(0)->getArrayNumElements() == 2 && + FTy.getParamType(0)->getArrayElementType() == RetTy); + else if (NumParams == 2) + return (FTy.getParamType(0) == RetTy && FTy.getParamType(1) == RetTy); + else + return false; + } case LibFunc::NumLibFuncs: break; } diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp index 7feb40da2718..b744cae51ed7 100644 --- a/lib/Analysis/TargetTransformInfo.cpp +++ b/lib/Analysis/TargetTransformInfo.cpp @@ -26,11 +26,6 @@ using namespace PatternMatch; #define DEBUG_TYPE "tti" -static cl::opt UseWideMemcpyLoopLowering( - "use-wide-memcpy-loop-lowering", cl::init(false), - cl::desc("Enables the new wide memcpy loop lowering in Transforms/Utils."), - cl::Hidden); - static cl::opt EnableReduxCost("costmodel-reduxcost", cl::init(false), cl::Hidden, cl::desc("Recognize reduction patterns.")); @@ -547,10 +542,6 @@ void TargetTransformInfo::getMemcpyLoopResidualLoweringType( SrcAlign, DestAlign); } -bool TargetTransformInfo::useWideIRMemcpyLoopLowering() const { - return UseWideMemcpyLoopLowering; -} - bool TargetTransformInfo::areInlineCompatible(const Function *Caller, const Function *Callee) const { return TTIImpl->areInlineCompatible(Caller, Callee); diff --git a/lib/Analysis/TypeBasedAliasAnalysis.cpp b/lib/Analysis/TypeBasedAliasAnalysis.cpp index 1e36e314b864..173db399b9d6 100644 --- a/lib/Analysis/TypeBasedAliasAnalysis.cpp +++ b/lib/Analysis/TypeBasedAliasAnalysis.cpp @@ -142,7 +142,7 @@ using namespace llvm; // A handy option for disabling TBAA functionality. The same effect can also be // achieved by stripping the !tbaa tags from IR, but this option is sometimes // more convenient. 
-static cl::opt EnableTBAA("enable-tbaa", cl::init(true)); +static cl::opt EnableTBAA("enable-tbaa", cl::init(true), cl::Hidden); namespace { @@ -371,7 +371,7 @@ ModRefInfo TypeBasedAAResult::getModRefInfo(ImmutableCallSite CS, if (const MDNode *M = CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa)) if (!Aliases(L, M)) - return MRI_NoModRef; + return ModRefInfo::NoModRef; return AAResultBase::getModRefInfo(CS, Loc); } @@ -386,7 +386,7 @@ ModRefInfo TypeBasedAAResult::getModRefInfo(ImmutableCallSite CS1, if (const MDNode *M2 = CS2.getInstruction()->getMetadata(LLVMContext::MD_tbaa)) if (!Aliases(M1, M2)) - return MRI_NoModRef; + return ModRefInfo::NoModRef; return AAResultBase::getModRefInfo(CS1, CS2); } @@ -544,21 +544,32 @@ static bool matchAccessTags(const MDNode *A, const MDNode *B, TBAAStructTagNode TagA(A), TagB(B); const MDNode *CommonType = getLeastCommonType(TagA.getAccessType(), TagB.getAccessType()); - if (GenericTag) - *GenericTag = createAccessTag(CommonType); // TODO: We need to check if AccessType of TagA encloses AccessType of // TagB to support aggregate AccessType. If yes, return true. // Climb the type DAG from base type of A to see if we reach base type of B. uint64_t OffsetA; - if (findAccessType(TagA, TagB.getBaseType(), OffsetA)) - return OffsetA == TagB.getOffset(); + if (findAccessType(TagA, TagB.getBaseType(), OffsetA)) { + bool SameMemberAccess = OffsetA == TagB.getOffset(); + if (GenericTag) + *GenericTag = SameMemberAccess ? TagB.getNode() : + createAccessTag(CommonType); + return SameMemberAccess; + } // Climb the type DAG from base type of B to see if we reach base type of A. uint64_t OffsetB; - if (findAccessType(TagB, TagA.getBaseType(), OffsetB)) - return OffsetB == TagA.getOffset(); + if (findAccessType(TagB, TagA.getBaseType(), OffsetB)) { + bool SameMemberAccess = OffsetB == TagA.getOffset(); + if (GenericTag) + *GenericTag = SameMemberAccess ? TagA.getNode() : + createAccessTag(CommonType); + return SameMemberAccess; + } + + if (GenericTag) + *GenericTag = createAccessTag(CommonType); // If the final access types have different roots, they're part of different // potentially unrelated type systems, so we must be conservative. diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index 106a4a71f93a..6a322438f5ae 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -336,21 +336,78 @@ static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW, } } - // If low bits are zero in either operand, output low known-0 bits. - // Also compute a conservative estimate for high known-0 bits. - // More trickiness is possible, but this is sufficient for the - // interesting case of alignment computation. - unsigned TrailZ = Known.countMinTrailingZeros() + - Known2.countMinTrailingZeros(); + assert(!Known.hasConflict() && !Known2.hasConflict()); + // Compute a conservative estimate for high known-0 bits. unsigned LeadZ = std::max(Known.countMinLeadingZeros() + Known2.countMinLeadingZeros(), BitWidth) - BitWidth; - - TrailZ = std::min(TrailZ, BitWidth); LeadZ = std::min(LeadZ, BitWidth); + + // The result of the bottom bits of an integer multiply can be + // inferred by looking at the bottom bits of both operands and + // multiplying them together. + // We can infer at least the minimum number of known trailing bits + // of both operands. 
Depending on number of trailing zeros, we can + // infer more bits, because (a*b) <=> ((a/m) * (b/n)) * (m*n) assuming + // a and b are divisible by m and n respectively. + // We then calculate how many of those bits are inferrable and set + // the output. For example, the i8 mul: + // a = XXXX1100 (12) + // b = XXXX1110 (14) + // We know the bottom 3 bits are zero since the first can be divided by + // 4 and the second by 2, thus having ((12/4) * (14/2)) * (2*4). + // Applying the multiplication to the trimmed arguments gets: + // XX11 (3) + // X111 (7) + // ------- + // XX11 + // XX11 + // XX11 + // XX11 + // ------- + // XXXXX01 + // Which allows us to infer the 2 LSBs. Since we're multiplying the result + // by 8, the bottom 3 bits will be 0, so we can infer a total of 5 bits. + // The proof for this can be described as: + // Pre: (C1 >= 0) && (C1 < (1 << C5)) && (C2 >= 0) && (C2 < (1 << C6)) && + // (C7 == (1 << (umin(countTrailingZeros(C1), C5) + + // umin(countTrailingZeros(C2), C6) + + // umin(C5 - umin(countTrailingZeros(C1), C5), + // C6 - umin(countTrailingZeros(C2), C6)))) - 1) + // %aa = shl i8 %a, C5 + // %bb = shl i8 %b, C6 + // %aaa = or i8 %aa, C1 + // %bbb = or i8 %bb, C2 + // %mul = mul i8 %aaa, %bbb + // %mask = and i8 %mul, C7 + // => + // %mask = i8 ((C1*C2)&C7) + // Where C5, C6 describe the known bits of %a, %b + // C1, C2 describe the known bottom bits of %a, %b. + // C7 describes the mask of the known bits of the result. + APInt Bottom0 = Known.One; + APInt Bottom1 = Known2.One; + + // How many times we'd be able to divide each argument by 2 (shr by 1). + // This gives us the number of trailing zeros on the multiplication result. + unsigned TrailBitsKnown0 = (Known.Zero | Known.One).countTrailingOnes(); + unsigned TrailBitsKnown1 = (Known2.Zero | Known2.One).countTrailingOnes(); + unsigned TrailZero0 = Known.countMinTrailingZeros(); + unsigned TrailZero1 = Known2.countMinTrailingZeros(); + unsigned TrailZ = TrailZero0 + TrailZero1; + + // Figure out the fewest known-bits operand. + unsigned SmallestOperand = std::min(TrailBitsKnown0 - TrailZero0, + TrailBitsKnown1 - TrailZero1); + unsigned ResultBitsKnown = std::min(SmallestOperand + TrailZ, BitWidth); + + APInt BottomKnown = Bottom0.getLoBits(TrailBitsKnown0) * + Bottom1.getLoBits(TrailBitsKnown1); + Known.resetAll(); - Known.Zero.setLowBits(TrailZ); Known.Zero.setHighBits(LeadZ); + Known.Zero |= (~BottomKnown).getLoBits(ResultBitsKnown); + Known.One |= BottomKnown.getLoBits(ResultBitsKnown); // Only make use of no-wrap flags if we failed to compute the sign bit // directly. This matters if the multiplication always overflows, in @@ -426,7 +483,7 @@ static bool isEphemeralValueOf(const Instruction *I, const Value *E) { } // Is this an intrinsic that cannot be speculated but also cannot trap? 
-static bool isAssumeLikeIntrinsic(const Instruction *I) { +bool llvm::isAssumeLikeIntrinsic(const Instruction *I) { if (const CallInst *CI = dyn_cast(I)) if (Function *F = CI->getCalledFunction()) switch (F->getIntrinsicID()) { @@ -548,7 +605,7 @@ static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known, m_BitCast(m_Specific(V)))); CmpInst::Predicate Pred; - ConstantInt *C; + uint64_t C; // assume(v = a) if (match(Arg, m_c_ICmp(Pred, m_V, m_Value(A))) && Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { @@ -650,51 +707,55 @@ static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known, } else if (match(Arg, m_c_ICmp(Pred, m_Shl(m_V, m_ConstantInt(C)), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && - isValidAssumeForContext(I, Q.CxtI, Q.DT)) { + isValidAssumeForContext(I, Q.CxtI, Q.DT) && + C < BitWidth) { KnownBits RHSKnown(BitWidth); computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); // For those bits in RHS that are known, we can propagate them to known // bits in V shifted to the right by C. - RHSKnown.Zero.lshrInPlace(C->getZExtValue()); + RHSKnown.Zero.lshrInPlace(C); Known.Zero |= RHSKnown.Zero; - RHSKnown.One.lshrInPlace(C->getZExtValue()); + RHSKnown.One.lshrInPlace(C); Known.One |= RHSKnown.One; // assume(~(v << c) = a) } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_Shl(m_V, m_ConstantInt(C))), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && - isValidAssumeForContext(I, Q.CxtI, Q.DT)) { + isValidAssumeForContext(I, Q.CxtI, Q.DT) && + C < BitWidth) { KnownBits RHSKnown(BitWidth); computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); // For those bits in RHS that are known, we can propagate them inverted // to known bits in V shifted to the right by C. - RHSKnown.One.lshrInPlace(C->getZExtValue()); + RHSKnown.One.lshrInPlace(C); Known.Zero |= RHSKnown.One; - RHSKnown.Zero.lshrInPlace(C->getZExtValue()); + RHSKnown.Zero.lshrInPlace(C); Known.One |= RHSKnown.Zero; // assume(v >> c = a) } else if (match(Arg, m_c_ICmp(Pred, m_Shr(m_V, m_ConstantInt(C)), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && - isValidAssumeForContext(I, Q.CxtI, Q.DT)) { + isValidAssumeForContext(I, Q.CxtI, Q.DT) && + C < BitWidth) { KnownBits RHSKnown(BitWidth); computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); // For those bits in RHS that are known, we can propagate them to known // bits in V shifted to the right by C. - Known.Zero |= RHSKnown.Zero << C->getZExtValue(); - Known.One |= RHSKnown.One << C->getZExtValue(); + Known.Zero |= RHSKnown.Zero << C; + Known.One |= RHSKnown.One << C; // assume(~(v >> c) = a) } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_Shr(m_V, m_ConstantInt(C))), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && - isValidAssumeForContext(I, Q.CxtI, Q.DT)) { + isValidAssumeForContext(I, Q.CxtI, Q.DT) && + C < BitWidth) { KnownBits RHSKnown(BitWidth); computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); // For those bits in RHS that are known, we can propagate them inverted // to known bits in V shifted to the right by C. - Known.Zero |= RHSKnown.One << C->getZExtValue(); - Known.One |= RHSKnown.Zero << C->getZExtValue(); + Known.Zero |= RHSKnown.One << C; + Known.One |= RHSKnown.Zero << C; // assume(v >=_s c) where c is non-negative } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && Pred == ICmpInst::ICMP_SGE && @@ -2203,9 +2264,9 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth, // ashr X, C -> adds C sign bits. Vectors too. 
const APInt *ShAmt; if (match(U->getOperand(1), m_APInt(ShAmt))) { - unsigned ShAmtLimited = ShAmt->getZExtValue(); - if (ShAmtLimited >= TyBits) + if (ShAmt->uge(TyBits)) break; // Bad shift. + unsigned ShAmtLimited = ShAmt->getZExtValue(); Tmp += ShAmtLimited; if (Tmp > TyBits) Tmp = TyBits; } @@ -2216,9 +2277,9 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth, if (match(U->getOperand(1), m_APInt(ShAmt))) { // shl destroys sign bits. Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q); + if (ShAmt->uge(TyBits) || // Bad shift. + ShAmt->uge(Tmp)) break; // Shifted all sign bits out. Tmp2 = ShAmt->getZExtValue(); - if (Tmp2 >= TyBits || // Bad shift. - Tmp2 >= Tmp) break; // Shifted all sign bits out. return Tmp - Tmp2; } break; @@ -3507,7 +3568,8 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V, // Speculative load may create a race that did not exist in the source. LI->getFunction()->hasFnAttribute(Attribute::SanitizeThread) || // Speculative load may load data from dirty regions. - LI->getFunction()->hasFnAttribute(Attribute::SanitizeAddress)) + LI->getFunction()->hasFnAttribute(Attribute::SanitizeAddress) || + LI->getFunction()->hasFnAttribute(Attribute::SanitizeHWAddress)) return false; const DataLayout &DL = LI->getModule()->getDataLayout(); return isDereferenceableAndAlignedPointer(LI->getPointerOperand(), @@ -4099,6 +4161,100 @@ static SelectPatternResult matchClamp(CmpInst::Predicate Pred, return {SPF_UNKNOWN, SPNB_NA, false}; } +/// Recognize variations of: +/// a < c ? min(a,b) : min(b,c) ==> min(min(a,b),min(b,c)) +static SelectPatternResult matchMinMaxOfMinMax(CmpInst::Predicate Pred, + Value *CmpLHS, Value *CmpRHS, + Value *TrueVal, Value *FalseVal) { + // TODO: Allow FP min/max with nnan/nsz. + assert(CmpInst::isIntPredicate(Pred) && "Expected integer comparison"); + + Value *A, *B; + SelectPatternResult L = matchSelectPattern(TrueVal, A, B); + if (!SelectPatternResult::isMinOrMax(L.Flavor)) + return {SPF_UNKNOWN, SPNB_NA, false}; + + Value *C, *D; + SelectPatternResult R = matchSelectPattern(FalseVal, C, D); + if (L.Flavor != R.Flavor) + return {SPF_UNKNOWN, SPNB_NA, false}; + + // We have something like: x Pred y ? min(a, b) : min(c, d). + // Try to match the compare to the min/max operations of the select operands. + // First, make sure we have the right compare predicate. 
+ switch (L.Flavor) { + case SPF_SMIN: + if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE) { + Pred = ICmpInst::getSwappedPredicate(Pred); + std::swap(CmpLHS, CmpRHS); + } + if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) + break; + return {SPF_UNKNOWN, SPNB_NA, false}; + case SPF_SMAX: + if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) { + Pred = ICmpInst::getSwappedPredicate(Pred); + std::swap(CmpLHS, CmpRHS); + } + if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE) + break; + return {SPF_UNKNOWN, SPNB_NA, false}; + case SPF_UMIN: + if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) { + Pred = ICmpInst::getSwappedPredicate(Pred); + std::swap(CmpLHS, CmpRHS); + } + if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) + break; + return {SPF_UNKNOWN, SPNB_NA, false}; + case SPF_UMAX: + if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) { + Pred = ICmpInst::getSwappedPredicate(Pred); + std::swap(CmpLHS, CmpRHS); + } + if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) + break; + return {SPF_UNKNOWN, SPNB_NA, false}; + default: + return {SPF_UNKNOWN, SPNB_NA, false}; + } + + // If there is a common operand in the already matched min/max and the other + // min/max operands match the compare operands (either directly or inverted), + // then this is min/max of the same flavor. + + // a pred c ? m(a, b) : m(c, b) --> m(m(a, b), m(c, b)) + // ~c pred ~a ? m(a, b) : m(c, b) --> m(m(a, b), m(c, b)) + if (D == B) { + if ((CmpLHS == A && CmpRHS == C) || (match(C, m_Not(m_Specific(CmpLHS))) && + match(A, m_Not(m_Specific(CmpRHS))))) + return {L.Flavor, SPNB_NA, false}; + } + // a pred d ? m(a, b) : m(b, d) --> m(m(a, b), m(b, d)) + // ~d pred ~a ? m(a, b) : m(b, d) --> m(m(a, b), m(b, d)) + if (C == B) { + if ((CmpLHS == A && CmpRHS == D) || (match(D, m_Not(m_Specific(CmpLHS))) && + match(A, m_Not(m_Specific(CmpRHS))))) + return {L.Flavor, SPNB_NA, false}; + } + // b pred c ? m(a, b) : m(c, a) --> m(m(a, b), m(c, a)) + // ~c pred ~b ? m(a, b) : m(c, a) --> m(m(a, b), m(c, a)) + if (D == A) { + if ((CmpLHS == B && CmpRHS == C) || (match(C, m_Not(m_Specific(CmpLHS))) && + match(B, m_Not(m_Specific(CmpRHS))))) + return {L.Flavor, SPNB_NA, false}; + } + // b pred d ? m(a, b) : m(a, d) --> m(m(a, b), m(a, d)) + // ~d pred ~b ? m(a, b) : m(a, d) --> m(m(a, b), m(a, d)) + if (C == A) { + if ((CmpLHS == B && CmpRHS == D) || (match(D, m_Not(m_Specific(CmpLHS))) && + match(B, m_Not(m_Specific(CmpRHS))))) + return {L.Flavor, SPNB_NA, false}; + } + + return {SPF_UNKNOWN, SPNB_NA, false}; +} + /// Match non-obvious integer minimum and maximum sequences. static SelectPatternResult matchMinMax(CmpInst::Predicate Pred, Value *CmpLHS, Value *CmpRHS, @@ -4112,6 +4268,10 @@ static SelectPatternResult matchMinMax(CmpInst::Predicate Pred, if (SPR.Flavor != SelectPatternFlavor::SPF_UNKNOWN) return SPR; + SPR = matchMinMaxOfMinMax(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal); + if (SPR.Flavor != SelectPatternFlavor::SPF_UNKNOWN) + return SPR; + if (Pred != CmpInst::ICMP_SGT && Pred != CmpInst::ICMP_SLT) return {SPF_UNKNOWN, SPNB_NA, false}; @@ -4176,14 +4336,14 @@ static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred, LHS = CmpLHS; RHS = CmpRHS; - // If the predicate is an "or-equal" (FP) predicate, then signed zeroes may - // return inconsistent results between implementations. - // (0.0 <= -0.0) ? 
0.0 : -0.0 // Returns 0.0 - // minNum(0.0, -0.0) // May return -0.0 or 0.0 (IEEE 754-2008 5.3.1) - // Therefore we behave conservatively and only proceed if at least one of the - // operands is known to not be zero, or if we don't care about signed zeroes. + // Signed zero may return inconsistent results between implementations. + // (0.0 <= -0.0) ? 0.0 : -0.0 // Returns 0.0 + // minNum(0.0, -0.0) // May return -0.0 or 0.0 (IEEE 754-2008 5.3.1) + // Therefore, we behave conservatively and only proceed if at least one of the + // operands is known to not be zero or if we don't care about signed zero. switch (Pred) { default: break; + // FIXME: Include OGT/OLT/UGT/ULT. case CmpInst::FCMP_OGE: case CmpInst::FCMP_OLE: case CmpInst::FCMP_UGE: case CmpInst::FCMP_ULE: if (!FMF.noSignedZeros() && !isKnownNonZero(CmpLHS) && @@ -4431,14 +4591,24 @@ SelectPatternResult llvm::matchSelectPattern(Value *V, Value *&LHS, Value *&RHS, // Deal with type mismatches. if (CastOp && CmpLHS->getType() != TrueVal->getType()) { - if (Value *C = lookThroughCast(CmpI, TrueVal, FalseVal, CastOp)) + if (Value *C = lookThroughCast(CmpI, TrueVal, FalseVal, CastOp)) { + // If this is a potential fmin/fmax with a cast to integer, then ignore + // -0.0 because there is no corresponding integer value. + if (*CastOp == Instruction::FPToSI || *CastOp == Instruction::FPToUI) + FMF.setNoSignedZeros(); return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS, cast(TrueVal)->getOperand(0), C, LHS, RHS); - if (Value *C = lookThroughCast(CmpI, FalseVal, TrueVal, CastOp)) + } + if (Value *C = lookThroughCast(CmpI, FalseVal, TrueVal, CastOp)) { + // If this is a potential fmin/fmax with a cast to integer, then ignore + // -0.0 because there is no corresponding integer value. + if (*CastOp == Instruction::FPToSI || *CastOp == Instruction::FPToUI) + FMF.setNoSignedZeros(); return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS, C, cast(FalseVal)->getOperand(0), LHS, RHS); + } } return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS, TrueVal, FalseVal, LHS, RHS); diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp index b8b56d79c827..d8be4ad42ad5 100644 --- a/lib/AsmParser/LLLexer.cpp +++ b/lib/AsmParser/LLLexer.cpp @@ -664,6 +664,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(strictfp); KEYWORD(safestack); KEYWORD(sanitize_address); + KEYWORD(sanitize_hwaddress); KEYWORD(sanitize_thread); KEYWORD(sanitize_memory); KEYWORD(swifterror); diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index 5b661ceb1800..f94b616e5968 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -715,6 +715,13 @@ static bool isValidVisibilityForLinkage(unsigned V, unsigned L) { (GlobalValue::VisibilityTypes)V == GlobalValue::DefaultVisibility; } +// If there was an explicit dso_local, update GV. In the absence of an explicit +// dso_local we keep the default value. 
+static void maybeSetDSOLocal(bool DSOLocal, GlobalValue &GV) { + if (DSOLocal) + GV.setDSOLocal(true); +} + /// parseIndirectSymbol: /// ::= GlobalVar '=' OptionalLinkage OptionalPreemptionSpecifier /// OptionalVisibility OptionalDLLStorageClass @@ -749,11 +756,6 @@ bool LLParser::parseIndirectSymbol(const std::string &Name, LocTy NameLoc, return Error(NameLoc, "symbol with local linkage must have default visibility"); - if (DSOLocal && !IsAlias) { - return Error(NameLoc, - "dso_local is invalid on ifunc"); - } - Type *Ty; LocTy ExplicitTypeLoc = Lex.getLoc(); if (ParseType(Ty) || @@ -826,7 +828,7 @@ bool LLParser::parseIndirectSymbol(const std::string &Name, LocTy NameLoc, GA->setVisibility((GlobalValue::VisibilityTypes)Visibility); GA->setDLLStorageClass((GlobalValue::DLLStorageClassTypes)DLLStorageClass); GA->setUnnamedAddr(UnnamedAddr); - GA->setDSOLocal(DSOLocal); + maybeSetDSOLocal(DSOLocal, *GA); if (Name.empty()) NumberedVals.push_back(GA.get()); @@ -947,7 +949,7 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc, GV->setInitializer(Init); GV->setConstant(IsConstant); GV->setLinkage((GlobalValue::LinkageTypes)Linkage); - GV->setDSOLocal(DSOLocal); + maybeSetDSOLocal(DSOLocal, *GV); GV->setVisibility((GlobalValue::VisibilityTypes)Visibility); GV->setDLLStorageClass((GlobalValue::DLLStorageClassTypes)DLLStorageClass); GV->setExternallyInitialized(IsExternallyInitialized); @@ -1144,6 +1146,8 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B, case lltok::kw_safestack: B.addAttribute(Attribute::SafeStack); break; case lltok::kw_sanitize_address: B.addAttribute(Attribute::SanitizeAddress); break; + case lltok::kw_sanitize_hwaddress: + B.addAttribute(Attribute::SanitizeHWAddress); break; case lltok::kw_sanitize_thread: B.addAttribute(Attribute::SanitizeThread); break; case lltok::kw_sanitize_memory: @@ -1468,6 +1472,7 @@ bool LLParser::ParseOptionalParamAttrs(AttrBuilder &B) { case lltok::kw_optsize: case lltok::kw_returns_twice: case lltok::kw_sanitize_address: + case lltok::kw_sanitize_hwaddress: case lltok::kw_sanitize_memory: case lltok::kw_sanitize_thread: case lltok::kw_ssp: @@ -1560,6 +1565,7 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) { case lltok::kw_optsize: case lltok::kw_returns_twice: case lltok::kw_sanitize_address: + case lltok::kw_sanitize_hwaddress: case lltok::kw_sanitize_memory: case lltok::kw_sanitize_thread: case lltok::kw_ssp: @@ -4919,7 +4925,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { NumberedVals.push_back(Fn); Fn->setLinkage((GlobalValue::LinkageTypes)Linkage); - Fn->setDSOLocal(DSOLocal); + maybeSetDSOLocal(DSOLocal, *Fn); Fn->setVisibility((GlobalValue::VisibilityTypes)Visibility); Fn->setDLLStorageClass((GlobalValue::DLLStorageClassTypes)DLLStorageClass); Fn->setCallingConv(CC); diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h index 0c5cf6b5d455..ad826cc4fd21 100644 --- a/lib/AsmParser/LLToken.h +++ b/lib/AsmParser/LLToken.h @@ -172,6 +172,7 @@ enum Kind { kw_alwaysinline, kw_argmemonly, kw_sanitize_address, + kw_sanitize_hwaddress, kw_builtin, kw_byval, kw_inalloca, diff --git a/lib/BinaryFormat/Dwarf.cpp b/lib/BinaryFormat/Dwarf.cpp index 86e3b02577fd..593ce7a1965c 100644 --- a/lib/BinaryFormat/Dwarf.cpp +++ b/lib/BinaryFormat/Dwarf.cpp @@ -567,6 +567,17 @@ StringRef llvm::dwarf::AttributeValueString(uint16_t Attr, unsigned Val) { return StringRef(); } +StringRef llvm::dwarf::IndexString(unsigned Idx) { + switch (Idx) { + default: + return StringRef(); +#define 
HANDLE_DW_IDX(ID, NAME) \ + case DW_IDX_##NAME: \ + return "DW_IDX_" #NAME; +#include "llvm/BinaryFormat/Dwarf.def" + } +} + bool llvm::dwarf::isValidFormForVersion(Form F, unsigned Version, bool ExtensionsOk) { if (FormVendor(F) == DWARF_VENDOR_DWARF) { diff --git a/lib/Bitcode/Reader/BitReader.cpp b/lib/Bitcode/Reader/BitReader.cpp index f64785b3ad92..3ec45956b3e5 100644 --- a/lib/Bitcode/Reader/BitReader.cpp +++ b/lib/Bitcode/Reader/BitReader.cpp @@ -10,7 +10,6 @@ #include "llvm-c/BitReader.h" #include "llvm-c/Core.h" #include "llvm/Bitcode/BitcodeReader.h" -#include "llvm/IR/DiagnosticPrinter.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/Support/MemoryBuffer.h" diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index 0b03d0062d98..7ffa62488799 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -1156,6 +1156,7 @@ static uint64_t getRawAttributeMask(Attribute::AttrKind Val) { case Attribute::WriteOnly: return 1ULL << 53; case Attribute::Speculatable: return 1ULL << 54; case Attribute::StrictFP: return 1ULL << 55; + case Attribute::SanitizeHWAddress: return 1ULL << 56; case Attribute::Dereferenceable: llvm_unreachable("dereferenceable attribute not supported in raw format"); break; @@ -1368,6 +1369,8 @@ static Attribute::AttrKind getAttrFromCode(uint64_t Code) { return Attribute::StructRet; case bitc::ATTR_KIND_SANITIZE_ADDRESS: return Attribute::SanitizeAddress; + case bitc::ATTR_KIND_SANITIZE_HWADDRESS: + return Attribute::SanitizeHWAddress; case bitc::ATTR_KIND_SANITIZE_THREAD: return Attribute::SanitizeThread; case bitc::ATTR_KIND_SANITIZE_MEMORY: @@ -3051,14 +3054,17 @@ Error BitcodeReader::parseGlobalIndirectSymbolRecord( // FIXME: Change to an error if non-default in 4.0. NewGA->setVisibility(getDecodedVisibility(Record[VisInd])); } - if (OpNum != Record.size()) - NewGA->setDLLStorageClass(getDecodedDLLStorageClass(Record[OpNum++])); - else - upgradeDLLImportExportLinkage(NewGA, Linkage); - if (OpNum != Record.size()) - NewGA->setThreadLocalMode(getDecodedThreadLocalMode(Record[OpNum++])); - if (OpNum != Record.size()) - NewGA->setUnnamedAddr(getDecodedUnnamedAddrType(Record[OpNum++])); + if (BitCode == bitc::MODULE_CODE_ALIAS || + BitCode == bitc::MODULE_CODE_ALIAS_OLD) { + if (OpNum != Record.size()) + NewGA->setDLLStorageClass(getDecodedDLLStorageClass(Record[OpNum++])); + else + upgradeDLLImportExportLinkage(NewGA, Linkage); + if (OpNum != Record.size()) + NewGA->setThreadLocalMode(getDecodedThreadLocalMode(Record[OpNum++])); + if (OpNum != Record.size()) + NewGA->setUnnamedAddr(getDecodedUnnamedAddrType(Record[OpNum++])); + } if (OpNum != Record.size()) NewGA->setDSOLocal(getDecodedDSOLocal(Record[OpNum++])); ValueList.push_back(NewGA); @@ -4807,8 +4813,12 @@ void ModuleSummaryIndexBitcodeReader::setValueGUID( if (PrintSummaryGUIDs) dbgs() << "GUID " << ValueGUID << "(" << OriginalNameID << ") is " << ValueName << "\n"; - ValueIdToValueInfoMap[ValueID] = - std::make_pair(TheIndex.getOrInsertValueInfo(ValueGUID), OriginalNameID); + + // UseStrtab is false for legacy summary formats and value names are + // created on stack. We can't use them outside of parseValueSymbolTable. + ValueIdToValueInfoMap[ValueID] = std::make_pair( + TheIndex.getOrInsertValueInfo(ValueGUID, UseStrtab ? 
ValueName : ""), + OriginalNameID); } // Specialized value symbol table parser used when reading module index @@ -5199,6 +5209,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) { if (!AliaseeInModule) return error("Alias expects aliasee summary to be parsed"); AS->setAliasee(AliaseeInModule); + AS->setAliaseeGUID(AliaseeGUID); auto GUID = getValueInfoFromValueId(ValueID); AS->setOriginalName(GUID.second); @@ -5285,9 +5296,8 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) { getValueInfoFromValueId(AliaseeValueId).first.getGUID(); auto AliaseeInModule = TheIndex.findSummaryInModule(AliaseeGUID, AS->modulePath()); - if (!AliaseeInModule) - return error("Alias expects aliasee summary to be parsed"); AS->setAliasee(AliaseeInModule); + AS->setAliaseeGUID(AliaseeGUID); ValueInfo VI = getValueInfoFromValueId(ValueID).first; LastSeenGUID = VI.getGUID(); @@ -5673,7 +5683,8 @@ Expected> BitcodeModule::getSummary() { BitstreamCursor Stream(Buffer); Stream.JumpToBit(ModuleBit); - auto Index = llvm::make_unique(); + auto Index = + llvm::make_unique(/*IsPerformingAnalysis=*/false); ModuleSummaryIndexBitcodeReader R(std::move(Stream), Strtab, *Index, ModuleIdentifier, 0); diff --git a/lib/Bitcode/Reader/MetadataLoader.cpp b/lib/Bitcode/Reader/MetadataLoader.cpp index daae5edbd170..7d79956a8118 100644 --- a/lib/Bitcode/Reader/MetadataLoader.cpp +++ b/lib/Bitcode/Reader/MetadataLoader.cpp @@ -21,7 +21,6 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" -#include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" #include "llvm/Bitcode/BitcodeReader.h" #include "llvm/Bitcode/BitstreamReader.h" @@ -30,7 +29,6 @@ #include "llvm/IR/Attributes.h" #include "llvm/IR/AutoUpgrade.h" #include "llvm/IR/BasicBlock.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Comdat.h" #include "llvm/IR/Constant.h" @@ -39,7 +37,6 @@ #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/DiagnosticPrinter.h" #include "llvm/IR/Function.h" #include "llvm/IR/GVMaterializer.h" @@ -59,7 +56,6 @@ #include "llvm/IR/Module.h" #include "llvm/IR/ModuleSummaryIndex.h" #include "llvm/IR/OperandTraits.h" -#include "llvm/IR/Operator.h" #include "llvm/IR/TrackingMDRef.h" #include "llvm/IR/Type.h" #include "llvm/IR/ValueHandle.h" @@ -169,7 +165,7 @@ class BitcodeReaderMetadataList { /// necessary. Metadata *getMetadataFwdRef(unsigned Idx); - /// Return the the given metadata only if it is fully resolved. + /// Return the given metadata only if it is fully resolved. /// /// Gives the same result as \a lookup(), unless \a MDNode::isResolved() /// would give \c false. diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index 03a77c9734e4..f3f33c4474bd 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -413,7 +413,7 @@ class IndexBitcodeWriter : public BitcodeWriterBase { // in writing out the call graph edges. Save the mapping from GUID // to the new global value id to use when writing those edges, which // are currently saved in the index in terms of GUID. 
- forEachSummary([&](GVInfo I) { + forEachSummary([&](GVInfo I, bool) { GUIDToValueIdMap[I.first] = ++GlobalValueId; }); } @@ -428,12 +428,18 @@ class IndexBitcodeWriter : public BitcodeWriterBase { void forEachSummary(Functor Callback) { if (ModuleToSummariesForIndex) { for (auto &M : *ModuleToSummariesForIndex) - for (auto &Summary : M.second) - Callback(Summary); + for (auto &Summary : M.second) { + Callback(Summary, false); + // Ensure aliasee is handled, e.g. for assigning a valueId, + // even if we are not importing the aliasee directly (the + // imported alias will contain a copy of aliasee). + if (auto *AS = dyn_cast(Summary.getSecond())) + Callback({AS->getAliaseeGUID(), &AS->getAliasee()}, true); + } } else { for (auto &Summaries : Index) for (auto &Summary : Summaries.second.SummaryList) - Callback({Summaries.first, Summary.get()}); + Callback({Summaries.first, Summary.get()}, false); } } @@ -663,6 +669,8 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) { return bitc::ATTR_KIND_STRUCT_RET; case Attribute::SanitizeAddress: return bitc::ATTR_KIND_SANITIZE_ADDRESS; + case Attribute::SanitizeHWAddress: + return bitc::ATTR_KIND_SANITIZE_HWADDRESS; case Attribute::SanitizeThread: return bitc::ATTR_KIND_SANITIZE_THREAD; case Attribute::SanitizeMemory: @@ -1294,7 +1302,7 @@ void ModuleBitcodeWriter::writeModuleInfo() { // Emit the ifunc information. for (const GlobalIFunc &I : M.ifuncs()) { // IFUNC: [strtab offset, strtab size, ifunc type, address space, resolver - // val#, linkage, visibility] + // val#, linkage, visibility, DSO_Local] Vals.push_back(addToStrtab(I.getName())); Vals.push_back(I.getName().size()); Vals.push_back(VE.getTypeID(I.getValueType())); @@ -1302,6 +1310,7 @@ void ModuleBitcodeWriter::writeModuleInfo() { Vals.push_back(VE.getValueID(I.getResolver())); Vals.push_back(getEncodedLinkage(I)); Vals.push_back(getEncodedVisibility(I)); + Vals.push_back(I.isDSOLocal()); Stream.EmitRecord(bitc::MODULE_CODE_IFUNC, Vals); Vals.clear(); } @@ -3363,7 +3372,7 @@ void ModuleBitcodeWriterBase::writePerModuleFunctionSummaryRecord( for (auto &RI : FS->refs()) NameVals.push_back(VE.getValueID(RI.getValue())); - bool HasProfileData = F.getEntryCount().hasValue(); + bool HasProfileData = F.hasProfileData(); for (auto &ECI : FS->calls()) { NameVals.push_back(getValueId(ECI.first)); if (HasProfileData) @@ -3602,7 +3611,7 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() { NameVals.clear(); }; - forEachSummary([&](GVInfo I) { + forEachSummary([&](GVInfo I, bool IsAliasee) { GlobalValueSummary *S = I.second; assert(S); @@ -3610,6 +3619,12 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() { assert(ValueId); SummaryToValueIdMap[S] = *ValueId; + // If this is invoked for an aliasee, we want to record the above + // mapping, but then not emit a summary entry (if the aliasee is + // to be imported, we will invoke this separately with IsAliasee=false). + if (IsAliasee) + return; + if (auto *AS = dyn_cast(S)) { // Will process aliases as a post-pass because the reader wants all // global to be loaded first. 
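// Illustrative sketch, not part of the patch: a reduced model of the
// forEachSummary change above. Each aliasee is announced to the callback with
// an IsAliasee flag so it can still be assigned a value id (the imported
// alias carries a copy of the aliasee), while the writer skips emitting a
// second summary record for that visit. `Summary` and `enumerateSummaries`
// are invented stand-ins for this sketch, not the LLVM types.
#include <functional>
#include <iostream>
#include <map>
#include <string>
#include <vector>

struct Summary {
  std::string Name;
  std::string AliaseeName; // empty when this summary is not an alias
};

static void enumerateSummaries(
    const std::vector<Summary> &Summaries,
    std::function<void(const std::string &, bool /*IsAliasee*/)> Callback) {
  for (const Summary &S : Summaries) {
    Callback(S.Name, /*IsAliasee=*/false);
    // Announce the aliasee as well, even if it is not being imported itself.
    if (!S.AliaseeName.empty())
      Callback(S.AliaseeName, /*IsAliasee=*/true);
  }
}

int main() {
  // Only the alias is imported; its aliasee "foo" has no summary of its own.
  std::vector<Summary> Summaries = {{"bar_alias", "foo"}};
  std::map<std::string, unsigned> ValueIds;
  unsigned NextId = 0;

  enumerateSummaries(Summaries, [&](const std::string &Name, bool IsAliasee) {
    if (!ValueIds.count(Name))
      ValueIds[Name] = NextId++; // every visited symbol gets a value id
    if (IsAliasee)
      return; // but no summary record is emitted for the aliasee visit
    std::cout << "emit summary for " << Name << " (value id " << ValueIds[Name]
              << ")\n";
  });

  for (const auto &KV : ValueIds)
    std::cout << KV.first << " -> id " << KV.second << "\n";
  return 0;
}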
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp index e527110872a4..9ed7450d5cb6 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -141,7 +141,7 @@ AggressiveAntiDepBreaker::AggressiveAntiDepBreaker( DEBUG(dbgs() << "AntiDep Critical-Path Registers:"); DEBUG(for (unsigned r : CriticalPathSet.set_bits()) - dbgs() << " " << TRI->getName(r)); + dbgs() << " " << printReg(r, TRI)); DEBUG(dbgs() << '\n'); } @@ -216,7 +216,7 @@ void AggressiveAntiDepBreaker::Observe(MachineInstr &MI, unsigned Count, // schedule region). if (State->IsLive(Reg)) { DEBUG(if (State->GetGroup(Reg) != 0) - dbgs() << " " << TRI->getName(Reg) << "=g" << + dbgs() << " " << printReg(Reg, TRI) << "=g" << State->GetGroup(Reg) << "->g0(region live-out)"); State->UnionGroups(Reg, 0); } else if ((DefIndices[Reg] < InsertPosIndex) @@ -323,7 +323,7 @@ void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx, RegRefs.erase(Reg); State->LeaveGroup(Reg); DEBUG(if (header) { - dbgs() << header << TRI->getName(Reg); header = nullptr; }); + dbgs() << header << printReg(Reg, TRI); header = nullptr; }); DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << tag); // Repeat for subregisters. Note that we only do this if the superregister // was not live because otherwise, regardless whether we have an explicit @@ -337,8 +337,8 @@ void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx, RegRefs.erase(SubregReg); State->LeaveGroup(SubregReg); DEBUG(if (header) { - dbgs() << header << TRI->getName(Reg); header = nullptr; }); - DEBUG(dbgs() << " " << TRI->getName(SubregReg) << "->g" << + dbgs() << header << printReg(Reg, TRI); header = nullptr; }); + DEBUG(dbgs() << " " << printReg(SubregReg, TRI) << "->g" << State->GetGroup(SubregReg) << tag); } } @@ -374,7 +374,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction( unsigned Reg = MO.getReg(); if (Reg == 0) continue; - DEBUG(dbgs() << " " << TRI->getName(Reg) << "=g" << State->GetGroup(Reg)); + DEBUG(dbgs() << " " << printReg(Reg, TRI) << "=g" << State->GetGroup(Reg)); // If MI's defs have a special allocation requirement, don't allow // any def registers to be changed. Also assume all registers @@ -393,8 +393,8 @@ void AggressiveAntiDepBreaker::PrescanInstruction( unsigned AliasReg = *AI; if (State->IsLive(AliasReg)) { State->UnionGroups(Reg, AliasReg); - DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << "(via " << - TRI->getName(AliasReg) << ")"); + DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << "(via " + << printReg(AliasReg, TRI) << ")"); } } @@ -448,11 +448,11 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr &MI, // FIXME: The issue with predicated instruction is more complex. We are being // conservatively here because the kill markers cannot be trusted after // if-conversion: - // %R6 = LDR %SP, %reg0, 92, pred:14, pred:%reg0; mem:LD4[FixedStack14] + // %r6 = LDR %sp, %reg0, 92, 14, %reg0; mem:LD4[FixedStack14] // ... - // STR %R0, %R6, %reg0, 0, pred:0, pred:%CPSR; mem:ST4[%395] - // %R6 = LDR %SP, %reg0, 100, pred:0, pred:%CPSR; mem:LD4[FixedStack12] - // STR %R0, %R6, %reg0, 0, pred:14, pred:%reg0; mem:ST4[%396](align=8) + // STR %r0, killed %r6, %reg0, 0, 0, %cpsr; mem:ST4[%395] + // %r6 = LDR %sp, %reg0, 100, 0, %cpsr; mem:LD4[FixedStack12] + // STR %r0, killed %r6, %reg0, 0, 14, %reg0; mem:ST4[%396](align=8) // // The first R6 kill is not really a kill since it's killed by a predicated // instruction which may not be executed. 
The second R6 def may or may not @@ -469,8 +469,7 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr &MI, unsigned Reg = MO.getReg(); if (Reg == 0) continue; - DEBUG(dbgs() << " " << TRI->getName(Reg) << "=g" << - State->GetGroup(Reg)); + DEBUG(dbgs() << " " << printReg(Reg, TRI) << "=g" << State->GetGroup(Reg)); // It wasn't previously live but now it is, this is a kill. Forget // the previous live-range information and start a new live-range @@ -505,10 +504,10 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr &MI, if (Reg == 0) continue; if (FirstReg != 0) { - DEBUG(dbgs() << "=" << TRI->getName(Reg)); + DEBUG(dbgs() << "=" << printReg(Reg, TRI)); State->UnionGroups(FirstReg, Reg); } else { - DEBUG(dbgs() << " " << TRI->getName(Reg)); + DEBUG(dbgs() << " " << printReg(Reg, TRI)); FirstReg = Reg; } } @@ -574,7 +573,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( // If Reg has any references, then collect possible rename regs if (RegRefs.count(Reg) > 0) { - DEBUG(dbgs() << "\t\t" << TRI->getName(Reg) << ":"); + DEBUG(dbgs() << "\t\t" << printReg(Reg, TRI) << ":"); BitVector &BV = RenameRegisterMap[Reg]; assert(BV.empty()); @@ -583,7 +582,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( DEBUG({ dbgs() << " ::"; for (unsigned r : BV.set_bits()) - dbgs() << " " << TRI->getName(r); + dbgs() << " " << printReg(r, TRI); dbgs() << "\n"; }); } @@ -608,8 +607,8 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( if (renamecnt++ % DebugDiv != DebugMod) return false; - dbgs() << "*** Performing rename " << TRI->getName(SuperReg) << - " for debug ***\n"; + dbgs() << "*** Performing rename " << printReg(SuperReg, TRI) + << " for debug ***\n"; } #endif @@ -646,7 +645,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( // Don't replace a register with itself. if (NewSuperReg == SuperReg) continue; - DEBUG(dbgs() << " [" << TRI->getName(NewSuperReg) << ':'); + DEBUG(dbgs() << " [" << printReg(NewSuperReg, TRI) << ':'); RenameMap.clear(); // For each referenced group register (which must be a SuperReg or @@ -663,7 +662,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( NewReg = TRI->getSubReg(NewSuperReg, NewSubRegIdx); } - DEBUG(dbgs() << " " << TRI->getName(NewReg)); + DEBUG(dbgs() << " " << printReg(NewReg, TRI)); // Check if Reg can be renamed to NewReg. 
if (!RenameRegisterMap[Reg].test(NewReg)) { @@ -684,7 +683,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( unsigned AliasReg = *AI; if (State->IsLive(AliasReg) || (KillIndices[Reg] > DefIndices[AliasReg])) { - DEBUG(dbgs() << "(alias " << TRI->getName(AliasReg) << " live)"); + DEBUG(dbgs() << "(alias " << printReg(AliasReg, TRI) << " live)"); found = true; break; } @@ -793,7 +792,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( DEBUG(dbgs() << "Available regs:"); for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) { if (!State->IsLive(Reg)) - DEBUG(dbgs() << " " << TRI->getName(Reg)); + DEBUG(dbgs() << " " << printReg(Reg, TRI)); } DEBUG(dbgs() << '\n'); #endif @@ -849,7 +848,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( (Edge->getKind() != SDep::Output)) continue; unsigned AntiDepReg = Edge->getReg(); - DEBUG(dbgs() << "\tAntidep reg: " << TRI->getName(AntiDepReg)); + DEBUG(dbgs() << "\tAntidep reg: " << printReg(AntiDepReg, TRI)); assert(AntiDepReg != 0 && "Anti-dependence on reg0?"); if (!MRI.isAllocatable(AntiDepReg)) { @@ -952,7 +951,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( std::map RenameMap; if (FindSuitableFreeRegisters(GroupIndex, RenameOrder, RenameMap)) { DEBUG(dbgs() << "\tBreaking anti-dependence edge on " - << TRI->getName(AntiDepReg) << ":"); + << printReg(AntiDepReg, TRI) << ":"); // Handle each group register... for (std::map::iterator @@ -960,9 +959,9 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( unsigned CurrReg = S->first; unsigned NewReg = S->second; - DEBUG(dbgs() << " " << TRI->getName(CurrReg) << "->" << - TRI->getName(NewReg) << "(" << - RegRefs.count(CurrReg) << " refs)"); + DEBUG(dbgs() << " " << printReg(CurrReg, TRI) << "->" + << printReg(NewReg, TRI) << "(" + << RegRefs.count(CurrReg) << " refs)"); // Update the references to the old register CurrReg to // refer to the new register NewReg. 
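// Illustrative sketch, not part of the patch: the DEBUG output above switches
// from TRI->getName(Reg) to printReg(Reg, TRI), which yields a lazily
// formatted, streamable object rather than a raw name. The self-contained
// model below shows that idiom with a Printable-like wrapper; `PrintableLike`,
// `printRegLike`, and the RegNames table are invented for this sketch and are
// much simpler than the real llvm::printReg.
#include <functional>
#include <iostream>
#include <ostream>

// Minimal stand-in for llvm::Printable: formatting runs only when streamed.
struct PrintableLike {
  std::function<void(std::ostream &)> Print;
};

static std::ostream &operator<<(std::ostream &OS, const PrintableLike &P) {
  P.Print(OS);
  return OS;
}

static const char *const RegNames[] = {"noreg", "r0", "r1", "sp"};

static PrintableLike printRegLike(unsigned Reg) {
  return {[Reg](std::ostream &OS) {
    if (Reg == 0)
      OS << "%noreg"; // tolerate the "no register" case
    else if (Reg < sizeof(RegNames) / sizeof(RegNames[0]))
      OS << '%' << RegNames[Reg];
    else
      OS << "%physreg" << Reg; // fall back to a numbered name
  }};
}

int main() {
  // Usage mirrors the debug statements above: stream the wrapper directly.
  std::cout << "Antidep reg: " << printRegLike(3) << "\n"; // prints %sp
  return 0;
}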
diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp index 7fa66258e36c..0731ae575437 100644 --- a/lib/CodeGen/Analysis.cpp +++ b/lib/CodeGen/Analysis.cpp @@ -14,7 +14,6 @@ #include "llvm/CodeGen/Analysis.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" @@ -669,7 +668,7 @@ llvm::getFuncletMembership(const MachineFunction &MF) { int EntryBBNumber = MF.front().getNumber(); bool IsSEH = isAsynchronousEHPersonality( - classifyEHPersonality(MF.getFunction()->getPersonalityFn())); + classifyEHPersonality(MF.getFunction().getPersonalityFn())); const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); SmallVector FuncletBlocks; diff --git a/lib/CodeGen/AsmPrinter/ARMException.cpp b/lib/CodeGen/AsmPrinter/ARMException.cpp index 3b10a7326115..15cfbd5c40ff 100644 --- a/lib/CodeGen/AsmPrinter/ARMException.cpp +++ b/lib/CodeGen/AsmPrinter/ARMException.cpp @@ -12,20 +12,13 @@ //===----------------------------------------------------------------------===// #include "DwarfException.h" -#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" -#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/TargetFrameLowering.h" -#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Mangler.h" #include "llvm/IR/Module.h" #include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" @@ -67,16 +60,16 @@ void ARMException::beginFunction(const MachineFunction *MF) { /// void ARMException::endFunction(const MachineFunction *MF) { ARMTargetStreamer &ATS = getTargetStreamer(); - const Function *F = MF->getFunction(); + const Function &F = MF->getFunction(); const Function *Per = nullptr; - if (F->hasPersonalityFn()) - Per = dyn_cast(F->getPersonalityFn()->stripPointerCasts()); + if (F.hasPersonalityFn()) + Per = dyn_cast(F.getPersonalityFn()->stripPointerCasts()); bool forceEmitPersonality = - F->hasPersonalityFn() && !isNoOpWithoutInvoke(classifyEHPersonality(Per)) && - F->needsUnwindTableEntry(); + F.hasPersonalityFn() && !isNoOpWithoutInvoke(classifyEHPersonality(Per)) && + F.needsUnwindTableEntry(); bool shouldEmitPersonality = forceEmitPersonality || !MF->getLandingPads().empty(); - if (!Asm->MF->getFunction()->needsUnwindTableEntry() && + if (!Asm->MF->getFunction().needsUnwindTableEntry() && !shouldEmitPersonality) ATS.emitCantUnwind(); else if (shouldEmitPersonality) { diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index c9b0f9aa556f..87d91377dc90 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -16,6 +16,7 @@ #include "CodeViewDebug.h" #include "DwarfDebug.h" #include "DwarfException.h" +#include "WinCFGuard.h" #include "WinException.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" @@ -130,6 +131,8 @@ static const char *const DbgTimerName = "emit"; static const char *const DbgTimerDescription = "Debug Info Emission"; static const char *const EHTimerName = "write_exception"; static const char *const EHTimerDescription = "DWARF Exception Writer"; +static const char *const 
CFGuardName = "Control Flow Guard"; +static const char *const CFGuardDescription = "Control Flow Guard Tables"; static const char *const CodeViewLineTablesGroupName = "linetables"; static const char *const CodeViewLineTablesGroupDescription = "CodeView Line Tables"; @@ -254,28 +257,8 @@ bool AsmPrinter::doInitialization(Module &M) { // alternative is duplicated code in each of the target asm printers that // use the directive, where it would need the same conditionalization // anyway. - const Triple &TT = TM.getTargetTriple(); - // If there is a version specified, Major will be non-zero. - if (TT.isOSDarwin() && TT.getOSMajorVersion() != 0) { - unsigned Major, Minor, Update; - MCVersionMinType VersionType; - if (TT.isWatchOS()) { - VersionType = MCVM_WatchOSVersionMin; - TT.getWatchOSVersion(Major, Minor, Update); - } else if (TT.isTvOS()) { - VersionType = MCVM_TvOSVersionMin; - TT.getiOSVersion(Major, Minor, Update); - } else if (TT.isMacOSX()) { - VersionType = MCVM_OSXVersionMin; - if (!TT.getMacOSXVersion(Major, Minor, Update)) - Major = 0; - } else { - VersionType = MCVM_IOSVersionMin; - TT.getiOSVersion(Major, Minor, Update); - } - if (Major != 0) - OutStreamer->EmitVersionMin(VersionType, Major, Minor, Update); - } + const Triple &Target = TM.getTargetTriple(); + OutStreamer->EmitVersionForTarget(Target); // Allow the target to emit any magic that it wants at the start of the file. EmitStartOfAsmFile(M); @@ -374,6 +357,13 @@ bool AsmPrinter::doInitialization(Module &M) { if (ES) Handlers.push_back(HandlerInfo(ES, EHTimerName, EHTimerDescription, DWARFGroupName, DWARFGroupDescription)); + + if (mdconst::extract_or_null( + MMI->getModule()->getModuleFlag("cfguard"))) + Handlers.push_back(HandlerInfo(new WinCFGuard(this), CFGuardName, + CFGuardDescription, DWARFGroupName, + DWARFGroupDescription)); + return false; } @@ -641,35 +631,35 @@ void AsmPrinter::EmitDebugThreadLocal(const MCExpr *Value, /// EmitFunctionHeader - This method emits the header for the current /// function. void AsmPrinter::EmitFunctionHeader() { - const Function *F = MF->getFunction(); + const Function &F = MF->getFunction(); if (isVerbose()) OutStreamer->GetCommentOS() << "-- Begin function " - << GlobalValue::dropLLVMManglingEscape(F->getName()) << '\n'; + << GlobalValue::dropLLVMManglingEscape(F.getName()) << '\n'; // Print out constants referenced by the function EmitConstantPool(); // Print the 'header' of function. - OutStreamer->SwitchSection(getObjFileLowering().SectionForGlobal(F, TM)); - EmitVisibility(CurrentFnSym, F->getVisibility()); + OutStreamer->SwitchSection(getObjFileLowering().SectionForGlobal(&F, TM)); + EmitVisibility(CurrentFnSym, F.getVisibility()); - EmitLinkage(F, CurrentFnSym); + EmitLinkage(&F, CurrentFnSym); if (MAI->hasFunctionAlignment()) - EmitAlignment(MF->getAlignment(), F); + EmitAlignment(MF->getAlignment(), &F); if (MAI->hasDotTypeDotSizeDirective()) OutStreamer->EmitSymbolAttribute(CurrentFnSym, MCSA_ELF_TypeFunction); if (isVerbose()) { - F->printAsOperand(OutStreamer->GetCommentOS(), - /*PrintType=*/false, F->getParent()); + F.printAsOperand(OutStreamer->GetCommentOS(), + /*PrintType=*/false, F.getParent()); OutStreamer->GetCommentOS() << '\n'; } // Emit the prefix data. - if (F->hasPrefixData()) { + if (F.hasPrefixData()) { if (MAI->hasSubsectionsViaSymbols()) { // Preserving prefix data on platforms which use subsections-via-symbols // is a bit tricky. 
Here we introduce a symbol for the prefix data @@ -678,12 +668,12 @@ void AsmPrinter::EmitFunctionHeader() { MCSymbol *PrefixSym = OutContext.createLinkerPrivateTempSymbol(); OutStreamer->EmitLabel(PrefixSym); - EmitGlobalConstant(F->getParent()->getDataLayout(), F->getPrefixData()); + EmitGlobalConstant(F.getParent()->getDataLayout(), F.getPrefixData()); // Emit an .alt_entry directive for the actual function symbol. OutStreamer->EmitSymbolAttribute(CurrentFnSym, MCSA_AltEntry); } else { - EmitGlobalConstant(F->getParent()->getDataLayout(), F->getPrefixData()); + EmitGlobalConstant(F.getParent()->getDataLayout(), F.getPrefixData()); } } @@ -695,7 +685,7 @@ void AsmPrinter::EmitFunctionHeader() { // references to the dangling symbols. Emit them at the start of the function // so that we don't get references to undefined symbols. std::vector DeadBlockSyms; - MMI->takeDeletedSymbolsForFunction(F, DeadBlockSyms); + MMI->takeDeletedSymbolsForFunction(&F, DeadBlockSyms); for (unsigned i = 0, e = DeadBlockSyms.size(); i != e; ++i) { OutStreamer->AddComment("Address taken block that was later removed"); OutStreamer->EmitLabel(DeadBlockSyms[i]); @@ -720,8 +710,8 @@ void AsmPrinter::EmitFunctionHeader() { } // Emit the prologue data. - if (F->hasPrologueData()) - EmitGlobalConstant(F->getParent()->getDataLayout(), F->getPrologueData()); + if (F.hasPrologueData()) + EmitGlobalConstant(F.getParent()->getDataLayout(), F.getPrologueData()); } /// EmitFunctionEntryLabel - Emit the label that is the entrypoint for the @@ -742,7 +732,9 @@ void AsmPrinter::EmitFunctionEntryLabel() { } /// emitComments - Pretty-print comments for instructions. -static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS, +/// It returns true iff the sched comment was emitted. +/// Otherwise it returns false. +static bool emitComments(const MachineInstr &MI, raw_ostream &CommentOS, AsmPrinter *AP) { const MachineFunction *MF = MI.getMF(); const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); @@ -786,12 +778,16 @@ static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS, CommentOS << " Reload Reuse"; } - if (Commented && AP->EnablePrintSchedInfo) - // If any comment was added above and we need sched info comment then - // add this new comment just after the above comment w/o "\n" between them. - CommentOS << " " << MF->getSubtarget().getSchedInfoStr(MI) << "\n"; - else if (Commented) + if (Commented) { + if (AP->EnablePrintSchedInfo) { + // If any comment was added above and we need sched info comment then add + // this new comment just after the above comment w/o "\n" between them. + CommentOS << " " << MF->getSubtarget().getSchedInfoStr(MI) << "\n"; + return true; + } CommentOS << "\n"; + } + return false; } /// emitImplicitDef - This method emits the specified machine instruction @@ -815,10 +811,8 @@ static void emitKill(const MachineInstr *MI, AsmPrinter &AP) { for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &Op = MI->getOperand(i); assert(Op.isReg() && "KILL instruction must have only register operands"); - OS << ' ' - << printReg(Op.getReg(), - AP.MF->getSubtarget().getRegisterInfo()) - << (Op.isDef() ? "" : ""); + OS << ' ' << (Op.isDef() ? 
"def " : "killed ") + << printReg(Op.getReg(), AP.MF->getSubtarget().getRegisterInfo()); } AP.OutStreamer->AddComment(OS.str()); AP.OutStreamer->AddBlankLine(); @@ -916,7 +910,7 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { AsmPrinter::CFIMoveType AsmPrinter::needsCFIMoves() const { if (MAI->getExceptionHandlingType() == ExceptionHandling::DwarfCFI && - MF->getFunction()->needsUnwindTableEntry()) + MF->getFunction().needsUnwindTableEntry()) return CFI_M_EH; if (MMI->hasDebugInfo()) @@ -926,7 +920,7 @@ AsmPrinter::CFIMoveType AsmPrinter::needsCFIMoves() const { } bool AsmPrinter::needsSEHMoves() { - return MAI->usesWindowsCFI() && MF->getFunction()->needsUnwindTableEntry(); + return MAI->usesWindowsCFI() && MF->getFunction().needsUnwindTableEntry(); } void AsmPrinter::emitCFIInstruction(const MachineInstr &MI) { @@ -964,6 +958,30 @@ void AsmPrinter::emitFrameAlloc(const MachineInstr &MI) { MCConstantExpr::create(FrameOffset, OutContext)); } +void AsmPrinter::emitStackSizeSection(const MachineFunction &MF) { + if (!MF.getTarget().Options.EmitStackSizeSection) + return; + + MCSection *StackSizeSection = getObjFileLowering().getStackSizesSection(); + if (!StackSizeSection) + return; + + const MachineFrameInfo &FrameInfo = MF.getFrameInfo(); + // Don't emit functions with dynamic stack allocations. + if (FrameInfo.hasVarSizedObjects()) + return; + + OutStreamer->PushSection(); + OutStreamer->SwitchSection(StackSizeSection); + + const MCSymbol *FunctionSymbol = getSymbol(&MF.getFunction()); + uint64_t StackSize = FrameInfo.getStackSize(); + OutStreamer->EmitSymbolValue(FunctionSymbol, TM.getPointerSize()); + OutStreamer->EmitULEB128IntValue(StackSize); + + OutStreamer->PopSection(); +} + static bool needFuncLabelsForEHOrDebugInfo(const MachineFunction &MF, MachineModuleInfo *MMI) { if (!MF.getLandingPads().empty() || MF.hasEHFunclets() || MMI->hasDebugInfo()) @@ -971,10 +989,10 @@ static bool needFuncLabelsForEHOrDebugInfo(const MachineFunction &MF, // We might emit an EH table that uses function begin and end labels even if // we don't have any landingpads. 
- if (!MF.getFunction()->hasPersonalityFn()) + if (!MF.getFunction().hasPersonalityFn()) return false; return !isNoOpWithoutInvoke( - classifyEHPersonality(MF.getFunction()->getPersonalityFn())); + classifyEHPersonality(MF.getFunction().getPersonalityFn())); } /// EmitFunctionBody - This method emits the body and trailer for a @@ -1010,8 +1028,10 @@ void AsmPrinter::EmitFunctionBody() { } } - if (isVerbose()) - emitComments(MI, OutStreamer->GetCommentOS(), this); + if (isVerbose() && emitComments(MI, OutStreamer->GetCommentOS(), this)) { + MachineInstr *MIP = const_cast(&MI); + MIP->setAsmPrinterFlag(MachineInstr::NoSchedComment); + } switch (MI.getOpcode()) { case TargetOpcode::CFI_INSTRUCTION: @@ -1059,7 +1079,7 @@ void AsmPrinter::EmitFunctionBody() { EmittedInsts += NumInstsInFunction; MachineOptimizationRemarkAnalysis R(DEBUG_TYPE, "InstructionCount", - MF->getFunction()->getSubprogram(), + MF->getFunction().getSubprogram(), &MF->front()); R << ore::NV("NumInstructions", NumInstsInFunction) << " instructions in function"; @@ -1087,8 +1107,8 @@ void AsmPrinter::EmitFunctionBody() { } } - const Function *F = MF->getFunction(); - for (const auto &BB : *F) { + const Function &F = MF->getFunction(); + for (const auto &BB : F) { if (!BB.hasAddressTaken()) continue; MCSymbol *Sym = GetBlockAddressSymbol(&BB); @@ -1135,6 +1155,9 @@ void AsmPrinter::EmitFunctionBody() { HI.Handler->endFunction(MF); } + // Emit section containing stack size metadata. + emitStackSizeSection(*MF); + if (isVerbose()) OutStreamer->GetCommentOS() << "-- End function\n"; @@ -1407,6 +1430,52 @@ bool AsmPrinter::doFinalization(Module &M) { if (MCSection *S = MAI->getNonexecutableStackSection(OutContext)) OutStreamer->SwitchSection(S); + if (TM.getTargetTriple().isOSBinFormatCOFF()) { + // Emit /EXPORT: flags for each exported global as necessary. + const auto &TLOF = getObjFileLowering(); + std::string Flags; + + for (const GlobalValue &GV : M.global_values()) { + raw_string_ostream OS(Flags); + TLOF.emitLinkerFlagsForGlobal(OS, &GV); + OS.flush(); + if (!Flags.empty()) { + OutStreamer->SwitchSection(TLOF.getDrectveSection()); + OutStreamer->EmitBytes(Flags); + } + Flags.clear(); + } + + // Emit /INCLUDE: flags for each used global as necessary. + if (const auto *LU = M.getNamedGlobal("llvm.used")) { + assert(LU->hasInitializer() && + "expected llvm.used to have an initializer"); + assert(isa(LU->getValueType()) && + "expected llvm.used to be an array type"); + if (const auto *A = cast(LU->getInitializer())) { + for (const Value *Op : A->operands()) { + const auto *GV = + cast(Op->stripPointerCastsNoFollowAliases()); + // Global symbols with internal linkage are not visible to the linker, + // and thus would cause an error when the linker tried to preserve the + // symbol due to the `/include:` directive. + if (GV->hasInternalLinkage()) + continue; + + raw_string_ostream OS(Flags); + TLOF.emitLinkerFlagsForUsed(OS, GV); + OS.flush(); + + if (!Flags.empty()) { + OutStreamer->SwitchSection(TLOF.getDrectveSection()); + OutStreamer->EmitBytes(Flags); + } + Flags.clear(); + } + } + } + } + // Allow the target to emit any magic that it wants at the end of the file, // after everything else has gone out. EmitEndOfAsmFile(M); @@ -1428,7 +1497,7 @@ MCSymbol *AsmPrinter::getCurExceptionSym() { void AsmPrinter::SetupMachineFunction(MachineFunction &MF) { this->MF = &MF; // Get the function symbol. 
- CurrentFnSym = getSymbol(MF.getFunction()); + CurrentFnSym = getSymbol(&MF.getFunction()); CurrentFnSymForSize = CurrentFnSym; CurrentFnBegin = nullptr; CurExceptionSym = nullptr; @@ -1554,14 +1623,14 @@ void AsmPrinter::EmitJumpTableInfo() { // Pick the directive to use to print the jump table entries, and switch to // the appropriate section. - const Function *F = MF->getFunction(); + const Function &F = MF->getFunction(); const TargetLoweringObjectFile &TLOF = getObjFileLowering(); bool JTInDiffSection = !TLOF.shouldPutJumpTableInFunctionSection( MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32, - *F); + F); if (JTInDiffSection) { // Drop it in the readonly section. - MCSection *ReadOnlySection = TLOF.getSectionForJumpTable(*F, TM); + MCSection *ReadOnlySection = TLOF.getSectionForJumpTable(F, TM); OutStreamer->SwitchSection(ReadOnlySection); } @@ -1935,7 +2004,7 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) { raw_string_ostream OS(S); OS << "Unsupported expression in static initializer: "; CE->printAsOperand(OS, /*PrintType=*/false, - !MF ? nullptr : MF->getFunction()->getParent()); + !MF ? nullptr : MF->getFunction().getParent()); report_fatal_error(OS.str()); } case Instruction::GetElementPtr: { @@ -2019,6 +2088,7 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) { } } // else fallthrough + LLVM_FALLTHROUGH; // The MC library also has a right-shift operator, but it isn't consistently // signed or unsigned between different targets. @@ -2618,7 +2688,7 @@ void AsmPrinter::setupCodePaddingContext(const MachineBasicBlock &MBB, assert(MF != nullptr && "Machine function must be valid"); assert(LI != nullptr && "Loop info must be valid"); Context.IsPaddingActive = !MF->hasInlineAsm() && - !MF->getFunction()->optForSize() && + !MF->getFunction().optForSize() && TM.getOptLevel() != CodeGenOpt::None; const MachineLoop *CurrentLoop = LI->getLoopFor(&MBB); Context.IsBasicBlockInsideInnermostLoop = @@ -2682,7 +2752,8 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const { (isBlockOnlyReachableByFallthrough(&MBB) && !MBB.isEHFuncletEntry())) { if (isVerbose()) { // NOTE: Want this comment at start of line, don't emit with AddComment. - OutStreamer->emitRawComment(" BB#" + Twine(MBB.getNumber()) + ":", false); + OutStreamer->emitRawComment(" %bb." 
+ Twine(MBB.getNumber()) + ":", + false); } } else { OutStreamer->EmitLabel(MBB.getSymbol()); @@ -2815,7 +2886,7 @@ void AsmPrinter::emitXRayTable() { return; auto PrevSection = OutStreamer->getCurrentSectionOnly(); - auto Fn = MF->getFunction(); + const Function &F = MF->getFunction(); MCSection *InstMap = nullptr; MCSection *FnSledIndex = nullptr; if (MF->getSubtarget().getTargetTriple().isOSBinFormatELF()) { @@ -2823,9 +2894,9 @@ void AsmPrinter::emitXRayTable() { assert(Associated != nullptr); auto Flags = ELF::SHF_WRITE | ELF::SHF_ALLOC | ELF::SHF_LINK_ORDER; std::string GroupName; - if (Fn->hasComdat()) { + if (F.hasComdat()) { Flags |= ELF::SHF_GROUP; - GroupName = Fn->getComdat()->getName(); + GroupName = F.getComdat()->getName(); } auto UniqueID = ++XRayFnUniqueID; @@ -2871,15 +2942,15 @@ void AsmPrinter::emitXRayTable() { void AsmPrinter::recordSled(MCSymbol *Sled, const MachineInstr &MI, SledKind Kind, uint8_t Version) { - auto Fn = MI.getMF()->getFunction(); - auto Attr = Fn->getFnAttribute("function-instrument"); - bool LogArgs = Fn->hasFnAttribute("xray-log-args"); + const Function &F = MI.getMF()->getFunction(); + auto Attr = F.getFnAttribute("function-instrument"); + bool LogArgs = F.hasFnAttribute("xray-log-args"); bool AlwaysInstrument = Attr.isStringAttribute() && Attr.getValueAsString() == "xray-always"; if (Kind == SledKind::FUNCTION_ENTER && LogArgs) Kind = SledKind::LOG_ARGS_ENTER; Sleds.emplace_back(XRayFunctionEntry{Sled, CurrentFnSym, Kind, - AlwaysInstrument, Fn, Version}); + AlwaysInstrument, &F, Version}); } uint16_t AsmPrinter::getDwarfVersion() const { diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index c9ec161f47f0..e6e8871361b7 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -12,16 +12,12 @@ //===----------------------------------------------------------------------===// #include "ByteStreamer.h" -#include "DwarfDebug.h" -#include "DwarfExpression.h" #include "llvm/ADT/Twine.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/DIE.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/TargetLoweringObjectFile.h" -#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCRegisterInfo.h" @@ -171,14 +167,15 @@ void AsmPrinter::emitDwarfSymbolReference(const MCSymbol *Label, EmitLabelDifference(Label, Label->getSection().getBeginSymbol(), 4); } -void AsmPrinter::emitDwarfStringOffset(DwarfStringPoolEntryRef S) const { +void AsmPrinter::emitDwarfStringOffset(DwarfStringPoolEntry S) const { if (MAI->doesDwarfUseRelocationsAcrossSections()) { - emitDwarfSymbolReference(S.getSymbol()); + assert(S.Symbol && "No symbol available"); + emitDwarfSymbolReference(S.Symbol); return; } // Just emit the offset directly; no need for symbol math. - EmitInt32(S.getOffset()); + EmitInt32(S.Offset); } //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index 580830d39f2e..04a72ba3d738 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -514,7 +514,7 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { // Reset SanitizeAddress based on the function's attribute. 
MCTargetOptions MCOptions = TM.Options.MCOptions; MCOptions.SanitizeAddress = - MF->getFunction()->hasFnAttribute(Attribute::SanitizeAddress); + MF->getFunction().hasFnAttribute(Attribute::SanitizeAddress); EmitInlineAsm(OS.str(), getSubtargetInfo(), MCOptions, LocMD, MI->getInlineAsmDialect()); diff --git a/lib/CodeGen/AsmPrinter/ByteStreamer.h b/lib/CodeGen/AsmPrinter/ByteStreamer.h index aaf6180c9404..34ad66f8a391 100644 --- a/lib/CodeGen/AsmPrinter/ByteStreamer.h +++ b/lib/CodeGen/AsmPrinter/ByteStreamer.h @@ -93,15 +93,27 @@ class BufferByteStreamer final : public ByteStreamer { } void EmitSLEB128(uint64_t DWord, const Twine &Comment) override { raw_svector_ostream OSE(Buffer); - encodeSLEB128(DWord, OSE); - if (GenerateComments) + unsigned Length = encodeSLEB128(DWord, OSE); + if (GenerateComments) { Comments.push_back(Comment.str()); + // Add some empty comments to keep the Buffer and Comments vectors aligned + // with each other. + for (size_t i = 1; i < Length; ++i) + Comments.push_back(""); + + } } void EmitULEB128(uint64_t DWord, const Twine &Comment) override { raw_svector_ostream OSE(Buffer); - encodeULEB128(DWord, OSE); - if (GenerateComments) + unsigned Length = encodeULEB128(DWord, OSE); + if (GenerateComments) { Comments.push_back(Comment.str()); + // Add some empty comments to keep the Buffer and Comments vectors aligned + // with each other. + for (size_t i = 1; i < Length; ++i) + Comments.push_back(""); + + } } }; diff --git a/lib/CodeGen/AsmPrinter/CMakeLists.txt b/lib/CodeGen/AsmPrinter/CMakeLists.txt index 05c6a28bbcac..f21810f0b053 100644 --- a/lib/CodeGen/AsmPrinter/CMakeLists.txt +++ b/lib/CodeGen/AsmPrinter/CMakeLists.txt @@ -20,6 +20,7 @@ add_llvm_library(LLVMAsmPrinter EHStreamer.cpp ErlangGCPrinter.cpp OcamlGCPrinter.cpp + WinCFGuard.cpp WinException.cpp CodeViewDebug.cpp diff --git a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp index 22fcdacb3263..11c28ba6d9ad 100644 --- a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp +++ b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp @@ -42,6 +42,7 @@ #include "llvm/Config/llvm-config.h" #include "llvm/DebugInfo/CodeView/CVTypeVisitor.h" #include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/ContinuationRecordBuilder.h" #include "llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h" #include "llvm/DebugInfo/CodeView/Line.h" #include "llvm/DebugInfo/CodeView/SymbolRecord.h" @@ -66,10 +67,12 @@ #include "llvm/Support/BinaryByteStream.h" #include "llvm/Support/BinaryStreamReader.h" #include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormatVariadic.h" #include "llvm/Support/SMLoc.h" #include "llvm/Support/ScopedPrinter.h" #include "llvm/Target/TargetMachine.h" @@ -87,6 +90,9 @@ using namespace llvm; using namespace llvm::codeview; +static cl::opt EmitDebugGlobalHashes("emit-codeview-ghash-section", + cl::ReallyHidden, cl::init(false)); + CodeViewDebug::CodeViewDebug(AsmPrinter *AP) : DebugHandlerBase(AP), OS(*Asm->OutStreamer), TypeTable(Allocator) { // If module doesn't have named metadata anchors or COFF debug section @@ -278,7 +284,7 @@ TypeIndex CodeViewDebug::getScopeIndex(const DIScope *Scope) { // Build the fully qualified name of the scope. 
std::string ScopeName = getFullyQualifiedName(Scope); StringIdRecord SID(TypeIndex(), ScopeName); - auto TI = TypeTable.writeKnownType(SID); + auto TI = TypeTable.writeLeafType(SID); return recordTypeIndexForDINode(Scope, TI); } @@ -303,12 +309,12 @@ TypeIndex CodeViewDebug::getFuncIdForSubprogram(const DISubprogram *SP) { TypeIndex ClassType = getTypeIndex(Class); MemberFuncIdRecord MFuncId(ClassType, getMemberFunctionType(SP, Class), DisplayName); - TI = TypeTable.writeKnownType(MFuncId); + TI = TypeTable.writeLeafType(MFuncId); } else { // Otherwise, this must be a free function. TypeIndex ParentScope = getScopeIndex(Scope); FuncIdRecord FuncId(ParentScope, getTypeIndex(SP->getType()), DisplayName); - TI = TypeTable.writeKnownType(FuncId); + TI = TypeTable.writeLeafType(FuncId); } return recordTypeIndexForDINode(SP, TI); @@ -485,10 +491,13 @@ void CodeViewDebug::endModule() { OS.AddComment("String table"); OS.EmitCVStringTableDirective(); - // Emit type information last, so that any types we translate while emitting - // function info are included. + // Emit type information and hashes last, so that any types we translate while + // emitting function info are included. emitTypeInformation(); + if (EmitDebugGlobalHashes) + emitTypeGlobalHashes(); + clear(); } @@ -505,11 +514,6 @@ static void emitNullTerminatedSymbolName(MCStreamer &OS, StringRef S) { } void CodeViewDebug::emitTypeInformation() { - // Do nothing if we have no debug info or if no non-trivial types were emitted - // to TypeTable during codegen. - NamedMDNode *CU_Nodes = MMI->getModule()->getNamedMetadata("llvm.dbg.cu"); - if (!CU_Nodes) - return; if (TypeTable.empty()) return; @@ -554,6 +558,40 @@ void CodeViewDebug::emitTypeInformation() { } } +void CodeViewDebug::emitTypeGlobalHashes() { + if (TypeTable.empty()) + return; + + // Start the .debug$H section with the version and hash algorithm, currently + // hardcoded to version 0, SHA1. + OS.SwitchSection(Asm->getObjFileLowering().getCOFFGlobalTypeHashesSection()); + + OS.EmitValueToAlignment(4); + OS.AddComment("Magic"); + OS.EmitIntValue(COFF::DEBUG_HASHES_SECTION_MAGIC, 4); + OS.AddComment("Section Version"); + OS.EmitIntValue(0, 2); + OS.AddComment("Hash Algorithm"); + OS.EmitIntValue(uint16_t(GlobalTypeHashAlg::SHA1), 2); + + TypeIndex TI(TypeIndex::FirstNonSimpleIndex); + for (const auto &GHR : TypeTable.hashes()) { + if (OS.isVerboseAsm()) { + // Emit an EOL-comment describing which TypeIndex this hash corresponds + // to, as well as the stringified SHA1 hash. 
+ SmallString<32> Comment; + raw_svector_ostream CommentOS(Comment); + CommentOS << formatv("{0:X+} [{1}]", TI.getIndex(), GHR); + OS.AddComment(Comment); + ++TI; + } + assert(GHR.Hash.size() % 20 == 0); + StringRef S(reinterpret_cast(GHR.Hash.data()), + GHR.Hash.size()); + OS.EmitBinaryData(S); + } +} + static SourceLanguage MapDWLangToCVLang(unsigned DWLang) { switch (DWLang) { case dwarf::DW_LANG_C: @@ -1116,9 +1154,9 @@ void CodeViewDebug::collectVariableInfo(const DISubprogram *SP) { } void CodeViewDebug::beginFunctionImpl(const MachineFunction *MF) { - const Function *GV = MF->getFunction(); - assert(FnDebugInfo.count(GV) == false); - CurFn = &FnDebugInfo[GV]; + const Function &GV = MF->getFunction(); + assert(FnDebugInfo.count(&GV) == false); + CurFn = &FnDebugInfo[&GV]; CurFn->FuncId = NextFuncId++; CurFn->Begin = Asm->getFunctionBegin(); @@ -1243,6 +1281,8 @@ TypeIndex CodeViewDebug::lowerType(const DIType *Ty, const DIType *ClassTy) { return lowerTypeClass(cast(Ty)); case dwarf::DW_TAG_union_type: return lowerTypeUnion(cast(Ty)); + case dwarf::DW_TAG_unspecified_type: + return TypeIndex::None(); default: // Use the null type index. return TypeIndex(); @@ -1304,7 +1344,7 @@ TypeIndex CodeViewDebug::lowerTypeArray(const DICompositeType *Ty) { StringRef Name = (i == 0) ? Ty->getName() : ""; ArrayRecord AR(ElementTypeIndex, IndexType, ArraySize, Name); - ElementTypeIndex = TypeTable.writeKnownType(AR); + ElementTypeIndex = TypeTable.writeLeafType(AR); } return ElementTypeIndex; @@ -1437,7 +1477,7 @@ TypeIndex CodeViewDebug::lowerTypePointer(const DIDerivedType *Ty) { // do. PointerOptions PO = PointerOptions::None; PointerRecord PR(PointeeTI, PK, PM, PO, Ty->getSizeInBits() / 8); - return TypeTable.writeKnownType(PR); + return TypeTable.writeLeafType(PR); } static PointerToMemberRepresentation @@ -1488,7 +1528,7 @@ TypeIndex CodeViewDebug::lowerTypeMemberPointer(const DIDerivedType *Ty) { MemberPointerInfo MPI( ClassTI, translatePtrToMemberRep(SizeInBytes, IsPMF, Ty->getFlags())); PointerRecord PR(PointeeTI, PK, PM, PO, SizeInBytes, MPI); - return TypeTable.writeKnownType(PR); + return TypeTable.writeLeafType(PR); } /// Given a DWARF calling convention, get the CodeView equivalent. If we don't @@ -1527,7 +1567,7 @@ TypeIndex CodeViewDebug::lowerTypeModifier(const DIDerivedType *Ty) { } TypeIndex ModifiedTI = getTypeIndex(BaseTy); ModifierRecord MR(ModifiedTI, Mods); - return TypeTable.writeKnownType(MR); + return TypeTable.writeLeafType(MR); } TypeIndex CodeViewDebug::lowerTypeFunction(const DISubroutineType *Ty) { @@ -1535,6 +1575,11 @@ TypeIndex CodeViewDebug::lowerTypeFunction(const DISubroutineType *Ty) { for (DITypeRef ArgTypeRef : Ty->getTypeArray()) ReturnAndArgTypeIndices.push_back(getTypeIndex(ArgTypeRef)); + // MSVC uses type none for variadic argument. 
+ if (ReturnAndArgTypeIndices.size() > 1 && + ReturnAndArgTypeIndices.back() == TypeIndex::Void()) { + ReturnAndArgTypeIndices.back() = TypeIndex::None(); + } TypeIndex ReturnTypeIndex = TypeIndex::Void(); ArrayRef ArgTypeIndices = None; if (!ReturnAndArgTypeIndices.empty()) { @@ -1544,13 +1589,13 @@ TypeIndex CodeViewDebug::lowerTypeFunction(const DISubroutineType *Ty) { } ArgListRecord ArgListRec(TypeRecordKind::ArgList, ArgTypeIndices); - TypeIndex ArgListIndex = TypeTable.writeKnownType(ArgListRec); + TypeIndex ArgListIndex = TypeTable.writeLeafType(ArgListRec); CallingConvention CC = dwarfCCToCodeView(Ty->getCC()); ProcedureRecord Procedure(ReturnTypeIndex, CC, FunctionOptions::None, ArgTypeIndices.size(), ArgListIndex); - return TypeTable.writeKnownType(Procedure); + return TypeTable.writeLeafType(Procedure); } TypeIndex CodeViewDebug::lowerTypeMemberFunction(const DISubroutineType *Ty, @@ -1564,6 +1609,11 @@ TypeIndex CodeViewDebug::lowerTypeMemberFunction(const DISubroutineType *Ty, for (DITypeRef ArgTypeRef : Ty->getTypeArray()) ReturnAndArgTypeIndices.push_back(getTypeIndex(ArgTypeRef)); + // MSVC uses type none for variadic argument. + if (ReturnAndArgTypeIndices.size() > 1 && + ReturnAndArgTypeIndices.back() == TypeIndex::Void()) { + ReturnAndArgTypeIndices.back() = TypeIndex::None(); + } TypeIndex ReturnTypeIndex = TypeIndex::Void(); ArrayRef ArgTypeIndices = None; if (!ReturnAndArgTypeIndices.empty()) { @@ -1578,7 +1628,7 @@ TypeIndex CodeViewDebug::lowerTypeMemberFunction(const DISubroutineType *Ty, } ArgListRecord ArgListRec(TypeRecordKind::ArgList, ArgTypeIndices); - TypeIndex ArgListIndex = TypeTable.writeKnownType(ArgListRec); + TypeIndex ArgListIndex = TypeTable.writeLeafType(ArgListRec); CallingConvention CC = dwarfCCToCodeView(Ty->getCC()); @@ -1586,9 +1636,7 @@ TypeIndex CodeViewDebug::lowerTypeMemberFunction(const DISubroutineType *Ty, MemberFunctionRecord MFR(ReturnTypeIndex, ClassType, ThisTypeIndex, CC, FunctionOptions::None, ArgTypeIndices.size(), ArgListIndex, ThisAdjustment); - TypeIndex TI = TypeTable.writeKnownType(MFR); - - return TI; + return TypeTable.writeLeafType(MFR); } TypeIndex CodeViewDebug::lowerTypeVFTableShape(const DIDerivedType *Ty) { @@ -1597,7 +1645,7 @@ TypeIndex CodeViewDebug::lowerTypeVFTableShape(const DIDerivedType *Ty) { SmallVector Slots(VSlotCount, VFTableSlotKind::Near); VFTableShapeRecord VFTSR(Slots); - return TypeTable.writeKnownType(VFTSR); + return TypeTable.writeLeafType(VFTSR); } static MemberAccess translateAccessFlags(unsigned RecordTag, unsigned Flags) { @@ -1688,9 +1736,8 @@ TypeIndex CodeViewDebug::lowerTypeEnum(const DICompositeType *Ty) { if (Ty->isForwardDecl()) { CO |= ClassOptions::ForwardReference; } else { - FieldListRecordBuilder FLRB(TypeTable); - - FLRB.begin(); + ContinuationRecordBuilder ContinuationBuilder; + ContinuationBuilder.begin(ContinuationRecordKind::FieldList); for (const DINode *Element : Ty->getElements()) { // We assume that the frontend provides all members in source declaration // order, which is what MSVC does. 
@@ -1698,18 +1745,18 @@ TypeIndex CodeViewDebug::lowerTypeEnum(const DICompositeType *Ty) { EnumeratorRecord ER(MemberAccess::Public, APSInt::getUnsigned(Enumerator->getValue()), Enumerator->getName()); - FLRB.writeMemberType(ER); + ContinuationBuilder.writeMemberType(ER); EnumeratorCount++; } } - FTI = FLRB.end(true); + FTI = TypeTable.insertRecord(ContinuationBuilder); } std::string FullName = getFullyQualifiedName(Ty); EnumRecord ER(EnumeratorCount, CO, FTI, FullName, Ty->getIdentifier(), getTypeIndex(Ty->getBaseType())); - return TypeTable.writeKnownType(ER); + return TypeTable.writeLeafType(ER); } //===----------------------------------------------------------------------===// @@ -1812,7 +1859,7 @@ TypeIndex CodeViewDebug::lowerTypeClass(const DICompositeType *Ty) { std::string FullName = getFullyQualifiedName(Ty); ClassRecord CR(Kind, 0, CO, TypeIndex(), TypeIndex(), TypeIndex(), 0, FullName, Ty->getIdentifier()); - TypeIndex FwdDeclTI = TypeTable.writeKnownType(CR); + TypeIndex FwdDeclTI = TypeTable.writeLeafType(CR); if (!Ty->isForwardDecl()) DeferredCompleteTypes.push_back(Ty); return FwdDeclTI; @@ -1838,13 +1885,14 @@ TypeIndex CodeViewDebug::lowerCompleteTypeClass(const DICompositeType *Ty) { ClassRecord CR(Kind, FieldCount, CO, FieldTI, TypeIndex(), VShapeTI, SizeInBytes, FullName, Ty->getIdentifier()); - TypeIndex ClassTI = TypeTable.writeKnownType(CR); + TypeIndex ClassTI = TypeTable.writeLeafType(CR); if (const auto *File = Ty->getFile()) { StringIdRecord SIDR(TypeIndex(0x0), getFullFilepath(File)); - TypeIndex SIDI = TypeTable.writeKnownType(SIDR); + TypeIndex SIDI = TypeTable.writeLeafType(SIDR); + UdtSourceLineRecord USLR(ClassTI, SIDI, Ty->getLine()); - TypeTable.writeKnownType(USLR); + TypeTable.writeLeafType(USLR); } addToUDTs(Ty); @@ -1857,7 +1905,7 @@ TypeIndex CodeViewDebug::lowerTypeUnion(const DICompositeType *Ty) { ClassOptions::ForwardReference | getCommonClassOptions(Ty); std::string FullName = getFullyQualifiedName(Ty); UnionRecord UR(0, CO, TypeIndex(), 0, FullName, Ty->getIdentifier()); - TypeIndex FwdDeclTI = TypeTable.writeKnownType(UR); + TypeIndex FwdDeclTI = TypeTable.writeLeafType(UR); if (!Ty->isForwardDecl()) DeferredCompleteTypes.push_back(Ty); return FwdDeclTI; @@ -1879,12 +1927,13 @@ TypeIndex CodeViewDebug::lowerCompleteTypeUnion(const DICompositeType *Ty) { UnionRecord UR(FieldCount, CO, FieldTI, SizeInBytes, FullName, Ty->getIdentifier()); - TypeIndex UnionTI = TypeTable.writeKnownType(UR); + TypeIndex UnionTI = TypeTable.writeLeafType(UR); StringIdRecord SIR(TypeIndex(0x0), getFullFilepath(Ty->getFile())); - TypeIndex SIRI = TypeTable.writeKnownType(SIR); + TypeIndex SIRI = TypeTable.writeLeafType(SIR); + UdtSourceLineRecord USLR(UnionTI, SIRI, Ty->getLine()); - TypeTable.writeKnownType(USLR); + TypeTable.writeLeafType(USLR); addToUDTs(Ty); @@ -1899,8 +1948,8 @@ CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) { // list record. unsigned MemberCount = 0; ClassInfo Info = collectClassInfo(Ty); - FieldListRecordBuilder FLBR(TypeTable); - FLBR.begin(); + ContinuationRecordBuilder ContinuationBuilder; + ContinuationBuilder.begin(ContinuationRecordKind::FieldList); // Create base classes. 
for (const DIDerivedType *I : Info.Inheritance) { @@ -1918,14 +1967,14 @@ CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) { getTypeIndex(I->getBaseType()), getVBPTypeIndex(), VBPtrOffset, VBTableIndex); - FLBR.writeMemberType(VBCR); + ContinuationBuilder.writeMemberType(VBCR); } else { assert(I->getOffsetInBits() % 8 == 0 && "bases must be on byte boundaries"); BaseClassRecord BCR(translateAccessFlags(Ty->getTag(), I->getFlags()), getTypeIndex(I->getBaseType()), I->getOffsetInBits() / 8); - FLBR.writeMemberType(BCR); + ContinuationBuilder.writeMemberType(BCR); } } @@ -1939,7 +1988,7 @@ CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) { if (Member->isStaticMember()) { StaticDataMemberRecord SDMR(Access, MemberBaseType, MemberName); - FLBR.writeMemberType(SDMR); + ContinuationBuilder.writeMemberType(SDMR); MemberCount++; continue; } @@ -1948,7 +1997,7 @@ CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) { if ((Member->getFlags() & DINode::FlagArtificial) && Member->getName().startswith("_vptr$")) { VFPtrRecord VFPR(getTypeIndex(Member->getBaseType())); - FLBR.writeMemberType(VFPR); + ContinuationBuilder.writeMemberType(VFPR); MemberCount++; continue; } @@ -1965,12 +2014,12 @@ CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) { StartBitOffset -= MemberOffsetInBits; BitFieldRecord BFR(MemberBaseType, Member->getSizeInBits(), StartBitOffset); - MemberBaseType = TypeTable.writeKnownType(BFR); + MemberBaseType = TypeTable.writeLeafType(BFR); } uint64_t MemberOffsetInBytes = MemberOffsetInBits / 8; DataMemberRecord DMR(Access, MemberBaseType, MemberOffsetInBytes, MemberName); - FLBR.writeMemberType(DMR); + ContinuationBuilder.writeMemberType(DMR); MemberCount++; } @@ -1995,23 +2044,26 @@ CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) { } assert(!Methods.empty() && "Empty methods map entry"); if (Methods.size() == 1) - FLBR.writeMemberType(Methods[0]); + ContinuationBuilder.writeMemberType(Methods[0]); else { + // FIXME: Make this use its own ContinuationBuilder so that + // MethodOverloadList can be split correctly. MethodOverloadListRecord MOLR(Methods); - TypeIndex MethodList = TypeTable.writeKnownType(MOLR); + TypeIndex MethodList = TypeTable.writeLeafType(MOLR); + OverloadedMethodRecord OMR(Methods.size(), MethodList, Name); - FLBR.writeMemberType(OMR); + ContinuationBuilder.writeMemberType(OMR); } } // Create nested classes. for (const DIType *Nested : Info.NestedTypes) { NestedTypeRecord R(getTypeIndex(DITypeRef(Nested)), Nested->getName()); - FLBR.writeMemberType(R); + ContinuationBuilder.writeMemberType(R); MemberCount++; } - TypeIndex FieldTI = FLBR.end(true); + TypeIndex FieldTI = TypeTable.insertRecord(ContinuationBuilder); return std::make_tuple(FieldTI, Info.VShapeTI, MemberCount, !Info.NestedTypes.empty()); } @@ -2020,15 +2072,14 @@ TypeIndex CodeViewDebug::getVBPTypeIndex() { if (!VBPType.getIndex()) { // Make a 'const int *' type. ModifierRecord MR(TypeIndex::Int32(), ModifierOptions::Const); - TypeIndex ModifiedTI = TypeTable.writeKnownType(MR); + TypeIndex ModifiedTI = TypeTable.writeLeafType(MR); PointerKind PK = getPointerSizeInBytes() == 8 ? 
PointerKind::Near64 : PointerKind::Near32; PointerMode PM = PointerMode::Pointer; PointerOptions PO = PointerOptions::None; PointerRecord PR(ModifiedTI, PK, PM, PO, getPointerSizeInBytes()); - - VBPType = TypeTable.writeKnownType(PR); + VBPType = TypeTable.writeLeafType(PR); } return VBPType; @@ -2061,7 +2112,7 @@ TypeIndex CodeViewDebug::getTypeIndexForReferenceTo(DITypeRef TypeRef) { : PointerKind::Near32, PointerMode::LValueReference, PointerOptions::None, Ty->getSizeInBits() / 8); - return TypeTable.writeKnownType(PR); + return TypeTable.writeLeafType(PR); } TypeIndex CodeViewDebug::getCompleteTypeIndex(DITypeRef TypeRef) { @@ -2234,15 +2285,15 @@ void CodeViewDebug::emitLocalVariable(const LocalVariable &Var) { } void CodeViewDebug::endFunctionImpl(const MachineFunction *MF) { - const Function *GV = MF->getFunction(); - assert(FnDebugInfo.count(GV)); - assert(CurFn == &FnDebugInfo[GV]); + const Function &GV = MF->getFunction(); + assert(FnDebugInfo.count(&GV)); + assert(CurFn == &FnDebugInfo[&GV]); - collectVariableInfo(GV->getSubprogram()); + collectVariableInfo(GV.getSubprogram()); // Don't emit anything if we don't have any line tables. if (!CurFn->HaveLineInfo) { - FnDebugInfo.erase(GV); + FnDebugInfo.erase(&GV); CurFn = nullptr; return; } diff --git a/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/lib/CodeGen/AsmPrinter/CodeViewDebug.h index 9eff4a7fd814..69e93640d7ef 100644 --- a/lib/CodeGen/AsmPrinter/CodeViewDebug.h +++ b/lib/CodeGen/AsmPrinter/CodeViewDebug.h @@ -23,8 +23,8 @@ #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h" #include "llvm/DebugInfo/CodeView/TypeIndex.h" -#include "llvm/DebugInfo/CodeView/TypeTableBuilder.h" #include "llvm/IR/DebugLoc.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Compiler.h" @@ -52,7 +52,7 @@ class MachineFunction; class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { MCStreamer &OS; BumpPtrAllocator Allocator; - codeview::TypeTableBuilder TypeTable; + codeview::GlobalTypeTableBuilder TypeTable; /// Represents the most general definition range. 
struct LocalVarDefRange { @@ -219,6 +219,8 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { void emitTypeInformation(); + void emitTypeGlobalHashes(); + void emitCompilerInformation(); void emitInlineeLinesSubsection(); diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp index 886e6e264b3e..b3148db30cd6 100644 --- a/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/lib/CodeGen/AsmPrinter/DIE.cpp @@ -777,6 +777,7 @@ void DIEBlock::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const { case dwarf::DW_FORM_block2: Asm->EmitInt16(Size); break; case dwarf::DW_FORM_block4: Asm->EmitInt32(Size); break; case dwarf::DW_FORM_block: Asm->EmitULEB128(Size); break; + case dwarf::DW_FORM_data16: break; } for (const auto &V : values()) @@ -791,6 +792,7 @@ unsigned DIEBlock::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { case dwarf::DW_FORM_block2: return Size + sizeof(int16_t); case dwarf::DW_FORM_block4: return Size + sizeof(int32_t); case dwarf::DW_FORM_block: return Size + getULEB128Size(Size); + case dwarf::DW_FORM_data16: return 16; default: llvm_unreachable("Improper form for block"); } } diff --git a/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp b/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp index 68354571423d..2e5c22447936 100644 --- a/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp +++ b/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp @@ -163,7 +163,8 @@ uint64_t DebugHandlerBase::getBaseTypeSize(const DITypeRef TyRef) { DIType *BaseType = DDTy->getBaseType().resolve(); - assert(BaseType && "Unexpected invalid base type"); + if (!BaseType) + return 0; // If this is a derived type, go ahead and get the base type, unless it's a // reference then it's just the size of the field. Pointer types have no need @@ -179,7 +180,7 @@ static bool hasDebugInfo(const MachineModuleInfo *MMI, const MachineFunction *MF) { if (!MMI->hasDebugInfo()) return false; - auto *SP = MF->getFunction()->getSubprogram(); + auto *SP = MF->getFunction().getSubprogram(); if (!SP) return false; assert(SP->getUnit()); @@ -223,7 +224,7 @@ void DebugHandlerBase::beginFunction(const MachineFunction *MF) { // label, so arguments are visible when breaking at function entry. const DILocalVariable *DIVar = Ranges.front().first->getDebugVariable(); if (DIVar->isParameter() && - getDISubprogram(DIVar->getScope())->describes(MF->getFunction())) { + getDISubprogram(DIVar->getScope())->describes(&MF->getFunction())) { LabelsBeforeInsn[Ranges.front().first] = Asm->getFunctionBegin(); if (Ranges.front().first->getDebugExpression()->isFragment()) { // Mark all non-overlapping initial fragments. 
diff --git a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp index 53250b9ee15a..cbb4c48b4d88 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp @@ -12,16 +12,12 @@ //===----------------------------------------------------------------------===// #include "DwarfException.h" -#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetLoweringObjectFile.h" -#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Mangler.h" #include "llvm/IR/Module.h" @@ -34,7 +30,6 @@ #include "llvm/MC/MachineLocation.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" using namespace llvm; @@ -92,7 +87,7 @@ static MCSymbol *getExceptionSym(AsmPrinter *Asm) { void DwarfCFIException::beginFunction(const MachineFunction *MF) { shouldEmitMoves = shouldEmitPersonality = shouldEmitLSDA = false; - const Function *F = MF->getFunction(); + const Function &F = MF->getFunction(); // If any landing pads survive, we need an EH table. bool hasLandingPads = !MF->getLandingPads().empty(); @@ -105,17 +100,17 @@ void DwarfCFIException::beginFunction(const MachineFunction *MF) { const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); unsigned PerEncoding = TLOF.getPersonalityEncoding(); const Function *Per = nullptr; - if (F->hasPersonalityFn()) - Per = dyn_cast(F->getPersonalityFn()->stripPointerCasts()); + if (F.hasPersonalityFn()) + Per = dyn_cast(F.getPersonalityFn()->stripPointerCasts()); // Emit a personality function even when there are no landing pads forceEmitPersonality = // ...if a personality function is explicitly specified - F->hasPersonalityFn() && + F.hasPersonalityFn() && // ... and it's not known to be a noop in the absence of invokes !isNoOpWithoutInvoke(classifyEHPersonality(Per)) && // ... and we're not explicitly asked not to emit it - F->needsUnwindTableEntry(); + F.needsUnwindTableEntry(); shouldEmitPersonality = (forceEmitPersonality || @@ -148,8 +143,8 @@ void DwarfCFIException::beginFragment(const MachineBasicBlock *MBB, if (!shouldEmitPersonality) return; - auto *F = MBB->getParent()->getFunction(); - auto *P = dyn_cast(F->getPersonalityFn()->stripPointerCasts()); + auto &F = MBB->getParent()->getFunction(); + auto *P = dyn_cast(F.getPersonalityFn()->stripPointerCasts()); assert(P && "Expected personality function"); // If we are forced to emit this personality, make sure to record diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index c8cd8eb8ffd3..e77e8d9b354c 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -94,16 +94,17 @@ void DwarfCompileUnit::addLocalLabelAddress(DIE &Die, DIEInteger(0)); } -unsigned DwarfCompileUnit::getOrCreateSourceID(StringRef FileName, - StringRef DirName) { +unsigned DwarfCompileUnit::getOrCreateSourceID(const DIFile *File) { // If we print assembly, we can't separate .file entries according to // compile units. Thus all files will belong to the default compile unit. 
// FIXME: add a better feature test than hasRawTextSupport. Even better, // extend .file to support this. + unsigned CUID = Asm->OutStreamer->hasRawTextSupport() ? 0 : getUniqueID(); + if (!File) + return Asm->OutStreamer->EmitDwarfFileDirective(0, "", "", nullptr, CUID); return Asm->OutStreamer->EmitDwarfFileDirective( - 0, DirName, FileName, - Asm->OutStreamer->hasRawTextSupport() ? 0 : getUniqueID()); + 0, File->getDirectory(), File->getFilename(), getMD5AsBytes(File), CUID); } DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE( @@ -443,7 +444,7 @@ DIE *DwarfCompileUnit::constructInlinedScopeDIE(LexicalScope *Scope) { // Add the call site information to the DIE. const DILocation *IA = Scope->getInlinedAt(); addUInt(*ScopeDIE, dwarf::DW_AT_call_file, None, - getOrCreateSourceID(IA->getFilename(), IA->getDirectory())); + getOrCreateSourceID(IA->getFile())); addUInt(*ScopeDIE, dwarf::DW_AT_call_line, None, IA->getLine()); if (IA->getDiscriminator() && DD->getDwarfVersion() >= 4) addUInt(*ScopeDIE, dwarf::DW_AT_GNU_discriminator, None, @@ -687,9 +688,7 @@ DIE *DwarfCompileUnit::constructImportedEntityDIE( else EntityDie = getDIE(Entity); assert(EntityDie); - auto *File = Module->getFile(); - addSourceLine(*IMDie, Module->getLine(), File ? File->getFilename() : "", - File ? File->getDirectory() : ""); + addSourceLine(*IMDie, Module->getLine(), Module->getFile()); addDIEEntry(*IMDie, dwarf::DW_AT_import, *EntityDie); StringRef Name = Module->getName(); if (!Name.empty()) diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index 68482eb7e358..3325b1a345e8 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -141,7 +141,7 @@ class DwarfCompileUnit final : public DwarfUnit { DwarfCompileUnit &getCU() override { return *this; } - unsigned getOrCreateSourceID(StringRef FileName, StringRef DirName) override; + unsigned getOrCreateSourceID(const DIFile *File) override; void addImportedEntity(const DIImportedEntity* IE) { DIScope *Scope = IE->getScope(); diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index cd8f61dbe229..1e098ccfbc9d 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -288,9 +288,11 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) else DebuggerTuning = DebuggerKind::GDB; - // Turn on accelerator tables for LLDB by default. + // Turn on accelerator tables by default, if tuning for LLDB and the target is + // supported. 
if (DwarfAccelTables == Default) - HasDwarfAccelTables = tuneForLLDB(); + HasDwarfAccelTables = + tuneForLLDB() && A->TM.getTargetTriple().isOSBinFormatMachO(); else HasDwarfAccelTables = DwarfAccelTables == Enable; @@ -1163,7 +1165,7 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) { DebugHandlerBase::beginInstruction(MI); assert(CurMI); - const auto *SP = MI->getMF()->getFunction()->getSubprogram(); + const auto *SP = MI->getMF()->getFunction().getSubprogram(); if (!SP || SP->getUnit()->getEmissionKind() == DICompileUnit::NoDebug) return; @@ -1261,7 +1263,7 @@ static DebugLoc findPrologueEndLoc(const MachineFunction *MF) { void DwarfDebug::beginFunctionImpl(const MachineFunction *MF) { CurFn = MF; - auto *SP = MF->getFunction()->getSubprogram(); + auto *SP = MF->getFunction().getSubprogram(); assert(LScopes.empty() || SP == LScopes.getCurrentFunctionScope()->getScopeNode()); if (SP->getUnit()->getEmissionKind() == DICompileUnit::NoDebug) return; @@ -1297,7 +1299,7 @@ void DwarfDebug::skippedNonDebugFunction() { // Gather and emit post-function debug information. void DwarfDebug::endFunctionImpl(const MachineFunction *MF) { - const DISubprogram *SP = MF->getFunction()->getSubprogram(); + const DISubprogram *SP = MF->getFunction().getSubprogram(); assert(CurFn == MF && "endFunction should be called with the same function as beginFunction"); @@ -1366,19 +1368,17 @@ void DwarfDebug::endFunctionImpl(const MachineFunction *MF) { void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S, unsigned Flags) { StringRef Fn; - StringRef Dir; unsigned Src = 1; unsigned Discriminator = 0; if (auto *Scope = cast_or_null(S)) { Fn = Scope->getFilename(); - Dir = Scope->getDirectory(); if (Line != 0 && getDwarfVersion() >= 4) if (auto *LBF = dyn_cast(Scope)) Discriminator = LBF->getDiscriminator(); unsigned CUID = Asm->OutStreamer->getContext().getDwarfCompileUnitID(); Src = static_cast(*InfoHolder.getUnits()[CUID]) - .getOrCreateSourceID(Fn, Dir); + .getOrCreateSourceID(Scope->getFile()); } Asm->OutStreamer->EmitDwarfLocDirective(Src, Line, Col, Flags, 0, Discriminator, Fn); @@ -1973,10 +1973,7 @@ void DwarfDebug::emitMacroFile(DIMacroFile &F, DwarfCompileUnit &U) { assert(F.getMacinfoType() == dwarf::DW_MACINFO_start_file); Asm->EmitULEB128(dwarf::DW_MACINFO_start_file); Asm->EmitULEB128(F.getLine()); - DIFile *File = F.getFile(); - unsigned FID = - U.getOrCreateSourceID(File->getFilename(), File->getDirectory()); - Asm->EmitULEB128(FID); + Asm->EmitULEB128(U.getOrCreateSourceID(F.getFile())); handleMacroNodes(F.getElements(), U); Asm->EmitULEB128(dwarf::DW_MACINFO_end_file); } diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index 911e46235781..61868a3bc065 100644 --- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -19,6 +19,7 @@ #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/None.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/iterator_range.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineOperand.h" @@ -30,6 +31,7 @@ #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Metadata.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" @@ -263,9 +265,25 @@ void DwarfUnit::addSectionOffset(DIE &Die, dwarf::Attribute Attribute, addUInt(Die, Attribute, dwarf::DW_FORM_data4, Integer); } -unsigned 
DwarfTypeUnit::getOrCreateSourceID(StringRef FileName, StringRef DirName) { - return SplitLineTable ? SplitLineTable->getFile(DirName, FileName) - : getCU().getOrCreateSourceID(FileName, DirName); +MD5::MD5Result *DwarfUnit::getMD5AsBytes(const DIFile *File) { + assert(File); + if (File->getChecksumKind() != DIFile::CSK_MD5) + return nullptr; + + // Convert the string checksum to an MD5Result for the streamer. + // The verifier validates the checksum so we assume it's okay. + // An MD5 checksum is 16 bytes. + std::string Checksum = fromHex(File->getChecksum()); + void *CKMem = Asm->OutStreamer->getContext().allocate(16, 1); + memcpy(CKMem, Checksum.data(), 16); + return reinterpret_cast(CKMem); +} + +unsigned DwarfTypeUnit::getOrCreateSourceID(const DIFile *File) { + return SplitLineTable + ? SplitLineTable->getFile(File->getDirectory(), + File->getFilename(), getMD5AsBytes(File)) + : getCU().getOrCreateSourceID(File); } void DwarfUnit::addOpAddress(DIELoc &Die, const MCSymbol *Sym) { @@ -335,12 +353,11 @@ void DwarfUnit::addBlock(DIE &Die, dwarf::Attribute Attribute, Die.addValue(DIEValueAllocator, Attribute, Block->BestForm(), Block); } -void DwarfUnit::addSourceLine(DIE &Die, unsigned Line, StringRef File, - StringRef Directory) { +void DwarfUnit::addSourceLine(DIE &Die, unsigned Line, const DIFile *File) { if (Line == 0) return; - unsigned FileID = getOrCreateSourceID(File, Directory); + unsigned FileID = getOrCreateSourceID(File); assert(FileID && "Invalid file id"); addUInt(Die, dwarf::DW_AT_decl_file, None, FileID); addUInt(Die, dwarf::DW_AT_decl_line, None, Line); @@ -349,32 +366,31 @@ void DwarfUnit::addSourceLine(DIE &Die, unsigned Line, StringRef File, void DwarfUnit::addSourceLine(DIE &Die, const DILocalVariable *V) { assert(V); - addSourceLine(Die, V->getLine(), V->getScope()->getFilename(), - V->getScope()->getDirectory()); + addSourceLine(Die, V->getLine(), V->getFile()); } void DwarfUnit::addSourceLine(DIE &Die, const DIGlobalVariable *G) { assert(G); - addSourceLine(Die, G->getLine(), G->getFilename(), G->getDirectory()); + addSourceLine(Die, G->getLine(), G->getFile()); } void DwarfUnit::addSourceLine(DIE &Die, const DISubprogram *SP) { assert(SP); - addSourceLine(Die, SP->getLine(), SP->getFilename(), SP->getDirectory()); + addSourceLine(Die, SP->getLine(), SP->getFile()); } void DwarfUnit::addSourceLine(DIE &Die, const DIType *Ty) { assert(Ty); - addSourceLine(Die, Ty->getLine(), Ty->getFilename(), Ty->getDirectory()); + addSourceLine(Die, Ty->getLine(), Ty->getFile()); } void DwarfUnit::addSourceLine(DIE &Die, const DIObjCProperty *Ty) { assert(Ty); - addSourceLine(Die, Ty->getLine(), Ty->getFilename(), Ty->getDirectory()); + addSourceLine(Die, Ty->getLine(), Ty->getFile()); } /* Byref variables, in Blocks, are declared by the programmer as "SomeType @@ -975,6 +991,15 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) { Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type) addTemplateParams(Buffer, CTy->getTemplateParams()); + // Add the type's non-standard calling convention. + uint8_t CC = 0; + if (CTy->isTypePassByValue()) + CC = dwarf::DW_CC_pass_by_value; + else if (CTy->isTypePassByReference()) + CC = dwarf::DW_CC_pass_by_reference; + if (CC) + addUInt(Buffer, dwarf::DW_AT_calling_convention, dwarf::DW_FORM_data1, + CC); break; } default: @@ -1152,9 +1177,8 @@ bool DwarfUnit::applySubprogramDefinitionAttributes(const DISubprogram *SP, // Look at the Decl's linkage name only if we emitted it. 
if (DD->useAllLinkageNames()) DeclLinkageName = SPDecl->getLinkageName(); - unsigned DeclID = - getOrCreateSourceID(SPDecl->getFilename(), SPDecl->getDirectory()); - unsigned DefID = getOrCreateSourceID(SP->getFilename(), SP->getDirectory()); + unsigned DeclID = getOrCreateSourceID(SPDecl->getFile()); + unsigned DefID = getOrCreateSourceID(SP->getFile()); if (DeclID != DefID) addUInt(SPDie, dwarf::DW_AT_decl_file, None, DefID); @@ -1391,7 +1415,8 @@ void DwarfUnit::constructMemberDIE(DIE &Buffer, const DIDerivedType *DT) { if (!Name.empty()) addString(MemberDie, dwarf::DW_AT_name, Name); - addType(MemberDie, resolve(DT->getBaseType())); + if (DIType *Resolved = resolve(DT->getBaseType())) + addType(MemberDie, Resolved); addSourceLine(MemberDie, DT); diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.h b/lib/CodeGen/AsmPrinter/DwarfUnit.h index 4cc01b3298d4..6546a0c72d11 100644 --- a/lib/CodeGen/AsmPrinter/DwarfUnit.h +++ b/lib/CodeGen/AsmPrinter/DwarfUnit.h @@ -207,8 +207,7 @@ class DwarfUnit : public DIEUnit { void addBlock(DIE &Die, dwarf::Attribute Attribute, DIEBlock *Block); /// Add location information to specified debug information entry. - void addSourceLine(DIE &Die, unsigned Line, StringRef File, - StringRef Directory); + void addSourceLine(DIE &Die, unsigned Line, const DIFile *File); void addSourceLine(DIE &Die, const DILocalVariable *V); void addSourceLine(DIE &Die, const DIGlobalVariable *G); void addSourceLine(DIE &Die, const DISubprogram *SP); @@ -306,9 +305,13 @@ class DwarfUnit : public DIEUnit { /// Create new static data member DIE. DIE *getOrCreateStaticMemberDIE(const DIDerivedType *DT); - /// Look up the source ID with the given directory and source file names. If - /// none currently exists, create a new ID and insert it in the line table. - virtual unsigned getOrCreateSourceID(StringRef File, StringRef Directory) = 0; + /// Look up the source ID for the given file. If none currently exists, + /// create a new ID and insert it in the line table. + virtual unsigned getOrCreateSourceID(const DIFile *File) = 0; + + /// If the \p File has an MD5 checksum, return it as an MD5Result + /// allocated in the MCContext. + MD5::MD5Result *getMD5AsBytes(const DIFile *File); /// Look in the DwarfDebug map for the MDNode that corresponds to the /// reference. @@ -358,7 +361,7 @@ class DwarfTypeUnit final : public DwarfUnit { DwarfCompileUnit &CU; MCDwarfDwoLineTable *SplitLineTable; - unsigned getOrCreateSourceID(StringRef File, StringRef Directory) override; + unsigned getOrCreateSourceID(const DIFile *File) override; bool isDwoUnit() const override; public: diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/lib/CodeGen/AsmPrinter/EHStreamer.cpp index 3cdab57bca70..871699afcb6c 100644 --- a/lib/CodeGen/AsmPrinter/EHStreamer.cpp +++ b/lib/CodeGen/AsmPrinter/EHStreamer.cpp @@ -149,7 +149,7 @@ computeActionsTable(const SmallVectorImpl &LandingPads, FirstAction = SizeActions + SizeSiteActions - SizeAction + 1; } // else identical - re-use previous FirstAction - // Information used when created the call-site table. The action record + // Information used when creating the call-site table. The action record // field of the call site record is the offset of the first associated // action record, relative to the start of the actions table. 
This value is biased by 1 (1 indicating the start of the actions table), and 0 diff --git a/lib/CodeGen/AsmPrinter/WinCFGuard.cpp b/lib/CodeGen/AsmPrinter/WinCFGuard.cpp new file mode 100644 index 000000000000..18d37caf57ee --- /dev/null +++ b/lib/CodeGen/AsmPrinter/WinCFGuard.cpp @@ -0,0 +1,45 @@ +//===-- CodeGen/AsmPrinter/WinCFGuard.cpp - Control Flow Guard Impl ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing Win64 exception info into asm files. +// +//===----------------------------------------------------------------------===// + +#include "WinCFGuard.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Metadata.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCObjectFileInfo.h" +#include "llvm/MC/MCStreamer.h" + +#include <vector> + +using namespace llvm; + +WinCFGuard::WinCFGuard(AsmPrinter *A) : AsmPrinterHandler(), Asm(A) {} + +WinCFGuard::~WinCFGuard() {} + +void WinCFGuard::endModule() { + const Module *M = Asm->MMI->getModule(); + std::vector<const Function *> Functions; + for (const Function &F : *M) + if (F.hasAddressTaken()) + Functions.push_back(&F); + if (Functions.empty()) + return; + auto &OS = *Asm->OutStreamer; + OS.SwitchSection(Asm->OutContext.getObjectFileInfo()->getGFIDsSection()); + for (const Function *F : Functions) + OS.EmitCOFFSymbolIndex(Asm->getSymbol(F)); +} diff --git a/lib/CodeGen/AsmPrinter/WinCFGuard.h b/lib/CodeGen/AsmPrinter/WinCFGuard.h new file mode 100644 index 000000000000..553b4ae261c7 --- /dev/null +++ b/lib/CodeGen/AsmPrinter/WinCFGuard.h @@ -0,0 +1,54 @@ +//===-- WinCFGuard.h - Windows Control Flow Guard Handling ----*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing windows exception info into asm files. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_WINCFGUARD_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_WINCFGUARD_H + +#include "AsmPrinterHandler.h" +#include "llvm/Support/Compiler.h" + +namespace llvm { + +class LLVM_LIBRARY_VISIBILITY WinCFGuard : public AsmPrinterHandler { + /// Target of directive emission. + AsmPrinter *Asm; + +public: + WinCFGuard(AsmPrinter *A); + ~WinCFGuard() override; + + void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override {} + + /// \brief Emit the Control Flow Guard function ID table + void endModule() override; + + /// \brief Gather pre-function debug information. + /// Every beginFunction(MF) call should be followed by an endFunction(MF) + /// call. + void beginFunction(const MachineFunction *MF) override {} + + /// \brief Gather post-function debug information. + /// Please note that some AsmPrinter implementations may not call + /// beginFunction at all. + void beginInstruction(const MachineInstr *MI) override {} + + /// \brief Process beginning of an instruction. + void beginInstruction(const MachineInstr *MI) override {} + + /// \brief Process end of an instruction.
+ void endInstruction() override {} +}; + +} // namespace llvm + +#endif diff --git a/lib/CodeGen/AsmPrinter/WinException.cpp b/lib/CodeGen/AsmPrinter/WinException.cpp index 3f26bcaf71aa..a6a8e84a949f 100644 --- a/lib/CodeGen/AsmPrinter/WinException.cpp +++ b/lib/CodeGen/AsmPrinter/WinException.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "WinException.h" -#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" #include "llvm/BinaryFormat/COFF.h" #include "llvm/BinaryFormat/Dwarf.h" @@ -23,7 +22,6 @@ #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetLoweringObjectFile.h" -#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/DataLayout.h" @@ -35,7 +33,6 @@ #include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/MC/MCWin64EH.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Target/TargetOptions.h" @@ -66,7 +63,7 @@ void WinException::beginFunction(const MachineFunction *MF) { bool hasLandingPads = !MF->getLandingPads().empty(); bool hasEHFunclets = MF->hasEHFunclets(); - const Function *F = MF->getFunction(); + const Function &F = MF->getFunction(); shouldEmitMoves = Asm->needsSEHMoves() && MF->hasWinCFI(); @@ -75,14 +72,14 @@ void WinException::beginFunction(const MachineFunction *MF) { EHPersonality Per = EHPersonality::Unknown; const Function *PerFn = nullptr; - if (F->hasPersonalityFn()) { - PerFn = dyn_cast(F->getPersonalityFn()->stripPointerCasts()); + if (F.hasPersonalityFn()) { + PerFn = dyn_cast(F.getPersonalityFn()->stripPointerCasts()); Per = classifyEHPersonality(PerFn); } - bool forceEmitPersonality = F->hasPersonalityFn() && + bool forceEmitPersonality = F.hasPersonalityFn() && !isNoOpWithoutInvoke(Per) && - F->needsUnwindTableEntry(); + F.needsUnwindTableEntry(); shouldEmitPersonality = forceEmitPersonality || ((hasLandingPads || hasEHFunclets) && @@ -101,7 +98,7 @@ void WinException::beginFunction(const MachineFunction *MF) { // functions may still refer to it. const WinEHFuncInfo &FuncInfo = *MF->getWinEHFuncInfo(); StringRef FLinkageName = - GlobalValue::dropLLVMManglingEscape(MF->getFunction()->getName()); + GlobalValue::dropLLVMManglingEscape(MF->getFunction().getName()); emitEHRegistrationOffsetLabel(FuncInfo, FLinkageName); } shouldEmitLSDA = hasEHFunclets; @@ -118,10 +115,10 @@ void WinException::endFunction(const MachineFunction *MF) { if (!shouldEmitPersonality && !shouldEmitMoves && !shouldEmitLSDA) return; - const Function *F = MF->getFunction(); + const Function &F = MF->getFunction(); EHPersonality Per = EHPersonality::Unknown; - if (F->hasPersonalityFn()) - Per = classifyEHPersonality(F->getPersonalityFn()->stripPointerCasts()); + if (F.hasPersonalityFn()) + Per = classifyEHPersonality(F.getPersonalityFn()->stripPointerCasts()); // Get rid of any dead landing pads if we're not using funclets. In funclet // schemes, the landing pad is not actually reachable. It only exists so @@ -173,8 +170,8 @@ static MCSymbol *getMCSymbolForMBB(AsmPrinter *Asm, // Give catches and cleanups a name based off of their parent function and // their funclet entry block's number. 
const MachineFunction *MF = MBB->getParent(); - const Function *F = MF->getFunction(); - StringRef FuncLinkageName = GlobalValue::dropLLVMManglingEscape(F->getName()); + const Function &F = MF->getFunction(); + StringRef FuncLinkageName = GlobalValue::dropLLVMManglingEscape(F.getName()); MCContext &Ctx = MF->getContext(); StringRef HandlerPrefix = MBB->isCleanupFuncletEntry() ? "dtor" : "catch"; return Ctx.getOrCreateSymbol("?" + HandlerPrefix + "$" + @@ -186,7 +183,7 @@ void WinException::beginFunclet(const MachineBasicBlock &MBB, MCSymbol *Sym) { CurrentFuncletEntry = &MBB; - const Function *F = Asm->MF->getFunction(); + const Function &F = Asm->MF->getFunction(); // If a symbol was not provided for the funclet, invent one. if (!Sym) { Sym = getMCSymbolForMBB(Asm, &MBB); @@ -201,7 +198,7 @@ void WinException::beginFunclet(const MachineBasicBlock &MBB, // We want our funclet's entry point to be aligned such that no nops will be // present after the label. Asm->EmitAlignment(std::max(Asm->MF->getAlignment(), MBB.getAlignment()), - F); + &F); // Now that we've emitted the alignment directive, point at our funclet. Asm->OutStreamer->EmitLabel(Sym); @@ -218,8 +215,8 @@ void WinException::beginFunclet(const MachineBasicBlock &MBB, const Function *PerFn = nullptr; // Determine which personality routine we are using for this funclet. - if (F->hasPersonalityFn()) - PerFn = dyn_cast(F->getPersonalityFn()->stripPointerCasts()); + if (F.hasPersonalityFn()) + PerFn = dyn_cast(F.getPersonalityFn()->stripPointerCasts()); const MCSymbol *PersHandlerSym = TLOF.getCFIPersonalitySymbol(PerFn, Asm->TM, MMI); @@ -240,10 +237,10 @@ void WinException::endFunclet() { const MachineFunction *MF = Asm->MF; if (shouldEmitMoves || shouldEmitPersonality) { - const Function *F = MF->getFunction(); + const Function &F = MF->getFunction(); EHPersonality Per = EHPersonality::Unknown; - if (F->hasPersonalityFn()) - Per = classifyEHPersonality(F->getPersonalityFn()->stripPointerCasts()); + if (F.hasPersonalityFn()) + Per = classifyEHPersonality(F.getPersonalityFn()->stripPointerCasts()); // Emit an UNWIND_INFO struct describing the prologue. Asm->OutStreamer->EmitWinEHHandlerData(); @@ -252,7 +249,7 @@ void WinException::endFunclet() { !CurrentFuncletEntry->isCleanupFuncletEntry()) { // If this is a C++ catch funclet (or the parent function), // emit a reference to the LSDA for the parent function. - StringRef FuncLinkageName = GlobalValue::dropLLVMManglingEscape(F->getName()); + StringRef FuncLinkageName = GlobalValue::dropLLVMManglingEscape(F.getName()); MCSymbol *FuncInfoXData = Asm->OutContext.getOrCreateSymbol( Twine("$cppxdata$", FuncLinkageName)); Asm->OutStreamer->EmitValue(create32bitRef(FuncInfoXData), 4); @@ -536,7 +533,7 @@ void WinException::emitCSpecificHandlerTable(const MachineFunction *MF) { // Emit a label assignment with the SEH frame offset so we can use it for // llvm.x86.seh.recoverfp. 
StringRef FLinkageName = - GlobalValue::dropLLVMManglingEscape(MF->getFunction()->getName()); + GlobalValue::dropLLVMManglingEscape(MF->getFunction().getName()); MCSymbol *ParentFrameOffset = Ctx.getOrCreateParentFrameOffsetSymbol(FLinkageName); const MCExpr *MCOffset = @@ -631,11 +628,11 @@ void WinException::emitSEHActionsForRange(const WinEHFuncInfo &FuncInfo, } void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) { - const Function *F = MF->getFunction(); + const Function &F = MF->getFunction(); auto &OS = *Asm->OutStreamer; const WinEHFuncInfo &FuncInfo = *MF->getWinEHFuncInfo(); - StringRef FuncLinkageName = GlobalValue::dropLLVMManglingEscape(F->getName()); + StringRef FuncLinkageName = GlobalValue::dropLLVMManglingEscape(F.getName()); SmallVector, 4> IPToStateTable; MCSymbol *FuncInfoXData = nullptr; @@ -941,8 +938,8 @@ void WinException::emitEHRegistrationOffsetLabel(const WinEHFuncInfo &FuncInfo, /// indexed by state number instead of IP. void WinException::emitExceptHandlerTable(const MachineFunction *MF) { MCStreamer &OS = *Asm->OutStreamer; - const Function *F = MF->getFunction(); - StringRef FLinkageName = GlobalValue::dropLLVMManglingEscape(F->getName()); + const Function &F = MF->getFunction(); + StringRef FLinkageName = GlobalValue::dropLLVMManglingEscape(F.getName()); bool VerboseAsm = OS.isVerboseAsm(); auto AddComment = [&](const Twine &Comment) { @@ -959,7 +956,7 @@ void WinException::emitExceptHandlerTable(const MachineFunction *MF) { OS.EmitLabel(LSDALabel); const Function *Per = - dyn_cast(F->getPersonalityFn()->stripPointerCasts()); + dyn_cast(F.getPersonalityFn()->stripPointerCasts()); StringRef PerName = Per->getName(); int BaseState = -1; if (PerName == "_except_handler4") { diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index d31260e767fb..7f358a679366 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -118,7 +118,7 @@ INITIALIZE_PASS(BranchFolderPass, DEBUG_TYPE, "Control Flow Optimizer", false, false) bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) { - if (skipFunction(*MF.getFunction())) + if (skipFunction(MF.getFunction())) return false; TargetPassConfig *PassConfig = &getAnalysis(); @@ -613,8 +613,8 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2, CommonTailLen = ComputeCommonTailLength(MBB1, MBB2, I1, I2); if (CommonTailLen == 0) return false; - DEBUG(dbgs() << "Common tail length of BB#" << MBB1->getNumber() - << " and BB#" << MBB2->getNumber() << " is " << CommonTailLen + DEBUG(dbgs() << "Common tail length of " << printMBBReference(*MBB1) + << " and " << printMBBReference(*MBB2) << " is " << CommonTailLen << '\n'); // It's almost always profitable to merge any number of non-terminator @@ -685,7 +685,7 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2, // branch instruction, which is likely to be smaller than the 2 // instructions that would be deleted in the merge. 
MachineFunction *MF = MBB1->getParent(); - return EffectiveTailLen >= 2 && MF->getFunction()->optForSize() && + return EffectiveTailLen >= 2 && MF->getFunction().optForSize() && (I1 == MBB1->begin() || I2 == MBB2->begin()); } @@ -770,7 +770,7 @@ bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, SameTails[commonTailIndex].getTailStartPos(); MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock(); - DEBUG(dbgs() << "\nSplitting BB#" << MBB->getNumber() << ", size " + DEBUG(dbgs() << "\nSplitting " << printMBBReference(*MBB) << ", size " << maxCommonTailLength); // If the split block unconditionally falls-thru to SuccBB, it will be @@ -920,20 +920,17 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB, bool MadeChange = false; DEBUG(dbgs() << "\nTryTailMergeBlocks: "; - for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i) - dbgs() << "BB#" << MergePotentials[i].getBlock()->getNumber() - << (i == e-1 ? "" : ", "); - dbgs() << "\n"; - if (SuccBB) { - dbgs() << " with successor BB#" << SuccBB->getNumber() << '\n'; + for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i) dbgs() + << printMBBReference(*MergePotentials[i].getBlock()) + << (i == e - 1 ? "" : ", "); + dbgs() << "\n"; if (SuccBB) { + dbgs() << " with successor " << printMBBReference(*SuccBB) << '\n'; if (PredBB) - dbgs() << " which has fall-through from BB#" - << PredBB->getNumber() << "\n"; - } - dbgs() << "Looking for common tails of at least " - << MinCommonTailLength << " instruction" - << (MinCommonTailLength == 1 ? "" : "s") << '\n'; - ); + dbgs() << " which has fall-through from " + << printMBBReference(*PredBB) << "\n"; + } dbgs() << "Looking for common tails of at least " + << MinCommonTailLength << " instruction" + << (MinCommonTailLength == 1 ? "" : "s") << '\n';); // Sort by hash value so that blocks with identical end sequences sort // together. @@ -1013,13 +1010,13 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB, // MBB is common tail. Adjust all other BB's to jump to this one. // Traversal must be forwards so erases work. - DEBUG(dbgs() << "\nUsing common tail in BB#" << MBB->getNumber() + DEBUG(dbgs() << "\nUsing common tail in " << printMBBReference(*MBB) << " for "); for (unsigned int i=0, e = SameTails.size(); i != e; ++i) { if (commonTailIndex == i) continue; - DEBUG(dbgs() << "BB#" << SameTails[i].getBlock()->getNumber() - << (i == e-1 ? "" : ", ")); + DEBUG(dbgs() << printMBBReference(*SameTails[i].getBlock()) + << (i == e - 1 ? "" : ", ")); // Hack the end off BB i, making it jump to BB commonTailIndex instead. replaceTailWithBranchTo(SameTails[i].getTailStartPos(), *MBB); // BB i is no longer a predecessor of SuccBB; remove it from the worklist. @@ -1514,7 +1511,7 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { } if (!IsEmptyBlock(MBB) && MBB->pred_size() == 1 && - MF.getFunction()->optForSize()) { + MF.getFunction().optForSize()) { // Changing "Jcc foo; foo: jmp bar;" into "Jcc bar;" might change the branch // direction, thereby defeating careful block placement and regressing // performance. Therefore, only consider this for optsize functions. @@ -1971,7 +1968,7 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { // // BB2: // r1 = op2, ... 
- // = op3, r1 + // = op3, killed r1 IsSafe = false; break; } diff --git a/lib/CodeGen/BranchRelaxation.cpp b/lib/CodeGen/BranchRelaxation.cpp index 99fa4dc67915..970b33fa51a6 100644 --- a/lib/CodeGen/BranchRelaxation.cpp +++ b/lib/CodeGen/BranchRelaxation.cpp @@ -143,7 +143,7 @@ void BranchRelaxation::verify() { LLVM_DUMP_METHOD void BranchRelaxation::dumpBBs() { for (auto &MBB : *MF) { const BasicBlockInfo &BBI = BlockInfo[MBB.getNumber()]; - dbgs() << format("BB#%u\toffset=%08x\t", MBB.getNumber(), BBI.Offset) + dbgs() << format("%bb.%u\toffset=%08x\t", MBB.getNumber(), BBI.Offset) << format("size=%#x\n", BBI.Size); } } @@ -287,13 +287,10 @@ bool BranchRelaxation::isBlockInRange( if (TII->isBranchOffsetInRange(MI.getOpcode(), DestOffset - BrOffset)) return true; - DEBUG( - dbgs() << "Out of range branch to destination BB#" << DestBB.getNumber() - << " from BB#" << MI.getParent()->getNumber() - << " to " << DestOffset - << " offset " << DestOffset - BrOffset - << '\t' << MI - ); + DEBUG(dbgs() << "Out of range branch to destination " + << printMBBReference(DestBB) << " from " + << printMBBReference(*MI.getParent()) << " to " << DestOffset + << " offset " << DestOffset - BrOffset << '\t' << MI); return false; } @@ -305,8 +302,41 @@ bool BranchRelaxation::fixupConditionalBranch(MachineInstr &MI) { DebugLoc DL = MI.getDebugLoc(); MachineBasicBlock *MBB = MI.getParent(); MachineBasicBlock *TBB = nullptr, *FBB = nullptr; + MachineBasicBlock *NewBB = nullptr; SmallVector Cond; + auto insertUncondBranch = [&](MachineBasicBlock *MBB, + MachineBasicBlock *DestBB) { + unsigned &BBSize = BlockInfo[MBB->getNumber()].Size; + int NewBrSize = 0; + TII->insertUnconditionalBranch(*MBB, DestBB, DL, &NewBrSize); + BBSize += NewBrSize; + }; + auto insertBranch = [&](MachineBasicBlock *MBB, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + SmallVectorImpl& Cond) { + unsigned &BBSize = BlockInfo[MBB->getNumber()].Size; + int NewBrSize = 0; + TII->insertBranch(*MBB, TBB, FBB, Cond, DL, &NewBrSize); + BBSize += NewBrSize; + }; + auto removeBranch = [&](MachineBasicBlock *MBB) { + unsigned &BBSize = BlockInfo[MBB->getNumber()].Size; + int RemovedSize = 0; + TII->removeBranch(*MBB, &RemovedSize); + BBSize -= RemovedSize; + }; + + auto finalizeBlockChanges = [&](MachineBasicBlock *MBB, + MachineBasicBlock *NewBB) { + // Keep the block offsets up to date. + adjustBlockOffsets(*MBB); + + // Need to fix live-in lists if we track liveness. + if (NewBB && TRI->trackLivenessAfterRegAlloc(*MF)) + computeAndAddLiveIns(LiveRegs, *NewBB); + }; + bool Fail = TII->analyzeBranch(*MBB, TBB, FBB, Cond); assert(!Fail && "branches to be relaxed must be analyzable"); (void)Fail; @@ -319,71 +349,88 @@ bool BranchRelaxation::fixupConditionalBranch(MachineInstr &MI) { // b L1 // L2: - if (FBB && isBlockInRange(MI, *FBB)) { - // Last MI in the BB is an unconditional branch. We can simply invert the - // condition and swap destinations: - // beq L1 - // b L2 - // => - // bne L2 - // b L1 - DEBUG(dbgs() << " Invert condition and swap " - "its destination with " << MBB->back()); - - TII->reverseBranchCondition(Cond); - int OldSize = 0, NewSize = 0; - TII->removeBranch(*MBB, &OldSize); - TII->insertBranch(*MBB, FBB, TBB, Cond, DL, &NewSize); - - BlockInfo[MBB->getNumber()].Size += (NewSize - OldSize); - return true; - } else if (FBB) { - // We need to split the basic block here to obtain two long-range - // unconditional branches. 
- auto &NewBB = *MF->CreateMachineBasicBlock(MBB->getBasicBlock()); - MF->insert(++MBB->getIterator(), &NewBB); - - // Insert an entry into BlockInfo to align it properly with the block - // numbers. - BlockInfo.insert(BlockInfo.begin() + NewBB.getNumber(), BasicBlockInfo()); - - unsigned &NewBBSize = BlockInfo[NewBB.getNumber()].Size; - int NewBrSize; - TII->insertUnconditionalBranch(NewBB, FBB, DL, &NewBrSize); - NewBBSize += NewBrSize; - - // Update the successor lists according to the transformation to follow. - // Do it here since if there's no split, no update is needed. - MBB->replaceSuccessor(FBB, &NewBB); - NewBB.addSuccessor(FBB); + bool ReversedCond = !TII->reverseBranchCondition(Cond); + if (ReversedCond) { + if (FBB && isBlockInRange(MI, *FBB)) { + // Last MI in the BB is an unconditional branch. We can simply invert the + // condition and swap destinations: + // beq L1 + // b L2 + // => + // bne L2 + // b L1 + DEBUG(dbgs() << " Invert condition and swap " + "its destination with " << MBB->back()); + + removeBranch(MBB); + insertBranch(MBB, FBB, TBB, Cond); + finalizeBlockChanges(MBB, nullptr); + return true; + } + if (FBB) { + // We need to split the basic block here to obtain two long-range + // unconditional branches. + NewBB = createNewBlockAfter(*MBB); + + insertUncondBranch(NewBB, FBB); + // Update the succesor lists according to the transformation to follow. + // Do it here since if there's no split, no update is needed. + MBB->replaceSuccessor(FBB, NewBB); + NewBB->addSuccessor(FBB); + } - // Need to fix live-in lists if we track liveness. - if (TRI->trackLivenessAfterRegAlloc(*MF)) - computeAndAddLiveIns(LiveRegs, NewBB); + // We now have an appropriate fall-through block in place (either naturally or + // just created), so we can use the inverted the condition. + MachineBasicBlock &NextBB = *std::next(MachineFunction::iterator(MBB)); + + DEBUG(dbgs() << " Insert B to " << printMBBReference(*TBB) + << ", invert condition and change dest. to " + << printMBBReference(NextBB) << '\n'); + + removeBranch(MBB); + // Insert a new conditional branch and a new unconditional branch. + insertBranch(MBB, &NextBB, TBB, Cond); + + finalizeBlockChanges(MBB, NewBB); + return true; } + // Branch cond can't be inverted. + // In this case we always add a block after the MBB. + DEBUG(dbgs() << " The branch condition can't be inverted. " + << " Insert a new BB after " << MBB->back()); - // We now have an appropriate fall-through block in place (either naturally or - // just created), so we can invert the condition. - MachineBasicBlock &NextBB = *std::next(MachineFunction::iterator(MBB)); + if (!FBB) + FBB = &(*std::next(MachineFunction::iterator(MBB))); - DEBUG(dbgs() << " Insert B to BB#" << TBB->getNumber() - << ", invert condition and change dest. to BB#" - << NextBB.getNumber() << '\n'); + // This is the block with cond. branch and the distance to TBB is too long. + // beq L1 + // L2: - unsigned &MBBSize = BlockInfo[MBB->getNumber()].Size; + // We do the following transformation: + // beq NewBB + // b L2 + // NewBB: + // b L1 + // L2: - // Insert a new conditional branch and a new unconditional branch. 
- int RemovedSize = 0; - TII->reverseBranchCondition(Cond); - TII->removeBranch(*MBB, &RemovedSize); - MBBSize -= RemovedSize; + NewBB = createNewBlockAfter(*MBB); + insertUncondBranch(NewBB, TBB); - int AddedSize = 0; - TII->insertBranch(*MBB, &NextBB, TBB, Cond, DL, &AddedSize); - MBBSize += AddedSize; + DEBUG(dbgs() << " Insert cond B to the new BB " << printMBBReference(*NewBB) + << " Keep the existing condition.\n" + << " Insert B to " << printMBBReference(*FBB) << ".\n" + << " In the new BB: Insert B to " + << printMBBReference(*TBB) << ".\n"); - // Finally, keep the block offsets up to date. - adjustBlockOffsets(*MBB); + // Update the successor lists according to the transformation to follow. + MBB->replaceSuccessor(TBB, NewBB); + NewBB->addSuccessor(TBB); + + // Replace branch in the current (MBB) block. + removeBranch(MBB); + insertBranch(MBB, NewBB, FBB, Cond); + + finalizeBlockChanges(MBB, NewBB); return true; } diff --git a/lib/CodeGen/BreakFalseDeps.cpp b/lib/CodeGen/BreakFalseDeps.cpp new file mode 100644 index 000000000000..5e60b7ae32fd --- /dev/null +++ b/lib/CodeGen/BreakFalseDeps.cpp @@ -0,0 +1,271 @@ +//==- llvm/CodeGen/BreakFalseDeps.cpp - Break False Dependency Fix -*- C++ -*==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file Break False Dependency pass. +/// +/// Some instructions have false dependencies which cause unnecessary stalls. +/// For example, instructions that only write part of a register, and implicitly +/// need to read the other parts of the register. This may cause unwanted +/// stalls preventing otherwise unrelated instructions from executing in +/// parallel in an out-of-order CPU. +/// This pass is aimed at identifying and avoiding these dependencies when +/// possible. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/LivePhysRegs.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/ReachingDefAnalysis.h" +#include "llvm/CodeGen/RegisterClassInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" + + +using namespace llvm; + +namespace llvm { + +class BreakFalseDeps : public MachineFunctionPass { +private: + MachineFunction *MF; + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + RegisterClassInfo RegClassInfo; + + /// List of undefined register reads in this block in forward order. + std::vector<std::pair<MachineInstr *, unsigned>> UndefReads; + + /// Storage for register unit liveness. + LivePhysRegs LiveRegSet; + + ReachingDefAnalysis *RDA; + +public: + static char ID; // Pass identification, replacement for typeid + + BreakFalseDeps() : MachineFunctionPass(ID) { + initializeBreakFalseDepsPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + AU.addRequired<ReachingDefAnalysis>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoVRegs); + } + +private: + /// Process the given basic block. + void processBasicBlock(MachineBasicBlock *MBB); + + /// Update def-ages for registers defined by MI. + /// Also break dependencies on partial defs and undef uses.
+ void processDefs(MachineInstr *MI); + + /// \brief Helps avoid false dependencies on undef registers by updating the + /// machine instructions' undef operand to use a register that the instruction + /// is truly dependent on, or use a register with clearance higher than Pref. + /// Returns true if it was able to find a true dependency, thus not requiring + /// a dependency breaking instruction regardless of clearance. + bool pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx, + unsigned Pref); + + /// \brief Return true if it makes sense to break dependence on a partial + /// def or undef use. + bool shouldBreakDependence(MachineInstr *, unsigned OpIdx, unsigned Pref); + + /// \brief Break false dependencies on undefined register reads. + /// Walk the block backward computing precise liveness. This is expensive, so + /// we only do it on demand. Note that the occurrence of undefined register + /// reads that should be broken is very rare, but when they occur we may have + /// many in a single block. + void processUndefReads(MachineBasicBlock *); +}; + +} // namespace llvm + +#define DEBUG_TYPE "break-false-deps" + +char BreakFalseDeps::ID = 0; +INITIALIZE_PASS_BEGIN(BreakFalseDeps, DEBUG_TYPE, "BreakFalseDeps", false, false) +INITIALIZE_PASS_DEPENDENCY(ReachingDefAnalysis) +INITIALIZE_PASS_END(BreakFalseDeps, DEBUG_TYPE, "BreakFalseDeps", false, false) +FunctionPass *llvm::createBreakFalseDeps() { return new BreakFalseDeps(); } + +bool BreakFalseDeps::pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx, + unsigned Pref) { + MachineOperand &MO = MI->getOperand(OpIdx); + assert(MO.isUndef() && "Expected undef machine operand"); + + unsigned OriginalReg = MO.getReg(); + + // Update only undef operands that have reg units that are mapped to one root. + for (MCRegUnitIterator Unit(OriginalReg, TRI); Unit.isValid(); ++Unit) { + unsigned NumRoots = 0; + for (MCRegUnitRootIterator Root(*Unit, TRI); Root.isValid(); ++Root) { + NumRoots++; + if (NumRoots > 1) + return false; + } + } + + // Get the undef operand's register class + const TargetRegisterClass *OpRC = + TII->getRegClass(MI->getDesc(), OpIdx, TRI, *MF); + + // If the instruction has a true dependency, we can hide the false dependency + // behind it. + for (MachineOperand &CurrMO : MI->operands()) { + if (!CurrMO.isReg() || CurrMO.isDef() || CurrMO.isUndef() || + !OpRC->contains(CurrMO.getReg())) + continue; + // We found a true dependency - replace the undef register with the true + // dependency. + MO.setReg(CurrMO.getReg()); + return true; + } + + // Go over all registers in the register class and find the register with + // max clearance or clearance higher than Pref. + unsigned MaxClearance = 0; + unsigned MaxClearanceReg = OriginalReg; + ArrayRef<MCPhysReg> Order = RegClassInfo.getOrder(OpRC); + for (MCPhysReg Reg : Order) { + unsigned Clearance = RDA->getClearance(MI, Reg); + if (Clearance <= MaxClearance) + continue; + MaxClearance = Clearance; + MaxClearanceReg = Reg; + + if (MaxClearance > Pref) + break; + } + + // Update the operand if we found a register with better clearance.
+ if (MaxClearanceReg != OriginalReg) + MO.setReg(MaxClearanceReg); + + return false; +} + +bool BreakFalseDeps::shouldBreakDependence(MachineInstr *MI, unsigned OpIdx, + unsigned Pref) { + unsigned reg = MI->getOperand(OpIdx).getReg(); + unsigned Clearance = RDA->getClearance(MI, reg); + DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref); + + if (Pref > Clearance) { + DEBUG(dbgs() << ": Break dependency.\n"); + return true; + } + DEBUG(dbgs() << ": OK .\n"); + return false; +} + +void BreakFalseDeps::processDefs(MachineInstr *MI) { + assert(!MI->isDebugValue() && "Won't process debug values"); + + // Break dependence on undef uses. Do this before updating LiveRegs below. + unsigned OpNum; + unsigned Pref = TII->getUndefRegClearance(*MI, OpNum, TRI); + if (Pref) { + bool HadTrueDependency = pickBestRegisterForUndef(MI, OpNum, Pref); + // We don't need to bother trying to break a dependency if this + // instruction has a true dependency on that register through another + // operand - we'll have to wait for it to be available regardless. + if (!HadTrueDependency && shouldBreakDependence(MI, OpNum, Pref)) + UndefReads.push_back(std::make_pair(MI, OpNum)); + } + + const MCInstrDesc &MCID = MI->getDesc(); + for (unsigned i = 0, + e = MI->isVariadic() ? MI->getNumOperands() : MCID.getNumDefs(); + i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.getReg()) + continue; + if (MO.isUse()) + continue; + // Check clearance before partial register updates. + unsigned Pref = TII->getPartialRegUpdateClearance(*MI, i, TRI); + if (Pref && shouldBreakDependence(MI, i, Pref)) + TII->breakPartialRegDependency(*MI, i, TRI); + } +} + +void BreakFalseDeps::processUndefReads(MachineBasicBlock *MBB) { + if (UndefReads.empty()) + return; + + // Collect this block's live out register units. + LiveRegSet.init(*TRI); + // We do not need to care about pristine registers as they are just preserved + // but not actually used in the function. + LiveRegSet.addLiveOutsNoPristines(*MBB); + + MachineInstr *UndefMI = UndefReads.back().first; + unsigned OpIdx = UndefReads.back().second; + + for (MachineInstr &I : make_range(MBB->rbegin(), MBB->rend())) { + // Update liveness, including the current instruction's defs. + LiveRegSet.stepBackward(I); + + if (UndefMI == &I) { + if (!LiveRegSet.contains(UndefMI->getOperand(OpIdx).getReg())) + TII->breakPartialRegDependency(*UndefMI, OpIdx, TRI); + + UndefReads.pop_back(); + if (UndefReads.empty()) + return; + + UndefMI = UndefReads.back().first; + OpIdx = UndefReads.back().second; + } + } +} + +void BreakFalseDeps::processBasicBlock(MachineBasicBlock *MBB) { + UndefReads.clear(); + // If this block is not done, it makes little sense to make any decisions + // based on clearance information. We need to make a second pass anyway, + // and by then we'll have better information, so we can avoid doing the work + // to try and break dependencies now. + for (MachineInstr &MI : *MBB) { + if (!MI.isDebugValue()) + processDefs(&MI); + } + processUndefReads(MBB); +} + +bool BreakFalseDeps::runOnMachineFunction(MachineFunction &mf) { + if (skipFunction(mf.getFunction())) + return false; + MF = &mf; + TII = MF->getSubtarget().getInstrInfo(); + TRI = MF->getSubtarget().getRegisterInfo(); + RDA = &getAnalysis(); + + RegClassInfo.runOnMachineFunction(mf); + + DEBUG(dbgs() << "********** BREAK FALSE DEPENDENCIES **********\n"); + + // Traverse the basic blocks. 
+ for (MachineBasicBlock &MBB : mf) { + processBasicBlock(&MBB); + } + + return false; +} diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index 4b4662bb0aca..88c6bccf7d81 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -6,6 +6,7 @@ add_llvm_library(LLVMCodeGen BasicTargetTransformInfo.cpp BranchFolding.cpp BranchRelaxation.cpp + BreakFalseDeps.cpp BuiltinGCs.cpp CalcSpillWeights.cpp CallingConvLower.cpp @@ -18,7 +19,7 @@ add_llvm_library(LLVMCodeGen DwarfEHPrepare.cpp EarlyIfConversion.cpp EdgeBundles.cpp - ExecutionDepsFix.cpp + ExecutionDomainFix.cpp ExpandISelPseudos.cpp ExpandMemCmp.cpp ExpandPostRAPseudos.cpp @@ -33,6 +34,7 @@ add_llvm_library(LLVMCodeGen GlobalMerge.cpp IfConversion.cpp ImplicitNullChecks.cpp + IndirectBrExpandPass.cpp InlineSpiller.cpp InterferenceCache.cpp InterleavedAccessPass.cpp @@ -42,7 +44,7 @@ add_llvm_library(LLVMCodeGen LexicalScopes.cpp LiveDebugValues.cpp LiveDebugVariables.cpp - LiveIntervalAnalysis.cpp + LiveIntervals.cpp LiveInterval.cpp LiveIntervalUnion.cpp LivePhysRegs.cpp @@ -51,10 +53,11 @@ add_llvm_library(LLVMCodeGen LiveRangeShrink.cpp LiveRegMatrix.cpp LiveRegUnits.cpp - LiveStackAnalysis.cpp + LiveStacks.cpp LiveVariables.cpp LLVMTargetMachine.cpp LocalStackSlotAllocation.cpp + LoopTraversal.cpp LowLevelType.cpp LowerEmuTLS.cpp MachineBasicBlock.cpp @@ -76,6 +79,7 @@ add_llvm_library(LLVMCodeGen MachineLoopInfo.cpp MachineModuleInfo.cpp MachineModuleInfoImpls.cpp + MachineOperand.cpp MachineOptimizationRemarkEmitter.cpp MachineOutliner.cpp MachinePassRegistry.cpp @@ -103,6 +107,7 @@ add_llvm_library(LLVMCodeGen ProcessImplicitDefs.cpp PrologEpilogInserter.cpp PseudoSourceValue.cpp + ReachingDefAnalysis.cpp RegAllocBase.cpp RegAllocBasic.cpp RegAllocFast.cpp diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp index 6a6ec461cf70..b8920a601938 100644 --- a/lib/CodeGen/CalcSpillWeights.cpp +++ b/lib/CodeGen/CalcSpillWeights.cpp @@ -10,7 +10,7 @@ #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/CodeGen/LiveInterval.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineLoopInfo.h" @@ -70,13 +70,24 @@ static unsigned copyHint(const MachineInstr *mi, unsigned reg, return sub == hsub ? hreg : 0; const TargetRegisterClass *rc = mri.getRegClass(reg); + if (!tri.enableMultipleCopyHints()) { + // Only allow physreg hints in rc. + if (sub == 0) + return rc->contains(hreg) ? hreg : 0; - // Only allow physreg hints in rc. - if (sub == 0) - return rc->contains(hreg) ? hreg : 0; + // reg:sub should match the physreg hreg. + return tri.getMatchingSuperReg(hreg, sub, rc); + } + + unsigned CopiedPReg = (hsub ? tri.getSubReg(hreg, hsub) : hreg); + if (rc->contains(CopiedPReg)) + return CopiedPReg; + + // Check if reg:sub matches so that a super register could be hinted. + if (sub) + return tri.getMatchingSuperReg(CopiedPReg, sub, rc); - // reg:sub should match the physreg hreg. - return tri.getMatchingSuperReg(hreg, sub, rc); + return 0; } // Check if all values in LI are rematerializable @@ -157,12 +168,7 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &li, SlotIndex *start, unsigned numInstr = 0; // Number of instructions using li SmallPtrSet visited; - // Find the best physreg hint and the best virtreg hint. 
- float bestPhys = 0, bestVirt = 0; - unsigned hintPhys = 0, hintVirt = 0; - - // Don't recompute a target specific hint. - bool noHint = mri.getRegAllocationHint(li.reg).first != 0; + std::pair TargetHint = mri.getRegAllocationHint(li.reg); // Don't recompute spill weight for an unspillable register. bool Spillable = li.isSpillable(); @@ -188,6 +194,36 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &li, SlotIndex *start, numInstr += 2; } + // CopyHint is a sortable hint derived from a COPY instruction. + struct CopyHint { + unsigned Reg; + float Weight; + bool IsPhys; + unsigned HintOrder; + CopyHint(unsigned R, float W, bool P, unsigned HR) : + Reg(R), Weight(W), IsPhys(P), HintOrder(HR) {} + bool operator<(const CopyHint &rhs) const { + // Always prefer any physreg hint. + if (IsPhys != rhs.IsPhys) + return (IsPhys && !rhs.IsPhys); + if (Weight != rhs.Weight) + return (Weight > rhs.Weight); + + // This is just a temporary way to achive NFC for targets that don't + // enable multiple copy hints. HintOrder should be removed when all + // targets return true in enableMultipleCopyHints(). + return (HintOrder < rhs.HintOrder); + +#if 0 // Should replace the HintOrder check, see above. + // (just for the purpose of maintaining the set) + return Reg < rhs.Reg; +#endif + } + }; + std::set CopyHints; + + // Temporary: see comment for HintOrder above. + unsigned CopyHintOrder = 0; for (MachineRegisterInfo::reg_instr_iterator I = mri.reg_instr_begin(li.reg), E = mri.reg_instr_end(); I != E; ) { @@ -227,7 +263,8 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &li, SlotIndex *start, } // Get allocation hints from copies. - if (noHint || !mi->isCopy()) + if (!mi->isCopy() || + (TargetHint.first != 0 && !tri.enableMultipleCopyHints())) continue; unsigned hint = copyHint(mi, li.reg, tri, mri); if (!hint) @@ -237,28 +274,30 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &li, SlotIndex *start, // // FIXME: we probably shouldn't use floats at all. volatile float hweight = Hint[hint] += weight; - if (TargetRegisterInfo::isPhysicalRegister(hint)) { - if (hweight > bestPhys && mri.isAllocatable(hint)) { - bestPhys = hweight; - hintPhys = hint; - } - } else { - if (hweight > bestVirt) { - bestVirt = hweight; - hintVirt = hint; - } - } + if (TargetRegisterInfo::isVirtualRegister(hint) || mri.isAllocatable(hint)) + CopyHints.insert(CopyHint(hint, hweight, tri.isPhysicalRegister(hint), + (tri.enableMultipleCopyHints() ? hint : CopyHintOrder++))); } Hint.clear(); - // Always prefer the physreg hint. - if (updateLI) { - if (unsigned hint = hintPhys ? hintPhys : hintVirt) { - mri.setRegAllocationHint(li.reg, 0, hint); - // Weakly boost the spill weight of hinted registers. - totalWeight *= 1.01F; + // Pass all the sorted copy hints to mri. + if (updateLI && CopyHints.size()) { + // Remove a generic hint if previously added by target. + if (TargetHint.first == 0 && TargetHint.second) + mri.clearSimpleHint(li.reg); + + for (auto &Hint : CopyHints) { + if (TargetHint.first != 0 && Hint.Reg == TargetHint.second) + // Don't add again the target-type hint. + continue; + mri.addRegAllocationHint(li.reg, Hint.Reg); + if (!tri.enableMultipleCopyHints()) + break; } + + // Weakly boost the spill weight of hinted registers. + totalWeight *= 1.01F; } // If the live interval was already unspillable, leave it that way. 
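Editorial aside: the CopyHint ordering introduced in the CalcSpillWeights.cpp hunks above (any physreg hint sorts ahead of a virtreg hint, then heavier copies win) is easiest to see in isolation. The following is a minimal standalone sketch of that comparator; the Hint struct, register numbers, and weights are illustrative stand-ins, not the actual LLVM types or the HintOrder tie-break used for NFC.

#include <cstdio>
#include <set>

// Stand-in for CopyHint: physreg hints sort first, then higher weight,
// with the register number as a final tie-break to keep set semantics.
struct Hint {
  unsigned Reg;
  float Weight;
  bool IsPhys;
  bool operator<(const Hint &RHS) const {
    if (IsPhys != RHS.IsPhys)
      return IsPhys && !RHS.IsPhys; // Always prefer any physreg hint.
    if (Weight != RHS.Weight)
      return Weight > RHS.Weight;   // Then prefer the hotter copy.
    return Reg < RHS.Reg;           // Tie-break for a strict weak ordering.
  }
};

int main() {
  // Hypothetical hints: one physical-register copy and two virtual ones.
  std::set<Hint> Hints = {{7, 2.0f, false}, {1, 0.5f, true}, {5, 1.0f, false}};
  for (const Hint &H : Hints) // Visits reg 1 (phys), then 7, then 5.
    std::printf("hint reg %u, weight %.1f, phys=%d\n", H.Reg, H.Weight, H.IsPhys);
  return 0;
}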
diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp index c0d7eb4cf47b..a268dc509e53 100644 --- a/lib/CodeGen/CodeGen.cpp +++ b/lib/CodeGen/CodeGen.cpp @@ -28,6 +28,8 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeDetectDeadLanesPass(Registry); initializeDwarfEHPreparePass(Registry); initializeEarlyIfConverterPass(Registry); + initializeEarlyMachineLICMPass(Registry); + initializeEarlyTailDuplicatePass(Registry); initializeExpandISelPseudosPass(Registry); initializeExpandMemCmpPassPass(Registry); initializeExpandPostRAPass(Registry); @@ -38,6 +40,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeGCModuleInfoPass(Registry); initializeIfConverterPass(Registry); initializeImplicitNullChecksPass(Registry); + initializeIndirectBrExpandPassPass(Registry); initializeInterleavedAccessPass(Registry); initializeLiveDebugValuesPass(Registry); initializeLiveDebugVariablesPass(Registry); @@ -89,7 +92,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeStackMapLivenessPass(Registry); initializeStackProtectorPass(Registry); initializeStackSlotColoringPass(Registry); - initializeTailDuplicatePassPass(Registry); + initializeTailDuplicatePass(Registry); initializeTargetPassConfigPass(Registry); initializeTwoAddressInstructionPassPass(Registry); initializeUnpackMachineBundlesPass(Registry); diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp index 75f9f81c112c..eb2e3320a95f 100644 --- a/lib/CodeGen/CodeGenPrepare.cpp +++ b/lib/CodeGen/CodeGenPrepare.cpp @@ -18,7 +18,6 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" @@ -86,10 +85,8 @@ #include "llvm/Target/TargetOptions.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/BypassSlowDivision.h" -#include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SimplifyLibCalls.h" -#include "llvm/Transforms/Utils/ValueMapper.h" #include #include #include @@ -190,7 +187,7 @@ EnableTypePromotionMerge("cgp-type-promotion-merge", cl::Hidden, " the other."), cl::init(true)); static cl::opt DisableComplexAddrModes( - "disable-complex-addr-modes", cl::Hidden, cl::init(true), + "disable-complex-addr-modes", cl::Hidden, cl::init(false), cl::desc("Disables combining addressing modes with different parts " "in optimizeMemoryInst.")); @@ -331,7 +328,6 @@ class TypePromotionTransaction; SmallVectorImpl &SpeculativelyMovedExts); bool splitBranchCondition(Function &F); bool simplifyOffsetableRelocate(Instruction &I); - bool splitIndirectCriticalEdges(Function &F); }; } // end anonymous namespace @@ -356,8 +352,6 @@ bool CodeGenPrepare::runOnFunction(Function &F) { // Clear per function information. 
InsertedInsts.clear(); PromotedInsts.clear(); - BFI.reset(); - BPI.reset(); ModifiedDT = false; if (auto *TPC = getAnalysisIfAvailable()) { @@ -369,14 +363,16 @@ bool CodeGenPrepare::runOnFunction(Function &F) { TLInfo = &getAnalysis().getTLI(); TTI = &getAnalysis().getTTI(F); LI = &getAnalysis().getLoopInfo(); + BPI.reset(new BranchProbabilityInfo(F, *LI)); + BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI)); OptSize = F.optForSize(); ProfileSummaryInfo *PSI = getAnalysis().getPSI(); if (ProfileGuidedSectionPrefix) { - if (PSI->isFunctionHotInCallGraph(&F)) + if (PSI->isFunctionHotInCallGraph(&F, *BFI)) F.setSectionPrefix(".hot"); - else if (PSI->isFunctionColdInCallGraph(&F)) + else if (PSI->isFunctionColdInCallGraph(&F, *BFI)) F.setSectionPrefix(".unlikely"); } @@ -410,7 +406,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) { // Split some critical edges where one of the sources is an indirect branch, // to help generate sane code for PHIs involving such edges. - EverMadeChange |= splitIndirectCriticalEdges(F); + EverMadeChange |= SplitIndirectBrCriticalEdges(F); bool MadeChange = true; while (MadeChange) { @@ -555,160 +551,6 @@ BasicBlock *CodeGenPrepare::findDestBlockOfMergeableEmptyBlock(BasicBlock *BB) { return DestBB; } -// Return the unique indirectbr predecessor of a block. This may return null -// even if such a predecessor exists, if it's not useful for splitting. -// If a predecessor is found, OtherPreds will contain all other (non-indirectbr) -// predecessors of BB. -static BasicBlock * -findIBRPredecessor(BasicBlock *BB, SmallVectorImpl &OtherPreds) { - // If the block doesn't have any PHIs, we don't care about it, since there's - // no point in splitting it. - PHINode *PN = dyn_cast(BB->begin()); - if (!PN) - return nullptr; - - // Verify we have exactly one IBR predecessor. - // Conservatively bail out if one of the other predecessors is not a "regular" - // terminator (that is, not a switch or a br). - BasicBlock *IBB = nullptr; - for (unsigned Pred = 0, E = PN->getNumIncomingValues(); Pred != E; ++Pred) { - BasicBlock *PredBB = PN->getIncomingBlock(Pred); - TerminatorInst *PredTerm = PredBB->getTerminator(); - switch (PredTerm->getOpcode()) { - case Instruction::IndirectBr: - if (IBB) - return nullptr; - IBB = PredBB; - break; - case Instruction::Br: - case Instruction::Switch: - OtherPreds.push_back(PredBB); - continue; - default: - return nullptr; - } - } - - return IBB; -} - -// Split critical edges where the source of the edge is an indirectbr -// instruction. This isn't always possible, but we can handle some easy cases. -// This is useful because MI is unable to split such critical edges, -// which means it will not be able to sink instructions along those edges. -// This is especially painful for indirect branches with many successors, where -// we end up having to prepare all outgoing values in the origin block. -// -// Our normal algorithm for splitting critical edges requires us to update -// the outgoing edges of the edge origin block, but for an indirectbr this -// is hard, since it would require finding and updating the block addresses -// the indirect branch uses. But if a block only has a single indirectbr -// predecessor, with the others being regular branches, we can do it in a -// different way. -// Say we have A -> D, B -> D, I -> D where only I -> D is an indirectbr. -// We can split D into D0 and D1, where D0 contains only the PHIs from D, -// and D1 is the D block body. 
We can then duplicate D0 as D0A and D0B, and -// create the following structure: -// A -> D0A, B -> D0A, I -> D0B, D0A -> D1, D0B -> D1 -bool CodeGenPrepare::splitIndirectCriticalEdges(Function &F) { - // Check whether the function has any indirectbrs, and collect which blocks - // they may jump to. Since most functions don't have indirect branches, - // this lowers the common case's overhead to O(Blocks) instead of O(Edges). - SmallSetVector Targets; - for (auto &BB : F) { - auto *IBI = dyn_cast(BB.getTerminator()); - if (!IBI) - continue; - - for (unsigned Succ = 0, E = IBI->getNumSuccessors(); Succ != E; ++Succ) - Targets.insert(IBI->getSuccessor(Succ)); - } - - if (Targets.empty()) - return false; - - bool Changed = false; - for (BasicBlock *Target : Targets) { - SmallVector OtherPreds; - BasicBlock *IBRPred = findIBRPredecessor(Target, OtherPreds); - // If we did not found an indirectbr, or the indirectbr is the only - // incoming edge, this isn't the kind of edge we're looking for. - if (!IBRPred || OtherPreds.empty()) - continue; - - // Don't even think about ehpads/landingpads. - Instruction *FirstNonPHI = Target->getFirstNonPHI(); - if (FirstNonPHI->isEHPad() || Target->isLandingPad()) - continue; - - BasicBlock *BodyBlock = Target->splitBasicBlock(FirstNonPHI, ".split"); - // It's possible Target was its own successor through an indirectbr. - // In this case, the indirectbr now comes from BodyBlock. - if (IBRPred == Target) - IBRPred = BodyBlock; - - // At this point Target only has PHIs, and BodyBlock has the rest of the - // block's body. Create a copy of Target that will be used by the "direct" - // preds. - ValueToValueMapTy VMap; - BasicBlock *DirectSucc = CloneBasicBlock(Target, VMap, ".clone", &F); - - for (BasicBlock *Pred : OtherPreds) { - // If the target is a loop to itself, then the terminator of the split - // block needs to be updated. - if (Pred == Target) - BodyBlock->getTerminator()->replaceUsesOfWith(Target, DirectSucc); - else - Pred->getTerminator()->replaceUsesOfWith(Target, DirectSucc); - } - - // Ok, now fix up the PHIs. We know the two blocks only have PHIs, and that - // they are clones, so the number of PHIs are the same. - // (a) Remove the edge coming from IBRPred from the "Direct" PHI - // (b) Leave that as the only edge in the "Indirect" PHI. - // (c) Merge the two in the body block. - BasicBlock::iterator Indirect = Target->begin(), - End = Target->getFirstNonPHI()->getIterator(); - BasicBlock::iterator Direct = DirectSucc->begin(); - BasicBlock::iterator MergeInsert = BodyBlock->getFirstInsertionPt(); - - assert(&*End == Target->getTerminator() && - "Block was expected to only contain PHIs"); - - while (Indirect != End) { - PHINode *DirPHI = cast(Direct); - PHINode *IndPHI = cast(Indirect); - - // Now, clean up - the direct block shouldn't get the indirect value, - // and vice versa. - DirPHI->removeIncomingValue(IBRPred); - Direct++; - - // Advance the pointer here, to avoid invalidation issues when the old - // PHI is erased. - Indirect++; - - PHINode *NewIndPHI = PHINode::Create(IndPHI->getType(), 1, "ind", IndPHI); - NewIndPHI->addIncoming(IndPHI->getIncomingValueForBlock(IBRPred), - IBRPred); - - // Create a PHI in the body block, to merge the direct and indirect - // predecessors. 
- PHINode *MergePHI = - PHINode::Create(IndPHI->getType(), 2, "merge", &*MergeInsert); - MergePHI->addIncoming(NewIndPHI, Target); - MergePHI->addIncoming(DirPHI, DirectSucc); - - IndPHI->replaceAllUsesWith(MergePHI); - IndPHI->eraseFromParent(); - } - - Changed = true; - } - - return Changed; -} - /// Eliminate blocks that contain only PHI nodes, debug info directives, and an /// unconditional branch. Passes before isel (e.g. LSR/loopsimplify) often split /// edges in ways that are non-optimal for isel. Start by eliminating these @@ -791,16 +633,10 @@ bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB, if (DestBBPred == BB) continue; - bool HasAllSameValue = true; - BasicBlock::const_iterator DestBBI = DestBB->begin(); - while (const PHINode *DestPN = dyn_cast(DestBBI++)) { - if (DestPN->getIncomingValueForBlock(BB) != - DestPN->getIncomingValueForBlock(DestBBPred)) { - HasAllSameValue = false; - break; - } - } - if (HasAllSameValue) + if (llvm::all_of(DestBB->phis(), [&](const PHINode &DestPN) { + return DestPN.getIncomingValueForBlock(BB) == + DestPN.getIncomingValueForBlock(DestBBPred); + })) SameIncomingValueBBs.insert(DestBBPred); } @@ -810,13 +646,6 @@ bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB, if (SameIncomingValueBBs.count(Pred)) return true; - if (!BFI) { - Function &F = *BB->getParent(); - LoopInfo LI{DominatorTree(F)}; - BPI.reset(new BranchProbabilityInfo(F, LI)); - BFI.reset(new BlockFrequencyInfo(F, *BPI, LI)); - } - BlockFrequency PredFreq = BFI->getBlockFreq(Pred); BlockFrequency BBFreq = BFI->getBlockFreq(BB); @@ -837,9 +666,8 @@ bool CodeGenPrepare::canMergeBlocks(const BasicBlock *BB, // We only want to eliminate blocks whose phi nodes are used by phi nodes in // the successor. If there are more complex condition (e.g. preheaders), // don't mess around with them. - BasicBlock::const_iterator BBI = BB->begin(); - while (const PHINode *PN = dyn_cast(BBI++)) { - for (const User *U : PN->users()) { + for (const PHINode &PN : BB->phis()) { + for (const User *U : PN.users()) { const Instruction *UI = cast(U); if (UI->getParent() != DestBB || !isa(UI)) return false; @@ -878,10 +706,9 @@ bool CodeGenPrepare::canMergeBlocks(const BasicBlock *BB, for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) { BasicBlock *Pred = DestBBPN->getIncomingBlock(i); if (BBPreds.count(Pred)) { // Common predecessor? - BBI = DestBB->begin(); - while (const PHINode *PN = dyn_cast(BBI++)) { - const Value *V1 = PN->getIncomingValueForBlock(Pred); - const Value *V2 = PN->getIncomingValueForBlock(BB); + for (const PHINode &PN : DestBB->phis()) { + const Value *V1 = PN.getIncomingValueForBlock(Pred); + const Value *V2 = PN.getIncomingValueForBlock(BB); // If V2 is a phi node in BB, look up what the mapped value will be. if (const PHINode *V2PN = dyn_cast(V2)) @@ -924,11 +751,9 @@ void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) { // Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB // to handle the new incoming edges it is about to have. - PHINode *PN; - for (BasicBlock::iterator BBI = DestBB->begin(); - (PN = dyn_cast(BBI)); ++BBI) { + for (PHINode &PN : DestBB->phis()) { // Remove the incoming value for BB, and remember it. - Value *InVal = PN->removeIncomingValue(BB, false); + Value *InVal = PN.removeIncomingValue(BB, false); // Two options: either the InVal is a phi node defined in BB or it is some // value that dominates BB. 
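Editorial aside: the CodeGenPrepare hunks above repeatedly replace hand-rolled while (dyn_cast) scans over leading PHI nodes with the phis() range, for example the llvm::all_of(DestBB->phis(), ...) check. A rough standalone analog of that pattern is sketched below; the Inst and Block types are made up for illustration and are not LLVM's API.

#include <algorithm>
#include <cstdio>
#include <vector>

// Made-up stand-ins for Instruction/PHINode/BasicBlock.
struct Inst { bool IsPhi; int IncomingValue; };

struct Block {
  std::vector<Inst> Insts;
  // Analog of BasicBlock::phis(): only the leading run of PHI nodes.
  std::vector<Inst> phis() const {
    std::vector<Inst> Leading;
    for (const Inst &I : Insts) {
      if (!I.IsPhi)
        break;
      Leading.push_back(I);
    }
    return Leading;
  }
};

int main() {
  Block BB{{{true, 42}, {true, 42}, {false, 0}}};
  // Same shape as the all_of(DestBB->phis(), ...) check above: do all
  // leading PHIs agree on the incoming value we care about?
  std::vector<Inst> Phis = BB.phis();
  bool AllSame = std::all_of(Phis.begin(), Phis.end(), [](const Inst &P) {
    return P.IncomingValue == 42;
  });
  std::printf("all incoming values match: %s\n", AllSame ? "yes" : "no");
  return 0;
}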
@@ -936,17 +761,17 @@ void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) { if (InValPhi && InValPhi->getParent() == BB) { // Add all of the input values of the input PHI as inputs of this phi. for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i) - PN->addIncoming(InValPhi->getIncomingValue(i), - InValPhi->getIncomingBlock(i)); + PN.addIncoming(InValPhi->getIncomingValue(i), + InValPhi->getIncomingBlock(i)); } else { // Otherwise, add one instance of the dominating value for each edge that // we will be adding. if (PHINode *BBPN = dyn_cast(BB->begin())) { for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i) - PN->addIncoming(InVal, BBPN->getIncomingBlock(i)); + PN.addIncoming(InVal, BBPN->getIncomingBlock(i)); } else { for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) - PN->addIncoming(InVal, *PI); + PN.addIncoming(InVal, *PI); } } } @@ -1785,7 +1610,7 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) { if (MemTransferInst *MTI = dyn_cast(MI)) Align = std::min(Align, getKnownAlignment(MTI->getSource(), *DL)); if (Align > MI->getAlignment()) - MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), Align)); + MI->setAlignment(Align); } } @@ -2875,8 +2700,13 @@ class AddressingModeCombiner { // we still need to collect it due to original value is different. // And later we will need all original values as anchors during // finding the common Phi node. + // We also must reject the case when base offset is different and + // scale reg is not null, we cannot handle this case due to merge of + // different offsets will be used as ScaleReg. if (DifferentField != ExtAddrMode::MultipleFields && - DifferentField != ExtAddrMode::ScaleField) { + DifferentField != ExtAddrMode::ScaleField && + (DifferentField != ExtAddrMode::BaseOffsField || + !NewAddrMode.ScaledReg)) { AddrModes.emplace_back(NewAddrMode); return true; } @@ -2909,8 +2739,10 @@ class AddressingModeCombiner { // Build a map between to // value of base register. + // Bail out if there is no common type. FoldAddrToValueMapping Map; - initializeMap(Map); + if (!initializeMap(Map)) + return false; Value *CommonValue = findCommon(Map); if (CommonValue) @@ -2924,7 +2756,8 @@ class AddressingModeCombiner { /// If address is not an instruction than basic block is set to null. /// At the same time we find a common type for different field we will /// use to create new Phi/Select nodes. Keep it in CommonType field. - void initializeMap(FoldAddrToValueMapping &Map) { + /// Return false if there is no common type found. + bool initializeMap(FoldAddrToValueMapping &Map) { // Keep track of keys where the value is null. We will need to replace it // with constant null when we know the common type. 
SmallVector NullValue; @@ -2936,10 +2769,10 @@ class AddressingModeCombiner { Value *DV = AM.GetFieldAsValue(DifferentField, IntPtrTy); if (DV) { - if (CommonType) - assert(CommonType == DV->getType() && "Different types detected!"); - else - CommonType = DV->getType(); + auto *Type = DV->getType(); + if (CommonType && CommonType != Type) + return false; + CommonType = Type; Map[{ AM.OriginalValue, BB }] = DV; } else { NullValue.push_back({ AM.OriginalValue, BB }); @@ -2948,6 +2781,7 @@ class AddressingModeCombiner { assert(CommonType && "At least one non-null value must be!"); for (auto VIBB : NullValue) Map[VIBB] = Constant::getNullValue(CommonType); + return true; } /// \brief We have mapping between value A and basic block where value A @@ -2975,11 +2809,11 @@ class AddressingModeCombiner { // -> ? // The function tries to find or build phi [b1, BB1], [b2, BB2] in BB3 Value *findCommon(FoldAddrToValueMapping &Map) { - // Tracks of new created Phi nodes. + // Tracks newly created Phi nodes. SmallPtrSet NewPhiNodes; - // Tracks of new created Select nodes. + // Tracks newly created Select nodes. SmallPtrSet NewSelectNodes; - // Tracks the simplification of new created phi nodes. The reason we use + // Tracks the simplification of newly created phi nodes. The reason we use // this mapping is because we will add new created Phi nodes in AddrToBase. // Simplification of Phi nodes is recursive, so some Phi node may // be simplified after we added it to AddrToBase. @@ -3142,13 +2976,13 @@ class AddressingModeCombiner { ? CurrentBlock : nullptr }; assert(Map.find(TrueItem) != Map.end() && "No True Value!"); - Select->setTrueValue(Map[TrueItem]); + Select->setTrueValue(ST.Get(Map[TrueItem])); auto *FalseValue = CurrentSelect->getFalseValue(); ValueInBB FalseItem = { FalseValue, isa(FalseValue) ? CurrentBlock : nullptr }; assert(Map.find(FalseItem) != Map.end() && "No False Value!"); - Select->setFalseValue(Map[FalseItem]); + Select->setFalseValue(ST.Get(Map[FalseItem])); } else { // Must be a Phi node then. PHINode *PHI = cast(V); @@ -3858,7 +3692,7 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode, } else { uint64_t TypeSize = DL.getTypeAllocSize(GTI.getIndexedType()); if (ConstantInt *CI = dyn_cast(AddrInst->getOperand(i))) { - ConstantOffset += CI->getSExtValue()*TypeSize; + ConstantOffset += CI->getSExtValue() * TypeSize; } else if (TypeSize) { // Scales of zero don't do anything. // We only allow one variable index at the moment. if (VariableOperand != -1) @@ -6658,22 +6492,16 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) { std::swap(TBB, FBB); // Replace the old BB with the new BB. - for (auto &I : *TBB) { - PHINode *PN = dyn_cast(&I); - if (!PN) - break; + for (PHINode &PN : TBB->phis()) { int i; - while ((i = PN->getBasicBlockIndex(&BB)) >= 0) - PN->setIncomingBlock(i, TmpBB); + while ((i = PN.getBasicBlockIndex(&BB)) >= 0) + PN.setIncomingBlock(i, TmpBB); } // Add another incoming edge form the new BB. 
- for (auto &I : *FBB) { - PHINode *PN = dyn_cast(&I); - if (!PN) - break; - auto *Val = PN->getIncomingValueForBlock(&BB); - PN->addIncoming(Val, TmpBB); + for (PHINode &PN : FBB->phis()) { + auto *Val = PN.getIncomingValueForBlock(&BB); + PN.addIncoming(Val, TmpBB); } // Update the branch weights (from SelectionDAGBuilder:: diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp index be364bf760a2..5a4e6d0aad9e 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -170,11 +170,11 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr &MI) { // FIXME: The issue with predicated instruction is more complex. We are being // conservative here because the kill markers cannot be trusted after // if-conversion: - // %R6 = LDR %SP, %reg0, 92, pred:14, pred:%reg0; mem:LD4[FixedStack14] + // %r6 = LDR %sp, %reg0, 92, 14, %reg0; mem:LD4[FixedStack14] // ... - // STR %R0, %R6, %reg0, 0, pred:0, pred:%CPSR; mem:ST4[%395] - // %R6 = LDR %SP, %reg0, 100, pred:0, pred:%CPSR; mem:LD4[FixedStack12] - // STR %R0, %R6, %reg0, 0, pred:14, pred:%reg0; mem:ST4[%396](align=8) + // STR %r0, killed %r6, %reg0, 0, 0, %cpsr; mem:ST4[%395] + // %r6 = LDR %sp, %reg0, 100, 0, %cpsr; mem:LD4[FixedStack12] + // STR %r0, killed %r6, %reg0, 0, 14, %reg0; mem:ST4[%396](align=8) // // The first R6 kill is not really a kill since it's killed by a predicated // instruction which may not be executed. The second R6 def may or may not @@ -466,7 +466,7 @@ BreakAntiDependencies(const std::vector &SUnits, DEBUG(dbgs() << "Available regs:"); for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) { if (KillIndices[Reg] == ~0u) - DEBUG(dbgs() << " " << TRI->getName(Reg)); + DEBUG(dbgs() << " " << printReg(Reg, TRI)); } DEBUG(dbgs() << '\n'); } @@ -646,9 +646,9 @@ BreakAntiDependencies(const std::vector &SUnits, LastNewReg[AntiDepReg], RC, ForbidRegs)) { DEBUG(dbgs() << "Breaking anti-dependence edge on " - << TRI->getName(AntiDepReg) - << " with " << RegRefs.count(AntiDepReg) << " references" - << " using " << TRI->getName(NewReg) << "!\n"); + << printReg(AntiDepReg, TRI) << " with " + << RegRefs.count(AntiDepReg) << " references" + << " using " << printReg(NewReg, TRI) << "!\n"); // Update the references to the old register to refer to the new // register. diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp index 8a648a068763..e6a54bb300f2 100644 --- a/lib/CodeGen/DeadMachineInstructionElim.cpp +++ b/lib/CodeGen/DeadMachineInstructionElim.cpp @@ -15,7 +15,6 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" @@ -95,7 +94,7 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const { } bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { - if (skipFunction(*MF.getFunction())) + if (skipFunction(MF.getFunction())) return false; bool AnyChanges = false; diff --git a/lib/CodeGen/DetectDeadLanes.cpp b/lib/CodeGen/DetectDeadLanes.cpp index ef4e2aaaf484..7d7eb57352a2 100644 --- a/lib/CodeGen/DetectDeadLanes.cpp +++ b/lib/CodeGen/DetectDeadLanes.cpp @@ -17,12 +17,12 @@ /// when subregisters are involved. 
/// /// Example: -/// %vreg0 = some definition -/// %vreg1 = IMPLICIT_DEF -/// %vreg2 = REG_SEQUENCE %vreg0, sub0, %vreg1, sub1 -/// %vreg3 = EXTRACT_SUBREG %vreg2, sub1 -/// = use %vreg3 -/// The %vreg0 definition is dead and %vreg3 contains an undefined value. +/// %0 = some definition +/// %1 = IMPLICIT_DEF +/// %2 = REG_SEQUENCE %0, sub0, %1, sub1 +/// %3 = EXTRACT_SUBREG %2, sub1 +/// = use %3 +/// The %0 definition is dead and %3 contains an undefined value. // //===----------------------------------------------------------------------===// @@ -34,7 +34,6 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/InitializePasses.h" diff --git a/lib/CodeGen/EarlyIfConversion.cpp b/lib/CodeGen/EarlyIfConversion.cpp index bb181b7e165f..6294ff450113 100644 --- a/lib/CodeGen/EarlyIfConversion.cpp +++ b/lib/CodeGen/EarlyIfConversion.cpp @@ -185,7 +185,7 @@ bool SSAIfConv::canSpeculateInstrs(MachineBasicBlock *MBB) { // Reject any live-in physregs. It's probably CPSR/EFLAGS, and very hard to // get right. if (!MBB->livein_empty()) { - DEBUG(dbgs() << "BB#" << MBB->getNumber() << " has live-ins.\n"); + DEBUG(dbgs() << printMBBReference(*MBB) << " has live-ins.\n"); return false; } @@ -199,7 +199,7 @@ bool SSAIfConv::canSpeculateInstrs(MachineBasicBlock *MBB) { continue; if (++InstrCount > BlockInstrLimit && !Stress) { - DEBUG(dbgs() << "BB#" << MBB->getNumber() << " has more than " + DEBUG(dbgs() << printMBBReference(*MBB) << " has more than " << BlockInstrLimit << " instructions.\n"); return false; } @@ -246,7 +246,7 @@ bool SSAIfConv::canSpeculateInstrs(MachineBasicBlock *MBB) { if (!DefMI || DefMI->getParent() != Head) continue; if (InsertAfter.insert(DefMI).second) - DEBUG(dbgs() << "BB#" << MBB->getNumber() << " depends on " << *DefMI); + DEBUG(dbgs() << printMBBReference(*MBB) << " depends on " << *DefMI); if (DefMI->isTerminator()) { DEBUG(dbgs() << "Can't insert instructions below terminator.\n"); return false; @@ -361,10 +361,10 @@ bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) { if (Succ1->pred_size() != 1 || Succ1->succ_size() != 1 || Succ1->succ_begin()[0] != Tail) return false; - DEBUG(dbgs() << "\nDiamond: BB#" << Head->getNumber() - << " -> BB#" << Succ0->getNumber() - << "/BB#" << Succ1->getNumber() - << " -> BB#" << Tail->getNumber() << '\n'); + DEBUG(dbgs() << "\nDiamond: " << printMBBReference(*Head) << " -> " + << printMBBReference(*Succ0) << "/" + << printMBBReference(*Succ1) << " -> " + << printMBBReference(*Tail) << '\n'); // Live-in physregs are tricky to get right when speculating code. if (!Tail->livein_empty()) { @@ -372,9 +372,9 @@ bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) { return false; } } else { - DEBUG(dbgs() << "\nTriangle: BB#" << Head->getNumber() - << " -> BB#" << Succ0->getNumber() - << " -> BB#" << Tail->getNumber() << '\n'); + DEBUG(dbgs() << "\nTriangle: " << printMBBReference(*Head) << " -> " + << printMBBReference(*Succ0) << " -> " + << printMBBReference(*Tail) << '\n'); } // This is a triangle or a diamond. @@ -563,8 +563,8 @@ void SSAIfConv::convertIf(SmallVectorImpl &RemovedBlocks) { assert(Head->succ_empty() && "Additional head successors?"); if (!ExtraPreds && Head->isLayoutSuccessor(Tail)) { // Splice Tail onto the end of Head. 
- DEBUG(dbgs() << "Joining tail BB#" << Tail->getNumber() - << " into head BB#" << Head->getNumber() << '\n'); + DEBUG(dbgs() << "Joining tail " << printMBBReference(*Tail) << " into head " + << printMBBReference(*Head) << '\n'); Head->splice(Head->end(), Tail, Tail->begin(), Tail->end()); Head->transferSuccessorsAndUpdatePHIs(Tail); @@ -785,7 +785,7 @@ bool EarlyIfConverter::tryConvertIf(MachineBasicBlock *MBB) { bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "********** EARLY IF-CONVERSION **********\n" << "********** Function: " << MF.getName() << '\n'); - if (skipFunction(*MF.getFunction())) + if (skipFunction(MF.getFunction())) return false; // Only run if conversion if the target wants it. diff --git a/lib/CodeGen/EdgeBundles.cpp b/lib/CodeGen/EdgeBundles.cpp index b3a25544be39..54c53eb16312 100644 --- a/lib/CodeGen/EdgeBundles.cpp +++ b/lib/CodeGen/EdgeBundles.cpp @@ -80,13 +80,15 @@ raw_ostream &WriteGraph<>(raw_ostream &O, const EdgeBundles &G, O << "digraph {\n"; for (const auto &MBB : *MF) { unsigned BB = MBB.getNumber(); - O << "\t\"BB#" << BB << "\" [ shape=box ]\n" - << '\t' << G.getBundle(BB, false) << " -> \"BB#" << BB << "\"\n" - << "\t\"BB#" << BB << "\" -> " << G.getBundle(BB, true) << '\n'; + O << "\t\"" << printMBBReference(MBB) << "\" [ shape=box ]\n" + << '\t' << G.getBundle(BB, false) << " -> \"" << printMBBReference(MBB) + << "\"\n" + << "\t\"" << printMBBReference(MBB) << "\" -> " << G.getBundle(BB, true) + << '\n'; for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(), SE = MBB.succ_end(); SI != SE; ++SI) - O << "\t\"BB#" << BB << "\" -> \"BB#" << (*SI)->getNumber() - << "\" [ color=lightgray ]\n"; + O << "\t\"" << printMBBReference(MBB) << "\" -> \"" + << printMBBReference(**SI) << "\" [ color=lightgray ]\n"; } O << "}\n"; return O; diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp deleted file mode 100644 index 61990671d88d..000000000000 --- a/lib/CodeGen/ExecutionDepsFix.cpp +++ /dev/null @@ -1,755 +0,0 @@ -//===- ExecutionDepsFix.cpp - Fix execution dependecy issues ----*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "llvm/CodeGen/ExecutionDepsFix.h" - -#include "llvm/ADT/PostOrderIterator.h" -#include "llvm/ADT/iterator_range.h" -#include "llvm/CodeGen/LivePhysRegs.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/RegisterClassInfo.h" -#include "llvm/CodeGen/TargetInstrInfo.h" -#include "llvm/CodeGen/TargetSubtargetInfo.h" -#include "llvm/Support/Allocator.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -#define DEBUG_TYPE "execution-deps-fix" - -/// Translate TRI register number to a list of indices into our smaller tables -/// of interesting registers. -iterator_range::const_iterator> -ExecutionDepsFix::regIndices(unsigned Reg) const { - assert(Reg < AliasMap.size() && "Invalid register"); - const auto &Entry = AliasMap[Reg]; - return make_range(Entry.begin(), Entry.end()); -} - -DomainValue *ExecutionDepsFix::alloc(int domain) { - DomainValue *dv = Avail.empty() ? 
- new(Allocator.Allocate()) DomainValue : - Avail.pop_back_val(); - if (domain >= 0) - dv->addDomain(domain); - assert(dv->Refs == 0 && "Reference count wasn't cleared"); - assert(!dv->Next && "Chained DomainValue shouldn't have been recycled"); - return dv; -} - -/// Release a reference to DV. When the last reference is released, -/// collapse if needed. -void ExecutionDepsFix::release(DomainValue *DV) { - while (DV) { - assert(DV->Refs && "Bad DomainValue"); - if (--DV->Refs) - return; - - // There are no more DV references. Collapse any contained instructions. - if (DV->AvailableDomains && !DV->isCollapsed()) - collapse(DV, DV->getFirstDomain()); - - DomainValue *Next = DV->Next; - DV->clear(); - Avail.push_back(DV); - // Also release the next DomainValue in the chain. - DV = Next; - } -} - -/// Follow the chain of dead DomainValues until a live DomainValue is reached. -/// Update the referenced pointer when necessary. -DomainValue *ExecutionDepsFix::resolve(DomainValue *&DVRef) { - DomainValue *DV = DVRef; - if (!DV || !DV->Next) - return DV; - - // DV has a chain. Find the end. - do DV = DV->Next; - while (DV->Next); - - // Update DVRef to point to DV. - retain(DV); - release(DVRef); - DVRef = DV; - return DV; -} - -/// Set LiveRegs[rx] = dv, updating reference counts. -void ExecutionDepsFix::setLiveReg(int rx, DomainValue *dv) { - assert(unsigned(rx) < NumRegs && "Invalid index"); - assert(LiveRegs && "Must enter basic block first."); - - if (LiveRegs[rx].Value == dv) - return; - if (LiveRegs[rx].Value) - release(LiveRegs[rx].Value); - LiveRegs[rx].Value = retain(dv); -} - -// Kill register rx, recycle or collapse any DomainValue. -void ExecutionDepsFix::kill(int rx) { - assert(unsigned(rx) < NumRegs && "Invalid index"); - assert(LiveRegs && "Must enter basic block first."); - if (!LiveRegs[rx].Value) - return; - - release(LiveRegs[rx].Value); - LiveRegs[rx].Value = nullptr; -} - -/// Force register rx into domain. -void ExecutionDepsFix::force(int rx, unsigned domain) { - assert(unsigned(rx) < NumRegs && "Invalid index"); - assert(LiveRegs && "Must enter basic block first."); - if (DomainValue *dv = LiveRegs[rx].Value) { - if (dv->isCollapsed()) - dv->addDomain(domain); - else if (dv->hasDomain(domain)) - collapse(dv, domain); - else { - // This is an incompatible open DomainValue. Collapse it to whatever and - // force the new value into domain. This costs a domain crossing. - collapse(dv, dv->getFirstDomain()); - assert(LiveRegs[rx].Value && "Not live after collapse?"); - LiveRegs[rx].Value->addDomain(domain); - } - } else { - // Set up basic collapsed DomainValue. - setLiveReg(rx, alloc(domain)); - } -} - -/// Collapse open DomainValue into given domain. If there are multiple -/// registers using dv, they each get a unique collapsed DomainValue. -void ExecutionDepsFix::collapse(DomainValue *dv, unsigned domain) { - assert(dv->hasDomain(domain) && "Cannot collapse"); - - // Collapse all the instructions. - while (!dv->Instrs.empty()) - TII->setExecutionDomain(*dv->Instrs.pop_back_val(), domain); - dv->setSingleDomain(domain); - - // If there are multiple users, give them new, unique DomainValues. - if (LiveRegs && dv->Refs > 1) - for (unsigned rx = 0; rx != NumRegs; ++rx) - if (LiveRegs[rx].Value == dv) - setLiveReg(rx, alloc(domain)); -} - -/// All instructions and registers in B are moved to A, and B is released. 
-bool ExecutionDepsFix::merge(DomainValue *A, DomainValue *B) { - assert(!A->isCollapsed() && "Cannot merge into collapsed"); - assert(!B->isCollapsed() && "Cannot merge from collapsed"); - if (A == B) - return true; - // Restrict to the domains that A and B have in common. - unsigned common = A->getCommonDomains(B->AvailableDomains); - if (!common) - return false; - A->AvailableDomains = common; - A->Instrs.append(B->Instrs.begin(), B->Instrs.end()); - - // Clear the old DomainValue so we won't try to swizzle instructions twice. - B->clear(); - // All uses of B are referred to A. - B->Next = retain(A); - - for (unsigned rx = 0; rx != NumRegs; ++rx) { - assert(LiveRegs && "no space allocated for live registers"); - if (LiveRegs[rx].Value == B) - setLiveReg(rx, A); - } - return true; -} - -/// Set up LiveRegs by merging predecessor live-out values. -void ExecutionDepsFix::enterBasicBlock(MachineBasicBlock *MBB) { - // Reset instruction counter in each basic block. - CurInstr = 0; - - // Set up UndefReads to track undefined register reads. - UndefReads.clear(); - LiveRegSet.clear(); - - // Set up LiveRegs to represent registers entering MBB. - if (!LiveRegs) - LiveRegs = new LiveReg[NumRegs]; - - // Default values are 'nothing happened a long time ago'. - for (unsigned rx = 0; rx != NumRegs; ++rx) { - LiveRegs[rx].Value = nullptr; - LiveRegs[rx].Def = -(1 << 20); - } - - // This is the entry block. - if (MBB->pred_empty()) { - for (const auto &LI : MBB->liveins()) { - for (int rx : regIndices(LI.PhysReg)) { - // Treat function live-ins as if they were defined just before the first - // instruction. Usually, function arguments are set up immediately - // before the call. - LiveRegs[rx].Def = -1; - } - } - DEBUG(dbgs() << "BB#" << MBB->getNumber() << ": entry\n"); - return; - } - - // Try to coalesce live-out registers from predecessors. - for (MachineBasicBlock::const_pred_iterator pi = MBB->pred_begin(), - pe = MBB->pred_end(); pi != pe; ++pi) { - auto fi = MBBInfos.find(*pi); - assert(fi != MBBInfos.end() && - "Should have pre-allocated MBBInfos for all MBBs"); - LiveReg *Incoming = fi->second.OutRegs; - // Incoming is null if this is a backedge from a BB - // we haven't processed yet - if (Incoming == nullptr) { - continue; - } - - for (unsigned rx = 0; rx != NumRegs; ++rx) { - // Use the most recent predecessor def for each register. - LiveRegs[rx].Def = std::max(LiveRegs[rx].Def, Incoming[rx].Def); - - DomainValue *pdv = resolve(Incoming[rx].Value); - if (!pdv) - continue; - if (!LiveRegs[rx].Value) { - setLiveReg(rx, pdv); - continue; - } - - // We have a live DomainValue from more than one predecessor. - if (LiveRegs[rx].Value->isCollapsed()) { - // We are already collapsed, but predecessor is not. Force it. - unsigned Domain = LiveRegs[rx].Value->getFirstDomain(); - if (!pdv->isCollapsed() && pdv->hasDomain(Domain)) - collapse(pdv, Domain); - continue; - } - - // Currently open, merge in predecessor. - if (!pdv->isCollapsed()) - merge(LiveRegs[rx].Value, pdv); - else - force(rx, pdv->getFirstDomain()); - } - } - DEBUG( - dbgs() << "BB#" << MBB->getNumber() - << (!isBlockDone(MBB) ? ": incomplete\n" : ": all preds known\n")); -} - -void ExecutionDepsFix::leaveBasicBlock(MachineBasicBlock *MBB) { - assert(LiveRegs && "Must enter basic block first."); - LiveReg *OldOutRegs = MBBInfos[MBB].OutRegs; - // Save register clearances at end of MBB - used by enterBasicBlock(). 
- MBBInfos[MBB].OutRegs = LiveRegs; - - // While processing the basic block, we kept `Def` relative to the start - // of the basic block for convenience. However, future use of this information - // only cares about the clearance from the end of the block, so adjust - // everything to be relative to the end of the basic block. - for (unsigned i = 0, e = NumRegs; i != e; ++i) - LiveRegs[i].Def -= CurInstr; - if (OldOutRegs) { - // This must be the second pass. - // Release all the DomainValues instead of keeping them. - for (unsigned i = 0, e = NumRegs; i != e; ++i) - release(OldOutRegs[i].Value); - delete[] OldOutRegs; - } - LiveRegs = nullptr; -} - -bool ExecutionDepsFix::visitInstr(MachineInstr *MI) { - // Update instructions with explicit execution domains. - std::pair DomP = TII->getExecutionDomain(*MI); - if (DomP.first) { - if (DomP.second) - visitSoftInstr(MI, DomP.second); - else - visitHardInstr(MI, DomP.first); - } - - return !DomP.first; -} - -/// \brief Helps avoid false dependencies on undef registers by updating the -/// machine instructions' undef operand to use a register that the instruction -/// is truly dependent on, or use a register with clearance higher than Pref. -/// Returns true if it was able to find a true dependency, thus not requiring -/// a dependency breaking instruction regardless of clearance. -bool ExecutionDepsFix::pickBestRegisterForUndef(MachineInstr *MI, - unsigned OpIdx, unsigned Pref) { - MachineOperand &MO = MI->getOperand(OpIdx); - assert(MO.isUndef() && "Expected undef machine operand"); - - unsigned OriginalReg = MO.getReg(); - - // Update only undef operands that are mapped to one register. - if (AliasMap[OriginalReg].size() != 1) - return false; - - // Get the undef operand's register class - const TargetRegisterClass *OpRC = - TII->getRegClass(MI->getDesc(), OpIdx, TRI, *MF); - - // If the instruction has a true dependency, we can hide the false depdency - // behind it. - for (MachineOperand &CurrMO : MI->operands()) { - if (!CurrMO.isReg() || CurrMO.isDef() || CurrMO.isUndef() || - !OpRC->contains(CurrMO.getReg())) - continue; - // We found a true dependency - replace the undef register with the true - // dependency. - MO.setReg(CurrMO.getReg()); - return true; - } - - // Go over all registers in the register class and find the register with - // max clearance or clearance higher than Pref. - unsigned MaxClearance = 0; - unsigned MaxClearanceReg = OriginalReg; - ArrayRef Order = RegClassInfo.getOrder(OpRC); - for (auto Reg : Order) { - assert(AliasMap[Reg].size() == 1 && - "Reg is expected to be mapped to a single index"); - int RCrx = *regIndices(Reg).begin(); - unsigned Clearance = CurInstr - LiveRegs[RCrx].Def; - if (Clearance <= MaxClearance) - continue; - MaxClearance = Clearance; - MaxClearanceReg = Reg; - - if (MaxClearance > Pref) - break; - } - - // Update the operand if we found a register with better clearance. - if (MaxClearanceReg != OriginalReg) - MO.setReg(MaxClearanceReg); - - return false; -} - -/// \brief Return true to if it makes sense to break dependence on a partial def -/// or undef use. 
-bool ExecutionDepsFix::shouldBreakDependence(MachineInstr *MI, unsigned OpIdx, - unsigned Pref) { - unsigned reg = MI->getOperand(OpIdx).getReg(); - for (int rx : regIndices(reg)) { - unsigned Clearance = CurInstr - LiveRegs[rx].Def; - DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref); - - if (Pref > Clearance) { - DEBUG(dbgs() << ": Break dependency.\n"); - continue; - } - DEBUG(dbgs() << ": OK .\n"); - return false; - } - return true; -} - -// Update def-ages for registers defined by MI. -// If Kill is set, also kill off DomainValues clobbered by the defs. -// -// Also break dependencies on partial defs and undef uses. -void ExecutionDepsFix::processDefs(MachineInstr *MI, bool breakDependency, - bool Kill) { - assert(!MI->isDebugValue() && "Won't process debug values"); - - // Break dependence on undef uses. Do this before updating LiveRegs below. - unsigned OpNum; - if (breakDependency) { - unsigned Pref = TII->getUndefRegClearance(*MI, OpNum, TRI); - if (Pref) { - bool HadTrueDependency = pickBestRegisterForUndef(MI, OpNum, Pref); - // We don't need to bother trying to break a dependency if this - // instruction has a true dependency on that register through another - // operand - we'll have to wait for it to be available regardless. - if (!HadTrueDependency && shouldBreakDependence(MI, OpNum, Pref)) - UndefReads.push_back(std::make_pair(MI, OpNum)); - } - } - const MCInstrDesc &MCID = MI->getDesc(); - for (unsigned i = 0, - e = MI->isVariadic() ? MI->getNumOperands() : MCID.getNumDefs(); - i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg()) - continue; - if (MO.isUse()) - continue; - for (int rx : regIndices(MO.getReg())) { - // This instruction explicitly defines rx. - DEBUG(dbgs() << TRI->getName(RC->getRegister(rx)) << ":\t" << CurInstr - << '\t' << *MI); - - if (breakDependency) { - // Check clearance before partial register updates. - // Call breakDependence before setting LiveRegs[rx].Def. - unsigned Pref = TII->getPartialRegUpdateClearance(*MI, i, TRI); - if (Pref && shouldBreakDependence(MI, i, Pref)) - TII->breakPartialRegDependency(*MI, i, TRI); - } - - // How many instructions since rx was last written? - LiveRegs[rx].Def = CurInstr; - - // Kill off domains redefined by generic instructions. - if (Kill) - kill(rx); - } - } - ++CurInstr; -} - -/// \break Break false dependencies on undefined register reads. -/// -/// Walk the block backward computing precise liveness. This is expensive, so we -/// only do it on demand. Note that the occurrence of undefined register reads -/// that should be broken is very rare, but when they occur we may have many in -/// a single block. -void ExecutionDepsFix::processUndefReads(MachineBasicBlock *MBB) { - if (UndefReads.empty()) - return; - - // Collect this block's live out register units. - LiveRegSet.init(*TRI); - // We do not need to care about pristine registers as they are just preserved - // but not actually used in the function. - LiveRegSet.addLiveOutsNoPristines(*MBB); - - MachineInstr *UndefMI = UndefReads.back().first; - unsigned OpIdx = UndefReads.back().second; - - for (MachineInstr &I : make_range(MBB->rbegin(), MBB->rend())) { - // Update liveness, including the current instruction's defs. 
- LiveRegSet.stepBackward(I); - - if (UndefMI == &I) { - if (!LiveRegSet.contains(UndefMI->getOperand(OpIdx).getReg())) - TII->breakPartialRegDependency(*UndefMI, OpIdx, TRI); - - UndefReads.pop_back(); - if (UndefReads.empty()) - return; - - UndefMI = UndefReads.back().first; - OpIdx = UndefReads.back().second; - } - } -} - -// A hard instruction only works in one domain. All input registers will be -// forced into that domain. -void ExecutionDepsFix::visitHardInstr(MachineInstr *mi, unsigned domain) { - // Collapse all uses. - for (unsigned i = mi->getDesc().getNumDefs(), - e = mi->getDesc().getNumOperands(); i != e; ++i) { - MachineOperand &mo = mi->getOperand(i); - if (!mo.isReg()) continue; - for (int rx : regIndices(mo.getReg())) { - force(rx, domain); - } - } - - // Kill all defs and force them. - for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) { - MachineOperand &mo = mi->getOperand(i); - if (!mo.isReg()) continue; - for (int rx : regIndices(mo.getReg())) { - kill(rx); - force(rx, domain); - } - } -} - -// A soft instruction can be changed to work in other domains given by mask. -void ExecutionDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { - // Bitmask of available domains for this instruction after taking collapsed - // operands into account. - unsigned available = mask; - - // Scan the explicit use operands for incoming domains. - SmallVector used; - if (LiveRegs) - for (unsigned i = mi->getDesc().getNumDefs(), - e = mi->getDesc().getNumOperands(); i != e; ++i) { - MachineOperand &mo = mi->getOperand(i); - if (!mo.isReg()) continue; - for (int rx : regIndices(mo.getReg())) { - DomainValue *dv = LiveRegs[rx].Value; - if (dv == nullptr) - continue; - // Bitmask of domains that dv and available have in common. - unsigned common = dv->getCommonDomains(available); - // Is it possible to use this collapsed register for free? - if (dv->isCollapsed()) { - // Restrict available domains to the ones in common with the operand. - // If there are no common domains, we must pay the cross-domain - // penalty for this operand. - if (common) available = common; - } else if (common) - // Open DomainValue is compatible, save it for merging. - used.push_back(rx); - else - // Open DomainValue is not compatible with instruction. It is useless - // now. - kill(rx); - } - } - - // If the collapsed operands force a single domain, propagate the collapse. - if (isPowerOf2_32(available)) { - unsigned domain = countTrailingZeros(available); - TII->setExecutionDomain(*mi, domain); - visitHardInstr(mi, domain); - return; - } - - // Kill off any remaining uses that don't match available, and build a list of - // incoming DomainValues that we want to merge. - SmallVector Regs; - for (int rx : used) { - assert(LiveRegs && "no space allocated for live registers"); - const LiveReg &LR = LiveRegs[rx]; - // This useless DomainValue could have been missed above. - if (!LR.Value->getCommonDomains(available)) { - kill(rx); - continue; - } - // Sorted insertion. - auto I = std::upper_bound(Regs.begin(), Regs.end(), &LR, - [](const LiveReg *LHS, const LiveReg *RHS) { - return LHS->Def < RHS->Def; - }); - Regs.insert(I, &LR); - } - - // doms are now sorted in order of appearance. Try to merge them all, giving - // priority to the latest ones. - DomainValue *dv = nullptr; - while (!Regs.empty()) { - if (!dv) { - dv = Regs.pop_back_val()->Value; - // Force the first dv to match the current instruction. 
- dv->AvailableDomains = dv->getCommonDomains(available); - assert(dv->AvailableDomains && "Domain should have been filtered"); - continue; - } - - DomainValue *Latest = Regs.pop_back_val()->Value; - // Skip already merged values. - if (Latest == dv || Latest->Next) - continue; - if (merge(dv, Latest)) - continue; - - // If latest didn't merge, it is useless now. Kill all registers using it. - for (int i : used) { - assert(LiveRegs && "no space allocated for live registers"); - if (LiveRegs[i].Value == Latest) - kill(i); - } - } - - // dv is the DomainValue we are going to use for this instruction. - if (!dv) { - dv = alloc(); - dv->AvailableDomains = available; - } - dv->Instrs.push_back(mi); - - // Finally set all defs and non-collapsed uses to dv. We must iterate through - // all the operators, including imp-def ones. - for (MachineInstr::mop_iterator ii = mi->operands_begin(), - ee = mi->operands_end(); - ii != ee; ++ii) { - MachineOperand &mo = *ii; - if (!mo.isReg()) continue; - for (int rx : regIndices(mo.getReg())) { - if (!LiveRegs[rx].Value || (mo.isDef() && LiveRegs[rx].Value != dv)) { - kill(rx); - setLiveReg(rx, dv); - } - } - } -} - -void ExecutionDepsFix::processBasicBlock(MachineBasicBlock *MBB, - bool PrimaryPass) { - enterBasicBlock(MBB); - // If this block is not done, it makes little sense to make any decisions - // based on clearance information. We need to make a second pass anyway, - // and by then we'll have better information, so we can avoid doing the work - // to try and break dependencies now. - bool breakDependency = isBlockDone(MBB); - for (MachineInstr &MI : *MBB) { - if (!MI.isDebugValue()) { - bool Kill = false; - if (PrimaryPass) - Kill = visitInstr(&MI); - processDefs(&MI, breakDependency, Kill); - } - } - if (breakDependency) - processUndefReads(MBB); - leaveBasicBlock(MBB); -} - -bool ExecutionDepsFix::isBlockDone(MachineBasicBlock *MBB) { - return MBBInfos[MBB].PrimaryCompleted && - MBBInfos[MBB].IncomingCompleted == MBBInfos[MBB].PrimaryIncoming && - MBBInfos[MBB].IncomingProcessed == MBB->pred_size(); -} - -bool ExecutionDepsFix::runOnMachineFunction(MachineFunction &mf) { - if (skipFunction(*mf.getFunction())) - return false; - MF = &mf; - TII = MF->getSubtarget().getInstrInfo(); - TRI = MF->getSubtarget().getRegisterInfo(); - RegClassInfo.runOnMachineFunction(mf); - LiveRegs = nullptr; - assert(NumRegs == RC->getNumRegs() && "Bad regclass"); - - DEBUG(dbgs() << "********** FIX EXECUTION DEPENDENCIES: " - << TRI->getRegClassName(RC) << " **********\n"); - - // If no relevant registers are used in the function, we can skip it - // completely. - bool anyregs = false; - const MachineRegisterInfo &MRI = mf.getRegInfo(); - for (unsigned Reg : *RC) { - if (MRI.isPhysRegUsed(Reg)) { - anyregs = true; - break; - } - } - if (!anyregs) return false; - - // Initialize the AliasMap on the first use. - if (AliasMap.empty()) { - // Given a PhysReg, AliasMap[PhysReg] returns a list of indices into RC and - // therefore the LiveRegs array. - AliasMap.resize(TRI->getNumRegs()); - for (unsigned i = 0, e = RC->getNumRegs(); i != e; ++i) - for (MCRegAliasIterator AI(RC->getRegister(i), TRI, true); - AI.isValid(); ++AI) - AliasMap[*AI].push_back(i); - } - - // Initialize the MMBInfos - for (auto &MBB : mf) { - MBBInfo InitialInfo; - MBBInfos.insert(std::make_pair(&MBB, InitialInfo)); - } - - /* - * We want to visit every instruction in every basic block in order to update - * it's execution domain or break any false dependencies. 
However, for the - * dependency breaking, we need to know clearances from all predecessors - * (including any backedges). One way to do so would be to do two complete - * passes over all basic blocks/instructions, the first for recording - * clearances, the second to break the dependencies. However, for functions - * without backedges, or functions with a lot of straight-line code, and - * a small loop, that would be a lot of unnecessary work (since only the - * BBs that are part of the loop require two passes). As an example, - * consider the following loop. - * - * - * PH -> A -> B (xmm -> xmm) -> C -> D -> EXIT - * ^ | - * +----------------------------------+ - * - * The iteration order is as follows: - * Naive: PH A B C D A' B' C' D' - * Optimized: PH A B C A' B' C' D - * - * Note that we avoid processing D twice, because we can entirely process - * the predecessors before getting to D. We call a block that is ready - * for its second round of processing `done` (isBlockDone). Once we finish - * processing some block, we update the counters in MBBInfos and re-process - * any successors that are now done. - */ - - MachineBasicBlock *Entry = &*MF->begin(); - ReversePostOrderTraversal RPOT(Entry); - SmallVector Workqueue; - for (ReversePostOrderTraversal::rpo_iterator - MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) { - MachineBasicBlock *MBB = *MBBI; - // N.B: IncomingProcessed and IncomingCompleted were already updated while - // processing this block's predecessors. - MBBInfos[MBB].PrimaryCompleted = true; - MBBInfos[MBB].PrimaryIncoming = MBBInfos[MBB].IncomingProcessed; - bool Primary = true; - Workqueue.push_back(MBB); - while (!Workqueue.empty()) { - MachineBasicBlock *ActiveMBB = &*Workqueue.back(); - Workqueue.pop_back(); - processBasicBlock(ActiveMBB, Primary); - bool Done = isBlockDone(ActiveMBB); - for (auto *Succ : ActiveMBB->successors()) { - if (!isBlockDone(Succ)) { - if (Primary) { - MBBInfos[Succ].IncomingProcessed++; - } - if (Done) { - MBBInfos[Succ].IncomingCompleted++; - } - if (isBlockDone(Succ)) { - Workqueue.push_back(Succ); - } - } - } - Primary = false; - } - } - - // We need to go through again and finalize any blocks that are not done yet. - // This is possible if blocks have dead predecessors, so we didn't visit them - // above. - for (ReversePostOrderTraversal::rpo_iterator - MBBI = RPOT.begin(), - MBBE = RPOT.end(); - MBBI != MBBE; ++MBBI) { - MachineBasicBlock *MBB = *MBBI; - if (!isBlockDone(MBB)) { - processBasicBlock(MBB, false); - // Don't update successors here. We'll get to them anyway through this - // loop. - } - } - - // Clear the LiveOuts vectors and collapse any remaining DomainValues. 
- for (ReversePostOrderTraversal::rpo_iterator - MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) { - auto FI = MBBInfos.find(*MBBI); - if (FI == MBBInfos.end() || !FI->second.OutRegs) - continue; - for (unsigned i = 0, e = NumRegs; i != e; ++i) - if (FI->second.OutRegs[i].Value) - release(FI->second.OutRegs[i].Value); - delete[] FI->second.OutRegs; - } - MBBInfos.clear(); - UndefReads.clear(); - Avail.clear(); - Allocator.DestroyAll(); - - return false; -} diff --git a/lib/CodeGen/ExecutionDomainFix.cpp b/lib/CodeGen/ExecutionDomainFix.cpp new file mode 100644 index 000000000000..776fc6bb410a --- /dev/null +++ b/lib/CodeGen/ExecutionDomainFix.cpp @@ -0,0 +1,473 @@ +//===- ExecutionDomainFix.cpp - Fix execution domain issues ----*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/ExecutionDomainFix.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" + +using namespace llvm; + +#define DEBUG_TYPE "execution-deps-fix" + +iterator_range::const_iterator> +ExecutionDomainFix::regIndices(unsigned Reg) const { + assert(Reg < AliasMap.size() && "Invalid register"); + const auto &Entry = AliasMap[Reg]; + return make_range(Entry.begin(), Entry.end()); +} + +DomainValue *ExecutionDomainFix::alloc(int domain) { + DomainValue *dv = Avail.empty() ? new (Allocator.Allocate()) DomainValue + : Avail.pop_back_val(); + if (domain >= 0) + dv->addDomain(domain); + assert(dv->Refs == 0 && "Reference count wasn't cleared"); + assert(!dv->Next && "Chained DomainValue shouldn't have been recycled"); + return dv; +} + +void ExecutionDomainFix::release(DomainValue *DV) { + while (DV) { + assert(DV->Refs && "Bad DomainValue"); + if (--DV->Refs) + return; + + // There are no more DV references. Collapse any contained instructions. + if (DV->AvailableDomains && !DV->isCollapsed()) + collapse(DV, DV->getFirstDomain()); + + DomainValue *Next = DV->Next; + DV->clear(); + Avail.push_back(DV); + // Also release the next DomainValue in the chain. + DV = Next; + } +} + +DomainValue *ExecutionDomainFix::resolve(DomainValue *&DVRef) { + DomainValue *DV = DVRef; + if (!DV || !DV->Next) + return DV; + + // DV has a chain. Find the end. + do + DV = DV->Next; + while (DV->Next); + + // Update DVRef to point to DV. + retain(DV); + release(DVRef); + DVRef = DV; + return DV; +} + +void ExecutionDomainFix::setLiveReg(int rx, DomainValue *dv) { + assert(unsigned(rx) < NumRegs && "Invalid index"); + assert(!LiveRegs.empty() && "Must enter basic block first."); + + if (LiveRegs[rx] == dv) + return; + if (LiveRegs[rx]) + release(LiveRegs[rx]); + LiveRegs[rx] = retain(dv); +} + +void ExecutionDomainFix::kill(int rx) { + assert(unsigned(rx) < NumRegs && "Invalid index"); + assert(!LiveRegs.empty() && "Must enter basic block first."); + if (!LiveRegs[rx]) + return; + + release(LiveRegs[rx]); + LiveRegs[rx] = nullptr; +} + +void ExecutionDomainFix::force(int rx, unsigned domain) { + assert(unsigned(rx) < NumRegs && "Invalid index"); + assert(!LiveRegs.empty() && "Must enter basic block first."); + if (DomainValue *dv = LiveRegs[rx]) { + if (dv->isCollapsed()) + dv->addDomain(domain); + else if (dv->hasDomain(domain)) + collapse(dv, domain); + else { + // This is an incompatible open DomainValue. 
Collapse it to whatever and + // force the new value into domain. This costs a domain crossing. + collapse(dv, dv->getFirstDomain()); + assert(LiveRegs[rx] && "Not live after collapse?"); + LiveRegs[rx]->addDomain(domain); + } + } else { + // Set up basic collapsed DomainValue. + setLiveReg(rx, alloc(domain)); + } +} + +void ExecutionDomainFix::collapse(DomainValue *dv, unsigned domain) { + assert(dv->hasDomain(domain) && "Cannot collapse"); + + // Collapse all the instructions. + while (!dv->Instrs.empty()) + TII->setExecutionDomain(*dv->Instrs.pop_back_val(), domain); + dv->setSingleDomain(domain); + + // If there are multiple users, give them new, unique DomainValues. + if (!LiveRegs.empty() && dv->Refs > 1) + for (unsigned rx = 0; rx != NumRegs; ++rx) + if (LiveRegs[rx] == dv) + setLiveReg(rx, alloc(domain)); +} + +bool ExecutionDomainFix::merge(DomainValue *A, DomainValue *B) { + assert(!A->isCollapsed() && "Cannot merge into collapsed"); + assert(!B->isCollapsed() && "Cannot merge from collapsed"); + if (A == B) + return true; + // Restrict to the domains that A and B have in common. + unsigned common = A->getCommonDomains(B->AvailableDomains); + if (!common) + return false; + A->AvailableDomains = common; + A->Instrs.append(B->Instrs.begin(), B->Instrs.end()); + + // Clear the old DomainValue so we won't try to swizzle instructions twice. + B->clear(); + // All uses of B are referred to A. + B->Next = retain(A); + + for (unsigned rx = 0; rx != NumRegs; ++rx) { + assert(!LiveRegs.empty() && "no space allocated for live registers"); + if (LiveRegs[rx] == B) + setLiveReg(rx, A); + } + return true; +} + +void ExecutionDomainFix::enterBasicBlock( + const LoopTraversal::TraversedMBBInfo &TraversedMBB) { + + MachineBasicBlock *MBB = TraversedMBB.MBB; + + // Set up LiveRegs to represent registers entering MBB. + // Set default domain values to 'no domain' (nullptr) + if (LiveRegs.empty()) + LiveRegs.assign(NumRegs, nullptr); + + // This is the entry block. + if (MBB->pred_empty()) { + DEBUG(dbgs() << printMBBReference(*MBB) << ": entry\n"); + return; + } + + // Try to coalesce live-out registers from predecessors. + for (MachineBasicBlock *pred : MBB->predecessors()) { + assert(unsigned(pred->getNumber()) < MBBOutRegsInfos.size() && + "Should have pre-allocated MBBInfos for all MBBs"); + LiveRegsDVInfo &Incoming = MBBOutRegsInfos[pred->getNumber()]; + // Incoming is null if this is a backedge from a BB + // we haven't processed yet + if (Incoming.empty()) + continue; + + for (unsigned rx = 0; rx != NumRegs; ++rx) { + DomainValue *pdv = resolve(Incoming[rx]); + if (!pdv) + continue; + if (!LiveRegs[rx]) { + setLiveReg(rx, pdv); + continue; + } + + // We have a live DomainValue from more than one predecessor. + if (LiveRegs[rx]->isCollapsed()) { + // We are already collapsed, but predecessor is not. Force it. + unsigned Domain = LiveRegs[rx]->getFirstDomain(); + if (!pdv->isCollapsed() && pdv->hasDomain(Domain)) + collapse(pdv, Domain); + continue; + } + + // Currently open, merge in predecessor. + if (!pdv->isCollapsed()) + merge(LiveRegs[rx], pdv); + else + force(rx, pdv->getFirstDomain()); + } + } + DEBUG(dbgs() << printMBBReference(*MBB) + << (!TraversedMBB.IsDone ? 
": incomplete\n" + : ": all preds known\n")); +} + +void ExecutionDomainFix::leaveBasicBlock( + const LoopTraversal::TraversedMBBInfo &TraversedMBB) { + assert(!LiveRegs.empty() && "Must enter basic block first."); + unsigned MBBNumber = TraversedMBB.MBB->getNumber(); + assert(MBBNumber < MBBOutRegsInfos.size() && + "Unexpected basic block number."); + // Save register clearances at end of MBB - used by enterBasicBlock(). + for (DomainValue *OldLiveReg : MBBOutRegsInfos[MBBNumber]) { + release(OldLiveReg); + } + MBBOutRegsInfos[MBBNumber] = LiveRegs; + LiveRegs.clear(); +} + +bool ExecutionDomainFix::visitInstr(MachineInstr *MI) { + // Update instructions with explicit execution domains. + std::pair DomP = TII->getExecutionDomain(*MI); + if (DomP.first) { + if (DomP.second) + visitSoftInstr(MI, DomP.second); + else + visitHardInstr(MI, DomP.first); + } + + return !DomP.first; +} + +void ExecutionDomainFix::processDefs(MachineInstr *MI, bool Kill) { + assert(!MI->isDebugValue() && "Won't process debug values"); + const MCInstrDesc &MCID = MI->getDesc(); + for (unsigned i = 0, + e = MI->isVariadic() ? MI->getNumOperands() : MCID.getNumDefs(); + i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) + continue; + if (MO.isUse()) + continue; + for (int rx : regIndices(MO.getReg())) { + // This instruction explicitly defines rx. + DEBUG(dbgs() << printReg(RC->getRegister(rx), TRI) << ":\t" << *MI); + + // Kill off domains redefined by generic instructions. + if (Kill) + kill(rx); + } + } +} + +void ExecutionDomainFix::visitHardInstr(MachineInstr *mi, unsigned domain) { + // Collapse all uses. + for (unsigned i = mi->getDesc().getNumDefs(), + e = mi->getDesc().getNumOperands(); + i != e; ++i) { + MachineOperand &mo = mi->getOperand(i); + if (!mo.isReg()) + continue; + for (int rx : regIndices(mo.getReg())) { + force(rx, domain); + } + } + + // Kill all defs and force them. + for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) { + MachineOperand &mo = mi->getOperand(i); + if (!mo.isReg()) + continue; + for (int rx : regIndices(mo.getReg())) { + kill(rx); + force(rx, domain); + } + } +} + +void ExecutionDomainFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { + // Bitmask of available domains for this instruction after taking collapsed + // operands into account. + unsigned available = mask; + + // Scan the explicit use operands for incoming domains. + SmallVector used; + if (!LiveRegs.empty()) + for (unsigned i = mi->getDesc().getNumDefs(), + e = mi->getDesc().getNumOperands(); + i != e; ++i) { + MachineOperand &mo = mi->getOperand(i); + if (!mo.isReg()) + continue; + for (int rx : regIndices(mo.getReg())) { + DomainValue *dv = LiveRegs[rx]; + if (dv == nullptr) + continue; + // Bitmask of domains that dv and available have in common. + unsigned common = dv->getCommonDomains(available); + // Is it possible to use this collapsed register for free? + if (dv->isCollapsed()) { + // Restrict available domains to the ones in common with the operand. + // If there are no common domains, we must pay the cross-domain + // penalty for this operand. + if (common) + available = common; + } else if (common) + // Open DomainValue is compatible, save it for merging. + used.push_back(rx); + else + // Open DomainValue is not compatible with instruction. It is useless + // now. + kill(rx); + } + } + + // If the collapsed operands force a single domain, propagate the collapse. 
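// Illustrative aside (editor's sketch, not part of this patch): the logic
// right below keys off a bitmask in which each bit of `available` is one
// execution domain the instruction may still use; when exactly one bit
// survives, the domain is forced and the collapse can happen immediately,
// which is what the isPowerOf2_32 / countTrailingZeros pair expresses.
// A minimal standalone C++ equivalent using the standard <bit> helpers
// (function names here are hypothetical, not LLVM API):
#include <bit>
#include <cstdint>

bool singleDomainForced(uint32_t Available) {
  // Mirrors isPowerOf2_32(available): true iff exactly one domain bit is set.
  return std::has_single_bit(Available);
}

int forcedDomain(uint32_t Available) {
  // Mirrors countTrailingZeros(available); only meaningful when a single
  // bit is set, as guarded by singleDomainForced above.
  return std::countr_zero(Available);
}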
+ if (isPowerOf2_32(available)) { + unsigned domain = countTrailingZeros(available); + TII->setExecutionDomain(*mi, domain); + visitHardInstr(mi, domain); + return; + } + + // Kill off any remaining uses that don't match available, and build a list of + // incoming DomainValues that we want to merge. + SmallVector Regs; + for (int rx : used) { + assert(!LiveRegs.empty() && "no space allocated for live registers"); + DomainValue *&LR = LiveRegs[rx]; + // This useless DomainValue could have been missed above. + if (!LR->getCommonDomains(available)) { + kill(rx); + continue; + } + // Sorted insertion. + // Enables giving priority to the latest domains during merging. + auto I = std::upper_bound( + Regs.begin(), Regs.end(), rx, [&](int LHS, const int RHS) { + return RDA->getReachingDef(mi, RC->getRegister(LHS)) < + RDA->getReachingDef(mi, RC->getRegister(RHS)); + }); + Regs.insert(I, rx); + } + + // doms are now sorted in order of appearance. Try to merge them all, giving + // priority to the latest ones. + DomainValue *dv = nullptr; + while (!Regs.empty()) { + if (!dv) { + dv = LiveRegs[Regs.pop_back_val()]; + // Force the first dv to match the current instruction. + dv->AvailableDomains = dv->getCommonDomains(available); + assert(dv->AvailableDomains && "Domain should have been filtered"); + continue; + } + + DomainValue *Latest = LiveRegs[Regs.pop_back_val()]; + // Skip already merged values. + if (Latest == dv || Latest->Next) + continue; + if (merge(dv, Latest)) + continue; + + // If latest didn't merge, it is useless now. Kill all registers using it. + for (int i : used) { + assert(!LiveRegs.empty() && "no space allocated for live registers"); + if (LiveRegs[i] == Latest) + kill(i); + } + } + + // dv is the DomainValue we are going to use for this instruction. + if (!dv) { + dv = alloc(); + dv->AvailableDomains = available; + } + dv->Instrs.push_back(mi); + + // Finally set all defs and non-collapsed uses to dv. We must iterate through + // all the operators, including imp-def ones. + for (MachineOperand &mo : mi->operands()) { + if (!mo.isReg()) + continue; + for (int rx : regIndices(mo.getReg())) { + if (!LiveRegs[rx] || (mo.isDef() && LiveRegs[rx] != dv)) { + kill(rx); + setLiveReg(rx, dv); + } + } + } +} + +void ExecutionDomainFix::processBasicBlock( + const LoopTraversal::TraversedMBBInfo &TraversedMBB) { + enterBasicBlock(TraversedMBB); + // If this block is not done, it makes little sense to make any decisions + // based on clearance information. We need to make a second pass anyway, + // and by then we'll have better information, so we can avoid doing the work + // to try and break dependencies now. + for (MachineInstr &MI : *TraversedMBB.MBB) { + if (!MI.isDebugValue()) { + bool Kill = false; + if (TraversedMBB.PrimaryPass) + Kill = visitInstr(&MI); + processDefs(&MI, Kill); + } + } + leaveBasicBlock(TraversedMBB); +} + +bool ExecutionDomainFix::runOnMachineFunction(MachineFunction &mf) { + if (skipFunction(mf.getFunction())) + return false; + MF = &mf; + TII = MF->getSubtarget().getInstrInfo(); + TRI = MF->getSubtarget().getRegisterInfo(); + LiveRegs.clear(); + assert(NumRegs == RC->getNumRegs() && "Bad regclass"); + + DEBUG(dbgs() << "********** FIX EXECUTION DOMAIN: " + << TRI->getRegClassName(RC) << " **********\n"); + + // If no relevant registers are used in the function, we can skip it + // completely. 
+ bool anyregs = false; + const MachineRegisterInfo &MRI = mf.getRegInfo(); + for (unsigned Reg : *RC) { + if (MRI.isPhysRegUsed(Reg)) { + anyregs = true; + break; + } + } + if (!anyregs) + return false; + + RDA = &getAnalysis(); + + // Initialize the AliasMap on the first use. + if (AliasMap.empty()) { + // Given a PhysReg, AliasMap[PhysReg] returns a list of indices into RC and + // therefore the LiveRegs array. + AliasMap.resize(TRI->getNumRegs()); + for (unsigned i = 0, e = RC->getNumRegs(); i != e; ++i) + for (MCRegAliasIterator AI(RC->getRegister(i), TRI, true); AI.isValid(); + ++AI) + AliasMap[*AI].push_back(i); + } + + // Initialize the MBBOutRegsInfos + MBBOutRegsInfos.resize(mf.getNumBlockIDs()); + + // Traverse the basic blocks. + LoopTraversal Traversal; + LoopTraversal::TraversalOrder TraversedMBBOrder = Traversal.traverse(mf); + for (LoopTraversal::TraversedMBBInfo TraversedMBB : TraversedMBBOrder) { + processBasicBlock(TraversedMBB); + } + + for (LiveRegsDVInfo OutLiveRegs : MBBOutRegsInfos) { + for (DomainValue *OutLiveReg : OutLiveRegs) { + if (OutLiveReg) + release(OutLiveReg); + } + } + MBBOutRegsInfos.clear(); + Avail.clear(); + Allocator.DestroyAll(); + + return false; +} diff --git a/lib/CodeGen/ExpandMemCmp.cpp b/lib/CodeGen/ExpandMemCmp.cpp index 8d69ea90a10a..d73e2c4670be 100644 --- a/lib/CodeGen/ExpandMemCmp.cpp +++ b/lib/CodeGen/ExpandMemCmp.cpp @@ -7,9 +7,8 @@ // //===----------------------------------------------------------------------===// // -// This pass tries to partially inline the fast path of well-known library -// functions, such as using square-root instructions for cases where sqrt() -// does not need to set errno. +// This pass tries to expand memcmp() calls into optimally-sized loads and +// compares for the target. 
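// Illustrative aside (editor's sketch, not part of this patch): to make the
// pass description above concrete, for a fixed-size call such as
// memcmp(a, b, 8) on a little-endian target the expansion conceptually
// replaces the libcall with wide loads, a byte swap so an unsigned integer
// compare matches memcmp's lexicographic byte order, and a -1/0/1 result.
// Hypothetical standalone C++ equivalent, not the pass's actual IR output:
#include <cstdint>
#include <cstring>

int expandedMemcmp8(const void *LHS, const void *RHS) {
  uint64_t A, B;
  std::memcpy(&A, LHS, sizeof(A)); // the expansion's wide loads
  std::memcpy(&B, RHS, sizeof(B));
  A = __builtin_bswap64(A);        // swap needed on little-endian hosts only
  B = __builtin_bswap64(B);
  if (A == B)
    return 0;
  return A < B ? -1 : 1;           // any negative/positive value is valid
}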
// //===----------------------------------------------------------------------===// @@ -22,8 +21,6 @@ #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/IRBuilder.h" -#include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" using namespace llvm; @@ -35,7 +32,7 @@ STATISTIC(NumMemCmpGreaterThanMax, "Number of memcmp calls with size greater than max size"); STATISTIC(NumMemCmpInlined, "Number of inlined memcmp calls"); -static cl::opt MemCmpNumLoadsPerBlock( +static cl::opt MemCmpEqZeroNumLoadsPerBlock( "memcmp-num-loads-per-block", cl::Hidden, cl::init(1), cl::desc("The number of loads per basic block for inline expansion of " "memcmp that is only being compared against zero.")); @@ -59,7 +56,7 @@ class MemCmpExpansion { const uint64_t Size; unsigned MaxLoadSize; uint64_t NumLoadsNonOneByte; - const uint64_t NumLoadsPerBlock; + const uint64_t NumLoadsPerBlockForZeroCmp; std::vector LoadCmpBlocks; BasicBlock *EndBlock; PHINode *PhiRes; @@ -105,7 +102,7 @@ class MemCmpExpansion { MemCmpExpansion(CallInst *CI, uint64_t Size, const TargetTransformInfo::MemCmpExpansionOptions &Options, unsigned MaxNumLoads, const bool IsUsedForZeroCmp, - unsigned NumLoadsPerBlock, const DataLayout &DL); + unsigned NumLoadsPerBlockForZeroCmp, const DataLayout &DL); unsigned getNumBlocks(); uint64_t getNumLoads() const { return LoadSequence.size(); } @@ -125,12 +122,12 @@ MemCmpExpansion::MemCmpExpansion( CallInst *const CI, uint64_t Size, const TargetTransformInfo::MemCmpExpansionOptions &Options, const unsigned MaxNumLoads, const bool IsUsedForZeroCmp, - const unsigned NumLoadsPerBlock, const DataLayout &TheDataLayout) + const unsigned MaxLoadsPerBlockForZeroCmp, const DataLayout &TheDataLayout) : CI(CI), Size(Size), MaxLoadSize(0), NumLoadsNonOneByte(0), - NumLoadsPerBlock(NumLoadsPerBlock), + NumLoadsPerBlockForZeroCmp(MaxLoadsPerBlockForZeroCmp), IsUsedForZeroCmp(IsUsedForZeroCmp), DL(TheDataLayout), Builder(CI) { @@ -174,8 +171,8 @@ MemCmpExpansion::MemCmpExpansion( unsigned MemCmpExpansion::getNumBlocks() { if (IsUsedForZeroCmp) - return getNumLoads() / NumLoadsPerBlock + - (getNumLoads() % NumLoadsPerBlock != 0 ? 1 : 0); + return getNumLoads() / NumLoadsPerBlockForZeroCmp + + (getNumLoads() % NumLoadsPerBlockForZeroCmp != 0 ? 1 : 0); return getNumLoads(); } @@ -252,7 +249,7 @@ Value *MemCmpExpansion::getCompareLoadPairs(unsigned BlockIndex, Value *Diff; const unsigned NumLoads = - std::min(getNumLoads() - LoadIndex, NumLoadsPerBlock); + std::min(getNumLoads() - LoadIndex, NumLoadsPerBlockForZeroCmp); // For a single-block expansion, start inserting before the memcmp call. if (LoadCmpBlocks.empty()) @@ -522,8 +519,6 @@ Value *MemCmpExpansion::getMemCmpEqZeroOneBlock() { /// A memcmp expansion that only has one block of load and compare can bypass /// the compare, branch, and phi IR that is required in the general case. Value *MemCmpExpansion::getMemCmpOneBlock() { - assert(NumLoadsPerBlock == 1 && "Only handles one load pair per block"); - Type *LoadSizeType = IntegerType::get(CI->getContext(), Size * 8); Value *Source1 = CI->getArgOperand(0); Value *Source2 = CI->getArgOperand(1); @@ -569,11 +564,8 @@ Value *MemCmpExpansion::getMemCmpOneBlock() { // This function expands the memcmp call into an inline expansion and returns // the memcmp result. Value *MemCmpExpansion::getMemCmpExpansion() { - // A memcmp with zero-comparison with only one block of load and compare does - // not need to set up any extra blocks. 
This case could be handled in the DAG, - // but since we have all of the machinery to flexibly expand any memcpy here, - // we choose to handle this case too to avoid fragmented lowering. - if ((!IsUsedForZeroCmp && NumLoadsPerBlock != 1) || getNumBlocks() != 1) { + // Create the basic block framework for a multi-block expansion. + if (getNumBlocks() != 1) { BasicBlock *StartBlock = CI->getParent(); EndBlock = StartBlock->splitBasicBlock(CI, "endblock"); setupEndBlockPHINodes(); @@ -599,8 +591,8 @@ Value *MemCmpExpansion::getMemCmpExpansion() { return getNumBlocks() == 1 ? getMemCmpEqZeroOneBlock() : getMemCmpExpansionZeroCase(); - // TODO: Handle more than one load pair per block in getMemCmpOneBlock(). - if (getNumBlocks() == 1 && NumLoadsPerBlock == 1) return getMemCmpOneBlock(); + if (getNumBlocks() == 1) + return getMemCmpOneBlock(); for (unsigned I = 0; I < getNumBlocks(); ++I) { emitLoadCompareBlock(I); @@ -712,8 +704,12 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI, const unsigned MaxNumLoads = TLI->getMaxExpandSizeMemcmp(CI->getFunction()->optForSize()); + unsigned NumLoadsPerBlock = MemCmpEqZeroNumLoadsPerBlock.getNumOccurrences() + ? MemCmpEqZeroNumLoadsPerBlock + : TLI->getMemcmpEqZeroLoadsPerBlock(); + MemCmpExpansion Expansion(CI, SizeVal, *Options, MaxNumLoads, - IsUsedForZeroCmp, MemCmpNumLoadsPerBlock, *DL); + IsUsedForZeroCmp, NumLoadsPerBlock, *DL); // Don't expand if this will require more loads than desired by the target. if (Expansion.getNumLoads() == 0) { diff --git a/lib/CodeGen/ExpandPostRAPseudos.cpp b/lib/CodeGen/ExpandPostRAPseudos.cpp index 651d67226dc0..6ef97d6dd5ec 100644 --- a/lib/CodeGen/ExpandPostRAPseudos.cpp +++ b/lib/CodeGen/ExpandPostRAPseudos.cpp @@ -104,8 +104,8 @@ bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) { if (DstSubReg == InsReg) { // No need to insert an identity copy instruction. // Watch out for case like this: - // %RAX = SUBREG_TO_REG 0, %EAX, 3 - // We must leave %RAX live. + // %rax = SUBREG_TO_REG 0, killed %eax, 3 + // We must leave %rax live. if (DstReg != InsReg) { MI->setDesc(TII->get(TargetOpcode::KILL)); MI->RemoveOperand(3); // SubIdx diff --git a/lib/CodeGen/FEntryInserter.cpp b/lib/CodeGen/FEntryInserter.cpp index dbe6b30c9642..4ddf9f92836c 100644 --- a/lib/CodeGen/FEntryInserter.cpp +++ b/lib/CodeGen/FEntryInserter.cpp @@ -36,7 +36,7 @@ struct FEntryInserter : public MachineFunctionPass { bool FEntryInserter::runOnMachineFunction(MachineFunction &MF) { const std::string FEntryName = - MF.getFunction()->getFnAttribute("fentry-call").getValueAsString(); + MF.getFunction().getFnAttribute("fentry-call").getValueAsString(); if (FEntryName != "true") return false; diff --git a/lib/CodeGen/GCRootLowering.cpp b/lib/CodeGen/GCRootLowering.cpp index 3a7b48eeb469..4361d8b248c8 100644 --- a/lib/CodeGen/GCRootLowering.cpp +++ b/lib/CodeGen/GCRootLowering.cpp @@ -28,7 +28,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetMachine.h" using namespace llvm; @@ -329,10 +328,10 @@ void GCMachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) { bool GCMachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) { // Quick exit for functions that do not use GC. 
- if (!MF.getFunction()->hasGC()) + if (!MF.getFunction().hasGC()) return false; - FI = &getAnalysis().getFunctionInfo(*MF.getFunction()); + FI = &getAnalysis().getFunctionInfo(MF.getFunction()); MMI = &getAnalysis(); TII = MF.getSubtarget().getInstrInfo(); diff --git a/lib/CodeGen/GlobalISel/CallLowering.cpp b/lib/CodeGen/GlobalISel/CallLowering.cpp index 50ea69a267ee..114c068749eb 100644 --- a/lib/CodeGen/GlobalISel/CallLowering.cpp +++ b/lib/CodeGen/GlobalISel/CallLowering.cpp @@ -108,7 +108,7 @@ bool CallLowering::handleAssignments(MachineIRBuilder &MIRBuilder, ArrayRef Args, ValueHandler &Handler) const { MachineFunction &MF = MIRBuilder.getMF(); - const Function &F = *MF.getFunction(); + const Function &F = MF.getFunction(); const DataLayout &DL = F.getParent()->getDataLayout(); SmallVector ArgLocs; diff --git a/lib/CodeGen/GlobalISel/IRTranslator.cpp b/lib/CodeGen/GlobalISel/IRTranslator.cpp index 83de926a2390..7f1e18291c0d 100644 --- a/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -124,8 +124,8 @@ unsigned IRTranslator::getOrCreateVReg(const Value &Val) { bool Success = translate(*CV, VReg); if (!Success) { OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure", - MF->getFunction()->getSubprogram(), - &MF->getFunction()->getEntryBlock()); + MF->getFunction().getSubprogram(), + &MF->getFunction().getEntryBlock()); R << "unable to translate constant: " << ore::NV("Type", Val.getType()); reportTranslationError(*MF, *TPC, *ORE, R); return VReg; @@ -238,6 +238,8 @@ bool IRTranslator::translateCompare(const User &U, bool IRTranslator::translateRet(const User &U, MachineIRBuilder &MIRBuilder) { const ReturnInst &RI = cast(U); const Value *Ret = RI.getReturnValue(); + if (Ret && DL->getTypeStoreSize(Ret->getType()) == 0) + Ret = nullptr; // The target may mess up with the insertion point, but // this is not important as a return is the last instruction // of the block anyway. @@ -337,6 +339,9 @@ bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) { : MachineMemOperand::MONone; Flags |= MachineMemOperand::MOLoad; + if (DL->getTypeStoreSize(LI.getType()) == 0) + return true; + unsigned Res = getOrCreateVReg(LI); unsigned Addr = getOrCreateVReg(*LI.getPointerOperand()); @@ -355,6 +360,9 @@ bool IRTranslator::translateStore(const User &U, MachineIRBuilder &MIRBuilder) { : MachineMemOperand::MONone; Flags |= MachineMemOperand::MOStore; + if (DL->getTypeStoreSize(SI.getValueOperand()->getType()) == 0) + return true; + unsigned Val = getOrCreateVReg(*SI.getValueOperand()); unsigned Addr = getOrCreateVReg(*SI.getPointerOperand()); @@ -508,10 +516,6 @@ bool IRTranslator::translateGetElementPtr(const User &U, Offset = 0; } - // N = N + Idx * ElementSize; - unsigned ElementSizeReg = - getOrCreateVReg(*ConstantInt::get(OffsetIRTy, ElementSize)); - unsigned IdxReg = getOrCreateVReg(*Idx); if (MRI->getType(IdxReg) != OffsetTy) { unsigned NewIdxReg = MRI->createGenericVirtualRegister(OffsetTy); @@ -519,11 +523,20 @@ bool IRTranslator::translateGetElementPtr(const User &U, IdxReg = NewIdxReg; } - unsigned OffsetReg = MRI->createGenericVirtualRegister(OffsetTy); - MIRBuilder.buildMul(OffsetReg, ElementSizeReg, IdxReg); + // N = N + Idx * ElementSize; + // Avoid doing it for ElementSize of 1. 
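// Illustrative aside (editor's sketch, not part of this patch): the hunk
// below computes one GEP index's address contribution, N = N + Idx *
// ElementSize, and now skips the multiply when ElementSize is 1.
// Hypothetical scalar equivalent of that arithmetic:
#include <cstdint>

uint64_t gepStep(uint64_t N, uint64_t Idx, uint64_t ElementSize) {
  // Avoid materializing a multiply for the common ElementSize == 1 case.
  uint64_t Offset = (ElementSize != 1) ? Idx * ElementSize : Idx;
  return N + Offset;
}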
+ unsigned GepOffsetReg; + if (ElementSize != 1) { + unsigned ElementSizeReg = + getOrCreateVReg(*ConstantInt::get(OffsetIRTy, ElementSize)); + + GepOffsetReg = MRI->createGenericVirtualRegister(OffsetTy); + MIRBuilder.buildMul(GepOffsetReg, ElementSizeReg, IdxReg); + } else + GepOffsetReg = IdxReg; unsigned NewBaseReg = MRI->createGenericVirtualRegister(PtrTy); - MIRBuilder.buildGEP(NewBaseReg, BaseReg, OffsetReg); + MIRBuilder.buildGEP(NewBaseReg, BaseReg, GepOffsetReg); BaseReg = NewBaseReg; } } @@ -583,7 +596,7 @@ void IRTranslator::getStackGuard(unsigned DstReg, MIB.addDef(DstReg); auto &TLI = *MF->getSubtarget().getTargetLowering(); - Value *Global = TLI.getSDagStackGuard(*MF->getFunction()->getParent()); + Value *Global = TLI.getSDagStackGuard(*MF->getFunction().getParent()); if (!Global) return; @@ -807,7 +820,14 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) { if (CI.isInlineAsm()) return translateInlineAsm(CI, MIRBuilder); - if (!F || !F->isIntrinsic()) { + Intrinsic::ID ID = Intrinsic::not_intrinsic; + if (F && F->isIntrinsic()) { + ID = F->getIntrinsicID(); + if (TII && ID == Intrinsic::not_intrinsic) + ID = static_cast(TII->getIntrinsicID(F)); + } + + if (!F || !F->isIntrinsic() || ID == Intrinsic::not_intrinsic) { unsigned Res = CI.getType()->isVoidTy() ? 0 : getOrCreateVReg(CI); SmallVector Args; for (auto &Arg: CI.arg_operands()) @@ -819,10 +839,6 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) { }); } - Intrinsic::ID ID = F->getIntrinsicID(); - if (TII && ID == Intrinsic::not_intrinsic) - ID = static_cast(TII->getIntrinsicID(F)); - assert(ID != Intrinsic::not_intrinsic && "unknown intrinsic"); if (translateKnownIntrinsic(CI, ID, MIRBuilder)) @@ -843,14 +859,10 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) { const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering(); TargetLowering::IntrinsicInfo Info; // TODO: Add a GlobalISel version of getTgtMemIntrinsic. - if (TLI.getTgtMemIntrinsic(Info, CI, ID)) { - MachineMemOperand::Flags Flags = - Info.vol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone; - Flags |= - Info.readMem ? MachineMemOperand::MOLoad : MachineMemOperand::MOStore; - uint64_t Size = Info.memVT.getSizeInBits() >> 3; + if (TLI.getTgtMemIntrinsic(Info, CI, *MF, ID)) { + uint64_t Size = Info.memVT.getStoreSize(); MIB.addMemOperand(MF->getMachineMemOperand(MachinePointerInfo(Info.ptrVal), - Flags, Size, Info.align)); + Info.flags, Size, Info.align)); } return true; @@ -921,7 +933,7 @@ bool IRTranslator::translateLandingPad(const User &U, // If there aren't registers to copy the values into (e.g., during SjLj // exceptions), then don't bother. 
auto &TLI = *MF->getSubtarget().getTargetLowering(); - const Constant *PersonalityFn = MF->getFunction()->getPersonalityFn(); + const Constant *PersonalityFn = MF->getFunction().getPersonalityFn(); if (TLI.getExceptionPointerRegister(PersonalityFn) == 0 && TLI.getExceptionSelectorRegister(PersonalityFn) == 0) return true; @@ -1232,7 +1244,7 @@ void IRTranslator::finalizeFunction() { bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { MF = &CurMF; - const Function &F = *MF->getFunction(); + const Function &F = MF->getFunction(); if (F.empty()) return false; CLI = MF->getSubtarget().getCallLowering(); @@ -1245,6 +1257,14 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { assert(PendingPHIs.empty() && "stale PHIs"); + if (!DL->isLittleEndian()) { + // Currently we don't properly handle big endian code. + OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure", + F.getSubprogram(), &F.getEntryBlock()); + R << "unable to translate in big endian mode"; + reportTranslationError(*MF, *TPC, *ORE, R); + } + // Release the per-function state when we return, whether we succeeded or not. auto FinalizeOnReturn = make_scope_exit([this]() { finalizeFunction(); }); @@ -1269,12 +1289,14 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { // Lower the actual args into this basic block. SmallVector VRegArgs; - for (const Argument &Arg: F.args()) + for (const Argument &Arg: F.args()) { + if (DL->getTypeStoreSize(Arg.getType()) == 0) + continue; // Don't handle zero sized types. VRegArgs.push_back(getOrCreateVReg(Arg)); + } if (!CLI->lowerFormalArguments(EntryBuilder, F, VRegArgs)) { OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure", - MF->getFunction()->getSubprogram(), - &MF->getFunction()->getEntryBlock()); + F.getSubprogram(), &F.getEntryBlock()); R << "unable to lower arguments: " << ore::NV("Prototype", F.getType()); reportTranslationError(*MF, *TPC, *ORE, R); return false; diff --git a/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/lib/CodeGen/GlobalISel/InstructionSelect.cpp index bd5fd5afcbcd..422cc2219aa8 100644 --- a/lib/CodeGen/GlobalISel/InstructionSelect.cpp +++ b/lib/CodeGen/GlobalISel/InstructionSelect.cpp @@ -189,7 +189,7 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { if (MF.size() != NumBlocks) { MachineOptimizationRemarkMissed R("gisel-select", "GISelFailure", - MF.getFunction()->getSubprogram(), + MF.getFunction().getSubprogram(), /*MBB=*/nullptr); R << "inserting blocks is not supported yet"; reportGISelFailure(MF, TPC, MORE, R); diff --git a/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/lib/CodeGen/GlobalISel/InstructionSelector.cpp index 88669bd68c00..5e77fcbb0ed9 100644 --- a/lib/CodeGen/GlobalISel/InstructionSelector.cpp +++ b/lib/CodeGen/GlobalISel/InstructionSelector.cpp @@ -46,50 +46,6 @@ bool InstructionSelector::constrainOperandRegToRegClass( constrainRegToClass(MRI, TII, RBI, I, I.getOperand(OpIdx).getReg(), RC); } -bool InstructionSelector::constrainSelectedInstRegOperands( - MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, - const RegisterBankInfo &RBI) const { - MachineBasicBlock &MBB = *I.getParent(); - MachineFunction &MF = *MBB.getParent(); - MachineRegisterInfo &MRI = MF.getRegInfo(); - - for (unsigned OpI = 0, OpE = I.getNumExplicitOperands(); OpI != OpE; ++OpI) { - MachineOperand &MO = I.getOperand(OpI); - - // There's nothing to be done on non-register operands. 
- if (!MO.isReg()) - continue; - - DEBUG(dbgs() << "Converting operand: " << MO << '\n'); - assert(MO.isReg() && "Unsupported non-reg operand"); - - unsigned Reg = MO.getReg(); - // Physical registers don't need to be constrained. - if (TRI.isPhysicalRegister(Reg)) - continue; - - // Register operands with a value of 0 (e.g. predicate operands) don't need - // to be constrained. - if (Reg == 0) - continue; - - // If the operand is a vreg, we should constrain its regclass, and only - // insert COPYs if that's impossible. - // constrainOperandRegClass does that for us. - MO.setReg(constrainOperandRegClass(MF, TRI, MRI, TII, RBI, I, I.getDesc(), - Reg, OpI)); - - // Tie uses to defs as indicated in MCInstrDesc if this hasn't already been - // done. - if (MO.isUse()) { - int DefIdx = I.getDesc().getOperandConstraint(OpI, MCOI::TIED_TO); - if (DefIdx != -1 && !I.isRegTiedToUseOperand(DefIdx)) - I.tieOperands(DefIdx, OpI); - } - } - return true; -} - bool InstructionSelector::isOperandImmEqual( const MachineOperand &MO, int64_t Value, const MachineRegisterInfo &MRI) const { diff --git a/lib/CodeGen/GlobalISel/Legalizer.cpp b/lib/CodeGen/GlobalISel/Legalizer.cpp index f7bbf610fc98..f09b0d9f11e7 100644 --- a/lib/CodeGen/GlobalISel/Legalizer.cpp +++ b/lib/CodeGen/GlobalISel/Legalizer.cpp @@ -22,7 +22,6 @@ #include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Support/Debug.h" @@ -176,7 +175,7 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) { // outerloop for that. if (MF.size() != NumBlocks) { MachineOptimizationRemarkMissed R("gisel-legalize", "GISelFailure", - MF.getFunction()->getSubprogram(), + MF.getFunction().getSubprogram(), /*MBB=*/nullptr); R << "inserting blocks is not supported yet"; reportGISelFailure(MF, TPC, MORE, R); diff --git a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index bb2e61582314..f1cb5c0ad027 100644 --- a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -22,7 +22,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include #define DEBUG_TYPE "legalizer" @@ -104,6 +103,9 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) { return Size == 64 ? RTLIB::REM_F64 : RTLIB::REM_F32; case TargetOpcode::G_FPOW: return Size == 64 ? RTLIB::POW_F64 : RTLIB::POW_F32; + case TargetOpcode::G_FMA: + assert((Size == 32 || Size == 64) && "Unsupported size"); + return Size == 64 ? RTLIB::FMA_F64 : RTLIB::FMA_F32; } llvm_unreachable("Unknown libcall function"); } @@ -124,20 +126,46 @@ llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall, return LegalizerHelper::Legalized; } +// Useful for libcalls where all operands have the same type. 
static LegalizerHelper::LegalizeResult simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType) { auto Libcall = getRTLibDesc(MI.getOpcode(), Size); + + SmallVector Args; + for (unsigned i = 1; i < MI.getNumOperands(); i++) + Args.push_back({MI.getOperand(i).getReg(), OpType}); return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), OpType}, - {{MI.getOperand(1).getReg(), OpType}, - {MI.getOperand(2).getReg(), OpType}}); + Args); +} + +static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType, + Type *FromType) { + auto ToMVT = MVT::getVT(ToType); + auto FromMVT = MVT::getVT(FromType); + + switch (Opcode) { + case TargetOpcode::G_FPEXT: + return RTLIB::getFPEXT(FromMVT, ToMVT); + case TargetOpcode::G_FPTRUNC: + return RTLIB::getFPROUND(FromMVT, ToMVT); + } + llvm_unreachable("Unsupported libcall function"); +} + +static LegalizerHelper::LegalizeResult +conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType, + Type *FromType) { + RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType); + return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ToType}, + {{MI.getOperand(1).getReg(), FromType}}); } LegalizerHelper::LegalizeResult LegalizerHelper::libcall(MachineInstr &MI) { LLT LLTy = MRI.getType(MI.getOperand(0).getReg()); unsigned Size = LLTy.getSizeInBits(); - auto &Ctx = MIRBuilder.getMF().getFunction()->getContext(); + auto &Ctx = MIRBuilder.getMF().getFunction().getContext(); MIRBuilder.setInstr(MI); @@ -158,6 +186,7 @@ LegalizerHelper::libcall(MachineInstr &MI) { case TargetOpcode::G_FSUB: case TargetOpcode::G_FMUL: case TargetOpcode::G_FDIV: + case TargetOpcode::G_FMA: case TargetOpcode::G_FPOW: case TargetOpcode::G_FREM: { Type *HLTy = Size == 64 ? 
Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx); @@ -166,6 +195,30 @@ LegalizerHelper::libcall(MachineInstr &MI) { return Status; break; } + case TargetOpcode::G_FPEXT: { + // FIXME: Support other floating point types (half, fp128 etc) + unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); + unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); + if (ToSize != 64 || FromSize != 32) + return UnableToLegalize; + LegalizeResult Status = conversionLibcall( + MI, MIRBuilder, Type::getDoubleTy(Ctx), Type::getFloatTy(Ctx)); + if (Status != Legalized) + return Status; + break; + } + case TargetOpcode::G_FPTRUNC: { + // FIXME: Support other floating point types (half, fp128 etc) + unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); + unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); + if (ToSize != 32 || FromSize != 64) + return UnableToLegalize; + LegalizeResult Status = conversionLibcall( + MI, MIRBuilder, Type::getFloatTy(Ctx), Type::getDoubleTy(Ctx)); + if (Status != Legalized) + return Status; + break; + } } MI.eraseFromParent(); @@ -411,7 +464,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, return UnableToLegalize; int NumParts = SizeOp0 / NarrowSize; const APInt &Cst = MI.getOperand(1).getCImm()->getValue(); - LLVMContext &Ctx = MIRBuilder.getMF().getFunction()->getContext(); + LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext(); SmallVector DstRegs; for (int i = 0; i < NumParts; ++i) { @@ -814,7 +867,21 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { unsigned Zero = MRI.createGenericVirtualRegister(Ty); MIRBuilder.buildConstant(Zero, 0); - MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero); + + // For *signed* multiply, overflow is detected by checking: + // (hi != (lo >> bitwidth-1)) + if (Opcode == TargetOpcode::G_SMULH) { + unsigned Shifted = MRI.createGenericVirtualRegister(Ty); + unsigned ShiftAmt = MRI.createGenericVirtualRegister(Ty); + MIRBuilder.buildConstant(ShiftAmt, Ty.getSizeInBits() - 1); + MIRBuilder.buildInstr(TargetOpcode::G_ASHR) + .addDef(Shifted) + .addUse(Res) + .addUse(ShiftAmt); + MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted); + } else { + MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero); + } MI.eraseFromParent(); return Legalized; } @@ -825,7 +892,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { return UnableToLegalize; unsigned Res = MI.getOperand(0).getReg(); Type *ZeroTy; - LLVMContext &Ctx = MIRBuilder.getMF().getFunction()->getContext(); + LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext(); switch (Ty.getSizeInBits()) { case 16: ZeroTy = Type::getHalfTy(Ctx); @@ -836,6 +903,9 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { case 64: ZeroTy = Type::getDoubleTy(Ctx); break; + case 128: + ZeroTy = Type::getFP128Ty(Ctx); + break; default: llvm_unreachable("unexpected floating-point type"); } @@ -868,6 +938,18 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { MI.eraseFromParent(); return Legalized; } + case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: { + unsigned OldValRes = MI.getOperand(0).getReg(); + unsigned SuccessRes = MI.getOperand(1).getReg(); + unsigned Addr = MI.getOperand(2).getReg(); + unsigned CmpVal = MI.getOperand(3).getReg(); + unsigned NewVal = MI.getOperand(4).getReg(); + MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal, + 
**MI.memoperands_begin()); + MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal); + MI.eraseFromParent(); + return Legalized; + } } } diff --git a/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/lib/CodeGen/GlobalISel/LegalizerInfo.cpp index 1a23b26e7ce1..9c27c59a0654 100644 --- a/lib/CodeGen/GlobalISel/LegalizerInfo.cpp +++ b/lib/CodeGen/GlobalISel/LegalizerInfo.cpp @@ -167,19 +167,25 @@ LegalizerInfo::getAction(const InstrAspect &Aspect) const { assert(TablesInitialized && "backend forgot to call computeTables"); // These *have* to be implemented for now, they're the fundamental basis of // how everything else is transformed. - - // FIXME: the long-term plan calls for expansion in terms of load/store (if - // they're not legal). - if (Aspect.Opcode == TargetOpcode::G_MERGE_VALUES || - Aspect.Opcode == TargetOpcode::G_UNMERGE_VALUES) - return std::make_pair(Legal, Aspect.Type); - if (Aspect.Type.isScalar() || Aspect.Type.isPointer()) return findScalarLegalAction(Aspect); assert(Aspect.Type.isVector()); return findVectorLegalAction(Aspect); } +/// Helper function to get LLT for the given type index. +static LLT getTypeFromTypeIdx(const MachineInstr &MI, + const MachineRegisterInfo &MRI, unsigned OpIdx, + unsigned TypeIdx) { + assert(TypeIdx < MI.getNumOperands() && "Unexpected TypeIdx"); + // G_UNMERGE_VALUES has variable number of operands, but there is only + // one source type and one destination type as all destinations must be the + // same type. So, get the last operand if TypeIdx == 1. + if (MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && TypeIdx == 1) + return MRI.getType(MI.getOperand(MI.getNumOperands() - 1).getReg()); + return MRI.getType(MI.getOperand(OpIdx).getReg()); +} + std::tuple LegalizerInfo::getAction(const MachineInstr &MI, const MachineRegisterInfo &MRI) const { @@ -198,7 +204,7 @@ LegalizerInfo::getAction(const MachineInstr &MI, SeenTypes.set(TypeIdx); - LLT Ty = MRI.getType(MI.getOperand(i).getReg()); + LLT Ty = getTypeFromTypeIdx(MI, MRI, i, TypeIdx); auto Action = getAction({MI.getOpcode(), TypeIdx, Ty}); if (Action.first != Legal) return std::make_tuple(Action.first, TypeIdx, Action.second); diff --git a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index 079cd11574bd..475bb82e5b9c 100644 --- a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -263,7 +263,7 @@ MachineInstrBuilder MachineIRBuilder::buildConstant(unsigned Res, const ConstantInt *NewVal = &Val; if (Ty.getSizeInBits() != Val.getBitWidth()) - NewVal = ConstantInt::get(MF->getFunction()->getContext(), + NewVal = ConstantInt::get(MF->getFunction().getContext(), Val.getValue().sextOrTrunc(Ty.getSizeInBits())); return buildInstr(TargetOpcode::G_CONSTANT).addDef(Res).addCImm(NewVal); @@ -271,7 +271,7 @@ MachineInstrBuilder MachineIRBuilder::buildConstant(unsigned Res, MachineInstrBuilder MachineIRBuilder::buildConstant(unsigned Res, int64_t Val) { - auto IntN = IntegerType::get(MF->getFunction()->getContext(), + auto IntN = IntegerType::get(MF->getFunction().getContext(), MRI->getType(Res).getSizeInBits()); ConstantInt *CI = ConstantInt::get(IntN, Val, true); return buildConstant(Res, *CI); @@ -658,6 +658,31 @@ MachineInstrBuilder MachineIRBuilder::buildExtractVectorElement(unsigned Res, .addUse(Idx); } +MachineInstrBuilder +MachineIRBuilder::buildAtomicCmpXchg(unsigned OldValRes, unsigned Addr, + unsigned CmpVal, unsigned NewVal, + MachineMemOperand &MMO) { +#ifndef NDEBUG + LLT OldValResTy = 
MRI->getType(OldValRes); + LLT AddrTy = MRI->getType(Addr); + LLT CmpValTy = MRI->getType(CmpVal); + LLT NewValTy = MRI->getType(NewVal); + assert(OldValResTy.isScalar() && "invalid operand type"); + assert(AddrTy.isPointer() && "invalid operand type"); + assert(CmpValTy.isValid() && "invalid operand type"); + assert(NewValTy.isValid() && "invalid operand type"); + assert(OldValResTy == CmpValTy && "type mismatch"); + assert(OldValResTy == NewValTy && "type mismatch"); +#endif + + return buildInstr(TargetOpcode::G_ATOMIC_CMPXCHG) + .addDef(OldValRes) + .addUse(Addr) + .addUse(CmpVal) + .addUse(NewVal) + .addMemOperand(&MMO); +} + void MachineIRBuilder::validateTruncExt(unsigned Dst, unsigned Src, bool IsExtend) { #ifndef NDEBUG diff --git a/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/lib/CodeGen/GlobalISel/RegBankSelect.cpp index 36ce1c220cb4..006c9ea23034 100644 --- a/lib/CodeGen/GlobalISel/RegBankSelect.cpp +++ b/lib/CodeGen/GlobalISel/RegBankSelect.cpp @@ -601,9 +601,9 @@ bool RegBankSelect::runOnMachineFunction(MachineFunction &MF) { return false; DEBUG(dbgs() << "Assign register banks for: " << MF.getName() << '\n'); - const Function *F = MF.getFunction(); + const Function &F = MF.getFunction(); Mode SaveOptMode = OptMode; - if (F->hasFnAttribute(Attribute::OptimizeNone)) + if (F.hasFnAttribute(Attribute::OptimizeNone)) OptMode = Mode::Fast; init(MF); diff --git a/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp b/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp index 270394934139..b3d9209ae6eb 100644 --- a/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp +++ b/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp @@ -19,7 +19,6 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" diff --git a/lib/CodeGen/GlobalISel/Utils.cpp b/lib/CodeGen/GlobalISel/Utils.cpp index ef990b49aceb..9f8440f33164 100644 --- a/lib/CodeGen/GlobalISel/Utils.cpp +++ b/lib/CodeGen/GlobalISel/Utils.cpp @@ -56,6 +56,51 @@ unsigned llvm::constrainOperandRegClass( return constrainRegToClass(MRI, TII, RBI, InsertPt, Reg, *RegClass); } +bool llvm::constrainSelectedInstRegOperands(MachineInstr &I, + const TargetInstrInfo &TII, + const TargetRegisterInfo &TRI, + const RegisterBankInfo &RBI) { + MachineBasicBlock &MBB = *I.getParent(); + MachineFunction &MF = *MBB.getParent(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + + for (unsigned OpI = 0, OpE = I.getNumExplicitOperands(); OpI != OpE; ++OpI) { + MachineOperand &MO = I.getOperand(OpI); + + // There's nothing to be done on non-register operands. + if (!MO.isReg()) + continue; + + DEBUG(dbgs() << "Converting operand: " << MO << '\n'); + assert(MO.isReg() && "Unsupported non-reg operand"); + + unsigned Reg = MO.getReg(); + // Physical registers don't need to be constrained. + if (TRI.isPhysicalRegister(Reg)) + continue; + + // Register operands with a value of 0 (e.g. predicate operands) don't need + // to be constrained. + if (Reg == 0) + continue; + + // If the operand is a vreg, we should constrain its regclass, and only + // insert COPYs if that's impossible. + // constrainOperandRegClass does that for us. + MO.setReg(constrainOperandRegClass(MF, TRI, MRI, TII, RBI, I, I.getDesc(), + Reg, OpI)); + + // Tie uses to defs as indicated in MCInstrDesc if this hasn't already been + // done. 
+ if (MO.isUse()) { + int DefIdx = I.getDesc().getOperandConstraint(OpI, MCOI::TIED_TO); + if (DefIdx != -1 && !I.isRegTiedToUseOperand(DefIdx)) + I.tieOperands(DefIdx, OpI); + } + } + return true; +} + bool llvm::isTriviallyDead(const MachineInstr &MI, const MachineRegisterInfo &MRI) { // If we can move an instruction, we can remove it. Otherwise, it has diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp index 567461c19452..a22ce0dab9c2 100644 --- a/lib/CodeGen/IfConversion.cpp +++ b/lib/CodeGen/IfConversion.cpp @@ -337,7 +337,7 @@ INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) INITIALIZE_PASS_END(IfConverter, DEBUG_TYPE, "If Converter", false, false) bool IfConverter::runOnMachineFunction(MachineFunction &MF) { - if (skipFunction(*MF.getFunction()) || (PredicateFtor && !PredicateFtor(MF))) + if (skipFunction(MF.getFunction()) || (PredicateFtor && !PredicateFtor(MF))) return false; const TargetSubtargetInfo &ST = MF.getSubtarget(); @@ -406,12 +406,12 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { case ICSimpleFalse: { bool isFalse = Kind == ICSimpleFalse; if ((isFalse && DisableSimpleF) || (!isFalse && DisableSimple)) break; - DEBUG(dbgs() << "Ifcvt (Simple" << (Kind == ICSimpleFalse ? - " false" : "") - << "): BB#" << BBI.BB->getNumber() << " (" - << ((Kind == ICSimpleFalse) - ? BBI.FalseBB->getNumber() - : BBI.TrueBB->getNumber()) << ") "); + DEBUG(dbgs() << "Ifcvt (Simple" + << (Kind == ICSimpleFalse ? " false" : "") + << "): " << printMBBReference(*BBI.BB) << " (" + << ((Kind == ICSimpleFalse) ? BBI.FalseBB->getNumber() + : BBI.TrueBB->getNumber()) + << ") "); RetVal = IfConvertSimple(BBI, Kind); DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n"); if (RetVal) { @@ -435,9 +435,9 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << " false"); if (isRev) DEBUG(dbgs() << " rev"); - DEBUG(dbgs() << "): BB#" << BBI.BB->getNumber() << " (T:" - << BBI.TrueBB->getNumber() << ",F:" - << BBI.FalseBB->getNumber() << ") "); + DEBUG(dbgs() << "): " << printMBBReference(*BBI.BB) + << " (T:" << BBI.TrueBB->getNumber() + << ",F:" << BBI.FalseBB->getNumber() << ") "); RetVal = IfConvertTriangle(BBI, Kind); DEBUG(dbgs() << (RetVal ? "succeeded!" 
: "failed!") << "\n"); if (RetVal) { @@ -453,9 +453,9 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { } case ICDiamond: if (DisableDiamond) break; - DEBUG(dbgs() << "Ifcvt (Diamond): BB#" << BBI.BB->getNumber() << " (T:" - << BBI.TrueBB->getNumber() << ",F:" - << BBI.FalseBB->getNumber() << ") "); + DEBUG(dbgs() << "Ifcvt (Diamond): " << printMBBReference(*BBI.BB) + << " (T:" << BBI.TrueBB->getNumber() + << ",F:" << BBI.FalseBB->getNumber() << ") "); RetVal = IfConvertDiamond(BBI, Kind, NumDups, NumDups2, Token->TClobbersPred, Token->FClobbersPred); @@ -464,10 +464,9 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { break; case ICForkedDiamond: if (DisableForkedDiamond) break; - DEBUG(dbgs() << "Ifcvt (Forked Diamond): BB#" - << BBI.BB->getNumber() << " (T:" - << BBI.TrueBB->getNumber() << ",F:" - << BBI.FalseBB->getNumber() << ") "); + DEBUG(dbgs() << "Ifcvt (Forked Diamond): " << printMBBReference(*BBI.BB) + << " (T:" << BBI.TrueBB->getNumber() + << ",F:" << BBI.FalseBB->getNumber() << ") "); RetVal = IfConvertForkedDiamond(BBI, Kind, NumDups, NumDups2, Token->TClobbersPred, Token->FClobbersPred); diff --git a/lib/CodeGen/ImplicitNullChecks.cpp b/lib/CodeGen/ImplicitNullChecks.cpp index d2dd7f13ce27..a10bb0199264 100644 --- a/lib/CodeGen/ImplicitNullChecks.cpp +++ b/lib/CodeGen/ImplicitNullChecks.cpp @@ -63,13 +63,13 @@ using namespace llvm; static cl::opt PageSize("imp-null-check-page-size", cl::desc("The page size of the target in bytes"), - cl::init(4096)); + cl::init(4096), cl::Hidden); static cl::opt MaxInstsToConsider( "imp-null-max-insts-to-consider", cl::desc("The max number of instructions to consider hoisting loads over " "(the algorithm is quadratic over this number)"), - cl::init(8)); + cl::Hidden, cl::init(8)); #define DEBUG_TYPE "implicit-null-checks" @@ -198,7 +198,7 @@ class ImplicitNullChecks : public MachineFunctionPass { SuitabilityResult isSuitableMemoryOp(MachineInstr &MI, unsigned PointerReg, ArrayRef PrevInsts); - /// Return true if \p FaultingMI can be hoisted from after the the + /// Return true if \p FaultingMI can be hoisted from after the /// instructions in \p InstsSeenSoFar to before them. Set \p Dependence to a /// non-null value if we also need to (and legally can) hoist a depedency. bool canHoistInst(MachineInstr *FaultingMI, unsigned PointerReg, @@ -421,7 +421,7 @@ bool ImplicitNullChecks::canHoistInst(MachineInstr *FaultingMI, // test %rcx, %rcx // je _null_block // _non_null_block: - // %rdx = INST + // %rdx = INST // ... // // This restriction does not apply to the faulting load inst because in @@ -498,7 +498,7 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks( // Starting with a code fragment like: // - // test %RAX, %RAX + // test %rax, %rax // jne LblNotNull // // LblNull: @@ -508,13 +508,13 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks( // Inst0 // Inst1 // ... - // Def = Load (%RAX + ) + // Def = Load (%rax + ) // ... // // // we want to end up with // - // Def = FaultingLoad (%RAX + ), LblNull + // Def = FaultingLoad (%rax + ), LblNull // jmp LblNotNull ;; explicit or fallthrough // // LblNotNull: @@ -528,11 +528,11 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks( // // To see why this is legal, consider the two possibilities: // - // 1. %RAX is null: since we constrain to be less than PageSize, the + // 1. %rax is null: since we constrain to be less than PageSize, the // load instruction dereferences the null page, causing a segmentation // fault. // - // 2. 
%RAX is not null: in this case we know that the load cannot fault, as + // 2. %rax is not null: in this case we know that the load cannot fault, as // otherwise the load would've faulted in the original program too and the // original program would've been undefined. // diff --git a/lib/CodeGen/IndirectBrExpandPass.cpp b/lib/CodeGen/IndirectBrExpandPass.cpp new file mode 100644 index 000000000000..7b05ebf820fd --- /dev/null +++ b/lib/CodeGen/IndirectBrExpandPass.cpp @@ -0,0 +1,221 @@ +//===- IndirectBrExpandPass.cpp - Expand indirectbr to switch -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// Implements an expansion pass to turn `indirectbr` instructions in the IR +/// into `switch` instructions. This works by enumerating the basic blocks in +/// a dense range of integers, replacing each `blockaddr` constant with the +/// corresponding integer constant, and then building a switch that maps from +/// the integers to the actual blocks. All of the indirectbr instructions in the +/// function are redirected to this common switch. +/// +/// While this is generically useful if a target is unable to codegen +/// `indirectbr` natively, it is primarily useful when there is some desire to +/// get the builtin non-jump-table lowering of a switch even when the input +/// source contained an explicit indirect branch construct. +/// +/// Note that it doesn't make any sense to enable this pass unless a target also +/// disables jump-table lowering of switches. Doing that is likely to pessimize +/// the code. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Sequence.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" + +using namespace llvm; + +#define DEBUG_TYPE "indirectbr-expand" + +namespace { + +class IndirectBrExpandPass : public FunctionPass { + const TargetLowering *TLI = nullptr; + +public: + static char ID; // Pass identification, replacement for typeid + + IndirectBrExpandPass() : FunctionPass(ID) { + initializeIndirectBrExpandPassPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override; +}; + +} // end anonymous namespace + +char IndirectBrExpandPass::ID = 0; + +INITIALIZE_PASS(IndirectBrExpandPass, DEBUG_TYPE, + "Expand indirectbr instructions", false, false) + +FunctionPass *llvm::createIndirectBrExpandPass() { + return new IndirectBrExpandPass(); +} + +bool IndirectBrExpandPass::runOnFunction(Function &F) { + auto &DL = F.getParent()->getDataLayout(); + auto *TPC = getAnalysisIfAvailable(); + if (!TPC) + return false; + + auto &TM = TPC->getTM(); + auto &STI = *TM.getSubtargetImpl(F); + if (!STI.enableIndirectBrExpand()) + return false; + TLI = STI.getTargetLowering(); + + SmallVector IndirectBrs; + + // Set of all potential successors for indirectbr instructions. 
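The file header comment above describes the whole transformation: each address-taken block gets a dense integer id (starting at 1, since 0 must keep comparing unequal to null), blockaddress constants are rewritten to those ids, and every indirectbr is funneled into one common switch. A plain-C++ sketch of that id-plus-switch dispatch with hypothetical block names, just to illustrate the shape of the result:

#include <cstdio>
#include <map>
#include <string>
#include <vector>

int main() {
  // Hypothetical address-taken blocks, enumerated from 1 (0 stays reserved for null).
  std::vector<std::string> Blocks = {"bb.then", "bb.else", "bb.exit"};
  std::map<std::string, int> Id;
  for (size_t I = 0; I != Blocks.size(); ++I)
    Id[Blocks[I]] = static_cast<int>(I) + 1;

  // What used to be an indirect branch on a block address becomes a switch on the id.
  int Target = Id["bb.else"];
  switch (Target) {
  case 1: std::printf("in bb.then\n"); break;
  case 2: std::printf("in bb.else\n"); break;
  case 3: std::printf("in bb.exit\n"); break;
  default: std::printf("unreachable\n"); break;
  }
  return 0;
}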
+ SmallPtrSet IndirectBrSuccs; + + // Build a list of indirectbrs that we want to rewrite. + for (BasicBlock &BB : F) + if (auto *IBr = dyn_cast(BB.getTerminator())) { + // Handle the degenerate case of no successors by replacing the indirectbr + // with unreachable as there is no successor available. + if (IBr->getNumSuccessors() == 0) { + (void)new UnreachableInst(F.getContext(), IBr); + IBr->eraseFromParent(); + continue; + } + + IndirectBrs.push_back(IBr); + for (BasicBlock *SuccBB : IBr->successors()) + IndirectBrSuccs.insert(SuccBB); + } + + if (IndirectBrs.empty()) + return false; + + // If we need to replace any indirectbrs we need to establish integer + // constants that will correspond to each of the basic blocks in the function + // whose address escapes. We do that here and rewrite all the blockaddress + // constants to just be those integer constants cast to a pointer type. + SmallVector BBs; + + for (BasicBlock &BB : F) { + // Skip blocks that aren't successors to an indirectbr we're going to + // rewrite. + if (!IndirectBrSuccs.count(&BB)) + continue; + + auto IsBlockAddressUse = [&](const Use &U) { + return isa(U.getUser()); + }; + auto BlockAddressUseIt = llvm::find_if(BB.uses(), IsBlockAddressUse); + if (BlockAddressUseIt == BB.use_end()) + continue; + + assert(std::find_if(std::next(BlockAddressUseIt), BB.use_end(), + IsBlockAddressUse) == BB.use_end() && + "There should only ever be a single blockaddress use because it is " + "a constant and should be uniqued."); + + auto *BA = cast(BlockAddressUseIt->getUser()); + + // Skip if the constant was formed but ended up not being used (due to DCE + // or whatever). + if (!BA->isConstantUsed()) + continue; + + // Compute the index we want to use for this basic block. We can't use zero + // because null can be compared with block addresses. + int BBIndex = BBs.size() + 1; + BBs.push_back(&BB); + + auto *ITy = cast(DL.getIntPtrType(BA->getType())); + ConstantInt *BBIndexC = ConstantInt::get(ITy, BBIndex); + + // Now rewrite the blockaddress to an integer constant based on the index. + // FIXME: We could potentially preserve the uses as arguments to inline asm. + // This would allow some uses such as diagnostic information in crashes to + // have higher quality even when this transform is enabled, but would break + // users that round-trip blockaddresses through inline assembly and then + // back into an indirectbr. + BA->replaceAllUsesWith(ConstantExpr::getIntToPtr(BBIndexC, BA->getType())); + } + + if (BBs.empty()) { + // There are no blocks whose address is taken, so any indirectbr instruction + // cannot get a valid input and we can replace all of them with unreachable. + for (auto *IBr : IndirectBrs) { + (void)new UnreachableInst(F.getContext(), IBr); + IBr->eraseFromParent(); + } + return true; + } + + BasicBlock *SwitchBB; + Value *SwitchValue; + + // Compute a common integer type across all the indirectbr instructions. + IntegerType *CommonITy = nullptr; + for (auto *IBr : IndirectBrs) { + auto *ITy = + cast(DL.getIntPtrType(IBr->getAddress()->getType())); + if (!CommonITy || ITy->getBitWidth() > CommonITy->getBitWidth()) + CommonITy = ITy; + } + + auto GetSwitchValue = [DL, CommonITy](IndirectBrInst *IBr) { + return CastInst::CreatePointerCast( + IBr->getAddress(), CommonITy, + Twine(IBr->getAddress()->getName()) + ".switch_cast", IBr); + }; + + if (IndirectBrs.size() == 1) { + // If we only have one indirectbr, we can just directly replace it within + // its block. 
+ SwitchBB = IndirectBrs[0]->getParent(); + SwitchValue = GetSwitchValue(IndirectBrs[0]); + IndirectBrs[0]->eraseFromParent(); + } else { + // Otherwise we need to create a new block to hold the switch across BBs, + // jump to that block instead of each indirectbr, and phi together the + // values for the switch. + SwitchBB = BasicBlock::Create(F.getContext(), "switch_bb", &F); + auto *SwitchPN = PHINode::Create(CommonITy, IndirectBrs.size(), + "switch_value_phi", SwitchBB); + SwitchValue = SwitchPN; + + // Now replace the indirectbr instructions with direct branches to the + // switch block and fill out the PHI operands. + for (auto *IBr : IndirectBrs) { + SwitchPN->addIncoming(GetSwitchValue(IBr), IBr->getParent()); + BranchInst::Create(SwitchBB, IBr); + IBr->eraseFromParent(); + } + } + + // Now build the switch in the block. The block will have no terminator + // already. + auto *SI = SwitchInst::Create(SwitchValue, BBs[0], BBs.size(), SwitchBB); + + // Add a case for each block. + for (int i : llvm::seq(1, BBs.size())) + SI->addCase(ConstantInt::get(CommonITy, i + 1), BBs[i]); + + return true; +} diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp index aff6189283e6..86ce4b7a9464 100644 --- a/lib/CodeGen/InlineSpiller.cpp +++ b/lib/CodeGen/InlineSpiller.cpp @@ -26,9 +26,9 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/LiveInterval.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/LiveRangeEdit.h" -#include "llvm/CodeGen/LiveStackAnalysis.h" +#include "llvm/CodeGen/LiveStacks.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineDominators.h" @@ -360,7 +360,7 @@ bool InlineSpiller::isSibling(unsigned Reg) { /// /// x = def /// spill x -/// y = use x +/// y = use killed x /// /// This hoist only helps when the copy kills its source. /// diff --git a/lib/CodeGen/InterferenceCache.cpp b/lib/CodeGen/InterferenceCache.cpp index 23090cafb421..72227cc7bba9 100644 --- a/lib/CodeGen/InterferenceCache.cpp +++ b/lib/CodeGen/InterferenceCache.cpp @@ -14,8 +14,8 @@ #include "InterferenceCache.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/CodeGen/LiveInterval.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveIntervalUnion.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineOperand.h" diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp index c6cc909e25d3..12777d5ed110 100644 --- a/lib/CodeGen/IntrinsicLowering.cpp +++ b/lib/CodeGen/IntrinsicLowering.cpp @@ -57,10 +57,10 @@ static void EnsureFPIntrinsicsExist(Module &M, Function &Fn, } } -/// ReplaceCallWith - This function is used when we want to lower an intrinsic -/// call to a call of an external function. This handles hard cases such as -/// when there was already a prototype for the external function, and if that -/// prototype doesn't match the arguments we expect to pass in. +/// This function is used when we want to lower an intrinsic call to a call of +/// an external function. This handles hard cases such as when there was already +/// a prototype for the external function, but that prototype doesn't match the +/// arguments we expect to pass in. 
template static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI, ArgIt ArgBegin, ArgIt ArgEnd, @@ -161,12 +161,11 @@ void IntrinsicLowering::AddPrototypes(Module &M) { } } -/// LowerBSWAP - Emit the code to lower bswap of V before the specified -/// instruction IP. +/// Emit the code to lower bswap of V before the specified instruction IP. static Value *LowerBSWAP(LLVMContext &Context, Value *V, Instruction *IP) { - assert(V->getType()->isIntegerTy() && "Can't bswap a non-integer type!"); + assert(V->getType()->isIntOrIntVectorTy() && "Can't bswap a non-integer type!"); - unsigned BitSize = V->getType()->getPrimitiveSizeInBits(); + unsigned BitSize = V->getType()->getScalarSizeInBits(); IRBuilder<> Builder(IP); @@ -190,10 +189,10 @@ static Value *LowerBSWAP(LLVMContext &Context, Value *V, Instruction *IP) { Value *Tmp1 = Builder.CreateLShr(V,ConstantInt::get(V->getType(), 24), "bswap.1"); Tmp3 = Builder.CreateAnd(Tmp3, - ConstantInt::get(Type::getInt32Ty(Context), 0xFF0000), + ConstantInt::get(V->getType(), 0xFF0000), "bswap.and3"); Tmp2 = Builder.CreateAnd(Tmp2, - ConstantInt::get(Type::getInt32Ty(Context), 0xFF00), + ConstantInt::get(V->getType(), 0xFF00), "bswap.and2"); Tmp4 = Builder.CreateOr(Tmp4, Tmp3, "bswap.or1"); Tmp2 = Builder.CreateOr(Tmp2, Tmp1, "bswap.or2"); @@ -221,27 +220,27 @@ static Value *LowerBSWAP(LLVMContext &Context, Value *V, Instruction *IP) { ConstantInt::get(V->getType(), 56), "bswap.1"); Tmp7 = Builder.CreateAnd(Tmp7, - ConstantInt::get(Type::getInt64Ty(Context), + ConstantInt::get(V->getType(), 0xFF000000000000ULL), "bswap.and7"); Tmp6 = Builder.CreateAnd(Tmp6, - ConstantInt::get(Type::getInt64Ty(Context), + ConstantInt::get(V->getType(), 0xFF0000000000ULL), "bswap.and6"); Tmp5 = Builder.CreateAnd(Tmp5, - ConstantInt::get(Type::getInt64Ty(Context), + ConstantInt::get(V->getType(), 0xFF00000000ULL), "bswap.and5"); Tmp4 = Builder.CreateAnd(Tmp4, - ConstantInt::get(Type::getInt64Ty(Context), + ConstantInt::get(V->getType(), 0xFF000000ULL), "bswap.and4"); Tmp3 = Builder.CreateAnd(Tmp3, - ConstantInt::get(Type::getInt64Ty(Context), + ConstantInt::get(V->getType(), 0xFF0000ULL), "bswap.and3"); Tmp2 = Builder.CreateAnd(Tmp2, - ConstantInt::get(Type::getInt64Ty(Context), + ConstantInt::get(V->getType(), 0xFF00ULL), "bswap.and2"); Tmp8 = Builder.CreateOr(Tmp8, Tmp7, "bswap.or1"); @@ -257,8 +256,7 @@ static Value *LowerBSWAP(LLVMContext &Context, Value *V, Instruction *IP) { return V; } -/// LowerCTPOP - Emit the code to lower ctpop of V before the specified -/// instruction IP. +/// Emit the code to lower ctpop of V before the specified instruction IP. static Value *LowerCTPOP(LLVMContext &Context, Value *V, Instruction *IP) { assert(V->getType()->isIntegerTy() && "Can't ctpop a non-integer type!"); @@ -297,8 +295,7 @@ static Value *LowerCTPOP(LLVMContext &Context, Value *V, Instruction *IP) { return Count; } -/// LowerCTLZ - Emit the code to lower ctlz of V before the specified -/// instruction IP. +/// Emit the code to lower ctlz of V before the specified instruction IP. 
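The LowerBSWAP change above only switches the mask constants from fixed i32/i64 types to the value's own type (and the size query to getScalarSizeInBits), so the same shift-and-mask expansion now also applies to vectors of integers. A standalone plain-C++ sketch of the 32-bit pattern it emits, with the same shift/mask structure:

#include <cassert>
#include <cstdint>

// Byte-swap a 32-bit value with shifts and masks, matching the Tmp4..Tmp1 shape
// built by LowerBSWAP.
static uint32_t bswap32(uint32_t V) {
  uint32_t T4 = V << 24;
  uint32_t T3 = (V << 8) & 0x00ff0000u;
  uint32_t T2 = (V >> 8) & 0x0000ff00u;
  uint32_t T1 = V >> 24;
  return T4 | T3 | T2 | T1;
}

int main() {
  assert(bswap32(0x12345678u) == 0x78563412u);
  return 0;
}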
static Value *LowerCTLZ(LLVMContext &Context, Value *V, Instruction *IP) { IRBuilder<> Builder(IP); diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index d3ce115b87dd..4c6e21ab315a 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -18,9 +18,7 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetLoweringObjectFile.h" #include "llvm/CodeGen/TargetPassConfig.h" -#include "llvm/IR/IRPrintingPasses.h" #include "llvm/IR/LegacyPassManager.h" -#include "llvm/IR/Verifier.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCCodeEmitter.h" @@ -34,7 +32,6 @@ #include "llvm/Support/TargetRegistry.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Transforms/Scalar.h" using namespace llvm; void LLVMTargetMachine::initAsmInfo() { @@ -84,10 +81,9 @@ LLVMTargetMachine::LLVMTargetMachine(const Target &T, this->OptLevel = OL; } -TargetIRAnalysis LLVMTargetMachine::getTargetIRAnalysis() { - return TargetIRAnalysis([this](const Function &F) { - return TargetTransformInfo(BasicTTIImpl(this, F)); - }); +TargetTransformInfo +LLVMTargetMachine::getTargetTransformInfo(const Function &F) { + return TargetTransformInfo(BasicTTIImpl(this, F)); } /// addPassesToX helper drives creation and initialization of TargetPassConfig. @@ -140,8 +136,7 @@ bool LLVMTargetMachine::addAsmPrinter(PassManagerBase &PM, MCE = getTarget().createMCCodeEmitter(MII, MRI, Context); MCAsmBackend *MAB = - getTarget().createMCAsmBackend(MRI, getTargetTriple().str(), TargetCPU, - Options.MCOptions); + getTarget().createMCAsmBackend(STI, MRI, Options.MCOptions); auto FOut = llvm::make_unique(Out); MCStreamer *S = getTarget().createAsmStreamer( Context, std::move(FOut), Options.MCOptions.AsmVerbose, @@ -155,8 +150,7 @@ bool LLVMTargetMachine::addAsmPrinter(PassManagerBase &PM, // emission fails. MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(MII, MRI, Context); MCAsmBackend *MAB = - getTarget().createMCAsmBackend(MRI, getTargetTriple().str(), TargetCPU, - Options.MCOptions); + getTarget().createMCAsmBackend(STI, MRI, Options.MCOptions); if (!MCE || !MAB) return true; @@ -229,17 +223,16 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx, // Create the code emitter for the target if it exists. If not, .o file // emission fails. + const MCSubtargetInfo &STI = *getMCSubtargetInfo(); const MCRegisterInfo &MRI = *getMCRegisterInfo(); MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(*getMCInstrInfo(), MRI, *Ctx); MCAsmBackend *MAB = - getTarget().createMCAsmBackend(MRI, getTargetTriple().str(), TargetCPU, - Options.MCOptions); + getTarget().createMCAsmBackend(STI, MRI, Options.MCOptions); if (!MCE || !MAB) return true; const Triple &T = getTargetTriple(); - const MCSubtargetInfo &STI = *getMCSubtargetInfo(); std::unique_ptr AsmStreamer(getTarget().createMCObjectStreamer( T, *Ctx, std::unique_ptr(MAB), Out, std::unique_ptr(MCE), STI, Options.MCOptions.MCRelaxAll, diff --git a/lib/CodeGen/LexicalScopes.cpp b/lib/CodeGen/LexicalScopes.cpp index 47ab4ef65c72..8c54751ee833 100644 --- a/lib/CodeGen/LexicalScopes.cpp +++ b/lib/CodeGen/LexicalScopes.cpp @@ -49,7 +49,7 @@ void LexicalScopes::reset() { void LexicalScopes::initialize(const MachineFunction &Fn) { reset(); // Don't attempt any lexical scope creation for a NoDebug compile unit. 
- if (Fn.getFunction()->getSubprogram()->getUnit()->getEmissionKind() == + if (Fn.getFunction().getSubprogram()->getUnit()->getEmissionKind() == DICompileUnit::NoDebug) return; MF = &Fn; @@ -173,7 +173,7 @@ LexicalScopes::getOrCreateRegularScope(const DILocalScope *Scope) { false)).first; if (!Parent) { - assert(cast(Scope)->describes(MF->getFunction())); + assert(cast(Scope)->describes(&MF->getFunction())); assert(!CurrentFnLexicalScope); CurrentFnLexicalScope = &I->second; } diff --git a/lib/CodeGen/LiveDebugValues.cpp b/lib/CodeGen/LiveDebugValues.cpp index 3d4e35e5bdc8..d18703803d31 100644 --- a/lib/CodeGen/LiveDebugValues.cpp +++ b/lib/CodeGen/LiveDebugValues.cpp @@ -33,7 +33,6 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/TargetFrameLowering.h" @@ -427,16 +426,39 @@ bool LiveDebugValues::isSpillInstruction(const MachineInstr &MI, FrameInfo.isSpillSlotObjectIndex(FI))) return false; - // In a spill instruction generated by the InlineSpiller the spilled register - // has its kill flag set. Return false if we don't find such a register. - Reg = 0; + auto isKilledReg = [&](const MachineOperand MO, unsigned &Reg) { + if (!MO.isReg() || !MO.isUse()) { + Reg = 0; + return false; + } + Reg = MO.getReg(); + return MO.isKill(); + }; + for (const MachineOperand &MO : MI.operands()) { - if (MO.isReg() && MO.isUse() && MO.isKill()) { - Reg = MO.getReg(); - break; + // In a spill instruction generated by the InlineSpiller the spilled + // register has its kill flag set. + if (isKilledReg(MO, Reg)) + return true; + if (Reg != 0) { + // Check whether next instruction kills the spilled register. + // FIXME: Current solution does not cover search for killed register in + // bundles and instructions further down the chain. + auto NextI = std::next(MI.getIterator()); + // Skip next instruction that points to basic block end iterator. + if (MI.getParent()->end() == NextI) + continue; + unsigned RegNext; + for (const MachineOperand &MONext : NextI->operands()) { + // Return true if we came across the register from the + // previous spill instruction that is killed in NextI. + if (isKilledReg(MONext, RegNext) && RegNext == Reg) + return true; + } } } - return Reg != 0; + // Return false if we didn't find spilled register. + return false; } /// A spilled register may indicate that we have to end the current range of @@ -498,7 +520,7 @@ bool LiveDebugValues::transferTerminatorInst(MachineInstr &MI, const VarLocMap &VarLocIDs) { bool Changed = false; const MachineBasicBlock *CurMBB = MI.getParent(); - if (!(MI.isTerminator() || (&MI == &CurMBB->instr_back()))) + if (!(MI.isTerminator() || (&MI == &CurMBB->back()))) return false; if (OpenRanges.empty()) @@ -704,12 +726,12 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) { } bool LiveDebugValues::runOnMachineFunction(MachineFunction &MF) { - if (!MF.getFunction()->getSubprogram()) + if (!MF.getFunction().getSubprogram()) // LiveDebugValues will already have removed all DBG_VALUEs. return false; // Skip functions from NoDebug compilation units. 
- if (MF.getFunction()->getSubprogram()->getUnit()->getEmissionKind() == + if (MF.getFunction().getSubprogram()->getUnit()->getEmissionKind() == DICompileUnit::NoDebug) return false; diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp index 97bb7c712f6a..75e3d35169cf 100644 --- a/lib/CodeGen/LiveDebugVariables.cpp +++ b/lib/CodeGen/LiveDebugVariables.cpp @@ -30,7 +30,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/LexicalScopes.h" #include "llvm/CodeGen/LiveInterval.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" @@ -242,8 +242,11 @@ class UserValue { // We are storing a MachineOperand outside a MachineInstr. locations.back().clearParent(); // Don't store def operands. - if (locations.back().isReg()) + if (locations.back().isReg()) { + if (locations.back().isDef()) + locations.back().setIsDead(false); locations.back().setIsUse(); + } return locations.size() - 1; } @@ -833,7 +836,7 @@ static void removeDebugValues(MachineFunction &mf) { bool LiveDebugVariables::runOnMachineFunction(MachineFunction &mf) { if (!EnableLDV) return false; - if (!mf.getFunction()->getSubprogram()) { + if (!mf.getFunction().getSubprogram()) { removeDebugValues(mf); return false; } @@ -1174,7 +1177,7 @@ void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS, MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start)->getIterator(); SlotIndex MBBEnd = LIS.getMBBEndIdx(&*MBB); - DEBUG(dbgs() << " BB#" << MBB->getNumber() << '-' << MBBEnd); + DEBUG(dbgs() << ' ' << printMBBReference(*MBB) << '-' << MBBEnd); insertDebugValue(&*MBB, Start, Stop, Loc, Spilled, LIS, TII, TRI); // This interval may span multiple basic blocks. // Insert a DBG_VALUE into each one. 
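The loop continued in the next hunk walks block by block: whenever the location's range [Start, Stop) crosses a basic-block boundary, a fresh DBG_VALUE is emitted for the piece falling inside that block. A plain-C++ sketch of that chopping (not the LLVM API; the slot indexes are made up):

#include <algorithm>
#include <cstdio>
#include <vector>

int main() {
  std::vector<unsigned> BlockEnds = {10, 25, 40, 60}; // hypothetical end index of each block
  unsigned Start = 7, Stop = 33;                      // debug location live range
  unsigned Cur = Start;
  for (unsigned I = 0; I != BlockEnds.size() && Cur < Stop; ++I) {
    if (BlockEnds[I] <= Cur)
      continue; // this block ends before the range starts
    unsigned PieceEnd = std::min(Stop, BlockEnds[I]);
    std::printf("emit DBG_VALUE in bb.%u for [%u, %u)\n", I, Cur, PieceEnd);
    Cur = PieceEnd; // continue into the next block, as the pass does
  }
  return 0;
}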
@@ -1184,7 +1187,7 @@ void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS, if (++MBB == MFEnd) break; MBBEnd = LIS.getMBBEndIdx(&*MBB); - DEBUG(dbgs() << " BB#" << MBB->getNumber() << '-' << MBBEnd); + DEBUG(dbgs() << ' ' << printMBBReference(*MBB) << '-' << MBBEnd); insertDebugValue(&*MBB, Start, Stop, Loc, Spilled, LIS, TII, TRI); } DEBUG(dbgs() << '\n'); diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp index b306932832c9..302c75133e35 100644 --- a/lib/CodeGen/LiveInterval.cpp +++ b/lib/CodeGen/LiveInterval.cpp @@ -26,7 +26,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/iterator_range.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervals.cpp similarity index 98% rename from lib/CodeGen/LiveIntervalAnalysis.cpp rename to lib/CodeGen/LiveIntervals.cpp index b26628b3b5fd..79fdba7e062a 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervals.cpp @@ -1,4 +1,4 @@ -//===- LiveIntervalAnalysis.cpp - Live Interval Analysis ------------------===// +//===- LiveIntervals.cpp - Live Interval Analysis -------------------------===// // // The LLVM Compiler Infrastructure // @@ -14,7 +14,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "LiveRangeCalc.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DepthFirstIterator.h" @@ -323,7 +323,7 @@ void LiveIntervals::computeLiveInRegUnits() { // Create phi-defs at Begin for all live-in registers. SlotIndex Begin = Indexes->getMBBStartIdx(&MBB); - DEBUG(dbgs() << Begin << "\tBB#" << MBB.getNumber()); + DEBUG(dbgs() << Begin << "\t" << printMBBReference(MBB)); for (const auto &LI : MBB.liveins()) { for (MCRegUnitIterator Units(LI.PhysReg, TRI); Units.isValid(); ++Units) { unsigned Unit = *Units; @@ -698,11 +698,11 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) { // Check if any of the regunits are live beyond the end of RI. That could // happen when a physreg is defined as a copy of a virtreg: // - // %EAX = COPY %vreg5 - // FOO %vreg5 <--- MI, cancel kill because %EAX is live. - // BAR %EAX + // %eax = COPY %5 + // FOO %5 <--- MI, cancel kill because %eax is live. + // BAR killed %eax // - // There should be no kill flag on FOO when %vreg5 is rewritten as %EAX. + // There should be no kill flag on FOO when %5 is rewritten as %eax. for (auto &RUP : RU) { const LiveRange &RURange = *RUP.first; LiveRange::const_iterator &I = RUP.second; @@ -719,13 +719,13 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) { // When reading a partial undefined value we must not add a kill flag. // The regalloc might have used the undef lane for something else. // Example: - // %vreg1 = ... ; R32: %vreg1 - // %vreg2:high16 = ... ; R64: %vreg2 - // = read %vreg2 ; R64: %vreg2 - // = read %vreg1 ; R32: %vreg1 - // The flag is correct for %vreg2, but the register allocator may - // assign R0L to %vreg1, and R0 to %vreg2 because the low 32bits of R0 - // are actually never written by %vreg2. After assignment the + // %1 = ... ; R32: %1 + // %2:high16 = ... 
; R64: %2 + // = read killed %2 ; R64: %2 + // = read %1 ; R32: %1 + // The flag is correct for %2, but the register allocator may + // assign R0L to %1, and R0 to %2 because the low 32bits of R0 + // are actually never written by %2. After assignment the // flag at the read instruction is invalid. LaneBitmask DefinedLanesMask; if (!SRs.empty()) { diff --git a/lib/CodeGen/LiveRangeCalc.cpp b/lib/CodeGen/LiveRangeCalc.cpp index 0074a9fd907e..66c23b7b69ce 100644 --- a/lib/CodeGen/LiveRangeCalc.cpp +++ b/lib/CodeGen/LiveRangeCalc.cpp @@ -164,7 +164,7 @@ void LiveRangeCalc::extendToUses(LiveRange &LR, unsigned Reg, LaneBitmask Mask, const TargetRegisterInfo &TRI = *MRI->getTargetRegisterInfo(); for (MachineOperand &MO : MRI->reg_nodbg_operands(Reg)) { // Clear all kill flags. They will be reinserted after register allocation - // by LiveIntervalAnalysis::addKillFlags(). + // by LiveIntervals::addKillFlags(). if (MO.isUse()) MO.setIsKill(false); // MO::readsReg returns "true" for subregister defs. This is for keeping @@ -377,7 +377,7 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB, MBB->getParent()->verify(); const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo(); errs() << "The register " << printReg(PhysReg, TRI) - << " needs to be live in to BB#" << MBB->getNumber() + << " needs to be live in to " << printMBBReference(*MBB) << ", but is missing from the live-in list.\n"; report_fatal_error("Invalid global physical register"); } diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp index 31be5e233443..22f6b3260f41 100644 --- a/lib/CodeGen/LiveRangeEdit.cpp +++ b/lib/CodeGen/LiveRangeEdit.cpp @@ -14,7 +14,7 @@ #include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/CalcSpillWeights.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/VirtRegMap.h" @@ -31,21 +31,24 @@ STATISTIC(NumFracRanges, "Number of live ranges fractured by DCE"); void LiveRangeEdit::Delegate::anchor() { } -LiveInterval &LiveRangeEdit::createEmptyIntervalFrom(unsigned OldReg) { +LiveInterval &LiveRangeEdit::createEmptyIntervalFrom(unsigned OldReg, + bool createSubRanges) { unsigned VReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg)); - if (VRM) { + if (VRM) VRM->setIsSplitFromReg(VReg, VRM->getOriginal(OldReg)); - } + LiveInterval &LI = LIS.createEmptyInterval(VReg); if (Parent && !Parent->isSpillable()) LI.markNotSpillable(); - // Create empty subranges if the OldReg's interval has them. Do not create - // the main range here---it will be constructed later after the subranges - // have been finalized. - LiveInterval &OldLI = LIS.getInterval(OldReg); - VNInfo::Allocator &Alloc = LIS.getVNInfoAllocator(); - for (LiveInterval::SubRange &S : OldLI.subranges()) - LI.createSubRange(Alloc, S.LaneMask); + if (createSubRanges) { + // Create empty subranges if the OldReg's interval has them. Do not create + // the main range here---it will be constructed later after the subranges + // have been finalized. 
+ LiveInterval &OldLI = LIS.getInterval(OldReg); + VNInfo::Allocator &Alloc = LIS.getVNInfoAllocator(); + for (LiveInterval::SubRange &S : OldLI.subranges()) + LI.createSubRange(Alloc, S.LaneMask); + } return LI; } @@ -357,12 +360,11 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink, // LiveRangeEdit::DeadRemats and will be deleted after all the // allocations of the func are done. if (isOrigDef && DeadRemats && TII.isTriviallyReMaterializable(*MI, AA)) { - LiveInterval &NewLI = createEmptyIntervalFrom(Dest); - NewLI.removeEmptySubRanges(); + LiveInterval &NewLI = createEmptyIntervalFrom(Dest, false); VNInfo *VNI = NewLI.getNextValue(Idx, LIS.getVNInfoAllocator()); NewLI.addSegment(LiveInterval::Segment(Idx, Idx.getDeadSlot(), VNI)); pop_back(); - markDeadRemat(MI); + DeadRemats->insert(MI); const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); MI->substituteRegister(Dest, NewLI.reg, 0, TRI); MI->getOperand(0).setIsDead(true); diff --git a/lib/CodeGen/LiveRangeShrink.cpp b/lib/CodeGen/LiveRangeShrink.cpp index b237c677fd38..02e1f3b01ade 100644 --- a/lib/CodeGen/LiveRangeShrink.cpp +++ b/lib/CodeGen/LiveRangeShrink.cpp @@ -106,7 +106,7 @@ static void BuildInstOrderMap(MachineBasicBlock::iterator Start, } bool LiveRangeShrink::runOnMachineFunction(MachineFunction &MF) { - if (skipFunction(*MF.getFunction())) + if (skipFunction(MF.getFunction())) return false; MachineRegisterInfo &MRI = MF.getRegInfo(); diff --git a/lib/CodeGen/LiveRegMatrix.cpp b/lib/CodeGen/LiveRegMatrix.cpp index 92e7cf8a9c8c..bd435968296d 100644 --- a/lib/CodeGen/LiveRegMatrix.cpp +++ b/lib/CodeGen/LiveRegMatrix.cpp @@ -15,8 +15,8 @@ #include "RegisterCoalescer.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LiveInterval.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveIntervalUnion.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" diff --git a/lib/CodeGen/LiveStackAnalysis.cpp b/lib/CodeGen/LiveStacks.cpp similarity index 94% rename from lib/CodeGen/LiveStackAnalysis.cpp rename to lib/CodeGen/LiveStacks.cpp index 5f9ecbc33be2..80ecfdb7a507 100644 --- a/lib/CodeGen/LiveStackAnalysis.cpp +++ b/lib/CodeGen/LiveStacks.cpp @@ -1,4 +1,4 @@ -//===-- LiveStackAnalysis.cpp - Live Stack Slot Analysis ------------------===// +//===-- LiveStacks.cpp - Live Stack Slot Analysis -------------------------===// // // The LLVM Compiler Infrastructure // @@ -13,8 +13,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/LiveStackAnalysis.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveStacks.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp index f9c5652e8a17..032dd66ae1d2 100644 --- a/lib/CodeGen/LiveVariables.cpp +++ b/lib/CodeGen/LiveVariables.cpp @@ -34,7 +34,6 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -235,7 +234,7 @@ void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr &MI) { // Otherwise, the last sub-register def 
implicitly defines this register. // e.g. // AH = - // AL = ... , + // AL = ... implicit-def EAX, implicit killed AH // = AH // ... // = EAX @@ -321,17 +320,17 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) { // AH = // // = AX - // = AL, AX + // = AL, implicit killed AX // AX = // // Or whole register is defined, but not used at all. - // AX = + // dead AX = // ... // AX = // // Or whole register is defined, but only partly used. - // AX = AL - // = AL + // dead AX = implicit-def AL + // = killed AL // AX = MachineInstr *LastPartDef = nullptr; unsigned LastPartDefDist = 0; @@ -364,7 +363,7 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) { if (!PhysRegUse[Reg]) { // Partial uses. Mark register def dead and add implicit def of // sub-registers which are used. - // EAX = op AL + // dead EAX = op implicit-def AL // That is, EAX def is dead but AL def extends pass it. PhysRegDef[Reg]->addRegisterDead(Reg, TRI, true); for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { diff --git a/lib/CodeGen/LoopTraversal.cpp b/lib/CodeGen/LoopTraversal.cpp new file mode 100644 index 000000000000..a02d10e09d7d --- /dev/null +++ b/lib/CodeGen/LoopTraversal.cpp @@ -0,0 +1,77 @@ +//===- LoopTraversal.cpp - Optimal basic block traversal order --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/LoopTraversal.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/CodeGen/MachineFunction.h" + +using namespace llvm; + +bool LoopTraversal::isBlockDone(MachineBasicBlock *MBB) { + unsigned MBBNumber = MBB->getNumber(); + assert(MBBNumber < MBBInfos.size() && "Unexpected basic block number."); + return MBBInfos[MBBNumber].PrimaryCompleted && + MBBInfos[MBBNumber].IncomingCompleted == + MBBInfos[MBBNumber].PrimaryIncoming && + MBBInfos[MBBNumber].IncomingProcessed == MBB->pred_size(); +} + +LoopTraversal::TraversalOrder LoopTraversal::traverse(MachineFunction &MF) { + // Initialize the MMBInfos + MBBInfos.assign(MF.getNumBlockIDs(), MBBInfo()); + + MachineBasicBlock *Entry = &*MF.begin(); + ReversePostOrderTraversal RPOT(Entry); + SmallVector Workqueue; + SmallVector MBBTraversalOrder; + for (MachineBasicBlock *MBB : RPOT) { + // N.B: IncomingProcessed and IncomingCompleted were already updated while + // processing this block's predecessors. 
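LoopTraversal's order is built on top of a reverse post-order walk; the PrimaryCompleted/Incoming counters only decide which blocks need a second visit once all their predecessors are done. A plain-C++ sketch of the underlying reverse post-order itself, over a made-up CFG containing a loop between blocks 1 and 2:

#include <cstdio>
#include <functional>
#include <vector>

int main() {
  // Hypothetical CFG: 0 -> {1}, 1 -> {2, 3}, 2 -> {1, 4}, 3 -> {4}, 4 -> {}
  std::vector<std::vector<int>> Succs = {{1}, {2, 3}, {1, 4}, {4}, {}};
  std::vector<bool> Visited(Succs.size(), false);
  std::vector<int> PostOrder;
  std::function<void(int)> DFS = [&](int BB) {
    Visited[BB] = true;
    for (int S : Succs[BB])
      if (!Visited[S])
        DFS(S);
    PostOrder.push_back(BB);
  };
  DFS(0);
  // Reverse post-order: every block prints before its non-back-edge successors.
  for (auto It = PostOrder.rbegin(); It != PostOrder.rend(); ++It)
    std::printf("bb.%d\n", *It);
  return 0;
}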
+ unsigned MBBNumber = MBB->getNumber(); + assert(MBBNumber < MBBInfos.size() && "Unexpected basic block number."); + MBBInfos[MBBNumber].PrimaryCompleted = true; + MBBInfos[MBBNumber].PrimaryIncoming = MBBInfos[MBBNumber].IncomingProcessed; + bool Primary = true; + Workqueue.push_back(MBB); + while (!Workqueue.empty()) { + MachineBasicBlock *ActiveMBB = &*Workqueue.back(); + Workqueue.pop_back(); + bool Done = isBlockDone(ActiveMBB); + MBBTraversalOrder.push_back(TraversedMBBInfo(ActiveMBB, Primary, Done)); + for (MachineBasicBlock *Succ : ActiveMBB->successors()) { + unsigned SuccNumber = Succ->getNumber(); + assert(SuccNumber < MBBInfos.size() && + "Unexpected basic block number."); + if (!isBlockDone(Succ)) { + if (Primary) + MBBInfos[SuccNumber].IncomingProcessed++; + if (Done) + MBBInfos[SuccNumber].IncomingCompleted++; + if (isBlockDone(Succ)) + Workqueue.push_back(Succ); + } + } + Primary = false; + } + } + + // We need to go through again and finalize any blocks that are not done yet. + // This is possible if blocks have dead predecessors, so we didn't visit them + // above. + for (MachineBasicBlock *MBB : RPOT) { + if (!isBlockDone(MBB)) + MBBTraversalOrder.push_back(TraversedMBBInfo(MBB, false, true)); + // Don't update successors here. We'll get to them anyway through this + // loop. + } + + MBBInfos.clear(); + + return MBBTraversalOrder; +} diff --git a/lib/CodeGen/MIRCanonicalizerPass.cpp b/lib/CodeGen/MIRCanonicalizerPass.cpp index c1ccb94441ab..4b676a60a8cd 100644 --- a/lib/CodeGen/MIRCanonicalizerPass.cpp +++ b/lib/CodeGen/MIRCanonicalizerPass.cpp @@ -30,7 +30,6 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/Support/raw_ostream.h" #include diff --git a/lib/CodeGen/MIRParser/MILexer.cpp b/lib/CodeGen/MIRParser/MILexer.cpp index d23df9c137bf..a5d66b5c9799 100644 --- a/lib/CodeGen/MIRParser/MILexer.cpp +++ b/lib/CodeGen/MIRParser/MILexer.cpp @@ -208,15 +208,25 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { .Case("internal", MIToken::kw_internal) .Case("early-clobber", MIToken::kw_early_clobber) .Case("debug-use", MIToken::kw_debug_use) + .Case("renamable", MIToken::kw_renamable) .Case("tied-def", MIToken::kw_tied_def) .Case("frame-setup", MIToken::kw_frame_setup) + .Case("frame-destroy", MIToken::kw_frame_destroy) .Case("debug-location", MIToken::kw_debug_location) .Case("same_value", MIToken::kw_cfi_same_value) .Case("offset", MIToken::kw_cfi_offset) + .Case("rel_offset", MIToken::kw_cfi_rel_offset) .Case("def_cfa_register", MIToken::kw_cfi_def_cfa_register) .Case("def_cfa_offset", MIToken::kw_cfi_def_cfa_offset) + .Case("adjust_cfa_offset", MIToken::kw_cfi_adjust_cfa_offset) + .Case("escape", MIToken::kw_cfi_escape) .Case("def_cfa", MIToken::kw_cfi_def_cfa) + .Case("remember_state", MIToken::kw_cfi_remember_state) .Case("restore", MIToken::kw_cfi_restore) + .Case("restore_state", MIToken::kw_cfi_restore_state) + .Case("undefined", MIToken::kw_cfi_undefined) + .Case("register", MIToken::kw_cfi_register) + .Case("window_save", MIToken::kw_cfi_window_save) .Case("blockaddress", MIToken::kw_blockaddress) .Case("intrinsic", MIToken::kw_intrinsic) .Case("target-index", MIToken::kw_target_index) @@ -277,6 +287,9 @@ static Cursor maybeLexMachineBasicBlock(Cursor C, MIToken &Token, C.advance(); StringRef Number = NumberRange.upto(C); unsigned StringOffset = PrefixLength + Number.size(); // Drop '%bb.' 
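This lexer path accepts both a bare '%bb.<id>' and the longer form with a trailing IR block name, which the TODO below wants to stop lexing for plain references. A standalone sketch of the shape being recognized (plain C++ with a hypothetical helper, not the MIR lexer itself):

#include <cctype>
#include <cstdio>
#include <string>

// Recognize "%bb.<id>" with an optional ".<name>" suffix.
static bool lexMBBReference(const std::string &S, unsigned &Num, std::string &Name) {
  const std::string Prefix = "%bb.";
  if (S.compare(0, Prefix.size(), Prefix) != 0)
    return false;
  size_t I = Prefix.size(), DigitsBegin = I;
  while (I < S.size() && std::isdigit(static_cast<unsigned char>(S[I])))
    ++I;
  if (I == DigitsBegin)
    return false; // no block number
  Num = static_cast<unsigned>(std::stoul(S.substr(DigitsBegin, I - DigitsBegin)));
  Name.clear();
  if (I < S.size() && S[I] == '.')
    Name = S.substr(I + 1); // optional IR block name
  return true;
}

int main() {
  unsigned Num;
  std::string Name;
  if (lexMBBReference("%bb.3.entry", Num, Name))
    std::printf("number=%u name=%s\n", Num, Name.c_str()); // number=3 name=entry
  return 0;
}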
+ // TODO: The format bb.. is supported only when it's not a + // reference. Once we deprecate the format where the irname shows up, we + // should only lex forward if it is a reference. if (C.peek() == '.') { C.advance(); // Skip '.' ++StringOffset; @@ -429,7 +442,7 @@ static Cursor maybeLexGlobalValue(Cursor C, MIToken &Token, static Cursor maybeLexExternalSymbol(Cursor C, MIToken &Token, ErrorCallbackType ErrorCallback) { - if (C.peek() != '$') + if (C.peek() != '&') return None; return lexName(C, Token, MIToken::ExternalSymbol, /*PrefixLength=*/1, ErrorCallback); diff --git a/lib/CodeGen/MIRParser/MILexer.h b/lib/CodeGen/MIRParser/MILexer.h index 6894fe8b0ac5..275f92985f7f 100644 --- a/lib/CodeGen/MIRParser/MILexer.h +++ b/lib/CodeGen/MIRParser/MILexer.h @@ -60,15 +60,25 @@ struct MIToken { kw_internal, kw_early_clobber, kw_debug_use, + kw_renamable, kw_tied_def, kw_frame_setup, + kw_frame_destroy, kw_debug_location, kw_cfi_same_value, kw_cfi_offset, + kw_cfi_rel_offset, kw_cfi_def_cfa_register, kw_cfi_def_cfa_offset, + kw_cfi_adjust_cfa_offset, + kw_cfi_escape, kw_cfi_def_cfa, + kw_cfi_register, + kw_cfi_remember_state, kw_cfi_restore, + kw_cfi_restore_state, + kw_cfi_undefined, + kw_cfi_window_save, kw_blockaddress, kw_intrinsic, kw_target_index, @@ -166,7 +176,8 @@ struct MIToken { return Kind == kw_implicit || Kind == kw_implicit_define || Kind == kw_def || Kind == kw_dead || Kind == kw_killed || Kind == kw_undef || Kind == kw_internal || - Kind == kw_early_clobber || Kind == kw_debug_use; + Kind == kw_early_clobber || Kind == kw_debug_use || + Kind == kw_renamable; } bool isMemoryOperandFlag() const { diff --git a/lib/CodeGen/MIRParser/MIParser.cpp b/lib/CodeGen/MIRParser/MIParser.cpp index 10dbaf7045e8..4fa84c7bbd90 100644 --- a/lib/CodeGen/MIRParser/MIParser.cpp +++ b/lib/CodeGen/MIRParser/MIParser.cpp @@ -33,7 +33,6 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetInstrInfo.h" @@ -214,6 +213,7 @@ class MIParser { bool parseMetadataOperand(MachineOperand &Dest); bool parseCFIOffset(int &Offset); bool parseCFIRegister(unsigned &Reg); + bool parseCFIEscapeValues(std::string& Values); bool parseCFIOperand(MachineOperand &Dest); bool parseIRBlock(BasicBlock *&BB, const Function &F); bool parseBlockAddressOperand(MachineOperand &Dest); @@ -431,7 +431,7 @@ bool MIParser::parseBasicBlockDefinition( break; case MIToken::IRBlock: // TODO: Report an error when both name and ir block are specified. 
- if (parseIRBlock(BB, *MF.getFunction())) + if (parseIRBlock(BB, MF.getFunction())) return true; lex(); break; @@ -447,7 +447,7 @@ if (!Name.empty()) { BB = dyn_cast_or_null<BasicBlock>( - MF.getFunction()->getValueSymbolTable()->lookup(Name)); + MF.getFunction().getValueSymbolTable()->lookup(Name)); if (!BB) return error(Loc, Twine("basic block '") + Name + "' is not defined in the function '" + @@ -925,6 +925,9 @@ bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) { if (Token.is(MIToken::kw_frame_setup)) { Flags |= MachineInstr::FrameSetup; lex(); + } else if (Token.is(MIToken::kw_frame_destroy)) { + Flags |= MachineInstr::FrameDestroy; + lex(); } if (Token.isNot(MIToken::Identifier)) return error("expected a machine instruction"); @@ -1060,6 +1063,9 @@ bool MIParser::parseRegisterFlag(unsigned &Flags) { case MIToken::kw_debug_use: Flags |= RegState::Debug; break; + case MIToken::kw_renamable: + Flags |= RegState::Renamable; + break; default: llvm_unreachable("The current token should be a register flag"); } @@ -1212,7 +1218,8 @@ bool MIParser::parseRegisterOperand(MachineOperand &Dest, Reg, Flags & RegState::Define, Flags & RegState::Implicit, Flags & RegState::Kill, Flags & RegState::Dead, Flags & RegState::Undef, Flags & RegState::EarlyClobber, SubReg, Flags & RegState::Debug, - Flags & RegState::InternalRead); + Flags & RegState::InternalRead, Flags & RegState::Renamable); + return false; } @@ -1230,7 +1237,7 @@ bool MIParser::parseIRConstant(StringRef::iterator Loc, StringRef StringValue, const Constant *&C) { auto Source = StringValue.str(); // The source has to be null terminated. SMDiagnostic Err; - C = parseConstantValue(Source, Err, *MF.getFunction()->getParent(), + C = parseConstantValue(Source, Err, *MF.getFunction().getParent(), &PFS.IRSlots); if (!C) return error(Loc + Err.getColumnNo(), Err.getMessage()); @@ -1250,7 +1257,7 @@ bool MIParser::parseLowLevelType(StringRef::iterator Loc, LLT &Ty) { lex(); return false; } else if (Token.is(MIToken::PointerType)) { - const DataLayout &DL = MF.getFunction()->getParent()->getDataLayout(); + const DataLayout &DL = MF.getDataLayout(); unsigned AS = APSInt(Token.range().drop_front()).getZExtValue(); Ty = LLT::pointer(AS, DL.getPointerSizeInBits(AS)); lex(); @@ -1344,6 +1351,8 @@ bool MIParser::parseMBBReference(MachineBasicBlock *&MBB) { return error(Twine("use of undefined machine basic block #") + Twine(Number)); MBB = MBBInfo->second; + // TODO: Only parse the name if it's a MachineBasicBlockLabel. Deprecate once + // we drop the <irname> from the bb.<id>.<irname> format.
if (!Token.stringValue().empty() && Token.stringValue() != MBB->getName()) return error(Twine("the name of machine basic block #") + Twine(Number) + " isn't '" + Token.stringValue() + "'"); @@ -1413,7 +1422,7 @@ bool MIParser::parseFixedStackObjectOperand(MachineOperand &Dest) { bool MIParser::parseGlobalValue(GlobalValue *&GV) { switch (Token.kind()) { case MIToken::NamedGlobalValue: { - const Module *M = MF.getFunction()->getParent(); + const Module *M = MF.getFunction().getParent(); GV = M->getNamedValue(Token.stringValue()); if (!GV) return error(Twine("use of undefined global value '") + Token.range() + @@ -1551,7 +1560,7 @@ bool MIParser::parseDIExpression(MDNode *&Expr) { if (expectAndConsume(MIToken::rparen)) return true; - Expr = DIExpression::get(MF.getFunction()->getContext(), Elements); + Expr = DIExpression::get(MF.getFunction().getContext(), Elements); return false; } @@ -1594,6 +1603,21 @@ bool MIParser::parseCFIRegister(unsigned &Reg) { return false; } +bool MIParser::parseCFIEscapeValues(std::string &Values) { + do { + if (Token.isNot(MIToken::HexLiteral)) + return error("expected a hexadecimal literal"); + unsigned Value; + if (getUnsigned(Value)) + return true; + if (Value > UINT8_MAX) + return error("expected a 8-bit integer (too large)"); + Values.push_back(static_cast<uint8_t>(Value)); + lex(); + } while (consumeIfPresent(MIToken::comma)); + return false; +} + bool MIParser::parseCFIOperand(MachineOperand &Dest) { auto Kind = Token.kind(); lex(); @@ -1613,6 +1637,13 @@ bool MIParser::parseCFIOperand(MachineOperand &Dest) { CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(nullptr, Reg, Offset)); break; + case MIToken::kw_cfi_rel_offset: + if (parseCFIRegister(Reg) || expectAndConsume(MIToken::comma) || + parseCFIOffset(Offset)) + return true; + CFIIndex = MF.addFrameInst( + MCCFIInstruction::createRelOffset(nullptr, Reg, Offset)); + break; case MIToken::kw_cfi_def_cfa_register: if (parseCFIRegister(Reg)) return true; @@ -1626,6 +1657,12 @@ CFIIndex = MF.addFrameInst( MCCFIInstruction::createDefCfaOffset(nullptr, -Offset)); break; + case MIToken::kw_cfi_adjust_cfa_offset: + if (parseCFIOffset(Offset)) + return true; + CFIIndex = MF.addFrameInst( + MCCFIInstruction::createAdjustCfaOffset(nullptr, Offset)); + break; case MIToken::kw_cfi_def_cfa: if (parseCFIRegister(Reg) || expectAndConsume(MIToken::comma) || parseCFIOffset(Offset)) return true; @@ -1634,12 +1671,42 @@ CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa(nullptr, Reg, -Offset)); break; + case MIToken::kw_cfi_remember_state: + CFIIndex = MF.addFrameInst(MCCFIInstruction::createRememberState(nullptr)); + break; case MIToken::kw_cfi_restore: if (parseCFIRegister(Reg)) return true; CFIIndex = MF.addFrameInst(MCCFIInstruction::createRestore(nullptr, Reg)); break; + case MIToken::kw_cfi_restore_state: + CFIIndex = MF.addFrameInst(MCCFIInstruction::createRestoreState(nullptr)); + break; + case MIToken::kw_cfi_undefined: + if (parseCFIRegister(Reg)) + return true; + CFIIndex = MF.addFrameInst(MCCFIInstruction::createUndefined(nullptr, Reg)); + break; + case MIToken::kw_cfi_register: { + unsigned Reg2; + if (parseCFIRegister(Reg) || expectAndConsume(MIToken::comma) || + parseCFIRegister(Reg2)) + return true; + CFIIndex = + MF.addFrameInst(MCCFIInstruction::createRegister(nullptr, Reg, Reg2)); + break; + } + case MIToken::kw_cfi_window_save: + CFIIndex = MF.addFrameInst(MCCFIInstruction::createWindowSave(nullptr));
+ break; + case MIToken::kw_cfi_escape: { + std::string Values; + if (parseCFIEscapeValues(Values)) + return true; + CFIIndex = MF.addFrameInst(MCCFIInstruction::createEscape(nullptr, Values)); + break; + } default: // TODO: Parse the other CFI operands. llvm_unreachable("The current token should be a cfi operand"); @@ -1878,6 +1945,7 @@ bool MIParser::parseMachineOperand(MachineOperand &Dest, case MIToken::kw_internal: case MIToken::kw_early_clobber: case MIToken::kw_debug_use: + case MIToken::kw_renamable: case MIToken::underscore: case MIToken::NamedRegister: case MIToken::VirtualRegister: @@ -1915,10 +1983,18 @@ bool MIParser::parseMachineOperand(MachineOperand &Dest, return parseMetadataOperand(Dest); case MIToken::kw_cfi_same_value: case MIToken::kw_cfi_offset: + case MIToken::kw_cfi_rel_offset: case MIToken::kw_cfi_def_cfa_register: case MIToken::kw_cfi_def_cfa_offset: + case MIToken::kw_cfi_adjust_cfa_offset: + case MIToken::kw_cfi_escape: case MIToken::kw_cfi_def_cfa: + case MIToken::kw_cfi_register: + case MIToken::kw_cfi_remember_state: case MIToken::kw_cfi_restore: + case MIToken::kw_cfi_restore_state: + case MIToken::kw_cfi_undefined: + case MIToken::kw_cfi_window_save: return parseCFIOperand(Dest); case MIToken::kw_blockaddress: return parseBlockAddressOperand(Dest); @@ -2029,7 +2105,7 @@ bool MIParser::parseOperandsOffset(MachineOperand &Op) { bool MIParser::parseIRValue(const Value *&V) { switch (Token.kind()) { case MIToken::NamedIRValue: { - V = MF.getFunction()->getValueSymbolTable()->lookup(Token.stringValue()); + V = MF.getFunction().getValueSymbolTable()->lookup(Token.stringValue()); break; } case MIToken::IRValue: { @@ -2280,9 +2356,15 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) { Flags |= MachineMemOperand::MOStore; lex(); + // Optional 'store' for operands that both load and store. + if (Token.is(MIToken::Identifier) && Token.stringValue() == "store") { + Flags |= MachineMemOperand::MOStore; + lex(); + } + // Optional synchronization scope. SyncScope::ID SSID; - if (parseOptionalScope(MF.getFunction()->getContext(), SSID)) + if (parseOptionalScope(MF.getFunction().getContext(), SSID)) return true; // Up to two atomic orderings (cmpxchg provides guarantees on failure). @@ -2302,7 +2384,11 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) { MachinePointerInfo Ptr = MachinePointerInfo(); if (Token.is(MIToken::Identifier)) { - const char *Word = Flags & MachineMemOperand::MOLoad ? "from" : "into"; + const char *Word = + ((Flags & MachineMemOperand::MOLoad) && + (Flags & MachineMemOperand::MOStore)) + ? "on" + : Flags & MachineMemOperand::MOLoad ? 
"from" : "into"; if (Token.stringValue() != Word) return error(Twine("expected '") + Word + "'"); lex(); @@ -2459,12 +2545,12 @@ static const BasicBlock *getIRBlockFromSlot( const BasicBlock *MIParser::getIRBlock(unsigned Slot) { if (Slots2BasicBlocks.empty()) - initSlots2BasicBlocks(*MF.getFunction(), Slots2BasicBlocks); + initSlots2BasicBlocks(MF.getFunction(), Slots2BasicBlocks); return getIRBlockFromSlot(Slot, Slots2BasicBlocks); } const BasicBlock *MIParser::getIRBlock(unsigned Slot, const Function &F) { - if (&F == MF.getFunction()) + if (&F == &MF.getFunction()) return getIRBlock(Slot); DenseMap CustomSlots2BasicBlocks; initSlots2BasicBlocks(F, CustomSlots2BasicBlocks); @@ -2495,7 +2581,7 @@ static void initSlots2Values(const Function &F, const Value *MIParser::getIRValue(unsigned Slot) { if (Slots2Values.empty()) - initSlots2Values(*MF.getFunction(), Slots2Values); + initSlots2Values(MF.getFunction(), Slots2Values); auto ValueInfo = Slots2Values.find(Slot); if (ValueInfo == Slots2Values.end()) return nullptr; diff --git a/lib/CodeGen/MIRParser/MIRParser.cpp b/lib/CodeGen/MIRParser/MIRParser.cpp index 836cc1db48af..e4e3fbbd75d8 100644 --- a/lib/CodeGen/MIRParser/MIRParser.cpp +++ b/lib/CodeGen/MIRParser/MIRParser.cpp @@ -417,6 +417,8 @@ MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF, computeFunctionProperties(MF); + MF.getSubtarget().mirFileLoaded(MF); + MF.verify(); return false; } @@ -551,7 +553,7 @@ bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS, const yaml::MachineFunction &YamlMF) { MachineFunction &MF = PFS.MF; MachineFrameInfo &MFI = MF.getFrameInfo(); - const Function &F = *MF.getFunction(); + const Function &F = MF.getFunction(); const yaml::MachineFrameInfo &YamlMFI = YamlMF.FrameInfo; MFI.setFrameAddressIsTaken(YamlMFI.IsFrameAddressTaken); MFI.setReturnAddressIsTaken(YamlMFI.IsReturnAddressTaken); @@ -722,7 +724,7 @@ bool MIRParserImpl::initializeConstantPool(PerFunctionMIParsingState &PFS, MachineConstantPool &ConstantPool, const yaml::MachineFunction &YamlMF) { DenseMap &ConstantPoolSlots = PFS.ConstantPoolSlots; const MachineFunction &MF = PFS.MF; - const auto &M = *MF.getFunction()->getParent(); + const auto &M = *MF.getFunction().getParent(); SMDiagnostic Error; for (const auto &YamlConstant : YamlMF.Constants) { if (YamlConstant.IsTargetSpecific) diff --git a/lib/CodeGen/MIRPrinter.cpp b/lib/CodeGen/MIRPrinter.cpp index 02b0b7ea5e52..09316175a789 100644 --- a/lib/CodeGen/MIRPrinter.cpp +++ b/lib/CodeGen/MIRPrinter.cpp @@ -75,7 +75,8 @@ using namespace llvm; -static cl::opt SimplifyMIR("simplify-mir", +static cl::opt SimplifyMIR( + "simplify-mir", cl::Hidden, cl::desc("Leave out unnecessary information when printing MIR")); namespace { @@ -156,20 +157,14 @@ class MIPrinter { void print(const MachineBasicBlock &MBB); void print(const MachineInstr &MI); - void printMBBReference(const MachineBasicBlock &MBB); - void printIRBlockReference(const BasicBlock &BB); void printIRValueReference(const Value &V); void printStackObjectReference(int FrameIndex); - void printOffset(int64_t Offset); - void printTargetFlags(const MachineOperand &Op); void print(const MachineInstr &MI, unsigned OpIdx, const TargetRegisterInfo *TRI, bool ShouldPrintRegisterTies, - LLT TypeToPrint, bool IsDef = false); + LLT TypeToPrint, bool PrintDef = true); void print(const LLVMContext &Context, const TargetInstrInfo &TII, const MachineMemOperand &Op); void printSyncScope(const LLVMContext &Context, SyncScope::ID SSID); - - void print(const 
MCCFIInstruction &CFI, const TargetRegisterInfo *TRI); }; } // end namespace llvm @@ -192,23 +187,10 @@ template <> struct BlockScalarTraits { } // end namespace yaml } // end namespace llvm -static void printRegMIR(unsigned Reg, raw_ostream &OS, - const TargetRegisterInfo *TRI) { - // TODO: Print Stack Slots. - if (!Reg) - OS << '_'; - else if (TargetRegisterInfo::isVirtualRegister(Reg)) - OS << '%' << TargetRegisterInfo::virtReg2Index(Reg); - else if (Reg < TRI->getNumRegs()) - OS << '%' << StringRef(TRI->getName(Reg)).lower(); - else - llvm_unreachable("Can't print this kind of register yet"); -} - static void printRegMIR(unsigned Reg, yaml::StringValue &Dest, const TargetRegisterInfo *TRI) { raw_string_ostream OS(Dest.Value); - printRegMIR(Reg, OS, TRI); + OS << printReg(Reg, TRI); } void MIRPrinter::print(const MachineFunction &MF) { @@ -227,8 +209,8 @@ void MIRPrinter::print(const MachineFunction &MF) { MachineFunctionProperties::Property::Selected); convert(YamlMF, MF.getRegInfo(), MF.getSubtarget().getRegisterInfo()); - ModuleSlotTracker MST(MF.getFunction()->getParent()); - MST.incorporateFunction(*MF.getFunction()); + ModuleSlotTracker MST(MF.getFunction().getParent()); + MST.incorporateFunction(MF.getFunction()); convert(MST, YamlMF.FrameInfo, MF.getFrameInfo()); convertStackObjects(YamlMF, MF, MST); if (const auto *ConstantPool = MF.getConstantPool()) @@ -262,7 +244,7 @@ static void printCustomRegMask(const uint32_t *RegMask, raw_ostream &OS, if (RegMask[I / 32] & (1u << (I % 32))) { if (IsRegInRegMaskFound) OS << ','; - printRegMIR(I, OS, TRI); + OS << printReg(I, TRI); IsRegInRegMaskFound = true; } } @@ -270,25 +252,11 @@ static void printCustomRegMask(const uint32_t *RegMask, raw_ostream &OS, OS << ')'; } -static void printRegClassOrBank(unsigned Reg, raw_ostream &OS, - const MachineRegisterInfo &RegInfo, - const TargetRegisterInfo *TRI) { - if (RegInfo.getRegClassOrNull(Reg)) - OS << StringRef(TRI->getRegClassName(RegInfo.getRegClass(Reg))).lower(); - else if (RegInfo.getRegBankOrNull(Reg)) - OS << StringRef(RegInfo.getRegBankOrNull(Reg)->getName()).lower(); - else { - OS << "_"; - assert((RegInfo.def_empty(Reg) || RegInfo.getType(Reg).isValid()) && - "Generic registers must have a valid type"); - } -} - static void printRegClassOrBank(unsigned Reg, yaml::StringValue &Dest, const MachineRegisterInfo &RegInfo, const TargetRegisterInfo *TRI) { raw_string_ostream OS(Dest.Value); - printRegClassOrBank(Reg, OS, RegInfo, TRI); + OS << printRegClassOrBank(Reg, RegInfo, TRI); } @@ -302,7 +270,7 @@ void MIRPrinter::convert(yaml::MachineFunction &MF, unsigned Reg = TargetRegisterInfo::index2VirtReg(I); yaml::VirtualRegisterDefinition VReg; VReg.ID = I; - printRegClassOrBank(Reg, VReg.Class, RegInfo, TRI); + ::printRegClassOrBank(Reg, VReg.Class, RegInfo, TRI); unsigned PreferredReg = RegInfo.getSimpleHint(Reg); if (PreferredReg) printRegMIR(PreferredReg, VReg.PreferredRegister, TRI); @@ -350,13 +318,11 @@ void MIRPrinter::convert(ModuleSlotTracker &MST, YamlMFI.HasMustTailInVarArgFunc = MFI.hasMustTailInVarArgFunc(); if (MFI.getSavePoint()) { raw_string_ostream StrOS(YamlMFI.SavePoint.Value); - MIPrinter(StrOS, MST, RegisterMaskIds, StackObjectOperandMapping) - .printMBBReference(*MFI.getSavePoint()); + StrOS << printMBBReference(*MFI.getSavePoint()); } if (MFI.getRestorePoint()) { raw_string_ostream StrOS(YamlMFI.RestorePoint.Value); - MIPrinter(StrOS, MST, RegisterMaskIds, StackObjectOperandMapping) - .printMBBReference(*MFI.getRestorePoint()); + StrOS << 
printMBBReference(*MFI.getRestorePoint()); } } @@ -505,8 +471,7 @@ void MIRPrinter::convert(ModuleSlotTracker &MST, Entry.ID = ID++; for (const auto *MBB : Table.MBBs) { raw_string_ostream StrOS(Str); - MIPrinter(StrOS, MST, RegisterMaskIds, StackObjectOperandMapping) - .printMBBReference(*MBB); + StrOS << printMBBReference(*MBB); Entry.Blocks.push_back(StrOS.str()); Str.clear(); } @@ -628,7 +593,7 @@ void MIPrinter::print(const MachineBasicBlock &MBB) { for (auto I = MBB.succ_begin(), E = MBB.succ_end(); I != E; ++I) { if (I != MBB.succ_begin()) OS << ", "; - printMBBReference(**I); + OS << printMBBReference(**I); if (!SimplifyMIR || !canPredictProbs) OS << '(' << format("0x%08" PRIx32, MBB.getSuccProbability(I).getNumerator()) @@ -648,7 +613,7 @@ void MIPrinter::print(const MachineBasicBlock &MBB) { if (!First) OS << ", "; First = false; - printRegMIR(LI.PhysReg, OS, &TRI); + OS << printReg(LI.PhysReg, &TRI); if (!LI.LaneMask.all()) OS << ":0x" << PrintLaneMask(LI.LaneMask); } @@ -677,44 +642,6 @@ void MIPrinter::print(const MachineBasicBlock &MBB) { OS.indent(2) << "}\n"; } -/// Return true when an instruction has tied register that can't be determined -/// by the instruction's descriptor. -static bool hasComplexRegisterTies(const MachineInstr &MI) { - const MCInstrDesc &MCID = MI.getDesc(); - for (unsigned I = 0, E = MI.getNumOperands(); I < E; ++I) { - const auto &Operand = MI.getOperand(I); - if (!Operand.isReg() || Operand.isDef()) - // Ignore the defined registers as MCID marks only the uses as tied. - continue; - int ExpectedTiedIdx = MCID.getOperandConstraint(I, MCOI::TIED_TO); - int TiedIdx = Operand.isTied() ? int(MI.findTiedOperandIdx(I)) : -1; - if (ExpectedTiedIdx != TiedIdx) - return true; - } - return false; -} - -static LLT getTypeToPrint(const MachineInstr &MI, unsigned OpIdx, - SmallBitVector &PrintedTypes, - const MachineRegisterInfo &MRI) { - const MachineOperand &Op = MI.getOperand(OpIdx); - if (!Op.isReg()) - return LLT{}; - - if (MI.isVariadic() || OpIdx >= MI.getNumExplicitOperands()) - return MRI.getType(Op.getReg()); - - auto &OpInfo = MI.getDesc().OpInfo[OpIdx]; - if (!OpInfo.isGenericType()) - return MRI.getType(Op.getReg()); - - if (PrintedTypes[OpInfo.getGenericTypeIndex()]) - return LLT{}; - - PrintedTypes.set(OpInfo.getGenericTypeIndex()); - return MRI.getType(Op.getReg()); -} - void MIPrinter::print(const MachineInstr &MI) { const auto *MF = MI.getMF(); const auto &MRI = MF->getRegInfo(); @@ -727,7 +654,7 @@ void MIPrinter::print(const MachineInstr &MI) { assert(MI.getNumOperands() == 1 && "Expected 1 operand in CFI instruction"); SmallBitVector PrintedTypes(8); - bool ShouldPrintRegisterTies = hasComplexRegisterTies(MI); + bool ShouldPrintRegisterTies = MI.hasComplexRegisterTies(); unsigned I = 0, E = MI.getNumOperands(); for (; I < E && MI.getOperand(I).isReg() && MI.getOperand(I).isDef() && !MI.getOperand(I).isImplicit(); @@ -735,14 +662,17 @@ void MIPrinter::print(const MachineInstr &MI) { if (I) OS << ", "; print(MI, I, TRI, ShouldPrintRegisterTies, - getTypeToPrint(MI, I, PrintedTypes, MRI), - /*IsDef=*/true); + MI.getTypeToPrint(I, PrintedTypes, MRI), + /*PrintDef=*/false); } if (I) OS << " = "; if (MI.getFlag(MachineInstr::FrameSetup)) OS << "frame-setup "; + else if (MI.getFlag(MachineInstr::FrameDestroy)) + OS << "frame-destroy "; + OS << TII->getName(MI.getOpcode()); if (I < E) OS << ' '; @@ -752,20 +682,20 @@ void MIPrinter::print(const MachineInstr &MI) { if (NeedComma) OS << ", "; print(MI, I, TRI, ShouldPrintRegisterTies, - 
getTypeToPrint(MI, I, PrintedTypes, MRI)); + MI.getTypeToPrint(I, PrintedTypes, MRI)); NeedComma = true; } - if (MI.getDebugLoc()) { + if (const DebugLoc &DL = MI.getDebugLoc()) { if (NeedComma) OS << ','; OS << " debug-location "; - MI.getDebugLoc()->printAsOperand(OS, MST); + DL->printAsOperand(OS, MST); } if (!MI.memoperands_empty()) { OS << " :: "; - const LLVMContext &Context = MF->getFunction()->getContext(); + const LLVMContext &Context = MF->getFunction().getContext(); bool NeedComma = false; for (const auto *Op : MI.memoperands()) { if (NeedComma) @@ -776,40 +706,6 @@ void MIPrinter::print(const MachineInstr &MI) { } } -void MIPrinter::printMBBReference(const MachineBasicBlock &MBB) { - OS << "%bb." << MBB.getNumber(); - if (const auto *BB = MBB.getBasicBlock()) { - if (BB->hasName()) - OS << '.' << BB->getName(); - } -} - -static void printIRSlotNumber(raw_ostream &OS, int Slot) { - if (Slot == -1) - OS << ""; - else - OS << Slot; -} - -void MIPrinter::printIRBlockReference(const BasicBlock &BB) { - OS << "%ir-block."; - if (BB.hasName()) { - printLLVMNameWithoutPrefix(OS, BB.getName()); - return; - } - const Function *F = BB.getParent(); - int Slot; - if (F == MST.getCurrentFunction()) { - Slot = MST.getLocalSlot(&BB); - } else { - ModuleSlotTracker CustomMST(F->getParent(), - /*ShouldInitializeAllMetadata=*/false); - CustomMST.incorporateFunction(*F); - Slot = CustomMST.getLocalSlot(&BB); - } - printIRSlotNumber(OS, Slot); -} - void MIPrinter::printIRValueReference(const Value &V) { if (isa(V)) { V.printAsOperand(OS, /*PrintType=*/false, MST); @@ -827,7 +723,7 @@ void MIPrinter::printIRValueReference(const Value &V) { printLLVMNameWithoutPrefix(OS, V.getName()); return; } - printIRSlotNumber(OS, MST.getLocalSlot(&V)); + MachineOperand::printIRSlotNumber(OS, MST.getLocalSlot(&V)); } void MIPrinter::printStackObjectReference(int FrameIndex) { @@ -835,195 +731,49 @@ void MIPrinter::printStackObjectReference(int FrameIndex) { assert(ObjectInfo != StackObjectOperandMapping.end() && "Invalid frame index"); const FrameIndexOperand &Operand = ObjectInfo->second; - if (Operand.IsFixed) { - OS << "%fixed-stack." << Operand.ID; - return; - } - OS << "%stack." << Operand.ID; - if (!Operand.Name.empty()) - OS << '.' 
<< Operand.Name; -} - -void MIPrinter::printOffset(int64_t Offset) { - if (Offset == 0) - return; - if (Offset < 0) { - OS << " - " << -Offset; - return; - } - OS << " + " << Offset; -} - -static const char *getTargetFlagName(const TargetInstrInfo *TII, unsigned TF) { - auto Flags = TII->getSerializableDirectMachineOperandTargetFlags(); - for (const auto &I : Flags) { - if (I.first == TF) { - return I.second; - } - } - return nullptr; -} - -void MIPrinter::printTargetFlags(const MachineOperand &Op) { - if (!Op.getTargetFlags()) - return; - const auto *TII = Op.getParent()->getMF()->getSubtarget().getInstrInfo(); - assert(TII && "expected instruction info"); - auto Flags = TII->decomposeMachineOperandsTargetFlags(Op.getTargetFlags()); - OS << "target-flags("; - const bool HasDirectFlags = Flags.first; - const bool HasBitmaskFlags = Flags.second; - if (!HasDirectFlags && !HasBitmaskFlags) { - OS << ") "; - return; - } - if (HasDirectFlags) { - if (const auto *Name = getTargetFlagName(TII, Flags.first)) - OS << Name; - else - OS << ""; - } - if (!HasBitmaskFlags) { - OS << ") "; - return; - } - bool IsCommaNeeded = HasDirectFlags; - unsigned BitMask = Flags.second; - auto BitMasks = TII->getSerializableBitmaskMachineOperandTargetFlags(); - for (const auto &Mask : BitMasks) { - // Check if the flag's bitmask has the bits of the current mask set. - if ((BitMask & Mask.first) == Mask.first) { - if (IsCommaNeeded) - OS << ", "; - IsCommaNeeded = true; - OS << Mask.second; - // Clear the bits which were serialized from the flag's bitmask. - BitMask &= ~(Mask.first); - } - } - if (BitMask) { - // When the resulting flag's bitmask isn't zero, we know that we didn't - // serialize all of the bit flags. - if (IsCommaNeeded) - OS << ", "; - OS << ""; - } - OS << ") "; -} - -static const char *getTargetIndexName(const MachineFunction &MF, int Index) { - const auto *TII = MF.getSubtarget().getInstrInfo(); - assert(TII && "expected instruction info"); - auto Indices = TII->getSerializableTargetIndices(); - for (const auto &I : Indices) { - if (I.first == Index) { - return I.second; - } - } - return nullptr; + MachineOperand::printStackObjectReference(OS, Operand.ID, Operand.IsFixed, + Operand.Name); } void MIPrinter::print(const MachineInstr &MI, unsigned OpIdx, const TargetRegisterInfo *TRI, bool ShouldPrintRegisterTies, LLT TypeToPrint, - bool IsDef) { + bool PrintDef) { const MachineOperand &Op = MI.getOperand(OpIdx); - printTargetFlags(Op); switch (Op.getType()) { - case MachineOperand::MO_Register: { - unsigned Reg = Op.getReg(); - if (Op.isImplicit()) - OS << (Op.isDef() ? "implicit-def " : "implicit "); - else if (!IsDef && Op.isDef()) - // Print the 'def' flag only when the operand is defined after '='. - OS << "def "; - if (Op.isInternalRead()) - OS << "internal "; - if (Op.isDead()) - OS << "dead "; - if (Op.isKill()) - OS << "killed "; - if (Op.isUndef()) - OS << "undef "; - if (Op.isEarlyClobber()) - OS << "early-clobber "; - if (Op.isDebug()) - OS << "debug-use "; - printRegMIR(Reg, OS, TRI); - // Print the sub register. - if (Op.getSubReg() != 0) - OS << '.' 
<< TRI->getSubRegIndexName(Op.getSubReg()); - if (TargetRegisterInfo::isVirtualRegister(Reg)) { - const MachineRegisterInfo &MRI = Op.getParent()->getMF()->getRegInfo(); - if (IsDef || MRI.def_empty(Reg)) { - OS << ':'; - printRegClassOrBank(Reg, OS, MRI, TRI); - } - } - if (ShouldPrintRegisterTies && Op.isTied() && !Op.isDef()) - OS << "(tied-def " << Op.getParent()->findTiedOperandIdx(OpIdx) << ")"; - if (TypeToPrint.isValid()) - OS << '(' << TypeToPrint << ')'; - break; - } case MachineOperand::MO_Immediate: - if (MI.isOperandSubregIdx(OpIdx)) - OS << "%subreg." << TRI->getSubRegIndexName(Op.getImm()); - else - OS << Op.getImm(); - break; + if (MI.isOperandSubregIdx(OpIdx)) { + MachineOperand::printTargetFlags(OS, Op); + MachineOperand::printSubRegIdx(OS, Op.getImm(), TRI); + break; + } + LLVM_FALLTHROUGH; + case MachineOperand::MO_Register: case MachineOperand::MO_CImmediate: - Op.getCImm()->printAsOperand(OS, /*PrintType=*/true, MST); - break; case MachineOperand::MO_FPImmediate: - Op.getFPImm()->printAsOperand(OS, /*PrintType=*/true, MST); - break; case MachineOperand::MO_MachineBasicBlock: - printMBBReference(*Op.getMBB()); - break; - case MachineOperand::MO_FrameIndex: - printStackObjectReference(Op.getIndex()); - break; case MachineOperand::MO_ConstantPoolIndex: - OS << "%const." << Op.getIndex(); - printOffset(Op.getOffset()); - break; case MachineOperand::MO_TargetIndex: - OS << "target-index("; - if (const auto *Name = - getTargetIndexName(*Op.getParent()->getMF(), Op.getIndex())) - OS << Name; - else - OS << ""; - OS << ')'; - printOffset(Op.getOffset()); - break; case MachineOperand::MO_JumpTableIndex: - OS << "%jump-table." << Op.getIndex(); - break; - case MachineOperand::MO_ExternalSymbol: { - StringRef Name = Op.getSymbolName(); - OS << '$'; - if (Name.empty()) { - OS << "\"\""; - } else { - printLLVMNameWithoutPrefix(OS, Name); - } - printOffset(Op.getOffset()); - break; - } + case MachineOperand::MO_ExternalSymbol: case MachineOperand::MO_GlobalAddress: - Op.getGlobal()->printAsOperand(OS, /*PrintType=*/false, MST); - printOffset(Op.getOffset()); + case MachineOperand::MO_RegisterLiveOut: + case MachineOperand::MO_Metadata: + case MachineOperand::MO_MCSymbol: + case MachineOperand::MO_CFIIndex: + case MachineOperand::MO_IntrinsicID: + case MachineOperand::MO_Predicate: + case MachineOperand::MO_BlockAddress: { + unsigned TiedOperandIdx = 0; + if (ShouldPrintRegisterTies && Op.isReg() && Op.isTied() && !Op.isDef()) + TiedOperandIdx = Op.getParent()->findTiedOperandIdx(OpIdx); + const TargetIntrinsicInfo *TII = MI.getMF()->getTarget().getIntrinsicInfo(); + Op.print(OS, MST, TypeToPrint, PrintDef, /*IsStandalone=*/false, + ShouldPrintRegisterTies, TiedOperandIdx, TRI, TII); break; - case MachineOperand::MO_BlockAddress: - OS << "blockaddress("; - Op.getBlockAddress()->getFunction()->printAsOperand(OS, /*PrintType=*/false, - MST); - OS << ", "; - printIRBlockReference(*Op.getBlockAddress()->getBasicBlock()); - OS << ')'; - printOffset(Op.getOffset()); + } + case MachineOperand::MO_FrameIndex: + printStackObjectReference(Op.getIndex()); break; case MachineOperand::MO_RegisterMask: { auto RegMaskInfo = RegisterMaskIds.find(Op.getRegMask()); @@ -1033,49 +783,6 @@ void MIPrinter::print(const MachineInstr &MI, unsigned OpIdx, printCustomRegMask(Op.getRegMask(), OS, TRI); break; } - case MachineOperand::MO_RegisterLiveOut: { - const uint32_t *RegMask = Op.getRegLiveOut(); - OS << "liveout("; - bool IsCommaNeeded = false; - for (unsigned Reg = 0, E = TRI->getNumRegs(); Reg < E; 
++Reg) { - if (RegMask[Reg / 32] & (1U << (Reg % 32))) { - if (IsCommaNeeded) - OS << ", "; - printRegMIR(Reg, OS, TRI); - IsCommaNeeded = true; - } - } - OS << ")"; - break; - } - case MachineOperand::MO_Metadata: - Op.getMetadata()->printAsOperand(OS, MST); - break; - case MachineOperand::MO_MCSymbol: - OS << ""; - break; - case MachineOperand::MO_CFIIndex: { - const MachineFunction &MF = *Op.getParent()->getMF(); - print(MF.getFrameInstructions()[Op.getCFIIndex()], TRI); - break; - } - case MachineOperand::MO_IntrinsicID: { - Intrinsic::ID ID = Op.getIntrinsicID(); - if (ID < Intrinsic::num_intrinsics) - OS << "intrinsic(@" << Intrinsic::getName(ID, None) << ')'; - else { - const MachineFunction &MF = *Op.getParent()->getMF(); - const TargetIntrinsicInfo *TII = MF.getTarget().getIntrinsicInfo(); - OS << "intrinsic(@" << TII->getName(ID) << ')'; - } - break; - } - case MachineOperand::MO_Predicate: { - auto Pred = static_cast(Op.getPredicate()); - OS << (CmpInst::isIntPredicate(Pred) ? "int" : "float") << "pred(" - << CmpInst::getPredicateName(Pred) << ')'; - break; - } } } @@ -1110,12 +817,12 @@ void MIPrinter::print(const LLVMContext &Context, const TargetInstrInfo &TII, if (Op.getFlags() & MachineMemOperand::MOTargetFlag3) OS << '"' << getTargetMMOFlagName(TII, MachineMemOperand::MOTargetFlag3) << "\" "; + + assert((Op.isLoad() || Op.isStore()) && "machine memory operand must be a load or store (or both)"); if (Op.isLoad()) OS << "load "; - else { - assert(Op.isStore() && "Non load machine operand must be a store"); + if (Op.isStore()) OS << "store "; - } printSyncScope(Context, Op.getSyncScopeID()); @@ -1126,10 +833,12 @@ void MIPrinter::print(const LLVMContext &Context, const TargetInstrInfo &TII, OS << Op.getSize(); if (const Value *Val = Op.getValue()) { - OS << (Op.isLoad() ? " from " : " into "); + OS << ((Op.isLoad() && Op.isStore()) ? " on " + : Op.isLoad() ? " from " : " into "); printIRValueReference(*Val); } else if (const PseudoSourceValue *PVal = Op.getPseudoValue()) { - OS << (Op.isLoad() ? " from " : " into "); + OS << ((Op.isLoad() && Op.isStore()) ? " on " + : Op.isLoad() ? 
" from " : " into "); assert(PVal && "Expected a pseudo source value"); switch (PVal->kind()) { case PseudoSourceValue::Stack: @@ -1154,7 +863,7 @@ void MIPrinter::print(const LLVMContext &Context, const TargetInstrInfo &TII, OS, /*PrintType=*/false, MST); break; case PseudoSourceValue::ExternalSymbolCallEntry: - OS << "call-entry $"; + OS << "call-entry &"; printLLVMNameWithoutPrefix( OS, cast(PVal)->getSymbol()); break; @@ -1163,7 +872,7 @@ void MIPrinter::print(const LLVMContext &Context, const TargetInstrInfo &TII, break; } } - printOffset(Op.getOffset()); + MachineOperand::printOperandOffset(OS, Op.getOffset()); if (Op.getBaseAlignment() != Op.getSize()) OS << ", align " << Op.getBaseAlignment(); auto AAInfo = Op.getAAInfo(); @@ -1203,64 +912,6 @@ void MIPrinter::printSyncScope(const LLVMContext &Context, SyncScope::ID SSID) { } } -static void printCFIRegister(unsigned DwarfReg, raw_ostream &OS, - const TargetRegisterInfo *TRI) { - int Reg = TRI->getLLVMRegNum(DwarfReg, true); - if (Reg == -1) { - OS << ""; - return; - } - printRegMIR(Reg, OS, TRI); -} - -void MIPrinter::print(const MCCFIInstruction &CFI, - const TargetRegisterInfo *TRI) { - switch (CFI.getOperation()) { - case MCCFIInstruction::OpSameValue: - OS << "same_value "; - if (CFI.getLabel()) - OS << " "; - printCFIRegister(CFI.getRegister(), OS, TRI); - break; - case MCCFIInstruction::OpOffset: - OS << "offset "; - if (CFI.getLabel()) - OS << " "; - printCFIRegister(CFI.getRegister(), OS, TRI); - OS << ", " << CFI.getOffset(); - break; - case MCCFIInstruction::OpDefCfaRegister: - OS << "def_cfa_register "; - if (CFI.getLabel()) - OS << " "; - printCFIRegister(CFI.getRegister(), OS, TRI); - break; - case MCCFIInstruction::OpDefCfaOffset: - OS << "def_cfa_offset "; - if (CFI.getLabel()) - OS << " "; - OS << CFI.getOffset(); - break; - case MCCFIInstruction::OpDefCfa: - OS << "def_cfa "; - if (CFI.getLabel()) - OS << " "; - printCFIRegister(CFI.getRegister(), OS, TRI); - OS << ", " << CFI.getOffset(); - break; - case MCCFIInstruction::OpRestore: - OS << "restore "; - if (CFI.getLabel()) - OS << " "; - printCFIRegister(CFI.getRegister(), OS, TRI); - break; - default: - // TODO: Print the other CFI Operations. 
- OS << ""; - break; - } -} - void llvm::printMIR(raw_ostream &OS, const Module &M) { yaml::Output Out(OS); Out << const_cast(M); diff --git a/lib/CodeGen/MIRPrintingPass.cpp b/lib/CodeGen/MIRPrintingPass.cpp index 09354cf70c3c..1a8427430ea0 100644 --- a/lib/CodeGen/MIRPrintingPass.cpp +++ b/lib/CodeGen/MIRPrintingPass.cpp @@ -14,7 +14,6 @@ #include "llvm/CodeGen/MIRPrinter.h" -#include "llvm/CodeGen/MIRYamlMapping.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/Passes.h" #include "llvm/Support/Debug.h" diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index 8863ac236072..1ed810bf817c 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -13,7 +13,7 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" @@ -70,6 +70,10 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineBasicBlock &MBB) { return OS; } +Printable llvm::printMBBReference(const MachineBasicBlock &MBB) { + return Printable([&MBB](raw_ostream &OS) { return MBB.printAsOperand(OS); }); +} + /// When an MBB is added to an MF, we need to update the parent pointer of the /// MBB, the MBB numbering, and any instructions in the MBB to be on the right /// operand list for registers. @@ -255,22 +259,23 @@ std::string MachineBasicBlock::getFullName() const { return Name; } -void MachineBasicBlock::print(raw_ostream &OS, const SlotIndexes *Indexes) - const { +void MachineBasicBlock::print(raw_ostream &OS, const SlotIndexes *Indexes, + bool IsStandalone) const { const MachineFunction *MF = getParent(); if (!MF) { OS << "Can't print out MachineBasicBlock because parent MachineFunction" << " is null\n"; return; } - const Function *F = MF->getFunction(); - const Module *M = F ? F->getParent() : nullptr; + const Function &F = MF->getFunction(); + const Module *M = F.getParent(); ModuleSlotTracker MST(M); - print(OS, MST, Indexes); + print(OS, MST, Indexes, IsStandalone); } void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST, - const SlotIndexes *Indexes) const { + const SlotIndexes *Indexes, + bool IsStandalone) const { const MachineFunction *MF = getParent(); if (!MF) { OS << "Can't print out MachineBasicBlock because parent MachineFunction" @@ -281,7 +286,7 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST, if (Indexes) OS << Indexes->getMBBStartIdx(this) << '\t'; - OS << "BB#" << getNumber() << ": "; + OS << printMBBReference(*this) << ": "; const char *Comma = ""; if (const BasicBlock *LBB = getBasicBlock()) { @@ -313,7 +318,7 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST, if (Indexes) OS << '\t'; OS << " Predecessors according to CFG:"; for (const_pred_iterator PI = pred_begin(), E = pred_end(); PI != E; ++PI) - OS << " BB#" << (*PI)->getNumber(); + OS << " " << printMBBReference(*(*PI)); OS << '\n'; } @@ -326,7 +331,7 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST, OS << '\t'; if (I.isInsideBundle()) OS << " * "; - I.print(OS, MST); + I.print(OS, MST, IsStandalone); } // Print the successors of this block according to the CFG. 
@@ -334,7 +339,7 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST, if (Indexes) OS << '\t'; OS << " Successors according to CFG:"; for (const_succ_iterator SI = succ_begin(), E = succ_end(); SI != E; ++SI) { - OS << " BB#" << (*SI)->getNumber(); + OS << " " << printMBBReference(*(*SI)); if (!Probs.empty()) OS << '(' << *getProbabilityIterator(SI) << ')'; } @@ -350,7 +355,7 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST, void MachineBasicBlock::printAsOperand(raw_ostream &OS, bool /*PrintType*/) const { - OS << "BB#" << getNumber(); + OS << "%bb." << getNumber(); } void MachineBasicBlock::removeLiveIn(MCPhysReg Reg, LaneBitmask LaneMask) { @@ -767,10 +772,9 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock(); MF->insert(std::next(MachineFunction::iterator(this)), NMBB); - DEBUG(dbgs() << "Splitting critical edge:" " BB#" << getNumber() - << " -- BB#" << NMBB->getNumber() - << " -- BB#" << Succ->getNumber() << '\n'); + DEBUG(dbgs() << "Splitting critical edge: " << printMBBReference(*this) + << " -- " << printMBBReference(*NMBB) << " -- " + << printMBBReference(*Succ) << '\n'); LiveIntervals *LIS = P.getAnalysisIfAvailable<LiveIntervals>(); SlotIndexes *Indexes = P.getAnalysisIfAvailable<SlotIndexes>(); @@ -1023,8 +1027,8 @@ bool MachineBasicBlock::canSplitCriticalEdge( // case that we can't handle. Since this never happens in properly optimized // code, just skip those edges. if (TBB && TBB == FBB) { - DEBUG(dbgs() << "Won't split critical edge after degenerate BB#" - << getNumber() << '\n'); + DEBUG(dbgs() << "Won't split critical edge after degenerate " - << printMBBReference(*this) << '\n'); return false; } return true; diff --git a/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/lib/CodeGen/MachineBlockFrequencyInfo.cpp index 2c336e450569..3459a9f71a73 100644 --- a/lib/CodeGen/MachineBlockFrequencyInfo.cpp +++ b/lib/CodeGen/MachineBlockFrequencyInfo.cpp @@ -224,14 +224,14 @@ MachineBlockFrequencyInfo::getBlockFreq(const MachineBasicBlock *MBB) const { Optional<uint64_t> MachineBlockFrequencyInfo::getBlockProfileCount( const MachineBasicBlock *MBB) const { - const Function *F = MBFI->getFunction()->getFunction(); - return MBFI ? MBFI->getBlockProfileCount(*F, MBB) : None; + const Function &F = MBFI->getFunction()->getFunction(); + return MBFI ? MBFI->getBlockProfileCount(F, MBB) : None; } Optional<uint64_t> MachineBlockFrequencyInfo::getProfileCountFromFreq(uint64_t Freq) const { - const Function *F = MBFI->getFunction()->getFunction(); - return MBFI ? MBFI->getProfileCountFromFreq(*F, Freq) : None; + const Function &F = MBFI->getFunction()->getFunction(); + return MBFI ? MBFI->getProfileCountFromFreq(F, Freq) : None; } bool diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index f0285ea8f8eb..84c808ee7938 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -546,7 +546,7 @@ INITIALIZE_PASS_END(MachineBlockPlacement, DEBUG_TYPE, static std::string getBlockName(const MachineBasicBlock *BB) { std::string Result; raw_string_ostream OS(Result); - OS << "BB#" << BB->getNumber(); + OS << printMBBReference(*BB); OS << " ('" << BB->getName() << "')"; OS.flush(); return Result; @@ -1235,7 +1235,7 @@ void MachineBlockPlacement::precomputeTriangleChains() { // When profile is available, we need to handle the triangle-shape CFG.
static BranchProbability getLayoutSuccessorProbThreshold( const MachineBasicBlock *BB) { - if (!BB->getParent()->getFunction()->getEntryCount()) + if (!BB->getParent()->getFunction().hasProfileData()) return BranchProbability(StaticLikelyProb, 100); if (BB->succ_size() == 2) { const MachineBasicBlock *Succ1 = *BB->succ_begin(); @@ -1769,7 +1769,7 @@ MachineBlockPlacement::findBestLoopTop(const MachineLoop &L, // i.e. when the layout predecessor does not fallthrough to the loop header. // In practice this never happens though: there always seems to be a preheader // that can fallthrough and that is also placed before the header. - if (F->getFunction()->optForSize()) + if (F->getFunction().optForSize()) return L.getHeader(); // Check that the header hasn't been fused with a preheader block due to @@ -2178,7 +2178,7 @@ MachineBlockPlacement::collectLoopBlockSet(const MachineLoop &L) { // will be merged into the first outer loop chain for which this block is not // cold anymore. This needs precise profile data and we only do this when // profile data is available. - if (F->getFunction()->getEntryCount() || ForceLoopColdBlock) { + if (F->getFunction().hasProfileData() || ForceLoopColdBlock) { BlockFrequency LoopFreq(0); for (auto LoopPred : L.getHeader()->predecessors()) if (!L.contains(LoopPred)) @@ -2220,7 +2220,7 @@ void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) { // for better layout. bool RotateLoopWithProfile = ForcePreciseRotationCost || - (PreciseRotationCost && F->getFunction()->getEntryCount()); + (PreciseRotationCost && F->getFunction().hasProfileData()); // First check to see if there is an obviously preferable top block for the // loop. This will default to the header, but may end up as one of the @@ -2485,7 +2485,7 @@ void MachineBlockPlacement::alignBlocks() { // exclusively on the loop info here so that we can align backedges in // unnatural CFGs and backedges that were introduced purely because of the // loop rotations done during this layout pass. - if (F->getFunction()->optForSize()) + if (F->getFunction().optForSize()) return; BlockChain &FunctionChain = *BlockToChain[&F->front()]; if (FunctionChain.begin() == FunctionChain.end()) @@ -2715,7 +2715,7 @@ bool MachineBlockPlacement::maybeTailDuplicateBlock( } bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { - if (skipFunction(*MF.getFunction())) + if (skipFunction(MF.getFunction())) return false; // Check for single-block functions and skip them. @@ -2760,7 +2760,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { if (TailDupPlacement) { MPDT = &getAnalysis(); - if (MF.getFunction()->optForSize()) + if (MF.getFunction().optForSize()) TailDupSize = 1; bool PreRegAlloc = false; TailDup.initMF(MF, PreRegAlloc, MBPI, /* LayoutMode */ true, TailDupSize); @@ -2817,7 +2817,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { } if (ViewBlockLayoutWithBFI != GVDT_None && (ViewBlockFreqFuncName.empty() || - F->getFunction()->getName().equals(ViewBlockFreqFuncName))) { + F->getFunction().getName().equals(ViewBlockFreqFuncName))) { MBFI->view("MBP." 
+ MF.getName(), false); } diff --git a/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/lib/CodeGen/MachineBranchProbabilityInfo.cpp index 21eff9dfff9c..e4952aaaba06 100644 --- a/lib/CodeGen/MachineBranchProbabilityInfo.cpp +++ b/lib/CodeGen/MachineBranchProbabilityInfo.cpp @@ -84,7 +84,7 @@ raw_ostream &MachineBranchProbabilityInfo::printEdgeProbability( const MachineBasicBlock *Dst) const { const BranchProbability Prob = getEdgeProbability(Src, Dst); - OS << "edge MBB#" << Src->getNumber() << " -> MBB#" << Dst->getNumber() + OS << "edge " << printMBBReference(*Src) << " -> " << printMBBReference(*Dst) << " probability is " << Prob << (isEdgeHot(Src, Dst) ? " [HOT edge]\n" : "\n"); diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp index aaac6ad9336c..8b7d2980ac8e 100644 --- a/lib/CodeGen/MachineCSE.cpp +++ b/lib/CodeGen/MachineCSE.cpp @@ -176,8 +176,7 @@ bool MachineCSE::PerformTrivialCopyPropagation(MachineInstr *MI, // class given a super-reg class and subreg index. if (DefMI->getOperand(1).getSubReg()) continue; - const TargetRegisterClass *RC = MRI->getRegClass(Reg); - if (!MRI->constrainRegClass(SrcReg, RC)) + if (!MRI->constrainRegAttrs(SrcReg, Reg)) continue; DEBUG(dbgs() << "Coalescing: " << *DefMI); DEBUG(dbgs() << "*** to: " << *MI); @@ -588,11 +587,11 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { break; } - // Don't perform CSE if the result of the old instruction cannot exist - // within the register class of the new instruction. - const TargetRegisterClass *OldRC = MRI->getRegClass(OldReg); - if (!MRI->constrainRegClass(NewReg, OldRC)) { - DEBUG(dbgs() << "*** Not the same register class, avoid CSE!\n"); + // Don't perform CSE if the result of the new instruction cannot exist + // within the constraints (register class, bank, or low-level type) of + // the old instruction. + if (!MRI->constrainRegAttrs(NewReg, OldReg)) { + DEBUG(dbgs() << "*** Not the same register constraints, avoid CSE!\n"); DoCSE = false; break; } @@ -623,12 +622,12 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { // Go through implicit defs of CSMI and MI, and clear the kill flags on // their uses in all the instructions between CSMI and MI. // We might have made some of the kill flags redundant, consider: - // subs ... %NZCV <- CSMI - // csinc ... %NZCV <- this kill flag isn't valid anymore - // subs ... %NZCV <- MI, to be eliminated - // csinc ... %NZCV + // subs ... implicit-def %nzcv <- CSMI + // csinc ... implicit killed %nzcv <- this kill flag isn't valid anymore + // subs ... implicit-def %nzcv <- MI, to be eliminated + // csinc ... implicit killed %nzcv // Since we eliminated MI, and reused a register imp-def'd by CSMI - // (here %NZCV), that register, if it was killed before MI, should have + // (here %nzcv), that register, if it was killed before MI, should have // that kill flag removed, because it's lifetime was extended. 
if (CSMI->getParent() == MI->getParent()) { for (MachineBasicBlock::iterator II = CSMI, IE = MI; II != IE; ++II) @@ -727,7 +726,7 @@ bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) { } bool MachineCSE::runOnMachineFunction(MachineFunction &MF) { - if (skipFunction(*MF.getFunction())) + if (skipFunction(MF.getFunction())) return false; TII = MF.getSubtarget().getInstrInfo(); diff --git a/lib/CodeGen/MachineCombiner.cpp b/lib/CodeGen/MachineCombiner.cpp index f61db309ed7b..702d21228477 100644 --- a/lib/CodeGen/MachineCombiner.cpp +++ b/lib/CodeGen/MachineCombiner.cpp @@ -16,7 +16,6 @@ #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineTraceMetrics.h" @@ -282,9 +281,16 @@ bool MachineCombiner::improvesCriticalPathLen( // of the original code sequence. This may allow the transform to proceed // even if the instruction depths (data dependency cycles) become worse. - unsigned NewRootLatency = getLatency(Root, NewRoot, BlockTrace); - unsigned RootLatency = 0; + // Account for the latency of the inserted and deleted instructions by + // adding up their latencies. This assumes that the inserted and deleted + // instructions are dependent instruction chains, which might not hold + // in all cases. + unsigned NewRootLatency = 0; + for (unsigned i = 0; i < InsInstrs.size() - 1; i++) + NewRootLatency += TSchedModel.computeInstrLatency(InsInstrs[i]); + NewRootLatency += getLatency(Root, NewRoot, BlockTrace); + unsigned RootLatency = 0; for (auto I : DelInstrs) RootLatency += TSchedModel.computeInstrLatency(I); @@ -542,7 +548,7 @@ bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) { MLI = &getAnalysis<MachineLoopInfo>(); Traces = &getAnalysis<MachineTraceMetrics>(); MinInstr = nullptr; - OptSize = MF.getFunction()->optForSize(); + OptSize = MF.getFunction().optForSize(); DEBUG(dbgs() << getPassName() << ": " << MF.getName() << '\n'); if (!TII->useMachineCombiner()) { diff --git a/lib/CodeGen/MachineCopyPropagation.cpp b/lib/CodeGen/MachineCopyPropagation.cpp index f0cbcf6fcd2b..fcec05adc732 100644 --- a/lib/CodeGen/MachineCopyPropagation.cpp +++ b/lib/CodeGen/MachineCopyPropagation.cpp @@ -23,7 +23,6 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/MC/MCRegisterInfo.h" @@ -226,19 +225,19 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { // The two copies cancel out and the source of the first copy // hasn't been overridden, eliminate the second one. e.g. - // %ECX = COPY %EAX - // ... nothing clobbered EAX. - // %EAX = COPY %ECX + // %ecx = COPY %eax + // ... nothing clobbered eax. + // %eax = COPY %ecx // => - // %ECX = COPY %EAX + // %ecx = COPY %eax // // or // - // %ECX = COPY %EAX - // ... nothing clobbered EAX. - // %ECX = COPY %EAX + // %ecx = COPY %eax + // ... nothing clobbered eax.
+ // %ecx = COPY %eax // => - // %ECX = COPY %EAX + // %ecx = COPY %eax if (eraseIfRedundant(*MI, Def, Src) || eraseIfRedundant(*MI, Src, Def)) continue; @@ -262,11 +261,11 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { // If 'Def' is previously source of another copy, then this earlier copy's // source is no longer available. e.g. - // %xmm9 = copy %xmm2 + // %xmm9 = copy %xmm2 // ... - // %xmm2 = copy %xmm0 + // %xmm2 = copy %xmm0 // ... - // %xmm2 = copy %xmm9 + // %xmm2 = copy %xmm9 ClobberRegister(Def); for (const MachineOperand &MO : MI->implicit_operands()) { if (!MO.isReg() || !MO.isDef()) @@ -379,7 +378,7 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { } bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) { - if (skipFunction(*MF.getFunction())) + if (skipFunction(MF.getFunction())) return false; Changed = false; diff --git a/lib/CodeGen/MachineDominators.cpp b/lib/CodeGen/MachineDominators.cpp index 845e8232477c..517ac29b6450 100644 --- a/lib/CodeGen/MachineDominators.cpp +++ b/lib/CodeGen/MachineDominators.cpp @@ -26,7 +26,7 @@ static bool VerifyMachineDomInfo = true; static bool VerifyMachineDomInfo = false; #endif static cl::opt VerifyMachineDomInfoX( - "verify-machine-dom-info", cl::location(VerifyMachineDomInfo), + "verify-machine-dom-info", cl::location(VerifyMachineDomInfo), cl::Hidden, cl::desc("Verify machine dominator info (time consuming)")); namespace llvm { @@ -148,7 +148,8 @@ void MachineDominatorTree::verifyDomTree() const { OtherDT.recalculate(F); if (getRootNode()->getBlock() != OtherDT.getRootNode()->getBlock() || DT->compare(OtherDT)) { - errs() << "MachineDominatorTree is not up to date!\nComputed:\n"; + errs() << "MachineDominatorTree for function " << F.getName() + << " is not up to date!\nComputed:\n"; DT->print(errs()); errs() << "\nActual:\n"; OtherDT.print(errs()); diff --git a/lib/CodeGen/MachineFrameInfo.cpp b/lib/CodeGen/MachineFrameInfo.cpp index 572aed8abf40..2aa9d6b816c8 100644 --- a/lib/CodeGen/MachineFrameInfo.cpp +++ b/lib/CodeGen/MachineFrameInfo.cpp @@ -47,12 +47,13 @@ static inline unsigned clampStackAlignment(bool ShouldClamp, unsigned Align, } int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment, - bool isSS, const AllocaInst *Alloca, - uint8_t ID) { + bool IsSpillSlot, + const AllocaInst *Alloca, + uint8_t StackID) { assert(Size != 0 && "Cannot allocate zero size stack objects!"); Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment); - Objects.push_back(StackObject(Size, Alignment, 0, false, isSS, Alloca, - !isSS, ID)); + Objects.push_back(StackObject(Size, Alignment, 0, false, IsSpillSlot, Alloca, + !IsSpillSlot, StackID)); int Index = (int)Objects.size() - NumFixedObjects - 1; assert(Index >= 0 && "Bad frame index!"); ensureMaxAlignment(Alignment); @@ -78,7 +79,7 @@ int MachineFrameInfo::CreateVariableSizedObject(unsigned Alignment, } int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset, - bool Immutable, bool isAliased) { + bool IsImmutable, bool IsAliased) { assert(Size != 0 && "Cannot allocate zero size fixed stack objects!"); // The alignment of the frame index can be determined from its offset from // the incoming frame position. If the frame object is at offset 32 and @@ -86,23 +87,24 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset, // object is 16-byte aligned. 
Note that unlike the non-fixed case, if the // stack needs realignment, we can't assume that the stack will in fact be // aligned. - unsigned Align = MinAlign(SPOffset, ForcedRealign ? 1 : StackAlignment); - Align = clampStackAlignment(!StackRealignable, Align, StackAlignment); - Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable, - /*isSS*/ false, - /*Alloca*/ nullptr, isAliased)); + unsigned Alignment = MinAlign(SPOffset, ForcedRealign ? 1 : StackAlignment); + Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment); + Objects.insert(Objects.begin(), + StackObject(Size, Alignment, SPOffset, IsImmutable, + /*isSpillSlot=*/false, /*Alloca=*/nullptr, + IsAliased)); return -++NumFixedObjects; } int MachineFrameInfo::CreateFixedSpillStackObject(uint64_t Size, int64_t SPOffset, - bool Immutable) { - unsigned Align = MinAlign(SPOffset, ForcedRealign ? 1 : StackAlignment); - Align = clampStackAlignment(!StackRealignable, Align, StackAlignment); - Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable, - /*isSS*/ true, - /*Alloca*/ nullptr, - /*isAliased*/ false)); + bool IsImmutable) { + unsigned Alignment = MinAlign(SPOffset, ForcedRealign ? 1 : StackAlignment); + Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment); + Objects.insert(Objects.begin(), + StackObject(Size, Alignment, SPOffset, IsImmutable, + /*IsSpillSlot=*/true, /*Alloca=*/nullptr, + /*IsAliased=*/false)); return -++NumFixedObjects; } diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index 1f55b8fa495e..58ad3c96c341 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -119,16 +119,16 @@ void ilist_alloc_traits::deleteNode(MachineBasicBlock *MBB) { } static inline unsigned getFnStackAlignment(const TargetSubtargetInfo *STI, - const Function *Fn) { - if (Fn->hasFnAttribute(Attribute::StackAlignment)) - return Fn->getFnStackAlignment(); + const Function &F) { + if (F.hasFnAttribute(Attribute::StackAlignment)) + return F.getFnStackAlignment(); return STI->getFrameLowering()->getStackAlignment(); } -MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM, +MachineFunction::MachineFunction(const Function &F, const TargetMachine &Target, + const TargetSubtargetInfo &STI, unsigned FunctionNum, MachineModuleInfo &mmi) - : Fn(F), Target(TM), STI(TM.getSubtargetImpl(*F)), Ctx(mmi.getContext()), - MMI(mmi) { + : F(F), Target(Target), STI(&STI), Ctx(mmi.getContext()), MMI(mmi) { FunctionNumber = FunctionNum; init(); } @@ -146,21 +146,21 @@ void MachineFunction::init() { // We can realign the stack if the target supports it and the user hasn't // explicitly asked us not to. 
bool CanRealignSP = STI->getFrameLowering()->isStackRealignable() && - !Fn->hasFnAttribute("no-realign-stack"); + !F.hasFnAttribute("no-realign-stack"); FrameInfo = new (Allocator) MachineFrameInfo( - getFnStackAlignment(STI, Fn), /*StackRealignable=*/CanRealignSP, + getFnStackAlignment(STI, F), /*StackRealignable=*/CanRealignSP, /*ForceRealign=*/CanRealignSP && - Fn->hasFnAttribute(Attribute::StackAlignment)); + F.hasFnAttribute(Attribute::StackAlignment)); - if (Fn->hasFnAttribute(Attribute::StackAlignment)) - FrameInfo->ensureMaxAlignment(Fn->getFnStackAlignment()); + if (F.hasFnAttribute(Attribute::StackAlignment)) + FrameInfo->ensureMaxAlignment(F.getFnStackAlignment()); ConstantPool = new (Allocator) MachineConstantPool(getDataLayout()); Alignment = STI->getTargetLowering()->getMinFunctionAlignment(); - // FIXME: Shouldn't use pref alignment if explicit alignment is set on Fn. + // FIXME: Shouldn't use pref alignment if explicit alignment is set on F. // FIXME: Use Function::optForSize(). - if (!Fn->hasFnAttribute(Attribute::OptimizeForSize)) + if (!F.hasFnAttribute(Attribute::OptimizeForSize)) Alignment = std::max(Alignment, STI->getTargetLowering()->getPrefFunctionAlignment()); @@ -170,7 +170,7 @@ void MachineFunction::init() { JumpTableInfo = nullptr; if (isFuncletEHPersonality(classifyEHPersonality( - Fn->hasPersonalityFn() ? Fn->getPersonalityFn() : nullptr))) { + F.hasPersonalityFn() ? F.getPersonalityFn() : nullptr))) { WinEHInfo = new (Allocator) WinEHFuncInfo(); } @@ -228,7 +228,7 @@ void MachineFunction::clear() { } const DataLayout &MachineFunction::getDataLayout() const { - return Fn->getParent()->getDataLayout(); + return F.getParent()->getDataLayout(); } /// Get the JumpTableInfo for this function. @@ -244,7 +244,7 @@ getOrCreateJumpTableInfo(unsigned EntryKind) { /// Should we be emitting segmented stack stuff for the function bool MachineFunction::shouldSplitStack() const { - return getFunction()->hasFnAttribute("split-stack"); + return getFunction().hasFnAttribute("split-stack"); } /// This discards all of the MachineBasicBlock numbers and recomputes them. @@ -485,8 +485,7 @@ LLVM_DUMP_METHOD void MachineFunction::dump() const { #endif StringRef MachineFunction::getName() const { - assert(getFunction() && "No function!"); - return getFunction()->getName(); + return getFunction().getName(); } void MachineFunction::print(raw_ostream &OS, const SlotIndexes *Indexes) const { @@ -519,11 +518,13 @@ void MachineFunction::print(raw_ostream &OS, const SlotIndexes *Indexes) const { OS << '\n'; } - ModuleSlotTracker MST(getFunction()->getParent()); - MST.incorporateFunction(*getFunction()); + ModuleSlotTracker MST(getFunction().getParent()); + MST.incorporateFunction(getFunction()); for (const auto &BB : *this) { OS << '\n'; - BB.print(OS, MST, Indexes); + // If we print the whole function, don't print any verbose information, + // since that information is already present. 
+ BB.print(OS, MST, Indexes, /*IsStandalone=*/false); } OS << "\n# End machine code for function " << getName() << ".\n\n"; @@ -546,7 +547,7 @@ namespace llvm { raw_string_ostream OSS(OutStr); if (isSimple()) { - OSS << "BB#" << Node->getNumber(); + OSS << printMBBReference(*Node); if (const BasicBlock *BB = Node->getBasicBlock()) OSS << ": " << BB->getName(); } else @@ -906,9 +907,9 @@ void MachineJumpTableInfo::print(raw_ostream &OS) const { OS << "Jump Tables:\n"; for (unsigned i = 0, e = JumpTables.size(); i != e; ++i) { - OS << " jt#" << i << ": "; + OS << printJumpTableEntryReference(i) << ": "; for (unsigned j = 0, f = JumpTables[i].MBBs.size(); j != f; ++j) - OS << " BB#" << JumpTables[i].MBBs[j]->getNumber(); + OS << ' ' << printMBBReference(*JumpTables[i].MBBs[j]); } OS << '\n'; @@ -918,6 +919,10 @@ void MachineJumpTableInfo::print(raw_ostream &OS) const { LLVM_DUMP_METHOD void MachineJumpTableInfo::dump() const { print(dbgs()); } #endif +Printable llvm::printJumpTableEntryReference(unsigned Idx) { + return Printable([Idx](raw_ostream &OS) { OS << "%jump-table." << Idx; }); +} + //===----------------------------------------------------------------------===// // MachineConstantPool implementation //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index 2d5307c78a5f..32785dee0cbc 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/Hashing.h" #include "llvm/ADT/None.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/AliasAnalysis.h" @@ -73,731 +74,29 @@ using namespace llvm; -static cl::opt PrintRegMaskNumRegs( - "print-regmask-num-regs", - cl::desc("Number of registers to limit to when " - "printing regmask operands in IR dumps. " - "unlimited = -1"), - cl::init(32), cl::Hidden); - -//===----------------------------------------------------------------------===// -// MachineOperand Implementation -//===----------------------------------------------------------------------===// - -void MachineOperand::setReg(unsigned Reg) { - if (getReg() == Reg) return; // No change. - - // Otherwise, we have to change the register. If this operand is embedded - // into a machine function, we need to update the old and new register's - // use/def lists. - if (MachineInstr *MI = getParent()) - if (MachineBasicBlock *MBB = MI->getParent()) - if (MachineFunction *MF = MBB->getParent()) { - MachineRegisterInfo &MRI = MF->getRegInfo(); - MRI.removeRegOperandFromUseList(this); - SmallContents.RegNo = Reg; - MRI.addRegOperandToUseList(this); - return; - } - - // Otherwise, just change the register, no problem. :) - SmallContents.RegNo = Reg; -} - -void MachineOperand::substVirtReg(unsigned Reg, unsigned SubIdx, - const TargetRegisterInfo &TRI) { - assert(TargetRegisterInfo::isVirtualRegister(Reg)); - if (SubIdx && getSubReg()) - SubIdx = TRI.composeSubRegIndices(SubIdx, getSubReg()); - setReg(Reg); - if (SubIdx) - setSubReg(SubIdx); -} - -void MachineOperand::substPhysReg(unsigned Reg, const TargetRegisterInfo &TRI) { - assert(TargetRegisterInfo::isPhysicalRegister(Reg)); - if (getSubReg()) { - Reg = TRI.getSubReg(Reg, getSubReg()); - // Note that getSubReg() may return 0 if the sub-register doesn't exist. - // That won't happen in legal code. 
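printMBBReference and the new printJumpTableEntryReference both return llvm::Printable, a small wrapper that defers formatting until the value is actually streamed, so call sites can write OS << printJumpTableEntryReference(i) without building temporary strings. A self-contained sketch of the idiom using std::ostream in place of raw_ostream (only the lambda body is taken from the patch; the wrapper is a simplified stand-in):

#include <functional>
#include <iostream>
#include <utility>

// Minimal analogue of llvm::Printable: store a print callback and run it when
// the object is written to a stream.
class Printable {
  std::function<void(std::ostream &)> Print;
public:
  explicit Printable(std::function<void(std::ostream &)> P)
      : Print(std::move(P)) {}
  friend std::ostream &operator<<(std::ostream &OS, const Printable &P) {
    P.Print(OS);
    return OS;
  }
};

// Analogue of printJumpTableEntryReference from the hunk above.
Printable printJumpTableEntryReference(unsigned Idx) {
  return Printable([Idx](std::ostream &OS) { OS << "%jump-table." << Idx; });
}

int main() {
  std::cout << "jt: " << printJumpTableEntryReference(3) << '\n'; // jt: %jump-table.3
}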
- setSubReg(0); - if (isDef()) - setIsUndef(false); - } - setReg(Reg); -} - -/// Change a def to a use, or a use to a def. -void MachineOperand::setIsDef(bool Val) { - assert(isReg() && "Wrong MachineOperand accessor"); - assert((!Val || !isDebug()) && "Marking a debug operation as def"); - if (IsDef == Val) - return; - // MRI may keep uses and defs in different list positions. - if (MachineInstr *MI = getParent()) - if (MachineBasicBlock *MBB = MI->getParent()) - if (MachineFunction *MF = MBB->getParent()) { - MachineRegisterInfo &MRI = MF->getRegInfo(); - MRI.removeRegOperandFromUseList(this); - IsDef = Val; - MRI.addRegOperandToUseList(this); - return; - } - IsDef = Val; -} - -// If this operand is currently a register operand, and if this is in a -// function, deregister the operand from the register's use/def list. -void MachineOperand::removeRegFromUses() { - if (!isReg() || !isOnRegUseList()) - return; - - if (MachineInstr *MI = getParent()) { - if (MachineBasicBlock *MBB = MI->getParent()) { - if (MachineFunction *MF = MBB->getParent()) - MF->getRegInfo().removeRegOperandFromUseList(this); - } - } -} - -/// ChangeToImmediate - Replace this operand with a new immediate operand of -/// the specified value. If an operand is known to be an immediate already, -/// the setImm method should be used. -void MachineOperand::ChangeToImmediate(int64_t ImmVal) { - assert((!isReg() || !isTied()) && "Cannot change a tied operand into an imm"); - - removeRegFromUses(); - - OpKind = MO_Immediate; - Contents.ImmVal = ImmVal; -} - -void MachineOperand::ChangeToFPImmediate(const ConstantFP *FPImm) { - assert((!isReg() || !isTied()) && "Cannot change a tied operand into an imm"); - - removeRegFromUses(); - - OpKind = MO_FPImmediate; - Contents.CFP = FPImm; -} - -void MachineOperand::ChangeToES(const char *SymName, unsigned char TargetFlags) { - assert((!isReg() || !isTied()) && - "Cannot change a tied operand into an external symbol"); - - removeRegFromUses(); - - OpKind = MO_ExternalSymbol; - Contents.OffsetedInfo.Val.SymbolName = SymName; - setOffset(0); // Offset is always 0. - setTargetFlags(TargetFlags); -} - -void MachineOperand::ChangeToMCSymbol(MCSymbol *Sym) { - assert((!isReg() || !isTied()) && - "Cannot change a tied operand into an MCSymbol"); - - removeRegFromUses(); - - OpKind = MO_MCSymbol; - Contents.Sym = Sym; -} - -void MachineOperand::ChangeToFrameIndex(int Idx) { - assert((!isReg() || !isTied()) && - "Cannot change a tied operand into a FrameIndex"); - - removeRegFromUses(); - - OpKind = MO_FrameIndex; - setIndex(Idx); -} - -void MachineOperand::ChangeToTargetIndex(unsigned Idx, int64_t Offset, - unsigned char TargetFlags) { - assert((!isReg() || !isTied()) && - "Cannot change a tied operand into a FrameIndex"); - - removeRegFromUses(); - - OpKind = MO_TargetIndex; - setIndex(Idx); - setOffset(Offset); - setTargetFlags(TargetFlags); -} - -/// ChangeToRegister - Replace this operand with a new register operand of -/// the specified value. If an operand is known to be an register already, -/// the setReg method should be used. -void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp, - bool isKill, bool isDead, bool isUndef, - bool isDebug) { - MachineRegisterInfo *RegInfo = nullptr; - if (MachineInstr *MI = getParent()) - if (MachineBasicBlock *MBB = MI->getParent()) - if (MachineFunction *MF = MBB->getParent()) - RegInfo = &MF->getRegInfo(); - // If this operand is already a register operand, remove it from the - // register's use/def lists. 
- bool WasReg = isReg(); - if (RegInfo && WasReg) - RegInfo->removeRegOperandFromUseList(this); - - // Change this to a register and set the reg#. - OpKind = MO_Register; - SmallContents.RegNo = Reg; - SubReg_TargetFlags = 0; - IsDef = isDef; - IsImp = isImp; - IsKill = isKill; - IsDead = isDead; - IsUndef = isUndef; - IsInternalRead = false; - IsEarlyClobber = false; - IsDebug = isDebug; - // Ensure isOnRegUseList() returns false. - Contents.Reg.Prev = nullptr; - // Preserve the tie when the operand was already a register. - if (!WasReg) - TiedTo = 0; - - // If this operand is embedded in a function, add the operand to the - // register's use/def list. - if (RegInfo) - RegInfo->addRegOperandToUseList(this); -} - -/// isIdenticalTo - Return true if this operand is identical to the specified -/// operand. Note that this should stay in sync with the hash_value overload -/// below. -bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const { - if (getType() != Other.getType() || - getTargetFlags() != Other.getTargetFlags()) - return false; - - switch (getType()) { - case MachineOperand::MO_Register: - return getReg() == Other.getReg() && isDef() == Other.isDef() && - getSubReg() == Other.getSubReg(); - case MachineOperand::MO_Immediate: - return getImm() == Other.getImm(); - case MachineOperand::MO_CImmediate: - return getCImm() == Other.getCImm(); - case MachineOperand::MO_FPImmediate: - return getFPImm() == Other.getFPImm(); - case MachineOperand::MO_MachineBasicBlock: - return getMBB() == Other.getMBB(); - case MachineOperand::MO_FrameIndex: - return getIndex() == Other.getIndex(); - case MachineOperand::MO_ConstantPoolIndex: - case MachineOperand::MO_TargetIndex: - return getIndex() == Other.getIndex() && getOffset() == Other.getOffset(); - case MachineOperand::MO_JumpTableIndex: - return getIndex() == Other.getIndex(); - case MachineOperand::MO_GlobalAddress: - return getGlobal() == Other.getGlobal() && getOffset() == Other.getOffset(); - case MachineOperand::MO_ExternalSymbol: - return strcmp(getSymbolName(), Other.getSymbolName()) == 0 && - getOffset() == Other.getOffset(); - case MachineOperand::MO_BlockAddress: - return getBlockAddress() == Other.getBlockAddress() && - getOffset() == Other.getOffset(); - case MachineOperand::MO_RegisterMask: - case MachineOperand::MO_RegisterLiveOut: { - // Shallow compare of the two RegMasks - const uint32_t *RegMask = getRegMask(); - const uint32_t *OtherRegMask = Other.getRegMask(); - if (RegMask == OtherRegMask) - return true; - - // Calculate the size of the RegMask - const MachineFunction *MF = getParent()->getMF(); - const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); - unsigned RegMaskSize = (TRI->getNumRegs() + 31) / 32; - - // Deep compare of the two RegMasks - return std::equal(RegMask, RegMask + RegMaskSize, OtherRegMask); - } - case MachineOperand::MO_MCSymbol: - return getMCSymbol() == Other.getMCSymbol(); - case MachineOperand::MO_CFIIndex: - return getCFIIndex() == Other.getCFIIndex(); - case MachineOperand::MO_Metadata: - return getMetadata() == Other.getMetadata(); - case MachineOperand::MO_IntrinsicID: - return getIntrinsicID() == Other.getIntrinsicID(); - case MachineOperand::MO_Predicate: - return getPredicate() == Other.getPredicate(); - } - llvm_unreachable("Invalid machine operand type"); -} - -// Note: this must stay exactly in sync with isIdenticalTo above. 
-hash_code llvm::hash_value(const MachineOperand &MO) { - switch (MO.getType()) { - case MachineOperand::MO_Register: - // Register operands don't have target flags. - return hash_combine(MO.getType(), MO.getReg(), MO.getSubReg(), MO.isDef()); - case MachineOperand::MO_Immediate: - return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getImm()); - case MachineOperand::MO_CImmediate: - return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getCImm()); - case MachineOperand::MO_FPImmediate: - return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getFPImm()); - case MachineOperand::MO_MachineBasicBlock: - return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMBB()); - case MachineOperand::MO_FrameIndex: - return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex()); - case MachineOperand::MO_ConstantPoolIndex: - case MachineOperand::MO_TargetIndex: - return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex(), - MO.getOffset()); - case MachineOperand::MO_JumpTableIndex: - return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex()); - case MachineOperand::MO_ExternalSymbol: - return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getOffset(), - MO.getSymbolName()); - case MachineOperand::MO_GlobalAddress: - return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getGlobal(), - MO.getOffset()); - case MachineOperand::MO_BlockAddress: - return hash_combine(MO.getType(), MO.getTargetFlags(), - MO.getBlockAddress(), MO.getOffset()); - case MachineOperand::MO_RegisterMask: - case MachineOperand::MO_RegisterLiveOut: - return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getRegMask()); - case MachineOperand::MO_Metadata: - return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMetadata()); - case MachineOperand::MO_MCSymbol: - return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMCSymbol()); - case MachineOperand::MO_CFIIndex: - return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getCFIIndex()); - case MachineOperand::MO_IntrinsicID: - return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIntrinsicID()); - case MachineOperand::MO_Predicate: - return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getPredicate()); - } - llvm_unreachable("Invalid machine operand type"); -} - -void MachineOperand::print(raw_ostream &OS, const TargetRegisterInfo *TRI, - const TargetIntrinsicInfo *IntrinsicInfo) const { - ModuleSlotTracker DummyMST(nullptr); - print(OS, DummyMST, TRI, IntrinsicInfo); -} - -void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, - const TargetRegisterInfo *TRI, - const TargetIntrinsicInfo *IntrinsicInfo) const { - switch (getType()) { - case MachineOperand::MO_Register: - OS << printReg(getReg(), TRI, getSubReg()); - - if (isDef() || isKill() || isDead() || isImplicit() || isUndef() || - isInternalRead() || isEarlyClobber() || isTied()) { - OS << '<'; - bool NeedComma = false; - if (isDef()) { - if (NeedComma) OS << ','; - if (isEarlyClobber()) - OS << "earlyclobber,"; - if (isImplicit()) - OS << "imp-"; - OS << "def"; - NeedComma = true; - // only makes sense when getSubReg() is set. - // Don't clutter the output otherwise. 
- if (isUndef() && getSubReg()) - OS << ",read-undef"; - } else if (isImplicit()) { - OS << "imp-use"; - NeedComma = true; - } - - if (isKill()) { - if (NeedComma) OS << ','; - OS << "kill"; - NeedComma = true; - } - if (isDead()) { - if (NeedComma) OS << ','; - OS << "dead"; - NeedComma = true; - } - if (isUndef() && isUse()) { - if (NeedComma) OS << ','; - OS << "undef"; - NeedComma = true; - } - if (isInternalRead()) { - if (NeedComma) OS << ','; - OS << "internal"; - NeedComma = true; - } - if (isTied()) { - if (NeedComma) OS << ','; - OS << "tied"; - if (TiedTo != 15) - OS << unsigned(TiedTo - 1); - } - OS << '>'; - } - break; - case MachineOperand::MO_Immediate: - OS << getImm(); - break; - case MachineOperand::MO_CImmediate: - getCImm()->getValue().print(OS, false); - break; - case MachineOperand::MO_FPImmediate: - if (getFPImm()->getType()->isFloatTy()) { - OS << getFPImm()->getValueAPF().convertToFloat(); - } else if (getFPImm()->getType()->isHalfTy()) { - APFloat APF = getFPImm()->getValueAPF(); - bool Unused; - APF.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &Unused); - OS << "half " << APF.convertToFloat(); - } else if (getFPImm()->getType()->isFP128Ty()) { - APFloat APF = getFPImm()->getValueAPF(); - SmallString<16> Str; - getFPImm()->getValueAPF().toString(Str); - OS << "quad " << Str; - } else if (getFPImm()->getType()->isX86_FP80Ty()) { - APFloat APF = getFPImm()->getValueAPF(); - OS << "x86_fp80 0xK"; - APInt API = APF.bitcastToAPInt(); - OS << format_hex_no_prefix(API.getHiBits(16).getZExtValue(), 4, - /*Upper=*/true); - OS << format_hex_no_prefix(API.getLoBits(64).getZExtValue(), 16, - /*Upper=*/true); - } else { - OS << getFPImm()->getValueAPF().convertToDouble(); - } - break; - case MachineOperand::MO_MachineBasicBlock: - OS << "getNumber() << ">"; - break; - case MachineOperand::MO_FrameIndex: - OS << "'; - break; - case MachineOperand::MO_ConstantPoolIndex: - OS << "'; - break; - case MachineOperand::MO_TargetIndex: - OS << "'; - break; - case MachineOperand::MO_JumpTableIndex: - OS << "'; - break; - case MachineOperand::MO_GlobalAddress: - OS << "printAsOperand(OS, /*PrintType=*/false, MST); - if (getOffset()) OS << "+" << getOffset(); - OS << '>'; - break; - case MachineOperand::MO_ExternalSymbol: - OS << "'; - break; - case MachineOperand::MO_BlockAddress: - OS << '<'; - getBlockAddress()->printAsOperand(OS, /*PrintType=*/false, MST); - if (getOffset()) OS << "+" << getOffset(); - OS << '>'; - break; - case MachineOperand::MO_RegisterMask: { - unsigned NumRegsInMask = 0; - unsigned NumRegsEmitted = 0; - OS << "getNumRegs(); ++i) { - unsigned MaskWord = i / 32; - unsigned MaskBit = i % 32; - if (getRegMask()[MaskWord] & (1 << MaskBit)) { - if (PrintRegMaskNumRegs < 0 || - NumRegsEmitted <= static_cast(PrintRegMaskNumRegs)) { - OS << " " << printReg(i, TRI); - NumRegsEmitted++; - } - NumRegsInMask++; - } - } - if (NumRegsEmitted != NumRegsInMask) - OS << " and " << (NumRegsInMask - NumRegsEmitted) << " more..."; - OS << ">"; - break; - } - case MachineOperand::MO_RegisterLiveOut: - OS << ""; - break; - case MachineOperand::MO_Metadata: - OS << '<'; - getMetadata()->printAsOperand(OS, MST); - OS << '>'; - break; - case MachineOperand::MO_MCSymbol: - OS << "'; - break; - case MachineOperand::MO_CFIIndex: - OS << ""; - break; - case MachineOperand::MO_IntrinsicID: { - Intrinsic::ID ID = getIntrinsicID(); - if (ID < Intrinsic::num_intrinsics) - OS << "'; - else if (IntrinsicInfo) - OS << "getName(ID) << '>'; - else - OS << "'; - break; - } - case 
MachineOperand::MO_Predicate: { - auto Pred = static_cast(getPredicate()); - OS << '<' << (CmpInst::isIntPredicate(Pred) ? "intpred" : "floatpred") - << CmpInst::getPredicateName(Pred) << '>'; - break; - } - } - if (unsigned TF = getTargetFlags()) - OS << "[TF=" << TF << ']'; -} - -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -LLVM_DUMP_METHOD void MachineOperand::dump() const { - dbgs() << *this << '\n'; -} -#endif - -//===----------------------------------------------------------------------===// -// MachineMemOperand Implementation -//===----------------------------------------------------------------------===// - -/// getAddrSpace - Return the LLVM IR address space number that this pointer -/// points into. -unsigned MachinePointerInfo::getAddrSpace() const { - if (V.isNull()) return 0; - - if (V.is()) - return V.get()->getAddressSpace(); - - return cast(V.get()->getType())->getAddressSpace(); -} - -/// isDereferenceable - Return true if V is always dereferenceable for -/// Offset + Size byte. -bool MachinePointerInfo::isDereferenceable(unsigned Size, LLVMContext &C, - const DataLayout &DL) const { - if (!V.is()) - return false; - - const Value *BasePtr = V.get(); - if (BasePtr == nullptr) - return false; - - return isDereferenceableAndAlignedPointer( - BasePtr, 1, APInt(DL.getPointerSizeInBits(), Offset + Size), DL); -} - -/// getConstantPool - Return a MachinePointerInfo record that refers to the -/// constant pool. -MachinePointerInfo MachinePointerInfo::getConstantPool(MachineFunction &MF) { - return MachinePointerInfo(MF.getPSVManager().getConstantPool()); -} - -/// getFixedStack - Return a MachinePointerInfo record that refers to the -/// the specified FrameIndex. -MachinePointerInfo MachinePointerInfo::getFixedStack(MachineFunction &MF, - int FI, int64_t Offset) { - return MachinePointerInfo(MF.getPSVManager().getFixedStack(FI), Offset); -} - -MachinePointerInfo MachinePointerInfo::getJumpTable(MachineFunction &MF) { - return MachinePointerInfo(MF.getPSVManager().getJumpTable()); -} - -MachinePointerInfo MachinePointerInfo::getGOT(MachineFunction &MF) { - return MachinePointerInfo(MF.getPSVManager().getGOT()); -} - -MachinePointerInfo MachinePointerInfo::getStack(MachineFunction &MF, - int64_t Offset, - uint8_t ID) { - return MachinePointerInfo(MF.getPSVManager().getStack(), Offset,ID); +static const MachineFunction *getMFIfAvailable(const MachineInstr &MI) { + if (const MachineBasicBlock *MBB = MI.getParent()) + if (const MachineFunction *MF = MBB->getParent()) + return MF; + return nullptr; } -MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, Flags f, - uint64_t s, unsigned int a, - const AAMDNodes &AAInfo, - const MDNode *Ranges, - SyncScope::ID SSID, - AtomicOrdering Ordering, - AtomicOrdering FailureOrdering) - : PtrInfo(ptrinfo), Size(s), FlagVals(f), BaseAlignLog2(Log2_32(a) + 1), - AAInfo(AAInfo), Ranges(Ranges) { - assert((PtrInfo.V.isNull() || PtrInfo.V.is() || - isa(PtrInfo.V.get()->getType())) && - "invalid pointer value"); - assert(getBaseAlignment() == a && "Alignment is not a power of 2!"); - assert((isLoad() || isStore()) && "Not a load/store!"); - - AtomicInfo.SSID = static_cast(SSID); - assert(getSyncScopeID() == SSID && "Value truncated"); - AtomicInfo.Ordering = static_cast(Ordering); - assert(getOrdering() == Ordering && "Value truncated"); - AtomicInfo.FailureOrdering = static_cast(FailureOrdering); - assert(getFailureOrdering() == FailureOrdering && "Value truncated"); -} +// Try to crawl up to the machine function and get TRI and 
IntrinsicInfo from +// it. +static void tryToGetTargetInfo(const MachineInstr &MI, + const TargetRegisterInfo *&TRI, + const MachineRegisterInfo *&MRI, + const TargetIntrinsicInfo *&IntrinsicInfo, + const TargetInstrInfo *&TII) { -/// Profile - Gather unique data for the object. -/// -void MachineMemOperand::Profile(FoldingSetNodeID &ID) const { - ID.AddInteger(getOffset()); - ID.AddInteger(Size); - ID.AddPointer(getOpaqueValue()); - ID.AddInteger(getFlags()); - ID.AddInteger(getBaseAlignment()); -} - -void MachineMemOperand::refineAlignment(const MachineMemOperand *MMO) { - // The Value and Offset may differ due to CSE. But the flags and size - // should be the same. - assert(MMO->getFlags() == getFlags() && "Flags mismatch!"); - assert(MMO->getSize() == getSize() && "Size mismatch!"); - - if (MMO->getBaseAlignment() >= getBaseAlignment()) { - // Update the alignment value. - BaseAlignLog2 = Log2_32(MMO->getBaseAlignment()) + 1; - // Also update the base and offset, because the new alignment may - // not be applicable with the old ones. - PtrInfo = MMO->PtrInfo; + if (const MachineFunction *MF = getMFIfAvailable(MI)) { + TRI = MF->getSubtarget().getRegisterInfo(); + MRI = &MF->getRegInfo(); + IntrinsicInfo = MF->getTarget().getIntrinsicInfo(); + TII = MF->getSubtarget().getInstrInfo(); } } -/// getAlignment - Return the minimum known alignment in bytes of the -/// actual memory reference. -uint64_t MachineMemOperand::getAlignment() const { - return MinAlign(getBaseAlignment(), getOffset()); -} - -void MachineMemOperand::print(raw_ostream &OS) const { - ModuleSlotTracker DummyMST(nullptr); - print(OS, DummyMST); -} -void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST) const { - assert((isLoad() || isStore()) && - "SV has to be a load, store or both."); - - if (isVolatile()) - OS << "Volatile "; - - if (isLoad()) - OS << "LD"; - if (isStore()) - OS << "ST"; - OS << getSize(); - - // Print the address information. - OS << "["; - if (const Value *V = getValue()) - V->printAsOperand(OS, /*PrintType=*/false, MST); - else if (const PseudoSourceValue *PSV = getPseudoValue()) - PSV->printCustom(OS); - else - OS << ""; - - unsigned AS = getAddrSpace(); - if (AS != 0) - OS << "(addrspace=" << AS << ')'; - - // If the alignment of the memory reference itself differs from the alignment - // of the base pointer, print the base alignment explicitly, next to the base - // pointer. - if (getBaseAlignment() != getAlignment()) - OS << "(align=" << getBaseAlignment() << ")"; - - if (getOffset() != 0) - OS << "+" << getOffset(); - OS << "]"; - - // Print the alignment of the reference. - if (getBaseAlignment() != getAlignment() || getBaseAlignment() != getSize()) - OS << "(align=" << getAlignment() << ")"; - - // Print TBAA info. - if (const MDNode *TBAAInfo = getAAInfo().TBAA) { - OS << "(tbaa="; - if (TBAAInfo->getNumOperands() > 0) - TBAAInfo->getOperand(0)->printAsOperand(OS, MST); - else - OS << ""; - OS << ")"; - } - - // Print AA scope info. - if (const MDNode *ScopeInfo = getAAInfo().Scope) { - OS << "(alias.scope="; - if (ScopeInfo->getNumOperands() > 0) - for (unsigned i = 0, ie = ScopeInfo->getNumOperands(); i != ie; ++i) { - ScopeInfo->getOperand(i)->printAsOperand(OS, MST); - if (i != ie-1) - OS << ","; - } - else - OS << ""; - OS << ")"; - } - - // Print AA noalias scope info. 
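The new getMFIfAvailable/tryToGetTargetInfo helpers above let the printers degrade gracefully: a MachineInstr that has not been inserted into a block has no path to a MachineFunction, so no TRI/MRI/TII or intrinsic info can be fetched and printing falls back to generic output. A stand-alone sketch of that parent-chain crawl (the struct definitions are hypothetical stand-ins, not LLVM's types):

#include <cassert>

struct MachineFunction;                            // stand-in types, not LLVM's
struct MachineBasicBlock { const MachineFunction *Parent = nullptr; };
struct MachineInstr      { const MachineBasicBlock *Parent = nullptr; };
struct MachineFunction   { int RegInfo = 0; };

// Walk MI -> MBB -> MF, returning null if any link is missing.
static const MachineFunction *getMFIfAvailable(const MachineInstr &MI) {
  if (const MachineBasicBlock *MBB = MI.Parent)
    if (const MachineFunction *MF = MBB->Parent)
      return MF;
  return nullptr;
}

int main() {
  MachineFunction MF;
  MachineBasicBlock MBB{&MF};
  MachineInstr Attached{&MBB};
  MachineInstr Detached{};                         // not yet inserted into a block
  assert(getMFIfAvailable(Attached) == &MF);       // full target info available
  assert(getMFIfAvailable(Detached) == nullptr);   // print with generic defaults
}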
- if (const MDNode *NoAliasInfo = getAAInfo().NoAlias) { - OS << "(noalias="; - if (NoAliasInfo->getNumOperands() > 0) - for (unsigned i = 0, ie = NoAliasInfo->getNumOperands(); i != ie; ++i) { - NoAliasInfo->getOperand(i)->printAsOperand(OS, MST); - if (i != ie-1) - OS << ","; - } - else - OS << ""; - OS << ")"; - } - - if (const MDNode *Ranges = getRanges()) { - unsigned NumRanges = Ranges->getNumOperands(); - if (NumRanges != 0) { - OS << "(ranges="; - - for (unsigned I = 0; I != NumRanges; ++I) { - Ranges->getOperand(I)->printAsOperand(OS, MST); - if (I != NumRanges - 1) - OS << ','; - } - - OS << ')'; - } - } - - if (isNonTemporal()) - OS << "(nontemporal)"; - if (isDereferenceable()) - OS << "(dereferenceable)"; - if (isInvariant()) - OS << "(invariant)"; - if (getFlags() & MOTargetFlag1) - OS << "(flag1)"; - if (getFlags() & MOTargetFlag2) - OS << "(flag2)"; - if (getFlags() & MOTargetFlag3) - OS << "(flag3)"; -} - -//===----------------------------------------------------------------------===// -// MachineInstr Implementation -//===----------------------------------------------------------------------===// - void MachineInstr::addImplicitDefUseOperands(MachineFunction &MF) { if (MCID->ImplicitDefs) for (const MCPhysReg *ImpDefs = MCID->getImplicitDefs(); *ImpDefs; @@ -1467,7 +766,7 @@ MachineInstr::readsWritesVirtualRegister(unsigned Reg, if (MO.isUse()) Use |= !MO.isUndef(); else if (MO.getSubReg() && !MO.isUndef()) - // A partial doesn't count as reading the register. + // A partial def undef doesn't count as reading the register. PartDef = true; else FullDef = true; @@ -1888,6 +1187,41 @@ void MachineInstr::copyImplicitOps(MachineFunction &MF, } } +bool MachineInstr::hasComplexRegisterTies() const { + const MCInstrDesc &MCID = getDesc(); + for (unsigned I = 0, E = getNumOperands(); I < E; ++I) { + const auto &Operand = getOperand(I); + if (!Operand.isReg() || Operand.isDef()) + // Ignore the defined registers as MCID marks only the uses as tied. + continue; + int ExpectedTiedIdx = MCID.getOperandConstraint(I, MCOI::TIED_TO); + int TiedIdx = Operand.isTied() ? 
int(findTiedOperandIdx(I)) : -1; + if (ExpectedTiedIdx != TiedIdx) + return true; + } + return false; +} + +LLT MachineInstr::getTypeToPrint(unsigned OpIdx, SmallBitVector &PrintedTypes, + const MachineRegisterInfo &MRI) const { + const MachineOperand &Op = getOperand(OpIdx); + if (!Op.isReg()) + return LLT{}; + + if (isVariadic() || OpIdx >= getNumExplicitOperands()) + return MRI.getType(Op.getReg()); + + auto &OpInfo = getDesc().OpInfo[OpIdx]; + if (!OpInfo.isGenericType()) + return MRI.getType(Op.getReg()); + + if (PrintedTypes[OpInfo.getGenericTypeIndex()]) + return LLT{}; + + PrintedTypes.set(OpInfo.getGenericTypeIndex()); + return MRI.getType(Op.getReg()); +} + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void MachineInstr::dump() const { dbgs() << " "; @@ -1895,60 +1229,71 @@ LLVM_DUMP_METHOD void MachineInstr::dump() const { } #endif -void MachineInstr::print(raw_ostream &OS, bool SkipOpers, bool SkipDebugLoc, - const TargetInstrInfo *TII) const { +void MachineInstr::print(raw_ostream &OS, bool IsStandalone, bool SkipOpers, + bool SkipDebugLoc, const TargetInstrInfo *TII) const { const Module *M = nullptr; - if (const MachineBasicBlock *MBB = getParent()) - if (const MachineFunction *MF = MBB->getParent()) - M = MF->getFunction()->getParent(); + const Function *F = nullptr; + if (const MachineFunction *MF = getMFIfAvailable(*this)) { + F = &MF->getFunction(); + M = F->getParent(); + } ModuleSlotTracker MST(M); - print(OS, MST, SkipOpers, SkipDebugLoc, TII); + if (F) + MST.incorporateFunction(*F); + print(OS, MST, IsStandalone, SkipOpers, SkipDebugLoc, TII); } void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, - bool SkipOpers, bool SkipDebugLoc, + bool IsStandalone, bool SkipOpers, bool SkipDebugLoc, const TargetInstrInfo *TII) const { // We can be a bit tidier if we know the MachineFunction. const MachineFunction *MF = nullptr; const TargetRegisterInfo *TRI = nullptr; const MachineRegisterInfo *MRI = nullptr; const TargetIntrinsicInfo *IntrinsicInfo = nullptr; + tryToGetTargetInfo(*this, TRI, MRI, IntrinsicInfo, TII); + + if (isCFIInstruction()) + assert(getNumOperands() == 1 && "Expected 1 operand in CFI instruction"); + + SmallBitVector PrintedTypes(8); + bool ShouldPrintRegisterTies = hasComplexRegisterTies(); + auto getTiedOperandIdx = [&](unsigned OpIdx) { + if (!ShouldPrintRegisterTies) + return 0U; + const MachineOperand &MO = getOperand(OpIdx); + if (MO.isReg() && MO.isTied() && !MO.isDef()) + return findTiedOperandIdx(OpIdx); + return 0U; + }; + unsigned StartOp = 0; + unsigned e = getNumOperands(); - if (const MachineBasicBlock *MBB = getParent()) { - MF = MBB->getParent(); - if (MF) { - MRI = &MF->getRegInfo(); - TRI = MF->getSubtarget().getRegisterInfo(); - if (!TII) - TII = MF->getSubtarget().getInstrInfo(); - IntrinsicInfo = MF->getTarget().getIntrinsicInfo(); - } - } + // Print explicitly defined operands on the left of an assignment syntax. + while (StartOp < e) { + const MachineOperand &MO = getOperand(StartOp); + if (!MO.isReg() || !MO.isDef() || MO.isImplicit()) + break; - // Save a list of virtual registers. - SmallVector VirtRegs; + if (StartOp != 0) + OS << ", "; - // Print explicitly defined operands on the left of an assignment syntax. 
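getTypeToPrint, added above, makes sure each generic type index of an instruction is printed at most once: the first operand carrying a given index returns its LLT, and later operands with the same index get an invalid LLT back. A simplified sketch of that bookkeeping, with std::vector<bool> standing in for SmallBitVector and strings standing in for LLT:

#include <cassert>
#include <string>
#include <vector>

// Return the type to print for generic type index TypeIdx, or "" if this index
// has already been printed for the current instruction.
static std::string getTypeToPrint(unsigned TypeIdx, const std::string &Type,
                                  std::vector<bool> &PrintedTypes) {
  if (PrintedTypes.size() <= TypeIdx)
    PrintedTypes.resize(TypeIdx + 1, false);
  if (PrintedTypes[TypeIdx])
    return "";                 // analogous to returning an invalid LLT{}
  PrintedTypes[TypeIdx] = true;
  return Type;
}

int main() {
  std::vector<bool> Printed;
  // Two operands sharing generic type index 0: only the first prints "s64".
  assert(getTypeToPrint(0, "s64", Printed) == "s64");
  assert(getTypeToPrint(0, "s64", Printed) == "");
  // A different type index still prints.
  assert(getTypeToPrint(1, "p0", Printed) == "p0");
}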
- unsigned StartOp = 0, e = getNumOperands(); - for (; StartOp < e && getOperand(StartOp).isReg() && - getOperand(StartOp).isDef() && - !getOperand(StartOp).isImplicit(); - ++StartOp) { - if (StartOp != 0) OS << ", "; - getOperand(StartOp).print(OS, MST, TRI, IntrinsicInfo); - unsigned Reg = getOperand(StartOp).getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) { - VirtRegs.push_back(Reg); - LLT Ty = MRI ? MRI->getType(Reg) : LLT{}; - if (Ty.isValid()) - OS << '(' << Ty << ')'; - } + LLT TypeToPrint = MRI ? getTypeToPrint(StartOp, PrintedTypes, *MRI) : LLT{}; + unsigned TiedOperandIdx = getTiedOperandIdx(StartOp); + MO.print(OS, MST, TypeToPrint, /*PrintDef=*/false, IsStandalone, + ShouldPrintRegisterTies, TiedOperandIdx, TRI, IntrinsicInfo); + ++StartOp; } if (StartOp != 0) OS << " = "; + if (getFlag(MachineInstr::FrameSetup)) + OS << "frame-setup "; + else if (getFlag(MachineInstr::FrameDestroy)) + OS << "frame-destroy "; + // Print the opcode name. if (TII) OS << TII->getName(getOpcode()); @@ -1966,7 +1311,12 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, if (isInlineAsm() && e >= InlineAsm::MIOp_FirstOperand) { // Print asm string. OS << " "; - getOperand(InlineAsm::MIOp_AsmString).print(OS, MST, TRI); + const unsigned OpIdx = InlineAsm::MIOp_AsmString; + LLT TypeToPrint = MRI ? getTypeToPrint(OpIdx, PrintedTypes, *MRI) : LLT{}; + unsigned TiedOperandIdx = getTiedOperandIdx(OpIdx); + getOperand(OpIdx).print(OS, MST, TypeToPrint, /*PrintDef=*/true, IsStandalone, + ShouldPrintRegisterTies, TiedOperandIdx, TRI, + IntrinsicInfo); // Print HasSideEffects, MayLoad, MayStore, IsAlignStack unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm(); @@ -1992,28 +1342,20 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, for (unsigned i = StartOp, e = getNumOperands(); i != e; ++i) { const MachineOperand &MO = getOperand(i); - if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) - VirtRegs.push_back(MO.getReg()); - if (FirstOp) FirstOp = false; else OS << ","; OS << " "; - if (i < getDesc().NumOperands) { - const MCOperandInfo &MCOI = getDesc().OpInfo[i]; - if (MCOI.isPredicate()) - OS << "pred:"; - if (MCOI.isOptionalDef()) - OS << "opt:"; - } + if (isDebugValue() && MO.isMetadata()) { // Pretty print DBG_VALUE instructions. auto *DIV = dyn_cast(MO.getMetadata()); if (DIV && !DIV->getName().empty()) OS << "!\"" << DIV->getName() << '\"'; - else - MO.print(OS, MST, TRI); - } else if (TRI && (isInsertSubreg() || isRegSequence() || - (isSubregToReg() && i == 3)) && MO.isImm()) { - OS << TRI->getSubRegIndexName(MO.getImm()); + else { + LLT TypeToPrint = MRI ? getTypeToPrint(i, PrintedTypes, *MRI) : LLT{}; + unsigned TiedOperandIdx = getTiedOperandIdx(i); + MO.print(OS, MST, TypeToPrint, /*PrintDef=*/true, IsStandalone, + ShouldPrintRegisterTies, TiedOperandIdx, TRI, IntrinsicInfo); + } } else if (i == AsmDescOp && MO.isImm()) { // Pretty print the inline asm operand descriptor. OS << '$' << AsmOpCount++; @@ -2072,26 +1414,27 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, // Compute the index of the next operand descriptor. AsmDescOp += 1 + InlineAsm::getNumOperandRegisters(Flag); - } else - MO.print(OS, MST, TRI); + } else { + LLT TypeToPrint = MRI ? 
getTypeToPrint(i, PrintedTypes, *MRI) : LLT{}; + unsigned TiedOperandIdx = getTiedOperandIdx(i); + if (MO.isImm() && isOperandSubregIdx(i)) + MachineOperand::printSubRegIdx(OS, MO.getImm(), TRI); + else + MO.print(OS, MST, TypeToPrint, /*PrintDef=*/true, IsStandalone, + ShouldPrintRegisterTies, TiedOperandIdx, TRI, IntrinsicInfo); + } } - bool HaveSemi = false; - const unsigned PrintableFlags = FrameSetup | FrameDestroy; - if (Flags & PrintableFlags) { - if (!HaveSemi) { - OS << ";"; - HaveSemi = true; + if (!SkipDebugLoc) { + if (const DebugLoc &DL = getDebugLoc()) { + if (!FirstOp) + OS << ','; + OS << " debug-location "; + DL->printAsOperand(OS, MST); } - OS << " flags: "; - - if (Flags & FrameSetup) - OS << "FrameSetup"; - - if (Flags & FrameDestroy) - OS << "FrameDestroy"; } + bool HaveSemi = false; if (!memoperands_empty()) { if (!HaveSemi) { OS << ";"; @@ -2107,34 +1450,8 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, } } - // Print the regclass of any virtual registers encountered. - if (MRI && !VirtRegs.empty()) { - if (!HaveSemi) { - OS << ";"; - HaveSemi = true; - } - for (unsigned i = 0; i != VirtRegs.size(); ++i) { - const RegClassOrRegBank &RC = MRI->getRegClassOrRegBank(VirtRegs[i]); - if (!RC) - continue; - // Generic virtual registers do not have register classes. - if (RC.is()) - OS << " " << RC.get()->getName(); - else - OS << " " - << TRI->getRegClassName(RC.get()); - OS << ':' << printReg(VirtRegs[i]); - for (unsigned j = i+1; j != VirtRegs.size();) { - if (MRI->getRegClassOrRegBank(VirtRegs[j]) != RC) { - ++j; - continue; - } - if (VirtRegs[i] != VirtRegs[j]) - OS << "," << printReg(VirtRegs[j]); - VirtRegs.erase(VirtRegs.begin()+j); - } - } - } + if (SkipDebugLoc) + return; // Print debug location information. if (isDebugValue() && getOperand(e - 2).isMetadata()) { @@ -2152,13 +1469,6 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, } if (isIndirectDebugValue()) OS << " indirect"; - } else if (SkipDebugLoc) { - return; - } else if (debugLoc && MF) { - if (!HaveSemi) - OS << ";"; - OS << " dbg:"; - debugLoc.print(OS); } OS << '\n'; diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index 3e622b4a23c0..2c1b4f09a326 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -85,14 +85,14 @@ STATISTIC(NumPostRAHoisted, namespace { - class MachineLICM : public MachineFunctionPass { + class MachineLICMBase : public MachineFunctionPass { const TargetInstrInfo *TII; const TargetLoweringBase *TLI; const TargetRegisterInfo *TRI; const MachineFrameInfo *MFI; MachineRegisterInfo *MRI; TargetSchedModel SchedModel; - bool PreRegAlloc = true; + bool PreRegAlloc; // Various analyses that we use... AliasAnalysis *AA; // Alias analysis info. 
@@ -138,16 +138,8 @@ namespace { unsigned SpeculationState; public: - static char ID; // Pass identification, replacement for typeid - - MachineLICM() : MachineFunctionPass(ID) { - initializeMachineLICMPass(*PassRegistry::getPassRegistry()); - } - - explicit MachineLICM(bool PreRA) - : MachineFunctionPass(ID), PreRegAlloc(PreRA) { - initializeMachineLICMPass(*PassRegistry::getPassRegistry()); - } + MachineLICMBase(char &PassID, bool PreRegAlloc) + : MachineFunctionPass(PassID), PreRegAlloc(PreRegAlloc) {} bool runOnMachineFunction(MachineFunction &MF) override; @@ -252,11 +244,29 @@ namespace { MachineBasicBlock *getCurPreheader(); }; + class MachineLICM : public MachineLICMBase { + public: + static char ID; + MachineLICM() : MachineLICMBase(ID, false) { + initializeMachineLICMPass(*PassRegistry::getPassRegistry()); + } + }; + + class EarlyMachineLICM : public MachineLICMBase { + public: + static char ID; + EarlyMachineLICM() : MachineLICMBase(ID, true) { + initializeEarlyMachineLICMPass(*PassRegistry::getPassRegistry()); + } + }; + } // end anonymous namespace -char MachineLICM::ID = 0; +char MachineLICM::ID; +char EarlyMachineLICM::ID; char &llvm::MachineLICMID = MachineLICM::ID; +char &llvm::EarlyMachineLICMID = EarlyMachineLICM::ID; INITIALIZE_PASS_BEGIN(MachineLICM, DEBUG_TYPE, "Machine Loop Invariant Code Motion", false, false) @@ -266,6 +276,14 @@ INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_END(MachineLICM, DEBUG_TYPE, "Machine Loop Invariant Code Motion", false, false) +INITIALIZE_PASS_BEGIN(EarlyMachineLICM, "early-machinelicm", + "Early Machine Loop Invariant Code Motion", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_END(EarlyMachineLICM, "early-machinelicm", + "Early Machine Loop Invariant Code Motion", false, false) + /// Test if the given loop is the outer-most loop that has a unique predecessor. static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) { // Check whether this loop even has a unique predecessor. @@ -279,8 +297,8 @@ static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) { return true; } -bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { - if (skipFunction(*MF.getFunction())) +bool MachineLICMBase::runOnMachineFunction(MachineFunction &MF) { + if (skipFunction(MF.getFunction())) return false; Changed = FirstInLoop = false; @@ -368,11 +386,11 @@ static bool InstructionStoresToFI(const MachineInstr *MI, int FI) { /// Examine the instruction for potentai LICM candidate. Also /// gather register def and frame object update information. -void MachineLICM::ProcessMI(MachineInstr *MI, - BitVector &PhysRegDefs, - BitVector &PhysRegClobbers, - SmallSet &StoredFIs, - SmallVectorImpl &Candidates) { +void MachineLICMBase::ProcessMI(MachineInstr *MI, + BitVector &PhysRegDefs, + BitVector &PhysRegClobbers, + SmallSet &StoredFIs, + SmallVectorImpl &Candidates) { bool RuledOut = false; bool HasNonInvariantUse = false; unsigned Def = 0; @@ -455,7 +473,7 @@ void MachineLICM::ProcessMI(MachineInstr *MI, /// Walk the specified region of the CFG and hoist loop invariants out to the /// preheader. 
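The refactor above splits the pass into a shared MachineLICMBase plus two thin wrappers that differ only in the PreRegAlloc flag they hand to the base constructor and in which initializer they call. A stripped-down sketch of that shape with the pass machinery replaced by plain classes (illustrative only):

#include <iostream>

class MachineLICMBase {                      // shared hoisting logic lives here
protected:
  bool PreRegAlloc;
  explicit MachineLICMBase(bool PreRegAlloc) : PreRegAlloc(PreRegAlloc) {}
public:
  void runOnMachineFunction() {
    // The real pass branches on PreRegAlloc in several places; this only shows
    // that the mode is fixed by which wrapper was constructed.
    std::cout << (PreRegAlloc ? "pre-RA LICM\n" : "post-RA LICM\n");
  }
};

class MachineLICM : public MachineLICMBase {
public:
  MachineLICM() : MachineLICMBase(/*PreRegAlloc=*/false) {}
};

class EarlyMachineLICM : public MachineLICMBase {   // "early-machinelicm"
public:
  EarlyMachineLICM() : MachineLICMBase(/*PreRegAlloc=*/true) {}
};

int main() {
  EarlyMachineLICM().runOnMachineFunction();   // pre-RA LICM
  MachineLICM().runOnMachineFunction();        // post-RA LICM
}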
-void MachineLICM::HoistRegionPostRA() { +void MachineLICMBase::HoistRegionPostRA() { MachineBasicBlock *Preheader = getCurPreheader(); if (!Preheader) return; @@ -541,7 +559,7 @@ void MachineLICM::HoistRegionPostRA() { /// Add register 'Reg' to the livein sets of BBs in the current loop, and make /// sure it is not killed by any instructions in the loop. -void MachineLICM::AddToLiveIns(unsigned Reg) { +void MachineLICMBase::AddToLiveIns(unsigned Reg) { const std::vector &Blocks = CurLoop->getBlocks(); for (MachineBasicBlock *BB : Blocks) { if (!BB->isLiveIn(Reg)) @@ -558,13 +576,13 @@ void MachineLICM::AddToLiveIns(unsigned Reg) { /// When an instruction is found to only use loop invariant operands that is /// safe to hoist, this instruction is called to do the dirty work. -void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) { +void MachineLICMBase::HoistPostRA(MachineInstr *MI, unsigned Def) { MachineBasicBlock *Preheader = getCurPreheader(); // Now move the instructions to the predecessor, inserting it before any // terminator instructions. - DEBUG(dbgs() << "Hoisting to BB#" << Preheader->getNumber() << " from BB#" - << MI->getParent()->getNumber() << ": " << *MI); + DEBUG(dbgs() << "Hoisting to " << printMBBReference(*Preheader) << " from " + << printMBBReference(*MI->getParent()) << ": " << *MI); // Splice the instruction to the preheader. MachineBasicBlock *MBB = MI->getParent(); @@ -581,7 +599,7 @@ void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) { /// Check if this mbb is guaranteed to execute. If not then a load from this mbb /// may not be safe to hoist. -bool MachineLICM::IsGuaranteedToExecute(MachineBasicBlock *BB) { +bool MachineLICMBase::IsGuaranteedToExecute(MachineBasicBlock *BB) { if (SpeculationState != SpeculateUnknown) return SpeculationState == SpeculateFalse; @@ -600,24 +618,24 @@ bool MachineLICM::IsGuaranteedToExecute(MachineBasicBlock *BB) { return true; } -void MachineLICM::EnterScope(MachineBasicBlock *MBB) { - DEBUG(dbgs() << "Entering BB#" << MBB->getNumber() << '\n'); +void MachineLICMBase::EnterScope(MachineBasicBlock *MBB) { + DEBUG(dbgs() << "Entering " << printMBBReference(*MBB) << '\n'); // Remember livein register pressure. BackTrace.push_back(RegPressure); } -void MachineLICM::ExitScope(MachineBasicBlock *MBB) { - DEBUG(dbgs() << "Exiting BB#" << MBB->getNumber() << '\n'); +void MachineLICMBase::ExitScope(MachineBasicBlock *MBB) { + DEBUG(dbgs() << "Exiting " << printMBBReference(*MBB) << '\n'); BackTrace.pop_back(); } /// Destroy scope for the MBB that corresponds to the given dominator tree node /// if its a leaf or all of its children are done. Walk up the dominator tree to /// destroy ancestors which are now done. -void MachineLICM::ExitScopeIfDone(MachineDomTreeNode *Node, - DenseMap &OpenChildren, - DenseMap &ParentMap) { +void MachineLICMBase::ExitScopeIfDone(MachineDomTreeNode *Node, + DenseMap &OpenChildren, + DenseMap &ParentMap) { if (OpenChildren[Node]) return; @@ -638,7 +656,7 @@ void MachineLICM::ExitScopeIfDone(MachineDomTreeNode *Node, /// specified header block, and that are in the current loop) in depth first /// order w.r.t the DominatorTree. This allows us to visit definitions before /// uses, allowing us to hoist a loop body in one pass without iteration. 
-void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) { +void MachineLICMBase::HoistOutOfLoop(MachineDomTreeNode *HeaderN) { MachineBasicBlock *Preheader = getCurPreheader(); if (!Preheader) return; @@ -719,7 +737,7 @@ void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) { /// Sink instructions into loops if profitable. This especially tries to prevent /// register spills caused by register pressure if there is little to no /// overhead moving instructions into loops. -void MachineLICM::SinkIntoLoop() { +void MachineLICMBase::SinkIntoLoop() { MachineBasicBlock *Preheader = getCurPreheader(); if (!Preheader) return; @@ -773,7 +791,7 @@ static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) { /// Find all virtual register references that are liveout of the preheader to /// initialize the starting "register pressure". Note this does not count live /// through (livein but not used) registers. -void MachineLICM::InitRegPressure(MachineBasicBlock *BB) { +void MachineLICMBase::InitRegPressure(MachineBasicBlock *BB) { std::fill(RegPressure.begin(), RegPressure.end(), 0); // If the preheader has only a single predecessor and it ends with a @@ -792,8 +810,8 @@ void MachineLICM::InitRegPressure(MachineBasicBlock *BB) { } /// Update estimate of register pressure after the specified instruction. -void MachineLICM::UpdateRegPressure(const MachineInstr *MI, - bool ConsiderUnseenAsDef) { +void MachineLICMBase::UpdateRegPressure(const MachineInstr *MI, + bool ConsiderUnseenAsDef) { auto Cost = calcRegisterCost(MI, /*ConsiderSeen=*/true, ConsiderUnseenAsDef); for (const auto &RPIdAndCost : Cost) { unsigned Class = RPIdAndCost.first; @@ -811,8 +829,8 @@ void MachineLICM::UpdateRegPressure(const MachineInstr *MI, /// figure out which usages are live-ins. /// FIXME: Figure out a way to consider 'RegSeen' from all code paths. DenseMap -MachineLICM::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen, - bool ConsiderUnseenAsDef) { +MachineLICMBase::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen, + bool ConsiderUnseenAsDef) { DenseMap Cost; if (MI->isImplicitDef()) return Cost; @@ -873,7 +891,7 @@ static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI) { /// Returns true if the instruction may be a suitable candidate for LICM. /// e.g. If the instruction is a call, then it's obviously not safe to hoist it. -bool MachineLICM::IsLICMCandidate(MachineInstr &I) { +bool MachineLICMBase::IsLICMCandidate(MachineInstr &I) { // Check if it's safe to move the instruction. bool DontMoveAcrossStore = true; if (!I.isSafeToMove(AA, DontMoveAcrossStore)) @@ -896,7 +914,7 @@ bool MachineLICM::IsLICMCandidate(MachineInstr &I) { /// I.e., all virtual register operands are defined outside of the loop, /// physical registers aren't accessed explicitly, and there are no side /// effects that aren't captured by the operands or other flags. -bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { +bool MachineLICMBase::IsLoopInvariantInst(MachineInstr &I) { if (!IsLICMCandidate(I)) return false; @@ -949,7 +967,7 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { /// Return true if the specified instruction is used by a phi node and hoisting /// it could cause a copy to be inserted. 
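EnterScope/ExitScope/ExitScopeIfDone above keep a back trace of register-pressure snapshots while walking the loop body in dominator-tree order: a scope is opened per visited block and closed once every child of that block in the dominator tree has been processed, possibly closing ancestor scopes in the same step. A compact sketch of that bookkeeping with toy types (DomNode and the integer "pressure" are stand-ins):

#include <cassert>
#include <map>
#include <vector>

struct DomNode { int Id; };   // stand-in for MachineDomTreeNode

struct ScopeTracker {
  std::vector<int> BackTrace;                            // saved pressure per open scope
  std::map<const DomNode *, unsigned> OpenChildren;      // children left to finish
  std::map<const DomNode *, const DomNode *> ParentMap;

  void EnterScope(int Pressure) { BackTrace.push_back(Pressure); }
  void ExitScope() { BackTrace.pop_back(); }

  void ExitScopeIfDone(const DomNode *Node) {
    if (OpenChildren[Node])                              // subtree not finished yet
      return;
    ExitScope();
    // Walk up and close any ancestor whose last open child just completed.
    while (const DomNode *Parent = ParentMap[Node]) {
      if (--OpenChildren[Parent] != 0)
        break;
      ExitScope();
      Node = Parent;
    }
  }
};

int main() {
  DomNode Root{0}, A{1}, B{2};
  ScopeTracker T;
  T.ParentMap[&Root] = nullptr;
  T.ParentMap[&A] = &Root;
  T.ParentMap[&B] = &Root;
  T.OpenChildren[&Root] = 2;        // Root has two children to finish
  T.EnterScope(/*Pressure=*/10);    // enter Root
  T.EnterScope(20);                 // enter A (a leaf)
  T.ExitScopeIfDone(&A);            // closes A; Root still has B open
  assert(T.BackTrace.size() == 1);
  T.EnterScope(30);                 // enter B (a leaf)
  T.ExitScopeIfDone(&B);            // closes B, then Root
  assert(T.BackTrace.empty());
}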
-bool MachineLICM::HasLoopPHIUse(const MachineInstr *MI) const { +bool MachineLICMBase::HasLoopPHIUse(const MachineInstr *MI) const { SmallVector Work(1, MI); do { MI = Work.pop_back_val(); @@ -984,8 +1002,9 @@ bool MachineLICM::HasLoopPHIUse(const MachineInstr *MI) const { /// Compute operand latency between a def of 'Reg' and an use in the current /// loop, return true if the target considered it high. -bool MachineLICM::HasHighOperandLatency(MachineInstr &MI, - unsigned DefIdx, unsigned Reg) const { +bool MachineLICMBase::HasHighOperandLatency(MachineInstr &MI, + unsigned DefIdx, + unsigned Reg) const { if (MRI->use_nodbg_empty(Reg)) return false; @@ -1015,7 +1034,7 @@ bool MachineLICM::HasHighOperandLatency(MachineInstr &MI, /// Return true if the instruction is marked "cheap" or the operand latency /// between its def and a use is one or less. -bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const { +bool MachineLICMBase::IsCheapInstruction(MachineInstr &MI) const { if (TII->isAsCheapAsAMove(MI) || MI.isCopyLike()) return true; @@ -1040,8 +1059,9 @@ bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const { /// Visit BBs from header to current BB, check if hoisting an instruction of the /// given cost matrix can cause high register pressure. -bool MachineLICM::CanCauseHighRegPressure(const DenseMap& Cost, - bool CheapInstr) { +bool +MachineLICMBase::CanCauseHighRegPressure(const DenseMap& Cost, + bool CheapInstr) { for (const auto &RPIdAndCost : Cost) { if (RPIdAndCost.second <= 0) continue; @@ -1065,7 +1085,7 @@ bool MachineLICM::CanCauseHighRegPressure(const DenseMap& Cost, /// Traverse the back trace from header to the current block and update their /// register pressures to reflect the effect of hoisting MI from the current /// block to the preheader. -void MachineLICM::UpdateBackTraceRegPressure(const MachineInstr *MI) { +void MachineLICMBase::UpdateBackTraceRegPressure(const MachineInstr *MI) { // First compute the 'cost' of the instruction, i.e. its contribution // to register pressure. auto Cost = calcRegisterCost(MI, /*ConsiderSeen=*/false, @@ -1079,7 +1099,7 @@ void MachineLICM::UpdateBackTraceRegPressure(const MachineInstr *MI) { /// Return true if it is potentially profitable to hoist the given loop /// invariant. -bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { +bool MachineLICMBase::IsProfitableToHoist(MachineInstr &MI) { if (MI.isImplicitDef()) return true; @@ -1171,7 +1191,7 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { /// Unfold a load from the given machineinstr if the load itself could be /// hoisted. Return the unfolded and hoistable load, or null if the load /// couldn't be unfolded or if it wouldn't be hoistable. -MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) { +MachineInstr *MachineLICMBase::ExtractHoistableLoad(MachineInstr *MI) { // Don't unfold simple loads. if (MI->canFoldAsLoad()) return nullptr; @@ -1229,7 +1249,7 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) { /// Initialize the CSE map with instructions that are in the current loop /// preheader that may become duplicates of instructions that are hoisted /// out of the loop. -void MachineLICM::InitCSEMap(MachineBasicBlock *BB) { +void MachineLICMBase::InitCSEMap(MachineBasicBlock *BB) { for (MachineInstr &MI : *BB) CSEMap[MI.getOpcode()].push_back(&MI); } @@ -1237,8 +1257,8 @@ void MachineLICM::InitCSEMap(MachineBasicBlock *BB) { /// Find an instruction amount PrevMIs that is a duplicate of MI. 
/// Return this instruction if it's found. const MachineInstr* -MachineLICM::LookForDuplicate(const MachineInstr *MI, - std::vector &PrevMIs) { +MachineLICMBase::LookForDuplicate(const MachineInstr *MI, + std::vector &PrevMIs) { for (const MachineInstr *PrevMI : PrevMIs) if (TII->produceSameValue(*MI, *PrevMI, (PreRegAlloc ? MRI : nullptr))) return PrevMI; @@ -1250,8 +1270,8 @@ MachineLICM::LookForDuplicate(const MachineInstr *MI, /// computes the same value. If it's found, do a RAU on with the definition of /// the existing instruction rather than hoisting the instruction to the /// preheader. -bool MachineLICM::EliminateCSE(MachineInstr *MI, - DenseMap>::iterator &CI) { +bool MachineLICMBase::EliminateCSE(MachineInstr *MI, + DenseMap>::iterator &CI) { // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate // the undef property onto uses. if (CI == CSEMap.end() || MI->isImplicitDef()) @@ -1308,7 +1328,7 @@ bool MachineLICM::EliminateCSE(MachineInstr *MI, /// Return true if the given instruction will be CSE'd if it's hoisted out of /// the loop. -bool MachineLICM::MayCSE(MachineInstr *MI) { +bool MachineLICMBase::MayCSE(MachineInstr *MI) { unsigned Opcode = MI->getOpcode(); DenseMap>::iterator CI = CSEMap.find(Opcode); @@ -1323,7 +1343,7 @@ bool MachineLICM::MayCSE(MachineInstr *MI) { /// When an instruction is found to use only loop invariant operands /// that are safe to hoist, this instruction is called to do the dirty work. /// It returns true if the instruction is hoisted. -bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) { +bool MachineLICMBase::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) { // First check whether we should hoist this instruction. if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) { // If not, try unfolding a hoistable load. @@ -1336,9 +1356,9 @@ bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) { DEBUG({ dbgs() << "Hoisting " << *MI; if (MI->getParent()->getBasicBlock()) - dbgs() << " from BB#" << MI->getParent()->getNumber(); + dbgs() << " from " << printMBBReference(*MI->getParent()); if (Preheader->getBasicBlock()) - dbgs() << " to BB#" << Preheader->getNumber(); + dbgs() << " to " << printMBBReference(*Preheader); dbgs() << "\n"; }); @@ -1386,7 +1406,7 @@ bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) { } /// Get the preheader for the current loop, splitting a critical edge if needed. -MachineBasicBlock *MachineLICM::getCurPreheader() { +MachineBasicBlock *MachineLICMBase::getCurPreheader() { // Determine the block to which to hoist instructions. If we can't find a // suitable loop predecessor, we can't do any hoisting. diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp index 8db75d48b207..8f0b89657d02 100644 --- a/lib/CodeGen/MachineModuleInfo.cpp +++ b/lib/CodeGen/MachineModuleInfo.cpp @@ -17,7 +17,6 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetLoweringObjectFile.h" #include "llvm/IR/BasicBlock.h" -#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" @@ -277,7 +276,8 @@ MachineModuleInfo::getOrCreateMachineFunction(const Function &F) { MachineFunction *MF; if (I.second) { // No pre-existing machine function, create a new one. - MF = new MachineFunction(&F, TM, NextFnNum++, *this); + const TargetSubtargetInfo &STI = *TM.getSubtargetImpl(F); + MF = new MachineFunction(F, TM, STI, NextFnNum++, *this); // Update the set entry. 
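getOrCreateMachineFunction, shown above, now resolves the subtarget once per IR function and hands it to the new MachineFunction constructor, while the surrounding get-or-create shape stays the same. A rough sketch of that shape with ordinary containers (all names and types here are illustrative, not LLVM's):

#include <cassert>
#include <memory>
#include <string>
#include <unordered_map>

struct Subtarget { std::string CPU; };                    // stand-in types
struct Function  { std::string Name; std::string CPU; };
struct MachineFunction {
  MachineFunction(const Function &F, const Subtarget &STI, unsigned Num)
      : Name(F.Name), STI(STI), Number(Num) {}
  std::string Name;
  Subtarget STI;
  unsigned Number;
};

class MachineModuleInfo {
  std::unordered_map<std::string, std::unique_ptr<MachineFunction>> MachineFunctions;
  unsigned NextFnNum = 0;

public:
  MachineFunction &getOrCreateMachineFunction(const Function &F) {
    auto I = MachineFunctions.emplace(F.Name, nullptr);   // probe the map first
    if (I.second) {
      // No pre-existing machine function: resolve the subtarget for F up front
      // and pass it to the constructor, as the patched code now does.
      Subtarget STI{F.CPU};
      I.first->second = std::make_unique<MachineFunction>(F, STI, NextFnNum++);
    }
    return *I.first->second;
  }
};

int main() {
  MachineModuleInfo MMI;
  Function F{"foo", "generic"};
  MachineFunction &A = MMI.getOrCreateMachineFunction(F);
  MachineFunction &B = MMI.getOrCreateMachineFunction(F);
  assert(&A == &B && A.Number == 0);                      // created exactly once
}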
I.first->second.reset(MF); } else { diff --git a/lib/CodeGen/MachineOperand.cpp b/lib/CodeGen/MachineOperand.cpp new file mode 100644 index 000000000000..9122edefac7e --- /dev/null +++ b/lib/CodeGen/MachineOperand.cpp @@ -0,0 +1,1068 @@ +//===- lib/CodeGen/MachineOperand.cpp -------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file Methods common to all machine operands. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/Analysis/Loads.h" +#include "llvm/CodeGen/MIRPrinter.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/IRPrintingPasses.h" +#include "llvm/IR/ModuleSlotTracker.h" +#include "llvm/Target/TargetIntrinsicInfo.h" +#include "llvm/Target/TargetMachine.h" + +using namespace llvm; + +static cl::opt + PrintRegMaskNumRegs("print-regmask-num-regs", + cl::desc("Number of registers to limit to when " + "printing regmask operands in IR dumps. " + "unlimited = -1"), + cl::init(32), cl::Hidden); + +static const MachineFunction *getMFIfAvailable(const MachineOperand &MO) { + if (const MachineInstr *MI = MO.getParent()) + if (const MachineBasicBlock *MBB = MI->getParent()) + if (const MachineFunction *MF = MBB->getParent()) + return MF; + return nullptr; +} +static MachineFunction *getMFIfAvailable(MachineOperand &MO) { + return const_cast( + getMFIfAvailable(const_cast(MO))); +} + +void MachineOperand::setReg(unsigned Reg) { + if (getReg() == Reg) + return; // No change. + + // Otherwise, we have to change the register. If this operand is embedded + // into a machine function, we need to update the old and new register's + // use/def lists. + if (MachineFunction *MF = getMFIfAvailable(*this)) { + MachineRegisterInfo &MRI = MF->getRegInfo(); + MRI.removeRegOperandFromUseList(this); + SmallContents.RegNo = Reg; + MRI.addRegOperandToUseList(this); + return; + } + + // Otherwise, just change the register, no problem. :) + SmallContents.RegNo = Reg; +} + +void MachineOperand::substVirtReg(unsigned Reg, unsigned SubIdx, + const TargetRegisterInfo &TRI) { + assert(TargetRegisterInfo::isVirtualRegister(Reg)); + if (SubIdx && getSubReg()) + SubIdx = TRI.composeSubRegIndices(SubIdx, getSubReg()); + setReg(Reg); + if (SubIdx) + setSubReg(SubIdx); +} + +void MachineOperand::substPhysReg(unsigned Reg, const TargetRegisterInfo &TRI) { + assert(TargetRegisterInfo::isPhysicalRegister(Reg)); + if (getSubReg()) { + Reg = TRI.getSubReg(Reg, getSubReg()); + // Note that getSubReg() may return 0 if the sub-register doesn't exist. + // That won't happen in legal code. + setSubReg(0); + if (isDef()) + setIsUndef(false); + } + setReg(Reg); +} + +/// Change a def to a use, or a use to a def. +void MachineOperand::setIsDef(bool Val) { + assert(isReg() && "Wrong MachineOperand accessor"); + assert((!Val || !isDebug()) && "Marking a debug operation as def"); + if (IsDef == Val) + return; + assert(!IsDeadOrKill && "Changing def/use with dead/kill set not supported"); + // MRI may keep uses and defs in different list positions. 
+ if (MachineFunction *MF = getMFIfAvailable(*this)) { + MachineRegisterInfo &MRI = MF->getRegInfo(); + MRI.removeRegOperandFromUseList(this); + IsDef = Val; + MRI.addRegOperandToUseList(this); + return; + } + IsDef = Val; +} + +bool MachineOperand::isRenamable() const { + assert(isReg() && "Wrong MachineOperand accessor"); + assert(TargetRegisterInfo::isPhysicalRegister(getReg()) && + "isRenamable should only be checked on physical registers"); + return IsRenamable; +} + +void MachineOperand::setIsRenamable(bool Val) { + assert(isReg() && "Wrong MachineOperand accessor"); + assert(TargetRegisterInfo::isPhysicalRegister(getReg()) && + "setIsRenamable should only be called on physical registers"); + if (const MachineInstr *MI = getParent()) + if ((isDef() && MI->hasExtraDefRegAllocReq()) || + (isUse() && MI->hasExtraSrcRegAllocReq())) + assert(!Val && "isRenamable should be false for " + "hasExtraDefRegAllocReq/hasExtraSrcRegAllocReq opcodes"); + IsRenamable = Val; +} + +void MachineOperand::setIsRenamableIfNoExtraRegAllocReq() { + if (const MachineInstr *MI = getParent()) + if ((isDef() && MI->hasExtraDefRegAllocReq()) || + (isUse() && MI->hasExtraSrcRegAllocReq())) + return; + + setIsRenamable(true); +} + +// If this operand is currently a register operand, and if this is in a +// function, deregister the operand from the register's use/def list. +void MachineOperand::removeRegFromUses() { + if (!isReg() || !isOnRegUseList()) + return; + + if (MachineFunction *MF = getMFIfAvailable(*this)) + MF->getRegInfo().removeRegOperandFromUseList(this); +} + +/// ChangeToImmediate - Replace this operand with a new immediate operand of +/// the specified value. If an operand is known to be an immediate already, +/// the setImm method should be used. +void MachineOperand::ChangeToImmediate(int64_t ImmVal) { + assert((!isReg() || !isTied()) && "Cannot change a tied operand into an imm"); + + removeRegFromUses(); + + OpKind = MO_Immediate; + Contents.ImmVal = ImmVal; +} + +void MachineOperand::ChangeToFPImmediate(const ConstantFP *FPImm) { + assert((!isReg() || !isTied()) && "Cannot change a tied operand into an imm"); + + removeRegFromUses(); + + OpKind = MO_FPImmediate; + Contents.CFP = FPImm; +} + +void MachineOperand::ChangeToES(const char *SymName, + unsigned char TargetFlags) { + assert((!isReg() || !isTied()) && + "Cannot change a tied operand into an external symbol"); + + removeRegFromUses(); + + OpKind = MO_ExternalSymbol; + Contents.OffsetedInfo.Val.SymbolName = SymName; + setOffset(0); // Offset is always 0. + setTargetFlags(TargetFlags); +} + +void MachineOperand::ChangeToMCSymbol(MCSymbol *Sym) { + assert((!isReg() || !isTied()) && + "Cannot change a tied operand into an MCSymbol"); + + removeRegFromUses(); + + OpKind = MO_MCSymbol; + Contents.Sym = Sym; +} + +void MachineOperand::ChangeToFrameIndex(int Idx) { + assert((!isReg() || !isTied()) && + "Cannot change a tied operand into a FrameIndex"); + + removeRegFromUses(); + + OpKind = MO_FrameIndex; + setIndex(Idx); +} + +void MachineOperand::ChangeToTargetIndex(unsigned Idx, int64_t Offset, + unsigned char TargetFlags) { + assert((!isReg() || !isTied()) && + "Cannot change a tied operand into a FrameIndex"); + + removeRegFromUses(); + + OpKind = MO_TargetIndex; + setIndex(Idx); + setOffset(Offset); + setTargetFlags(TargetFlags); +} + +/// ChangeToRegister - Replace this operand with a new register operand of +/// the specified value. If an operand is known to be an register already, +/// the setReg method should be used. 
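Each ChangeTo* method above follows the same discipline: assert that a tied register operand is never retagged, unhook the operand from any register use/def list it may be on, and only then overwrite the kind and payload. A compressed sketch of that discipline on a toy operand with just two kinds (nothing like the real MachineOperand layout):

#include <cassert>
#include <cstdint>

class ToyOperand {
  enum Kind { MO_Register, MO_Immediate } OpKind = MO_Register;
  union { unsigned RegNo; int64_t ImmVal; };
  bool Tied = false;
  bool OnUseList = false;

  void removeRegFromUses() {
    // In the real code this calls MachineRegisterInfo::removeRegOperandFromUseList
    // when the operand is reachable from a MachineFunction.
    if (OpKind == MO_Register && OnUseList)
      OnUseList = false;
  }

public:
  explicit ToyOperand(unsigned Reg, bool OnList) : RegNo(Reg), OnUseList(OnList) {}

  void ChangeToImmediate(int64_t Imm) {
    assert(!(OpKind == MO_Register && Tied) &&
           "Cannot change a tied operand into an imm");
    removeRegFromUses();     // step 1: detach from the use/def list
    OpKind = MO_Immediate;   // step 2: retag
    ImmVal = Imm;            // step 3: overwrite the payload
  }

  bool isImm() const { return OpKind == MO_Immediate; }
  int64_t getImm() const { assert(isImm()); return ImmVal; }
};

int main() {
  ToyOperand Op(/*Reg=*/5, /*OnList=*/true);
  Op.ChangeToImmediate(42);
  assert(Op.isImm() && Op.getImm() == 42);
}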
+void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp, + bool isKill, bool isDead, bool isUndef, + bool isDebug) { + MachineRegisterInfo *RegInfo = nullptr; + if (MachineFunction *MF = getMFIfAvailable(*this)) + RegInfo = &MF->getRegInfo(); + // If this operand is already a register operand, remove it from the + // register's use/def lists. + bool WasReg = isReg(); + if (RegInfo && WasReg) + RegInfo->removeRegOperandFromUseList(this); + + // Change this to a register and set the reg#. + assert(!(isDead && !isDef) && "Dead flag on non-def"); + assert(!(isKill && isDef) && "Kill flag on def"); + OpKind = MO_Register; + SmallContents.RegNo = Reg; + SubReg_TargetFlags = 0; + IsDef = isDef; + IsImp = isImp; + IsDeadOrKill = isKill | isDead; + IsRenamable = false; + IsUndef = isUndef; + IsInternalRead = false; + IsEarlyClobber = false; + IsDebug = isDebug; + // Ensure isOnRegUseList() returns false. + Contents.Reg.Prev = nullptr; + // Preserve the tie when the operand was already a register. + if (!WasReg) + TiedTo = 0; + + // If this operand is embedded in a function, add the operand to the + // register's use/def list. + if (RegInfo) + RegInfo->addRegOperandToUseList(this); +} + +/// isIdenticalTo - Return true if this operand is identical to the specified +/// operand. Note that this should stay in sync with the hash_value overload +/// below. +bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const { + if (getType() != Other.getType() || + getTargetFlags() != Other.getTargetFlags()) + return false; + + switch (getType()) { + case MachineOperand::MO_Register: + return getReg() == Other.getReg() && isDef() == Other.isDef() && + getSubReg() == Other.getSubReg(); + case MachineOperand::MO_Immediate: + return getImm() == Other.getImm(); + case MachineOperand::MO_CImmediate: + return getCImm() == Other.getCImm(); + case MachineOperand::MO_FPImmediate: + return getFPImm() == Other.getFPImm(); + case MachineOperand::MO_MachineBasicBlock: + return getMBB() == Other.getMBB(); + case MachineOperand::MO_FrameIndex: + return getIndex() == Other.getIndex(); + case MachineOperand::MO_ConstantPoolIndex: + case MachineOperand::MO_TargetIndex: + return getIndex() == Other.getIndex() && getOffset() == Other.getOffset(); + case MachineOperand::MO_JumpTableIndex: + return getIndex() == Other.getIndex(); + case MachineOperand::MO_GlobalAddress: + return getGlobal() == Other.getGlobal() && getOffset() == Other.getOffset(); + case MachineOperand::MO_ExternalSymbol: + return strcmp(getSymbolName(), Other.getSymbolName()) == 0 && + getOffset() == Other.getOffset(); + case MachineOperand::MO_BlockAddress: + return getBlockAddress() == Other.getBlockAddress() && + getOffset() == Other.getOffset(); + case MachineOperand::MO_RegisterMask: + case MachineOperand::MO_RegisterLiveOut: { + // Shallow compare of the two RegMasks + const uint32_t *RegMask = getRegMask(); + const uint32_t *OtherRegMask = Other.getRegMask(); + if (RegMask == OtherRegMask) + return true; + + if (const MachineFunction *MF = getMFIfAvailable(*this)) { + // Calculate the size of the RegMask + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); + unsigned RegMaskSize = (TRI->getNumRegs() + 31) / 32; + + // Deep compare of the two RegMasks + return std::equal(RegMask, RegMask + RegMaskSize, OtherRegMask); + } + // We don't know the size of the RegMask, so we can't deep compare the two + // reg masks. 
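Editor's note: the only subtle case in isIdenticalTo is the register-mask one handled just above (with the conservative return that follows when no MachineFunction is reachable): a regmask is a raw bit vector with one bit per physical register, so its length in 32-bit words is (NumRegs + 31) / 32, and a deep comparison is only possible when TargetRegisterInfo can supply NumRegs. A standalone sketch of that word-count arithmetic and deep compare, using plain C++ and toy data:

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

// One bit per physical register, packed into 32-bit words.
static bool regMasksEqual(const uint32_t *A, const uint32_t *B, unsigned NumRegs) {
  if (A == B)
    return true;                            // Shallow compare: same storage.
  unsigned NumWords = (NumRegs + 31) / 32;  // Round up to whole words.
  return std::equal(A, A + NumWords, B);    // Deep compare.
}

int main() {
  std::vector<uint32_t> X = {0xffffffffu, 0x1u}; // 33 registers preserved
  std::vector<uint32_t> Y = X;
  assert(regMasksEqual(X.data(), X.data(), 64));
  assert(regMasksEqual(X.data(), Y.data(), 33));
  Y[1] = 0;
  assert(!regMasksEqual(X.data(), Y.data(), 33));
}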
+ return false; + } + case MachineOperand::MO_MCSymbol: + return getMCSymbol() == Other.getMCSymbol(); + case MachineOperand::MO_CFIIndex: + return getCFIIndex() == Other.getCFIIndex(); + case MachineOperand::MO_Metadata: + return getMetadata() == Other.getMetadata(); + case MachineOperand::MO_IntrinsicID: + return getIntrinsicID() == Other.getIntrinsicID(); + case MachineOperand::MO_Predicate: + return getPredicate() == Other.getPredicate(); + } + llvm_unreachable("Invalid machine operand type"); +} + +// Note: this must stay exactly in sync with isIdenticalTo above. +hash_code llvm::hash_value(const MachineOperand &MO) { + switch (MO.getType()) { + case MachineOperand::MO_Register: + // Register operands don't have target flags. + return hash_combine(MO.getType(), MO.getReg(), MO.getSubReg(), MO.isDef()); + case MachineOperand::MO_Immediate: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getImm()); + case MachineOperand::MO_CImmediate: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getCImm()); + case MachineOperand::MO_FPImmediate: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getFPImm()); + case MachineOperand::MO_MachineBasicBlock: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMBB()); + case MachineOperand::MO_FrameIndex: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex()); + case MachineOperand::MO_ConstantPoolIndex: + case MachineOperand::MO_TargetIndex: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex(), + MO.getOffset()); + case MachineOperand::MO_JumpTableIndex: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex()); + case MachineOperand::MO_ExternalSymbol: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getOffset(), + MO.getSymbolName()); + case MachineOperand::MO_GlobalAddress: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getGlobal(), + MO.getOffset()); + case MachineOperand::MO_BlockAddress: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getBlockAddress(), + MO.getOffset()); + case MachineOperand::MO_RegisterMask: + case MachineOperand::MO_RegisterLiveOut: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getRegMask()); + case MachineOperand::MO_Metadata: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMetadata()); + case MachineOperand::MO_MCSymbol: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMCSymbol()); + case MachineOperand::MO_CFIIndex: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getCFIIndex()); + case MachineOperand::MO_IntrinsicID: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIntrinsicID()); + case MachineOperand::MO_Predicate: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getPredicate()); + } + llvm_unreachable("Invalid machine operand type"); +} + +// Try to crawl up to the machine function and get TRI and IntrinsicInfo from +// it. 
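Editor's note: the "must stay exactly in sync with isIdenticalTo" comment above encodes the usual hash-table contract: operands that compare identical must hash to the same value, so hash_value may only mix fields that isIdenticalTo also compares; mixing a field that equality ignores would scatter equal operands across buckets. A tiny standalone illustration of the invariant with std::hash-style mixing (toy struct, not MachineOperand):

#include <cassert>
#include <cstddef>
#include <functional>

struct RegOperand {
  unsigned Reg, SubReg;
  bool IsDef;
};

bool operator==(const RegOperand &A, const RegOperand &B) {
  return A.Reg == B.Reg && A.SubReg == B.SubReg && A.IsDef == B.IsDef;
}

// Mix only fields operator== compares: never hash a field equality ignores,
// or two equal values could land in different buckets.
std::size_t hash_value(const RegOperand &O) {
  std::size_t H = std::hash<unsigned>{}(O.Reg);
  H = H * 31 + std::hash<unsigned>{}(O.SubReg);
  H = H * 31 + std::hash<bool>{}(O.IsDef);
  return H;
}

int main() {
  RegOperand A{5, 0, true}, B{5, 0, true};
  assert(A == B && hash_value(A) == hash_value(B));
}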
+static void tryToGetTargetInfo(const MachineOperand &MO, + const TargetRegisterInfo *&TRI, + const TargetIntrinsicInfo *&IntrinsicInfo) { + if (const MachineFunction *MF = getMFIfAvailable(MO)) { + TRI = MF->getSubtarget().getRegisterInfo(); + IntrinsicInfo = MF->getTarget().getIntrinsicInfo(); + } +} + +static const char *getTargetIndexName(const MachineFunction &MF, int Index) { + const auto *TII = MF.getSubtarget().getInstrInfo(); + assert(TII && "expected instruction info"); + auto Indices = TII->getSerializableTargetIndices(); + auto Found = find_if(Indices, [&](const std::pair &I) { + return I.first == Index; + }); + if (Found != Indices.end()) + return Found->second; + return nullptr; +} + +static const char *getTargetFlagName(const TargetInstrInfo *TII, unsigned TF) { + auto Flags = TII->getSerializableDirectMachineOperandTargetFlags(); + for (const auto &I : Flags) { + if (I.first == TF) { + return I.second; + } + } + return nullptr; +} + +static void printCFIRegister(unsigned DwarfReg, raw_ostream &OS, + const TargetRegisterInfo *TRI) { + if (!TRI) { + OS << "%dwarfreg." << DwarfReg; + return; + } + + int Reg = TRI->getLLVMRegNum(DwarfReg, true); + if (Reg == -1) { + OS << ""; + return; + } + OS << printReg(Reg, TRI); +} + +static void printIRBlockReference(raw_ostream &OS, const BasicBlock &BB, + ModuleSlotTracker &MST) { + OS << "%ir-block."; + if (BB.hasName()) { + printLLVMNameWithoutPrefix(OS, BB.getName()); + return; + } + Optional Slot; + if (const Function *F = BB.getParent()) { + if (F == MST.getCurrentFunction()) { + Slot = MST.getLocalSlot(&BB); + } else if (const Module *M = F->getParent()) { + ModuleSlotTracker CustomMST(M, /*ShouldInitializeAllMetadata=*/false); + CustomMST.incorporateFunction(*F); + Slot = CustomMST.getLocalSlot(&BB); + } + } + if (Slot) + MachineOperand::printIRSlotNumber(OS, *Slot); + else + OS << ""; +} + +void MachineOperand::printSubRegIdx(raw_ostream &OS, uint64_t Index, + const TargetRegisterInfo *TRI) { + OS << "%subreg."; + if (TRI) + OS << TRI->getSubRegIndexName(Index); + else + OS << Index; +} + +void MachineOperand::printTargetFlags(raw_ostream &OS, + const MachineOperand &Op) { + if (!Op.getTargetFlags()) + return; + const MachineFunction *MF = getMFIfAvailable(Op); + if (!MF) + return; + + const auto *TII = MF->getSubtarget().getInstrInfo(); + assert(TII && "expected instruction info"); + auto Flags = TII->decomposeMachineOperandsTargetFlags(Op.getTargetFlags()); + OS << "target-flags("; + const bool HasDirectFlags = Flags.first; + const bool HasBitmaskFlags = Flags.second; + if (!HasDirectFlags && !HasBitmaskFlags) { + OS << ") "; + return; + } + if (HasDirectFlags) { + if (const auto *Name = getTargetFlagName(TII, Flags.first)) + OS << Name; + else + OS << ""; + } + if (!HasBitmaskFlags) { + OS << ") "; + return; + } + bool IsCommaNeeded = HasDirectFlags; + unsigned BitMask = Flags.second; + auto BitMasks = TII->getSerializableBitmaskMachineOperandTargetFlags(); + for (const auto &Mask : BitMasks) { + // Check if the flag's bitmask has the bits of the current mask set. + if ((BitMask & Mask.first) == Mask.first) { + if (IsCommaNeeded) + OS << ", "; + IsCommaNeeded = true; + OS << Mask.second; + // Clear the bits which were serialized from the flag's bitmask. + BitMask &= ~(Mask.first); + } + } + if (BitMask) { + // When the resulting flag's bitmask isn't zero, we know that we didn't + // serialize all of the bit flags. 
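Editor's note: printTargetFlags above splits an operand's target flags into one "direct" enumerated flag plus a set of bitmask flags, prints a name for each recognized bit pattern, clears the consumed bits, and (in the lines that follow) falls back to a placeholder when unknown bits remain. The bit bookkeeping is easy to get wrong, so here is a standalone model of the bitmask half; the flag names and the placeholder text are ours, plain C++ only:

#include <cstdio>
#include <utility>
#include <vector>

static void printBitmaskFlags(unsigned BitMask,
                              const std::vector<std::pair<unsigned, const char *>> &Names) {
  bool NeedComma = false;
  for (const auto &Mask : Names) {
    // Only claim a name when all of its bits are present.
    if ((BitMask & Mask.first) == Mask.first) {
      std::printf("%s%s", NeedComma ? ", " : "", Mask.second);
      NeedComma = true;
      BitMask &= ~Mask.first;   // Consume the serialized bits.
    }
  }
  if (BitMask)                  // Anything left over had no registered name.
    std::printf("%s<unknown target flag>", NeedComma ? ", " : "");
  std::printf("\n");
}

int main() {
  // Hypothetical flags: 0x1 = "gotoff", 0x2 = "ntpoff".
  printBitmaskFlags(0x3, {{0x1, "gotoff"}, {0x2, "ntpoff"}}); // gotoff, ntpoff
  printBitmaskFlags(0x5, {{0x1, "gotoff"}, {0x2, "ntpoff"}}); // gotoff, <unknown target flag>
}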
+ if (IsCommaNeeded) + OS << ", "; + OS << ""; + } + OS << ") "; +} + +void MachineOperand::printSymbol(raw_ostream &OS, MCSymbol &Sym) { + OS << ""; +} + +void MachineOperand::printStackObjectReference(raw_ostream &OS, + unsigned FrameIndex, + bool IsFixed, StringRef Name) { + if (IsFixed) { + OS << "%fixed-stack." << FrameIndex; + return; + } + + OS << "%stack." << FrameIndex; + if (!Name.empty()) + OS << '.' << Name; +} + +void MachineOperand::printOperandOffset(raw_ostream &OS, int64_t Offset) { + if (Offset == 0) + return; + if (Offset < 0) { + OS << " - " << -Offset; + return; + } + OS << " + " << Offset; +} + +void MachineOperand::printIRSlotNumber(raw_ostream &OS, int Slot) { + if (Slot == -1) + OS << ""; + else + OS << Slot; +} + +static void printCFI(raw_ostream &OS, const MCCFIInstruction &CFI, + const TargetRegisterInfo *TRI) { + switch (CFI.getOperation()) { + case MCCFIInstruction::OpSameValue: + OS << "same_value "; + if (MCSymbol *Label = CFI.getLabel()) + MachineOperand::printSymbol(OS, *Label); + printCFIRegister(CFI.getRegister(), OS, TRI); + break; + case MCCFIInstruction::OpRememberState: + OS << "remember_state "; + if (MCSymbol *Label = CFI.getLabel()) + MachineOperand::printSymbol(OS, *Label); + break; + case MCCFIInstruction::OpRestoreState: + OS << "restore_state "; + if (MCSymbol *Label = CFI.getLabel()) + MachineOperand::printSymbol(OS, *Label); + break; + case MCCFIInstruction::OpOffset: + OS << "offset "; + if (MCSymbol *Label = CFI.getLabel()) + MachineOperand::printSymbol(OS, *Label); + printCFIRegister(CFI.getRegister(), OS, TRI); + OS << ", " << CFI.getOffset(); + break; + case MCCFIInstruction::OpDefCfaRegister: + OS << "def_cfa_register "; + if (MCSymbol *Label = CFI.getLabel()) + MachineOperand::printSymbol(OS, *Label); + printCFIRegister(CFI.getRegister(), OS, TRI); + break; + case MCCFIInstruction::OpDefCfaOffset: + OS << "def_cfa_offset "; + if (MCSymbol *Label = CFI.getLabel()) + MachineOperand::printSymbol(OS, *Label); + OS << CFI.getOffset(); + break; + case MCCFIInstruction::OpDefCfa: + OS << "def_cfa "; + if (MCSymbol *Label = CFI.getLabel()) + MachineOperand::printSymbol(OS, *Label); + printCFIRegister(CFI.getRegister(), OS, TRI); + OS << ", " << CFI.getOffset(); + break; + case MCCFIInstruction::OpRelOffset: + OS << "rel_offset "; + if (MCSymbol *Label = CFI.getLabel()) + MachineOperand::printSymbol(OS, *Label); + printCFIRegister(CFI.getRegister(), OS, TRI); + OS << ", " << CFI.getOffset(); + break; + case MCCFIInstruction::OpAdjustCfaOffset: + OS << "adjust_cfa_offset "; + if (MCSymbol *Label = CFI.getLabel()) + MachineOperand::printSymbol(OS, *Label); + OS << CFI.getOffset(); + break; + case MCCFIInstruction::OpRestore: + OS << "restore "; + if (MCSymbol *Label = CFI.getLabel()) + MachineOperand::printSymbol(OS, *Label); + printCFIRegister(CFI.getRegister(), OS, TRI); + break; + case MCCFIInstruction::OpEscape: { + OS << "escape "; + if (MCSymbol *Label = CFI.getLabel()) + MachineOperand::printSymbol(OS, *Label); + if (!CFI.getValues().empty()) { + size_t e = CFI.getValues().size() - 1; + for (size_t i = 0; i < e; ++i) + OS << format("0x%02x", uint8_t(CFI.getValues()[i])) << ", "; + OS << format("0x%02x", uint8_t(CFI.getValues()[e])) << ", "; + } + break; + } + case MCCFIInstruction::OpUndefined: + OS << "undefined "; + if (MCSymbol *Label = CFI.getLabel()) + MachineOperand::printSymbol(OS, *Label); + printCFIRegister(CFI.getRegister(), OS, TRI); + break; + case MCCFIInstruction::OpRegister: + OS << "register "; + if (MCSymbol *Label = 
CFI.getLabel()) + MachineOperand::printSymbol(OS, *Label); + printCFIRegister(CFI.getRegister(), OS, TRI); + OS << ", "; + printCFIRegister(CFI.getRegister2(), OS, TRI); + break; + case MCCFIInstruction::OpWindowSave: + OS << "window_save "; + if (MCSymbol *Label = CFI.getLabel()) + MachineOperand::printSymbol(OS, *Label); + break; + default: + // TODO: Print the other CFI Operations. + OS << ""; + break; + } +} + +void MachineOperand::print(raw_ostream &OS, const TargetRegisterInfo *TRI, + const TargetIntrinsicInfo *IntrinsicInfo) const { + tryToGetTargetInfo(*this, TRI, IntrinsicInfo); + ModuleSlotTracker DummyMST(nullptr); + print(OS, DummyMST, LLT{}, /*PrintDef=*/false, /*IsStandalone=*/true, + /*ShouldPrintRegisterTies=*/true, + /*TiedOperandIdx=*/0, TRI, IntrinsicInfo); +} + +void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, + LLT TypeToPrint, bool PrintDef, bool IsStandalone, + bool ShouldPrintRegisterTies, + unsigned TiedOperandIdx, + const TargetRegisterInfo *TRI, + const TargetIntrinsicInfo *IntrinsicInfo) const { + printTargetFlags(OS, *this); + switch (getType()) { + case MachineOperand::MO_Register: { + unsigned Reg = getReg(); + if (isImplicit()) + OS << (isDef() ? "implicit-def " : "implicit "); + else if (PrintDef && isDef()) + // Print the 'def' flag only when the operand is defined after '='. + OS << "def "; + if (isInternalRead()) + OS << "internal "; + if (isDead()) + OS << "dead "; + if (isKill()) + OS << "killed "; + if (isUndef()) + OS << "undef "; + if (isEarlyClobber()) + OS << "early-clobber "; + if (isDebug()) + OS << "debug-use "; + if (TargetRegisterInfo::isPhysicalRegister(getReg()) && isRenamable()) + OS << "renamable "; + OS << printReg(Reg, TRI); + // Print the sub register. + if (unsigned SubReg = getSubReg()) { + if (TRI) + OS << '.' << TRI->getSubRegIndexName(SubReg); + else + OS << ".subreg" << SubReg; + } + // Print the register class / bank. + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (const MachineFunction *MF = getMFIfAvailable(*this)) { + const MachineRegisterInfo &MRI = MF->getRegInfo(); + if (IsStandalone || !PrintDef || MRI.def_empty(Reg)) { + OS << ':'; + OS << printRegClassOrBank(Reg, MRI, TRI); + } + } + } + // Print ties. + if (ShouldPrintRegisterTies && isTied() && !isDef()) + OS << "(tied-def " << TiedOperandIdx << ")"; + // Print types. + if (TypeToPrint.isValid()) + OS << '(' << TypeToPrint << ')'; + break; + } + case MachineOperand::MO_Immediate: + OS << getImm(); + break; + case MachineOperand::MO_CImmediate: + getCImm()->printAsOperand(OS, /*PrintType=*/true, MST); + break; + case MachineOperand::MO_FPImmediate: + getFPImm()->printAsOperand(OS, /*PrintType=*/true, MST); + break; + case MachineOperand::MO_MachineBasicBlock: + OS << printMBBReference(*getMBB()); + break; + case MachineOperand::MO_FrameIndex: { + int FrameIndex = getIndex(); + bool IsFixed = false; + StringRef Name; + if (const MachineFunction *MF = getMFIfAvailable(*this)) { + const MachineFrameInfo &MFI = MF->getFrameInfo(); + IsFixed = MFI.isFixedObjectIndex(FrameIndex); + if (const AllocaInst *Alloca = MFI.getObjectAllocation(FrameIndex)) + if (Alloca->hasName()) + Name = Alloca->getName(); + if (IsFixed) + FrameIndex -= MFI.getObjectIndexBegin(); + } + printStackObjectReference(OS, FrameIndex, IsFixed, Name); + break; + } + case MachineOperand::MO_ConstantPoolIndex: + OS << "%const." 
<< getIndex(); + printOperandOffset(OS, getOffset()); + break; + case MachineOperand::MO_TargetIndex: { + OS << "target-index("; + const char *Name = ""; + if (const MachineFunction *MF = getMFIfAvailable(*this)) + if (const auto *TargetIndexName = getTargetIndexName(*MF, getIndex())) + Name = TargetIndexName; + OS << Name << ')'; + printOperandOffset(OS, getOffset()); + break; + } + case MachineOperand::MO_JumpTableIndex: + OS << printJumpTableEntryReference(getIndex()); + break; + case MachineOperand::MO_GlobalAddress: + getGlobal()->printAsOperand(OS, /*PrintType=*/false, MST); + printOperandOffset(OS, getOffset()); + break; + case MachineOperand::MO_ExternalSymbol: { + StringRef Name = getSymbolName(); + OS << '&'; + if (Name.empty()) { + OS << "\"\""; + } else { + printLLVMNameWithoutPrefix(OS, Name); + } + printOperandOffset(OS, getOffset()); + break; + } + case MachineOperand::MO_BlockAddress: { + OS << "blockaddress("; + getBlockAddress()->getFunction()->printAsOperand(OS, /*PrintType=*/false, + MST); + OS << ", "; + printIRBlockReference(OS, *getBlockAddress()->getBasicBlock(), MST); + OS << ')'; + MachineOperand::printOperandOffset(OS, getOffset()); + break; + } + case MachineOperand::MO_RegisterMask: { + OS << "getNumRegs(); ++i) { + unsigned MaskWord = i / 32; + unsigned MaskBit = i % 32; + if (getRegMask()[MaskWord] & (1 << MaskBit)) { + if (PrintRegMaskNumRegs < 0 || + NumRegsEmitted <= static_cast(PrintRegMaskNumRegs)) { + OS << " " << printReg(i, TRI); + NumRegsEmitted++; + } + NumRegsInMask++; + } + } + if (NumRegsEmitted != NumRegsInMask) + OS << " and " << (NumRegsInMask - NumRegsEmitted) << " more..."; + } else { + OS << " ..."; + } + OS << ">"; + break; + } + case MachineOperand::MO_RegisterLiveOut: { + const uint32_t *RegMask = getRegLiveOut(); + OS << "liveout("; + if (!TRI) { + OS << ""; + } else { + bool IsCommaNeeded = false; + for (unsigned Reg = 0, E = TRI->getNumRegs(); Reg < E; ++Reg) { + if (RegMask[Reg / 32] & (1U << (Reg % 32))) { + if (IsCommaNeeded) + OS << ", "; + OS << printReg(Reg, TRI); + IsCommaNeeded = true; + } + } + } + OS << ")"; + break; + } + case MachineOperand::MO_Metadata: + getMetadata()->printAsOperand(OS, MST); + break; + case MachineOperand::MO_MCSymbol: + printSymbol(OS, *getMCSymbol()); + break; + case MachineOperand::MO_CFIIndex: { + if (const MachineFunction *MF = getMFIfAvailable(*this)) + printCFI(OS, MF->getFrameInstructions()[getCFIIndex()], TRI); + else + OS << ""; + break; + } + case MachineOperand::MO_IntrinsicID: { + Intrinsic::ID ID = getIntrinsicID(); + if (ID < Intrinsic::num_intrinsics) + OS << "intrinsic(@" << Intrinsic::getName(ID, None) << ')'; + else if (IntrinsicInfo) + OS << "intrinsic(@" << IntrinsicInfo->getName(ID) << ')'; + else + OS << "intrinsic(" << ID << ')'; + break; + } + case MachineOperand::MO_Predicate: { + auto Pred = static_cast(getPredicate()); + OS << (CmpInst::isIntPredicate(Pred) ? "int" : "float") << "pred(" + << CmpInst::getPredicateName(Pred) << ')'; + break; + } + } +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void MachineOperand::dump() const { dbgs() << *this << '\n'; } +#endif + +//===----------------------------------------------------------------------===// +// MachineMemOperand Implementation +//===----------------------------------------------------------------------===// + +/// getAddrSpace - Return the LLVM IR address space number that this pointer +/// points into. 
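Editor's note: the MO_RegisterMask case earlier in this hunk uses the standard bit-vector indexing for physical registers: register i lives in word i/32 at bit i%32, and output is capped by -print-regmask-num-regs so large masks do not drown IR dumps. A standalone model of that capped walk (plain C++; the register names and exact output format are ours, not the MIR printer's):

#include <cstdint>
#include <cstdio>
#include <vector>

// Print the registers preserved by a regmask, emitting at most Limit of them;
// a negative Limit means "unlimited", mirroring -print-regmask-num-regs.
static void printRegMask(const std::vector<uint32_t> &Mask, unsigned NumRegs,
                         int Limit) {
  unsigned InMask = 0, Emitted = 0;
  std::printf("<regmask");
  for (unsigned i = 0; i < NumRegs; ++i) {
    if (Mask[i / 32] & (1u << (i % 32))) {
      if (Limit < 0 || Emitted < static_cast<unsigned>(Limit)) {
        std::printf(" r%u", i);
        ++Emitted;
      }
      ++InMask;
    }
  }
  if (Emitted != InMask)
    std::printf(" and %u more...", InMask - Emitted);
  std::printf(">\n");
}

int main() {
  std::vector<uint32_t> Mask(2, 0);
  for (unsigned i = 0; i < 40; ++i)   // preserve r0..r39
    Mask[i / 32] |= 1u << (i % 32);
  printRegMask(Mask, 64, 32);         // prints 32 registers, then "and 8 more..."
}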
+unsigned MachinePointerInfo::getAddrSpace() const { return AddrSpace; } + +/// isDereferenceable - Return true if V is always dereferenceable for +/// Offset + Size byte. +bool MachinePointerInfo::isDereferenceable(unsigned Size, LLVMContext &C, + const DataLayout &DL) const { + if (!V.is()) + return false; + + const Value *BasePtr = V.get(); + if (BasePtr == nullptr) + return false; + + return isDereferenceableAndAlignedPointer( + BasePtr, 1, APInt(DL.getPointerSizeInBits(), Offset + Size), DL); +} + +/// getConstantPool - Return a MachinePointerInfo record that refers to the +/// constant pool. +MachinePointerInfo MachinePointerInfo::getConstantPool(MachineFunction &MF) { + return MachinePointerInfo(MF.getPSVManager().getConstantPool()); +} + +/// getFixedStack - Return a MachinePointerInfo record that refers to the +/// the specified FrameIndex. +MachinePointerInfo MachinePointerInfo::getFixedStack(MachineFunction &MF, + int FI, int64_t Offset) { + return MachinePointerInfo(MF.getPSVManager().getFixedStack(FI), Offset); +} + +MachinePointerInfo MachinePointerInfo::getJumpTable(MachineFunction &MF) { + return MachinePointerInfo(MF.getPSVManager().getJumpTable()); +} + +MachinePointerInfo MachinePointerInfo::getGOT(MachineFunction &MF) { + return MachinePointerInfo(MF.getPSVManager().getGOT()); +} + +MachinePointerInfo MachinePointerInfo::getStack(MachineFunction &MF, + int64_t Offset, uint8_t ID) { + return MachinePointerInfo(MF.getPSVManager().getStack(), Offset, ID); +} + +MachinePointerInfo MachinePointerInfo::getUnknownStack(MachineFunction &MF) { + return MachinePointerInfo(MF.getDataLayout().getAllocaAddrSpace()); +} + +MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, Flags f, + uint64_t s, unsigned int a, + const AAMDNodes &AAInfo, + const MDNode *Ranges, SyncScope::ID SSID, + AtomicOrdering Ordering, + AtomicOrdering FailureOrdering) + : PtrInfo(ptrinfo), Size(s), FlagVals(f), BaseAlignLog2(Log2_32(a) + 1), + AAInfo(AAInfo), Ranges(Ranges) { + assert((PtrInfo.V.isNull() || PtrInfo.V.is() || + isa(PtrInfo.V.get()->getType())) && + "invalid pointer value"); + assert(getBaseAlignment() == a && "Alignment is not a power of 2!"); + assert((isLoad() || isStore()) && "Not a load/store!"); + + AtomicInfo.SSID = static_cast(SSID); + assert(getSyncScopeID() == SSID && "Value truncated"); + AtomicInfo.Ordering = static_cast(Ordering); + assert(getOrdering() == Ordering && "Value truncated"); + AtomicInfo.FailureOrdering = static_cast(FailureOrdering); + assert(getFailureOrdering() == FailureOrdering && "Value truncated"); +} + +/// Profile - Gather unique data for the object. +/// +void MachineMemOperand::Profile(FoldingSetNodeID &ID) const { + ID.AddInteger(getOffset()); + ID.AddInteger(Size); + ID.AddPointer(getOpaqueValue()); + ID.AddInteger(getFlags()); + ID.AddInteger(getBaseAlignment()); +} + +void MachineMemOperand::refineAlignment(const MachineMemOperand *MMO) { + // The Value and Offset may differ due to CSE. But the flags and size + // should be the same. + assert(MMO->getFlags() == getFlags() && "Flags mismatch!"); + assert(MMO->getSize() == getSize() && "Size mismatch!"); + + if (MMO->getBaseAlignment() >= getBaseAlignment()) { + // Update the alignment value. + BaseAlignLog2 = Log2_32(MMO->getBaseAlignment()) + 1; + // Also update the base and offset, because the new alignment may + // not be applicable with the old ones. + PtrInfo = MMO->PtrInfo; + } +} + +/// getAlignment - Return the minimum known alignment in bytes of the +/// actual memory reference. 
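Editor's note: two space-saving conventions in MachineMemOperand above are easy to miss: the base alignment is stored as Log2_32(a) + 1 in a narrow bit-field, and the constructor's round-trip assert is what enforces the power-of-two requirement; getAlignment, defined next, then reports the minimum known alignment of base plus offset, which is the lowest set bit of (BaseAlign | Offset), exactly what MinAlign computes. A standalone model of both, with our own helper names in plain C++:

#include <cassert>
#include <cstdint>

// Floor log2, in the spirit of llvm::Log2_32 for nonzero inputs.
static unsigned log2_32(uint32_t V) {
  unsigned L = 0;
  while (V >>= 1) ++L;
  return L;
}

// Store alignment as log2(a) + 1; decode by shifting 1 back up.
static unsigned encodeAlign(uint32_t A) { return log2_32(A) + 1; }
static uint64_t decodeAlign(unsigned E) { return uint64_t(1) << (E - 1); }

// Largest power of two dividing both: the lowest set bit of (A | B).
static uint64_t minAlign(uint64_t A, uint64_t B) {
  return (A | B) & (1 + ~(A | B));
}

int main() {
  unsigned Enc = encodeAlign(16);
  assert(decodeAlign(Enc) == 16 && "round-trip only holds for powers of two");
  // A 16-byte aligned base accessed at offset 4 is only 4-byte aligned.
  assert(minAlign(16, 4) == 4);
  // Offset 0 keeps the full base alignment.
  assert(minAlign(16, 0) == 16);
}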
+uint64_t MachineMemOperand::getAlignment() const { + return MinAlign(getBaseAlignment(), getOffset()); +} + +void MachineMemOperand::print(raw_ostream &OS) const { + ModuleSlotTracker DummyMST(nullptr); + print(OS, DummyMST); +} +void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST) const { + assert((isLoad() || isStore()) && "SV has to be a load, store or both."); + + if (isVolatile()) + OS << "Volatile "; + + if (isLoad()) + OS << "LD"; + if (isStore()) + OS << "ST"; + OS << getSize(); + + // Print the address information. + OS << "["; + if (const Value *V = getValue()) + V->printAsOperand(OS, /*PrintType=*/false, MST); + else if (const PseudoSourceValue *PSV = getPseudoValue()) + PSV->printCustom(OS); + else + OS << ""; + + unsigned AS = getAddrSpace(); + if (AS != 0) + OS << "(addrspace=" << AS << ')'; + + // If the alignment of the memory reference itself differs from the alignment + // of the base pointer, print the base alignment explicitly, next to the base + // pointer. + if (getBaseAlignment() != getAlignment()) + OS << "(align=" << getBaseAlignment() << ")"; + + if (getOffset() != 0) + OS << "+" << getOffset(); + OS << "]"; + + // Print the alignment of the reference. + if (getBaseAlignment() != getAlignment() || getBaseAlignment() != getSize()) + OS << "(align=" << getAlignment() << ")"; + + // Print TBAA info. + if (const MDNode *TBAAInfo = getAAInfo().TBAA) { + OS << "(tbaa="; + if (TBAAInfo->getNumOperands() > 0) + TBAAInfo->getOperand(0)->printAsOperand(OS, MST); + else + OS << ""; + OS << ")"; + } + + // Print AA scope info. + if (const MDNode *ScopeInfo = getAAInfo().Scope) { + OS << "(alias.scope="; + if (ScopeInfo->getNumOperands() > 0) + for (unsigned i = 0, ie = ScopeInfo->getNumOperands(); i != ie; ++i) { + ScopeInfo->getOperand(i)->printAsOperand(OS, MST); + if (i != ie - 1) + OS << ","; + } + else + OS << ""; + OS << ")"; + } + + // Print AA noalias scope info. 
+ if (const MDNode *NoAliasInfo = getAAInfo().NoAlias) { + OS << "(noalias="; + if (NoAliasInfo->getNumOperands() > 0) + for (unsigned i = 0, ie = NoAliasInfo->getNumOperands(); i != ie; ++i) { + NoAliasInfo->getOperand(i)->printAsOperand(OS, MST); + if (i != ie - 1) + OS << ","; + } + else + OS << ""; + OS << ")"; + } + + if (const MDNode *Ranges = getRanges()) { + unsigned NumRanges = Ranges->getNumOperands(); + if (NumRanges != 0) { + OS << "(ranges="; + + for (unsigned I = 0; I != NumRanges; ++I) { + Ranges->getOperand(I)->printAsOperand(OS, MST); + if (I != NumRanges - 1) + OS << ','; + } + + OS << ')'; + } + } + + if (isNonTemporal()) + OS << "(nontemporal)"; + if (isDereferenceable()) + OS << "(dereferenceable)"; + if (isInvariant()) + OS << "(invariant)"; + if (getFlags() & MOTargetFlag1) + OS << "(flag1)"; + if (getFlags() & MOTargetFlag2) + OS << "(flag2)"; + if (getFlags() & MOTargetFlag3) + OS << "(flag3)"; +} diff --git a/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp b/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp index ecc569dab835..906d5560d568 100644 --- a/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp +++ b/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp @@ -16,7 +16,6 @@ #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" #include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineInstr.h" -#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/LLVMContext.h" @@ -28,7 +27,8 @@ DiagnosticInfoMIROptimization::MachineArgument::MachineArgument( Key = MKey; raw_string_ostream OS(Val); - MI.print(OS, /*SkipOpers=*/false, /*SkipDebugLoc=*/true); + MI.print(OS, /*IsStandalone=*/true, /*SkipOpers=*/false, + /*SkipDebugLoc=*/true); } Optional @@ -51,12 +51,11 @@ void MachineOptimizationRemarkEmitter::emit( auto &OptDiag = cast(OptDiagCommon); computeHotness(OptDiag); - LLVMContext &Ctx = MF.getFunction()->getContext(); + LLVMContext &Ctx = MF.getFunction().getContext(); - // If a diagnostic has a hotness value, then only emit it if its hotness - // meets the threshold. - if (OptDiag.getHotness() && - *OptDiag.getHotness() < Ctx.getDiagnosticsHotnessThreshold()) { + // Only emit it if its hotness meets the threshold. 
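Editor's note: the hotness gate this comment introduces (its code continues just below) folds the old two-part condition into one by treating a missing hotness as zero via getValueOr(0). That is a small behavioral change: once a nonzero threshold is set, remarks that carry no hotness at all are now suppressed instead of always emitted. A standalone illustration of the difference; the function names and the use of std::optional in place of llvm::Optional are ours:

#include <cassert>
#include <cstdint>
#include <optional>

// Old gate: only remarks that *have* a hotness could be filtered out.
static bool emitOld(std::optional<uint64_t> Hotness, uint64_t Threshold) {
  if (Hotness && *Hotness < Threshold)
    return false;
  return true;
}

// New gate: a missing hotness counts as 0, so it is filtered too once a
// nonzero threshold is in effect.
static bool emitNew(std::optional<uint64_t> Hotness, uint64_t Threshold) {
  return Hotness.value_or(0) >= Threshold;
}

int main() {
  // With a threshold of 10, a remark with no hotness used to slip through...
  assert(emitOld(std::nullopt, 10) == true);
  // ...and is now suppressed.
  assert(emitNew(std::nullopt, 10) == false);
  // Hot remarks are emitted either way.
  assert(emitOld(50, 10) && emitNew(50, 10));
}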
+ if (OptDiag.getHotness().getValueOr(0) < + Ctx.getDiagnosticsHotnessThreshold()) { return; } @@ -73,7 +72,7 @@ bool MachineOptimizationRemarkEmitterPass::runOnMachineFunction( MachineFunction &MF) { MachineBlockFrequencyInfo *MBFI; - if (MF.getFunction()->getContext().getDiagnosticsHotnessRequested()) + if (MF.getFunction().getContext().getDiagnosticsHotnessRequested()) MBFI = &getAnalysis().getBFI(); else MBFI = nullptr; diff --git a/lib/CodeGen/MachineOutliner.cpp b/lib/CodeGen/MachineOutliner.cpp index 055cef36e0ae..c515fa8c1b36 100644 --- a/lib/CodeGen/MachineOutliner.cpp +++ b/lib/CodeGen/MachineOutliner.cpp @@ -59,20 +59,19 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/Twine.h" -#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/DIBuilder.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Mangler.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetMachine.h" #include #include #include @@ -99,6 +98,9 @@ struct Candidate { /// The number of instructions in this \p Candidate. unsigned Len; + /// The MachineFunction containing this \p Candidate. + MachineFunction *MF = nullptr; + public: /// Set to false if the candidate overlapped with another candidate. bool InCandidateList = true; @@ -110,6 +112,15 @@ struct Candidate { /// Contains all target-specific information for this \p Candidate. TargetInstrInfo::MachineOutlinerInfo MInfo; + /// If there is a DISubprogram associated with the function that this + /// Candidate lives in, return it. + DISubprogram *getSubprogramOrNull() const { + assert(MF && "Candidate has no MF!"); + if (DISubprogram *SP = MF->getFunction().getSubprogram()) + return SP; + return nullptr; + } + /// Return the number of instructions in this Candidate. unsigned getLength() const { return Len; } @@ -128,8 +139,9 @@ struct Candidate { /// for some given candidate. unsigned Benefit = 0; - Candidate(unsigned StartIdx, unsigned Len, unsigned FunctionIdx) - : StartIdx(StartIdx), Len(Len), FunctionIdx(FunctionIdx) {} + Candidate(unsigned StartIdx, unsigned Len, unsigned FunctionIdx, + MachineFunction *MF) + : StartIdx(StartIdx), Len(Len), MF(MF), FunctionIdx(FunctionIdx) {} Candidate() {} @@ -165,6 +177,15 @@ struct OutlinedFunction { /// Contains all target-specific information for this \p OutlinedFunction. TargetInstrInfo::MachineOutlinerInfo MInfo; + /// If there is a DISubprogram for any Candidate for this outlined function, + /// then return it. Otherwise, return nullptr. + DISubprogram *getSubprogramOrNull() const { + for (const auto &C : Candidates) + if (DISubprogram *SP = C->getSubprogramOrNull()) + return SP; + return nullptr; + } + /// Return the number of candidates for this \p OutlinedFunction. 
unsigned getOccurrenceCount() { return OccurrenceCount; } @@ -723,11 +744,13 @@ struct InstructionMapper { void convertToUnsignedVec(MachineBasicBlock &MBB, const TargetRegisterInfo &TRI, const TargetInstrInfo &TII) { + unsigned Flags = TII.getMachineOutlinerMBBFlags(MBB); + for (MachineBasicBlock::iterator It = MBB.begin(), Et = MBB.end(); It != Et; It++) { // Keep track of where this instruction is in the module. - switch (TII.getOutliningType(*It)) { + switch (TII.getOutliningType(It, Flags)) { case TargetInstrInfo::MachineOutlinerInstrType::Illegal: mapToIllegalUnsigned(It); break; @@ -777,6 +800,9 @@ struct MachineOutliner : public ModulePass { /// linkonceodr linkage. bool OutlineFromLinkOnceODRs = false; + // Collection of IR functions created by the outliner. + std::vector CreatedIRFunctions; + StringRef getPassName() const override { return "Machine Outliner"; } void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -939,17 +965,52 @@ unsigned MachineOutliner::findCandidates( SuffixTreeNode *M = ChildPair.second; if (M && M->IsInTree && M->isLeaf()) { - // Each sequence is over [StartIt, EndIt]. - MachineBasicBlock::iterator StartIt = Mapper.InstrList[M->SuffixIdx]; - MachineBasicBlock::iterator EndIt = - Mapper.InstrList[M->SuffixIdx + StringLen - 1]; - - CandidatesForRepeatedSeq.emplace_back(M->SuffixIdx, StringLen, - FunctionList.size()); - RepeatedSequenceLocs.emplace_back(std::make_pair(StartIt, EndIt)); - // Never visit this leaf again. M->IsInTree = false; + unsigned StartIdx = M->SuffixIdx; + unsigned EndIdx = StartIdx + StringLen - 1; + + // Trick: Discard some candidates that would be incompatible with the + // ones we've already found for this sequence. This will save us some + // work in candidate selection. + // + // If two candidates overlap, then we can't outline them both. This + // happens when we have candidates that look like, say + // + // AA (where each "A" is an instruction). + // + // We might have some portion of the module that looks like this: + // AAAAAA (6 A's) + // + // In this case, there are 5 different copies of "AA" in this range, but + // at most 3 can be outlined. If only outlining 3 of these is going to + // be unbeneficial, then we ought to not bother. + // + // Note that two things DON'T overlap when they look like this: + // start1...end1 .... start2...end2 + // That is, one must either + // * End before the other starts + // * Start after the other ends + if (std::all_of(CandidatesForRepeatedSeq.begin(), + CandidatesForRepeatedSeq.end(), + [&StartIdx, &EndIdx](const Candidate &C) { + return (EndIdx < C.getStartIdx() || + StartIdx > C.getEndIdx()); + })) { + // It doesn't overlap with anything, so we can outline it. + // Each sequence is over [StartIt, EndIt]. + MachineBasicBlock::iterator StartIt = Mapper.InstrList[StartIdx]; + MachineBasicBlock::iterator EndIt = Mapper.InstrList[EndIdx]; + + // Save the MachineFunction containing the Candidate. + MachineFunction *MF = StartIt->getParent()->getParent(); + assert(MF && "Candidate doesn't have a MF?"); + + // Save the candidate and its location. 
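Editor's note: the overlap trick described in the comments above is a plain interval-disjointness filter: two closed index ranges do not overlap exactly when one ends before the other starts, and only candidates that survive the std::all_of test are recorded (the emplace_back that follows runs only for the survivors). A standalone model of that greedy filter, including the "AA inside AAAAAA" example from the comment (toy Candidate type, ours):

#include <algorithm>
#include <cassert>
#include <vector>

struct Candidate {
  unsigned StartIdx, Len;
  unsigned getStartIdx() const { return StartIdx; }
  unsigned getEndIdx() const { return StartIdx + Len - 1; }
};

// Keep a new candidate only if it overlaps none of the already-kept ones.
static bool tryKeep(std::vector<Candidate> &Kept, Candidate C) {
  bool Disjoint = std::all_of(Kept.begin(), Kept.end(), [&](const Candidate &K) {
    return C.getEndIdx() < K.getStartIdx() || C.getStartIdx() > K.getEndIdx();
  });
  if (Disjoint)
    Kept.push_back(C);
  return Disjoint;
}

int main() {
  // "AA" inside "AAAAAA": the suffix tree proposes starts 0..4, but only the
  // non-overlapping subset {0, 2, 4} survives, i.e. at most 3 outlined copies.
  std::vector<Candidate> Kept;
  unsigned Survivors = 0;
  for (unsigned Start = 0; Start < 5; ++Start)
    Survivors += tryKeep(Kept, {Start, 2});
  assert(Survivors == 3);
}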
+ CandidatesForRepeatedSeq.emplace_back(StartIdx, StringLen, + FunctionList.size(), MF); + RepeatedSequenceLocs.emplace_back(std::make_pair(StartIt, EndIt)); + } } } @@ -961,8 +1022,8 @@ unsigned MachineOutliner::findCandidates( std::vector Seq; for (unsigned i = Leaf->SuffixIdx; i < Leaf->SuffixIdx + StringLen; i++) Seq.push_back(ST.Str[i]); - OutlinedFunction OF(FunctionList.size(), Parent.OccurrenceCount, Seq, - MInfo); + OutlinedFunction OF(FunctionList.size(), CandidatesForRepeatedSeq.size(), + Seq, MInfo); unsigned Benefit = OF.getBenefit(); // Is it better to outline this candidate than not? @@ -1180,6 +1241,9 @@ MachineOutliner::createOutlinedFunction(Module &M, const OutlinedFunction &OF, F->setLinkage(GlobalValue::PrivateLinkage); F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); + // Save F so that we can add debug info later if we need to. + CreatedIRFunctions.push_back(F); + BasicBlock *EntryBB = BasicBlock::Create(C, "entry", F); IRBuilder<> Builder(EntryBB); Builder.CreateRetVoid(); @@ -1203,13 +1267,50 @@ MachineOutliner::createOutlinedFunction(Module &M, const OutlinedFunction &OF, NewMI->dropMemRefs(); // Don't keep debug information for outlined instructions. - // FIXME: This means outlined functions are currently undebuggable. NewMI->setDebugLoc(DebugLoc()); MBB.insert(MBB.end(), NewMI); } TII.insertOutlinerEpilogue(MBB, MF, OF.MInfo); + // If there's a DISubprogram associated with this outlined function, then + // emit debug info for the outlined function. + if (DISubprogram *SP = OF.getSubprogramOrNull()) { + // We have a DISubprogram. Get its DICompileUnit. + DICompileUnit *CU = SP->getUnit(); + DIBuilder DB(M, true, CU); + DIFile *Unit = SP->getFile(); + Mangler Mg; + + // Walk over each IR function we created in the outliner and create + // DISubprograms for each function. + for (Function *F : CreatedIRFunctions) { + // Get the mangled name of the function for the linkage name. + std::string Dummy; + llvm::raw_string_ostream MangledNameStream(Dummy); + Mg.getNameWithPrefix(MangledNameStream, F, false); + + DISubprogram *SP = DB.createFunction( + Unit /* Context */, F->getName(), StringRef(MangledNameStream.str()), + Unit /* File */, + 0 /* Line 0 is reserved for compiler-generated code. */, + DB.createSubroutineType( + DB.getOrCreateTypeArray(None)), /* void type */ + false, true, 0, /* Line 0 is reserved for compiler-generated code. */ + DINode::DIFlags::FlagArtificial /* Compiler-generated code. */, + true /* Outlined code is optimized code by definition. */); + + // Don't add any new variables to the subprogram. + DB.finalizeSubprogram(SP); + + // Attach subprogram to the function. + F->setSubprogram(SP); + } + + // We're done with the DIBuilder. + DB.finalize(); + } + return &MF; } @@ -1313,7 +1414,7 @@ bool MachineOutliner::runOnModule(Module &M) { MMI.getOrCreateMachineFunction(*M.begin()).getSubtarget(); const TargetRegisterInfo *TRI = STI.getRegisterInfo(); const TargetInstrInfo *TII = STI.getInstrInfo(); - + InstructionMapper Mapper; // Build instruction mappings for each function in the module. @@ -1328,8 +1429,8 @@ bool MachineOutliner::runOnModule(Module &M) { // If it is, look at each MachineBasicBlock in the function. for (MachineBasicBlock &MBB : MF) { - // Is there anything in MBB? - if (MBB.empty()) + // Is there anything in MBB? And is it the target of an indirect branch? + if (MBB.empty() || MBB.hasAddressTaken()) continue; // If yes, map it. 
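Editor's note: the debug-info block added to createOutlinedFunction above is the one genuinely new piece of machinery in this hunk: every IR function the outliner created gets an artificial, optimized, line-0 DISubprogram so that outlined frames stay visible to debuggers even though per-instruction locations are dropped. A condensed sketch of that flow, using the same DIBuilder, Mangler, and createFunction calls the patch itself makes; the helper name and its parameters are ours, and it assumes this LLVM tree's headers:

#include "llvm/ADT/None.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Mangler.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

// Hypothetical helper condensing the patch's per-function loop: attach an
// artificial, optimized, line-0 DISubprogram to one outlined IR function.
static void attachOutlinedDebugInfo(Module &M, Function *F, DISubprogram *CallerSP) {
  DICompileUnit *CU = CallerSP->getUnit();
  DIBuilder DB(M, /*AllowUnresolved=*/true, CU);
  DIFile *Unit = CallerSP->getFile();

  // The linkage name is the mangled symbol name of the outlined function.
  std::string Dummy;
  raw_string_ostream MangledNameStream(Dummy);
  Mangler Mg;
  Mg.getNameWithPrefix(MangledNameStream, F, /*CannotUsePrivateLabel=*/false);

  DISubprogram *SP = DB.createFunction(
      Unit, F->getName(), StringRef(MangledNameStream.str()), Unit,
      /*LineNo=*/0, DB.createSubroutineType(DB.getOrCreateTypeArray(None)),
      /*isLocalToUnit=*/false, /*isDefinition=*/true, /*ScopeLine=*/0,
      DINode::DIFlags::FlagArtificial, /*isOptimized=*/true);

  DB.finalizeSubprogram(SP); // No variables are ever added to the subprogram.
  F->setSubprogram(SP);
  DB.finalize();
}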
@@ -1350,5 +1451,7 @@ bool MachineOutliner::runOnModule(Module &M) { pruneOverlaps(CandidateList, FunctionList, Mapper, MaxCandidateLen, *TII); // Outline each of the candidates and return true if something was outlined. - return outline(M, CandidateList, FunctionList, Mapper); + bool OutlinedSomething = outline(M, CandidateList, FunctionList, Mapper); + + return OutlinedSomething; } diff --git a/lib/CodeGen/MachinePipeliner.cpp b/lib/CodeGen/MachinePipeliner.cpp index ea38bcf40ae8..3cce7b3649b1 100644 --- a/lib/CodeGen/MachinePipeliner.cpp +++ b/lib/CodeGen/MachinePipeliner.cpp @@ -73,7 +73,7 @@ #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/DFAPacketizer.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" @@ -138,7 +138,7 @@ static cl::opt EnableSWPOptSize("enable-pipeliner-opt-size", /// A command line argument to limit minimum initial interval for pipelining. static cl::opt SwpMaxMii("pipeliner-max-mii", - cl::desc("Size limit for the the MII."), + cl::desc("Size limit for the MII."), cl::Hidden, cl::init(27)); /// A command line argument to limit the number of stages in the pipeline. @@ -313,7 +313,7 @@ class SwingSchedulerDAG : public ScheduleDAGInstrs { /// Return the latest time an instruction my be scheduled. int getALAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ALAP; } - /// The mobility function, which the the number of slots in which + /// The mobility function, which the number of slots in which /// an instruction may be scheduled. int getMOV(SUnit *Node) { return getALAP(Node) - getASAP(Node); } @@ -729,13 +729,13 @@ INITIALIZE_PASS_END(MachinePipeliner, DEBUG_TYPE, /// The "main" function for implementing Swing Modulo Scheduling. bool MachinePipeliner::runOnMachineFunction(MachineFunction &mf) { - if (skipFunction(*mf.getFunction())) + if (skipFunction(mf.getFunction())) return false; if (!EnableSWP) return false; - if (mf.getFunction()->getAttributes().hasAttribute( + if (mf.getFunction().getAttributes().hasAttribute( AttributeList::FunctionIndex, Attribute::OptimizeForSize) && !EnableSWPOptSize.getPosition()) return false; @@ -808,11 +808,9 @@ bool MachinePipeliner::canPipelineLoop(MachineLoop &L) { // because we don't know how to maintain subreg information in the // VMap structure. MachineBasicBlock *MBB = L.getHeader(); - for (MachineBasicBlock::iterator BBI = MBB->instr_begin(), - BBE = MBB->getFirstNonPHI(); - BBI != BBE; ++BBI) - for (unsigned i = 1; i != BBI->getNumOperands(); i += 2) - if (BBI->getOperand(i).getSubReg() != 0) + for (auto &PHI : MBB->phis()) + for (unsigned i = 1; i != PHI.getNumOperands(); i += 2) + if (PHI.getOperand(i).getSubReg() != 0) return false; return true; @@ -972,7 +970,7 @@ static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) { return 0; } -/// Return the Phi register value that comes the the loop block. +/// Return the Phi register value that comes the loop block. 
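Editor's note: the phis() loops introduced above, and getLoopPhiReg whose body follows, all rely on the operand layout of a machine PHI: operand 0 is the def, followed by (incoming value, predecessor block) pairs, which is why every scan starts at index 1 and strides by 2. A standalone model of picking the value that flows in from a given block (toy types and block names, not MachineInstr):

#include <cassert>
#include <string>
#include <vector>

struct PhiOperand {
  unsigned Reg;        // Incoming virtual register (0 in the block slots).
  std::string MBB;     // Predecessor block name ("" in the value slots).
};

// Operand 0 is the def; operands 1,3,5,... are values and 2,4,6,... their blocks.
static unsigned getLoopPhiReg(const std::vector<PhiOperand> &Phi,
                              const std::string &LoopBB) {
  for (unsigned i = 1, e = Phi.size(); i != e; i += 2)
    if (Phi[i + 1].MBB == LoopBB)
      return Phi[i].Reg;
  return 0;
}

int main() {
  // %102 = PHI %100, %bb.preheader, %101, %bb.loop
  std::vector<PhiOperand> Phi = {
      {102, ""}, {100, ""}, {0, "bb.preheader"}, {101, ""}, {0, "bb.loop"}};
  assert(getLoopPhiReg(Phi, "bb.loop") == 101);
  assert(getLoopPhiReg(Phi, "bb.entry") == 0); // no edge from that block
}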
static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) { for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2) if (Phi.getOperand(i + 1).getMBB() == LoopBB) @@ -2924,10 +2922,8 @@ void SwingSchedulerDAG::splitLifetimes(MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs, SMSchedule &Schedule) { const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); - for (MachineBasicBlock::iterator BBI = KernelBB->instr_begin(), - BBF = KernelBB->getFirstNonPHI(); - BBI != BBF; ++BBI) { - unsigned Def = BBI->getOperand(0).getReg(); + for (auto &PHI : KernelBB->phis()) { + unsigned Def = PHI.getOperand(0).getReg(); // Check for any Phi definition that used as an operand of another Phi // in the same block. for (MachineRegisterInfo::use_instr_iterator I = MRI.use_instr_begin(Def), @@ -2935,7 +2931,7 @@ void SwingSchedulerDAG::splitLifetimes(MachineBasicBlock *KernelBB, I != E; ++I) { if (I->isPHI() && I->getParent() == KernelBB) { // Get the loop carried definition. - unsigned LCDef = getLoopPhiReg(*BBI, KernelBB); + unsigned LCDef = getLoopPhiReg(PHI, KernelBB); if (!LCDef) continue; MachineInstr *MI = MRI.getVRegDef(LCDef); @@ -3249,13 +3245,11 @@ void SwingSchedulerDAG::rewritePhiValues(MachineBasicBlock *NewBB, SMSchedule &Schedule, ValueMapTy *VRMap, InstrMapTy &InstrMap) { - for (MachineBasicBlock::iterator BBI = BB->instr_begin(), - BBE = BB->getFirstNonPHI(); - BBI != BBE; ++BBI) { + for (auto &PHI : BB->phis()) { unsigned InitVal = 0; unsigned LoopVal = 0; - getPhiRegs(*BBI, BB, InitVal, LoopVal); - unsigned PhiDef = BBI->getOperand(0).getReg(); + getPhiRegs(PHI, BB, InitVal, LoopVal); + unsigned PhiDef = PHI.getOperand(0).getReg(); unsigned PhiStage = (unsigned)Schedule.stageScheduled(getSUnit(MRI.getVRegDef(PhiDef))); @@ -3269,7 +3263,7 @@ void SwingSchedulerDAG::rewritePhiValues(MachineBasicBlock *NewBB, getPrevMapVal(StageNum - np, PhiStage, LoopVal, LoopStage, VRMap, BB); if (!NewVal) NewVal = InitVal; - rewriteScheduledInstr(NewBB, Schedule, InstrMap, StageNum - np, np, &*BBI, + rewriteScheduledInstr(NewBB, Schedule, InstrMap, StageNum - np, np, &PHI, PhiDef, NewVal); } } diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp index a075543aecfb..983822ba0c5f 100644 --- a/lib/CodeGen/MachineRegisterInfo.cpp +++ b/lib/CodeGen/MachineRegisterInfo.cpp @@ -65,23 +65,66 @@ void MachineRegisterInfo::setRegBank(unsigned Reg, VRegInfo[Reg].first = &RegBank; } -const TargetRegisterClass * -MachineRegisterInfo::constrainRegClass(unsigned Reg, - const TargetRegisterClass *RC, - unsigned MinNumRegs) { - const TargetRegisterClass *OldRC = getRegClass(Reg); +static const TargetRegisterClass * +constrainRegClass(MachineRegisterInfo &MRI, unsigned Reg, + const TargetRegisterClass *OldRC, + const TargetRegisterClass *RC, unsigned MinNumRegs) { if (OldRC == RC) return RC; const TargetRegisterClass *NewRC = - getTargetRegisterInfo()->getCommonSubClass(OldRC, RC); + MRI.getTargetRegisterInfo()->getCommonSubClass(OldRC, RC); if (!NewRC || NewRC == OldRC) return NewRC; if (NewRC->getNumRegs() < MinNumRegs) return nullptr; - setRegClass(Reg, NewRC); + MRI.setRegClass(Reg, NewRC); return NewRC; } +const TargetRegisterClass * +MachineRegisterInfo::constrainRegClass(unsigned Reg, + const TargetRegisterClass *RC, + unsigned MinNumRegs) { + return ::constrainRegClass(*this, Reg, getRegClass(Reg), RC, MinNumRegs); +} + +bool +MachineRegisterInfo::constrainRegAttrs(unsigned Reg, + unsigned ConstrainingReg, + unsigned MinNumRegs) { + auto const 
*OldRC = getRegClassOrNull(Reg); + auto const *RC = getRegClassOrNull(ConstrainingReg); + // A virtual register at any point must have either a low-level type + // or a class assigned, but not both. The only exception is the internals of + // GlobalISel's instruction selection pass, which is allowed to temporarily + // introduce registers with types and classes both. + assert((OldRC || getType(Reg).isValid()) && "Reg has neither class nor type"); + assert((!OldRC || !getType(Reg).isValid()) && "Reg has class and type both"); + assert((RC || getType(ConstrainingReg).isValid()) && + "ConstrainingReg has neither class nor type"); + assert((!RC || !getType(ConstrainingReg).isValid()) && + "ConstrainingReg has class and type both"); + if (OldRC && RC) + return ::constrainRegClass(*this, Reg, OldRC, RC, MinNumRegs); + // If one of the virtual registers is generic (used in generic machine + // instructions, has a low-level type, doesn't have a class), and the other is + // concrete (used in target specific instructions, doesn't have a low-level + // type, has a class), we can not unify them. + if (OldRC || RC) + return false; + // At this point, both registers are guaranteed to have a valid low-level + // type, and they must agree. + if (getType(Reg) != getType(ConstrainingReg)) + return false; + auto const *OldRB = getRegBankOrNull(Reg); + auto const *RB = getRegBankOrNull(ConstrainingReg); + if (OldRB) + return !RB || RB == OldRB; + if (RB) + setRegBank(Reg, *RB); + return true; +} + bool MachineRegisterInfo::recomputeRegClass(unsigned Reg) { const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); @@ -531,7 +574,7 @@ static bool isNoReturnDef(const MachineOperand &MO) { const MachineFunction &MF = *MBB.getParent(); // We need to keep correct unwind information even if the function will // not return, since the runtime may need it. - if (MF.getFunction()->hasFnAttribute(Attribute::UWTable)) + if (MF.getFunction().hasFnAttribute(Attribute::UWTable)) return false; const Function *Called = getCalledFunction(MI); return !(Called == nullptr || !Called->hasFnAttribute(Attribute::NoReturn) || diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index 6aaacb479feb..e15eb658a05c 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -22,7 +22,7 @@ #include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/LiveInterval.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" @@ -98,7 +98,7 @@ static cl::opt MISchedCutoff("misched-cutoff", cl::Hidden, static cl::opt SchedOnlyFunc("misched-only-func", cl::Hidden, cl::desc("Only schedule this function")); static cl::opt SchedOnlyBlock("misched-only-block", cl::Hidden, - cl::desc("Only schedule this MBB#")); + cl::desc("Only schedule this MBB#")); #else static bool ViewMISchedDAGs = false; #endif // NDEBUG @@ -351,7 +351,7 @@ ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() { /// design would be to split blocks at scheduling boundaries, but LLVM has a /// general bias against block splitting purely for implementation simplicity. 
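Editor's note: constrainRegAttrs above generalizes constrainRegClass to the GlobalISel world, where a virtual register carries either a register class (concrete) or a low-level type plus an optional register bank (generic), but never both. The unification rules are: two classed registers constrain through their common subclass; a classed and a typed register never unify; two typed registers unify only when the types match, after which a missing bank may be adopted from the constraining side. A standalone decision-table model in plain C++; where the real code computes a common subclass, this model simply requires the classes to be identical:

#include <cassert>
#include <optional>
#include <string>

struct VReg {
  std::optional<std::string> Class; // Set for "concrete" registers.
  std::optional<std::string> Type;  // Set for "generic" registers (LLT).
  std::optional<std::string> Bank;  // Optional register bank for generic regs.
};

static bool constrainRegAttrs(VReg &Reg, const VReg &Constraining) {
  if (Reg.Class && Constraining.Class)       // Both concrete: classes must agree
    return *Reg.Class == *Constraining.Class; // (real code: common subclass).
  if (Reg.Class || Constraining.Class)       // Generic vs. concrete: no unification.
    return false;
  if (*Reg.Type != *Constraining.Type)       // Both generic: LLTs must agree.
    return false;
  if (Reg.Bank)                              // Keep an existing bank only if compatible.
    return !Constraining.Bank || *Constraining.Bank == *Reg.Bank;
  if (Constraining.Bank)                     // Otherwise adopt the constraining bank.
    Reg.Bank = Constraining.Bank;
  return true;
}

int main() {
  VReg A{std::nullopt, std::string("s64"), std::nullopt};
  VReg B{std::nullopt, std::string("s64"), std::string("gpr")};
  assert(constrainRegAttrs(A, B) && A.Bank && *A.Bank == "gpr");
  VReg C{std::string("GPR64"), std::nullopt, std::nullopt};
  assert(!constrainRegAttrs(A, C)); // generic vs. concrete cannot be unified
}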
bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { - if (skipFunction(*mf.getFunction())) + if (skipFunction(mf.getFunction())) return false; if (EnableMachineSched.getNumOccurrences()) { @@ -389,7 +389,7 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { } bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) { - if (skipFunction(*mf.getFunction())) + if (skipFunction(mf.getFunction())) return false; if (EnablePostRAMachineSched.getNumOccurrences()) { @@ -548,15 +548,14 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler, continue; } DEBUG(dbgs() << "********** MI Scheduling **********\n"); - DEBUG(dbgs() << MF->getName() - << ":BB#" << MBB->getNumber() << " " << MBB->getName() - << "\n From: " << *I << " To: "; + DEBUG(dbgs() << MF->getName() << ":" << printMBBReference(*MBB) << " " + << MBB->getName() << "\n From: " << *I << " To: "; if (RegionEnd != MBB->end()) dbgs() << *RegionEnd; else dbgs() << "End"; dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n'); if (DumpCriticalPathLength) { errs() << MF->getName(); - errs() << ":BB# " << MBB->getNumber(); + errs() << ":%bb. " << MBB->getNumber(); errs() << " " << MBB->getName() << " \n"; } @@ -823,11 +822,11 @@ void ScheduleDAGMI::schedule() { placeDebugValues(); DEBUG({ - unsigned BBNum = begin()->getParent()->getNumber(); - dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n"; - dumpSchedule(); - dbgs() << '\n'; - }); + dbgs() << "*** Final schedule for " + << printMBBReference(*begin()->getParent()) << " ***\n"; + dumpSchedule(); + dbgs() << '\n'; + }); } /// Apply each ScheduleDAGMutation step in order. @@ -1054,7 +1053,10 @@ void ScheduleDAGMILive::initRegPressure() { dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI); ); - assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom"); + assert((BotRPTracker.getPos() == RegionEnd || + (RegionEnd->isDebugValue() && + BotRPTracker.getPos() == priorNonDebug(RegionEnd, RegionBegin))) && + "Can't find the region bottom"); // Cache the list of excess pressure sets in this region. This will also track // the max pressure in the scheduled code for these sets. @@ -1261,11 +1263,11 @@ void ScheduleDAGMILive::schedule() { placeDebugValues(); DEBUG({ - unsigned BBNum = begin()->getParent()->getNumber(); - dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n"; - dumpSchedule(); - dbgs() << '\n'; - }); + dbgs() << "*** Final schedule for " + << printMBBReference(*begin()->getParent()) << " ***\n"; + dumpSchedule(); + dbgs() << '\n'; + }); } /// Build the DAG and setup three register pressure trackers. 
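Editor's note: the relaxed assertion above, and the guarded recede in the next hunk, both handle scheduling regions that end in DBG_VALUE instructions: the bottom-up pressure tracker stops at the last real instruction, so when the region end is a debug value the tracker's position is the prior non-debug instruction rather than the region end itself. A standalone model of that "prior non-debug" walk, in the spirit of priorNonDebug but with a toy instruction list of our own:

#include <cassert>
#include <vector>

struct Instr {
  bool IsDebugValue;
};
using Iter = std::vector<Instr>::const_iterator;

// Step backwards from I (exclusive) over debug values: return the closest
// earlier instruction that is not a DBG_VALUE, or Begin if there is none.
static Iter priorNonDebug(Iter I, Iter Begin) {
  while (I != Begin) {
    --I;
    if (!I->IsDebugValue)
      return I;
  }
  return Begin;
}

int main() {
  // A region: add, mul, DBG_VALUE, DBG_VALUE  (region end == Instrs.end()).
  std::vector<Instr> Instrs = {{false}, {false}, {true}, {true}};
  Iter Bottom = priorNonDebug(Instrs.end(), Instrs.begin());
  assert(Bottom == Instrs.begin() + 1); // the mul, not the trailing DBG_VALUEs
}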
@@ -1460,7 +1462,8 @@ void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) { RegOpers.detectDeadDefs(*MI, *LIS); } - BotRPTracker.recedeSkipDebugValues(); + if (BotRPTracker.getPos() != CurrentBottom) + BotRPTracker.recedeSkipDebugValues(); SmallVector LiveUses; BotRPTracker.recede(RegOpers, &LiveUses); assert(BotRPTracker.getPos() == CurrentBottom && "out of sync"); diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp index 6f3753e88b8c..bedfdd84b1ca 100644 --- a/lib/CodeGen/MachineSink.cpp +++ b/lib/CodeGen/MachineSink.cpp @@ -38,6 +38,7 @@ #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/Pass.h" #include "llvm/Support/BranchProbability.h" #include "llvm/Support/CommandLine.h" @@ -243,17 +244,17 @@ MachineSinking::AllUsesDominatedByBlock(unsigned Reg, // into and they are all PHI nodes. In this case, machine-sink must break // the critical edge first. e.g. // - // BB#1: derived from LLVM BB %bb4.preheader - // Predecessors according to CFG: BB#0 + // %bb.1: derived from LLVM BB %bb4.preheader + // Predecessors according to CFG: %bb.0 // ... - // %reg16385 = DEC64_32r %reg16437, %EFLAGS + // %reg16385 = DEC64_32r %reg16437, implicit-def dead %eflags // ... - // JE_4 , %EFLAGS - // Successors according to CFG: BB#37 BB#2 + // JE_4 <%bb.37>, implicit %eflags + // Successors according to CFG: %bb.37 %bb.2 // - // BB#2: derived from LLVM BB %bb.nph - // Predecessors according to CFG: BB#0 BB#1 - // %reg16386 = PHI %reg16434, , %reg16385, + // %bb.2: derived from LLVM BB %bb.nph + // Predecessors according to CFG: %bb.0 %bb.1 + // %reg16386 = PHI %reg16434, %bb.0, %reg16385, %bb.1 BreakPHIEdge = true; for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) { MachineInstr *UseInst = MO.getParent(); @@ -291,7 +292,7 @@ MachineSinking::AllUsesDominatedByBlock(unsigned Reg, } bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { - if (skipFunction(*MF.getFunction())) + if (skipFunction(MF.getFunction())) return false; DEBUG(dbgs() << "******** Machine Sinking ********\n"); @@ -321,10 +322,10 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { for (auto &Pair : ToSplit) { auto NewSucc = Pair.first->SplitCriticalEdge(Pair.second, *this); if (NewSucc != nullptr) { - DEBUG(dbgs() << " *** Splitting critical edge:" - " BB#" << Pair.first->getNumber() - << " -- BB#" << NewSucc->getNumber() - << " -- BB#" << Pair.second->getNumber() << '\n'); + DEBUG(dbgs() << " *** Splitting critical edge: " + << printMBBReference(*Pair.first) << " -- " + << printMBBReference(*NewSucc) << " -- " + << printMBBReference(*Pair.second) << '\n'); MadeChange = true; ++NumSplit; } else @@ -460,33 +461,33 @@ bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI, // It's not always legal to break critical edges and sink the computation // to the edge. // - // BB#1: + // %bb.1: // v1024 - // Beq BB#3 + // Beq %bb.3 // - // BB#2: + // %bb.2: // ... no uses of v1024 // - // BB#3: + // %bb.3: // ... // = v1024 // - // If BB#1 -> BB#3 edge is broken and computation of v1024 is inserted: + // If %bb.1 -> %bb.3 edge is broken and computation of v1024 is inserted: // - // BB#1: + // %bb.1: // ... - // Bne BB#2 - // BB#4: + // Bne %bb.2 + // %bb.4: // v1024 = - // B BB#3 - // BB#2: + // B %bb.3 + // %bb.2: // ... no uses of v1024 // - // BB#3: + // %bb.3: // ... 
// = v1024 // - // This is incorrect since v1024 is not computed along the BB#1->BB#2->BB#3 + // This is incorrect since v1024 is not computed along the %bb.1->%bb.2->%bb.3 // flow. We need to ensure the new basic block where the computation is // sunk to dominates all the uses. // It's only legal to break critical edge and sink the computation to the @@ -868,11 +869,20 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore, SmallVector DbgValuesToSink; collectDebugValues(MI, DbgValuesToSink); + // Merge or erase debug location to ensure consistent stepping in profilers + // and debuggers. + if (!SuccToSinkTo->empty() && InsertPos != SuccToSinkTo->end()) + MI.setDebugLoc(DILocation::getMergedLocation(MI.getDebugLoc(), + InsertPos->getDebugLoc())); + else + MI.setDebugLoc(DebugLoc()); + + // Move the instruction. SuccToSinkTo->splice(InsertPos, ParentBlock, MI, ++MachineBasicBlock::iterator(MI)); - // Move debug values. + // Move previously adjacent debug value instructions to the insert position. for (SmallVectorImpl::iterator DBI = DbgValuesToSink.begin(), DBE = DbgValuesToSink.end(); DBI != DBE; ++DBI) { MachineInstr *DbgMI = *DBI; diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp index 453b47b71f7f..d81c6f8a31e1 100644 --- a/lib/CodeGen/MachineTraceMetrics.cpp +++ b/lib/CodeGen/MachineTraceMetrics.cpp @@ -396,7 +396,8 @@ MachineTraceMetrics::getEnsemble(MachineTraceMetrics::Strategy strategy) { } void MachineTraceMetrics::invalidate(const MachineBasicBlock *MBB) { - DEBUG(dbgs() << "Invalidate traces through BB#" << MBB->getNumber() << '\n'); + DEBUG(dbgs() << "Invalidate traces through " << printMBBReference(*MBB) + << '\n'); BlockInfo[MBB->getNumber()].invalidate(); for (unsigned i = 0; i != TS_NumStrategies; ++i) if (Ensembles[i]) @@ -476,8 +477,8 @@ class po_iterator_storage { /// Compute the trace through MBB. void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) { - DEBUG(dbgs() << "Computing " << getName() << " trace through BB#" - << MBB->getNumber() << '\n'); + DEBUG(dbgs() << "Computing " << getName() << " trace through " + << printMBBReference(*MBB) << '\n'); // Set up loop bounds for the backwards post-order traversal. LoopBounds Bounds(BlockInfo, MTM.Loops); @@ -485,13 +486,13 @@ void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) { Bounds.Downward = false; Bounds.Visited.clear(); for (auto I : inverse_post_order_ext(MBB, Bounds)) { - DEBUG(dbgs() << " pred for BB#" << I->getNumber() << ": "); + DEBUG(dbgs() << " pred for " << printMBBReference(*I) << ": "); TraceBlockInfo &TBI = BlockInfo[I->getNumber()]; // All the predecessors have been visited, pick the preferred one. TBI.Pred = pickTracePred(I); DEBUG({ if (TBI.Pred) - dbgs() << "BB#" << TBI.Pred->getNumber() << '\n'; + dbgs() << printMBBReference(*TBI.Pred) << '\n'; else dbgs() << "null\n"; }); @@ -503,13 +504,13 @@ void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) { Bounds.Downward = true; Bounds.Visited.clear(); for (auto I : post_order_ext(MBB, Bounds)) { - DEBUG(dbgs() << " succ for BB#" << I->getNumber() << ": "); + DEBUG(dbgs() << " succ for " << printMBBReference(*I) << ": "); TraceBlockInfo &TBI = BlockInfo[I->getNumber()]; // All the successors have been visited, pick the preferred one. 
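Editor's note: earlier in this hunk, MachineSink now gives a sunk instruction a debug location merged with the instruction at the insertion point, or drops the location entirely when sinking into an empty block, so profilers and debuggers do not attribute the moved instruction to a source line it no longer executes under. A standalone model of that policy; the Loc type is ours, and DILocation::getMergedLocation is more sophisticated (it preserves the common scope and falls back to line 0 when the lines differ):

#include <cassert>

struct Loc {
  unsigned Line = 0; // Line 0 means "no meaningful source location".
  bool operator==(const Loc &O) const { return Line == O.Line; }
};

// Merge the sunk instruction's location with the insertion point's: keep it
// only when they agree, otherwise fall back to an artificial line-0 location.
static Loc mergeForSink(Loc Sunk, const Loc *InsertPos) {
  if (!InsertPos)
    return Loc{};                 // Sinking into an empty block: drop it.
  return Sunk == *InsertPos ? Sunk : Loc{};
}

int main() {
  Loc A{42}, B{42}, C{7};
  assert(mergeForSink(A, &B).Line == 42); // same line survives the move
  assert(mergeForSink(A, &C).Line == 0);  // conflicting lines become line 0
  assert(mergeForSink(A, nullptr).Line == 0);
}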
TBI.Succ = pickTraceSucc(I); DEBUG({ if (TBI.Succ) - dbgs() << "BB#" << TBI.Succ->getNumber() << '\n'; + dbgs() << printMBBReference(*TBI.Succ) << '\n'; else dbgs() << "null\n"; }); @@ -530,8 +531,8 @@ MachineTraceMetrics::Ensemble::invalidate(const MachineBasicBlock *BadMBB) { WorkList.push_back(BadMBB); do { const MachineBasicBlock *MBB = WorkList.pop_back_val(); - DEBUG(dbgs() << "Invalidate BB#" << MBB->getNumber() << ' ' << getName() - << " height.\n"); + DEBUG(dbgs() << "Invalidate " << printMBBReference(*MBB) << ' ' + << getName() << " height.\n"); // Find any MBB predecessors that have MBB as their preferred successor. // They are the only ones that need to be invalidated. for (const MachineBasicBlock *Pred : MBB->predecessors()) { @@ -555,8 +556,8 @@ MachineTraceMetrics::Ensemble::invalidate(const MachineBasicBlock *BadMBB) { WorkList.push_back(BadMBB); do { const MachineBasicBlock *MBB = WorkList.pop_back_val(); - DEBUG(dbgs() << "Invalidate BB#" << MBB->getNumber() << ' ' << getName() - << " depth.\n"); + DEBUG(dbgs() << "Invalidate " << printMBBReference(*MBB) << ' ' + << getName() << " depth.\n"); // Find any MBB successors that have MBB as their preferred predecessor. // They are the only ones that need to be invalidated. for (const MachineBasicBlock *Succ : MBB->successors()) { @@ -859,7 +860,7 @@ computeInstrDepths(const MachineBasicBlock *MBB) { // Go through trace blocks in top-down order, stopping after the center block. while (!Stack.empty()) { MBB = Stack.pop_back_val(); - DEBUG(dbgs() << "\nDepths for BB#" << MBB->getNumber() << ":\n"); + DEBUG(dbgs() << "\nDepths for " << printMBBReference(*MBB) << ":\n"); TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()]; TBI.HasValidInstrDepths = true; TBI.CriticalPath = 0; @@ -1044,7 +1045,7 @@ computeInstrHeights(const MachineBasicBlock *MBB) { SmallVector Deps; for (;!Stack.empty(); Stack.pop_back()) { MBB = Stack.back(); - DEBUG(dbgs() << "Heights for BB#" << MBB->getNumber() << ":\n"); + DEBUG(dbgs() << "Heights for " << printMBBReference(*MBB) << ":\n"); TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()]; TBI.HasValidInstrHeights = true; TBI.CriticalPath = 0; @@ -1131,7 +1132,7 @@ computeInstrHeights(const MachineBasicBlock *MBB) { // Update virtual live-in heights. They were added by addLiveIns() with a 0 // height because the final height isn't known until now. - DEBUG(dbgs() << "BB#" << MBB->getNumber() << " Live-ins:"); + DEBUG(dbgs() << printMBBReference(*MBB) << " Live-ins:"); for (LiveInReg &LIR : TBI.LiveIns) { const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg); LIR.Height = Heights.lookup(DefMI); @@ -1289,7 +1290,7 @@ bool MachineTraceMetrics::Trace::isDepInTrace(const MachineInstr &DefMI, void MachineTraceMetrics::Ensemble::print(raw_ostream &OS) const { OS << getName() << " ensemble:\n"; for (unsigned i = 0, e = BlockInfo.size(); i != e; ++i) { - OS << " BB#" << i << '\t'; + OS << " %bb." << i << '\t'; BlockInfo[i].print(OS); OS << '\n'; } @@ -1299,10 +1300,10 @@ void MachineTraceMetrics::TraceBlockInfo::print(raw_ostream &OS) const { if (hasValidDepth()) { OS << "depth=" << InstrDepth; if (Pred) - OS << " pred=BB#" << Pred->getNumber(); + OS << " pred=" << printMBBReference(*Pred); else OS << " pred=null"; - OS << " head=BB#" << Head; + OS << " head=%bb." 
<< Head; if (HasValidInstrDepths) OS << " +instrs"; } else @@ -1311,10 +1312,10 @@ void MachineTraceMetrics::TraceBlockInfo::print(raw_ostream &OS) const { if (hasValidHeight()) { OS << "height=" << InstrHeight; if (Succ) - OS << " succ=BB#" << Succ->getNumber(); + OS << " succ=" << printMBBReference(*Succ); else OS << " succ=null"; - OS << " tail=BB#" << Tail; + OS << " tail=%bb." << Tail; if (HasValidInstrHeights) OS << " +instrs"; } else @@ -1326,18 +1327,18 @@ void MachineTraceMetrics::TraceBlockInfo::print(raw_ostream &OS) const { void MachineTraceMetrics::Trace::print(raw_ostream &OS) const { unsigned MBBNum = &TBI - &TE.BlockInfo[0]; - OS << TE.getName() << " trace BB#" << TBI.Head << " --> BB#" << MBBNum - << " --> BB#" << TBI.Tail << ':'; + OS << TE.getName() << " trace %bb." << TBI.Head << " --> %bb." << MBBNum + << " --> %bb." << TBI.Tail << ':'; if (TBI.hasValidHeight() && TBI.hasValidDepth()) OS << ' ' << getInstrCount() << " instrs."; if (TBI.HasValidInstrDepths && TBI.HasValidInstrHeights) OS << ' ' << TBI.CriticalPath << " cycles."; const MachineTraceMetrics::TraceBlockInfo *Block = &TBI; - OS << "\nBB#" << MBBNum; + OS << "\n%bb." << MBBNum; while (Block->hasValidDepth() && Block->Pred) { unsigned Num = Block->Pred->getNumber(); - OS << " <- BB#" << Num; + OS << " <- " << printMBBReference(*Block->Pred); Block = &TE.BlockInfo[Num]; } @@ -1345,7 +1346,7 @@ void MachineTraceMetrics::Trace::print(raw_ostream &OS) const { OS << "\n "; while (Block->hasValidHeight() && Block->Succ) { unsigned Num = Block->Succ->getNumber(); - OS << " -> BB#" << Num; + OS << " -> " << printMBBReference(*Block->Succ); Block = &TE.BlockInfo[Num]; } OS << '\n'; diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index 4f6eb428c8ea..e0cc2ca9a2a2 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -36,8 +36,8 @@ #include "llvm/Analysis/EHPersonalities.h" #include "llvm/CodeGen/GlobalISel/RegisterBank.h" #include "llvm/CodeGen/LiveInterval.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "llvm/CodeGen/LiveStackAnalysis.h" +#include "llvm/CodeGen/LiveIntervals.h" +#include "llvm/CodeGen/LiveStacks.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -471,9 +471,8 @@ void MachineVerifier::report(const char *msg, const MachineFunction *MF) { void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB) { assert(MBB); report(msg, MBB->getParent()); - errs() << "- basic block: BB#" << MBB->getNumber() - << ' ' << MBB->getName() - << " (" << (const void*)MBB << ')'; + errs() << "- basic block: " << printMBBReference(*MBB) << ' ' + << MBB->getName() << " (" << (const void *)MBB << ')'; if (Indexes) errs() << " [" << Indexes->getMBBStartIdx(MBB) << ';' << Indexes->getMBBEndIdx(MBB) << ')'; @@ -619,8 +618,8 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { report("MBB has successor that isn't part of the function.", MBB); if (!MBBInfoMap[*I].Preds.count(MBB)) { report("Inconsistent CFG", MBB); - errs() << "MBB is not in the predecessor list of the successor BB#" - << (*I)->getNumber() << ".\n"; + errs() << "MBB is not in the predecessor list of the successor " + << printMBBReference(*(*I)) << ".\n"; } } @@ -631,19 +630,19 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { report("MBB has predecessor that isn't part of the function.", MBB); if (!MBBInfoMap[*I].Succs.count(MBB)) { 
report("Inconsistent CFG", MBB); - errs() << "MBB is not in the successor list of the predecessor BB#" - << (*I)->getNumber() << ".\n"; + errs() << "MBB is not in the successor list of the predecessor " + << printMBBReference(*(*I)) << ".\n"; } } const MCAsmInfo *AsmInfo = TM->getMCAsmInfo(); const BasicBlock *BB = MBB->getBasicBlock(); - const Function *Fn = MF->getFunction(); + const Function &F = MF->getFunction(); if (LandingPadSuccs.size() > 1 && !(AsmInfo && AsmInfo->getExceptionHandlingType() == ExceptionHandling::SjLj && BB && isa(BB->getTerminator())) && - !isFuncletEHPersonality(classifyEHPersonality(Fn->getPersonalityFn()))) + !isFuncletEHPersonality(classifyEHPersonality(F.getPersonalityFn()))) report("MBB has more than one landing pad successor", MBB); // Call AnalyzeBranch. If it succeeds, there several more conditions to check. @@ -1097,11 +1096,19 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { TII->getRegClass(MCID, MONum, TRI, *MF)) { if (!DRC->contains(Reg)) { report("Illegal physical register for instruction", MO, MONum); - errs() << TRI->getName(Reg) << " is not a " - << TRI->getRegClassName(DRC) << " register.\n"; + errs() << printReg(Reg, TRI) << " is not a " + << TRI->getRegClassName(DRC) << " register.\n"; } } } + if (MO->isRenamable() && + ((MO->isDef() && MI->hasExtraDefRegAllocReq()) || + (MO->isUse() && MI->hasExtraSrcRegAllocReq()))) { + report("Illegal isRenamable setting for opcode with extra regalloc " + "requirements", + MO, MONum); + return; + } } else { // Virtual register. const TargetRegisterClass *RC = MRI->getRegClassOrNull(Reg); @@ -1653,7 +1660,8 @@ void MachineVerifier::checkPHIOps(const MachineBasicBlock &MBB) { if (MInfo.reachable) { seen.insert(&Pre); BBInfo &PrInfo = MBBInfoMap[&Pre]; - if (PrInfo.reachable && !PrInfo.isLiveOut(MO0.getReg())) + if (!MO0.isUndef() && PrInfo.reachable && + !PrInfo.isLiveOut(MO0.getReg())) report("PHI operand is not live-out from predecessor", &MO0, I); } } @@ -1663,8 +1671,8 @@ void MachineVerifier::checkPHIOps(const MachineBasicBlock &MBB) { for (MachineBasicBlock *Pred : MBB.predecessors()) { if (!seen.count(Pred)) { report("Missing PHI operand", &Phi); - errs() << "BB#" << Pred->getNumber() - << " is a predecessor according to the CFG.\n"; + errs() << printMBBReference(*Pred) + << " is a predecessor according to the CFG.\n"; } } } @@ -1689,7 +1697,7 @@ void MachineVerifier::visitMachineFunctionAfter() { if (MInfo.regsKilled.count(*I)) { report("Virtual register killed in block, but needed live out.", &MBB); errs() << "Virtual register " << printReg(*I) - << " is used after the block.\n"; + << " is used after the block.\n"; } } @@ -1722,13 +1730,13 @@ void MachineVerifier::verifyLiveVariables() { if (!VI.AliveBlocks.test(MBB.getNumber())) { report("LiveVariables: Block missing from AliveBlocks", &MBB); errs() << "Virtual register " << printReg(Reg) - << " must be live through the block.\n"; + << " must be live through the block.\n"; } } else { if (VI.AliveBlocks.test(MBB.getNumber())) { report("LiveVariables: Block should not be in AliveBlocks", &MBB); errs() << "Virtual register " << printReg(Reg) - << " is not needed live through the block.\n"; + << " is not needed live through the block.\n"; } } } @@ -1961,7 +1969,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, if (MOI->isDef()) { if (Sub != 0) { hasSubRegDef = true; - // An operand vreg0:sub0 reads vreg0:sub1..n. Invert the lane + // An operand %0:sub0 reads %0:sub1..n. 
Invert the lane // mask for subregister defs. Read-undef defs will be handled by // readsReg below. SLM = ~SLM; @@ -2038,8 +2046,8 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, report("Register not marked live out of predecessor", *PI); report_context(LR, Reg, LaneMask); report_context(*VNI); - errs() << " live into BB#" << MFI->getNumber() - << '@' << LiveInts->getMBBStartIdx(&*MFI) << ", not live before " + errs() << " live into " << printMBBReference(*MFI) << '@' + << LiveInts->getMBBStartIdx(&*MFI) << ", not live before " << PEnd << '\n'; continue; } @@ -2048,9 +2056,9 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, if (!IsPHI && PVNI != VNI) { report("Different value live out of predecessor", *PI); report_context(LR, Reg, LaneMask); - errs() << "Valno #" << PVNI->id << " live out of BB#" - << (*PI)->getNumber() << '@' << PEnd << "\nValno #" << VNI->id - << " live into BB#" << MFI->getNumber() << '@' + errs() << "Valno #" << PVNI->id << " live out of " + << printMBBReference(*(*PI)) << '@' << PEnd << "\nValno #" + << VNI->id << " live into " << printMBBReference(*MFI) << '@' << LiveInts->getMBBStartIdx(&*MFI) << '\n'; } } @@ -2201,11 +2209,11 @@ void MachineVerifier::verifyStackFrame() { (SPState[(*I)->getNumber()].ExitValue != BBState.EntryValue || SPState[(*I)->getNumber()].ExitIsSetup != BBState.EntryIsSetup)) { report("The exit stack state of a predecessor is inconsistent.", MBB); - errs() << "Predecessor BB#" << (*I)->getNumber() << " has exit state (" - << SPState[(*I)->getNumber()].ExitValue << ", " - << SPState[(*I)->getNumber()].ExitIsSetup - << "), while BB#" << MBB->getNumber() << " has entry state (" - << BBState.EntryValue << ", " << BBState.EntryIsSetup << ").\n"; + errs() << "Predecessor " << printMBBReference(*(*I)) + << " has exit state (" << SPState[(*I)->getNumber()].ExitValue + << ", " << SPState[(*I)->getNumber()].ExitIsSetup << "), while " + << printMBBReference(*MBB) << " has entry state (" + << BBState.EntryValue << ", " << BBState.EntryIsSetup << ").\n"; } } @@ -2217,11 +2225,11 @@ void MachineVerifier::verifyStackFrame() { (SPState[(*I)->getNumber()].EntryValue != BBState.ExitValue || SPState[(*I)->getNumber()].EntryIsSetup != BBState.ExitIsSetup)) { report("The entry stack state of a successor is inconsistent.", MBB); - errs() << "Successor BB#" << (*I)->getNumber() << " has entry state (" - << SPState[(*I)->getNumber()].EntryValue << ", " - << SPState[(*I)->getNumber()].EntryIsSetup - << "), while BB#" << MBB->getNumber() << " has exit state (" - << BBState.ExitValue << ", " << BBState.ExitIsSetup << ").\n"; + errs() << "Successor " << printMBBReference(*(*I)) + << " has entry state (" << SPState[(*I)->getNumber()].EntryValue + << ", " << SPState[(*I)->getNumber()].EntryIsSetup << "), while " + << printMBBReference(*MBB) << " has exit state (" + << BBState.ExitValue << ", " << BBState.ExitIsSetup << ").\n"; } } diff --git a/lib/CodeGen/MacroFusion.cpp b/lib/CodeGen/MacroFusion.cpp index 13ddad593829..e7f426c469a0 100644 --- a/lib/CodeGen/MacroFusion.cpp +++ b/lib/CodeGen/MacroFusion.cpp @@ -33,42 +33,74 @@ using namespace llvm; static cl::opt EnableMacroFusion("misched-fusion", cl::Hidden, cl::desc("Enable scheduling for macro fusion."), cl::init(true)); -static void fuseInstructionPair(ScheduleDAGMI &DAG, SUnit &FirstSU, +static bool isHazard(const SDep &Dep) { + return Dep.getKind() == SDep::Anti || Dep.getKind() == SDep::Output; +} + +static bool fuseInstructionPair(ScheduleDAGMI &DAG, SUnit &FirstSU, SUnit 
&SecondSU) { + // Check that neither instr is already paired with another along the edge + // between them. + for (SDep &SI : FirstSU.Succs) + if (SI.isCluster()) + return false; + + for (SDep &SI : SecondSU.Preds) + if (SI.isCluster()) + return false; + // Though the reachability checks above could be made more generic, + // perhaps as part of ScheduleDAGMI::addEdge(), since such edges are valid, + // the extra computation cost makes it less interesting in general cases. + // Create a single weak edge between the adjacent instrs. The only effect is // to cause bottom-up scheduling to heavily prioritize the clustered instrs. - DAG.addEdge(&SecondSU, SDep(&FirstSU, SDep::Cluster)); + if (!DAG.addEdge(&SecondSU, SDep(&FirstSU, SDep::Cluster))) + return false; - // Adjust the latency between the anchor instr and its - // predecessors. - for (SDep &IDep : SecondSU.Preds) - if (IDep.getSUnit() == &FirstSU) - IDep.setLatency(0); + // Adjust the latency between both instrs. + for (SDep &SI : FirstSU.Succs) + if (SI.getSUnit() == &SecondSU) + SI.setLatency(0); - // Adjust the latency between the dependent instr and its - // predecessors. - for (SDep &IDep : FirstSU.Succs) - if (IDep.getSUnit() == &SecondSU) - IDep.setLatency(0); + for (SDep &SI : SecondSU.Preds) + if (SI.getSUnit() == &FirstSU) + SI.setLatency(0); - DEBUG(dbgs() << DAG.MF.getName() << "(): Macro fuse "; + DEBUG(dbgs() << "Macro fuse: "; FirstSU.print(dbgs(), &DAG); dbgs() << " - "; SecondSU.print(dbgs(), &DAG); dbgs() << " / "; dbgs() << DAG.TII->getName(FirstSU.getInstr()->getOpcode()) << " - " << DAG.TII->getName(SecondSU.getInstr()->getOpcode()) << '\n'; ); + // Make data dependencies from the FirstSU also dependent on the SecondSU to + // prevent them from being scheduled between the FirstSU and the SecondSU. if (&SecondSU != &DAG.ExitSU) - // Make instructions dependent on FirstSU also dependent on SecondSU to - // prevent them from being scheduled between FirstSU and and SecondSU. for (const SDep &SI : FirstSU.Succs) { - if (SI.getSUnit() == &SecondSU) + SUnit *SU = SI.getSUnit(); + if (SI.isWeak() || isHazard(SI) || + SU == &DAG.ExitSU || SU == &SecondSU || SU->isPred(&SecondSU)) + continue; + DEBUG(dbgs() << " Bind "; + SecondSU.print(dbgs(), &DAG); dbgs() << " - "; + SU->print(dbgs(), &DAG); dbgs() << '\n';); + DAG.addEdge(SU, SDep(&SecondSU, SDep::Artificial)); + } + + // Make the FirstSU also dependent on the dependencies of the SecondSU to + // prevent them from being scheduled between the FirstSU and the SecondSU. + if (&FirstSU != &DAG.EntrySU) + for (const SDep &SI : SecondSU.Preds) { + SUnit *SU = SI.getSUnit(); + if (SI.isWeak() || isHazard(SI) || &FirstSU == SU || FirstSU.isSucc(SU)) continue; - DEBUG(dbgs() << " Copy Succ "; - SI.getSUnit()->print(dbgs(), &DAG); dbgs() << '\n';); - DAG.addEdge(SI.getSUnit(), SDep(&SecondSU, SDep::Artificial)); + DEBUG(dbgs() << " Bind "; + SU->print(dbgs(), &DAG); dbgs() << " - "; + FirstSU.print(dbgs(), &DAG); dbgs() << '\n';); + DAG.addEdge(&FirstSU, SDep(SU, SDep::Artificial)); } ++NumFused; + return true; } namespace { @@ -116,9 +148,8 @@ bool MacroFusion::scheduleAdjacentImpl(ScheduleDAGMI &DAG, SUnit &AnchorSU) { // Explorer for fusion candidates among the dependencies of the anchor instr. for (SDep &Dep : AnchorSU.Preds) { - // Ignore dependencies that don't enforce ordering. - if (Dep.getKind() == SDep::Anti || Dep.getKind() == SDep::Output || - Dep.isWeak()) + // Ignore dependencies other than data or strong ordering. 
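(Aside, not part of the patch itself: the fuseInstructionPair() changes above are easier to follow with a small standalone model. The sketch below uses made-up Node/Edge types in place of LLVM's SUnit/SDep and only illustrates two of the new behaviours: fusion is refused when either instruction already carries a cluster edge, and the fused pair is tied together by a weak, zero-latency edge.)

#include <cstdio>
#include <vector>

// Hypothetical stand-ins for LLVM's SUnit/SDep.
struct Node;
struct Edge { Node *To; bool IsCluster; unsigned Latency; };

struct Node {
  std::vector<Edge> Succs, Preds;
  bool isClustered() const {
    for (const Edge &E : Succs) if (E.IsCluster) return true;
    for (const Edge &E : Preds) if (E.IsCluster) return true;
    return false;
  }
};

static bool fusePair(Node &First, Node &Second) {
  // Bail out if either node is already paired with something else,
  // mirroring the new early checks in fuseInstructionPair().
  if (First.isClustered() || Second.isClustered())
    return false;
  // A weak "cluster" edge with zero latency asks the scheduler to keep the
  // pair adjacent without forbidding other schedules outright.
  First.Succs.push_back({&Second, /*IsCluster=*/true, /*Latency=*/0});
  Second.Preds.push_back({&First, /*IsCluster=*/true, /*Latency=*/0});
  return true;
}

int main() {
  Node Cmp, Jcc, Other;
  std::printf("fuse cmp+jcc:   %d\n", fusePair(Cmp, Jcc));   // 1: pair created
  std::printf("fuse other+jcc: %d\n", fusePair(Other, Jcc)); // 0: Jcc already paired
}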
+ if (Dep.isWeak() || isHazard(Dep)) continue; SUnit &DepSU = *Dep.getSUnit(); @@ -129,8 +160,8 @@ bool MacroFusion::scheduleAdjacentImpl(ScheduleDAGMI &DAG, SUnit &AnchorSU) { if (!shouldScheduleAdjacent(TII, ST, DepMI, AnchorMI)) continue; - fuseInstructionPair(DAG, DepSU, AnchorSU); - return true; + if (fuseInstructionPair(DAG, DepSU, AnchorSU)) + return true; } return false; diff --git a/lib/CodeGen/OptimizePHIs.cpp b/lib/CodeGen/OptimizePHIs.cpp index 82ec1cb5c94f..8972867ba083 100644 --- a/lib/CodeGen/OptimizePHIs.cpp +++ b/lib/CodeGen/OptimizePHIs.cpp @@ -20,7 +20,6 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Pass.h" @@ -73,7 +72,7 @@ INITIALIZE_PASS(OptimizePHIs, DEBUG_TYPE, "Optimize machine instruction PHIs", false, false) bool OptimizePHIs::runOnMachineFunction(MachineFunction &Fn) { - if (skipFunction(*Fn.getFunction())) + if (skipFunction(Fn.getFunction())) return false; MRI = &Fn.getRegInfo(); @@ -154,7 +153,7 @@ bool OptimizePHIs::IsDeadPHICycle(MachineInstr *MI, InstrSet &PHIsInCycle) { if (PHIsInCycle.size() == 16) return false; - for (MachineInstr &UseMI : MRI->use_instructions(DstReg)) { + for (MachineInstr &UseMI : MRI->use_nodbg_instructions(DstReg)) { if (!UseMI.isPHI() || !IsDeadPHICycle(&UseMI, PHIsInCycle)) return false; } diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp index 864d6d547caa..54c5a940275d 100644 --- a/lib/CodeGen/PHIElimination.cpp +++ b/lib/CodeGen/PHIElimination.cpp @@ -19,7 +19,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/CodeGen/LiveInterval.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineDominators.h" @@ -593,9 +593,9 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF, if (!ShouldSplit && !NoPhiElimLiveOutEarlyExit) continue; if (ShouldSplit) { - DEBUG(dbgs() << printReg(Reg) << " live-out before critical edge BB#" - << PreMBB->getNumber() << " -> BB#" << MBB.getNumber() - << ": " << *BBI); + DEBUG(dbgs() << printReg(Reg) << " live-out before critical edge " + << printMBBReference(*PreMBB) << " -> " + << printMBBReference(MBB) << ": " << *BBI); } // If Reg is not live-in to MBB, it means it must be live-in to some diff --git a/lib/CodeGen/ParallelCG.cpp b/lib/CodeGen/ParallelCG.cpp index 50dd44fa659f..ff8680a0540d 100644 --- a/lib/CodeGen/ParallelCG.cpp +++ b/lib/CodeGen/ParallelCG.cpp @@ -19,7 +19,6 @@ #include "llvm/IR/Module.h" #include "llvm/Support/ErrorOr.h" #include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/ThreadPool.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/Utils/SplitModule.h" diff --git a/lib/CodeGen/PatchableFunction.cpp b/lib/CodeGen/PatchableFunction.cpp index cb900ce94be6..0957705b19bb 100644 --- a/lib/CodeGen/PatchableFunction.cpp +++ b/lib/CodeGen/PatchableFunction.cpp @@ -54,11 +54,11 @@ static bool doesNotGeneratecode(const MachineInstr &MI) { } bool PatchableFunction::runOnMachineFunction(MachineFunction &MF) { - if (!MF.getFunction()->hasFnAttribute("patchable-function")) + if (!MF.getFunction().hasFnAttribute("patchable-function")) return false; #ifndef NDEBUG - Attribute PatchAttr = 
MF.getFunction()->getFnAttribute("patchable-function"); + Attribute PatchAttr = MF.getFunction().getFnAttribute("patchable-function"); StringRef PatchType = PatchAttr.getValueAsString(); assert(PatchType == "prologue-short-redirect" && "Only possibility today!"); #endif diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp index e3dceac384f9..1320f9985553 100644 --- a/lib/CodeGen/PeepholeOptimizer.cpp +++ b/lib/CodeGen/PeepholeOptimizer.cpp @@ -98,6 +98,8 @@ #include using namespace llvm; +using RegSubRegPair = TargetInstrInfo::RegSubRegPair; +using RegSubRegPairAndIdx = TargetInstrInfo::RegSubRegPairAndIdx; #define DEBUG_TYPE "peephole-opt" @@ -110,6 +112,9 @@ static cl::opt DisablePeephole("disable-peephole", cl::Hidden, cl::init(false), cl::desc("Disable the peephole optimizer")); +/// Specify whether or not the value tracking looks through +/// complex instructions. When this is true, the value tracker +/// bails on everything that is not a copy or a bitcast. static cl::opt DisableAdvCopyOpt("disable-adv-copy-opt", cl::Hidden, cl::init(false), cl::desc("Disable advanced copy optimization")); @@ -132,11 +137,11 @@ static cl::opt MaxRecurrenceChain( "of commuting operands")); -STATISTIC(NumReuse, "Number of extension results reused"); -STATISTIC(NumCmps, "Number of compares eliminated"); -STATISTIC(NumImmFold, "Number of move immediate folded"); -STATISTIC(NumLoadFold, "Number of loads folded"); -STATISTIC(NumSelects, "Number of selects optimized"); +STATISTIC(NumReuse, "Number of extension results reused"); +STATISTIC(NumCmps, "Number of compares eliminated"); +STATISTIC(NumImmFold, "Number of move immediate folded"); +STATISTIC(NumLoadFold, "Number of loads folded"); +STATISTIC(NumSelects, "Number of selects optimized"); STATISTIC(NumUncoalescableCopies, "Number of uncoalescable copies optimized"); STATISTIC(NumRewrittenCopies, "Number of copies rewritten"); STATISTIC(NumNAPhysCopies, "Number of non-allocatable physical copies removed"); @@ -149,9 +154,9 @@ namespace { class PeepholeOptimizer : public MachineFunctionPass { const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; - MachineRegisterInfo *MRI; - MachineDominatorTree *DT; // Machine dominator tree - MachineLoopInfo *MLI; + MachineRegisterInfo *MRI; + MachineDominatorTree *DT; // Machine dominator tree + MachineLoopInfo *MLI; public: static char ID; // Pass identification @@ -173,31 +178,28 @@ namespace { } } - /// \brief Track Def -> Use info used for rewriting copies. - using RewriteMapTy = - SmallDenseMap; + /// Track Def -> Use info used for rewriting copies. + using RewriteMapTy = SmallDenseMap; - /// \brief Sequence of instructions that formulate recurrence cycle. + /// Sequence of instructions that formulate recurrence cycle. 
using RecurrenceCycle = SmallVector; private: - bool optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB); - bool optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, + bool optimizeCmpInstr(MachineInstr &MI); + bool optimizeExtInstr(MachineInstr &MI, MachineBasicBlock &MBB, SmallPtrSetImpl &LocalMIs); - bool optimizeSelect(MachineInstr *MI, + bool optimizeSelect(MachineInstr &MI, SmallPtrSetImpl &LocalMIs); - bool optimizeCondBranch(MachineInstr *MI); - bool optimizeCoalescableCopy(MachineInstr *MI); - bool optimizeUncoalescableCopy(MachineInstr *MI, + bool optimizeCondBranch(MachineInstr &MI); + bool optimizeCoalescableCopy(MachineInstr &MI); + bool optimizeUncoalescableCopy(MachineInstr &MI, SmallPtrSetImpl &LocalMIs); bool optimizeRecurrence(MachineInstr &PHI); - bool findNextSource(unsigned Reg, unsigned SubReg, - RewriteMapTy &RewriteMap); - bool isMoveImmediate(MachineInstr *MI, + bool findNextSource(RegSubRegPair RegSubReg, RewriteMapTy &RewriteMap); + bool isMoveImmediate(MachineInstr &MI, SmallSet &ImmDefRegs, DenseMap &ImmDefMIs); - bool foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB, - SmallSet &ImmDefRegs, + bool foldImmediate(MachineInstr &MI, SmallSet &ImmDefRegs, DenseMap &ImmDefMIs); /// \brief Finds recurrence cycles, but only ones that formulated around @@ -212,11 +214,11 @@ namespace { /// the set \p CopySrcRegs and \p CopyMIs. If this virtual register was /// previously seen as a copy, replace the uses of this copy with the /// previously seen copy's destination register. - bool foldRedundantCopy(MachineInstr *MI, + bool foldRedundantCopy(MachineInstr &MI, SmallSet &CopySrcRegs, DenseMap &CopyMIs); - /// \brief Is the register \p Reg a non-allocatable physical register? + /// Is the register \p Reg a non-allocatable physical register? bool isNAPhysCopy(unsigned Reg); /// \brief If copy instruction \p MI is a non-allocatable virtual<->physical @@ -224,11 +226,10 @@ namespace { /// non-allocatable physical register was previously copied to a virtual /// registered and hasn't been clobbered, the virt->phys copy can be /// deleted. - bool foldRedundantNAPhysCopy( - MachineInstr *MI, + bool foldRedundantNAPhysCopy(MachineInstr &MI, DenseMap &NAPhysToVirtMIs); - bool isLoadFoldable(MachineInstr *MI, + bool isLoadFoldable(MachineInstr &MI, SmallSet &FoldAsLoadDefCandidates); /// \brief Check whether \p MI is understood by the register coalescer @@ -249,10 +250,13 @@ namespace { (MI.isRegSequenceLike() || MI.isInsertSubregLike() || MI.isExtractSubregLike())); } + + MachineInstr &rewriteSource(MachineInstr &CopyLike, + RegSubRegPair Def, RewriteMapTy &RewriteMap); }; - /// \brief Helper class to hold instructions that are inside recurrence - /// cycles. The recurrence cycle is formulated around 1) a def operand and its + /// Helper class to hold instructions that are inside recurrence cycles. + /// The recurrence cycle is formulated around 1) a def operand and its /// tied use operand, or 2) a def operand and a use operand that is commutable /// with another use operand which is tied to the def operand. In the latter /// case, index of the tied use operand and the commutable use operand are @@ -273,13 +277,13 @@ namespace { Optional CommutePair; }; - /// \brief Helper class to hold a reply for ValueTracker queries. Contains the - /// returned sources for a given search and the instructions where the sources - /// were tracked from. + /// Helper class to hold a reply for ValueTracker queries. 
+ /// Contains the returned sources for a given search and the instructions + /// where the sources were tracked from. class ValueTrackerResult { private: /// Track all sources found by one ValueTracker query. - SmallVector RegSrcs; + SmallVector RegSrcs; /// Instruction using the sources in 'RegSrcs'. const MachineInstr *Inst = nullptr; @@ -302,16 +306,20 @@ namespace { } void addSource(unsigned SrcReg, unsigned SrcSubReg) { - RegSrcs.push_back(TargetInstrInfo::RegSubRegPair(SrcReg, SrcSubReg)); + RegSrcs.push_back(RegSubRegPair(SrcReg, SrcSubReg)); } void setSource(int Idx, unsigned SrcReg, unsigned SrcSubReg) { assert(Idx < getNumSources() && "Reg pair source out of index"); - RegSrcs[Idx] = TargetInstrInfo::RegSubRegPair(SrcReg, SrcSubReg); + RegSrcs[Idx] = RegSubRegPair(SrcReg, SrcSubReg); } int getNumSources() const { return RegSrcs.size(); } + RegSubRegPair getSrc(int Idx) const { + return RegSrcs[Idx]; + } + unsigned getSrcReg(int Idx) const { assert(Idx < getNumSources() && "Reg source out of index"); return RegSrcs[Idx].Reg; @@ -367,59 +375,41 @@ namespace { /// The register where the value can be found. unsigned Reg; - /// Specifiy whether or not the value tracking looks through - /// complex instructions. When this is false, the value tracker - /// bails on everything that is not a copy or a bitcast. - /// - /// Note: This could have been implemented as a specialized version of - /// the ValueTracker class but that would have complicated the code of - /// the users of this class. - bool UseAdvancedTracking; - /// MachineRegisterInfo used to perform tracking. const MachineRegisterInfo &MRI; - /// Optional TargetInstrInfo used to perform some complex - /// tracking. + /// Optional TargetInstrInfo used to perform some complex tracking. const TargetInstrInfo *TII; - /// \brief Dispatcher to the right underlying implementation of - /// getNextSource. + /// Dispatcher to the right underlying implementation of getNextSource. ValueTrackerResult getNextSourceImpl(); - /// \brief Specialized version of getNextSource for Copy instructions. + /// Specialized version of getNextSource for Copy instructions. ValueTrackerResult getNextSourceFromCopy(); - /// \brief Specialized version of getNextSource for Bitcast instructions. + /// Specialized version of getNextSource for Bitcast instructions. ValueTrackerResult getNextSourceFromBitcast(); - /// \brief Specialized version of getNextSource for RegSequence - /// instructions. + /// Specialized version of getNextSource for RegSequence instructions. ValueTrackerResult getNextSourceFromRegSequence(); - /// \brief Specialized version of getNextSource for InsertSubreg - /// instructions. + /// Specialized version of getNextSource for InsertSubreg instructions. ValueTrackerResult getNextSourceFromInsertSubreg(); - /// \brief Specialized version of getNextSource for ExtractSubreg - /// instructions. + /// Specialized version of getNextSource for ExtractSubreg instructions. ValueTrackerResult getNextSourceFromExtractSubreg(); - /// \brief Specialized version of getNextSource for SubregToReg - /// instructions. + /// Specialized version of getNextSource for SubregToReg instructions. ValueTrackerResult getNextSourceFromSubregToReg(); - /// \brief Specialized version of getNextSource for PHI instructions. + /// Specialized version of getNextSource for PHI instructions. ValueTrackerResult getNextSourceFromPHI(); public: - /// \brief Create a ValueTracker instance for the value defined by \p Reg. 
+ /// Create a ValueTracker instance for the value defined by \p Reg. /// \p DefSubReg represents the sub register index the value tracker will /// track. It does not need to match the sub register index used in the /// definition of \p Reg. - /// \p UseAdvancedTracking specifies whether or not the value tracker looks - /// through complex instructions. By default (false), it handles only copy - /// and bitcast instructions. /// If \p Reg is a physical register, a value tracker constructed with /// this constructor will not find any alternative source. /// Indeed, when \p Reg is a physical register that constructor does not @@ -427,46 +417,20 @@ namespace { /// Use the next constructor to track a physical register. ValueTracker(unsigned Reg, unsigned DefSubReg, const MachineRegisterInfo &MRI, - bool UseAdvancedTracking = false, const TargetInstrInfo *TII = nullptr) - : DefSubReg(DefSubReg), Reg(Reg), - UseAdvancedTracking(UseAdvancedTracking), MRI(MRI), TII(TII) { + : DefSubReg(DefSubReg), Reg(Reg), MRI(MRI), TII(TII) { if (!TargetRegisterInfo::isPhysicalRegister(Reg)) { Def = MRI.getVRegDef(Reg); DefIdx = MRI.def_begin(Reg).getOperandNo(); } } - /// \brief Create a ValueTracker instance for the value defined by - /// the pair \p MI, \p DefIdx. - /// Unlike the other constructor, the value tracker produced by this one - /// may be able to find a new source when the definition is a physical - /// register. - /// This could be useful to rewrite target specific instructions into - /// generic copy instructions. - ValueTracker(const MachineInstr &MI, unsigned DefIdx, unsigned DefSubReg, - const MachineRegisterInfo &MRI, - bool UseAdvancedTracking = false, - const TargetInstrInfo *TII = nullptr) - : Def(&MI), DefIdx(DefIdx), DefSubReg(DefSubReg), - UseAdvancedTracking(UseAdvancedTracking), MRI(MRI), TII(TII) { - assert(DefIdx < Def->getDesc().getNumDefs() && - Def->getOperand(DefIdx).isReg() && "Invalid definition"); - Reg = Def->getOperand(DefIdx).getReg(); - } - /// \brief Following the use-def chain, get the next available source /// for the tracked value. /// \return A ValueTrackerResult containing a set of registers /// and sub registers with tracked values. A ValueTrackerResult with /// an empty set of registers means no source was found. ValueTrackerResult getNextSource(); - - /// \brief Get the last register where the initial value can be found. - /// Initially this is the register of the definition. - /// Then, after each successful call to getNextSource, this is the - /// register of the last source. - unsigned getReg() const { return Reg; } }; } // end anonymous namespace @@ -476,11 +440,11 @@ char PeepholeOptimizer::ID = 0; char &llvm::PeepholeOptimizerID = PeepholeOptimizer::ID; INITIALIZE_PASS_BEGIN(PeepholeOptimizer, DEBUG_TYPE, - "Peephole Optimizations", false, false) + "Peephole Optimizations", false, false) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_PASS_END(PeepholeOptimizer, DEBUG_TYPE, - "Peephole Optimizations", false, false) + "Peephole Optimizations", false, false) /// If instruction is a copy-like instruction, i.e. it reads a single register /// and writes a single register and it does not modify the source, and if the @@ -491,10 +455,10 @@ INITIALIZE_PASS_END(PeepholeOptimizer, DEBUG_TYPE, /// the code. Since this code does not currently share EXTRACTs, just ignore all /// debug uses. 
bool PeepholeOptimizer:: -optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, +optimizeExtInstr(MachineInstr &MI, MachineBasicBlock &MBB, SmallPtrSetImpl &LocalMIs) { unsigned SrcReg, DstReg, SubIdx; - if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx)) + if (!TII->isCoalescableExtInstr(MI, SrcReg, DstReg, SubIdx)) return false; if (TargetRegisterInfo::isPhysicalRegister(DstReg) || @@ -535,7 +499,7 @@ optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, bool ExtendLife = true; for (MachineOperand &UseMO : MRI->use_nodbg_operands(SrcReg)) { MachineInstr *UseMI = UseMO.getParent(); - if (UseMI == MI) + if (UseMI == &MI) continue; if (UseMI->isPHI()) { @@ -568,7 +532,7 @@ optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, continue; MachineBasicBlock *UseMBB = UseMI->getParent(); - if (UseMBB == MBB) { + if (UseMBB == &MBB) { // Local uses that come after the extension. if (!LocalMIs.count(UseMI)) Uses.push_back(&UseMO); @@ -576,7 +540,7 @@ optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, // Non-local uses where the result of the extension is used. Always // replace these unless it's a PHI. Uses.push_back(&UseMO); - } else if (Aggressive && DT->dominates(MBB, UseMBB)) { + } else if (Aggressive && DT->dominates(&MBB, UseMBB)) { // We may want to extend the live range of the extension result in order // to replace these uses. ExtendedUses.push_back(&UseMO); @@ -640,19 +604,18 @@ optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, /// against already sets (or could be modified to set) the same flag as the /// compare, then we can remove the comparison and use the flag from the /// previous instruction. -bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr *MI, - MachineBasicBlock *MBB) { +bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr &MI) { // If this instruction is a comparison against zero and isn't comparing a // physical register, we can try to optimize it. unsigned SrcReg, SrcReg2; int CmpMask, CmpValue; - if (!TII->analyzeCompare(*MI, SrcReg, SrcReg2, CmpMask, CmpValue) || + if (!TII->analyzeCompare(MI, SrcReg, SrcReg2, CmpMask, CmpValue) || TargetRegisterInfo::isPhysicalRegister(SrcReg) || (SrcReg2 != 0 && TargetRegisterInfo::isPhysicalRegister(SrcReg2))) return false; // Attempt to optimize the comparison instruction. - if (TII->optimizeCompareInstr(*MI, SrcReg, SrcReg2, CmpMask, CmpValue, MRI)) { + if (TII->optimizeCompareInstr(MI, SrcReg, SrcReg2, CmpMask, CmpValue, MRI)) { ++NumCmps; return true; } @@ -661,27 +624,26 @@ bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr *MI, } /// Optimize a select instruction. -bool PeepholeOptimizer::optimizeSelect(MachineInstr *MI, +bool PeepholeOptimizer::optimizeSelect(MachineInstr &MI, SmallPtrSetImpl &LocalMIs) { unsigned TrueOp = 0; unsigned FalseOp = 0; bool Optimizable = false; SmallVector Cond; - if (TII->analyzeSelect(*MI, Cond, TrueOp, FalseOp, Optimizable)) + if (TII->analyzeSelect(MI, Cond, TrueOp, FalseOp, Optimizable)) return false; if (!Optimizable) return false; - if (!TII->optimizeSelect(*MI, LocalMIs)) + if (!TII->optimizeSelect(MI, LocalMIs)) return false; - MI->eraseFromParent(); + MI.eraseFromParent(); ++NumSelects; return true; } -/// \brief Check if a simpler conditional branch can be -/// generated -bool PeepholeOptimizer::optimizeCondBranch(MachineInstr *MI) { - return TII->optimizeCondBranch(*MI); +/// Check if a simpler conditional branch can be generated. 
+bool PeepholeOptimizer::optimizeCondBranch(MachineInstr &MI) { + return TII->optimizeCondBranch(MI); } /// \brief Try to find the next source that share the same register file @@ -695,39 +657,37 @@ bool PeepholeOptimizer::optimizeCondBranch(MachineInstr *MI) { /// share the same register file as \p Reg and \p SubReg. The client should /// then be capable to rewrite all intermediate PHIs to get the next source. /// \return False if no alternative sources are available. True otherwise. -bool PeepholeOptimizer::findNextSource(unsigned Reg, unsigned SubReg, +bool PeepholeOptimizer::findNextSource(RegSubRegPair RegSubReg, RewriteMapTy &RewriteMap) { // Do not try to find a new source for a physical register. // So far we do not have any motivating example for doing that. // Thus, instead of maintaining untested code, we will revisit that if // that changes at some point. + unsigned Reg = RegSubReg.Reg; if (TargetRegisterInfo::isPhysicalRegister(Reg)) return false; const TargetRegisterClass *DefRC = MRI->getRegClass(Reg); - SmallVector SrcToLook; - TargetInstrInfo::RegSubRegPair CurSrcPair(Reg, SubReg); + SmallVector SrcToLook; + RegSubRegPair CurSrcPair = RegSubReg; SrcToLook.push_back(CurSrcPair); unsigned PHICount = 0; - while (!SrcToLook.empty() && PHICount < RewritePHILimit) { - TargetInstrInfo::RegSubRegPair Pair = SrcToLook.pop_back_val(); + do { + CurSrcPair = SrcToLook.pop_back_val(); // As explained above, do not handle physical registers - if (TargetRegisterInfo::isPhysicalRegister(Pair.Reg)) + if (TargetRegisterInfo::isPhysicalRegister(CurSrcPair.Reg)) return false; - CurSrcPair = Pair; - ValueTracker ValTracker(CurSrcPair.Reg, CurSrcPair.SubReg, *MRI, - !DisableAdvCopyOpt, TII); - ValueTrackerResult Res; - bool ShouldRewrite = false; + ValueTracker ValTracker(CurSrcPair.Reg, CurSrcPair.SubReg, *MRI, TII); - do { - // Follow the chain of copies until we reach the top of the use-def chain - // or find a more suitable source. - Res = ValTracker.getNextSource(); + // Follow the chain of copies until we find a more suitable source, a phi + // or have to abort. + while (true) { + ValueTrackerResult Res = ValTracker.getNextSource(); + // Abort at the end of a chain (without finding a suitable source). if (!Res.isValid()) - break; + return false; // Insert the Def -> Use entry for the recently found source. ValueTrackerResult CurSrcRes = RewriteMap.lookup(CurSrcPair); @@ -748,14 +708,17 @@ bool PeepholeOptimizer::findNextSource(unsigned Reg, unsigned SubReg, unsigned NumSrcs = Res.getNumSources(); if (NumSrcs > 1) { PHICount++; + if (PHICount >= RewritePHILimit) { + DEBUG(dbgs() << "findNextSource: PHI limit reached\n"); + return false; + } + for (unsigned i = 0; i < NumSrcs; ++i) - SrcToLook.push_back(TargetInstrInfo::RegSubRegPair( - Res.getSrcReg(i), Res.getSrcSubReg(i))); + SrcToLook.push_back(Res.getSrc(i)); break; } - CurSrcPair.Reg = Res.getSrcReg(0); - CurSrcPair.SubReg = Res.getSrcSubReg(0); + CurSrcPair = Res.getSrc(0); // Do not extend the live-ranges of physical registers as they add // constraints to the register allocator. Moreover, if we want to extend // the live-range of a physical register, unlike SSA virtual register, @@ -763,25 +726,21 @@ bool PeepholeOptimizer::findNextSource(unsigned Reg, unsigned SubReg, if (TargetRegisterInfo::isPhysicalRegister(CurSrcPair.Reg)) return false; + // Keep following the chain if the value isn't any better yet. 
const TargetRegisterClass *SrcRC = MRI->getRegClass(CurSrcPair.Reg); - ShouldRewrite = TRI->shouldRewriteCopySrc(DefRC, SubReg, SrcRC, - CurSrcPair.SubReg); - } while (!ShouldRewrite); - - // Continue looking for new sources... - if (Res.isValid()) - continue; + if (!TRI->shouldRewriteCopySrc(DefRC, RegSubReg.SubReg, SrcRC, + CurSrcPair.SubReg)) + continue; - // Do not continue searching for a new source if the there's at least - // one use-def which cannot be rewritten. - if (!ShouldRewrite) - return false; - } + // We currently cannot deal with subreg operands on PHI instructions + // (see insertPHI()). + if (PHICount > 0 && CurSrcPair.SubReg != 0) + continue; - if (PHICount >= RewritePHILimit) { - DEBUG(dbgs() << "findNextSource: PHI limit reached\n"); - return false; - } + // We found a suitable source, and are done with this chain. + break; + } + } while (!SrcToLook.empty()); // If we did not find a more suitable source, there is nothing to optimize. return CurSrcPair.Reg != Reg; @@ -792,51 +751,50 @@ bool PeepholeOptimizer::findNextSource(unsigned Reg, unsigned SubReg, /// successfully traverse a PHI instruction and find suitable sources coming /// from its edges. By inserting a new PHI, we provide a rewritten PHI def /// suitable to be used in a new COPY instruction. -static MachineInstr * -insertPHI(MachineRegisterInfo *MRI, const TargetInstrInfo *TII, - const SmallVectorImpl &SrcRegs, - MachineInstr *OrigPHI) { +static MachineInstr & +insertPHI(MachineRegisterInfo &MRI, const TargetInstrInfo &TII, + const SmallVectorImpl &SrcRegs, + MachineInstr &OrigPHI) { assert(!SrcRegs.empty() && "No sources to create a PHI instruction?"); - const TargetRegisterClass *NewRC = MRI->getRegClass(SrcRegs[0].Reg); - unsigned NewVR = MRI->createVirtualRegister(NewRC); - MachineBasicBlock *MBB = OrigPHI->getParent(); - MachineInstrBuilder MIB = BuildMI(*MBB, OrigPHI, OrigPHI->getDebugLoc(), - TII->get(TargetOpcode::PHI), NewVR); + const TargetRegisterClass *NewRC = MRI.getRegClass(SrcRegs[0].Reg); + // NewRC is only correct if no subregisters are involved. findNextSource() + // should have rejected those cases already. + assert(SrcRegs[0].SubReg == 0 && "should not have subreg operand"); + unsigned NewVR = MRI.createVirtualRegister(NewRC); + MachineBasicBlock *MBB = OrigPHI.getParent(); + MachineInstrBuilder MIB = BuildMI(*MBB, &OrigPHI, OrigPHI.getDebugLoc(), + TII.get(TargetOpcode::PHI), NewVR); unsigned MBBOpIdx = 2; - for (auto RegPair : SrcRegs) { + for (const RegSubRegPair &RegPair : SrcRegs) { MIB.addReg(RegPair.Reg, 0, RegPair.SubReg); - MIB.addMBB(OrigPHI->getOperand(MBBOpIdx).getMBB()); + MIB.addMBB(OrigPHI.getOperand(MBBOpIdx).getMBB()); // Since we're extended the lifetime of RegPair.Reg, clear the // kill flags to account for that and make RegPair.Reg reaches // the new PHI. - MRI->clearKillFlags(RegPair.Reg); + MRI.clearKillFlags(RegPair.Reg); MBBOpIdx += 2; } - return MIB; + return *MIB; } namespace { -/// \brief Helper class to rewrite the arguments of a copy-like instruction. -class CopyRewriter { +/// Interface to query instructions amenable to copy rewriting. +class Rewriter { protected: - /// The copy-like instruction. MachineInstr &CopyLike; - - /// The index of the source being rewritten. - unsigned CurrentSrcIdx = 0; - + unsigned CurrentSrcIdx = 0; ///< The index of the source being rewritten. 
public: - CopyRewriter(MachineInstr &MI) : CopyLike(MI) {} - virtual ~CopyRewriter() = default; + Rewriter(MachineInstr &CopyLike) : CopyLike(CopyLike) {} + virtual ~Rewriter() {} /// \brief Get the next rewritable source (SrcReg, SrcSubReg) and - /// the related value that it affects (TrackReg, TrackSubReg). + /// the related value that it affects (DstReg, DstSubReg). /// A source is considered rewritable if its register class and the - /// register class of the related TrackReg may not be register + /// register class of the related DstReg may not be register /// coalescer friendly. In other words, given a copy-like instruction /// not all the arguments may be returned at rewritable source, since /// some arguments are none to be register coalescer friendly. @@ -851,137 +809,72 @@ class CopyRewriter { /// the only source this instruction has: /// (SrcReg, SrcSubReg) = (src, srcSubIdx). /// This source defines the whole definition, i.e., - /// (TrackReg, TrackSubReg) = (dst, dstSubIdx). + /// (DstReg, DstSubReg) = (dst, dstSubIdx). /// /// The second and subsequent calls will return false, as there is only one /// rewritable source. /// /// \return True if a rewritable source has been found, false otherwise. /// The output arguments are valid if and only if true is returned. - virtual bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg, - unsigned &TrackReg, - unsigned &TrackSubReg) { - // If CurrentSrcIdx == 1, this means this function has already been called - // once. CopyLike has one definition and one argument, thus, there is - // nothing else to rewrite. - if (!CopyLike.isCopy() || CurrentSrcIdx == 1) + virtual bool getNextRewritableSource(RegSubRegPair &Src, + RegSubRegPair &Dst) = 0; + + /// Rewrite the current source with \p NewReg and \p NewSubReg if possible. + /// \return True if the rewriting was possible, false otherwise. + virtual bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) = 0; +}; + +/// Rewriter for COPY instructions. +class CopyRewriter : public Rewriter { +public: + CopyRewriter(MachineInstr &MI) : Rewriter(MI) { + assert(MI.isCopy() && "Expected copy instruction"); + } + virtual ~CopyRewriter() = default; + + bool getNextRewritableSource(RegSubRegPair &Src, + RegSubRegPair &Dst) override { + // CurrentSrcIdx > 0 means this function has already been called. + if (CurrentSrcIdx > 0) return false; // This is the first call to getNextRewritableSource. // Move the CurrentSrcIdx to remember that we made that call. CurrentSrcIdx = 1; // The rewritable source is the argument. const MachineOperand &MOSrc = CopyLike.getOperand(1); - SrcReg = MOSrc.getReg(); - SrcSubReg = MOSrc.getSubReg(); + Src = RegSubRegPair(MOSrc.getReg(), MOSrc.getSubReg()); // What we track are the alternative sources of the definition. const MachineOperand &MODef = CopyLike.getOperand(0); - TrackReg = MODef.getReg(); - TrackSubReg = MODef.getSubReg(); + Dst = RegSubRegPair(MODef.getReg(), MODef.getSubReg()); return true; } - /// \brief Rewrite the current source with \p NewReg and \p NewSubReg - /// if possible. - /// \return True if the rewriting was possible, false otherwise. 
- virtual bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) { - if (!CopyLike.isCopy() || CurrentSrcIdx != 1) + bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override { + if (CurrentSrcIdx != 1) return false; MachineOperand &MOSrc = CopyLike.getOperand(CurrentSrcIdx); MOSrc.setReg(NewReg); MOSrc.setSubReg(NewSubReg); return true; } - - /// \brief Given a \p Def.Reg and Def.SubReg pair, use \p RewriteMap to find - /// the new source to use for rewrite. If \p HandleMultipleSources is true and - /// multiple sources for a given \p Def are found along the way, we found a - /// PHI instructions that needs to be rewritten. - /// TODO: HandleMultipleSources should be removed once we test PHI handling - /// with coalescable copies. - TargetInstrInfo::RegSubRegPair - getNewSource(MachineRegisterInfo *MRI, const TargetInstrInfo *TII, - TargetInstrInfo::RegSubRegPair Def, - PeepholeOptimizer::RewriteMapTy &RewriteMap, - bool HandleMultipleSources = true) { - TargetInstrInfo::RegSubRegPair LookupSrc(Def.Reg, Def.SubReg); - do { - ValueTrackerResult Res = RewriteMap.lookup(LookupSrc); - // If there are no entries on the map, LookupSrc is the new source. - if (!Res.isValid()) - return LookupSrc; - - // There's only one source for this definition, keep searching... - unsigned NumSrcs = Res.getNumSources(); - if (NumSrcs == 1) { - LookupSrc.Reg = Res.getSrcReg(0); - LookupSrc.SubReg = Res.getSrcSubReg(0); - continue; - } - - // TODO: Remove once multiple srcs w/ coalescable copies are supported. - if (!HandleMultipleSources) - break; - - // Multiple sources, recurse into each source to find a new source - // for it. Then, rewrite the PHI accordingly to its new edges. - SmallVector NewPHISrcs; - for (unsigned i = 0; i < NumSrcs; ++i) { - TargetInstrInfo::RegSubRegPair PHISrc(Res.getSrcReg(i), - Res.getSrcSubReg(i)); - NewPHISrcs.push_back( - getNewSource(MRI, TII, PHISrc, RewriteMap, HandleMultipleSources)); - } - - // Build the new PHI node and return its def register as the new source. - MachineInstr *OrigPHI = const_cast(Res.getInst()); - MachineInstr *NewPHI = insertPHI(MRI, TII, NewPHISrcs, OrigPHI); - DEBUG(dbgs() << "-- getNewSource\n"); - DEBUG(dbgs() << " Replacing: " << *OrigPHI); - DEBUG(dbgs() << " With: " << *NewPHI); - const MachineOperand &MODef = NewPHI->getOperand(0); - return TargetInstrInfo::RegSubRegPair(MODef.getReg(), MODef.getSubReg()); - - } while (true); - - return TargetInstrInfo::RegSubRegPair(0, 0); - } - - /// \brief Rewrite the source found through \p Def, by using the \p RewriteMap - /// and create a new COPY instruction. More info about RewriteMap in - /// PeepholeOptimizer::findNextSource. Right now this is only used to handle - /// Uncoalescable copies, since they are copy like instructions that aren't - /// recognized by the register allocator. - virtual MachineInstr * - RewriteSource(TargetInstrInfo::RegSubRegPair Def, - PeepholeOptimizer::RewriteMapTy &RewriteMap) { - return nullptr; - } }; /// \brief Helper class to rewrite uncoalescable copy like instructions /// into new COPY (coalescable friendly) instructions. -class UncoalescableRewriter : public CopyRewriter { -protected: - const TargetInstrInfo &TII; - MachineRegisterInfo &MRI; - - /// The number of defs in the bitcast - unsigned NumDefs; +class UncoalescableRewriter : public Rewriter { + unsigned NumDefs; ///< Number of defs in the bitcast. 
public: - UncoalescableRewriter(MachineInstr &MI, const TargetInstrInfo &TII, - MachineRegisterInfo &MRI) - : CopyRewriter(MI), TII(TII), MRI(MRI) { + UncoalescableRewriter(MachineInstr &MI) : Rewriter(MI) { NumDefs = MI.getDesc().getNumDefs(); } - /// \brief Get the next rewritable def source (TrackReg, TrackSubReg) + /// \see See Rewriter::getNextRewritableSource() /// All such sources need to be considered rewritable in order to /// rewrite a uncoalescable copy-like instruction. This method return /// each definition that must be checked if rewritable. - bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg, - unsigned &TrackReg, - unsigned &TrackSubReg) override { + bool getNextRewritableSource(RegSubRegPair &Src, + RegSubRegPair &Dst) override { // Find the next non-dead definition and continue from there. if (CurrentSrcIdx == NumDefs) return false; @@ -993,64 +886,27 @@ class UncoalescableRewriter : public CopyRewriter { } // What we track are the alternative sources of the definition. + Src = RegSubRegPair(0, 0); const MachineOperand &MODef = CopyLike.getOperand(CurrentSrcIdx); - TrackReg = MODef.getReg(); - TrackSubReg = MODef.getSubReg(); + Dst = RegSubRegPair(MODef.getReg(), MODef.getSubReg()); CurrentSrcIdx++; return true; } - /// \brief Rewrite the source found through \p Def, by using the \p RewriteMap - /// and create a new COPY instruction. More info about RewriteMap in - /// PeepholeOptimizer::findNextSource. Right now this is only used to handle - /// Uncoalescable copies, since they are copy like instructions that aren't - /// recognized by the register allocator. - MachineInstr * - RewriteSource(TargetInstrInfo::RegSubRegPair Def, - PeepholeOptimizer::RewriteMapTy &RewriteMap) override { - assert(!TargetRegisterInfo::isPhysicalRegister(Def.Reg) && - "We do not rewrite physical registers"); - - // Find the new source to use in the COPY rewrite. - TargetInstrInfo::RegSubRegPair NewSrc = - getNewSource(&MRI, &TII, Def, RewriteMap); - - // Insert the COPY. - const TargetRegisterClass *DefRC = MRI.getRegClass(Def.Reg); - unsigned NewVR = MRI.createVirtualRegister(DefRC); - - MachineInstr *NewCopy = - BuildMI(*CopyLike.getParent(), &CopyLike, CopyLike.getDebugLoc(), - TII.get(TargetOpcode::COPY), NewVR) - .addReg(NewSrc.Reg, 0, NewSrc.SubReg); - - NewCopy->getOperand(0).setSubReg(Def.SubReg); - if (Def.SubReg) - NewCopy->getOperand(0).setIsUndef(); - - DEBUG(dbgs() << "-- RewriteSource\n"); - DEBUG(dbgs() << " Replacing: " << CopyLike); - DEBUG(dbgs() << " With: " << *NewCopy); - MRI.replaceRegWith(Def.Reg, NewVR); - MRI.clearKillFlags(NewVR); - - // We extended the lifetime of NewSrc.Reg, clear the kill flags to - // account for that. - MRI.clearKillFlags(NewSrc.Reg); - - return NewCopy; + bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override { + return false; } }; -/// \brief Specialized rewriter for INSERT_SUBREG instruction. -class InsertSubregRewriter : public CopyRewriter { +/// Specialized rewriter for INSERT_SUBREG instruction. +class InsertSubregRewriter : public Rewriter { public: - InsertSubregRewriter(MachineInstr &MI) : CopyRewriter(MI) { + InsertSubregRewriter(MachineInstr &MI) : Rewriter(MI) { assert(MI.isInsertSubreg() && "Invalid instruction"); } - /// \brief See CopyRewriter::getNextRewritableSource. + /// \see See Rewriter::getNextRewritableSource() /// Here CopyLike has the following form: /// dst = INSERT_SUBREG Src1, Src2.src2SubIdx, subIdx. 
/// Src1 has the same register class has dst, hence, there is @@ -1058,29 +914,27 @@ class InsertSubregRewriter : public CopyRewriter { /// Src2.src2SubIdx, may not be register coalescer friendly. /// Therefore, the first call to this method returns: /// (SrcReg, SrcSubReg) = (Src2, src2SubIdx). - /// (TrackReg, TrackSubReg) = (dst, subIdx). + /// (DstReg, DstSubReg) = (dst, subIdx). /// /// Subsequence calls will return false. - bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg, - unsigned &TrackReg, - unsigned &TrackSubReg) override { + bool getNextRewritableSource(RegSubRegPair &Src, + RegSubRegPair &Dst) override { // If we already get the only source we can rewrite, return false. if (CurrentSrcIdx == 2) return false; // We are looking at v2 = INSERT_SUBREG v0, v1, sub0. CurrentSrcIdx = 2; const MachineOperand &MOInsertedReg = CopyLike.getOperand(2); - SrcReg = MOInsertedReg.getReg(); - SrcSubReg = MOInsertedReg.getSubReg(); + Src = RegSubRegPair(MOInsertedReg.getReg(), MOInsertedReg.getSubReg()); const MachineOperand &MODef = CopyLike.getOperand(0); // We want to track something that is compatible with the // partial definition. - TrackReg = MODef.getReg(); if (MODef.getSubReg()) // Bail if we have to compose sub-register indices. return false; - TrackSubReg = (unsigned)CopyLike.getOperand(3).getImm(); + Dst = RegSubRegPair(MODef.getReg(), + (unsigned)CopyLike.getOperand(3).getImm()); return true; } @@ -1095,41 +949,39 @@ class InsertSubregRewriter : public CopyRewriter { } }; -/// \brief Specialized rewriter for EXTRACT_SUBREG instruction. -class ExtractSubregRewriter : public CopyRewriter { +/// Specialized rewriter for EXTRACT_SUBREG instruction. +class ExtractSubregRewriter : public Rewriter { const TargetInstrInfo &TII; public: ExtractSubregRewriter(MachineInstr &MI, const TargetInstrInfo &TII) - : CopyRewriter(MI), TII(TII) { + : Rewriter(MI), TII(TII) { assert(MI.isExtractSubreg() && "Invalid instruction"); } - /// \brief See CopyRewriter::getNextRewritableSource. + /// \see Rewriter::getNextRewritableSource() /// Here CopyLike has the following form: /// dst.dstSubIdx = EXTRACT_SUBREG Src, subIdx. /// There is only one rewritable source: Src.subIdx, /// which defines dst.dstSubIdx. - bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg, - unsigned &TrackReg, - unsigned &TrackSubReg) override { + bool getNextRewritableSource(RegSubRegPair &Src, + RegSubRegPair &Dst) override { // If we already get the only source we can rewrite, return false. if (CurrentSrcIdx == 1) return false; // We are looking at v1 = EXTRACT_SUBREG v0, sub0. CurrentSrcIdx = 1; const MachineOperand &MOExtractedReg = CopyLike.getOperand(1); - SrcReg = MOExtractedReg.getReg(); // If we have to compose sub-register indices, bail out. if (MOExtractedReg.getSubReg()) return false; - SrcSubReg = CopyLike.getOperand(2).getImm(); + Src = RegSubRegPair(MOExtractedReg.getReg(), + CopyLike.getOperand(2).getImm()); // We want to track something that is compatible with the definition. const MachineOperand &MODef = CopyLike.getOperand(0); - TrackReg = MODef.getReg(); - TrackSubReg = MODef.getSubReg(); + Dst = RegSubRegPair(MODef.getReg(), MODef.getSubReg()); return true; } @@ -1159,14 +1011,14 @@ class ExtractSubregRewriter : public CopyRewriter { } }; -/// \brief Specialized rewriter for REG_SEQUENCE instruction. -class RegSequenceRewriter : public CopyRewriter { +/// Specialized rewriter for REG_SEQUENCE instruction. 
+class RegSequenceRewriter : public Rewriter { public: - RegSequenceRewriter(MachineInstr &MI) : CopyRewriter(MI) { + RegSequenceRewriter(MachineInstr &MI) : Rewriter(MI) { assert(MI.isRegSequence() && "Invalid instruction"); } - /// \brief See CopyRewriter::getNextRewritableSource. + /// \see Rewriter::getNextRewritableSource() /// Here CopyLike has the following form: /// dst = REG_SEQUENCE Src1.src1SubIdx, subIdx1, Src2.src2SubIdx, subIdx2. /// Each call will return a different source, walking all the available @@ -1174,17 +1026,16 @@ class RegSequenceRewriter : public CopyRewriter { /// /// The first call returns: /// (SrcReg, SrcSubReg) = (Src1, src1SubIdx). - /// (TrackReg, TrackSubReg) = (dst, subIdx1). + /// (DstReg, DstSubReg) = (dst, subIdx1). /// /// The second call returns: /// (SrcReg, SrcSubReg) = (Src2, src2SubIdx). - /// (TrackReg, TrackSubReg) = (dst, subIdx2). + /// (DstReg, DstSubReg) = (dst, subIdx2). /// /// And so on, until all the sources have been traversed, then /// it returns false. - bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg, - unsigned &TrackReg, - unsigned &TrackSubReg) override { + bool getNextRewritableSource(RegSubRegPair &Src, + RegSubRegPair &Dst) override { // We are looking at v0 = REG_SEQUENCE v1, sub1, v2, sub2, etc. // If this is the first call, move to the first argument. @@ -1197,17 +1048,17 @@ class RegSequenceRewriter : public CopyRewriter { return false; } const MachineOperand &MOInsertedReg = CopyLike.getOperand(CurrentSrcIdx); - SrcReg = MOInsertedReg.getReg(); + Src.Reg = MOInsertedReg.getReg(); // If we have to compose sub-register indices, bail out. - if ((SrcSubReg = MOInsertedReg.getSubReg())) + if ((Src.SubReg = MOInsertedReg.getSubReg())) return false; // We want to track something that is compatible with the related // partial definition. - TrackSubReg = CopyLike.getOperand(CurrentSrcIdx + 1).getImm(); + Dst.SubReg = CopyLike.getOperand(CurrentSrcIdx + 1).getImm(); const MachineOperand &MODef = CopyLike.getOperand(0); - TrackReg = MODef.getReg(); + Dst.Reg = MODef.getReg(); // If we have to compose sub-registers, bail. return MODef.getSubReg() == 0; } @@ -1227,16 +1078,14 @@ class RegSequenceRewriter : public CopyRewriter { } // end anonymous namespace -/// \brief Get the appropriated CopyRewriter for \p MI. -/// \return A pointer to a dynamically allocated CopyRewriter or nullptr -/// if no rewriter works for \p MI. -static CopyRewriter *getCopyRewriter(MachineInstr &MI, - const TargetInstrInfo &TII, - MachineRegisterInfo &MRI) { +/// Get the appropriated Rewriter for \p MI. +/// \return A pointer to a dynamically allocated Rewriter or nullptr if no +/// rewriter works for \p MI. +static Rewriter *getCopyRewriter(MachineInstr &MI, const TargetInstrInfo &TII) { // Handle uncoalescable copy-like instructions. - if (MI.isBitcast() || (MI.isRegSequenceLike() || MI.isInsertSubregLike() || - MI.isExtractSubregLike())) - return new UncoalescableRewriter(MI, TII, MRI); + if (MI.isBitcast() || MI.isRegSequenceLike() || MI.isInsertSubregLike() || + MI.isExtractSubregLike()) + return new UncoalescableRewriter(MI); switch (MI.getOpcode()) { default: @@ -1250,53 +1099,102 @@ static CopyRewriter *getCopyRewriter(MachineInstr &MI, case TargetOpcode::REG_SEQUENCE: return new RegSequenceRewriter(MI); } - llvm_unreachable(nullptr); } -/// \brief Optimize generic copy instructions to avoid cross -/// register bank copy. 
The optimization looks through a chain of -/// copies and tries to find a source that has a compatible register -/// class. -/// Two register classes are considered to be compatible if they share -/// the same register bank. +/// \brief Given a \p Def.Reg and Def.SubReg pair, use \p RewriteMap to find +/// the new source to use for rewrite. If \p HandleMultipleSources is true and +/// multiple sources for a given \p Def are found along the way, we found a +/// PHI instructions that needs to be rewritten. +/// TODO: HandleMultipleSources should be removed once we test PHI handling +/// with coalescable copies. +static RegSubRegPair +getNewSource(MachineRegisterInfo *MRI, const TargetInstrInfo *TII, + RegSubRegPair Def, + const PeepholeOptimizer::RewriteMapTy &RewriteMap, + bool HandleMultipleSources = true) { + RegSubRegPair LookupSrc(Def.Reg, Def.SubReg); + while (true) { + ValueTrackerResult Res = RewriteMap.lookup(LookupSrc); + // If there are no entries on the map, LookupSrc is the new source. + if (!Res.isValid()) + return LookupSrc; + + // There's only one source for this definition, keep searching... + unsigned NumSrcs = Res.getNumSources(); + if (NumSrcs == 1) { + LookupSrc.Reg = Res.getSrcReg(0); + LookupSrc.SubReg = Res.getSrcSubReg(0); + continue; + } + + // TODO: Remove once multiple srcs w/ coalescable copies are supported. + if (!HandleMultipleSources) + break; + + // Multiple sources, recurse into each source to find a new source + // for it. Then, rewrite the PHI accordingly to its new edges. + SmallVector NewPHISrcs; + for (unsigned i = 0; i < NumSrcs; ++i) { + RegSubRegPair PHISrc(Res.getSrcReg(i), Res.getSrcSubReg(i)); + NewPHISrcs.push_back( + getNewSource(MRI, TII, PHISrc, RewriteMap, HandleMultipleSources)); + } + + // Build the new PHI node and return its def register as the new source. + MachineInstr &OrigPHI = const_cast(*Res.getInst()); + MachineInstr &NewPHI = insertPHI(*MRI, *TII, NewPHISrcs, OrigPHI); + DEBUG(dbgs() << "-- getNewSource\n"); + DEBUG(dbgs() << " Replacing: " << OrigPHI); + DEBUG(dbgs() << " With: " << NewPHI); + const MachineOperand &MODef = NewPHI.getOperand(0); + return RegSubRegPair(MODef.getReg(), MODef.getSubReg()); + } + + return RegSubRegPair(0, 0); +} + +/// Optimize generic copy instructions to avoid cross register bank copy. +/// The optimization looks through a chain of copies and tries to find a source +/// that has a compatible register class. +/// Two register classes are considered to be compatible if they share the same +/// register bank. /// New copies issued by this optimization are register allocator /// friendly. This optimization does not remove any copy as it may /// overconstrain the register allocator, but replaces some operands /// when possible. /// \pre isCoalescableCopy(*MI) is true. /// \return True, when \p MI has been rewritten. False otherwise. -bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr *MI) { - assert(MI && isCoalescableCopy(*MI) && "Invalid argument"); - assert(MI->getDesc().getNumDefs() == 1 && +bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr &MI) { + assert(isCoalescableCopy(MI) && "Invalid argument"); + assert(MI.getDesc().getNumDefs() == 1 && "Coalescer can understand multiple defs?!"); - const MachineOperand &MODef = MI->getOperand(0); + const MachineOperand &MODef = MI.getOperand(0); // Do not rewrite physical definitions. if (TargetRegisterInfo::isPhysicalRegister(MODef.getReg())) return false; bool Changed = false; // Get the right rewriter for the current copy. 
- std::unique_ptr<CopyRewriter> CpyRewriter(getCopyRewriter(*MI, *TII, *MRI)); + std::unique_ptr<Rewriter> CpyRewriter(getCopyRewriter(MI, *TII)); // If none exists, bail out. if (!CpyRewriter) return false; // Rewrite each rewritable source. - unsigned SrcReg, SrcSubReg, TrackReg, TrackSubReg; - while (CpyRewriter->getNextRewritableSource(SrcReg, SrcSubReg, TrackReg, - TrackSubReg)) { + RegSubRegPair Src; + RegSubRegPair TrackPair; + while (CpyRewriter->getNextRewritableSource(Src, TrackPair)) { // Keep track of PHI nodes and its incoming edges when looking for sources. RewriteMapTy RewriteMap; // Try to find a more suitable source. If we failed to do so, or get the // actual source, move to the next source. - if (!findNextSource(TrackReg, TrackSubReg, RewriteMap)) + if (!findNextSource(TrackPair, RewriteMap)) continue; // Get the new source to rewrite. TODO: Only enable handling of multiple // sources (PHIs) once we have a motivating example and testcases for it. - TargetInstrInfo::RegSubRegPair TrackPair(TrackReg, TrackSubReg); - TargetInstrInfo::RegSubRegPair NewSrc = CpyRewriter->getNewSource( - MRI, TII, TrackPair, RewriteMap, false /* multiple sources */); - if (SrcReg == NewSrc.Reg || NewSrc.Reg == 0) + RegSubRegPair NewSrc = getNewSource(MRI, TII, TrackPair, RewriteMap, + /*HandleMultipleSources=*/false); + if (Src.Reg == NewSrc.Reg || NewSrc.Reg == 0) continue; // Rewrite source. @@ -1315,6 +1213,47 @@ bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr *MI) { return Changed; } +/// \brief Rewrite the source found through \p Def, by using the \p RewriteMap +/// and create a new COPY instruction. More info about RewriteMap in +/// PeepholeOptimizer::findNextSource. Right now this is only used to handle +/// Uncoalescable copies, since they are copy like instructions that aren't +/// recognized by the register allocator. +MachineInstr & +PeepholeOptimizer::rewriteSource(MachineInstr &CopyLike, + RegSubRegPair Def, RewriteMapTy &RewriteMap) { + assert(!TargetRegisterInfo::isPhysicalRegister(Def.Reg) && + "We do not rewrite physical registers"); + + // Find the new source to use in the COPY rewrite. + RegSubRegPair NewSrc = getNewSource(MRI, TII, Def, RewriteMap); + + // Insert the COPY. + const TargetRegisterClass *DefRC = MRI->getRegClass(Def.Reg); + unsigned NewVReg = MRI->createVirtualRegister(DefRC); + + MachineInstr *NewCopy = + BuildMI(*CopyLike.getParent(), &CopyLike, CopyLike.getDebugLoc(), + TII->get(TargetOpcode::COPY), NewVReg) + .addReg(NewSrc.Reg, 0, NewSrc.SubReg); + + if (Def.SubReg) { + NewCopy->getOperand(0).setSubReg(Def.SubReg); + NewCopy->getOperand(0).setIsUndef(); + } + + DEBUG(dbgs() << "-- RewriteSource\n"); + DEBUG(dbgs() << " Replacing: " << CopyLike); + DEBUG(dbgs() << " With: " << *NewCopy); + MRI->replaceRegWith(Def.Reg, NewVReg); + MRI->clearKillFlags(NewVReg); + + // We extended the lifetime of NewSrc.Reg, clear the kill flags to + // account for that. + MRI->clearKillFlags(NewSrc.Reg); + + return *NewCopy; +} + /// \brief Optimize copy-like instructions to create /// register coalescer friendly instruction. /// The optimization tries to kill-off the \p MI by looking @@ -1327,48 +1266,40 @@ bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr *MI) { /// been removed from its parent. /// All COPY instructions created, are inserted in \p LocalMIs.
bool PeepholeOptimizer::optimizeUncoalescableCopy( - MachineInstr *MI, SmallPtrSetImpl &LocalMIs) { - assert(MI && isUncoalescableCopy(*MI) && "Invalid argument"); - - // Check if we can rewrite all the values defined by this instruction. - SmallVector RewritePairs; - // Get the right rewriter for the current copy. - std::unique_ptr CpyRewriter(getCopyRewriter(*MI, *TII, *MRI)); - // If none exists, bail out. - if (!CpyRewriter) - return false; + MachineInstr &MI, SmallPtrSetImpl &LocalMIs) { + assert(isUncoalescableCopy(MI) && "Invalid argument"); + UncoalescableRewriter CpyRewriter(MI); // Rewrite each rewritable source by generating new COPYs. This works // differently from optimizeCoalescableCopy since it first makes sure that all // definitions can be rewritten. RewriteMapTy RewriteMap; - unsigned Reg, SubReg, CopyDefReg, CopyDefSubReg; - while (CpyRewriter->getNextRewritableSource(Reg, SubReg, CopyDefReg, - CopyDefSubReg)) { + RegSubRegPair Src; + RegSubRegPair Def; + SmallVector RewritePairs; + while (CpyRewriter.getNextRewritableSource(Src, Def)) { // If a physical register is here, this is probably for a good reason. // Do not rewrite that. - if (TargetRegisterInfo::isPhysicalRegister(CopyDefReg)) + if (TargetRegisterInfo::isPhysicalRegister(Def.Reg)) return false; // If we do not know how to rewrite this definition, there is no point // in trying to kill this instruction. - TargetInstrInfo::RegSubRegPair Def(CopyDefReg, CopyDefSubReg); - if (!findNextSource(Def.Reg, Def.SubReg, RewriteMap)) + if (!findNextSource(Def, RewriteMap)) return false; RewritePairs.push_back(Def); } // The change is possible for all defs, do it. - for (const auto &Def : RewritePairs) { + for (const RegSubRegPair &Def : RewritePairs) { // Rewrite the "copy" in a way the register coalescer understands. - MachineInstr *NewCopy = CpyRewriter->RewriteSource(Def, RewriteMap); - assert(NewCopy && "Should be able to always generate a new copy"); - LocalMIs.insert(NewCopy); + MachineInstr &NewCopy = rewriteSource(MI, Def, RewriteMap); + LocalMIs.insert(&NewCopy); } // MI is now dead. - MI->eraseFromParent(); + MI.eraseFromParent(); ++NumUncoalescableCopies; return true; } @@ -1377,18 +1308,18 @@ bool PeepholeOptimizer::optimizeUncoalescableCopy( /// We only fold loads to virtual registers and the virtual register defined /// has a single use. bool PeepholeOptimizer::isLoadFoldable( - MachineInstr *MI, SmallSet &FoldAsLoadDefCandidates) { - if (!MI->canFoldAsLoad() || !MI->mayLoad()) + MachineInstr &MI, SmallSet &FoldAsLoadDefCandidates) { + if (!MI.canFoldAsLoad() || !MI.mayLoad()) return false; - const MCInstrDesc &MCID = MI->getDesc(); + const MCInstrDesc &MCID = MI.getDesc(); if (MCID.getNumDefs() != 1) return false; - unsigned Reg = MI->getOperand(0).getReg(); + unsigned Reg = MI.getOperand(0).getReg(); // To reduce compilation time, we check MRI->hasOneNonDBGUse when inserting // loads. It should be checked when processing uses of the load, since // uses can be removed during peephole. 
- if (!MI->getOperand(0).getSubReg() && + if (!MI.getOperand(0).getSubReg() && TargetRegisterInfo::isVirtualRegister(Reg) && MRI->hasOneNonDBGUse(Reg)) { FoldAsLoadDefCandidates.insert(Reg); @@ -1398,16 +1329,16 @@ bool PeepholeOptimizer::isLoadFoldable( } bool PeepholeOptimizer::isMoveImmediate( - MachineInstr *MI, SmallSet &ImmDefRegs, + MachineInstr &MI, SmallSet &ImmDefRegs, DenseMap &ImmDefMIs) { - const MCInstrDesc &MCID = MI->getDesc(); - if (!MI->isMoveImmediate()) + const MCInstrDesc &MCID = MI.getDesc(); + if (!MI.isMoveImmediate()) return false; if (MCID.getNumDefs() != 1) return false; - unsigned Reg = MI->getOperand(0).getReg(); + unsigned Reg = MI.getOperand(0).getReg(); if (TargetRegisterInfo::isVirtualRegister(Reg)) { - ImmDefMIs.insert(std::make_pair(Reg, MI)); + ImmDefMIs.insert(std::make_pair(Reg, &MI)); ImmDefRegs.insert(Reg); return true; } @@ -1418,11 +1349,11 @@ bool PeepholeOptimizer::isMoveImmediate( /// Try folding register operands that are defined by move immediate /// instructions, i.e. a trivial constant folding optimization, if /// and only if the def and use are in the same BB. -bool PeepholeOptimizer::foldImmediate( - MachineInstr *MI, MachineBasicBlock *MBB, SmallSet &ImmDefRegs, +bool PeepholeOptimizer::foldImmediate(MachineInstr &MI, + SmallSet &ImmDefRegs, DenseMap &ImmDefMIs) { - for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); + for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI.getOperand(i); if (!MO.isReg() || MO.isDef()) continue; // Ignore dead implicit defs. @@ -1435,7 +1366,7 @@ bool PeepholeOptimizer::foldImmediate( continue; DenseMap::iterator II = ImmDefMIs.find(Reg); assert(II != ImmDefMIs.end() && "couldn't find immediate definition"); - if (TII->FoldImmediate(*MI, *II->second, Reg, MRI)) { + if (TII->FoldImmediate(MI, *II->second, Reg, MRI)) { ++NumImmFold; return true; } @@ -1453,32 +1384,32 @@ bool PeepholeOptimizer::foldImmediate( // only the first copy is considered. // // e.g. -// %vreg1 = COPY %vreg0 -// %vreg2 = COPY %vreg0:sub1 +// %1 = COPY %0 +// %2 = COPY %0:sub1 // -// Should replace %vreg2 uses with %vreg1:sub1 -bool PeepholeOptimizer::foldRedundantCopy( - MachineInstr *MI, SmallSet &CopySrcRegs, +// Should replace %2 uses with %1:sub1 +bool PeepholeOptimizer::foldRedundantCopy(MachineInstr &MI, + SmallSet &CopySrcRegs, DenseMap &CopyMIs) { - assert(MI->isCopy() && "expected a COPY machine instruction"); + assert(MI.isCopy() && "expected a COPY machine instruction"); - unsigned SrcReg = MI->getOperand(1).getReg(); + unsigned SrcReg = MI.getOperand(1).getReg(); if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) return false; - unsigned DstReg = MI->getOperand(0).getReg(); + unsigned DstReg = MI.getOperand(0).getReg(); if (!TargetRegisterInfo::isVirtualRegister(DstReg)) return false; if (CopySrcRegs.insert(SrcReg).second) { // First copy of this reg seen. - CopyMIs.insert(std::make_pair(SrcReg, MI)); + CopyMIs.insert(std::make_pair(SrcReg, &MI)); return false; } MachineInstr *PrevCopy = CopyMIs.find(SrcReg)->second; - unsigned SrcSubReg = MI->getOperand(1).getSubReg(); + unsigned SrcSubReg = MI.getOperand(1).getSubReg(); unsigned PrevSrcSubReg = PrevCopy->getOperand(1).getSubReg(); // Can't replace different subregister extracts. 
@@ -1507,32 +1438,31 @@ bool PeepholeOptimizer::isNAPhysCopy(unsigned Reg) { } bool PeepholeOptimizer::foldRedundantNAPhysCopy( - MachineInstr *MI, DenseMap &NAPhysToVirtMIs) { - assert(MI->isCopy() && "expected a COPY machine instruction"); + MachineInstr &MI, DenseMap &NAPhysToVirtMIs) { + assert(MI.isCopy() && "expected a COPY machine instruction"); if (DisableNAPhysCopyOpt) return false; - unsigned DstReg = MI->getOperand(0).getReg(); - unsigned SrcReg = MI->getOperand(1).getReg(); + unsigned DstReg = MI.getOperand(0).getReg(); + unsigned SrcReg = MI.getOperand(1).getReg(); if (isNAPhysCopy(SrcReg) && TargetRegisterInfo::isVirtualRegister(DstReg)) { - // %vreg = COPY %PHYSREG + // %vreg = COPY %physreg // Avoid using a datastructure which can track multiple live non-allocatable // phys->virt copies since LLVM doesn't seem to do this. - NAPhysToVirtMIs.insert({SrcReg, MI}); + NAPhysToVirtMIs.insert({SrcReg, &MI}); return false; } if (!(TargetRegisterInfo::isVirtualRegister(SrcReg) && isNAPhysCopy(DstReg))) return false; - // %PHYSREG = COPY %vreg + // %physreg = COPY %vreg auto PrevCopy = NAPhysToVirtMIs.find(DstReg); if (PrevCopy == NAPhysToVirtMIs.end()) { // We can't remove the copy: there was an intervening clobber of the // non-allocatable physical register after the copy to virtual. - DEBUG(dbgs() << "NAPhysCopy: intervening clobber forbids erasing " << *MI - << '\n'); + DEBUG(dbgs() << "NAPhysCopy: intervening clobber forbids erasing " << MI); return false; } @@ -1540,7 +1470,7 @@ bool PeepholeOptimizer::foldRedundantNAPhysCopy( if (PrevDstReg == SrcReg) { // Remove the virt->phys copy: we saw the virtual register definition, and // the non-allocatable physical register's state hasn't changed since then. - DEBUG(dbgs() << "NAPhysCopy: erasing " << *MI << '\n'); + DEBUG(dbgs() << "NAPhysCopy: erasing " << MI); ++NumNAPhysCopies; return true; } @@ -1549,7 +1479,7 @@ bool PeepholeOptimizer::foldRedundantNAPhysCopy( // register get a copy of the non-allocatable physical register, and we only // track one such copy. Avoid getting confused by this new non-allocatable // physical register definition, and remove it from the tracked copies. - DEBUG(dbgs() << "NAPhysCopy: missed opportunity " << *MI << '\n'); + DEBUG(dbgs() << "NAPhysCopy: missed opportunity " << MI); NAPhysToVirtMIs.erase(PrevCopy); return false; } @@ -1614,23 +1544,23 @@ bool PeepholeOptimizer::findTargetRecurrence( return false; } -/// \brief Phi instructions will eventually be lowered to copy instructions. If -/// phi is in a loop header, a recurrence may formulated around the source and -/// destination of the phi. For such case commuting operands of the instructions -/// in the recurrence may enable coalescing of the copy instruction generated -/// from the phi. For example, if there is a recurrence of +/// Phi instructions will eventually be lowered to copy instructions. +/// If phi is in a loop header, a recurrence may formulated around the source +/// and destination of the phi. For such case commuting operands of the +/// instructions in the recurrence may enable coalescing of the copy instruction +/// generated from the phi. 
For example, if there is a recurrence of /// /// LoopHeader: -/// %vreg1 = phi(%vreg0, %vreg100) +/// %1 = phi(%0, %100) /// LoopLatch: -/// %vreg0 = ADD %vreg2, %vreg1 +/// %0 = ADD %2, %1 /// -/// , the fact that vreg0 and vreg2 are in the same tied operands set makes +/// , the fact that %0 and %2 are in the same tied operands set makes /// the coalescing of copy instruction generated from the phi in -/// LoopHeader(i.e. %vreg1 = COPY %vreg0) impossible, because %vreg1 and -/// %vreg2 have overlapping live range. This introduces additional move -/// instruction to the final assembly. However, if we commute %vreg2 and -/// %vreg1 of ADD instruction, the redundant move instruction can be +/// LoopHeader(i.e. %1 = COPY %0) impossible, because %1 and +/// %2 have overlapping live range. This introduces additional move +/// instruction to the final assembly. However, if we commute %2 and +/// %1 of ADD instruction, the redundant move instruction can be /// avoided. bool PeepholeOptimizer::optimizeRecurrence(MachineInstr &PHI) { SmallSet TargetRegs; @@ -1662,7 +1592,7 @@ bool PeepholeOptimizer::optimizeRecurrence(MachineInstr &PHI) { } bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { - if (skipFunction(*MF.getFunction())) + if (skipFunction(MF.getFunction())) return false; DEBUG(dbgs() << "********** PEEPHOLE OPTIMIZER **********\n"); @@ -1696,8 +1626,8 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { // Track when a non-allocatable physical register is copied to a virtual // register so that useless moves can be removed. // - // %PHYSREG is the map index; MI is the last valid `%vreg = COPY %PHYSREG` - // without any intervening re-definition of %PHYSREG. + // %physreg is the map index; MI is the last valid `%vreg = COPY %physreg` + // without any intervening re-definition of %physreg. DenseMap NAPhysToVirtMIs; // Set of virtual registers that are copied from. @@ -1728,27 +1658,25 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { } if (!MI->isCopy()) { - for (const auto &Op : MI->operands()) { + for (const MachineOperand &MO : MI->operands()) { // Visit all operands: definitions can be implicit or explicit. - if (Op.isReg()) { - unsigned Reg = Op.getReg(); - if (Op.isDef() && isNAPhysCopy(Reg)) { + if (MO.isReg()) { + unsigned Reg = MO.getReg(); + if (MO.isDef() && isNAPhysCopy(Reg)) { const auto &Def = NAPhysToVirtMIs.find(Reg); if (Def != NAPhysToVirtMIs.end()) { // A new definition of the non-allocatable physical register // invalidates previous copies. - DEBUG(dbgs() << "NAPhysCopy: invalidating because of " << *MI - << '\n'); + DEBUG(dbgs() << "NAPhysCopy: invalidating because of " << *MI); NAPhysToVirtMIs.erase(Def); } } - } else if (Op.isRegMask()) { - const uint32_t *RegMask = Op.getRegMask(); + } else if (MO.isRegMask()) { + const uint32_t *RegMask = MO.getRegMask(); for (auto &RegMI : NAPhysToVirtMIs) { unsigned Def = RegMI.first; if (MachineOperand::clobbersPhysReg(RegMask, Def)) { - DEBUG(dbgs() << "NAPhysCopy: invalidating because of " << *MI - << '\n'); + DEBUG(dbgs() << "NAPhysCopy: invalidating because of " << *MI); NAPhysToVirtMIs.erase(Def); } } @@ -1764,58 +1692,57 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { // don't know what's correct anymore. // // FIXME: handle explicit asm clobbers. 
- DEBUG(dbgs() << "NAPhysCopy: blowing away all info due to " << *MI - << '\n'); + DEBUG(dbgs() << "NAPhysCopy: blowing away all info due to " << *MI); NAPhysToVirtMIs.clear(); } if ((isUncoalescableCopy(*MI) && - optimizeUncoalescableCopy(MI, LocalMIs)) || - (MI->isCompare() && optimizeCmpInstr(MI, &MBB)) || - (MI->isSelect() && optimizeSelect(MI, LocalMIs))) { + optimizeUncoalescableCopy(*MI, LocalMIs)) || + (MI->isCompare() && optimizeCmpInstr(*MI)) || + (MI->isSelect() && optimizeSelect(*MI, LocalMIs))) { // MI is deleted. LocalMIs.erase(MI); Changed = true; continue; } - if (MI->isConditionalBranch() && optimizeCondBranch(MI)) { + if (MI->isConditionalBranch() && optimizeCondBranch(*MI)) { Changed = true; continue; } - if (isCoalescableCopy(*MI) && optimizeCoalescableCopy(MI)) { + if (isCoalescableCopy(*MI) && optimizeCoalescableCopy(*MI)) { // MI is just rewritten. Changed = true; continue; } if (MI->isCopy() && - (foldRedundantCopy(MI, CopySrcRegs, CopySrcMIs) || - foldRedundantNAPhysCopy(MI, NAPhysToVirtMIs))) { + (foldRedundantCopy(*MI, CopySrcRegs, CopySrcMIs) || + foldRedundantNAPhysCopy(*MI, NAPhysToVirtMIs))) { LocalMIs.erase(MI); MI->eraseFromParent(); Changed = true; continue; } - if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) { + if (isMoveImmediate(*MI, ImmDefRegs, ImmDefMIs)) { SeenMoveImm = true; } else { - Changed |= optimizeExtInstr(MI, &MBB, LocalMIs); + Changed |= optimizeExtInstr(*MI, MBB, LocalMIs); // optimizeExtInstr might have created new instructions after MI // and before the already incremented MII. Adjust MII so that the // next iteration sees the new instructions. MII = MI; ++MII; if (SeenMoveImm) - Changed |= foldImmediate(MI, &MBB, ImmDefRegs, ImmDefMIs); + Changed |= foldImmediate(*MI, ImmDefRegs, ImmDefMIs); } // Check whether MI is a load candidate for folding into a later // instruction. If MI is not a candidate, check whether we can fold an // earlier load into MI. - if (!isLoadFoldable(MI, FoldAsLoadDefCandidates) && + if (!isLoadFoldable(*MI, FoldAsLoadDefCandidates) && !FoldAsLoadDefCandidates.empty()) { // We visit each operand even after successfully folding a previous @@ -1864,7 +1791,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { // the load candidates. Note: We might be able to fold *into* this // instruction, so this needs to be after the folding logic. if (MI->isLoadFoldBarrier()) { - DEBUG(dbgs() << "Encountered load fold barrier on " << *MI << "\n"); + DEBUG(dbgs() << "Encountered load fold barrier on " << *MI); FoldAsLoadDefCandidates.clear(); } } @@ -1885,6 +1812,8 @@ ValueTrackerResult ValueTracker::getNextSourceFromCopy() { return ValueTrackerResult(); // Otherwise, we want the whole source. const MachineOperand &Src = Def->getOperand(1); + if (Src.isUndef()) + return ValueTrackerResult(); return ValueTrackerResult(Src.getReg(), Src.getSubReg()); } @@ -1928,6 +1857,8 @@ ValueTrackerResult ValueTracker::getNextSourceFromBitcast() { } const MachineOperand &Src = Def->getOperand(SrcIdx); + if (Src.isUndef()) + return ValueTrackerResult(); return ValueTrackerResult(Src.getReg(), Src.getSubReg()); } @@ -1957,14 +1888,14 @@ ValueTrackerResult ValueTracker::getNextSourceFromRegSequence() { // duplicate the code from the generic TII. return ValueTrackerResult(); - SmallVector RegSeqInputRegs; + SmallVector RegSeqInputRegs; if (!TII->getRegSequenceInputs(*Def, DefIdx, RegSeqInputRegs)) return ValueTrackerResult(); // We are looking at: // Def = REG_SEQUENCE v0, sub0, v1, sub1, ... 
// Check if one of the operand defines the subreg we are interested in. - for (auto &RegSeqInput : RegSeqInputRegs) { + for (const RegSubRegPairAndIdx &RegSeqInput : RegSeqInputRegs) { if (RegSeqInput.SubIdx == DefSubReg) { if (RegSeqInput.SubReg) // Bail if we have to compose sub registers. @@ -1995,8 +1926,8 @@ ValueTrackerResult ValueTracker::getNextSourceFromInsertSubreg() { // duplicate the code from the generic TII. return ValueTrackerResult(); - TargetInstrInfo::RegSubRegPair BaseReg; - TargetInstrInfo::RegSubRegPairAndIdx InsertedReg; + RegSubRegPair BaseReg; + RegSubRegPairAndIdx InsertedReg; if (!TII->getInsertSubregInputs(*Def, DefIdx, BaseReg, InsertedReg)) return ValueTrackerResult(); @@ -2049,7 +1980,7 @@ ValueTrackerResult ValueTracker::getNextSourceFromExtractSubreg() { // duplicate the code from the generic TII. return ValueTrackerResult(); - TargetInstrInfo::RegSubRegPairAndIdx ExtractSubregInputReg; + RegSubRegPairAndIdx ExtractSubregInputReg; if (!TII->getExtractSubregInputs(*Def, DefIdx, ExtractSubregInputReg)) return ValueTrackerResult(); @@ -2082,7 +2013,7 @@ ValueTrackerResult ValueTracker::getNextSourceFromSubregToReg() { Def->getOperand(3).getImm()); } -/// \brief Explore each PHI incoming operand and return its sources +/// Explore each PHI incoming operand and return its sources. ValueTrackerResult ValueTracker::getNextSourceFromPHI() { assert(Def->isPHI() && "Invalid definition"); ValueTrackerResult Res; @@ -2094,8 +2025,12 @@ ValueTrackerResult ValueTracker::getNextSourceFromPHI() { // Return all register sources for PHI instructions. for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2) { - auto &MO = Def->getOperand(i); + const MachineOperand &MO = Def->getOperand(i); assert(MO.isReg() && "Invalid PHI instruction"); + // We have no code to deal with undef operands. They shouldn't happen in + // normal programs anyway. + if (MO.isUndef()) + return ValueTrackerResult(); Res.addSource(MO.getReg(), MO.getSubReg()); } @@ -2116,7 +2051,7 @@ ValueTrackerResult ValueTracker::getNextSourceImpl() { return getNextSourceFromBitcast(); // All the remaining cases involve "complex" instructions. // Bail if we did not ask for the advanced tracking. - if (!UseAdvancedTracking) + if (DisableAdvCopyOpt) return ValueTrackerResult(); if (Def->isRegSequence() || Def->isRegSequenceLike()) return getNextSourceFromRegSequence(); @@ -2152,9 +2087,14 @@ ValueTrackerResult ValueTracker::getNextSource() { // If we can still move up in the use-def chain, move to the next // definition. 
if (!TargetRegisterInfo::isPhysicalRegister(Reg) && OneRegSrc) { - Def = MRI.getVRegDef(Reg); - DefIdx = MRI.def_begin(Reg).getOperandNo(); - DefSubReg = Res.getSrcSubReg(0); + MachineRegisterInfo::def_iterator DI = MRI.def_begin(Reg); + if (DI != MRI.def_end()) { + Def = DI->getParent(); + DefIdx = DI.getOperandNo(); + DefSubReg = Res.getSrcSubReg(0); + } else { + Def = nullptr; + } return Res; } } diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp index 673dc37904fe..5d86faafdd85 100644 --- a/lib/CodeGen/PostRASchedulerList.cpp +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -25,7 +25,6 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/LatencyPriorityQueue.h" #include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -280,7 +279,7 @@ bool PostRAScheduler::enablePostRAScheduler( } bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { - if (skipFunction(*Fn.getFunction())) + if (skipFunction(Fn.getFunction())) return false; TII = Fn.getSubtarget().getInstrInfo(); @@ -322,8 +321,8 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { static int bbcnt = 0; if (bbcnt++ % DebugDiv != DebugMod) continue; - dbgs() << "*** DEBUG scheduling " << Fn.getName() - << ":BB#" << MBB.getNumber() << " ***\n"; + dbgs() << "*** DEBUG scheduling " << Fn.getName() << ":" + << printMBBReference(MBB) << " ***\n"; } #endif diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp index 7fbf7ddde0b6..48b48c5f6499 100644 --- a/lib/CodeGen/ProcessImplicitDefs.cpp +++ b/lib/CodeGen/ProcessImplicitDefs.cpp @@ -154,7 +154,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &MF) { if (WorkList.empty()) continue; - DEBUG(dbgs() << "BB#" << MFI->getNumber() << " has " << WorkList.size() + DEBUG(dbgs() << printMBBReference(*MFI) << " has " << WorkList.size() << " implicit defs.\n"); Changed = true; diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index 57c27550e064..a8d8ad8ac7dc 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -171,7 +171,7 @@ using StackObjSet = SmallSetVector; /// runOnMachineFunction - Insert prolog/epilog code and replace abstract /// frame indexes with appropriate references. bool PEI::runOnMachineFunction(MachineFunction &Fn) { - const Function* F = Fn.getFunction(); + const Function &F = Fn.getFunction(); const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo(); const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering(); @@ -206,7 +206,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { // called functions. Because of this, calculateCalleeSavedRegisters() // must be called before this function in order to set the AdjustsStack // and MaxCallFrameSize variables. 
- if (!F->hasFnAttribute(Attribute::Naked)) + if (!F.hasFnAttribute(Attribute::Naked)) insertPrologEpilogCode(Fn); // Replace all MO_FrameIndex operands with physical register references @@ -224,8 +224,8 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { MachineFrameInfo &MFI = Fn.getFrameInfo(); uint64_t StackSize = MFI.getStackSize(); if (WarnStackSize.getNumOccurrences() > 0 && WarnStackSize < StackSize) { - DiagnosticInfoStackSize DiagStackSize(*F, StackSize); - F->getContext().diagnose(DiagStackSize); + DiagnosticInfoStackSize DiagStackSize(F, StackSize); + F.getContext().diagnose(DiagStackSize); } delete RS; @@ -508,7 +508,7 @@ void PEI::spillCalleeSavedRegs(MachineFunction &Fn) { assert(Fn.getProperties().hasProperty( MachineFunctionProperties::Property::NoVRegs)); - const Function *F = Fn.getFunction(); + const Function &F = Fn.getFunction(); const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering(); MachineFrameInfo &MFI = Fn.getFrameInfo(); MinCSFrameIndex = std::numeric_limits::max(); @@ -522,7 +522,7 @@ void PEI::spillCalleeSavedRegs(MachineFunction &Fn) { assignCalleeSavedSpillSlots(Fn, SavedRegs, MinCSFrameIndex, MaxCSFrameIndex); // Add the code to save and restore the callee saved registers. - if (!F->hasFnAttribute(Attribute::Naked)) { + if (!F.hasFnAttribute(Attribute::Naked)) { MFI.setCalleeSavedInfoValid(true); std::vector &CSI = MFI.getCalleeSavedInfo(); @@ -952,7 +952,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { ORE->emit([&]() { return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "StackSize", - Fn.getFunction()->getSubprogram(), + Fn.getFunction().getSubprogram(), &Fn.front()) << ore::NV("NumStackBytes", StackSize) << " stack bytes in function"; }); @@ -993,7 +993,7 @@ void PEI::insertPrologEpilogCode(MachineFunction &Fn) { // approach is rather similar to that of Segmented Stacks, but it uses a // different conditional check and another BIF for allocating more stack // space. - if (Fn.getFunction()->getCallingConv() == CallingConv::HiPE) + if (Fn.getFunction().getCallingConv() == CallingConv::HiPE) for (MachineBasicBlock *SaveBlock : SaveBlocks) TFI.adjustForHiPEPrologue(Fn, *SaveBlock); } diff --git a/lib/CodeGen/README.txt b/lib/CodeGen/README.txt index 8f19e432ab79..3318e109155b 100644 --- a/lib/CodeGen/README.txt +++ b/lib/CodeGen/README.txt @@ -33,7 +33,7 @@ It also increase the likelihood the store may become dead. bb27 ... ... %reg1037 = ADDri %reg1039, 1 - %reg1038 = ADDrs %reg1032, %reg1039, %NOREG, 10 + %reg1038 = ADDrs %reg1032, %reg1039, %noreg, 10 Successors according to CFG: 0x8b03bf0 (#5) bb76 (0x8b03bf0, LLVM BB @0x8b032d0, ID#5): @@ -164,7 +164,7 @@ synthesize the various copy insertion/inspection methods in TargetInstrInfo. Stack coloring improvements: -1. Do proper LiveStackAnalysis on all stack objects including those which are +1. Do proper LiveStacks analysis on all stack objects including those which are not spill slots. 2. Reorder objects to fill in gaps between objects. e.g. 4, 1, , 4, 1, 1, 1, , 4 => 4, 1, 1, 1, 1, 4, 4 diff --git a/lib/CodeGen/ReachingDefAnalysis.cpp b/lib/CodeGen/ReachingDefAnalysis.cpp new file mode 100644 index 000000000000..6b131b250be7 --- /dev/null +++ b/lib/CodeGen/ReachingDefAnalysis.cpp @@ -0,0 +1,195 @@ +//===---- ReachingDefAnalysis.cpp - Reaching Def Analysis ---*- C++ -*-----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/ReachingDefAnalysis.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" + +using namespace llvm; + +#define DEBUG_TYPE "reaching-deps-analysis" + +char ReachingDefAnalysis::ID = 0; +INITIALIZE_PASS(ReachingDefAnalysis, DEBUG_TYPE, "ReachingDefAnalysis", false, + true) + +void ReachingDefAnalysis::enterBasicBlock( + const LoopTraversal::TraversedMBBInfo &TraversedMBB) { + + MachineBasicBlock *MBB = TraversedMBB.MBB; + unsigned MBBNumber = MBB->getNumber(); + assert(MBBNumber < MBBReachingDefs.size() && + "Unexpected basic block number."); + MBBReachingDefs[MBBNumber].resize(NumRegUnits); + + // Reset instruction counter in each basic block. + CurInstr = 0; + + // Set up LiveRegs to represent registers entering MBB. + // Default values are 'nothing happened a long time ago'. + if (LiveRegs.empty()) + LiveRegs.assign(NumRegUnits, ReachingDedDefaultVal); + + // This is the entry block. + if (MBB->pred_empty()) { + for (const auto &LI : MBB->liveins()) { + for (MCRegUnitIterator Unit(LI.PhysReg, TRI); Unit.isValid(); ++Unit) { + // Treat function live-ins as if they were defined just before the first + // instruction. Usually, function arguments are set up immediately + // before the call. + LiveRegs[*Unit] = -1; + MBBReachingDefs[MBBNumber][*Unit].push_back(LiveRegs[*Unit]); + } + } + DEBUG(dbgs() << printMBBReference(*MBB) << ": entry\n"); + return; + } + + // Try to coalesce live-out registers from predecessors. + for (MachineBasicBlock *pred : MBB->predecessors()) { + assert(unsigned(pred->getNumber()) < MBBOutRegsInfos.size() && + "Should have pre-allocated MBBInfos for all MBBs"); + const LiveRegsDefInfo &Incoming = MBBOutRegsInfos[pred->getNumber()]; + // Incoming is null if this is a backedge from a BB + // we haven't processed yet + if (Incoming.empty()) + continue; + + for (unsigned Unit = 0; Unit != NumRegUnits; ++Unit) { + // Use the most recent predecessor def for each register. + LiveRegs[Unit] = std::max(LiveRegs[Unit], Incoming[Unit]); + if ((LiveRegs[Unit] != ReachingDedDefaultVal)) + MBBReachingDefs[MBBNumber][Unit].push_back(LiveRegs[Unit]); + } + } + + DEBUG(dbgs() << printMBBReference(*MBB) + << (!TraversedMBB.IsDone ? ": incomplete\n" + : ": all preds known\n")); +} + +void ReachingDefAnalysis::leaveBasicBlock( + const LoopTraversal::TraversedMBBInfo &TraversedMBB) { + assert(!LiveRegs.empty() && "Must enter basic block first."); + unsigned MBBNumber = TraversedMBB.MBB->getNumber(); + assert(MBBNumber < MBBOutRegsInfos.size() && + "Unexpected basic block number."); + // Save register clearances at end of MBB - used by enterBasicBlock(). + MBBOutRegsInfos[MBBNumber] = LiveRegs; + + // While processing the basic block, we kept `Def` relative to the start + // of the basic block for convenience. However, future use of this information + // only cares about the clearance from the end of the block, so adjust + // everything to be relative to the end of the basic block. + for (int &OutLiveReg : MBBOutRegsInfos[MBBNumber]) + OutLiveReg -= CurInstr; + LiveRegs.clear(); +} + +void ReachingDefAnalysis::processDefs(MachineInstr *MI) { + assert(!MI->isDebugValue() && "Won't process debug values"); + + unsigned MBBNumber = MI->getParent()->getNumber(); + assert(MBBNumber < MBBReachingDefs.size() && + "Unexpected basic block number."); + const MCInstrDesc &MCID = MI->getDesc(); + for (unsigned i = 0, + e = MI->isVariadic() ? 
MI->getNumOperands() : MCID.getNumDefs(); + i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.getReg()) + continue; + if (MO.isUse()) + continue; + for (MCRegUnitIterator Unit(MO.getReg(), TRI); Unit.isValid(); ++Unit) { + // This instruction explicitly defines the current reg unit. + DEBUG(dbgs() << printReg(MO.getReg(), TRI) << ":\t" << CurInstr << '\t' + << *MI); + + // How many instructions since this reg unit was last written? + LiveRegs[*Unit] = CurInstr; + MBBReachingDefs[MBBNumber][*Unit].push_back(CurInstr); + } + } + InstIds[MI] = CurInstr; + ++CurInstr; +} + +void ReachingDefAnalysis::processBasicBlock( + const LoopTraversal::TraversedMBBInfo &TraversedMBB) { + enterBasicBlock(TraversedMBB); + for (MachineInstr &MI : *TraversedMBB.MBB) { + if (!MI.isDebugValue()) + processDefs(&MI); + } + leaveBasicBlock(TraversedMBB); +} + +bool ReachingDefAnalysis::runOnMachineFunction(MachineFunction &mf) { + if (skipFunction(mf.getFunction())) + return false; + MF = &mf; + TRI = MF->getSubtarget().getRegisterInfo(); + + LiveRegs.clear(); + NumRegUnits = TRI->getNumRegUnits(); + + MBBReachingDefs.resize(mf.getNumBlockIDs()); + + DEBUG(dbgs() << "********** REACHING DEFINITION ANALYSIS **********\n"); + + // Initialize the MBBOutRegsInfos + MBBOutRegsInfos.resize(mf.getNumBlockIDs()); + + // Traverse the basic blocks. + LoopTraversal Traversal; + LoopTraversal::TraversalOrder TraversedMBBOrder = Traversal.traverse(mf); + for (LoopTraversal::TraversedMBBInfo TraversedMBB : TraversedMBBOrder) { + processBasicBlock(TraversedMBB); + } + + // Sorting all reaching defs found for a ceartin reg unit in a given BB. + for (MBBDefsInfo &MBBDefs : MBBReachingDefs) { + for (MBBRegUnitDefs &RegUnitDefs : MBBDefs) + std::sort(RegUnitDefs.begin(), RegUnitDefs.end()); + } + + return false; +} + +void ReachingDefAnalysis::releaseMemory() { + // Clear the internal vectors. + MBBOutRegsInfos.clear(); + MBBReachingDefs.clear(); + InstIds.clear(); +} + +int ReachingDefAnalysis::getReachingDef(MachineInstr *MI, int PhysReg) { + assert(InstIds.count(MI) && "Unexpected machine instuction."); + int InstId = InstIds[MI]; + int DefRes = ReachingDedDefaultVal; + unsigned MBBNumber = MI->getParent()->getNumber(); + assert(MBBNumber < MBBReachingDefs.size() && + "Unexpected basic block number."); + int LatestDef = ReachingDedDefaultVal; + for (MCRegUnitIterator Unit(PhysReg, TRI); Unit.isValid(); ++Unit) { + for (int Def : MBBReachingDefs[MBBNumber][*Unit]) { + if (Def >= InstId) + break; + DefRes = Def; + } + LatestDef = std::max(LatestDef, DefRes); + } + return LatestDef; +} + +int ReachingDefAnalysis::getClearance(MachineInstr *MI, MCPhysReg PhysReg) { + assert(InstIds.count(MI) && "Unexpected machine instuction."); + return InstIds[MI] - getReachingDef(MI, PhysReg); +} diff --git a/lib/CodeGen/RegAllocBase.cpp b/lib/CodeGen/RegAllocBase.cpp index 6b67fd85667f..74c1592634aa 100644 --- a/lib/CodeGen/RegAllocBase.cpp +++ b/lib/CodeGen/RegAllocBase.cpp @@ -17,7 +17,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LiveInterval.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/LiveRegMatrix.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -40,8 +40,8 @@ STATISTIC(NumNewQueued , "Number of new live ranges queued"); // Temporary verification option until we can put verification inside // MachineVerifier. 
static cl::opt -VerifyRegAlloc("verify-regalloc", cl::location(RegAllocBase::VerifyEnabled), - cl::desc("Verify during register allocation")); + VerifyRegAlloc("verify-regalloc", cl::location(RegAllocBase::VerifyEnabled), + cl::Hidden, cl::desc("Verify during register allocation")); const char RegAllocBase::TimerGroupName[] = "regalloc"; const char RegAllocBase::TimerGroupDescription[] = "Register Allocation"; diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp index b38373d10499..1125d2c62bef 100644 --- a/lib/CodeGen/RegAllocBasic.cpp +++ b/lib/CodeGen/RegAllocBasic.cpp @@ -18,10 +18,10 @@ #include "Spiller.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/CalcSpillWeights.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/LiveRegMatrix.h" -#include "llvm/CodeGen/LiveStackAnalysis.h" +#include "llvm/CodeGen/LiveStacks.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" @@ -219,8 +219,8 @@ bool RABasic::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg, Intfs.push_back(Intf); } } - DEBUG(dbgs() << "spilling " << TRI->getName(PhysReg) << - " interferences with " << VirtReg << "\n"); + DEBUG(dbgs() << "spilling " << printReg(PhysReg, TRI) + << " interferences with " << VirtReg << "\n"); assert(!Intfs.empty() && "expected interference"); // Spill each interfering vreg allocated to PhysReg or an alias. diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp index 9da881005b57..6a5282cbbbff 100644 --- a/lib/CodeGen/RegAllocFast.cpp +++ b/lib/CodeGen/RegAllocFast.cpp @@ -34,7 +34,6 @@ #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" -#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Metadata.h" #include "llvm/MC/MCInstrDesc.h" @@ -272,7 +271,7 @@ void RegAllocFast::addKillFlag(const LiveReg &LR) { // subreg of this register and given we don't track which // lanes are actually dead, we cannot insert a kill flag here. // Otherwise we may end up in a situation like this: - // ... = (MO) physreg:sub1, physreg + // ... = (MO) physreg:sub1, implicit killed physreg // ... <== Here we would allow later pass to reuse physreg:sub1 // which is potentially wrong. // LR:sub0 = ... @@ -675,7 +674,7 @@ RegAllocFast::LiveRegMap::iterator RegAllocFast::reloadVirtReg(MachineInstr &MI, } else if (MO.isKill()) { // We must remove kill flags from uses of reloaded registers because the // register would be killed immediately, and there might be a second use: - // %foo = OR %x, %x + // %foo = OR killed %x, %x // This would cause a second reload of %x into a different register. DEBUG(dbgs() << "Clearing clean kill: " << MO << "\n"); MO.setIsKill(false); @@ -699,11 +698,13 @@ bool RegAllocFast::setPhysReg(MachineInstr &MI, unsigned OpNum, bool Dead = MO.isDead(); if (!MO.getSubReg()) { MO.setReg(PhysReg); + MO.setIsRenamableIfNoExtraRegAllocReq(); return MO.isKill() || Dead; } // Handle subregister index. MO.setReg(PhysReg ? TRI->getSubReg(PhysReg, MO.getSubReg()) : 0); + MO.setIsRenamableIfNoExtraRegAllocReq(); MO.setSubReg(0); // A kill flag implies killing the full register. 
Add corresponding super @@ -813,7 +814,7 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI, void RegAllocFast::dumpState() { for (unsigned Reg = 1, E = TRI->getNumRegs(); Reg != E; ++Reg) { if (PhysRegState[Reg] == regDisabled) continue; - dbgs() << " " << TRI->getName(Reg); + dbgs() << " " << printReg(Reg, TRI); switch(PhysRegState[Reg]) { case regFree: break; diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index 39676fed3d0b..e4801c48efde 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -35,11 +35,11 @@ #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/EdgeBundles.h" #include "llvm/CodeGen/LiveInterval.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveIntervalUnion.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/LiveRegMatrix.h" -#include "llvm/CodeGen/LiveStackAnalysis.h" +#include "llvm/CodeGen/LiveStacks.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineDominators.h" @@ -105,10 +105,11 @@ static cl::opt LastChanceRecoloringMaxInterference( " interference at a time"), cl::init(8)); -static cl::opt -ExhaustiveSearch("exhaustive-register-search", cl::NotHidden, - cl::desc("Exhaustive Search for registers bypassing the depth " - "and interference cutoffs of last chance recoloring")); +static cl::opt ExhaustiveSearch( + "exhaustive-register-search", cl::NotHidden, + cl::desc("Exhaustive Search for registers bypassing the depth " + "and interference cutoffs of last chance recoloring"), + cl::Hidden); static cl::opt EnableLocalReassignment( "enable-local-reassign", cl::Hidden, @@ -398,7 +399,7 @@ class RAGreedy : public MachineFunctionPass, /// obtained from the TargetSubtargetInfo. bool EnableLocalReassign; - /// Enable or not the the consideration of the cost of local intervals created + /// Enable or not the consideration of the cost of local intervals created /// by a split candidate when choosing the best split candidate. bool EnableAdvancedRASplitCost; @@ -1396,30 +1397,30 @@ BlockFrequency RAGreedy::calcSpillCost() { /// Such sequences are created in 2 scenarios: /// /// Scenario #1: -/// vreg0 is evicted from physreg0 by vreg1. -/// Evictee vreg0 is intended for region splitting with split candidate -/// physreg0 (the reg vreg0 was evicted from). +/// %0 is evicted from physreg0 by %1. +/// Evictee %0 is intended for region splitting with split candidate +/// physreg0 (the reg %0 was evicted from). /// Region splitting creates a local interval because of interference with the -/// evictor vreg1 (normally region spliitting creates 2 interval, the "by reg" +/// evictor %1 (normally region spliitting creates 2 interval, the "by reg" /// and "by stack" intervals and local interval created when interference /// occurs). -/// One of the split intervals ends up evicting vreg2 from physreg1. -/// Evictee vreg2 is intended for region splitting with split candidate +/// One of the split intervals ends up evicting %2 from physreg1. +/// Evictee %2 is intended for region splitting with split candidate /// physreg1. -/// One of the split intervals ends up evicting vreg3 from physreg2, etc. +/// One of the split intervals ends up evicting %3 from physreg2, etc. /// /// Scenario #2 -/// vreg0 is evicted from physreg0 by vreg1. -/// vreg2 is evicted from physreg2 by vreg3 etc. 
-/// Evictee vreg0 is intended for region splitting with split candidate +/// %0 is evicted from physreg0 by %1. +/// %2 is evicted from physreg2 by %3 etc. +/// Evictee %0 is intended for region splitting with split candidate /// physreg1. /// Region splitting creates a local interval because of interference with the -/// evictor vreg1. -/// One of the split intervals ends up evicting back original evictor vreg1 -/// from physreg0 (the reg vreg0 was evicted from). -/// Another evictee vreg2 is intended for region splitting with split candidate +/// evictor %1. +/// One of the split intervals ends up evicting back original evictor %1 +/// from physreg0 (the reg %0 was evicted from). +/// Another evictee %2 is intended for region splitting with split candidate /// physreg1. -/// One of the split intervals ends up evicting vreg3 from physreg2, etc. +/// One of the split intervals ends up evicting %3 from physreg2, etc. /// /// \param Evictee The register considered to be split. /// \param Cand The split candidate that determines the physical register @@ -1447,7 +1448,7 @@ bool RAGreedy::splitCanCauseEvictionChain(unsigned Evictee, getCheapestEvicteeWeight(Order, LIS->getInterval(Evictee), Cand.Intf.first(), Cand.Intf.last(), &MaxWeight); - // The bad eviction chain occurs when either the split candidate the the + // The bad eviction chain occurs when either the split candidate the // evited reg or one of the split artifact will evict the evicting reg. if ((PhysReg != Cand.PhysReg) && (PhysReg != FutureEvictedPhysReg)) return false; @@ -1611,7 +1612,7 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit, // Create separate intervals for isolated blocks with multiple uses. if (!IntvIn && !IntvOut) { - DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " isolated.\n"); + DEBUG(dbgs() << printMBBReference(*BI.MBB) << " isolated.\n"); if (SA->shouldSplitSingleBlock(BI, SingleInstrs)) SE->splitSingleBlock(BI); continue; @@ -2641,7 +2642,7 @@ bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue, unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, SmallVectorImpl &NewVRegs) { CutOffInfo = CO_None; - LLVMContext &Ctx = MF->getFunction()->getContext(); + LLVMContext &Ctx = MF->getFunction().getContext(); SmallVirtRegSet FixedRegisters; unsigned Reg = selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters); if (Reg == ~0U && (CutOffInfo != CO_None)) { diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp index 5fa25d43e420..69a879701fae 100644 --- a/lib/CodeGen/RegAllocPBQP.cpp +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -43,9 +43,9 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveInterval.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/LiveRangeEdit.h" -#include "llvm/CodeGen/LiveStackAnalysis.h" +#include "llvm/CodeGen/LiveStacks.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" @@ -799,7 +799,7 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { findVRegIntervalsToAlloc(MF, LIS); #ifndef NDEBUG - const Function &F = *MF.getFunction(); + const Function &F = MF.getFunction(); std::string FullyQualifiedName = F.getParent()->getModuleIdentifier() + "." 
+ F.getName().str(); #endif diff --git a/lib/CodeGen/RegUsageInfoCollector.cpp b/lib/CodeGen/RegUsageInfoCollector.cpp index 3aaa5a4738d5..f49ea25bbf35 100644 --- a/lib/CodeGen/RegUsageInfoCollector.cpp +++ b/lib/CodeGen/RegUsageInfoCollector.cpp @@ -95,7 +95,7 @@ bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) { unsigned RegMaskSize = (TRI->getNumRegs() + 31) / 32; RegMask.resize(RegMaskSize, 0xFFFFFFFF); - const Function *F = MF.getFunction(); + const Function &F = MF.getFunction(); PhysicalRegisterUsageInfo *PRUI = &getAnalysis<PhysicalRegisterUsageInfo>(); @@ -127,7 +127,7 @@ bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) { if (!TargetFrameLowering::isSafeForNoCSROpt(F)) { const uint32_t *CallPreservedMask = - TRI->getCallPreservedMask(MF, F->getCallingConv()); + TRI->getCallPreservedMask(MF, F.getCallingConv()); if (CallPreservedMask) { // Set callee saved register as preserved. for (unsigned i = 0; i < RegMaskSize; ++i) @@ -141,11 +141,11 @@ bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) { for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg) if (MachineOperand::clobbersPhysReg(&(RegMask[0]), PReg)) - DEBUG(dbgs() << TRI->getName(PReg) << " "); + DEBUG(dbgs() << printReg(PReg, TRI) << " "); DEBUG(dbgs() << " \n----------------------------------------\n"); - PRUI->storeUpdateRegUsageInfo(F, std::move(RegMask)); + PRUI->storeUpdateRegUsageInfo(&F, std::move(RegMask)); return false; } diff --git a/lib/CodeGen/RegUsageInfoPropagate.cpp b/lib/CodeGen/RegUsageInfoPropagate.cpp index f6d45067816a..5b12d00e126f 100644 --- a/lib/CodeGen/RegUsageInfoPropagate.cpp +++ b/lib/CodeGen/RegUsageInfoPropagate.cpp @@ -102,7 +102,7 @@ static const Function *findCalledFunction(const Module &M, MachineInstr &MI) { } bool RegUsageInfoPropagationPass::runOnMachineFunction(MachineFunction &MF) { - const Module *M = MF.getFunction()->getParent(); + const Module *M = MF.getFunction().getParent(); PhysicalRegisterUsageInfo *PRUI = &getAnalysis<PhysicalRegisterUsageInfo>(); DEBUG(dbgs() << " ++++++++++++++++++++ " << getPassName() diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index 18f9ab4ae5f9..00a2e93c71ca 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -22,7 +22,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/LiveInterval.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" @@ -70,10 +70,9 @@ STATISTIC(NumInflated , "Number of register classes inflated"); STATISTIC(NumLaneConflicts, "Number of dead lane conflicts tested"); STATISTIC(NumLaneResolves, "Number of dead lane conflicts resolved"); -static cl::opt<bool> -EnableJoining("join-liveintervals", - cl::desc("Coalesce copies (default=true)"), - cl::init(true)); +static cl::opt<bool> EnableJoining("join-liveintervals", + cl::desc("Coalesce copies (default=true)"), + cl::init(true), cl::Hidden); static cl::opt<bool> UseTerminalRule("terminal-rule", cl::desc("Apply the terminal rule"), @@ -228,9 +227,9 @@ namespace { /// flag. /// This can happen when undef uses were previously concealed by a copy /// which we coalesced. Example: - /// %vreg0:sub0 = ... - /// %vreg1 = COPY %vreg0 <-- Coalescing COPY reveals undef - /// = use %vreg1:sub1 <-- hidden undef use + /// %0:sub0 = ...
+ /// %1 = COPY %0 <-- Coalescing COPY reveals undef + /// = use %1:sub1 <-- hidden undef use void addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx, MachineOperand &MO, unsigned SubRegIdx); @@ -668,7 +667,7 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, // its other operand is coalesced to the copy dest register, see if we can // transform the copy into a noop by commuting the definition. For example, // - // A3 = op A2 B0 + // A3 = op A2 killed B0 // ... // B1 = A3 <- this copy // ... @@ -676,7 +675,7 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, // // ==> // - // B2 = op B0 A2 + // B2 = op B0 killed A2 // ... // B1 = B2 <- now an identity copy // ... @@ -769,7 +768,7 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, // ... // B = A // ... - // C = A + // C = killed A // ... // = B @@ -992,8 +991,8 @@ bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP, // Now ok to move copy. if (CopyLeftBB) { - DEBUG(dbgs() << "\tremovePartialRedundancy: Move the copy to BB#" - << CopyLeftBB->getNumber() << '\t' << CopyMI); + DEBUG(dbgs() << "\tremovePartialRedundancy: Move the copy to " + << printMBBReference(*CopyLeftBB) << '\t' << CopyMI); // Insert new copy to CopyLeftBB. auto InsPos = CopyLeftBB->getFirstTerminator(); @@ -1011,8 +1010,8 @@ bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP, // the deleted list. ErasedInstrs.erase(NewCopyMI); } else { - DEBUG(dbgs() << "\tremovePartialRedundancy: Remove the copy from BB#" - << MBB.getNumber() << '\t' << CopyMI); + DEBUG(dbgs() << "\tremovePartialRedundancy: Remove the copy from " + << printMBBReference(MBB) << '\t' << CopyMI); } // Remove CopyMI. @@ -1143,10 +1142,10 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, NewMI.setDebugLoc(DL); // In a situation like the following: - // %vreg0:subreg = instr ; DefMI, subreg = DstIdx - // %vreg1 = copy %vreg0:subreg ; CopyMI, SrcIdx = 0 - // instead of widening %vreg1 to the register class of %vreg0 simply do: - // %vreg1 = instr + // %0:subreg = instr ; DefMI, subreg = DstIdx + // %1 = copy %0:subreg ; CopyMI, SrcIdx = 0 + // instead of widening %1 to the register class of %0 simply do: + // %1 = instr const TargetRegisterClass *NewRC = CP.getNewRC(); if (DstIdx != 0) { MachineOperand &DefMO = NewMI.getOperand(0); @@ -1226,12 +1225,12 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, // This could happen if the rematerialization instruction is rematerializing // more than actually is used in the register. // An example would be: - // vreg1 = LOAD CONSTANTS 5, 8 ; Loading both 5 and 8 in different subregs + // %1 = LOAD CONSTANTS 5, 8 ; Loading both 5 and 8 in different subregs // ; Copying only part of the register here, but the rest is undef. - // vreg2:sub_16bit = COPY vreg1:sub_16bit + // %2:sub_16bit = COPY %1:sub_16bit // ==> // ; Materialize all the constants but only using one - // vreg2 = LOAD_CONSTANTS 5, 8 + // %2 = LOAD_CONSTANTS 5, 8 // // at this point for the part that wasn't defined before we could have // subranges missing the definition. 
@@ -1254,11 +1253,11 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, // Make sure that the subrange for resultant undef is removed // For example: - // vreg1:sub1 = LOAD CONSTANT 1 - // vreg2 = COPY vreg1 + // %1:sub1 = LOAD CONSTANT 1 + // %2 = COPY %1 // ==> - // vreg2:sub1 = LOAD CONSTANT 1 - // ; Correct but need to remove the subrange for vreg2:sub0 + // %2:sub1 = LOAD CONSTANT 1 + // ; Correct but need to remove the subrange for %2:sub0 // ; as it is now undef if (NewIdx != 0 && DstInt.hasSubRanges()) { // The affected subregister segments can be removed. @@ -1292,15 +1291,15 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, // Otherwise, variables that live through may miss some // interferences, thus creating invalid allocation. // E.g., i386 code: - // vreg1 = somedef ; vreg1 GR8 - // vreg2 = remat ; vreg2 GR32 - // CL = COPY vreg2.sub_8bit - // = somedef vreg1 ; vreg1 GR8 + // %1 = somedef ; %1 GR8 + // %2 = remat ; %2 GR32 + // CL = COPY %2.sub_8bit + // = somedef %1 ; %1 GR8 // => - // vreg1 = somedef ; vreg1 GR8 - // ECX = remat ; CL - // = somedef vreg1 ; vreg1 GR8 - // vreg1 will see the inteferences with CL but not with CH since + // %1 = somedef ; %1 GR8 + // dead ECX = remat ; implicit-def CL + // = somedef %1 ; %1 GR8 + // %1 will see the inteferences with CL but not with CH since // no live-ranges would have been created for ECX. // Fix that! SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI); @@ -1353,9 +1352,9 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) { // ProcessImpicitDefs may leave some copies of values, it only removes // local variables. When we have a copy like: // - // %vreg1 = COPY %vreg2 + // %1 = COPY undef %2 // - // We delete the copy and remove the corresponding value number from %vreg1. + // We delete the copy and remove the corresponding value number from %1. // Any uses of that value number are marked as . // Note that we do not query CoalescerPair here but redo isMoveInstr as the @@ -1820,20 +1819,20 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) { MachineInstr *CopyMI; if (CP.isFlipped()) { // Physreg is copied into vreg - // %vregY = COPY %X - // ... //< no other def of %X here - // use %vregY + // %y = COPY %physreg_x + // ... //< no other def of %x here + // use %y // => // ... - // use %X + // use %x CopyMI = MRI->getVRegDef(SrcReg); } else { // VReg is copied into physreg: - // %vregX = def - // ... //< no other def or use of %Y here - // %Y = COPY %vregX + // %y = def + // ... //< no other def or use of %y here + // %y = COPY %physreg_x // => - // %Y = def + // %y = def // ... if (!MRI->hasOneNonDBGUse(SrcReg)) { DEBUG(dbgs() << "\t\tMultiple vreg uses!\n"); @@ -1928,7 +1927,7 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) { // // %dst:ssub0 = FOO // %src = BAR -// %dst:ssub1 = COPY %src +// %dst:ssub1 = COPY %src // // The live range of %src overlaps the %dst value defined by FOO, but // merging %src into %dst:ssub1 is only going to clobber the ssub1 lane @@ -1943,9 +1942,9 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) { // is live, but never read. This can happen because we don't compute // individual live ranges per lane. // -// %dst = FOO +// %dst = FOO // %src = BAR -// %dst:ssub1 = COPY %src +// %dst:ssub1 = COPY %src // // This kind of interference is only resolved locally. If the clobbered // lane value escapes the block, the join is aborted. 
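As an aside, the lane-based interference test described in the comment above can be pictured with plain bitmasks. The sketch below is a minimal standalone C++ illustration; the LaneMask type and the two subregister lanes are hypothetical stand-ins, not LLVM's LaneBitmask API. A def of %dst:ssub1 conflicts only with %dst values whose live lanes intersect the lanes being written.

#include <cassert>
#include <cstdint>

using LaneMask = uint32_t; // hypothetical stand-in for a per-lane bitmask

// A subregister def clobbers an existing value only if the lanes it writes
// intersect the lanes of that value which are still live.
bool clobbersLiveLanes(LaneMask WrittenLanes, LaneMask LiveLanes) {
  return (WrittenLanes & LiveLanes) != 0;
}

int main() {
  const LaneMask Ssub0 = 0x1; // lane written by "%dst:ssub0 = FOO"
  const LaneMask Ssub1 = 0x2; // lane written by "%dst:ssub1 = COPY %src"

  // Copying %src into %dst:ssub1 leaves the live ssub0 lane untouched, so the
  // join is not blocked merely because the full registers overlap.
  assert(!clobbersLiveLanes(Ssub1, Ssub0));

  // A full def of %dst would clobber the still-live ssub0 lane.
  assert(clobbersLiveLanes(Ssub0 | Ssub1, Ssub0));
  return 0;
}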
@@ -2288,7 +2287,7 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) { // // This adds ssub1 to the set of valid lanes in %src: // - // %src:ssub1 = FOO + // %src:ssub1 = FOO // // This leaves only ssub1 valid, making any other lanes undef: // @@ -2377,7 +2376,7 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) { if (OtherV.ErasableImplicitDef && DefMI && DefMI->getParent() != Indexes->getMBBFromIndex(V.OtherVNI->def)) { DEBUG(dbgs() << "IMPLICIT_DEF defined at " << V.OtherVNI->def - << " extends into BB#" << DefMI->getParent()->getNumber() + << " extends into " << printMBBReference(*DefMI->getParent()) << ", keeping it.\n"); OtherV.ErasableImplicitDef = false; } @@ -2426,9 +2425,9 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) { // // 1 %dst:ssub0 = FOO <-- OtherVNI // 2 %src = BAR <-- VNI - // 3 %dst:ssub1 = COPY %src <-- Eliminate this copy. - // 4 BAZ %dst - // 5 QUUX %src + // 3 %dst:ssub1 = COPY killed %src <-- Eliminate this copy. + // 4 BAZ killed %dst + // 5 QUUX killed %src // // Here OtherVNI will map to itself in [1;2), but to VNI in [2;5). CR_Replace // handles this complex value mapping. @@ -2438,7 +2437,7 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) { // If the other live range is killed by DefMI and the live ranges are still // overlapping, it must be because we're looking at an early clobber def: // - // %dst = ASM %src + // %dst = ASM killed %src // // In this case, it is illegal to merge the two live ranges since the early // clobber def would clobber %src before it was read. @@ -2683,7 +2682,7 @@ void JoinVals::pruneValues(JoinVals &Other, if (!Def.isBlock()) { if (changeInstrs) { // Remove flags. This def is now a partial redef. - // Also remove flags since the joined live range will + // Also remove dead flags since the joined live range will // continue past this instruction. for (MachineOperand &MO : Indexes->getInstructionFromIndex(Def)->operands()) { diff --git a/lib/CodeGen/RegisterPressure.cpp b/lib/CodeGen/RegisterPressure.cpp index b5c97fe77e1e..bc1af1594c20 100644 --- a/lib/CodeGen/RegisterPressure.cpp +++ b/lib/CodeGen/RegisterPressure.cpp @@ -17,7 +17,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/LiveInterval.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" @@ -587,7 +587,7 @@ void RegisterOperands::adjustLaneLiveness(const LiveIntervals &LIS, for (auto I = Defs.begin(); I != Defs.end(); ) { LaneBitmask LiveAfter = getLiveLanesAt(LIS, MRI, true, I->RegUnit, Pos.getDeadSlot()); - // If the the def is all that is live after the instruction, then in case + // If the def is all that is live after the instruction, then in case // of a subregister def we need a read-undef flag. unsigned RegUnit = I->RegUnit; if (TargetRegisterInfo::isVirtualRegister(RegUnit) && diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp index 758a81fa5f73..97967124add6 100644 --- a/lib/CodeGen/RegisterScavenging.cpp +++ b/lib/CodeGen/RegisterScavenging.cpp @@ -213,7 +213,7 @@ void RegScavenger::forward() { continue; if (!isRegUsed(Reg)) { // Check if it's partial live: e.g. - // D0 = insert_subreg D0, S0 + // D0 = insert_subreg undef D0, S0 // ... D0 // The problem is the insert_subreg could be eliminated. The use of // D0 is using a partially undef value. 
This is not *incorrect* since @@ -288,8 +288,8 @@ bool RegScavenger::isRegUsed(unsigned Reg, bool includeReserved) const { unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const { for (unsigned Reg : *RC) { if (!isRegUsed(Reg)) { - DEBUG(dbgs() << "Scavenger found unused reg: " << TRI->getName(Reg) << - "\n"); + DEBUG(dbgs() << "Scavenger found unused reg: " << printReg(Reg, TRI) + << "\n"); return Reg; } } @@ -561,15 +561,15 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, // If we found an unused register there is no reason to spill it. if (!isRegUsed(SReg)) { - DEBUG(dbgs() << "Scavenged register: " << TRI->getName(SReg) << "\n"); + DEBUG(dbgs() << "Scavenged register: " << printReg(SReg, TRI) << "\n"); return SReg; } ScavengedInfo &Scavenged = spill(SReg, *RC, SPAdj, I, UseMI); Scavenged.Restore = &*std::prev(UseMI); - DEBUG(dbgs() << "Scavenged register (with spill): " << TRI->getName(SReg) << - "\n"); + DEBUG(dbgs() << "Scavenged register (with spill): " << printReg(SReg, TRI) + << "\n"); return SReg; } @@ -599,7 +599,7 @@ unsigned RegScavenger::scavengeRegisterBackwards(const TargetRegisterClass &RC, Scavenged.Restore = &*std::prev(SpillBefore); LiveUnits.removeReg(Reg); DEBUG(dbgs() << "Scavenged register with spill: " << printReg(Reg, TRI) - << " until " << *SpillBefore); + << " until " << *SpillBefore); } else { DEBUG(dbgs() << "Scavenged free register: " << printReg(Reg, TRI) << '\n'); } diff --git a/lib/CodeGen/RegisterUsageInfo.cpp b/lib/CodeGen/RegisterUsageInfo.cpp index fa74d4185299..4e42deb406e1 100644 --- a/lib/CodeGen/RegisterUsageInfo.cpp +++ b/lib/CodeGen/RegisterUsageInfo.cpp @@ -97,7 +97,7 @@ void PhysicalRegisterUsageInfo::print(raw_ostream &OS, const Module *M) const { for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg) { if (MachineOperand::clobbersPhysReg(&(FPRMPair->second[0]), PReg)) - OS << TRI->getName(PReg) << " "; + OS << printReg(PReg, TRI) << " "; } OS << "\n"; } diff --git a/lib/CodeGen/RenameIndependentSubregs.cpp b/lib/CodeGen/RenameIndependentSubregs.cpp index 72b7960f327b..1e1f36a35ecc 100644 --- a/lib/CodeGen/RenameIndependentSubregs.cpp +++ b/lib/CodeGen/RenameIndependentSubregs.cpp @@ -10,27 +10,27 @@ /// Rename independent subregisters looks for virtual registers with /// independently used subregisters and renames them to new virtual registers. /// Example: In the following: -/// %vreg0:sub0 = ... -/// %vreg0:sub1 = ... -/// use %vreg0:sub0 -/// %vreg0:sub0 = ... -/// use %vreg0:sub0 -/// use %vreg0:sub1 +/// %0:sub0 = ... +/// %0:sub1 = ... +/// use %0:sub0 +/// %0:sub0 = ... +/// use %0:sub0 +/// use %0:sub1 /// sub0 and sub1 are never used together, and we have two independent sub0 /// definitions. This pass will rename to: -/// %vreg0:sub0 = ... -/// %vreg1:sub1 = ... -/// use %vreg1:sub1 -/// %vreg2:sub1 = ... -/// use %vreg2:sub1 -/// use %vreg0:sub0 +/// %0:sub0 = ... +/// %1:sub1 = ... +/// use %1:sub1 +/// %2:sub1 = ... 
+/// use %2:sub1 +/// use %0:sub0 // //===----------------------------------------------------------------------===// #include "LiveRangeUtils.h" #include "PHIEliminationUtils.h" #include "llvm/CodeGen/LiveInterval.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" diff --git a/lib/CodeGen/ResetMachineFunctionPass.cpp b/lib/CodeGen/ResetMachineFunctionPass.cpp index 01b3db43b283..f1885aa74285 100644 --- a/lib/CodeGen/ResetMachineFunctionPass.cpp +++ b/lib/CodeGen/ResetMachineFunctionPass.cpp @@ -51,7 +51,7 @@ namespace { ++NumFunctionsReset; MF.reset(); if (EmitFallbackDiag) { - const Function &F = *MF.getFunction(); + const Function &F = MF.getFunction(); DiagnosticInfoISelFallback DiagFallback(F); F.getContext().diagnose(DiagFallback); } diff --git a/lib/CodeGen/SafeStack.cpp b/lib/CodeGen/SafeStack.cpp index e68f6e10a2ad..51233be521be 100644 --- a/lib/CodeGen/SafeStack.cpp +++ b/lib/CodeGen/SafeStack.cpp @@ -558,7 +558,7 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack( // Replace alloc with the new location. replaceDbgDeclare(Arg, BasePointer, BasePointer->getNextNode(), DIB, - /*Deref=*/false, -Offset); + DIExpression::NoDeref, -Offset, DIExpression::NoDeref); Arg->replaceAllUsesWith(NewArg); IRB.SetInsertPoint(cast(NewArg)->getNextNode()); IRB.CreateMemCpy(Off, Arg, Size, Arg->getParamAlignment()); @@ -573,7 +573,8 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack( if (Size == 0) Size = 1; // Don't create zero-sized stack objects. - replaceDbgDeclareForAlloca(AI, BasePointer, DIB, /*Deref=*/false, -Offset); + replaceDbgDeclareForAlloca(AI, BasePointer, DIB, DIExpression::NoDeref, + -Offset, DIExpression::NoDeref); replaceDbgValueForAlloca(AI, BasePointer, DIB, -Offset); // Replace uses of the alloca with the new location. 
@@ -663,7 +664,8 @@ void SafeStack::moveDynamicAllocasToUnsafeStack( if (AI->hasName() && isa(NewAI)) NewAI->takeName(AI); - replaceDbgDeclareForAlloca(AI, NewAI, DIB, /*Deref=*/false); + replaceDbgDeclareForAlloca(AI, NewAI, DIB, DIExpression::NoDeref, 0, + DIExpression::NoDeref); AI->replaceAllUsesWith(NewAI); AI->eraseFromParent(); } diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index b1a485149103..9249fa84b38b 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -21,7 +21,7 @@ #include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -114,7 +114,7 @@ ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf, : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()), RemoveKillFlags(RemoveKillFlags), UnknownValue(UndefValue::get( - Type::getVoidTy(mf.getFunction()->getContext()))) { + Type::getVoidTy(mf.getFunction().getContext()))) { DbgValues.clear(); const TargetSubtargetInfo &ST = mf.getSubtarget(); @@ -776,7 +776,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, if (PDiffs != nullptr) PDiffs->addInstruction(SU->NodeNum, RegOpers, MRI); - RPTracker->recedeSkipDebugValues(); + if (RPTracker->getPos() == RegionEnd || &*RPTracker->getPos() != &MI) + RPTracker->recedeSkipDebugValues(); assert(&*RPTracker->getPos() == &MI && "RPTracker in sync"); RPTracker->recede(RegOpers); } @@ -1043,7 +1044,7 @@ static void toggleKills(const MachineRegisterInfo &MRI, LivePhysRegs &LiveRegs, } void ScheduleDAGInstrs::fixupKills(MachineBasicBlock &MBB) { - DEBUG(dbgs() << "Fixup kills for BB#" << MBB.getNumber() << '\n'); + DEBUG(dbgs() << "Fixup kills for " << printMBBReference(MBB) << '\n'); LiveRegs.init(*TRI); LiveRegs.addLiveOuts(MBB); diff --git a/lib/CodeGen/ScheduleDAGPrinter.cpp b/lib/CodeGen/ScheduleDAGPrinter.cpp index 6c753bbb8faa..37c4a470bd0a 100644 --- a/lib/CodeGen/ScheduleDAGPrinter.cpp +++ b/lib/CodeGen/ScheduleDAGPrinter.cpp @@ -14,14 +14,12 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/IR/Constants.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GraphWriter.h" #include "llvm/Support/raw_ostream.h" -#include using namespace llvm; namespace llvm { diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 097ff63e12b4..8cab6aaf1a29 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -161,7 +161,7 @@ namespace { DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL) : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes), OptLevel(OL), AA(AA) { - ForCodeSize = DAG.getMachineFunction().getFunction()->optForSize(); + ForCodeSize = DAG.getMachineFunction().getFunction().optForSize(); MaximumLegalStoreInBits = 0; for (MVT VT : MVT::all_valuetypes()) @@ -496,12 +496,22 @@ namespace { /// This is a helper function for visitAND and visitZERO_EXTEND. Returns /// true if the (and (load x) c) pattern matches an extload. ExtVT returns - /// the type of the loaded value to be extended. 
LoadedVT returns the type - /// of the original loaded value. NarrowLoad returns whether the load would - /// need to be narrowed in order to match. + /// the type of the loaded value to be extended. bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN, - EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT, - bool &NarrowLoad); + EVT LoadResultTy, EVT &ExtVT); + + /// Helper function to calculate whether the given Load can have its + /// width reduced to ExtVT. + bool isLegalNarrowLoad(LoadSDNode *LoadN, ISD::LoadExtType ExtType, + EVT &ExtVT, unsigned ShAmt = 0); + + /// Used by BackwardsPropagateMask to find suitable loads. + bool SearchForAndLoads(SDNode *N, SmallPtrSetImpl &Loads, + SmallPtrSetImpl &NodeWithConsts, + ConstantSDNode *Mask, SDNode *&UncombinedNode); + /// Attempt to propagate a given AND node back to load leaves so that they + /// can be combined into narrow loads. + bool BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG); /// Helper function for MergeConsecutiveStores which merges the /// component store chains. @@ -1912,14 +1922,16 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) { EVT VT = Sel.getValueType(); SDLoc DL(Sel); SDValue NewCT = DAG.getNode(BinOpcode, DL, VT, CT, C1); - assert((NewCT.isUndef() || isConstantOrConstantVector(NewCT) || - isConstantFPBuildVectorOrConstantFP(NewCT)) && - "Failed to constant fold a binop with constant operands"); + if (!NewCT.isUndef() && + !isConstantOrConstantVector(NewCT, true) && + !isConstantFPBuildVectorOrConstantFP(NewCT)) + return SDValue(); SDValue NewCF = DAG.getNode(BinOpcode, DL, VT, CF, C1); - assert((NewCF.isUndef() || isConstantOrConstantVector(NewCF) || - isConstantFPBuildVectorOrConstantFP(NewCF)) && - "Failed to constant fold a binop with constant operands"); + if (!NewCF.isUndef() && + !isConstantOrConstantVector(NewCF, true) && + !isConstantFPBuildVectorOrConstantFP(NewCF)) + return SDValue(); return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF); } @@ -2153,7 +2165,8 @@ SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference) } // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry) - if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) + if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) && + N1.getResNo() == 0) return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(), N0, N1.getOperand(0), N1.getOperand(2)); @@ -2672,7 +2685,8 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { } // fold (mul x, (1 << c)) -> x << c if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) && - DAG.isKnownToBeAPowerOfTwo(N1)) { + DAG.isKnownToBeAPowerOfTwo(N1) && + (!VT.isVector() || Level <= AfterLegalizeVectorOps)) { SDLoc DL(N); SDValue LogBase2 = BuildLogBase2(N1, DL); AddToWorklist(LogBase2.getNode()); @@ -2802,8 +2816,8 @@ SDValue DAGCombiner::useDivRem(SDNode *Node) { SDValue Op1 = Node->getOperand(1); SDValue combined; for (SDNode::use_iterator UI = Op0.getNode()->use_begin(), - UE = Op0.getNode()->use_end(); UI != UE;) { - SDNode *User = *UI++; + UE = Op0.getNode()->use_end(); UI != UE; ++UI) { + SDNode *User = *UI; if (User == Node || User->use_empty()) continue; // Convert the other matching node(s), too; @@ -2929,7 +2943,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { // If integer divide is expensive and we satisfy the requirements, emit an // alternate sequence. Targets may check function attributes for size/speed // trade-offs. 
- AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes(); + AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr)) if (SDValue Op = BuildSDIV(N)) return Op; @@ -3000,7 +3014,7 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { } // fold (udiv x, c) -> alternate - AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes(); + AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr)) if (SDValue Op = BuildUDIV(N)) return Op; @@ -3059,7 +3073,7 @@ SDValue DAGCombiner::visitREM(SDNode *N) { } } - AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes(); + AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); // If X/C can be simplified by the division-by-constant logic, lower // X%C to the equivalent of X-X/C*C. @@ -3097,19 +3111,26 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) { EVT VT = N->getValueType(0); SDLoc DL(N); + if (VT.isVector()) { + // fold (mulhs x, 0) -> 0 + if (ISD::isBuildVectorAllZeros(N1.getNode())) + return N1; + if (ISD::isBuildVectorAllZeros(N0.getNode())) + return N0; + } + // fold (mulhs x, 0) -> 0 if (isNullConstant(N1)) return N1; // fold (mulhs x, 1) -> (sra x, size(x)-1) - if (isOneConstant(N1)) { - SDLoc DL(N); + if (isOneConstant(N1)) return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0, DAG.getConstant(N0.getValueSizeInBits() - 1, DL, getShiftAmountTy(N0.getValueType()))); - } + // fold (mulhs x, undef) -> 0 if (N0.isUndef() || N1.isUndef()) - return DAG.getConstant(0, SDLoc(N), VT); + return DAG.getConstant(0, DL, VT); // If the type twice as wide is legal, transform the mulhs to a wider multiply // plus a shift. @@ -3137,6 +3158,14 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) { EVT VT = N->getValueType(0); SDLoc DL(N); + if (VT.isVector()) { + // fold (mulhu x, 0) -> 0 + if (ISD::isBuildVectorAllZeros(N1.getNode())) + return N1; + if (ISD::isBuildVectorAllZeros(N0.getNode())) + return N0; + } + // fold (mulhu x, 0) -> 0 if (isNullConstant(N1)) return N1; @@ -3550,7 +3579,8 @@ SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1, // TODO: What is the 'or' equivalent of this fold? // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2) - if (IsAnd && LL == RL && CC0 == CC1 && IsInteger && CC0 == ISD::SETNE && + if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 && + IsInteger && CC0 == ISD::SETNE && ((isNullConstant(LR) && isAllOnesConstant(RR)) || (isAllOnesConstant(LR) && isNullConstant(RR)))) { SDValue One = DAG.getConstant(1, DL, OpVT); @@ -3614,15 +3644,18 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) { if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL && VT.getSizeInBits() <= 64) { if (ConstantSDNode *ADDI = dyn_cast(N0.getOperand(1))) { - APInt ADDC = ADDI->getAPIntValue(); - if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) { + if (ConstantSDNode *SRLI = dyn_cast(N1.getOperand(1))) { // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal // immediate for an add, but it is legal if its top c2 bits are set, // transform the ADD so the immediate doesn't need to be materialized // in a register. 
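The legality trick described in the comment above rests on a small arithmetic identity: setting bits of the add immediate that lie entirely within the top c2 positions cannot change any bit of the result that survives the AND with a value logically shifted right by c2, because carries only propagate upwards. A minimal standalone check of that identity follows; the concrete constants are made up purely for illustration.

#include <cassert>
#include <cstdint>
#include <initializer_list>

int main() {
  const unsigned c2 = 16;                     // shift amount of the lshr
  const uint32_t HighMask = ~0u << (32 - c2); // top c2 bits, cleared by the AND
  const uint32_t C1 = 0x00001234u;            // original add immediate
  const uint32_t C1Adj = C1 | HighMask;       // immediate with its top c2 bits set

  for (uint32_t X : {0u, 1u, 0xdeadbeefu, 0xffffffffu})
    for (uint32_t Y : {0u, 0x12345678u, 0xffffffffu})
      // The extra high bits of C1Adj can only alter result bits that the
      // (Y >> c2) operand masks off anyway.
      assert(((X + C1) & (Y >> c2)) == ((X + C1Adj) & (Y >> c2)));
  return 0;
}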
- if (ConstantSDNode *SRLI = dyn_cast(N1.getOperand(1))) { + APInt ADDC = ADDI->getAPIntValue(); + APInt SRLC = SRLI->getAPIntValue(); + if (ADDC.getMinSignedBits() <= 64 && + SRLC.ult(VT.getSizeInBits()) && + !TLI.isLegalAddImmediate(ADDC.getSExtValue())) { APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), - SRLI->getZExtValue()); + SRLC.getZExtValue()); if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) { ADDC |= Mask; if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) { @@ -3693,22 +3726,20 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) { } bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN, - EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT, - bool &NarrowLoad) { + EVT LoadResultTy, EVT &ExtVT) { if (!AndC->getAPIntValue().isMask()) return false; unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes(); ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); - LoadedVT = LoadN->getMemoryVT(); + EVT LoadedVT = LoadN->getMemoryVT(); if (ExtVT == LoadedVT && (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) { // ZEXTLOAD will match without needing to change the size of the value being // loaded. - NarrowLoad = false; return true; } @@ -3728,10 +3759,200 @@ bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN, if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT)) return false; - NarrowLoad = true; return true; } +bool DAGCombiner::isLegalNarrowLoad(LoadSDNode *LoadN, ISD::LoadExtType ExtType, + EVT &ExtVT, unsigned ShAmt) { + // Don't transform one with multiple uses, this would require adding a new + // load. + if (!SDValue(LoadN, 0).hasOneUse()) + return false; + + if (LegalOperations && + !TLI.isLoadExtLegal(ExtType, LoadN->getValueType(0), ExtVT)) + return false; + + // Do not generate loads of non-round integer types since these can + // be expensive (and would be wrong if the type is not byte sized). + if (!ExtVT.isRound()) + return false; + + // Don't change the width of a volatile load. + if (LoadN->isVolatile()) + return false; + + // Verify that we are actually reducing a load width here. + if (LoadN->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits()) + return false; + + // For the transform to be legal, the load must produce only two values + // (the value loaded and the chain). Don't transform a pre-increment + // load, for example, which produces an extra value. Otherwise the + // transformation is not equivalent, and the downstream logic to replace + // uses gets things wrong. + if (LoadN->getNumValues() > 2) + return false; + + // If the load that we're shrinking is an extload and we're not just + // discarding the extension we can't simply shrink the load. Bail. + // TODO: It would be possible to merge the extensions in some cases. + if (LoadN->getExtensionType() != ISD::NON_EXTLOAD && + LoadN->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits() + ShAmt) + return false; + + if (!TLI.shouldReduceLoadWidth(LoadN, ExtType, ExtVT)) + return false; + + // It's not possible to generate a constant of extended or untyped type. + EVT PtrType = LoadN->getOperand(1).getValueType(); + if (PtrType == MVT::Untyped || PtrType.isExtended()) + return false; + + return true; +} + +bool DAGCombiner::SearchForAndLoads(SDNode *N, + SmallPtrSetImpl &Loads, + SmallPtrSetImpl &NodesWithConsts, + ConstantSDNode *Mask, + SDNode *&NodeToMask) { + // Recursively search for the operands, looking for loads which can be + // narrowed. 
+ for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) { + SDValue Op = N->getOperand(i); + + if (Op.getValueType().isVector()) + return false; + + // Some constants may need fixing up later if they are too large. + if (auto *C = dyn_cast(Op)) { + if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) && + (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue()) + NodesWithConsts.insert(N); + continue; + } + + if (!Op.hasOneUse()) + return false; + + switch(Op.getOpcode()) { + case ISD::LOAD: { + auto *Load = cast(Op); + EVT ExtVT; + if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) && + isLegalNarrowLoad(Load, ISD::ZEXTLOAD, ExtVT)) { + + // ZEXTLOAD is already small enough. + if (Load->getExtensionType() == ISD::ZEXTLOAD && + ExtVT.bitsGE(Load->getMemoryVT())) + continue; + + // Use LE to convert equal sized loads to zext. + if (ExtVT.bitsLE(Load->getMemoryVT())) + Loads.insert(Load); + + continue; + } + return false; + } + case ISD::ZERO_EXTEND: + case ISD::AssertZext: { + unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes(); + EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); + EVT VT = Op.getOpcode() == ISD::AssertZext ? + cast(Op.getOperand(1))->getVT() : + Op.getOperand(0).getValueType(); + + // We can accept extending nodes if the mask is wider or an equal + // width to the original type. + if (ExtVT.bitsGE(VT)) + continue; + break; + } + case ISD::OR: + case ISD::XOR: + case ISD::AND: + if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask, + NodeToMask)) + return false; + continue; + } + + // Allow one node which will masked along with any loads found. + if (NodeToMask) + return false; + NodeToMask = Op.getNode(); + } + return true; +} + +bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) { + auto *Mask = dyn_cast(N->getOperand(1)); + if (!Mask) + return false; + + if (!Mask->getAPIntValue().isMask()) + return false; + + // No need to do anything if the and directly uses a load. + if (isa(N->getOperand(0))) + return false; + + SmallPtrSet Loads; + SmallPtrSet NodesWithConsts; + SDNode *FixupNode = nullptr; + if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) { + if (Loads.size() == 0) + return false; + + DEBUG(dbgs() << "Backwards propagate AND: "; N->dump()); + SDValue MaskOp = N->getOperand(1); + + // If it exists, fixup the single node we allow in the tree that needs + // masking. + if (FixupNode) { + DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump()); + SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode), + FixupNode->getValueType(0), + SDValue(FixupNode, 0), MaskOp); + DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And); + DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), + MaskOp); + } + + // Narrow any constants that need it. + for (auto *LogicN : NodesWithConsts) { + SDValue Op0 = LogicN->getOperand(0); + SDValue Op1 = LogicN->getOperand(1); + + if (isa(Op0)) + std::swap(Op0, Op1); + + SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(), + Op1, MaskOp); + + DAG.UpdateNodeOperands(LogicN, Op0, And); + } + + // Create narrow loads. 
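To see what BackwardsPropagateMask buys, here is a rough scalar analogue as standalone C++ (hypothetical buffers, and it assumes a little-endian host; on big-endian targets the narrowed load needs the pointer adjustment that ReduceLoadWidth performs): pushing the AND mask back through the OR lets both 32-bit loads shrink to zero-extending byte loads.

#include <cassert>
#include <cstdint>
#include <cstring>

// "(load32 P | load32 Q) & 0xFF", the shape before the combine.
uint32_t wideLoadsThenMask(const uint8_t *P, const uint8_t *Q) {
  uint32_t A, B;
  std::memcpy(&A, P, sizeof A);
  std::memcpy(&B, Q, sizeof B);
  return (A | B) & 0xFFu;
}

// After propagating the mask to the leaves: two zero-extending byte loads.
uint32_t narrowLoads(const uint8_t *P, const uint8_t *Q) {
  return uint32_t(P[0]) | uint32_t(Q[0]);
}

int main() {
  uint8_t Buf[8] = {0x12, 0x34, 0x56, 0x78, 0x9a, 0xbc, 0xde, 0xf0};
  // Little-endian only: the low byte of each wide load is the byte at the
  // load address, so the narrowed loads read from the same offsets.
  assert(wideLoadsThenMask(Buf, Buf + 4) == narrowLoads(Buf, Buf + 4));
  return 0;
}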
+ for (auto *Load : Loads) { + DEBUG(dbgs() << "Propagate AND back to: "; Load->dump()); + SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0), + SDValue(Load, 0), MaskOp); + DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And); + DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp); + SDValue NewLoad = ReduceLoadWidth(And.getNode()); + assert(NewLoad && + "Shouldn't be masking the load if it can't be narrowed"); + CombineTo(Load, NewLoad, NewLoad.getValue(1)); + } + DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode()); + return true; + } + return false; +} + SDValue DAGCombiner::visitAND(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -3787,11 +4008,19 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // reassociate and if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1)) return RAND; + + // Try to convert a constant mask AND into a shuffle clear mask. + if (VT.isVector()) + if (SDValue Shuffle = XformToShuffleWithZero(N)) + return Shuffle; + // fold (and (or x, C), D) -> D if (C & D) == D - if (N1C && N0.getOpcode() == ISD::OR) - if (ConstantSDNode *ORI = isConstOrConstSplat(N0.getOperand(1))) - if (N1C->getAPIntValue().isSubsetOf(ORI->getAPIntValue())) - return N1; + auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) { + return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue()); + }; + if (N0.getOpcode() == ISD::OR && + matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset)) + return N1; // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits. if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { SDValue N0Op0 = N0.getOperand(0); @@ -3923,55 +4152,23 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD || (N0.getOpcode() == ISD::ANY_EXTEND && N0.getOperand(0).getOpcode() == ISD::LOAD))) { - bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND; - LoadSDNode *LN0 = HasAnyExt - ? cast(N0.getOperand(0)) - : cast(N0); - if (LN0->getExtensionType() != ISD::SEXTLOAD && - LN0->isUnindexed() && N0.hasOneUse() && SDValue(LN0, 0).hasOneUse()) { - auto NarrowLoad = false; - EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT; - EVT ExtVT, LoadedVT; - if (isAndLoadExtLoad(N1C, LN0, LoadResultTy, ExtVT, LoadedVT, - NarrowLoad)) { - if (!NarrowLoad) { - SDValue NewLoad = - DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, - LN0->getChain(), LN0->getBasePtr(), ExtVT, - LN0->getMemOperand()); - AddToWorklist(N); - CombineTo(LN0, NewLoad, NewLoad.getValue(1)); - return SDValue(N, 0); // Return N so it doesn't get rechecked! - } else { - EVT PtrType = LN0->getOperand(1).getValueType(); - - unsigned Alignment = LN0->getAlignment(); - SDValue NewPtr = LN0->getBasePtr(); - - // For big endian targets, we need to add an offset to the pointer - // to load the correct bytes. For little endian systems, we merely - // need to read fewer bytes from the same pointer. - if (DAG.getDataLayout().isBigEndian()) { - unsigned LVTStoreBytes = LoadedVT.getStoreSize(); - unsigned EVTStoreBytes = ExtVT.getStoreSize(); - unsigned PtrOff = LVTStoreBytes - EVTStoreBytes; - SDLoc DL(LN0); - NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, - NewPtr, DAG.getConstant(PtrOff, DL, PtrType)); - Alignment = MinAlign(Alignment, PtrOff); - } + if (SDValue Res = ReduceLoadWidth(N)) { + LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND + ? 
cast(N0.getOperand(0)) : cast(N0); - AddToWorklist(NewPtr.getNode()); + AddToWorklist(N); + CombineTo(LN0, Res, Res.getValue(1)); + return SDValue(N, 0); + } + } - SDValue Load = DAG.getExtLoad( - ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, LN0->getChain(), NewPtr, - LN0->getPointerInfo(), ExtVT, Alignment, - LN0->getMemOperand()->getFlags(), LN0->getAAInfo()); - AddToWorklist(N); - CombineTo(LN0, Load, Load.getValue(1)); - return SDValue(N, 0); // Return N so it doesn't get rechecked! - } - } + if (Level >= AfterLegalizeTypes) { + // Attempt to propagate the AND back up to the leaves which, if they're + // loads, can be combined to narrow loads and the AND node can be removed. + // Perform after legalization so that extend nodes will already be + // combined into the loads. + if (BackwardsPropagateMask(N, DAG)) { + return SDValue(N, 0); } } @@ -4507,16 +4704,16 @@ SDValue DAGCombiner::visitOR(SDNode *N) { // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2) // iff (c1 & c2) != 0. - if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse()) { - if (ConstantSDNode *C1 = dyn_cast(N0.getOperand(1))) { - if (C1->getAPIntValue().intersects(N1C->getAPIntValue())) { - if (SDValue COR = - DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT, N1C, C1)) - return DAG.getNode( - ISD::AND, SDLoc(N), VT, - DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1), COR); - return SDValue(); - } + auto MatchIntersect = [](ConstantSDNode *LHS, ConstantSDNode *RHS) { + return LHS->getAPIntValue().intersects(RHS->getAPIntValue()); + }; + if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() && + matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect)) { + if (SDValue COR = DAG.FoldConstantArithmetic( + ISD::OR, SDLoc(N1), VT, N1.getNode(), N0.getOperand(1).getNode())) { + SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1); + AddToWorklist(IOR.getNode()); + return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR); } } @@ -4700,6 +4897,16 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT); if (!HasROTL && !HasROTR) return nullptr; + // Check for truncated rotate. + if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE && + LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) { + assert(LHS.getValueType() == RHS.getValueType()); + if (SDNode *Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) { + return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), + SDValue(Rot, 0)).getNode(); + } + } + // Match "(X shl/srl V1) & V2" where V2 may not be present. SDValue LHSShift; // The shift. SDValue LHSMask; // AND value if any. 
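For reference, the shape MatchRotate looks for corresponds to the usual scalar shift idiom below; a quick standalone sanity check. The truncated-rotate case added above additionally relies on truncation distributing over OR, which the last assert spot-checks.

#include <cassert>
#include <cstdint>

// The "(x shl c) | (x srl (bitwidth - c))" pattern that MatchRotate folds
// into a single rotate node.
uint32_t rotlViaShifts(uint32_t X, unsigned C) {
  C &= 31;
  return C ? (X << C) | (X >> (32 - C)) : X;
}

int main() {
  assert(rotlViaShifts(0x80000001u, 1) == 0x00000003u);
  assert(rotlViaShifts(0x12345678u, 0) == 0x12345678u);

  // Truncation distributes over OR, so (or (trunc A), (trunc B)) can be
  // treated as trunc(or A, B) before matching the rotate on the wide type.
  uint32_t A = 0xAABBCCDDu, B = 0x00FF00FFu;
  assert(uint16_t(A | B) == uint16_t(uint16_t(A) | uint16_t(B)));
  return 0;
}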
@@ -5018,7 +5225,7 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { return SDValue(); // Loads must share the same base address - BaseIndexOffset Ptr = BaseIndexOffset::match(L->getBasePtr(), DAG); + BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG); int64_t ByteOffsetFromBase = 0; if (!Base) Base = Ptr; @@ -5202,21 +5409,6 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { AddToWorklist(NotX.getNode()); return DAG.getNode(ISD::AND, SDLoc(N), VT, NotX, N1); } - // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2)) - if (N1C && N0.getOpcode() == ISD::XOR) { - if (const ConstantSDNode *N00C = getAsNonOpaqueConstant(N0.getOperand(0))) { - SDLoc DL(N); - return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), - DAG.getConstant(N1C->getAPIntValue() ^ - N00C->getAPIntValue(), DL, VT)); - } - if (const ConstantSDNode *N01C = getAsNonOpaqueConstant(N0.getOperand(1))) { - SDLoc DL(N); - return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0), - DAG.getConstant(N1C->getAPIntValue() ^ - N01C->getAPIntValue(), DL, VT)); - } - } // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X) unsigned OpSizeInBits = VT.getScalarSizeInBits(); @@ -6534,6 +6726,7 @@ SDValue DAGCombiner::visitMSCATTER(SDNode *N) { SDValue DataLo, DataHi; std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); + SDValue Scale = MSC->getScale(); SDValue BasePtr = MSC->getBasePtr(); SDValue IndexLo, IndexHi; std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL); @@ -6543,11 +6736,11 @@ SDValue DAGCombiner::visitMSCATTER(SDNode *N) { MachineMemOperand::MOStore, LoMemVT.getStoreSize(), Alignment, MSC->getAAInfo(), MSC->getRanges()); - SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo }; + SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo, Scale }; Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(), DL, OpsLo, MMO); - SDValue OpsHi[] = {Chain, DataHi, MaskHi, BasePtr, IndexHi}; + SDValue OpsHi[] = { Chain, DataHi, MaskHi, BasePtr, IndexHi, Scale }; Hi = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(), DL, OpsHi, MMO); @@ -6667,6 +6860,7 @@ SDValue DAGCombiner::visitMGATHER(SDNode *N) { EVT LoMemVT, HiMemVT; std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + SDValue Scale = MGT->getScale(); SDValue BasePtr = MGT->getBasePtr(); SDValue Index = MGT->getIndex(); SDValue IndexLo, IndexHi; @@ -6677,13 +6871,13 @@ SDValue DAGCombiner::visitMGATHER(SDNode *N) { MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), Alignment, MGT->getAAInfo(), MGT->getRanges()); - SDValue OpsLo[] = { Chain, Src0Lo, MaskLo, BasePtr, IndexLo }; + SDValue OpsLo[] = { Chain, Src0Lo, MaskLo, BasePtr, IndexLo, Scale }; Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo, - MMO); + MMO); - SDValue OpsHi[] = {Chain, Src0Hi, MaskHi, BasePtr, IndexHi}; + SDValue OpsHi[] = { Chain, Src0Hi, MaskHi, BasePtr, IndexHi, Scale }; Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi, - MMO); + MMO); AddToWorklist(Lo.getNode()); AddToWorklist(Hi.getNode()); @@ -7606,7 +7800,10 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { SDValue Op = N0.getOperand(0); Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType()); AddToWorklist(Op.getNode()); - return DAG.getZExtOrTrunc(Op, SDLoc(N), VT); + SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, SDLoc(N), VT); + // Transfer the debug info; the new node is equivalent to N0. 
+ DAG.transferDbgValues(N0, ZExtOrTrunc); + return ZExtOrTrunc; } } @@ -7694,11 +7891,9 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { if (!N0.hasOneUse()) { if (N0.getOpcode() == ISD::AND) { auto *AndC = cast(N0.getOperand(1)); - auto NarrowLoad = false; EVT LoadResultTy = AndC->getValueType(0); - EVT ExtVT, LoadedVT; - if (isAndLoadExtLoad(AndC, LN0, LoadResultTy, ExtVT, LoadedVT, - NarrowLoad)) + EVT ExtVT; + if (isAndLoadExtLoad(AndC, LN0, LoadResultTy, ExtVT)) DoXform = false; } if (DoXform) @@ -8021,8 +8216,9 @@ SDValue DAGCombiner::visitAssertExt(SDNode *N) { /// If the result of a wider load is shifted to right of N bits and then /// truncated to a narrower type and where N is a multiple of number of bits of /// the narrower type, transform it to a narrower load from address + N / num of -/// bits of new type. If the result is to be extended, also fold the extension -/// to form a extending load. +/// bits of new type. Also narrow the load if the result is masked with an AND +/// to effectively produce a smaller type. If the result is to be extended, also +/// fold the extension to form a extending load. SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { unsigned Opc = N->getOpcode(); @@ -8059,21 +8255,22 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { else ExtVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() - ShiftAmt); - } - if (LegalOperations && !TLI.isLoadExtLegal(ExtType, VT, ExtVT)) - return SDValue(); - - unsigned EVTBits = ExtVT.getSizeInBits(); + } else if (Opc == ISD::AND) { + // An AND with a constant mask is the same as a truncate + zero-extend. + auto AndC = dyn_cast(N->getOperand(1)); + if (!AndC || !AndC->getAPIntValue().isMask()) + return SDValue(); - // Do not generate loads of non-round integer types since these can - // be expensive (and would be wrong if the type is not byte sized). - if (!ExtVT.isRound()) - return SDValue(); + unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes(); + ExtType = ISD::ZEXTLOAD; + ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); + } unsigned ShAmt = 0; if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) { if (ConstantSDNode *N01 = dyn_cast(N0.getOperand(1))) { ShAmt = N01->getZExtValue(); + unsigned EVTBits = ExtVT.getSizeInBits(); // Is the shift amount a multiple of size of VT? if ((ShAmt & (EVTBits-1)) == 0) { N0 = N0.getOperand(0); @@ -8110,42 +8307,12 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { } } - // If we haven't found a load, we can't narrow it. Don't transform one with - // multiple uses, this would require adding a new load. - if (!isa(N0) || !N0.hasOneUse()) + // If we haven't found a load, we can't narrow it. + if (!isa(N0)) return SDValue(); - // Don't change the width of a volatile load. LoadSDNode *LN0 = cast(N0); - if (LN0->isVolatile()) - return SDValue(); - - // Verify that we are actually reducing a load width here. - if (LN0->getMemoryVT().getSizeInBits() < EVTBits) - return SDValue(); - - // For the transform to be legal, the load must produce only two values - // (the value loaded and the chain). Don't transform a pre-increment - // load, for example, which produces an extra value. Otherwise the - // transformation is not equivalent, and the downstream logic to replace - // uses gets things wrong. - if (LN0->getNumValues() > 2) - return SDValue(); - - // If the load that we're shrinking is an extload and we're not just - // discarding the extension we can't simply shrink the load. Bail. - // TODO: It would be possible to merge the extensions in some cases. 
- if (LN0->getExtensionType() != ISD::NON_EXTLOAD && - LN0->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits() + ShAmt) - return SDValue(); - - if (!TLI.shouldReduceLoadWidth(LN0, ExtType, ExtVT)) - return SDValue(); - - EVT PtrType = N0.getOperand(1).getValueType(); - - if (PtrType == MVT::Untyped || PtrType.isExtended()) - // It's not possible to generate a constant of extended or untyped type. + if (!isLegalNarrowLoad(LN0, ExtType, ExtVT, ShAmt)) return SDValue(); // For big endian targets, we need to adjust the offset to the pointer to @@ -8156,6 +8323,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { ShAmt = LVTStoreBits - EVTStoreBits - ShAmt; } + EVT PtrType = N0.getOperand(1).getValueType(); uint64_t PtrOff = ShAmt / 8; unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff); SDLoc DL(LN0); @@ -8614,6 +8782,22 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2)); } + // fold (truncate (extract_subvector(ext x))) -> + // (extract_subvector x) + // TODO: This can be generalized to cover cases where the truncate and extract + // do not fully cancel each other out. + if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == ISD::SIGN_EXTEND || + N00.getOpcode() == ISD::ZERO_EXTEND || + N00.getOpcode() == ISD::ANY_EXTEND) { + if (N00.getOperand(0)->getValueType(0).getVectorElementType() == + VT.getVectorElementType()) + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT, + N00.getOperand(0), N0.getOperand(1)); + } + } + if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N)) return NewVSel; @@ -8634,11 +8818,18 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { LoadSDNode *LD1 = dyn_cast(getBuildPairElt(N, 0)); LoadSDNode *LD2 = dyn_cast(getBuildPairElt(N, 1)); + + // A BUILD_PAIR is always having the least significant part in elt 0 and the + // most significant part in elt 1. So when combining into one large load, we + // need to consider the endianness. + if (DAG.getDataLayout().isBigEndian()) + std::swap(LD1, LD2); + if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() || LD1->getAddressSpace() != LD2->getAddressSpace()) return SDValue(); EVT LD1VT = LD1->getValueType(0); - unsigned LD1Bytes = LD1VT.getSizeInBits() / 8; + unsigned LD1Bytes = LD1VT.getStoreSize(); if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() && DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) { unsigned Align = LD1->getAlignment(); @@ -10045,7 +10236,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { case ISD::SETLT: case ISD::SETLE: std::swap(TrueOpnd, FalseOpnd); - // Fall through + LLVM_FALLTHROUGH; case ISD::SETOGT: case ISD::SETUGT: case ISD::SETOGE: @@ -10399,7 +10590,7 @@ static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) { // value in one SSE register, but instruction selection cannot handle // FCOPYSIGN on SSE registers yet. EVT N1VT = N1->getValueType(0); - EVT N1Op0VT = N1->getOperand(0)->getValueType(0); + EVT N1Op0VT = N1->getOperand(0).getValueType(); return (N1VT == N1Op0VT || N1Op0VT != MVT::f128); } return false; @@ -11369,6 +11560,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { // Replace the uses of Ptr with uses of the updated base value. DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 
1 : 0)); deleteAndRecombine(Ptr.getNode()); + AddToWorklist(Result.getNode()); return true; } @@ -12621,8 +12813,8 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( // The latest Node in the DAG. SDLoc DL(StoreNodes[0].MemNode); - int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8; - unsigned SizeInBits = NumStores * ElementSizeBytes * 8; + int64_t ElementSizeBits = MemVT.getStoreSizeInBits(); + unsigned SizeInBits = NumStores * ElementSizeBits; unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1; EVT StoreTy; @@ -12644,18 +12836,17 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( if (MemVT != Val.getValueType()) { Val = peekThroughBitcast(Val); // Deal with constants of wrong size. - if (ElementSizeBytes * 8 != Val.getValueSizeInBits()) { + if (ElementSizeBits != Val.getValueSizeInBits()) { EVT IntMemVT = EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits()); - if (auto *CFP = dyn_cast(Val)) - Val = DAG.getConstant( - CFP->getValueAPF().bitcastToAPInt().zextOrTrunc( - 8 * ElementSizeBytes), - SDLoc(CFP), IntMemVT); - else if (auto *C = dyn_cast(Val)) - Val = DAG.getConstant( - C->getAPIntValue().zextOrTrunc(8 * ElementSizeBytes), - SDLoc(C), IntMemVT); + if (isa(Val)) { + // Not clear how to truncate FP values. + return false; + } else if (auto *C = dyn_cast(Val)) + Val = DAG.getConstant(C->getAPIntValue() + .zextOrTrunc(Val.getValueSizeInBits()) + .zextOrTrunc(ElementSizeBits), + SDLoc(C), IntMemVT); } // Make sure correctly size type is the correct type. Val = DAG.getBitcast(MemVT, Val); @@ -12716,11 +12907,19 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( StoreSDNode *St = cast(StoreNodes[Idx].MemNode); SDValue Val = St->getValue(); - StoreInt <<= ElementSizeBytes * 8; + StoreInt <<= ElementSizeBits; if (ConstantSDNode *C = dyn_cast(Val)) { - StoreInt |= C->getAPIntValue().zextOrTrunc(SizeInBits); + StoreInt |= C->getAPIntValue() + .zextOrTrunc(ElementSizeBits) + .zextOrTrunc(SizeInBits); } else if (ConstantFPSDNode *C = dyn_cast(Val)) { - StoreInt |= C->getValueAPF().bitcastToAPInt().zextOrTrunc(SizeInBits); + StoreInt |= C->getValueAPF() + .bitcastToAPInt() + .zextOrTrunc(ElementSizeBits) + .zextOrTrunc(SizeInBits); + // If fp truncation is necessary give up for now. + if (MemVT.getSizeInBits() != ElementSizeBits) + return false; } else { llvm_unreachable("Invalid constant element type"); } @@ -12766,7 +12965,7 @@ void DAGCombiner::getStoreMergeCandidates( StoreSDNode *St, SmallVectorImpl &StoreNodes) { // This holds the base pointer, index, and the offset in bytes from the base // pointer. - BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG); + BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG); EVT MemVT = St->getMemoryVT(); SDValue Val = peekThroughBitcast(St->getValue()); @@ -12787,7 +12986,7 @@ void DAGCombiner::getStoreMergeCandidates( EVT LoadVT; if (IsLoadSrc) { auto *Ld = cast(Val); - LBasePtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG); + LBasePtr = BaseIndexOffset::match(Ld, DAG); LoadVT = Ld->getMemoryVT(); // Load and store should be the same type. 
if (MemVT != LoadVT) @@ -12806,7 +13005,7 @@ void DAGCombiner::getStoreMergeCandidates( return false; // The Load's Base Ptr must also match if (LoadSDNode *OtherLd = dyn_cast(Val)) { - auto LPtr = BaseIndexOffset::match(OtherLd->getBasePtr(), DAG); + auto LPtr = BaseIndexOffset::match(OtherLd, DAG); if (LoadVT != OtherLd->getMemoryVT()) return false; if (!(LBasePtr.equalBaseIndex(LPtr, DAG))) @@ -12830,7 +13029,7 @@ void DAGCombiner::getStoreMergeCandidates( Val.getOpcode() != ISD::EXTRACT_SUBVECTOR) return false; } - Ptr = BaseIndexOffset::match(Other->getBasePtr(), DAG); + Ptr = BaseIndexOffset::match(Other, DAG); return (BasePtr.equalBaseIndex(Ptr, DAG, Offset)); }; @@ -12914,13 +13113,13 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { return false; EVT MemVT = St->getMemoryVT(); - int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8; + int64_t ElementSizeBytes = MemVT.getStoreSize(); unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1; if (MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits) return false; - bool NoVectors = DAG.getMachineFunction().getFunction()->hasFnAttribute( + bool NoVectors = DAG.getMachineFunction().getFunction().hasFnAttribute( Attribute::NoImplicitFloat); // This function cannot currently deal with non-byte-sized memory sizes. @@ -13070,7 +13269,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { // Find a legal type for the vector store. unsigned Elts = (i + 1) * NumMemElts; EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts); - if (TLI.isTypeLegal(Ty) && + if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) && TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) && TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS, FirstStoreAlign, &IsFast) && @@ -13203,7 +13402,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { if (Ld->getMemoryVT() != MemVT) break; - BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG); + BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG); // If this is not the first ptr that we check. int64_t LdOffset = 0; if (LdBasePtr.getBase().getNode()) { @@ -14038,6 +14237,10 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { SDValue EltNo = N->getOperand(1); ConstantSDNode *ConstEltNo = dyn_cast(EltNo); + // extract_vector_elt of out-of-bounds element -> UNDEF + if (ConstEltNo && ConstEltNo->getAPIntValue().uge(VT.getVectorNumElements())) + return DAG.getUNDEF(NVT); + // extract_vector_elt (build_vector x, y), 1 -> y if (ConstEltNo && InVec.getOpcode() == ISD::BUILD_VECTOR && @@ -14744,6 +14947,29 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { if (ISD::allOperandsUndef(N)) return DAG.getUNDEF(VT); + // If this is a splat of a bitcast from another vector, change to a + // concat_vector. + // For example: + // (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) -> + // (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X)))) + // + // If X is a build_vector itself, the concat can become a larger build_vector. + // TODO: Maybe this is useful for non-splat too? 
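A concrete way to convince oneself of the splat-of-bitcast rewrite documented above (and implemented in the lines that follow) is to compare the raw bytes of the two forms. A small standalone C++ check, using made-up element values:

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  uint32_t X[2] = {0x11223344u, 0x55667788u}; // a v2i32 value

  uint64_t AsI64;                             // i64 (bitcast (v2i32 X))
  std::memcpy(&AsI64, X, sizeof AsI64);
  uint64_t Splat[2] = {AsI64, AsI64};         // build_vector of the splatted scalar

  uint32_t Concat[4] = {X[0], X[1], X[0], X[1]}; // concat_vectors (X, X)

  // Both forms produce the same byte pattern, so the build_vector can be
  // replaced by a bitcast of the concatenation.
  assert(std::memcmp(Splat, Concat, sizeof Splat) == 0);
  return 0;
}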
+ if (!LegalOperations) { + if (SDValue Splat = cast(N)->getSplatValue()) { + Splat = peekThroughBitcast(Splat); + EVT SrcVT = Splat.getValueType(); + if (SrcVT.isVector()) { + unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements(); + EVT NewVT = EVT::getVectorVT(*DAG.getContext(), + SrcVT.getVectorElementType(), NumElts); + SmallVector Ops(N->getNumOperands(), Splat); + SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), NewVT, Ops); + return DAG.getBitcast(VT, Concat); + } + } + } + // Check if we can express BUILD VECTOR via subvector extract. if (!LegalTypes && (N->getNumOperands() > 1)) { SDValue Op0 = N->getOperand(0); @@ -14946,7 +15172,7 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { // Transform: concat_vectors(scalar, undef) -> scalar_to_vector(sclr). if (In->getOpcode() == ISD::BITCAST && - !In->getOperand(0)->getValueType(0).isVector()) { + !In->getOperand(0).getValueType().isVector()) { SDValue Scalar = In->getOperand(0); // If the bitcast type isn't legal, it might be a trunc of a legal type; @@ -14993,7 +15219,7 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { bool FoundMinVT = false; for (const SDValue &Op : N->ops()) if (ISD::BUILD_VECTOR == Op.getOpcode()) { - EVT OpSVT = Op.getOperand(0)->getValueType(0); + EVT OpSVT = Op.getOperand(0).getValueType(); MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT; FoundMinVT = true; } @@ -15726,6 +15952,84 @@ static SDValue combineShuffleOfSplat(ArrayRef UserMask, NewMask); } +/// If the shuffle mask is taking exactly one element from the first vector +/// operand and passing through all other elements from the second vector +/// operand, return the index of the mask element that is choosing an element +/// from the first operand. Otherwise, return -1. +static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef Mask) { + int MaskSize = Mask.size(); + int EltFromOp0 = -1; + // TODO: This does not match if there are undef elements in the shuffle mask. + // Should we ignore undefs in the shuffle mask instead? The trade-off is + // removing an instruction (a shuffle), but losing the knowledge that some + // vector lanes are not needed. + for (int i = 0; i != MaskSize; ++i) { + if (Mask[i] >= 0 && Mask[i] < MaskSize) { + // We're looking for a shuffle of exactly one element from operand 0. + if (EltFromOp0 != -1) + return -1; + EltFromOp0 = i; + } else if (Mask[i] != i + MaskSize) { + // Nothing from operand 1 can change lanes. + return -1; + } + } + return EltFromOp0; +} + +/// If a shuffle inserts exactly one element from a source vector operand into +/// another vector operand and we can access the specified element as a scalar, +/// then we can eliminate the shuffle. +static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf, + SelectionDAG &DAG) { + // First, check if we are taking one element of a vector and shuffling that + // element into another vector. + ArrayRef Mask = Shuf->getMask(); + SmallVector CommutedMask(Mask.begin(), Mask.end()); + SDValue Op0 = Shuf->getOperand(0); + SDValue Op1 = Shuf->getOperand(1); + int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask); + if (ShufOp0Index == -1) { + // Commute mask and check again. + ShuffleVectorSDNode::commuteMask(CommutedMask); + ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask); + if (ShufOp0Index == -1) + return SDValue(); + // Commute operands to match the commuted shuffle mask. 
+ std::swap(Op0, Op1); + Mask = CommutedMask; + } + + // The shuffle inserts exactly one element from operand 0 into operand 1. + // Now see if we can access that element as a scalar via a real insert element + // instruction. + // TODO: We can try harder to locate the element as a scalar. Examples: it + // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant. + assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() && + "Shuffle mask value must be from operand 0"); + if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT) + return SDValue(); + + auto *InsIndexC = dyn_cast(Op0.getOperand(2)); + if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index]) + return SDValue(); + + // There's an existing insertelement with constant insertion index, so we + // don't need to check the legality/profitability of a replacement operation + // that differs at most in the constant value. The target should be able to + // lower any of those in a similar way. If not, legalization will expand this + // to a scalar-to-vector plus shuffle. + // + // Note that the shuffle may move the scalar from the position that the insert + // element used. Therefore, our new insert element occurs at the shuffle's + // mask index value, not the insert's index value. + // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C' + SDValue NewInsIndex = DAG.getConstant(ShufOp0Index, SDLoc(Shuf), + Op0.getOperand(2).getValueType()); + return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(), + Op1, Op0.getOperand(1), NewInsIndex); +} + SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { EVT VT = N->getValueType(0); unsigned NumElts = VT.getVectorNumElements(); @@ -15776,6 +16080,9 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { if (SDValue V = simplifyShuffleMask(SVN, N0, N1, DAG)) return V; + if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG)) + return InsElt; + // A shuffle of a single vector that is a splat can always be folded. if (auto *N0Shuf = dyn_cast(N0)) if (N1->isUndef() && N0Shuf->isSplat()) @@ -16248,6 +16555,8 @@ SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) { /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==> /// vector_shuffle V, Zero, <0, 4, 2, 4> SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { + assert(N->getOpcode() == ISD::AND && "Unexpected opcode!"); + EVT VT = N->getValueType(0); SDValue LHS = N->getOperand(0); SDValue RHS = peekThroughBitcast(N->getOperand(1)); @@ -16258,9 +16567,6 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { if (LegalOperations) return SDValue(); - if (N->getOpcode() != ISD::AND) - return SDValue(); - if (RHS.getOpcode() != ISD::BUILD_VECTOR) return SDValue(); @@ -16349,10 +16655,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { N->getOpcode(), SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags())) return Fold; - // Try to convert a constant mask AND into a shuffle clear mask. - if (SDValue Shuffle = XformToShuffleWithZero(N)) - return Shuffle; - // Type legalization might introduce new shuffles in the DAG. // Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask))) // -> (shuffle (VBinOp (A, B)), Undef, Mask). @@ -16885,7 +17187,7 @@ SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, SDValue DAGCombiner::BuildSDIV(SDNode *N) { // when optimising for minimum size, we don't want to expand a div to a mul // and a shift. 
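The mul-and-shift expansion that this minsize check avoids is the classic magic-number division. A standalone illustration for unsigned division by 3 follows; the multiplier and shift are the well-known 32-bit udiv-by-3 constants, shown only to make the size trade-off concrete, and the same pattern with different constants and fix-ups underlies BuildSDIV.

#include <cassert>
#include <cstdint>
#include <initializer_list>

// BuildUDIV-style expansion of x / 3 for 32-bit unsigned x: a widening
// multiply by a magic constant followed by a shift.
uint32_t udiv3(uint32_t X) {
  return uint32_t((uint64_t(X) * 0xAAAAAAABull) >> 33);
}

int main() {
  for (uint32_t X : {0u, 1u, 2u, 3u, 4u, 299u, 300u, 0xFFFFFFFFu})
    assert(udiv3(X) == X / 3);
  return 0;
}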
- if (DAG.getMachineFunction().getFunction()->optForMinSize()) + if (DAG.getMachineFunction().getFunction().optForMinSize()) return SDValue(); ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1)); @@ -16931,7 +17233,7 @@ SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) { SDValue DAGCombiner::BuildUDIV(SDNode *N) { // when optimising for minimum size, we don't want to expand a div to a mul // and a shift. - if (DAG.getMachineFunction().getFunction()->optForMinSize()) + if (DAG.getMachineFunction().getFunction().optForMinSize()) return SDValue(); ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1)); @@ -17173,43 +17475,6 @@ SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) { return buildSqrtEstimateImpl(Op, Flags, false); } -/// Return true if base is a frame index, which is known not to alias with -/// anything but itself. Provides base object and offset as results. -static bool findBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset, - const GlobalValue *&GV, const void *&CV) { - // Assume it is a primitive operation. - Base = Ptr; Offset = 0; GV = nullptr; CV = nullptr; - - // If it's an adding a simple constant then integrate the offset. - if (Base.getOpcode() == ISD::ADD) { - if (ConstantSDNode *C = dyn_cast(Base.getOperand(1))) { - Base = Base.getOperand(0); - Offset += C->getSExtValue(); - } - } - - // Return the underlying GlobalValue, and update the Offset. Return false - // for GlobalAddressSDNode since the same GlobalAddress may be represented - // by multiple nodes with different offsets. - if (GlobalAddressSDNode *G = dyn_cast(Base)) { - GV = G->getGlobal(); - Offset += G->getOffset(); - return false; - } - - // Return the underlying Constant value, and update the Offset. Return false - // for ConstantSDNodes since the same constant pool entry may be represented - // by multiple nodes with different offsets. - if (ConstantPoolSDNode *C = dyn_cast(Base)) { - CV = C->isMachineConstantPoolEntry() ? (const void *)C->getMachineCPVal() - : (const void *)C->getConstVal(); - Offset += C->getOffset(); - return false; - } - // If it's any of the following then it can't alias with anything but itself. - return isa(Base); -} - /// Return true if there is any possibility that the two addresses overlap. bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const { // If they are the same then they must be aliases. @@ -17231,65 +17496,46 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const { unsigned NumBytes1 = Op1->getMemoryVT().getStoreSize(); // Check for BaseIndexOffset matching. - BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0->getBasePtr(), DAG); - BaseIndexOffset BasePtr1 = BaseIndexOffset::match(Op1->getBasePtr(), DAG); + BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0, DAG); + BaseIndexOffset BasePtr1 = BaseIndexOffset::match(Op1, DAG); int64_t PtrDiff; - if (BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff)) - return !((NumBytes0 <= PtrDiff) || (PtrDiff + NumBytes1 <= 0)); - - // If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be - // able to calculate their relative offset if at least one arises - // from an alloca. However, these allocas cannot overlap and we - // can infer there is no alias. - if (auto *A = dyn_cast(BasePtr0.getBase())) - if (auto *B = dyn_cast(BasePtr1.getBase())) { - MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); - // If the base are the same frame index but the we couldn't find a - // constant offset, (indices are different) be conservative. 
- if (A != B && (!MFI.isFixedObjectIndex(A->getIndex()) || - !MFI.isFixedObjectIndex(B->getIndex()))) - return false; - } + if (BasePtr0.getBase().getNode() && BasePtr1.getBase().getNode()) { + if (BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff)) + return !((NumBytes0 <= PtrDiff) || (PtrDiff + NumBytes1 <= 0)); + + // If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be + // able to calculate their relative offset if at least one arises + // from an alloca. However, these allocas cannot overlap and we + // can infer there is no alias. + if (auto *A = dyn_cast(BasePtr0.getBase())) + if (auto *B = dyn_cast(BasePtr1.getBase())) { + MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); + // If the base are the same frame index but the we couldn't find a + // constant offset, (indices are different) be conservative. + if (A != B && (!MFI.isFixedObjectIndex(A->getIndex()) || + !MFI.isFixedObjectIndex(B->getIndex()))) + return false; + } - // FIXME: findBaseOffset and ConstantValue/GlobalValue/FrameIndex analysis - // modified to use BaseIndexOffset. - - // Gather base node and offset information. - SDValue Base0, Base1; - int64_t Offset0, Offset1; - const GlobalValue *GV0, *GV1; - const void *CV0, *CV1; - bool IsFrameIndex0 = findBaseOffset(Op0->getBasePtr(), - Base0, Offset0, GV0, CV0); - bool IsFrameIndex1 = findBaseOffset(Op1->getBasePtr(), - Base1, Offset1, GV1, CV1); - - // If they have the same base address, then check to see if they overlap. - if (Base0 == Base1 || (GV0 && (GV0 == GV1)) || (CV0 && (CV0 == CV1))) - return !((Offset0 + NumBytes0) <= Offset1 || - (Offset1 + NumBytes1) <= Offset0); - - // It is possible for different frame indices to alias each other, mostly - // when tail call optimization reuses return address slots for arguments. - // To catch this case, look up the actual index of frame indices to compute - // the real alias relationship. - if (IsFrameIndex0 && IsFrameIndex1) { - MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); - Offset0 += MFI.getObjectOffset(cast(Base0)->getIndex()); - Offset1 += MFI.getObjectOffset(cast(Base1)->getIndex()); - return !((Offset0 + NumBytes0) <= Offset1 || - (Offset1 + NumBytes1) <= Offset0); - } - - // Otherwise, if we know what the bases are, and they aren't identical, then - // we know they cannot alias. - if ((IsFrameIndex0 || CV0 || GV0) && (IsFrameIndex1 || CV1 || GV1)) - return false; + bool IsFI0 = isa(BasePtr0.getBase()); + bool IsFI1 = isa(BasePtr1.getBase()); + bool IsGV0 = isa(BasePtr0.getBase()); + bool IsGV1 = isa(BasePtr1.getBase()); + bool IsCV0 = isa(BasePtr0.getBase()); + bool IsCV1 = isa(BasePtr1.getBase()); + + // If of mismatched base types or checkable indices we can check + // they do not alias. + if ((BasePtr0.getIndex() == BasePtr1.getIndex() || (IsFI0 != IsFI1) || + (IsGV0 != IsGV1) || (IsCV0 != IsCV1)) && + (IsFI0 || IsGV0 || IsCV0) && (IsFI1 || IsGV1 || IsCV1)) + return false; + } - // If we know required SrcValue1 and SrcValue2 have relatively large alignment - // compared to the size and offset of the access, we may be able to prove they - // do not alias. This check is conservative for now to catch cases created by - // splitting vector types. + // If we know required SrcValue1 and SrcValue2 have relatively large + // alignment compared to the size and offset of the access, we may be able + // to prove they do not alias. This check is conservative for now to catch + // cases created by splitting vector types. 
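The interval test used once equalBaseIndex() has produced PtrDiff can be checked in isolation; this is a small self-contained model (not patch code), and mayOverlap is an illustrative name.

#include <cassert>
#include <cstdint>

// PtrDiff is (address1 - address0) in bytes, so the two accesses cover
// [0, NumBytes0) and [PtrDiff, PtrDiff + NumBytes1). They can alias unless
// one interval ends before the other begins.
static bool mayOverlap(int64_t PtrDiff, int64_t NumBytes0, int64_t NumBytes1) {
  return !((NumBytes0 <= PtrDiff) || (PtrDiff + NumBytes1 <= 0));
}

int main() {
  assert(mayOverlap(0, 4, 4));   // same address, both 4 bytes wide
  assert(mayOverlap(2, 4, 4));   // [0,4) vs [2,6)
  assert(!mayOverlap(4, 4, 4));  // [0,4) vs [4,8) are disjoint
  assert(!mayOverlap(-8, 4, 4)); // [0,4) vs [-8,-4) are disjoint
  return 0;
}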
int64_t SrcValOffset0 = Op0->getSrcValueOffset(); int64_t SrcValOffset1 = Op1->getSrcValueOffset(); unsigned OrigAlignment0 = Op0->getOriginalAlignment(); @@ -17299,8 +17545,8 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const { int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0; int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1; - // There is no overlap between these relatively aligned accesses of similar - // size. Return no alias. + // There is no overlap between these relatively aligned accesses of + // similar size. Return no alias. if ((OffAlign0 + NumBytes0) <= OffAlign1 || (OffAlign1 + NumBytes1) <= OffAlign0) return false; @@ -17463,7 +17709,7 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) { // This holds the base pointer, index, and the offset in bytes from the base // pointer. - BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG); + BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG); // We must have a base and an offset. if (!BasePtr.getBase().getNode()) @@ -17489,7 +17735,7 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) { break; // Find the base pointer and offset for this memory node. - BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr(), DAG); + BaseIndexOffset Ptr = BaseIndexOffset::match(Index, DAG); // Check that the base pointer is the same as the original one. if (!BasePtr.equalBaseIndex(Ptr, DAG)) diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index d3c94b5f9e6b..3c856914053b 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -2051,11 +2051,9 @@ bool FastISel::handlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { // At this point we know that there is a 1-1 correspondence between LLVM PHI // nodes and Machine PHI nodes, but the incoming operands have not been // emitted yet. - for (BasicBlock::const_iterator I = SuccBB->begin(); - const auto *PN = dyn_cast(I); ++I) { - + for (const PHINode &PN : SuccBB->phis()) { // Ignore dead phi's. - if (PN->use_empty()) + if (PN.use_empty()) continue; // Only handle legal types. Two interesting things to note here. First, @@ -2064,7 +2062,7 @@ bool FastISel::handlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { // own moves. Second, this check is necessary because FastISel doesn't // use CreateRegs to create registers, so it always creates // exactly one register for each non-void instruction. - EVT VT = TLI.getValueType(DL, PN->getType(), /*AllowUnknown=*/true); + EVT VT = TLI.getValueType(DL, PN.getType(), /*AllowUnknown=*/true); if (VT == MVT::Other || !TLI.isTypeLegal(VT)) { // Handle integer promotions, though, because they're common and easy. if (!(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)) { @@ -2073,11 +2071,11 @@ bool FastISel::handlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { } } - const Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB); + const Value *PHIOp = PN.getIncomingValueForBlock(LLVMBB); // Set the DebugLoc for the copy. Prefer the location of the operand // if there is one; use the location of the PHI otherwise. 
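The alignment-based fallback can also be modelled standalone. This sketch assumes both accesses are the same size, use the same original alignment, have non-negative offsets, and fit inside one aligned block; the extra guards below are added to keep this simplified model sound, while the surrounding DAGCombiner code establishes similar preconditions before comparing the OffAlign values.

#include <cassert>
#include <cstdint>

// Off0/Off1 are byte offsets of two NumBytes-wide accesses from bases that
// are both aligned to Alignment. If the offset ranges within an aligned
// block cannot intersect, the accesses cannot overlap.
static bool provablyNoAlias(int64_t Off0, int64_t Off1, unsigned NumBytes,
                            unsigned Alignment) {
  int64_t OffAlign0 = Off0 % Alignment;
  int64_t OffAlign1 = Off1 % Alignment;
  if (OffAlign0 + NumBytes > Alignment || OffAlign1 + NumBytes > Alignment)
    return false; // an access straddles a block boundary; stay conservative
  return (OffAlign0 + NumBytes) <= OffAlign1 ||
         (OffAlign1 + NumBytes) <= OffAlign0;
}

int main() {
  assert(provablyNoAlias(0, 8, 4, 16));  // 4-byte accesses at offsets 0 and 8
  assert(!provablyNoAlias(0, 2, 4, 16)); // offsets 0 and 2 can overlap
  return 0;
}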
- DbgLoc = PN->getDebugLoc(); + DbgLoc = PN.getDebugLoc(); if (const auto *Inst = dyn_cast(PHIOp)) DbgLoc = Inst->getDebugLoc(); diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index fa89b20f23db..81347fa4bd46 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -17,7 +17,6 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetInstrInfo.h" @@ -26,7 +25,6 @@ #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" @@ -259,20 +257,20 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, // Create Machine PHI nodes for LLVM PHI nodes, lowering them as // appropriate. - for (BasicBlock::const_iterator I = BB.begin(); - const PHINode *PN = dyn_cast(I); ++I) { - if (PN->use_empty()) continue; + for (const PHINode &PN : BB.phis()) { + if (PN.use_empty()) + continue; // Skip empty types - if (PN->getType()->isEmptyTy()) + if (PN.getType()->isEmptyTy()) continue; - DebugLoc DL = PN->getDebugLoc(); - unsigned PHIReg = ValueMap[PN]; + DebugLoc DL = PN.getDebugLoc(); + unsigned PHIReg = ValueMap[&PN]; assert(PHIReg && "PHI node does not have an assigned virtual register!"); SmallVector ValueVTs; - ComputeValueVTs(*TLI, MF->getDataLayout(), PN->getType(), ValueVTs); + ComputeValueVTs(*TLI, MF->getDataLayout(), PN.getType(), ValueVTs); for (EVT VT : ValueVTs) { unsigned NumRegisters = TLI->getNumRegisters(Fn->getContext(), VT); const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 6974e7006ce2..b69c362db676 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -624,13 +624,23 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { assert(!StVT.isVector() && "Vector Stores are handled in LegalizeVectorOps"); + SDValue Result; + // TRUNCSTORE:i16 i32 -> STORE i16 - assert(TLI.isTypeLegal(StVT) && - "Do not know how to expand this store!"); - Value = DAG.getNode(ISD::TRUNCATE, dl, StVT, Value); - SDValue Result = - DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), - Alignment, MMOFlags, AAInfo); + if (TLI.isTypeLegal(StVT)) { + Value = DAG.getNode(ISD::TRUNCATE, dl, StVT, Value); + Result = DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), + Alignment, MMOFlags, AAInfo); + } else { + // The in-memory type isn't legal. Truncate to the type it would promote + // to, and then do a truncstore. + Value = DAG.getNode(ISD::TRUNCATE, dl, + TLI.getTypeToTransformTo(*DAG.getContext(), StVT), + Value); + Result = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), + StVT, Alignment, MMOFlags, AAInfo); + } + ReplaceNode(SDValue(Node, 0), Result); break; } @@ -2004,10 +2014,10 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, // isTailCall may be true since the callee does not reference caller stack // frame. Check if it's in the right position and that the return types match. 
SDValue TCChain = InChain; - const Function *F = DAG.getMachineFunction().getFunction(); + const Function &F = DAG.getMachineFunction().getFunction(); bool isTailCall = TLI.isInTailCallPosition(DAG, Node, TCChain) && - (RetTy == F->getReturnType() || F->getReturnType()->isVoidTy()); + (RetTy == F.getReturnType() || F.getReturnType()->isVoidTy()); if (isTailCall) InChain = TCChain; @@ -2955,12 +2965,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { case ISD::ZERO_EXTEND: LHS = DAG.getNode(ISD::AssertZext, dl, OuterType, Res, DAG.getValueType(AtomicType)); - RHS = DAG.getNode(ISD::ZERO_EXTEND, dl, OuterType, Node->getOperand(2)); + RHS = DAG.getZeroExtendInReg(Node->getOperand(2), dl, AtomicType); ExtRes = LHS; break; case ISD::ANY_EXTEND: LHS = DAG.getZeroExtendInReg(Res, dl, AtomicType); - RHS = DAG.getNode(ISD::ZERO_EXTEND, dl, OuterType, Node->getOperand(2)); + RHS = DAG.getZeroExtendInReg(Node->getOperand(2), dl, AtomicType); break; default: llvm_unreachable("Invalid atomic op extension"); @@ -3922,6 +3932,8 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { DEBUG(dbgs() << "Trying to convert node to libcall\n"); SmallVector Results; SDLoc dl(Node); + // FIXME: Check flags on the node to see if we can use a finite call. + bool CanUseFiniteLibCall = TM.Options.NoInfsFPMath && TM.Options.NoNaNsFPMath; unsigned Opc = Node->getOpcode(); switch (Opc) { case ISD::ATOMIC_FENCE: { @@ -4016,33 +4028,68 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { break; case ISD::FLOG: case ISD::STRICT_FLOG: - Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64, - RTLIB::LOG_F80, RTLIB::LOG_F128, - RTLIB::LOG_PPCF128)); + if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_log_finite)) + Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_FINITE_F32, + RTLIB::LOG_FINITE_F64, + RTLIB::LOG_FINITE_F80, + RTLIB::LOG_FINITE_F128, + RTLIB::LOG_FINITE_PPCF128)); + else + Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64, + RTLIB::LOG_F80, RTLIB::LOG_F128, + RTLIB::LOG_PPCF128)); break; case ISD::FLOG2: case ISD::STRICT_FLOG2: - Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64, - RTLIB::LOG2_F80, RTLIB::LOG2_F128, - RTLIB::LOG2_PPCF128)); + if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_log2_finite)) + Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_FINITE_F32, + RTLIB::LOG2_FINITE_F64, + RTLIB::LOG2_FINITE_F80, + RTLIB::LOG2_FINITE_F128, + RTLIB::LOG2_FINITE_PPCF128)); + else + Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64, + RTLIB::LOG2_F80, RTLIB::LOG2_F128, + RTLIB::LOG2_PPCF128)); break; case ISD::FLOG10: case ISD::STRICT_FLOG10: - Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64, - RTLIB::LOG10_F80, RTLIB::LOG10_F128, - RTLIB::LOG10_PPCF128)); + if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_log10_finite)) + Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_FINITE_F32, + RTLIB::LOG10_FINITE_F64, + RTLIB::LOG10_FINITE_F80, + RTLIB::LOG10_FINITE_F128, + RTLIB::LOG10_FINITE_PPCF128)); + else + Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64, + RTLIB::LOG10_F80, RTLIB::LOG10_F128, + RTLIB::LOG10_PPCF128)); break; case ISD::FEXP: case ISD::STRICT_FEXP: - Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64, - RTLIB::EXP_F80, RTLIB::EXP_F128, - RTLIB::EXP_PPCF128)); + if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_exp_finite)) + Results.push_back(ExpandFPLibCall(Node, 
RTLIB::EXP_FINITE_F32, + RTLIB::EXP_FINITE_F64, + RTLIB::EXP_FINITE_F80, + RTLIB::EXP_FINITE_F128, + RTLIB::EXP_FINITE_PPCF128)); + else + Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64, + RTLIB::EXP_F80, RTLIB::EXP_F128, + RTLIB::EXP_PPCF128)); break; case ISD::FEXP2: case ISD::STRICT_FEXP2: - Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64, - RTLIB::EXP2_F80, RTLIB::EXP2_F128, - RTLIB::EXP2_PPCF128)); + if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_exp2_finite)) + Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_FINITE_F32, + RTLIB::EXP2_FINITE_F64, + RTLIB::EXP2_FINITE_F80, + RTLIB::EXP2_FINITE_F128, + RTLIB::EXP2_FINITE_PPCF128)); + else + Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64, + RTLIB::EXP2_F80, RTLIB::EXP2_F128, + RTLIB::EXP2_PPCF128)); break; case ISD::FTRUNC: Results.push_back(ExpandFPLibCall(Node, RTLIB::TRUNC_F32, RTLIB::TRUNC_F64, @@ -4088,9 +4135,16 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { break; case ISD::FPOW: case ISD::STRICT_FPOW: - Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64, - RTLIB::POW_F80, RTLIB::POW_F128, - RTLIB::POW_PPCF128)); + if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_pow_finite)) + Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_FINITE_F32, + RTLIB::POW_FINITE_F64, + RTLIB::POW_FINITE_F80, + RTLIB::POW_FINITE_F128, + RTLIB::POW_FINITE_PPCF128)); + else + Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64, + RTLIB::POW_F80, RTLIB::POW_F128, + RTLIB::POW_PPCF128)); break; case ISD::FDIV: Results.push_back(ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64, diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index eaf177d0661b..e28a3aa47ca3 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -1887,7 +1887,7 @@ SDValue DAGTypeLegalizer::PromoteFloatOp_STORE(SDNode *N, unsigned OpNo) { SDLoc DL(N); SDValue Promoted = GetPromotedFloat(Val); - EVT VT = ST->getOperand(1)->getValueType(0); + EVT VT = ST->getOperand(1).getValueType(); EVT IVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); SDValue NewVal; diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 03a20dbca496..eaa827309b0b 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -501,7 +501,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MGATHER(MaskedGatherSDNode *N) { SDLoc dl(N); SDValue Ops[] = {N->getChain(), ExtSrc0, N->getMask(), N->getBasePtr(), - N->getIndex()}; + N->getIndex(), N->getScale() }; SDValue Res = DAG.getMaskedGather(DAG.getVTList(NVT, MVT::Other), N->getMemoryVT(), dl, Ops, N->getMemOperand()); @@ -573,8 +573,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VSELECT(SDNode *N) { SDValue LHS = GetPromotedInteger(N->getOperand(1)); SDValue RHS = GetPromotedInteger(N->getOperand(2)); - // Promote all the way up to the canonical SetCC type. 
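The intent of the new CanUseFiniteLibCall path is easier to see in a toy form: only when the no-NaNs and no-infs options are both set and the TargetLibraryInfo reports that the __*_finite entry point exists does the legalizer switch libcalls. A hedged sketch with purely illustrative names:

#include <cstdio>
#include <string>

// Model of the selection added for FLOG*/FEXP*/FPOW: prefer the glibc
// finite variant when the fast-math options allow it and the runtime
// provides it, otherwise keep the standard call.
static std::string pickExpLibcall(bool NoInfsFPMath, bool NoNaNsFPMath,
                                  bool HasExpFinite) {
  bool CanUseFiniteLibCall = NoInfsFPMath && NoNaNsFPMath;
  if (CanUseFiniteLibCall && HasExpFinite)
    return "__exp_finite";
  return "exp";
}

int main() {
  std::printf("%s\n", pickExpLibcall(true, true, true).c_str());  // __exp_finite
  std::printf("%s\n", pickExpLibcall(true, false, true).c_str()); // exp
  std::printf("%s\n", pickExpLibcall(true, true, false).c_str()); // exp
}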
- Mask = PromoteTargetBoolean(Mask, LHS.getValueType()); return DAG.getNode(ISD::VSELECT, SDLoc(N), LHS.getValueType(), Mask, LHS, RHS); } @@ -601,20 +599,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) { assert(SVT.isVector() == N->getOperand(0).getValueType().isVector() && "Vector compare must return a vector result!"); - SDValue LHS = N->getOperand(0); - SDValue RHS = N->getOperand(1); - if (LHS.getValueType() != RHS.getValueType()) { - if (getTypeAction(LHS.getValueType()) == TargetLowering::TypePromoteInteger && - !LHS.getValueType().isVector()) - LHS = GetPromotedInteger(LHS); - if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger && - !RHS.getValueType().isVector()) - RHS = GetPromotedInteger(RHS); - } - // Get the SETCC result using the canonical SETCC type. - SDValue SetCC = DAG.getNode(N->getOpcode(), dl, SVT, LHS, RHS, - N->getOperand(2)); + SDValue SetCC = DAG.getNode(N->getOpcode(), dl, SVT, N->getOperand(0), + N->getOperand(1), N->getOperand(2)); // Convert to the expected type. return DAG.getSExtOrTrunc(SetCC, dl, NVT); @@ -774,7 +761,30 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) { SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBCARRY(SDNode *N, unsigned ResNo) { if (ResNo == 1) return PromoteIntRes_Overflow(N); - llvm_unreachable("Not implemented"); + + // We need to sign-extend the operands so the carry value computed by the + // wide operation will be equivalent to the carry value computed by the + // narrow operation. + // An ADDCARRY can generate carry only if any of the operands has its + // most significant bit set. Sign extension propagates the most significant + // bit into the higher bits which means the extra bit that the narrow + // addition would need (i.e. the carry) will be propagated through the higher + // bits of the wide addition. + // A SUBCARRY can generate borrow only if LHS < RHS and this property will be + // preserved by sign extension. + SDValue LHS = SExtPromotedInteger(N->getOperand(0)); + SDValue RHS = SExtPromotedInteger(N->getOperand(1)); + + EVT ValueVTs[] = {LHS.getValueType(), N->getValueType(1)}; + + // Do the arithmetic in the wide type. + SDValue Res = DAG.getNode(N->getOpcode(), SDLoc(N), DAG.getVTList(ValueVTs), + LHS, RHS, N->getOperand(2)); + + // Update the users of the original carry/borrow value. + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + + return SDValue(Res.getNode(), 0); } SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) { @@ -1209,24 +1219,23 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, // When the data operand has illegal type, we should legalize the data // operand first. The mask will be promoted/splitted/widened according to // the data operand type. - if (TLI.isTypeLegal(DataVT)) + if (TLI.isTypeLegal(DataVT)) { Mask = PromoteTargetBoolean(Mask, DataVT); - else { - if (getTypeAction(DataVT) == TargetLowering::TypePromoteInteger) - return PromoteIntOp_MSTORE(N, 3); - - else if (getTypeAction(DataVT) == TargetLowering::TypeWidenVector) - return WidenVecOp_MSTORE(N, 3); - - else { - assert (getTypeAction(DataVT) == TargetLowering::TypeSplitVector); - return SplitVecOp_MSTORE(N, 3); - } + // Update in place. 
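The sign-extension argument in the new PromoteIntRes_ADDSUBCARRY comment can be verified exhaustively for the i8-to-i16 case; this standalone check is illustrative only and covers the ADDCARRY direction of the claim.

#include <cassert>

int main() {
  for (unsigned x = 0; x < 256; ++x)
    for (unsigned y = 0; y < 256; ++y)
      for (unsigned cin = 0; cin < 2; ++cin) {
        unsigned narrowSum = x + y + cin;       // 8-bit add-with-carry
        unsigned narrowCarry = narrowSum >> 8;  // carry out of bit 7

        unsigned sx = (x & 0x80) ? (x | 0xFF00u) : x; // sign-extend to 16 bits
        unsigned sy = (y & 0x80) ? (y | 0xFF00u) : y;
        unsigned wideSum = sx + sy + cin;       // 16-bit add-with-carry
        unsigned wideCarry = (wideSum >> 16) & 1;

        // The wide carry matches the narrow carry, and the low result bits
        // are unchanged, which is exactly what the promotion relies on.
        assert(narrowCarry == wideCarry);
        assert((narrowSum & 0xFFu) == (wideSum & 0xFFu));
      }
  return 0;
}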
+ SmallVector NewOps(N->op_begin(), N->op_end()); + NewOps[2] = Mask; + return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); } + + if (getTypeAction(DataVT) == TargetLowering::TypePromoteInteger) + return PromoteIntOp_MSTORE(N, 3); + if (getTypeAction(DataVT) == TargetLowering::TypeWidenVector) + return WidenVecOp_MSTORE(N, 3); + assert (getTypeAction(DataVT) == TargetLowering::TypeSplitVector); + return SplitVecOp_MSTORE(N, 3); } else { // Data operand assert(OpNo == 3 && "Unexpected operand for promotion"); DataOp = GetPromotedInteger(DataOp); - Mask = PromoteTargetBoolean(Mask, DataOp.getValueType()); TruncateStore = true; } @@ -1253,6 +1262,9 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MGATHER(MaskedGatherSDNode *N, // The Mask EVT DataVT = N->getValueType(0); NewOps[OpNo] = PromoteTargetBoolean(N->getOperand(OpNo), DataVT); + } else if (OpNo == 4) { + // Need to sign extend the index since the bits will likely be used. + NewOps[OpNo] = SExtPromotedInteger(N->getOperand(OpNo)); } else NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo)); @@ -1273,6 +1285,9 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSCATTER(MaskedScatterSDNode *N, // The Mask EVT DataVT = N->getValue().getValueType(); NewOps[OpNo] = PromoteTargetBoolean(N->getOperand(OpNo), DataVT); + } else if (OpNo == 4) { + // Need to sign extend the index since the bits will likely be used. + NewOps[OpNo] = SExtPromotedInteger(N->getOperand(OpNo)); } else NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo)); return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); @@ -3227,8 +3242,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { // Increment the pointer to the other half. unsigned IncrementSize = NVT.getSizeInBits()/8; - Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); + Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize); Hi = DAG.getTruncStore( Ch, dl, Hi, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), NEVT, MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo); @@ -3263,8 +3277,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { MMOFlags, AAInfo); // Increment the pointer to the other half. - Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); + Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize); // Store the lowest ExcessBits bits in the second half. Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), @@ -3465,7 +3478,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) { EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); assert(NOutVT.isVector() && "This type must be promoted to a vector type"); - EVT InElemTy = OutVT.getVectorElementType(); EVT OutElemTy = NOutVT.getVectorElementType(); unsigned NumElem = N->getOperand(0).getValueType().getVectorNumElements(); @@ -3474,15 +3486,36 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) { assert(NumElem * NumOperands == NumOutElem && "Unexpected number of elements"); + // If the input type is legal and we can promote it to a legal type with the + // same element size, go ahead do that to create a new concat. 
+ if (getTypeAction(N->getOperand(0).getValueType()) == + TargetLowering::TypeLegal) { + EVT InPromotedTy = EVT::getVectorVT(*DAG.getContext(), OutElemTy, NumElem); + if (TLI.isTypeLegal(InPromotedTy)) { + SmallVector Ops(NumOperands); + for (unsigned i = 0; i < NumOperands; ++i) { + Ops[i] = DAG.getNode(ISD::ANY_EXTEND, dl, InPromotedTy, + N->getOperand(i)); + } + return DAG.getNode(ISD::CONCAT_VECTORS, dl, NOutVT, Ops); + } + } + // Take the elements from the first vector. SmallVector Ops(NumOutElem); for (unsigned i = 0; i < NumOperands; ++i) { SDValue Op = N->getOperand(i); + if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteInteger) + Op = GetPromotedInteger(Op); + EVT SclrTy = Op.getValueType().getVectorElementType(); + assert(NumElem == Op.getValueType().getVectorNumElements() && + "Unexpected number of elements"); + for (unsigned j = 0; j < NumElem; ++j) { SDValue Ext = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, dl, InElemTy, Op, + ISD::EXTRACT_VECTOR_ELT, dl, SclrTy, Op, DAG.getConstant(j, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); - Ops[i * NumElem + j] = DAG.getNode(ISD::ANY_EXTEND, dl, OutElemTy, Ext); + Ops[i * NumElem + j] = DAG.getAnyExtOrTrunc(Ext, dl, OutElemTy); } } diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 88c5dddfec44..4438ee7878b8 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -17,10 +17,8 @@ #include "SDNodeDbgValue.h" #include "llvm/ADT/SetVector.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -226,7 +224,7 @@ bool DAGTypeLegalizer::run() { assert(N->getNodeId() == ReadyToProcess && "Node should be ready if on worklist!"); - DEBUG(dbgs() << "Legalizing node: "; N->dump()); + DEBUG(dbgs() << "Legalizing node: "; N->dump(&DAG)); if (IgnoreNodeResults(N)) { DEBUG(dbgs() << "Ignoring node results\n"); goto ScanOperands; @@ -298,7 +296,7 @@ bool DAGTypeLegalizer::run() { continue; const auto Op = N->getOperand(i); - DEBUG(dbgs() << "Analyzing operand: "; Op.dump()); + DEBUG(dbgs() << "Analyzing operand: "; Op.dump(&DAG)); EVT OpVT = Op.getValueType(); switch (getTypeAction(OpVT)) { case TargetLowering::TypeLegal: @@ -447,7 +445,7 @@ bool DAGTypeLegalizer::run() { if (!isTypeLegal(Node.getValueType(i)) && !TLI.isTypeLegal(Node.getValueType(i))) { dbgs() << "Result type " << i << " illegal: "; - Node.dump(); + Node.dump(&DAG); Failed = true; } @@ -457,7 +455,7 @@ bool DAGTypeLegalizer::run() { !isTypeLegal(Node.getOperand(i).getValueType()) && !TLI.isTypeLegal(Node.getOperand(i).getValueType())) { dbgs() << "Operand type " << i << " illegal: "; - Node.getOperand(i).dump(); + Node.getOperand(i).dump(&DAG); Failed = true; } @@ -1147,23 +1145,6 @@ SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, EVT ValVT) { return DAG.getNode(ExtendCode, dl, BoolVT, Bool); } -/// Widen the given target boolean to a target boolean of the given type. -/// The boolean vector is widened and then promoted to match the target boolean -/// type of the given ValVT. 
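The effect of the new PromoteIntRes_CONCAT_VECTORS fast path, any-extending whole input vectors instead of extracting every scalar, can be pictured with ordinary containers; this tiny sketch is illustrative only.

#include <cassert>
#include <cstdint>
#include <vector>

int main() {
  // Two <2 x i8> inputs promoted and concatenated into a <4 x i16> result.
  std::vector<uint8_t> In0 = {1, 2}, In1 = {3, 4};

  std::vector<uint16_t> Out;                 // promoted result vector
  for (uint8_t v : In0) Out.push_back(v);    // ANY_EXTEND of operand 0
  for (uint8_t v : In1) Out.push_back(v);    // ANY_EXTEND of operand 1

  std::vector<uint16_t> Expected = {1, 2, 3, 4};
  assert(Out == Expected);
  return 0;
}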
-SDValue DAGTypeLegalizer::WidenTargetBoolean(SDValue Bool, EVT ValVT, - bool WithZeroes) { - SDLoc dl(Bool); - EVT BoolVT = Bool.getValueType(); - - assert(ValVT.getVectorNumElements() > BoolVT.getVectorNumElements() && - TLI.isTypeLegal(ValVT) && - "Unexpected types in WidenTargetBoolean"); - EVT WideVT = EVT::getVectorVT(*DAG.getContext(), BoolVT.getScalarType(), - ValVT.getVectorNumElements()); - Bool = ModifyToType(Bool, WideVT, WithZeroes); - return PromoteTargetBoolean(Bool, ValVT); -} - /// Return the lower LoVT bits of Op in Lo and the upper HiVT bits in Hi. void DAGTypeLegalizer::SplitInteger(SDValue Op, EVT LoVT, EVT HiVT, diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index c221cb30299a..64cb80e0d853 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -183,10 +183,6 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue PromoteTargetBoolean(SDValue Bool, EVT ValVT); - /// Modify Bit Vector to match SetCC result type of ValVT. - /// The bit vector is widened with zeroes when WithZeroes is true. - SDValue WidenTargetBoolean(SDValue Bool, EVT ValVT, bool WithZeroes = false); - void ReplaceValueWith(SDValue From, SDValue To); void SplitInteger(SDValue Op, SDValue &Lo, SDValue &Hi); void SplitInteger(SDValue Op, EVT LoVT, EVT HiVT, @@ -623,7 +619,6 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue ScalarizeVecRes_SETCC(SDNode *N); SDValue ScalarizeVecRes_UNDEF(SDNode *N); SDValue ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N); - SDValue ScalarizeVecRes_VSETCC(SDNode *N); // Vector Operand Scalarization: <1 x ty> -> ty. bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo); @@ -732,7 +727,6 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue WidenVecRes_SETCC(SDNode* N); SDValue WidenVecRes_UNDEF(SDNode *N); SDValue WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N); - SDValue WidenVecRes_VSETCC(SDNode* N); SDValue WidenVecRes_Ternary(SDNode *N); SDValue WidenVecRes_Binary(SDNode *N); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index f3306151d864..993465ae9dc2 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -484,8 +484,7 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getPointerInfo(), Alignment, St->getMemOperand()->getFlags(), AAInfo); - Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); + Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize); Hi = DAG.getStore(Chain, dl, Hi, Ptr, St->getPointerInfo().getWithOffset(IncrementSize), MinAlign(Alignment, IncrementSize), diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 69438113b744..eda73dbec4c2 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -49,6 +49,8 @@ using namespace llvm; +#define DEBUG_TYPE "legalizevectorops" + namespace { class VectorLegalizer { @@ -137,14 +139,14 @@ class VectorLegalizer { /// \brief Implements [SU]INT_TO_FP vector promotion. /// - /// This is a [zs]ext of the input operand to the next size up. + /// This is a [zs]ext of the input operand to a larger integer type. SDValue PromoteINT_TO_FP(SDValue Op); /// \brief Implements FP_TO_[SU]INT vector promotion of the result type. 
/// - /// It is promoted to the next size up integer type. The result is then + /// It is promoted to a larger integer type. The result is then /// truncated back to the original type. - SDValue PromoteFP_TO_INT(SDValue Op, bool isSigned); + SDValue PromoteFP_TO_INT(SDValue Op); public: VectorLegalizer(SelectionDAG& dag) : @@ -226,7 +228,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { if (Op.getOpcode() == ISD::LOAD) { LoadSDNode *LD = cast(Op.getNode()); ISD::LoadExtType ExtType = LD->getExtensionType(); - if (LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD) + if (LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD) { + DEBUG(dbgs() << "\nLegalizing extending vector load: "; Node->dump(&DAG)); switch (TLI.getLoadExtAction(LD->getExtensionType(), LD->getValueType(0), LD->getMemoryVT())) { default: llvm_unreachable("This action is not supported yet!"); @@ -252,11 +255,14 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { Changed = true; return LegalizeOp(ExpandLoad(Op)); } + } } else if (Op.getOpcode() == ISD::STORE) { StoreSDNode *ST = cast(Op.getNode()); EVT StVT = ST->getMemoryVT(); MVT ValVT = ST->getValue().getSimpleValueType(); - if (StVT.isVector() && ST->isTruncatingStore()) + if (StVT.isVector() && ST->isTruncatingStore()) { + DEBUG(dbgs() << "\nLegalizing truncating vector store: "; + Node->dump(&DAG)); switch (TLI.getTruncStoreAction(ValVT, StVT)) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: @@ -270,6 +276,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { Changed = true; return LegalizeOp(ExpandStore(Op)); } + } } else if (Op.getOpcode() == ISD::MSCATTER || Op.getOpcode() == ISD::MSTORE) HasVectorValue = true; @@ -376,6 +383,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { break; } + DEBUG(dbgs() << "\nLegalizing vector op: "; Node->dump(&DAG)); + switch (TLI.getOperationAction(Node->getOpcode(), QueryType)) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Promote: @@ -383,12 +392,16 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { Changed = true; break; case TargetLowering::Legal: + DEBUG(dbgs() << "Legal node: nothing to do\n"); break; case TargetLowering::Custom: { + DEBUG(dbgs() << "Trying custom legalization\n"); if (SDValue Tmp1 = TLI.LowerOperation(Op, DAG)) { + DEBUG(dbgs() << "Successfully custom legalized node\n"); Result = Tmp1; break; } + DEBUG(dbgs() << "Could not custom legalize node\n"); LLVM_FALLTHROUGH; } case TargetLowering::Expand: @@ -418,7 +431,7 @@ SDValue VectorLegalizer::Promote(SDValue Op) { case ISD::FP_TO_UINT: case ISD::FP_TO_SINT: // Promote the operation by extending the operand. - return PromoteFP_TO_INT(Op, Op->getOpcode() == ISD::FP_TO_SINT); + return PromoteFP_TO_INT(Op); } // There are currently two cases of vector promotion: @@ -459,20 +472,11 @@ SDValue VectorLegalizer::Promote(SDValue Op) { SDValue VectorLegalizer::PromoteINT_TO_FP(SDValue Op) { // INT_TO_FP operations may require the input operand be promoted even // when the type is otherwise legal. - EVT VT = Op.getOperand(0).getValueType(); - assert(Op.getNode()->getNumValues() == 1 && - "Can't promote a vector with multiple results!"); - - // Normal getTypeToPromoteTo() doesn't work here, as that will promote - // by widening the vector w/ the same element width and twice the number - // of elements. We want the other way around, the same number of elements, - // each twice the width. 
- // - // Increase the bitwidth of the element to the next pow-of-two - // (which is greater than 8 bits). + MVT VT = Op.getOperand(0).getSimpleValueType(); + MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT); + assert(NVT.getVectorNumElements() == VT.getVectorNumElements() && + "Vectors have different number of elements!"); - EVT NVT = VT.widenIntegerVectorElementType(*DAG.getContext()); - assert(NVT.isSimple() && "Promoting to a non-simple vector type!"); SDLoc dl(Op); SmallVector Operands(Op.getNumOperands()); @@ -492,29 +496,30 @@ SDValue VectorLegalizer::PromoteINT_TO_FP(SDValue Op) { // elements and then truncate the result. This is different from the default // PromoteVector which uses bitcast to promote thus assumning that the // promoted vector type has the same overall size. -SDValue VectorLegalizer::PromoteFP_TO_INT(SDValue Op, bool isSigned) { - assert(Op.getNode()->getNumValues() == 1 && - "Can't promote a vector with multiple results!"); - EVT VT = Op.getValueType(); +SDValue VectorLegalizer::PromoteFP_TO_INT(SDValue Op) { + MVT VT = Op.getSimpleValueType(); + MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT); + assert(NVT.getVectorNumElements() == VT.getVectorNumElements() && + "Vectors have different number of elements!"); - EVT NewVT; - unsigned NewOpc; - while (true) { - NewVT = VT.widenIntegerVectorElementType(*DAG.getContext()); - assert(NewVT.isSimple() && "Promoting to a non-simple vector type!"); - if (TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NewVT)) { - NewOpc = ISD::FP_TO_SINT; - break; - } - if (!isSigned && TLI.isOperationLegalOrCustom(ISD::FP_TO_UINT, NewVT)) { - NewOpc = ISD::FP_TO_UINT; - break; - } - } + unsigned NewOpc = Op->getOpcode(); + // Change FP_TO_UINT to FP_TO_SINT if possible. + // TODO: Should we only do this if FP_TO_UINT itself isn't legal? + if (NewOpc == ISD::FP_TO_UINT && + TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT)) + NewOpc = ISD::FP_TO_SINT; - SDLoc loc(Op); - SDValue promoted = DAG.getNode(NewOpc, SDLoc(Op), NewVT, Op.getOperand(0)); - return DAG.getNode(ISD::TRUNCATE, SDLoc(Op), VT, promoted); + SDLoc dl(Op); + SDValue Promoted = DAG.getNode(NewOpc, dl, NVT, Op.getOperand(0)); + + // Assert that the converted value fits in the original type. If it doesn't + // (eg: because the value being converted is too big), then the result of the + // original operation was undefined anyway, so the assert is still correct. + Promoted = DAG.getNode(Op->getOpcode() == ISD::FP_TO_UINT ? 
ISD::AssertZext + : ISD::AssertSext, + dl, NVT, Promoted, + DAG.getValueType(VT.getScalarType())); + return DAG.getNode(ISD::TRUNCATE, dl, VT, Promoted); } SDValue VectorLegalizer::ExpandLoad(SDValue Op) { @@ -554,7 +559,6 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { unsigned Offset = 0; unsigned RemainingBytes = SrcVT.getStoreSize(); SmallVector LoadVals; - while (RemainingBytes > 0) { SDValue ScalarLoad; unsigned LoadBytes = WideBytes; @@ -580,9 +584,8 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { RemainingBytes -= LoadBytes; Offset += LoadBytes; - BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR, - DAG.getConstant(LoadBytes, dl, - BasePTR.getValueType())); + + BasePTR = DAG.getObjectPtrOffset(dl, BasePTR, LoadBytes); LoadVals.push_back(ScalarLoad.getValue(0)); LoadChains.push_back(ScalarLoad.getValue(1)); @@ -659,35 +662,6 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { SDValue VectorLegalizer::ExpandStore(SDValue Op) { StoreSDNode *ST = cast(Op.getNode()); - - EVT StVT = ST->getMemoryVT(); - EVT MemSclVT = StVT.getScalarType(); - unsigned ScalarSize = MemSclVT.getSizeInBits(); - - // Round odd types to the next pow of two. - if (!isPowerOf2_32(ScalarSize)) { - // FIXME: This is completely broken and inconsistent with ExpandLoad - // handling. - - // For sub-byte element sizes, this ends up with 0 stride between elements, - // so the same element just gets re-written to the same location. There seem - // to be tests explicitly testing for this broken behavior though. tests - // for this broken behavior. - - LLVMContext &Ctx = *DAG.getContext(); - - EVT NewMemVT - = EVT::getVectorVT(Ctx, - MemSclVT.getIntegerVT(Ctx, NextPowerOf2(ScalarSize)), - StVT.getVectorNumElements()); - - SDValue NewVectorStore = DAG.getTruncStore( - ST->getChain(), SDLoc(Op), ST->getValue(), ST->getBasePtr(), - ST->getPointerInfo(), NewMemVT, ST->getAlignment(), - ST->getMemOperand()->getFlags(), ST->getAAInfo()); - ST = cast(NewVectorStore.getNode()); - } - SDValue TF = TLI.scalarizeVectorStore(ST, DAG); AddLegalizedOperand(Op, TF); return TF; diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index cdc83ccf10df..5d0ef0d3436d 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -169,9 +169,14 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_MERGE_VALUES(SDNode *N, } SDValue DAGTypeLegalizer::ScalarizeVecRes_BITCAST(SDNode *N) { + SDValue Op = N->getOperand(0); + if (Op.getValueType().isVector() + && Op.getValueType().getVectorNumElements() == 1 + && !isSimpleLegalType(Op.getValueType())) + Op = GetScalarizedVector(Op); EVT NewVT = N->getValueType(0).getVectorElementType(); return DAG.getNode(ISD::BITCAST, SDLoc(N), - NewVT, N->getOperand(0)); + NewVT, Op); } SDValue DAGTypeLegalizer::ScalarizeVecRes_BUILD_VECTOR(SDNode *N) { @@ -331,7 +336,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) { // At least try the common case where the boolean is generated by a // comparison. if (Cond->getOpcode() == ISD::SETCC) { - EVT OpVT = Cond->getOperand(0)->getValueType(0); + EVT OpVT = Cond->getOperand(0).getValueType(); ScalarBool = TLI.getBooleanContents(OpVT.getScalarType()); VecBool = TLI.getBooleanContents(OpVT); } else @@ -1054,34 +1059,57 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, if (CustomLowerNode(N, N->getValueType(0), true)) return; - // Spill the vector to the stack. 
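The rewritten PromoteFP_TO_INT follows the usual promote-then-truncate shape: convert in the wider integer type, record the value range with AssertZext/AssertSext, and truncate back. A scalar model of why the truncation is lossless for in-range inputs (standalone, not patch code):

#include <cassert>
#include <cstdint>

int main() {
  float f = 200.5f;
  uint8_t direct = (uint8_t)f;        // the original narrow FP_TO_UINT
  int32_t wide = (int32_t)f;          // FP_TO_SINT in the promoted type
  uint8_t truncated = (uint8_t)wide;  // TRUNCATE back to the result type
  // For values that fit in the narrow type the two paths agree; out-of-range
  // inputs were already undefined for the original node.
  assert(direct == truncated && truncated == 200);
  return 0;
}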
+ // Make the vector elements byte-addressable if they aren't already. EVT VecVT = Vec.getValueType(); EVT EltVT = VecVT.getVectorElementType(); + if (VecVT.getScalarSizeInBits() < 8) { + EltVT = MVT::i8; + VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, + VecVT.getVectorNumElements()); + Vec = DAG.getNode(ISD::ANY_EXTEND, dl, VecVT, Vec); + // Extend the element type to match if needed. + if (EltVT.bitsGT(Elt.getValueType())) + Elt = DAG.getNode(ISD::ANY_EXTEND, dl, EltVT, Elt); + } + + // Spill the vector to the stack. SDValue StackPtr = DAG.CreateStackTemporary(VecVT); - SDValue Store = - DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, MachinePointerInfo()); + auto &MF = DAG.getMachineFunction(); + auto FrameIndex = cast(StackPtr.getNode())->getIndex(); + auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo); // Store the new element. This may be larger than the vector element type, // so use a truncating store. SDValue EltPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx); Type *VecType = VecVT.getTypeForEVT(*DAG.getContext()); unsigned Alignment = DAG.getDataLayout().getPrefTypeAlignment(VecType); - Store = - DAG.getTruncStore(Store, dl, Elt, EltPtr, MachinePointerInfo(), EltVT); + Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, + MachinePointerInfo::getUnknownStack(MF), EltVT); + + EVT LoVT, HiVT; + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VecVT); // Load the Lo part from the stack slot. - Lo = - DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo()); + Lo = DAG.getLoad(LoVT, dl, Store, StackPtr, PtrInfo); // Increment the pointer to the other part. - unsigned IncrementSize = Lo.getValueSizeInBits() / 8; + unsigned IncrementSize = LoVT.getSizeInBits() / 8; StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, DAG.getConstant(IncrementSize, dl, StackPtr.getValueType())); // Load the Hi part from the stack slot. - Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(), + Hi = DAG.getLoad(HiVT, dl, Store, StackPtr, + PtrInfo.getWithOffset(IncrementSize), MinAlign(Alignment, IncrementSize)); + + // If we adjusted the original type, we need to truncate the results. 
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + if (LoVT != Lo.getValueType()) + Lo = DAG.getNode(ISD::TRUNCATE, dl, LoVT, Lo); + if (HiVT != Hi.getValueType()) + Hi = DAG.getNode(ISD::TRUNCATE, dl, HiVT, Hi); } void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, @@ -1116,8 +1144,7 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, LD->getPointerInfo(), LoMemVT, Alignment, MMOFlags, AAInfo); unsigned IncrementSize = LoMemVT.getSizeInBits()/8; - Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); + Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize); Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset, LD->getPointerInfo().getWithOffset(IncrementSize), HiMemVT, Alignment, MMOFlags, AAInfo); @@ -1210,6 +1237,7 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, SDValue Mask = MGT->getMask(); SDValue Src0 = MGT->getValue(); SDValue Index = MGT->getIndex(); + SDValue Scale = MGT->getScale(); unsigned Alignment = MGT->getOriginalAlignment(); // Split Mask operand @@ -1241,11 +1269,11 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), Alignment, MGT->getAAInfo(), MGT->getRanges()); - SDValue OpsLo[] = {Ch, Src0Lo, MaskLo, Ptr, IndexLo}; + SDValue OpsLo[] = {Ch, Src0Lo, MaskLo, Ptr, IndexLo, Scale}; Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl, OpsLo, MMO); - SDValue OpsHi[] = {Ch, Src0Hi, MaskHi, Ptr, IndexHi}; + SDValue OpsHi[] = {Ch, Src0Hi, MaskHi, Ptr, IndexHi, Scale}; Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, dl, OpsHi, MMO); @@ -1526,14 +1554,14 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { break; case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: - if (N->getValueType(0).bitsLT(N->getOperand(0)->getValueType(0))) + if (N->getValueType(0).bitsLT(N->getOperand(0).getValueType())) Res = SplitVecOp_TruncateHelper(N); else Res = SplitVecOp_UnaryOp(N); break; case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: - if (N->getValueType(0).bitsLT(N->getOperand(0)->getValueType(0))) + if (N->getValueType(0).bitsLT(N->getOperand(0).getValueType())) Res = SplitVecOp_TruncateHelper(N); else Res = SplitVecOp_UnaryOp(N); @@ -1748,30 +1776,25 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { // Make the vector elements byte-addressable if they aren't already. SDLoc dl(N); EVT EltVT = VecVT.getVectorElementType(); - if (EltVT.getSizeInBits() < 8) { - SmallVector ElementOps; - for (unsigned i = 0; i < VecVT.getVectorNumElements(); ++i) { - ElementOps.push_back(DAG.getAnyExtOrTrunc( - DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Vec, - DAG.getConstant(i, dl, MVT::i8)), - dl, MVT::i8)); - } - + if (VecVT.getScalarSizeInBits() < 8) { EltVT = MVT::i8; VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, VecVT.getVectorNumElements()); - Vec = DAG.getBuildVector(VecVT, dl, ElementOps); + Vec = DAG.getNode(ISD::ANY_EXTEND, dl, VecVT, Vec); } // Store the vector to the stack. SDValue StackPtr = DAG.CreateStackTemporary(VecVT); - SDValue Store = - DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, MachinePointerInfo()); + auto &MF = DAG.getMachineFunction(); + auto FrameIndex = cast(StackPtr.getNode())->getIndex(); + auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo); // Load back the required element. 
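The byte-addressable spill used by the updated SplitVecRes_INSERT_VECTOR_ELT (and by the EXTRACT_VECTOR_ELT path that follows) can be sketched with plain arrays: sub-byte lanes are any-extended to i8, the element is written through a byte-addressed stack slot, and the lanes are truncated back afterwards. Illustrative model only.

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  bool lanes[8] = {0, 1, 0, 1, 0, 1, 0, 1}; // logical <8 x i1> vector

  uint8_t widened[8];                       // ANY_EXTEND each lane to i8
  for (int i = 0; i != 8; ++i)
    widened[i] = lanes[i];

  uint8_t slot[8];                          // stack temporary
  std::memcpy(slot, widened, sizeof(slot)); // spill the widened vector
  unsigned idx = 4;
  slot[idx] = 1;                            // byte-addressed element store

  for (int i = 0; i != 8; ++i)              // reload and TRUNCATE back to i1
    lanes[i] = slot[i] & 1;

  assert(lanes[4] && lanes[3] && !lanes[0]);
  return 0;
}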
StackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx); - return DAG.getExtLoad(ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr, - MachinePointerInfo(), EltVT); + return DAG.getExtLoad( + ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr, + MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()), EltVT); } SDValue DAGTypeLegalizer::SplitVecOp_ExtVecInRegOp(SDNode *N) { @@ -1793,6 +1816,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT, SDValue Ch = MGT->getChain(); SDValue Ptr = MGT->getBasePtr(); SDValue Index = MGT->getIndex(); + SDValue Scale = MGT->getScale(); SDValue Mask = MGT->getMask(); SDValue Src0 = MGT->getValue(); unsigned Alignment = MGT->getOriginalAlignment(); @@ -1825,7 +1849,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT, MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), Alignment, MGT->getAAInfo(), MGT->getRanges()); - SDValue OpsLo[] = {Ch, Src0Lo, MaskLo, Ptr, IndexLo}; + SDValue OpsLo[] = {Ch, Src0Lo, MaskLo, Ptr, IndexLo, Scale}; SDValue Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl, OpsLo, MMO); @@ -1835,7 +1859,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT, Alignment, MGT->getAAInfo(), MGT->getRanges()); - SDValue OpsHi[] = {Ch, Src0Hi, MaskHi, Ptr, IndexHi}; + SDValue OpsHi[] = {Ch, Src0Hi, MaskHi, Ptr, IndexHi, Scale}; SDValue Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, dl, OpsHi, MMO); @@ -1881,9 +1905,6 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, else std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL); - MaskLo = PromoteTargetBoolean(MaskLo, DataLo.getValueType()); - MaskHi = PromoteTargetBoolean(MaskHi, DataHi.getValueType()); - // if Alignment is equal to the vector size, // take the half of it for the second part unsigned SecondHalfAlignment = @@ -1921,6 +1942,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N, SDValue Ptr = N->getBasePtr(); SDValue Mask = N->getMask(); SDValue Index = N->getIndex(); + SDValue Scale = N->getScale(); SDValue Data = N->getValue(); EVT MemoryVT = N->getMemoryVT(); unsigned Alignment = N->getOriginalAlignment(); @@ -1956,7 +1978,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N, MachineMemOperand::MOStore, LoMemVT.getStoreSize(), Alignment, N->getAAInfo(), N->getRanges()); - SDValue OpsLo[] = {Ch, DataLo, MaskLo, Ptr, IndexLo}; + SDValue OpsLo[] = {Ch, DataLo, MaskLo, Ptr, IndexLo, Scale}; Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(), DL, OpsLo, MMO); @@ -1968,7 +1990,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N, // The order of the Scatter operation after split is well defined. The "Hi" // part comes after the "Lo". So these two operations should be chained one // after another. - SDValue OpsHi[] = {Lo, DataHi, MaskHi, Ptr, IndexHi}; + SDValue OpsHi[] = {Lo, DataHi, MaskHi, Ptr, IndexHi, Scale}; return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(), DL, OpsHi, MMO); } @@ -1991,6 +2013,10 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { EVT LoMemVT, HiMemVT; std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + // Scalarize if the split halves are not byte-sized. 
+ if (!LoMemVT.isByteSized() || !HiMemVT.isByteSized()) + return TLI.scalarizeVectorStore(N, DAG); + unsigned IncrementSize = LoMemVT.getSizeInBits()/8; if (isTruncating) @@ -2001,8 +2027,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { AAInfo); // Increment the pointer to the other half. - Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, DL, Ptr.getValueType())); + Ptr = DAG.getObjectPtrOffset(DL, Ptr, IncrementSize); if (isTruncating) Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr, @@ -2913,25 +2938,11 @@ SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) { ISD::LoadExtType ExtType = N->getExtensionType(); SDLoc dl(N); - if (getTypeAction(MaskVT) == TargetLowering::TypeWidenVector) - Mask = GetWidenedVector(Mask); - else { - EVT BoolVT = getSetCCResultType(WidenVT); - - // We can't use ModifyToType() because we should fill the mask with - // zeroes - unsigned WidenNumElts = BoolVT.getVectorNumElements(); - unsigned MaskNumElts = MaskVT.getVectorNumElements(); - - unsigned NumConcat = WidenNumElts / MaskNumElts; - SmallVector Ops(NumConcat); - SDValue ZeroVal = DAG.getConstant(0, dl, MaskVT); - Ops[0] = Mask; - for (unsigned i = 1; i != NumConcat; ++i) - Ops[i] = ZeroVal; - - Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, BoolVT, Ops); - } + // The mask should be widened as well + EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), + MaskVT.getVectorElementType(), + WidenVT.getVectorNumElements()); + Mask = ModifyToType(Mask, WideMaskVT, true); SDValue Res = DAG.getMaskedLoad(WidenVT, dl, N->getChain(), N->getBasePtr(), Mask, Src0, N->getMemoryVT(), @@ -2947,12 +2958,17 @@ SDValue DAGTypeLegalizer::WidenVecRes_MGATHER(MaskedGatherSDNode *N) { EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Mask = N->getMask(); + EVT MaskVT = Mask.getValueType(); SDValue Src0 = GetWidenedVector(N->getValue()); + SDValue Scale = N->getScale(); unsigned NumElts = WideVT.getVectorNumElements(); SDLoc dl(N); // The mask should be widened as well - Mask = WidenTargetBoolean(Mask, WideVT, true); + EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), + MaskVT.getVectorElementType(), + WideVT.getVectorNumElements()); + Mask = ModifyToType(Mask, WideMaskVT, true); // Widen the Index operand SDValue Index = N->getIndex(); @@ -2960,7 +2976,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_MGATHER(MaskedGatherSDNode *N) { Index.getValueType().getScalarType(), NumElts); Index = ModifyToType(Index, WideIndexVT); - SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index }; + SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index, Scale }; SDValue Res = DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), N->getMemoryVT(), dl, Ops, N->getMemOperand()); @@ -3236,19 +3252,6 @@ SDValue DAGTypeLegalizer::WidenVecRes_SELECT_CC(SDNode *N) { N->getOperand(1), InOp1, InOp2, N->getOperand(4)); } -SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) { - assert(N->getValueType(0).isVector() == - N->getOperand(0).getValueType().isVector() && - "Scalar/Vector type mismatch"); - if (N->getValueType(0).isVector()) return WidenVecRes_VSETCC(N); - - EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue InOp1 = GetWidenedVector(N->getOperand(0)); - SDValue InOp2 = GetWidenedVector(N->getOperand(1)); - return DAG.getNode(ISD::SETCC, SDLoc(N), WidenVT, - InOp1, InOp2, N->getOperand(2)); -} - SDValue DAGTypeLegalizer::WidenVecRes_UNDEF(SDNode *N) { EVT WidenVT = 
TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); return DAG.getUNDEF(WidenVT); @@ -3279,7 +3282,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N) { return DAG.getVectorShuffle(WidenVT, dl, InOp1, InOp2, NewMask); } -SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) { +SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) { assert(N->getValueType(0).isVector() && N->getOperand(0).getValueType().isVector() && "Operands must be vectors"); @@ -3384,11 +3387,9 @@ SDValue DAGTypeLegalizer::WidenVecOp_EXTEND(SDNode *N) { EVT VT = N->getValueType(0); SDValue InOp = N->getOperand(0); - // If some legalization strategy other than widening is used on the operand, - // we can't safely assume that just extending the low lanes is the correct - // transformation. - if (getTypeAction(InOp.getValueType()) != TargetLowering::TypeWidenVector) - return WidenVecOp_Convert(N); + assert(getTypeAction(InOp.getValueType()) == + TargetLowering::TypeWidenVector && + "Unexpected type action"); InOp = GetWidenedVector(InOp); assert(VT.getVectorNumElements() < InOp.getValueType().getVectorNumElements() && @@ -3450,20 +3451,31 @@ SDValue DAGTypeLegalizer::WidenVecOp_FCOPYSIGN(SDNode *N) { } SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) { - // Since the result is legal and the input is illegal, it is unlikely that we - // can fix the input to a legal type so unroll the convert into some scalar - // code and create a nasty build vector. + // Since the result is legal and the input is illegal. EVT VT = N->getValueType(0); EVT EltVT = VT.getVectorElementType(); SDLoc dl(N); unsigned NumElts = VT.getVectorNumElements(); SDValue InOp = N->getOperand(0); - if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector) - InOp = GetWidenedVector(InOp); + assert(getTypeAction(InOp.getValueType()) == + TargetLowering::TypeWidenVector && + "Unexpected type action"); + InOp = GetWidenedVector(InOp); EVT InVT = InOp.getValueType(); + unsigned Opcode = N->getOpcode(); + + // See if a widened result type would be legal, if so widen the node. + EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, + InVT.getVectorNumElements()); + if (TLI.isTypeLegal(WideVT)) { + SDValue Res = DAG.getNode(Opcode, dl, WideVT, InOp); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Res, + DAG.getIntPtrConstant(0, dl)); + } + EVT InEltVT = InVT.getVectorElementType(); - unsigned Opcode = N->getOpcode(); + // Unroll the convert into some scalar code and create a nasty build vector. SmallVector Ops(NumElts); for (unsigned i=0; i < NumElts; ++i) Ops[i] = DAG.getNode( @@ -3516,8 +3528,10 @@ SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) { unsigned NumOperands = N->getNumOperands(); for (unsigned i=0; i < NumOperands; ++i) { SDValue InOp = N->getOperand(i); - if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector) - InOp = GetWidenedVector(InOp); + assert(getTypeAction(InOp.getValueType()) == + TargetLowering::TypeWidenVector && + "Unexpected type action"); + InOp = GetWidenedVector(InOp); for (unsigned j=0; j < NumInElts; ++j) Ops[Idx++] = DAG.getNode( ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, @@ -3543,6 +3557,9 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) { // vector type. 
StoreSDNode *ST = cast(N); + if (!ST->getMemoryVT().getScalarType().isByteSized()) + return TLI.scalarizeVectorStore(ST, DAG); + SmallVector StChain; if (ST->isTruncatingStore()) GenWidenVectorTruncStores(StChain, ST); @@ -3556,6 +3573,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) { } SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) { + assert(OpNo == 3 && "Can widen only data operand of mstore"); MaskedStoreSDNode *MST = cast(N); SDValue Mask = MST->getMask(); EVT MaskVT = Mask.getValueType(); @@ -3564,25 +3582,13 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) { SDValue WideVal = GetWidenedVector(StVal); SDLoc dl(N); - if (OpNo == 2 || getTypeAction(MaskVT) == TargetLowering::TypeWidenVector) - Mask = GetWidenedVector(Mask); - else { - // The mask should be widened as well. - EVT BoolVT = getSetCCResultType(WideVal.getValueType()); - // We can't use ModifyToType() because we should fill the mask with - // zeroes. - unsigned WidenNumElts = BoolVT.getVectorNumElements(); - unsigned MaskNumElts = MaskVT.getVectorNumElements(); - - unsigned NumConcat = WidenNumElts / MaskNumElts; - SmallVector Ops(NumConcat); - SDValue ZeroVal = DAG.getConstant(0, dl, MaskVT); - Ops[0] = Mask; - for (unsigned i = 1; i != NumConcat; ++i) - Ops[i] = ZeroVal; + // The mask should be widened as well. + EVT WideVT = WideVal.getValueType(); + EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), + MaskVT.getVectorElementType(), + WideVT.getVectorNumElements()); + Mask = ModifyToType(Mask, WideMaskVT, true); - Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, BoolVT, Ops); - } assert(Mask.getValueType().getVectorNumElements() == WideVal.getValueType().getVectorNumElements() && "Mask and data vectors should have the same number of elements"); @@ -3596,15 +3602,19 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) { MaskedScatterSDNode *MSC = cast(N); SDValue DataOp = MSC->getValue(); SDValue Mask = MSC->getMask(); + EVT MaskVT = Mask.getValueType(); + SDValue Scale = MSC->getScale(); // Widen the value. SDValue WideVal = GetWidenedVector(DataOp); EVT WideVT = WideVal.getValueType(); - unsigned NumElts = WideVal.getValueType().getVectorNumElements(); + unsigned NumElts = WideVT.getVectorNumElements(); SDLoc dl(N); // The mask should be widened as well. - Mask = WidenTargetBoolean(Mask, WideVT, true); + EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), + MaskVT.getVectorElementType(), NumElts); + Mask = ModifyToType(Mask, WideMaskVT, true); // Widen index. SDValue Index = MSC->getIndex(); @@ -3613,7 +3623,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) { NumElts); Index = ModifyToType(Index, WideIndexVT); - SDValue Ops[] = {MSC->getChain(), WideVal, Mask, MSC->getBasePtr(), Index}; + SDValue Ops[] = {MSC->getChain(), WideVal, Mask, MSC->getBasePtr(), Index, + Scale}; return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(), dl, Ops, MSC->getMemOperand()); @@ -3623,6 +3634,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) { SDValue InOp0 = GetWidenedVector(N->getOperand(0)); SDValue InOp1 = GetWidenedVector(N->getOperand(1)); SDLoc dl(N); + EVT VT = N->getValueType(0); // WARNING: In this code we widen the compare instruction with garbage. // This garbage may contain denormal floats which may be slow. Is this a real @@ -3632,18 +3644,23 @@ SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) { // Only some of the compared elements are legal. 
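// The compare below is performed on the full widened vectors, so the extra
// (garbage) lanes are compared as well; only the low lanes corresponding to
// the original type are extracted again via EXTRACT_SUBVECTOR, so the garbage
// results are never observed.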
EVT SVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), InOp0.getValueType()); + // The result type is legal, if its vXi1, keep vXi1 for the new SETCC. + if (VT.getScalarType() == MVT::i1) + SVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, + SVT.getVectorNumElements()); + SDValue WideSETCC = DAG.getNode(ISD::SETCC, SDLoc(N), - SVT, InOp0, InOp1, N->getOperand(2)); + SVT, InOp0, InOp1, N->getOperand(2)); // Extract the needed results from the result vector. EVT ResVT = EVT::getVectorVT(*DAG.getContext(), SVT.getVectorElementType(), - N->getValueType(0).getVectorNumElements()); + VT.getVectorNumElements()); SDValue CC = DAG.getNode( ISD::EXTRACT_SUBVECTOR, dl, ResVT, WideSETCC, DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); - return PromoteTargetBoolean(CC, N->getValueType(0)); + return PromoteTargetBoolean(CC, VT); } @@ -3806,8 +3823,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl &LdChain, while (LdWidth > 0) { unsigned Increment = NewVTWidth / 8; Offset += Increment; - BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, - DAG.getConstant(Increment, dl, BasePtr.getValueType())); + BasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Increment); SDValue L; if (LdWidth < NewVTWidth) { @@ -3929,10 +3945,7 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl &LdChain, LdChain.push_back(Ops[0].getValue(1)); unsigned i = 0, Offset = Increment; for (i=1; i < NumElts; ++i, Offset += Increment) { - SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), - BasePtr, - DAG.getConstant(Offset, dl, - BasePtr.getValueType())); + SDValue NewBasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Offset); Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr, LD->getPointerInfo().getWithOffset(Offset), LdEltVT, Align, MMOFlags, AAInfo); @@ -3987,9 +4000,8 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl &StChain, StWidth -= NewVTWidth; Offset += Increment; Idx += NumVTElts; - BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, - DAG.getConstant(Increment, dl, - BasePtr.getValueType())); + + BasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Increment); } while (StWidth != 0 && StWidth >= NewVTWidth); } else { // Cast the vector to the scalar type we can store. @@ -4008,9 +4020,7 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl &StChain, MinAlign(Align, Offset), MMOFlags, AAInfo)); StWidth -= NewVTWidth; Offset += Increment; - BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, - DAG.getConstant(Increment, dl, - BasePtr.getValueType())); + BasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Increment); } while (StWidth != 0 && StWidth >= NewVTWidth); // Restore index back to be relative to the original widen element type. 
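// For instance, if the chunks just stored were i32 scalars and the original
// element type is i8, a chunk count of 2 rescales to 2 * 32 / 8 = 8 original
// i8 elements.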
Idx = Idx * NewVTWidth / ValEltWidth; @@ -4053,10 +4063,7 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl &StChain, MMOFlags, AAInfo)); unsigned Offset = Increment; for (unsigned i=1; i < NumElts; ++i, Offset += Increment) { - SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), - BasePtr, - DAG.getConstant(Offset, dl, - BasePtr.getValueType())); + SDValue NewBasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Offset); SDValue EOp = DAG.getNode( ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp, DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); diff --git a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp index 379f0dcef513..7f369c746d24 100644 --- a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp +++ b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp @@ -252,6 +252,7 @@ bool ResourcePriorityQueue::isResourceAvailable(SUnit *SU) { if (!ResourcesModel->canReserveResources(&TII->get( SU->getNode()->getMachineOpcode()))) return false; + break; case TargetOpcode::EXTRACT_SUBREG: case TargetOpcode::INSERT_SUBREG: case TargetOpcode::SUBREG_TO_REG: diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index acbae1bae33b..49f304c8cc86 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -346,9 +346,8 @@ static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos, /// Schedule - Schedule the DAG using list scheduling. void ScheduleDAGRRList::Schedule() { - DEBUG(dbgs() - << "********** List Scheduling BB#" << BB->getNumber() - << " '" << BB->getName() << "' **********\n"); + DEBUG(dbgs() << "********** List Scheduling " << printMBBReference(*BB) + << " '" << BB->getName() << "' **********\n"); CurCycle = 0; IssueCount = 0; @@ -1430,10 +1429,12 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { SmallVector LRegs; if (!DelayForLiveRegsBottomUp(CurSU, LRegs)) break; - DEBUG(dbgs() << " Interfering reg " << - (LRegs[0] == TRI->getNumRegs() ? "CallResource" - : TRI->getName(LRegs[0])) - << " SU #" << CurSU->NodeNum << '\n'); + DEBUG(dbgs() << " Interfering reg "; + if (LRegs[0] == TRI->getNumRegs()) + dbgs() << "CallResource"; + else + dbgs() << printReg(LRegs[0], TRI); + dbgs() << " SU #" << CurSU->NodeNum << '\n'); std::pair LRegsPair = LRegsMap.insert(std::make_pair(CurSU, LRegs)); if (LRegsPair.second) { diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp index 54c1531a018e..07b46b9183ab 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp @@ -93,9 +93,8 @@ class ScheduleDAGVLIW : public ScheduleDAGSDNodes { /// Schedule - Schedule the DAG using list scheduling. void ScheduleDAGVLIW::Schedule() { - DEBUG(dbgs() - << "********** List Scheduling BB#" << BB->getNumber() - << " '" << BB->getName() << "' **********\n"); + DEBUG(dbgs() << "********** List Scheduling " << printMBBReference(*BB) + << " '" << BB->getName() << "' **********\n"); // Build the scheduling graph. 
BuildSchedGraph(AA); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 22e7885d0050..045d0ffc1f35 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -903,13 +903,14 @@ SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL) void SelectionDAG::init(MachineFunction &NewMF, OptimizationRemarkEmitter &NewORE, - Pass *PassPtr) { + Pass *PassPtr, const TargetLibraryInfo *LibraryInfo) { MF = &NewMF; SDAGISelPass = PassPtr; ORE = &NewORE; TLI = getSubtarget().getTargetLowering(); TSI = getSubtarget().getSelectionDAGInfo(); - Context = &MF->getFunction()->getContext(); + LibInfo = LibraryInfo; + Context = &MF->getFunction().getContext(); } SelectionDAG::~SelectionDAG() { @@ -1167,7 +1168,6 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL, Ops.insert(Ops.end(), EltParts.begin(), EltParts.end()); SDValue V = getNode(ISD::BITCAST, DL, VT, getBuildVector(ViaVecVT, DL, Ops)); - NewSDValueDbgMsg(V, "Creating constant: ", this); return V; } @@ -1188,13 +1188,13 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL, N = newSDNode(isT, isO, Elt, DL.getDebugLoc(), EltVT); CSEMap.InsertNode(N, IP); InsertNode(N); + NewSDValueDbgMsg(SDValue(N, 0), "Creating constant: ", this); } SDValue Result(N, 0); if (VT.isVector()) Result = getSplatBuildVector(VT, DL, Result); - NewSDValueDbgMsg(Result, "Creating constant: ", this); return Result; } @@ -1332,7 +1332,7 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT, assert((TargetFlags == 0 || isTarget) && "Cannot set target flags on target-independent globals"); if (Alignment == 0) - Alignment = MF->getFunction()->optForSize() + Alignment = MF->getFunction().optForSize() ? getDataLayout().getABITypeAlignment(C->getType()) : getDataLayout().getPrefTypeAlignment(C->getType()); unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool; @@ -2442,6 +2442,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, break; case ISD::SMULO: case ISD::UMULO: + case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: if (Op.getResNo() != 1) break; // The boolean result conforms to getBooleanContents. @@ -2463,27 +2464,49 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, case ISD::SHL: if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) { computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1); - Known.Zero <<= *ShAmt; - Known.One <<= *ShAmt; + unsigned Shift = ShAmt->getZExtValue(); + Known.Zero <<= Shift; + Known.One <<= Shift; // Low bits are known zero. - Known.Zero.setLowBits(ShAmt->getZExtValue()); + Known.Zero.setLowBits(Shift); } break; case ISD::SRL: if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) { computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1); - Known.Zero.lshrInPlace(*ShAmt); - Known.One.lshrInPlace(*ShAmt); + unsigned Shift = ShAmt->getZExtValue(); + Known.Zero.lshrInPlace(Shift); + Known.One.lshrInPlace(Shift); // High bits are known zero. - Known.Zero.setHighBits(ShAmt->getZExtValue()); + Known.Zero.setHighBits(Shift); + } else if (auto *BV = dyn_cast(Op.getOperand(1))) { + // If the shift amount is a vector of constants see if we can bound + // the number of upper zero bits. 
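// For example, with a shift amount vector of <i32 3, i32 5> every lane is
// shifted right by at least 3, so at least the 3 highest bits of each result
// lane are known to be zero; a single non-constant or out-of-range lane drops
// the bound to 0.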
+ unsigned ShiftAmountMin = BitWidth; + for (unsigned i = 0; i != BV->getNumOperands(); ++i) { + if (auto *C = dyn_cast(BV->getOperand(i))) { + const APInt &ShAmt = C->getAPIntValue(); + if (ShAmt.ult(BitWidth)) { + ShiftAmountMin = std::min(ShiftAmountMin, + ShAmt.getZExtValue()); + continue; + } + } + // Don't know anything. + ShiftAmountMin = 0; + break; + } + + Known.Zero.setHighBits(ShiftAmountMin); } break; case ISD::SRA: if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) { computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1); + unsigned Shift = ShAmt->getZExtValue(); // Sign extend known zero/one bit (else is unknown). - Known.Zero.ashrInPlace(*ShAmt); - Known.One.ashrInPlace(*ShAmt); + Known.Zero.ashrInPlace(Shift); + Known.One.ashrInPlace(Shift); } break; case ISD::SIGN_EXTEND_INREG: { @@ -3729,6 +3752,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: case ISD::TRUNCATE: + case ISD::ANY_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::SIGN_EXTEND: case ISD::UINT_TO_FP: case ISD::SINT_TO_FP: case ISD::ABS: @@ -4429,7 +4455,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, return getUNDEF(VT); // EXTRACT_VECTOR_ELT of out-of-bounds element is an UNDEF - if (N2C && N2C->getZExtValue() >= N1.getValueType().getVectorNumElements()) + if (N2C && N2C->getAPIntValue().uge(N1.getValueType().getVectorNumElements())) return getUNDEF(VT); // EXTRACT_VECTOR_ELT of CONCAT_VECTORS is often formed while lowering is @@ -5079,8 +5105,8 @@ static bool shouldLowerMemFuncForSize(const MachineFunction &MF) { // On Darwin, -Os means optimize for size without hurting performance, so // only really optimize for size when -Oz (MinSize) is used. if (MF.getTarget().getTargetTriple().isOSDarwin()) - return MF.getFunction()->optForMinSize(); - return MF.getFunction()->optForSize(); + return MF.getFunction().optForMinSize(); + return MF.getFunction().optForSize(); } static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, @@ -5755,21 +5781,15 @@ SDValue SelectionDAG::getMergeValues(ArrayRef Ops, const SDLoc &dl) { SDValue SelectionDAG::getMemIntrinsicNode( unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef Ops, - EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align, bool Vol, - bool ReadMem, bool WriteMem, unsigned Size) { + EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align, + MachineMemOperand::Flags Flags, unsigned Size) { if (Align == 0) // Ensure that codegen never sees alignment 0 Align = getEVTAlignment(MemVT); - MachineFunction &MF = getMachineFunction(); - auto Flags = MachineMemOperand::MONone; - if (WriteMem) - Flags |= MachineMemOperand::MOStore; - if (ReadMem) - Flags |= MachineMemOperand::MOLoad; - if (Vol) - Flags |= MachineMemOperand::MOVolatile; if (!Size) Size = MemVT.getStoreSize(); + + MachineFunction &MF = getMachineFunction(); MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, Flags, Size, Align); @@ -5821,7 +5841,8 @@ SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, /// MachinePointerInfo record from it. This is particularly useful because the /// code generator has many cases where it doesn't bother passing in a /// MachinePointerInfo to getLoad or getStore when it has "FI+Cst". 
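/// When the pointer cannot be matched to a frame index, the caller-provided
/// MachinePointerInfo is returned unchanged rather than a default-constructed
/// one, so details carried by the caller's PtrInfo (for example its address
/// space) are not lost.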
-static MachinePointerInfo InferPointerInfo(SelectionDAG &DAG, SDValue Ptr, +static MachinePointerInfo InferPointerInfo(const MachinePointerInfo &Info, + SelectionDAG &DAG, SDValue Ptr, int64_t Offset = 0) { // If this is FI+Offset, we can model it. if (const FrameIndexSDNode *FI = dyn_cast(Ptr)) @@ -5832,7 +5853,7 @@ static MachinePointerInfo InferPointerInfo(SelectionDAG &DAG, SDValue Ptr, if (Ptr.getOpcode() != ISD::ADD || !isa(Ptr.getOperand(1)) || !isa(Ptr.getOperand(0))) - return MachinePointerInfo(); + return Info; int FI = cast(Ptr.getOperand(0))->getIndex(); return MachinePointerInfo::getFixedStack( @@ -5844,14 +5865,15 @@ static MachinePointerInfo InferPointerInfo(SelectionDAG &DAG, SDValue Ptr, /// MachinePointerInfo record from it. This is particularly useful because the /// code generator has many cases where it doesn't bother passing in a /// MachinePointerInfo to getLoad or getStore when it has "FI+Cst". -static MachinePointerInfo InferPointerInfo(SelectionDAG &DAG, SDValue Ptr, +static MachinePointerInfo InferPointerInfo(const MachinePointerInfo &Info, + SelectionDAG &DAG, SDValue Ptr, SDValue OffsetOp) { // If the 'Offset' value isn't a constant, we can't handle this. if (ConstantSDNode *OffsetNode = dyn_cast(OffsetOp)) - return InferPointerInfo(DAG, Ptr, OffsetNode->getSExtValue()); + return InferPointerInfo(Info, DAG, Ptr, OffsetNode->getSExtValue()); if (OffsetOp.isUndef()) - return InferPointerInfo(DAG, Ptr); - return MachinePointerInfo(); + return InferPointerInfo(Info, DAG, Ptr); + return Info; } SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, @@ -5871,7 +5893,7 @@ SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, // If we don't have a PtrInfo, infer the trivial frame index case to simplify // clients. 
if (PtrInfo.V.isNull()) - PtrInfo = InferPointerInfo(*this, Ptr, Offset); + PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr, Offset); MachineFunction &MF = getMachineFunction(); MachineMemOperand *MMO = MF.getMachineMemOperand( @@ -5923,7 +5945,9 @@ SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, CSEMap.InsertNode(N, IP); InsertNode(N); - return SDValue(N, 0); + SDValue V(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; } SDValue SelectionDAG::getLoad(EVT VT, const SDLoc &dl, SDValue Chain, @@ -5990,7 +6014,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val, assert((MMOFlags & MachineMemOperand::MOLoad) == 0); if (PtrInfo.V.isNull()) - PtrInfo = InferPointerInfo(*this, Ptr); + PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr); MachineFunction &MF = getMachineFunction(); MachineMemOperand *MMO = MF.getMachineMemOperand( @@ -6023,7 +6047,9 @@ SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val, CSEMap.InsertNode(N, IP); InsertNode(N); - return SDValue(N, 0); + SDValue V(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; } SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, @@ -6040,7 +6066,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, assert((MMOFlags & MachineMemOperand::MOLoad) == 0); if (PtrInfo.V.isNull()) - PtrInfo = InferPointerInfo(*this, Ptr); + PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr); MachineFunction &MF = getMachineFunction(); MachineMemOperand *MMO = MF.getMachineMemOperand( @@ -6088,7 +6114,9 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, CSEMap.InsertNode(N, IP); InsertNode(N); - return SDValue(N, 0); + SDValue V(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; } SDValue SelectionDAG::getIndexedStore(SDValue OrigStore, const SDLoc &dl, @@ -6114,7 +6142,9 @@ SDValue SelectionDAG::getIndexedStore(SDValue OrigStore, const SDLoc &dl, CSEMap.InsertNode(N, IP); InsertNode(N); - return SDValue(N, 0); + SDValue V(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; } SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, @@ -6140,7 +6170,9 @@ SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, CSEMap.InsertNode(N, IP); InsertNode(N); - return SDValue(N, 0); + SDValue V(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; } SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl, @@ -6169,13 +6201,15 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl, CSEMap.InsertNode(N, IP); InsertNode(N); - return SDValue(N, 0); + SDValue V(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; } SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef Ops, MachineMemOperand *MMO) { - assert(Ops.size() == 5 && "Incompatible number of operands"); + assert(Ops.size() == 6 && "Incompatible number of operands"); FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::MGATHER, VTs, Ops); @@ -6201,16 +6235,21 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl, assert(N->getIndex().getValueType().getVectorNumElements() == N->getValueType(0).getVectorNumElements() && "Vector width mismatch between index and data"); + assert(isa(N->getScale()) && + cast(N->getScale())->getAPIntValue().isPowerOf2() && + "Scale should be a constant power of 2"); CSEMap.InsertNode(N, IP); InsertNode(N); - 
return SDValue(N, 0); + SDValue V(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; } SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef Ops, MachineMemOperand *MMO) { - assert(Ops.size() == 5 && "Incompatible number of operands"); + assert(Ops.size() == 6 && "Incompatible number of operands"); FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::MSCATTER, VTs, Ops); @@ -6233,10 +6272,15 @@ SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl, assert(N->getIndex().getValueType().getVectorNumElements() == N->getValue().getValueType().getVectorNumElements() && "Vector width mismatch between index and data"); + assert(isa(N->getScale()) && + cast(N->getScale())->getAPIntValue().isPowerOf2() && + "Scale should be a constant power of 2"); CSEMap.InsertNode(N, IP); InsertNode(N); - return SDValue(N, 0); + SDValue V(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; } SDValue SelectionDAG::getVAArg(EVT VT, const SDLoc &dl, SDValue Chain, @@ -6317,7 +6361,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, } InsertNode(N); - return SDValue(N, 0); + SDValue V(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; } SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, @@ -6370,7 +6416,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, createOperands(N, Ops); } InsertNode(N); - return SDValue(N, 0); + SDValue V(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; } SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, @@ -7088,6 +7136,8 @@ void SelectionDAG::transferDbgValues(SDValue From, SDValue To, void SelectionDAG::salvageDebugInfo(SDNode &N) { if (!N.getHasDebugValue()) return; + + SmallVector ClonedDVs; for (auto DV : GetDbgValues(&N)) { if (DV->isInvalidated()) continue; @@ -7106,17 +7156,21 @@ void SelectionDAG::salvageDebugInfo(SDNode &N) { // DW_OP_stack_value. 
auto *DIExpr = DV->getExpression(); DIExpr = DIExpression::prepend(DIExpr, DIExpression::NoDeref, Offset, + DIExpression::NoDeref, DIExpression::WithStackValue); SDDbgValue *Clone = getDbgValue(DV->getVariable(), DIExpr, N0.getNode(), N0.getResNo(), DV->isIndirect(), DV->getDebugLoc(), DV->getOrder()); + ClonedDVs.push_back(Clone); DV->setIsInvalidated(); - AddDbgValue(Clone, N0.getNode(), false); DEBUG(dbgs() << "SALVAGE: Rewriting"; N0.getNode()->dumprFull(this); dbgs() << " into " << *DIExpr << '\n'); } } } + + for (SDDbgValue *Dbg : ClonedDVs) + AddDbgValue(Dbg, Dbg->getSDNode(), false); } namespace { @@ -7901,11 +7955,8 @@ bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD, if (VT.getSizeInBits() / 8 != Bytes) return false; - SDValue Loc = LD->getOperand(1); - SDValue BaseLoc = Base->getOperand(1); - - auto BaseLocDecomp = BaseIndexOffset::match(BaseLoc, *this); - auto LocDecomp = BaseIndexOffset::match(Loc, *this); + auto BaseLocDecomp = BaseIndexOffset::match(Base, *this); + auto LocDecomp = BaseIndexOffset::match(LD, *this); int64_t Offset = 0; if (BaseLocDecomp.equalBaseIndex(LocDecomp, *this, Offset)) diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp index 544da362be69..da1574f60524 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp @@ -21,6 +21,9 @@ using namespace llvm; bool BaseIndexOffset::equalBaseIndex(BaseIndexOffset &Other, const SelectionDAG &DAG, int64_t &Off) { + // Conservatively fail if we a match failed.. + if (!Base.getNode() || !Other.Base.getNode()) + return false; // Initial Offset difference. Off = Other.Offset - Offset; @@ -37,6 +40,23 @@ bool BaseIndexOffset::equalBaseIndex(BaseIndexOffset &Other, return true; } + // Match Constants + if (auto *A = dyn_cast(Base)) + if (auto *B = dyn_cast(Other.Base)) { + bool IsMatch = + A->isMachineConstantPoolEntry() == B->isMachineConstantPoolEntry(); + if (IsMatch) { + if (A->isMachineConstantPoolEntry()) + IsMatch = A->getMachineCPVal() == B->getMachineCPVal(); + else + IsMatch = A->getConstVal() == B->getConstVal(); + } + if (IsMatch) { + Off += B->getOffset() - A->getOffset(); + return true; + } + } + const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); // Match non-equal FrameIndexes - If both frame indices are fixed @@ -55,13 +75,29 @@ bool BaseIndexOffset::equalBaseIndex(BaseIndexOffset &Other, } /// Parses tree in Ptr for base, index, offset addresses. -BaseIndexOffset BaseIndexOffset::match(SDValue Ptr, const SelectionDAG &DAG) { +BaseIndexOffset BaseIndexOffset::match(LSBaseSDNode *N, + const SelectionDAG &DAG) { + SDValue Ptr = N->getBasePtr(); + // (((B + I*M) + c)) + c ... SDValue Base = DAG.getTargetLoweringInfo().unwrapAddress(Ptr); SDValue Index = SDValue(); int64_t Offset = 0; bool IsIndexSignExt = false; + // pre-inc/pre-dec ops are components of EA. + if (N->getAddressingMode() == ISD::PRE_INC) { + if (auto *C = dyn_cast(N->getOffset())) + Offset += C->getSExtValue(); + else // If unknown, give up now. + return BaseIndexOffset(SDValue(), SDValue(), 0, false); + } else if (N->getAddressingMode() == ISD::PRE_DEC) { + if (auto *C = dyn_cast(N->getOffset())) + Offset -= C->getSExtValue(); + else // If unknown, give up now. + return BaseIndexOffset(SDValue(), SDValue(), 0, false); + } + // Consume constant adds & ors with appropriate masking. 
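// For example, (add (add FI, 8), 16) decomposes to Base = FI with Offset = 24;
// an OR is only folded this way when the constant operand has no bits in
// common with the other operand, i.e. when it behaves as an add.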
while (Base->getOpcode() == ISD::ADD || Base->getOpcode() == ISD::OR) { if (auto *C = dyn_cast(Base->getOperand(1))) { diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index f49e22b8288d..1295b83fc6ca 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -128,11 +128,11 @@ using namespace llvm; static unsigned LimitFloatPrecision; static cl::opt -LimitFPPrecision("limit-float-precision", - cl::desc("Generate low-precision inline sequences " - "for some float libcalls"), - cl::location(LimitFloatPrecision), - cl::init(0)); + LimitFPPrecision("limit-float-precision", + cl::desc("Generate low-precision inline sequences " + "for some float libcalls"), + cl::location(LimitFloatPrecision), cl::Hidden, + cl::init(0)); static cl::opt SwitchPeelThreshold( "switch-peel-threshold", cl::Hidden, cl::init(66), @@ -1472,7 +1472,9 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { // Leave Outs empty so that LowerReturn won't try to load return // registers the usual way. SmallVector PtrValueVTs; - ComputeValueVTs(TLI, DL, PointerType::getUnqual(F->getReturnType()), + ComputeValueVTs(TLI, DL, + F->getReturnType()->getPointerTo( + DAG.getDataLayout().getAllocaAddrSpace()), PtrValueVTs); SDValue RetPtr = DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), @@ -1484,22 +1486,15 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs, &Offsets); unsigned NumValues = ValueVTs.size(); - // An aggregate return value cannot wrap around the address space, so - // offsets to its parts don't wrap either. - SDNodeFlags Flags; - Flags.setNoUnsignedWrap(true); - SmallVector Chains(NumValues); for (unsigned i = 0; i != NumValues; ++i) { - SDValue Add = DAG.getNode(ISD::ADD, getCurSDLoc(), - RetPtr.getValueType(), RetPtr, - DAG.getIntPtrConstant(Offsets[i], - getCurSDLoc()), - Flags); - Chains[i] = DAG.getStore(Chain, getCurSDLoc(), - SDValue(RetOp.getNode(), RetOp.getResNo() + i), - // FIXME: better loc info would be nice. - Add, MachinePointerInfo()); + // An aggregate return value cannot wrap around the address space, so + // offsets to its parts don't wrap either. + SDValue Ptr = DAG.getObjectPtrOffset(getCurSDLoc(), RetPtr, Offsets[i]); + Chains[i] = DAG.getStore( + Chain, getCurSDLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + i), + // FIXME: better loc info would be nice. + Ptr, MachinePointerInfo::getUnknownStack(DAG.getMachineFunction())); } Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), @@ -1578,9 +1573,9 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { EVT(TLI.getPointerTy(DL)))); } - bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg(); + bool isVarArg = DAG.getMachineFunction().getFunction().isVarArg(); CallingConv::ID CallConv = - DAG.getMachineFunction().getFunction()->getCallingConv(); + DAG.getMachineFunction().getFunction().getCallingConv(); Chain = DAG.getTargetLoweringInfo().LowerReturn( Chain, CallConv, isVarArg, Outs, OutVals, getCurSDLoc(), DAG); @@ -1774,7 +1769,7 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, // If this node is not part of the or/and tree, emit it as a branch. 
if (!BOp || !(isa(BOp) || isa(BOp)) || - BOpc != Opc || !BOp->hasOneUse() || + BOpc != unsigned(Opc) || !BOp->hasOneUse() || BOp->getParent() != CurBB->getBasicBlock() || !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) || !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) { @@ -2115,7 +2110,7 @@ static SDValue getLoadStackGuard(SelectionDAG &DAG, const SDLoc &DL, const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout()); MachineFunction &MF = DAG.getMachineFunction(); - Value *Global = TLI.getSDagStackGuard(*MF.getFunction()->getParent()); + Value *Global = TLI.getSDagStackGuard(*MF.getFunction().getParent()); MachineSDNode *Node = DAG.getMachineNode(TargetOpcode::LOAD_STACK_GUARD, DL, PtrTy, Chain); if (Global) { @@ -2149,15 +2144,18 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, SDValue Guard; SDLoc dl = getCurSDLoc(); SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy); - const Module &M = *ParentBB->getParent()->getFunction()->getParent(); + const Module &M = *ParentBB->getParent()->getFunction().getParent(); unsigned Align = DL->getPrefTypeAlignment(Type::getInt8PtrTy(M.getContext())); // Generate code to load the content of the guard slot. - SDValue StackSlot = DAG.getLoad( + SDValue GuardVal = DAG.getLoad( PtrTy, dl, DAG.getEntryNode(), StackSlotPtr, MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), Align, MachineMemOperand::MOVolatile); + if (TLI.useStackGuardXorFP()) + GuardVal = TLI.emitStackGuardXorFP(DAG, GuardVal, dl); + // Retrieve guard check function, nullptr if instrumentation is inlined. if (const Value *GuardCheck = TLI.getSSPStackGuardCheck(M)) { // The target provides a guard check function to validate the guard value. @@ -2169,7 +2167,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; - Entry.Node = StackSlot; + Entry.Node = GuardVal; Entry.Ty = FnTy->getParamType(0); if (Fn->hasAttribute(1, Attribute::AttrKind::InReg)) Entry.IsInReg = true; @@ -2202,7 +2200,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, // Perform the comparison via a subtract/getsetcc. EVT VT = Guard.getValueType(); - SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, Guard, StackSlot); + SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, Guard, GuardVal); SDValue Cmp = DAG.getSetCC(dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), @@ -2212,7 +2210,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, // If the sub is not 0, then we know the guard/stackslot do not equal, so // branch to failure MBB. SDValue BrCond = DAG.getNode(ISD::BRCOND, dl, - MVT::Other, StackSlot.getOperand(0), + MVT::Other, GuardVal.getOperand(0), Cmp, DAG.getBasicBlock(SPD.getFailureMBB())); // Otherwise branch to success MBB. SDValue Br = DAG.getNode(ISD::BR, dl, @@ -3869,7 +3867,7 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I, // extract the splat value and use it as a uniform base. // In all other cases the function returns 'false'. 
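// The returned Scale is the GEP's element size as a target constant; when no
// uniform base can be found, the callers fall back to a base of 0, the full
// pointer vector as the index, and a scale of 1.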
static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index, - SelectionDAGBuilder* SDB) { + SDValue &Scale, SelectionDAGBuilder* SDB) { SelectionDAG& DAG = SDB->DAG; LLVMContext &Context = *DAG.getContext(); @@ -3899,6 +3897,10 @@ static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index, if (!SDB->findValue(Ptr) || !SDB->findValue(IndexVal)) return false; + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + const DataLayout &DL = DAG.getDataLayout(); + Scale = DAG.getTargetConstant(DL.getTypeAllocSize(GEP->getResultElementType()), + SDB->getCurSDLoc(), TLI.getPointerTy(DL)); Base = SDB->getValue(Ptr); Index = SDB->getValue(IndexVal); @@ -3928,8 +3930,9 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { SDValue Base; SDValue Index; + SDValue Scale; const Value *BasePtr = Ptr; - bool UniformBase = getUniformBase(BasePtr, Base, Index, this); + bool UniformBase = getUniformBase(BasePtr, Base, Index, Scale, this); const Value *MemOpBasePtr = UniformBase ? BasePtr : nullptr; MachineMemOperand *MMO = DAG.getMachineFunction(). @@ -3937,10 +3940,11 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { MachineMemOperand::MOStore, VT.getStoreSize(), Alignment, AAInfo); if (!UniformBase) { - Base = DAG.getTargetConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())); + Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())); Index = getValue(Ptr); + Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout())); } - SDValue Ops[] = { getRoot(), Src0, Mask, Base, Index }; + SDValue Ops[] = { getRoot(), Src0, Mask, Base, Index, Scale }; SDValue Scatter = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), VT, sdl, Ops, MMO); DAG.setRoot(Scatter); @@ -4027,8 +4031,9 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { SDValue Root = DAG.getRoot(); SDValue Base; SDValue Index; + SDValue Scale; const Value *BasePtr = Ptr; - bool UniformBase = getUniformBase(BasePtr, Base, Index, this); + bool UniformBase = getUniformBase(BasePtr, Base, Index, Scale, this); bool ConstantMemory = false; if (UniformBase && AA && AA->pointsToConstantMemory(MemoryLocation( @@ -4046,10 +4051,11 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { Alignment, AAInfo, Ranges); if (!UniformBase) { - Base = DAG.getTargetConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())); + Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())); Index = getValue(Ptr); + Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout())); } - SDValue Ops[] = { Root, Src0, Mask, Base, Index }; + SDValue Ops[] = { Root, Src0, Mask, Base, Index, Scale }; SDValue Gather = DAG.getMaskedGather(DAG.getVTList(VT, MVT::Other), VT, sdl, Ops, MMO); @@ -4140,7 +4146,8 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); - if (I.getAlignment() < VT.getSizeInBits() / 8) + if (!TLI.supportsUnalignedAtomics() && + I.getAlignment() < VT.getStoreSize()) report_fatal_error("Cannot generate unaligned atomic load"); MachineMemOperand *MMO = @@ -4176,7 +4183,7 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) { EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getValueOperand()->getType()); - if (I.getAlignment() < VT.getSizeInBits() / 8) + if (I.getAlignment() < VT.getStoreSize()) report_fatal_error("Cannot generate unaligned atomic store"); SDValue OutChain = @@ -4215,7 
+4222,9 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, // Info is set by getTgtMemInstrinsic TargetLowering::IntrinsicInfo Info; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, Intrinsic); + bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, + DAG.getMachineFunction(), + Intrinsic); // Add the intrinsic ID as an integer operand if it's not a target intrinsic. if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID || @@ -4241,11 +4250,10 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, SDValue Result; if (IsTgtIntrinsic) { // This is target intrinsic that touches memory - Result = DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), - VTs, Ops, Info.memVT, - MachinePointerInfo(Info.ptrVal, Info.offset), - Info.align, Info.vol, - Info.readMem, Info.writeMem, Info.size); + Result = DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, + Ops, Info.memVT, + MachinePointerInfo(Info.ptrVal, Info.offset), Info.align, + Info.flags, Info.size); } else if (!HasChain) { Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops); } else if (!I.getType()->isVoidTy()) { @@ -4766,8 +4774,8 @@ static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS, if (Val == 0) return DAG.getConstantFP(1.0, DL, LHS.getValueType()); - const Function *F = DAG.getMachineFunction().getFunction(); - if (!F->optForSize() || + const Function &F = DAG.getMachineFunction().getFunction(); + if (!F.optForSize() || // If optimizing for size, don't insert too many multiplies. // This inserts up to 5 multiplies. countPopulation(Val) + Log2_32(Val) < 7) { @@ -4854,6 +4862,13 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( } } + if (!Op && N.getNode()) + // Check if frame index is available. + if (LoadSDNode *LNode = dyn_cast(N.getNode())) + if (FrameIndexSDNode *FINode = + dyn_cast(LNode->getBasePtr().getNode())) + Op = MachineOperand::CreateFI(FINode->getIndex()); + if (!Op) { // Check if ValueMap has reg number. DenseMap::iterator VMI = FuncInfo.ValueMap.find(V); @@ -4889,13 +4904,6 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( } } - if (!Op && N.getNode()) - // Check if frame index is available. - if (LoadSDNode *LNode = dyn_cast(N.getNode())) - if (FrameIndexSDNode *FINode = - dyn_cast(LNode->getBasePtr().getNode())) - Op = MachineOperand::CreateFI(FINode->getIndex()); - if (!Op) return false; @@ -5000,14 +5008,17 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::longjmp: return &"_longjmp"[!TLI.usesUnderscoreLongJmp()]; case Intrinsic::memcpy: { + const auto &MCI = cast(I); SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); SDValue Op3 = getValue(I.getArgOperand(2)); - unsigned Align = cast(I.getArgOperand(3))->getZExtValue(); + unsigned Align = MCI.getAlignment(); if (!Align) Align = 1; // @llvm.memcpy defines 0 and 1 to both mean no alignment. - bool isVol = cast(I.getArgOperand(4))->getZExtValue(); + bool isVol = MCI.isVolatile(); bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); + // FIXME: Support passing different dest/src alignments to the memcpy DAG + // node. 
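// Until then a single alignment is passed; if distinct source and destination
// alignments become available here, the smaller of the two would be the
// conservative value to use.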
SDValue MC = DAG.getMemcpy(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, false, isTC, MachinePointerInfo(I.getArgOperand(0)), @@ -5016,13 +5027,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; } case Intrinsic::memset: { + const auto &MSI = cast(I); SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); SDValue Op3 = getValue(I.getArgOperand(2)); - unsigned Align = cast(I.getArgOperand(3))->getZExtValue(); + unsigned Align = MSI.getAlignment(); if (!Align) Align = 1; // @llvm.memset defines 0 and 1 to both mean no alignment. - bool isVol = cast(I.getArgOperand(4))->getZExtValue(); + bool isVol = MSI.isVolatile(); bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); SDValue MS = DAG.getMemset(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, isTC, MachinePointerInfo(I.getArgOperand(0))); @@ -5030,14 +5042,17 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; } case Intrinsic::memmove: { + const auto &MMI = cast(I); SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); SDValue Op3 = getValue(I.getArgOperand(2)); - unsigned Align = cast(I.getArgOperand(3))->getZExtValue(); + unsigned Align = MMI.getAlignment(); if (!Align) Align = 1; // @llvm.memmove defines 0 and 1 to both mean no alignment. - bool isVol = cast(I.getArgOperand(4))->getZExtValue(); + bool isVol = MMI.isVolatile(); bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); + // FIXME: Support passing different dest/src alignments to the memmove DAG + // node. SDValue MM = DAG.getMemmove(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, isTC, MachinePointerInfo(I.getArgOperand(0)), MachinePointerInfo(I.getArgOperand(1))); @@ -5640,7 +5655,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::stackguard: { EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout()); MachineFunction &MF = DAG.getMachineFunction(); - const Module &M = *MF.getFunction()->getParent(); + const Module &M = *MF.getFunction().getParent(); SDValue Chain = getRoot(); if (TLI.useLoadStackGuardNode()) { Res = getLoadStackGuard(DAG, sdl, Chain); @@ -5651,6 +5666,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { MachinePointerInfo(Global, 0), Align, MachineMemOperand::MOVolatile); } + if (TLI.useStackGuardXorFP()) + Res = TLI.emitStackGuardXorFP(DAG, Res, sdl); DAG.setRoot(Chain); setValue(&I, Res); return nullptr; @@ -5745,10 +5762,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getValue(I.getArgOperand(0)))); return nullptr; case Intrinsic::gcroot: { - MachineFunction &MF = DAG.getMachineFunction(); - const Function *F = MF.getFunction(); - (void)F; - assert(F->hasGC() && + assert(DAG.getMachineFunction().getFunction().hasGC() && "only valid in functions with gc specified, enforced by Verifier"); assert(GFI && "implied by previous"); const Value *Alloca = I.getArgOperand(0)->stripPointerCasts(); @@ -5822,19 +5836,24 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::prefetch: { SDValue Ops[5]; unsigned rw = cast(I.getArgOperand(1))->getZExtValue(); - Ops[0] = getRoot(); + auto Flags = rw == 0 ? 
MachineMemOperand::MOLoad :MachineMemOperand::MOStore; + Ops[0] = DAG.getRoot(); Ops[1] = getValue(I.getArgOperand(0)); Ops[2] = getValue(I.getArgOperand(1)); Ops[3] = getValue(I.getArgOperand(2)); Ops[4] = getValue(I.getArgOperand(3)); - DAG.setRoot(DAG.getMemIntrinsicNode(ISD::PREFETCH, sdl, - DAG.getVTList(MVT::Other), Ops, - EVT::getIntegerVT(*Context, 8), - MachinePointerInfo(I.getArgOperand(0)), - 0, /* align */ - false, /* volatile */ - rw==0, /* read */ - rw==1)); /* write */ + SDValue Result = DAG.getMemIntrinsicNode(ISD::PREFETCH, sdl, + DAG.getVTList(MVT::Other), Ops, + EVT::getIntegerVT(*Context, 8), + MachinePointerInfo(I.getArgOperand(0)), + 0, /* align */ + Flags); + + // Chain the prefetch in parallell with any pending loads, to stay out of + // the way of later optimizations. + PendingLoads.push_back(Result); + Result = getRoot(); + DAG.setRoot(Result); return nullptr; } case Intrinsic::lifetime_start: @@ -8597,7 +8616,9 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // Put in an sret pointer parameter before all the other parameters. SmallVector ValueVTs; ComputeValueVTs(*TLI, DAG.getDataLayout(), - PointerType::getUnqual(F.getReturnType()), ValueVTs); + F.getReturnType()->getPointerTo( + DAG.getDataLayout().getAllocaAddrSpace()), + ValueVTs); // NOTE: Assuming that a pointer will never break down to more than one VT // or one register. @@ -8751,7 +8772,9 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // from the sret argument into it. SmallVector ValueVTs; ComputeValueVTs(*TLI, DAG.getDataLayout(), - PointerType::getUnqual(F.getReturnType()), ValueVTs); + F.getReturnType()->getPointerTo( + DAG.getDataLayout().getAllocaAddrSpace()), + ValueVTs); MVT VT = ValueVTs[0].getSimpleVT(); MVT RegVT = TLI->getRegisterType(*CurDAG->getContext(), VT); Optional AssertOp = None; @@ -8938,17 +8961,17 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { // At this point we know that there is a 1-1 correspondence between LLVM PHI // nodes and Machine PHI nodes, but the incoming operands have not been // emitted yet. - for (BasicBlock::const_iterator I = SuccBB->begin(); - const PHINode *PN = dyn_cast(I); ++I) { + for (const PHINode &PN : SuccBB->phis()) { // Ignore dead phi's. - if (PN->use_empty()) continue; + if (PN.use_empty()) + continue; // Skip empty types - if (PN->getType()->isEmptyTy()) + if (PN.getType()->isEmptyTy()) continue; unsigned Reg; - const Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB); + const Value *PHIOp = PN.getIncomingValueForBlock(LLVMBB); if (const Constant *C = dyn_cast(PHIOp)) { unsigned &RegOut = ConstantsOut[C]; @@ -8975,7 +8998,7 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { // the input for this MBB. SmallVector ValueVTs; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - ComputeValueVTs(TLI, DAG.getDataLayout(), PN->getType(), ValueVTs); + ComputeValueVTs(TLI, DAG.getDataLayout(), PN.getType(), ValueVTs); for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) { EVT VT = ValueVTs[vti]; unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT); @@ -9350,10 +9373,12 @@ bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters, BitTestInfo BTI; std::sort(CBV.begin(), CBV.end(), [](const CaseBits &a, const CaseBits &b) { - // Sort by probability first, number of bits second. + // Sort by probability first, number of bits second, bit mask third. 
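// Adding the mask as a final key removes ties from the comparison, so the
// resulting order of bit tests is deterministic across hosts and
// standard-library implementations.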
if (a.ExtraProb != b.ExtraProb) return a.ExtraProb > b.ExtraProb; - return a.Bits > b.Bits; + if (a.Bits != b.Bits) + return a.Bits > b.Bits; + return a.Mask < b.Mask; }); for (auto &CB : CBV) { @@ -9542,10 +9567,15 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, } if (TM.getOptLevel() != CodeGenOpt::None) { - // Order cases by probability so the most likely case will be checked first. + // Here, we order cases by probability so the most likely case will be + // checked first. However, two clusters can have the same probability in + // which case their relative ordering is non-deterministic. So we use Low + // as a tie-breaker as clusters are guaranteed to never overlap. std::sort(W.FirstCluster, W.LastCluster + 1, [](const CaseCluster &a, const CaseCluster &b) { - return a.Prob > b.Prob; + return a.Prob != b.Prob ? + a.Prob > b.Prob : + a.Low->getValue().slt(b.Low->getValue()); }); // Rearrange the case blocks so that the last one falls through if possible @@ -9857,7 +9887,7 @@ MachineBasicBlock *SelectionDAGBuilder::peelDominantCaseCluster( // Don't perform if there is only one cluster or optimizing for size. if (SwitchPeelThreshold > 100 || !FuncInfo.BPI || Clusters.size() < 2 || TM.getOptLevel() == CodeGenOpt::None || - SwitchMBB->getParent()->getFunction()->optForMinSize()) + SwitchMBB->getParent()->getFunction().optForMinSize()) return SwitchMBB; BranchProbability TopCaseProb = BranchProbability(SwitchPeelThreshold, 100); @@ -10009,7 +10039,7 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { unsigned NumClusters = W.LastCluster - W.FirstCluster + 1; if (NumClusters > 3 && TM.getOptLevel() != CodeGenOpt::None && - !DefaultMBB->getParent()->getFunction()->optForMinSize()) { + !DefaultMBB->getParent()->getFunction().optForMinSize()) { // For optimized builds, lower large range as a balanced binary tree. 
splitWorkItem(WorkList, W, SI.getCondition(), SwitchMBB); continue; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index cb37137d547f..c3d782802eba 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -212,7 +212,7 @@ namespace llvm { IS.OptLevel = NewOptLevel; IS.TM.setOptLevel(NewOptLevel); DEBUG(dbgs() << "\nChanging optimization level for Function " - << IS.MF->getFunction()->getName() << "\n"); + << IS.MF->getFunction().getName() << "\n"); DEBUG(dbgs() << "\tBefore: -O" << SavedOptLevel << " ; After: -O" << NewOptLevel << "\n"); SavedFastISel = IS.TM.Options.EnableFastISel; @@ -228,7 +228,7 @@ namespace llvm { if (IS.OptLevel == SavedOptLevel) return; DEBUG(dbgs() << "\nRestoring optimization level for Function " - << IS.MF->getFunction()->getName() << "\n"); + << IS.MF->getFunction().getName() << "\n"); DEBUG(dbgs() << "\tBefore: -O" << IS.OptLevel << " ; After: -O" << SavedOptLevel << "\n"); IS.OptLevel = SavedOptLevel; @@ -384,7 +384,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { assert((!EnableFastISelAbort || TM.Options.EnableFastISel) && "-fast-isel-abort > 0 requires -fast-isel"); - const Function &Fn = *mf.getFunction(); + const Function &Fn = mf.getFunction(); MF = &mf; // Reset the target options before resetting the optimization @@ -414,7 +414,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { SplitCriticalSideEffectEdges(const_cast(Fn), DT, LI); - CurDAG->init(*MF, *ORE, this); + CurDAG->init(*MF, *ORE, this, LibInfo); FuncInfo->set(Fn, *MF, CurDAG); // Now get the optional analyzes if we want to. @@ -730,8 +730,9 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { BlockName = (MF->getName() + ":" + FuncInfo->MBB->getBasicBlock()->getName()).str(); } - DEBUG(dbgs() << "Initial selection DAG: BB#" << BlockNumber - << " '" << BlockName << "'\n"; CurDAG->dump()); + DEBUG(dbgs() << "Initial selection DAG: " << printMBBReference(*FuncInfo->MBB) + << " '" << BlockName << "'\n"; + CurDAG->dump()); if (ViewDAGCombine1 && MatchFilterBB) CurDAG->viewGraph("dag-combine1 input for " + BlockName); @@ -743,8 +744,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Combine(BeforeLegalizeTypes, AA, OptLevel); } - DEBUG(dbgs() << "Optimized lowered selection DAG: BB#" << BlockNumber - << " '" << BlockName << "'\n"; CurDAG->dump()); + DEBUG(dbgs() << "Optimized lowered selection DAG: " + << printMBBReference(*FuncInfo->MBB) << " '" << BlockName + << "'\n"; + CurDAG->dump()); // Second step, hack on the DAG until it only uses operations and types that // the target supports. @@ -758,8 +761,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { Changed = CurDAG->LegalizeTypes(); } - DEBUG(dbgs() << "Type-legalized selection DAG: BB#" << BlockNumber - << " '" << BlockName << "'\n"; CurDAG->dump()); + DEBUG(dbgs() << "Type-legalized selection DAG: " + << printMBBReference(*FuncInfo->MBB) << " '" << BlockName + << "'\n"; + CurDAG->dump()); // Only allow creation of legal node types. 
CurDAG->NewNodesMustHaveLegalTypes = true; @@ -775,8 +780,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Combine(AfterLegalizeTypes, AA, OptLevel); } - DEBUG(dbgs() << "Optimized type-legalized selection DAG: BB#" << BlockNumber - << " '" << BlockName << "'\n"; CurDAG->dump()); + DEBUG(dbgs() << "Optimized type-legalized selection DAG: " + << printMBBReference(*FuncInfo->MBB) << " '" << BlockName + << "'\n"; + CurDAG->dump()); } { @@ -786,8 +793,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { } if (Changed) { - DEBUG(dbgs() << "Vector-legalized selection DAG: BB#" << BlockNumber - << " '" << BlockName << "'\n"; CurDAG->dump()); + DEBUG(dbgs() << "Vector-legalized selection DAG: " + << printMBBReference(*FuncInfo->MBB) << " '" << BlockName + << "'\n"; + CurDAG->dump()); { NamedRegionTimer T("legalize_types2", "Type Legalization 2", GroupName, @@ -795,8 +804,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->LegalizeTypes(); } - DEBUG(dbgs() << "Vector/type-legalized selection DAG: BB#" << BlockNumber - << " '" << BlockName << "'\n"; CurDAG->dump()); + DEBUG(dbgs() << "Vector/type-legalized selection DAG: " + << printMBBReference(*FuncInfo->MBB) << " '" << BlockName + << "'\n"; + CurDAG->dump()); if (ViewDAGCombineLT && MatchFilterBB) CurDAG->viewGraph("dag-combine-lv input for " + BlockName); @@ -808,8 +819,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Combine(AfterLegalizeVectorOps, AA, OptLevel); } - DEBUG(dbgs() << "Optimized vector-legalized selection DAG: BB#" - << BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump()); + DEBUG(dbgs() << "Optimized vector-legalized selection DAG: " + << printMBBReference(*FuncInfo->MBB) << " '" << BlockName + << "'\n"; + CurDAG->dump()); } if (ViewLegalizeDAGs && MatchFilterBB) @@ -821,8 +834,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Legalize(); } - DEBUG(dbgs() << "Legalized selection DAG: BB#" << BlockNumber - << " '" << BlockName << "'\n"; CurDAG->dump()); + DEBUG(dbgs() << "Legalized selection DAG: " + << printMBBReference(*FuncInfo->MBB) << " '" << BlockName + << "'\n"; + CurDAG->dump()); if (ViewDAGCombine2 && MatchFilterBB) CurDAG->viewGraph("dag-combine2 input for " + BlockName); @@ -834,8 +849,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Combine(AfterLegalizeDAG, AA, OptLevel); } - DEBUG(dbgs() << "Optimized legalized selection DAG: BB#" << BlockNumber - << " '" << BlockName << "'\n"; CurDAG->dump()); + DEBUG(dbgs() << "Optimized legalized selection DAG: " + << printMBBReference(*FuncInfo->MBB) << " '" << BlockName + << "'\n"; + CurDAG->dump()); if (OptLevel != CodeGenOpt::None) ComputeLiveOutVRegInfo(); @@ -851,8 +868,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { DoInstructionSelection(); } - DEBUG(dbgs() << "Selected selection DAG: BB#" << BlockNumber - << " '" << BlockName << "'\n"; CurDAG->dump()); + DEBUG(dbgs() << "Selected selection DAG: " + << printMBBReference(*FuncInfo->MBB) << " '" << BlockName + << "'\n"; + CurDAG->dump()); if (ViewSchedDAGs && MatchFilterBB) CurDAG->viewGraph("scheduler input for " + BlockName); @@ -919,9 +938,9 @@ class ISelUpdater : public SelectionDAG::DAGUpdateListener { } // end anonymous namespace void SelectionDAGISel::DoInstructionSelection() { - DEBUG(dbgs() << "===== Instruction selection begins: BB#" - << FuncInfo->MBB->getNumber() - << " '" << FuncInfo->MBB->getName() << "'\n"); + DEBUG(dbgs() << "===== Instruction selection begins: " + << printMBBReference(*FuncInfo->MBB) << " '" + << FuncInfo->MBB->getName() << "'\n"); 
PreprocessISelDAG(); @@ -1426,13 +1445,11 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { } if (AllPredsVisited) { - for (BasicBlock::const_iterator I = LLVMBB->begin(); - const PHINode *PN = dyn_cast(I); ++I) - FuncInfo->ComputePHILiveOutRegInfo(PN); + for (const PHINode &PN : LLVMBB->phis()) + FuncInfo->ComputePHILiveOutRegInfo(&PN); } else { - for (BasicBlock::const_iterator I = LLVMBB->begin(); - const PHINode *PN = dyn_cast(I); ++I) - FuncInfo->InvalidatePHILiveOutRegInfo(PN); + for (const PHINode &PN : LLVMBB->phis()) + FuncInfo->InvalidatePHILiveOutRegInfo(&PN); } FuncInfo->VisitedBBs.insert(LLVMBB); @@ -3098,7 +3115,16 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, continue; } case OPC_RecordMemRef: - MatchedMemRefs.push_back(cast(N)->getMemOperand()); + if (auto *MN = dyn_cast(N)) + MatchedMemRefs.push_back(MN->getMemOperand()); + else { + DEBUG( + dbgs() << "Expected MemSDNode "; + N->dump(CurDAG); + dbgs() << '\n' + ); + } + continue; case OPC_CaptureGlueInput: @@ -3544,7 +3570,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, Ops.push_back(InputGlue); // Create the node. - SDNode *Res = nullptr; + MachineSDNode *Res = nullptr; bool IsMorphNodeTo = Opcode == OPC_MorphNodeTo || (Opcode >= OPC_MorphNodeTo0 && Opcode <= OPC_MorphNodeTo2); if (!IsMorphNodeTo) { @@ -3570,7 +3596,8 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, "Chain node replaced during MorphNode"); Chain.erase(std::remove(Chain.begin(), Chain.end(), N), Chain.end()); }); - Res = MorphNode(NodeToMatch, TargetOpc, VTList, Ops, EmitNodeInfo); + Res = cast(MorphNode(NodeToMatch, TargetOpc, VTList, + Ops, EmitNodeInfo)); } // If the node had chain/glue results, update our notion of the current @@ -3626,13 +3653,19 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, } } - cast(Res) - ->setMemRefs(MemRefs, MemRefs + NumMemRefs); + Res->setMemRefs(MemRefs, MemRefs + NumMemRefs); } - DEBUG(dbgs() << " " - << (IsMorphNodeTo ? "Morphed" : "Created") - << " node: "; Res->dump(CurDAG); dbgs() << "\n"); + DEBUG( + if (!MatchedMemRefs.empty() && Res->memoperands_empty()) + dbgs() << " Dropping mem operands\n"; + dbgs() << " " + << (IsMorphNodeTo ? "Morphed" : "Created") + << " node: "; + Res->dump(CurDAG); + + dbgs() << '\n'; + ); // If this was a MorphNodeTo then we're completely done! if (IsMorphNodeTo) { @@ -3742,6 +3775,25 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, } } +bool SelectionDAGISel::isOrEquivalentToAdd(const SDNode *N) const { + assert(N->getOpcode() == ISD::OR && "Unexpected opcode"); + auto *C = dyn_cast(N->getOperand(1)); + if (!C) + return false; + + // Detect when "or" is used to add an offset to a stack object. + if (auto *FN = dyn_cast(N->getOperand(0))) { + MachineFrameInfo &MFI = MF->getFrameInfo(); + unsigned A = MFI.getObjectAlignment(FN->getIndex()); + assert(isPowerOf2_32(A) && "Unexpected alignment"); + int32_t Off = C->getSExtValue(); + // If the alleged offset fits in the zero bits guaranteed by + // the alignment, then this or is really an add. 
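// For example, with a 16-byte-aligned frame object: Off = 4 gives
// (16 - 1) & 4 == 4, so the OR only sets bits known to be zero and acts as an
// add; Off = 19 gives (16 - 1) & 19 == 3 != 19 and the fold is rejected.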
+ return (Off >= 0) && (((A - 1) & Off) == unsigned(Off)); + } + return false; +} + void SelectionDAGISel::CannotYetSelect(SDNode *N) { std::string msg; raw_string_ostream Msg(msg); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp index 9f9e1f937c2a..be4ab094bf49 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp @@ -16,11 +16,9 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/DebugInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GraphWriter.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index 9d778d979d87..3f64b49e3555 100644 --- a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -96,7 +96,7 @@ StatepointLoweringState::allocateStackSlot(EVT ValueType, NumSlotsAllocatedForStatepoints++; MachineFrameInfo &MFI = Builder.DAG.getMachineFunction().getFrameInfo(); - unsigned SpillSize = ValueType.getSizeInBits() / 8; + unsigned SpillSize = ValueType.getStoreSize(); assert((SpillSize * 8) == ValueType.getSizeInBits() && "Size not in bytes?"); // First look for a previously created stack slot which is not in diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index b84293668e25..13216d3d9455 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -52,11 +52,11 @@ bool TargetLowering::isPositionIndependent() const { /// so, it sets Chain to the input chain of the tail call. bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const { - const Function *F = DAG.getMachineFunction().getFunction(); + const Function &F = DAG.getMachineFunction().getFunction(); // Conservatively require the attributes of the call to match those of // the return. Ignore noalias because it doesn't affect the call sequence. - AttributeList CallerAttrs = F->getAttributes(); + AttributeList CallerAttrs = F.getAttributes(); if (AttrBuilder(CallerAttrs, AttributeList::ReturnIndex) .removeAttribute(Attribute::NoAlias) .hasAttributes()) @@ -580,7 +580,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, KnownBits LHSKnown; // Do not increment Depth here; that can cause an infinite loop. TLO.DAG.computeKnownBits(Op0, LHSKnown, Depth); - // If the LHS already has zeros where RHSC does, this and is dead. + // If the LHS already has zeros where RHSC does, this 'and' is dead. if ((LHSKnown.Zero & NewMask) == (~RHSC->getAPIntValue() & NewMask)) return TLO.CombineTo(Op, Op0); @@ -1220,6 +1220,12 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, Sign, ShAmt)); } } + // If this is a bitcast, let computeKnownBits handle it. Only do this on a + // recursive call where Known may be useful to the caller. 
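// ===========================================================================
// [Illustrative sketch, not part of the patch] isOrEquivalentToAdd() above
// rests on a bit-level fact: if the base address is A-aligned (A a power of
// two) and the constant offset fits entirely in the low log2(A) bits, then
// Base | Off == Base + Off, because the OR cannot produce carries. A
// standalone check of that equivalence; all names are hypothetical.
// ===========================================================================
#include <cassert>
#include <cstdint>

namespace sketch {

// Mirrors the test in the hunk: a non-negative offset confined to the
// alignment's guaranteed-zero low bits.
inline bool orActsLikeAdd(uint32_t Align, int32_t Off) {
  assert(Align != 0 && (Align & (Align - 1)) == 0 && "alignment must be a power of two");
  return Off >= 0 &&
         ((Align - 1) & static_cast<uint32_t>(Off)) == static_cast<uint32_t>(Off);
}

} // namespace sketch

int main() {
  // A 16-byte aligned base has its low four bits clear.
  const uint64_t Base = 0x1000;
  for (int32_t Off = 0; Off < 16; ++Off)
    if (sketch::orActsLikeAdd(16, Off))
      assert((Base | uint64_t(Off)) == Base + uint64_t(Off));
  // An offset of 16 no longer fits in the low bits, so OR is not ADD in general.
  assert(!sketch::orActsLikeAdd(16, 16));
  return 0;
}
// ============================== end of sketch ==============================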
+ if (Depth > 0) { + TLO.DAG.computeKnownBits(Op, Known, Depth); + return false; + } break; case ISD::ADD: case ISD::MUL: @@ -2963,7 +2969,7 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDValue Op1, APInt d, SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, std::vector *Created) const { - AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes(); + AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (TLI.isIntDivCheap(N->getValueType(0), Attr)) return SDValue(N,0); // Lower SDIV as SDIV @@ -3413,9 +3419,6 @@ SDValue TargetLowering::scalarizeVectorLoad(LoadSDNode *LD, return DAG.getMergeValues({ Value, NewChain }, SL); } -// FIXME: This relies on each element having a byte size, otherwise the stride -// is 0 and just overwrites the same location. ExpandStore currently expects -// this broken behavior. SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const { SDLoc SL(ST); @@ -3432,13 +3435,40 @@ SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST, // The type of data as saved in memory. EVT MemSclVT = StVT.getScalarType(); - EVT PtrVT = BasePtr.getValueType(); - - // Store Stride in bytes - unsigned Stride = MemSclVT.getSizeInBits() / 8; EVT IdxVT = getVectorIdxTy(DAG.getDataLayout()); unsigned NumElem = StVT.getVectorNumElements(); + // A vector must always be stored in memory as-is, i.e. without any padding + // between the elements, since various code depend on it, e.g. in the + // handling of a bitcast of a vector type to int, which may be done with a + // vector store followed by an integer load. A vector that does not have + // elements that are byte-sized must therefore be stored as an integer + // built out of the extracted vector elements. + if (!MemSclVT.isByteSized()) { + unsigned NumBits = StVT.getSizeInBits(); + EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits); + + SDValue CurrVal = DAG.getConstant(0, SL, IntVT); + + for (unsigned Idx = 0; Idx < NumElem; ++Idx) { + SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value, + DAG.getConstant(Idx, SL, IdxVT)); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MemSclVT, Elt); + SDValue ExtElt = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Trunc); + SDValue ShiftAmount = + DAG.getConstant(Idx * MemSclVT.getSizeInBits(), SL, IntVT); + SDValue ShiftedElt = DAG.getNode(ISD::SHL, SL, IntVT, ExtElt, ShiftAmount); + CurrVal = DAG.getNode(ISD::OR, SL, IntVT, CurrVal, ShiftedElt); + } + + return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(), + ST->getAlignment(), ST->getMemOperand()->getFlags(), + ST->getAAInfo()); + } + + // Store Stride in bytes + unsigned Stride = MemSclVT.getSizeInBits() / 8; + assert (Stride && "Zero stride!"); // Extract each of the elements from the original vector and save them into // memory individually. SmallVector Stores; @@ -3446,8 +3476,7 @@ SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST, SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value, DAG.getConstant(Idx, SL, IdxVT)); - SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr, - DAG.getConstant(Idx * Stride, SL, PtrVT)); + SDValue Ptr = DAG.getObjectPtrOffset(SL, BasePtr, Idx * Stride); // This scalar TruncStore may be illegal, but we legalize it later. 
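// ===========================================================================
// [Illustrative sketch, not part of the patch] The scalarizeVectorStore()
// hunk above handles vectors whose element type is not byte-sized (e.g.
// <8 x i1>) by packing all elements into one integer with truncate,
// zero-extend, shift and or, then issuing a single integer store. The same
// packing on plain integers; names and the 64-bit limit are assumptions.
// ===========================================================================
#include <cassert>
#include <cstdint>
#include <vector>

namespace sketch {

// Packs Elems.size() elements of BitWidth bits each, element 0 in the lowest
// bits, mirroring the zext/shl/or loop in the hunk.
inline uint64_t packElements(const std::vector<uint64_t> &Elems, unsigned BitWidth) {
  assert(BitWidth > 0 && BitWidth < 8 && Elems.size() * BitWidth <= 64 &&
         "sketch only covers sub-byte elements that fit in 64 bits");
  const uint64_t Mask = (uint64_t(1) << BitWidth) - 1; // "truncate" to the memory type
  uint64_t Packed = 0;
  for (unsigned Idx = 0; Idx < Elems.size(); ++Idx)
    Packed |= (Elems[Idx] & Mask) << (Idx * BitWidth); // zext + shl + or
  return Packed;
}

} // namespace sketch

int main() {
  // <8 x i1> with elements 1,0,1,1,0,0,0,1 packs to 0b10001101 == 0x8D.
  std::vector<uint64_t> Bits = {1, 0, 1, 1, 0, 0, 0, 1};
  assert(sketch::packElements(Bits, 1) == 0x8D);
  return 0;
}
// ============================== end of sketch ==============================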
SDValue Store = DAG.getTruncStore( @@ -3471,6 +3500,7 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const { EVT LoadedVT = LD->getMemoryVT(); SDLoc dl(LD); auto &MF = DAG.getMachineFunction(); + if (VT.isFloatingPoint() || VT.isVector()) { EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits()); if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) { @@ -3495,7 +3525,7 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const { // Copy the value to a (aligned) stack slot using (unaligned) integer // loads and stores, then do a (aligned) load from the stack slot. MVT RegVT = getRegisterType(*DAG.getContext(), intVT); - unsigned LoadedBytes = LoadedVT.getSizeInBits() / 8; + unsigned LoadedBytes = LoadedVT.getStoreSize(); unsigned RegBytes = RegVT.getSizeInBits() / 8; unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes; @@ -3525,9 +3555,9 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const { MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset))); // Increment the pointers. Offset += RegBytes; - Ptr = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr, PtrIncrement); - StackPtr = DAG.getNode(ISD::ADD, dl, StackPtrVT, StackPtr, - StackPtrIncrement); + + Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement); + StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement); } // The last copy may be partial. Do an extending load. @@ -3581,8 +3611,8 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const { Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(), NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(), LD->getAAInfo()); - Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); + + Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize); Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), NewLoadedVT, MinAlign(Alignment, IncrementSize), @@ -3591,8 +3621,8 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const { Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(), NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(), LD->getAAInfo()); - Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); + + Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize); Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), NewLoadedVT, MinAlign(Alignment, IncrementSize), @@ -3650,7 +3680,7 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST, EVT::getIntegerVT(*DAG.getContext(), StoredVT.getSizeInBits())); EVT PtrVT = Ptr.getValueType(); - unsigned StoredBytes = StoredVT.getSizeInBits() / 8; + unsigned StoredBytes = StoredVT.getStoreSize(); unsigned RegBytes = RegVT.getSizeInBits() / 8; unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes; @@ -3683,9 +3713,8 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST, ST->getMemOperand()->getFlags())); // Increment the pointers. Offset += RegBytes; - StackPtr = DAG.getNode(ISD::ADD, dl, StackPtrVT, - StackPtr, StackPtrIncrement); - Ptr = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr, PtrIncrement); + StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement); + Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement); } // The last store may be partial. Do a truncating store. 
On big-endian @@ -3731,9 +3760,7 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST, Ptr, ST->getPointerInfo(), NewStoredVT, Alignment, ST->getMemOperand()->getFlags()); - EVT PtrVT = Ptr.getValueType(); - Ptr = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr, - DAG.getConstant(IncrementSize, dl, PtrVT)); + Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize); Alignment = MinAlign(Alignment, IncrementSize); Store2 = DAG.getTruncStore( Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr, @@ -3772,7 +3799,7 @@ TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask, AddrVT); Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale); } else - Increment = DAG.getConstant(DataVT.getSizeInBits() / 8, DL, AddrVT); + Increment = DAG.getConstant(DataVT.getStoreSize(), DL, AddrVT); return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment); } @@ -3802,7 +3829,7 @@ SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG, SDValue Index) const { SDLoc dl(Index); // Make sure the index type is big enough to compute in. - Index = DAG.getZExtOrTrunc(Index, dl, getPointerTy(DAG.getDataLayout())); + Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType()); EVT EltVT = VecVT.getVectorElementType(); @@ -3817,7 +3844,7 @@ SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG, Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index, DAG.getConstant(EltSize, dl, IdxVT)); - return DAG.getNode(ISD::ADD, dl, IdxVT, Index, VecPtr); + return DAG.getNode(ISD::ADD, dl, IdxVT, VecPtr, Index); } //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/ShrinkWrap.cpp b/lib/CodeGen/ShrinkWrap.cpp index d6eca14b7610..8e87c0634654 100644 --- a/lib/CodeGen/ShrinkWrap.cpp +++ b/lib/CodeGen/ShrinkWrap.cpp @@ -248,6 +248,9 @@ bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI, for (const MachineOperand &MO : MI.operands()) { bool UseOrDefCSR = false; if (MO.isReg()) { + // Ignore instructions like DBG_VALUE which don't read/def the register. + if (!MO.isDef() && !MO.readsReg()) + continue; unsigned PhysReg = MO.getReg(); if (!PhysReg) continue; @@ -263,7 +266,8 @@ bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI, } } } - if (UseOrDefCSR || MO.isFI()) { + // Skip FrameIndex operands in DBG_VALUE instructions. + if (UseOrDefCSR || (MO.isFI() && !MI.isDebugValue())) { DEBUG(dbgs() << "Use or define CSR(" << UseOrDefCSR << ") or FI(" << MO.isFI() << "): " << MI << '\n'); return true; @@ -445,7 +449,7 @@ static bool isIrreducibleCFG(const MachineFunction &MF, } bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) { - if (skipFunction(*MF.getFunction()) || MF.empty() || !isShrinkWrapEnabled(MF)) + if (skipFunction(MF.getFunction()) || MF.empty() || !isShrinkWrapEnabled(MF)) return false; DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n'); @@ -558,16 +562,17 @@ bool ShrinkWrap::isShrinkWrapEnabled(const MachineFunction &MF) { switch (EnableShrinkWrapOpt) { case cl::BOU_UNSET: return TFI->enableShrinkWrapping(MF) && - // Windows with CFI has some limitations that make it impossible - // to use shrink-wrapping. - !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() && - // Sanitizers look at the value of the stack at the location - // of the crash. Since a crash can happen anywhere, the - // frame must be lowered before anything else happen for the - // sanitizers to be able to get a correct stack frame. 
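// ===========================================================================
// [Illustrative sketch, not part of the patch] The expandUnalignedLoad/Store
// hunks above bounce the value through an aligned stack temporary in
// register-sized pieces: the piece count is a ceiling division and the last
// piece may be shorter (extending load / truncating store). A byte-level
// sketch of that chunking; all names are hypothetical.
// ===========================================================================
#include <cassert>
#include <cstdint>
#include <cstring>
#include <vector>

namespace sketch {

// Copies LoadedBytes bytes in RegBytes-sized pieces with a shorter final
// piece, advancing the offsets the way the expansion advances Ptr/StackPtr.
inline void copyInRegSizedPieces(uint8_t *Dst, const uint8_t *Src,
                                 unsigned LoadedBytes, unsigned RegBytes) {
  unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes; // ceiling division
  unsigned Offset = 0;
  for (unsigned i = 0; i < NumRegs; ++i) {
    unsigned Piece = (i + 1 == NumRegs) ? LoadedBytes - Offset : RegBytes;
    std::memcpy(Dst + Offset, Src + Offset, Piece); // one register-sized move
    Offset += RegBytes;                             // increment the pointers
  }
}

} // namespace sketch

int main() {
  // A 10-byte value moved with 4-byte registers takes 3 pieces: 4 + 4 + 2.
  std::vector<uint8_t> Src(10), Dst(10, 0);
  for (unsigned i = 0; i < 10; ++i)
    Src[i] = uint8_t(i + 1);
  sketch::copyInRegSizedPieces(Dst.data(), Src.data(), 10, 4);
  assert(Dst == Src);
  return 0;
}
// ============================== end of sketch ==============================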
- !(MF.getFunction()->hasFnAttribute(Attribute::SanitizeAddress) || - MF.getFunction()->hasFnAttribute(Attribute::SanitizeThread) || - MF.getFunction()->hasFnAttribute(Attribute::SanitizeMemory)); + // Windows with CFI has some limitations that make it impossible + // to use shrink-wrapping. + !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() && + // Sanitizers look at the value of the stack at the location + // of the crash. Since a crash can happen anywhere, the + // frame must be lowered before anything else happen for the + // sanitizers to be able to get a correct stack frame. + !(MF.getFunction().hasFnAttribute(Attribute::SanitizeAddress) || + MF.getFunction().hasFnAttribute(Attribute::SanitizeThread) || + MF.getFunction().hasFnAttribute(Attribute::SanitizeMemory) || + MF.getFunction().hasFnAttribute(Attribute::SanitizeHWAddress)); // If EnableShrinkWrap is set, it takes precedence on whatever the // target sets. The rational is that we assume we want to test // something related to shrink-wrapping. diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp index 25a1c37b145d..ea74c777e1e2 100644 --- a/lib/CodeGen/SlotIndexes.cpp +++ b/lib/CodeGen/SlotIndexes.cpp @@ -10,7 +10,6 @@ #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -264,7 +263,7 @@ LLVM_DUMP_METHOD void SlotIndexes::dump() const { } for (unsigned i = 0, e = MBBRanges.size(); i != e; ++i) - dbgs() << "BB#" << i << "\t[" << MBBRanges[i].first << ';' + dbgs() << "%bb." << i << "\t[" << MBBRanges[i].first << ';' << MBBRanges[i].second << ")\n"; } #endif diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp index 59c5798ab49e..c99c3b09d88a 100644 --- a/lib/CodeGen/SplitKit.cpp +++ b/lib/CodeGen/SplitKit.cpp @@ -22,7 +22,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LiveInterval.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" @@ -729,7 +729,8 @@ SlotIndex SplitEditor::enterIntvAtEnd(MachineBasicBlock &MBB) { assert(OpenIdx && "openIntv not called before enterIntvAtEnd"); SlotIndex End = LIS.getMBBEndIdx(&MBB); SlotIndex Last = End.getPrevSlot(); - DEBUG(dbgs() << " enterIntvAtEnd BB#" << MBB.getNumber() << ", " << Last); + DEBUG(dbgs() << " enterIntvAtEnd " << printMBBReference(MBB) << ", " + << Last); VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Last); if (!ParentVNI) { DEBUG(dbgs() << ": not live\n"); @@ -808,7 +809,8 @@ SlotIndex SplitEditor::leaveIntvBefore(SlotIndex Idx) { SlotIndex SplitEditor::leaveIntvAtTop(MachineBasicBlock &MBB) { assert(OpenIdx && "openIntv not called before leaveIntvAtTop"); SlotIndex Start = LIS.getMBBStartIdx(&MBB); - DEBUG(dbgs() << " leaveIntvAtTop BB#" << MBB.getNumber() << ", " << Start); + DEBUG(dbgs() << " leaveIntvAtTop " << printMBBReference(MBB) << ", " + << Start); VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Start); if (!ParentVNI) { @@ -906,15 +908,15 @@ SplitEditor::findShallowDominator(MachineBasicBlock *MBB, // MBB isn't in a loop, it doesn't get any better. All dominators have a // higher frequency by definition. 
if (!Loop) { - DEBUG(dbgs() << "Def in BB#" << DefMBB->getNumber() << " dominates BB#" - << MBB->getNumber() << " at depth 0\n"); + DEBUG(dbgs() << "Def in " << printMBBReference(*DefMBB) << " dominates " + << printMBBReference(*MBB) << " at depth 0\n"); return MBB; } // We'll never be able to exit the DefLoop. if (Loop == DefLoop) { - DEBUG(dbgs() << "Def in BB#" << DefMBB->getNumber() << " dominates BB#" - << MBB->getNumber() << " in the same loop\n"); + DEBUG(dbgs() << "Def in " << printMBBReference(*DefMBB) << " dominates " + << printMBBReference(*MBB) << " in the same loop\n"); return MBB; } @@ -923,8 +925,8 @@ SplitEditor::findShallowDominator(MachineBasicBlock *MBB, if (Depth < BestDepth) { BestMBB = MBB; BestDepth = Depth; - DEBUG(dbgs() << "Def in BB#" << DefMBB->getNumber() << " dominates BB#" - << MBB->getNumber() << " at depth " << Depth << '\n'); + DEBUG(dbgs() << "Def in " << printMBBReference(*DefMBB) << " dominates " + << printMBBReference(*MBB) << " at depth " << Depth << '\n'); } // Leave loop by going to the immediate dominator of the loop header. @@ -1063,7 +1065,7 @@ void SplitEditor::hoistCopies() { DEBUG(dbgs() << "Multi-mapped complement " << VNI->id << '@' << VNI->def << " for parent " << ParentVNI->id << '@' << ParentVNI->def - << " hoist to BB#" << Dom.first->getNumber() << ' ' + << " hoist to " << printMBBReference(*Dom.first) << ' ' << Dom.second << '\n'); } @@ -1173,7 +1175,7 @@ bool SplitEditor::transferValues() { if (Start != BlockStart) { VNInfo *VNI = LI.extendInBlock(BlockStart, std::min(BlockEnd, End)); assert(VNI && "Missing def for complex mapped value"); - DEBUG(dbgs() << ':' << VNI->id << "*BB#" << MBB->getNumber()); + DEBUG(dbgs() << ':' << VNI->id << "*" << printMBBReference(*MBB)); // MBB has its own def. Is it also live-out? if (BlockEnd <= End) LRC.setLiveOutValue(&*MBB, VNI); @@ -1186,7 +1188,7 @@ bool SplitEditor::transferValues() { // Handle the live-in blocks covered by [Start;End). assert(Start <= BlockStart && "Expected live-in block"); while (BlockStart < End) { - DEBUG(dbgs() << ">BB#" << MBB->getNumber()); + DEBUG(dbgs() << ">" << printMBBReference(*MBB)); BlockEnd = LIS.getMBBEndIdx(&*MBB); if (BlockStart == ParentVNI->def) { // This block has the def of a parent PHI, so it isn't live-in. @@ -1329,7 +1331,7 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) { unsigned RegIdx = RegAssign.lookup(Idx); LiveInterval &LI = LIS.getInterval(Edit->get(RegIdx)); MO.setReg(LI.reg); - DEBUG(dbgs() << " rewr BB#" << MI->getParent()->getNumber() << '\t' + DEBUG(dbgs() << " rewr " << printMBBReference(*MI->getParent()) << '\t' << Idx << ':' << RegIdx << '\t' << *MI); // Extend liveness to Idx if the instruction reads reg. @@ -1375,9 +1377,9 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) { continue; // The problem here can be that the new register may have been created // for a partially defined original register. For example: - // %vreg827:subreg_hireg = ... + // %0:subreg_hireg = ... // ... - // %vreg828 = COPY %vreg827 + // %1 = COPY %0 if (S.empty()) continue; SubLRC.reset(&VRM.getMachineFunction(), LIS.getSlotIndexes(), &MDT, @@ -1563,9 +1565,9 @@ void SplitEditor::splitLiveThroughBlock(unsigned MBBNum, SlotIndex Start, Stop; std::tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(MBBNum); - DEBUG(dbgs() << "BB#" << MBBNum << " [" << Start << ';' << Stop - << ") intf " << LeaveBefore << '-' << EnterAfter - << ", live-through " << IntvIn << " -> " << IntvOut); + DEBUG(dbgs() << "%bb." 
<< MBBNum << " [" << Start << ';' << Stop << ") intf " + << LeaveBefore << '-' << EnterAfter << ", live-through " + << IntvIn << " -> " << IntvOut); assert((IntvIn || IntvOut) && "Use splitSingleBlock for isolated blocks"); @@ -1665,7 +1667,7 @@ void SplitEditor::splitRegInBlock(const SplitAnalysis::BlockInfo &BI, SlotIndex Start, Stop; std::tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB); - DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " [" << Start << ';' << Stop + DEBUG(dbgs() << printMBBReference(*BI.MBB) << " [" << Start << ';' << Stop << "), uses " << BI.FirstInstr << '-' << BI.LastInstr << ", reg-in " << IntvIn << ", leave before " << LeaveBefore << (BI.LiveOut ? ", stack-out" : ", killed in block")); @@ -1757,7 +1759,7 @@ void SplitEditor::splitRegOutBlock(const SplitAnalysis::BlockInfo &BI, SlotIndex Start, Stop; std::tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB); - DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " [" << Start << ';' << Stop + DEBUG(dbgs() << printMBBReference(*BI.MBB) << " [" << Start << ';' << Stop << "), uses " << BI.FirstInstr << '-' << BI.LastInstr << ", reg-out " << IntvOut << ", enter after " << EnterAfter << (BI.LiveIn ? ", stack-in" : ", defined in block")); diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp index 0a7be1d12fa2..608845498b48 100644 --- a/lib/CodeGen/StackColoring.cpp +++ b/lib/CodeGen/StackColoring.cpp @@ -739,7 +739,7 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) { } else { for (auto Slot : slots) { DEBUG(dbgs() << "Found a use of slot #" << Slot); - DEBUG(dbgs() << " at BB#" << MBB->getNumber() << " index "); + DEBUG(dbgs() << " at " << printMBBReference(*MBB) << " index "); DEBUG(Indexes->getInstructionIndex(MI).print(dbgs())); const AllocaInst *Allocation = MFI->getObjectAllocation(Slot); if (Allocation) { @@ -1129,8 +1129,7 @@ void StackColoring::expungeSlotMap(DenseMap &SlotRemap, bool StackColoring::runOnMachineFunction(MachineFunction &Func) { DEBUG(dbgs() << "********** Stack Coloring **********\n" - << "********** Function: " - << ((const Value*)Func.getFunction())->getName() << '\n'); + << "********** Function: " << Func.getName() << '\n'); MF = &Func; MFI = &MF->getFrameInfo(); Indexes = &getAnalysis(); @@ -1170,7 +1169,7 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) { // Don't continue because there are not enough lifetime markers, or the // stack is too small, or we are told not to optimize the slots. 
if (NumMarkers < 2 || TotalSize < 16 || DisableColoring || - skipFunction(*Func.getFunction())) { + skipFunction(Func.getFunction())) { DEBUG(dbgs()<<"Will not try to merge slots.\n"); return removeAllMarkers(); } diff --git a/lib/CodeGen/StackMaps.cpp b/lib/CodeGen/StackMaps.cpp index 1fa4c2f4d9d2..e66a25bec911 100644 --- a/lib/CodeGen/StackMaps.cpp +++ b/lib/CodeGen/StackMaps.cpp @@ -41,7 +41,7 @@ using namespace llvm; #define DEBUG_TYPE "stackmaps" static cl::opt StackMapVersion( - "stackmap-version", cl::init(3), + "stackmap-version", cl::init(3), cl::Hidden, cl::desc("Specify the stackmap encoding version (default = 3)")); const char *StackMaps::WSMP = "Stack Maps: "; @@ -193,14 +193,14 @@ void StackMaps::print(raw_ostream &OS) { case Location::Register: OS << "Register "; if (TRI) - OS << TRI->getName(Loc.Reg); + OS << printReg(Loc.Reg, TRI); else OS << Loc.Reg; break; case Location::Direct: OS << "Direct "; if (TRI) - OS << TRI->getName(Loc.Reg); + OS << printReg(Loc.Reg, TRI); else OS << Loc.Reg; if (Loc.Offset) @@ -209,7 +209,7 @@ void StackMaps::print(raw_ostream &OS) { case Location::Indirect: OS << "Indirect "; if (TRI) - OS << TRI->getName(Loc.Reg); + OS << printReg(Loc.Reg, TRI); else OS << Loc.Reg; OS << "+" << Loc.Offset; @@ -233,7 +233,7 @@ void StackMaps::print(raw_ostream &OS) { for (const auto &LO : LiveOuts) { OS << WSMP << "\t\tLO " << Idx << ": "; if (TRI) - OS << TRI->getName(LO.Reg); + OS << printReg(LO.Reg, TRI); else OS << LO.Reg; OS << "\t[encoding: .short " << LO.DwarfRegNum << ", .byte 0, .byte " diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp index e33400288639..62cef95a4af2 100644 --- a/lib/CodeGen/StackProtector.cpp +++ b/lib/CodeGen/StackProtector.cpp @@ -385,8 +385,12 @@ static bool CreatePrologue(Function *F, Module *M, ReturnInst *RI, /// - The epilogue checks the value stored in the prologue against the original /// value. It calls __stack_chk_fail if they differ. bool StackProtector::InsertStackProtectors() { + // If the target wants to XOR the frame pointer into the guard value, it's + // impossible to emit the check in IR, so the target *must* support stack + // protection in SDAG. bool SupportsSelectionDAGSP = - EnableSelectionDAGSP && !TM->Options.EnableFastISel; + TLI->useStackGuardXorFP() || + (EnableSelectionDAGSP && !TM->Options.EnableFastISel); AllocaInst *AI = nullptr; // Place on stack that stores the stack guard. 
for (Function::iterator I = F->begin(), E = F->end(); I != E;) { diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp index 89a9526ddbbc..8fc7a4a32842 100644 --- a/lib/CodeGen/StackSlotColoring.cpp +++ b/lib/CodeGen/StackSlotColoring.cpp @@ -15,8 +15,8 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LiveInterval.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "llvm/CodeGen/LiveStackAnalysis.h" +#include "llvm/CodeGen/LiveIntervals.h" +#include "llvm/CodeGen/LiveStacks.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp index 131b9a22768e..25cd7802264e 100644 --- a/lib/CodeGen/TailDuplication.cpp +++ b/lib/CodeGen/TailDuplication.cpp @@ -7,14 +7,17 @@ // //===----------------------------------------------------------------------===// // -// This pass duplicates basic blocks ending in unconditional branches into -// the tails of their predecessors, using the TailDuplicator utility class. +/// \file This pass duplicates basic blocks ending in unconditional branches +/// into the tails of their predecessors, using the TailDuplicator utility +/// class. // //===----------------------------------------------------------------------===// #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TailDuplicator.h" #include "llvm/Pass.h" @@ -24,38 +27,55 @@ using namespace llvm; namespace { -/// Perform tail duplication. Delegates to TailDuplicator -class TailDuplicatePass : public MachineFunctionPass { +class TailDuplicateBase : public MachineFunctionPass { TailDuplicator Duplicator; - + bool PreRegAlloc; public: - static char ID; - - explicit TailDuplicatePass() : MachineFunctionPass(ID) {} + TailDuplicateBase(char &PassID, bool PreRegAlloc) + : MachineFunctionPass(PassID), PreRegAlloc(PreRegAlloc) {} bool runOnMachineFunction(MachineFunction &MF) override; - void getAnalysisUsage(AnalysisUsage &AU) const override; + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + MachineFunctionPass::getAnalysisUsage(AU); + } +}; + +class TailDuplicate : public TailDuplicateBase { +public: + static char ID; + TailDuplicate() : TailDuplicateBase(ID, false) { + initializeTailDuplicatePass(*PassRegistry::getPassRegistry()); + } +}; + +class EarlyTailDuplicate : public TailDuplicateBase { +public: + static char ID; + EarlyTailDuplicate() : TailDuplicateBase(ID, true) { + initializeEarlyTailDuplicatePass(*PassRegistry::getPassRegistry()); + } }; } // end anonymous namespace -char TailDuplicatePass::ID = 0; +char TailDuplicate::ID; +char EarlyTailDuplicate::ID; -char &llvm::TailDuplicateID = TailDuplicatePass::ID; +char &llvm::TailDuplicateID = TailDuplicate::ID; +char &llvm::EarlyTailDuplicateID = EarlyTailDuplicate::ID; -INITIALIZE_PASS(TailDuplicatePass, DEBUG_TYPE, "Tail Duplication", false, false) +INITIALIZE_PASS(TailDuplicate, DEBUG_TYPE, "Tail Duplication", false, false) +INITIALIZE_PASS(EarlyTailDuplicate, "early-tailduplication", + "Early Tail Duplication", false, false) -bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) { - if (skipFunction(*MF.getFunction())) +bool TailDuplicateBase::runOnMachineFunction(MachineFunction 
&MF) { + if (skipFunction(MF.getFunction())) return false; auto MBPI = &getAnalysis(); - - // TODO: Querying isSSA() to determine pre-/post-regalloc is fragile, better - // split this into two passes instead. - bool PreRegAlloc = MF.getRegInfo().isSSA(); - Duplicator.initMF(MF, PreRegAlloc, MBPI, /* LayoutMode */ false); + Duplicator.initMF(MF, PreRegAlloc, MBPI, /*LayoutMode=*/false); bool MadeChange = false; while (Duplicator.tailDuplicateBlocks()) @@ -63,8 +83,3 @@ bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) { return MadeChange; } - -void TailDuplicatePass::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired(); - MachineFunctionPass::getAnalysisUsage(AU); -} diff --git a/lib/CodeGen/TailDuplicator.cpp b/lib/CodeGen/TailDuplicator.cpp index 7adf9b037b5f..f51c884839b3 100644 --- a/lib/CodeGen/TailDuplicator.cpp +++ b/lib/CodeGen/TailDuplicator.cpp @@ -111,9 +111,10 @@ static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) { } } if (!Found) { - dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI; - dbgs() << " missing input from predecessor BB#" - << PredBB->getNumber() << '\n'; + dbgs() << "Malformed PHI in " << printMBBReference(*MBB) << ": " + << *MI; + dbgs() << " missing input from predecessor " + << printMBBReference(*PredBB) << '\n'; llvm_unreachable(nullptr); } } @@ -121,15 +122,16 @@ static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) { for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) { MachineBasicBlock *PHIBB = MI->getOperand(i + 1).getMBB(); if (CheckExtra && !Preds.count(PHIBB)) { - dbgs() << "Warning: malformed PHI in BB#" << MBB->getNumber() << ": " - << *MI; - dbgs() << " extra input from predecessor BB#" << PHIBB->getNumber() - << '\n'; + dbgs() << "Warning: malformed PHI in " << printMBBReference(*MBB) + << ": " << *MI; + dbgs() << " extra input from predecessor " + << printMBBReference(*PHIBB) << '\n'; llvm_unreachable(nullptr); } if (PHIBB->getNumber() < 0) { - dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI; - dbgs() << " non-existing BB#" << PHIBB->getNumber() << '\n'; + dbgs() << "Malformed PHI in " << printMBBReference(*MBB) << ": " + << *MI; + dbgs() << " non-existing " << printMBBReference(*PHIBB) << '\n'; llvm_unreachable(nullptr); } } @@ -548,7 +550,7 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple, unsigned MaxDuplicateCount; if (TailDupSize == 0 && TailDuplicateSize.getNumOccurrences() == 0 && - MF->getFunction()->optForSize()) + MF->getFunction().optForSize()) MaxDuplicateCount = 1; else if (TailDupSize == 0) MaxDuplicateCount = TailDuplicateSize; @@ -783,7 +785,8 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB, MachineBasicBlock *ForcedLayoutPred, SmallVectorImpl &TDBBs, SmallVectorImpl &Copies) { - DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n'); + DEBUG(dbgs() << "\n*** Tail-duplicating " << printMBBReference(*TailBB) + << '\n'); DenseSet UsedByPhi; getRegsUsedByPHIs(*TailBB, &UsedByPhi); diff --git a/lib/CodeGen/TargetFrameLoweringImpl.cpp b/lib/CodeGen/TargetFrameLoweringImpl.cpp index 6f1a0038ee58..b2151eb49655 100644 --- a/lib/CodeGen/TargetFrameLoweringImpl.cpp +++ b/lib/CodeGen/TargetFrameLoweringImpl.cpp @@ -32,7 +32,7 @@ TargetFrameLowering::~TargetFrameLowering() = default; /// The default implementation just looks at attribute "no-frame-pointer-elim". 
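// ===========================================================================
// [Illustrative sketch, not part of the patch] The TailDuplication hunk above
// replaces one pass that sniffed MRI.isSSA() with two registered passes that
// share a base class carrying a PreRegAlloc flag. The shape of that split,
// without the LLVM pass machinery; all names are hypothetical.
// ===========================================================================
#include <iostream>

namespace sketch {

class DuplicateBase {
  bool PreRegAlloc;

public:
  explicit DuplicateBase(bool PreRegAlloc) : PreRegAlloc(PreRegAlloc) {}
  virtual ~DuplicateBase() = default;

  // The shared implementation consults the flag instead of asking whether the
  // function is still in SSA form.
  void run() const {
    std::cout << (PreRegAlloc ? "early (pre-RA)" : "late (post-RA)")
              << " tail duplication\n";
  }
};

// Two thin wrappers fix the phase at construction time, mirroring
// EarlyTailDuplicate and TailDuplicate in the hunk.
class EarlyDuplicate : public DuplicateBase {
public:
  EarlyDuplicate() : DuplicateBase(/*PreRegAlloc=*/true) {}
};

class LateDuplicate : public DuplicateBase {
public:
  LateDuplicate() : DuplicateBase(/*PreRegAlloc=*/false) {}
};

} // namespace sketch

int main() {
  sketch::EarlyDuplicate().run();
  sketch::LateDuplicate().run();
  return 0;
}
// ============================== end of sketch ==============================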
bool TargetFrameLowering::noFramePointerElim(const MachineFunction &MF) const { - auto Attr = MF.getFunction()->getFnAttribute("no-frame-pointer-elim"); + auto Attr = MF.getFunction().getFnAttribute("no-frame-pointer-elim"); return Attr.getValueAsString() == "true"; } @@ -82,7 +82,7 @@ void TargetFrameLowering::determineCalleeSaves(MachineFunction &MF, return; // In Naked functions we aren't going to save any registers. - if (MF.getFunction()->hasFnAttribute(Attribute::Naked)) + if (MF.getFunction().hasFnAttribute(Attribute::Naked)) return; // Functions which call __builtin_unwind_init get all their registers saved. @@ -99,7 +99,7 @@ unsigned TargetFrameLowering::getStackAlignmentSkew( const MachineFunction &MF) const { // When HHVM function is called, the stack is skewed as the return address // is removed from the stack before we enter the function. - if (LLVM_UNLIKELY(MF.getFunction()->getCallingConv() == CallingConv::HHVM)) + if (LLVM_UNLIKELY(MF.getFunction().getCallingConv() == CallingConv::HHVM)) return MF.getTarget().getPointerSize(); return 0; diff --git a/lib/CodeGen/TargetInstrInfo.cpp b/lib/CodeGen/TargetInstrInfo.cpp index db925f803db6..bd90ed5b55b8 100644 --- a/lib/CodeGen/TargetInstrInfo.cpp +++ b/lib/CodeGen/TargetInstrInfo.cpp @@ -1151,6 +1151,8 @@ bool TargetInstrInfo::getRegSequenceInputs( for (unsigned OpIdx = 1, EndOpIdx = MI.getNumOperands(); OpIdx != EndOpIdx; OpIdx += 2) { const MachineOperand &MOReg = MI.getOperand(OpIdx); + if (MOReg.isUndef()) + continue; const MachineOperand &MOSubIdx = MI.getOperand(OpIdx + 1); assert(MOSubIdx.isImm() && "One of the subindex of the reg_sequence is not an immediate"); @@ -1174,6 +1176,8 @@ bool TargetInstrInfo::getExtractSubregInputs( // Def = EXTRACT_SUBREG v0.sub1, sub0. assert(DefIdx == 0 && "EXTRACT_SUBREG only has one def"); const MachineOperand &MOReg = MI.getOperand(1); + if (MOReg.isUndef()) + return false; const MachineOperand &MOSubIdx = MI.getOperand(2); assert(MOSubIdx.isImm() && "The subindex of the extract_subreg is not an immediate"); @@ -1198,6 +1202,8 @@ bool TargetInstrInfo::getInsertSubregInputs( assert(DefIdx == 0 && "INSERT_SUBREG only has one def"); const MachineOperand &MOBaseReg = MI.getOperand(1); const MachineOperand &MOInsertedReg = MI.getOperand(2); + if (MOInsertedReg.isUndef()) + return false; const MachineOperand &MOSubIdx = MI.getOperand(3); assert(MOSubIdx.isImm() && "One of the subindex of the reg_sequence is not an immediate"); diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp index 3c684974df86..4dcb705934c5 100644 --- a/lib/CodeGen/TargetLoweringBase.cpp +++ b/lib/CodeGen/TargetLoweringBase.cpp @@ -89,6 +89,21 @@ static cl::opt OptsizeJumpTableDensity( cl::desc("Minimum density for building a jump table in " "an optsize function")); +static bool darwinHasSinCos(const Triple &TT) { + assert(TT.isOSDarwin() && "should be called with darwin triple"); + // Don't bother with 32 bit x86. + if (TT.getArch() == Triple::x86) + return false; + // Macos < 10.9 has no sincos_stret. + if (TT.isMacOSX()) + return !TT.isMacOSXVersionLT(10, 9) && TT.isArch64Bit(); + // iOS < 7.0 has no sincos_stret. + if (TT.isiOS()) + return !TT.isOSVersionLT(7, 0); + // Any other darwin such as WatchOS/TvOS is new enough. + return true; +} + // Although this default value is arbitrary, it is not random. It is assumed // that a condition that evaluates the same way by a higher percentage than this // is best represented as control flow. 
Therefore, the default value N should be @@ -100,44 +115,65 @@ static cl::opt MinPercentageForPredictableBranch( "or false to assume that the condition is predictable"), cl::Hidden); -/// InitLibcallNames - Set default libcall names. -static void InitLibcallNames(const char **Names, const Triple &TT) { +void TargetLoweringBase::InitLibcalls(const Triple &TT) { #define HANDLE_LIBCALL(code, name) \ - Names[RTLIB::code] = name; + setLibcallName(RTLIB::code, name); #include "llvm/CodeGen/RuntimeLibcalls.def" #undef HANDLE_LIBCALL + // Initialize calling conventions to their default. + for (int LC = 0; LC < RTLIB::UNKNOWN_LIBCALL; ++LC) + setLibcallCallingConv((RTLIB::Libcall)LC, CallingConv::C); // A few names are different on particular architectures or environments. if (TT.isOSDarwin()) { // For f16/f32 conversions, Darwin uses the standard naming scheme, instead // of the gnueabi-style __gnu_*_ieee. // FIXME: What about other targets? - Names[RTLIB::FPEXT_F16_F32] = "__extendhfsf2"; - Names[RTLIB::FPROUND_F32_F16] = "__truncsfhf2"; + setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2"); + setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2"); + + // Some darwins have an optimized __bzero/bzero function. + switch (TT.getArch()) { + case Triple::x86: + case Triple::x86_64: + if (TT.isMacOSX() && !TT.isMacOSXVersionLT(10, 6)) + setLibcallName(RTLIB::BZERO, "__bzero"); + break; + case Triple::aarch64: + setLibcallName(RTLIB::BZERO, "bzero"); + break; + default: + break; + } + + if (darwinHasSinCos(TT)) { + setLibcallName(RTLIB::SINCOS_STRET_F32, "__sincosf_stret"); + setLibcallName(RTLIB::SINCOS_STRET_F64, "__sincos_stret"); + if (TT.isWatchABI()) { + setLibcallCallingConv(RTLIB::SINCOS_STRET_F32, + CallingConv::ARM_AAPCS_VFP); + setLibcallCallingConv(RTLIB::SINCOS_STRET_F64, + CallingConv::ARM_AAPCS_VFP); + } + } } else { - Names[RTLIB::FPEXT_F16_F32] = "__gnu_h2f_ieee"; - Names[RTLIB::FPROUND_F32_F16] = "__gnu_f2h_ieee"; + setLibcallName(RTLIB::FPEXT_F16_F32, "__gnu_h2f_ieee"); + setLibcallName(RTLIB::FPROUND_F32_F16, "__gnu_f2h_ieee"); } if (TT.isGNUEnvironment() || TT.isOSFuchsia()) { - Names[RTLIB::SINCOS_F32] = "sincosf"; - Names[RTLIB::SINCOS_F64] = "sincos"; - Names[RTLIB::SINCOS_F80] = "sincosl"; - Names[RTLIB::SINCOS_F128] = "sincosl"; - Names[RTLIB::SINCOS_PPCF128] = "sincosl"; + setLibcallName(RTLIB::SINCOS_F32, "sincosf"); + setLibcallName(RTLIB::SINCOS_F64, "sincos"); + setLibcallName(RTLIB::SINCOS_F80, "sincosl"); + setLibcallName(RTLIB::SINCOS_F128, "sincosl"); + setLibcallName(RTLIB::SINCOS_PPCF128, "sincosl"); } if (TT.isOSOpenBSD()) { - Names[RTLIB::STACKPROTECTOR_CHECK_FAIL] = nullptr; + setLibcallName(RTLIB::STACKPROTECTOR_CHECK_FAIL, nullptr); } } -/// Set default libcall CallingConvs. -static void InitLibcallCallingConvs(CallingConv::ID *CCs) { - for (int LC = 0; LC < RTLIB::UNKNOWN_LIBCALL; ++LC) - CCs[LC] = CallingConv::C; -} - /// getFPEXT - Return the FPEXT_*_* value for the given types, or /// UNKNOWN_LIBCALL if there is none. 
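// ===========================================================================
// [Illustrative sketch, not part of the patch] darwinHasSinCos() above gates
// the __sincos_stret libcalls on the deployment target: 64-bit macOS from
// 10.9, iOS from 7.0, and other Darwin flavours unconditionally. The version
// test reduces to a lexicographic compare on (major, minor); all names below
// are hypothetical stand-ins for the Triple queries.
// ===========================================================================
#include <cassert>
#include <tuple>

namespace sketch {

struct OSVersion {
  unsigned Major;
  unsigned Minor;
};

// Stand-in for Triple::isMacOSXVersionLT / isOSVersionLT on two components.
inline bool versionLT(OSVersion V, unsigned Major, unsigned Minor) {
  return std::tie(V.Major, V.Minor) < std::tie(Major, Minor);
}

enum class DarwinKind { MacOSX, IOS, Other };

inline bool hasSinCosStret(DarwinKind Kind, OSVersion V, bool Is64Bit) {
  switch (Kind) {
  case DarwinKind::MacOSX:
    return !versionLT(V, 10, 9) && Is64Bit;
  case DarwinKind::IOS:
    return !versionLT(V, 7, 0);
  case DarwinKind::Other: // e.g. watchOS/tvOS in the hunk
    return true;
  }
  return false;
}

} // namespace sketch

int main() {
  assert(!sketch::hasSinCosStret(sketch::DarwinKind::MacOSX, {10, 8}, true));
  assert(sketch::hasSinCosStret(sketch::DarwinKind::MacOSX, {10, 9}, true));
  assert(!sketch::hasSinCosStret(sketch::DarwinKind::MacOSX, {10, 9}, false));
  assert(sketch::hasSinCosStret(sketch::DarwinKind::IOS, {11, 0}, false));
  return 0;
}
// ============================== end of sketch ==============================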
RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) { @@ -156,6 +192,9 @@ RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) { return FPEXT_F64_F128; else if (RetVT == MVT::ppcf128) return FPEXT_F64_PPCF128; + } else if (OpVT == MVT::f80) { + if (RetVT == MVT::f128) + return FPEXT_F80_F128; } return UNKNOWN_LIBCALL; @@ -191,6 +230,9 @@ RTLIB::Libcall RTLIB::getFPROUND(EVT OpVT, EVT RetVT) { return FPROUND_F128_F64; if (OpVT == MVT::ppcf128) return FPROUND_PPCF128_F64; + } else if (RetVT == MVT::f80) { + if (OpVT == MVT::f128) + return FPROUND_F128_F80; } return UNKNOWN_LIBCALL; @@ -520,12 +562,12 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) { MaxAtomicSizeInBitsSupported = 1024; MinCmpXchgSizeInBits = 0; + SupportsUnalignedAtomics = false; std::fill(std::begin(LibcallRoutineNames), std::end(LibcallRoutineNames), nullptr); - InitLibcallNames(LibcallRoutineNames, TM.getTargetTriple()); + InitLibcalls(TM.getTargetTriple()); InitCmpLibcallCCs(CmpLibcallCCs); - InitLibcallCallingConvs(LibcallCallingConvs); } void TargetLoweringBase::initActions() { @@ -1591,8 +1633,8 @@ void TargetLoweringBase::setMaximumJumpTableSize(unsigned Val) { /// Get the reciprocal estimate attribute string for a function that will /// override the target defaults. static StringRef getRecipEstimateForFunc(MachineFunction &MF) { - const Function *F = MF.getFunction(); - return F->getFnAttribute("reciprocal-estimates").getValueAsString(); + const Function &F = MF.getFunction(); + return F.getFnAttribute("reciprocal-estimates").getValueAsString(); } /// Construct a string for the given reciprocal operation of the given type. diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 910ca4682b92..0e90df901fd6 100644 --- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -1250,33 +1250,60 @@ void TargetLoweringObjectFileCOFF::emitLinkerFlagsForGlobal( emitLinkerFlagsForGlobalCOFF(OS, GV, getTargetTriple(), getMangler()); } +void TargetLoweringObjectFileCOFF::emitLinkerFlagsForUsed( + raw_ostream &OS, const GlobalValue *GV) const { + emitLinkerFlagsForUsedCOFF(OS, GV, getTargetTriple(), getMangler()); +} + //===----------------------------------------------------------------------===// // Wasm //===----------------------------------------------------------------------===// -static void checkWasmComdat(const GlobalValue *GV) { +static const Comdat *getWasmComdat(const GlobalValue *GV) { const Comdat *C = GV->getComdat(); if (!C) - return; + return nullptr; - // TODO(sbc): At some point we may need COMDAT support but currently - // they are not supported. - report_fatal_error("WebAssembly doesn't support COMDATs, '" + C->getName() + - "' cannot be lowered."); + if (C->getSelectionKind() != Comdat::Any) + report_fatal_error("WebAssembly COMDATs only support " + "SelectionKind::Any, '" + C->getName() + "' cannot be " + "lowered."); + + return C; +} + +static SectionKind getWasmKindForNamedSection(StringRef Name, SectionKind K) { + // If we're told we have function data, then use that. + if (K.isText()) + return SectionKind::getText(); + + // Otherwise, ignore whatever section type the generic impl detected and use + // a plain data section. 
+ return SectionKind::getData(); } MCSection *TargetLoweringObjectFileWasm::getExplicitSectionGlobal( const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { StringRef Name = GO->getSection(); - checkWasmComdat(GO); - return getContext().getWasmSection(Name, SectionKind::getData()); + + Kind = getWasmKindForNamedSection(Name, Kind); + + StringRef Group = ""; + if (const Comdat *C = getWasmComdat(GO)) { + Group = C->getName(); + } + + return getContext().getWasmSection(Name, Kind, Group, + MCContext::GenericSectionID); } static MCSectionWasm *selectWasmSectionForGlobal( MCContext &Ctx, const GlobalObject *GO, SectionKind Kind, Mangler &Mang, const TargetMachine &TM, bool EmitUniqueSection, unsigned *NextUniqueID) { StringRef Group = ""; - checkWasmComdat(GO); + if (const Comdat *C = getWasmComdat(GO)) { + Group = C->getName(); + } bool UniqueSectionNames = TM.getUniqueSectionNames(); SmallString<128> Name = getSectionPrefixForGlobal(Kind); @@ -1348,6 +1375,18 @@ const MCExpr *TargetLoweringObjectFileWasm::lowerRelativeReference( void TargetLoweringObjectFileWasm::InitializeWasm() { StaticCtorSection = getContext().getWasmSection(".init_array", SectionKind::getData()); - StaticDtorSection = - getContext().getWasmSection(".fini_array", SectionKind::getData()); +} + +MCSection *TargetLoweringObjectFileWasm::getStaticCtorSection( + unsigned Priority, const MCSymbol *KeySym) const { + return Priority == UINT16_MAX ? + StaticCtorSection : + getContext().getWasmSection(".init_array." + utostr(Priority), + SectionKind::getData()); +} + +MCSection *TargetLoweringObjectFileWasm::getStaticDtorSection( + unsigned Priority, const MCSymbol *KeySym) const { + llvm_unreachable("@llvm.global_dtors should have been lowered already"); + return nullptr; } diff --git a/lib/CodeGen/TargetOptionsImpl.cpp b/lib/CodeGen/TargetOptionsImpl.cpp index 98e07bedb366..853e71d0efa5 100644 --- a/lib/CodeGen/TargetOptionsImpl.cpp +++ b/lib/CodeGen/TargetOptionsImpl.cpp @@ -28,7 +28,7 @@ bool TargetOptions::DisableFramePointerElim(const MachineFunction &MF) const { return true; // Check to see if we should eliminate non-leaf frame pointers. 
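// ===========================================================================
// [Illustrative sketch, not part of the patch] getStaticCtorSection() in the
// Wasm hunk above keeps default-priority constructors in plain ".init_array"
// (the default priority is UINT16_MAX) and appends the numeric priority
// otherwise. The naming rule on its own; the helper name is hypothetical.
// ===========================================================================
#include <cassert>
#include <cstdint>
#include <string>

namespace sketch {

inline std::string staticCtorSectionName(unsigned Priority) {
  if (Priority == UINT16_MAX)
    return ".init_array";
  return ".init_array." + std::to_string(Priority);
}

} // namespace sketch

int main() {
  assert(sketch::staticCtorSectionName(UINT16_MAX) == ".init_array");
  assert(sketch::staticCtorSectionName(101) == ".init_array.101");
  return 0;
}
// ============================== end of sketch ==============================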
- if (MF.getFunction()->hasFnAttribute("no-frame-pointer-elim-non-leaf")) + if (MF.getFunction().hasFnAttribute("no-frame-pointer-elim-non-leaf")) return MF.getFrameInfo().hasCalls(); return false; diff --git a/lib/CodeGen/TargetPassConfig.cpp b/lib/CodeGen/TargetPassConfig.cpp index 3f2a31a69cfa..4a10c249c3ce 100644 --- a/lib/CodeGen/TargetPassConfig.cpp +++ b/lib/CodeGen/TargetPassConfig.cpp @@ -93,11 +93,11 @@ static cl::opt DisablePartialLibcallInlining("disable-partial-libcall-inli static cl::opt EnableImplicitNullChecks( "enable-implicit-null-checks", cl::desc("Fold null checks into faulting memory operations"), - cl::init(false)); -static cl::opt EnableMergeICmps( - "enable-mergeicmps", - cl::desc("Merge ICmp chains into a single memcmp"), - cl::init(false)); + cl::init(false), cl::Hidden); +static cl::opt + EnableMergeICmps("enable-mergeicmps", + cl::desc("Merge ICmp chains into a single memcmp"), + cl::init(false), cl::Hidden); static cl::opt PrintLSR("print-lsr-output", cl::Hidden, cl::desc("Print LLVM IR produced by the loop-reduce pass")); static cl::opt PrintISelInput("print-isel-input", cl::Hidden, @@ -123,14 +123,13 @@ static cl::opt EnableFastISelOption("fast-isel", cl::Hidden, cl::desc("Enable the \"fast\" instruction selector")); -static cl::opt - EnableGlobalISel("global-isel", cl::Hidden, - cl::desc("Enable the \"global\" instruction selector")); +static cl::opt EnableGlobalISelOption( + "global-isel", cl::Hidden, + cl::desc("Enable the \"global\" instruction selector")); -static cl::opt -PrintMachineInstrs("print-machineinstrs", cl::ValueOptional, - cl::desc("Print machine instrs"), - cl::value_desc("pass-name"), cl::init("option-unspecified")); +static cl::opt PrintMachineInstrs( + "print-machineinstrs", cl::ValueOptional, cl::desc("Print machine instrs"), + cl::value_desc("pass-name"), cl::init("option-unspecified"), cl::Hidden); static cl::opt EnableGlobalISelAbort( "global-isel-abort", cl::Hidden, @@ -176,22 +175,22 @@ const char *StopBeforeOptName = "stop-before"; static cl::opt StartAfterOpt(StringRef(StartAfterOptName), cl::desc("Resume compilation after a specific pass"), - cl::value_desc("pass-name"), cl::init("")); + cl::value_desc("pass-name"), cl::init(""), cl::Hidden); static cl::opt StartBeforeOpt(StringRef(StartBeforeOptName), cl::desc("Resume compilation before a specific pass"), - cl::value_desc("pass-name"), cl::init("")); + cl::value_desc("pass-name"), cl::init(""), cl::Hidden); static cl::opt StopAfterOpt(StringRef(StopAfterOptName), cl::desc("Stop compilation after a specific pass"), - cl::value_desc("pass-name"), cl::init("")); + cl::value_desc("pass-name"), cl::init(""), cl::Hidden); static cl::opt StopBeforeOpt(StringRef(StopBeforeOptName), cl::desc("Stop compilation before a specific pass"), - cl::value_desc("pass-name"), cl::init("")); + cl::value_desc("pass-name"), cl::init(""), cl::Hidden); /// Allow standard passes to be disabled by command line options. This supports /// simple binary flags that either suppress the pass or do nothing. 
@@ -227,7 +226,7 @@ static IdentifyingPassPtr overridePass(AnalysisID StandardID, if (StandardID == &TailDuplicateID) return applyDisable(TargetID, DisableTailDuplicate); - if (StandardID == &TargetPassConfig::EarlyTailDuplicateID) + if (StandardID == &EarlyTailDuplicateID) return applyDisable(TargetID, DisableEarlyTailDup); if (StandardID == &MachineBlockPlacementID) @@ -242,13 +241,13 @@ static IdentifyingPassPtr overridePass(AnalysisID StandardID, if (StandardID == &EarlyIfConverterID) return applyDisable(TargetID, DisableEarlyIfConversion); - if (StandardID == &MachineLICMID) + if (StandardID == &EarlyMachineLICMID) return applyDisable(TargetID, DisableMachineLICM); if (StandardID == &MachineCSEID) return applyDisable(TargetID, DisableMachineCSE); - if (StandardID == &TargetPassConfig::PostRAMachineLICMID) + if (StandardID == &MachineLICMID) return applyDisable(TargetID, DisablePostRAMachineLICM); if (StandardID == &MachineSinkingID) @@ -268,10 +267,6 @@ INITIALIZE_PASS(TargetPassConfig, "targetpassconfig", "Target Pass Configuration", false, false) char TargetPassConfig::ID = 0; -// Pseudo Pass IDs. -char TargetPassConfig::EarlyTailDuplicateID = 0; -char TargetPassConfig::PostRAMachineLICMID = 0; - namespace { struct InsertedPass { @@ -367,10 +362,6 @@ TargetPassConfig::TargetPassConfig(LLVMTargetMachine &TM, PassManagerBase &pm) initializeBasicAAWrapperPassPass(*PassRegistry::getPassRegistry()); initializeAAResultsWrapperPassPass(*PassRegistry::getPassRegistry()); - // Substitute Pseudo Pass IDs for real ones. - substitutePass(&EarlyTailDuplicateID, &TailDuplicateID); - substitutePass(&PostRAMachineLICMID, &MachineLICMID); - if (StringRef(PrintMachineInstrs.getValue()).equals("")) TM.Options.PrintMachineCode = true; @@ -705,16 +696,18 @@ void TargetPassConfig::addISelPrepare() { } bool TargetPassConfig::addCoreISelPasses() { - // Enable FastISel with -fast, but allow that to be overridden. + // Enable FastISel with -fast-isel, but allow that to be overridden. TM->setO0WantsFastISel(EnableFastISelOption != cl::BOU_FALSE); if (EnableFastISelOption == cl::BOU_TRUE || (TM->getOptLevel() == CodeGenOpt::None && TM->getO0WantsFastISel())) TM->setFastISel(true); - // Ask the target for an isel. - // Enable GlobalISel if the target wants to, but allow that to be overriden. - if (EnableGlobalISel == cl::BOU_TRUE || - (EnableGlobalISel == cl::BOU_UNSET && isGlobalISelEnabled())) { + // Ask the target for an instruction selector. + // Explicitly enabling fast-isel should override implicitly enabled + // global-isel. + if (EnableGlobalISelOption == cl::BOU_TRUE || + (EnableGlobalISelOption == cl::BOU_UNSET && + TM->Options.EnableGlobalISel && EnableFastISelOption != cl::BOU_TRUE)) { if (addIRTranslator()) return true; @@ -767,10 +760,9 @@ bool TargetPassConfig::addISelPasses() { /// -regalloc=... command line option. static FunctionPass *useDefaultRegisterAllocator() { return nullptr; } static cl::opt > -RegAlloc("regalloc", - cl::init(&useDefaultRegisterAllocator), - cl::desc("Register allocator to use")); + RegisterPassParser> + RegAlloc("regalloc", cl::Hidden, cl::init(&useDefaultRegisterAllocator), + cl::desc("Register allocator to use")); /// Add the complete set of target-independent postISel code generator passes. /// @@ -904,6 +896,9 @@ void TargetPassConfig::addMachinePasses() { if (EnableMachineOutliner) PM->add(createMachineOutlinerPass(EnableLinkOnceODROutlining)); + // Add passes that directly emit MI after all other MI passes. 
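// ===========================================================================
// [Illustrative sketch, not part of the patch] The addCoreISelPasses() hunk
// above resolves two tri-state command-line flags against a target default:
// an explicit -global-isel wins, an explicit -fast-isel overrides a target
// that implicitly prefers GlobalISel, and "unset" falls back to the target
// option. The decision in isolation; all names are hypothetical.
// ===========================================================================
#include <cassert>

namespace sketch {

enum class BoolOrDefault { False, True, Unset }; // like cl::boolOrDefault

inline bool useGlobalISel(BoolOrDefault GlobalISelFlag, BoolOrDefault FastISelFlag,
                          bool TargetWantsGlobalISel) {
  return GlobalISelFlag == BoolOrDefault::True ||
         (GlobalISelFlag == BoolOrDefault::Unset && TargetWantsGlobalISel &&
          FastISelFlag != BoolOrDefault::True);
}

} // namespace sketch

int main() {
  using BD = sketch::BoolOrDefault;
  // Explicit -global-isel always selects GlobalISel.
  assert(sketch::useGlobalISel(BD::True, BD::True, false));
  // A target default holds only while -fast-isel is not explicitly requested.
  assert(sketch::useGlobalISel(BD::Unset, BD::Unset, true));
  assert(!sketch::useGlobalISel(BD::Unset, BD::True, true));
  // Explicitly disabled, or no target default: stay on the DAG selectors.
  assert(!sketch::useGlobalISel(BD::False, BD::Unset, true));
  assert(!sketch::useGlobalISel(BD::Unset, BD::Unset, false));
  return 0;
}
// ============================== end of sketch ==============================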
+ addPreEmitPass2(); + AddingMachinePasses = false; } @@ -935,7 +930,7 @@ void TargetPassConfig::addMachineSSAOptimization() { // loop info, just like LICM and CSE below. addILPOpts(); - addPass(&MachineLICMID, false); + addPass(&EarlyMachineLICMID, false); addPass(&MachineCSEID, false); addPass(&MachineSinkingID); @@ -1087,7 +1082,7 @@ void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { // Run post-ra machine LICM to hoist reloads / remats. // // FIXME: can this move into MachineLateOptimization? - addPass(&PostRAMachineLICMID); + addPass(&MachineLICMID); } } @@ -1129,13 +1124,13 @@ void TargetPassConfig::addBlockPlacement() { //===---------------------------------------------------------------------===// /// GlobalISel Configuration //===---------------------------------------------------------------------===// - -bool TargetPassConfig::isGlobalISelEnabled() const { - return false; -} - bool TargetPassConfig::isGlobalISelAbortEnabled() const { - return EnableGlobalISelAbort == 1; + if (EnableGlobalISelAbort.getNumOccurrences() > 0) + return EnableGlobalISelAbort == 1; + + // When no abort behaviour is specified, we don't abort if the target says + // that GISel is enabled. + return !TM->Options.EnableGlobalISel; } bool TargetPassConfig::reportDiagnosticWhenGlobalISelFallback() const { diff --git a/lib/CodeGen/TargetRegisterInfo.cpp b/lib/CodeGen/TargetRegisterInfo.cpp index 4e28c4781c2b..f03c3b8300f3 100644 --- a/lib/CodeGen/TargetRegisterInfo.cpp +++ b/lib/CodeGen/TargetRegisterInfo.cpp @@ -15,6 +15,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -92,11 +93,15 @@ Printable printReg(unsigned Reg, const TargetRegisterInfo *TRI, else if (TargetRegisterInfo::isStackSlot(Reg)) OS << "SS#" << TargetRegisterInfo::stackSlot2Index(Reg); else if (TargetRegisterInfo::isVirtualRegister(Reg)) - OS << "%vreg" << TargetRegisterInfo::virtReg2Index(Reg); - else if (TRI && Reg < TRI->getNumRegs()) - OS << '%' << TRI->getName(Reg); - else - OS << "%physreg" << Reg; + OS << '%' << TargetRegisterInfo::virtReg2Index(Reg); + else if (!TRI) + OS << '%' << "physreg" << Reg; + else if (Reg < TRI->getNumRegs()) { + OS << '%'; + printLowerCase(TRI->getName(Reg), OS); + } else + llvm_unreachable("Register kind is unsupported."); + if (SubIdx) { if (TRI) OS << ':' << TRI->getSubRegIndexName(SubIdx); @@ -132,13 +137,28 @@ Printable printRegUnit(unsigned Unit, const TargetRegisterInfo *TRI) { Printable printVRegOrUnit(unsigned Unit, const TargetRegisterInfo *TRI) { return Printable([Unit, TRI](raw_ostream &OS) { if (TRI && TRI->isVirtualRegister(Unit)) { - OS << "%vreg" << TargetRegisterInfo::virtReg2Index(Unit); + OS << '%' << TargetRegisterInfo::virtReg2Index(Unit); } else { OS << printRegUnit(Unit, TRI); } }); } +Printable printRegClassOrBank(unsigned Reg, const MachineRegisterInfo &RegInfo, + const TargetRegisterInfo *TRI) { + return Printable([Reg, &RegInfo, TRI](raw_ostream &OS) { + if (RegInfo.getRegClassOrNull(Reg)) + OS << StringRef(TRI->getRegClassName(RegInfo.getRegClass(Reg))).lower(); + else if (RegInfo.getRegBankOrNull(Reg)) + OS << StringRef(RegInfo.getRegBankOrNull(Reg)->getName()).lower(); + else { + OS << "_"; + assert((RegInfo.def_empty(Reg) || RegInfo.getType(Reg).isValid()) && + "Generic registers must have a valid type"); + } + }); +} + } // end namespace 
llvm /// getAllocatableClass - Return the maximal subclass of the given register @@ -368,50 +388,55 @@ TargetRegisterInfo::getRegAllocationHints(unsigned VirtReg, const VirtRegMap *VRM, const LiveRegMatrix *Matrix) const { const MachineRegisterInfo &MRI = MF.getRegInfo(); - std::pair Hint = MRI.getRegAllocationHint(VirtReg); - - // Hints with HintType != 0 were set by target-dependent code. - // Such targets must provide their own implementation of - // TRI::getRegAllocationHints to interpret those hint types. - assert(Hint.first == 0 && "Target must implement TRI::getRegAllocationHints"); - - // Target-independent hints are either a physical or a virtual register. - unsigned Phys = Hint.second; - if (VRM && isVirtualRegister(Phys)) - Phys = VRM->getPhys(Phys); - - // Check that Phys is a valid hint in VirtReg's register class. - if (!isPhysicalRegister(Phys)) - return false; - if (MRI.isReserved(Phys)) - return false; - // Check that Phys is in the allocation order. We shouldn't heed hints - // from VirtReg's register class if they aren't in the allocation order. The - // target probably has a reason for removing the register. - if (!is_contained(Order, Phys)) - return false; - - // All clear, tell the register allocator to prefer this register. - Hints.push_back(Phys); + const std::pair> &Hints_MRI = + MRI.getRegAllocationHints(VirtReg); + + // First hint may be a target hint. + bool Skip = (Hints_MRI.first != 0); + for (auto Reg : Hints_MRI.second) { + if (Skip) { + Skip = false; + continue; + } + + // Target-independent hints are either a physical or a virtual register. + unsigned Phys = Reg; + if (VRM && isVirtualRegister(Phys)) + Phys = VRM->getPhys(Phys); + + // Check that Phys is a valid hint in VirtReg's register class. + if (!isPhysicalRegister(Phys)) + continue; + if (MRI.isReserved(Phys)) + continue; + // Check that Phys is in the allocation order. We shouldn't heed hints + // from VirtReg's register class if they aren't in the allocation order. The + // target probably has a reason for removing the register. + if (!is_contained(Order, Phys)) + continue; + + // All clear, tell the register allocator to prefer this register. 
+ Hints.push_back(Phys); + } return false; } bool TargetRegisterInfo::canRealignStack(const MachineFunction &MF) const { - return !MF.getFunction()->hasFnAttribute("no-realign-stack"); + return !MF.getFunction().hasFnAttribute("no-realign-stack"); } bool TargetRegisterInfo::needsStackRealignment( const MachineFunction &MF) const { const MachineFrameInfo &MFI = MF.getFrameInfo(); const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); - const Function *F = MF.getFunction(); + const Function &F = MF.getFunction(); unsigned StackAlign = TFI->getStackAlignment(); bool requiresRealignment = ((MFI.getMaxAlignment() > StackAlign) || - F->hasFnAttribute(Attribute::StackAlignment)); - if (MF.getFunction()->hasFnAttribute("stackrealign") || requiresRealignment) { + F.hasFnAttribute(Attribute::StackAlignment)); + if (F.hasFnAttribute("stackrealign") || requiresRealignment) { if (canRealignStack(MF)) return true; - DEBUG(dbgs() << "Can't realign function's stack: " << F->getName() << "\n"); + DEBUG(dbgs() << "Can't realign function's stack: " << F.getName() << "\n"); } return false; } diff --git a/lib/CodeGen/TargetSubtargetInfo.cpp b/lib/CodeGen/TargetSubtargetInfo.cpp index 1a317cd865f0..a72d7ebaed4f 100644 --- a/lib/CodeGen/TargetSubtargetInfo.cpp +++ b/lib/CodeGen/TargetSubtargetInfo.cpp @@ -38,6 +38,10 @@ bool TargetSubtargetInfo::enableAtomicExpand() const { return true; } +bool TargetSubtargetInfo::enableIndirectBrExpand() const { + return false; +} + bool TargetSubtargetInfo::enableMachineScheduler() const { return false; } @@ -111,3 +115,6 @@ std::string TargetSubtargetInfo::getSchedInfoStr(MCInst const &MCI) const { TSchedModel.computeInstrRThroughput(MCI.getOpcode()); return createSchedInfoStr(Latency, RThroughput); } + +void TargetSubtargetInfo::mirFileLoaded(MachineFunction &MF) const { +} diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index 650912f56a37..774b76f84b7f 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -35,7 +35,7 @@ #include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/LiveInterval.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" @@ -110,6 +110,10 @@ class TwoAddressInstructionPass : public MachineFunctionPass { // Set of already processed instructions in the current block. SmallPtrSet Processed; + // Set of instructions converted to three-address by target and then sunk + // down current basic block. + SmallPtrSet SunkInstrs; + // A map from virtual registers to physical registers which are likely targets // to be coalesced to due to copies from physical registers to virtual // registers. e.g. v1024 = move r0. @@ -454,8 +458,8 @@ static bool isPlainlyKilled(MachineInstr *MI, unsigned Reg, /// For example, in this code: /// /// %reg1034 = copy %reg1024 -/// %reg1035 = copy %reg1025 -/// %reg1036 = add %reg1034, %reg1035 +/// %reg1035 = copy killed %reg1025 +/// %reg1036 = add killed %reg1034, killed %reg1035 /// /// %reg1034 is not considered to be killed, since it is copied from a /// register which is not killed. Treating it as not killed lets the @@ -587,31 +591,31 @@ isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC, // general, we want no uses between this instruction and the definition of // the two-address register. 
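// ===========================================================================
// [Illustrative sketch, not part of the patch] The getRegAllocationHints()
// hunk above now walks a whole vector of hints, skipping a leading
// target-specific hint and dropping candidates that are reserved or missing
// from the allocation order. The same filtering over plain integers; all
// names are hypothetical, and 0 stands for the "no target hint" kind.
// ===========================================================================
#include <algorithm>
#include <cassert>
#include <set>
#include <vector>

namespace sketch {

inline std::vector<unsigned>
filterHints(unsigned HintKind, const std::vector<unsigned> &Hints,
            const std::vector<unsigned> &AllocationOrder,
            const std::set<unsigned> &Reserved) {
  std::vector<unsigned> Result;
  bool Skip = (HintKind != 0); // first entry is a target hint; leave it to the target
  for (unsigned Reg : Hints) {
    if (Skip) {
      Skip = false;
      continue;
    }
    if (Reserved.count(Reg))
      continue; // never hint a reserved register
    if (std::find(AllocationOrder.begin(), AllocationOrder.end(), Reg) ==
        AllocationOrder.end())
      continue; // the hint must be allocatable for this register class
    Result.push_back(Reg);
  }
  return Result;
}

} // namespace sketch

int main() {
  // Hint kind 1: the leading entry (40) is target-specific and is skipped;
  // 7 is reserved, while 3 and 9 survive because they are in the allocation order.
  auto Out = sketch::filterHints(1, {40, 3, 7, 9}, {3, 9, 11}, {7});
  assert((Out == std::vector<unsigned>{3, 9}));
  return 0;
}
// ============================== end of sketch ==============================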
// e.g. - // %reg1028 = EXTRACT_SUBREG %reg1027, 1 - // %reg1029 = MOV8rr %reg1028 - // %reg1029 = SHR8ri %reg1029, 7, %EFLAGS - // insert => %reg1030 = MOV8rr %reg1028 - // %reg1030 = ADD8rr %reg1028, %reg1029, %EFLAGS + // %reg1028 = EXTRACT_SUBREG killed %reg1027, 1 + // %reg1029 = MOV8rr %reg1028 + // %reg1029 = SHR8ri %reg1029, 7, implicit dead %eflags + // insert => %reg1030 = MOV8rr %reg1028 + // %reg1030 = ADD8rr killed %reg1028, killed %reg1029, implicit dead %eflags // In this case, it might not be possible to coalesce the second MOV8rr // instruction if the first one is coalesced. So it would be profitable to // commute it: - // %reg1028 = EXTRACT_SUBREG %reg1027, 1 - // %reg1029 = MOV8rr %reg1028 - // %reg1029 = SHR8ri %reg1029, 7, %EFLAGS - // insert => %reg1030 = MOV8rr %reg1029 - // %reg1030 = ADD8rr %reg1029, %reg1028, %EFLAGS + // %reg1028 = EXTRACT_SUBREG killed %reg1027, 1 + // %reg1029 = MOV8rr %reg1028 + // %reg1029 = SHR8ri %reg1029, 7, implicit dead %eflags + // insert => %reg1030 = MOV8rr %reg1029 + // %reg1030 = ADD8rr killed %reg1029, killed %reg1028, implicit dead %eflags if (!isPlainlyKilled(MI, regC, LIS)) return false; // Ok, we have something like: - // %reg1030 = ADD8rr %reg1028, %reg1029, %EFLAGS + // %reg1030 = ADD8rr killed %reg1028, killed %reg1029, implicit dead %eflags // let's see if it's worth commuting it. // Look for situations like this: - // %reg1024 = MOV r1 - // %reg1025 = MOV r0 - // %reg1026 = ADD %reg1024, %reg1025 + // %reg1024 = MOV r1 + // %reg1025 = MOV r0 + // %reg1026 = ADD %reg1024, %reg1025 // r0 = MOV %reg1026 // Commute the ADD to hopefully eliminate an otherwise unavoidable copy. unsigned ToRegA = getMappedReg(regA, DstRegMap); @@ -709,9 +713,9 @@ bool TwoAddressInstructionPass::commuteInstruction(MachineInstr *MI, bool TwoAddressInstructionPass::isProfitableToConv3Addr(unsigned RegA,unsigned RegB){ // Look for situations like this: - // %reg1024 = MOV r1 - // %reg1025 = MOV r0 - // %reg1026 = ADD %reg1024, %reg1025 + // %reg1024 = MOV r1 + // %reg1025 = MOV r0 + // %reg1026 = ADD %reg1024, %reg1025 // r2 = MOV %reg1026 // Turn ADD into a 3-address instruction to avoid a copy. unsigned FromRegB = getMappedReg(RegB, SrcRegMap); @@ -756,6 +760,8 @@ TwoAddressInstructionPass::convertInstTo3Addr(MachineBasicBlock::iterator &mi, mi = NewMI; nmi = std::next(mi); } + else + SunkInstrs.insert(NewMI); // Update source and destination register maps. SrcRegMap.erase(RegA); @@ -1460,7 +1466,7 @@ collectTiedOperands(MachineInstr *MI, TiedOperandMap &TiedOperands) { assert(SrcReg && SrcMO.isUse() && "two address instruction invalid"); - // Deal with uses immediately - simply rewrite the src operand. + // Deal with undef uses immediately - simply rewrite the src operand. if (SrcMO.isUndef() && !DstMO.getSubReg()) { // Constrain the DstReg register class if required. if (TargetRegisterInfo::isVirtualRegister(DstReg)) @@ -1655,6 +1661,10 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { else AA = nullptr; OptLevel = TM.getOptLevel(); + // Disable optimizations if requested. We cannot skip the whole pass as some + // fixups are necessary for correctness. 
+ if (skipFunction(Func.getFunction())) + OptLevel = CodeGenOpt::None; bool MadeChange = false; @@ -1674,10 +1684,13 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { SrcRegMap.clear(); DstRegMap.clear(); Processed.clear(); + SunkInstrs.clear(); for (MachineBasicBlock::iterator mi = MBB->begin(), me = MBB->end(); mi != me; ) { MachineBasicBlock::iterator nmi = std::next(mi); - if (mi->isDebugValue()) { + // Don't revisit an instruction previously converted by target. It may + // contain undef register operands (%noreg), which are not handled. + if (mi->isDebugValue() || SunkInstrs.count(&*mi)) { mi = nmi; continue; } @@ -1765,8 +1778,8 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { /// /// Becomes: /// -/// %dst:ssub0 = COPY %v1 -/// %dst:ssub1 = COPY %v2 +/// undef %dst:ssub0 = COPY %v1 +/// %dst:ssub1 = COPY %v2 void TwoAddressInstructionPass:: eliminateRegSequence(MachineBasicBlock::iterator &MBBI) { MachineInstr &MI = *MBBI; @@ -1790,7 +1803,7 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) { MachineOperand &UseMO = MI.getOperand(i); unsigned SrcReg = UseMO.getReg(); unsigned SubIdx = MI.getOperand(i+1).getImm(); - // Nothing needs to be inserted for operands. + // Nothing needs to be inserted for undef operands. if (UseMO.isUndef()) continue; @@ -1812,7 +1825,7 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) { .addReg(DstReg, RegState::Define, SubIdx) .add(UseMO); - // The first def needs an flag because there is no live register + // The first def needs an undef flag because there is no live register // before it. if (!DefEmitted) { CopyMI->getOperand(0).setIsUndef(true); diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp index 1533abde87ef..13f7e83f3dd0 100644 --- a/lib/CodeGen/VirtRegMap.cpp +++ b/lib/CodeGen/VirtRegMap.cpp @@ -21,8 +21,8 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LiveInterval.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "llvm/CodeGen/LiveStackAnalysis.h" +#include "llvm/CodeGen/LiveIntervals.h" +#include "llvm/CodeGen/LiveStacks.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -380,8 +380,8 @@ void VirtRegRewriter::handleIdentityCopy(MachineInstr &MI) const { ++NumIdCopies; // Copies like: - // %R0 = COPY %R0 - // %AL = COPY %AL, %EAX + // %r0 = COPY undef %r0 + // %al = COPY %al, implicit-def %eax // give us additional liveness information: The target (super-)register // must not be valid before this point. Replace the COPY with a KILL // instruction to maintain this information. @@ -488,7 +488,7 @@ void VirtRegRewriter::rewrite() { if (SubReg != 0) { if (NoSubRegLiveness) { // A virtual register kill refers to the whole register, so we may - // have to add operands for the super-register. A + // have to add implicit killed operands for the super-register. A // partial redef always kills and redefines the super-register. if ((MO.readsReg() && (MO.isDef() || MO.isKill())) || (MO.isDef() && subRegLiveThrough(*MI, PhysReg))) @@ -513,9 +513,9 @@ void VirtRegRewriter::rewrite() { } } - // The and flags only make sense for + // The def undef and def internal flags only make sense for // sub-register defs, and we are substituting a full physreg. An - // operand from the SuperKills list will represent the + // implicit killed operand from the SuperKills list will represent the // partial read of the super-register. 
if (MO.isDef()) { MO.setIsUndef(false); @@ -530,6 +530,7 @@ void VirtRegRewriter::rewrite() { // Rewrite. Note we could have used MachineOperand::substPhysReg(), but // we need the inlining here. MO.setReg(PhysReg); + MO.setIsRenamableIfNoExtraRegAllocReq(); } // Add any missing super-register kills after rewriting the whole diff --git a/lib/CodeGen/WinEHPrepare.cpp b/lib/CodeGen/WinEHPrepare.cpp index 7ad84734203d..0b16a113640d 100644 --- a/lib/CodeGen/WinEHPrepare.cpp +++ b/lib/CodeGen/WinEHPrepare.cpp @@ -838,17 +838,11 @@ void WinEHPrepare::cloneCommonBlocks(Function &F) { for (auto &BBMapping : Orig2Clone) { BasicBlock *OldBlock = BBMapping.first; BasicBlock *NewBlock = BBMapping.second; - for (Instruction &OldI : *OldBlock) { - auto *OldPN = dyn_cast(&OldI); - if (!OldPN) - break; - UpdatePHIOnClonedBlock(OldPN, /*IsForOldBlock=*/true); + for (PHINode &OldPN : OldBlock->phis()) { + UpdatePHIOnClonedBlock(&OldPN, /*IsForOldBlock=*/true); } - for (Instruction &NewI : *NewBlock) { - auto *NewPN = dyn_cast(&NewI); - if (!NewPN) - break; - UpdatePHIOnClonedBlock(NewPN, /*IsForOldBlock=*/false); + for (PHINode &NewPN : NewBlock->phis()) { + UpdatePHIOnClonedBlock(&NewPN, /*IsForOldBlock=*/false); } } @@ -858,17 +852,13 @@ void WinEHPrepare::cloneCommonBlocks(Function &F) { BasicBlock *OldBlock = BBMapping.first; BasicBlock *NewBlock = BBMapping.second; for (BasicBlock *SuccBB : successors(NewBlock)) { - for (Instruction &SuccI : *SuccBB) { - auto *SuccPN = dyn_cast(&SuccI); - if (!SuccPN) - break; - + for (PHINode &SuccPN : SuccBB->phis()) { // Ok, we have a PHI node. Figure out what the incoming value was for // the OldBlock. - int OldBlockIdx = SuccPN->getBasicBlockIndex(OldBlock); + int OldBlockIdx = SuccPN.getBasicBlockIndex(OldBlock); if (OldBlockIdx == -1) break; - Value *IV = SuccPN->getIncomingValue(OldBlockIdx); + Value *IV = SuccPN.getIncomingValue(OldBlockIdx); // Remap the value if necessary. if (auto *Inst = dyn_cast(IV)) { @@ -877,7 +867,7 @@ void WinEHPrepare::cloneCommonBlocks(Function &F) { IV = I->second; } - SuccPN->addIncoming(IV, NewBlock); + SuccPN.addIncoming(IV, NewBlock); } } } diff --git a/lib/CodeGen/XRayInstrumentation.cpp b/lib/CodeGen/XRayInstrumentation.cpp index 60ac24e62a48..3d83afcf1fc5 100644 --- a/lib/CodeGen/XRayInstrumentation.cpp +++ b/lib/CodeGen/XRayInstrumentation.cpp @@ -142,7 +142,7 @@ void XRayInstrumentation::prependRetWithPatchableExit( } bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) { - auto &F = *MF.getFunction(); + auto &F = MF.getFunction(); auto InstrAttr = F.getFnAttribute("function-instrument"); bool AlwaysInstrument = !InstrAttr.hasAttribute(Attribute::None) && InstrAttr.isStringAttribute() && diff --git a/lib/DebugInfo/CodeView/AppendingTypeTableBuilder.cpp b/lib/DebugInfo/CodeView/AppendingTypeTableBuilder.cpp new file mode 100644 index 000000000000..8828671d9be9 --- /dev/null +++ b/lib/DebugInfo/CodeView/AppendingTypeTableBuilder.cpp @@ -0,0 +1,101 @@ +//===- AppendingTypeTableBuilder.cpp --------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/CodeView/AppendingTypeTableBuilder.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/ContinuationRecordBuilder.h" +#include "llvm/DebugInfo/CodeView/RecordSerialization.h" +#include "llvm/DebugInfo/CodeView/TypeIndex.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/BinaryByteStream.h" +#include "llvm/Support/BinaryStreamWriter.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" +#include +#include +#include +#include + +using namespace llvm; +using namespace llvm::codeview; + +TypeIndex AppendingTypeTableBuilder::nextTypeIndex() const { + return TypeIndex::fromArrayIndex(SeenRecords.size()); +} + +AppendingTypeTableBuilder::AppendingTypeTableBuilder(BumpPtrAllocator &Storage) + : RecordStorage(Storage) {} + +AppendingTypeTableBuilder::~AppendingTypeTableBuilder() = default; + +Optional AppendingTypeTableBuilder::getFirst() { + if (empty()) + return None; + + return TypeIndex(TypeIndex::FirstNonSimpleIndex); +} + +Optional AppendingTypeTableBuilder::getNext(TypeIndex Prev) { + if (++Prev == nextTypeIndex()) + return None; + return Prev; +} + +CVType AppendingTypeTableBuilder::getType(TypeIndex Index) { + CVType Type; + Type.RecordData = SeenRecords[Index.toArrayIndex()]; + const RecordPrefix *P = + reinterpret_cast(Type.RecordData.data()); + Type.Type = static_cast(uint16_t(P->RecordKind)); + return Type; +} + +StringRef AppendingTypeTableBuilder::getTypeName(TypeIndex Index) { + llvm_unreachable("Method not implemented"); +} + +bool AppendingTypeTableBuilder::contains(TypeIndex Index) { + if (Index.isSimple() || Index.isNoneType()) + return false; + + return Index.toArrayIndex() < SeenRecords.size(); +} + +uint32_t AppendingTypeTableBuilder::size() { return SeenRecords.size(); } + +uint32_t AppendingTypeTableBuilder::capacity() { return SeenRecords.size(); } + +ArrayRef> AppendingTypeTableBuilder::records() const { + return SeenRecords; +} + +void AppendingTypeTableBuilder::reset() { SeenRecords.clear(); } + +TypeIndex +AppendingTypeTableBuilder::insertRecordBytes(ArrayRef &Record) { + TypeIndex NewTI = nextTypeIndex(); + uint8_t *Stable = RecordStorage.Allocate(Record.size()); + memcpy(Stable, Record.data(), Record.size()); + Record = ArrayRef(Stable, Record.size()); + SeenRecords.push_back(Record); + return NewTI; +} + +TypeIndex +AppendingTypeTableBuilder::insertRecord(ContinuationRecordBuilder &Builder) { + TypeIndex TI; + auto Fragments = Builder.end(nextTypeIndex()); + assert(!Fragments.empty()); + for (auto C : Fragments) + TI = insertRecordBytes(C.RecordData); + return TI; +} diff --git a/lib/DebugInfo/CodeView/CMakeLists.txt b/lib/DebugInfo/CodeView/CMakeLists.txt index b0cefe64fddf..0515788d85ef 100644 --- a/lib/DebugInfo/CodeView/CMakeLists.txt +++ b/lib/DebugInfo/CodeView/CMakeLists.txt @@ -1,6 +1,8 @@ add_llvm_library(LLVMDebugInfoCodeView + AppendingTypeTableBuilder.cpp CodeViewError.cpp CodeViewRecordIO.cpp + ContinuationRecordBuilder.cpp CVSymbolVisitor.cpp CVTypeVisitor.cpp DebugChecksumsSubsection.cpp @@ -17,10 +19,13 @@ add_llvm_library(LLVMDebugInfoCodeView DebugSymbolsSubsection.cpp EnumTables.cpp Formatters.cpp + GlobalTypeTableBuilder.cpp LazyRandomTypeCollection.cpp Line.cpp + MergingTypeTableBuilder.cpp RecordName.cpp RecordSerialization.cpp + SimpleTypeSerializer.cpp StringsAndChecksums.cpp 
SymbolRecordMapping.cpp SymbolDumper.cpp @@ -28,8 +33,8 @@ add_llvm_library(LLVMDebugInfoCodeView TypeDumpVisitor.cpp TypeIndex.cpp TypeIndexDiscovery.cpp + TypeHashing.cpp TypeRecordMapping.cpp - TypeSerializer.cpp TypeStreamMerger.cpp TypeTableCollection.cpp diff --git a/lib/DebugInfo/CodeView/CVSymbolVisitor.cpp b/lib/DebugInfo/CodeView/CVSymbolVisitor.cpp index e0c7ef58c304..44a67743169e 100644 --- a/lib/DebugInfo/CodeView/CVSymbolVisitor.cpp +++ b/lib/DebugInfo/CodeView/CVSymbolVisitor.cpp @@ -11,7 +11,6 @@ #include "llvm/DebugInfo/CodeView/CodeViewError.h" #include "llvm/DebugInfo/CodeView/SymbolVisitorCallbacks.h" -#include "llvm/Support/BinaryByteStream.h" using namespace llvm; using namespace llvm::codeview; diff --git a/lib/DebugInfo/CodeView/CVTypeVisitor.cpp b/lib/DebugInfo/CodeView/CVTypeVisitor.cpp index 79b9fdefd40e..a4182a3b2fa1 100644 --- a/lib/DebugInfo/CodeView/CVTypeVisitor.cpp +++ b/lib/DebugInfo/CodeView/CVTypeVisitor.cpp @@ -9,7 +9,6 @@ #include "llvm/DebugInfo/CodeView/CVTypeVisitor.h" -#include "llvm/ADT/TinyPtrVector.h" #include "llvm/DebugInfo/CodeView/CodeViewError.h" #include "llvm/DebugInfo/CodeView/TypeCollection.h" #include "llvm/DebugInfo/CodeView/TypeDeserializer.h" diff --git a/lib/DebugInfo/CodeView/ContinuationRecordBuilder.cpp b/lib/DebugInfo/CodeView/ContinuationRecordBuilder.cpp new file mode 100644 index 000000000000..f180fc6990fc --- /dev/null +++ b/lib/DebugInfo/CodeView/ContinuationRecordBuilder.cpp @@ -0,0 +1,259 @@ +#include "llvm/DebugInfo/CodeView/ContinuationRecordBuilder.h" + +using namespace llvm; +using namespace llvm::codeview; + +namespace { +struct ContinuationRecord { + ulittle16_t Kind{uint16_t(TypeLeafKind::LF_INDEX)}; + ulittle16_t Size{0}; + ulittle32_t IndexRef{0xB0C0B0C0}; +}; + +struct SegmentInjection { + SegmentInjection(TypeLeafKind Kind) { Prefix.RecordKind = Kind; } + + ContinuationRecord Cont; + RecordPrefix Prefix; +}; +} // namespace + +static void addPadding(BinaryStreamWriter &Writer) { + uint32_t Align = Writer.getOffset() % 4; + if (Align == 0) + return; + + int PaddingBytes = 4 - Align; + while (PaddingBytes > 0) { + uint8_t Pad = static_cast(LF_PAD0 + PaddingBytes); + cantFail(Writer.writeInteger(Pad)); + --PaddingBytes; + } +} + +static SegmentInjection InjectFieldList(TypeLeafKind::LF_FIELDLIST); +static SegmentInjection InjectMethodOverloadList(TypeLeafKind::LF_METHODLIST); + +static constexpr uint32_t ContinuationLength = sizeof(ContinuationRecord); +static constexpr uint32_t MaxSegmentLength = + MaxRecordLength - ContinuationLength; + +static inline TypeLeafKind getTypeLeafKind(ContinuationRecordKind CK) { + return (CK == ContinuationRecordKind::FieldList) ? LF_FIELDLIST + : LF_METHODLIST; +} + +ContinuationRecordBuilder::ContinuationRecordBuilder() + : SegmentWriter(Buffer), Mapping(SegmentWriter) {} + +ContinuationRecordBuilder::~ContinuationRecordBuilder() {} + +void ContinuationRecordBuilder::begin(ContinuationRecordKind RecordKind) { + assert(!Kind.hasValue()); + Kind = RecordKind; + Buffer.clear(); + SegmentWriter.setOffset(0); + SegmentOffsets.clear(); + SegmentOffsets.push_back(0); + assert(SegmentWriter.getOffset() == 0); + assert(SegmentWriter.getLength() == 0); + + const SegmentInjection *FLI = + (RecordKind == ContinuationRecordKind::FieldList) + ? 
&InjectFieldList + : &InjectMethodOverloadList; + const uint8_t *FLIB = reinterpret_cast(FLI); + InjectedSegmentBytes = + ArrayRef(FLIB, FLIB + sizeof(SegmentInjection)); + + CVType Type; + Type.Type = getTypeLeafKind(RecordKind); + cantFail(Mapping.visitTypeBegin(Type)); + + // Seed the first trecord with an appropriate record prefix. + RecordPrefix Prefix; + Prefix.RecordLen = 0; + Prefix.RecordKind = Type.Type; + cantFail(SegmentWriter.writeObject(Prefix)); +} + +template +void ContinuationRecordBuilder::writeMemberType(RecordType &Record) { + assert(Kind.hasValue()); + + uint32_t OriginalOffset = SegmentWriter.getOffset(); + CVMemberRecord CVMR; + CVMR.Kind = static_cast(Record.getKind()); + + // Member Records aren't length-prefixed, they only have a 2-byte TypeLeafKind + // at the beginning. + cantFail(SegmentWriter.writeEnum(CVMR.Kind)); + + // Let the Mapping handle the rest. + cantFail(Mapping.visitMemberBegin(CVMR)); + cantFail(Mapping.visitKnownMember(CVMR, Record)); + cantFail(Mapping.visitMemberEnd(CVMR)); + + // Make sure it's padded to 4 bytes. + addPadding(SegmentWriter); + assert(getCurrentSegmentLength() % 4 == 0); + + // The maximum length of a single segment is 64KB minus the size to insert a + // continuation. So if we are over that, inject a continuation between the + // previous member and the member that was just written, then end the previous + // segment after the continuation and begin a new one with the just-written + // member. + if (getCurrentSegmentLength() > MaxSegmentLength) { + // We need to inject some bytes before the member we just wrote but after + // the previous member. Save off the length of the member we just wrote so + // that we can do some sanity checking on it. + uint32_t MemberLength = SegmentWriter.getOffset() - OriginalOffset; + (void) MemberLength; + insertSegmentEnd(OriginalOffset); + // Since this member now becomes a new top-level record, it should have + // gotten a RecordPrefix injected, and that RecordPrefix + the member we + // just wrote should now constitute the entirety of the current "new" + // segment. + assert(getCurrentSegmentLength() == MemberLength + sizeof(RecordPrefix)); + } + + assert(getCurrentSegmentLength() % 4 == 0); + assert(getCurrentSegmentLength() <= MaxSegmentLength); +} + +uint32_t ContinuationRecordBuilder::getCurrentSegmentLength() const { + return SegmentWriter.getOffset() - SegmentOffsets.back(); +} + +void ContinuationRecordBuilder::insertSegmentEnd(uint32_t Offset) { + uint32_t SegmentBegin = SegmentOffsets.back(); + (void)SegmentBegin; + assert(Offset > SegmentBegin); + assert(Offset - SegmentBegin <= MaxSegmentLength); + + // We need to make space for the continuation record. For now we can't fill + // out the length or the TypeIndex of the back-reference, but we need the + // space to at least be there. + Buffer.insert(Offset, InjectedSegmentBytes); + + uint32_t NewSegmentBegin = Offset + ContinuationLength; + uint32_t SegmentLength = NewSegmentBegin - SegmentOffsets.back(); + (void) SegmentLength; + + assert(SegmentLength % 4 == 0); + assert(SegmentLength <= MaxRecordLength); + SegmentOffsets.push_back(NewSegmentBegin); + + // Seek to the end so that we can keep writing against the new segment. 
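+  // Note that the bytes injected above are the LF_INDEX continuation record
+  // that closes the current segment followed by the RecordPrefix that opens
+  // the next one; NewSegmentBegin (Offset + ContinuationLength) therefore
+  // points exactly at that fresh prefix, and seeking to the end below
+  // resumes writing member records into the new segment.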
+ SegmentWriter.setOffset(SegmentWriter.getLength()); + assert(SegmentWriter.bytesRemaining() == 0); +} + +CVType ContinuationRecordBuilder::createSegmentRecord( + uint32_t OffBegin, uint32_t OffEnd, Optional RefersTo) { + assert(OffEnd - OffBegin <= USHRT_MAX); + + MutableArrayRef Data = Buffer.data(); + Data = Data.slice(OffBegin, OffEnd - OffBegin); + + CVType Type; + Type.Type = getTypeLeafKind(*Kind); + Type.RecordData = Data; + + // Write the length to the RecordPrefix, making sure it does not include + // sizeof(RecordPrefix.Length) + RecordPrefix *Prefix = reinterpret_cast(Data.data()); + assert(Prefix->RecordKind == Type.Type); + Prefix->RecordLen = Data.size() - sizeof(RecordPrefix::RecordLen); + + if (RefersTo.hasValue()) { + auto Continuation = Data.take_back(ContinuationLength); + ContinuationRecord *CR = + reinterpret_cast(Continuation.data()); + assert(CR->Kind == TypeLeafKind::LF_INDEX); + assert(CR->IndexRef == 0xB0C0B0C0); + CR->IndexRef = RefersTo->getIndex(); + } + + return Type; +} + +std::vector ContinuationRecordBuilder::end(TypeIndex Index) { + CVType Type; + Type.Type = getTypeLeafKind(*Kind); + cantFail(Mapping.visitTypeEnd(Type)); + + // We're now done, and we have a series of segments each beginning at an + // offset specified in the SegmentOffsets array. We now need to iterate + // over each segment and post-process them in the following two ways: + // 1) Each top-level record has a RecordPrefix whose type is either + // LF_FIELDLIST or LF_METHODLIST, but the Length field is still 0. + // Those should all be set to the correct length now. + // 2) Each continuation record has an IndexRef field which we set to the + // magic value 0xB0C0B0C0. Now that the caller has told us the TypeIndex + // they want this sequence to start from, we can go through and update + // each one. + // + // Logically, the sequence of records we've built up looks like this: + // + // SegmentOffsets[0]: (Initially: uninitialized) + // SegmentOffsets[0]+2: LF_FIELDLIST + // SegmentOffsets[0]+4: Member[0] + // SegmentOffsets[0]+?: ... + // SegmentOffsets[0]+?: Member[4] + // SegmentOffsets[1]-8: LF_INDEX + // SegmentOffsets[1]-6: 0 + // SegmentOffsets[1]-4: (Initially: 0xB0C0B0C0) + // + // SegmentOffsets[1]: (Initially: uninitialized) + // SegmentOffsets[1]+2: LF_FIELDLIST + // SegmentOffsets[1]+4: Member[0] + // SegmentOffsets[1]+?: ... + // SegmentOffsets[1]+?: Member[s] + // SegmentOffsets[2]-8: LF_INDEX + // SegmentOffsets[2]-6: 0 + // SegmentOffsets[2]-4: (Initially: 0xB0C0B0C0) + // + // ... + // + // SegmentOffsets[N]: (Initially: uninitialized) + // SegmentOffsets[N]+2: LF_FIELDLIST + // SegmentOffsets[N]+4: Member[0] + // SegmentOffsets[N]+?: ... + // SegmentOffsets[N]+?: Member[t] + // + // And this is the way we have laid them out in the serialization buffer. But + // we cannot actually commit them to the underlying stream this way, due to + // the topological sorting requirement of a type stream (specifically, + // TypeIndex references can only point backwards, not forwards). So the + // sequence that we return to the caller contains the records in reverse + // order, which is the proper order for committing the serialized records. 
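+  // For example, with three segments and a starting Index of 0x1000, the
+  // loop below returns the final segment first (nothing to patch), then the
+  // middle segment with its continuation patched to refer to 0x1000, then
+  // the first segment referring to 0x1001. A caller committing the returned
+  // records in order assigns them 0x1000, 0x1001 and 0x1002, so every
+  // LF_INDEX reference points at a record that has already been emitted, and
+  // the index of the last record committed (0x1002) is the one that names
+  // the field list as a whole.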
+ + std::vector Types; + Types.reserve(SegmentOffsets.size()); + + auto SO = makeArrayRef(SegmentOffsets); + + uint32_t End = SegmentWriter.getOffset(); + + Optional RefersTo; + for (uint32_t Offset : reverse(SO)) { + Types.push_back(createSegmentRecord(Offset, End, RefersTo)); + + End = Offset; + RefersTo = Index++; + } + + Kind.reset(); + return Types; +} + +// Explicitly instantiate the member function for each known type so that we can +// implement this in the cpp file. +#define TYPE_RECORD(EnumName, EnumVal, Name) +#define TYPE_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) +#define MEMBER_RECORD(EnumName, EnumVal, Name) \ + template void llvm::codeview::ContinuationRecordBuilder::writeMemberType( \ + Name##Record &Record); +#define MEMBER_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) +#include "llvm/DebugInfo/CodeView/CodeViewTypes.def" diff --git a/lib/DebugInfo/CodeView/GlobalTypeTableBuilder.cpp b/lib/DebugInfo/CodeView/GlobalTypeTableBuilder.cpp new file mode 100644 index 000000000000..3ecd684c1e39 --- /dev/null +++ b/lib/DebugInfo/CodeView/GlobalTypeTableBuilder.cpp @@ -0,0 +1,127 @@ +//===- GlobalTypeTableBuilder.cpp -----------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/ContinuationRecordBuilder.h" +#include "llvm/DebugInfo/CodeView/RecordSerialization.h" +#include "llvm/DebugInfo/CodeView/TypeIndex.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/BinaryByteStream.h" +#include "llvm/Support/BinaryStreamWriter.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" +#include +#include +#include +#include + +using namespace llvm; +using namespace llvm::codeview; + +TypeIndex GlobalTypeTableBuilder::nextTypeIndex() const { + return TypeIndex::fromArrayIndex(SeenRecords.size()); +} + +GlobalTypeTableBuilder::GlobalTypeTableBuilder(BumpPtrAllocator &Storage) + : RecordStorage(Storage) { + SeenRecords.reserve(4096); +} + +GlobalTypeTableBuilder::~GlobalTypeTableBuilder() = default; + +Optional GlobalTypeTableBuilder::getFirst() { + if (empty()) + return None; + + return TypeIndex(TypeIndex::FirstNonSimpleIndex); +} + +Optional GlobalTypeTableBuilder::getNext(TypeIndex Prev) { + if (++Prev == nextTypeIndex()) + return None; + return Prev; +} + +CVType GlobalTypeTableBuilder::getType(TypeIndex Index) { + CVType Type; + Type.RecordData = SeenRecords[Index.toArrayIndex()]; + const RecordPrefix *P = + reinterpret_cast(Type.RecordData.data()); + Type.Type = static_cast(uint16_t(P->RecordKind)); + return Type; +} + +StringRef GlobalTypeTableBuilder::getTypeName(TypeIndex Index) { + llvm_unreachable("Method not implemented"); +} + +bool GlobalTypeTableBuilder::contains(TypeIndex Index) { + if (Index.isSimple() || Index.isNoneType()) + return false; + + return Index.toArrayIndex() < SeenRecords.size(); +} + +uint32_t GlobalTypeTableBuilder::size() { return SeenRecords.size(); } + +uint32_t GlobalTypeTableBuilder::capacity() { return SeenRecords.size(); } + +ArrayRef> GlobalTypeTableBuilder::records() const { + return SeenRecords; +} + +ArrayRef GlobalTypeTableBuilder::hashes() const { 
+ return SeenHashes; +} + +void GlobalTypeTableBuilder::reset() { + HashedRecords.clear(); + SeenRecords.clear(); +} + +static inline ArrayRef stabilize(BumpPtrAllocator &Alloc, + ArrayRef Data) { + uint8_t *Stable = Alloc.Allocate(Data.size()); + memcpy(Stable, Data.data(), Data.size()); + return makeArrayRef(Stable, Data.size()); +} + +TypeIndex GlobalTypeTableBuilder::insertRecordAs(GloballyHashedType Hash, + CreateRecord Create) { + auto Result = HashedRecords.try_emplace(Hash, nextTypeIndex()); + + if (Result.second) { + ArrayRef RecordData = stabilize(RecordStorage, Create()); + SeenRecords.push_back(RecordData); + SeenHashes.push_back(Hash); + } + + // Update the caller's copy of Record to point a stable copy. + return Result.first->second; +} + +TypeIndex GlobalTypeTableBuilder::insertRecordBytes(ArrayRef Record) { + GloballyHashedType GHT = + GloballyHashedType::hashType(Record, SeenHashes, SeenHashes); + return insertRecordAs(GHT, [Record]() { return Record; }); +} + +TypeIndex +GlobalTypeTableBuilder::insertRecord(ContinuationRecordBuilder &Builder) { + TypeIndex TI; + auto Fragments = Builder.end(nextTypeIndex()); + assert(!Fragments.empty()); + for (auto C : Fragments) + TI = insertRecordBytes(C.RecordData); + return TI; +} diff --git a/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp b/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp index bad291e83818..ca8007411cad 100644 --- a/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp +++ b/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp @@ -58,21 +58,27 @@ LazyRandomTypeCollection::LazyRandomTypeCollection(const CVTypeArray &Types, uint32_t NumRecords) : LazyRandomTypeCollection(Types, NumRecords, PartialOffsetArray()) {} -void LazyRandomTypeCollection::reset(StringRef Data, uint32_t RecordCountHint) { +void LazyRandomTypeCollection::reset(BinaryStreamReader &Reader, + uint32_t RecordCountHint) { Count = 0; PartialOffsets = PartialOffsetArray(); - BinaryStreamReader Reader(Data, support::little); - error(Reader.readArray(Types, Reader.getLength())); + error(Reader.readArray(Types, Reader.bytesRemaining())); // Clear and then resize, to make sure existing data gets destroyed. Records.clear(); Records.resize(RecordCountHint); } +void LazyRandomTypeCollection::reset(StringRef Data, uint32_t RecordCountHint) { + BinaryStreamReader Reader(Data, support::little); + reset(Reader, RecordCountHint); +} + void LazyRandomTypeCollection::reset(ArrayRef Data, uint32_t RecordCountHint) { - reset(toStringRef(Data), RecordCountHint); + BinaryStreamReader Reader(Data, support::little); + reset(Reader, RecordCountHint); } uint32_t LazyRandomTypeCollection::getOffsetOfType(TypeIndex Index) { diff --git a/lib/DebugInfo/CodeView/MergingTypeTableBuilder.cpp b/lib/DebugInfo/CodeView/MergingTypeTableBuilder.cpp new file mode 100644 index 000000000000..8aee4aa2e2ae --- /dev/null +++ b/lib/DebugInfo/CodeView/MergingTypeTableBuilder.cpp @@ -0,0 +1,128 @@ +//===- MergingTypeTableBuilder.cpp ----------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/ContinuationRecordBuilder.h" +#include "llvm/DebugInfo/CodeView/RecordSerialization.h" +#include "llvm/DebugInfo/CodeView/TypeIndex.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/BinaryByteStream.h" +#include "llvm/Support/BinaryStreamWriter.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" +#include +#include +#include +#include + +using namespace llvm; +using namespace llvm::codeview; + +TypeIndex MergingTypeTableBuilder::nextTypeIndex() const { + return TypeIndex::fromArrayIndex(SeenRecords.size()); +} + +MergingTypeTableBuilder::MergingTypeTableBuilder(BumpPtrAllocator &Storage) + : RecordStorage(Storage) { + SeenRecords.reserve(4096); +} + +MergingTypeTableBuilder::~MergingTypeTableBuilder() = default; + +Optional MergingTypeTableBuilder::getFirst() { + if (empty()) + return None; + + return TypeIndex(TypeIndex::FirstNonSimpleIndex); +} + +Optional MergingTypeTableBuilder::getNext(TypeIndex Prev) { + if (++Prev == nextTypeIndex()) + return None; + return Prev; +} + +CVType MergingTypeTableBuilder::getType(TypeIndex Index) { + CVType Type; + Type.RecordData = SeenRecords[Index.toArrayIndex()]; + const RecordPrefix *P = + reinterpret_cast(Type.RecordData.data()); + Type.Type = static_cast(uint16_t(P->RecordKind)); + return Type; +} + +StringRef MergingTypeTableBuilder::getTypeName(TypeIndex Index) { + llvm_unreachable("Method not implemented"); +} + +bool MergingTypeTableBuilder::contains(TypeIndex Index) { + if (Index.isSimple() || Index.isNoneType()) + return false; + + return Index.toArrayIndex() < SeenRecords.size(); +} + +uint32_t MergingTypeTableBuilder::size() { return SeenRecords.size(); } + +uint32_t MergingTypeTableBuilder::capacity() { return SeenRecords.size(); } + +ArrayRef> MergingTypeTableBuilder::records() const { + return SeenRecords; +} + +void MergingTypeTableBuilder::reset() { + HashedRecords.clear(); + SeenRecords.clear(); +} + +static inline ArrayRef stabilize(BumpPtrAllocator &Alloc, + ArrayRef Data) { + uint8_t *Stable = Alloc.Allocate(Data.size()); + memcpy(Stable, Data.data(), Data.size()); + return makeArrayRef(Stable, Data.size()); +} + +TypeIndex MergingTypeTableBuilder::insertRecordAs(hash_code Hash, + ArrayRef &Record) { + assert(Record.size() < UINT32_MAX && "Record too big"); + assert(Record.size() % 4 == 0 && "Record is not aligned to 4 bytes!"); + + LocallyHashedType WeakHash{Hash, Record}; + auto Result = HashedRecords.try_emplace(WeakHash, nextTypeIndex()); + + if (Result.second) { + ArrayRef RecordData = stabilize(RecordStorage, Record); + Result.first->first.RecordData = RecordData; + SeenRecords.push_back(RecordData); + } + + // Update the caller's copy of Record to point a stable copy. 
+ TypeIndex ActualTI = Result.first->second; + Record = SeenRecords[ActualTI.toArrayIndex()]; + return ActualTI; +} + +TypeIndex +MergingTypeTableBuilder::insertRecordBytes(ArrayRef &Record) { + return insertRecordAs(hash_value(Record), Record); +} + +TypeIndex +MergingTypeTableBuilder::insertRecord(ContinuationRecordBuilder &Builder) { + TypeIndex TI; + auto Fragments = Builder.end(nextTypeIndex()); + assert(!Fragments.empty()); + for (auto C : Fragments) + TI = insertRecordBytes(C.RecordData); + return TI; +} diff --git a/lib/DebugInfo/CodeView/SimpleTypeSerializer.cpp b/lib/DebugInfo/CodeView/SimpleTypeSerializer.cpp new file mode 100644 index 000000000000..d28b7c3c2d83 --- /dev/null +++ b/lib/DebugInfo/CodeView/SimpleTypeSerializer.cpp @@ -0,0 +1,62 @@ +#include "llvm/DebugInfo/CodeView/SimpleTypeSerializer.h" + +using namespace llvm; +using namespace llvm::codeview; + +static void writeRecordPrefix(BinaryStreamWriter &Writer, TypeLeafKind Kind) { + RecordPrefix Prefix; + Prefix.RecordKind = Kind; + Prefix.RecordLen = 0; + cantFail(Writer.writeObject(Prefix)); +} + +static void addPadding(BinaryStreamWriter &Writer) { + uint32_t Align = Writer.getOffset() % 4; + if (Align == 0) + return; + + int PaddingBytes = 4 - Align; + while (PaddingBytes > 0) { + uint8_t Pad = static_cast(LF_PAD0 + PaddingBytes); + cantFail(Writer.writeInteger(Pad)); + --PaddingBytes; + } +} + +SimpleTypeSerializer::SimpleTypeSerializer() : ScratchBuffer(MaxRecordLength) {} + +SimpleTypeSerializer::~SimpleTypeSerializer() {} + +template +ArrayRef SimpleTypeSerializer::serialize(T &Record) { + BinaryStreamWriter Writer(ScratchBuffer, support::little); + TypeRecordMapping Mapping(Writer); + + CVType CVT; + CVT.Type = static_cast(Record.getKind()); + + writeRecordPrefix(Writer, CVT.Type); + + cantFail(Mapping.visitTypeBegin(CVT)); + cantFail(Mapping.visitKnownRecord(CVT, Record)); + cantFail(Mapping.visitTypeEnd(CVT)); + + addPadding(Writer); + + RecordPrefix *Prefix = reinterpret_cast(ScratchBuffer.data()); + + Prefix->RecordKind = CVT.kind(); + Prefix->RecordLen = Writer.getOffset() - sizeof(uint16_t); + + return {ScratchBuffer.data(), Writer.getOffset()}; +} + +// Explicitly instantiate the member function for each known type so that we can +// implement this in the cpp file. 
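+// (Each TYPE_RECORD entry pulled in from CodeViewTypes.def below expands to an
+// explicit instantiation of serialize<Name##Record>, which is what lets the
+// template definition above live in this .cpp file rather than the header.)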
+#define TYPE_RECORD(EnumName, EnumVal, Name) \ + template ArrayRef llvm::codeview::SimpleTypeSerializer::serialize( \ + Name##Record &Record); +#define TYPE_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) +#define MEMBER_RECORD(EnumName, EnumVal, Name) +#define MEMBER_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) +#include "llvm/DebugInfo/CodeView/CodeViewTypes.def" diff --git a/lib/DebugInfo/CodeView/SymbolDumper.cpp b/lib/DebugInfo/CodeView/SymbolDumper.cpp index e64404be6dc0..df75f52661e1 100644 --- a/lib/DebugInfo/CodeView/SymbolDumper.cpp +++ b/lib/DebugInfo/CodeView/SymbolDumper.cpp @@ -8,7 +8,6 @@ //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/CodeView/SymbolDumper.h" -#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallString.h" #include "llvm/DebugInfo/CodeView/CVSymbolVisitor.h" #include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h" diff --git a/lib/DebugInfo/CodeView/TypeDumpVisitor.cpp b/lib/DebugInfo/CodeView/TypeDumpVisitor.cpp index e18a35ca1f38..e7998b8732fe 100644 --- a/lib/DebugInfo/CodeView/TypeDumpVisitor.cpp +++ b/lib/DebugInfo/CodeView/TypeDumpVisitor.cpp @@ -15,7 +15,6 @@ #include "llvm/DebugInfo/CodeView/TypeCollection.h" #include "llvm/DebugInfo/CodeView/TypeIndex.h" #include "llvm/DebugInfo/CodeView/TypeRecord.h" -#include "llvm/Support/BinaryByteStream.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/ScopedPrinter.h" diff --git a/lib/DebugInfo/CodeView/TypeHashing.cpp b/lib/DebugInfo/CodeView/TypeHashing.cpp new file mode 100644 index 000000000000..f5b28b2a2070 --- /dev/null +++ b/lib/DebugInfo/CodeView/TypeHashing.cpp @@ -0,0 +1,74 @@ +//===- TypeHashing.cpp -------------------------------------------*- C++-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/CodeView/TypeHashing.h" + +#include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h" +#include "llvm/Support/SHA1.h" + +using namespace llvm; +using namespace llvm::codeview; + +LocallyHashedType DenseMapInfo::Empty{0, {}}; +LocallyHashedType DenseMapInfo::Tombstone{hash_code(-1), {}}; + +static std::array EmptyHash; +static std::array TombstoneHash = { + {0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}; + +GloballyHashedType DenseMapInfo::Empty{EmptyHash}; +GloballyHashedType DenseMapInfo::Tombstone{TombstoneHash}; + +LocallyHashedType LocallyHashedType::hashType(ArrayRef RecordData) { + return {llvm::hash_value(RecordData), RecordData}; +} + +GloballyHashedType +GloballyHashedType::hashType(ArrayRef RecordData, + ArrayRef PreviousTypes, + ArrayRef PreviousIds) { + SmallVector Refs; + discoverTypeIndices(RecordData, Refs); + SHA1 S; + S.init(); + uint32_t Off = 0; + RecordData = RecordData.drop_front(sizeof(RecordPrefix)); + for (const auto &Ref : Refs) { + // Hash any data that comes before this TiRef. + uint32_t PreLen = Ref.Offset - Off; + ArrayRef PreData = RecordData.slice(Off, PreLen); + S.update(PreData); + auto Prev = (Ref.Kind == TiRefKind::IndexRef) ? PreviousIds : PreviousTypes; + + auto RefData = RecordData.slice(Ref.Offset, Ref.Count * sizeof(TypeIndex)); + // For each type index referenced, add in the previously computed hash + // value of that type. 
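+    // In other words, a referenced index contributes the SHA-1 previously
+    // computed for the record it names rather than its numeric value, so two
+    // records hash identically whenever the records they transitively refer
+    // to do, independently of the type indices each input stream happened to
+    // use. Only simple, none, or not-yet-seen (forward) indices fall back to
+    // hashing the raw 4-byte index itself.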
+ ArrayRef Indices( + reinterpret_cast(RefData.data()), Ref.Count); + for (TypeIndex TI : Indices) { + ArrayRef BytesToHash; + if (TI.isSimple() || TI.isNoneType() || TI.toArrayIndex() >= Prev.size()) { + const uint8_t *IndexBytes = reinterpret_cast(&TI); + BytesToHash = makeArrayRef(IndexBytes, sizeof(TypeIndex)); + } else { + BytesToHash = Prev[TI.toArrayIndex()].Hash; + } + S.update(BytesToHash); + } + + Off = Ref.Offset + Ref.Count * sizeof(TypeIndex); + } + + // Don't forget to add in any trailing bytes. + auto TrailingBytes = RecordData.drop_front(Off); + S.update(TrailingBytes); + + return {S.final()}; +} diff --git a/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp b/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp index c23fadc23048..d283e9e6d2f1 100644 --- a/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp +++ b/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp @@ -392,6 +392,9 @@ static bool discoverTypeIndices(ArrayRef Content, SymbolKind Kind, case SymbolKind::S_LOCAL: Refs.push_back({TiRefKind::TypeRef, 0, 1}); // Type break; + case SymbolKind::S_REGISTER: + Refs.push_back({TiRefKind::TypeRef, 0, 1}); // Type; + break; case SymbolKind::S_CONSTANT: Refs.push_back({TiRefKind::TypeRef, 0, 1}); // Type break; diff --git a/lib/DebugInfo/CodeView/TypeRecordMapping.cpp b/lib/DebugInfo/CodeView/TypeRecordMapping.cpp index 114f6fd2897e..9b8a6053da84 100644 --- a/lib/DebugInfo/CodeView/TypeRecordMapping.cpp +++ b/lib/DebugInfo/CodeView/TypeRecordMapping.cpp @@ -426,7 +426,8 @@ Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR, Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR, OneMethodRecord &Record) { - MapOneMethodRecord Mapper(false); + const bool IsFromOverloadList = (TypeKind == LF_METHODLIST); + MapOneMethodRecord Mapper(IsFromOverloadList); return Mapper(IO, Record); } diff --git a/lib/DebugInfo/CodeView/TypeSerializer.cpp b/lib/DebugInfo/CodeView/TypeSerializer.cpp deleted file mode 100644 index 003c13b4a20d..000000000000 --- a/lib/DebugInfo/CodeView/TypeSerializer.cpp +++ /dev/null @@ -1,389 +0,0 @@ -//===- TypeSerialzier.cpp -------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "llvm/DebugInfo/CodeView/TypeSerializer.h" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/DebugInfo/CodeView/CodeView.h" -#include "llvm/DebugInfo/CodeView/RecordSerialization.h" -#include "llvm/DebugInfo/CodeView/TypeIndex.h" -#include "llvm/Support/Allocator.h" -#include "llvm/Support/BinaryByteStream.h" -#include "llvm/Support/BinaryStreamWriter.h" -#include "llvm/Support/Endian.h" -#include "llvm/Support/Error.h" -#include -#include -#include -#include - -using namespace llvm; -using namespace llvm::codeview; - -namespace { - -struct HashedType { - uint64_t Hash; - const uint8_t *Data; - unsigned Size; // FIXME: Go to uint16_t? - TypeIndex Index; -}; - -/// Wrapper around a poitner to a HashedType. Hash and equality operations are -/// based on data in the pointee. 
-struct HashedTypePtr { - HashedTypePtr() = default; - HashedTypePtr(HashedType *Ptr) : Ptr(Ptr) {} - - HashedType *Ptr = nullptr; -}; - -} // end anonymous namespace - -namespace llvm { - -template <> struct DenseMapInfo { - static inline HashedTypePtr getEmptyKey() { return HashedTypePtr(nullptr); } - - static inline HashedTypePtr getTombstoneKey() { - return HashedTypePtr(reinterpret_cast(1)); - } - - static unsigned getHashValue(HashedTypePtr Val) { - assert(Val.Ptr != getEmptyKey().Ptr && Val.Ptr != getTombstoneKey().Ptr); - return Val.Ptr->Hash; - } - - static bool isEqual(HashedTypePtr LHSP, HashedTypePtr RHSP) { - HashedType *LHS = LHSP.Ptr; - HashedType *RHS = RHSP.Ptr; - if (RHS == getEmptyKey().Ptr || RHS == getTombstoneKey().Ptr) - return LHS == RHS; - if (LHS->Hash != RHS->Hash || LHS->Size != RHS->Size) - return false; - return ::memcmp(LHS->Data, RHS->Data, LHS->Size) == 0; - } -}; - -} // end namespace llvm - -/// Private implementation so that we don't leak our DenseMap instantiations to -/// users. -class llvm::codeview::TypeHasher { -private: - /// Storage for type record provided by the caller. Records will outlive the - /// hasher object, so they should be allocated here. - BumpPtrAllocator &RecordStorage; - - /// Storage for hash keys. These only need to live as long as the hashing - /// operation. - BumpPtrAllocator KeyStorage; - - /// Hash table. We really want a DenseMap, TypeIndex> here, - /// but DenseMap is inefficient when the keys are long (like type records) - /// because it recomputes the hash value of every key when it grows. This - /// value type stores the hash out of line in KeyStorage, so that table - /// entries are small and easy to rehash. - DenseSet HashedRecords; - -public: - TypeHasher(BumpPtrAllocator &RecordStorage) : RecordStorage(RecordStorage) {} - - void reset() { HashedRecords.clear(); } - - /// Takes the bytes of type record, inserts them into the hash table, saves - /// them, and returns a pointer to an identical stable type record along with - /// its type index in the destination stream. - TypeIndex getOrCreateRecord(ArrayRef &Record, TypeIndex TI); -}; - -TypeIndex TypeHasher::getOrCreateRecord(ArrayRef &Record, - TypeIndex TI) { - assert(Record.size() < UINT32_MAX && "Record too big"); - assert(Record.size() % 4 == 0 && "Record is not aligned to 4 bytes!"); - - // Compute the hash up front so we can store it in the key. - HashedType TempHashedType = {hash_value(Record), Record.data(), - unsigned(Record.size()), TI}; - auto Result = HashedRecords.insert(HashedTypePtr(&TempHashedType)); - HashedType *&Hashed = Result.first->Ptr; - - if (Result.second) { - // This was a new type record. We need stable storage for both the key and - // the record. The record should outlive the hashing operation. - Hashed = KeyStorage.Allocate(); - *Hashed = TempHashedType; - - uint8_t *Stable = RecordStorage.Allocate(Record.size()); - memcpy(Stable, Record.data(), Record.size()); - Hashed->Data = Stable; - assert(Hashed->Size == Record.size()); - } - - // Update the caller's copy of Record to point a stable copy. 
- Record = ArrayRef(Hashed->Data, Hashed->Size); - return Hashed->Index; -} - -TypeIndex TypeSerializer::nextTypeIndex() const { - return TypeIndex::fromArrayIndex(SeenRecords.size()); -} - -bool TypeSerializer::isInFieldList() const { - return TypeKind.hasValue() && *TypeKind == TypeLeafKind::LF_FIELDLIST; -} - -MutableArrayRef TypeSerializer::getCurrentSubRecordData() { - assert(isInFieldList()); - return getCurrentRecordData().drop_front(CurrentSegment.length()); -} - -MutableArrayRef TypeSerializer::getCurrentRecordData() { - return MutableArrayRef(RecordBuffer).take_front(Writer.getOffset()); -} - -Error TypeSerializer::writeRecordPrefix(TypeLeafKind Kind) { - RecordPrefix Prefix; - Prefix.RecordKind = Kind; - Prefix.RecordLen = 0; - if (auto EC = Writer.writeObject(Prefix)) - return EC; - return Error::success(); -} - -Expected> -TypeSerializer::addPadding(MutableArrayRef Record) { - uint32_t Align = Record.size() % 4; - if (Align == 0) - return Record; - - int PaddingBytes = 4 - Align; - int N = PaddingBytes; - while (PaddingBytes > 0) { - uint8_t Pad = static_cast(LF_PAD0 + PaddingBytes); - if (auto EC = Writer.writeInteger(Pad)) - return std::move(EC); - --PaddingBytes; - } - return MutableArrayRef(Record.data(), Record.size() + N); -} - -TypeSerializer::TypeSerializer(BumpPtrAllocator &Storage, bool Hash) - : RecordStorage(Storage), RecordBuffer(MaxRecordLength * 2), - Stream(RecordBuffer, support::little), Writer(Stream), - Mapping(Writer) { - // RecordBuffer needs to be able to hold enough data so that if we are 1 - // byte short of MaxRecordLen, and then we try to write MaxRecordLen bytes, - // we won't overflow. - if (Hash) - Hasher = llvm::make_unique(Storage); -} - -TypeSerializer::~TypeSerializer() = default; - -ArrayRef> TypeSerializer::records() const { - return SeenRecords; -} - -void TypeSerializer::reset() { - if (Hasher) - Hasher->reset(); - Writer.setOffset(0); - CurrentSegment = RecordSegment(); - FieldListSegments.clear(); - TypeKind.reset(); - MemberKind.reset(); - SeenRecords.clear(); -} - -TypeIndex TypeSerializer::insertRecordBytes(ArrayRef &Record) { - assert(!TypeKind.hasValue() && "Already in a type mapping!"); - assert(Writer.getOffset() == 0 && "Stream has data already!"); - - if (Hasher) { - TypeIndex ActualTI = Hasher->getOrCreateRecord(Record, nextTypeIndex()); - if (nextTypeIndex() == ActualTI) - SeenRecords.push_back(Record); - return ActualTI; - } - - TypeIndex NewTI = nextTypeIndex(); - uint8_t *Stable = RecordStorage.Allocate(Record.size()); - memcpy(Stable, Record.data(), Record.size()); - Record = ArrayRef(Stable, Record.size()); - SeenRecords.push_back(Record); - return NewTI; -} - -TypeIndex TypeSerializer::insertRecord(const RemappedType &Record) { - assert(!TypeKind.hasValue() && "Already in a type mapping!"); - assert(Writer.getOffset() == 0 && "Stream has data already!"); - - TypeIndex TI; - ArrayRef OriginalData = Record.OriginalRecord.RecordData; - if (Record.Mappings.empty()) { - // This record did not remap any type indices. Just write it. - return insertRecordBytes(OriginalData); - } - - // At least one type index was remapped. Before we can hash it we have to - // copy the full record bytes, re-write each type index, then hash the copy. - // We do this in temporary storage since only the DenseMap can decide whether - // this record already exists, and if it does we don't want the memory to - // stick around. 
- RemapStorage.resize(OriginalData.size()); - ::memcpy(&RemapStorage[0], OriginalData.data(), OriginalData.size()); - uint8_t *ContentBegin = RemapStorage.data() + sizeof(RecordPrefix); - for (const auto &M : Record.Mappings) { - // First 4 bytes of every record are the record prefix, but the mapping - // offset is relative to the content which starts after. - *(TypeIndex *)(ContentBegin + M.first) = M.second; - } - auto RemapRef = makeArrayRef(RemapStorage); - return insertRecordBytes(RemapRef); -} - -Error TypeSerializer::visitTypeBegin(CVType &Record) { - assert(!TypeKind.hasValue() && "Already in a type mapping!"); - assert(Writer.getOffset() == 0 && "Stream has data already!"); - - if (auto EC = writeRecordPrefix(Record.kind())) - return EC; - - TypeKind = Record.kind(); - if (auto EC = Mapping.visitTypeBegin(Record)) - return EC; - - return Error::success(); -} - -Expected TypeSerializer::visitTypeEndGetIndex(CVType &Record) { - assert(TypeKind.hasValue() && "Not in a type mapping!"); - if (auto EC = Mapping.visitTypeEnd(Record)) - return std::move(EC); - - // Update the record's length and fill out the CVType members to point to - // the stable memory holding the record's data. - auto ThisRecordData = getCurrentRecordData(); - auto ExpectedData = addPadding(ThisRecordData); - if (!ExpectedData) - return ExpectedData.takeError(); - ThisRecordData = *ExpectedData; - - RecordPrefix *Prefix = - reinterpret_cast(ThisRecordData.data()); - Prefix->RecordLen = ThisRecordData.size() - sizeof(uint16_t); - - Record.Type = *TypeKind; - Record.RecordData = ThisRecordData; - - // insertRecordBytes assumes we're not in a mapping, so do this first. - TypeKind.reset(); - Writer.setOffset(0); - - TypeIndex InsertedTypeIndex = insertRecordBytes(Record.RecordData); - - // Write out each additional segment in reverse order, and update each - // record's continuation index to point to the previous one. - for (auto X : reverse(FieldListSegments)) { - auto CIBytes = X.take_back(sizeof(uint32_t)); - support::ulittle32_t *CI = - reinterpret_cast(CIBytes.data()); - assert(*CI == 0xB0C0B0C0 && "Invalid TypeIndex placeholder"); - *CI = InsertedTypeIndex.getIndex(); - InsertedTypeIndex = insertRecordBytes(X); - } - - FieldListSegments.clear(); - CurrentSegment.SubRecords.clear(); - - return InsertedTypeIndex; -} - -Error TypeSerializer::visitTypeEnd(CVType &Record) { - auto ExpectedIndex = visitTypeEndGetIndex(Record); - if (!ExpectedIndex) - return ExpectedIndex.takeError(); - return Error::success(); -} - -Error TypeSerializer::visitMemberBegin(CVMemberRecord &Record) { - assert(isInFieldList() && "Not in a field list!"); - assert(!MemberKind.hasValue() && "Already in a member record!"); - MemberKind = Record.Kind; - - if (auto EC = Mapping.visitMemberBegin(Record)) - return EC; - - return Error::success(); -} - -Error TypeSerializer::visitMemberEnd(CVMemberRecord &Record) { - if (auto EC = Mapping.visitMemberEnd(Record)) - return EC; - - // Check if this subrecord makes the current segment not fit in 64K minus - // the space for a continuation record (8 bytes). If the segment does not - // fit, insert a continuation record. 
- if (Writer.getOffset() > MaxRecordLength - ContinuationLength) { - MutableArrayRef Data = getCurrentRecordData(); - SubRecord LastSubRecord = CurrentSegment.SubRecords.back(); - uint32_t CopySize = CurrentSegment.length() - LastSubRecord.Size; - auto CopyData = Data.take_front(CopySize); - auto LeftOverData = Data.drop_front(CopySize); - assert(LastSubRecord.Size == LeftOverData.size()); - - // Allocate stable storage for the record and copy the old record plus - // continuation over. - uint16_t LengthWithSize = CopySize + ContinuationLength; - assert(LengthWithSize <= MaxRecordLength); - RecordPrefix *Prefix = reinterpret_cast(CopyData.data()); - Prefix->RecordLen = LengthWithSize - sizeof(uint16_t); - - uint8_t *SegmentBytes = RecordStorage.Allocate(LengthWithSize); - auto SavedSegment = MutableArrayRef(SegmentBytes, LengthWithSize); - MutableBinaryByteStream CS(SavedSegment, support::little); - BinaryStreamWriter CW(CS); - if (auto EC = CW.writeBytes(CopyData)) - return EC; - if (auto EC = CW.writeEnum(TypeLeafKind::LF_INDEX)) - return EC; - if (auto EC = CW.writeInteger(0)) - return EC; - if (auto EC = CW.writeInteger(0xB0C0B0C0)) - return EC; - FieldListSegments.push_back(SavedSegment); - - // Write a new placeholder record prefix to mark the start of this new - // top-level record. - Writer.setOffset(0); - if (auto EC = writeRecordPrefix(TypeLeafKind::LF_FIELDLIST)) - return EC; - - // Then move over the subrecord that overflowed the old segment to the - // beginning of this segment. Note that we have to use memmove here - // instead of Writer.writeBytes(), because the new and old locations - // could overlap. - ::memmove(Stream.data().data() + sizeof(RecordPrefix), LeftOverData.data(), - LeftOverData.size()); - // And point the segment writer at the end of that subrecord. - Writer.setOffset(LeftOverData.size() + sizeof(RecordPrefix)); - - CurrentSegment.SubRecords.clear(); - CurrentSegment.SubRecords.push_back(LastSubRecord); - } - - // Update the CVMemberRecord since we may have shifted around or gotten - // padded. 
- Record.Data = getCurrentSubRecordData(); - - MemberKind.reset(); - return Error::success(); -} diff --git a/lib/DebugInfo/CodeView/TypeStreamMerger.cpp b/lib/DebugInfo/CodeView/TypeStreamMerger.cpp index bff3516203a0..f1ebd23c563f 100644 --- a/lib/DebugInfo/CodeView/TypeStreamMerger.cpp +++ b/lib/DebugInfo/CodeView/TypeStreamMerger.cpp @@ -10,13 +10,12 @@ #include "llvm/DebugInfo/CodeView/TypeStreamMerger.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/DebugInfo/CodeView/TypeDeserializer.h" +#include "llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h" +#include "llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h" #include "llvm/DebugInfo/CodeView/TypeIndex.h" #include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h" #include "llvm/DebugInfo/CodeView/TypeRecord.h" -#include "llvm/DebugInfo/CodeView/TypeTableBuilder.h" #include "llvm/Support/Error.h" -#include "llvm/Support/ScopedPrinter.h" using namespace llvm; using namespace llvm::codeview; @@ -64,12 +63,27 @@ class TypeStreamMerger { static const TypeIndex Untranslated; - Error mergeTypesAndIds(TypeTableBuilder &DestIds, TypeTableBuilder &DestTypes, + // Local hashing entry points + Error mergeTypesAndIds(MergingTypeTableBuilder &DestIds, + MergingTypeTableBuilder &DestTypes, const CVTypeArray &IdsAndTypes); - Error mergeIdRecords(TypeTableBuilder &Dest, + Error mergeIdRecords(MergingTypeTableBuilder &Dest, ArrayRef TypeSourceToDest, const CVTypeArray &Ids); - Error mergeTypeRecords(TypeTableBuilder &Dest, const CVTypeArray &Types); + Error mergeTypeRecords(MergingTypeTableBuilder &Dest, + const CVTypeArray &Types); + + // Global hashing entry points + Error mergeTypesAndIds(GlobalTypeTableBuilder &DestIds, + GlobalTypeTableBuilder &DestTypes, + const CVTypeArray &IdsAndTypes, + ArrayRef Hashes); + Error mergeIdRecords(GlobalTypeTableBuilder &Dest, + ArrayRef TypeSourceToDest, + const CVTypeArray &Ids, + ArrayRef Hashes); + Error mergeTypeRecords(GlobalTypeTableBuilder &Dest, const CVTypeArray &Types, + ArrayRef Hashes); private: Error doit(const CVTypeArray &Types); @@ -83,6 +97,16 @@ class TypeStreamMerger { bool remapTypeIndex(TypeIndex &Idx); bool remapItemIndex(TypeIndex &Idx); + bool hasTypeStream() const { + return (UseGlobalHashes) ? (!!DestGlobalTypeStream) : (!!DestTypeStream); + } + + bool hasIdStream() const { + return (UseGlobalHashes) ? (!!DestGlobalIdStream) : (!!DestIdStream); + } + + ArrayRef serializeRemapped(const RemappedType &Record); + bool remapIndices(RemappedType &Record, ArrayRef Refs); bool remapIndex(TypeIndex &Idx, ArrayRef Map); @@ -96,25 +120,23 @@ class TypeStreamMerger { return llvm::make_error(cv_error_code::corrupt_record); } - Error writeRecord(TypeTableBuilder &Dest, const RemappedType &Record, - bool RemapSuccess) { - TypeIndex DestIdx = Untranslated; - if (RemapSuccess) - DestIdx = Dest.writeSerializedRecord(Record); - addMapping(DestIdx); - return Error::success(); - } - Optional LastError; + bool UseGlobalHashes = false; + bool IsSecondPass = false; unsigned NumBadIndices = 0; TypeIndex CurIndex{TypeIndex::FirstNonSimpleIndex}; - TypeTableBuilder *DestIdStream = nullptr; - TypeTableBuilder *DestTypeStream = nullptr; + MergingTypeTableBuilder *DestIdStream = nullptr; + MergingTypeTableBuilder *DestTypeStream = nullptr; + + GlobalTypeTableBuilder *DestGlobalIdStream = nullptr; + GlobalTypeTableBuilder *DestGlobalTypeStream = nullptr; + + ArrayRef GlobalHashes; // If we're only mapping id records, this array contains the mapping for // type records. 
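The merger now keeps two sets of destination tables and selects between them with UseGlobalHashes: the local-hash path serializes every remapped record and lets MergingTypeTableBuilder deduplicate by content, while the global-hash path hands GlobalTypeTableBuilder a precomputed GloballyHashedType together with a serialization callback, so a record that already exists in the destination is never serialized at all. The following is a minimal, self-contained sketch of that callback-on-miss pattern; ToyTypeTable and its std::unordered_map store are illustrative stand-ins rather than the LLVM API.

#include <cstdint>
#include <cstdio>
#include <functional>
#include <unordered_map>
#include <vector>

// Hypothetical stand-in for a hash-keyed type table: records are keyed by a
// precomputed content hash, and the serializer runs only on a cache miss.
struct ToyTypeTable {
  std::unordered_map<uint64_t, unsigned> HashToIndex;
  std::vector<std::vector<uint8_t>> Records;

  unsigned insertRecordAs(uint64_t Hash,
                          const std::function<std::vector<uint8_t>()> &Serialize) {
    auto It = HashToIndex.find(Hash);
    if (It != HashToIndex.end())
      return It->second;              // Hit: reuse the index, skip serialization.
    unsigned Index = static_cast<unsigned>(Records.size());
    Records.push_back(Serialize());   // Miss: serialize once and store.
    HashToIndex.emplace(Hash, Index);
    return Index;
  }
};

int main() {
  ToyTypeTable Table;
  auto Serialize = [] {
    std::puts("serializing");         // Printed only for the first insertion.
    return std::vector<uint8_t>{0x12, 0x00, 0x08, 0x10};
  };
  unsigned A = Table.insertRecordAs(0xB0C0B0C0u, Serialize);
  unsigned B = Table.insertRecordAs(0xB0C0B0C0u, Serialize); // Deduplicated, no second print.
  return A == B ? 0 : 1;
}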
@@ -123,10 +145,35 @@ class TypeStreamMerger { /// Map from source type index to destination type index. Indexed by source /// type index minus 0x1000. SmallVectorImpl &IndexMap; + + /// Temporary storage that we use to copy a record's data while re-writing + /// its type indices. + SmallVector RemapStorage; }; } // end anonymous namespace +ArrayRef +TypeStreamMerger::serializeRemapped(const RemappedType &Record) { + TypeIndex TI; + ArrayRef OriginalData = Record.OriginalRecord.RecordData; + if (Record.Mappings.empty()) + return OriginalData; + + // At least one type index was remapped. We copy the full record bytes, + // re-write each type index, then return that. + RemapStorage.resize(OriginalData.size()); + ::memcpy(&RemapStorage[0], OriginalData.data(), OriginalData.size()); + uint8_t *ContentBegin = RemapStorage.data() + sizeof(RecordPrefix); + for (const auto &M : Record.Mappings) { + // First 4 bytes of every record are the record prefix, but the mapping + // offset is relative to the content which starts after. + *(TypeIndex *)(ContentBegin + M.first) = M.second; + } + auto RemapRef = makeArrayRef(RemapStorage); + return RemapRef; +} + const TypeIndex TypeStreamMerger::Untranslated(SimpleTypeKind::NotTranslated); static bool isIdRecord(TypeLeafKind K) { @@ -191,7 +238,7 @@ bool TypeStreamMerger::remapTypeIndex(TypeIndex &Idx) { // special mapping from OldTypeStream -> NewTypeStream which was computed // externally. Regardless, we use this special map if and only if we are // doing an id-only mapping. - if (DestTypeStream == nullptr) + if (!hasTypeStream()) return remapIndex(Idx, TypeLookup); assert(TypeLookup.empty()); @@ -199,31 +246,69 @@ bool TypeStreamMerger::remapTypeIndex(TypeIndex &Idx) { } bool TypeStreamMerger::remapItemIndex(TypeIndex &Idx) { - assert(DestIdStream); + assert(hasIdStream()); return remapIndex(Idx, IndexMap); } -Error TypeStreamMerger::mergeTypeRecords(TypeTableBuilder &Dest, +// Local hashing entry points +Error TypeStreamMerger::mergeTypeRecords(MergingTypeTableBuilder &Dest, const CVTypeArray &Types) { DestTypeStream = &Dest; + UseGlobalHashes = false; return doit(Types); } -Error TypeStreamMerger::mergeIdRecords(TypeTableBuilder &Dest, +Error TypeStreamMerger::mergeIdRecords(MergingTypeTableBuilder &Dest, ArrayRef TypeSourceToDest, const CVTypeArray &Ids) { DestIdStream = &Dest; TypeLookup = TypeSourceToDest; + UseGlobalHashes = false; return doit(Ids); } -Error TypeStreamMerger::mergeTypesAndIds(TypeTableBuilder &DestIds, - TypeTableBuilder &DestTypes, +Error TypeStreamMerger::mergeTypesAndIds(MergingTypeTableBuilder &DestIds, + MergingTypeTableBuilder &DestTypes, const CVTypeArray &IdsAndTypes) { DestIdStream = &DestIds; DestTypeStream = &DestTypes; + UseGlobalHashes = false; + return doit(IdsAndTypes); +} + +// Global hashing entry points +Error TypeStreamMerger::mergeTypeRecords(GlobalTypeTableBuilder &Dest, + const CVTypeArray &Types, + ArrayRef Hashes) { + DestGlobalTypeStream = &Dest; + UseGlobalHashes = true; + GlobalHashes = Hashes; + + return doit(Types); +} + +Error TypeStreamMerger::mergeIdRecords(GlobalTypeTableBuilder &Dest, + ArrayRef TypeSourceToDest, + const CVTypeArray &Ids, + ArrayRef Hashes) { + DestGlobalIdStream = &Dest; + TypeLookup = TypeSourceToDest; + UseGlobalHashes = true; + GlobalHashes = Hashes; + + return doit(Ids); +} + +Error TypeStreamMerger::mergeTypesAndIds(GlobalTypeTableBuilder &DestIds, + GlobalTypeTableBuilder &DestTypes, + const CVTypeArray &IdsAndTypes, + ArrayRef Hashes) { + DestGlobalIdStream = &DestIds; + 
DestGlobalTypeStream = &DestTypes; + UseGlobalHashes = true; + GlobalHashes = Hashes; return doit(IdsAndTypes); } @@ -261,21 +346,39 @@ Error TypeStreamMerger::doit(const CVTypeArray &Types) { } Error TypeStreamMerger::remapAllTypes(const CVTypeArray &Types) { - for (const CVType &Type : Types) - if (auto EC = remapType(Type)) - return EC; - return Error::success(); + BinaryStreamRef Stream = Types.getUnderlyingStream(); + ArrayRef Buffer; + cantFail(Stream.readBytes(0, Stream.getLength(), Buffer)); + + return forEachCodeViewRecord( + Buffer, [this](const CVType &T) { return remapType(T); }); } Error TypeStreamMerger::remapType(const CVType &Type) { - RemappedType R(Type); - SmallVector Refs; - discoverTypeIndices(Type.RecordData, Refs); - bool MappedAllIndices = remapIndices(R, Refs); - TypeTableBuilder &Dest = - isIdRecord(Type.kind()) ? *DestIdStream : *DestTypeStream; - if (auto EC = writeRecord(Dest, R, MappedAllIndices)) - return EC; + auto DoSerialize = [this, Type]() -> ArrayRef { + RemappedType R(Type); + SmallVector Refs; + discoverTypeIndices(Type.RecordData, Refs); + if (!remapIndices(R, Refs)) + return {}; + return serializeRemapped(R); + }; + + TypeIndex DestIdx = Untranslated; + if (UseGlobalHashes) { + GlobalTypeTableBuilder &Dest = + isIdRecord(Type.kind()) ? *DestGlobalIdStream : *DestGlobalTypeStream; + GloballyHashedType H = GlobalHashes[CurIndex.toArrayIndex()]; + DestIdx = Dest.insertRecordAs(H, DoSerialize); + } else { + MergingTypeTableBuilder &Dest = + isIdRecord(Type.kind()) ? *DestIdStream : *DestTypeStream; + + auto Data = DoSerialize(); + if (!Data.empty()) + DestIdx = Dest.insertRecordBytes(Data); + } + addMapping(DestIdx); ++CurIndex; assert((IsSecondPass || IndexMap.size() == slotForIndex(CurIndex)) && @@ -306,14 +409,14 @@ bool TypeStreamMerger::remapIndices(RemappedType &Record, return Success; } -Error llvm::codeview::mergeTypeRecords(TypeTableBuilder &Dest, +Error llvm::codeview::mergeTypeRecords(MergingTypeTableBuilder &Dest, SmallVectorImpl &SourceToDest, const CVTypeArray &Types) { TypeStreamMerger M(SourceToDest); return M.mergeTypeRecords(Dest, Types); } -Error llvm::codeview::mergeIdRecords(TypeTableBuilder &Dest, +Error llvm::codeview::mergeIdRecords(MergingTypeTableBuilder &Dest, ArrayRef TypeSourceToDest, SmallVectorImpl &SourceToDest, const CVTypeArray &Ids) { @@ -322,8 +425,33 @@ Error llvm::codeview::mergeIdRecords(TypeTableBuilder &Dest, } Error llvm::codeview::mergeTypeAndIdRecords( - TypeTableBuilder &DestIds, TypeTableBuilder &DestTypes, + MergingTypeTableBuilder &DestIds, MergingTypeTableBuilder &DestTypes, SmallVectorImpl &SourceToDest, const CVTypeArray &IdsAndTypes) { TypeStreamMerger M(SourceToDest); return M.mergeTypesAndIds(DestIds, DestTypes, IdsAndTypes); } + +Error llvm::codeview::mergeTypeAndIdRecords( + GlobalTypeTableBuilder &DestIds, GlobalTypeTableBuilder &DestTypes, + SmallVectorImpl &SourceToDest, const CVTypeArray &IdsAndTypes, + ArrayRef Hashes) { + TypeStreamMerger M(SourceToDest); + return M.mergeTypesAndIds(DestIds, DestTypes, IdsAndTypes, Hashes); +} + +Error llvm::codeview::mergeTypeRecords(GlobalTypeTableBuilder &Dest, + SmallVectorImpl &SourceToDest, + const CVTypeArray &Types, + ArrayRef Hashes) { + TypeStreamMerger M(SourceToDest); + return M.mergeTypeRecords(Dest, Types, Hashes); +} + +Error llvm::codeview::mergeIdRecords(GlobalTypeTableBuilder &Dest, + ArrayRef Types, + SmallVectorImpl &SourceToDest, + const CVTypeArray &Ids, + ArrayRef Hashes) { + TypeStreamMerger M(SourceToDest); + return 
M.mergeIdRecords(Dest, Types, Ids, Hashes); +} diff --git a/lib/DebugInfo/CodeView/TypeTableCollection.cpp b/lib/DebugInfo/CodeView/TypeTableCollection.cpp index 456d6f19b237..cf951baa5111 100644 --- a/lib/DebugInfo/CodeView/TypeTableCollection.cpp +++ b/lib/DebugInfo/CodeView/TypeTableCollection.cpp @@ -11,8 +11,6 @@ #include "llvm/DebugInfo/CodeView/CVTypeVisitor.h" #include "llvm/DebugInfo/CodeView/RecordName.h" -#include "llvm/DebugInfo/CodeView/TypeTableBuilder.h" -#include "llvm/Support/BinaryByteStream.h" #include "llvm/Support/BinaryStreamReader.h" using namespace llvm; diff --git a/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp b/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp index f04ec7706cd8..ac30f74f3466 100644 --- a/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp +++ b/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp @@ -11,7 +11,6 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/BinaryFormat/Dwarf.h" -#include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/DebugInfo/DWARF/DWARFRelocMap.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Format.h" @@ -22,12 +21,13 @@ using namespace llvm; -bool DWARFAcceleratorTable::extract() { +llvm::Error AppleAcceleratorTable::extract() { uint32_t Offset = 0; // Check that we can at least read the header. if (!AccelSection.isValidOffset(offsetof(Header, HeaderDataLength)+4)) - return false; + return make_error<StringError>("Section too small: cannot read header.", + inconvertibleErrorCode()); Hdr.Magic = AccelSection.getU32(&Offset); Hdr.Version = AccelSection.getU16(&Offset); @@ -38,9 +38,13 @@ bool DWARFAcceleratorTable::extract() { // Check that we can read all the hashes and offsets from the // section (see SourceLevelDebugging.rst for the structure of the index). + // We need to subtract one because we're checking for an *offset*, which is + // equal to the size for an empty table and hence points right after the section.
if (!AccelSection.isValidOffset(sizeof(Hdr) + Hdr.HeaderDataLength + - Hdr.NumBuckets*4 + Hdr.NumHashes*8)) - return false; + Hdr.NumBuckets * 4 + Hdr.NumHashes * 8 - 1)) + return make_error( + "Section too small: cannot read buckets and hashes.", + inconvertibleErrorCode()); HdrData.DIEOffsetBase = AccelSection.getU32(&Offset); uint32_t NumAtoms = AccelSection.getU32(&Offset); @@ -52,23 +56,23 @@ bool DWARFAcceleratorTable::extract() { } IsValid = true; - return true; + return Error::success(); } -uint32_t DWARFAcceleratorTable::getNumBuckets() { return Hdr.NumBuckets; } -uint32_t DWARFAcceleratorTable::getNumHashes() { return Hdr.NumHashes; } -uint32_t DWARFAcceleratorTable::getSizeHdr() { return sizeof(Hdr); } -uint32_t DWARFAcceleratorTable::getHeaderDataLength() { +uint32_t AppleAcceleratorTable::getNumBuckets() { return Hdr.NumBuckets; } +uint32_t AppleAcceleratorTable::getNumHashes() { return Hdr.NumHashes; } +uint32_t AppleAcceleratorTable::getSizeHdr() { return sizeof(Hdr); } +uint32_t AppleAcceleratorTable::getHeaderDataLength() { return Hdr.HeaderDataLength; } -ArrayRef> -DWARFAcceleratorTable::getAtomsDesc() { +ArrayRef> +AppleAcceleratorTable::getAtomsDesc() { return HdrData.Atoms; } -bool DWARFAcceleratorTable::validateForms() { +bool AppleAcceleratorTable::validateForms() { for (auto Atom : getAtomsDesc()) { DWARFFormValue FormValue(Atom.second); switch (Atom.first) { @@ -79,6 +83,7 @@ bool DWARFAcceleratorTable::validateForms() { !FormValue.isFormClass(DWARFFormValue::FC_Flag)) || FormValue.getForm() == dwarf::DW_FORM_sdata) return false; + break; default: break; } @@ -87,7 +92,7 @@ bool DWARFAcceleratorTable::validateForms() { } std::pair -DWARFAcceleratorTable::readAtoms(uint32_t &HashDataOffset) { +AppleAcceleratorTable::readAtoms(uint32_t &HashDataOffset) { uint32_t DieOffset = dwarf::DW_INVALID_OFFSET; dwarf::Tag DieTag = dwarf::DW_TAG_null; DWARFFormParams FormParams = {Hdr.Version, 0, dwarf::DwarfFormat::DWARF32}; @@ -109,7 +114,7 @@ DWARFAcceleratorTable::readAtoms(uint32_t &HashDataOffset) { return {DieOffset, DieTag}; } -LLVM_DUMP_METHOD void DWARFAcceleratorTable::dump(raw_ostream &OS) const { +LLVM_DUMP_METHOD void AppleAcceleratorTable::dump(raw_ostream &OS) const { if (!IsValid) return; @@ -196,8 +201,8 @@ LLVM_DUMP_METHOD void DWARFAcceleratorTable::dump(raw_ostream &OS) const { } } -DWARFAcceleratorTable::ValueIterator::ValueIterator( - const DWARFAcceleratorTable &AccelTable, unsigned Offset) +AppleAcceleratorTable::ValueIterator::ValueIterator( + const AppleAcceleratorTable &AccelTable, unsigned Offset) : AccelTable(&AccelTable), DataOffset(Offset) { if (!AccelTable.AccelSection.isValidOffsetForDataOfSize(DataOffset, 4)) return; @@ -210,7 +215,7 @@ DWARFAcceleratorTable::ValueIterator::ValueIterator( Next(); } -void DWARFAcceleratorTable::ValueIterator::Next() { +void AppleAcceleratorTable::ValueIterator::Next() { assert(NumData > 0 && "attempted to increment iterator past the end"); auto &AccelSection = AccelTable->AccelSection; if (Data >= NumData || @@ -225,8 +230,8 @@ void DWARFAcceleratorTable::ValueIterator::Next() { ++Data; } -iterator_range -DWARFAcceleratorTable::equal_range(StringRef Key) const { +iterator_range +AppleAcceleratorTable::equal_range(StringRef Key) const { if (!IsValid) return make_range(ValueIterator(), ValueIterator()); diff --git a/lib/DebugInfo/DWARF/DWARFContext.cpp b/lib/DebugInfo/DWARF/DWARFContext.cpp index 5893f223c074..76be5d7e6e70 100644 --- a/lib/DebugInfo/DWARF/DWARFContext.cpp +++ b/lib/DebugInfo/DWARF/DWARFContext.cpp 
@@ -82,76 +82,108 @@ static void dumpUUID(raw_ostream &OS, const ObjectFile &Obj) { OS << "UUID: "; memcpy(&UUID, LC.Ptr+sizeof(LC.C), sizeof(UUID)); OS.write_uuid(UUID); - OS << ' ' << MachO->getFileFormatName(); + Triple T = MachO->getArchTriple(); + OS << " (" << T.getArchName() << ')'; OS << ' ' << MachO->getFileName() << '\n'; } } } -static void -dumpDWARFv5StringOffsetsSection(raw_ostream &OS, StringRef SectionName, - const DWARFObject &Obj, - const DWARFSection &StringOffsetsSection, - StringRef StringSection, bool LittleEndian) { +using ContributionCollection = + std::vector>; + +// Collect all the contributions to the string offsets table from all units, +// sort them by their starting offsets and remove duplicates. +static ContributionCollection +collectContributionData(DWARFContext::cu_iterator_range CUs, + DWARFContext::tu_section_iterator_range TUSs) { + ContributionCollection Contributions; + for (const auto &CU : CUs) + Contributions.push_back(CU->getStringOffsetsTableContribution()); + for (const auto &TUS : TUSs) + for (const auto &TU : TUS) + Contributions.push_back(TU->getStringOffsetsTableContribution()); + + // Sort the contributions so that any invalid ones are placed at + // the start of the contributions vector. This way they are reported + // first. + std::sort(Contributions.begin(), Contributions.end(), + [](const Optional &L, + const Optional &R) { + if (L && R) return L->Base < R->Base; + return R.hasValue(); + }); + + // Uniquify contributions, as it is possible that units (specifically + // type units in dwo or dwp files) share contributions. We don't want + // to report them more than once. + Contributions.erase( + std::unique(Contributions.begin(), Contributions.end(), + [](const Optional &L, + const Optional &R) { + if (L && R) + return L->Base == R->Base && L->Size == R->Size; + return false; + }), + Contributions.end()); + return Contributions; +} + +static void dumpDWARFv5StringOffsetsSection( + raw_ostream &OS, StringRef SectionName, const DWARFObject &Obj, + const DWARFSection &StringOffsetsSection, StringRef StringSection, + DWARFContext::cu_iterator_range CUs, + DWARFContext::tu_section_iterator_range TUSs, bool LittleEndian) { + auto Contributions = collectContributionData(CUs, TUSs); DWARFDataExtractor StrOffsetExt(Obj, StringOffsetsSection, LittleEndian, 0); - uint32_t Offset = 0; + DataExtractor StrData(StringSection, LittleEndian, 0); uint64_t SectionSize = StringOffsetsSection.Data.size(); - - while (Offset < SectionSize) { - unsigned Version = 0; - DwarfFormat Format = DWARF32; - unsigned EntrySize = 4; - // Perform validation and extract the segment size from the header. - if (!StrOffsetExt.isValidOffsetForDataOfSize(Offset, 4)) { + uint32_t Offset = 0; + for (auto &Contribution : Contributions) { + // Report an ill-formed contribution. + if (!Contribution) { OS << "error: invalid contribution to string offsets table in section ." << SectionName << ".\n"; return; } - uint32_t ContributionStart = Offset; - uint64_t ContributionSize = StrOffsetExt.getU32(&Offset); - // A contribution size of 0xffffffff indicates DWARF64, with the actual size - // in the following 8 bytes. Otherwise, the DWARF standard mandates that - // the contribution size must be at most 0xfffffff0. - if (ContributionSize == 0xffffffff) { - if (!StrOffsetExt.isValidOffsetForDataOfSize(Offset, 8)) { - OS << "error: invalid contribution to string offsets table in section ." 
- << SectionName << ".\n"; - return; - } - Format = DWARF64; - EntrySize = 8; - ContributionSize = StrOffsetExt.getU64(&Offset); - } else if (ContributionSize > 0xfffffff0) { - OS << "error: invalid contribution to string offsets table in section ." + + dwarf::DwarfFormat Format = Contribution->getFormat(); + uint16_t Version = Contribution->getVersion(); + uint64_t ContributionHeader = Contribution->Base; + // In DWARF v5 there is a contribution header that immediately precedes + // the string offsets base (the location we have previously retrieved from + // the CU DIE's DW_AT_str_offsets attribute). The header is located either + // 8 or 16 bytes before the base, depending on the contribution's format. + if (Version >= 5) + ContributionHeader -= Format == DWARF32 ? 8 : 16; + + // Detect overlapping contributions. + if (Offset > ContributionHeader) { + OS << "error: overlapping contributions to string offsets table in " + "section ." << SectionName << ".\n"; return; } - - // We must ensure that we don't read a partial record at the end, so we - // validate for a multiple of EntrySize. Also, we're expecting a version - // number and padding, which adds an additional 4 bytes. - uint64_t ValidationSize = - 4 + ((ContributionSize + EntrySize - 1) & (-(uint64_t)EntrySize)); - if (!StrOffsetExt.isValidOffsetForDataOfSize(Offset, ValidationSize)) { - OS << "error: contribution to string offsets table in section ." - << SectionName << " has invalid length.\n"; - return; + // Report a gap in the table. + if (Offset < ContributionHeader) { + OS << format("0x%8.8x: Gap, length = ", Offset); + OS << (ContributionHeader - Offset) << "\n"; } - - Version = StrOffsetExt.getU16(&Offset); - Offset += 2; - OS << format("0x%8.8x: ", ContributionStart); - OS << "Contribution size = " << ContributionSize + OS << format("0x%8.8x: ", (uint32_t)ContributionHeader); + OS << "Contribution size = " << Contribution->Size + << ", Format = " << (Format == DWARF32 ? "DWARF32" : "DWARF64") << ", Version = " << Version << "\n"; - uint32_t ContributionBase = Offset; - DataExtractor StrData(StringSection, LittleEndian, 0); - while (Offset - ContributionBase < ContributionSize) { + Offset = Contribution->Base; + unsigned EntrySize = Contribution->getDwarfOffsetByteSize(); + while (Offset - Contribution->Base < Contribution->Size) { OS << format("0x%8.8x: ", Offset); - // FIXME: We can only extract strings in DWARF32 format at the moment. + // FIXME: We can only extract strings if the offset fits in 32 bits. uint64_t StringOffset = StrOffsetExt.getRelocatedValue(EntrySize, &Offset); - if (Format == DWARF32) { + // Extract the string if we can and display it. Otherwise just report + // the offset. + if (StringOffset <= std::numeric_limits::max()) { uint32_t StringOffset32 = (uint32_t)StringOffset; OS << format("%8.8x ", StringOffset32); const char *S = StrData.getCStr(&StringOffset32); @@ -162,6 +194,11 @@ dumpDWARFv5StringOffsetsSection(raw_ostream &OS, StringRef SectionName, OS << "\n"; } } + // Report a gap at the end of the table. + if (Offset < SectionSize) { + OS << format("0x%8.8x: Gap, length = ", Offset); + OS << (SectionSize - Offset) << "\n"; + } } // Dump a DWARF string offsets section. This may be a DWARF v5 formatted @@ -170,17 +207,18 @@ dumpDWARFv5StringOffsetsSection(raw_ostream &OS, StringRef SectionName, // a header containing size and version number. Alternatively, it may be a // monolithic series of string offsets, as generated by the pre-DWARF v5 // implementation of split DWARF. 
-static void dumpStringOffsetsSection(raw_ostream &OS, StringRef SectionName, - const DWARFObject &Obj, - const DWARFSection &StringOffsetsSection, - StringRef StringSection, bool LittleEndian, - unsigned MaxVersion) { +static void dumpStringOffsetsSection( + raw_ostream &OS, StringRef SectionName, const DWARFObject &Obj, + const DWARFSection &StringOffsetsSection, StringRef StringSection, + DWARFContext::cu_iterator_range CUs, + DWARFContext::tu_section_iterator_range TUSs, bool LittleEndian, + unsigned MaxVersion) { // If we have at least one (compile or type) unit with DWARF v5 or greater, // we assume that the section is formatted like a DWARF v5 string offsets // section. if (MaxVersion >= 5) dumpDWARFv5StringOffsetsSection(OS, SectionName, Obj, StringOffsetsSection, - StringSection, LittleEndian); + StringSection, CUs, TUSs, LittleEndian); else { DataExtractor strOffsetExt(StringOffsetsSection.Data, LittleEndian, 0); uint32_t offset = 0; @@ -357,12 +395,16 @@ void DWARFContext::dump( // Verbose dumping is done during parsing and not on the intermediate // representation. OS << "debug_line[" << format("0x%8.8x", Offset) << "]\n"; + unsigned OldOffset = Offset; if (DumpOpts.Verbose) { LineTable.parse(LineData, &Offset, U, &OS); } else { LineTable.parse(LineData, &Offset, U); LineTable.dump(OS); } + // Check for unparseable prologue, to avoid infinite loops. + if (OldOffset == Offset) + break; } } @@ -464,12 +506,14 @@ void DWARFContext::dump( DObj->getStringOffsetSection().Data)) dumpStringOffsetsSection( OS, "debug_str_offsets", *DObj, DObj->getStringOffsetSection(), - DObj->getStringSection(), isLittleEndian(), getMaxVersion()); + DObj->getStringSection(), compile_units(), type_unit_sections(), + isLittleEndian(), getMaxVersion()); if (shouldDump(ExplicitDWO, ".debug_str_offsets.dwo", DIDT_ID_DebugStrOffsets, DObj->getStringOffsetDWOSection().Data)) dumpStringOffsetsSection( OS, "debug_str_offsets.dwo", *DObj, DObj->getStringOffsetDWOSection(), - DObj->getStringDWOSection(), isLittleEndian(), getMaxVersion()); + DObj->getStringDWOSection(), dwo_compile_units(), + dwo_type_unit_sections(), isLittleEndian(), getMaxVersion()); if (shouldDump(Explicit, ".gnu_index", DIDT_ID_GdbIndex, DObj->getGdbIndexSection())) { @@ -659,36 +703,37 @@ const DWARFDebugMacro *DWARFContext::getDebugMacro() { return Macro.get(); } -static DWARFAcceleratorTable & -getAccelTable(std::unique_ptr &Cache, +static AppleAcceleratorTable & +getAccelTable(std::unique_ptr &Cache, const DWARFObject &Obj, const DWARFSection &Section, StringRef StringSection, bool IsLittleEndian) { if (Cache) return *Cache; DWARFDataExtractor AccelSection(Obj, Section, IsLittleEndian, 0); DataExtractor StrData(StringSection, IsLittleEndian, 0); - Cache.reset(new DWARFAcceleratorTable(AccelSection, StrData)); - Cache->extract(); + Cache.reset(new AppleAcceleratorTable(AccelSection, StrData)); + if (Error E = Cache->extract()) + llvm::consumeError(std::move(E)); return *Cache; } -const DWARFAcceleratorTable &DWARFContext::getAppleNames() { +const AppleAcceleratorTable &DWARFContext::getAppleNames() { return getAccelTable(AppleNames, *DObj, DObj->getAppleNamesSection(), DObj->getStringSection(), isLittleEndian()); } -const DWARFAcceleratorTable &DWARFContext::getAppleTypes() { +const AppleAcceleratorTable &DWARFContext::getAppleTypes() { return getAccelTable(AppleTypes, *DObj, DObj->getAppleTypesSection(), DObj->getStringSection(), isLittleEndian()); } -const DWARFAcceleratorTable &DWARFContext::getAppleNamespaces() { +const 
AppleAcceleratorTable &DWARFContext::getAppleNamespaces() { return getAccelTable(AppleNamespaces, *DObj, DObj->getAppleNamespacesSection(), DObj->getStringSection(), isLittleEndian()); } -const DWARFAcceleratorTable &DWARFContext::getAppleObjC() { +const AppleAcceleratorTable &DWARFContext::getAppleObjC() { return getAccelTable(AppleObjC, *DObj, DObj->getAppleObjCSection(), DObj->getStringSection(), isLittleEndian()); } diff --git a/lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp b/lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp index ed5d726ae4e2..b9ef6905912a 100644 --- a/lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp +++ b/lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp @@ -17,6 +17,13 @@ using namespace llvm; +void DWARFDebugArangeSet::Descriptor::dump(raw_ostream &OS, + uint32_t AddressSize) const { + OS << format("[0x%*.*" PRIx64 ", ", AddressSize * 2, AddressSize * 2, Address) + << format(" 0x%*.*" PRIx64 ")", AddressSize * 2, AddressSize * 2, + getEndAddress()); +} + void DWARFDebugArangeSet::clear() { Offset = -1U; std::memset(&HeaderData, 0, sizeof(Header)); @@ -98,10 +105,8 @@ void DWARFDebugArangeSet::dump(raw_ostream &OS) const { << format("cu_offset = 0x%8.8x, addr_size = 0x%2.2x, seg_size = 0x%2.2x\n", HeaderData.CuOffset, HeaderData.AddrSize, HeaderData.SegSize); - const uint32_t hex_width = HeaderData.AddrSize * 2; for (const auto &Desc : ArangeDescriptors) { - OS << format("[0x%*.*" PRIx64 " -", hex_width, hex_width, Desc.Address) - << format(" 0x%*.*" PRIx64 ")\n", - hex_width, hex_width, Desc.getEndAddress()); + Desc.dump(OS, HeaderData.AddrSize); + OS << '\n'; } } diff --git a/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/lib/DebugInfo/DWARF/DWARFDebugLine.cpp index 3e7f3c59c30b..7bc6f10e516d 100644 --- a/lib/DebugInfo/DWARF/DWARFDebugLine.cpp +++ b/lib/DebugInfo/DWARF/DWARFDebugLine.cpp @@ -12,7 +12,6 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/BinaryFormat/Dwarf.h" -#include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" #include "llvm/DebugInfo/DWARF/DWARFRelocMap.h" #include "llvm/Support/Format.h" @@ -49,6 +48,7 @@ void DWARFDebugLine::Prologue::clear() { MinInstLength = MaxOpsPerInst = DefaultIsStmt = LineBase = LineRange = 0; OpcodeBase = 0; FormParams = DWARFFormParams({0, 0, DWARF32}); + HasMD5 = false; StandardOpcodeLengths.clear(); IncludeDirectories.clear(); FileNames.clear(); @@ -73,21 +73,32 @@ void DWARFDebugLine::Prologue::dump(raw_ostream &OS) const { OS << format("standard_opcode_lengths[%s] = %u\n", LNStandardString(I + 1).data(), StandardOpcodeLengths[I]); - if (!IncludeDirectories.empty()) + if (!IncludeDirectories.empty()) { + // DWARF v5 starts directory indexes at 0. + uint32_t DirBase = getVersion() >= 5 ? 
0 : 1; for (uint32_t I = 0; I != IncludeDirectories.size(); ++I) - OS << format("include_directories[%3u] = '", I + 1) + OS << format("include_directories[%3u] = '", I + DirBase) << IncludeDirectories[I] << "'\n"; + } if (!FileNames.empty()) { - OS << " Dir Mod Time File Len File Name\n" - << " ---- ---------- ---------- -----------" - "----------------\n"; + if (HasMD5) + OS << " Dir MD5 Checksum File Name\n" + << " ---- -------------------------------- -----------" + "---------------\n"; + else + OS << " Dir Mod Time File Len File Name\n" + << " ---- ---------- ---------- -----------" + "----------------\n"; for (uint32_t I = 0; I != FileNames.size(); ++I) { const FileNameEntry &FileEntry = FileNames[I]; - OS << format("file_names[%3u] %4" PRIu64 " ", I + 1, FileEntry.DirIdx) - << format("0x%8.8" PRIx64 " 0x%8.8" PRIx64 " ", FileEntry.ModTime, - FileEntry.Length) - << FileEntry.Name << '\n'; + OS << format("file_names[%3u] %4" PRIu64 " ", I + 1, FileEntry.DirIdx); + if (HasMD5) + OS << FileEntry.Checksum.digest(); + else + OS << format("0x%8.8" PRIx64 " 0x%8.8" PRIx64, FileEntry.ModTime, + FileEntry.Length); + OS << ' ' << FileEntry.Name << '\n'; } } } @@ -123,7 +134,7 @@ parseV2DirFileTables(const DWARFDataExtractor &DebugLineData, // ran off the end of the prologue. static ContentDescriptors parseV5EntryFormat(const DWARFDataExtractor &DebugLineData, uint32_t *OffsetPtr, - uint64_t EndPrologueOffset) { + uint64_t EndPrologueOffset, bool *HasMD5) { ContentDescriptors Descriptors; int FormatCount = DebugLineData.getU8(OffsetPtr); bool HasPath = false; @@ -136,6 +147,8 @@ parseV5EntryFormat(const DWARFDataExtractor &DebugLineData, uint32_t *OffsetPtr, Descriptor.Form = dwarf::Form(DebugLineData.getULEB128(OffsetPtr)); if (Descriptor.Type == dwarf::DW_LNCT_path) HasPath = true; + else if (Descriptor.Type == dwarf::DW_LNCT_MD5 && HasMD5) + *HasMD5 = true; Descriptors.push_back(Descriptor); } return HasPath ? Descriptors : ContentDescriptors(); @@ -145,11 +158,11 @@ static bool parseV5DirFileTables(const DWARFDataExtractor &DebugLineData, uint32_t *OffsetPtr, uint64_t EndPrologueOffset, const DWARFFormParams &FormParams, const DWARFUnit *U, - std::vector &IncludeDirectories, + bool &HasMD5, std::vector &IncludeDirectories, std::vector &FileNames) { // Get the directory entry description. ContentDescriptors DirDescriptors = - parseV5EntryFormat(DebugLineData, OffsetPtr, EndPrologueOffset); + parseV5EntryFormat(DebugLineData, OffsetPtr, EndPrologueOffset, nullptr); if (DirDescriptors.empty()) return false; @@ -175,7 +188,7 @@ parseV5DirFileTables(const DWARFDataExtractor &DebugLineData, // Get the file entry description. 
ContentDescriptors FileDescriptors = - parseV5EntryFormat(DebugLineData, OffsetPtr, EndPrologueOffset); + parseV5EntryFormat(DebugLineData, OffsetPtr, EndPrologueOffset, &HasMD5); if (FileDescriptors.empty()) return false; @@ -202,7 +215,11 @@ parseV5DirFileTables(const DWARFDataExtractor &DebugLineData, case DW_LNCT_size: FileEntry.Length = Value.getAsUnsignedConstant().getValue(); break; - // FIXME: Add MD5 + case DW_LNCT_MD5: + assert(Value.getAsBlock().getValue().size() == 16); + std::uninitialized_copy_n(Value.getAsBlock().getValue().begin(), 16, + FileEntry.Checksum.Bytes.begin()); + break; default: break; } @@ -254,7 +271,7 @@ bool DWARFDebugLine::Prologue::parse(const DWARFDataExtractor &DebugLineData, if (getVersion() >= 5) { if (!parseV5DirFileTables(DebugLineData, OffsetPtr, EndPrologueOffset, - getFormParams(), U, IncludeDirectories, + FormParams, U, HasMD5, IncludeDirectories, FileNames)) { fprintf(stderr, "warning: parsing line table prologue at 0x%8.8" PRIx64 diff --git a/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp b/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp index 58f88536f317..02d17b278b47 100644 --- a/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp +++ b/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp @@ -43,8 +43,10 @@ void DWARFDebugLoc::LocationList::dump(raw_ostream &OS, bool IsLittleEndian, for (const Entry &E : Entries) { OS << '\n'; OS.indent(Indent); - OS << format("0x%016" PRIx64, E.Begin) << " - " - << format("0x%016" PRIx64, E.End) << ": "; + OS << format("[0x%*.*" PRIx64 ", ", AddressSize * 2, AddressSize * 2, + E.Begin) + << format(" 0x%*.*" PRIx64 ")", AddressSize * 2, AddressSize * 2, E.End); + OS << ": "; dumpExpression(OS, E.Loc, IsLittleEndian, AddressSize, MRI); } diff --git a/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp b/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp index f0b7ec2751de..943a740c7ae4 100644 --- a/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp +++ b/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp @@ -17,9 +17,15 @@ using namespace llvm; +void DWARFAddressRange::dump(raw_ostream &OS, uint32_t AddressSize) const { + + OS << format("[0x%*.*" PRIx64 ", ", AddressSize * 2, AddressSize * 2, LowPC) + << format(" 0x%*.*" PRIx64 ")", AddressSize * 2, AddressSize * 2, HighPC); +} + raw_ostream &llvm::operator<<(raw_ostream &OS, const DWARFAddressRange &R) { - return OS << format("[0x%16.16" PRIx64 ", 0x%16.16" PRIx64 ")", R.LowPC, - R.HighPC); + R.dump(OS, /* AddressSize */ 8); + return OS; } void DWARFDebugRangeList::clear() { diff --git a/lib/DebugInfo/DWARF/DWARFDie.cpp b/lib/DebugInfo/DWARF/DWARFDie.cpp index c4bb2259244b..17559d2fa218 100644 --- a/lib/DebugInfo/DWARF/DWARFDie.cpp +++ b/lib/DebugInfo/DWARF/DWARFDie.cpp @@ -62,13 +62,11 @@ static void dumpRanges(const DWARFObject &Obj, raw_ostream &OS, if (DumpOpts.Verbose) SectionNames = Obj.getSectionNames(); - for (size_t I = 0; I < Ranges.size(); ++I) { - const DWARFAddressRange &R = Ranges[I]; + for (const DWARFAddressRange &R : Ranges) { OS << '\n'; OS.indent(Indent); - OS << format("[0x%0*" PRIx64 " - 0x%0*" PRIx64 ")", AddressSize * 2, - R.LowPC, AddressSize * 2, R.HighPC); + R.dump(OS, AddressSize); if (SectionNames.empty() || R.SectionIndex == -1ULL) continue; @@ -236,12 +234,14 @@ static void dumpAttribute(raw_ostream &OS, const DWARFDie &Die, OS << *formValue.getAsUnsignedConstant(); else if (Attr == DW_AT_high_pc && !DumpOpts.ShowForm && !DumpOpts.Verbose && formValue.getAsUnsignedConstant()) { - // Print the actual address rather than the offset. 
- uint64_t LowPC, HighPC, Index; - if (Die.getLowAndHighPC(LowPC, HighPC, Index)) - OS << format("0x%016" PRIx64, HighPC); - else - formValue.dump(OS, DumpOpts); + if (DumpOpts.ShowAddresses) { + // Print the actual address rather than the offset. + uint64_t LowPC, HighPC, Index; + if (Die.getLowAndHighPC(LowPC, HighPC, Index)) + OS << format("0x%016" PRIx64, HighPC); + else + formValue.dump(OS, DumpOpts); + } } else if (Attr == DW_AT_location || Attr == DW_AT_frame_base || Attr == DW_AT_data_member_location || Attr == DW_AT_GNU_call_site_value) @@ -458,7 +458,8 @@ void DWARFDie::dump(raw_ostream &OS, unsigned Indent, if (debug_info_data.isValidOffset(offset)) { uint32_t abbrCode = debug_info_data.getULEB128(&offset); - WithColor(OS, syntax::Address).get() << format("\n0x%8.8x: ", Offset); + if (DumpOpts.ShowAddresses) + WithColor(OS, syntax::Address).get() << format("\n0x%8.8x: ", Offset); if (abbrCode) { auto AbbrevDecl = getAbbreviationDeclarationPtr(); @@ -488,7 +489,7 @@ void DWARFDie::dump(raw_ostream &OS, unsigned Indent, } DWARFDie child = getFirstChild(); - if (DumpOpts.RecurseDepth > 0 && child) { + if (DumpOpts.ShowChildren && DumpOpts.RecurseDepth > 0 && child) { DumpOpts.RecurseDepth--; while (child) { child.dump(OS, Indent + 2, DumpOpts); diff --git a/lib/DebugInfo/DWARF/DWARFExpression.cpp b/lib/DebugInfo/DWARF/DWARFExpression.cpp index 16058e461f43..c704c2901aef 100644 --- a/lib/DebugInfo/DWARF/DWARFExpression.cpp +++ b/lib/DebugInfo/DWARF/DWARFExpression.cpp @@ -9,8 +9,6 @@ #include "llvm/DebugInfo/DWARF/DWARFExpression.h" #include "llvm/BinaryFormat/Dwarf.h" -#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h" -#include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/Format.h" #include diff --git a/lib/DebugInfo/DWARF/DWARFFormValue.cpp b/lib/DebugInfo/DWARF/DWARFFormValue.cpp index c4abd49797b1..769ac37aa0b6 100644 --- a/lib/DebugInfo/DWARF/DWARFFormValue.cpp +++ b/lib/DebugInfo/DWARF/DWARFFormValue.cpp @@ -64,8 +64,9 @@ DWARFFormValue::getFixedByteSize(dwarf::Form Form, const DWARFFormParams Params) { switch (Form) { case DW_FORM_addr: - assert(Params.Version && Params.AddrSize && "Invalid Params for form"); - return Params.AddrSize; + if (Params) + return Params.AddrSize; + return None; case DW_FORM_block: // ULEB128 length L followed by L bytes. case DW_FORM_block1: // 1 byte length L followed by L bytes. 
@@ -86,8 +87,9 @@ DWARFFormValue::getFixedByteSize(dwarf::Form Form, return None; case DW_FORM_ref_addr: - assert(Params.Version && Params.AddrSize && "Invalid Params for form"); - return Params.getRefAddrByteSize(); + if (Params) + return Params.getRefAddrByteSize(); + return None; case DW_FORM_flag: case DW_FORM_data1: @@ -118,8 +120,9 @@ DWARFFormValue::getFixedByteSize(dwarf::Form Form, case DW_FORM_line_strp: case DW_FORM_sec_offset: case DW_FORM_strp_sup: - assert(Params.Version && Params.AddrSize && "Invalid Params for form"); - return Params.getDwarfOffsetByteSize(); + if (Params) + return Params.getDwarfOffsetByteSize(); + return None; case DW_FORM_data8: case DW_FORM_ref8: @@ -186,6 +189,7 @@ bool DWARFFormValue::skipValue(dwarf::Form Form, DataExtractor DebugInfoData, case DW_FORM_data2: case DW_FORM_data4: case DW_FORM_data8: + case DW_FORM_data16: case DW_FORM_flag: case DW_FORM_ref1: case DW_FORM_ref2: @@ -339,6 +343,11 @@ bool DWARFFormValue::extractValue(const DWARFDataExtractor &Data, case DW_FORM_ref_sup8: Value.uval = Data.getU64(OffsetPtr); break; + case DW_FORM_data16: + // Treat this like a 16-byte block. + Value.uval = 16; + IsBlock = true; + break; case DW_FORM_sdata: Value.sval = Data.getSLEB128(OffsetPtr); break; @@ -396,18 +405,19 @@ bool DWARFFormValue::extractValue(const DWARFDataExtractor &Data, void DWARFFormValue::dump(raw_ostream &OS, DIDumpOptions DumpOpts) const { uint64_t UValue = Value.uval; bool CURelativeOffset = false; - + raw_ostream &AddrOS = + DumpOpts.ShowAddresses ? WithColor(OS, syntax::Address).get() : nulls(); switch (Form) { case DW_FORM_addr: - OS << format("0x%016" PRIx64, UValue); + AddrOS << format("0x%016" PRIx64, UValue); break; case DW_FORM_GNU_addr_index: { - OS << format(" indexed (%8.8x) address = ", (uint32_t)UValue); + AddrOS << format(" indexed (%8.8x) address = ", (uint32_t)UValue); uint64_t Address; if (U == nullptr) OS << ""; else if (U->getAddrOffsetSectionItem(UValue, Address)) - OS << format("0x%016" PRIx64, Address); + AddrOS << format("0x%016" PRIx64, Address); else OS << ""; break; @@ -426,9 +436,14 @@ void DWARFFormValue::dump(raw_ostream &OS, DIDumpOptions DumpOpts) const { OS << format("0x%08x", (uint32_t)UValue); break; case DW_FORM_ref_sig8: + AddrOS << format("0x%016" PRIx64, UValue); + break; case DW_FORM_data8: OS << format("0x%016" PRIx64, UValue); break; + case DW_FORM_data16: + OS << format_bytes(ArrayRef(Value.data, 16), None, 16, 16); + break; case DW_FORM_string: OS << '"'; OS.write_escaped(Value.cstr); @@ -488,38 +503,40 @@ void DWARFFormValue::dump(raw_ostream &OS, DIDumpOptions DumpOpts) const { case DW_FORM_strx3: case DW_FORM_strx4: case DW_FORM_GNU_str_index: - OS << format(" indexed (%8.8x) string = ", (uint32_t)UValue); + if (DumpOpts.Verbose) + OS << format(" indexed (%8.8x) string = ", (uint32_t)UValue); dumpString(OS); break; case DW_FORM_GNU_strp_alt: - OS << format("alt indirect string, offset: 0x%" PRIx64 "", UValue); + if (DumpOpts.Verbose) + OS << format("alt indirect string, offset: 0x%" PRIx64 "", UValue); dumpString(OS); break; case DW_FORM_ref_addr: - OS << format("0x%016" PRIx64, UValue); + AddrOS << format("0x%016" PRIx64, UValue); break; case DW_FORM_ref1: CURelativeOffset = true; - OS << format("cu + 0x%2.2x", (uint8_t)UValue); + AddrOS << format("cu + 0x%2.2x", (uint8_t)UValue); break; case DW_FORM_ref2: CURelativeOffset = true; - OS << format("cu + 0x%4.4x", (uint16_t)UValue); + AddrOS << format("cu + 0x%4.4x", (uint16_t)UValue); break; case DW_FORM_ref4: CURelativeOffset = true; 
- OS << format("cu + 0x%4.4x", (uint32_t)UValue); + AddrOS << format("cu + 0x%4.4x", (uint32_t)UValue); break; case DW_FORM_ref8: CURelativeOffset = true; - OS << format("cu + 0x%8.8" PRIx64, UValue); + AddrOS << format("cu + 0x%8.8" PRIx64, UValue); break; case DW_FORM_ref_udata: CURelativeOffset = true; - OS << format("cu + 0x%" PRIx64, UValue); + AddrOS << format("cu + 0x%" PRIx64, UValue); break; case DW_FORM_GNU_ref_alt: - OS << format("", UValue); + AddrOS << format("", UValue); break; // All DW_FORM_indirect attributes should be resolved prior to calling @@ -530,7 +547,7 @@ void DWARFFormValue::dump(raw_ostream &OS, DIDumpOptions DumpOpts) const { // Should be formatted to 64-bit for DWARF64. case DW_FORM_sec_offset: - OS << format("0x%08x", (uint32_t)UValue); + AddrOS << format("0x%08x", (uint32_t)UValue); break; default: @@ -646,7 +663,8 @@ Optional DWARFFormValue::getAsSignedConstant() const { } Optional> DWARFFormValue::getAsBlock() const { - if (!isFormClass(FC_Block) && !isFormClass(FC_Exprloc)) + if (!isFormClass(FC_Block) && !isFormClass(FC_Exprloc) && + Form != DW_FORM_data16) return None; return makeArrayRef(Value.data, Value.uval); } diff --git a/lib/DebugInfo/DWARF/DWARFUnit.cpp b/lib/DebugInfo/DWARF/DWARFUnit.cpp index c3d8ff2cbc29..df55d7debf92 100644 --- a/lib/DebugInfo/DWARF/DWARFUnit.cpp +++ b/lib/DebugInfo/DWARF/DWARFUnit.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/DWARF/DWARFUnit.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" #include "llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h" @@ -79,8 +80,10 @@ bool DWARFUnit::getAddrOffsetSectionItem(uint32_t Index, bool DWARFUnit::getStringOffsetSectionItem(uint32_t Index, uint64_t &Result) const { - unsigned ItemSize = getDwarfOffsetByteSize(); - uint32_t Offset = StringOffsetSectionBase + Index * ItemSize; + if (!StringOffsetsTableContribution) + return false; + unsigned ItemSize = getDwarfStringOffsetsByteSize(); + uint32_t Offset = getStringOffsetsBase() + Index * ItemSize; if (StringOffsetSection.Data.size() < Offset + ItemSize) return false; DWARFDataExtractor DA(Context.getDWARFObj(), StringOffsetSection, @@ -251,15 +254,28 @@ size_t DWARFUnit::extractDIEsIfNeeded(bool CUDieOnly) { RangeSectionBase = toSectionOffset(UnitDie.find(DW_AT_rnglists_base), 0); } - // In general, we derive the offset of the unit's contibution to the - // debug_str_offsets{.dwo} section from the unit DIE's - // DW_AT_str_offsets_base attribute. In dwp files we add to it the offset - // we get from the index table. - StringOffsetSectionBase = - toSectionOffset(UnitDie.find(DW_AT_str_offsets_base), 0); + // In general, in DWARF v5 and beyond we derive the start of the unit's + // contribution to the string offsets table from the unit DIE's + // DW_AT_str_offsets_base attribute. Split DWARF units do not use this + // attribute, so we assume that there is a contribution to the string + // offsets table starting at offset 0 of the debug_str_offsets.dwo section. + // In both cases we need to determine the format of the contribution, + // which may differ from the unit's format. + uint64_t StringOffsetsContributionBase = + isDWO ? 
0 : toSectionOffset(UnitDie.find(DW_AT_str_offsets_base), 0); if (IndexEntry) if (const auto *C = IndexEntry->getOffset(DW_SECT_STR_OFFSETS)) - StringOffsetSectionBase += C->Offset; + StringOffsetsContributionBase += C->Offset; + + DWARFDataExtractor DA(Context.getDWARFObj(), StringOffsetSection, + isLittleEndian, 0); + if (isDWO) + StringOffsetsTableContribution = + determineStringOffsetsTableContributionDWO( + DA, StringOffsetsContributionBase); + else if (getVersion() >= 5) + StringOffsetsTableContribution = determineStringOffsetsTableContribution( + DA, StringOffsetsContributionBase); // Don't fall back to DW_AT_GNU_ranges_base: it should be ignored for // skeleton CU DIE, so that DWARF users not aware of it are not broken. @@ -344,45 +360,378 @@ void DWARFUnit::collectAddressRanges(DWARFAddressRangesVector &CURanges) { clearDIEs(true); } -void DWARFUnit::updateAddressDieMap(DWARFDie Die) { - if (Die.isSubroutineDIE()) { +// Populates a map from PC addresses to subprogram DIEs. +// +// This routine tries to look at the smallest amount of the debug info it can +// to locate the DIEs. This is because many subprograms will never end up being +// read or needed at all. We want to be as lazy as possible. +void DWARFUnit::buildSubprogramDIEAddrMap() { + assert(SubprogramDIEAddrMap.empty() && "Must only build this map once!"); + SmallVector Worklist; + Worklist.push_back(getUnitDIE()); + do { + DWARFDie Die = Worklist.pop_back_val(); + + // Queue up child DIEs to recurse through. + // FIXME: This causes us to read a lot more debug info than we really need. + // We should look at pruning out DIEs which cannot transitively hold + // separate subprograms. + for (DWARFDie Child : Die.children()) + Worklist.push_back(Child); + + // If handling a non-subprogram DIE, nothing else to do. + if (!Die.isSubprogramDIE()) + continue; + + // For subprogram DIEs, store them, and insert relevant markers into the + // address map. We don't care about overlap at all here as DWARF doesn't + // meaningfully support that, so we simply will insert a range with no DIE + // starting from the high PC. In the event there are overlaps, sorting + // these may truncate things in surprising ways but still will allow + // lookups to proceed. + int DIEIndex = SubprogramDIEAddrInfos.size(); + SubprogramDIEAddrInfos.push_back({Die, (uint64_t)-1, {}}); for (const auto &R : Die.getAddressRanges()) { // Ignore 0-sized ranges. if (R.LowPC == R.HighPC) continue; - auto B = AddrDieMap.upper_bound(R.LowPC); - if (B != AddrDieMap.begin() && R.LowPC < (--B)->second.first) { - // The range is a sub-range of existing ranges, we need to split the - // existing range. - if (R.HighPC < B->second.first) - AddrDieMap[R.HighPC] = B->second; - if (R.LowPC > B->first) - AddrDieMap[B->first].first = R.LowPC; + + SubprogramDIEAddrMap.push_back({R.LowPC, DIEIndex}); + SubprogramDIEAddrMap.push_back({R.HighPC, -1}); + + if (R.LowPC < SubprogramDIEAddrInfos.back().SubprogramBasePC) + SubprogramDIEAddrInfos.back().SubprogramBasePC = R.LowPC; + } + } while (!Worklist.empty()); + + if (SubprogramDIEAddrMap.empty()) { + // If we found no ranges, create a no-op map so that lookups remain simple + // but never find anything. + SubprogramDIEAddrMap.push_back({0, -1}); + return; + } + + // Next, sort the ranges and remove both exact duplicates and runs with the + // same DIE index. We order the ranges so that non-empty ranges are + // preferred. Because there may be ties, we also need to use stable sort. 
+ std::stable_sort(SubprogramDIEAddrMap.begin(), SubprogramDIEAddrMap.end(), + [](const std::pair &LHS, + const std::pair &RHS) { + if (LHS.first < RHS.first) + return true; + if (LHS.first > RHS.first) + return false; + + // For ranges that start at the same address, keep the one + // with a DIE. + if (LHS.second != -1 && RHS.second == -1) + return true; + + return false; + }); + SubprogramDIEAddrMap.erase( + std::unique(SubprogramDIEAddrMap.begin(), SubprogramDIEAddrMap.end(), + [](const std::pair &LHS, + const std::pair &RHS) { + // If the start addresses are exactly the same, we can + // remove all but the first one as it is the only one that + // will be found and used. + // + // If the DIE indices are the same, we can "merge" the + // ranges by eliminating the second. + return LHS.first == RHS.first || LHS.second == RHS.second; + }), + SubprogramDIEAddrMap.end()); + + assert(SubprogramDIEAddrMap.back().second == -1 && + "The last interval must not have a DIE as each DIE's address range is " + "bounded."); +} + +// Build the second level of mapping from PC to DIE, specifically one that maps +// a PC *within* a particular DWARF subprogram into a precise, maximally nested +// inlined subroutine DIE (if any exists). We build a separate map for each +// subprogram because many subprograms will never get queried for an address +// and this allows us to be significantly lazier in reading the DWARF itself. +void DWARFUnit::buildInlinedSubroutineDIEAddrMap( + SubprogramDIEAddrInfo &SPInfo) { + auto &AddrMap = SPInfo.InlinedSubroutineDIEAddrMap; + uint64_t BasePC = SPInfo.SubprogramBasePC; + + auto SubroutineAddrMapSorter = [](const std::pair &LHS, + const std::pair &RHS) { + if (LHS.first < RHS.first) + return true; + if (LHS.first > RHS.first) + return false; + + // For ranges that start at the same address, keep the + // non-empty one. + if (LHS.second != -1 && RHS.second == -1) + return true; + + return false; + }; + auto SubroutineAddrMapUniquer = [](const std::pair &LHS, + const std::pair &RHS) { + // If the start addresses are exactly the same, we can + // remove all but the first one as it is the only one that + // will be found and used. + // + // If the DIE indices are the same, we can "merge" the + // ranges by eliminating the second. + return LHS.first == RHS.first || LHS.second == RHS.second; + }; + + struct DieAndParentIntervalRange { + DWARFDie Die; + int ParentIntervalsBeginIdx, ParentIntervalsEndIdx; + }; + + SmallVector Worklist; + auto EnqueueChildDIEs = [&](const DWARFDie &Die, int ParentIntervalsBeginIdx, + int ParentIntervalsEndIdx) { + for (DWARFDie Child : Die.children()) + Worklist.push_back( + {Child, ParentIntervalsBeginIdx, ParentIntervalsEndIdx}); + }; + EnqueueChildDIEs(SPInfo.SubprogramDIE, 0, 0); + while (!Worklist.empty()) { + DWARFDie Die = Worklist.back().Die; + int ParentIntervalsBeginIdx = Worklist.back().ParentIntervalsBeginIdx; + int ParentIntervalsEndIdx = Worklist.back().ParentIntervalsEndIdx; + Worklist.pop_back(); + + // If we encounter a nested subprogram, simply ignore it. We map to + // (disjoint) subprograms before arriving here and we don't want to examine + // any inlined subroutines of an unrelated subprogram. + if (Die.getTag() == DW_TAG_subprogram) + continue; + + // For non-subroutines, just recurse to keep searching for inlined + // subroutines.
+ if (Die.getTag() != DW_TAG_inlined_subroutine) { + EnqueueChildDIEs(Die, ParentIntervalsBeginIdx, ParentIntervalsEndIdx); + continue; + } + + // Capture the inlined subroutine DIE that we will reference from the map. + int DIEIndex = InlinedSubroutineDIEs.size(); + InlinedSubroutineDIEs.push_back(Die); + + int DieIntervalsBeginIdx = AddrMap.size(); + // First collect the PC ranges for this DIE into our subroutine interval + // map. + for (auto R : Die.getAddressRanges()) { + // Clamp the PCs to be above the base. + R.LowPC = std::max(R.LowPC, BasePC); + R.HighPC = std::max(R.HighPC, BasePC); + // Compute relative PCs from the subprogram base and drop down to an + // unsigned 32-bit int to represent them within the data structure. This + // lets us cover a 4gb single subprogram. Because subprograms may be + // partitioned into distant parts of a binary (think hot/cold + // partitioning) we want to preserve as much as we can here without + // burning extra memory. Past that, we will simply truncate and lose the + // ability to map those PCs to a DIE more precise than the subprogram. + const uint32_t MaxRelativePC = std::numeric_limits<uint32_t>::max(); + uint32_t RelativeLowPC = (R.LowPC - BasePC) > (uint64_t)MaxRelativePC + ? MaxRelativePC + : (uint32_t)(R.LowPC - BasePC); + uint32_t RelativeHighPC = (R.HighPC - BasePC) > (uint64_t)MaxRelativePC + ? MaxRelativePC + : (uint32_t)(R.HighPC - BasePC); + // Ignore empty or bogus ranges. + if (RelativeLowPC >= RelativeHighPC) + continue; + AddrMap.push_back({RelativeLowPC, DIEIndex}); + AddrMap.push_back({RelativeHighPC, -1}); + } + + // If there are no address ranges, there is nothing to do to map into them + // and there cannot be any child subroutine DIEs with address ranges of + // interest as those would all be required to nest within this DIE's + // non-existent ranges, so we can immediately continue to the next DIE in + // the worklist. + if (DieIntervalsBeginIdx == (int)AddrMap.size()) + continue; + + // The PCs from this DIE should never overlap, so we can easily sort them + // here. + std::sort(AddrMap.begin() + DieIntervalsBeginIdx, AddrMap.end(), + SubroutineAddrMapSorter); + // Remove any dead ranges. These should only come from "empty" ranges that + // were clobbered by some other range. + AddrMap.erase(std::unique(AddrMap.begin() + DieIntervalsBeginIdx, + AddrMap.end(), SubroutineAddrMapUniquer), + AddrMap.end()); + + // Compute the end index of this DIE's addr map intervals. + int DieIntervalsEndIdx = AddrMap.size(); + + assert(DieIntervalsBeginIdx != DieIntervalsEndIdx && + "Must not have an empty map for this layer!"); + assert(AddrMap.back().second == -1 && "Must end with an empty range!"); + assert(std::is_sorted(AddrMap.begin() + DieIntervalsBeginIdx, AddrMap.end(), + less_first()) && + "Failed to sort this DIE's intervals!"); + + // If we have any parent intervals, walk the newly added ranges and find + // the parent ranges they were inserted into. Both of these are sorted and + // neither has any overlaps. We need to append new ranges to split up any + // parent ranges these new ranges would overlap when we merge them. + if (ParentIntervalsBeginIdx != ParentIntervalsEndIdx) { + int ParentIntervalIdx = ParentIntervalsBeginIdx; + for (int i = DieIntervalsBeginIdx, e = DieIntervalsEndIdx - 1; i < e; + ++i) { + const uint32_t IntervalStart = AddrMap[i].first; + const uint32_t IntervalEnd = AddrMap[i + 1].first; + const int IntervalDieIdx = AddrMap[i].second; + if (IntervalDieIdx == -1) { + // For empty intervals, nothing is required.
This is a bit surprising + // however. If the prior interval overlaps a parent interval and this + // would be necessary to mark the end, we will synthesize a new end + // that switches back to the parent DIE below. And this interval will + // get dropped in favor of one with a DIE attached. However, we'll + // still include this and so worst-case, it will still end the prior + // interval. + continue; + } + + // We are walking the new ranges in order, so search forward from the + // last point for a parent range that might overlap. + auto ParentIntervalsRange = + make_range(AddrMap.begin() + ParentIntervalIdx, + AddrMap.begin() + ParentIntervalsEndIdx); + assert(std::is_sorted(ParentIntervalsRange.begin(), + ParentIntervalsRange.end(), less_first()) && + "Unsorted parent intervals can't be searched!"); + auto PI = std::upper_bound( + ParentIntervalsRange.begin(), ParentIntervalsRange.end(), + IntervalStart, + [](uint32_t LHS, const std::pair &RHS) { + return LHS < RHS.first; + }); + if (PI == ParentIntervalsRange.begin() || + PI == ParentIntervalsRange.end()) + continue; + + ParentIntervalIdx = PI - AddrMap.begin(); + int32_t &ParentIntervalDieIdx = std::prev(PI)->second; + uint32_t &ParentIntervalStart = std::prev(PI)->first; + const uint32_t ParentIntervalEnd = PI->first; + + // If the new range starts exactly at the position of the parent range, + // we need to adjust the parent range. Note that these collisions can + // only happen with the original parent range because we will merge any + // adjacent ranges in the child. + if (IntervalStart == ParentIntervalStart) { + // If there will be a tail, just shift the start of the parent + // forward. Note that this cannot change the parent ordering. + if (IntervalEnd < ParentIntervalEnd) { + ParentIntervalStart = IntervalEnd; + continue; + } + // Otherwise, mark this as becoming empty so we'll remove it and + // prefer the child range. + ParentIntervalDieIdx = -1; + continue; + } + + // Finally, if the parent interval will need to remain as a prefix to + // this one, insert a new interval to cover any tail. + if (IntervalEnd < ParentIntervalEnd) + AddrMap.push_back({IntervalEnd, ParentIntervalDieIdx}); } - AddrDieMap[R.LowPC] = std::make_pair(R.HighPC, Die); } + + // Note that we don't need to re-sort even this DIE's address map intervals + // after this. All of the newly added intervals actually fill in *gaps* in + // this DIE's address map, and we know that children won't need to lookup + // into those gaps. + + // Recurse through its children, giving them the interval map range of this + // DIE to use as their parent intervals. + EnqueueChildDIEs(Die, DieIntervalsBeginIdx, DieIntervalsEndIdx); + } + + if (AddrMap.empty()) { + AddrMap.push_back({0, -1}); + return; } - // Parent DIEs are added to the AddrDieMap prior to the Children DIEs to - // simplify the logic to update AddrDieMap. The child's range will always - // be equal or smaller than the parent's range. With this assumption, when - // adding one range into the map, it will at most split a range into 3 - // sub-ranges. - for (DWARFDie Child = Die.getFirstChild(); Child; Child = Child.getSibling()) - updateAddressDieMap(Child); + + // Now that we've added all of the intervals needed, we need to resort and + // unique them. Most notably, this will remove all the empty ranges that had + // a parent range covering, etc. We only expect a single non-empty interval + // at any given start point, so we just use std::sort. 
This could potentially + // produce non-deterministic maps for invalid DWARF. + std::sort(AddrMap.begin(), AddrMap.end(), SubroutineAddrMapSorter); + AddrMap.erase( + std::unique(AddrMap.begin(), AddrMap.end(), SubroutineAddrMapUniquer), + AddrMap.end()); } DWARFDie DWARFUnit::getSubroutineForAddress(uint64_t Address) { extractDIEsIfNeeded(false); - if (AddrDieMap.empty()) - updateAddressDieMap(getUnitDIE()); - auto R = AddrDieMap.upper_bound(Address); - if (R == AddrDieMap.begin()) + + // We use a two-level mapping structure to locate subroutines for a given PC + // address. + // + // First, we map the address to a subprogram. This can be done more cheaply + // because subprograms cannot nest within each other. It also allows us to + // avoid detailed examination of many subprograms, instead only focusing on + // the ones which we end up actively querying. + if (SubprogramDIEAddrMap.empty()) + buildSubprogramDIEAddrMap(); + + assert(!SubprogramDIEAddrMap.empty() && + "We must always end up with a non-empty map!"); + + auto I = std::upper_bound( + SubprogramDIEAddrMap.begin(), SubprogramDIEAddrMap.end(), Address, + [](uint64_t LHS, const std::pair &RHS) { + return LHS < RHS.first; + }); + // If we find the beginning, then the address is before the first subprogram. + if (I == SubprogramDIEAddrMap.begin()) return DWARFDie(); - // upper_bound's previous item contains Address. - --R; - if (Address >= R->second.first) + // Back up to the interval containing the address and see if it + // has a DIE associated with it. + --I; + if (I->second == -1) return DWARFDie(); - return R->second.second; + + auto &SPInfo = SubprogramDIEAddrInfos[I->second]; + + // Now that we have the subprogram for this address, we do the second level + // mapping by building a map within a subprogram's PC range to any specific + // inlined subroutine. + if (SPInfo.InlinedSubroutineDIEAddrMap.empty()) + buildInlinedSubroutineDIEAddrMap(SPInfo); + + // We lookup within the inlined subroutine using a subprogram-relative + // address. + assert(Address >= SPInfo.SubprogramBasePC && + "Address isn't above the start of the subprogram!"); + uint32_t RelativeAddr = ((Address - SPInfo.SubprogramBasePC) > + (uint64_t)std::numeric_limits::max()) + ? std::numeric_limits::max() + : (uint32_t)(Address - SPInfo.SubprogramBasePC); + + auto J = + std::upper_bound(SPInfo.InlinedSubroutineDIEAddrMap.begin(), + SPInfo.InlinedSubroutineDIEAddrMap.end(), RelativeAddr, + [](uint32_t LHS, const std::pair &RHS) { + return LHS < RHS.first; + }); + // If we find the beginning, the address is before any inlined subroutine so + // return the subprogram DIE. + if (J == SPInfo.InlinedSubroutineDIEAddrMap.begin()) + return SPInfo.SubprogramDIE; + // Back up `J` and return the inlined subroutine if we have one or the + // subprogram if we don't. + --J; + return J->second == -1 ? SPInfo.SubprogramDIE + : InlinedSubroutineDIEs[J->second]; } void @@ -466,3 +815,89 @@ const DWARFAbbreviationDeclarationSet *DWARFUnit::getAbbreviations() const { Abbrevs = Abbrev->getAbbreviationDeclarationSet(AbbrOffset); return Abbrevs; } + +Optional +StrOffsetsContributionDescriptor::validateContributionSize( + DWARFDataExtractor &DA) { + uint8_t EntrySize = getDwarfOffsetByteSize(); + // In order to ensure that we don't read a partial record at the end of + // the section we validate for a multiple of the entry size. + uint64_t ValidationSize = alignTo(Size, EntrySize); + // Guard against overflow. 
+ if (ValidationSize >= Size) + if (DA.isValidOffsetForDataOfSize((uint32_t)Base, ValidationSize)) + return *this; + return Optional(); +} + +// Look for a DWARF64-formatted contribution to the string offsets table +// starting at a given offset and record it in a descriptor. +static Optional +parseDWARF64StringOffsetsTableHeader(DWARFDataExtractor &DA, uint32_t Offset) { + if (!DA.isValidOffsetForDataOfSize(Offset, 16)) + return Optional(); + + if (DA.getU32(&Offset) != 0xffffffff) + return Optional(); + + uint64_t Size = DA.getU64(&Offset); + uint8_t Version = DA.getU16(&Offset); + (void)DA.getU16(&Offset); // padding + return StrOffsetsContributionDescriptor(Offset, Size, Version, DWARF64); + //return Optional(Descriptor); +} + +// Look for a DWARF32-formatted contribution to the string offsets table +// starting at a given offset and record it in a descriptor. +static Optional +parseDWARF32StringOffsetsTableHeader(DWARFDataExtractor &DA, uint32_t Offset) { + if (!DA.isValidOffsetForDataOfSize(Offset, 8)) + return Optional(); + uint32_t ContributionSize = DA.getU32(&Offset); + if (ContributionSize >= 0xfffffff0) + return Optional(); + uint8_t Version = DA.getU16(&Offset); + (void)DA.getU16(&Offset); // padding + return StrOffsetsContributionDescriptor(Offset, ContributionSize, Version, DWARF32); + //return Optional(Descriptor); +} + +Optional +DWARFUnit::determineStringOffsetsTableContribution(DWARFDataExtractor &DA, + uint64_t Offset) { + Optional Descriptor; + // Attempt to find a DWARF64 contribution 16 bytes before the base. + if (Offset >= 16) + Descriptor = + parseDWARF64StringOffsetsTableHeader(DA, (uint32_t)Offset - 16); + // Try to find a DWARF32 contribution 8 bytes before the base. + if (!Descriptor && Offset >= 8) + Descriptor = parseDWARF32StringOffsetsTableHeader(DA, (uint32_t)Offset - 8); + return Descriptor ? Descriptor->validateContributionSize(DA) : Descriptor; +} + +Optional +DWARFUnit::determineStringOffsetsTableContributionDWO(DWARFDataExtractor &DA, + uint64_t Offset) { + if (getVersion() >= 5) { + // Look for a valid contribution at the given offset. + auto Descriptor = + parseDWARF64StringOffsetsTableHeader(DA, (uint32_t)Offset); + if (!Descriptor) + Descriptor = parseDWARF32StringOffsetsTableHeader(DA, (uint32_t)Offset); + return Descriptor ? Descriptor->validateContributionSize(DA) : Descriptor; + } + // Prior to DWARF v5, we derive the contribution size from the + // index table (in a package file). In a .dwo file it is simply + // the length of the string offsets section. + uint64_t Size = 0; + if (!IndexEntry) + Size = StringOffsetSection.Data.size(); + else if (const auto *C = IndexEntry->getOffset(DW_SECT_STR_OFFSETS)) + Size = C->Length; + // Return a descriptor with the given offset as base, version 4 and + // DWARF32 format. 
+ //return Optional( + //StrOffsetsContributionDescriptor(Offset, Size, 4, DWARF32)); + return StrOffsetsContributionDescriptor(Offset, Size, 4, DWARF32); +} diff --git a/lib/DebugInfo/DWARF/DWARFVerifier.cpp b/lib/DebugInfo/DWARF/DWARFVerifier.cpp index 8e07bb3c462d..da3226ed0a2f 100644 --- a/lib/DebugInfo/DWARF/DWARFVerifier.cpp +++ b/lib/DebugInfo/DWARF/DWARFVerifier.cpp @@ -669,13 +669,13 @@ bool DWARFVerifier::handleDebugLine() { return NumDebugLineErrors == 0; } -unsigned DWARFVerifier::verifyAccelTable(const DWARFSection *AccelSection, - DataExtractor *StrData, - const char *SectionName) { +unsigned DWARFVerifier::verifyAppleAccelTable(const DWARFSection *AccelSection, + DataExtractor *StrData, + const char *SectionName) { unsigned NumErrors = 0; DWARFDataExtractor AccelSectionData(DCtx.getDWARFObj(), *AccelSection, DCtx.isLittleEndian(), 0); - DWARFAcceleratorTable AccelTable(AccelSectionData, *StrData); + AppleAcceleratorTable AccelTable(AccelSectionData, *StrData); OS << "Verifying " << SectionName << "...\n"; @@ -686,8 +686,8 @@ unsigned DWARFVerifier::verifyAccelTable(const DWARFSection *AccelSection, } // Verify that the section is not too short. - if (!AccelTable.extract()) { - error() << "Section is smaller than size described in section header.\n"; + if (Error E = AccelTable.extract()) { + error() << toString(std::move(E)) << '\n'; return 1; } @@ -779,16 +779,16 @@ bool DWARFVerifier::handleAccelTables() { unsigned NumErrors = 0; if (!D.getAppleNamesSection().Data.empty()) NumErrors += - verifyAccelTable(&D.getAppleNamesSection(), &StrData, ".apple_names"); + verifyAppleAccelTable(&D.getAppleNamesSection(), &StrData, ".apple_names"); if (!D.getAppleTypesSection().Data.empty()) NumErrors += - verifyAccelTable(&D.getAppleTypesSection(), &StrData, ".apple_types"); + verifyAppleAccelTable(&D.getAppleTypesSection(), &StrData, ".apple_types"); if (!D.getAppleNamespacesSection().Data.empty()) - NumErrors += verifyAccelTable(&D.getAppleNamespacesSection(), &StrData, + NumErrors += verifyAppleAccelTable(&D.getAppleNamespacesSection(), &StrData, ".apple_namespaces"); if (!D.getAppleObjCSection().Data.empty()) NumErrors += - verifyAccelTable(&D.getAppleObjCSection(), &StrData, ".apple_objc"); + verifyAppleAccelTable(&D.getAppleObjCSection(), &StrData, ".apple_objc"); return NumErrors == 0; } diff --git a/lib/DebugInfo/MSF/MSFCommon.cpp b/lib/DebugInfo/MSF/MSFCommon.cpp index d7e1dcf31a3a..d398304375ac 100644 --- a/lib/DebugInfo/MSF/MSFCommon.cpp +++ b/lib/DebugInfo/MSF/MSFCommon.cpp @@ -64,15 +64,13 @@ MSFStreamLayout llvm::msf::getFpmStreamLayout(const MSFLayout &Msf, bool IncludeUnusedFpmData, bool AltFpm) { MSFStreamLayout FL; - uint32_t NumFpmIntervals = getNumFpmIntervals(Msf, IncludeUnusedFpmData); - support::ulittle32_t FpmBlock = Msf.SB->FreeBlockMapBlock; - assert(FpmBlock == 1 || FpmBlock == 2); - if (AltFpm) { - // If they requested the alternate FPM, then 2 becomes 1 and 1 becomes 2. - FpmBlock = 3U - FpmBlock; - } + uint32_t NumFpmIntervals = + getNumFpmIntervals(Msf, IncludeUnusedFpmData, AltFpm); + + uint32_t FpmBlock = AltFpm ? 
Msf.alternateFpmBlock() : Msf.mainFpmBlock(); + for (uint32_t I = 0; I < NumFpmIntervals; ++I) { - FL.Blocks.push_back(FpmBlock); + FL.Blocks.push_back(support::ulittle32_t(FpmBlock)); FpmBlock += msf::getFpmIntervalLength(Msf); } diff --git a/lib/DebugInfo/PDB/Native/DbiStream.cpp b/lib/DebugInfo/PDB/Native/DbiStream.cpp index 0eeac7e4c084..04e6664c68db 100644 --- a/lib/DebugInfo/PDB/Native/DbiStream.cpp +++ b/lib/DebugInfo/PDB/Native/DbiStream.cpp @@ -12,7 +12,6 @@ #include "llvm/DebugInfo/MSF/MappedBlockStream.h" #include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h" #include "llvm/DebugInfo/PDB/Native/ISectionContribVisitor.h" -#include "llvm/DebugInfo/PDB/Native/InfoStream.h" #include "llvm/DebugInfo/PDB/Native/PDBFile.h" #include "llvm/DebugInfo/PDB/Native/RawConstants.h" #include "llvm/DebugInfo/PDB/Native/RawError.h" diff --git a/lib/DebugInfo/PDB/Native/InfoStream.cpp b/lib/DebugInfo/PDB/Native/InfoStream.cpp index 829879060c33..17c9392a9dd5 100644 --- a/lib/DebugInfo/PDB/Native/InfoStream.cpp +++ b/lib/DebugInfo/PDB/Native/InfoStream.cpp @@ -10,12 +10,10 @@ #include "llvm/DebugInfo/PDB/Native/InfoStream.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/DebugInfo/PDB/Native/PDBFile.h" #include "llvm/DebugInfo/PDB/Native/RawConstants.h" #include "llvm/DebugInfo/PDB/Native/RawError.h" #include "llvm/DebugInfo/PDB/Native/RawTypes.h" #include "llvm/Support/BinaryStreamReader.h" -#include "llvm/Support/BinaryStreamWriter.h" using namespace llvm; using namespace llvm::codeview; diff --git a/lib/DebugInfo/PDB/Native/NativeBuiltinSymbol.cpp b/lib/DebugInfo/PDB/Native/NativeBuiltinSymbol.cpp index 60416f69e137..4644ddcf24e3 100644 --- a/lib/DebugInfo/PDB/Native/NativeBuiltinSymbol.cpp +++ b/lib/DebugInfo/PDB/Native/NativeBuiltinSymbol.cpp @@ -9,7 +9,6 @@ #include "llvm/DebugInfo/PDB/Native/NativeBuiltinSymbol.h" -#include "llvm/DebugInfo/PDB/Native/NativeSession.h" namespace llvm { namespace pdb { diff --git a/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp b/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp index b29d589eaa91..e8b06065fc60 100644 --- a/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp +++ b/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp @@ -13,9 +13,7 @@ #include "llvm/DebugInfo/PDB/Native/DbiStream.h" #include "llvm/DebugInfo/PDB/Native/InfoStream.h" #include "llvm/DebugInfo/PDB/Native/NativeEnumModules.h" -#include "llvm/DebugInfo/PDB/Native/NativeEnumTypes.h" #include "llvm/DebugInfo/PDB/Native/PDBFile.h" -#include "llvm/DebugInfo/PDB/Native/TpiStream.h" namespace llvm { namespace pdb { diff --git a/lib/DebugInfo/PDB/Native/PDBStringTable.cpp b/lib/DebugInfo/PDB/Native/PDBStringTable.cpp index acd45f7a6219..f1c10357132b 100644 --- a/lib/DebugInfo/PDB/Native/PDBStringTable.cpp +++ b/lib/DebugInfo/PDB/Native/PDBStringTable.cpp @@ -10,7 +10,6 @@ #include "llvm/DebugInfo/PDB/Native/PDBStringTable.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/DebugInfo/MSF/MappedBlockStream.h" #include "llvm/DebugInfo/PDB/Native/Hash.h" #include "llvm/DebugInfo/PDB/Native/RawError.h" #include "llvm/DebugInfo/PDB/Native/RawTypes.h" diff --git a/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp b/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp index 90acfadd311f..ece3e00b1a87 100644 --- a/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp +++ b/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp @@ -10,9 +10,7 @@ #include "llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/DebugInfo/MSF/MappedBlockStream.h" #include 
"llvm/DebugInfo/PDB/Native/Hash.h" -#include "llvm/DebugInfo/PDB/Native/PDBFileBuilder.h" #include "llvm/DebugInfo/PDB/Native/RawTypes.h" #include "llvm/Support/BinaryStreamWriter.h" #include "llvm/Support/Endian.h" diff --git a/lib/DebugInfo/PDB/Native/PublicsStream.cpp b/lib/DebugInfo/PDB/Native/PublicsStream.cpp index a3a44ceddca9..f6466eb80464 100644 --- a/lib/DebugInfo/PDB/Native/PublicsStream.cpp +++ b/lib/DebugInfo/PDB/Native/PublicsStream.cpp @@ -26,9 +26,7 @@ #include "llvm/ADT/iterator_range.h" #include "llvm/DebugInfo/CodeView/SymbolRecord.h" #include "llvm/DebugInfo/MSF/MappedBlockStream.h" -#include "llvm/DebugInfo/PDB/Native/PDBFile.h" #include "llvm/DebugInfo/PDB/Native/RawError.h" -#include "llvm/DebugInfo/PDB/Native/SymbolStream.h" #include "llvm/Support/BinaryStreamReader.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" diff --git a/lib/DebugInfo/PDB/Native/SymbolStream.cpp b/lib/DebugInfo/PDB/Native/SymbolStream.cpp index 5da1cd54192a..2d8d04ceca4d 100644 --- a/lib/DebugInfo/PDB/Native/SymbolStream.cpp +++ b/lib/DebugInfo/PDB/Native/SymbolStream.cpp @@ -12,9 +12,6 @@ #include "llvm/DebugInfo/CodeView/CodeView.h" #include "llvm/DebugInfo/CodeView/SymbolRecord.h" #include "llvm/DebugInfo/MSF/MappedBlockStream.h" -#include "llvm/DebugInfo/PDB/Native/PDBFile.h" -#include "llvm/DebugInfo/PDB/Native/RawConstants.h" -#include "llvm/DebugInfo/PDB/Native/RawError.h" #include "llvm/Support/BinaryStreamReader.h" #include "llvm/Support/Endian.h" diff --git a/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp b/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp index 9e943c7f114d..8dd30018028e 100644 --- a/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp +++ b/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp @@ -17,7 +17,6 @@ #include "llvm/DebugInfo/PDB/Native/PDBFile.h" #include "llvm/DebugInfo/PDB/Native/RawError.h" #include "llvm/DebugInfo/PDB/Native/RawTypes.h" -#include "llvm/DebugInfo/PDB/Native/TpiStream.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/BinaryByteStream.h" #include "llvm/Support/BinaryStreamArray.h" diff --git a/lib/DebugInfo/PDB/PDBExtras.cpp b/lib/DebugInfo/PDB/PDBExtras.cpp index 147e2f7abfe7..ee752cda346e 100644 --- a/lib/DebugInfo/PDB/PDBExtras.cpp +++ b/lib/DebugInfo/PDB/PDBExtras.cpp @@ -9,7 +9,6 @@ #include "llvm/DebugInfo/PDB/PDBExtras.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/DebugInfo/CodeView/Formatters.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/lib/DebugInfo/PDB/PDBSymbolFunc.cpp b/lib/DebugInfo/PDB/PDBSymbolFunc.cpp index 5a5cb4c1b5ca..c8c44d97e2f7 100644 --- a/lib/DebugInfo/PDB/PDBSymbolFunc.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolFunc.cpp @@ -15,7 +15,6 @@ #include "llvm/DebugInfo/PDB/PDBSymDumper.h" #include "llvm/DebugInfo/PDB/PDBSymbolData.h" #include "llvm/DebugInfo/PDB/PDBSymbolTypeFunctionSig.h" -#include "llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h" #include "llvm/DebugInfo/PDB/PDBTypes.h" #include diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeArray.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeArray.cpp index a8054a42d866..ba40f65ef40f 100644 --- a/lib/DebugInfo/PDB/PDBSymbolTypeArray.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolTypeArray.cpp @@ -9,7 +9,6 @@ #include "llvm/DebugInfo/PDB/PDBSymbolTypeArray.h" -#include "llvm/DebugInfo/PDB/IPDBSession.h" #include "llvm/DebugInfo/PDB/PDBSymDumper.h" #include diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeEnum.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeEnum.cpp index 2addea072c88..f9c3067c20bf 100644 --- a/lib/DebugInfo/PDB/PDBSymbolTypeEnum.cpp +++ 
b/lib/DebugInfo/PDB/PDBSymbolTypeEnum.cpp @@ -9,10 +9,8 @@ #include "llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h" -#include "llvm/DebugInfo/PDB/IPDBSession.h" #include "llvm/DebugInfo/PDB/PDBSymDumper.h" #include "llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h" -#include "llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h" #include diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp index 0304c6286c8f..8fd3b49155c9 100644 --- a/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp @@ -14,6 +14,7 @@ #include "llvm/DebugInfo/PDB/IPDBSession.h" #include "llvm/DebugInfo/PDB/PDBSymDumper.h" #include "llvm/DebugInfo/PDB/PDBSymbol.h" +#include "llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h" #include "llvm/DebugInfo/PDB/PDBSymbolTypeFunctionArg.h" #include @@ -84,3 +85,21 @@ void PDBSymbolTypeFunctionSig::dump(PDBSymDumper &Dumper) const { void PDBSymbolTypeFunctionSig::dumpRight(PDBSymDumper &Dumper) const { Dumper.dumpRight(*this); } + +bool PDBSymbolTypeFunctionSig::isCVarArgs() const { + auto SigArguments = getArguments(); + if (!SigArguments) + return false; + uint32_t NumArgs = SigArguments->getChildCount(); + if (NumArgs == 0) + return false; + auto Last = SigArguments->getChildAtIndex(NumArgs - 1); + if (auto Builtin = llvm::dyn_cast_or_null(Last.get())) { + if (Builtin->getBuiltinType() == PDB_BuiltinType::None) + return true; + } + + // Note that for a variadic template signature, this method always returns + // false since the parameters of the template are specialized. + return false; +} diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeUDT.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeUDT.cpp index 15dc15352165..715ae15e1a7a 100644 --- a/lib/DebugInfo/PDB/PDBSymbolTypeUDT.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolTypeUDT.cpp @@ -17,7 +17,6 @@ #include "llvm/DebugInfo/PDB/PDBSymbolTypeBaseClass.h" #include "llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h" #include "llvm/DebugInfo/PDB/PDBSymbolTypeVTableShape.h" -#include "llvm/DebugInfo/PDB/UDTLayout.h" #include diff --git a/lib/DebugInfo/Symbolize/Symbolize.cpp b/lib/DebugInfo/Symbolize/Symbolize.cpp index 7aa55e755d2c..e997ef5b6069 100644 --- a/lib/DebugInfo/Symbolize/Symbolize.cpp +++ b/lib/DebugInfo/Symbolize/Symbolize.cpp @@ -22,7 +22,6 @@ #include "llvm/DebugInfo/PDB/PDB.h" #include "llvm/DebugInfo/PDB/PDBContext.h" #include "llvm/Object/COFF.h" -#include "llvm/Object/ELFObjectFile.h" #include "llvm/Object/MachO.h" #include "llvm/Object/MachOUniversal.h" #include "llvm/Support/Casting.h" diff --git a/lib/Demangle/ItaniumDemangle.cpp b/lib/Demangle/ItaniumDemangle.cpp index 34f4017d9828..9c2258f5b933 100644 --- a/lib/Demangle/ItaniumDemangle.cpp +++ b/lib/Demangle/ItaniumDemangle.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Demangle/Demangle.h" +#include "llvm/Support/Compiler.h" // This file exports a single function: llvm::itanium_demangle. // It also has no dependencies on the rest of llvm. 
It is implemented this way @@ -1947,7 +1948,7 @@ static const char *parse_type(const char *first, const char *last, C &db) { break; } } - // falls through + LLVM_FALLTHROUGH; default: // must check for builtin-types before class-enum-types to avoid // ambiguities with operator-names diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp index c59885753a8f..e4efc15f2ae3 100644 --- a/lib/ExecutionEngine/ExecutionEngine.cpp +++ b/lib/ExecutionEngine/ExecutionEngine.cpp @@ -49,14 +49,13 @@ STATISTIC(NumGlobals , "Number of global vars initialized"); ExecutionEngine *(*ExecutionEngine::MCJITCtor)( std::unique_ptr M, std::string *ErrorStr, std::shared_ptr MemMgr, - - std::shared_ptr Resolver, + std::shared_ptr Resolver, std::unique_ptr TM) = nullptr; ExecutionEngine *(*ExecutionEngine::OrcMCJITReplacementCtor)( - std::string *ErrorStr, std::shared_ptr MemMgr, - std::shared_ptr Resolver, - std::unique_ptr TM) = nullptr; + std::string *ErrorStr, std::shared_ptr MemMgr, + std::shared_ptr Resolver, + std::unique_ptr TM) = nullptr; ExecutionEngine *(*ExecutionEngine::InterpCtor)(std::unique_ptr M, std::string *ErrorStr) =nullptr; @@ -502,9 +501,9 @@ EngineBuilder::setMemoryManager(std::unique_ptr MM) { return *this; } -EngineBuilder& -EngineBuilder::setSymbolResolver(std::unique_ptr SR) { - Resolver = std::shared_ptr(std::move(SR)); +EngineBuilder & +EngineBuilder::setSymbolResolver(std::unique_ptr SR) { + Resolver = std::shared_ptr(std::move(SR)); return *this; } @@ -532,7 +531,6 @@ ExecutionEngine *EngineBuilder::create(TargetMachine *TM) { // Unless the interpreter was explicitly selected or the JIT is not linked, // try making a JIT. if ((WhichEngine & EngineKind::JIT) && TheTM) { - Triple TT(M->getTargetTriple()); if (!TM->getTarget().hasJIT()) { errs() << "WARNING: This target JIT is not designed for the host" << " you are running. If bad things happen, please choose" diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/lib/ExecutionEngine/MCJIT/MCJIT.cpp index 1164d60ffc10..438e656b60f0 100644 --- a/lib/ExecutionEngine/MCJIT/MCJIT.cpp +++ b/lib/ExecutionEngine/MCJIT/MCJIT.cpp @@ -19,7 +19,6 @@ #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Mangler.h" #include "llvm/IR/Module.h" -#include "llvm/MC/MCAsmInfo.h" #include "llvm/Object/Archive.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Support/DynamicLibrary.h" @@ -40,11 +39,10 @@ static struct RegisterJIT { extern "C" void LLVMLinkInMCJIT() { } -ExecutionEngine* -MCJIT::createJIT(std::unique_ptr M, - std::string *ErrorStr, +ExecutionEngine * +MCJIT::createJIT(std::unique_ptr M, std::string *ErrorStr, std::shared_ptr MemMgr, - std::shared_ptr Resolver, + std::shared_ptr Resolver, std::unique_ptr TM) { // Try to register the program as a source of symbols to resolve against. // @@ -65,7 +63,7 @@ MCJIT::createJIT(std::unique_ptr M, MCJIT::MCJIT(std::unique_ptr M, std::unique_ptr TM, std::shared_ptr MemMgr, - std::shared_ptr Resolver) + std::shared_ptr Resolver) : ExecutionEngine(TM->createDataLayout(), std::move(M)), TM(std::move(TM)), Ctx(nullptr), MemMgr(std::move(MemMgr)), Resolver(*this, std::move(Resolver)), Dyld(*this->MemMgr, this->Resolver), diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.h b/lib/ExecutionEngine/MCJIT/MCJIT.h index daf578f5daae..110cfa675cf3 100644 --- a/lib/ExecutionEngine/MCJIT/MCJIT.h +++ b/lib/ExecutionEngine/MCJIT/MCJIT.h @@ -26,11 +26,11 @@ class MCJIT; // functions across modules that it owns. 
It aggregates the memory manager // that is passed in to the MCJIT constructor and defers most functionality // to that object. -class LinkingSymbolResolver : public JITSymbolResolver { +class LinkingSymbolResolver : public LegacyJITSymbolResolver { public: LinkingSymbolResolver(MCJIT &Parent, - std::shared_ptr Resolver) - : ParentEngine(Parent), ClientResolver(std::move(Resolver)) {} + std::shared_ptr Resolver) + : ParentEngine(Parent), ClientResolver(std::move(Resolver)) {} JITSymbol findSymbol(const std::string &Name) override; @@ -41,7 +41,7 @@ class LinkingSymbolResolver : public JITSymbolResolver { private: MCJIT &ParentEngine; - std::shared_ptr ClientResolver; + std::shared_ptr ClientResolver; }; // About Module states: added->loaded->finalized. @@ -67,7 +67,7 @@ class LinkingSymbolResolver : public JITSymbolResolver { class MCJIT : public ExecutionEngine { MCJIT(std::unique_ptr M, std::unique_ptr tm, std::shared_ptr MemMgr, - std::shared_ptr Resolver); + std::shared_ptr Resolver); typedef llvm::SmallPtrSet ModulePtrSet; @@ -300,11 +300,10 @@ class MCJIT : public ExecutionEngine { MCJITCtor = createJIT; } - static ExecutionEngine* - createJIT(std::unique_ptr M, - std::string *ErrorStr, + static ExecutionEngine * + createJIT(std::unique_ptr M, std::string *ErrorStr, std::shared_ptr MemMgr, - std::shared_ptr Resolver, + std::shared_ptr Resolver, std::unique_ptr TM); // @} diff --git a/lib/ExecutionEngine/Orc/CMakeLists.txt b/lib/ExecutionEngine/Orc/CMakeLists.txt index f83e002c758f..ca1b9ee005ce 100644 --- a/lib/ExecutionEngine/Orc/CMakeLists.txt +++ b/lib/ExecutionEngine/Orc/CMakeLists.txt @@ -1,6 +1,8 @@ add_llvm_library(LLVMOrcJIT + Core.cpp ExecutionUtils.cpp IndirectionUtils.cpp + Legacy.cpp NullResolver.cpp OrcABISupport.cpp OrcCBindings.cpp diff --git a/lib/ExecutionEngine/Orc/Core.cpp b/lib/ExecutionEngine/Orc/Core.cpp new file mode 100644 index 000000000000..ff78ba199393 --- /dev/null +++ b/lib/ExecutionEngine/Orc/Core.cpp @@ -0,0 +1,345 @@ +//===--------- Core.cpp - Core ORC APIs (SymbolSource, VSO, etc.) ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ExecutionEngine/Orc/Core.h" +#include "llvm/ExecutionEngine/Orc/OrcError.h" + +namespace llvm { +namespace orc { + +void SymbolResolver::anchor() {} +void SymbolSource::anchor() {} + +AsynchronousSymbolQuery::AsynchronousSymbolQuery( + const SymbolNameSet &Symbols, SymbolsResolvedCallback NotifySymbolsResolved, + SymbolsReadyCallback NotifySymbolsReady) + : NotifySymbolsResolved(std::move(NotifySymbolsResolved)), + NotifySymbolsReady(std::move(NotifySymbolsReady)) { + assert(this->NotifySymbolsResolved && + "Symbols resolved callback must be set"); + assert(this->NotifySymbolsReady && "Symbols ready callback must be set"); + OutstandingResolutions = OutstandingFinalizations = Symbols.size(); +} + +void AsynchronousSymbolQuery::setFailed(Error Err) { + OutstandingResolutions = OutstandingFinalizations = 0; + if (NotifySymbolsResolved) + NotifySymbolsResolved(std::move(Err)); + else + NotifySymbolsReady(std::move(Err)); +} + +void AsynchronousSymbolQuery::setDefinition(SymbolStringPtr Name, + JITEvaluatedSymbol Sym) { + // If OutstandingResolutions is zero we must have errored out already. Just + // ignore this. 
+ if (OutstandingResolutions == 0) + return; + + assert(!Symbols.count(Name) && "Symbol has already been assigned an address"); + Symbols.insert(std::make_pair(std::move(Name), std::move(Sym))); + --OutstandingResolutions; + if (OutstandingResolutions == 0) { + NotifySymbolsResolved(std::move(Symbols)); + // Null out NotifySymbolsResolved to indicate that we've already called it. + NotifySymbolsResolved = {}; + } +} + +void AsynchronousSymbolQuery::notifySymbolFinalized() { + // If OutstandingFinalizations is zero we must have errored out already. Just + // ignore this. + if (OutstandingFinalizations == 0) + return; + + assert(OutstandingFinalizations > 0 && "All symbols already finalized"); + --OutstandingFinalizations; + if (OutstandingFinalizations == 0) + NotifySymbolsReady(Error::success()); +} + +VSO::MaterializationInfo::MaterializationInfo(JITSymbolFlags Flags, + AsynchronousSymbolQuery &Query) + : Flags(std::move(Flags)), PendingResolution({&Query}) {} + +JITSymbolFlags VSO::MaterializationInfo::getFlags() const { return Flags; } + +JITTargetAddress VSO::MaterializationInfo::getAddress() const { + return Address; +} + +void VSO::MaterializationInfo::query(SymbolStringPtr Name, + AsynchronousSymbolQuery &Query) { + if (Address != 0) { + Query.setDefinition(Name, JITEvaluatedSymbol(Address, Flags)); + PendingFinalization.push_back(&Query); + } else + PendingResolution.push_back(&Query); +} + +void VSO::MaterializationInfo::resolve(SymbolStringPtr Name, + JITEvaluatedSymbol Sym) { + // FIXME: Sanity check flags? + Flags = Sym.getFlags(); + Address = Sym.getAddress(); + for (auto *Query : PendingResolution) { + Query->setDefinition(Name, std::move(Sym)); + PendingFinalization.push_back(Query); + } + PendingResolution = {}; +} + +void VSO::MaterializationInfo::finalize() { + for (auto *Query : PendingFinalization) + Query->notifySymbolFinalized(); + PendingFinalization = {}; +} + +VSO::SymbolTableEntry::SymbolTableEntry(JITSymbolFlags Flags, + SymbolSource &Source) + : Flags(JITSymbolFlags::FlagNames(Flags | JITSymbolFlags::NotMaterialized)), + Source(&Source) { + // FIXME: Assert flag sanity. +} + +VSO::SymbolTableEntry::SymbolTableEntry(JITEvaluatedSymbol Sym) + : Flags(Sym.getFlags()), Address(Sym.getAddress()) { + // FIXME: Assert flag sanity. 
+} + +VSO::SymbolTableEntry::SymbolTableEntry(SymbolTableEntry &&Other) + : Flags(Other.Flags), Address(0) { + if (Flags.isMaterializing()) + MatInfo = std::move(Other.MatInfo); + else + Source = Other.Source; +} + +VSO::SymbolTableEntry::~SymbolTableEntry() { + assert(!Flags.isMaterializing() && + "Symbol table entry destroyed while symbol was being materialized"); +} + +JITSymbolFlags VSO::SymbolTableEntry::getFlags() const { return Flags; } + +void VSO::SymbolTableEntry::replaceWithSource(VSO &V, SymbolStringPtr Name, + JITSymbolFlags Flags, + SymbolSource &NewSource) { + assert(!this->Flags.isMaterializing() && + "Attempted to replace symbol with lazy definition during " + "materialization"); + if (!this->Flags.isMaterialized()) + Source->discard(V, Name); + this->Flags = Flags; + this->Source = &NewSource; +} + +SymbolSource *VSO::SymbolTableEntry::query(SymbolStringPtr Name, + AsynchronousSymbolQuery &Query) { + if (Flags.isMaterializing()) { + MatInfo->query(std::move(Name), Query); + return nullptr; + } else if (Flags.isMaterialized()) { + Query.setDefinition(std::move(Name), JITEvaluatedSymbol(Address, Flags)); + Query.notifySymbolFinalized(); + return nullptr; + } + SymbolSource *S = Source; + new (&MatInfo) std::unique_ptr( + llvm::make_unique(Flags, Query)); + Flags |= JITSymbolFlags::Materializing; + return S; +} + +void VSO::SymbolTableEntry::resolve(VSO &V, SymbolStringPtr Name, + JITEvaluatedSymbol Sym) { + if (Flags.isMaterializing()) + MatInfo->resolve(std::move(Name), std::move(Sym)); + else { + // If there's a layer for this symbol. + if (!Flags.isMaterialized()) + Source->discard(V, Name); + + // FIXME: Should we assert flag state here (flags must match except for + // materialization state, overrides must be legal) or in the caller + // in VSO? + Flags = Sym.getFlags(); + Address = Sym.getAddress(); + } +} + +void VSO::SymbolTableEntry::finalize() { + if (Flags.isMaterializing()) { + auto TmpMatInfo = std::move(MatInfo); + MatInfo.std::unique_ptr::~unique_ptr(); + // FIXME: Assert flag sanity? + Flags = TmpMatInfo->getFlags(); + Address = TmpMatInfo->getAddress(); + TmpMatInfo->finalize(); + } + assert(Flags.isMaterialized() && "Trying to finalize not-emitted symbol"); +} + +VSO::RelativeLinkageStrength VSO::compareLinkage(Optional Old, + JITSymbolFlags New) { + if (Old == None) + return llvm::orc::VSO::NewDefinitionIsStronger; + + if (Old->isStrong()) { + if (New.isStrong()) + return llvm::orc::VSO::DuplicateDefinition; + else + return llvm::orc::VSO::ExistingDefinitionIsStronger; + } else { + if (New.isStrong()) + return llvm::orc::VSO::NewDefinitionIsStronger; + else + return llvm::orc::VSO::ExistingDefinitionIsStronger; + } +} + +VSO::RelativeLinkageStrength +VSO::compareLinkage(SymbolStringPtr Name, JITSymbolFlags NewFlags) const { + auto I = Symbols.find(Name); + return compareLinkage(I == Symbols.end() + ? None + : Optional(I->second.getFlags()), + NewFlags); +} + +Error VSO::define(SymbolMap NewSymbols) { + Error Err = Error::success(); + for (auto &KV : NewSymbols) { + auto I = Symbols.find(KV.first); + auto LinkageResult = compareLinkage( + I == Symbols.end() ? None + : Optional(I->second.getFlags()), + KV.second.getFlags()); + + // Silently discard weaker definitions. + if (LinkageResult == ExistingDefinitionIsStronger) + continue; + + // Report duplicate definition errors. 
+ if (LinkageResult == DuplicateDefinition) { + Err = joinErrors(std::move(Err), + make_error(*KV.first)); + continue; + } + + if (I != Symbols.end()) { + I->second.resolve(*this, KV.first, std::move(KV.second)); + I->second.finalize(); + } else + Symbols.insert(std::make_pair(KV.first, std::move(KV.second))); + } + return Err; +} + +Error VSO::defineLazy(const SymbolFlagsMap &NewSymbols, SymbolSource &Source) { + Error Err = Error::success(); + for (auto &KV : NewSymbols) { + auto I = Symbols.find(KV.first); + + auto LinkageResult = compareLinkage( + I == Symbols.end() ? None + : Optional(I->second.getFlags()), + KV.second); + + // Discard weaker definitions. + if (LinkageResult == ExistingDefinitionIsStronger) + Source.discard(*this, KV.first); + + // Report duplicate definition errors. + if (LinkageResult == DuplicateDefinition) { + Err = joinErrors(std::move(Err), + make_error(*KV.first)); + continue; + } + + if (I != Symbols.end()) + I->second.replaceWithSource(*this, KV.first, KV.second, Source); + else + Symbols.emplace( + std::make_pair(KV.first, SymbolTableEntry(KV.second, Source))); + } + return Err; +} + +void VSO::resolve(SymbolMap SymbolValues) { + for (auto &KV : SymbolValues) { + auto I = Symbols.find(KV.first); + assert(I != Symbols.end() && "Resolving symbol not present in this dylib"); + I->second.resolve(*this, KV.first, std::move(KV.second)); + } +} + +void VSO::finalize(SymbolNameSet SymbolsToFinalize) { + for (auto &S : SymbolsToFinalize) { + auto I = Symbols.find(S); + assert(I != Symbols.end() && "Finalizing symbol not present in this dylib"); + I->second.finalize(); + } +} + +LookupFlagsResult VSO::lookupFlags(SymbolNameSet Names) { + SymbolFlagsMap FlagsFound; + + for (SymbolNameSet::iterator I = Names.begin(), E = Names.end(); I != E;) { + auto Tmp = I++; + auto SymI = Symbols.find(*Tmp); + + // If the symbol isn't in this dylib then just continue. + if (SymI == Symbols.end()) + continue; + + Names.erase(Tmp); + + FlagsFound[SymI->first] = + JITSymbolFlags::stripTransientFlags(SymI->second.getFlags()); + } + + return {std::move(FlagsFound), std::move(Names)}; +} + +VSO::LookupResult VSO::lookup(AsynchronousSymbolQuery &Query, + SymbolNameSet Names) { + SourceWorkMap MaterializationWork; + + for (SymbolNameSet::iterator I = Names.begin(), E = Names.end(); I != E;) { + auto Tmp = I++; + auto SymI = Symbols.find(*Tmp); + + // If the symbol isn't in this dylib then just continue. + if (SymI == Symbols.end()) + continue; + + // The symbol is in the dylib. Erase it from Names and proceed. + Names.erase(Tmp); + + // Forward the query to the given SymbolTableEntry, and if it return a + // layer to perform materialization with, add that to the + // MaterializationWork map. + if (auto *Source = SymI->second.query(SymI->first, Query)) + MaterializationWork[Source].insert(SymI->first); + } + + return {std::move(MaterializationWork), std::move(Names)}; +} + +ExecutionSession::ExecutionSession(SymbolStringPool &SSP) : SSP(SSP) {} + +VModuleKey ExecutionSession::allocateVModule() { return ++LastKey; } + +void ExecutionSession::releaseVModule(VModuleKey VMod) { + // FIXME: Recycle keys. +} + +} // End namespace orc. +} // End namespace llvm. 
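For reference, VSO::define and VSO::defineLazy in the new Core.cpp both hinge on the linkage-strength comparison implemented by VSO::compareLinkage: a weak definition never displaces an existing one, two strong definitions of the same symbol are a duplicate-definition error, and otherwise the strong definition wins. The following is a standalone restatement of that decision table, using plain C++17 types (std::optional and a bool for "is strong") instead of the real Optional/JITSymbolFlags classes; it is a sketch for readability, not part of the patch.

#include <cassert>
#include <optional>

enum class Strength { ExistingStronger, NewStronger, Duplicate };

// Weak definitions never displace an existing definition; two strong
// definitions of the same symbol are reported as a duplicate; otherwise the
// strong definition wins.
Strength compareLinkage(std::optional<bool> ExistingIsStrong, bool NewIsStrong) {
  if (!ExistingIsStrong.has_value())
    return Strength::NewStronger; // No prior definition: accept the new one.
  if (*ExistingIsStrong)
    return NewIsStrong ? Strength::Duplicate : Strength::ExistingStronger;
  return NewIsStrong ? Strength::NewStronger : Strength::ExistingStronger;
}

int main() {
  assert(compareLinkage(std::nullopt, false) == Strength::NewStronger);
  assert(compareLinkage(true, true) == Strength::Duplicate);
  assert(compareLinkage(false, true) == Strength::NewStronger);
  assert(compareLinkage(true, false) == Strength::ExistingStronger);
  return 0;
}

The same rule is applied in both define (for already-materialized symbols) and defineLazy (for symbols backed by a SymbolSource), which is why weaker lazy definitions are discarded via Source.discard rather than silently overwriting the table entry.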
diff --git a/lib/ExecutionEngine/Orc/Legacy.cpp b/lib/ExecutionEngine/Orc/Legacy.cpp new file mode 100644 index 000000000000..e4eba8bd7565 --- /dev/null +++ b/lib/ExecutionEngine/Orc/Legacy.cpp @@ -0,0 +1,75 @@ +//===------- Legacy.cpp - Adapters for ExecutionEngine API interop --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ExecutionEngine/Orc/Legacy.h" + +namespace llvm { +namespace orc { + +JITSymbolResolverAdapter::JITSymbolResolverAdapter(ExecutionSession &ES, + SymbolResolver &R) + : ES(ES), R(R) {} + +Expected +JITSymbolResolverAdapter::lookup(const LookupSet &Symbols) { + Error Err = Error::success(); + JITSymbolResolver::LookupResult Result; + + SymbolNameSet InternedSymbols; + for (auto &S : Symbols) + InternedSymbols.insert(ES.getSymbolStringPool().intern(S)); + + auto OnResolve = [&](Expected R) { + if (R) { + for (auto &KV : *R) { + ResolvedStrings.insert(KV.first); + Result[*KV.first] = KV.second; + } + } else + Err = joinErrors(std::move(Err), R.takeError()); + }; + + auto OnReady = [](Error Err) { + // FIXME: Report error to ExecutionSession. + logAllUnhandledErrors(std::move(Err), errs(), + "legacy resolver received on-ready error:\n"); + }; + + AsynchronousSymbolQuery Query(InternedSymbols, OnResolve, OnReady); + + auto UnresolvedSymbols = R.lookup(Query, InternedSymbols); + + if (!UnresolvedSymbols.empty()) + Err = joinErrors(std::move(Err), + make_error("Unresolved symbols", + inconvertibleErrorCode())); + + if (Err) + return std::move(Err); + + return Result; +} + +Expected +JITSymbolResolverAdapter::lookupFlags(const LookupSet &Symbols) { + SymbolNameSet InternedSymbols; + for (auto &S : Symbols) + InternedSymbols.insert(ES.getSymbolStringPool().intern(S)); + + LookupFlagsResult Result; + for (auto &KV : R.lookupFlags(InternedSymbols).SymbolFlags) { + ResolvedStrings.insert(KV.first); + Result[*KV.first] = KV.second; + } + + return Result; +} + +} // End namespace orc. +} // End namespace llvm. 
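The new Legacy.cpp adapter above bridges the synchronous legacy resolver interface to the callback-driven AsynchronousSymbolQuery by capturing the query's results inside its OnResolve lambda before returning. A minimal sketch of that shape follows; the types here (LookupResult, asyncLookup, syncLookup) are simplified stand-ins invented for illustration, not the actual ORC classes.

#include <cstdint>
#include <functional>
#include <map>
#include <set>
#include <string>

using LookupResult = std::map<std::string, uint64_t>;
using OnResolvedFn = std::function<void(const LookupResult &)>;

// A toy "new style" lookup that reports its results through a callback.
void asyncLookup(const std::set<std::string> &Names, OnResolvedFn OnResolved) {
  LookupResult R;
  for (const auto &N : Names)
    R[N] = 0x1000 + N.size(); // Fake addresses, for the sketch only.
  OnResolved(R);
}

// The "legacy" synchronous entry point captures whatever the callback
// delivers, mirroring how JITSymbolResolverAdapter::lookup collects results
// through its OnResolve lambda and then returns them to the caller.
LookupResult syncLookup(const std::set<std::string> &Names) {
  LookupResult Out;
  asyncLookup(Names, [&Out](const LookupResult &R) { Out = R; });
  return Out;
}

int main() {
  auto R = syncLookup({"main", "printf"});
  return R.count("main") == 1 ? 0 : 1;
}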
diff --git a/lib/ExecutionEngine/Orc/OrcABISupport.cpp b/lib/ExecutionEngine/Orc/OrcABISupport.cpp index e76954499f61..e3c968157976 100644 --- a/lib/ExecutionEngine/Orc/OrcABISupport.cpp +++ b/lib/ExecutionEngine/Orc/OrcABISupport.cpp @@ -8,7 +8,6 @@ //===----------------------------------------------------------------------===// #include "llvm/ExecutionEngine/Orc/OrcABISupport.h" -#include "llvm/ADT/Triple.h" #include "llvm/Support/Process.h" namespace llvm { diff --git a/lib/ExecutionEngine/Orc/OrcCBindingsStack.h b/lib/ExecutionEngine/Orc/OrcCBindingsStack.h index 405970e063d8..05b1f47eb5bb 100644 --- a/lib/ExecutionEngine/Orc/OrcCBindingsStack.h +++ b/lib/ExecutionEngine/Orc/OrcCBindingsStack.h @@ -196,7 +196,7 @@ class OrcCBindingsStack { return mapError(IndirectStubsMgr->updatePointer(Name, Addr)); } - std::shared_ptr + std::shared_ptr createResolver(LLVMOrcSymbolResolverFn ExternalResolver, void *ExternalResolverCtx) { return orc::createLambdaResolver( diff --git a/lib/ExecutionEngine/Orc/OrcError.cpp b/lib/ExecutionEngine/Orc/OrcError.cpp index c218cb9a523c..f0bfed8ddb8a 100644 --- a/lib/ExecutionEngine/Orc/OrcError.cpp +++ b/lib/ExecutionEngine/Orc/OrcError.cpp @@ -29,6 +29,10 @@ class OrcErrorCategory : public std::error_category { std::string message(int condition) const override { switch (static_cast(condition)) { + case OrcErrorCode::DuplicateDefinition: + return "Duplicate symbol definition"; + case OrcErrorCode::JITSymbolNotFound: + return "JIT symbol not found"; case OrcErrorCode::RemoteAllocatorDoesNotExist: return "Remote allocator does not exist"; case OrcErrorCode::RemoteAllocatorIdAlreadyInUse: @@ -45,8 +49,6 @@ class OrcErrorCategory : public std::error_category { return "Could not negotiate RPC function"; case OrcErrorCode::RPCResponseAbandoned: return "RPC response abandoned"; - case OrcErrorCode::JITSymbolNotFound: - return "JIT symbol not found"; case OrcErrorCode::UnexpectedRPCCall: return "Unexpected RPC call"; case OrcErrorCode::UnexpectedRPCResponse: @@ -67,6 +69,7 @@ static ManagedStatic OrcErrCat; namespace llvm { namespace orc { +char DuplicateDefinition::ID = 0; char JITSymbolNotFound::ID = 0; std::error_code orcError(OrcErrorCode ErrCode) { @@ -74,6 +77,22 @@ std::error_code orcError(OrcErrorCode ErrCode) { return std::error_code(static_cast(ErrCode), *OrcErrCat); } + +DuplicateDefinition::DuplicateDefinition(std::string SymbolName) + : SymbolName(std::move(SymbolName)) {} + +std::error_code DuplicateDefinition::convertToErrorCode() const { + return orcError(OrcErrorCode::DuplicateDefinition); +} + +void DuplicateDefinition::log(raw_ostream &OS) const { + OS << "Duplicate definition of symbol '" << SymbolName << "'"; +} + +const std::string &DuplicateDefinition::getSymbolName() const { + return SymbolName; +} + JITSymbolNotFound::JITSymbolNotFound(std::string SymbolName) : SymbolName(std::move(SymbolName)) {} diff --git a/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h b/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h index 1dc8d4ac7bc5..166d1369c724 100644 --- a/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h +++ b/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h @@ -138,7 +138,7 @@ class OrcMCJITReplacement : public ExecutionEngine { std::shared_ptr ClientMM; }; - class LinkingResolver : public JITSymbolResolver { + class LinkingResolver : public LegacyJITSymbolResolver { public: LinkingResolver(OrcMCJITReplacement &M) : M(M) {} @@ -160,20 +160,19 @@ class OrcMCJITReplacement : public ExecutionEngine { static ExecutionEngine * 
createOrcMCJITReplacement(std::string *ErrorMsg, std::shared_ptr MemMgr, - std::shared_ptr Resolver, + std::shared_ptr Resolver, std::unique_ptr TM) { return new OrcMCJITReplacement(std::move(MemMgr), std::move(Resolver), std::move(TM)); } public: - OrcMCJITReplacement( - std::shared_ptr MemMgr, - std::shared_ptr ClientResolver, - std::unique_ptr TM) + OrcMCJITReplacement(std::shared_ptr MemMgr, + std::shared_ptr ClientResolver, + std::unique_ptr TM) : ExecutionEngine(TM->createDataLayout()), TM(std::move(TM)), - MemMgr(std::make_shared(*this, - std::move(MemMgr))), + MemMgr( + std::make_shared(*this, std::move(MemMgr))), Resolver(std::make_shared(*this)), ClientResolver(std::move(ClientResolver)), NotifyObjectLoaded(*this), NotifyFinalized(*this), @@ -378,7 +377,7 @@ class OrcMCJITReplacement : public ExecutionEngine { std::unique_ptr TM; std::shared_ptr MemMgr; std::shared_ptr Resolver; - std::shared_ptr ClientResolver; + std::shared_ptr ClientResolver; Mangler Mang; // IMPORTANT: ShouldDelete *must* come before LocalModules: The shared_ptr diff --git a/lib/ExecutionEngine/RuntimeDyld/JITSymbol.cpp b/lib/ExecutionEngine/RuntimeDyld/JITSymbol.cpp index 87059ef2b88f..2b3c00fd7d7a 100644 --- a/lib/ExecutionEngine/RuntimeDyld/JITSymbol.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/JITSymbol.cpp @@ -47,3 +47,53 @@ ARMJITSymbolFlags llvm::ARMJITSymbolFlags::fromObjectSymbol( Flags |= ARMJITSymbolFlags::Thumb; return Flags; } + +/// @brief Performs lookup by, for each symbol, first calling +/// findSymbolInLogicalDylib and if that fails calling +/// findSymbol. +Expected +LegacyJITSymbolResolver::lookup(const LookupSet &Symbols) { + JITSymbolResolver::LookupResult Result; + for (auto &Symbol : Symbols) { + std::string SymName = Symbol.str(); + if (auto Sym = findSymbolInLogicalDylib(SymName)) { + if (auto AddrOrErr = Sym.getAddress()) + Result[Symbol] = JITEvaluatedSymbol(*AddrOrErr, Sym.getFlags()); + else + return AddrOrErr.takeError(); + } else if (auto Err = Sym.takeError()) + return std::move(Err); + else { + // findSymbolInLogicalDylib failed. Lets try findSymbol. + if (auto Sym = findSymbol(SymName)) { + if (auto AddrOrErr = Sym.getAddress()) + Result[Symbol] = JITEvaluatedSymbol(*AddrOrErr, Sym.getFlags()); + else + return AddrOrErr.takeError(); + } else if (auto Err = Sym.takeError()) + return std::move(Err); + else + return make_error("Symbol not found: " + Symbol, + inconvertibleErrorCode()); + } + } + + return std::move(Result); +} + +/// @brief Performs flags lookup by calling findSymbolInLogicalDylib and +/// returning the flags value for that symbol. 
+Expected +LegacyJITSymbolResolver::lookupFlags(const LookupSet &Symbols) { + JITSymbolResolver::LookupFlagsResult Result; + + for (auto &Symbol : Symbols) { + std::string SymName = Symbol.str(); + if (auto Sym = findSymbolInLogicalDylib(SymName)) + Result[Symbol] = Sym.getFlags(); + else if (auto Err = Sym.takeError()) + return std::move(Err); + } + + return std::move(Result); +} diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp index c5e4dfa1e536..5c4b8c12f349 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp @@ -202,7 +202,32 @@ RuntimeDyldImpl::loadObjectImpl(const object::ObjectFile &Obj) { ObjSectionToIDMap LocalSections; // Common symbols requiring allocation, with their sizes and alignments - CommonSymbolList CommonSymbols; + CommonSymbolList CommonSymbolsToAllocate; + + uint64_t CommonSize = 0; + uint32_t CommonAlign = 0; + + // First, collect all weak and common symbols. We need to know if stronger + // definitions occur elsewhere. + JITSymbolResolver::LookupFlagsResult SymbolFlags; + { + JITSymbolResolver::LookupSet Symbols; + for (auto &Sym : Obj.symbols()) { + uint32_t Flags = Sym.getFlags(); + if ((Flags & SymbolRef::SF_Common) || (Flags & SymbolRef::SF_Weak)) { + // Get symbol name. + if (auto NameOrErr = Sym.getName()) + Symbols.insert(*NameOrErr); + else + return NameOrErr.takeError(); + } + } + + if (auto FlagsResultOrErr = Resolver.lookupFlags(Symbols)) + SymbolFlags = std::move(*FlagsResultOrErr); + else + return FlagsResultOrErr.takeError(); + } // Parse symbols DEBUG(dbgs() << "Parse symbols:\n"); @@ -214,102 +239,108 @@ RuntimeDyldImpl::loadObjectImpl(const object::ObjectFile &Obj) { if (Flags & SymbolRef::SF_Undefined) continue; - if (Flags & SymbolRef::SF_Common) - CommonSymbols.push_back(*I); - else { + // Get the symbol type. + object::SymbolRef::Type SymType; + if (auto SymTypeOrErr = I->getType()) + SymType = *SymTypeOrErr; + else + return SymTypeOrErr.takeError(); - // Get the symbol type. - object::SymbolRef::Type SymType; - if (auto SymTypeOrErr = I->getType()) - SymType = *SymTypeOrErr; - else - return SymTypeOrErr.takeError(); + // Get symbol name. + StringRef Name; + if (auto NameOrErr = I->getName()) + Name = *NameOrErr; + else + return NameOrErr.takeError(); - // Get symbol name. - StringRef Name; - if (auto NameOrErr = I->getName()) - Name = *NameOrErr; - else - return NameOrErr.takeError(); - - // Compute JIT symbol flags. - JITSymbolFlags JITSymFlags = getJITSymbolFlags(*I); - - // If this is a weak definition, check to see if there's a strong one. - // If there is, skip this symbol (we won't be providing it: the strong - // definition will). If there's no strong definition, make this definition - // strong. - if (JITSymFlags.isWeak()) { - // First check whether there's already a definition in this instance. - // FIXME: Override existing weak definitions with strong ones. - if (GlobalSymbolTable.count(Name)) - continue; - // Then check the symbol resolver to see if there's a definition - // elsewhere in this logical dylib. - if (auto Sym = Resolver.findSymbolInLogicalDylib(Name)) { - if (Sym.getFlags().isStrongDefinition()) - continue; - } else if (auto Err = Sym.takeError()) - return std::move(Err); - // else - JITSymFlags &= ~JITSymbolFlags::Weak; - } + // Compute JIT symbol flags. + JITSymbolFlags JITSymFlags = getJITSymbolFlags(*I); + + // If this is a weak definition, check to see if there's a strong one. 
+ // If there is, skip this symbol (we won't be providing it: the strong + // definition will). If there's no strong definition, make this definition + // strong. + if (JITSymFlags.isWeak() || JITSymFlags.isCommon()) { + // First check whether there's already a definition in this instance. + // FIXME: Override existing weak definitions with strong ones. + if (GlobalSymbolTable.count(Name)) + continue; - if (Flags & SymbolRef::SF_Absolute && - SymType != object::SymbolRef::ST_File) { - uint64_t Addr = 0; - if (auto AddrOrErr = I->getAddress()) - Addr = *AddrOrErr; - else - return AddrOrErr.takeError(); - - unsigned SectionID = AbsoluteSymbolSection; - - DEBUG(dbgs() << "\tType: " << SymType << " (absolute) Name: " << Name - << " SID: " << SectionID << " Offset: " - << format("%p", (uintptr_t)Addr) - << " flags: " << Flags << "\n"); - GlobalSymbolTable[Name] = - SymbolTableEntry(SectionID, Addr, JITSymFlags); - } else if (SymType == object::SymbolRef::ST_Function || - SymType == object::SymbolRef::ST_Data || - SymType == object::SymbolRef::ST_Unknown || - SymType == object::SymbolRef::ST_Other) { - - section_iterator SI = Obj.section_end(); - if (auto SIOrErr = I->getSection()) - SI = *SIOrErr; - else - return SIOrErr.takeError(); + // Then check whether we found flags for an existing symbol during the + // flags lookup earlier. + auto FlagsI = SymbolFlags.find(Name); + if (FlagsI == SymbolFlags.end() || + (JITSymFlags.isWeak() && !FlagsI->second.isStrong()) || + (JITSymFlags.isCommon() && FlagsI->second.isCommon())) { + if (JITSymFlags.isWeak()) + JITSymFlags &= ~JITSymbolFlags::Weak; + if (JITSymFlags.isCommon()) { + JITSymFlags &= ~JITSymbolFlags::Common; + uint32_t Align = I->getAlignment(); + uint64_t Size = I->getCommonSize(); + if (!CommonAlign) + CommonAlign = Align; + CommonSize += alignTo(CommonSize, Align) + Size; + CommonSymbolsToAllocate.push_back(*I); + } + } else + continue; + } + + if (Flags & SymbolRef::SF_Absolute && + SymType != object::SymbolRef::ST_File) { + uint64_t Addr = 0; + if (auto AddrOrErr = I->getAddress()) + Addr = *AddrOrErr; + else + return AddrOrErr.takeError(); + + unsigned SectionID = AbsoluteSymbolSection; + + DEBUG(dbgs() << "\tType: " << SymType << " (absolute) Name: " << Name + << " SID: " << SectionID + << " Offset: " << format("%p", (uintptr_t)Addr) + << " flags: " << Flags << "\n"); + GlobalSymbolTable[Name] = SymbolTableEntry(SectionID, Addr, JITSymFlags); + } else if (SymType == object::SymbolRef::ST_Function || + SymType == object::SymbolRef::ST_Data || + SymType == object::SymbolRef::ST_Unknown || + SymType == object::SymbolRef::ST_Other) { + + section_iterator SI = Obj.section_end(); + if (auto SIOrErr = I->getSection()) + SI = *SIOrErr; + else + return SIOrErr.takeError(); - if (SI == Obj.section_end()) - continue; + if (SI == Obj.section_end()) + continue; - // Get symbol offset. - uint64_t SectOffset; - if (auto Err = getOffset(*I, *SI, SectOffset)) - return std::move(Err); + // Get symbol offset. 
+ uint64_t SectOffset; + if (auto Err = getOffset(*I, *SI, SectOffset)) + return std::move(Err); - bool IsCode = SI->isText(); - unsigned SectionID; - if (auto SectionIDOrErr = findOrEmitSection(Obj, *SI, IsCode, - LocalSections)) - SectionID = *SectionIDOrErr; - else - return SectionIDOrErr.takeError(); + bool IsCode = SI->isText(); + unsigned SectionID; + if (auto SectionIDOrErr = + findOrEmitSection(Obj, *SI, IsCode, LocalSections)) + SectionID = *SectionIDOrErr; + else + return SectionIDOrErr.takeError(); - DEBUG(dbgs() << "\tType: " << SymType << " Name: " << Name - << " SID: " << SectionID << " Offset: " - << format("%p", (uintptr_t)SectOffset) - << " flags: " << Flags << "\n"); - GlobalSymbolTable[Name] = + DEBUG(dbgs() << "\tType: " << SymType << " Name: " << Name + << " SID: " << SectionID + << " Offset: " << format("%p", (uintptr_t)SectOffset) + << " flags: " << Flags << "\n"); + GlobalSymbolTable[Name] = SymbolTableEntry(SectionID, SectOffset, JITSymFlags); - } } } // Allocate common symbols - if (auto Err = emitCommonSymbols(Obj, CommonSymbols)) + if (auto Err = emitCommonSymbols(Obj, CommonSymbolsToAllocate, CommonSize, + CommonAlign)) return std::move(Err); // Parse and process relocations @@ -621,45 +652,12 @@ JITSymbolFlags RuntimeDyldImpl::getJITSymbolFlags(const BasicSymbolRef &SR) { } Error RuntimeDyldImpl::emitCommonSymbols(const ObjectFile &Obj, - CommonSymbolList &CommonSymbols) { - if (CommonSymbols.empty()) + CommonSymbolList &SymbolsToAllocate, + uint64_t CommonSize, + uint32_t CommonAlign) { + if (SymbolsToAllocate.empty()) return Error::success(); - uint64_t CommonSize = 0; - uint32_t CommonAlign = CommonSymbols.begin()->getAlignment(); - CommonSymbolList SymbolsToAllocate; - - DEBUG(dbgs() << "Processing common symbols...\n"); - - for (const auto &Sym : CommonSymbols) { - StringRef Name; - if (auto NameOrErr = Sym.getName()) - Name = *NameOrErr; - else - return NameOrErr.takeError(); - - // Skip common symbols already elsewhere. - if (GlobalSymbolTable.count(Name)) { - DEBUG(dbgs() << "\tSkipping already emitted common symbol '" << Name - << "'\n"); - continue; - } - - if (auto Sym = Resolver.findSymbolInLogicalDylib(Name)) { - if (!Sym.getFlags().isCommon()) { - DEBUG(dbgs() << "\tSkipping common symbol '" << Name - << "' in favor of stronger definition.\n"); - continue; - } - } - uint32_t Align = Sym.getAlignment(); - uint64_t Size = Sym.getCommonSize(); - - CommonSize = alignTo(CommonSize, Align) + Size; - - SymbolsToAllocate.push_back(Sym); - } - // Allocate memory for the section unsigned SectionID = Sections.size(); uint8_t *Addr = MemMgr.allocateDataSection(CommonSize, CommonAlign, SectionID, @@ -997,7 +995,40 @@ void RuntimeDyldImpl::resolveRelocationList(const RelocationList &Relocs, } Error RuntimeDyldImpl::resolveExternalSymbols() { + StringMap ExternalSymbolMap; + + // Resolution can trigger emission of more symbols, so iterate until + // we've resolved *everything*. 
+ { + JITSymbolResolver::LookupSet ResolvedSymbols; + + while (true) { + JITSymbolResolver::LookupSet NewSymbols; + + for (auto &RelocKV : ExternalSymbolRelocations) { + StringRef Name = RelocKV.first(); + if (!Name.empty() && !GlobalSymbolTable.count(Name) && + !ResolvedSymbols.count(Name)) + NewSymbols.insert(Name); + } + + if (NewSymbols.empty()) + break; + + auto NewResolverResults = Resolver.lookup(NewSymbols); + if (!NewResolverResults) + return NewResolverResults.takeError(); + + for (auto &RRKV : *NewResolverResults) { + assert(!ResolvedSymbols.count(RRKV.first) && "Redundant resolution?"); + ExternalSymbolMap.insert(RRKV); + ResolvedSymbols.insert(RRKV.first); + } + } + } + while (!ExternalSymbolRelocations.empty()) { + StringMap::iterator i = ExternalSymbolRelocations.begin(); StringRef Name = i->first(); @@ -1012,29 +1043,10 @@ Error RuntimeDyldImpl::resolveExternalSymbols() { JITSymbolFlags Flags; RTDyldSymbolTable::const_iterator Loc = GlobalSymbolTable.find(Name); if (Loc == GlobalSymbolTable.end()) { - // This is an external symbol, try to get its address from the symbol - // resolver. - // First search for the symbol in this logical dylib. - if (auto Sym = Resolver.findSymbolInLogicalDylib(Name.data())) { - if (auto AddrOrErr = Sym.getAddress()) { - Addr = *AddrOrErr; - Flags = Sym.getFlags(); - } else - return AddrOrErr.takeError(); - } else if (auto Err = Sym.takeError()) - return Err; - - // If that fails, try searching for an external symbol. - if (!Addr) { - if (auto Sym = Resolver.findSymbol(Name.data())) { - if (auto AddrOrErr = Sym.getAddress()) { - Addr = *AddrOrErr; - Flags = Sym.getFlags(); - } else - return AddrOrErr.takeError(); - } else if (auto Err = Sym.takeError()) - return Err; - } + auto RRI = ExternalSymbolMap.find(Name); + assert(RRI != ExternalSymbolMap.end() && "No result for symbol"); + Addr = RRI->second.getAddress(); + Flags = RRI->second.getFlags(); // The call to getSymbolAddress may have caused additional modules to // be loaded, which may have added new entries to the // ExternalSymbolRelocations map. 
Consquently, we need to update our @@ -1095,6 +1107,7 @@ uint64_t RuntimeDyld::LoadedObjectInfo::getSectionLoadAddress( void RuntimeDyld::MemoryManager::anchor() {} void JITSymbolResolver::anchor() {} +void LegacyJITSymbolResolver::anchor() {} RuntimeDyld::RuntimeDyld(RuntimeDyld::MemoryManager &MemMgr, JITSymbolResolver &Resolver) diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp index 5bc7434e703f..3d944bf7b605 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp @@ -731,7 +731,14 @@ bool RuntimeDyldCheckerImpl::checkAllRulesInBuffer(StringRef RulePrefix, bool RuntimeDyldCheckerImpl::isSymbolValid(StringRef Symbol) const { if (getRTDyld().getSymbol(Symbol)) return true; - return !!getRTDyld().Resolver.findSymbol(Symbol); + JITSymbolResolver::LookupSet Symbols({Symbol}); + auto Result = getRTDyld().Resolver.lookup(Symbols); + if (!Result) { + logAllUnhandledErrors(Result.takeError(), errs(), "RTDyldChecker: "); + return false; + } + assert(Result->count(Symbol) && "Missing symbol result"); + return true; } uint64_t RuntimeDyldCheckerImpl::getSymbolLocalAddr(StringRef Symbol) const { @@ -742,7 +749,16 @@ uint64_t RuntimeDyldCheckerImpl::getSymbolLocalAddr(StringRef Symbol) const { uint64_t RuntimeDyldCheckerImpl::getSymbolRemoteAddr(StringRef Symbol) const { if (auto InternalSymbol = getRTDyld().getSymbol(Symbol)) return InternalSymbol.getAddress(); - return cantFail(getRTDyld().Resolver.findSymbol(Symbol).getAddress()); + + JITSymbolResolver::LookupSet Symbols({Symbol}); + auto Result = getRTDyld().Resolver.lookup(Symbols); + if (!Result) { + logAllUnhandledErrors(Result.takeError(), errs(), "RTDyldChecker: "); + return 0; + } + auto I = Result->find(Symbol); + assert(I != Result->end() && "Missing symbol result"); + return I->second.getAddress(); } uint64_t RuntimeDyldCheckerImpl::readMemoryAtAddr(uint64_t SrcAddr, diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp index 4f53bc7dc5a4..7307db7ee612 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp @@ -14,12 +14,10 @@ #include "RuntimeDyldELF.h" #include "RuntimeDyldCheckerImpl.h" #include "Targets/RuntimeDyldELFMips.h" -#include "llvm/ADT/IntervalMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/BinaryFormat/ELF.h" -#include "llvm/MC/MCStreamer.h" #include "llvm/Object/ELFObjectFile.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Support/Endian.h" @@ -67,7 +65,7 @@ template class DyldELFObject : public ELFObjectFile { typedef Elf_Ehdr_Impl Elf_Ehdr; - typedef typename ELFDataTypeTypedefHelper::value_type addr_type; + typedef typename ELFT::uint addr_type; DyldELFObject(ELFObjectFile &&Obj); @@ -150,8 +148,8 @@ template static Expected>> createRTDyldELFObject(MemoryBufferRef Buffer, const ObjectFile &SourceObject, const LoadedELFObjectInfo &L) { - typedef typename ELFFile::Elf_Shdr Elf_Shdr; - typedef typename ELFDataTypeTypedefHelper::value_type addr_type; + typedef typename ELFT::Shdr Elf_Shdr; + typedef typename ELFT::uint addr_type; Expected>> ObjOrErr = DyldELFObject::create(Buffer); diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h index e046a8504e9f..e940004bb2ef 100644 --- 
a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h @@ -385,7 +385,8 @@ class RuntimeDyldImpl { /// new section for them and update the symbol mappings in the object and /// symbol table. Error emitCommonSymbols(const ObjectFile &Obj, - CommonSymbolList &CommonSymbols); + CommonSymbolList &CommonSymbols, uint64_t CommonSize, + uint32_t CommonAlign); /// \brief Emits section data from the object file to the MemoryManager. /// \param IsCode if it's true then allocateCodeSection() will be diff --git a/lib/FuzzMutate/IRMutator.cpp b/lib/FuzzMutate/IRMutator.cpp index 6545446a9849..00b558ac4dcb 100644 --- a/lib/FuzzMutate/IRMutator.cpp +++ b/lib/FuzzMutate/IRMutator.cpp @@ -8,15 +8,17 @@ //===----------------------------------------------------------------------===// #include "llvm/FuzzMutate/IRMutator.h" +#include "llvm/ADT/Optional.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/FuzzMutate/Operations.h" #include "llvm/FuzzMutate/Random.h" #include "llvm/FuzzMutate/RandomIRBuilder.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Function.h" -#include "llvm/IR/Instructions.h" #include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" #include "llvm/Transforms/Scalar/DCE.h" using namespace llvm; @@ -90,14 +92,14 @@ std::vector InjectorIRStrategy::getDefaultOps() { return Ops; } -fuzzerop::OpDescriptor +Optional InjectorIRStrategy::chooseOperation(Value *Src, RandomIRBuilder &IB) { auto OpMatchesPred = [&Src](fuzzerop::OpDescriptor &Op) { return Op.SourcePreds[0].matches({}, Src); }; auto RS = makeSampler(IB.Rand, make_filter_range(Operations, OpMatchesPred)); if (RS.isEmpty()) - report_fatal_error("No available operations for src type"); + return None; return *RS; } @@ -105,6 +107,8 @@ void InjectorIRStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) { SmallVector Insts; for (auto I = BB.getFirstInsertionPt(), E = BB.end(); I != E; ++I) Insts.push_back(&*I); + if (Insts.size() < 1) + return; // Choose an insertion point for our new instruction. size_t IP = uniform(IB.Rand, 0, Insts.size() - 1); @@ -118,10 +122,15 @@ void InjectorIRStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) { // Choose an operation that's constrained to be valid for the type of the // source, collect any other sources it needs, and then build it. - fuzzerop::OpDescriptor OpDesc = chooseOperation(Srcs[0], IB); - for (const auto &Pred : makeArrayRef(OpDesc.SourcePreds).slice(1)) + auto OpDesc = chooseOperation(Srcs[0], IB); + // Bail if no operation was found + if (!OpDesc) + return; + + for (const auto &Pred : makeArrayRef(OpDesc->SourcePreds).slice(1)) Srcs.push_back(IB.findOrCreateSource(BB, InstsBefore, Srcs, Pred)); - if (Value *Op = OpDesc.BuilderFunc(Srcs, Insts[IP])) { + + if (Value *Op = OpDesc->BuilderFunc(Srcs, Insts[IP])) { // Find a sink and wire up the results of the operation. IB.connectToSink(BB, InstsAfter, Op); } @@ -147,7 +156,9 @@ void InstDeleterIRStrategy::mutate(Function &F, RandomIRBuilder &IB) { for (Instruction &Inst : instructions(F)) if (!Inst.isTerminator()) RS.sample(&Inst, /*Weight=*/1); - assert(!RS.isEmpty() && "No instructions to delete"); + if (RS.isEmpty()) + return; + // Delete the instruction. mutate(*RS.getSelection(), IB); // Clean up any dead code that's left over after removing the instruction. 
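The IRMutator changes above replace report_fatal_error and a hard assert with early returns: chooseOperation now yields an Optional, and both strategies simply skip the mutation when no candidate applies. A compact illustration of that pattern, with std::optional standing in for llvm::Optional and an invented two-field OpDescriptor, is sketched below; it is not the fuzzer's real predicate machinery.

#include <optional>
#include <vector>

struct OpDescriptor {
  int Weight;
  bool MatchesInt; // Stand-in for the real source predicate.
};

// Return the first matching candidate, or an empty optional if filtering
// leaves nothing viable -- previously this situation was a fatal error.
std::optional<OpDescriptor>
chooseOperation(const std::vector<OpDescriptor> &Ops, bool SrcIsInt) {
  for (const auto &Op : Ops)
    if (Op.MatchesInt == SrcIsInt)
      return Op;
  return std::nullopt;
}

int main() {
  std::vector<OpDescriptor> Ops = {{1, /*MatchesInt=*/false}};
  auto Chosen = chooseOperation(Ops, /*SrcIsInt=*/true);
  // No operation matches the source type here, so the mutator would simply
  // return without mutating; exit 0 models that benign outcome.
  return Chosen.has_value() ? 1 : 0;
}

Bailing out instead of aborting matters for fuzzing drivers, where an empty basic block or an unmatched source type is an expected input rather than an internal invariant violation.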
diff --git a/lib/FuzzMutate/Operations.cpp b/lib/FuzzMutate/Operations.cpp index 083d9aa039e1..b842f6d64fb1 100644 --- a/lib/FuzzMutate/Operations.cpp +++ b/lib/FuzzMutate/Operations.cpp @@ -142,9 +142,14 @@ OpDescriptor llvm::fuzzerop::splitBlockDescriptor(unsigned Weight) { auto buildSplitBlock = [](ArrayRef Srcs, Instruction *Inst) { BasicBlock *Block = Inst->getParent(); BasicBlock *Next = Block->splitBasicBlock(Inst, "BB"); + + // If it was an exception handling block, we are done. + if (Block->isEHPad()) + return nullptr; + + // Loop back on this block by replacing the unconditional forward branch + // with a conditional with a backedge. if (Block != &Block->getParent()->getEntryBlock()) { - // Loop back on this block by replacing the unconditional forward branch - // with a conditional with a backedge. BranchInst::Create(Block, Next, Srcs[0], Block->getTerminator()); Block->getTerminator()->eraseFromParent(); @@ -172,7 +177,7 @@ OpDescriptor llvm::fuzzerop::gepDescriptor(unsigned Weight) { // TODO: Handle aggregates and vectors // TODO: Support multiple indices. // TODO: Try to avoid meaningless accesses. - return {Weight, {anyPtrType(), anyIntType()}, buildGEP}; + return {Weight, {sizedPtrType(), anyIntType()}, buildGEP}; } static uint64_t getAggregateNumElements(Type *T) { @@ -216,8 +221,9 @@ OpDescriptor llvm::fuzzerop::extractValueDescriptor(unsigned Weight) { static SourcePred matchScalarInAggregate() { auto Pred = [](ArrayRef Cur, const Value *V) { - if (isa(Cur[0]->getType())) - return V->getType() == Cur[0]->getType(); + if (auto *ArrayT = dyn_cast(Cur[0]->getType())) + return V->getType() == ArrayT->getElementType(); + auto *STy = cast(Cur[0]->getType()); for (int I = 0, E = STy->getNumElements(); I < E; ++I) if (STy->getTypeAtIndex(I) == V->getType()) @@ -225,8 +231,9 @@ static SourcePred matchScalarInAggregate() { return false; }; auto Make = [](ArrayRef Cur, ArrayRef) { - if (isa(Cur[0]->getType())) - return makeConstantsWithType(Cur[0]->getType()); + if (auto *ArrayT = dyn_cast(Cur[0]->getType())) + return makeConstantsWithType(ArrayT->getElementType()); + std::vector Result; auto *STy = cast(Cur[0]->getType()); for (int I = 0, E = STy->getNumElements(); I < E; ++I) @@ -240,9 +247,9 @@ static SourcePred validInsertValueIndex() { auto Pred = [](ArrayRef Cur, const Value *V) { auto *CTy = cast(Cur[0]->getType()); if (auto *CI = dyn_cast(V)) - if (CI->getBitWidth() == 32) - if (CTy->getTypeAtIndex(CI->getZExtValue()) == V->getType()) - return true; + if (CI->getBitWidth() == 32 && + CTy->getTypeAtIndex(CI->getZExtValue()) == Cur[1]->getType()) + return true; return false; }; auto Make = [](ArrayRef Cur, ArrayRef Ts) { diff --git a/lib/FuzzMutate/RandomIRBuilder.cpp b/lib/FuzzMutate/RandomIRBuilder.cpp index 42e30464b0d4..9f5b7d608a1d 100644 --- a/lib/FuzzMutate/RandomIRBuilder.cpp +++ b/lib/FuzzMutate/RandomIRBuilder.cpp @@ -15,7 +15,6 @@ #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Module.h" using namespace llvm; using namespace fuzzerop; @@ -45,22 +44,27 @@ Value *RandomIRBuilder::newSource(BasicBlock &BB, ArrayRef Insts, // Generate some constants to choose from. auto RS = makeSampler(Rand); RS.sample(Pred.generate(Srcs, KnownTypes)); - assert(!RS.isEmpty() && "Failed to generate sources"); // If we can find a pointer to load from, use it half the time. 
Value *Ptr = findPointer(BB, Insts, Srcs, Pred); - if (Ptr) - RS.sample(Ptr, RS.totalWeight()); - - Value *Result = RS.getSelection(); - if (Result != Ptr) - return Result; - - // If we choose the pointer, we need to create a load. - auto IP = BB.getFirstInsertionPt(); - if (auto *I = dyn_cast(Ptr)) - IP = ++I->getIterator(); - return new LoadInst(Ptr, "L", &*IP); + if (Ptr) { + // Create load from the chosen pointer + auto IP = BB.getFirstInsertionPt(); + if (auto *I = dyn_cast(Ptr)) { + IP = ++I->getIterator(); + assert(IP != BB.end() && "guaranteed by the findPointer"); + } + auto *NewLoad = new LoadInst(Ptr, "L", &*IP); + + // Only sample this load if it really matches the descriptor + if (Pred.matches(Srcs, NewLoad)) + RS.sample(NewLoad, RS.totalWeight()); + else + NewLoad->eraseFromParent(); + } + + assert(!RS.isEmpty() && "Failed to generate sources"); + return RS.getSelection(); } static bool isCompatibleReplacement(const Instruction *I, const Use &Operand, @@ -73,12 +77,13 @@ static bool isCompatibleReplacement(const Instruction *I, const Use &Operand, case Instruction::ExtractValue: // TODO: We could potentially validate these, but for now just leave indices // alone. - if (Operand.getOperandNo() > 1) + if (Operand.getOperandNo() >= 1) return false; break; case Instruction::InsertValue: case Instruction::InsertElement: - if (Operand.getOperandNo() > 2) + case Instruction::ShuffleVector: + if (Operand.getOperandNo() >= 2) return false; break; default: @@ -129,9 +134,20 @@ Value *RandomIRBuilder::findPointer(BasicBlock &BB, ArrayRef Insts, ArrayRef Srcs, SourcePred Pred) { auto IsMatchingPtr = [&Srcs, &Pred](Instruction *Inst) { - if (auto PtrTy = dyn_cast(Inst->getType())) + // Invoke instructions sometimes produce valid pointers but currently + // we can't insert loads or stores from them + if (isa(Inst)) + return false; + + if (auto PtrTy = dyn_cast(Inst->getType())) { + // We can never generate loads from non first class or non sized types + if (!PtrTy->getElementType()->isSized() || + !PtrTy->getElementType()->isFirstClassType()) + return false; + // TODO: Check if this is horribly expensive. return Pred.matches(Srcs, UndefValue::get(PtrTy->getElementType())); + } return false; }; if (auto RS = makeSampler(Rand, make_filter_range(Insts, IsMatchingPtr))) diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp index 0fafe82404e4..7c6476058941 100644 --- a/lib/IR/AsmWriter.cpp +++ b/lib/IR/AsmWriter.cpp @@ -2497,8 +2497,13 @@ static void PrintVisibility(GlobalValue::VisibilityTypes Vis, } } -static void PrintDSOLocation(bool IsDSOLocal, formatted_raw_ostream &Out){ - if (IsDSOLocal) +static void PrintDSOLocation(const GlobalValue &GV, + formatted_raw_ostream &Out) { + // GVs with local linkage or non default visibility are implicitly dso_local, + // so we don't print it. 
+ bool Implicit = GV.hasLocalLinkage() || + (!GV.hasExternalWeakLinkage() && !GV.hasDefaultVisibility()); + if (GV.isDSOLocal() && !Implicit) Out << "dso_local "; } @@ -2572,7 +2577,7 @@ void AssemblyWriter::printGlobal(const GlobalVariable *GV) { Out << "external "; Out << getLinkagePrintName(GV->getLinkage()); - PrintDSOLocation(GV->isDSOLocal(), Out); + PrintDSOLocation(*GV, Out); PrintVisibility(GV->getVisibility(), Out); PrintDLLStorageClass(GV->getDLLStorageClass(), Out); PrintThreadLocalModel(GV->getThreadLocalMode(), Out); @@ -2619,7 +2624,7 @@ void AssemblyWriter::printIndirectSymbol(const GlobalIndirectSymbol *GIS) { Out << " = "; Out << getLinkagePrintName(GIS->getLinkage()); - PrintDSOLocation(GIS->isDSOLocal(), Out); + PrintDSOLocation(*GIS, Out); PrintVisibility(GIS->getVisibility(), Out); PrintDLLStorageClass(GIS->getDLLStorageClass(), Out); PrintThreadLocalModel(GIS->getThreadLocalMode(), Out); @@ -2731,7 +2736,7 @@ void AssemblyWriter::printFunction(const Function *F) { Out << "define "; Out << getLinkagePrintName(F->getLinkage()); - PrintDSOLocation(F->isDSOLocal(), Out); + PrintDSOLocation(*F, Out); PrintVisibility(F->getVisibility(), Out); PrintDLLStorageClass(F->getDLLStorageClass(), Out); diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index c8f1aaaccee3..30216bcde680 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -245,6 +245,8 @@ std::string Attribute::getAsString(bool InAttrGrp) const { if (hasAttribute(Attribute::SanitizeAddress)) return "sanitize_address"; + if (hasAttribute(Attribute::SanitizeHWAddress)) + return "sanitize_hwaddress"; if (hasAttribute(Attribute::AlwaysInline)) return "alwaysinline"; if (hasAttribute(Attribute::ArgMemOnly)) @@ -541,26 +543,21 @@ AttributeSet AttributeSet::addAttributes(LLVMContext &C, AttributeSet AttributeSet::removeAttribute(LLVMContext &C, Attribute::AttrKind Kind) const { if (!hasAttribute(Kind)) return *this; - AttrBuilder B; - B.addAttribute(Kind); - return removeAttributes(C, B); + AttrBuilder B(*this); + B.removeAttribute(Kind); + return get(C, B); } AttributeSet AttributeSet::removeAttribute(LLVMContext &C, StringRef Kind) const { if (!hasAttribute(Kind)) return *this; - AttrBuilder B; - B.addAttribute(Kind); - return removeAttributes(C, B); + AttrBuilder B(*this); + B.removeAttribute(Kind); + return get(C, B); } AttributeSet AttributeSet::removeAttributes(LLVMContext &C, const AttrBuilder &Attrs) const { - - // FIXME it is not obvious how this should work for alignment. - // For now, say we can't pass in alignment, which no current use does. 
- assert(!Attrs.hasAlignmentAttr() && "Attempt to change alignment!"); - AttrBuilder B(*this); B.remove(Attrs); return get(C, B); @@ -1096,17 +1093,27 @@ AttributeList AttributeList::addParamAttribute(LLVMContext &C, AttributeList AttributeList::removeAttribute(LLVMContext &C, unsigned Index, Attribute::AttrKind Kind) const { if (!hasAttribute(Index, Kind)) return *this; - AttrBuilder B; - B.addAttribute(Kind); - return removeAttributes(C, Index, B); + + Index = attrIdxToArrayIdx(Index); + SmallVector AttrSets(this->begin(), this->end()); + assert(Index < AttrSets.size()); + + AttrSets[Index] = AttrSets[Index].removeAttribute(C, Kind); + + return getImpl(C, AttrSets); } AttributeList AttributeList::removeAttribute(LLVMContext &C, unsigned Index, StringRef Kind) const { if (!hasAttribute(Index, Kind)) return *this; - AttrBuilder B; - B.addAttribute(Kind); - return removeAttributes(C, Index, B); + + Index = attrIdxToArrayIdx(Index); + SmallVector AttrSets(this->begin(), this->end()); + assert(Index < AttrSets.size()); + + AttrSets[Index] = AttrSets[Index].removeAttribute(C, Kind); + + return getImpl(C, AttrSets); } AttributeList @@ -1115,18 +1122,12 @@ AttributeList::removeAttributes(LLVMContext &C, unsigned Index, if (!pImpl) return AttributeList(); - // FIXME it is not obvious how this should work for alignment. - // For now, say we can't pass in alignment, which no current use does. - assert(!AttrsToRemove.hasAlignmentAttr() && "Attempt to change alignment!"); - Index = attrIdxToArrayIdx(Index); SmallVector AttrSets(this->begin(), this->end()); if (Index >= AttrSets.size()) AttrSets.resize(Index + 1); - AttrBuilder B(AttrSets[Index]); - B.remove(AttrsToRemove); - AttrSets[Index] = AttributeSet::get(C, B); + AttrSets[Index] = AttrSets[Index].removeAttributes(C, AttrsToRemove); return getImpl(C, AttrSets); } diff --git a/lib/IR/AutoUpgrade.cpp b/lib/IR/AutoUpgrade.cpp index afbe4eb9543d..5d219995aed3 100644 --- a/lib/IR/AutoUpgrade.cpp +++ b/lib/IR/AutoUpgrade.cpp @@ -15,8 +15,6 @@ #include "llvm/IR/AutoUpgrade.h" #include "llvm/ADT/StringSwitch.h" -#include "llvm/IR/CFG.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/DebugInfo.h" @@ -78,6 +76,7 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) { Name=="ssse3.pabs.d.128" || // Added in 6.0 Name.startswith("avx512.mask.shuf.i") || // Added in 6.0 Name.startswith("avx512.mask.shuf.f") || // Added in 6.0 + Name.startswith("avx512.kunpck") || //added in 6.0 Name.startswith("avx2.pabs.") || // Added in 6.0 Name.startswith("avx512.mask.pabs.") || // Added in 6.0 Name.startswith("avx512.broadcastm") || // Added in 6.0 @@ -159,6 +158,10 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) { Name.startswith("avx512.mask.cmp.q") || // Added in 5.0 Name.startswith("avx512.mask.cmp.w") || // Added in 5.0 Name.startswith("avx512.mask.ucmp.") || // Added in 5.0 + Name.startswith("avx512.cvtb2mask.") || // Added in 7.0 + Name.startswith("avx512.cvtw2mask.") || // Added in 7.0 + Name.startswith("avx512.cvtd2mask.") || // Added in 7.0 + Name.startswith("avx512.cvtq2mask.") || // Added in 7.0 Name == "avx512.mask.add.pd.128" || // Added in 4.0 Name == "avx512.mask.add.pd.256" || // Added in 4.0 Name == "avx512.mask.add.ps.128" || // Added in 4.0 @@ -520,6 +523,37 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { return true; } } + // Updating the memory intrinsics (memcpy/memmove/memset) that have an + // alignment parameter to 
embedding the alignment as an attribute of + // the pointer args. + if (Name.startswith("memcpy.") && F->arg_size() == 5) { + rename(F); + // Get the types of dest, src, and len + ArrayRef ParamTypes = F->getFunctionType()->params().slice(0, 3); + NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memcpy, + ParamTypes); + return true; + } + if (Name.startswith("memmove.") && F->arg_size() == 5) { + rename(F); + // Get the types of dest, src, and len + ArrayRef ParamTypes = F->getFunctionType()->params().slice(0, 3); + NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memmove, + ParamTypes); + return true; + } + if (Name.startswith("memset.") && F->arg_size() == 5) { + rename(F); + // Get the types of dest, and len + const auto *FT = F->getFunctionType(); + Type *ParamTypes[2] = { + FT->getParamType(0), // Dest + FT->getParamType(2) // len + }; + NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memset, + ParamTypes); + return true; + } break; } case 'n': { @@ -831,9 +865,11 @@ static Value *upgradeIntMinMax(IRBuilder<> &Builder, CallInst &CI, // Applying mask on vector of i1's and make sure result is at least 8 bits wide. static Value *ApplyX86MaskOn1BitsVec(IRBuilder<> &Builder,Value *Vec, Value *Mask, unsigned NumElts) { - const auto *C = dyn_cast(Mask); - if (!C || !C->isAllOnesValue()) - Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts)); + if (Mask) { + const auto *C = dyn_cast(Mask); + if (!C || !C->isAllOnesValue()) + Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts)); + } if (NumElts < 8) { uint32_t Indices[8]; @@ -1065,6 +1101,24 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0)); Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1)); + } else if (IsX86 && (Name.startswith("avx512.kunpck"))) { + unsigned NumElts = CI->getType()->getScalarSizeInBits(); + Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts); + Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts); + uint32_t Indices[64]; + for (unsigned i = 0; i != NumElts; ++i) + Indices[i] = i; + + // First extract half of each vector. This gives better codegen than + // doing it in a single shuffle. + LHS = Builder.CreateShuffleVector(LHS, LHS, + makeArrayRef(Indices, NumElts / 2)); + RHS = Builder.CreateShuffleVector(RHS, RHS, + makeArrayRef(Indices, NumElts / 2)); + // Concat the vectors. 
+ Rep = Builder.CreateShuffleVector(LHS, RHS, + makeArrayRef(Indices, NumElts)); + Rep = Builder.CreateBitCast(Rep, CI->getType()); } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd")) { Type *I32Ty = Type::getInt32Ty(C); Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0), @@ -1111,6 +1165,15 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { } else if (IsX86 && Name.startswith("avx512.mask.ucmp")) { unsigned Imm = cast(CI->getArgOperand(2))->getZExtValue(); Rep = upgradeMaskedCompare(Builder, *CI, Imm, false); + } else if (IsX86 && (Name.startswith("avx512.cvtb2mask.") || + Name.startswith("avx512.cvtw2mask.") || + Name.startswith("avx512.cvtd2mask.") || + Name.startswith("avx512.cvtq2mask."))) { + Value *Op = CI->getArgOperand(0); + Value *Zero = llvm::Constant::getNullValue(Op->getType()); + Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero); + Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, nullptr, + Op->getType()->getVectorNumElements()); } else if(IsX86 && (Name == "ssse3.pabs.b.128" || Name == "ssse3.pabs.w.128" || Name == "ssse3.pabs.d.128" || @@ -2167,14 +2230,17 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { return; } - CallInst *NewCall = nullptr; - switch (NewFn->getIntrinsicID()) { - default: { + const auto &DefaultCase = [&NewFn, &CI]() -> void { // Handle generic mangling change, but nothing else assert( (CI->getCalledFunction()->getName() != NewFn->getName()) && "Unknown function for CallInst upgrade and isn't just a name change"); CI->setCalledFunction(NewFn); + }; + CallInst *NewCall = nullptr; + switch (NewFn->getIntrinsicID()) { + default: { + DefaultCase(); return; } @@ -2315,6 +2381,35 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { NewCall = Builder.CreateCall(NewFn, Args); break; } + + case Intrinsic::memcpy: + case Intrinsic::memmove: + case Intrinsic::memset: { + // We have to make sure that the call signature is what we're expecting. + // We only want to change the old signatures by removing the alignment arg: + // @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1) + // -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1) + // @llvm.memset...(i8*, i8, i[32|64], i32, i1) + // -> @llvm.memset...(i8*, i8, i[32|64], i1) + // Note: i8*'s in the above can be any pointer type + if (CI->getNumArgOperands() != 5) { + DefaultCase(); + return; + } + // Remove alignment argument (3), and add alignment attributes to the + // dest/src pointers. + Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), CI->getArgOperand(4)}; + NewCall = Builder.CreateCall(NewFn, Args); + auto *MemCI = cast(NewCall); + // All mem intrinsics support dest alignment. + const ConstantInt *Align = cast(CI->getArgOperand(3)); + MemCI->setDestAlignment(Align->getZExtValue()); + // Memcpy/Memmove also support source alignment. + if (auto *MTI = dyn_cast(MemCI)) + MTI->setSourceAlignment(Align->getZExtValue()); + break; + } } assert(NewCall && "Should have either set this variable or returned through " "the default case"); diff --git a/lib/IR/BasicBlock.cpp b/lib/IR/BasicBlock.cpp index 22513924a96d..938c40182b92 100644 --- a/lib/IR/BasicBlock.cpp +++ b/lib/IR/BasicBlock.cpp @@ -264,7 +264,8 @@ const BasicBlock *BasicBlock::getUniqueSuccessor() const { } iterator_range BasicBlock::phis() { - return make_range(dyn_cast(&front()), nullptr); + PHINode *P = empty() ? 
nullptr : dyn_cast(&*begin()); + return make_range(P, nullptr); } /// This method is used to notify a BasicBlock that the diff --git a/lib/IR/ConstantFold.cpp b/lib/IR/ConstantFold.cpp index c826f757e6dd..59818a1425f1 100644 --- a/lib/IR/ConstantFold.cpp +++ b/lib/IR/ConstantFold.cpp @@ -1674,6 +1674,7 @@ static ICmpInst::Predicate evaluateICmpRelation(Constant *V1, Constant *V2, } } } + break; } default: break; @@ -2210,17 +2211,17 @@ Constant *llvm::ConstantFoldGetElementPtr(Type *PointeeTy, Constant *C, SmallVector NewIdxs; Type *Ty = PointeeTy; Type *Prev = C->getType(); - bool Unknown = !isa(Idxs[0]); + bool Unknown = + !isa(Idxs[0]) && !isa(Idxs[0]); for (unsigned i = 1, e = Idxs.size(); i != e; Prev = Ty, Ty = cast(Ty)->getTypeAtIndex(Idxs[i]), ++i) { - auto *CI = dyn_cast(Idxs[i]); - if (!CI) { + if (!isa(Idxs[i]) && !isa(Idxs[i])) { // We don't know if it's in range or not. Unknown = true; continue; } - if (!isa(Idxs[i - 1])) - // FIXME: add the support of cosntant vector index. + if (!isa(Idxs[i - 1]) && !isa(Idxs[i - 1])) + // Skip if the type of the previous index is not supported. continue; if (InRangeIndex && i == *InRangeIndex + 1) { // If an index is marked inrange, we cannot apply this canonicalization to @@ -2238,46 +2239,91 @@ Constant *llvm::ConstantFoldGetElementPtr(Type *PointeeTy, Constant *C, Unknown = true; continue; } - if (isIndexInRangeOfArrayType(STy->getNumElements(), CI)) - // It's in range, skip to the next index. - continue; + if (ConstantInt *CI = dyn_cast(Idxs[i])) { + if (isIndexInRangeOfArrayType(STy->getNumElements(), CI)) + // It's in range, skip to the next index. + continue; + if (CI->getSExtValue() < 0) { + // It's out of range and negative, don't try to factor it. + Unknown = true; + continue; + } + } else { + auto *CV = cast(Idxs[i]); + bool InRange = true; + for (unsigned I = 0, E = CV->getNumElements(); I != E; ++I) { + auto *CI = cast(CV->getElementAsConstant(I)); + InRange &= isIndexInRangeOfArrayType(STy->getNumElements(), CI); + if (CI->getSExtValue() < 0) { + Unknown = true; + break; + } + } + if (InRange || Unknown) + // It's in range, skip to the next index. + // It's out of range and negative, don't try to factor it. + continue; + } if (isa(Prev)) { // It's out of range, but the prior dimension is a struct // so we can't do anything about it. Unknown = true; continue; } - if (CI->getSExtValue() < 0) { - // It's out of range and negative, don't try to factor it. - Unknown = true; - continue; - } // It's out of range, but we can factor it into the prior // dimension. NewIdxs.resize(Idxs.size()); // Determine the number of elements in our sequential type. uint64_t NumElements = STy->getArrayNumElements(); - ConstantInt *Factor = ConstantInt::get(CI->getType(), NumElements); - NewIdxs[i] = ConstantExpr::getSRem(CI, Factor); + // Expand the current index or the previous index to a vector from a scalar + // if necessary. + Constant *CurrIdx = cast(Idxs[i]); + auto *PrevIdx = + NewIdxs[i - 1] ? 
NewIdxs[i - 1] : cast(Idxs[i - 1]); + bool IsCurrIdxVector = CurrIdx->getType()->isVectorTy(); + bool IsPrevIdxVector = PrevIdx->getType()->isVectorTy(); + bool UseVector = IsCurrIdxVector || IsPrevIdxVector; + + if (!IsCurrIdxVector && IsPrevIdxVector) + CurrIdx = ConstantDataVector::getSplat( + PrevIdx->getType()->getVectorNumElements(), CurrIdx); + + if (!IsPrevIdxVector && IsCurrIdxVector) + PrevIdx = ConstantDataVector::getSplat( + CurrIdx->getType()->getVectorNumElements(), PrevIdx); + + Constant *Factor = + ConstantInt::get(CurrIdx->getType()->getScalarType(), NumElements); + if (UseVector) + Factor = ConstantDataVector::getSplat( + IsPrevIdxVector ? PrevIdx->getType()->getVectorNumElements() + : CurrIdx->getType()->getVectorNumElements(), + Factor); + + NewIdxs[i] = ConstantExpr::getSRem(CurrIdx, Factor); - Constant *PrevIdx = NewIdxs[i-1] ? NewIdxs[i-1] : - cast(Idxs[i - 1]); - Constant *Div = ConstantExpr::getSDiv(CI, Factor); + Constant *Div = ConstantExpr::getSDiv(CurrIdx, Factor); unsigned CommonExtendedWidth = - std::max(PrevIdx->getType()->getIntegerBitWidth(), - Div->getType()->getIntegerBitWidth()); + std::max(PrevIdx->getType()->getScalarSizeInBits(), + Div->getType()->getScalarSizeInBits()); CommonExtendedWidth = std::max(CommonExtendedWidth, 64U); // Before adding, extend both operands to i64 to avoid // overflow trouble. - if (!PrevIdx->getType()->isIntegerTy(CommonExtendedWidth)) - PrevIdx = ConstantExpr::getSExt( - PrevIdx, Type::getIntNTy(Div->getContext(), CommonExtendedWidth)); - if (!Div->getType()->isIntegerTy(CommonExtendedWidth)) - Div = ConstantExpr::getSExt( - Div, Type::getIntNTy(Div->getContext(), CommonExtendedWidth)); + Type *ExtendedTy = Type::getIntNTy(Div->getContext(), CommonExtendedWidth); + if (UseVector) + ExtendedTy = VectorType::get( + ExtendedTy, IsPrevIdxVector + ? PrevIdx->getType()->getVectorNumElements() + : CurrIdx->getType()->getVectorNumElements()); + + if (!PrevIdx->getType()->isIntOrIntVectorTy(CommonExtendedWidth)) + PrevIdx = ConstantExpr::getSExt(PrevIdx, ExtendedTy); + + if (!Div->getType()->isIntOrIntVectorTy(CommonExtendedWidth)) + Div = ConstantExpr::getSExt(Div, ExtendedTy); NewIdxs[i - 1] = ConstantExpr::getAdd(PrevIdx, Div); } diff --git a/lib/IR/ConstantRange.cpp b/lib/IR/ConstantRange.cpp index 4bd17257016d..48d16f334ba3 100644 --- a/lib/IR/ConstantRange.cpp +++ b/lib/IR/ConstantRange.cpp @@ -199,39 +199,63 @@ ConstantRange::makeGuaranteedNoWrapRegion(Instruction::BinaryOps BinOp, "NoWrapKind invalid!"); unsigned BitWidth = Other.getBitWidth(); - if (BinOp != Instruction::Add) + ConstantRange Result(BitWidth); + + switch (BinOp) { + default: // Conservative answer: empty set return ConstantRange(BitWidth, false); - if (auto *C = Other.getSingleElement()) - if (C->isNullValue()) - // Full set: nothing signed / unsigned wraps when added to 0. - return ConstantRange(BitWidth); - - ConstantRange Result(BitWidth); + case Instruction::Add: + if (auto *C = Other.getSingleElement()) + if (C->isNullValue()) + // Full set: nothing signed / unsigned wraps when added to 0. 
+ return ConstantRange(BitWidth); + if (NoWrapKind & OBO::NoUnsignedWrap) + Result = + SubsetIntersect(Result, ConstantRange(APInt::getNullValue(BitWidth), + -Other.getUnsignedMax())); + if (NoWrapKind & OBO::NoSignedWrap) { + const APInt &SignedMin = Other.getSignedMin(); + const APInt &SignedMax = Other.getSignedMax(); + if (SignedMax.isStrictlyPositive()) + Result = SubsetIntersect( + Result, + ConstantRange(APInt::getSignedMinValue(BitWidth), + APInt::getSignedMinValue(BitWidth) - SignedMax)); + if (SignedMin.isNegative()) + Result = SubsetIntersect( + Result, + ConstantRange(APInt::getSignedMinValue(BitWidth) - SignedMin, + APInt::getSignedMinValue(BitWidth))); + } + return Result; - if (NoWrapKind & OBO::NoUnsignedWrap) - Result = - SubsetIntersect(Result, ConstantRange(APInt::getNullValue(BitWidth), - -Other.getUnsignedMax())); - - if (NoWrapKind & OBO::NoSignedWrap) { - const APInt &SignedMin = Other.getSignedMin(); - const APInt &SignedMax = Other.getSignedMax(); - - if (SignedMax.isStrictlyPositive()) - Result = SubsetIntersect( - Result, - ConstantRange(APInt::getSignedMinValue(BitWidth), - APInt::getSignedMinValue(BitWidth) - SignedMax)); - - if (SignedMin.isNegative()) - Result = SubsetIntersect( - Result, ConstantRange(APInt::getSignedMinValue(BitWidth) - SignedMin, - APInt::getSignedMinValue(BitWidth))); + case Instruction::Sub: + if (auto *C = Other.getSingleElement()) + if (C->isNullValue()) + // Full set: nothing signed / unsigned wraps when subtracting 0. + return ConstantRange(BitWidth); + if (NoWrapKind & OBO::NoUnsignedWrap) + Result = + SubsetIntersect(Result, ConstantRange(Other.getUnsignedMax(), + APInt::getMinValue(BitWidth))); + if (NoWrapKind & OBO::NoSignedWrap) { + const APInt &SignedMin = Other.getSignedMin(); + const APInt &SignedMax = Other.getSignedMax(); + if (SignedMax.isStrictlyPositive()) + Result = SubsetIntersect( + Result, + ConstantRange(APInt::getSignedMinValue(BitWidth) + SignedMax, + APInt::getSignedMinValue(BitWidth))); + if (SignedMin.isNegative()) + Result = SubsetIntersect( + Result, + ConstantRange(APInt::getSignedMinValue(BitWidth), + APInt::getSignedMinValue(BitWidth) + SignedMin)); + } + return Result; } - - return Result; } bool ConstantRange::isFullSet() const { @@ -656,6 +680,8 @@ ConstantRange ConstantRange::binaryOp(Instruction::BinaryOps BinOp, return shl(Other); case Instruction::LShr: return lshr(Other); + case Instruction::AShr: + return ashr(Other); case Instruction::And: return binaryAnd(Other); case Instruction::Or: @@ -922,6 +948,60 @@ ConstantRange::lshr(const ConstantRange &Other) const { return ConstantRange(std::move(min), std::move(max)); } +ConstantRange +ConstantRange::ashr(const ConstantRange &Other) const { + if (isEmptySet() || Other.isEmptySet()) + return ConstantRange(getBitWidth(), /*isFullSet=*/false); + + // May straddle zero, so handle both positive and negative cases. + // 'PosMax' is the upper bound of the result of the ashr + // operation, when Upper of the LHS of ashr is a non-negative. + // number. Since ashr of a non-negative number will result in a + // smaller number, the Upper value of LHS is shifted right with + // the minimum value of 'Other' instead of the maximum value. + APInt PosMax = getSignedMax().ashr(Other.getUnsignedMin()) + 1; + + // 'PosMin' is the lower bound of the result of the ashr + // operation, when Lower of the LHS is a non-negative number. 
+ // Since ashr of a non-negative number will result in a smaller + // number, the Lower value of LHS is shifted right with the + // maximum value of 'Other'. + APInt PosMin = getSignedMin().ashr(Other.getUnsignedMax()); + + // 'NegMax' is the upper bound of the result of the ashr + // operation, when Upper of the LHS of ashr is a negative number. + // Since 'ashr' of a negative number will result in a bigger + // number, the Upper value of LHS is shifted right with the + // maximum value of 'Other'. + APInt NegMax = getSignedMax().ashr(Other.getUnsignedMax()) + 1; + + // 'NegMin' is the lower bound of the result of the ashr + // operation, when Lower of the LHS of ashr is a negative number. + // Since 'ashr' of a negative number will result in a bigger + // number, the Lower value of LHS is shifted right with the + // minimum value of 'Other'. + APInt NegMin = getSignedMin().ashr(Other.getUnsignedMin()); + + APInt max, min; + if (getSignedMin().isNonNegative()) { + // Upper and Lower of LHS are non-negative. + min = PosMin; + max = PosMax; + } else if (getSignedMax().isNegative()) { + // Upper and Lower of LHS are negative. + min = NegMin; + max = NegMax; + } else { + // Upper is non-negative and Lower is negative. + min = NegMin; + max = PosMax; + } + if (min == max) + return ConstantRange(getBitWidth(), /*isFullSet=*/true); + + return ConstantRange(std::move(min), std::move(max)); +} + ConstantRange ConstantRange::inverse() const { if (isFullSet()) return ConstantRange(getBitWidth(), /*isFullSet=*/false); diff --git a/lib/IR/Constants.cpp b/lib/IR/Constants.cpp index f56fe7089807..dccba779deb3 100644 --- a/lib/IR/Constants.cpp +++ b/lib/IR/Constants.cpp @@ -16,7 +16,6 @@ #include "LLVMContextImpl.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringMap.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GetElementPtrTypeIterator.h" diff --git a/lib/IR/DIBuilder.cpp b/lib/IR/DIBuilder.cpp index 837b1ec5857d..f8ae23d4395c 100644 --- a/lib/IR/DIBuilder.cpp +++ b/lib/IR/DIBuilder.cpp @@ -27,11 +27,11 @@ using namespace llvm::dwarf; cl::opt UseDbgAddr("use-dbg-addr", - llvm::cl::desc("Use llvm.dbg.addr for all local variables"), - cl::init(false)); + llvm::cl::desc("Use llvm.dbg.addr for all local variables"), + cl::init(false), cl::Hidden); -DIBuilder::DIBuilder(Module &m, bool AllowUnresolvedNodes) - : M(m), VMContext(M.getContext()), CUNode(nullptr), +DIBuilder::DIBuilder(Module &m, bool AllowUnresolvedNodes, DICompileUnit *CU) + : M(m), VMContext(M.getContext()), CUNode(CU), DeclareFn(nullptr), ValueFn(nullptr), AllowUnresolvedNodes(AllowUnresolvedNodes) {} diff --git a/lib/IR/DebugInfo.cpp b/lib/IR/DebugInfo.cpp index df0c52d44636..7fff7526b926 100644 --- a/lib/IR/DebugInfo.cpp +++ b/lib/IR/DebugInfo.cpp @@ -13,7 +13,6 @@ //===----------------------------------------------------------------------===// #include "llvm-c/DebugInfo.h" -#include "LLVMContextImpl.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/None.h" diff --git a/lib/IR/DebugInfoMetadata.cpp b/lib/IR/DebugInfoMetadata.cpp index 940c4d1f3666..75ddd47b2591 100644 --- a/lib/IR/DebugInfoMetadata.cpp +++ b/lib/IR/DebugInfoMetadata.cpp @@ -750,12 +750,17 @@ bool DIExpression::extractIfOffset(int64_t &Offset) const { return false; } -DIExpression *DIExpression::prepend(const DIExpression *Expr, bool Deref, - int64_t Offset, bool StackValue) { +DIExpression *DIExpression::prepend(const DIExpression *Expr, bool 
DerefBefore, + int64_t Offset, bool DerefAfter, + bool StackValue) { SmallVector Ops; + if (DerefBefore) + Ops.push_back(dwarf::DW_OP_deref); + appendOffset(Ops, Offset); - if (Deref) + if (DerefAfter) Ops.push_back(dwarf::DW_OP_deref); + if (Expr) for (auto Op : Expr->expr_ops()) { // A DW_OP_stack_value comes at the end, but before a DW_OP_LLVM_fragment. diff --git a/lib/IR/DebugLoc.cpp b/lib/IR/DebugLoc.cpp index 6297395b4c00..0a494119c3fe 100644 --- a/lib/IR/DebugLoc.cpp +++ b/lib/IR/DebugLoc.cpp @@ -10,7 +10,6 @@ #include "llvm/IR/DebugLoc.h" #include "LLVMContextImpl.h" #include "llvm/IR/DebugInfo.h" -#include "llvm/IR/IntrinsicInst.h" using namespace llvm; //===----------------------------------------------------------------------===// diff --git a/lib/IR/DiagnosticInfo.cpp b/lib/IR/DiagnosticInfo.cpp index 946df1a836ce..b81d205306b5 100644 --- a/lib/IR/DiagnosticInfo.cpp +++ b/lib/IR/DiagnosticInfo.cpp @@ -144,7 +144,7 @@ DiagnosticInfoOptimizationBase::Argument::Argument(StringRef Key, const Value *V else if (auto *I = dyn_cast(V)) Loc = I->getDebugLoc(); - // Only include names that correspond to user variables. FIXME: we should use + // Only include names that correspond to user variables. FIXME: We should use // debug info if available to get the name of the user variable. if (isa(V) || isa(V)) Val = GlobalValue::dropLLVMManglingEscape(V->getName()); diff --git a/lib/IR/Dominators.cpp b/lib/IR/Dominators.cpp index a5900e49ad00..e44e845b324d 100644 --- a/lib/IR/Dominators.cpp +++ b/lib/IR/Dominators.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/IR/CFG.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/PassManager.h" #include "llvm/Support/CommandLine.h" @@ -33,9 +34,9 @@ bool llvm::VerifyDomInfo = true; #else bool llvm::VerifyDomInfo = false; #endif -static cl::opt -VerifyDomInfoX("verify-dom-info", cl::location(VerifyDomInfo), - cl::desc("Verify dominator info (time consuming)")); +static cl::opt + VerifyDomInfoX("verify-dom-info", cl::location(VerifyDomInfo), cl::Hidden, + cl::desc("Verify dominator info (time consuming)")); bool BasicBlockEdge::isSingleEdge() const { const TerminatorInst *TI = Start->getTerminator(); @@ -315,7 +316,8 @@ void DominatorTree::verifyDomTree() const { DominatorTree OtherDT; OtherDT.recalculate(F); if (compare(OtherDT)) { - errs() << "DominatorTree is not up to date!\nComputed:\n"; + errs() << "DominatorTree for function " << F.getName() + << " is not up to date!\nComputed:\n"; print(errs()); errs() << "\nActual:\n"; OtherDT.print(errs()); @@ -388,3 +390,190 @@ void DominatorTreeWrapperPass::print(raw_ostream &OS, const Module *) const { DT.print(OS); } +//===----------------------------------------------------------------------===// +// DeferredDominance Implementation +//===----------------------------------------------------------------------===// +// +// The implementation details of the DeferredDominance class which allows +// one to queue updates to a DominatorTree. +// +//===----------------------------------------------------------------------===// + +/// \brief Queues multiple updates and discards duplicates. +void DeferredDominance::applyUpdates( + ArrayRef Updates) { + SmallVector Seen; + for (auto U : Updates) + // Avoid duplicates to applyUpdate() to save on analysis. 
+ if (std::none_of(Seen.begin(), Seen.end(), + [U](DominatorTree::UpdateType S) { return S == U; })) { + Seen.push_back(U); + applyUpdate(U.getKind(), U.getFrom(), U.getTo()); + } +} + +/// \brief Helper method for a single edge insertion. It's almost always better +/// to batch updates and call applyUpdates to quickly remove duplicate edges. +/// This is best used when there is only a single insertion needed to update +/// Dominators. +void DeferredDominance::insertEdge(BasicBlock *From, BasicBlock *To) { + applyUpdate(DominatorTree::Insert, From, To); +} + +/// \brief Helper method for a single edge deletion. It's almost always better +/// to batch updates and call applyUpdates to quickly remove duplicate edges. +/// This is best used when there is only a single deletion needed to update +/// Dominators. +void DeferredDominance::deleteEdge(BasicBlock *From, BasicBlock *To) { + applyUpdate(DominatorTree::Delete, From, To); +} + +/// \brief Delays the deletion of a basic block until a flush() event. +void DeferredDominance::deleteBB(BasicBlock *DelBB) { + assert(DelBB && "Invalid push_back of nullptr DelBB."); + assert(pred_empty(DelBB) && "DelBB has one or more predecessors."); + // DelBB is unreachable and all its instructions are dead. + while (!DelBB->empty()) { + Instruction &I = DelBB->back(); + // Replace used instructions with an arbitrary value (undef). + if (!I.use_empty()) + I.replaceAllUsesWith(llvm::UndefValue::get(I.getType())); + DelBB->getInstList().pop_back(); + } + // Make sure DelBB has a valid terminator instruction. As long as DelBB is a + // Child of Function F it must contain valid IR. + new UnreachableInst(DelBB->getContext(), DelBB); + DeletedBBs.insert(DelBB); +} + +/// \brief Returns true if DelBB is awaiting deletion at a flush() event. +bool DeferredDominance::pendingDeletedBB(BasicBlock *DelBB) { + if (DeletedBBs.empty()) + return false; + return DeletedBBs.count(DelBB) != 0; +} + +/// \brief Flushes all pending updates and block deletions. Returns a +/// correct DominatorTree reference to be used by the caller for analysis. +DominatorTree &DeferredDominance::flush() { + // Updates to DT must happen before blocks are deleted below. Otherwise the + // DT traversal will encounter badref blocks and assert. + if (!PendUpdates.empty()) { + DT.applyUpdates(PendUpdates); + PendUpdates.clear(); + } + flushDelBB(); + return DT; +} + +/// \brief Drops all internal state and forces a (slow) recalculation of the +/// DominatorTree based on the current state of the LLVM IR in F. This should +/// only be used in corner cases such as the Entry block of F being deleted. +void DeferredDominance::recalculate(Function &F) { + // flushDelBB must be flushed before the recalculation. The state of the IR + // must be consistent before the DT traversal algorithm determines the + // actual DT. + if (flushDelBB() || !PendUpdates.empty()) { + DT.recalculate(F); + PendUpdates.clear(); + } +} + +/// \brief Debug method to help view the state of pending updates. 
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void DeferredDominance::dump() const { + raw_ostream &OS = llvm::dbgs(); + OS << "PendUpdates:\n"; + int I = 0; + for (auto U : PendUpdates) { + OS << " " << I << " : "; + ++I; + if (U.getKind() == DominatorTree::Insert) + OS << "Insert, "; + else + OS << "Delete, "; + BasicBlock *From = U.getFrom(); + if (From) { + auto S = From->getName(); + if (!From->hasName()) + S = "(no name)"; + OS << S << "(" << From << "), "; + } else { + OS << "(badref), "; + } + BasicBlock *To = U.getTo(); + if (To) { + auto S = To->getName(); + if (!To->hasName()) + S = "(no_name)"; + OS << S << "(" << To << ")\n"; + } else { + OS << "(badref)\n"; + } + } + OS << "DeletedBBs:\n"; + I = 0; + for (auto BB : DeletedBBs) { + OS << " " << I << " : "; + ++I; + if (BB->hasName()) + OS << BB->getName() << "("; + else + OS << "(no_name)("; + OS << BB << ")\n"; + } +} +#endif + +/// Apply an update (Kind, From, To) to the internal queued updates. The +/// update is only added when determined to be necessary. Checks for +/// self-domination, unnecessary updates, duplicate requests, and balanced +/// pairs of requests are all performed. Returns true if the update is +/// queued and false if it is discarded. +bool DeferredDominance::applyUpdate(DominatorTree::UpdateKind Kind, + BasicBlock *From, BasicBlock *To) { + if (From == To) + return false; // Cannot dominate self; discard update. + + // Discard updates by inspecting the current state of successors of From. + // Since applyUpdate() must be called *after* the Terminator of From is + // altered we can determine if the update is unnecessary. + bool HasEdge = std::any_of(succ_begin(From), succ_end(From), + [To](BasicBlock *B) { return B == To; }); + if (Kind == DominatorTree::Insert && !HasEdge) + return false; // Unnecessary Insert: edge does not exist in IR. + if (Kind == DominatorTree::Delete && HasEdge) + return false; // Unnecessary Delete: edge still exists in IR. + + // Analyze pending updates to determine if the update is unnecessary. + DominatorTree::UpdateType Update = {Kind, From, To}; + DominatorTree::UpdateType Invert = {Kind != DominatorTree::Insert + ? DominatorTree::Insert + : DominatorTree::Delete, + From, To}; + for (auto I = PendUpdates.begin(), E = PendUpdates.end(); I != E; ++I) { + if (Update == *I) + return false; // Discard duplicate updates. + if (Invert == *I) { + // Update and Invert are both valid (equivalent to a no-op). Remove + // Invert from PendUpdates and discard the Update. + PendUpdates.erase(I); + return false; + } + } + PendUpdates.push_back(Update); // Save the valid update. + return true; +} + +/// Performs all pending basic block deletions. We have to defer the deletion +/// of these blocks until after the DominatorTree updates are applied. The +/// internal workings of the DominatorTree code expect every update's From +/// and To blocks to exist and to be a member of the same Function. 
+bool DeferredDominance::flushDelBB() { + if (DeletedBBs.empty()) + return false; + for (auto *BB : DeletedBBs) + BB->eraseFromParent(); + DeletedBBs.clear(); + return true; +} diff --git a/lib/IR/Function.cpp b/lib/IR/Function.cpp index d47f63a9b157..24f2f3bab886 100644 --- a/lib/IR/Function.cpp +++ b/lib/IR/Function.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "llvm/IR/Function.h" -#include "LLVMContextImpl.h" #include "SymbolTableListTraitsImpl.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseSet.h" @@ -57,6 +56,7 @@ #include using namespace llvm; +using ProfileCount = Function::ProfileCount; // Explicit instantiations of SymbolTableListTraits since some of the methods // are not in the public header file... @@ -1321,24 +1321,43 @@ void Function::setValueSubclassDataBit(unsigned Bit, bool On) { setValueSubclassData(getSubclassDataFromValue() & ~(1 << Bit)); } -void Function::setEntryCount(uint64_t Count, +void Function::setEntryCount(ProfileCount Count, const DenseSet *S) { + assert(Count.hasValue()); +#if !defined(NDEBUG) + auto PrevCount = getEntryCount(); + assert(!PrevCount.hasValue() || PrevCount.getType() == Count.getType()); +#endif MDBuilder MDB(getContext()); - setMetadata(LLVMContext::MD_prof, MDB.createFunctionEntryCount(Count, S)); + setMetadata( + LLVMContext::MD_prof, + MDB.createFunctionEntryCount(Count.getCount(), Count.isSynthetic(), S)); } -Optional Function::getEntryCount() const { +void Function::setEntryCount(uint64_t Count, Function::ProfileCountType Type, + const DenseSet *Imports) { + setEntryCount(ProfileCount(Count, Type), Imports); +} + +ProfileCount Function::getEntryCount() const { MDNode *MD = getMetadata(LLVMContext::MD_prof); if (MD && MD->getOperand(0)) - if (MDString *MDS = dyn_cast(MD->getOperand(0))) + if (MDString *MDS = dyn_cast(MD->getOperand(0))) { if (MDS->getString().equals("function_entry_count")) { ConstantInt *CI = mdconst::extract(MD->getOperand(1)); uint64_t Count = CI->getValue().getZExtValue(); - if (Count == 0) - return None; - return Count; + // A value of -1 is used for SamplePGO when there were no samples. + // Treat this the same as unknown. 
+ if (Count == (uint64_t)-1) + return ProfileCount::getInvalid(); + return ProfileCount(Count, PCT_Real); + } else if (MDS->getString().equals("synthetic_function_entry_count")) { + ConstantInt *CI = mdconst::extract(MD->getOperand(1)); + uint64_t Count = CI->getValue().getZExtValue(); + return ProfileCount(Count, PCT_Synthetic); } - return None; + } + return ProfileCount::getInvalid(); } DenseSet Function::getImportGUIDs() const { diff --git a/lib/IR/IRBuilder.cpp b/lib/IR/IRBuilder.cpp index 027c0255bcec..99795f54138c 100644 --- a/lib/IR/IRBuilder.cpp +++ b/lib/IR/IRBuilder.cpp @@ -15,6 +15,7 @@ #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Statepoint.h" @@ -83,13 +84,16 @@ CreateMemSet(Value *Ptr, Value *Val, Value *Size, unsigned Align, bool isVolatile, MDNode *TBAATag, MDNode *ScopeTag, MDNode *NoAliasTag) { Ptr = getCastedInt8PtrValue(Ptr); - Value *Ops[] = { Ptr, Val, Size, getInt32(Align), getInt1(isVolatile) }; + Value *Ops[] = {Ptr, Val, Size, getInt1(isVolatile)}; Type *Tys[] = { Ptr->getType(), Size->getType() }; Module *M = BB->getParent()->getParent(); Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys); CallInst *CI = createCallHelper(TheFn, Ops, this); - + + if (Align > 0) + cast(CI)->setDestAlignment(Align); + // Set the TBAA info if present. if (TBAATag) CI->setMetadata(LLVMContext::MD_tbaa, TBAATag); @@ -99,7 +103,7 @@ CreateMemSet(Value *Ptr, Value *Val, Value *Size, unsigned Align, if (NoAliasTag) CI->setMetadata(LLVMContext::MD_noalias, NoAliasTag); - + return CI; } @@ -107,16 +111,20 @@ CallInst *IRBuilderBase:: CreateMemCpy(Value *Dst, Value *Src, Value *Size, unsigned Align, bool isVolatile, MDNode *TBAATag, MDNode *TBAAStructTag, MDNode *ScopeTag, MDNode *NoAliasTag) { + assert((Align == 0 || isPowerOf2_32(Align)) && "Must be 0 or a power of 2"); Dst = getCastedInt8PtrValue(Dst); Src = getCastedInt8PtrValue(Src); - Value *Ops[] = { Dst, Src, Size, getInt32(Align), getInt1(isVolatile) }; + Value *Ops[] = {Dst, Src, Size, getInt1(isVolatile)}; Type *Tys[] = { Dst->getType(), Src->getType(), Size->getType() }; Module *M = BB->getParent()->getParent(); Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memcpy, Tys); CallInst *CI = createCallHelper(TheFn, Ops, this); - + + if (Align > 0) + cast(CI)->setAlignment(Align); + // Set the TBAA info if present. if (TBAATag) CI->setMetadata(LLVMContext::MD_tbaa, TBAATag); @@ -130,7 +138,7 @@ CreateMemCpy(Value *Dst, Value *Src, Value *Size, unsigned Align, if (NoAliasTag) CI->setMetadata(LLVMContext::MD_noalias, NoAliasTag); - + return CI; } @@ -154,8 +162,9 @@ CallInst *IRBuilderBase::CreateElementUnorderedAtomicMemCpy( CallInst *CI = createCallHelper(TheFn, Ops, this); // Set the alignment of the pointer args. - CI->addParamAttr(0, Attribute::getWithAlignment(CI->getContext(), DstAlign)); - CI->addParamAttr(1, Attribute::getWithAlignment(CI->getContext(), SrcAlign)); + auto *AMCI = cast(CI); + AMCI->setDestAlignment(DstAlign); + AMCI->setSourceAlignment(SrcAlign); // Set the TBAA info if present. 
if (TBAATag) @@ -178,16 +187,21 @@ CallInst *IRBuilderBase:: CreateMemMove(Value *Dst, Value *Src, Value *Size, unsigned Align, bool isVolatile, MDNode *TBAATag, MDNode *ScopeTag, MDNode *NoAliasTag) { + assert((Align == 0 || isPowerOf2_32(Align)) && "Must be 0 or a power of 2"); Dst = getCastedInt8PtrValue(Dst); Src = getCastedInt8PtrValue(Src); - - Value *Ops[] = { Dst, Src, Size, getInt32(Align), getInt1(isVolatile) }; + + Value *Ops[] = {Dst, Src, Size, getInt1(isVolatile)}; Type *Tys[] = { Dst->getType(), Src->getType(), Size->getType() }; Module *M = BB->getParent()->getParent(); Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memmove, Tys); CallInst *CI = createCallHelper(TheFn, Ops, this); - + + auto *MMI = cast(CI); + if (Align > 0) + MMI->setAlignment(Align); + // Set the TBAA info if present. if (TBAATag) CI->setMetadata(LLVMContext::MD_tbaa, TBAATag); diff --git a/lib/IR/IRPrintingPasses.cpp b/lib/IR/IRPrintingPasses.cpp index 4c8afda18b71..3b32814bed5c 100644 --- a/lib/IR/IRPrintingPasses.cpp +++ b/lib/IR/IRPrintingPasses.cpp @@ -44,8 +44,12 @@ PrintFunctionPass::PrintFunctionPass(raw_ostream &OS, const std::string &Banner) PreservedAnalyses PrintFunctionPass::run(Function &F, FunctionAnalysisManager &) { - if (isFunctionInPrintList(F.getName())) - OS << Banner << static_cast(F); + if (isFunctionInPrintList(F.getName())) { + if (forcePrintModuleIR()) + OS << Banner << " (function: " << F.getName() << ")\n" << *F.getParent(); + else + OS << Banner << static_cast(F); + } return PreservedAnalyses::all(); } diff --git a/lib/IR/Instruction.cpp b/lib/IR/Instruction.cpp index 1d371b019018..215c69072568 100644 --- a/lib/IR/Instruction.cpp +++ b/lib/IR/Instruction.cpp @@ -13,11 +13,9 @@ #include "llvm/IR/Instruction.h" #include "llvm/ADT/DenseSet.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/MDBuilder.h" -#include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" #include "llvm/IR/Type.h" using namespace llvm; @@ -591,6 +589,11 @@ bool Instruction::mayThrow() const { return isa(this); } +bool Instruction::isSafeToRemove() const { + return (!isa(this) || !this->mayHaveSideEffects()) && + !isa(this); +} + bool Instruction::isAssociative() const { unsigned Opcode = getOpcode(); if (isAssociative(Opcode)) diff --git a/lib/IR/LegacyPassManager.cpp b/lib/IR/LegacyPassManager.cpp index 995e1e570340..8bd9ed6ef0fa 100644 --- a/lib/IR/LegacyPassManager.cpp +++ b/lib/IR/LegacyPassManager.cpp @@ -75,21 +75,25 @@ PrintAfter("print-after", llvm::cl::desc("Print IR after specified passes"), cl::Hidden); +static cl::opt PrintBeforeAll("print-before-all", + llvm::cl::desc("Print IR before each pass"), + cl::init(false), cl::Hidden); +static cl::opt PrintAfterAll("print-after-all", + llvm::cl::desc("Print IR after each pass"), + cl::init(false), cl::Hidden); + static cl::opt -PrintBeforeAll("print-before-all", - llvm::cl::desc("Print IR before each pass"), - cl::init(false)); -static cl::opt -PrintAfterAll("print-after-all", - llvm::cl::desc("Print IR after each pass"), - cl::init(false)); + PrintModuleScope("print-module-scope", + cl::desc("When printing IR for print-[before|after]{-all} " + "always print a module IR"), + cl::init(false)); static cl::list PrintFuncsList("filter-print-funcs", cl::value_desc("function names"), cl::desc("Only print IR for functions whose name " "match this for all print-[before|after][-all] " "options"), - cl::CommaSeparated); + cl::CommaSeparated, cl::Hidden); /// This is a helper to determine 
whether to print IR before or /// after a pass. @@ -117,6 +121,8 @@ static bool ShouldPrintAfterPass(const PassInfo *PI) { return PrintAfterAll || ShouldPrintBeforeOrAfterPass(PI, PrintAfter); } +bool llvm::forcePrintModuleIR() { return PrintModuleScope; } + bool llvm::isFunctionInPrintList(StringRef FunctionName) { static std::unordered_set PrintFuncNames(PrintFuncsList.begin(), PrintFuncsList.end()); @@ -1729,9 +1735,9 @@ bool PassManager::run(Module &M) { // TimingInfo implementation bool llvm::TimePassesIsEnabled = false; -static cl::opt -EnableTiming("time-passes", cl::location(TimePassesIsEnabled), - cl::desc("Time each pass, printing elapsed time for each on exit")); +static cl::opt EnableTiming( + "time-passes", cl::location(TimePassesIsEnabled), cl::Hidden, + cl::desc("Time each pass, printing elapsed time for each on exit")); // createTheTimeInfo - This method either initializes the TheTimeInfo pointer to // a non-null value (if the -time-passes option is enabled) or it leaves it diff --git a/lib/IR/MDBuilder.cpp b/lib/IR/MDBuilder.cpp index d8e64db7c5d8..c32a989ef2c7 100644 --- a/lib/IR/MDBuilder.cpp +++ b/lib/IR/MDBuilder.cpp @@ -58,10 +58,14 @@ MDNode *MDBuilder::createUnpredictable() { } MDNode *MDBuilder::createFunctionEntryCount( - uint64_t Count, const DenseSet *Imports) { + uint64_t Count, bool Synthetic, + const DenseSet *Imports) { Type *Int64Ty = Type::getInt64Ty(Context); SmallVector Ops; - Ops.push_back(createString("function_entry_count")); + if (Synthetic) + Ops.push_back(createString("synthetic_function_entry_count")); + else + Ops.push_back(createString("function_entry_count")); Ops.push_back(createConstant(ConstantInt::get(Int64Ty, Count))); if (Imports) { SmallVector OrderID(Imports->begin(), Imports->end()); @@ -157,7 +161,7 @@ MDNode *MDBuilder::createTBAAStructNode(ArrayRef Fields) { for (unsigned i = 0, e = Fields.size(); i != e; ++i) { Vals[i * 3 + 0] = createConstant(ConstantInt::get(Int64, Fields[i].Offset)); Vals[i * 3 + 1] = createConstant(ConstantInt::get(Int64, Fields[i].Size)); - Vals[i * 3 + 2] = Fields[i].TBAA; + Vals[i * 3 + 2] = Fields[i].Type; } return MDNode::get(Context, Vals); } @@ -198,6 +202,63 @@ MDNode *MDBuilder::createTBAAStructTagNode(MDNode *BaseType, MDNode *AccessType, return MDNode::get(Context, {BaseType, AccessType, createConstant(Off)}); } +MDNode *MDBuilder::createTBAATypeNode(MDNode *Parent, uint64_t Size, + Metadata *Id, + ArrayRef Fields) { + SmallVector Ops(3 + Fields.size() * 3); + Type *Int64 = Type::getInt64Ty(Context); + Ops[0] = Parent; + Ops[1] = createConstant(ConstantInt::get(Int64, Size)); + Ops[2] = Id; + for (unsigned I = 0, E = Fields.size(); I != E; ++I) { + Ops[I * 3 + 3] = Fields[I].Type; + Ops[I * 3 + 4] = createConstant(ConstantInt::get(Int64, Fields[I].Offset)); + Ops[I * 3 + 5] = createConstant(ConstantInt::get(Int64, Fields[I].Size)); + } + return MDNode::get(Context, Ops); +} + +MDNode *MDBuilder::createTBAAAccessTag(MDNode *BaseType, MDNode *AccessType, + uint64_t Offset, uint64_t Size, + bool IsImmutable) { + IntegerType *Int64 = Type::getInt64Ty(Context); + auto *OffsetNode = createConstant(ConstantInt::get(Int64, Offset)); + auto *SizeNode = createConstant(ConstantInt::get(Int64, Size)); + if (IsImmutable) { + auto *ImmutabilityFlagNode = createConstant(ConstantInt::get(Int64, 1)); + return MDNode::get(Context, {BaseType, AccessType, OffsetNode, SizeNode, + ImmutabilityFlagNode}); + } + return MDNode::get(Context, {BaseType, AccessType, OffsetNode, SizeNode}); +} + +MDNode 
*MDBuilder::createMutableTBAAAccessTag(MDNode *Tag) { + MDNode *BaseType = cast(Tag->getOperand(1)); + MDNode *AccessType = cast(Tag->getOperand(1)); + Metadata *OffsetNode = Tag->getOperand(2); + uint64_t Offset = mdconst::extract(OffsetNode)->getZExtValue(); + + bool NewFormat = isa(AccessType->getOperand(0)); + + // See if the tag is already mutable. + unsigned ImmutabilityFlagOp = NewFormat ? 4 : 3; + if (Tag->getNumOperands() <= ImmutabilityFlagOp) + return Tag; + + // If Tag is already mutable then return it. + Metadata *ImmutabilityFlagNode = Tag->getOperand(ImmutabilityFlagOp); + if (!mdconst::extract(ImmutabilityFlagNode)->getValue()) + return Tag; + + // Otherwise, create another node. + if (!NewFormat) + return createTBAAStructTagNode(BaseType, AccessType, Offset); + + Metadata *SizeNode = Tag->getOperand(3); + uint64_t Size = mdconst::extract(SizeNode)->getZExtValue(); + return createTBAAAccessTag(BaseType, AccessType, Offset, Size); +} + MDNode *MDBuilder::createIrrLoopHeaderWeight(uint64_t Weight) { SmallVector Vals(2); Vals[0] = createString("loop_header_weight"); diff --git a/lib/IR/Mangler.cpp b/lib/IR/Mangler.cpp index 03723bfd2ddb..7adcc59f571e 100644 --- a/lib/IR/Mangler.cpp +++ b/lib/IR/Mangler.cpp @@ -204,3 +204,13 @@ void llvm::emitLinkerFlagsForGlobalCOFF(raw_ostream &OS, const GlobalValue *GV, OS << ",data"; } } + +void llvm::emitLinkerFlagsForUsedCOFF(raw_ostream &OS, const GlobalValue *GV, + const Triple &T, Mangler &M) { + if (!T.isKnownWindowsMSVCEnvironment()) + return; + + OS << " /INCLUDE:"; + M.getNameWithPrefix(OS, GV, false); +} + diff --git a/lib/IR/ModuleSummaryIndex.cpp b/lib/IR/ModuleSummaryIndex.cpp index 51c4bae3332e..ce4c8cc3c807 100644 --- a/lib/IR/ModuleSummaryIndex.cpp +++ b/lib/IR/ModuleSummaryIndex.cpp @@ -14,6 +14,7 @@ #include "llvm/IR/ModuleSummaryIndex.h" #include "llvm/ADT/StringMap.h" +#include "llvm/Support/Path.h" using namespace llvm; // Collect for the given module the list of function it defines @@ -69,3 +70,247 @@ bool ModuleSummaryIndex::isGUIDLive(GlobalValue::GUID GUID) const { return true; return false; } + +namespace { +struct Attributes { + void add(const Twine &Name, const Twine &Value, + const Twine &Comment = Twine()); + std::string getAsString() const; + + std::vector Attrs; + std::string Comments; +}; + +struct Edge { + uint64_t SrcMod; + int Hotness; + GlobalValue::GUID Src; + GlobalValue::GUID Dst; +}; +} + +void Attributes::add(const Twine &Name, const Twine &Value, + const Twine &Comment) { + std::string A = Name.str(); + A += "=\""; + A += Value.str(); + A += "\""; + Attrs.push_back(A); + if (!Comment.isTriviallyEmpty()) { + if (Comments.empty()) + Comments = " // "; + else + Comments += ", "; + Comments += Comment.str(); + } +} + +std::string Attributes::getAsString() const { + if (Attrs.empty()) + return ""; + + std::string Ret = "["; + for (auto &A : Attrs) + Ret += A + ","; + Ret.pop_back(); + Ret += "];"; + Ret += Comments; + return Ret; +} + +static std::string linkageToString(GlobalValue::LinkageTypes LT) { + switch (LT) { + case GlobalValue::ExternalLinkage: + return "extern"; + case GlobalValue::AvailableExternallyLinkage: + return "av_ext"; + case GlobalValue::LinkOnceAnyLinkage: + return "linkonce"; + case GlobalValue::LinkOnceODRLinkage: + return "linkonce_odr"; + case GlobalValue::WeakAnyLinkage: + return "weak"; + case GlobalValue::WeakODRLinkage: + return "weak_odr"; + case GlobalValue::AppendingLinkage: + return "appending"; + case GlobalValue::InternalLinkage: + return "internal"; + case 
GlobalValue::PrivateLinkage: + return "private"; + case GlobalValue::ExternalWeakLinkage: + return "extern_weak"; + case GlobalValue::CommonLinkage: + return "common"; + } + + return ""; +} + +static std::string fflagsToString(FunctionSummary::FFlags F) { + auto FlagValue = [](unsigned V) { return V ? '1' : '0'; }; + char FlagRep[] = {FlagValue(F.ReadNone), FlagValue(F.ReadOnly), + FlagValue(F.NoRecurse), FlagValue(F.ReturnDoesNotAlias), 0}; + + return FlagRep; +} + +// Get string representation of function instruction count and flags. +static std::string getSummaryAttributes(GlobalValueSummary* GVS) { + auto *FS = dyn_cast_or_null(GVS); + if (!FS) + return ""; + + return std::string("inst: ") + std::to_string(FS->instCount()) + + ", ffl: " + fflagsToString(FS->fflags()); +} + +static std::string getNodeVisualName(const ValueInfo &VI) { + return VI.name().empty() ? std::string("@") + std::to_string(VI.getGUID()) + : VI.name().str(); +} + +static std::string getNodeLabel(const ValueInfo &VI, GlobalValueSummary *GVS) { + if (isa(GVS)) + return getNodeVisualName(VI); + + std::string Attrs = getSummaryAttributes(GVS); + std::string Label = + getNodeVisualName(VI) + "|" + linkageToString(GVS->linkage()); + if (!Attrs.empty()) + Label += std::string(" (") + Attrs + ")"; + Label += "}"; + + return Label; +} + +// Write definition of external node, which doesn't have any +// specific module associated with it. Typically this is function +// or variable defined in native object or library. +static void defineExternalNode(raw_ostream &OS, const char *Pfx, + const ValueInfo &VI) { + auto StrId = std::to_string(VI.getGUID()); + OS << " " << StrId << " [label=\"" << getNodeVisualName(VI) + << "\"]; // defined externally\n"; +} + +void ModuleSummaryIndex::exportToDot(raw_ostream& OS) const { + std::vector CrossModuleEdges; + DenseMap> NodeMap; + StringMap ModuleToDefinedGVS; + collectDefinedGVSummariesPerModule(ModuleToDefinedGVS); + + // Get node identifier in form MXXX_. The MXXX prefix is required, + // because we may have multiple linkonce functions summaries. + auto NodeId = [](uint64_t ModId, GlobalValue::GUID Id) { + return ModId == (uint64_t)-1 ? std::to_string(Id) + : std::string("M") + std::to_string(ModId) + + "_" + std::to_string(Id); + }; + + auto DrawEdge = [&](const char *Pfx, int SrcMod, GlobalValue::GUID SrcId, + int DstMod, GlobalValue::GUID DstId, int TypeOrHotness) { + // 0 corresponds to alias edge, 1 to ref edge, 2 to call with unknown + // hotness, ... 
+ TypeOrHotness += 2; + static const char *EdgeAttrs[] = { + " [style=dotted]; // alias", + " [style=dashed]; // ref", + " // call (hotness : Unknown)", + " [color=blue]; // call (hotness : Cold)", + " // call (hotness : None)", + " [color=brown]; // call (hotness : Hot)", + " [style=bold,color=red]; // call (hotness : Critical)"}; + + assert(static_cast(TypeOrHotness) < + sizeof(EdgeAttrs) / sizeof(EdgeAttrs[0])); + OS << Pfx << NodeId(SrcMod, SrcId) << " -> " << NodeId(DstMod, DstId) + << EdgeAttrs[TypeOrHotness] << "\n"; + }; + + OS << "digraph Summary {\n"; + for (auto &ModIt : ModuleToDefinedGVS) { + auto ModId = getModuleId(ModIt.first()); + OS << " // Module: " << ModIt.first() << "\n"; + OS << " subgraph cluster_" << std::to_string(ModId) << " {\n"; + OS << " style = filled;\n"; + OS << " color = lightgrey;\n"; + OS << " label = \"" << sys::path::filename(ModIt.first()) << "\";\n"; + OS << " node [style=filled,fillcolor=lightblue];\n"; + + auto &GVSMap = ModIt.second; + auto Draw = [&](GlobalValue::GUID IdFrom, GlobalValue::GUID IdTo, int Hotness) { + if (!GVSMap.count(IdTo)) { + CrossModuleEdges.push_back({ModId, Hotness, IdFrom, IdTo}); + return; + } + DrawEdge(" ", ModId, IdFrom, ModId, IdTo, Hotness); + }; + + for (auto &SummaryIt : GVSMap) { + NodeMap[SummaryIt.first].push_back(ModId); + auto Flags = SummaryIt.second->flags(); + Attributes A; + if (isa(SummaryIt.second)) { + A.add("shape", "record", "function"); + } else if (isa(SummaryIt.second)) { + A.add("style", "dotted,filled", "alias"); + A.add("shape", "box"); + } else { + A.add("shape", "Mrecord", "variable"); + } + + auto VI = getValueInfo(SummaryIt.first); + A.add("label", getNodeLabel(VI, SummaryIt.second)); + if (!Flags.Live) + A.add("fillcolor", "red", "dead"); + else if (Flags.NotEligibleToImport) + A.add("fillcolor", "yellow", "not eligible to import"); + + OS << " " << NodeId(ModId, SummaryIt.first) << " " << A.getAsString() + << "\n"; + } + OS << " // Edges:\n"; + + for (auto &SummaryIt : GVSMap) { + auto *GVS = SummaryIt.second; + for (auto &R : GVS->refs()) + Draw(SummaryIt.first, R.getGUID(), -1); + + if (auto *AS = dyn_cast_or_null(SummaryIt.second)) { + auto AliaseeOrigId = AS->getAliasee().getOriginalName(); + auto AliaseeId = getGUIDFromOriginalID(AliaseeOrigId); + + Draw(SummaryIt.first, AliaseeId ? AliaseeId : AliaseeOrigId, -2); + continue; + } + + if (auto *FS = dyn_cast_or_null(SummaryIt.second)) + for (auto &CGEdge : FS->calls()) + Draw(SummaryIt.first, CGEdge.first.getGUID(), + static_cast(CGEdge.second.Hotness)); + } + OS << " }\n"; + } + + OS << " // Cross-module edges:\n"; + for (auto &E : CrossModuleEdges) { + auto &ModList = NodeMap[E.Dst]; + if (ModList.empty()) { + defineExternalNode(OS, " ", getValueInfo(E.Dst)); + // Add fake module to the list to draw an edge to an external node + // in the loop below. + ModList.push_back(-1); + } + for (auto DstMod : ModList) + // The edge representing call or ref is drawn to every module where target + // symbol is defined. When target is a linkonce symbol there can be + // multiple edges representing a single call or ref, both intra-module and + // cross-module. As we've already drawn all intra-module edges before we + // skip it here. 
+ if (DstMod != E.SrcMod) + DrawEdge(" ", E.SrcMod, E.Src, DstMod, E.Dst, E.Hotness); + } + + OS << "}"; +} diff --git a/lib/IR/SafepointIRVerifier.cpp b/lib/IR/SafepointIRVerifier.cpp index 02382afb8c49..04deb434cec2 100644 --- a/lib/IR/SafepointIRVerifier.cpp +++ b/lib/IR/SafepointIRVerifier.cpp @@ -32,6 +32,7 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SetOperations.h" #include "llvm/ADT/SetVector.h" #include "llvm/IR/BasicBlock.h" @@ -136,92 +137,25 @@ static void PrintValueSet(raw_ostream &OS, IteratorTy Begin, IteratorTy End) { /// correctly relocated value at that point, and is a subset of the set of /// definitions dominating that point. +using AvailableValueSet = DenseSet; + /// State we compute and track per basic block. struct BasicBlockState { // Set of values available coming in, before the phi nodes - DenseSet AvailableIn; + AvailableValueSet AvailableIn; // Set of values available going out - DenseSet AvailableOut; + AvailableValueSet AvailableOut; // AvailableOut minus AvailableIn. // All elements are Instructions - DenseSet Contribution; + AvailableValueSet Contribution; // True if this block contains a safepoint and thus AvailableIn does not // contribute to AvailableOut. bool Cleared = false; }; - -/// Gather all the definitions dominating the start of BB into Result. This is -/// simply the Defs introduced by every dominating basic block and the function -/// arguments. -static void GatherDominatingDefs(const BasicBlock *BB, - DenseSet &Result, - const DominatorTree &DT, - DenseMap &BlockMap) { - DomTreeNode *DTN = DT[const_cast(BB)]; - - while (DTN->getIDom()) { - DTN = DTN->getIDom(); - const auto &Defs = BlockMap[DTN->getBlock()]->Contribution; - Result.insert(Defs.begin(), Defs.end()); - // If this block is 'Cleared', then nothing LiveIn to this block can be - // available after this block completes. Note: This turns out to be - // really important for reducing memory consuption of the initial available - // sets and thus peak memory usage by this verifier. - if (BlockMap[DTN->getBlock()]->Cleared) - return; - } - - for (const Argument &A : BB->getParent()->args()) - if (containsGCPtrType(A.getType())) - Result.insert(&A); -} - -/// Model the effect of an instruction on the set of available values. -static void TransferInstruction(const Instruction &I, bool &Cleared, - DenseSet &Available) { - if (isStatepoint(I)) { - Cleared = true; - Available.clear(); - } else if (containsGCPtrType(I.getType())) - Available.insert(&I); -} - -/// Compute the AvailableOut set for BB, based on the -/// BasicBlockState BBS, which is the BasicBlockState for BB. FirstPass is set -/// when the verifier runs for the first time computing the AvailableOut set -/// for BB. -static void TransferBlock(const BasicBlock *BB, - BasicBlockState &BBS, bool FirstPass) { - - const DenseSet &AvailableIn = BBS.AvailableIn; - DenseSet &AvailableOut = BBS.AvailableOut; - - if (BBS.Cleared) { - // AvailableOut does not change no matter how the input changes, just - // leave it be. We need to force this calculation the first time so that - // we have a AvailableOut at all. 
- if (FirstPass) { - AvailableOut = BBS.Contribution; - } - } else { - // Otherwise, we need to reduce the AvailableOut set by things which are no - // longer in our AvailableIn - DenseSet Temp = BBS.Contribution; - set_union(Temp, AvailableIn); - AvailableOut = std::move(Temp); - } - - DEBUG(dbgs() << "Transfered block " << BB->getName() << " from "; - PrintValueSet(dbgs(), AvailableIn.begin(), AvailableIn.end()); - dbgs() << " to "; - PrintValueSet(dbgs(), AvailableOut.begin(), AvailableOut.end()); - dbgs() << "\n";); -} - /// A given derived pointer can have multiple base pointers through phi/selects. /// This type indicates when the base pointer is exclusively constant /// (ExclusivelySomeConstant), and if that constant is proven to be exclusively @@ -293,32 +227,224 @@ static enum BaseType getBaseType(const Value *Val) { : BaseType::ExclusivelySomeConstant; } -static void Verify(const Function &F, const DominatorTree &DT) { +static bool isNotExclusivelyConstantDerived(const Value *V) { + return getBaseType(V) == BaseType::NonConstant; +} + +namespace { +class InstructionVerifier; + +/// Builds BasicBlockState for each BB of the function. +/// It can traverse the function for verification and provides all the +/// required information. +/// +/// A GC pointer may be in one of three states: relocated, unrelocated and +/// poisoned. +/// A relocated pointer may be used without any restrictions. +/// An unrelocated pointer cannot be dereferenced, passed as an argument to +/// any call or returned. An unrelocated pointer may be safely compared +/// against another unrelocated pointer or against a pointer exclusively +/// derived from null. +/// Poisoned pointers are produced when we somehow derive a pointer from both +/// relocated and unrelocated pointers (e.g. phi, select). These pointers may +/// be safely used in a very limited number of situations. Currently the only +/// permitted use is comparison against a constant exclusively derived from +/// null. All limitations arise from their undefined state: these pointers +/// should be treated as relocated and unrelocated simultaneously. +/// Rules of deriving: +/// R + U = P - that's where the poisoned pointers come from +/// P + X = P +/// U + U = U +/// R + R = R +/// X + C = X +/// Where "+" is any operation that somehow derives a pointer, U is +/// unrelocated, R is relocated, P is poisoned, C is constant, and X is U, R, +/// P, C or nothing (in case "+" is a unary operation). +/// Deriving a pointer is by itself always safe. +/// NOTE: when deciding the status of an instruction's result: +/// a) for a phi we need to check the status of each input *at the end of +/// the corresponding predecessor BB*. +/// b) for other instructions we need to check the status of each input *at +/// the current point*. +/// +/// FIXME: This works fairly well except for one case +/// bb1: +/// p = *some GC-ptr def* +/// p1 = gep p, offset +/// / | +/// / | +/// bb2: | +/// safepoint | +/// \ | +/// \ | +/// bb3: +/// p2 = phi [p, bb2] [p1, bb1] +/// p3 = phi [p, bb2] [p, bb1] +/// here p and p1 are unrelocated +/// p2 and p3 are poisoned (though they shouldn't be) +/// +/// This leads to some weird results: +/// cmp eq p, p2 - illegal instruction (false-positive) +/// cmp eq p1, p2 - illegal instruction (false-positive) +/// cmp eq p, p3 - illegal instruction (false-positive) +/// cmp eq p, p1 - ok +/// To fix this we need to introduce the concept of generations and be able to +/// check whether two values belong to the same generation or not.
This way p2 will be +/// considered to be unrelocated and no false alarm will happen. +class GCPtrTracker { + const Function &F; SpecificBumpPtrAllocator BSAllocator; DenseMap BlockMap; - - DEBUG(dbgs() << "Verifying gc pointers in function: " << F.getName() << "\n"); - if (PrintOnly) - dbgs() << "Verifying gc pointers in function: " << F.getName() << "\n"; + // This set contains defs of unrelocated pointers that are proved to be legal + // and don't need verification. + DenseSet ValidUnrelocatedDefs; + // This set contains poisoned defs. They can be safely ignored during + // verification too. + DenseSet PoisonedDefs; + +public: + GCPtrTracker(const Function &F, const DominatorTree &DT); + + BasicBlockState *getBasicBlockState(const BasicBlock *BB); + const BasicBlockState *getBasicBlockState(const BasicBlock *BB) const; + + bool isValuePoisoned(const Value *V) const { return PoisonedDefs.count(V); } + + /// Traverse each BB of the function and call + /// InstructionVerifier::verifyInstruction for each possibly invalid + /// instruction. + /// It destructively modifies GCPtrTracker so it's passed via rvalue reference + /// in order to prohibit further usages of GCPtrTracker as it'll be in + /// inconsistent state. + static void verifyFunction(GCPtrTracker &&Tracker, + InstructionVerifier &Verifier); + +private: + /// Returns true if the instruction may be safely skipped during verification. + bool instructionMayBeSkipped(const Instruction *I) const; + + /// Iterates over all BBs from BlockMap and recalculates AvailableIn/Out for + /// each of them until it converges. + void recalculateBBsStates(); + + /// Remove from Contribution all defs that legally produce unrelocated + /// pointers and saves them to ValidUnrelocatedDefs. + /// Though Contribution should belong to BBS it is passed separately with + /// different const-modifier in order to emphasize (and guarantee) that only + /// Contribution will be changed. + /// Returns true if Contribution was changed otherwise false. + bool removeValidUnrelocatedDefs(const BasicBlock *BB, + const BasicBlockState *BBS, + AvailableValueSet &Contribution); + + /// Gather all the definitions dominating the start of BB into Result. This is + /// simply the defs introduced by every dominating basic block and the + /// function arguments. + void gatherDominatingDefs(const BasicBlock *BB, AvailableValueSet &Result, + const DominatorTree &DT); + + /// Compute the AvailableOut set for BB, based on the BasicBlockState BBS, + /// which is the BasicBlockState for BB. + /// ContributionChanged is set when the verifier runs for the first time + /// (in this case Contribution was changed from 'empty' to its initial state) + /// or when Contribution of this BB was changed since last computation. + static void transferBlock(const BasicBlock *BB, BasicBlockState &BBS, + bool ContributionChanged); + + /// Model the effect of an instruction on the set of available values. + static void transferInstruction(const Instruction &I, bool &Cleared, + AvailableValueSet &Available); +}; +/// It is a visitor for GCPtrTracker::verifyFunction. It decides if the +/// instruction (which uses heap reference) is legal or not, given our safepoint +/// semantics. 
+class InstructionVerifier { + bool AnyInvalidUses = false; + +public: + void verifyInstruction(const GCPtrTracker *Tracker, const Instruction &I, + const AvailableValueSet &AvailableSet); + + bool hasAnyInvalidUses() const { return AnyInvalidUses; } + +private: + void reportInvalidUse(const Value &V, const Instruction &I); +}; +} // end anonymous namespace +GCPtrTracker::GCPtrTracker(const Function &F, const DominatorTree &DT) : F(F) { + // First, calculate Contribution of each BB. for (const BasicBlock &BB : F) { - BasicBlockState *BBS = new(BSAllocator.Allocate()) BasicBlockState; + BasicBlockState *BBS = new (BSAllocator.Allocate()) BasicBlockState; for (const auto &I : BB) - TransferInstruction(I, BBS->Cleared, BBS->Contribution); + transferInstruction(I, BBS->Cleared, BBS->Contribution); BlockMap[&BB] = BBS; } + // Initialize AvailableIn/Out sets of each BB using only information about + // dominating BBs. for (auto &BBI : BlockMap) { - GatherDominatingDefs(BBI.first, BBI.second->AvailableIn, DT, BlockMap); - TransferBlock(BBI.first, *BBI.second, true); + gatherDominatingDefs(BBI.first, BBI.second->AvailableIn, DT); + transferBlock(BBI.first, *BBI.second, true); } + // Simulate the flow of defs through the CFG and recalculate AvailableIn/Out + // sets of each BB until it converges. If any def is proved to be an + // unrelocated pointer, it will be removed from all BBSs. + recalculateBBsStates(); +} + +BasicBlockState *GCPtrTracker::getBasicBlockState(const BasicBlock *BB) { + auto it = BlockMap.find(BB); + assert(it != BlockMap.end() && + "No such BB in BlockMap! Probably BB from another function"); + return it->second; +} + +const BasicBlockState *GCPtrTracker::getBasicBlockState( + const BasicBlock *BB) const { + return const_cast(this)->getBasicBlockState(BB); +} + +bool GCPtrTracker::instructionMayBeSkipped(const Instruction *I) const { + // Poisoned defs are skipped since they are always safe by itself by + // definition (for details see comment to this class). + return ValidUnrelocatedDefs.count(I) || PoisonedDefs.count(I); +} + +void GCPtrTracker::verifyFunction(GCPtrTracker &&Tracker, + InstructionVerifier &Verifier) { + // We need RPO here to a) report always the first error b) report errors in + // same order from run to run. + ReversePostOrderTraversal RPOT(&Tracker.F); + for (const BasicBlock *BB : RPOT) { + BasicBlockState *BBS = Tracker.getBasicBlockState(BB); + // We destructively modify AvailableIn as we traverse the block instruction + // by instruction. + AvailableValueSet &AvailableSet = BBS->AvailableIn; + for (const Instruction &I : *BB) { + if (Tracker.instructionMayBeSkipped(&I)) + continue; // This instruction shouldn't be added to AvailableSet. + + Verifier.verifyInstruction(&Tracker, I, AvailableSet); + + // Model the effect of current instruction on AvailableSet to keep the set + // relevant at each point of BB. + bool Cleared = false; + transferInstruction(I, Cleared, AvailableSet); + (void)Cleared; + } + } +} + +void GCPtrTracker::recalculateBBsStates() { SetVector Worklist; + // TODO: This order is suboptimal, it's better to replace it with priority + // queue where priority is RPO number of BB. for (auto &BBI : BlockMap) Worklist.insert(BBI.first); - // This loop iterates the AvailableIn and AvailableOut sets to a fixed point. + // This loop iterates the AvailableIn/Out sets until it converges. // The AvailableIn and AvailableOut sets decrease as we iterate. 
while (!Worklist.empty()) { const BasicBlock *BB = Worklist.pop_back_val(); @@ -328,111 +454,263 @@ static void Verify(const Function &F, const DominatorTree &DT) { for (const BasicBlock *PBB : predecessors(BB)) set_intersect(BBS->AvailableIn, BlockMap[PBB]->AvailableOut); - if (OldInCount == BBS->AvailableIn.size()) - continue; + assert(OldInCount >= BBS->AvailableIn.size() && "invariant!"); - assert(OldInCount > BBS->AvailableIn.size() && "invariant!"); + bool InputsChanged = OldInCount != BBS->AvailableIn.size(); + bool ContributionChanged = + removeValidUnrelocatedDefs(BB, BBS, BBS->Contribution); + if (!InputsChanged && !ContributionChanged) + continue; size_t OldOutCount = BBS->AvailableOut.size(); - TransferBlock(BB, *BBS, false); + transferBlock(BB, *BBS, ContributionChanged); if (OldOutCount != BBS->AvailableOut.size()) { assert(OldOutCount > BBS->AvailableOut.size() && "invariant!"); Worklist.insert(succ_begin(BB), succ_end(BB)); } } +} - // We now have all the information we need to decide if the use of a heap - // reference is legal or not, given our safepoint semantics. - - bool AnyInvalidUses = false; - - auto ReportInvalidUse = [&AnyInvalidUses](const Value &V, - const Instruction &I) { - errs() << "Illegal use of unrelocated value found!\n"; - errs() << "Def: " << V << "\n"; - errs() << "Use: " << I << "\n"; - if (!PrintOnly) - abort(); - AnyInvalidUses = true; - }; - - auto isNotExclusivelyConstantDerived = [](const Value *V) { - return getBaseType(V) == BaseType::NonConstant; - }; - - for (const BasicBlock &BB : F) { - // We destructively modify AvailableIn as we traverse the block instruction - // by instruction. - DenseSet &AvailableSet = BlockMap[&BB]->AvailableIn; - for (const Instruction &I : BB) { - if (const PHINode *PN = dyn_cast(&I)) { - if (containsGCPtrType(PN->getType())) - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { - const BasicBlock *InBB = PN->getIncomingBlock(i); - const Value *InValue = PN->getIncomingValue(i); - - if (isNotExclusivelyConstantDerived(InValue) && - !BlockMap[InBB]->AvailableOut.count(InValue)) - ReportInvalidUse(*InValue, *PN); +bool GCPtrTracker::removeValidUnrelocatedDefs(const BasicBlock *BB, + const BasicBlockState *BBS, + AvailableValueSet &Contribution) { + assert(&BBS->Contribution == &Contribution && + "Passed Contribution should be from the passed BasicBlockState!"); + AvailableValueSet AvailableSet = BBS->AvailableIn; + bool ContributionChanged = false; + // For explanation why instructions are processed this way see + // "Rules of deriving" in the comment to this class. + for (const Instruction &I : *BB) { + bool ValidUnrelocatedPointerDef = false; + bool PoisonedPointerDef = false; + // TODO: `select` instructions should be handled here too. + if (const PHINode *PN = dyn_cast(&I)) { + if (containsGCPtrType(PN->getType())) { + // If both is true, output is poisoned. + bool HasRelocatedInputs = false; + bool HasUnrelocatedInputs = false; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + const BasicBlock *InBB = PN->getIncomingBlock(i); + const Value *InValue = PN->getIncomingValue(i); + + if (isNotExclusivelyConstantDerived(InValue)) { + if (isValuePoisoned(InValue)) { + // If any of inputs is poisoned, output is always poisoned too. 
+ HasRelocatedInputs = true; + HasUnrelocatedInputs = true; + break; + } + if (BlockMap[InBB]->AvailableOut.count(InValue)) + HasRelocatedInputs = true; + else + HasUnrelocatedInputs = true; } - } else if (isa(I) && - containsGCPtrType(I.getOperand(0)->getType())) { - Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); - enum BaseType baseTyLHS = getBaseType(LHS), - baseTyRHS = getBaseType(RHS); - - // Returns true if LHS and RHS are unrelocated pointers and they are - // valid unrelocated uses. - auto hasValidUnrelocatedUse = [&AvailableSet, baseTyLHS, baseTyRHS, &LHS, &RHS] () { - // A cmp instruction has valid unrelocated pointer operands only if - // both operands are unrelocated pointers. - // In the comparison between two pointers, if one is an unrelocated - // use, the other *should be* an unrelocated use, for this - // instruction to contain valid unrelocated uses. This unrelocated - // use can be a null constant as well, or another unrelocated - // pointer. - if (AvailableSet.count(LHS) || AvailableSet.count(RHS)) - return false; - // Constant pointers (that are not exclusively null) may have - // meaning in different VMs, so we cannot reorder the compare - // against constant pointers before the safepoint. In other words, - // comparison of an unrelocated use against a non-null constant - // maybe invalid. - if ((baseTyLHS == BaseType::ExclusivelySomeConstant && - baseTyRHS == BaseType::NonConstant) || - (baseTyLHS == BaseType::NonConstant && - baseTyRHS == BaseType::ExclusivelySomeConstant)) - return false; - // All other cases are valid cases enumerated below: - // 1. Comparison between an exlusively derived null pointer and a - // constant base pointer. - // 2. Comparison between an exlusively derived null pointer and a - // non-constant unrelocated base pointer. - // 3. Comparison between 2 unrelocated pointers. - return true; - }; - if (!hasValidUnrelocatedUse()) { - // Print out all non-constant derived pointers that are unrelocated - // uses, which are invalid. - if (baseTyLHS == BaseType::NonConstant && !AvailableSet.count(LHS)) - ReportInvalidUse(*LHS, I); - if (baseTyRHS == BaseType::NonConstant && !AvailableSet.count(RHS)) - ReportInvalidUse(*RHS, I); } - } else { - for (const Value *V : I.operands()) - if (containsGCPtrType(V->getType()) && - isNotExclusivelyConstantDerived(V) && !AvailableSet.count(V)) - ReportInvalidUse(*V, I); + if (HasUnrelocatedInputs) { + if (HasRelocatedInputs) + PoisonedPointerDef = true; + else + ValidUnrelocatedPointerDef = true; + } } - + } else if ((isa(I) || isa(I)) && + containsGCPtrType(I.getType())) { + // GEP/bitcast of unrelocated pointer is legal by itself but this def + // shouldn't appear in any AvailableSet. + for (const Value *V : I.operands()) + if (containsGCPtrType(V->getType()) && + isNotExclusivelyConstantDerived(V) && !AvailableSet.count(V)) { + if (isValuePoisoned(V)) + PoisonedPointerDef = true; + else + ValidUnrelocatedPointerDef = true; + break; + } + } + assert(!(ValidUnrelocatedPointerDef && PoisonedPointerDef) && + "Value cannot be both unrelocated and poisoned!"); + if (ValidUnrelocatedPointerDef) { + // Remove def of unrelocated pointer from Contribution of this BB and + // trigger update of all its successors. 
+ Contribution.erase(&I); + PoisonedDefs.erase(&I); + ValidUnrelocatedDefs.insert(&I); + DEBUG(dbgs() << "Removing unrelocated " << I << " from Contribution of " + << BB->getName() << "\n"); + ContributionChanged = true; + } else if (PoisonedPointerDef) { + // Mark pointer as poisoned, remove its def from Contribution and trigger + // update of all successors. + Contribution.erase(&I); + PoisonedDefs.insert(&I); + DEBUG(dbgs() << "Removing poisoned " << I << " from Contribution of " + << BB->getName() << "\n"); + ContributionChanged = true; + } else { bool Cleared = false; - TransferInstruction(I, Cleared, AvailableSet); + transferInstruction(I, Cleared, AvailableSet); (void)Cleared; } } + return ContributionChanged; +} + +void GCPtrTracker::gatherDominatingDefs(const BasicBlock *BB, + AvailableValueSet &Result, + const DominatorTree &DT) { + DomTreeNode *DTN = DT[const_cast<BasicBlock *>(BB)]; + + while (DTN->getIDom()) { + DTN = DTN->getIDom(); + const auto &Defs = BlockMap[DTN->getBlock()]->Contribution; + Result.insert(Defs.begin(), Defs.end()); + // If this block is 'Cleared', then nothing LiveIn to this block can be + // available after this block completes. Note: This turns out to be + // really important for reducing memory consumption of the initial available + // sets and thus peak memory usage by this verifier. + if (BlockMap[DTN->getBlock()]->Cleared) + return; + } + + for (const Argument &A : BB->getParent()->args()) + if (containsGCPtrType(A.getType())) + Result.insert(&A); +} + +void GCPtrTracker::transferBlock(const BasicBlock *BB, BasicBlockState &BBS, + bool ContributionChanged) { + const AvailableValueSet &AvailableIn = BBS.AvailableIn; + AvailableValueSet &AvailableOut = BBS.AvailableOut; + + if (BBS.Cleared) { + // AvailableOut will change only when Contribution changed. + if (ContributionChanged) + AvailableOut = BBS.Contribution; + } else { + // Otherwise, we need to reduce the AvailableOut set by things which are no + // longer in our AvailableIn + AvailableValueSet Temp = BBS.Contribution; + set_union(Temp, AvailableIn); + AvailableOut = std::move(Temp); + } + + DEBUG(dbgs() << "Transferred block " << BB->getName() << " from "; + PrintValueSet(dbgs(), AvailableIn.begin(), AvailableIn.end()); + dbgs() << " to "; + PrintValueSet(dbgs(), AvailableOut.begin(), AvailableOut.end()); + dbgs() << "\n";); +} + +void GCPtrTracker::transferInstruction(const Instruction &I, bool &Cleared, + AvailableValueSet &Available) { + if (isStatepoint(I)) { + Cleared = true; + Available.clear(); + } else if (containsGCPtrType(I.getType())) + Available.insert(&I); +} + +void InstructionVerifier::verifyInstruction( + const GCPtrTracker *Tracker, const Instruction &I, + const AvailableValueSet &AvailableSet) { + if (const PHINode *PN = dyn_cast<PHINode>(&I)) { + if (containsGCPtrType(PN->getType())) + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + const BasicBlock *InBB = PN->getIncomingBlock(i); + const Value *InValue = PN->getIncomingValue(i); + + if (isNotExclusivelyConstantDerived(InValue) && + !Tracker->getBasicBlockState(InBB)->AvailableOut.count(InValue)) + reportInvalidUse(*InValue, *PN); + } + } else if (isa<CmpInst>(I) && + containsGCPtrType(I.getOperand(0)->getType())) { + Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); + enum BaseType baseTyLHS = getBaseType(LHS), + baseTyRHS = getBaseType(RHS); + + // Returns true if LHS and RHS are unrelocated pointers and they are + // valid unrelocated uses.
+ auto hasValidUnrelocatedUse = [&AvailableSet, Tracker, baseTyLHS, baseTyRHS, + &LHS, &RHS] () { + // A cmp instruction has valid unrelocated pointer operands only if + // both operands are unrelocated pointers. + // In the comparison between two pointers, if one is an unrelocated + // use, the other *should be* an unrelocated use, for this + // instruction to contain valid unrelocated uses. This unrelocated + // use can be a null constant as well, or another unrelocated + // pointer. + if (AvailableSet.count(LHS) || AvailableSet.count(RHS)) + return false; + // Constant pointers (that are not exclusively null) may have + // meaning in different VMs, so we cannot reorder the compare + // against constant pointers before the safepoint. In other words, + // comparison of an unrelocated use against a non-null constant + // may be invalid. + if ((baseTyLHS == BaseType::ExclusivelySomeConstant && + baseTyRHS == BaseType::NonConstant) || + (baseTyLHS == BaseType::NonConstant && + baseTyRHS == BaseType::ExclusivelySomeConstant)) + return false; + + // If one of the pointers is poisoned and the other is not exclusively + // derived from null, it is an invalid expression: it produces a + // poisoned result, and unless we want to track all defs (not only gc + // pointers) the only option is to prohibit such instructions. + if ((Tracker->isValuePoisoned(LHS) && baseTyRHS != ExclusivelyNull) || + (Tracker->isValuePoisoned(RHS) && baseTyLHS != ExclusivelyNull)) + return false; + + // All other cases are valid cases enumerated below: + // 1. Comparison between an exclusively derived null pointer and a + // constant base pointer. + // 2. Comparison between an exclusively derived null pointer and a + // non-constant unrelocated base pointer. + // 3. Comparison between 2 unrelocated pointers. + // 4. Comparison between a pointer exclusively derived from null and a + // non-constant poisoned pointer. + return true; + }; + if (!hasValidUnrelocatedUse()) { + // Print out all non-constant derived pointers that are unrelocated + // uses, which are invalid. + if (baseTyLHS == BaseType::NonConstant && !AvailableSet.count(LHS)) + reportInvalidUse(*LHS, I); + if (baseTyRHS == BaseType::NonConstant && !AvailableSet.count(RHS)) + reportInvalidUse(*RHS, I); + } + } else { + for (const Value *V : I.operands()) + if (containsGCPtrType(V->getType()) && + isNotExclusivelyConstantDerived(V) && !AvailableSet.count(V)) + reportInvalidUse(*V, I); + } +} + +void InstructionVerifier::reportInvalidUse(const Value &V, + const Instruction &I) { + errs() << "Illegal use of unrelocated value found!\n"; + errs() << "Def: " << V << "\n"; + errs() << "Use: " << I << "\n"; + if (!PrintOnly) + abort(); + AnyInvalidUses = true; +} + +static void Verify(const Function &F, const DominatorTree &DT) { + DEBUG(dbgs() << "Verifying gc pointers in function: " << F.getName() << "\n"); + if (PrintOnly) + dbgs() << "Verifying gc pointers in function: " << F.getName() << "\n"; + + GCPtrTracker Tracker(F, DT); + + // We now have all the information we need to decide if the use of a heap + // reference is legal or not, given our safepoint semantics.
+ + InstructionVerifier Verifier; + GCPtrTracker::verifyFunction(std::move(Tracker), Verifier); - if (PrintOnly && !AnyInvalidUses) { + if (PrintOnly && !Verifier.hasAnyInvalidUses()) { dbgs() << "No illegal uses found by SafepointIRVerifier in: " << F.getName() << "\n"; } diff --git a/lib/IR/User.cpp b/lib/IR/User.cpp index d46039107f33..041593f20b57 100644 --- a/lib/IR/User.cpp +++ b/lib/IR/User.cpp @@ -10,7 +10,6 @@ #include "llvm/IR/User.h" #include "llvm/IR/Constant.h" #include "llvm/IR/GlobalValue.h" -#include "llvm/IR/Operator.h" namespace llvm { class BasicBlock; diff --git a/lib/IR/Value.cpp b/lib/IR/Value.cpp index 50235d8d30f8..01b7aff0f154 100644 --- a/lib/IR/Value.cpp +++ b/lib/IR/Value.cpp @@ -39,6 +39,10 @@ using namespace llvm; +static cl::opt NonGlobalValueMaxNameSize( + "non-global-value-max-name-size", cl::Hidden, cl::init(1024), + cl::desc("Maximum size for the name of non-global values.")); + //===----------------------------------------------------------------------===// // Value Class //===----------------------------------------------------------------------===// @@ -244,6 +248,11 @@ void Value::setNameImpl(const Twine &NewName) { if (getName() == NameRef) return; + // Cap the size of non-GlobalValue names. + if (NameRef.size() > NonGlobalValueMaxNameSize && !isa(this)) + NameRef = + NameRef.substr(0, std::max(1u, (unsigned)NonGlobalValueMaxNameSize)); + assert(!getType()->isVoidTy() && "Cannot assign a name to void values!"); // Get the symbol table to update for this object. @@ -409,7 +418,7 @@ void Value::doRAUW(Value *New, bool NoMetadata) { if (!NoMetadata && isUsedByMetadata()) ValueAsMetadata::handleRAUW(this, New); - while (!use_empty()) { + while (!materialized_use_empty()) { Use &U = *UseList; // Must handle Constants specially, we cannot call replaceUsesOfWith on a // constant because they are uniqued. 
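The Value.cpp hunk above introduces a cap on the length of names given to non-GlobalValue values. A minimal sketch of the intended behavior follows; it is not part of the patch, the helper name demoNameCap is made up, and it assumes the new option is left at its default of 1024.

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include <string>

// Sketch: names of local (non-GlobalValue) values longer than the cap are
// truncated by Value::setNameImpl; GlobalValue names are never capped.
static void demoNameCap(llvm::LLVMContext &Ctx) {
  llvm::Module M("m", Ctx);
  auto *I32 = llvm::Type::getInt32Ty(Ctx);
  auto *FTy = llvm::FunctionType::get(I32, {I32}, false);
  // A GlobalValue: the 4096-character name is kept in full.
  auto *F = llvm::Function::Create(FTy, llvm::Function::ExternalLinkage,
                                   std::string(4096, 'g'), &M);
  auto *BB = llvm::BasicBlock::Create(Ctx, "entry", F);
  llvm::IRBuilder<> B(BB);
  llvm::Argument *A = &*F->arg_begin();
  // A local value: the requested 4096-character name is truncated to the
  // configured maximum before it reaches the symbol table.
  llvm::Value *V = B.CreateAdd(A, A, std::string(4096, 'x'));
  B.CreateRet(V);
}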
@@ -619,17 +628,18 @@ const Value *Value::stripInBoundsOffsets() const { return stripPointerCastsAndOffsets(this); } -unsigned Value::getPointerDereferenceableBytes(const DataLayout &DL, +uint64_t Value::getPointerDereferenceableBytes(const DataLayout &DL, bool &CanBeNull) const { assert(getType()->isPointerTy() && "must be pointer"); - unsigned DerefBytes = 0; + uint64_t DerefBytes = 0; CanBeNull = false; if (const Argument *A = dyn_cast(this)) { DerefBytes = A->getDereferenceableBytes(); - if (DerefBytes == 0 && A->hasByValAttr() && A->getType()->isSized()) { - DerefBytes = DL.getTypeStoreSize(A->getType()); - CanBeNull = false; + if (DerefBytes == 0 && (A->hasByValAttr() || A->hasStructRetAttr())) { + Type *PT = cast(A->getType())->getElementType(); + if (PT->isSized()) + DerefBytes = DL.getTypeStoreSize(PT); } if (DerefBytes == 0) { DerefBytes = A->getDereferenceableOrNullBytes(); @@ -655,7 +665,7 @@ unsigned Value::getPointerDereferenceableBytes(const DataLayout &DL, CanBeNull = true; } } else if (auto *AI = dyn_cast(this)) { - if (AI->getAllocatedType()->isSized()) { + if (!AI->isArrayAllocation()) { DerefBytes = DL.getTypeStoreSize(AI->getAllocatedType()); CanBeNull = false; } diff --git a/lib/IR/ValueSymbolTable.cpp b/lib/IR/ValueSymbolTable.cpp index ccdabe0817b4..0da1990c3a3f 100644 --- a/lib/IR/ValueSymbolTable.cpp +++ b/lib/IR/ValueSymbolTable.cpp @@ -13,7 +13,9 @@ #include "llvm/IR/ValueSymbolTable.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/Triple.h" #include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" #include "llvm/Support/Casting.h" @@ -45,8 +47,17 @@ ValueName *ValueSymbolTable::makeUniqueName(Value *V, // Trim any suffix off and append the next number. UniqueName.resize(BaseSize); raw_svector_ostream S(UniqueName); - if (isa(V)) - S << "."; + if (auto *GV = dyn_cast(V)) { + // A dot is appended to mark it as clone during ABI demangling so that + // for example "_Z1fv" and "_Z1fv.1" both demangle to "f()", the second + // one being a clone. + // On NVPTX we cannot use a dot because PTX only allows [A-Za-z0-9_$] for + // identifiers. This breaks ABI demangling but at least ptxas accepts and + // compiles the program. + const Module *M = GV->getParent(); + if (!(M && Triple(M->getTargetTriple()).isNVPTX())) + S << "."; + } S << ++LastUnique; // Try insert the vmap entry with this suffix. 
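The ValueSymbolTable hunk above drops the '.' separator from clone names in NVPTX modules, since PTX identifiers cannot contain dots. A hedged sketch of the resulting renaming; it is not part of the patch and the helper name demoCloneSuffix is made up.

#include "llvm/IR/Constants.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"

// Sketch: two globals request the same name, so makeUniqueName has to rename
// the second one. On an NVPTX triple the numeric suffix is appended directly.
static void demoCloneSuffix(llvm::LLVMContext &Ctx) {
  llvm::Module M("m", Ctx);
  M.setTargetTriple("nvptx64-nvidia-cuda");
  auto *I32 = llvm::Type::getInt32Ty(Ctx);
  auto *Init = llvm::ConstantInt::get(I32, 0);
  new llvm::GlobalVariable(M, I32, /*isConstant=*/false,
                           llvm::GlobalValue::InternalLinkage, Init, "_Z1fv");
  auto *Clone = new llvm::GlobalVariable(
      M, I32, /*isConstant=*/false, llvm::GlobalValue::InternalLinkage, Init,
      "_Z1fv");
  // Expected: "_Z1fv1" here; "_Z1fv.1" for any non-NVPTX target triple.
  llvm::errs() << Clone->getName() << "\n";
}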
diff --git a/lib/IR/ValueTypes.cpp b/lib/IR/ValueTypes.cpp index cf6ee063c2d5..037c157fda89 100644 --- a/lib/IR/ValueTypes.cpp +++ b/lib/IR/ValueTypes.cpp @@ -14,7 +14,6 @@ #include "llvm/CodeGen/ValueTypes.h" #include "llvm/ADT/StringExtras.h" #include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Type.h" #include "llvm/Support/ErrorHandling.h" using namespace llvm; @@ -149,6 +148,7 @@ std::string EVT::getEVTString() const { case MVT::v16i1: return "v16i1"; case MVT::v32i1: return "v32i1"; case MVT::v64i1: return "v64i1"; + case MVT::v128i1: return "v128i1"; case MVT::v512i1: return "v512i1"; case MVT::v1024i1: return "v1024i1"; case MVT::v1i8: return "v1i8"; @@ -228,6 +228,7 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const { case MVT::v16i1: return VectorType::get(Type::getInt1Ty(Context), 16); case MVT::v32i1: return VectorType::get(Type::getInt1Ty(Context), 32); case MVT::v64i1: return VectorType::get(Type::getInt1Ty(Context), 64); + case MVT::v128i1: return VectorType::get(Type::getInt1Ty(Context), 128); case MVT::v512i1: return VectorType::get(Type::getInt1Ty(Context), 512); case MVT::v1024i1: return VectorType::get(Type::getInt1Ty(Context), 1024); case MVT::v1i8: return VectorType::get(Type::getInt8Ty(Context), 1); @@ -271,8 +272,8 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const { case MVT::v16f32: return VectorType::get(Type::getFloatTy(Context), 16); case MVT::v1f64: return VectorType::get(Type::getDoubleTy(Context), 1); case MVT::v2f64: return VectorType::get(Type::getDoubleTy(Context), 2); - case MVT::v4f64: return VectorType::get(Type::getDoubleTy(Context), 4); - case MVT::v8f64: return VectorType::get(Type::getDoubleTy(Context), 8); + case MVT::v4f64: return VectorType::get(Type::getDoubleTy(Context), 4); + case MVT::v8f64: return VectorType::get(Type::getDoubleTy(Context), 8); case MVT::Metadata: return Type::getMetadataTy(Context); } } diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp index 084eaba7064c..b73f9dfe4d6b 100644 --- a/lib/IR/Verifier.cpp +++ b/lib/IR/Verifier.cpp @@ -55,6 +55,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" @@ -75,7 +76,6 @@ #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalAlias.h" @@ -570,6 +570,15 @@ void Verifier::visitGlobalValue(const GlobalValue &GV) { Assert(!GV.isDSOLocal(), "GlobalValue with DLLImport Storage is dso_local!", &GV); + if (GV.hasLocalLinkage()) + Assert(GV.isDSOLocal(), + "GlobalValue with private or internal linkage must be dso_local!", + &GV); + + if (!GV.hasDefaultVisibility() && !GV.hasExternalWeakLinkage()) + Assert(GV.isDSOLocal(), + "GlobalValue with non default visibility must be dso_local!", &GV); + forEachUser(&GV, GlobalValueVisited, [&](const Value *V) -> bool { if (const Instruction *I = dyn_cast(V)) { if (!I->getParent() || !I->getParent()->getParent()) @@ -906,9 +915,12 @@ void Verifier::visitDIDerivedType(const DIDerivedType &N) { } } +/// Detect mutually exclusive flags. 
static bool hasConflictingReferenceFlags(unsigned Flags) { - return (Flags & DINode::FlagLValueReference) && - (Flags & DINode::FlagRValueReference); + return ((Flags & DINode::FlagLValueReference) && + (Flags & DINode::FlagRValueReference)) || + ((Flags & DINode::FlagTypePassByValue) && + (Flags & DINode::FlagTypePassByReference)); } void Verifier::visitTemplateParams(const MDNode &N, const Metadata &RawParams) { @@ -965,8 +977,23 @@ void Verifier::visitDISubroutineType(const DISubroutineType &N) { void Verifier::visitDIFile(const DIFile &N) { AssertDI(N.getTag() == dwarf::DW_TAG_file_type, "invalid tag", &N); - AssertDI((N.getChecksumKind() != DIFile::CSK_None || - N.getChecksum().empty()), "invalid checksum kind", &N); + AssertDI(N.getChecksumKind() <= DIFile::CSK_Last, "invalid checksum kind", + &N); + size_t Size; + switch (N.getChecksumKind()) { + case DIFile::CSK_None: + Size = 0; + break; + case DIFile::CSK_MD5: + Size = 32; + break; + case DIFile::CSK_SHA1: + Size = 40; + break; + } + AssertDI(N.getChecksum().size() == Size, "invalid checksum length", &N); + AssertDI(N.getChecksum().find_if_not(llvm::isHexDigit) == StringRef::npos, + "invalid checksum", &N); } void Verifier::visitDICompileUnit(const DICompileUnit &N) { @@ -1375,6 +1402,7 @@ static bool isFuncOnlyAttr(Attribute::AttrKind Kind) { case Attribute::NonLazyBind: case Attribute::ReturnsTwice: case Attribute::SanitizeAddress: + case Attribute::SanitizeHWAddress: case Attribute::SanitizeThread: case Attribute::SanitizeMemory: case Attribute::MinSize: @@ -1692,8 +1720,11 @@ void Verifier::verifyFunctionMetadata( "expected string with name of the !prof annotation", MD); MDString *MDS = cast(MD->getOperand(0)); StringRef ProfName = MDS->getString(); - Assert(ProfName.equals("function_entry_count"), - "first operand should be 'function_entry_count'", MD); + Assert(ProfName.equals("function_entry_count") || + ProfName.equals("synthetic_function_entry_count"), + "first operand should be 'function_entry_count'" + " or 'synthetic_function_entry_count'", + MD); // Check second operand. Assert(MD->getOperand(1) != nullptr, "second operand should not be null", @@ -2210,24 +2241,23 @@ void Verifier::visitBasicBlock(BasicBlock &BB) { SmallVector Preds(pred_begin(&BB), pred_end(&BB)); SmallVector, 8> Values; std::sort(Preds.begin(), Preds.end()); - PHINode *PN; - for (BasicBlock::iterator I = BB.begin(); (PN = dyn_cast(I));++I) { + for (const PHINode &PN : BB.phis()) { // Ensure that PHI nodes have at least one entry! - Assert(PN->getNumIncomingValues() != 0, + Assert(PN.getNumIncomingValues() != 0, "PHI nodes must have at least one entry. If the block is dead, " "the PHI should be removed!", - PN); - Assert(PN->getNumIncomingValues() == Preds.size(), + &PN); + Assert(PN.getNumIncomingValues() == Preds.size(), "PHINode should have one entry for each predecessor of its " "parent basic block!", - PN); + &PN); // Get and sort all incoming values in the PHI node... 
Values.clear(); - Values.reserve(PN->getNumIncomingValues()); - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) - Values.push_back(std::make_pair(PN->getIncomingBlock(i), - PN->getIncomingValue(i))); + Values.reserve(PN.getNumIncomingValues()); + for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) + Values.push_back( + std::make_pair(PN.getIncomingBlock(i), PN.getIncomingValue(i))); std::sort(Values.begin(), Values.end()); for (unsigned i = 0, e = Values.size(); i != e; ++i) { @@ -2239,12 +2269,12 @@ void Verifier::visitBasicBlock(BasicBlock &BB) { Values[i].second == Values[i - 1].second, "PHI node has multiple entries for the same basic block with " "different incoming values!", - PN, Values[i].first, Values[i].second, Values[i - 1].second); + &PN, Values[i].first, Values[i].second, Values[i - 1].second); // Check to make sure that the predecessors and PHI node entries are // matched up. Assert(Values[i].first == Preds[i], - "PHI node entries do not match predecessors!", PN, + "PHI node entries do not match predecessors!", &PN, Values[i].first, Preds[i]); } } @@ -3016,7 +3046,11 @@ void Verifier::visitGetElementPtrInst(GetElementPtrInst &GEP) { Assert(isa(TargetTy), "GEP base pointer is not a vector or a vector of pointers", &GEP); Assert(GEP.getSourceElementType()->isSized(), "GEP into unsized type!", &GEP); + SmallVector Idxs(GEP.idx_begin(), GEP.idx_end()); + Assert(all_of( + Idxs, [](Value* V) { return V->getType()->isIntOrIntVectorTy(); }), + "GEP indexes must be integers", &GEP); Type *ElTy = GetElementPtrInst::getIndexedType(GEP.getSourceElementType(), Idxs); Assert(ElTy, "Invalid indices for GEP pointer type!", &GEP); @@ -4014,14 +4048,23 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) { case Intrinsic::memcpy: case Intrinsic::memmove: case Intrinsic::memset: { - ConstantInt *AlignCI = dyn_cast(CS.getArgOperand(3)); - Assert(AlignCI, - "alignment argument of memory intrinsics must be a constant int", + const auto *MI = cast(CS.getInstruction()); + auto IsValidAlignment = [&](unsigned Alignment) -> bool { + return Alignment == 0 || isPowerOf2_32(Alignment); + }; + Assert(IsValidAlignment(MI->getDestAlignment()), + "alignment of arg 0 of memory intrinsic must be 0 or a power of 2", CS); - const APInt &AlignVal = AlignCI->getValue(); - Assert(AlignCI->isZero() || AlignVal.isPowerOf2(), - "alignment argument of memory intrinsics must be a power of 2", CS); - Assert(isa(CS.getArgOperand(4)), + if (const auto *MTI = dyn_cast(MI)) { + Assert(IsValidAlignment(MTI->getSourceAlignment()), + "alignment of arg 1 of memory intrinsic must be 0 or a power of 2", + CS); + // TODO: Remove this assert when we enhance IRBuilder API to create + // memcpy/memmove with separate source & dest alignments. + Assert(MTI->getSourceAlignment() == MTI->getDestAlignment(), + "TEMPORARY: source and dest alignments must be the same"); + } + Assert(isa(CS.getArgOperand(3)), "isvolatile argument of memory intrinsics must be a constant int", CS); break; @@ -4696,7 +4739,8 @@ template void TBAAVerifier::CheckFailed(Tys &&... Args) { /// TBAA scheme. This means \p BaseNode is either a scalar node, or a /// struct-type node describing an aggregate data structure (like a struct). 
TBAAVerifier::TBAABaseNodeSummary -TBAAVerifier::verifyTBAABaseNode(Instruction &I, const MDNode *BaseNode) { +TBAAVerifier::verifyTBAABaseNode(Instruction &I, const MDNode *BaseNode, + bool IsNewFormat) { if (BaseNode->getNumOperands() < 2) { CheckFailed("Base nodes must have at least two operands", &I, BaseNode); return {true, ~0u}; @@ -4706,7 +4750,7 @@ TBAAVerifier::verifyTBAABaseNode(Instruction &I, const MDNode *BaseNode) { if (Itr != TBAABaseNodes.end()) return Itr->second; - auto Result = verifyTBAABaseNodeImpl(I, BaseNode); + auto Result = verifyTBAABaseNodeImpl(I, BaseNode, IsNewFormat); auto InsertResult = TBAABaseNodes.insert({BaseNode, Result}); (void)InsertResult; assert(InsertResult.second && "We just checked!"); @@ -4714,7 +4758,8 @@ TBAAVerifier::verifyTBAABaseNode(Instruction &I, const MDNode *BaseNode) { } TBAAVerifier::TBAABaseNodeSummary -TBAAVerifier::verifyTBAABaseNodeImpl(Instruction &I, const MDNode *BaseNode) { +TBAAVerifier::verifyTBAABaseNodeImpl(Instruction &I, const MDNode *BaseNode, + bool IsNewFormat) { const TBAAVerifier::TBAABaseNodeSummary InvalidNode = {true, ~0u}; if (BaseNode->getNumOperands() == 2) { @@ -4724,13 +4769,32 @@ TBAAVerifier::verifyTBAABaseNodeImpl(Instruction &I, const MDNode *BaseNode) { : InvalidNode; } - if (BaseNode->getNumOperands() % 2 != 1) { - CheckFailed("Struct tag nodes must have an odd number of operands!", - BaseNode); - return InvalidNode; + if (IsNewFormat) { + if (BaseNode->getNumOperands() % 3 != 0) { + CheckFailed("Access tag nodes must have the number of operands that is a " + "multiple of 3!", BaseNode); + return InvalidNode; + } + } else { + if (BaseNode->getNumOperands() % 2 != 1) { + CheckFailed("Struct tag nodes must have an odd number of operands!", + BaseNode); + return InvalidNode; + } } - if (!isa(BaseNode->getOperand(0))) { + // Check the type size field. + if (IsNewFormat) { + auto *TypeSizeNode = mdconst::dyn_extract_or_null( + BaseNode->getOperand(1)); + if (!TypeSizeNode) { + CheckFailed("Type size nodes must be constants!", &I, BaseNode); + return InvalidNode; + } + } + + // Check the type name field. In the new format it can be anything. + if (!IsNewFormat && !isa(BaseNode->getOperand(0))) { CheckFailed("Struct tag nodes have a string as their first operand", BaseNode); return InvalidNode; @@ -4743,7 +4807,10 @@ TBAAVerifier::verifyTBAABaseNodeImpl(Instruction &I, const MDNode *BaseNode) { // We've already checked that BaseNode is not a degenerate root node with one // operand in \c verifyTBAABaseNode, so this loop should run at least once. - for (unsigned Idx = 1; Idx < BaseNode->getNumOperands(); Idx += 2) { + unsigned FirstFieldOpNo = IsNewFormat ? 3 : 1; + unsigned NumOpsPerField = IsNewFormat ? 3 : 2; + for (unsigned Idx = FirstFieldOpNo; Idx < BaseNode->getNumOperands(); + Idx += NumOpsPerField) { const MDOperand &FieldTy = BaseNode->getOperand(Idx); const MDOperand &FieldOffset = BaseNode->getOperand(Idx + 1); if (!isa(FieldTy)) { @@ -4785,6 +4852,16 @@ TBAAVerifier::verifyTBAABaseNodeImpl(Instruction &I, const MDNode *BaseNode) { } PrevOffset = OffsetEntryCI->getValue(); + + if (IsNewFormat) { + auto *MemberSizeNode = mdconst::dyn_extract_or_null( + BaseNode->getOperand(Idx + 2)); + if (!MemberSizeNode) { + CheckFailed("Member size entries must be constants!", &I, BaseNode); + Failed = true; + continue; + } + } } return Failed ? InvalidNode @@ -4834,7 +4911,8 @@ bool TBAAVerifier::isValidScalarTBAANode(const MDNode *MD) { /// We assume we've okayed \p BaseNode via \c verifyTBAABaseNode. 
MDNode *TBAAVerifier::getFieldNodeFromTBAABaseNode(Instruction &I, const MDNode *BaseNode, - APInt &Offset) { + APInt &Offset, + bool IsNewFormat) { assert(BaseNode->getNumOperands() >= 2 && "Invalid base node!"); // Scalar nodes have only one possible "field" -- their parent in the access @@ -4843,35 +4921,52 @@ MDNode *TBAAVerifier::getFieldNodeFromTBAABaseNode(Instruction &I, if (BaseNode->getNumOperands() == 2) return cast(BaseNode->getOperand(1)); - for (unsigned Idx = 1; Idx < BaseNode->getNumOperands(); Idx += 2) { + unsigned FirstFieldOpNo = IsNewFormat ? 3 : 1; + unsigned NumOpsPerField = IsNewFormat ? 3 : 2; + for (unsigned Idx = FirstFieldOpNo; Idx < BaseNode->getNumOperands(); + Idx += NumOpsPerField) { auto *OffsetEntryCI = mdconst::extract(BaseNode->getOperand(Idx + 1)); if (OffsetEntryCI->getValue().ugt(Offset)) { - if (Idx == 1) { + if (Idx == FirstFieldOpNo) { CheckFailed("Could not find TBAA parent in struct type node", &I, BaseNode, &Offset); return nullptr; } + unsigned PrevIdx = Idx - NumOpsPerField; auto *PrevOffsetEntryCI = - mdconst::extract(BaseNode->getOperand(Idx - 1)); + mdconst::extract(BaseNode->getOperand(PrevIdx + 1)); Offset -= PrevOffsetEntryCI->getValue(); - return cast(BaseNode->getOperand(Idx - 2)); + return cast(BaseNode->getOperand(PrevIdx)); } } + unsigned LastIdx = BaseNode->getNumOperands() - NumOpsPerField; auto *LastOffsetEntryCI = mdconst::extract( - BaseNode->getOperand(BaseNode->getNumOperands() - 1)); - + BaseNode->getOperand(LastIdx + 1)); Offset -= LastOffsetEntryCI->getValue(); - return cast(BaseNode->getOperand(BaseNode->getNumOperands() - 2)); + return cast(BaseNode->getOperand(LastIdx)); +} + +static bool isNewFormatTBAATypeNode(llvm::MDNode *Type) { + if (!Type || Type->getNumOperands() < 3) + return false; + + // In the new format type nodes shall have a reference to the parent type as + // its first operand. + MDNode *Parent = dyn_cast_or_null(Type->getOperand(0)); + if (!Parent) + return false; + + return true; } bool TBAAVerifier::visitTBAAMetadata(Instruction &I, const MDNode *MD) { AssertTBAA(isa(I) || isa(I) || isa(I) || isa(I) || isa(I) || isa(I), - "TBAA is only for loads, stores and calls!", &I); + "This instruction shall not have a TBAA access tag!", &I); bool IsStructPathTBAA = isa(MD->getOperand(0)) && MD->getNumOperands() >= 3; @@ -4880,18 +4975,34 @@ bool TBAAVerifier::visitTBAAMetadata(Instruction &I, const MDNode *MD) { IsStructPathTBAA, "Old-style TBAA is no longer allowed, use struct-path TBAA instead", &I); - AssertTBAA(MD->getNumOperands() < 5, - "Struct tag metadata must have either 3 or 4 operands", &I, MD); - MDNode *BaseNode = dyn_cast_or_null(MD->getOperand(0)); MDNode *AccessType = dyn_cast_or_null(MD->getOperand(1)); - if (MD->getNumOperands() == 4) { - auto *IsImmutableCI = - mdconst::dyn_extract_or_null(MD->getOperand(3)); + bool IsNewFormat = isNewFormatTBAATypeNode(AccessType); + + if (IsNewFormat) { + AssertTBAA(MD->getNumOperands() == 4 || MD->getNumOperands() == 5, + "Access tag metadata must have either 4 or 5 operands", &I, MD); + } else { + AssertTBAA(MD->getNumOperands() < 5, + "Struct tag metadata must have either 3 or 4 operands", &I, MD); + } + + // Check the access size field. + if (IsNewFormat) { + auto *AccessSizeNode = mdconst::dyn_extract_or_null( + MD->getOperand(3)); + AssertTBAA(AccessSizeNode, "Access size field must be a constant", &I, MD); + } + + // Check the immutability flag. + unsigned ImmutabilityFlagOpNo = IsNewFormat ? 
4 : 3; + if (MD->getNumOperands() == ImmutabilityFlagOpNo + 1) { + auto *IsImmutableCI = mdconst::dyn_extract_or_null( + MD->getOperand(ImmutabilityFlagOpNo)); AssertTBAA(IsImmutableCI, - "Immutability tag on struct tag metadata must be a constant", &I, - MD); + "Immutability tag on struct tag metadata must be a constant", + &I, MD); AssertTBAA( IsImmutableCI->isZero() || IsImmutableCI->isOne(), "Immutability part of the struct tag metadata must be either 0 or 1", @@ -4899,13 +5010,15 @@ bool TBAAVerifier::visitTBAAMetadata(Instruction &I, const MDNode *MD) { } AssertTBAA(BaseNode && AccessType, - "Malformed struct tag metadata: base and access-type " + "Malformed struct tag metadata: base and access-type " "should be non-null and point to Metadata nodes", &I, MD, BaseNode, AccessType); - AssertTBAA(isValidScalarTBAANode(AccessType), - "Access type node must be a valid scalar type", &I, MD, - AccessType); + if (!IsNewFormat) { + AssertTBAA(isValidScalarTBAANode(AccessType), + "Access type node must be a valid scalar type", &I, MD, + AccessType); + } auto *OffsetCI = mdconst::dyn_extract_or_null(MD->getOperand(2)); AssertTBAA(OffsetCI, "Offset must be constant integer", &I, MD); @@ -4916,7 +5029,8 @@ bool TBAAVerifier::visitTBAAMetadata(Instruction &I, const MDNode *MD) { SmallPtrSet StructPath; for (/* empty */; BaseNode && !IsRootTBAANode(BaseNode); - BaseNode = getFieldNodeFromTBAABaseNode(I, BaseNode, Offset)) { + BaseNode = getFieldNodeFromTBAABaseNode(I, BaseNode, Offset, + IsNewFormat)) { if (!StructPath.insert(BaseNode).second) { CheckFailed("Cycle detected in struct path", &I, MD); return false; @@ -4924,7 +5038,8 @@ bool TBAAVerifier::visitTBAAMetadata(Instruction &I, const MDNode *MD) { bool Invalid; unsigned BaseNodeBitWidth; - std::tie(Invalid, BaseNodeBitWidth) = verifyTBAABaseNode(I, BaseNode); + std::tie(Invalid, BaseNodeBitWidth) = verifyTBAABaseNode(I, BaseNode, + IsNewFormat); // If the base node is invalid in itself, then we've already printed all the // errors we wanted to print. 
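The Verifier.cpp hunks around this point teach the TBAA checks about the new metadata format, in which type nodes carry a size operand and access tags carry an access-size operand. A hedged sketch of the node shapes being verified; it is not part of the patch, the helper name buildNewFormatTag is made up, and it assumes the new-format MDBuilder helpers (createTBAATypeNode, createTBAAAccessTag) that the MDBuilder change earlier in this patch also relies on.

#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"

// Sketch: build a new-format scalar access tag. Type nodes have the shape
// (parent, size-in-bytes, id), which is what isNewFormatTBAATypeNode detects;
// the returned access tag has the shape (base, access, offset, size), the
// 4-operand case checked in visitTBAAMetadata.
static llvm::MDNode *buildNewFormatTag(llvm::LLVMContext &Ctx) {
  llvm::MDBuilder MDB(Ctx);
  llvm::MDNode *Root = MDB.createTBAARoot("Simple C/C++ TBAA");
  llvm::MDNode *CharTy = MDB.createTBAATypeNode(
      Root, /*Size=*/1, MDB.createString("omnipotent char"));
  llvm::MDNode *IntTy =
      MDB.createTBAATypeNode(CharTy, /*Size=*/4, MDB.createString("int"));
  return MDB.createTBAAAccessTag(IntTy, IntTy, /*Offset=*/0, /*Size=*/4);
}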
@@ -4938,9 +5053,13 @@ bool TBAAVerifier::visitTBAAMetadata(Instruction &I, const MDNode *MD) { &I, MD, &Offset); AssertTBAA(BaseNodeBitWidth == Offset.getBitWidth() || - (BaseNodeBitWidth == 0 && Offset == 0), + (BaseNodeBitWidth == 0 && Offset == 0) || + (IsNewFormat && BaseNodeBitWidth == ~0u), "Access bit-width not the same as description bit-width", &I, MD, BaseNodeBitWidth, Offset.getBitWidth()); + + if (IsNewFormat && SeenAccessTypeInPath) + break; } AssertTBAA(SeenAccessTypeInPath, "Did not see access type in access path!", diff --git a/lib/IRReader/IRReader.cpp b/lib/IRReader/IRReader.cpp index c4ba659fd058..999f11deb15a 100644 --- a/lib/IRReader/IRReader.cpp +++ b/lib/IRReader/IRReader.cpp @@ -8,7 +8,6 @@ //===----------------------------------------------------------------------===// #include "llvm/IRReader/IRReader.h" -#include "llvm-c/Core.h" #include "llvm-c/IRReader.h" #include "llvm/AsmParser/Parser.h" #include "llvm/Bitcode/BitcodeReader.h" diff --git a/lib/LTO/LTO.cpp b/lib/LTO/LTO.cpp index 9c737795b5a9..69d46a594d37 100644 --- a/lib/LTO/LTO.cpp +++ b/lib/LTO/LTO.cpp @@ -385,9 +385,11 @@ StringRef InputFile::getName() const { LTO::RegularLTOState::RegularLTOState(unsigned ParallelCodeGenParallelismLevel, Config &Conf) : ParallelCodeGenParallelismLevel(ParallelCodeGenParallelismLevel), - Ctx(Conf) {} + Ctx(Conf), CombinedModule(llvm::make_unique("ld-temp.o", Ctx)), + Mover(llvm::make_unique(*CombinedModule)) {} -LTO::ThinLTOState::ThinLTOState(ThinBackend Backend) : Backend(Backend) { +LTO::ThinLTOState::ThinLTOState(ThinBackend Backend) + : Backend(Backend), CombinedIndex(/*IsPeformingAnalysis*/ false) { if (!Backend) this->Backend = createInProcessThinBackend(llvm::heavyweight_hardware_concurrency()); @@ -416,8 +418,11 @@ void LTO::addModuleToGlobalRes(ArrayRef Syms, auto &GlobalRes = GlobalResolutions[Sym.getName()]; GlobalRes.UnnamedAddr &= Sym.isUnnamedAddr(); - if (Res.Prevailing) + if (Res.Prevailing) { + assert(GlobalRes.IRName.empty() && + "Multiple prevailing defs are not allowed"); GlobalRes.IRName = Sym.getIRName(); + } // Set the partition to external if we know it is re-defined by the linker // with -defsym or -wrap options, used elsewhere, e.g. it is visible to a @@ -469,6 +474,9 @@ Error LTO::add(std::unique_ptr Input, if (Conf.ResolutionFile) writeToResolutionFile(*Conf.ResolutionFile, Input.get(), Res); + if (RegularLTO.CombinedModule->getTargetTriple().empty()) + RegularLTO.CombinedModule->setTargetTriple(Input->getTargetTriple()); + const SymbolResolution *ResI = Res.begin(); for (unsigned I = 0; I != Input->Mods.size(); ++I) if (Error Err = addModule(*Input, I, ResI, Res.end())) @@ -632,7 +640,8 @@ LTO::addRegularLTO(BitcodeModule BM, ArrayRef Syms, } // Set the 'local' flag based on the linker resolution for this symbol. - GV->setDSOLocal(Res.FinalDefinitionInLinkageUnit); + if (Res.FinalDefinitionInLinkageUnit) + GV->setDSOLocal(Res.FinalDefinitionInLinkageUnit); } // Common resolution: collect the maximum size/alignment over all commons. 
// We also record if we see an instance of a common as prevailing, so that @@ -656,12 +665,6 @@ LTO::addRegularLTO(BitcodeModule BM, ArrayRef Syms, Error LTO::linkRegularLTO(RegularLTOState::AddedModule Mod, bool LivenessFromIndex) { - if (!RegularLTO.CombinedModule) { - RegularLTO.CombinedModule = - llvm::make_unique("ld-temp.o", RegularLTO.Ctx); - RegularLTO.Mover = llvm::make_unique(*RegularLTO.CombinedModule); - } - std::vector Keep; for (GlobalValue *GV : Mod.Keep) { if (LivenessFromIndex && !ThinLTO.CombinedIndex.isGUIDLive(GV->getGUID())) @@ -754,16 +757,9 @@ Error LTO::run(AddStreamFn AddStream, NativeObjectCache Cache) { computeDeadSymbols(ThinLTO.CombinedIndex, GUIDPreservedSymbols); - // Save the status of having a regularLTO combined module, as - // this is needed for generating the ThinLTO Task ID, and - // the CombinedModule will be moved at the end of runRegularLTO. - bool HasRegularLTO = RegularLTO.CombinedModule != nullptr || - !RegularLTO.ModsWithSummaries.empty(); - // Invoke regular LTO if there was a regular LTO module to start with. - if (HasRegularLTO) - if (auto E = runRegularLTO(AddStream)) - return E; - return runThinLTO(AddStream, Cache, HasRegularLTO); + if (auto E = runRegularLTO(AddStream)) + return E; + return runThinLTO(AddStream, Cache); } Error LTO::runRegularLTO(AddStreamFn AddStream) { @@ -1078,8 +1074,7 @@ ThinBackend lto::createWriteIndexesThinBackend(std::string OldPrefix, }; } -Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache, - bool HasRegularLTO) { +Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache) { if (ThinLTO.ModuleMap.empty()) return Error::success(); @@ -1164,11 +1159,9 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache, ThinLTO.Backend(Conf, ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries, AddStream, Cache); - // Task numbers start at ParallelCodeGenParallelismLevel if an LTO - // module is present, as tasks 0 through ParallelCodeGenParallelismLevel-1 - // are reserved for parallel code generation partitions. - unsigned Task = - HasRegularLTO ? RegularLTO.ParallelCodeGenParallelismLevel : 0; + // Tasks 0 through ParallelCodeGenParallelismLevel-1 are reserved for combined + // module and parallel code generation partitions. 
+ unsigned Task = RegularLTO.ParallelCodeGenParallelismLevel; for (auto &Mod : ThinLTO.ModuleMap) { if (Error E = BackendProc->start(Task, Mod.second, ImportLists[Mod.first], ExportLists[Mod.first], diff --git a/lib/LTO/LTOBackend.cpp b/lib/LTO/LTOBackend.cpp index 501d6284117b..070532677508 100644 --- a/lib/LTO/LTOBackend.cpp +++ b/lib/LTO/LTOBackend.cpp @@ -103,6 +103,12 @@ Error Config::addSaveTemps(std::string OutputFileName, if (EC) reportOpenError(Path, EC.message()); WriteIndexToFile(Index, OS); + + Path = OutputFileName + "index.dot"; + raw_fd_ostream OSDot(Path, EC, sys::fs::OpenFlags::F_None); + if (EC) + reportOpenError(Path, EC.message()); + Index.exportToDot(OSDot); return true; }; diff --git a/lib/LTO/LTOCodeGenerator.cpp b/lib/LTO/LTOCodeGenerator.cpp index 5d29227e9030..c7306df95d3d 100644 --- a/lib/LTO/LTOCodeGenerator.cpp +++ b/lib/LTO/LTOCodeGenerator.cpp @@ -21,9 +21,6 @@ #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Bitcode/BitcodeWriter.h" #include "llvm/CodeGen/ParallelCG.h" -#include "llvm/CodeGen/RuntimeLibcalls.h" -#include "llvm/CodeGen/TargetLowering.h" -#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Config/config.h" #include "llvm/IR/Constants.h" @@ -469,15 +466,15 @@ void LTOCodeGenerator::restoreLinkageForExternals() { if (I == ExternalSymbols.end()) return; - GV.setLinkage(I->second); - }; - - llvm::for_each(MergedModule->functions(), externalize); - llvm::for_each(MergedModule->globals(), externalize); - llvm::for_each(MergedModule->aliases(), externalize); -} - -void LTOCodeGenerator::verifyMergedModuleOnce() { + GV.setLinkage(I->second); + }; + + llvm::for_each(MergedModule->functions(), externalize); + llvm::for_each(MergedModule->globals(), externalize); + llvm::for_each(MergedModule->aliases(), externalize); +} + +void LTOCodeGenerator::verifyMergedModuleOnce() { // Only run on the first call. if (HasVerifiedInput) return; diff --git a/lib/LTO/LTOModule.cpp b/lib/LTO/LTOModule.cpp index 8bdc033e8530..626d2f5dc813 100644 --- a/lib/LTO/LTOModule.cpp +++ b/lib/LTO/LTOModule.cpp @@ -16,21 +16,16 @@ #include "llvm/ADT/Triple.h" #include "llvm/Analysis/ObjectUtils.h" #include "llvm/Bitcode/BitcodeReader.h" -#include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetLoweringObjectFile.h" -#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/DiagnosticPrinter.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Mangler.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" -#include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCParser/MCAsmParser.h" -#include "llvm/MC/MCParser/MCTargetAsmParser.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" @@ -393,24 +388,20 @@ void LTOModule::addDefinedDataSymbol(StringRef Name, const GlobalValue *v) { // from the ObjC data structures generated by the front end. 
// special case if this data blob is an ObjC class definition - std::string Section = v->getSection(); - if (Section.compare(0, 15, "__OBJC,__class,") == 0) { - if (const GlobalVariable *gv = dyn_cast(v)) { - addObjCClass(gv); + if (const GlobalVariable *GV = dyn_cast(v)) { + StringRef Section = GV->getSection(); + if (Section.startswith("__OBJC,__class,")) { + addObjCClass(GV); } - } - // special case if this data blob is an ObjC category definition - else if (Section.compare(0, 18, "__OBJC,__category,") == 0) { - if (const GlobalVariable *gv = dyn_cast(v)) { - addObjCCategory(gv); + // special case if this data blob is an ObjC category definition + else if (Section.startswith("__OBJC,__category,")) { + addObjCCategory(GV); } - } - // special case if this data blob is the list of referenced classes - else if (Section.compare(0, 18, "__OBJC,__cls_refs,") == 0) { - if (const GlobalVariable *gv = dyn_cast(v)) { - addObjCClassRef(gv); + // special case if this data blob is the list of referenced classes + else if (Section.startswith("__OBJC,__cls_refs,")) { + addObjCClassRef(GV); } } } diff --git a/lib/LTO/ThinLTOCodeGenerator.cpp b/lib/LTO/ThinLTOCodeGenerator.cpp index c8b3892375f6..b5ac5a77e9c1 100644 --- a/lib/LTO/ThinLTOCodeGenerator.cpp +++ b/lib/LTO/ThinLTOCodeGenerator.cpp @@ -76,7 +76,7 @@ static void saveTempBitcode(const Module &TheModule, StringRef TempDir, if (TempDir.empty()) return; // User asked to save temps, let dump the bitcode file after import. - std::string SaveTempPath = (TempDir + llvm::utostr(count) + Suffix).str(); + std::string SaveTempPath = (TempDir + llvm::Twine(count) + Suffix).str(); std::error_code EC; raw_fd_ostream OS(SaveTempPath, EC, sys::fs::F_None); if (EC) @@ -592,7 +592,7 @@ std::unique_ptr TargetMachineBuilder::create() const { */ std::unique_ptr ThinLTOCodeGenerator::linkCombinedIndex() { std::unique_ptr CombinedIndex = - llvm::make_unique(); + llvm::make_unique(/*IsPeformingAnalysis=*/false); uint64_t NextModuleId = 0; for (auto &ModuleBuffer : Modules) { if (Error Err = readModuleSummaryIndex(ModuleBuffer.getMemBuffer(), @@ -607,6 +607,20 @@ std::unique_ptr ThinLTOCodeGenerator::linkCombinedIndex() { return CombinedIndex; } +static void internalizeAndPromoteInIndex( + const StringMap &ExportLists, + const DenseSet &GUIDPreservedSymbols, + ModuleSummaryIndex &Index) { + auto isExported = [&](StringRef ModuleIdentifier, GlobalValue::GUID GUID) { + const auto &ExportList = ExportLists.find(ModuleIdentifier); + return (ExportList != ExportLists.end() && + ExportList->second.count(GUID)) || + GUIDPreservedSymbols.count(GUID); + }; + + thinLTOInternalizeAndPromoteInIndex(Index, isExported); +} + /** * Perform promotion and renaming of exported internal functions. * Index is updated to reflect linkage changes from weak resolution. @@ -642,13 +656,7 @@ void ThinLTOCodeGenerator::promote(Module &TheModule, // Promote the exported values in the index, so that they are promoted // in the module. 
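(For reference, the exported-ness test that this call site and the two later ones had been duplicating, and that the new internalizeAndPromoteInIndex helper above now owns: a GUID counts as exported from a module if it is on that module's export list or among the linker-preserved symbols. Copied from the helper above:)

    auto isExported = [&](StringRef ModuleIdentifier, GlobalValue::GUID GUID) {
      const auto &ExportList = ExportLists.find(ModuleIdentifier);
      return (ExportList != ExportLists.end() &&
              ExportList->second.count(GUID)) ||
             GUIDPreservedSymbols.count(GUID);
    };
    thinLTOInternalizeAndPromoteInIndex(Index, isExported);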
- auto isExported = [&](StringRef ModuleIdentifier, GlobalValue::GUID GUID) { - const auto &ExportList = ExportLists.find(ModuleIdentifier); - return (ExportList != ExportLists.end() && - ExportList->second.count(GUID)) || - GUIDPreservedSymbols.count(GUID); - }; - thinLTOInternalizeAndPromoteInIndex(Index, isExported); + internalizeAndPromoteInIndex(ExportLists, GUIDPreservedSymbols, Index); promoteModule(TheModule, Index); } @@ -762,13 +770,7 @@ void ThinLTOCodeGenerator::internalize(Module &TheModule, return; // Internalization - auto isExported = [&](StringRef ModuleIdentifier, GlobalValue::GUID GUID) { - const auto &ExportList = ExportLists.find(ModuleIdentifier); - return (ExportList != ExportLists.end() && - ExportList->second.count(GUID)) || - GUIDPreservedSymbols.count(GUID); - }; - thinLTOInternalizeAndPromoteInIndex(Index, isExported); + internalizeAndPromoteInIndex(ExportLists, GUIDPreservedSymbols, Index); thinLTOInternalizeModule(TheModule, ModuleToDefinedGVSummaries[ModuleIdentifier]); } @@ -918,17 +920,10 @@ void ThinLTOCodeGenerator::run() { // impacts the caching. resolveWeakForLinkerInIndex(*Index, ResolvedODR); - auto isExported = [&](StringRef ModuleIdentifier, GlobalValue::GUID GUID) { - const auto &ExportList = ExportLists.find(ModuleIdentifier); - return (ExportList != ExportLists.end() && - ExportList->second.count(GUID)) || - GUIDPreservedSymbols.count(GUID); - }; - // Use global summary-based analysis to identify symbols that can be // internalized (because they aren't exported or preserved as per callback). // Changes are made in the index, consumed in the ThinLTO backends. - thinLTOInternalizeAndPromoteInIndex(*Index, isExported); + internalizeAndPromoteInIndex(ExportLists, GUIDPreservedSymbols, *Index); // Make sure that every module has an entry in the ExportLists and // ResolvedODR maps to enable threaded access to these maps below. diff --git a/lib/LTO/UpdateCompilerUsed.cpp b/lib/LTO/UpdateCompilerUsed.cpp index c683b5050ccc..c982a5b0e5aa 100644 --- a/lib/LTO/UpdateCompilerUsed.cpp +++ b/lib/LTO/UpdateCompilerUsed.cpp @@ -17,7 +17,6 @@ #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Mangler.h" -#include "llvm/Transforms/IPO/Internalize.h" #include "llvm/Transforms/Utils/ModuleUtils.h" using namespace llvm; diff --git a/lib/Linker/IRMover.cpp b/lib/Linker/IRMover.cpp index ee067a912e3c..f7170e714b9b 100644 --- a/lib/Linker/IRMover.cpp +++ b/lib/Linker/IRMover.cpp @@ -954,7 +954,12 @@ Expected IRLinker::linkGlobalValueProto(GlobalValue *SGV, NewGV->setLinkage(GlobalValue::InternalLinkage); Constant *C = NewGV; - if (DGV) + // Only create a bitcast if necessary. In particular, with + // DebugTypeODRUniquing we may reach metadata in the destination module + // containing a GV from the source module, in which case SGV will be + // the same as DGV and NewGV, and TypeMap.get() will assert since it + // assumes it is being invoked on a type in the source module. 
+ if (DGV && NewGV != SGV) C = ConstantExpr::getBitCast(NewGV, TypeMap.get(SGV->getType())); if (DGV && NewGV != DGV) { diff --git a/lib/MC/MCAsmInfoWasm.cpp b/lib/MC/MCAsmInfoWasm.cpp index aa26616dda36..fc55059ff75d 100644 --- a/lib/MC/MCAsmInfoWasm.cpp +++ b/lib/MC/MCAsmInfoWasm.cpp @@ -13,8 +13,6 @@ //===----------------------------------------------------------------------===// #include "llvm/MC/MCAsmInfoWasm.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCSectionWasm.h" using namespace llvm; void MCAsmInfoWasm::anchor() { } diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp index 44bac8eabdc7..bd263902a491 100644 --- a/lib/MC/MCAsmStreamer.cpp +++ b/lib/MC/MCAsmStreamer.cpp @@ -22,17 +22,14 @@ #include "llvm/MC/MCInstPrinter.h" #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCSectionCOFF.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSymbolELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/LEB128.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/Path.h" -#include "llvm/Support/SourceMgr.h" #include using namespace llvm; @@ -140,6 +137,8 @@ class MCAsmStreamer final : public MCStreamer { void EmitDataRegion(MCDataRegionType Kind) override; void EmitVersionMin(MCVersionMinType Kind, unsigned Major, unsigned Minor, unsigned Update) override; + void EmitBuildVersion(unsigned Platform, unsigned Major, unsigned Minor, + unsigned Update) override; void EmitThumbFunc(MCSymbol *Func) override; void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) override; @@ -152,6 +151,7 @@ class MCAsmStreamer final : public MCStreamer { void EmitCOFFSymbolType(int Type) override; void EndCOFFSymbolDef() override; void EmitCOFFSafeSEH(MCSymbol const *Symbol) override; + void EmitCOFFSymbolIndex(MCSymbol const *Symbol) override; void EmitCOFFSectionIndex(MCSymbol const *Symbol) override; void EmitCOFFSecRel32(MCSymbol const *Symbol, uint64_t Offset) override; void emitELFSize(MCSymbol *Symbol, const MCExpr *Value) override; @@ -193,14 +193,9 @@ class MCAsmStreamer final : public MCStreamer { void EmitGPRel32Value(const MCExpr *Value) override; - - void emitFill(uint64_t NumBytes, uint8_t FillValue) override; - void emitFill(const MCExpr &NumBytes, uint64_t FillValue, SMLoc Loc = SMLoc()) override; - void emitFill(uint64_t NumValues, int64_t Size, int64_t Expr) override; - void emitFill(const MCExpr &NumValues, int64_t Size, int64_t Expr, SMLoc Loc = SMLoc()) override; @@ -218,6 +213,7 @@ class MCAsmStreamer final : public MCStreamer { void EmitFileDirective(StringRef Filename) override; unsigned EmitDwarfFileDirective(unsigned FileNo, StringRef Directory, StringRef Filename, + MD5::MD5Result *Checksum = 0, unsigned CUID = 0) override; void EmitDwarfLocDirective(unsigned FileNo, unsigned Line, unsigned Column, unsigned Flags, @@ -406,9 +402,13 @@ void MCAsmStreamer::emitExplicitComments() { void MCAsmStreamer::ChangeSection(MCSection *Section, const MCExpr *Subsection) { assert(Section && "Cannot switch to a null section!"); - Section->PrintSwitchToSection( - *MAI, getContext().getObjectFileInfo()->getTargetTriple(), OS, - Subsection); + if (MCTargetStreamer *TS = getTargetStreamer()) { + TS->changeSection(getCurrentSectionOnly(), Section, Subsection, OS); + } else { + Section->PrintSwitchToSection( + *MAI, getContext().getObjectFileInfo()->getTargetTriple(), OS, + Subsection); + } } void 
MCAsmStreamer::EmitLabel(MCSymbol *Symbol, SMLoc Loc) { @@ -474,15 +474,39 @@ void MCAsmStreamer::EmitDataRegion(MCDataRegionType Kind) { EmitEOL(); } -void MCAsmStreamer::EmitVersionMin(MCVersionMinType Kind, unsigned Major, +static const char *getVersionMinDirective(MCVersionMinType Type) { + switch (Type) { + case MCVM_WatchOSVersionMin: return ".watchos_version_min"; + case MCVM_TvOSVersionMin: return ".tvos_version_min"; + case MCVM_IOSVersionMin: return ".ios_version_min"; + case MCVM_OSXVersionMin: return ".macosx_version_min"; + } + llvm_unreachable("Invalid MC version min type"); +} + +void MCAsmStreamer::EmitVersionMin(MCVersionMinType Type, unsigned Major, unsigned Minor, unsigned Update) { - switch (Kind) { - case MCVM_WatchOSVersionMin: OS << "\t.watchos_version_min"; break; - case MCVM_TvOSVersionMin: OS << "\t.tvos_version_min"; break; - case MCVM_IOSVersionMin: OS << "\t.ios_version_min"; break; - case MCVM_OSXVersionMin: OS << "\t.macosx_version_min"; break; + OS << '\t' << getVersionMinDirective(Type) << ' ' << Major << ", " << Minor; + if (Update) + OS << ", " << Update; + EmitEOL(); +} + +static const char *getPlatformName(MachO::PlatformType Type) { + switch (Type) { + case MachO::PLATFORM_MACOS: return "macos"; + case MachO::PLATFORM_IOS: return "ios"; + case MachO::PLATFORM_TVOS: return "tvos"; + case MachO::PLATFORM_WATCHOS: return "watchos"; + case MachO::PLATFORM_BRIDGEOS: return "bridgeos"; } - OS << " " << Major << ", " << Minor; + llvm_unreachable("Invalid Mach-O platform type"); +} + +void MCAsmStreamer::EmitBuildVersion(unsigned Platform, unsigned Major, + unsigned Minor, unsigned Update) { + const char *PlatformName = getPlatformName((MachO::PlatformType)Platform); + OS << "\t.build_version " << PlatformName << ", " << Major << ", " << Minor; if (Update) OS << ", " << Update; EmitEOL(); @@ -626,6 +650,12 @@ void MCAsmStreamer::EmitCOFFSafeSEH(MCSymbol const *Symbol) { EmitEOL(); } +void MCAsmStreamer::EmitCOFFSymbolIndex(MCSymbol const *Symbol) { + OS << "\t.symidx\t"; + Symbol->print(OS, MAI); + EmitEOL(); +} + void MCAsmStreamer::EmitCOFFSectionIndex(MCSymbol const *Symbol) { OS << "\t.secidx\t"; Symbol->print(OS, MAI); @@ -773,10 +803,15 @@ void MCAsmStreamer::EmitBytes(StringRef Data) { "Cannot emit contents before setting section!"); if (Data.empty()) return; - if (Data.size() == 1) { - OS << MAI->getData8bitsDirective(); - OS << (unsigned)(unsigned char)Data[0]; - EmitEOL(); + // If only single byte is provided or no ascii or asciz directives is + // supported, emit as vector of 8bits data. + if (Data.size() == 1 || + !(MAI->getAscizDirective() || MAI->getAsciiDirective())) { + const char *Directive = MAI->getData8bitsDirective(); + for (const unsigned char C : Data.bytes()) { + OS << Directive << (unsigned)C; + EmitEOL(); + } return; } @@ -861,8 +896,12 @@ void MCAsmStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size, assert(Directive && "Invalid size for machine code value!"); OS << Directive; - Value->print(OS, MAI); - EmitEOL(); + if (MCTargetStreamer *TS = getTargetStreamer()) { + TS->emitValue(Value); + } else { + Value->print(OS, MAI); + EmitEOL(); + } } void MCAsmStreamer::EmitULEB128Value(const MCExpr *Value) { @@ -929,17 +968,12 @@ void MCAsmStreamer::EmitGPRel32Value(const MCExpr *Value) { EmitEOL(); } -/// emitFill - Emit NumBytes bytes worth of the value specified by -/// FillValue. This implements directives such as '.space'. 
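(Aside: with the fixed-size emitFill overloads removed below, a directive such as ".space end - start" can carry its size as a not-yet-resolvable MCExpr; the object streamer records it in an MCFillFragment and the assembler folds it at layout time, as the MCAssembler and MCObjectStreamer hunks further down show. A minimal caller sketch, assuming Start and End are MCSymbols and Ctx/Loc are the current context and source location:)

    const MCExpr *NumBytes = MCBinaryExpr::createSub(
        MCSymbolRefExpr::create(End, Ctx), MCSymbolRefExpr::create(Start, Ctx),
        Ctx);
    Streamer.emitFill(*NumBytes, /*FillValue=*/0, Loc);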
-void MCAsmStreamer::emitFill(uint64_t NumBytes, uint8_t FillValue) { - if (NumBytes == 0) return; - - const MCExpr *E = MCConstantExpr::create(NumBytes, getContext()); - emitFill(*E, FillValue); -} - void MCAsmStreamer::emitFill(const MCExpr &NumBytes, uint64_t FillValue, SMLoc Loc) { + int64_t IntNumBytes; + if (NumBytes.evaluateAsAbsolute(IntNumBytes) && IntNumBytes == 0) + return; + if (const char *ZeroDirective = MAI->getZeroDirective()) { // FIXME: Emit location directives OS << ZeroDirective; @@ -953,14 +987,6 @@ void MCAsmStreamer::emitFill(const MCExpr &NumBytes, uint64_t FillValue, MCStreamer::emitFill(NumBytes, FillValue); } -void MCAsmStreamer::emitFill(uint64_t NumValues, int64_t Size, int64_t Expr) { - if (NumValues == 0) - return; - - const MCExpr *E = MCConstantExpr::create(NumValues, getContext()); - emitFill(*E, Size, Expr); -} - void MCAsmStreamer::emitFill(const MCExpr &NumValues, int64_t Size, int64_t Expr, SMLoc Loc) { // FIXME: Emit location directives @@ -1050,12 +1076,13 @@ void MCAsmStreamer::EmitFileDirective(StringRef Filename) { unsigned MCAsmStreamer::EmitDwarfFileDirective(unsigned FileNo, StringRef Directory, StringRef Filename, + MD5::MD5Result *Checksum, unsigned CUID) { assert(CUID == 0); MCDwarfLineTable &Table = getContext().getMCDwarfLineTable(CUID); unsigned NumFiles = Table.getMCDwarfFiles().size(); - FileNo = Table.getFile(Directory, Filename, FileNo); + FileNo = Table.getFile(Directory, Filename, Checksum, FileNo); if (FileNo == 0) return 0; if (NumFiles == Table.getMCDwarfFiles().size()) @@ -1074,13 +1101,23 @@ unsigned MCAsmStreamer::EmitDwarfFileDirective(unsigned FileNo, } } - OS << "\t.file\t" << FileNo << ' '; + SmallString<128> Str; + raw_svector_ostream OS1(Str); + OS1 << "\t.file\t" << FileNo << ' '; if (!Directory.empty()) { - PrintQuotedString(Directory, OS); - OS << ' '; + PrintQuotedString(Directory, OS1); + OS1 << ' '; + } + PrintQuotedString(Filename, OS1); + if (Checksum) { + OS1 << " md5 "; + PrintQuotedString(Checksum->digest(), OS1); + } + if (MCTargetStreamer *TS = getTargetStreamer()) { + TS->emitDwarfFileDirective(OS1.str()); + } else { + EmitRawText(OS1.str()); } - PrintQuotedString(Filename, OS); - EmitEOL(); return FileNo; } @@ -1296,12 +1333,17 @@ void MCAsmStreamer::EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame) { void MCAsmStreamer::EmitRegisterName(int64_t Register) { if (!MAI->useDwarfRegNumForCFI()) { + // User .cfi_* directives can use arbitrary DWARF register numbers, not + // just ones that map to LLVM register numbers and have known names. + // Fall back to using the original number directly if no name is known. 
const MCRegisterInfo *MRI = getContext().getRegisterInfo(); - unsigned LLVMRegister = MRI->getLLVMRegNum(Register, true); - InstPrinter->printRegName(OS, LLVMRegister); - } else { - OS << Register; + int LLVMRegister = MRI->getLLVMRegNumFromEH(Register); + if (LLVMRegister != -1) { + InstPrinter->printRegName(OS, LLVMRegister); + return; + } } + OS << Register; } void MCAsmStreamer::EmitCFIDefCfa(int64_t Register, int64_t Offset) { diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp index 9a23e614f3ad..bd881b4d6e85 100644 --- a/lib/MC/MCAssembler.cpp +++ b/lib/MC/MCAssembler.cpp @@ -88,7 +88,7 @@ MCAssembler::MCAssembler(MCContext &Context, MCAsmBackend &Backend, : Context(Context), Backend(Backend), Emitter(Emitter), Writer(Writer), BundleAlignSize(0), RelaxAll(false), SubsectionsViaSymbols(false), IncrementalLinkerCompatible(false), ELFHeaderEFlags(0) { - VersionMinInfo.Major = 0; // Major version == 0 for "none specified" + VersionInfo.Major = 0; // Major version == 0 for "none specified" } MCAssembler::~MCAssembler() = default; @@ -107,7 +107,7 @@ void MCAssembler::reset() { IncrementalLinkerCompatible = false; ELFHeaderEFlags = 0; LOHContainer.reset(); - VersionMinInfo.Major = 0; + VersionInfo.Major = 0; // reset objects owned by us getBackend().reset(); @@ -281,8 +281,18 @@ uint64_t MCAssembler::computeFragmentSize(const MCAsmLayout &Layout, return cast(F).getContents().size(); case MCFragment::FT_CompactEncodedInst: return cast(F).getContents().size(); - case MCFragment::FT_Fill: - return cast(F).getSize(); + case MCFragment::FT_Fill: { + auto &FF = cast(F); + int64_t Size = 0; + if (!FF.getSize().evaluateAsAbsolute(Size, Layout)) + getContext().reportError(FF.getLoc(), + "expected assembly-time absolute expression"); + if (Size < 0) { + getContext().reportError(FF.getLoc(), "invalid number of bytes"); + return 0; + } + return Size; + } case MCFragment::FT_LEB: return cast(F).getContents().size(); @@ -540,7 +550,7 @@ static void writeFragment(const MCAssembler &Asm, const MCAsmLayout &Layout, for (unsigned I = 1; I < MaxChunkSize; ++I) Data[I] = Data[0]; - uint64_t Size = FF.getSize(); + uint64_t Size = FragmentSize; for (unsigned ChunkSize = MaxChunkSize; ChunkSize; ChunkSize /= 2) { StringRef Ref(Data, ChunkSize); for (uint64_t I = 0, E = Size / ChunkSize; I != E; ++I) diff --git a/lib/MC/MCCodeView.cpp b/lib/MC/MCCodeView.cpp index 7e4a79b8a9bc..8247db1c622d 100644 --- a/lib/MC/MCCodeView.cpp +++ b/lib/MC/MCCodeView.cpp @@ -14,7 +14,6 @@ #include "llvm/MC/MCCodeView.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/BinaryFormat/COFF.h" #include "llvm/DebugInfo/CodeView/CodeView.h" #include "llvm/DebugInfo/CodeView/Line.h" #include "llvm/DebugInfo/CodeView/SymbolRecord.h" @@ -77,6 +76,14 @@ bool CodeViewContext::addFile(MCStreamer &OS, unsigned FileNumber, return true; } +MCCVFunctionInfo *CodeViewContext::getCVFunctionInfo(unsigned FuncId) { + if (FuncId >= Functions.size()) + return nullptr; + if (Functions[FuncId].isUnallocatedFunctionInfo()) + return nullptr; + return &Functions[FuncId]; +} + bool CodeViewContext::recordFunctionId(unsigned FuncId) { if (FuncId >= Functions.size()) Functions.resize(FuncId + 1); @@ -248,6 +255,67 @@ void CodeViewContext::emitFileChecksumOffset(MCObjectStreamer &OS, OS.EmitValueImpl(SRE, 4); } +void CodeViewContext::addLineEntry(const MCCVLineEntry &LineEntry) { + size_t Offset = MCCVLines.size(); + auto I = MCCVLineStartStop.insert( + {LineEntry.getFunctionId(), {Offset, Offset + 1}}); + if 
(!I.second) + I.first->second.second = Offset + 1; + MCCVLines.push_back(LineEntry); +} + +std::vector +CodeViewContext::getFunctionLineEntries(unsigned FuncId) { + std::vector FilteredLines; + auto I = MCCVLineStartStop.find(FuncId); + if (I != MCCVLineStartStop.end()) { + MCCVFunctionInfo *SiteInfo = getCVFunctionInfo(FuncId); + for (size_t Idx = I->second.first, End = I->second.second; Idx != End; + ++Idx) { + unsigned LocationFuncId = MCCVLines[Idx].getFunctionId(); + if (LocationFuncId == FuncId) { + // This was a .cv_loc directly for FuncId, so record it. + FilteredLines.push_back(MCCVLines[Idx]); + } else { + // Check if the current location is inlined in this function. If it is, + // synthesize a statement .cv_loc at the original inlined call site. + auto I = SiteInfo->InlinedAtMap.find(LocationFuncId); + if (I != SiteInfo->InlinedAtMap.end()) { + MCCVFunctionInfo::LineInfo &IA = I->second; + // Only add the location if it differs from the previous location. + // Large inlined calls will have many .cv_loc entries and we only need + // one line table entry in the parent function. + if (FilteredLines.empty() || + FilteredLines.back().getFileNum() != IA.File || + FilteredLines.back().getLine() != IA.Line || + FilteredLines.back().getColumn() != IA.Col) { + FilteredLines.push_back(MCCVLineEntry( + MCCVLines[Idx].getLabel(), + MCCVLoc(FuncId, IA.File, IA.Line, IA.Col, false, false))); + } + } + } + } + } + return FilteredLines; +} + +std::pair CodeViewContext::getLineExtent(unsigned FuncId) { + auto I = MCCVLineStartStop.find(FuncId); + // Return an empty extent if there are no cv_locs for this function id. + if (I == MCCVLineStartStop.end()) + return {~0ULL, 0}; + return I->second; +} + +ArrayRef CodeViewContext::getLinesForExtent(size_t L, size_t R) { + if (R <= L) + return None; + if (L >= MCCVLines.size()) + return None; + return makeArrayRef(&MCCVLines[L], R - L); +} + void CodeViewContext::emitLineTableForFunction(MCObjectStreamer &OS, unsigned FuncId, const MCSymbol *FuncBegin, @@ -508,7 +576,7 @@ void CodeViewContext::encodeInlineLineTable(MCAsmLayout &Layout, if (!LocAfter.empty()) { // Only try to compute this difference if we're in the same section. const MCCVLineEntry &Loc = LocAfter[0]; - if (&Loc.getLabel()->getSection(false) == &LastLabel->getSection(false)) + if (&Loc.getLabel()->getSection() == &LastLabel->getSection()) LocAfterLength = computeLabelDiff(Layout, LastLabel, Loc.getLabel()); } diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp index 5c25e902bbe7..c7c6ca7a86e5 100644 --- a/lib/MC/MCContext.cpp +++ b/lib/MC/MCContext.cpp @@ -490,8 +490,10 @@ MCSectionWasm *MCContext::getWasmSection(const Twine &Section, SectionKind K, const Twine &Group, unsigned UniqueID, const char *BeginSymName) { MCSymbolWasm *GroupSym = nullptr; - if (!Group.isTriviallyEmpty() && !Group.str().empty()) + if (!Group.isTriviallyEmpty() && !Group.str().empty()) { GroupSym = cast(getOrCreateSymbol(Group)); + GroupSym->setComdat(true); + } return getWasmSection(Section, K, GroupSym, UniqueID, BeginSymName); } @@ -535,9 +537,10 @@ MCSubtargetInfo &MCContext::getSubtargetCopy(const MCSubtargetInfo &STI) { /// error and zero is returned and the client reports the error, else the /// allocated file number is returned. The file numbers may be in any order. 
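(Callers that have no checksum for a file simply pass a null pointer through the widened signature; a minimal usage sketch, with the directory, file name and numbers invented for the example:)

    unsigned FileNo = Ctx.getDwarfFile("include", "a.h", /*FileNumber=*/2,
                                       /*Checksum=*/nullptr, /*CUID=*/0);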
unsigned MCContext::getDwarfFile(StringRef Directory, StringRef FileName, - unsigned FileNumber, unsigned CUID) { + unsigned FileNumber, MD5::MD5Result *Checksum, + unsigned CUID) { MCDwarfLineTable &Table = MCDwarfLineTablesCUMap[CUID]; - return Table.getFile(Directory, FileName, FileNumber); + return Table.getFile(Directory, FileName, Checksum, FileNumber); } /// isValidDwarfFileNumber - takes a dwarf file number and returns true if it diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp index a36ff4cb9072..62af6d851bfd 100644 --- a/lib/MC/MCDwarf.cpp +++ b/lib/MC/MCDwarf.cpp @@ -257,6 +257,75 @@ static void emitAbsValue(MCStreamer &OS, const MCExpr *Value, unsigned Size) { OS.EmitValue(ABS, Size); } +static void +emitV2FileDirTables(MCStreamer *MCOS, + const SmallVectorImpl &MCDwarfDirs, + const SmallVectorImpl &MCDwarfFiles) { + // First the directory table. + for (auto Dir : MCDwarfDirs) { + MCOS->EmitBytes(Dir); // The DirectoryName, and... + MCOS->EmitBytes(StringRef("\0", 1)); // its null terminator. + } + MCOS->EmitIntValue(0, 1); // Terminate the directory list. + + // Second the file table. + for (unsigned i = 1; i < MCDwarfFiles.size(); i++) { + assert(!MCDwarfFiles[i].Name.empty()); + MCOS->EmitBytes(MCDwarfFiles[i].Name); // FileName and... + MCOS->EmitBytes(StringRef("\0", 1)); // its null terminator. + MCOS->EmitULEB128IntValue(MCDwarfFiles[i].DirIndex); // Directory number. + MCOS->EmitIntValue(0, 1); // Last modification timestamp (always 0). + MCOS->EmitIntValue(0, 1); // File size (always 0). + } + MCOS->EmitIntValue(0, 1); // Terminate the file list. +} + +static void +emitV5FileDirTables(MCStreamer *MCOS, + const SmallVectorImpl &MCDwarfDirs, + const SmallVectorImpl &MCDwarfFiles, + StringRef CompilationDir, bool HasMD5) { + // The directory format, which is just inline null-terminated strings. + MCOS->EmitIntValue(1, 1); + MCOS->EmitULEB128IntValue(dwarf::DW_LNCT_path); + MCOS->EmitULEB128IntValue(dwarf::DW_FORM_string); + // Then the list of directory paths. CompilationDir comes first. + MCOS->EmitULEB128IntValue(MCDwarfDirs.size() + 1); + MCOS->EmitBytes(CompilationDir); + MCOS->EmitBytes(StringRef("\0", 1)); + for (auto Dir : MCDwarfDirs) { + MCOS->EmitBytes(Dir); // The DirectoryName, and... + MCOS->EmitBytes(StringRef("\0", 1)); // its null terminator. + } + + // The file format, which is the inline null-terminated filename and a + // directory index. We don't track file size/timestamp so don't emit them + // in the v5 table. Emit MD5 checksums if we have them. + MCOS->EmitIntValue(HasMD5 ? 3 : 2, 1); + MCOS->EmitULEB128IntValue(dwarf::DW_LNCT_path); + MCOS->EmitULEB128IntValue(dwarf::DW_FORM_string); + MCOS->EmitULEB128IntValue(dwarf::DW_LNCT_directory_index); + MCOS->EmitULEB128IntValue(dwarf::DW_FORM_udata); + if (HasMD5) { + MCOS->EmitULEB128IntValue(dwarf::DW_LNCT_MD5); + MCOS->EmitULEB128IntValue(dwarf::DW_FORM_data16); + } + // Then the list of file names. These start at 1. + MCOS->EmitULEB128IntValue(MCDwarfFiles.size() - 1); + for (unsigned i = 1; i < MCDwarfFiles.size(); ++i) { + assert(!MCDwarfFiles[i].Name.empty()); + MCOS->EmitBytes(MCDwarfFiles[i].Name); // FileName and... + MCOS->EmitBytes(StringRef("\0", 1)); // its null terminator. + MCOS->EmitULEB128IntValue(MCDwarfFiles[i].DirIndex); // Directory number. 
+ if (HasMD5) { + MD5::MD5Result *Cksum = MCDwarfFiles[i].Checksum; + MCOS->EmitBinaryData( + StringRef(reinterpret_cast(Cksum->Bytes.data()), + Cksum->Bytes.size())); + } + } +} + std::pair MCDwarfLineTableHeader::Emit(MCStreamer *MCOS, MCDwarfLineTableParams Params, ArrayRef StandardOpcodeLengths) const { @@ -277,22 +346,41 @@ MCDwarfLineTableHeader::Emit(MCStreamer *MCOS, MCDwarfLineTableParams Params, emitAbsValue(*MCOS, MakeStartMinusEndExpr(*MCOS, *LineStartSym, *LineEndSym, 4), 4); - // Next 2 bytes is the Version, which is Dwarf 2. - MCOS->EmitIntValue(2, 2); + // Next 2 bytes is the Version. + // FIXME: On Darwin we still default to V2. + unsigned LineTableVersion = context.getDwarfVersion(); + if (context.getObjectFileInfo()->getTargetTriple().isOSDarwin()) + LineTableVersion = 2; + MCOS->EmitIntValue(LineTableVersion, 2); + + // Keep track of the bytes between the very start and where the header length + // comes out. + unsigned PreHeaderLengthBytes = 4 + 2; + + // In v5, we get address info next. + if (LineTableVersion >= 5) { + MCOS->EmitIntValue(context.getAsmInfo()->getCodePointerSize(), 1); + MCOS->EmitIntValue(0, 1); // Segment selector; same as EmitGenDwarfAranges. + PreHeaderLengthBytes += 2; + } // Create a symbol for the end of the prologue (to be set when we get there). MCSymbol *ProEndSym = context.createTempSymbol(); // Lprologue_end - // Length of the prologue, is the next 4 bytes. Which is the start of the - // section to the end of the prologue. Not including the 4 bytes for the - // total length, the 2 bytes for the version, and these 4 bytes for the - // length of the prologue. - emitAbsValue( - *MCOS, - MakeStartMinusEndExpr(*MCOS, *LineStartSym, *ProEndSym, (4 + 2 + 4)), 4); + // Length of the prologue, is the next 4 bytes. This is actually the length + // from after the length word, to the end of the prologue. + emitAbsValue(*MCOS, + MakeStartMinusEndExpr(*MCOS, *LineStartSym, *ProEndSym, + (PreHeaderLengthBytes + 4)), + 4); // Parameters of the state machine, are next. MCOS->EmitIntValue(context.getAsmInfo()->getMinInstAlignment(), 1); + // maximum_operations_per_instruction + // For non-VLIW architectures this field is always 1. + // FIXME: VLIW architectures need to update this field accordingly. + if (LineTableVersion >= 4) + MCOS->EmitIntValue(1, 1); MCOS->EmitIntValue(DWARF2_LINE_DEFAULT_IS_STMT, 1); MCOS->EmitIntValue(Params.DWARF2LineBase, 1); MCOS->EmitIntValue(Params.DWARF2LineRange, 1); @@ -302,26 +390,13 @@ MCDwarfLineTableHeader::Emit(MCStreamer *MCOS, MCDwarfLineTableParams Params, for (char Length : StandardOpcodeLengths) MCOS->EmitIntValue(Length, 1); - // Put out the directory and file tables. - - // First the directory table. - for (unsigned i = 0; i < MCDwarfDirs.size(); i++) { - MCOS->EmitBytes(MCDwarfDirs[i]); // the DirectoryName - MCOS->EmitBytes(StringRef("\0", 1)); // the null term. of the string - } - MCOS->EmitIntValue(0, 1); // Terminate the directory list - - // Second the file table. - for (unsigned i = 1; i < MCDwarfFiles.size(); i++) { - assert(!MCDwarfFiles[i].Name.empty()); - MCOS->EmitBytes(MCDwarfFiles[i].Name); // FileName - MCOS->EmitBytes(StringRef("\0", 1)); // the null term. of the string - // the Directory num - MCOS->EmitULEB128IntValue(MCDwarfFiles[i].DirIndex); - MCOS->EmitIntValue(0, 1); // last modification timestamp (always 0) - MCOS->EmitIntValue(0, 1); // filesize (always 0) - } - MCOS->EmitIntValue(0, 1); // Terminate the file list + // Put out the directory and file tables. 
The formats vary depending on + // the version. + if (LineTableVersion >= 5) + emitV5FileDirTables(MCOS, MCDwarfDirs, MCDwarfFiles, CompilationDir, + HasMD5); + else + emitV2FileDirTables(MCOS, MCDwarfDirs, MCDwarfFiles); // This is the end of the prologue, so set the value of the symbol at the // end of the prologue (that was used in a previous expression). @@ -344,12 +419,14 @@ void MCDwarfLineTable::EmitCU(MCObjectStreamer *MCOS, } unsigned MCDwarfLineTable::getFile(StringRef &Directory, StringRef &FileName, + MD5::MD5Result *Checksum, unsigned FileNumber) { - return Header.getFile(Directory, FileName, FileNumber); + return Header.getFile(Directory, FileName, Checksum, FileNumber); } unsigned MCDwarfLineTableHeader::getFile(StringRef &Directory, StringRef &FileName, + MD5::MD5Result *Checksum, unsigned FileNumber) { if (Directory == CompilationDir) Directory = ""; @@ -370,7 +447,8 @@ unsigned MCDwarfLineTableHeader::getFile(StringRef &Directory, return IterBool.first->second; } // Make space for this FileNumber in the MCDwarfFiles vector if needed. - MCDwarfFiles.resize(FileNumber + 1); + if (FileNumber >= MCDwarfFiles.size()) + MCDwarfFiles.resize(FileNumber + 1); // Get the new MCDwarfFile slot for this FileNumber. MCDwarfFile &File = MCDwarfFiles[FileNumber]; @@ -379,6 +457,10 @@ unsigned MCDwarfLineTableHeader::getFile(StringRef &Directory, if (!File.Name.empty()) return 0; + // If any files have an MD5 checksum, they all must. + if (FileNumber > 1) + assert(HasMD5 == (Checksum != nullptr)); + if (Directory.empty()) { // Separate the directory part from the basename of the FileName. StringRef tFileName = sys::path::filename(FileName); @@ -412,6 +494,9 @@ unsigned MCDwarfLineTableHeader::getFile(StringRef &Directory, File.Name = FileName; File.DirIndex = DirIndex; + File.Checksum = Checksum; + if (Checksum) + HasMD5 = true; // return the allocated FileNumber. 
return FileNumber; @@ -1057,8 +1142,8 @@ void FrameEmitterImpl::EmitCFIInstruction(const MCCFIInstruction &Instr) { unsigned Reg1 = Instr.getRegister(); unsigned Reg2 = Instr.getRegister2(); if (!IsEH) { - Reg1 = MRI->getDwarfRegNum(MRI->getLLVMRegNum(Reg1, true), false); - Reg2 = MRI->getDwarfRegNum(MRI->getLLVMRegNum(Reg2, true), false); + Reg1 = MRI->getDwarfRegNumFromDwarfEHRegNum(Reg1); + Reg2 = MRI->getDwarfRegNumFromDwarfEHRegNum(Reg2); } Streamer.EmitIntValue(dwarf::DW_CFA_register, 1); Streamer.EmitULEB128IntValue(Reg1); @@ -1094,7 +1179,7 @@ void FrameEmitterImpl::EmitCFIInstruction(const MCCFIInstruction &Instr) { case MCCFIInstruction::OpDefCfa: { unsigned Reg = Instr.getRegister(); if (!IsEH) - Reg = MRI->getDwarfRegNum(MRI->getLLVMRegNum(Reg, true), false); + Reg = MRI->getDwarfRegNumFromDwarfEHRegNum(Reg); Streamer.EmitIntValue(dwarf::DW_CFA_def_cfa, 1); Streamer.EmitULEB128IntValue(Reg); CFAOffset = -Instr.getOffset(); @@ -1105,7 +1190,7 @@ void FrameEmitterImpl::EmitCFIInstruction(const MCCFIInstruction &Instr) { case MCCFIInstruction::OpDefCfaRegister: { unsigned Reg = Instr.getRegister(); if (!IsEH) - Reg = MRI->getDwarfRegNum(MRI->getLLVMRegNum(Reg, true), false); + Reg = MRI->getDwarfRegNumFromDwarfEHRegNum(Reg); Streamer.EmitIntValue(dwarf::DW_CFA_def_cfa_register, 1); Streamer.EmitULEB128IntValue(Reg); @@ -1118,7 +1203,7 @@ void FrameEmitterImpl::EmitCFIInstruction(const MCCFIInstruction &Instr) { unsigned Reg = Instr.getRegister(); if (!IsEH) - Reg = MRI->getDwarfRegNum(MRI->getLLVMRegNum(Reg, true), false); + Reg = MRI->getDwarfRegNumFromDwarfEHRegNum(Reg); int Offset = Instr.getOffset(); if (IsRelative) @@ -1154,7 +1239,7 @@ void FrameEmitterImpl::EmitCFIInstruction(const MCCFIInstruction &Instr) { case MCCFIInstruction::OpRestore: { unsigned Reg = Instr.getRegister(); if (!IsEH) - Reg = MRI->getDwarfRegNum(MRI->getLLVMRegNum(Reg, true), false); + Reg = MRI->getDwarfRegNumFromDwarfEHRegNum(Reg); Streamer.EmitIntValue(dwarf::DW_CFA_restore | Reg, 1); return; } diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp index 38a8af49c194..f8fff4414f49 100644 --- a/lib/MC/MCExpr.cpp +++ b/lib/MC/MCExpr.cpp @@ -224,6 +224,13 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) { case VK_ARM_SBREL: return "sbrel"; case VK_ARM_TLSLDO: return "tlsldo"; case VK_ARM_TLSDESCSEQ: return "tlsdescseq"; + case VK_AVR_NONE: return "none"; + case VK_AVR_LO8: return "lo8"; + case VK_AVR_HI8: return "hi8"; + case VK_AVR_HLO8: return "hlo8"; + case VK_AVR_DIFF8: return "diff8"; + case VK_AVR_DIFF16: return "diff16"; + case VK_AVR_DIFF32: return "diff32"; case VK_PPC_LO: return "l"; case VK_PPC_HI: return "h"; case VK_PPC_HA: return "ha"; @@ -389,6 +396,9 @@ MCSymbolRefExpr::getVariantKindForName(StringRef Name) { .Case("prel31", VK_ARM_PREL31) .Case("sbrel", VK_ARM_SBREL) .Case("tlsldo", VK_ARM_TLSLDO) + .Case("lo8", VK_AVR_LO8) + .Case("hi8", VK_AVR_HI8) + .Case("hlo8", VK_AVR_HLO8) .Case("gotpcrel32@lo", VK_AMDGPU_GOTPCREL32_LO) .Case("gotpcrel32@hi", VK_AMDGPU_GOTPCREL32_HI) .Case("rel32@lo", VK_AMDGPU_REL32_LO) diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp index a5c1b13df7ce..3969143bb2c7 100644 --- a/lib/MC/MCMachOStreamer.cpp +++ b/lib/MC/MCMachOStreamer.cpp @@ -88,6 +88,8 @@ class MCMachOStreamer : public MCObjectStreamer { void EmitDataRegion(MCDataRegionType Kind) override; void EmitVersionMin(MCVersionMinType Kind, unsigned Major, unsigned Minor, unsigned Update) override; + void EmitBuildVersion(unsigned Platform, unsigned Major, + unsigned 
Minor, unsigned Update) override; void EmitThumbFunc(MCSymbol *Func) override; bool EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) override; void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) override; @@ -265,7 +267,13 @@ void MCMachOStreamer::EmitDataRegion(MCDataRegionType Kind) { void MCMachOStreamer::EmitVersionMin(MCVersionMinType Kind, unsigned Major, unsigned Minor, unsigned Update) { - getAssembler().setVersionMinInfo(Kind, Major, Minor, Update); + getAssembler().setVersionMin(Kind, Major, Minor, Update); +} + +void MCMachOStreamer::EmitBuildVersion(unsigned Platform, unsigned Major, + unsigned Minor, unsigned Update) { + getAssembler().setBuildVersion((MachO::PlatformType)Platform, Major, Minor, + Update); } void MCMachOStreamer::EmitThumbFunc(MCSymbol *Symbol) { @@ -403,29 +411,19 @@ void MCMachOStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, void MCMachOStreamer::EmitZerofill(MCSection *Section, MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment) { - getAssembler().registerSection(*Section); - - // The symbol may not be present, which only creates the section. - if (!Symbol) - return; - // On darwin all virtual sections have zerofill type. assert(Section->isVirtualSection() && "Section does not have zerofill type!"); - assert(Symbol->isUndefined() && "Cannot define a symbol twice!"); + PushSection(); + SwitchSection(Section); - getAssembler().registerSymbol(*Symbol); - - // Emit an align fragment if necessary. - if (ByteAlignment != 1) - new MCAlignFragment(ByteAlignment, 0, 0, ByteAlignment, Section); - - MCFragment *F = new MCFillFragment(0, Size, Section); - Symbol->setFragment(F); - - // Update the maximum alignment on the zero fill section if necessary. - if (ByteAlignment > Section->getAlignment()) - Section->setAlignment(ByteAlignment); + // The symbol may not be present, which only creates the section. + if (Symbol) { + EmitValueToAlignment(ByteAlignment, 0, 1, 0); + EmitLabel(Symbol); + EmitZeros(Size); + } + PopSection(); } // This should always be called with the thread local bss section. Like the @@ -494,26 +492,8 @@ MCStreamer *llvm::createMachOStreamer(MCContext &Context, MCMachOStreamer *S = new MCMachOStreamer(Context, std::move(MAB), OS, std::move(CE), DWARFMustBeAtTheEnd, LabelSections); - const Triple &TT = Context.getObjectFileInfo()->getTargetTriple(); - if (TT.isOSDarwin()) { - unsigned Major, Minor, Update; - TT.getOSVersion(Major, Minor, Update); - // If there is a version specified, Major will be non-zero. 
- if (Major) { - MCVersionMinType VersionType; - if (TT.isWatchOS()) - VersionType = MCVM_WatchOSVersionMin; - else if (TT.isTvOS()) - VersionType = MCVM_TvOSVersionMin; - else if (TT.isMacOSX()) - VersionType = MCVM_OSXVersionMin; - else { - assert(TT.isiOS() && "Must only be iOS platform left"); - VersionType = MCVM_IOSVersionMin; - } - S->EmitVersionMin(VersionType, Major, Minor, Update); - } - } + const Triple &Target = Context.getObjectFileInfo()->getTargetTriple(); + S->EmitVersionForTarget(Target); if (RelaxAll) S->getAssembler().setRelaxAll(true); return S; diff --git a/lib/MC/MCNullStreamer.cpp b/lib/MC/MCNullStreamer.cpp index 4db9a2c8d8de..ccf658e1d135 100644 --- a/lib/MC/MCNullStreamer.cpp +++ b/lib/MC/MCNullStreamer.cpp @@ -7,9 +7,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/MC/MCContext.h" #include "llvm/MC/MCInst.h" -#include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp index d8077df14698..f0f4dee8fc14 100644 --- a/lib/MC/MCObjectFileInfo.cpp +++ b/lib/MC/MCObjectFileInfo.cpp @@ -185,6 +185,7 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(const Triple &T) { COFFDebugSymbolsSection = nullptr; COFFDebugTypesSection = nullptr; + COFFGlobalTypeHashesSection = nullptr; if (useCompactUnwind(T)) { CompactUnwindSection = @@ -594,6 +595,8 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(const Triple &T, bool Large) { EHFrameSection = Ctx->getELFSection(".eh_frame", EHSectionType, EHSectionFlags); + + StackSizesSection = Ctx->getELFSection(".stack_sizes", ELF::SHT_PROGBITS, 0); } void MCObjectFileInfo::initCOFFMCObjectFileInfo(const Triple &T) { @@ -653,6 +656,11 @@ void MCObjectFileInfo::initCOFFMCObjectFileInfo(const Triple &T) { COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ), SectionKind::getMetadata()); + COFFGlobalTypeHashesSection = Ctx->getCOFFSection( + ".debug$H", + (COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ), + SectionKind::getMetadata()); DwarfAbbrevSection = Ctx->getCOFFSection( ".debug_abbrev", @@ -811,6 +819,11 @@ void MCObjectFileInfo::initCOFFMCObjectFileInfo(const Triple &T) { SXDataSection = Ctx->getCOFFSection(".sxdata", COFF::IMAGE_SCN_LNK_INFO, SectionKind::getMetadata()); + GFIDsSection = Ctx->getCOFFSection(".gfids$y", + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata()); + TLSDataSection = Ctx->getCOFFSection( ".tls$", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ | COFF::IMAGE_SCN_MEM_WRITE, diff --git a/lib/MC/MCObjectStreamer.cpp b/lib/MC/MCObjectStreamer.cpp index f226c2f0a308..230c02188a6d 100644 --- a/lib/MC/MCObjectStreamer.cpp +++ b/lib/MC/MCObjectStreamer.cpp @@ -10,7 +10,6 @@ #include "llvm/MC/MCObjectStreamer.h" #include "llvm/ADT/STLExtras.h" #include "llvm/MC/MCAsmBackend.h" -#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCCodeView.h" @@ -22,7 +21,6 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/SourceMgr.h" -#include "llvm/Support/TargetRegistry.h" using namespace llvm; MCObjectStreamer::MCObjectStreamer(MCContext &Context, @@ -579,28 +577,13 @@ bool MCObjectStreamer::EmitRelocDirective(const MCExpr &Offset, StringRef Name, return false; } -void MCObjectStreamer::emitFill(uint64_t NumBytes, uint8_t FillValue) { - 
assert(getCurrentSectionOnly() && "need a section"); - insert(new MCFillFragment(FillValue, NumBytes)); -} - void MCObjectStreamer::emitFill(const MCExpr &NumBytes, uint64_t FillValue, SMLoc Loc) { MCDataFragment *DF = getOrCreateDataFragment(); flushPendingLabels(DF, DF->getContents().size()); - int64_t IntNumBytes; - if (!NumBytes.evaluateAsAbsolute(IntNumBytes, getAssembler())) { - getContext().reportError(Loc, "expected absolute expression"); - return; - } - - if (IntNumBytes <= 0) { - getContext().reportError(Loc, "invalid number of bytes"); - return; - } - - emitFill(IntNumBytes, FillValue); + assert(getCurrentSectionOnly() && "need a section"); + insert(new MCFillFragment(FillValue, NumBytes, Loc)); } void MCObjectStreamer::emitFill(const MCExpr &NumValues, int64_t Size, @@ -618,7 +601,13 @@ void MCObjectStreamer::emitFill(const MCExpr &NumValues, int64_t Size, return; } - MCStreamer::emitFill(IntNumValues, Size, Expr); + int64_t NonZeroSize = Size > 4 ? 4 : Size; + Expr &= ~0ULL >> (64 - NonZeroSize * 8); + for (uint64_t i = 0, e = IntNumValues; i != e; ++i) { + EmitIntValue(Expr, NonZeroSize); + if (NonZeroSize < Size) + EmitIntValue(0, Size - NonZeroSize); + } } void MCObjectStreamer::EmitFileDirective(StringRef Filename) { diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index 2259136c6ec4..17ad4e561e30 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -50,6 +50,7 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MD5.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/SMLoc.h" @@ -3294,8 +3295,8 @@ bool AsmParser::parseDirectiveAlign(bool IsPow2, unsigned ValueSize) { } /// parseDirectiveFile -/// ::= .file [number] filename -/// ::= .file number directory filename +/// ::= .file filename +/// ::= .file number [directory] filename [md5 checksum] bool AsmParser::parseDirectiveFile(SMLoc DirectiveLoc) { // FIXME: I'm not sure what this is. int64_t FileNumber = -1; @@ -3331,19 +3332,43 @@ bool AsmParser::parseDirectiveFile(SMLoc DirectiveLoc) { Filename = Path; } - if (parseToken(AsmToken::EndOfStatement, - "unexpected token in '.file' directive")) - return true; + std::string Checksum; + if (!parseOptionalToken(AsmToken::EndOfStatement)) { + StringRef Keyword; + if (check(getTok().isNot(AsmToken::Identifier), + "unexpected token in '.file' directive") || + parseIdentifier(Keyword) || + check(Keyword != "md5", "unexpected token in '.file' directive")) + return true; + if (getLexer().is(AsmToken::String) && + check(FileNumber == -1, "MD5 checksum specified, but no file number")) + return true; + if (check(getTok().isNot(AsmToken::String), + "unexpected token in '.file' directive") || + parseEscapedString(Checksum) || + check(Checksum.size() != 32, "invalid MD5 checksum specified") || + parseToken(AsmToken::EndOfStatement, + "unexpected token in '.file' directive")) + return true; + } if (FileNumber == -1) getStreamer().EmitFileDirective(Filename); else { + MD5::MD5Result *CKMem = nullptr; + if (!Checksum.empty()) { + Checksum = fromHex(Checksum); + if (check(Checksum.size() != 16, "invalid MD5 checksum specified")) + return true; + CKMem = (MD5::MD5Result *)Ctx.allocate(sizeof(MD5::MD5Result), 1); + memcpy(&CKMem->Bytes, Checksum.data(), 16); + } // If there is -g option as well as debug info from directive file, // we turn off -g option, directly use the existing debug info instead. 
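(Concretely, a '.file' directive of the following form is now accepted: the 32 hex digits are decoded with fromHex(), copied into a context-allocated MD5::MD5Result, and handed to EmitDwarfFileDirective below. The path and digest here are invented for illustration:)

    .file 2 "include" "a.h" md5 "d41d8cd98f00b204e9800998ecf8427e"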
if (getContext().getGenDwarfForAssembly()) getContext().setGenDwarfForAssembly(false); - else if (getStreamer().EmitDwarfFileDirective(FileNumber, Directory, Filename) == - 0) + else if (getStreamer().EmitDwarfFileDirective(FileNumber, Directory, + Filename, CKMem) == 0) return Error(FileNumberLoc, "file number already allocated"); } diff --git a/lib/MC/MCParser/COFFAsmParser.cpp b/lib/MC/MCParser/COFFAsmParser.cpp index 687e0cc1faa5..2a754eab05bb 100644 --- a/lib/MC/MCParser/COFFAsmParser.cpp +++ b/lib/MC/MCParser/COFFAsmParser.cpp @@ -65,8 +65,9 @@ class COFFAsmParser : public MCAsmParserExtension { addDirectiveHandler<&COFFAsmParser::ParseDirectiveType>(".type"); addDirectiveHandler<&COFFAsmParser::ParseDirectiveEndef>(".endef"); addDirectiveHandler<&COFFAsmParser::ParseDirectiveSecRel32>(".secrel32"); - addDirectiveHandler<&COFFAsmParser::ParseDirectiveSecIdx>(".secidx"); + addDirectiveHandler<&COFFAsmParser::ParseDirectiveSymIdx>(".symidx"); addDirectiveHandler<&COFFAsmParser::ParseDirectiveSafeSEH>(".safeseh"); + addDirectiveHandler<&COFFAsmParser::ParseDirectiveSecIdx>(".secidx"); addDirectiveHandler<&COFFAsmParser::ParseDirectiveLinkOnce>(".linkonce"); // Win64 EH directives. @@ -130,6 +131,7 @@ class COFFAsmParser : public MCAsmParserExtension { bool ParseDirectiveSecRel32(StringRef, SMLoc); bool ParseDirectiveSecIdx(StringRef, SMLoc); bool ParseDirectiveSafeSEH(StringRef, SMLoc); + bool ParseDirectiveSymIdx(StringRef, SMLoc); bool parseCOMDATType(COFF::COMDATType &Type); bool ParseDirectiveLinkOnce(StringRef, SMLoc); @@ -520,6 +522,21 @@ bool COFFAsmParser::ParseDirectiveSecIdx(StringRef, SMLoc) { return false; } +bool COFFAsmParser::ParseDirectiveSymIdx(StringRef, SMLoc) { + StringRef SymbolID; + if (getParser().parseIdentifier(SymbolID)) + return TokError("expected identifier in directive"); + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in directive"); + + MCSymbol *Symbol = getContext().getOrCreateSymbol(SymbolID); + + Lex(); + getStreamer().EmitCOFFSymbolIndex(Symbol); + return false; +} + /// ::= [ identifier ] bool COFFAsmParser::parseCOMDATType(COFF::COMDATType &Type) { StringRef TypeId = getTok().getIdentifier(); diff --git a/lib/MC/MCParser/DarwinAsmParser.cpp b/lib/MC/MCParser/DarwinAsmParser.cpp index f4152a9067a0..5bbf49290f17 100644 --- a/lib/MC/MCParser/DarwinAsmParser.cpp +++ b/lib/MC/MCParser/DarwinAsmParser.cpp @@ -54,7 +54,7 @@ class DarwinAsmParser : public MCAsmParserExtension { unsigned TAA = 0, unsigned ImplicitAlign = 0, unsigned StubSize = 0); - SMLoc LastVersionMinDirective; + SMLoc LastVersionDirective; public: DarwinAsmParser() = default; @@ -186,14 +186,17 @@ class DarwinAsmParser : public MCAsmParserExtension { addDirectiveHandler<&DarwinAsmParser::parseSectionDirectiveTLV>(".tlv"); addDirectiveHandler<&DarwinAsmParser::parseSectionDirectiveIdent>(".ident"); - addDirectiveHandler<&DarwinAsmParser::parseVersionMin>( + addDirectiveHandler<&DarwinAsmParser::parseWatchOSVersionMin>( ".watchos_version_min"); - addDirectiveHandler<&DarwinAsmParser::parseVersionMin>(".tvos_version_min"); - addDirectiveHandler<&DarwinAsmParser::parseVersionMin>(".ios_version_min"); - addDirectiveHandler<&DarwinAsmParser::parseVersionMin>( + addDirectiveHandler<&DarwinAsmParser::parseTvOSVersionMin>( + ".tvos_version_min"); + addDirectiveHandler<&DarwinAsmParser::parseIOSVersionMin>( + ".ios_version_min"); + addDirectiveHandler<&DarwinAsmParser::parseMacOSXVersionMin>( ".macosx_version_min"); + 
addDirectiveHandler<&DarwinAsmParser::parseBuildVersion>(".build_version"); - LastVersionMinDirective = SMLoc(); + LastVersionDirective = SMLoc(); } bool parseDirectiveAltEntry(StringRef, SMLoc); @@ -441,7 +444,24 @@ class DarwinAsmParser : public MCAsmParserExtension { MachO::S_THREAD_LOCAL_INIT_FUNCTION_POINTERS); } - bool parseVersionMin(StringRef, SMLoc); + bool parseWatchOSVersionMin(StringRef Directive, SMLoc Loc) { + return parseVersionMin(Directive, Loc, MCVM_WatchOSVersionMin); + } + bool parseTvOSVersionMin(StringRef Directive, SMLoc Loc) { + return parseVersionMin(Directive, Loc, MCVM_TvOSVersionMin); + } + bool parseIOSVersionMin(StringRef Directive, SMLoc Loc) { + return parseVersionMin(Directive, Loc, MCVM_IOSVersionMin); + } + bool parseMacOSXVersionMin(StringRef Directive, SMLoc Loc) { + return parseVersionMin(Directive, Loc, MCVM_OSXVersionMin); + } + + bool parseBuildVersion(StringRef Directive, SMLoc Loc); + bool parseVersionMin(StringRef Directive, SMLoc Loc, MCVersionMinType Type); + bool parseVersion(unsigned *Major, unsigned *Minor, unsigned *Update); + void checkVersion(StringRef Directive, StringRef Arg, SMLoc Loc, + Triple::OSType ExpectedOS); }; } // end anonymous namespace @@ -978,70 +998,144 @@ bool DarwinAsmParser::parseDirectiveDataRegionEnd(StringRef, SMLoc) { return false; } -/// parseVersionMin -/// ::= .ios_version_min major,minor[,update] -/// ::= .macosx_version_min major,minor[,update] -bool DarwinAsmParser::parseVersionMin(StringRef Directive, SMLoc Loc) { - int64_t Major = 0, Minor = 0, Update = 0; - int Kind = StringSwitch(Directive) - .Case(".watchos_version_min", MCVM_WatchOSVersionMin) - .Case(".tvos_version_min", MCVM_TvOSVersionMin) - .Case(".ios_version_min", MCVM_IOSVersionMin) - .Case(".macosx_version_min", MCVM_OSXVersionMin); +/// parseVersion ::= major, minor [, update] +bool DarwinAsmParser::parseVersion(unsigned *Major, unsigned *Minor, + unsigned *Update) { // Get the major version number. if (getLexer().isNot(AsmToken::Integer)) + return TokError("invalid OS major version number, integer expected"); + int64_t MajorVal = getLexer().getTok().getIntVal(); + if (MajorVal > 65535 || MajorVal <= 0) return TokError("invalid OS major version number"); - Major = getLexer().getTok().getIntVal(); - if (Major > 65535 || Major <= 0) - return TokError("invalid OS major version number"); + *Major = (unsigned)MajorVal; Lex(); if (getLexer().isNot(AsmToken::Comma)) - return TokError("minor OS version number required, comma expected"); + return TokError("OS minor version number required, comma expected"); Lex(); // Get the minor version number. 
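(parseVersion now backs both the *_version_min directives and the new .build_version directive, so an input such as ".build_version ios, 12, 1" ultimately results in the call below; the platform and version numbers are invented for illustration:)

    getStreamer().EmitBuildVersion(MachO::PLATFORM_IOS, /*Major=*/12,
                                   /*Minor=*/1, /*Update=*/0);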
if (getLexer().isNot(AsmToken::Integer)) + return TokError("invalid OS minor version number, integer expected"); + int64_t MinorVal = getLexer().getTok().getIntVal(); + if (MinorVal > 255 || MinorVal < 0) return TokError("invalid OS minor version number"); - Minor = getLexer().getTok().getIntVal(); - if (Minor > 255 || Minor < 0) - return TokError("invalid OS minor version number"); + *Minor = MinorVal; Lex(); + // Get the update level, if specified - if (getLexer().isNot(AsmToken::EndOfStatement)) { - if (getLexer().isNot(AsmToken::Comma)) - return TokError("invalid update specifier, comma expected"); - Lex(); - if (getLexer().isNot(AsmToken::Integer)) - return TokError("invalid OS update number"); - Update = getLexer().getTok().getIntVal(); - if (Update > 255 || Update < 0) - return TokError("invalid OS update number"); - Lex(); + *Update = 0; + if (getLexer().is(AsmToken::EndOfStatement)) + return false; + if (getLexer().isNot(AsmToken::Comma)) + return TokError("invalid OS update specifier, comma expected"); + Lex(); + if (getLexer().isNot(AsmToken::Integer)) + return TokError("invalid OS update version number, integer expected"); + int64_t UpdateVal = getLexer().getTok().getIntVal(); + if (UpdateVal > 255 || UpdateVal < 0) + return TokError("invalid OS update version number"); + *Update = UpdateVal; + Lex(); + return false; +} + +void DarwinAsmParser::checkVersion(StringRef Directive, StringRef Arg, + SMLoc Loc, Triple::OSType ExpectedOS) { + const Triple &Target = getContext().getObjectFileInfo()->getTargetTriple(); + if (Target.getOS() != ExpectedOS) + Warning(Loc, Twine(Directive) + + (Arg.empty() ? Twine() : Twine(' ') + Arg) + + " used while targeting " + Target.getOSName()); + + if (LastVersionDirective.isValid()) { + Warning(Loc, "overriding previous version directive"); + Note(LastVersionDirective, "previous definition is here"); } + LastVersionDirective = Loc; +} - const Triple &T = getContext().getObjectFileInfo()->getTargetTriple(); - Triple::OSType ExpectedOS = Triple::UnknownOS; - switch ((MCVersionMinType)Kind) { - case MCVM_WatchOSVersionMin: ExpectedOS = Triple::WatchOS; break; - case MCVM_TvOSVersionMin: ExpectedOS = Triple::TvOS; break; - case MCVM_IOSVersionMin: ExpectedOS = Triple::IOS; break; - case MCVM_OSXVersionMin: ExpectedOS = Triple::MacOSX; break; +static Triple::OSType getOSTypeFromMCVM(MCVersionMinType Type) { + switch (Type) { + case MCVM_WatchOSVersionMin: return Triple::WatchOS; + case MCVM_TvOSVersionMin: return Triple::TvOS; + case MCVM_IOSVersionMin: return Triple::IOS; + case MCVM_OSXVersionMin: return Triple::MacOSX; } - if (T.getOS() != ExpectedOS) - Warning(Loc, Directive + " should only be used for " + - Triple::getOSTypeName(ExpectedOS) + " targets"); + llvm_unreachable("Invalid mc version min type"); +} + +/// parseVersionMin +/// ::= .ios_version_min parseVersion +/// | .macosx_version_min parseVersion +/// | .tvos_version_min parseVersion +/// | .watchos_version_min parseVersion +bool DarwinAsmParser::parseVersionMin(StringRef Directive, SMLoc Loc, + MCVersionMinType Type) { + unsigned Major; + unsigned Minor; + unsigned Update; + if (parseVersion(&Major, &Minor, &Update)) + return true; + + if (parseToken(AsmToken::EndOfStatement)) + return addErrorSuffix(Twine(" in '") + Directive + "' directive"); + + Triple::OSType ExpectedOS = getOSTypeFromMCVM(Type); + checkVersion(Directive, StringRef(), Loc, ExpectedOS); + + getStreamer().EmitVersionMin(Type, Major, Minor, Update); + return false; +} - if (LastVersionMinDirective.isValid()) { - 
Warning(Loc, "overriding previous version_min directive"); - Note(LastVersionMinDirective, "previous definition is here"); +static Triple::OSType getOSTypeFromPlatform(MachO::PlatformType Type) { + switch (Type) { + case MachO::PLATFORM_MACOS: return Triple::MacOSX; + case MachO::PLATFORM_IOS: return Triple::IOS; + case MachO::PLATFORM_TVOS: return Triple::TvOS; + case MachO::PLATFORM_WATCHOS: return Triple::WatchOS; + case MachO::PLATFORM_BRIDGEOS: /* silence warning */break; } - LastVersionMinDirective = Loc; + llvm_unreachable("Invalid mach-o platform type"); +} - // We've parsed a correct version specifier, so send it to the streamer. - getStreamer().EmitVersionMin((MCVersionMinType)Kind, Major, Minor, Update); +/// parseBuildVersion +/// ::= .build_version (macos|ios|tvos|watchos), parseVersion +bool DarwinAsmParser::parseBuildVersion(StringRef Directive, SMLoc Loc) { + StringRef PlatformName; + SMLoc PlatformLoc = getTok().getLoc(); + if (getParser().parseIdentifier(PlatformName)) + return TokError("platform name expected"); + + unsigned Platform = StringSwitch(PlatformName) + .Case("macos", MachO::PLATFORM_MACOS) + .Case("ios", MachO::PLATFORM_IOS) + .Case("tvos", MachO::PLATFORM_TVOS) + .Case("watchos", MachO::PLATFORM_WATCHOS) + .Default(0); + if (Platform == 0) + return Error(PlatformLoc, "unknown platform name"); + if (getLexer().isNot(AsmToken::Comma)) + return TokError("version number required, comma expected"); + Lex(); + + unsigned Major; + unsigned Minor; + unsigned Update; + if (parseVersion(&Major, &Minor, &Update)) + return true; + + if (parseToken(AsmToken::EndOfStatement)) + return addErrorSuffix(" in '.build_version' directive"); + + Triple::OSType ExpectedOS + = getOSTypeFromPlatform((MachO::PlatformType)Platform); + checkVersion(Directive, PlatformName, Loc, ExpectedOS); + + getStreamer().EmitBuildVersion(Platform, Major, Minor, Update); return false; } + namespace llvm { MCAsmParserExtension *createDarwinAsmParser() { diff --git a/lib/MC/MCParser/ELFAsmParser.cpp b/lib/MC/MCParser/ELFAsmParser.cpp index 38720c23ff26..c634df99a115 100644 --- a/lib/MC/MCParser/ELFAsmParser.cpp +++ b/lib/MC/MCParser/ELFAsmParser.cpp @@ -423,13 +423,17 @@ bool ELFAsmParser::parseGroup(StringRef &GroupName) { if (L.isNot(AsmToken::Comma)) return TokError("expected group name"); Lex(); - if (getParser().parseIdentifier(GroupName)) - return true; + if (L.is(AsmToken::Integer)) { + GroupName = getTok().getString(); + Lex(); + } else if (getParser().parseIdentifier(GroupName)) { + return TokError("invalid group name"); + } if (L.is(AsmToken::Comma)) { Lex(); StringRef Linkage; if (getParser().parseIdentifier(Linkage)) - return true; + return TokError("invalid linkage"); if (Linkage != "comdat") return TokError("Linkage must be 'comdat'"); } @@ -443,7 +447,7 @@ bool ELFAsmParser::parseMetadataSym(MCSymbolELF *&Associated) { Lex(); StringRef Name; if (getParser().parseIdentifier(Name)) - return true; + return TokError("invalid metadata symbol"); Associated = dyn_cast_or_null(getContext().lookupSymbol(Name)); if (!Associated || !Associated->isInSection()) return TokError("symbol is not in a section: " + Name); diff --git a/lib/MC/MCRegisterInfo.cpp b/lib/MC/MCRegisterInfo.cpp index 0f76c1838b51..8e47963b4418 100644 --- a/lib/MC/MCRegisterInfo.cpp +++ b/lib/MC/MCRegisterInfo.cpp @@ -88,6 +88,34 @@ int MCRegisterInfo::getLLVMRegNum(unsigned RegNum, bool isEH) const { return I->ToReg; } +int MCRegisterInfo::getLLVMRegNumFromEH(unsigned RegNum) const { + const DwarfLLVMRegPair *M = EHDwarf2LRegs; 
+ unsigned Size = EHDwarf2LRegsSize; + + if (!M) + return -1; + DwarfLLVMRegPair Key = { RegNum, 0 }; + const DwarfLLVMRegPair *I = std::lower_bound(M, M+Size, Key); + if (I == M+Size || I->FromReg != RegNum) + return -1; + return I->ToReg; +} + +int MCRegisterInfo::getDwarfRegNumFromDwarfEHRegNum(unsigned RegNum) const { + // On ELF platforms, DWARF EH register numbers are the same as DWARF + // other register numbers. On Darwin x86, they differ and so need to be + // mapped. The .cfi_* directives accept integer literals as well as + // register names and should generate exactly what the assembly code + // asked for, so there might be DWARF/EH register numbers that don't have + // a corresponding LLVM register number at all. So if we can't map the + // EH register number to an LLVM register number, assume it's just a + // valid DWARF register number as is. + int LRegNum = getLLVMRegNumFromEH(RegNum); + if (LRegNum != -1) + return getDwarfRegNum(LRegNum, false); + return RegNum; +} + int MCRegisterInfo::getSEHRegNum(unsigned RegNum) const { const DenseMap::const_iterator I = L2SEHRegs.find(RegNum); if (I == L2SEHRegs.end()) return (int)RegNum; diff --git a/lib/MC/MCSectionWasm.cpp b/lib/MC/MCSectionWasm.cpp index c61f28e129f5..626027a24f97 100644 --- a/lib/MC/MCSectionWasm.cpp +++ b/lib/MC/MCSectionWasm.cpp @@ -9,7 +9,6 @@ #include "llvm/MC/MCSectionWasm.h" #include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp index 4067df0eaf57..9dcd1e9101cf 100644 --- a/lib/MC/MCStreamer.cpp +++ b/lib/MC/MCStreamer.cpp @@ -49,6 +49,28 @@ void MCTargetStreamer::emitLabel(MCSymbol *Symbol) {} void MCTargetStreamer::finish() {} +void MCTargetStreamer::changeSection(const MCSection *CurSection, + MCSection *Section, + const MCExpr *Subsection, + raw_ostream &OS) { + Section->PrintSwitchToSection( + *Streamer.getContext().getAsmInfo(), + Streamer.getContext().getObjectFileInfo()->getTargetTriple(), OS, + Subsection); +} + +void MCTargetStreamer::emitDwarfFileDirective(StringRef Directive) { + Streamer.EmitRawText(Directive); +} + +void MCTargetStreamer::emitValue(const MCExpr *Value) { + SmallString<128> Str; + raw_svector_ostream OS(Str); + + Value->print(OS, Streamer.getContext().getAsmInfo()); + Streamer.EmitRawText(OS.str()); +} + void MCTargetStreamer::emitAssignment(MCSymbol *Symbol, const MCExpr *Value) {} MCStreamer::MCStreamer(MCContext &Ctx) @@ -162,18 +184,7 @@ void MCStreamer::EmitGPRel32Value(const MCExpr *Value) { /// Emit NumBytes bytes worth of the value specified by FillValue. /// This implements directives such as '.space'. void MCStreamer::emitFill(uint64_t NumBytes, uint8_t FillValue) { - for (uint64_t i = 0, e = NumBytes; i != e; ++i) - EmitIntValue(FillValue, 1); -} - -void MCStreamer::emitFill(uint64_t NumValues, int64_t Size, int64_t Expr) { - int64_t NonZeroSize = Size > 4 ? 4 : Size; - Expr &= ~0ULL >> (64 - NonZeroSize * 8); - for (uint64_t i = 0, e = NumValues; i != e; ++i) { - EmitIntValue(Expr, NonZeroSize); - if (NonZeroSize < Size) - EmitIntValue(0, Size - NonZeroSize); - } + emitFill(*MCConstantExpr::create(NumBytes, getContext()), FillValue); } /// The implementation in this class just redirects to emitFill. 
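After this change the byte-count form of emitFill shown above no longer loops over EmitIntValue; it wraps its count in an MCConstantExpr and defers to the expression-based overload. A minimal sketch of the equivalent call from outside MCStreamer, assuming an already-configured streamer S with a live MCContext (the helper name emitSpaceLike is illustrative, not part of the patch):

#include <cstdint>
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCStreamer.h"

// Sketch only: roughly what a ".space NumBytes, FillValue" style request
// reduces to after this patch; the expression-based overload does the work.
static void emitSpaceLike(llvm::MCStreamer &S, uint64_t NumBytes,
                          uint8_t FillValue) {
  const llvm::MCExpr *Count =
      llvm::MCConstantExpr::create(NumBytes, S.getContext());
  S.emitFill(*Count, FillValue);
}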
@@ -183,8 +194,10 @@ void MCStreamer::EmitZeros(uint64_t NumBytes) { unsigned MCStreamer::EmitDwarfFileDirective(unsigned FileNo, StringRef Directory, - StringRef Filename, unsigned CUID) { - return getContext().getDwarfFile(Directory, Filename, FileNo, CUID); + StringRef Filename, + MD5::MD5Result *Checksum, + unsigned CUID) { + return getContext().getDwarfFile(Directory, Filename, FileNo, Checksum, CUID); } void MCStreamer::EmitDwarfLocDirective(unsigned FileNo, unsigned Line, @@ -782,6 +795,8 @@ void MCStreamer::EmitWinCFIEndProlog(SMLoc Loc) { void MCStreamer::EmitCOFFSafeSEH(MCSymbol const *Symbol) { } +void MCStreamer::EmitCOFFSymbolIndex(MCSymbol const *Symbol) {} + void MCStreamer::EmitCOFFSectionIndex(MCSymbol const *Symbol) { } @@ -959,3 +974,32 @@ MCSymbol *MCStreamer::endSection(MCSection *Section) { EmitLabel(Sym); return Sym; } + +void MCStreamer::EmitVersionForTarget(const Triple &Target) { + if (!Target.isOSBinFormatMachO() || !Target.isOSDarwin()) + return; + // Do we even know the version? + if (Target.getOSMajorVersion() == 0) + return; + + unsigned Major; + unsigned Minor; + unsigned Update; + MCVersionMinType VersionType; + if (Target.isWatchOS()) { + VersionType = MCVM_WatchOSVersionMin; + Target.getWatchOSVersion(Major, Minor, Update); + } else if (Target.isTvOS()) { + VersionType = MCVM_TvOSVersionMin; + Target.getiOSVersion(Major, Minor, Update); + } else if (Target.isMacOSX()) { + VersionType = MCVM_OSXVersionMin; + if (!Target.getMacOSXVersion(Major, Minor, Update)) + Major = 0; + } else { + VersionType = MCVM_IOSVersionMin; + Target.getiOSVersion(Major, Minor, Update); + } + if (Major != 0) + EmitVersionMin(VersionType, Major, Minor, Update); +} diff --git a/lib/MC/MCSymbolELF.cpp b/lib/MC/MCSymbolELF.cpp index 67449eb6dcf9..12c724f6b1ee 100644 --- a/lib/MC/MCSymbolELF.cpp +++ b/lib/MC/MCSymbolELF.cpp @@ -9,7 +9,6 @@ #include "llvm/MC/MCSymbolELF.h" #include "llvm/BinaryFormat/ELF.h" -#include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCFixupKindInfo.h" namespace llvm { diff --git a/lib/MC/MCWasmStreamer.cpp b/lib/MC/MCWasmStreamer.cpp index 287b7cf7b23f..d9cefbd3994f 100644 --- a/lib/MC/MCWasmStreamer.cpp +++ b/lib/MC/MCWasmStreamer.cpp @@ -15,16 +15,13 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/MC/MCAsmBackend.h" -#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" -#include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCObjectStreamer.h" -#include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCSectionWasm.h" #include "llvm/MC/MCSymbol.h" @@ -98,10 +95,13 @@ bool MCWasmStreamer::EmitSymbolAttribute(MCSymbol *S, MCSymbolAttr Attribute) { case MCSA_WeakDefAutoPrivate: case MCSA_Invalid: case MCSA_IndirectSymbol: - case MCSA_Hidden: case MCSA_Protected: return false; + case MCSA_Hidden: + Symbol->setHidden(true); + break; + case MCSA_Weak: case MCSA_WeakReference: Symbol->setWeak(true); diff --git a/lib/MC/MCWin64EH.cpp b/lib/MC/MCWin64EH.cpp index 44dd8f1385a0..1407f25e6f2a 100644 --- a/lib/MC/MCWin64EH.cpp +++ b/lib/MC/MCWin64EH.cpp @@ -11,8 +11,6 @@ #include "llvm/ADT/Twine.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCObjectFileInfo.h" -#include "llvm/MC/MCSectionCOFF.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/Win64EH.h" diff --git 
a/lib/MC/MCWinCOFFStreamer.cpp b/lib/MC/MCWinCOFFStreamer.cpp index c2583d95c5ed..efb60b7a03bf 100644 --- a/lib/MC/MCWinCOFFStreamer.cpp +++ b/lib/MC/MCWinCOFFStreamer.cpp @@ -193,6 +193,17 @@ void MCWinCOFFStreamer::EmitCOFFSafeSEH(MCSymbol const *Symbol) { << COFF::SCT_COMPLEX_TYPE_SHIFT); } +void MCWinCOFFStreamer::EmitCOFFSymbolIndex(MCSymbol const *Symbol) { + MCSection *Sec = getCurrentSectionOnly(); + getAssembler().registerSection(*Sec); + if (Sec->getAlignment() < 4) + Sec->setAlignment(4); + + new MCSymbolIdFragment(Symbol, getCurrentSectionOnly()); + + getAssembler().registerSymbol(*Symbol); +} + void MCWinCOFFStreamer::EmitCOFFSectionIndex(const MCSymbol *Symbol) { visitUsedSymbol(*Symbol); MCDataFragment *DF = getOrCreateDataFragment(); @@ -257,20 +268,13 @@ void MCWinCOFFStreamer::EmitLocalCommonSymbol(MCSymbol *S, uint64_t Size, auto *Symbol = cast(S); MCSection *Section = getContext().getObjectFileInfo()->getBSSSection(); - getAssembler().registerSection(*Section); - if (Section->getAlignment() < ByteAlignment) - Section->setAlignment(ByteAlignment); - - getAssembler().registerSymbol(*Symbol); + PushSection(); + SwitchSection(Section); + EmitValueToAlignment(ByteAlignment, 0, 1, 0); + EmitLabel(Symbol); Symbol->setExternal(false); - - if (ByteAlignment != 1) - new MCAlignFragment(ByteAlignment, /*Value=*/0, /*ValueSize=*/0, - ByteAlignment, Section); - - MCFillFragment *Fragment = new MCFillFragment( - /*Value=*/0, Size, Section); - Symbol->setFragment(Fragment); + EmitZeros(Size); + PopSection(); } void MCWinCOFFStreamer::EmitZerofill(MCSection *Section, MCSymbol *Symbol, diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp index 7dbb84e166f2..c7eaa76ace3c 100644 --- a/lib/MC/MachObjectWriter.cpp +++ b/lib/MC/MachObjectWriter.cpp @@ -721,6 +721,16 @@ bool MachObjectWriter::isSymbolRefDifferenceFullyResolvedImpl( return false; } +static MachO::LoadCommandType getLCFromMCVM(MCVersionMinType Type) { + switch (Type) { + case MCVM_OSXVersionMin: return MachO::LC_VERSION_MIN_MACOSX; + case MCVM_IOSVersionMin: return MachO::LC_VERSION_MIN_IPHONEOS; + case MCVM_TvOSVersionMin: return MachO::LC_VERSION_MIN_TVOS; + case MCVM_WatchOSVersionMin: return MachO::LC_VERSION_MIN_WATCHOS; + } + llvm_unreachable("Invalid mc version min type"); +} + void MachObjectWriter::writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) { // Compute symbol table information and bind symbol indices. @@ -728,8 +738,8 @@ void MachObjectWriter::writeObject(MCAssembler &Asm, UndefinedSymbolData); unsigned NumSections = Asm.size(); - const MCAssembler::VersionMinInfoType &VersionInfo = - Layout.getAssembler().getVersionMinInfo(); + const MCAssembler::VersionInfoType &VersionInfo = + Layout.getAssembler().getVersionInfo(); // The section data starts after the header, the segment load command (and // section headers) and the symbol table. @@ -741,7 +751,10 @@ void MachObjectWriter::writeObject(MCAssembler &Asm, // Add the deployment target version info load command size, if used. if (VersionInfo.Major != 0) { ++NumLoadCommands; - LoadCommandsSize += sizeof(MachO::version_min_command); + if (VersionInfo.EmitBuildVersion) + LoadCommandsSize += sizeof(MachO::build_version_command); + else + LoadCommandsSize += sizeof(MachO::version_min_command); } // Add the data-in-code load command size, if used. 
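The hunk that follows packs the deployment target into a single word as update | (minor << 8) | (major << 16) and then writes either the new LC_BUILD_VERSION load command or one of the legacy LC_VERSION_MIN_* commands around it. A worked example with assumed values (macOS 10.13.2, not taken from the patch):

#include <cassert>
#include <cstdint>

int main() {
  // Assumed example deployment target: macOS 10.13.2.
  uint32_t Major = 10, Minor = 13, Update = 2;
  uint32_t EncodedVersion = Update | (Minor << 8) | (Major << 16);
  assert(EncodedVersion == 0x000A0D02u); // nibbles read back as 10.13.2
  return 0;
}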
@@ -832,25 +845,22 @@ void MachObjectWriter::writeObject(MCAssembler &Asm, assert(VersionInfo.Major < 65536 && "unencodable major target version"); uint32_t EncodedVersion = VersionInfo.Update | (VersionInfo.Minor << 8) | (VersionInfo.Major << 16); - MachO::LoadCommandType LCType; - switch (VersionInfo.Kind) { - case MCVM_OSXVersionMin: - LCType = MachO::LC_VERSION_MIN_MACOSX; - break; - case MCVM_IOSVersionMin: - LCType = MachO::LC_VERSION_MIN_IPHONEOS; - break; - case MCVM_TvOSVersionMin: - LCType = MachO::LC_VERSION_MIN_TVOS; - break; - case MCVM_WatchOSVersionMin: - LCType = MachO::LC_VERSION_MIN_WATCHOS; - break; + if (VersionInfo.EmitBuildVersion) { + // FIXME: Currently empty tools. Add clang version in the future. + write32(MachO::LC_BUILD_VERSION); + write32(sizeof(MachO::build_version_command)); + write32(VersionInfo.TypeOrPlatform.Platform); + write32(EncodedVersion); + write32(0); // SDK version. + write32(0); // Empty tools list. + } else { + MachO::LoadCommandType LCType + = getLCFromMCVM(VersionInfo.TypeOrPlatform.Type); + write32(LCType); + write32(sizeof(MachO::version_min_command)); + write32(EncodedVersion); + write32(0); // reserved. } - write32(LCType); - write32(sizeof(MachO::version_min_command)); - write32(EncodedVersion); - write32(0); // reserved. } // Write the data-in-code load command, if used. diff --git a/lib/MC/WasmObjectWriter.cpp b/lib/MC/WasmObjectWriter.cpp index 229708425b17..473f9fe7ede2 100644 --- a/lib/MC/WasmObjectWriter.cpp +++ b/lib/MC/WasmObjectWriter.cpp @@ -15,13 +15,11 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/BinaryFormat/Wasm.h" #include "llvm/MC/MCAsmBackend.h" -#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixupKindInfo.h" -#include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSectionWasm.h" #include "llvm/MC/MCSymbolWasm.h" @@ -40,6 +38,10 @@ using namespace llvm; namespace { +// When we create the indirect function table we start at 1, so that there is +// an empty slot at 0 and therefore calling a null function pointer will trap. +static const uint32_t kInitialTableOffset = 1; + // For patching purposes, we need to remember where each section starts, both // for patching up the section size field, and for patching up references to // locations within the section. @@ -115,6 +117,7 @@ struct WasmImport { StringRef FieldName; unsigned Kind; int32_t Type; + bool IsMutable; }; // A wasm function to be written into the function section. @@ -139,6 +142,14 @@ struct WasmGlobal { uint32_t ImportIndex; }; +// Information about a single item which is part of a COMDAT. For each data +// segment or function which is in the COMDAT, there is a corresponding +// WasmComdatEntry. +struct WasmComdatEntry { + unsigned Kind; + uint32_t Index; +}; + // Information about a single relocation. struct WasmRelocationEntry { uint64_t Offset; // Where is the relocation. @@ -214,6 +225,7 @@ class WasmObjectWriter : public MCObjectWriter { FunctionTypeIndices; SmallVector FunctionTypes; SmallVector Globals; + unsigned NumFunctionImports = 0; unsigned NumGlobalImports = 0; // TargetObjectWriter wrappers.
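To illustrate the kInitialTableOffset comment above: because slot 0 of the indirect function table is left empty, address-taken functions land in slots 1, 2, 3, and so on, so an indirect call through a null (index 0) function pointer hits the empty slot and traps. A purely illustrative sketch of that slot assignment (tableSlotFor is a made-up helper, not part of the writer):

#include <cstddef>
#include <cstdint>

// Sketch only: the Nth address-taken function (0-based) ends up at table slot
// N + kInitialTableOffset, so slot 0 is never a valid call target.
static uint32_t tableSlotFor(size_t NthAddressTakenFunction) {
  const uint32_t kInitialTableOffset = 1; // mirrors the constant above
  return static_cast<uint32_t>(NthAddressTakenFunction) + kInitialTableOffset;
}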
@@ -232,9 +244,9 @@ class WasmObjectWriter : public MCObjectWriter { : MCObjectWriter(OS, /*IsLittleEndian=*/true), TargetObjectWriter(std::move(MOTW)) {} -private: ~WasmObjectWriter() override; +private: void reset() override { CodeRelocations.clear(); DataRelocations.clear(); @@ -245,6 +257,7 @@ class WasmObjectWriter : public MCObjectWriter { FunctionTypes.clear(); Globals.clear(); MCObjectWriter::reset(); + NumFunctionImports = 0; NumGlobalImports = 0; } @@ -269,10 +282,9 @@ class WasmObjectWriter : public MCObjectWriter { } void writeTypeSection(ArrayRef FunctionTypes); - void writeImportSection(ArrayRef Imports); + void writeImportSection(ArrayRef Imports, uint32_t DataSize, + uint32_t NumElements); void writeFunctionSection(ArrayRef Functions); - void writeTableSection(uint32_t NumElements); - void writeMemorySection(uint32_t DataSize); void writeGlobalSection(); void writeExportSection(ArrayRef Exports); void writeElemSection(ArrayRef TableElems); @@ -280,14 +292,14 @@ class WasmObjectWriter : public MCObjectWriter { ArrayRef Functions); void writeDataSection(ArrayRef Segments); void writeNameSection(ArrayRef Functions, - ArrayRef Imports, - uint32_t NumFuncImports); + ArrayRef Imports); void writeCodeRelocSection(); void writeDataRelocSection(); void writeLinkingMetaDataSection( ArrayRef Segments, uint32_t DataSize, - SmallVector, 4> SymbolFlags, - bool HasStackPointer, uint32_t StackPointerGlobal); + ArrayRef> SymbolFlags, + ArrayRef> InitFuncs, + const std::map>& Comdats); uint32_t getProvisionalValue(const WasmRelocationEntry &RelEntry); void applyRelocations(ArrayRef Relocations, @@ -369,6 +381,10 @@ void WasmObjectWriter::recordRelocation(MCAssembler &Asm, uint64_t FixupOffset = Layout.getFragmentOffset(Fragment) + Fixup.getOffset(); MCContext &Ctx = Asm.getContext(); + // The .init_array isn't translated as data, so don't do relocations in it. + if (FixupSection.getSectionName().startswith(".init_array")) + return; + if (const MCSymbolRefExpr *RefB = Target.getSymB()) { assert(RefB->getKind() == MCSymbolRefExpr::VK_None && "Should not have constructed this"); @@ -490,9 +506,9 @@ uint32_t WasmObjectWriter::getProvisionalValue(const WasmRelocationEntry &RelEntry) { const MCSymbolWasm *Sym = ResolveSymbol(*RelEntry.Symbol); - // For undefined symbols, use a hopefully invalid value. 
- if (!Sym->isDefined(/*SetUsed=*/false)) - return UINT32_MAX; + // For undefined symbols, use zero + if (!Sym->isDefined()) + return 0; uint32_t GlobalIndex = SymbolIndices[Sym]; const WasmGlobal& Global = Globals[GlobalIndex - NumGlobalImports]; @@ -526,7 +542,10 @@ static void addData(SmallVectorImpl &DataBytes, Align->getMaxBytesToEmit()); DataBytes.resize(Size, Value); } else if (auto *Fill = dyn_cast(&Frag)) { - DataBytes.insert(DataBytes.end(), Fill->getSize(), Fill->getValue()); + int64_t Size; + if (!Fill->getSize().evaluateAsAbsolute(Size)) + llvm_unreachable("The fill should be an assembler constant"); + DataBytes.insert(DataBytes.end(), Size, Fill->getValue()); } else { const auto &DataFrag = cast(Frag); const SmallVectorImpl &Contents = DataFrag.getContents(); @@ -551,7 +570,7 @@ uint32_t WasmObjectWriter::getRelocationIndexValue( case wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB: case wasm::R_WEBASSEMBLY_TABLE_INDEX_I32: if (!IndirectSymbolIndices.count(RelEntry.Symbol)) - report_fatal_error("symbol not found table index space: " + + report_fatal_error("symbol not found in table index space: " + RelEntry.Symbol->getName()); return IndirectSymbolIndices[RelEntry.Symbol]; case wasm::R_WEBASSEMBLY_FUNCTION_INDEX_LEB: @@ -560,7 +579,7 @@ uint32_t WasmObjectWriter::getRelocationIndexValue( case wasm::R_WEBASSEMBLY_MEMORY_ADDR_SLEB: case wasm::R_WEBASSEMBLY_MEMORY_ADDR_I32: if (!SymbolIndices.count(RelEntry.Symbol)) - report_fatal_error("symbol not found function/global index space: " + + report_fatal_error("symbol not found in function/global index space: " + RelEntry.Symbol->getName()); return SymbolIndices[RelEntry.Symbol]; case wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB: @@ -661,10 +680,14 @@ void WasmObjectWriter::writeTypeSection( endSection(Section); } -void WasmObjectWriter::writeImportSection(ArrayRef Imports) { +void WasmObjectWriter::writeImportSection(ArrayRef Imports, + uint32_t DataSize, + uint32_t NumElements) { if (Imports.empty()) return; + uint32_t NumPages = (DataSize + wasm::WasmPageSize - 1) / wasm::WasmPageSize; + SectionBookkeeping Section; startSection(Section, wasm::WASM_SEC_IMPORT); @@ -681,7 +704,16 @@ void WasmObjectWriter::writeImportSection(ArrayRef Imports) { break; case wasm::WASM_EXTERNAL_GLOBAL: encodeSLEB128(int32_t(Import.Type), getStream()); - encodeULEB128(0, getStream()); // mutability + encodeULEB128(int32_t(Import.IsMutable), getStream()); + break; + case wasm::WASM_EXTERNAL_MEMORY: + encodeULEB128(0, getStream()); // flags + encodeULEB128(NumPages, getStream()); // initial + break; + case wasm::WASM_EXTERNAL_TABLE: + encodeSLEB128(int32_t(Import.Type), getStream()); + encodeULEB128(0, getStream()); // flags + encodeULEB128(NumElements, getStream()); // initial break; default: llvm_unreachable("unsupported import kind"); @@ -705,39 +737,6 @@ void WasmObjectWriter::writeFunctionSection(ArrayRef Functions) { endSection(Section); } -void WasmObjectWriter::writeTableSection(uint32_t NumElements) { - // For now, always emit the table section, since indirect calls are not - // valid without it. In the future, we could perhaps be more clever and omit - // it if there are no indirect calls. - - SectionBookkeeping Section; - startSection(Section, wasm::WASM_SEC_TABLE); - - encodeULEB128(1, getStream()); // The number of tables. - // Fixed to 1 for now. 
- encodeSLEB128(wasm::WASM_TYPE_ANYFUNC, getStream()); // Type of table - encodeULEB128(0, getStream()); // flags - encodeULEB128(NumElements, getStream()); // initial - - endSection(Section); -} - -void WasmObjectWriter::writeMemorySection(uint32_t DataSize) { - // For now, always emit the memory section, since loads and stores are not - // valid without it. In the future, we could perhaps be more clever and omit - // it if there are no loads or stores. - SectionBookkeeping Section; - uint32_t NumPages = (DataSize + wasm::WasmPageSize - 1) / wasm::WasmPageSize; - - startSection(Section, wasm::WASM_SEC_MEMORY); - encodeULEB128(1, getStream()); // number of memory spaces - - encodeULEB128(0, getStream()); // flags - encodeULEB128(NumPages, getStream()); // initial - - endSection(Section); -} - void WasmObjectWriter::writeGlobalSection() { if (Globals.empty()) return; @@ -794,7 +793,7 @@ void WasmObjectWriter::writeElemSection(ArrayRef TableElems) { // init expr for starting offset write8(wasm::WASM_OPCODE_I32_CONST); - encodeSLEB128(0, getStream()); + encodeSLEB128(kInitialTableOffset, getStream()); write8(wasm::WASM_OPCODE_END); encodeULEB128(TableElems.size(), getStream()); @@ -858,11 +857,9 @@ void WasmObjectWriter::writeDataSection(ArrayRef Segments) { endSection(Section); } -void WasmObjectWriter::writeNameSection( - ArrayRef Functions, - ArrayRef Imports, - unsigned NumFuncImports) { - uint32_t TotalFunctions = NumFuncImports + Functions.size(); +void WasmObjectWriter::writeNameSection(ArrayRef Functions, + ArrayRef Imports) { + uint32_t TotalFunctions = NumFunctionImports + Functions.size(); if (TotalFunctions == 0) return; @@ -928,18 +925,13 @@ void WasmObjectWriter::writeDataRelocSection() { void WasmObjectWriter::writeLinkingMetaDataSection( ArrayRef Segments, uint32_t DataSize, - SmallVector, 4> SymbolFlags, - bool HasStackPointer, uint32_t StackPointerGlobal) { + ArrayRef> SymbolFlags, + ArrayRef> InitFuncs, + const std::map>& Comdats) { SectionBookkeeping Section; startSection(Section, wasm::WASM_SEC_CUSTOM, "linking"); SectionBookkeeping SubSection; - if (HasStackPointer) { - startSection(SubSection, wasm::WASM_STACK_POINTER); - encodeULEB128(StackPointerGlobal, getStream()); // id - endSection(SubSection); - } - if (SymbolFlags.size() != 0) { startSection(SubSection, wasm::WASM_SYMBOL_INFO); encodeULEB128(SymbolFlags.size(), getStream()); @@ -967,6 +959,31 @@ void WasmObjectWriter::writeLinkingMetaDataSection( endSection(SubSection); } + if (!InitFuncs.empty()) { + startSection(SubSection, wasm::WASM_INIT_FUNCS); + encodeULEB128(InitFuncs.size(), getStream()); + for (auto &StartFunc : InitFuncs) { + encodeULEB128(StartFunc.first, getStream()); // priority + encodeULEB128(StartFunc.second, getStream()); // function index + } + endSection(SubSection); + } + + if (Comdats.size()) { + startSection(SubSection, wasm::WASM_COMDAT_INFO); + encodeULEB128(Comdats.size(), getStream()); + for (const auto &C : Comdats) { + writeString(C.first); + encodeULEB128(0, getStream()); // flags for future use + encodeULEB128(C.second.size(), getStream()); + for (const WasmComdatEntry &Entry : C.second) { + encodeULEB128(Entry.Kind, getStream()); + encodeULEB128(Entry.Index, getStream()); + } + } + endSection(SubSection); + } + endSection(Section); } @@ -1007,66 +1024,10 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, SmallVector Imports; SmallVector Exports; SmallVector, 4> SymbolFlags; - SmallPtrSet IsAddressTaken; - unsigned NumFuncImports = 0; + SmallVector, 2> InitFuncs; + std::map> 
Comdats; SmallVector DataSegments; - uint32_t StackPointerGlobal = 0; uint32_t DataSize = 0; - bool HasStackPointer = false; - - // Populate the IsAddressTaken set. - for (const WasmRelocationEntry &RelEntry : CodeRelocations) { - switch (RelEntry.Type) { - case wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB: - case wasm::R_WEBASSEMBLY_MEMORY_ADDR_SLEB: - IsAddressTaken.insert(RelEntry.Symbol); - break; - default: - break; - } - } - for (const WasmRelocationEntry &RelEntry : DataRelocations) { - switch (RelEntry.Type) { - case wasm::R_WEBASSEMBLY_TABLE_INDEX_I32: - case wasm::R_WEBASSEMBLY_MEMORY_ADDR_I32: - IsAddressTaken.insert(RelEntry.Symbol); - break; - default: - break; - } - } - - // Populate FunctionTypeIndices and Imports. - for (const MCSymbol &S : Asm.symbols()) { - const auto &WS = static_cast(S); - - if (WS.isTemporary()) - continue; - - if (WS.isFunction()) - registerFunctionType(WS); - - // If the symbol is not defined in this translation unit, import it. - if (!WS.isDefined(/*SetUsed=*/false)) { - WasmImport Import; - Import.ModuleName = WS.getModuleName(); - Import.FieldName = WS.getName(); - - if (WS.isFunction()) { - Import.Kind = wasm::WASM_EXTERNAL_FUNCTION; - Import.Type = getFunctionType(WS); - SymbolIndices[&WS] = NumFuncImports; - ++NumFuncImports; - } else { - Import.Kind = wasm::WASM_EXTERNAL_GLOBAL; - Import.Type = int32_t(PtrType); - SymbolIndices[&WS] = NumGlobalImports; - ++NumGlobalImports; - } - - Imports.push_back(Import); - } - } // In the special .global_variables section, we've encoded global // variables used by the function. Translate them into the Globals @@ -1126,24 +1087,68 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, } } - // In the special .stack_pointer section, we've encoded the stack pointer - // index. - MCSectionWasm *StackPtr = - Ctx.getWasmSection(".stack_pointer", SectionKind::getMetadata()); - if (!StackPtr->getFragmentList().empty()) { - if (StackPtr->getFragmentList().size() != 1) - report_fatal_error("only one .stack_pointer fragment supported"); - const MCFragment &Frag = *StackPtr->begin(); - if (Frag.hasInstructions() || Frag.getKind() != MCFragment::FT_Data) - report_fatal_error("only data supported in .stack_pointer"); - const auto &DataFrag = cast(Frag); - if (!DataFrag.getFixups().empty()) - report_fatal_error("fixups not supported in .stack_pointer"); - const SmallVectorImpl &Contents = DataFrag.getContents(); - if (Contents.size() != 4) - report_fatal_error("only one entry supported in .stack_pointer"); - HasStackPointer = true; - StackPointerGlobal = NumGlobalImports + *(const int32_t *)Contents.data(); + // For now, always emit the memory import, since loads and stores are not + // valid without it. In the future, we could perhaps be more clever and omit + // it if there are no loads or stores. + MCSymbolWasm *MemorySym = + cast(Ctx.getOrCreateSymbol("__linear_memory")); + WasmImport MemImport; + MemImport.ModuleName = MemorySym->getModuleName(); + MemImport.FieldName = MemorySym->getName(); + MemImport.Kind = wasm::WASM_EXTERNAL_MEMORY; + Imports.push_back(MemImport); + + // For now, always emit the table section, since indirect calls are not + // valid without it. In the future, we could perhaps be more clever and omit + // it if there are no indirect calls. 
+ MCSymbolWasm *TableSym = + cast(Ctx.getOrCreateSymbol("__indirect_function_table")); + WasmImport TableImport; + TableImport.ModuleName = TableSym->getModuleName(); + TableImport.FieldName = TableSym->getName(); + TableImport.Kind = wasm::WASM_EXTERNAL_TABLE; + TableImport.Type = wasm::WASM_TYPE_ANYFUNC; + Imports.push_back(TableImport); + + // Populate FunctionTypeIndices and Imports. + for (const MCSymbol &S : Asm.symbols()) { + const auto &WS = static_cast(S); + + // Register types for all functions, including those with private linkage + // (because wasm always needs a type signature). + if (WS.isFunction()) + registerFunctionType(WS); + + if (WS.isTemporary()) + continue; + + // If the symbol is not defined in this translation unit, import it. + if ((!WS.isDefined() && !WS.isComdat()) || + WS.isVariable()) { + WasmImport Import; + Import.ModuleName = WS.getModuleName(); + Import.FieldName = WS.getName(); + + if (WS.isFunction()) { + Import.Kind = wasm::WASM_EXTERNAL_FUNCTION; + Import.Type = getFunctionType(WS); + SymbolIndices[&WS] = NumFunctionImports; + ++NumFunctionImports; + } else { + Import.Kind = wasm::WASM_EXTERNAL_GLOBAL; + Import.Type = int32_t(PtrType); + Import.IsMutable = false; + SymbolIndices[&WS] = NumGlobalImports; + + // If this global is the stack pointer, make it mutable. + if (WS.getName() == "__stack_pointer") + Import.IsMutable = true; + + ++NumGlobalImports; + } + + Imports.push_back(Import); + } } for (MCSection &Sec : Asm) { @@ -1151,6 +1156,10 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, if (!Section.isWasmData()) continue; + // .init_array sections are handled specially elsewhere. + if (cast(Sec).getSectionName().startswith(".init_array")) + continue; + DataSize = alignTo(DataSize, Section.getAlignment()); DataSegments.emplace_back(); WasmDataSegment &Segment = DataSegments.back(); @@ -1162,6 +1171,12 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, Segment.Flags = 0; DataSize += Segment.Data.size(); Section.setMemoryOffset(Segment.Offset); + + if (const MCSymbolWasm *C = Section.getGroup()) { + Comdats[C->getName()].emplace_back( + WasmComdatEntry{wasm::WASM_COMDAT_DATA, + static_cast(DataSegments.size()) - 1}); + } } // Handle regular defined and undefined symbols. @@ -1177,10 +1192,14 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, << S.isExternal() << " isTemporary=" << S.isTemporary() << " isFunction=" << WS.isFunction() << " isWeak=" << WS.isWeak() + << " isHidden=" << WS.isHidden() << " isVariable=" << WS.isVariable() << "\n"); - if (WS.isWeak()) - SymbolFlags.emplace_back(WS.getName(), wasm::WASM_SYMBOL_BINDING_WEAK); + if (WS.isWeak() || WS.isHidden()) { + uint32_t Flags = (WS.isWeak() ? wasm::WASM_SYMBOL_BINDING_WEAK : 0) | + (WS.isHidden() ? wasm::WASM_SYMBOL_VISIBILITY_HIDDEN : 0); + SymbolFlags.emplace_back(WS.getName(), Flags); + } if (WS.isVariable()) continue; @@ -1188,7 +1207,7 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, unsigned Index; if (WS.isFunction()) { - if (WS.isDefined(/*SetUsed=*/false)) { + if (WS.isDefined()) { if (WS.getOffset() != 0) report_fatal_error( "function sections must contain one function each"); @@ -1198,7 +1217,7 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, "function symbols must have a size set with .size"); // A definition. Take the next available index. - Index = NumFuncImports + Functions.size(); + Index = NumFunctionImports + Functions.size(); // Prepare the function. 
WasmFunction Func; @@ -1212,18 +1231,11 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, } DEBUG(dbgs() << " -> function index: " << Index << "\n"); - - // If needed, prepare the function to be called indirectly. - if (IsAddressTaken.count(&WS) != 0) { - IndirectSymbolIndices[&WS] = TableElems.size(); - DEBUG(dbgs() << " -> adding to table: " << TableElems.size() << "\n"); - TableElems.push_back(Index); - } - } else { + } else { if (WS.isTemporary() && !WS.getSize()) continue; - if (!WS.isDefined(/*SetUsed=*/false)) + if (!WS.isDefined()) continue; if (!WS.getSize()) @@ -1238,6 +1250,7 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, // address. For externals these will also be named exports. Index = NumGlobalImports + Globals.size(); auto &DataSection = static_cast(WS.getSection()); + assert(DataSection.isWasmData()); WasmGlobal Global; Global.Type = PtrType; @@ -1251,7 +1264,7 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, } // If the symbol is visible outside this translation unit, export it. - if (WS.isDefined(/*SetUsed=*/false)) { + if (WS.isDefined()) { WasmExport Export; Export.FieldName = WS.getName(); Export.Index = Index; @@ -1261,8 +1274,16 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, Export.Kind = wasm::WASM_EXTERNAL_GLOBAL; DEBUG(dbgs() << " -> export " << Exports.size() << "\n"); Exports.push_back(Export); + if (!WS.isExternal()) SymbolFlags.emplace_back(WS.getName(), wasm::WASM_SYMBOL_BINDING_LOCAL); + + if (WS.isFunction()) { + auto &Section = static_cast(WS.getSection()); + if (const MCSymbolWasm *C = Section.getGroup()) + Comdats[C->getName()].emplace_back( + WasmComdatEntry{wasm::WASM_COMDAT_FUNCTION, Index}); + } } } @@ -1273,7 +1294,7 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, if (!S.isVariable()) continue; - assert(S.isDefined(/*SetUsed=*/false)); + assert(S.isDefined()); // Find the target symbol of this weak alias and export that index const auto &WS = static_cast(S); @@ -1283,7 +1304,6 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, uint32_t Index = SymbolIndices.find(ResolvedSym)->second; DEBUG(dbgs() << " -> index:" << Index << "\n"); - SymbolIndices[&WS] = Index; WasmExport Export; Export.FieldName = WS.getName(); Export.Index = Index; @@ -1298,33 +1318,104 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, SymbolFlags.emplace_back(WS.getName(), wasm::WASM_SYMBOL_BINDING_LOCAL); } - // Add types for indirect function calls. - for (const WasmRelocationEntry &Fixup : CodeRelocations) { - if (Fixup.Type != wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB) - continue; + { + auto HandleReloc = [&](const WasmRelocationEntry &Rel) { + // Functions referenced by a relocation need to be prepared to be called + // indirectly.
+ const MCSymbolWasm& WS = *Rel.Symbol; + if (WS.isFunction() && IndirectSymbolIndices.count(&WS) == 0) { + switch (Rel.Type) { + case wasm::R_WEBASSEMBLY_TABLE_INDEX_I32: + case wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB: + case wasm::R_WEBASSEMBLY_MEMORY_ADDR_I32: + case wasm::R_WEBASSEMBLY_MEMORY_ADDR_SLEB: { + uint32_t Index = SymbolIndices.find(&WS)->second; + IndirectSymbolIndices[&WS] = TableElems.size() + kInitialTableOffset; + DEBUG(dbgs() << " -> adding to table: " << TableElems.size() << "\n"); + TableElems.push_back(Index); + registerFunctionType(WS); + break; + } + default: + break; + } + } + }; - registerFunctionType(*Fixup.Symbol); + for (const WasmRelocationEntry &RelEntry : CodeRelocations) + HandleReloc(RelEntry); + for (const WasmRelocationEntry &RelEntry : DataRelocations) + HandleReloc(RelEntry); + } + + // Translate .init_array section contents into start functions. + for (const MCSection &S : Asm) { + const auto &WS = static_cast(S); + if (WS.getSectionName().startswith(".fini_array")) + report_fatal_error(".fini_array sections are unsupported"); + if (!WS.getSectionName().startswith(".init_array")) + continue; + if (WS.getFragmentList().empty()) + continue; + if (WS.getFragmentList().size() != 2) + report_fatal_error("only one .init_array section fragment supported"); + const MCFragment &AlignFrag = *WS.begin(); + if (AlignFrag.getKind() != MCFragment::FT_Align) + report_fatal_error(".init_array section should be aligned"); + if (cast(AlignFrag).getAlignment() != (is64Bit() ? 8 : 4)) + report_fatal_error(".init_array section should be aligned for pointers"); + const MCFragment &Frag = *std::next(WS.begin()); + if (Frag.hasInstructions() || Frag.getKind() != MCFragment::FT_Data) + report_fatal_error("only data supported in .init_array section"); + uint16_t Priority = UINT16_MAX; + if (WS.getSectionName().size() != 11) { + if (WS.getSectionName()[11] != '.') + report_fatal_error(".init_array section priority should start with '.'"); + if (WS.getSectionName().substr(12).getAsInteger(10, Priority)) + report_fatal_error("invalid .init_array section priority"); + } + const auto &DataFrag = cast(Frag); + const SmallVectorImpl &Contents = DataFrag.getContents(); + for (const uint8_t *p = (const uint8_t *)Contents.data(), + *end = (const uint8_t *)Contents.data() + Contents.size(); + p != end; ++p) { + if (*p != 0) + report_fatal_error("non-symbolic data in .init_array section"); + } + for (const MCFixup &Fixup : DataFrag.getFixups()) { + assert(Fixup.getKind() == MCFixup::getKindForSize(is64Bit() ? 8 : 4, false)); + const MCExpr *Expr = Fixup.getValue(); + auto *Sym = dyn_cast(Expr); + if (!Sym) + report_fatal_error("fixups in .init_array should be symbol references"); + if (Sym->getKind() != MCSymbolRefExpr::VK_WebAssembly_FUNCTION) + report_fatal_error("symbols in .init_array should be for functions"); + auto I = SymbolIndices.find(cast(&Sym->getSymbol())); + if (I == SymbolIndices.end()) + report_fatal_error("symbols in .init_array should be defined"); + uint32_t Index = I->second; + InitFuncs.push_back(std::make_pair(Priority, Index)); + } } // Write out the Wasm header. writeHeader(Asm); writeTypeSection(FunctionTypes); - writeImportSection(Imports); + writeImportSection(Imports, DataSize, TableElems.size()); writeFunctionSection(Functions); - writeTableSection(TableElems.size()); - writeMemorySection(DataSize); + // Skip the "table" section; we import the table instead. + // Skip the "memory" section; we import the memory instead. 
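As a reading aid for the .init_array handling above: the writer derives each start function's priority from the section-name suffix, so (section names assumed for illustration) a fragment placed in .init_array.101 is recorded with priority 101, a plain .init_array falls back to the default of UINT16_MAX, and a suffix that does not begin with '.' or is not a decimal number is a fatal error. A sketch of that rule without the error paths, for names already known to start with ".init_array":

#include <cstdint>
#include "llvm/ADT/StringRef.h"

// Sketch only: mirrors the priority rule used above; the real code reports
// fatal errors for malformed suffixes instead of silently ignoring them.
static uint16_t initArrayPriority(llvm::StringRef Name) {
  uint16_t Priority = UINT16_MAX;               // plain ".init_array"
  if (Name.size() != 11 /* strlen(".init_array") */)
    Name.substr(12).getAsInteger(10, Priority); // ".init_array.101" -> 101
  return Priority;
}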
writeGlobalSection(); writeExportSection(Exports); - // TODO: Start Section writeElemSection(TableElems); writeCodeSection(Asm, Layout, Functions); writeDataSection(DataSegments); - writeNameSection(Functions, Imports, NumFuncImports); + writeNameSection(Functions, Imports); writeCodeRelocSection(); writeDataRelocSection(); writeLinkingMetaDataSection(DataSegments, DataSize, SymbolFlags, - HasStackPointer, StackPointerGlobal); + InitFuncs, Comdats); // TODO: Translate the .comment section to the output. // TODO: Translate debug sections to the output. @@ -1333,8 +1424,5 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, std::unique_ptr llvm::createWasmObjectWriter(std::unique_ptr MOTW, raw_pwrite_stream &OS) { - // FIXME: Can't use make_unique(...) as WasmObjectWriter's - // destructor is private. Is that necessary? - return std::unique_ptr( - new WasmObjectWriter(std::move(MOTW), OS)); + return llvm::make_unique(std::move(MOTW), OS); } diff --git a/lib/Object/ArchiveWriter.cpp b/lib/Object/ArchiveWriter.cpp index 8448b617b78b..b3b812daae2e 100644 --- a/lib/Object/ArchiveWriter.cpp +++ b/lib/Object/ArchiveWriter.cpp @@ -35,6 +35,15 @@ using namespace llvm; +// The SYM64 format is used when an archive's member offsets are larger than +// 32-bits can hold. The need for this shift in format is detected by +// writeArchive. To test this we need to generate a file with a member that has +// an offset larger than 32-bits but this demands a very slow test. To speed +// the test up we use this flag to pretend like the cutoff happens before +// 32-bits and instead happens at some much smaller value. +static cl::opt Sym64Threshold("sym64-threshold", cl::Hidden, + cl::init(32)); + NewArchiveMember::NewArchiveMember(MemoryBufferRef BufRef) : Buf(MemoryBuffer::getMemBuffer(BufRef, false)), MemberName(BufRef.getBufferIdentifier()) {} @@ -484,7 +493,7 @@ Error llvm::writeArchive(StringRef ArcName, // If LastOffset isn't going to fit in a 32-bit varible we need to switch // to 64-bit. Note that the file can be larger than 4GB as long as the last // member starts before the 4GB offset. - if (LastOffset >> 32 != 0) + if (LastOffset >= (1ULL << Sym64Threshold)) Kind = object::Archive::K_GNU64; } diff --git a/lib/Object/COFFImportFile.cpp b/lib/Object/COFFImportFile.cpp index acac1e6d56a2..c249a6d97b4a 100644 --- a/lib/Object/COFFImportFile.cpp +++ b/lib/Object/COFFImportFile.cpp @@ -20,8 +20,6 @@ #include "llvm/Support/Path.h" #include -#include -#include #include #include @@ -93,7 +91,15 @@ static void writeStringTable(std::vector &B, } static ImportNameType getNameType(StringRef Sym, StringRef ExtName, - MachineTypes Machine) { + MachineTypes Machine, bool MinGW) { + // A decorated stdcall function in MSVC is exported with the + // type IMPORT_NAME, and the exported function name includes + // the leading underscore. In MinGW on the other hand, a decorated + // stdcall function still omits the underscore (IMPORT_NAME_NOPREFIX). + // See the comment in isDecorated in COFFModuleDefinition.cpp for more + // details. + if (ExtName.startswith("_") && ExtName.contains('@') && !MinGW) + return IMPORT_NAME; if (Sym != ExtName) return IMPORT_NAME_UNDECORATE; if (Machine == IMAGE_FILE_MACHINE_I386 && Sym.startswith("_")) @@ -190,7 +196,7 @@ ObjectFactory::createImportDescriptor(std::vector &Buffer) { (ImportName.size() + 1)), u32(NumberOfSymbols), u16(0), - u16(is32bit(Machine) ? IMAGE_FILE_32BIT_MACHINE : 0), + u16(is32bit(Machine) ? 
IMAGE_FILE_32BIT_MACHINE : C_Invalid), }; append(Buffer, Header); @@ -326,7 +332,7 @@ ObjectFactory::createNullImportDescriptor(std::vector &Buffer) { sizeof(coff_import_directory_table_entry)), u32(NumberOfSymbols), u16(0), - u16(is32bit(Machine) ? IMAGE_FILE_32BIT_MACHINE : 0), + u16(is32bit(Machine) ? IMAGE_FILE_32BIT_MACHINE : C_Invalid), }; append(Buffer, Header); @@ -389,7 +395,7 @@ NewArchiveMember ObjectFactory::createNullThunk(std::vector &Buffer) { VASize), u32(NumberOfSymbols), u16(0), - u16(is32bit(Machine) ? IMAGE_FILE_32BIT_MACHINE : 0), + u16(is32bit(Machine) ? IMAGE_FILE_32BIT_MACHINE : C_Invalid), }; append(Buffer, Header); @@ -560,7 +566,8 @@ NewArchiveMember ObjectFactory::createWeakExternal(StringRef Sym, Error writeImportLibrary(StringRef ImportName, StringRef Path, ArrayRef Exports, - MachineTypes Machine, bool MakeWeakAliases) { + MachineTypes Machine, bool MakeWeakAliases, + bool MinGW) { std::vector Members; ObjectFactory OF(llvm::sys::path::filename(ImportName), Machine); @@ -591,7 +598,7 @@ Error writeImportLibrary(StringRef ImportName, StringRef Path, ImportType = IMPORT_CONST; StringRef SymbolName = E.SymbolName.empty() ? E.Name : E.SymbolName; - ImportNameType NameType = getNameType(SymbolName, E.Name, Machine); + ImportNameType NameType = getNameType(SymbolName, E.Name, Machine, MinGW); Expected Name = E.ExtName.empty() ? SymbolName : replace(SymbolName, E.Name, E.ExtName); diff --git a/lib/Object/COFFModuleDefinition.cpp b/lib/Object/COFFModuleDefinition.cpp index e2208016eb57..a571354648d6 100644 --- a/lib/Object/COFFModuleDefinition.cpp +++ b/lib/Object/COFFModuleDefinition.cpp @@ -117,7 +117,7 @@ class Lexer { return Token(Identifier, S); } default: { - size_t End = Buf.find_first_of("=,\r\n \t\v"); + size_t End = Buf.find_first_of("=,;\r\n \t\v"); StringRef Word = Buf.substr(0, End); Kind K = llvm::StringSwitch(Word) .Case("BASE", KwBase) diff --git a/lib/Object/COFFObjectFile.cpp b/lib/Object/COFFObjectFile.cpp index 06ac6df79ad6..b544fa5c1470 100644 --- a/lib/Object/COFFObjectFile.cpp +++ b/lib/Object/COFFObjectFile.cpp @@ -895,7 +895,7 @@ StringRef COFFObjectFile::getFileFormatName() const { } } -unsigned COFFObjectFile::getArch() const { +Triple::ArchType COFFObjectFile::getArch() const { switch (getMachine()) { case COFF::IMAGE_FILE_MACHINE_I386: return Triple::x86; diff --git a/lib/Object/ELF.cpp b/lib/Object/ELF.cpp index c72a1258c1ee..5906dc5f5307 100644 --- a/lib/Object/ELF.cpp +++ b/lib/Object/ELF.cpp @@ -138,6 +138,7 @@ StringRef llvm::object::getELFRelocationTypeName(uint32_t Machine, default: break; } + break; case ELF::EM_BPF: switch (Type) { #include "llvm/BinaryFormat/ELFRelocs/BPF.def" diff --git a/lib/Object/IRObjectFile.cpp b/lib/Object/IRObjectFile.cpp index ed6d6b1cb4e3..1ecb26d60bce 100644 --- a/lib/Object/IRObjectFile.cpp +++ b/lib/Object/IRObjectFile.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "llvm/Object/IRObjectFile.h" -#include "RecordStreamer.h" #include "llvm/ADT/STLExtras.h" #include "llvm/BinaryFormat/Magic.h" #include "llvm/Bitcode/BitcodeReader.h" @@ -20,17 +19,8 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Mangler.h" #include "llvm/IR/Module.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCInstrInfo.h" -#include "llvm/MC/MCObjectFileInfo.h" -#include "llvm/MC/MCParser/MCAsmParser.h" -#include "llvm/MC/MCParser/MCTargetAsmParser.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCSubtargetInfo.h" 
#include "llvm/Object/ObjectFile.h" #include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp index 4620fdde81d2..3140316b50e8 100644 --- a/lib/Object/MachOObjectFile.cpp +++ b/lib/Object/MachOObjectFile.cpp @@ -1659,6 +1659,10 @@ void MachOObjectFile::moveSymbolNext(DataRefImpl &Symb) const { Expected MachOObjectFile::getSymbolName(DataRefImpl Symb) const { StringRef StringTable = getStringTableData(); MachO::nlist_base Entry = getSymbolTableEntryBase(*this, Symb); + if (Entry.n_strx == 0) + // A n_strx value of 0 indicates that no name is associated with a + // particular symbol table entry. + return StringRef(); const char *Start = &StringTable.data()[Entry.n_strx]; if (Start < getData().begin() || Start >= getData().end()) { return malformedError("bad string index: " + Twine(Entry.n_strx) + @@ -1960,6 +1964,7 @@ MachOObjectFile::section_rel_end(DataRefImpl Sec) const { relocation_iterator MachOObjectFile::extrel_begin() const { DataRefImpl Ret; + // for DYSYMTAB symbols, Ret.d.a == 0 for external relocations Ret.d.a = 0; // Would normally be a section index. Ret.d.b = 0; // Index into the external relocations return relocation_iterator(RelocationRef(Ret, this)); @@ -1968,11 +1973,29 @@ relocation_iterator MachOObjectFile::extrel_begin() const { relocation_iterator MachOObjectFile::extrel_end() const { MachO::dysymtab_command DysymtabLoadCmd = getDysymtabLoadCommand(); DataRefImpl Ret; + // for DYSYMTAB symbols, Ret.d.a == 0 for external relocations Ret.d.a = 0; // Would normally be a section index. Ret.d.b = DysymtabLoadCmd.nextrel; // Index into the external relocations return relocation_iterator(RelocationRef(Ret, this)); } +relocation_iterator MachOObjectFile::locrel_begin() const { + DataRefImpl Ret; + // for DYSYMTAB symbols, Ret.d.a == 1 for local relocations + Ret.d.a = 1; // Would normally be a section index. + Ret.d.b = 0; // Index into the local relocations + return relocation_iterator(RelocationRef(Ret, this)); +} + +relocation_iterator MachOObjectFile::locrel_end() const { + MachO::dysymtab_command DysymtabLoadCmd = getDysymtabLoadCommand(); + DataRefImpl Ret; + // for DYSYMTAB symbols, Ret.d.a == 1 for local relocations + Ret.d.a = 1; // Would normally be a section index. 
+ Ret.d.b = DysymtabLoadCmd.nlocrel; // Index into the local relocations + return relocation_iterator(RelocationRef(Ret, this)); +} + void MachOObjectFile::moveRelocationNext(DataRefImpl &Rel) const { ++Rel.d.b; } @@ -2573,7 +2596,7 @@ bool MachOObjectFile::isValidArch(StringRef ArchFlag) { .Default(false); } -unsigned MachOObjectFile::getArch() const { +Triple::ArchType MachOObjectFile::getArch() const { return getArch(getCPUType(*this)); } @@ -4301,7 +4324,10 @@ MachOObjectFile::getRelocation(DataRefImpl Rel) const { } } else { MachO::dysymtab_command DysymtabLoadCmd = getDysymtabLoadCommand(); - Offset = DysymtabLoadCmd.extreloff; // Offset to the external relocations + if (Rel.d.a == 0) + Offset = DysymtabLoadCmd.extreloff; // Offset to the external relocations + else + Offset = DysymtabLoadCmd.locreloff; // Offset to the local relocations } auto P = reinterpret_cast( diff --git a/lib/Object/WasmObjectFile.cpp b/lib/Object/WasmObjectFile.cpp index 86ce9c2209c2..132471ab7f5b 100644 --- a/lib/Object/WasmObjectFile.cpp +++ b/lib/Object/WasmObjectFile.cpp @@ -8,8 +8,10 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSet.h" #include "llvm/ADT/Triple.h" #include "llvm/BinaryFormat/Wasm.h" #include "llvm/MC/SubtargetFeature.h" @@ -267,6 +269,12 @@ Error WasmObjectFile::parseSection(WasmSection &Sec) { } Error WasmObjectFile::parseNameSection(const uint8_t *Ptr, const uint8_t *End) { + llvm::DenseSet Seen; + if (Functions.size() != FunctionTypes.size()) { + return make_error("Names must come after code section", + object_error::parse_failed); + } + while (Ptr < End) { uint8_t Type = readVarint7(Ptr); uint32_t Size = readVaruint32(Ptr); @@ -276,11 +284,19 @@ Error WasmObjectFile::parseNameSection(const uint8_t *Ptr, const uint8_t *End) { uint32_t Count = readVaruint32(Ptr); while (Count--) { uint32_t Index = readVaruint32(Ptr); + if (!Seen.insert(Index).second) + return make_error("Function named more than once", + object_error::parse_failed); StringRef Name = readString(Ptr); - if (!Name.empty()) - Symbols.emplace_back(Name, - WasmSymbol::SymbolType::DEBUG_FUNCTION_NAME, - Sections.size(), Index); + if (!isValidFunctionIndex(Index) || Name.empty()) + return make_error("Invalid name entry", + object_error::parse_failed); + DebugNames.push_back(wasm::WasmFunctionName{Index, Name}); + if (Index >= NumImportedFunctions) { + // Override any existing name; the name specified by the "names" + // section is the Function's canonical name. 
+ Functions[Index - NumImportedFunctions].Name = Name; + } } break; } @@ -303,7 +319,6 @@ Error WasmObjectFile::parseNameSection(const uint8_t *Ptr, const uint8_t *End) { void WasmObjectFile::populateSymbolTable() { // Add imports to symbol table - size_t ImportIndex = 0; size_t GlobalIndex = 0; size_t FunctionIndex = 0; for (const wasm::WasmImport& Import : Imports) { @@ -312,7 +327,7 @@ void WasmObjectFile::populateSymbolTable() { assert(Import.Global.Type == wasm::WASM_TYPE_I32); SymbolMap.try_emplace(Import.Field, Symbols.size()); Symbols.emplace_back(Import.Field, WasmSymbol::SymbolType::GLOBAL_IMPORT, - ImportSection, GlobalIndex++, ImportIndex); + ImportSection, GlobalIndex++); DEBUG(dbgs() << "Adding import: " << Symbols.back() << " sym index:" << Symbols.size() << "\n"); break; @@ -320,14 +335,13 @@ void WasmObjectFile::populateSymbolTable() { SymbolMap.try_emplace(Import.Field, Symbols.size()); Symbols.emplace_back(Import.Field, WasmSymbol::SymbolType::FUNCTION_IMPORT, - ImportSection, FunctionIndex++, ImportIndex); + ImportSection, FunctionIndex++, Import.SigIndex); DEBUG(dbgs() << "Adding import: " << Symbols.back() << " sym index:" << Symbols.size() << "\n"); break; default: break; } - ImportIndex++; } // Add exports to symbol table @@ -338,11 +352,30 @@ void WasmObjectFile::populateSymbolTable() { Export.Kind == wasm::WASM_EXTERNAL_FUNCTION ? WasmSymbol::SymbolType::FUNCTION_EXPORT : WasmSymbol::SymbolType::GLOBAL_EXPORT; - SymbolMap.try_emplace(Export.Name, Symbols.size()); - Symbols.emplace_back(Export.Name, ExportType, - ExportSection, Export.Index); - DEBUG(dbgs() << "Adding export: " << Symbols.back() - << " sym index:" << Symbols.size() << "\n"); + auto Pair = SymbolMap.try_emplace(Export.Name, Symbols.size()); + if (Pair.second) { + Symbols.emplace_back(Export.Name, ExportType, + ExportSection, Export.Index); + DEBUG(dbgs() << "Adding export: " << Symbols.back() + << " sym index:" << Symbols.size() << "\n"); + } else { + uint32_t SymIndex = Pair.first->second; + const WasmSymbol &OldSym = Symbols[SymIndex]; + WasmSymbol NewSym(Export.Name, ExportType, ExportSection, Export.Index); + NewSym.setAltIndex(OldSym.ElementIndex); + Symbols[SymIndex] = NewSym; + + DEBUG(dbgs() << "Replacing existing symbol: " << NewSym + << " sym index:" << SymIndex << "\n"); + } + } + if (Export.Kind == wasm::WASM_EXTERNAL_FUNCTION) { + auto &Function = Functions[Export.Index - NumImportedFunctions]; + if (Function.Name.empty()) { + // Use the export's name to set a name for the Function, but only if one + // hasn't already been set. + Function.Name = Export.Name; + } } } } @@ -350,6 +383,10 @@ void WasmObjectFile::populateSymbolTable() { Error WasmObjectFile::parseLinkingSection(const uint8_t *Ptr, const uint8_t *End) { HasLinkingSection = true; + if (Functions.size() != FunctionTypes.size()) { + return make_error( + "Linking data must come after code section", object_error::parse_failed); + } // Only populate the symbol table with imports and exports if the object // has a linking section (i.e. its a relocatable object file). 
Otherwise @@ -365,7 +402,6 @@ Error WasmObjectFile::parseLinkingSection(const uint8_t *Ptr, uint32_t Count = readVaruint32(Ptr); while (Count--) { StringRef Symbol = readString(Ptr); - DEBUG(dbgs() << "reading syminfo: " << Symbol << "\n"); uint32_t Flags = readVaruint32(Ptr); auto iter = SymbolMap.find(Symbol); if (iter == SymbolMap.end()) { @@ -378,7 +414,7 @@ Error WasmObjectFile::parseLinkingSection(const uint8_t *Ptr, Symbols[SymIndex].Flags = Flags; DEBUG(dbgs() << "Set symbol flags index:" << SymIndex << " name:" - << Symbols[SymIndex].Name << " exptected:" + << Symbols[SymIndex].Name << " expected:" << Symbol << " flags: " << Flags << "\n"); } break; @@ -398,7 +434,25 @@ Error WasmObjectFile::parseLinkingSection(const uint8_t *Ptr, } break; } - case wasm::WASM_STACK_POINTER: + case wasm::WASM_INIT_FUNCS: { + uint32_t Count = readVaruint32(Ptr); + LinkingData.InitFunctions.reserve(Count); + for (uint32_t i = 0; i < Count; i++) { + wasm::WasmInitFunc Init; + Init.Priority = readVaruint32(Ptr); + Init.FunctionIndex = readVaruint32(Ptr); + if (!isValidFunctionIndex(Init.FunctionIndex)) + return make_error("Invalid function index: " + + Twine(Init.FunctionIndex), + object_error::parse_failed); + LinkingData.InitFunctions.emplace_back(Init); + } + break; + } + case wasm::WASM_COMDAT_INFO: + if (Error Err = parseLinkingSectionComdat(Ptr, SubSectionEnd)) + return Err; + break; default: Ptr += Size; break; @@ -413,6 +467,55 @@ Error WasmObjectFile::parseLinkingSection(const uint8_t *Ptr, return Error::success(); } +Error WasmObjectFile::parseLinkingSectionComdat(const uint8_t *&Ptr, + const uint8_t *End) +{ + uint32_t ComdatCount = readVaruint32(Ptr); + StringSet<> ComdatSet; + while (ComdatCount--) { + StringRef Name = readString(Ptr); + if (Name.empty() || !ComdatSet.insert(Name).second) + return make_error("Bad/duplicate COMDAT name " + Twine(Name), + object_error::parse_failed); + Comdats.emplace_back(Name); + uint32_t Flags = readVaruint32(Ptr); + if (Flags != 0) + return make_error("Unsupported COMDAT flags", + object_error::parse_failed); + + uint32_t EntryCount = readVaruint32(Ptr); + while (EntryCount--) { + unsigned Kind = readVaruint32(Ptr); + unsigned Index = readVaruint32(Ptr); + switch (Kind) { + default: + return make_error("Invalid COMDAT entry type", + object_error::parse_failed); + case wasm::WASM_COMDAT_DATA: + if (Index >= DataSegments.size()) + return make_error("COMDAT data index out of range", + object_error::parse_failed); + if (!DataSegments[Index].Data.Comdat.empty()) + return make_error("Data segment in two COMDATs", + object_error::parse_failed); + DataSegments[Index].Data.Comdat = Name; + break; + case wasm::WASM_COMDAT_FUNCTION: + if (Index < NumImportedFunctions || !isValidFunctionIndex(Index)) + return make_error("COMDAT function index out of range", + object_error::parse_failed); + Index -= NumImportedFunctions; + if (!Functions[Index].Comdat.empty()) + return make_error("Function in two COMDATs", + object_error::parse_failed); + Functions[Index].Comdat = Name; + break; + } + } + } + return Error::success(); +} + WasmSection* WasmObjectFile::findCustomSectionByName(StringRef Name) { for (WasmSection& Section : Sections) { if (Section.Type == wasm::WASM_SEC_CUSTOM && Section.Name == Name) @@ -609,6 +712,7 @@ Error WasmObjectFile::parseGlobalSection(const uint8_t *Ptr, const uint8_t *End) Globals.reserve(Count); while (Count--) { wasm::WasmGlobal Global; + Global.Index = NumImportedGlobals + Globals.size(); Global.Type = readVarint7(Ptr); Global.Mutable = 
readVaruint1(Ptr); if (Error Err = readInitExpr(Global.InitExpr, Ptr)) @@ -657,27 +761,35 @@ Error WasmObjectFile::parseExportSection(const uint8_t *Ptr, const uint8_t *End) return Error::success(); } +bool WasmObjectFile::isValidFunctionIndex(uint32_t Index) const { + return Index < FunctionTypes.size() + NumImportedFunctions; +} + Error WasmObjectFile::parseStartSection(const uint8_t *Ptr, const uint8_t *End) { StartFunction = readVaruint32(Ptr); - if (StartFunction >= FunctionTypes.size()) + if (!isValidFunctionIndex(StartFunction)) return make_error("Invalid start function", object_error::parse_failed); return Error::success(); } Error WasmObjectFile::parseCodeSection(const uint8_t *Ptr, const uint8_t *End) { + const uint8_t *CodeSectionStart = Ptr; uint32_t FunctionCount = readVaruint32(Ptr); if (FunctionCount != FunctionTypes.size()) { return make_error("Invalid function count", object_error::parse_failed); } - CodeSection = ArrayRef(Ptr, End - Ptr); - while (FunctionCount--) { wasm::WasmFunction Function; - uint32_t FunctionSize = readVaruint32(Ptr); - const uint8_t *FunctionEnd = Ptr + FunctionSize; + const uint8_t *FunctionStart = Ptr; + uint32_t Size = readVaruint32(Ptr); + const uint8_t *FunctionEnd = Ptr + Size; + + Function.Index = NumImportedFunctions + Functions.size(); + Function.CodeSectionOffset = FunctionStart - CodeSectionStart; + Function.Size = FunctionEnd - FunctionStart; uint32_t NumLocalDecls = readVaruint32(Ptr); Function.Locals.reserve(NumLocalDecls); @@ -766,6 +878,8 @@ uint32_t WasmObjectFile::getSymbolFlags(DataRefImpl Symb) const { Result |= SymbolRef::SF_Weak; if (!Sym.isLocal()) Result |= SymbolRef::SF_Global; + if (Sym.isHidden()) + Result |= SymbolRef::SF_Hidden; switch (Sym.Type) { case WasmSymbol::SymbolType::FUNCTION_IMPORT: @@ -774,10 +888,6 @@ uint32_t WasmObjectFile::getSymbolFlags(DataRefImpl Symb) const { case WasmSymbol::SymbolType::FUNCTION_EXPORT: Result |= SymbolRef::SF_Executable; break; - case WasmSymbol::SymbolType::DEBUG_FUNCTION_NAME: - Result |= SymbolRef::SF_Executable; - Result |= SymbolRef::SF_FormatSpecific; - break; case WasmSymbol::SymbolType::GLOBAL_IMPORT: Result |= SymbolRef::SF_Undefined; break; @@ -821,12 +931,11 @@ uint64_t WasmObjectFile::getWasmSymbolValue(const WasmSymbol& Sym) const { case WasmSymbol::SymbolType::FUNCTION_IMPORT: case WasmSymbol::SymbolType::GLOBAL_IMPORT: case WasmSymbol::SymbolType::FUNCTION_EXPORT: - case WasmSymbol::SymbolType::DEBUG_FUNCTION_NAME: return Sym.ElementIndex; case WasmSymbol::SymbolType::GLOBAL_EXPORT: { uint32_t GlobalIndex = Sym.ElementIndex - NumImportedGlobals; assert(GlobalIndex < Globals.size()); - const wasm::WasmGlobal& Global = Globals[GlobalIndex]; + const wasm::WasmGlobal &Global = Globals[GlobalIndex]; // WasmSymbols correspond only to I32_CONST globals assert(Global.InitExpr.Opcode == wasm::WASM_OPCODE_I32_CONST); return Global.InitExpr.Value.Int32; @@ -856,7 +965,6 @@ WasmObjectFile::getSymbolType(DataRefImpl Symb) const { switch (Sym.Type) { case WasmSymbol::SymbolType::FUNCTION_IMPORT: case WasmSymbol::SymbolType::FUNCTION_EXPORT: - case WasmSymbol::SymbolType::DEBUG_FUNCTION_NAME: return SymbolRef::ST_Function; case WasmSymbol::SymbolType::GLOBAL_IMPORT: case WasmSymbol::SymbolType::GLOBAL_EXPORT: @@ -994,7 +1102,7 @@ void WasmObjectFile::getRelocationTypeName( break; switch (Rel.Type) { -#include "llvm/BinaryFormat/WasmRelocs/WebAssembly.def" +#include "llvm/BinaryFormat/WasmRelocs.def" } #undef WASM_RELOC @@ -1018,7 +1126,7 @@ uint8_t 
WasmObjectFile::getBytesInAddress() const { return 4; } StringRef WasmObjectFile::getFileFormatName() const { return "WASM"; } -unsigned WasmObjectFile::getArch() const { return Triple::wasm32; } +Triple::ArchType WasmObjectFile::getArch() const { return Triple::wasm32; } SubtargetFeatures WasmObjectFile::getFeatures() const { return SubtargetFeatures(); diff --git a/lib/Object/WindowsResource.cpp b/lib/Object/WindowsResource.cpp index b844955caa8f..1b7282f13db0 100644 --- a/lib/Object/WindowsResource.cpp +++ b/lib/Object/WindowsResource.cpp @@ -14,10 +14,10 @@ #include "llvm/Object/WindowsResource.h" #include "llvm/Object/COFF.h" #include "llvm/Support/FileOutputBuffer.h" +#include "llvm/Support/FormatVariadic.h" #include "llvm/Support/MathExtras.h" #include #include -#include #include using namespace llvm; @@ -334,7 +334,7 @@ class WindowsResourceCOFFWriter { void writeDirectoryTree(); void writeDirectoryStringTable(); void writeFirstSectionRelocations(); - std::unique_ptr<MemoryBuffer> OutputBuffer; + std::unique_ptr<WritableMemoryBuffer> OutputBuffer; char *BufferStart; uint64_t CurrentOffset = 0; COFF::MachineTypes MachineType; @@ -360,7 +360,7 @@ WindowsResourceCOFFWriter::WindowsResourceCOFFWriter( Data(Parser.getData()), StringTable(Parser.getStringTable()) { performFileLayout(); - OutputBuffer = MemoryBuffer::getNewMemBuffer(FileSize); + OutputBuffer = WritableMemoryBuffer::getNewMemBuffer(FileSize); } void WindowsResourceCOFFWriter::performFileLayout() { @@ -425,7 +425,7 @@ static std::time_t getTime() { } std::unique_ptr<MemoryBuffer> WindowsResourceCOFFWriter::write() { - BufferStart = const_cast<char *>(OutputBuffer->getBufferStart()); + BufferStart = OutputBuffer->getBufferStart(); writeCOFFHeader(); writeFirstSectionHeader(); @@ -561,10 +561,9 @@ void WindowsResourceCOFFWriter::writeSymbolTable() { // Now write a symbol for each relocation. for (unsigned i = 0; i < Data.size(); i++) { - char RelocationName[9]; - sprintf(RelocationName, "$R%06X", DataOffsets[i]); + auto RelocationName = formatv("$R{0:X-6}", i & 0xffffff).sstr(); Symbol = reinterpret_cast(BufferStart + CurrentOffset); - strncpy(Symbol->Name.ShortName, RelocationName, (size_t)COFF::NameSize); + memcpy(Symbol->Name.ShortName, RelocationName.data(), (size_t) COFF::NameSize); Symbol->Value = DataOffsets[i]; Symbol->SectionNumber = 2; Symbol->Type = COFF::IMAGE_SYM_DTYPE_NULL; diff --git a/lib/ObjectYAML/CMakeLists.txt b/lib/ObjectYAML/CMakeLists.txt index 7af0b9c194e6..d24f879836f9 100644 --- a/lib/ObjectYAML/CMakeLists.txt +++ b/lib/ObjectYAML/CMakeLists.txt @@ -1,7 +1,8 @@ add_llvm_library(LLVMObjectYAML - CodeViewYAMLTypes.cpp - CodeViewYAMLSymbols.cpp CodeViewYAMLDebugSections.cpp + CodeViewYAMLSymbols.cpp + CodeViewYAMLTypeHashing.cpp + CodeViewYAMLTypes.cpp COFFYAML.cpp DWARFEmitter.cpp DWARFVisitor.cpp diff --git a/lib/ObjectYAML/COFFYAML.cpp b/lib/ObjectYAML/COFFYAML.cpp index 056a1aa3ca14..937b8dc029fa 100644 --- a/lib/ObjectYAML/COFFYAML.cpp +++ b/lib/ObjectYAML/COFFYAML.cpp @@ -562,14 +562,16 @@ void MappingTraits<COFFYAML::Section>::mapping(IO &IO, COFFYAML::Section &Sec) { IO.mapOptional("VirtualSize", Sec.Header.VirtualSize, 0U); IO.mapOptional("Alignment", Sec.Alignment, 0U); - // If this is a .debug$S or .debug$T section parse the semantic representation - // of the symbols/types. If it is any other kind of section, just deal in raw - // bytes. + // If this is a .debug$S .debug$T, or .debug$H section parse the semantic + // representation of the symbols/types. If it is any other kind of section, + // just deal in raw bytes.
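For context on the new .debug$H handling: later in this patch the YAML reader treats that section as an 8-byte header (magic, version, hash algorithm) followed by 20-byte hash records. A minimal standalone sketch of that layout in plain C++, assuming a 4+2+2 byte header split and little-endian fields (not the LLVM reader itself):

#include <array>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <optional>
#include <vector>

struct DebugHBlob {
  uint32_t Magic;
  uint16_t Version;
  uint16_t HashAlgorithm;
  std::vector<std::array<uint8_t, 20>> Hashes; // One 20-byte hash per record.
};

static uint32_t readLE32(const uint8_t *P) {
  return uint32_t(P[0]) | uint32_t(P[1]) << 8 | uint32_t(P[2]) << 16 |
         uint32_t(P[3]) << 24;
}
static uint16_t readLE16(const uint8_t *P) {
  return uint16_t(P[0]) | uint16_t(P[1]) << 8;
}

// Returns nothing unless the blob is an 8-byte header plus whole records.
std::optional<DebugHBlob> parseDebugH(const std::vector<uint8_t> &Data) {
  if (Data.size() < 8 || (Data.size() - 8) % 20 != 0)
    return std::nullopt;
  DebugHBlob Out;
  Out.Magic = readLE32(Data.data());
  Out.Version = readLE16(Data.data() + 4);
  Out.HashAlgorithm = readLE16(Data.data() + 6);
  for (size_t I = 8; I < Data.size(); I += 20) {
    std::array<uint8_t, 20> H;
    std::memcpy(H.data(), Data.data() + I, 20);
    Out.Hashes.push_back(H);
  }
  return Out;
}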
IO.mapOptional("SectionData", Sec.SectionData); if (Sec.Name == ".debug$S") IO.mapOptional("Subsections", Sec.DebugS); else if (Sec.Name == ".debug$T") IO.mapOptional("Types", Sec.DebugT); + else if (Sec.Name == ".debug$H") + IO.mapOptional("GlobalHashes", Sec.DebugH); IO.mapOptional("Relocations", Sec.Relocations); } diff --git a/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp b/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp index 60b0ea28030a..6debd8ab0c6e 100644 --- a/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp +++ b/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp @@ -66,7 +66,7 @@ LLVM_YAML_IS_SEQUENCE_VECTOR(CrossModuleExport) LLVM_YAML_IS_SEQUENCE_VECTOR(YAMLCrossModuleImport) LLVM_YAML_IS_SEQUENCE_VECTOR(YAMLFrameData) -LLVM_YAML_DECLARE_SCALAR_TRAITS(HexFormattedString, false) +LLVM_YAML_DECLARE_SCALAR_TRAITS(HexFormattedString, QuotingType::None) LLVM_YAML_DECLARE_ENUM_TRAITS(DebugSubsectionKind) LLVM_YAML_DECLARE_ENUM_TRAITS(FileChecksumKind) LLVM_YAML_DECLARE_BITSET_TRAITS(LineFlags) diff --git a/lib/ObjectYAML/CodeViewYAMLSymbols.cpp b/lib/ObjectYAML/CodeViewYAMLSymbols.cpp index dbe4e2a6d6fd..f67a0db690eb 100644 --- a/lib/ObjectYAML/CodeViewYAMLSymbols.cpp +++ b/lib/ObjectYAML/CodeViewYAMLSymbols.cpp @@ -40,10 +40,11 @@ using namespace llvm::CodeViewYAML::detail; using namespace llvm::yaml; LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(TypeIndex) +LLVM_YAML_IS_SEQUENCE_VECTOR(LocalVariableAddrGap) // We only need to declare these, the definitions are in CodeViewYAMLTypes.cpp -LLVM_YAML_DECLARE_SCALAR_TRAITS(APSInt, false) -LLVM_YAML_DECLARE_SCALAR_TRAITS(TypeIndex, false) +LLVM_YAML_DECLARE_SCALAR_TRAITS(APSInt, QuotingType::None) +LLVM_YAML_DECLARE_SCALAR_TRAITS(TypeIndex, QuotingType::None) LLVM_YAML_DECLARE_ENUM_TRAITS(SymbolKind) LLVM_YAML_DECLARE_ENUM_TRAITS(FrameCookieKind) @@ -62,7 +63,7 @@ LLVM_YAML_DECLARE_ENUM_TRAITS(ThunkOrdinal) LLVM_YAML_STRONG_TYPEDEF(StringRef, TypeName) -LLVM_YAML_DECLARE_SCALAR_TRAITS(TypeName, true) +LLVM_YAML_DECLARE_SCALAR_TRAITS(TypeName, QuotingType::Single) StringRef ScalarTraits::input(StringRef S, void *V, TypeName &T) { return ScalarTraits::input(S, V, T.value); @@ -180,6 +181,24 @@ void ScalarEnumerationTraits::enumeration( } } +namespace llvm { +namespace yaml { +template <> struct MappingTraits { + static void mapping(IO &io, LocalVariableAddrRange &Range) { + io.mapRequired("OffsetStart", Range.OffsetStart); + io.mapRequired("ISectStart", Range.ISectStart); + io.mapRequired("Range", Range.Range); + } +}; +template <> struct MappingTraits { + static void mapping(IO &io, LocalVariableAddrGap &Gap) { + io.mapRequired("GapStartOffset", Gap.GapStartOffset); + io.mapRequired("Range", Gap.Range); + } +}; +} // namespace yaml +} // namespace llvm + namespace llvm { namespace CodeViewYAML { namespace detail { @@ -353,32 +372,50 @@ template <> void SymbolRecordImpl::map(IO &IO) { } template <> void SymbolRecordImpl::map(IO &IO) { - // TODO: Print the subfields + IO.mapRequired("Program", Symbol.Program); + IO.mapRequired("Range", Symbol.Range); + IO.mapRequired("Gaps", Symbol.Gaps); } template <> void SymbolRecordImpl::map(IO &IO) { - // TODO: Print the subfields + IO.mapRequired("Program", Symbol.Program); + IO.mapRequired("OffsetInParent", Symbol.OffsetInParent); + IO.mapRequired("Range", Symbol.Range); + IO.mapRequired("Gaps", Symbol.Gaps); } template <> void SymbolRecordImpl::map(IO &IO) { - // TODO: Print the subfields + IO.mapRequired("Register", Symbol.Hdr.Register); + IO.mapRequired("MayHaveNoName", Symbol.Hdr.MayHaveNoName); + IO.mapRequired("Range", 
Symbol.Range); + IO.mapRequired("Gaps", Symbol.Gaps); } template <> void SymbolRecordImpl::map(IO &IO) { - // TODO: Print the subfields + IO.mapRequired("Offset", Symbol.Offset); + IO.mapRequired("Range", Symbol.Range); + IO.mapRequired("Gaps", Symbol.Gaps); } template <> void SymbolRecordImpl::map(IO &IO) { - // TODO: Print the subfields + IO.mapRequired("Register", Symbol.Hdr.Register); + IO.mapRequired("MayHaveNoName", Symbol.Hdr.MayHaveNoName); + IO.mapRequired("OffsetInParent", Symbol.Hdr.OffsetInParent); + IO.mapRequired("Range", Symbol.Range); + IO.mapRequired("Gaps", Symbol.Gaps); } template <> void SymbolRecordImpl::map(IO &IO) { - // TODO: Print the subfields + IO.mapRequired("Register", Symbol.Offset); } template <> void SymbolRecordImpl::map(IO &IO) { - // TODO: Print the subfields + IO.mapRequired("Register", Symbol.Hdr.Register); + IO.mapRequired("Flags", Symbol.Hdr.Flags); + IO.mapRequired("BasePointerOffset", Symbol.Hdr.BasePointerOffset); + IO.mapRequired("Range", Symbol.Range); + IO.mapRequired("Gaps", Symbol.Gaps); } template <> void SymbolRecordImpl::map(IO &IO) { diff --git a/lib/ObjectYAML/CodeViewYAMLTypeHashing.cpp b/lib/ObjectYAML/CodeViewYAMLTypeHashing.cpp new file mode 100644 index 000000000000..bbbd7c067720 --- /dev/null +++ b/lib/ObjectYAML/CodeViewYAMLTypeHashing.cpp @@ -0,0 +1,84 @@ +//===- CodeViewYAMLTypeHashing.cpp - CodeView YAMLIO type hashing ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines classes for handling the YAML representation of CodeView +// Debug Info. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ObjectYAML/CodeViewYAMLTypeHashing.h" +#include "llvm/Support/BinaryByteStream.h" +#include "llvm/Support/BinaryStreamReader.h" +#include "llvm/Support/BinaryStreamWriter.h" + +using namespace llvm; +using namespace llvm::codeview; +using namespace llvm::CodeViewYAML; +using namespace llvm::yaml; + +namespace llvm { +namespace yaml { + +void MappingTraits::mapping(IO &io, DebugHSection &DebugH) { + io.mapRequired("Version", DebugH.Version); + io.mapRequired("HashAlgorithm", DebugH.HashAlgorithm); + io.mapOptional("HashValues", DebugH.Hashes); +} + +void ScalarTraits::output(const GlobalHash &GH, void *Ctx, + raw_ostream &OS) { + ScalarTraits::output(GH.Hash, Ctx, OS); +} + +StringRef ScalarTraits::input(StringRef Scalar, void *Ctx, + GlobalHash &GH) { + return ScalarTraits::input(Scalar, Ctx, GH.Hash); +} + +} // end namespace yaml +} // end namespace llvm + +DebugHSection llvm::CodeViewYAML::fromDebugH(ArrayRef DebugH) { + assert(DebugH.size() >= 8); + assert((DebugH.size() - 8) % 20 == 0); + + BinaryStreamReader Reader(DebugH, llvm::support::little); + DebugHSection DHS; + cantFail(Reader.readInteger(DHS.Magic)); + cantFail(Reader.readInteger(DHS.Version)); + cantFail(Reader.readInteger(DHS.HashAlgorithm)); + while (Reader.bytesRemaining() != 0) { + ArrayRef S; + cantFail(Reader.readBytes(S, 20)); + DHS.Hashes.emplace_back(S); + } + assert(Reader.bytesRemaining() == 0); + return DHS; +} + +ArrayRef llvm::CodeViewYAML::toDebugH(const DebugHSection &DebugH, + BumpPtrAllocator &Alloc) { + uint32_t Size = 8 + 20 * DebugH.Hashes.size(); + uint8_t *Data = Alloc.Allocate(Size); + MutableArrayRef Buffer(Data, Size); + BinaryStreamWriter Writer(Buffer, 
llvm::support::little); + cantFail(Writer.writeInteger(DebugH.Magic)); + cantFail(Writer.writeInteger(DebugH.Version)); + cantFail(Writer.writeInteger(DebugH.HashAlgorithm)); + SmallString<20> Hash; + for (const auto &H : DebugH.Hashes) { + Hash.clear(); + raw_svector_ostream OS(Hash); + H.Hash.writeAsBinary(OS); + assert((Hash.size() == 20) && "Invalid hash size!"); + cantFail(Writer.writeFixedString(Hash)); + } + assert(Writer.bytesRemaining() == 0); + return Buffer; +} diff --git a/lib/ObjectYAML/CodeViewYAMLTypes.cpp b/lib/ObjectYAML/CodeViewYAMLTypes.cpp index 81046b217862..ba4ad9382ce5 100644 --- a/lib/ObjectYAML/CodeViewYAMLTypes.cpp +++ b/lib/ObjectYAML/CodeViewYAMLTypes.cpp @@ -17,12 +17,13 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" #include "llvm/BinaryFormat/COFF.h" +#include "llvm/DebugInfo/CodeView/AppendingTypeTableBuilder.h" #include "llvm/DebugInfo/CodeView/CVTypeVisitor.h" #include "llvm/DebugInfo/CodeView/CodeView.h" #include "llvm/DebugInfo/CodeView/CodeViewError.h" +#include "llvm/DebugInfo/CodeView/ContinuationRecordBuilder.h" #include "llvm/DebugInfo/CodeView/TypeDeserializer.h" #include "llvm/DebugInfo/CodeView/TypeIndex.h" -#include "llvm/DebugInfo/CodeView/TypeTableBuilder.h" #include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/BinaryStreamReader.h" @@ -47,8 +48,8 @@ LLVM_YAML_IS_SEQUENCE_VECTOR(OneMethodRecord) LLVM_YAML_IS_SEQUENCE_VECTOR(VFTableSlotKind) LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(TypeIndex) -LLVM_YAML_DECLARE_SCALAR_TRAITS(TypeIndex, false) -LLVM_YAML_DECLARE_SCALAR_TRAITS(APSInt, false) +LLVM_YAML_DECLARE_SCALAR_TRAITS(TypeIndex, QuotingType::None) +LLVM_YAML_DECLARE_SCALAR_TRAITS(APSInt, QuotingType::None) LLVM_YAML_DECLARE_ENUM_TRAITS(TypeLeafKind) LLVM_YAML_DECLARE_ENUM_TRAITS(PointerToMemberRepresentation) @@ -82,7 +83,7 @@ struct LeafRecordBase { virtual ~LeafRecordBase() = default; virtual void map(yaml::IO &io) = 0; - virtual CVType toCodeViewRecord(TypeTableBuilder &TTB) const = 0; + virtual CVType toCodeViewRecord(AppendingTypeTableBuilder &TS) const = 0; virtual Error fromCodeViewRecord(CVType Type) = 0; }; @@ -96,9 +97,9 @@ template struct LeafRecordImpl : public LeafRecordBase { return TypeDeserializer::deserializeAs(Type, Record); } - CVType toCodeViewRecord(TypeTableBuilder &TTB) const override { - TTB.writeKnownType(Record); - return CVType(Kind, TTB.records().back()); + CVType toCodeViewRecord(AppendingTypeTableBuilder &TS) const override { + TS.writeLeafType(Record); + return CVType(Kind, TS.records().back()); } mutable T Record; @@ -108,7 +109,7 @@ template <> struct LeafRecordImpl : public LeafRecordBase { explicit LeafRecordImpl(TypeLeafKind K) : LeafRecordBase(K) {} void map(yaml::IO &io) override; - CVType toCodeViewRecord(TypeTableBuilder &TTB) const override; + CVType toCodeViewRecord(AppendingTypeTableBuilder &TS) const override; Error fromCodeViewRecord(CVType Type) override; std::vector Members; @@ -121,7 +122,7 @@ struct MemberRecordBase { virtual ~MemberRecordBase() = default; virtual void map(yaml::IO &io) = 0; - virtual void writeTo(FieldListRecordBuilder &FLRB) = 0; + virtual void writeTo(ContinuationRecordBuilder &CRB) = 0; }; template struct MemberRecordImpl : public MemberRecordBase { @@ -130,8 +131,8 @@ template struct MemberRecordImpl : public MemberRecordBase { void map(yaml::IO &io) override; - void writeTo(FieldListRecordBuilder &FLRB) override { - FLRB.writeMemberType(Record); + void writeTo(ContinuationRecordBuilder &CRB) 
override { + CRB.writeMemberType(Record); } mutable T Record; @@ -488,15 +489,15 @@ Error LeafRecordImpl::fromCodeViewRecord(CVType Type) { return visitMemberRecordStream(Type.content(), V); } -CVType -LeafRecordImpl::toCodeViewRecord(TypeTableBuilder &TTB) const { - FieldListRecordBuilder FLRB(TTB); - FLRB.begin(); +CVType LeafRecordImpl::toCodeViewRecord( + AppendingTypeTableBuilder &TS) const { + ContinuationRecordBuilder CRB; + CRB.begin(ContinuationRecordKind::FieldList); for (const auto &Member : Members) { - Member.Member->writeTo(FLRB); + Member.Member->writeTo(CRB); } - FLRB.end(true); - return CVType(Kind, TTB.records().back()); + TS.insertRecord(CRB); + return CVType(Kind, TS.records().back()); } void MappingTraits::mapping(IO &io, OneMethodRecord &Record) { @@ -681,13 +682,9 @@ Expected LeafRecord::fromCodeViewRecord(CVType Type) { return make_error(cv_error_code::corrupt_record); } -CVType LeafRecord::toCodeViewRecord(BumpPtrAllocator &Alloc) const { - TypeTableBuilder TTB(Alloc); - return Leaf->toCodeViewRecord(TTB); -} - -CVType LeafRecord::toCodeViewRecord(TypeTableBuilder &TTB) const { - return Leaf->toCodeViewRecord(TTB); +CVType +LeafRecord::toCodeViewRecord(AppendingTypeTableBuilder &Serializer) const { + return Leaf->toCodeViewRecord(Serializer); } namespace llvm { @@ -786,10 +783,10 @@ llvm::CodeViewYAML::fromDebugT(ArrayRef DebugT) { ArrayRef llvm::CodeViewYAML::toDebugT(ArrayRef Leafs, BumpPtrAllocator &Alloc) { - TypeTableBuilder TTB(Alloc, false); + AppendingTypeTableBuilder TS(Alloc); uint32_t Size = sizeof(uint32_t); for (const auto &Leaf : Leafs) { - CVType T = Leaf.toCodeViewRecord(TTB); + CVType T = Leaf.Leaf->toCodeViewRecord(TS); Size += T.length(); assert(T.length() % 4 == 0 && "Improper type record alignment!"); } @@ -798,7 +795,7 @@ ArrayRef llvm::CodeViewYAML::toDebugT(ArrayRef Leafs, BinaryStreamWriter Writer(Output, support::little); ExitOnError Err("Error writing type record to .debug$T section"); Err(Writer.writeInteger(COFF::DEBUG_SECTION_MAGIC)); - for (const auto &R : TTB.records()) { + for (const auto &R : TS.records()) { Err(Writer.writeBytes(R)); } assert(Writer.bytesRemaining() == 0 && "Didn't write all type record bytes!"); diff --git a/lib/ObjectYAML/MachOYAML.cpp b/lib/ObjectYAML/MachOYAML.cpp index 85079f2605f1..e00a4ea93074 100644 --- a/lib/ObjectYAML/MachOYAML.cpp +++ b/lib/ObjectYAML/MachOYAML.cpp @@ -52,7 +52,9 @@ StringRef ScalarTraits::input(StringRef Scalar, void *, char_16 &Val) { return StringRef(); } -bool ScalarTraits::mustQuote(StringRef S) { return needsQuotes(S); } +QuotingType ScalarTraits::mustQuote(StringRef S) { + return needsQuotes(S); +} void ScalarTraits::output(const uuid_t &Val, void *, raw_ostream &Out) { Out.write_uuid(Val); @@ -75,7 +77,9 @@ StringRef ScalarTraits::input(StringRef Scalar, void *, uuid_t &Val) { return StringRef(); } -bool ScalarTraits::mustQuote(StringRef S) { return needsQuotes(S); } +QuotingType ScalarTraits::mustQuote(StringRef S) { + return needsQuotes(S); +} void MappingTraits::mapping( IO &IO, MachOYAML::FileHeader &FileHdr) { diff --git a/lib/ObjectYAML/WasmYAML.cpp b/lib/ObjectYAML/WasmYAML.cpp index 2f961cf68a04..4ae6dccccb19 100644 --- a/lib/ObjectYAML/WasmYAML.cpp +++ b/lib/ObjectYAML/WasmYAML.cpp @@ -60,6 +60,8 @@ static void sectionMapping(IO &IO, WasmYAML::LinkingSection &Section) { IO.mapRequired("DataSize", Section.DataSize); IO.mapOptional("SymbolInfo", Section.SymbolInfos); IO.mapOptional("SegmentInfo", Section.SegmentInfos); + IO.mapOptional("InitFunctions", 
Section.InitFunctions); + IO.mapOptional("Comdats", Section.Comdats); } static void sectionMapping(IO &IO, WasmYAML::CustomSection &Section) { @@ -235,7 +237,7 @@ void ScalarEnumerationTraits::enumeration( void MappingTraits::mapping( IO &IO, WasmYAML::Signature &Signature) { - IO.mapOptional("Index", Signature.Index); + IO.mapRequired("Index", Signature.Index); IO.mapRequired("ReturnType", Signature.ReturnType); IO.mapRequired("ParamTypes", Signature.ParamTypes); } @@ -247,6 +249,7 @@ void MappingTraits::mapping(IO &IO, WasmYAML::Table &Table) { void MappingTraits::mapping(IO &IO, WasmYAML::Function &Function) { + IO.mapRequired("Index", Function.Index); IO.mapRequired("Locals", Function.Locals); IO.mapRequired("Body", Function.Body); } @@ -322,6 +325,7 @@ void MappingTraits::mapping(IO &IO, void MappingTraits::mapping(IO &IO, WasmYAML::Global &Global) { + IO.mapRequired("Index", Global.Index); IO.mapRequired("Type", Global.Type); IO.mapRequired("Mutable", Global.Mutable); IO.mapRequired("InitExpr", Global.InitExpr); @@ -359,12 +363,60 @@ void MappingTraits::mapping( IO.mapRequired("Content", Segment.Content); } +void MappingTraits::mapping( + IO &IO, WasmYAML::InitFunction &Init) { + IO.mapRequired("Priority", Init.Priority); + IO.mapRequired("FunctionIndex", Init.FunctionIndex); +} + +void ScalarEnumerationTraits::enumeration( + IO &IO, WasmYAML::ComdatKind &Kind) { +#define ECase(X) IO.enumCase(Kind, #X, wasm::WASM_COMDAT_##X); + ECase(FUNCTION); + ECase(DATA); +#undef ECase +} + +void MappingTraits::mapping( + IO &IO, WasmYAML::ComdatEntry &ComdatEntry) { + IO.mapRequired("Kind", ComdatEntry.Kind); + IO.mapRequired("Index", ComdatEntry.Index); +} + +void MappingTraits::mapping( + IO &IO, WasmYAML::Comdat &Comdat) { + IO.mapRequired("Name", Comdat.Name); + IO.mapRequired("Entries", Comdat.Entries); +} + void MappingTraits::mapping(IO &IO, WasmYAML::SymbolInfo &Info) { IO.mapRequired("Name", Info.Name); IO.mapRequired("Flags", Info.Flags); } +void ScalarBitSetTraits::bitset( + IO &IO, WasmYAML::LimitFlags &Value) { +#define BCase(X) IO.bitSetCase(Value, #X, wasm::WASM_LIMITS_FLAG_##X) + BCase(HAS_MAX); +#undef BCase +} + +void ScalarBitSetTraits::bitset( + IO &IO, WasmYAML::SegmentFlags &Value) { +} + +void ScalarBitSetTraits::bitset( + IO &IO, WasmYAML::SymbolFlags &Value) { +#define BCaseMask(M, X) IO.maskedBitSetCase(Value, #X, wasm::WASM_SYMBOL_##X, wasm::WASM_SYMBOL_##M) + //BCaseMask(BINDING_MASK, BINDING_GLOBAL); + BCaseMask(BINDING_MASK, BINDING_WEAK); + BCaseMask(BINDING_MASK, BINDING_LOCAL); + //BCaseMask(VISIBILITY_MASK, VISIBILITY_DEFAULT); + BCaseMask(VISIBILITY_MASK, VISIBILITY_HIDDEN); +#undef BCaseMask +} + void ScalarEnumerationTraits::enumeration( IO &IO, WasmYAML::ValueType &Type) { #define ECase(X) IO.enumCase(Type, #X, wasm::WASM_TYPE_##X); @@ -410,7 +462,7 @@ void ScalarEnumerationTraits::enumeration( void ScalarEnumerationTraits::enumeration( IO &IO, WasmYAML::RelocType &Type) { #define WASM_RELOC(name, value) IO.enumCase(Type, #name, wasm::name); -#include "llvm/BinaryFormat/WasmRelocs/WebAssembly.def" +#include "llvm/BinaryFormat/WasmRelocs.def" #undef WASM_RELOC } diff --git a/lib/Option/OptTable.cpp b/lib/Option/OptTable.cpp index c1bb05e817f0..dcd1cc46d964 100644 --- a/lib/Option/OptTable.cpp +++ b/lib/Option/OptTable.cpp @@ -247,6 +247,69 @@ OptTable::findByPrefix(StringRef Cur, unsigned short DisableFlags) const { return Ret; } +unsigned OptTable::findNearest(StringRef Option, std::string &NearestString, + unsigned FlagsToInclude, unsigned FlagsToExclude, + 
unsigned MinimumLength) const { + assert(!Option.empty()); + + // Consider each option as a candidate, finding the closest match. + unsigned BestDistance = UINT_MAX; + for (const Info &CandidateInfo : + ArrayRef(OptionInfos).drop_front(FirstSearchableIndex)) { + StringRef CandidateName = CandidateInfo.Name; + + // Ignore option candidates with empty names, such as "--", or names + // that do not meet the minimum length. + if (CandidateName.empty() || CandidateName.size() < MinimumLength) + continue; + + // If FlagsToInclude were specified, ignore options that don't include + // those flags. + if (FlagsToInclude && !(CandidateInfo.Flags & FlagsToInclude)) + continue; + // Ignore options that contain the FlagsToExclude. + if (CandidateInfo.Flags & FlagsToExclude) + continue; + + // Ignore positional argument option candidates (which do not + // have prefixes). + if (!CandidateInfo.Prefixes) + continue; + // Find the most appropriate prefix. For example, if a user asks for + // "--helm", suggest "--help" over "-help". + StringRef Prefix = CandidateInfo.Prefixes[0]; + for (int P = 1; CandidateInfo.Prefixes[P]; P++) { + if (Option.startswith(CandidateInfo.Prefixes[P])) + Prefix = CandidateInfo.Prefixes[P]; + } + + // Check if the candidate ends with a character commonly used when + // delimiting an option from its value, such as '=' or ':'. If it does, + // attempt to split the given option based on that delimiter. + std::string Delimiter = ""; + char Last = CandidateName.back(); + if (Last == '=' || Last == ':') + Delimiter = std::string(1, Last); + + StringRef LHS, RHS; + if (Delimiter.empty()) + LHS = Option; + else + std::tie(LHS, RHS) = Option.split(Last); + + std::string NormalizedName = + (LHS.drop_front(Prefix.size()) + Delimiter).str(); + unsigned Distance = + CandidateName.edit_distance(NormalizedName, /*AllowReplacements=*/true, + /*MaxEditDistance=*/BestDistance); + if (Distance < BestDistance) { + BestDistance = Distance; + NearestString = (Prefix + CandidateName + RHS).str(); + } + } + return BestDistance; +} + bool OptTable::addValues(const char *Option, const char *Values) { for (size_t I = FirstSearchableIndex, E = OptionInfos.size(); I < E; I++) { Info &In = OptionInfos[I]; diff --git a/lib/Passes/LLVMBuild.txt b/lib/Passes/LLVMBuild.txt index 4d8c7f85d3aa..e2378a84328e 100644 --- a/lib/Passes/LLVMBuild.txt +++ b/lib/Passes/LLVMBuild.txt @@ -19,4 +19,4 @@ type = Library name = Passes parent = Libraries -required_libraries = Analysis CodeGen Core IPO InstCombine Scalar Support TransformUtils Vectorize Instrumentation +required_libraries = Analysis CodeGen Core IPO InstCombine Scalar Support Target TransformUtils Vectorize Instrumentation diff --git a/lib/Passes/PassBuilder.cpp b/lib/Passes/PassBuilder.cpp index 843017528533..c344a3165a0f 100644 --- a/lib/Passes/PassBuilder.cpp +++ b/lib/Passes/PassBuilder.cpp @@ -22,7 +22,6 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/BlockFrequencyInfo.h" -#include "llvm/Analysis/BlockFrequencyInfoImpl.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/CFGPrinter.h" #include "llvm/Analysis/CFLAndersAliasAnalysis.h" @@ -81,6 +80,7 @@ #include "llvm/Transforms/IPO/PartialInlining.h" #include "llvm/Transforms/IPO/SCCP.h" #include "llvm/Transforms/IPO/StripDeadPrototypes.h" +#include "llvm/Transforms/IPO/SyntheticCountsPropagation.h" #include "llvm/Transforms/IPO/WholeProgramDevirt.h" #include "llvm/Transforms/InstCombine/InstCombine.h" #include 
"llvm/Transforms/InstrProfiling.h" @@ -127,6 +127,7 @@ #include "llvm/Transforms/Scalar/NewGVN.h" #include "llvm/Transforms/Scalar/PartiallyInlineLibCalls.h" #include "llvm/Transforms/Scalar/Reassociate.h" +#include "llvm/Transforms/Scalar/RewriteStatepointsForGC.h" #include "llvm/Transforms/Scalar/SCCP.h" #include "llvm/Transforms/Scalar/SROA.h" #include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" @@ -144,13 +145,11 @@ #include "llvm/Transforms/Utils/LowerInvoke.h" #include "llvm/Transforms/Utils/Mem2Reg.h" #include "llvm/Transforms/Utils/NameAnonGlobals.h" -#include "llvm/Transforms/Utils/PredicateInfo.h" #include "llvm/Transforms/Utils/SimplifyInstructions.h" #include "llvm/Transforms/Utils/SymbolRewriter.h" #include "llvm/Transforms/Vectorize/LoopVectorize.h" #include "llvm/Transforms/Vectorize/SLPVectorizer.h" -#include using namespace llvm; @@ -178,6 +177,11 @@ static cl::opt EnableGVNSink( "enable-npm-gvn-sink", cl::init(false), cl::Hidden, cl::desc("Enable the GVN hoisting pass for the new PM (default = off)")); +static cl::opt EnableSyntheticCounts( + "enable-npm-synthetic-counts", cl::init(false), cl::Hidden, cl::ZeroOrMore, + cl::desc("Run synthetic function entry count generation " + "pass")); + static Regex DefaultAliasRegex( "^(default|thinlto-pre-link|thinlto|lto-pre-link|lto)<(O[0123sz])>$"); @@ -414,10 +418,10 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, // We provide the opt remark emitter pass for LICM to use. We only need to do // this once as it is immutable. FPM.addPass(RequireAnalysisPass()); - FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1))); + FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1), DebugLogging)); FPM.addPass(SimplifyCFGPass()); FPM.addPass(InstCombinePass()); - FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2))); + FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2), DebugLogging)); // Eliminate redundancies. if (Level != O1) { @@ -452,7 +456,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, FPM.addPass(JumpThreadingPass()); FPM.addPass(CorrelatedValuePropagationPass()); FPM.addPass(DSEPass()); - FPM.addPass(createFunctionToLoopPassAdaptor(LICMPass())); + FPM.addPass(createFunctionToLoopPassAdaptor(LICMPass(), DebugLogging)); for (auto &C : ScalarOptimizerLateEPCallbacks) C(FPM, Level); @@ -512,7 +516,8 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging, MPM.addPass(PGOInstrumentationGen()); FunctionPassManager FPM; - FPM.addPass(createFunctionToLoopPassAdaptor(LoopRotatePass())); + FPM.addPass( + createFunctionToLoopPassAdaptor(LoopRotatePass(), DebugLogging)); MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); // Add the profile lowering pass. @@ -623,6 +628,10 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, MPM.addPass(PGOIndirectCallPromotion(false, false)); } + // Synthesize function entry counts for non-PGO compilation. + if (EnableSyntheticCounts && !PGOOpt) + MPM.addPass(SyntheticCountsPropagation()); + // Require the GlobalsAA analysis for the module so we can query it within // the CGSCC pipeline. MPM.addPass(RequireAnalysisPass()); @@ -732,7 +741,8 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, C(OptimizePM, Level); // First rotate loops that may have been un-rotated by prior passes. 
- OptimizePM.addPass(createFunctionToLoopPassAdaptor(LoopRotatePass())); + OptimizePM.addPass( + createFunctionToLoopPassAdaptor(LoopRotatePass(), DebugLogging)); // Distribute loops to allow partial vectorization. I.e. isolate dependences // into separate loop that would otherwise inhibit vectorization. This is @@ -750,21 +760,24 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, // Cleanup after the loop optimization passes. OptimizePM.addPass(InstCombinePass()); - // Now that we've formed fast to execute loop structures, we do further // optimizations. These are run afterward as they might block doing complex // analyses and transforms such as what are needed for loop vectorization. - // Optimize parallel scalar instruction chains into SIMD instructions. - OptimizePM.addPass(SLPVectorizerPass()); - - // Cleanup after all of the vectorizers. Simplification passes like CVP and + // Cleanup after loop vectorization, etc. Simplification passes like CVP and // GVN, loop transforms, and others have already run, so it's now better to // convert to more optimized IR using more aggressive simplify CFG options. + // The extra sinking transform can create larger basic blocks, so do this + // before SLP vectorization. OptimizePM.addPass(SimplifyCFGPass(SimplifyCFGOptions(). - forwardSwitchCondToPhi(true). - convertSwitchToLookupTable(true). - needCanonicalLoops(false))); + forwardSwitchCondToPhi(true). + convertSwitchToLookupTable(true). + needCanonicalLoops(false). + sinkCommonInsts(true))); + + // Optimize parallel scalar instruction chains into SIMD instructions. + OptimizePM.addPass(SLPVectorizerPass()); + OptimizePM.addPass(InstCombinePass()); // Unroll small loops to hide loop backedge latency and saturate any parallel @@ -776,7 +789,7 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, OptimizePM.addPass(LoopUnrollPass(Level)); OptimizePM.addPass(InstCombinePass()); OptimizePM.addPass(RequireAnalysisPass()); - OptimizePM.addPass(createFunctionToLoopPassAdaptor(LICMPass())); + OptimizePM.addPass(createFunctionToLoopPassAdaptor(LICMPass(), DebugLogging)); // Now that we've vectorized and unrolled loops, we may have more refined // alignment information, try to re-derive it here. @@ -1532,7 +1545,8 @@ bool PassBuilder::parseFunctionPass(FunctionPassManager &FPM, DebugLogging)) return false; // Add the nested pass manager with the appropriate adaptor. 
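The pass names registered a little further down (the MODULE_PASS entries in PassRegistry.def) follow a simple string-to-factory pattern. A standalone sketch of that idea, with hypothetical pass types:

#include <functional>
#include <map>
#include <memory>
#include <string>

struct ModulePass {
  virtual ~ModulePass() = default;
  virtual void run() = 0;
};

struct SyntheticCounts : ModulePass {
  void run() override { /* propagate synthetic entry counts */ }
};

using Factory = std::function<std::unique_ptr<ModulePass>()>;

std::map<std::string, Factory> &registry() {
  static std::map<std::string, Factory> R;
  return R;
}

std::unique_ptr<ModulePass> createPassByName(const std::string &Name) {
  auto It = registry().find(Name);
  if (It == registry().end())
    return nullptr;
  return It->second();
}

int main() {
  registry()["synthetic-counts-propagation"] = [] {
    return std::make_unique<SyntheticCounts>();
  };
  if (auto P = createPassByName("synthetic-counts-propagation"))
    P->run();
}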
- FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM))); + FPM.addPass( + createFunctionToLoopPassAdaptor(std::move(LPM), DebugLogging)); return true; } if (auto Count = parseRepeatPassName(Name)) { diff --git a/lib/Passes/PassRegistry.def b/lib/Passes/PassRegistry.def index 3fbc549d336b..9ac95ee6fa81 100644 --- a/lib/Passes/PassRegistry.def +++ b/lib/Passes/PassRegistry.def @@ -68,10 +68,12 @@ MODULE_PASS("print-callgraph", CallGraphPrinterPass(dbgs())) MODULE_PASS("print", PrintModulePass(dbgs())) MODULE_PASS("print-lcg", LazyCallGraphPrinterPass(dbgs())) MODULE_PASS("print-lcg-dot", LazyCallGraphDOTPrinterPass(dbgs())) +MODULE_PASS("rewrite-statepoints-for-gc", RewriteStatepointsForGC()) MODULE_PASS("rewrite-symbols", RewriteSymbolPass()) MODULE_PASS("rpo-functionattrs", ReversePostOrderFunctionAttrsPass()) MODULE_PASS("sample-profile", SampleProfileLoaderPass()) MODULE_PASS("strip-dead-prototypes", StripDeadPrototypesPass()) +MODULE_PASS("synthetic-counts-propagation", SyntheticCountsPropagation()) MODULE_PASS("wholeprogramdevirt", WholeProgramDevirtPass()) MODULE_PASS("verify", VerifierPass()) #undef MODULE_PASS diff --git a/lib/ProfileData/Coverage/CoverageMapping.cpp b/lib/ProfileData/Coverage/CoverageMapping.cpp index 6cde3863f188..8dbd58632f0e 100644 --- a/lib/ProfileData/Coverage/CoverageMapping.cpp +++ b/lib/ProfileData/Coverage/CoverageMapping.cpp @@ -388,6 +388,11 @@ class SegmentBuilder { if (CompletedSegmentLoc == CompletedRegion->endLoc()) continue; + // Use the count from the last completed region which ends at this loc. + for (unsigned J = I + 1; J < E; ++J) + if (CompletedRegion->endLoc() == ActiveRegions[J]->endLoc()) + CompletedRegion = ActiveRegions[J]; + startSegment(*CompletedRegion, CompletedSegmentLoc, false); } @@ -623,7 +628,7 @@ CoverageMapping::getInstantiationGroups(StringRef Filename) const { } std::vector Result; - for (const auto &InstantiationSet : InstantiationSetCollector) { + for (auto &InstantiationSet : InstantiationSetCollector) { InstantiationGroup IG{InstantiationSet.first.first, InstantiationSet.first.second, std::move(InstantiationSet.second)}; diff --git a/lib/ProfileData/Coverage/CoverageMappingReader.cpp b/lib/ProfileData/Coverage/CoverageMappingReader.cpp index 467a36ca7483..649cf507357e 100644 --- a/lib/ProfileData/Coverage/CoverageMappingReader.cpp +++ b/lib/ProfileData/Coverage/CoverageMappingReader.cpp @@ -20,7 +20,6 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/Object/Binary.h" -#include "llvm/Object/COFF.h" #include "llvm/Object/Error.h" #include "llvm/Object/MachOUniversal.h" #include "llvm/Object/ObjectFile.h" @@ -33,13 +32,6 @@ #include "llvm/Support/LEB128.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include -#include -#include -#include -#include -#include -#include #include using namespace llvm; diff --git a/lib/ProfileData/InstrProf.cpp b/lib/ProfileData/InstrProf.cpp index a732bedc6fa4..8ab5df59f538 100644 --- a/lib/ProfileData/InstrProf.cpp +++ b/lib/ProfileData/InstrProf.cpp @@ -56,7 +56,7 @@ using namespace llvm; static cl::opt StaticFuncFullModulePrefix( - "static-func-full-module-prefix", cl::init(true), + "static-func-full-module-prefix", cl::init(true), cl::Hidden, cl::desc("Use full module build paths in the profile counter names for " "static functions.")); @@ -69,7 +69,7 @@ static cl::opt StaticFuncFullModulePrefix( // the source directory name not being stripped. 
A non-zero option value here // can potentially prevent some inter-module indirect-call-promotions. static cl::opt StaticFuncStripDirNamePrefix( - "static-func-strip-dirname-prefix", cl::init(0), + "static-func-strip-dirname-prefix", cl::init(0), cl::Hidden, cl::desc("Strip specified level of directory name from source path in " "the profile counter name for static functions.")); diff --git a/lib/ProfileData/InstrProfReader.cpp b/lib/ProfileData/InstrProfReader.cpp index cdf50c2df0c8..23c9a2676b9e 100644 --- a/lib/ProfileData/InstrProfReader.cpp +++ b/lib/ProfileData/InstrProfReader.cpp @@ -61,7 +61,7 @@ InstrProfReader::create(const Twine &Path) { Expected> InstrProfReader::create(std::unique_ptr Buffer) { // Sanity check the buffer. - if (Buffer->getBufferSize() > std::numeric_limits::max()) + if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits::max()) return make_error(instrprof_error::too_large); if (Buffer->getBufferSize() == 0) @@ -99,7 +99,7 @@ IndexedInstrProfReader::create(const Twine &Path) { Expected> IndexedInstrProfReader::create(std::unique_ptr Buffer) { // Sanity check the buffer. - if (Buffer->getBufferSize() > std::numeric_limits::max()) + if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits::max()) return make_error(instrprof_error::too_large); // Create the reader. diff --git a/lib/ProfileData/ProfileSummaryBuilder.cpp b/lib/ProfileData/ProfileSummaryBuilder.cpp index 9fb2ec1b39d9..5fa1e2cf7d1e 100644 --- a/lib/ProfileData/ProfileSummaryBuilder.cpp +++ b/lib/ProfileData/ProfileSummaryBuilder.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "llvm/IR/Attributes.h" -#include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Type.h" diff --git a/lib/ProfileData/SampleProfReader.cpp b/lib/ProfileData/SampleProfReader.cpp index 1028c35e8c2d..44547e3dffa0 100644 --- a/lib/ProfileData/SampleProfReader.cpp +++ b/lib/ProfileData/SampleProfReader.cpp @@ -749,7 +749,7 @@ setupMemoryBuffer(const Twine &Filename) { auto Buffer = std::move(BufferOrErr.get()); // Sanity check the file. 
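The uint64_t casts added to these size checks make the comparison behave the same regardless of how wide size_t is on the host. A minimal sketch of the same guard; the 32-bit limit below is illustrative, since the concrete limit type is not visible in this extract:

#include <cstddef>
#include <cstdint>
#include <limits>

// Reject over-large inputs; widen the size before comparing so the check is
// meaningful on both 32-bit and 64-bit hosts.
bool tooLarge(size_t BufferSize) {
  const uint64_t Limit = std::numeric_limits<uint32_t>::max();
  return uint64_t(BufferSize) > Limit;
}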
- if (Buffer->getBufferSize() > std::numeric_limits::max()) + if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits::max()) return sampleprof_error::too_large; return std::move(Buffer); diff --git a/lib/Support/AMDGPUMetadata.cpp b/lib/Support/AMDGPUMetadata.cpp index ec2714cfc1c5..ddb25935e0ef 100644 --- a/lib/Support/AMDGPUMetadata.cpp +++ b/lib/Support/AMDGPUMetadata.cpp @@ -148,6 +148,10 @@ struct MappingTraits { MD.mIsDynamicCallStack, false); YIO.mapOptional(Kernel::CodeProps::Key::IsXNACKEnabled, MD.mIsXNACKEnabled, false); + YIO.mapOptional(Kernel::CodeProps::Key::NumSpilledSGPRs, + MD.mNumSpilledSGPRs, uint16_t(0)); + YIO.mapOptional(Kernel::CodeProps::Key::NumSpilledVGPRs, + MD.mNumSpilledVGPRs, uint16_t(0)); } }; diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp index f7fb0cef16bf..3489feb93a02 100644 --- a/lib/Support/APFloat.cpp +++ b/lib/Support/APFloat.cpp @@ -2546,12 +2546,12 @@ IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) { } bool IEEEFloat::convertFromStringSpecials(StringRef str) { - if (str.equals("inf") || str.equals("INFINITY")) { + if (str.equals("inf") || str.equals("INFINITY") || str.equals("+Inf")) { makeInf(false); return true; } - if (str.equals("-inf") || str.equals("-INFINITY")) { + if (str.equals("-inf") || str.equals("-INFINITY") || str.equals("-Inf")) { makeInf(true); return true; } diff --git a/lib/Support/ARMAttributeParser.cpp b/lib/Support/ARMAttributeParser.cpp index a9a0c1d1a4d3..e39bddc4e8f2 100644 --- a/lib/Support/ARMAttributeParser.cpp +++ b/lib/Support/ARMAttributeParser.cpp @@ -592,7 +592,7 @@ void ARMAttributeParser::ParseAttributeList(const uint8_t *Data, bool Handled = false; for (unsigned AHI = 0, AHE = array_lengthof(DisplayRoutines); AHI != AHE && !Handled; ++AHI) { - if (DisplayRoutines[AHI].Attribute == Tag) { + if (uint64_t(DisplayRoutines[AHI].Attribute) == Tag) { (this->*DisplayRoutines[AHI].Routine)(ARMBuildAttrs::AttrType(Tag), Data, Offset); Handled = true; @@ -666,7 +666,7 @@ void ARMAttributeParser::ParseSubsection(const uint8_t *Data, uint32_t Length) { ParseIndexList(Data, Offset, Indicies); break; default: - errs() << "unrecognised tag: 0x" << utohexstr(Tag) << '\n'; + errs() << "unrecognised tag: 0x" << Twine::utohexstr(Tag) << '\n'; return; } diff --git a/lib/Support/BlockFrequency.cpp b/lib/Support/BlockFrequency.cpp index e7f3e1764c52..34fcbde23a28 100644 --- a/lib/Support/BlockFrequency.cpp +++ b/lib/Support/BlockFrequency.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/BlockFrequency.h" -#include "llvm/Support/raw_ostream.h" #include using namespace llvm; diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt index 5d95a9a9a56d..5723f8fcf5bb 100644 --- a/lib/Support/CMakeLists.txt +++ b/lib/Support/CMakeLists.txt @@ -1,4 +1,7 @@ set(system_libs) +if ( LLVM_ENABLE_ZLIB AND HAVE_LIBZ ) + set(system_libs ${system_libs} ${ZLIB_LIBRARIES}) +endif() if( MSVC OR MINGW ) # libuuid required for FOLDERID_Profile usage in lib/Support/Windows/Path.inc. 
set(system_libs ${system_libs} psapi shell32 ole32 uuid) @@ -21,9 +24,6 @@ elseif( CMAKE_HOST_UNIX ) set(system_libs ${system_libs} atomic) endif() set(system_libs ${system_libs} ${LLVM_PTHREAD_LIB}) - if ( LLVM_ENABLE_ZLIB AND HAVE_LIBZ ) - set(system_libs ${system_libs} z) - endif() if( UNIX AND NOT (BEOS OR HAIKU) ) set(system_libs ${system_libs} m) endif() diff --git a/lib/Support/CachePruning.cpp b/lib/Support/CachePruning.cpp index 3e97c991f504..141573c2a1c7 100644 --- a/lib/Support/CachePruning.cpp +++ b/lib/Support/CachePruning.cpp @@ -165,12 +165,14 @@ bool llvm::pruneCache(StringRef Path, CachePruningPolicy Policy) { return false; } } else { + if (!Policy.Interval) + return false; if (Policy.Interval != seconds(0)) { // Check whether the time stamp is older than our pruning interval. // If not, do nothing. const auto TimeStampModTime = FileStatus.getLastModificationTime(); auto TimeStampAge = CurrentTime - TimeStampModTime; - if (TimeStampAge <= Policy.Interval) { + if (TimeStampAge <= *Policy.Interval) { DEBUG(dbgs() << "Timestamp file too recent (" << duration_cast(TimeStampAge).count() << "s old), do not prune.\n"); diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp index 0d662cb0375e..451c3f460369 100644 --- a/lib/Support/CommandLine.cpp +++ b/lib/Support/CommandLine.cpp @@ -19,7 +19,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm-c/Support.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" @@ -689,7 +688,9 @@ static bool EatsUnboundedNumberOfValues(const Option *O) { O->getNumOccurrencesFlag() == cl::OneOrMore; } -static bool isWhitespace(char C) { return strchr(" \t\n\r\f\v", C); } +static inline bool isWhitespace(char C) { + return C == ' ' || C == '\t' || C == '\r' || C == '\n'; +} static bool isQuote(char C) { return C == '\"' || C == '\''; } @@ -710,17 +711,19 @@ void cl::TokenizeGNUCommandLine(StringRef Src, StringSaver &Saver, break; } + char C = Src[I]; + // Backslash escapes the next character. - if (I + 1 < E && Src[I] == '\\') { + if (I + 1 < E && C == '\\') { ++I; // Skip the escape. Token.push_back(Src[I]); continue; } // Consume a quoted string. - if (isQuote(Src[I])) { - char Quote = Src[I++]; - while (I != E && Src[I] != Quote) { + if (isQuote(C)) { + ++I; + while (I != E && Src[I] != C) { // Backslash escapes the next character. if (Src[I] == '\\' && I + 1 != E) ++I; @@ -733,7 +736,7 @@ void cl::TokenizeGNUCommandLine(StringRef Src, StringSaver &Saver, } // End the token if this is whitespace. - if (isWhitespace(Src[I])) { + if (isWhitespace(C)) { if (!Token.empty()) NewArgv.push_back(Saver.save(StringRef(Token)).data()); Token.clear(); @@ -741,7 +744,7 @@ void cl::TokenizeGNUCommandLine(StringRef Src, StringSaver &Saver, } // This is a normal character. Append it. - Token.push_back(Src[I]); + Token.push_back(C); } // Append the last token after hitting EOF with no whitespace. @@ -799,25 +802,27 @@ void cl::TokenizeWindowsCommandLine(StringRef Src, StringSaver &Saver, // end of the source string. enum { INIT, UNQUOTED, QUOTED } State = INIT; for (size_t I = 0, E = Src.size(); I != E; ++I) { + char C = Src[I]; + // INIT state indicates that the current input index is at the start of // the string or between tokens. 
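The INIT/UNQUOTED/QUOTED machine handled below can be condensed into a short standalone sketch with simplified rules (backslash escapes the next character, double quotes group, whitespace separates); it is an illustration, not LLVM's Windows quoting:

#include <cctype>
#include <string>
#include <vector>

std::vector<std::string> tokenize(const std::string &Src) {
  enum { INIT, UNQUOTED, QUOTED } State = INIT;
  std::vector<std::string> Out;
  std::string Token;
  for (size_t I = 0; I < Src.size(); ++I) {
    char C = Src[I];
    if (C == '\\' && I + 1 < Src.size()) { // Escape: take next char verbatim.
      Token.push_back(Src[++I]);
      if (State == INIT) State = UNQUOTED;
      continue;
    }
    if (State == QUOTED) {                 // Inside quotes: only '"' ends it.
      if (C == '"') State = UNQUOTED; else Token.push_back(C);
      continue;
    }
    if (std::isspace(static_cast<unsigned char>(C))) {
      if (State == UNQUOTED) { Out.push_back(Token); Token.clear(); }
      State = INIT;                        // Whitespace ends the token.
      continue;
    }
    if (C == '"') { State = QUOTED; continue; }
    Token.push_back(C);
    State = UNQUOTED;
  }
  if (State != INIT)
    Out.push_back(Token);                  // Last token at end of input.
  return Out;
}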
if (State == INIT) { - if (isWhitespace(Src[I])) { + if (isWhitespace(C)) { // Mark the end of lines in response files - if (MarkEOLs && Src[I] == '\n') + if (MarkEOLs && C == '\n') NewArgv.push_back(nullptr); continue; } - if (Src[I] == '"') { + if (C == '"') { State = QUOTED; continue; } - if (Src[I] == '\\') { + if (C == '\\') { I = parseBackslash(Src, I, Token); State = UNQUOTED; continue; } - Token.push_back(Src[I]); + Token.push_back(C); State = UNQUOTED; continue; } @@ -826,38 +831,38 @@ void cl::TokenizeWindowsCommandLine(StringRef Src, StringSaver &Saver, // quotes. if (State == UNQUOTED) { // Whitespace means the end of the token. - if (isWhitespace(Src[I])) { + if (isWhitespace(C)) { NewArgv.push_back(Saver.save(StringRef(Token)).data()); Token.clear(); State = INIT; // Mark the end of lines in response files - if (MarkEOLs && Src[I] == '\n') + if (MarkEOLs && C == '\n') NewArgv.push_back(nullptr); continue; } - if (Src[I] == '"') { + if (C == '"') { State = QUOTED; continue; } - if (Src[I] == '\\') { + if (C == '\\') { I = parseBackslash(Src, I, Token); continue; } - Token.push_back(Src[I]); + Token.push_back(C); continue; } // QUOTED state means that it's reading a token quoted by double quotes. if (State == QUOTED) { - if (Src[I] == '"') { + if (C == '"') { State = UNQUOTED; continue; } - if (Src[I] == '\\') { + if (C == '\\') { I = parseBackslash(Src, I, Token); continue; } - Token.push_back(Src[I]); + Token.push_back(C); } } // Append the last token after hitting EOF with no whitespace. @@ -868,6 +873,45 @@ void cl::TokenizeWindowsCommandLine(StringRef Src, StringSaver &Saver, NewArgv.push_back(nullptr); } +void cl::tokenizeConfigFile(StringRef Source, StringSaver &Saver, + SmallVectorImpl &NewArgv, + bool MarkEOLs) { + for (const char *Cur = Source.begin(); Cur != Source.end();) { + SmallString<128> Line; + // Check for comment line. + if (isWhitespace(*Cur)) { + while (Cur != Source.end() && isWhitespace(*Cur)) + ++Cur; + continue; + } + if (*Cur == '#') { + while (Cur != Source.end() && *Cur != '\n') + ++Cur; + continue; + } + // Find end of the current line. + const char *Start = Cur; + for (const char *End = Source.end(); Cur != End; ++Cur) { + if (*Cur == '\\') { + if (Cur + 1 != End) { + ++Cur; + if (*Cur == '\n' || + (*Cur == '\r' && (Cur + 1 != End) && Cur[1] == '\n')) { + Line.append(Start, Cur - 1); + if (*Cur == '\r') + ++Cur; + Start = Cur + 1; + } + } + } else if (*Cur == '\n') + break; + } + // Tokenize line. + Line.append(Start, Cur); + cl::TokenizeGNUCommandLine(Line, Saver, NewArgv, MarkEOLs); + } +} + // It is called byte order marker but the UTF-8 BOM is actually not affected // by the host system's endianness. 
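hasUTF8ByteOrderMark looks for the fixed byte sequence 0xEF 0xBB 0xBF, which is why host endianness does not matter. A standalone equivalent:

#include <cstddef>
#include <cstdint>
#include <vector>

bool hasUTF8BOM(const std::vector<uint8_t> &Buf) {
  return Buf.size() >= 3 && Buf[0] == 0xEF && Buf[1] == 0xBB && Buf[2] == 0xBF;
}

size_t contentStart(const std::vector<uint8_t> &Buf) {
  return hasUTF8BOM(Buf) ? 3 : 0; // Skip the BOM when present.
}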
static bool hasUTF8ByteOrderMark(ArrayRef S) { @@ -972,6 +1016,15 @@ bool cl::ExpandResponseFiles(StringSaver &Saver, TokenizerCallback Tokenizer, return AllExpanded; } +bool cl::readConfigFile(StringRef CfgFile, StringSaver &Saver, + SmallVectorImpl &Argv) { + if (!ExpandResponseFile(CfgFile, Saver, cl::tokenizeConfigFile, Argv, + /*MarkEOLs*/ false, /*RelativeNames*/ true)) + return false; + return ExpandResponseFiles(Saver, cl::tokenizeConfigFile, Argv, + /*MarkEOLs*/ false, /*RelativeNames*/ true); +} + /// ParseEnvironmentOptions - An alternative entry point to the /// CommandLine library, which allows you to read the program's name /// from the caller (as PROGNAME) and its command-line arguments from diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp index e307335f8bb9..695683efa625 100644 --- a/lib/Support/Host.cpp +++ b/lib/Support/Host.cpp @@ -216,6 +216,37 @@ StringRef sys::detail::getHostCPUNameForARM( .Case("0xc01", "saphira") .Default("generic"); + if (Implementer == "0x53") { // Samsung Electronics Co., Ltd. + // The Exynos chips have a convoluted ID scheme that doesn't seem to follow + // any predictive pattern across variants and parts. + unsigned Variant = 0, Part = 0; + + // Look for the CPU variant line, whose value is a 1 digit hexadecimal + // number, corresponding to the Variant bits in the CP15/C0 register. + for (auto I : Lines) + if (I.consume_front("CPU variant")) + I.ltrim("\t :").getAsInteger(0, Variant); + + // Look for the CPU part line, whose value is a 3 digit hexadecimal + // number, corresponding to the PartNum bits in the CP15/C0 register. + for (auto I : Lines) + if (I.consume_front("CPU part")) + I.ltrim("\t :").getAsInteger(0, Part); + + unsigned Exynos = (Variant << 12) | Part; + switch (Exynos) { + default: + // Default by falling through to Exynos M1. + LLVM_FALLTHROUGH; + + case 0x1001: + return "exynos-m1"; + + case 0x4001: + return "exynos-m2"; + } + } + return "generic"; } @@ -1224,6 +1255,7 @@ bool sys::getHostCPUFeatures(StringMap &Features) { Features["avx512vnni"] = HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save; Features["avx512bitalg"] = HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save; Features["avx512vpopcntdq"] = HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save; + Features["rdpid"] = HasLeaf7 && ((ECX >> 22) & 1); Features["ibt"] = HasLeaf7 && ((EDX >> 20) & 1); diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp index 85e782b2c048..9cea9a281074 100644 --- a/lib/Support/MemoryBuffer.cpp +++ b/lib/Support/MemoryBuffer.cpp @@ -80,10 +80,12 @@ void *operator new(size_t N, const NamedBufferAlloc &Alloc) { namespace { /// MemoryBufferMem - Named MemoryBuffer pointing to a block of memory. 
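The class below becomes a template over its buffer base so one implementation can back both the read-only and the writable buffer types. A toy version of that pattern with made-up class names:

#include <cstddef>
#include <string>
#include <utility>

class ROBuffer {
public:
  virtual ~ROBuffer() = default;
  virtual const char *start() const = 0;
  virtual size_t size() const = 0;
};

class RWBuffer : public ROBuffer {
public:
  // Writable variant: a mutable view of the same storage.
  char *mutableStart() { return const_cast<char *>(start()); }
};

// One implementation, instantiated over either base.
template <typename Base> class OwnedBuffer : public Base {
  std::string Storage;
public:
  explicit OwnedBuffer(std::string S) : Storage(std::move(S)) {}
  const char *start() const override { return Storage.data(); }
  size_t size() const override { return Storage.size(); }
};

// OwnedBuffer<ROBuffer> gives a read-only buffer; OwnedBuffer<RWBuffer> adds
// mutableStart() without duplicating the storage logic.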
-class MemoryBufferMem : public MemoryBuffer { +template +class MemoryBufferMem : public MB { public: MemoryBufferMem(StringRef InputData, bool RequiresNullTerminator) { - init(InputData.begin(), InputData.end(), RequiresNullTerminator); + MemoryBuffer::init(InputData.begin(), InputData.end(), + RequiresNullTerminator); } /// Disable sized deallocation for MemoryBufferMem, because it has @@ -95,21 +97,22 @@ class MemoryBufferMem : public MemoryBuffer { return StringRef(reinterpret_cast(this + 1)); } - BufferKind getBufferKind() const override { - return MemoryBuffer_Malloc; + MemoryBuffer::BufferKind getBufferKind() const override { + return MemoryBuffer::MemoryBuffer_Malloc; } }; } -static ErrorOr> -getFileAux(const Twine &Filename, int64_t FileSize, uint64_t MapSize, +template +static ErrorOr> +getFileAux(const Twine &Filename, int64_t FileSize, uint64_t MapSize, uint64_t Offset, bool RequiresNullTerminator, bool IsVolatile); std::unique_ptr MemoryBuffer::getMemBuffer(StringRef InputData, StringRef BufferName, bool RequiresNullTerminator) { auto *Ret = new (NamedBufferAlloc(BufferName)) - MemoryBufferMem(InputData, RequiresNullTerminator); + MemoryBufferMem(InputData, RequiresNullTerminator); return std::unique_ptr(Ret); } @@ -119,50 +122,21 @@ MemoryBuffer::getMemBuffer(MemoryBufferRef Ref, bool RequiresNullTerminator) { Ref.getBuffer(), Ref.getBufferIdentifier(), RequiresNullTerminator)); } -std::unique_ptr -MemoryBuffer::getMemBufferCopy(StringRef InputData, const Twine &BufferName) { - std::unique_ptr Buf = - getNewUninitMemBuffer(InputData.size(), BufferName); +static ErrorOr> +getMemBufferCopyImpl(StringRef InputData, const Twine &BufferName) { + auto Buf = WritableMemoryBuffer::getNewUninitMemBuffer(InputData.size(), BufferName); if (!Buf) - return nullptr; - memcpy(const_cast(Buf->getBufferStart()), InputData.data(), - InputData.size()); - return Buf; -} - -std::unique_ptr -MemoryBuffer::getNewUninitMemBuffer(size_t Size, const Twine &BufferName) { - // Allocate space for the MemoryBuffer, the data and the name. It is important - // that MemoryBuffer and data are aligned so PointerIntPair works with them. - // TODO: Is 16-byte alignment enough? We copy small object files with large - // alignment expectations into this buffer. - SmallString<256> NameBuf; - StringRef NameRef = BufferName.toStringRef(NameBuf); - size_t AlignedStringLen = - alignTo(sizeof(MemoryBufferMem) + NameRef.size() + 1, 16); - size_t RealLen = AlignedStringLen + Size + 1; - char *Mem = static_cast(operator new(RealLen, std::nothrow)); - if (!Mem) - return nullptr; - - // The name is stored after the class itself. - CopyStringRef(Mem + sizeof(MemoryBufferMem), NameRef); - - // The buffer begins after the name and must be aligned. - char *Buf = Mem + AlignedStringLen; - Buf[Size] = 0; // Null terminate buffer. 
- - auto *Ret = new (Mem) MemoryBufferMem(StringRef(Buf, Size), true); - return std::unique_ptr(Ret); + return make_error_code(errc::not_enough_memory); + memcpy(Buf->getBufferStart(), InputData.data(), InputData.size()); + return std::move(Buf); } std::unique_ptr -MemoryBuffer::getNewMemBuffer(size_t Size, StringRef BufferName) { - std::unique_ptr SB = getNewUninitMemBuffer(Size, BufferName); - if (!SB) - return nullptr; - memset(const_cast(SB->getBufferStart()), 0, Size); - return SB; +MemoryBuffer::getMemBufferCopy(StringRef InputData, const Twine &BufferName) { + auto Buf = getMemBufferCopyImpl(InputData, BufferName); + if (Buf) + return std::move(*Buf); + return nullptr; } ErrorOr> @@ -179,10 +153,10 @@ MemoryBuffer::getFileOrSTDIN(const Twine &Filename, int64_t FileSize, ErrorOr> MemoryBuffer::getFileSlice(const Twine &FilePath, uint64_t MapSize, uint64_t Offset, bool IsVolatile) { - return getFileAux(FilePath, -1, MapSize, Offset, false, IsVolatile); + return getFileAux(FilePath, -1, MapSize, Offset, false, + IsVolatile); } - //===----------------------------------------------------------------------===// // MemoryBuffer::getFile implementation. //===----------------------------------------------------------------------===// @@ -191,7 +165,8 @@ namespace { /// \brief Memory maps a file descriptor using sys::fs::mapped_file_region. /// /// This handles converting the offset into a legal offset on the platform. -class MemoryBufferMMapFile : public MemoryBuffer { +template +class MemoryBufferMMapFile : public MB { sys::fs::mapped_file_region MFR; static uint64_t getLegalMapOffset(uint64_t Offset) { @@ -209,11 +184,13 @@ class MemoryBufferMMapFile : public MemoryBuffer { public: MemoryBufferMMapFile(bool RequiresNullTerminator, int FD, uint64_t Len, uint64_t Offset, std::error_code &EC) - : MFR(FD, sys::fs::mapped_file_region::readonly, + : MFR(FD, + MB::Writable ? 
sys::fs::mapped_file_region::priv + : sys::fs::mapped_file_region::readonly, getLegalMapSize(Len, Offset), getLegalMapOffset(Offset), EC) { if (!EC) { const char *Start = getStart(Len, Offset); - init(Start, Start + Len, RequiresNullTerminator); + MemoryBuffer::init(Start, Start + Len, RequiresNullTerminator); } } @@ -226,13 +203,13 @@ class MemoryBufferMMapFile : public MemoryBuffer { return StringRef(reinterpret_cast(this + 1)); } - BufferKind getBufferKind() const override { - return MemoryBuffer_MMap; + MemoryBuffer::BufferKind getBufferKind() const override { + return MemoryBuffer::MemoryBuffer_MMap; } }; } -static ErrorOr> +static ErrorOr> getMemoryBufferForStream(int FD, const Twine &BufferName) { const ssize_t ChunkSize = 4096*4; SmallString Buffer; @@ -246,37 +223,89 @@ getMemoryBufferForStream(int FD, const Twine &BufferName) { Buffer.set_size(Buffer.size() + ReadBytes); } while (ReadBytes != 0); - return MemoryBuffer::getMemBufferCopy(Buffer, BufferName); + return getMemBufferCopyImpl(Buffer, BufferName); } ErrorOr> MemoryBuffer::getFile(const Twine &Filename, int64_t FileSize, bool RequiresNullTerminator, bool IsVolatile) { - return getFileAux(Filename, FileSize, FileSize, 0, - RequiresNullTerminator, IsVolatile); + return getFileAux(Filename, FileSize, FileSize, 0, + RequiresNullTerminator, IsVolatile); } -static ErrorOr> +template +static ErrorOr> getOpenFileImpl(int FD, const Twine &Filename, uint64_t FileSize, uint64_t MapSize, int64_t Offset, bool RequiresNullTerminator, bool IsVolatile); -static ErrorOr> +template +static ErrorOr> getFileAux(const Twine &Filename, int64_t FileSize, uint64_t MapSize, uint64_t Offset, bool RequiresNullTerminator, bool IsVolatile) { int FD; std::error_code EC = sys::fs::openFileForRead(Filename, FD); + if (EC) return EC; - ErrorOr> Ret = - getOpenFileImpl(FD, Filename, FileSize, MapSize, Offset, - RequiresNullTerminator, IsVolatile); + auto Ret = getOpenFileImpl(FD, Filename, FileSize, MapSize, Offset, + RequiresNullTerminator, IsVolatile); close(FD); return Ret; } +ErrorOr> +WritableMemoryBuffer::getFile(const Twine &Filename, int64_t FileSize, + bool IsVolatile) { + return getFileAux(Filename, FileSize, FileSize, 0, + /*RequiresNullTerminator*/ false, + IsVolatile); +} + +ErrorOr> +WritableMemoryBuffer::getFileSlice(const Twine &Filename, uint64_t MapSize, + uint64_t Offset, bool IsVolatile) { + return getFileAux(Filename, -1, MapSize, Offset, false, + IsVolatile); +} + +std::unique_ptr +WritableMemoryBuffer::getNewUninitMemBuffer(size_t Size, const Twine &BufferName) { + using MemBuffer = MemoryBufferMem; + // Allocate space for the MemoryBuffer, the data and the name. It is important + // that MemoryBuffer and data are aligned so PointerIntPair works with them. + // TODO: Is 16-byte alignment enough? We copy small object files with large + // alignment expectations into this buffer. + SmallString<256> NameBuf; + StringRef NameRef = BufferName.toStringRef(NameBuf); + size_t AlignedStringLen = alignTo(sizeof(MemBuffer) + NameRef.size() + 1, 16); + size_t RealLen = AlignedStringLen + Size + 1; + char *Mem = static_cast(operator new(RealLen, std::nothrow)); + if (!Mem) + return nullptr; + + // The name is stored after the class itself. + CopyStringRef(Mem + sizeof(MemBuffer), NameRef); + + // The buffer begins after the name and must be aligned. + char *Buf = Mem + AlignedStringLen; + Buf[Size] = 0; // Null terminate buffer. 
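The allocation being assembled here places a header object, then the buffer's name, then a 16-byte-aligned, NUL-terminated data area in a single block. A standalone sketch of the same layout with a hypothetical header struct:

#include <cstddef>
#include <cstdlib>
#include <cstring>
#include <new>

struct BlockHeader {
  size_t Size; // Size of the data area, excluding the trailing NUL.
};

static size_t alignTo16(size_t N) { return (N + 15) & ~size_t(15); }

// Returns a pointer to the data area; the header lives at the block start and
// the NUL-terminated name sits between the header and the data.
char *createNamedBlock(const char *Name, size_t Size, BlockHeader **HdrOut) {
  size_t NameLen = std::strlen(Name);
  size_t DataOffset = alignTo16(sizeof(BlockHeader) + NameLen + 1);
  char *Mem = static_cast<char *>(std::malloc(DataOffset + Size + 1));
  if (!Mem)
    return nullptr;
  *HdrOut = new (Mem) BlockHeader{Size};
  std::memcpy(Mem + sizeof(BlockHeader), Name, NameLen + 1);
  char *Data = Mem + DataOffset;
  Data[Size] = '\0'; // Null terminate the data area, as the code above does.
  return Data;
}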
+ + auto *Ret = new (Mem) MemBuffer(StringRef(Buf, Size), true); + return std::unique_ptr(Ret); +} + +std::unique_ptr +WritableMemoryBuffer::getNewMemBuffer(size_t Size, const Twine &BufferName) { + auto SB = WritableMemoryBuffer::getNewUninitMemBuffer(Size, BufferName); + if (!SB) + return nullptr; + memset(SB->getBufferStart(), 0, Size); + return SB; +} + static bool shouldUseMmap(int FD, size_t FileSize, size_t MapSize, @@ -332,7 +361,8 @@ static bool shouldUseMmap(int FD, return true; } -static ErrorOr> +template +static ErrorOr> getOpenFileImpl(int FD, const Twine &Filename, uint64_t FileSize, uint64_t MapSize, int64_t Offset, bool RequiresNullTerminator, bool IsVolatile) { @@ -364,22 +394,21 @@ getOpenFileImpl(int FD, const Twine &Filename, uint64_t FileSize, if (shouldUseMmap(FD, FileSize, MapSize, Offset, RequiresNullTerminator, PageSize, IsVolatile)) { std::error_code EC; - std::unique_ptr Result( - new (NamedBufferAlloc(Filename)) - MemoryBufferMMapFile(RequiresNullTerminator, FD, MapSize, Offset, EC)); + std::unique_ptr Result( + new (NamedBufferAlloc(Filename)) MemoryBufferMMapFile( + RequiresNullTerminator, FD, MapSize, Offset, EC)); if (!EC) return std::move(Result); } - std::unique_ptr Buf = - MemoryBuffer::getNewUninitMemBuffer(MapSize, Filename); + auto Buf = WritableMemoryBuffer::getNewUninitMemBuffer(MapSize, Filename); if (!Buf) { // Failed to create a buffer. The only way it can fail is if // new(std::nothrow) returns 0. return make_error_code(errc::not_enough_memory); } - char *BufPtr = const_cast(Buf->getBufferStart()); + char *BufPtr = Buf.get()->getBufferStart(); size_t BytesLeft = MapSize; #ifndef HAVE_PREAD @@ -412,7 +441,7 @@ getOpenFileImpl(int FD, const Twine &Filename, uint64_t FileSize, ErrorOr> MemoryBuffer::getOpenFile(int FD, const Twine &Filename, uint64_t FileSize, bool RequiresNullTerminator, bool IsVolatile) { - return getOpenFileImpl(FD, Filename, FileSize, FileSize, 0, + return getOpenFileImpl(FD, Filename, FileSize, FileSize, 0, RequiresNullTerminator, IsVolatile); } @@ -420,7 +449,8 @@ ErrorOr> MemoryBuffer::getOpenFileSlice(int FD, const Twine &Filename, uint64_t MapSize, int64_t Offset, bool IsVolatile) { assert(MapSize != uint64_t(-1)); - return getOpenFileImpl(FD, Filename, -1, MapSize, Offset, false, IsVolatile); + return getOpenFileImpl(FD, Filename, -1, MapSize, Offset, false, + IsVolatile); } ErrorOr> MemoryBuffer::getSTDIN() { diff --git a/lib/Support/NativeFormatting.cpp b/lib/Support/NativeFormatting.cpp index b951a88a38db..85b4bfb81568 100644 --- a/lib/Support/NativeFormatting.cpp +++ b/lib/Support/NativeFormatting.cpp @@ -14,6 +14,8 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/Support/Format.h" +#include + using namespace llvm; template diff --git a/lib/Support/Path.cpp b/lib/Support/Path.cpp index d4b9d02e030d..f229f23a4f84 100644 --- a/lib/Support/Path.cpp +++ b/lib/Support/Path.cpp @@ -1099,8 +1099,14 @@ Error TempFile::keep(const Twine &Name) { std::error_code RenameEC = cancelDeleteOnClose(FD); if (!RenameEC) RenameEC = rename_fd(FD, Name); + // If we can't rename, discard the temporary file. + if (RenameEC) + removeFD(FD); #else std::error_code RenameEC = fs::rename(TmpName, Name); + // If we can't rename, discard the temporary file. 
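Stepping back from the rename-failure cleanup being added to TempFile::keep() here, the caller-side contract is easiest to see in a small sketch (the naming model and helper are illustrative assumptions):

    #include "llvm/ADT/StringRef.h"
    #include "llvm/Support/Error.h"
    #include "llvm/Support/FileSystem.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    // Write data through a unique temporary file, then rename it over FinalName.
    // With the change above, a failed rename also removes the temporary instead
    // of leaking it.
    static Error writeAtomically(StringRef FinalName, StringRef Data) {
      Expected<sys::fs::TempFile> Tmp =
          sys::fs::TempFile::create(FinalName + ".tmp%%%%%%%%");
      if (!Tmp)
        return Tmp.takeError();
      raw_fd_ostream OS(Tmp->FD, /*shouldClose=*/false);
      OS << Data;
      OS.flush();
      return Tmp->keep(FinalName);
    }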
+ if (RenameEC) + remove(TmpName); sys::DontRemoveFileOnSignal(TmpName); #endif diff --git a/lib/Support/RandomNumberGenerator.cpp b/lib/Support/RandomNumberGenerator.cpp index 8ea02d709df1..47d20159200b 100644 --- a/lib/Support/RandomNumberGenerator.cpp +++ b/lib/Support/RandomNumberGenerator.cpp @@ -32,8 +32,8 @@ using namespace llvm; // // Do not change to cl::opt since this silently breaks argument parsing. static cl::opt -Seed("rng-seed", cl::value_desc("seed"), - cl::desc("Seed for the random number generator"), cl::init(0)); + Seed("rng-seed", cl::value_desc("seed"), cl::Hidden, + cl::desc("Seed for the random number generator"), cl::init(0)); RandomNumberGenerator::RandomNumberGenerator(StringRef Salt) { DEBUG( diff --git a/lib/Support/ScopedPrinter.cpp b/lib/Support/ScopedPrinter.cpp index 537ff62c7b09..981dfbff520a 100644 --- a/lib/Support/ScopedPrinter.cpp +++ b/lib/Support/ScopedPrinter.cpp @@ -1,6 +1,5 @@ #include "llvm/Support/ScopedPrinter.h" -#include "llvm/ADT/StringExtras.h" #include "llvm/Support/Format.h" #include diff --git a/lib/Support/Statistic.cpp b/lib/Support/Statistic.cpp index 72ca22806c43..544ae2d0983c 100644 --- a/lib/Support/Statistic.cpp +++ b/lib/Support/Statistic.cpp @@ -39,12 +39,14 @@ using namespace llvm; /// -stats - Command line option to cause transformations to emit stats about /// what they did. /// -static cl::opt Stats("stats", - cl::desc("Enable statistics output from program (available with Asserts)")); - +static cl::opt Stats( + "stats", + cl::desc("Enable statistics output from program (available with Asserts)"), + cl::Hidden); static cl::opt StatsAsJSON("stats-json", - cl::desc("Display statistics as json data")); + cl::desc("Display statistics as json data"), + cl::Hidden); static bool Enabled; static bool PrintOnExit; @@ -166,9 +168,10 @@ void llvm::PrintStatisticsJSON(raw_ostream &OS) { const char *delim = ""; for (const Statistic *Stat : Stats.Stats) { OS << delim; - assert(!yaml::needsQuotes(Stat->getDebugType()) && + assert(yaml::needsQuotes(Stat->getDebugType()) == yaml::QuotingType::None && "Statistic group/type name is simple."); - assert(!yaml::needsQuotes(Stat->getName()) && "Statistic name is simple"); + assert(yaml::needsQuotes(Stat->getName()) == yaml::QuotingType::None && + "Statistic name is simple"); OS << "\t\"" << Stat->getDebugType() << '.' << Stat->getName() << "\": " << Stat->getValue(); delim = ",\n"; diff --git a/lib/Support/StringExtras.cpp b/lib/Support/StringExtras.cpp index b2f42dfcc04d..21157a14086d 100644 --- a/lib/Support/StringExtras.cpp +++ b/lib/Support/StringExtras.cpp @@ -13,6 +13,7 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; /// StrInStrNoCase - Portable version of strcasestr. 
Locates the first @@ -56,3 +57,8 @@ void llvm::SplitString(StringRef Source, S = getToken(S.second, Delimiters); } } + +void llvm::printLowerCase(StringRef String, raw_ostream &Out) { + for (const char C : String) + Out << toLower(C); +} diff --git a/lib/Support/StringRef.cpp b/lib/Support/StringRef.cpp index 9b7cc1c1d182..9ba7a09f9962 100644 --- a/lib/Support/StringRef.cpp +++ b/lib/Support/StringRef.cpp @@ -11,6 +11,7 @@ #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/Hashing.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/edit_distance.h" #include @@ -21,28 +22,12 @@ using namespace llvm; const size_t StringRef::npos; #endif -static char ascii_tolower(char x) { - if (x >= 'A' && x <= 'Z') - return x - 'A' + 'a'; - return x; -} - -static char ascii_toupper(char x) { - if (x >= 'a' && x <= 'z') - return x - 'a' + 'A'; - return x; -} - -static bool ascii_isdigit(char x) { - return x >= '0' && x <= '9'; -} - // strncasecmp() is not available on non-POSIX systems, so define an // alternative function here. static int ascii_strncasecmp(const char *LHS, const char *RHS, size_t Length) { for (size_t I = 0; I < Length; ++I) { - unsigned char LHC = ascii_tolower(LHS[I]); - unsigned char RHC = ascii_tolower(RHS[I]); + unsigned char LHC = toLower(LHS[I]); + unsigned char RHC = toLower(RHS[I]); if (LHC != RHC) return LHC < RHC ? -1 : 1; } @@ -71,21 +56,21 @@ bool StringRef::endswith_lower(StringRef Suffix) const { } size_t StringRef::find_lower(char C, size_t From) const { - char L = ascii_tolower(C); - return find_if([L](char D) { return ascii_tolower(D) == L; }, From); + char L = toLower(C); + return find_if([L](char D) { return toLower(D) == L; }, From); } /// compare_numeric - Compare strings, handle embedded numbers. int StringRef::compare_numeric(StringRef RHS) const { for (size_t I = 0, E = std::min(Length, RHS.Length); I != E; ++I) { // Check for sequences of digits. - if (ascii_isdigit(Data[I]) && ascii_isdigit(RHS.Data[I])) { + if (isDigit(Data[I]) && isDigit(RHS.Data[I])) { // The longer sequence of numbers is considered larger. // This doesn't really handle prefixed zeros well. size_t J; for (J = I + 1; J != E + 1; ++J) { - bool ld = J < Length && ascii_isdigit(Data[J]); - bool rd = J < RHS.Length && ascii_isdigit(RHS.Data[J]); + bool ld = J < Length && isDigit(Data[J]); + bool rd = J < RHS.Length && isDigit(RHS.Data[J]); if (ld != rd) return rd ? 
-1 : 1; if (!rd) @@ -123,7 +108,7 @@ unsigned StringRef::edit_distance(llvm::StringRef Other, std::string StringRef::lower() const { std::string Result(size(), char()); for (size_type i = 0, e = size(); i != e; ++i) { - Result[i] = ascii_tolower(Data[i]); + Result[i] = toLower(Data[i]); } return Result; } @@ -131,7 +116,7 @@ std::string StringRef::lower() const { std::string StringRef::upper() const { std::string Result(size(), char()); for (size_type i = 0, e = size(); i != e; ++i) { - Result[i] = ascii_toupper(Data[i]); + Result[i] = toUpper(Data[i]); } return Result; } @@ -210,7 +195,7 @@ size_t StringRef::rfind_lower(char C, size_t From) const { size_t i = From; while (i != 0) { --i; - if (ascii_tolower(Data[i]) == ascii_tolower(C)) + if (toLower(Data[i]) == toLower(C)) return i; } return npos; @@ -415,7 +400,7 @@ static unsigned GetAutoSenseRadix(StringRef &Str) { return 8; } - if (Str[0] == '0' && Str.size() > 1 && ascii_isdigit(Str[1])) { + if (Str[0] == '0' && Str.size() > 1 && isDigit(Str[1])) { Str = Str.substr(1); return 8; } @@ -601,7 +586,7 @@ bool StringRef::getAsDouble(double &Result, bool AllowInexact) const { APFloat::opStatus Status = F.convertFromString(*this, APFloat::rmNearestTiesToEven); if (Status != APFloat::opOK) { - if (!AllowInexact || Status != APFloat::opInexact) + if (!AllowInexact || !(Status & APFloat::opInexact)) return true; } diff --git a/lib/Support/TarWriter.cpp b/lib/Support/TarWriter.cpp index 5009607a4780..abc46d076576 100644 --- a/lib/Support/TarWriter.cpp +++ b/lib/Support/TarWriter.cpp @@ -173,6 +173,10 @@ void TarWriter::append(StringRef Path, StringRef Data) { // Write Path and Data. std::string Fullpath = BaseDir + "/" + sys::path::convert_to_slash(Path); + // We do not want to include the same file more than once. + if (!Files.insert(Fullpath).second) + return; + StringRef Prefix; StringRef Name; if (splitUstar(Fullpath, Prefix, Name)) { diff --git a/lib/Support/TargetParser.cpp b/lib/Support/TargetParser.cpp index 30db361e53a8..5f288ff8e4a2 100644 --- a/lib/Support/TargetParser.cpp +++ b/lib/Support/TargetParser.cpp @@ -14,7 +14,6 @@ #include "llvm/Support/ARMBuildAttributes.h" #include "llvm/Support/TargetParser.h" -#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Twine.h" #include @@ -538,7 +537,7 @@ StringRef llvm::AArch64::getDefaultCPU(StringRef Arch) { } unsigned llvm::AArch64::checkArchVersion(StringRef Arch) { - if (Arch[0] == 'v' && std::isdigit(Arch[1])) + if (Arch.size() >= 2 && Arch[0] == 'v' && std::isdigit(Arch[1])) return (Arch[1] - 48); return 0; } @@ -582,7 +581,7 @@ static StringRef getArchSynonym(StringRef Arch) { .Case("v7r", "v7-r") .Case("v7m", "v7-m") .Case("v7em", "v7e-m") - .Cases("v8", "v8a", "aarch64", "arm64", "v8-a") + .Cases("v8", "v8a", "v8l", "aarch64", "arm64", "v8-a") .Case("v8.1a", "v8.1-a") .Case("v8.2a", "v8.2-a") .Case("v8.3a", "v8.3-a") @@ -634,7 +633,7 @@ StringRef llvm::ARM::getCanonicalArchName(StringRef Arch) { // Only match non-marketing names if (offset != StringRef::npos) { // Must start with 'vN'. - if (A[0] != 'v' || !std::isdigit(A[1])) + if (A.size() >= 2 && (A[0] != 'v' || !std::isdigit(A[1]))) return Error; // Can't have an extra 'eb'. 
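The ad-hoc ascii_* helpers removed above are replaced by the shared character predicates from StringExtras.h; a small sanity sketch of their behaviour (the example strings are chosen purely for illustration):

    #include "llvm/ADT/StringExtras.h"
    #include "llvm/ADT/StringRef.h"
    #include <cassert>

    using namespace llvm;

    static void characterHelperExamples() {
      assert(toLower('A') == 'a');
      assert(toUpper('q') == 'Q');
      assert(isDigit('7') && !isDigit('x'));
      // compare_numeric() orders embedded digit runs by value rather than
      // lexically, so "file9" sorts before "file10".
      assert(StringRef("file9").compare_numeric(StringRef("file10")) < 0);
    }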
if (A.find("eb") != StringRef::npos) @@ -740,7 +739,6 @@ ARM::ProfileKind ARM::parseArchProfile(StringRef Arch) { case ARM::ArchKind::ARMV8_2A: case ARM::ArchKind::ARMV8_3A: return ARM::ProfileKind::A; - LLVM_FALLTHROUGH; case ARM::ArchKind::ARMV2: case ARM::ArchKind::ARMV2A: case ARM::ArchKind::ARMV3: @@ -870,10 +868,10 @@ AArch64::ArchKind AArch64::parseArch(StringRef Arch) { return ArchKind::INVALID; } -unsigned llvm::AArch64::parseArchExt(StringRef ArchExt) { +AArch64::ArchExtKind llvm::AArch64::parseArchExt(StringRef ArchExt) { for (const auto A : AArch64ARCHExtNames) { if (ArchExt == A.getName()) - return A.ID; + return static_cast(A.ID); } return AArch64::AEK_INVALID; } diff --git a/lib/Support/Timer.cpp b/lib/Support/Timer.cpp index 3386f2660f31..0c85faecca84 100644 --- a/lib/Support/Timer.cpp +++ b/lib/Support/Timer.cpp @@ -362,8 +362,10 @@ void TimerGroup::printAll(raw_ostream &OS) { void TimerGroup::printJSONValue(raw_ostream &OS, const PrintRecord &R, const char *suffix, double Value) { - assert(!yaml::needsQuotes(Name) && "TimerGroup name needs no quotes"); - assert(!yaml::needsQuotes(R.Name) && "Timer name needs no quotes"); + assert(yaml::needsQuotes(Name) == yaml::QuotingType::None && + "TimerGroup name needs no quotes"); + assert(yaml::needsQuotes(R.Name) == yaml::QuotingType::None && + "Timer name needs no quotes"); OS << "\t\"time." << Name << '.' << R.Name << suffix << "\": " << Value; } diff --git a/lib/Support/Unix/Path.inc b/lib/Support/Unix/Path.inc index 2ecb97316c87..220162d1c19d 100644 --- a/lib/Support/Unix/Path.inc +++ b/lib/Support/Unix/Path.inc @@ -860,12 +860,12 @@ std::error_code real_path(const Twine &path, SmallVectorImpl &dest, return real_path(Storage, dest, false); } - int fd; - std::error_code EC = openFileForRead(path, fd, &dest); - - if (EC) - return EC; - ::close(fd); + SmallString<128> Storage; + StringRef P = path.toNullTerminatedStringRef(Storage); + char Buffer[PATH_MAX]; + if (::realpath(P.begin(), Buffer) == nullptr) + return std::error_code(errno, std::generic_category()); + dest.append(Buffer, Buffer + strlen(Buffer)); return std::error_code(); } diff --git a/lib/Support/Unix/Process.inc b/lib/Support/Unix/Process.inc index e43650d707e3..7a4e38614009 100644 --- a/lib/Support/Unix/Process.inc +++ b/lib/Support/Unix/Process.inc @@ -369,6 +369,21 @@ static bool terminalHasColors(int fd) { // Return true if we found a color capabilities for the current terminal. if (HasColors) return true; +#else + // When the terminfo database is not available, check if the current terminal + // is one of terminals that are known to support ANSI color escape codes. + if (const char *TermStr = std::getenv("TERM")) { + return StringSwitch(TermStr) + .Case("ansi", true) + .Case("cygwin", true) + .Case("linux", true) + .StartsWith("screen", true) + .StartsWith("xterm", true) + .StartsWith("vt100", true) + .StartsWith("rxvt", true) + .EndsWith("color", true) + .Default(false); + } #endif // Otherwise, be conservative. 
diff --git a/lib/Support/Windows/Path.inc b/lib/Support/Windows/Path.inc index f5b1c0ffe69d..f81790b17df5 100644 --- a/lib/Support/Windows/Path.inc +++ b/lib/Support/Windows/Path.inc @@ -391,6 +391,20 @@ std::error_code is_local(int FD, bool &Result) { return is_local_internal(FinalPath, Result); } +static std::error_code setDeleteDisposition(HANDLE Handle, bool Delete) { + FILE_DISPOSITION_INFO Disposition; + Disposition.DeleteFile = Delete; + if (!SetFileInformationByHandle(Handle, FileDispositionInfo, &Disposition, + sizeof(Disposition))) + return mapWindowsError(::GetLastError()); + return std::error_code(); +} + +static std::error_code removeFD(int FD) { + HANDLE Handle = reinterpret_cast(_get_osfhandle(FD)); + return setDeleteDisposition(Handle, true); +} + /// In order to handle temporary files we want the following properties /// /// * The temporary file is deleted on crashes @@ -425,11 +439,9 @@ static std::error_code cancelDeleteOnClose(int &FD) { if (close(FD)) return mapWindowsError(::GetLastError()); - FILE_DISPOSITION_INFO Disposition; - Disposition.DeleteFile = false; - if (!SetFileInformationByHandle(NewHandle, FileDispositionInfo, &Disposition, - sizeof(Disposition))) - return mapWindowsError(::GetLastError()); + if (std::error_code EC = setDeleteDisposition(NewHandle, false)) + return EC; + FD = ::_open_osfhandle(intptr_t(NewHandle), 0); if (FD == -1) { ::CloseHandle(NewHandle); diff --git a/lib/Support/Windows/Signals.inc b/lib/Support/Windows/Signals.inc index 21dd2dd13754..23fc72ec10e2 100644 --- a/lib/Support/Windows/Signals.inc +++ b/lib/Support/Windows/Signals.inc @@ -503,7 +503,7 @@ void sys::DisableSystemDialogsOnCrash() { _set_error_mode(_OUT_TO_STDERR); } -/// PrintStackTraceOnErrorSignal - When an error signal (such as SIBABRT or +/// PrintStackTraceOnErrorSignal - When an error signal (such as SIGABRT or /// SIGSEGV) is delivered to the process, print a stack trace and then exit. void sys::PrintStackTraceOnErrorSignal(StringRef Argv0, bool DisableCrashReporting) { diff --git a/lib/Support/YAMLTraits.cpp b/lib/Support/YAMLTraits.cpp index a80adfda8303..f8a80ba87873 100644 --- a/lib/Support/YAMLTraits.cpp +++ b/lib/Support/YAMLTraits.cpp @@ -19,6 +19,7 @@ #include "llvm/Support/Format.h" #include "llvm/Support/LineIterator.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Unicode.h" #include "llvm/Support/YAMLParser.h" #include "llvm/Support/raw_ostream.h" #include @@ -330,7 +331,7 @@ void Input::endBitSetScalar() { } } -void Input::scalarString(StringRef &S, bool) { +void Input::scalarString(StringRef &S, QuotingType) { if (ScalarHNode *SN = dyn_cast(CurrentNode)) { S = SN->value(); } else { @@ -338,7 +339,7 @@ void Input::scalarString(StringRef &S, bool) { } } -void Input::blockScalarString(StringRef &S) { scalarString(S, false); } +void Input::blockScalarString(StringRef &S) { scalarString(S, QuotingType::None); } void Input::setError(HNode *hnode, const Twine &message) { assert(hnode && "HNode must not be NULL"); @@ -617,7 +618,7 @@ void Output::endBitSetScalar() { this->outputUpToEndOfLine(" ]"); } -void Output::scalarString(StringRef &S, bool MustQuote) { +void Output::scalarString(StringRef &S, QuotingType MustQuote) { this->newLineCheck(); if (S.empty()) { // Print '' for the empty string because leaving the field empty is not @@ -625,27 +626,57 @@ void Output::scalarString(StringRef &S, bool MustQuote) { this->outputUpToEndOfLine("''"); return; } - if (!MustQuote) { + if (MustQuote == QuotingType::None) { // Only quote if we must. 
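The bool-to-QuotingType change in scalarString() above also changes the shape of every ScalarTraits specialization; a minimal sketch of the updated interface (the Flavor type is an illustrative stand-in):

    #include "llvm/Support/YAMLTraits.h"
    #include <string>

    struct Flavor { std::string Name; };

    namespace llvm {
    namespace yaml {
    template <> struct ScalarTraits<Flavor> {
      static void output(const Flavor &F, void *, raw_ostream &OS) { OS << F.Name; }
      static StringRef input(StringRef S, void *, Flavor &F) {
        F.Name = S.str();
        return StringRef(); // empty string means "no error"
      }
      // Previously returned bool; now reports how much quoting the value needs.
      static QuotingType mustQuote(StringRef S) { return needsQuotes(S); }
    };
    } // end namespace yaml
    } // end namespace llvm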
this->outputUpToEndOfLine(S); return; } + unsigned i = 0; unsigned j = 0; unsigned End = S.size(); - output("'"); // Starting single quote. const char *Base = S.data(); + + const char *const Quote = MustQuote == QuotingType::Single ? "'" : "\""; + const char QuoteChar = MustQuote == QuotingType::Single ? '\'' : '"'; + + output(Quote); // Starting quote. + + // When using single-quoted strings, any single quote ' must be doubled to be + // escaped. + // When using double-quoted strings, print \x + hex for non-printable ASCII + // characters, and escape double quotes. while (j < End) { - // Escape a single quote by doubling it. - if (S[j] == '\'') { - output(StringRef(&Base[i], j - i + 1)); - output("'"); + if (S[j] == QuoteChar) { // Escape quotes. + output(StringRef(&Base[i], j - i)); // "flush". + if (MustQuote == QuotingType::Double) { // Print it as \" + output(StringLiteral("\\")); + output(StringRef(Quote, 1)); + } else { // Single + output(StringLiteral("''")); // Print it as '' + } + i = j + 1; + } else if (MustQuote == QuotingType::Double && + !sys::unicode::isPrintable(S[j]) && (S[j] & 0x80) == 0) { + // If we're double quoting non-printable characters, we prefer printing + // them as "\x" + their hex representation. Note that special casing is + // needed for UTF-8, where a byte may be part of a UTF-8 sequence and + // appear as non-printable, in which case we want to print the correct + // unicode character and not its hex representation. + output(StringRef(&Base[i], j - i)); // "flush" + output(StringLiteral("\\x")); + + // Output the byte 0x0F as \x0f. + auto FormattedHex = format_hex_no_prefix(S[j], 2); + Out << FormattedHex; + Column += 4; // one for the '\', one for the 'x', and two for the hex + i = j + 1; } ++j; } output(StringRef(&Base[i], j - i)); - this->outputUpToEndOfLine("'"); // Ending single quote. + this->outputUpToEndOfLine(Quote); // Ending quote. } void Output::blockScalarString(StringRef &S) { diff --git a/lib/TableGen/Main.cpp b/lib/TableGen/Main.cpp index fc9d0cc08885..be35f894cccd 100644 --- a/lib/TableGen/Main.cpp +++ b/lib/TableGen/Main.cpp @@ -110,7 +110,7 @@ int llvm::TableGenMain(char *argv0, TableGenMainFn *MainFn) { return 1; if (ErrorsPrinted > 0) - return reportError(argv0, utostr(ErrorsPrinted) + " errors.\n"); + return reportError(argv0, Twine(ErrorsPrinted) + " errors.\n"); // Declare success. Out.keep(); diff --git a/lib/TableGen/StringMatcher.cpp b/lib/TableGen/StringMatcher.cpp index 7e510f0c2fdc..32599104f6a2 100644 --- a/lib/TableGen/StringMatcher.cpp +++ b/lib/TableGen/StringMatcher.cpp @@ -46,17 +46,18 @@ FindFirstNonCommonLetter(const std::vector &Matches, - unsigned CharNo, unsigned IndentCount) const { +bool StringMatcher::EmitStringMatcherForChar( + const std::vector &Matches, unsigned CharNo, + unsigned IndentCount, bool IgnoreDuplicates) const { assert(!Matches.empty() && "Must have at least one string to match!"); - std::string Indent(IndentCount*2+4, ' '); + std::string Indent(IndentCount * 2 + 4, ' '); // If we have verified that the entire string matches, we're done: output the // matching code. if (CharNo == Matches[0]->first.size()) { - assert(Matches.size() == 1 && "Had duplicate keys to match on"); - + if (Matches.size() > 1 && !IgnoreDuplicates) + report_fatal_error("Had duplicate keys to match on"); + // If the to-execute code has \n's in it, indent each subsequent line. 
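Referring back to the double-quoted branch of Output::scalarString above, its effect on plain ASCII input can be summarised with a simplified model (an approximation only: the real code uses sys::unicode::isPrintable and leaves multi-byte UTF-8 sequences untouched):

    #include "llvm/ADT/StringRef.h"
    #include "llvm/Support/Format.h"
    #include "llvm/Support/raw_ostream.h"
    #include <string>

    using namespace llvm;

    // Escape '"' and emit ASCII control bytes as \x<hex>; bytes with the high
    // bit set pass through so UTF-8 sequences are not mangled.
    static std::string modelDoubleQuote(StringRef S) {
      std::string Out;
      raw_string_ostream OS(Out);
      OS << '"';
      for (unsigned char C : S) {
        if (C == '"')
          OS << "\\\"";
        else if (C < 0x20 || C == 0x7f)
          OS << "\\x" << format_hex_no_prefix(C, 2);
        else
          OS << static_cast<char>(C);
      }
      OS << '"';
      return OS.str();
    }
    // e.g. modelDoubleQuote("tab\there") produces "tab\x09here" between quotes.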
StringRef Code = Matches[0]->second; @@ -100,8 +101,9 @@ EmitStringMatcherForChar(const std::vector &Matches, << NumChars << ") != 0)\n"; OS << Indent << " break;\n"; } - - return EmitStringMatcherForChar(Matches, FirstNonCommonLetter, IndentCount); + + return EmitStringMatcherForChar(Matches, FirstNonCommonLetter, IndentCount, + IgnoreDuplicates); } // Otherwise, we have multiple possible things, emit a switch on the @@ -116,7 +118,8 @@ EmitStringMatcherForChar(const std::vector &Matches, << LI->second.size() << " string"; if (LI->second.size() != 1) OS << 's'; OS << " to match.\n"; - if (EmitStringMatcherForChar(LI->second, CharNo+1, IndentCount+1)) + if (EmitStringMatcherForChar(LI->second, CharNo + 1, IndentCount + 1, + IgnoreDuplicates)) OS << Indent << " break;\n"; } @@ -126,7 +129,7 @@ EmitStringMatcherForChar(const std::vector &Matches, /// Emit - Top level entry point. /// -void StringMatcher::Emit(unsigned Indent) const { +void StringMatcher::Emit(unsigned Indent, bool IgnoreDuplicates) const { // If nothing to match, just fall through. if (Matches.empty()) return; @@ -146,7 +149,7 @@ void StringMatcher::Emit(unsigned Indent) const { OS.indent(Indent*2+2) << "case " << LI->first << ":\t // " << LI->second.size() << " string" << (LI->second.size() == 1 ? "" : "s") << " to match.\n"; - if (EmitStringMatcherForChar(LI->second, 0, Indent)) + if (EmitStringMatcherForChar(LI->second, 0, Indent, IgnoreDuplicates)) OS.indent(Indent*2+4) << "break;\n"; } diff --git a/lib/Target/AArch64/AArch64.h b/lib/Target/AArch64/AArch64.h index 1dda746a6be1..edda13ce97ef 100644 --- a/lib/Target/AArch64/AArch64.h +++ b/lib/Target/AArch64/AArch64.h @@ -39,7 +39,7 @@ FunctionPass *createAArch64ISelDag(AArch64TargetMachine &TM, FunctionPass *createAArch64StorePairSuppressPass(); FunctionPass *createAArch64ExpandPseudoPass(); FunctionPass *createAArch64LoadStoreOptimizationPass(); -FunctionPass *createAArch64VectorByElementOptPass(); +FunctionPass *createAArch64SIMDInstrOptPass(); ModulePass *createAArch64PromoteConstantPass(); FunctionPass *createAArch64ConditionOptimizerPass(); FunctionPass *createAArch64A57FPLoadBalancing(); @@ -64,7 +64,7 @@ void initializeAArch64ConditionOptimizerPass(PassRegistry&); void initializeAArch64DeadRegisterDefinitionsPass(PassRegistry&); void initializeAArch64ExpandPseudoPass(PassRegistry&); void initializeAArch64LoadStoreOptPass(PassRegistry&); -void initializeAArch64VectorByElementOptPass(PassRegistry&); +void initializeAArch64SIMDInstrOptPass(PassRegistry&); void initializeAArch64PromoteConstantPass(PassRegistry&); void initializeAArch64RedundantCopyEliminationPass(PassRegistry&); void initializeAArch64StorePairSuppressPass(PassRegistry&); diff --git a/lib/Target/AArch64/AArch64.td b/lib/Target/AArch64/AArch64.td index ce0bce5e3ae3..a7f155df7652 100644 --- a/lib/Target/AArch64/AArch64.td +++ b/lib/Target/AArch64/AArch64.td @@ -61,6 +61,12 @@ def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true", def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true", "Has zero-cycle zeroing instructions">; +/// ... but the floating-point version doesn't quite work in rare cases on older +/// CPUs. 
+def FeatureZCZeroingFPWorkaround : SubtargetFeature<"zcz-fp-workaround", + "HasZeroCycleZeroingFPWorkaround", "true", + "The zero-cycle floating-point zeroing instruction has a bug">; + def FeatureStrictAlign : SubtargetFeature<"strict-align", "StrictAlign", "true", "Disallow all unaligned memory " @@ -142,6 +148,7 @@ def FeatureNoNegativeImmediates : SubtargetFeature<"no-neg-immediates", def FeatureLSLFast : SubtargetFeature< "lsl-fast", "HasLSLFast", "true", "CPU has a fastpath logical shift of up to 3 places">; + //===----------------------------------------------------------------------===// // Architectures. // @@ -289,7 +296,8 @@ def ProcCyclone : SubtargetFeature<"cyclone", "ARMProcFamily", "Cyclone", FeaturePerfMon, FeatureSlowMisaligned128Store, FeatureZCRegMove, - FeatureZCZeroing + FeatureZCZeroing, + FeatureZCZeroingFPWorkaround ]>; def ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1", @@ -308,7 +316,7 @@ def ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1", FeatureZCZeroing]>; def ProcExynosM2 : SubtargetFeature<"exynosm2", "ARMProcFamily", "ExynosM1", - "Samsung Exynos-M2/M3 processors", + "Samsung Exynos-M2 processors", [FeatureSlowPaired128, FeatureCRC, FeatureCrypto, @@ -321,6 +329,21 @@ def ProcExynosM2 : SubtargetFeature<"exynosm2", "ARMProcFamily", "ExynosM1", FeatureSlowMisaligned128Store, FeatureZCZeroing]>; +def ProcExynosM3 : SubtargetFeature<"exynosm3", "ARMProcFamily", "ExynosM1", + "Samsung Exynos-M3 processors", + [FeatureCRC, + FeatureCrypto, + FeatureCustomCheapAsMoveHandling, + FeatureFPARMv8, + FeatureFuseAES, + FeatureFuseLiterals, + FeatureNEON, + FeaturePerfMon, + FeaturePostRAScheduler, + FeatureSlowMisaligned128Store, + FeatureSlowPaired128, + FeatureZCZeroing]>; + def ProcKryo : SubtargetFeature<"kryo", "ARMProcFamily", "Kryo", "Qualcomm Kryo processors", [ FeatureCRC, @@ -441,7 +464,7 @@ def : ProcessorModel<"cortex-a75", CortexA57Model, [ProcA75]>; def : ProcessorModel<"cyclone", CycloneModel, [ProcCyclone]>; def : ProcessorModel<"exynos-m1", ExynosM1Model, [ProcExynosM1]>; def : ProcessorModel<"exynos-m2", ExynosM1Model, [ProcExynosM2]>; -def : ProcessorModel<"exynos-m3", ExynosM1Model, [ProcExynosM2]>; +def : ProcessorModel<"exynos-m3", ExynosM1Model, [ProcExynosM3]>; def : ProcessorModel<"falkor", FalkorModel, [ProcFalkor]>; def : ProcessorModel<"saphira", FalkorModel, [ProcSaphira]>; def : ProcessorModel<"kryo", KryoModel, [ProcKryo]>; @@ -461,12 +484,14 @@ def GenericAsmParserVariant : AsmParserVariant { int Variant = 0; string Name = "generic"; string BreakCharacters = "."; + string TokenizingCharacters = "[]*!/"; } def AppleAsmParserVariant : AsmParserVariant { int Variant = 1; string Name = "apple-neon"; string BreakCharacters = "."; + string TokenizingCharacters = "[]*!/"; } //===----------------------------------------------------------------------===// diff --git a/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp b/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp index db1fbe069f4d..38a7e331bb97 100644 --- a/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp +++ b/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp @@ -161,9 +161,9 @@ namespace { /// A Chain is a sequence of instructions that are linked together by /// an accumulation operand. For example: /// -/// fmul d0, ? -/// fmla d1, ?, ?, d0 -/// fmla d2, ?, ?, d1 +/// fmul def d0, ? 
+/// fmla def d1, ?, ?, killed d0 +/// fmla def d2, ?, ?, killed d1 /// /// There may be other instructions interleaved in the sequence that /// do not belong to the chain. These other instructions must not use @@ -308,7 +308,7 @@ class Chain { //===----------------------------------------------------------------------===// bool AArch64A57FPLoadBalancing::runOnMachineFunction(MachineFunction &F) { - if (skipFunction(*F.getFunction())) + if (skipFunction(F.getFunction())) return false; if (!F.getSubtarget().balanceFPOps()) @@ -538,7 +538,7 @@ bool AArch64A57FPLoadBalancing::colorChain(Chain *G, Color C, DEBUG(dbgs() << "Scavenging (thus coloring) failed!\n"); return false; } - DEBUG(dbgs() << " - Scavenged register: " << TRI->getName(Reg) << "\n"); + DEBUG(dbgs() << " - Scavenged register: " << printReg(Reg, TRI) << "\n"); std::map Substs; for (MachineInstr &I : *G) { @@ -611,8 +611,8 @@ void AArch64A57FPLoadBalancing::scanInstruction( // unit. unsigned DestReg = MI->getOperand(0).getReg(); - DEBUG(dbgs() << "New chain started for register " - << TRI->getName(DestReg) << " at " << *MI); + DEBUG(dbgs() << "New chain started for register " << printReg(DestReg, TRI) + << " at " << *MI); auto G = llvm::make_unique(MI, Idx, getColor(DestReg)); ActiveChains[DestReg] = G.get(); @@ -632,7 +632,7 @@ void AArch64A57FPLoadBalancing::scanInstruction( if (ActiveChains.find(AccumReg) != ActiveChains.end()) { DEBUG(dbgs() << "Chain found for accumulator register " - << TRI->getName(AccumReg) << " in MI " << *MI); + << printReg(AccumReg, TRI) << " in MI " << *MI); // For simplicity we only chain together sequences of MULs/MLAs where the // accumulator register is killed on each instruction. This means we don't @@ -657,7 +657,7 @@ void AArch64A57FPLoadBalancing::scanInstruction( } DEBUG(dbgs() << "Creating new chain for dest register " - << TRI->getName(DestReg) << "\n"); + << printReg(DestReg, TRI) << "\n"); auto G = llvm::make_unique(MI, Idx, getColor(DestReg)); ActiveChains[DestReg] = G.get(); AllChains.push_back(std::move(G)); @@ -685,8 +685,8 @@ maybeKillChain(MachineOperand &MO, unsigned Idx, // If this is a KILL of a current chain, record it. 
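The recurring skipFunction edits in these AArch64 passes all follow from a single signature change: MachineFunction::getFunction() now returns a const Function& instead of a pointer. A minimal sketch of the resulting call pattern (the particular predicate combination is illustrative):

    #include "llvm/CodeGen/MachineFunction.h"
    #include "llvm/IR/Attributes.h"
    #include "llvm/IR/Function.h"

    using namespace llvm;

    static bool shouldSkip(const MachineFunction &MF) {
      const Function &F = MF.getFunction(); // previously: *MF.getFunction()
      return F.hasFnAttribute(Attribute::OptimizeNone) || F.optForMinSize();
    }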
if (MO.isKill() && ActiveChains.find(MO.getReg()) != ActiveChains.end()) { - DEBUG(dbgs() << "Kill seen for chain " << TRI->getName(MO.getReg()) - << "\n"); + DEBUG(dbgs() << "Kill seen for chain " << printReg(MO.getReg(), TRI) + << "\n"); ActiveChains[MO.getReg()]->setKill(MI, Idx, /*Immutable=*/MO.isTied()); } ActiveChains.erase(MO.getReg()); @@ -697,7 +697,7 @@ maybeKillChain(MachineOperand &MO, unsigned Idx, I != E;) { if (MO.clobbersPhysReg(I->first)) { DEBUG(dbgs() << "Kill (regmask) seen for chain " - << TRI->getName(I->first) << "\n"); + << printReg(I->first, TRI) << "\n"); I->second->setKill(MI, Idx, /*Immutable=*/true); ActiveChains.erase(I++); } else diff --git a/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp b/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp index bc2320dd20b3..338daecb49e5 100644 --- a/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp +++ b/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp @@ -36,7 +36,6 @@ #include "AArch64.h" #include "AArch64InstrInfo.h" #include "AArch64RegisterInfo.h" -#include "AArch64Subtarget.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -394,7 +393,7 @@ bool AArch64AdvSIMDScalar::runOnMachineFunction(MachineFunction &mf) { bool Changed = false; DEBUG(dbgs() << "***** AArch64AdvSIMDScalar *****\n"); - if (skipFunction(*mf.getFunction())) + if (skipFunction(mf.getFunction())) return false; MRI = &mf.getRegInfo(); diff --git a/lib/Target/AArch64/AArch64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp index 56fcff606aa7..994b8436f947 100644 --- a/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -210,29 +210,6 @@ void AArch64AsmPrinter::EmitEndOfAsmFile(Module &M) { OutStreamer->EmitAssemblerFlag(MCAF_SubsectionsViaSymbols); SM.serializeToStackMapSection(); } - - if (TT.isOSBinFormatCOFF()) { - const auto &TLOF = - static_cast(getObjFileLowering()); - - std::string Flags; - raw_string_ostream OS(Flags); - - for (const auto &Function : M) - TLOF.emitLinkerFlagsForGlobal(OS, &Function); - for (const auto &Global : M.globals()) - TLOF.emitLinkerFlagsForGlobal(OS, &Global); - for (const auto &Alias : M.aliases()) - TLOF.emitLinkerFlagsForGlobal(OS, &Alias); - - OS.flush(); - - // Output collected flags - if (!Flags.empty()) { - OutStreamer->SwitchSection(TLOF.getDrectveSection()); - OutStreamer->EmitBytes(Flags); - } - } } void AArch64AsmPrinter::EmitLOHs() { @@ -523,7 +500,7 @@ void AArch64AsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM, void AArch64AsmPrinter::EmitFMov0(const MachineInstr &MI) { unsigned DestReg = MI.getOperand(0).getReg(); - if (STI->hasZeroCycleZeroing()) { + if (STI->hasZeroCycleZeroing() && !STI->hasZeroCycleZeroingFPWorkaround()) { // Convert H/S/D register to corresponding Q register if (AArch64::H0 <= DestReg && DestReg <= AArch64::H31) DestReg = AArch64::Q0 + (DestReg - AArch64::H0); @@ -583,6 +560,20 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) { switch (MI->getOpcode()) { default: break; + case AArch64::MOVIv2d_ns: + // If the target has , lower this + // instruction to movi.16b instead. 
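Concretely, the workaround guarded by hasZeroCycleZeroingFPWorkaround() swaps the element size of the zeroing move; the intended mapping (an illustrative spelling, not copied from a test) is roughly:

    // Without the workaround:  movi  v0.2d,  #0000000000000000
    // With the workaround:     movi  v0.16b, #0
    // Both clear all 128 bits of v0; only the element arrangement differs.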
+ if (STI->hasZeroCycleZeroingFPWorkaround() && + MI->getOperand(1).getImm() == 0) { + MCInst TmpInst; + TmpInst.setOpcode(AArch64::MOVIv16b_ns); + TmpInst.addOperand(MCOperand::createReg(MI->getOperand(0).getReg())); + TmpInst.addOperand(MCOperand::createImm(MI->getOperand(1).getImm())); + EmitToStreamer(*OutStreamer, TmpInst); + return; + } + break; + case AArch64::DBG_VALUE: { if (isVerbose() && OutStreamer->hasRawTextSupport()) { SmallString<128> TmpStr; diff --git a/lib/Target/AArch64/AArch64CallLowering.cpp b/lib/Target/AArch64/AArch64CallLowering.cpp index 5cc8881d1c16..08152c0d83d9 100644 --- a/lib/Target/AArch64/AArch64CallLowering.cpp +++ b/lib/Target/AArch64/AArch64CallLowering.cpp @@ -220,7 +220,7 @@ void AArch64CallLowering::splitToValueTypes( bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val, unsigned VReg) const { MachineFunction &MF = MIRBuilder.getMF(); - const Function &F = *MF.getFunction(); + const Function &F = MF.getFunction(); auto MIB = MIRBuilder.buildInstrNoInsert(AArch64::RET_ReallyLR); assert(((Val && VReg) || (!Val && !VReg)) && "Return value without a vreg"); @@ -259,6 +259,8 @@ bool AArch64CallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, SmallVector SplitArgs; unsigned i = 0; for (auto &Arg : F.args()) { + if (DL.getTypeStoreSize(Arg.getType()) == 0) + continue; ArgInfo OrigArg{VRegs[i], Arg.getType()}; setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, F); bool Split = false; @@ -320,7 +322,7 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const ArgInfo &OrigRet, ArrayRef OrigArgs) const { MachineFunction &MF = MIRBuilder.getMF(); - const Function &F = *MF.getFunction(); + const Function &F = MF.getFunction(); MachineRegisterInfo &MRI = MF.getRegInfo(); auto &DL = F.getParent()->getDataLayout(); diff --git a/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp b/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp index b3b738584b40..b88fba4452a1 100644 --- a/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp +++ b/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp @@ -25,7 +25,6 @@ #include "AArch64.h" #include "AArch64InstrInfo.h" #include "AArch64MachineFunctionInfo.h" -#include "AArch64TargetMachine.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -43,7 +42,7 @@ struct LDTLSCleanup : public MachineFunctionPass { } bool runOnMachineFunction(MachineFunction &MF) override { - if (skipFunction(*MF.getFunction())) + if (skipFunction(MF.getFunction())) return false; AArch64FunctionInfo *AFI = MF.getInfo(); diff --git a/lib/Target/AArch64/AArch64CollectLOH.cpp b/lib/Target/AArch64/AArch64CollectLOH.cpp index 1d13e9a849b8..0a9167edcdb3 100644 --- a/lib/Target/AArch64/AArch64CollectLOH.cpp +++ b/lib/Target/AArch64/AArch64CollectLOH.cpp @@ -101,18 +101,14 @@ #include "AArch64.h" #include "AArch64InstrInfo.h" #include "AArch64MachineFunctionInfo.h" -#include "AArch64Subtarget.h" -#include "MCTargetDesc/AArch64AddressingModes.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" -#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/Support/Debug.h" #include 
"llvm/Support/ErrorHandling.h" @@ -486,7 +482,7 @@ static void handleNormalInst(const MachineInstr &MI, LOHInfo *LOHInfos) { } bool AArch64CollectLOH::runOnMachineFunction(MachineFunction &MF) { - if (skipFunction(*MF.getFunction())) + if (skipFunction(MF.getFunction())) return false; DEBUG(dbgs() << "********** AArch64 Collect LOH **********\n" diff --git a/lib/Target/AArch64/AArch64CondBrTuning.cpp b/lib/Target/AArch64/AArch64CondBrTuning.cpp index ca4915bc8213..30cefbad884c 100644 --- a/lib/Target/AArch64/AArch64CondBrTuning.cpp +++ b/lib/Target/AArch64/AArch64CondBrTuning.cpp @@ -32,7 +32,6 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/MachineTraceMetrics.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" @@ -291,7 +290,7 @@ bool AArch64CondBrTuning::tryToTuneBranch(MachineInstr &MI, } bool AArch64CondBrTuning::runOnMachineFunction(MachineFunction &MF) { - if (skipFunction(*MF.getFunction())) + if (skipFunction(MF.getFunction())) return false; DEBUG(dbgs() << "********** AArch64 Conditional Branch Tuning **********\n" diff --git a/lib/Target/AArch64/AArch64ConditionOptimizer.cpp b/lib/Target/AArch64/AArch64ConditionOptimizer.cpp index d1bcd3dcaec4..d14bde33d94e 100644 --- a/lib/Target/AArch64/AArch64ConditionOptimizer.cpp +++ b/lib/Target/AArch64/AArch64ConditionOptimizer.cpp @@ -207,7 +207,7 @@ MachineInstr *AArch64ConditionOptimizer::findSuitableCompare( return nullptr; } } - DEBUG(dbgs() << "Flags not defined in BB#" << MBB->getNumber() << '\n'); + DEBUG(dbgs() << "Flags not defined in " << printMBBReference(*MBB) << '\n'); return nullptr; } @@ -327,7 +327,7 @@ bool AArch64ConditionOptimizer::adjustTo(MachineInstr *CmpMI, bool AArch64ConditionOptimizer::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "********** AArch64 Conditional Compares **********\n" << "********** Function: " << MF.getName() << '\n'); - if (skipFunction(*MF.getFunction())) + if (skipFunction(MF.getFunction())) return false; TII = MF.getSubtarget().getInstrInfo(); diff --git a/lib/Target/AArch64/AArch64ConditionalCompares.cpp b/lib/Target/AArch64/AArch64ConditionalCompares.cpp index 668d21d0b162..b0bda7c43c15 100644 --- a/lib/Target/AArch64/AArch64ConditionalCompares.cpp +++ b/lib/Target/AArch64/AArch64ConditionalCompares.cpp @@ -369,7 +369,7 @@ MachineInstr *SSACCmpConv::findConvertibleCompare(MachineBasicBlock *MBB) { return nullptr; } } - DEBUG(dbgs() << "Flags not defined in BB#" << MBB->getNumber() << '\n'); + DEBUG(dbgs() << "Flags not defined in " << printMBBReference(*MBB) << '\n'); return nullptr; } @@ -383,7 +383,7 @@ bool SSACCmpConv::canSpeculateInstrs(MachineBasicBlock *MBB, // Reject any live-in physregs. It's probably NZCV/EFLAGS, and very hard to // get right. if (!MBB->livein_empty()) { - DEBUG(dbgs() << "BB#" << MBB->getNumber() << " has live-ins.\n"); + DEBUG(dbgs() << printMBBReference(*MBB) << " has live-ins.\n"); return false; } @@ -396,7 +396,7 @@ bool SSACCmpConv::canSpeculateInstrs(MachineBasicBlock *MBB, continue; if (++InstrCount > BlockInstrLimit && !Stress) { - DEBUG(dbgs() << "BB#" << MBB->getNumber() << " has more than " + DEBUG(dbgs() << printMBBReference(*MBB) << " has more than " << BlockInstrLimit << " instructions.\n"); return false; } @@ -458,8 +458,9 @@ bool SSACCmpConv::canConvert(MachineBasicBlock *MBB) { return false; // The CFG topology checks out. 
- DEBUG(dbgs() << "\nTriangle: BB#" << Head->getNumber() << " -> BB#" - << CmpBB->getNumber() << " -> BB#" << Tail->getNumber() << '\n'); + DEBUG(dbgs() << "\nTriangle: " << printMBBReference(*Head) << " -> " + << printMBBReference(*CmpBB) << " -> " + << printMBBReference(*Tail) << '\n'); ++NumConsidered; // Tail is allowed to have many predecessors, but we can't handle PHIs yet. @@ -562,8 +563,9 @@ bool SSACCmpConv::canConvert(MachineBasicBlock *MBB) { } void SSACCmpConv::convert(SmallVectorImpl &RemovedBlocks) { - DEBUG(dbgs() << "Merging BB#" << CmpBB->getNumber() << " into BB#" - << Head->getNumber() << ":\n" << *CmpBB); + DEBUG(dbgs() << "Merging " << printMBBReference(*CmpBB) << " into " + << printMBBReference(*Head) << ":\n" + << *CmpBB); // All CmpBB instructions are moved into Head, and CmpBB is deleted. // Update the CFG first. @@ -922,7 +924,7 @@ bool AArch64ConditionalCompares::tryConvert(MachineBasicBlock *MBB) { bool AArch64ConditionalCompares::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "********** AArch64 Conditional Compares **********\n" << "********** Function: " << MF.getName() << '\n'); - if (skipFunction(*MF.getFunction())) + if (skipFunction(MF.getFunction())) return false; TII = MF.getSubtarget().getInstrInfo(); @@ -934,7 +936,7 @@ bool AArch64ConditionalCompares::runOnMachineFunction(MachineFunction &MF) { MBPI = &getAnalysis(); Traces = &getAnalysis(); MinInstr = nullptr; - MinSize = MF.getFunction()->optForMinSize(); + MinSize = MF.getFunction().optForMinSize(); bool Changed = false; CmpConv.runOnMachineFunction(MF, MBPI); diff --git a/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp b/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp index 0298c76d68ec..8e7e740da6f6 100644 --- a/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp +++ b/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp @@ -198,7 +198,7 @@ void AArch64DeadRegisterDefinitions::processMachineBasicBlock( // Scan the function for instructions that have a dead definition of a // register. Replace that register with the zero register when possible. bool AArch64DeadRegisterDefinitions::runOnMachineFunction(MachineFunction &MF) { - if (skipFunction(*MF.getFunction())) + if (skipFunction(MF.getFunction())) return false; TRI = MF.getSubtarget().getRegisterInfo(); diff --git a/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp b/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp index 7b4ab7cc1a3e..d1ddb2e3ef70 100644 --- a/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp +++ b/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp @@ -798,7 +798,7 @@ bool FalkorHWPFFix::runOnMachineFunction(MachineFunction &Fn) { if (ST.getProcFamily() != AArch64Subtarget::Falkor) return false; - if (skipFunction(*Fn.getFunction())) + if (skipFunction(Fn.getFunction())) return false; TII = static_cast(ST.getInstrInfo()); diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp index fd1699fd363d..59168aea91e3 100644 --- a/lib/Target/AArch64/AArch64FastISel.cpp +++ b/lib/Target/AArch64/AArch64FastISel.cpp @@ -3476,7 +3476,7 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { return false; const char *IntrMemName = isa(II) ? "memcpy" : "memmove"; - return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 2); + return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 1); } case Intrinsic::memset: { const MemSetInst *MSI = cast(II); @@ -3492,7 +3492,7 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { // address spaces. 
return false; - return lowerCallTo(II, "memset", II->getNumArgOperands() - 2); + return lowerCallTo(II, "memset", II->getNumArgOperands() - 1); } case Intrinsic::sin: case Intrinsic::cos: @@ -5135,11 +5135,12 @@ bool AArch64FastISel::fastSelectInstruction(const Instruction *I) { return selectAtomicCmpXchg(cast(I)); } - // fall-back to target-independent instruction selection. - return selectOperator(I, I->getOpcode()); // Silence warnings. (void)&CC_AArch64_DarwinPCS_VarArg; (void)&CC_AArch64_Win64_VarArg; + + // fall-back to target-independent instruction selection. + return selectOperator(I, I->getOpcode()); } namespace llvm { diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp index 257e6f6e946e..ea4bfe7e8d90 100644 --- a/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -97,6 +97,7 @@ #include "AArch64RegisterInfo.h" #include "AArch64Subtarget.h" #include "AArch64TargetMachine.h" +#include "MCTargetDesc/AArch64AddressingModes.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LivePhysRegs.h" @@ -141,6 +142,12 @@ static cl::opt EnableRedZone("aarch64-redzone", STATISTIC(NumRedZoneFunctions, "Number of functions using red zone"); +/// This is the biggest offset to the stack pointer we can encode in aarch64 +/// instructions (without using a separate calculation and a temp register). +/// Note that the exception here are vector stores/loads which cannot encode any +/// displacements (see estimateRSStackSizeLimit(), isAArch64FrameOffsetLegal()). +static const unsigned DefaultSafeSPDisplacement = 255; + /// Look at each instruction that references stack frames and return the stack /// size limit beyond which some of these instructions will require a scratch /// register during their expansion later. @@ -166,7 +173,7 @@ static unsigned estimateRSStackSizeLimit(MachineFunction &MF) { } } } - return 255; + return DefaultSafeSPDisplacement; } bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const { @@ -174,7 +181,7 @@ bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const { return false; // Don't use the red zone if the function explicitly asks us not to. // This is typically used for kernel code. - if (MF.getFunction()->hasFnAttribute(Attribute::NoRedZone)) + if (MF.getFunction().hasFnAttribute(Attribute::NoRedZone)) return false; const MachineFrameInfo &MFI = MF.getFrameInfo(); @@ -190,11 +197,25 @@ bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const { const MachineFrameInfo &MFI = MF.getFrameInfo(); const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); // Retain behavior of always omitting the FP for leaf functions when possible. - return (MFI.hasCalls() && - MF.getTarget().Options.DisableFramePointerElim(MF)) || - MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() || - MFI.hasStackMap() || MFI.hasPatchPoint() || - RegInfo->needsStackRealignment(MF); + if (MFI.hasCalls() && MF.getTarget().Options.DisableFramePointerElim(MF)) + return true; + if (MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() || + MFI.hasStackMap() || MFI.hasPatchPoint() || + RegInfo->needsStackRealignment(MF)) + return true; + // With large callframes around we may need to use FP to access the scavenging + // emergency spillslot. + // + // Unfortunately some calls to hasFP() like machine verifier -> + // getReservedReg() -> hasFP in the middle of global isel are too early + // to know the max call frame size. 
Hopefully conservatively returning "true" + // in those cases is fine. + // DefaultSafeSPDisplacement is fine as we only emergency spill GP regs. + if (!MFI.isMaxCallFrameSizeComputed() || + MFI.getMaxCallFrameSize() > DefaultSafeSPDisplacement) + return true; + + return false; } /// hasReservedCallFrame - Under normal circumstances, when a frame pointer is @@ -335,6 +356,22 @@ bool AArch64FrameLowering::canUseAsPrologue( return findScratchNonCalleeSaveRegister(TmpMBB) != AArch64::NoRegister; } +static bool windowsRequiresStackProbe(MachineFunction &MF, + unsigned StackSizeInBytes) { + const AArch64Subtarget &Subtarget = MF.getSubtarget(); + if (!Subtarget.isTargetWindows()) + return false; + const Function &F = MF.getFunction(); + // TODO: When implementing stack protectors, take that into account + // for the probe threshold. + unsigned StackProbeSize = 4096; + if (F.hasFnAttribute("stack-probe-size")) + F.getFnAttribute("stack-probe-size") + .getValueAsString() + .getAsInteger(0, StackProbeSize); + return StackSizeInBytes >= StackProbeSize; +} + bool AArch64FrameLowering::shouldCombineCSRLocalStackBump( MachineFunction &MF, unsigned StackBumpBytes) const { AArch64FunctionInfo *AFI = MF.getInfo(); @@ -347,7 +384,7 @@ bool AArch64FrameLowering::shouldCombineCSRLocalStackBump( // 512 is the maximum immediate for stp/ldp that will be used for // callee-save save/restores - if (StackBumpBytes >= 512) + if (StackBumpBytes >= 512 || windowsRequiresStackProbe(MF, StackBumpBytes)) return false; if (MFI.hasVarSizedObjects()) @@ -459,13 +496,13 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.begin(); const MachineFrameInfo &MFI = MF.getFrameInfo(); - const Function *Fn = MF.getFunction(); + const Function &F = MF.getFunction(); const AArch64Subtarget &Subtarget = MF.getSubtarget(); const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); MachineModuleInfo &MMI = MF.getMMI(); AArch64FunctionInfo *AFI = MF.getInfo(); - bool needsFrameMoves = MMI.hasDebugInfo() || Fn->needsUnwindTableEntry(); + bool needsFrameMoves = MMI.hasDebugInfo() || F.needsUnwindTableEntry(); bool HasFP = hasFP(MF); // Debug location must be unknown since the first debug location is used @@ -474,11 +511,11 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, // All calls are tail calls in GHC calling conv, and functions have no // prologue/epilogue. - if (MF.getFunction()->getCallingConv() == CallingConv::GHC) + if (MF.getFunction().getCallingConv() == CallingConv::GHC) return; int NumBytes = (int)MFI.getStackSize(); - if (!AFI->hasStackFrame()) { + if (!AFI->hasStackFrame() && !windowsRequiresStackProbe(MF, NumBytes)) { assert(!HasFP && "unexpected function without stack frame but with FP"); // All of the stack allocation is for locals. @@ -507,7 +544,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, } bool IsWin64 = - Subtarget.isCallingConvWin64(MF.getFunction()->getCallingConv()); + Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv()); unsigned FixedObject = IsWin64 ? 
alignTo(AFI->getVarArgsGPRSize(), 16) : 0; auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject; @@ -550,6 +587,44 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, MachineInstr::FrameSetup); } + if (windowsRequiresStackProbe(MF, NumBytes)) { + uint32_t NumWords = NumBytes >> 4; + + BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), AArch64::X15) + .addImm(NumWords) + .setMIFlags(MachineInstr::FrameSetup); + + switch (MF.getTarget().getCodeModel()) { + case CodeModel::Small: + case CodeModel::Medium: + case CodeModel::Kernel: + BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL)) + .addExternalSymbol("__chkstk") + .addReg(AArch64::X15, RegState::Implicit) + .setMIFlags(MachineInstr::FrameSetup); + break; + case CodeModel::Large: + BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVaddrEXT)) + .addReg(AArch64::X16, RegState::Define) + .addExternalSymbol("__chkstk") + .addExternalSymbol("__chkstk") + .setMIFlags(MachineInstr::FrameSetup); + + BuildMI(MBB, MBBI, DL, TII->get(AArch64::BLR)) + .addReg(AArch64::X16, RegState::Kill) + .addReg(AArch64::X15, RegState::Implicit | RegState::Define) + .setMIFlags(MachineInstr::FrameSetup); + break; + } + + BuildMI(MBB, MBBI, DL, TII->get(AArch64::SUBXrx64), AArch64::SP) + .addReg(AArch64::SP, RegState::Kill) + .addReg(AArch64::X15, RegState::Kill) + .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 4)) + .setMIFlags(MachineInstr::FrameSetup); + NumBytes = 0; + } + // Allocate space for the rest of the frame. if (NumBytes) { const bool NeedsRealignment = RegInfo->needsStackRealignment(MF); @@ -716,7 +791,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, // All calls are tail calls in GHC calling conv, and functions have no // prologue/epilogue. - if (MF.getFunction()->getCallingConv() == CallingConv::GHC) + if (MF.getFunction().getCallingConv() == CallingConv::GHC) return; // Initial and residual are named for consistency with the prologue. Note that @@ -765,7 +840,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, // it as the 2nd argument of AArch64ISD::TC_RETURN. bool IsWin64 = - Subtarget.isCallingConvWin64(MF.getFunction()->getCallingConv()); + Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv()); unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0; auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject; @@ -857,7 +932,7 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF, const AArch64FunctionInfo *AFI = MF.getInfo(); const AArch64Subtarget &Subtarget = MF.getSubtarget(); bool IsWin64 = - Subtarget.isCallingConvWin64(MF.getFunction()->getCallingConv()); + Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv()); unsigned FixedObject = IsWin64 ? 
alignTo(AFI->getVarArgsGPRSize(), 16) : 0; int FPOffset = MFI.getObjectOffset(FI) + FixedObject + 16; int Offset = MFI.getObjectOffset(FI) + MFI.getStackSize(); @@ -928,7 +1003,7 @@ static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) { static bool produceCompactUnwindFrame(MachineFunction &MF) { const AArch64Subtarget &Subtarget = MF.getSubtarget(); - AttributeList Attrs = MF.getFunction()->getAttributes(); + AttributeList Attrs = MF.getFunction().getAttributes(); return Subtarget.isTargetMachO() && !(Subtarget.getTargetLowering()->supportSwiftError() && Attrs.hasAttrSomewhere(Attribute::SwiftError)); @@ -959,7 +1034,7 @@ static void computeCalleeSaveRegisterPairs( AArch64FunctionInfo *AFI = MF.getInfo(); MachineFrameInfo &MFI = MF.getFrameInfo(); - CallingConv::ID CC = MF.getFunction()->getCallingConv(); + CallingConv::ID CC = MF.getFunction().getCallingConv(); unsigned Count = CSI.size(); (void)CC; // MachO's compact unwind format relies on all registers being stored in @@ -1060,9 +1135,9 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters( StrOpc = RPI.isPaired() ? AArch64::STPXi : AArch64::STRXui; else StrOpc = RPI.isPaired() ? AArch64::STPDi : AArch64::STRDui; - DEBUG(dbgs() << "CSR spill: (" << TRI->getName(Reg1); + DEBUG(dbgs() << "CSR spill: (" << printReg(Reg1, TRI); if (RPI.isPaired()) - dbgs() << ", " << TRI->getName(Reg2); + dbgs() << ", " << printReg(Reg2, TRI); dbgs() << ") -> fi#(" << RPI.FrameIdx; if (RPI.isPaired()) dbgs() << ", " << RPI.FrameIdx+1; @@ -1123,9 +1198,9 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters( LdrOpc = RPI.isPaired() ? AArch64::LDPXi : AArch64::LDRXui; else LdrOpc = RPI.isPaired() ? AArch64::LDPDi : AArch64::LDRDui; - DEBUG(dbgs() << "CSR restore: (" << TRI->getName(Reg1); + DEBUG(dbgs() << "CSR restore: (" << printReg(Reg1, TRI); if (RPI.isPaired()) - dbgs() << ", " << TRI->getName(Reg2); + dbgs() << ", " << printReg(Reg2, TRI); dbgs() << ") -> fi#(" << RPI.FrameIdx; if (RPI.isPaired()) dbgs() << ", " << RPI.FrameIdx+1; @@ -1154,7 +1229,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, RegScavenger *RS) const { // All calls are tail calls in GHC calling conv, and functions have no // prologue/epilogue. - if (MF.getFunction()->getCallingConv() == CallingConv::GHC) + if (MF.getFunction().getCallingConv() == CallingConv::GHC) return; TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); @@ -1164,18 +1239,32 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, unsigned UnspilledCSGPR = AArch64::NoRegister; unsigned UnspilledCSGPRPaired = AArch64::NoRegister; + MachineFrameInfo &MFI = MF.getFrameInfo(); + const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); + + unsigned BasePointerReg = RegInfo->hasBasePointer(MF) + ? 
RegInfo->getBaseRegister() + : (unsigned)AArch64::NoRegister; + + unsigned SpillEstimate = SavedRegs.count(); + for (unsigned i = 0; CSRegs[i]; ++i) { + unsigned Reg = CSRegs[i]; + unsigned PairedReg = CSRegs[i ^ 1]; + if (Reg == BasePointerReg) + SpillEstimate++; + if (produceCompactUnwindFrame(MF) && !SavedRegs.test(PairedReg)) + SpillEstimate++; + } + SpillEstimate += 2; // Conservatively include FP+LR in the estimate + unsigned StackEstimate = MFI.estimateStackSize(MF) + 8 * SpillEstimate; + // The frame record needs to be created by saving the appropriate registers - if (hasFP(MF)) { + if (hasFP(MF) || windowsRequiresStackProbe(MF, StackEstimate)) { SavedRegs.set(AArch64::FP); SavedRegs.set(AArch64::LR); } - unsigned BasePointerReg = AArch64::NoRegister; - if (RegInfo->hasBasePointer(MF)) - BasePointerReg = RegInfo->getBaseRegister(); - unsigned ExtraCSSpill = 0; - const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); // Figure out which callee-saved registers to save/restore. for (unsigned i = 0; CSRegs[i]; ++i) { const unsigned Reg = CSRegs[i]; @@ -1217,7 +1306,6 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, // The CSR spill slots have not been allocated yet, so estimateStackSize // won't include them. - MachineFrameInfo &MFI = MF.getFrameInfo(); unsigned CFSize = MFI.estimateStackSize(MF) + 8 * NumRegsSpilled; DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n"); unsigned EstimatedStackSizeLimit = estimateRSStackSizeLimit(MF); @@ -1234,7 +1322,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, if (BigStack) { if (!ExtraCSSpill && UnspilledCSGPR != AArch64::NoRegister) { DEBUG(dbgs() << "Spilling " << printReg(UnspilledCSGPR, RegInfo) - << " to get a scratch register.\n"); + << " to get a scratch register.\n"); SavedRegs.set(UnspilledCSGPR); // MachO's compact unwind format relies on all registers being stored in // pairs, so if we need to spill one extra for BigStack, then we need to diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 06005f6b6886..0b10246b0cc8 100644 --- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -53,7 +53,7 @@ class AArch64DAGToDAGISel : public SelectionDAGISel { } bool runOnMachineFunction(MachineFunction &MF) override { - ForCodeSize = MF.getFunction()->optForSize(); + ForCodeSize = MF.getFunction().optForSize(); Subtarget = &MF.getSubtarget(); return SelectionDAGISel::runOnMachineFunction(MF); } diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 589abaa5f7c8..2b613e14050f 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -470,10 +470,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, if (Subtarget->hasPerfMon()) setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal); - if (Subtarget->isTargetMachO()) { - // For iOS, we don't want to the normal expansion of a libcall to - // sincos. We want to issue a libcall to __sincos_stret to avoid memory - // traffic. + if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr && + getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) { + // Issue __sincos_stret if available. 
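For context on the FSINCOS lowering referenced above: __sincos_stret computes sine and cosine in a single call and hands both results back directly, so the lowering avoids the memory round-trip of the plain sincos(x, &s, &c) form. A rough C-level picture (the prototype is an illustrative assumption, not copied from any SDK header):

    // Illustrative only; the real ABI returns the pair in registers.
    struct SinCosResult { double Sin, Cos; };
    extern "C" SinCosResult __sincos_stret(double X);

    static double sumSinCos(double X) {
      SinCosResult R = __sincos_stret(X); // one call instead of sin(X) and cos(X)
      return R.Sin + R.Cos;
    }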
setOperationAction(ISD::FSINCOS, MVT::f64, Custom); setOperationAction(ISD::FSINCOS, MVT::f32, Custom); } else { @@ -633,16 +632,16 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, // AArch64 doesn't have a direct vector ->f32 conversion instructions for // elements smaller than i32, so promote the input to i32 first. - setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Promote); - setOperationAction(ISD::SINT_TO_FP, MVT::v4i8, Promote); - setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Promote); - setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Promote); + setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i8, MVT::v4i32); + setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i8, MVT::v4i32); + setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i16, MVT::v4i32); + setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i16, MVT::v4i32); // i8 and i16 vector elements also need promotion to i32 for v8i8 or v8i16 // -> v8f16 conversions. - setOperationAction(ISD::SINT_TO_FP, MVT::v8i8, Promote); - setOperationAction(ISD::UINT_TO_FP, MVT::v8i8, Promote); - setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Promote); - setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Promote); + setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i8, MVT::v8i32); + setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i8, MVT::v8i32); + setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i16, MVT::v8i32); + setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i16, MVT::v8i32); // Similarly, there is no direct i32 -> f64 vector conversion instruction. setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom); @@ -2328,8 +2327,9 @@ SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op, Entry.IsZExt = false; Args.push_back(Entry); - const char *LibcallName = - (ArgVT == MVT::f64) ? "__sincos_stret" : "__sincosf_stret"; + RTLIB::Libcall LC = ArgVT == MVT::f64 ? RTLIB::SINCOS_STRET_F64 + : RTLIB::SINCOS_STRET_F32; + const char *LibcallName = getLibcallName(LC); SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy(DAG.getDataLayout())); @@ -2731,7 +2731,7 @@ SDValue AArch64TargetLowering::LowerFormalArguments( SelectionDAG &DAG, SmallVectorImpl &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); - bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction()->getCallingConv()); + bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv()); // Assign locations to all of the incoming arguments. SmallVector ArgLocs; @@ -2745,7 +2745,7 @@ SDValue AArch64TargetLowering::LowerFormalArguments( // we use a special version of AnalyzeFormalArguments to pass in ValVT and // LocVT. 
unsigned NumArgs = Ins.size(); - Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin(); + Function::const_arg_iterator CurOrigArg = MF.getFunction().arg_begin(); unsigned CurArgIdx = 0; for (unsigned i = 0; i != NumArgs; ++i) { MVT ValVT = Ins[i].VT; @@ -2935,7 +2935,7 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo, MachineFrameInfo &MFI = MF.getFrameInfo(); AArch64FunctionInfo *FuncInfo = MF.getInfo(); auto PtrVT = getPointerTy(DAG.getDataLayout()); - bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction()->getCallingConv()); + bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv()); SmallVector MemOps; @@ -3087,15 +3087,15 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization( return false; MachineFunction &MF = DAG.getMachineFunction(); - const Function *CallerF = MF.getFunction(); - CallingConv::ID CallerCC = CallerF->getCallingConv(); + const Function &CallerF = MF.getFunction(); + CallingConv::ID CallerCC = CallerF.getCallingConv(); bool CCMatch = CallerCC == CalleeCC; // Byval parameters hand the function a pointer directly into the stack area // we want to reuse during a tail call. Working around this *is* possible (see // X86) but less efficient and uglier in LowerCall. - for (Function::const_arg_iterator i = CallerF->arg_begin(), - e = CallerF->arg_end(); + for (Function::const_arg_iterator i = CallerF.arg_begin(), + e = CallerF.arg_end(); i != e; ++i) if (i->hasByValAttr()) return false; @@ -3343,9 +3343,15 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, assert(FPDiff % 16 == 0 && "unaligned stack on tail call"); } + // We can omit callseq_start/callseq_end if there is no callframe to setup. + // Do not omit for patchpoints as SelectionDAGBuilder::visitPatchpoint() + // currently expects it. + bool OmitCallSeq = NumBytes == 0 && !CLI.IsPatchPoint; + assert((!IsSibCall || OmitCallSeq) && "Should not get callseq for sibcalls"); + // Adjust the stack pointer for the new arguments... // These operations are automatically eliminated by the prolog/epilog pass - if (!IsSibCall) + if (!OmitCallSeq) Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL); SDValue StackPtr = DAG.getCopyFromReg(Chain, DL, AArch64::SP, @@ -3511,7 +3517,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, // the frame up *after* the call, however in the ABI-changing tail-call case // we've carefully laid out the parameters so that when sp is reset they'll be // in the correct location. - if (IsTailCall && !IsSibCall) { + if (IsTailCall && !OmitCallSeq) { Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, DL, true), DAG.getIntPtrConstant(0, DL, true), InFlag, DL); InFlag = Chain.getValue(1); @@ -3569,9 +3575,11 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, uint64_t CalleePopBytes = DoesCalleeRestoreStack(CallConv, TailCallOpt) ? 
alignTo(NumBytes, 16) : 0; - Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, DL, true), - DAG.getIntPtrConstant(CalleePopBytes, DL, true), - InFlag, DL); + if (!OmitCallSeq) + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, DL, true), + DAG.getIntPtrConstant(CalleePopBytes, DL, true), + InFlag, DL); + if (!Ins.empty()) InFlag = Chain.getValue(1); @@ -3885,9 +3893,6 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op, TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal()); - if (DAG.getTarget().Options.EmulatedTLS) - return LowerToTLSEmulatedModel(GA, DAG); - if (!EnableAArch64ELFLocalDynamicTLSGeneration) { if (Model == TLSModel::LocalDynamic) Model = TLSModel::GeneralDynamic; @@ -3973,6 +3978,10 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op, SDValue AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { + const GlobalAddressSDNode *GA = cast(Op); + if (DAG.getTarget().Options.EmulatedTLS) + return LowerToTLSEmulatedModel(GA, DAG); + if (Subtarget->isTargetDarwin()) return LowerDarwinGlobalTLSAddress(Op, DAG); if (Subtarget->isTargetELF()) @@ -4005,9 +4014,8 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch // instruction. - if (isOverflowIntrOpRes(LHS) && isOneConstant(RHS)) { - assert((CC == ISD::SETEQ || CC == ISD::SETNE) && - "Unexpected condition code."); + if (isOverflowIntrOpRes(LHS) && isOneConstant(RHS) && + (CC == ISD::SETEQ || CC == ISD::SETNE)) { // Only lower legal XALUO ops. if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0))) return SDValue(); @@ -4185,7 +4193,7 @@ SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op, } SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const { - if (DAG.getMachineFunction().getFunction()->hasFnAttribute( + if (DAG.getMachineFunction().getFunction().hasFnAttribute( Attribute::NoImplicitFloat)) return SDValue(); @@ -4668,7 +4676,7 @@ SDValue AArch64TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); - if (Subtarget->isCallingConvWin64(MF.getFunction()->getCallingConv())) + if (Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv())) return LowerWin64_VASTART(Op, DAG); else if (Subtarget->isTargetDarwin()) return LowerDarwin_VASTART(Op, DAG); @@ -7290,8 +7298,21 @@ SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op, return DAG.getSExtOrTrunc(Cmp, dl, Op.getValueType()); } - if (LHS.getValueType().getVectorElementType() == MVT::f16) - return SDValue(); + const bool FullFP16 = + static_cast(DAG.getSubtarget()).hasFullFP16(); + + // Make v4f16 (only) fcmp operations utilise vector instructions + // v8f16 support will be a litle more complicated + if (LHS.getValueType().getVectorElementType() == MVT::f16) { + if (!FullFP16 && LHS.getValueType().getVectorNumElements() == 4) { + LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::v4f32, LHS); + RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::v4f32, RHS); + SDValue NewSetcc = DAG.getSetCC(dl, MVT::v4i16, LHS, RHS, CC); + DAG.ReplaceAllUsesWith(Op, NewSetcc); + CmpVT = MVT::v4i32; + } else + return SDValue(); + } assert(LHS.getValueType().getVectorElementType() == MVT::f32 || LHS.getValueType().getVectorElementType() == MVT::f64); @@ -7371,6 +7392,7 @@ SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op, /// specified in the intrinsic calls. 
bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, + MachineFunction &MF, unsigned Intrinsic) const { auto &DL = I.getModule()->getDataLayout(); switch (Intrinsic) { @@ -7393,9 +7415,8 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1); Info.offset = 0; Info.align = 0; - Info.vol = false; // volatile loads with NEON intrinsics not supported - Info.readMem = true; - Info.writeMem = false; + // volatile loads with NEON intrinsics not supported + Info.flags = MachineMemOperand::MOLoad; return true; } case Intrinsic::aarch64_neon_st2: @@ -7420,9 +7441,8 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1); Info.offset = 0; Info.align = 0; - Info.vol = false; // volatile stores with NEON intrinsics not supported - Info.readMem = false; - Info.writeMem = true; + // volatile stores with NEON intrinsics not supported + Info.flags = MachineMemOperand::MOStore; return true; } case Intrinsic::aarch64_ldaxr: @@ -7433,9 +7453,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Info.align = DL.getABITypeAlignment(PtrTy->getElementType()); - Info.vol = true; - Info.readMem = true; - Info.writeMem = false; + Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile; return true; } case Intrinsic::aarch64_stlxr: @@ -7446,9 +7464,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.ptrVal = I.getArgOperand(1); Info.offset = 0; Info.align = DL.getABITypeAlignment(PtrTy->getElementType()); - Info.vol = true; - Info.readMem = false; - Info.writeMem = true; + Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile; return true; } case Intrinsic::aarch64_ldaxp: @@ -7458,9 +7474,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Info.align = 16; - Info.vol = true; - Info.readMem = true; - Info.writeMem = false; + Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile; return true; case Intrinsic::aarch64_stlxp: case Intrinsic::aarch64_stxp: @@ -7469,9 +7483,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.ptrVal = I.getArgOperand(2); Info.offset = 0; Info.align = 16; - Info.vol = true; - Info.readMem = false; - Info.writeMem = true; + Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile; return true; default: break; @@ -7918,9 +7930,9 @@ EVT AArch64TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign, // instruction to materialize the v2i64 zero and one store (with restrictive // addressing mode). Just do two i64 store of zero-registers. 
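// For instance (sketch, with x0 standing in for the destination pointer), a
// 16-byte zero memset would then become
//   str xzr, [x0]
//   str xzr, [x0, #8]    // may later be merged into a single stp
// instead of materializing a v2i64 zero with movi and storing a q register.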
bool Fast; - const Function *F = MF.getFunction(); + const Function &F = MF.getFunction(); if (Subtarget->hasFPARMv8() && !IsMemset && Size >= 16 && - !F->hasFnAttribute(Attribute::NoImplicitFloat) && + !F.hasFnAttribute(Attribute::NoImplicitFloat) && (memOpAlign(SrcAlign, DstAlign, 16) || (allowsMisalignedMemoryAccesses(MVT::f128, 0, 1, &Fast) && Fast))) return MVT::f128; @@ -8165,7 +8177,7 @@ SDValue AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, std::vector *Created) const { - AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes(); + AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); if (isIntDivCheap(N->getValueType(0), Attr)) return SDValue(N,0); // Lower SDIV as SDIV @@ -8840,7 +8852,7 @@ static SDValue performConcatVectorsCombine(SDNode *N, static SDValue tryCombineFixedPointConvert(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG) { - // Wait 'til after everything is legalized to try this. That way we have + // Wait until after everything is legalized to try this. That way we have // legal vector types and such. if (DCI.isBeforeLegalizeOps()) return SDValue(); @@ -9586,7 +9598,7 @@ static SDValue splitStores(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, return SDValue(); // Don't split at -Oz. - if (DAG.getMachineFunction().getFunction()->optForMinSize()) + if (DAG.getMachineFunction().getFunction().optForMinSize()) return SDValue(); // Don't split v2i64 vectors. Memcpy lowering produces those and splitting @@ -10948,7 +10960,7 @@ void AArch64TargetLowering::insertCopiesSplitCSR( // fine for CXX_FAST_TLS since the C++-style TLS access functions should be // nounwind. If we want to generalize this later, we may need to emit // CFI pseudo-instructions. - assert(Entry->getParent()->getFunction()->hasFnAttribute( + assert(Entry->getParent()->getFunction().hasFnAttribute( Attribute::NoUnwind) && "Function should be nounwind in insertCopiesSplitCSR!"); Entry->addLiveIn(*I); @@ -10983,3 +10995,8 @@ AArch64TargetLowering::getVaListSizeInBits(const DataLayout &DL) const { return 3 * getPointerTy(DL).getSizeInBits() + 2 * 32; } + +void AArch64TargetLowering::finalizeLowering(MachineFunction &MF) const { + MF.getFrameInfo().computeMaxCallFrameSize(MF); + TargetLoweringBase::finalizeLowering(MF); +} diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h index 2af40edd8621..6018bc501dba 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.h +++ b/lib/Target/AArch64/AArch64ISelLowering.h @@ -306,6 +306,7 @@ class AArch64TargetLowering : public TargetLowering { MachineBasicBlock *MBB) const override; bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, + MachineFunction &MF, unsigned Intrinsic) const override; bool isTruncateFree(Type *Ty1, Type *Ty2) const override; @@ -414,7 +415,7 @@ class AArch64TargetLowering : public TargetLowering { // Do not merge to float value size (128 bytes) if no implicit // float attribute is set. 
- bool NoFloat = DAG.getMachineFunction().getFunction()->hasFnAttribute( + bool NoFloat = DAG.getMachineFunction().getFunction().hasFnAttribute( Attribute::NoImplicitFloat); if (NoFloat) @@ -443,8 +444,8 @@ class AArch64TargetLowering : public TargetLowering { } bool supportSplitCSR(MachineFunction *MF) const override { - return MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS && - MF->getFunction()->hasFnAttribute(Attribute::NoUnwind); + return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS && + MF->getFunction().hasFnAttribute(Attribute::NoUnwind); } void initializeSplitCSR(MachineBasicBlock *Entry) const override; void insertCopiesSplitCSR( @@ -544,6 +545,7 @@ class AArch64TargetLowering : public TargetLowering { SDValue getAddrLarge(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const; template SDValue getAddr(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const; + SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; @@ -646,6 +648,8 @@ class AArch64TargetLowering : public TargetLowering { SelectionDAG &DAG) const override; bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override; + + void finalizeLowering(MachineFunction &MF) const override; }; namespace AArch64 { diff --git a/lib/Target/AArch64/AArch64InstrAtomics.td b/lib/Target/AArch64/AArch64InstrAtomics.td index 4c61c3510ba5..153bcf75cbcd 100644 --- a/lib/Target/AArch64/AArch64InstrAtomics.td +++ b/lib/Target/AArch64/AArch64InstrAtomics.td @@ -30,18 +30,18 @@ def : Pat<(atomic_fence (imm), (imm)), (DMB (i32 0xb))>; // A atomic load operation that actually needs acquire semantics. class acquiring_load - : PatFrag<(ops node:$ptr), (base node:$ptr), [{ - AtomicOrdering Ordering = cast(N)->getOrdering(); - return isAcquireOrStronger(Ordering); -}]>; + : PatFrag<(ops node:$ptr), (base node:$ptr)> { + let IsAtomic = 1; + let IsAtomicOrderingAcquireOrStronger = 1; +} // An atomic load operation that does not need either acquire or release // semantics. class relaxed_load - : PatFrag<(ops node:$ptr), (base node:$ptr), [{ - AtomicOrdering Ordering = cast(N)->getOrdering(); - return !isAcquireOrStronger(Ordering); -}]>; + : PatFrag<(ops node:$ptr), (base node:$ptr)> { + let IsAtomic = 1; + let IsAtomicOrderingAcquireOrStronger = 0; +} // 8-bit loads def : Pat<(acquiring_load GPR64sp:$ptr), (LDARB GPR64sp:$ptr)>; @@ -113,19 +113,17 @@ def : Pat<(relaxed_load // A store operation that actually needs release semantics. class releasing_store - : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{ - AtomicOrdering Ordering = cast(N)->getOrdering(); - assert(Ordering != AtomicOrdering::AcquireRelease && - "unexpected store ordering"); - return isReleaseOrStronger(Ordering); -}]>; + : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val)> { + let IsAtomic = 1; + let IsAtomicOrderingReleaseOrStronger = 1; +} // An atomic store operation that doesn't actually need to be atomic on AArch64. 
class relaxed_store - : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{ - AtomicOrdering Ordering = cast(N)->getOrdering(); - return !isReleaseOrStronger(Ordering); -}]>; + : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val)> { + let IsAtomic = 1; + let IsAtomicOrderingReleaseOrStronger = 0; +} // 8-bit stores def : Pat<(releasing_store GPR64sp:$ptr, GPR32:$val), diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td index 80c5092a4eed..0efbc3a6719f 100644 --- a/lib/Target/AArch64/AArch64InstrFormats.td +++ b/lib/Target/AArch64/AArch64InstrFormats.td @@ -193,6 +193,7 @@ def SIMDImmType10Operand : AsmOperandClass { let Name = "SIMDImmType10"; } def SImm10s8Operand : AsmOperandClass { let Name = "SImm10s8"; let DiagnosticType = "InvalidMemoryIndexedSImm10"; + let PredicateMethod = "isSImmScaled<10, 8>"; } //===----------------------------------------------------------------------===// @@ -221,19 +222,29 @@ def adrlabel : Operand { let ParserMatchClass = AdrOperand; } +class SImmOperand : AsmOperandClass { + let Name = "SImm" # width; + let DiagnosticType = "InvalidMemoryIndexedSImm" # width; + let RenderMethod = "addImmOperands"; + let PredicateMethod = "isSImm<" # width # ">"; +} + def simm10Scaled : Operand { let ParserMatchClass = SImm10s8Operand; let DecoderMethod = "DecodeSImm<10>"; let PrintMethod = "printImmScale<8>"; } -// simm9 predicate - True if the immediate is in the range [-256, 255]. -def SImm9Operand : AsmOperandClass { - let Name = "SImm9"; - let DiagnosticType = "InvalidMemoryIndexedSImm9"; -} +def SImm9Operand : SImmOperand<9>; def simm9 : Operand, ImmLeaf= -256 && Imm < 256; }]> { let ParserMatchClass = SImm9Operand; + let DecoderMethod = "DecodeSImm<9>"; +} + +def SImm6Operand : SImmOperand<6>; +def simm6_32b : Operand, ImmLeaf= -32 && Imm < 32; }]> { + let ParserMatchClass = SImm6Operand; + let DecoderMethod = "DecodeSImm<6>"; } // simm7sN predicate - True if the immediate is a multiple of N in the range @@ -241,6 +252,7 @@ def simm9 : Operand, ImmLeaf= -256 && Imm < 256; }]> { class SImm7Scaled : AsmOperandClass { let Name = "SImm7s" # Scale; let DiagnosticType = "InvalidMemoryIndexed" # Scale # "SImm7"; + let PredicateMethod = "isSImmScaled<7, " # Scale # ">"; } def SImm7s4Operand : SImm7Scaled<4>; @@ -3376,7 +3388,7 @@ class LoadPostIdx sz, bit V, bits<2> opc, RegisterClass regtype, (outs GPR64sp:$wback, regtype:$Rt), (ins GPR64sp:$Rn, simm9:$offset), asm, "$Rn = $wback,@earlyclobber $wback", []>, - Sched<[WriteLD, WriteI]>; + Sched<[WriteLD, WriteAdr]>; let mayStore = 1, mayLoad = 0 in class StorePostIdx sz, bit V, bits<2> opc, RegisterClass regtype, @@ -3387,7 +3399,7 @@ class StorePostIdx sz, bit V, bits<2> opc, RegisterClass regtype, asm, "$Rn = $wback,@earlyclobber $wback", [(set GPR64sp:$wback, (storeop (Ty regtype:$Rt), GPR64sp:$Rn, simm9:$offset))]>, - Sched<[WriteAdr, WriteST, ReadAdrBase]>; + Sched<[WriteAdr, WriteST]>; } // hasSideEffects = 0 diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp index c4aa6bf139d1..62414ea6f8ba 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -28,6 +28,7 @@ #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/StackMaps.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" 
@@ -2801,14 +2802,14 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl( LiveIntervals *LIS) const { // This is a bit of a hack. Consider this instruction: // - // %vreg0 = COPY %SP; GPR64all:%vreg0 + // %0 = COPY %sp; GPR64all:%0 // // We explicitly chose GPR64all for the virtual register so such a copy might // be eliminated by RegisterCoalescer. However, that may not be possible, and - // %vreg0 may even spill. We can't spill %SP, and since it is in the GPR64all + // %0 may even spill. We can't spill %sp, and since it is in the GPR64all // register class, TargetInstrInfo::foldMemoryOperand() is going to try. // - // To prevent that, we are going to constrain the %vreg0 register class here. + // To prevent that, we are going to constrain the %0 register class here. // // // @@ -2830,26 +2831,26 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl( // Handle the case where a copy is being spilled or filled but the source // and destination register class don't match. For example: // - // %vreg0 = COPY %XZR; GPR64common:%vreg0 + // %0 = COPY %xzr; GPR64common:%0 // // In this case we can still safely fold away the COPY and generate the // following spill code: // - // STRXui %XZR, + // STRXui %xzr, %stack.0 // // This also eliminates spilled cross register class COPYs (e.g. between x and // d regs) of the same size. For example: // - // %vreg0 = COPY %vreg1; GPR64:%vreg0, FPR64:%vreg1 + // %0 = COPY %1; GPR64:%0, FPR64:%1 // // will be filled as // - // LDRDui %vreg0, fi<#0> + // LDRDui %0, fi<#0> // // instead of // - // LDRXui %vregTemp, fi<#0> - // %vreg0 = FMOV %vregTemp + // LDRXui %Temp, fi<#0> + // %0 = FMOV %Temp // if (MI.isCopy() && Ops.size() == 1 && // Make sure we're only folding the explicit COPY defs/uses. @@ -2886,12 +2887,12 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl( // Handle cases like spilling def of: // - // %vreg0:sub_32 = COPY %WZR; GPR64common:%vreg0 + // %0:sub_32 = COPY %wzr; GPR64common:%0 // // where the physical register source can be widened and stored to the full // virtual reg destination stack slot, in this case producing: // - // STRXui %XZR, + // STRXui %xzr, %stack.0 // if (IsSpill && DstMO.isUndef() && TargetRegisterInfo::isPhysicalRegister(SrcReg)) { @@ -2934,12 +2935,12 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl( // Handle cases like filling use of: // - // %vreg0:sub_32 = COPY %vreg1; GPR64:%vreg0, GPR32:%vreg1 + // %0:sub_32 = COPY %1; GPR64:%0, GPR32:%1 // // where we can load the full virtual reg source stack slot, into the subreg // destination, in this case producing: // - // LDRWui %vreg0:sub_32, + // LDRWui %0:sub_32, %stack.0 // if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) { const TargetRegisterClass *FillRC; @@ -3681,6 +3682,15 @@ static bool getFMAPatterns(MachineInstr &Root, Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP2); Found = true; } + if (canCombineWithFMUL(MBB, Root.getOperand(1), + AArch64::FMULv2i32_indexed)) { + Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP1); + Found = true; + } else if (canCombineWithFMUL(MBB, Root.getOperand(1), + AArch64::FMULv2f32)) { + Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP1); + Found = true; + } break; case AArch64::FSUBv2f64: if (canCombineWithFMUL(MBB, Root.getOperand(2), @@ -3692,6 +3702,15 @@ static bool getFMAPatterns(MachineInstr &Root, Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP2); Found = true; } + if (canCombineWithFMUL(MBB, Root.getOperand(1), + AArch64::FMULv2i64_indexed)) { + 
Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP1); + Found = true; + } else if (canCombineWithFMUL(MBB, Root.getOperand(1), + AArch64::FMULv2f64)) { + Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP1); + Found = true; + } break; case AArch64::FSUBv4f32: if (canCombineWithFMUL(MBB, Root.getOperand(2), @@ -3703,6 +3722,15 @@ static bool getFMAPatterns(MachineInstr &Root, Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP2); Found = true; } + if (canCombineWithFMUL(MBB, Root.getOperand(1), + AArch64::FMULv4i32_indexed)) { + Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP1); + Found = true; + } else if (canCombineWithFMUL(MBB, Root.getOperand(1), + AArch64::FMULv4f32)) { + Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP1); + Found = true; + } break; } return Found; @@ -3790,12 +3818,15 @@ enum class FMAInstKind { Default, Indexed, Accumulator }; /// \param MaddOpc the opcode of the f|madd instruction /// \param RC Register class of operands /// \param kind of fma instruction (addressing mode) to be generated +/// \param ReplacedAddend is the result register from the instruction +/// replacing the non-combined operand, if any. static MachineInstr * genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC, - FMAInstKind kind = FMAInstKind::Default) { + FMAInstKind kind = FMAInstKind::Default, + const unsigned *ReplacedAddend = nullptr) { assert(IdxMulOpd == 1 || IdxMulOpd == 2); unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1; @@ -3805,8 +3836,17 @@ genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI, bool Src0IsKill = MUL->getOperand(1).isKill(); unsigned SrcReg1 = MUL->getOperand(2).getReg(); bool Src1IsKill = MUL->getOperand(2).isKill(); - unsigned SrcReg2 = Root.getOperand(IdxOtherOpd).getReg(); - bool Src2IsKill = Root.getOperand(IdxOtherOpd).isKill(); + + unsigned SrcReg2; + bool Src2IsKill; + if (ReplacedAddend) { + // If we just generated a new addend, we must be its only use.
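// Sketch of the rewrite driven by ReplacedAddend (register names illustrative
// only):
//   %d = FSUB (FMUL %a, %b), %c
// becomes
//   %neg = FNEG %c
//   %d   = FMLA %neg, %a, %b    // %neg + %a*%b == %a*%b - %c
// so the negated addend built in genAlternativeCodeSequence is consumed here
// in place of the original FSUB operand.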
+ SrcReg2 = *ReplacedAddend; + Src2IsKill = true; + } else { + SrcReg2 = Root.getOperand(IdxOtherOpd).getReg(); + Src2IsKill = Root.getOperand(IdxOtherOpd).isKill(); + } if (TargetRegisterInfo::isVirtualRegister(ResultReg)) MRI.constrainRegClass(ResultReg, RC); @@ -4326,6 +4366,66 @@ void AArch64InstrInfo::genAlternativeCodeSequence( FMAInstKind::Accumulator); } break; + case MachineCombinerPattern::FMLSv2f32_OP1: + case MachineCombinerPattern::FMLSv2i32_indexed_OP1: { + RC = &AArch64::FPR64RegClass; + unsigned NewVR = MRI.createVirtualRegister(RC); + MachineInstrBuilder MIB1 = + BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f32), NewVR) + .add(Root.getOperand(2)); + InsInstrs.push_back(MIB1); + InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); + if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP1) { + Opc = AArch64::FMLAv2i32_indexed; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, + FMAInstKind::Indexed, &NewVR); + } else { + Opc = AArch64::FMLAv2f32; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, + FMAInstKind::Accumulator, &NewVR); + } + break; + } + case MachineCombinerPattern::FMLSv4f32_OP1: + case MachineCombinerPattern::FMLSv4i32_indexed_OP1: { + RC = &AArch64::FPR128RegClass; + unsigned NewVR = MRI.createVirtualRegister(RC); + MachineInstrBuilder MIB1 = + BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv4f32), NewVR) + .add(Root.getOperand(2)); + InsInstrs.push_back(MIB1); + InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); + if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP1) { + Opc = AArch64::FMLAv4i32_indexed; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, + FMAInstKind::Indexed, &NewVR); + } else { + Opc = AArch64::FMLAv4f32; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, + FMAInstKind::Accumulator, &NewVR); + } + break; + } + case MachineCombinerPattern::FMLSv2f64_OP1: + case MachineCombinerPattern::FMLSv2i64_indexed_OP1: { + RC = &AArch64::FPR128RegClass; + unsigned NewVR = MRI.createVirtualRegister(RC); + MachineInstrBuilder MIB1 = + BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f64), NewVR) + .add(Root.getOperand(2)); + InsInstrs.push_back(MIB1); + InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); + if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP1) { + Opc = AArch64::FMLAv2i64_indexed; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, + FMAInstKind::Indexed, &NewVR); + } else { + Opc = AArch64::FMLAv2f64; + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC, + FMAInstKind::Accumulator, &NewVR); + } + break; + } } // end switch (Pattern) // Record MUL and ADD/SUB for deletion DelInstrs.push_back(MUL); @@ -4541,61 +4641,66 @@ AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const { return makeArrayRef(TargetFlags); } -/// Constants defining how certain sequences should be outlined. -/// This encompasses how an outlined function should be called, and what kind of -/// frame should be emitted for that outlined function. -/// -/// \p MachineOutlinerDefault implies that the function should be called with -/// a save and restore of LR to the stack. -/// -/// That is, -/// -/// I1 Save LR OUTLINED_FUNCTION: -/// I2 --> BL OUTLINED_FUNCTION I1 -/// I3 Restore LR I2 -/// I3 -/// RET -/// -/// * Call construction overhead: 3 (save + BL + restore) -/// * Frame construction overhead: 1 (ret) -/// * Requires stack fixups? 
Yes -/// -/// \p MachineOutlinerTailCall implies that the function is being created from -/// a sequence of instructions ending in a return. -/// -/// That is, -/// -/// I1 OUTLINED_FUNCTION: -/// I2 --> B OUTLINED_FUNCTION I1 -/// RET I2 -/// RET -/// -/// * Call construction overhead: 1 (B) -/// * Frame construction overhead: 0 (Return included in sequence) -/// * Requires stack fixups? No -/// -/// \p MachineOutlinerNoLRSave implies that the function should be called using -/// a BL instruction, but doesn't require LR to be saved and restored. This -/// happens when LR is known to be dead. -/// -/// That is, -/// -/// I1 OUTLINED_FUNCTION: -/// I2 --> BL OUTLINED_FUNCTION I1 -/// I3 I2 -/// I3 -/// RET -/// -/// * Call construction overhead: 1 (BL) -/// * Frame construction overhead: 1 (RET) -/// * Requires stack fixups? No -/// + /// Constants defining how certain sequences should be outlined. + /// This encompasses how an outlined function should be called, and what kind of + /// frame should be emitted for that outlined function. + /// + /// \p MachineOutlinerDefault implies that the function should be called with + /// a save and restore of LR to the stack. + /// + /// That is, + /// + /// I1 Save LR OUTLINED_FUNCTION: + /// I2 --> BL OUTLINED_FUNCTION I1 + /// I3 Restore LR I2 + /// I3 + /// RET + /// + /// * Call construction overhead: 3 (save + BL + restore) + /// * Frame construction overhead: 1 (ret) + /// * Requires stack fixups? Yes + /// + /// \p MachineOutlinerTailCall implies that the function is being created from + /// a sequence of instructions ending in a return. + /// + /// That is, + /// + /// I1 OUTLINED_FUNCTION: + /// I2 --> B OUTLINED_FUNCTION I1 + /// RET I2 + /// RET + /// + /// * Call construction overhead: 1 (B) + /// * Frame construction overhead: 0 (Return included in sequence) + /// * Requires stack fixups? No + /// + /// \p MachineOutlinerNoLRSave implies that the function should be called using + /// a BL instruction, but doesn't require LR to be saved and restored. This + /// happens when LR is known to be dead. + /// + /// That is, + /// + /// I1 OUTLINED_FUNCTION: + /// I2 --> BL OUTLINED_FUNCTION I1 + /// I3 I2 + /// I3 + /// RET + /// + /// * Call construction overhead: 1 (BL) + /// * Frame construction overhead: 1 (RET) + /// * Requires stack fixups? No + /// enum MachineOutlinerClass { MachineOutlinerDefault, /// Emit a save, restore, call, and return. MachineOutlinerTailCall, /// Only emit a branch. MachineOutlinerNoLRSave /// Emit a call and return. }; +enum MachineOutlinerMBBFlags { + LRUnavailableSomewhere = 0x2, + HasCalls = 0x4 +}; + bool AArch64InstrInfo::canOutlineWithoutLRSave( MachineBasicBlock::iterator &CallInsertionPt) const { // Was LR saved in the function containing this basic block? @@ -4606,9 +4711,8 @@ bool AArch64InstrInfo::canOutlineWithoutLRSave( // Get liveness information from the end of the block to the end of the // prospective outlined region. std::for_each(MBB.rbegin(), - (MachineBasicBlock::reverse_iterator)CallInsertionPt, - [&LRU](MachineInstr &MI) {LRU.stepBackward(MI);} - ); + (MachineBasicBlock::reverse_iterator)CallInsertionPt, + [&LRU](MachineInstr &MI) { LRU.stepBackward(MI); }); // If the link register is available at this point, then we can safely outline // the region without saving/restoring LR. Otherwise, we must emit a save and @@ -4648,34 +4752,70 @@ AArch64InstrInfo::getOutlininingCandidateInfo( NumInstrsToCreateFrame = 1; } + // Check if the range contains a call. 
These require a save + restore of the + // link register. + if (std::any_of(RepeatedSequenceLocs[0].first, RepeatedSequenceLocs[0].second, + [](const MachineInstr &MI) { return MI.isCall(); })) + NumInstrsToCreateFrame += 2; // Save + restore the link register. + + // Handle the last instruction separately. If this is a tail call, then the + // last instruction is a call. We don't want to save + restore in this case. + // However, it could be possible that the last instruction is a call without + // it being valid to tail call this sequence. We should consider this as well. + else if (RepeatedSequenceLocs[0].second->isCall() && + FrameID != MachineOutlinerTailCall) + NumInstrsToCreateFrame += 2; + return MachineOutlinerInfo(NumInstrsForCall, NumInstrsToCreateFrame, CallID, FrameID); } -bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(MachineFunction &MF, - bool OutlineFromLinkOnceODRs) const { - const Function *F = MF.getFunction(); +bool AArch64InstrInfo::isFunctionSafeToOutlineFrom( + MachineFunction &MF, bool OutlineFromLinkOnceODRs) const { + const Function &F = MF.getFunction(); // If F uses a redzone, then don't outline from it because it might mess up // the stack. - if (!F->hasFnAttribute(Attribute::NoRedZone)) - return false; - - // If anyone is using the address of this function, don't outline from it. - if (F->hasAddressTaken()) + if (!F.hasFnAttribute(Attribute::NoRedZone)) return false; // Can F be deduplicated by the linker? If it can, don't outline from it. - if (!OutlineFromLinkOnceODRs && F->hasLinkOnceODRLinkage()) + if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage()) return false; - + return true; } -AArch64GenInstrInfo::MachineOutlinerInstrType -AArch64InstrInfo::getOutliningType(MachineInstr &MI) const { +unsigned +AArch64InstrInfo::getMachineOutlinerMBBFlags(MachineBasicBlock &MBB) const { + unsigned Flags = 0x0; + // Check if there's a call inside this MachineBasicBlock. If there is, then + // set a flag. + if (std::any_of(MBB.begin(), MBB.end(), + [](MachineInstr &MI) { return MI.isCall(); })) + Flags |= MachineOutlinerMBBFlags::HasCalls; + + // Check if LR is available through all of the MBB. If it's not, then set + // a flag. + LiveRegUnits LRU(getRegisterInfo()); + LRU.addLiveOuts(MBB); - MachineFunction *MF = MI.getParent()->getParent(); + std::for_each(MBB.rbegin(), + MBB.rend(), + [&LRU](MachineInstr &MI) { LRU.accumulate(MI); }); + + if (!LRU.available(AArch64::LR)) + Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere; + + return Flags; +} + +AArch64GenInstrInfo::MachineOutlinerInstrType +AArch64InstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT, + unsigned Flags) const { + MachineInstr &MI = *MIT; + MachineBasicBlock *MBB = MI.getParent(); + MachineFunction *MF = MBB->getParent(); AArch64FunctionInfo *FuncInfo = MF->getInfo(); // Don't outline LOHs. @@ -4683,20 +4823,90 @@ AArch64InstrInfo::getOutliningType(MachineInstr &MI) const { return MachineOutlinerInstrType::Illegal; // Don't allow debug values to impact outlining type. - if (MI.isDebugValue() || MI.isIndirectDebugValue()) + if (MI.isDebugValue() || MI.isIndirectDebugValue()) return MachineOutlinerInstrType::Invisible; - + // Is this a terminator for a basic block? if (MI.isTerminator()) { // Is this the end of a function? if (MI.getParent()->succ_empty()) return MachineOutlinerInstrType::Legal; - + // It's not, so don't outline it. return MachineOutlinerInstrType::Illegal; } + // Special cases for instructions that can always be outlined, but will fail + // the later tests. 
e.g, ADRPs, which are PC-relative use LR, but can always + // be outlined because they don't require a *specific* value to be in LR. + if (MI.getOpcode() == AArch64::ADRP) + return MachineOutlinerInstrType::Legal; + + // Outline calls without stack parameters or aggregate parameters. + if (MI.isCall()) { + const Module *M = MF->getFunction().getParent(); + assert(M && "No module?"); + + // Get the function associated with the call. Look at each operand and find + // the one that represents the callee and get its name. + Function *Callee = nullptr; + for (const MachineOperand &MOP : MI.operands()) { + if (MOP.isSymbol()) { + Callee = M->getFunction(MOP.getSymbolName()); + break; + } + + else if (MOP.isGlobal()) { + Callee = M->getFunction(MOP.getGlobal()->getGlobalIdentifier()); + break; + } + } + + // Only handle functions that we have information about. + if (!Callee) + return MachineOutlinerInstrType::Illegal; + + // We have a function we have information about. Check it if it's something + // can safely outline. + + // If the callee is vararg, it passes parameters on the stack. Don't touch + // it. + // FIXME: Functions like printf are very common and we should be able to + // outline them. + if (Callee->isVarArg()) + return MachineOutlinerInstrType::Illegal; + + // Check if any of the arguments are a pointer to a struct. We don't want + // to outline these since they might be loaded in two instructions. + for (Argument &Arg : Callee->args()) { + if (Arg.getType()->isPointerTy() && + Arg.getType()->getPointerElementType()->isAggregateType()) + return MachineOutlinerInstrType::Illegal; + } + + // If the thing we're calling doesn't access memory at all, then we're good + // to go. + if (Callee->doesNotAccessMemory()) + return MachineOutlinerInstrType::Legal; + + + // It accesses memory. Get the machine function for the callee to see if + // it's safe to outline. + MachineFunction *CalleeMF = MF->getMMI().getMachineFunction(*Callee); + + // We don't know what's going on with the callee at all. Don't touch it. + if (!CalleeMF) + return MachineOutlinerInstrType::Illegal; + + // Does it pass anything on the stack? If it does, don't outline it. + if (CalleeMF->getInfo()->getBytesInStackArgArea() != 0) + return MachineOutlinerInstrType::Illegal; + + // It doesn't, so it's safe to outline and we're done. + return MachineOutlinerInstrType::Legal; + } + // Don't outline positions. if (MI.isPosition()) return MachineOutlinerInstrType::Illegal; @@ -4720,8 +4930,52 @@ AArch64InstrInfo::getOutliningType(MachineInstr &MI) const { // Does this use the stack? if (MI.modifiesRegister(AArch64::SP, &RI) || MI.readsRegister(AArch64::SP, &RI)) { + // True if there is no chance that any outlined candidate from this range + // could require stack fixups. That is, both + // * LR is available in the range (No save/restore around call) + // * The range doesn't include calls (No save/restore in outlined frame) + // are true. + bool MightNeedStackFixUp = + (Flags & (MachineOutlinerMBBFlags::LRUnavailableSomewhere | + MachineOutlinerMBBFlags::HasCalls)); + + // If this instruction is in a range where it *never* needs to be fixed + // up, then we can *always* outline it. This is true even if it's not + // possible to fix that instruction up. + // + // Why? Consider two equivalent instructions I1, I2 where both I1 and I2 + // use SP. Suppose that I1 sits within a range that definitely doesn't + // need stack fixups, while I2 sits in a range that does. 
+ // + // First, I1 can be outlined as long as we *never* fix up the stack in + // any sequence containing it. I1 is already a safe instruction in the + // original program, so as long as we don't modify it we're good to go. + // So this leaves us with showing that outlining I2 won't break our + // program. + // + // Suppose I1 and I2 belong to equivalent candidate sequences. When we + // look at I2, we need to see if it can be fixed up. Suppose I2, (and + // thus I1) cannot be fixed up. Then I2 will be assigned an unique + // integer label; thus, I2 cannot belong to any candidate sequence (a + // contradiction). Suppose I2 can be fixed up. Then I1 can be fixed up + // as well, so we're good. Thus, I1 is always safe to outline. + // + // This gives us two things: first off, it buys us some more instructions + // for our search space by deeming stack instructions illegal only when + // they can't be fixed up AND we might have to fix them up. Second off, + // This allows us to catch tricky instructions like, say, + // %xi = ADDXri %sp, n, 0. We can't safely outline these since they might + // be paired with later SUBXris, which might *not* end up being outlined. + // If we mess with the stack to save something, then an ADDXri messes with + // it *after*, then we aren't going to restore the right something from + // the stack if we don't outline the corresponding SUBXri first. ADDXris and + // SUBXris are extremely common in prologue/epilogue code, so supporting + // them in the outliner can be a pretty big win! + if (!MightNeedStackFixUp) + return MachineOutlinerInstrType::Legal; - // Is it a memory operation? + // At this point, we have a stack instruction that we might need to fix + // up. We'll handle it if it's a load or store. if (MI.mayLoadOrStore()) { unsigned Base; // Filled with the base regiser of MI. int64_t Offset; // Filled with the offset of MI. @@ -4734,15 +4988,15 @@ AArch64InstrInfo::getOutliningType(MachineInstr &MI) const { // Find the minimum/maximum offset for this instruction and check if // fixing it up would be in range. - int64_t MinOffset, MaxOffset; - unsigned DummyScale; - getMemOpInfo(MI.getOpcode(), DummyScale, DummyWidth, MinOffset, - MaxOffset); + int64_t MinOffset, MaxOffset; // Unscaled offsets for the instruction. + unsigned Scale; // The scale to multiply the offsets by. + getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset); // TODO: We should really test what happens if an instruction overflows. // This is tricky to test with IR tests, but when the outliner is moved // to a MIR test, it really ought to be checked. - if (Offset + 16 < MinOffset || Offset + 16 > MaxOffset) + Offset += 16; // Update the offset to what it would be if we outlined. + if (Offset < MinOffset * Scale || Offset > MaxOffset * Scale) return MachineOutlinerInstrType::Illegal; // It's in range, so we can outline it. @@ -4788,6 +5042,39 @@ void AArch64InstrInfo::insertOutlinerEpilogue( MachineBasicBlock &MBB, MachineFunction &MF, const MachineOutlinerInfo &MInfo) const { + // Is there a call in the outlined range? + if (std::any_of(MBB.instr_begin(), MBB.instr_end(), + [](MachineInstr &MI) { return MI.isCall(); })) { + // Fix up the instructions in the range, since we're going to modify the + // stack. + fixupPostOutline(MBB); + + // LR has to be a live in so that we can save it. 
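// The resulting outlined frame, when the body contains calls, looks roughly
// like this (sketch):
//   str x30, [sp, #-16]!   // STRXpre below: spill LR, pre-decrement SP
//   ...outlined instructions (SP-relative offsets already bumped by 16)...
//   ldr x30, [sp], #16     // LDRXpost below: reload LR, post-increment SP
//   ret                    // unless this is a tail-call frame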
+ MBB.addLiveIn(AArch64::LR); + + MachineBasicBlock::iterator It = MBB.begin(); + MachineBasicBlock::iterator Et = MBB.end(); + + if (MInfo.FrameConstructionID == MachineOutlinerTailCall) + Et = std::prev(MBB.end()); + + // Insert a save before the outlined region + MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre)) + .addReg(AArch64::SP, RegState::Define) + .addReg(AArch64::LR) + .addReg(AArch64::SP) + .addImm(-16); + It = MBB.insert(It, STRXpre); + + // Insert a restore before the terminator for the function. + MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost)) + .addReg(AArch64::SP, RegState::Define) + .addReg(AArch64::LR, RegState::Define) + .addReg(AArch64::SP) + .addImm(16); + Et = MBB.insert(Et, LDRXpost); + } + // If this is a tail call outlined function, then there's already a return. if (MInfo.FrameConstructionID == MachineOutlinerTailCall) return; diff --git a/lib/Target/AArch64/AArch64InstrInfo.h b/lib/Target/AArch64/AArch64InstrInfo.h index 2f10bef1e474..889e5f6d5f61 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.h +++ b/lib/Target/AArch64/AArch64InstrInfo.h @@ -359,7 +359,8 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo { std::pair> &RepeatedSequenceLocs) const override; AArch64GenInstrInfo::MachineOutlinerInstrType - getOutliningType(MachineInstr &MI) const override; + getOutliningType(MachineBasicBlock::iterator &MIT, unsigned Flags) const override; + unsigned getMachineOutlinerMBBFlags(MachineBasicBlock &MBB) const override; void insertOutlinerEpilogue(MachineBasicBlock &MBB, MachineFunction &MF, const MachineOutlinerInfo &MInfo) const override; void insertOutlinerPrologue(MachineBasicBlock &MBB, MachineFunction &MF, diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td index 841265c33672..c09757956041 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.td +++ b/lib/Target/AArch64/AArch64InstrInfo.td @@ -328,10 +328,10 @@ def AArch64umaxv : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>; // the Function object through the Subtarget and objections were raised // to that (see post-commit review comments for r301750). let RecomputePerFunction = 1 in { - def ForCodeSize : Predicate<"MF->getFunction()->optForSize()">; - def NotForCodeSize : Predicate<"!MF->getFunction()->optForSize()">; + def ForCodeSize : Predicate<"MF->getFunction().optForSize()">; + def NotForCodeSize : Predicate<"!MF->getFunction().optForSize()">; // Avoid generating STRQro if it is slow, unless we're optimizing for code size. 
- def UseSTRQro : Predicate<"!Subtarget->isSTRQroSlow() || MF->getFunction()->optForSize()">; + def UseSTRQro : Predicate<"!Subtarget->isSTRQroSlow() || MF->getFunction().optForSize()">; } include "AArch64InstrFormats.td" @@ -678,6 +678,9 @@ def trunc_imm : SDNodeXFormgetTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i32); }]>; +def gi_trunc_imm : GICustomOperandRenderer<"renderTruncImm">, + GISDNodeXFormEquiv; + def : Pat<(i64 i64imm_32bit:$src), (SUBREG_TO_REG (i64 0), (MOVi32imm (trunc_imm imm:$src)), sub_32)>; @@ -4592,10 +4595,8 @@ def : Pat<(v4i32 immAllOnesV), (MOVIv2d_ns (i32 255))>; def : Pat<(v8i16 immAllOnesV), (MOVIv2d_ns (i32 255))>; def : Pat<(v16i8 immAllOnesV), (MOVIv2d_ns (i32 255))>; -def : Pat<(v2f64 (AArch64dup (f64 fpimm0))), (MOVIv2d_ns (i32 0))>; -def : Pat<(v4f32 (AArch64dup (f32 fpimm0))), (MOVIv2d_ns (i32 0))>; - // EDIT per word & halfword: 2s, 4h, 4s, & 8h +let isReMaterializable = 1, isAsCheapAsAMove = 1 in defm MOVI : SIMDModifiedImmVectorShift<0, 0b10, 0b00, "movi">; def : InstAlias<"movi $Vd.4h, $imm", (MOVIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>; @@ -4617,6 +4618,7 @@ def : Pat<(v4i16 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))), def : Pat<(v8i16 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))), (MOVIv8i16 imm0_255:$imm8, imm:$shift)>; +let isReMaterializable = 1, isAsCheapAsAMove = 1 in { // EDIT per word: 2s & 4s with MSL shifter def MOVIv2s_msl : SIMDModifiedImmMoveMSL<0, 0, {1,1,0,?}, V64, "movi", ".2s", [(set (v2i32 V64:$Rd), @@ -4629,13 +4631,31 @@ def MOVIv4s_msl : SIMDModifiedImmMoveMSL<1, 0, {1,1,0,?}, V128, "movi", ".4s", def MOVIv8b_ns : SIMDModifiedImmVectorNoShift<0, 0, 0, 0b1110, V64, imm0_255, "movi", ".8b", [(set (v8i8 V64:$Rd), (AArch64movi imm0_255:$imm8))]>; + def MOVIv16b_ns : SIMDModifiedImmVectorNoShift<1, 0, 0, 0b1110, V128, imm0_255, "movi", ".16b", [(set (v16i8 V128:$Rd), (AArch64movi imm0_255:$imm8))]>; +} + +// Use the more efficient MOVI instead of DUP from ZR to zero up vectors +def : Pat<(v2f32 (AArch64dup (f32 fpimm0))), (MOVIv2i32 (i32 0), (i32 0))>; + +def : Pat<(v2i32 (AArch64dup (i32 0))), (MOVIv2i32 (i32 0), (i32 0))>; +def : Pat<(v4i16 (AArch64dup (i32 0))), (MOVIv4i16 (i32 0), (i32 0))>; +def : Pat<(v8i8 (AArch64dup (i32 0))), (MOVIv8b_ns (i32 0))>; + +def : Pat<(v2f64 (AArch64dup (f64 fpimm0))), (MOVIv2d_ns (i32 0))>; +def : Pat<(v4f32 (AArch64dup (f32 fpimm0))), (MOVIv4i32 (i32 0), (i32 0))>; + +def : Pat<(v2i64 (AArch64dup (i64 0))), (MOVIv2d_ns (i32 0))>; +def : Pat<(v4i32 (AArch64dup (i32 0))), (MOVIv4i32 (i32 0), (i32 0))>; +def : Pat<(v8i16 (AArch64dup (i32 0))), (MOVIv8i16 (i32 0), (i32 0))>; +def : Pat<(v16i8 (AArch64dup (i32 0))), (MOVIv16b_ns (i32 0))>; // AdvSIMD MVNI // EDIT per word & halfword: 2s, 4h, 4s, & 8h +let isReMaterializable = 1, isAsCheapAsAMove = 1 in defm MVNI : SIMDModifiedImmVectorShift<1, 0b10, 0b00, "mvni">; def : InstAlias<"mvni $Vd.4h, $imm", (MVNIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>; @@ -4658,12 +4678,14 @@ def : Pat<(v8i16 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))), (MVNIv8i16 imm0_255:$imm8, imm:$shift)>; // EDIT per word: 2s & 4s with MSL shifter +let isReMaterializable = 1, isAsCheapAsAMove = 1 in { def MVNIv2s_msl : SIMDModifiedImmMoveMSL<0, 1, {1,1,0,?}, V64, "mvni", ".2s", [(set (v2i32 V64:$Rd), (AArch64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>; def MVNIv4s_msl : SIMDModifiedImmMoveMSL<1, 1, {1,1,0,?}, V128, "mvni", ".4s", [(set (v4i32 V128:$Rd), (AArch64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>; +} 
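// Illustrative effect of the zero-vector patterns above (sketch; v0 is just an
// example register): a v4i32 zeroinitializer now selects
//   movi v0.4s, #0
// which is rematerializable and as cheap as a move, rather than
//   dup  v0.4s, wzr
// and similarly for the other integer and FP vector types listed.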
//---------------------------------------------------------------------------- // AdvSIMD indexed element @@ -5785,7 +5807,7 @@ def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))), def : Pat<(v2i32 (bitconvert (v1f64 FPR64:$src))), (v2i32 (REV64v2i32 FPR64:$src))>; def : Pat<(v2i32 (bitconvert (v4f16 FPR64:$src))), - (v2i32 (REV64v4i16 FPR64:$src))>; + (v2i32 (REV32v4i16 FPR64:$src))>; } def : Pat<(v2i32 (bitconvert (v2f32 FPR64:$src))), (v2i32 FPR64:$src)>; @@ -5794,7 +5816,6 @@ def : Pat<(v4i16 (bitconvert (v1i64 FPR64:$src))), (v4i16 FPR64:$src)>; def : Pat<(v4i16 (bitconvert (v2i32 FPR64:$src))), (v4i16 FPR64:$src)>; def : Pat<(v4i16 (bitconvert (v8i8 FPR64:$src))), (v4i16 FPR64:$src)>; def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))), (v4i16 FPR64:$src)>; -def : Pat<(v4i16 (bitconvert (v4f16 FPR64:$src))), (v4i16 FPR64:$src)>; def : Pat<(v4i16 (bitconvert (v2f32 FPR64:$src))), (v4i16 FPR64:$src)>; def : Pat<(v4i16 (bitconvert (v1f64 FPR64:$src))), (v4i16 FPR64:$src)>; } @@ -5807,18 +5828,16 @@ def : Pat<(v4i16 (bitconvert (v8i8 FPR64:$src))), (v4i16 (REV16v8i8 FPR64:$src))>; def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))), (v4i16 (REV64v4i16 FPR64:$src))>; -def : Pat<(v4i16 (bitconvert (v4f16 FPR64:$src))), - (v4i16 (REV32v4i16 FPR64:$src))>; def : Pat<(v4i16 (bitconvert (v2f32 FPR64:$src))), (v4i16 (REV32v4i16 FPR64:$src))>; def : Pat<(v4i16 (bitconvert (v1f64 FPR64:$src))), (v4i16 (REV64v4i16 FPR64:$src))>; } +def : Pat<(v4i16 (bitconvert (v4f16 FPR64:$src))), (v4i16 FPR64:$src)>; let Predicates = [IsLE] in { def : Pat<(v4f16 (bitconvert (v1i64 FPR64:$src))), (v4f16 FPR64:$src)>; def : Pat<(v4f16 (bitconvert (v2i32 FPR64:$src))), (v4f16 FPR64:$src)>; -def : Pat<(v4f16 (bitconvert (v4i16 FPR64:$src))), (v4f16 FPR64:$src)>; def : Pat<(v4f16 (bitconvert (v8i8 FPR64:$src))), (v4f16 FPR64:$src)>; def : Pat<(v4f16 (bitconvert (f64 FPR64:$src))), (v4f16 FPR64:$src)>; def : Pat<(v4f16 (bitconvert (v2f32 FPR64:$src))), (v4f16 FPR64:$src)>; @@ -5828,20 +5847,17 @@ let Predicates = [IsBE] in { def : Pat<(v4f16 (bitconvert (v1i64 FPR64:$src))), (v4f16 (REV64v4i16 FPR64:$src))>; def : Pat<(v4f16 (bitconvert (v2i32 FPR64:$src))), - (v4f16 (REV64v4i16 FPR64:$src))>; -def : Pat<(v4f16 (bitconvert (v4i16 FPR64:$src))), - (v4f16 (REV64v4i16 FPR64:$src))>; + (v4f16 (REV32v4i16 FPR64:$src))>; def : Pat<(v4f16 (bitconvert (v8i8 FPR64:$src))), (v4f16 (REV16v8i8 FPR64:$src))>; def : Pat<(v4f16 (bitconvert (f64 FPR64:$src))), (v4f16 (REV64v4i16 FPR64:$src))>; def : Pat<(v4f16 (bitconvert (v2f32 FPR64:$src))), - (v4f16 (REV64v4i16 FPR64:$src))>; + (v4f16 (REV32v4i16 FPR64:$src))>; def : Pat<(v4f16 (bitconvert (v1f64 FPR64:$src))), (v4f16 (REV64v4i16 FPR64:$src))>; } - - +def : Pat<(v4f16 (bitconvert (v4i16 FPR64:$src))), (v4f16 FPR64:$src)>; let Predicates = [IsLE] in { def : Pat<(v8i8 (bitconvert (v1i64 FPR64:$src))), (v8i8 FPR64:$src)>; @@ -5933,7 +5949,7 @@ def : Pat<(v2f32 (bitconvert (v1f64 FPR64:$src))), def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))), (v2f32 (REV64v2i32 FPR64:$src))>; def : Pat<(v2f32 (bitconvert (v4f16 FPR64:$src))), - (v2f32 (REV64v4i16 FPR64:$src))>; + (v2f32 (REV32v4i16 FPR64:$src))>; } def : Pat<(v2f32 (bitconvert (v2i32 FPR64:$src))), (v2f32 FPR64:$src)>; @@ -6076,7 +6092,6 @@ def : Pat<(v8i16 (bitconvert (v4i32 FPR128:$src))), (v8i16 FPR128:$src)>; def : Pat<(v8i16 (bitconvert (v16i8 FPR128:$src))), (v8i16 FPR128:$src)>; def : Pat<(v8i16 (bitconvert (v2f64 FPR128:$src))), (v8i16 FPR128:$src)>; def : Pat<(v8i16 (bitconvert (v4f32 FPR128:$src))), (v8i16 FPR128:$src)>; -def : 
Pat<(v8i16 (bitconvert (v8f16 FPR128:$src))), (v8i16 FPR128:$src)>; } let Predicates = [IsBE] in { def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))), @@ -6093,15 +6108,13 @@ def : Pat<(v8i16 (bitconvert (v2f64 FPR128:$src))), (v8i16 (REV64v8i16 FPR128:$src))>; def : Pat<(v8i16 (bitconvert (v4f32 FPR128:$src))), (v8i16 (REV32v8i16 FPR128:$src))>; -def : Pat<(v8i16 (bitconvert (v8f16 FPR128:$src))), - (v8i16 (REV32v8i16 FPR128:$src))>; } +def : Pat<(v8i16 (bitconvert (v8f16 FPR128:$src))), (v8i16 FPR128:$src)>; let Predicates = [IsLE] in { def : Pat<(v8f16 (bitconvert (f128 FPR128:$src))), (v8f16 FPR128:$src)>; def : Pat<(v8f16 (bitconvert (v2i64 FPR128:$src))), (v8f16 FPR128:$src)>; def : Pat<(v8f16 (bitconvert (v4i32 FPR128:$src))), (v8f16 FPR128:$src)>; -def : Pat<(v8f16 (bitconvert (v8i16 FPR128:$src))), (v8f16 FPR128:$src)>; def : Pat<(v8f16 (bitconvert (v16i8 FPR128:$src))), (v8f16 FPR128:$src)>; def : Pat<(v8f16 (bitconvert (v2f64 FPR128:$src))), (v8f16 FPR128:$src)>; def : Pat<(v8f16 (bitconvert (v4f32 FPR128:$src))), (v8f16 FPR128:$src)>; @@ -6115,8 +6128,6 @@ def : Pat<(v8f16 (bitconvert (v2i64 FPR128:$src))), (v8f16 (REV64v8i16 FPR128:$src))>; def : Pat<(v8f16 (bitconvert (v4i32 FPR128:$src))), (v8f16 (REV32v8i16 FPR128:$src))>; -def : Pat<(v8f16 (bitconvert (v8i16 FPR128:$src))), - (v8f16 (REV64v8i16 FPR128:$src))>; def : Pat<(v8f16 (bitconvert (v16i8 FPR128:$src))), (v8f16 (REV16v16i8 FPR128:$src))>; def : Pat<(v8f16 (bitconvert (v2f64 FPR128:$src))), @@ -6124,6 +6135,7 @@ def : Pat<(v8f16 (bitconvert (v2f64 FPR128:$src))), def : Pat<(v8f16 (bitconvert (v4f32 FPR128:$src))), (v8f16 (REV32v8i16 FPR128:$src))>; } +def : Pat<(v8f16 (bitconvert (v8i16 FPR128:$src))), (v8f16 FPR128:$src)>; let Predicates = [IsLE] in { def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))), (v16i8 FPR128:$src)>; diff --git a/lib/Target/AArch64/AArch64InstructionSelector.cpp b/lib/Target/AArch64/AArch64InstructionSelector.cpp index c2d3ae31c624..392ba13d74a8 100644 --- a/lib/Target/AArch64/AArch64InstructionSelector.cpp +++ b/lib/Target/AArch64/AArch64InstructionSelector.cpp @@ -92,6 +92,8 @@ class AArch64InstructionSelector : public InstructionSelector { return selectAddrModeIndexed(Root, Width / 8); } + void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI) const; + const AArch64TargetMachine &TM; const AArch64Subtarget &STI; const AArch64InstrInfo &TII; @@ -568,11 +570,11 @@ bool AArch64InstructionSelector::selectCompareBranch( else return false; - auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(CBOpc)) - .addUse(LHS) - .addMBB(DestMBB); + BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(CBOpc)) + .addUse(LHS) + .addMBB(DestMBB) + .constrainAllUses(TII, TRI, RBI); - constrainSelectedInstRegOperands(*MIB.getInstr(), TII, TRI, RBI); I.eraseFromParent(); return true; } @@ -868,6 +870,40 @@ bool AArch64InstructionSelector::select(MachineInstr &I, if (OpFlags & AArch64II::MO_GOT) { I.setDesc(TII.get(AArch64::LOADgot)); I.getOperand(1).setTargetFlags(OpFlags); + } else if (TM.getCodeModel() == CodeModel::Large) { + // Materialize the global using movz/movk instructions. 
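// Roughly the following sequence (sketch; xN is whatever register gets
// allocated, G the global being materialized):
//   movz xN, #:abs_g0_nc:G            // bits [15:0]
//   movk xN, #:abs_g1_nc:G, lsl #16   // bits [31:16]
//   movk xN, #:abs_g2_nc:G, lsl #32   // bits [47:32]
//   movk xN, #:abs_g3:G,    lsl #48   // bits [63:48]
// mirroring the MO_G0..MO_G3 fragment flags attached below.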
+ unsigned MovZDstReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); + auto InsertPt = std::next(I.getIterator()); + auto MovZ = + BuildMI(MBB, InsertPt, I.getDebugLoc(), TII.get(AArch64::MOVZXi)) + .addDef(MovZDstReg); + MovZ->addOperand(MF, I.getOperand(1)); + MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 | + AArch64II::MO_NC); + MovZ->addOperand(MF, MachineOperand::CreateImm(0)); + constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI); + + auto BuildMovK = [&](unsigned SrcReg, unsigned char Flags, + unsigned Offset, unsigned ForceDstReg) { + unsigned DstReg = + ForceDstReg ? ForceDstReg + : MRI.createVirtualRegister(&AArch64::GPR64RegClass); + auto MovI = BuildMI(MBB, InsertPt, MovZ->getDebugLoc(), + TII.get(AArch64::MOVKXi)) + .addDef(DstReg) + .addReg(SrcReg); + MovI->addOperand(MF, MachineOperand::CreateGA( + GV, MovZ->getOperand(1).getOffset(), Flags)); + MovI->addOperand(MF, MachineOperand::CreateImm(Offset)); + constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI); + return DstReg; + }; + unsigned DstReg = BuildMovK(MovZ->getOperand(0).getReg(), + AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0); + DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0); + BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg()); + I.eraseFromParent(); + return true; } else { I.setDesc(TII.get(AArch64::MOVaddr)); I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE); @@ -1522,6 +1558,15 @@ AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root, }}; } +void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB, + const MachineInstr &MI) const { + const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); + assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT"); + Optional CstVal = getConstantVRegVal(MI.getOperand(0).getReg(), MRI); + assert(CstVal && "Expected constant value"); + MIB.addImm(CstVal.getValue()); +} + namespace llvm { InstructionSelector * createAArch64InstructionSelector(const AArch64TargetMachine &TM, diff --git a/lib/Target/AArch64/AArch64LegalizerInfo.cpp b/lib/Target/AArch64/AArch64LegalizerInfo.cpp index f7027394f803..05df51202229 100644 --- a/lib/Target/AArch64/AArch64LegalizerInfo.cpp +++ b/lib/Target/AArch64/AArch64LegalizerInfo.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "AArch64LegalizerInfo.h" +#include "AArch64Subtarget.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -127,7 +128,7 @@ widen_1_8_16_32(const LegalizerInfo::SizeAndActionsVec &v) { return result; } -AArch64LegalizerInfo::AArch64LegalizerInfo() { +AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) { using namespace TargetOpcode; const LLT p0 = LLT::pointer(0, 64); const LLT s1 = LLT::scalar(1); @@ -349,6 +350,41 @@ AArch64LegalizerInfo::AArch64LegalizerInfo() { for (auto Ty : {s8, s16, s32, s64, p0}) setAction({G_VAARG, Ty}, Custom); + if (ST.hasLSE()) { + for (auto Ty : {s8, s16, s32, s64}) { + setAction({G_ATOMIC_CMPXCHG_WITH_SUCCESS, Ty}, Lower); + setAction({G_ATOMIC_CMPXCHG, Ty}, Legal); + } + setAction({G_ATOMIC_CMPXCHG, 1, p0}, Legal); + + for (unsigned Op : + {G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_AND, + G_ATOMICRMW_OR, G_ATOMICRMW_XOR, G_ATOMICRMW_MIN, G_ATOMICRMW_MAX, + G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX}) { + for (auto Ty : {s8, s16, s32, s64}) { + setAction({Op, Ty}, Legal); 
+ } + setAction({Op, 1, p0}, Legal); + } + } + + // Merge/Unmerge + for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) + for (int Sz : {8, 16, 32, 64, 128, 192, 256, 384, 512}) { + LLT ScalarTy = LLT::scalar(Sz); + setAction({Op, ScalarTy}, Legal); + setAction({Op, 1, ScalarTy}, Legal); + if (Sz < 32) + continue; + for (int EltSize = 8; EltSize <= 64; EltSize *= 2) { + if (EltSize >= Sz) + continue; + LLT VecTy = LLT::vector(Sz / EltSize, EltSize); + setAction({Op, VecTy}, Legal); + setAction({Op, 1, VecTy}, Legal); + } + } + computeTables(); } diff --git a/lib/Target/AArch64/AArch64LegalizerInfo.h b/lib/Target/AArch64/AArch64LegalizerInfo.h index 42d4ac130c5c..a745b0edbc6d 100644 --- a/lib/Target/AArch64/AArch64LegalizerInfo.h +++ b/lib/Target/AArch64/AArch64LegalizerInfo.h @@ -20,11 +20,12 @@ namespace llvm { class LLVMContext; +class AArch64Subtarget; /// This class provides the information for the target register banks. class AArch64LegalizerInfo : public LegalizerInfo { public: - AArch64LegalizerInfo(); + AArch64LegalizerInfo(const AArch64Subtarget &ST); bool legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder) const override; diff --git a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp index c32b0dbca9b2..8a29456430b9 100644 --- a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp +++ b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -830,8 +830,8 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, if (SExtIdx != -1) { // Generate the sign extension for the proper result of the ldp. // I.e., with X1, that would be: - // %W1 = KILL %W1, %X1 - // %X1 = SBFMXri %X1, 0, 31 + // %w1 = KILL %w1, implicit-def %x1 + // %x1 = SBFMXri killed %x1, 0, 31 MachineOperand &DstMO = MIB->getOperand(SExtIdx); // Right now, DstMO has the extended register, since it comes from an // extended opcode. 
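The comment rewrite in the hunk above only tracks current MIR syntax (lower-case register names and an explicit implicit-def); the transformation itself is unchanged: when one half of a newly formed LDP originally came from a sign-extending load, the pass re-creates the extension with SBFMXri 0, 31, which sign-extends the low 32 bits across the 64-bit register. A minimal stand-alone sketch of that bitfield semantics (plain C++ for illustration only, not the backend code):

    #include <cassert>
    #include <cstdint>

    // Rough model of "%x1 = SBFMXri killed %x1, 0, 31": keep bits [31:0] and
    // sign-extend them across the 64-bit register, which restores the result
    // of the original LDRSW after it has been merged into a plain LDP.
    int64_t sbfmx_0_31(uint64_t X) {
      return static_cast<int64_t>(static_cast<int32_t>(X & 0xffffffffu));
    }

    int main() {
      assert(sbfmx_0_31(0x00000000ffffffffULL) == -1); // sign bit of the w half set
      assert(sbfmx_0_31(0x1234567800000001ULL) == 1);  // upper half is ignored
      return 0;
    }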
@@ -1759,7 +1759,7 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB, } bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { - if (skipFunction(*Fn.getFunction())) + if (skipFunction(Fn.getFunction())) return false; Subtarget = &static_cast(Fn.getSubtarget()); diff --git a/lib/Target/AArch64/AArch64MacroFusion.cpp b/lib/Target/AArch64/AArch64MacroFusion.cpp index bd4bdaa6d12d..6930c816b5ae 100644 --- a/lib/Target/AArch64/AArch64MacroFusion.cpp +++ b/lib/Target/AArch64/AArch64MacroFusion.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#include "AArch64MacroFusion.h" #include "AArch64Subtarget.h" #include "llvm/CodeGen/MacroFusion.h" #include "llvm/CodeGen/TargetInstrInfo.h" diff --git a/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp b/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp index cfd89ad1cab8..ee6703aed1e2 100644 --- a/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp +++ b/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp @@ -20,7 +20,7 @@ #include "AArch64PBQPRegAlloc.h" #include "AArch64.h" #include "AArch64RegisterInfo.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" diff --git a/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp b/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp index ec98980fa0b9..e5822b114324 100644 --- a/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp +++ b/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp @@ -12,9 +12,9 @@ // 1. For BBs that are targets of CBZ/CBNZ instructions, we know the value of // the CBZ/CBNZ source register is zero on the taken/not-taken path. For // instance, the copy instruction in the code below can be removed because -// the CBZW jumps to BB#2 when w0 is zero. +// the CBZW jumps to %bb.2 when w0 is zero. // -// BB#1: +// %bb.1: // cbz w0, .LBB0_2 // .LBB0_2: // mov w0, wzr ; <-- redundant @@ -22,11 +22,11 @@ // 2. If the flag setting instruction defines a register other than WZR/XZR, we // can remove a zero copy in some cases. // -// BB#0: +// %bb.0: // subs w0, w1, w2 // str w0, [x1] // b.ne .LBB0_2 -// BB#1: +// %bb.1: // mov w0, wzr ; <-- redundant // str w0, [x2] // .LBB0_2 @@ -35,7 +35,7 @@ // constant (i.e., ADDS[W|X]ri, SUBS[W|X]ri), we can remove a mov immediate // in some cases. 
// -// BB#0: +// %bb.0: // subs xzr, x0, #1 // b.eq .LBB0_1 // .LBB0_1: @@ -485,7 +485,7 @@ bool AArch64RedundantCopyElimination::optimizeBlock(MachineBasicBlock *MBB) { bool AArch64RedundantCopyElimination::runOnMachineFunction( MachineFunction &MF) { - if (skipFunction(*MF.getFunction())) + if (skipFunction(MF.getFunction())) return false; TRI = MF.getSubtarget().getRegisterInfo(); MRI = &MF.getRegInfo(); diff --git a/lib/Target/AArch64/AArch64RegisterInfo.cpp b/lib/Target/AArch64/AArch64RegisterInfo.cpp index 1059bc37c8f2..360b39125b74 100644 --- a/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -42,22 +42,22 @@ AArch64RegisterInfo::AArch64RegisterInfo(const Triple &TT) const MCPhysReg * AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { assert(MF && "Invalid MachineFunction pointer."); - if (MF->getFunction()->getCallingConv() == CallingConv::GHC) + if (MF->getFunction().getCallingConv() == CallingConv::GHC) // GHC set of callee saved regs is empty as all those regs are // used for passing STG regs around return CSR_AArch64_NoRegs_SaveList; - if (MF->getFunction()->getCallingConv() == CallingConv::AnyReg) + if (MF->getFunction().getCallingConv() == CallingConv::AnyReg) return CSR_AArch64_AllRegs_SaveList; - if (MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS) + if (MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS) return MF->getInfo()->isSplitCSR() ? CSR_AArch64_CXX_TLS_Darwin_PE_SaveList : CSR_AArch64_CXX_TLS_Darwin_SaveList; if (MF->getSubtarget().getTargetLowering() ->supportSwiftError() && - MF->getFunction()->getAttributes().hasAttrSomewhere( + MF->getFunction().getAttributes().hasAttrSomewhere( Attribute::SwiftError)) return CSR_AArch64_AAPCS_SwiftError_SaveList; - if (MF->getFunction()->getCallingConv() == CallingConv::PreserveMost) + if (MF->getFunction().getCallingConv() == CallingConv::PreserveMost) return CSR_AArch64_RT_MostRegs_SaveList; else return CSR_AArch64_AAPCS_SaveList; @@ -66,7 +66,7 @@ AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { const MCPhysReg *AArch64RegisterInfo::getCalleeSavedRegsViaCopy( const MachineFunction *MF) const { assert(MF && "Invalid MachineFunction pointer."); - if (MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS && + if (MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS && MF->getInfo()->isSplitCSR()) return CSR_AArch64_CXX_TLS_Darwin_ViaCopy_SaveList; return nullptr; @@ -84,7 +84,7 @@ AArch64RegisterInfo::getCallPreservedMask(const MachineFunction &MF, return CSR_AArch64_CXX_TLS_Darwin_RegMask; if (MF.getSubtarget().getTargetLowering() ->supportSwiftError() && - MF.getFunction()->getAttributes().hasAttrSomewhere(Attribute::SwiftError)) + MF.getFunction().getAttributes().hasAttrSomewhere(Attribute::SwiftError)) return CSR_AArch64_AAPCS_SwiftError_RegMask; if (CC == CallingConv::PreserveMost) return CSR_AArch64_RT_MostRegs_RegMask; @@ -225,11 +225,13 @@ bool AArch64RegisterInfo::requiresVirtualBaseRegisters( bool AArch64RegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const { - const MachineFrameInfo &MFI = MF.getFrameInfo(); - // AArch64FrameLowering::resolveFrameIndexReference() can always fall back - // to the stack pointer, so only put the emergency spill slot next to the - // FP when there's no better way to access it (SP or base pointer). 
- return MFI.hasVarSizedObjects() && !hasBasePointer(MF); + // This function indicates whether the emergency spillslot should be placed + // close to the beginning of the stackframe (closer to FP) or the end + // (closer to SP). + // + // The beginning works most reliably if we have a frame pointer. + const AArch64FrameLowering &TFI = *getFrameLowering(MF); + return TFI.hasFP(MF); } bool AArch64RegisterInfo::requiresFrameIndexScavenging( diff --git a/lib/Target/AArch64/AArch64RegisterInfo.td b/lib/Target/AArch64/AArch64RegisterInfo.td index a9fb0200d809..9023c3dd8c25 100644 --- a/lib/Target/AArch64/AArch64RegisterInfo.td +++ b/lib/Target/AArch64/AArch64RegisterInfo.td @@ -652,6 +652,24 @@ def XSeqPairClassOperand : //===----- END: v8.1a atomic CASP register operands -----------------------===// +// SVE predicate registers +def P0 : AArch64Reg<0, "p0">, DwarfRegNum<[48]>; +def P1 : AArch64Reg<1, "p1">, DwarfRegNum<[49]>; +def P2 : AArch64Reg<2, "p2">, DwarfRegNum<[50]>; +def P3 : AArch64Reg<3, "p3">, DwarfRegNum<[51]>; +def P4 : AArch64Reg<4, "p4">, DwarfRegNum<[52]>; +def P5 : AArch64Reg<5, "p5">, DwarfRegNum<[53]>; +def P6 : AArch64Reg<6, "p6">, DwarfRegNum<[54]>; +def P7 : AArch64Reg<7, "p7">, DwarfRegNum<[55]>; +def P8 : AArch64Reg<8, "p8">, DwarfRegNum<[56]>; +def P9 : AArch64Reg<9, "p9">, DwarfRegNum<[57]>; +def P10 : AArch64Reg<10, "p10">, DwarfRegNum<[58]>; +def P11 : AArch64Reg<11, "p11">, DwarfRegNum<[59]>; +def P12 : AArch64Reg<12, "p12">, DwarfRegNum<[60]>; +def P13 : AArch64Reg<13, "p13">, DwarfRegNum<[61]>; +def P14 : AArch64Reg<14, "p14">, DwarfRegNum<[62]>; +def P15 : AArch64Reg<15, "p15">, DwarfRegNum<[63]>; + // The part of SVE registers that don't overlap Neon registers. // These are only used as part of clobber lists. def Z0_HI : AArch64Reg<0, "z0_hi">; @@ -731,11 +749,59 @@ class SVERegOp : SVERegOp {} class ZPRRegOp : SVERegOp {} //****************************************************************************** +// SVE predicate register classes. +class PPRClass : RegisterClass< + "AArch64", + [ nxv16i1, nxv8i1, nxv4i1, nxv2i1 ], 16, + (sequence "P%u", 0, lastreg)> { + let Size = 16; +} + +def PPR : PPRClass<15>; +def PPR_3b : PPRClass<7>; // Restricted 3 bit SVE predicate register class. 
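PPR covers all sixteen SVE predicate registers, while the restricted PPR_3b class exists because many predicated SVE instructions encode their governing predicate in a 3-bit field and can therefore only name p0-p7. A small stand-alone sketch of that constraint (the helper name is invented for illustration; it is not part of the patch):

    #include <cassert>

    // A governing predicate carried in a 3-bit instruction field can only
    // address p0..p7 (the registers modelled by PPR_3b); p8..p15 need an
    // operand drawn from the unrestricted PPR class.
    bool fitsRestrictedPredicateField(unsigned PRegNum) {
      return PRegNum <= 7;
    }

    int main() {
      assert(fitsRestrictedPredicateField(3));   // p3 encodes in 3 bits
      assert(!fitsRestrictedPredicateField(9));  // p9 requires the full PPR class
      return 0;
    }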
+ +class PPRAsmOperand : AsmOperandClass { + let Name = "SVE" # name # "Reg"; + let PredicateMethod = "isSVEVectorRegOfWidth<" + # Width # ", " # "AArch64::" # RegClass # "RegClassID>"; + let DiagnosticType = "InvalidSVE" # name # "Reg"; + let RenderMethod = "addRegOperands"; + let ParserMethod = "tryParseSVEPredicateVector"; +} + +def PPRAsmOpAny : PPRAsmOperand<"PredicateAny", "PPR", -1>; +def PPRAsmOp8 : PPRAsmOperand<"PredicateB", "PPR", 8>; +def PPRAsmOp16 : PPRAsmOperand<"PredicateH", "PPR", 16>; +def PPRAsmOp32 : PPRAsmOperand<"PredicateS", "PPR", 32>; +def PPRAsmOp64 : PPRAsmOperand<"PredicateD", "PPR", 64>; + +def PPRAny : PPRRegOp<"", PPRAsmOpAny, PPR>; +def PPR8 : PPRRegOp<"b", PPRAsmOp8, PPR>; +def PPR16 : PPRRegOp<"h", PPRAsmOp16, PPR>; +def PPR32 : PPRRegOp<"s", PPRAsmOp32, PPR>; +def PPR64 : PPRRegOp<"d", PPRAsmOp64, PPR>; + +def PPRAsmOp3bAny : PPRAsmOperand<"Predicate3bAny", "PPR_3b", -1>; +def PPRAsmOp3b8 : PPRAsmOperand<"Predicate3bB", "PPR_3b", 8>; +def PPRAsmOp3b16 : PPRAsmOperand<"Predicate3bH", "PPR_3b", 16>; +def PPRAsmOp3b32 : PPRAsmOperand<"Predicate3bS", "PPR_3b", 32>; +def PPRAsmOp3b64 : PPRAsmOperand<"Predicate3bD", "PPR_3b", 64>; + +def PPR3bAny : PPRRegOp<"", PPRAsmOp3bAny, PPR_3b>; +def PPR3b8 : PPRRegOp<"b", PPRAsmOp3b8, PPR_3b>; +def PPR3b16 : PPRRegOp<"h", PPRAsmOp3b16, PPR_3b>; +def PPR3b32 : PPRRegOp<"s", PPRAsmOp3b32, PPR_3b>; +def PPR3b64 : PPRRegOp<"d", PPRAsmOp3b64, PPR_3b>; + +//****************************************************************************** + // SVE vector register class def ZPR : RegisterClass<"AArch64", [nxv16i8, nxv8i16, nxv4i32, nxv2i64, @@ -748,7 +814,8 @@ def ZPR : RegisterClass<"AArch64", class ZPRAsmOperand : AsmOperandClass { let Name = "SVE" # name # "Reg"; - let PredicateMethod = "isSVEDataVectorRegOfWidth<" # Width # ">"; + let PredicateMethod = "isSVEVectorRegOfWidth<" + # Width # ", AArch64::ZPRRegClassID>"; let RenderMethod = "addRegOperands"; let ParserMethod = "tryParseSVEDataVector<" # !if(!eq(Width, -1), "false", "true") # ">"; diff --git a/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp b/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp new file mode 100644 index 000000000000..e1851875abc5 --- /dev/null +++ b/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp @@ -0,0 +1,741 @@ +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a pass that performs optimization on SIMD instructions +// with high latency by splitting them into more efficient series of +// instructions. +// +// 1. Rewrite certain SIMD instructions with vector element due to their +// inefficiency on some targets. +// +// For example: +// fmla v0.4s, v1.4s, v2.s[1] +// +// Is rewritten into: +// dup v3.4s, v2.s[1] +// fmla v0.4s, v1.4s, v3.4s +// +// 2. Rewrite interleaved memory access instructions due to their +// inefficiency on some targets. 
+// +// For example: +// st2 {v0.4s, v1.4s}, addr +// +// Is rewritten into: +// zip1 v2.4s, v0.4s, v1.4s +// zip2 v3.4s, v0.4s, v1.4s +// stp q2, q3, addr +// +//===----------------------------------------------------------------------===// + +#include "AArch64InstrInfo.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetSchedule.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCSchedule.h" +#include "llvm/Pass.h" +#include + +using namespace llvm; + +#define DEBUG_TYPE "aarch64-simdinstr-opt" + +STATISTIC(NumModifiedInstr, + "Number of SIMD instructions modified"); + +#define AARCH64_VECTOR_BY_ELEMENT_OPT_NAME \ + "AArch64 SIMD instructions optimization pass" + +namespace { + +struct AArch64SIMDInstrOpt : public MachineFunctionPass { + static char ID; + + const TargetInstrInfo *TII; + MachineRegisterInfo *MRI; + TargetSchedModel SchedModel; + + // The two maps below are used to cache decisions instead of recomputing: + // This is used to cache instruction replacement decisions within function + // units and across function units. + std::map, bool> SIMDInstrTable; + // This is used to cache the decision of whether to leave the interleaved + // store instructions replacement pass early or not for a particular target. + std::unordered_map InterlEarlyExit; + + typedef enum { + VectorElem, + Interleave + } Subpass; + + // Instruction represented by OrigOpc is replaced by instructions in ReplOpc. 
+ struct InstReplInfo { + unsigned OrigOpc; + std::vector ReplOpc; + const TargetRegisterClass RC; + }; + +#define RuleST2(OpcOrg, OpcR0, OpcR1, OpcR2, RC) \ + {OpcOrg, {OpcR0, OpcR1, OpcR2}, RC} +#define RuleST4(OpcOrg, OpcR0, OpcR1, OpcR2, OpcR3, OpcR4, OpcR5, OpcR6, \ + OpcR7, OpcR8, OpcR9, RC) \ + {OpcOrg, \ + {OpcR0, OpcR1, OpcR2, OpcR3, OpcR4, OpcR5, OpcR6, OpcR7, OpcR8, OpcR9}, RC} + + // The Instruction Replacement Table: + std::vector IRT = { + // ST2 instructions + RuleST2(AArch64::ST2Twov2d, AArch64::ZIP1v2i64, AArch64::ZIP2v2i64, + AArch64::STPQi, AArch64::FPR128RegClass), + RuleST2(AArch64::ST2Twov4s, AArch64::ZIP1v4i32, AArch64::ZIP2v4i32, + AArch64::STPQi, AArch64::FPR128RegClass), + RuleST2(AArch64::ST2Twov2s, AArch64::ZIP1v2i32, AArch64::ZIP2v2i32, + AArch64::STPDi, AArch64::FPR64RegClass), + RuleST2(AArch64::ST2Twov8h, AArch64::ZIP1v8i16, AArch64::ZIP2v8i16, + AArch64::STPQi, AArch64::FPR128RegClass), + RuleST2(AArch64::ST2Twov4h, AArch64::ZIP1v4i16, AArch64::ZIP2v4i16, + AArch64::STPDi, AArch64::FPR64RegClass), + RuleST2(AArch64::ST2Twov16b, AArch64::ZIP1v16i8, AArch64::ZIP2v16i8, + AArch64::STPQi, AArch64::FPR128RegClass), + RuleST2(AArch64::ST2Twov8b, AArch64::ZIP1v8i8, AArch64::ZIP2v8i8, + AArch64::STPDi, AArch64::FPR64RegClass), + // ST4 instructions + RuleST4(AArch64::ST4Fourv2d, AArch64::ZIP1v2i64, AArch64::ZIP2v2i64, + AArch64::ZIP1v2i64, AArch64::ZIP2v2i64, AArch64::ZIP1v2i64, + AArch64::ZIP2v2i64, AArch64::ZIP1v2i64, AArch64::ZIP2v2i64, + AArch64::STPQi, AArch64::STPQi, AArch64::FPR128RegClass), + RuleST4(AArch64::ST4Fourv4s, AArch64::ZIP1v4i32, AArch64::ZIP2v4i32, + AArch64::ZIP1v4i32, AArch64::ZIP2v4i32, AArch64::ZIP1v4i32, + AArch64::ZIP2v4i32, AArch64::ZIP1v4i32, AArch64::ZIP2v4i32, + AArch64::STPQi, AArch64::STPQi, AArch64::FPR128RegClass), + RuleST4(AArch64::ST4Fourv2s, AArch64::ZIP1v2i32, AArch64::ZIP2v2i32, + AArch64::ZIP1v2i32, AArch64::ZIP2v2i32, AArch64::ZIP1v2i32, + AArch64::ZIP2v2i32, AArch64::ZIP1v2i32, AArch64::ZIP2v2i32, + AArch64::STPDi, AArch64::STPDi, AArch64::FPR64RegClass), + RuleST4(AArch64::ST4Fourv8h, AArch64::ZIP1v8i16, AArch64::ZIP2v8i16, + AArch64::ZIP1v8i16, AArch64::ZIP2v8i16, AArch64::ZIP1v8i16, + AArch64::ZIP2v8i16, AArch64::ZIP1v8i16, AArch64::ZIP2v8i16, + AArch64::STPQi, AArch64::STPQi, AArch64::FPR128RegClass), + RuleST4(AArch64::ST4Fourv4h, AArch64::ZIP1v4i16, AArch64::ZIP2v4i16, + AArch64::ZIP1v4i16, AArch64::ZIP2v4i16, AArch64::ZIP1v4i16, + AArch64::ZIP2v4i16, AArch64::ZIP1v4i16, AArch64::ZIP2v4i16, + AArch64::STPDi, AArch64::STPDi, AArch64::FPR64RegClass), + RuleST4(AArch64::ST4Fourv16b, AArch64::ZIP1v16i8, AArch64::ZIP2v16i8, + AArch64::ZIP1v16i8, AArch64::ZIP2v16i8, AArch64::ZIP1v16i8, + AArch64::ZIP2v16i8, AArch64::ZIP1v16i8, AArch64::ZIP2v16i8, + AArch64::STPQi, AArch64::STPQi, AArch64::FPR128RegClass), + RuleST4(AArch64::ST4Fourv8b, AArch64::ZIP1v8i8, AArch64::ZIP2v8i8, + AArch64::ZIP1v8i8, AArch64::ZIP2v8i8, AArch64::ZIP1v8i8, + AArch64::ZIP2v8i8, AArch64::ZIP1v8i8, AArch64::ZIP2v8i8, + AArch64::STPDi, AArch64::STPDi, AArch64::FPR64RegClass) + }; + + // A costly instruction is replaced in this work by N efficient instructions + // The maximum of N is curently 10 and it is for ST4 case. 
+ static const unsigned MaxNumRepl = 10; + + AArch64SIMDInstrOpt() : MachineFunctionPass(ID) { + initializeAArch64SIMDInstrOptPass(*PassRegistry::getPassRegistry()); + } + + /// Based only on latency of instructions, determine if it is cost efficient + /// to replace the instruction InstDesc by the instructions stored in the + /// array InstDescRepl. + /// Return true if replacement is expected to be faster. + bool shouldReplaceInst(MachineFunction *MF, const MCInstrDesc *InstDesc, + SmallVectorImpl &ReplInstrMCID); + + /// Determine if we need to exit the instruction replacement optimization + /// passes early. This makes sure that no compile time is spent in this pass + /// for targets with no need for any of these optimizations. + /// Return true if early exit of the pass is recommended. + bool shouldExitEarly(MachineFunction *MF, Subpass SP); + + /// Check whether an equivalent DUP instruction has already been + /// created or not. + /// Return true when the DUP instruction already exists. In this case, + /// DestReg will point to the destination of the already created DUP. + bool reuseDUP(MachineInstr &MI, unsigned DupOpcode, unsigned SrcReg, + unsigned LaneNumber, unsigned *DestReg) const; + + /// Certain SIMD instructions with vector element operand are not efficient. + /// Rewrite them into SIMD instructions with vector operands. This rewrite + /// is driven by the latency of the instructions. + /// Return true if the SIMD instruction is modified. + bool optimizeVectElement(MachineInstr &MI); + + /// Process The REG_SEQUENCE instruction, and extract the source + /// operands of the ST2/4 instruction from it. + /// Example of such instructions. + /// %dest = REG_SEQUENCE %st2_src1, dsub0, %st2_src2, dsub1; + /// Return true when the instruction is processed successfully. + bool processSeqRegInst(MachineInstr *DefiningMI, unsigned* StReg, + unsigned* StRegKill, unsigned NumArg) const; + + /// Load/Store Interleaving instructions are not always beneficial. + /// Replace them by ZIP instructionand classical load/store. + /// Return true if the SIMD instruction is modified. + bool optimizeLdStInterleave(MachineInstr &MI); + + /// Return the number of useful source registers for this + /// instruction (2 for ST2 and 4 for ST4). + unsigned determineSrcReg(MachineInstr &MI) const; + + bool runOnMachineFunction(MachineFunction &Fn) override; + + StringRef getPassName() const override { + return AARCH64_VECTOR_BY_ELEMENT_OPT_NAME; + } +}; + +char AArch64SIMDInstrOpt::ID = 0; + +} // end anonymous namespace + +INITIALIZE_PASS(AArch64SIMDInstrOpt, "aarch64-simdinstr-opt", + AARCH64_VECTOR_BY_ELEMENT_OPT_NAME, false, false) + +/// Based only on latency of instructions, determine if it is cost efficient +/// to replace the instruction InstDesc by the instructions stored in the +/// array InstDescRepl. +/// Return true if replacement is expected to be faster. +bool AArch64SIMDInstrOpt:: +shouldReplaceInst(MachineFunction *MF, const MCInstrDesc *InstDesc, + SmallVectorImpl &InstDescRepl) { + // Check if replacement decision is already available in the cached table. + // if so, return it. 
+ std::string Subtarget = SchedModel.getSubtargetInfo()->getCPU(); + auto InstID = std::make_pair(InstDesc->getOpcode(), Subtarget); + if (SIMDInstrTable.find(InstID) != SIMDInstrTable.end()) + return SIMDInstrTable[InstID]; + + unsigned SCIdx = InstDesc->getSchedClass(); + const MCSchedClassDesc *SCDesc = + SchedModel.getMCSchedModel()->getSchedClassDesc(SCIdx); + + // If a target does not define resources for the instructions + // of interest, then return false for no replacement. + const MCSchedClassDesc *SCDescRepl; + if (!SCDesc->isValid() || SCDesc->isVariant()) + { + SIMDInstrTable[InstID] = false; + return false; + } + for (auto IDesc : InstDescRepl) + { + SCDescRepl = SchedModel.getMCSchedModel()->getSchedClassDesc( + IDesc->getSchedClass()); + if (!SCDescRepl->isValid() || SCDescRepl->isVariant()) + { + SIMDInstrTable[InstID] = false; + return false; + } + } + + // Replacement cost. + unsigned ReplCost = 0; + for (auto IDesc :InstDescRepl) + ReplCost += SchedModel.computeInstrLatency(IDesc->getOpcode()); + + if (SchedModel.computeInstrLatency(InstDesc->getOpcode()) > ReplCost) + { + SIMDInstrTable[InstID] = true; + return true; + } + else + { + SIMDInstrTable[InstID] = false; + return false; + } +} + +/// Determine if we need to exit this pass for a kind of instruction replacement +/// early. This makes sure that no compile time is spent in this pass for +/// targets with no need for any of these optimizations beyond performing this +/// check. +/// Return true if early exit of this pass for a kind of instruction +/// replacement is recommended for a target. +bool AArch64SIMDInstrOpt::shouldExitEarly(MachineFunction *MF, Subpass SP) { + const MCInstrDesc* OriginalMCID; + SmallVector ReplInstrMCID; + + switch (SP) { + // For this optimization, check by comparing the latency of a representative + // instruction to that of the replacement instructions. + // TODO: check for all concerned instructions. + case VectorElem: + OriginalMCID = &TII->get(AArch64::FMLAv4i32_indexed); + ReplInstrMCID.push_back(&TII->get(AArch64::DUPv4i32lane)); + ReplInstrMCID.push_back(&TII->get(AArch64::FMLAv4f32)); + if (shouldReplaceInst(MF, OriginalMCID, ReplInstrMCID)) + return false; + break; + + // For this optimization, check for all concerned instructions. + case Interleave: + std::string Subtarget = SchedModel.getSubtargetInfo()->getCPU(); + if (InterlEarlyExit.find(Subtarget) != InterlEarlyExit.end()) + return InterlEarlyExit[Subtarget]; + + for (auto &I : IRT) { + OriginalMCID = &TII->get(I.OrigOpc); + for (auto &Repl : I.ReplOpc) + ReplInstrMCID.push_back(&TII->get(Repl)); + if (shouldReplaceInst(MF, OriginalMCID, ReplInstrMCID)) { + InterlEarlyExit[Subtarget] = false; + return false; + } + ReplInstrMCID.clear(); + } + InterlEarlyExit[Subtarget] = true; + break; + } + + return true; +} + +/// Check whether an equivalent DUP instruction has already been +/// created or not. +/// Return true when the DUP instruction already exists. In this case, +/// DestReg will point to the destination of the already created DUP. 
+bool AArch64SIMDInstrOpt::reuseDUP(MachineInstr &MI, unsigned DupOpcode, + unsigned SrcReg, unsigned LaneNumber, + unsigned *DestReg) const { + for (MachineBasicBlock::iterator MII = MI, MIE = MI.getParent()->begin(); + MII != MIE;) { + MII--; + MachineInstr *CurrentMI = &*MII; + + if (CurrentMI->getOpcode() == DupOpcode && + CurrentMI->getNumOperands() == 3 && + CurrentMI->getOperand(1).getReg() == SrcReg && + CurrentMI->getOperand(2).getImm() == LaneNumber) { + *DestReg = CurrentMI->getOperand(0).getReg(); + return true; + } + } + + return false; +} + +/// Certain SIMD instructions with vector element operand are not efficient. +/// Rewrite them into SIMD instructions with vector operands. This rewrite +/// is driven by the latency of the instructions. +/// The instruction of concerns are for the time being FMLA, FMLS, FMUL, +/// and FMULX and hence they are hardcoded. +/// +/// For example: +/// fmla v0.4s, v1.4s, v2.s[1] +/// +/// Is rewritten into +/// dup v3.4s, v2.s[1] // DUP not necessary if redundant +/// fmla v0.4s, v1.4s, v3.4s +/// +/// Return true if the SIMD instruction is modified. +bool AArch64SIMDInstrOpt::optimizeVectElement(MachineInstr &MI) { + const MCInstrDesc *MulMCID, *DupMCID; + const TargetRegisterClass *RC = &AArch64::FPR128RegClass; + + switch (MI.getOpcode()) { + default: + return false; + + // 4X32 instructions + case AArch64::FMLAv4i32_indexed: + DupMCID = &TII->get(AArch64::DUPv4i32lane); + MulMCID = &TII->get(AArch64::FMLAv4f32); + break; + case AArch64::FMLSv4i32_indexed: + DupMCID = &TII->get(AArch64::DUPv4i32lane); + MulMCID = &TII->get(AArch64::FMLSv4f32); + break; + case AArch64::FMULXv4i32_indexed: + DupMCID = &TII->get(AArch64::DUPv4i32lane); + MulMCID = &TII->get(AArch64::FMULXv4f32); + break; + case AArch64::FMULv4i32_indexed: + DupMCID = &TII->get(AArch64::DUPv4i32lane); + MulMCID = &TII->get(AArch64::FMULv4f32); + break; + + // 2X64 instructions + case AArch64::FMLAv2i64_indexed: + DupMCID = &TII->get(AArch64::DUPv2i64lane); + MulMCID = &TII->get(AArch64::FMLAv2f64); + break; + case AArch64::FMLSv2i64_indexed: + DupMCID = &TII->get(AArch64::DUPv2i64lane); + MulMCID = &TII->get(AArch64::FMLSv2f64); + break; + case AArch64::FMULXv2i64_indexed: + DupMCID = &TII->get(AArch64::DUPv2i64lane); + MulMCID = &TII->get(AArch64::FMULXv2f64); + break; + case AArch64::FMULv2i64_indexed: + DupMCID = &TII->get(AArch64::DUPv2i64lane); + MulMCID = &TII->get(AArch64::FMULv2f64); + break; + + // 2X32 instructions + case AArch64::FMLAv2i32_indexed: + RC = &AArch64::FPR64RegClass; + DupMCID = &TII->get(AArch64::DUPv2i32lane); + MulMCID = &TII->get(AArch64::FMLAv2f32); + break; + case AArch64::FMLSv2i32_indexed: + RC = &AArch64::FPR64RegClass; + DupMCID = &TII->get(AArch64::DUPv2i32lane); + MulMCID = &TII->get(AArch64::FMLSv2f32); + break; + case AArch64::FMULXv2i32_indexed: + RC = &AArch64::FPR64RegClass; + DupMCID = &TII->get(AArch64::DUPv2i32lane); + MulMCID = &TII->get(AArch64::FMULXv2f32); + break; + case AArch64::FMULv2i32_indexed: + RC = &AArch64::FPR64RegClass; + DupMCID = &TII->get(AArch64::DUPv2i32lane); + MulMCID = &TII->get(AArch64::FMULv2f32); + break; + } + + SmallVector ReplInstrMCID; + ReplInstrMCID.push_back(DupMCID); + ReplInstrMCID.push_back(MulMCID); + if (!shouldReplaceInst(MI.getParent()->getParent(), &TII->get(MI.getOpcode()), + ReplInstrMCID)) + return false; + + const DebugLoc &DL = MI.getDebugLoc(); + MachineBasicBlock &MBB = *MI.getParent(); + MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + + // Get the operands of the current 
SIMD arithmetic instruction. + unsigned MulDest = MI.getOperand(0).getReg(); + unsigned SrcReg0 = MI.getOperand(1).getReg(); + unsigned Src0IsKill = getKillRegState(MI.getOperand(1).isKill()); + unsigned SrcReg1 = MI.getOperand(2).getReg(); + unsigned Src1IsKill = getKillRegState(MI.getOperand(2).isKill()); + unsigned DupDest; + + // Instructions of interest have either 4 or 5 operands. + if (MI.getNumOperands() == 5) { + unsigned SrcReg2 = MI.getOperand(3).getReg(); + unsigned Src2IsKill = getKillRegState(MI.getOperand(3).isKill()); + unsigned LaneNumber = MI.getOperand(4).getImm(); + // Create a new DUP instruction. Note that if an equivalent DUP instruction + // has already been created before, then use that one instead of creating + // a new one. + if (!reuseDUP(MI, DupMCID->getOpcode(), SrcReg2, LaneNumber, &DupDest)) { + DupDest = MRI.createVirtualRegister(RC); + BuildMI(MBB, MI, DL, *DupMCID, DupDest) + .addReg(SrcReg2, Src2IsKill) + .addImm(LaneNumber); + } + BuildMI(MBB, MI, DL, *MulMCID, MulDest) + .addReg(SrcReg0, Src0IsKill) + .addReg(SrcReg1, Src1IsKill) + .addReg(DupDest, Src2IsKill); + } else if (MI.getNumOperands() == 4) { + unsigned LaneNumber = MI.getOperand(3).getImm(); + if (!reuseDUP(MI, DupMCID->getOpcode(), SrcReg1, LaneNumber, &DupDest)) { + DupDest = MRI.createVirtualRegister(RC); + BuildMI(MBB, MI, DL, *DupMCID, DupDest) + .addReg(SrcReg1, Src1IsKill) + .addImm(LaneNumber); + } + BuildMI(MBB, MI, DL, *MulMCID, MulDest) + .addReg(SrcReg0, Src0IsKill) + .addReg(DupDest, Src1IsKill); + } else { + return false; + } + + ++NumModifiedInstr; + return true; +} + +/// Load/Store Interleaving instructions are not always beneficial. +/// Replace them by ZIP instructions and classical load/store. +/// +/// For example: +/// st2 {v0.4s, v1.4s}, addr +/// +/// Is rewritten into: +/// zip1 v2.4s, v0.4s, v1.4s +/// zip2 v3.4s, v0.4s, v1.4s +/// stp q2, q3, addr +// +/// For example: +/// st4 {v0.4s, v1.4s, v2.4s, v3.4s}, addr +/// +/// Is rewritten into: +/// zip1 v4.4s, v0.4s, v2.4s +/// zip2 v5.4s, v0.4s, v2.4s +/// zip1 v6.4s, v1.4s, v3.4s +/// zip2 v7.4s, v1.4s, v3.4s +/// zip1 v8.4s, v4.4s, v6.4s +/// zip2 v9.4s, v4.4s, v6.4s +/// zip1 v10.4s, v5.4s, v7.4s +/// zip2 v11.4s, v5.4s, v7.4s +/// stp q8, q9, addr +/// stp q10, q11, addr+32 +/// +/// Currently only instructions related to ST2 and ST4 are considered. +/// Other may be added later. +/// Return true if the SIMD instruction is modified. +bool AArch64SIMDInstrOpt::optimizeLdStInterleave(MachineInstr &MI) { + + unsigned SeqReg, AddrReg; + unsigned StReg[4], StRegKill[4]; + MachineInstr *DefiningMI; + const DebugLoc &DL = MI.getDebugLoc(); + MachineBasicBlock &MBB = *MI.getParent(); + SmallVector ZipDest; + SmallVector ReplInstrMCID; + + // If current instruction matches any of the rewriting rules, then + // gather information about parameters of the new instructions. + bool Match = false; + for (auto &I : IRT) { + if (MI.getOpcode() == I.OrigOpc) { + SeqReg = MI.getOperand(0).getReg(); + AddrReg = MI.getOperand(1).getReg(); + DefiningMI = MRI->getUniqueVRegDef(SeqReg); + unsigned NumReg = determineSrcReg(MI); + if (!processSeqRegInst(DefiningMI, StReg, StRegKill, NumReg)) + return false; + + for (auto &Repl : I.ReplOpc) { + ReplInstrMCID.push_back(&TII->get(Repl)); + // Generate destination registers but only for non-store instruction. 
+ if (Repl != AArch64::STPQi && Repl != AArch64::STPDi) + ZipDest.push_back(MRI->createVirtualRegister(&I.RC)); + } + Match = true; + break; + } + } + + if (!Match) + return false; + + // Determine if it is profitable to replace MI by the series of instructions + // represented in ReplInstrMCID. + if (!shouldReplaceInst(MI.getParent()->getParent(), &TII->get(MI.getOpcode()), + ReplInstrMCID)) + return false; + + // Generate the replacement instructions composed of ZIP1, ZIP2, and STP (at + // this point, the code generation is hardcoded and does not rely on the IRT + // table used above given that code generation for ST2 replacement is somewhat + // different than for ST4 replacement. We could have added more info into the + // table related to how we build new instructions but we may be adding more + // complexity with that). + switch (MI.getOpcode()) { + default: + return false; + + case AArch64::ST2Twov16b: + case AArch64::ST2Twov8b: + case AArch64::ST2Twov8h: + case AArch64::ST2Twov4h: + case AArch64::ST2Twov4s: + case AArch64::ST2Twov2s: + case AArch64::ST2Twov2d: + // ZIP instructions + BuildMI(MBB, MI, DL, *ReplInstrMCID[0], ZipDest[0]) + .addReg(StReg[0]) + .addReg(StReg[1]); + BuildMI(MBB, MI, DL, *ReplInstrMCID[1], ZipDest[1]) + .addReg(StReg[0], StRegKill[0]) + .addReg(StReg[1], StRegKill[1]); + // STP instructions + BuildMI(MBB, MI, DL, *ReplInstrMCID[2]) + .addReg(ZipDest[0]) + .addReg(ZipDest[1]) + .addReg(AddrReg) + .addImm(0); + break; + + case AArch64::ST4Fourv16b: + case AArch64::ST4Fourv8b: + case AArch64::ST4Fourv8h: + case AArch64::ST4Fourv4h: + case AArch64::ST4Fourv4s: + case AArch64::ST4Fourv2s: + case AArch64::ST4Fourv2d: + // ZIP instructions + BuildMI(MBB, MI, DL, *ReplInstrMCID[0], ZipDest[0]) + .addReg(StReg[0]) + .addReg(StReg[2]); + BuildMI(MBB, MI, DL, *ReplInstrMCID[1], ZipDest[1]) + .addReg(StReg[0], StRegKill[0]) + .addReg(StReg[2], StRegKill[2]); + BuildMI(MBB, MI, DL, *ReplInstrMCID[2], ZipDest[2]) + .addReg(StReg[1]) + .addReg(StReg[3]); + BuildMI(MBB, MI, DL, *ReplInstrMCID[3], ZipDest[3]) + .addReg(StReg[1], StRegKill[1]) + .addReg(StReg[3], StRegKill[3]); + BuildMI(MBB, MI, DL, *ReplInstrMCID[4], ZipDest[4]) + .addReg(ZipDest[0]) + .addReg(ZipDest[2]); + BuildMI(MBB, MI, DL, *ReplInstrMCID[5], ZipDest[5]) + .addReg(ZipDest[0]) + .addReg(ZipDest[2]); + BuildMI(MBB, MI, DL, *ReplInstrMCID[6], ZipDest[6]) + .addReg(ZipDest[1]) + .addReg(ZipDest[3]); + BuildMI(MBB, MI, DL, *ReplInstrMCID[7], ZipDest[7]) + .addReg(ZipDest[1]) + .addReg(ZipDest[3]); + // stp instructions + BuildMI(MBB, MI, DL, *ReplInstrMCID[8]) + .addReg(ZipDest[4]) + .addReg(ZipDest[5]) + .addReg(AddrReg) + .addImm(0); + BuildMI(MBB, MI, DL, *ReplInstrMCID[9]) + .addReg(ZipDest[6]) + .addReg(ZipDest[7]) + .addReg(AddrReg) + .addImm(2); + break; + } + + ++NumModifiedInstr; + return true; +} + +/// Process The REG_SEQUENCE instruction, and extract the source +/// operands of the ST2/4 instruction from it. +/// Example of such instruction. +/// %dest = REG_SEQUENCE %st2_src1, dsub0, %st2_src2, dsub1; +/// Return true when the instruction is processed successfully. +bool AArch64SIMDInstrOpt::processSeqRegInst(MachineInstr *DefiningMI, + unsigned* StReg, unsigned* StRegKill, unsigned NumArg) const { + assert (DefiningMI != NULL); + if (DefiningMI->getOpcode() != AArch64::REG_SEQUENCE) + return false; + + for (unsigned i=0; igetOperand(2*i+1).getReg(); + StRegKill[i] = getKillRegState(DefiningMI->getOperand(2*i+1).isKill()); + + // Sanity check for the other arguments. 
+ if (DefiningMI->getOperand(2*i+2).isImm()) { + switch (DefiningMI->getOperand(2*i+2).getImm()) { + default: + return false; + + case AArch64::dsub0: + case AArch64::dsub1: + case AArch64::dsub2: + case AArch64::dsub3: + case AArch64::qsub0: + case AArch64::qsub1: + case AArch64::qsub2: + case AArch64::qsub3: + break; + } + } + else + return false; + } + return true; +} + +/// Return the number of useful source registers for this instruction +/// (2 for ST2 and 4 for ST4). +unsigned AArch64SIMDInstrOpt::determineSrcReg(MachineInstr &MI) const { + switch (MI.getOpcode()) { + default: + llvm_unreachable("Unsupported instruction for this pass"); + + case AArch64::ST2Twov16b: + case AArch64::ST2Twov8b: + case AArch64::ST2Twov8h: + case AArch64::ST2Twov4h: + case AArch64::ST2Twov4s: + case AArch64::ST2Twov2s: + case AArch64::ST2Twov2d: + return 2; + + case AArch64::ST4Fourv16b: + case AArch64::ST4Fourv8b: + case AArch64::ST4Fourv8h: + case AArch64::ST4Fourv4h: + case AArch64::ST4Fourv4s: + case AArch64::ST4Fourv2s: + case AArch64::ST4Fourv2d: + return 4; + } +} + +bool AArch64SIMDInstrOpt::runOnMachineFunction(MachineFunction &MF) { + if (skipFunction(MF.getFunction())) + return false; + + TII = MF.getSubtarget().getInstrInfo(); + MRI = &MF.getRegInfo(); + const TargetSubtargetInfo &ST = MF.getSubtarget(); + const AArch64InstrInfo *AAII = + static_cast(ST.getInstrInfo()); + if (!AAII) + return false; + SchedModel.init(ST.getSchedModel(), &ST, AAII); + if (!SchedModel.hasInstrSchedModel()) + return false; + + bool Changed = false; + for (auto OptimizationKind : {VectorElem, Interleave}) { + if (!shouldExitEarly(&MF, OptimizationKind)) { + SmallVector RemoveMIs; + for (MachineBasicBlock &MBB : MF) { + for (MachineBasicBlock::iterator MII = MBB.begin(), MIE = MBB.end(); + MII != MIE;) { + MachineInstr &MI = *MII; + bool InstRewrite; + if (OptimizationKind == VectorElem) + InstRewrite = optimizeVectElement(MI) ; + else + InstRewrite = optimizeLdStInterleave(MI); + if (InstRewrite) { + // Add MI to the list of instructions to be removed given that it + // has been replaced. + RemoveMIs.push_back(&MI); + Changed = true; + } + ++MII; + } + } + for (MachineInstr *MI : RemoveMIs) + MI->eraseFromParent(); + } + } + + return Changed; +} + +/// Returns an instance of the high cost ASIMD instruction replacement +/// optimization pass. 
+FunctionPass *llvm::createAArch64SIMDInstrOptPass() { + return new AArch64SIMDInstrOpt(); +} diff --git a/lib/Target/AArch64/AArch64SVEInstrInfo.td b/lib/Target/AArch64/AArch64SVEInstrInfo.td index 7da0b28d22dc..c2f46cae978f 100644 --- a/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -14,4 +14,19 @@ let Predicates = [HasSVE] in { defm ADD_ZZZ : sve_int_bin_cons_arit_0<0b000, "add">; defm SUB_ZZZ : sve_int_bin_cons_arit_0<0b001, "sub">; + + defm ADD_ZPmZ : sve_int_bin_pred_arit_0<0b000, "add">; + defm SUB_ZPmZ : sve_int_bin_pred_arit_0<0b001, "sub">; + + defm ZIP1_ZZZ : sve_int_perm_bin_perm_zz<0b000, "zip1">; + defm ZIP2_ZZZ : sve_int_perm_bin_perm_zz<0b001, "zip2">; + + defm ZIP1_PPP : sve_int_perm_bin_perm_pp<0b000, "zip1">; + defm ZIP2_PPP : sve_int_perm_bin_perm_pp<0b001, "zip2">; + + defm DUP_ZR : sve_int_perm_dup_r<"dup">; + + def RDVLI_XI : sve_int_read_vl_a<0b0, 0b11111, "rdvl">; + def ADDVL_XXI : sve_int_arith_vl<0b0, "addvl">; + def ADDPL_XXI : sve_int_arith_vl<0b1, "addpl">; } diff --git a/lib/Target/AArch64/AArch64SchedThunderX2T99.td b/lib/Target/AArch64/AArch64SchedThunderX2T99.td index fd60459382a9..5f64f0de4c50 100644 --- a/lib/Target/AArch64/AArch64SchedThunderX2T99.td +++ b/lib/Target/AArch64/AArch64SchedThunderX2T99.td @@ -22,7 +22,7 @@ def ThunderX2T99Model : SchedMachineModel { let LoadLatency = 4; // Optimistic load latency. let MispredictPenalty = 12; // Extra cycles for mispredicted branch. // Determined via a mix of micro-arch details and experimentation. - let LoopMicroOpBufferSize = 32; + let LoopMicroOpBufferSize = 128; let PostRAScheduler = 1; // Using PostRA sched. let CompleteModel = 1; @@ -391,7 +391,7 @@ def : WriteRes { let Latency = 1; } def : WriteRes { let Latency = 1; } def : WriteRes { - let Unsupported = 1; + let Latency = 4; let NumMicroOps = 2; } diff --git a/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp index 7f5507371fa0..a719d47618e5 100644 --- a/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp +++ b/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp @@ -25,11 +25,11 @@ SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemset( ConstantSDNode *SizeValue = dyn_cast(Size); const AArch64Subtarget &STI = DAG.getMachineFunction().getSubtarget(); - const char *bzeroEntry = - (V && V->isNullValue()) ? STI.getBZeroEntry() : nullptr; + const char *bzeroName = (V && V->isNullValue()) + ? DAG.getTargetLoweringInfo().getLibcallName(RTLIB::BZERO) : nullptr; // For small size (< 256), it is not beneficial to use bzero // instead of memset. 
- if (bzeroEntry && (!SizeValue || SizeValue->getZExtValue() > 256)) { + if (bzeroName && (!SizeValue || SizeValue->getZExtValue() > 256)) { const AArch64TargetLowering &TLI = *STI.getTargetLowering(); EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout()); @@ -45,7 +45,7 @@ SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemset( CLI.setDebugLoc(dl) .setChain(Chain) .setLibCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), - DAG.getExternalSymbol(bzeroEntry, IntPtr), + DAG.getExternalSymbol(bzeroName, IntPtr), std::move(Args)) .setDiscardResult(); std::pair CallResult = TLI.LowerCallTo(CLI); diff --git a/lib/Target/AArch64/AArch64StorePairSuppress.cpp b/lib/Target/AArch64/AArch64StorePairSuppress.cpp index 78fc322158b6..571e61d7083c 100644 --- a/lib/Target/AArch64/AArch64StorePairSuppress.cpp +++ b/lib/Target/AArch64/AArch64StorePairSuppress.cpp @@ -120,7 +120,7 @@ bool AArch64StorePairSuppress::isNarrowFPStore(const MachineInstr &MI) { } bool AArch64StorePairSuppress::runOnMachineFunction(MachineFunction &MF) { - if (skipFunction(*MF.getFunction())) + if (skipFunction(MF.getFunction())) return false; const TargetSubtargetInfo &ST = MF.getSubtarget(); diff --git a/lib/Target/AArch64/AArch64Subtarget.cpp b/lib/Target/AArch64/AArch64Subtarget.cpp index 28b8f7c79cf1..eb9bb1498d62 100644 --- a/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/lib/Target/AArch64/AArch64Subtarget.cpp @@ -21,13 +21,9 @@ #include "AArch64CallLowering.h" #include "AArch64LegalizerInfo.h" #include "AArch64RegisterBankInfo.h" -#include "llvm/CodeGen/GlobalISel/IRTranslator.h" #include "llvm/CodeGen/GlobalISel/InstructionSelect.h" -#include "llvm/CodeGen/GlobalISel/Legalizer.h" -#include "llvm/CodeGen/GlobalISel/RegBankSelect.h" #include "llvm/CodeGen/MachineScheduler.h" #include "llvm/IR/GlobalValue.h" -#include "llvm/Support/TargetRegistry.h" using namespace llvm; @@ -154,7 +150,7 @@ AArch64Subtarget::AArch64Subtarget(const Triple &TT, const std::string &CPU, InstrInfo(initializeSubtargetDependencies(FS, CPU)), TSInfo(), TLInfo(TM, *this) { CallLoweringInfo.reset(new AArch64CallLowering(*getTargetLowering())); - Legalizer.reset(new AArch64LegalizerInfo()); + Legalizer.reset(new AArch64LegalizerInfo(*this)); auto *RBI = new AArch64RegisterBankInfo(*getRegisterInfo()); @@ -221,19 +217,6 @@ unsigned char AArch64Subtarget::classifyGlobalFunctionReference( return AArch64II::MO_NO_FLAG; } -/// This function returns the name of a function which has an interface -/// like the non-standard bzero function, if such a function exists on -/// the current subtarget and it is considered prefereable over -/// memset with zero passed as the second argument. Otherwise it -/// returns null. -const char *AArch64Subtarget::getBZeroEntry() const { - // Prefer bzero on Darwin only. - if(isTargetDarwin()) - return "bzero"; - - return nullptr; -} - void AArch64Subtarget::overrideSchedPolicy(MachineSchedPolicy &Policy, unsigned NumRegionInstrs) const { // LNT run (at least on Cyclone) showed reasonably significant gains for @@ -267,3 +250,13 @@ std::unique_ptr AArch64Subtarget::getCustomPBQPConstraints() const { return balanceFPOps() ? llvm::make_unique() : nullptr; } + +void AArch64Subtarget::mirFileLoaded(MachineFunction &MF) const { + // We usually compute max call frame size after ISel. Do the computation now + // if the .mir file didn't specify it. Note that this will probably give you + // bogus values after PEI has eliminated the callframe setup/destroy pseudo + // instructions, specify explicitely if you need it to be correct. 
+ MachineFrameInfo &MFI = MF.getFrameInfo(); + if (!MFI.isMaxCallFrameSizeComputed()) + MFI.computeMaxCallFrameSize(MF); +} diff --git a/lib/Target/AArch64/AArch64Subtarget.h b/lib/Target/AArch64/AArch64Subtarget.h index a73ba8874131..45a8eb164648 100644 --- a/lib/Target/AArch64/AArch64Subtarget.h +++ b/lib/Target/AArch64/AArch64Subtarget.h @@ -86,6 +86,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo { // HasZeroCycleZeroing - Has zero-cycle zeroing instructions. bool HasZeroCycleZeroing = false; + bool HasZeroCycleZeroingFPWorkaround = false; // StrictAlign - Disallow unaligned memory accesses. bool StrictAlign = false; @@ -197,6 +198,10 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo { bool hasZeroCycleZeroing() const { return HasZeroCycleZeroing; } + bool hasZeroCycleZeroingFPWorkaround() const { + return HasZeroCycleZeroingFPWorkaround; + } + bool requiresStrictAlign() const { return StrictAlign; } bool isXRaySupported() const override { return true; } @@ -304,13 +309,6 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo { unsigned char classifyGlobalFunctionReference(const GlobalValue *GV, const TargetMachine &TM) const; - /// This function returns the name of a function which has an interface - /// like the non-standard bzero function, if such a function exists on - /// the current subtarget and it is considered prefereable over - /// memset with zero passed as the second argument. Otherwise it - /// returns null. - const char *getBZeroEntry() const; - void overrideSchedPolicy(MachineSchedPolicy &Policy, unsigned NumRegionInstrs) const override; @@ -328,6 +326,8 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo { return false; } } + + void mirFileLoaded(MachineFunction &MF) const override; }; } // End llvm namespace diff --git a/lib/Target/AArch64/AArch64SystemOperands.td b/lib/Target/AArch64/AArch64SystemOperands.td index df939add70fa..2162775c369b 100644 --- a/lib/Target/AArch64/AArch64SystemOperands.td +++ b/lib/Target/AArch64/AArch64SystemOperands.td @@ -174,6 +174,37 @@ def : PRFM<"pstl2strm", 0x13>; def : PRFM<"pstl3keep", 0x14>; def : PRFM<"pstl3strm", 0x15>; +//===----------------------------------------------------------------------===// +// SVE Predicate patterns +//===----------------------------------------------------------------------===// + +class SVEPREDPAT encoding> : SearchableTable { + let SearchableFields = ["Name", "Encoding"]; + let EnumValueField = "Encoding"; + + string Name = name; + bits<5> Encoding; + let Encoding = encoding; +} + +def : SVEPREDPAT<"pow2", 0x00>; +def : SVEPREDPAT<"vl1", 0x01>; +def : SVEPREDPAT<"vl2", 0x02>; +def : SVEPREDPAT<"vl3", 0x03>; +def : SVEPREDPAT<"vl4", 0x04>; +def : SVEPREDPAT<"vl5", 0x05>; +def : SVEPREDPAT<"vl6", 0x06>; +def : SVEPREDPAT<"vl7", 0x07>; +def : SVEPREDPAT<"vl8", 0x08>; +def : SVEPREDPAT<"vl16", 0x09>; +def : SVEPREDPAT<"vl32", 0x0a>; +def : SVEPREDPAT<"vl64", 0x0b>; +def : SVEPREDPAT<"vl128", 0x0c>; +def : SVEPREDPAT<"vl256", 0x0d>; +def : SVEPREDPAT<"mul4", 0x1d>; +def : SVEPREDPAT<"mul3", 0x1e>; +def : SVEPREDPAT<"all", 0x1f>; + //===----------------------------------------------------------------------===// // PState instruction options. 
//===----------------------------------------------------------------------===// @@ -322,6 +353,9 @@ def : ROSysReg<"PMCEID0_EL0", 0b11, 0b011, 0b1001, 0b1100, 0b110>; def : ROSysReg<"PMCEID1_EL0", 0b11, 0b011, 0b1001, 0b1100, 0b111>; def : ROSysReg<"MIDR_EL1", 0b11, 0b000, 0b0000, 0b0000, 0b000>; def : ROSysReg<"CCSIDR_EL1", 0b11, 0b001, 0b0000, 0b0000, 0b000>; +def : ROSysReg<"CCSIDR2_EL1", 0b11, 0b001, 0b0000, 0b0000, 0b010> { + let Requires = [{ {AArch64::HasV8_3aOps} }]; +} def : ROSysReg<"CLIDR_EL1", 0b11, 0b001, 0b0000, 0b0000, 0b001>; def : ROSysReg<"CTR_EL0", 0b11, 0b011, 0b0000, 0b0000, 0b001>; def : ROSysReg<"MPIDR_EL1", 0b11, 0b000, 0b0000, 0b0000, 0b101>; diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp index 2cf0a49896ed..94aa7edc953b 100644 --- a/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -136,7 +136,7 @@ static cl::opt static cl::opt EnableGlobalISelAtO( "aarch64-enable-global-isel-at-O", cl::Hidden, cl::desc("Enable GlobalISel at or below an opt level (-1 to disable)"), - cl::init(-1)); + cl::init(0)); static cl::opt EnableFalkorHWPFFix("aarch64-enable-falkor-hwpf-fix", cl::init(true), cl::Hidden); @@ -157,7 +157,7 @@ extern "C" void LLVMInitializeAArch64Target() { initializeAArch64DeadRegisterDefinitionsPass(*PR); initializeAArch64ExpandPseudoPass(*PR); initializeAArch64LoadStoreOptPass(*PR); - initializeAArch64VectorByElementOptPass(*PR); + initializeAArch64SIMDInstrOptPass(*PR); initializeAArch64PromoteConstantPass(*PR); initializeAArch64RedundantCopyEliminationPass(*PR); initializeAArch64StorePairSuppressPass(*PR); @@ -243,6 +243,10 @@ AArch64TargetMachine::AArch64TargetMachine(const Target &T, const Triple &TT, getEffectiveCodeModel(TT, CM, JIT), OL), TLOF(createTLOF(getTargetTriple())), isLittle(LittleEndian) { initAsmInfo(); + + // Enable GlobalISel at or below EnableGlobalISelAt0. + if (getOptLevel() <= EnableGlobalISelAtO) + setGlobalISel(true); } AArch64TargetMachine::~AArch64TargetMachine() = default; @@ -340,16 +344,13 @@ class AArch64PassConfig : public TargetPassConfig { void addPostRegAlloc() override; void addPreSched2() override; void addPreEmitPass() override; - - bool isGlobalISelEnabled() const override; }; } // end anonymous namespace -TargetIRAnalysis AArch64TargetMachine::getTargetIRAnalysis() { - return TargetIRAnalysis([this](const Function &F) { - return TargetTransformInfo(AArch64TTIImpl(this, F)); - }); +TargetTransformInfo +AArch64TargetMachine::getTargetTransformInfo(const Function &F) { + return TargetTransformInfo(AArch64TTIImpl(this, F)); } TargetPassConfig *AArch64TargetMachine::createPassConfig(PassManagerBase &PM) { @@ -365,7 +366,7 @@ void AArch64PassConfig::addIRPasses() { // determine whether it succeeded. We can exploit existing control-flow in // ldrex/strex loops to simplify this, but it needs tidying up. 
if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy) - addPass(createCFGSimplificationPass(1, true, true, false)); + addPass(createCFGSimplificationPass(1, true, true, false, true)); // Run LoopDataPrefetch // @@ -456,10 +457,6 @@ bool AArch64PassConfig::addGlobalInstructionSelect() { return false; } -bool AArch64PassConfig::isGlobalISelEnabled() const { - return TM->getOptLevel() <= EnableGlobalISelAtO; -} - bool AArch64PassConfig::addILPOpts() { if (EnableCondOpt) addPass(createAArch64ConditionOptimizerPass()); @@ -473,7 +470,7 @@ bool AArch64PassConfig::addILPOpts() { addPass(&EarlyIfConverterID); if (EnableStPairSuppress) addPass(createAArch64StorePairSuppressPass()); - addPass(createAArch64VectorByElementOptPass()); + addPass(createAArch64SIMDInstrOptPass()); return true; } diff --git a/lib/Target/AArch64/AArch64TargetMachine.h b/lib/Target/AArch64/AArch64TargetMachine.h index 2bbfb2da3db6..8d28a5e30ebf 100644 --- a/lib/Target/AArch64/AArch64TargetMachine.h +++ b/lib/Target/AArch64/AArch64TargetMachine.h @@ -44,8 +44,7 @@ class AArch64TargetMachine : public LLVMTargetMachine { // Pass Pipeline Configuration TargetPassConfig *createPassConfig(PassManagerBase &PM) override; - /// \brief Get the TargetIRAnalysis for this target. - TargetIRAnalysis getTargetIRAnalysis() override; + TargetTransformInfo getTargetTransformInfo(const Function &F) override; TargetLoweringObjectFile* getObjFileLowering() const override { return TLOF.get(); diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 1820ad959fcb..aafcd7fe19f9 100644 --- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -277,7 +277,7 @@ int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, // same as the second operand. In this case, we will generate a "long" // version of the widening instruction. if (auto *Cast = dyn_cast(SingleUser->getOperand(1))) - if (I->getOpcode() == Cast->getOpcode() && + if (I->getOpcode() == unsigned(Cast->getOpcode()) && cast(I)->getSrcTy() == Cast->getSrcTy()) return 0; } diff --git a/lib/Target/AArch64/AArch64VectorByElementOpt.cpp b/lib/Target/AArch64/AArch64VectorByElementOpt.cpp deleted file mode 100644 index 7ea2fc88f4d5..000000000000 --- a/lib/Target/AArch64/AArch64VectorByElementOpt.cpp +++ /dev/null @@ -1,388 +0,0 @@ -//=- AArch64VectorByElementOpt.cpp - AArch64 vector by element inst opt pass =// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains a pass that performs optimization for vector by element -// SIMD instructions. -// -// Certain SIMD instructions with vector element operand are not efficient. -// Rewrite them into SIMD instructions with vector operands. This rewrite -// is driven by the latency of the instructions. 
-// -// Example: -// fmla v0.4s, v1.4s, v2.s[1] -// is rewritten into -// dup v3.4s, v2.s[1] -// fmla v0.4s, v1.4s, v3.4s -// -//===----------------------------------------------------------------------===// - -#include "AArch64InstrInfo.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineOperand.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/TargetInstrInfo.h" -#include "llvm/CodeGen/TargetSchedule.h" -#include "llvm/CodeGen/TargetSubtargetInfo.h" -#include "llvm/MC/MCInstrDesc.h" -#include "llvm/MC/MCSchedule.h" -#include "llvm/Pass.h" -#include - -using namespace llvm; - -#define DEBUG_TYPE "aarch64-vectorbyelement-opt" - -STATISTIC(NumModifiedInstr, - "Number of vector by element instructions modified"); - -#define AARCH64_VECTOR_BY_ELEMENT_OPT_NAME \ - "AArch64 vector by element instruction optimization pass" - -namespace { - -struct AArch64VectorByElementOpt : public MachineFunctionPass { - static char ID; - - const TargetInstrInfo *TII; - MachineRegisterInfo *MRI; - TargetSchedModel SchedModel; - - AArch64VectorByElementOpt() : MachineFunctionPass(ID) { - initializeAArch64VectorByElementOptPass(*PassRegistry::getPassRegistry()); - } - - /// Based only on latency of instructions, determine if it is cost efficient - /// to replace the instruction InstDesc by the two instructions InstDescRep1 - /// and InstDescRep2. - /// Return true if replacement is recommended. - bool - shouldReplaceInstruction(MachineFunction *MF, const MCInstrDesc *InstDesc, - const MCInstrDesc *InstDescRep1, - const MCInstrDesc *InstDescRep2, - std::map &VecInstElemTable) const; - - /// Determine if we need to exit the vector by element instruction - /// optimization pass early. This makes sure that Targets with no need - /// for this optimization do not spent any compile time on this pass. - /// This check is done by comparing the latency of an indexed FMLA - /// instruction to the latency of the DUP + the latency of a vector - /// FMLA instruction. We do not check on other related instructions such - /// as FMLS as we assume that if the situation shows up for one - /// instruction, then it is likely to show up for the related ones. - /// Return true if early exit of the pass is recommended. - bool earlyExitVectElement(MachineFunction *MF); - - /// Check whether an equivalent DUP instruction has already been - /// created or not. - /// Return true when the dup instruction already exists. In this case, - /// DestReg will point to the destination of the already created DUP. - bool reuseDUP(MachineInstr &MI, unsigned DupOpcode, unsigned SrcReg, - unsigned LaneNumber, unsigned *DestReg) const; - - /// Certain SIMD instructions with vector element operand are not efficient. - /// Rewrite them into SIMD instructions with vector operands. This rewrite - /// is driven by the latency of the instructions. - /// Return true if the SIMD instruction is modified. 
- bool optimizeVectElement(MachineInstr &MI, - std::map *VecInstElemTable) const; - - bool runOnMachineFunction(MachineFunction &Fn) override; - - StringRef getPassName() const override { - return AARCH64_VECTOR_BY_ELEMENT_OPT_NAME; - } -}; - -char AArch64VectorByElementOpt::ID = 0; - -} // end anonymous namespace - -INITIALIZE_PASS(AArch64VectorByElementOpt, "aarch64-vectorbyelement-opt", - AARCH64_VECTOR_BY_ELEMENT_OPT_NAME, false, false) - -/// Based only on latency of instructions, determine if it is cost efficient -/// to replace the instruction InstDesc by the two instructions InstDescRep1 -/// and InstDescRep2. Note that it is assumed in this fuction that an -/// instruction of type InstDesc is always replaced by the same two -/// instructions as results are cached here. -/// Return true if replacement is recommended. -bool AArch64VectorByElementOpt::shouldReplaceInstruction( - MachineFunction *MF, const MCInstrDesc *InstDesc, - const MCInstrDesc *InstDescRep1, const MCInstrDesc *InstDescRep2, - std::map &VecInstElemTable) const { - // Check if replacment decision is alredy available in the cached table. - // if so, return it. - if (!VecInstElemTable.empty() && - VecInstElemTable.find(InstDesc->getOpcode()) != VecInstElemTable.end()) - return VecInstElemTable[InstDesc->getOpcode()]; - - unsigned SCIdx = InstDesc->getSchedClass(); - unsigned SCIdxRep1 = InstDescRep1->getSchedClass(); - unsigned SCIdxRep2 = InstDescRep2->getSchedClass(); - const MCSchedClassDesc *SCDesc = - SchedModel.getMCSchedModel()->getSchedClassDesc(SCIdx); - const MCSchedClassDesc *SCDescRep1 = - SchedModel.getMCSchedModel()->getSchedClassDesc(SCIdxRep1); - const MCSchedClassDesc *SCDescRep2 = - SchedModel.getMCSchedModel()->getSchedClassDesc(SCIdxRep2); - - // If a subtarget does not define resources for any of the instructions - // of interest, then return false for no replacement. - if (!SCDesc->isValid() || SCDesc->isVariant() || !SCDescRep1->isValid() || - SCDescRep1->isVariant() || !SCDescRep2->isValid() || - SCDescRep2->isVariant()) { - VecInstElemTable[InstDesc->getOpcode()] = false; - return false; - } - - if (SchedModel.computeInstrLatency(InstDesc->getOpcode()) > - SchedModel.computeInstrLatency(InstDescRep1->getOpcode()) + - SchedModel.computeInstrLatency(InstDescRep2->getOpcode())) { - VecInstElemTable[InstDesc->getOpcode()] = true; - return true; - } - VecInstElemTable[InstDesc->getOpcode()] = false; - return false; -} - -/// Determine if we need to exit the vector by element instruction -/// optimization pass early. This makes sure that Targets with no need -/// for this optimization do not spent any compile time on this pass. -/// This check is done by comparing the latency of an indexed FMLA -/// instruction to the latency of the DUP + the latency of a vector -/// FMLA instruction. We do not check on other related instructions such -/// as FMLS as we assume that if the situation shows up for one -/// instruction, then it is likely to show up for the related ones. -/// Return true if early exit of the pass is recommended. 
-bool AArch64VectorByElementOpt::earlyExitVectElement(MachineFunction *MF) { - std::map VecInstElemTable; - const MCInstrDesc *IndexMulMCID = &TII->get(AArch64::FMLAv4i32_indexed); - const MCInstrDesc *DupMCID = &TII->get(AArch64::DUPv4i32lane); - const MCInstrDesc *MulMCID = &TII->get(AArch64::FMULv4f32); - - if (!shouldReplaceInstruction(MF, IndexMulMCID, DupMCID, MulMCID, - VecInstElemTable)) - return true; - return false; -} - -/// Check whether an equivalent DUP instruction has already been -/// created or not. -/// Return true when the dup instruction already exists. In this case, -/// DestReg will point to the destination of the already created DUP. -bool AArch64VectorByElementOpt::reuseDUP(MachineInstr &MI, unsigned DupOpcode, - unsigned SrcReg, unsigned LaneNumber, - unsigned *DestReg) const { - for (MachineBasicBlock::iterator MII = MI, MIE = MI.getParent()->begin(); - MII != MIE;) { - MII--; - MachineInstr *CurrentMI = &*MII; - - if (CurrentMI->getOpcode() == DupOpcode && - CurrentMI->getNumOperands() == 3 && - CurrentMI->getOperand(1).getReg() == SrcReg && - CurrentMI->getOperand(2).getImm() == LaneNumber) { - *DestReg = CurrentMI->getOperand(0).getReg(); - return true; - } - } - - return false; -} - -/// Certain SIMD instructions with vector element operand are not efficient. -/// Rewrite them into SIMD instructions with vector operands. This rewrite -/// is driven by the latency of the instructions. -/// The instruction of concerns are for the time being fmla, fmls, fmul, -/// and fmulx and hence they are hardcoded. -/// -/// Example: -/// fmla v0.4s, v1.4s, v2.s[1] -/// is rewritten into -/// dup v3.4s, v2.s[1] // dup not necessary if redundant -/// fmla v0.4s, v1.4s, v3.4s -/// Return true if the SIMD instruction is modified. -bool AArch64VectorByElementOpt::optimizeVectElement( - MachineInstr &MI, std::map *VecInstElemTable) const { - const MCInstrDesc *MulMCID, *DupMCID; - const TargetRegisterClass *RC = &AArch64::FPR128RegClass; - - switch (MI.getOpcode()) { - default: - return false; - - // 4X32 instructions - case AArch64::FMLAv4i32_indexed: - DupMCID = &TII->get(AArch64::DUPv4i32lane); - MulMCID = &TII->get(AArch64::FMLAv4f32); - break; - case AArch64::FMLSv4i32_indexed: - DupMCID = &TII->get(AArch64::DUPv4i32lane); - MulMCID = &TII->get(AArch64::FMLSv4f32); - break; - case AArch64::FMULXv4i32_indexed: - DupMCID = &TII->get(AArch64::DUPv4i32lane); - MulMCID = &TII->get(AArch64::FMULXv4f32); - break; - case AArch64::FMULv4i32_indexed: - DupMCID = &TII->get(AArch64::DUPv4i32lane); - MulMCID = &TII->get(AArch64::FMULv4f32); - break; - - // 2X64 instructions - case AArch64::FMLAv2i64_indexed: - DupMCID = &TII->get(AArch64::DUPv2i64lane); - MulMCID = &TII->get(AArch64::FMLAv2f64); - break; - case AArch64::FMLSv2i64_indexed: - DupMCID = &TII->get(AArch64::DUPv2i64lane); - MulMCID = &TII->get(AArch64::FMLSv2f64); - break; - case AArch64::FMULXv2i64_indexed: - DupMCID = &TII->get(AArch64::DUPv2i64lane); - MulMCID = &TII->get(AArch64::FMULXv2f64); - break; - case AArch64::FMULv2i64_indexed: - DupMCID = &TII->get(AArch64::DUPv2i64lane); - MulMCID = &TII->get(AArch64::FMULv2f64); - break; - - // 2X32 instructions - case AArch64::FMLAv2i32_indexed: - RC = &AArch64::FPR64RegClass; - DupMCID = &TII->get(AArch64::DUPv2i32lane); - MulMCID = &TII->get(AArch64::FMLAv2f32); - break; - case AArch64::FMLSv2i32_indexed: - RC = &AArch64::FPR64RegClass; - DupMCID = &TII->get(AArch64::DUPv2i32lane); - MulMCID = &TII->get(AArch64::FMLSv2f32); - break; - case AArch64::FMULXv2i32_indexed: - RC = 
&AArch64::FPR64RegClass; - DupMCID = &TII->get(AArch64::DUPv2i32lane); - MulMCID = &TII->get(AArch64::FMULXv2f32); - break; - case AArch64::FMULv2i32_indexed: - RC = &AArch64::FPR64RegClass; - DupMCID = &TII->get(AArch64::DUPv2i32lane); - MulMCID = &TII->get(AArch64::FMULv2f32); - break; - } - - if (!shouldReplaceInstruction(MI.getParent()->getParent(), - &TII->get(MI.getOpcode()), DupMCID, MulMCID, - *VecInstElemTable)) - return false; - - const DebugLoc &DL = MI.getDebugLoc(); - MachineBasicBlock &MBB = *MI.getParent(); - MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); - - // get the operands of the current SIMD arithmetic instruction. - unsigned MulDest = MI.getOperand(0).getReg(); - unsigned SrcReg0 = MI.getOperand(1).getReg(); - unsigned Src0IsKill = getKillRegState(MI.getOperand(1).isKill()); - unsigned SrcReg1 = MI.getOperand(2).getReg(); - unsigned Src1IsKill = getKillRegState(MI.getOperand(2).isKill()); - unsigned DupDest; - - // Instructions of interest have either 4 or 5 operands. - if (MI.getNumOperands() == 5) { - unsigned SrcReg2 = MI.getOperand(3).getReg(); - unsigned Src2IsKill = getKillRegState(MI.getOperand(3).isKill()); - unsigned LaneNumber = MI.getOperand(4).getImm(); - - // Create a new DUP instruction. Note that if an equivalent DUP instruction - // has already been created before, then use that one instread of creating - // a new one. - if (!reuseDUP(MI, DupMCID->getOpcode(), SrcReg2, LaneNumber, &DupDest)) { - DupDest = MRI.createVirtualRegister(RC); - BuildMI(MBB, MI, DL, *DupMCID, DupDest) - .addReg(SrcReg2, Src2IsKill) - .addImm(LaneNumber); - } - BuildMI(MBB, MI, DL, *MulMCID, MulDest) - .addReg(SrcReg0, Src0IsKill) - .addReg(SrcReg1, Src1IsKill) - .addReg(DupDest, Src2IsKill); - } else if (MI.getNumOperands() == 4) { - unsigned LaneNumber = MI.getOperand(3).getImm(); - if (!reuseDUP(MI, DupMCID->getOpcode(), SrcReg1, LaneNumber, &DupDest)) { - DupDest = MRI.createVirtualRegister(RC); - BuildMI(MBB, MI, DL, *DupMCID, DupDest) - .addReg(SrcReg1, Src1IsKill) - .addImm(LaneNumber); - } - BuildMI(MBB, MI, DL, *MulMCID, MulDest) - .addReg(SrcReg0, Src0IsKill) - .addReg(DupDest, Src1IsKill); - } else { - return false; - } - - ++NumModifiedInstr; - return true; -} - -bool AArch64VectorByElementOpt::runOnMachineFunction(MachineFunction &MF) { - if (skipFunction(*MF.getFunction())) - return false; - - TII = MF.getSubtarget().getInstrInfo(); - MRI = &MF.getRegInfo(); - const TargetSubtargetInfo &ST = MF.getSubtarget(); - const AArch64InstrInfo *AAII = - static_cast(ST.getInstrInfo()); - if (!AAII) - return false; - SchedModel.init(ST.getSchedModel(), &ST, AAII); - if (!SchedModel.hasInstrSchedModel()) - return false; - - // A simple check to exit this pass early for targets that do not need it. - if (earlyExitVectElement(&MF)) - return false; - - bool Changed = false; - std::map VecInstElemTable; - SmallVector RemoveMIs; - - for (MachineBasicBlock &MBB : MF) { - for (MachineBasicBlock::iterator MII = MBB.begin(), MIE = MBB.end(); - MII != MIE;) { - MachineInstr &MI = *MII; - if (optimizeVectElement(MI, &VecInstElemTable)) { - // Add MI to the list of instructions to be removed given that it has - // been replaced. - RemoveMIs.push_back(&MI); - Changed = true; - } - ++MII; - } - } - - for (MachineInstr *MI : RemoveMIs) - MI->eraseFromParent(); - - return Changed; -} - -/// createAArch64VectorByElementOptPass - returns an instance of the -/// vector by element optimization pass. 
-FunctionPass *llvm::createAArch64VectorByElementOptPass() { - return new AArch64VectorByElementOpt(); -} diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 2763a5b3a905..34e1fce72438 100644 --- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -59,7 +59,12 @@ using namespace llvm; namespace { -enum class RegKind {Scalar, NeonVector, SVEDataVector}; +enum class RegKind { + Scalar, + NeonVector, + SVEDataVector, + SVEPredicateVector +}; class AArch64AsmParser : public MCTargetAsmParser { private: @@ -134,6 +139,8 @@ class AArch64AsmParser : public MCTargetAsmParser { OperandMatchResultTy tryParseGPRSeqPair(OperandVector &Operands); template OperandMatchResultTy tryParseSVEDataVector(OperandVector &Operands); + OperandMatchResultTy tryParseSVEPredicateVector(OperandVector &Operands); + OperandMatchResultTy tryParseSVEPattern(OperandVector &Operands); public: enum AArch64MatchResultTy { @@ -464,50 +471,32 @@ class AArch64Operand : public MCParsedAsmOperand { bool isImm() const override { return Kind == k_Immediate; } bool isMem() const override { return false; } - bool isSImm9() const { - if (!isImm()) - return false; - const MCConstantExpr *MCE = dyn_cast(getImm()); - if (!MCE) - return false; - int64_t Val = MCE->getValue(); - return (Val >= -256 && Val < 256); - } - bool isSImm10s8() const { - if (!isImm()) - return false; - const MCConstantExpr *MCE = dyn_cast(getImm()); - if (!MCE) - return false; - int64_t Val = MCE->getValue(); - return (Val >= -4096 && Val < 4089 && (Val & 7) == 0); - } - bool isSImm7s4() const { - if (!isImm()) - return false; - const MCConstantExpr *MCE = dyn_cast(getImm()); - if (!MCE) - return false; - int64_t Val = MCE->getValue(); - return (Val >= -256 && Val <= 252 && (Val & 3) == 0); - } - bool isSImm7s8() const { + + template bool isSImm() const { return isSImmScaled(); } + + template bool isSImmScaled() const { if (!isImm()) return false; const MCConstantExpr *MCE = dyn_cast(getImm()); if (!MCE) return false; + + int64_t Shift = Bits - 1; + int64_t MinVal = (int64_t(1) << Shift) * -Scale; + int64_t MaxVal = ((int64_t(1) << Shift) - 1) * Scale; + int64_t Val = MCE->getValue(); - return (Val >= -512 && Val <= 504 && (Val & 7) == 0); + return Val >= MinVal && Val <= MaxVal && (Val % Scale) == 0; } - bool isSImm7s16() const { + + bool isSVEPattern() const { if (!isImm()) return false; - const MCConstantExpr *MCE = dyn_cast(getImm()); + auto *MCE = dyn_cast(getImm()); if (!MCE) return false; int64_t Val = MCE->getValue(); - return (Val >= -1024 && Val <= 1008 && (Val & 15) == 0); + return Val >= 0 && Val < 32; } bool isSymbolicUImm12Offset(const MCExpr *Expr, unsigned Scale) const { @@ -813,6 +802,10 @@ class AArch64Operand : public MCParsedAsmOperand { } bool isReg() const override { + return Kind == k_Register; + } + + bool isScalarReg() const { return Kind == k_Register && Reg.Kind == RegKind::Scalar; } @@ -826,14 +819,27 @@ class AArch64Operand : public MCParsedAsmOperand { Reg.RegNum); } - template - bool isSVEDataVectorReg() const { - return (Kind == k_Register && Reg.Kind == RegKind::SVEDataVector) && + template bool isSVEVectorReg() const { + RegKind RK; + switch (Class) { + case AArch64::ZPRRegClassID: + RK = RegKind::SVEDataVector; + break; + case AArch64::PPRRegClassID: + case AArch64::PPR_3bRegClassID: + RK = RegKind::SVEPredicateVector; + break; + default: + llvm_unreachable("Unsupport register class"); + } + + return 
(Kind == k_Register && Reg.Kind == RK) && AArch64MCRegisterClasses[Class].contains(getReg()); } - template bool isSVEDataVectorRegOfWidth() const { - return isSVEDataVectorReg() && + template + bool isSVEVectorRegOfWidth() const { + return isSVEVectorReg() && (ElementWidth == -1 || Reg.ElementWidth == ElementWidth); } @@ -1058,7 +1064,7 @@ class AArch64Operand : public MCParsedAsmOperand { // ambiguity in the matcher. template bool isSImm9OffsetFB() const { - return isSImm9() && !isUImm12Offset(); + return isSImm<9>() && !isUImm12Offset(); } bool isAdrpLabel() const { @@ -1926,6 +1932,27 @@ static unsigned matchSVEDataVectorRegName(StringRef Name) { .Default(0); } +static unsigned matchSVEPredicateVectorRegName(StringRef Name) { + return StringSwitch(Name.lower()) + .Case("p0", AArch64::P0) + .Case("p1", AArch64::P1) + .Case("p2", AArch64::P2) + .Case("p3", AArch64::P3) + .Case("p4", AArch64::P4) + .Case("p5", AArch64::P5) + .Case("p6", AArch64::P6) + .Case("p7", AArch64::P7) + .Case("p8", AArch64::P8) + .Case("p9", AArch64::P9) + .Case("p10", AArch64::P10) + .Case("p11", AArch64::P11) + .Case("p12", AArch64::P12) + .Case("p13", AArch64::P13) + .Case("p14", AArch64::P14) + .Case("p15", AArch64::P15) + .Default(0); +} + static bool isValidSVEKind(StringRef Name) { return StringSwitch(Name.lower()) .Case(".b", true) @@ -1936,10 +1963,6 @@ static bool isValidSVEKind(StringRef Name) { .Default(false); } -static bool isSVEDataVectorRegister(StringRef Name) { - return Name[0] == 'z'; -} - static void parseValidVectorKind(StringRef Name, unsigned &NumElements, char &ElementKind) { assert(isValidVectorKind(Name)); @@ -1969,18 +1992,19 @@ bool AArch64AsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, // Matches a register name or register alias previously defined by '.req' unsigned AArch64AsmParser::matchRegisterNameAlias(StringRef Name, RegKind Kind) { - unsigned RegNum; - switch (Kind) { - case RegKind::Scalar: - RegNum = MatchRegisterName(Name); - break; - case RegKind::NeonVector: - RegNum = MatchNeonVectorRegName(Name); - break; - case RegKind::SVEDataVector: - RegNum = matchSVEDataVectorRegName(Name); - break; - } + unsigned RegNum = 0; + if ((RegNum = matchSVEDataVectorRegName(Name))) + return Kind == RegKind::SVEDataVector ? RegNum : 0; + + if ((RegNum = matchSVEPredicateVectorRegName(Name))) + return Kind == RegKind::SVEPredicateVector ? RegNum : 0; + + if ((RegNum = MatchNeonVectorRegName(Name))) + return Kind == RegKind::NeonVector ? RegNum : 0; + + // The parsed register must be of RegKind Scalar + if ((RegNum = MatchRegisterName(Name))) + return Kind == RegKind::Scalar ? RegNum : 0; if (!RegNum) { // Check for aliases registered via .req. Canonicalize to lower case. @@ -2007,10 +2031,8 @@ int AArch64AsmParser::tryParseRegister() { return -1; std::string lowerCase = Tok.getString().lower(); - if (isSVEDataVectorRegister(lowerCase)) - return -1; - unsigned RegNum = matchRegisterNameAlias(lowerCase, RegKind::Scalar); + // Also handle a few aliases of registers. if (RegNum == 0) RegNum = StringSwitch(lowerCase) @@ -2742,6 +2764,66 @@ AArch64AsmParser::tryParseSVERegister(int &Reg, StringRef &Kind, return MatchOperand_NoMatch; } +/// tryParseSVEPredicateVector - Parse a SVE predicate register operand. +OperandMatchResultTy +AArch64AsmParser::tryParseSVEPredicateVector(OperandVector &Operands) { + // Check for a SVE predicate register specifier first. 
+ const SMLoc S = getLoc(); + StringRef Kind; + int RegNum = -1; + auto Res = tryParseSVERegister(RegNum, Kind, RegKind::SVEPredicateVector); + if (Res != MatchOperand_Success) + return Res; + + unsigned ElementWidth = StringSwitch(Kind.lower()) + .Case("", -1) + .Case(".b", 8) + .Case(".h", 16) + .Case(".s", 32) + .Case(".d", 64) + .Case(".q", 128) + .Default(0); + + if (!ElementWidth) + return MatchOperand_NoMatch; + + Operands.push_back( + AArch64Operand::CreateReg(RegNum, RegKind::SVEPredicateVector, + ElementWidth, S, getLoc(), getContext())); + + // Not all predicates are followed by a '/m' or '/z'. + MCAsmParser &Parser = getParser(); + if (Parser.getTok().isNot(AsmToken::Slash)) + return MatchOperand_Success; + + // But when they do they shouldn't have an element type suffix. + if (!Kind.empty()) { + Error(S, "not expecting size suffix"); + return MatchOperand_ParseFail; + } + + // Add a literal slash as operand + Operands.push_back( + AArch64Operand::CreateToken("/" , false, getLoc(), getContext())); + + Parser.Lex(); // Eat the slash. + + // Zeroing or merging? + auto Pred = Parser.getTok().getString().lower(); + if (Pred != "z" && Pred != "m") { + Error(getLoc(), "expecting 'm' or 'z' predication"); + return MatchOperand_ParseFail; + } + + // Add zero/merge token. + const char *ZM = Pred == "z" ? "z" : "m"; + Operands.push_back( + AArch64Operand::CreateToken(ZM, false, getLoc(), getContext())); + + Parser.Lex(); // Eat zero/merge token. + return MatchOperand_Success; +} + /// parseRegister - Parse a non-vector register operand. bool AArch64AsmParser::parseRegister(OperandVector &Operands) { SMLoc S = getLoc(); @@ -2962,9 +3044,12 @@ AArch64AsmParser::tryParseGPR64sp0Operand(OperandVector &Operands) { bool AArch64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode, bool invertCondCode) { MCAsmParser &Parser = getParser(); + + OperandMatchResultTy ResTy = + MatchOperandParserImpl(Operands, Mnemonic, /*ParseForAllFeatures=*/ true); + // Check if the current operand has a custom associated parser, if so, try to // custom parse the operand, or fallback to the general approach. - OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); if (ResTy == MatchOperand_Success) return false; // If there wasn't a custom match, try the generic matcher below. 
Otherwise, @@ -3081,7 +3166,7 @@ bool AArch64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode, return true; if (Operands.size() < 2 || - !static_cast(*Operands[1]).isReg()) + !static_cast(*Operands[1]).isScalarReg()) return Error(Loc, "Only valid when first operand is register"); bool IsXReg = @@ -3442,6 +3527,8 @@ static std::string AArch64MnemonicSpellCheck(StringRef S, uint64_t FBS, bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode, OperandVector &Operands) { switch (ErrCode) { + case Match_InvalidTiedOperand: + return Error(Loc, "operand must match destination register"); case Match_MissingFeature: return Error(Loc, "instruction requires a CPU feature not currently enabled"); @@ -3475,6 +3562,8 @@ bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode, case Match_InvalidFPImm: return Error(Loc, "expected compatible register or floating-point constant"); + case Match_InvalidMemoryIndexedSImm6: + return Error(Loc, "index must be an integer in range [-32, 31]."); case Match_InvalidMemoryIndexedSImm9: return Error(Loc, "index must be an integer in range [-256, 255]."); case Match_InvalidMemoryIndexedSImm10: @@ -3575,6 +3664,20 @@ bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode, ComputeAvailableFeatures(STI->getFeatureBits())); return Error(Loc, "unrecognized instruction mnemonic" + Suggestion); } + case Match_InvalidSVEPattern: + return Error(Loc, "invalid predicate pattern"); + case Match_InvalidSVEPredicateAnyReg: + case Match_InvalidSVEPredicateBReg: + case Match_InvalidSVEPredicateHReg: + case Match_InvalidSVEPredicateSReg: + case Match_InvalidSVEPredicateDReg: + return Error(Loc, "invalid predicate register."); + case Match_InvalidSVEPredicate3bAnyReg: + case Match_InvalidSVEPredicate3bBReg: + case Match_InvalidSVEPredicate3bHReg: + case Match_InvalidSVEPredicate3bSReg: + case Match_InvalidSVEPredicate3bDReg: + return Error(Loc, "restricted predicate has range [0, 7]."); default: llvm_unreachable("unexpected error code!"); } @@ -3597,7 +3700,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, if (NumOperands == 4 && Tok == "lsl") { AArch64Operand &Op2 = static_cast(*Operands[2]); AArch64Operand &Op3 = static_cast(*Operands[3]); - if (Op2.isReg() && Op3.isImm()) { + if (Op2.isScalarReg() && Op3.isImm()) { const MCConstantExpr *Op3CE = dyn_cast(Op3.getImm()); if (Op3CE) { uint64_t Op3Val = Op3CE->getValue(); @@ -3629,7 +3732,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, AArch64Operand LSBOp = static_cast(*Operands[2]); AArch64Operand WidthOp = static_cast(*Operands[3]); - if (Op1.isReg() && LSBOp.isImm() && WidthOp.isImm()) { + if (Op1.isScalarReg() && LSBOp.isImm() && WidthOp.isImm()) { const MCConstantExpr *LSBCE = dyn_cast(LSBOp.getImm()); const MCConstantExpr *WidthCE = dyn_cast(WidthOp.getImm()); @@ -3685,7 +3788,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, AArch64Operand &Op3 = static_cast(*Operands[3]); AArch64Operand &Op4 = static_cast(*Operands[4]); - if (Op1.isReg() && Op3.isImm() && Op4.isImm()) { + if (Op1.isScalarReg() && Op3.isImm() && Op4.isImm()) { const MCConstantExpr *Op3CE = dyn_cast(Op3.getImm()); const MCConstantExpr *Op4CE = dyn_cast(Op4.getImm()); @@ -3749,7 +3852,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, AArch64Operand &Op3 = static_cast(*Operands[3]); AArch64Operand &Op4 = static_cast(*Operands[4]); - if (Op1.isReg() && Op3.isImm() && Op4.isImm()) { + if 
(Op1.isScalarReg() && Op3.isImm() && Op4.isImm()) { const MCConstantExpr *Op3CE = dyn_cast(Op3.getImm()); const MCConstantExpr *Op4CE = dyn_cast(Op4.getImm()); @@ -3796,6 +3899,31 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, } } } + + // The Cyclone CPU and early successors didn't execute the zero-cycle zeroing + // instruction for FP registers correctly in some rare circumstances. Convert + // it to a safe instruction and warn (because silently changing someone's + // assembly is rude). + if (getSTI().getFeatureBits()[AArch64::FeatureZCZeroingFPWorkaround] && + NumOperands == 4 && Tok == "movi") { + AArch64Operand &Op1 = static_cast(*Operands[1]); + AArch64Operand &Op2 = static_cast(*Operands[2]); + AArch64Operand &Op3 = static_cast(*Operands[3]); + if ((Op1.isToken() && Op2.isNeonVectorReg() && Op3.isImm()) || + (Op1.isNeonVectorReg() && Op2.isToken() && Op3.isImm())) { + StringRef Suffix = Op1.isToken() ? Op1.getToken() : Op2.getToken(); + if (Suffix.lower() == ".2d" && + cast(Op3.getImm())->getValue() == 0) { + Warning(IDLoc, "instruction movi.2d with immediate #0 may not function" + " correctly on this CPU, converting to equivalent movi.16b"); + // Switch the suffix to .16b. + unsigned Idx = Op1.isToken() ? 1 : 2; + Operands[Idx] = AArch64Operand::CreateToken(".16b", false, IDLoc, + getContext()); + } + } + } + // FIXME: Horrible hack for sxtw and uxtw with Wn src and Xd dst operands. // InstAlias can't quite handle this since the reg classes aren't // subclasses. @@ -3803,7 +3931,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, // The source register can be Wn here, but the matcher expects a // GPR64. Twiddle it here if necessary. AArch64Operand &Op = static_cast(*Operands[2]); - if (Op.isReg()) { + if (Op.isScalarReg()) { unsigned Reg = getXRegFromWReg(Op.getReg()); Operands[2] = AArch64Operand::CreateReg(Reg, RegKind::Scalar, Op.getStartLoc(), Op.getEndLoc(), @@ -3813,13 +3941,13 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, // FIXME: Likewise for sxt[bh] with a Xd dst operand else if (NumOperands == 3 && (Tok == "sxtb" || Tok == "sxth")) { AArch64Operand &Op = static_cast(*Operands[1]); - if (Op.isReg() && + if (Op.isScalarReg() && AArch64MCRegisterClasses[AArch64::GPR64allRegClassID].contains( Op.getReg())) { // The source register can be Wn here, but the matcher expects a // GPR64. Twiddle it here if necessary. AArch64Operand &Op = static_cast(*Operands[2]); - if (Op.isReg()) { + if (Op.isScalarReg()) { unsigned Reg = getXRegFromWReg(Op.getReg()); Operands[2] = AArch64Operand::CreateReg(Reg, RegKind::Scalar, Op.getStartLoc(), @@ -3830,13 +3958,13 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, // FIXME: Likewise for uxt[bh] with a Xd dst operand else if (NumOperands == 3 && (Tok == "uxtb" || Tok == "uxth")) { AArch64Operand &Op = static_cast(*Operands[1]); - if (Op.isReg() && + if (Op.isScalarReg() && AArch64MCRegisterClasses[AArch64::GPR64allRegClassID].contains( Op.getReg())) { // The source register can be Wn here, but the matcher expects a // GPR32. Twiddle it here if necessary. 
AArch64Operand &Op = static_cast(*Operands[1]); - if (Op.isReg()) { + if (Op.isScalarReg()) { unsigned Reg = getWRegFromXReg(Op.getReg()); Operands[1] = AArch64Operand::CreateReg(Reg, RegKind::Scalar, Op.getStartLoc(), @@ -3924,6 +4052,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, return showMatchError(ErrorLoc, MatchResult, Operands); } + case Match_InvalidTiedOperand: case Match_InvalidMemoryIndexed1: case Match_InvalidMemoryIndexed2: case Match_InvalidMemoryIndexed4: @@ -3949,6 +4078,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, case Match_InvalidMemoryXExtend32: case Match_InvalidMemoryXExtend64: case Match_InvalidMemoryXExtend128: + case Match_InvalidMemoryIndexedSImm6: case Match_InvalidMemoryIndexed4SImm7: case Match_InvalidMemoryIndexed8SImm7: case Match_InvalidMemoryIndexed16SImm7: @@ -3974,6 +4104,17 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, case Match_InvalidLabel: case Match_InvalidComplexRotationEven: case Match_InvalidComplexRotationOdd: + case Match_InvalidSVEPredicateAnyReg: + case Match_InvalidSVEPattern: + case Match_InvalidSVEPredicateBReg: + case Match_InvalidSVEPredicateHReg: + case Match_InvalidSVEPredicateSReg: + case Match_InvalidSVEPredicateDReg: + case Match_InvalidSVEPredicate3bAnyReg: + case Match_InvalidSVEPredicate3bBReg: + case Match_InvalidSVEPredicate3bHReg: + case Match_InvalidSVEPredicate3bSReg: + case Match_InvalidSVEPredicate3bDReg: case Match_MSR: case Match_MRS: { if (ErrorInfo >= Operands.size()) @@ -4324,6 +4465,20 @@ bool AArch64AsmParser::parseDirectiveReq(StringRef Name, SMLoc L) { "sve vector register without type specifier expected"); } + if (RegNum == -1) { + StringRef Kind; + RegisterKind = RegKind::SVEPredicateVector; + OperandMatchResultTy Res = + tryParseSVERegister(RegNum, Kind, RegKind::SVEPredicateVector); + + if (Res == MatchOperand_ParseFail) + return true; + + if (Res == MatchOperand_Success && !Kind.empty()) + return Error(SRegLoc, + "sve predicate register without type specifier expected"); + } + if (RegNum == -1) return Error(SRegLoc, "register name or alias expected"); @@ -4584,3 +4739,47 @@ AArch64AsmParser::tryParseSVEDataVector(OperandVector &Operands) { return MatchOperand_Success; } + +OperandMatchResultTy +AArch64AsmParser::tryParseSVEPattern(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); + + SMLoc SS = getLoc(); + const AsmToken &TokE = Parser.getTok(); + bool IsHash = TokE.is(AsmToken::Hash); + + if (!IsHash && TokE.isNot(AsmToken::Identifier)) + return MatchOperand_NoMatch; + + int64_t Pattern; + if (IsHash) { + Parser.Lex(); // Eat hash + + // Parse the immediate operand. 
+ const MCExpr *ImmVal; + SS = getLoc(); + if (Parser.parseExpression(ImmVal)) + return MatchOperand_ParseFail; + + auto *MCE = dyn_cast(ImmVal); + if (!MCE) + return MatchOperand_ParseFail; + + Pattern = MCE->getValue(); + } else { + // Parse the pattern + auto Pat = AArch64SVEPredPattern::lookupSVEPREDPATByName(TokE.getString()); + if (!Pat) + return MatchOperand_NoMatch; + + Parser.Lex(); + Pattern = Pat->Encoding; + assert(Pattern >= 0 && Pattern < 32); + } + + Operands.push_back( + AArch64Operand::CreateImm(MCConstantExpr::create(Pattern, getContext()), + SS, getLoc(), getContext())); + + return MatchOperand_Success; +} diff --git a/lib/Target/AArch64/CMakeLists.txt b/lib/Target/AArch64/CMakeLists.txt index eb1079be7300..3d4b9dcf7e8d 100644 --- a/lib/Target/AArch64/CMakeLists.txt +++ b/lib/Target/AArch64/CMakeLists.txt @@ -53,7 +53,7 @@ add_llvm_target(AArch64CodeGen AArch64TargetMachine.cpp AArch64TargetObjectFile.cpp AArch64TargetTransformInfo.cpp - AArch64VectorByElementOpt.cpp + AArch64SIMDInstrOpt.cpp DEPENDS intrinsics_gen diff --git a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp index aea1b4f2d2c4..583a07f6a7be 100644 --- a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp +++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp @@ -88,6 +88,12 @@ static DecodeStatus DecodeDDDDRegisterClass(MCInst &Inst, unsigned RegNo, static DecodeStatus DecodeZPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decode); +static DecodeStatus DecodePPRRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decode); +static DecodeStatus DecodePPR_3bRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decode); static DecodeStatus DecodeFixedPointScaleImm32(MCInst &Inst, unsigned Imm, uint64_t Address, @@ -461,6 +467,33 @@ static DecodeStatus DecodeZPRRegisterClass(MCInst &Inst, unsigned RegNo, return Success; } +static const unsigned PPRDecoderTable[] = { + AArch64::P0, AArch64::P1, AArch64::P2, AArch64::P3, + AArch64::P4, AArch64::P5, AArch64::P6, AArch64::P7, + AArch64::P8, AArch64::P9, AArch64::P10, AArch64::P11, + AArch64::P12, AArch64::P13, AArch64::P14, AArch64::P15 +}; + +static DecodeStatus DecodePPRRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Addr, const void *Decoder) { + if (RegNo > 15) + return Fail; + + unsigned Register = PPRDecoderTable[RegNo]; + Inst.addOperand(MCOperand::createReg(Register)); + return Success; +} + +static DecodeStatus DecodePPR_3bRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Addr, + const void* Decoder) { + if (RegNo > 7) + return Fail; + + // Just reuse the PPR decode table + return DecodePPRRegisterClass(Inst, RegNo, Addr, Decoder); +} + static const unsigned VectorDecoderTable[] = { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7, AArch64::Q8, AArch64::Q9, diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp index bdf71b095fda..119de4c08d3a 100644 --- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp +++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp @@ -1340,6 +1340,16 @@ void AArch64InstPrinter::printComplexRotationOp(const MCInst *MI, unsigned OpNo, O << "#" << (Val * Angle) + Remainder; } +void AArch64InstPrinter::printSVEPattern(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, + raw_ostream &O) { + unsigned Val = 
MI->getOperand(OpNum).getImm(); + if (auto Pat = AArch64SVEPredPattern::lookupSVEPREDPATByEncoding(Val)) + O << Pat->Name; + else + O << '#' << formatImm(Val); +} + template void AArch64InstPrinter::printSVERegOp(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h index 76f20f042cef..baf11e5c9c61 100644 --- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h +++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h @@ -17,6 +17,7 @@ #include "MCTargetDesc/AArch64MCTargetDesc.h" #include "llvm/ADT/StringRef.h" #include "llvm/MC/MCInstPrinter.h" +#include "../Utils/AArch64BaseInfo.h" namespace llvm { @@ -165,6 +166,8 @@ class AArch64InstPrinter : public MCInstPrinter { void printGPRSeqPairsClassOperand(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O); + void printSVEPattern(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); template void printSVERegOp(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O); diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp index 7b33b4b5b542..4d1d3fd57353 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp @@ -605,10 +605,10 @@ class COFFAArch64AsmBackend : public AArch64AsmBackend { } MCAsmBackend *llvm::createAArch64leAsmBackend(const Target &T, + const MCSubtargetInfo &STI, const MCRegisterInfo &MRI, - const Triple &TheTriple, - StringRef CPU, const MCTargetOptions &Options) { + const Triple &TheTriple = STI.getTargetTriple(); if (TheTriple.isOSBinFormatMachO()) return new DarwinAArch64AsmBackend(T, TheTriple, MRI); @@ -624,10 +624,10 @@ MCAsmBackend *llvm::createAArch64leAsmBackend(const Target &T, } MCAsmBackend *llvm::createAArch64beAsmBackend(const Target &T, + const MCSubtargetInfo &STI, const MCRegisterInfo &MRI, - const Triple &TheTriple, - StringRef CPU, const MCTargetOptions &Options) { + const Triple &TheTriple = STI.getTargetTriple(); assert(TheTriple.isOSBinFormatELF() && "Big endian is only supported for ELF targets!"); uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS()); diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp index c5da457c38ff..12b5a27b7699 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp @@ -104,6 +104,11 @@ AArch64MCAsmInfoELF::AArch64MCAsmInfoELF(const Triple &T) { AArch64MCAsmInfoCOFF::AArch64MCAsmInfoCOFF() { PrivateGlobalPrefix = ".L"; PrivateLabelPrefix = ".L"; + + Data16bitsDirective = "\t.hword\t"; + Data32bitsDirective = "\t.word\t"; + Data64bitsDirective = "\t.xword\t"; + AlignmentIsInBytes = false; SupportsDebugInformation = true; CodePointerSize = 8; diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp index 97c92fa0778d..f606d272bcb0 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp @@ -13,7 +13,6 @@ //===----------------------------------------------------------------------===// #include "AArch64MCExpr.h" -#include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbolELF.h" diff --git 
a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h index b9e1673b9317..a5720e0e8b87 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h @@ -45,12 +45,12 @@ MCCodeEmitter *createAArch64MCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, MCContext &Ctx); MCAsmBackend *createAArch64leAsmBackend(const Target &T, + const MCSubtargetInfo &STI, const MCRegisterInfo &MRI, - const Triple &TT, StringRef CPU, const MCTargetOptions &Options); MCAsmBackend *createAArch64beAsmBackend(const Target &T, + const MCSubtargetInfo &STI, const MCRegisterInfo &MRI, - const Triple &TT, StringRef CPU, const MCTargetOptions &Options); std::unique_ptr diff --git a/lib/Target/AArch64/SVEInstrFormats.td b/lib/Target/AArch64/SVEInstrFormats.td index 9c36deea8136..078ae683110d 100644 --- a/lib/Target/AArch64/SVEInstrFormats.td +++ b/lib/Target/AArch64/SVEInstrFormats.td @@ -11,8 +11,105 @@ // //===----------------------------------------------------------------------===// +def SVEPatternOperand : AsmOperandClass { + let Name = "SVEPattern"; + let ParserMethod = "tryParseSVEPattern"; + let PredicateMethod = "isSVEPattern"; + let RenderMethod = "addImmOperands"; + let DiagnosticType = "InvalidSVEPattern"; +} + +def sve_pred_enum : Operand, ImmLeaf { + + let PrintMethod = "printSVEPattern"; + let ParserMatchClass = SVEPatternOperand; +} + +//===----------------------------------------------------------------------===// +// SVE PTrue - These are used extensively throughout the pattern matching so +// it's important we define them first. +//===----------------------------------------------------------------------===// + +class sve_int_ptrue sz8_64, bits<3> opc, string asm, PPRRegOp pprty> +: I<(outs pprty:$Pd), (ins sve_pred_enum:$pattern), + asm, "\t$Pd, $pattern", + "", + []>, Sched<[]> { + bits<4> Pd; + bits<5> pattern; + let Inst{31-24} = 0b00100101; + let Inst{23-22} = sz8_64; + let Inst{21-19} = 0b011; + let Inst{18-17} = opc{2-1}; + let Inst{16} = opc{0}; + let Inst{15-10} = 0b111000; + let Inst{9-5} = pattern; + let Inst{4} = 0b0; + let Inst{3-0} = Pd; + + let Defs = !if(!eq (opc{0}, 1), [NZCV], []); +} + +multiclass sve_int_ptrue opc, string asm> { + def _B : sve_int_ptrue<0b00, opc, asm, PPR8>; + def _H : sve_int_ptrue<0b01, opc, asm, PPR16>; + def _S : sve_int_ptrue<0b10, opc, asm, PPR32>; + def _D : sve_int_ptrue<0b11, opc, asm, PPR64>; + + def : InstAlias(NAME # _B) PPR8:$Pd, 0b11111), 1>; + def : InstAlias(NAME # _H) PPR16:$Pd, 0b11111), 1>; + def : InstAlias(NAME # _S) PPR32:$Pd, 0b11111), 1>; + def : InstAlias(NAME # _D) PPR64:$Pd, 0b11111), 1>; +} + +let Predicates = [HasSVE] in { + defm PTRUE : sve_int_ptrue<0b000, "ptrue">; + defm PTRUES : sve_int_ptrue<0b001, "ptrues">; +} + +//===----------------------------------------------------------------------===// +// SVE Permute - Cross Lane Group +//===----------------------------------------------------------------------===// + +class sve_int_perm_dup_r sz8_64, string asm, ZPRRegOp zprty, + RegisterClass srcRegType> +: I<(outs zprty:$Zd), (ins srcRegType:$Rn), + asm, "\t$Zd, $Rn", + "", + []>, Sched<[]> { + bits<5> Rn; + bits<5> Zd; + let Inst{31-24} = 0b00000101; + let Inst{23-22} = sz8_64; + let Inst{21-10} = 0b100000001110; + let Inst{9-5} = Rn; + let Inst{4-0} = Zd; +} + +multiclass sve_int_perm_dup_r { + def _B : sve_int_perm_dup_r<0b00, asm, ZPR8, GPR32sp>; + def _H : sve_int_perm_dup_r<0b01, asm, ZPR16, 
GPR32sp>; + def _S : sve_int_perm_dup_r<0b10, asm, ZPR32, GPR32sp>; + def _D : sve_int_perm_dup_r<0b11, asm, ZPR64, GPR64sp>; + + def : InstAlias<"mov $Zd, $Rn", + (!cast(NAME # _B) ZPR8:$Zd, GPR32sp:$Rn), 1>; + def : InstAlias<"mov $Zd, $Rn", + (!cast(NAME # _H) ZPR16:$Zd, GPR32sp:$Rn), 1>; + def : InstAlias<"mov $Zd, $Rn", + (!cast(NAME # _S) ZPR32:$Zd, GPR32sp:$Rn), 1>; + def : InstAlias<"mov $Zd, $Rn", + (!cast(NAME # _D) ZPR64:$Zd, GPR64sp:$Rn), 1>; +} + //===----------------------------------------------------------------------===// -// SVE Integer Arithmetic - Unpredicated Group. +// SVE Integer Arithmetic - Unpredicated Group. //===----------------------------------------------------------------------===// class sve_int_bin_cons_arit_0 sz8_64, bits<3> opc, string asm, @@ -39,3 +136,133 @@ multiclass sve_int_bin_cons_arit_0 opc, string asm> { def _S : sve_int_bin_cons_arit_0<0b10, opc, asm, ZPR32>; def _D : sve_int_bin_cons_arit_0<0b11, opc, asm, ZPR64>; } + +//===----------------------------------------------------------------------===// +// SVE Stack Allocation Group +//===----------------------------------------------------------------------===// + +class sve_int_arith_vl +: I<(outs GPR64sp:$Rd), (ins GPR64sp:$Rn, simm6_32b:$imm6), + asm, "\t$Rd, $Rn, $imm6", + "", + []>, Sched<[]> { + bits<5> Rd; + bits<5> Rn; + bits<6> imm6; + let Inst{31-23} = 0b000001000; + let Inst{22} = opc; + let Inst{21} = 0b1; + let Inst{20-16} = Rn; + let Inst{15-11} = 0b01010; + let Inst{10-5} = imm6; + let Inst{4-0} = Rd; +} + +class sve_int_read_vl_a opc2, string asm> +: I<(outs GPR64:$Rd), (ins simm6_32b:$imm6), + asm, "\t$Rd, $imm6", + "", + []>, Sched<[]> { + bits<5> Rd; + bits<6> imm6; + let Inst{31-23} = 0b000001001; + let Inst{22} = op; + let Inst{21} = 0b1; + let Inst{20-16} = opc2{4-0}; + let Inst{15-11} = 0b01010; + let Inst{10-5} = imm6; + let Inst{4-0} = Rd; +} + +//===----------------------------------------------------------------------===// +// SVE Permute - In Lane Group +//===----------------------------------------------------------------------===// + +class sve_int_perm_bin_perm_zz opc, bits<2> sz8_64, string asm, + ZPRRegOp zprty> +: I<(outs zprty:$Zd), (ins zprty:$Zn, zprty:$Zm), + asm, "\t$Zd, $Zn, $Zm", + "", + []>, Sched<[]> { + bits<5> Zd; + bits<5> Zm; + bits<5> Zn; + let Inst{31-24} = 0b00000101; + let Inst{23-22} = sz8_64; + let Inst{21} = 0b1; + let Inst{20-16} = Zm; + let Inst{15-13} = 0b011; + let Inst{12-10} = opc; + let Inst{9-5} = Zn; + let Inst{4-0} = Zd; +} + +multiclass sve_int_perm_bin_perm_zz opc, string asm> { + def _B : sve_int_perm_bin_perm_zz; + def _H : sve_int_perm_bin_perm_zz; + def _S : sve_int_perm_bin_perm_zz; + def _D : sve_int_perm_bin_perm_zz; +} + +//===----------------------------------------------------------------------===// +// SVE Integer Arithmetic - Binary Predicated Group +//===----------------------------------------------------------------------===// + +class sve_int_bin_pred_arit_log sz8_64, bits<2> fmt, bits<3> opc, + string asm, ZPRRegOp zprty> +: I<(outs zprty:$Zdn), (ins PPR3bAny:$Pg, zprty:$_Zdn, zprty:$Zm), + asm, "\t$Zdn, $Pg/m, $_Zdn, $Zm", "", []>, Sched<[]> { + bits<3> Pg; + bits<5> Zdn; + bits<5> Zm; + let Inst{31-24} = 0b00000100; + let Inst{23-22} = sz8_64; + let Inst{21} = 0b0; + let Inst{20-19} = fmt; + let Inst{18-16} = opc; + let Inst{15-13} = 0b000; + let Inst{12-10} = Pg; + let Inst{9-5} = Zm; + let Inst{4-0} = Zdn; + + let Constraints = "$Zdn = $_Zdn"; +} + +multiclass sve_int_bin_pred_arit_0 opc, string asm> { + def 
_B : sve_int_bin_pred_arit_log<0b00, 0b00, opc, asm, ZPR8>; + def _H : sve_int_bin_pred_arit_log<0b01, 0b00, opc, asm, ZPR16>; + def _S : sve_int_bin_pred_arit_log<0b10, 0b00, opc, asm, ZPR32>; + def _D : sve_int_bin_pred_arit_log<0b11, 0b00, opc, asm, ZPR64>; +} + +//===----------------------------------------------------------------------===// +// SVE Permute - Predicates Group +//===----------------------------------------------------------------------===// + +class sve_int_perm_bin_perm_pp opc, bits<2> sz8_64, string asm, + PPRRegOp pprty> +: I<(outs pprty:$Pd), (ins pprty:$Pn, pprty:$Pm), + asm, "\t$Pd, $Pn, $Pm", + "", + []>, Sched<[]> { + bits<4> Pd; + bits<4> Pm; + bits<4> Pn; + let Inst{31-24} = 0b00000101; + let Inst{23-22} = sz8_64; + let Inst{21-20} = 0b10; + let Inst{19-16} = Pm; + let Inst{15-13} = 0b010; + let Inst{12-10} = opc; + let Inst{9} = 0b0; + let Inst{8-5} = Pn; + let Inst{4} = 0b0; + let Inst{3-0} = Pd; +} + +multiclass sve_int_perm_bin_perm_pp opc, string asm> { + def _B : sve_int_perm_bin_perm_pp; + def _H : sve_int_perm_bin_perm_pp; + def _S : sve_int_perm_bin_perm_pp; + def _D : sve_int_perm_bin_perm_pp; +} diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp index e65ba1f2401d..a9c4f3854def 100644 --- a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp +++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp @@ -60,6 +60,13 @@ namespace llvm { } } +namespace llvm { + namespace AArch64SVEPredPattern { +#define GET_SVEPREDPAT_IMPL +#include "AArch64GenSystemOperands.inc" + } +} + namespace llvm { namespace AArch64PState { #define GET_PSTATE_IMPL diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/lib/Target/AArch64/Utils/AArch64BaseInfo.h index c1c799b7b349..59390e16d8c7 100644 --- a/lib/Target/AArch64/Utils/AArch64BaseInfo.h +++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.h @@ -335,6 +335,15 @@ namespace AArch64PRFM { #include "AArch64GenSystemOperands.inc" } +namespace AArch64SVEPredPattern { + struct SVEPREDPAT { + const char *Name; + uint16_t Encoding; + }; +#define GET_SVEPREDPAT_DECL +#include "AArch64GenSystemOperands.inc" +} + namespace AArch64PState { struct PState : SysAlias{ using SysAlias::SysAlias; diff --git a/lib/Target/AMDGPU/AMDGPU.td b/lib/Target/AMDGPU/AMDGPU.td index faa9a41c96ac..81ead62fe35d 100644 --- a/lib/Target/AMDGPU/AMDGPU.td +++ b/lib/Target/AMDGPU/AMDGPU.td @@ -19,6 +19,12 @@ def FeatureFP64 : SubtargetFeature<"fp64", "Enable double precision operations" >; +def FeatureFMA : SubtargetFeature<"fmaf", + "FMA", + "true", + "Enable single precision FMA (not as fast as mul+add, but fused)" +>; + def FeatureFastFMAF32 : SubtargetFeature<"fast-fmaf", "FastFMAF32", "true", @@ -286,6 +292,12 @@ def FeatureIntClamp : SubtargetFeature<"int-clamp-insts", "Support clamp for integer destination" >; +def FeatureUnpackedD16VMem : SubtargetFeature<"unpacked-d16-vmem", + "HasUnpackedD16VMem", + "true", + "Has unpacked d16 vmem instructions" +>; + //===------------------------------------------------------------===// // Subtarget Features (options and debugging) //===------------------------------------------------------------===// @@ -534,30 +546,34 @@ def FeatureISAVersion7_0_3 : SubtargetFeatureISAVersion <7,0,3, [FeatureSeaIslands, FeatureLDSBankCount16]>; +def FeatureISAVersion7_0_4 : SubtargetFeatureISAVersion <7,0,4, + [FeatureSeaIslands, + FeatureLDSBankCount32]>; + def FeatureISAVersion8_0_0 : SubtargetFeatureISAVersion <8,0,0, [FeatureVolcanicIslands, FeatureLDSBankCount32, - 
FeatureSGPRInitBug]>; + FeatureSGPRInitBug, + FeatureUnpackedD16VMem]>; def FeatureISAVersion8_0_1 : SubtargetFeatureISAVersion <8,0,1, [FeatureVolcanicIslands, FeatureFastFMAF32, HalfRate64Ops, FeatureLDSBankCount32, - FeatureXNACK]>; + FeatureXNACK, + FeatureUnpackedD16VMem]>; def FeatureISAVersion8_0_2 : SubtargetFeatureISAVersion <8,0,2, [FeatureVolcanicIslands, FeatureLDSBankCount32, - FeatureSGPRInitBug]>; + FeatureSGPRInitBug, + FeatureUnpackedD16VMem]>; def FeatureISAVersion8_0_3 : SubtargetFeatureISAVersion <8,0,3, [FeatureVolcanicIslands, - FeatureLDSBankCount32]>; - -def FeatureISAVersion8_0_4 : SubtargetFeatureISAVersion <8,0,4, - [FeatureVolcanicIslands, - FeatureLDSBankCount32]>; + FeatureLDSBankCount32, + FeatureUnpackedD16VMem]>; def FeatureISAVersion8_1_0 : SubtargetFeatureISAVersion <8,1,0, [FeatureVolcanicIslands, @@ -570,24 +586,12 @@ def FeatureISAVersion9_0_0 : SubtargetFeatureISAVersion <9,0,0, FeatureLDSBankCount32 ]>; -def FeatureISAVersion9_0_1 : SubtargetFeatureISAVersion <9,0,1, - [FeatureGFX9, - FeatureMadMixInsts, - FeatureLDSBankCount32, - FeatureXNACK]>; - def FeatureISAVersion9_0_2 : SubtargetFeatureISAVersion <9,0,2, [FeatureGFX9, FeatureMadMixInsts, FeatureLDSBankCount32 ]>; -def FeatureISAVersion9_0_3 : SubtargetFeatureISAVersion <9,0,3, - [FeatureGFX9, - FeatureMadMixInsts, - FeatureLDSBankCount32, - FeatureXNACK]>; - //===----------------------------------------------------------------------===// // Debugger related subtarget features. //===----------------------------------------------------------------------===// @@ -721,6 +725,15 @@ def HasFlatScratchInsts : Predicate<"Subtarget->hasFlatScratchInsts()">, def HasD16LoadStore : Predicate<"Subtarget->hasD16LoadStore()">, AssemblerPredicate<"FeatureGFX9Insts">; +def HasUnpackedD16VMem : Predicate<"Subtarget->hasUnpackedD16VMem()">, + AssemblerPredicate<"FeatureUnpackedD16VMem">; +def HasPackedD16VMem : Predicate<"!Subtarget->hasUnpackedD16VMem()">, + AssemblerPredicate<"!FeatureUnpackedD16VMem">; + + +def LDSRequiresM0Init : Predicate<"Subtarget->ldsRequiresM0Init()">; +def NotLDSRequiresM0Init : Predicate<"!Subtarget->ldsRequiresM0Init()">; + def HasDSAddTid : Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX9">, AssemblerPredicate<"FeatureGFX9Insts">; diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 563ca0d236a4..50d1d4351188 100644 --- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -205,7 +205,7 @@ void AMDGPUAsmPrinter::EmitFunctionBodyStart() { if (TM.getTargetTriple().getOS() != Triple::AMDHSA) return; - HSAMetadataStream.emitKernel(*MF->getFunction(), + HSAMetadataStream.emitKernel(MF->getFunction(), getHSACodeProps(*MF, CurrentProgramInfo), getHSADebugProps(*MF, CurrentProgramInfo)); } @@ -215,14 +215,34 @@ void AMDGPUAsmPrinter::EmitFunctionEntryLabel() { const AMDGPUSubtarget &STM = MF->getSubtarget(); if (MFI->isEntryFunction() && STM.isAmdCodeObjectV2(*MF)) { SmallString<128> SymbolName; - getNameWithPrefix(SymbolName, MF->getFunction()), + getNameWithPrefix(SymbolName, &MF->getFunction()), getTargetStreamer()->EmitAMDGPUSymbolType( SymbolName, ELF::STT_AMDGPU_HSA_KERNEL); } + const AMDGPUSubtarget &STI = MF->getSubtarget(); + if (STI.dumpCode()) { + // Disassemble function name label to text. 
+ DisasmLines.push_back(MF->getName().str() + ":"); + DisasmLineMaxLen = std::max(DisasmLineMaxLen, DisasmLines.back().size()); + HexLines.push_back(""); + } AsmPrinter::EmitFunctionEntryLabel(); } +void AMDGPUAsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const { + const AMDGPUSubtarget &STI = MBB.getParent()->getSubtarget(); + if (STI.dumpCode() && !isBlockOnlyReachableByFallthrough(&MBB)) { + // Write a line for the basic block label if it is not only fallthrough. + DisasmLines.push_back( + (Twine("BB") + Twine(getFunctionNumber()) + + "_" + Twine(MBB.getNumber()) + ":").str()); + DisasmLineMaxLen = std::max(DisasmLineMaxLen, DisasmLines.back().size()); + HexLines.push_back(""); + } + AsmPrinter::EmitBasicBlockStart(MBB); +} + void AMDGPUAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { // Group segment variables aren't emitted in HSA. @@ -294,7 +314,7 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { getSIProgramInfo(CurrentProgramInfo, MF); } else { auto I = CallGraphResourceInfo.insert( - std::make_pair(MF.getFunction(), SIFunctionResourceInfo())); + std::make_pair(&MF.getFunction(), SIFunctionResourceInfo())); SIFunctionResourceInfo &Info = I.first->second; assert(I.second && "should only be called once per function"); Info = analyzeResourceUsage(MF); @@ -323,7 +343,7 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { if (STM.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) { if (!MFI->isEntryFunction()) { OutStreamer->emitRawComment(" Function info:", false); - SIFunctionResourceInfo &Info = CallGraphResourceInfo[MF.getFunction()]; + SIFunctionResourceInfo &Info = CallGraphResourceInfo[&MF.getFunction()]; emitCommonFunctionComments( Info.NumVGPR, Info.getTotalNumSGPRs(MF.getSubtarget()), @@ -406,8 +426,11 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { Context.getELFSection(".AMDGPU.disasm", ELF::SHT_NOTE, 0)); for (size_t i = 0; i < DisasmLines.size(); ++i) { - std::string Comment(DisasmLineMaxLen - DisasmLines[i].size(), ' '); - Comment += " ; " + HexLines[i] + "\n"; + std::string Comment = "\n"; + if (!HexLines[i].empty()) { + Comment = std::string(DisasmLineMaxLen - DisasmLines[i].size(), ' '); + Comment += " ; " + HexLines[i] + "\n"; + } OutStreamer->EmitBytes(StringRef(DisasmLines[i])); OutStreamer->EmitBytes(StringRef(Comment)); @@ -446,7 +469,7 @@ void AMDGPUAsmPrinter::EmitProgramInfoR600(const MachineFunction &MF) { unsigned RsrcReg; if (STM.getGeneration() >= R600Subtarget::EVERGREEN) { // Evergreen / Northern Islands - switch (MF.getFunction()->getCallingConv()) { + switch (MF.getFunction().getCallingConv()) { default: LLVM_FALLTHROUGH; case CallingConv::AMDGPU_CS: RsrcReg = R_0288D4_SQ_PGM_RESOURCES_LS; break; case CallingConv::AMDGPU_GS: RsrcReg = R_028878_SQ_PGM_RESOURCES_GS; break; @@ -455,7 +478,7 @@ void AMDGPUAsmPrinter::EmitProgramInfoR600(const MachineFunction &MF) { } } else { // R600 / R700 - switch (MF.getFunction()->getCallingConv()) { + switch (MF.getFunction().getCallingConv()) { default: LLVM_FALLTHROUGH; case CallingConv::AMDGPU_GS: LLVM_FALLTHROUGH; case CallingConv::AMDGPU_CS: LLVM_FALLTHROUGH; @@ -470,7 +493,7 @@ void AMDGPUAsmPrinter::EmitProgramInfoR600(const MachineFunction &MF) { OutStreamer->EmitIntValue(R_02880C_DB_SHADER_CONTROL, 4); OutStreamer->EmitIntValue(S_02880C_KILL_ENABLE(killPixel), 4); - if (AMDGPU::isCompute(MF.getFunction()->getCallingConv())) { + if (AMDGPU::isCompute(MF.getFunction().getCallingConv())) { 
OutStreamer->EmitIntValue(R_0288E8_SQ_LDS_ALLOC, 4); OutStreamer->EmitIntValue(alignTo(MFI->getLDSSize(), 4) >> 2, 4); } @@ -640,6 +663,11 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage( case AMDGPU::FLAT_SCR_HI: continue; + case AMDGPU::XNACK_MASK: + case AMDGPU::XNACK_MASK_LO: + case AMDGPU::XNACK_MASK_HI: + llvm_unreachable("xnack_mask registers should not be used"); + case AMDGPU::TBA: case AMDGPU::TBA_LO: case AMDGPU::TBA_HI: @@ -672,18 +700,24 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage( IsSGPR = false; Width = 3; } else if (AMDGPU::SReg_128RegClass.contains(Reg)) { + assert(!AMDGPU::TTMP_128RegClass.contains(Reg) && + "trap handler registers should not be used"); IsSGPR = true; Width = 4; } else if (AMDGPU::VReg_128RegClass.contains(Reg)) { IsSGPR = false; Width = 4; } else if (AMDGPU::SReg_256RegClass.contains(Reg)) { + assert(!AMDGPU::TTMP_256RegClass.contains(Reg) && + "trap handler registers should not be used"); IsSGPR = true; Width = 8; } else if (AMDGPU::VReg_256RegClass.contains(Reg)) { IsSGPR = false; Width = 8; } else if (AMDGPU::SReg_512RegClass.contains(Reg)) { + assert(!AMDGPU::TTMP_512RegClass.contains(Reg) && + "trap handler registers should not be used"); IsSGPR = true; Width = 16; } else if (AMDGPU::VReg_512RegClass.contains(Reg)) { @@ -764,9 +798,9 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, ProgInfo.DynamicCallStack = Info.HasDynamicallySizedStack || Info.HasRecursion; if (!isUInt<32>(ProgInfo.ScratchSize)) { - DiagnosticInfoStackSize DiagStackSize(*MF.getFunction(), + DiagnosticInfoStackSize DiagStackSize(MF.getFunction(), ProgInfo.ScratchSize, DS_Error); - MF.getFunction()->getContext().diagnose(DiagStackSize); + MF.getFunction().getContext().diagnose(DiagStackSize); } const SISubtarget &STM = MF.getSubtarget(); @@ -785,8 +819,8 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, unsigned MaxAddressableNumSGPRs = STM.getAddressableNumSGPRs(); if (ProgInfo.NumSGPR > MaxAddressableNumSGPRs) { // This can happen due to a compiler bug or when using inline asm. - LLVMContext &Ctx = MF.getFunction()->getContext(); - DiagnosticInfoResourceLimit Diag(*MF.getFunction(), + LLVMContext &Ctx = MF.getFunction().getContext(); + DiagnosticInfoResourceLimit Diag(MF.getFunction(), "addressable scalar registers", ProgInfo.NumSGPR, DS_Error, DK_ResourceLimit, @@ -813,8 +847,8 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, if (ProgInfo.NumSGPR > MaxAddressableNumSGPRs) { // This can happen due to a compiler bug or when using inline asm to use // the registers which are usually reserved for vcc etc. 
- LLVMContext &Ctx = MF.getFunction()->getContext(); - DiagnosticInfoResourceLimit Diag(*MF.getFunction(), + LLVMContext &Ctx = MF.getFunction().getContext(); + DiagnosticInfoResourceLimit Diag(MF.getFunction(), "scalar registers", ProgInfo.NumSGPR, DS_Error, DK_ResourceLimit, @@ -833,15 +867,15 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, } if (MFI->getNumUserSGPRs() > STM.getMaxNumUserSGPRs()) { - LLVMContext &Ctx = MF.getFunction()->getContext(); - DiagnosticInfoResourceLimit Diag(*MF.getFunction(), "user SGPRs", + LLVMContext &Ctx = MF.getFunction().getContext(); + DiagnosticInfoResourceLimit Diag(MF.getFunction(), "user SGPRs", MFI->getNumUserSGPRs(), DS_Error); Ctx.diagnose(Diag); } if (MFI->getLDSSize() > static_cast(STM.getLocalMemorySize())) { - LLVMContext &Ctx = MF.getFunction()->getContext(); - DiagnosticInfoResourceLimit Diag(*MF.getFunction(), "local memory", + LLVMContext &Ctx = MF.getFunction().getContext(); + DiagnosticInfoResourceLimit Diag(MF.getFunction(), "local memory", MFI->getLDSSize(), DS_Error); Ctx.diagnose(Diag); } @@ -954,9 +988,9 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF, const SIProgramInfo &CurrentProgramInfo) { const SISubtarget &STM = MF.getSubtarget(); const SIMachineFunctionInfo *MFI = MF.getInfo(); - unsigned RsrcReg = getRsrcReg(MF.getFunction()->getCallingConv()); + unsigned RsrcReg = getRsrcReg(MF.getFunction().getCallingConv()); - if (AMDGPU::isCompute(MF.getFunction()->getCallingConv())) { + if (AMDGPU::isCompute(MF.getFunction().getCallingConv())) { OutStreamer->EmitIntValue(R_00B848_COMPUTE_PGM_RSRC1, 4); OutStreamer->EmitIntValue(CurrentProgramInfo.ComputePGMRSrc1, 4); @@ -974,13 +1008,13 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF, OutStreamer->EmitIntValue(S_00B028_VGPRS(CurrentProgramInfo.VGPRBlocks) | S_00B028_SGPRS(CurrentProgramInfo.SGPRBlocks), 4); unsigned Rsrc2Val = 0; - if (STM.isVGPRSpillingEnabled(*MF.getFunction())) { + if (STM.isVGPRSpillingEnabled(MF.getFunction())) { OutStreamer->EmitIntValue(R_0286E8_SPI_TMPRING_SIZE, 4); OutStreamer->EmitIntValue(S_0286E8_WAVESIZE(CurrentProgramInfo.ScratchBlocks), 4); if (TM.getTargetTriple().getOS() == Triple::AMDPAL) Rsrc2Val = S_00B84C_SCRATCH_EN(CurrentProgramInfo.ScratchBlocks > 0); } - if (MF.getFunction()->getCallingConv() == CallingConv::AMDGPU_PS) { + if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_PS) { OutStreamer->EmitIntValue(R_0286CC_SPI_PS_INPUT_ENA, 4); OutStreamer->EmitIntValue(MFI->getPSInputEnable(), 4); OutStreamer->EmitIntValue(R_0286D0_SPI_PS_INPUT_ADDR, 4); @@ -1013,13 +1047,13 @@ void AMDGPUAsmPrinter::EmitPALMetadata(const MachineFunction &MF, // we can use the same fixed value that .AMDGPU.config has for Mesa. Note // that we use a register number rather than a byte offset, so we need to // divide by 4. - unsigned Rsrc1Reg = getRsrcReg(MF.getFunction()->getCallingConv()) / 4; + unsigned Rsrc1Reg = getRsrcReg(MF.getFunction().getCallingConv()) / 4; unsigned Rsrc2Reg = Rsrc1Reg + 1; // Also calculate the PAL metadata key for *S_SCRATCH_SIZE. It can be used // with a constant offset to access any non-register shader-specific PAL // metadata key. 
unsigned ScratchSizeKey = PALMD::Key::CS_SCRATCH_SIZE; - switch (MF.getFunction()->getCallingConv()) { + switch (MF.getFunction().getCallingConv()) { case CallingConv::AMDGPU_PS: ScratchSizeKey = PALMD::Key::PS_SCRATCH_SIZE; break; @@ -1045,7 +1079,7 @@ void AMDGPUAsmPrinter::EmitPALMetadata(const MachineFunction &MF, PALMD::Key::VS_NUM_USED_SGPRS - PALMD::Key::VS_SCRATCH_SIZE; PALMetadataMap[NumUsedVgprsKey] = CurrentProgramInfo.NumVGPRsForWavesPerEU; PALMetadataMap[NumUsedSgprsKey] = CurrentProgramInfo.NumSGPRsForWavesPerEU; - if (AMDGPU::isCompute(MF.getFunction()->getCallingConv())) { + if (AMDGPU::isCompute(MF.getFunction().getCallingConv())) { PALMetadataMap[Rsrc1Reg] |= CurrentProgramInfo.ComputePGMRSrc1; PALMetadataMap[Rsrc2Reg] |= CurrentProgramInfo.ComputePGMRSrc2; // ScratchSize is in bytes, 16 aligned. @@ -1060,7 +1094,7 @@ void AMDGPUAsmPrinter::EmitPALMetadata(const MachineFunction &MF, PALMetadataMap[ScratchSizeKey] |= alignTo(CurrentProgramInfo.ScratchSize, 16); } - if (MF.getFunction()->getCallingConv() == CallingConv::AMDGPU_PS) { + if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_PS) { PALMetadataMap[Rsrc2Reg] |= S_00B02C_EXTRA_LDS_SIZE(CurrentProgramInfo.LDSBlocks); PALMetadataMap[R_0286CC_SPI_PS_INPUT_ENA / 4] |= MFI->getPSInputEnable(); @@ -1188,6 +1222,8 @@ AMDGPU::HSAMD::Kernel::CodeProps::Metadata AMDGPUAsmPrinter::getHSACodeProps( HSACodeProps.mMaxFlatWorkGroupSize = MFI.getMaxFlatWorkGroupSize(); HSACodeProps.mIsDynamicCallStack = ProgramInfo.DynamicCallStack; HSACodeProps.mIsXNACKEnabled = STM.isXNACKEnabled(); + HSACodeProps.mNumSpilledSGPRs = MFI.getNumSpilledSGPRs(); + HSACodeProps.mNumSpilledVGPRs = MFI.getNumSpilledVGPRs(); return HSACodeProps; } diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/lib/Target/AMDGPU/AMDGPUAsmPrinter.h index bf9a5defb1fd..51d48a0c7320 100644 --- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.h +++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.h @@ -181,6 +181,8 @@ class AMDGPUAsmPrinter final : public AsmPrinter { void EmitFunctionEntryLabel() override; + void EmitBasicBlockStart(const MachineBasicBlock &MBB) const override; + void EmitGlobalVariable(const GlobalVariable *GV) override; void EmitStartOfAsmFile(Module &M) override; @@ -195,8 +197,8 @@ class AMDGPUAsmPrinter final : public AsmPrinter { raw_ostream &O) override; protected: - std::vector DisasmLines, HexLines; - size_t DisasmLineMaxLen; + mutable std::vector DisasmLines, HexLines; + mutable size_t DisasmLineMaxLen; AMDGPUAS AMDGPUASI; }; diff --git a/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/lib/Target/AMDGPU/AMDGPUCallLowering.cpp index 6d6fccb10cb3..5a9138731934 100644 --- a/lib/Target/AMDGPU/AMDGPUCallLowering.cpp +++ b/lib/Target/AMDGPU/AMDGPUCallLowering.cpp @@ -43,7 +43,7 @@ unsigned AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &MIRBuilder, MachineFunction &MF = MIRBuilder.getMF(); const SIMachineFunctionInfo *MFI = MF.getInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); - const Function &F = *MF.getFunction(); + const Function &F = MF.getFunction(); const DataLayout &DL = F.getParent()->getDataLayout(); PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUASI.CONSTANT_ADDRESS); LLT PtrType = getLLTForType(*PtrTy, DL); @@ -64,7 +64,7 @@ void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &MIRBuilder, Type *ParamTy, unsigned Offset, unsigned DstReg) const { MachineFunction &MF = MIRBuilder.getMF(); - const Function &F = *MF.getFunction(); + const Function &F = MF.getFunction(); const DataLayout &DL = F.getParent()->getDataLayout(); PointerType 
*PtrTy = PointerType::get(ParamTy, AMDGPUASI.CONSTANT_ADDRESS); MachinePointerInfo PtrInfo(UndefValue::get(PtrTy)); diff --git a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index b6449b9f2824..440f8b20d48c 100644 --- a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -337,7 +337,8 @@ const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N, } SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const { - if (cast(N)->getAddressSpace() != AMDGPUASI.LOCAL_ADDRESS) + if (cast(N)->getAddressSpace() != AMDGPUASI.LOCAL_ADDRESS || + !Subtarget->ldsRequiresM0Init()) return N; const SITargetLowering& Lowering = @@ -355,9 +356,7 @@ SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const { Ops.push_back(N->getOperand(i)); } Ops.push_back(Glue); - CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops); - - return N; + return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops); } static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) { @@ -451,11 +450,15 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) { } if (isa(N) || - (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC)) + (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC || + Opc == AMDGPUISD::ATOMIC_LOAD_FADD || + Opc == AMDGPUISD::ATOMIC_LOAD_FMIN || + Opc == AMDGPUISD::ATOMIC_LOAD_FMAX)) N = glueCopyToM0(N); switch (Opc) { - default: break; + default: + break; // We are selecting i64 ADD here instead of custom lower it during // DAG legalization, so we can fold some i64 ADDs used for address // calculation into the LOAD and STORE instructions. @@ -702,6 +705,7 @@ bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base, return true; } +// FIXME: Should only handle addcarry/subcarry void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) { SDLoc DL(N); SDValue LHS = N->getOperand(0); @@ -711,8 +715,7 @@ void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) { bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE); bool ProduceCarry = ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC; - bool IsAdd = - (Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE); + bool IsAdd = Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE; SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32); SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32); @@ -875,8 +878,12 @@ bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base, Zero, Addr.getOperand(1)); if (isDSOffsetLegal(Sub, ByteOffset, 16)) { + // FIXME: Select to VOP3 version for with-carry. + unsigned SubOp = Subtarget->hasAddNoCarry() ? + AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_I32_e32; + MachineSDNode *MachineSub - = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32, + = CurDAG->getMachineNode(SubOp, DL, MVT::i32, Zero, Addr.getOperand(1)); Base = SDValue(MachineSub, 0); @@ -945,8 +952,11 @@ bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base, Zero, Addr.getOperand(1)); if (isDSOffsetLegal(Sub, DWordOffset1, 8)) { + unsigned SubOp = Subtarget->hasAddNoCarry() ? 
+ AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_I32_e32; + + MachineSDNode *MachineSub - = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32, + = CurDAG->getMachineNode(SubOp, DL, MVT::i32, Zero, Addr.getOperand(1)); Base = SDValue(MachineSub, 0); @@ -1157,14 +1167,25 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent, SDValue N0 = Addr.getOperand(0); SDValue N1 = Addr.getOperand(1); - // Offsets in vaddr must be positive. + // Offsets in vaddr must be positive if range checking is enabled. // - // The total computation of vaddr + soffset + offset must not overflow. - // If vaddr is negative, even if offset is 0 the sgpr offset add will end up + // The total computation of vaddr + soffset + offset must not overflow. If + // vaddr is negative, even if offset is 0 the sgpr offset add will end up // overflowing. + // + // Prior to gfx9, MUBUF instructions with the vaddr offset enabled would + // always perform a range check. If a negative vaddr base index was used, + // this would fail the range check. The overall address computation would + // compute a valid address, but this doesn't happen due to the range + // check. For out-of-bounds MUBUF loads, a 0 is returned. + // + // Therefore it should be safe to fold any VGPR offset on gfx9 into the + // MUBUF vaddr, but not on older subtargets which can only do this if the + // sign bit is known 0. ConstantSDNode *C1 = cast<ConstantSDNode>(N1); if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue()) && - CurDAG->SignBitIsZero(N0)) { + (!Subtarget->privateMemoryResourceIsRangeChecked() || + CurDAG->SignBitIsZero(N0))) { std::tie(VAddr, SOffset) = foldFrameIndex(N0); ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16); return true; @@ -1656,6 +1677,26 @@ void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) { unsigned CondReg = UseSCCBr ? AMDGPU::SCC : AMDGPU::VCC; SDLoc SL(N); + if (!UseSCCBr) { + // This is the case that we are selecting to S_CBRANCH_VCCNZ. We have not + // analyzed what generates the vcc value, so we do not know whether vcc + // bits for disabled lanes are 0. Thus we need to mask out bits for + // disabled lanes. + // + // For the case that we select S_CBRANCH_SCC1 and it gets + // changed to S_CBRANCH_VCCNZ in SIFixSGPRCopies, SIFixSGPRCopies calls + // SIInstrInfo::moveToVALU which inserts the S_AND. + // + // We could add an analysis of what generates the vcc value here and omit + // the S_AND when it is unnecessary. But it would be better to add a separate + // pass after SIFixSGPRCopies to do the unnecessary S_AND removal, so it + // catches both cases.
+ Cond = SDValue(CurDAG->getMachineNode(AMDGPU::S_AND_B64, SL, MVT::i1, + CurDAG->getRegister(AMDGPU::EXEC, MVT::i1), + Cond), + 0); + } + SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, CondReg, Cond); CurDAG->SelectNodeTo(N, BrOp, MVT::Other, N->getOperand(2), // Basic Block @@ -2062,15 +2103,19 @@ void AMDGPUDAGToDAGISel::PostprocessISelDAG() { bool IsModified = false; do { IsModified = false; + // Go over all selected nodes and try to fold them a bit more - for (SDNode &Node : CurDAG->allnodes()) { - MachineSDNode *MachineNode = dyn_cast(&Node); + SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_begin(); + while (Position != CurDAG->allnodes_end()) { + SDNode *Node = &*Position++; + MachineSDNode *MachineNode = dyn_cast(Node); if (!MachineNode) continue; SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG); - if (ResNode != &Node) { - ReplaceUses(&Node, ResNode); + if (ResNode != Node) { + if (ResNode) + ReplaceUses(Node, ResNode); IsModified = true; } } diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 40166866d38d..0ec2e8ebd349 100644 --- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -1069,7 +1069,7 @@ SDValue AMDGPUTargetLowering::lowerUnhandledCall(CallLoweringInfo &CLI, SDValue Callee = CLI.Callee; SelectionDAG &DAG = CLI.DAG; - const Function &Fn = *DAG.getMachineFunction().getFunction(); + const Function &Fn = DAG.getMachineFunction().getFunction(); StringRef FuncName(""); @@ -1097,7 +1097,7 @@ SDValue AMDGPUTargetLowering::LowerCall(CallLoweringInfo &CLI, SDValue AMDGPUTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const { - const Function &Fn = *DAG.getMachineFunction().getFunction(); + const Function &Fn = DAG.getMachineFunction().getFunction(); DiagnosticInfoUnsupported NoDynamicAlloca(Fn, "unsupported dynamic alloca", SDLoc(Op).getDebugLoc()); @@ -1190,7 +1190,7 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI, } } - const Function &Fn = *DAG.getMachineFunction().getFunction(); + const Function &Fn = DAG.getMachineFunction().getFunction(); DiagnosticInfoUnsupported BadInit( Fn, "unsupported initializer for address space", SDLoc(Op).getDebugLoc()); DAG.getContext()->diagnose(BadInit); @@ -1336,7 +1336,6 @@ SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue Op, return scalarizeVectorLoad(Load, DAG); SDValue BasePtr = Load->getBasePtr(); - EVT PtrVT = BasePtr.getValueType(); EVT MemVT = Load->getMemoryVT(); SDLoc SL(Op); @@ -1357,8 +1356,7 @@ SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue Op, SDValue LoLoad = DAG.getExtLoad(Load->getExtensionType(), SL, LoVT, Load->getChain(), BasePtr, SrcValue, LoMemVT, BaseAlign, Load->getMemOperand()->getFlags()); - SDValue HiPtr = DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr, - DAG.getConstant(Size, SL, PtrVT)); + SDValue HiPtr = DAG.getObjectPtrOffset(SL, BasePtr, Size); SDValue HiLoad = DAG.getExtLoad(Load->getExtensionType(), SL, HiVT, Load->getChain(), HiPtr, SrcValue.getWithOffset(LoMemVT.getStoreSize()), @@ -1397,10 +1395,7 @@ SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op, std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemVT); std::tie(Lo, Hi) = DAG.SplitVector(Val, SL, LoVT, HiVT); - EVT PtrVT = BasePtr.getValueType(); - SDValue HiPtr = DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr, - DAG.getConstant(LoMemVT.getStoreSize(), SL, - PtrVT)); + SDValue HiPtr = DAG.getObjectPtrOffset(SL, BasePtr, LoMemVT.getStoreSize()); const 
MachinePointerInfo &SrcValue = Store->getMemOperand()->getPointerInfo(); unsigned BaseAlign = Store->getAlignment(); @@ -3842,9 +3837,8 @@ SDValue AMDGPUTargetLowering::storeStackInputValue(SelectionDAG &DAG, int64_t Offset) const { MachineFunction &MF = DAG.getMachineFunction(); MachinePointerInfo DstInfo = MachinePointerInfo::getStack(MF, Offset); - SDValue PtrOffset = DAG.getConstant(Offset, SL, MVT::i32); - SDValue Ptr = DAG.getNode(ISD::ADD, SL, MVT::i32, StackPtr, PtrOffset); + SDValue Ptr = DAG.getObjectPtrOffset(SL, StackPtr, Offset); SDValue Store = DAG.getStore(Chain, SL, ArgVal, Ptr, DstInfo, 4, MachineMemOperand::MODereferenceable); return Store; @@ -3982,14 +3976,21 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(LOAD_CONSTANT) NODE_NAME_CASE(TBUFFER_STORE_FORMAT) NODE_NAME_CASE(TBUFFER_STORE_FORMAT_X3) + NODE_NAME_CASE(TBUFFER_STORE_FORMAT_D16) NODE_NAME_CASE(TBUFFER_LOAD_FORMAT) + NODE_NAME_CASE(TBUFFER_LOAD_FORMAT_D16) NODE_NAME_CASE(ATOMIC_CMP_SWAP) NODE_NAME_CASE(ATOMIC_INC) NODE_NAME_CASE(ATOMIC_DEC) + NODE_NAME_CASE(ATOMIC_LOAD_FADD) + NODE_NAME_CASE(ATOMIC_LOAD_FMIN) + NODE_NAME_CASE(ATOMIC_LOAD_FMAX) NODE_NAME_CASE(BUFFER_LOAD) NODE_NAME_CASE(BUFFER_LOAD_FORMAT) + NODE_NAME_CASE(BUFFER_LOAD_FORMAT_D16) NODE_NAME_CASE(BUFFER_STORE) NODE_NAME_CASE(BUFFER_STORE_FORMAT) + NODE_NAME_CASE(BUFFER_STORE_FORMAT_D16) NODE_NAME_CASE(BUFFER_ATOMIC_SWAP) NODE_NAME_CASE(BUFFER_ATOMIC_ADD) NODE_NAME_CASE(BUFFER_ATOMIC_SUB) @@ -4001,6 +4002,83 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(BUFFER_ATOMIC_OR) NODE_NAME_CASE(BUFFER_ATOMIC_XOR) NODE_NAME_CASE(BUFFER_ATOMIC_CMPSWAP) + NODE_NAME_CASE(IMAGE_LOAD) + NODE_NAME_CASE(IMAGE_LOAD_MIP) + NODE_NAME_CASE(IMAGE_STORE) + NODE_NAME_CASE(IMAGE_STORE_MIP) + // Basic sample. + NODE_NAME_CASE(IMAGE_SAMPLE) + NODE_NAME_CASE(IMAGE_SAMPLE_CL) + NODE_NAME_CASE(IMAGE_SAMPLE_D) + NODE_NAME_CASE(IMAGE_SAMPLE_D_CL) + NODE_NAME_CASE(IMAGE_SAMPLE_L) + NODE_NAME_CASE(IMAGE_SAMPLE_B) + NODE_NAME_CASE(IMAGE_SAMPLE_B_CL) + NODE_NAME_CASE(IMAGE_SAMPLE_LZ) + NODE_NAME_CASE(IMAGE_SAMPLE_CD) + NODE_NAME_CASE(IMAGE_SAMPLE_CD_CL) + // Sample with comparison. + NODE_NAME_CASE(IMAGE_SAMPLE_C) + NODE_NAME_CASE(IMAGE_SAMPLE_C_CL) + NODE_NAME_CASE(IMAGE_SAMPLE_C_D) + NODE_NAME_CASE(IMAGE_SAMPLE_C_D_CL) + NODE_NAME_CASE(IMAGE_SAMPLE_C_L) + NODE_NAME_CASE(IMAGE_SAMPLE_C_B) + NODE_NAME_CASE(IMAGE_SAMPLE_C_B_CL) + NODE_NAME_CASE(IMAGE_SAMPLE_C_LZ) + NODE_NAME_CASE(IMAGE_SAMPLE_C_CD) + NODE_NAME_CASE(IMAGE_SAMPLE_C_CD_CL) + // Sample with offsets. + NODE_NAME_CASE(IMAGE_SAMPLE_O) + NODE_NAME_CASE(IMAGE_SAMPLE_CL_O) + NODE_NAME_CASE(IMAGE_SAMPLE_D_O) + NODE_NAME_CASE(IMAGE_SAMPLE_D_CL_O) + NODE_NAME_CASE(IMAGE_SAMPLE_L_O) + NODE_NAME_CASE(IMAGE_SAMPLE_B_O) + NODE_NAME_CASE(IMAGE_SAMPLE_B_CL_O) + NODE_NAME_CASE(IMAGE_SAMPLE_LZ_O) + NODE_NAME_CASE(IMAGE_SAMPLE_CD_O) + NODE_NAME_CASE(IMAGE_SAMPLE_CD_CL_O) + // Sample with comparison and offsets. + NODE_NAME_CASE(IMAGE_SAMPLE_C_O) + NODE_NAME_CASE(IMAGE_SAMPLE_C_CL_O) + NODE_NAME_CASE(IMAGE_SAMPLE_C_D_O) + NODE_NAME_CASE(IMAGE_SAMPLE_C_D_CL_O) + NODE_NAME_CASE(IMAGE_SAMPLE_C_L_O) + NODE_NAME_CASE(IMAGE_SAMPLE_C_B_O) + NODE_NAME_CASE(IMAGE_SAMPLE_C_B_CL_O) + NODE_NAME_CASE(IMAGE_SAMPLE_C_LZ_O) + NODE_NAME_CASE(IMAGE_SAMPLE_C_CD_O) + NODE_NAME_CASE(IMAGE_SAMPLE_C_CD_CL_O) + // Basic gather4. 
+ NODE_NAME_CASE(IMAGE_GATHER4) + NODE_NAME_CASE(IMAGE_GATHER4_CL) + NODE_NAME_CASE(IMAGE_GATHER4_L) + NODE_NAME_CASE(IMAGE_GATHER4_B) + NODE_NAME_CASE(IMAGE_GATHER4_B_CL) + NODE_NAME_CASE(IMAGE_GATHER4_LZ) + // Gather4 with comparison. + NODE_NAME_CASE(IMAGE_GATHER4_C) + NODE_NAME_CASE(IMAGE_GATHER4_C_CL) + NODE_NAME_CASE(IMAGE_GATHER4_C_L) + NODE_NAME_CASE(IMAGE_GATHER4_C_B) + NODE_NAME_CASE(IMAGE_GATHER4_C_B_CL) + NODE_NAME_CASE(IMAGE_GATHER4_C_LZ) + // Gather4 with offsets. + NODE_NAME_CASE(IMAGE_GATHER4_O) + NODE_NAME_CASE(IMAGE_GATHER4_CL_O) + NODE_NAME_CASE(IMAGE_GATHER4_L_O) + NODE_NAME_CASE(IMAGE_GATHER4_B_O) + NODE_NAME_CASE(IMAGE_GATHER4_B_CL_O) + NODE_NAME_CASE(IMAGE_GATHER4_LZ_O) + // Gather4 with comparison and offsets. + NODE_NAME_CASE(IMAGE_GATHER4_C_O) + NODE_NAME_CASE(IMAGE_GATHER4_C_CL_O) + NODE_NAME_CASE(IMAGE_GATHER4_C_L_O) + NODE_NAME_CASE(IMAGE_GATHER4_C_B_O) + NODE_NAME_CASE(IMAGE_GATHER4_C_B_CL_O) + NODE_NAME_CASE(IMAGE_GATHER4_C_LZ_O) + case AMDGPUISD::LAST_AMDGPU_ISD_NUMBER: break; } return nullptr; diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.h b/lib/Target/AMDGPU/AMDGPUISelLowering.h index 3f8a9b1964ca..a16402e3c98d 100644 --- a/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -202,6 +202,16 @@ class AMDGPUTargetLowering : public TargetLowering { const char* getTargetNodeName(unsigned Opcode) const override; + // FIXME: Turn off MergeConsecutiveStores() before Instruction Selection + // for AMDGPU. + // A commit ( git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@319036 + // 91177308-0d34-0410-b5e6-96231b3b80d8 ) turned on + // MergeConsecutiveStores() before Instruction Selection for all targets. + // Enough AMDGPU compiles go into an infinite loop ( MergeConsecutiveStores() + // merges two stores; LegalizeStoreOps() un-merges; MergeConsecutiveStores() + // re-merges, etc. ) to warrant turning it off for now. + bool mergeStoresAfterLegalization() const override { return false; } + bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override { return true; } @@ -441,14 +451,21 @@ enum NodeType : unsigned { LOAD_CONSTANT, TBUFFER_STORE_FORMAT, TBUFFER_STORE_FORMAT_X3, + TBUFFER_STORE_FORMAT_D16, TBUFFER_LOAD_FORMAT, + TBUFFER_LOAD_FORMAT_D16, ATOMIC_CMP_SWAP, ATOMIC_INC, ATOMIC_DEC, + ATOMIC_LOAD_FADD, + ATOMIC_LOAD_FMIN, + ATOMIC_LOAD_FMAX, BUFFER_LOAD, BUFFER_LOAD_FORMAT, + BUFFER_LOAD_FORMAT_D16, BUFFER_STORE, BUFFER_STORE_FORMAT, + BUFFER_STORE_FORMAT_D16, BUFFER_ATOMIC_SWAP, BUFFER_ATOMIC_ADD, BUFFER_ATOMIC_SUB, @@ -460,6 +477,91 @@ enum NodeType : unsigned { BUFFER_ATOMIC_OR, BUFFER_ATOMIC_XOR, BUFFER_ATOMIC_CMPSWAP, + IMAGE_LOAD, + IMAGE_LOAD_MIP, + IMAGE_STORE, + IMAGE_STORE_MIP, + + // Basic sample. + IMAGE_SAMPLE, + IMAGE_SAMPLE_CL, + IMAGE_SAMPLE_D, + IMAGE_SAMPLE_D_CL, + IMAGE_SAMPLE_L, + IMAGE_SAMPLE_B, + IMAGE_SAMPLE_B_CL, + IMAGE_SAMPLE_LZ, + IMAGE_SAMPLE_CD, + IMAGE_SAMPLE_CD_CL, + + // Sample with comparison. + IMAGE_SAMPLE_C, + IMAGE_SAMPLE_C_CL, + IMAGE_SAMPLE_C_D, + IMAGE_SAMPLE_C_D_CL, + IMAGE_SAMPLE_C_L, + IMAGE_SAMPLE_C_B, + IMAGE_SAMPLE_C_B_CL, + IMAGE_SAMPLE_C_LZ, + IMAGE_SAMPLE_C_CD, + IMAGE_SAMPLE_C_CD_CL, + + // Sample with offsets. + IMAGE_SAMPLE_O, + IMAGE_SAMPLE_CL_O, + IMAGE_SAMPLE_D_O, + IMAGE_SAMPLE_D_CL_O, + IMAGE_SAMPLE_L_O, + IMAGE_SAMPLE_B_O, + IMAGE_SAMPLE_B_CL_O, + IMAGE_SAMPLE_LZ_O, + IMAGE_SAMPLE_CD_O, + IMAGE_SAMPLE_CD_CL_O, + + // Sample with comparison and offsets. 
+ IMAGE_SAMPLE_C_O, + IMAGE_SAMPLE_C_CL_O, + IMAGE_SAMPLE_C_D_O, + IMAGE_SAMPLE_C_D_CL_O, + IMAGE_SAMPLE_C_L_O, + IMAGE_SAMPLE_C_B_O, + IMAGE_SAMPLE_C_B_CL_O, + IMAGE_SAMPLE_C_LZ_O, + IMAGE_SAMPLE_C_CD_O, + IMAGE_SAMPLE_C_CD_CL_O, + + // Basic gather4. + IMAGE_GATHER4, + IMAGE_GATHER4_CL, + IMAGE_GATHER4_L, + IMAGE_GATHER4_B, + IMAGE_GATHER4_B_CL, + IMAGE_GATHER4_LZ, + + // Gather4 with comparison. + IMAGE_GATHER4_C, + IMAGE_GATHER4_C_CL, + IMAGE_GATHER4_C_L, + IMAGE_GATHER4_C_B, + IMAGE_GATHER4_C_B_CL, + IMAGE_GATHER4_C_LZ, + + // Gather4 with offsets. + IMAGE_GATHER4_O, + IMAGE_GATHER4_CL_O, + IMAGE_GATHER4_L_O, + IMAGE_GATHER4_B_O, + IMAGE_GATHER4_B_CL_O, + IMAGE_GATHER4_LZ_O, + + // Gather4 with comparison and offsets. + IMAGE_GATHER4_C_O, + IMAGE_GATHER4_C_CL_O, + IMAGE_GATHER4_C_L_O, + IMAGE_GATHER4_C_B_O, + IMAGE_GATHER4_C_B_CL_O, + IMAGE_GATHER4_C_LZ_O, + LAST_AMDGPU_ISD_NUMBER }; diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp b/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp index 1e23aa8411ad..9b9ec0638648 100644 --- a/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp @@ -23,7 +23,6 @@ using namespace llvm; #define GET_INSTRINFO_CTOR_DTOR -#define GET_INSTRMAP_INFO #include "AMDGPUGenInstrInfo.inc" // Pin the vtable to this file. @@ -56,35 +55,16 @@ bool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, return (NumLoads <= 16 && (Offset1 - Offset0) < 64); } -int AMDGPUInstrInfo::getMaskedMIMGOp(uint16_t Opcode, unsigned Channels) const { - switch (Channels) { - default: return Opcode; - case 1: return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_1); - case 2: return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_2); - case 3: return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_3); - } -} - // This must be kept in sync with the SIEncodingFamily class in SIInstrInfo.td enum SIEncodingFamily { SI = 0, VI = 1, SDWA = 2, SDWA9 = 3, - GFX9 = 4 + GFX80 = 4, + GFX9 = 5 }; -// Wrapper for Tablegen'd function. enum Subtarget is not defined in any -// header files, so we need to wrap it in a function that takes unsigned -// instead. -namespace llvm { -namespace AMDGPU { -static int getMCOpcode(uint16_t Opcode, unsigned Gen) { - return getMCOpcodeGen(Opcode, static_cast(Gen)); -} -} -} - static SIEncodingFamily subtargetEncodingFamily(const AMDGPUSubtarget &ST) { switch (ST.getGeneration()) { case AMDGPUSubtarget::SOUTHERN_ISLANDS: diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.h b/lib/Target/AMDGPU/AMDGPUInstrInfo.h index f1a42b42f1f1..a9fcd4834638 100644 --- a/lib/Target/AMDGPU/AMDGPUInstrInfo.h +++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.h @@ -22,6 +22,7 @@ #define GET_INSTRINFO_HEADER #include "AMDGPUGenInstrInfo.inc" +#undef GET_INSTRINFO_HEADER namespace llvm { @@ -49,10 +50,6 @@ class AMDGPUInstrInfo : public AMDGPUGenInstrInfo { /// Return -1 if the target-specific opcode for the pseudo instruction does /// not exist. If Opcode is not a pseudo instruction, this is identity. int pseudoToMCOpcode(int Opcode) const; - - /// \brief Given a MIMG \p Opcode that writes all 4 channels, return the - /// equivalent opcode that writes \p Channels Channels. 
- int getMaskedMIMGOp(uint16_t Opcode, unsigned Channels) const; }; } // End llvm namespace diff --git a/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 16d240e96196..b7f65c20507c 100644 --- a/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -17,6 +17,7 @@ #include "AMDGPURegisterBankInfo.h" #include "AMDGPURegisterInfo.h" #include "AMDGPUSubtarget.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" diff --git a/lib/Target/AMDGPU/AMDGPUInstructions.td b/lib/Target/AMDGPU/AMDGPUInstructions.td index c14679701c0b..31f728b0c22f 100644 --- a/lib/Target/AMDGPU/AMDGPUInstructions.td +++ b/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -49,6 +49,7 @@ def NoFP16Denormals : Predicate<"!Subtarget->hasFP16Denormals()">; def NoFP32Denormals : Predicate<"!Subtarget->hasFP32Denormals()">; def NoFP64Denormals : Predicate<"!Subtarget->hasFP64Denormals()">; def UnsafeFPMath : Predicate<"TM.Options.UnsafeFPMath">; +def FMA : Predicate<"Subtarget->hasFMA()">; def InstFlag : OperandWithDefaultOps ; def ADDRIndirect : ComplexPattern; diff --git a/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp index c15b37f9e9cd..23fd8113932c 100644 --- a/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp +++ b/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp @@ -153,7 +153,7 @@ void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const { int MCOpcode = TII->pseudoToMCOpcode(Opcode); if (MCOpcode == -1) { - LLVMContext &C = MI->getParent()->getParent()->getFunction()->getContext(); + LLVMContext &C = MI->getParent()->getParent()->getFunction().getContext(); C.emitError("AMDGPUMCInstLower::lower - Pseudo instruction doesn't have " "a target-specific version: " + Twine(MI->getOpcode())); } @@ -205,7 +205,7 @@ void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) { StringRef Err; if (!STI.getInstrInfo()->verifyInstruction(*MI, Err)) { - LLVMContext &C = MI->getParent()->getParent()->getFunction()->getContext(); + LLVMContext &C = MI->getParent()->getParent()->getFunction().getContext(); C.emitError("Illegal instruction detected: " + Err); MI->print(errs()); } diff --git a/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp b/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp index 879f65e12287..20918233e447 100644 --- a/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp +++ b/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp @@ -270,8 +270,8 @@ LLVM_DUMP_METHOD void PHILinearize::dump(MachineRegisterInfo *MRI) { dbgs() << "Dest: " << printReg(Element.DestReg, TRI) << " Sources: {"; for (auto &SI : Element.Sources) { - dbgs() << printReg(SI.first, TRI) << "(BB#" - << SI.second->getNumber() << "),"; + dbgs() << printReg(SI.first, TRI) << '(' << printMBBReference(*SI.second) + << "),"; } dbgs() << "}\n"; } @@ -658,7 +658,7 @@ RegionMRT *MRT::buildMRT(MachineFunction &MF, continue; } - DEBUG(dbgs() << "Visiting BB#" << MBB->getNumber() << "\n"); + DEBUG(dbgs() << "Visiting " << printMBBReference(*MBB) << "\n"); MBBMRT *NewMBB = new MBBMRT(MBB); MachineRegion *Region = RegionInfo->getRegionFor(MBB); @@ -705,7 +705,7 @@ void LinearizedRegion::storeLiveOutReg(MachineBasicBlock *MBB, unsigned Reg, // If this is live out of the MBB for (auto &UI : MRI->use_operands(Reg)) { if (UI.getParent()->getParent() != MBB) { - DEBUG(dbgs() << "Add LiveOut (MBB BB#" << MBB->getNumber() + 
DEBUG(dbgs() << "Add LiveOut (MBB " << printMBBReference(*MBB) << "): " << printReg(Reg, TRI) << "\n"); addLiveOut(Reg); } else { @@ -749,7 +749,8 @@ void LinearizedRegion::storeLiveOuts(MachineBasicBlock *MBB, const MachineRegisterInfo *MRI, const TargetRegisterInfo *TRI, PHILinearize &PHIInfo) { - DEBUG(dbgs() << "-Store Live Outs Begin (BB#" << MBB->getNumber() << ")-\n"); + DEBUG(dbgs() << "-Store Live Outs Begin (" << printMBBReference(*MBB) + << ")-\n"); for (auto &II : *MBB) { for (auto &RI : II.defs()) { storeLiveOutReg(MBB, RI.getReg(), RI.getParent(), MRI, TRI, PHIInfo); @@ -773,8 +774,8 @@ void LinearizedRegion::storeLiveOuts(MachineBasicBlock *MBB, for (int i = 0; i < numPreds; ++i) { if (getPHIPred(PHI, i) == MBB) { unsigned PHIReg = getPHISourceReg(PHI, i); - DEBUG(dbgs() << "Add LiveOut (PhiSource BB#" << MBB->getNumber() - << " -> BB#" << (*SI)->getNumber() + DEBUG(dbgs() << "Add LiveOut (PhiSource " << printMBBReference(*MBB) + << " -> " << printMBBReference(*(*SI)) << "): " << printReg(PHIReg, TRI) << "\n"); addLiveOut(PHIReg); } @@ -1449,8 +1450,7 @@ bool AMDGPUMachineCFGStructurizer::shrinkPHI(MachineInstr &PHI, unsigned *ReplaceReg) { DEBUG(dbgs() << "Shrink PHI: "); DEBUG(PHI.dump()); - DEBUG(dbgs() << " to " << printReg(getPHIDestReg(PHI), TRI) - << " = PHI("); + DEBUG(dbgs() << " to " << printReg(getPHIDestReg(PHI), TRI) << " = PHI("); bool Replaced = false; unsigned NumInputs = getPHINumInputs(PHI); @@ -1480,8 +1480,8 @@ bool AMDGPUMachineCFGStructurizer::shrinkPHI(MachineInstr &PHI, if (SourceMBB) { MIB.addReg(CombinedSourceReg); MIB.addMBB(SourceMBB); - DEBUG(dbgs() << printReg(CombinedSourceReg, TRI) << ", BB#" - << SourceMBB->getNumber()); + DEBUG(dbgs() << printReg(CombinedSourceReg, TRI) << ", " + << printMBBReference(*SourceMBB)); } for (unsigned i = 0; i < NumInputs; ++i) { @@ -1492,8 +1492,8 @@ bool AMDGPUMachineCFGStructurizer::shrinkPHI(MachineInstr &PHI, MachineBasicBlock *SourcePred = getPHIPred(PHI, i); MIB.addReg(SourceReg); MIB.addMBB(SourcePred); - DEBUG(dbgs() << printReg(SourceReg, TRI) << ", BB#" - << SourcePred->getNumber()); + DEBUG(dbgs() << printReg(SourceReg, TRI) << ", " + << printMBBReference(*SourcePred)); } DEBUG(dbgs() << ")\n"); } @@ -1506,8 +1506,7 @@ void AMDGPUMachineCFGStructurizer::replacePHI( SmallVector &PHIRegionIndices) { DEBUG(dbgs() << "Replace PHI: "); DEBUG(PHI.dump()); - DEBUG(dbgs() << " with " << printReg(getPHIDestReg(PHI), TRI) - << " = PHI("); + DEBUG(dbgs() << " with " << printReg(getPHIDestReg(PHI), TRI) << " = PHI("); bool HasExternalEdge = false; unsigned NumInputs = getPHINumInputs(PHI); @@ -1524,8 +1523,8 @@ void AMDGPUMachineCFGStructurizer::replacePHI( getPHIDestReg(PHI)); MIB.addReg(CombinedSourceReg); MIB.addMBB(LastMerge); - DEBUG(dbgs() << printReg(CombinedSourceReg, TRI) << ", BB#" - << LastMerge->getNumber()); + DEBUG(dbgs() << printReg(CombinedSourceReg, TRI) << ", " + << printMBBReference(*LastMerge)); for (unsigned i = 0; i < NumInputs; ++i) { if (isPHIRegionIndex(PHIRegionIndices, i)) { continue; @@ -1534,8 +1533,8 @@ void AMDGPUMachineCFGStructurizer::replacePHI( MachineBasicBlock *SourcePred = getPHIPred(PHI, i); MIB.addReg(SourceReg); MIB.addMBB(SourcePred); - DEBUG(dbgs() << printReg(SourceReg, TRI) << ", BB#" - << SourcePred->getNumber()); + DEBUG(dbgs() << printReg(SourceReg, TRI) << ", " + << printMBBReference(*SourcePred)); } DEBUG(dbgs() << ")\n"); } else { @@ -1565,15 +1564,15 @@ void AMDGPUMachineCFGStructurizer::replaceEntryPHI( DEBUG(dbgs() << " register " << 
printReg(CombinedSourceReg, TRI) << "\n"); PHI.eraseFromParent(); } else { - DEBUG(dbgs() << printReg(getPHIDestReg(PHI), TRI) << " = PHI("); + DEBUG(dbgs() << printReg(getPHIDestReg(PHI), TRI) << " = PHI("); MachineBasicBlock *MBB = PHI.getParent(); MachineInstrBuilder MIB = BuildMI(*MBB, PHI, PHI.getDebugLoc(), TII->get(TargetOpcode::PHI), getPHIDestReg(PHI)); MIB.addReg(CombinedSourceReg); MIB.addMBB(IfMBB); - DEBUG(dbgs() << printReg(CombinedSourceReg, TRI) << ", BB#" - << IfMBB->getNumber()); + DEBUG(dbgs() << printReg(CombinedSourceReg, TRI) << ", " + << printMBBReference(*IfMBB)); unsigned NumInputs = getPHINumInputs(PHI); for (unsigned i = 0; i < NumInputs; ++i) { if (isPHIRegionIndex(PHIRegionIndices, i)) { @@ -1583,8 +1582,8 @@ void AMDGPUMachineCFGStructurizer::replaceEntryPHI( MachineBasicBlock *SourcePred = getPHIPred(PHI, i); MIB.addReg(SourceReg); MIB.addMBB(SourcePred); - DEBUG(dbgs() << printReg(SourceReg, TRI) << ", BB#" - << SourcePred->getNumber()); + DEBUG(dbgs() << printReg(SourceReg, TRI) << ", " + << printMBBReference(*SourcePred)); } DEBUG(dbgs() << ")\n"); PHI.eraseFromParent(); @@ -1749,11 +1748,11 @@ void AMDGPUMachineCFGStructurizer::insertMergePHI(MachineBasicBlock *IfBB, if (MergeBB->succ_begin() == MergeBB->succ_end()) { return; } - DEBUG(dbgs() << "Merge PHI (BB#" << MergeBB->getNumber() - << "): " << printReg(DestRegister, TRI) << " = PHI(" - << printReg(IfSourceRegister, TRI) << ", BB#" - << IfBB->getNumber() << printReg(CodeSourceRegister, TRI) - << ", BB#" << CodeBB->getNumber() << ")\n"); + DEBUG(dbgs() << "Merge PHI (" << printMBBReference(*MergeBB) + << "): " << printReg(DestRegister, TRI) << " = PHI(" + << printReg(IfSourceRegister, TRI) << ", " + << printMBBReference(*IfBB) << printReg(CodeSourceRegister, TRI) + << ", " << printMBBReference(*CodeBB) << ")\n"); const DebugLoc &DL = MergeBB->findDebugLoc(MergeBB->begin()); MachineInstrBuilder MIB = BuildMI(*MergeBB, MergeBB->instr_begin(), DL, TII->get(TargetOpcode::PHI), DestRegister); @@ -1811,8 +1810,8 @@ static void removeExternalCFGEdges(MachineBasicBlock *StartMBB, for (auto SI : Succs) { std::pair Edge = SI; - DEBUG(dbgs() << "Removing edge: BB#" << Edge.first->getNumber() << " -> BB#" - << Edge.second->getNumber() << "\n"); + DEBUG(dbgs() << "Removing edge: " << printMBBReference(*Edge.first) + << " -> " << printMBBReference(*Edge.second) << "\n"); Edge.first->removeSuccessor(Edge.second); } } @@ -1850,8 +1849,8 @@ MachineBasicBlock *AMDGPUMachineCFGStructurizer::createIfBlock( if (!CodeBBEnd->isSuccessor(MergeBB)) CodeBBEnd->addSuccessor(MergeBB); - DEBUG(dbgs() << "Moved MBB#" << CodeBBStart->getNumber() << " through MBB#" - << CodeBBEnd->getNumber() << "\n"); + DEBUG(dbgs() << "Moved " << printMBBReference(*CodeBBStart) << " through " + << printMBBReference(*CodeBBEnd) << "\n"); // If we have a single predecessor we can find a reasonable debug location MachineBasicBlock *SinglePred = @@ -2064,7 +2063,7 @@ void AMDGPUMachineCFGStructurizer::rewriteLiveOutRegs(MachineBasicBlock *IfBB, // is a source block for a definition. 
SmallVector Sources; if (PHIInfo.findSourcesFromMBB(CodeBB, Sources)) { - DEBUG(dbgs() << "Inserting PHI Live Out from BB#" << CodeBB->getNumber() + DEBUG(dbgs() << "Inserting PHI Live Out from " << printMBBReference(*CodeBB) << "\n"); for (auto SI : Sources) { unsigned DestReg; @@ -2146,7 +2145,7 @@ void AMDGPUMachineCFGStructurizer::createEntryPHI(LinearizedRegion *CurrentRegio const DebugLoc &DL = Entry->findDebugLoc(Entry->begin()); MachineInstrBuilder MIB = BuildMI(*Entry, Entry->instr_begin(), DL, TII->get(TargetOpcode::PHI), DestReg); - DEBUG(dbgs() << "Entry PHI " << printReg(DestReg, TRI) << " = PHI("); + DEBUG(dbgs() << "Entry PHI " << printReg(DestReg, TRI) << " = PHI("); unsigned CurrentBackedgeReg = 0; @@ -2171,17 +2170,18 @@ void AMDGPUMachineCFGStructurizer::createEntryPHI(LinearizedRegion *CurrentRegio BackedgePHI.addMBB((*SRI).second); CurrentBackedgeReg = NewBackedgeReg; DEBUG(dbgs() << "Inserting backedge PHI: " - << printReg(NewBackedgeReg, TRI) << " = PHI(" - << printReg(CurrentBackedgeReg, TRI) << ", BB#" - << getPHIPred(*PHIDefInstr, 0)->getNumber() << ", " + << printReg(NewBackedgeReg, TRI) << " = PHI(" + << printReg(CurrentBackedgeReg, TRI) << ", " + << printMBBReference(*getPHIPred(*PHIDefInstr, 0)) + << ", " << printReg(getPHISourceReg(*PHIDefInstr, 1), TRI) - << ", BB#" << (*SRI).second->getNumber()); + << ", " << printMBBReference(*(*SRI).second)); } } else { MIB.addReg(SourceReg); MIB.addMBB((*SRI).second); - DEBUG(dbgs() << printReg(SourceReg, TRI) << ", BB#" - << (*SRI).second->getNumber() << ", "); + DEBUG(dbgs() << printReg(SourceReg, TRI) << ", " + << printMBBReference(*(*SRI).second) << ", "); } } @@ -2189,8 +2189,8 @@ void AMDGPUMachineCFGStructurizer::createEntryPHI(LinearizedRegion *CurrentRegio if (CurrentBackedgeReg != 0) { MIB.addReg(CurrentBackedgeReg); MIB.addMBB(Exit); - DEBUG(dbgs() << printReg(CurrentBackedgeReg, TRI) << ", BB#" - << Exit->getNumber() << ")\n"); + DEBUG(dbgs() << printReg(CurrentBackedgeReg, TRI) << ", " + << printMBBReference(*Exit) << ")\n"); } else { DEBUG(dbgs() << ")\n"); } @@ -2439,15 +2439,15 @@ void AMDGPUMachineCFGStructurizer::splitLoopPHI(MachineInstr &PHI, MachineInstrBuilder MIB = BuildMI(*EntrySucc, EntrySucc->instr_begin(), PHI.getDebugLoc(), TII->get(TargetOpcode::PHI), NewDestReg); - DEBUG(dbgs() << "Split Entry PHI " << printReg(NewDestReg, TRI) - << " = PHI("); + DEBUG(dbgs() << "Split Entry PHI " << printReg(NewDestReg, TRI) << " = PHI("); MIB.addReg(PHISource); MIB.addMBB(Entry); - DEBUG(dbgs() << printReg(PHISource, TRI) << ", BB#" << Entry->getNumber()); + DEBUG(dbgs() << printReg(PHISource, TRI) << ", " + << printMBBReference(*Entry)); MIB.addReg(RegionSourceReg); MIB.addMBB(RegionSourceMBB); - DEBUG(dbgs() << " ," << printReg(RegionSourceReg, TRI) << ", BB#" - << RegionSourceMBB->getNumber() << ")\n"); + DEBUG(dbgs() << " ," << printReg(RegionSourceReg, TRI) << ", " + << printMBBReference(*RegionSourceMBB) << ")\n"); } void AMDGPUMachineCFGStructurizer::splitLoopPHIs(MachineBasicBlock *Entry, @@ -2528,9 +2528,9 @@ AMDGPUMachineCFGStructurizer::splitEntry(LinearizedRegion *LRegion) { MachineBasicBlock *EntrySucc = split(Entry->getFirstNonPHI()); MachineBasicBlock *Exit = LRegion->getExit(); - DEBUG(dbgs() << "Split BB#" << Entry->getNumber() << " to BB#" - << Entry->getNumber() << " -> BB#" << EntrySucc->getNumber() - << "\n"); + DEBUG(dbgs() << "Split " << printMBBReference(*Entry) << " to " + << printMBBReference(*Entry) << " -> " + << printMBBReference(*EntrySucc) << "\n"); 
LRegion->addMBB(EntrySucc); // Make the backedge go to Entry Succ diff --git a/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp b/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp index 9fb7f5f88927..b7c8c1213537 100644 --- a/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp +++ b/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp @@ -19,7 +19,7 @@ AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) : MaxKernArgAlign(0), LDSSize(0), ABIArgOffset(0), - IsEntryFunction(AMDGPU::isEntryFunctionCC(MF.getFunction()->getCallingConv())), + IsEntryFunction(AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv())), NoSignedZerosFPMath(MF.getTarget().Options.NoSignedZerosFPMath) { // FIXME: Should initialize KernArgSize based on ExplicitKernelArgOffset, // except reserved size is not correctly aligned. diff --git a/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp b/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp index 8454dede0e1e..5e4d33aaa691 100644 --- a/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp @@ -43,7 +43,7 @@ unsigned AMDGPURegisterInfo::getSubRegFromChannel(unsigned Channel) const { // Forced to be here by one .inc const MCPhysReg *SIRegisterInfo::getCalleeSavedRegs( const MachineFunction *MF) const { - CallingConv::ID CC = MF->getFunction()->getCallingConv(); + CallingConv::ID CC = MF->getFunction().getCallingConv(); switch (CC) { case CallingConv::C: case CallingConv::Fast: diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index 8e5a432e068a..2d6834fd2753 100644 --- a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -48,14 +48,27 @@ AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT, // for SI has the unhelpful behavior that it unsets everything else if you // disable it. - SmallString<256> FullFS("+promote-alloca,+fp64-fp16-denormals,+dx10-clamp,+load-store-opt,"); + SmallString<256> FullFS("+promote-alloca,+dx10-clamp,+load-store-opt,"); + if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA. FullFS += "+flat-address-space,+flat-for-global,+unaligned-buffer-access,+trap-handler,"; + // FIXME: I don't think Evergreen has any useful support for + // denormals, but should be checked. Should we issue a warning somewhere + // if someone tries to enable these? + if (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) { + FullFS += "+fp64-fp16-denormals,"; + } else { + FullFS += "-fp32-denormals,"; + } + FullFS += FS; ParseSubtargetFeatures(GPU, FullFS); + // We don't support FP64 for EG/NI atm. + assert(!hasFP64() || (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS)); + // Unless +-flat-for-global is specified, turn on FlatForGlobal for all OS-es // on VI and newer hardware to avoid assertion failures due to missing ADDR64 // variants of MUBUF instructions. @@ -63,14 +76,6 @@ AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT, FlatForGlobal = true; } - // FIXME: I don't think think Evergreen has any useful support for - // denormals, but should be checked. Should we issue a warning somewhere - // if someone tries to enable these? - if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) { - FP64FP16Denormals = false; - FP32Denormals = false; - } - // Set defaults if needed.
if (MaxPrivateElementSize == 0) MaxPrivateElementSize = 4; @@ -130,6 +135,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS, DumpCode(false), FP64(false), + FMA(false), IsGCN(false), GCN3Encoding(false), CIInsts(false), @@ -156,6 +162,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS, FlatGlobalInsts(false), FlatScratchInsts(false), AddNoCarryInsts(false), + HasUnpackedD16VMem(false), R600ALUInst(false), CaymanISA(false), @@ -462,7 +469,7 @@ unsigned SISubtarget::getReservedNumSGPRs(const MachineFunction &MF) const { } unsigned SISubtarget::getMaxNumSGPRs(const MachineFunction &MF) const { - const Function &F = *MF.getFunction(); + const Function &F = MF.getFunction(); const SIMachineFunctionInfo &MFI = *MF.getInfo(); // Compute maximum number of SGPRs function can use using default/requested @@ -512,7 +519,7 @@ unsigned SISubtarget::getMaxNumSGPRs(const MachineFunction &MF) const { } unsigned SISubtarget::getMaxNumVGPRs(const MachineFunction &MF) const { - const Function &F = *MF.getFunction(); + const Function &F = MF.getFunction(); const SIMachineFunctionInfo &MFI = *MF.getInfo(); // Compute maximum number of VGPRs function can use using default/requested diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h index 63634f434fa6..9f75e1e6133c 100644 --- a/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -66,16 +66,14 @@ class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo { ISAVersion7_0_1, ISAVersion7_0_2, ISAVersion7_0_3, + ISAVersion7_0_4, ISAVersion8_0_0, ISAVersion8_0_1, ISAVersion8_0_2, ISAVersion8_0_3, - ISAVersion8_0_4, ISAVersion8_1_0, ISAVersion9_0_0, - ISAVersion9_0_1, - ISAVersion9_0_2, - ISAVersion9_0_3 + ISAVersion9_0_2 }; enum TrapHandlerAbi { @@ -140,6 +138,7 @@ class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo { // Subtarget statically properties set by tablegen bool FP64; + bool FMA; bool IsGCN; bool GCN3Encoding; bool CIInsts; @@ -166,6 +165,7 @@ class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo { bool FlatGlobalInsts; bool FlatScratchInsts; bool AddNoCarryInsts; + bool HasUnpackedD16VMem; bool R600ALUInst; bool CaymanISA; bool CFALUBug; @@ -261,7 +261,7 @@ class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo { return HasVOP3PInsts; } - bool hasHWFP64() const { + bool hasFP64() const { return FP64; } @@ -348,6 +348,10 @@ class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo { return CaymanISA; } + bool hasFMA() const { + return FMA; + } + TrapHandlerAbi getTrapHandlerAbi() const { return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone; } @@ -379,7 +383,7 @@ class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo { unsigned getOccupancyWithLocalMemSize(const MachineFunction &MF) const { const auto *MFI = MF.getInfo(); - return getOccupancyWithLocalMemSize(MFI->getLDSSize(), *MF.getFunction()); + return getOccupancyWithLocalMemSize(MFI->getLDSSize(), MF.getFunction()); } bool hasFP16Denormals() const { @@ -407,13 +411,19 @@ class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo { } bool enableIEEEBit(const MachineFunction &MF) const { - return AMDGPU::isCompute(MF.getFunction()->getCallingConv()); + return AMDGPU::isCompute(MF.getFunction().getCallingConv()); } bool useFlatForGlobal() const { return FlatForGlobal; } + /// \returns If MUBUF instructions always perform range checking, even for + /// buffer resources used for private memory access. 
+ bool privateMemoryResourceIsRangeChecked() const { + return getGeneration() < AMDGPUSubtarget::GFX9; + } + bool hasAutoWaitcntBeforeBarrier() const { return AutoWaitcntBeforeBarrier; } @@ -462,17 +472,27 @@ class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo { return getGeneration() >= GFX9; } + /// Return if most LDS instructions have an m0 use that requires m0 to be + /// initialized. + bool ldsRequiresM0Init() const { + return getGeneration() < GFX9; + } + bool hasAddNoCarry() const { return AddNoCarryInsts; } + bool hasUnpackedD16VMem() const { + return HasUnpackedD16VMem; + } + bool isMesaKernel(const MachineFunction &MF) const { - return isMesa3DOS() && !AMDGPU::isShader(MF.getFunction()->getCallingConv()); + return isMesa3DOS() && !AMDGPU::isShader(MF.getFunction().getCallingConv()); } // Covers VS/PS/CS graphics shaders bool isMesaGfxShader(const MachineFunction &MF) const { - return isMesa3DOS() && AMDGPU::isShader(MF.getFunction()->getCallingConv()); + return isMesa3DOS() && AMDGPU::isShader(MF.getFunction().getCallingConv()); } bool isAmdCodeObjectV2(const MachineFunction &MF) const { diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 8f9ad2306160..7bb8b67bf9da 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -579,10 +579,9 @@ class GCNPassConfig final : public AMDGPUPassConfig { } // end anonymous namespace -TargetIRAnalysis AMDGPUTargetMachine::getTargetIRAnalysis() { - return TargetIRAnalysis([this](const Function &F) { - return TargetTransformInfo(AMDGPUTTIImpl(this, F)); - }); +TargetTransformInfo +AMDGPUTargetMachine::getTargetTransformInfo(const Function &F) { + return TargetTransformInfo(AMDGPUTTIImpl(this, F)); } void AMDGPUPassConfig::addEarlyCSEOrGVNPass() { @@ -793,7 +792,7 @@ void GCNPassConfig::addMachineSSAOptimization() { addPass(&SILoadStoreOptimizerID); if (EnableSDWAPeephole) { addPass(&SIPeepholeSDWAID); - addPass(&MachineLICMID); + addPass(&EarlyMachineLICMID); addPass(&MachineCSEID); addPass(&SIFoldOperandsID); addPass(&DeadMachineInstructionElimID); @@ -908,4 +907,3 @@ void GCNPassConfig::addPreEmitPass() { TargetPassConfig *GCNTargetMachine::createPassConfig(PassManagerBase &PM) { return new GCNPassConfig(*this, PM); } - diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/lib/Target/AMDGPU/AMDGPUTargetMachine.h index 5627b4cb412e..085c91ca4ede 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetMachine.h +++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.h @@ -55,7 +55,7 @@ class AMDGPUTargetMachine : public LLVMTargetMachine { const AMDGPUIntrinsicInfo *getIntrinsicInfo() const override { return &IntrinsicInfo; } - TargetIRAnalysis getTargetIRAnalysis() override; + TargetTransformInfo getTargetTransformInfo(const Function &F) override; TargetLoweringObjectFile *getObjFileLowering() const override { return TLOF.get(); @@ -115,6 +115,10 @@ class GCNTargetMachine final : public AMDGPUTargetMachine { TargetPassConfig *createPassConfig(PassManagerBase &PM) override; const SISubtarget *getSubtargetImpl(const Function &) const override; + + bool useIPRA() const override { + return true; + } }; } // end namespace llvm diff --git a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index 153a4a8ddb7e..21088d3e48e3 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -288,6 +288,32 @@ unsigned AMDGPUTTIImpl::getMaxInterleaveFactor(unsigned VF) { return 8;
} +bool AMDGPUTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst, + MemIntrinsicInfo &Info) const { + switch (Inst->getIntrinsicID()) { + case Intrinsic::amdgcn_atomic_inc: + case Intrinsic::amdgcn_atomic_dec: { + auto *Ordering = dyn_cast(Inst->getArgOperand(2)); + auto *Volatile = dyn_cast(Inst->getArgOperand(4)); + if (!Ordering || !Volatile) + return false; // Invalid. + + unsigned OrderingVal = Ordering->getZExtValue(); + if (OrderingVal > static_cast(AtomicOrdering::SequentiallyConsistent)) + return false; + + Info.PtrVal = Inst->getArgOperand(0); + Info.Ordering = static_cast(OrderingVal); + Info.ReadMem = true; + Info.WriteMem = true; + Info.IsVolatile = !Volatile->isNullValue(); + return true; + } + default: + return false; + } +} + int AMDGPUTTIImpl::getArithmeticInstrCost( unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info, TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo, @@ -449,6 +475,9 @@ static bool isIntrinsicSourceOfDivergence(const IntrinsicInst *I) { case Intrinsic::r600_read_tidig_z: case Intrinsic::amdgcn_atomic_inc: case Intrinsic::amdgcn_atomic_dec: + case Intrinsic::amdgcn_atomic_fadd: + case Intrinsic::amdgcn_atomic_fmin: + case Intrinsic::amdgcn_atomic_fmax: case Intrinsic::amdgcn_image_atomic_swap: case Intrinsic::amdgcn_image_atomic_add: case Intrinsic::amdgcn_image_atomic_sub: diff --git a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h index ee0683d39b49..8899d2c6da8a 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h +++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h @@ -132,6 +132,8 @@ class AMDGPUTTIImpl final : public BasicTTIImplBase { unsigned getMaxInterleaveFactor(unsigned VF); + bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const; + int getArithmeticInstrCost( unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, diff --git a/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp b/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp index 223fdf77941f..0a0e43123ae0 100644 --- a/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp +++ b/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp @@ -1641,7 +1641,7 @@ AMDGPUCFGStructurizer::normalizeInfiniteLoopExit(MachineLoop* LoopRep) { FuncRep->push_back(DummyExitBlk); //insert to function SHOWNEWBLK(DummyExitBlk, "DummyExitBlock to normalize infiniteLoop: "); DEBUG(dbgs() << "Old branch instr: " << *BranchMI << "\n";); - LLVMContext &Ctx = LoopHeader->getParent()->getFunction()->getContext(); + LLVMContext &Ctx = LoopHeader->getParent()->getFunction().getContext(); Ctx.emitError("Extra register needed to handle CFG"); return nullptr; } diff --git a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 7223e888c1c6..7a7ed39428e4 100644 --- a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -267,7 +267,11 @@ class AMDGPUOperand : public MCParsedAsmOperand { return isOff() || isRegClass(AMDGPU::VGPR_32RegClassID); } - bool isSDWARegKind() const; + bool isSDWAOperand(MVT type) const; + bool isSDWAFP16Operand() const; + bool isSDWAFP32Operand() const; + bool isSDWAInt16Operand() const; + bool isSDWAInt32Operand() const; bool isImmTy(ImmTy ImmT) const { return isImm() && Imm.Type == ImmT; @@ -536,6 +540,10 @@ class AMDGPUOperand : public MCParsedAsmOperand { return EndLoc; } + SMRange getLocRange() const { + return SMRange(StartLoc, EndLoc); + } + Modifiers getModifiers() const { assert(isRegKind() || 
isImmTy(ImmTyNone)); return isRegKind() ? Reg.Mods : Imm.Mods; @@ -811,6 +819,10 @@ class KernelScopeInfo { class AMDGPUAsmParser : public MCTargetAsmParser { MCAsmParser &Parser; + // Number of extra operands parsed after the first optional operand. + // This may be necessary to skip hardcoded mandatory operands. + static const unsigned MAX_OPR_LOOKAHEAD = 1; + unsigned ForcedEncodingSize = 0; bool ForcedDPP = false; bool ForcedSDWA = false; @@ -888,6 +900,10 @@ class AMDGPUAsmParser : public MCTargetAsmParser { KernelScope.initialize(getContext()); } + bool hasXNACK() const { + return AMDGPU::hasXNACK(getSTI()); + } + bool isSI() const { return AMDGPU::isSI(getSTI()); } @@ -1033,6 +1049,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser { public: OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); + OperandMatchResultTy parseOptionalOpr(OperandVector &Operands); OperandMatchResultTy parseExpTgt(OperandVector &Operands); OperandMatchResultTy parseSendMsgOp(OperandVector &Operands); @@ -1272,15 +1289,31 @@ bool AMDGPUOperand::isRegClass(unsigned RCID) const { return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); } -bool AMDGPUOperand::isSDWARegKind() const { +bool AMDGPUOperand::isSDWAOperand(MVT type) const { if (AsmParser->isVI()) return isVReg(); else if (AsmParser->isGFX9()) - return isRegKind(); + return isRegKind() || isInlinableImm(type); else return false; } +bool AMDGPUOperand::isSDWAFP16Operand() const { + return isSDWAOperand(MVT::f16); +} + +bool AMDGPUOperand::isSDWAFP32Operand() const { + return isSDWAOperand(MVT::f32); +} + +bool AMDGPUOperand::isSDWAInt16Operand() const { + return isSDWAOperand(MVT::i16); +} + +bool AMDGPUOperand::isSDWAInt32Operand() const { + return isSDWAOperand(MVT::i32); +} + uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const { assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); @@ -1491,6 +1524,8 @@ static int getRegClass(RegisterKind Is, unsigned RegWidth) { case 1: return AMDGPU::TTMP_32RegClassID; case 2: return AMDGPU::TTMP_64RegClassID; case 4: return AMDGPU::TTMP_128RegClassID; + case 8: return AMDGPU::TTMP_256RegClassID; + case 16: return AMDGPU::TTMP_512RegClassID; } } else if (Is == IS_SGPR) { switch (RegWidth) { @@ -1498,8 +1533,8 @@ static int getRegClass(RegisterKind Is, unsigned RegWidth) { case 1: return AMDGPU::SGPR_32RegClassID; case 2: return AMDGPU::SGPR_64RegClassID; case 4: return AMDGPU::SGPR_128RegClassID; - case 8: return AMDGPU::SReg_256RegClassID; - case 16: return AMDGPU::SReg_512RegClassID; + case 8: return AMDGPU::SGPR_256RegClassID; + case 16: return AMDGPU::SGPR_512RegClassID; } } return -1; @@ -1510,12 +1545,15 @@ static unsigned getSpecialRegForName(StringRef RegName) { .Case("exec", AMDGPU::EXEC) .Case("vcc", AMDGPU::VCC) .Case("flat_scratch", AMDGPU::FLAT_SCR) + .Case("xnack_mask", AMDGPU::XNACK_MASK) .Case("m0", AMDGPU::M0) .Case("scc", AMDGPU::SCC) .Case("tba", AMDGPU::TBA) .Case("tma", AMDGPU::TMA) .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) + .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) + .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) .Case("vcc_lo", AMDGPU::VCC_LO) .Case("vcc_hi", AMDGPU::VCC_HI) .Case("exec_lo", AMDGPU::EXEC_LO) @@ -1553,6 +1591,11 @@ bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, RegWidth = 2; return true; } + if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { + Reg = AMDGPU::XNACK_MASK; + RegWidth = 2; + return true; 
+ } if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { Reg = AMDGPU::VCC; RegWidth = 2; @@ -1754,6 +1797,11 @@ AMDGPUAsmParser::parseImm(OperandVector &Operands, bool AbsMod) { // TODO: add syntactic sugar for 1/(2*PI) bool Minus = false; if (getLexer().getKind() == AsmToken::Minus) { + const AsmToken NextToken = getLexer().peekTok(); + if (!NextToken.is(AsmToken::Integer) && + !NextToken.is(AsmToken::Real)) { + return MatchOperand_NoMatch; + } Minus = true; Parser.Lex(); } @@ -1783,7 +1831,7 @@ AMDGPUAsmParser::parseImm(OperandVector &Operands, bool AbsMod) { return MatchOperand_Success; } default: - return Minus ? MatchOperand_ParseFail : MatchOperand_NoMatch; + return MatchOperand_NoMatch; } } @@ -2244,6 +2292,9 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, return true; } +static std::string AMDGPUMnemonicSpellCheck(StringRef S, uint64_t FBS, + unsigned VariantID = 0); + bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, @@ -2286,8 +2337,13 @@ bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, case Match_MissingFeature: return Error(IDLoc, "instruction not supported on this GPU"); - case Match_MnemonicFail: - return Error(IDLoc, "unrecognized instruction mnemonic"); + case Match_MnemonicFail: { + uint64_t FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); + std::string Suggestion = AMDGPUMnemonicSpellCheck( + ((AMDGPUOperand &)*Operands[0]).getToken(), FBS); + return Error(IDLoc, "invalid instruction" + Suggestion, + ((AMDGPUOperand &)*Operands[0]).getLocRange()); + } case Match_InvalidOperand: { SMLoc ErrorLoc = IDLoc; @@ -2578,6 +2634,29 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const { + + for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); + R.isValid(); ++R) { + if (*R == RegNo) + return isGFX9(); + } + + switch (RegNo) { + case AMDGPU::TBA: + case AMDGPU::TBA_LO: + case AMDGPU::TBA_HI: + case AMDGPU::TMA: + case AMDGPU::TMA_LO: + case AMDGPU::TMA_HI: + return !isGFX9(); + case AMDGPU::XNACK_MASK: + case AMDGPU::XNACK_MASK_LO: + case AMDGPU::XNACK_MASK_HI: + return !isCI() && !isSI() && hasXNACK(); + default: + break; + } + if (isCI()) return true; @@ -3120,7 +3199,10 @@ bool AMDGPUAsmParser::parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, HwReg.IsSymbolic = true; HwReg.Id = ID_UNKNOWN_; const StringRef tok = Parser.getTok().getString(); - for (int i = ID_SYMBOLIC_FIRST_; i < ID_SYMBOLIC_LAST_; ++i) { + int Last = ID_SYMBOLIC_LAST_; + if (isSI() || isCI() || isVI()) + Last = ID_SYMBOLIC_FIRST_GFX9_; + for (int i = ID_SYMBOLIC_FIRST_; i < Last; ++i) { if (tok == IdSymbolic[i]) { HwReg.Id = i; break; @@ -3819,7 +3901,9 @@ AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { return Ok? MatchOperand_Success : MatchOperand_ParseFail; } else { - return MatchOperand_NoMatch; + // Swizzle "offset" operand is optional. + // If it is omitted, try parsing other optional operands. 
+ return parseOptionalOpr(Operands); } } @@ -3969,7 +4053,8 @@ void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, if (IsAtomic) { // Add src, same as dst - ((AMDGPUOperand &)*Operands[I]).addRegOperands(Inst, 1); + assert(Desc.getNumDefs() == 1); + ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); } OptionalImmIndexMap OptionalIdx; @@ -3978,9 +4063,8 @@ void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); // Add the register arguments - if (Op.isRegOrImm()) { - Op.addRegOrImmOperands(Inst, 1); - continue; + if (Op.isReg()) { + Op.addRegOperands(Inst, 1); } else if (Op.isImmModifier()) { OptionalIdx[Op.getImmTy()] = I; } else { @@ -3991,11 +4075,11 @@ void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); } void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { @@ -4139,6 +4223,39 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = { }; OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { + unsigned size = Operands.size(); + assert(size > 0); + + OperandMatchResultTy res = parseOptionalOpr(Operands); + + // This is a hack to enable hardcoded mandatory operands which follow + // optional operands. + // + // Current design assumes that all operands after the first optional operand + // are also optional. However implementation of some instructions violates + // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). + // + // To alleviate this problem, we have to (implicitly) parse extra operands + // to make sure autogenerated parser of custom operands never hit hardcoded + // mandatory operands. + + if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) { + + // We have parsed the first optional operand. + // Parse as many operands as necessary to skip all mandatory operands. 
+ + for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { + if (res != MatchOperand_Success || + getLexer().is(AsmToken::EndOfStatement)) break; + if (getLexer().is(AsmToken::Comma)) Parser.Lex(); + res = parseOptionalOpr(Operands); + } + } + + return res; +} + +OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { OperandMatchResultTy res; for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { // try to parse any optional operand here @@ -4702,7 +4819,7 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, } } if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { - Op.addRegWithInputModsOperands(Inst, 2); + Op.addRegOrImmWithInputModsOperands(Inst, 2); } else if (Op.isImm()) { // Handle optional arguments OptionalIdx[Op.getImmTy()] = I; @@ -4767,6 +4884,7 @@ extern "C" void LLVMInitializeAMDGPUAsmParser() { #define GET_REGISTER_MATCHER #define GET_MATCHER_IMPLEMENTATION +#define GET_MNEMONIC_SPELL_CHECKER #include "AMDGPUGenAsmMatcher.inc" // This fuction should be defined after auto-generated include so that we have diff --git a/lib/Target/AMDGPU/BUFInstructions.td b/lib/Target/AMDGPU/BUFInstructions.td index 2230457b3a9b..ceb596c7d403 100644 --- a/lib/Target/AMDGPU/BUFInstructions.td +++ b/lib/Target/AMDGPU/BUFInstructions.td @@ -671,6 +671,61 @@ defm BUFFER_STORE_FORMAT_XYZ : MUBUF_Pseudo_Stores < defm BUFFER_STORE_FORMAT_XYZW : MUBUF_Pseudo_Stores < "buffer_store_format_xyzw", VReg_128 >; + +let SubtargetPredicate = HasUnpackedD16VMem in { + defm BUFFER_LOAD_FORMAT_D16_X_gfx80 : MUBUF_Pseudo_Loads < + "buffer_load_format_d16_x", VGPR_32 + >; + defm BUFFER_LOAD_FORMAT_D16_XY_gfx80 : MUBUF_Pseudo_Loads < + "buffer_load_format_d16_xy", VReg_64 + >; + defm BUFFER_LOAD_FORMAT_D16_XYZ_gfx80 : MUBUF_Pseudo_Loads < + "buffer_load_format_d16_xyz", VReg_96 + >; + defm BUFFER_LOAD_FORMAT_D16_XYZW_gfx80 : MUBUF_Pseudo_Loads < + "buffer_load_format_d16_xyzw", VReg_128 + >; + defm BUFFER_STORE_FORMAT_D16_X_gfx80 : MUBUF_Pseudo_Stores < + "buffer_store_format_d16_x", VGPR_32 + >; + defm BUFFER_STORE_FORMAT_D16_XY_gfx80 : MUBUF_Pseudo_Stores < + "buffer_store_format_d16_xy", VReg_64 + >; + defm BUFFER_STORE_FORMAT_D16_XYZ_gfx80 : MUBUF_Pseudo_Stores < + "buffer_store_format_d16_xyz", VReg_96 + >; + defm BUFFER_STORE_FORMAT_D16_XYZW_gfx80 : MUBUF_Pseudo_Stores < + "buffer_store_format_d16_xyzw", VReg_128 + >; +} // End HasUnpackedD16VMem. + +let SubtargetPredicate = HasPackedD16VMem in { + defm BUFFER_LOAD_FORMAT_D16_X : MUBUF_Pseudo_Loads < + "buffer_load_format_d16_x", VGPR_32 + >; + defm BUFFER_LOAD_FORMAT_D16_XY : MUBUF_Pseudo_Loads < + "buffer_load_format_d16_xy", VGPR_32 + >; + defm BUFFER_LOAD_FORMAT_D16_XYZ : MUBUF_Pseudo_Loads < + "buffer_load_format_d16_xyz", VReg_64 + >; + defm BUFFER_LOAD_FORMAT_D16_XYZW : MUBUF_Pseudo_Loads < + "buffer_load_format_d16_xyzw", VReg_64 + >; + defm BUFFER_STORE_FORMAT_D16_X : MUBUF_Pseudo_Stores < + "buffer_store_format_d16_x", VGPR_32 + >; + defm BUFFER_STORE_FORMAT_D16_XY : MUBUF_Pseudo_Stores < + "buffer_store_format_d16_xy", VGPR_32 + >; + defm BUFFER_STORE_FORMAT_D16_XYZ : MUBUF_Pseudo_Stores < + "buffer_store_format_d16_xyz", VReg_64 + >; + defm BUFFER_STORE_FORMAT_D16_XYZW : MUBUF_Pseudo_Stores < + "buffer_store_format_d16_xyzw", VReg_64 + >; +} // End HasPackedD16VMem. 
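Note (editorial, not part of the patch): the packed and unpacked D16 buffer variants defined above differ only in how many VGPRs the data operand spans, as the register classes show (unpacked gfx80 keeps one 32-bit VGPR per 16-bit component; packed D16 holds two components per VGPR). A minimal C++ sketch of that sizing rule, using a hypothetical helper that does not exist in this patch:

// Illustrative only -- restates the VGPR sizing visible in the D16
// pseudo definitions above (e.g. buffer_load_format_d16_xyzw uses
// VReg_128 when unpacked but VReg_64 when packed).
static unsigned getD16DataVGPRs(unsigned NumComponents, bool UnpackedD16) {
  return UnpackedD16 ? NumComponents            // one VGPR per component
                     : (NumComponents + 1) / 2; // two components per VGPR
}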
+ defm BUFFER_LOAD_UBYTE : MUBUF_Pseudo_Loads < "buffer_load_ubyte", VGPR_32, i32, mubuf_az_extloadi8 >; @@ -860,6 +915,28 @@ defm TBUFFER_STORE_FORMAT_XY : MTBUF_Pseudo_Stores <"tbuffer_store_format_xy", defm TBUFFER_STORE_FORMAT_XYZ : MTBUF_Pseudo_Stores <"tbuffer_store_format_xyz", VReg_128>; defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Pseudo_Stores <"tbuffer_store_format_xyzw", VReg_128>; +let SubtargetPredicate = HasUnpackedD16VMem in { + defm TBUFFER_LOAD_FORMAT_D16_X_gfx80 : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_x", VGPR_32>; + defm TBUFFER_LOAD_FORMAT_D16_XY_gfx80 : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xy", VReg_64>; + defm TBUFFER_LOAD_FORMAT_D16_XYZ_gfx80 : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xyz", VReg_96>; + defm TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80 : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xyzw", VReg_128>; + defm TBUFFER_STORE_FORMAT_D16_X_gfx80 : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_x", VGPR_32>; + defm TBUFFER_STORE_FORMAT_D16_XY_gfx80 : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xy", VReg_64>; + defm TBUFFER_STORE_FORMAT_D16_XYZ_gfx80 : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xyz", VReg_96>; + defm TBUFFER_STORE_FORMAT_D16_XYZW_gfx80 : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xyzw", VReg_128>; +} // End HasUnpackedD16VMem. + +let SubtargetPredicate = HasPackedD16VMem in { + defm TBUFFER_LOAD_FORMAT_D16_X : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_x", VGPR_32>; + defm TBUFFER_LOAD_FORMAT_D16_XY : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xy", VGPR_32>; + defm TBUFFER_LOAD_FORMAT_D16_XYZ : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xyz", VReg_64>; + defm TBUFFER_LOAD_FORMAT_D16_XYZW : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xyzw", VReg_64>; + defm TBUFFER_STORE_FORMAT_D16_X : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_x", VGPR_32>; + defm TBUFFER_STORE_FORMAT_D16_XY : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xy", VGPR_32>; + defm TBUFFER_STORE_FORMAT_D16_XYZ : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xyz", VReg_64>; + defm TBUFFER_STORE_FORMAT_D16_XYZW : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xyzw", VReg_64>; +} // End HasPackedD16VMem. + let SubtargetPredicate = isCIVI in { //===----------------------------------------------------------------------===// @@ -922,6 +999,20 @@ multiclass MUBUF_LoadIntrinsicPat; defm : MUBUF_LoadIntrinsicPat; defm : MUBUF_LoadIntrinsicPat; + +let SubtargetPredicate = HasUnpackedD16VMem in { + defm : MUBUF_LoadIntrinsicPat; + defm : MUBUF_LoadIntrinsicPat; + defm : MUBUF_LoadIntrinsicPat; +} // End HasUnpackedD16VMem. + +let SubtargetPredicate = HasPackedD16VMem in { + defm : MUBUF_LoadIntrinsicPat; + defm : MUBUF_LoadIntrinsicPat; + defm : MUBUF_LoadIntrinsicPat; + defm : MUBUF_LoadIntrinsicPat; +} // End HasPackedD16VMem. + defm : MUBUF_LoadIntrinsicPat; defm : MUBUF_LoadIntrinsicPat; defm : MUBUF_LoadIntrinsicPat; @@ -969,6 +1060,20 @@ multiclass MUBUF_StoreIntrinsicPat; defm : MUBUF_StoreIntrinsicPat; defm : MUBUF_StoreIntrinsicPat; + +let SubtargetPredicate = HasUnpackedD16VMem in { + defm : MUBUF_StoreIntrinsicPat; + defm : MUBUF_StoreIntrinsicPat; + defm : MUBUF_StoreIntrinsicPat; +} // End HasUnpackedD16VMem. + +let SubtargetPredicate = HasPackedD16VMem in { + defm : MUBUF_StoreIntrinsicPat; + defm : MUBUF_StoreIntrinsicPat; + defm : MUBUF_StoreIntrinsicPat; + defm : MUBUF_StoreIntrinsicPat; +} // End HasPackedD16VMem. 
+ defm : MUBUF_StoreIntrinsicPat; defm : MUBUF_StoreIntrinsicPat; defm : MUBUF_StoreIntrinsicPat; @@ -1382,6 +1487,19 @@ defm : MTBUF_LoadIntrinsicPat; defm : MTBUF_LoadIntrinsicPat; defm : MTBUF_LoadIntrinsicPat; +let SubtargetPredicate = HasUnpackedD16VMem in { + defm : MTBUF_LoadIntrinsicPat; + defm : MTBUF_LoadIntrinsicPat; + defm : MTBUF_LoadIntrinsicPat; +} // End HasUnpackedD16VMem. + +let SubtargetPredicate = HasPackedD16VMem in { + defm : MTBUF_LoadIntrinsicPat; + defm : MTBUF_LoadIntrinsicPat; + defm : MTBUF_LoadIntrinsicPat; + defm : MTBUF_LoadIntrinsicPat; +} // End HasPackedD16VMem. + multiclass MTBUF_StoreIntrinsicPat { def : GCNPat< @@ -1431,6 +1549,19 @@ defm : MTBUF_StoreIntrinsicPat; defm : MTBUF_StoreIntrinsicPat; +let SubtargetPredicate = HasUnpackedD16VMem in { + defm : MTBUF_StoreIntrinsicPat; + defm : MTBUF_StoreIntrinsicPat; + defm : MTBUF_StoreIntrinsicPat; +} // End HasUnpackedD16VMem. + +let SubtargetPredicate = HasPackedD16VMem in { + defm : MTBUF_StoreIntrinsicPat; + defm : MTBUF_StoreIntrinsicPat; + defm : MTBUF_StoreIntrinsicPat; + defm : MTBUF_StoreIntrinsicPat; +} // End HasPackedD16VMem. + //===----------------------------------------------------------------------===// // Target instructions, move to the appropriate target TD file //===----------------------------------------------------------------------===// @@ -1628,6 +1759,35 @@ multiclass MUBUF_Real_AllAddr_vi op> { def _BOTHEN_vi : MUBUF_Real_vi (NAME#"_BOTHEN")>; } +class MUBUF_Real_gfx80 op, MUBUF_Pseudo ps> : + MUBUF_Real, + Enc64, + SIMCInstr { + let AssemblerPredicate=HasUnpackedD16VMem; + let DecoderNamespace="GFX80_UNPACKED"; + + let Inst{11-0} = !if(ps.has_offset, offset, ?); + let Inst{12} = ps.offen; + let Inst{13} = ps.idxen; + let Inst{14} = !if(ps.has_glc, glc, ps.glc_value); + let Inst{16} = lds; + let Inst{17} = !if(ps.has_slc, slc, ?); + let Inst{24-18} = op; + let Inst{31-26} = 0x38; //encoding + let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?); + let Inst{47-40} = !if(ps.has_vdata, vdata, ?); + let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?); + let Inst{55} = !if(ps.has_tfe, tfe, ?); + let Inst{63-56} = !if(ps.has_soffset, soffset, ?); +} + +multiclass MUBUF_Real_AllAddr_gfx80 op> { + def _OFFSET_gfx80 : MUBUF_Real_gfx80 (NAME#"_OFFSET")>; + def _OFFEN_gfx80 : MUBUF_Real_gfx80 (NAME#"_OFFEN")>; + def _IDXEN_gfx80 : MUBUF_Real_gfx80 (NAME#"_IDXEN")>; + def _BOTHEN_gfx80 : MUBUF_Real_gfx80 (NAME#"_BOTHEN")>; +} + multiclass MUBUF_Real_Atomic_vi op> : MUBUF_Real_AllAddr_vi { def _OFFSET_RTN_vi : MUBUF_Real_vi (NAME#"_OFFSET_RTN")>; @@ -1644,6 +1804,26 @@ defm BUFFER_STORE_FORMAT_X : MUBUF_Real_AllAddr_vi <0x04>; defm BUFFER_STORE_FORMAT_XY : MUBUF_Real_AllAddr_vi <0x05>; defm BUFFER_STORE_FORMAT_XYZ : MUBUF_Real_AllAddr_vi <0x06>; defm BUFFER_STORE_FORMAT_XYZW : MUBUF_Real_AllAddr_vi <0x07>; +let SubtargetPredicate = HasUnpackedD16VMem in { + defm BUFFER_LOAD_FORMAT_D16_X_gfx80 : MUBUF_Real_AllAddr_gfx80 <0x08>; + defm BUFFER_LOAD_FORMAT_D16_XY_gfx80 : MUBUF_Real_AllAddr_gfx80 <0x09>; + defm BUFFER_LOAD_FORMAT_D16_XYZ_gfx80 : MUBUF_Real_AllAddr_gfx80 <0x0a>; + defm BUFFER_LOAD_FORMAT_D16_XYZW_gfx80 : MUBUF_Real_AllAddr_gfx80 <0x0b>; + defm BUFFER_STORE_FORMAT_D16_X_gfx80 : MUBUF_Real_AllAddr_gfx80 <0x0c>; + defm BUFFER_STORE_FORMAT_D16_XY_gfx80 : MUBUF_Real_AllAddr_gfx80 <0x0d>; + defm BUFFER_STORE_FORMAT_D16_XYZ_gfx80 : MUBUF_Real_AllAddr_gfx80 <0x0e>; + defm BUFFER_STORE_FORMAT_D16_XYZW_gfx80 : MUBUF_Real_AllAddr_gfx80 <0x0f>; +} // End HasUnpackedD16VMem. 
+let SubtargetPredicate = HasPackedD16VMem in { + defm BUFFER_LOAD_FORMAT_D16_X : MUBUF_Real_AllAddr_vi <0x08>; + defm BUFFER_LOAD_FORMAT_D16_XY : MUBUF_Real_AllAddr_vi <0x09>; + defm BUFFER_LOAD_FORMAT_D16_XYZ : MUBUF_Real_AllAddr_vi <0x0a>; + defm BUFFER_LOAD_FORMAT_D16_XYZW : MUBUF_Real_AllAddr_vi <0x0b>; + defm BUFFER_STORE_FORMAT_D16_X : MUBUF_Real_AllAddr_vi <0x0c>; + defm BUFFER_STORE_FORMAT_D16_XY : MUBUF_Real_AllAddr_vi <0x0d>; + defm BUFFER_STORE_FORMAT_D16_XYZ : MUBUF_Real_AllAddr_vi <0x0e>; + defm BUFFER_STORE_FORMAT_D16_XYZW : MUBUF_Real_AllAddr_vi <0x0f>; +} // End HasPackedD16VMem. defm BUFFER_LOAD_UBYTE : MUBUF_Real_AllAddr_vi <0x10>; defm BUFFER_LOAD_SBYTE : MUBUF_Real_AllAddr_vi <0x11>; defm BUFFER_LOAD_USHORT : MUBUF_Real_AllAddr_vi <0x12>; @@ -1729,11 +1909,61 @@ multiclass MTBUF_Real_AllAddr_vi op> { def _BOTHEN_vi : MTBUF_Real_vi (NAME#"_BOTHEN")>; } -defm TBUFFER_LOAD_FORMAT_X : MTBUF_Real_AllAddr_vi <0>; -defm TBUFFER_LOAD_FORMAT_XY : MTBUF_Real_AllAddr_vi <1>; -//defm TBUFFER_LOAD_FORMAT_XYZ : MTBUF_Real_AllAddr_vi <2>; -defm TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Real_AllAddr_vi <3>; -defm TBUFFER_STORE_FORMAT_X : MTBUF_Real_AllAddr_vi <4>; -defm TBUFFER_STORE_FORMAT_XY : MTBUF_Real_AllAddr_vi <5>; -defm TBUFFER_STORE_FORMAT_XYZ : MTBUF_Real_AllAddr_vi <6>; -defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Real_AllAddr_vi <7>; +class MTBUF_Real_gfx80 op, MTBUF_Pseudo ps> : + MTBUF_Real, + Enc64, + SIMCInstr { + let AssemblerPredicate=HasUnpackedD16VMem; + let DecoderNamespace="GFX80_UNPACKED"; + + let Inst{11-0} = !if(ps.has_offset, offset, ?); + let Inst{12} = ps.offen; + let Inst{13} = ps.idxen; + let Inst{14} = !if(ps.has_glc, glc, ps.glc_value); + let Inst{18-15} = op; + let Inst{22-19} = !if(ps.has_dfmt, dfmt, ps.dfmt_value); + let Inst{25-23} = !if(ps.has_nfmt, nfmt, ps.nfmt_value); + let Inst{31-26} = 0x3a; //encoding + let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?); + let Inst{47-40} = !if(ps.has_vdata, vdata, ?); + let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?); + let Inst{54} = !if(ps.has_slc, slc, ?); + let Inst{55} = !if(ps.has_tfe, tfe, ?); + let Inst{63-56} = !if(ps.has_soffset, soffset, ?); +} + +multiclass MTBUF_Real_AllAddr_gfx80 op> { + def _OFFSET_gfx80 : MTBUF_Real_gfx80 (NAME#"_OFFSET")>; + def _OFFEN_gfx80 : MTBUF_Real_gfx80 (NAME#"_OFFEN")>; + def _IDXEN_gfx80 : MTBUF_Real_gfx80 (NAME#"_IDXEN")>; + def _BOTHEN_gfx80 : MTBUF_Real_gfx80 (NAME#"_BOTHEN")>; +} + +defm TBUFFER_LOAD_FORMAT_X : MTBUF_Real_AllAddr_vi <0x00>; +defm TBUFFER_LOAD_FORMAT_XY : MTBUF_Real_AllAddr_vi <0x01>; +defm TBUFFER_LOAD_FORMAT_XYZ : MTBUF_Real_AllAddr_vi <0x02>; +defm TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Real_AllAddr_vi <0x03>; +defm TBUFFER_STORE_FORMAT_X : MTBUF_Real_AllAddr_vi <0x04>; +defm TBUFFER_STORE_FORMAT_XY : MTBUF_Real_AllAddr_vi <0x05>; +defm TBUFFER_STORE_FORMAT_XYZ : MTBUF_Real_AllAddr_vi <0x06>; +defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Real_AllAddr_vi <0x07>; +let SubtargetPredicate = HasUnpackedD16VMem in { + defm TBUFFER_LOAD_FORMAT_D16_X_gfx80 : MTBUF_Real_AllAddr_gfx80 <0x08>; + defm TBUFFER_LOAD_FORMAT_D16_XY_gfx80 : MTBUF_Real_AllAddr_gfx80 <0x09>; + defm TBUFFER_LOAD_FORMAT_D16_XYZ_gfx80 : MTBUF_Real_AllAddr_gfx80 <0x0a>; + defm TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80 : MTBUF_Real_AllAddr_gfx80 <0x0b>; + defm TBUFFER_STORE_FORMAT_D16_X_gfx80 : MTBUF_Real_AllAddr_gfx80 <0x0c>; + defm TBUFFER_STORE_FORMAT_D16_XY_gfx80 : MTBUF_Real_AllAddr_gfx80 <0x0d>; + defm TBUFFER_STORE_FORMAT_D16_XYZ_gfx80 : MTBUF_Real_AllAddr_gfx80 <0x0e>; + defm 
TBUFFER_STORE_FORMAT_D16_XYZW_gfx80 : MTBUF_Real_AllAddr_gfx80 <0x0f>; +} // End HasUnpackedD16VMem. +let SubtargetPredicate = HasPackedD16VMem in { + defm TBUFFER_LOAD_FORMAT_D16_X : MTBUF_Real_AllAddr_vi <0x08>; + defm TBUFFER_LOAD_FORMAT_D16_XY : MTBUF_Real_AllAddr_vi <0x09>; + defm TBUFFER_LOAD_FORMAT_D16_XYZ : MTBUF_Real_AllAddr_vi <0x0a>; + defm TBUFFER_LOAD_FORMAT_D16_XYZW : MTBUF_Real_AllAddr_vi <0x0b>; + defm TBUFFER_STORE_FORMAT_D16_X : MTBUF_Real_AllAddr_vi <0x0c>; + defm TBUFFER_STORE_FORMAT_D16_XY : MTBUF_Real_AllAddr_vi <0x0d>; + defm TBUFFER_STORE_FORMAT_D16_XYZ : MTBUF_Real_AllAddr_vi <0x0e>; + defm TBUFFER_STORE_FORMAT_D16_XYZW : MTBUF_Real_AllAddr_vi <0x0f>; +} // End HasUnpackedD16VMem. diff --git a/lib/Target/AMDGPU/CaymanInstructions.td b/lib/Target/AMDGPU/CaymanInstructions.td index 0ba5acad680f..ae40c6387982 100644 --- a/lib/Target/AMDGPU/CaymanInstructions.td +++ b/lib/Target/AMDGPU/CaymanInstructions.td @@ -144,8 +144,8 @@ def VTX_READ_32_cm // to be caused by ALU instructions in the next instruction group that wrote // to the $src_gpr registers of the VTX_READ. // e.g. - // %T3_X = VTX_READ_PARAM_32_eg %T2_X, 24 - // %T2_X = MOV %ZERO + // %t3_x = VTX_READ_PARAM_32_eg killed %t2_x, 24 + // %t2_x = MOV %zero //Adding this constraint prevents this from happening. let Constraints = "$src_gpr.ptr = $dst_gpr"; } diff --git a/lib/Target/AMDGPU/DSInstructions.td b/lib/Target/AMDGPU/DSInstructions.td index 9fcfb1083bb1..1c38a0f9ac86 100644 --- a/lib/Target/AMDGPU/DSInstructions.td +++ b/lib/Target/AMDGPU/DSInstructions.td @@ -440,7 +440,7 @@ defm DS_XOR_RTN_B32 : DS_1A1D_RET_mc<"ds_xor_rtn_b32", VGPR_32, "ds_xor_b32">; defm DS_MSKOR_RTN_B32 : DS_1A2D_RET_mc<"ds_mskor_rtn_b32", VGPR_32, "ds_mskor_b32">; defm DS_CMPST_RTN_B32 : DS_1A2D_RET_mc<"ds_cmpst_rtn_b32", VGPR_32, "ds_cmpst_b32">; defm DS_CMPST_RTN_F32 : DS_1A2D_RET_mc<"ds_cmpst_rtn_f32", VGPR_32, "ds_cmpst_f32">; -defm DS_MIN_RTN_F32 : DS_1A1D_RET_mc <"ds_min_rtn_f32", VGPR_32, "ds_min_f32">; +defm DS_MIN_RTN_F32 : DS_1A1D_RET_mc<"ds_min_rtn_f32", VGPR_32, "ds_min_f32">; defm DS_MAX_RTN_F32 : DS_1A1D_RET_mc<"ds_max_rtn_f32", VGPR_32, "ds_max_f32">; defm DS_WRXCHG_RTN_B32 : DS_1A1D_RET_mc<"ds_wrxchg_rtn_b32">; @@ -600,6 +600,20 @@ class DSReadPat : GCNPat < (inst $ptr, (as_i16imm $offset), (i1 0)) >; +// FIXME: Passing name of PatFrag in workaround. Why doesn't +// !cast(frag.NAME#"_m0") work!? 
+multiclass DSReadPat_mc { + + let OtherPredicates = [LDSRequiresM0Init] in { + def : DSReadPat(frag#"_m0")>; + } + + let OtherPredicates = [NotLDSRequiresM0Init] in { + def : DSReadPat(inst.NAME#"_gfx9"), vt, !cast(frag)>; + } +} + + multiclass DSReadPat_Hi16 { def : GCNPat < (build_vector vt:$lo, (vt (frag (DS1Addr1Offset i32:$ptr, i32:$offset)))), @@ -624,30 +638,22 @@ multiclass DSReadPat_Lo16 { >; } - -def : DSReadPat ; -def : DSReadPat ; -def : DSReadPat ; -def : DSReadPat ; -def : DSReadPat ; -def : DSReadPat ; -def : DSReadPat ; -def : DSReadPat ; -def : DSReadPat ; +defm : DSReadPat_mc ; +defm : DSReadPat_mc ; +defm : DSReadPat_mc ; +defm : DSReadPat_mc ; +defm : DSReadPat_mc ; +defm : DSReadPat_mc ; +defm : DSReadPat_mc ; +defm : DSReadPat_mc ; +defm : DSReadPat_mc ; let AddedComplexity = 100 in { -def : DSReadPat ; +defm : DSReadPat_mc ; } // End AddedComplexity = 100 -def : GCNPat < - (v2i32 (load_local_m0 (DS64Bit4ByteAligned i32:$ptr, i8:$offset0, - i8:$offset1))), - (DS_READ2_B32 $ptr, $offset0, $offset1, (i1 0)) ->; - - let OtherPredicates = [HasD16LoadStore] in { let AddedComplexity = 100 in { defm : DSReadPat_Hi16; @@ -666,71 +672,122 @@ class DSWritePat : GCNPat < (inst $ptr, $value, (as_i16imm $offset), (i1 0)) >; -def : DSWritePat ; -def : DSWritePat ; -def : DSWritePat ; -def : DSWritePat ; -def : DSWritePat ; +multiclass DSWritePat_mc { + let OtherPredicates = [LDSRequiresM0Init] in { + def : DSWritePat(frag#"_m0")>; + } + + let OtherPredicates = [NotLDSRequiresM0Init] in { + def : DSWritePat(inst.NAME#"_gfx9"), vt, !cast(frag)>; + } +} + +defm : DSWritePat_mc ; +defm : DSWritePat_mc ; +defm : DSWritePat_mc ; +defm : DSWritePat_mc ; +defm : DSWritePat_mc ; let OtherPredicates = [HasD16LoadStore] in { def : DSWritePat ; def : DSWritePat ; } -let AddedComplexity = 100 in { -def : DSWritePat ; -} // End AddedComplexity = 100 +class DS64Bit4ByteAlignedReadPat : GCNPat < + (v2i32 (frag (DS64Bit4ByteAligned i32:$ptr, i8:$offset0, i8:$offset1))), + (inst $ptr, $offset0, $offset1, (i1 0)) +>; -def : GCNPat < - (store_local_m0 v2i32:$value, (DS64Bit4ByteAligned i32:$ptr, i8:$offset0, - i8:$offset1)), - (DS_WRITE2_B32 $ptr, (i32 (EXTRACT_SUBREG $value, sub0)), - (i32 (EXTRACT_SUBREG $value, sub1)), $offset0, $offset1, - (i1 0)) +class DS64Bit4ByteAlignedWritePat : GCNPat< + (frag v2i32:$value, (DS64Bit4ByteAligned i32:$ptr, i8:$offset0, i8:$offset1)), + (inst $ptr, (i32 (EXTRACT_SUBREG $value, sub0)), + (i32 (EXTRACT_SUBREG $value, sub1)), $offset0, $offset1, + (i1 0)) >; +let OtherPredicates = [LDSRequiresM0Init] in { +def : DS64Bit4ByteAlignedReadPat; +def : DS64Bit4ByteAlignedWritePat; +} + +let OtherPredicates = [NotLDSRequiresM0Init] in { +def : DS64Bit4ByteAlignedReadPat; +def : DS64Bit4ByteAlignedWritePat; +} + + +let AddedComplexity = 100 in { + +defm : DSWritePat_mc ; +} // End AddedComplexity = 100 class DSAtomicRetPat : GCNPat < (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$value), (inst $ptr, $value, (as_i16imm $offset), (i1 0)) >; +multiclass DSAtomicRetPat_mc { + let OtherPredicates = [LDSRequiresM0Init] in { + def : DSAtomicRetPat(frag#"_m0")>; + } + + let OtherPredicates = [NotLDSRequiresM0Init] in { + def : DSAtomicRetPat(inst.NAME#"_gfx9"), vt, !cast(frag)>; + } +} + + + class DSAtomicCmpXChg : GCNPat < (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$cmp, vt:$swap), (inst $ptr, $cmp, $swap, (as_i16imm $offset), (i1 0)) >; +multiclass DSAtomicCmpXChg_mc { + let OtherPredicates = [LDSRequiresM0Init] in { + def : DSAtomicCmpXChg(frag#"_m0")>; + } + + let 
OtherPredicates = [NotLDSRequiresM0Init] in { + def : DSAtomicCmpXChg(inst.NAME#"_gfx9"), vt, !cast(frag)>; + } +} + + // 32-bit atomics. -def : DSAtomicRetPat; -def : DSAtomicRetPat; -def : DSAtomicRetPat; -def : DSAtomicRetPat; -def : DSAtomicRetPat; -def : DSAtomicRetPat; -def : DSAtomicRetPat; -def : DSAtomicRetPat; -def : DSAtomicRetPat; -def : DSAtomicRetPat; -def : DSAtomicRetPat; -def : DSAtomicRetPat; -def : DSAtomicCmpXChg; +defm : DSAtomicRetPat_mc; +defm : DSAtomicRetPat_mc; +defm : DSAtomicRetPat_mc; +defm : DSAtomicRetPat_mc; +defm : DSAtomicRetPat_mc; +defm : DSAtomicRetPat_mc; +defm : DSAtomicRetPat_mc; +defm : DSAtomicRetPat_mc; +defm : DSAtomicRetPat_mc; +defm : DSAtomicRetPat_mc; +defm : DSAtomicRetPat_mc; +defm : DSAtomicRetPat_mc; +defm : DSAtomicCmpXChg_mc; +defm : DSAtomicRetPat_mc; +defm : DSAtomicRetPat_mc; +defm : DSAtomicRetPat_mc; // 64-bit atomics. -def : DSAtomicRetPat; -def : DSAtomicRetPat; -def : DSAtomicRetPat; -def : DSAtomicRetPat; -def : DSAtomicRetPat; -def : DSAtomicRetPat; -def : DSAtomicRetPat; -def : DSAtomicRetPat; -def : DSAtomicRetPat; -def : DSAtomicRetPat; -def : DSAtomicRetPat; -def : DSAtomicRetPat; - -def : DSAtomicCmpXChg; +defm : DSAtomicRetPat_mc; +defm : DSAtomicRetPat_mc; +defm : DSAtomicRetPat_mc; +defm : DSAtomicRetPat_mc; +defm : DSAtomicRetPat_mc; +defm : DSAtomicRetPat_mc; +defm : DSAtomicRetPat_mc; +defm : DSAtomicRetPat_mc; +defm : DSAtomicRetPat_mc; +defm : DSAtomicRetPat_mc; +defm : DSAtomicRetPat_mc; +defm : DSAtomicRetPat_mc; + +defm : DSAtomicCmpXChg_mc; //===----------------------------------------------------------------------===// // Real instructions diff --git a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index 9f399c386482..6ea9367f2702 100644 --- a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -234,6 +234,10 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, AMDGPU::OpName::src2_modifiers); } + if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::MIMG)) { + Res = convertMIMGInst(MI); + } + if (Res && IsSDWA) Res = convertSDWAInst(MI); @@ -250,7 +254,7 @@ DecodeStatus AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const { int SDst = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst); if (SDst != -1) { // VOPC - insert VCC register as sdst - insertNamedMCOperand(MI, MCOperand::createReg(AMDGPU::VCC), + insertNamedMCOperand(MI, createRegOperand(AMDGPU::VCC), AMDGPU::OpName::sdst); } else { // VOP1/2 - insert omod if present in instruction @@ -260,6 +264,42 @@ DecodeStatus AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const { return MCDisassembler::Success; } +DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const { + int VDataIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), + AMDGPU::OpName::vdata); + + int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), + AMDGPU::OpName::dmask); + unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf; + if (DMask == 0) + return MCDisassembler::Success; + + unsigned ChannelCount = countPopulation(DMask); + if (ChannelCount == 1) + return MCDisassembler::Success; + + int NewOpcode = AMDGPU::getMaskedMIMGOp(*MCII, MI.getOpcode(), ChannelCount); + assert(NewOpcode != -1 && "could not find matching mimg channel instruction"); + auto RCID = MCII->get(NewOpcode).OpInfo[VDataIdx].RegClass; + + // Widen the register to the correct number of enabled channels. 
+ unsigned Vdata0 = MI.getOperand(VDataIdx).getReg(); + auto NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0, + &MRI.getRegClass(RCID)); + if (NewVdata == AMDGPU::NoRegister) { + // It's possible to encode this such that the low register + enabled + // components exceeds the register count. + return MCDisassembler::Success; + } + + MI.setOpcode(NewOpcode); + // vaddr will be always appear as a single VGPR. This will look different than + // how it is usually emitted because the number of register components is not + // in the instruction encoding. + MI.getOperand(VDataIdx) = MCOperand::createReg(NewVdata); + return MCDisassembler::Success; +} + const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const { return getContext().getRegisterInfo()-> getRegClassName(&AMDGPUMCRegisterClasses[RegClassID]); @@ -277,7 +317,7 @@ MCOperand AMDGPUDisassembler::errOperand(unsigned V, inline MCOperand AMDGPUDisassembler::createRegOperand(unsigned int RegId) const { - return MCOperand::createReg(RegId); + return MCOperand::createReg(AMDGPU::getMCReg(RegId, STI)); } inline @@ -308,10 +348,12 @@ MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID, case AMDGPU::TTMP_128RegClassID: // ToDo: unclear if s[100:104] is available on VI. Can we use VCC as SGPR in // this bundle? - case AMDGPU::SReg_256RegClassID: - // ToDo: unclear if s[96:104] is available on VI. Can we use VCC as SGPR in + case AMDGPU::SGPR_256RegClassID: + case AMDGPU::TTMP_256RegClassID: + // ToDo: unclear if s[96:104] is available on VI. Can we use VCC as SGPR in // this bundle? - case AMDGPU::SReg_512RegClassID: + case AMDGPU::SGPR_512RegClassID: + case AMDGPU::TTMP_512RegClassID: shift = 2; break; // ToDo: unclear if s[88:104] is available on VI. Can we use VCC as SGPR in @@ -401,11 +443,11 @@ MCOperand AMDGPUDisassembler::decodeOperand_SReg_128(unsigned Val) const { } MCOperand AMDGPUDisassembler::decodeOperand_SReg_256(unsigned Val) const { - return createSRegOperand(AMDGPU::SReg_256RegClassID, Val); + return decodeDstOp(OPW256, Val); } MCOperand AMDGPUDisassembler::decodeOperand_SReg_512(unsigned Val) const { - return createSRegOperand(AMDGPU::SReg_512RegClassID, Val); + return decodeDstOp(OPW512, Val); } MCOperand AMDGPUDisassembler::decodeLiteralConstant() const { @@ -553,6 +595,8 @@ unsigned AMDGPUDisassembler::getSgprClassId(const OpWidthTy Width) const { return SGPR_32RegClassID; case OPW64: return SGPR_64RegClassID; case OPW128: return SGPR_128RegClassID; + case OPW256: return SGPR_256RegClassID; + case OPW512: return SGPR_512RegClassID; } } @@ -568,9 +612,20 @@ unsigned AMDGPUDisassembler::getTtmpClassId(const OpWidthTy Width) const { return TTMP_32RegClassID; case OPW64: return TTMP_64RegClassID; case OPW128: return TTMP_128RegClassID; + case OPW256: return TTMP_256RegClassID; + case OPW512: return TTMP_512RegClassID; } } +int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const { + using namespace AMDGPU::EncValues; + + unsigned TTmpMin = isGFX9() ? TTMP_GFX9_MIN : TTMP_VI_MIN; + unsigned TTmpMax = isGFX9() ? TTMP_GFX9_MAX : TTMP_VI_MAX; + + return (TTmpMin <= Val && Val <= TTmpMax)? Val - TTmpMin : -1; +} + MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val) const { using namespace AMDGPU::EncValues; @@ -583,8 +638,10 @@ MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val) c assert(SGPR_MIN == 0); // "SGPR_MIN <= Val" is always true and causes compilation warning. 
return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN); } - if (TTMP_MIN <= Val && Val <= TTMP_MAX) { - return createSRegOperand(getTtmpClassId(Width), Val - TTMP_MIN); + + int TTmpIdx = getTTmpIdx(Val); + if (TTmpIdx >= 0) { + return createSRegOperand(getTtmpClassId(Width), TTmpIdx); } if (INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX) @@ -608,21 +665,39 @@ MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val) c } } +MCOperand AMDGPUDisassembler::decodeDstOp(const OpWidthTy Width, unsigned Val) const { + using namespace AMDGPU::EncValues; + + assert(Val < 128); + assert(Width == OPW256 || Width == OPW512); + + if (Val <= SGPR_MAX) { + assert(SGPR_MIN == 0); // "SGPR_MIN <= Val" is always true and causes compilation warning. + return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN); + } + + int TTmpIdx = getTTmpIdx(Val); + if (TTmpIdx >= 0) { + return createSRegOperand(getTtmpClassId(Width), TTmpIdx); + } + + llvm_unreachable("unknown dst register"); +} + MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const { using namespace AMDGPU; switch (Val) { - case 102: return createRegOperand(getMCReg(FLAT_SCR_LO, STI)); - case 103: return createRegOperand(getMCReg(FLAT_SCR_HI, STI)); - // ToDo: no support for xnack_mask_lo/_hi register - case 104: - case 105: break; + case 102: return createRegOperand(FLAT_SCR_LO); + case 103: return createRegOperand(FLAT_SCR_HI); + case 104: return createRegOperand(XNACK_MASK_LO); + case 105: return createRegOperand(XNACK_MASK_HI); case 106: return createRegOperand(VCC_LO); case 107: return createRegOperand(VCC_HI); - case 108: return createRegOperand(TBA_LO); - case 109: return createRegOperand(TBA_HI); - case 110: return createRegOperand(TMA_LO); - case 111: return createRegOperand(TMA_HI); + case 108: assert(!isGFX9()); return createRegOperand(TBA_LO); + case 109: assert(!isGFX9()); return createRegOperand(TBA_HI); + case 110: assert(!isGFX9()); return createRegOperand(TMA_LO); + case 111: assert(!isGFX9()); return createRegOperand(TMA_HI); case 124: return createRegOperand(M0); case 126: return createRegOperand(EXEC_LO); case 127: return createRegOperand(EXEC_HI); @@ -645,10 +720,11 @@ MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const { using namespace AMDGPU; switch (Val) { - case 102: return createRegOperand(getMCReg(FLAT_SCR, STI)); + case 102: return createRegOperand(FLAT_SCR); + case 104: return createRegOperand(XNACK_MASK); case 106: return createRegOperand(VCC); - case 108: return createRegOperand(TBA); - case 110: return createRegOperand(TMA); + case 108: assert(!isGFX9()); return createRegOperand(TBA); + case 110: assert(!isGFX9()); return createRegOperand(TMA); case 126: return createRegOperand(EXEC); default: break; } @@ -656,8 +732,9 @@ MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const { } MCOperand AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width, - unsigned Val) const { + const unsigned Val) const { using namespace AMDGPU::SDWA; + using namespace AMDGPU::EncValues; if (STI.getFeatureBits()[AMDGPU::FeatureGFX9]) { // XXX: static_cast is needed to avoid stupid warning: @@ -672,8 +749,21 @@ MCOperand AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width, return createSRegOperand(getSgprClassId(Width), Val - SDWA9EncValues::SRC_SGPR_MIN); } + if (SDWA9EncValues::SRC_TTMP_MIN <= Val && + Val <= SDWA9EncValues::SRC_TTMP_MAX) { + return createSRegOperand(getTtmpClassId(Width), + Val - SDWA9EncValues::SRC_TTMP_MIN); + } + + const unsigned 
SVal = Val - SDWA9EncValues::SRC_SGPR_MIN; + + if (INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX) + return decodeIntImmed(SVal); + + if (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX) + return decodeFPImmed(Width, SVal); - return decodeSpecialReg32(Val - SDWA9EncValues::SRC_SGPR_MIN); + return decodeSpecialReg32(SVal); } else if (STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands]) { return createRegOperand(getVgprClassId(Width), Val); } @@ -695,7 +785,11 @@ MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const { "SDWAVopcDst should be present only on GFX9"); if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) { Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK; - if (Val > AMDGPU::EncValues::SGPR_MAX) { + + int TTmpIdx = getTTmpIdx(Val); + if (TTmpIdx >= 0) { + return createSRegOperand(getTtmpClassId(OPW64), TTmpIdx); + } else if (Val > AMDGPU::EncValues::SGPR_MAX) { return decodeSpecialReg64(Val); } else { return createSRegOperand(getSgprClassId(OPW64), Val); @@ -705,6 +799,14 @@ MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const { } } +bool AMDGPUDisassembler::isVI() const { + return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands]; +} + +bool AMDGPUDisassembler::isGFX9() const { + return STI.getFeatureBits()[AMDGPU::FeatureGFX9]; +} + //===----------------------------------------------------------------------===// // AMDGPUSymbolizer //===----------------------------------------------------------------------===// @@ -758,7 +860,7 @@ static MCSymbolizer *createAMDGPUSymbolizer(const Triple &/*TT*/, static MCDisassembler *createAMDGPUDisassembler(const Target &T, const MCSubtargetInfo &STI, MCContext &Ctx) { - return new AMDGPUDisassembler(STI, Ctx); + return new AMDGPUDisassembler(STI, Ctx, T.createMCInstrInfo()); } extern "C" void LLVMInitializeAMDGPUDisassembler() { diff --git a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h index c487fe9b9db9..75cfc5e11282 100644 --- a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h +++ b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h @@ -17,16 +17,18 @@ #define LLVM_LIB_TARGET_AMDGPU_DISASSEMBLER_AMDGPUDISASSEMBLER_H #include "llvm/ADT/ArrayRef.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCDisassembler/MCRelocationInfo.h" #include "llvm/MC/MCDisassembler/MCSymbolizer.h" + #include #include #include namespace llvm { -class MCContext; class MCInst; class MCOperand; class MCSubtargetInfo; @@ -38,13 +40,16 @@ class Twine; class AMDGPUDisassembler : public MCDisassembler { private: + std::unique_ptr const MCII; + const MCRegisterInfo &MRI; mutable ArrayRef Bytes; mutable uint32_t Literal; mutable bool HasLiteral; public: - AMDGPUDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) : - MCDisassembler(STI, Ctx) {} + AMDGPUDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, + MCInstrInfo const *MCII) : + MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()) {} ~AMDGPUDisassembler() override = default; @@ -64,6 +69,7 @@ class AMDGPUDisassembler : public MCDisassembler { uint64_t Address) const; DecodeStatus convertSDWAInst(MCInst &MI) const; + DecodeStatus convertMIMGInst(MCInst &MI) const; MCOperand decodeOperand_VGPR_32(unsigned Val) const; MCOperand decodeOperand_VS_32(unsigned Val) const; @@ -89,6 +95,8 @@ class AMDGPUDisassembler : public MCDisassembler { OPW32, OPW64, OPW128, + OPW256, + OPW512, OPW16, OPWV216, 
OPW_LAST_, @@ -104,6 +112,7 @@ class AMDGPUDisassembler : public MCDisassembler { MCOperand decodeLiteralConstant() const; MCOperand decodeSrcOp(const OpWidthTy Width, unsigned Val) const; + MCOperand decodeDstOp(const OpWidthTy Width, unsigned Val) const; MCOperand decodeSpecialReg32(unsigned Val) const; MCOperand decodeSpecialReg64(unsigned Val) const; @@ -111,7 +120,12 @@ class AMDGPUDisassembler : public MCDisassembler { MCOperand decodeSDWASrc16(unsigned Val) const; MCOperand decodeSDWASrc32(unsigned Val) const; MCOperand decodeSDWAVopcDst(unsigned Val) const; -}; + + int getTTmpIdx(unsigned Val) const; + + bool isVI() const; + bool isGFX9() const; + }; //===----------------------------------------------------------------------===// // AMDGPUSymbolizer diff --git a/lib/Target/AMDGPU/EvergreenInstructions.td b/lib/Target/AMDGPU/EvergreenInstructions.td index bccad826d18f..5e26f97b0c86 100644 --- a/lib/Target/AMDGPU/EvergreenInstructions.td +++ b/lib/Target/AMDGPU/EvergreenInstructions.td @@ -212,8 +212,8 @@ def VTX_READ_32_eg // to be caused by ALU instructions in the next instruction group that wrote // to the $src_gpr registers of the VTX_READ. // e.g. - // %T3_X = VTX_READ_PARAM_32_eg %T2_X, 24 - // %T2_X = MOV %ZERO + // %t3_x = VTX_READ_PARAM_32_eg killed %t2_x, 24 + // %t2_x = MOV %zero //Adding this constraint prevents this from happening. let Constraints = "$src_gpr.ptr = $dst_gpr"; } diff --git a/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index be0588b45e30..dd515b0bf2f1 100644 --- a/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -148,6 +148,9 @@ GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { checkReadM0Hazards(MI) > 0) return NoopHazard; + if (MI->isInlineAsm() && checkInlineAsmHazards(MI) > 0) + return NoopHazard; + if (checkAnyInstHazards(MI) > 0) return NoopHazard; @@ -179,6 +182,9 @@ unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) { if (isRWLane(MI->getOpcode())) WaitStates = std::max(WaitStates, checkRWLaneHazards(MI)); + if (MI->isInlineAsm()) + return std::max(WaitStates, checkInlineAsmHazards(MI)); + if (isSGetReg(MI->getOpcode())) return std::max(WaitStates, checkGetRegHazards(MI)); @@ -525,39 +531,76 @@ int GCNHazardRecognizer::createsVALUHazard(const MachineInstr &MI) { return -1; } +int GCNHazardRecognizer::checkVALUHazardsHelper(const MachineOperand &Def, + const MachineRegisterInfo &MRI) { + // Helper to check for the hazard where VMEM instructions that store more than + // 8 bytes can have there store data over written by the next instruction. + const SIRegisterInfo *TRI = ST.getRegisterInfo(); + + const int VALUWaitStates = 1; + int WaitStatesNeeded = 0; + + if (!TRI->isVGPR(MRI, Def.getReg())) + return WaitStatesNeeded; + unsigned Reg = Def.getReg(); + auto IsHazardFn = [this, Reg, TRI] (MachineInstr *MI) { + int DataIdx = createsVALUHazard(*MI); + return DataIdx >= 0 && + TRI->regsOverlap(MI->getOperand(DataIdx).getReg(), Reg); + }; + int WaitStatesNeededForDef = + VALUWaitStates - getWaitStatesSince(IsHazardFn); + WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef); + + return WaitStatesNeeded; +} + int GCNHazardRecognizer::checkVALUHazards(MachineInstr *VALU) { // This checks for the hazard where VMEM instructions that store more than // 8 bytes can have there store data over written by the next instruction. 
if (!ST.has12DWordStoreHazard()) return 0; - const SIRegisterInfo *TRI = ST.getRegisterInfo(); - const MachineRegisterInfo &MRI = VALU->getParent()->getParent()->getRegInfo(); - - const int VALUWaitStates = 1; + const MachineRegisterInfo &MRI = MF.getRegInfo(); int WaitStatesNeeded = 0; for (const MachineOperand &Def : VALU->defs()) { - if (!TRI->isVGPR(MRI, Def.getReg())) - continue; - unsigned Reg = Def.getReg(); - auto IsHazardFn = [this, Reg, TRI] (MachineInstr *MI) { - int DataIdx = createsVALUHazard(*MI); - return DataIdx >= 0 && - TRI->regsOverlap(MI->getOperand(DataIdx).getReg(), Reg); - }; - int WaitStatesNeededForDef = - VALUWaitStates - getWaitStatesSince(IsHazardFn); - WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef); + WaitStatesNeeded = std::max(WaitStatesNeeded, checkVALUHazardsHelper(Def, MRI)); + } + + return WaitStatesNeeded; +} + +int GCNHazardRecognizer::checkInlineAsmHazards(MachineInstr *IA) { + // This checks for hazards associated with inline asm statements. + // Since inline asms can contain just about anything, we use this + // to call/leverage other check*Hazard routines. Note that + // this function doesn't attempt to address all possible inline asm + // hazards (good luck), but is a collection of what has been + // problematic thus far. + + // see checkVALUHazards() + if (!ST.has12DWordStoreHazard()) + return 0; + + const MachineRegisterInfo &MRI = MF.getRegInfo(); + int WaitStatesNeeded = 0; + + for (unsigned I = InlineAsm::MIOp_FirstOperand, E = IA->getNumOperands(); + I != E; ++I) { + const MachineOperand &Op = IA->getOperand(I); + if (Op.isReg() && Op.isDef()) { + WaitStatesNeeded = std::max(WaitStatesNeeded, checkVALUHazardsHelper(Op, MRI)); + } } + return WaitStatesNeeded; } int GCNHazardRecognizer::checkRWLaneHazards(MachineInstr *RWLane) { const SIInstrInfo *TII = ST.getInstrInfo(); const SIRegisterInfo *TRI = ST.getRegisterInfo(); - const MachineRegisterInfo &MRI = - RWLane->getParent()->getParent()->getRegInfo(); + const MachineRegisterInfo &MRI = MF.getRegInfo(); const MachineOperand *LaneSelectOp = TII->getNamedOperand(*RWLane, AMDGPU::OpName::src1); diff --git a/lib/Target/AMDGPU/GCNHazardRecognizer.h b/lib/Target/AMDGPU/GCNHazardRecognizer.h index 01682acfac41..f9a6e395a454 100644 --- a/lib/Target/AMDGPU/GCNHazardRecognizer.h +++ b/lib/Target/AMDGPU/GCNHazardRecognizer.h @@ -23,6 +23,8 @@ namespace llvm { class MachineFunction; class MachineInstr; +class MachineOperand; +class MachineRegisterInfo; class ScheduleDAG; class SIInstrInfo; class SIRegisterInfo; @@ -67,8 +69,10 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer { int checkSetRegHazards(MachineInstr *SetRegInstr); int createsVALUHazard(const MachineInstr &MI); int checkVALUHazards(MachineInstr *VALU); + int checkVALUHazardsHelper(const MachineOperand &Def, const MachineRegisterInfo &MRI); int checkRWLaneHazards(MachineInstr *RWLane); int checkRFEHazards(MachineInstr *RFE); + int checkInlineAsmHazards(MachineInstr *IA); int checkAnyInstHazards(MachineInstr *MI); int checkReadM0Hazards(MachineInstr *SMovRel); public: diff --git a/lib/Target/AMDGPU/GCNIterativeScheduler.cpp b/lib/Target/AMDGPU/GCNIterativeScheduler.cpp index 942063d5f933..a0e4f7ff24cb 100644 --- a/lib/Target/AMDGPU/GCNIterativeScheduler.cpp +++ b/lib/Target/AMDGPU/GCNIterativeScheduler.cpp @@ -14,7 +14,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include 
"llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/RegisterPressure.h" @@ -63,8 +63,8 @@ static void printRegion(raw_ostream &OS, unsigned MaxInstNum = std::numeric_limits::max()) { auto BB = Begin->getParent(); - OS << BB->getParent()->getName() << ":BB#" << BB->getNumber() - << ' ' << BB->getName() << ":\n"; + OS << BB->getParent()->getName() << ":" << printMBBReference(*BB) << ' ' + << BB->getName() << ":\n"; auto I = Begin; MaxInstNum = std::max(MaxInstNum, 1u); for (; I != End && MaxInstNum; ++I, --MaxInstNum) { @@ -566,7 +566,7 @@ void GCNIterativeScheduler::scheduleILP( bool TryMaximizeOccupancy) { const auto &ST = MF.getSubtarget(); auto TgtOcc = std::min(ST.getOccupancyWithLocalMemSize(MF), - ST.getWavesPerEU(*MF.getFunction()).second); + ST.getWavesPerEU(MF.getFunction()).second); sortRegionsByPressure(TgtOcc); auto Occ = Regions.front()->MaxPressure.getOccupancy(ST); diff --git a/lib/Target/AMDGPU/GCNProcessors.td b/lib/Target/AMDGPU/GCNProcessors.td index 3b9d552d127a..b2a3f652abd8 100644 --- a/lib/Target/AMDGPU/GCNProcessors.td +++ b/lib/Target/AMDGPU/GCNProcessors.td @@ -53,10 +53,6 @@ def : ProcessorModel<"gfx700", SIQuarterSpeedModel, [FeatureISAVersion7_0_0] >; -def : ProcessorModel<"bonaire", SIQuarterSpeedModel, - [FeatureISAVersion7_0_0] ->; - def : ProcessorModel<"kaveri", SIQuarterSpeedModel, [FeatureISAVersion7_0_0] >; @@ -85,6 +81,14 @@ def : ProcessorModel<"mullins", SIQuarterSpeedModel, [FeatureISAVersion7_0_3] >; +def : ProcessorModel<"gfx704", SIQuarterSpeedModel, + [FeatureISAVersion7_0_4] +>; + +def : ProcessorModel<"bonaire", SIQuarterSpeedModel, + [FeatureISAVersion7_0_4] +>; + //===----------------------------------------------------------------------===// // GCN GFX8 (Volcanic Islands (VI)). 
//===----------------------------------------------------------------------===// @@ -129,10 +133,6 @@ def : ProcessorModel<"polaris11", SIQuarterSpeedModel, [FeatureISAVersion8_0_3] >; -def : ProcessorModel<"gfx804", SIQuarterSpeedModel, - [FeatureISAVersion8_0_4] ->; - def : ProcessorModel<"gfx810", SIQuarterSpeedModel, [FeatureISAVersion8_1_0] >; @@ -149,14 +149,6 @@ def : ProcessorModel<"gfx900", SIQuarterSpeedModel, [FeatureISAVersion9_0_0] >; -def : ProcessorModel<"gfx901", SIQuarterSpeedModel, - [FeatureISAVersion9_0_1] ->; - def : ProcessorModel<"gfx902", SIQuarterSpeedModel, [FeatureISAVersion9_0_2] >; - -def : ProcessorModel<"gfx903", SIQuarterSpeedModel, - [FeatureISAVersion9_0_3] ->; diff --git a/lib/Target/AMDGPU/GCNRegPressure.cpp b/lib/Target/AMDGPU/GCNRegPressure.cpp index 1204f86e4620..992bb7cceb6f 100644 --- a/lib/Target/AMDGPU/GCNRegPressure.cpp +++ b/lib/Target/AMDGPU/GCNRegPressure.cpp @@ -12,7 +12,7 @@ #include "SIRegisterInfo.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/LiveInterval.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" diff --git a/lib/Target/AMDGPU/GCNRegPressure.h b/lib/Target/AMDGPU/GCNRegPressure.h index 32a199d510c5..e418aa0fe911 100644 --- a/lib/Target/AMDGPU/GCNRegPressure.h +++ b/lib/Target/AMDGPU/GCNRegPressure.h @@ -12,7 +12,7 @@ #include "AMDGPUSubtarget.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/SlotIndexes.h" diff --git a/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/lib/Target/AMDGPU/GCNSchedStrategy.cpp index 155b400ba022..cd7ccb4ac316 100644 --- a/lib/Target/AMDGPU/GCNSchedStrategy.cpp +++ b/lib/Target/AMDGPU/GCNSchedStrategy.cpp @@ -37,7 +37,7 @@ static unsigned getMaxWaves(unsigned SGPRs, unsigned VGPRs, ST.getOccupancyWithNumVGPRs(VGPRs)); return std::min(MinRegOccupancy, ST.getOccupancyWithLocalMemSize(MFI->getLDSSize(), - *MF.getFunction())); + MF.getFunction())); } void GCNMaxOccupancySchedStrategy::initialize(ScheduleDAGMI *DAG) { @@ -81,7 +81,7 @@ void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU Cand.AtTop = AtTop; // getDownwardPressure() and getUpwardPressure() make temporary changes to - // the the tracker, so we need to pass those function a non-const copy. + // the tracker, so we need to pass those function a non-const copy. 
RegPressureTracker &TempTracker = const_cast(RPTracker); std::vector Pressure; @@ -315,7 +315,7 @@ GCNScheduleDAGMILive::GCNScheduleDAGMILive(MachineSchedContext *C, ST(MF.getSubtarget()), MFI(*MF.getInfo()), StartingOccupancy(ST.getOccupancyWithLocalMemSize(MFI.getLDSSize(), - *MF.getFunction())), + MF.getFunction())), MinOccupancy(StartingOccupancy), Stage(0), RegionIdx(0) { DEBUG(dbgs() << "Starting occupancy is " << StartingOccupancy << ".\n"); @@ -330,8 +330,9 @@ void GCNScheduleDAGMILive::schedule() { std::vector Unsched; Unsched.reserve(NumRegionInstrs); - for (auto &I : *this) + for (auto &I : *this) { Unsched.push_back(&I); + } GCNRegPressure PressureBefore; if (LIS) { @@ -387,10 +388,14 @@ void GCNScheduleDAGMILive::schedule() { DEBUG(dbgs() << "Attempting to revert scheduling.\n"); RegionEnd = RegionBegin; for (MachineInstr *MI : Unsched) { + if (MI->isDebugValue()) + continue; + if (MI->getIterator() != RegionEnd) { BB->remove(MI); BB->insert(RegionEnd, MI); - LIS->handleMove(*MI, true); + if (!MI->isDebugValue()) + LIS->handleMove(*MI, true); } // Reset read-undef flags and update them later. for (auto &Op : MI->operands()) @@ -398,13 +403,15 @@ void GCNScheduleDAGMILive::schedule() { Op.setIsUndef(false); RegisterOperands RegOpers; RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false); - if (ShouldTrackLaneMasks) { - // Adjust liveness and add missing dead+read-undef flags. - SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot(); - RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI); - } else { - // Adjust for missing dead-def flags. - RegOpers.detectDeadDefs(*MI, *LIS); + if (!MI->isDebugValue()) { + if (ShouldTrackLaneMasks) { + // Adjust liveness and add missing dead+read-undef flags. + SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot(); + RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI); + } else { + // Adjust for missing dead-def flags. 
+ RegOpers.detectDeadDefs(*MI, *LIS); + } } RegionEnd = MI->getIterator(); ++RegionEnd; @@ -531,9 +538,8 @@ void GCNScheduleDAGMILive::finalizeSchedule() { } DEBUG(dbgs() << "********** MI Scheduling **********\n"); - DEBUG(dbgs() << MF.getName() - << ":BB#" << MBB->getNumber() << " " << MBB->getName() - << "\n From: " << *begin() << " To: "; + DEBUG(dbgs() << MF.getName() << ":" << printMBBReference(*MBB) << " " + << MBB->getName() << "\n From: " << *begin() << " To: "; if (RegionEnd != MBB->end()) dbgs() << *RegionEnd; else dbgs() << "End"; dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n'); diff --git a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp index 2768e5c9984b..e189b7d0eb9c 100644 --- a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp +++ b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp @@ -267,6 +267,9 @@ void AMDGPUInstPrinter::printRegOperand(unsigned RegNo, raw_ostream &O, case AMDGPU::FLAT_SCR: O << "flat_scratch"; return; + case AMDGPU::XNACK_MASK: + O << "xnack_mask"; + return; case AMDGPU::VCC_LO: O << "vcc_lo"; return; @@ -297,6 +300,12 @@ void AMDGPUInstPrinter::printRegOperand(unsigned RegNo, raw_ostream &O, case AMDGPU::FLAT_SCR_HI: O << "flat_scratch_hi"; return; + case AMDGPU::XNACK_MASK_LO: + O << "xnack_mask_lo"; + return; + case AMDGPU::XNACK_MASK_HI: + O << "xnack_mask_hi"; + return; case AMDGPU::FP_REG: case AMDGPU::SP_REG: case AMDGPU::SCRATCH_WAVE_OFFSET_REG: @@ -335,25 +344,15 @@ void AMDGPUInstPrinter::printRegOperand(unsigned RegNo, raw_ostream &O, } else if (MRI.getRegClass(AMDGPU::VReg_256RegClassID).contains(RegNo)) { O << 'v'; NumRegs = 8; - } else if (MRI.getRegClass(AMDGPU::SReg_256RegClassID).contains(RegNo)) { + } else if (MRI.getRegClass(AMDGPU::SGPR_256RegClassID).contains(RegNo)) { O << 's'; NumRegs = 8; } else if (MRI.getRegClass(AMDGPU::VReg_512RegClassID).contains(RegNo)) { O << 'v'; NumRegs = 16; - } else if (MRI.getRegClass(AMDGPU::SReg_512RegClassID).contains(RegNo)) { + } else if (MRI.getRegClass(AMDGPU::SGPR_512RegClassID).contains(RegNo)) { O << 's'; NumRegs = 16; - } else if (MRI.getRegClass(AMDGPU::TTMP_64RegClassID).contains(RegNo)) { - O << "ttmp"; - NumRegs = 2; - // Trap temps start at offset 112. TODO: Get this from tablegen. - RegIdx -= 112; - } else if (MRI.getRegClass(AMDGPU::TTMP_128RegClassID).contains(RegNo)) { - O << "ttmp"; - NumRegs = 4; - // Trap temps start at offset 112. TODO: Get this from tablegen. 
- RegIdx -= 112; } else { O << getRegisterName(RegNo); return; @@ -1264,7 +1263,10 @@ void AMDGPUInstPrinter::printHwreg(const MCInst *MI, unsigned OpNo, const unsigned Width = ((SImm16 & WIDTH_M1_MASK_) >> WIDTH_M1_SHIFT_) + 1; O << "hwreg("; - if (ID_SYMBOLIC_FIRST_ <= Id && Id < ID_SYMBOLIC_LAST_) { + unsigned Last = ID_SYMBOLIC_LAST_; + if (AMDGPU::isSI(STI) || AMDGPU::isCI(STI) || AMDGPU::isVI(STI)) + Last = ID_SYMBOLIC_FIRST_GFX9_; + if (ID_SYMBOLIC_FIRST_ <= Id && Id < Last && IdSymbolic[Id]) { O << IdSymbolic[Id]; } else { O << Id; diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp index 778d4a7ba9d0..d700acc34bc9 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp @@ -198,9 +198,9 @@ class ELFAMDGPUAsmBackend : public AMDGPUAsmBackend { } // end anonymous namespace MCAsmBackend *llvm::createAMDGPUAsmBackend(const Target &T, + const MCSubtargetInfo &STI, const MCRegisterInfo &MRI, - const Triple &TT, StringRef CPU, const MCTargetOptions &Options) { // Use 64-bit ELF for amdgcn - return new ELFAMDGPUAsmBackend(T, TT); + return new ELFAMDGPUAsmBackend(T, STI.getTargetTriple()); } diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUHSAMetadataStreamer.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUHSAMetadataStreamer.cpp index 5a6dfb28b505..463e700f13b7 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUHSAMetadataStreamer.cpp +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUHSAMetadataStreamer.cpp @@ -292,6 +292,8 @@ void MetadataStreamer::emitKernelArg(const Argument &Arg) { Node = Func->getMetadata("kernel_arg_name"); if (Node && ArgNo < Node->getNumOperands()) Name = cast(Node->getOperand(ArgNo))->getString(); + else if (Arg.hasName()) + Name = Arg.getName(); StringRef TypeName; Node = Func->getMetadata("kernel_arg_type"); diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h index 56bcff487174..1173dfd437ca 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h @@ -45,8 +45,9 @@ MCCodeEmitter *createSIMCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, MCContext &Ctx); -MCAsmBackend *createAMDGPUAsmBackend(const Target &T, const MCRegisterInfo &MRI, - const Triple &TT, StringRef CPU, +MCAsmBackend *createAMDGPUAsmBackend(const Target &T, + const MCSubtargetInfo &STI, + const MCRegisterInfo &MRI, const MCTargetOptions &Options); std::unique_ptr @@ -60,7 +61,9 @@ createAMDGPUELFObjectWriter(bool Is64Bit, uint8_t OSABI, #define GET_INSTRINFO_ENUM #define GET_INSTRINFO_OPERAND_ENUM +#define GET_INSTRINFO_SCHED_ENUM #include "AMDGPUGenInstrInfo.inc" +#undef GET_INSTRINFO_SCHED_ENUM #undef GET_INSTRINFO_OPERAND_ENUM #undef GET_INSTRINFO_ENUM diff --git a/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp b/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp index 94c0157edeb5..0d917a192fd9 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp +++ b/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp @@ -335,13 +335,24 @@ SIMCCodeEmitter::getSDWASrcEncoding(const MCInst &MI, unsigned OpNo, const MCOperand &MO = MI.getOperand(OpNo); - unsigned Reg = MO.getReg(); - RegEnc |= MRI.getEncodingValue(Reg); - RegEnc &= SDWA9EncValues::SRC_VGPR_MASK; - if (AMDGPU::isSGPR(AMDGPU::mc2PseudoReg(Reg), &MRI)) { - RegEnc |= SDWA9EncValues::SRC_SGPR_MASK; + if (MO.isReg()) { + unsigned Reg = MO.getReg(); + RegEnc |= 
MRI.getEncodingValue(Reg); + RegEnc &= SDWA9EncValues::SRC_VGPR_MASK; + if (AMDGPU::isSGPR(AMDGPU::mc2PseudoReg(Reg), &MRI)) { + RegEnc |= SDWA9EncValues::SRC_SGPR_MASK; + } + return RegEnc; + } else { + const MCInstrDesc &Desc = MCII.get(MI.getOpcode()); + uint32_t Enc = getLitEncoding(MO, Desc.OpInfo[OpNo], STI); + if (Enc != ~0U && Enc != 255) { + return Enc | SDWA9EncValues::SRC_SGPR_MASK; + } } - return RegEnc; + + llvm_unreachable("Unsupported operand kind"); + return 0; } unsigned diff --git a/lib/Target/AMDGPU/MIMGInstructions.td b/lib/Target/AMDGPU/MIMGInstructions.td index 99a018d2e245..9fd0abd9a3de 100644 --- a/lib/Target/AMDGPU/MIMGInstructions.td +++ b/lib/Target/AMDGPU/MIMGInstructions.td @@ -32,26 +32,45 @@ class MIMG_Helper op, string asm, RegisterClass dst_rc, RegisterClass addr_rc, + bit d16_bit=0, string dns=""> : MIMG_Helper < (outs dst_rc:$vdata), (ins addr_rc:$vaddr, SReg_256:$srsrc, dmask:$dmask, unorm:$unorm, GLC:$glc, slc:$slc, r128:$r128, tfe:$tfe, lwe:$lwe, da:$da), - asm#" $vdata, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da", + asm#" $vdata, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da"#!if(d16_bit, " d16", ""), dns>, MIMGe { let ssamp = 0; + let D16 = d16; +} + +multiclass MIMG_NoSampler_Src_Helper_Helper op, string asm, + RegisterClass dst_rc, + int channels, bit d16_bit, + string suffix> { + def _V1 # suffix : MIMG_NoSampler_Helper , + MIMG_Mask; + def _V2 # suffix : MIMG_NoSampler_Helper , + MIMG_Mask; + def _V4 # suffix : MIMG_NoSampler_Helper , + MIMG_Mask; } multiclass MIMG_NoSampler_Src_Helper op, string asm, RegisterClass dst_rc, int channels> { - def _V1 : MIMG_NoSampler_Helper , - MIMG_Mask; - def _V2 : MIMG_NoSampler_Helper , - MIMG_Mask; - def _V4 : MIMG_NoSampler_Helper , - MIMG_Mask; + defm : MIMG_NoSampler_Src_Helper_Helper ; + + let d16 = 1 in { + let SubtargetPredicate = HasPackedD16VMem in { + defm : MIMG_NoSampler_Src_Helper_Helper ; + } // End HasPackedD16VMem. + + let SubtargetPredicate = HasUnpackedD16VMem, DecoderNamespace = "GFX80_UNPACKED" in { + defm : MIMG_NoSampler_Src_Helper_Helper ; + } // End HasUnpackedD16VMem. + } // End d16 = 1. 
} multiclass MIMG_NoSampler op, string asm> { @@ -63,30 +82,50 @@ multiclass MIMG_NoSampler op, string asm> { class MIMG_Store_Helper op, string asm, RegisterClass data_rc, - RegisterClass addr_rc> : MIMG_Helper < + RegisterClass addr_rc, + bit d16_bit=0, + string dns = ""> : MIMG_Helper < (outs), (ins data_rc:$vdata, addr_rc:$vaddr, SReg_256:$srsrc, dmask:$dmask, unorm:$unorm, GLC:$glc, slc:$slc, r128:$r128, tfe:$tfe, lwe:$lwe, da:$da), - asm#" $vdata, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da" - >, MIMGe { + asm#" $vdata, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da"#!if(d16_bit, " d16", ""), dns>, MIMGe { let ssamp = 0; - let mayLoad = 1; // TableGen requires this for matching with the intrinsics + let mayLoad = 0; let mayStore = 1; - let hasSideEffects = 1; + let hasSideEffects = 0; let hasPostISelHook = 0; let DisableWQM = 1; + let D16 = d16; +} + +multiclass MIMG_Store_Addr_Helper_Helper op, string asm, + RegisterClass data_rc, + int channels, bit d16_bit, + string suffix> { + def _V1 # suffix : MIMG_Store_Helper , + MIMG_Mask; + def _V2 # suffix : MIMG_Store_Helper , + MIMG_Mask; + def _V4 # suffix : MIMG_Store_Helper , + MIMG_Mask; } multiclass MIMG_Store_Addr_Helper op, string asm, RegisterClass data_rc, int channels> { - def _V1 : MIMG_Store_Helper , - MIMG_Mask; - def _V2 : MIMG_Store_Helper , - MIMG_Mask; - def _V4 : MIMG_Store_Helper , - MIMG_Mask; + defm : MIMG_Store_Addr_Helper_Helper ; + + let d16 = 1 in { + let SubtargetPredicate = HasPackedD16VMem in { + defm : MIMG_Store_Addr_Helper_Helper ; + } // End HasPackedD16VMem. + + let SubtargetPredicate = HasUnpackedD16VMem, DecoderNamespace = "GFX80_UNPACKED" in { + defm : MIMG_Store_Addr_Helper_Helper ; + } // End HasUnpackedD16VMem. + } // End d16 = 1. } multiclass MIMG_Store op, string asm> { @@ -102,10 +141,10 @@ class MIMG_Atomic_Helper { + asm#" $vdst, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da"> { + let mayLoad = 1; let mayStore = 1; - let hasSideEffects = 1; + let hasSideEffects = 1; // FIXME: Remove this let hasPostISelHook = 0; let DisableWQM = 1; let Constraints = "$vdst = $vdata"; @@ -158,30 +197,49 @@ class MIMG_Sampler_Helper op, string asm, RegisterClass dst_rc, RegisterClass src_rc, bit wqm, + bit d16_bit=0, string dns=""> : MIMG_Helper < (outs dst_rc:$vdata), (ins src_rc:$vaddr, SReg_256:$srsrc, SReg_128:$ssamp, dmask:$dmask, unorm:$unorm, GLC:$glc, slc:$slc, r128:$r128, tfe:$tfe, lwe:$lwe, da:$da), - asm#" $vdata, $vaddr, $srsrc, $ssamp$dmask$unorm$glc$slc$r128$tfe$lwe$da", + asm#" $vdata, $vaddr, $srsrc, $ssamp$dmask$unorm$glc$slc$r128$tfe$lwe$da"#!if(d16_bit, " d16", ""), dns>, MIMGe { let WQM = wqm; + let D16 = d16; +} + +multiclass MIMG_Sampler_Src_Helper_Helper op, string asm, + RegisterClass dst_rc, + int channels, bit wqm, + bit d16_bit, string suffix> { + def _V1 # suffix : MIMG_Sampler_Helper , + MIMG_Mask; + def _V2 # suffix : MIMG_Sampler_Helper , + MIMG_Mask; + def _V4 # suffix : MIMG_Sampler_Helper , + MIMG_Mask; + def _V8 # suffix : MIMG_Sampler_Helper , + MIMG_Mask; + def _V16 # suffix : MIMG_Sampler_Helper , + MIMG_Mask; } multiclass MIMG_Sampler_Src_Helper op, string asm, RegisterClass dst_rc, int channels, bit wqm> { - def _V1 : MIMG_Sampler_Helper , - MIMG_Mask; - def _V2 : MIMG_Sampler_Helper , - MIMG_Mask; - def _V4 : MIMG_Sampler_Helper , - MIMG_Mask; - def _V8 : MIMG_Sampler_Helper , - MIMG_Mask; - def _V16 : MIMG_Sampler_Helper , - MIMG_Mask; + defm : MIMG_Sampler_Src_Helper_Helper ; + + let d16 = 1 in { + let SubtargetPredicate = HasPackedD16VMem in { + defm : 
MIMG_Sampler_Src_Helper_Helper ; + } // End HasPackedD16VMem. + + let SubtargetPredicate = HasUnpackedD16VMem, DecoderNamespace = "GFX80_UNPACKED" in { + defm : MIMG_Sampler_Src_Helper_Helper ; + } // End HasUnpackedD16VMem. + } // End d16 = 1. } multiclass MIMG_Sampler op, string asm, bit wqm=0> { @@ -195,12 +253,12 @@ multiclass MIMG_Sampler_WQM op, string asm> : MIMG_Sampler; class MIMG_Gather_Helper op, string asm, RegisterClass dst_rc, - RegisterClass src_rc, bit wqm> : MIMG < + RegisterClass src_rc, bit wqm, bit d16_bit=0> : MIMG < (outs dst_rc:$vdata), (ins src_rc:$vaddr, SReg_256:$srsrc, SReg_128:$ssamp, dmask:$dmask, unorm:$unorm, GLC:$glc, slc:$slc, r128:$r128, tfe:$tfe, lwe:$lwe, da:$da), - asm#" $vdata, $vaddr, $srsrc, $ssamp$dmask$unorm$glc$slc$r128$tfe$lwe$da", + asm#" $vdata, $vaddr, $srsrc, $ssamp$dmask$unorm$glc$slc$r128$tfe$lwe$da"#!if(d16_bit, " d16", ""), []>, MIMGe { let mayLoad = 1; let mayStore = 0; @@ -215,23 +273,42 @@ class MIMG_Gather_Helper op, string asm, let Gather4 = 1; let hasPostISelHook = 0; let WQM = wqm; + let D16 = d16; let isAsmParserOnly = 1; // TBD: fix it later } + +multiclass MIMG_Gather_Src_Helper_Helper op, string asm, + RegisterClass dst_rc, + int channels, bit wqm, + bit d16_bit, string suffix> { + def _V1 # suffix : MIMG_Gather_Helper , + MIMG_Mask; + def _V2 # suffix : MIMG_Gather_Helper , + MIMG_Mask; + def _V4 # suffix : MIMG_Gather_Helper , + MIMG_Mask; + def _V8 # suffix : MIMG_Gather_Helper , + MIMG_Mask; + def _V16 # suffix : MIMG_Gather_Helper , + MIMG_Mask; +} + multiclass MIMG_Gather_Src_Helper op, string asm, RegisterClass dst_rc, int channels, bit wqm> { - def _V1 : MIMG_Gather_Helper , - MIMG_Mask; - def _V2 : MIMG_Gather_Helper , - MIMG_Mask; - def _V4 : MIMG_Gather_Helper , - MIMG_Mask; - def _V8 : MIMG_Gather_Helper , - MIMG_Mask; - def _V16 : MIMG_Gather_Helper , - MIMG_Mask; + defm : MIMG_Gather_Src_Helper_Helper; + + let d16 = 1 in { + let SubtargetPredicate = HasPackedD16VMem in { + defm : MIMG_Gather_Src_Helper_Helper; + } // End HasPackedD16VMem. + + let SubtargetPredicate = HasUnpackedD16VMem, DecoderNamespace = "GFX80_UNPACKED" in { + defm : MIMG_Gather_Src_Helper_Helper; + } // End HasUnpackedD16VMem. + } // End d16 = 1. 
} multiclass MIMG_Gather op, string asm, bit wqm=0> { @@ -257,7 +334,11 @@ defm IMAGE_STORE : MIMG_Store <0x00000008, "image_store">; defm IMAGE_STORE_MIP : MIMG_Store <0x00000009, "image_store_mip">; //def IMAGE_STORE_PCK : MIMG_NoPattern_ <"image_store_pck", 0x0000000a>; //def IMAGE_STORE_MIP_PCK : MIMG_NoPattern_ <"image_store_mip_pck", 0x0000000b>; + +let mayLoad = 0, mayStore = 0 in { defm IMAGE_GET_RESINFO : MIMG_NoSampler <0x0000000e, "image_get_resinfo">; +} + defm IMAGE_ATOMIC_SWAP : MIMG_Atomic , "image_atomic_swap">; defm IMAGE_ATOMIC_CMPSWAP : MIMG_Atomic , "image_atomic_cmpswap", VReg_64>; defm IMAGE_ATOMIC_ADD : MIMG_Atomic , "image_atomic_add">; @@ -331,7 +412,11 @@ defm IMAGE_GATHER4_C_L_O : MIMG_Gather <0x0000005c, "image_gather4_c_l_o">; defm IMAGE_GATHER4_C_B_O : MIMG_Gather_WQM <0x0000005d, "image_gather4_c_b_o">; defm IMAGE_GATHER4_C_B_CL_O : MIMG_Gather_WQM <0x0000005e, "image_gather4_c_b_cl_o">; defm IMAGE_GATHER4_C_LZ_O : MIMG_Gather <0x0000005f, "image_gather4_c_lz_o">; + +let mayLoad = 0, mayStore = 0 in { defm IMAGE_GET_LOD : MIMG_Sampler_WQM <0x00000060, "image_get_lod">; +} + defm IMAGE_SAMPLE_CD : MIMG_Sampler <0x00000068, "image_sample_cd">; defm IMAGE_SAMPLE_CD_CL : MIMG_Sampler <0x00000069, "image_sample_cd_cl">; defm IMAGE_SAMPLE_C_CD : MIMG_Sampler <0x0000006a, "image_sample_c_cd">; @@ -348,29 +433,11 @@ defm IMAGE_SAMPLE_C_CD_CL_O : MIMG_Sampler <0x0000006f, "image_sample_c_cd_cl_o" /********** Image sampling patterns **********/ /********** ======================= **********/ -// Image + sampler -class SampleRawPattern : GCNPat < - (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, i32:$dmask, i32:$unorm, - i32:$r128, i32:$da, i32:$glc, i32:$slc, i32:$tfe, i32:$lwe), - (opcode $addr, $rsrc, $sampler, - (as_i32imm $dmask), (as_i1imm $unorm), (as_i1imm $glc), (as_i1imm $slc), - (as_i1imm $r128), (as_i1imm $tfe), (as_i1imm $lwe), (as_i1imm $da)) ->; - -multiclass SampleRawPatterns { - def : SampleRawPattern(opcode # _V4_V1), i32>; - def : SampleRawPattern(opcode # _V4_V2), v2i32>; - def : SampleRawPattern(opcode # _V4_V4), v4i32>; - def : SampleRawPattern(opcode # _V4_V8), v8i32>; - def : SampleRawPattern(opcode # _V4_V16), v16i32>; -} - -// Image + sampler for amdgcn +// ImageSample for amdgcn // TODO: -// 1. Handle half data type like v4f16, and add D16 bit support; -// 2. Handle v4i32 rsrc type (Register Class for the instruction to be SReg_128). -// 3. Add A16 support when we pass address of half type. -multiclass AMDGCNSamplePattern { +// 1. Handle v4i32 rsrc type (Register Class for the instruction to be SReg_128). +// 2. Add A16 support when we pass address of half type. 
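(Illustrative aside, not part of the patch.) The "_D16" and "_D16_gfx80" instruction variants instantiated by the surrounding multiclasses differ only in how 16-bit result components are laid out in the returned 32-bit dwords: packed-D16 targets (HasPackedD16VMem) hold two half values per dword, while unpacked-D16 targets (HasUnpackedD16VMem) return one half value in the low 16 bits of each dword. A minimal standalone C++ sketch of the two layouts, using raw 16-bit payloads purely for illustration:

#include <cassert>
#include <cstdint>

// Packed D16: two 16-bit components share one 32-bit dword.
uint32_t packD16(uint16_t Lo, uint16_t Hi) {
  return static_cast<uint32_t>(Lo) | (static_cast<uint32_t>(Hi) << 16);
}

// Unpacked D16 (the "_gfx80" variants): each 16-bit component occupies the
// low half of its own 32-bit dword.
void unpackD16(uint16_t Lo, uint16_t Hi, uint32_t Out[2]) {
  Out[0] = Lo;
  Out[1] = Hi;
}

int main() {
  uint32_t Unpacked[2];
  unpackD16(0x3C00, 0x4000, Unpacked);            // raw bits of 1.0h and 2.0h
  assert(Unpacked[0] == 0x3C00u && Unpacked[1] == 0x4000u);
  assert(packD16(0x3C00, 0x4000) == 0x40003C00u); // same pair in one dword
  return 0;
}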
+multiclass ImageSamplePattern { def : GCNPat< (dt (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, i32:$dmask, i1:$unorm, i1:$glc, i1:$slc, i1:$lwe, i1:$da)), @@ -380,36 +447,44 @@ multiclass AMDGCNSamplePattern; } -multiclass AMDGCNSampleDataPatterns { - defm : AMDGCNSamplePattern(opcode # _V1), dt, f32>; - defm : AMDGCNSamplePattern(opcode # _V2), dt, v2f32>; - defm : AMDGCNSamplePattern(opcode # _V4), dt, v4f32>; - defm : AMDGCNSamplePattern(opcode # _V8), dt, v8f32>; - defm : AMDGCNSamplePattern(opcode # _V16), dt, v16f32>; +multiclass ImageSampleDataPatterns { + defm : ImageSamplePattern(opcode # _V1 # suffix), dt, f32>; + defm : ImageSamplePattern(opcode # _V2 # suffix), dt, v2f32>; + defm : ImageSamplePattern(opcode # _V4 # suffix), dt, v4f32>; + defm : ImageSamplePattern(opcode # _V8 # suffix), dt, v8f32>; + defm : ImageSamplePattern(opcode # _V16 # suffix), dt, v16f32>; } -// TODO: support v3f32. -multiclass AMDGCNSamplePatterns { - defm : AMDGCNSampleDataPatterns(opcode # _V1), f32>; - defm : AMDGCNSampleDataPatterns(opcode # _V2), v2f32>; - defm : AMDGCNSampleDataPatterns(opcode # _V4), v4f32>; +// ImageSample patterns. +multiclass ImageSamplePatterns { + defm : ImageSampleDataPatterns(opcode # _V1), f32>; + defm : ImageSampleDataPatterns(opcode # _V2), v2f32>; + defm : ImageSampleDataPatterns(opcode # _V4), v4f32>; + + let SubtargetPredicate = HasUnpackedD16VMem in { + defm : ImageSampleDataPatterns(opcode # _V1), f16, "_D16_gfx80">; + } // End HasUnpackedD16VMem. + + let SubtargetPredicate = HasPackedD16VMem in { + defm : ImageSampleDataPatterns(opcode # _V1), f16, "_D16">; + defm : ImageSampleDataPatterns(opcode # _V1), v2f16, "_D16">; + } // End HasPackedD16VMem. } -// Image only -class ImagePattern : GCNPat < - (name vt:$addr, v8i32:$rsrc, imm:$dmask, imm:$unorm, - imm:$r128, imm:$da, imm:$glc, imm:$slc, imm:$tfe, imm:$lwe), - (opcode $addr, $rsrc, - (as_i32imm $dmask), (as_i1imm $unorm), (as_i1imm $glc), (as_i1imm $slc), - (as_i1imm $r128), (as_i1imm $tfe), (as_i1imm $lwe), (as_i1imm $da)) ->; +// ImageSample alternative patterns for illegal vector half Types. +multiclass ImageSampleAltPatterns { + let SubtargetPredicate = HasUnpackedD16VMem in { + defm : ImageSampleDataPatterns(opcode # _V2), v2i32, "_D16_gfx80">; + defm : ImageSampleDataPatterns(opcode # _V4), v4i32, "_D16_gfx80">; + } // End HasUnpackedD16VMem. -multiclass ImagePatterns { - def : ImagePattern(opcode # _V4_V1), i32>; - def : ImagePattern(opcode # _V4_V2), v2i32>; - def : ImagePattern(opcode # _V4_V4), v4i32>; + let SubtargetPredicate = HasPackedD16VMem in { + defm : ImageSampleDataPatterns(opcode # _V1), i32, "_D16">; + defm : ImageSampleDataPatterns(opcode # _V2), v2i32, "_D16">; + } // End HasPackedD16VMem. } +// ImageLoad for amdgcn. multiclass ImageLoadPattern { def : GCNPat < (dt (name vt:$addr, v8i32:$rsrc, i32:$dmask, i1:$glc, i1:$slc, i1:$lwe, @@ -420,19 +495,43 @@ multiclass ImageLoadPattern; } -multiclass ImageLoadDataPatterns { - defm : ImageLoadPattern(opcode # _V1), dt, i32>; - defm : ImageLoadPattern(opcode # _V2), dt, v2i32>; - defm : ImageLoadPattern(opcode # _V4), dt, v4i32>; +multiclass ImageLoadDataPatterns { + defm : ImageLoadPattern(opcode # _V1 # suffix), dt, i32>; + defm : ImageLoadPattern(opcode # _V2 # suffix), dt, v2i32>; + defm : ImageLoadPattern(opcode # _V4 # suffix), dt, v4i32>; } +// ImageLoad patterns. // TODO: support v3f32. 
multiclass ImageLoadPatterns { defm : ImageLoadDataPatterns(opcode # _V1), f32>; defm : ImageLoadDataPatterns(opcode # _V2), v2f32>; defm : ImageLoadDataPatterns(opcode # _V4), v4f32>; + + let SubtargetPredicate = HasUnpackedD16VMem in { + defm : ImageLoadDataPatterns(opcode # _V1), f16, "_D16_gfx80">; + } // End HasUnpackedD16VMem. + + let SubtargetPredicate = HasPackedD16VMem in { + defm : ImageLoadDataPatterns(opcode # _V1), f16, "_D16">; + defm : ImageLoadDataPatterns(opcode # _V1), v2f16, "_D16">; + } // End HasPackedD16VMem. +} + +// ImageLoad alternative patterns for illegal vector half Types. +multiclass ImageLoadAltPatterns { + let SubtargetPredicate = HasUnpackedD16VMem in { + defm : ImageLoadDataPatterns(opcode # _V2), v2i32, "_D16_gfx80">; + defm : ImageLoadDataPatterns(opcode # _V4), v4i32, "_D16_gfx80">; + } // End HasUnPackedD16VMem. + + let SubtargetPredicate = HasPackedD16VMem in { + defm : ImageLoadDataPatterns(opcode # _V1), i32, "_D16">; + defm : ImageLoadDataPatterns(opcode # _V2), v2i32, "_D16">; + } // End HasPackedD16VMem. } +// ImageStore for amdgcn. multiclass ImageStorePattern { def : GCNPat < (name dt:$data, vt:$addr, v8i32:$rsrc, i32:$dmask, i1:$glc, i1:$slc, @@ -443,30 +542,56 @@ multiclass ImageStorePattern; } -multiclass ImageStoreDataPatterns { - defm : ImageStorePattern(opcode # _V1), dt, i32>; - defm : ImageStorePattern(opcode # _V2), dt, v2i32>; - defm : ImageStorePattern(opcode # _V4), dt, v4i32>; +multiclass ImageStoreDataPatterns { + defm : ImageStorePattern(opcode # _V1 # suffix), dt, i32>; + defm : ImageStorePattern(opcode # _V2 # suffix), dt, v2i32>; + defm : ImageStorePattern(opcode # _V4 # suffix), dt, v4i32>; } +// ImageStore patterns. // TODO: support v3f32. multiclass ImageStorePatterns { defm : ImageStoreDataPatterns(opcode # _V1), f32>; defm : ImageStoreDataPatterns(opcode # _V2), v2f32>; defm : ImageStoreDataPatterns(opcode # _V4), v4f32>; + + let SubtargetPredicate = HasUnpackedD16VMem in { + defm : ImageStoreDataPatterns(opcode # _V1), f16, "_D16_gfx80">; + } // End HasUnpackedD16VMem. + + let SubtargetPredicate = HasPackedD16VMem in { + defm : ImageStoreDataPatterns(opcode # _V1), f16, "_D16">; + defm : ImageStoreDataPatterns(opcode # _V1), v2f16, "_D16">; + } // End HasPackedD16VMem. +} + +// ImageStore alternative patterns. +multiclass ImageStoreAltPatterns { + let SubtargetPredicate = HasUnpackedD16VMem in { + defm : ImageStoreDataPatterns(opcode # _V2), v2i32, "_D16_gfx80">; + defm : ImageStoreDataPatterns(opcode # _V4), v4i32, "_D16_gfx80">; + } // End HasUnpackedD16VMem. + + let SubtargetPredicate = HasPackedD16VMem in { + defm : ImageStoreDataPatterns(opcode # _V1), i32, "_D16">; + defm : ImageStoreDataPatterns(opcode # _V2), v2i32, "_D16">; + } // End HasPackedD16VMem. } +// ImageAtomic for amdgcn. class ImageAtomicPattern : GCNPat < (name i32:$vdata, vt:$addr, v8i32:$rsrc, imm:$r128, imm:$da, imm:$slc), (opcode $vdata, $addr, $rsrc, 1, 1, 1, (as_i1imm $slc), (as_i1imm $r128), 0, 0, (as_i1imm $da)) >; +// ImageAtomic patterns. multiclass ImageAtomicPatterns { def : ImageAtomicPattern(opcode # _V1), i32>; def : ImageAtomicPattern(opcode # _V2), v2i32>; def : ImageAtomicPattern(opcode # _V4), v4i32>; } +// ImageAtomicCmpSwap for amdgcn. class ImageAtomicCmpSwapPattern : GCNPat < (int_amdgcn_image_atomic_cmpswap i32:$vsrc, i32:$vcmp, vt:$addr, v8i32:$rsrc, imm:$r128, imm:$da, imm:$slc), @@ -478,93 +603,180 @@ class ImageAtomicCmpSwapPattern : GCNPat < // ======= amdgcn Image Intrinsics ============== -// Image load +// Image load. 
defm : ImageLoadPatterns; defm : ImageLoadPatterns; defm : ImageLoadPatterns; - -// Image store -defm : ImageStorePatterns; -defm : ImageStorePatterns; - -// Basic sample -defm : AMDGCNSamplePatterns; -defm : AMDGCNSamplePatterns; -defm : AMDGCNSamplePatterns; -defm : AMDGCNSamplePatterns; -defm : AMDGCNSamplePatterns; -defm : AMDGCNSamplePatterns; -defm : AMDGCNSamplePatterns; -defm : AMDGCNSamplePatterns; -defm : AMDGCNSamplePatterns; -defm : AMDGCNSamplePatterns; - -// Sample with comparison -defm : AMDGCNSamplePatterns; -defm : AMDGCNSamplePatterns; -defm : AMDGCNSamplePatterns; -defm : AMDGCNSamplePatterns; -defm : AMDGCNSamplePatterns; -defm : AMDGCNSamplePatterns; -defm : AMDGCNSamplePatterns; -defm : AMDGCNSamplePatterns; -defm : AMDGCNSamplePatterns; -defm : AMDGCNSamplePatterns; - -// Sample with offsets -defm : AMDGCNSamplePatterns; -defm : AMDGCNSamplePatterns; -defm : AMDGCNSamplePatterns; -defm : AMDGCNSamplePatterns; -defm : AMDGCNSamplePatterns; -defm : AMDGCNSamplePatterns; -defm : AMDGCNSamplePatterns; -defm : AMDGCNSamplePatterns; -defm : AMDGCNSamplePatterns; -defm : AMDGCNSamplePatterns; - -// Sample with comparison and offsets -defm : AMDGCNSamplePatterns; -defm : AMDGCNSamplePatterns; -defm : AMDGCNSamplePatterns; -defm : AMDGCNSamplePatterns; -defm : AMDGCNSamplePatterns; -defm : AMDGCNSamplePatterns; -defm : AMDGCNSamplePatterns; -defm : AMDGCNSamplePatterns; -defm : AMDGCNSamplePatterns; -defm : AMDGCNSamplePatterns; - -// Gather opcodes -defm : AMDGCNSamplePatterns; -defm : AMDGCNSamplePatterns; -defm : AMDGCNSamplePatterns; -defm : AMDGCNSamplePatterns; -defm : AMDGCNSamplePatterns; -defm : AMDGCNSamplePatterns; - -defm : AMDGCNSamplePatterns; -defm : AMDGCNSamplePatterns; -defm : AMDGCNSamplePatterns; -defm : AMDGCNSamplePatterns; -defm : AMDGCNSamplePatterns; -defm : AMDGCNSamplePatterns; - -defm : AMDGCNSamplePatterns; -defm : AMDGCNSamplePatterns; -defm : AMDGCNSamplePatterns; -defm : AMDGCNSamplePatterns; -defm : AMDGCNSamplePatterns; -defm : AMDGCNSamplePatterns; - -defm : AMDGCNSamplePatterns; -defm : AMDGCNSamplePatterns; -defm : AMDGCNSamplePatterns; -defm : AMDGCNSamplePatterns; -defm : AMDGCNSamplePatterns; -defm : AMDGCNSamplePatterns; - -defm : AMDGCNSamplePatterns; +defm : ImageLoadAltPatterns; +defm : ImageLoadAltPatterns; + +// Image store. +defm : ImageStorePatterns; +defm : ImageStorePatterns; +defm : ImageStoreAltPatterns; +defm : ImageStoreAltPatterns; + +// Basic sample. +defm : ImageSamplePatterns; +defm : ImageSamplePatterns; +defm : ImageSamplePatterns; +defm : ImageSamplePatterns; +defm : ImageSamplePatterns; +defm : ImageSamplePatterns; +defm : ImageSamplePatterns; +defm : ImageSamplePatterns; +defm : ImageSamplePatterns; +defm : ImageSamplePatterns; + +// Sample with comparison. +defm : ImageSamplePatterns; +defm : ImageSamplePatterns; +defm : ImageSamplePatterns; +defm : ImageSamplePatterns; +defm : ImageSamplePatterns; +defm : ImageSamplePatterns; +defm : ImageSamplePatterns; +defm : ImageSamplePatterns; +defm : ImageSamplePatterns; +defm : ImageSamplePatterns; + +// Sample with offsets. +defm : ImageSamplePatterns; +defm : ImageSamplePatterns; +defm : ImageSamplePatterns; +defm : ImageSamplePatterns; +defm : ImageSamplePatterns; +defm : ImageSamplePatterns; +defm : ImageSamplePatterns; +defm : ImageSamplePatterns; +defm : ImageSamplePatterns; +defm : ImageSamplePatterns; + +// Sample with comparison and offsets. 
+defm : ImageSamplePatterns; +defm : ImageSamplePatterns; +defm : ImageSamplePatterns; +defm : ImageSamplePatterns; +defm : ImageSamplePatterns; +defm : ImageSamplePatterns; +defm : ImageSamplePatterns; +defm : ImageSamplePatterns; +defm : ImageSamplePatterns; +defm : ImageSamplePatterns; + +// Basic gather4. +defm : ImageSamplePatterns; +defm : ImageSamplePatterns; +defm : ImageSamplePatterns; +defm : ImageSamplePatterns; +defm : ImageSamplePatterns; +defm : ImageSamplePatterns; + +// Gather4 with comparison. +defm : ImageSamplePatterns; +defm : ImageSamplePatterns; +defm : ImageSamplePatterns; +defm : ImageSamplePatterns; +defm : ImageSamplePatterns; +defm : ImageSamplePatterns; + +// Gather4 with offsets. +defm : ImageSamplePatterns; +defm : ImageSamplePatterns; +defm : ImageSamplePatterns; +defm : ImageSamplePatterns; +defm : ImageSamplePatterns; +defm : ImageSamplePatterns; + +// Gather4 with comparison and offsets. +defm : ImageSamplePatterns; +defm : ImageSamplePatterns; +defm : ImageSamplePatterns; +defm : ImageSamplePatterns; +defm : ImageSamplePatterns; +defm : ImageSamplePatterns; + +// Basic sample alternative. +defm : ImageSampleAltPatterns; +defm : ImageSampleAltPatterns; +defm : ImageSampleAltPatterns; +defm : ImageSampleAltPatterns; +defm : ImageSampleAltPatterns; +defm : ImageSampleAltPatterns; +defm : ImageSampleAltPatterns; +defm : ImageSampleAltPatterns; +defm : ImageSampleAltPatterns; +defm : ImageSampleAltPatterns; + +// Sample with comparison alternative. +defm : ImageSampleAltPatterns; +defm : ImageSampleAltPatterns; +defm : ImageSampleAltPatterns; +defm : ImageSampleAltPatterns; +defm : ImageSampleAltPatterns; +defm : ImageSampleAltPatterns; +defm : ImageSampleAltPatterns; +defm : ImageSampleAltPatterns; +defm : ImageSampleAltPatterns; +defm : ImageSampleAltPatterns; + +// Sample with offsets alternative. +defm : ImageSampleAltPatterns; +defm : ImageSampleAltPatterns; +defm : ImageSampleAltPatterns; +defm : ImageSampleAltPatterns; +defm : ImageSampleAltPatterns; +defm : ImageSampleAltPatterns; +defm : ImageSampleAltPatterns; +defm : ImageSampleAltPatterns; +defm : ImageSampleAltPatterns; +defm : ImageSampleAltPatterns; + +// Sample with comparison and offsets alternative. +defm : ImageSampleAltPatterns; +defm : ImageSampleAltPatterns; +defm : ImageSampleAltPatterns; +defm : ImageSampleAltPatterns; +defm : ImageSampleAltPatterns; +defm : ImageSampleAltPatterns; +defm : ImageSampleAltPatterns; +defm : ImageSampleAltPatterns; +defm : ImageSampleAltPatterns; +defm : ImageSampleAltPatterns; + +// Basic gather4 alternative. +defm : ImageSampleAltPatterns; +defm : ImageSampleAltPatterns; +defm : ImageSampleAltPatterns; +defm : ImageSampleAltPatterns; +defm : ImageSampleAltPatterns; +defm : ImageSampleAltPatterns; + +// Gather4 with comparison alternative. +defm : ImageSampleAltPatterns; +defm : ImageSampleAltPatterns; +defm : ImageSampleAltPatterns; +defm : ImageSampleAltPatterns; +defm : ImageSampleAltPatterns; +defm : ImageSampleAltPatterns; + +// Gather4 with offsets alternative. +defm : ImageSampleAltPatterns; +defm : ImageSampleAltPatterns; +defm : ImageSampleAltPatterns; +defm : ImageSampleAltPatterns; +defm : ImageSampleAltPatterns; +defm : ImageSampleAltPatterns; + +// Gather4 with comparison and offsets alternative. 
+defm : ImageSampleAltPatterns; +defm : ImageSampleAltPatterns; +defm : ImageSampleAltPatterns; +defm : ImageSampleAltPatterns; +defm : ImageSampleAltPatterns; +defm : ImageSampleAltPatterns; + +defm : ImageSamplePatterns; // Image atomics defm : ImageAtomicPatterns; diff --git a/lib/Target/AMDGPU/R600ClauseMergePass.cpp b/lib/Target/AMDGPU/R600ClauseMergePass.cpp index 8db66e600ecb..5e1ba6b506da 100644 --- a/lib/Target/AMDGPU/R600ClauseMergePass.cpp +++ b/lib/Target/AMDGPU/R600ClauseMergePass.cpp @@ -180,7 +180,7 @@ bool R600ClauseMergePass::mergeIfPossible(MachineInstr &RootCFAlu, } bool R600ClauseMergePass::runOnMachineFunction(MachineFunction &MF) { - if (skipFunction(*MF.getFunction())) + if (skipFunction(MF.getFunction())) return false; const R600Subtarget &ST = MF.getSubtarget(); diff --git a/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp b/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp index be6a45da1161..0e788df1c9c0 100644 --- a/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp +++ b/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp @@ -512,14 +512,14 @@ class R600ControlFlowFinalizer : public MachineFunctionPass { R600MachineFunctionInfo *MFI = MF.getInfo(); - CFStack CFStack(ST, MF.getFunction()->getCallingConv()); + CFStack CFStack(ST, MF.getFunction().getCallingConv()); for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME; ++MB) { MachineBasicBlock &MBB = *MB; unsigned CfCount = 0; std::vector>> LoopStack; std::vector IfThenElseStack; - if (MF.getFunction()->getCallingConv() == CallingConv::AMDGPU_VS) { + if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_VS) { BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()), getHWInstrDesc(CF_CALL_FS)); CfCount++; diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp index 0d62c5a32d4d..66291d0be4e6 100644 --- a/lib/Target/AMDGPU/R600ISelLowering.cpp +++ b/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -211,6 +211,11 @@ R600TargetLowering::R600TargetLowering(const TargetMachine &TM, setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom); + if (!Subtarget->hasFMA()) { + setOperationAction(ISD::FMA, MVT::f32, Expand); + setOperationAction(ISD::FMA, MVT::f64, Expand); + } + setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 }; diff --git a/lib/Target/AMDGPU/R600InstrInfo.cpp b/lib/Target/AMDGPU/R600InstrInfo.cpp index 21945c4cce13..23e646c8147c 100644 --- a/lib/Target/AMDGPU/R600InstrInfo.cpp +++ b/lib/Target/AMDGPU/R600InstrInfo.cpp @@ -197,7 +197,7 @@ bool R600InstrInfo::usesVertexCache(unsigned Opcode) const { bool R600InstrInfo::usesVertexCache(const MachineInstr &MI) const { const MachineFunction *MF = MI.getParent()->getParent(); - return !AMDGPU::isCompute(MF->getFunction()->getCallingConv()) && + return !AMDGPU::isCompute(MF->getFunction().getCallingConv()) && usesVertexCache(MI.getOpcode()); } @@ -207,7 +207,7 @@ bool R600InstrInfo::usesTextureCache(unsigned Opcode) const { bool R600InstrInfo::usesTextureCache(const MachineInstr &MI) const { const MachineFunction *MF = MI.getParent()->getParent(); - return (AMDGPU::isCompute(MF->getFunction()->getCallingConv()) && + return (AMDGPU::isCompute(MF->getFunction().getCallingConv()) && usesVertexCache(MI.getOpcode())) || usesTextureCache(MI.getOpcode()); } diff --git a/lib/Target/AMDGPU/R600Instructions.td b/lib/Target/AMDGPU/R600Instructions.td index f422f441af4f..801e4e61fca6 100644 --- 
a/lib/Target/AMDGPU/R600Instructions.td +++ b/lib/Target/AMDGPU/R600Instructions.td @@ -989,7 +989,10 @@ class MULADD_IEEE_Common inst> : R600_3OP < class FMA_Common inst> : R600_3OP < inst, "FMA", [(set f32:$dst, (fma f32:$src0, f32:$src1, f32:$src2))], VecALU ->; +> +{ + let OtherPredicates = [FMA]; +} class CNDE_Common inst> : R600_3OP < inst, "CNDE", diff --git a/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp b/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp index 972e61d376dd..4a14d95f1cc4 100644 --- a/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp +++ b/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp @@ -12,16 +12,16 @@ /// common data and/or have enough undef subreg using swizzle abilities. /// /// For instance let's consider the following pseudo code : -/// vreg5 = REG_SEQ vreg1, sub0, vreg2, sub1, vreg3, sub2, undef, sub3 +/// %5 = REG_SEQ %1, sub0, %2, sub1, %3, sub2, undef, sub3 /// ... -/// vreg7 = REG_SEQ vreg1, sub0, vreg3, sub1, undef, sub2, vreg4, sub3 -/// (swizzable Inst) vreg7, SwizzleMask : sub0, sub1, sub2, sub3 +/// %7 = REG_SEQ %1, sub0, %3, sub1, undef, sub2, %4, sub3 +/// (swizzable Inst) %7, SwizzleMask : sub0, sub1, sub2, sub3 /// /// is turned into : -/// vreg5 = REG_SEQ vreg1, sub0, vreg2, sub1, vreg3, sub2, undef, sub3 +/// %5 = REG_SEQ %1, sub0, %2, sub1, %3, sub2, undef, sub3 /// ... -/// vreg7 = INSERT_SUBREG vreg4, sub3 -/// (swizzable Inst) vreg7, SwizzleMask : sub0, sub2, sub1, sub3 +/// %7 = INSERT_SUBREG %4, sub3 +/// (swizzable Inst) %7, SwizzleMask : sub0, sub2, sub1, sub3 /// /// This allow regalloc to reduce register pressure for vector registers and /// to reduce MOV count. @@ -336,7 +336,7 @@ void R600VectorRegMerger::trackRSI(const RegSeqInfo &RSI) { } bool R600VectorRegMerger::runOnMachineFunction(MachineFunction &Fn) { - if (skipFunction(*Fn.getFunction())) + if (skipFunction(Fn.getFunction())) return false; const R600Subtarget &ST = Fn.getSubtarget(); diff --git a/lib/Target/AMDGPU/R600Processors.td b/lib/Target/AMDGPU/R600Processors.td index 8ef1fe191c31..89194dc1bdf6 100644 --- a/lib/Target/AMDGPU/R600Processors.td +++ b/lib/Target/AMDGPU/R600Processors.td @@ -24,7 +24,7 @@ def : Processor<"rs880", R600_VLIW5_Itin, >; def : Processor<"rv670", R600_VLIW5_Itin, - [FeatureR600, FeatureWavefrontSize64, FeatureVertexCache, FeatureFP64] + [FeatureR600, FeatureWavefrontSize64, FeatureVertexCache] >; //===----------------------------------------------------------------------===// @@ -40,7 +40,7 @@ def : Processor<"rv730", R600_VLIW5_Itin, >; def : Processor<"rv770", R600_VLIW5_Itin, - [FeatureR700, FeatureWavefrontSize64, FeatureVertexCache, FeatureFP64] + [FeatureR700, FeatureWavefrontSize64, FeatureVertexCache] >; //===----------------------------------------------------------------------===// @@ -53,7 +53,7 @@ def : Processor<"cedar", R600_VLIW5_Itin, >; def : Processor<"cypress", R600_VLIW5_Itin, - [FeatureEvergreen, FeatureWavefrontSize64, FeatureVertexCache, FeatureFP64] + [FeatureEvergreen, FeatureWavefrontSize64, FeatureVertexCache, FeatureFMA] >; def : Processor<"juniper", R600_VLIW5_Itin, @@ -82,7 +82,7 @@ def : Processor<"caicos", R600_VLIW5_Itin, >; def : Processor<"cayman", R600_VLIW4_Itin, - [FeatureNorthernIslands, FeatureFP64, FeatureCaymanISA] + [FeatureNorthernIslands, FeatureCaymanISA, FeatureFMA] >; def : Processor<"turks", R600_VLIW5_Itin, diff --git a/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp b/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp index 150d8c3dc3d3..97983ea21edd 100644 --- 
a/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp +++ b/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp @@ -422,7 +422,11 @@ bool SIAnnotateControlFlow::runOnFunction(Function &F) { openIf(Term); } - assert(Stack.empty()); + if (!Stack.empty()) { + // CFG was probably not structured. + report_fatal_error("failed to annotate CFG"); + } + return true; } diff --git a/lib/Target/AMDGPU/SIDefines.h b/lib/Target/AMDGPU/SIDefines.h index 23bdd6953254..1b93c2f5248b 100644 --- a/lib/Target/AMDGPU/SIDefines.h +++ b/lib/Target/AMDGPU/SIDefines.h @@ -85,7 +85,10 @@ enum : uint64_t { ClampHi = UINT64_C(1) << 48, // Is a packed VOP3P instruction. - IsPacked = UINT64_C(1) << 49 + IsPacked = UINT64_C(1) << 49, + + // "d16" bit set or not. + D16 = UINT64_C(1) << 50 }; // v_cmp_class_* etc. use a 10-bit mask for what operation is checked. @@ -137,7 +140,6 @@ namespace AMDGPU { OPERAND_INPUT_MODS, // Operand for SDWA instructions - OPERAND_SDWA_SRC, OPERAND_SDWA_VOPC_DST, /// Operand with 32-bit immediate that uses the constant bus. @@ -194,8 +196,10 @@ namespace EncValues { // Encoding values of enum9/8/7 operands enum { SGPR_MIN = 0, SGPR_MAX = 101, - TTMP_MIN = 112, - TTMP_MAX = 123, + TTMP_VI_MIN = 112, + TTMP_VI_MAX = 123, + TTMP_GFX9_MIN = 108, + TTMP_GFX9_MAX = 123, INLINE_INTEGER_C_MIN = 128, INLINE_INTEGER_C_POSITIVE_MAX = 192, // 64 INLINE_INTEGER_C_MAX = 208, @@ -271,8 +275,9 @@ enum Id { // HwRegCode, (6) [5:0] ID_GPR_ALLOC = 5, ID_LDS_ALLOC = 6, ID_IB_STS = 7, - ID_SYMBOLIC_LAST_ = 8, ID_MEM_BASES = 15, + ID_SYMBOLIC_FIRST_GFX9_ = ID_MEM_BASES, + ID_SYMBOLIC_LAST_ = 16, ID_SHIFT_ = 0, ID_WIDTH_ = 6, ID_MASK_ = (((1 << ID_WIDTH_) - 1) << ID_SHIFT_) @@ -368,6 +373,8 @@ enum SDWA9EncValues{ SRC_VGPR_MAX = 255, SRC_SGPR_MIN = 256, SRC_SGPR_MAX = 357, + SRC_TTMP_MIN = 364, + SRC_TTMP_MAX = 379, }; } // namespace SDWA diff --git a/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/lib/Target/AMDGPU/SIFixSGPRCopies.cpp index 34b1f758f7b5..8b155c2d2780 100644 --- a/lib/Target/AMDGPU/SIFixSGPRCopies.cpp +++ b/lib/Target/AMDGPU/SIFixSGPRCopies.cpp @@ -14,46 +14,46 @@ /// Register Class is the union of and /// /// BB0: -/// %vreg0 = SCALAR_INST -/// %vreg1 = COPY %vreg0 +/// %0 = SCALAR_INST +/// %1 = COPY %0 /// ... /// BRANCH %cond BB1, BB2 /// BB1: -/// %vreg2 = VECTOR_INST -/// %vreg3 = COPY %vreg2 +/// %2 = VECTOR_INST +/// %3 = COPY %2 /// BB2: -/// %vreg4 = PHI %vreg1 , , %vreg3 , -/// %vreg5 = VECTOR_INST %vreg4 +/// %4 = PHI %1 , <%bb.0>, %3 , <%bb.1> +/// %5 = VECTOR_INST %4 /// /// /// The coalescer will begin at BB0 and eliminate its copy, then the resulting /// code will look like this: /// /// BB0: -/// %vreg0 = SCALAR_INST +/// %0 = SCALAR_INST /// ... /// BRANCH %cond BB1, BB2 /// BB1: -/// %vreg2 = VECTOR_INST -/// %vreg3 = COPY %vreg2 +/// %2 = VECTOR_INST +/// %3 = COPY %2 /// BB2: -/// %vreg4 = PHI %vreg0 , , %vreg3 , -/// %vreg5 = VECTOR_INST %vreg4 +/// %4 = PHI %0 , <%bb.0>, %3 , <%bb.1> +/// %5 = VECTOR_INST %4 /// /// Now that the result of the PHI instruction is an SGPR, the register -/// allocator is now forced to constrain the register class of %vreg3 to +/// allocator is now forced to constrain the register class of %3 to /// so we end up with final code like this: /// /// BB0: -/// %vreg0 = SCALAR_INST +/// %0 = SCALAR_INST /// ... 
/// BRANCH %cond BB1, BB2 /// BB1: -/// %vreg2 = VECTOR_INST -/// %vreg3 = COPY %vreg2 +/// %2 = VECTOR_INST +/// %3 = COPY %2 /// BB2: -/// %vreg4 = PHI %vreg0 , , %vreg3 , -/// %vreg5 = VECTOR_INST %vreg4 +/// %4 = PHI %0 , <%bb.0>, %3 , <%bb.1> +/// %5 = VECTOR_INST %4 /// /// Now this code contains an illegal copy from a VGPR to an SGPR. /// @@ -81,6 +81,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachinePostDominators.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/Pass.h" #include "llvm/Support/CodeGen.h" @@ -109,7 +110,12 @@ namespace { class SIFixSGPRCopies : public MachineFunctionPass { MachineDominatorTree *MDT; - + MachinePostDominatorTree *MPDT; + DenseMap> PDF; + void computePDF(MachineFunction * MF); +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + void printPDF(); +#endif public: static char ID; @@ -122,6 +128,8 @@ class SIFixSGPRCopies : public MachineFunctionPass { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); AU.setPreservesCFG(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -409,12 +417,6 @@ bool searchPredecessors(const MachineBasicBlock *MBB, return false; } -static bool predsHasDivergentTerminator(MachineBasicBlock *MBB, - const TargetRegisterInfo *TRI) { - return searchPredecessors(MBB, nullptr, [TRI](MachineBasicBlock *MBB) { - return hasTerminatorThatModifiesExec(*MBB, *TRI); }); -} - // Checks if there is potential path From instruction To instruction. // If CutOff is specified and it sits in between of that path we ignore // a higher portion of the path and report it is not reachable. @@ -513,8 +515,9 @@ static bool hoistAndMergeSGPRInits(unsigned Reg, if (MDT.dominates(MI1, MI2)) { if (!intereferes(MI2, MI1)) { - DEBUG(dbgs() << "Erasing from BB#" << MI2->getParent()->getNumber() - << " " << *MI2); + DEBUG(dbgs() << "Erasing from " + << printMBBReference(*MI2->getParent()) << " " + << *MI2); MI2->eraseFromParent(); Defs.erase(I2++); Changed = true; @@ -522,8 +525,9 @@ static bool hoistAndMergeSGPRInits(unsigned Reg, } } else if (MDT.dominates(MI2, MI1)) { if (!intereferes(MI1, MI2)) { - DEBUG(dbgs() << "Erasing from BB#" << MI1->getParent()->getNumber() - << " " << *MI1); + DEBUG(dbgs() << "Erasing from " + << printMBBReference(*MI1->getParent()) << " " + << *MI1); MI1->eraseFromParent(); Defs.erase(I1++); Changed = true; @@ -539,10 +543,11 @@ static bool hoistAndMergeSGPRInits(unsigned Reg, MachineBasicBlock::iterator I = MBB->getFirstNonPHI(); if (!intereferes(MI1, I) && !intereferes(MI2, I)) { - DEBUG(dbgs() << "Erasing from BB#" << MI1->getParent()->getNumber() - << " " << *MI1 << "and moving from BB#" - << MI2->getParent()->getNumber() << " to BB#" - << I->getParent()->getNumber() << " " << *MI2); + DEBUG(dbgs() << "Erasing from " + << printMBBReference(*MI1->getParent()) << " " << *MI1 + << "and moving from " + << printMBBReference(*MI2->getParent()) << " to " + << printMBBReference(*I->getParent()) << " " << *MI2); I->getParent()->splice(I, MI2->getParent(), MI2); MI1->eraseFromParent(); Defs.erase(I1++); @@ -562,12 +567,47 @@ static bool hoistAndMergeSGPRInits(unsigned Reg, return Changed; } +void SIFixSGPRCopies::computePDF(MachineFunction *MF) { + MachineFunction::iterator B = MF->begin(); + MachineFunction::iterator E = MF->end(); + for (; B != E; ++B) { + if (B->succ_size() > 1) { + for (auto S : B->successors()) { + 
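        // Illustrative commentary (not from the original patch): this is the
        // standard dominance-frontier construction applied to the
        // post-dominator tree. For every block B with more than one
        // successor, walk from each successor S up the post-dominator tree
        // until B's own immediate post-dominator is reached, and record B in
        // the post-dominance frontier of every node visited along the way.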
MachineDomTreeNode *runner = MPDT->getNode(&*S); + MachineDomTreeNode *sentinel = MPDT->getNode(&*B)->getIDom(); + while (runner && runner != sentinel) { + PDF[runner->getBlock()].insert(&*B); + runner = runner->getIDom(); + } + } + } + } +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void SIFixSGPRCopies::printPDF() { + dbgs() << "\n######## PostDominanceFrontiers set #########\n"; + for (auto &I : PDF) { + dbgs() << "PDF[ " << I.first->getNumber() << "] : "; + for (auto &J : I.second) { + dbgs() << J->getNumber() << ' '; + } + dbgs() << '\n'; + } + dbgs() << "\n##############################################\n"; +} +#endif + bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) { const SISubtarget &ST = MF.getSubtarget(); MachineRegisterInfo &MRI = MF.getRegInfo(); const SIRegisterInfo *TRI = ST.getRegisterInfo(); const SIInstrInfo *TII = ST.getInstrInfo(); MDT = &getAnalysis(); + MPDT = &getAnalysis(); + PDF.clear(); + computePDF(&MF); + DEBUG(printPDF()); SmallVector Worklist; @@ -621,15 +661,27 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) { if (!TRI->isSGPRClass(MRI.getRegClass(Reg))) break; - // We don't need to fix the PHI if the common dominator of the - // two incoming blocks terminates with a uniform branch. + // We don't need to fix the PHI if all the source blocks + // have no divergent control dependecies bool HasVGPROperand = phiHasVGPROperands(MI, MRI, TRI, TII); - if (MI.getNumExplicitOperands() == 5 && !HasVGPROperand) { - MachineBasicBlock *MBB0 = MI.getOperand(2).getMBB(); - MachineBasicBlock *MBB1 = MI.getOperand(4).getMBB(); - - if (!predsHasDivergentTerminator(MBB0, TRI) && - !predsHasDivergentTerminator(MBB1, TRI)) { + if (!HasVGPROperand) { + bool Uniform = true; + MachineBasicBlock * Join = MI.getParent(); + for (auto &O : MI.explicit_operands()) { + if (O.isMBB()) { + MachineBasicBlock * Source = O.getMBB(); + SetVector &SourcePDF = PDF[Source]; + SetVector &JoinPDF = PDF[Join]; + SetVector CDList; + for (auto &I : SourcePDF) { + if (!JoinPDF.count(I) || /* back edge */MDT->dominates(Join, I)) { + if (hasTerminatorThatModifiesExec(*I, *TRI)) + Uniform = false; + } + } + } + } + if (Uniform) { DEBUG(dbgs() << "Not fixing PHI for uniform branch: " << MI << '\n'); break; } diff --git a/lib/Target/AMDGPU/SIFixWWMLiveness.cpp b/lib/Target/AMDGPU/SIFixWWMLiveness.cpp index 47db89825372..3493c7775f0c 100644 --- a/lib/Target/AMDGPU/SIFixWWMLiveness.cpp +++ b/lib/Target/AMDGPU/SIFixWWMLiveness.cpp @@ -17,8 +17,8 @@ /// %vgpr0 = V_MOV_B32_e32 0.0 /// if (...) { /// %vgpr1 = ... -/// %vgpr2 = WWM %vgpr1 -/// ... = %vgpr2 +/// %vgpr2 = WWM killed %vgpr1 +/// ... = killed %vgpr2 /// %vgpr0 = V_MOV_B32_e32 1.0 /// } /// ... 
= %vgpr0 @@ -57,7 +57,7 @@ #include "SIRegisterInfo.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SparseBitVector.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetRegisterInfo.h" diff --git a/lib/Target/AMDGPU/SIFoldOperands.cpp b/lib/Target/AMDGPU/SIFoldOperands.cpp index 0fa6712527fa..783181980342 100644 --- a/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -14,7 +14,7 @@ #include "SIInstrInfo.h" #include "SIMachineFunctionInfo.h" #include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -290,11 +290,11 @@ void SIFoldOperands::foldOperand( // copy since a subregister use tied to a full register def doesn't really // make sense. e.g. don't fold: // - // %vreg1 = COPY %vreg0:sub1 - // %vreg2 = V_MAC_{F16, F32} %vreg3, %vreg4, %vreg1 + // %1 = COPY %0:sub1 + // %2 = V_MAC_{F16, F32} %3, %4, %1 // // into - // %vreg2 = V_MAC_{F16, F32} %vreg3, %vreg4, %vreg0:sub1 + // %2 = V_MAC_{F16, F32} %3, %4, %0:sub1 if (UseOp.isTied() && OpToFold.getSubReg() != AMDGPU::NoSubRegister) return; } @@ -926,7 +926,7 @@ bool SIFoldOperands::tryFoldOMod(MachineInstr &MI) { } bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) { - if (skipFunction(*MF.getFunction())) + if (skipFunction(MF.getFunction())) return false; MRI = &MF.getRegInfo(); @@ -971,9 +971,9 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) { // Prevent folding operands backwards in the function. For example, // the COPY opcode must not be replaced by 1 in this example: // - // %vreg3 = COPY %VGPR0; VGPR_32:%vreg3 + // %3 = COPY %vgpr0; VGPR_32:%3 // ... - // %VGPR0 = V_MOV_B32_e32 1, %EXEC + // %vgpr0 = V_MOV_B32_e32 1, implicit %exec MachineOperand &Dst = MI.getOperand(0); if (Dst.isReg() && !TargetRegisterInfo::isVirtualRegister(Dst.getReg())) diff --git a/lib/Target/AMDGPU/SIFrameLowering.cpp b/lib/Target/AMDGPU/SIFrameLowering.cpp index 08a7419612bd..89bb98dbd028 100644 --- a/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -394,7 +394,7 @@ void SIFrameLowering::emitEntryFunctionScratchSetup(const SISubtarget &ST, // We now have the GIT ptr - now get the scratch descriptor from the entry // at offset 0. 
PointerType *PtrTy = - PointerType::get(Type::getInt64Ty(MF.getFunction()->getContext()), + PointerType::get(Type::getInt64Ty(MF.getFunction().getContext()), AMDGPUAS::CONSTANT_ADDRESS); MachinePointerInfo PtrInfo(UndefValue::get(PtrTy)); const MCInstrDesc &LoadDwordX4 = TII->get(AMDGPU::S_LOAD_DWORDX4_IMM); @@ -425,7 +425,7 @@ void SIFrameLowering::emitEntryFunctionScratchSetup(const SISubtarget &ST, if (MFI->hasImplicitBufferPtr()) { unsigned Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1); - if (AMDGPU::isCompute(MF.getFunction()->getCallingConv())) { + if (AMDGPU::isCompute(MF.getFunction().getCallingConv())) { const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64); BuildMI(MBB, I, DL, Mov64, Rsrc01) @@ -435,7 +435,7 @@ void SIFrameLowering::emitEntryFunctionScratchSetup(const SISubtarget &ST, const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM); PointerType *PtrTy = - PointerType::get(Type::getInt64Ty(MF.getFunction()->getContext()), + PointerType::get(Type::getInt64Ty(MF.getFunction().getContext()), AMDGPUAS::CONSTANT_ADDRESS); MachinePointerInfo PtrInfo(UndefValue::get(PtrTy)); auto MMO = MF.getMachineMemOperand(PtrInfo, diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp index 2561f7f09fe5..7dc9dcf31fcb 100644 --- a/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/lib/Target/AMDGPU/SIISelLowering.cpp @@ -207,11 +207,14 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2f16, Custom); + setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v2f16, Custom); + setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v4f16, Custom); setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); setOperationAction(ISD::INTRINSIC_VOID, MVT::v2i16, Custom); setOperationAction(ISD::INTRINSIC_VOID, MVT::v2f16, Custom); + setOperationAction(ISD::INTRINSIC_VOID, MVT::v4f16, Custom); setOperationAction(ISD::BRCOND, MVT::Other, Custom); setOperationAction(ISD::BR_CC, MVT::i1, Expand); @@ -226,6 +229,14 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, setOperationAction(ISD::ADDCARRY, MVT::i32, Legal); setOperationAction(ISD::SUBCARRY, MVT::i32, Legal); +#if 0 + setOperationAction(ISD::ADDCARRY, MVT::i64, Legal); + setOperationAction(ISD::SUBCARRY, MVT::i64, Legal); +#endif + + //setOperationAction(ISD::ADDC, MVT::i64, Expand); + //setOperationAction(ISD::SUBC, MVT::i64, Expand); + // We only support LOAD/STORE and vector manipulation ops for vectors // with > 4 elements. 
for (MVT VT : {MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32, @@ -550,19 +561,239 @@ bool SITargetLowering::isShuffleMaskLegal(ArrayRef, EVT) const { bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &CI, + MachineFunction &MF, unsigned IntrID) const { switch (IntrID) { case Intrinsic::amdgcn_atomic_inc: - case Intrinsic::amdgcn_atomic_dec: { + case Intrinsic::amdgcn_atomic_dec: + case Intrinsic::amdgcn_atomic_fadd: + case Intrinsic::amdgcn_atomic_fmin: + case Intrinsic::amdgcn_atomic_fmax: { Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::getVT(CI.getType()); Info.ptrVal = CI.getOperand(0); Info.align = 0; + Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; const ConstantInt *Vol = dyn_cast(CI.getOperand(4)); - Info.vol = !Vol || !Vol->isZero(); - Info.readMem = true; - Info.writeMem = true; + if (!Vol || !Vol->isZero()) + Info.flags |= MachineMemOperand::MOVolatile; + + return true; + } + + // Image load. + case Intrinsic::amdgcn_image_load: + case Intrinsic::amdgcn_image_load_mip: + + // Sample. + case Intrinsic::amdgcn_image_sample: + case Intrinsic::amdgcn_image_sample_cl: + case Intrinsic::amdgcn_image_sample_d: + case Intrinsic::amdgcn_image_sample_d_cl: + case Intrinsic::amdgcn_image_sample_l: + case Intrinsic::amdgcn_image_sample_b: + case Intrinsic::amdgcn_image_sample_b_cl: + case Intrinsic::amdgcn_image_sample_lz: + case Intrinsic::amdgcn_image_sample_cd: + case Intrinsic::amdgcn_image_sample_cd_cl: + + // Sample with comparison. + case Intrinsic::amdgcn_image_sample_c: + case Intrinsic::amdgcn_image_sample_c_cl: + case Intrinsic::amdgcn_image_sample_c_d: + case Intrinsic::amdgcn_image_sample_c_d_cl: + case Intrinsic::amdgcn_image_sample_c_l: + case Intrinsic::amdgcn_image_sample_c_b: + case Intrinsic::amdgcn_image_sample_c_b_cl: + case Intrinsic::amdgcn_image_sample_c_lz: + case Intrinsic::amdgcn_image_sample_c_cd: + case Intrinsic::amdgcn_image_sample_c_cd_cl: + + // Sample with offsets. + case Intrinsic::amdgcn_image_sample_o: + case Intrinsic::amdgcn_image_sample_cl_o: + case Intrinsic::amdgcn_image_sample_d_o: + case Intrinsic::amdgcn_image_sample_d_cl_o: + case Intrinsic::amdgcn_image_sample_l_o: + case Intrinsic::amdgcn_image_sample_b_o: + case Intrinsic::amdgcn_image_sample_b_cl_o: + case Intrinsic::amdgcn_image_sample_lz_o: + case Intrinsic::amdgcn_image_sample_cd_o: + case Intrinsic::amdgcn_image_sample_cd_cl_o: + + // Sample with comparison and offsets. 
+ case Intrinsic::amdgcn_image_sample_c_o: + case Intrinsic::amdgcn_image_sample_c_cl_o: + case Intrinsic::amdgcn_image_sample_c_d_o: + case Intrinsic::amdgcn_image_sample_c_d_cl_o: + case Intrinsic::amdgcn_image_sample_c_l_o: + case Intrinsic::amdgcn_image_sample_c_b_o: + case Intrinsic::amdgcn_image_sample_c_b_cl_o: + case Intrinsic::amdgcn_image_sample_c_lz_o: + case Intrinsic::amdgcn_image_sample_c_cd_o: + case Intrinsic::amdgcn_image_sample_c_cd_cl_o: + + // Basic gather4 + case Intrinsic::amdgcn_image_gather4: + case Intrinsic::amdgcn_image_gather4_cl: + case Intrinsic::amdgcn_image_gather4_l: + case Intrinsic::amdgcn_image_gather4_b: + case Intrinsic::amdgcn_image_gather4_b_cl: + case Intrinsic::amdgcn_image_gather4_lz: + + // Gather4 with comparison + case Intrinsic::amdgcn_image_gather4_c: + case Intrinsic::amdgcn_image_gather4_c_cl: + case Intrinsic::amdgcn_image_gather4_c_l: + case Intrinsic::amdgcn_image_gather4_c_b: + case Intrinsic::amdgcn_image_gather4_c_b_cl: + case Intrinsic::amdgcn_image_gather4_c_lz: + + // Gather4 with offsets + case Intrinsic::amdgcn_image_gather4_o: + case Intrinsic::amdgcn_image_gather4_cl_o: + case Intrinsic::amdgcn_image_gather4_l_o: + case Intrinsic::amdgcn_image_gather4_b_o: + case Intrinsic::amdgcn_image_gather4_b_cl_o: + case Intrinsic::amdgcn_image_gather4_lz_o: + + // Gather4 with comparison and offsets + case Intrinsic::amdgcn_image_gather4_c_o: + case Intrinsic::amdgcn_image_gather4_c_cl_o: + case Intrinsic::amdgcn_image_gather4_c_l_o: + case Intrinsic::amdgcn_image_gather4_c_b_o: + case Intrinsic::amdgcn_image_gather4_c_b_cl_o: + case Intrinsic::amdgcn_image_gather4_c_lz_o: { + SIMachineFunctionInfo *MFI = MF.getInfo(); + Info.opc = ISD::INTRINSIC_W_CHAIN; + Info.memVT = MVT::getVT(CI.getType()); + Info.ptrVal = MFI->getImagePSV( + *MF.getSubtarget().getInstrInfo(), + CI.getArgOperand(1)); + Info.align = 0; + Info.flags = MachineMemOperand::MOLoad | + MachineMemOperand::MODereferenceable; + return true; + } + case Intrinsic::amdgcn_image_store: + case Intrinsic::amdgcn_image_store_mip: { + SIMachineFunctionInfo *MFI = MF.getInfo(); + Info.opc = ISD::INTRINSIC_VOID; + Info.memVT = MVT::getVT(CI.getArgOperand(0)->getType()); + Info.ptrVal = MFI->getImagePSV( + *MF.getSubtarget().getInstrInfo(), + CI.getArgOperand(2)); + Info.flags = MachineMemOperand::MOStore | + MachineMemOperand::MODereferenceable; + Info.align = 0; + return true; + } + case Intrinsic::amdgcn_image_atomic_swap: + case Intrinsic::amdgcn_image_atomic_add: + case Intrinsic::amdgcn_image_atomic_sub: + case Intrinsic::amdgcn_image_atomic_smin: + case Intrinsic::amdgcn_image_atomic_umin: + case Intrinsic::amdgcn_image_atomic_smax: + case Intrinsic::amdgcn_image_atomic_umax: + case Intrinsic::amdgcn_image_atomic_and: + case Intrinsic::amdgcn_image_atomic_or: + case Intrinsic::amdgcn_image_atomic_xor: + case Intrinsic::amdgcn_image_atomic_inc: + case Intrinsic::amdgcn_image_atomic_dec: { + SIMachineFunctionInfo *MFI = MF.getInfo(); + Info.opc = ISD::INTRINSIC_W_CHAIN; + Info.memVT = MVT::getVT(CI.getType()); + Info.ptrVal = MFI->getImagePSV( + *MF.getSubtarget().getInstrInfo(), + CI.getArgOperand(2)); + + Info.flags = MachineMemOperand::MOLoad | + MachineMemOperand::MOStore | + MachineMemOperand::MODereferenceable; + + // XXX - Should this be volatile without known ordering? 
+ Info.flags |= MachineMemOperand::MOVolatile; + return true; + } + case Intrinsic::amdgcn_image_atomic_cmpswap: { + SIMachineFunctionInfo *MFI = MF.getInfo(); + Info.opc = ISD::INTRINSIC_W_CHAIN; + Info.memVT = MVT::getVT(CI.getType()); + Info.ptrVal = MFI->getImagePSV( + *MF.getSubtarget().getInstrInfo(), + CI.getArgOperand(3)); + + Info.flags = MachineMemOperand::MOLoad | + MachineMemOperand::MOStore | + MachineMemOperand::MODereferenceable; + + // XXX - Should this be volatile without known ordering? + Info.flags |= MachineMemOperand::MOVolatile; + return true; + } + case Intrinsic::amdgcn_tbuffer_load: + case Intrinsic::amdgcn_buffer_load: + case Intrinsic::amdgcn_buffer_load_format: { + SIMachineFunctionInfo *MFI = MF.getInfo(); + Info.opc = ISD::INTRINSIC_W_CHAIN; + Info.ptrVal = MFI->getBufferPSV( + *MF.getSubtarget().getInstrInfo(), + CI.getArgOperand(0)); + Info.memVT = MVT::getVT(CI.getType()); + Info.flags = MachineMemOperand::MOLoad | + MachineMemOperand::MODereferenceable; + + // There is a constant offset component, but there are additional register + // offsets which could break AA if we set the offset to anything non-0. + return true; + } + case Intrinsic::amdgcn_tbuffer_store: + case Intrinsic::amdgcn_buffer_store: + case Intrinsic::amdgcn_buffer_store_format: { + SIMachineFunctionInfo *MFI = MF.getInfo(); + Info.opc = ISD::INTRINSIC_VOID; + Info.ptrVal = MFI->getBufferPSV( + *MF.getSubtarget().getInstrInfo(), + CI.getArgOperand(1)); + Info.memVT = MVT::getVT(CI.getArgOperand(0)->getType()); + Info.flags = MachineMemOperand::MOStore | + MachineMemOperand::MODereferenceable; + return true; + } + case Intrinsic::amdgcn_buffer_atomic_swap: + case Intrinsic::amdgcn_buffer_atomic_add: + case Intrinsic::amdgcn_buffer_atomic_sub: + case Intrinsic::amdgcn_buffer_atomic_smin: + case Intrinsic::amdgcn_buffer_atomic_umin: + case Intrinsic::amdgcn_buffer_atomic_smax: + case Intrinsic::amdgcn_buffer_atomic_umax: + case Intrinsic::amdgcn_buffer_atomic_and: + case Intrinsic::amdgcn_buffer_atomic_or: + case Intrinsic::amdgcn_buffer_atomic_xor: { + SIMachineFunctionInfo *MFI = MF.getInfo(); + Info.opc = ISD::INTRINSIC_W_CHAIN; + Info.ptrVal = MFI->getBufferPSV( + *MF.getSubtarget().getInstrInfo(), + CI.getArgOperand(1)); + Info.memVT = MVT::getVT(CI.getType()); + Info.flags = MachineMemOperand::MOLoad | + MachineMemOperand::MOStore | + MachineMemOperand::MODereferenceable | + MachineMemOperand::MOVolatile; + return true; + } + case Intrinsic::amdgcn_buffer_atomic_cmpswap: { + SIMachineFunctionInfo *MFI = MF.getInfo(); + Info.opc = ISD::INTRINSIC_W_CHAIN; + Info.ptrVal = MFI->getBufferPSV( + *MF.getSubtarget().getInstrInfo(), + CI.getArgOperand(2)); + Info.memVT = MVT::getVT(CI.getType()); + Info.flags = MachineMemOperand::MOLoad | + MachineMemOperand::MOStore | + MachineMemOperand::MODereferenceable | + MachineMemOperand::MOVolatile; return true; } default: @@ -575,7 +806,10 @@ bool SITargetLowering::getAddrModeArguments(IntrinsicInst *II, Type *&AccessTy) const { switch (II->getIntrinsicID()) { case Intrinsic::amdgcn_atomic_inc: - case Intrinsic::amdgcn_atomic_dec: { + case Intrinsic::amdgcn_atomic_dec: + case Intrinsic::amdgcn_atomic_fadd: + case Intrinsic::amdgcn_atomic_fmin: + case Intrinsic::amdgcn_atomic_fmax: { Value *Ptr = II->getArgOperand(0); AccessTy = II->getType(); Ops.push_back(Ptr); @@ -1450,14 +1684,14 @@ SDValue SITargetLowering::LowerFormalArguments( const SIRegisterInfo *TRI = getSubtarget()->getRegisterInfo(); MachineFunction &MF = DAG.getMachineFunction(); - 
FunctionType *FType = MF.getFunction()->getFunctionType(); + FunctionType *FType = MF.getFunction().getFunctionType(); SIMachineFunctionInfo *Info = MF.getInfo(); const SISubtarget &ST = MF.getSubtarget(); if (Subtarget->isAmdHsaOS() && AMDGPU::isShader(CallConv)) { - const Function *Fn = MF.getFunction(); + const Function &Fn = MF.getFunction(); DiagnosticInfoUnsupported NoGraphicsHSA( - *Fn, "unsupported non-compute shaders with HSA", DL.getDebugLoc()); + Fn, "unsupported non-compute shaders with HSA", DL.getDebugLoc()); DAG.getContext()->diagnose(NoGraphicsHSA); return DAG.getEntryNode(); } @@ -1686,7 +1920,7 @@ SDValue SITargetLowering::LowerFormalArguments( auto &ArgUsageInfo = DAG.getPass()->getAnalysis(); - ArgUsageInfo.setFuncArgInfo(*MF.getFunction(), Info->getArgInfo()); + ArgUsageInfo.setFuncArgInfo(MF.getFunction(), Info->getArgInfo()); unsigned StackArgSize = CCInfo.getNextStackOffset(); Info->setBytesInStackArgArea(StackArgSize); @@ -2022,8 +2256,8 @@ bool SITargetLowering::isEligibleForTailCallOptimization( return false; MachineFunction &MF = DAG.getMachineFunction(); - const Function *CallerF = MF.getFunction(); - CallingConv::ID CallerCC = CallerF->getCallingConv(); + const Function &CallerF = MF.getFunction(); + CallingConv::ID CallerCC = CallerF.getCallingConv(); const SIRegisterInfo *TRI = getSubtarget()->getRegisterInfo(); const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); @@ -2044,7 +2278,7 @@ bool SITargetLowering::isEligibleForTailCallOptimization( if (IsVarArg) return false; - for (const Argument &Arg : CallerF->args()) { + for (const Argument &Arg : CallerF.args()) { if (Arg.hasByValAttr()) return false; } @@ -2262,8 +2496,8 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI, unsigned LocMemOffset = VA.getLocMemOffset(); int32_t Offset = LocMemOffset; - SDValue PtrOff = DAG.getConstant(Offset, DL, MVT::i32); - PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff); + + SDValue PtrOff = DAG.getObjectPtrOffset(DL, StackPtr, Offset); if (IsTailCall) { ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags; @@ -2273,8 +2507,8 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI, Offset = Offset + FPDiff; int FI = MFI.CreateFixedObject(OpSize, Offset, true); - DstAddr = DAG.getFrameIndex(FI, PtrVT); - DstAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, DstAddr, StackPtr); + DstAddr = DAG.getObjectPtrOffset(DL, DAG.getFrameIndex(FI, PtrVT), + StackPtr); DstInfo = MachinePointerInfo::getFixedStack(MF, FI); // Make sure any stack arguments overlapping with where we're storing @@ -2936,21 +3170,12 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter( SIMachineFunctionInfo *MFI = MF->getInfo(); if (TII->isMIMG(MI)) { - if (!MI.memoperands_empty()) - return BB; + if (MI.memoperands_empty() && MI.mayLoadOrStore()) { + report_fatal_error("missing mem operand from MIMG instruction"); + } // Add a memoperand for mimg instructions so that they aren't assumed to // be ordered memory instuctions. 
- MachinePointerInfo PtrInfo(MFI->getImagePSV()); - MachineMemOperand::Flags Flags = MachineMemOperand::MODereferenceable; - if (MI.mayStore()) - Flags |= MachineMemOperand::MOStore; - - if (MI.mayLoad()) - Flags |= MachineMemOperand::MOLoad; - - auto MMO = MF->getMachineMemOperand(PtrInfo, Flags, 0, 0); - MI.addMemOperand(*MF, MMO); return BB; } @@ -3285,6 +3510,350 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { return SDValue(); } +static unsigned getImageOpcode(unsigned IID) { + switch (IID) { + case Intrinsic::amdgcn_image_load: + return AMDGPUISD::IMAGE_LOAD; + case Intrinsic::amdgcn_image_load_mip: + return AMDGPUISD::IMAGE_LOAD_MIP; + + // Basic sample. + case Intrinsic::amdgcn_image_sample: + return AMDGPUISD::IMAGE_SAMPLE; + case Intrinsic::amdgcn_image_sample_cl: + return AMDGPUISD::IMAGE_SAMPLE_CL; + case Intrinsic::amdgcn_image_sample_d: + return AMDGPUISD::IMAGE_SAMPLE_D; + case Intrinsic::amdgcn_image_sample_d_cl: + return AMDGPUISD::IMAGE_SAMPLE_D_CL; + case Intrinsic::amdgcn_image_sample_l: + return AMDGPUISD::IMAGE_SAMPLE_L; + case Intrinsic::amdgcn_image_sample_b: + return AMDGPUISD::IMAGE_SAMPLE_B; + case Intrinsic::amdgcn_image_sample_b_cl: + return AMDGPUISD::IMAGE_SAMPLE_B_CL; + case Intrinsic::amdgcn_image_sample_lz: + return AMDGPUISD::IMAGE_SAMPLE_LZ; + case Intrinsic::amdgcn_image_sample_cd: + return AMDGPUISD::IMAGE_SAMPLE_CD; + case Intrinsic::amdgcn_image_sample_cd_cl: + return AMDGPUISD::IMAGE_SAMPLE_CD_CL; + + // Sample with comparison. + case Intrinsic::amdgcn_image_sample_c: + return AMDGPUISD::IMAGE_SAMPLE_C; + case Intrinsic::amdgcn_image_sample_c_cl: + return AMDGPUISD::IMAGE_SAMPLE_C_CL; + case Intrinsic::amdgcn_image_sample_c_d: + return AMDGPUISD::IMAGE_SAMPLE_C_D; + case Intrinsic::amdgcn_image_sample_c_d_cl: + return AMDGPUISD::IMAGE_SAMPLE_C_D_CL; + case Intrinsic::amdgcn_image_sample_c_l: + return AMDGPUISD::IMAGE_SAMPLE_C_L; + case Intrinsic::amdgcn_image_sample_c_b: + return AMDGPUISD::IMAGE_SAMPLE_C_B; + case Intrinsic::amdgcn_image_sample_c_b_cl: + return AMDGPUISD::IMAGE_SAMPLE_C_B_CL; + case Intrinsic::amdgcn_image_sample_c_lz: + return AMDGPUISD::IMAGE_SAMPLE_C_LZ; + case Intrinsic::amdgcn_image_sample_c_cd: + return AMDGPUISD::IMAGE_SAMPLE_C_CD; + case Intrinsic::amdgcn_image_sample_c_cd_cl: + return AMDGPUISD::IMAGE_SAMPLE_C_CD_CL; + + // Sample with offsets. + case Intrinsic::amdgcn_image_sample_o: + return AMDGPUISD::IMAGE_SAMPLE_O; + case Intrinsic::amdgcn_image_sample_cl_o: + return AMDGPUISD::IMAGE_SAMPLE_CL_O; + case Intrinsic::amdgcn_image_sample_d_o: + return AMDGPUISD::IMAGE_SAMPLE_D_O; + case Intrinsic::amdgcn_image_sample_d_cl_o: + return AMDGPUISD::IMAGE_SAMPLE_D_CL_O; + case Intrinsic::amdgcn_image_sample_l_o: + return AMDGPUISD::IMAGE_SAMPLE_L_O; + case Intrinsic::amdgcn_image_sample_b_o: + return AMDGPUISD::IMAGE_SAMPLE_B_O; + case Intrinsic::amdgcn_image_sample_b_cl_o: + return AMDGPUISD::IMAGE_SAMPLE_B_CL_O; + case Intrinsic::amdgcn_image_sample_lz_o: + return AMDGPUISD::IMAGE_SAMPLE_LZ_O; + case Intrinsic::amdgcn_image_sample_cd_o: + return AMDGPUISD::IMAGE_SAMPLE_CD_O; + case Intrinsic::amdgcn_image_sample_cd_cl_o: + return AMDGPUISD::IMAGE_SAMPLE_CD_CL_O; + + // Sample with comparison and offsets. 
+ case Intrinsic::amdgcn_image_sample_c_o: + return AMDGPUISD::IMAGE_SAMPLE_C_O; + case Intrinsic::amdgcn_image_sample_c_cl_o: + return AMDGPUISD::IMAGE_SAMPLE_C_CL_O; + case Intrinsic::amdgcn_image_sample_c_d_o: + return AMDGPUISD::IMAGE_SAMPLE_C_D_O; + case Intrinsic::amdgcn_image_sample_c_d_cl_o: + return AMDGPUISD::IMAGE_SAMPLE_C_D_CL_O; + case Intrinsic::amdgcn_image_sample_c_l_o: + return AMDGPUISD::IMAGE_SAMPLE_C_L_O; + case Intrinsic::amdgcn_image_sample_c_b_o: + return AMDGPUISD::IMAGE_SAMPLE_C_B_O; + case Intrinsic::amdgcn_image_sample_c_b_cl_o: + return AMDGPUISD::IMAGE_SAMPLE_C_B_CL_O; + case Intrinsic::amdgcn_image_sample_c_lz_o: + return AMDGPUISD::IMAGE_SAMPLE_C_LZ_O; + case Intrinsic::amdgcn_image_sample_c_cd_o: + return AMDGPUISD::IMAGE_SAMPLE_C_CD_O; + case Intrinsic::amdgcn_image_sample_c_cd_cl_o: + return AMDGPUISD::IMAGE_SAMPLE_C_CD_CL_O; + + // Basic gather4. + case Intrinsic::amdgcn_image_gather4: + return AMDGPUISD::IMAGE_GATHER4; + case Intrinsic::amdgcn_image_gather4_cl: + return AMDGPUISD::IMAGE_GATHER4_CL; + case Intrinsic::amdgcn_image_gather4_l: + return AMDGPUISD::IMAGE_GATHER4_L; + case Intrinsic::amdgcn_image_gather4_b: + return AMDGPUISD::IMAGE_GATHER4_B; + case Intrinsic::amdgcn_image_gather4_b_cl: + return AMDGPUISD::IMAGE_GATHER4_B_CL; + case Intrinsic::amdgcn_image_gather4_lz: + return AMDGPUISD::IMAGE_GATHER4_LZ; + + // Gather4 with comparison. + case Intrinsic::amdgcn_image_gather4_c: + return AMDGPUISD::IMAGE_GATHER4_C; + case Intrinsic::amdgcn_image_gather4_c_cl: + return AMDGPUISD::IMAGE_GATHER4_C_CL; + case Intrinsic::amdgcn_image_gather4_c_l: + return AMDGPUISD::IMAGE_GATHER4_C_L; + case Intrinsic::amdgcn_image_gather4_c_b: + return AMDGPUISD::IMAGE_GATHER4_C_B; + case Intrinsic::amdgcn_image_gather4_c_b_cl: + return AMDGPUISD::IMAGE_GATHER4_C_B_CL; + case Intrinsic::amdgcn_image_gather4_c_lz: + return AMDGPUISD::IMAGE_GATHER4_C_LZ; + + // Gather4 with offsets. + case Intrinsic::amdgcn_image_gather4_o: + return AMDGPUISD::IMAGE_GATHER4_O; + case Intrinsic::amdgcn_image_gather4_cl_o: + return AMDGPUISD::IMAGE_GATHER4_CL_O; + case Intrinsic::amdgcn_image_gather4_l_o: + return AMDGPUISD::IMAGE_GATHER4_L_O; + case Intrinsic::amdgcn_image_gather4_b_o: + return AMDGPUISD::IMAGE_GATHER4_B_O; + case Intrinsic::amdgcn_image_gather4_b_cl_o: + return AMDGPUISD::IMAGE_GATHER4_B_CL_O; + case Intrinsic::amdgcn_image_gather4_lz_o: + return AMDGPUISD::IMAGE_GATHER4_LZ_O; + + // Gather4 with comparison and offsets. + case Intrinsic::amdgcn_image_gather4_c_o: + return AMDGPUISD::IMAGE_GATHER4_C_O; + case Intrinsic::amdgcn_image_gather4_c_cl_o: + return AMDGPUISD::IMAGE_GATHER4_C_CL_O; + case Intrinsic::amdgcn_image_gather4_c_l_o: + return AMDGPUISD::IMAGE_GATHER4_C_L_O; + case Intrinsic::amdgcn_image_gather4_c_b_o: + return AMDGPUISD::IMAGE_GATHER4_C_B_O; + case Intrinsic::amdgcn_image_gather4_c_b_cl_o: + return AMDGPUISD::IMAGE_GATHER4_C_B_CL_O; + case Intrinsic::amdgcn_image_gather4_c_lz_o: + return AMDGPUISD::IMAGE_GATHER4_C_LZ_O; + + default: + break; + } + return 0; +} + +static SDValue adjustLoadValueType(SDValue Result, EVT LoadVT, SDLoc DL, + SelectionDAG &DAG, bool Unpacked) { + if (Unpacked) { // From v2i32/v4i32 back to v2f16/v4f16. + // Truncate to v2i16/v4i16. + EVT IntLoadVT = LoadVT.changeTypeToInteger(); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, IntLoadVT, Result); + // Bitcast to original type (v2f16/v4f16). + return DAG.getNode(ISD::BITCAST, DL, LoadVT, Trunc); + } + // Cast back to the original packed type. 
+ return DAG.getNode(ISD::BITCAST, DL, LoadVT, Result); +} + +// This is to lower INTRINSIC_W_CHAIN with illegal result types. +SDValue SITargetLowering::lowerIntrinsicWChain_IllegalReturnType(SDValue Op, + SDValue &Chain, SelectionDAG &DAG) const { + EVT LoadVT = Op.getValueType(); + // TODO: handle v3f16. + if (LoadVT != MVT::v2f16 && LoadVT != MVT::v4f16) + return SDValue(); + + bool Unpacked = Subtarget->hasUnpackedD16VMem(); + EVT UnpackedLoadVT = (LoadVT == MVT::v2f16) ? MVT::v2i32 : MVT::v4i32; + EVT EquivLoadVT = Unpacked ? UnpackedLoadVT : + getEquivalentMemType(*DAG.getContext(), LoadVT); + // Change from v4f16/v2f16 to EquivLoadVT. + SDVTList VTList = DAG.getVTList(EquivLoadVT, MVT::Other); + + SDValue Res; + SDLoc DL(Op); + MemSDNode *M = cast(Op); + unsigned IID = cast(Op.getOperand(1))->getZExtValue(); + switch (IID) { + case Intrinsic::amdgcn_tbuffer_load: { + SDValue Ops[] = { + Op.getOperand(0), // Chain + Op.getOperand(2), // rsrc + Op.getOperand(3), // vindex + Op.getOperand(4), // voffset + Op.getOperand(5), // soffset + Op.getOperand(6), // offset + Op.getOperand(7), // dfmt + Op.getOperand(8), // nfmt + Op.getOperand(9), // glc + Op.getOperand(10) // slc + }; + Res = DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_LOAD_FORMAT_D16, DL, + VTList, Ops, M->getMemoryVT(), + M->getMemOperand()); + Chain = Res.getValue(1); + return adjustLoadValueType(Res, LoadVT, DL, DAG, Unpacked); + } + case Intrinsic::amdgcn_buffer_load_format: { + SDValue Ops[] = { + Op.getOperand(0), // Chain + Op.getOperand(2), // rsrc + Op.getOperand(3), // vindex + Op.getOperand(4), // offset + Op.getOperand(5), // glc + Op.getOperand(6) // slc + }; + Res = DAG.getMemIntrinsicNode(AMDGPUISD::BUFFER_LOAD_FORMAT_D16, + DL, VTList, Ops, M->getMemoryVT(), + M->getMemOperand()); + Chain = Res.getValue(1); + return adjustLoadValueType(Res, LoadVT, DL, DAG, Unpacked); + } + case Intrinsic::amdgcn_image_load: + case Intrinsic::amdgcn_image_load_mip: { + SDValue Ops[] = { + Op.getOperand(0), // Chain + Op.getOperand(2), // vaddr + Op.getOperand(3), // rsrc + Op.getOperand(4), // dmask + Op.getOperand(5), // glc + Op.getOperand(6), // slc + Op.getOperand(7), // lwe + Op.getOperand(8) // da + }; + unsigned Opc = getImageOpcode(IID); + Res = DAG.getMemIntrinsicNode(Opc, DL, VTList, Ops, M->getMemoryVT(), + M->getMemOperand()); + Chain = Res.getValue(1); + return adjustLoadValueType(Res, LoadVT, DL, DAG, Unpacked); + } + // Basic sample. + case Intrinsic::amdgcn_image_sample: + case Intrinsic::amdgcn_image_sample_cl: + case Intrinsic::amdgcn_image_sample_d: + case Intrinsic::amdgcn_image_sample_d_cl: + case Intrinsic::amdgcn_image_sample_l: + case Intrinsic::amdgcn_image_sample_b: + case Intrinsic::amdgcn_image_sample_b_cl: + case Intrinsic::amdgcn_image_sample_lz: + case Intrinsic::amdgcn_image_sample_cd: + case Intrinsic::amdgcn_image_sample_cd_cl: + + // Sample with comparison. + case Intrinsic::amdgcn_image_sample_c: + case Intrinsic::amdgcn_image_sample_c_cl: + case Intrinsic::amdgcn_image_sample_c_d: + case Intrinsic::amdgcn_image_sample_c_d_cl: + case Intrinsic::amdgcn_image_sample_c_l: + case Intrinsic::amdgcn_image_sample_c_b: + case Intrinsic::amdgcn_image_sample_c_b_cl: + case Intrinsic::amdgcn_image_sample_c_lz: + case Intrinsic::amdgcn_image_sample_c_cd: + case Intrinsic::amdgcn_image_sample_c_cd_cl: + + // Sample with offsets. 
+ case Intrinsic::amdgcn_image_sample_o: + case Intrinsic::amdgcn_image_sample_cl_o: + case Intrinsic::amdgcn_image_sample_d_o: + case Intrinsic::amdgcn_image_sample_d_cl_o: + case Intrinsic::amdgcn_image_sample_l_o: + case Intrinsic::amdgcn_image_sample_b_o: + case Intrinsic::amdgcn_image_sample_b_cl_o: + case Intrinsic::amdgcn_image_sample_lz_o: + case Intrinsic::amdgcn_image_sample_cd_o: + case Intrinsic::amdgcn_image_sample_cd_cl_o: + + // Sample with comparison and offsets. + case Intrinsic::amdgcn_image_sample_c_o: + case Intrinsic::amdgcn_image_sample_c_cl_o: + case Intrinsic::amdgcn_image_sample_c_d_o: + case Intrinsic::amdgcn_image_sample_c_d_cl_o: + case Intrinsic::amdgcn_image_sample_c_l_o: + case Intrinsic::amdgcn_image_sample_c_b_o: + case Intrinsic::amdgcn_image_sample_c_b_cl_o: + case Intrinsic::amdgcn_image_sample_c_lz_o: + case Intrinsic::amdgcn_image_sample_c_cd_o: + case Intrinsic::amdgcn_image_sample_c_cd_cl_o: + + // Basic gather4 + case Intrinsic::amdgcn_image_gather4: + case Intrinsic::amdgcn_image_gather4_cl: + case Intrinsic::amdgcn_image_gather4_l: + case Intrinsic::amdgcn_image_gather4_b: + case Intrinsic::amdgcn_image_gather4_b_cl: + case Intrinsic::amdgcn_image_gather4_lz: + + // Gather4 with comparison + case Intrinsic::amdgcn_image_gather4_c: + case Intrinsic::amdgcn_image_gather4_c_cl: + case Intrinsic::amdgcn_image_gather4_c_l: + case Intrinsic::amdgcn_image_gather4_c_b: + case Intrinsic::amdgcn_image_gather4_c_b_cl: + case Intrinsic::amdgcn_image_gather4_c_lz: + + // Gather4 with offsets + case Intrinsic::amdgcn_image_gather4_o: + case Intrinsic::amdgcn_image_gather4_cl_o: + case Intrinsic::amdgcn_image_gather4_l_o: + case Intrinsic::amdgcn_image_gather4_b_o: + case Intrinsic::amdgcn_image_gather4_b_cl_o: + case Intrinsic::amdgcn_image_gather4_lz_o: + + // Gather4 with comparison and offsets + case Intrinsic::amdgcn_image_gather4_c_o: + case Intrinsic::amdgcn_image_gather4_c_cl_o: + case Intrinsic::amdgcn_image_gather4_c_l_o: + case Intrinsic::amdgcn_image_gather4_c_b_o: + case Intrinsic::amdgcn_image_gather4_c_b_cl_o: + case Intrinsic::amdgcn_image_gather4_c_lz_o: { + SDValue Ops[] = { + Op.getOperand(0), // Chain + Op.getOperand(2), // vaddr + Op.getOperand(3), // rsrc + Op.getOperand(4), // sampler + Op.getOperand(5), // dmask + Op.getOperand(6), // unorm + Op.getOperand(7), // glc + Op.getOperand(8), // slc + Op.getOperand(9), // lwe + Op.getOperand(10) // da + }; + unsigned Opc = getImageOpcode(IID); + Res = DAG.getMemIntrinsicNode(Opc, DL, VTList, Ops, M->getMemoryVT(), + M->getMemOperand()); + Chain = Res.getValue(1); + return adjustLoadValueType(Res, LoadVT, DL, DAG, Unpacked); + } + default: + return SDValue(); + } +} + void SITargetLowering::ReplaceNodeResults(SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) const { @@ -3312,6 +3881,16 @@ void SITargetLowering::ReplaceNodeResults(SDNode *N, } break; } + case ISD::INTRINSIC_W_CHAIN: { + SDValue Chain; + if (SDValue Res = lowerIntrinsicWChain_IllegalReturnType(SDValue(N, 0), + Chain, DAG)) { + Results.push_back(Res); + Results.push_back(Chain); + return; + } + break; + } case ISD::SELECT: { SDLoc SL(N); EVT VT = N->getValueType(0); @@ -3581,11 +4160,11 @@ SDValue SITargetLowering::lowerTRAP(SDValue Op, SelectionDAG &DAG) const { case SISubtarget::TrapIDLLVMTrap: return DAG.getNode(AMDGPUISD::ENDPGM, SL, MVT::Other, Chain); case SISubtarget::TrapIDLLVMDebugTrap: { - DiagnosticInfoUnsupported NoTrap(*MF.getFunction(), + DiagnosticInfoUnsupported NoTrap(MF.getFunction(), "debugtrap handler not 
supported", Op.getDebugLoc(), DS_Warning); - LLVMContext &Ctx = MF.getFunction()->getContext(); + LLVMContext &Ctx = MF.getFunction().getContext(); Ctx.diagnose(NoTrap); return Chain; } @@ -3630,8 +4209,7 @@ SDValue SITargetLowering::getSegmentAperture(unsigned AS, const SDLoc &DL, // private_segment_aperture_base_hi. uint32_t StructOffset = (AS == AMDGPUASI.LOCAL_ADDRESS) ? 0x40 : 0x44; - SDValue Ptr = DAG.getNode(ISD::ADD, DL, MVT::i64, QueuePtr, - DAG.getConstant(StructOffset, DL, MVT::i64)); + SDValue Ptr = DAG.getObjectPtrOffset(DL, QueuePtr, StructOffset); // TODO: Use custom target PseudoSourceValue. // TODO: We should use the value from the IR intrinsic call, but it might not @@ -3699,7 +4277,7 @@ SDValue SITargetLowering::lowerADDRSPACECAST(SDValue Op, const MachineFunction &MF = DAG.getMachineFunction(); DiagnosticInfoUnsupported InvalidAddrSpaceCast( - *MF.getFunction(), "invalid addrspacecast", SL.getDebugLoc()); + MF.getFunction(), "invalid addrspacecast", SL.getDebugLoc()); DAG.getContext()->diagnose(InvalidAddrSpaceCast); return DAG.getUNDEF(ASC->getValueType(0)); @@ -3901,7 +4479,7 @@ SDValue SITargetLowering::lowerImplicitZextParam(SelectionDAG &DAG, static SDValue emitNonHSAIntrinsicError(SelectionDAG &DAG, const SDLoc &DL, EVT VT) { - DiagnosticInfoUnsupported BadIntrin(*DAG.getMachineFunction().getFunction(), + DiagnosticInfoUnsupported BadIntrin(DAG.getMachineFunction().getFunction(), "non-hsa intrinsic with hsa target", DL.getDebugLoc()); DAG.getContext()->diagnose(BadIntrin); @@ -3910,7 +4488,7 @@ static SDValue emitNonHSAIntrinsicError(SelectionDAG &DAG, const SDLoc &DL, static SDValue emitRemovedIntrinsicError(SelectionDAG &DAG, const SDLoc &DL, EVT VT) { - DiagnosticInfoUnsupported BadIntrin(*DAG.getMachineFunction().getFunction(), + DiagnosticInfoUnsupported BadIntrin(DAG.getMachineFunction().getFunction(), "intrinsic not supported on subtarget", DL.getDebugLoc()); DAG.getContext()->diagnose(BadIntrin); @@ -3939,7 +4517,7 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::amdgcn_queue_ptr: { if (!Subtarget->isAmdCodeObjectV2(MF)) { DiagnosticInfoUnsupported BadIntrin( - *MF.getFunction(), "unsupported hsa intrinsic without hsa target", + MF.getFunction(), "unsupported hsa intrinsic without hsa target", DL.getDebugLoc()); DAG.getContext()->diagnose(BadIntrin); return DAG.getUNDEF(VT); @@ -4117,7 +4695,7 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return SDValue(); DiagnosticInfoUnsupported BadIntrin( - *MF.getFunction(), "intrinsic not supported on subtarget", + MF.getFunction(), "intrinsic not supported on subtarget", DL.getDebugLoc()); DAG.getContext()->diagnose(BadIntrin); return DAG.getUNDEF(VT); @@ -4226,6 +4804,16 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return SDValue(DAG.getMachineNode(AMDGPU::WWM, DL, Src.getValueType(), Src), 0); } + case Intrinsic::amdgcn_image_getlod: + case Intrinsic::amdgcn_image_getresinfo: { + unsigned Idx = (IntrinsicID == Intrinsic::amdgcn_image_getresinfo) ? 3 : 4; + + // Replace dmask with everything disabled with undef. 
+ const ConstantSDNode *DMask = dyn_cast(Op.getOperand(Idx)); + if (!DMask || DMask->isNullValue()) + return DAG.getUNDEF(Op.getValueType()); + return SDValue(); + } default: return Op; } @@ -4235,14 +4823,34 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const { unsigned IntrID = cast(Op.getOperand(1))->getZExtValue(); SDLoc DL(Op); - MachineFunction &MF = DAG.getMachineFunction(); switch (IntrID) { case Intrinsic::amdgcn_atomic_inc: - case Intrinsic::amdgcn_atomic_dec: { + case Intrinsic::amdgcn_atomic_dec: + case Intrinsic::amdgcn_atomic_fadd: + case Intrinsic::amdgcn_atomic_fmin: + case Intrinsic::amdgcn_atomic_fmax: { MemSDNode *M = cast(Op); - unsigned Opc = (IntrID == Intrinsic::amdgcn_atomic_inc) ? - AMDGPUISD::ATOMIC_INC : AMDGPUISD::ATOMIC_DEC; + unsigned Opc; + switch (IntrID) { + case Intrinsic::amdgcn_atomic_inc: + Opc = AMDGPUISD::ATOMIC_INC; + break; + case Intrinsic::amdgcn_atomic_dec: + Opc = AMDGPUISD::ATOMIC_DEC; + break; + case Intrinsic::amdgcn_atomic_fadd: + Opc = AMDGPUISD::ATOMIC_LOAD_FADD; + break; + case Intrinsic::amdgcn_atomic_fmin: + Opc = AMDGPUISD::ATOMIC_LOAD_FMIN; + break; + case Intrinsic::amdgcn_atomic_fmax: + Opc = AMDGPUISD::ATOMIC_LOAD_FMAX; + break; + default: + llvm_unreachable("Unknown intrinsic!"); + } SDValue Ops[] = { M->getOperand(0), // Chain M->getOperand(2), // Ptr @@ -4262,21 +4870,18 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, Op.getOperand(5), // glc Op.getOperand(6) // slc }; - SIMachineFunctionInfo *MFI = MF.getInfo(); unsigned Opc = (IntrID == Intrinsic::amdgcn_buffer_load) ? AMDGPUISD::BUFFER_LOAD : AMDGPUISD::BUFFER_LOAD_FORMAT; EVT VT = Op.getValueType(); EVT IntVT = VT.changeTypeToInteger(); - MachineMemOperand *MMO = MF.getMachineMemOperand( - MachinePointerInfo(MFI->getBufferPSV()), - MachineMemOperand::MOLoad, - VT.getStoreSize(), VT.getStoreSize()); - - return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops, IntVT, MMO); + auto *M = cast(Op); + return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops, IntVT, + M->getMemOperand()); } case Intrinsic::amdgcn_tbuffer_load: { + MemSDNode *M = cast(Op); SDValue Ops[] = { Op.getOperand(0), // Chain Op.getOperand(2), // rsrc @@ -4290,14 +4895,10 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, Op.getOperand(10) // slc }; - EVT VT = Op.getOperand(2).getValueType(); + EVT VT = Op.getValueType(); - MachineMemOperand *MMO = MF.getMachineMemOperand( - MachinePointerInfo(), - MachineMemOperand::MOLoad, - VT.getStoreSize(), VT.getStoreSize()); return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_LOAD_FORMAT, DL, - Op->getVTList(), Ops, VT, MMO); + Op->getVTList(), Ops, VT, M->getMemOperand()); } case Intrinsic::amdgcn_buffer_atomic_swap: case Intrinsic::amdgcn_buffer_atomic_add: @@ -4317,14 +4918,9 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, Op.getOperand(5), // offset Op.getOperand(6) // slc }; - EVT VT = Op.getOperand(3).getValueType(); - MachineMemOperand *MMO = MF.getMachineMemOperand( - MachinePointerInfo(), - MachineMemOperand::MOLoad | - MachineMemOperand::MOStore | - MachineMemOperand::MODereferenceable | - MachineMemOperand::MOVolatile, - VT.getStoreSize(), 4); + EVT VT = Op.getValueType(); + + auto *M = cast(Op); unsigned Opcode = 0; switch (IntrID) { @@ -4362,7 +4958,8 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, llvm_unreachable("unhandled atomic opcode"); } - return DAG.getMemIntrinsicNode(Opcode, DL, Op->getVTList(), Ops, VT, MMO); + return 
DAG.getMemIntrinsicNode(Opcode, DL, Op->getVTList(), Ops, VT, + M->getMemOperand()); } case Intrinsic::amdgcn_buffer_atomic_cmpswap: { @@ -4375,17 +4972,11 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, Op.getOperand(6), // offset Op.getOperand(7) // slc }; - EVT VT = Op.getOperand(4).getValueType(); - MachineMemOperand *MMO = MF.getMachineMemOperand( - MachinePointerInfo(), - MachineMemOperand::MOLoad | - MachineMemOperand::MOStore | - MachineMemOperand::MODereferenceable | - MachineMemOperand::MOVolatile, - VT.getStoreSize(), 4); + EVT VT = Op.getValueType(); + auto *M = cast(Op); return DAG.getMemIntrinsicNode(AMDGPUISD::BUFFER_ATOMIC_CMPSWAP, DL, - Op->getVTList(), Ops, VT, MMO); + Op->getVTList(), Ops, VT, M->getMemOperand()); } // Basic sample. @@ -4434,9 +5025,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, case Intrinsic::amdgcn_image_sample_c_b_cl_o: case Intrinsic::amdgcn_image_sample_c_lz_o: case Intrinsic::amdgcn_image_sample_c_cd_o: - case Intrinsic::amdgcn_image_sample_c_cd_cl_o: - - case Intrinsic::amdgcn_image_getlod: { + case Intrinsic::amdgcn_image_sample_c_cd_cl_o: { // Replace dmask with everything disabled with undef. const ConstantSDNode *DMask = dyn_cast(Op.getOperand(5)); if (!DMask || DMask->isNullValue()) { @@ -4451,6 +5040,31 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, } } +SDValue SITargetLowering::handleD16VData(SDValue VData, + SelectionDAG &DAG) const { + EVT StoreVT = VData.getValueType(); + SDLoc DL(VData); + + if (StoreVT.isVector()) { + assert ((StoreVT.getVectorNumElements() != 3) && "Handle v3f16"); + if (!Subtarget->hasUnpackedD16VMem()) { + if (!isTypeLegal(StoreVT)) { + // If Target supports packed vmem, we just need to workaround + // the illegal type by casting to an equivalent one. + EVT EquivStoreVT = getEquivalentMemType(*DAG.getContext(), StoreVT); + return DAG.getNode(ISD::BITCAST, DL, EquivStoreVT, VData); + } + } else { // We need to unpack the packed data to store. + EVT IntStoreVT = StoreVT.changeTypeToInteger(); + SDValue IntVData = DAG.getNode(ISD::BITCAST, DL, IntStoreVT, VData); + EVT EquivStoreVT = (StoreVT == MVT::v2f16) ? MVT::v2i32 : MVT::v4i32; + return DAG.getNode(ISD::ZERO_EXTEND, DL, EquivStoreVT, IntVData); + } + } + // No change for f16 and legal vector D16 types. 
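To make the packed-versus-unpacked D16 distinction concrete, here is a standalone, bit-level sketch (illustration only; the real lowering operates on SDValues, and which layout applies depends on Subtarget->hasUnpackedD16VMem()):

  #include <cstdint>
  #include <cstdio>

  int main() {
    // Two half-precision values, shown only as raw 16-bit patterns.
    uint16_t X = 0x3C00, Y = 0x4000;                      // 1.0h and 2.0h
    // Packed D16: both halves share one 32-bit lane (v2f16 in a single VGPR).
    uint32_t Packed = uint32_t(X) | (uint32_t(Y) << 16);
    // Unpacked D16: each half is zero-extended into its own 32-bit lane,
    // which is why the store path zero-extends before emitting the store.
    uint32_t Unpacked[2] = {X, Y};
    std::printf("packed   = 0x%08X\n", Packed);
    std::printf("unpacked = 0x%08X 0x%08X\n", Unpacked[0], Unpacked[1]);
  }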
+ return VData; +} + SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); @@ -4539,7 +5153,7 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, case Intrinsic::amdgcn_s_barrier: { if (getTargetMachine().getOptLevel() > CodeGenOpt::None) { const SISubtarget &ST = MF.getSubtarget(); - unsigned WGSize = ST.getFlatWorkGroupSizes(*MF.getFunction()).second; + unsigned WGSize = ST.getFlatWorkGroupSizes(MF.getFunction()).second; if (WGSize <= ST.getWavefrontSize()) return SDValue(DAG.getMachineNode(AMDGPU::WAVE_BARRIER, DL, MVT::Other, Op.getOperand(0)), 0); @@ -4593,9 +5207,13 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, } case Intrinsic::amdgcn_tbuffer_store: { + SDValue VData = Op.getOperand(2); + bool IsD16 = (VData.getValueType().getScalarType() == MVT::f16); + if (IsD16) + VData = handleD16VData(VData, DAG); SDValue Ops[] = { Chain, - Op.getOperand(2), // vdata + VData, // vdata Op.getOperand(3), // rsrc Op.getOperand(4), // vindex Op.getOperand(5), // voffset @@ -4606,37 +5224,58 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, Op.getOperand(10), // glc Op.getOperand(11) // slc }; - EVT VT = Op.getOperand(3).getValueType(); - MachineMemOperand *MMO = MF.getMachineMemOperand( - MachinePointerInfo(), - MachineMemOperand::MOStore, - VT.getStoreSize(), 4); - return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_STORE_FORMAT, DL, - Op->getVTList(), Ops, VT, MMO); + unsigned Opc = IsD16 ? AMDGPUISD::TBUFFER_STORE_FORMAT_D16 : + AMDGPUISD::TBUFFER_STORE_FORMAT; + MemSDNode *M = cast(Op); + return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops, + M->getMemoryVT(), M->getMemOperand()); } case Intrinsic::amdgcn_buffer_store: case Intrinsic::amdgcn_buffer_store_format: { + SDValue VData = Op.getOperand(2); + bool IsD16 = (VData.getValueType().getScalarType() == MVT::f16); + if (IsD16) + VData = handleD16VData(VData, DAG); SDValue Ops[] = { Chain, - Op.getOperand(2), // vdata + VData, // vdata Op.getOperand(3), // rsrc Op.getOperand(4), // vindex Op.getOperand(5), // offset Op.getOperand(6), // glc Op.getOperand(7) // slc }; - EVT VT = Op.getOperand(3).getValueType(); - MachineMemOperand *MMO = MF.getMachineMemOperand( - MachinePointerInfo(), - MachineMemOperand::MOStore | - MachineMemOperand::MODereferenceable, - VT.getStoreSize(), 4); + unsigned Opc = IntrinsicID == Intrinsic::amdgcn_buffer_store ? + AMDGPUISD::BUFFER_STORE : AMDGPUISD::BUFFER_STORE_FORMAT; + Opc = IsD16 ? AMDGPUISD::BUFFER_STORE_FORMAT_D16 : Opc; + MemSDNode *M = cast(Op); + return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops, + M->getMemoryVT(), M->getMemOperand()); + } - unsigned Opcode = IntrinsicID == Intrinsic::amdgcn_buffer_store ? - AMDGPUISD::BUFFER_STORE : - AMDGPUISD::BUFFER_STORE_FORMAT; - return DAG.getMemIntrinsicNode(Opcode, DL, Op->getVTList(), Ops, VT, MMO); + case Intrinsic::amdgcn_image_store: + case Intrinsic::amdgcn_image_store_mip: { + SDValue VData = Op.getOperand(2); + bool IsD16 = (VData.getValueType().getScalarType() == MVT::f16); + if (IsD16) + VData = handleD16VData(VData, DAG); + SDValue Ops[] = { + Chain, // Chain + VData, // vdata + Op.getOperand(3), // vaddr + Op.getOperand(4), // rsrc + Op.getOperand(5), // dmask + Op.getOperand(6), // glc + Op.getOperand(7), // slc + Op.getOperand(8), // lwe + Op.getOperand(9) // da + }; + unsigned Opc = (IntrinsicID==Intrinsic::amdgcn_image_store) ? 
+ AMDGPUISD::IMAGE_STORE : AMDGPUISD::IMAGE_STORE_MIP; + MemSDNode *M = cast(Op); + return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops, + M->getMemoryVT(), M->getMemOperand()); } default: @@ -6501,7 +7140,10 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N, case ISD::ATOMIC_LOAD_UMIN: case ISD::ATOMIC_LOAD_UMAX: case AMDGPUISD::ATOMIC_INC: - case AMDGPUISD::ATOMIC_DEC: // TODO: Target mem intrinsics. + case AMDGPUISD::ATOMIC_DEC: + case AMDGPUISD::ATOMIC_LOAD_FADD: + case AMDGPUISD::ATOMIC_LOAD_FMIN: + case AMDGPUISD::ATOMIC_LOAD_FMAX: // TODO: Target mem intrinsics. if (DCI.isBeforeLegalize()) break; return performMemSDNodeCombine(cast(N), DCI); @@ -6579,13 +7221,19 @@ static unsigned SubIdx2Lane(unsigned Idx) { } /// \brief Adjust the writemask of MIMG instructions -void SITargetLowering::adjustWritemask(MachineSDNode *&Node, - SelectionDAG &DAG) const { - SDNode *Users[4] = { }; +SDNode *SITargetLowering::adjustWritemask(MachineSDNode *&Node, + SelectionDAG &DAG) const { + SDNode *Users[4] = { nullptr }; unsigned Lane = 0; unsigned DmaskIdx = (Node->getNumOperands() - Node->getNumValues() == 9) ? 2 : 3; unsigned OldDmask = Node->getConstantOperandVal(DmaskIdx); unsigned NewDmask = 0; + bool HasChain = Node->getNumValues() > 1; + + if (OldDmask == 0) { + // These are folded out, but on the chance it happens don't assert. + return Node; + } // Try to figure out the used register components for (SDNode::use_iterator I = Node->use_begin(), E = Node->use_end(); @@ -6598,9 +7246,9 @@ void SITargetLowering::adjustWritemask(MachineSDNode *&Node, // Abort if we can't understand the usage if (!I->isMachineOpcode() || I->getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG) - return; + return Node; - // Lane means which subreg of %VGPRa_VGPRb_VGPRc_VGPRd is used. + // Lane means which subreg of %vgpra_vgprb_vgprc_vgprd is used. // Note that subregs are packed, i.e. Lane==0 is the first bit set // in OldDmask, so it can be any of X,Y,Z,W; Lane==1 is the second bit // set, etc. @@ -6609,14 +7257,13 @@ void SITargetLowering::adjustWritemask(MachineSDNode *&Node, // Set which texture component corresponds to the lane. unsigned Comp; for (unsigned i = 0, Dmask = OldDmask; i <= Lane; i++) { - assert(Dmask); Comp = countTrailingZeros(Dmask); Dmask &= ~(1 << Comp); } // Abort if we have more than one user per component if (Users[Lane]) - return; + return Node; Users[Lane] = *I; NewDmask |= 1 << Comp; @@ -6624,25 +7271,47 @@ void SITargetLowering::adjustWritemask(MachineSDNode *&Node, // Abort if there's no change if (NewDmask == OldDmask) - return; + return Node; + + unsigned BitsSet = countPopulation(NewDmask); + + const SIInstrInfo *TII = getSubtarget()->getInstrInfo(); + int NewOpcode = AMDGPU::getMaskedMIMGOp(*TII, + Node->getMachineOpcode(), BitsSet); + assert(NewOpcode != -1 && + NewOpcode != static_cast(Node->getMachineOpcode()) && + "failed to find equivalent MIMG op"); // Adjust the writemask in the node - std::vector Ops; + SmallVector Ops; Ops.insert(Ops.end(), Node->op_begin(), Node->op_begin() + DmaskIdx); Ops.push_back(DAG.getTargetConstant(NewDmask, SDLoc(Node), MVT::i32)); Ops.insert(Ops.end(), Node->op_begin() + DmaskIdx + 1, Node->op_end()); - Node = (MachineSDNode*)DAG.UpdateNodeOperands(Node, Ops); - - // If we only got one lane, replace it with a copy - // (if NewDmask has only one bit set...) 
- if (NewDmask && (NewDmask & (NewDmask-1)) == 0) { - SDValue RC = DAG.getTargetConstant(AMDGPU::VGPR_32RegClassID, SDLoc(), - MVT::i32); - SDNode *Copy = DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS, - SDLoc(), Users[Lane]->getValueType(0), - SDValue(Node, 0), RC); + + MVT SVT = Node->getValueType(0).getVectorElementType().getSimpleVT(); + + MVT ResultVT = BitsSet == 1 ? + SVT : MVT::getVectorVT(SVT, BitsSet == 3 ? 4 : BitsSet); + SDVTList NewVTList = HasChain ? + DAG.getVTList(ResultVT, MVT::Other) : DAG.getVTList(ResultVT); + + + MachineSDNode *NewNode = DAG.getMachineNode(NewOpcode, SDLoc(Node), + NewVTList, Ops); + + if (HasChain) { + // Update chain. + NewNode->setMemRefs(Node->memoperands_begin(), Node->memoperands_end()); + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), SDValue(NewNode, 1)); + } + + if (BitsSet == 1) { + assert(Node->hasNUsesOfValue(1, 0)); + SDNode *Copy = DAG.getMachineNode(TargetOpcode::COPY, + SDLoc(Node), Users[Lane]->getValueType(0), + SDValue(NewNode, 0)); DAG.ReplaceAllUsesWith(Users[Lane], Copy); - return; + return nullptr; } // Update the users of the node with the new indices @@ -6652,7 +7321,7 @@ void SITargetLowering::adjustWritemask(MachineSDNode *&Node, continue; SDValue Op = DAG.getTargetConstant(Idx, SDLoc(User), MVT::i32); - DAG.UpdateNodeOperands(User, User->getOperand(0), Op); + DAG.UpdateNodeOperands(User, SDValue(NewNode, 0), Op); switch (Idx) { default: break; @@ -6661,6 +7330,9 @@ void SITargetLowering::adjustWritemask(MachineSDNode *&Node, case AMDGPU::sub2: Idx = AMDGPU::sub3; break; } } + + DAG.RemoveDeadNode(Node); + return nullptr; } static bool isFrameIndexOp(SDValue Op) { @@ -6718,14 +7390,16 @@ SDNode *SITargetLowering::legalizeTargetIndependentNode(SDNode *Node, } /// \brief Fold the instructions after selecting them. +/// Returns null if users were already updated. SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node, SelectionDAG &DAG) const { const SIInstrInfo *TII = getSubtarget()->getInstrInfo(); unsigned Opcode = Node->getMachineOpcode(); if (TII->isMIMG(Opcode) && !TII->get(Opcode).mayStore() && - !TII->isGather4(Opcode)) - adjustWritemask(Node, DAG); + !TII->isGather4(Opcode) && !TII->isD16(Opcode)) { + return adjustWritemask(Node, DAG); + } if (Opcode == AMDGPU::INSERT_SUBREG || Opcode == AMDGPU::REG_SEQUENCE) { @@ -6803,31 +7477,6 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI, return; } - if (TII->isMIMG(MI)) { - unsigned VReg = MI.getOperand(0).getReg(); - const TargetRegisterClass *RC = MRI.getRegClass(VReg); - // TODO: Need mapping tables to handle other cases (register classes). - if (RC != &AMDGPU::VReg_128RegClass) - return; - - unsigned DmaskIdx = MI.getNumOperands() == 12 ? 3 : 4; - unsigned Writemask = MI.getOperand(DmaskIdx).getImm(); - unsigned BitsSet = 0; - for (unsigned i = 0; i < 4; ++i) - BitsSet += Writemask & (1 << i) ? 1 : 0; - switch (BitsSet) { - default: return; - case 1: RC = &AMDGPU::VGPR_32RegClass; break; - case 2: RC = &AMDGPU::VReg_64RegClass; break; - case 3: RC = &AMDGPU::VReg_96RegClass; break; - } - - unsigned NewOpcode = TII->getMaskedMIMGOp(MI.getOpcode(), BitsSet); - MI.setDesc(TII->get(NewOpcode)); - MRI.setRegClass(VReg, RC); - return; - } - // Replace unused atomics with the no return version. 
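A standalone sketch of the lane-to-component mapping used when the writemask is shrunk above (illustration only; __builtin_ctz stands in for llvm::countTrailingZeros, and the helper name is made up):

  #include <cstdint>
  #include <cstdio>

  // Lane is the index of an enabled bit in the old dmask (results are packed),
  // and the loop recovers which texture component (x=0 .. w=3) that lane holds.
  // Precondition: Lane is less than the number of set bits in DMask.
  static unsigned laneToComponent(uint32_t DMask, unsigned Lane) {
    unsigned Comp = 0;
    for (unsigned I = 0; I <= Lane; ++I) {
      Comp = __builtin_ctz(DMask); // lowest remaining enabled component
      DMask &= DMask - 1;          // clear it
    }
    return Comp;
  }

  int main() {
    uint32_t OldDmask = 0xA; // 0b1010: components y and w enabled
    std::printf("lane 0 -> component %u\n", laneToComponent(OldDmask, 0)); // y
    std::printf("lane 1 -> component %u\n", laneToComponent(OldDmask, 1)); // w
  }

The popcount of the new dmask then decides how wide the result register needs to be, which is what the new opcode lookup keys on.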
int NoRetAtomicOp = AMDGPU::getAtomicNoRetOp(MI.getOpcode()); if (NoRetAtomicOp != -1) { diff --git a/lib/Target/AMDGPU/SIISelLowering.h b/lib/Target/AMDGPU/SIISelLowering.h index f68f7dc28cdc..2c6e61d316db 100644 --- a/lib/Target/AMDGPU/SIISelLowering.h +++ b/lib/Target/AMDGPU/SIISelLowering.h @@ -60,6 +60,10 @@ class SITargetLowering final : public AMDGPUTargetLowering { SDValue LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerIntrinsicWChain_IllegalReturnType(SDValue Op, SDValue &Chain, + SelectionDAG &DAG) const; + SDValue handleD16VData(SDValue VData, SelectionDAG &DAG) const; + /// \brief Converts \p Op, which must be of floating point type, to the /// floating point type \p VT, by either extending or truncating it. SDValue getFPExtOrFPTrunc(SelectionDAG &DAG, @@ -82,7 +86,7 @@ class SITargetLowering final : public AMDGPUTargetLowering { SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; SDValue lowerTRAP(SDValue Op, SelectionDAG &DAG) const; - void adjustWritemask(MachineSDNode *&N, SelectionDAG &DAG) const; + SDNode *adjustWritemask(MachineSDNode *&N, SelectionDAG &DAG) const; SDValue performUCharToFloatCombine(SDNode *N, DAGCombinerInfo &DCI) const; @@ -152,6 +156,7 @@ class SITargetLowering final : public AMDGPUTargetLowering { bool isShuffleMaskLegal(ArrayRef /*Mask*/, EVT /*VT*/) const override; bool getTgtMemIntrinsic(IntrinsicInfo &, const CallInst &, + MachineFunction &MF, unsigned IntrinsicID) const override; bool getAddrModeArguments(IntrinsicInst * /*I*/, diff --git a/lib/Target/AMDGPU/SIInsertSkips.cpp b/lib/Target/AMDGPU/SIInsertSkips.cpp index 1b8c9f277125..a2f844d7854e 100644 --- a/lib/Target/AMDGPU/SIInsertSkips.cpp +++ b/lib/Target/AMDGPU/SIInsertSkips.cpp @@ -166,7 +166,7 @@ bool SIInsertSkips::skipIfDead(MachineInstr &MI, MachineBasicBlock &NextBB) { MachineBasicBlock &MBB = *MI.getParent(); MachineFunction *MF = MBB.getParent(); - if (MF->getFunction()->getCallingConv() != CallingConv::AMDGPU_PS || + if (MF->getFunction().getCallingConv() != CallingConv::AMDGPU_PS || !shouldSkip(MBB, MBB.getParent()->back())) return false; diff --git a/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index 2d41d8965b15..6bbe5979316d 100644 --- a/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -1269,7 +1269,7 @@ void SIInsertWaitcnts::mergeInputScoreBrackets(MachineBasicBlock &Block) { BlockWaitcntBracketsMap[pred].get(); bool Visited = BlockVisitedSet.find(pred) != BlockVisitedSet.end(); if (!Visited || PredScoreBrackets->getWaitAtBeginning()) { - break; + continue; } for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS; T = (enum InstCounterType)(T + 1)) { @@ -1308,7 +1308,7 @@ void SIInsertWaitcnts::mergeInputScoreBrackets(MachineBasicBlock &Block) { BlockWaitcntBracketsMap[Pred].get(); bool Visited = BlockVisitedSet.find(Pred) != BlockVisitedSet.end(); if (!Visited || PredScoreBrackets->getWaitAtBeginning()) { - break; + continue; } int GDSSpan = PredScoreBrackets->getEventUB(GDS_GPR_LOCK) - @@ -1355,7 +1355,7 @@ void SIInsertWaitcnts::mergeInputScoreBrackets(MachineBasicBlock &Block) { // Set the register scoreboard. 
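The break-to-continue changes in these waitcnt hunks merge state over all predecessors of a block; a tiny standalone example (illustrative values only) of why skipping an unvisited predecessor with continue differs from stopping the loop with break:

  #include <cstdio>
  #include <vector>

  int main() {
    // An unvisited predecessor must be skipped, not end the merge; with
    // `break`, the later (visited) predecessors would be silently dropped.
    std::vector<bool> Visited = {false, true, true};
    int Merged = 0;
    for (bool V : Visited) {
      if (!V)
        continue;
      ++Merged;
    }
    std::printf("merged %d of %zu predecessors\n", Merged, Visited.size());
  }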
for (MachineBasicBlock *Pred : Block.predecessors()) { if (BlockVisitedSet.find(Pred) == BlockVisitedSet.end()) { - break; + continue; } BlockWaitcntBrackets *PredScoreBrackets = @@ -1469,7 +1469,7 @@ void SIInsertWaitcnts::mergeInputScoreBrackets(MachineBasicBlock &Block) { // the delayed nature of these operations. for (MachineBasicBlock *Pred : Block.predecessors()) { if (BlockVisitedSet.find(Pred) == BlockVisitedSet.end()) { - break; + continue; } BlockWaitcntBrackets *PredScoreBrackets = diff --git a/lib/Target/AMDGPU/SIInstrFormats.td b/lib/Target/AMDGPU/SIInstrFormats.td index 25917cc06e6a..af9908b9846b 100644 --- a/lib/Target/AMDGPU/SIInstrFormats.td +++ b/lib/Target/AMDGPU/SIInstrFormats.td @@ -118,6 +118,9 @@ class InstSI DisableSIDecoder = 0; @@ -247,6 +252,7 @@ class MIMGe op> : Enc64 { bits<1> tfe; bits<1> lwe; bits<1> slc; + bits<1> d16 = 0; bits<8> vaddr; bits<7> srsrc; bits<7> ssamp; @@ -265,6 +271,7 @@ class MIMGe op> : Enc64 { let Inst{47-40} = vdata; let Inst{52-48} = srsrc{6-2}; let Inst{57-53} = ssamp{6-2}; + let Inst{63} = d16; } class EXPe : Enc64 { diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp index 8df1c58848ed..61967605432e 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -375,7 +375,7 @@ static bool memOpsHaveSameBasePtr(const MachineInstr &MI1, unsigned BaseReg1, if (!Base1 || !Base2) return false; const MachineFunction &MF = *MI1.getParent()->getParent(); - const DataLayout &DL = MF.getFunction()->getParent()->getDataLayout(); + const DataLayout &DL = MF.getFunction().getParent()->getDataLayout(); Base1 = GetUnderlyingObject(Base1, DL); Base2 = GetUnderlyingObject(Base1, DL); @@ -442,10 +442,10 @@ static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, bool KillSrc) { MachineFunction *MF = MBB.getParent(); - DiagnosticInfoUnsupported IllegalCopy(*MF->getFunction(), + DiagnosticInfoUnsupported IllegalCopy(MF->getFunction(), "illegal SGPR to VGPR copy", DL, DS_Error); - LLVMContext &C = MF->getFunction()->getContext(); + LLVMContext &C = MF->getFunction().getContext(); C.diagnose(IllegalCopy); BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_ILLEGAL_COPY), DestReg) @@ -873,8 +873,8 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, return; } - if (!ST.isVGPRSpillingEnabled(*MF->getFunction())) { - LLVMContext &Ctx = MF->getFunction()->getContext(); + if (!ST.isVGPRSpillingEnabled(MF->getFunction())) { + LLVMContext &Ctx = MF->getFunction().getContext(); Ctx.emitError("SIInstrInfo::storeRegToStackSlot - Do not know how to" " spill register"); BuildMI(MBB, MI, DL, get(AMDGPU::KILL)) @@ -975,8 +975,8 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, return; } - if (!ST.isVGPRSpillingEnabled(*MF->getFunction())) { - LLVMContext &Ctx = MF->getFunction()->getContext(); + if (!ST.isVGPRSpillingEnabled(MF->getFunction())) { + LLVMContext &Ctx = MF->getFunction().getContext(); Ctx.emitError("SIInstrInfo::loadRegFromStackSlot - Do not know how to" " restore register"); BuildMI(MBB, MI, DL, get(AMDGPU::IMPLICIT_DEF), DestReg); @@ -1017,7 +1017,7 @@ unsigned SIInstrInfo::calculateLDSSpillAddress( if (TIDReg == AMDGPU::NoRegister) return TIDReg; - if (!AMDGPU::isShader(MF->getFunction()->getCallingConv()) && + if (!AMDGPU::isShader(MF->getFunction().getCallingConv()) && WorkGroupSize > WavefrontSize) { unsigned TIDIGXReg = MFI->getPreloadedReg(AMDGPUFunctionArgInfo::WORKGROUP_ID_X); @@ -1057,9 +1057,9 
@@ unsigned SIInstrInfo::calculateLDSSpillAddress( .addReg(TIDIGYReg) .addReg(TIDReg); // (NGROUPS.Z * TIDIG.Y + (NGROUPS.X * NGROPUS.Y * TIDIG.X)) + TIDIG.Z - BuildMI(Entry, Insert, DL, get(AMDGPU::V_ADD_I32_e32), TIDReg) - .addReg(TIDReg) - .addReg(TIDIGZReg); + getAddNoCarry(Entry, Insert, DL, TIDReg) + .addReg(TIDReg) + .addReg(TIDIGZReg); } else { // Get the wave id BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_LO_U32_B32_e64), @@ -1082,9 +1082,9 @@ unsigned SIInstrInfo::calculateLDSSpillAddress( // Add FrameIndex to LDS offset unsigned LDSOffset = MFI->getLDSSize() + (FrameOffset * WorkGroupSize); - BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_I32_e32), TmpReg) - .addImm(LDSOffset) - .addReg(TIDReg); + getAddNoCarry(MBB, MI, DL, TmpReg) + .addImm(LDSOffset) + .addReg(TIDReg); return TmpReg; } @@ -2687,6 +2687,28 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI, } } } + + const MachineOperand *DstUnused = getNamedOperand(MI, AMDGPU::OpName::dst_unused); + if (DstUnused && DstUnused->isImm() && + DstUnused->getImm() == AMDGPU::SDWA::UNUSED_PRESERVE) { + const MachineOperand &Dst = MI.getOperand(DstIdx); + if (!Dst.isReg() || !Dst.isTied()) { + ErrInfo = "Dst register should have tied register"; + return false; + } + + const MachineOperand &TiedMO = + MI.getOperand(MI.findTiedOperandIdx(DstIdx)); + if (!TiedMO.isReg() || !TiedMO.isImplicit() || !TiedMO.isUse()) { + ErrInfo = + "Dst register should be tied to implicit use of preserved register"; + return false; + } else if (TargetRegisterInfo::isPhysicalRegister(TiedMO.getReg()) && + Dst.getReg() != TiedMO.getReg()) { + ErrInfo = "Dst register should use same physical register as preserved"; + return false; + } + } } // Verify VOP* @@ -2831,7 +2853,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI, return true; } -unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) { +unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const { switch (MI.getOpcode()) { default: return AMDGPU::INSTRUCTION_LIST_END; case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE; @@ -2844,10 +2866,17 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) { return MI.getOperand(1).isReg() ? AMDGPU::COPY : AMDGPU::V_MOV_B32_e32; case AMDGPU::S_ADD_I32: - case AMDGPU::S_ADD_U32: return AMDGPU::V_ADD_I32_e32; - case AMDGPU::S_ADDC_U32: return AMDGPU::V_ADDC_U32_e32; + return ST.hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_I32_e32; + case AMDGPU::S_ADDC_U32: + return AMDGPU::V_ADDC_U32_e32; case AMDGPU::S_SUB_I32: - case AMDGPU::S_SUB_U32: return AMDGPU::V_SUB_I32_e32; + return ST.hasAddNoCarry() ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_I32_e32; + // FIXME: These are not consistently handled, and selected when the carry is + // used. + case AMDGPU::S_ADD_U32: + return AMDGPU::V_ADD_I32_e32; + case AMDGPU::S_SUB_U32: + return AMDGPU::V_SUB_I32_e32; case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32; case AMDGPU::S_MUL_I32: return AMDGPU::V_MUL_LO_I32; case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e64; @@ -2894,10 +2923,6 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) { } } -bool SIInstrInfo::isSALUOpSupportedOnVALU(const MachineInstr &MI) const { - return getVALUOp(MI) != AMDGPU::INSTRUCTION_LIST_END; -} - const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI, unsigned OpNo) const { const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); @@ -3419,7 +3444,7 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI) const { // scratch memory access. 
In both cases, the legalization never involves // conversion to the addr64 form. if (isMIMG(MI) || - (AMDGPU::isShader(MF.getFunction()->getCallingConv()) && + (AMDGPU::isShader(MF.getFunction().getCallingConv()) && (isMUBUF(MI) || isMTBUF(MI)))) { MachineOperand *SRsrc = getNamedOperand(MI, AMDGPU::OpName::srsrc); if (SRsrc && !RI.isSGPRClass(MRI.getRegClass(SRsrc->getReg()))) { @@ -3612,6 +3637,14 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const { splitScalar64BitAddSub(Worklist, Inst); Inst.eraseFromParent(); continue; + case AMDGPU::S_ADD_I32: + case AMDGPU::S_SUB_I32: + // FIXME: The u32 versions currently selected use the carry. + if (moveScalarAddSub(Worklist, Inst)) + continue; + + // Default handling + break; case AMDGPU::S_AND_B64: splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_AND_B32_e64); Inst.eraseFromParent(); @@ -3720,6 +3753,14 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const { auto Add = MRI.getUniqueVRegDef(VAddr->getReg()); unsigned Offset = 0; + // FIXME: This isn't safe because the addressing mode doesn't work + // correctly if vaddr is negative. + // + // FIXME: Handle v_add_u32 and VOP3 form. Also don't rely on immediate + // being in src0. + // + // FIXME: Should probably be done somewhere else, maybe SIFoldOperands. + // // See if we can extract an immediate offset by recognizing one of these: // V_ADD_I32_e32 dst, imm, src1 // V_ADD_I32_e32 dst, (S_MOV_B32 imm), src1 @@ -3728,7 +3769,7 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const { const MachineOperand *Src = getNamedOperand(*Add, AMDGPU::OpName::src0); - if (Src && Src->isReg()) { + if (Src->isReg()) { auto Mov = MRI.getUniqueVRegDef(Src->getReg()); if (Mov && Mov->getOpcode() == AMDGPU::S_MOV_B32) Src = &Mov->getOperand(1); @@ -3858,6 +3899,41 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const { } } +// Add/sub require special handling to deal with carry outs. +bool SIInstrInfo::moveScalarAddSub(SetVectorType &Worklist, + MachineInstr &Inst) const { + if (ST.hasAddNoCarry()) { + // Assume there is no user of scc since we don't select this in that case. + // Since scc isn't used, it doesn't really matter if the i32 or u32 variant + // is used. + + MachineBasicBlock &MBB = *Inst.getParent(); + MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + + unsigned OldDstReg = Inst.getOperand(0).getReg(); + unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + + unsigned Opc = Inst.getOpcode(); + assert(Opc == AMDGPU::S_ADD_I32 || Opc == AMDGPU::S_SUB_I32); + + unsigned NewOpc = Opc == AMDGPU::S_ADD_I32 ? + AMDGPU::V_ADD_U32_e64 : AMDGPU::V_SUB_U32_e64; + + assert(Inst.getOperand(3).getReg() == AMDGPU::SCC); + Inst.RemoveOperand(3); + + Inst.setDesc(get(NewOpc)); + Inst.addImplicitDefUseOperands(*MBB.getParent()); + MRI.replaceRegWith(OldDstReg, ResultReg); + legalizeOperands(Inst); + + addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist); + return true; + } + + return false; +} + void SIInstrInfo::lowerScalarAbs(SetVectorType &Worklist, MachineInstr &Inst) const { MachineBasicBlock &MBB = *Inst.getParent(); @@ -3870,7 +3946,10 @@ void SIInstrInfo::lowerScalarAbs(SetVectorType &Worklist, unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); - BuildMI(MBB, MII, DL, get(AMDGPU::V_SUB_I32_e32), TmpReg) + unsigned SubOp = ST.hasAddNoCarry() ? 
+ AMDGPU::V_SUB_U32_e32 : AMDGPU::V_SUB_I32_e32; + + BuildMI(MBB, MII, DL, get(SubOp), TmpReg) .addImm(0) .addReg(Src.getReg()); @@ -4707,9 +4786,12 @@ SIInstrInfo::getAddNoCarry(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, unsigned DestReg) const { - MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + if (ST.hasAddNoCarry()) + return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_U32_e64), DestReg); + MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); unsigned UnusedCarry = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); + MRI.setRegAllocationHint(UnusedCarry, 0, AMDGPU::VCC); return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_I32_e64), DestReg) .addReg(UnusedCarry, RegState::Define | RegState::Dead); diff --git a/lib/Target/AMDGPU/SIInstrInfo.h b/lib/Target/AMDGPU/SIInstrInfo.h index d7d3918f6bf7..13f9959c4d83 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.h +++ b/lib/Target/AMDGPU/SIInstrInfo.h @@ -76,6 +76,9 @@ class SIInstrInfo final : public AMDGPUInstrInfo { private: void swapOperands(MachineInstr &Inst) const; + bool moveScalarAddSub(SetVectorType &Worklist, + MachineInstr &Inst) const; + void lowerScalarAbs(SetVectorType &Worklist, MachineInstr &Inst) const; @@ -453,6 +456,14 @@ class SIInstrInfo final : public AMDGPUInstrInfo { return get(Opcode).TSFlags & SIInstrFlags::Gather4; } + static bool isD16(const MachineInstr &MI) { + return MI.getDesc().TSFlags & SIInstrFlags::D16; + } + + bool isD16(uint16_t Opcode) const { + return get(Opcode).TSFlags & SIInstrFlags::D16; + } + static bool isFLAT(const MachineInstr &MI) { return MI.getDesc().TSFlags & SIInstrFlags::FLAT; } @@ -691,9 +702,7 @@ class SIInstrInfo final : public AMDGPUInstrInfo { bool verifyInstruction(const MachineInstr &MI, StringRef &ErrInfo) const override; - static unsigned getVALUOp(const MachineInstr &MI); - - bool isSALUOpSupportedOnVALU(const MachineInstr &MI) const; + unsigned getVALUOp(const MachineInstr &MI) const; /// \brief Return the correct register class for \p OpNo. 
For target-specific /// instructions, this will return the register class that has been defined diff --git a/lib/Target/AMDGPU/SIInstrInfo.td b/lib/Target/AMDGPU/SIInstrInfo.td index 1a2366596443..f4516988b198 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.td +++ b/lib/Target/AMDGPU/SIInstrInfo.td @@ -25,7 +25,8 @@ def SIEncodingFamily { int VI = 1; int SDWA = 2; int SDWA9 = 3; - int GFX9 = 4; + int GFX80 = 4; + int GFX9 = 5; } //===----------------------------------------------------------------------===// @@ -45,22 +46,41 @@ def SIatomic_dec : SDNode<"AMDGPUISD::ATOMIC_DEC", SDTAtomic2, [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain] >; -def SItbuffer_load : SDNode<"AMDGPUISD::TBUFFER_LOAD_FORMAT", - SDTypeProfile<1, 9, - [ // vdata - SDTCisVT<1, v4i32>, // rsrc - SDTCisVT<2, i32>, // vindex(VGPR) - SDTCisVT<3, i32>, // voffset(VGPR) - SDTCisVT<4, i32>, // soffset(SGPR) - SDTCisVT<5, i32>, // offset(imm) - SDTCisVT<6, i32>, // dfmt(imm) - SDTCisVT<7, i32>, // nfmt(imm) - SDTCisVT<8, i32>, // glc(imm) - SDTCisVT<9, i32> // slc(imm) - ]>, - [SDNPMayLoad, SDNPMemOperand, SDNPHasChain] +def SDTAtomic2_f32 : SDTypeProfile<1, 2, [ + SDTCisSameAs<0,2>, SDTCisFP<0>, SDTCisPtrTy<1> +]>; + +def SIatomic_fadd : SDNode<"AMDGPUISD::ATOMIC_LOAD_FADD", SDTAtomic2_f32, + [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain] +>; + +def SIatomic_fmin : SDNode<"AMDGPUISD::ATOMIC_LOAD_FMIN", SDTAtomic2_f32, + [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain] +>; + +def SIatomic_fmax : SDNode<"AMDGPUISD::ATOMIC_LOAD_FMAX", SDTAtomic2_f32, + [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain] >; +def SDTbuffer_load : SDTypeProfile<1, 9, + [ // vdata + SDTCisVT<1, v4i32>, // rsrc + SDTCisVT<2, i32>, // vindex(VGPR) + SDTCisVT<3, i32>, // voffset(VGPR) + SDTCisVT<4, i32>, // soffset(SGPR) + SDTCisVT<5, i32>, // offset(imm) + SDTCisVT<6, i32>, // dfmt(imm) + SDTCisVT<7, i32>, // nfmt(imm) + SDTCisVT<8, i32>, // glc(imm) + SDTCisVT<9, i32> // slc(imm) + ]>; + +def SItbuffer_load : SDNode<"AMDGPUISD::TBUFFER_LOAD_FORMAT", SDTbuffer_load, + [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]>; +def SItbuffer_load_d16 : SDNode<"AMDGPUISD::TBUFFER_LOAD_FORMAT_D16", + SDTbuffer_load, + [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]>; + def SDTtbuffer_store : SDTypeProfile<0, 10, [ // vdata SDTCisVT<1, v4i32>, // rsrc @@ -79,6 +99,9 @@ def SItbuffer_store : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT", SDTtbuffer_store def SItbuffer_store_x3 : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT_X3", SDTtbuffer_store, [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>; +def SItbuffer_store_d16 : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT_D16", + SDTtbuffer_store, + [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>; def SDTBufferLoad : SDTypeProfile<1, 5, [ // vdata @@ -92,6 +115,9 @@ def SIbuffer_load : SDNode <"AMDGPUISD::BUFFER_LOAD", SDTBufferLoad, [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>; def SIbuffer_load_format : SDNode <"AMDGPUISD::BUFFER_LOAD_FORMAT", SDTBufferLoad, [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>; +def SIbuffer_load_format_d16 : SDNode <"AMDGPUISD::BUFFER_LOAD_FORMAT_D16", + SDTBufferLoad, + [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>; def SDTBufferStore : SDTypeProfile<0, 6, [ // vdata @@ -102,9 +128,13 @@ def SDTBufferStore : SDTypeProfile<0, 6, SDTCisVT<5, i1>]>; // slc def SIbuffer_store : SDNode <"AMDGPUISD::BUFFER_STORE", SDTBufferStore, - [SDNPMemOperand, SDNPHasChain, SDNPMayStore]>; -def SIbuffer_store_format : SDNode <"AMDGPUISD::BUFFER_STORE_FORMAT", SDTBufferStore, - [SDNPMemOperand, 
SDNPHasChain, SDNPMayStore]>; + [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>; +def SIbuffer_store_format : SDNode <"AMDGPUISD::BUFFER_STORE_FORMAT", + SDTBufferStore, + [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>; +def SIbuffer_store_format_d16 : SDNode <"AMDGPUISD::BUFFER_STORE_FORMAT_D16", + SDTBufferStore, + [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>; class SDBufferAtomic : SDNode ; +def SDTImage_load : SDTypeProfile<1, 7, + [ + SDTCisInt<1>, // vaddr + SDTCisInt<2>, // rsrc + SDTCisVT<3, i32>, // dmask + SDTCisVT<4, i1>, // glc + SDTCisVT<5, i1>, // slc + SDTCisVT<6, i1>, // lwe + SDTCisVT<7, i1> // da + ]>; +def SIImage_load : SDNode<"AMDGPUISD::IMAGE_LOAD", SDTImage_load, + [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]>; +def SIImage_load_mip : SDNode<"AMDGPUISD::IMAGE_LOAD_MIP", SDTImage_load, + [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]>; + +def SDTImage_store : SDTypeProfile<0, 8, + [ + SDTCisInt<1>, // vaddr + SDTCisInt<2>, // rsrc + SDTCisVT<3, i32>, // dmask + SDTCisVT<4, i1>, // glc + SDTCisVT<5, i1>, // slc + SDTCisVT<6, i1>, // lwe + SDTCisVT<7, i1> // da + ]>; +def SIImage_store : SDNode <"AMDGPUISD::IMAGE_STORE", + SDTImage_store, + [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>; +def SIImage_store_mip : SDNode <"AMDGPUISD::IMAGE_STORE_MIP", + SDTImage_store, + [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>; + +class SDTImage_sample : SDNode , // vaddr + SDTCisInt<2>, // rsrc + SDTCisVT<3, v4i32>, // sampler + SDTCisVT<4, i32>, // dmask + SDTCisVT<5, i1>, // unorm + SDTCisVT<6, i1>, // glc + SDTCisVT<7, i1>, // slc + SDTCisVT<8, i1>, // lwe + SDTCisVT<9, i1> // da + ]>, + [SDNPMayLoad, SDNPMemOperand, SDNPHasChain] +>; + +// Basic sample. +def SIImage_sample : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE">; +def SIImage_sample_cl : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_CL">; +def SIImage_sample_d : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_D">; +def SIImage_sample_d_cl : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_D_CL">; +def SIImage_sample_l : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_L">; +def SIImage_sample_b : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_B">; +def SIImage_sample_b_cl : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_B_CL">; +def SIImage_sample_lz : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_LZ">; +def SIImage_sample_cd : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_CD">; +def SIImage_sample_cd_cl : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_CD_CL">; + +// Sample with comparison. +def SIImage_sample_c : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C">; +def SIImage_sample_c_cl : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_CL">; +def SIImage_sample_c_d : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_D">; +def SIImage_sample_c_d_cl : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_D_CL">; +def SIImage_sample_c_l : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_L">; +def SIImage_sample_c_b : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_B">; +def SIImage_sample_c_b_cl : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_B_CL">; +def SIImage_sample_c_lz : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_LZ">; +def SIImage_sample_c_cd : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_CD">; +def SIImage_sample_c_cd_cl : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_CD_CL">; + +// Sample with offsets. 
+def SIImage_sample_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_O">; +def SIImage_sample_cl_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_CL_O">; +def SIImage_sample_d_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_D_O">; +def SIImage_sample_d_cl_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_D_CL_O">; +def SIImage_sample_l_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_L_O">; +def SIImage_sample_b_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_B_O">; +def SIImage_sample_b_cl_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_B_CL_O">; +def SIImage_sample_lz_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_LZ_O">; +def SIImage_sample_cd_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_CD_O">; +def SIImage_sample_cd_cl_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_CD_CL_O">; + +// Sample with comparison and offsets. +def SIImage_sample_c_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_O">; +def SIImage_sample_c_cl_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_CL_O">; +def SIImage_sample_c_d_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_D_O">; +def SIImage_sample_c_d_cl_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_D_CL_O">; +def SIImage_sample_c_l_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_L_O">; +def SIImage_sample_c_b_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_B_O">; +def SIImage_sample_c_b_cl_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_B_CL_O">; +def SIImage_sample_c_lz_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_LZ_O">; +def SIImage_sample_c_cd_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_CD_O">; +def SIImage_sample_c_cd_cl_o : SDTImage_sample<"AMDGPUISD::IMAGE_SAMPLE_C_CD_CL_O">; + +// Basic gather4. +def SIImage_gather4 : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4">; +def SIImage_gather4_cl : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_CL">; +def SIImage_gather4_l : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_L">; +def SIImage_gather4_b : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_B">; +def SIImage_gather4_b_cl : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_B_CL">; +def SIImage_gather4_lz : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_LZ">; + +// Gather4 with comparison. +def SIImage_gather4_c : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_C">; +def SIImage_gather4_c_cl : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_C_CL">; +def SIImage_gather4_c_l : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_C_L">; +def SIImage_gather4_c_b : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_C_B">; +def SIImage_gather4_c_b_cl : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_C_B_CL">; +def SIImage_gather4_c_lz : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_C_LZ">; + +// Gather4 with offsets. +def SIImage_gather4_o : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_O">; +def SIImage_gather4_cl_o : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_CL_O">; +def SIImage_gather4_l_o : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_L_O">; +def SIImage_gather4_b_o : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_B_O">; +def SIImage_gather4_b_cl_o : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_B_CL_O">; +def SIImage_gather4_lz_o : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_LZ_O">; + +// Gather4 with comparison and offsets. 
+def SIImage_gather4_c_o : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_C_O">; +def SIImage_gather4_c_cl_o : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_C_CL_O">; +def SIImage_gather4_c_l_o : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_C_L_O">; +def SIImage_gather4_c_b_o : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_C_B_O">; +def SIImage_gather4_c_b_cl_o : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_C_B_CL_O">; +def SIImage_gather4_c_lz_o : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_C_LZ_O">; + class SDSample : SDNode , SDTCisVT<2, v8i32>, SDTCisVT<3, v4i32>, SDTCisVT<4, i32>]> @@ -154,6 +312,36 @@ def SIpc_add_rel_offset : SDNode<"AMDGPUISD::PC_ADD_REL_OFFSET", SDTypeProfile<1, 2, [SDTCisVT<0, iPTR>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]> >; +//===----------------------------------------------------------------------===// +// ValueType helpers +//===----------------------------------------------------------------------===// + +// Returns 1 if the source arguments have modifiers, 0 if they do not. +// XXX - do f16 instructions? +class isFloatType { + bit ret = + !if(!eq(SrcVT.Value, f16.Value), 1, + !if(!eq(SrcVT.Value, f32.Value), 1, + !if(!eq(SrcVT.Value, f64.Value), 1, + !if(!eq(SrcVT.Value, v2f16.Value), 1, + 0)))); +} + +class isIntType { + bit ret = + !if(!eq(SrcVT.Value, i16.Value), 1, + !if(!eq(SrcVT.Value, i32.Value), 1, + !if(!eq(SrcVT.Value, i64.Value), 1, + 0))); +} + +class isPackedType { + bit ret = + !if(!eq(SrcVT.Value, v2i16.Value), 1, + !if(!eq(SrcVT.Value, v2f16.Value), 1, 0) + ); +} + //===----------------------------------------------------------------------===// // PatFrags for global memory operations //===----------------------------------------------------------------------===// @@ -161,6 +349,12 @@ def SIpc_add_rel_offset : SDNode<"AMDGPUISD::PC_ADD_REL_OFFSET", defm atomic_inc_global : global_binary_atomic_op; defm atomic_dec_global : global_binary_atomic_op; +def atomic_inc_local : local_binary_atomic_op; +def atomic_dec_local : local_binary_atomic_op; +def atomic_load_fadd_local : local_binary_atomic_op; +def atomic_load_fmin_local : local_binary_atomic_op; +def atomic_load_fmax_local : local_binary_atomic_op; + //===----------------------------------------------------------------------===// // SDNodes PatFrags for loads/stores with a glue input. 
// This is for SDNodes and PatFrag for local loads and stores to @@ -294,10 +488,11 @@ def lshl_rev : PatFrag < (shl $src0, $src1) >; -multiclass SIAtomicM0Glue2 { +multiclass SIAtomicM0Glue2 { def _glue : SDNode < - !if(is_amdgpu, "AMDGPUISD", "ISD")#"::ATOMIC_"#op_name, SDTAtomic2, + !if(is_amdgpu, "AMDGPUISD", "ISD")#"::ATOMIC_"#op_name, tc, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand, SDNPInGlue] >; @@ -316,6 +511,9 @@ defm atomic_load_xor : SIAtomicM0Glue2 <"LOAD_XOR">; defm atomic_load_umin : SIAtomicM0Glue2 <"LOAD_UMIN">; defm atomic_load_umax : SIAtomicM0Glue2 <"LOAD_UMAX">; defm atomic_swap : SIAtomicM0Glue2 <"SWAP">; +defm atomic_load_fadd : SIAtomicM0Glue2 <"LOAD_FADD", 1, SDTAtomic2_f32>; +defm atomic_load_fmin : SIAtomicM0Glue2 <"LOAD_FMIN", 1, SDTAtomic2_f32>; +defm atomic_load_fmax : SIAtomicM0Glue2 <"LOAD_FMAX", 1, SDTAtomic2_f32>; def atomic_cmp_swap_glue : SDNode <"ISD::ATOMIC_CMP_SWAP", SDTAtomic3, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand, SDNPInGlue] @@ -549,19 +747,18 @@ def ExpSrc3 : RegisterOperand { let ParserMatchClass = VReg32OrOffClass; } -class SDWASrc : RegisterOperand { +class SDWASrc : RegisterOperand { let OperandNamespace = "AMDGPU"; - let OperandType = "OPERAND_SDWA_SRC"; + string Type = !if(isFloatType.ret, "FP", "INT"); + let OperandType = "OPERAND_REG_INLINE_C_"#Type#vt.Size; + let DecoderMethod = "decodeSDWASrc"#vt.Size; let EncoderMethod = "getSDWASrcEncoding"; } -def SDWASrc32 : SDWASrc { - let DecoderMethod = "decodeSDWASrc32"; -} - -def SDWASrc16 : SDWASrc { - let DecoderMethod = "decodeSDWASrc16"; -} +def SDWASrc_i32 : SDWASrc; +def SDWASrc_i16 : SDWASrc; +def SDWASrc_f32 : SDWASrc; +def SDWASrc_f16 : SDWASrc; def SDWAVopcDst : VOPDstOperand { let OperandNamespace = "AMDGPU"; @@ -744,16 +941,23 @@ class OpSelModsMatchClass : AsmOperandClass { def IntOpSelModsMatchClass : OpSelModsMatchClass; def IntOpSelMods : InputMods; -def FPRegSDWAInputModsMatchClass : AsmOperandClass { - let Name = "SDWARegWithFPInputMods"; - let ParserMethod = "parseRegWithFPInputMods"; - let PredicateMethod = "isSDWARegKind"; +class FPSDWAInputModsMatchClass : AsmOperandClass { + let Name = "SDWAWithFP"#opSize#"InputMods"; + let ParserMethod = "parseRegOrImmWithFPInputMods"; + let PredicateMethod = "isSDWAFP"#opSize#"Operand"; } -def FPRegSDWAInputMods : InputMods { +def FP16SDWAInputModsMatchClass : FPSDWAInputModsMatchClass<16>; +def FP32SDWAInputModsMatchClass : FPSDWAInputModsMatchClass<32>; + +class FPSDWAInputMods : + InputMods { let PrintMethod = "printOperandAndFPInputMods"; } +def FP16SDWAInputMods : FPSDWAInputMods; +def FP32SDWAInputMods : FPSDWAInputMods; + def FPVRegInputModsMatchClass : AsmOperandClass { let Name = "VRegWithFPInputMods"; let ParserMethod = "parseRegWithFPInputMods"; @@ -764,17 +968,23 @@ def FPVRegInputMods : InputMods { let PrintMethod = "printOperandAndFPInputMods"; } - -def IntRegSDWAInputModsMatchClass : AsmOperandClass { - let Name = "SDWARegWithIntInputMods"; - let ParserMethod = "parseRegWithIntInputMods"; - let PredicateMethod = "isSDWARegKind"; +class IntSDWAInputModsMatchClass : AsmOperandClass { + let Name = "SDWAWithInt"#opSize#"InputMods"; + let ParserMethod = "parseRegOrImmWithIntInputMods"; + let PredicateMethod = "isSDWAInt"#opSize#"Operand"; } -def IntRegSDWAInputMods : InputMods { +def Int16SDWAInputModsMatchClass : IntSDWAInputModsMatchClass<16>; +def Int32SDWAInputModsMatchClass : IntSDWAInputModsMatchClass<32>; + +class IntSDWAInputMods : + InputMods { let PrintMethod = 
"printOperandAndIntInputMods"; } +def Int16SDWAInputMods : IntSDWAInputMods; +def Int32SDWAInputMods : IntSDWAInputMods; + def IntVRegInputModsMatchClass : AsmOperandClass { let Name = "VRegWithIntInputMods"; let ParserMethod = "parseRegWithIntInputMods"; @@ -1020,7 +1230,12 @@ class getVregSrcForVT { } class getSDWASrcForVT { - RegisterOperand ret = !if(!eq(VT.Size, 16), SDWASrc16, SDWASrc32); + bit isFP = !if(!eq(VT.Value, f16.Value), 1, + !if(!eq(VT.Value, f32.Value), 1, + 0)); + RegisterOperand retFlt = !if(!eq(VT.Size, 16), SDWASrc_f16, SDWASrc_f32); + RegisterOperand retInt = !if(!eq(VT.Size, 16), SDWASrc_i16, SDWASrc_i32); + RegisterOperand ret = !if(isFP, retFlt, retInt); } // Returns the register class to use for sources of VOP3 instructions for the @@ -1061,32 +1276,6 @@ class getVOP3SrcForVT { ); } -// Returns 1 if the source arguments have modifiers, 0 if they do not. -// XXX - do f16 instructions? -class isFloatType { - bit ret = - !if(!eq(SrcVT.Value, f16.Value), 1, - !if(!eq(SrcVT.Value, f32.Value), 1, - !if(!eq(SrcVT.Value, f64.Value), 1, - !if(!eq(SrcVT.Value, v2f16.Value), 1, - 0)))); -} - -class isIntType { - bit ret = - !if(!eq(SrcVT.Value, i16.Value), 1, - !if(!eq(SrcVT.Value, i32.Value), 1, - !if(!eq(SrcVT.Value, i64.Value), 1, - 0))); -} - -class isPackedType { - bit ret = - !if(!eq(SrcVT.Value, v2i16.Value), 1, - !if(!eq(SrcVT.Value, v2f16.Value), 1, 0) - ); -} - // Float or packed int class isModifierType { bit ret = @@ -1131,11 +1320,10 @@ class getSrcModExt { // Return type of input modifiers operand specified input operand for SDWA class getSrcModSDWA { - bit isFP = !if(!eq(VT.Value, f16.Value), 1, - !if(!eq(VT.Value, f32.Value), 1, - !if(!eq(VT.Value, f64.Value), 1, - 0))); - Operand ret = !if(isFP, FPRegSDWAInputMods, IntRegSDWAInputMods); + Operand ret = !if(!eq(VT.Value, f16.Value), FP16SDWAInputMods, + !if(!eq(VT.Value, f32.Value), FP32SDWAInputMods, + !if(!eq(VT.Value, i16.Value), Int16SDWAInputMods, + Int32SDWAInputMods))); } // Returns the input arguments for VOP[12C] instructions for the given SrcVT. @@ -1820,7 +2008,31 @@ def getBasicFromSDWAOp : InstrMapping { let ValueCols = [["Default"]]; } -def getMaskedMIMGOp : InstrMapping { +def getMaskedMIMGOp1 : InstrMapping { + let FilterClass = "MIMG_Mask"; + let RowFields = ["Op"]; + let ColFields = ["Channels"]; + let KeyCol = ["1"]; + let ValueCols = [["2"], ["3"], ["4"] ]; +} + +def getMaskedMIMGOp2 : InstrMapping { + let FilterClass = "MIMG_Mask"; + let RowFields = ["Op"]; + let ColFields = ["Channels"]; + let KeyCol = ["2"]; + let ValueCols = [["1"], ["3"], ["4"] ]; +} + +def getMaskedMIMGOp3 : InstrMapping { + let FilterClass = "MIMG_Mask"; + let RowFields = ["Op"]; + let ColFields = ["Channels"]; + let KeyCol = ["3"]; + let ValueCols = [["1"], ["2"], ["4"] ]; +} + +def getMaskedMIMGOp4 : InstrMapping { let FilterClass = "MIMG_Mask"; let RowFields = ["Op"]; let ColFields = ["Channels"]; @@ -1855,6 +2067,11 @@ def getMCOpcodeGen : InstrMapping { [!cast(SIEncodingFamily.VI)], [!cast(SIEncodingFamily.SDWA)], [!cast(SIEncodingFamily.SDWA9)], + // GFX80 encoding is added to work around a multiple matching + // issue for buffer instructions with unpacked d16 data. This + // does not actually change the encoding, and thus may be + // removed later. 
+ [!cast(SIEncodingFamily.GFX80)], [!cast(SIEncodingFamily.GFX9)]]; } diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td index 76612d0c9557..9740a18b7248 100644 --- a/lib/Target/AMDGPU/SIInstructions.td +++ b/lib/Target/AMDGPU/SIInstructions.td @@ -159,10 +159,14 @@ def S_SUB_U64_PSEUDO : SPseudoInstSI < [(set SReg_64:$vdst, (sub i64:$src0, i64:$src1))] >; -def S_ADDC_U64_PSEUDO : SPseudoInstSI <(outs SReg_64:$vdst, SReg_64:$sdst), - (ins SSrc_b64:$src0, SSrc_b64:$src1)>; -def S_SUBC_U64_PSEUDO : SPseudoInstSI <(outs SReg_64:$vdst, SReg_64:$sdst), - (ins SSrc_b64:$src0, SSrc_b64:$src1)>; +def S_ADD_U64_CO_PSEUDO : SPseudoInstSI < + (outs SReg_64:$vdst, VOPDstS64:$sdst), (ins SSrc_b64:$src0, SSrc_b64:$src1) +>; + +def S_SUB_U64_CO_PSEUDO : SPseudoInstSI < + (outs SReg_64:$vdst, VOPDstS64:$sdst), (ins SSrc_b64:$src0, SSrc_b64:$src1) +>; + } // End usesCustomInserter = 1, Defs = [SCC] let usesCustomInserter = 1, SALU = 1 in { diff --git a/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp index 48bfc2dac2d5..65bb5f371339 100644 --- a/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp +++ b/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp @@ -112,7 +112,13 @@ class SILoadStoreOptimizer : public MachineFunctionPass { static bool offsetsCanBeCombined(CombineInfo &CI); bool findMatchingInst(CombineInfo &CI); + + unsigned read2Opcode(unsigned EltSize) const; + unsigned read2ST64Opcode(unsigned EltSize) const; MachineBasicBlock::iterator mergeRead2Pair(CombineInfo &CI); + + unsigned write2Opcode(unsigned EltSize) const; + unsigned write2ST64Opcode(unsigned EltSize) const; MachineBasicBlock::iterator mergeWrite2Pair(CombineInfo &CI); MachineBasicBlock::iterator mergeSBufferLoadImmPair(CombineInfo &CI); MachineBasicBlock::iterator mergeBufferLoadPair(CombineInfo &CI); @@ -131,7 +137,7 @@ class SILoadStoreOptimizer : public MachineFunctionPass { bool runOnMachineFunction(MachineFunction &MF) override; - StringRef getPassName() const override { return "SI Load / Store Optimizer"; } + StringRef getPassName() const override { return "SI Load Store Optimizer"; } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); @@ -144,10 +150,10 @@ class SILoadStoreOptimizer : public MachineFunctionPass { } // end anonymous namespace. INITIALIZE_PASS_BEGIN(SILoadStoreOptimizer, DEBUG_TYPE, - "SI Load / Store Optimizer", false, false) + "SI Load Store Optimizer", false, false) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_END(SILoadStoreOptimizer, DEBUG_TYPE, - "SI Load / Store Optimizer", false, false) + "SI Load Store Optimizer", false, false) char SILoadStoreOptimizer::ID = 0; @@ -436,6 +442,20 @@ bool SILoadStoreOptimizer::findMatchingInst(CombineInfo &CI) { return false; } +unsigned SILoadStoreOptimizer::read2Opcode(unsigned EltSize) const { + if (STM->ldsRequiresM0Init()) + return (EltSize == 4) ? AMDGPU::DS_READ2_B32 : AMDGPU::DS_READ2_B64; + return (EltSize == 4) ? AMDGPU::DS_READ2_B32_gfx9 : AMDGPU::DS_READ2_B64_gfx9; +} + +unsigned SILoadStoreOptimizer::read2ST64Opcode(unsigned EltSize) const { + if (STM->ldsRequiresM0Init()) + return (EltSize == 4) ? AMDGPU::DS_READ2ST64_B32 : AMDGPU::DS_READ2ST64_B64; + + return (EltSize == 4) ? 
+ AMDGPU::DS_READ2ST64_B32_gfx9 : AMDGPU::DS_READ2ST64_B64_gfx9; +} + MachineBasicBlock::iterator SILoadStoreOptimizer::mergeRead2Pair( CombineInfo &CI) { MachineBasicBlock *MBB = CI.I->getParent(); @@ -449,12 +469,8 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeRead2Pair( unsigned NewOffset0 = CI.Offset0; unsigned NewOffset1 = CI.Offset1; - unsigned Opc = (CI.EltSize == 4) ? AMDGPU::DS_READ2_B32 - : AMDGPU::DS_READ2_B64; - - if (CI.UseST64) - Opc = (CI.EltSize == 4) ? AMDGPU::DS_READ2ST64_B32 - : AMDGPU::DS_READ2ST64_B64; + unsigned Opc = CI.UseST64 ? + read2ST64Opcode(CI.EltSize) : read2Opcode(CI.EltSize); unsigned SubRegIdx0 = (CI.EltSize == 4) ? AMDGPU::sub0 : AMDGPU::sub0_sub1; unsigned SubRegIdx1 = (CI.EltSize == 4) ? AMDGPU::sub1 : AMDGPU::sub2_sub3; @@ -480,11 +496,16 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeRead2Pair( unsigned BaseReg = AddrReg->getReg(); unsigned BaseRegFlags = 0; if (CI.BaseOff) { + unsigned ImmReg = MRI->createVirtualRegister(&AMDGPU::SGPR_32RegClass); + BuildMI(*MBB, CI.Paired, DL, TII->get(AMDGPU::S_MOV_B32), ImmReg) + .addImm(CI.BaseOff); + BaseReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass); BaseRegFlags = RegState::Kill; - BuildMI(*MBB, CI.Paired, DL, TII->get(AMDGPU::V_ADD_I32_e32), BaseReg) - .addImm(CI.BaseOff) - .addReg(AddrReg->getReg()); + + TII->getAddNoCarry(*MBB, CI.Paired, DL, BaseReg) + .addReg(ImmReg) + .addReg(AddrReg->getReg()); } MachineInstrBuilder Read2 = @@ -517,25 +538,35 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeRead2Pair( return Next; } +unsigned SILoadStoreOptimizer::write2Opcode(unsigned EltSize) const { + if (STM->ldsRequiresM0Init()) + return (EltSize == 4) ? AMDGPU::DS_WRITE2_B32 : AMDGPU::DS_WRITE2_B64; + return (EltSize == 4) ? AMDGPU::DS_WRITE2_B32_gfx9 : AMDGPU::DS_WRITE2_B64_gfx9; +} + +unsigned SILoadStoreOptimizer::write2ST64Opcode(unsigned EltSize) const { + if (STM->ldsRequiresM0Init()) + return (EltSize == 4) ? AMDGPU::DS_WRITE2ST64_B32 : AMDGPU::DS_WRITE2ST64_B64; + + return (EltSize == 4) ? + AMDGPU::DS_WRITE2ST64_B32_gfx9 : AMDGPU::DS_WRITE2ST64_B64_gfx9; +} + MachineBasicBlock::iterator SILoadStoreOptimizer::mergeWrite2Pair( CombineInfo &CI) { MachineBasicBlock *MBB = CI.I->getParent(); // Be sure to use .addOperand(), and not .addReg() with these. We want to be // sure we preserve the subregister index and any register flags set on them. - const MachineOperand *Addr = TII->getNamedOperand(*CI.I, AMDGPU::OpName::addr); + const MachineOperand *AddrReg = TII->getNamedOperand(*CI.I, AMDGPU::OpName::addr); const MachineOperand *Data0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::data0); const MachineOperand *Data1 = TII->getNamedOperand(*CI.Paired, AMDGPU::OpName::data0); unsigned NewOffset0 = CI.Offset0; unsigned NewOffset1 = CI.Offset1; - unsigned Opc = (CI.EltSize == 4) ? AMDGPU::DS_WRITE2_B32 - : AMDGPU::DS_WRITE2_B64; - - if (CI.UseST64) - Opc = (CI.EltSize == 4) ? AMDGPU::DS_WRITE2ST64_B32 - : AMDGPU::DS_WRITE2ST64_B64; + unsigned Opc = CI.UseST64 ? + write2ST64Opcode(CI.EltSize) : write2Opcode(CI.EltSize); if (NewOffset0 > NewOffset1) { // Canonicalize the merged instruction so the smaller offset comes first. 
@@ -550,14 +581,19 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeWrite2Pair( const MCInstrDesc &Write2Desc = TII->get(Opc); DebugLoc DL = CI.I->getDebugLoc(); - unsigned BaseReg = Addr->getReg(); + unsigned BaseReg = AddrReg->getReg(); unsigned BaseRegFlags = 0; if (CI.BaseOff) { + unsigned ImmReg = MRI->createVirtualRegister(&AMDGPU::SGPR_32RegClass); + BuildMI(*MBB, CI.Paired, DL, TII->get(AMDGPU::S_MOV_B32), ImmReg) + .addImm(CI.BaseOff); + BaseReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass); BaseRegFlags = RegState::Kill; - BuildMI(*MBB, CI.Paired, DL, TII->get(AMDGPU::V_ADD_I32_e32), BaseReg) - .addImm(CI.BaseOff) - .addReg(Addr->getReg()); + + TII->getAddNoCarry(*MBB, CI.Paired, DL, BaseReg) + .addReg(ImmReg) + .addReg(AddrReg->getReg()); } MachineInstrBuilder Write2 = @@ -786,9 +822,13 @@ bool SILoadStoreOptimizer::optimizeBlock(MachineBasicBlock &MBB) { CombineInfo CI; CI.I = I; unsigned Opc = MI.getOpcode(); - if (Opc == AMDGPU::DS_READ_B32 || Opc == AMDGPU::DS_READ_B64) { + if (Opc == AMDGPU::DS_READ_B32 || Opc == AMDGPU::DS_READ_B64 || + Opc == AMDGPU::DS_READ_B32_gfx9 || Opc == AMDGPU::DS_READ_B64_gfx9) { + CI.InstClass = DS_READ_WRITE; - CI.EltSize = (Opc == AMDGPU::DS_READ_B64) ? 8 : 4; + CI.EltSize = + (Opc == AMDGPU::DS_READ_B64 || Opc == AMDGPU::DS_READ_B64_gfx9) ? 8 : 4; + if (findMatchingInst(CI)) { Modified = true; I = mergeRead2Pair(CI); @@ -797,10 +837,13 @@ bool SILoadStoreOptimizer::optimizeBlock(MachineBasicBlock &MBB) { } continue; - } - if (Opc == AMDGPU::DS_WRITE_B32 || Opc == AMDGPU::DS_WRITE_B64) { + } else if (Opc == AMDGPU::DS_WRITE_B32 || Opc == AMDGPU::DS_WRITE_B64 || + Opc == AMDGPU::DS_WRITE_B32_gfx9 || + Opc == AMDGPU::DS_WRITE_B64_gfx9) { CI.InstClass = DS_READ_WRITE; - CI.EltSize = (Opc == AMDGPU::DS_WRITE_B64) ? 8 : 4; + CI.EltSize + = (Opc == AMDGPU::DS_WRITE_B64 || Opc == AMDGPU::DS_WRITE_B64_gfx9) ? 8 : 4; + if (findMatchingInst(CI)) { Modified = true; I = mergeWrite2Pair(CI); @@ -874,7 +917,7 @@ bool SILoadStoreOptimizer::optimizeBlock(MachineBasicBlock &MBB) { } bool SILoadStoreOptimizer::runOnMachineFunction(MachineFunction &MF) { - if (skipFunction(*MF.getFunction())) + if (skipFunction(MF.getFunction())) return false; STM = &MF.getSubtarget(); diff --git a/lib/Target/AMDGPU/SILowerControlFlow.cpp b/lib/Target/AMDGPU/SILowerControlFlow.cpp index 15210d2a31c1..a9af83323976 100644 --- a/lib/Target/AMDGPU/SILowerControlFlow.cpp +++ b/lib/Target/AMDGPU/SILowerControlFlow.cpp @@ -21,31 +21,31 @@ /// EXEC to update the predicates. /// /// For example: -/// %VCC = V_CMP_GT_F32 %VGPR1, %VGPR2 -/// %SGPR0 = SI_IF %VCC -/// %VGPR0 = V_ADD_F32 %VGPR0, %VGPR0 -/// %SGPR0 = SI_ELSE %SGPR0 -/// %VGPR0 = V_SUB_F32 %VGPR0, %VGPR0 -/// SI_END_CF %SGPR0 +/// %vcc = V_CMP_GT_F32 %vgpr1, %vgpr2 +/// %sgpr0 = SI_IF %vcc +/// %vgpr0 = V_ADD_F32 %vgpr0, %vgpr0 +/// %sgpr0 = SI_ELSE %sgpr0 +/// %vgpr0 = V_SUB_F32 %vgpr0, %vgpr0 +/// SI_END_CF %sgpr0 /// /// becomes: /// -/// %SGPR0 = S_AND_SAVEEXEC_B64 %VCC // Save and update the exec mask -/// %SGPR0 = S_XOR_B64 %SGPR0, %EXEC // Clear live bits from saved exec mask +/// %sgpr0 = S_AND_SAVEEXEC_B64 %vcc // Save and update the exec mask +/// %sgpr0 = S_XOR_B64 %sgpr0, %exec // Clear live bits from saved exec mask /// S_CBRANCH_EXECZ label0 // This instruction is an optional /// // optimization which allows us to /// // branch if all the bits of /// // EXEC are zero. 
-/// %VGPR0 = V_ADD_F32 %VGPR0, %VGPR0 // Do the IF block of the branch +/// %vgpr0 = V_ADD_F32 %vgpr0, %vgpr0 // Do the IF block of the branch /// /// label0: -/// %SGPR0 = S_OR_SAVEEXEC_B64 %EXEC // Restore the exec mask for the Then block -/// %EXEC = S_XOR_B64 %SGPR0, %EXEC // Clear live bits from saved exec mask +/// %sgpr0 = S_OR_SAVEEXEC_B64 %exec // Restore the exec mask for the Then block +/// %exec = S_XOR_B64 %sgpr0, %exec // Clear live bits from saved exec mask /// S_BRANCH_EXECZ label1 // Use our branch optimization /// // instruction again. -/// %VGPR0 = V_SUB_F32 %VGPR0, %VGPR // Do the THEN block +/// %vgpr0 = V_SUB_F32 %vgpr0, %vgpr // Do the THEN block /// label1: -/// %EXEC = S_OR_B64 %EXEC, %SGPR0 // Re-enable saved exec mask bits +/// %exec = S_OR_B64 %exec, %sgpr0 // Re-enable saved exec mask bits //===----------------------------------------------------------------------===// #include "AMDGPU.h" @@ -53,7 +53,7 @@ #include "SIInstrInfo.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" diff --git a/lib/Target/AMDGPU/SILowerI1Copies.cpp b/lib/Target/AMDGPU/SILowerI1Copies.cpp index 3880d052bf89..da57b90dd8c4 100644 --- a/lib/Target/AMDGPU/SILowerI1Copies.cpp +++ b/lib/Target/AMDGPU/SILowerI1Copies.cpp @@ -17,7 +17,7 @@ #include "AMDGPU.h" #include "AMDGPUSubtarget.h" #include "SIInstrInfo.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index 0a92cd176541..888d8f978aff 100644 --- a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -28,8 +28,6 @@ using namespace llvm; SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) : AMDGPUMachineFunction(MF), - BufferPSV(*(MF.getSubtarget().getInstrInfo())), - ImagePSV(*(MF.getSubtarget().getInstrInfo())), PrivateSegmentBuffer(false), DispatchPtr(false), QueuePtr(false), @@ -51,9 +49,9 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) ImplicitArgPtr(false), GITPtrHigh(0xffffffff) { const SISubtarget &ST = MF.getSubtarget(); - const Function *F = MF.getFunction(); - FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(*F); - WavesPerEU = ST.getWavesPerEU(*F); + const Function &F = MF.getFunction(); + FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F); + WavesPerEU = ST.getWavesPerEU(F); if (!isEntryFunction()) { // Non-entry functions have no special inputs for now, other registers @@ -68,21 +66,21 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) ArgInfo.PrivateSegmentWaveByteOffset = ArgDescriptor::createRegister(ScratchWaveOffsetReg); - if (F->hasFnAttribute("amdgpu-implicitarg-ptr")) + if (F.hasFnAttribute("amdgpu-implicitarg-ptr")) ImplicitArgPtr = true; } else { - if (F->hasFnAttribute("amdgpu-implicitarg-ptr")) + if (F.hasFnAttribute("amdgpu-implicitarg-ptr")) KernargSegmentPtr = true; } - CallingConv::ID CC = F->getCallingConv(); + CallingConv::ID CC = F.getCallingConv(); if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) { - if (!F->arg_empty()) + if (!F.arg_empty()) KernargSegmentPtr 
= true; WorkGroupIDX = true; WorkItemIDX = true; } else if (CC == CallingConv::AMDGPU_PS) { - PSInputAddr = AMDGPU::getInitialPSInputAddr(*F); + PSInputAddr = AMDGPU::getInitialPSInputAddr(F); } if (ST.debuggerEmitPrologue()) { @@ -94,27 +92,27 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) WorkItemIDY = true; WorkItemIDZ = true; } else { - if (F->hasFnAttribute("amdgpu-work-group-id-x")) + if (F.hasFnAttribute("amdgpu-work-group-id-x")) WorkGroupIDX = true; - if (F->hasFnAttribute("amdgpu-work-group-id-y")) + if (F.hasFnAttribute("amdgpu-work-group-id-y")) WorkGroupIDY = true; - if (F->hasFnAttribute("amdgpu-work-group-id-z")) + if (F.hasFnAttribute("amdgpu-work-group-id-z")) WorkGroupIDZ = true; - if (F->hasFnAttribute("amdgpu-work-item-id-x")) + if (F.hasFnAttribute("amdgpu-work-item-id-x")) WorkItemIDX = true; - if (F->hasFnAttribute("amdgpu-work-item-id-y")) + if (F.hasFnAttribute("amdgpu-work-item-id-y")) WorkItemIDY = true; - if (F->hasFnAttribute("amdgpu-work-item-id-z")) + if (F.hasFnAttribute("amdgpu-work-item-id-z")) WorkItemIDZ = true; } const MachineFrameInfo &FrameInfo = MF.getFrameInfo(); - bool MaySpill = ST.isVGPRSpillingEnabled(*F); + bool MaySpill = ST.isVGPRSpillingEnabled(F); bool HasStackObjects = FrameInfo.hasStackObjects(); if (isEntryFunction()) { @@ -139,30 +137,30 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) if (HasStackObjects || MaySpill) PrivateSegmentBuffer = true; - if (F->hasFnAttribute("amdgpu-dispatch-ptr")) + if (F.hasFnAttribute("amdgpu-dispatch-ptr")) DispatchPtr = true; - if (F->hasFnAttribute("amdgpu-queue-ptr")) + if (F.hasFnAttribute("amdgpu-queue-ptr")) QueuePtr = true; - if (F->hasFnAttribute("amdgpu-dispatch-id")) + if (F.hasFnAttribute("amdgpu-dispatch-id")) DispatchID = true; } else if (ST.isMesaGfxShader(MF)) { if (HasStackObjects || MaySpill) ImplicitBufferPtr = true; } - if (F->hasFnAttribute("amdgpu-kernarg-segment-ptr")) + if (F.hasFnAttribute("amdgpu-kernarg-segment-ptr")) KernargSegmentPtr = true; if (ST.hasFlatAddressSpace() && isEntryFunction() && IsCOV2) { // TODO: This could be refined a lot. The attribute is a poor way of // detecting calls that may require it before argument lowering. - if (HasStackObjects || F->hasFnAttribute("amdgpu-flat-scratch")) + if (HasStackObjects || F.hasFnAttribute("amdgpu-flat-scratch")) FlatScratchInit = true; } - Attribute A = F->getFnAttribute("amdgpu-git-ptr-high"); + Attribute A = F.getFnAttribute("amdgpu-git-ptr-high"); StringRef S = A.getValueAsString(); if (!S.empty()) S.consumeInteger(0, GITPtrHigh); diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/lib/Target/AMDGPU/SIMachineFunctionInfo.h index 5dde72910ee3..63875c55df03 100644 --- a/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -34,12 +34,14 @@ namespace llvm { class MachineFrameInfo; class MachineFunction; +class SIInstrInfo; class TargetRegisterClass; class AMDGPUImagePseudoSourceValue : public PseudoSourceValue { public: + // TODO: Is the img rsrc useful? 
explicit AMDGPUImagePseudoSourceValue(const TargetInstrInfo &TII) : - PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) { } + PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) {} bool isConstant(const MachineFrameInfo *) const override { // This should probably be true for most images, but we will start by being @@ -48,15 +50,11 @@ class AMDGPUImagePseudoSourceValue : public PseudoSourceValue { } bool isAliased(const MachineFrameInfo *) const override { - // FIXME: If we ever change image intrinsics to accept fat pointers, then - // this could be true for some cases. - return false; + return true; } bool mayAlias(const MachineFrameInfo *) const override { - // FIXME: If we ever change image intrinsics to accept fat pointers, then - // this could be true for some cases. - return false; + return true; } }; @@ -135,8 +133,10 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction { // Stack object indices for work item IDs. std::array DebuggerWorkItemIDStackObjectIndices = {{0, 0, 0}}; - AMDGPUBufferPseudoSourceValue BufferPSV; - AMDGPUImagePseudoSourceValue ImagePSV; + DenseMap> BufferPSVs; + DenseMap> ImagePSVs; private: unsigned LDSWaveSpillSize = 0; @@ -629,12 +629,22 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction { return LDSWaveSpillSize; } - const AMDGPUBufferPseudoSourceValue *getBufferPSV() const { - return &BufferPSV; + const AMDGPUBufferPseudoSourceValue *getBufferPSV(const SIInstrInfo &TII, + const Value *BufferRsrc) { + assert(BufferRsrc); + auto PSV = BufferPSVs.try_emplace( + BufferRsrc, + llvm::make_unique(TII)); + return PSV.first->second.get(); } - const AMDGPUImagePseudoSourceValue *getImagePSV() const { - return &ImagePSV; + const AMDGPUImagePseudoSourceValue *getImagePSV(const SIInstrInfo &TII, + const Value *ImgRsrc) { + assert(ImgRsrc); + auto PSV = ImagePSVs.try_emplace( + ImgRsrc, + llvm::make_unique(TII)); + return PSV.first->second.get(); } }; diff --git a/lib/Target/AMDGPU/SIMachineScheduler.cpp b/lib/Target/AMDGPU/SIMachineScheduler.cpp index c13148bf0a2b..6b67b76652ed 100644 --- a/lib/Target/AMDGPU/SIMachineScheduler.cpp +++ b/lib/Target/AMDGPU/SIMachineScheduler.cpp @@ -19,7 +19,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/LiveInterval.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineScheduler.h" @@ -2050,9 +2050,9 @@ void SIScheduleDAGMI::schedule() placeDebugValues(); DEBUG({ - unsigned BBNum = begin()->getParent()->getNumber(); - dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n"; - dumpSchedule(); - dbgs() << '\n'; - }); + dbgs() << "*** Final schedule for " + << printMBBReference(*begin()->getParent()) << " ***\n"; + dumpSchedule(); + dbgs() << '\n'; + }); } diff --git a/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/lib/Target/AMDGPU/SIMemoryLegalizer.cpp index c66aed9ef752..c73fb10b7ea0 100644 --- a/lib/Target/AMDGPU/SIMemoryLegalizer.cpp +++ b/lib/Target/AMDGPU/SIMemoryLegalizer.cpp @@ -340,9 +340,9 @@ Optional SIMemOpInfo::getAtomicRmwInfo( /* static */ void SIMemOpInfo::reportUnknownSyncScope( const MachineBasicBlock::iterator &MI) { - DiagnosticInfoUnsupported Diag(*MI->getParent()->getParent()->getFunction(), + DiagnosticInfoUnsupported Diag(MI->getParent()->getParent()->getFunction(), "Unsupported synchronization scope"); - LLVMContext *CTX = 
&MI->getParent()->getParent()->getFunction()->getContext(); + LLVMContext *CTX = &MI->getParent()->getParent()->getFunction().getContext(); CTX->diagnose(Diag); } diff --git a/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp b/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp index aa95161c1b68..2dc6f2702b3b 100644 --- a/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp +++ b/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp @@ -205,7 +205,7 @@ static bool isLiveOut(const MachineBasicBlock &MBB, unsigned Reg) { } bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) { - if (skipFunction(*MF.getFunction())) + if (skipFunction(MF.getFunction())) return false; const SISubtarget &ST = MF.getSubtarget(); diff --git a/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp b/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp index 6b2668fe052f..83074773c495 100644 --- a/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp +++ b/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp @@ -23,7 +23,7 @@ #include "AMDGPU.h" #include "AMDGPUSubtarget.h" #include "SIInstrInfo.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineFunctionPass.h" using namespace llvm; @@ -103,7 +103,7 @@ static MachineInstr* getOrExecSource(const MachineInstr &MI, } bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) { - if (skipFunction(*MF.getFunction())) + if (skipFunction(MF.getFunction())) return false; const SISubtarget &ST = MF.getSubtarget(); @@ -134,8 +134,11 @@ bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) { } while (I != E) { - if (I->isDebugValue()) + if (I->isDebugValue()) { + I = std::next(I); continue; + } + if (I->mayStore() || I->isBarrier() || I->isCall() || I->hasUnmodeledSideEffects() || I->hasOrderedMemoryRef()) break; diff --git a/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/lib/Target/AMDGPU/SIPeepholeSDWA.cpp index 5738077f9890..5ed7fdf220bf 100644 --- a/lib/Target/AMDGPU/SIPeepholeSDWA.cpp +++ b/lib/Target/AMDGPU/SIPeepholeSDWA.cpp @@ -10,12 +10,12 @@ /// \file This pass tries to apply several peephole SDWA patterns. /// /// E.g. 
original: -/// V_LSHRREV_B32_e32 %vreg0, 16, %vreg1 -/// V_ADD_I32_e32 %vreg2, %vreg0, %vreg3 -/// V_LSHLREV_B32_e32 %vreg4, 16, %vreg2 +/// V_LSHRREV_B32_e32 %0, 16, %1 +/// V_ADD_I32_e32 %2, %0, %3 +/// V_LSHLREV_B32_e32 %4, 16, %2 /// /// Replace: -/// V_ADD_I32_sdwa %vreg4, %vreg1, %vreg3 +/// V_ADD_I32_sdwa %4, %1, %3 /// dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD /// //===----------------------------------------------------------------------===// @@ -61,6 +61,7 @@ STATISTIC(NumSDWAInstructionsPeepholed, namespace { class SDWAOperand; +class SDWADstOperand; class SIPeepholeSDWA : public MachineFunctionPass { public: @@ -86,6 +87,7 @@ class SIPeepholeSDWA : public MachineFunctionPass { bool runOnMachineFunction(MachineFunction &MF) override; void matchSDWAOperands(MachineFunction &MF); + std::unique_ptr matchSDWAOperand(MachineInstr &MI); bool isConvertibleToSDWA(const MachineInstr &MI, const SISubtarget &ST) const; bool convertToSDWA(MachineInstr &MI, const SDWAOperandsVector &SDWAOperands); void legalizeScalarOperands(MachineInstr &MI, const SISubtarget &ST) const; @@ -122,6 +124,11 @@ class SDWAOperand { MachineRegisterInfo *getMRI() const { return &getParentInst()->getParent()->getParent()->getRegInfo(); } + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + virtual void print(raw_ostream& OS) const = 0; + void dump() const { print(dbgs()); } +#endif }; using namespace AMDGPU::SDWA; @@ -137,8 +144,8 @@ class SDWASrcOperand : public SDWAOperand { SDWASrcOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp, SdwaSel SrcSel_ = DWORD, bool Abs_ = false, bool Neg_ = false, bool Sext_ = false) - : SDWAOperand(TargetOp, ReplacedOp), SrcSel(SrcSel_), Abs(Abs_), - Neg(Neg_), Sext(Sext_) {} + : SDWAOperand(TargetOp, ReplacedOp), + SrcSel(SrcSel_), Abs(Abs_), Neg(Neg_), Sext(Sext_) {} MachineInstr *potentialToConvert(const SIInstrInfo *TII) override; bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) override; @@ -150,6 +157,10 @@ class SDWASrcOperand : public SDWAOperand { uint64_t getSrcMods(const SIInstrInfo *TII, const MachineOperand *SrcOp) const; + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + void print(raw_ostream& OS) const override; +#endif }; class SDWADstOperand : public SDWAOperand { @@ -158,15 +169,39 @@ class SDWADstOperand : public SDWAOperand { DstUnused DstUn; public: + SDWADstOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp, SdwaSel DstSel_ = DWORD, DstUnused DstUn_ = UNUSED_PAD) - : SDWAOperand(TargetOp, ReplacedOp), DstSel(DstSel_), DstUn(DstUn_) {} + : SDWAOperand(TargetOp, ReplacedOp), DstSel(DstSel_), DstUn(DstUn_) {} MachineInstr *potentialToConvert(const SIInstrInfo *TII) override; bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) override; SdwaSel getDstSel() const { return DstSel; } DstUnused getDstUnused() const { return DstUn; } + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + void print(raw_ostream& OS) const override; +#endif +}; + +class SDWADstPreserveOperand : public SDWADstOperand { +private: + MachineOperand *Preserve; + +public: + SDWADstPreserveOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp, + MachineOperand *PreserveOp, SdwaSel DstSel_ = DWORD) + : SDWADstOperand(TargetOp, ReplacedOp, DstSel_, UNUSED_PRESERVE), + Preserve(PreserveOp) {} + + bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) override; + + MachineOperand *getPreservedOperand() const { return Preserve; } + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + void print(raw_ostream& OS) 
const override; +#endif }; } // end anonymous namespace @@ -181,7 +216,8 @@ FunctionPass *llvm::createSIPeepholeSDWAPass() { return new SIPeepholeSDWA(); } -#ifndef NDEBUG + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) static raw_ostream& operator<<(raw_ostream &OS, const SdwaSel &Sel) { switch(Sel) { case BYTE_0: OS << "BYTE_0"; break; @@ -204,20 +240,33 @@ static raw_ostream& operator<<(raw_ostream &OS, const DstUnused &Un) { return OS; } -static raw_ostream& operator<<(raw_ostream &OS, const SDWASrcOperand &Src) { - OS << "SDWA src: " << *Src.getTargetOperand() - << " src_sel:" << Src.getSrcSel() - << " abs:" << Src.getAbs() << " neg:" << Src.getNeg() - << " sext:" << Src.getSext() << '\n'; +static raw_ostream& operator<<(raw_ostream &OS, const SDWAOperand &Operand) { + Operand.print(OS); return OS; } -static raw_ostream& operator<<(raw_ostream &OS, const SDWADstOperand &Dst) { - OS << "SDWA dst: " << *Dst.getTargetOperand() - << " dst_sel:" << Dst.getDstSel() - << " dst_unused:" << Dst.getDstUnused() << '\n'; - return OS; +LLVM_DUMP_METHOD +void SDWASrcOperand::print(raw_ostream& OS) const { + OS << "SDWA src: " << *getTargetOperand() + << " src_sel:" << getSrcSel() + << " abs:" << getAbs() << " neg:" << getNeg() + << " sext:" << getSext() << '\n'; } + +LLVM_DUMP_METHOD +void SDWADstOperand::print(raw_ostream& OS) const { + OS << "SDWA dst: " << *getTargetOperand() + << " dst_sel:" << getDstSel() + << " dst_unused:" << getDstUnused() << '\n'; +} + +LLVM_DUMP_METHOD +void SDWADstPreserveOperand::print(raw_ostream& OS) const { + OS << "SDWA preserve dst: " << *getTargetOperand() + << " dst_sel:" << getDstSel() + << " preserve:" << *getPreservedOperand() << '\n'; +} + #endif static void copyRegOperand(MachineOperand &To, const MachineOperand &From) { @@ -239,23 +288,44 @@ static bool isSameReg(const MachineOperand &LHS, const MachineOperand &RHS) { LHS.getSubReg() == RHS.getSubReg(); } -static bool isSubregOf(const MachineOperand &SubReg, - const MachineOperand &SuperReg, - const TargetRegisterInfo *TRI) { +static MachineOperand *findSingleRegUse(const MachineOperand *Reg, + const MachineRegisterInfo *MRI) { + if (!Reg->isReg() || !Reg->isDef()) + return nullptr; - if (!SuperReg.isReg() || !SubReg.isReg()) - return false; + MachineOperand *ResMO = nullptr; + for (MachineOperand &UseMO : MRI->use_nodbg_operands(Reg->getReg())) { + // If there exist use of subreg of Reg then return nullptr + if (!isSameReg(UseMO, *Reg)) + return nullptr; - if (isSameReg(SuperReg, SubReg)) - return true; + // Check that there is only one instruction that uses Reg + if (!ResMO) { + ResMO = &UseMO; + } else if (ResMO->getParent() != UseMO.getParent()) { + return nullptr; + } + } - if (SuperReg.getReg() != SubReg.getReg()) - return false; + return ResMO; +} + +static MachineOperand *findSingleRegDef(const MachineOperand *Reg, + const MachineRegisterInfo *MRI) { + if (!Reg->isReg()) + return nullptr; + + MachineInstr *DefInstr = MRI->getUniqueVRegDef(Reg->getReg()); + if (!DefInstr) + return nullptr; - LaneBitmask SuperMask = TRI->getSubRegIndexLaneMask(SuperReg.getSubReg()); - LaneBitmask SubMask = TRI->getSubRegIndexLaneMask(SubReg.getSubReg()); - SuperMask |= ~SubMask; - return SuperMask.all(); + for (auto &DefMO : DefInstr->defs()) { + if (DefMO.isReg() && DefMO.getReg() == Reg->getReg()) + return &DefMO; + } + + // Ignore implicit defs. 
+ return nullptr; } uint64_t SDWASrcOperand::getSrcMods(const SIInstrInfo *TII, @@ -286,30 +356,11 @@ uint64_t SDWASrcOperand::getSrcMods(const SIInstrInfo *TII, MachineInstr *SDWASrcOperand::potentialToConvert(const SIInstrInfo *TII) { // For SDWA src operand potential instruction is one that use register // defined by parent instruction - MachineRegisterInfo *MRI = getMRI(); - MachineOperand *Replaced = getReplacedOperand(); - assert(Replaced->isReg()); + MachineOperand *PotentialMO = findSingleRegUse(getReplacedOperand(), getMRI()); + if (!PotentialMO) + return nullptr; - MachineInstr *PotentialMI = nullptr; - for (MachineOperand &PotentialMO : MRI->use_operands(Replaced->getReg())) { - // If this is use of another subreg of dst reg then do nothing - if (!isSubregOf(*Replaced, PotentialMO, MRI->getTargetRegisterInfo())) - continue; - - // If there exist use of superreg of dst then we should not combine this - // opernad - if (!isSameReg(PotentialMO, *Replaced)) - return nullptr; - - // Check that PotentialMI is only instruction that uses dst reg - if (PotentialMI == nullptr) { - PotentialMI = PotentialMO.getParent(); - } else if (PotentialMI != PotentialMO.getParent()) { - return nullptr; - } - } - - return PotentialMI; + return PotentialMO->getParent(); } bool SDWASrcOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) { @@ -331,7 +382,7 @@ bool SDWASrcOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) { if ((MI.getOpcode() == AMDGPU::V_MAC_F16_sdwa || MI.getOpcode() == AMDGPU::V_MAC_F32_sdwa) && - !isSameReg(*Src, *getReplacedOperand())) { + !isSameReg(*Src, *getReplacedOperand())) { // In case of v_mac_f16/32_sdwa this pass can try to apply src operand to // src2. This is not allowed. return false; @@ -351,29 +402,18 @@ MachineInstr *SDWADstOperand::potentialToConvert(const SIInstrInfo *TII) { // that this operand uses MachineRegisterInfo *MRI = getMRI(); MachineInstr *ParentMI = getParentInst(); - MachineOperand *Replaced = getReplacedOperand(); - assert(Replaced->isReg()); - for (MachineOperand &PotentialMO : MRI->def_operands(Replaced->getReg())) { - if (!isSubregOf(*Replaced, PotentialMO, MRI->getTargetRegisterInfo())) - continue; + MachineOperand *PotentialMO = findSingleRegDef(getReplacedOperand(), MRI); + if (!PotentialMO) + return nullptr; - if (!isSameReg(*Replaced, PotentialMO)) + // Check that ParentMI is the only instruction that uses replaced register + for (MachineInstr &UseInst : MRI->use_nodbg_instructions(PotentialMO->getReg())) { + if (&UseInst != ParentMI) return nullptr; - - // Check that ParentMI is the only instruction that uses replaced register - for (MachineOperand &UseMO : MRI->use_operands(PotentialMO.getReg())) { - if (isSubregOf(UseMO, PotentialMO, MRI->getTargetRegisterInfo()) && - UseMO.getParent() != ParentMI) { - return nullptr; - } - } - - // Due to SSA this should be onle def of replaced register, so return it - return PotentialMO.getParent(); } - return nullptr; + return PotentialMO->getParent(); } bool SDWADstOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) { @@ -404,13 +444,43 @@ bool SDWADstOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) { return true; } +bool SDWADstPreserveOperand::convertToSDWA(MachineInstr &MI, + const SIInstrInfo *TII) { + // MI should be moved right before v_or_b32. + // For this we should clear all kill flags on uses of MI src-operands or else + // we can encounter problem with use of killed operand. 
+ for (MachineOperand &MO : MI.uses()) { + if (!MO.isReg()) + continue; + getMRI()->clearKillFlags(MO.getReg()); + } + + // Move MI before v_or_b32 + auto MBB = MI.getParent(); + MBB->remove(&MI); + MBB->insert(getParentInst(), &MI); + + // Add Implicit use of preserved register + MachineInstrBuilder MIB(*MBB->getParent(), MI); + MIB.addReg(getPreservedOperand()->getReg(), + RegState::ImplicitKill, + getPreservedOperand()->getSubReg()); + + // Tie dst to implicit use + MI.tieOperands(AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst), + MI.getNumOperands() - 1); + + // Convert MI as any other SDWADstOperand and remove v_or_b32 + return SDWADstOperand::convertToSDWA(MI, TII); +} + Optional SIPeepholeSDWA::foldToImm(const MachineOperand &Op) const { if (Op.isImm()) { return Op.getImm(); } // If this is not immediate then it can be copy of immediate value, e.g.: - // %vreg1 = S_MOV_B32 255; + // %1 = S_MOV_B32 255; if (Op.isReg()) { for (const MachineOperand &Def : MRI->def_operands(Op.getReg())) { if (!isSameReg(Op, Def)) @@ -431,195 +501,316 @@ Optional SIPeepholeSDWA::foldToImm(const MachineOperand &Op) const { return None; } -void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) { - for (MachineBasicBlock &MBB : MF) { - for (MachineInstr &MI : MBB) { - unsigned Opcode = MI.getOpcode(); - switch (Opcode) { - case AMDGPU::V_LSHRREV_B32_e32: - case AMDGPU::V_ASHRREV_I32_e32: - case AMDGPU::V_LSHLREV_B32_e32: - case AMDGPU::V_LSHRREV_B32_e64: - case AMDGPU::V_ASHRREV_I32_e64: - case AMDGPU::V_LSHLREV_B32_e64: { - // from: v_lshrrev_b32_e32 v1, 16/24, v0 - // to SDWA src:v0 src_sel:WORD_1/BYTE_3 - - // from: v_ashrrev_i32_e32 v1, 16/24, v0 - // to SDWA src:v0 src_sel:WORD_1/BYTE_3 sext:1 - - // from: v_lshlrev_b32_e32 v1, 16/24, v0 - // to SDWA dst:v1 dst_sel:WORD_1/BYTE_3 dst_unused:UNUSED_PAD - MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0); - auto Imm = foldToImm(*Src0); - if (!Imm) - break; - - if (*Imm != 16 && *Imm != 24) - break; - - MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1); - MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst); - if (TRI->isPhysicalRegister(Src1->getReg()) || - TRI->isPhysicalRegister(Dst->getReg())) - break; - - if (Opcode == AMDGPU::V_LSHLREV_B32_e32 || - Opcode == AMDGPU::V_LSHLREV_B32_e64) { - auto SDWADst = make_unique( - Dst, Src1, *Imm == 16 ? WORD_1 : BYTE_3, UNUSED_PAD); - DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWADst << '\n'); - SDWAOperands[&MI] = std::move(SDWADst); - ++NumSDWAPatternsFound; - } else { - auto SDWASrc = make_unique( - Src1, Dst, *Imm == 16 ? 
WORD_1 : BYTE_3, false, false, - Opcode != AMDGPU::V_LSHRREV_B32_e32 && - Opcode != AMDGPU::V_LSHRREV_B32_e64); - DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n'); - SDWAOperands[&MI] = std::move(SDWASrc); - ++NumSDWAPatternsFound; - } - break; - } +std::unique_ptr +SIPeepholeSDWA::matchSDWAOperand(MachineInstr &MI) { + unsigned Opcode = MI.getOpcode(); + switch (Opcode) { + case AMDGPU::V_LSHRREV_B32_e32: + case AMDGPU::V_ASHRREV_I32_e32: + case AMDGPU::V_LSHLREV_B32_e32: + case AMDGPU::V_LSHRREV_B32_e64: + case AMDGPU::V_ASHRREV_I32_e64: + case AMDGPU::V_LSHLREV_B32_e64: { + // from: v_lshrrev_b32_e32 v1, 16/24, v0 + // to SDWA src:v0 src_sel:WORD_1/BYTE_3 + + // from: v_ashrrev_i32_e32 v1, 16/24, v0 + // to SDWA src:v0 src_sel:WORD_1/BYTE_3 sext:1 + + // from: v_lshlrev_b32_e32 v1, 16/24, v0 + // to SDWA dst:v1 dst_sel:WORD_1/BYTE_3 dst_unused:UNUSED_PAD + MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0); + auto Imm = foldToImm(*Src0); + if (!Imm) + break; + + if (*Imm != 16 && *Imm != 24) + break; + + MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1); + MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst); + if (TRI->isPhysicalRegister(Src1->getReg()) || + TRI->isPhysicalRegister(Dst->getReg())) + break; + + if (Opcode == AMDGPU::V_LSHLREV_B32_e32 || + Opcode == AMDGPU::V_LSHLREV_B32_e64) { + return make_unique( + Dst, Src1, *Imm == 16 ? WORD_1 : BYTE_3, UNUSED_PAD); + } else { + return make_unique( + Src1, Dst, *Imm == 16 ? WORD_1 : BYTE_3, false, false, + Opcode != AMDGPU::V_LSHRREV_B32_e32 && + Opcode != AMDGPU::V_LSHRREV_B32_e64); + } + break; + } - case AMDGPU::V_LSHRREV_B16_e32: - case AMDGPU::V_ASHRREV_I16_e32: - case AMDGPU::V_LSHLREV_B16_e32: - case AMDGPU::V_LSHRREV_B16_e64: - case AMDGPU::V_ASHRREV_I16_e64: - case AMDGPU::V_LSHLREV_B16_e64: { - // from: v_lshrrev_b16_e32 v1, 8, v0 - // to SDWA src:v0 src_sel:BYTE_1 - - // from: v_ashrrev_i16_e32 v1, 8, v0 - // to SDWA src:v0 src_sel:BYTE_1 sext:1 - - // from: v_lshlrev_b16_e32 v1, 8, v0 - // to SDWA dst:v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD - MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0); - auto Imm = foldToImm(*Src0); - if (!Imm || *Imm != 8) - break; - - MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1); - MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst); - - if (TRI->isPhysicalRegister(Src1->getReg()) || - TRI->isPhysicalRegister(Dst->getReg())) - break; - - if (Opcode == AMDGPU::V_LSHLREV_B16_e32 || - Opcode == AMDGPU::V_LSHLREV_B16_e64) { - auto SDWADst = - make_unique(Dst, Src1, BYTE_1, UNUSED_PAD); - DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWADst << '\n'); - SDWAOperands[&MI] = std::move(SDWADst); - ++NumSDWAPatternsFound; - } else { - auto SDWASrc = make_unique( - Src1, Dst, BYTE_1, false, false, - Opcode != AMDGPU::V_LSHRREV_B16_e32 && - Opcode != AMDGPU::V_LSHRREV_B16_e64); - DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n'); - SDWAOperands[&MI] = std::move(SDWASrc); - ++NumSDWAPatternsFound; - } - break; - } + case AMDGPU::V_LSHRREV_B16_e32: + case AMDGPU::V_ASHRREV_I16_e32: + case AMDGPU::V_LSHLREV_B16_e32: + case AMDGPU::V_LSHRREV_B16_e64: + case AMDGPU::V_ASHRREV_I16_e64: + case AMDGPU::V_LSHLREV_B16_e64: { + // from: v_lshrrev_b16_e32 v1, 8, v0 + // to SDWA src:v0 src_sel:BYTE_1 + + // from: v_ashrrev_i16_e32 v1, 8, v0 + // to SDWA src:v0 src_sel:BYTE_1 sext:1 + + // from: v_lshlrev_b16_e32 v1, 8, v0 + // to SDWA dst:v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD 
+ MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0); + auto Imm = foldToImm(*Src0); + if (!Imm || *Imm != 8) + break; + + MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1); + MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst); + + if (TRI->isPhysicalRegister(Src1->getReg()) || + TRI->isPhysicalRegister(Dst->getReg())) + break; + + if (Opcode == AMDGPU::V_LSHLREV_B16_e32 || + Opcode == AMDGPU::V_LSHLREV_B16_e64) { + return make_unique(Dst, Src1, BYTE_1, UNUSED_PAD); + } else { + return make_unique( + Src1, Dst, BYTE_1, false, false, + Opcode != AMDGPU::V_LSHRREV_B16_e32 && + Opcode != AMDGPU::V_LSHRREV_B16_e64); + } + break; + } - case AMDGPU::V_BFE_I32: - case AMDGPU::V_BFE_U32: { - // e.g.: - // from: v_bfe_u32 v1, v0, 8, 8 - // to SDWA src:v0 src_sel:BYTE_1 - - // offset | width | src_sel - // ------------------------ - // 0 | 8 | BYTE_0 - // 0 | 16 | WORD_0 - // 0 | 32 | DWORD ? - // 8 | 8 | BYTE_1 - // 16 | 8 | BYTE_2 - // 16 | 16 | WORD_1 - // 24 | 8 | BYTE_3 - - MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1); - auto Offset = foldToImm(*Src1); - if (!Offset) - break; - - MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2); - auto Width = foldToImm(*Src2); - if (!Width) - break; - - SdwaSel SrcSel = DWORD; - - if (*Offset == 0 && *Width == 8) - SrcSel = BYTE_0; - else if (*Offset == 0 && *Width == 16) - SrcSel = WORD_0; - else if (*Offset == 0 && *Width == 32) - SrcSel = DWORD; - else if (*Offset == 8 && *Width == 8) - SrcSel = BYTE_1; - else if (*Offset == 16 && *Width == 8) - SrcSel = BYTE_2; - else if (*Offset == 16 && *Width == 16) - SrcSel = WORD_1; - else if (*Offset == 24 && *Width == 8) - SrcSel = BYTE_3; - else - break; - - MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0); - MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst); - - if (TRI->isPhysicalRegister(Src0->getReg()) || - TRI->isPhysicalRegister(Dst->getReg())) - break; - - auto SDWASrc = make_unique( - Src0, Dst, SrcSel, false, false, - Opcode != AMDGPU::V_BFE_U32); - DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n'); - SDWAOperands[&MI] = std::move(SDWASrc); - ++NumSDWAPatternsFound; + case AMDGPU::V_BFE_I32: + case AMDGPU::V_BFE_U32: { + // e.g.: + // from: v_bfe_u32 v1, v0, 8, 8 + // to SDWA src:v0 src_sel:BYTE_1 + + // offset | width | src_sel + // ------------------------ + // 0 | 8 | BYTE_0 + // 0 | 16 | WORD_0 + // 0 | 32 | DWORD ? 
+ // 8 | 8 | BYTE_1 + // 16 | 8 | BYTE_2 + // 16 | 16 | WORD_1 + // 24 | 8 | BYTE_3 + + MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1); + auto Offset = foldToImm(*Src1); + if (!Offset) + break; + + MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2); + auto Width = foldToImm(*Src2); + if (!Width) + break; + + SdwaSel SrcSel = DWORD; + + if (*Offset == 0 && *Width == 8) + SrcSel = BYTE_0; + else if (*Offset == 0 && *Width == 16) + SrcSel = WORD_0; + else if (*Offset == 0 && *Width == 32) + SrcSel = DWORD; + else if (*Offset == 8 && *Width == 8) + SrcSel = BYTE_1; + else if (*Offset == 16 && *Width == 8) + SrcSel = BYTE_2; + else if (*Offset == 16 && *Width == 16) + SrcSel = WORD_1; + else if (*Offset == 24 && *Width == 8) + SrcSel = BYTE_3; + else + break; + + MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0); + MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst); + + if (TRI->isPhysicalRegister(Src0->getReg()) || + TRI->isPhysicalRegister(Dst->getReg())) + break; + + return make_unique( + Src0, Dst, SrcSel, false, false, Opcode != AMDGPU::V_BFE_U32); + } + + case AMDGPU::V_AND_B32_e32: + case AMDGPU::V_AND_B32_e64: { + // e.g.: + // from: v_and_b32_e32 v1, 0x0000ffff/0x000000ff, v0 + // to SDWA src:v0 src_sel:WORD_0/BYTE_0 + + MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0); + MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1); + auto ValSrc = Src1; + auto Imm = foldToImm(*Src0); + + if (!Imm) { + Imm = foldToImm(*Src1); + ValSrc = Src0; + } + + if (!Imm || (*Imm != 0x0000ffff && *Imm != 0x000000ff)) + break; + + MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst); + + if (TRI->isPhysicalRegister(Src1->getReg()) || + TRI->isPhysicalRegister(Dst->getReg())) + break; + + return make_unique( + ValSrc, Dst, *Imm == 0x0000ffff ? WORD_0 : BYTE_0); + } + + case AMDGPU::V_OR_B32_e32: + case AMDGPU::V_OR_B32_e64: { + // Patterns for dst_unused:UNUSED_PRESERVE. 
+ // e.g., from: + // v_add_f16_sdwa v0, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD + // src1_sel:WORD_1 src2_sel:WORD1 + // v_add_f16_e32 v3, v1, v2 + // v_or_b32_e32 v4, v0, v3 + // to SDWA preserve dst:v4 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE preserve:v3 + + // Check if one of operands of v_or_b32 is SDWA instruction + using CheckRetType = Optional>; + auto CheckOROperandsForSDWA = + [&](const MachineOperand *Op1, const MachineOperand *Op2) -> CheckRetType { + if (!Op1 || !Op1->isReg() || !Op2 || !Op2->isReg()) + return CheckRetType(None); + + MachineOperand *Op1Def = findSingleRegDef(Op1, MRI); + if (!Op1Def) + return CheckRetType(None); + + MachineInstr *Op1Inst = Op1Def->getParent(); + if (!TII->isSDWA(*Op1Inst)) + return CheckRetType(None); + + MachineOperand *Op2Def = findSingleRegDef(Op2, MRI); + if (!Op2Def) + return CheckRetType(None); + + return CheckRetType(std::make_pair(Op1Def, Op2Def)); + }; + + MachineOperand *OrSDWA = TII->getNamedOperand(MI, AMDGPU::OpName::src0); + MachineOperand *OrOther = TII->getNamedOperand(MI, AMDGPU::OpName::src1); + assert(OrSDWA && OrOther); + auto Res = CheckOROperandsForSDWA(OrSDWA, OrOther); + if (!Res) { + OrSDWA = TII->getNamedOperand(MI, AMDGPU::OpName::src1); + OrOther = TII->getNamedOperand(MI, AMDGPU::OpName::src0); + assert(OrSDWA && OrOther); + Res = CheckOROperandsForSDWA(OrSDWA, OrOther); + if (!Res) break; - } - case AMDGPU::V_AND_B32_e32: - case AMDGPU::V_AND_B32_e64: { - // e.g.: - // from: v_and_b32_e32 v1, 0x0000ffff/0x000000ff, v0 - // to SDWA src:v0 src_sel:WORD_0/BYTE_0 - - MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0); - MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1); - auto ValSrc = Src1; - auto Imm = foldToImm(*Src0); - - if (!Imm) { - Imm = foldToImm(*Src1); - ValSrc = Src0; - } - - if (!Imm || (*Imm != 0x0000ffff && *Imm != 0x000000ff)) - break; - - MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst); - - if (TRI->isPhysicalRegister(Src1->getReg()) || - TRI->isPhysicalRegister(Dst->getReg())) - break; - - auto SDWASrc = make_unique( - ValSrc, Dst, *Imm == 0x0000ffff ? WORD_0 : BYTE_0); - DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n'); - SDWAOperands[&MI] = std::move(SDWASrc); + } + + MachineOperand *OrSDWADef = Res->first; + MachineOperand *OrOtherDef = Res->second; + assert(OrSDWADef && OrOtherDef); + + MachineInstr *SDWAInst = OrSDWADef->getParent(); + MachineInstr *OtherInst = OrOtherDef->getParent(); + + // Check that OtherInstr is actually bitwise compatible with SDWAInst = their + // destination patterns don't overlap. Compatible instruction can be either + // regular instruction with compatible bitness or SDWA instruction with + // correct dst_sel + // SDWAInst | OtherInst bitness / OtherInst dst_sel + // ----------------------------------------------------- + // DWORD | no / no + // WORD_0 | no / BYTE_2/3, WORD_1 + // WORD_1 | 8/16-bit instructions / BYTE_0/1, WORD_0 + // BYTE_0 | no / BYTE_1/2/3, WORD_1 + // BYTE_1 | 8-bit / BYTE_0/2/3, WORD_1 + // BYTE_2 | 8/16-bit / BYTE_0/1/3. WORD_0 + // BYTE_3 | 8/16/24-bit / BYTE_0/1/2, WORD_0 + // E.g. if SDWAInst is v_add_f16_sdwa dst_sel:WORD_1 then v_add_f16 is OK + // but v_add_f32 is not. + + // TODO: add support for non-SDWA instructions as OtherInst. + // For now this only works with SDWA instructions. For regular instructions + // there is no way to determine if instruction write only 8/16/24-bit out of + // full register size and all registers are at min 32-bit wide. 
+ if (!TII->isSDWA(*OtherInst)) + break; + + SdwaSel DstSel = static_cast( + TII->getNamedImmOperand(*SDWAInst, AMDGPU::OpName::dst_sel));; + SdwaSel OtherDstSel = static_cast( + TII->getNamedImmOperand(*OtherInst, AMDGPU::OpName::dst_sel)); + + bool DstSelAgree = false; + switch (DstSel) { + case WORD_0: DstSelAgree = ((OtherDstSel == BYTE_2) || + (OtherDstSel == BYTE_3) || + (OtherDstSel == WORD_1)); + break; + case WORD_1: DstSelAgree = ((OtherDstSel == BYTE_0) || + (OtherDstSel == BYTE_1) || + (OtherDstSel == WORD_0)); + break; + case BYTE_0: DstSelAgree = ((OtherDstSel == BYTE_1) || + (OtherDstSel == BYTE_2) || + (OtherDstSel == BYTE_3) || + (OtherDstSel == WORD_1)); + break; + case BYTE_1: DstSelAgree = ((OtherDstSel == BYTE_0) || + (OtherDstSel == BYTE_2) || + (OtherDstSel == BYTE_3) || + (OtherDstSel == WORD_1)); + break; + case BYTE_2: DstSelAgree = ((OtherDstSel == BYTE_0) || + (OtherDstSel == BYTE_1) || + (OtherDstSel == BYTE_3) || + (OtherDstSel == WORD_0)); + break; + case BYTE_3: DstSelAgree = ((OtherDstSel == BYTE_0) || + (OtherDstSel == BYTE_1) || + (OtherDstSel == BYTE_2) || + (OtherDstSel == WORD_0)); + break; + default: DstSelAgree = false; + } + + if (!DstSelAgree) + break; + + // Also OtherInst dst_unused should be UNUSED_PAD + DstUnused OtherDstUnused = static_cast( + TII->getNamedImmOperand(*OtherInst, AMDGPU::OpName::dst_unused)); + if (OtherDstUnused != DstUnused::UNUSED_PAD) + break; + + // Create DstPreserveOperand + MachineOperand *OrDst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst); + assert(OrDst && OrDst->isReg()); + + return make_unique( + OrDst, OrSDWADef, OrOtherDef, DstSel); + + } + } + + return std::unique_ptr(nullptr); +} + +void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) { + for (MachineBasicBlock &MBB : MF) { + for (MachineInstr &MI : MBB) { + if (auto Operand = matchSDWAOperand(MI)) { + DEBUG(dbgs() << "Match: " << MI << "To: " << *Operand << '\n'); + SDWAOperands[&MI] = std::move(Operand); ++NumSDWAPatternsFound; - break; - } } } } @@ -627,12 +818,16 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) { bool SIPeepholeSDWA::isConvertibleToSDWA(const MachineInstr &MI, const SISubtarget &ST) const { + // Check if this is already an SDWA instruction + unsigned Opc = MI.getOpcode(); + if (TII->isSDWA(Opc)) + return true; + // Check if this instruction has opcode that supports SDWA - int Opc = MI.getOpcode(); if (AMDGPU::getSDWAOp(Opc) == -1) Opc = AMDGPU::getVOPe32(Opc); - if (Opc == -1 || AMDGPU::getSDWAOp(Opc) == -1) + if (AMDGPU::getSDWAOp(Opc) == -1) return false; if (!ST.hasSDWAOmod() && TII->hasModifiersSet(MI, AMDGPU::OpName::omod)) @@ -665,9 +860,15 @@ bool SIPeepholeSDWA::isConvertibleToSDWA(const MachineInstr &MI, bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI, const SDWAOperandsVector &SDWAOperands) { // Convert to sdwa - int SDWAOpcode = AMDGPU::getSDWAOp(MI.getOpcode()); - if (SDWAOpcode == -1) - SDWAOpcode = AMDGPU::getSDWAOp(AMDGPU::getVOPe32(MI.getOpcode())); + int SDWAOpcode; + unsigned Opcode = MI.getOpcode(); + if (TII->isSDWA(Opcode)) { + SDWAOpcode = Opcode; + } else { + SDWAOpcode = AMDGPU::getSDWAOp(Opcode); + if (SDWAOpcode == -1) + SDWAOpcode = AMDGPU::getSDWAOp(AMDGPU::getVOPe32(Opcode)); + } assert(SDWAOpcode != -1); const MCInstrDesc &SDWADesc = TII->get(SDWAOpcode); @@ -743,25 +944,44 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI, } } - // Initialize dst_sel if present + // Copy dst_sel if present, initialize otherwise if needed if (AMDGPU::getNamedOperandIdx(SDWAOpcode, 
AMDGPU::OpName::dst_sel) != -1) { - SDWAInst.addImm(AMDGPU::SDWA::SdwaSel::DWORD); + MachineOperand *DstSel = TII->getNamedOperand(MI, AMDGPU::OpName::dst_sel); + if (DstSel) { + SDWAInst.add(*DstSel); + } else { + SDWAInst.addImm(AMDGPU::SDWA::SdwaSel::DWORD); + } } - // Initialize dst_unused if present + // Copy dst_unused if present, initialize otherwise if needed if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::dst_unused) != -1) { - SDWAInst.addImm(AMDGPU::SDWA::DstUnused::UNUSED_PAD); + MachineOperand *DstUnused = TII->getNamedOperand(MI, AMDGPU::OpName::dst_unused); + if (DstUnused) { + SDWAInst.add(*DstUnused); + } else { + SDWAInst.addImm(AMDGPU::SDWA::DstUnused::UNUSED_PAD); + } } - // Initialize src0_sel + // Copy src0_sel if present, initialize otherwise assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src0_sel) != -1); - SDWAInst.addImm(AMDGPU::SDWA::SdwaSel::DWORD); - + MachineOperand *Src0Sel = TII->getNamedOperand(MI, AMDGPU::OpName::src0_sel); + if (Src0Sel) { + SDWAInst.add(*Src0Sel); + } else { + SDWAInst.addImm(AMDGPU::SDWA::SdwaSel::DWORD); + } - // Initialize src1_sel if present + // Copy src1_sel if present, initialize otherwise if needed if (Src1) { assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src1_sel) != -1); - SDWAInst.addImm(AMDGPU::SDWA::SdwaSel::DWORD); + MachineOperand *Src1Sel = TII->getNamedOperand(MI, AMDGPU::OpName::src1_sel); + if (Src1Sel) { + SDWAInst.add(*Src1Sel); + } else { + SDWAInst.addImm(AMDGPU::SDWA::SdwaSel::DWORD); + } } // Apply all sdwa operand pattenrs @@ -800,7 +1020,7 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI, void SIPeepholeSDWA::legalizeScalarOperands(MachineInstr &MI, const SISubtarget &ST) const { const MCInstrDesc &Desc = TII->get(MI.getOpcode()); unsigned ConstantBusCount = 0; - for (MachineOperand &Op: MI.explicit_uses()) { + for (MachineOperand &Op : MI.explicit_uses()) { if (!Op.isImm() && !(Op.isReg() && !TRI->isVGPR(*MRI, Op.getReg()))) continue; @@ -830,7 +1050,7 @@ void SIPeepholeSDWA::legalizeScalarOperands(MachineInstr &MI, const SISubtarget bool SIPeepholeSDWA::runOnMachineFunction(MachineFunction &MF) { const SISubtarget &ST = MF.getSubtarget(); - if (!ST.hasSDWA() || skipFunction(*MF.getFunction())) + if (!ST.hasSDWA() || skipFunction(MF.getFunction())) return false; MRI = &MF.getRegInfo(); @@ -838,27 +1058,35 @@ bool SIPeepholeSDWA::runOnMachineFunction(MachineFunction &MF) { TII = ST.getInstrInfo(); // Find all SDWA operands in MF. 
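  // Note on the loop below (an inference from the code, not upstream wording):
  // converting an instruction to its SDWA form can expose new matches, e.g. the
  // v_or_b32 preserve fold above only fires once both defs feeding the OR are
  // already SDWA, so matching and conversion are repeated until a round converts
  // nothing; at that point ConvertedInstructions stays empty, Changed remains
  // false, and the do/while exits.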
- matchSDWAOperands(MF); + bool Changed = false; + bool Ret = false; + do { + matchSDWAOperands(MF); + + for (const auto &OperandPair : SDWAOperands) { + const auto &Operand = OperandPair.second; + MachineInstr *PotentialMI = Operand->potentialToConvert(TII); + if (PotentialMI && isConvertibleToSDWA(*PotentialMI, ST)) { + PotentialMatches[PotentialMI].push_back(Operand.get()); + } + } - for (const auto &OperandPair : SDWAOperands) { - const auto &Operand = OperandPair.second; - MachineInstr *PotentialMI = Operand->potentialToConvert(TII); - if (PotentialMI && isConvertibleToSDWA(*PotentialMI, ST)) { - PotentialMatches[PotentialMI].push_back(Operand.get()); + for (auto &PotentialPair : PotentialMatches) { + MachineInstr &PotentialMI = *PotentialPair.first; + convertToSDWA(PotentialMI, PotentialPair.second); } - } - for (auto &PotentialPair : PotentialMatches) { - MachineInstr &PotentialMI = *PotentialPair.first; - convertToSDWA(PotentialMI, PotentialPair.second); - } + PotentialMatches.clear(); + SDWAOperands.clear(); + + Changed = !ConvertedInstructions.empty(); - PotentialMatches.clear(); - SDWAOperands.clear(); + if (Changed) + Ret = true; - bool Ret = !ConvertedInstructions.empty(); - while (!ConvertedInstructions.empty()) - legalizeScalarOperands(*ConvertedInstructions.pop_back_val(), ST); + while (!ConvertedInstructions.empty()) + legalizeScalarOperands(*ConvertedInstructions.pop_back_val(), ST); + } while (Changed); return Ret; } diff --git a/lib/Target/AMDGPU/SIRegisterInfo.cpp b/lib/Target/AMDGPU/SIRegisterInfo.cpp index 152b24599e9d..3c73dd78f20c 100644 --- a/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -163,6 +163,9 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE); reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_LIMIT); + // Reserve xnack_mask registers - support is not implemented in Codegen. + reserveRegisterTuples(Reserved, AMDGPU::XNACK_MASK); + // Reserve Trap Handler registers - support is not implemented in Codegen. reserveRegisterTuples(Reserved, AMDGPU::TBA); reserveRegisterTuples(Reserved, AMDGPU::TMA); @@ -172,6 +175,8 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { reserveRegisterTuples(Reserved, AMDGPU::TTMP6_TTMP7); reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9); reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11); + reserveRegisterTuples(Reserved, AMDGPU::TTMP12_TTMP13); + reserveRegisterTuples(Reserved, AMDGPU::TTMP14_TTMP15); const SISubtarget &ST = MF.getSubtarget(); @@ -1049,8 +1054,8 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, // Convert to an absolute stack address by finding the offset from the // scratch wave base and scaling by the wave size. // - // In an entry function/kernel the stack address is already the absolute - // address relative to the the scratch wave offset. + // In an entry function/kernel the stack address is already the + // absolute address relative to the scratch wave offset. 
unsigned DiffReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); @@ -1071,8 +1076,6 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, .addImm(Log2_32(ST.getWavefrontSize())) .addReg(DiffReg); } else { - unsigned CarryOut - = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass); unsigned ScaledReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); @@ -1082,8 +1085,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, // TODO: Fold if use instruction is another add of a constant. if (AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) { - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ADD_I32_e64), ResultReg) - .addReg(CarryOut, RegState::Define | RegState::Dead) + TII->getAddNoCarry(*MBB, MI, DL, ResultReg) .addImm(Offset) .addReg(ScaledReg, RegState::Kill); } else { @@ -1092,13 +1094,10 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg) .addImm(Offset); - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ADD_I32_e64), ResultReg) - .addReg(CarryOut, RegState::Define | RegState::Dead) + TII->getAddNoCarry(*MBB, MI, DL, ResultReg) .addReg(ConstOffsetReg, RegState::Kill) .addReg(ScaledReg, RegState::Kill); } - - MRI.setRegAllocationHint(CarryOut, 0, AMDGPU::VCC); } // Don't introduce an extra copy if we're just materializing in a mov. @@ -1347,13 +1346,13 @@ bool SIRegisterInfo::shouldRewriteCopySrc( // class. // // e.g. if we have something like - // vreg0 = ... - // vreg1 = ... - // vreg2 = REG_SEQUENCE vreg0, sub0, vreg1, sub1, vreg2, sub2 - // vreg3 = COPY vreg2, sub0 + // %0 = ... + // %1 = ... + // %2 = REG_SEQUENCE %0, sub0, %1, sub1, %2, sub2 + // %3 = COPY %2, sub0 // // We want to look through the COPY to find: - // => vreg3 = COPY vreg0 + // => %3 = COPY %0 // Plain copy. 
return getCommonSubClass(DefRC, SrcRC) != nullptr; @@ -1518,7 +1517,7 @@ unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, const SIMachineFunctionInfo *MFI = MF.getInfo(); unsigned Occupancy = ST.getOccupancyWithLocalMemSize(MFI->getLDSSize(), - *MF.getFunction()); + MF.getFunction()); switch (RC->getID()) { default: return AMDGPURegisterInfo::getRegPressureLimit(RC, MF); diff --git a/lib/Target/AMDGPU/SIRegisterInfo.td b/lib/Target/AMDGPU/SIRegisterInfo.td index 5062a626d941..0c93125a58ae 100644 --- a/lib/Target/AMDGPU/SIRegisterInfo.td +++ b/lib/Target/AMDGPU/SIRegisterInfo.td @@ -7,6 +7,26 @@ // //===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// Helpers +//===----------------------------------------------------------------------===// + +class getSubRegs { + list ret2 = [sub0, sub1]; + list ret3 = [sub0, sub1, sub2]; + list ret4 = [sub0, sub1, sub2, sub3]; + list ret8 = [sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7]; + list ret16 = [sub0, sub1, sub2, sub3, + sub4, sub5, sub6, sub7, + sub8, sub9, sub10, sub11, + sub12, sub13, sub14, sub15]; + + list ret = !if(!eq(size, 2), ret2, + !if(!eq(size, 3), ret3, + !if(!eq(size, 4), ret4, + !if(!eq(size, 8), ret8, ret16)))); +} + //===----------------------------------------------------------------------===// // Declarations that describe the SI registers //===----------------------------------------------------------------------===// @@ -56,6 +76,16 @@ def SRC_SHARED_LIMIT : SIReg<"src_shared_limit", 236>; def SRC_PRIVATE_BASE : SIReg<"src_private_base", 237>; def SRC_PRIVATE_LIMIT : SIReg<"src_private_limit", 238>; +def XNACK_MASK_LO : SIReg<"xnack_mask_lo", 104>; +def XNACK_MASK_HI : SIReg<"xnack_mask_hi", 105>; + +def XNACK_MASK : RegisterWithSubRegs<"xnack_mask", [XNACK_MASK_LO, XNACK_MASK_HI]>, + DwarfRegAlias { + let Namespace = "AMDGPU"; + let SubRegIndices = [sub0, sub1]; + let HWEncoding = 104; +} + // Trap handler registers def TBA_LO : SIReg<"tba_lo", 108>; def TBA_HI : SIReg<"tba_hi", 109>; @@ -77,18 +107,11 @@ def TMA : RegisterWithSubRegs<"tma", [TMA_LO, TMA_HI]>, let HWEncoding = 110; } -def TTMP0 : SIReg <"ttmp0", 112>; -def TTMP1 : SIReg <"ttmp1", 113>; -def TTMP2 : SIReg <"ttmp2", 114>; -def TTMP3 : SIReg <"ttmp3", 115>; -def TTMP4 : SIReg <"ttmp4", 116>; -def TTMP5 : SIReg <"ttmp5", 117>; -def TTMP6 : SIReg <"ttmp6", 118>; -def TTMP7 : SIReg <"ttmp7", 119>; -def TTMP8 : SIReg <"ttmp8", 120>; -def TTMP9 : SIReg <"ttmp9", 121>; -def TTMP10 : SIReg <"ttmp10", 122>; -def TTMP11 : SIReg <"ttmp11", 123>; +foreach Index = 0-15 in { + def TTMP#Index#_vi : SIReg<"ttmp"#Index, !add(112, Index)>; + def TTMP#Index#_gfx9 : SIReg<"ttmp"#Index, !add(108, Index)>; + def TTMP#Index : SIReg<"", 0>; +} multiclass FLAT_SCR_LOHI_m ci_e, bits<16> vi_e> { def _ci : SIReg; @@ -148,19 +171,19 @@ def SGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, } // SGPR 64-bit registers -def SGPR_64Regs : RegisterTuples<[sub0, sub1], +def SGPR_64Regs : RegisterTuples.ret, [(add (decimate SGPR_32, 2)), (add (decimate (shl SGPR_32, 1), 2))]>; // SGPR 128-bit registers -def SGPR_128Regs : RegisterTuples<[sub0, sub1, sub2, sub3], +def SGPR_128Regs : RegisterTuples.ret, [(add (decimate SGPR_32, 4)), (add (decimate (shl SGPR_32, 1), 4)), (add (decimate (shl SGPR_32, 2), 4)), (add (decimate (shl SGPR_32, 3), 4))]>; // SGPR 256-bit registers -def SGPR_256 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, 
sub5, sub6, sub7], +def SGPR_256Regs : RegisterTuples.ret, [(add (decimate SGPR_32, 4)), (add (decimate (shl SGPR_32, 1), 4)), (add (decimate (shl SGPR_32, 2), 4)), @@ -171,8 +194,7 @@ def SGPR_256 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7], (add (decimate (shl SGPR_32, 7), 4))]>; // SGPR 512-bit registers -def SGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7, - sub8, sub9, sub10, sub11, sub12, sub13, sub14, sub15], +def SGPR_512Regs : RegisterTuples.ret, [(add (decimate SGPR_32, 4)), (add (decimate (shl SGPR_32, 1), 4)), (add (decimate (shl SGPR_32, 2), 4)), @@ -192,22 +214,130 @@ def SGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7, // Trap handler TMP 32-bit registers def TTMP_32 : RegisterClass<"AMDGPU", [i32, f32, v2i16, v2f16], 32, - (add (sequence "TTMP%u", 0, 11))> { + (add (sequence "TTMP%u", 0, 15))> { let isAllocatable = 0; } // Trap handler TMP 64-bit registers -def TTMP_64Regs : RegisterTuples<[sub0, sub1], +def TTMP_64Regs : RegisterTuples.ret, [(add (decimate TTMP_32, 2)), (add (decimate (shl TTMP_32, 1), 2))]>; // Trap handler TMP 128-bit registers -def TTMP_128Regs : RegisterTuples<[sub0, sub1, sub2, sub3], +def TTMP_128Regs : RegisterTuples.ret, [(add (decimate TTMP_32, 4)), (add (decimate (shl TTMP_32, 1), 4)), (add (decimate (shl TTMP_32, 2), 4)), (add (decimate (shl TTMP_32, 3), 4))]>; +def TTMP_256Regs : RegisterTuples.ret, + [(add (decimate TTMP_32, 4)), + (add (decimate (shl TTMP_32, 1), 4)), + (add (decimate (shl TTMP_32, 2), 4)), + (add (decimate (shl TTMP_32, 3), 4)), + (add (decimate (shl TTMP_32, 4), 4)), + (add (decimate (shl TTMP_32, 5), 4)), + (add (decimate (shl TTMP_32, 6), 4)), + (add (decimate (shl TTMP_32, 7), 4))]>; + +def TTMP_512Regs : RegisterTuples.ret, + [(add (decimate TTMP_32, 4)), + (add (decimate (shl TTMP_32, 1), 4)), + (add (decimate (shl TTMP_32, 2), 4)), + (add (decimate (shl TTMP_32, 3), 4)), + (add (decimate (shl TTMP_32, 4), 4)), + (add (decimate (shl TTMP_32, 5), 4)), + (add (decimate (shl TTMP_32, 6), 4)), + (add (decimate (shl TTMP_32, 7), 4)), + (add (decimate (shl TTMP_32, 8), 4)), + (add (decimate (shl TTMP_32, 9), 4)), + (add (decimate (shl TTMP_32, 10), 4)), + (add (decimate (shl TTMP_32, 11), 4)), + (add (decimate (shl TTMP_32, 12), 4)), + (add (decimate (shl TTMP_32, 13), 4)), + (add (decimate (shl TTMP_32, 14), 4)), + (add (decimate (shl TTMP_32, 15), 4))]>; + +class TmpRegTuplesBase subRegs, + list indices = getSubRegs.ret, + int index1 = !add(index, !add(size, -1)), + string name = "ttmp["#index#":"#index1#"]"> : + RegisterWithSubRegs { + let HWEncoding = subRegs[0].HWEncoding; + let SubRegIndices = indices; +} + +class TmpRegTuples("TTMP"#index0#tgt), + Register r1 = !cast("TTMP"#index1#tgt), + Register r2 = !cast("TTMP"#index2#tgt), + Register r3 = !cast("TTMP"#index3#tgt), + Register r4 = !cast("TTMP"#index4#tgt), + Register r5 = !cast("TTMP"#index5#tgt), + Register r6 = !cast("TTMP"#index6#tgt), + Register r7 = !cast("TTMP"#index7#tgt)> : + TmpRegTuplesBase.ret>; + +foreach Index = {0, 2, 4, 6, 8, 10, 12, 14} in { + def TTMP#Index#_TTMP#!add(Index,1)#_vi : TmpRegTuples<"_vi", 2, Index>; + def TTMP#Index#_TTMP#!add(Index,1)#_gfx9 : TmpRegTuples<"_gfx9", 2, Index>; +} + +foreach Index = {0, 4, 8, 12} in { + def TTMP#Index#_TTMP#!add(Index,1)# + _TTMP#!add(Index,2)# + _TTMP#!add(Index,3)#_vi : TmpRegTuples<"_vi", 4, Index>; + def TTMP#Index#_TTMP#!add(Index,1)# + _TTMP#!add(Index,2)# + _TTMP#!add(Index,3)#_gfx9 : TmpRegTuples<"_gfx9", 4, Index>; +} + 
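// For illustration (a hypothetical expansion, mirroring the explicit 16-wide
// tuples defined below): with Index = 0, the four-register gfx9 instantiation
// above is roughly equivalent to
//   def TTMP0_TTMP1_TTMP2_TTMP3_gfx9
//     : TmpRegTuplesBase<0, 4, [TTMP0_gfx9, TTMP1_gfx9, TTMP2_gfx9, TTMP3_gfx9]>;
// i.e. every TTMP tuple is defined once per encoding flavor (_vi and _gfx9),
// matching the per-target TTMP registers introduced earlier in this file.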
+foreach Index = {0, 4, 8} in { + def TTMP#Index#_TTMP#!add(Index,1)# + _TTMP#!add(Index,2)# + _TTMP#!add(Index,3)# + _TTMP#!add(Index,4)# + _TTMP#!add(Index,5)# + _TTMP#!add(Index,6)# + _TTMP#!add(Index,7)#_vi : TmpRegTuples<"_vi", 8, Index>; + def TTMP#Index#_TTMP#!add(Index,1)# + _TTMP#!add(Index,2)# + _TTMP#!add(Index,3)# + _TTMP#!add(Index,4)# + _TTMP#!add(Index,5)# + _TTMP#!add(Index,6)# + _TTMP#!add(Index,7)#_gfx9 : TmpRegTuples<"_gfx9", 8, Index>; +} + +def TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15_vi : + TmpRegTuplesBase<0, 16, + [TTMP0_vi, TTMP1_vi, TTMP2_vi, TTMP3_vi, + TTMP4_vi, TTMP5_vi, TTMP6_vi, TTMP7_vi, + TTMP8_vi, TTMP9_vi, TTMP10_vi, TTMP11_vi, + TTMP12_vi, TTMP13_vi, TTMP14_vi, TTMP15_vi]>; + +def TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15_gfx9 : + TmpRegTuplesBase<0, 16, + [TTMP0_gfx9, TTMP1_gfx9, TTMP2_gfx9, TTMP3_gfx9, + TTMP4_gfx9, TTMP5_gfx9, TTMP6_gfx9, TTMP7_gfx9, + TTMP8_gfx9, TTMP9_gfx9, TTMP10_gfx9, TTMP11_gfx9, + TTMP12_gfx9, TTMP13_gfx9, TTMP14_gfx9, TTMP15_gfx9]>; + + // VGPR 32-bit registers // i16/f16 only on VI+ def VGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, @@ -217,25 +347,25 @@ def VGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, } // VGPR 64-bit registers -def VGPR_64 : RegisterTuples<[sub0, sub1], +def VGPR_64 : RegisterTuples.ret, [(add (trunc VGPR_32, 255)), (add (shl VGPR_32, 1))]>; // VGPR 96-bit registers -def VGPR_96 : RegisterTuples<[sub0, sub1, sub2], +def VGPR_96 : RegisterTuples.ret, [(add (trunc VGPR_32, 254)), (add (shl VGPR_32, 1)), (add (shl VGPR_32, 2))]>; // VGPR 128-bit registers -def VGPR_128 : RegisterTuples<[sub0, sub1, sub2, sub3], +def VGPR_128 : RegisterTuples.ret, [(add (trunc VGPR_32, 253)), (add (shl VGPR_32, 1)), (add (shl VGPR_32, 2)), (add (shl VGPR_32, 3))]>; // VGPR 256-bit registers -def VGPR_256 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7], +def VGPR_256 : RegisterTuples.ret, [(add (trunc VGPR_32, 249)), (add (shl VGPR_32, 1)), (add (shl VGPR_32, 2)), @@ -246,8 +376,7 @@ def VGPR_256 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7], (add (shl VGPR_32, 7))]>; // VGPR 512-bit registers -def VGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7, - sub8, sub9, sub10, sub11, sub12, sub13, sub14, sub15], +def VGPR_512 : RegisterTuples.ret, [(add (trunc VGPR_32, 241)), (add (shl VGPR_32, 1)), (add (shl VGPR_32, 2)), @@ -284,7 +413,7 @@ def Pseudo_SReg_128 : RegisterClass<"AMDGPU", [v4i32, v2i64], 32, // Subset of SReg_32 without M0 for SMRD instructions and alike. // See comments in SIInstructions.td for more info. 
def SReg_32_XM0_XEXEC : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, - (add SGPR_32, VCC_LO, VCC_HI, FLAT_SCR_LO, FLAT_SCR_HI, + (add SGPR_32, VCC_LO, VCC_HI, FLAT_SCR_LO, FLAT_SCR_HI, XNACK_MASK_LO, XNACK_MASK_HI, TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI, SRC_SHARED_BASE, SRC_SHARED_LIMIT, SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT)> { let AllocationPriority = 7; @@ -316,7 +445,7 @@ def TTMP_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64], 32, (add TTMP_64Regs)> } def SReg_64_XEXEC : RegisterClass<"AMDGPU", [v2i32, i64, f64, i1], 32, - (add SGPR_64, VCC, FLAT_SCR, TTMP_64, TBA, TMA)> { + (add SGPR_64, VCC, FLAT_SCR, XNACK_MASK, TTMP_64, TBA, TMA)> { let CopyCost = 1; let AllocationPriority = 8; } @@ -345,13 +474,31 @@ def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32, } // End CopyCost = 2 -def SReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add SGPR_256)> { +def SGPR_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add SGPR_256Regs)> { + let AllocationPriority = 11; +} + +def TTMP_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add TTMP_256Regs)> { + let isAllocatable = 0; +} + +def SReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, + (add SGPR_256, TTMP_256)> { // Requires 4 s_mov_b64 to copy let CopyCost = 4; let AllocationPriority = 11; } -def SReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, (add SGPR_512)> { +def SGPR_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, (add SGPR_512Regs)> { + let AllocationPriority = 12; +} + +def TTMP_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, (add TTMP_512Regs)> { + let isAllocatable = 0; +} + +def SReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, + (add SGPR_512, TTMP_512)> { // Requires 8 s_mov_b64 to copy let CopyCost = 8; let AllocationPriority = 12; diff --git a/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/lib/Target/AMDGPU/SIShrinkInstructions.cpp index 874fbadca7f3..41f989ad3228 100644 --- a/lib/Target/AMDGPU/SIShrinkInstructions.cpp +++ b/lib/Target/AMDGPU/SIShrinkInstructions.cpp @@ -286,7 +286,7 @@ static void shrinkScalarCompare(const SIInstrInfo *TII, MachineInstr &MI) { } bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) { - if (skipFunction(*MF.getFunction())) + if (skipFunction(MF.getFunction())) return false; MachineRegisterInfo &MRI = MF.getRegInfo(); diff --git a/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/lib/Target/AMDGPU/SIWholeQuadMode.cpp index 186497331033..53aefe829737 100644 --- a/lib/Target/AMDGPU/SIWholeQuadMode.cpp +++ b/lib/Target/AMDGPU/SIWholeQuadMode.cpp @@ -65,7 +65,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/LiveInterval.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -224,7 +224,8 @@ FunctionPass *llvm::createSIWholeQuadModePass() { #ifndef NDEBUG LLVM_DUMP_METHOD void SIWholeQuadMode::printInfo() { for (const auto &BII : Blocks) { - dbgs() << "\nBB#" << BII.first->getNumber() << ":\n" + dbgs() << "\n" + << printMBBReference(*BII.first) << ":\n" << " InNeeds = " << PrintState(BII.second.InNeeds) << ", Needs = " << PrintState(BII.second.Needs) << ", OutNeeds = " << PrintState(BII.second.OutNeeds) << "\n\n"; @@ -306,7 +307,7 @@ void SIWholeQuadMode::markInstructionUses(const MachineInstr &MI, char Flag, char SIWholeQuadMode::scanInstructions(MachineFunction &MF, std::vector &Worklist) { char GlobalFlags = 0; - 
bool WQMOutputs = MF.getFunction()->hasFnAttribute("amdgpu-ps-wqm-outputs"); + bool WQMOutputs = MF.getFunction().hasFnAttribute("amdgpu-ps-wqm-outputs"); SmallVector SetInactiveInstrs; // We need to visit the basic blocks in reverse post-order so that we visit @@ -680,7 +681,7 @@ void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg, if (!isEntry && BI.Needs == StateWQM && BI.OutNeeds != StateExact) return; - DEBUG(dbgs() << "\nProcessing block BB#" << MBB.getNumber() << ":\n"); + DEBUG(dbgs() << "\nProcessing block " << printMBBReference(MBB) << ":\n"); unsigned SavedWQMReg = 0; unsigned SavedNonWWMReg = 0; @@ -841,7 +842,7 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) { Blocks.clear(); LiveMaskQueries.clear(); LowerToCopyInstrs.clear(); - CallingConv = MF.getFunction()->getCallingConv(); + CallingConv = MF.getFunction().getCallingConv(); const SISubtarget &ST = MF.getSubtarget(); diff --git a/lib/Target/AMDGPU/SMInstructions.td b/lib/Target/AMDGPU/SMInstructions.td index 5e72a2e88287..8f347986eb8a 100644 --- a/lib/Target/AMDGPU/SMInstructions.td +++ b/lib/Target/AMDGPU/SMInstructions.td @@ -129,11 +129,8 @@ class SM_Time_Pseudo : SM_Pseudo< opName, (outs SReg_64_XEXEC:$sdst), (ins), " $sdst", [(set i64:$sdst, (node))]> { let hasSideEffects = 1; - // FIXME: mayStore = ? is a workaround for tablegen bug for different - // inferred mayStore flags for the instruction pattern vs. standalone - // Pat. Each considers the other contradictory. - let mayStore = ?; - let mayLoad = ?; + let mayStore = 0; + let mayLoad = 1; let has_sbase = 0; let has_offset = 0; } diff --git a/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp b/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp index 03b11ae80500..9eb4c6513cce 100644 --- a/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp +++ b/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp @@ -61,7 +61,15 @@ const char* const IdSymbolic[] = { "HW_REG_HW_ID", "HW_REG_GPR_ALLOC", "HW_REG_LDS_ALLOC", - "HW_REG_IB_STS" + "HW_REG_IB_STS", + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + "HW_REG_SH_MEM_BASES" }; } // namespace Hwreg diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 15fdbc2d1e4c..0deb66b6452f 100644 --- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -23,6 +23,7 @@ #include "llvm/IR/Module.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSubtargetInfo.h" @@ -39,7 +40,9 @@ #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #define GET_INSTRINFO_NAMED_OPS +#define GET_INSTRMAP_INFO #include "AMDGPUGenInstrInfo.inc" +#undef GET_INSTRMAP_INFO #undef GET_INSTRINFO_NAMED_OPS namespace { @@ -100,15 +103,76 @@ static cl::opt EnablePackedInlinableLiterals( namespace AMDGPU { +LLVM_READNONE +static inline Channels indexToChannel(unsigned Channel) { + switch (Channel) { + case 1: + return AMDGPU::Channels_1; + case 2: + return AMDGPU::Channels_2; + case 3: + return AMDGPU::Channels_3; + case 4: + return AMDGPU::Channels_4; + default: + llvm_unreachable("invalid MIMG channel"); + } +} + + +// FIXME: Need to handle d16 images correctly. 
+static unsigned rcToChannels(unsigned RCID) { + switch (RCID) { + case AMDGPU::VGPR_32RegClassID: + return 1; + case AMDGPU::VReg_64RegClassID: + return 2; + case AMDGPU::VReg_96RegClassID: + return 3; + case AMDGPU::VReg_128RegClassID: + return 4; + default: + llvm_unreachable("invalid MIMG register class"); + } +} + +int getMaskedMIMGOp(const MCInstrInfo &MII, unsigned Opc, unsigned NewChannels) { + AMDGPU::Channels Channel = AMDGPU::indexToChannel(NewChannels); + unsigned OrigChannels = rcToChannels(MII.get(Opc).OpInfo[0].RegClass); + if (NewChannels == OrigChannels) + return Opc; + + switch (OrigChannels) { + case 1: + return AMDGPU::getMaskedMIMGOp1(Opc, Channel); + case 2: + return AMDGPU::getMaskedMIMGOp2(Opc, Channel); + case 3: + return AMDGPU::getMaskedMIMGOp3(Opc, Channel); + case 4: + return AMDGPU::getMaskedMIMGOp4(Opc, Channel); + default: + llvm_unreachable("invalid MIMG channel"); + } +} + +// Wrapper for Tablegen'd function. enum Subtarget is not defined in any +// header files, so we need to wrap it in a function that takes unsigned +// instead. +int getMCOpcode(uint16_t Opcode, unsigned Gen) { + return getMCOpcodeGen(Opcode, static_cast(Gen)); +} + namespace IsaInfo { IsaVersion getIsaVersion(const FeatureBitset &Features) { - // SI. + // GCN GFX6 (Southern Islands (SI)). if (Features.test(FeatureISAVersion6_0_0)) return {6, 0, 0}; if (Features.test(FeatureISAVersion6_0_1)) return {6, 0, 1}; - // CI. + + // GCN GFX7 (Sea Islands (CI)). if (Features.test(FeatureISAVersion7_0_0)) return {7, 0, 0}; if (Features.test(FeatureISAVersion7_0_1)) @@ -117,8 +181,10 @@ IsaVersion getIsaVersion(const FeatureBitset &Features) { return {7, 0, 2}; if (Features.test(FeatureISAVersion7_0_3)) return {7, 0, 3}; + if (Features.test(FeatureISAVersion7_0_4)) + return {7, 0, 4}; - // VI. + // GCN GFX8 (Volcanic Islands (VI)). if (Features.test(FeatureISAVersion8_0_0)) return {8, 0, 0}; if (Features.test(FeatureISAVersion8_0_1)) @@ -127,20 +193,14 @@ IsaVersion getIsaVersion(const FeatureBitset &Features) { return {8, 0, 2}; if (Features.test(FeatureISAVersion8_0_3)) return {8, 0, 3}; - if (Features.test(FeatureISAVersion8_0_4)) - return {8, 0, 4}; if (Features.test(FeatureISAVersion8_1_0)) return {8, 1, 0}; - // GFX9. + // GCN GFX9. 
if (Features.test(FeatureISAVersion9_0_0)) return {9, 0, 0}; - if (Features.test(FeatureISAVersion9_0_1)) - return {9, 0, 1}; if (Features.test(FeatureISAVersion9_0_2)) return {9, 0, 2}; - if (Features.test(FeatureISAVersion9_0_3)) - return {9, 0, 3}; if (!Features.test(FeatureGCN) || Features.test(FeatureSouthernIslands)) return {0, 0, 0}; @@ -538,6 +598,10 @@ bool isEntryFunctionCC(CallingConv::ID CC) { } } +bool hasXNACK(const MCSubtargetInfo &STI) { + return STI.getFeatureBits()[AMDGPU::FeatureXNACK]; +} + bool isSI(const MCSubtargetInfo &STI) { return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands]; } @@ -572,44 +636,72 @@ bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI) { return false; } -unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) { +#define MAP_REG2REG \ + using namespace AMDGPU; \ + switch(Reg) { \ + default: return Reg; \ + CASE_CI_VI(FLAT_SCR) \ + CASE_CI_VI(FLAT_SCR_LO) \ + CASE_CI_VI(FLAT_SCR_HI) \ + CASE_VI_GFX9(TTMP0) \ + CASE_VI_GFX9(TTMP1) \ + CASE_VI_GFX9(TTMP2) \ + CASE_VI_GFX9(TTMP3) \ + CASE_VI_GFX9(TTMP4) \ + CASE_VI_GFX9(TTMP5) \ + CASE_VI_GFX9(TTMP6) \ + CASE_VI_GFX9(TTMP7) \ + CASE_VI_GFX9(TTMP8) \ + CASE_VI_GFX9(TTMP9) \ + CASE_VI_GFX9(TTMP10) \ + CASE_VI_GFX9(TTMP11) \ + CASE_VI_GFX9(TTMP12) \ + CASE_VI_GFX9(TTMP13) \ + CASE_VI_GFX9(TTMP14) \ + CASE_VI_GFX9(TTMP15) \ + CASE_VI_GFX9(TTMP0_TTMP1) \ + CASE_VI_GFX9(TTMP2_TTMP3) \ + CASE_VI_GFX9(TTMP4_TTMP5) \ + CASE_VI_GFX9(TTMP6_TTMP7) \ + CASE_VI_GFX9(TTMP8_TTMP9) \ + CASE_VI_GFX9(TTMP10_TTMP11) \ + CASE_VI_GFX9(TTMP12_TTMP13) \ + CASE_VI_GFX9(TTMP14_TTMP15) \ + CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3) \ + CASE_VI_GFX9(TTMP4_TTMP5_TTMP6_TTMP7) \ + CASE_VI_GFX9(TTMP8_TTMP9_TTMP10_TTMP11) \ + CASE_VI_GFX9(TTMP12_TTMP13_TTMP14_TTMP15) \ + CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \ + CASE_VI_GFX9(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \ + CASE_VI_GFX9(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \ + CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \ + } - switch(Reg) { - default: break; - case AMDGPU::FLAT_SCR: - assert(!isSI(STI)); - return isCI(STI) ? AMDGPU::FLAT_SCR_ci : AMDGPU::FLAT_SCR_vi; +#define CASE_CI_VI(node) \ + assert(!isSI(STI)); \ + case node: return isCI(STI) ? node##_ci : node##_vi; - case AMDGPU::FLAT_SCR_LO: - assert(!isSI(STI)); - return isCI(STI) ? AMDGPU::FLAT_SCR_LO_ci : AMDGPU::FLAT_SCR_LO_vi; +#define CASE_VI_GFX9(node) \ + case node: return isGFX9(STI) ? node##_gfx9 : node##_vi; - case AMDGPU::FLAT_SCR_HI: - assert(!isSI(STI)); - return isCI(STI) ? 
AMDGPU::FLAT_SCR_HI_ci : AMDGPU::FLAT_SCR_HI_vi; - } - return Reg; +unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) { + MAP_REG2REG } -unsigned mc2PseudoReg(unsigned Reg) { - switch (Reg) { - case AMDGPU::FLAT_SCR_ci: - case AMDGPU::FLAT_SCR_vi: - return FLAT_SCR; - - case AMDGPU::FLAT_SCR_LO_ci: - case AMDGPU::FLAT_SCR_LO_vi: - return AMDGPU::FLAT_SCR_LO; +#undef CASE_CI_VI +#undef CASE_VI_GFX9 - case AMDGPU::FLAT_SCR_HI_ci: - case AMDGPU::FLAT_SCR_HI_vi: - return AMDGPU::FLAT_SCR_HI; +#define CASE_CI_VI(node) case node##_ci: case node##_vi: return node; +#define CASE_VI_GFX9(node) case node##_vi: case node##_gfx9: return node; - default: - return Reg; - } +unsigned mc2PseudoReg(unsigned Reg) { + MAP_REG2REG } +#undef CASE_CI_VI +#undef CASE_VI_GFX9 +#undef MAP_REG2REG + bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) { assert(OpNo < Desc.NumOperands); unsigned OpType = Desc.OpInfo[OpNo].OperandType; @@ -812,6 +904,7 @@ bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) { return isGCN3Encoding(ST) ? isUInt<20>(EncodedOffset) : isUInt<8>(EncodedOffset); } + } // end namespace AMDGPU } // end namespace llvm diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index c4b7779514f0..0c1d69765942 100644 --- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -156,6 +156,12 @@ unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU); LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx); +LLVM_READONLY +int getMaskedMIMGOp(const MCInstrInfo &MII, + unsigned Opc, unsigned NewChannels); +LLVM_READONLY +int getMCOpcode(uint16_t Opcode, unsigned Gen); + void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, const FeatureBitset &Features); @@ -272,6 +278,8 @@ inline bool isKernel(CallingConv::ID CC) { } } +bool hasXNACK(const MCSubtargetInfo &STI); + bool isSI(const MCSubtargetInfo &STI); bool isCI(const MCSubtargetInfo &STI); bool isVI(const MCSubtargetInfo &STI); diff --git a/lib/Target/AMDGPU/VOP1Instructions.td b/lib/Target/AMDGPU/VOP1Instructions.td index ff2bd2454400..29415c2f0d90 100644 --- a/lib/Target/AMDGPU/VOP1Instructions.td +++ b/lib/Target/AMDGPU/VOP1Instructions.td @@ -86,6 +86,7 @@ class VOP1_Real : let TSFlags = ps.TSFlags; let UseNamedOperandTable = ps.UseNamedOperandTable; let Uses = ps.Uses; + let Defs = ps.Defs; } class VOP1_SDWA_Pseudo pattern=[]> : diff --git a/lib/Target/AMDGPU/VOP2Instructions.td b/lib/Target/AMDGPU/VOP2Instructions.td index f870f511ba4e..09cb2bb73bf2 100644 --- a/lib/Target/AMDGPU/VOP2Instructions.td +++ b/lib/Target/AMDGPU/VOP2Instructions.td @@ -107,6 +107,7 @@ class VOP2_Real : let TSFlags = ps.TSFlags; let UseNamedOperandTable = ps.UseNamedOperandTable; let Uses = ps.Uses; + let Defs = ps.Defs; } class VOP2_SDWA_Pseudo pattern=[]> : @@ -128,15 +129,20 @@ class getVOP2Pat64 : LetDummies { multiclass VOP2Inst { + string revOp = opName, + bit GFX9Renamed = 0> { - def _e32 : VOP2_Pseudo , - Commutable_REV; + let renamedInGFX9 = GFX9Renamed in { + + def _e32 : VOP2_Pseudo , + Commutable_REV; + + def _e64 : VOP3_Pseudo .ret>, + Commutable_REV; - def _e64 : VOP3_Pseudo .ret>, - Commutable_REV; + def _sdwa : VOP2_SDWA_Pseudo ; - def _sdwa : VOP2_SDWA_Pseudo ; + } } multiclass VOP2bInst ; -defm V_SUB_U32 : VOP2Inst <"v_sub_u32", VOP_I32_I32_I32>; -defm V_SUBREV_U32 : VOP2Inst <"v_subrev_u32", VOP_I32_I32_I32, null_frag, "v_sub_u32">; +defm V_ADD_U32 : VOP2Inst <"v_add_u32", VOP_I32_I32_I32, 
null_frag, "v_add_u32", 1>; +defm V_SUB_U32 : VOP2Inst <"v_sub_u32", VOP_I32_I32_I32, null_frag, "v_sub_u32", 1>; +defm V_SUBREV_U32 : VOP2Inst <"v_subrev_u32", VOP_I32_I32_I32, null_frag, "v_sub_u32", 1>; } } // End isCommutable = 1 diff --git a/lib/Target/AMDGPU/VOPCInstructions.td b/lib/Target/AMDGPU/VOPCInstructions.td index 146870e21531..f8879d6bd8f6 100644 --- a/lib/Target/AMDGPU/VOPCInstructions.td +++ b/lib/Target/AMDGPU/VOPCInstructions.td @@ -106,6 +106,7 @@ class VOPC_Real : let TSFlags = ps.TSFlags; let UseNamedOperandTable = ps.UseNamedOperandTable; let Uses = ps.Uses; + let Defs = ps.Defs; } class VOPC_SDWA_Pseudo pattern=[]> : diff --git a/lib/Target/AMDGPU/VOPInstructions.td b/lib/Target/AMDGPU/VOPInstructions.td index f24ff5ce8dea..520d5dd0f50f 100644 --- a/lib/Target/AMDGPU/VOPInstructions.td +++ b/lib/Target/AMDGPU/VOPInstructions.td @@ -149,6 +149,7 @@ class VOP3_Real : let TSFlags = ps.TSFlags; let UseNamedOperandTable = ps.UseNamedOperandTable; let Uses = ps.Uses; + let Defs = ps.Defs; VOPProfile Pfl = ps.Pfl; } diff --git a/lib/Target/ARC/ARCBranchFinalize.cpp b/lib/Target/ARC/ARCBranchFinalize.cpp index e5b0f8f3208e..9341e7bdda41 100644 --- a/lib/Target/ARC/ARCBranchFinalize.cpp +++ b/lib/Target/ARC/ARCBranchFinalize.cpp @@ -142,7 +142,7 @@ void ARCBranchFinalize::replaceWithCmpBcc(MachineInstr *MI) const { bool ARCBranchFinalize::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "Running ARC Branch Finalize on " - << MF.getFunction()->getName() << "\n"); + << MF.getName() << "\n"); std::vector Branches; bool Changed = false; unsigned MaxSize = 0; @@ -172,7 +172,7 @@ bool ARCBranchFinalize::runOnMachineFunction(MachineFunction &MF) { isInt<9>(MaxSize) ? replaceWithBRcc(P.first) : replaceWithCmpBcc(P.first); } - DEBUG(dbgs() << "Estimated function size for " << MF.getFunction()->getName() + DEBUG(dbgs() << "Estimated function size for " << MF.getName() << ": " << MaxSize << "\n"); return Changed; diff --git a/lib/Target/ARC/ARCFrameLowering.cpp b/lib/Target/ARC/ARCFrameLowering.cpp index 2976798eedf6..195a781950be 100644 --- a/lib/Target/ARC/ARCFrameLowering.cpp +++ b/lib/Target/ARC/ARCFrameLowering.cpp @@ -88,7 +88,7 @@ determineLastCalleeSave(const std::vector &CSI) { void ARCFrameLowering::determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS) const { - DEBUG(dbgs() << "Determine Callee Saves: " << MF.getFunction()->getName() + DEBUG(dbgs() << "Determine Callee Saves: " << MF.getName() << "\n"); TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); SavedRegs.set(ARC::BLINK); @@ -115,7 +115,7 @@ void ARCFrameLowering::adjustStackToMatchRecords( /// registers onto the stack, when enough callee saved registers are required. void ARCFrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { - DEBUG(dbgs() << "Emit Prologue: " << MF.getFunction()->getName() << "\n"); + DEBUG(dbgs() << "Emit Prologue: " << MF.getName() << "\n"); auto *AFI = MF.getInfo(); MachineModuleInfo &MMI = MF.getMMI(); MCContext &Context = MMI.getContext(); @@ -131,7 +131,7 @@ void ARCFrameLowering::emitPrologue(MachineFunction &MF, unsigned StackSlotsUsedByFunclet = 0; bool SavedBlink = false; unsigned AlreadyAdjusted = 0; - if (MF.getFunction()->isVarArg()) { + if (MF.getFunction().isVarArg()) { // Add in the varargs area here first. 
DEBUG(dbgs() << "Varargs\n"); unsigned VarArgsBytes = MFI.getObjectSize(AFI->getVarArgsFrameIndex()); @@ -235,7 +235,7 @@ void ARCFrameLowering::emitPrologue(MachineFunction &MF, /// registers onto the stack, when enough callee saved registers are required. void ARCFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { - DEBUG(dbgs() << "Emit Epilogue: " << MF.getFunction()->getName() << "\n"); + DEBUG(dbgs() << "Emit Epilogue: " << MF.getName() << "\n"); auto *AFI = MF.getInfo(); const ARCInstrInfo *TII = MF.getSubtarget().getInstrInfo(); MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); @@ -302,7 +302,7 @@ void ARCFrameLowering::emitEpilogue(MachineFunction &MF, } // Relieve the varargs area if necessary. - if (MF.getFunction()->isVarArg()) { + if (MF.getFunction().isVarArg()) { // Add in the varargs area here first. DEBUG(dbgs() << "Varargs\n"); unsigned VarArgsBytes = MFI.getObjectSize(AFI->getVarArgsFrameIndex()); @@ -383,7 +383,7 @@ bool ARCFrameLowering::spillCalleeSavedRegisters( const std::vector &CSI, const TargetRegisterInfo *TRI) const { DEBUG(dbgs() << "Spill callee saved registers: " - << MBB.getParent()->getFunction()->getName() << "\n"); + << MBB.getParent()->getName() << "\n"); // There are routines for saving at least 3 registers (r13 to r15, etc.) unsigned Last = determineLastCalleeSave(CSI); if (UseSaveRestoreFunclet && Last > ARC::R14) { @@ -400,7 +400,7 @@ bool ARCFrameLowering::restoreCalleeSavedRegisters( MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, std::vector &CSI, const TargetRegisterInfo *TRI) const { DEBUG(dbgs() << "Restore callee saved registers: " - << MBB.getParent()->getFunction()->getName() << "\n"); + << MBB.getParent()->getName() << "\n"); // There are routines for saving at least 3 registers (r13 to r15, etc.) 
unsigned Last = determineLastCalleeSave(CSI); if (UseSaveRestoreFunclet && Last > ARC::R14) { @@ -415,7 +415,7 @@ void ARCFrameLowering::processFunctionBeforeFrameFinalized( MachineFunction &MF, RegScavenger *RS) const { const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); DEBUG(dbgs() << "Process function before frame finalized: " - << MF.getFunction()->getName() << "\n"); + << MF.getName() << "\n"); MachineFrameInfo &MFI = MF.getFrameInfo(); DEBUG(dbgs() << "Current stack size: " << MFI.getStackSize() << "\n"); const TargetRegisterClass *RC = &ARC::GPR32RegClass; @@ -440,8 +440,7 @@ static void emitRegUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator ARCFrameLowering::eliminateCallFramePseudoInstr( MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { - DEBUG(dbgs() << "EmitCallFramePseudo: " << MF.getFunction()->getName() - << "\n"); + DEBUG(dbgs() << "EmitCallFramePseudo: " << MF.getName() << "\n"); const ARCInstrInfo *TII = MF.getSubtarget().getInstrInfo(); MachineInstr &Old = *I; DebugLoc dl = Old.getDebugLoc(); diff --git a/lib/Target/ARC/ARCInstrFormats.td b/lib/Target/ARC/ARCInstrFormats.td index 94240e90a601..50edddd4ea9f 100644 --- a/lib/Target/ARC/ARCInstrFormats.td +++ b/lib/Target/ARC/ARCInstrFormats.td @@ -17,18 +17,23 @@ class Encoding64 { } // Address operands -def immU6 : Operand, PatLeaf<(imm), [{ - return isUInt<6>(N->getSExtValue()); }]> { + +class immU : Operand, PatLeaf<(imm), + "\n return isUInt<"#BSz#">(N->getSExtValue());"> { } -def immS12 : Operand, PatLeaf<(imm), [{ - return isInt<12>(N->getSExtValue()); }]> { - let DecoderMethod = "DecodeS12Operand"; +def immU6 : immU<6>; + +class immS : Operand, PatLeaf<(imm), + "\n return isInt<"#BSz#">(N->getSExtValue());"> { + let DecoderMethod = "DecodeSignedOperand<"#BSz#">"; } -def immS9 : Operand, PatLeaf<(imm), [{ - return isInt<9>(N->getSExtValue()); }]> { - let DecoderMethod = "DecodeS9Operand"; +// e.g. s3 field may encode the signed integers values -1 .. 6 +// using binary codes 111, 000, 001, 010, 011, 100, 101, and 110, respectively +class immC : Operand, PatLeaf<(imm), + "\n return isInt<"#BSz#">(N->getSExtValue());"> { + let DecoderMethod = "DecodeFromCyclicRange<"#BSz#">"; } def MEMii : Operand { @@ -36,7 +41,7 @@ def MEMii : Operand { } def MEMrs9 : Operand { - let MIOperandInfo = (ops GPR32:$B, immS9:$S9); + let MIOperandInfo = (ops GPR32:$B, immS<9>:$S9); let PrintMethod = "printMemOperandRI"; let DecoderMethod = "DecodeMEMrs9"; } @@ -47,6 +52,10 @@ def MEMrlimm : Operand { let DecoderMethod = "DecodeMEMrlimm"; } +def GPR32Reduced : Operand { + let DecoderMethod = "DecodeGBR32ShortRegister"; +} + class InstARC pattern> : Instruction, Encoding64 { @@ -153,7 +162,6 @@ class F32_BR1_BL_COND pat> : let Inst{17} = 0; } - // BRcc targets have limited 9-bit range. These are for compare and branch // in single instruction. Their targets are 2-byte aligned. They also use // a different (3-bit) set of condition codes. @@ -464,6 +472,342 @@ class F32_ST_LIMM zz, dag outs, dag ins, let DecoderMethod = "DecodeStLImmInstruction"; } +// Compact Move/Load. +// |10|9|8|7|6|5|4|3|2|1|0| +// | |h | |i|H | +class F16_COMPACT i, dag outs, dag ins, + string asmstr> : + InstARC<2, outs, ins, asmstr, []> { + + bits<5> h; + + let Inst{15-11} = 0b01000; + let Inst{7-5} = h{2-0}; + let Inst{2} = i; + let Inst{1-0} = h{4-3}; +} + +// Compact Load/Add/Sub. 
+class F16_LD_ADD_SUB : + InstARC<2, outs, ins, asmstr, []> { + + bits<3> b; + let Inst{15-11} = 0b01001; + let Inst{10-8} = b; +} + +class F16_LD_SUB : + F16_LD_ADD_SUB<(outs GPR32:$a), (ins GPR32:$b, GPR32:$c), + asmstr> { + + bits<3> a; + bits<3> c; + + let Inst{7-5} = c; + let Inst{4} = i; + let Inst{3} = 0; + let Inst{2-0} = a; +} + +class F16_ADD : + F16_LD_ADD_SUB<(outs GPR32:$r), (ins GPR32:$b, immU<6>:$u6), + "add_s\t$r, $b, $u6"> { + + bit r; + bits<6> u6; + + let Inst{7} = r; + let Inst{6-4} = u6{5-3}; + let Inst{3} = 1; + let Inst{2-0} = u6{2-0}; +} + +// Compact Load/Store. +class F16_LD_ST_1 : + InstARC<2, outs, ins, asmstr, []> { + + let Inst{15-11} = 0b01010; +} + +class F16_LD_ST_s11 : + F16_LD_ST_1<(outs), (ins immS<11>:$s11), asmstr> { + + bits<11> s11; + + let Inst{10-5} = s11{10-5}; + let Inst{4} = i; + let Inst{3} = 0; + let Inst{2-0} = s11{4-2}; + let s11{1-0} = 0b00; +} + +class F16_LDI_u7 : + F16_LD_ST_1<(outs GPR32:$b), (ins immU<7>:$u7), + "ldi_s\t$b, [$u7]"> { + + bits<3> b; + bits<7> u7; + + let Inst{10-8} = b; + let Inst{7-4} = u7{6-3}; + let Inst{3} = 1; + let Inst{2-0} = u7{2-0}; +} + +// Indexed Jump or Execute. +class F16_JLI_EI : + InstARC<2, (outs), (ins immU<10>:$u10), + !strconcat(asmstr, "\t$u10"), []> { + + bits<10> u10; + + let Inst{15-11} = 0b01011; + let Inst{10} = i; + let Inst{9-0} = u10; +} + +// Load/Add Register-Register. +class F16_LD_ADD_RR i, string asmstr> : + InstARC<2, (outs GPR32:$a), (ins GPR32:$b, GPR32:$c), + asmstr, []> { + + bits<3> a; + bits<3> b; + bits<3> c; + + let Inst{15-11} = 0b01100; + let Inst{10-8} = b; + let Inst{7-5} = c; + let Inst{4-3} = i; + let Inst{2-0} = a; +} + +// Load/Add GP-Relative. +class F16_GP_LD_ADD i, dag ins, string asmstr> : + InstARC<2, (outs), ins, asmstr, []> { + + let Inst{15-11} = 0b11001; + let Inst{10-9} = i; +} + +// Add/Sub/Shift Register-Immediate. +// |10|9|8|7|6|5|4|3|2|1|0| +// |b |c |i |u | +class F16_ADD_IMM i, string asmstr> : + InstARC<2, (outs GPR32:$c), (ins GPR32:$b, immU<3>:$u3), + !strconcat(asmstr, "\t$c, $b, $u3"), []> { + + bits<3> b; + bits<3> c; + bits<3> u3; + + let Inst{15-11} = 0b01101; + let Inst{10-8} = b; + let Inst{7-5} = c; + let Inst{4-3} = i; + let Inst{2-0} = u3; +} + +// Dual Register Operations. +// |10|9|8|7|6|5|4|3|2|1|0| +// |b/s |h |i |H | +class F16_OP_HREG i, dag outs, dag ins, string asmstr> : + InstARC<2, outs, ins, asmstr, []> { + + bits<3> b_s3; + bits<5> h; + + let Inst{15-11} = 0b01110; + let Inst{10-8} = b_s3; + let Inst{7-5} = h{2-0}; + let Inst{4-2} = i; + let Inst{1-0} = h{4-3}; +} + +class F16_OP_HREG30 i, dag outs, dag ins, string asmstr> : + F16_OP_HREG { + + bits<5> LImmReg = 0b11110; + let Inst{7-5} = LImmReg{2-0}; + let Inst{1-0} = LImmReg{4-3}; +} + +class F16_OP_HREG_LIMM i, dag outs, dag ins, string asmstr> : + F16_OP_HREG30 { + + bits<32> LImm; + let Inst{47-16} = LImm; + let Size = 6; +} + +// General compact DOP format. 
+class F16_GEN_DOP_BASE i, dag outs, dag ins, string asmstr> : + InstARC<2, outs, ins, asmstr, []> { + + bits<3> b; + bits<3> c; + let Inst{15-11} = 0b01111; + let Inst{10-8} = b; + let Inst{7-5} = c; + let Inst{4-0} = i; +} + +class F16_GEN_DOP i, string asmstr> : + F16_GEN_DOP_BASE; + +class F16_GEN_DOP_NODST i, string asmstr> : + F16_GEN_DOP_BASE; + +class F16_GEN_DOP_SINGLESRC i, string asmstr> : + F16_GEN_DOP_BASE; + +class F16_GEN_SOP_BASE i, dag outs, dag ins, string asmstr> : + F16_GEN_DOP_BASE<0b00000, outs, ins, asmstr> { + + let c = i; +} + +class F16_GEN_SOP i, string asmstr> : + F16_GEN_SOP_BASE; + +class F16_GEN_ZOP i, string asmstr> : + F16_GEN_SOP_BASE<0b111, (outs), (ins), asmstr> { + + let b = i; +} + +// Compact Load/Store with Offset Format. +class F16_LD_ST_OFF opc, dag outs, dag ins, string asmstr> : + InstARC<2, outs, ins, !strconcat(asmstr, "\t$c, [$b, $off]"), []> { + + bits<3> b; + bits<3> c; + let Inst{15-11} = opc; + let Inst{10-8} = b; + let Inst{7-5} = c; +} + +class F16_LD_ST_WORD_OFF opc, dag outs, dag ins, string asmstr> : + F16_LD_ST_OFF { + + bits<7> off; + let Inst{4-0} = off{6-2}; + let off{1-0} = 0b00; +} + +class F16_LD_ST_HALF_OFF opc, dag outs, dag ins, string asmstr> : + F16_LD_ST_OFF { + + bits<6> off; + let Inst{4-0} = off{5-1}; + let off{0} = 0b0; +} + +class F16_LD_ST_BYTE_OFF opc, dag outs, dag ins, string asmstr> : + F16_LD_ST_OFF { + + bits<5> off; + let Inst{4-0} = off; +} + +// Shift/Subtract/Bit Immediate. +// |10|9|8|7|6|5|4|3|2|1|0| +// |b |i |u | +class F16_SH_SUB_BIT i, string asmstr> : + InstARC<2, (outs), (ins GPR32:$b, immU<5>:$u5), asmstr, []> { + + bits<3> b; + bits<5> u5; + + let Inst{15-11} = 0b10111; + let Inst{10-8} = b; + let Inst{7-5} = i; + let Inst{4-0} = u5; +} + +class F16_SH_SUB_BIT_DST i, string asmstr> : + F16_SH_SUB_BIT; + +// 16-bit stack-based operations. +// |10|9|8|7|6|5|4|3|2|1|0| +// |b |i |u | +class F16_SP_OPS i, + dag outs, dag ins, string asmstr> : + InstARC<2, outs, ins, asmstr, []> { + + bits<3> fieldB; + bits<5> fieldU; + + let Inst{15-11} = 0b11000; + let Inst{10-8} = fieldB; + let Inst{7-5} = i; + let Inst{4-0} = fieldU; +} + +class F16_SP_OPS_u7_aligned i, + dag outs, dag ins, string asmstr> : + F16_SP_OPS { + + bits<3> b3; + bits<7> u7; + + let fieldB = b3; + let fieldU = u7{6-2}; + let u7{1-0} = 0b00; +} + +class F16_SP_OPS_bconst b, string asmop> : + F16_SP_OPS_u7_aligned<0b101, + (outs), (ins immU<7>:$u7), + !strconcat(asmop, "\t%sp, %sp, $u7")> { + + let fieldB = b; +} + +class F16_SP_OPS_uconst i, + dag outs, dag ins, string asmop> : + F16_SP_OPS_u7_aligned { + + let fieldU = 0b00001; +} + +class F16_SP_OPS_buconst i, string asmop> : + F16_SP_OPS_u7_aligned { + + let fieldB = 0x000; + let fieldU = 0b10001; +} + +class F16_SP_LD i, string asmop> : F16_SP_OPS_u7_aligned:$u7), + !strconcat(asmop, "\t$b3, [%sp, $u7]")>; + +class F16_SP_ST i, string asmop> : F16_SP_OPS_u7_aligned:$u7), + !strconcat(asmop, "\t$b3, [%sp, $u7]")>; + +// Compact MOV/ADD/CMP Immediate Format. +class F16_OP_IMM opc, dag outs, dag ins, string asmstr> : + InstARC<2, outs, ins, asmstr, []> { + + bits<3> b; + let Inst{15-11} = opc; + let Inst{10-8} = b; +} + +class F16_OP_U7 : + F16_OP_IMM<0b11100, (outs GPR32:$b), (ins immU<7>:$u7), asmstr> { + + bits<7> u7; + let Inst{7} = i; + let Inst{6-0} = u7; +} + // Special types for different instruction operands. 
def cmovpred : Operand, PredicateOp, ComplexPattern { @@ -481,28 +825,67 @@ def brccond : Operand { let PrintMethod = "printBRCCPredicateOperand"; } -// Branch targets of different offset sizes. -def btarget : Operand { +// Branch/call targets of different offset sizes. +class BCTarget : Operand { let OperandType = "OPERAND_PCREL"; } -def btargetS9 : Operand { - let OperandType = "OPERAND_PCREL"; - let DecoderMethod = "DecodeBranchTargetS9"; +def btarget : BCTarget; + +class BCTargetSigned : BCTarget { + let DecoderMethod = "DecodeBranchTargetS<"#BSz#">"; } -def btargetS21 : Operand { - let OperandType = "OPERAND_PCREL"; - let DecoderMethod = "DecodeBranchTargetS21"; +class BranchTargetS : BCTargetSigned; +def btargetS7 : BranchTargetS<7>; +def btargetS8 : BranchTargetS<8>; +def btargetS9 : BranchTargetS<9>; +def btargetS10 : BranchTargetS<10>; +def btargetS13 : BranchTargetS<13>; +def btargetS21 : BranchTargetS<21>; +def btargetS25 : BranchTargetS<25>; + +class CallTargetS : BCTargetSigned; +def calltargetS25: CallTargetS<25>; + +// Compact Branch on Compare Register with Zero. +class F16_BCC_REG : + InstARC<2, (outs), (ins GPR32:$b, btargetS8:$s8), + !strconcat(asmstr, "\t$b, 0, $s8"), []> { + + bits<3> b; + bits<8> s8; + + let Inst{15-11} = 0b11101; + let Inst{10-8} = b; + let Inst{7} = i; + let Inst{6-0} = s8{7-1}; + let s8{0} = 0b0; } -def btargetS25 : Operand { - let OperandType = "OPERAND_PCREL"; - let DecoderMethod = "DecodeBranchTargetS25"; +// Compact Branch Conditionally Format. +class F16_BCC i, dag ins, string asmstr> : + InstARC<2, (outs), ins, asmstr, []> { + + let Inst{15-11} = 0b11110; + let Inst{10-9} = i; } -def calltargetS25: Operand { - let OperandType = "OPERAND_PCREL"; - let DecoderMethod = "DecodeBranchTargetS25"; +class F16_BCC_s10 i, string asmstr> : + F16_BCC { + + bits<10> s; + let Inst{8-0} = s{9-1}; + let s{0} = 0b0; } +class F16_BCC_s7 i, string asmstr> : + F16_BCC<0b11, (ins btargetS7:$s), + !strconcat(asmstr, "\t$s")> { + + bits<7> s; + let Inst{8-6} = i; + let Inst{5-0} = s{6-1}; + let s{0} = 0b0; +} diff --git a/lib/Target/ARC/ARCInstrInfo.cpp b/lib/Target/ARC/ARCInstrInfo.cpp index a299e32c03a0..4a95fced446b 100644 --- a/lib/Target/ARC/ARCInstrInfo.cpp +++ b/lib/Target/ARC/ARCInstrInfo.cpp @@ -103,6 +103,10 @@ static ARCCC::CondCode GetOppositeBranchCondition(ARCCC::CondCode CC) { return ARCCC::LE; case ARCCC::GE: return ARCCC::LT; + case ARCCC::VS: + return ARCCC::VC; + case ARCCC::VC: + return ARCCC::VS; case ARCCC::LT: return ARCCC::GE; case ARCCC::LE: diff --git a/lib/Target/ARC/ARCInstrInfo.td b/lib/Target/ARC/ARCInstrInfo.td index 79ab42fcef32..edd853fe150d 100644 --- a/lib/Target/ARC/ARCInstrInfo.td +++ b/lib/Target/ARC/ARCInstrInfo.td @@ -117,7 +117,7 @@ def STB_FAR : PseudoInstARC<(outs), (ins GPR32:$dst, MEMrlimm:$addr), // multiclass. These classes do not contain Selection DAG patterns. //===----------------------------------------------------------------------===// -// Generic 3 operand binary instructions (i.e., add, r0, r1, r2). +// Generic 3 operand binary instructions (i.e., add r0, r1, r2). multiclass ArcBinaryInst major, bits<6> mincode, string opasm> { // 3 register variant. @@ -140,7 +140,7 @@ multiclass ArcBinaryInst major, bits<6> mincode, // 2 matched-register with signed 12-bit immediate variant (add r0, r0, -1). 
def _rrs12 : F32_DOP_RS12:$S12), !strconcat(opasm, "\t$B, $in, $S12"), []> { let Constraints = "$B = $in"; } @@ -194,6 +194,9 @@ multiclass MultiPat; defm SUB : ArcBinaryGEN4Inst<0b000010, "sub">; +defm SUB1 : ArcBinaryGEN4Inst<0b010111, "sub1">; +defm SUB2 : ArcBinaryGEN4Inst<0b011000, "sub2">; +defm SUB3 : ArcBinaryGEN4Inst<0b011001, "sub3">; defm OR : ArcBinaryGEN4Inst<0b000101, "or">; defm AND : ArcBinaryGEN4Inst<0b000100, "and">; defm XOR : ArcBinaryGEN4Inst<0b000111, "xor">; @@ -206,6 +209,7 @@ defm ROR : ArcBinaryEXT5Inst<0b000011, "ror">; defm MPY : ArcBinaryGEN4Inst<0b011010, "mpy">; defm MPYM : ArcBinaryGEN4Inst<0b011011, "mpym">; defm MPYMU : ArcBinaryGEN4Inst<0b011100, "mpymu">; +defm SETEQ : ArcBinaryGEN4Inst<0b111000, "seteq">; // Patterns for 3 operand binary instructions. defm : MultiPat; @@ -223,7 +227,6 @@ defm : MultiPat; defm : MultiPat; defm : MultiPat; - // --------------------------------------------------------------------------- // Unary Instruction definitions. // --------------------------------------------------------------------------- @@ -248,9 +251,9 @@ defm : MultiPat; // --------------------------------------------------------------------------- let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in { def MOV_rs12 : F32_DOP_RS12<0b00100, 0b001010, 0, - (outs GPR32:$B), (ins immS12:$S12), + (outs GPR32:$B), (ins immS<12>:$S12), "mov\t$B, $S12", - [(set GPR32:$B, immS12:$S12)]>; + [(set GPR32:$B, immS<12>:$S12)]>; } def MOV_rr : F32_DOP_RR<0b00100, 0b001010, 0, @@ -288,96 +291,463 @@ def : Pat<(ARCGAWrapper tjumptable:$addr), // --------------------------------------------------------------------------- // Branch instructions -let isBranch = 1, isTerminator = 1, isBarrier = 1 in { -// Unconditional branch. -def BR : F32_BR0_UCOND_FAR<(outs), (ins btargetS25:$S25), - "b\t$S25", [(br bb:$S25)]>; - -let Uses=[STATUS32] in { -// Conditional branch. -def Bcc : F32_BR0_COND<(outs), (ins btargetS21:$S21, ccond:$cc), - "b$cc\t$S21", []>; -} - -// Compare and branch (limited range). -def BRcc_rr : F32_BR1_BCC<(outs), - (ins btargetS9:$S9, GPR32:$B, GPR32:$C, brccond:$cc), - "br$cc\t$B, $C, $S9", 0, []>; -def BRcc_ru6 : F32_BR1_BCC<(outs), - (ins btargetS9:$S9, GPR32:$B, immU6:$C, brccond:$cc), - "br$cc\t$B, $C, $S9", 1, []>; - -// Pseudo compare and branch. -// After register allocation, this can expand into either a limited range -// Compare and branch (BRcc), or into CMP + Bcc. -// At worst, this expands into 2 4-byte instructions. -def BRcc_rr_p : PseudoInstARC<(outs), - (ins btarget:$T, GPR32:$B, GPR32:$C, ccond:$cc), - "pbr$cc\t$B, $C, $T", - [(ARCbrcc bb:$T, i32:$B, i32:$C, imm32:$cc)]> - { let Size = 8; } - -def BRcc_ru6_p : PseudoInstARC<(outs), - (ins btarget:$T, GPR32:$B, i32imm:$C, ccond:$cc), - "pbr$cc\t$B, $C, $T", - [(ARCbrcc bb:$T, i32:$B, immU6:$C, imm32:$cc)]> - { let Size = 8; } -} +let isBranch = 1, isTerminator = 1 in { + + // Unconditional branch. + let isBarrier = 1 in + def BR : F32_BR0_UCOND_FAR<(outs), (ins btargetS25:$S25), + "b\t$S25", [(br bb:$S25)]>; + + let Uses=[STATUS32] in + // Conditional branch. + def Bcc : F32_BR0_COND<(outs), (ins btargetS21:$S21, ccond:$cc), + "b$cc\t$S21", []>; + + // Compare and branch (limited range). + def BRcc_rr : F32_BR1_BCC<(outs), + (ins btargetS9:$S9, GPR32:$B, GPR32:$C, brccond:$cc), + "br$cc\t$B, $C, $S9", 0, []>; + def BRcc_ru6 : F32_BR1_BCC<(outs), + (ins btargetS9:$S9, GPR32:$B, immU6:$C, brccond:$cc), + "br$cc\t$B, $C, $S9", 1, []>; + + // Pseudo compare and branch. 
+ // After register allocation, this can expand into either a limited range + // Compare and branch (BRcc), or into CMP + Bcc. + // At worst, this expands into 2 4-byte instructions. + def BRcc_rr_p : PseudoInstARC<(outs), + (ins btarget:$T, GPR32:$B, GPR32:$C, ccond:$cc), + "pbr$cc\t$B, $C, $T", + [(ARCbrcc bb:$T, i32:$B, i32:$C, imm32:$cc)]> + { let Size = 8; } + + def BRcc_ru6_p : PseudoInstARC<(outs), + (ins btarget:$T, GPR32:$B, i32imm:$C, ccond:$cc), + "pbr$cc\t$B, $C, $T", + [(ARCbrcc bb:$T, i32:$B, immU6:$C, imm32:$cc)]> + { let Size = 8; } +} // let isBranch, isTerminator // Indirect, unconditional Jump. -let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { +let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in def J : F32_DOP_RR<0b00100, 0b100000, 0, (outs), (ins GPR32:$C), "j\t[$C]", [(brind i32:$C)]>; -} // Call instructions. -let isCall = 1, Defs = [BLINK], Uses = [SP] in { -// Direct unconditional call. -def BL : F32_BR1_BL_UCOND_FAR<(outs), (ins calltargetS25:$S25), - "bl\t$S25", [(ARCBranchLink tglobaladdr:$S25)]>; - -// Indirect unconditional call. -let isIndirectBranch = 1, Defs = [BLINK], Uses = [SP] in { -def JL : F32_DOP_RR<0b00100, 0b100010, 0, (outs), (ins GPR32:$C), - "jl\t[$C]", [(ARCJumpLink i32:$C)]>; -} -} +let isCall = 1, isBarrier = 1, Defs = [BLINK], Uses = [SP] in { + // Direct unconditional call. + def BL : F32_BR1_BL_UCOND_FAR<(outs), (ins calltargetS25:$S25), + "bl\t$S25", [(ARCBranchLink tglobaladdr:$S25)]>; + + // Indirect unconditional call. + let isIndirectBranch = 1 in + def JL : F32_DOP_RR<0b00100, 0b100010, 0, (outs), (ins GPR32:$C), + "jl\t[$C]", [(ARCJumpLink i32:$C)]>; +} // let isCall, isBarrier, Defs, Uses // Pattern to generate BL instruction. def : Pat<(ARCBranchLink texternalsym:$dst), (BL texternalsym:$dst)>; // Return from call. -let isReturn = 1, isTerminator = 1, isBarrier = 1 in { +let isReturn = 1, isTerminator = 1, isBarrier = 1 in // This is a specialized 2-byte instruction that doesn't generalize // to any larger 2-byte class, so go ahead and define it here. def J_S_BLINK : InstARC<2, (outs), (ins), "j_s\t[%blink]", [(ret)]> { let Inst{15-0} = 0b0111111011100000; } -} //---------------------------------------------------------------------------- -// Load/Store instructions. +// Compact stack-based operations. //---------------------------------------------------------------------------- // 2-byte push/pop blink instructions commonly used for prolog/epilog // generation. These 2 instructions are actually specialized 2-byte // format instructions that aren't generalized to a larger 2-byte // class, so we might as well have them here. 
-let Uses = [BLINK], Defs = [SP] in { -def PUSH_S_BLINK : InstARC<2, (outs), (ins), - "push_s\t%blink", []> { - let Inst{15-0} = 0b1100000011110001; +let Uses = [BLINK], Defs = [SP] in +def PUSH_S_BLINK : F16_SP_OPS_buconst<0b111, "push_s">; + +let Defs = [BLINK, SP] in +def POP_S_BLINK : F16_SP_OPS_buconst<0b110, "pop_s">; + +def PUSH_S_r : F16_SP_OPS_uconst<0b110, + (outs), (ins GPR32Reduced:$b3), "push_s">; +def POP_S_r : F16_SP_OPS_uconst<0b111, + (outs GPR32Reduced:$b3), (ins), "pop_s">; + +def SP_SUB_SP_S : F16_SP_OPS_bconst<0b001, "sub_s">; +def SP_ADD_SP_S : F16_SP_OPS_bconst<0b000, "add_s">; +def SP_ADD_S : F16_SP_OPS_u7_aligned<0b100, + (outs GPR32Reduced:$b3), (ins immU<7>:$u7), + "add_s\t$b3, %sp, $u7">; + +def SP_LD_S : F16_SP_LD<0b000, "ld_s">; +def SP_LDB_S : F16_SP_LD<0b001, "ldb_s">; +def SP_ST_S : F16_SP_ST<0b010, "st_s">; +def SP_STB_S : F16_SP_ST<0b011, "stb_s">; + +def LEAVE_S : F16_SP_OPS<0b110, + (outs), (ins immU<7>:$u7), "leave_s\t$u7"> { + + bits<7> u7; + + let fieldB = u7{6-4}; + let fieldU{4-1} = u7{3-0}; + let fieldU{0} = 0b0; +} + +def ENTER_S : F16_SP_OPS<0b111, + (outs), (ins immU<6>:$u6), "enter_s\t$u6"> { + + bits<6> u6; + + let fieldB{2} = 0; + let fieldB{1-0} = u6{5-4}; + let fieldU{4-1} = u6{3-0}; + let fieldU{0} = 0b0; +} + +//---------------------------------------------------------------------------- +// Compact Move/Load instructions. +//---------------------------------------------------------------------------- +class COMPACT_MOV_S : + F16_COMPACT<0b0, (outs GPR32:$g), (ins GPR32:$h), + "mov_s\t$g, $h"> { + let DecoderMethod = "DecodeMoveHRegInstruction"; +} + +def COMPACT_MOV_S_limm : COMPACT_MOV_S { + bits<32> LImm; + let Inst{47-16} = LImm; + + bits<5> LImmReg = 0b11110; + let Inst{7-5} = LImmReg{2-0}; + let Inst{1-0} = LImmReg{4-3}; + + let Size = 6; +} + +def COMPACT_MOV_S_hreg : COMPACT_MOV_S; + +def COMPACT_LD_S : + F16_COMPACT<0b1, (outs GPR32:$r), (ins GPR32:$h, immU<5>:$u5), + "ld_s\t$r, [$h, $u5]"> { + bits<5> u5; + bits<2> r; + + let Inst{10} = u5{4}; + let Inst{9-8} = r; + let Inst{4-3} = u5{3-2}; + let u5{1-0} = 0b00; +} + +//---------------------------------------------------------------------------- +// Compact Load/Add/Sub. +//---------------------------------------------------------------------------- +def LD_S_AS_rrr : F16_LD_SUB<0b0, "ld_s.as\t$a, [$b, $c]">; +def SUB_S_rrr : F16_LD_SUB<0b1, "sub_s\t$a, $b, $c">; +def ADD_S_rru6 : F16_ADD; + +//---------------------------------------------------------------------------- +// Compact Load/Store. +//---------------------------------------------------------------------------- +def LD_S_s11 : F16_LD_ST_s11<0b0, "ld_s\t%r1, [%gp, $s11]">; +def ST_S_s11 : F16_LD_ST_s11<0b1, "st_s\t%r0, [%gp, $s11]">; +def LDI_S_u7 : F16_LDI_u7; + +//---------------------------------------------------------------------------- +// Indexed Jump or Execute. +//---------------------------------------------------------------------------- +def JLI_S : F16_JLI_EI<0, "jli_s">; +def EI_S : F16_JLI_EI<1, "ei_s">; + +//---------------------------------------------------------------------------- +// Load/Add Register-Register. 
+//---------------------------------------------------------------------------- +def LD_S_rrr : F16_LD_ADD_RR<0b00, "ld_s\t$a, [$b, $c]">; +def LDB_S_rrr : F16_LD_ADD_RR<0b01, "ldb_s\t$a, [$b, $c]">; +def LDH_S_rrr : F16_LD_ADD_RR<0b10, "ldh_s\t$a, [$b, $c]">; +def ADD_S_rrr : F16_LD_ADD_RR<0b11, "add_s\t$a, $b, $c">; + +//---------------------------------------------------------------------------- +// Load/Add GP-Relative. +//---------------------------------------------------------------------------- +def GP_LD_S : F16_GP_LD_ADD<0b00, (ins immS<11>:$s), + "ld_s\t%r0, [%gp, $s]"> { + + bits<11> s; + let Inst{8-0} = s{10-2}; + let s{1-0} = 0b00; +} + +def GP_LDB_S : F16_GP_LD_ADD<0b01, (ins immS<9>:$s), + "ldb_s\t%r0, [%gp, $s]"> { + + bits<9> s; + let Inst{8-0} = s{8-0}; +} + +def GP_LDH_S : F16_GP_LD_ADD<0b10, (ins immS<10>:$s), + "ldh_s\t%r0, [%gp, $s]"> { + + bits<10> s; + let Inst{8-0} = s{9-1}; + let s{0} = 0b0; +} + +def GP_ADD_S : F16_GP_LD_ADD<0b11, (ins immS<11>:$s), + "add_s\t%r0, %gp, $s"> { + + bits<11> s; + let Inst{8-0} = s{10-2}; + let s{1-0} = 0b00; +} + +//---------------------------------------------------------------------------- +// Load PCL-Relative. +//---------------------------------------------------------------------------- +def PCL_LD : InstARC<2, (outs GPR32:$b), (ins immU<10>:$u10), + "ld_s\t$b, [%pcl, $u10]", []> { + + bits<3> b; + bits<10> u10; + + let Inst{15-11} = 0b11010; + let Inst{10-8} = b; + let Inst{7-0} = u10{9-2}; + let u10{1-0} = 0b00; +} + +let isBranch = 1 in { + //---------------------------------------------------------------------------- + // Branch on Compare Register with Zero. + //---------------------------------------------------------------------------- + def BREQ_S : F16_BCC_REG<0b0, "breq_s">; + def BRNE_S : F16_BCC_REG<0b1, "brne_s">; + + //---------------------------------------------------------------------------- + // Branch Conditionally. + //---------------------------------------------------------------------------- + let isBarrier = 1 in + def B_S : F16_BCC_s10<0b00, "b_s">; + + def BEQ_S : F16_BCC_s10<0b01, "beq_s">; + def BNE_S : F16_BCC_s10<0b10, "bne_s">; + def BGT_S : F16_BCC_s7<0b000, "bgt_s">; + def BGE_S : F16_BCC_s7<0b001, "bge_s">; + def BLT_S : F16_BCC_s7<0b010, "blt_s">; + def BLE_S : F16_BCC_s7<0b011, "ble_s">; + def BHI_S : F16_BCC_s7<0b100, "bhi_s">; + def BHS_S : F16_BCC_s7<0b101, "bhs_s">; + def BLO_S : F16_BCC_s7<0b110, "blo_s">; + def BLS_S : F16_BCC_s7<0b111, "bls_s">; +} // let isBranch + +def BL_S : + InstARC<2, (outs), (ins btargetS13:$s13), "bl_s\t$s13", []> { + + let Inst{15-11} = 0b11111; + + bits<13> s13; + let Inst{10-0} = s13{12-2}; + let s13{1-0} = 0b00; + + let isCall = 1; + let isBarrier = 1; } + +//---------------------------------------------------------------------------- +// Add/Sub/Shift Register-Immediate. +//---------------------------------------------------------------------------- +def ADD_S_ru3 : F16_ADD_IMM<0b00,"add_s">; +def SUB_S_ru3 : F16_ADD_IMM<0b01,"sub_s">; +def ASL_S_ru3 : F16_ADD_IMM<0b10,"asl_s">; +def ASR_S_ru3 : F16_ADD_IMM<0b11,"asr_s">; + +//---------------------------------------------------------------------------- +// Shift/Subtract/Bit Immediate. 
+//---------------------------------------------------------------------------- +def ASL_S_ru5 : F16_SH_SUB_BIT_DST<0b000,"asl_s">; +def LSR_S_ru5 : F16_SH_SUB_BIT_DST<0b001,"lsr_s">; +def ASR_S_ru5 : F16_SH_SUB_BIT_DST<0b010,"asr_s">; +def SUB_S_ru5 : F16_SH_SUB_BIT_DST<0b011,"sub_s">; +def BSET_S_ru5 : F16_SH_SUB_BIT_DST<0b100,"bset_s">; +def BCLR_S_ru5 : F16_SH_SUB_BIT_DST<0b101,"bclr_s">; +def BMSK_S_ru5 : F16_SH_SUB_BIT_DST<0b110,"bmsk_s">; +def BTST_S_ru5 : F16_SH_SUB_BIT<0b111, "btst_s\t$b, $u5">; + +//---------------------------------------------------------------------------- +// Dual Register Operations. +//---------------------------------------------------------------------------- +def ADD_S_rlimm : + F16_OP_HREG_LIMM<0b000, (outs GPR32:$b_s3), (ins i32imm:$LImm), + !strconcat("add_s", "\t$b_s3, $b_s3, $LImm")>; + +def ADD_S_rr : + F16_OP_HREG<0b000, (outs GPR32:$b_s3), (ins GPR32:$h), + !strconcat("add_s", "\t$b_s3, $b_s3, $h")>; + +def ADD_S_rs3 : + F16_OP_HREG<0b001, (outs GPR32:$h), (ins immC<3>:$b_s3), + !strconcat("add_s", "\t$h, $h, $b_s3")>; + +def ADD_S_limms3 : + F16_OP_HREG_LIMM<0b001, (outs), (ins immC<3>:$b_s3, i32imm:$LImm), + !strconcat("add_s", "\t0, $LImm, $b_s3")>; + +def MOV_S_NE_rlimm : + F16_OP_HREG_LIMM<0b111, (outs GPR32:$b_s3), (ins i32imm:$LImm), + !strconcat("mov_s.ne", "\t$b_s3, $LImm")>; + +def MOV_S_NE_rr : + F16_OP_HREG<0b111,(outs GPR32:$b_s3), (ins GPR32:$h), + !strconcat("mov_s.ne", "\t$b_s3, $h")>; + +def MOV_S_rs3 : + F16_OP_HREG<0b011, (outs GPR32:$h), (ins immC<3>:$b_s3), + !strconcat("mov_s", "\t$h, $b_s3")>; + +def MOV_S_s3 : + F16_OP_HREG30<0b011, (outs), (ins immC<3>:$b_s3), + !strconcat("mov_s", "\t0, $b_s3")>; + +def CMP_S_rlimm : + F16_OP_HREG_LIMM<0b100, (outs GPR32:$b_s3), (ins i32imm:$LImm), + !strconcat("cmp_s", "\t$b_s3, $LImm")>; + +def CMP_S_rr : + F16_OP_HREG<0b100, (outs GPR32:$b_s3), (ins GPR32:$h), + !strconcat("cmp_s", "\t$b_s3, $h")>; + +def CMP_S_rs3 : + F16_OP_HREG<0b101, (outs GPR32:$h), (ins immC<3>:$b_s3), + !strconcat("cmp_s", "\t$h, $b_s3")>; + +def CMP_S_limms3 : + F16_OP_HREG_LIMM<0b101, (outs), (ins immC<3>:$b_s3, i32imm:$LImm), + !strconcat("cmp_s", "\t$LImm, $b_s3")>; + +//---------------------------------------------------------------------------- +// Compact MOV/ADD/CMP Immediate instructions. +//---------------------------------------------------------------------------- +def MOV_S_u8 : + F16_OP_IMM<0b11011, (outs GPR32:$b), (ins immU<8>:$u8), + !strconcat("mov_s", "\t$b, $u8")> { + bits<8> u8; + let Inst{7-0} = u8; } -let Defs = [BLINK, SP] in { -def POP_S_BLINK : InstARC<2, (outs), (ins), - "pop_s\t%blink", []> { - let Inst{15-0} = 0b1100000011010001; +def ADD_S_u7 : + F16_OP_U7<0b0, !strconcat("add_s", "\t$b, $b, $u7")>; + +def CMP_S_u7 : + F16_OP_U7<0b1, !strconcat("cmp_s", "\t$b, $u7")>; + +//---------------------------------------------------------------------------- +// Compact Load/Store instructions with offset. 
+//---------------------------------------------------------------------------- +def LD_S_OFF : + F16_LD_ST_WORD_OFF<0x10, (outs GPR32:$c), (ins GPR32:$b, immU<7>:$off), + "ld_s">; + +def LDB_S_OFF : + F16_LD_ST_BYTE_OFF<0x11, (outs GPR32:$c), (ins GPR32:$b, immU<5>:$off), + "ldb_s">; + +class F16_LDH_OFF opc, string asmstr> : + F16_LD_ST_HALF_OFF:$off), + asmstr>; + +def LDH_S_OFF : F16_LDH_OFF<0x12, "ldh_s">; +def LDH_S_X_OFF : F16_LDH_OFF<0x13, "ldh_s.x">; + +def ST_S_OFF : + F16_LD_ST_WORD_OFF<0x14, (outs), (ins GPR32:$c, GPR32:$b, immU<7>:$off), + "st_s">; + +def STB_S_OFF : + F16_LD_ST_BYTE_OFF<0x15, (outs), (ins GPR32:$c, GPR32:$b, immU<5>:$off), + "stb_s">; + +def STH_S_OFF : + F16_LD_ST_HALF_OFF<0x16, (outs), (ins GPR32:$c, GPR32:$b, immU<6>:$off), + "sth_s">; + +//---------------------------------------------------------------------------- +// General compact instructions. +//---------------------------------------------------------------------------- +def GEN_SUB_S : F16_GEN_DOP<0x02, "sub_s">; +def GEN_AND_S : F16_GEN_DOP<0x04, "and_s">; +def GEN_OR_S : F16_GEN_DOP<0x05, "or_s">; +def GEN_BIC_S : F16_GEN_DOP<0x06, "bic_s">; +def GEN_XOR_S : F16_GEN_DOP<0x07, "xor_s">; +def GEN_MPYW_S : F16_GEN_DOP<0x09, "mpyw_s">; +def GEN_MPYUW_S : F16_GEN_DOP<0x0a, "mpyuw_s">; +def GEN_TST_S : F16_GEN_DOP_NODST<0x0b, "tst_s">; +def GEN_MPY_S : F16_GEN_DOP<0x0c, "mpy_s">; +def GEN_SEXB_S : F16_GEN_DOP_SINGLESRC<0x0d, "sexb_s">; +def GEN_SEXH_S : F16_GEN_DOP_SINGLESRC<0x0e, "sexh_s">; +def GEN_EXTB_S : F16_GEN_DOP_SINGLESRC<0x0f, "extb_s">; +def GEN_EXTH_S : F16_GEN_DOP_SINGLESRC<0x10, "exth_s">; +def GEN_ABS_S : F16_GEN_DOP_SINGLESRC<0x11, "abs_s">; +def GEN_NOT_S : F16_GEN_DOP_SINGLESRC<0x12, "not_s">; +def GEN_NEG_S : F16_GEN_DOP_SINGLESRC<0x13, "neg_s">; +def GEN_ADD1_S : F16_GEN_DOP<0x14, "add1_s">; +def GEN_ADD2_S : F16_GEN_DOP<0x15, "add2_s">; +def GEN_ADD3_S : F16_GEN_DOP<0x16, "add3_s">; +def GEN_ASL_S : F16_GEN_DOP<0x18, "asl_s">; +def GEN_LSR_S : F16_GEN_DOP<0x19, "lsr_s">; +def GEN_ASR_S : F16_GEN_DOP<0x1a, "asr_s">; +def GEN_AS1L_S : F16_GEN_DOP_SINGLESRC<0x1b, "asl_s">; +def GEN_AS1R_S : F16_GEN_DOP_SINGLESRC<0x1c, "asr_s">; +def GEN_LS1R_S : F16_GEN_DOP_SINGLESRC<0x1d, "lsr_s">; +def GEN_TRAP_S : F16_GEN_DOP_BASE<0x1e, (outs), (ins immU6:$u6), + "trap_s\t$u6"> { + + bits<6> u6; + let b = u6{5-3}; + let c = u6{2-0}; } + +def GEN_BRK_S : F16_GEN_DOP_BASE<0x1f, (outs), (ins), + "brk_s"> { + + let b = 0b111; + let c = 0b111; } +let isBarrier = 1 in { + let isBranch = 1 in { + def GEN_J_S : F16_GEN_SOP<0x0, "j_s\t[$b]">; + def GEN_J_S_D : F16_GEN_SOP<0x1, "j_s.d\t[$b]">; + } // let isBranch + + let isCall = 1 in { + def GEN_JL_S : F16_GEN_SOP<0x2, "jl_s\t[$b]">; + def GEN_JL_S_D : F16_GEN_SOP<0x3, "jl_s.d\t[$b]">; + } // let isCall +} // let isBarrier + +def GEN_SUB_S_NE : F16_GEN_SOP<0x6, "sub_s.ne\t$b, $b, $b">; + +def GEN_NOP_S : F16_GEN_ZOP<0x0, "nop_s">; +def GEN_UNIMP_S : F16_GEN_ZOP<0x1, "unimp_s">; +def GEN_SWI_S : F16_GEN_ZOP<0x2, "swi_s">; + +let isReturn = 1, isTerminator = 1 in { + def GEN_JEQ_S : F16_GEN_ZOP<0x4, "jeq_s\t[%blink]">; + def GEN_JNE_S : F16_GEN_ZOP<0x5, "jne_s\t[%blink]">; + let isBarrier = 1 in { + //def GEN_J_S_BLINK : F16_GEN_ZOP<0x6, "j_s\t[%blink]">; + def GEN_J_S_D_BLINK : F16_GEN_ZOP<0x7, "j_s.d\t[%blink]">; + } // let isBarrier +} // let isReturn, isTerminator + +//---------------------------------------------------------------------------- +// Load/Store instructions. 
+//---------------------------------------------------------------------------- + // Load instruction variants: // Control bits: x, aa, di, zz // x - sign extend. @@ -412,7 +782,7 @@ multiclass ArcLdInst zz, string asmop> { def _AB_rs9 : F32_LD_RS9<0, 0b10, 0, zz, (outs GPR32:$addrout, GPR32:$A), - (ins GPR32:$B, immS9:$S9), + (ins GPR32:$B, immS<9>:$S9), !strconcat(asmop, ".ab\t$A, [$B,$S9]"), []> { let Constraints = "$addrout = $B"; } } @@ -472,7 +842,7 @@ multiclass ArcStInst zz, string asmop> { !strconcat(asmop, "\t$C, [$addr]"), []>; def _AW_rs9 : F32_ST_RS9<0b01, 0, zz, (outs GPR32:$addrout), - (ins GPR32:$C, GPR32:$B, immS9:$S9), + (ins GPR32:$C, GPR32:$B, immS<9>:$S9), !strconcat(asmop, ".aw\t$C, [$B,$S9]"), []> { let Constraints = "$addrout = $B"; } } diff --git a/lib/Target/ARC/ARCRegisterInfo.cpp b/lib/Target/ARC/ARCRegisterInfo.cpp index 59b22c559f28..cb9f89d3499b 100644 --- a/lib/Target/ARC/ARCRegisterInfo.cpp +++ b/lib/Target/ARC/ARCRegisterInfo.cpp @@ -125,8 +125,7 @@ static void ReplaceFrameIndex(MachineBasicBlock::iterator II, ARCRegisterInfo::ARCRegisterInfo() : ARCGenRegisterInfo(ARC::BLINK) {} bool ARCRegisterInfo::needsFrameMoves(const MachineFunction &MF) { - return MF.getMMI().hasDebugInfo() || - MF.getFunction()->needsUnwindTableEntry(); + return MF.getMMI().hasDebugInfo() || MF.getFunction().needsUnwindTableEntry(); } const MCPhysReg * diff --git a/lib/Target/ARC/ARCTargetMachine.cpp b/lib/Target/ARC/ARCTargetMachine.cpp index d2512c281a61..1acae3a88870 100644 --- a/lib/Target/ARC/ARCTargetMachine.cpp +++ b/lib/Target/ARC/ARCTargetMachine.cpp @@ -88,8 +88,7 @@ extern "C" void LLVMInitializeARCTarget() { RegisterTargetMachine X(getTheARCTarget()); } -TargetIRAnalysis ARCTargetMachine::getTargetIRAnalysis() { - return TargetIRAnalysis([this](const Function &F) { - return TargetTransformInfo(ARCTTIImpl(this, F)); - }); +TargetTransformInfo +ARCTargetMachine::getTargetTransformInfo(const Function &F) { + return TargetTransformInfo(ARCTTIImpl(this, F)); } diff --git a/lib/Target/ARC/ARCTargetMachine.h b/lib/Target/ARC/ARCTargetMachine.h index 98021b3dc1d5..18117e3409af 100644 --- a/lib/Target/ARC/ARCTargetMachine.h +++ b/lib/Target/ARC/ARCTargetMachine.h @@ -40,7 +40,7 @@ class ARCTargetMachine : public LLVMTargetMachine { // Pass Pipeline Configuration TargetPassConfig *createPassConfig(PassManagerBase &PM) override; - TargetIRAnalysis getTargetIRAnalysis() override; + TargetTransformInfo getTargetTransformInfo(const Function &F) override; TargetLoweringObjectFile *getObjFileLowering() const override { return TLOF.get(); } diff --git a/lib/Target/ARC/Disassembler/ARCDisassembler.cpp b/lib/Target/ARC/Disassembler/ARCDisassembler.cpp index b49658004f7a..dd181767d81a 100644 --- a/lib/Target/ARC/Disassembler/ARCDisassembler.cpp +++ b/lib/Target/ARC/Disassembler/ARCDisassembler.cpp @@ -67,6 +67,15 @@ static bool readInstruction64(ArrayRef Bytes, uint64_t Address, return true; } +static bool readInstruction48(ArrayRef Bytes, uint64_t Address, + uint64_t &Size, uint64_t &Insn) { + Size = 6; + Insn = ((uint64_t)Bytes[0] << 0) | ((uint64_t)Bytes[1] << 8) | + ((uint64_t)Bytes[2] << 32) | ((uint64_t)Bytes[3] << 40) | + ((uint64_t)Bytes[4] << 16) | ((uint64_t)Bytes[5] << 24); + return true; +} + static bool readInstruction16(ArrayRef Bytes, uint64_t Address, uint64_t &Size, uint32_t &Insn) { Size = 2; @@ -74,32 +83,33 @@ static bool readInstruction16(ArrayRef Bytes, uint64_t Address, return true; } -static MCDisassembler::DecodeStatus DecodeS12Operand(MCInst &, unsigned, - 
uint64_t, const void *); - -static MCDisassembler::DecodeStatus DecodeS9Operand(MCInst &, unsigned, - uint64_t, const void *); +template +static DecodeStatus DecodeSignedOperand(MCInst &Inst, unsigned InsnS, + uint64_t Address = 0, + const void *Decoder = nullptr); -static MCDisassembler::DecodeStatus -DecodeBranchTargetS9(MCInst &, unsigned, uint64_t, const void *); +template +static DecodeStatus DecodeFromCyclicRange(MCInst &Inst, unsigned InsnS, + uint64_t Address = 0, + const void *Decoder = nullptr); -static MCDisassembler::DecodeStatus -DecodeBranchTargetS21(MCInst &, unsigned, uint64_t, const void *); +template +static DecodeStatus DecodeBranchTargetS(MCInst &Inst, unsigned InsnS, + uint64_t Address, const void *Decoder); -static MCDisassembler::DecodeStatus -DecodeBranchTargetS25(MCInst &, unsigned, uint64_t, const void *); +static DecodeStatus DecodeMEMrs9(MCInst &, unsigned, uint64_t, const void *); -static MCDisassembler::DecodeStatus DecodeMEMrs9(MCInst &, unsigned, uint64_t, - const void *); +static DecodeStatus DecodeLdLImmInstruction(MCInst &, uint64_t, uint64_t, + const void *); -static MCDisassembler::DecodeStatus -DecodeLdLImmInstruction(MCInst &, uint64_t, uint64_t, const void *); +static DecodeStatus DecodeStLImmInstruction(MCInst &, uint64_t, uint64_t, + const void *); -static MCDisassembler::DecodeStatus -DecodeStLImmInstruction(MCInst &, uint64_t, uint64_t, const void *); +static DecodeStatus DecodeLdRLImmInstruction(MCInst &, uint64_t, uint64_t, + const void *); -static MCDisassembler::DecodeStatus -DecodeLdRLImmInstruction(MCInst &, uint64_t, uint64_t, const void *); +static DecodeStatus DecodeMoveHRegInstruction(MCInst &Inst, uint64_t, uint64_t, + const void *); static const uint16_t GPR32DecoderTable[] = { ARC::R0, ARC::R1, ARC::R2, ARC::R3, ARC::R4, ARC::R5, ARC::R6, @@ -115,11 +125,22 @@ static DecodeStatus DecodeGPR32RegisterClass(MCInst &Inst, unsigned RegNo, DEBUG(dbgs() << "Not a GPR32 register."); return MCDisassembler::Fail; } + unsigned Reg = GPR32DecoderTable[RegNo]; Inst.addOperand(MCOperand::createReg(Reg)); return MCDisassembler::Success; } +static DecodeStatus DecodeGBR32ShortRegister(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder) { + // Enumerates registers from ranges [r0-r3],[r12-r15]. + if (RegNo > 3) + RegNo += 8; // 4 for r12, etc... + + return DecodeGPR32RegisterClass(Inst, RegNo, Address, Decoder); +} + #include "ARCGenDisassemblerTables.inc" static unsigned decodeCField(unsigned Insn) { @@ -135,8 +156,8 @@ static unsigned decodeAField(unsigned Insn) { return fieldFromInstruction(Insn, 0, 6); } -static MCDisassembler::DecodeStatus -DecodeMEMrs9(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Dec) { +static DecodeStatus DecodeMEMrs9(MCInst &Inst, unsigned Insn, uint64_t Address, + const void *Dec) { // We have the 9-bit immediate in the low bits, 6-bit register in high bits. 
unsigned S9 = Insn & 0x1ff; unsigned R = (Insn & (0x7fff & ~0x1ff)) >> 9; @@ -145,49 +166,59 @@ DecodeMEMrs9(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Dec) { return MCDisassembler::Success; } -static MCDisassembler::DecodeStatus DecodeS9Operand(MCInst &Inst, - unsigned InsnS9, - uint64_t Address, - const void *Decoder) { - Inst.addOperand(MCOperand::createImm(SignExtend32<9>(0x1ff & InsnS9))); - return MCDisassembler::Success; +static bool DecodeSymbolicOperand(MCInst &Inst, uint64_t Address, + uint64_t Value, const void *Decoder) { + static const uint64_t atLeast = 2; + // TODO: Try to force emitter to use MCDisassembler* instead of void*. + auto Disassembler = static_cast(Decoder); + return (nullptr != Disassembler && + Disassembler->tryAddingSymbolicOperand(Inst, Value, Address, true, 0, + atLeast)); } -static MCDisassembler::DecodeStatus DecodeS12Operand(MCInst &Inst, - unsigned InsnS12, - uint64_t Address, - const void *Decoder) { - Inst.addOperand(MCOperand::createImm(SignExtend32<12>(0xfff & InsnS12))); - return MCDisassembler::Success; +static void DecodeSymbolicOperandOff(MCInst &Inst, uint64_t Address, + uint64_t Offset, const void *Decoder) { + uint64_t nextAddress = Address + Offset; + + if (!DecodeSymbolicOperand(Inst, Address, nextAddress, Decoder)) + Inst.addOperand(MCOperand::createImm(Offset)); } -static MCDisassembler::DecodeStatus DecodeBranchTargetS9(MCInst &Inst, - unsigned S, - uint64_t Address, - const void *Decoder) { - Inst.addOperand(MCOperand::createImm(SignExtend32<9>(S))); +template +static DecodeStatus DecodeBranchTargetS(MCInst &Inst, unsigned InsnS, + uint64_t Address, const void *Decoder) { + + static_assert(B > 0, "field is empty"); + DecodeSymbolicOperandOff(Inst, Address, SignExtend32(InsnS), Decoder); return MCDisassembler::Success; } -static MCDisassembler::DecodeStatus DecodeBranchTargetS21(MCInst &Inst, - unsigned S, - uint64_t Address, - const void *Decoder) { - Inst.addOperand(MCOperand::createImm(SignExtend32<21>(S))); +template +static DecodeStatus DecodeSignedOperand(MCInst &Inst, unsigned InsnS, + uint64_t /*Address*/, + const void * /*Decoder*/) { + + static_assert(B > 0, "field is empty"); + Inst.addOperand(MCOperand::createImm( + SignExtend32(maskTrailingOnes(B) & InsnS))); return MCDisassembler::Success; } -static MCDisassembler::DecodeStatus DecodeBranchTargetS25(MCInst &Inst, - unsigned S, - uint64_t Address, - const void *Decoder) { - Inst.addOperand(MCOperand::createImm(SignExtend32<25>(S))); +template +static DecodeStatus DecodeFromCyclicRange(MCInst &Inst, unsigned InsnS, + uint64_t /*Address*/, + const void * /*Decoder*/) { + + static_assert(B > 0, "field is empty"); + const unsigned max = (1u << B) - 1; + Inst.addOperand( + MCOperand::createImm(InsnS < max ? 
static_cast(InsnS) : -1)); return MCDisassembler::Success; } -static MCDisassembler::DecodeStatus -DecodeStLImmInstruction(MCInst &Inst, uint64_t Insn, uint64_t Address, - const void *Decoder) { +static DecodeStatus DecodeStLImmInstruction(MCInst &Inst, uint64_t Insn, + uint64_t Address, + const void *Decoder) { unsigned SrcC, DstB, LImm; DstB = decodeBField(Insn); if (DstB != 62) { @@ -202,9 +233,9 @@ DecodeStLImmInstruction(MCInst &Inst, uint64_t Insn, uint64_t Address, return MCDisassembler::Success; } -static MCDisassembler::DecodeStatus -DecodeLdLImmInstruction(MCInst &Inst, uint64_t Insn, uint64_t Address, - const void *Decoder) { +static DecodeStatus DecodeLdLImmInstruction(MCInst &Inst, uint64_t Insn, + uint64_t Address, + const void *Decoder) { unsigned DstA, SrcB, LImm; DEBUG(dbgs() << "Decoding LdLImm:\n"); SrcB = decodeBField(Insn); @@ -220,9 +251,9 @@ DecodeLdLImmInstruction(MCInst &Inst, uint64_t Insn, uint64_t Address, return MCDisassembler::Success; } -static MCDisassembler::DecodeStatus -DecodeLdRLImmInstruction(MCInst &Inst, uint64_t Insn, uint64_t Address, - const void *Decoder) { +static DecodeStatus DecodeLdRLImmInstruction(MCInst &Inst, uint64_t Insn, + uint64_t Address, + const void *Decoder) { unsigned DstA, SrcB; DEBUG(dbgs() << "Decoding LdRLimm\n"); DstA = decodeAField(Insn); @@ -237,9 +268,37 @@ DecodeLdRLImmInstruction(MCInst &Inst, uint64_t Insn, uint64_t Address, return MCDisassembler::Success; } -MCDisassembler::DecodeStatus ARCDisassembler::getInstruction( - MCInst &Instr, uint64_t &Size, ArrayRef Bytes, uint64_t Address, - raw_ostream &vStream, raw_ostream &cStream) const { +static DecodeStatus DecodeMoveHRegInstruction(MCInst &Inst, uint64_t Insn, + uint64_t Address, + const void *Decoder) { + DEBUG(dbgs() << "Decoding MOV_S h-register\n"); + using Field = decltype(Insn); + Field h = fieldFromInstruction(Insn, 5, 3) | + (fieldFromInstruction(Insn, 0, 2) << 3); + Field g = fieldFromInstruction(Insn, 8, 3) | + (fieldFromInstruction(Insn, 3, 2) << 3); + + auto DecodeRegisterOrImm = [&Inst, Address, Decoder](Field RegNum, + Field Value) { + if (30 == RegNum) { + Inst.addOperand(MCOperand::createImm(Value)); + return MCDisassembler::Success; + } + + return DecodeGPR32RegisterClass(Inst, RegNum, Address, Decoder); + }; + + if (MCDisassembler::Success != DecodeRegisterOrImm(g, 0)) + return MCDisassembler::Fail; + + return DecodeRegisterOrImm(h, Insn >> 16u); +} + +DecodeStatus ARCDisassembler::getInstruction(MCInst &Instr, uint64_t &Size, + ArrayRef Bytes, + uint64_t Address, + raw_ostream &vStream, + raw_ostream &cStream) const { MCDisassembler::DecodeStatus Result; if (Bytes.size() < 2) { Size = 0; @@ -262,9 +321,9 @@ MCDisassembler::DecodeStatus ARCDisassembler::getInstruction( return Fail; Result = decodeInstruction(DecoderTable64, Instr, Insn64, Address, this, STI); - if (Result == MCDisassembler::Success) { + if (Success == Result) { DEBUG(dbgs() << "Successfully decoded 64-bit instruction."); - return MCDisassembler::Success; + return Result; } DEBUG(dbgs() << "Not a 64-bit instruction, falling back to 32-bit."); } @@ -274,15 +333,28 @@ MCDisassembler::DecodeStatus ARCDisassembler::getInstruction( } // Calling the auto-generated decoder function. return decodeInstruction(DecoderTable32, Instr, Insn32, Address, this, STI); - } + } else { + if (Bytes.size() >= 6) { + // Attempt to treat as instr. with limm data. 
+ uint64_t Insn48; + if (!readInstruction48(Bytes, Address, Size, Insn48)) + return Fail; + Result = + decodeInstruction(DecoderTable48, Instr, Insn48, Address, this, STI); + if (Success == Result) { + DEBUG(dbgs() << "Successfully decoded 16-bit instruction with limm."); + return Result; + } + DEBUG(dbgs() << "Not a 16-bit instruction with limm, try without it."); + } - // 16-bit instruction. - uint32_t Insn16; - if (!readInstruction16(Bytes, Address, Size, Insn16)) { - return Fail; + uint32_t Insn16; + if (!readInstruction16(Bytes, Address, Size, Insn16)) + return Fail; + + // Calling the auto-generated decoder function. + return decodeInstruction(DecoderTable16, Instr, Insn16, Address, this, STI); } - // Calling the auto-generated decoder function. - return decodeInstruction(DecoderTable16, Instr, Insn16, Address, this, STI); } static MCDisassembler *createARCDisassembler(const Target &T, diff --git a/lib/Target/ARC/InstPrinter/ARCInstPrinter.cpp b/lib/Target/ARC/InstPrinter/ARCInstPrinter.cpp index d4f1046db122..48431677bb74 100644 --- a/lib/Target/ARC/InstPrinter/ARCInstPrinter.cpp +++ b/lib/Target/ARC/InstPrinter/ARCInstPrinter.cpp @@ -66,6 +66,10 @@ static const char *ARCCondCodeToString(ARCCC::CondCode CC) { return "gt"; case ARCCC::GE: return "ge"; + case ARCCC::VS: + return "vs"; + case ARCCC::VC: + return "vc"; case ARCCC::LT: return "lt"; case ARCCC::LE: @@ -101,6 +105,12 @@ static void printExpr(const MCExpr *Expr, const MCAsmInfo *MAI, int Offset = 0; const MCSymbolRefExpr *SRE; + if (const auto *CE = dyn_cast(Expr)) { + OS << "0x"; + OS.write_hex(CE->getValue()); + return; + } + if (const auto *BE = dyn_cast(Expr)) { SRE = dyn_cast(BE->getLHS()); const auto *CE = dyn_cast(BE->getRHS()); diff --git a/lib/Target/ARC/MCTargetDesc/ARCInfo.h b/lib/Target/ARC/MCTargetDesc/ARCInfo.h index b9ed99885702..401b4c5e6613 100644 --- a/lib/Target/ARC/MCTargetDesc/ARCInfo.h +++ b/lib/Target/ARC/MCTargetDesc/ARCInfo.h @@ -30,6 +30,8 @@ enum CondCode { N = 0x4, LO = 0x5, HS = 0x6, + VS = 0x7, + VC = 0x8, GT = 0x9, GE = 0xa, LT = 0xb, diff --git a/lib/Target/ARM/A15SDOptimizer.cpp b/lib/Target/ARM/A15SDOptimizer.cpp index 34e41ba54107..16d5f74d19e3 100644 --- a/lib/Target/ARM/A15SDOptimizer.cpp +++ b/lib/Target/ARM/A15SDOptimizer.cpp @@ -655,7 +655,7 @@ bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) { } bool A15SDOptimizer::runOnMachineFunction(MachineFunction &Fn) { - if (skipFunction(*Fn.getFunction())) + if (skipFunction(Fn.getFunction())) return false; const ARMSubtarget &STI = Fn.getSubtarget(); diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h index 3aac689c6310..9ffb4c2055f9 100644 --- a/lib/Target/ARM/ARM.h +++ b/lib/Target/ARM/ARM.h @@ -61,6 +61,7 @@ void initializeARMLoadStoreOptPass(PassRegistry &); void initializeARMPreAllocLoadStoreOptPass(PassRegistry &); void initializeARMConstantIslandsPass(PassRegistry &); void initializeARMExpandPseudoPass(PassRegistry &); +void initializeThumb2SizeReducePass(PassRegistry &); } // end namespace llvm diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index c1a3f639461d..705cfe0d3383 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -83,6 +83,9 @@ def FeatureDB : SubtargetFeature<"db", "HasDataBarrier", "true", def FeatureV7Clrex : SubtargetFeature<"v7clrex", "HasV7Clrex", "true", "Has v7 clrex instruction">; +def FeatureDFB : SubtargetFeature<"dfb", "HasFullDataBarrier", "true", + "Has full data barrier (dfb) instruction">; + def FeatureAcquireRelease : SubtargetFeature<"acquire-release", 
"HasAcquireRelease", "true", "Has v8 acquire/release (lda/ldaex " @@ -504,7 +507,8 @@ def ARMv5te : Architecture<"armv5te", "ARMv5te", [HasV5TEOps]>; def ARMv5tej : Architecture<"armv5tej", "ARMv5tej", [HasV5TEOps]>; -def ARMv6 : Architecture<"armv6", "ARMv6", [HasV6Ops]>; +def ARMv6 : Architecture<"armv6", "ARMv6", [HasV6Ops, + FeatureDSP]>; def ARMv6t2 : Architecture<"armv6t2", "ARMv6t2", [HasV6T2Ops, FeatureDSP]>; @@ -617,6 +621,7 @@ def ARMv83a : Architecture<"armv8.3-a", "ARMv83a", [HasV8_3aOps, def ARMv8r : Architecture<"armv8-r", "ARMv8r", [HasV8Ops, FeatureRClass, FeatureDB, + FeatureDFB, FeatureDSP, FeatureCRC, FeatureMP, diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index 753e7edbea43..2412b25eaadd 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -24,13 +24,11 @@ #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallString.h" #include "llvm/BinaryFormat/COFF.h" -#include "llvm/BinaryFormat/ELF.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/DebugInfo.h" #include "llvm/IR/Mangler.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" @@ -41,7 +39,6 @@ #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstBuilder.h" #include "llvm/MC/MCObjectStreamer.h" -#include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/ARMBuildAttributes.h" @@ -51,7 +48,6 @@ #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" -#include using namespace llvm; #define DEBUG_TYPE "asm-printer" @@ -113,7 +109,7 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) { Subtarget = &MF.getSubtarget(); SetupMachineFunction(MF); - const Function* F = MF.getFunction(); + const Function &F = MF.getFunction(); const TargetMachine& TM = MF.getTarget(); // Collect all globals that had their storage promoted to a constant pool. @@ -124,13 +120,13 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) { // Calculate this function's optimization goal. unsigned OptimizationGoal; - if (F->hasFnAttribute(Attribute::OptimizeNone)) + if (F.hasFnAttribute(Attribute::OptimizeNone)) // For best debugging illusion, speed and small size sacrificed OptimizationGoal = 6; - else if (F->optForMinSize()) + else if (F.optForMinSize()) // Aggressively for small size, speed and debug illusion sacrificed OptimizationGoal = 4; - else if (F->optForSize()) + else if (F.optForSize()) // For small size, but speed and debugging illusion preserved OptimizationGoal = 3; else if (TM.getOptLevel() == CodeGenOpt::Aggressive) @@ -150,7 +146,7 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) { OptimizationGoals = 0; if (Subtarget->isTargetCOFF()) { - bool Internal = F->hasInternalLinkage(); + bool Internal = F.hasInternalLinkage(); COFF::SymbolStorageClass Scl = Internal ? 
COFF::IMAGE_SYM_CLASS_STATIC : COFF::IMAGE_SYM_CLASS_EXTERNAL; int Type = COFF::IMAGE_SYM_DTYPE_FUNCTION << COFF::SCT_COMPLEX_TYPE_SHIFT; @@ -549,29 +545,6 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) { OutStreamer->EmitAssemblerFlag(MCAF_SubsectionsViaSymbols); } - if (TT.isOSBinFormatCOFF()) { - const auto &TLOF = - static_cast(getObjFileLowering()); - - std::string Flags; - raw_string_ostream OS(Flags); - - for (const auto &Function : M) - TLOF.emitLinkerFlagsForGlobal(OS, &Function); - for (const auto &Global : M.globals()) - TLOF.emitLinkerFlagsForGlobal(OS, &Global); - for (const auto &Alias : M.aliases()) - TLOF.emitLinkerFlagsForGlobal(OS, &Alias); - - OS.flush(); - - // Output collected flags - if (!Flags.empty()) { - OutStreamer->SwitchSection(TLOF.getDrectveSection()); - OutStreamer->EmitBytes(Flags); - } - } - // The last attribute to be emitted is ABI_optimization_goals MCTargetStreamer &TS = *OutStreamer->getTargetStreamer(); ARMTargetStreamer &ATS = static_cast(TS); @@ -1090,6 +1063,8 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { unsigned StartOp = 2 + 2; // Use all the operands. unsigned NumOffset = 0; + // Amount of SP adjustment folded into a push. + unsigned Pad = 0; switch (Opc) { default: @@ -1111,6 +1086,16 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { // temporary to workaround PR11902. if (MO.isImplicit()) continue; + // Registers, pushed as a part of folding an SP update into the + // push instruction are marked as undef and should not be + // restored when unwinding, because the function can modify the + // corresponding stack slots. + if (MO.isUndef()) { + assert(RegList.empty() && + "Pad registers must come before restored ones"); + Pad += 4; + continue; + } RegList.push_back(MO.getReg()); } break; @@ -1122,8 +1107,12 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { RegList.push_back(SrcReg); break; } - if (MAI->getExceptionHandlingType() == ExceptionHandling::ARM) + if (MAI->getExceptionHandlingType() == ExceptionHandling::ARM) { ATS.emitRegSave(RegList, Opc == ARM::VSTMDDB_UPD); + // Account for the SP adjustment, folded into the push. + if (Pad) + ATS.emitPad(Pad); + } } else { // Changes of stack / frame pointer. if (SrcReg == ARM::SP) { diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 6268b9ef2a37..41c2130e3386 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -1357,25 +1357,34 @@ void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const { MachineInstrBuilder LDM, STM; if (isThumb1 || !MI->getOperand(1).isDead()) { + MachineOperand LDWb(MI->getOperand(1)); + LDWb.setIsRenamable(false); LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA_UPD : isThumb1 ? ARM::tLDMIA_UPD : ARM::LDMIA_UPD)) - .add(MI->getOperand(1)); + .add(LDWb); } else { LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA : ARM::LDMIA)); } if (isThumb1 || !MI->getOperand(0).isDead()) { + MachineOperand STWb(MI->getOperand(0)); + STWb.setIsRenamable(false); STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA_UPD : isThumb1 ? ARM::tSTMIA_UPD : ARM::STMIA_UPD)) - .add(MI->getOperand(0)); + .add(STWb); } else { STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? 
ARM::t2STMIA : ARM::STMIA)); } - LDM.add(MI->getOperand(3)).add(predOps(ARMCC::AL)); - STM.add(MI->getOperand(2)).add(predOps(ARMCC::AL)); + MachineOperand LDBase(MI->getOperand(3)); + LDBase.setIsRenamable(false); + LDM.add(LDBase).add(predOps(ARMCC::AL)); + + MachineOperand STBase(MI->getOperand(2)); + STBase.setIsRenamable(false); + STM.add(STBase).add(predOps(ARMCC::AL)); // Sort the scratch registers into ascending order. const TargetRegisterInfo &TRI = getRegisterInfo(); @@ -1447,7 +1456,7 @@ bool ARMBaseInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { DEBUG(dbgs() << "widening: " << MI); MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI); - // Get rid of the old of DstRegD. Leave it if it defines a Q-reg + // Get rid of the old implicit-def of DstRegD. Leave it if it defines a Q-reg // or some other super-register. int ImpDefIdx = MI.findRegisterDefOperandIdx(DstRegD); if (ImpDefIdx != -1) @@ -1503,18 +1512,18 @@ static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) { 4, ACPV->getModifier(), ACPV->mustAddCurrentAddress()); else if (ACPV->isExtSymbol()) NewCPV = ARMConstantPoolSymbol:: - Create(MF.getFunction()->getContext(), + Create(MF.getFunction().getContext(), cast(ACPV)->getSymbol(), PCLabelId, 4); else if (ACPV->isBlockAddress()) NewCPV = ARMConstantPoolConstant:: Create(cast(ACPV)->getBlockAddress(), PCLabelId, ARMCP::CPBlockAddress, 4); else if (ACPV->isLSDA()) - NewCPV = ARMConstantPoolConstant::Create(MF.getFunction(), PCLabelId, + NewCPV = ARMConstantPoolConstant::Create(&MF.getFunction(), PCLabelId, ARMCP::CPLSDA, 4); else if (ACPV->isMachineBasicBlock()) NewCPV = ARMConstantPoolMBB:: - Create(MF.getFunction()->getContext(), + Create(MF.getFunction().getContext(), cast(ACPV)->getMBB(), PCLabelId, 4); else llvm_unreachable("Unexpected ARM constantpool value type!!"); @@ -1650,7 +1659,7 @@ bool ARMBaseInstrInfo::produceSameValue(const MachineInstr &MI0, } for (unsigned i = 3, e = MI0.getNumOperands(); i != e; ++i) { - // %vreg12 = PICLDR %vreg11, 0, pred:14, pred:%noreg + // %12 = PICLDR %11, 0, 14, %noreg const MachineOperand &MO0 = MI0.getOperand(i); const MachineOperand &MO1 = MI1.getOperand(i); if (!MO0.isIdenticalTo(MO1)) @@ -1834,7 +1843,7 @@ isProfitableToIfCvt(MachineBasicBlock &MBB, // If we are optimizing for size, see if the branch in the predecessor can be // lowered to cbn?z by the constant island lowering pass, and return false if // so. This results in a shorter instruction sequence. - if (MBB.getParent()->getFunction()->optForSize()) { + if (MBB.getParent()->getFunction().optForSize()) { MachineBasicBlock *Pred = *MBB.pred_begin(); if (!Pred->empty()) { MachineInstr *LastMI = &*Pred->rbegin(); @@ -2201,7 +2210,7 @@ bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget, unsigned NumBytes) { // This optimisation potentially adds lots of load and store // micro-operations, it's only really a great benefit to code-size. - if (!MF.getFunction()->optForMinSize()) + if (!MF.getFunction().optForMinSize()) return false; // If only one register is pushed/popped, LLVM can use an LDR/STR @@ -2268,9 +2277,9 @@ bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget, --CurRegEnc) { unsigned CurReg = RegClass->getRegister(CurRegEnc); if (!IsPop) { - // Pushing any register is completely harmless, mark the - // register involved as undef since we don't care about it in - // the slightest. 
+ // Pushing any register is completely harmless, mark the register involved + // as undef since we don't care about its value and must not restore it + // during stack unwinding. RegList.push_back(MachineOperand::CreateReg(CurReg, false, false, false, false, true)); --RegsNeeded; @@ -2525,14 +2534,28 @@ inline static ARMCC::CondCodes getSwappedCondition(ARMCC::CondCodes CC) { } } +/// getCmpToAddCondition - assume the flags are set by CMP(a,b), return +/// the condition code if we modify the instructions such that flags are +/// set by ADD(a,b,X). +inline static ARMCC::CondCodes getCmpToAddCondition(ARMCC::CondCodes CC) { + switch (CC) { + default: return ARMCC::AL; + case ARMCC::HS: return ARMCC::LO; + case ARMCC::LO: return ARMCC::HS; + case ARMCC::VS: return ARMCC::VS; + case ARMCC::VC: return ARMCC::VC; + } +} + /// isRedundantFlagInstr - check whether the first instruction, whose only /// purpose is to update flags, can be made redundant. /// CMPrr can be made redundant by SUBrr if the operands are the same. /// CMPri can be made redundant by SUBri if the operands are the same. +/// CMPrr(r0, r1) can be made redundant by ADDr[ri](r0, r1, X). /// This function can be extended later on. -inline static bool isRedundantFlagInstr(MachineInstr *CmpI, unsigned SrcReg, - unsigned SrcReg2, int ImmValue, - MachineInstr *OI) { +inline static bool isRedundantFlagInstr(const MachineInstr *CmpI, + unsigned SrcReg, unsigned SrcReg2, + int ImmValue, const MachineInstr *OI) { if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) && (OI->getOpcode() == ARM::SUBrr || @@ -2550,6 +2573,14 @@ inline static bool isRedundantFlagInstr(MachineInstr *CmpI, unsigned SrcReg, OI->getOperand(1).getReg() == SrcReg && OI->getOperand(2).getImm() == ImmValue) return true; + + if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) && + (OI->getOpcode() == ARM::ADDrr || OI->getOpcode() == ARM::t2ADDrr || + OI->getOpcode() == ARM::ADDri || OI->getOpcode() == ARM::t2ADDri) && + OI->getOperand(0).isReg() && OI->getOperand(1).isReg() && + OI->getOperand(0).getReg() == SrcReg && + OI->getOperand(1).getReg() == SrcReg2) + return true; return false; } @@ -2652,17 +2683,18 @@ bool ARMBaseInstrInfo::optimizeCompareInstr( if (I == B) return false; // There are two possible candidates which can be changed to set CPSR: - // One is MI, the other is a SUB instruction. - // For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1). + // One is MI, the other is a SUB or ADD instruction. + // For CMPrr(r1,r2), we are looking for SUB(r1,r2), SUB(r2,r1), or + // ADDr[ri](r1, r2, X). // For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue). - MachineInstr *Sub = nullptr; + MachineInstr *SubAdd = nullptr; if (SrcReg2 != 0) // MI is not a candidate for CMPrr. MI = nullptr; else if (MI->getParent() != CmpInstr.getParent() || CmpValue != 0) { // Conservatively refuse to convert an instruction which isn't in the same // BB as the comparison. - // For CMPri w/ CmpValue != 0, a Sub may still be a candidate. + // For CMPri w/ CmpValue != 0, a SubAdd may still be a candidate. // Thus we cannot return here. if (CmpInstr.getOpcode() == ARM::CMPri || CmpInstr.getOpcode() == ARM::t2CMPri) @@ -2707,11 +2739,20 @@ bool ARMBaseInstrInfo::optimizeCompareInstr( } // Check that CPSR isn't set between the comparison instruction and the one we - // want to change. At the same time, search for Sub. + // want to change. At the same time, search for SubAdd. 
const TargetRegisterInfo *TRI = &getRegisterInfo(); - --I; - for (; I != E; --I) { - const MachineInstr &Instr = *I; + do { + const MachineInstr &Instr = *--I; + + // Check whether CmpInstr can be made redundant by the current instruction. + if (isRedundantFlagInstr(&CmpInstr, SrcReg, SrcReg2, CmpValue, &Instr)) { + SubAdd = &*I; + break; + } + + // Allow E (which was initially MI) to be SubAdd but do not search before E. + if (I == E) + break; if (Instr.modifiesRegister(ARM::CPSR, TRI) || Instr.readsRegister(ARM::CPSR, TRI)) @@ -2719,23 +2760,14 @@ bool ARMBaseInstrInfo::optimizeCompareInstr( // change. We can't do this transformation. return false; - // Check whether CmpInstr can be made redundant by the current instruction. - if (isRedundantFlagInstr(&CmpInstr, SrcReg, SrcReg2, CmpValue, &*I)) { - Sub = &*I; - break; - } - - if (I == B) - // The 'and' is below the comparison instruction. - return false; - } + } while (I != B); // Return false if no candidates exist. - if (!MI && !Sub) + if (!MI && !SubAdd) return false; // The single candidate is called MI. - if (!MI) MI = Sub; + if (!MI) MI = SubAdd; // We can't use a predicated instruction - it doesn't always write the flags. if (isPredicated(*MI)) @@ -2793,25 +2825,31 @@ bool ARMBaseInstrInfo::optimizeCompareInstr( break; } - if (Sub) { - ARMCC::CondCodes NewCC = getSwappedCondition(CC); - if (NewCC == ARMCC::AL) - return false; + if (SubAdd) { // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based // on CMP needs to be updated to be based on SUB. + // If we have ADD(r1, r2, X) and CMP(r1, r2), the condition code also + // needs to be modified. // Push the condition code operands to OperandsToUpdate. // If it is safe to remove CmpInstr, the condition code of these // operands will be modified. - if (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 && - Sub->getOperand(2).getReg() == SrcReg) { + unsigned Opc = SubAdd->getOpcode(); + bool IsSub = Opc == ARM::SUBrr || Opc == ARM::t2SUBrr || + Opc == ARM::SUBri || Opc == ARM::t2SUBri; + if (!IsSub || (SrcReg2 != 0 && SubAdd->getOperand(1).getReg() == SrcReg2 && + SubAdd->getOperand(2).getReg() == SrcReg)) { // VSel doesn't support condition code update. if (IsInstrVSel) return false; + // Ensure we can swap the condition. + ARMCC::CondCodes NewCC = (IsSub ? getSwappedCondition(CC) : getCmpToAddCondition(CC)); + if (NewCC == ARMCC::AL) + return false; OperandsToUpdate.push_back( std::make_pair(&((*I).getOperand(IO - 1)), NewCC)); } } else { - // No Sub, so this is x = y, z; cmp x, 0. + // No SubAdd, so this is x = y, z; cmp x, 0. switch (CC) { case ARMCC::EQ: // Z case ARMCC::NE: // Z @@ -2865,6 +2903,23 @@ bool ARMBaseInstrInfo::optimizeCompareInstr( return true; } +bool ARMBaseInstrInfo::shouldSink(const MachineInstr &MI) const { + // Do not sink MI if it might be used to optimize a redundant compare. + // We heuristically only look at the instruction immediately following MI to + // avoid potentially searching the entire basic block. 
+ if (isPredicated(MI)) + return true; + MachineBasicBlock::const_iterator Next = &MI; + ++Next; + unsigned SrcReg, SrcReg2; + int CmpMask, CmpValue; + if (Next != MI.getParent()->end() && + analyzeCompare(*Next, SrcReg, SrcReg2, CmpMask, CmpValue) && + isRedundantFlagInstr(&*Next, SrcReg, SrcReg2, CmpValue, &MI)) + return false; + return true; +} + bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, unsigned Reg, MachineRegisterInfo *MRI) const { @@ -2873,7 +2928,7 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, if (DefOpc != ARM::t2MOVi32imm && DefOpc != ARM::MOVi32imm) return false; if (!DefMI.getOperand(1).isImm()) - // Could be t2MOVi32imm + // Could be t2MOVi32imm @xx return false; if (!MRI->hasOneNonDBGUse(Reg)) @@ -3458,8 +3513,8 @@ bool ARMBaseInstrInfo::isLDMBaseRegInList(const MachineInstr &MI) const { } unsigned ARMBaseInstrInfo::getLDMVariableDefsSize(const MachineInstr &MI) const { - // ins GPR:$Rn, pred:$p (2xOp), reglist:$regs, variable_ops - // (outs GPR:$wb), (ins GPR:$Rn, pred:$p (2xOp), reglist:$regs, variable_ops) + // ins GPR:$Rn, $p (2xOp), reglist:$regs, variable_ops + // (outs GPR:$wb), (ins GPR:$Rn, $p (2xOp), reglist:$regs, variable_ops) return MI.getNumOperands() + 1 - MI.getDesc().getNumOperands(); } @@ -3973,7 +4028,7 @@ int ARMBaseInstrInfo::getOperandLatencyImpl( if (Latency > 0 && Subtarget.isThumb2()) { const MachineFunction *MF = DefMI.getParent()->getParent(); // FIXME: Use Function::optForSize(). - if (MF->getFunction()->hasFnAttribute(Attribute::OptimizeForSize)) + if (MF->getFunction().hasFnAttribute(Attribute::OptimizeForSize)) --Latency; } return Latency; @@ -4668,7 +4723,7 @@ void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI, NewMIB = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::VEXTd32), DDst); - // On the first instruction, both DSrc and DDst may be if present. + // On the first instruction, both DSrc and DDst may be undef if present. // Specifically when the original instruction didn't have them as an // . unsigned CurReg = SrcLane == 1 && DstLane == 1 ? DSrc : DDst; @@ -4688,7 +4743,7 @@ void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI, MIB.addReg(DDst, RegState::Define); // On the second instruction, DDst has definitely been defined above, so - // it is not . DSrc, if present, can be as above. + // it is not undef. DSrc, if present, can be undef as above. CurReg = SrcLane == 1 && DstLane == 0 ? DSrc : DDst; CurUndef = CurReg == DSrc && !MI.readsRegister(CurReg, TRI); MIB.addReg(CurReg, getUndefRegState(CurUndef)); @@ -4771,7 +4826,7 @@ unsigned ARMBaseInstrInfo::getPartialRegUpdateClearance( // We must be able to clobber the whole D-reg. if (TargetRegisterInfo::isVirtualRegister(Reg)) { - // Virtual register must be a foo:ssub_0 operand. + // Virtual register must be a def undef foo:ssub_0 operand. if (!MO.getSubReg() || MI.readsVirtualRegister(Reg)) return 0; } else if (ARM::SPRRegClass.contains(Reg)) { @@ -4855,12 +4910,14 @@ bool ARMBaseInstrInfo::getRegSequenceLikeInputs( // Populate the InputRegs accordingly. 
// rY const MachineOperand *MOReg = &MI.getOperand(1); - InputRegs.push_back( - RegSubRegPairAndIdx(MOReg->getReg(), MOReg->getSubReg(), ARM::ssub_0)); + if (!MOReg->isUndef()) + InputRegs.push_back(RegSubRegPairAndIdx(MOReg->getReg(), + MOReg->getSubReg(), ARM::ssub_0)); // rZ MOReg = &MI.getOperand(2); - InputRegs.push_back( - RegSubRegPairAndIdx(MOReg->getReg(), MOReg->getSubReg(), ARM::ssub_1)); + if (!MOReg->isUndef()) + InputRegs.push_back(RegSubRegPairAndIdx(MOReg->getReg(), + MOReg->getSubReg(), ARM::ssub_1)); return true; } llvm_unreachable("Target dependent opcode missing"); @@ -4879,6 +4936,8 @@ bool ARMBaseInstrInfo::getExtractSubregLikeInputs( // rX = EXTRACT_SUBREG dZ, ssub_0 // rY = EXTRACT_SUBREG dZ, ssub_1 const MachineOperand &MOReg = MI.getOperand(2); + if (MOReg.isUndef()) + return false; InputReg.Reg = MOReg.getReg(); InputReg.SubReg = MOReg.getSubReg(); InputReg.SubIdx = DefIdx == 0 ? ARM::ssub_0 : ARM::ssub_1; @@ -4898,6 +4957,8 @@ bool ARMBaseInstrInfo::getInsertSubregLikeInputs( // dX = VSETLNi32 dY, rZ, imm const MachineOperand &MOBaseReg = MI.getOperand(1); const MachineOperand &MOInsertedReg = MI.getOperand(2); + if (MOInsertedReg.isUndef()) + return false; const MachineOperand &MOIndex = MI.getOperand(3); BaseReg.Reg = MOBaseReg.getReg(); BaseReg.SubReg = MOBaseReg.getSubReg(); diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index 2ff4b1100ee2..282a68749102 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -47,10 +47,10 @@ class ARMBaseInstrInfo : public ARMGenInstrInfo { /// and \p DefIdx. /// \p [out] InputRegs of the equivalent REG_SEQUENCE. Each element of /// the list is modeled as . - /// E.g., REG_SEQUENCE vreg1:sub1, sub0, vreg2, sub1 would produce + /// E.g., REG_SEQUENCE %1:sub1, sub0, %2, sub1 would produce /// two elements: - /// - vreg1:sub1, sub0 - /// - vreg2<:0>, sub1 + /// - %1:sub1, sub0 + /// - %2<:0>, sub1 /// /// \returns true if it is possible to build such an input sequence /// with the pair \p MI, \p DefIdx. False otherwise. @@ -63,8 +63,8 @@ class ARMBaseInstrInfo : public ARMGenInstrInfo { /// Build the equivalent inputs of a EXTRACT_SUBREG for the given \p MI /// and \p DefIdx. /// \p [out] InputReg of the equivalent EXTRACT_SUBREG. - /// E.g., EXTRACT_SUBREG vreg1:sub1, sub0, sub1 would produce: - /// - vreg1:sub1, sub0 + /// E.g., EXTRACT_SUBREG %1:sub1, sub0, sub1 would produce: + /// - %1:sub1, sub0 /// /// \returns true if it is possible to build such an input sequence /// with the pair \p MI, \p DefIdx. False otherwise. @@ -77,9 +77,9 @@ class ARMBaseInstrInfo : public ARMGenInstrInfo { /// and \p DefIdx. /// \p [out] BaseReg and \p [out] InsertedReg contain /// the equivalent inputs of INSERT_SUBREG. - /// E.g., INSERT_SUBREG vreg0:sub0, vreg1:sub1, sub3 would produce: - /// - BaseReg: vreg0:sub0 - /// - InsertedReg: vreg1:sub1, sub3 + /// E.g., INSERT_SUBREG %0:sub0, %1:sub1, sub3 would produce: + /// - BaseReg: %0:sub0 + /// - InsertedReg: %1:sub1, sub3 /// /// \returns true if it is possible to build such an input sequence /// with the pair \p MI, \p DefIdx. False otherwise. 
@@ -215,6 +215,8 @@ class ARMBaseInstrInfo : public ARMGenInstrInfo { bool expandPostRAPseudo(MachineInstr &MI) const override; + bool shouldSink(const MachineInstr &MI) const override; + void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, unsigned SubIdx, const MachineInstr &Orig, diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 0aec874e5ddb..4b9a4376adf8 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -71,17 +71,17 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { ? CSR_iOS_SaveList : (UseSplitPush ? CSR_AAPCS_SplitPush_SaveList : CSR_AAPCS_SaveList); - const Function *F = MF->getFunction(); - if (F->getCallingConv() == CallingConv::GHC) { + const Function &F = MF->getFunction(); + if (F.getCallingConv() == CallingConv::GHC) { // GHC set of callee saved regs is empty as all those regs are // used for passing STG regs around return CSR_NoRegs_SaveList; - } else if (F->hasFnAttribute("interrupt")) { + } else if (F.hasFnAttribute("interrupt")) { if (STI.isMClass()) { // M-class CPUs have hardware which saves the registers needed to allow a // function conforming to the AAPCS to function as a handler. return UseSplitPush ? CSR_AAPCS_SplitPush_SaveList : CSR_AAPCS_SaveList; - } else if (F->getFnAttribute("interrupt").getValueAsString() == "FIQ") { + } else if (F.getFnAttribute("interrupt").getValueAsString() == "FIQ") { // Fast interrupt mode gives the handler a private copy of R8-R14, so less // need to be saved to restore user-mode state. return CSR_FIQ_SaveList; @@ -93,7 +93,7 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { } if (STI.getTargetLowering()->supportSwiftError() && - F->getAttributes().hasAttrSomewhere(Attribute::SwiftError)) { + F.getAttributes().hasAttrSomewhere(Attribute::SwiftError)) { if (STI.isTargetDarwin()) return CSR_iOS_SwiftError_SaveList; @@ -101,7 +101,7 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { CSR_AAPCS_SwiftError_SaveList; } - if (STI.isTargetDarwin() && F->getCallingConv() == CallingConv::CXX_FAST_TLS) + if (STI.isTargetDarwin() && F.getCallingConv() == CallingConv::CXX_FAST_TLS) return MF->getInfo()->isSplitCSR() ? CSR_iOS_CXX_TLS_PE_SaveList : CSR_iOS_CXX_TLS_SaveList; @@ -111,7 +111,7 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { const MCPhysReg *ARMBaseRegisterInfo::getCalleeSavedRegsViaCopy( const MachineFunction *MF) const { assert(MF && "Invalid MachineFunction pointer."); - if (MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS && + if (MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS && MF->getInfo()->isSplitCSR()) return CSR_iOS_CXX_TLS_ViaCopy_SaveList; return nullptr; @@ -126,7 +126,7 @@ ARMBaseRegisterInfo::getCallPreservedMask(const MachineFunction &MF, return CSR_NoRegs_RegMask; if (STI.getTargetLowering()->supportSwiftError() && - MF.getFunction()->getAttributes().hasAttrSomewhere(Attribute::SwiftError)) + MF.getFunction().getAttributes().hasAttrSomewhere(Attribute::SwiftError)) return STI.isTargetDarwin() ? 
CSR_iOS_SwiftError_RegMask : CSR_AAPCS_SwiftError_RegMask; @@ -440,7 +440,7 @@ void ARMBaseRegisterInfo::emitLoadConstPool( const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); MachineConstantPool *ConstantPool = MF.getConstantPool(); const Constant *C = - ConstantInt::get(Type::getInt32Ty(MF.getFunction()->getContext()), Val); + ConstantInt::get(Type::getInt32Ty(MF.getFunction().getContext()), Val); unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4); BuildMI(MBB, MBBI, dl, TII.get(ARM::LDRcp)) diff --git a/lib/Target/ARM/ARMCallLowering.cpp b/lib/Target/ARM/ARMCallLowering.cpp index 1c2df39d05a1..eab4b3b13f31 100644 --- a/lib/Target/ARM/ARMCallLowering.cpp +++ b/lib/Target/ARM/ARMCallLowering.cpp @@ -190,7 +190,7 @@ void ARMCallLowering::splitToValueTypes( LLVMContext &Ctx = OrigArg.Ty->getContext(); const DataLayout &DL = MF.getDataLayout(); MachineRegisterInfo &MRI = MF.getRegInfo(); - const Function *F = MF.getFunction(); + const Function &F = MF.getFunction(); SmallVector SplitVTs; SmallVector Offsets; @@ -218,7 +218,7 @@ void ARMCallLowering::splitToValueTypes( bool NeedsConsecutiveRegisters = TLI.functionArgumentNeedsConsecutiveRegisters( - SplitTy, F->getCallingConv(), F->isVarArg()); + SplitTy, F.getCallingConv(), F.isVarArg()); if (NeedsConsecutiveRegisters) { Flags.setInConsecutiveRegs(); if (i == e - 1) @@ -244,7 +244,7 @@ bool ARMCallLowering::lowerReturnVal(MachineIRBuilder &MIRBuilder, return true; auto &MF = MIRBuilder.getMF(); - const auto &F = *MF.getFunction(); + const auto &F = MF.getFunction(); auto DL = MF.getDataLayout(); auto &TLI = *getTLI(); @@ -434,9 +434,12 @@ bool ARMCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, auto &MBB = MIRBuilder.getMBB(); auto DL = MF.getDataLayout(); - for (auto &Arg : F.args()) + for (auto &Arg : F.args()) { if (!isSupportedType(DL, TLI, Arg.getType())) return false; + if (Arg.hasByValOrInAllocaAttr()) + return false; + } CCAssignFn *AssignFn = TLI.CCAssignFnForCall(F.getCallingConv(), F.isVarArg()); @@ -529,6 +532,9 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, if (!Arg.IsFixed) return false; + if (Arg.Flags.isByVal()) + return false; + SmallVector Regs; splitToValueTypes(Arg, ArgInfos, MF, [&](unsigned Reg, uint64_t Offset) { Regs.push_back(Reg); diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp index bc781b26b2c4..8baee1ce281d 100644 --- a/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -326,7 +326,7 @@ LLVM_DUMP_METHOD void ARMConstantIslands::dumpBBs() { DEBUG({ for (unsigned J = 0, E = BBInfo.size(); J !=E; ++J) { const BasicBlockInfo &BBI = BBInfo[J]; - dbgs() << format("%08x BB#%u\t", BBI.Offset, J) + dbgs() << format("%08x %bb.%u\t", BBI.Offset, J) << " kb=" << unsigned(BBI.KnownBits) << " ua=" << unsigned(BBI.Unalign) << " pa=" << unsigned(BBI.PostAlign) @@ -1071,11 +1071,11 @@ bool ARMConstantIslands::isCPEntryInRange(MachineInstr *MI, unsigned UserOffset, const BasicBlockInfo &BBI = BBInfo[Block]; dbgs() << "User of CPE#" << CPEMI->getOperand(0).getImm() << " max delta=" << MaxDisp - << format(" insn address=%#x", UserOffset) - << " in BB#" << Block << ": " + << format(" insn address=%#x", UserOffset) << " in " + << printMBBReference(*MI->getParent()) << ": " << format("%#x-%x\t", BBI.Offset, BBI.postOffset()) << *MI << format("CPE address=%#x offset=%+d: ", CPEOffset, - int(CPEOffset-UserOffset)); + int(CPEOffset - UserOffset)); }); } @@ -1261,7 +1261,7 @@ bool 
ARMConstantIslands::findAvailableWater(CPUser &U, unsigned UserOffset, // This is the least amount of required padding seen so far. BestGrowth = Growth; WaterIter = IP; - DEBUG(dbgs() << "Found water after BB#" << WaterBB->getNumber() + DEBUG(dbgs() << "Found water after " << printMBBReference(*WaterBB) << " Growth=" << Growth << '\n'); if (CloserWater && WaterBB == U.MI->getParent()) @@ -1305,8 +1305,8 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex, unsigned CPEOffset = UserBBI.postOffset(CPELogAlign) + Delta; if (isOffsetInRange(UserOffset, CPEOffset, U)) { - DEBUG(dbgs() << "Split at end of BB#" << UserMBB->getNumber() - << format(", expected CPE offset %#x\n", CPEOffset)); + DEBUG(dbgs() << "Split at end of " << printMBBReference(*UserMBB) + << format(", expected CPE offset %#x\n", CPEOffset)); NewMBB = &*++UserMBB->getIterator(); // Add an unconditional branch from UserMBB to fallthrough block. Record // it for branch lengthening; this new branch will not get out of range, @@ -1578,11 +1578,11 @@ bool ARMConstantIslands::isBBInRange(MachineInstr *MI,MachineBasicBlock *DestBB, unsigned BrOffset = getOffsetOf(MI) + PCAdj; unsigned DestOffset = BBInfo[DestBB->getNumber()].Offset; - DEBUG(dbgs() << "Branch of destination BB#" << DestBB->getNumber() - << " from BB#" << MI->getParent()->getNumber() - << " max delta=" << MaxDisp - << " from " << getOffsetOf(MI) << " to " << DestOffset - << " offset " << int(DestOffset-BrOffset) << "\t" << *MI); + DEBUG(dbgs() << "Branch of destination " << printMBBReference(*DestBB) + << " from " << printMBBReference(*MI->getParent()) + << " max delta=" << MaxDisp << " from " << getOffsetOf(MI) + << " to " << DestOffset << " offset " + << int(DestOffset - BrOffset) << "\t" << *MI); if (BrOffset <= DestOffset) { // Branch before the Dest. @@ -1700,9 +1700,9 @@ ARMConstantIslands::fixupConditionalBr(ImmBranch &Br) { } MachineBasicBlock *NextBB = &*++MBB->getIterator(); - DEBUG(dbgs() << " Insert B to BB#" << DestBB->getNumber() - << " also invert condition and change dest. to BB#" - << NextBB->getNumber() << "\n"); + DEBUG(dbgs() << " Insert B to " << printMBBReference(*DestBB) + << " also invert condition and change dest. to " + << printMBBReference(*NextBB) << "\n"); // Insert a new conditional branch and a new unconditional branch. // Also update the ImmBranch as well as adding a new entry for the new branch. @@ -2212,7 +2212,7 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() { .addReg(IdxReg, getKillRegState(IdxRegKill)) .addJumpTableIndex(JTI, JTOP.getTargetFlags()) .addImm(CPEMI->getOperand(0).getImm()); - DEBUG(dbgs() << "BB#" << MBB->getNumber() << ": " << *NewJTMI); + DEBUG(dbgs() << printMBBReference(*MBB) << ": " << *NewJTMI); unsigned JTOpc = ByteOk ? 
ARM::JUMPTABLE_TBB : ARM::JUMPTABLE_TBH; CPEMI->setDesc(TII->get(JTOpc)); diff --git a/lib/Target/ARM/ARMConstantPoolValue.cpp b/lib/Target/ARM/ARMConstantPoolValue.cpp index 38ea835fbe2f..39ae02af513b 100644 --- a/lib/Target/ARM/ARMConstantPoolValue.cpp +++ b/lib/Target/ARM/ARMConstantPoolValue.cpp @@ -292,6 +292,6 @@ void ARMConstantPoolMBB::addSelectionDAGCSEId(FoldingSetNodeID &ID) { } void ARMConstantPoolMBB::print(raw_ostream &O) const { - O << "BB#" << MBB->getNumber(); + O << printMBBReference(*MBB); ARMConstantPoolValue::print(O); } diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index bf67bbdc3795..b14b2c6a813f 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -606,8 +606,11 @@ void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI, // Transfer the destination register operand. MIB.add(MI.getOperand(OpIdx++)); - if (IsExt) - MIB.add(MI.getOperand(OpIdx++)); + if (IsExt) { + MachineOperand VdSrc(MI.getOperand(OpIdx++)); + VdSrc.setIsRenamable(false); + MIB.add(VdSrc); + } bool SrcIsKill = MI.getOperand(OpIdx).isKill(); unsigned SrcReg = MI.getOperand(OpIdx++).getReg(); @@ -616,7 +619,9 @@ void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI, MIB.addReg(D0); // Copy the other source register operand. - MIB.add(MI.getOperand(OpIdx++)); + MachineOperand VmSrc(MI.getOperand(OpIdx++)); + VmSrc.setIsRenamable(false); + MIB.add(VmSrc); // Copy the predicate operands. MIB.add(MI.getOperand(OpIdx++)); @@ -922,7 +927,7 @@ bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB, // .Lloadcmp: // ldrexd rDestLo, rDestHi, [rAddr] // cmp rDestLo, rDesiredLo - // sbcs rTempReg, rDestHi, rDesiredHi + // sbcs dead rTempReg, rDestHi, rDesiredHi // bne .Ldone unsigned LDREXD = IsThumb ? ARM::t2LDREXD : ARM::LDREXD; MachineInstrBuilder MIB; @@ -1254,7 +1259,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, MachineConstantPool *MCP = MF->getConstantPool(); unsigned PCLabelID = AFI->createPICLabelUId(); MachineConstantPoolValue *CPV = - ARMConstantPoolSymbol::Create(MF->getFunction()->getContext(), + ARMConstantPoolSymbol::Create(MF->getFunction().getContext(), "__aeabi_read_tp", PCLabelID, 0); unsigned Reg = MI.getOperand(0).getReg(); MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), @@ -1464,7 +1469,9 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, unsigned SrcReg = MI.getOperand(OpIdx++).getReg(); // Copy the destination register. - MIB.add(MI.getOperand(OpIdx++)); + MachineOperand Dst(MI.getOperand(OpIdx++)); + Dst.setIsRenamable(false); + MIB.add(Dst); // Copy the predicate operands. MIB.add(MI.getOperand(OpIdx++)); diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 1090f62106f8..814236ce83c0 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -1416,7 +1416,7 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, case MVT::i8: case MVT::i16: needsExt = true; - // Intentional fall-through. + LLVM_FALLTHROUGH; case MVT::i32: if (isThumb2) { if (!UseImm) @@ -2352,8 +2352,8 @@ bool ARMFastISel::SelectCall(const Instruction *I, for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i) { // If we're lowering a memory intrinsic instead of a regular call, skip the - // last two arguments, which shouldn't be passed to the underlying function. - if (IntrMemName && e-i <= 2) + // last argument, which shouldn't be passed to the underlying function. 
+ if (IntrMemName && e - i <= 1) break; ISD::ArgFlagsTy Flags; @@ -2958,7 +2958,7 @@ unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV, unsigned Align, MVT VT) { bool UseGOT_PREL = !TM.shouldAssumeDSOLocal(*GV->getParent(), GV); - LLVMContext *Context = &MF->getFunction()->getContext(); + LLVMContext *Context = &MF->getFunction().getContext(); unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); unsigned PCAdj = Subtarget->isThumb() ? 4 : 8; ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create( diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index d60734ab1441..4ff864ac6ccd 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -203,10 +203,10 @@ static int sizeOfSPAdjustment(const MachineInstr &MI) { static bool WindowsRequiresStackProbe(const MachineFunction &MF, size_t StackSizeInBytes) { const MachineFrameInfo &MFI = MF.getFrameInfo(); - const Function *F = MF.getFunction(); + const Function &F = MF.getFunction(); unsigned StackProbeSize = (MFI.getStackProtectorIndex() > 0) ? 4080 : 4096; - if (F->hasFnAttribute("stack-probe-size")) - F->getFnAttribute("stack-probe-size") + if (F.hasFnAttribute("stack-probe-size")) + F.getFnAttribute("stack-probe-size") .getValueAsString() .getAsInteger(0, StackProbeSize); return StackSizeInBytes >= StackProbeSize; @@ -370,7 +370,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, // All calls are tail calls in GHC calling conv, and functions have no // prologue/epilogue. - if (MF.getFunction()->getCallingConv() == CallingConv::GHC) + if (MF.getFunction().getCallingConv() == CallingConv::GHC) return; StackAdjustingInsts DefCFAOffsetCandidates; @@ -448,7 +448,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, int FramePtrOffsetInPush = 0; if (HasFP) { int FPOffset = MFI.getObjectOffset(FramePtrSpillFI); - assert(getMaxFPOffset(*MF.getFunction(), *AFI) <= FPOffset && + assert(getMaxFPOffset(MF.getFunction(), *AFI) <= FPOffset && "Max FP estimation is wrong"); FramePtrOffsetInPush = FPOffset + ArgRegsSaveSize; AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) + @@ -766,7 +766,7 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, // All calls are tail calls in GHC calling conv, and functions have no // prologue/epilogue. - if (MF.getFunction()->getCallingConv() == CallingConv::GHC) + if (MF.getFunction().getCallingConv() == CallingConv::GHC) return; // First put ourselves on the first (from top) terminator instructions. @@ -1533,7 +1533,7 @@ checkNumAlignedDPRCS2Regs(MachineFunction &MF, BitVector &SavedRegs) { return; // Naked functions don't spill callee-saved registers. - if (MF.getFunction()->hasFnAttribute(Attribute::Naked)) + if (MF.getFunction().hasFnAttribute(Attribute::Naked)) return; // We are planning to use NEON instructions vst1 / vld1. @@ -1744,7 +1744,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, EstimatedStackSize += 16; // For possible paddings. 
unsigned EstimatedRSStackSizeLimit = estimateRSStackSizeLimit(MF, this); - int MaxFPOffset = getMaxFPOffset(*MF.getFunction(), *AFI); + int MaxFPOffset = getMaxFPOffset(MF.getFunction(), *AFI); bool BigFrameOffsets = EstimatedStackSize >= EstimatedRSStackSizeLimit || MFI.hasVarSizedObjects() || (MFI.adjustsStack() && !canSimplifyCallFramePseudos(MF)) || @@ -1832,12 +1832,12 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, if (!HasFP) { if (SavedRegs.test(ARM::R7)) { --RegDeficit; - DEBUG(dbgs() << "%R7 is saved low register, RegDeficit = " + DEBUG(dbgs() << "%r7 is saved low register, RegDeficit = " << RegDeficit << "\n"); } else { AvailableRegs.push_back(ARM::R7); DEBUG(dbgs() - << "%R7 is non-saved low register, adding to AvailableRegs\n"); + << "%r7 is non-saved low register, adding to AvailableRegs\n"); } } @@ -1859,11 +1859,11 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, MF.getFrameInfo().isReturnAddressTaken())) { if (SavedRegs.test(ARM::LR)) { --RegDeficit; - DEBUG(dbgs() << "%LR is saved register, RegDeficit = " << RegDeficit + DEBUG(dbgs() << "%lr is saved register, RegDeficit = " << RegDeficit << "\n"); } else { AvailableRegs.push_back(ARM::LR); - DEBUG(dbgs() << "%LR is not saved, adding to AvailableRegs\n"); + DEBUG(dbgs() << "%lr is not saved, adding to AvailableRegs\n"); } } @@ -2102,7 +2102,7 @@ void ARMFrameLowering::adjustForSegmentedStacks( // Sadly, this currently doesn't support varargs, platforms other than // android/linux. Note that thumb1/thumb2 are support for android/linux. - if (MF.getFunction()->isVarArg()) + if (MF.getFunction().isVarArg()) report_fatal_error("Segmented stacks do not support vararg functions."); if (!ST->isTargetAndroid() && !ST->isTargetLinux()) report_fatal_error("Segmented stacks not supported on this platform."); @@ -2250,7 +2250,7 @@ void ARMFrameLowering::adjustForSegmentedStacks( if (Thumb && ST->isThumb1Only()) { unsigned PCLabelId = ARMFI->createPICLabelUId(); ARMConstantPoolValue *NewCPV = ARMConstantPoolSymbol::Create( - MF.getFunction()->getContext(), "__STACK_LIMIT", PCLabelId, 0); + MF.getFunction().getContext(), "__STACK_LIMIT", PCLabelId, 0); MachineConstantPool *MCP = MF.getConstantPool(); unsigned CPI = MCP->getConstantPoolIndex(NewCPV, 4); diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 8d32510e2004..c2d0e636da9e 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -2765,7 +2765,7 @@ void ARMDAGToDAGISel::Select(SDNode *N) { } } case ARMISD::SUBE: { - if (!Subtarget->hasV6Ops()) + if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP()) break; // Look for a pattern to match SMMLS // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b)))) diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index cee274080b2b..47c4712aad61 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -799,6 +799,9 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SSUBO, MVT::i32, Custom); setOperationAction(ISD::USUBO, MVT::i32, Custom); + setOperationAction(ISD::ADDCARRY, MVT::i32, Custom); + setOperationAction(ISD::SUBCARRY, MVT::i32, Custom); + // i64 operation support. 
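Illustrative sketch (not part of the patch): ISD::ADDCARRY and ISD::SUBCARRY, marked Custom above, are the carry-chained nodes the legalizer uses when it splits a wide addition or subtraction into i32 halves; they are handled by the LowerADDSUBCARRY routine added later in this patch. The helper names below are hypothetical, C++ reference semantics only.

#include <cstdint>

// One 32-bit limb of a carry-chained addition: consumes a 0/1 carry-in and
// produces a 0/1 carry-out, mirroring ISD::ADDCARRY's (result, carry) pair.
uint32_t addcarry32(uint32_t A, uint32_t B, unsigned CarryIn, unsigned &CarryOut) {
  uint64_t Sum = uint64_t(A) + uint64_t(B) + CarryIn;
  CarryOut = unsigned(Sum >> 32);
  return uint32_t(Sum);
}

// A 64-bit add built from two such limbs, the way legalization splits it.
uint64_t add64(uint64_t A, uint64_t B) {
  unsigned Carry;
  uint32_t Lo = addcarry32(uint32_t(A), uint32_t(B), 0, Carry);
  uint32_t Hi = addcarry32(uint32_t(A >> 32), uint32_t(B >> 32), Carry, Carry);
  return (uint64_t(Hi) << 32) | Lo;
}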
setOperationAction(ISD::MUL, MVT::i64, Expand); setOperationAction(ISD::MULHU, MVT::i32, Expand); @@ -1038,7 +1041,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, if (!Subtarget->isThumb1Only()) setOperationAction(ISD::SETCCE, MVT::i32, Custom); - setOperationAction(ISD::BRCOND, MVT::Other, Expand); + setOperationAction(ISD::BRCOND, MVT::Other, Custom); setOperationAction(ISD::BR_CC, MVT::i32, Custom); setOperationAction(ISD::BR_CC, MVT::f32, Custom); setOperationAction(ISD::BR_CC, MVT::f64, Custom); @@ -1081,20 +1084,11 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, } } - // Combine sin / cos into one node or libcall if possible. - if (Subtarget->hasSinCos()) { - setLibcallName(RTLIB::SINCOS_F32, "sincosf"); - setLibcallName(RTLIB::SINCOS_F64, "sincos"); - if (Subtarget->isTargetWatchABI()) { - setLibcallCallingConv(RTLIB::SINCOS_F32, CallingConv::ARM_AAPCS_VFP); - setLibcallCallingConv(RTLIB::SINCOS_F64, CallingConv::ARM_AAPCS_VFP); - } - if (Subtarget->isTargetIOS() || Subtarget->isTargetWatchOS()) { - // For iOS, we don't want to the normal expansion of a libcall to - // sincos. We want to issue a libcall to __sincos_stret. - setOperationAction(ISD::FSINCOS, MVT::f64, Custom); - setOperationAction(ISD::FSINCOS, MVT::f32, Custom); - } + // Use __sincos_stret if available. + if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr && + getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) { + setOperationAction(ISD::FSINCOS, MVT::f64, Custom); + setOperationAction(ISD::FSINCOS, MVT::f32, Custom); } // FP-ARMv8 implements a lot of rounding-like FP operations. @@ -1252,6 +1246,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::CMOV: return "ARMISD::CMOV"; case ARMISD::SSAT: return "ARMISD::SSAT"; + case ARMISD::USAT: return "ARMISD::USAT"; case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG"; case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG"; @@ -1342,6 +1337,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::SMLALDX: return "ARMISD::SMLALDX"; case ARMISD::SMLSLD: return "ARMISD::SMLSLD"; case ARMISD::SMLSLDX: return "ARMISD::SMLSLDX"; + case ARMISD::SMMLAR: return "ARMISD::SMMLAR"; + case ARMISD::SMMLSR: return "ARMISD::SMMLSR"; case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR"; case ARMISD::BFI: return "ARMISD::BFI"; case ARMISD::VORRIMM: return "ARMISD::VORRIMM"; @@ -1770,7 +1767,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet(); bool isThisReturn = false; bool isSibCall = false; - auto Attr = MF.getFunction()->getFnAttribute("disable-tail-calls"); + auto Attr = MF.getFunction().getFnAttribute("disable-tail-calls"); // Disable tail calls if they're not supported. if (!Subtarget->supportsTailCall() || Attr.getValueAsString() == "true") @@ -1779,7 +1776,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (isTailCall) { // Check if it's really possible to do a tail call. 
isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, - isVarArg, isStructRet, MF.getFunction()->hasStructRetAttr(), + isVarArg, isStructRet, MF.getFunction().hasStructRetAttr(), Outs, OutVals, Ins, DAG); if (!isTailCall && CLI.CS && CLI.CS.isMustTailCall()) report_fatal_error("failed to perform tail call elimination on a call " @@ -1978,7 +1975,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, bool isDirect = false; const TargetMachine &TM = getTargetMachine(); - const Module *Mod = MF.getFunction()->getParent(); + const Module *Mod = MF.getFunction().getParent(); const GlobalValue *GV = nullptr; if (GlobalAddressSDNode *G = dyn_cast(Callee)) GV = G->getGlobal(); @@ -2030,7 +2027,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, auto *GV = cast(Callee)->getGlobal(); auto *BB = CLI.CS.getParent(); bool PreferIndirect = - Subtarget->isThumb() && MF.getFunction()->optForMinSize() && + Subtarget->isThumb() && MF.getFunction().optForMinSize() && count_if(GV->users(), [&BB](const User *U) { return isa(U) && cast(U)->getParent() == BB; }) > 2; @@ -2102,7 +2099,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, CallOpc = ARMISD::CALL_NOLINK; else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() && // Emit regular call when code size is the priority - !MF.getFunction()->optForMinSize()) + !MF.getFunction().optForMinSize()) // "mov lr, pc; b _foo" to avoid confusing the RSP CallOpc = ARMISD::CALL_NOLINK; else @@ -2277,18 +2274,25 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, const SmallVectorImpl &Ins, SelectionDAG& DAG) const { MachineFunction &MF = DAG.getMachineFunction(); - const Function *CallerF = MF.getFunction(); - CallingConv::ID CallerCC = CallerF->getCallingConv(); + const Function &CallerF = MF.getFunction(); + CallingConv::ID CallerCC = CallerF.getCallingConv(); assert(Subtarget->supportsTailCall()); + // Tail calls to function pointers cannot be optimized for Thumb1 if the args + // to the call take up r0-r3. The reason is that there are no legal registers + // left to hold the pointer to the function to be called. + if (Subtarget->isThumb1Only() && Outs.size() >= 4 && + !isa(Callee.getNode())) + return false; + // Look for obvious safe cases to perform tail call optimization that do not // require ABI changes. This is what gcc calls sibcall. // Exception-handling functions need a special set of instructions to indicate // a return to the hardware. Tail-calling another function would probably // break this. - if (CallerF->hasFnAttribute("interrupt")) + if (CallerF.hasFnAttribute("interrupt")) return false; // Also avoid sibcall optimization if either caller or callee uses struct @@ -2400,9 +2404,9 @@ ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv, static SDValue LowerInterruptReturn(SmallVectorImpl &RetOps, const SDLoc &DL, SelectionDAG &DAG) { const MachineFunction &MF = DAG.getMachineFunction(); - const Function *F = MF.getFunction(); + const Function &F = MF.getFunction(); - StringRef IntKind = F->getFnAttribute("interrupt").getValueAsString(); + StringRef IntKind = F.getFnAttribute("interrupt").getValueAsString(); // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset // version of the "preferred return address". 
These offsets affect the return @@ -2543,7 +2547,7 @@ ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, // // M-class CPUs actually use a normal return sequence with a special // (hardware-provided) value in LR, so the normal code path works. - if (DAG.getMachineFunction().getFunction()->hasFnAttribute("interrupt") && + if (DAG.getMachineFunction().getFunction().hasFnAttribute("interrupt") && !Subtarget->isMClass()) { if (Subtarget->isThumb1Only()) report_fatal_error("interrupt attribute is not supported in Thumb1"); @@ -2681,7 +2685,7 @@ SDValue ARMTargetLowering::LowerConstantPool(SDValue Op, auto T = const_cast(CP->getType()); auto C = const_cast(CP->getConstVal()); auto M = const_cast(DAG.getMachineFunction(). - getFunction()->getParent()); + getFunction().getParent()); auto GV = new GlobalVariable( *M, T, /*isConst=*/true, GlobalVariable::InternalLinkage, C, Twine(DAG.getDataLayout().getPrivateGlobalPrefix()) + "CP" + @@ -2790,7 +2794,7 @@ ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op, // trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be // silly). auto TRI = - getTargetMachine().getSubtargetImpl(*F.getFunction())->getRegisterInfo(); + getTargetMachine().getSubtargetImpl(F.getFunction())->getRegisterInfo(); auto ARI = static_cast(TRI); const uint32_t *Mask = ARI->getTLSCallPreservedMask(DAG.getMachineFunction()); @@ -2956,6 +2960,10 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, SDValue ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { + GlobalAddressSDNode *GA = cast(Op); + if (DAG.getTarget().Options.EmulatedTLS) + return LowerToTLSEmulatedModel(GA, DAG); + if (Subtarget->isTargetDarwin()) return LowerGlobalTLSAddressDarwin(Op, DAG); @@ -2964,10 +2972,6 @@ ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { // TODO: implement the "local dynamic" model assert(Subtarget->isTargetELF() && "Only ELF implemented here"); - GlobalAddressSDNode *GA = cast(Op); - if (DAG.getTarget().Options.EmulatedTLS) - return LowerToTLSEmulatedModel(GA, DAG); - TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal()); switch (model) { @@ -3045,7 +3049,7 @@ static SDValue promoteToConstantPool(const GlobalValue *GV, SelectionDAG &DAG, // This is a win if the constant is only used in one function (so it doesn't // need to be duplicated) or duplicating the constant wouldn't increase code // size (implying the constant is no larger than 4 bytes). - const Function *F = DAG.getMachineFunction().getFunction(); + const Function &F = DAG.getMachineFunction().getFunction(); // We rely on this decision to inline being idemopotent and unrelated to the // use-site. We know that if we inline a variable at one use site, we'll @@ -3103,7 +3107,7 @@ static SDValue promoteToConstantPool(const GlobalValue *GV, SelectionDAG &DAG, // in multiple functions but it no larger than a pointer. We also check if // GVar has constant (non-ConstantExpr) users. If so, it essentially has its // address taken. - if (!allUsersAreInFunction(GVar, F) && + if (!allUsersAreInFunction(GVar, &F) && !(Size <= 4 && allUsersAreInFunctions(GVar))) return SDValue(); @@ -3312,7 +3316,7 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, bool IsPositionIndependent = isPositionIndependent(); unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 
4 : 8) : 0; ARMConstantPoolValue *CPV = - ARMConstantPoolConstant::Create(MF.getFunction(), ARMPCLabelIndex, + ARMConstantPoolConstant::Create(&MF.getFunction(), ARMPCLabelIndex, ARMCP::CPLSDA, PCAdj); CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); @@ -3588,7 +3592,7 @@ SDValue ARMTargetLowering::LowerFormalArguments( SmallVector ArgValues; SDValue ArgValue; - Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin(); + Function::const_arg_iterator CurOrigArg = MF.getFunction().arg_begin(); unsigned CurArgIdx = 0; // Initially ArgRegsSaveSize is zero. @@ -3892,6 +3896,10 @@ ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const { return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp); } +// This function returns three things: the arithmetic computation itself +// (Value), a comparison (OverflowCmp), and a condition code (ARMcc). The +// comparison and the condition code define the case in which the arithmetic +// computation *does not* overflow. std::pair ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG, SDValue &ARMcc) const { @@ -3917,7 +3925,11 @@ ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG, break; case ISD::UADDO: ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32); - Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS); + // We use ADDC here to correspond to its use in LowerUnsignedALUO. + // We do not use it in the USUBO case as Value may not be used. + Value = DAG.getNode(ARMISD::ADDC, dl, + DAG.getVTList(Op.getValueType(), MVT::i32), LHS, RHS) + .getValue(0); OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS); break; case ISD::SSUBO: @@ -3930,13 +3942,36 @@ ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG, Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS); OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS); break; + case ISD::UMULO: + // We generate a UMUL_LOHI and then check if the high word is 0. + ARMcc = DAG.getConstant(ARMCC::EQ, dl, MVT::i32); + Value = DAG.getNode(ISD::UMUL_LOHI, dl, + DAG.getVTList(Op.getValueType(), Op.getValueType()), + LHS, RHS); + OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value.getValue(1), + DAG.getConstant(0, dl, MVT::i32)); + Value = Value.getValue(0); // We only want the low 32 bits for the result. + break; + case ISD::SMULO: + // We generate a SMUL_LOHI and then check if all the bits of the high word + // are the same as the sign bit of the low word. + ARMcc = DAG.getConstant(ARMCC::EQ, dl, MVT::i32); + Value = DAG.getNode(ISD::SMUL_LOHI, dl, + DAG.getVTList(Op.getValueType(), Op.getValueType()), + LHS, RHS); + OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value.getValue(1), + DAG.getNode(ISD::SRA, dl, Op.getValueType(), + Value.getValue(0), + DAG.getConstant(31, dl, MVT::i32))); + Value = Value.getValue(0); // We only want the low 32 bits for the result. + break; } // switch (...) return std::make_pair(Value, OverflowCmp); } SDValue -ARMTargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const { +ARMTargetLowering::LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const { // Let legalize expand this if it isn't a legal type yet. 
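Illustrative sketch (not part of the patch): the scalar checks that the UMULO/SMULO cases above encode with UMUL_LOHI/SMUL_LOHI plus a compare. Function names are hypothetical, C++ reference semantics only.

#include <cstdint>

// ISD::UMULO: an unsigned 32-bit multiply overflows iff the high word of the
// 64-bit product is non-zero.
bool umulo32(uint32_t A, uint32_t B, uint32_t &Lo) {
  uint64_t Full = uint64_t(A) * uint64_t(B);
  Lo = uint32_t(Full);
  return uint32_t(Full >> 32) != 0;
}

// ISD::SMULO: a signed 32-bit multiply overflows iff the product does not fit
// in i32, i.e. the high word differs from the sign-extension of the low word.
bool smulo32(int32_t A, int32_t B, int32_t &Lo) {
  int64_t Full = int64_t(A) * int64_t(B);
  Lo = int32_t(Full);
  return Full != int64_t(Lo);
}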
if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType())) return SDValue(); @@ -3958,6 +3993,66 @@ ARMTargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow); } +static SDValue ConvertBooleanCarryToCarryFlag(SDValue BoolCarry, + SelectionDAG &DAG) { + SDLoc DL(BoolCarry); + EVT CarryVT = BoolCarry.getValueType(); + + APInt NegOne = APInt::getAllOnesValue(CarryVT.getScalarSizeInBits()); + // This converts the boolean value carry into the carry flag by doing + // ARMISD::ADDC Carry, ~0 + return DAG.getNode(ARMISD::ADDC, DL, DAG.getVTList(CarryVT, MVT::i32), + BoolCarry, DAG.getConstant(NegOne, DL, CarryVT)); +} + +static SDValue ConvertCarryFlagToBooleanCarry(SDValue Flags, EVT VT, + SelectionDAG &DAG) { + SDLoc DL(Flags); + + // Now convert the carry flag into a boolean carry. We do this + // using ARMISD::ADDE 0, 0, Carry + return DAG.getNode(ARMISD::ADDE, DL, DAG.getVTList(VT, MVT::i32), + DAG.getConstant(0, DL, MVT::i32), + DAG.getConstant(0, DL, MVT::i32), Flags); +} + +SDValue ARMTargetLowering::LowerUnsignedALUO(SDValue Op, + SelectionDAG &DAG) const { + // Let legalize expand this if it isn't a legal type yet. + if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType())) + return SDValue(); + + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + SDLoc dl(Op); + + EVT VT = Op.getValueType(); + SDVTList VTs = DAG.getVTList(VT, MVT::i32); + SDValue Value; + SDValue Overflow; + switch (Op.getOpcode()) { + default: + llvm_unreachable("Unknown overflow instruction!"); + case ISD::UADDO: + Value = DAG.getNode(ARMISD::ADDC, dl, VTs, LHS, RHS); + // Convert the carry flag into a boolean value. + Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG); + break; + case ISD::USUBO: { + Value = DAG.getNode(ARMISD::SUBC, dl, VTs, LHS, RHS); + // Convert the carry flag into a boolean value. + Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG); + // ARMISD::SUBC returns 0 when we have to borrow, so make it an overflow + // value. So compute 1 - C. + Overflow = DAG.getNode(ISD::SUB, dl, MVT::i32, + DAG.getConstant(1, dl, MVT::i32), Overflow); + break; + } + } + + return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow); +} + SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { SDValue Cond = Op.getOperand(0); SDValue SelectTrue = Op.getOperand(1); @@ -4135,7 +4230,7 @@ static bool isUpperSaturate(const SDValue LHS, const SDValue RHS, ((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal))); } -// Check if two chained conditionals could be converted into SSAT. +// Check if two chained conditionals could be converted into SSAT or USAT. // // SSAT can replace a set of two conditional selectors that bound a number to an // interval of type [k, ~k] when k + 1 is a power of 2. Here are some examples: @@ -4146,10 +4241,14 @@ static bool isUpperSaturate(const SDValue LHS, const SDValue RHS, // x < k ? (x < -k ? -k : x) : k // etc. // +// USAT works similarly to SSAT but bounds on the interval [0, k] where k + 1 is +// a power of 2. +// // It returns true if the conversion can be done, false otherwise. -// Additionally, the variable is returned in parameter V and the constant in K.
+// Additionally, the variable is returned in parameter V, the constant in K and +// usat is set to true if the conditional represents an unsigned saturation static bool isSaturatingConditional(const SDValue &Op, SDValue &V, - uint64_t &K) { + uint64_t &K, bool &usat) { SDValue LHS1 = Op.getOperand(0); SDValue RHS1 = Op.getOperand(1); SDValue TrueVal1 = Op.getOperand(2); @@ -4216,13 +4315,23 @@ static bool isSaturatingConditional(const SDValue &Op, SDValue &V, int64_t Val1 = cast(*K1)->getSExtValue(); int64_t Val2 = cast(*K2)->getSExtValue(); int64_t PosVal = std::max(Val1, Val2); + int64_t NegVal = std::min(Val1, Val2); if (((Val1 > Val2 && UpperCheckOp == &Op) || (Val1 < Val2 && UpperCheckOp == &Op2)) && - Val1 == ~Val2 && isPowerOf2_64(PosVal + 1)) { + isPowerOf2_64(PosVal + 1)) { + + // Handle the difference between USAT (unsigned) and SSAT (signed) saturation + if (Val1 == ~Val2) + usat = false; + else if (NegVal == 0) + usat = true; + else + return false; V = V2; K = (uint64_t)PosVal; // At this point, PosVal is guaranteed to be positive + return true; } @@ -4236,10 +4345,16 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { // Try to convert two saturating conditional selects into a single SSAT SDValue SatValue; uint64_t SatConstant; + bool SatUSat; if (((!Subtarget->isThumb() && Subtarget->hasV6Ops()) || Subtarget->isThumb2()) && - isSaturatingConditional(Op, SatValue, SatConstant)) - return DAG.getNode(ARMISD::SSAT, dl, VT, SatValue, - DAG.getConstant(countTrailingOnes(SatConstant), dl, VT)); + isSaturatingConditional(Op, SatValue, SatConstant, SatUSat)) { + if (SatUSat) + return DAG.getNode(ARMISD::USAT, dl, VT, SatValue, + DAG.getConstant(countTrailingOnes(SatConstant), dl, VT)); + else + return DAG.getNode(ARMISD::SSAT, dl, VT, SatValue, + DAG.getConstant(countTrailingOnes(SatConstant), dl, VT)); + } SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); @@ -4436,6 +4551,41 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const { return SDValue(); } +SDValue ARMTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { + SDValue Chain = Op.getOperand(0); + SDValue Cond = Op.getOperand(1); + SDValue Dest = Op.getOperand(2); + SDLoc dl(Op); + + // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch + // instruction. + unsigned Opc = Cond.getOpcode(); + if (Cond.getResNo() == 1 && + (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO || + Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO)) { + // Only lower legal XALUO ops. + if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0))) + return SDValue(); + + // The actual operation with overflow check. + SDValue Value, OverflowCmp; + SDValue ARMcc; + std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc); + + // Reverse the condition code. 
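Illustrative sketch (not part of the patch): source-level clamps of the shape isSaturatingConditional now recognizes. With the constants below, the first pair of selects can be matched as a signed saturation (SSAT) and the second, whose lower bound is zero, as an unsigned saturation (USAT).

#include <cstdint>

// Signed saturation to [-128, 127]: two chained selects bounding X.
int32_t clamp_s8(int32_t X) {
  return X > 127 ? 127 : (X < -128 ? -128 : X);
}

// Unsigned saturation to [0, 255]: the zero lower bound selects the USAT form.
int32_t clamp_u8(int32_t X) {
  return X > 255 ? 255 : (X < 0 ? 0 : X);
}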
+ ARMCC::CondCodes CondCode = + (ARMCC::CondCodes)cast(ARMcc)->getZExtValue(); + CondCode = ARMCC::getOppositeCondition(CondCode); + ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32); + SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); + + return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR, + OverflowCmp); + } + + return SDValue(); +} + SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); ISD::CondCode CC = cast(Op.getOperand(1))->get(); @@ -4456,6 +4606,35 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { } } + // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch + // instruction. + unsigned Opc = LHS.getOpcode(); + if (LHS.getResNo() == 1 && (isOneConstant(RHS) || isNullConstant(RHS)) && + (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO || + Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO) && + (CC == ISD::SETEQ || CC == ISD::SETNE)) { + // Only lower legal XALUO ops. + if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0))) + return SDValue(); + + // The actual operation with overflow check. + SDValue Value, OverflowCmp; + SDValue ARMcc; + std::tie(Value, OverflowCmp) = getARMXALUOOp(LHS.getValue(0), DAG, ARMcc); + + if ((CC == ISD::SETNE) != isOneConstant(RHS)) { + // Reverse the condition code. + ARMCC::CondCodes CondCode = + (ARMCC::CondCodes)cast(ARMcc)->getZExtValue(); + CondCode = ARMCC::getOppositeCondition(CondCode); + ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32); + } + SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); + + return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR, + OverflowCmp); + } + if (LHS.getValueType() == MVT::i32) { SDValue ARMcc; SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); @@ -7361,6 +7540,53 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) { Op.getOperand(1), Op.getOperand(2)); } +static SDValue LowerADDSUBCARRY(SDValue Op, SelectionDAG &DAG) { + SDNode *N = Op.getNode(); + EVT VT = N->getValueType(0); + SDVTList VTs = DAG.getVTList(VT, MVT::i32); + + SDValue Carry = Op.getOperand(2); + EVT CarryVT = Carry.getValueType(); + + SDLoc DL(Op); + + APInt NegOne = APInt::getAllOnesValue(CarryVT.getScalarSizeInBits()); + + SDValue Result; + if (Op.getOpcode() == ISD::ADDCARRY) { + // This converts the boolean value carry into the carry flag. + Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG); + + // Do the addition proper using the carry flag we wanted. + Result = DAG.getNode(ARMISD::ADDE, DL, VTs, Op.getOperand(0), + Op.getOperand(1), Carry.getValue(1)); + + // Now convert the carry flag into a boolean value. + Carry = ConvertCarryFlagToBooleanCarry(Result.getValue(1), VT, DAG); + } else { + // ARMISD::SUBE expects a carry not a borrow like ISD::SUBCARRY so we + // have to invert the carry first. + Carry = DAG.getNode(ISD::SUB, DL, MVT::i32, + DAG.getConstant(1, DL, MVT::i32), Carry); + // This converts the boolean value carry into the carry flag. + Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG); + + // Do the subtraction proper using the carry flag we wanted. + Result = DAG.getNode(ARMISD::SUBE, DL, VTs, Op.getOperand(0), + Op.getOperand(1), Carry.getValue(1)); + + // Now convert the carry flag into a boolean value. + Carry = ConvertCarryFlagToBooleanCarry(Result.getValue(1), VT, DAG); + // But the carry returned by ARMISD::SUBE is not a borrow as expected + // by ISD::SUBCARRY, so compute 1 - C. 
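Illustrative sketch (not part of the patch): the carry-flag conventions behind the conversions above. ARM subtraction sets the carry flag when no borrow occurred, while ISD::USUBO/ISD::SUBCARRY expect a borrow bit, hence the 1 - C computations; a boolean 0/1 carry becomes the flag by adding it to all-ones (ADDC Carry, ~0), and the flag is read back with ADDE 0, 0, flag. Helper names are hypothetical.

#include <cstdint>

// Boolean 0/1 carry -> flag: adding all-ones to a 0/1 value yields a carry-out
// equal to that value, which is what ARMISD::ADDC Carry, ~0 relies on (and why
// the later (ADDC (ADDE 0, 0, C), -1) -> C combine is sound).
unsigned flagFromBool(uint32_t BoolCarry) {
  uint64_t Sum = uint64_t(BoolCarry) + 0xFFFFFFFFull;
  return unsigned(Sum >> 32);
}

// ARM SUBS sets C = 1 when there was no borrow, so the borrow bit expected by
// ISD::SUBCARRY / ISD::USUBO is 1 - C.
unsigned borrowFromArmCarry(unsigned ArmCarry) { return 1u - ArmCarry; }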
+ Carry = DAG.getNode(ISD::SUB, DL, MVT::i32, + DAG.getConstant(1, DL, MVT::i32), Carry); + } + + // Return both values. + return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Carry); +} + SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { assert(Subtarget->isTargetDarwin()); @@ -7406,10 +7632,9 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { Entry.IsZExt = false; Args.push_back(Entry); - const char *LibcallName = - (ArgVT == MVT::f64) ? "__sincos_stret" : "__sincosf_stret"; RTLIB::Libcall LC = - (ArgVT == MVT::f64) ? RTLIB::SINCOS_F64 : RTLIB::SINCOS_F32; + (ArgVT == MVT::f64) ? RTLIB::SINCOS_STRET_F64 : RTLIB::SINCOS_STRET_F32; + const char *LibcallName = getLibcallName(LC); CallingConv::ID CC = getLibcallCallingConv(LC); SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy(DL)); @@ -7637,9 +7862,9 @@ static SDValue LowerFPOWI(SDValue Op, const ARMSubtarget &Subtarget, SDValue InChain = DAG.getEntryNode(); SDValue TCChain = InChain; - const auto *F = DAG.getMachineFunction().getFunction(); + const Function &F = DAG.getMachineFunction().getFunction(); bool IsTC = TLI.isInTailCallPosition(DAG, Op.getNode(), TCChain) && - F->getReturnType() == LCRTy; + F.getReturnType() == LCRTy; if (IsTC) InChain = TCChain; @@ -7665,6 +7890,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); case ISD::SELECT: return LowerSELECT(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); + case ISD::BRCOND: return LowerBRCOND(Op, DAG); case ISD::BR_CC: return LowerBR_CC(Op, DAG); case ISD::BR_JT: return LowerBR_JT(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG); @@ -7716,11 +7942,14 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::ADDE: case ISD::SUBC: case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG); + case ISD::ADDCARRY: + case ISD::SUBCARRY: return LowerADDSUBCARRY(Op, DAG); case ISD::SADDO: - case ISD::UADDO: case ISD::SSUBO: + return LowerSignedALUO(Op, DAG); + case ISD::UADDO: case ISD::USUBO: - return LowerXALUO(Op, DAG); + return LowerUnsignedALUO(Op, DAG); case ISD::ATOMIC_LOAD: case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG); case ISD::FSINCOS: return LowerFSINCOS(Op, DAG); @@ -7834,7 +8063,7 @@ void ARMTargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI, MachineRegisterInfo *MRI = &MF->getRegInfo(); MachineConstantPool *MCP = MF->getConstantPool(); ARMFunctionInfo *AFI = MF->getInfo(); - const Function *F = MF->getFunction(); + const Function &F = MF->getFunction(); bool isThumb = Subtarget->isThumb(); bool isThumb2 = Subtarget->isThumb2(); @@ -7842,7 +8071,7 @@ void ARMTargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI, unsigned PCLabelId = AFI->createPICLabelUId(); unsigned PCAdj = (isThumb || isThumb2) ? 4 : 8; ARMConstantPoolValue *CPV = - ARMConstantPoolMBB::Create(F->getContext(), DispatchBB, PCLabelId, PCAdj); + ARMConstantPoolMBB::Create(F.getContext(), DispatchBB, PCLabelId, PCAdj); unsigned CPI = MCP->getConstantPoolIndex(CPV, 4); const TargetRegisterClass *TRC = isThumb ? 
&ARM::tGPRRegClass @@ -8128,7 +8357,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, .add(predOps(ARMCC::AL)); } else { MachineConstantPool *ConstantPool = MF->getConstantPool(); - Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext()); + Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext()); const Constant *C = ConstantInt::get(Int32Ty, NumLPads); // MachineConstantPool wants an explicit alignment. @@ -8229,7 +8458,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, .add(predOps(ARMCC::AL)); } else { MachineConstantPool *ConstantPool = MF->getConstantPool(); - Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext()); + Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext()); const Constant *C = ConstantInt::get(Int32Ty, NumLPads); // MachineConstantPool wants an explicit alignment. @@ -8525,7 +8754,7 @@ ARMTargetLowering::EmitStructByval(MachineInstr &MI, UnitSize = 2; } else { // Check whether we can use NEON instructions. - if (!MF->getFunction()->hasFnAttribute(Attribute::NoImplicitFloat) && + if (!MF->getFunction().hasFnAttribute(Attribute::NoImplicitFloat) && Subtarget->hasNEON()) { if ((Align % 16 == 0) && SizeVal >= 16) UnitSize = 16; @@ -8631,7 +8860,7 @@ ARMTargetLowering::EmitStructByval(MachineInstr &MI, .add(predOps(ARMCC::AL)); } else { MachineConstantPool *ConstantPool = MF->getConstantPool(); - Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext()); + Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext()); const Constant *C = ConstantInt::get(Int32Ty, LoopSize); // MachineConstantPool wants an explicit alignment. @@ -8854,8 +9083,11 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, // Thumb1 post-indexed loads are really just single-register LDMs. case ARM::tLDR_postidx: { + MachineOperand Def(MI.getOperand(1)); + if (TargetRegisterInfo::isPhysicalRegister(Def.getReg())) + Def.setIsRenamable(false); BuildMI(*BB, MI, dl, TII->get(ARM::tLDMIA_UPD)) - .add(MI.getOperand(1)) // Rn_wb + .add(Def) // Rn_wb .add(MI.getOperand(2)) // Rn .add(MI.getOperand(3)) // PredImm .add(MI.getOperand(4)) // PredReg @@ -9161,7 +9393,7 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI, // operand is still set to noreg. If needed, set the optional operand's // register to CPSR, and remove the redundant implicit def. // - // e.g. ADCS (..., CPSR) -> ADC (... opt:CPSR). + // e.g. ADCS (..., implicit-def CPSR) -> ADC (... opt:def CPSR). // Rename pseudo opcodes. unsigned NewOpc = convertAddSubFlagsOpcode(MI.getOpcode()); @@ -9657,7 +9889,7 @@ static SDValue AddCombineTo64BitSMLAL16(SDNode *AddcNode, SDNode *AddeNode, return resNode; } -static SDValue AddCombineTo64bitMLAL(SDNode *AddeNode, +static SDValue AddCombineTo64bitMLAL(SDNode *AddeSubeNode, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { // Look for multiply add opportunities. @@ -9668,55 +9900,67 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddeNode, // a S/UMLAL instruction. 
// UMUL_LOHI // / :lo \ :hi - // / \ [no multiline comment] - // loAdd -> ADDE | - // \ :glue / - // \ / - // ADDC <- hiAdd + // V \ [no multiline comment] + // loAdd -> ADDC | + // \ :carry / + // V V + // ADDE <- hiAdd // - assert(AddeNode->getOpcode() == ARMISD::ADDE && "Expect an ADDE"); - - assert(AddeNode->getNumOperands() == 3 && - AddeNode->getOperand(2).getValueType() == MVT::i32 && + // In the special case where only the higher part of a signed result is used + // and the add to the low part of the result of ISD::UMUL_LOHI adds or subtracts + // a constant with the exact value of 0x80000000, we recognize we are dealing + // with a "rounded multiply and add" (or subtract) and transform it into + // either a ARMISD::SMMLAR or ARMISD::SMMLSR respectively. + + assert((AddeSubeNode->getOpcode() == ARMISD::ADDE || + AddeSubeNode->getOpcode() == ARMISD::SUBE) && + "Expect an ADDE or SUBE"); + + assert(AddeSubeNode->getNumOperands() == 3 && + AddeSubeNode->getOperand(2).getValueType() == MVT::i32 && "ADDE node has the wrong inputs"); - // Check that we have a glued ADDC node. - SDNode* AddcNode = AddeNode->getOperand(2).getNode(); - if (AddcNode->getOpcode() != ARMISD::ADDC) + // Check that we are chained to the right ADDC or SUBC node. + SDNode *AddcSubcNode = AddeSubeNode->getOperand(2).getNode(); + if ((AddeSubeNode->getOpcode() == ARMISD::ADDE && + AddcSubcNode->getOpcode() != ARMISD::ADDC) || + (AddeSubeNode->getOpcode() == ARMISD::SUBE && + AddcSubcNode->getOpcode() != ARMISD::SUBC)) return SDValue(); - SDValue AddcOp0 = AddcNode->getOperand(0); - SDValue AddcOp1 = AddcNode->getOperand(1); + SDValue AddcSubcOp0 = AddcSubcNode->getOperand(0); + SDValue AddcSubcOp1 = AddcSubcNode->getOperand(1); // Check if the two operands are from the same mul_lohi node. - if (AddcOp0.getNode() == AddcOp1.getNode()) + if (AddcSubcOp0.getNode() == AddcSubcOp1.getNode()) return SDValue(); - assert(AddcNode->getNumValues() == 2 && - AddcNode->getValueType(0) == MVT::i32 && + assert(AddcSubcNode->getNumValues() == 2 && + AddcSubcNode->getValueType(0) == MVT::i32 && "Expect ADDC with two result values. First: i32"); // Check that the ADDC adds the low result of the S/UMUL_LOHI. If not, it // maybe a SMLAL which multiplies two 16-bit values. - if (AddcOp0->getOpcode() != ISD::UMUL_LOHI && - AddcOp0->getOpcode() != ISD::SMUL_LOHI && - AddcOp1->getOpcode() != ISD::UMUL_LOHI && - AddcOp1->getOpcode() != ISD::SMUL_LOHI) - return AddCombineTo64BitSMLAL16(AddcNode, AddeNode, DCI, Subtarget); + if (AddeSubeNode->getOpcode() == ARMISD::ADDE && + AddcSubcOp0->getOpcode() != ISD::UMUL_LOHI && + AddcSubcOp0->getOpcode() != ISD::SMUL_LOHI && + AddcSubcOp1->getOpcode() != ISD::UMUL_LOHI && + AddcSubcOp1->getOpcode() != ISD::SMUL_LOHI) + return AddCombineTo64BitSMLAL16(AddcSubcNode, AddeSubeNode, DCI, Subtarget); // Check for the triangle shape. - SDValue AddeOp0 = AddeNode->getOperand(0); - SDValue AddeOp1 = AddeNode->getOperand(1); + SDValue AddeSubeOp0 = AddeSubeNode->getOperand(0); + SDValue AddeSubeOp1 = AddeSubeNode->getOperand(1); - // Make sure that the ADDE operands are not coming from the same node. - if (AddeOp0.getNode() == AddeOp1.getNode()) + // Make sure that the ADDE/SUBE operands are not coming from the same node. + if (AddeSubeOp0.getNode() == AddeSubeOp1.getNode()) return SDValue(); - // Find the MUL_LOHI node walking up ADDE's operands. + // Find the MUL_LOHI node walking up ADDE/SUBE's operands. 
bool IsLeftOperandMUL = false; - SDValue MULOp = findMUL_LOHI(AddeOp0); + SDValue MULOp = findMUL_LOHI(AddeSubeOp0); if (MULOp == SDValue()) - MULOp = findMUL_LOHI(AddeOp1); + MULOp = findMUL_LOHI(AddeSubeOp1); else IsLeftOperandMUL = true; if (MULOp == SDValue()) @@ -9727,57 +9971,88 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddeNode, unsigned FinalOpc = (Opc == ISD::SMUL_LOHI) ? ARMISD::SMLAL : ARMISD::UMLAL; // Figure out the high and low input values to the MLAL node. - SDValue* HiAdd = nullptr; - SDValue* LoMul = nullptr; - SDValue* LowAdd = nullptr; + SDValue *HiAddSub = nullptr; + SDValue *LoMul = nullptr; + SDValue *LowAddSub = nullptr; - // Ensure that ADDE is from high result of ISD::SMUL_LOHI. - if ((AddeOp0 != MULOp.getValue(1)) && (AddeOp1 != MULOp.getValue(1))) + // Ensure that ADDE/SUBE is from high result of ISD::xMUL_LOHI. + if ((AddeSubeOp0 != MULOp.getValue(1)) && (AddeSubeOp1 != MULOp.getValue(1))) return SDValue(); if (IsLeftOperandMUL) - HiAdd = &AddeOp1; + HiAddSub = &AddeSubeOp1; else - HiAdd = &AddeOp0; + HiAddSub = &AddeSubeOp0; + // Ensure that LoMul and LowAddSub are taken from correct ISD::SMUL_LOHI node + // whose low result is fed to the ADDC/SUBC we are checking. - // Ensure that LoMul and LowAdd are taken from correct ISD::SMUL_LOHI node - // whose low result is fed to the ADDC we are checking. - - if (AddcOp0 == MULOp.getValue(0)) { - LoMul = &AddcOp0; - LowAdd = &AddcOp1; + if (AddcSubcOp0 == MULOp.getValue(0)) { + LoMul = &AddcSubcOp0; + LowAddSub = &AddcSubcOp1; } - if (AddcOp1 == MULOp.getValue(0)) { - LoMul = &AddcOp1; - LowAdd = &AddcOp0; + if (AddcSubcOp1 == MULOp.getValue(0)) { + LoMul = &AddcSubcOp1; + LowAddSub = &AddcSubcOp0; } if (!LoMul) return SDValue(); + // If HiAddSub is the same node as ADDC/SUBC or is a predecessor of ADDC/SUBC + // the replacement below will create a cycle. + if (AddcSubcNode == HiAddSub->getNode() || + AddcSubcNode->isPredecessorOf(HiAddSub->getNode())) + return SDValue(); + // Create the merged node. SelectionDAG &DAG = DCI.DAG; - // Build operand list. + // Start building operand list. SmallVector Ops; Ops.push_back(LoMul->getOperand(0)); Ops.push_back(LoMul->getOperand(1)); - Ops.push_back(*LowAdd); - Ops.push_back(*HiAdd); - SDValue MLALNode = DAG.getNode(FinalOpc, SDLoc(AddcNode), + // Check whether we can use SMMLAR, SMMLSR or SMMULR instead. For this to be + // the case, we must be doing signed multiplication and only use the higher + // part of the result of the MLAL, furthermore the LowAddSub must be a constant + // addition or subtraction with the value of 0x80000000. + if (Subtarget->hasV6Ops() && Subtarget->hasDSP() && Subtarget->useMulOps() && + FinalOpc == ARMISD::SMLAL && !AddeSubeNode->hasAnyUseOfValue(1) && + LowAddSub->getNode()->getOpcode() == ISD::Constant && + static_cast(LowAddSub->getNode())->getZExtValue() == + 0x80000000) { + Ops.push_back(*HiAddSub); + if (AddcSubcNode->getOpcode() == ARMISD::SUBC) { + FinalOpc = ARMISD::SMMLSR; + } else { + FinalOpc = ARMISD::SMMLAR; + } + SDValue NewNode = DAG.getNode(FinalOpc, SDLoc(AddcSubcNode), MVT::i32, Ops); + DAG.ReplaceAllUsesOfValueWith(SDValue(AddeSubeNode, 0), NewNode); + + return SDValue(AddeSubeNode, 0); + } else if (AddcSubcNode->getOpcode() == ARMISD::SUBC) + // SMMLS is generated during instruction selection and the rest of this + // function cannot handle the case where AddcSubcNode is a SUBC.
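Illustrative sketch (not part of the patch): the rounded high-word multiply-accumulate that the SMMLAR special case above targets. Adding 0x80000000 to the low half of the 64-bit accumulate before keeping the high half rounds the result to nearest; the function name is hypothetical.

#include <cstdint>

// Reference semantics for SMMLAR Rd, Rn, Rm, Ra:
//   Rd = bits [63:32] of (Ra * 2^32 + Rn * Rm + 0x80000000)
int32_t smmlar_reference(int32_t Rn, int32_t Rm, int32_t Ra) {
  // Work modulo 2^64 and keep the upper 32 bits, as the instruction does.
  uint64_t Acc = (uint64_t(uint32_t(Ra)) << 32) +
                 uint64_t(int64_t(Rn) * int64_t(Rm)) + 0x80000000u;
  return int32_t(uint32_t(Acc >> 32));
}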
+ return SDValue(); + + // Finish building the operand list for {U/S}MLAL + Ops.push_back(*LowAddSub); + Ops.push_back(*HiAddSub); + + SDValue MLALNode = DAG.getNode(FinalOpc, SDLoc(AddcSubcNode), DAG.getVTList(MVT::i32, MVT::i32), Ops); // Replace the ADDs' nodes uses by the MLA node's values. SDValue HiMLALResult(MLALNode.getNode(), 1); - DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), HiMLALResult); + DAG.ReplaceAllUsesOfValueWith(SDValue(AddeSubeNode, 0), HiMLALResult); SDValue LoMLALResult(MLALNode.getNode(), 0); - DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), LoMLALResult); + DAG.ReplaceAllUsesOfValueWith(SDValue(AddcSubcNode, 0), LoMLALResult); // Return original node to notify the driver to stop replacing. - return SDValue(AddeNode, 0); + return SDValue(AddeSubeNode, 0); } static SDValue AddCombineTo64bitUMAAL(SDNode *AddeNode, @@ -9857,8 +10132,22 @@ static SDValue PerformUMLALCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } -static SDValue PerformAddcSubcCombine(SDNode *N, SelectionDAG &DAG, +static SDValue PerformAddcSubcCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { + SelectionDAG &DAG(DCI.DAG); + + if (N->getOpcode() == ARMISD::ADDC) { + // (ADDC (ADDE 0, 0, C), -1) -> C + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + if (LHS->getOpcode() == ARMISD::ADDE && + isNullConstant(LHS->getOperand(0)) && + isNullConstant(LHS->getOperand(1)) && isAllOnesConstant(RHS)) { + return DCI.CombineTo(N, SDValue(N, 0), LHS->getOperand(2)); + } + } + if (Subtarget->isThumb1Only()) { SDValue RHS = N->getOperand(1); if (ConstantSDNode *C = dyn_cast(RHS)) { @@ -9875,9 +10164,11 @@ static SDValue PerformAddcSubcCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } -static SDValue PerformAddeSubeCombine(SDNode *N, SelectionDAG &DAG, +static SDValue PerformAddeSubeCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { if (Subtarget->isThumb1Only()) { + SelectionDAG &DAG = DCI.DAG; SDValue RHS = N->getOperand(1); if (ConstantSDNode *C = dyn_cast(RHS)) { int64_t imm = C->getSExtValue(); @@ -9895,6 +10186,8 @@ static SDValue PerformAddeSubeCombine(SDNode *N, SelectionDAG &DAG, N->getOperand(0), RHS, N->getOperand(2)); } } + } else if (N->getOperand(1)->getOpcode() == ISD::SMUL_LOHI) { + return AddCombineTo64bitMLAL(N, DCI, Subtarget); } return SDValue(); } @@ -9907,7 +10200,7 @@ static SDValue PerformADDECombine(SDNode *N, const ARMSubtarget *Subtarget) { // Only ARM and Thumb2 support UMLAL/SMLAL. if (Subtarget->isThumb1Only()) - return PerformAddeSubeCombine(N, DCI.DAG, Subtarget); + return PerformAddeSubeCombine(N, DCI, Subtarget); // Only perform the checks after legalize when the pattern is available. if (DCI.isBeforeLegalize()) return SDValue(); @@ -11876,6 +12169,14 @@ static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } +static const APInt *isPowerOf2Constant(SDValue V) { + ConstantSDNode *C = dyn_cast(V); + if (!C) + return nullptr; + const APInt *CV = &C->getAPIntValue(); + return CV->isPowerOf2() ? 
CV : nullptr; +} + SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &DAG) const { // If we have a CMOV, OR and AND combination such as: // if (x & CN) @@ -11904,8 +12205,8 @@ SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &D SDValue And = CmpZ->getOperand(0); if (And->getOpcode() != ISD::AND) return SDValue(); - ConstantSDNode *AndC = dyn_cast(And->getOperand(1)); - if (!AndC || !AndC->getAPIntValue().isPowerOf2()) + const APInt *AndC = isPowerOf2Constant(And->getOperand(1)); + if (!AndC) return SDValue(); SDValue X = And->getOperand(0); @@ -11945,7 +12246,7 @@ SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &D SDValue V = Y; SDLoc dl(X); EVT VT = X.getValueType(); - unsigned BitInX = AndC->getAPIntValue().logBase2(); + unsigned BitInX = AndC->logBase2(); if (BitInX != 0) { // We must shift X first. @@ -12106,8 +12407,8 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget); case ISD::AND: return PerformANDCombine(N, DCI, Subtarget); case ARMISD::ADDC: - case ARMISD::SUBC: return PerformAddcSubcCombine(N, DCI.DAG, Subtarget); - case ARMISD::SUBE: return PerformAddeSubeCombine(N, DCI.DAG, Subtarget); + case ARMISD::SUBC: return PerformAddcSubcCombine(N, DCI, Subtarget); + case ARMISD::SUBE: return PerformAddeSubeCombine(N, DCI, Subtarget); case ARMISD::BFI: return PerformBFICombine(N, DCI); case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI, Subtarget); case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG); @@ -12266,11 +12567,11 @@ EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size, bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, MachineFunction &MF) const { - const Function *F = MF.getFunction(); + const Function &F = MF.getFunction(); // See if we can use NEON instructions for this... if ((!IsMemset || ZeroMemset) && Subtarget->hasNEON() && - !F->hasFnAttribute(Attribute::NoImplicitFloat)) { + !F.hasFnAttribute(Attribute::NoImplicitFloat)) { bool Fast; if (Size >= 16 && (memOpAlign(SrcAlign, DstAlign, 16) || @@ -12821,10 +13122,17 @@ void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, case ARMISD::ADDE: case ARMISD::SUBC: case ARMISD::SUBE: - // These nodes' second result is a boolean - if (Op.getResNo() == 0) - break; - Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1); + // Special cases when we convert a carry to a boolean. + if (Op.getResNo() == 0) { + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + // (ADDE 0, 0, C) will give us a single bit. + if (Op->getOpcode() == ARMISD::ADDE && isNullConstant(LHS) && + isNullConstant(RHS)) { + Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1); + return; + } + } break; case ARMISD::CMOV: { // Bits are known zero/one if known on the LHS and RHS. @@ -13430,6 +13738,7 @@ bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { /// specified in the intrinsic calls. 
bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, + MachineFunction &MF, unsigned Intrinsic) const { switch (Intrinsic) { case Intrinsic::arm_neon_vld1: @@ -13448,9 +13757,8 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.offset = 0; Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1); Info.align = cast(AlignArg)->getZExtValue(); - Info.vol = false; // volatile loads with NEON intrinsics not supported - Info.readMem = true; - Info.writeMem = false; + // volatile loads with NEON intrinsics not supported + Info.flags = MachineMemOperand::MOLoad; return true; } case Intrinsic::arm_neon_vst1: @@ -13475,9 +13783,8 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.offset = 0; Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1); Info.align = cast(AlignArg)->getZExtValue(); - Info.vol = false; // volatile stores with NEON intrinsics not supported - Info.readMem = false; - Info.writeMem = true; + // volatile stores with NEON intrinsics not supported + Info.flags = MachineMemOperand::MOStore; return true; } case Intrinsic::arm_ldaex: @@ -13489,9 +13796,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Info.align = DL.getABITypeAlignment(PtrTy->getElementType()); - Info.vol = true; - Info.readMem = true; - Info.writeMem = false; + Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile; return true; } case Intrinsic::arm_stlex: @@ -13503,9 +13808,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.ptrVal = I.getArgOperand(1); Info.offset = 0; Info.align = DL.getABITypeAlignment(PtrTy->getElementType()); - Info.vol = true; - Info.readMem = false; - Info.writeMem = true; + Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile; return true; } case Intrinsic::arm_stlexd: @@ -13515,9 +13818,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.ptrVal = I.getArgOperand(2); Info.offset = 0; Info.align = 8; - Info.vol = true; - Info.readMem = false; - Info.writeMem = true; + Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile; return true; case Intrinsic::arm_ldaexd: @@ -13527,9 +13828,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Info.align = 8; - Info.vol = true; - Info.readMem = true; - Info.writeMem = false; + Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile; return true; default: @@ -13602,7 +13901,7 @@ Instruction *ARMTargetLowering::emitLeadingFence(IRBuilder<> &Builder, case AtomicOrdering::SequentiallyConsistent: if (!Inst->hasAtomicStore()) return nullptr; // Nothing to do - /*FALLTHROUGH*/ + LLVM_FALLTHROUGH; case AtomicOrdering::Release: case AtomicOrdering::AcquireRelease: if (Subtarget->preferISHSTBarriers()) @@ -14215,7 +14514,7 @@ void ARMTargetLowering::insertCopiesSplitCSR( // fine for CXX_FAST_TLS since the C++-style TLS access functions should be // nounwind. If we want to generalize this later, we may need to emit // CFI pseudo-instructions. 
- assert(Entry->getParent()->getFunction()->hasFnAttribute( + assert(Entry->getParent()->getFunction().hasFnAttribute( Attribute::NoUnwind) && "Function should be nounwind in insertCopiesSplitCSR!"); Entry->addLiveIn(*I); diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index a791e2ea233f..aa80f9a91956 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -87,6 +87,7 @@ class VectorType; CMOV, // ARM conditional move instructions. SSAT, // Signed saturation + USAT, // Unsigned saturation BCC_i64, @@ -202,6 +203,8 @@ class VectorType; SMLALDX, // Signed multiply accumulate long dual exchange SMLSLD, // Signed multiply subtract long dual SMLSLDX, // Signed multiply subtract long dual exchange + SMMLAR, // Signed multiply long, round and add + SMMLSR, // Signed multiply long, subtract and round // Operands of the standard BUILD_VECTOR node are not legalized, which // is fine if BUILD_VECTORs are always lowered to shuffles or other @@ -470,6 +473,7 @@ class VectorType; bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, + MachineFunction &MF, unsigned Intrinsic) const override; /// \brief Returns true if it is beneficial to convert a load of a constant @@ -638,9 +642,11 @@ class VectorType; SDValue LowerGlobalTLSAddressWindows(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerUnsignedALUO(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const; SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; @@ -690,8 +696,8 @@ class VectorType; SDValue ThisVal) const; bool supportSplitCSR(MachineFunction *MF) const override { - return MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS && - MF->getFunction()->hasFnAttribute(Attribute::NoUnwind); + return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS && + MF->getFunction().hasFnAttribute(Attribute::NoUnwind); } void initializeSplitCSR(MachineBasicBlock *Entry) const override; diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp index a0e2ac4cbc6f..397c9dadb4ac 100644 --- a/lib/Target/ARM/ARMInstrInfo.cpp +++ b/lib/Target/ARM/ARMInstrInfo.cpp @@ -135,3 +135,31 @@ void ARMInstrInfo::expandLoadStackGuard(MachineBasicBlock::iterator MI) const { .setMemRefs(MI->memoperands_begin(), MI->memoperands_end()) .add(predOps(ARMCC::AL)); } + +std::pair<unsigned, unsigned> +ARMInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const { + const unsigned Mask = ARMII::MO_OPTION_MASK; + return std::make_pair(TF & Mask, TF & ~Mask); +} + +ArrayRef<std::pair<unsigned, const char *>> +ARMInstrInfo::getSerializableDirectMachineOperandTargetFlags() const { + using namespace ARMII; + + static const std::pair<unsigned, const char *> TargetFlags[] = { + {MO_LO16, "arm-lo16"}, {MO_HI16, "arm-hi16"}}; + return makeArrayRef(TargetFlags); +} + +ArrayRef<std::pair<unsigned, const char *>> +ARMInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const { + using namespace ARMII; + + static const std::pair<unsigned, const char *> TargetFlags[] = { + {MO_GOT, "arm-got"}, + {MO_SBREL, "arm-sbrel"}, + {MO_DLLIMPORT, "arm-dllimport"}, +
{MO_SECREL, "arm-secrel"}, + {MO_NONLAZY, "arm-nonlazy"}}; + return makeArrayRef(TargetFlags); +} diff --git a/lib/Target/ARM/ARMInstrInfo.h b/lib/Target/ARM/ARMInstrInfo.h index c87fb97448c9..c54c987134df 100644 --- a/lib/Target/ARM/ARMInstrInfo.h +++ b/lib/Target/ARM/ARMInstrInfo.h @@ -38,6 +38,13 @@ class ARMInstrInfo : public ARMBaseInstrInfo { /// const ARMRegisterInfo &getRegisterInfo() const override { return RI; } + std::pair<unsigned, unsigned> + decomposeMachineOperandsTargetFlags(unsigned TF) const override; + ArrayRef<std::pair<unsigned, const char *>> + getSerializableDirectMachineOperandTargetFlags() const override; + ArrayRef<std::pair<unsigned, const char *>> + getSerializableBitmaskMachineOperandTargetFlags() const override; + private: void expandLoadStackGuard(MachineBasicBlock::iterator MI) const override; }; diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 4e13af596300..7b8e4b19c128 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -105,6 +105,14 @@ def ARMSmlaldx : SDNode<"ARMISD::SMLALDX", SDT_LongMac>; def ARMSmlsld : SDNode<"ARMISD::SMLSLD", SDT_LongMac>; def ARMSmlsldx : SDNode<"ARMISD::SMLSLDX", SDT_LongMac>; +def SDT_MulHSR : SDTypeProfile<1, 3, [SDTCisVT<0,i32>, + SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>, + SDTCisSameAs<0, 3>]>; + +def ARMsmmlar : SDNode<"ARMISD::SMMLAR", SDT_MulHSR>; +def ARMsmmlsr : SDNode<"ARMISD::SMMLSR", SDT_MulHSR>; + // Node definitions. def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>; def ARMWrapperPIC : SDNode<"ARMISD::WrapperPIC", SDTIntUnaryOp>; @@ -139,6 +147,8 @@ def ARMcmov : SDNode<"ARMISD::CMOV", SDT_ARMCMov, def ARMssatnoshift : SDNode<"ARMISD::SSAT", SDTIntSatNoShOp, []>; +def ARMusatnoshift : SDNode<"ARMISD::USAT", SDTIntSatNoShOp, []>; + def ARMbrcond : SDNode<"ARMISD::BRCOND", SDT_ARMBrcond, [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>; @@ -278,6 +288,9 @@ def HasDSP : Predicate<"Subtarget->hasDSP()">, def HasDB : Predicate<"Subtarget->hasDataBarrier()">, AssemblerPredicate<"FeatureDB", "data-barriers">; +def HasDFB : Predicate<"Subtarget->hasFullDataBarrier()">, + AssemblerPredicate<"FeatureDFB", + "full-data-barrier">; def HasV7Clrex : Predicate<"Subtarget->hasV7Clrex()">, AssemblerPredicate<"FeatureV7Clrex", "v7 clrex">; @@ -3832,6 +3845,8 @@ def : ARMV6Pat<(int_arm_usat GPRnopc:$a, imm0_31:$pos), (USAT imm0_31:$pos, GPRnopc:$a, 0)>; def : ARMPat<(ARMssatnoshift GPRnopc:$Rn, imm0_31:$imm), (SSAT imm0_31:$imm, GPRnopc:$Rn, 0)>; +def : ARMPat<(ARMusatnoshift GPRnopc:$Rn, imm0_31:$imm), + (USAT imm0_31:$imm, GPRnopc:$Rn, 0)>; def : ARMV6Pat<(int_arm_ssat16 GPRnopc:$a, imm1_16:$pos), (SSAT16 imm1_16:$pos, GPRnopc:$a)>; def : ARMV6Pat<(int_arm_usat16 GPRnopc:$a, imm0_15:$pos), @@ -4136,7 +4151,8 @@ def SMMUL : AMul2I <0b0111010, 0b0001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), } def SMMULR : AMul2I <0b0111010, 0b0011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), - IIC_iMUL32, "smmulr", "\t$Rd, $Rn, $Rm", []>, + IIC_iMUL32, "smmulr", "\t$Rd, $Rn, $Rm", + [(set GPR:$Rd, (ARMsmmlar GPR:$Rn, GPR:$Rm, (i32 0)))]>, Requires<[IsARM, HasV6]>, Sched<[WriteMUL32, ReadMUL, ReadMUL]> { let Inst{15-12} = 0b1111; @@ -4151,7 +4167,8 @@ def SMMLA : AMul2Ia <0b0111010, 0b0001, (outs GPR:$Rd), def SMMLAR : AMul2Ia <0b0111010, 0b0011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), - IIC_iMAC32, "smmlar", "\t$Rd, $Rn, $Rm, $Ra", []>, + IIC_iMAC32, "smmlar", "\t$Rd, $Rn, $Rm, $Ra", + [(set GPR:$Rd, (ARMsmmlar GPR:$Rn, GPR:$Rm, GPR:$Ra))]>, Requires<[IsARM, HasV6]>, Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>; @@ -4163,7 +4180,8 @@ def SMMLS : AMul2Ia <0b0111010,
0b1101, (outs GPR:$Rd), def SMMLSR : AMul2Ia <0b0111010, 0b1111, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), - IIC_iMAC32, "smmlsr", "\t$Rd, $Rn, $Rm, $Ra", []>, + IIC_iMAC32, "smmlsr", "\t$Rd, $Rn, $Rm, $Ra", + [(set GPR:$Rd, (ARMsmmlsr GPR:$Rn, GPR:$Rm, GPR:$Ra))]>, Requires<[IsARM, HasV6]>, Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>; @@ -5846,6 +5864,8 @@ include "ARMInstrNEON.td" def : InstAlias<"dmb", (DMB 0xf), 0>, Requires<[IsARM, HasDB]>; def : InstAlias<"dsb", (DSB 0xf), 0>, Requires<[IsARM, HasDB]>; def : InstAlias<"isb", (ISB 0xf), 0>, Requires<[IsARM, HasDB]>; +// Armv8-R 'Data Full Barrier' +def : InstAlias<"dfb", (DSB 0xc), 1>, Requires<[IsARM, HasDFB]>; // System instructions def : MnemonicAlias<"swi", "svc">; diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 670ed127da7e..8ca11d83dcf4 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -2336,6 +2336,8 @@ def t2USAT16: T2SatI<(ins imm0_15:$sat_imm, rGPR:$Rn), def : T2Pat<(ARMssatnoshift GPRnopc:$Rn, imm0_31:$imm), (t2SSAT imm0_31:$imm, GPRnopc:$Rn, 0)>; +def : T2Pat<(ARMusatnoshift GPRnopc:$Rn, imm0_31:$imm), + (t2USAT imm0_31:$imm, GPRnopc:$Rn, 0)>; def : T2Pat<(int_arm_ssat GPR:$a, imm1_32:$pos), (t2SSAT imm1_32:$pos, GPR:$a, 0)>; def : T2Pat<(int_arm_usat GPR:$a, imm0_31:$pos), @@ -2659,7 +2661,9 @@ class T2SMMUL op7_4, string opc, list pattern> } def t2SMMUL : T2SMMUL<0b0000, "smmul", [(set rGPR:$Rd, (mulhs rGPR:$Rn, rGPR:$Rm))]>; -def t2SMMULR : T2SMMUL<0b0001, "smmulr", []>; +def t2SMMULR : + T2SMMUL<0b0001, "smmulr", + [(set rGPR:$Rd, (ARMsmmlar rGPR:$Rn, rGPR:$Rm, (i32 0)))]>; class T2FourRegSMMLA op22_20, bits<4> op7_4, string opc, list pattern> @@ -2675,9 +2679,11 @@ class T2FourRegSMMLA op22_20, bits<4> op7_4, string opc, def t2SMMLA : T2FourRegSMMLA<0b101, 0b0000, "smmla", [(set rGPR:$Rd, (add (mulhs rGPR:$Rm, rGPR:$Rn), rGPR:$Ra))]>; -def t2SMMLAR: T2FourRegSMMLA<0b101, 0b0001, "smmlar", []>; +def t2SMMLAR: T2FourRegSMMLA<0b101, 0b0001, "smmlar", + [(set rGPR:$Rd, (ARMsmmlar rGPR:$Rn, rGPR:$Rm, rGPR:$Ra))]>; def t2SMMLS: T2FourRegSMMLA<0b110, 0b0000, "smmls", []>; -def t2SMMLSR: T2FourRegSMMLA<0b110, 0b0001, "smmlsr", []>; +def t2SMMLSR: T2FourRegSMMLA<0b110, 0b0001, "smmlsr", + [(set rGPR:$Rd, (ARMsmmlsr rGPR:$Rn, rGPR:$Rm, rGPR:$Ra))]>; class T2ThreeRegSMUL op22_20, bits<2> op5_4, string opc, list pattern> @@ -4506,6 +4512,8 @@ def : t2InstAlias<"tst${p} $Rn, $Rm", def : InstAlias<"dmb${p}", (t2DMB 0xf, pred:$p), 0>, Requires<[HasDB]>; def : InstAlias<"dsb${p}", (t2DSB 0xf, pred:$p), 0>, Requires<[HasDB]>; def : InstAlias<"isb${p}", (t2ISB 0xf, pred:$p), 0>, Requires<[HasDB]>; +// Armv8-R 'Data Full Barrier' +def : InstAlias<"dfb${p}", (t2DSB 0xc, pred:$p), 1>, Requires<[HasDFB]>; // Alias for LDR, LDRB, LDRH, LDRSB, and LDRSH without the ".w" optional // width specifier. 
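Editor's note on the hunks above: the new ARMsmmlar/ARMsmmlsr and ARMusatnoshift patterns only fire once instruction selection sees the corresponding ARMISD nodes. The following is a purely illustrative C sketch (not part of the patch; function names are invented, and whether a particular front end produces the matching DAG shape is not guaranteed) of the source-level idioms these selections target:

    #include <cstdint>

    // Rounding "most significant word" multiply: the semantics of SMMULR,
    // i.e. the ARMsmmlar pattern with a zero accumulator operand.
    int32_t mulhs_rounded(int32_t a, int32_t b) {
      int64_t prod = (int64_t)a * (int64_t)b + 0x80000000LL; // rounding constant
      return (int32_t)(prod >> 32);                          // keep the high word
    }

    // Unsigned saturation with no shift: a candidate for the new
    // ARMISD::USAT -> USAT #8 selection (clamp a signed value to [0, 255]).
    uint32_t clamp_u8(int32_t x) {
      return x < 0 ? 0u : (x > 255 ? 255u : (uint32_t)x);
    }

The `dfb` alias added in both the ARM and Thumb-2 instruction files is assembler-level only: it simply maps to `DSB` with barrier option #12 (0xc), gated on the new FeatureDFB/HasDFB predicate.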
diff --git a/lib/Target/ARM/ARMInstructionSelector.cpp b/lib/Target/ARM/ARMInstructionSelector.cpp index 4d286ed619ff..f225ff824195 100644 --- a/lib/Target/ARM/ARMInstructionSelector.cpp +++ b/lib/Target/ARM/ARMInstructionSelector.cpp @@ -117,33 +117,39 @@ ARMInstructionSelector::ARMInstructionSelector(const ARMBaseTargetMachine &TM, { } -static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, - MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, - const RegisterBankInfo &RBI) { - unsigned DstReg = I.getOperand(0).getReg(); - if (TargetRegisterInfo::isPhysicalRegister(DstReg)) - return true; - - const RegisterBank *RegBank = RBI.getRegBank(DstReg, MRI, TRI); - (void)RegBank; +static const TargetRegisterClass *guessRegClass(unsigned Reg, + MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI, + const RegisterBankInfo &RBI) { + const RegisterBank *RegBank = RBI.getRegBank(Reg, MRI, TRI); assert(RegBank && "Can't get reg bank for virtual register"); - const unsigned DstSize = MRI.getType(DstReg).getSizeInBits(); + const unsigned Size = MRI.getType(Reg).getSizeInBits(); assert((RegBank->getID() == ARM::GPRRegBankID || RegBank->getID() == ARM::FPRRegBankID) && "Unsupported reg bank"); - const TargetRegisterClass *RC = &ARM::GPRRegClass; - if (RegBank->getID() == ARM::FPRRegBankID) { - if (DstSize == 32) - RC = &ARM::SPRRegClass; - else if (DstSize == 64) - RC = &ARM::DPRRegClass; + if (Size == 32) + return &ARM::SPRRegClass; + else if (Size == 64) + return &ARM::DPRRegClass; else llvm_unreachable("Unsupported destination size"); } + return &ARM::GPRRegClass; +} + +static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, + MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, + const RegisterBankInfo &RBI) { + unsigned DstReg = I.getOperand(0).getReg(); + if (TargetRegisterInfo::isPhysicalRegister(DstReg)) + return true; + + const TargetRegisterClass *RC = guessRegClass(DstReg, MRI, TRI, RBI); + // No need to constrain SrcReg. It will get constrained when // we hit another of its uses or its defs. // Copies do not have constraints. @@ -669,13 +675,14 @@ bool ARMInstructionSelector::select(MachineInstr &I, return true; } + using namespace TargetOpcode; + if (selectImpl(I, CoverageInfo)) return true; MachineInstrBuilder MIB{MF, I}; bool isSExt = false; - using namespace TargetOpcode; switch (I.getOpcode()) { case G_SEXT: isSExt = true; @@ -741,6 +748,31 @@ bool ARMInstructionSelector::select(MachineInstr &I, const auto &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI); const auto &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI); + if (SrcRegBank.getID() == ARM::FPRRegBankID) { + // This should only happen in the obscure case where we have put a 64-bit + // integer into a D register. Get it out of there and keep only the + // interesting part. 
+ assert(I.getOpcode() == G_TRUNC && "Unsupported operand for G_ANYEXT"); + assert(DstRegBank.getID() == ARM::GPRRegBankID && + "Unsupported combination of register banks"); + assert(MRI.getType(SrcReg).getSizeInBits() == 64 && "Unsupported size"); + assert(MRI.getType(DstReg).getSizeInBits() <= 32 && "Unsupported size"); + + unsigned IgnoredBits = MRI.createVirtualRegister(&ARM::GPRRegClass); + auto InsertBefore = std::next(I.getIterator()); + auto MovI = + BuildMI(MBB, InsertBefore, I.getDebugLoc(), TII.get(ARM::VMOVRRD)) + .addDef(DstReg) + .addDef(IgnoredBits) + .addUse(SrcReg) + .add(predOps(ARMCC::AL)); + if (!constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI)) + return false; + + MIB->eraseFromParent(); + return true; + } + if (SrcRegBank.getID() != DstRegBank.getID()) { DEBUG(dbgs() << "G_TRUNC/G_ANYEXT operands on different register banks\n"); return false; @@ -754,6 +786,54 @@ bool ARMInstructionSelector::select(MachineInstr &I, I.setDesc(TII.get(COPY)); return selectCopy(I, TII, MRI, TRI, RBI); } + case G_CONSTANT: { + if (!MRI.getType(I.getOperand(0).getReg()).isPointer()) { + // Non-pointer constants should be handled by TableGen. + DEBUG(dbgs() << "Unsupported constant type\n"); + return false; + } + + auto &Val = I.getOperand(1); + if (Val.isCImm()) { + if (!Val.getCImm()->isZero()) { + DEBUG(dbgs() << "Unsupported pointer constant value\n"); + return false; + } + Val.ChangeToImmediate(0); + } else { + assert(Val.isImm() && "Unexpected operand for G_CONSTANT"); + if (Val.getImm() != 0) { + DEBUG(dbgs() << "Unsupported pointer constant value\n"); + return false; + } + } + + I.setDesc(TII.get(ARM::MOVi)); + MIB.add(predOps(ARMCC::AL)).add(condCodeOp()); + break; + } + case G_INTTOPTR: + case G_PTRTOINT: { + auto SrcReg = I.getOperand(1).getReg(); + auto DstReg = I.getOperand(0).getReg(); + + const auto &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI); + const auto &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI); + + if (SrcRegBank.getID() != DstRegBank.getID()) { + DEBUG(dbgs() + << "G_INTTOPTR/G_PTRTOINT operands on different register banks\n"); + return false; + } + + if (SrcRegBank.getID() != ARM::GPRRegBankID) { + DEBUG(dbgs() << "G_INTTOPTR/G_PTRTOINT on non-GPR not supported yet\n"); + return false; + } + + I.setDesc(TII.get(COPY)); + return selectCopy(I, TII, MRI, TRI, RBI); + } case G_SELECT: return selectSelect(MIB, MRI); case G_ICMP: { @@ -855,12 +935,23 @@ bool ARMInstructionSelector::select(MachineInstr &I, // Branch conditionally. 
auto Branch = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(ARM::Bcc)) .add(I.getOperand(1)) - .add(predOps(ARMCC::EQ, ARM::CPSR)); + .add(predOps(ARMCC::NE, ARM::CPSR)); if (!constrainSelectedInstRegOperands(*Branch, TII, TRI, RBI)) return false; I.eraseFromParent(); return true; } + case G_PHI: { + I.setDesc(TII.get(PHI)); + + unsigned DstReg = I.getOperand(0).getReg(); + const TargetRegisterClass *RC = guessRegClass(DstReg, MRI, TRI, RBI); + if (!RBI.constrainGenericRegister(DstReg, *RC, MRI)) { + break; + } + + return true; + } default: return false; } diff --git a/lib/Target/ARM/ARMLegalizerInfo.cpp b/lib/Target/ARM/ARMLegalizerInfo.cpp index 51eae325c952..a9c1af3b26fc 100644 --- a/lib/Target/ARM/ARMLegalizerInfo.cpp +++ b/lib/Target/ARM/ARMLegalizerInfo.cpp @@ -59,7 +59,7 @@ widen_8_16(const LegalizerInfo::SizeAndActionsVec &v) { } static LegalizerInfo::SizeAndActionsVec -widen_1_8_16(const LegalizerInfo::SizeAndActionsVec &v) { +widen_1_8_16_narrowToLargest(const LegalizerInfo::SizeAndActionsVec &v) { assert(v.size() >= 1); assert(v[0].first > 17); LegalizerInfo::SizeAndActionsVec result = { @@ -68,7 +68,7 @@ widen_1_8_16(const LegalizerInfo::SizeAndActionsVec &v) { {16, LegalizerInfo::WidenScalar}, {17, LegalizerInfo::Unsupported}}; addAndInterleaveWithUnsupported(result, v); auto Largest = result.back().first; - result.push_back({Largest + 1, LegalizerInfo::Unsupported}); + result.push_back({Largest + 1, LegalizerInfo::NarrowScalar}); return result; } @@ -126,6 +126,12 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) { setAction({Op, s32}, Legal); } + setAction({G_INTTOPTR, p0}, Legal); + setAction({G_INTTOPTR, 1, s32}, Legal); + + setAction({G_PTRTOINT, s32}, Legal); + setAction({G_PTRTOINT, 1, p0}, Legal); + for (unsigned Op : {G_ASHR, G_LSHR, G_SHL}) setAction({Op, s32}, Legal); @@ -138,8 +144,15 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) { setAction({G_BRCOND, s1}, Legal); + for (auto Ty : {s32, p0}) + setAction({G_PHI, Ty}, Legal); + setLegalizeScalarToDifferentSizeStrategy( + G_PHI, 0, widenToLargerTypesUnsupportedOtherwise); + setAction({G_CONSTANT, s32}, Legal); - setLegalizeScalarToDifferentSizeStrategy(G_CONSTANT, 0, widen_1_8_16); + setAction({G_CONSTANT, p0}, Legal); + setLegalizeScalarToDifferentSizeStrategy(G_CONSTANT, 0, + widen_1_8_16_narrowToLargest); setAction({G_ICMP, s1}, Legal); setLegalizeScalarToDifferentSizeStrategy(G_ICMP, 1, @@ -148,31 +161,62 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) { setAction({G_ICMP, 1, Ty}, Legal); if (!ST.useSoftFloat() && ST.hasVFP2()) { - for (unsigned BinOp : {G_FADD, G_FSUB, G_FMUL, G_FDIV}) + for (unsigned Op : {G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FCONSTANT, G_FNEG}) for (auto Ty : {s32, s64}) - setAction({BinOp, Ty}, Legal); + setAction({Op, Ty}, Legal); setAction({G_LOAD, s64}, Legal); setAction({G_STORE, s64}, Legal); + setAction({G_PHI, s64}, Legal); + setAction({G_FCMP, s1}, Legal); setAction({G_FCMP, 1, s32}, Legal); setAction({G_FCMP, 1, s64}, Legal); + + setAction({G_MERGE_VALUES, s64}, Legal); + setAction({G_MERGE_VALUES, 1, s32}, Legal); + setAction({G_UNMERGE_VALUES, s32}, Legal); + setAction({G_UNMERGE_VALUES, 1, s64}, Legal); + + setAction({G_FPEXT, s64}, Legal); + setAction({G_FPEXT, 1, s32}, Legal); + + setAction({G_FPTRUNC, s32}, Legal); + setAction({G_FPTRUNC, 1, s64}, Legal); } else { for (unsigned BinOp : {G_FADD, G_FSUB, G_FMUL, G_FDIV}) for (auto Ty : {s32, s64}) setAction({BinOp, Ty}, Libcall); + for (auto Ty : {s32, s64}) { + setAction({G_FNEG, Ty}, 
Lower); + setAction({G_FCONSTANT, Ty}, Custom); + } + setAction({G_FCMP, s1}, Legal); setAction({G_FCMP, 1, s32}, Custom); setAction({G_FCMP, 1, s64}, Custom); + setAction({G_FPEXT, s64}, Legal); + setAction({G_FPEXT, 1, s32}, Libcall); + + setAction({G_FPTRUNC, s32}, Legal); + setAction({G_FPTRUNC, 1, s64}, Libcall); + if (AEABI(ST)) setFCmpLibcallsAEABI(); else setFCmpLibcallsGNU(); } + if (!ST.useSoftFloat() && ST.hasVFP4()) + for (auto Ty : {s32, s64}) + setAction({G_FMA, Ty}, Legal); + else + for (auto Ty : {s32, s64}) + setAction({G_FMA, Ty}, Libcall); + for (unsigned Op : {G_FREM, G_FPOW}) for (auto Ty : {s32, s64}) setAction({Op, Ty}, Libcall); @@ -293,6 +337,7 @@ bool ARMLegalizerInfo::legalizeCustom(MachineInstr &MI, using namespace TargetOpcode; MIRBuilder.setInstr(MI); + LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext(); switch (MI.getOpcode()) { default: @@ -309,7 +354,6 @@ bool ARMLegalizerInfo::legalizeCustom(MachineInstr &MI, // Our divmod libcalls return a struct containing the quotient and the // remainder. We need to create a virtual register for it. - auto &Ctx = MIRBuilder.getMF().getFunction()->getContext(); Type *ArgTy = Type::getInt32Ty(Ctx); StructType *RetTy = StructType::get(Ctx, {ArgTy, ArgTy}, /* Packed */ true); auto RetVal = MRI.createGenericVirtualRegister( @@ -350,7 +394,6 @@ bool ARMLegalizerInfo::legalizeCustom(MachineInstr &MI, return true; } - auto &Ctx = MIRBuilder.getMF().getFunction()->getContext(); assert((OpSize == 32 || OpSize == 64) && "Unsupported operand size"); auto *ArgTy = OpSize == 32 ? Type::getFloatTy(Ctx) : Type::getDoubleTy(Ctx); auto *RetTy = Type::getInt32Ty(Ctx); @@ -395,6 +438,14 @@ bool ARMLegalizerInfo::legalizeCustom(MachineInstr &MI, } break; } + case G_FCONSTANT: { + // Convert to integer constants, while preserving the binary representation. + auto AsInteger = + MI.getOperand(1).getFPImm()->getValueAPF().bitcastToAPInt(); + MIRBuilder.buildConstant(MI.getOperand(0).getReg(), + *ConstantInt::get(Ctx, AsInteger)); + break; + } } MI.eraseFromParent(); diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index e989c2fce5d5..8b3a2e223796 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -1273,7 +1273,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) { // can still change to a writeback form as that will save us 2 bytes // of code size. It can create WAW hazards though, so only do it if // we're minimizing code size. - if (!MBB.getParent()->getFunction()->optForMinSize() || !BaseKill) + if (!MBB.getParent()->getFunction().optForMinSize() || !BaseKill) return false; bool HighRegsUsed = false; @@ -1697,7 +1697,7 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, if (OddReg == EvenReg && EvenDeadKill) { // If the two source operands are the same, the kill marker is // probably on the first one. e.g. 
- // t2STRDi8 %R5, %R5, %R9, 0, 14, %reg0 + // t2STRDi8 killed %r5, %r5, killed %r9, 0, 14, %reg0 EvenDeadKill = false; OddDeadKill = true; } @@ -1953,7 +1953,7 @@ bool ARMLoadStoreOpt::CombineMovBx(MachineBasicBlock &MBB) { } bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { - if (skipFunction(*Fn.getFunction())) + if (skipFunction(Fn.getFunction())) return false; MF = &Fn; @@ -2035,7 +2035,7 @@ INITIALIZE_PASS(ARMPreAllocLoadStoreOpt, "arm-prera-ldst-opt", ARM_PREALLOC_LOAD_STORE_OPT_NAME, false, false) bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { - if (AssumeMisalignedLoadStores || skipFunction(*Fn.getFunction())) + if (AssumeMisalignedLoadStores || skipFunction(Fn.getFunction())) return false; TD = &Fn.getDataLayout(); @@ -2130,9 +2130,9 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, return false; unsigned Align = (*Op0->memoperands_begin())->getAlignment(); - const Function *Func = MF->getFunction(); + const Function &Func = MF->getFunction(); unsigned ReqAlign = STI->hasV6Ops() - ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext())) + ? TD->getABITypeAlignment(Type::getInt64Ty(Func.getContext())) : 8; // Pre-v6 need 8-byte align if (Align < ReqAlign) return false; diff --git a/lib/Target/ARM/ARMOptimizeBarriersPass.cpp b/lib/Target/ARM/ARMOptimizeBarriersPass.cpp index 7e4d598a6e0b..cff4a256100d 100644 --- a/lib/Target/ARM/ARMOptimizeBarriersPass.cpp +++ b/lib/Target/ARM/ARMOptimizeBarriersPass.cpp @@ -49,7 +49,7 @@ static bool CanMovePastDMB(const MachineInstr *MI) { } bool ARMOptimizeBarriersPass::runOnMachineFunction(MachineFunction &MF) { - if (skipFunction(*MF.getFunction())) + if (skipFunction(MF.getFunction())) return false; // Vector to store the DMBs we will remove after the first iteration diff --git a/lib/Target/ARM/ARMRegisterBankInfo.cpp b/lib/Target/ARM/ARMRegisterBankInfo.cpp index b32bfd449544..0e6073a5c809 100644 --- a/lib/Target/ARM/ARMRegisterBankInfo.cpp +++ b/lib/Target/ARM/ARMRegisterBankInfo.cpp @@ -226,12 +226,30 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case G_SEXT: case G_ZEXT: case G_ANYEXT: - case G_TRUNC: case G_GEP: + case G_INTTOPTR: + case G_PTRTOINT: // FIXME: We're abusing the fact that everything lives in a GPR for now; in // the real world we would use different mappings. OperandsMapping = &ARM::ValueMappings[ARM::GPR3OpsIdx]; break; + case G_TRUNC: { + // In some cases we may end up with a G_TRUNC from a 64-bit value to a + // 32-bit value. This isn't a real floating point trunc (that would be a + // G_FPTRUNC). Instead it is an integer trunc in disguise, which can appear + // because the legalizer doesn't distinguish between integer and floating + // point values so it may leave some 64-bit integers un-narrowed. Until we + // have a more principled solution that doesn't let such things sneak all + // the way to this point, just map the source to a DPR and the destination + // to a GPR. + LLT LargeTy = MRI.getType(MI.getOperand(1).getReg()); + OperandsMapping = + LargeTy.getSizeInBits() <= 32 + ? 
&ARM::ValueMappings[ARM::GPR3OpsIdx] + : getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx], + &ARM::ValueMappings[ARM::DPR3OpsIdx]}); + break; + } case G_LOAD: case G_STORE: { LLT Ty = MRI.getType(MI.getOperand(0).getReg()); @@ -245,13 +263,46 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case G_FADD: case G_FSUB: case G_FMUL: - case G_FDIV: { + case G_FDIV: + case G_FNEG: { LLT Ty = MRI.getType(MI.getOperand(0).getReg()); OperandsMapping =Ty.getSizeInBits() == 64 ? &ARM::ValueMappings[ARM::DPR3OpsIdx] : &ARM::ValueMappings[ARM::SPR3OpsIdx]; break; } + case G_FMA: { + LLT Ty = MRI.getType(MI.getOperand(0).getReg()); + OperandsMapping = + Ty.getSizeInBits() == 64 + ? getOperandsMapping({&ARM::ValueMappings[ARM::DPR3OpsIdx], + &ARM::ValueMappings[ARM::DPR3OpsIdx], + &ARM::ValueMappings[ARM::DPR3OpsIdx], + &ARM::ValueMappings[ARM::DPR3OpsIdx]}) + : getOperandsMapping({&ARM::ValueMappings[ARM::SPR3OpsIdx], + &ARM::ValueMappings[ARM::SPR3OpsIdx], + &ARM::ValueMappings[ARM::SPR3OpsIdx], + &ARM::ValueMappings[ARM::SPR3OpsIdx]}); + break; + } + case G_FPEXT: { + LLT ToTy = MRI.getType(MI.getOperand(0).getReg()); + LLT FromTy = MRI.getType(MI.getOperand(1).getReg()); + if (ToTy.getSizeInBits() == 64 && FromTy.getSizeInBits() == 32) + OperandsMapping = + getOperandsMapping({&ARM::ValueMappings[ARM::DPR3OpsIdx], + &ARM::ValueMappings[ARM::SPR3OpsIdx]}); + break; + } + case G_FPTRUNC: { + LLT ToTy = MRI.getType(MI.getOperand(0).getReg()); + LLT FromTy = MRI.getType(MI.getOperand(1).getReg()); + if (ToTy.getSizeInBits() == 32 && FromTy.getSizeInBits() == 64) + OperandsMapping = + getOperandsMapping({&ARM::ValueMappings[ARM::SPR3OpsIdx], + &ARM::ValueMappings[ARM::DPR3OpsIdx]}); + break; + } case G_CONSTANT: case G_FRAME_INDEX: case G_GLOBAL_VALUE: diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp index 33dcf9b8fef0..d4fbf76f299f 100644 --- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp +++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp @@ -171,7 +171,7 @@ SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemcpy( // Code size optimisation: do not inline memcpy if expansion results in // more instructions than the libary call. 
- if (NumMEMCPYs > 1 && DAG.getMachineFunction().getFunction()->optForMinSize()) { + if (NumMEMCPYs > 1 && DAG.getMachineFunction().getFunction().optForMinSize()) { return SDValue(); } diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index e3855cc9a91f..23027e92481f 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -28,10 +28,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" -#include "llvm/CodeGen/GlobalISel/IRTranslator.h" #include "llvm/CodeGen/GlobalISel/InstructionSelect.h" -#include "llvm/CodeGen/GlobalISel/Legalizer.h" -#include "llvm/CodeGen/GlobalISel/RegBankSelect.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" @@ -41,8 +38,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/TargetParser.h" #include "llvm/Target/TargetOptions.h" -#include -#include using namespace llvm; @@ -353,11 +348,6 @@ unsigned ARMSubtarget::getMispredictionPenalty() const { return SchedModel.MispredictPenalty; } -bool ARMSubtarget::hasSinCos() const { - return isTargetWatchOS() || - (isTargetIOS() && !getTargetTriple().isOSVersionLT(7, 0)); -} - bool ARMSubtarget::enableMachineScheduler() const { // Enable the MachineScheduler before register allocation for subtargets // with the use-misched feature. @@ -378,7 +368,7 @@ bool ARMSubtarget::useStride4VFPs(const MachineFunction &MF) const { // For general targets, the prologue can grow when VFPs are allocated with // stride 4 (more vpush instructions). But WatchOS uses a compact unwind // format which it's more important to get right. - return isTargetWatchABI() || (isSwift() && !MF.getFunction()->optForMinSize()); + return isTargetWatchABI() || (isSwift() && !MF.getFunction().optForMinSize()); } bool ARMSubtarget::useMovt(const MachineFunction &MF) const { @@ -386,7 +376,7 @@ bool ARMSubtarget::useMovt(const MachineFunction &MF) const { // immediates as it is inherently position independent, and may be out of // range otherwise. return !NoMovt && hasV8MBaselineOps() && - (isTargetWindows() || !MF.getFunction()->optForMinSize() || genExecuteOnly()); + (isTargetWindows() || !MF.getFunction().optForMinSize() || genExecuteOnly()); } bool ARMSubtarget::useFastISel() const { diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 9301197e1387..eedb675a3304 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -236,6 +236,10 @@ class ARMSubtarget : public ARMGenSubtargetInfo { /// instructions. bool HasDataBarrier = false; + /// HasFullDataBarrier - True if the subtarget supports DFB data barrier + /// instruction. + bool HasFullDataBarrier = false; + /// HasV7Clrex - True if the subtarget supports CLREX instructions bool HasV7Clrex = false; @@ -544,6 +548,7 @@ class ARMSubtarget : public ARMGenSubtargetInfo { bool hasDivideInThumbMode() const { return HasHardwareDivideInThumb; } bool hasDivideInARMMode() const { return HasHardwareDivideInARM; } bool hasDataBarrier() const { return HasDataBarrier; } + bool hasFullDataBarrier() const { return HasFullDataBarrier; } bool hasV7Clrex() const { return HasV7Clrex; } bool hasAcquireRelease() const { return HasAcquireRelease; } @@ -712,10 +717,6 @@ class ARMSubtarget : public ARMGenSubtargetInfo { unsigned getMispredictionPenalty() const; - /// This function returns true if the target has sincos() routine in its - /// compiler runtime or math libraries. 
- bool hasSinCos() const; - /// Returns true if machine scheduler should be enabled. bool enableMachineScheduler() const override; diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 007dc2be16e5..9ba286a98d57 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -22,7 +22,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/CodeGen/ExecutionDepsFix.h" +#include "llvm/CodeGen/ExecutionDomainFix.h" #include "llvm/CodeGen/GlobalISel/CallLowering.h" #include "llvm/CodeGen/GlobalISel/IRTranslator.h" #include "llvm/CodeGen/GlobalISel/InstructionSelect.h" @@ -75,7 +75,7 @@ EnableGlobalMerge("arm-global-merge", cl::Hidden, cl::desc("Enable the global merge pass")); namespace llvm { - void initializeARMExecutionDepsFixPass(PassRegistry&); + void initializeARMExecutionDomainFixPass(PassRegistry&); } extern "C" void LLVMInitializeARMTarget() { @@ -90,8 +90,9 @@ extern "C" void LLVMInitializeARMTarget() { initializeARMLoadStoreOptPass(Registry); initializeARMPreAllocLoadStoreOptPass(Registry); initializeARMConstantIslandsPass(Registry); - initializeARMExecutionDepsFixPass(Registry); + initializeARMExecutionDomainFixPass(Registry); initializeARMExpandPseudoPass(Registry); + initializeThumb2SizeReducePass(Registry); } static std::unique_ptr createTLOF(const Triple &TT) { @@ -282,10 +283,9 @@ ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const { return I.get(); } -TargetIRAnalysis ARMBaseTargetMachine::getTargetIRAnalysis() { - return TargetIRAnalysis([this](const Function &F) { - return TargetTransformInfo(ARMTTIImpl(this, F)); - }); +TargetTransformInfo +ARMBaseTargetMachine::getTargetTransformInfo(const Function &F) { + return TargetTransformInfo(ARMTTIImpl(this, F)); } ARMLETargetMachine::ARMLETargetMachine(const Target &T, const Triple &TT, @@ -355,20 +355,23 @@ class ARMPassConfig : public TargetPassConfig { void addPreEmitPass() override; }; -class ARMExecutionDepsFix : public ExecutionDepsFix { +class ARMExecutionDomainFix : public ExecutionDomainFix { public: static char ID; - ARMExecutionDepsFix() : ExecutionDepsFix(ID, ARM::DPRRegClass) {} + ARMExecutionDomainFix() : ExecutionDomainFix(ID, ARM::DPRRegClass) {} StringRef getPassName() const override { - return "ARM Execution Dependency Fix"; + return "ARM Execution Domain Fix"; } }; -char ARMExecutionDepsFix::ID; +char ARMExecutionDomainFix::ID; } // end anonymous namespace -INITIALIZE_PASS(ARMExecutionDepsFix, "arm-execution-deps-fix", - "ARM Execution Dependency Fix", false, false) +INITIALIZE_PASS_BEGIN(ARMExecutionDomainFix, "arm-execution-domain-fix", + "ARM Execution Domain Fix", false, false) +INITIALIZE_PASS_DEPENDENCY(ReachingDefAnalysis) +INITIALIZE_PASS_END(ARMExecutionDomainFix, "arm-execution-domain-fix", + "ARM Execution Domain Fix", false, false) TargetPassConfig *ARMBaseTargetMachine::createPassConfig(PassManagerBase &PM) { return new ARMPassConfig(*this, PM); @@ -385,7 +388,7 @@ void ARMPassConfig::addIRPasses() { // ldrex/strex loops to simplify this, but it needs tidying up. 
if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy) addPass(createCFGSimplificationPass( - 1, false, false, true, [this](const Function &F) { + 1, false, false, true, true, [this](const Function &F) { const auto &ST = this->TM->getSubtarget(F); return ST.hasAnyDataBarrier() && !ST.isThumb1Only(); })); @@ -462,7 +465,8 @@ void ARMPassConfig::addPreSched2() { if (EnableARMLoadStoreOpt) addPass(createARMLoadStoreOptimizationPass()); - addPass(new ARMExecutionDepsFix()); + addPass(new ARMExecutionDomainFix()); + addPass(createBreakFalseDeps()); } // Expand some pseudo instructions into multiple instructions to allow diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h index 655ec3202bfb..2072bb731f0a 100644 --- a/lib/Target/ARM/ARMTargetMachine.h +++ b/lib/Target/ARM/ARMTargetMachine.h @@ -53,8 +53,7 @@ class ARMBaseTargetMachine : public LLVMTargetMachine { const ARMSubtarget *getSubtargetImpl() const = delete; bool isLittleEndian() const { return isLittle; } - /// \brief Get the TargetIRAnalysis for this target. - TargetIRAnalysis getTargetIRAnalysis() override; + TargetTransformInfo getTargetTransformInfo(const Function &F) override; // Pass Pipeline Configuration TargetPassConfig *createPassConfig(PassManagerBase &PM) override; diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp index cae01e415eff..43d7888075b5 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -394,25 +394,6 @@ int ARMTTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE, return 1; } -int ARMTTIImpl::getFPOpCost(Type *Ty) { - // Use similar logic that's in ARMISelLowering: - // Any ARM CPU with VFP2 has floating point, but Thumb1 didn't have access - // to VFP. - - if (ST->hasVFP2() && !ST->isThumb1Only()) { - if (Ty->isFloatTy()) { - return TargetTransformInfo::TCC_Basic; - } - - if (Ty->isDoubleTy()) { - return ST->isFPOnlySP() ? TargetTransformInfo::TCC_Expensive : - TargetTransformInfo::TCC_Basic; - } - } - - return TargetTransformInfo::TCC_Expensive; -} - int ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp) { // We only handle costs of reverse and alternate shuffles for now. 
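One pattern accounts for most of the mechanical churn in the surrounding hunks (useStride4VFPs, useMovt, the memcpy lowering, the load/store optimizer, and the Thumb-2 size reduction further down): MachineFunction::getFunction() now returns a reference instead of a pointer. A minimal sketch of the call-site rewrite, using an invented helper name but the real LLVM headers:

    #include "llvm/CodeGen/MachineFunction.h"
    #include "llvm/IR/Function.h"
    using namespace llvm;

    // Hypothetical size heuristic, shown only to illustrate the API change.
    static bool wantsSizeSavings(const MachineFunction &MF) {
      // Before this patch:
      //   const Function *F = MF.getFunction();
      //   return F->optForMinSize();
      const Function &F = MF.getFunction(); // now a reference
      return F.optForMinSize();             // members reached with '.'
    }

The same rewrite is what turns `skipFunction(*Fn.getFunction())` into `skipFunction(Fn.getFunction())` in the pass entry points throughout this section.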
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.h b/lib/Target/ARM/ARMTargetTransformInfo.h index 99353a3219a0..cd9fa0709020 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/lib/Target/ARM/ARMTargetTransformInfo.h @@ -156,8 +156,6 @@ class ARMTTIImpl : public BasicTTIImplBase { int getAddressComputationCost(Type *Val, ScalarEvolution *SE, const SCEV *Ptr); - int getFPOpCost(Type *Ty); - int getArithmeticInstrCost( unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info = TTI::OK_AnyValue, diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 26fda5f22b4f..55a73ff537cd 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -1150,10 +1150,30 @@ class ARMOperand : public MCParsedAsmOperand { bool isToken() const override { return Kind == k_Token; } bool isMemBarrierOpt() const { return Kind == k_MemBarrierOpt; } bool isInstSyncBarrierOpt() const { return Kind == k_InstSyncBarrierOpt; } - bool isMem() const override { return Kind == k_Memory; } + bool isMem() const override { + if (Kind != k_Memory) + return false; + if (Memory.BaseRegNum && + !ARMMCRegisterClasses[ARM::GPRRegClassID].contains(Memory.BaseRegNum)) + return false; + if (Memory.OffsetRegNum && + !ARMMCRegisterClasses[ARM::GPRRegClassID].contains(Memory.OffsetRegNum)) + return false; + return true; + } bool isShifterImm() const { return Kind == k_ShifterImmediate; } - bool isRegShiftedReg() const { return Kind == k_ShiftedRegister; } - bool isRegShiftedImm() const { return Kind == k_ShiftedImmediate; } + bool isRegShiftedReg() const { + return Kind == k_ShiftedRegister && + ARMMCRegisterClasses[ARM::GPRRegClassID].contains( + RegShiftedReg.SrcReg) && + ARMMCRegisterClasses[ARM::GPRRegClassID].contains( + RegShiftedReg.ShiftReg); + } + bool isRegShiftedImm() const { + return Kind == k_ShiftedImmediate && + ARMMCRegisterClasses[ARM::GPRRegClassID].contains( + RegShiftedImm.SrcReg); + } bool isRotImm() const { return Kind == k_RotateImmediate; } bool isModImm() const { return Kind == k_ModifiedImmediate; } @@ -1192,9 +1212,12 @@ class ARMOperand : public MCParsedAsmOperand { bool isConstantPoolImm() const { return Kind == k_ConstantPoolImmediate; } bool isBitfield() const { return Kind == k_BitfieldDescriptor; } - bool isPostIdxRegShifted() const { return Kind == k_PostIndexRegister; } + bool isPostIdxRegShifted() const { + return Kind == k_PostIndexRegister && + ARMMCRegisterClasses[ARM::GPRRegClassID].contains(PostIdxReg.RegNum); + } bool isPostIdxReg() const { - return Kind == k_PostIndexRegister && PostIdxReg.ShiftTy ==ARM_AM::no_shift; + return isPostIdxRegShifted() && PostIdxReg.ShiftTy == ARM_AM::no_shift; } bool isMemNoOffset(bool alignOK = false, unsigned Alignment = 0) const { if (!isMem()) @@ -1331,10 +1354,10 @@ class ARMOperand : public MCParsedAsmOperand { } bool isAM3Offset() const { - if (Kind != k_Immediate && Kind != k_PostIndexRegister) + if (isPostIdxReg()) + return true; + if (!isImm()) return false; - if (Kind == k_PostIndexRegister) - return PostIdxReg.ShiftTy == ARM_AM::no_shift; // Immediate offset in range [-255, 255]. 
const MCConstantExpr *CE = dyn_cast(getImm()); if (!CE) return false; @@ -5581,11 +5604,11 @@ void ARMAsmParser::getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst, CanAcceptPredicationCode = Mnemonic != "cdp2" && Mnemonic != "clrex" && Mnemonic != "mcr2" && Mnemonic != "mcrr2" && Mnemonic != "mrc2" && Mnemonic != "mrrc2" && - Mnemonic != "dmb" && Mnemonic != "dsb" && Mnemonic != "isb" && - Mnemonic != "pld" && Mnemonic != "pli" && Mnemonic != "pldw" && - Mnemonic != "ldc2" && Mnemonic != "ldc2l" && Mnemonic != "stc2" && - Mnemonic != "stc2l" && !Mnemonic.startswith("rfe") && - !Mnemonic.startswith("srs"); + Mnemonic != "dmb" && Mnemonic != "dfb" && Mnemonic != "dsb" && + Mnemonic != "isb" && Mnemonic != "pld" && Mnemonic != "pli" && + Mnemonic != "pldw" && Mnemonic != "ldc2" && Mnemonic != "ldc2l" && + Mnemonic != "stc2" && Mnemonic != "stc2l" && + !Mnemonic.startswith("rfe") && !Mnemonic.startswith("srs"); } else if (isThumbOne()) { if (hasV6MOps()) CanAcceptPredicationCode = Mnemonic != "movs"; @@ -6227,7 +6250,8 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst, // The instruction must be predicable. if (!MCID.isPredicable()) return Error(Loc, "instructions in IT block must be predicable"); - unsigned Cond = Inst.getOperand(MCID.findFirstPredOperandIdx()).getImm(); + ARMCC::CondCodes Cond = ARMCC::CondCodes( + Inst.getOperand(MCID.findFirstPredOperandIdx()).getImm()); if (Cond != currentITCond()) { // Find the condition code Operand to get its SMLoc information. SMLoc CondLoc; @@ -6235,9 +6259,9 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst, if (static_cast(*Operands[I]).isCondCode()) CondLoc = Operands[I]->getStartLoc(); return Error(CondLoc, "incorrect condition in IT block; got '" + - StringRef(ARMCondCodeToString(ARMCC::CondCodes(Cond))) + - "', but expected '" + - ARMCondCodeToString(ARMCC::CondCodes(currentITCond())) + "'"); + StringRef(ARMCondCodeToString(Cond)) + + "', but expected '" + + ARMCondCodeToString(currentITCond()) + "'"); } // Check for non-'al' condition codes outside of the IT block. 
} else if (isThumbTwo() && MCID.isPredicable() && diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index a29a2eeccfe8..53c635877675 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -2386,6 +2386,7 @@ static DecodeStatus DecodeVLDInstruction(MCInst &Inst, unsigned Insn, case ARM::VLD4q32_UPD: if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+2)%32, Address, Decoder))) return MCDisassembler::Fail; + break; default: break; } @@ -3326,6 +3327,7 @@ static DecodeStatus DecodeT2AddrModeSOReg(MCInst &Inst, unsigned Val, case ARM::t2STRs: if (Rn == 15) return MCDisassembler::Fail; + break; default: break; } @@ -3391,6 +3393,7 @@ static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Insn, break; case ARM::t2LDRSBs: Inst.setOpcode(ARM::t2PLIs); + break; default: break; } @@ -3854,6 +3857,7 @@ static DecodeStatus DecodeT2AddrModeImm12(MCInst &Inst, unsigned Val, case ARM::t2STRHi12: if (Rn == 15) return MCDisassembler::Fail; + break; default: break; } diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index 97a27ece0d4f..ff507ab7162f 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -12,7 +12,6 @@ #include "MCTargetDesc/ARMAsmBackendDarwin.h" #include "MCTargetDesc/ARMAsmBackendELF.h" #include "MCTargetDesc/ARMAsmBackendWinCOFF.h" -#include "MCTargetDesc/ARMBaseInfo.h" #include "MCTargetDesc/ARMFixupKinds.h" #include "MCTargetDesc/ARMMCTargetDesc.h" #include "llvm/ADT/StringSwitch.h" @@ -25,7 +24,6 @@ #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixupKindInfo.h" -#include "llvm/MC/MCMachObjectWriter.h" #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSectionELF.h" @@ -174,8 +172,8 @@ void ARMAsmBackend::handleAssemblerFlag(MCAssemblerFlag Flag) { } unsigned ARMAsmBackend::getRelaxedOpcode(unsigned Op) const { - bool HasThumb2 = STI->getFeatureBits()[ARM::FeatureThumb2]; - bool HasV8MBaselineOps = STI->getFeatureBits()[ARM::HasV8MBaselineOps]; + bool HasThumb2 = STI.getFeatureBits()[ARM::FeatureThumb2]; + bool HasV8MBaselineOps = STI.getFeatureBits()[ARM::HasV8MBaselineOps]; switch (Op) { default: @@ -391,7 +389,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm, case FK_SecRel_4: return Value; case ARM::fixup_arm_movt_hi16: - if (IsResolved || !STI->getTargetTriple().isOSBinFormatELF()) + if (IsResolved || !STI.getTargetTriple().isOSBinFormatELF()) Value >>= 16; LLVM_FALLTHROUGH; case ARM::fixup_arm_movw_lo16: { @@ -403,7 +401,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm, return Value; } case ARM::fixup_t2_movt_hi16: - if (IsResolved || !STI->getTargetTriple().isOSBinFormatELF()) + if (IsResolved || !STI.getTargetTriple().isOSBinFormatELF()) Value >>= 16; LLVM_FALLTHROUGH; case ARM::fixup_t2_movw_lo16: { @@ -593,7 +591,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm, case ARM::fixup_arm_thumb_cp: // On CPUs supporting Thumb2, this will be relaxed to an ldr.w, otherwise we // could have an error on our hands. 
- if (!STI->getFeatureBits()[ARM::FeatureThumb2] && IsResolved) { + if (!STI.getFeatureBits()[ARM::FeatureThumb2] && IsResolved) { const char *FixupDiagnostic = reasonForFixupRelaxation(Fixup, Value); if (FixupDiagnostic) { Ctx.reportError(Fixup.getLoc(), FixupDiagnostic); @@ -617,8 +615,8 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm, } case ARM::fixup_arm_thumb_br: // Offset by 4 and don't encode the lower bit, which is always 0. - if (!STI->getFeatureBits()[ARM::FeatureThumb2] && - !STI->getFeatureBits()[ARM::HasV8MBaselineOps]) { + if (!STI.getFeatureBits()[ARM::FeatureThumb2] && + !STI.getFeatureBits()[ARM::HasV8MBaselineOps]) { const char *FixupDiagnostic = reasonForFixupRelaxation(Fixup, Value); if (FixupDiagnostic) { Ctx.reportError(Fixup.getLoc(), FixupDiagnostic); @@ -628,7 +626,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm, return ((Value - 4) >> 1) & 0x7ff; case ARM::fixup_arm_thumb_bcc: // Offset by 4 and don't encode the lower bit, which is always 0. - if (!STI->getFeatureBits()[ARM::FeatureThumb2]) { + if (!STI.getFeatureBits()[ARM::FeatureThumb2]) { const char *FixupDiagnostic = reasonForFixupRelaxation(Fixup, Value); if (FixupDiagnostic) { Ctx.reportError(Fixup.getLoc(), FixupDiagnostic); @@ -1156,51 +1154,52 @@ static MachO::CPUSubTypeARM getMachOSubTypeFromArch(StringRef Arch) { } MCAsmBackend *llvm::createARMAsmBackend(const Target &T, + const MCSubtargetInfo &STI, const MCRegisterInfo &MRI, - const Triple &TheTriple, StringRef CPU, const MCTargetOptions &Options, bool isLittle) { + const Triple &TheTriple = STI.getTargetTriple(); switch (TheTriple.getObjectFormat()) { default: llvm_unreachable("unsupported object format"); case Triple::MachO: { MachO::CPUSubTypeARM CS = getMachOSubTypeFromArch(TheTriple.getArchName()); - return new ARMAsmBackendDarwin(T, TheTriple, MRI, CS); + return new ARMAsmBackendDarwin(T, STI, MRI, CS); } case Triple::COFF: assert(TheTriple.isOSWindows() && "non-Windows ARM COFF is not supported"); - return new ARMAsmBackendWinCOFF(T, TheTriple); + return new ARMAsmBackendWinCOFF(T, STI); case Triple::ELF: assert(TheTriple.isOSBinFormatELF() && "using ELF for non-ELF target"); uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS()); - return new ARMAsmBackendELF(T, TheTriple, OSABI, isLittle); + return new ARMAsmBackendELF(T, STI, OSABI, isLittle); } } MCAsmBackend *llvm::createARMLEAsmBackend(const Target &T, + const MCSubtargetInfo &STI, const MCRegisterInfo &MRI, - const Triple &TT, StringRef CPU, const MCTargetOptions &Options) { - return createARMAsmBackend(T, MRI, TT, CPU, Options, true); + return createARMAsmBackend(T, STI, MRI, Options, true); } MCAsmBackend *llvm::createARMBEAsmBackend(const Target &T, + const MCSubtargetInfo &STI, const MCRegisterInfo &MRI, - const Triple &TT, StringRef CPU, const MCTargetOptions &Options) { - return createARMAsmBackend(T, MRI, TT, CPU, Options, false); + return createARMAsmBackend(T, STI, MRI, Options, false); } MCAsmBackend *llvm::createThumbLEAsmBackend(const Target &T, + const MCSubtargetInfo &STI, const MCRegisterInfo &MRI, - const Triple &TT, StringRef CPU, const MCTargetOptions &Options) { - return createARMAsmBackend(T, MRI, TT, CPU, Options, true); + return createARMAsmBackend(T, STI, MRI, Options, true); } MCAsmBackend *llvm::createThumbBEAsmBackend(const Target &T, + const MCSubtargetInfo &STI, const MCRegisterInfo &MRI, - const Triple &TT, StringRef CPU, const MCTargetOptions &Options) { - return createARMAsmBackend(T, MRI, TT, CPU, 
Options, false); + return createARMAsmBackend(T, STI, MRI, Options, false); } diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h index 02374966dafe..c8527e5cca20 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h @@ -19,22 +19,20 @@ namespace llvm { class ARMAsmBackend : public MCAsmBackend { - const MCSubtargetInfo *STI; + const MCSubtargetInfo &STI; bool isThumbMode; // Currently emitting Thumb code. bool IsLittleEndian; // Big or little endian. public: - ARMAsmBackend(const Target &T, const Triple &TT, bool IsLittle) - : MCAsmBackend(), STI(ARM_MC::createARMMCSubtargetInfo(TT, "", "")), - isThumbMode(TT.getArchName().startswith("thumb")), + ARMAsmBackend(const Target &T, const MCSubtargetInfo &STI, bool IsLittle) + : MCAsmBackend(), STI(STI), + isThumbMode(STI.getTargetTriple().isThumb()), IsLittleEndian(IsLittle) {} - ~ARMAsmBackend() override { delete STI; } - unsigned getNumFixupKinds() const override { return ARM::NumTargetFixupKinds; } - bool hasNOP() const { return STI->getFeatureBits()[ARM::HasV6T2Ops]; } + bool hasNOP() const { return STI.getFeatureBits()[ARM::HasV6T2Ops]; } const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override; diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h index f05e3a6f1160..19e3fdb72046 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h @@ -19,10 +19,10 @@ class ARMAsmBackendDarwin : public ARMAsmBackend { const MCRegisterInfo &MRI; public: const MachO::CPUSubTypeARM Subtype; - ARMAsmBackendDarwin(const Target &T, const Triple &TT, + ARMAsmBackendDarwin(const Target &T, const MCSubtargetInfo &STI, const MCRegisterInfo &MRI, MachO::CPUSubTypeARM st) - : ARMAsmBackend(T, TT, /* IsLittleEndian */ true), MRI(MRI), Subtype(st) { - } + : ARMAsmBackend(T, STI, /* IsLittleEndian */ true), MRI(MRI), + Subtype(st) {} std::unique_ptr createObjectWriter(raw_pwrite_stream &OS) const override { diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h index d0f5419a1b0f..361ea3040847 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h @@ -20,9 +20,9 @@ namespace { class ARMAsmBackendELF : public ARMAsmBackend { public: uint8_t OSABI; - ARMAsmBackendELF(const Target &T, const Triple &TT, uint8_t OSABI, + ARMAsmBackendELF(const Target &T, const MCSubtargetInfo &STI, uint8_t OSABI, bool IsLittle) - : ARMAsmBackend(T, TT, IsLittle), OSABI(OSABI) {} + : ARMAsmBackend(T, STI, IsLittle), OSABI(OSABI) {} std::unique_ptr createObjectWriter(raw_pwrite_stream &OS) const override { diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h index 53b9c29446a3..0ac6d4270aac 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h @@ -17,8 +17,8 @@ using namespace llvm; namespace { class ARMAsmBackendWinCOFF : public ARMAsmBackend { public: - ARMAsmBackendWinCOFF(const Target &T, const Triple &TheTriple) - : ARMAsmBackend(T, TheTriple, true) {} + ARMAsmBackendWinCOFF(const Target &T, const MCSubtargetInfo &STI) + : ARMAsmBackend(T, STI, true) {} std::unique_ptr createObjectWriter(raw_pwrite_stream &OS) const override { return createARMWinCOFFObjectWriter(OS, /*Is64Bit=*/false); diff --git 
a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp index 8cfa18f58b61..9d73c7629dae 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -512,9 +512,11 @@ class ARMELFStreamer : public MCELFStreamer { assert(IsThumb); EmitThumbMappingSymbol(); + // Thumb wide instructions are emitted as a pair of 16-bit words of the + // appropriate endianness. for (unsigned II = 0, IE = Size; II != IE; II = II + 2) { - const unsigned I0 = LittleEndian ? II + 0 : (Size - II - 1); - const unsigned I1 = LittleEndian ? II + 1 : (Size - II - 2); + const unsigned I0 = LittleEndian ? II + 0 : II + 1; + const unsigned I1 = LittleEndian ? II + 1 : II + 0; Buffer[Size - II - 2] = uint8_t(Inst >> I0 * CHAR_BIT); Buffer[Size - II - 1] = uint8_t(Inst >> I1 * CHAR_BIT); } @@ -847,6 +849,7 @@ void ARMTargetELFStreamer::emitArchDefaultAttributes() { setAttributeItem(THUMB_ISA_use, AllowThumb32, false); break; + case ARM::ArchKind::ARMV7EM: case ARM::ArchKind::ARMV7M: setAttributeItem(CPU_arch_profile, MicroControllerProfile, false); setAttributeItem(THUMB_ISA_use, AllowThumb32, false); diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp index 2063ca6bdf3b..306f068312f5 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp @@ -8,7 +8,6 @@ //===----------------------------------------------------------------------===// #include "ARMMCExpr.h" -#include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCStreamer.h" using namespace llvm; diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h index 0fb97e5fee97..df9874c78d07 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h @@ -68,27 +68,27 @@ MCCodeEmitter *createARMBEMCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, MCContext &Ctx); -MCAsmBackend *createARMAsmBackend(const Target &T, const MCRegisterInfo &MRI, - const Triple &TT, StringRef CPU, +MCAsmBackend *createARMAsmBackend(const Target &T, const MCSubtargetInfo &STI, + const MCRegisterInfo &MRI, const MCTargetOptions &Options, bool IsLittleEndian); -MCAsmBackend *createARMLEAsmBackend(const Target &T, const MCRegisterInfo &MRI, - const Triple &TT, StringRef CPU, +MCAsmBackend *createARMLEAsmBackend(const Target &T, const MCSubtargetInfo &STI, + const MCRegisterInfo &MRI, const MCTargetOptions &Options); -MCAsmBackend *createARMBEAsmBackend(const Target &T, const MCRegisterInfo &MRI, - const Triple &TT, StringRef CPU, +MCAsmBackend *createARMBEAsmBackend(const Target &T, const MCSubtargetInfo &STI, + const MCRegisterInfo &MRI, const MCTargetOptions &Options); MCAsmBackend *createThumbLEAsmBackend(const Target &T, + const MCSubtargetInfo &STI, const MCRegisterInfo &MRI, - const Triple &TT, StringRef CPU, const MCTargetOptions &Options); MCAsmBackend *createThumbBEAsmBackend(const Target &T, + const MCSubtargetInfo &STI, const MCRegisterInfo &MRI, - const Triple &TT, StringRef CPU, const MCTargetOptions &Options); // Construct a PE/COFF machine code streamer which will generate a PE/COFF diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp index 5516a1bdb03d..6259c98321f4 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp 
@@ -10,7 +10,6 @@ #include "ARMMCExpr.h" #include "MCTargetDesc/ARMMCTargetDesc.h" #include "llvm-c/Disassembler.h" -#include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler/MCRelocationInfo.h" #include "llvm/MC/MCExpr.h" diff --git a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp index 00c41c403f6a..153e7b1e2197 100644 --- a/lib/Target/ARM/MLxExpansionPass.cpp +++ b/lib/Target/ARM/MLxExpansionPass.cpp @@ -371,7 +371,7 @@ bool MLxExpansion::ExpandFPMLxInstructions(MachineBasicBlock &MBB) { } bool MLxExpansion::runOnMachineFunction(MachineFunction &Fn) { - if (skipFunction(*Fn.getFunction())) + if (skipFunction(Fn.getFunction())) return false; TII = static_cast(Fn.getSubtarget().getInstrInfo()); diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp index ba00b3d79da9..a65e22fd86e8 100644 --- a/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -611,6 +611,12 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB, unsigned TemporaryReg = 0; BitVector PopFriendly = TRI.getAllocatableSet(MF, TRI.getRegClass(ARM::tGPRRegClassID)); + // R7 may be used as a frame pointer, hence marked as not generally + // allocatable, however there's no reason to not use it as a temporary for + // restoring LR. + if (STI.useR7AsFramePointer()) + PopFriendly.set(ARM::R7); + assert(PopFriendly.any() && "No allocatable pop-friendly register?!"); // Rebuild the GPRs from the high registers because they are removed // form the GPR reg class for thumb1. @@ -622,17 +628,20 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB, GPRsNoLRSP.reset(ARM::PC); findTemporariesForLR(GPRsNoLRSP, PopFriendly, UsedRegs, PopReg, TemporaryReg); - // If we couldn't find a pop-friendly register, restore LR before popping the - // other callee-saved registers, so we can use one of them as a temporary. + // If we couldn't find a pop-friendly register, try restoring LR before + // popping the other callee-saved registers, so we could use one of them as a + // temporary. 
bool UseLDRSP = false; if (!PopReg && MBBI != MBB.begin()) { auto PrevMBBI = MBBI; PrevMBBI--; if (PrevMBBI->getOpcode() == ARM::tPOP) { - MBBI = PrevMBBI; - UsedRegs.stepBackward(*MBBI); + UsedRegs.stepBackward(*PrevMBBI); findTemporariesForLR(GPRsNoLRSP, PopFriendly, UsedRegs, PopReg, TemporaryReg); - UseLDRSP = true; + if (PopReg) { + MBBI = PrevMBBI; + UseLDRSP = true; + } } } diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp index 3a3920a2db32..49645834e2de 100644 --- a/lib/Target/ARM/Thumb1InstrInfo.cpp +++ b/lib/Target/ARM/Thumb1InstrInfo.cpp @@ -16,7 +16,6 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/MC/MCInst.h" using namespace llvm; diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp index a0b98a431085..5357e26856ea 100644 --- a/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -45,6 +45,7 @@ using namespace llvm; #define DEBUG_TYPE "t2-reduce-size" +#define THUMB2_SIZE_REDUCE_NAME "Thumb2 instruction size reduce pass" STATISTIC(NumNarrows, "Number of 32-bit instrs reduced to 16-bit ones"); STATISTIC(Num2Addrs, "Number of 32-bit instrs reduced to 2addr 16-bit ones"); @@ -162,7 +163,7 @@ namespace { const Thumb2InstrInfo *TII; const ARMSubtarget *STI; - Thumb2SizeReduce(std::function Ftor); + Thumb2SizeReduce(std::function Ftor = nullptr); bool runOnMachineFunction(MachineFunction &MF) override; @@ -172,7 +173,7 @@ namespace { } StringRef getPassName() const override { - return "Thumb2 instruction size reduction pass"; + return THUMB2_SIZE_REDUCE_NAME; } private: @@ -237,6 +238,9 @@ namespace { } // end anonymous namespace +INITIALIZE_PASS(Thumb2SizeReduce, DEBUG_TYPE, THUMB2_SIZE_REDUCE_NAME, false, + false) + Thumb2SizeReduce::Thumb2SizeReduce(std::function Ftor) : MachineFunctionPass(ID), PredicateFtor(std::move(Ftor)) { OptimizeSize = MinimizeSize = false; @@ -449,7 +453,7 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, break; case ARM::t2LDR_POST: case ARM::t2STR_POST: { - if (!MBB.getParent()->getFunction()->optForMinSize()) + if (!MBB.getParent()->getFunction().optForMinSize()) return false; if (!MI->hasOneMemOperand() || @@ -1084,7 +1088,7 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { } bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) { - if (PredicateFtor && !PredicateFtor(*MF.getFunction())) + if (PredicateFtor && !PredicateFtor(MF.getFunction())) return false; STI = &static_cast(MF.getSubtarget()); @@ -1094,8 +1098,8 @@ bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) { TII = static_cast(STI->getInstrInfo()); // Optimizing / minimizing size? Minimizing size implies optimizing for size. 
- OptimizeSize = MF.getFunction()->optForSize(); - MinimizeSize = MF.getFunction()->optForMinSize(); + OptimizeSize = MF.getFunction().optForSize(); + MinimizeSize = MF.getFunction().optForMinSize(); BlockInfo.clear(); BlockInfo.resize(MF.getNumBlockIDs()); diff --git a/lib/Target/ARM/ThumbRegisterInfo.cpp b/lib/Target/ARM/ThumbRegisterInfo.cpp index d2bebb9eeeca..d190edf5913c 100644 --- a/lib/Target/ARM/ThumbRegisterInfo.cpp +++ b/lib/Target/ARM/ThumbRegisterInfo.cpp @@ -70,7 +70,7 @@ static void emitThumb1LoadConstPool(MachineBasicBlock &MBB, const TargetInstrInfo &TII = *STI.getInstrInfo(); MachineConstantPool *ConstantPool = MF.getConstantPool(); const Constant *C = ConstantInt::get( - Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Val); + Type::getInt32Ty(MBB.getParent()->getFunction().getContext()), Val); unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4); BuildMI(MBB, MBBI, dl, TII.get(ARM::tLDRpci)) @@ -89,7 +89,7 @@ static void emitThumb2LoadConstPool(MachineBasicBlock &MBB, const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); MachineConstantPool *ConstantPool = MF.getConstantPool(); const Constant *C = ConstantInt::get( - Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Val); + Type::getInt32Ty(MBB.getParent()->getFunction().getContext()), Val); unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4); BuildMI(MBB, MBBI, dl, TII.get(ARM::t2LDRpci)) diff --git a/lib/Target/ARM/Utils/ARMBaseInfo.cpp b/lib/Target/ARM/Utils/ARMBaseInfo.cpp index 3da1b0520cd6..534f78c6d4d2 100644 --- a/lib/Target/ARM/Utils/ARMBaseInfo.cpp +++ b/lib/Target/ARM/Utils/ARMBaseInfo.cpp @@ -13,8 +13,6 @@ #include "ARMBaseInfo.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/Support/Regex.h" using namespace llvm; namespace llvm { diff --git a/lib/Target/AVR/AVRFrameLowering.cpp b/lib/Target/AVR/AVRFrameLowering.cpp index 5101cf586f11..3b7322365772 100644 --- a/lib/Target/AVR/AVRFrameLowering.cpp +++ b/lib/Target/AVR/AVRFrameLowering.cpp @@ -53,7 +53,7 @@ bool AVRFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { void AVRFrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.begin(); - CallingConv::ID CallConv = MF.getFunction()->getCallingConv(); + CallingConv::ID CallConv = MF.getFunction().getCallingConv(); DebugLoc DL = (MBBI != MBB.end()) ? 
MBBI->getDebugLoc() : DebugLoc(); const AVRSubtarget &STI = MF.getSubtarget(); const AVRInstrInfo &TII = *STI.getInstrInfo(); @@ -143,7 +143,7 @@ void AVRFrameLowering::emitPrologue(MachineFunction &MF, void AVRFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { - CallingConv::ID CallConv = MF.getFunction()->getCallingConv(); + CallingConv::ID CallConv = MF.getFunction().getCallingConv(); bool isHandler = (CallConv == CallingConv::AVR_INTR || CallConv == CallingConv::AVR_SIGNAL); diff --git a/lib/Target/AVR/AVRISelLowering.cpp b/lib/Target/AVR/AVRISelLowering.cpp index 890379d5639f..d9e27e91405c 100644 --- a/lib/Target/AVR/AVRISelLowering.cpp +++ b/lib/Target/AVR/AVRISelLowering.cpp @@ -44,6 +44,7 @@ AVRTargetLowering::AVRTargetLowering(AVRTargetMachine &tm) setBooleanVectorContents(ZeroOrOneBooleanContent); setSchedulingPreference(Sched::RegPressure); setStackPointerRegisterToSaveRestore(AVR::SP); + setSupportsUnalignedAtomics(true); setOperationAction(ISD::GlobalAddress, MVT::i16, Custom); setOperationAction(ISD::BlockAddress, MVT::i16, Custom); @@ -1038,7 +1039,7 @@ SDValue AVRTargetLowering::LowerFormalArguments( CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, *DAG.getContext()); - analyzeArguments(nullptr, MF.getFunction(), &DL, 0, &Ins, CallConv, ArgLocs, CCInfo, + analyzeArguments(nullptr, &MF.getFunction(), &DL, 0, &Ins, CallConv, ArgLocs, CCInfo, false, isVarArg); SDValue ArgValue; @@ -1390,7 +1391,7 @@ AVRTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, // Don't emit the ret/reti instruction when the naked attribute is present in // the function being compiled. - if (MF.getFunction()->getAttributes().hasAttribute( + if (MF.getFunction().getAttributes().hasAttribute( AttributeList::FunctionIndex, Attribute::Naked)) { return Chain; } diff --git a/lib/Target/AVR/AVRRegisterInfo.cpp b/lib/Target/AVR/AVRRegisterInfo.cpp index b6ac93452cb1..d171a620760e 100644 --- a/lib/Target/AVR/AVRRegisterInfo.cpp +++ b/lib/Target/AVR/AVRRegisterInfo.cpp @@ -34,7 +34,7 @@ AVRRegisterInfo::AVRRegisterInfo() : AVRGenRegisterInfo(0) {} const uint16_t * AVRRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { - CallingConv::ID CC = MF->getFunction()->getCallingConv(); + CallingConv::ID CC = MF->getFunction().getCallingConv(); return ((CC == CallingConv::AVR_INTR || CC == CallingConv::AVR_SIGNAL) ? 
CSR_Interrupts_SaveList diff --git a/lib/Target/AVR/AsmParser/AVRAsmParser.cpp b/lib/Target/AVR/AsmParser/AVRAsmParser.cpp index 2e1adcc6a4fa..b527ad3e0b14 100644 --- a/lib/Target/AVR/AsmParser/AVRAsmParser.cpp +++ b/lib/Target/AVR/AsmParser/AVRAsmParser.cpp @@ -9,6 +9,7 @@ #include "AVR.h" #include "AVRRegisterInfo.h" +#include "MCTargetDesc/AVRMCELFStreamer.h" #include "MCTargetDesc/AVRMCExpr.h" #include "MCTargetDesc/AVRMCTargetDesc.h" @@ -40,6 +41,7 @@ class AVRAsmParser : public MCTargetAsmParser { const MCSubtargetInfo &STI; MCAsmParser &Parser; const MCRegisterInfo *MRI; + const std::string GENERATE_STUBS = "gs"; #define GET_ASSEMBLER_HEADER #include "AVRGenAsmMatcher.inc" @@ -54,7 +56,7 @@ class AVRAsmParser : public MCTargetAsmParser { bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, OperandVector &Operands) override; - bool ParseDirective(AsmToken directiveID) override; + bool ParseDirective(AsmToken DirectiveID) override; OperandMatchResultTy parseMemriOperand(OperandVector &Operands); @@ -80,6 +82,8 @@ class AVRAsmParser : public MCTargetAsmParser { uint64_t const &ErrorInfo); bool missingFeature(SMLoc const &Loc, uint64_t const &ErrorInfo); + bool parseLiteralValues(unsigned SizeInBytes, SMLoc L); + public: AVRAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser, const MCInstrInfo &MII, const MCTargetOptions &Options) @@ -404,11 +408,14 @@ bool AVRAsmParser::tryParseRelocExpression(OperandVector &Operands) { size_t ReadCount = Parser.getLexer().peekTokens(tokens); if (ReadCount == 2) { - if (tokens[0].getKind() == AsmToken::Identifier && - tokens[1].getKind() == AsmToken::LParen) { + if ((tokens[0].getKind() == AsmToken::Identifier && + tokens[1].getKind() == AsmToken::LParen) || + (tokens[0].getKind() == AsmToken::LParen && + tokens[1].getKind() == AsmToken::Minus)) { AsmToken::TokenKind CurTok = Parser.getLexer().getKind(); - if (CurTok == AsmToken::Minus) { + if (CurTok == AsmToken::Minus || + tokens[1].getKind() == AsmToken::Minus) { isNegated = true; } else { assert(CurTok == AsmToken::Plus); @@ -416,7 +423,8 @@ bool AVRAsmParser::tryParseRelocExpression(OperandVector &Operands) { } // Eat the sign - Parser.Lex(); + if (CurTok == AsmToken::Minus || CurTok == AsmToken::Plus) + Parser.Lex(); } } @@ -432,14 +440,34 @@ bool AVRAsmParser::tryParseRelocExpression(OperandVector &Operands) { if (ModifierKind != AVRMCExpr::VK_AVR_None) { Parser.Lex(); Parser.Lex(); // Eat modifier name and parenthesis + if (Parser.getTok().getString() == GENERATE_STUBS && + Parser.getTok().getKind() == AsmToken::Identifier) { + std::string GSModName = ModifierName.str() + "_" + GENERATE_STUBS; + ModifierKind = AVRMCExpr::getKindByName(GSModName.c_str()); + if (ModifierKind != AVRMCExpr::VK_AVR_None) + Parser.Lex(); // Eat gs modifier name + } } else { return Error(Parser.getTok().getLoc(), "unknown modifier"); } + if (tokens[1].getKind() == AsmToken::Minus || + tokens[1].getKind() == AsmToken::Plus) { + Parser.Lex(); + assert(Parser.getTok().getKind() == AsmToken::LParen); + Parser.Lex(); // Eat the sign and parenthesis + } + MCExpr const *InnerExpression; if (getParser().parseExpression(InnerExpression)) return true; + if (tokens[1].getKind() == AsmToken::Minus || + tokens[1].getKind() == AsmToken::Plus) { + assert(Parser.getTok().getKind() == AsmToken::RParen); + Parser.Lex(); // Eat closing parenthesis + } + // If we have a modifier wrap the inner expression assert(Parser.getTok().getKind() == AsmToken::RParen); Parser.Lex(); // Eat closing parenthesis @@ 
-580,7 +608,59 @@ bool AVRAsmParser::ParseInstruction(ParseInstructionInfo &Info, return false; } -bool AVRAsmParser::ParseDirective(llvm::AsmToken DirectiveID) { return true; } +bool AVRAsmParser::ParseDirective(llvm::AsmToken DirectiveID) { + StringRef IDVal = DirectiveID.getIdentifier(); + if (IDVal.lower() == ".long") { + parseLiteralValues(SIZE_LONG, DirectiveID.getLoc()); + } else if (IDVal.lower() == ".word" || IDVal.lower() == ".short") { + parseLiteralValues(SIZE_WORD, DirectiveID.getLoc()); + } else if (IDVal.lower() == ".byte") { + parseLiteralValues(1, DirectiveID.getLoc()); + } + return true; +} + +bool AVRAsmParser::parseLiteralValues(unsigned SizeInBytes, SMLoc L) { + MCAsmParser &Parser = getParser(); + AVRMCELFStreamer &AVRStreamer = + static_cast(Parser.getStreamer()); + AsmToken Tokens[2]; + size_t ReadCount = Parser.getLexer().peekTokens(Tokens); + if (ReadCount == 2 && Parser.getTok().getKind() == AsmToken::Identifier && + Tokens[0].getKind() == AsmToken::Minus && + Tokens[1].getKind() == AsmToken::Identifier) { + MCSymbol *Symbol = getContext().getOrCreateSymbol(".text"); + AVRStreamer.EmitValueForModiferKind(Symbol, SizeInBytes, L, + AVRMCExpr::VK_AVR_None); + return false; + } + + if (Parser.getTok().getKind() == AsmToken::Identifier && + Parser.getLexer().peekTok().getKind() == AsmToken::LParen) { + StringRef ModifierName = Parser.getTok().getString(); + AVRMCExpr::VariantKind ModifierKind = + AVRMCExpr::getKindByName(ModifierName.str().c_str()); + if (ModifierKind != AVRMCExpr::VK_AVR_None) { + Parser.Lex(); + Parser.Lex(); // Eat the modifier and parenthesis + } else { + return Error(Parser.getTok().getLoc(), "unknown modifier"); + } + MCSymbol *Symbol = + getContext().getOrCreateSymbol(Parser.getTok().getString()); + AVRStreamer.EmitValueForModiferKind(Symbol, SizeInBytes, L, ModifierKind); + return false; + } + + auto parseOne = [&]() -> bool { + const MCExpr *Value; + if (Parser.parseExpression(Value)) + return true; + Parser.getStreamer().EmitValue(Value, SizeInBytes, L); + return false; + }; + return (parseMany(parseOne)); +} extern "C" void LLVMInitializeAVRAsmParser() { RegisterMCAsmParser X(getTheAVRTarget()); diff --git a/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp b/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp index 01a09610118c..2f5e9f02e53c 100644 --- a/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp +++ b/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp @@ -265,15 +265,19 @@ void AVRAsmBackend::adjustFixupValue(const MCFixup &Fixup, adjust::ldi::fixup(Size, Fixup, Value, Ctx); break; case AVR::fixup_lo8_ldi: + adjust::ldi::lo8(Size, Fixup, Value, Ctx); + break; case AVR::fixup_lo8_ldi_pm: - if (Kind == AVR::fixup_lo8_ldi_pm) adjust::pm(Value); - + case AVR::fixup_lo8_ldi_gs: + adjust::pm(Value); adjust::ldi::lo8(Size, Fixup, Value, Ctx); break; case AVR::fixup_hi8_ldi: + adjust::ldi::hi8(Size, Fixup, Value, Ctx); + break; case AVR::fixup_hi8_ldi_pm: - if (Kind == AVR::fixup_hi8_ldi_pm) adjust::pm(Value); - + case AVR::fixup_hi8_ldi_gs: + adjust::pm(Value); adjust::ldi::hi8(Size, Fixup, Value, Ctx); break; case AVR::fixup_hh8_ldi: @@ -316,6 +320,13 @@ void AVRAsmBackend::adjustFixupValue(const MCFixup &Fixup, Value &= 0xffff; break; + case AVR::fixup_16_pm: + Value >>= 1; // Flash addresses are always shifted. 
+ adjust::unsigned_width(16, Value, std::string("port number"), Fixup, Ctx); + + Value &= 0xffff; + break; + case AVR::fixup_6_adiw: adjust::fixup_6_adiw(Fixup, Value, Ctx); break; @@ -329,6 +340,7 @@ void AVRAsmBackend::adjustFixupValue(const MCFixup &Fixup, break; // Fixups which do not require adjustments. + case FK_Data_1: case FK_Data_2: case FK_Data_4: case FK_Data_8: @@ -422,8 +434,9 @@ MCFixupKindInfo const &AVRAsmBackend::getFixupKindInfo(MCFixupKind Kind) const { {"fixup_8_hi8", 0, 8, 0}, {"fixup_8_hlo8", 0, 8, 0}, - {"fixup_sym_diff", 0, 32, 0}, - {"fixup_16_ldst", 0, 16, 0}, + {"fixup_diff8", 0, 8, 0}, + {"fixup_diff16", 0, 16, 0}, + {"fixup_diff32", 0, 32, 0}, {"fixup_lds_sts_16", 0, 16, 0}, @@ -463,10 +476,10 @@ bool AVRAsmBackend::shouldForceRelocation(const MCAssembler &Asm, } } -MCAsmBackend *createAVRAsmBackend(const Target &T, const MCRegisterInfo &MRI, - const Triple &TT, StringRef CPU, +MCAsmBackend *createAVRAsmBackend(const Target &T, const MCSubtargetInfo &STI, + const MCRegisterInfo &MRI, const llvm::MCTargetOptions &TO) { - return new AVRAsmBackend(TT.getOS()); + return new AVRAsmBackend(STI.getTargetTriple().getOS()); } } // end of namespace llvm diff --git a/lib/Target/AVR/MCTargetDesc/AVRELFObjectWriter.cpp b/lib/Target/AVR/MCTargetDesc/AVRELFObjectWriter.cpp index 25da75e63feb..412f66fbcf22 100644 --- a/lib/Target/AVR/MCTargetDesc/AVRELFObjectWriter.cpp +++ b/lib/Target/AVR/MCTargetDesc/AVRELFObjectWriter.cpp @@ -40,12 +40,43 @@ unsigned AVRELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const { + MCSymbolRefExpr::VariantKind Modifier = Target.getAccessVariant(); switch ((unsigned) Fixup.getKind()) { case FK_Data_1: + switch (Modifier) { + default: + llvm_unreachable("Unsupported Modifier"); + case MCSymbolRefExpr::VK_None: + return ELF::R_AVR_8; + case MCSymbolRefExpr::VK_AVR_DIFF8: + return ELF::R_AVR_DIFF8; + case MCSymbolRefExpr::VK_AVR_LO8: + return ELF::R_AVR_8_LO8; + case MCSymbolRefExpr::VK_AVR_HI8: + return ELF::R_AVR_8_HI8; + case MCSymbolRefExpr::VK_AVR_HLO8: + return ELF::R_AVR_8_HLO8; + } case FK_Data_4: - llvm_unreachable("unsupported relocation type"); + switch (Modifier) { + default: + llvm_unreachable("Unsupported Modifier"); + case MCSymbolRefExpr::VK_None: + return ELF::R_AVR_32; + case MCSymbolRefExpr::VK_AVR_DIFF32: + return ELF::R_AVR_DIFF32; + } case FK_Data_2: - return ELF::R_AVR_16_PM; + switch (Modifier) { + default: + llvm_unreachable("Unsupported Modifier"); + case MCSymbolRefExpr::VK_None: + return ELF::R_AVR_16; + case MCSymbolRefExpr::VK_AVR_NONE: + return ELF::R_AVR_16_PM; + case MCSymbolRefExpr::VK_AVR_DIFF16: + return ELF::R_AVR_DIFF16; + } case AVR::fixup_32: return ELF::R_AVR_32; case AVR::fixup_7_pcrel: @@ -104,10 +135,12 @@ unsigned AVRELFObjectWriter::getRelocType(MCContext &Ctx, return ELF::R_AVR_8_HI8; case AVR::fixup_8_hlo8: return ELF::R_AVR_8_HLO8; - case AVR::fixup_sym_diff: - return ELF::R_AVR_SYM_DIFF; - case AVR::fixup_16_ldst: - return ELF::R_AVR_16_LDST; + case AVR::fixup_diff8: + return ELF::R_AVR_DIFF8; + case AVR::fixup_diff16: + return ELF::R_AVR_DIFF16; + case AVR::fixup_diff32: + return ELF::R_AVR_DIFF32; case AVR::fixup_lds_sts_16: return ELF::R_AVR_LDS_STS_16; case AVR::fixup_port6: diff --git a/lib/Target/AVR/MCTargetDesc/AVRFixupKinds.h b/lib/Target/AVR/MCTargetDesc/AVRFixupKinds.h index d3bd52d343fc..cdb0b215bc60 100644 --- a/lib/Target/AVR/MCTargetDesc/AVRFixupKinds.h +++ b/lib/Target/AVR/MCTargetDesc/AVRFixupKinds.h @@ -115,10 +115,9 @@ enum 
Fixups { fixup_8_hi8, fixup_8_hlo8, - /// Fixup to calculate the difference between two symbols. - /// Is the only stateful fixup. We do not support it yet. - fixup_sym_diff, - fixup_16_ldst, + fixup_diff8, + fixup_diff16, + fixup_diff32, fixup_lds_sts_16, diff --git a/lib/Target/AVR/MCTargetDesc/AVRMCELFStreamer.cpp b/lib/Target/AVR/MCTargetDesc/AVRMCELFStreamer.cpp new file mode 100644 index 000000000000..c60ea7a92e6f --- /dev/null +++ b/lib/Target/AVR/MCTargetDesc/AVRMCELFStreamer.cpp @@ -0,0 +1,51 @@ +//===--------- AVRMCELFStreamer.cpp - AVR subclass of MCELFStreamer -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is a stub that parses a MCInst bundle and passes the +// instructions on to the real streamer. +// +//===----------------------------------------------------------------------===// +#define DEBUG_TYPE "avrmcelfstreamer" + +#include "MCTargetDesc/AVRMCELFStreamer.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCSymbol.h" + +using namespace llvm; + +void AVRMCELFStreamer::EmitValueForModiferKind( + const MCSymbol *Sym, unsigned SizeInBytes, SMLoc Loc, + AVRMCExpr::VariantKind ModifierKind) { + MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VK_AVR_NONE; + if (ModifierKind == AVRMCExpr::VK_AVR_None) { + Kind = MCSymbolRefExpr::VK_AVR_DIFF8; + if (SizeInBytes == SIZE_LONG) + Kind = MCSymbolRefExpr::VK_AVR_DIFF32; + else if (SizeInBytes == SIZE_WORD) + Kind = MCSymbolRefExpr::VK_AVR_DIFF16; + } else if (ModifierKind == AVRMCExpr::VK_AVR_LO8) + Kind = MCSymbolRefExpr::VK_AVR_LO8; + else if (ModifierKind == AVRMCExpr::VK_AVR_HI8) + Kind = MCSymbolRefExpr::VK_AVR_HI8; + else if (ModifierKind == AVRMCExpr::VK_AVR_HH8) + Kind = MCSymbolRefExpr::VK_AVR_HLO8; + MCELFStreamer::EmitValue(MCSymbolRefExpr::create(Sym, Kind, getContext()), + SizeInBytes, Loc); +} + +namespace llvm { +MCStreamer *createAVRELFStreamer(Triple const &TT, MCContext &Context, + std::unique_ptr MAB, + raw_pwrite_stream &OS, + std::unique_ptr CE) { + return new AVRMCELFStreamer(Context, std::move(MAB), OS, std::move(CE)); +} + +} // end namespace llvm diff --git a/lib/Target/AVR/MCTargetDesc/AVRMCELFStreamer.h b/lib/Target/AVR/MCTargetDesc/AVRMCELFStreamer.h new file mode 100644 index 000000000000..398b409f4586 --- /dev/null +++ b/lib/Target/AVR/MCTargetDesc/AVRMCELFStreamer.h @@ -0,0 +1,54 @@ +//===--------- AVRMCELFStreamer.h - AVR subclass of MCELFStreamer ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_AVR_MCTARGETDESC_AVRMCELFSTREAMER_H +#define LLVM_LIB_TARGET_AVR_MCTARGETDESC_AVRMCELFSTREAMER_H + +#include "MCTargetDesc/AVRMCExpr.h" +#include "MCTargetDesc/AVRMCTargetDesc.h" +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCELFStreamer.h" +#include "llvm/MC/MCInstrInfo.h" + +namespace llvm { + +const int SIZE_LONG = 4; +const int SIZE_WORD = 2; + +class AVRMCELFStreamer : public MCELFStreamer { + std::unique_ptr MCII; + +public: + AVRMCELFStreamer(MCContext &Context, std::unique_ptr TAB, + raw_pwrite_stream &OS, + std::unique_ptr Emitter) + : MCELFStreamer(Context, std::move(TAB), OS, std::move(Emitter)), + MCII(createAVRMCInstrInfo()) {} + + AVRMCELFStreamer(MCContext &Context, std::unique_ptr TAB, + raw_pwrite_stream &OS, + std::unique_ptr Emitter, + MCAssembler *Assembler) + : MCELFStreamer(Context, std::move(TAB), OS, std::move(Emitter)), + MCII(createAVRMCInstrInfo()) {} + + void EmitValueForModiferKind( + const MCSymbol *Sym, unsigned SizeInBytes, SMLoc Loc = SMLoc(), + AVRMCExpr::VariantKind ModifierKind = AVRMCExpr::VK_AVR_None); +}; + +MCStreamer *createAVRELFStreamer(Triple const &TT, MCContext &Context, + std::unique_ptr MAB, + raw_pwrite_stream &OS, + std::unique_ptr CE); + +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_AVR_MCTARGETDESC_AVRMCELFSTREAMER_H diff --git a/lib/Target/AVR/MCTargetDesc/AVRMCExpr.cpp b/lib/Target/AVR/MCTargetDesc/AVRMCExpr.cpp index 085afd23a83c..d4a67973af7f 100644 --- a/lib/Target/AVR/MCTargetDesc/AVRMCExpr.cpp +++ b/lib/Target/AVR/MCTargetDesc/AVRMCExpr.cpp @@ -29,6 +29,9 @@ const struct ModifierEntry { {"pm_lo8", AVRMCExpr::VK_AVR_PM_LO8}, {"pm_hi8", AVRMCExpr::VK_AVR_PM_HI8}, {"pm_hh8", AVRMCExpr::VK_AVR_PM_HH8}, + + {"lo8_gs", AVRMCExpr::VK_AVR_LO8_GS}, {"hi8_gs", AVRMCExpr::VK_AVR_HI8_GS}, + {"gs", AVRMCExpr::VK_AVR_GS}, }; } // end of anonymous namespace @@ -99,24 +102,38 @@ int64_t AVRMCExpr::evaluateAsInt64(int64_t Value) const { switch (Kind) { case AVRMCExpr::VK_AVR_LO8: + Value &= 0xff; break; case AVRMCExpr::VK_AVR_HI8: + Value &= 0xff00; Value >>= 8; break; case AVRMCExpr::VK_AVR_HH8: + Value &= 0xff0000; Value >>= 16; break; case AVRMCExpr::VK_AVR_HHI8: + Value &= 0xff000000; Value >>= 24; break; case AVRMCExpr::VK_AVR_PM_LO8: - Value >>= 1; + case AVRMCExpr::VK_AVR_LO8_GS: + Value >>= 1; // Program memory addresses must always be shifted by one. + Value &= 0xff; break; case AVRMCExpr::VK_AVR_PM_HI8: - Value >>= 9; + case AVRMCExpr::VK_AVR_HI8_GS: + Value >>= 1; // Program memory addresses must always be shifted by one. + Value &= 0xff00; + Value >>= 8; break; case AVRMCExpr::VK_AVR_PM_HH8: - Value >>= 17; + Value >>= 1; // Program memory addresses must always be shifted by one. + Value &= 0xff0000; + Value >>= 16; + break; + case AVRMCExpr::VK_AVR_GS: + Value >>= 1; // Program memory addresses must always be shifted by one. break; case AVRMCExpr::VK_AVR_None: @@ -151,6 +168,15 @@ AVR::Fixups AVRMCExpr::getFixupKind() const { case VK_AVR_PM_HH8: Kind = isNegated() ? 
AVR::fixup_hh8_ldi_pm_neg : AVR::fixup_hh8_ldi_pm; break; + case VK_AVR_GS: + Kind = AVR::fixup_16_pm; + break; + case VK_AVR_LO8_GS: + Kind = AVR::fixup_lo8_ldi_gs; + break; + case VK_AVR_HI8_GS: + Kind = AVR::fixup_hi8_ldi_gs; + break; case VK_AVR_None: llvm_unreachable("Uninitialized expression"); diff --git a/lib/Target/AVR/MCTargetDesc/AVRMCExpr.h b/lib/Target/AVR/MCTargetDesc/AVRMCExpr.h index be565a8be340..a166b0946749 100644 --- a/lib/Target/AVR/MCTargetDesc/AVRMCExpr.h +++ b/lib/Target/AVR/MCTargetDesc/AVRMCExpr.h @@ -30,7 +30,11 @@ class AVRMCExpr : public MCTargetExpr { VK_AVR_PM_LO8, ///< Corresponds to `pm_lo8()`. VK_AVR_PM_HI8, ///< Corresponds to `pm_hi8()`. - VK_AVR_PM_HH8 ///< Corresponds to `pm_hh8()`. + VK_AVR_PM_HH8, ///< Corresponds to `pm_hh8()`. + + VK_AVR_LO8_GS, ///< Corresponds to `lo8(gs())`. + VK_AVR_HI8_GS, ///< Corresponds to `hi8(gs())`. + VK_AVR_GS, ///< Corresponds to `gs()`. }; public: diff --git a/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp b/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp index bccce5d307e1..dd65a4312077 100644 --- a/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp +++ b/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp @@ -11,12 +11,14 @@ // //===----------------------------------------------------------------------===// -#include "AVRMCTargetDesc.h" #include "AVRELFStreamer.h" #include "AVRMCAsmInfo.h" +#include "AVRMCELFStreamer.h" +#include "AVRMCTargetDesc.h" #include "AVRTargetStreamer.h" #include "InstPrinter/AVRInstPrinter.h" +#include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCELFStreamer.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCInstrInfo.h" @@ -35,7 +37,7 @@ using namespace llvm; -static MCInstrInfo *createAVRMCInstrInfo() { +MCInstrInfo *llvm::createAVRMCInstrInfo() { MCInstrInfo *X = new MCInstrInfo(); InitAVRMCInstrInfo(X); @@ -108,7 +110,7 @@ extern "C" void LLVMInitializeAVRTargetMC() { // Register the MC Code Emitter TargetRegistry::RegisterMCCodeEmitter(getTheAVRTarget(), createAVRMCCodeEmitter); - // Register the ELF streamer + // Register the obj streamer TargetRegistry::RegisterELFStreamer(getTheAVRTarget(), createMCStreamer); // Register the obj target streamer. diff --git a/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.h b/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.h index 8053b8d389fc..fcfd8cf82292 100644 --- a/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.h +++ b/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.h @@ -26,6 +26,7 @@ class MCContext; class MCInstrInfo; class MCObjectWriter; class MCRegisterInfo; +class MCSubtargetInfo; class MCTargetOptions; class StringRef; class Target; @@ -34,14 +35,16 @@ class raw_pwrite_stream; Target &getTheAVRTarget(); +MCInstrInfo *createAVRMCInstrInfo(); + /// Creates a machine code emitter for AVR. MCCodeEmitter *createAVRMCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, MCContext &Ctx); /// Creates an assembly backend for AVR. -MCAsmBackend *createAVRAsmBackend(const Target &T, const MCRegisterInfo &MRI, - const Triple &TT, StringRef CPU, +MCAsmBackend *createAVRAsmBackend(const Target &T, const MCSubtargetInfo &STI, + const MCRegisterInfo &MRI, const llvm::MCTargetOptions &TO); /// Creates an ELF object writer for AVR. 
diff --git a/lib/Target/AVR/MCTargetDesc/CMakeLists.txt b/lib/Target/AVR/MCTargetDesc/CMakeLists.txt index 5bad6494c8a9..eb7fe956cf34 100644 --- a/lib/Target/AVR/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/AVR/MCTargetDesc/CMakeLists.txt @@ -4,6 +4,7 @@ add_llvm_library(LLVMAVRDesc AVRELFStreamer.cpp AVRMCAsmInfo.cpp AVRMCCodeEmitter.cpp + AVRMCELFStreamer.cpp AVRMCExpr.cpp AVRMCTargetDesc.cpp AVRTargetStreamer.cpp diff --git a/lib/Target/BPF/BPFISelDAGToDAG.cpp b/lib/Target/BPF/BPFISelDAGToDAG.cpp index 1c12c23c9312..35be4476ee08 100644 --- a/lib/Target/BPF/BPFISelDAGToDAG.cpp +++ b/lib/Target/BPF/BPFISelDAGToDAG.cpp @@ -519,6 +519,37 @@ void BPFDAGToDAGISel::PreprocessTrunc(SDNode *Node, if (!MaskN) return; + // The Reg operand should be a virtual register, which is defined + // outside the current basic block. DAG combiner has done a pretty + // good job in removing truncating inside a single basic block except + // when the Reg operand comes from bpf_load_[byte | half | word] for + // which the generic optimizer doesn't understand their results are + // zero extended. + SDValue BaseV = Node->getOperand(0); + if (BaseV.getOpcode() == ISD::INTRINSIC_W_CHAIN) { + unsigned IntNo = cast(BaseV->getOperand(1))->getZExtValue(); + uint64_t MaskV = MaskN->getZExtValue(); + + if (!((IntNo == Intrinsic::bpf_load_byte && MaskV == 0xFF) || + (IntNo == Intrinsic::bpf_load_half && MaskV == 0xFFFF) || + (IntNo == Intrinsic::bpf_load_word && MaskV == 0xFFFFFFFF))) + return; + + DEBUG(dbgs() << "Remove the redundant AND operation in: "; Node->dump(); + dbgs() << '\n'); + + I--; + CurDAG->ReplaceAllUsesWith(SDValue(Node, 0), BaseV); + I++; + CurDAG->DeleteNode(Node); + + return; + } + + // Multiple basic blocks case. + if (BaseV.getOpcode() != ISD::CopyFromReg) + return; + unsigned match_load_op = 0; switch (MaskN->getZExtValue()) { default: @@ -534,20 +565,12 @@ void BPFDAGToDAGISel::PreprocessTrunc(SDNode *Node, break; } - // The Reg operand should be a virtual register, which is defined - // outside the current basic block. DAG combiner has done a pretty - // good job in removing truncating inside a single basic block. - SDValue BaseV = Node->getOperand(0); - if (BaseV.getOpcode() != ISD::CopyFromReg) - return; - const RegisterSDNode *RegN = dyn_cast(BaseV.getNode()->getOperand(1)); if (!RegN || !TargetRegisterInfo::isVirtualRegister(RegN->getReg())) return; unsigned AndOpReg = RegN->getReg(); - DEBUG(dbgs() << "Examine %vreg" << TargetRegisterInfo::virtReg2Index(AndOpReg) - << '\n'); + DEBUG(dbgs() << "Examine " << printReg(AndOpReg) << '\n'); // Examine the PHI insns in the MachineBasicBlock to found out the // definitions of this virtual register. At this stage (DAG2DAG @@ -574,10 +597,10 @@ void BPFDAGToDAGISel::PreprocessTrunc(SDNode *Node, return; } else { // The PHI node looks like: - // %vreg2 = PHI %vreg0, , %vreg1, - // Trace each incoming definition, e.g., (%vreg0, BB#1) and (%vreg1, BB#3) - // The AND operation can be removed if both %vreg0 in BB#1 and %vreg1 in - // BB#3 are defined with with a load matching the MaskN. + // %2 = PHI %0, <%bb.1>, %1, <%bb.3> + // Trace each incoming definition, e.g., (%0, %bb.1) and (%1, %bb.3) + // The AND operation can be removed if both %0 in %bb.1 and %1 in + // %bb.3 are defined with with a load matching the MaskN. 
DEBUG(dbgs() << "Check PHI Insn: "; MII->dump(); dbgs() << '\n'); unsigned PrevReg = -1; for (unsigned i = 0; i < MII->getNumOperands(); ++i) { diff --git a/lib/Target/BPF/BPFISelLowering.cpp b/lib/Target/BPF/BPFISelLowering.cpp index 7d535563c75a..3ea96e3148f2 100644 --- a/lib/Target/BPF/BPFISelLowering.cpp +++ b/lib/Target/BPF/BPFISelLowering.cpp @@ -36,7 +36,7 @@ using namespace llvm; static void fail(const SDLoc &DL, SelectionDAG &DAG, const Twine &Msg) { MachineFunction &MF = DAG.getMachineFunction(); DAG.getContext()->diagnose( - DiagnosticInfoUnsupported(*MF.getFunction(), Msg, DL.getDebugLoc())); + DiagnosticInfoUnsupported(MF.getFunction(), Msg, DL.getDebugLoc())); } static void fail(const SDLoc &DL, SelectionDAG &DAG, const char *Msg, @@ -48,7 +48,7 @@ static void fail(const SDLoc &DL, SelectionDAG &DAG, const char *Msg, Val->print(OS); OS.flush(); DAG.getContext()->diagnose( - DiagnosticInfoUnsupported(*MF.getFunction(), Str, DL.getDebugLoc())); + DiagnosticInfoUnsupported(MF.getFunction(), Str, DL.getDebugLoc())); } BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM, @@ -227,7 +227,7 @@ SDValue BPFTargetLowering::LowerFormalArguments( } } - if (IsVarArg || MF.getFunction()->hasStructRetAttr()) { + if (IsVarArg || MF.getFunction().hasStructRetAttr()) { fail(DL, DAG, "functions with VarArgs or StructRet are not supported"); } @@ -382,7 +382,7 @@ BPFTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, // CCState - Info about the registers and stack slot. CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext()); - if (MF.getFunction()->getReturnType()->isAggregateType()) { + if (MF.getFunction().getReturnType()->isAggregateType()) { fail(DL, DAG, "only integer returns supported"); return DAG.getNode(Opc, DL, MVT::Other, Chain); } diff --git a/lib/Target/BPF/BPFInstrInfo.td b/lib/Target/BPF/BPFInstrInfo.td index 126d55fc28de..3634f2c38b73 100644 --- a/lib/Target/BPF/BPFInstrInfo.td +++ b/lib/Target/BPF/BPFInstrInfo.td @@ -456,7 +456,7 @@ let isReturn = 1, isTerminator = 1, hasDelaySlot=0, isBarrier = 1, } // ADJCALLSTACKDOWN/UP pseudo insns -let Defs = [R11], Uses = [R11] in { +let Defs = [R11], Uses = [R11], isCodeGenOnly = 1 in { def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i64imm:$amt1, i64imm:$amt2), "#ADJCALLSTACKDOWN $amt1 $amt2", [(BPFcallseq_start timm:$amt1, timm:$amt2)]>; @@ -465,7 +465,7 @@ def ADJCALLSTACKUP : Pseudo<(outs), (ins i64imm:$amt1, i64imm:$amt2), [(BPFcallseq_end timm:$amt1, timm:$amt2)]>; } -let usesCustomInserter = 1 in { +let usesCustomInserter = 1, isCodeGenOnly = 1 in { def Select : Pseudo<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs, i64imm:$imm, GPR:$src, GPR:$src2), "# Select PSEUDO $dst = $lhs $imm $rhs ? $src : $src2", diff --git a/lib/Target/BPF/BPFRegisterInfo.cpp b/lib/Target/BPF/BPFRegisterInfo.cpp index 00d609e8960e..6f7067816098 100644 --- a/lib/Target/BPF/BPFRegisterInfo.cpp +++ b/lib/Target/BPF/BPFRegisterInfo.cpp @@ -45,12 +45,12 @@ BitVector BPFRegisterInfo::getReservedRegs(const MachineFunction &MF) const { static void WarnSize(int Offset, MachineFunction &MF, DebugLoc& DL) { if (Offset <= -512) { - auto F = MF.getFunction(); - DiagnosticInfoUnsupported DiagStackSize(*F, + const Function &F = MF.getFunction(); + DiagnosticInfoUnsupported DiagStackSize(F, "Looks like the BPF stack limit of 512 bytes is exceeded. 
" "Please move large on stack variables into BPF per-cpu array map.\n", DL); - F->getContext().diagnose(DiagStackSize); + F.getContext().diagnose(DiagStackSize); } } diff --git a/lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp b/lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp index 6f81e020b996..1f4ef098403d 100644 --- a/lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp +++ b/lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp @@ -56,7 +56,7 @@ void BPFInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, if (Op.isReg()) { O << getRegisterName(Op.getReg()); } else if (Op.isImm()) { - O << (int32_t)Op.getImm(); + O << formatImm((int32_t)Op.getImm()); } else { assert(Op.isExpr() && "Expected an expression"); printExpr(Op.getExpr(), O); @@ -76,9 +76,9 @@ void BPFInstPrinter::printMemOperand(const MCInst *MI, int OpNo, raw_ostream &O, if (OffsetOp.isImm()) { auto Imm = OffsetOp.getImm(); if (Imm >= 0) - O << " + " << formatDec(Imm); + O << " + " << formatImm(Imm); else - O << " - " << formatDec(-Imm); + O << " - " << formatImm(-Imm); } else { assert(0 && "Expected an immediate"); } @@ -88,7 +88,7 @@ void BPFInstPrinter::printImm64Operand(const MCInst *MI, unsigned OpNo, raw_ostream &O) { const MCOperand &Op = MI->getOperand(OpNo); if (Op.isImm()) - O << (uint64_t)Op.getImm(); + O << formatImm(Op.getImm()); else if (Op.isExpr()) printExpr(Op.getExpr(), O); else @@ -100,7 +100,7 @@ void BPFInstPrinter::printBrTargetOperand(const MCInst *MI, unsigned OpNo, const MCOperand &Op = MI->getOperand(OpNo); if (Op.isImm()) { int16_t Imm = Op.getImm(); - O << ((Imm >= 0) ? "+" : "") << Imm; + O << ((Imm >= 0) ? "+" : "") << formatImm(Imm); } else if (Op.isExpr()) { printExpr(Op.getExpr(), O); } else { diff --git a/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp b/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp index e6ea92e08364..6593d9d018fd 100644 --- a/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp +++ b/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp @@ -104,15 +104,15 @@ BPFAsmBackend::createObjectWriter(raw_pwrite_stream &OS) const { } MCAsmBackend *llvm::createBPFAsmBackend(const Target &T, + const MCSubtargetInfo &STI, const MCRegisterInfo &MRI, - const Triple &TT, StringRef CPU, - const MCTargetOptions&) { + const MCTargetOptions &) { return new BPFAsmBackend(/*IsLittleEndian=*/true); } MCAsmBackend *llvm::createBPFbeAsmBackend(const Target &T, + const MCSubtargetInfo &STI, const MCRegisterInfo &MRI, - const Triple &TT, StringRef CPU, - const MCTargetOptions&) { + const MCTargetOptions &) { return new BPFAsmBackend(/*IsLittleEndian=*/false); } diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h index 6466042f6929..a6dac3abca02 100644 --- a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h +++ b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h @@ -45,11 +45,11 @@ MCCodeEmitter *createBPFbeMCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, MCContext &Ctx); -MCAsmBackend *createBPFAsmBackend(const Target &T, const MCRegisterInfo &MRI, - const Triple &TT, StringRef CPU, +MCAsmBackend *createBPFAsmBackend(const Target &T, const MCSubtargetInfo &STI, + const MCRegisterInfo &MRI, const MCTargetOptions &Options); -MCAsmBackend *createBPFbeAsmBackend(const Target &T, const MCRegisterInfo &MRI, - const Triple &TT, StringRef CPU, +MCAsmBackend *createBPFbeAsmBackend(const Target &T, const MCSubtargetInfo &STI, + const MCRegisterInfo &MRI, const MCTargetOptions &Options); std::unique_ptr createBPFELFObjectWriter(raw_pwrite_stream &OS, diff --git 
a/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp b/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp index d0d8b39b83bc..387296c69c39 100644 --- a/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp +++ b/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp @@ -47,6 +47,7 @@ #include "llvm/Support/Format.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/SMLoc.h" +#include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" #include @@ -60,9 +61,6 @@ using namespace llvm; -static cl::opt EnableFutureRegs("mfuture-regs", - cl::desc("Enable future registers")); - static cl::opt WarnMissingParenthesis( "mwarn-missing-parenthesis", cl::desc("Warn for missing parenthesis around predicate registers"), @@ -95,12 +93,20 @@ class HexagonAsmParser : public MCTargetAsmParser { } MCAsmParser &Parser; - MCAssembler *Assembler; MCInst MCB; bool InBrackets; MCAsmParser &getParser() const { return Parser; } - MCAssembler *getAssembler() const { return Assembler; } + MCAssembler *getAssembler() const { + MCAssembler *Assembler = nullptr; + // FIXME: need better way to detect AsmStreamer (upstream removed getKind()) + if (!Parser.getStreamer().hasRawTextSupport()) { + MCELFStreamer *MES = static_cast(&Parser.getStreamer()); + Assembler = &MES->getAssembler(); + } + return Assembler; + } + MCAsmLexer &getLexer() const { return Parser.getLexer(); } bool equalIsAsmAssignment() override { return false; } @@ -123,7 +129,7 @@ class HexagonAsmParser : public MCTargetAsmParser { bool matchOneInstruction(MCInst &MCB, SMLoc IDLoc, OperandVector &InstOperands, uint64_t &ErrorInfo, bool MatchingInlineAsm); - + void eatToEndOfPacket(); bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, uint64_t &ErrorInfo, @@ -155,17 +161,11 @@ class HexagonAsmParser : public MCTargetAsmParser { HexagonAsmParser(const MCSubtargetInfo &_STI, MCAsmParser &_Parser, const MCInstrInfo &MII, const MCTargetOptions &Options) : MCTargetAsmParser(Options, _STI, MII), Parser(_Parser), - MCB(HexagonMCInstrInfo::createBundle()), InBrackets(false) { + InBrackets(false) { + MCB.setOpcode(Hexagon::BUNDLE); setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits())); MCAsmParserExtension::Initialize(_Parser); - - Assembler = nullptr; - // FIXME: need better way to detect AsmStreamer (upstream removed getKind()) - if (!Parser.getStreamer().hasRawTextSupport()) { - MCELFStreamer *MES = static_cast(&Parser.getStreamer()); - Assembler = &MES->getAssembler(); - } } bool splitIdentifier(OperandVector &Operands); @@ -190,6 +190,7 @@ class HexagonAsmParser : public MCTargetAsmParser { /// instruction. 
struct HexagonOperand : public MCParsedAsmOperand { enum KindTy { Token, Immediate, Register } Kind; + MCContext &Context; SMLoc StartLoc, EndLoc; @@ -216,10 +217,12 @@ struct HexagonOperand : public MCParsedAsmOperand { struct ImmTy Imm; }; - HexagonOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {} + HexagonOperand(KindTy K, MCContext &Context) + : MCParsedAsmOperand(), Kind(K), Context(Context) {} public: - HexagonOperand(const HexagonOperand &o) : MCParsedAsmOperand() { + HexagonOperand(const HexagonOperand &o) + : MCParsedAsmOperand(), Context(o.Context) { Kind = o.Kind; StartLoc = o.StartLoc; EndLoc = o.EndLoc; @@ -392,9 +395,13 @@ struct HexagonOperand : public MCParsedAsmOperand { return; } int64_t Extended = SignExtend64(Value, 32); + HexagonMCExpr *NewExpr = HexagonMCExpr::create( + MCConstantExpr::create(Extended, Context), Context); if ((Extended < 0) != (Value < 0)) - Expr->setSignMismatch(); - Inst.addOperand(MCOperand::createExpr(Expr)); + NewExpr->setSignMismatch(); + NewExpr->setMustExtend(Expr->mustExtend()); + NewExpr->setMustNotExtend(Expr->mustNotExtend()); + Inst.addOperand(MCOperand::createExpr(NewExpr)); } void addn1ConstOperands(MCInst &Inst, unsigned N) const { @@ -408,8 +415,9 @@ struct HexagonOperand : public MCParsedAsmOperand { void print(raw_ostream &OS) const override; - static std::unique_ptr CreateToken(StringRef Str, SMLoc S) { - HexagonOperand *Op = new HexagonOperand(Token); + static std::unique_ptr CreateToken(MCContext &Context, + StringRef Str, SMLoc S) { + HexagonOperand *Op = new HexagonOperand(Token, Context); Op->Tok.Data = Str.data(); Op->Tok.Length = Str.size(); Op->StartLoc = S; @@ -417,18 +425,18 @@ struct HexagonOperand : public MCParsedAsmOperand { return std::unique_ptr(Op); } - static std::unique_ptr CreateReg(unsigned RegNum, SMLoc S, - SMLoc E) { - HexagonOperand *Op = new HexagonOperand(Register); + static std::unique_ptr + CreateReg(MCContext &Context, unsigned RegNum, SMLoc S, SMLoc E) { + HexagonOperand *Op = new HexagonOperand(Register, Context); Op->Reg.RegNum = RegNum; Op->StartLoc = S; Op->EndLoc = E; return std::unique_ptr(Op); } - static std::unique_ptr CreateImm(const MCExpr *Val, SMLoc S, - SMLoc E) { - HexagonOperand *Op = new HexagonOperand(Immediate); + static std::unique_ptr + CreateImm(MCContext &Context, const MCExpr *Val, SMLoc S, SMLoc E) { + HexagonOperand *Op = new HexagonOperand(Immediate, Context); Op->Imm.Val = Val; Op->StartLoc = S; Op->EndLoc = E; @@ -480,8 +488,8 @@ bool HexagonAsmParser::finishBundle(SMLoc IDLoc, MCStreamer &Out) { // 4 or less we have a packet that is too big. 
if (HexagonMCInstrInfo::bundleSize(MCB) > HEXAGON_PACKET_SIZE) { Error(IDLoc, "invalid instruction packet: out of slots"); - return true; // Error } + return true; // Error } return false; // No error @@ -493,13 +501,23 @@ bool HexagonAsmParser::matchBundleOptions() { if (!Parser.getTok().is(AsmToken::Colon)) return false; Lex(); + char const *MemNoShuffMsg = + "invalid instruction packet: mem_noshuf specifier not " + "supported with this architecture"; StringRef Option = Parser.getTok().getString(); + auto IDLoc = Parser.getTok().getLoc(); if (Option.compare_lower("endloop0") == 0) HexagonMCInstrInfo::setInnerLoop(MCB); else if (Option.compare_lower("endloop1") == 0) HexagonMCInstrInfo::setOuterLoop(MCB); + else if (Option.compare_lower("mem_noshuf") == 0) + if (getSTI().getFeatureBits()[Hexagon::FeatureMemNoShuf]) + HexagonMCInstrInfo::setMemReorderDisabled(MCB); + else + return getParser().Error(IDLoc, MemNoShuffMsg); else - return true; + return getParser().Error(IDLoc, llvm::Twine("'") + Option + + "' is not a valid bundle option"); Lex(); } } @@ -512,13 +530,13 @@ void HexagonAsmParser::canonicalizeImmediates(MCInst &MCI) { NewInst.setOpcode(MCI.getOpcode()); for (MCOperand &I : MCI) if (I.isImm()) { - int64_t Value (I.getImm()); + int64_t Value(I.getImm()); NewInst.addOperand(MCOperand::createExpr(HexagonMCExpr::create( MCConstantExpr::create(Value, getContext()), getContext()))); } else { if (I.isExpr() && cast(I.getExpr())->signMismatch() && WarnSignedMismatch) - Warning (MCI.getLoc(), "Signed/Unsigned mismatch"); + Warning(MCI.getLoc(), "Signed/Unsigned mismatch"); NewInst.addOperand(I); } MCI = NewInst; @@ -572,6 +590,15 @@ bool HexagonAsmParser::matchOneInstruction(MCInst &MCI, SMLoc IDLoc, llvm_unreachable("Implement any new match types added!"); } +void HexagonAsmParser::eatToEndOfPacket() { + assert(InBrackets); + MCAsmLexer &Lexer = getLexer(); + while (!Lexer.is(AsmToken::RCurly)) + Lexer.Lex(); + Lexer.Lex(); + InBrackets = false; +} + bool HexagonAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, @@ -586,6 +613,7 @@ bool HexagonAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, assert(Operands.size() == 1 && "Brackets should be by themselves"); if (InBrackets) { getParser().Error(IDLoc, "Already in a packet"); + InBrackets = false; return true; } InBrackets = true; @@ -604,8 +632,11 @@ bool HexagonAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, } MCInst *SubInst = new (getParser().getContext()) MCInst; if (matchOneInstruction(*SubInst, IDLoc, Operands, ErrorInfo, - MatchingInlineAsm)) + MatchingInlineAsm)) { + if (InBrackets) + eatToEndOfPacket(); return true; + } HexagonMCInstrInfo::extendIfNeeded( getParser().getContext(), MII, MCB, *SubInst); MCB.addOperand(MCOperand::createInst(SubInst)); @@ -853,10 +884,11 @@ bool HexagonAsmParser::splitIdentifier(OperandVector &Operands) { do { std::pair HeadTail = String.split('.'); if (!HeadTail.first.empty()) - Operands.push_back(HexagonOperand::CreateToken(HeadTail.first, Loc)); + Operands.push_back( + HexagonOperand::CreateToken(getContext(), HeadTail.first, Loc)); if (!HeadTail.second.empty()) Operands.push_back(HexagonOperand::CreateToken( - String.substr(HeadTail.first.size(), 1), Loc)); + getContext(), String.substr(HeadTail.first.size(), 1), Loc)); String = HeadTail.second; } while (!String.empty()); return false; @@ -878,38 +910,43 @@ bool HexagonAsmParser::parseOperand(OperandVector &Operands) { case Hexagon::P3: if 
(previousEqual(Operands, 0, "if")) { if (WarnMissingParenthesis) - Warning (Begin, "Missing parenthesis around predicate register"); + Warning(Begin, "Missing parenthesis around predicate register"); static char const *LParen = "("; static char const *RParen = ")"; - Operands.push_back(HexagonOperand::CreateToken(LParen, Begin)); - Operands.push_back(HexagonOperand::CreateReg(Register, Begin, End)); + Operands.push_back( + HexagonOperand::CreateToken(getContext(), LParen, Begin)); + Operands.push_back( + HexagonOperand::CreateReg(getContext(), Register, Begin, End)); const AsmToken &MaybeDotNew = Lexer.getTok(); if (MaybeDotNew.is(AsmToken::TokenKind::Identifier) && MaybeDotNew.getString().equals_lower(".new")) splitIdentifier(Operands); - Operands.push_back(HexagonOperand::CreateToken(RParen, Begin)); + Operands.push_back( + HexagonOperand::CreateToken(getContext(), RParen, Begin)); return false; } if (previousEqual(Operands, 0, "!") && previousEqual(Operands, 1, "if")) { if (WarnMissingParenthesis) - Warning (Begin, "Missing parenthesis around predicate register"); + Warning(Begin, "Missing parenthesis around predicate register"); static char const *LParen = "("; static char const *RParen = ")"; - Operands.insert(Operands.end () - 1, - HexagonOperand::CreateToken(LParen, Begin)); - Operands.push_back(HexagonOperand::CreateReg(Register, Begin, End)); + Operands.insert(Operands.end() - 1, HexagonOperand::CreateToken( + getContext(), LParen, Begin)); + Operands.push_back( + HexagonOperand::CreateReg(getContext(), Register, Begin, End)); const AsmToken &MaybeDotNew = Lexer.getTok(); if (MaybeDotNew.is(AsmToken::TokenKind::Identifier) && MaybeDotNew.getString().equals_lower(".new")) splitIdentifier(Operands); - Operands.push_back(HexagonOperand::CreateToken(RParen, Begin)); + Operands.push_back( + HexagonOperand::CreateToken(getContext(), RParen, Begin)); return false; } break; } - Operands.push_back(HexagonOperand::CreateReg( - Register, Begin, End)); + Operands.push_back( + HexagonOperand::CreateReg(getContext(), Register, Begin, End)); return false; } return splitIdentifier(Operands); @@ -931,10 +968,9 @@ bool HexagonAsmParser::isLabel(AsmToken &Token) { return true; if (!matchRegister(String.lower())) return true; - (void)Second; assert(Second.is(AsmToken::Colon)); - StringRef Raw (String.data(), Third.getString().data() - String.data() + - Third.getString().size()); + StringRef Raw(String.data(), Third.getString().data() - String.data() + + Third.getString().size()); std::string Collapsed = Raw; Collapsed.erase(llvm::remove_if(Collapsed, isspace), Collapsed.end()); StringRef Whole = Collapsed; @@ -944,7 +980,8 @@ bool HexagonAsmParser::isLabel(AsmToken &Token) { return false; } -bool HexagonAsmParser::handleNoncontigiousRegister(bool Contigious, SMLoc &Loc) { +bool HexagonAsmParser::handleNoncontigiousRegister(bool Contigious, + SMLoc &Loc) { if (!Contigious && ErrorNoncontigiousRegister) { Error(Loc, "Register name is not contigious"); return true; @@ -954,7 +991,8 @@ bool HexagonAsmParser::handleNoncontigiousRegister(bool Contigious, SMLoc &Loc) return false; } -bool HexagonAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) { +bool HexagonAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, + SMLoc &EndLoc) { MCAsmLexer &Lexer = getLexer(); StartLoc = getLexer().getLoc(); SmallVector Lookahead; @@ -963,19 +1001,19 @@ bool HexagonAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &En bool NeededWorkaround = false; while (Again) { AsmToken const 
&Token = Lexer.getTok(); - RawString = StringRef(RawString.data(), - Token.getString().data() - RawString.data () + - Token.getString().size()); + RawString = StringRef(RawString.data(), Token.getString().data() - + RawString.data() + + Token.getString().size()); Lookahead.push_back(Token); Lexer.Lex(); bool Contigious = Lexer.getTok().getString().data() == Lookahead.back().getString().data() + - Lookahead.back().getString().size(); + Lookahead.back().getString().size(); bool Type = Lexer.is(AsmToken::Identifier) || Lexer.is(AsmToken::Dot) || Lexer.is(AsmToken::Integer) || Lexer.is(AsmToken::Real) || Lexer.is(AsmToken::Colon); - bool Workaround = Lexer.is(AsmToken::Colon) || - Lookahead.back().is(AsmToken::Colon); + bool Workaround = + Lexer.is(AsmToken::Colon) || Lookahead.back().is(AsmToken::Colon); Again = (Contigious && Type) || (Workaround && Type); NeededWorkaround = NeededWorkaround || (Again && !(Contigious && Type)); } @@ -1005,10 +1043,10 @@ bool HexagonAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &En std::pair ColonSplit = StringRef(FullString).split(':'); unsigned ColonReg = matchRegister(ColonSplit.first.lower()); if (ColonReg != Hexagon::NoRegister && RegisterMatchesArch(DotReg)) { - Lexer.UnLex(Lookahead.back()); - Lookahead.pop_back(); - Lexer.UnLex(Lookahead.back()); - Lookahead.pop_back(); + do { + Lexer.UnLex(Lookahead.back()); + Lookahead.pop_back(); + } while (!Lookahead.empty () && !Lexer.is(AsmToken::Colon)); RegNo = ColonReg; EndLoc = Lexer.getLoc(); if (handleNoncontigiousRegister(!NeededWorkaround, StartLoc)) @@ -1036,19 +1074,18 @@ bool HexagonAsmParser::implicitExpressionLocation(OperandVector &Operands) { return false; } -bool HexagonAsmParser::parseExpression(MCExpr const *& Expr) { +bool HexagonAsmParser::parseExpression(MCExpr const *&Expr) { SmallVector Tokens; MCAsmLexer &Lexer = getLexer(); bool Done = false; - static char const * Comma = ","; + static char const *Comma = ","; do { - Tokens.emplace_back (Lexer.getTok()); + Tokens.emplace_back(Lexer.getTok()); Lex(); - switch (Tokens.back().getKind()) - { + switch (Tokens.back().getKind()) { case AsmToken::TokenKind::Hash: - if (Tokens.size () > 1) - if ((Tokens.end () - 2)->getKind() == AsmToken::TokenKind::Plus) { + if (Tokens.size() > 1) + if ((Tokens.end() - 2)->getKind() == AsmToken::TokenKind::Plus) { Tokens.insert(Tokens.end() - 2, AsmToken(AsmToken::TokenKind::Comma, Comma)); Done = true; @@ -1067,7 +1104,8 @@ bool HexagonAsmParser::parseExpression(MCExpr const *& Expr) { Lexer.UnLex(Tokens.back()); Tokens.pop_back(); } - return getParser().parseExpression(Expr); + SMLoc Loc = Lexer.getLoc(); + return getParser().parseExpression(Expr, Loc); } bool HexagonAsmParser::parseExpressionOrOperand(OperandVector &Operands) { @@ -1078,7 +1116,8 @@ bool HexagonAsmParser::parseExpressionOrOperand(OperandVector &Operands) { bool Error = parseExpression(Expr); Expr = HexagonMCExpr::create(Expr, getContext()); if (!Error) - Operands.push_back(HexagonOperand::CreateImm(Expr, Loc, Loc)); + Operands.push_back( + HexagonOperand::CreateImm(getContext(), Expr, Loc, Loc)); return Error; } return parseOperand(Operands); @@ -1091,6 +1130,7 @@ bool HexagonAsmParser::parseInstruction(OperandVector &Operands) { while (true) { AsmToken const &Token = Parser.getTok(); switch (Token.getKind()) { + case AsmToken::Eof: case AsmToken::EndOfStatement: { Lex(); return false; @@ -1098,15 +1138,15 @@ bool HexagonAsmParser::parseInstruction(OperandVector &Operands) { case AsmToken::LCurly: { if (!Operands.empty()) 
return true; - Operands.push_back( - HexagonOperand::CreateToken(Token.getString(), Token.getLoc())); + Operands.push_back(HexagonOperand::CreateToken( + getContext(), Token.getString(), Token.getLoc())); Lex(); return false; } case AsmToken::RCurly: { if (Operands.empty()) { - Operands.push_back( - HexagonOperand::CreateToken(Token.getString(), Token.getLoc())); + Operands.push_back(HexagonOperand::CreateToken( + getContext(), Token.getString(), Token.getLoc())); Lex(); } return false; @@ -1122,9 +1162,9 @@ bool HexagonAsmParser::parseInstruction(OperandVector &Operands) { case AsmToken::LessEqual: case AsmToken::LessLess: { Operands.push_back(HexagonOperand::CreateToken( - Token.getString().substr(0, 1), Token.getLoc())); + getContext(), Token.getString().substr(0, 1), Token.getLoc())); Operands.push_back(HexagonOperand::CreateToken( - Token.getString().substr(1, 1), Token.getLoc())); + getContext(), Token.getString().substr(1, 1), Token.getLoc())); Lex(); continue; } @@ -1133,8 +1173,8 @@ bool HexagonAsmParser::parseInstruction(OperandVector &Operands) { bool ImplicitExpression = implicitExpressionLocation(Operands); SMLoc ExprLoc = Lexer.getLoc(); if (!ImplicitExpression) - Operands.push_back( - HexagonOperand::CreateToken(Token.getString(), Token.getLoc())); + Operands.push_back(HexagonOperand::CreateToken( + getContext(), Token.getString(), Token.getLoc())); Lex(); bool MustExtend = false; bool HiOnly = false; @@ -1171,16 +1211,15 @@ bool HexagonAsmParser::parseInstruction(OperandVector &Operands) { if (Expr->evaluateAsAbsolute(Value)) { if (HiOnly) Expr = MCBinaryExpr::createLShr( - Expr, MCConstantExpr::create(16, Context), Context); + Expr, MCConstantExpr::create(16, Context), Context); if (HiOnly || LoOnly) - Expr = MCBinaryExpr::createAnd(Expr, - MCConstantExpr::create(0xffff, Context), - Context); + Expr = MCBinaryExpr::createAnd( + Expr, MCConstantExpr::create(0xffff, Context), Context); } else { MCValue Value; if (Expr->evaluateAsRelocatable(Value, nullptr, nullptr)) { if (!Value.isAbsolute()) { - switch(Value.getAccessVariant()) { + switch (Value.getAccessVariant()) { case MCSymbolRefExpr::VariantKind::VK_TPREL: case MCSymbolRefExpr::VariantKind::VK_DTPREL: // Don't lazy extend these expression variants @@ -1196,7 +1235,7 @@ bool HexagonAsmParser::parseInstruction(OperandVector &Operands) { HexagonMCInstrInfo::setMustNotExtend(*Expr, MustNotExtend); HexagonMCInstrInfo::setMustExtend(*Expr, MustExtend); std::unique_ptr Operand = - HexagonOperand::CreateImm(Expr, ExprLoc, ExprLoc); + HexagonOperand::CreateImm(getContext(), Expr, ExprLoc, ExprLoc); Operands.push_back(std::move(Operand)); continue; } @@ -1209,15 +1248,14 @@ bool HexagonAsmParser::parseInstruction(OperandVector &Operands) { } bool HexagonAsmParser::ParseInstruction(ParseInstructionInfo &Info, - StringRef Name, - AsmToken ID, + StringRef Name, AsmToken ID, OperandVector &Operands) { getLexer().UnLex(ID); return parseInstruction(Operands); } -static MCInst makeCombineInst(int opCode, MCOperand &Rdd, - MCOperand &MO1, MCOperand &MO2) { +static MCInst makeCombineInst(int opCode, MCOperand &Rdd, MCOperand &MO1, + MCOperand &MO2) { MCInst TmpInst; TmpInst.setOpcode(opCode); TmpInst.addOperand(Rdd); @@ -1286,6 +1324,13 @@ int HexagonAsmParser::processInstruction(MCInst &Inst, bool is32bit = false; // used to distinguish between CONST32 and CONST64 switch (Inst.getOpcode()) { default: + if (HexagonMCInstrInfo::getDesc(MII, Inst).isPseudo()) { + SMDiagnostic Diag = getSourceManager().GetMessage( + IDLoc, 
SourceMgr::DK_Error, + "Found pseudo instruction with no expansion"); + Diag.print("", errs()); + report_fatal_error("Invalid pseudo instruction"); + } break; case Hexagon::A2_iconst: { @@ -1319,8 +1364,10 @@ int HexagonAsmParser::processInstruction(MCInst &Inst, case Hexagon::C2_cmpgei: { MCOperand &MO = Inst.getOperand(2); - MO.setExpr(HexagonMCExpr::create(MCBinaryExpr::createSub( - MO.getExpr(), MCConstantExpr::create(1, Context), Context), Context)); + MO.setExpr(HexagonMCExpr::create( + MCBinaryExpr::createSub(MO.getExpr(), + MCConstantExpr::create(1, Context), Context), + Context)); Inst.setOpcode(Hexagon::C2_cmpgti); break; } @@ -1341,8 +1388,10 @@ int HexagonAsmParser::processInstruction(MCInst &Inst, TmpInst.addOperand(Rt); Inst = TmpInst; } else { - MO.setExpr(HexagonMCExpr::create(MCBinaryExpr::createSub( - MO.getExpr(), MCConstantExpr::create(1, Context), Context), Context)); + MO.setExpr(HexagonMCExpr::create( + MCBinaryExpr::createSub(MO.getExpr(), + MCConstantExpr::create(1, Context), Context), + Context)); Inst.setOpcode(Hexagon::C2_cmpgtui); } break; @@ -1509,7 +1558,7 @@ int HexagonAsmParser::processInstruction(MCInst &Inst, TmpInst.addOperand(MO_0); TmpInst.addOperand(MCOperand::createExpr(HexagonMCExpr::create( - MCSymbolRefExpr::create(Sym, getContext()), getContext()))); + MCSymbolRefExpr::create(Sym, getContext()), getContext()))); Inst = TmpInst; } } @@ -1540,7 +1589,8 @@ int HexagonAsmParser::processInstruction(MCInst &Inst, MCConstantExpr::create(s8, Context), Context))); // upper 32 auto Expr = HexagonMCExpr::create( MCConstantExpr::create(Lo_32(Value), Context), Context); - HexagonMCInstrInfo::setMustExtend(*Expr, HexagonMCInstrInfo::mustExtend(*MO.getExpr())); + HexagonMCInstrInfo::setMustExtend( + *Expr, HexagonMCInstrInfo::mustExtend(*MO.getExpr())); MCOperand imm2(MCOperand::createExpr(Expr)); // lower 32 Inst = makeCombineInst(Hexagon::A4_combineii, Rdd, imm, imm2); } else { @@ -1588,15 +1638,16 @@ int HexagonAsmParser::processInstruction(MCInst &Inst, case Hexagon::S2_tableidxh_goodsyntax: { MCInst TmpInst; MCOperand &Rx = Inst.getOperand(0); - MCOperand &_dst_ = Inst.getOperand(1); MCOperand &Rs = Inst.getOperand(2); MCOperand &Imm4 = Inst.getOperand(3); MCOperand &Imm6 = Inst.getOperand(4); - Imm6.setExpr(HexagonMCExpr::create(MCBinaryExpr::createSub( - Imm6.getExpr(), MCConstantExpr::create(1, Context), Context), Context)); + Imm6.setExpr(HexagonMCExpr::create( + MCBinaryExpr::createSub(Imm6.getExpr(), + MCConstantExpr::create(1, Context), Context), + Context)); TmpInst.setOpcode(Hexagon::S2_tableidxh); TmpInst.addOperand(Rx); - TmpInst.addOperand(_dst_); + TmpInst.addOperand(Rx); TmpInst.addOperand(Rs); TmpInst.addOperand(Imm4); TmpInst.addOperand(Imm6); @@ -1607,15 +1658,16 @@ int HexagonAsmParser::processInstruction(MCInst &Inst, case Hexagon::S2_tableidxw_goodsyntax: { MCInst TmpInst; MCOperand &Rx = Inst.getOperand(0); - MCOperand &_dst_ = Inst.getOperand(1); MCOperand &Rs = Inst.getOperand(2); MCOperand &Imm4 = Inst.getOperand(3); MCOperand &Imm6 = Inst.getOperand(4); - Imm6.setExpr(HexagonMCExpr::create(MCBinaryExpr::createSub( - Imm6.getExpr(), MCConstantExpr::create(2, Context), Context), Context)); + Imm6.setExpr(HexagonMCExpr::create( + MCBinaryExpr::createSub(Imm6.getExpr(), + MCConstantExpr::create(2, Context), Context), + Context)); TmpInst.setOpcode(Hexagon::S2_tableidxw); TmpInst.addOperand(Rx); - TmpInst.addOperand(_dst_); + TmpInst.addOperand(Rx); TmpInst.addOperand(Rs); TmpInst.addOperand(Imm4); TmpInst.addOperand(Imm6); @@ -1626,15 
+1678,16 @@ int HexagonAsmParser::processInstruction(MCInst &Inst, case Hexagon::S2_tableidxd_goodsyntax: { MCInst TmpInst; MCOperand &Rx = Inst.getOperand(0); - MCOperand &_dst_ = Inst.getOperand(1); MCOperand &Rs = Inst.getOperand(2); MCOperand &Imm4 = Inst.getOperand(3); MCOperand &Imm6 = Inst.getOperand(4); - Imm6.setExpr(HexagonMCExpr::create(MCBinaryExpr::createSub( - Imm6.getExpr(), MCConstantExpr::create(3, Context), Context), Context)); + Imm6.setExpr(HexagonMCExpr::create( + MCBinaryExpr::createSub(Imm6.getExpr(), + MCConstantExpr::create(3, Context), Context), + Context)); TmpInst.setOpcode(Hexagon::S2_tableidxd); TmpInst.addOperand(Rx); - TmpInst.addOperand(_dst_); + TmpInst.addOperand(Rx); TmpInst.addOperand(Rs); TmpInst.addOperand(Imm4); TmpInst.addOperand(Imm6); @@ -1655,21 +1708,15 @@ int HexagonAsmParser::processInstruction(MCInst &Inst, bool Absolute = Expr.evaluateAsAbsolute(Value); assert(Absolute); (void)Absolute; - if (!HexagonMCInstrInfo::mustExtend(Expr)) { - if (Value < 0 && Value > -256) { - Imm.setExpr(HexagonMCExpr::create( - MCConstantExpr::create(Value * -1, Context), Context)); - TmpInst.setOpcode(Hexagon::M2_mpysin); - } else if (Value < 256 && Value >= 0) - TmpInst.setOpcode(Hexagon::M2_mpysip); - else - return Match_InvalidOperand; - } else { - if (Value >= 0) - TmpInst.setOpcode(Hexagon::M2_mpysip); - else - return Match_InvalidOperand; - } + if (!HexagonMCInstrInfo::mustExtend(Expr) && + ((Value <= -256) || Value >= 256)) + return Match_InvalidOperand; + if (Value < 0 && Value > -256) { + Imm.setExpr(HexagonMCExpr::create( + MCConstantExpr::create(Value * -1, Context), Context)); + TmpInst.setOpcode(Hexagon::M2_mpysin); + } else + TmpInst.setOpcode(Hexagon::M2_mpysip); TmpInst.addOperand(Rd); TmpInst.addOperand(Rs); TmpInst.addOperand(Imm); @@ -1952,7 +1999,8 @@ int HexagonAsmParser::processInstruction(MCInst &Inst, break; case Hexagon::A2_zxtb: { Inst.setOpcode(Hexagon::A2_andir); - Inst.addOperand(MCOperand::createExpr(MCConstantExpr::create(255, Context))); + Inst.addOperand( + MCOperand::createExpr(MCConstantExpr::create(255, Context))); break; } } // switch diff --git a/lib/Target/Hexagon/BitTracker.cpp b/lib/Target/Hexagon/BitTracker.cpp index 5e20d8ca0fdb..15d6a05a0078 100644 --- a/lib/Target/Hexagon/BitTracker.cpp +++ b/lib/Target/Hexagon/BitTracker.cpp @@ -18,16 +18,16 @@ // A "ref" value is associated with a BitRef structure, which indicates // which virtual register, and which bit in that register is the origin // of the value. For example, given an instruction -// vreg2 = ASL vreg1, 1 -// assuming that nothing is known about bits of vreg1, bit 1 of vreg2 -// will be a "ref" to (vreg1, 0). If there is a subsequent instruction -// vreg3 = ASL vreg2, 2 -// then bit 3 of vreg3 will be a "ref" to (vreg1, 0) as well. +// %2 = ASL %1, 1 +// assuming that nothing is known about bits of %1, bit 1 of %2 +// will be a "ref" to (%1, 0). If there is a subsequent instruction +// %3 = ASL %2, 2 +// then bit 3 of %3 will be a "ref" to (%1, 0) as well. // The "bottom" case means that the bit's value cannot be determined, // and that this virtual register actually defines it. The "bottom" case // is discussed in detail in BitTracker.h. In fact, "bottom" is a "ref -// to self", so for the vreg1 above, the bit 0 of it will be a "ref" to -// (vreg1, 0), bit 1 will be a "ref" to (vreg1, 1), etc. +// to self", so for the %1 above, the bit 0 of it will be a "ref" to +// (%1, 0), bit 1 will be a "ref" to (%1, 1), etc. 
// // The tracker implements the Wegman-Zadeck algorithm, originally developed // for SSA-based constant propagation. Each register is represented as @@ -75,7 +75,7 @@ using BT = BitTracker; namespace { - // Local trickery to pretty print a register (without the whole "%vreg" + // Local trickery to pretty print a register (without the whole "%number" // business). struct printv { printv(unsigned r) : R(r) {} @@ -186,7 +186,8 @@ void BitTracker::print_cells(raw_ostream &OS) const { } BitTracker::BitTracker(const MachineEvaluator &E, MachineFunction &F) - : Trace(false), ME(E), MF(F), MRI(F.getRegInfo()), Map(*new CellMapType) {} + : ME(E), MF(F), MRI(F.getRegInfo()), Map(*new CellMapType), Trace(false) { +} BitTracker::~BitTracker() { delete ⤅ @@ -762,12 +763,39 @@ bool BT::MachineEvaluator::evaluate(const MachineInstr &MI, return true; } +bool BT::UseQueueType::Cmp::operator()(const MachineInstr *InstA, + const MachineInstr *InstB) const { + // This is a comparison function for a priority queue: give higher priority + // to earlier instructions. + // This operator is used as "less", so returning "true" gives InstB higher + // priority (because then InstA < InstB). + if (InstA == InstB) + return false; + const MachineBasicBlock *BA = InstA->getParent(); + const MachineBasicBlock *BB = InstB->getParent(); + if (BA != BB) { + // If the blocks are different, ideally the dominating block would + // have a higher priority, but it may be too expensive to check. + return BA->getNumber() > BB->getNumber(); + } + + MachineBasicBlock::const_iterator ItA = InstA->getIterator(); + MachineBasicBlock::const_iterator ItB = InstB->getIterator(); + MachineBasicBlock::const_iterator End = BA->end(); + while (ItA != End) { + if (ItA == ItB) + return false; // ItA was before ItB. + ++ItA; + } + return true; +} + // Main W-Z implementation. 
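The Cmp comparator above, and the UseQueueType it orders (the BitTracker.h hunk later in this patch pairs the priority queue with a DenseSet so duplicate enqueues are dropped), lean on a std::priority_queue convention that is easy to get backwards: the comparator is a "less", so returning true for (A, B) gives B the higher priority. A minimal, self-contained sketch of the same idiom follows; DedupWorklist, Item and EarlierFirst are illustrative names of mine, not LLVM code.

```cpp
// Sketch only: a de-duplicating priority worklist in the spirit of the new
// UseQueueType.  The comparator is a strict-weak "less", so returning true
// for (a, b) means a has LOWER priority than b; items with the smallest
// position therefore come out of the queue first.
#include <cstdio>
#include <queue>
#include <unordered_set>
#include <vector>

struct Item {
  int position;        // stand-in for "earlier in the basic block"
  const char *name;
};

struct EarlierFirst {
  bool operator()(const Item *a, const Item *b) const {
    // a < b  <=>  a is popped later, so a larger position means lower priority.
    return a->position > b->position;
  }
};

class DedupWorklist {
  std::priority_queue<const Item *, std::vector<const Item *>, EarlierFirst> Q;
  std::unordered_set<const Item *> Seen;   // plays the role of the DenseSet
public:
  void push(const Item *I) {
    if (Seen.insert(I).second)             // duplicate enqueues are ignored
      Q.push(I);
  }
  bool empty() const { return Q.empty(); }
  const Item *pop() {
    const Item *I = Q.top();
    Q.pop();
    Seen.erase(I);
    return I;
  }
};

int main() {
  Item A{3, "third"}, B{1, "first"}, C{2, "second"};
  DedupWorklist W;
  W.push(&A); W.push(&B); W.push(&C);
  W.push(&B);                              // already queued, silently dropped
  while (!W.empty())
    std::printf("%s\n", W.pop()->name);    // prints first, second, third
  return 0;
}
```

In the patch itself the same pattern is driven from BT::run(), which now alternates draining the CFG-edge queue and this use queue until both are empty.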
void BT::visitPHI(const MachineInstr &PI) { int ThisN = PI.getParent()->getNumber(); if (Trace) - dbgs() << "Visit FI(BB#" << ThisN << "): " << PI; + dbgs() << "Visit FI(" << printMBBReference(*PI.getParent()) << "): " << PI; const MachineOperand &MD = PI.getOperand(0); assert(MD.getSubReg() == 0 && "Unexpected sub-register in definition"); @@ -784,7 +812,8 @@ void BT::visitPHI(const MachineInstr &PI) { const MachineBasicBlock *PB = PI.getOperand(i + 1).getMBB(); int PredN = PB->getNumber(); if (Trace) - dbgs() << " edge BB#" << PredN << "->BB#" << ThisN; + dbgs() << " edge " << printMBBReference(*PB) << "->" + << printMBBReference(*PI.getParent()); if (!EdgeExec.count(CFGEdge(PredN, ThisN))) { if (Trace) dbgs() << " not executable\n"; @@ -809,10 +838,8 @@ void BT::visitPHI(const MachineInstr &PI) { } void BT::visitNonBranch(const MachineInstr &MI) { - if (Trace) { - int ThisN = MI.getParent()->getNumber(); - dbgs() << "Visit MI(BB#" << ThisN << "): " << MI; - } + if (Trace) + dbgs() << "Visit MI(" << printMBBReference(*MI.getParent()) << "): " << MI; if (MI.isDebugValue()) return; assert(!MI.isBranch() && "Unexpected branch instruction"); @@ -897,7 +924,7 @@ void BT::visitBranchesFrom(const MachineInstr &BI) { BTs.clear(); const MachineInstr &MI = *It; if (Trace) - dbgs() << "Visit BR(BB#" << ThisN << "): " << MI; + dbgs() << "Visit BR(" << printMBBReference(B) << "): " << MI; assert(MI.isBranch() && "Expecting branch instruction"); InstrExec.insert(&MI); bool Eval = ME.evaluate(MI, Map, BTs, FallsThrough); @@ -913,7 +940,7 @@ void BT::visitBranchesFrom(const MachineInstr &BI) { if (Trace) { dbgs() << " adding targets:"; for (unsigned i = 0, n = BTs.size(); i < n; ++i) - dbgs() << " BB#" << BTs[i]->getNumber(); + dbgs() << " " << printMBBReference(*BTs[i]); if (FallsThrough) dbgs() << "\n falls through\n"; else @@ -949,18 +976,11 @@ void BT::visitBranchesFrom(const MachineInstr &BI) { void BT::visitUsesOf(unsigned Reg) { if (Trace) - dbgs() << "visiting uses of " << printReg(Reg, &ME.TRI) << "\n"; + dbgs() << "queuing uses of modified reg " << printReg(Reg, &ME.TRI) + << " cell: " << ME.getCell(Reg, Map) << '\n'; - for (const MachineInstr &UseI : MRI.use_nodbg_instructions(Reg)) { - if (!InstrExec.count(&UseI)) - continue; - if (UseI.isPHI()) - visitPHI(UseI); - else if (!UseI.isBranch()) - visitNonBranch(UseI); - else - visitBranchesFrom(UseI); - } + for (MachineInstr &UseI : MRI.use_nodbg_instructions(Reg)) + UseQ.push(&UseI); } BT::RegisterCell BT::get(RegisterRef RR) const { @@ -1010,6 +1030,8 @@ void BT::visit(const MachineInstr &MI) { assert(!MI.isBranch() && "Only non-branches are allowed"); InstrExec.insert(&MI); visitNonBranch(MI); + // Make sure to flush all the pending use updates. + runUseQueue(); // The call to visitNonBranch could propagate the changes until a branch // is actually visited. This could result in adding CFG edges to the flow // queue. Since the queue won't be processed, clear it. @@ -1025,35 +1047,13 @@ void BT::reset() { ReachedBB.reserve(MF.size()); } -void BT::run() { - reset(); - assert(FlowQ.empty()); - - using MachineFlowGraphTraits = GraphTraits; - - const MachineBasicBlock *Entry = MachineFlowGraphTraits::getEntryNode(&MF); - - unsigned MaxBN = 0; - for (const MachineBasicBlock &B : MF) { - assert(B.getNumber() >= 0 && "Disconnected block"); - unsigned BN = B.getNumber(); - if (BN > MaxBN) - MaxBN = BN; - } - - // Keep track of visited blocks. 
- BitVector BlockScanned(MaxBN+1); - - int EntryN = Entry->getNumber(); - // Generate a fake edge to get something to start with. - FlowQ.push(CFGEdge(-1, EntryN)); - +void BT::runEdgeQueue(BitVector &BlockScanned) { while (!FlowQ.empty()) { CFGEdge Edge = FlowQ.front(); FlowQ.pop(); if (EdgeExec.count(Edge)) - continue; + return; EdgeExec.insert(Edge); ReachedBB.insert(Edge.second); @@ -1070,7 +1070,7 @@ void BT::run() { // then the instructions have already been processed. Any updates to // the cells would now only happen through visitUsesOf... if (BlockScanned[Edge.second]) - continue; + return; BlockScanned[Edge.second] = true; // Visit non-branch instructions. @@ -1094,6 +1094,50 @@ void BT::run() { visitBranchesFrom(*It); } } // while (!FlowQ->empty()) +} + +void BT::runUseQueue() { + while (!UseQ.empty()) { + MachineInstr &UseI = *UseQ.front(); + UseQ.pop(); + + if (!InstrExec.count(&UseI)) + continue; + if (UseI.isPHI()) + visitPHI(UseI); + else if (!UseI.isBranch()) + visitNonBranch(UseI); + else + visitBranchesFrom(UseI); + } +} + +void BT::run() { + reset(); + assert(FlowQ.empty()); + + using MachineFlowGraphTraits = GraphTraits; + const MachineBasicBlock *Entry = MachineFlowGraphTraits::getEntryNode(&MF); + + unsigned MaxBN = 0; + for (const MachineBasicBlock &B : MF) { + assert(B.getNumber() >= 0 && "Disconnected block"); + unsigned BN = B.getNumber(); + if (BN > MaxBN) + MaxBN = BN; + } + + // Keep track of visited blocks. + BitVector BlockScanned(MaxBN+1); + + int EntryN = Entry->getNumber(); + // Generate a fake edge to get something to start with. + FlowQ.push(CFGEdge(-1, EntryN)); + + while (!FlowQ.empty() || !UseQ.empty()) { + runEdgeQueue(BlockScanned); + runUseQueue(); + } if (Trace) print_cells(dbgs() << "Cells after propagation:\n"); diff --git a/lib/Target/Hexagon/BitTracker.h b/lib/Target/Hexagon/BitTracker.h index 8a0f65722298..5df6b61710f6 100644 --- a/lib/Target/Hexagon/BitTracker.h +++ b/lib/Target/Hexagon/BitTracker.h @@ -23,6 +23,7 @@ namespace llvm { +class BitVector; class ConstantInt; class MachineRegisterInfo; class MachineBasicBlock; @@ -63,23 +64,55 @@ struct BitTracker { void visitNonBranch(const MachineInstr &MI); void visitBranchesFrom(const MachineInstr &BI); void visitUsesOf(unsigned Reg); - void reset(); using CFGEdge = std::pair; using EdgeSetType = std::set; using InstrSetType = std::set; using EdgeQueueType = std::queue; - EdgeSetType EdgeExec; // Executable flow graph edges. - InstrSetType InstrExec; // Executable instructions. - EdgeQueueType FlowQ; // Work queue of CFG edges. - DenseSet ReachedBB; // Cache of reached blocks. - bool Trace; // Enable tracing for debugging. + // Priority queue of instructions using modified registers, ordered by + // their relative position in a basic block. + struct UseQueueType { + unsigned size() const { + return Uses.size(); + } + bool empty() const { + return size() == 0; + } + MachineInstr *front() const { + return Uses.top(); + } + void push(MachineInstr *MI) { + if (Set.insert(MI).second) + Uses.push(MI); + } + void pop() { + Set.erase(front()); + Uses.pop(); + } + private: + struct Cmp { + bool operator()(const MachineInstr *MI, const MachineInstr *MJ) const; + }; + std::priority_queue, Cmp> Uses; + DenseSet Set; // Set to avoid adding duplicate entries. + }; + + void reset(); + void runEdgeQueue(BitVector &BlockScanned); + void runUseQueue(); const MachineEvaluator &ME; MachineFunction &MF; MachineRegisterInfo &MRI; CellMapType ⤅ + + EdgeSetType EdgeExec; // Executable flow graph edges. 
+ InstrSetType InstrExec; // Executable instructions. + UseQueueType UseQ; // Work queue of register uses. + EdgeQueueType FlowQ; // Work queue of CFG edges. + DenseSet ReachedBB; // Cache of reached blocks. + bool Trace; // Enable tracing for debugging. }; // Abstraction of a reference to bit at position Pos from a register Reg. diff --git a/lib/Target/Hexagon/CMakeLists.txt b/lib/Target/Hexagon/CMakeLists.txt index ac6a5fcd0812..1c36093923ac 100644 --- a/lib/Target/Hexagon/CMakeLists.txt +++ b/lib/Target/Hexagon/CMakeLists.txt @@ -27,6 +27,7 @@ add_llvm_target(HexagonCodeGen HexagonExpandCondsets.cpp HexagonFixupHwLoops.cpp HexagonFrameLowering.cpp + HexagonGatherPacketize.cpp HexagonGenExtract.cpp HexagonGenInsert.cpp HexagonGenMux.cpp @@ -35,7 +36,9 @@ add_llvm_target(HexagonCodeGen HexagonHazardRecognizer.cpp HexagonInstrInfo.cpp HexagonISelDAGToDAG.cpp + HexagonISelDAGToDAGHVX.cpp HexagonISelLowering.cpp + HexagonISelLoweringHVX.cpp HexagonLoopIdiomRecognition.cpp HexagonMachineFunctionInfo.cpp HexagonMachineScheduler.cpp diff --git a/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp index c26ba3db8ef6..481b692ae8bf 100644 --- a/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp +++ b/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp @@ -45,10 +45,12 @@ class HexagonDisassembler : public MCDisassembler { public: std::unique_ptr const MCII; std::unique_ptr CurrentBundle; + mutable MCInst const *CurrentExtender; HexagonDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, MCInstrInfo const *MCII) - : MCDisassembler(STI, Ctx), MCII(MCII), CurrentBundle(new MCInst *) {} + : MCDisassembler(STI, Ctx), MCII(MCII), CurrentBundle(new MCInst *), + CurrentExtender(nullptr) {} DecodeStatus getSingleInstruction(MCInst &Instr, MCInst &MCB, ArrayRef Bytes, uint64_t Address, @@ -58,40 +60,38 @@ class HexagonDisassembler : public MCDisassembler { ArrayRef Bytes, uint64_t Address, raw_ostream &VStream, raw_ostream &CStream) const override; - void addSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) const; + void remapInstruction(MCInst &Instr) const; }; -} // end anonymous namespace - -static uint32_t fullValue(MCInstrInfo const &MCII, MCInst &MCB, MCInst &MI, +static uint64_t fullValue(HexagonDisassembler const &Disassembler, MCInst &MI, int64_t Value) { - MCInst const *Extender = HexagonMCInstrInfo::extenderForIndex( - MCB, HexagonMCInstrInfo::bundleSize(MCB)); - if (!Extender || MI.size() != HexagonMCInstrInfo::getExtendableOp(MCII, MI)) + MCInstrInfo MCII = *Disassembler.MCII; + if (!Disassembler.CurrentExtender || + MI.size() != HexagonMCInstrInfo::getExtendableOp(MCII, MI)) return Value; unsigned Alignment = HexagonMCInstrInfo::getExtentAlignment(MCII, MI); uint32_t Lower6 = static_cast(Value >> Alignment) & 0x3f; int64_t Bits; - bool Success = Extender->getOperand(0).getExpr()->evaluateAsAbsolute(Bits); - assert(Success); (void)Success; - uint32_t Upper26 = static_cast(Bits); - uint32_t Operand = Upper26 | Lower6; + bool Success = + Disassembler.CurrentExtender->getOperand(0).getExpr()->evaluateAsAbsolute( + Bits); + assert(Success); + (void)Success; + uint64_t Upper26 = static_cast(Bits); + uint64_t Operand = Upper26 | Lower6; return Operand; } - static HexagonDisassembler const &disassembler(void const *Decoder) { return *static_cast(Decoder); } - template static void signedDecoder(MCInst &MI, unsigned tmp, const void *Decoder) { HexagonDisassembler const &Disassembler = disassembler(Decoder); - int64_t 
FullValue = - fullValue(*Disassembler.MCII, **Disassembler.CurrentBundle, MI, - SignExtend64(tmp)); + int64_t FullValue = fullValue(Disassembler, MI, SignExtend64(tmp)); int64_t Extended = SignExtend64<32>(FullValue); HexagonMCInstrInfo::addConstant(MI, Extended, Disassembler.getContext()); } +} // Forward declare these because the auto-generated code will reference them. // Definitions are further down. @@ -107,8 +107,8 @@ static DecodeStatus DecodeIntRegsLow8RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); static DecodeStatus DecodeHvxVRRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder); + uint64_t Address, + const void *Decoder); static DecodeStatus DecodeDoubleRegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); @@ -116,14 +116,14 @@ static DecodeStatus DecodeGeneralDoubleLow8RegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); static DecodeStatus DecodeHvxWRRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder); + uint64_t Address, + const void *Decoder); static DecodeStatus DecodePredRegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); static DecodeStatus DecodeHvxQRRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder); + uint64_t Address, + const void *Decoder); static DecodeStatus DecodeCtrRegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); @@ -196,7 +196,6 @@ static DecodeStatus s4_2ImmDecoder(MCInst &MI, unsigned tmp, uint64_t, signedDecoder<6>(MI, tmp, Decoder); return MCDisassembler::Success; } - #include "HexagonGenDisassemblerTables.inc" static MCDisassembler *createHexagonDisassembler(const Target &T, @@ -220,7 +219,8 @@ DecodeStatus HexagonDisassembler::getInstruction(MCInst &MI, uint64_t &Size, Size = 0; *CurrentBundle = &MI; - MI = HexagonMCInstrInfo::createBundle(); + MI.setOpcode(Hexagon::BUNDLE); + MI.addOperand(MCOperand::createImm(0)); while (Result == Success && !Complete) { if (Bytes.size() < HEXAGON_INSTR_SIZE) return MCDisassembler::Fail; @@ -238,9 +238,89 @@ DecodeStatus HexagonDisassembler::getInstruction(MCInst &MI, uint64_t &Size, *getContext().getRegisterInfo(), false); if (!Checker.check()) return MCDisassembler::Fail; + remapInstruction(MI); return MCDisassembler::Success; } +void HexagonDisassembler::remapInstruction(MCInst &Instr) const { + for (auto I: HexagonMCInstrInfo::bundleInstructions(Instr)) { + auto &MI = const_cast(*I.getInst()); + switch (MI.getOpcode()) { + case Hexagon::S2_allocframe: + if (MI.getOperand(0).getReg() == Hexagon::R29) { + MI.setOpcode(Hexagon::S6_allocframe_to_raw); + MI.erase(MI.begin () + 1); + MI.erase(MI.begin ()); + } + break; + case Hexagon::L2_deallocframe: + if (MI.getOperand(0).getReg() == Hexagon::D15 && + MI.getOperand(1).getReg() == Hexagon::R30) { + MI.setOpcode(L6_deallocframe_map_to_raw); + MI.erase(MI.begin () + 1); + MI.erase(MI.begin ()); + } + break; + case Hexagon::L4_return: + if (MI.getOperand(0).getReg() == Hexagon::D15 && + MI.getOperand(1).getReg() == Hexagon::R30) { + MI.setOpcode(L6_return_map_to_raw); + MI.erase(MI.begin () + 1); + MI.erase(MI.begin ()); + } + break; + case Hexagon::L4_return_t: + if (MI.getOperand(0).getReg() == Hexagon::D15 && + MI.getOperand(2).getReg() == Hexagon::R30) { + MI.setOpcode(L4_return_map_to_raw_t); + MI.erase(MI.begin () + 2); + MI.erase(MI.begin ()); + } + break; + case Hexagon::L4_return_f: + 
if (MI.getOperand(0).getReg() == Hexagon::D15 && + MI.getOperand(2).getReg() == Hexagon::R30) { + MI.setOpcode(L4_return_map_to_raw_f); + MI.erase(MI.begin () + 2); + MI.erase(MI.begin ()); + } + break; + case Hexagon::L4_return_tnew_pt: + if (MI.getOperand(0).getReg() == Hexagon::D15 && + MI.getOperand(2).getReg() == Hexagon::R30) { + MI.setOpcode(L4_return_map_to_raw_tnew_pt); + MI.erase(MI.begin () + 2); + MI.erase(MI.begin ()); + } + break; + case Hexagon::L4_return_fnew_pt: + if (MI.getOperand(0).getReg() == Hexagon::D15 && + MI.getOperand(2).getReg() == Hexagon::R30) { + MI.setOpcode(L4_return_map_to_raw_fnew_pt); + MI.erase(MI.begin () + 2); + MI.erase(MI.begin ()); + } + break; + case Hexagon::L4_return_tnew_pnt: + if (MI.getOperand(0).getReg() == Hexagon::D15 && + MI.getOperand(2).getReg() == Hexagon::R30) { + MI.setOpcode(L4_return_map_to_raw_tnew_pnt); + MI.erase(MI.begin () + 2); + MI.erase(MI.begin ()); + } + break; + case Hexagon::L4_return_fnew_pnt: + if (MI.getOperand(0).getReg() == Hexagon::D15 && + MI.getOperand(2).getReg() == Hexagon::R30) { + MI.setOpcode(L4_return_map_to_raw_fnew_pnt); + MI.erase(MI.begin () + 2); + MI.erase(MI.begin ()); + } + break; + } + } +} + static void adjustDuplex(MCInst &MI, MCContext &Context) { switch (MI.getOpcode()) { case Hexagon::SA1_setin1: @@ -274,7 +354,7 @@ DecodeStatus HexagonDisassembler::getSingleInstruction( return DecodeStatus::Fail; } - MCInst const *Extender = HexagonMCInstrInfo::extenderForIndex( + CurrentExtender = HexagonMCInstrInfo::extenderForIndex( MCB, HexagonMCInstrInfo::bundleSize(MCB)); DecodeStatus Result = DecodeStatus::Fail; @@ -350,8 +430,12 @@ DecodeStatus HexagonDisassembler::getSingleInstruction( MI.setOpcode(Hexagon::DuplexIClass0 + duplexIClass); MCInst *MILow = new (getContext()) MCInst; MCInst *MIHigh = new (getContext()) MCInst; + auto TmpExtender = CurrentExtender; + CurrentExtender = + nullptr; // constant extenders in duplex must always be in slot 1 Result = decodeInstruction(DecodeLow, *MILow, Instruction & 0x1fff, Address, this, STI); + CurrentExtender = TmpExtender; if (Result != DecodeStatus::Success) return DecodeStatus::Fail; adjustDuplex(*MILow, getContext()); @@ -370,7 +454,7 @@ DecodeStatus HexagonDisassembler::getSingleInstruction( HexagonII::INST_PARSE_PACKET_END) Complete = true; - if (Extender != nullptr) + if (CurrentExtender != nullptr) Result = decodeInstruction(DecoderTableMustExtend32, MI, Instruction, Address, this, STI); @@ -429,25 +513,29 @@ DecodeStatus HexagonDisassembler::getSingleInstruction( unsigned Lookback = (Register & 0x6) >> 1; unsigned Offset = 1; bool Vector = HexagonMCInstrInfo::isVector(*MCII, MI); + bool PrevVector = false; auto Instructions = HexagonMCInstrInfo::bundleInstructions(**CurrentBundle); auto i = Instructions.end() - 1; for (auto n = Instructions.begin() - 1;; --i, ++Offset) { if (i == n) // Couldn't find producer return MCDisassembler::Fail; - if (Vector && !HexagonMCInstrInfo::isVector(*MCII, *i->getInst())) + bool CurrentVector = HexagonMCInstrInfo::isVector(*MCII, *i->getInst()); + if (Vector && !CurrentVector) // Skip scalars when calculating distances for vectors ++Lookback; - if (HexagonMCInstrInfo::isImmext(*i->getInst())) + if (HexagonMCInstrInfo::isImmext(*i->getInst()) && (Vector == PrevVector)) ++Lookback; + PrevVector = CurrentVector; if (Offset == Lookback) break; } auto const &Inst = *i->getInst(); bool SubregBit = (Register & 0x1) != 0; - if (SubregBit && HexagonMCInstrInfo::hasNewValue2(*MCII, Inst)) { + if 
(HexagonMCInstrInfo::hasNewValue2(*MCII, Inst)) { // If subreg bit is set we're selecting the second produced newvalue - unsigned Producer = + unsigned Producer = SubregBit ? + HexagonMCInstrInfo::getNewValueOperand(*MCII, Inst).getReg() : HexagonMCInstrInfo::getNewValueOperand2(*MCII, Inst).getReg(); assert(Producer != Hexagon::NoRegister); MCO.setReg(Producer); @@ -466,7 +554,7 @@ DecodeStatus HexagonDisassembler::getSingleInstruction( return MCDisassembler::Fail; } - if (Extender != nullptr) { + if (CurrentExtender != nullptr) { MCInst const &Inst = HexagonMCInstrInfo::isDuplex(*MCII, MI) ? *MI.getOperand(1).getInst() : MI; @@ -666,8 +754,7 @@ static DecodeStatus unsignedImmDecoder(MCInst &MI, unsigned tmp, uint64_t /*Address*/, const void *Decoder) { HexagonDisassembler const &Disassembler = disassembler(Decoder); - int64_t FullValue = - fullValue(*Disassembler.MCII, **Disassembler.CurrentBundle, MI, tmp); + int64_t FullValue = fullValue(Disassembler, MI, tmp); assert(FullValue >= 0 && "Negative in unsigned decoder"); HexagonMCInstrInfo::addConstant(MI, FullValue, Disassembler.getContext()); return MCDisassembler::Success; @@ -690,10 +777,8 @@ static DecodeStatus brtargetDecoder(MCInst &MI, unsigned tmp, uint64_t Address, // r13_2 is not extendable, so if there are no extent bits, it's r13_2 if (Bits == 0) Bits = 15; - uint32_t FullValue = - fullValue(*Disassembler.MCII, **Disassembler.CurrentBundle, MI, - SignExtend64(tmp, Bits)); - int64_t Extended = SignExtend64<32>(FullValue) + Address; + uint64_t FullValue = fullValue(Disassembler, MI, SignExtend64(tmp, Bits)); + uint32_t Extended = FullValue + Address; if (!Disassembler.tryAddingSymbolicOperand(MI, Extended, Address, true, 0, 4)) HexagonMCInstrInfo::addConstant(MI, Extended, Disassembler.getContext()); return MCDisassembler::Success; diff --git a/lib/Target/Hexagon/Hexagon.td b/lib/Target/Hexagon/Hexagon.td index 3218f2510e5f..6292e2a7a4ea 100644 --- a/lib/Target/Hexagon/Hexagon.td +++ b/lib/Target/Hexagon/Hexagon.td @@ -25,33 +25,36 @@ include "llvm/Target/Target.td" include "HexagonDepArch.td" // Hexagon ISA Extensions -def ExtensionHVXV60: SubtargetFeature<"hvxv60", "HexagonHVXVersion", +def ExtensionHVX: SubtargetFeature<"hvx", "HexagonHVXVersion", "Hexagon::ArchEnum::V60", "Hexagon HVX instructions">; +def ExtensionHVXV60: SubtargetFeature<"hvxv60", "HexagonHVXVersion", + "Hexagon::ArchEnum::V60", "Hexagon HVX instructions", + [ExtensionHVX]>; def ExtensionHVXV62: SubtargetFeature<"hvxv62", "HexagonHVXVersion", "Hexagon::ArchEnum::V62", "Hexagon HVX instructions", - [ExtensionHVXV60]>; -def ExtensionHVX: SubtargetFeature<"hvx", "HexagonHVXVersion", - "Hexagon::ArchEnum::V62", "Hexagon HVX instructions", - [ExtensionHVXV60, - ExtensionHVXV62]>; + [ExtensionHVX,ExtensionHVXV60]>; +def ExtensionHVXV65: SubtargetFeature<"hvxv65", "HexagonHVXVersion", + "Hexagon::ArchEnum::V65", "Hexagon HVX instructions", + [ExtensionHVX,ExtensionHVXV60, ExtensionHVXV62]>; def ExtensionHVX64B : SubtargetFeature<"hvx-length64b", "UseHVX64BOps", "true", - "Hexagon HVX 64B instructions", - [ExtensionHVXV60, ExtensionHVXV62]>; + "Hexagon HVX 64B instructions", [ExtensionHVX]>; def ExtensionHVX128B : SubtargetFeature<"hvx-length128b", "UseHVX128BOps", "true", - "Hexagon HVX 128B instructions", - [ExtensionHVXV60, ExtensionHVXV62]>; + "Hexagon HVX 128B instructions", [ExtensionHVX]>; // This is an alias to ExtensionHVX128B to accept the hvx-double as // an acceptable subtarget feature. 
def ExtensionHVXDbl : SubtargetFeature<"hvx-double", "UseHVX128BOps", "true", - "Hexagon HVX 128B instructions", - [ExtensionHVXV60, ExtensionHVXV62]>; + "Hexagon HVX 128B instructions", [ExtensionHVX128B]>; def FeatureLongCalls: SubtargetFeature<"long-calls", "UseLongCalls", "true", "Use constant-extended calls">; +def FeatureMemNoShuf: SubtargetFeature<"mem_noshuf", "HasMemNoShuf", "false", + "Supports mem_noshuf feature">; +def FeatureDuplex : SubtargetFeature<"duplex", "EnableDuplex", "true", + "Enable generation of duplex instruction">; //===----------------------------------------------------------------------===// // Hexagon Instruction Predicate Definitions. @@ -69,6 +72,8 @@ def UseHVXV60 : Predicate<"HST->useHVXOps()">, AssemblerPredicate<"ExtensionHVXV60">; def UseHVXV62 : Predicate<"HST->useHVXOps()">, AssemblerPredicate<"ExtensionHVXV62">; +def UseHVXV65 : Predicate<"HST->useHVXOps()">, + AssemblerPredicate<"ExtensionHVXV65">; def Hvx64 : HwMode<"+hvx-length64b">; def Hvx64old : HwMode<"-hvx-double">; @@ -80,21 +85,22 @@ def Hvx128old : HwMode<"+hvx-double">; //===----------------------------------------------------------------------===// class ImmRegShl; +// ImmRegRel - Filter class used to relate instructions having reg-reg form +// with their reg-imm counterparts. +class ImmRegRel; // PredRel - Filter class used to relate non-predicated instructions with their // predicated forms. class PredRel; // PredNewRel - Filter class used to relate predicated instructions with their // predicate-new forms. class PredNewRel: PredRel; -// ImmRegRel - Filter class used to relate instructions having reg-reg form -// with their reg-imm counterparts. -class ImmRegRel; // NewValueRel - Filter class used to relate regular store instructions with // their new-value store form. class NewValueRel: PredNewRel; // NewValueRel - Filter class used to relate load/store instructions having // different addressing modes with each other. 
class AddrModeRel: NewValueRel; +class PostInc_BaseImm; class IntrinsicsRel; //===----------------------------------------------------------------------===// @@ -220,6 +226,22 @@ def changeAddrMode_rr_io: InstrMapping { let ValueCols = [["BaseImmOffset"]]; } +def changeAddrMode_pi_io: InstrMapping { + let FilterClass = "PostInc_BaseImm"; + let RowFields = ["CextOpcode", "PredSense", "PNewValue", "isNVStore"]; + let ColFields = ["addrMode"]; + let KeyCol = ["PostInc"]; + let ValueCols = [["BaseImmOffset"]]; +} + +def changeAddrMode_io_pi: InstrMapping { + let FilterClass = "PostInc_BaseImm"; + let RowFields = ["CextOpcode", "PredSense", "PNewValue", "isNVStore"]; + let ColFields = ["addrMode"]; + let KeyCol = ["BaseImmOffset"]; + let ValueCols = [["PostInc"]]; +} + def changeAddrMode_rr_ur: InstrMapping { let FilterClass = "ImmRegShl"; let RowFields = ["CextOpcode", "PredSense", "PNewValue", "isNVStore"]; @@ -268,7 +290,7 @@ def getRealHWInstr : InstrMapping { let ValueCols = [["Pseudo"], ["Real"]]; } //===----------------------------------------------------------------------===// -// Register File, Calling Conv, Instruction Descriptions +// Register File, Instruction Descriptions //===----------------------------------------------------------------------===// include "HexagonSchedule.td" include "HexagonRegisterInfo.td" @@ -280,9 +302,11 @@ include "HexagonDepInstrFormats.td" include "HexagonDepInstrInfo.td" include "HexagonPseudo.td" include "HexagonPatterns.td" +include "HexagonPatternsV65.td" include "HexagonDepMappings.td" include "HexagonIntrinsics.td" include "HexagonMapAsm2IntrinV62.gen.td" +include "HexagonMapAsm2IntrinV65.gen.td" def HexagonInstrInfo : InstrInfo; @@ -295,15 +319,18 @@ class Proc; def : Proc<"hexagonv4", HexagonModelV4, - [ArchV4]>; + [ArchV4, FeatureDuplex]>; def : Proc<"hexagonv5", HexagonModelV4, - [ArchV4, ArchV5]>; + [ArchV4, ArchV5, FeatureDuplex]>; def : Proc<"hexagonv55", HexagonModelV55, - [ArchV4, ArchV5, ArchV55]>; + [ArchV4, ArchV5, ArchV55, FeatureDuplex]>; def : Proc<"hexagonv60", HexagonModelV60, - [ArchV4, ArchV5, ArchV55, ArchV60]>; + [ArchV4, ArchV5, ArchV55, ArchV60, FeatureDuplex]>; def : Proc<"hexagonv62", HexagonModelV62, - [ArchV4, ArchV5, ArchV55, ArchV60, ArchV62]>; + [ArchV4, ArchV5, ArchV55, ArchV60, ArchV62, FeatureDuplex]>; +def : Proc<"hexagonv65", HexagonModelV65, + [ArchV4, ArchV5, ArchV55, ArchV60, ArchV62, ArchV65, + FeatureMemNoShuf, FeatureDuplex]>; //===----------------------------------------------------------------------===// // Declare the target which we are implementing @@ -317,11 +344,17 @@ def HexagonAsmParser : AsmParser { def HexagonAsmParserVariant : AsmParserVariant { int Variant = 0; string TokenizingCharacters = "#()=:.<>!+*-|^&"; + string BreakCharacters = ""; +} + +def HexagonAsmWriter : AsmWriter { + string AsmWriterClassName = "InstPrinter"; + bit isMCAsmWriter = 1; } def Hexagon : Target { - // Pull in Instruction Info: let InstructionSet = HexagonInstrInfo; let AssemblyParsers = [HexagonAsmParser]; let AssemblyParserVariants = [HexagonAsmParserVariant]; + let AssemblyWriters = [HexagonAsmWriter]; } diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/lib/Target/Hexagon/HexagonAsmPrinter.cpp index b69299511bcb..68b1fe6bf4b1 100644 --- a/lib/Target/Hexagon/HexagonAsmPrinter.cpp +++ b/lib/Target/Hexagon/HexagonAsmPrinter.cpp @@ -615,7 +615,18 @@ void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst, MappedInst = TmpInst; return; } + case Hexagon::V6_vdd0: { + MCInst TmpInst; + assert 
(Inst.getOperand(0).isReg() && + "Expected register and none was found"); + TmpInst.setOpcode(Hexagon::V6_vsubw_dv); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(0)); + MappedInst = TmpInst; + return; + } case Hexagon::V6_vL32Ub_pi: case Hexagon::V6_vL32b_cur_pi: case Hexagon::V6_vL32b_nt_cur_pi: @@ -715,13 +726,25 @@ void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst, case Hexagon::V6_vS32b_qpred_ai: MappedInst = ScaleVectorOffset(Inst, 2, VectorSize, OutContext); return; + + // V65+ + case Hexagon::V6_vS32b_srls_ai: + MappedInst = ScaleVectorOffset(Inst, 1, VectorSize, OutContext); + return; + + case Hexagon::V6_vS32b_srls_pi: + MappedInst = ScaleVectorOffset(Inst, 2, VectorSize, OutContext); + return; + } } /// printMachineInstruction -- Print out a single Hexagon MI in Darwin syntax to /// the current output stream. void HexagonAsmPrinter::EmitInstruction(const MachineInstr *MI) { - MCInst MCB = HexagonMCInstrInfo::createBundle(); + MCInst MCB; + MCB.setOpcode(Hexagon::BUNDLE); + MCB.addOperand(MCOperand::createImm(0)); const MCInstrInfo &MCII = *Subtarget->getInstrInfo(); if (MI->isBundle()) { diff --git a/lib/Target/Hexagon/HexagonBitSimplify.cpp b/lib/Target/Hexagon/HexagonBitSimplify.cpp index cbf1b0dc040a..9e73766b6fdc 100644 --- a/lib/Target/Hexagon/HexagonBitSimplify.cpp +++ b/lib/Target/Hexagon/HexagonBitSimplify.cpp @@ -895,7 +895,7 @@ bool HexagonBitSimplify::getUsedBits(unsigned Opc, unsigned OpN, } // Calculate the register class that matches Reg:Sub. For example, if -// vreg1 is a double register, then vreg1:isub_hi would match the "int" +// %1 is a double register, then %1:isub_hi would match the "int" // register class. const TargetRegisterClass *HexagonBitSimplify::getFinalVRegClass( const BitTracker::RegisterRef &RR, MachineRegisterInfo &MRI) { @@ -1246,11 +1246,11 @@ bool RedundantInstrElimination::computeUsedBits(unsigned Reg, BitVector &Bits) { // holds the bits for the entire register. To keep track of that, the // argument Begin indicates where in Bits is the lowest-significant bit // of the register used in operand OpN. For example, in instruction: -// vreg1 = S2_lsr_i_r vreg2:isub_hi, 10 +// %1 = S2_lsr_i_r %2:isub_hi, 10 // the operand 1 is a 32-bit register, which happens to be a subregister -// of the 64-bit register vreg2, and that subregister starts at position 32. +// of the 64-bit register %2, and that subregister starts at position 32. // In this case Begin=32, since Bits[32] would be the lowest-significant bit -// of vreg2:isub_hi. +// of %2:isub_hi. bool RedundantInstrElimination::computeUsedBits(const MachineInstr &MI, unsigned OpN, BitVector &Bits, uint16_t Begin) { unsigned Opc = MI.getOpcode(); @@ -1356,11 +1356,11 @@ bool RedundantInstrElimination::processBlock(MachineBasicBlock &B, // This pass can create copies between registers that don't have the // exact same values. Updating the tracker has to involve updating // all dependent cells. Example: - // vreg1 = inst vreg2 ; vreg1 != vreg2, but used bits are equal + // %1 = inst %2 ; %1 != %2, but used bits are equal // - // vreg3 = copy vreg2 ; <- inserted - // ... = vreg3 ; <- replaced from vreg2 - // Indirectly, we can create a "copy" between vreg1 and vreg2 even + // %3 = copy %2 ; <- inserted + // ... = %3 ; <- replaced from %2 + // Indirectly, we can create a "copy" between %1 and %2 even // though their exact values do not match. 
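The computeUsedBits comment above tracks an operand's used bits inside the bit vector of the full register, offset by Begin. Below is a small worked version of that bookkeeping, under two stated assumptions: std::bitset stands in for LLVM's BitVector, and usedBitsOfLsr is a made-up helper, not an LLVM API. For the comment's own example, a logical shift right by 10 of a 32-bit sub-register that starts at bit 32, the used bits land at positions 42..63 of the 64-bit register.

```cpp
// Sketch only: place the bits actually read by "lsr #Shift" of a 32-bit
// operand into a 64-bit bit vector, where the operand's bit 0 sits at Begin
// (Begin = 32 for an isub_hi sub-register of a double register).
#include <bitset>
#include <cstdio>

std::bitset<64> usedBitsOfLsr(unsigned Shift, unsigned Begin) {
  std::bitset<64> Bits;
  for (unsigned i = Shift; i < 32; ++i)    // lsr #Shift reads bits Shift..31
    Bits.set(Begin + i);
  return Bits;
}

int main() {
  std::bitset<64> Used = usedBitsOfLsr(10, 32);
  unsigned Lo = 64, Hi = 0;
  for (unsigned i = 0; i < 64; ++i)
    if (Used[i]) {
      if (i < Lo) Lo = i;
      if (i > Hi) Hi = i;
    }
  std::printf("used bits %u..%u of the double register\n", Lo, Hi);  // 42..63
  return 0;
}
```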
BT.visit(*CopyI); Changed = true; @@ -2313,10 +2313,10 @@ bool BitSimplification::genBitSplit(MachineInstr *MI, // Check for tstbit simplification opportunity, where the bit being checked // can be tracked back to another register. For example: -// vreg2 = S2_lsr_i_r vreg1, 5 -// vreg3 = S2_tstbit_i vreg2, 0 +// %2 = S2_lsr_i_r %1, 5 +// %3 = S2_tstbit_i %2, 0 // => -// vreg3 = S2_tstbit_i vreg1, 5 +// %3 = S2_tstbit_i %1, 5 bool BitSimplification::simplifyTstbit(MachineInstr *MI, BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) { unsigned Opc = MI->getOpcode(); @@ -2631,7 +2631,7 @@ bool BitSimplification::processBlock(MachineBasicBlock &B, } bool HexagonBitSimplify::runOnMachineFunction(MachineFunction &MF) { - if (skipFunction(*MF.getFunction())) + if (skipFunction(MF.getFunction())) return false; auto &HST = MF.getSubtarget(); @@ -2977,7 +2977,7 @@ void HexagonLoopRescheduling::moveGroup(InstrGroup &G, MachineBasicBlock &LB, } bool HexagonLoopRescheduling::processLoop(LoopCand &C) { - DEBUG(dbgs() << "Processing loop in BB#" << C.LB->getNumber() << "\n"); + DEBUG(dbgs() << "Processing loop in " << printMBBReference(*C.LB) << "\n"); std::vector Phis; for (auto &I : *C.LB) { if (!I.isPHI()) @@ -3181,7 +3181,7 @@ bool HexagonLoopRescheduling::processLoop(LoopCand &C) { } bool HexagonLoopRescheduling::runOnMachineFunction(MachineFunction &MF) { - if (skipFunction(*MF.getFunction())) + if (skipFunction(MF.getFunction())) return false; auto &HST = MF.getSubtarget(); diff --git a/lib/Target/Hexagon/HexagonBitTracker.cpp b/lib/Target/Hexagon/HexagonBitTracker.cpp index 8297c474b8f1..b6e220beb0c6 100644 --- a/lib/Target/Hexagon/HexagonBitTracker.cpp +++ b/lib/Target/Hexagon/HexagonBitTracker.cpp @@ -61,7 +61,7 @@ HexagonEvaluator::HexagonEvaluator(const HexagonRegisterInfo &tri, // passed via registers. unsigned InVirtReg, InPhysReg = 0; - for (const Argument &Arg : MF.getFunction()->args()) { + for (const Argument &Arg : MF.getFunction().args()) { Type *ATy = Arg.getType(); unsigned Width = 0; if (ATy->isIntegerTy()) diff --git a/lib/Target/Hexagon/HexagonBlockRanges.cpp b/lib/Target/Hexagon/HexagonBlockRanges.cpp index 00db408b8ed7..ff915ca59dae 100644 --- a/lib/Target/Hexagon/HexagonBlockRanges.cpp +++ b/lib/Target/Hexagon/HexagonBlockRanges.cpp @@ -368,7 +368,7 @@ void HexagonBlockRanges::computeInitialLiveRanges(InstrIndexMap &IndexMap, } } // Defs and clobbers can overlap, e.g. - // %D0 = COPY %vreg5, %R0, %R1 + // dead %d0 = COPY %5, implicit-def %r0, implicit-def %r1 for (RegisterRef R : Defs) Clobbers.erase(R); diff --git a/lib/Target/Hexagon/HexagonCFGOptimizer.cpp b/lib/Target/Hexagon/HexagonCFGOptimizer.cpp index 6e43574ecb1c..a22ac8c9fdf5 100644 --- a/lib/Target/Hexagon/HexagonCFGOptimizer.cpp +++ b/lib/Target/Hexagon/HexagonCFGOptimizer.cpp @@ -114,7 +114,7 @@ bool HexagonCFGOptimizer::isOnFallThroughPath(MachineBasicBlock *MBB) { } bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) { - if (skipFunction(*Fn.getFunction())) + if (skipFunction(Fn.getFunction())) return false; // Loop over all of the basic blocks. 
diff --git a/lib/Target/Hexagon/HexagonConstExtenders.cpp b/lib/Target/Hexagon/HexagonConstExtenders.cpp index 1e55c4b038e5..294a6da69f51 100644 --- a/lib/Target/Hexagon/HexagonConstExtenders.cpp +++ b/lib/Target/Hexagon/HexagonConstExtenders.cpp @@ -1831,7 +1831,7 @@ const MachineOperand &HCE::getStoredValueOp(const MachineInstr &MI) const { } bool HCE::runOnMachineFunction(MachineFunction &MF) { - if (skipFunction(*MF.getFunction())) + if (skipFunction(MF.getFunction())) return false; DEBUG(MF.print(dbgs() << "Before " << getPassName() << '\n', nullptr)); diff --git a/lib/Target/Hexagon/HexagonConstPropagation.cpp b/lib/Target/Hexagon/HexagonConstPropagation.cpp index ed6c40deeba9..8ac96f3a4bfa 100644 --- a/lib/Target/Hexagon/HexagonConstPropagation.cpp +++ b/lib/Target/Hexagon/HexagonConstPropagation.cpp @@ -187,7 +187,7 @@ namespace { // Mapping: vreg -> cell // The keys are registers _without_ subregisters. This won't allow - // definitions in the form of "vreg:subreg = ...". Such definitions + // definitions in the form of "vreg:subreg = ...". Such definitions // would be questionable from the point of view of SSA, since the "vreg" // could not be initialized in its entirety (specifically, an instruction // defining the "other part" of "vreg" would also count as a definition @@ -280,7 +280,7 @@ namespace { public: MachineConstEvaluator(MachineFunction &Fn) : TRI(*Fn.getSubtarget().getRegisterInfo()), - MF(Fn), CX(Fn.getFunction()->getContext()) {} + MF(Fn), CX(Fn.getFunction().getContext()) {} virtual ~MachineConstEvaluator() = default; // The required interface: @@ -617,7 +617,7 @@ void MachineConstPropagator::CellMap::print(raw_ostream &os, void MachineConstPropagator::visitPHI(const MachineInstr &PN) { const MachineBasicBlock *MB = PN.getParent(); unsigned MBN = MB->getNumber(); - DEBUG(dbgs() << "Visiting FI(BB#" << MBN << "): " << PN); + DEBUG(dbgs() << "Visiting FI(" << printMBBReference(*MB) << "): " << PN); const MachineOperand &MD = PN.getOperand(0); Register DefR(MD); @@ -642,8 +642,8 @@ void MachineConstPropagator::visitPHI(const MachineInstr &PN) { const MachineBasicBlock *PB = PN.getOperand(i+1).getMBB(); unsigned PBN = PB->getNumber(); if (!EdgeExec.count(CFGEdge(PBN, MBN))) { - DEBUG(dbgs() << " edge BB#" << PBN << "->BB#" << MBN - << " not executable\n"); + DEBUG(dbgs() << " edge " << printMBBReference(*PB) << "->" + << printMBBReference(*MB) << " not executable\n"); continue; } const MachineOperand &SO = PN.getOperand(i); @@ -658,9 +658,8 @@ void MachineConstPropagator::visitPHI(const MachineInstr &PN) { LatticeCell SrcC; bool Eval = MCE.evaluate(UseR, Cells.get(UseR.Reg), SrcC); - DEBUG(dbgs() << " edge from BB#" << PBN << ": " - << printReg(UseR.Reg, &MCE.TRI, UseR.SubReg) - << SrcC << '\n'); + DEBUG(dbgs() << " edge from " << printMBBReference(*PB) << ": " + << printReg(UseR.Reg, &MCE.TRI, UseR.SubReg) << SrcC << '\n'); Changed |= Eval ? 
DefC.meet(SrcC) : DefC.setBottom(); Cells.update(DefR.Reg, DefC); @@ -672,7 +671,7 @@ void MachineConstPropagator::visitPHI(const MachineInstr &PN) { } void MachineConstPropagator::visitNonBranch(const MachineInstr &MI) { - DEBUG(dbgs() << "Visiting MI(BB#" << MI.getParent()->getNumber() + DEBUG(dbgs() << "Visiting MI(" << printMBBReference(*MI.getParent()) << "): " << MI); CellMap Outputs; bool Eval = MCE.evaluate(MI, Cells, Outputs); @@ -729,8 +728,8 @@ void MachineConstPropagator::visitBranchesFrom(const MachineInstr &BrI) { while (It != End) { const MachineInstr &MI = *It; InstrExec.insert(&MI); - DEBUG(dbgs() << "Visiting " << (EvalOk ? "BR" : "br") << "(BB#" - << MBN << "): " << MI); + DEBUG(dbgs() << "Visiting " << (EvalOk ? "BR" : "br") << "(" + << printMBBReference(B) << "): " << MI); // Do not evaluate subsequent branches if the evaluation of any of the // previous branches failed. Keep iterating over the branches only // to mark them as executable. @@ -772,7 +771,8 @@ void MachineConstPropagator::visitBranchesFrom(const MachineInstr &BrI) { for (const MachineBasicBlock *TB : Targets) { unsigned TBN = TB->getNumber(); - DEBUG(dbgs() << " pushing edge BB#" << MBN << " -> BB#" << TBN << "\n"); + DEBUG(dbgs() << " pushing edge " << printMBBReference(B) << " -> " + << printMBBReference(*TB) << "\n"); FlowQ.push(CFGEdge(MBN, TBN)); } } @@ -870,8 +870,10 @@ void MachineConstPropagator::propagate(MachineFunction &MF) { CFGEdge Edge = FlowQ.front(); FlowQ.pop(); - DEBUG(dbgs() << "Picked edge BB#" << Edge.first << "->BB#" - << Edge.second << '\n'); + DEBUG(dbgs() << "Picked edge " + << printMBBReference(*MF.getBlockNumbered(Edge.first)) << "->" + << printMBBReference(*MF.getBlockNumbered(Edge.second)) + << '\n'); if (Edge.first != EntryNum) if (EdgeExec.count(Edge)) continue; @@ -934,7 +936,8 @@ void MachineConstPropagator::propagate(MachineFunction &MF) { for (const MachineBasicBlock *SB : B.successors()) { unsigned SN = SB->getNumber(); if (!EdgeExec.count(CFGEdge(BN, SN))) - dbgs() << " BB#" << BN << " -> BB#" << SN << '\n'; + dbgs() << " " << printMBBReference(B) << " -> " + << printMBBReference(*SB) << '\n'; } } }); @@ -1887,10 +1890,8 @@ namespace { } bool runOnMachineFunction(MachineFunction &MF) override { - const Function *F = MF.getFunction(); - if (!F) - return false; - if (skipFunction(*F)) + const Function &F = MF.getFunction(); + if (skipFunction(F)) return false; HexagonConstEvaluator HCE(MF); @@ -1974,7 +1975,7 @@ bool HexagonConstEvaluator::evaluate(const MachineInstr &MI, { const MachineOperand &VO = MI.getOperand(1); // The operand of CONST32 can be a blockaddress, e.g. - // %vreg0 = CONST32 + // %0 = CONST32 // Do this check for all instructions for safety. 
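The propagation logic in the hunks above follows the usual dataflow shape: a cell starts at top, is met with the value arriving over each executable predecessor edge in visitPHI, and drops to bottom on conflict or failed evaluation, with the return value of meet deciding whether dependent instructions get revisited. The pass's real LatticeCell is richer (it can carry several candidate values and extra properties), so the sketch below only shows that three-level core; Cell and its members are illustrative names, not the LLVM types.

```cpp
// Sketch only: a three-level constant lattice and its meet step.
#include <cstdint>
#include <cstdio>

struct Cell {
  enum Kind { Top, Const, Bottom };
  Kind K;
  int64_t Value;
  Cell() : K(Top), Value(0) {}
  explicit Cell(int64_t V) : K(Const), Value(V) {}

  void setBottom() { K = Bottom; }         // used when evaluation fails

  // Meet with a value coming in over one executable predecessor edge.
  // Returns true if this cell changed, i.e. users must be revisited.
  bool meet(const Cell &In) {
    if (In.K == Top || K == Bottom)
      return false;                        // nothing new to learn
    if (K == Top) {
      *this = In;
      return true;                         // first piece of information
    }
    if (In.K == Bottom || In.Value != Value) {
      K = Bottom;                          // conflicting facts: give up
      return true;
    }
    return false;                          // same constant, no change
  }
};

int main() {
  Cell Phi;                                // starts at Top: "no info yet"
  Cell A(42), B(42), C(7);
  Phi.meet(A);                             // Top meet 42 -> Const 42
  Phi.meet(B);                             // 42 meet 42  -> Const 42 (no change)
  bool Changed = Phi.meet(C);              // 42 meet 7   -> Bottom
  std::printf("final kind: %s, changed: %d\n",
              Phi.K == Cell::Bottom ? "bottom" : "not bottom", Changed);
  return 0;
}
```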
if (!VO.isImm()) return false; @@ -2922,7 +2923,7 @@ bool HexagonConstEvaluator::rewriteHexConstDefs(MachineInstr &MI, DEBUG({ if (!NewInstrs.empty()) { MachineFunction &MF = *MI.getParent()->getParent(); - dbgs() << "In function: " << MF.getFunction()->getName() << "\n"; + dbgs() << "In function: " << MF.getName() << "\n"; dbgs() << "Rewrite: for " << MI << " created " << *NewInstrs[0]; for (unsigned i = 1; i < NewInstrs.size(); ++i) dbgs() << " " << *NewInstrs[i]; @@ -3126,7 +3127,7 @@ bool HexagonConstEvaluator::rewriteHexBranch(MachineInstr &BrI, if (BrI.getOpcode() == Hexagon::J2_jump) return false; - DEBUG(dbgs() << "Rewrite(BB#" << B.getNumber() << "):" << BrI); + DEBUG(dbgs() << "Rewrite(" << printMBBReference(B) << "):" << BrI); bool Rewritten = false; if (NumTargets > 0) { assert(!FallsThru && "This should have been checked before"); @@ -3144,7 +3145,7 @@ bool HexagonConstEvaluator::rewriteHexBranch(MachineInstr &BrI, BrI.setDesc(JD); while (BrI.getNumOperands() > 0) BrI.RemoveOperand(0); - // This ensures that all implicit operands (e.g. %R31, etc) + // This ensures that all implicit operands (e.g. implicit-def %r31, etc) // are present in the rewritten branch. for (auto &Op : NI->operands()) BrI.addOperand(Op); diff --git a/lib/Target/Hexagon/HexagonCopyToCombine.cpp b/lib/Target/Hexagon/HexagonCopyToCombine.cpp index a27993116d81..087a77203fcb 100644 --- a/lib/Target/Hexagon/HexagonCopyToCombine.cpp +++ b/lib/Target/Hexagon/HexagonCopyToCombine.cpp @@ -351,11 +351,11 @@ bool HexagonCopyToCombine::isSafeToMoveTogether(MachineInstr &I1, // kill flag for a register (a removeRegisterKilled() analogous to // addRegisterKilled) that handles aliased register correctly. // * or has a killed aliased register use of I1's use reg - // %D4 = A2_tfrpi 16 - // %R6 = A2_tfr %R9 - // %R8 = KILL %R8, %D4 + // %d4 = A2_tfrpi 16 + // %r6 = A2_tfr %r9 + // %r8 = KILL %r8, implicit killed %d4 // If we want to move R6 = across the KILL instruction we would have - // to remove the %D4 operand. For now, we are + // to remove the implicit killed %d4 operand. For now, we are // conservative and disallow the move. // we can't move I1 across it. if (MI.isDebugValue()) { @@ -459,7 +459,7 @@ HexagonCopyToCombine::findPotentialNewifiableTFRs(MachineBasicBlock &BB) { } bool HexagonCopyToCombine::runOnMachineFunction(MachineFunction &MF) { - if (skipFunction(*MF.getFunction())) + if (skipFunction(MF.getFunction())) return false; if (IsCombinesDisabled) return false; @@ -471,8 +471,8 @@ bool HexagonCopyToCombine::runOnMachineFunction(MachineFunction &MF) { TRI = ST->getRegisterInfo(); TII = ST->getInstrInfo(); - const Function *F = MF.getFunction(); - bool OptForSize = F->hasFnAttribute(Attribute::OptimizeForSize); + const Function &F = MF.getFunction(); + bool OptForSize = F.hasFnAttribute(Attribute::OptimizeForSize); // Combine aggressively (for code size) ShouldCombineAggressively = diff --git a/lib/Target/Hexagon/HexagonDepArch.h b/lib/Target/Hexagon/HexagonDepArch.h index 92573d331326..dc75f8f63400 100644 --- a/lib/Target/Hexagon/HexagonDepArch.h +++ b/lib/Target/Hexagon/HexagonDepArch.h @@ -1,4 +1,4 @@ -//===--- HexagonDepArch.h -------------------------------------------------===// +//===- HexagonDepArch.h ---------------------------------------------------===// // // The LLVM Compiler Infrastructure // @@ -6,12 +6,16 @@ // License. See LICENSE.TXT for details. 
// //===----------------------------------------------------------------------===// +// Automatically generated file, please consult code owner before editing. +//===----------------------------------------------------------------------===// + + #ifndef HEXAGON_DEP_ARCH_H #define HEXAGON_DEP_ARCH_H namespace llvm { namespace Hexagon { -enum class ArchEnum { V4, V5, V55, V60, V62 }; +enum class ArchEnum { V4,V5,V55,V60,V62,V65 }; } // namespace Hexagon -} // namespace llvm +} // namespace llvm; #endif // HEXAGON_DEP_ARCH_H diff --git a/lib/Target/Hexagon/HexagonDepArch.td b/lib/Target/Hexagon/HexagonDepArch.td index 98403956e6ad..87dcd966f2ed 100644 --- a/lib/Target/Hexagon/HexagonDepArch.td +++ b/lib/Target/Hexagon/HexagonDepArch.td @@ -1,4 +1,4 @@ -//===--- HexagonDepArch.td ------------------------------------------------===// +//===- HexagonDepArch.td --------------------------------------------------===// // // The LLVM Compiler Infrastructure // @@ -6,7 +6,12 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// +// Automatically generated file, please consult code owner before editing. +//===----------------------------------------------------------------------===// + +def ArchV65: SubtargetFeature<"v65", "HexagonArchVersion", "Hexagon::ArchEnum::V65", "Enable Hexagon V65 architecture">; +def HasV65T : Predicate<"HST->hasV65TOps()">, AssemblerPredicate<"ArchV65">; def ArchV62: SubtargetFeature<"v62", "HexagonArchVersion", "Hexagon::ArchEnum::V62", "Enable Hexagon V62 architecture">; def HasV62T : Predicate<"HST->hasV62TOps()">, AssemblerPredicate<"ArchV62">; def ArchV60: SubtargetFeature<"v60", "HexagonArchVersion", "Hexagon::ArchEnum::V60", "Enable Hexagon V60 architecture">; diff --git a/lib/Target/AMDGPU/Processors.td b/lib/Target/Hexagon/HexagonDepDecoders.h similarity index 52% rename from lib/Target/AMDGPU/Processors.td rename to lib/Target/Hexagon/HexagonDepDecoders.h index d50dae78e247..020362a95909 100644 --- a/lib/Target/AMDGPU/Processors.td +++ b/lib/Target/Hexagon/HexagonDepDecoders.h @@ -1,4 +1,4 @@ -//===-- Processors.td - AMDGPU Processor definitions ----------------------===// +//===- HexagonDepDecoders.h -----------------------------------------------===// // // The LLVM Compiler Infrastructure // @@ -6,7 +6,8 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// +// Automatically generated file, please consult code owner before editing. +//===----------------------------------------------------------------------===// + + -FIXME: Deleting this file broke buildbots that don't do full rebuilds. This -file is no longer used by the backend, so it can be deleted once all -the buildbots update there dependencies. diff --git a/lib/Target/Hexagon/HexagonDepIICHVX.td b/lib/Target/Hexagon/HexagonDepIICHVX.td index 1c1788264c66..b27cdae81a28 100644 --- a/lib/Target/Hexagon/HexagonDepIICHVX.td +++ b/lib/Target/Hexagon/HexagonDepIICHVX.td @@ -1,4 +1,4 @@ -//===--- HexagonDepIICHVX.td ----------------------------------------------===// +//===- HexagonDepIICHVX.td ------------------------------------------------===// // // The LLVM Compiler Infrastructure // @@ -6,11 +6,15 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// +// Automatically generated file, please consult code owner before editing. 
+//===----------------------------------------------------------------------===// + def tc_0317c6ca : InstrItinClass; def tc_1b93bdc6 : InstrItinClass; def tc_2171ebae : InstrItinClass; def tc_28978789 : InstrItinClass; +def tc_29841470 : InstrItinClass; def tc_316c637c : InstrItinClass; def tc_354299ad : InstrItinClass; def tc_35e92f8e : InstrItinClass; @@ -20,39 +24,49 @@ def tc_41f4b64e : InstrItinClass; def tc_41f99e1c : InstrItinClass; def tc_45453b98 : InstrItinClass; def tc_4e2a5159 : InstrItinClass; +def tc_4f190ba3 : InstrItinClass; def tc_4fd8566e : InstrItinClass; def tc_51cd3aab : InstrItinClass; def tc_5a9fc4ec : InstrItinClass; +def tc_5c03dc63 : InstrItinClass; def tc_5c120602 : InstrItinClass; def tc_5cbf490b : InstrItinClass; +def tc_63e3d94c : InstrItinClass; def tc_644584f8 : InstrItinClass; +def tc_66bb62ea : InstrItinClass; def tc_69b6dd20 : InstrItinClass; def tc_6b78cf13 : InstrItinClass; def tc_6fd9ad30 : InstrItinClass; def tc_71337255 : InstrItinClass; def tc_72ad7b54 : InstrItinClass; +def tc_7474003e : InstrItinClass; def tc_77a4c701 : InstrItinClass; def tc_7c3f55c4 : InstrItinClass; def tc_7e9f581b : InstrItinClass; def tc_7fa82b08 : InstrItinClass; def tc_7fa8b40f : InstrItinClass; def tc_85d237e3 : InstrItinClass; +def tc_8a6eb39a : InstrItinClass; def tc_8b6a873f : InstrItinClass; def tc_908a4c8c : InstrItinClass; def tc_9311da3f : InstrItinClass; +def tc_94f43c04 : InstrItinClass; def tc_9777e6bf : InstrItinClass; def tc_97c165b9 : InstrItinClass; +def tc_98733e9d : InstrItinClass; def tc_99093773 : InstrItinClass; def tc_9b9642a1 : InstrItinClass; def tc_9c267309 : InstrItinClass; def tc_a3127e12 : InstrItinClass; def tc_a4c9df3b : InstrItinClass; +def tc_a807365d : InstrItinClass; def tc_aedb9f9e : InstrItinClass; def tc_b06ab583 : InstrItinClass; def tc_b712833a : InstrItinClass; def tc_b77635b4 : InstrItinClass; def tc_bbaf280e : InstrItinClass; def tc_bf142ae2 : InstrItinClass; +def tc_bfe309d5 : InstrItinClass; def tc_c00bf9c9 : InstrItinClass; def tc_c4b515c5 : InstrItinClass; def tc_cbf6d1dc : InstrItinClass; @@ -65,14 +79,18 @@ def tc_d7bea0ec : InstrItinClass; def tc_d98f4d63 : InstrItinClass; def tc_da979fb3 : InstrItinClass; def tc_db5b9e2f : InstrItinClass; +def tc_df54ad52 : InstrItinClass; def tc_e172d86a : InstrItinClass; def tc_e231aa4f : InstrItinClass; def tc_e3748cdf : InstrItinClass; def tc_e5053c8f : InstrItinClass; def tc_e6299d16 : InstrItinClass; def tc_eb669007 : InstrItinClass; +def tc_ec58f88a : InstrItinClass; def tc_eda67dcd : InstrItinClass; +def tc_ee927c0e : InstrItinClass; def tc_f3fc3f83 : InstrItinClass; +def tc_fa99dc24 : InstrItinClass; class DepHVXItinV55 { list DepHVXItinV55_list = [ @@ -97,6 +115,11 @@ class DepHVXItinV55 { InstrStage<1, [CVI_ALL]>], [3, 2], [HVX_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_ST]>], [1, 2], + [Hex_FWD, Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7, 7], @@ -146,6 +169,12 @@ class DepHVXItinV55 { InstrStage<1, [CVI_XLSHF]>], [9, 5, 5, 2], [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7, 7], + [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [SLOT1], 0>, @@ -163,6 +192,11 @@ class DepHVXItinV55 { InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7, 7], [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_ST]>], [3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + 
InstrItinData , InstrStage<1, [CVI_XLSHF]>], [9, 9, 5, 5, 2], @@ -174,11 +208,23 @@ class DepHVXItinV55 { InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 1, 2], [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7], + [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7], [HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 2], @@ -206,6 +252,11 @@ class DepHVXItinV55 { InstrStage<1, [CVI_XLSHF]>], [9, 7, 5], [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_LD]>], [9, 1, 2], @@ -239,6 +290,11 @@ class DepHVXItinV55 { InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [2, 1, 2, 7], [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9], + [HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_ST]>], [3, 2, 1, 2, 5], @@ -254,6 +310,12 @@ class DepHVXItinV55 { InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7, 2], [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7], + [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [4, 7, 1], @@ -264,6 +326,12 @@ class DepHVXItinV55 { InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7], [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7], + [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_ST], 0>, @@ -291,6 +359,12 @@ class DepHVXItinV55 { InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 1, 2, 7], [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>, + InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_ST], 0>, @@ -323,6 +397,12 @@ class DepHVXItinV55 { InstrStage<1, [CVI_XLANE]>], [9, 5, 2], [HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_SHIFT]>], [9, 7, 5, 2], @@ -386,6 +466,12 @@ class DepHVXItinV55 { InstrStage<1, [CVI_ST]>], [3, 1, 2, 5], [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7, 7], + [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5], @@ -418,15 +504,32 @@ class DepHVXItinV55 { InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 1, 2], [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7, 7], + [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY01]>], [9, 5, 5], [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>, + InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , InstrStage<1, 
[CVI_XLANE]>], [9, 5, 5], - [HVX_FWD, HVX_FWD, HVX_FWD]> + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]> ]; } @@ -453,6 +556,11 @@ class DepHVXItinV60 { InstrStage<1, [CVI_ALL]>], [3, 2], [HVX_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_ST]>], [1, 2], + [Hex_FWD, Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7, 7], @@ -502,6 +610,12 @@ class DepHVXItinV60 { InstrStage<1, [CVI_XLSHF]>], [9, 5, 5, 2], [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7, 7], + [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [SLOT1], 0>, @@ -519,6 +633,11 @@ class DepHVXItinV60 { InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7, 7], [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_ST]>], [3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_XLSHF]>], [9, 9, 5, 5, 2], @@ -530,11 +649,23 @@ class DepHVXItinV60 { InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 1, 2], [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7], + [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7], [HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 2], @@ -562,6 +693,11 @@ class DepHVXItinV60 { InstrStage<1, [CVI_XLSHF]>], [9, 7, 5], [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_LD]>], [9, 1, 2], @@ -595,6 +731,11 @@ class DepHVXItinV60 { InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [2, 1, 2, 7], [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9], + [HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_ST]>], [3, 2, 1, 2, 5], @@ -610,6 +751,12 @@ class DepHVXItinV60 { InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7, 2], [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7], + [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [4, 7, 1], @@ -620,6 +767,12 @@ class DepHVXItinV60 { InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7], [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7], + [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_ST], 0>, @@ -647,6 +800,12 @@ class DepHVXItinV60 { InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 1, 2, 7], [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>, + InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_ST], 0>, @@ -679,6 +838,12 @@ class DepHVXItinV60 { InstrStage<1, [CVI_XLANE]>], [9, 5, 2], [HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7], + 
[Hex_FWD, Hex_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_SHIFT]>], [9, 7, 5, 2], @@ -742,6 +907,12 @@ class DepHVXItinV60 { InstrStage<1, [CVI_ST]>], [3, 1, 2, 5], [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7, 7], + [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5], @@ -774,15 +945,32 @@ class DepHVXItinV60 { InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 1, 2], [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7, 7], + [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY01]>], [9, 5, 5], [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>, + InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_XLANE]>], [9, 5, 5], - [HVX_FWD, HVX_FWD, HVX_FWD]> + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]> ]; } @@ -809,6 +997,11 @@ class DepHVXItinV62 { InstrStage<1, [CVI_ALL]>], [3, 2], [HVX_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_ST]>], [1, 2], + [Hex_FWD, Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7, 7], @@ -858,6 +1051,12 @@ class DepHVXItinV62 { InstrStage<1, [CVI_XLSHF]>], [9, 5, 5, 2], [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7, 7], + [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [SLOT1], 0>, @@ -875,6 +1074,11 @@ class DepHVXItinV62 { InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7, 7], [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_ST]>], [3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_XLSHF]>], [9, 9, 5, 5, 2], @@ -886,11 +1090,23 @@ class DepHVXItinV62 { InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 1, 2], [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7], + [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7], [HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 2], @@ -918,6 +1134,11 @@ class DepHVXItinV62 { InstrStage<1, [CVI_XLSHF]>], [9, 7, 5], [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_LD]>], [9, 1, 2], @@ -951,6 +1172,11 @@ class DepHVXItinV62 { InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [2, 1, 2, 7], [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9], + [HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_ST]>], [3, 2, 1, 2, 5], @@ -966,6 +1192,12 @@ class DepHVXItinV62 { InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7, 2], [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 
2, 7, 7], + [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [4, 7, 1], @@ -976,6 +1208,12 @@ class DepHVXItinV62 { InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7], [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7], + [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_ST], 0>, @@ -1003,6 +1241,12 @@ class DepHVXItinV62 { InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 1, 2, 7], [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>, + InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_ST], 0>, @@ -1035,6 +1279,12 @@ class DepHVXItinV62 { InstrStage<1, [CVI_XLANE]>], [9, 5, 2], [HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_SHIFT]>], [9, 7, 5, 2], @@ -1098,6 +1348,12 @@ class DepHVXItinV62 { InstrStage<1, [CVI_ST]>], [3, 1, 2, 5], [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7, 7], + [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5], @@ -1130,14 +1386,472 @@ class DepHVXItinV62 { InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 1, 2], [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7, 7], + [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY01]>], [9, 5, 5], [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>, + InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_XLANE]>], [9, 5, 5], - [HVX_FWD, HVX_FWD, HVX_FWD]> + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]> + ]; +} + +class DepHVXItinV65 { + list DepHVXItinV65_list = [ + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 2, 1, 2, 7], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST]>], [1, 2, 5], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 2, 7, 7], + [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ALL]>], [3, 2], + [HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST]>], [1, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_XLANE]>], [1, 2, 5], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_XLANE]>], [9, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD]>], [9, 3, 2, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLANE]>], [9, 2], + [HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_SHIFT]>], [9, 5, 2], + [HVX_FWD, 
HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_SHIFT]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLSHF]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7, 7], + [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_XLANE]>], [9, 3, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD]>], [9, 2, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST]>], [3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLSHF]>], [9, 9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7], + [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 2], + [HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_XLANE]>], [3, 2, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLSHF]>], [9, 7, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD]>], [9, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 2, 2], + [HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_XLANE]>], [3, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_SHIFT]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [2, 1, 2, 7], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9], + [HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST]>], [3, 2, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + 
InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7], + [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [4, 7, 1], + [Hex_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7], + [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 7, 1, 2, 7], + [Hex_FWD, HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD]>], [9, 3, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 1, 2, 7], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>, + InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7], + [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 7], + [HVX_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ALL]>], [2], + [Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLANE]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_SHIFT]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLANE]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ALL]>], [3], + [HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_SHIFT]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST]>], [2, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_XLANE]>], [2, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLSHF]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 2, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST]>], [3, 1, 2, 5], + 
[Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7, 7], + [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ALL]>], [], + []>, + + InstrItinData , + InstrStage<1, [CVI_XLANE]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7, 7], + [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>, + InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLANE]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]> ]; } diff --git a/lib/Target/Hexagon/HexagonDepIICScalar.td b/lib/Target/Hexagon/HexagonDepIICScalar.td index 261778bda724..083ec7753e04 100644 --- a/lib/Target/Hexagon/HexagonDepIICScalar.td +++ b/lib/Target/Hexagon/HexagonDepIICScalar.td @@ -1,4 +1,4 @@ -//===--- HexagonDepIICScalar.td -------------------------------------------===// +//===- HexagonDepIICScalar.td ---------------------------------------------===// // // The LLVM Compiler Infrastructure // @@ -6,2499 +6,4185 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// +// Automatically generated file, please consult code owner before editing. 
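The scalar itinerary data that follows is organized the same way: a regenerated set of tc_* InstrItinClass declarations, followed by per-architecture DepScalarItinV* classes, each carrying a list<InstrItinData> that maps every class onto issue slots, operand latencies, and bypasses for that architecture version. A minimal sketch of that enclosing structure — the class name is taken from the declarations below, while the stage/latency pairing is hypothetical:

    class DepScalarItinV55 {
      list<InstrItinData> DepScalarItinV55_list = [
        // One entry per tc_* class; this pairing is illustrative only.
        InstrItinData <tc_00afc57e, [InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
          [Hex_FWD, Hex_FWD]>
      ];
    }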
+//===----------------------------------------------------------------------===// + -def tc_049dfb74 : InstrItinClass; -def tc_0767081f : InstrItinClass; -def tc_07ac815d : InstrItinClass; -def tc_090485bb : InstrItinClass; -def tc_09c86199 : InstrItinClass; -def tc_09faec3b : InstrItinClass; -def tc_0cb867f2 : InstrItinClass; -def tc_1000eb10 : InstrItinClass; -def tc_128719e8 : InstrItinClass; -def tc_136c4786 : InstrItinClass; -def tc_14da557c : InstrItinClass; -def tc_1b6011fb : InstrItinClass; -def tc_1b834fe7 : InstrItinClass; -def tc_1e062b18 : InstrItinClass; -def tc_1e69aa99 : InstrItinClass; -def tc_1f9668cc : InstrItinClass; -def tc_1fe8323c : InstrItinClass; -def tc_20a8e109 : InstrItinClass; -def tc_210b2456 : InstrItinClass; -def tc_251c87b2 : InstrItinClass; -def tc_261d9b78 : InstrItinClass; -def tc_28d296df : InstrItinClass; -def tc_29c14515 : InstrItinClass; -def tc_2aaab1e0 : InstrItinClass; -def tc_2c8fe5ae : InstrItinClass; -def tc_2d1e6f5c : InstrItinClass; -def tc_2e55aa16 : InstrItinClass; -def tc_30665cb0 : InstrItinClass; -def tc_336e698c : InstrItinClass; -def tc_34e882a4 : InstrItinClass; -def tc_35fb9d13 : InstrItinClass; -def tc_37326008 : InstrItinClass; -def tc_3993c58b : InstrItinClass; -def tc_3b4892c6 : InstrItinClass; -def tc_3bea1824 : InstrItinClass; -def tc_3c10f809 : InstrItinClass; -def tc_3d905451 : InstrItinClass; -def tc_3e61d314 : InstrItinClass; -def tc_3eab77bd : InstrItinClass; -def tc_43068634 : InstrItinClass; -def tc_45631a8d : InstrItinClass; -def tc_47ab9233 : InstrItinClass; -def tc_47f0b7ad : InstrItinClass; -def tc_485bb57c : InstrItinClass; -def tc_4997da4a : InstrItinClass; -def tc_511f28f6 : InstrItinClass; -def tc_537e2013 : InstrItinClass; -def tc_53ee6546 : InstrItinClass; -def tc_548f402d : InstrItinClass; -def tc_5625c6c1 : InstrItinClass; -def tc_580a779c : InstrItinClass; -def tc_583510c7 : InstrItinClass; -def tc_5d806107 : InstrItinClass; -def tc_5fa2857c : InstrItinClass; -def tc_5fe9fcd0 : InstrItinClass; -def tc_6264c5e0 : InstrItinClass; -def tc_639d93ee : InstrItinClass; -def tc_63cd9d2d : InstrItinClass; -def tc_65dc7cc4 : InstrItinClass; -def tc_69bb508b : InstrItinClass; -def tc_6c52d277 : InstrItinClass; -def tc_6c576d46 : InstrItinClass; -def tc_70cabf66 : InstrItinClass; -def tc_7639d4b0 : InstrItinClass; -def tc_7675c0e9 : InstrItinClass; -def tc_76c4c5ef : InstrItinClass; -def tc_77781686 : InstrItinClass; -def tc_78b3c689 : InstrItinClass; -def tc_7986ba30 : InstrItinClass; -def tc_7bc567a7 : InstrItinClass; -def tc_7c2dcd4d : InstrItinClass; -def tc_7ca2ea10 : InstrItinClass; -def tc_7d01cbdc : InstrItinClass; -def tc_7d9a56cd : InstrItinClass; -def tc_81a23d44 : InstrItinClass; -def tc_821c4233 : InstrItinClass; -def tc_82f0f122 : InstrItinClass; -def tc_84630363 : InstrItinClass; -def tc_86442910 : InstrItinClass; -def tc_87601822 : InstrItinClass; -def tc_88fa2da6 : InstrItinClass; -def tc_8c8041e6 : InstrItinClass; -def tc_8cb685d9 : InstrItinClass; -def tc_8def9c57 : InstrItinClass; -def tc_8f0a6bad : InstrItinClass; -def tc_8fab9ac3 : InstrItinClass; -def tc_92d1833c : InstrItinClass; -def tc_94e6ffd9 : InstrItinClass; -def tc_95c54f8b : InstrItinClass; -def tc_9a13af9d : InstrItinClass; -def tc_9b73d261 : InstrItinClass; -def tc_9c18c9a5 : InstrItinClass; -def tc_9c68db63 : InstrItinClass; -def tc_9ce7a5ab : InstrItinClass; -def tc_9da3628f : InstrItinClass; -def tc_9dafb7d3 : InstrItinClass; -def tc_9df8b0dc : InstrItinClass; -def tc_9e86015f : InstrItinClass; -def tc_9f518242 : InstrItinClass; -def 
tc_a12a5971 : InstrItinClass; -def tc_a1fb80e1 : InstrItinClass; -def tc_a333d2a9 : InstrItinClass; -def tc_a4567c39 : InstrItinClass; -def tc_a87879e8 : InstrItinClass; -def tc_a9c993d9 : InstrItinClass; -def tc_aad55963 : InstrItinClass; -def tc_ab1b5e74 : InstrItinClass; -def tc_ae0722f7 : InstrItinClass; -def tc_ae2c2dc2 : InstrItinClass; -def tc_ae762521 : InstrItinClass; -def tc_b08b653e : InstrItinClass; -def tc_b08be45e : InstrItinClass; -def tc_b0f50e3c : InstrItinClass; -def tc_b189ad4c : InstrItinClass; -def tc_b324366f : InstrItinClass; -def tc_b5bfaa60 : InstrItinClass; -def tc_b5f5a094 : InstrItinClass; -def tc_b86c7e8b : InstrItinClass; -def tc_baccf077 : InstrItinClass; -def tc_bc5561d8 : InstrItinClass; -def tc_bcf0e36e : InstrItinClass; -def tc_bd16579e : InstrItinClass; -def tc_be995eaf : InstrItinClass; -def tc_bf6fa601 : InstrItinClass; -def tc_c0cd91a8 : InstrItinClass; -def tc_c14739d5 : InstrItinClass; -def tc_c1dbc916 : InstrItinClass; -def tc_c58f771a : InstrItinClass; -def tc_c85212ca : InstrItinClass; -def tc_c8f9a6f6 : InstrItinClass; -def tc_ca280e8b : InstrItinClass; -def tc_cbe45117 : InstrItinClass; -def tc_cd321066 : InstrItinClass; -def tc_d108a090 : InstrItinClass; -def tc_d1b5a4b6 : InstrItinClass; -def tc_d2609065 : InstrItinClass; -def tc_d267fa19 : InstrItinClass; -def tc_d2a33af5 : InstrItinClass; -def tc_d63b71d1 : InstrItinClass; -def tc_d6a805a8 : InstrItinClass; -def tc_d95f4e98 : InstrItinClass; -def tc_da79106e : InstrItinClass; -def tc_dbe218dd : InstrItinClass; -def tc_dcfee7ae : InstrItinClass; -def tc_e17ce9ad : InstrItinClass; -def tc_e2480a7f : InstrItinClass; -def tc_e2c08bb4 : InstrItinClass; -def tc_e2c31426 : InstrItinClass; -def tc_e578178f : InstrItinClass; -def tc_e836c161 : InstrItinClass; -def tc_e8c7a357 : InstrItinClass; -def tc_eb07ef6f : InstrItinClass; -def tc_ecfaae86 : InstrItinClass; -def tc_ef0ebaaa : InstrItinClass; -def tc_ef2676fd : InstrItinClass; -def tc_f027ebe9 : InstrItinClass; -def tc_f055fbb6 : InstrItinClass; -def tc_f1240c08 : InstrItinClass; -def tc_f16d5b17 : InstrItinClass; -def tc_f1aa2cdb : InstrItinClass; -def tc_f26aa619 : InstrItinClass; -def tc_f4608adc : InstrItinClass; -def tc_faab1248 : InstrItinClass; -def tc_fcee8723 : InstrItinClass; -def tc_feb4974b : InstrItinClass; +def tc_0077f68c : InstrItinClass; +def tc_00afc57e : InstrItinClass; +def tc_00e7c26e : InstrItinClass; +def tc_03220ffa : InstrItinClass; +def tc_038a1342 : InstrItinClass; +def tc_04c9decc : InstrItinClass; +def tc_05b6c987 : InstrItinClass; +def tc_0a2b8c7c : InstrItinClass; +def tc_0cd51c76 : InstrItinClass; +def tc_0dc560de : InstrItinClass; +def tc_0fc1ae07 : InstrItinClass; +def tc_10b97e27 : InstrItinClass; +def tc_128f96e3 : InstrItinClass; +def tc_1372bca1 : InstrItinClass; +def tc_1432937d : InstrItinClass; +def tc_14cd4cfa : InstrItinClass; +def tc_15411484 : InstrItinClass; +def tc_16d0d8d5 : InstrItinClass; +def tc_181af5d0 : InstrItinClass; +def tc_1853ea6d : InstrItinClass; +def tc_1b82a277 : InstrItinClass; +def tc_1b9c9ee5 : InstrItinClass; +def tc_1c0005f9 : InstrItinClass; +def tc_1d5a38a8 : InstrItinClass; +def tc_1e856f58 : InstrItinClass; +def tc_20280784 : InstrItinClass; +def tc_234a11a5 : InstrItinClass; +def tc_238d91d2 : InstrItinClass; +def tc_29175780 : InstrItinClass; +def tc_29641329 : InstrItinClass; +def tc_2a160009 : InstrItinClass; +def tc_2b2f4060 : InstrItinClass; +def tc_2b6f77c6 : InstrItinClass; +def tc_2e00db30 : InstrItinClass; +def tc_2f185f5c : InstrItinClass; +def tc_2fc0c436 : 
InstrItinClass; +def tc_351fed2d : InstrItinClass; +def tc_3669266a : InstrItinClass; +def tc_367f7f3d : InstrItinClass; +def tc_36c68ad1 : InstrItinClass; +def tc_395dc00f : InstrItinClass; +def tc_3bc2c5d3 : InstrItinClass; +def tc_3cb8ea06 : InstrItinClass; +def tc_3d04548d : InstrItinClass; +def tc_3da80ba5 : InstrItinClass; +def tc_3e07fb90 : InstrItinClass; +def tc_41d5298e : InstrItinClass; +def tc_4403ca65 : InstrItinClass; +def tc_44126683 : InstrItinClass; +def tc_452f85af : InstrItinClass; +def tc_481e5e5c : InstrItinClass; +def tc_49eb22c8 : InstrItinClass; +def tc_4ca572d4 : InstrItinClass; +def tc_4d9914c9 : InstrItinClass; +def tc_4d99bca9 : InstrItinClass; +def tc_4f7cd700 : InstrItinClass; +def tc_513bef45 : InstrItinClass; +def tc_51b866be : InstrItinClass; +def tc_523fcf30 : InstrItinClass; +def tc_5274e61a : InstrItinClass; +def tc_52d7bbea : InstrItinClass; +def tc_53173427 : InstrItinClass; +def tc_53bc8a6a : InstrItinClass; +def tc_53bdb2f6 : InstrItinClass; +def tc_540fdfbc : InstrItinClass; +def tc_55050d58 : InstrItinClass; +def tc_56d25411 : InstrItinClass; +def tc_57288781 : InstrItinClass; +def tc_594ab548 : InstrItinClass; +def tc_5acef64a : InstrItinClass; +def tc_5ba5997d : InstrItinClass; +def tc_5eb851fc : InstrItinClass; +def tc_5f6847a1 : InstrItinClass; +def tc_60571023 : InstrItinClass; +def tc_609d2efe : InstrItinClass; +def tc_60d76817 : InstrItinClass; +def tc_60f5738d : InstrItinClass; +def tc_63fe3df7 : InstrItinClass; +def tc_66888ded : InstrItinClass; +def tc_6792d5ff : InstrItinClass; +def tc_681a2300 : InstrItinClass; +def tc_68cb12ce : InstrItinClass; +def tc_6aa5711a : InstrItinClass; +def tc_6ac37025 : InstrItinClass; +def tc_6ebb4a12 : InstrItinClass; +def tc_6efc556e : InstrItinClass; +def tc_73043bf4 : InstrItinClass; +def tc_746baa8e : InstrItinClass; +def tc_74e47fd9 : InstrItinClass; +def tc_7934b9df : InstrItinClass; +def tc_7a830544 : InstrItinClass; +def tc_7f881c76 : InstrItinClass; +def tc_84df2cd3 : InstrItinClass; +def tc_85523bcb : InstrItinClass; +def tc_855b0b61 : InstrItinClass; +def tc_87735c3b : InstrItinClass; +def tc_88fa1a78 : InstrItinClass; +def tc_897d1a9d : InstrItinClass; +def tc_8b15472a : InstrItinClass; +def tc_8bb285ec : InstrItinClass; +def tc_8fd5f294 : InstrItinClass; +def tc_8fe6b782 : InstrItinClass; +def tc_90f3e30c : InstrItinClass; +def tc_976ddc4f : InstrItinClass; +def tc_97743097 : InstrItinClass; +def tc_999d32db : InstrItinClass; +def tc_99be14ca : InstrItinClass; +def tc_9c00ce8d : InstrItinClass; +def tc_9c98e8af : InstrItinClass; +def tc_9d5941c7 : InstrItinClass; +def tc_9ef61e5c : InstrItinClass; +def tc_9faf76ae : InstrItinClass; +def tc_9fdb5406 : InstrItinClass; +def tc_a21dc435 : InstrItinClass; +def tc_a27582fa : InstrItinClass; +def tc_a46f0df5 : InstrItinClass; +def tc_a788683e : InstrItinClass; +def tc_a8acdac0 : InstrItinClass; +def tc_a904d137 : InstrItinClass; +def tc_adb14c66 : InstrItinClass; +def tc_b13761ae : InstrItinClass; +def tc_b166348b : InstrItinClass; +def tc_b44c6e2a : InstrItinClass; +def tc_b5a33b22 : InstrItinClass; +def tc_b77c481f : InstrItinClass; +def tc_b7dd427e : InstrItinClass; +def tc_b9488031 : InstrItinClass; +def tc_b9c0b731 : InstrItinClass; +def tc_b9c4623f : InstrItinClass; +def tc_bad2bcaf : InstrItinClass; +def tc_bcc96cee : InstrItinClass; +def tc_bd90564c : InstrItinClass; +def tc_bde7aaf4 : InstrItinClass; +def tc_be706f30 : InstrItinClass; +def tc_c2f7d806 : InstrItinClass; +def tc_c5e2426d : InstrItinClass; +def tc_c6aa82f7 : InstrItinClass; 
+def tc_c6ce9b3f : InstrItinClass; +def tc_c6ebf8dd : InstrItinClass; +def tc_c74f796f : InstrItinClass; +def tc_c82dc1ff : InstrItinClass; +def tc_caaebcba : InstrItinClass; +def tc_cd7374a0 : InstrItinClass; +def tc_cde8b071 : InstrItinClass; +def tc_cf47a43f : InstrItinClass; +def tc_cf59f215 : InstrItinClass; +def tc_d088982c : InstrItinClass; +def tc_d1090e34 : InstrItinClass; +def tc_d24b2d85 : InstrItinClass; +def tc_d580173f : InstrItinClass; +def tc_d6bf0472 : InstrItinClass; +def tc_d9709180 : InstrItinClass; +def tc_d9f95eef : InstrItinClass; +def tc_daa058fa : InstrItinClass; +def tc_dbdffe3d : InstrItinClass; +def tc_e0739b8c : InstrItinClass; +def tc_e1e0a2dc : InstrItinClass; +def tc_e1e99bfa : InstrItinClass; +def tc_e216a5db : InstrItinClass; +def tc_e421e012 : InstrItinClass; +def tc_e6b38e01 : InstrItinClass; +def tc_e7624c08 : InstrItinClass; +def tc_e7d02c66 : InstrItinClass; +def tc_e913dc32 : InstrItinClass; +def tc_e9c822f7 : InstrItinClass; +def tc_e9fae2d6 : InstrItinClass; +def tc_ef20db1c : InstrItinClass; +def tc_ef52ed71 : InstrItinClass; +def tc_ef84f62f : InstrItinClass; +def tc_f2704b9a : InstrItinClass; +def tc_f3eaa14b : InstrItinClass; +def tc_f47d212f : InstrItinClass; +def tc_f49e76f4 : InstrItinClass; +def tc_f4f43fb5 : InstrItinClass; +def tc_f7dd9c9f : InstrItinClass; +def tc_f86c328a : InstrItinClass; +def tc_f8eeed7a : InstrItinClass; +def tc_fcab4871 : InstrItinClass; +def tc_ff9ee76e : InstrItinClass; class DepScalarItinV4 { list DepScalarItinV4_list = [ - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - 
InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]> ]; + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + 
InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]> ]; } class DepScalarItinV5 { list DepScalarItinV5_list = [ - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData 
]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]>, - InstrItinData ]> ]; + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + 
InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]> ]; } class DepScalarItinV55 { list DepScalarItinV55_list = [ - InstrItinData ], [1], + InstrItinData ], [2], [Hex_FWD]>, - InstrItinData ], [2, 2], + InstrItinData ], [4, 2], [Hex_FWD, Hex_FWD]>, - InstrItinData ], [2, 1], - [Hex_FWD, Hex_FWD]>, + InstrItinData ], [1], + [Hex_FWD]>, - InstrItinData ], [4, 2, 2, 2], - [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 3, 2, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData ], [4, 4, 1, 1], - [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 1, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData ], [3, 2, 2], + InstrItinData ], [4, 1, 1], [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData ], [4, 2, 2], + InstrItinData ], [1, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData ], [2, 2], + InstrItinData ], [4, 1], [Hex_FWD, Hex_FWD]>, - InstrItinData ], [4, 3, 1, 1, 2], + InstrItinData ], [4, 2, 1, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData ], [4, 2, 2], - [Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData ], [4, 2, 1, 2], - [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData ], [3, 2, 2, 2], + InstrItinData ], [1, 2, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData ], [2, 2], - [Hex_FWD, Hex_FWD]>, - - InstrItinData ], [3, 2, 2], - [Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData ], [2, 1, 2, 2], - [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [2], + [Hex_FWD]>, - InstrItinData ], [3, 1], + InstrItinData ], [2, 1], [Hex_FWD, Hex_FWD]>, - InstrItinData ], [4, 2], + InstrItinData ], [1, 2], [Hex_FWD, Hex_FWD]>, - InstrItinData ], [3, 1, 2, 2], - [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData ], [1, 2, 2, 3], - [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [1, 1], + [Hex_FWD, Hex_FWD]>, - InstrItinData ], [3, 1, 2, 2, 2], - [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [2], + [Hex_FWD]>, - InstrItinData ], [4, 3, 2, 2], - [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [1], + [Hex_FWD]>, - InstrItinData ], [3, 3, 2, 2], + InstrItinData ], [4, 2, 1, 2], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData ], [4, 1], + InstrItinData ], [3, 1], [Hex_FWD, Hex_FWD]>, - InstrItinData ], [4, 1, 1, 1], - [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData ], [2, 2, 3], + 
[Garbled diff hunks: generated Hexagon scheduling itineraries. The surviving markers show the
preceding itinerary list's entries being rewritten with updated latencies, the DepScalarItinV60
and DepScalarItinV62 classes being regenerated, and new DepScalarItinV60se and DepScalarItinV65
classes being added. Each class holds a list of InstrItinData entries pairing an instruction
class with its InstrStage pipeline stages (e.g. InstrStage<1, [CVI_ST]>), a list of per-operand
latency cycles, and a matching list of Hex_FWD forwarding bypasses. The individual entries are
not recoverable from this extraction.]
InstrItinData ], [3, 2], [Hex_FWD, Hex_FWD]>, - InstrItinData ], [4, 2, 2, 2, 2], - [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData ], [1, 2, 2], - [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, - InstrItinData ], [3, 2], + InstrItinData ], [4, 2], [Hex_FWD, Hex_FWD]>, - InstrItinData ], [3, 2], + InstrItinData ], [4, 1], [Hex_FWD, Hex_FWD]>, - InstrItinData ], [4, 2, 2, 2], + InstrItinData ], [4, 3, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 1, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData ], [3, 2, 1, 2, 3], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData ], [4, 1, 1], + InstrItinData ], [1, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData ], [], - []>, + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, - InstrItinData ], [4, 3, 3, 1, 2], - [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [], + []>, - InstrItinData ], [5, 1], + InstrItinData ], [2, 2], [Hex_FWD, Hex_FWD]>, - InstrItinData ], [1, 2], - [Hex_FWD, Hex_FWD]>, + InstrItinData ], [3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData ], [1, 2], + InstrItinData ], [3, 1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], [Hex_FWD, Hex_FWD]>, - InstrItinData ], [1], + InstrItinData ], [3], [Hex_FWD]>, - InstrItinData ], [1, 2], - [Hex_FWD, Hex_FWD]>, + InstrItinData ], [3, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData ], [], - []>, + InstrItinData ], [4, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData ], [2], + InstrItinData ], [4], [Hex_FWD]>, - InstrItinData ], [2, 1], + InstrItinData ], [2, 2], [Hex_FWD, Hex_FWD]>, - InstrItinData ], [4, 2, 2], - [Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData ], [3, 2], + InstrItinData ], [4, 1], [Hex_FWD, Hex_FWD]>, - InstrItinData ], [5, 5, 1], - [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 2, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData ], [3], - [Hex_FWD]>, + InstrItinData ], [4, 4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData ], [1, 1], + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 1], [Hex_FWD, Hex_FWD]>, - InstrItinData ], [4, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData ], [1, 2, 2], + InstrItinData ], [4, 1, 1], [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData ], [2, 2], + InstrItinData ], [1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [2, 3], [Hex_FWD, Hex_FWD]> ]; } diff --git a/lib/Target/Hexagon/HexagonDepITypes.h b/lib/Target/Hexagon/HexagonDepITypes.h index be831b9501ea..7e06ccede6e7 100644 --- a/lib/Target/Hexagon/HexagonDepITypes.h +++ b/lib/Target/Hexagon/HexagonDepITypes.h @@ -1,4 +1,4 @@ -//===--- HexagonDepITypes.h -----------------------------------------------===// +//===- HexagonDepITypes.h -------------------------------------------------===// // // The LLVM Compiler Infrastructure // @@ -6,6 +6,9 @@ // License. 
See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// +// Automatically generated file, please consult code owner before editing. +//===----------------------------------------------------------------------===// + namespace llvm { namespace HexagonII { @@ -15,8 +18,17 @@ enum Type { TypeALU32_ADDI = 2, TypeALU64 = 3, TypeCJ = 4, + TypeCOPROC_VX = 5, TypeCR = 6, + TypeCVI_4SLOT_MPY = 7, + TypeCVI_GATHER = 8, + TypeCVI_GATHER_RST = 9, TypeCVI_HIST = 10, + TypeCVI_SCATTER = 11, + TypeCVI_SCATTER_DV = 12, + TypeCVI_SCATTER_NEW_RST = 13, + TypeCVI_SCATTER_NEW_ST = 14, + TypeCVI_SCATTER_RST = 15, TypeCVI_VA = 16, TypeCVI_VA_DV = 17, TypeCVI_VINLANESAT = 18, @@ -29,6 +41,7 @@ enum Type { TypeCVI_VP = 25, TypeCVI_VP_VS = 26, TypeCVI_VS = 27, + TypeCVI_VS_VX = 28, TypeCVI_VX = 29, TypeCVI_VX_DV = 30, TypeCVI_VX_LATE = 31, diff --git a/lib/Target/Hexagon/HexagonDepITypes.td b/lib/Target/Hexagon/HexagonDepITypes.td index ac1989e4dd82..0a385bf938fe 100644 --- a/lib/Target/Hexagon/HexagonDepITypes.td +++ b/lib/Target/Hexagon/HexagonDepITypes.td @@ -1,4 +1,4 @@ -//===--- HexagonDepITypes.td ----------------------------------------------===// +//===- HexagonDepITypes.td ------------------------------------------------===// // // The LLVM Compiler Infrastructure // @@ -6,6 +6,9 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// +// Automatically generated file, please consult code owner before editing. +//===----------------------------------------------------------------------===// + class IType t> { bits<6> Value = t; } def TypeALU32_2op : IType<0>; @@ -13,8 +16,17 @@ def TypeALU32_3op : IType<1>; def TypeALU32_ADDI : IType<2>; def TypeALU64 : IType<3>; def TypeCJ : IType<4>; +def TypeCOPROC_VX : IType<5>; def TypeCR : IType<6>; +def TypeCVI_4SLOT_MPY : IType<7>; +def TypeCVI_GATHER : IType<8>; +def TypeCVI_GATHER_RST : IType<9>; def TypeCVI_HIST : IType<10>; +def TypeCVI_SCATTER : IType<11>; +def TypeCVI_SCATTER_DV : IType<12>; +def TypeCVI_SCATTER_NEW_RST : IType<13>; +def TypeCVI_SCATTER_NEW_ST : IType<14>; +def TypeCVI_SCATTER_RST : IType<15>; def TypeCVI_VA : IType<16>; def TypeCVI_VA_DV : IType<17>; def TypeCVI_VINLANESAT : IType<18>; @@ -27,6 +39,7 @@ def TypeCVI_VM_VP_LDU : IType<24>; def TypeCVI_VP : IType<25>; def TypeCVI_VP_VS : IType<26>; def TypeCVI_VS : IType<27>; +def TypeCVI_VS_VX : IType<28>; def TypeCVI_VX : IType<29>; def TypeCVI_VX_DV : IType<30>; def TypeCVI_VX_LATE : IType<31>; diff --git a/lib/Target/Hexagon/HexagonDepInstrFormats.td b/lib/Target/Hexagon/HexagonDepInstrFormats.td index 1b24be477158..9f98da3a1dee 100644 --- a/lib/Target/Hexagon/HexagonDepInstrFormats.td +++ b/lib/Target/Hexagon/HexagonDepInstrFormats.td @@ -1,4 +1,4 @@ -//===--- HexagonDepInstrFormats.td ----------------------------------------===// +//===- HexagonDepInstrFormats.td ------------------------------------------===// // // The LLVM Compiler Infrastructure // @@ -6,6 +6,9 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// +// Automatically generated file, please consult code owner before editing. 
+//===----------------------------------------------------------------------===// + class Enc_890909 : OpcodeHexagon { bits <5> Rs32; @@ -15,6 +18,18 @@ class Enc_890909 : OpcodeHexagon { bits <2> Pe4; let Inst{6-5} = Pe4{1-0}; } +class Enc_9be1de : OpcodeHexagon { + bits <2> Qs4; + let Inst{6-5} = Qs4{1-0}; + bits <5> Rt32; + let Inst{20-16} = Rt32{4-0}; + bits <1> Mu2; + let Inst{13-13} = Mu2{0-0}; + bits <5> Vv32; + let Inst{12-8} = Vv32{4-0}; + bits <5> Vw32; + let Inst{4-0} = Vw32{4-0}; +} class Enc_527412 : OpcodeHexagon { bits <2> Ps4; let Inst{17-16} = Ps4{1-0}; @@ -46,14 +61,23 @@ class Enc_27b757 : OpcodeHexagon { bits <5> Vs32; let Inst{4-0} = Vs32{4-0}; } -class Enc_5de85f : OpcodeHexagon { +class Enc_8d04c3 : OpcodeHexagon { + bits <5> Vu32; + let Inst{20-16} = Vu32{4-0}; + bits <5> Vv32; + let Inst{12-8} = Vv32{4-0}; + bits <5> Vd32; + let Inst{7-3} = Vd32{4-0}; +} +class Enc_1de724 : OpcodeHexagon { bits <11> Ii; let Inst{21-20} = Ii{10-9}; let Inst{7-1} = Ii{8-2}; - bits <5> Rt32; - let Inst{12-8} = Rt32{4-0}; - bits <3> Ns8; - let Inst{18-16} = Ns8{2-0}; + bits <4> Rs16; + let Inst{19-16} = Rs16{3-0}; + bits <4> n1; + let Inst{28-28} = n1{3-3}; + let Inst{24-22} = n1{2-0}; } class Enc_0e41fa : OpcodeHexagon { bits <5> Vuu32; @@ -63,12 +87,48 @@ class Enc_0e41fa : OpcodeHexagon { bits <5> Vd32; let Inst{4-0} = Vd32{4-0}; } +class Enc_2a736a : OpcodeHexagon { + bits <5> Vuu32; + let Inst{20-16} = Vuu32{4-0}; + bits <5> Vdd32; + let Inst{7-3} = Vdd32{4-0}; +} +class Enc_3d6d37 : OpcodeHexagon { + bits <2> Qs4; + let Inst{6-5} = Qs4{1-0}; + bits <5> Rt32; + let Inst{20-16} = Rt32{4-0}; + bits <1> Mu2; + let Inst{13-13} = Mu2{0-0}; + bits <5> Vvv32; + let Inst{12-8} = Vvv32{4-0}; + bits <5> Vw32; + let Inst{4-0} = Vw32{4-0}; +} +class Enc_a641d0 : OpcodeHexagon { + bits <5> Rt32; + let Inst{20-16} = Rt32{4-0}; + bits <1> Mu2; + let Inst{13-13} = Mu2{0-0}; + bits <5> Vvv32; + let Inst{12-8} = Vvv32{4-0}; + bits <5> Vw32; + let Inst{4-0} = Vw32{4-0}; +} class Enc_802dc0 : OpcodeHexagon { bits <1> Ii; let Inst{8-8} = Ii{0-0}; bits <2> Qv4; let Inst{23-22} = Qv4{1-0}; } +class Enc_6a4549 : OpcodeHexagon { + bits <5> Vu32; + let Inst{12-8} = Vu32{4-0}; + bits <5> Rt32; + let Inst{20-16} = Rt32{4-0}; + bits <5> Vd32; + let Inst{7-3} = Vd32{4-0}; +} class Enc_6b197f : OpcodeHexagon { bits <4> Ii; let Inst{8-5} = Ii{3-0}; @@ -77,6 +137,14 @@ class Enc_6b197f : OpcodeHexagon { bits <5> Rx32; let Inst{20-16} = Rx32{4-0}; } +class Enc_1f3376 : OpcodeHexagon { + bits <5> Vu32; + let Inst{20-16} = Vu32{4-0}; + bits <5> Vv32; + let Inst{12-8} = Vv32{4-0}; + bits <5> Vxx32; + let Inst{7-3} = Vxx32{4-0}; +} class Enc_1f5d8f : OpcodeHexagon { bits <1> Mu2; let Inst{13-13} = Mu2{0-0}; @@ -165,6 +233,14 @@ class Enc_7eee72 : OpcodeHexagon { bits <5> Rx32; let Inst{20-16} = Rx32{4-0}; } +class Enc_310ba1 : OpcodeHexagon { + bits <5> Vu32; + let Inst{12-8} = Vu32{4-0}; + bits <5> Rtt32; + let Inst{20-16} = Rtt32{4-0}; + bits <5> Vx32; + let Inst{4-0} = Vx32{4-0}; +} class Enc_d7dc10 : OpcodeHexagon { bits <5> Rs32; let Inst{20-16} = Rs32{4-0}; @@ -191,6 +267,14 @@ class Enc_8dec2e : OpcodeHexagon { bits <5> Rd32; let Inst{4-0} = Rd32{4-0}; } +class Enc_28dcbb : OpcodeHexagon { + bits <5> Rt32; + let Inst{20-16} = Rt32{4-0}; + bits <1> Mu2; + let Inst{13-13} = Mu2{0-0}; + bits <5> Vvv32; + let Inst{4-0} = Vvv32{4-0}; +} class Enc_eaa9f8 : OpcodeHexagon { bits <5> Vu32; let Inst{12-8} = Vu32{4-0}; @@ -207,6 +291,14 @@ class Enc_509701 : OpcodeHexagon { bits <5> Rdd32; let Inst{4-0} = Rdd32{4-0}; } 
+class Enc_c84567 : OpcodeHexagon { + bits <5> Vuu32; + let Inst{20-16} = Vuu32{4-0}; + bits <5> Vv32; + let Inst{12-8} = Vv32{4-0}; + bits <5> Vdd32; + let Inst{7-3} = Vdd32{4-0}; +} class Enc_830e5d : OpcodeHexagon { bits <8> Ii; let Inst{12-5} = Ii{7-0}; @@ -218,6 +310,12 @@ class Enc_830e5d : OpcodeHexagon { bits <5> Rd32; let Inst{4-0} = Rd32{4-0}; } +class Enc_ae0040 : OpcodeHexagon { + bits <5> Rs32; + let Inst{20-16} = Rs32{4-0}; + bits <6> Sd64; + let Inst{5-0} = Sd64{5-0}; +} class Enc_79b8c8 : OpcodeHexagon { bits <6> Ii; let Inst{6-3} = Ii{5-2}; @@ -238,6 +336,16 @@ class Enc_58a8bf : OpcodeHexagon { bits <5> Rx32; let Inst{20-16} = Rx32{4-0}; } +class Enc_e8ddd5 : OpcodeHexagon { + bits <16> Ii; + let Inst{21-21} = Ii{15-15}; + let Inst{13-8} = Ii{14-9}; + let Inst{2-0} = Ii{8-6}; + bits <5> Vss32; + let Inst{7-3} = Vss32{4-0}; + bits <5> Rx32; + let Inst{20-16} = Rx32{4-0}; +} class Enc_041d7b : OpcodeHexagon { bits <11> Ii; let Inst{21-20} = Ii{10-9}; @@ -261,6 +369,14 @@ class Enc_f44229 : OpcodeHexagon { bits <3> Nt8; let Inst{10-8} = Nt8{2-0}; } +class Enc_fc563d : OpcodeHexagon { + bits <5> Vuu32; + let Inst{20-16} = Vuu32{4-0}; + bits <5> Vv32; + let Inst{12-8} = Vv32{4-0}; + bits <5> Vd32; + let Inst{7-3} = Vd32{4-0}; +} class Enc_aad80c : OpcodeHexagon { bits <5> Vuu32; let Inst{12-8} = Vuu32{4-0}; @@ -432,6 +548,13 @@ class Enc_6a5972 : OpcodeHexagon { bits <4> Rt16; let Inst{11-8} = Rt16{3-0}; } +class Enc_ff3442 : OpcodeHexagon { + bits <4> Ii; + let Inst{13-13} = Ii{3-3}; + let Inst{10-8} = Ii{2-0}; + bits <5> Rt32; + let Inst{20-16} = Rt32{4-0}; +} class Enc_53dca9 : OpcodeHexagon { bits <6> Ii; let Inst{11-8} = Ii{5-2}; @@ -456,6 +579,12 @@ class Enc_93af4c : OpcodeHexagon { bits <4> Rx16; let Inst{3-0} = Rx16{3-0}; } +class Enc_621fba : OpcodeHexagon { + bits <5> Rs32; + let Inst{20-16} = Rs32{4-0}; + bits <5> Gd32; + let Inst{4-0} = Gd32{4-0}; +} class Enc_5bdd42 : OpcodeHexagon { bits <7> Ii; let Inst{8-5} = Ii{6-3}; @@ -464,6 +593,14 @@ class Enc_5bdd42 : OpcodeHexagon { bits <5> Rx32; let Inst{20-16} = Rx32{4-0}; } +class Enc_ad9bef : OpcodeHexagon { + bits <5> Vu32; + let Inst{12-8} = Vu32{4-0}; + bits <5> Rtt32; + let Inst{20-16} = Rtt32{4-0}; + bits <5> Vxx32; + let Inst{4-0} = Vxx32{4-0}; +} class Enc_71f1b4 : OpcodeHexagon { bits <6> Ii; let Inst{8-5} = Ii{5-2}; @@ -483,6 +620,12 @@ class Enc_14640c : OpcodeHexagon { let Inst{24-22} = n1{3-1}; let Inst{13-13} = n1{0-0}; } +class Enc_2516bf : OpcodeHexagon { + bits <5> Vu32; + let Inst{20-16} = Vu32{4-0}; + bits <5> Vd32; + let Inst{7-3} = Vd32{4-0}; +} class Enc_31db33 : OpcodeHexagon { bits <2> Qt4; let Inst{6-5} = Qt4{1-0}; @@ -513,6 +656,24 @@ class Enc_784502 : OpcodeHexagon { bits <5> Rx32; let Inst{20-16} = Rx32{4-0}; } +class Enc_9a9d62 : OpcodeHexagon { + bits <1> Mu2; + let Inst{13-13} = Mu2{0-0}; + bits <5> Rt32; + let Inst{12-8} = Rt32{4-0}; + bits <5> Vs32; + let Inst{7-3} = Vs32{4-0}; + bits <5> Rx32; + let Inst{20-16} = Rx32{4-0}; +} +class Enc_3a81ac : OpcodeHexagon { + bits <1> Mu2; + let Inst{13-13} = Mu2{0-0}; + bits <5> Vd32; + let Inst{7-3} = Vd32{4-0}; + bits <5> Rx32; + let Inst{20-16} = Rx32{4-0}; +} class Enc_6413b6 : OpcodeHexagon { bits <11> Ii; let Inst{21-20} = Ii{10-9}; @@ -592,6 +753,16 @@ class Enc_e39bb2 : OpcodeHexagon { bits <4> Rd16; let Inst{3-0} = Rd16{3-0}; } +class Enc_7db2f8 : OpcodeHexagon { + bits <5> Vu32; + let Inst{13-9} = Vu32{4-0}; + bits <5> Vv32; + let Inst{8-4} = Vv32{4-0}; + bits <4> Vdd16; + let Inst{3-0} = Vdd16{3-0}; + bits <5> Rx32; + let 
Inst{20-16} = Rx32{4-0}; +} class Enc_1b64fb : OpcodeHexagon { bits <16> Ii; let Inst{26-25} = Ii{15-14}; @@ -670,6 +841,10 @@ class Enc_fcf7a7 : OpcodeHexagon { bits <2> Pd4; let Inst{1-0} = Pd4{1-0}; } +class Enc_2c3281 : OpcodeHexagon { + bits <5> Vdd32; + let Inst{7-3} = Vdd32{4-0}; +} class Enc_55355c : OpcodeHexagon { bits <2> Ii; let Inst{13-13} = Ii{1-1}; @@ -745,6 +920,10 @@ class Enc_fef969 : OpcodeHexagon { bits <5> Rd32; let Inst{4-0} = Rd32{4-0}; } +class Enc_b2ffce : OpcodeHexagon { + bits <5> Vd32; + let Inst{7-3} = Vd32{4-0}; +} class Enc_63eaeb : OpcodeHexagon { bits <2> Ii; let Inst{1-0} = Ii{1-0}; @@ -769,6 +948,12 @@ class Enc_372c9d : OpcodeHexagon { bits <5> Rx32; let Inst{20-16} = Rx32{4-0}; } +class Enc_9e9047 : OpcodeHexagon { + bits <2> Pt4; + let Inst{9-8} = Pt4{1-0}; + bits <5> Rs32; + let Inst{20-16} = Rs32{4-0}; +} class Enc_4dff07 : OpcodeHexagon { bits <2> Qv4; let Inst{12-11} = Qv4{1-0}; @@ -815,6 +1000,16 @@ class Enc_b388cf : OpcodeHexagon { bits <5> Rd32; let Inst{4-0} = Rd32{4-0}; } +class Enc_880793 : OpcodeHexagon { + bits <3> Qt8; + let Inst{2-0} = Qt8{2-0}; + bits <5> Vu32; + let Inst{20-16} = Vu32{4-0}; + bits <5> Vv32; + let Inst{12-8} = Vv32{4-0}; + bits <5> Vdd32; + let Inst{7-3} = Vdd32{4-0}; +} class Enc_ad1c74 : OpcodeHexagon { bits <11> Ii; let Inst{21-20} = Ii{10-9}; @@ -854,6 +1049,16 @@ class Enc_5e87ce : OpcodeHexagon { bits <5> Rd32; let Inst{4-0} = Rd32{4-0}; } +class Enc_158beb : OpcodeHexagon { + bits <2> Qs4; + let Inst{6-5} = Qs4{1-0}; + bits <5> Rt32; + let Inst{20-16} = Rt32{4-0}; + bits <1> Mu2; + let Inst{13-13} = Mu2{0-0}; + bits <5> Vv32; + let Inst{4-0} = Vv32{4-0}; +} class Enc_f7ea77 : OpcodeHexagon { bits <11> Ii; let Inst{21-20} = Ii{10-9}; @@ -897,6 +1102,14 @@ class Enc_226535 : OpcodeHexagon { bits <5> Rt32; let Inst{4-0} = Rt32{4-0}; } +class Enc_96f0fd : OpcodeHexagon { + bits <5> Rt32; + let Inst{20-16} = Rt32{4-0}; + bits <5> Vx32; + let Inst{7-3} = Vx32{4-0}; + bits <3> Qdd8; + let Inst{2-0} = Qdd8{2-0}; +} class Enc_31aa6a : OpcodeHexagon { bits <5> Ii; let Inst{6-3} = Ii{4-1}; @@ -907,6 +1120,12 @@ class Enc_31aa6a : OpcodeHexagon { bits <5> Rx32; let Inst{20-16} = Rx32{4-0}; } +class Enc_932b58 : OpcodeHexagon { + bits <5> Vu32; + let Inst{12-8} = Vu32{4-0}; + bits <5> Rt32; + let Inst{20-16} = Rt32{4-0}; +} class Enc_397f23 : OpcodeHexagon { bits <8> Ii; let Inst{13-13} = Ii{7-7}; @@ -973,6 +1192,14 @@ class Enc_01d3d0 : OpcodeHexagon { bits <5> Vdd32; let Inst{4-0} = Vdd32{4-0}; } +class Enc_3126d7 : OpcodeHexagon { + bits <5> Vu32; + let Inst{20-16} = Vu32{4-0}; + bits <5> Vv32; + let Inst{12-8} = Vv32{4-0}; + bits <5> Vdd32; + let Inst{7-3} = Vdd32{4-0}; +} class Enc_b0e9d8 : OpcodeHexagon { bits <10> Ii; let Inst{21-21} = Ii{9-9}; @@ -1049,6 +1276,12 @@ class Enc_88c16c : OpcodeHexagon { bits <5> Rxx32; let Inst{4-0} = Rxx32{4-0}; } +class Enc_e7408c : OpcodeHexagon { + bits <6> Sss64; + let Inst{21-16} = Sss64{5-0}; + bits <5> Rdd32; + let Inst{4-0} = Rdd32{4-0}; +} class Enc_770858 : OpcodeHexagon { bits <2> Ps4; let Inst{6-5} = Ps4{1-0}; @@ -1090,6 +1323,16 @@ class Enc_412ff0 : OpcodeHexagon { bits <5> Rxx32; let Inst{12-8} = Rxx32{4-0}; } +class Enc_8e9fbd : OpcodeHexagon { + bits <5> Vu32; + let Inst{20-16} = Vu32{4-0}; + bits <3> Rt8; + let Inst{2-0} = Rt8{2-0}; + bits <5> Vd32; + let Inst{7-3} = Vd32{4-0}; + bits <5> Vy32; + let Inst{12-8} = Vy32{4-0}; +} class Enc_c9a18e : OpcodeHexagon { bits <11> Ii; let Inst{21-20} = Ii{10-9}; @@ -1134,6 +1377,16 @@ class Enc_d6990d : OpcodeHexagon { bits <5> 
Vxx32; let Inst{4-0} = Vxx32{4-0}; } +class Enc_6c4697 : OpcodeHexagon { + bits <1> Mu2; + let Inst{13-13} = Mu2{0-0}; + bits <5> Rt32; + let Inst{12-8} = Rt32{4-0}; + bits <5> Vd32; + let Inst{7-3} = Vd32{4-0}; + bits <5> Rx32; + let Inst{20-16} = Rx32{4-0}; +} class Enc_6c9440 : OpcodeHexagon { bits <10> Ii; let Inst{21-21} = Ii{9-9}; @@ -1278,6 +1531,12 @@ class Enc_a803e0 : OpcodeHexagon { bits <5> Rs32; let Inst{20-16} = Rs32{4-0}; } +class Enc_fde0e3 : OpcodeHexagon { + bits <5> Rtt32; + let Inst{20-16} = Rtt32{4-0}; + bits <5> Vd32; + let Inst{7-3} = Vd32{4-0}; +} class Enc_45364e : OpcodeHexagon { bits <5> Vu32; let Inst{12-8} = Vu32{4-0}; @@ -1298,6 +1557,12 @@ class Enc_b909d2 : OpcodeHexagon { let Inst{13-13} = n1{1-1}; let Inst{8-8} = n1{0-0}; } +class Enc_790d6e : OpcodeHexagon { + bits <5> Rt32; + let Inst{20-16} = Rt32{4-0}; + bits <5> Vd32; + let Inst{7-3} = Vd32{4-0}; +} class Enc_e6c957 : OpcodeHexagon { bits <10> Ii; let Inst{21-21} = Ii{9-9}; @@ -1358,6 +1623,14 @@ class Enc_0ed752 : OpcodeHexagon { bits <5> Cdd32; let Inst{4-0} = Cdd32{4-0}; } +class Enc_908985 : OpcodeHexagon { + bits <1> Mu2; + let Inst{13-13} = Mu2{0-0}; + bits <5> Vss32; + let Inst{7-3} = Vss32{4-0}; + bits <5> Rx32; + let Inst{20-16} = Rx32{4-0}; +} class Enc_143445 : OpcodeHexagon { bits <13> Ii; let Inst{26-25} = Ii{12-11}; @@ -1385,6 +1658,16 @@ class Enc_3e3989 : OpcodeHexagon { let Inst{25-22} = n1{4-1}; let Inst{8-8} = n1{0-0}; } +class Enc_12dd8f : OpcodeHexagon { + bits <5> Vu32; + let Inst{20-16} = Vu32{4-0}; + bits <5> Vv32; + let Inst{12-8} = Vv32{4-0}; + bits <3> Rt8; + let Inst{2-0} = Rt8{2-0}; + bits <5> Vx32; + let Inst{7-3} = Vx32{4-0}; +} class Enc_152467 : OpcodeHexagon { bits <5> Ii; let Inst{8-5} = Ii{4-1}; @@ -1393,6 +1676,14 @@ class Enc_152467 : OpcodeHexagon { bits <5> Rx32; let Inst{20-16} = Rx32{4-0}; } +class Enc_6b1bc4 : OpcodeHexagon { + bits <5> Vuu32; + let Inst{20-16} = Vuu32{4-0}; + bits <3> Qt8; + let Inst{10-8} = Qt8{2-0}; + bits <5> Vdd32; + let Inst{7-3} = Vdd32{4-0}; +} class Enc_daea09 : OpcodeHexagon { bits <17> Ii; let Inst{23-22} = Ii{16-15}; @@ -1421,6 +1712,32 @@ class Enc_a198f6 : OpcodeHexagon { bits <5> Rd32; let Inst{4-0} = Rd32{4-0}; } +class Enc_a265b7 : OpcodeHexagon { + bits <5> Vuu32; + let Inst{20-16} = Vuu32{4-0}; + bits <5> Vd32; + let Inst{7-3} = Vd32{4-0}; +} +class Enc_4e4a80 : OpcodeHexagon { + bits <2> Qs4; + let Inst{6-5} = Qs4{1-0}; + bits <5> Rt32; + let Inst{20-16} = Rt32{4-0}; + bits <1> Mu2; + let Inst{13-13} = Mu2{0-0}; + bits <5> Vvv32; + let Inst{4-0} = Vvv32{4-0}; +} +class Enc_8d5d98 : OpcodeHexagon { + bits <5> Vu32; + let Inst{20-16} = Vu32{4-0}; + bits <5> Vv32; + let Inst{12-8} = Vv32{4-0}; + bits <3> Rt8; + let Inst{2-0} = Rt8{2-0}; + bits <5> Vxx32; + let Inst{7-3} = Vxx32{4-0}; +} class Enc_3dac0b : OpcodeHexagon { bits <2> Qt4; let Inst{6-5} = Qt4{1-0}; @@ -1463,6 +1780,16 @@ class Enc_2df31d : OpcodeHexagon { bits <4> Rd16; let Inst{3-0} = Rd16{3-0}; } +class Enc_b0e553 : OpcodeHexagon { + bits <16> Ii; + let Inst{21-21} = Ii{15-15}; + let Inst{13-8} = Ii{14-9}; + let Inst{2-0} = Ii{8-6}; + bits <5> Vd32; + let Inst{7-3} = Vd32{4-0}; + bits <5> Rx32; + let Inst{20-16} = Rx32{4-0}; +} class Enc_25bef0 : OpcodeHexagon { bits <16> Ii; let Inst{26-25} = Ii{15-14}; @@ -1482,6 +1809,12 @@ class Enc_f82302 : OpcodeHexagon { let Inst{26-25} = n1{2-1}; let Inst{23-23} = n1{0-0}; } +class Enc_44271f : OpcodeHexagon { + bits <5> Gs32; + let Inst{20-16} = Gs32{4-0}; + bits <5> Rd32; + let Inst{4-0} = Rd32{4-0}; +} class 
Enc_83ee64 : OpcodeHexagon { bits <5> Ii; let Inst{12-8} = Ii{4-0}; @@ -1524,6 +1857,14 @@ class Enc_4df4e9 : OpcodeHexagon { bits <3> Nt8; let Inst{10-8} = Nt8{2-0}; } +class Enc_263841 : OpcodeHexagon { + bits <5> Vu32; + let Inst{12-8} = Vu32{4-0}; + bits <5> Rtt32; + let Inst{20-16} = Rtt32{4-0}; + bits <5> Vd32; + let Inst{4-0} = Vd32{4-0}; +} class Enc_91b9fe : OpcodeHexagon { bits <5> Ii; let Inst{6-3} = Ii{4-1}; @@ -1564,6 +1905,11 @@ class Enc_bd1cbc : OpcodeHexagon { bits <5> Rx32; let Inst{20-16} = Rx32{4-0}; } +class Enc_d0fe02 : OpcodeHexagon { + bits <5> Rxx32; + let Inst{20-16} = Rxx32{4-0}; + bits <0> sgp10; +} class Enc_a30110 : OpcodeHexagon { bits <5> Vu32; let Inst{12-8} = Vu32{4-0}; @@ -1583,6 +1929,16 @@ class Enc_f3f408 : OpcodeHexagon { bits <5> Vd32; let Inst{4-0} = Vd32{4-0}; } +class Enc_ce4c54 : OpcodeHexagon { + bits <16> Ii; + let Inst{21-21} = Ii{15-15}; + let Inst{13-8} = Ii{14-9}; + let Inst{2-0} = Ii{8-6}; + bits <5> Rt32; + let Inst{20-16} = Rt32{4-0}; + bits <5> Vd32; + let Inst{7-3} = Vd32{4-0}; +} class Enc_690862 : OpcodeHexagon { bits <13> Ii; let Inst{26-25} = Ii{12-11}; @@ -1593,6 +1949,20 @@ class Enc_690862 : OpcodeHexagon { bits <3> Nt8; let Inst{10-8} = Nt8{2-0}; } +class Enc_e570b0 : OpcodeHexagon { + bits <5> Rtt32; + let Inst{20-16} = Rtt32{4-0}; + bits <5> Vdd32; + let Inst{7-3} = Vdd32{4-0}; +} +class Enc_3c46e8 : OpcodeHexagon { + bits <5> Vuu32; + let Inst{12-8} = Vuu32{4-0}; + bits <5> Rt32; + let Inst{20-16} = Rt32{4-0}; + bits <5> Vdd32; + let Inst{7-3} = Vdd32{4-0}; +} class Enc_2a3787 : OpcodeHexagon { bits <13> Ii; let Inst{26-25} = Ii{12-11}; @@ -1640,6 +2010,22 @@ class Enc_729ff7 : OpcodeHexagon { bits <5> Rdd32; let Inst{4-0} = Rdd32{4-0}; } +class Enc_5883d0 : OpcodeHexagon { + bits <16> Ii; + let Inst{21-21} = Ii{15-15}; + let Inst{13-8} = Ii{14-9}; + let Inst{2-0} = Ii{8-6}; + bits <5> Rt32; + let Inst{20-16} = Rt32{4-0}; + bits <5> Vdd32; + let Inst{7-3} = Vdd32{4-0}; +} +class Enc_ff0e49 : OpcodeHexagon { + bits <5> Rss32; + let Inst{20-16} = Rss32{4-0}; + bits <6> Sdd64; + let Inst{5-0} = Sdd64{5-0}; +} class Enc_217147 : OpcodeHexagon { bits <2> Qv4; let Inst{23-22} = Qv4{1-0}; @@ -1674,6 +2060,14 @@ class Enc_541f26 : OpcodeHexagon { bits <5> Rt32; let Inst{12-8} = Rt32{4-0}; } +class Enc_9aae4a : OpcodeHexagon { + bits <5> Rt32; + let Inst{20-16} = Rt32{4-0}; + bits <5> Vx32; + let Inst{7-3} = Vx32{4-0}; + bits <3> Qd8; + let Inst{2-0} = Qd8{2-0}; +} class Enc_724154 : OpcodeHexagon { bits <6> II; let Inst{5-0} = II{5-0}; @@ -1781,6 +2175,12 @@ class Enc_22c845 : OpcodeHexagon { bits <5> Rx32; let Inst{20-16} = Rx32{4-0}; } +class Enc_ed5027 : OpcodeHexagon { + bits <5> Rss32; + let Inst{20-16} = Rss32{4-0}; + bits <5> Gdd32; + let Inst{4-0} = Gdd32{4-0}; +} class Enc_9b0bc1 : OpcodeHexagon { bits <2> Pu4; let Inst{6-5} = Pu4{1-0}; @@ -1828,6 +2228,12 @@ class Enc_96ce4f : OpcodeHexagon { bits <5> Rx32; let Inst{20-16} = Rx32{4-0}; } +class Enc_2bbae6 : OpcodeHexagon { + bits <6> Ss64; + let Inst{21-16} = Ss64{5-0}; + bits <5> Rd32; + let Inst{4-0} = Rd32{4-0}; +} class Enc_143a3c : OpcodeHexagon { bits <6> Ii; let Inst{13-8} = Ii{5-0}; @@ -1959,6 +2365,26 @@ class Enc_b43b67 : OpcodeHexagon { bits <2> Qx4; let Inst{6-5} = Qx4{1-0}; } +class Enc_1cd70f : OpcodeHexagon { + bits <5> Vu32; + let Inst{20-16} = Vu32{4-0}; + bits <5> Vv32; + let Inst{12-8} = Vv32{4-0}; + bits <3> Rt8; + let Inst{2-0} = Rt8{2-0}; + bits <5> Vd32; + let Inst{7-3} = Vd32{4-0}; +} +class Enc_3a527f : OpcodeHexagon { + bits <16> Ii; + let 
Inst{21-21} = Ii{15-15}; + let Inst{13-8} = Ii{14-9}; + let Inst{2-0} = Ii{8-6}; + bits <5> Vs32; + let Inst{7-3} = Vs32{4-0}; + bits <5> Rx32; + let Inst{20-16} = Rx32{4-0}; +} class Enc_4aca3a : OpcodeHexagon { bits <11> Ii; let Inst{21-20} = Ii{10-9}; @@ -1977,6 +2403,12 @@ class Enc_b38ffc : OpcodeHexagon { bits <4> Rt16; let Inst{3-0} = Rt16{3-0}; } +class Enc_5c3a80 : OpcodeHexagon { + bits <3> Qt8; + let Inst{10-8} = Qt8{2-0}; + bits <3> Qd8; + let Inst{5-3} = Qd8{2-0}; +} class Enc_cda00a : OpcodeHexagon { bits <12> Ii; let Inst{19-16} = Ii{11-8}; @@ -1994,6 +2426,24 @@ class Enc_2fbf3c : OpcodeHexagon { bits <4> Rd16; let Inst{3-0} = Rd16{3-0}; } +class Enc_a4ae28 : OpcodeHexagon { + bits <5> Vu32; + let Inst{20-16} = Vu32{4-0}; + bits <5> Vv32; + let Inst{12-8} = Vv32{4-0}; + bits <3> Qd8; + let Inst{5-3} = Qd8{2-0}; +} +class Enc_dd5f9f : OpcodeHexagon { + bits <3> Qtt8; + let Inst{2-0} = Qtt8{2-0}; + bits <5> Vuu32; + let Inst{20-16} = Vuu32{4-0}; + bits <5> Vvv32; + let Inst{12-8} = Vvv32{4-0}; + bits <5> Vdd32; + let Inst{7-3} = Vdd32{4-0}; +} class Enc_70b24b : OpcodeHexagon { bits <6> Ii; let Inst{8-5} = Ii{5-2}; @@ -2040,6 +2490,16 @@ class Enc_08d755 : OpcodeHexagon { bits <2> Pd4; let Inst{1-0} = Pd4{1-0}; } +class Enc_a7ca29 : OpcodeHexagon { + bits <3> Qt8; + let Inst{2-0} = Qt8{2-0}; + bits <5> Vu32; + let Inst{20-16} = Vu32{4-0}; + bits <5> Vv32; + let Inst{12-8} = Vv32{4-0}; + bits <5> Vd32; + let Inst{7-3} = Vd32{4-0}; +} class Enc_1178da : OpcodeHexagon { bits <3> Ii; let Inst{7-5} = Ii{2-0}; @@ -2058,6 +2518,14 @@ class Enc_8dbe85 : OpcodeHexagon { bits <5> Rx32; let Inst{20-16} = Rx32{4-0}; } +class Enc_17a474 : OpcodeHexagon { + bits <1> Mu2; + let Inst{13-13} = Mu2{0-0}; + bits <5> Vs32; + let Inst{7-3} = Vs32{4-0}; + bits <5> Rx32; + let Inst{20-16} = Rx32{4-0}; +} class Enc_5a18b3 : OpcodeHexagon { bits <11> Ii; let Inst{21-20} = Ii{10-9}; @@ -2118,6 +2586,14 @@ class Enc_12b6e9 : OpcodeHexagon { bits <5> Rdd32; let Inst{4-0} = Rdd32{4-0}; } +class Enc_9a895f : OpcodeHexagon { + bits <1> Mu2; + let Inst{13-13} = Mu2{0-0}; + bits <5> Vdd32; + let Inst{7-3} = Vdd32{4-0}; + bits <5> Rx32; + let Inst{20-16} = Rx32{4-0}; +} class Enc_6f70ca : OpcodeHexagon { bits <8> Ii; let Inst{8-4} = Ii{7-3}; @@ -2130,6 +2606,12 @@ class Enc_7222b7 : OpcodeHexagon { } class Enc_e3b0c4 : OpcodeHexagon { } +class Enc_d7e8ba : OpcodeHexagon { + bits <5> Vu32; + let Inst{20-16} = Vu32{4-0}; + bits <5> Vdd32; + let Inst{7-3} = Vdd32{4-0}; +} class Enc_a255dc : OpcodeHexagon { bits <3> Ii; let Inst{10-8} = Ii{2-0}; @@ -2138,6 +2620,24 @@ class Enc_a255dc : OpcodeHexagon { bits <5> Rx32; let Inst{20-16} = Rx32{4-0}; } +class Enc_cb785b : OpcodeHexagon { + bits <5> Vu32; + let Inst{12-8} = Vu32{4-0}; + bits <5> Rtt32; + let Inst{20-16} = Rtt32{4-0}; + bits <5> Vdd32; + let Inst{4-0} = Vdd32{4-0}; +} +class Enc_5b76ab : OpcodeHexagon { + bits <10> Ii; + let Inst{21-21} = Ii{9-9}; + let Inst{13-8} = Ii{8-3}; + let Inst{2-0} = Ii{2-0}; + bits <5> Vs32; + let Inst{7-3} = Vs32{4-0}; + bits <5> Rx32; + let Inst{20-16} = Rx32{4-0}; +} class Enc_cb4b4e : OpcodeHexagon { bits <2> Pu4; let Inst{6-5} = Pu4{1-0}; @@ -2148,6 +2648,24 @@ class Enc_cb4b4e : OpcodeHexagon { bits <5> Rdd32; let Inst{4-0} = Rdd32{4-0}; } +class Enc_fbacc2 : OpcodeHexagon { + bits <5> Vu32; + let Inst{20-16} = Vu32{4-0}; + bits <3> Rt8; + let Inst{2-0} = Rt8{2-0}; + bits <5> Vxx32; + let Inst{7-3} = Vxx32{4-0}; + bits <5> Vy32; + let Inst{12-8} = Vy32{4-0}; +} +class Enc_2ad23d : OpcodeHexagon { + bits <5> Vu32; + let 
Inst{20-16} = Vu32{4-0}; + bits <5> Vv32; + let Inst{12-8} = Vv32{4-0}; + bits <5> Vx32; + let Inst{7-3} = Vx32{4-0}; +} class Enc_9cdba7 : OpcodeHexagon { bits <8> Ii; let Inst{12-5} = Ii{7-0}; @@ -2165,6 +2683,10 @@ class Enc_5cd7e9 : OpcodeHexagon { bits <5> Ryy32; let Inst{4-0} = Ryy32{4-0}; } +class Enc_e7c9de : OpcodeHexagon { + bits <5> Vu32; + let Inst{20-16} = Vu32{4-0}; +} class Enc_454a26 : OpcodeHexagon { bits <2> Pt4; let Inst{9-8} = Pt4{1-0}; @@ -2193,6 +2715,16 @@ class Enc_c175d0 : OpcodeHexagon { bits <4> Rd16; let Inst{3-0} = Rd16{3-0}; } +class Enc_16c48b : OpcodeHexagon { + bits <5> Rt32; + let Inst{20-16} = Rt32{4-0}; + bits <1> Mu2; + let Inst{13-13} = Mu2{0-0}; + bits <5> Vv32; + let Inst{12-8} = Vv32{4-0}; + bits <5> Vw32; + let Inst{4-0} = Vw32{4-0}; +} class Enc_895bd9 : OpcodeHexagon { bits <2> Qu4; let Inst{9-8} = Qu4{1-0}; @@ -2254,6 +2786,14 @@ class Enc_d2c7f1 : OpcodeHexagon { bits <2> Pe4; let Inst{6-5} = Pe4{1-0}; } +class Enc_dcfcbb : OpcodeHexagon { + bits <5> Vu32; + let Inst{20-16} = Vu32{4-0}; + bits <5> Vvv32; + let Inst{12-8} = Vvv32{4-0}; + bits <5> Vd32; + let Inst{7-3} = Vd32{4-0}; +} class Enc_3680c2 : OpcodeHexagon { bits <7> Ii; let Inst{11-5} = Ii{6-0}; @@ -2282,6 +2822,32 @@ class Enc_e957fb : OpcodeHexagon { bits <5> Rt32; let Inst{12-8} = Rt32{4-0}; } +class Enc_2146c1 : OpcodeHexagon { + bits <5> Vuu32; + let Inst{20-16} = Vuu32{4-0}; + bits <5> Vvv32; + let Inst{12-8} = Vvv32{4-0}; + bits <3> Qss8; + let Inst{2-0} = Qss8{2-0}; + bits <5> Vd32; + let Inst{7-3} = Vd32{4-0}; +} +class Enc_a662ae : OpcodeHexagon { + bits <5> Vuu32; + let Inst{20-16} = Vuu32{4-0}; + bits <5> Vvv32; + let Inst{12-8} = Vvv32{4-0}; + bits <3> Rt8; + let Inst{2-0} = Rt8{2-0}; + bits <5> Vdd32; + let Inst{7-3} = Vdd32{4-0}; +} +class Enc_8f7cc3 : OpcodeHexagon { + bits <3> Qtt8; + let Inst{10-8} = Qtt8{2-0}; + bits <3> Qdd8; + let Inst{5-3} = Qdd8{2-0}; +} class Enc_c9e3bc : OpcodeHexagon { bits <4> Ii; let Inst{13-13} = Ii{3-3}; @@ -2314,6 +2880,40 @@ class Enc_0b2e5b : OpcodeHexagon { bits <5> Vd32; let Inst{4-0} = Vd32{4-0}; } +class Enc_6f83e7 : OpcodeHexagon { + bits <2> Qv4; + let Inst{23-22} = Qv4{1-0}; + bits <5> Vd32; + let Inst{4-0} = Vd32{4-0}; +} +class Enc_46f33d : OpcodeHexagon { + bits <5> Rss32; + let Inst{20-16} = Rss32{4-0}; + bits <5> Rt32; + let Inst{12-8} = Rt32{4-0}; +} +class Enc_c1652e : OpcodeHexagon { + bits <5> Vu32; + let Inst{12-8} = Vu32{4-0}; + bits <5> Rt32; + let Inst{20-16} = Rt32{4-0}; + bits <3> Qd8; + let Inst{5-3} = Qd8{2-0}; +} +class Enc_b5b643 : OpcodeHexagon { + bits <5> Rtt32; + let Inst{20-16} = Rtt32{4-0}; + bits <5> Vx32; + let Inst{7-3} = Vx32{4-0}; +} +class Enc_85daf5 : OpcodeHexagon { + bits <5> Vu32; + let Inst{12-8} = Vu32{4-0}; + bits <5> Rtt32; + let Inst{20-16} = Rtt32{4-0}; + bits <5> Vx32; + let Inst{7-3} = Vx32{4-0}; +} class Enc_d483b9 : OpcodeHexagon { bits <1> Ii; let Inst{5-5} = Ii{0-0}; @@ -2346,6 +2946,26 @@ class Enc_70fb07 : OpcodeHexagon { bits <5> Rxx32; let Inst{4-0} = Rxx32{4-0}; } +class Enc_6c9ee0 : OpcodeHexagon { + bits <3> Ii; + let Inst{10-8} = Ii{2-0}; + bits <5> Rx32; + let Inst{20-16} = Rx32{4-0}; +} +class Enc_72a92d : OpcodeHexagon { + bits <5> Vuu32; + let Inst{12-8} = Vuu32{4-0}; + bits <5> Rt32; + let Inst{20-16} = Rt32{4-0}; + bits <5> Vxx32; + let Inst{7-3} = Vxx32{4-0}; +} +class Enc_44661f : OpcodeHexagon { + bits <1> Mu2; + let Inst{13-13} = Mu2{0-0}; + bits <5> Rx32; + let Inst{20-16} = Rx32{4-0}; +} class Enc_277737 : OpcodeHexagon { bits <8> Ii; let Inst{22-21} = Ii{7-6}; 
@@ -2496,6 +3116,14 @@ class Enc_8e583a : OpcodeHexagon { let Inst{25-23} = n1{3-1}; let Inst{13-13} = n1{0-0}; } +class Enc_334c2b : OpcodeHexagon { + bits <5> Vuu32; + let Inst{12-8} = Vuu32{4-0}; + bits <5> Rt32; + let Inst{20-16} = Rt32{4-0}; + bits <5> Vd32; + let Inst{7-3} = Vd32{4-0}; +} class Enc_b886fd : OpcodeHexagon { bits <5> Ii; let Inst{6-3} = Ii{4-1}; @@ -2549,12 +3177,36 @@ class Enc_8dbdfe : OpcodeHexagon { bits <3> Nt8; let Inst{10-8} = Nt8{2-0}; } +class Enc_7dc746 : OpcodeHexagon { + bits <3> Quu8; + let Inst{10-8} = Quu8{2-0}; + bits <5> Rt32; + let Inst{20-16} = Rt32{4-0}; + bits <3> Qdd8; + let Inst{5-3} = Qdd8{2-0}; +} class Enc_90cd8b : OpcodeHexagon { bits <5> Rss32; let Inst{20-16} = Rss32{4-0}; bits <5> Rd32; let Inst{4-0} = Rd32{4-0}; } +class Enc_b8513b : OpcodeHexagon { + bits <5> Vuu32; + let Inst{20-16} = Vuu32{4-0}; + bits <5> Vvv32; + let Inst{12-8} = Vvv32{4-0}; + bits <5> Vdd32; + let Inst{7-3} = Vdd32{4-0}; +} +class Enc_b3bac4 : OpcodeHexagon { + bits <5> Vu32; + let Inst{12-8} = Vu32{4-0}; + bits <5> Rtt32; + let Inst{20-16} = Rtt32{4-0}; + bits <5> Vd32; + let Inst{7-3} = Vd32{4-0}; +} class Enc_bd0b33 : OpcodeHexagon { bits <10> Ii; let Inst{21-21} = Ii{9-9}; @@ -2564,6 +3216,24 @@ class Enc_bd0b33 : OpcodeHexagon { bits <2> Pd4; let Inst{1-0} = Pd4{1-0}; } +class Enc_843e80 : OpcodeHexagon { + bits <5> Vu32; + let Inst{12-8} = Vu32{4-0}; + bits <5> Rt32; + let Inst{20-16} = Rt32{4-0}; + bits <5> Vd32; + let Inst{7-3} = Vd32{4-0}; + bits <3> Qxx8; + let Inst{2-0} = Qxx8{2-0}; +} +class Enc_8b8927 : OpcodeHexagon { + bits <5> Rt32; + let Inst{20-16} = Rt32{4-0}; + bits <1> Mu2; + let Inst{13-13} = Mu2{0-0}; + bits <5> Vv32; + let Inst{4-0} = Vv32{4-0}; +} class Enc_c7cd90 : OpcodeHexagon { bits <4> Ii; let Inst{6-3} = Ii{3-0}; @@ -2711,15 +3381,24 @@ class Enc_1a9974 : OpcodeHexagon { bits <5> Rtt32; let Inst{4-0} = Rtt32{4-0}; } -class Enc_1de724 : OpcodeHexagon { +class Enc_9ce456 : OpcodeHexagon { + bits <10> Ii; + let Inst{21-21} = Ii{9-9}; + let Inst{13-8} = Ii{8-3}; + let Inst{2-0} = Ii{2-0}; + bits <5> Vss32; + let Inst{7-3} = Vss32{4-0}; + bits <5> Rx32; + let Inst{20-16} = Rx32{4-0}; +} +class Enc_5de85f : OpcodeHexagon { bits <11> Ii; let Inst{21-20} = Ii{10-9}; let Inst{7-1} = Ii{8-2}; - bits <4> Rs16; - let Inst{19-16} = Rs16{3-0}; - bits <4> n1; - let Inst{28-28} = n1{3-3}; - let Inst{24-22} = n1{2-0}; + bits <5> Rt32; + let Inst{12-8} = Rt32{4-0}; + bits <3> Ns8; + let Inst{18-16} = Ns8{2-0}; } class Enc_dd766a : OpcodeHexagon { bits <5> Vu32; @@ -2737,6 +3416,14 @@ class Enc_0b51ce : OpcodeHexagon { bits <5> Rx32; let Inst{20-16} = Rx32{4-0}; } +class Enc_b5e54d : OpcodeHexagon { + bits <5> Vu32; + let Inst{12-8} = Vu32{4-0}; + bits <5> Rs32; + let Inst{20-16} = Rs32{4-0}; + bits <5> Rdd32; + let Inst{4-0} = Rdd32{4-0}; +} class Enc_b4e6cf : OpcodeHexagon { bits <10> Ii; let Inst{21-21} = Ii{9-9}; @@ -2755,6 +3442,12 @@ class Enc_44215c : OpcodeHexagon { bits <3> Nt8; let Inst{10-8} = Nt8{2-0}; } +class Enc_0aa344 : OpcodeHexagon { + bits <5> Gss32; + let Inst{20-16} = Gss32{4-0}; + bits <5> Rdd32; + let Inst{4-0} = Rdd32{4-0}; +} class Enc_a21d47 : OpcodeHexagon { bits <6> Ii; let Inst{10-5} = Ii{5-0}; @@ -2786,6 +3479,16 @@ class Enc_645d54 : OpcodeHexagon { bits <5> Rdd32; let Inst{4-0} = Rdd32{4-0}; } +class Enc_b5d5a7 : OpcodeHexagon { + bits <16> Ii; + let Inst{21-21} = Ii{15-15}; + let Inst{13-8} = Ii{14-9}; + let Inst{2-0} = Ii{8-6}; + bits <5> Rt32; + let Inst{20-16} = Rt32{4-0}; + bits <5> Vs32; + let Inst{7-3} = Vs32{4-0}; 
+} class Enc_667b39 : OpcodeHexagon { bits <5> Css32; let Inst{20-16} = Css32{4-0}; @@ -2843,6 +3546,16 @@ class Enc_b8c967 : OpcodeHexagon { bits <5> Rd32; let Inst{4-0} = Rd32{4-0}; } +class Enc_f106e0 : OpcodeHexagon { + bits <5> Vu32; + let Inst{20-16} = Vu32{4-0}; + bits <5> Vv32; + let Inst{8-4} = Vv32{4-0}; + bits <5> Vt32; + let Inst{13-9} = Vt32{4-0}; + bits <4> Vdd16; + let Inst{3-0} = Vdd16{3-0}; +} class Enc_fb6577 : OpcodeHexagon { bits <2> Pu4; let Inst{9-8} = Pu4{1-0}; @@ -2851,6 +3564,20 @@ class Enc_fb6577 : OpcodeHexagon { bits <5> Rd32; let Inst{4-0} = Rd32{4-0}; } +class Enc_37c406 : OpcodeHexagon { + bits <5> Vu32; + let Inst{20-16} = Vu32{4-0}; + bits <5> Vv32; + let Inst{12-8} = Vv32{4-0}; + bits <3> Rt8; + let Inst{2-0} = Rt8{2-0}; + bits <4> Vdd16; + let Inst{7-4} = Vdd16{3-0}; +} +class Enc_403871 : OpcodeHexagon { + bits <5> Rx32; + let Inst{20-16} = Rx32{4-0}; +} class Enc_2bae10 : OpcodeHexagon { bits <4> Ii; let Inst{10-8} = Ii{3-1}; @@ -2859,6 +3586,22 @@ class Enc_2bae10 : OpcodeHexagon { bits <4> Rd16; let Inst{3-0} = Rd16{3-0}; } +class Enc_f3adb6 : OpcodeHexagon { + bits <16> Ii; + let Inst{21-21} = Ii{15-15}; + let Inst{13-8} = Ii{14-9}; + let Inst{2-0} = Ii{8-6}; + bits <5> Vdd32; + let Inst{7-3} = Vdd32{4-0}; + bits <5> Rx32; + let Inst{20-16} = Rx32{4-0}; +} +class Enc_aac08c : OpcodeHexagon { + bits <5> Vu32; + let Inst{20-16} = Vu32{4-0}; + bits <5> Vx32; + let Inst{7-3} = Vx32{4-0}; +} class Enc_c4dc92 : OpcodeHexagon { bits <2> Qv4; let Inst{23-22} = Qv4{1-0}; @@ -3000,6 +3743,13 @@ class Enc_134437 : OpcodeHexagon { bits <2> Qd4; let Inst{1-0} = Qd4{1-0}; } +class Enc_33f8ba : OpcodeHexagon { + bits <8> Ii; + let Inst{12-8} = Ii{7-3}; + let Inst{4-2} = Ii{2-0}; + bits <5> Rx32; + let Inst{20-16} = Rx32{4-0}; +} class Enc_97d666 : OpcodeHexagon { bits <4> Rs16; let Inst{7-4} = Rs16{3-0}; @@ -3016,6 +3766,16 @@ class Enc_f82eaf : OpcodeHexagon { bits <5> Rd32; let Inst{4-0} = Rd32{4-0}; } +class Enc_57e245 : OpcodeHexagon { + bits <5> Vu32; + let Inst{20-16} = Vu32{4-0}; + bits <3> Rt8; + let Inst{2-0} = Rt8{2-0}; + bits <5> Vdd32; + let Inst{7-3} = Vdd32{4-0}; + bits <5> Vy32; + let Inst{12-8} = Vy32{4-0}; +} class Enc_69d63b : OpcodeHexagon { bits <11> Ii; let Inst{21-20} = Ii{10-9}; @@ -3082,6 +3842,24 @@ class Enc_7eaeb6 : OpcodeHexagon { bits <5> Rx32; let Inst{20-16} = Rx32{4-0}; } +class Enc_274a4c : OpcodeHexagon { + bits <5> Vu32; + let Inst{20-16} = Vu32{4-0}; + bits <3> Rt8; + let Inst{2-0} = Rt8{2-0}; + bits <5> Vx32; + let Inst{7-3} = Vx32{4-0}; + bits <5> Vy32; + let Inst{12-8} = Vy32{4-0}; +} +class Enc_aceeef : OpcodeHexagon { + bits <5> Vu32; + let Inst{12-8} = Vu32{4-0}; + bits <5> Rt32; + let Inst{20-16} = Rt32{4-0}; + bits <5> Vdd32; + let Inst{7-3} = Vdd32{4-0}; +} class Enc_f55a0c : OpcodeHexagon { bits <6> Ii; let Inst{11-8} = Ii{5-2}; @@ -3120,6 +3898,16 @@ class Enc_7b523d : OpcodeHexagon { bits <5> Vxx32; let Inst{4-0} = Vxx32{4-0}; } +class Enc_c39a8b : OpcodeHexagon { + bits <16> Ii; + let Inst{21-21} = Ii{15-15}; + let Inst{13-8} = Ii{14-9}; + let Inst{2-0} = Ii{8-6}; + bits <5> Rt32; + let Inst{20-16} = Rt32{4-0}; + bits <5> Vss32; + let Inst{7-3} = Vss32{4-0}; +} class Enc_47ef61 : OpcodeHexagon { bits <3> Ii; let Inst{7-5} = Ii{2-0}; @@ -3229,6 +4017,16 @@ class Enc_eca7c8 : OpcodeHexagon { bits <5> Rt32; let Inst{4-0} = Rt32{4-0}; } +class Enc_598f6c : OpcodeHexagon { + bits <5> Rtt32; + let Inst{12-8} = Rtt32{4-0}; +} +class Enc_41dcc3 : OpcodeHexagon { + bits <5> Rt32; + let Inst{20-16} = Rt32{4-0}; + bits <5> 
Vdd32; + let Inst{7-3} = Vdd32{4-0}; +} class Enc_4b39e4 : OpcodeHexagon { bits <3> Ii; let Inst{7-5} = Ii{2-0}; diff --git a/lib/Target/Hexagon/HexagonDepInstrInfo.td b/lib/Target/Hexagon/HexagonDepInstrInfo.td index e42229fd57a5..6e16762ac0eb 100644 --- a/lib/Target/Hexagon/HexagonDepInstrInfo.td +++ b/lib/Target/Hexagon/HexagonDepInstrInfo.td @@ -1,4 +1,4 @@ -//===--- HexagonDepInstrInfo.td -------------------------------------------===// +//===- HexagonDepInstrInfo.td ---------------------------------------------===// // // The LLVM Compiler Infrastructure // @@ -6,12 +6,15 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// +// Automatically generated file, please consult code owner before editing. +//===----------------------------------------------------------------------===// + def A2_abs : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = abs($Rs32)", -tc_94e6ffd9, TypeS_2op>, Enc_5e2823 { +tc_c2f7d806, TypeS_2op>, Enc_5e2823 { let Inst{13-5} = 0b000000100; let Inst{31-21} = 0b10001100100; let hasNewValue = 1; @@ -22,7 +25,7 @@ def A2_absp : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32), "$Rdd32 = abs($Rss32)", -tc_94e6ffd9, TypeS_2op>, Enc_b9c5fb { +tc_c2f7d806, TypeS_2op>, Enc_b9c5fb { let Inst{13-5} = 0b000000110; let Inst{31-21} = 0b10000000100; let prefersSlot3 = 1; @@ -31,7 +34,7 @@ def A2_abssat : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = abs($Rs32):sat", -tc_94e6ffd9, TypeS_2op>, Enc_5e2823 { +tc_c2f7d806, TypeS_2op>, Enc_5e2823 { let Inst{13-5} = 0b000000101; let Inst{31-21} = 0b10001100100; let hasNewValue = 1; @@ -43,7 +46,7 @@ def A2_add : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = add($Rs32,$Rt32)", -tc_548f402d, TypeALU32_3op>, Enc_5ab2be, PredNewRel, ImmRegRel { +tc_b9488031, TypeALU32_3op>, Enc_5ab2be, PredNewRel, ImmRegRel { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11110011000; @@ -59,7 +62,7 @@ def A2_addh_h16_hh : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = add($Rt32.h,$Rs32.h):<<16", -tc_bd16579e, TypeALU64>, Enc_bd6011 { +tc_897d1a9d, TypeALU64>, Enc_bd6011 { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010101010; @@ -71,7 +74,7 @@ def A2_addh_h16_hl : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = add($Rt32.h,$Rs32.l):<<16", -tc_bd16579e, TypeALU64>, Enc_bd6011 { +tc_897d1a9d, TypeALU64>, Enc_bd6011 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010101010; @@ -83,7 +86,7 @@ def A2_addh_h16_lh : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = add($Rt32.l,$Rs32.h):<<16", -tc_bd16579e, TypeALU64>, Enc_bd6011 { +tc_897d1a9d, TypeALU64>, Enc_bd6011 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010101010; @@ -95,7 +98,7 @@ def A2_addh_h16_ll : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = add($Rt32.l,$Rs32.l):<<16", -tc_bd16579e, TypeALU64>, Enc_bd6011 { +tc_897d1a9d, TypeALU64>, Enc_bd6011 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010101010; @@ -107,7 +110,7 @@ def A2_addh_h16_sat_hh : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = add($Rt32.h,$Rs32.h):sat:<<16", -tc_47ab9233, TypeALU64>, Enc_bd6011 { +tc_b44c6e2a, TypeALU64>, Enc_bd6011 { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010101010; @@ -120,7 +123,7 @@ def A2_addh_h16_sat_hl : 
HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = add($Rt32.h,$Rs32.l):sat:<<16", -tc_47ab9233, TypeALU64>, Enc_bd6011 { +tc_b44c6e2a, TypeALU64>, Enc_bd6011 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010101010; @@ -133,7 +136,7 @@ def A2_addh_h16_sat_lh : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = add($Rt32.l,$Rs32.h):sat:<<16", -tc_47ab9233, TypeALU64>, Enc_bd6011 { +tc_b44c6e2a, TypeALU64>, Enc_bd6011 { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010101010; @@ -146,7 +149,7 @@ def A2_addh_h16_sat_ll : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = add($Rt32.l,$Rs32.l):sat:<<16", -tc_47ab9233, TypeALU64>, Enc_bd6011 { +tc_b44c6e2a, TypeALU64>, Enc_bd6011 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010101010; @@ -159,7 +162,7 @@ def A2_addh_l16_hl : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = add($Rt32.l,$Rs32.h)", -tc_7ca2ea10, TypeALU64>, Enc_bd6011 { +tc_1b9c9ee5, TypeALU64>, Enc_bd6011 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010101000; @@ -171,7 +174,7 @@ def A2_addh_l16_ll : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = add($Rt32.l,$Rs32.l)", -tc_7ca2ea10, TypeALU64>, Enc_bd6011 { +tc_1b9c9ee5, TypeALU64>, Enc_bd6011 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010101000; @@ -183,7 +186,7 @@ def A2_addh_l16_sat_hl : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = add($Rt32.l,$Rs32.h):sat", -tc_47ab9233, TypeALU64>, Enc_bd6011 { +tc_b44c6e2a, TypeALU64>, Enc_bd6011 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010101000; @@ -196,7 +199,7 @@ def A2_addh_l16_sat_ll : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = add($Rt32.l,$Rs32.l):sat", -tc_47ab9233, TypeALU64>, Enc_bd6011 { +tc_b44c6e2a, TypeALU64>, Enc_bd6011 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010101000; @@ -209,7 +212,7 @@ def A2_addi : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, s32_0Imm:$Ii), "$Rd32 = add($Rs32,#$Ii)", -tc_548f402d, TypeALU32_ADDI>, Enc_cb9321, PredNewRel, ImmRegRel { +tc_b9488031, TypeALU32_ADDI>, Enc_cb9321, PredNewRel, ImmRegRel { let Inst{31-28} = 0b1011; let hasNewValue = 1; let opNewValue = 0; @@ -228,7 +231,7 @@ def A2_addp : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = add($Rss32,$Rtt32)", -tc_9c18c9a5, TypeALU64>, Enc_a56825 { +tc_540fdfbc, TypeALU64>, Enc_a56825 { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011000; @@ -239,7 +242,7 @@ def A2_addpsat : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = add($Rss32,$Rtt32):sat", -tc_47ab9233, TypeALU64>, Enc_a56825 { +tc_b44c6e2a, TypeALU64>, Enc_a56825 { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011011; @@ -251,7 +254,7 @@ def A2_addsat : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = add($Rs32,$Rt32):sat", -tc_b0f50e3c, TypeALU32_3op>, Enc_5ab2be { +tc_5ba5997d, TypeALU32_3op>, Enc_5ab2be { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11110110010; @@ -266,14 +269,14 @@ def A2_addsp : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, DoubleRegs:$Rtt32), "$Rdd32 = add($Rs32,$Rtt32)", -tc_bd16579e, TypeALU64> { +tc_897d1a9d, TypeALU64> { let isPseudo = 1; } def A2_addsph : HInst< (outs 
DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = add($Rss32,$Rtt32):raw:hi", -tc_bd16579e, TypeALU64>, Enc_a56825 { +tc_897d1a9d, TypeALU64>, Enc_a56825 { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011011; @@ -283,7 +286,7 @@ def A2_addspl : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = add($Rss32,$Rtt32):raw:lo", -tc_bd16579e, TypeALU64>, Enc_a56825 { +tc_897d1a9d, TypeALU64>, Enc_a56825 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011011; @@ -293,7 +296,7 @@ def A2_and : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = and($Rs32,$Rt32)", -tc_548f402d, TypeALU32_3op>, Enc_5ab2be, PredNewRel, ImmRegRel { +tc_b9488031, TypeALU32_3op>, Enc_5ab2be, PredNewRel, ImmRegRel { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11110001000; @@ -309,7 +312,7 @@ def A2_andir : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, s32_0Imm:$Ii), "$Rd32 = and($Rs32,#$Ii)", -tc_548f402d, TypeALU32_2op>, Enc_140c83, ImmRegRel { +tc_b9488031, TypeALU32_2op>, Enc_140c83, ImmRegRel { let Inst{31-22} = 0b0111011000; let hasNewValue = 1; let opNewValue = 0; @@ -325,7 +328,7 @@ def A2_andp : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = and($Rss32,$Rtt32)", -tc_9c18c9a5, TypeALU64>, Enc_a56825 { +tc_540fdfbc, TypeALU64>, Enc_a56825 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011111; @@ -335,7 +338,7 @@ def A2_aslh : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = aslh($Rs32)", -tc_f16d5b17, TypeALU32_2op>, Enc_5e2823, PredNewRel { +tc_68cb12ce, TypeALU32_2op>, Enc_5e2823, PredNewRel { let Inst{13-5} = 0b000000000; let Inst{31-21} = 0b01110000000; let hasNewValue = 1; @@ -347,7 +350,7 @@ def A2_asrh : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = asrh($Rs32)", -tc_f16d5b17, TypeALU32_2op>, Enc_5e2823, PredNewRel { +tc_68cb12ce, TypeALU32_2op>, Enc_5e2823, PredNewRel { let Inst{13-5} = 0b000000000; let Inst{31-21} = 0b01110000001; let hasNewValue = 1; @@ -359,7 +362,7 @@ def A2_combine_hh : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = combine($Rt32.h,$Rs32.h)", -tc_548f402d, TypeALU32_3op>, Enc_bd6011 { +tc_b9488031, TypeALU32_3op>, Enc_bd6011 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11110011100; @@ -371,7 +374,7 @@ def A2_combine_hl : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = combine($Rt32.h,$Rs32.l)", -tc_548f402d, TypeALU32_3op>, Enc_bd6011 { +tc_b9488031, TypeALU32_3op>, Enc_bd6011 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11110011101; @@ -383,7 +386,7 @@ def A2_combine_lh : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = combine($Rt32.l,$Rs32.h)", -tc_548f402d, TypeALU32_3op>, Enc_bd6011 { +tc_b9488031, TypeALU32_3op>, Enc_bd6011 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11110011110; @@ -395,7 +398,7 @@ def A2_combine_ll : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = combine($Rt32.l,$Rs32.l)", -tc_548f402d, TypeALU32_3op>, Enc_bd6011 { +tc_b9488031, TypeALU32_3op>, Enc_bd6011 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11110011111; @@ -407,7 +410,7 @@ def A2_combineii : HInst< (outs DoubleRegs:$Rdd32), (ins s32_0Imm:$Ii, s8_0Imm:$II), "$Rdd32 = combine(#$Ii,#$II)", -tc_548f402d, TypeALU32_2op>, Enc_18c338 { +tc_b9488031, TypeALU32_2op>, 
Enc_18c338 { let Inst{31-23} = 0b011111000; let isReMaterializable = 1; let isAsCheapAsAMove = 1; @@ -422,7 +425,7 @@ def A2_combinew : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = combine($Rs32,$Rt32)", -tc_548f402d, TypeALU32_3op>, Enc_be32a5, PredNewRel { +tc_b9488031, TypeALU32_3op>, Enc_be32a5, PredNewRel { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11110101000; @@ -434,7 +437,7 @@ def A2_max : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = max($Rs32,$Rt32)", -tc_47ab9233, TypeALU64>, Enc_5ab2be { +tc_b44c6e2a, TypeALU64>, Enc_5ab2be { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010101110; @@ -446,7 +449,7 @@ def A2_maxp : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = max($Rss32,$Rtt32)", -tc_47ab9233, TypeALU64>, Enc_a56825 { +tc_b44c6e2a, TypeALU64>, Enc_a56825 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011110; @@ -456,7 +459,7 @@ def A2_maxu : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = maxu($Rs32,$Rt32)", -tc_47ab9233, TypeALU64>, Enc_5ab2be { +tc_b44c6e2a, TypeALU64>, Enc_5ab2be { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010101110; @@ -468,7 +471,7 @@ def A2_maxup : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = maxu($Rss32,$Rtt32)", -tc_47ab9233, TypeALU64>, Enc_a56825 { +tc_b44c6e2a, TypeALU64>, Enc_a56825 { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011110; @@ -478,7 +481,7 @@ def A2_min : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = min($Rt32,$Rs32)", -tc_47ab9233, TypeALU64>, Enc_bd6011 { +tc_b44c6e2a, TypeALU64>, Enc_bd6011 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010101101; @@ -490,7 +493,7 @@ def A2_minp : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = min($Rtt32,$Rss32)", -tc_47ab9233, TypeALU64>, Enc_ea23e4 { +tc_b44c6e2a, TypeALU64>, Enc_ea23e4 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011101; @@ -500,7 +503,7 @@ def A2_minu : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = minu($Rt32,$Rs32)", -tc_47ab9233, TypeALU64>, Enc_bd6011 { +tc_b44c6e2a, TypeALU64>, Enc_bd6011 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010101101; @@ -512,7 +515,7 @@ def A2_minup : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = minu($Rtt32,$Rss32)", -tc_47ab9233, TypeALU64>, Enc_ea23e4 { +tc_b44c6e2a, TypeALU64>, Enc_ea23e4 { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011101; @@ -522,7 +525,7 @@ def A2_neg : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = neg($Rs32)", -tc_f16d5b17, TypeALU32_2op> { +tc_68cb12ce, TypeALU32_2op> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -532,7 +535,7 @@ def A2_negp : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32), "$Rdd32 = neg($Rss32)", -tc_b86c7e8b, TypeS_2op>, Enc_b9c5fb { +tc_cde8b071, TypeS_2op>, Enc_b9c5fb { let Inst{13-5} = 0b000000101; let Inst{31-21} = 0b10000000100; } @@ -540,7 +543,7 @@ def A2_negsat : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = neg($Rs32):sat", -tc_94e6ffd9, TypeS_2op>, Enc_5e2823 { +tc_c2f7d806, TypeS_2op>, Enc_5e2823 { let Inst{13-5} = 0b000000110; let Inst{31-21} = 0b10001100100; let hasNewValue = 1; @@ -552,7 +555,7 @@ def A2_nop : 
HInst< (outs), (ins), "nop", -tc_e2c31426, TypeALU32_2op>, Enc_e3b0c4 { +tc_6efc556e, TypeALU32_2op>, Enc_e3b0c4 { let Inst{13-0} = 0b00000000000000; let Inst{31-16} = 0b0111111100000000; } @@ -560,7 +563,7 @@ def A2_not : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = not($Rs32)", -tc_f16d5b17, TypeALU32_2op> { +tc_68cb12ce, TypeALU32_2op> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -570,7 +573,7 @@ def A2_notp : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32), "$Rdd32 = not($Rss32)", -tc_b86c7e8b, TypeS_2op>, Enc_b9c5fb { +tc_cde8b071, TypeS_2op>, Enc_b9c5fb { let Inst{13-5} = 0b000000100; let Inst{31-21} = 0b10000000100; } @@ -578,7 +581,7 @@ def A2_or : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = or($Rs32,$Rt32)", -tc_548f402d, TypeALU32_3op>, Enc_5ab2be, PredNewRel, ImmRegRel { +tc_b9488031, TypeALU32_3op>, Enc_5ab2be, PredNewRel, ImmRegRel { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11110001001; @@ -594,7 +597,7 @@ def A2_orir : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, s32_0Imm:$Ii), "$Rd32 = or($Rs32,#$Ii)", -tc_548f402d, TypeALU32_2op>, Enc_140c83, ImmRegRel { +tc_b9488031, TypeALU32_2op>, Enc_140c83, ImmRegRel { let Inst{31-22} = 0b0111011010; let hasNewValue = 1; let opNewValue = 0; @@ -610,7 +613,7 @@ def A2_orp : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = or($Rss32,$Rtt32)", -tc_9c18c9a5, TypeALU64>, Enc_a56825 { +tc_540fdfbc, TypeALU64>, Enc_a56825 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011111; @@ -620,7 +623,7 @@ def A2_paddf : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32), "if (!$Pu4) $Rd32 = add($Rs32,$Rt32)", -tc_1b6011fb, TypeALU32_3op>, Enc_ea4c54, PredNewRel, ImmRegRel { +tc_d6bf0472, TypeALU32_3op>, Enc_ea4c54, PredNewRel, ImmRegRel { let Inst{7-7} = 0b1; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11111011000; @@ -636,7 +639,7 @@ def A2_paddfnew : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32), "if (!$Pu4.new) $Rd32 = add($Rs32,$Rt32)", -tc_28d296df, TypeALU32_3op>, Enc_ea4c54, PredNewRel, ImmRegRel { +tc_2b2f4060, TypeALU32_3op>, Enc_ea4c54, PredNewRel, ImmRegRel { let Inst{7-7} = 0b1; let Inst{13-13} = 0b1; let Inst{31-21} = 0b11111011000; @@ -653,7 +656,7 @@ def A2_paddif : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32, s32_0Imm:$Ii), "if (!$Pu4) $Rd32 = add($Rs32,#$Ii)", -tc_1b6011fb, TypeALU32_2op>, Enc_e38e1f, PredNewRel, ImmRegRel { +tc_d6bf0472, TypeALU32_2op>, Enc_e38e1f, PredNewRel, ImmRegRel { let Inst{13-13} = 0b0; let Inst{31-23} = 0b011101001; let isPredicated = 1; @@ -673,7 +676,7 @@ def A2_paddifnew : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32, s32_0Imm:$Ii), "if (!$Pu4.new) $Rd32 = add($Rs32,#$Ii)", -tc_28d296df, TypeALU32_2op>, Enc_e38e1f, PredNewRel, ImmRegRel { +tc_2b2f4060, TypeALU32_2op>, Enc_e38e1f, PredNewRel, ImmRegRel { let Inst{13-13} = 0b1; let Inst{31-23} = 0b011101001; let isPredicated = 1; @@ -694,7 +697,7 @@ def A2_paddit : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32, s32_0Imm:$Ii), "if ($Pu4) $Rd32 = add($Rs32,#$Ii)", -tc_1b6011fb, TypeALU32_2op>, Enc_e38e1f, PredNewRel, ImmRegRel { +tc_d6bf0472, TypeALU32_2op>, Enc_e38e1f, PredNewRel, ImmRegRel { let Inst{13-13} = 0b0; let Inst{31-23} = 0b011101000; let isPredicated = 1; @@ -713,7 +716,7 @@ def A2_padditnew : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32, s32_0Imm:$Ii), 
"if ($Pu4.new) $Rd32 = add($Rs32,#$Ii)", -tc_28d296df, TypeALU32_2op>, Enc_e38e1f, PredNewRel, ImmRegRel { +tc_2b2f4060, TypeALU32_2op>, Enc_e38e1f, PredNewRel, ImmRegRel { let Inst{13-13} = 0b1; let Inst{31-23} = 0b011101000; let isPredicated = 1; @@ -733,7 +736,7 @@ def A2_paddt : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32), "if ($Pu4) $Rd32 = add($Rs32,$Rt32)", -tc_1b6011fb, TypeALU32_3op>, Enc_ea4c54, PredNewRel, ImmRegRel { +tc_d6bf0472, TypeALU32_3op>, Enc_ea4c54, PredNewRel, ImmRegRel { let Inst{7-7} = 0b0; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11111011000; @@ -748,7 +751,7 @@ def A2_paddtnew : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32), "if ($Pu4.new) $Rd32 = add($Rs32,$Rt32)", -tc_28d296df, TypeALU32_3op>, Enc_ea4c54, PredNewRel, ImmRegRel { +tc_2b2f4060, TypeALU32_3op>, Enc_ea4c54, PredNewRel, ImmRegRel { let Inst{7-7} = 0b0; let Inst{13-13} = 0b1; let Inst{31-21} = 0b11111011000; @@ -764,7 +767,7 @@ def A2_pandf : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32), "if (!$Pu4) $Rd32 = and($Rs32,$Rt32)", -tc_1b6011fb, TypeALU32_3op>, Enc_ea4c54, PredNewRel { +tc_d6bf0472, TypeALU32_3op>, Enc_ea4c54, PredNewRel { let Inst{7-7} = 0b1; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11111001000; @@ -778,7 +781,7 @@ def A2_pandfnew : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32), "if (!$Pu4.new) $Rd32 = and($Rs32,$Rt32)", -tc_28d296df, TypeALU32_3op>, Enc_ea4c54, PredNewRel { +tc_2b2f4060, TypeALU32_3op>, Enc_ea4c54, PredNewRel { let Inst{7-7} = 0b1; let Inst{13-13} = 0b1; let Inst{31-21} = 0b11111001000; @@ -793,7 +796,7 @@ def A2_pandt : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32), "if ($Pu4) $Rd32 = and($Rs32,$Rt32)", -tc_1b6011fb, TypeALU32_3op>, Enc_ea4c54, PredNewRel { +tc_d6bf0472, TypeALU32_3op>, Enc_ea4c54, PredNewRel { let Inst{7-7} = 0b0; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11111001000; @@ -806,7 +809,7 @@ def A2_pandtnew : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32), "if ($Pu4.new) $Rd32 = and($Rs32,$Rt32)", -tc_28d296df, TypeALU32_3op>, Enc_ea4c54, PredNewRel { +tc_2b2f4060, TypeALU32_3op>, Enc_ea4c54, PredNewRel { let Inst{7-7} = 0b0; let Inst{13-13} = 0b1; let Inst{31-21} = 0b11111001000; @@ -820,7 +823,7 @@ def A2_porf : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32), "if (!$Pu4) $Rd32 = or($Rs32,$Rt32)", -tc_1b6011fb, TypeALU32_3op>, Enc_ea4c54, PredNewRel { +tc_d6bf0472, TypeALU32_3op>, Enc_ea4c54, PredNewRel { let Inst{7-7} = 0b1; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11111001001; @@ -834,7 +837,7 @@ def A2_porfnew : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32), "if (!$Pu4.new) $Rd32 = or($Rs32,$Rt32)", -tc_28d296df, TypeALU32_3op>, Enc_ea4c54, PredNewRel { +tc_2b2f4060, TypeALU32_3op>, Enc_ea4c54, PredNewRel { let Inst{7-7} = 0b1; let Inst{13-13} = 0b1; let Inst{31-21} = 0b11111001001; @@ -849,7 +852,7 @@ def A2_port : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32), "if ($Pu4) $Rd32 = or($Rs32,$Rt32)", -tc_1b6011fb, TypeALU32_3op>, Enc_ea4c54, PredNewRel { +tc_d6bf0472, TypeALU32_3op>, Enc_ea4c54, PredNewRel { let Inst{7-7} = 0b0; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11111001001; @@ -862,7 +865,7 @@ def A2_portnew : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32), "if ($Pu4.new) $Rd32 = or($Rs32,$Rt32)", -tc_28d296df, 
TypeALU32_3op>, Enc_ea4c54, PredNewRel { +tc_2b2f4060, TypeALU32_3op>, Enc_ea4c54, PredNewRel { let Inst{7-7} = 0b0; let Inst{13-13} = 0b1; let Inst{31-21} = 0b11111001001; @@ -876,7 +879,7 @@ def A2_psubf : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rt32, IntRegs:$Rs32), "if (!$Pu4) $Rd32 = sub($Rt32,$Rs32)", -tc_1b6011fb, TypeALU32_3op>, Enc_9b0bc1, PredNewRel { +tc_d6bf0472, TypeALU32_3op>, Enc_9b0bc1, PredNewRel { let Inst{7-7} = 0b1; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11111011001; @@ -890,7 +893,7 @@ def A2_psubfnew : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rt32, IntRegs:$Rs32), "if (!$Pu4.new) $Rd32 = sub($Rt32,$Rs32)", -tc_28d296df, TypeALU32_3op>, Enc_9b0bc1, PredNewRel { +tc_2b2f4060, TypeALU32_3op>, Enc_9b0bc1, PredNewRel { let Inst{7-7} = 0b1; let Inst{13-13} = 0b1; let Inst{31-21} = 0b11111011001; @@ -905,7 +908,7 @@ def A2_psubt : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rt32, IntRegs:$Rs32), "if ($Pu4) $Rd32 = sub($Rt32,$Rs32)", -tc_1b6011fb, TypeALU32_3op>, Enc_9b0bc1, PredNewRel { +tc_d6bf0472, TypeALU32_3op>, Enc_9b0bc1, PredNewRel { let Inst{7-7} = 0b0; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11111011001; @@ -918,7 +921,7 @@ def A2_psubtnew : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rt32, IntRegs:$Rs32), "if ($Pu4.new) $Rd32 = sub($Rt32,$Rs32)", -tc_28d296df, TypeALU32_3op>, Enc_9b0bc1, PredNewRel { +tc_2b2f4060, TypeALU32_3op>, Enc_9b0bc1, PredNewRel { let Inst{7-7} = 0b0; let Inst{13-13} = 0b1; let Inst{31-21} = 0b11111011001; @@ -932,7 +935,7 @@ def A2_pxorf : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32), "if (!$Pu4) $Rd32 = xor($Rs32,$Rt32)", -tc_1b6011fb, TypeALU32_3op>, Enc_ea4c54, PredNewRel { +tc_d6bf0472, TypeALU32_3op>, Enc_ea4c54, PredNewRel { let Inst{7-7} = 0b1; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11111001011; @@ -946,7 +949,7 @@ def A2_pxorfnew : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32), "if (!$Pu4.new) $Rd32 = xor($Rs32,$Rt32)", -tc_28d296df, TypeALU32_3op>, Enc_ea4c54, PredNewRel { +tc_2b2f4060, TypeALU32_3op>, Enc_ea4c54, PredNewRel { let Inst{7-7} = 0b1; let Inst{13-13} = 0b1; let Inst{31-21} = 0b11111001011; @@ -961,7 +964,7 @@ def A2_pxort : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32), "if ($Pu4) $Rd32 = xor($Rs32,$Rt32)", -tc_1b6011fb, TypeALU32_3op>, Enc_ea4c54, PredNewRel { +tc_d6bf0472, TypeALU32_3op>, Enc_ea4c54, PredNewRel { let Inst{7-7} = 0b0; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11111001011; @@ -974,7 +977,7 @@ def A2_pxortnew : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32), "if ($Pu4.new) $Rd32 = xor($Rs32,$Rt32)", -tc_28d296df, TypeALU32_3op>, Enc_ea4c54, PredNewRel { +tc_2b2f4060, TypeALU32_3op>, Enc_ea4c54, PredNewRel { let Inst{7-7} = 0b0; let Inst{13-13} = 0b1; let Inst{31-21} = 0b11111001011; @@ -988,7 +991,7 @@ def A2_roundsat : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32), "$Rd32 = round($Rss32):sat", -tc_94e6ffd9, TypeS_2op>, Enc_90cd8b, Requires<[HasV5T]> { +tc_c2f7d806, TypeS_2op>, Enc_90cd8b, Requires<[HasV5T]> { let Inst{13-5} = 0b000000001; let Inst{31-21} = 0b10001000110; let hasNewValue = 1; @@ -1000,7 +1003,7 @@ def A2_sat : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32), "$Rd32 = sat($Rss32)", -tc_b86c7e8b, TypeS_2op>, Enc_90cd8b { +tc_cde8b071, TypeS_2op>, Enc_90cd8b { let Inst{13-5} = 0b000000000; let Inst{31-21} = 0b10001000110; let hasNewValue = 1; @@ -1011,7 +1014,7 @@ def 
A2_satb : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = satb($Rs32)", -tc_b86c7e8b, TypeS_2op>, Enc_5e2823 { +tc_cde8b071, TypeS_2op>, Enc_5e2823 { let Inst{13-5} = 0b000000111; let Inst{31-21} = 0b10001100110; let hasNewValue = 1; @@ -1022,7 +1025,7 @@ def A2_sath : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = sath($Rs32)", -tc_b86c7e8b, TypeS_2op>, Enc_5e2823 { +tc_cde8b071, TypeS_2op>, Enc_5e2823 { let Inst{13-5} = 0b000000100; let Inst{31-21} = 0b10001100110; let hasNewValue = 1; @@ -1033,7 +1036,7 @@ def A2_satub : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = satub($Rs32)", -tc_b86c7e8b, TypeS_2op>, Enc_5e2823 { +tc_cde8b071, TypeS_2op>, Enc_5e2823 { let Inst{13-5} = 0b000000110; let Inst{31-21} = 0b10001100110; let hasNewValue = 1; @@ -1044,7 +1047,7 @@ def A2_satuh : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = satuh($Rs32)", -tc_b86c7e8b, TypeS_2op>, Enc_5e2823 { +tc_cde8b071, TypeS_2op>, Enc_5e2823 { let Inst{13-5} = 0b000000101; let Inst{31-21} = 0b10001100110; let hasNewValue = 1; @@ -1055,7 +1058,7 @@ def A2_sub : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = sub($Rt32,$Rs32)", -tc_548f402d, TypeALU32_3op>, Enc_bd6011, PredNewRel, ImmRegRel { +tc_b9488031, TypeALU32_3op>, Enc_bd6011, PredNewRel, ImmRegRel { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11110011001; @@ -1070,7 +1073,7 @@ def A2_subh_h16_hh : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = sub($Rt32.h,$Rs32.h):<<16", -tc_bd16579e, TypeALU64>, Enc_bd6011 { +tc_897d1a9d, TypeALU64>, Enc_bd6011 { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010101011; @@ -1082,7 +1085,7 @@ def A2_subh_h16_hl : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = sub($Rt32.h,$Rs32.l):<<16", -tc_bd16579e, TypeALU64>, Enc_bd6011 { +tc_897d1a9d, TypeALU64>, Enc_bd6011 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010101011; @@ -1094,7 +1097,7 @@ def A2_subh_h16_lh : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = sub($Rt32.l,$Rs32.h):<<16", -tc_bd16579e, TypeALU64>, Enc_bd6011 { +tc_897d1a9d, TypeALU64>, Enc_bd6011 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010101011; @@ -1106,7 +1109,7 @@ def A2_subh_h16_ll : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = sub($Rt32.l,$Rs32.l):<<16", -tc_bd16579e, TypeALU64>, Enc_bd6011 { +tc_897d1a9d, TypeALU64>, Enc_bd6011 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010101011; @@ -1118,7 +1121,7 @@ def A2_subh_h16_sat_hh : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = sub($Rt32.h,$Rs32.h):sat:<<16", -tc_47ab9233, TypeALU64>, Enc_bd6011 { +tc_b44c6e2a, TypeALU64>, Enc_bd6011 { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010101011; @@ -1131,7 +1134,7 @@ def A2_subh_h16_sat_hl : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = sub($Rt32.h,$Rs32.l):sat:<<16", -tc_47ab9233, TypeALU64>, Enc_bd6011 { +tc_b44c6e2a, TypeALU64>, Enc_bd6011 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010101011; @@ -1144,7 +1147,7 @@ def A2_subh_h16_sat_lh : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = sub($Rt32.l,$Rs32.h):sat:<<16", -tc_47ab9233, TypeALU64>, Enc_bd6011 { +tc_b44c6e2a, TypeALU64>, Enc_bd6011 { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010101011; @@ -1157,7 
+1160,7 @@ def A2_subh_h16_sat_ll : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = sub($Rt32.l,$Rs32.l):sat:<<16", -tc_47ab9233, TypeALU64>, Enc_bd6011 { +tc_b44c6e2a, TypeALU64>, Enc_bd6011 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010101011; @@ -1170,7 +1173,7 @@ def A2_subh_l16_hl : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = sub($Rt32.l,$Rs32.h)", -tc_7ca2ea10, TypeALU64>, Enc_bd6011 { +tc_1b9c9ee5, TypeALU64>, Enc_bd6011 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010101001; @@ -1182,7 +1185,7 @@ def A2_subh_l16_ll : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = sub($Rt32.l,$Rs32.l)", -tc_7ca2ea10, TypeALU64>, Enc_bd6011 { +tc_1b9c9ee5, TypeALU64>, Enc_bd6011 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010101001; @@ -1194,7 +1197,7 @@ def A2_subh_l16_sat_hl : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = sub($Rt32.l,$Rs32.h):sat", -tc_47ab9233, TypeALU64>, Enc_bd6011 { +tc_b44c6e2a, TypeALU64>, Enc_bd6011 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010101001; @@ -1207,7 +1210,7 @@ def A2_subh_l16_sat_ll : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = sub($Rt32.l,$Rs32.l):sat", -tc_47ab9233, TypeALU64>, Enc_bd6011 { +tc_b44c6e2a, TypeALU64>, Enc_bd6011 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010101001; @@ -1220,7 +1223,7 @@ def A2_subp : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = sub($Rtt32,$Rss32)", -tc_9c18c9a5, TypeALU64>, Enc_ea23e4 { +tc_540fdfbc, TypeALU64>, Enc_ea23e4 { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011001; @@ -1229,7 +1232,7 @@ def A2_subri : HInst< (outs IntRegs:$Rd32), (ins s32_0Imm:$Ii, IntRegs:$Rs32), "$Rd32 = sub(#$Ii,$Rs32)", -tc_548f402d, TypeALU32_2op>, Enc_140c83, PredNewRel, ImmRegRel { +tc_b9488031, TypeALU32_2op>, Enc_140c83, PredNewRel, ImmRegRel { let Inst{31-22} = 0b0111011001; let hasNewValue = 1; let opNewValue = 0; @@ -1245,7 +1248,7 @@ def A2_subsat : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = sub($Rt32,$Rs32):sat", -tc_b0f50e3c, TypeALU32_3op>, Enc_bd6011 { +tc_5ba5997d, TypeALU32_3op>, Enc_bd6011 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11110110110; @@ -1259,7 +1262,7 @@ def A2_svaddh : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = vaddh($Rs32,$Rt32)", -tc_548f402d, TypeALU32_3op>, Enc_5ab2be { +tc_b9488031, TypeALU32_3op>, Enc_5ab2be { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11110110000; @@ -1272,7 +1275,7 @@ def A2_svaddhs : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = vaddh($Rs32,$Rt32):sat", -tc_b0f50e3c, TypeALU32_3op>, Enc_5ab2be { +tc_5ba5997d, TypeALU32_3op>, Enc_5ab2be { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11110110001; @@ -1287,7 +1290,7 @@ def A2_svadduhs : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = vadduh($Rs32,$Rt32):sat", -tc_b0f50e3c, TypeALU32_3op>, Enc_5ab2be { +tc_5ba5997d, TypeALU32_3op>, Enc_5ab2be { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11110110011; @@ -1302,13 +1305,12 @@ def A2_svavgh : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = vavgh($Rs32,$Rt32)", -tc_511f28f6, TypeALU32_3op>, Enc_5ab2be { +tc_b9488031, TypeALU32_3op>, 
Enc_5ab2be { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11110111000; let hasNewValue = 1; let opNewValue = 0; -let prefersSlot3 = 1; let InputType = "reg"; let isCommutable = 1; } @@ -1316,13 +1318,12 @@ def A2_svavghs : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = vavgh($Rs32,$Rt32):rnd", -tc_76c4c5ef, TypeALU32_3op>, Enc_5ab2be { +tc_8fe6b782, TypeALU32_3op>, Enc_5ab2be { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11110111001; let hasNewValue = 1; let opNewValue = 0; -let prefersSlot3 = 1; let InputType = "reg"; let isCommutable = 1; } @@ -1330,20 +1331,19 @@ def A2_svnavgh : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = vnavgh($Rt32,$Rs32)", -tc_511f28f6, TypeALU32_3op>, Enc_bd6011 { +tc_b9488031, TypeALU32_3op>, Enc_bd6011 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11110111011; let hasNewValue = 1; let opNewValue = 0; -let prefersSlot3 = 1; let InputType = "reg"; } def A2_svsubh : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = vsubh($Rt32,$Rs32)", -tc_548f402d, TypeALU32_3op>, Enc_bd6011 { +tc_b9488031, TypeALU32_3op>, Enc_bd6011 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11110110100; @@ -1355,7 +1355,7 @@ def A2_svsubhs : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = vsubh($Rt32,$Rs32):sat", -tc_b0f50e3c, TypeALU32_3op>, Enc_bd6011 { +tc_5ba5997d, TypeALU32_3op>, Enc_bd6011 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11110110101; @@ -1369,7 +1369,7 @@ def A2_svsubuhs : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = vsubuh($Rt32,$Rs32):sat", -tc_b0f50e3c, TypeALU32_3op>, Enc_bd6011 { +tc_5ba5997d, TypeALU32_3op>, Enc_bd6011 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11110110111; @@ -1383,7 +1383,7 @@ def A2_swiz : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = swiz($Rs32)", -tc_b86c7e8b, TypeS_2op>, Enc_5e2823 { +tc_cde8b071, TypeS_2op>, Enc_5e2823 { let Inst{13-5} = 0b000000111; let Inst{31-21} = 0b10001100100; let hasNewValue = 1; @@ -1393,7 +1393,7 @@ def A2_sxtb : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = sxtb($Rs32)", -tc_f16d5b17, TypeALU32_2op>, Enc_5e2823, PredNewRel { +tc_68cb12ce, TypeALU32_2op>, Enc_5e2823, PredNewRel { let Inst{13-5} = 0b000000000; let Inst{31-21} = 0b01110000101; let hasNewValue = 1; @@ -1405,7 +1405,7 @@ def A2_sxth : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = sxth($Rs32)", -tc_f16d5b17, TypeALU32_2op>, Enc_5e2823, PredNewRel { +tc_68cb12ce, TypeALU32_2op>, Enc_5e2823, PredNewRel { let Inst{13-5} = 0b000000000; let Inst{31-21} = 0b01110000111; let hasNewValue = 1; @@ -1417,7 +1417,7 @@ def A2_sxtw : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32), "$Rdd32 = sxtw($Rs32)", -tc_b86c7e8b, TypeS_2op>, Enc_3a3d62 { +tc_cde8b071, TypeS_2op>, Enc_3a3d62 { let Inst{13-5} = 0b000000000; let Inst{31-21} = 0b10000100010; } @@ -1425,7 +1425,7 @@ def A2_tfr : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = $Rs32", -tc_f16d5b17, TypeALU32_2op>, Enc_5e2823, PredNewRel { +tc_68cb12ce, TypeALU32_2op>, Enc_5e2823, PredNewRel { let Inst{13-5} = 0b000000000; let Inst{31-21} = 0b01110000011; let hasNewValue = 1; @@ -1438,7 +1438,7 @@ def A2_tfrcrr : HInst< (outs IntRegs:$Rd32), (ins CtrRegs:$Cs32), "$Rd32 = $Cs32", -tc_3b4892c6, TypeCR>, Enc_0cb018 { +tc_29175780, TypeCR>, Enc_0cb018 { let Inst{13-5} = 0b000000000; let Inst{31-21} = 
0b01101010000; let hasNewValue = 1; @@ -1448,7 +1448,7 @@ def A2_tfrf : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if (!$Pu4) $Rd32 = $Rs32", -tc_1b6011fb, TypeALU32_2op>, PredNewRel, ImmRegRel { +tc_d6bf0472, TypeALU32_2op>, PredNewRel, ImmRegRel { let isPredicated = 1; let isPredicatedFalse = 1; let hasNewValue = 1; @@ -1463,7 +1463,7 @@ def A2_tfrfnew : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if (!$Pu4.new) $Rd32 = $Rs32", -tc_28d296df, TypeALU32_2op>, PredNewRel, ImmRegRel { +tc_2b2f4060, TypeALU32_2op>, PredNewRel, ImmRegRel { let isPredicated = 1; let isPredicatedFalse = 1; let hasNewValue = 1; @@ -1479,7 +1479,7 @@ def A2_tfrih : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, u16_0Imm:$Ii), "$Rx32.h = #$Ii", -tc_548f402d, TypeALU32_2op>, Enc_51436c { +tc_b9488031, TypeALU32_2op>, Enc_51436c { let Inst{21-21} = 0b1; let Inst{31-24} = 0b01110010; let hasNewValue = 1; @@ -1490,7 +1490,7 @@ def A2_tfril : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, u16_0Imm:$Ii), "$Rx32.l = #$Ii", -tc_548f402d, TypeALU32_2op>, Enc_51436c { +tc_b9488031, TypeALU32_2op>, Enc_51436c { let Inst{21-21} = 0b1; let Inst{31-24} = 0b01110001; let hasNewValue = 1; @@ -1501,7 +1501,7 @@ def A2_tfrp : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32), "$Rdd32 = $Rss32", -tc_548f402d, TypeALU32_2op>, PredNewRel { +tc_b9488031, TypeALU32_2op>, PredNewRel { let BaseOpcode = "A2_tfrp"; let isPredicable = 1; let isPseudo = 1; @@ -1510,7 +1510,7 @@ def A2_tfrpf : HInst< (outs DoubleRegs:$Rdd32), (ins PredRegs:$Pu4, DoubleRegs:$Rss32), "if (!$Pu4) $Rdd32 = $Rss32", -tc_548f402d, TypeALU32_2op>, PredNewRel { +tc_b9488031, TypeALU32_2op>, PredNewRel { let isPredicated = 1; let isPredicatedFalse = 1; let BaseOpcode = "A2_tfrp"; @@ -1520,7 +1520,7 @@ def A2_tfrpfnew : HInst< (outs DoubleRegs:$Rdd32), (ins PredRegs:$Pu4, DoubleRegs:$Rss32), "if (!$Pu4.new) $Rdd32 = $Rss32", -tc_b08be45e, TypeALU32_2op>, PredNewRel { +tc_5f6847a1, TypeALU32_2op>, PredNewRel { let isPredicated = 1; let isPredicatedFalse = 1; let isPredicatedNew = 1; @@ -1531,7 +1531,7 @@ def A2_tfrpi : HInst< (outs DoubleRegs:$Rdd32), (ins s8_0Imm:$Ii), "$Rdd32 = #$Ii", -tc_548f402d, TypeALU64> { +tc_b9488031, TypeALU64> { let isReMaterializable = 1; let isAsCheapAsAMove = 1; let isMoveImm = 1; @@ -1541,7 +1541,7 @@ def A2_tfrpt : HInst< (outs DoubleRegs:$Rdd32), (ins PredRegs:$Pu4, DoubleRegs:$Rss32), "if ($Pu4) $Rdd32 = $Rss32", -tc_548f402d, TypeALU32_2op>, PredNewRel { +tc_b9488031, TypeALU32_2op>, PredNewRel { let isPredicated = 1; let BaseOpcode = "A2_tfrp"; let isPseudo = 1; @@ -1550,7 +1550,7 @@ def A2_tfrptnew : HInst< (outs DoubleRegs:$Rdd32), (ins PredRegs:$Pu4, DoubleRegs:$Rss32), "if ($Pu4.new) $Rdd32 = $Rss32", -tc_b08be45e, TypeALU32_2op>, PredNewRel { +tc_5f6847a1, TypeALU32_2op>, PredNewRel { let isPredicated = 1; let isPredicatedNew = 1; let BaseOpcode = "A2_tfrp"; @@ -1560,7 +1560,7 @@ def A2_tfrrcr : HInst< (outs CtrRegs:$Cd32), (ins IntRegs:$Rs32), "$Cd32 = $Rs32", -tc_82f0f122, TypeCR>, Enc_bd811a { +tc_a21dc435, TypeCR>, Enc_bd811a { let Inst{13-5} = 0b000000000; let Inst{31-21} = 0b01100010001; let hasNewValue = 1; @@ -1570,7 +1570,7 @@ def A2_tfrsi : HInst< (outs IntRegs:$Rd32), (ins s32_0Imm:$Ii), "$Rd32 = #$Ii", -tc_f16d5b17, TypeALU32_2op>, Enc_5e87ce, PredNewRel, ImmRegRel { +tc_68cb12ce, TypeALU32_2op>, Enc_5e87ce, PredNewRel, ImmRegRel { let Inst{21-21} = 0b0; let Inst{31-24} = 0b01111000; let hasNewValue = 1; @@ -1592,7 +1592,7 @@ def A2_tfrt : HInst< (outs 
IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if ($Pu4) $Rd32 = $Rs32", -tc_1b6011fb, TypeALU32_2op>, PredNewRel, ImmRegRel { +tc_d6bf0472, TypeALU32_2op>, PredNewRel, ImmRegRel { let isPredicated = 1; let hasNewValue = 1; let opNewValue = 0; @@ -1606,7 +1606,7 @@ def A2_tfrtnew : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if ($Pu4.new) $Rd32 = $Rs32", -tc_28d296df, TypeALU32_2op>, PredNewRel, ImmRegRel { +tc_2b2f4060, TypeALU32_2op>, PredNewRel, ImmRegRel { let isPredicated = 1; let hasNewValue = 1; let opNewValue = 0; @@ -1621,7 +1621,7 @@ def A2_vabsh : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32), "$Rdd32 = vabsh($Rss32)", -tc_94e6ffd9, TypeS_2op>, Enc_b9c5fb { +tc_c2f7d806, TypeS_2op>, Enc_b9c5fb { let Inst{13-5} = 0b000000100; let Inst{31-21} = 0b10000000010; let prefersSlot3 = 1; @@ -1630,7 +1630,7 @@ def A2_vabshsat : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32), "$Rdd32 = vabsh($Rss32):sat", -tc_94e6ffd9, TypeS_2op>, Enc_b9c5fb { +tc_c2f7d806, TypeS_2op>, Enc_b9c5fb { let Inst{13-5} = 0b000000101; let Inst{31-21} = 0b10000000010; let prefersSlot3 = 1; @@ -1640,7 +1640,7 @@ def A2_vabsw : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32), "$Rdd32 = vabsw($Rss32)", -tc_94e6ffd9, TypeS_2op>, Enc_b9c5fb { +tc_c2f7d806, TypeS_2op>, Enc_b9c5fb { let Inst{13-5} = 0b000000110; let Inst{31-21} = 0b10000000010; let prefersSlot3 = 1; @@ -1649,7 +1649,7 @@ def A2_vabswsat : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32), "$Rdd32 = vabsw($Rss32):sat", -tc_94e6ffd9, TypeS_2op>, Enc_b9c5fb { +tc_c2f7d806, TypeS_2op>, Enc_b9c5fb { let Inst{13-5} = 0b000000111; let Inst{31-21} = 0b10000000010; let prefersSlot3 = 1; @@ -1659,7 +1659,7 @@ def A2_vaddb_map : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vaddb($Rss32,$Rtt32)", -tc_9c18c9a5, TypeMAPPING> { +tc_540fdfbc, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -1667,7 +1667,7 @@ def A2_vaddh : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vaddh($Rss32,$Rtt32)", -tc_9c18c9a5, TypeALU64>, Enc_a56825 { +tc_540fdfbc, TypeALU64>, Enc_a56825 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011000; @@ -1676,7 +1676,7 @@ def A2_vaddhs : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vaddh($Rss32,$Rtt32):sat", -tc_47ab9233, TypeALU64>, Enc_a56825 { +tc_b44c6e2a, TypeALU64>, Enc_a56825 { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011000; @@ -1687,7 +1687,7 @@ def A2_vaddub : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vaddub($Rss32,$Rtt32)", -tc_9c18c9a5, TypeALU64>, Enc_a56825 { +tc_540fdfbc, TypeALU64>, Enc_a56825 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011000; @@ -1696,7 +1696,7 @@ def A2_vaddubs : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vaddub($Rss32,$Rtt32):sat", -tc_47ab9233, TypeALU64>, Enc_a56825 { +tc_b44c6e2a, TypeALU64>, Enc_a56825 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011000; @@ -1707,7 +1707,7 @@ def A2_vadduhs : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vadduh($Rss32,$Rtt32):sat", -tc_47ab9233, TypeALU64>, Enc_a56825 { +tc_b44c6e2a, TypeALU64>, Enc_a56825 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011000; @@ -1718,7 +1718,7 @@ def A2_vaddw : HInst< (outs 
DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vaddw($Rss32,$Rtt32)", -tc_9c18c9a5, TypeALU64>, Enc_a56825 { +tc_540fdfbc, TypeALU64>, Enc_a56825 { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011000; @@ -1727,7 +1727,7 @@ def A2_vaddws : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vaddw($Rss32,$Rtt32):sat", -tc_47ab9233, TypeALU64>, Enc_a56825 { +tc_b44c6e2a, TypeALU64>, Enc_a56825 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011000; @@ -1738,17 +1738,16 @@ def A2_vavgh : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vavgh($Rss32,$Rtt32)", -tc_cd321066, TypeALU64>, Enc_a56825 { +tc_540fdfbc, TypeALU64>, Enc_a56825 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011010; -let prefersSlot3 = 1; } def A2_vavghcr : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vavgh($Rss32,$Rtt32):crnd", -tc_63cd9d2d, TypeALU64>, Enc_a56825 { +tc_2b6f77c6, TypeALU64>, Enc_a56825 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011010; @@ -1758,87 +1757,79 @@ def A2_vavghr : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vavgh($Rss32,$Rtt32):rnd", -tc_37326008, TypeALU64>, Enc_a56825 { +tc_dbdffe3d, TypeALU64>, Enc_a56825 { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011010; -let prefersSlot3 = 1; } def A2_vavgub : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vavgub($Rss32,$Rtt32)", -tc_cd321066, TypeALU64>, Enc_a56825 { +tc_540fdfbc, TypeALU64>, Enc_a56825 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011010; -let prefersSlot3 = 1; } def A2_vavgubr : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vavgub($Rss32,$Rtt32):rnd", -tc_37326008, TypeALU64>, Enc_a56825 { +tc_dbdffe3d, TypeALU64>, Enc_a56825 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011010; -let prefersSlot3 = 1; } def A2_vavguh : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vavguh($Rss32,$Rtt32)", -tc_cd321066, TypeALU64>, Enc_a56825 { +tc_540fdfbc, TypeALU64>, Enc_a56825 { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011010; -let prefersSlot3 = 1; } def A2_vavguhr : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vavguh($Rss32,$Rtt32):rnd", -tc_37326008, TypeALU64>, Enc_a56825 { +tc_dbdffe3d, TypeALU64>, Enc_a56825 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011010; -let prefersSlot3 = 1; } def A2_vavguw : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vavguw($Rss32,$Rtt32)", -tc_cd321066, TypeALU64>, Enc_a56825 { +tc_540fdfbc, TypeALU64>, Enc_a56825 { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011011; -let prefersSlot3 = 1; } def A2_vavguwr : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vavguw($Rss32,$Rtt32):rnd", -tc_37326008, TypeALU64>, Enc_a56825 { +tc_dbdffe3d, TypeALU64>, Enc_a56825 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011011; -let prefersSlot3 = 1; } def A2_vavgw : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vavgw($Rss32,$Rtt32)", -tc_cd321066, TypeALU64>, Enc_a56825 { 
+tc_540fdfbc, TypeALU64>, Enc_a56825 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011011; -let prefersSlot3 = 1; } def A2_vavgwcr : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vavgw($Rss32,$Rtt32):crnd", -tc_63cd9d2d, TypeALU64>, Enc_a56825 { +tc_2b6f77c6, TypeALU64>, Enc_a56825 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011011; @@ -1848,17 +1839,16 @@ def A2_vavgwr : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vavgw($Rss32,$Rtt32):rnd", -tc_37326008, TypeALU64>, Enc_a56825 { +tc_dbdffe3d, TypeALU64>, Enc_a56825 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011011; -let prefersSlot3 = 1; } def A2_vcmpbeq : HInst< (outs PredRegs:$Pd4), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Pd4 = vcmpb.eq($Rss32,$Rtt32)", -tc_c58f771a, TypeALU64>, Enc_fcf7a7 { +tc_1e856f58, TypeALU64>, Enc_fcf7a7 { let Inst{7-2} = 0b110000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010010000; @@ -1867,7 +1857,7 @@ def A2_vcmpbgtu : HInst< (outs PredRegs:$Pd4), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Pd4 = vcmpb.gtu($Rss32,$Rtt32)", -tc_c58f771a, TypeALU64>, Enc_fcf7a7 { +tc_1e856f58, TypeALU64>, Enc_fcf7a7 { let Inst{7-2} = 0b111000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010010000; @@ -1876,7 +1866,7 @@ def A2_vcmpheq : HInst< (outs PredRegs:$Pd4), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Pd4 = vcmph.eq($Rss32,$Rtt32)", -tc_c58f771a, TypeALU64>, Enc_fcf7a7 { +tc_1e856f58, TypeALU64>, Enc_fcf7a7 { let Inst{7-2} = 0b011000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010010000; @@ -1885,7 +1875,7 @@ def A2_vcmphgt : HInst< (outs PredRegs:$Pd4), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Pd4 = vcmph.gt($Rss32,$Rtt32)", -tc_c58f771a, TypeALU64>, Enc_fcf7a7 { +tc_1e856f58, TypeALU64>, Enc_fcf7a7 { let Inst{7-2} = 0b100000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010010000; @@ -1894,7 +1884,7 @@ def A2_vcmphgtu : HInst< (outs PredRegs:$Pd4), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Pd4 = vcmph.gtu($Rss32,$Rtt32)", -tc_c58f771a, TypeALU64>, Enc_fcf7a7 { +tc_1e856f58, TypeALU64>, Enc_fcf7a7 { let Inst{7-2} = 0b101000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010010000; @@ -1903,7 +1893,7 @@ def A2_vcmpweq : HInst< (outs PredRegs:$Pd4), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Pd4 = vcmpw.eq($Rss32,$Rtt32)", -tc_c58f771a, TypeALU64>, Enc_fcf7a7 { +tc_1e856f58, TypeALU64>, Enc_fcf7a7 { let Inst{7-2} = 0b000000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010010000; @@ -1912,7 +1902,7 @@ def A2_vcmpwgt : HInst< (outs PredRegs:$Pd4), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Pd4 = vcmpw.gt($Rss32,$Rtt32)", -tc_c58f771a, TypeALU64>, Enc_fcf7a7 { +tc_1e856f58, TypeALU64>, Enc_fcf7a7 { let Inst{7-2} = 0b001000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010010000; @@ -1921,7 +1911,7 @@ def A2_vcmpwgtu : HInst< (outs PredRegs:$Pd4), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Pd4 = vcmpw.gtu($Rss32,$Rtt32)", -tc_c58f771a, TypeALU64>, Enc_fcf7a7 { +tc_1e856f58, TypeALU64>, Enc_fcf7a7 { let Inst{7-2} = 0b010000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010010000; @@ -1930,7 +1920,7 @@ def A2_vconj : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32), "$Rdd32 = vconj($Rss32):sat", -tc_94e6ffd9, TypeS_2op>, Enc_b9c5fb { +tc_c2f7d806, TypeS_2op>, Enc_b9c5fb { let Inst{13-5} = 0b000000111; let Inst{31-21} = 0b10000000100; let prefersSlot3 = 1; @@ -1940,7 +1930,7 @@ def A2_vmaxb : HInst< (outs 
DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = vmaxb($Rtt32,$Rss32)", -tc_47ab9233, TypeALU64>, Enc_ea23e4 { +tc_b44c6e2a, TypeALU64>, Enc_ea23e4 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011110; @@ -1950,7 +1940,7 @@ def A2_vmaxh : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = vmaxh($Rtt32,$Rss32)", -tc_47ab9233, TypeALU64>, Enc_ea23e4 { +tc_b44c6e2a, TypeALU64>, Enc_ea23e4 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011110; @@ -1960,7 +1950,7 @@ def A2_vmaxub : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = vmaxub($Rtt32,$Rss32)", -tc_47ab9233, TypeALU64>, Enc_ea23e4 { +tc_b44c6e2a, TypeALU64>, Enc_ea23e4 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011110; @@ -1970,7 +1960,7 @@ def A2_vmaxuh : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = vmaxuh($Rtt32,$Rss32)", -tc_47ab9233, TypeALU64>, Enc_ea23e4 { +tc_b44c6e2a, TypeALU64>, Enc_ea23e4 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011110; @@ -1980,7 +1970,7 @@ def A2_vmaxuw : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = vmaxuw($Rtt32,$Rss32)", -tc_47ab9233, TypeALU64>, Enc_ea23e4 { +tc_b44c6e2a, TypeALU64>, Enc_ea23e4 { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011101; @@ -1990,7 +1980,7 @@ def A2_vmaxw : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = vmaxw($Rtt32,$Rss32)", -tc_47ab9233, TypeALU64>, Enc_ea23e4 { +tc_b44c6e2a, TypeALU64>, Enc_ea23e4 { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011110; @@ -2000,7 +1990,7 @@ def A2_vminb : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = vminb($Rtt32,$Rss32)", -tc_47ab9233, TypeALU64>, Enc_ea23e4 { +tc_b44c6e2a, TypeALU64>, Enc_ea23e4 { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011110; @@ -2010,7 +2000,7 @@ def A2_vminh : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = vminh($Rtt32,$Rss32)", -tc_47ab9233, TypeALU64>, Enc_ea23e4 { +tc_b44c6e2a, TypeALU64>, Enc_ea23e4 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011101; @@ -2020,7 +2010,7 @@ def A2_vminub : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = vminub($Rtt32,$Rss32)", -tc_47ab9233, TypeALU64>, Enc_ea23e4 { +tc_b44c6e2a, TypeALU64>, Enc_ea23e4 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011101; @@ -2030,7 +2020,7 @@ def A2_vminuh : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = vminuh($Rtt32,$Rss32)", -tc_47ab9233, TypeALU64>, Enc_ea23e4 { +tc_b44c6e2a, TypeALU64>, Enc_ea23e4 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011101; @@ -2040,7 +2030,7 @@ def A2_vminuw : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = vminuw($Rtt32,$Rss32)", -tc_47ab9233, TypeALU64>, Enc_ea23e4 { +tc_b44c6e2a, TypeALU64>, Enc_ea23e4 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011101; @@ -2050,7 +2040,7 @@ def A2_vminw : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = vminw($Rtt32,$Rss32)", -tc_47ab9233, TypeALU64>, Enc_ea23e4 { +tc_b44c6e2a, TypeALU64>, Enc_ea23e4 { let Inst{7-5} = 0b011; 
let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011101; @@ -2060,17 +2050,16 @@ def A2_vnavgh : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = vnavgh($Rtt32,$Rss32)", -tc_cd321066, TypeALU64>, Enc_ea23e4 { +tc_540fdfbc, TypeALU64>, Enc_ea23e4 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011100; -let prefersSlot3 = 1; } def A2_vnavghcr : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = vnavgh($Rtt32,$Rss32):crnd:sat", -tc_63cd9d2d, TypeALU64>, Enc_ea23e4 { +tc_2b6f77c6, TypeALU64>, Enc_ea23e4 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011100; @@ -2081,7 +2070,7 @@ def A2_vnavghr : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = vnavgh($Rtt32,$Rss32):rnd:sat", -tc_63cd9d2d, TypeALU64>, Enc_ea23e4 { +tc_2b6f77c6, TypeALU64>, Enc_ea23e4 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011100; @@ -2092,17 +2081,16 @@ def A2_vnavgw : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = vnavgw($Rtt32,$Rss32)", -tc_cd321066, TypeALU64>, Enc_ea23e4 { +tc_540fdfbc, TypeALU64>, Enc_ea23e4 { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011100; -let prefersSlot3 = 1; } def A2_vnavgwcr : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = vnavgw($Rtt32,$Rss32):crnd:sat", -tc_63cd9d2d, TypeALU64>, Enc_ea23e4 { +tc_2b6f77c6, TypeALU64>, Enc_ea23e4 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011100; @@ -2113,7 +2101,7 @@ def A2_vnavgwr : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = vnavgw($Rtt32,$Rss32):rnd:sat", -tc_63cd9d2d, TypeALU64>, Enc_ea23e4 { +tc_2b6f77c6, TypeALU64>, Enc_ea23e4 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011100; @@ -2124,7 +2112,7 @@ def A2_vraddub : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vraddub($Rss32,$Rtt32)", -tc_8c8041e6, TypeM>, Enc_a56825 { +tc_8fd5f294, TypeM>, Enc_a56825 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000010; @@ -2134,7 +2122,7 @@ def A2_vraddub_acc : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vraddub($Rss32,$Rtt32)", -tc_8cb685d9, TypeM>, Enc_88c16c { +tc_e913dc32, TypeM>, Enc_88c16c { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010010; @@ -2145,7 +2133,7 @@ def A2_vrsadub : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vrsadub($Rss32,$Rtt32)", -tc_8c8041e6, TypeM>, Enc_a56825 { +tc_8fd5f294, TypeM>, Enc_a56825 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000010; @@ -2155,7 +2143,7 @@ def A2_vrsadub_acc : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vrsadub($Rss32,$Rtt32)", -tc_8cb685d9, TypeM>, Enc_88c16c { +tc_e913dc32, TypeM>, Enc_88c16c { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010010; @@ -2166,7 +2154,7 @@ def A2_vsubb_map : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vsubb($Rss32,$Rtt32)", -tc_9c18c9a5, TypeMAPPING> { +tc_540fdfbc, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -2174,7 +2162,7 @@ def A2_vsubh : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, 
DoubleRegs:$Rss32), "$Rdd32 = vsubh($Rtt32,$Rss32)", -tc_9c18c9a5, TypeALU64>, Enc_ea23e4 { +tc_540fdfbc, TypeALU64>, Enc_ea23e4 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011001; @@ -2183,7 +2171,7 @@ def A2_vsubhs : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = vsubh($Rtt32,$Rss32):sat", -tc_47ab9233, TypeALU64>, Enc_ea23e4 { +tc_b44c6e2a, TypeALU64>, Enc_ea23e4 { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011001; @@ -2194,7 +2182,7 @@ def A2_vsubub : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = vsubub($Rtt32,$Rss32)", -tc_9c18c9a5, TypeALU64>, Enc_ea23e4 { +tc_540fdfbc, TypeALU64>, Enc_ea23e4 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011001; @@ -2203,7 +2191,7 @@ def A2_vsububs : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = vsubub($Rtt32,$Rss32):sat", -tc_47ab9233, TypeALU64>, Enc_ea23e4 { +tc_b44c6e2a, TypeALU64>, Enc_ea23e4 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011001; @@ -2214,7 +2202,7 @@ def A2_vsubuhs : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = vsubuh($Rtt32,$Rss32):sat", -tc_47ab9233, TypeALU64>, Enc_ea23e4 { +tc_b44c6e2a, TypeALU64>, Enc_ea23e4 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011001; @@ -2225,7 +2213,7 @@ def A2_vsubw : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = vsubw($Rtt32,$Rss32)", -tc_9c18c9a5, TypeALU64>, Enc_ea23e4 { +tc_540fdfbc, TypeALU64>, Enc_ea23e4 { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011001; @@ -2234,7 +2222,7 @@ def A2_vsubws : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = vsubw($Rtt32,$Rss32):sat", -tc_47ab9233, TypeALU64>, Enc_ea23e4 { +tc_b44c6e2a, TypeALU64>, Enc_ea23e4 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011001; @@ -2245,7 +2233,7 @@ def A2_xor : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = xor($Rs32,$Rt32)", -tc_548f402d, TypeALU32_3op>, Enc_5ab2be, PredNewRel { +tc_b9488031, TypeALU32_3op>, Enc_5ab2be, PredNewRel { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11110001011; @@ -2260,7 +2248,7 @@ def A2_xorp : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = xor($Rss32,$Rtt32)", -tc_9c18c9a5, TypeALU64>, Enc_a56825 { +tc_540fdfbc, TypeALU64>, Enc_a56825 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011111; @@ -2270,7 +2258,7 @@ def A2_zxtb : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = zxtb($Rs32)", -tc_548f402d, TypeALU32_2op>, PredNewRel { +tc_b9488031, TypeALU32_2op>, PredNewRel { let hasNewValue = 1; let opNewValue = 0; let BaseOpcode = "A2_zxtb"; @@ -2282,7 +2270,7 @@ def A2_zxth : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = zxth($Rs32)", -tc_f16d5b17, TypeALU32_2op>, Enc_5e2823, PredNewRel { +tc_68cb12ce, TypeALU32_2op>, Enc_5e2823, PredNewRel { let Inst{13-5} = 0b000000000; let Inst{31-21} = 0b01110000110; let hasNewValue = 1; @@ -2294,7 +2282,7 @@ def A4_addp_c : HInst< (outs DoubleRegs:$Rdd32, PredRegs:$Px4), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32, PredRegs:$Px4in), "$Rdd32 = add($Rss32,$Rtt32,$Px4):carry", -tc_a87879e8, TypeS_3op>, Enc_2b3f60 { +tc_523fcf30, TypeS_3op>, Enc_2b3f60 { let Inst{7-7} = 0b0; let Inst{13-13} = 
0b0; let Inst{31-21} = 0b11000010110; @@ -2305,7 +2293,7 @@ def A4_andn : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = and($Rt32,~$Rs32)", -tc_548f402d, TypeALU32_3op>, Enc_bd6011 { +tc_b9488031, TypeALU32_3op>, Enc_bd6011 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11110001100; @@ -2317,7 +2305,7 @@ def A4_andnp : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = and($Rtt32,~$Rss32)", -tc_9c18c9a5, TypeALU64>, Enc_ea23e4 { +tc_540fdfbc, TypeALU64>, Enc_ea23e4 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011111; @@ -2326,7 +2314,7 @@ def A4_bitsplit : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = bitsplit($Rs32,$Rt32)", -tc_7ca2ea10, TypeALU64>, Enc_be32a5 { +tc_1b9c9ee5, TypeALU64>, Enc_be32a5 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010100001; @@ -2336,7 +2324,7 @@ def A4_bitspliti : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, u5_0Imm:$Ii), "$Rdd32 = bitsplit($Rs32,#$Ii)", -tc_7ca2ea10, TypeS_2op>, Enc_311abd { +tc_1b9c9ee5, TypeS_2op>, Enc_311abd { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10001000110; @@ -2346,14 +2334,14 @@ def A4_boundscheck : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, DoubleRegs:$Rtt32), "$Pd4 = boundscheck($Rs32,$Rtt32)", -tc_c58f771a, TypeALU64> { +tc_1e856f58, TypeALU64> { let isPseudo = 1; } def A4_boundscheck_hi : HInst< (outs PredRegs:$Pd4), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Pd4 = boundscheck($Rss32,$Rtt32):raw:hi", -tc_c58f771a, TypeALU64>, Enc_fcf7a7 { +tc_1e856f58, TypeALU64>, Enc_fcf7a7 { let Inst{7-2} = 0b101000; let Inst{13-13} = 0b1; let Inst{31-21} = 0b11010010000; @@ -2362,7 +2350,7 @@ def A4_boundscheck_lo : HInst< (outs PredRegs:$Pd4), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Pd4 = boundscheck($Rss32,$Rtt32):raw:lo", -tc_c58f771a, TypeALU64>, Enc_fcf7a7 { +tc_1e856f58, TypeALU64>, Enc_fcf7a7 { let Inst{7-2} = 0b100000; let Inst{13-13} = 0b1; let Inst{31-21} = 0b11010010000; @@ -2371,7 +2359,7 @@ def A4_cmpbeq : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Pd4 = cmpb.eq($Rs32,$Rt32)", -tc_c58f771a, TypeS_3op>, Enc_c2b48e, ImmRegRel { +tc_1e856f58, TypeS_3op>, Enc_c2b48e, ImmRegRel { let Inst{7-2} = 0b110000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000111110; @@ -2384,7 +2372,7 @@ def A4_cmpbeqi : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, u8_0Imm:$Ii), "$Pd4 = cmpb.eq($Rs32,#$Ii)", -tc_5fa2857c, TypeALU64>, Enc_08d755, ImmRegRel { +tc_7a830544, TypeALU64>, Enc_08d755, ImmRegRel { let Inst{4-2} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11011101000; @@ -2397,7 +2385,7 @@ def A4_cmpbgt : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Pd4 = cmpb.gt($Rs32,$Rt32)", -tc_c58f771a, TypeS_3op>, Enc_c2b48e, ImmRegRel { +tc_1e856f58, TypeS_3op>, Enc_c2b48e, ImmRegRel { let Inst{7-2} = 0b010000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000111110; @@ -2409,7 +2397,7 @@ def A4_cmpbgti : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, s8_0Imm:$Ii), "$Pd4 = cmpb.gt($Rs32,#$Ii)", -tc_5fa2857c, TypeALU64>, Enc_08d755, ImmRegRel { +tc_7a830544, TypeALU64>, Enc_08d755, ImmRegRel { let Inst{4-2} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11011101001; @@ -2421,7 +2409,7 @@ def A4_cmpbgtu : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Pd4 = cmpb.gtu($Rs32,$Rt32)", -tc_c58f771a, TypeS_3op>, Enc_c2b48e, ImmRegRel { +tc_1e856f58, TypeS_3op>, 
Enc_c2b48e, ImmRegRel { let Inst{7-2} = 0b111000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000111110; @@ -2433,7 +2421,7 @@ def A4_cmpbgtui : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, u32_0Imm:$Ii), "$Pd4 = cmpb.gtu($Rs32,#$Ii)", -tc_5fa2857c, TypeALU64>, Enc_02553a, ImmRegRel { +tc_7a830544, TypeALU64>, Enc_02553a, ImmRegRel { let Inst{4-2} = 0b000; let Inst{13-12} = 0b00; let Inst{31-21} = 0b11011101010; @@ -2450,7 +2438,7 @@ def A4_cmpheq : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Pd4 = cmph.eq($Rs32,$Rt32)", -tc_c58f771a, TypeS_3op>, Enc_c2b48e, ImmRegRel { +tc_1e856f58, TypeS_3op>, Enc_c2b48e, ImmRegRel { let Inst{7-2} = 0b011000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000111110; @@ -2463,7 +2451,7 @@ def A4_cmpheqi : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, s32_0Imm:$Ii), "$Pd4 = cmph.eq($Rs32,#$Ii)", -tc_5fa2857c, TypeALU64>, Enc_08d755, ImmRegRel { +tc_7a830544, TypeALU64>, Enc_08d755, ImmRegRel { let Inst{4-2} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11011101000; @@ -2481,7 +2469,7 @@ def A4_cmphgt : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Pd4 = cmph.gt($Rs32,$Rt32)", -tc_c58f771a, TypeS_3op>, Enc_c2b48e, ImmRegRel { +tc_1e856f58, TypeS_3op>, Enc_c2b48e, ImmRegRel { let Inst{7-2} = 0b100000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000111110; @@ -2493,7 +2481,7 @@ def A4_cmphgti : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, s32_0Imm:$Ii), "$Pd4 = cmph.gt($Rs32,#$Ii)", -tc_5fa2857c, TypeALU64>, Enc_08d755, ImmRegRel { +tc_7a830544, TypeALU64>, Enc_08d755, ImmRegRel { let Inst{4-2} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11011101001; @@ -2510,7 +2498,7 @@ def A4_cmphgtu : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Pd4 = cmph.gtu($Rs32,$Rt32)", -tc_c58f771a, TypeS_3op>, Enc_c2b48e, ImmRegRel { +tc_1e856f58, TypeS_3op>, Enc_c2b48e, ImmRegRel { let Inst{7-2} = 0b101000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000111110; @@ -2522,7 +2510,7 @@ def A4_cmphgtui : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, u32_0Imm:$Ii), "$Pd4 = cmph.gtu($Rs32,#$Ii)", -tc_5fa2857c, TypeALU64>, Enc_02553a, ImmRegRel { +tc_7a830544, TypeALU64>, Enc_02553a, ImmRegRel { let Inst{4-2} = 0b010; let Inst{13-12} = 0b00; let Inst{31-21} = 0b11011101010; @@ -2539,7 +2527,7 @@ def A4_combineii : HInst< (outs DoubleRegs:$Rdd32), (ins s8_0Imm:$Ii, u32_0Imm:$II), "$Rdd32 = combine(#$Ii,#$II)", -tc_548f402d, TypeALU32_2op>, Enc_f0cca7 { +tc_b9488031, TypeALU32_2op>, Enc_f0cca7 { let Inst{31-21} = 0b01111100100; let isExtendable = 1; let opExtendable = 2; @@ -2551,7 +2539,7 @@ def A4_combineir : HInst< (outs DoubleRegs:$Rdd32), (ins s32_0Imm:$Ii, IntRegs:$Rs32), "$Rdd32 = combine(#$Ii,$Rs32)", -tc_548f402d, TypeALU32_2op>, Enc_9cdba7 { +tc_b9488031, TypeALU32_2op>, Enc_9cdba7 { let Inst{13-13} = 0b1; let Inst{31-21} = 0b01110011001; let isExtendable = 1; @@ -2564,7 +2552,7 @@ def A4_combineri : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, s32_0Imm:$Ii), "$Rdd32 = combine($Rs32,#$Ii)", -tc_548f402d, TypeALU32_2op>, Enc_9cdba7 { +tc_b9488031, TypeALU32_2op>, Enc_9cdba7 { let Inst{13-13} = 0b1; let Inst{31-21} = 0b01110011000; let isExtendable = 1; @@ -2577,7 +2565,7 @@ def A4_cround_ri : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, u5_0Imm:$Ii), "$Rd32 = cround($Rs32,#$Ii)", -tc_63cd9d2d, TypeS_2op>, Enc_a05677 { +tc_2b6f77c6, TypeS_2op>, Enc_a05677 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10001100111; @@ -2589,7 +2577,7 @@ def A4_cround_rr : 
HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = cround($Rs32,$Rt32)", -tc_63cd9d2d, TypeS_3op>, Enc_5ab2be { +tc_2b6f77c6, TypeS_3op>, Enc_5ab2be { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000110110; @@ -2601,14 +2589,14 @@ def A4_ext : HInst< (outs), (ins u26_6Imm:$Ii), "immext(#$Ii)", -tc_9a13af9d, TypeEXTENDER>, Enc_2b518f { +tc_452f85af, TypeEXTENDER>, Enc_2b518f { let Inst{31-28} = 0b0000; } def A4_modwrapu : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = modwrap($Rs32,$Rt32)", -tc_47ab9233, TypeALU64>, Enc_5ab2be { +tc_b44c6e2a, TypeALU64>, Enc_5ab2be { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011111; @@ -2620,7 +2608,7 @@ def A4_orn : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = or($Rt32,~$Rs32)", -tc_548f402d, TypeALU32_3op>, Enc_bd6011 { +tc_b9488031, TypeALU32_3op>, Enc_bd6011 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11110001101; @@ -2632,7 +2620,7 @@ def A4_ornp : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = or($Rtt32,~$Rss32)", -tc_9c18c9a5, TypeALU64>, Enc_ea23e4 { +tc_540fdfbc, TypeALU64>, Enc_ea23e4 { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011111; @@ -2641,7 +2629,7 @@ def A4_paslhf : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if (!$Pu4) $Rd32 = aslh($Rs32)", -tc_548f402d, TypeALU32_2op>, Enc_fb6577, PredNewRel { +tc_b9488031, TypeALU32_2op>, Enc_fb6577, PredNewRel { let Inst{7-5} = 0b000; let Inst{13-10} = 0b1010; let Inst{31-21} = 0b01110000000; @@ -2655,7 +2643,7 @@ def A4_paslhfnew : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if (!$Pu4.new) $Rd32 = aslh($Rs32)", -tc_b08be45e, TypeALU32_2op>, Enc_fb6577, PredNewRel { +tc_5f6847a1, TypeALU32_2op>, Enc_fb6577, PredNewRel { let Inst{7-5} = 0b000; let Inst{13-10} = 0b1011; let Inst{31-21} = 0b01110000000; @@ -2670,7 +2658,7 @@ def A4_paslht : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if ($Pu4) $Rd32 = aslh($Rs32)", -tc_548f402d, TypeALU32_2op>, Enc_fb6577, PredNewRel { +tc_b9488031, TypeALU32_2op>, Enc_fb6577, PredNewRel { let Inst{7-5} = 0b000; let Inst{13-10} = 0b1000; let Inst{31-21} = 0b01110000000; @@ -2683,7 +2671,7 @@ def A4_paslhtnew : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if ($Pu4.new) $Rd32 = aslh($Rs32)", -tc_b08be45e, TypeALU32_2op>, Enc_fb6577, PredNewRel { +tc_5f6847a1, TypeALU32_2op>, Enc_fb6577, PredNewRel { let Inst{7-5} = 0b000; let Inst{13-10} = 0b1001; let Inst{31-21} = 0b01110000000; @@ -2697,7 +2685,7 @@ def A4_pasrhf : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if (!$Pu4) $Rd32 = asrh($Rs32)", -tc_548f402d, TypeALU32_2op>, Enc_fb6577, PredNewRel { +tc_b9488031, TypeALU32_2op>, Enc_fb6577, PredNewRel { let Inst{7-5} = 0b000; let Inst{13-10} = 0b1010; let Inst{31-21} = 0b01110000001; @@ -2711,7 +2699,7 @@ def A4_pasrhfnew : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if (!$Pu4.new) $Rd32 = asrh($Rs32)", -tc_b08be45e, TypeALU32_2op>, Enc_fb6577, PredNewRel { +tc_5f6847a1, TypeALU32_2op>, Enc_fb6577, PredNewRel { let Inst{7-5} = 0b000; let Inst{13-10} = 0b1011; let Inst{31-21} = 0b01110000001; @@ -2726,7 +2714,7 @@ def A4_pasrht : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if ($Pu4) $Rd32 = asrh($Rs32)", -tc_548f402d, TypeALU32_2op>, Enc_fb6577, PredNewRel { +tc_b9488031, TypeALU32_2op>, Enc_fb6577, 
PredNewRel { let Inst{7-5} = 0b000; let Inst{13-10} = 0b1000; let Inst{31-21} = 0b01110000001; @@ -2739,7 +2727,7 @@ def A4_pasrhtnew : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if ($Pu4.new) $Rd32 = asrh($Rs32)", -tc_b08be45e, TypeALU32_2op>, Enc_fb6577, PredNewRel { +tc_5f6847a1, TypeALU32_2op>, Enc_fb6577, PredNewRel { let Inst{7-5} = 0b000; let Inst{13-10} = 0b1001; let Inst{31-21} = 0b01110000001; @@ -2753,7 +2741,7 @@ def A4_psxtbf : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if (!$Pu4) $Rd32 = sxtb($Rs32)", -tc_548f402d, TypeALU32_2op>, Enc_fb6577, PredNewRel { +tc_b9488031, TypeALU32_2op>, Enc_fb6577, PredNewRel { let Inst{7-5} = 0b000; let Inst{13-10} = 0b1010; let Inst{31-21} = 0b01110000101; @@ -2767,7 +2755,7 @@ def A4_psxtbfnew : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if (!$Pu4.new) $Rd32 = sxtb($Rs32)", -tc_b08be45e, TypeALU32_2op>, Enc_fb6577, PredNewRel { +tc_5f6847a1, TypeALU32_2op>, Enc_fb6577, PredNewRel { let Inst{7-5} = 0b000; let Inst{13-10} = 0b1011; let Inst{31-21} = 0b01110000101; @@ -2782,7 +2770,7 @@ def A4_psxtbt : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if ($Pu4) $Rd32 = sxtb($Rs32)", -tc_548f402d, TypeALU32_2op>, Enc_fb6577, PredNewRel { +tc_b9488031, TypeALU32_2op>, Enc_fb6577, PredNewRel { let Inst{7-5} = 0b000; let Inst{13-10} = 0b1000; let Inst{31-21} = 0b01110000101; @@ -2795,7 +2783,7 @@ def A4_psxtbtnew : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if ($Pu4.new) $Rd32 = sxtb($Rs32)", -tc_b08be45e, TypeALU32_2op>, Enc_fb6577, PredNewRel { +tc_5f6847a1, TypeALU32_2op>, Enc_fb6577, PredNewRel { let Inst{7-5} = 0b000; let Inst{13-10} = 0b1001; let Inst{31-21} = 0b01110000101; @@ -2809,7 +2797,7 @@ def A4_psxthf : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if (!$Pu4) $Rd32 = sxth($Rs32)", -tc_548f402d, TypeALU32_2op>, Enc_fb6577, PredNewRel { +tc_b9488031, TypeALU32_2op>, Enc_fb6577, PredNewRel { let Inst{7-5} = 0b000; let Inst{13-10} = 0b1010; let Inst{31-21} = 0b01110000111; @@ -2823,7 +2811,7 @@ def A4_psxthfnew : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if (!$Pu4.new) $Rd32 = sxth($Rs32)", -tc_b08be45e, TypeALU32_2op>, Enc_fb6577, PredNewRel { +tc_5f6847a1, TypeALU32_2op>, Enc_fb6577, PredNewRel { let Inst{7-5} = 0b000; let Inst{13-10} = 0b1011; let Inst{31-21} = 0b01110000111; @@ -2838,7 +2826,7 @@ def A4_psxtht : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if ($Pu4) $Rd32 = sxth($Rs32)", -tc_548f402d, TypeALU32_2op>, Enc_fb6577, PredNewRel { +tc_b9488031, TypeALU32_2op>, Enc_fb6577, PredNewRel { let Inst{7-5} = 0b000; let Inst{13-10} = 0b1000; let Inst{31-21} = 0b01110000111; @@ -2851,7 +2839,7 @@ def A4_psxthtnew : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if ($Pu4.new) $Rd32 = sxth($Rs32)", -tc_b08be45e, TypeALU32_2op>, Enc_fb6577, PredNewRel { +tc_5f6847a1, TypeALU32_2op>, Enc_fb6577, PredNewRel { let Inst{7-5} = 0b000; let Inst{13-10} = 0b1001; let Inst{31-21} = 0b01110000111; @@ -2865,7 +2853,7 @@ def A4_pzxtbf : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if (!$Pu4) $Rd32 = zxtb($Rs32)", -tc_548f402d, TypeALU32_2op>, Enc_fb6577, PredNewRel { +tc_b9488031, TypeALU32_2op>, Enc_fb6577, PredNewRel { let Inst{7-5} = 0b000; let Inst{13-10} = 0b1010; let Inst{31-21} = 0b01110000100; @@ -2879,7 +2867,7 @@ def A4_pzxtbfnew : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if (!$Pu4.new) $Rd32 = 
zxtb($Rs32)", -tc_b08be45e, TypeALU32_2op>, Enc_fb6577, PredNewRel { +tc_5f6847a1, TypeALU32_2op>, Enc_fb6577, PredNewRel { let Inst{7-5} = 0b000; let Inst{13-10} = 0b1011; let Inst{31-21} = 0b01110000100; @@ -2894,7 +2882,7 @@ def A4_pzxtbt : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if ($Pu4) $Rd32 = zxtb($Rs32)", -tc_548f402d, TypeALU32_2op>, Enc_fb6577, PredNewRel { +tc_b9488031, TypeALU32_2op>, Enc_fb6577, PredNewRel { let Inst{7-5} = 0b000; let Inst{13-10} = 0b1000; let Inst{31-21} = 0b01110000100; @@ -2907,7 +2895,7 @@ def A4_pzxtbtnew : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if ($Pu4.new) $Rd32 = zxtb($Rs32)", -tc_b08be45e, TypeALU32_2op>, Enc_fb6577, PredNewRel { +tc_5f6847a1, TypeALU32_2op>, Enc_fb6577, PredNewRel { let Inst{7-5} = 0b000; let Inst{13-10} = 0b1001; let Inst{31-21} = 0b01110000100; @@ -2921,7 +2909,7 @@ def A4_pzxthf : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if (!$Pu4) $Rd32 = zxth($Rs32)", -tc_548f402d, TypeALU32_2op>, Enc_fb6577, PredNewRel { +tc_b9488031, TypeALU32_2op>, Enc_fb6577, PredNewRel { let Inst{7-5} = 0b000; let Inst{13-10} = 0b1010; let Inst{31-21} = 0b01110000110; @@ -2935,7 +2923,7 @@ def A4_pzxthfnew : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if (!$Pu4.new) $Rd32 = zxth($Rs32)", -tc_b08be45e, TypeALU32_2op>, Enc_fb6577, PredNewRel { +tc_5f6847a1, TypeALU32_2op>, Enc_fb6577, PredNewRel { let Inst{7-5} = 0b000; let Inst{13-10} = 0b1011; let Inst{31-21} = 0b01110000110; @@ -2950,7 +2938,7 @@ def A4_pzxtht : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if ($Pu4) $Rd32 = zxth($Rs32)", -tc_548f402d, TypeALU32_2op>, Enc_fb6577, PredNewRel { +tc_b9488031, TypeALU32_2op>, Enc_fb6577, PredNewRel { let Inst{7-5} = 0b000; let Inst{13-10} = 0b1000; let Inst{31-21} = 0b01110000110; @@ -2963,7 +2951,7 @@ def A4_pzxthtnew : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if ($Pu4.new) $Rd32 = zxth($Rs32)", -tc_b08be45e, TypeALU32_2op>, Enc_fb6577, PredNewRel { +tc_5f6847a1, TypeALU32_2op>, Enc_fb6577, PredNewRel { let Inst{7-5} = 0b000; let Inst{13-10} = 0b1001; let Inst{31-21} = 0b01110000110; @@ -2977,7 +2965,7 @@ def A4_rcmpeq : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = cmp.eq($Rs32,$Rt32)", -tc_548f402d, TypeALU32_3op>, Enc_5ab2be, ImmRegRel { +tc_b9488031, TypeALU32_3op>, Enc_5ab2be, ImmRegRel { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11110011010; @@ -2991,7 +2979,7 @@ def A4_rcmpeqi : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, s32_0Imm:$Ii), "$Rd32 = cmp.eq($Rs32,#$Ii)", -tc_548f402d, TypeALU32_2op>, Enc_b8c967, ImmRegRel { +tc_b9488031, TypeALU32_2op>, Enc_b8c967, ImmRegRel { let Inst{13-13} = 0b1; let Inst{31-21} = 0b01110011010; let hasNewValue = 1; @@ -3008,7 +2996,7 @@ def A4_rcmpneq : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = !cmp.eq($Rs32,$Rt32)", -tc_548f402d, TypeALU32_3op>, Enc_5ab2be, ImmRegRel { +tc_b9488031, TypeALU32_3op>, Enc_5ab2be, ImmRegRel { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11110011011; @@ -3022,7 +3010,7 @@ def A4_rcmpneqi : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, s32_0Imm:$Ii), "$Rd32 = !cmp.eq($Rs32,#$Ii)", -tc_548f402d, TypeALU32_2op>, Enc_b8c967, ImmRegRel { +tc_b9488031, TypeALU32_2op>, Enc_b8c967, ImmRegRel { let Inst{13-13} = 0b1; let Inst{31-21} = 0b01110011011; let hasNewValue = 1; @@ -3039,7 +3027,7 @@ def A4_round_ri : HInst< (outs IntRegs:$Rd32), (ins 
IntRegs:$Rs32, u5_0Imm:$Ii), "$Rd32 = round($Rs32,#$Ii)", -tc_63cd9d2d, TypeS_2op>, Enc_a05677 { +tc_2b6f77c6, TypeS_2op>, Enc_a05677 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10001100111; @@ -3051,7 +3039,7 @@ def A4_round_ri_sat : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, u5_0Imm:$Ii), "$Rd32 = round($Rs32,#$Ii):sat", -tc_63cd9d2d, TypeS_2op>, Enc_a05677 { +tc_2b6f77c6, TypeS_2op>, Enc_a05677 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10001100111; @@ -3064,7 +3052,7 @@ def A4_round_rr : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = round($Rs32,$Rt32)", -tc_63cd9d2d, TypeS_3op>, Enc_5ab2be { +tc_2b6f77c6, TypeS_3op>, Enc_5ab2be { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000110110; @@ -3076,7 +3064,7 @@ def A4_round_rr_sat : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = round($Rs32,$Rt32):sat", -tc_63cd9d2d, TypeS_3op>, Enc_5ab2be { +tc_2b6f77c6, TypeS_3op>, Enc_5ab2be { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000110110; @@ -3089,7 +3077,7 @@ def A4_subp_c : HInst< (outs DoubleRegs:$Rdd32, PredRegs:$Px4), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32, PredRegs:$Px4in), "$Rdd32 = sub($Rss32,$Rtt32,$Px4):carry", -tc_a87879e8, TypeS_3op>, Enc_2b3f60 { +tc_523fcf30, TypeS_3op>, Enc_2b3f60 { let Inst{7-7} = 0b0; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000010111; @@ -3100,7 +3088,7 @@ def A4_tfrcpp : HInst< (outs DoubleRegs:$Rdd32), (ins CtrRegs64:$Css32), "$Rdd32 = $Css32", -tc_3b4892c6, TypeCR>, Enc_667b39 { +tc_29175780, TypeCR>, Enc_667b39 { let Inst{13-5} = 0b000000000; let Inst{31-21} = 0b01101000000; } @@ -3108,7 +3096,7 @@ def A4_tfrpcp : HInst< (outs CtrRegs64:$Cdd32), (ins DoubleRegs:$Rss32), "$Cdd32 = $Rss32", -tc_82f0f122, TypeCR>, Enc_0ed752 { +tc_a21dc435, TypeCR>, Enc_0ed752 { let Inst{13-5} = 0b000000000; let Inst{31-21} = 0b01100011001; } @@ -3116,7 +3104,7 @@ def A4_tlbmatch : HInst< (outs PredRegs:$Pd4), (ins DoubleRegs:$Rss32, IntRegs:$Rt32), "$Pd4 = tlbmatch($Rss32,$Rt32)", -tc_e2c08bb4, TypeALU64>, Enc_03833b { +tc_04c9decc, TypeALU64>, Enc_03833b { let Inst{7-2} = 0b011000; let Inst{13-13} = 0b1; let Inst{31-21} = 0b11010010000; @@ -3126,7 +3114,7 @@ def A4_vcmpbeq_any : HInst< (outs PredRegs:$Pd4), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Pd4 = any8(vcmpb.eq($Rss32,$Rtt32))", -tc_c58f771a, TypeALU64>, Enc_fcf7a7 { +tc_1e856f58, TypeALU64>, Enc_fcf7a7 { let Inst{7-2} = 0b000000; let Inst{13-13} = 0b1; let Inst{31-21} = 0b11010010000; @@ -3135,7 +3123,7 @@ def A4_vcmpbeqi : HInst< (outs PredRegs:$Pd4), (ins DoubleRegs:$Rss32, u8_0Imm:$Ii), "$Pd4 = vcmpb.eq($Rss32,#$Ii)", -tc_5fa2857c, TypeALU64>, Enc_0d8adb { +tc_7a830544, TypeALU64>, Enc_0d8adb { let Inst{4-2} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11011100000; @@ -3144,7 +3132,7 @@ def A4_vcmpbgt : HInst< (outs PredRegs:$Pd4), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Pd4 = vcmpb.gt($Rss32,$Rtt32)", -tc_c58f771a, TypeALU64>, Enc_fcf7a7 { +tc_1e856f58, TypeALU64>, Enc_fcf7a7 { let Inst{7-2} = 0b010000; let Inst{13-13} = 0b1; let Inst{31-21} = 0b11010010000; @@ -3153,7 +3141,7 @@ def A4_vcmpbgti : HInst< (outs PredRegs:$Pd4), (ins DoubleRegs:$Rss32, s8_0Imm:$Ii), "$Pd4 = vcmpb.gt($Rss32,#$Ii)", -tc_5fa2857c, TypeALU64>, Enc_0d8adb { +tc_7a830544, TypeALU64>, Enc_0d8adb { let Inst{4-2} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11011100001; @@ -3162,7 +3150,7 @@ def A4_vcmpbgtui : HInst< (outs PredRegs:$Pd4), (ins 
DoubleRegs:$Rss32, u7_0Imm:$Ii), "$Pd4 = vcmpb.gtu($Rss32,#$Ii)", -tc_5fa2857c, TypeALU64>, Enc_3680c2 { +tc_7a830544, TypeALU64>, Enc_3680c2 { let Inst{4-2} = 0b000; let Inst{13-12} = 0b00; let Inst{31-21} = 0b11011100010; @@ -3171,7 +3159,7 @@ def A4_vcmpheqi : HInst< (outs PredRegs:$Pd4), (ins DoubleRegs:$Rss32, s8_0Imm:$Ii), "$Pd4 = vcmph.eq($Rss32,#$Ii)", -tc_5fa2857c, TypeALU64>, Enc_0d8adb { +tc_7a830544, TypeALU64>, Enc_0d8adb { let Inst{4-2} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11011100000; @@ -3180,7 +3168,7 @@ def A4_vcmphgti : HInst< (outs PredRegs:$Pd4), (ins DoubleRegs:$Rss32, s8_0Imm:$Ii), "$Pd4 = vcmph.gt($Rss32,#$Ii)", -tc_5fa2857c, TypeALU64>, Enc_0d8adb { +tc_7a830544, TypeALU64>, Enc_0d8adb { let Inst{4-2} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11011100001; @@ -3189,7 +3177,7 @@ def A4_vcmphgtui : HInst< (outs PredRegs:$Pd4), (ins DoubleRegs:$Rss32, u7_0Imm:$Ii), "$Pd4 = vcmph.gtu($Rss32,#$Ii)", -tc_5fa2857c, TypeALU64>, Enc_3680c2 { +tc_7a830544, TypeALU64>, Enc_3680c2 { let Inst{4-2} = 0b010; let Inst{13-12} = 0b00; let Inst{31-21} = 0b11011100010; @@ -3198,7 +3186,7 @@ def A4_vcmpweqi : HInst< (outs PredRegs:$Pd4), (ins DoubleRegs:$Rss32, s8_0Imm:$Ii), "$Pd4 = vcmpw.eq($Rss32,#$Ii)", -tc_5fa2857c, TypeALU64>, Enc_0d8adb { +tc_7a830544, TypeALU64>, Enc_0d8adb { let Inst{4-2} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11011100000; @@ -3207,7 +3195,7 @@ def A4_vcmpwgti : HInst< (outs PredRegs:$Pd4), (ins DoubleRegs:$Rss32, s8_0Imm:$Ii), "$Pd4 = vcmpw.gt($Rss32,#$Ii)", -tc_5fa2857c, TypeALU64>, Enc_0d8adb { +tc_7a830544, TypeALU64>, Enc_0d8adb { let Inst{4-2} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11011100001; @@ -3216,7 +3204,7 @@ def A4_vcmpwgtui : HInst< (outs PredRegs:$Pd4), (ins DoubleRegs:$Rss32, u7_0Imm:$Ii), "$Pd4 = vcmpw.gtu($Rss32,#$Ii)", -tc_5fa2857c, TypeALU64>, Enc_3680c2 { +tc_7a830544, TypeALU64>, Enc_3680c2 { let Inst{4-2} = 0b100; let Inst{13-12} = 0b00; let Inst{31-21} = 0b11011100010; @@ -3225,7 +3213,7 @@ def A4_vrmaxh : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Ru32), "$Rxx32 = vrmaxh($Rss32,$Ru32)", -tc_2aaab1e0, TypeS_3op>, Enc_412ff0 { +tc_c6ce9b3f, TypeS_3op>, Enc_412ff0 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001011001; @@ -3236,7 +3224,7 @@ def A4_vrmaxuh : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Ru32), "$Rxx32 = vrmaxuh($Rss32,$Ru32)", -tc_2aaab1e0, TypeS_3op>, Enc_412ff0 { +tc_c6ce9b3f, TypeS_3op>, Enc_412ff0 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b1; let Inst{31-21} = 0b11001011001; @@ -3247,7 +3235,7 @@ def A4_vrmaxuw : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Ru32), "$Rxx32 = vrmaxuw($Rss32,$Ru32)", -tc_2aaab1e0, TypeS_3op>, Enc_412ff0 { +tc_c6ce9b3f, TypeS_3op>, Enc_412ff0 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b1; let Inst{31-21} = 0b11001011001; @@ -3258,7 +3246,7 @@ def A4_vrmaxw : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Ru32), "$Rxx32 = vrmaxw($Rss32,$Ru32)", -tc_2aaab1e0, TypeS_3op>, Enc_412ff0 { +tc_c6ce9b3f, TypeS_3op>, Enc_412ff0 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001011001; @@ -3269,7 +3257,7 @@ def A4_vrminh : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Ru32), "$Rxx32 = vrminh($Rss32,$Ru32)", -tc_2aaab1e0, TypeS_3op>, Enc_412ff0 { +tc_c6ce9b3f, TypeS_3op>, Enc_412ff0 { let Inst{7-5} 
= 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001011001; @@ -3280,7 +3268,7 @@ def A4_vrminuh : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Ru32), "$Rxx32 = vrminuh($Rss32,$Ru32)", -tc_2aaab1e0, TypeS_3op>, Enc_412ff0 { +tc_c6ce9b3f, TypeS_3op>, Enc_412ff0 { let Inst{7-5} = 0b101; let Inst{13-13} = 0b1; let Inst{31-21} = 0b11001011001; @@ -3291,7 +3279,7 @@ def A4_vrminuw : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Ru32), "$Rxx32 = vrminuw($Rss32,$Ru32)", -tc_2aaab1e0, TypeS_3op>, Enc_412ff0 { +tc_c6ce9b3f, TypeS_3op>, Enc_412ff0 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b1; let Inst{31-21} = 0b11001011001; @@ -3302,7 +3290,7 @@ def A4_vrminw : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Ru32), "$Rxx32 = vrminw($Rss32,$Ru32)", -tc_2aaab1e0, TypeS_3op>, Enc_412ff0 { +tc_c6ce9b3f, TypeS_3op>, Enc_412ff0 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001011001; @@ -3313,7 +3301,7 @@ def A5_ACS : HInst< (outs DoubleRegs:$Rxx32, PredRegs:$Pe4), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32,$Pe4 = vacsh($Rss32,$Rtt32)", -tc_ae0722f7, TypeM>, Enc_831a7d, Requires<[HasV55T]> { +tc_caaebcba, TypeM>, Enc_831a7d, Requires<[HasV55T]> { let Inst{7-7} = 0b0; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010101; @@ -3326,7 +3314,7 @@ def A5_vaddhubs : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rd32 = vaddhub($Rss32,$Rtt32):sat", -tc_63cd9d2d, TypeS_3op>, Enc_d2216a, Requires<[HasV5T]> { +tc_2b6f77c6, TypeS_3op>, Enc_d2216a, Requires<[HasV5T]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000001010; @@ -3335,11 +3323,20 @@ let opNewValue = 0; let prefersSlot3 = 1; let Defs = [USR_OVF]; } +def A6_vcmpbeq_notany : HInst< +(outs PredRegs:$Pd4), +(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), +"$Pd4 = !any8(vcmpb.eq($Rss32,$Rtt32))", +tc_55050d58, TypeALU64>, Enc_fcf7a7, Requires<[HasV65T]> { +let Inst{7-2} = 0b001000; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b11010010000; +} def A6_vminub_RdP : HInst< (outs DoubleRegs:$Rdd32, PredRegs:$Pe4), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32,$Pe4 = vminub($Rtt32,$Rss32)", -tc_583510c7, TypeM>, Enc_d2c7f1, Requires<[HasV62T]> { +tc_ef84f62f, TypeM>, Enc_d2c7f1, Requires<[HasV62T]> { let Inst{7-7} = 0b0; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010111; @@ -3350,7 +3347,7 @@ def C2_all8 : HInst< (outs PredRegs:$Pd4), (ins PredRegs:$Ps4), "$Pd4 = all8($Ps4)", -tc_81a23d44, TypeCR>, Enc_65d691 { +tc_f2704b9a, TypeCR>, Enc_65d691 { let Inst{13-2} = 0b000000000000; let Inst{31-18} = 0b01101011101000; } @@ -3358,7 +3355,7 @@ def C2_and : HInst< (outs PredRegs:$Pd4), (ins PredRegs:$Pt4, PredRegs:$Ps4), "$Pd4 = and($Pt4,$Ps4)", -tc_d63b71d1, TypeCR>, Enc_454a26 { +tc_53bc8a6a, TypeCR>, Enc_454a26 { let Inst{7-2} = 0b000000; let Inst{13-10} = 0b0000; let Inst{31-18} = 0b01101011000000; @@ -3367,7 +3364,7 @@ def C2_andn : HInst< (outs PredRegs:$Pd4), (ins PredRegs:$Pt4, PredRegs:$Ps4), "$Pd4 = and($Pt4,!$Ps4)", -tc_d63b71d1, TypeCR>, Enc_454a26 { +tc_53bc8a6a, TypeCR>, Enc_454a26 { let Inst{7-2} = 0b000000; let Inst{13-10} = 0b0000; let Inst{31-18} = 0b01101011011000; @@ -3376,7 +3373,7 @@ def C2_any8 : HInst< (outs PredRegs:$Pd4), (ins PredRegs:$Ps4), "$Pd4 = any8($Ps4)", -tc_81a23d44, TypeCR>, Enc_65d691 { +tc_f2704b9a, TypeCR>, Enc_65d691 { let Inst{13-2} = 0b000000000000; let Inst{31-18} = 0b01101011100000; } @@ 
-3384,7 +3381,7 @@ def C2_bitsclr : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Pd4 = bitsclr($Rs32,$Rt32)", -tc_c58f771a, TypeS_3op>, Enc_c2b48e { +tc_1e856f58, TypeS_3op>, Enc_c2b48e { let Inst{7-2} = 0b000000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000111100; @@ -3393,7 +3390,7 @@ def C2_bitsclri : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, u6_0Imm:$Ii), "$Pd4 = bitsclr($Rs32,#$Ii)", -tc_5fa2857c, TypeS_2op>, Enc_5d6c34 { +tc_7a830544, TypeS_2op>, Enc_5d6c34 { let Inst{7-2} = 0b000000; let Inst{31-21} = 0b10000101100; } @@ -3401,7 +3398,7 @@ def C2_bitsset : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Pd4 = bitsset($Rs32,$Rt32)", -tc_c58f771a, TypeS_3op>, Enc_c2b48e { +tc_1e856f58, TypeS_3op>, Enc_c2b48e { let Inst{7-2} = 0b000000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000111010; @@ -3410,7 +3407,7 @@ def C2_ccombinewf : HInst< (outs DoubleRegs:$Rdd32), (ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32), "if (!$Pu4) $Rdd32 = combine($Rs32,$Rt32)", -tc_1b6011fb, TypeALU32_3op>, Enc_cb4b4e, PredNewRel { +tc_d6bf0472, TypeALU32_3op>, Enc_cb4b4e, PredNewRel { let Inst{7-7} = 0b1; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11111101000; @@ -3422,7 +3419,7 @@ def C2_ccombinewnewf : HInst< (outs DoubleRegs:$Rdd32), (ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32), "if (!$Pu4.new) $Rdd32 = combine($Rs32,$Rt32)", -tc_28d296df, TypeALU32_3op>, Enc_cb4b4e, PredNewRel { +tc_2b2f4060, TypeALU32_3op>, Enc_cb4b4e, PredNewRel { let Inst{7-7} = 0b1; let Inst{13-13} = 0b1; let Inst{31-21} = 0b11111101000; @@ -3435,7 +3432,7 @@ def C2_ccombinewnewt : HInst< (outs DoubleRegs:$Rdd32), (ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32), "if ($Pu4.new) $Rdd32 = combine($Rs32,$Rt32)", -tc_28d296df, TypeALU32_3op>, Enc_cb4b4e, PredNewRel { +tc_2b2f4060, TypeALU32_3op>, Enc_cb4b4e, PredNewRel { let Inst{7-7} = 0b0; let Inst{13-13} = 0b1; let Inst{31-21} = 0b11111101000; @@ -3447,7 +3444,7 @@ def C2_ccombinewt : HInst< (outs DoubleRegs:$Rdd32), (ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32), "if ($Pu4) $Rdd32 = combine($Rs32,$Rt32)", -tc_1b6011fb, TypeALU32_3op>, Enc_cb4b4e, PredNewRel { +tc_d6bf0472, TypeALU32_3op>, Enc_cb4b4e, PredNewRel { let Inst{7-7} = 0b0; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11111101000; @@ -3458,7 +3455,7 @@ def C2_cmoveif : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, s32_0Imm:$Ii), "if (!$Pu4) $Rd32 = #$Ii", -tc_548f402d, TypeALU32_2op>, Enc_cda00a, PredNewRel, ImmRegRel { +tc_b9488031, TypeALU32_2op>, Enc_cda00a, PredNewRel, ImmRegRel { let Inst{13-13} = 0b0; let Inst{20-20} = 0b0; let Inst{31-23} = 0b011111101; @@ -3480,7 +3477,7 @@ def C2_cmoveit : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, s32_0Imm:$Ii), "if ($Pu4) $Rd32 = #$Ii", -tc_548f402d, TypeALU32_2op>, Enc_cda00a, PredNewRel, ImmRegRel { +tc_b9488031, TypeALU32_2op>, Enc_cda00a, PredNewRel, ImmRegRel { let Inst{13-13} = 0b0; let Inst{20-20} = 0b0; let Inst{31-23} = 0b011111100; @@ -3501,7 +3498,7 @@ def C2_cmovenewif : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, s32_0Imm:$Ii), "if (!$Pu4.new) $Rd32 = #$Ii", -tc_b08be45e, TypeALU32_2op>, Enc_cda00a, PredNewRel, ImmRegRel { +tc_5f6847a1, TypeALU32_2op>, Enc_cda00a, PredNewRel, ImmRegRel { let Inst{13-13} = 0b1; let Inst{20-20} = 0b0; let Inst{31-23} = 0b011111101; @@ -3524,7 +3521,7 @@ def C2_cmovenewit : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, s32_0Imm:$Ii), "if ($Pu4.new) $Rd32 = #$Ii", -tc_b08be45e, TypeALU32_2op>, Enc_cda00a, PredNewRel, ImmRegRel { +tc_5f6847a1, TypeALU32_2op>, 
Enc_cda00a, PredNewRel, ImmRegRel { let Inst{13-13} = 0b1; let Inst{20-20} = 0b0; let Inst{31-23} = 0b011111100; @@ -3546,7 +3543,7 @@ def C2_cmpeq : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Pd4 = cmp.eq($Rs32,$Rt32)", -tc_5fe9fcd0, TypeALU32_3op>, Enc_c2b48e, ImmRegRel { +tc_c6aa82f7, TypeALU32_3op>, Enc_c2b48e, ImmRegRel { let Inst{7-2} = 0b000000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11110010000; @@ -3559,7 +3556,7 @@ def C2_cmpeqi : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, s32_0Imm:$Ii), "$Pd4 = cmp.eq($Rs32,#$Ii)", -tc_9df8b0dc, TypeALU32_2op>, Enc_bd0b33, ImmRegRel { +tc_6ebb4a12, TypeALU32_2op>, Enc_bd0b33, ImmRegRel { let Inst{4-2} = 0b000; let Inst{31-22} = 0b0111010100; let CextOpcode = "C2_cmpeq"; @@ -3575,7 +3572,7 @@ def C2_cmpeqp : HInst< (outs PredRegs:$Pd4), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Pd4 = cmp.eq($Rss32,$Rtt32)", -tc_c58f771a, TypeALU64>, Enc_fcf7a7 { +tc_1e856f58, TypeALU64>, Enc_fcf7a7 { let Inst{7-2} = 0b000000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010010100; @@ -3586,7 +3583,7 @@ def C2_cmpgei : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, s8_0Imm:$Ii), "$Pd4 = cmp.ge($Rs32,#$Ii)", -tc_9df8b0dc, TypeALU32_2op> { +tc_6ebb4a12, TypeALU32_2op> { let isCompare = 1; let isPseudo = 1; } @@ -3594,7 +3591,7 @@ def C2_cmpgeui : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, u8_0Imm:$Ii), "$Pd4 = cmp.geu($Rs32,#$Ii)", -tc_9df8b0dc, TypeALU32_2op> { +tc_6ebb4a12, TypeALU32_2op> { let isCompare = 1; let isPseudo = 1; } @@ -3602,7 +3599,7 @@ def C2_cmpgt : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Pd4 = cmp.gt($Rs32,$Rt32)", -tc_5fe9fcd0, TypeALU32_3op>, Enc_c2b48e, ImmRegRel { +tc_c6aa82f7, TypeALU32_3op>, Enc_c2b48e, ImmRegRel { let Inst{7-2} = 0b000000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11110010010; @@ -3614,7 +3611,7 @@ def C2_cmpgti : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, s32_0Imm:$Ii), "$Pd4 = cmp.gt($Rs32,#$Ii)", -tc_9df8b0dc, TypeALU32_2op>, Enc_bd0b33, ImmRegRel { +tc_6ebb4a12, TypeALU32_2op>, Enc_bd0b33, ImmRegRel { let Inst{4-2} = 0b000; let Inst{31-22} = 0b0111010101; let CextOpcode = "C2_cmpgt"; @@ -3630,7 +3627,7 @@ def C2_cmpgtp : HInst< (outs PredRegs:$Pd4), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Pd4 = cmp.gt($Rss32,$Rtt32)", -tc_c58f771a, TypeALU64>, Enc_fcf7a7 { +tc_1e856f58, TypeALU64>, Enc_fcf7a7 { let Inst{7-2} = 0b010000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010010100; @@ -3640,7 +3637,7 @@ def C2_cmpgtu : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Pd4 = cmp.gtu($Rs32,$Rt32)", -tc_5fe9fcd0, TypeALU32_3op>, Enc_c2b48e, ImmRegRel { +tc_c6aa82f7, TypeALU32_3op>, Enc_c2b48e, ImmRegRel { let Inst{7-2} = 0b000000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11110010011; @@ -3652,7 +3649,7 @@ def C2_cmpgtui : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, u32_0Imm:$Ii), "$Pd4 = cmp.gtu($Rs32,#$Ii)", -tc_9df8b0dc, TypeALU32_2op>, Enc_c0cdde, ImmRegRel { +tc_6ebb4a12, TypeALU32_2op>, Enc_c0cdde, ImmRegRel { let Inst{4-2} = 0b000; let Inst{31-21} = 0b01110101100; let CextOpcode = "C2_cmpgtu"; @@ -3668,7 +3665,7 @@ def C2_cmpgtup : HInst< (outs PredRegs:$Pd4), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Pd4 = cmp.gtu($Rss32,$Rtt32)", -tc_c58f771a, TypeALU64>, Enc_fcf7a7 { +tc_1e856f58, TypeALU64>, Enc_fcf7a7 { let Inst{7-2} = 0b100000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010010100; @@ -3678,7 +3675,7 @@ def C2_cmplt : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Pd4 = 
cmp.lt($Rs32,$Rt32)", -tc_9df8b0dc, TypeALU32_3op> { +tc_6ebb4a12, TypeALU32_3op> { let isCompare = 1; let isPseudo = 1; let isCodeGenOnly = 1; @@ -3687,7 +3684,7 @@ def C2_cmpltu : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Pd4 = cmp.ltu($Rs32,$Rt32)", -tc_9df8b0dc, TypeALU32_3op> { +tc_6ebb4a12, TypeALU32_3op> { let isCompare = 1; let isPseudo = 1; let isCodeGenOnly = 1; @@ -3696,7 +3693,7 @@ def C2_mask : HInst< (outs DoubleRegs:$Rdd32), (ins PredRegs:$Pt4), "$Rdd32 = mask($Pt4)", -tc_b86c7e8b, TypeS_2op>, Enc_78e566 { +tc_cde8b071, TypeS_2op>, Enc_78e566 { let Inst{7-5} = 0b000; let Inst{13-10} = 0b0000; let Inst{31-16} = 0b1000011000000000; @@ -3705,7 +3702,7 @@ def C2_mux : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mux($Pu4,$Rs32,$Rt32)", -tc_1b6011fb, TypeALU32_3op>, Enc_ea4c54 { +tc_d6bf0472, TypeALU32_3op>, Enc_ea4c54 { let Inst{7-7} = 0b0; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11110100000; @@ -3717,7 +3714,7 @@ def C2_muxii : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, s32_0Imm:$Ii, s8_0Imm:$II), "$Rd32 = mux($Pu4,#$Ii,#$II)", -tc_1b6011fb, TypeALU32_2op>, Enc_830e5d { +tc_d6bf0472, TypeALU32_2op>, Enc_830e5d { let Inst{31-25} = 0b0111101; let hasNewValue = 1; let opNewValue = 0; @@ -3731,7 +3728,7 @@ def C2_muxir : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32, s32_0Imm:$Ii), "$Rd32 = mux($Pu4,$Rs32,#$Ii)", -tc_1b6011fb, TypeALU32_2op>, Enc_e38e1f { +tc_d6bf0472, TypeALU32_2op>, Enc_e38e1f { let Inst{13-13} = 0b0; let Inst{31-23} = 0b011100110; let hasNewValue = 1; @@ -3747,7 +3744,7 @@ def C2_muxri : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, s32_0Imm:$Ii, IntRegs:$Rs32), "$Rd32 = mux($Pu4,#$Ii,$Rs32)", -tc_1b6011fb, TypeALU32_2op>, Enc_e38e1f { +tc_d6bf0472, TypeALU32_2op>, Enc_e38e1f { let Inst{13-13} = 0b0; let Inst{31-23} = 0b011100111; let hasNewValue = 1; @@ -3763,7 +3760,7 @@ def C2_not : HInst< (outs PredRegs:$Pd4), (ins PredRegs:$Ps4), "$Pd4 = not($Ps4)", -tc_81a23d44, TypeCR>, Enc_65d691 { +tc_f2704b9a, TypeCR>, Enc_65d691 { let Inst{13-2} = 0b000000000000; let Inst{31-18} = 0b01101011110000; } @@ -3771,7 +3768,7 @@ def C2_or : HInst< (outs PredRegs:$Pd4), (ins PredRegs:$Pt4, PredRegs:$Ps4), "$Pd4 = or($Pt4,$Ps4)", -tc_d63b71d1, TypeCR>, Enc_454a26 { +tc_53bc8a6a, TypeCR>, Enc_454a26 { let Inst{7-2} = 0b000000; let Inst{13-10} = 0b0000; let Inst{31-18} = 0b01101011001000; @@ -3780,7 +3777,7 @@ def C2_orn : HInst< (outs PredRegs:$Pd4), (ins PredRegs:$Pt4, PredRegs:$Ps4), "$Pd4 = or($Pt4,!$Ps4)", -tc_d63b71d1, TypeCR>, Enc_454a26 { +tc_53bc8a6a, TypeCR>, Enc_454a26 { let Inst{7-2} = 0b000000; let Inst{13-10} = 0b0000; let Inst{31-18} = 0b01101011111000; @@ -3789,7 +3786,7 @@ def C2_pxfer_map : HInst< (outs PredRegs:$Pd4), (ins PredRegs:$Ps4), "$Pd4 = $Ps4", -tc_d63b71d1, TypeMAPPING> { +tc_53bc8a6a, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -3797,7 +3794,7 @@ def C2_tfrpr : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Ps4), "$Rd32 = $Ps4", -tc_b86c7e8b, TypeS_2op>, Enc_f5e933 { +tc_cde8b071, TypeS_2op>, Enc_f5e933 { let Inst{13-5} = 0b000000000; let Inst{31-18} = 0b10001001010000; let hasNewValue = 1; @@ -3807,7 +3804,7 @@ def C2_tfrrp : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32), "$Pd4 = $Rs32", -tc_47f0b7ad, TypeS_2op>, Enc_48b75f { +tc_351fed2d, TypeS_2op>, Enc_48b75f { let Inst{13-2} = 0b000000000000; let Inst{31-21} = 0b10000101010; } @@ -3815,7 +3812,7 @@ def C2_vitpack : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Ps4, PredRegs:$Pt4), 
"$Rd32 = vitpack($Ps4,$Pt4)", -tc_7ca2ea10, TypeS_2op>, Enc_527412 { +tc_1b9c9ee5, TypeS_2op>, Enc_527412 { let Inst{7-5} = 0b000; let Inst{13-10} = 0b0000; let Inst{31-18} = 0b10001001000000; @@ -3827,7 +3824,7 @@ def C2_vmux : HInst< (outs DoubleRegs:$Rdd32), (ins PredRegs:$Pu4, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vmux($Pu4,$Rss32,$Rtt32)", -tc_d1b5a4b6, TypeALU64>, Enc_329361 { +tc_f8eeed7a, TypeALU64>, Enc_329361 { let Inst{7-7} = 0b0; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010001000; @@ -3836,7 +3833,7 @@ def C2_xor : HInst< (outs PredRegs:$Pd4), (ins PredRegs:$Ps4, PredRegs:$Pt4), "$Pd4 = xor($Ps4,$Pt4)", -tc_d63b71d1, TypeCR>, Enc_284ebb { +tc_53bc8a6a, TypeCR>, Enc_284ebb { let Inst{7-2} = 0b000000; let Inst{13-10} = 0b0000; let Inst{31-18} = 0b01101011010000; @@ -3845,7 +3842,7 @@ def C4_addipc : HInst< (outs IntRegs:$Rd32), (ins u32_0Imm:$Ii), "$Rd32 = add(pc,#$Ii)", -tc_1fe8323c, TypeCR>, Enc_607661 { +tc_b9c4623f, TypeCR>, Enc_607661 { let Inst{6-5} = 0b00; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0110101001001001; @@ -3861,7 +3858,7 @@ def C4_and_and : HInst< (outs PredRegs:$Pd4), (ins PredRegs:$Ps4, PredRegs:$Pt4, PredRegs:$Pu4), "$Pd4 = and($Ps4,and($Pt4,$Pu4))", -tc_43068634, TypeCR>, Enc_9ac432 { +tc_481e5e5c, TypeCR>, Enc_9ac432 { let Inst{5-2} = 0b0000; let Inst{13-10} = 0b0000; let Inst{31-18} = 0b01101011000100; @@ -3870,7 +3867,7 @@ def C4_and_andn : HInst< (outs PredRegs:$Pd4), (ins PredRegs:$Ps4, PredRegs:$Pt4, PredRegs:$Pu4), "$Pd4 = and($Ps4,and($Pt4,!$Pu4))", -tc_43068634, TypeCR>, Enc_9ac432 { +tc_481e5e5c, TypeCR>, Enc_9ac432 { let Inst{5-2} = 0b0000; let Inst{13-10} = 0b0000; let Inst{31-18} = 0b01101011100100; @@ -3879,7 +3876,7 @@ def C4_and_or : HInst< (outs PredRegs:$Pd4), (ins PredRegs:$Ps4, PredRegs:$Pt4, PredRegs:$Pu4), "$Pd4 = and($Ps4,or($Pt4,$Pu4))", -tc_43068634, TypeCR>, Enc_9ac432 { +tc_481e5e5c, TypeCR>, Enc_9ac432 { let Inst{5-2} = 0b0000; let Inst{13-10} = 0b0000; let Inst{31-18} = 0b01101011001100; @@ -3888,7 +3885,7 @@ def C4_and_orn : HInst< (outs PredRegs:$Pd4), (ins PredRegs:$Ps4, PredRegs:$Pt4, PredRegs:$Pu4), "$Pd4 = and($Ps4,or($Pt4,!$Pu4))", -tc_43068634, TypeCR>, Enc_9ac432 { +tc_481e5e5c, TypeCR>, Enc_9ac432 { let Inst{5-2} = 0b0000; let Inst{13-10} = 0b0000; let Inst{31-18} = 0b01101011101100; @@ -3897,7 +3894,7 @@ def C4_cmplte : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Pd4 = !cmp.gt($Rs32,$Rt32)", -tc_5fe9fcd0, TypeALU32_3op>, Enc_c2b48e, ImmRegRel { +tc_c6aa82f7, TypeALU32_3op>, Enc_c2b48e, ImmRegRel { let Inst{7-2} = 0b000100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11110010010; @@ -3909,7 +3906,7 @@ def C4_cmpltei : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, s32_0Imm:$Ii), "$Pd4 = !cmp.gt($Rs32,#$Ii)", -tc_9df8b0dc, TypeALU32_2op>, Enc_bd0b33, ImmRegRel { +tc_6ebb4a12, TypeALU32_2op>, Enc_bd0b33, ImmRegRel { let Inst{4-2} = 0b100; let Inst{31-22} = 0b0111010101; let CextOpcode = "C4_cmplte"; @@ -3925,7 +3922,7 @@ def C4_cmplteu : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Pd4 = !cmp.gtu($Rs32,$Rt32)", -tc_5fe9fcd0, TypeALU32_3op>, Enc_c2b48e, ImmRegRel { +tc_c6aa82f7, TypeALU32_3op>, Enc_c2b48e, ImmRegRel { let Inst{7-2} = 0b000100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11110010011; @@ -3937,7 +3934,7 @@ def C4_cmplteui : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, u32_0Imm:$Ii), "$Pd4 = !cmp.gtu($Rs32,#$Ii)", -tc_9df8b0dc, TypeALU32_2op>, Enc_c0cdde, ImmRegRel { +tc_6ebb4a12, TypeALU32_2op>, Enc_c0cdde, ImmRegRel { let Inst{4-2} = 0b100; 
let Inst{31-21} = 0b01110101100; let CextOpcode = "C4_cmplteu"; @@ -3953,7 +3950,7 @@ def C4_cmpneq : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Pd4 = !cmp.eq($Rs32,$Rt32)", -tc_5fe9fcd0, TypeALU32_3op>, Enc_c2b48e, ImmRegRel { +tc_c6aa82f7, TypeALU32_3op>, Enc_c2b48e, ImmRegRel { let Inst{7-2} = 0b000100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11110010000; @@ -3966,7 +3963,7 @@ def C4_cmpneqi : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, s32_0Imm:$Ii), "$Pd4 = !cmp.eq($Rs32,#$Ii)", -tc_9df8b0dc, TypeALU32_2op>, Enc_bd0b33, ImmRegRel { +tc_6ebb4a12, TypeALU32_2op>, Enc_bd0b33, ImmRegRel { let Inst{4-2} = 0b100; let Inst{31-22} = 0b0111010100; let CextOpcode = "C4_cmpneq"; @@ -3982,7 +3979,7 @@ def C4_fastcorner9 : HInst< (outs PredRegs:$Pd4), (ins PredRegs:$Ps4, PredRegs:$Pt4), "$Pd4 = fastcorner9($Ps4,$Pt4)", -tc_d63b71d1, TypeCR>, Enc_284ebb { +tc_53bc8a6a, TypeCR>, Enc_284ebb { let Inst{7-2} = 0b100100; let Inst{13-10} = 0b1000; let Inst{31-18} = 0b01101011000000; @@ -3991,7 +3988,7 @@ def C4_fastcorner9_not : HInst< (outs PredRegs:$Pd4), (ins PredRegs:$Ps4, PredRegs:$Pt4), "$Pd4 = !fastcorner9($Ps4,$Pt4)", -tc_d63b71d1, TypeCR>, Enc_284ebb { +tc_53bc8a6a, TypeCR>, Enc_284ebb { let Inst{7-2} = 0b100100; let Inst{13-10} = 0b1000; let Inst{31-18} = 0b01101011000100; @@ -4000,7 +3997,7 @@ def C4_nbitsclr : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Pd4 = !bitsclr($Rs32,$Rt32)", -tc_c58f771a, TypeS_3op>, Enc_c2b48e { +tc_1e856f58, TypeS_3op>, Enc_c2b48e { let Inst{7-2} = 0b000000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000111101; @@ -4009,7 +4006,7 @@ def C4_nbitsclri : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, u6_0Imm:$Ii), "$Pd4 = !bitsclr($Rs32,#$Ii)", -tc_5fa2857c, TypeS_2op>, Enc_5d6c34 { +tc_7a830544, TypeS_2op>, Enc_5d6c34 { let Inst{7-2} = 0b000000; let Inst{31-21} = 0b10000101101; } @@ -4017,7 +4014,7 @@ def C4_nbitsset : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Pd4 = !bitsset($Rs32,$Rt32)", -tc_c58f771a, TypeS_3op>, Enc_c2b48e { +tc_1e856f58, TypeS_3op>, Enc_c2b48e { let Inst{7-2} = 0b000000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000111011; @@ -4026,7 +4023,7 @@ def C4_or_and : HInst< (outs PredRegs:$Pd4), (ins PredRegs:$Ps4, PredRegs:$Pt4, PredRegs:$Pu4), "$Pd4 = or($Ps4,and($Pt4,$Pu4))", -tc_43068634, TypeCR>, Enc_9ac432 { +tc_481e5e5c, TypeCR>, Enc_9ac432 { let Inst{5-2} = 0b0000; let Inst{13-10} = 0b0000; let Inst{31-18} = 0b01101011010100; @@ -4035,7 +4032,7 @@ def C4_or_andn : HInst< (outs PredRegs:$Pd4), (ins PredRegs:$Ps4, PredRegs:$Pt4, PredRegs:$Pu4), "$Pd4 = or($Ps4,and($Pt4,!$Pu4))", -tc_43068634, TypeCR>, Enc_9ac432 { +tc_481e5e5c, TypeCR>, Enc_9ac432 { let Inst{5-2} = 0b0000; let Inst{13-10} = 0b0000; let Inst{31-18} = 0b01101011110100; @@ -4044,7 +4041,7 @@ def C4_or_or : HInst< (outs PredRegs:$Pd4), (ins PredRegs:$Ps4, PredRegs:$Pt4, PredRegs:$Pu4), "$Pd4 = or($Ps4,or($Pt4,$Pu4))", -tc_43068634, TypeCR>, Enc_9ac432 { +tc_481e5e5c, TypeCR>, Enc_9ac432 { let Inst{5-2} = 0b0000; let Inst{13-10} = 0b0000; let Inst{31-18} = 0b01101011011100; @@ -4053,7 +4050,7 @@ def C4_or_orn : HInst< (outs PredRegs:$Pd4), (ins PredRegs:$Ps4, PredRegs:$Pt4, PredRegs:$Pu4), "$Pd4 = or($Ps4,or($Pt4,!$Pu4))", -tc_43068634, TypeCR>, Enc_9ac432 { +tc_481e5e5c, TypeCR>, Enc_9ac432 { let Inst{5-2} = 0b0000; let Inst{13-10} = 0b0000; let Inst{31-18} = 0b01101011111100; @@ -4062,7 +4059,7 @@ def F2_conv_d2df : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32), "$Rdd32 = 
convert_d2df($Rss32)", -tc_e836c161, TypeS_2op>, Enc_b9c5fb, Requires<[HasV5T]> { +tc_f3eaa14b, TypeS_2op>, Enc_b9c5fb, Requires<[HasV5T]> { let Inst{13-5} = 0b000000011; let Inst{31-21} = 0b10000000111; let isFP = 1; @@ -4072,7 +4069,7 @@ def F2_conv_d2sf : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32), "$Rd32 = convert_d2sf($Rss32)", -tc_e836c161, TypeS_2op>, Enc_90cd8b, Requires<[HasV5T]> { +tc_f3eaa14b, TypeS_2op>, Enc_90cd8b, Requires<[HasV5T]> { let Inst{13-5} = 0b000000001; let Inst{31-21} = 0b10001000010; let hasNewValue = 1; @@ -4084,7 +4081,7 @@ def F2_conv_df2d : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32), "$Rdd32 = convert_df2d($Rss32)", -tc_e836c161, TypeS_2op>, Enc_b9c5fb, Requires<[HasV5T]> { +tc_f3eaa14b, TypeS_2op>, Enc_b9c5fb, Requires<[HasV5T]> { let Inst{13-5} = 0b000000000; let Inst{31-21} = 0b10000000111; let isFP = 1; @@ -4094,7 +4091,7 @@ def F2_conv_df2d_chop : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32), "$Rdd32 = convert_df2d($Rss32):chop", -tc_e836c161, TypeS_2op>, Enc_b9c5fb, Requires<[HasV5T]> { +tc_f3eaa14b, TypeS_2op>, Enc_b9c5fb, Requires<[HasV5T]> { let Inst{13-5} = 0b000000110; let Inst{31-21} = 0b10000000111; let isFP = 1; @@ -4104,7 +4101,7 @@ def F2_conv_df2sf : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32), "$Rd32 = convert_df2sf($Rss32)", -tc_e836c161, TypeS_2op>, Enc_90cd8b, Requires<[HasV5T]> { +tc_f3eaa14b, TypeS_2op>, Enc_90cd8b, Requires<[HasV5T]> { let Inst{13-5} = 0b000000001; let Inst{31-21} = 0b10001000000; let hasNewValue = 1; @@ -4116,7 +4113,7 @@ def F2_conv_df2ud : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32), "$Rdd32 = convert_df2ud($Rss32)", -tc_e836c161, TypeS_2op>, Enc_b9c5fb, Requires<[HasV5T]> { +tc_f3eaa14b, TypeS_2op>, Enc_b9c5fb, Requires<[HasV5T]> { let Inst{13-5} = 0b000000001; let Inst{31-21} = 0b10000000111; let isFP = 1; @@ -4126,7 +4123,7 @@ def F2_conv_df2ud_chop : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32), "$Rdd32 = convert_df2ud($Rss32):chop", -tc_e836c161, TypeS_2op>, Enc_b9c5fb, Requires<[HasV5T]> { +tc_f3eaa14b, TypeS_2op>, Enc_b9c5fb, Requires<[HasV5T]> { let Inst{13-5} = 0b000000111; let Inst{31-21} = 0b10000000111; let isFP = 1; @@ -4136,7 +4133,7 @@ def F2_conv_df2uw : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32), "$Rd32 = convert_df2uw($Rss32)", -tc_e836c161, TypeS_2op>, Enc_90cd8b, Requires<[HasV5T]> { +tc_f3eaa14b, TypeS_2op>, Enc_90cd8b, Requires<[HasV5T]> { let Inst{13-5} = 0b000000001; let Inst{31-21} = 0b10001000011; let hasNewValue = 1; @@ -4148,7 +4145,7 @@ def F2_conv_df2uw_chop : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32), "$Rd32 = convert_df2uw($Rss32):chop", -tc_e836c161, TypeS_2op>, Enc_90cd8b, Requires<[HasV5T]> { +tc_f3eaa14b, TypeS_2op>, Enc_90cd8b, Requires<[HasV5T]> { let Inst{13-5} = 0b000000001; let Inst{31-21} = 0b10001000101; let hasNewValue = 1; @@ -4160,7 +4157,7 @@ def F2_conv_df2w : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32), "$Rd32 = convert_df2w($Rss32)", -tc_e836c161, TypeS_2op>, Enc_90cd8b, Requires<[HasV5T]> { +tc_f3eaa14b, TypeS_2op>, Enc_90cd8b, Requires<[HasV5T]> { let Inst{13-5} = 0b000000001; let Inst{31-21} = 0b10001000100; let hasNewValue = 1; @@ -4172,7 +4169,7 @@ def F2_conv_df2w_chop : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32), "$Rd32 = convert_df2w($Rss32):chop", -tc_e836c161, TypeS_2op>, Enc_90cd8b, Requires<[HasV5T]> { +tc_f3eaa14b, TypeS_2op>, Enc_90cd8b, Requires<[HasV5T]> { let Inst{13-5} = 0b000000001; let Inst{31-21} = 0b10001000111; let hasNewValue = 1; @@ 
-4184,7 +4181,7 @@ def F2_conv_sf2d : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32), "$Rdd32 = convert_sf2d($Rs32)", -tc_e836c161, TypeS_2op>, Enc_3a3d62, Requires<[HasV5T]> { +tc_f3eaa14b, TypeS_2op>, Enc_3a3d62, Requires<[HasV5T]> { let Inst{13-5} = 0b000000100; let Inst{31-21} = 0b10000100100; let isFP = 1; @@ -4194,7 +4191,7 @@ def F2_conv_sf2d_chop : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32), "$Rdd32 = convert_sf2d($Rs32):chop", -tc_e836c161, TypeS_2op>, Enc_3a3d62, Requires<[HasV5T]> { +tc_f3eaa14b, TypeS_2op>, Enc_3a3d62, Requires<[HasV5T]> { let Inst{13-5} = 0b000000110; let Inst{31-21} = 0b10000100100; let isFP = 1; @@ -4204,7 +4201,7 @@ def F2_conv_sf2df : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32), "$Rdd32 = convert_sf2df($Rs32)", -tc_e836c161, TypeS_2op>, Enc_3a3d62, Requires<[HasV5T]> { +tc_f3eaa14b, TypeS_2op>, Enc_3a3d62, Requires<[HasV5T]> { let Inst{13-5} = 0b000000000; let Inst{31-21} = 0b10000100100; let isFP = 1; @@ -4214,7 +4211,7 @@ def F2_conv_sf2ud : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32), "$Rdd32 = convert_sf2ud($Rs32)", -tc_e836c161, TypeS_2op>, Enc_3a3d62, Requires<[HasV5T]> { +tc_f3eaa14b, TypeS_2op>, Enc_3a3d62, Requires<[HasV5T]> { let Inst{13-5} = 0b000000011; let Inst{31-21} = 0b10000100100; let isFP = 1; @@ -4224,7 +4221,7 @@ def F2_conv_sf2ud_chop : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32), "$Rdd32 = convert_sf2ud($Rs32):chop", -tc_e836c161, TypeS_2op>, Enc_3a3d62, Requires<[HasV5T]> { +tc_f3eaa14b, TypeS_2op>, Enc_3a3d62, Requires<[HasV5T]> { let Inst{13-5} = 0b000000101; let Inst{31-21} = 0b10000100100; let isFP = 1; @@ -4234,7 +4231,7 @@ def F2_conv_sf2uw : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = convert_sf2uw($Rs32)", -tc_e836c161, TypeS_2op>, Enc_5e2823, Requires<[HasV5T]> { +tc_f3eaa14b, TypeS_2op>, Enc_5e2823, Requires<[HasV5T]> { let Inst{13-5} = 0b000000000; let Inst{31-21} = 0b10001011011; let hasNewValue = 1; @@ -4246,7 +4243,7 @@ def F2_conv_sf2uw_chop : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = convert_sf2uw($Rs32):chop", -tc_e836c161, TypeS_2op>, Enc_5e2823, Requires<[HasV5T]> { +tc_f3eaa14b, TypeS_2op>, Enc_5e2823, Requires<[HasV5T]> { let Inst{13-5} = 0b000000001; let Inst{31-21} = 0b10001011011; let hasNewValue = 1; @@ -4258,7 +4255,7 @@ def F2_conv_sf2w : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = convert_sf2w($Rs32)", -tc_e836c161, TypeS_2op>, Enc_5e2823, Requires<[HasV5T]> { +tc_f3eaa14b, TypeS_2op>, Enc_5e2823, Requires<[HasV5T]> { let Inst{13-5} = 0b000000000; let Inst{31-21} = 0b10001011100; let hasNewValue = 1; @@ -4270,7 +4267,7 @@ def F2_conv_sf2w_chop : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = convert_sf2w($Rs32):chop", -tc_e836c161, TypeS_2op>, Enc_5e2823, Requires<[HasV5T]> { +tc_f3eaa14b, TypeS_2op>, Enc_5e2823, Requires<[HasV5T]> { let Inst{13-5} = 0b000000001; let Inst{31-21} = 0b10001011100; let hasNewValue = 1; @@ -4282,7 +4279,7 @@ def F2_conv_ud2df : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32), "$Rdd32 = convert_ud2df($Rss32)", -tc_e836c161, TypeS_2op>, Enc_b9c5fb, Requires<[HasV5T]> { +tc_f3eaa14b, TypeS_2op>, Enc_b9c5fb, Requires<[HasV5T]> { let Inst{13-5} = 0b000000010; let Inst{31-21} = 0b10000000111; let isFP = 1; @@ -4292,7 +4289,7 @@ def F2_conv_ud2sf : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32), "$Rd32 = convert_ud2sf($Rss32)", -tc_e836c161, TypeS_2op>, Enc_90cd8b, Requires<[HasV5T]> { +tc_f3eaa14b, TypeS_2op>, Enc_90cd8b, Requires<[HasV5T]> { let Inst{13-5} = 
0b000000001; let Inst{31-21} = 0b10001000001; let hasNewValue = 1; @@ -4304,7 +4301,7 @@ def F2_conv_uw2df : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32), "$Rdd32 = convert_uw2df($Rs32)", -tc_e836c161, TypeS_2op>, Enc_3a3d62, Requires<[HasV5T]> { +tc_f3eaa14b, TypeS_2op>, Enc_3a3d62, Requires<[HasV5T]> { let Inst{13-5} = 0b000000001; let Inst{31-21} = 0b10000100100; let isFP = 1; @@ -4314,7 +4311,7 @@ def F2_conv_uw2sf : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = convert_uw2sf($Rs32)", -tc_e836c161, TypeS_2op>, Enc_5e2823, Requires<[HasV5T]> { +tc_f3eaa14b, TypeS_2op>, Enc_5e2823, Requires<[HasV5T]> { let Inst{13-5} = 0b000000000; let Inst{31-21} = 0b10001011001; let hasNewValue = 1; @@ -4326,7 +4323,7 @@ def F2_conv_w2df : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32), "$Rdd32 = convert_w2df($Rs32)", -tc_e836c161, TypeS_2op>, Enc_3a3d62, Requires<[HasV5T]> { +tc_f3eaa14b, TypeS_2op>, Enc_3a3d62, Requires<[HasV5T]> { let Inst{13-5} = 0b000000010; let Inst{31-21} = 0b10000100100; let isFP = 1; @@ -4336,7 +4333,7 @@ def F2_conv_w2sf : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = convert_w2sf($Rs32)", -tc_e836c161, TypeS_2op>, Enc_5e2823, Requires<[HasV5T]> { +tc_f3eaa14b, TypeS_2op>, Enc_5e2823, Requires<[HasV5T]> { let Inst{13-5} = 0b000000000; let Inst{31-21} = 0b10001011010; let hasNewValue = 1; @@ -4348,7 +4345,7 @@ def F2_dfclass : HInst< (outs PredRegs:$Pd4), (ins DoubleRegs:$Rss32, u5_0Imm:$Ii), "$Pd4 = dfclass($Rss32,#$Ii)", -tc_5fa2857c, TypeALU64>, Enc_1f19b5, Requires<[HasV5T]> { +tc_7a830544, TypeALU64>, Enc_1f19b5, Requires<[HasV5T]> { let Inst{4-2} = 0b100; let Inst{13-10} = 0b0000; let Inst{31-21} = 0b11011100100; @@ -4359,7 +4356,7 @@ def F2_dfcmpeq : HInst< (outs PredRegs:$Pd4), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Pd4 = dfcmp.eq($Rss32,$Rtt32)", -tc_c58f771a, TypeALU64>, Enc_fcf7a7, Requires<[HasV5T]> { +tc_1e856f58, TypeALU64>, Enc_fcf7a7, Requires<[HasV5T]> { let Inst{7-2} = 0b000000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010010111; @@ -4371,7 +4368,7 @@ def F2_dfcmpge : HInst< (outs PredRegs:$Pd4), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Pd4 = dfcmp.ge($Rss32,$Rtt32)", -tc_c58f771a, TypeALU64>, Enc_fcf7a7, Requires<[HasV5T]> { +tc_1e856f58, TypeALU64>, Enc_fcf7a7, Requires<[HasV5T]> { let Inst{7-2} = 0b010000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010010111; @@ -4383,7 +4380,7 @@ def F2_dfcmpgt : HInst< (outs PredRegs:$Pd4), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Pd4 = dfcmp.gt($Rss32,$Rtt32)", -tc_c58f771a, TypeALU64>, Enc_fcf7a7, Requires<[HasV5T]> { +tc_1e856f58, TypeALU64>, Enc_fcf7a7, Requires<[HasV5T]> { let Inst{7-2} = 0b001000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010010111; @@ -4395,7 +4392,7 @@ def F2_dfcmpuo : HInst< (outs PredRegs:$Pd4), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Pd4 = dfcmp.uo($Rss32,$Rtt32)", -tc_c58f771a, TypeALU64>, Enc_fcf7a7, Requires<[HasV5T]> { +tc_1e856f58, TypeALU64>, Enc_fcf7a7, Requires<[HasV5T]> { let Inst{7-2} = 0b011000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010010111; @@ -4407,7 +4404,7 @@ def F2_dfimm_n : HInst< (outs DoubleRegs:$Rdd32), (ins u10_0Imm:$Ii), "$Rdd32 = dfmake(#$Ii):neg", -tc_485bb57c, TypeALU64>, Enc_e6c957, Requires<[HasV5T]> { +tc_234a11a5, TypeALU64>, Enc_e6c957, Requires<[HasV5T]> { let Inst{20-16} = 0b00000; let Inst{31-22} = 0b1101100101; let prefersSlot3 = 1; @@ -4416,7 +4413,7 @@ def F2_dfimm_p : HInst< (outs DoubleRegs:$Rdd32), (ins u10_0Imm:$Ii), "$Rdd32 = dfmake(#$Ii):pos", -tc_485bb57c, TypeALU64>, 
Enc_e6c957, Requires<[HasV5T]> { +tc_234a11a5, TypeALU64>, Enc_e6c957, Requires<[HasV5T]> { let Inst{20-16} = 0b00000; let Inst{31-22} = 0b1101100100; let prefersSlot3 = 1; @@ -4425,7 +4422,7 @@ def F2_sfadd : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = sfadd($Rs32,$Rt32)", -tc_3bea1824, TypeM>, Enc_5ab2be, Requires<[HasV5T]> { +tc_6792d5ff, TypeM>, Enc_5ab2be, Requires<[HasV5T]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101011000; @@ -4439,7 +4436,7 @@ def F2_sfclass : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, u5_0Imm:$Ii), "$Pd4 = sfclass($Rs32,#$Ii)", -tc_5fa2857c, TypeS_2op>, Enc_83ee64, Requires<[HasV5T]> { +tc_7a830544, TypeS_2op>, Enc_83ee64, Requires<[HasV5T]> { let Inst{7-2} = 0b000000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10000101111; @@ -4450,7 +4447,7 @@ def F2_sfcmpeq : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Pd4 = sfcmp.eq($Rs32,$Rt32)", -tc_c58f771a, TypeS_3op>, Enc_c2b48e, Requires<[HasV5T]> { +tc_1e856f58, TypeS_3op>, Enc_c2b48e, Requires<[HasV5T]> { let Inst{7-2} = 0b011000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000111111; @@ -4462,7 +4459,7 @@ def F2_sfcmpge : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Pd4 = sfcmp.ge($Rs32,$Rt32)", -tc_c58f771a, TypeS_3op>, Enc_c2b48e, Requires<[HasV5T]> { +tc_1e856f58, TypeS_3op>, Enc_c2b48e, Requires<[HasV5T]> { let Inst{7-2} = 0b000000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000111111; @@ -4474,7 +4471,7 @@ def F2_sfcmpgt : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Pd4 = sfcmp.gt($Rs32,$Rt32)", -tc_c58f771a, TypeS_3op>, Enc_c2b48e, Requires<[HasV5T]> { +tc_1e856f58, TypeS_3op>, Enc_c2b48e, Requires<[HasV5T]> { let Inst{7-2} = 0b100000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000111111; @@ -4486,7 +4483,7 @@ def F2_sfcmpuo : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Pd4 = sfcmp.uo($Rs32,$Rt32)", -tc_c58f771a, TypeS_3op>, Enc_c2b48e, Requires<[HasV5T]> { +tc_1e856f58, TypeS_3op>, Enc_c2b48e, Requires<[HasV5T]> { let Inst{7-2} = 0b001000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000111111; @@ -4498,7 +4495,7 @@ def F2_sffixupd : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = sffixupd($Rs32,$Rt32)", -tc_3bea1824, TypeM>, Enc_5ab2be, Requires<[HasV5T]> { +tc_6792d5ff, TypeM>, Enc_5ab2be, Requires<[HasV5T]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101011110; @@ -4510,7 +4507,7 @@ def F2_sffixupn : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = sffixupn($Rs32,$Rt32)", -tc_3bea1824, TypeM>, Enc_5ab2be, Requires<[HasV5T]> { +tc_6792d5ff, TypeM>, Enc_5ab2be, Requires<[HasV5T]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101011110; @@ -4522,7 +4519,7 @@ def F2_sffixupr : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = sffixupr($Rs32)", -tc_e836c161, TypeS_2op>, Enc_5e2823, Requires<[HasV5T]> { +tc_f3eaa14b, TypeS_2op>, Enc_5e2823, Requires<[HasV5T]> { let Inst{13-5} = 0b000000000; let Inst{31-21} = 0b10001011101; let hasNewValue = 1; @@ -4533,7 +4530,7 @@ def F2_sffma : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 += sfmpy($Rs32,$Rt32)", -tc_2d1e6f5c, TypeM>, Enc_2ae154, Requires<[HasV5T]> { +tc_d580173f, TypeM>, Enc_2ae154, Requires<[HasV5T]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101111000; @@ -4547,7 +4544,7 @@ def F2_sffma_lib : HInst< (outs IntRegs:$Rx32), 
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 += sfmpy($Rs32,$Rt32):lib", -tc_2d1e6f5c, TypeM>, Enc_2ae154, Requires<[HasV5T]> { +tc_d580173f, TypeM>, Enc_2ae154, Requires<[HasV5T]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101111000; @@ -4561,7 +4558,7 @@ def F2_sffma_sc : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32, PredRegs:$Pu4), "$Rx32 += sfmpy($Rs32,$Rt32,$Pu4):scale", -tc_2e55aa16, TypeM>, Enc_437f33, Requires<[HasV5T]> { +tc_038a1342, TypeM>, Enc_437f33, Requires<[HasV5T]> { let Inst{7-7} = 0b1; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101111011; @@ -4575,7 +4572,7 @@ def F2_sffms : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 -= sfmpy($Rs32,$Rt32)", -tc_2d1e6f5c, TypeM>, Enc_2ae154, Requires<[HasV5T]> { +tc_d580173f, TypeM>, Enc_2ae154, Requires<[HasV5T]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101111000; @@ -4589,7 +4586,7 @@ def F2_sffms_lib : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 -= sfmpy($Rs32,$Rt32):lib", -tc_2d1e6f5c, TypeM>, Enc_2ae154, Requires<[HasV5T]> { +tc_d580173f, TypeM>, Enc_2ae154, Requires<[HasV5T]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101111000; @@ -4603,7 +4600,7 @@ def F2_sfimm_n : HInst< (outs IntRegs:$Rd32), (ins u10_0Imm:$Ii), "$Rd32 = sfmake(#$Ii):neg", -tc_485bb57c, TypeALU64>, Enc_6c9440, Requires<[HasV5T]> { +tc_234a11a5, TypeALU64>, Enc_6c9440, Requires<[HasV5T]> { let Inst{20-16} = 0b00000; let Inst{31-22} = 0b1101011001; let hasNewValue = 1; @@ -4614,7 +4611,7 @@ def F2_sfimm_p : HInst< (outs IntRegs:$Rd32), (ins u10_0Imm:$Ii), "$Rd32 = sfmake(#$Ii):pos", -tc_485bb57c, TypeALU64>, Enc_6c9440, Requires<[HasV5T]> { +tc_234a11a5, TypeALU64>, Enc_6c9440, Requires<[HasV5T]> { let Inst{20-16} = 0b00000; let Inst{31-22} = 0b1101011000; let hasNewValue = 1; @@ -4625,7 +4622,7 @@ def F2_sfinvsqrta : HInst< (outs IntRegs:$Rd32, PredRegs:$Pe4), (ins IntRegs:$Rs32), "$Rd32,$Pe4 = sfinvsqrta($Rs32)", -tc_f1aa2cdb, TypeS_2op>, Enc_890909, Requires<[HasV5T]> { +tc_4d99bca9, TypeS_2op>, Enc_890909, Requires<[HasV5T]> { let Inst{13-7} = 0b0000000; let Inst{31-21} = 0b10001011111; let hasNewValue = 1; @@ -4637,7 +4634,7 @@ def F2_sfmax : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = sfmax($Rs32,$Rt32)", -tc_f1240c08, TypeM>, Enc_5ab2be, Requires<[HasV5T]> { +tc_976ddc4f, TypeM>, Enc_5ab2be, Requires<[HasV5T]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101011100; @@ -4651,7 +4648,7 @@ def F2_sfmin : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = sfmin($Rs32,$Rt32)", -tc_f1240c08, TypeM>, Enc_5ab2be, Requires<[HasV5T]> { +tc_976ddc4f, TypeM>, Enc_5ab2be, Requires<[HasV5T]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101011100; @@ -4665,7 +4662,7 @@ def F2_sfmpy : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = sfmpy($Rs32,$Rt32)", -tc_3bea1824, TypeM>, Enc_5ab2be, Requires<[HasV5T]> { +tc_6792d5ff, TypeM>, Enc_5ab2be, Requires<[HasV5T]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101011010; @@ -4679,7 +4676,7 @@ def F2_sfrecipa : HInst< (outs IntRegs:$Rd32, PredRegs:$Pe4), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32,$Pe4 = sfrecipa($Rs32,$Rt32)", -tc_09c86199, TypeM>, Enc_a94f3b, Requires<[HasV5T]> { +tc_9c00ce8d, TypeM>, Enc_a94f3b, Requires<[HasV5T]> { let Inst{7-7} = 0b1; let 
Inst{13-13} = 0b0; let Inst{31-21} = 0b11101011111; @@ -4692,7 +4689,7 @@ def F2_sfsub : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = sfsub($Rs32,$Rt32)", -tc_3bea1824, TypeM>, Enc_5ab2be, Requires<[HasV5T]> { +tc_6792d5ff, TypeM>, Enc_5ab2be, Requires<[HasV5T]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101011000; @@ -4705,11 +4702,13 @@ def J2_call : HInst< (outs), (ins a30_2Imm:$Ii), "call $Ii", -tc_639d93ee, TypeJ>, Enc_81ac1d, PredRel { +tc_a27582fa, TypeJ>, Enc_81ac1d, PredRel { let Inst{0-0} = 0b0; let Inst{31-25} = 0b0101101; let isCall = 1; let prefersSlot3 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [R29]; let Defs = [PC, R31]; let BaseOpcode = "J2_call"; @@ -4725,7 +4724,7 @@ def J2_callf : HInst< (outs), (ins PredRegs:$Pu4, a30_2Imm:$Ii), "if (!$Pu4) call $Ii", -tc_0767081f, TypeJ>, Enc_daea09, PredRel { +tc_2f185f5c, TypeJ>, Enc_daea09, PredRel { let Inst{0-0} = 0b0; let Inst{12-10} = 0b000; let Inst{21-21} = 0b1; @@ -4734,6 +4733,9 @@ let isPredicated = 1; let isPredicatedFalse = 1; let isCall = 1; let prefersSlot3 = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [R29]; let Defs = [PC, R31]; let BaseOpcode = "J2_call"; @@ -4749,12 +4751,12 @@ def J2_callr : HInst< (outs), (ins IntRegs:$Rs32), "callr $Rs32", -tc_ecfaae86, TypeJ>, Enc_ecbcc8 { +tc_15411484, TypeJ>, Enc_ecbcc8 { let Inst{13-0} = 0b00000000000000; let Inst{31-21} = 0b01010000101; -let cofMax1 = 1; let isCall = 1; let prefersSlot3 = 1; +let cofMax1 = 1; let Uses = [R29]; let Defs = [PC, R31]; let hasSideEffects = 1; @@ -4763,15 +4765,15 @@ def J2_callrf : HInst< (outs), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if (!$Pu4) callr $Rs32", -tc_84630363, TypeJ>, Enc_88d4d9 { +tc_10b97e27, TypeJ>, Enc_88d4d9 { let Inst{7-0} = 0b00000000; let Inst{13-10} = 0b0000; let Inst{31-21} = 0b01010001001; let isPredicated = 1; let isPredicatedFalse = 1; -let cofMax1 = 1; let isCall = 1; let prefersSlot3 = 1; +let cofMax1 = 1; let Uses = [R29]; let Defs = [PC, R31]; let hasSideEffects = 1; @@ -4781,14 +4783,14 @@ def J2_callrt : HInst< (outs), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if ($Pu4) callr $Rs32", -tc_84630363, TypeJ>, Enc_88d4d9 { +tc_10b97e27, TypeJ>, Enc_88d4d9 { let Inst{7-0} = 0b00000000; let Inst{13-10} = 0b0000; let Inst{31-21} = 0b01010001000; let isPredicated = 1; -let cofMax1 = 1; let isCall = 1; let prefersSlot3 = 1; +let cofMax1 = 1; let Uses = [R29]; let Defs = [PC, R31]; let hasSideEffects = 1; @@ -4798,7 +4800,7 @@ def J2_callt : HInst< (outs), (ins PredRegs:$Pu4, a30_2Imm:$Ii), "if ($Pu4) call $Ii", -tc_0767081f, TypeJ>, Enc_daea09, PredRel { +tc_2f185f5c, TypeJ>, Enc_daea09, PredRel { let Inst{0-0} = 0b0; let Inst{12-10} = 0b000; let Inst{21-21} = 0b0; @@ -4806,6 +4808,9 @@ let Inst{31-24} = 0b01011101; let isPredicated = 1; let isCall = 1; let prefersSlot3 = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [R29]; let Defs = [PC, R31]; let BaseOpcode = "J2_call"; @@ -4821,7 +4826,7 @@ def J2_endloop0 : HInst< (outs), (ins), "endloop0", -tc_aad55963, TypeJ> { +tc_52d7bbea, TypeJ> { let Uses = [LC0, SA0]; let Defs = [LC0, P3, PC, USR]; let isBranch = 1; @@ -4832,7 +4837,7 @@ def J2_endloop01 : HInst< (outs), (ins), "endloop01", -tc_aad55963, TypeJ> { +tc_52d7bbea, TypeJ> { let Uses = [LC0, LC1, SA0, SA1]; let Defs = [LC0, LC1, P3, PC, USR]; let isPseudo = 1; @@ -4841,7 +4846,7 @@ def J2_endloop1 : HInst< (outs), (ins), "endloop1", -tc_aad55963, TypeJ> { +tc_52d7bbea, TypeJ> { let Uses = [LC1, SA1]; let 
Defs = [LC1, PC]; let isBranch = 1; @@ -4852,11 +4857,13 @@ def J2_jump : HInst< (outs), (ins b30_2Imm:$Ii), "jump $Ii", -tc_a333d2a9, TypeJ>, Enc_81ac1d, PredNewRel { +tc_3669266a, TypeJ>, Enc_81ac1d, PredNewRel { let Inst{0-0} = 0b0; let Inst{31-25} = 0b0101100; let isTerminator = 1; let isBranch = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Defs = [PC]; let InputType = "imm"; let BaseOpcode = "J2_jump"; @@ -4872,7 +4879,7 @@ def J2_jumpf : HInst< (outs), (ins PredRegs:$Pu4, b30_2Imm:$Ii), "if (!$Pu4) jump:nt $Ii", -tc_1b834fe7, TypeJ>, Enc_daea09, PredNewRel { +tc_e9fae2d6, TypeJ>, Enc_daea09, PredNewRel { let Inst{0-0} = 0b0; let Inst{12-10} = 0b000; let Inst{21-21} = 0b1; @@ -4881,6 +4888,9 @@ let isPredicated = 1; let isPredicatedFalse = 1; let isTerminator = 1; let isBranch = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Defs = [PC]; let InputType = "imm"; let BaseOpcode = "J2_jump"; @@ -4895,7 +4905,7 @@ def J2_jumpf_nopred_map : HInst< (outs), (ins PredRegs:$Pu4, b15_2Imm:$Ii), "if (!$Pu4) jump $Ii", -tc_1b834fe7, TypeMAPPING>, Requires<[HasV60T]> { +tc_e9fae2d6, TypeMAPPING>, Requires<[HasV60T]> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -4903,7 +4913,7 @@ def J2_jumpfnew : HInst< (outs), (ins PredRegs:$Pu4, b30_2Imm:$Ii), "if (!$Pu4.new) jump:nt $Ii", -tc_537e2013, TypeJ>, Enc_daea09, PredNewRel { +tc_a46f0df5, TypeJ>, Enc_daea09, PredNewRel { let Inst{0-0} = 0b0; let Inst{12-10} = 0b010; let Inst{21-21} = 0b1; @@ -4913,6 +4923,9 @@ let isPredicatedFalse = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Defs = [PC]; let InputType = "imm"; let BaseOpcode = "J2_jump"; @@ -4927,7 +4940,7 @@ def J2_jumpfnewpt : HInst< (outs), (ins PredRegs:$Pu4, b30_2Imm:$Ii), "if (!$Pu4.new) jump:t $Ii", -tc_537e2013, TypeJ>, Enc_daea09, PredNewRel { +tc_a46f0df5, TypeJ>, Enc_daea09, PredNewRel { let Inst{0-0} = 0b0; let Inst{12-10} = 0b110; let Inst{21-21} = 0b1; @@ -4937,6 +4950,9 @@ let isPredicatedFalse = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Defs = [PC]; let InputType = "imm"; let BaseOpcode = "J2_jump"; @@ -4951,7 +4967,7 @@ def J2_jumpfpt : HInst< (outs), (ins PredRegs:$Pu4, b30_2Imm:$Ii), "if (!$Pu4) jump:t $Ii", -tc_b5bfaa60, TypeJ>, Enc_daea09, Requires<[HasV60T]>, PredNewRel { +tc_e1e99bfa, TypeJ>, Enc_daea09, Requires<[HasV60T]>, PredNewRel { let Inst{0-0} = 0b0; let Inst{12-10} = 0b100; let Inst{21-21} = 0b1; @@ -4960,6 +4976,9 @@ let isPredicated = 1; let isPredicatedFalse = 1; let isTerminator = 1; let isBranch = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Defs = [PC]; let InputType = "imm"; let BaseOpcode = "J2_jump"; @@ -4974,7 +4993,7 @@ def J2_jumpr : HInst< (outs), (ins IntRegs:$Rs32), "jumpr $Rs32", -tc_b08b653e, TypeJ>, Enc_ecbcc8, PredNewRel { +tc_9faf76ae, TypeJ>, Enc_ecbcc8, PredNewRel { let Inst{13-0} = 0b00000000000000; let Inst{31-21} = 0b01010010100; let isTerminator = 1; @@ -4991,7 +5010,7 @@ def J2_jumprf : HInst< (outs), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if (!$Pu4) jumpr:nt $Rs32", -tc_07ac815d, TypeJ>, Enc_88d4d9, PredNewRel { +tc_e0739b8c, TypeJ>, Enc_88d4d9, PredNewRel { let Inst{7-0} = 0b00000000; let Inst{13-10} = 0b0000; let Inst{31-21} = 0b01010011011; @@ -5010,7 +5029,7 @@ def J2_jumprf_nopred_map : HInst< (outs), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if (!$Pu4) jumpr $Rs32", -tc_07ac815d, TypeMAPPING>, Requires<[HasV60T]> { +tc_e0739b8c, 
TypeMAPPING>, Requires<[HasV60T]> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -5018,7 +5037,7 @@ def J2_jumprfnew : HInst< (outs), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if (!$Pu4.new) jumpr:nt $Rs32", -tc_1f9668cc, TypeJ>, Enc_88d4d9, PredNewRel { +tc_181af5d0, TypeJ>, Enc_88d4d9, PredNewRel { let Inst{7-0} = 0b00000000; let Inst{13-10} = 0b0010; let Inst{31-21} = 0b01010011011; @@ -5027,8 +5046,8 @@ let isPredicatedFalse = 1; let isTerminator = 1; let isIndirectBranch = 1; let isBranch = 1; -let cofMax1 = 1; let isPredicatedNew = 1; +let cofMax1 = 1; let Defs = [PC]; let InputType = "reg"; let BaseOpcode = "J2_jumpr"; @@ -5038,7 +5057,7 @@ def J2_jumprfnewpt : HInst< (outs), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if (!$Pu4.new) jumpr:t $Rs32", -tc_1f9668cc, TypeJ>, Enc_88d4d9, PredNewRel { +tc_181af5d0, TypeJ>, Enc_88d4d9, PredNewRel { let Inst{7-0} = 0b00000000; let Inst{13-10} = 0b0110; let Inst{31-21} = 0b01010011011; @@ -5047,8 +5066,8 @@ let isPredicatedFalse = 1; let isTerminator = 1; let isIndirectBranch = 1; let isBranch = 1; -let cofMax1 = 1; let isPredicatedNew = 1; +let cofMax1 = 1; let Defs = [PC]; let InputType = "reg"; let BaseOpcode = "J2_jumpr"; @@ -5058,7 +5077,7 @@ def J2_jumprfpt : HInst< (outs), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if (!$Pu4) jumpr:t $Rs32", -tc_a1fb80e1, TypeJ>, Enc_88d4d9, Requires<[HasV60T]>, PredNewRel { +tc_97743097, TypeJ>, Enc_88d4d9, Requires<[HasV60T]>, PredNewRel { let Inst{7-0} = 0b00000000; let Inst{13-10} = 0b0100; let Inst{31-21} = 0b01010011011; @@ -5077,7 +5096,7 @@ def J2_jumprgtez : HInst< (outs), (ins IntRegs:$Rs32, b13_2Imm:$Ii), "if ($Rs32>=#0) jump:nt $Ii", -tc_b324366f, TypeCR>, Enc_0fa531 { +tc_73043bf4, TypeCR>, Enc_0fa531 { let Inst{0-0} = 0b0; let Inst{12-12} = 0b0; let Inst{31-22} = 0b0110000101; @@ -5085,6 +5104,9 @@ let isPredicated = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Defs = [PC]; let isTaken = Inst{12}; } @@ -5092,7 +5114,7 @@ def J2_jumprgtezpt : HInst< (outs), (ins IntRegs:$Rs32, b13_2Imm:$Ii), "if ($Rs32>=#0) jump:t $Ii", -tc_b324366f, TypeCR>, Enc_0fa531 { +tc_73043bf4, TypeCR>, Enc_0fa531 { let Inst{0-0} = 0b0; let Inst{12-12} = 0b1; let Inst{31-22} = 0b0110000101; @@ -5100,6 +5122,9 @@ let isPredicated = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Defs = [PC]; let isTaken = Inst{12}; } @@ -5107,7 +5132,7 @@ def J2_jumprltez : HInst< (outs), (ins IntRegs:$Rs32, b13_2Imm:$Ii), "if ($Rs32<=#0) jump:nt $Ii", -tc_b324366f, TypeCR>, Enc_0fa531 { +tc_73043bf4, TypeCR>, Enc_0fa531 { let Inst{0-0} = 0b0; let Inst{12-12} = 0b0; let Inst{31-22} = 0b0110000111; @@ -5115,6 +5140,9 @@ let isPredicated = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Defs = [PC]; let isTaken = Inst{12}; } @@ -5122,7 +5150,7 @@ def J2_jumprltezpt : HInst< (outs), (ins IntRegs:$Rs32, b13_2Imm:$Ii), "if ($Rs32<=#0) jump:t $Ii", -tc_b324366f, TypeCR>, Enc_0fa531 { +tc_73043bf4, TypeCR>, Enc_0fa531 { let Inst{0-0} = 0b0; let Inst{12-12} = 0b1; let Inst{31-22} = 0b0110000111; @@ -5130,6 +5158,9 @@ let isPredicated = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Defs = [PC]; let isTaken = Inst{12}; } @@ -5137,7 +5168,7 @@ def J2_jumprnz : HInst< (outs), (ins IntRegs:$Rs32, b13_2Imm:$Ii), "if ($Rs32==#0) jump:nt 
$Ii", -tc_b324366f, TypeCR>, Enc_0fa531 { +tc_73043bf4, TypeCR>, Enc_0fa531 { let Inst{0-0} = 0b0; let Inst{12-12} = 0b0; let Inst{31-22} = 0b0110000110; @@ -5145,6 +5176,9 @@ let isPredicated = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Defs = [PC]; let isTaken = Inst{12}; } @@ -5152,7 +5186,7 @@ def J2_jumprnzpt : HInst< (outs), (ins IntRegs:$Rs32, b13_2Imm:$Ii), "if ($Rs32==#0) jump:t $Ii", -tc_b324366f, TypeCR>, Enc_0fa531 { +tc_73043bf4, TypeCR>, Enc_0fa531 { let Inst{0-0} = 0b0; let Inst{12-12} = 0b1; let Inst{31-22} = 0b0110000110; @@ -5160,6 +5194,9 @@ let isPredicated = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Defs = [PC]; let isTaken = Inst{12}; } @@ -5167,7 +5204,7 @@ def J2_jumprt : HInst< (outs), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if ($Pu4) jumpr:nt $Rs32", -tc_07ac815d, TypeJ>, Enc_88d4d9, PredNewRel { +tc_e0739b8c, TypeJ>, Enc_88d4d9, PredNewRel { let Inst{7-0} = 0b00000000; let Inst{13-10} = 0b0000; let Inst{31-21} = 0b01010011010; @@ -5185,7 +5222,7 @@ def J2_jumprt_nopred_map : HInst< (outs), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if ($Pu4) jumpr $Rs32", -tc_07ac815d, TypeMAPPING>, Requires<[HasV60T]> { +tc_e0739b8c, TypeMAPPING>, Requires<[HasV60T]> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -5193,7 +5230,7 @@ def J2_jumprtnew : HInst< (outs), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if ($Pu4.new) jumpr:nt $Rs32", -tc_1f9668cc, TypeJ>, Enc_88d4d9, PredNewRel { +tc_181af5d0, TypeJ>, Enc_88d4d9, PredNewRel { let Inst{7-0} = 0b00000000; let Inst{13-10} = 0b0010; let Inst{31-21} = 0b01010011010; @@ -5201,8 +5238,8 @@ let isPredicated = 1; let isTerminator = 1; let isIndirectBranch = 1; let isBranch = 1; -let cofMax1 = 1; let isPredicatedNew = 1; +let cofMax1 = 1; let Defs = [PC]; let InputType = "reg"; let BaseOpcode = "J2_jumpr"; @@ -5212,7 +5249,7 @@ def J2_jumprtnewpt : HInst< (outs), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if ($Pu4.new) jumpr:t $Rs32", -tc_1f9668cc, TypeJ>, Enc_88d4d9, PredNewRel { +tc_181af5d0, TypeJ>, Enc_88d4d9, PredNewRel { let Inst{7-0} = 0b00000000; let Inst{13-10} = 0b0110; let Inst{31-21} = 0b01010011010; @@ -5220,8 +5257,8 @@ let isPredicated = 1; let isTerminator = 1; let isIndirectBranch = 1; let isBranch = 1; -let cofMax1 = 1; let isPredicatedNew = 1; +let cofMax1 = 1; let Defs = [PC]; let InputType = "reg"; let BaseOpcode = "J2_jumpr"; @@ -5231,7 +5268,7 @@ def J2_jumprtpt : HInst< (outs), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if ($Pu4) jumpr:t $Rs32", -tc_a1fb80e1, TypeJ>, Enc_88d4d9, Requires<[HasV60T]>, PredNewRel { +tc_97743097, TypeJ>, Enc_88d4d9, Requires<[HasV60T]>, PredNewRel { let Inst{7-0} = 0b00000000; let Inst{13-10} = 0b0100; let Inst{31-21} = 0b01010011010; @@ -5249,7 +5286,7 @@ def J2_jumprz : HInst< (outs), (ins IntRegs:$Rs32, b13_2Imm:$Ii), "if ($Rs32!=#0) jump:nt $Ii", -tc_b324366f, TypeCR>, Enc_0fa531 { +tc_73043bf4, TypeCR>, Enc_0fa531 { let Inst{0-0} = 0b0; let Inst{12-12} = 0b0; let Inst{31-22} = 0b0110000100; @@ -5257,6 +5294,9 @@ let isPredicated = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Defs = [PC]; let isTaken = Inst{12}; } @@ -5264,7 +5304,7 @@ def J2_jumprzpt : HInst< (outs), (ins IntRegs:$Rs32, b13_2Imm:$Ii), "if ($Rs32!=#0) jump:t $Ii", -tc_b324366f, TypeCR>, Enc_0fa531 { +tc_73043bf4, TypeCR>, Enc_0fa531 { let Inst{0-0} = 0b0; let Inst{12-12} = 0b1; let 
Inst{31-22} = 0b0110000100; @@ -5272,6 +5312,9 @@ let isPredicated = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Defs = [PC]; let isTaken = Inst{12}; } @@ -5279,7 +5322,7 @@ def J2_jumpt : HInst< (outs), (ins PredRegs:$Pu4, b30_2Imm:$Ii), "if ($Pu4) jump:nt $Ii", -tc_1b834fe7, TypeJ>, Enc_daea09, PredNewRel { +tc_e9fae2d6, TypeJ>, Enc_daea09, PredNewRel { let Inst{0-0} = 0b0; let Inst{12-10} = 0b000; let Inst{21-21} = 0b0; @@ -5287,6 +5330,9 @@ let Inst{31-24} = 0b01011100; let isPredicated = 1; let isTerminator = 1; let isBranch = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Defs = [PC]; let InputType = "imm"; let BaseOpcode = "J2_jump"; @@ -5301,7 +5347,7 @@ def J2_jumpt_nopred_map : HInst< (outs), (ins PredRegs:$Pu4, b15_2Imm:$Ii), "if ($Pu4) jump $Ii", -tc_1b834fe7, TypeMAPPING>, Requires<[HasV60T]> { +tc_e9fae2d6, TypeMAPPING>, Requires<[HasV60T]> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -5309,7 +5355,7 @@ def J2_jumptnew : HInst< (outs), (ins PredRegs:$Pu4, b30_2Imm:$Ii), "if ($Pu4.new) jump:nt $Ii", -tc_537e2013, TypeJ>, Enc_daea09, PredNewRel { +tc_a46f0df5, TypeJ>, Enc_daea09, PredNewRel { let Inst{0-0} = 0b0; let Inst{12-10} = 0b010; let Inst{21-21} = 0b0; @@ -5318,6 +5364,9 @@ let isPredicated = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Defs = [PC]; let InputType = "imm"; let BaseOpcode = "J2_jump"; @@ -5332,7 +5381,7 @@ def J2_jumptnewpt : HInst< (outs), (ins PredRegs:$Pu4, b30_2Imm:$Ii), "if ($Pu4.new) jump:t $Ii", -tc_537e2013, TypeJ>, Enc_daea09, PredNewRel { +tc_a46f0df5, TypeJ>, Enc_daea09, PredNewRel { let Inst{0-0} = 0b0; let Inst{12-10} = 0b110; let Inst{21-21} = 0b0; @@ -5341,6 +5390,9 @@ let isPredicated = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Defs = [PC]; let InputType = "imm"; let BaseOpcode = "J2_jump"; @@ -5355,7 +5407,7 @@ def J2_jumptpt : HInst< (outs), (ins PredRegs:$Pu4, b30_2Imm:$Ii), "if ($Pu4) jump:t $Ii", -tc_b5bfaa60, TypeJ>, Enc_daea09, Requires<[HasV60T]>, PredNewRel { +tc_e1e99bfa, TypeJ>, Enc_daea09, Requires<[HasV60T]>, PredNewRel { let Inst{0-0} = 0b0; let Inst{12-10} = 0b100; let Inst{21-21} = 0b0; @@ -5363,6 +5415,9 @@ let Inst{31-24} = 0b01011100; let isPredicated = 1; let isTerminator = 1; let isBranch = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Defs = [PC]; let InputType = "imm"; let BaseOpcode = "J2_jump"; @@ -5377,10 +5432,12 @@ def J2_loop0i : HInst< (outs), (ins b30_2Imm:$Ii, u10_0Imm:$II), "loop0($Ii,#$II)", -tc_1000eb10, TypeCR>, Enc_4dc228 { +tc_cf59f215, TypeCR>, Enc_4dc228 { let Inst{2-2} = 0b0; let Inst{13-13} = 0b0; let Inst{31-21} = 0b01101001000; +let cofRelax1 = 1; +let cofRelax2 = 1; let Defs = [LC0, SA0, USR]; let isExtendable = 1; let opExtendable = 0; @@ -5392,11 +5449,13 @@ def J2_loop0r : HInst< (outs), (ins b30_2Imm:$Ii, IntRegs:$Rs32), "loop0($Ii,$Rs32)", -tc_f055fbb6, TypeCR>, Enc_864a5a { +tc_7934b9df, TypeCR>, Enc_864a5a { let Inst{2-0} = 0b000; let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b01100000000; +let cofRelax1 = 1; +let cofRelax2 = 1; let Defs = [LC0, SA0, USR]; let isExtendable = 1; let opExtendable = 0; @@ -5408,10 +5467,12 @@ def J2_loop1i : HInst< (outs), (ins b30_2Imm:$Ii, u10_0Imm:$II), "loop1($Ii,#$II)", -tc_1000eb10, TypeCR>, Enc_4dc228 { +tc_cf59f215, TypeCR>, Enc_4dc228 { 
let Inst{2-2} = 0b0; let Inst{13-13} = 0b0; let Inst{31-21} = 0b01101001001; +let cofRelax1 = 1; +let cofRelax2 = 1; let Defs = [LC1, SA1]; let isExtendable = 1; let opExtendable = 0; @@ -5423,11 +5484,13 @@ def J2_loop1r : HInst< (outs), (ins b30_2Imm:$Ii, IntRegs:$Rs32), "loop1($Ii,$Rs32)", -tc_f055fbb6, TypeCR>, Enc_864a5a { +tc_7934b9df, TypeCR>, Enc_864a5a { let Inst{2-0} = 0b000; let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b01100000001; +let cofRelax1 = 1; +let cofRelax2 = 1; let Defs = [LC1, SA1]; let isExtendable = 1; let opExtendable = 0; @@ -5439,7 +5502,7 @@ def J2_pause : HInst< (outs), (ins u8_0Imm:$Ii), "pause(#$Ii)", -tc_b189ad4c, TypeJ>, Enc_a51a9a { +tc_681a2300, TypeJ>, Enc_a51a9a { let Inst{1-0} = 0b00; let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; @@ -5450,11 +5513,13 @@ def J2_ploop1si : HInst< (outs), (ins b30_2Imm:$Ii, u10_0Imm:$II), "p3 = sp1loop0($Ii,#$II)", -tc_feb4974b, TypeCR>, Enc_4dc228 { +tc_c5e2426d, TypeCR>, Enc_4dc228 { let Inst{2-2} = 0b0; let Inst{13-13} = 0b0; let Inst{31-21} = 0b01101001101; let isPredicateLate = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; let Defs = [LC0, P3, SA0, USR]; let isExtendable = 1; let opExtendable = 0; @@ -5466,12 +5531,14 @@ def J2_ploop1sr : HInst< (outs), (ins b30_2Imm:$Ii, IntRegs:$Rs32), "p3 = sp1loop0($Ii,$Rs32)", -tc_d6a805a8, TypeCR>, Enc_864a5a { +tc_4f7cd700, TypeCR>, Enc_864a5a { let Inst{2-0} = 0b000; let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b01100000101; let isPredicateLate = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; let Defs = [LC0, P3, SA0, USR]; let isExtendable = 1; let opExtendable = 0; @@ -5483,11 +5550,13 @@ def J2_ploop2si : HInst< (outs), (ins b30_2Imm:$Ii, u10_0Imm:$II), "p3 = sp2loop0($Ii,#$II)", -tc_feb4974b, TypeCR>, Enc_4dc228 { +tc_c5e2426d, TypeCR>, Enc_4dc228 { let Inst{2-2} = 0b0; let Inst{13-13} = 0b0; let Inst{31-21} = 0b01101001110; let isPredicateLate = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; let Defs = [LC0, P3, SA0, USR]; let isExtendable = 1; let opExtendable = 0; @@ -5499,12 +5568,14 @@ def J2_ploop2sr : HInst< (outs), (ins b30_2Imm:$Ii, IntRegs:$Rs32), "p3 = sp2loop0($Ii,$Rs32)", -tc_d6a805a8, TypeCR>, Enc_864a5a { +tc_4f7cd700, TypeCR>, Enc_864a5a { let Inst{2-0} = 0b000; let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b01100000110; let isPredicateLate = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; let Defs = [LC0, P3, SA0, USR]; let isExtendable = 1; let opExtendable = 0; @@ -5516,11 +5587,13 @@ def J2_ploop3si : HInst< (outs), (ins b30_2Imm:$Ii, u10_0Imm:$II), "p3 = sp3loop0($Ii,#$II)", -tc_feb4974b, TypeCR>, Enc_4dc228 { +tc_c5e2426d, TypeCR>, Enc_4dc228 { let Inst{2-2} = 0b0; let Inst{13-13} = 0b0; let Inst{31-21} = 0b01101001111; let isPredicateLate = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; let Defs = [LC0, P3, SA0, USR]; let isExtendable = 1; let opExtendable = 0; @@ -5532,12 +5605,14 @@ def J2_ploop3sr : HInst< (outs), (ins b30_2Imm:$Ii, IntRegs:$Rs32), "p3 = sp3loop0($Ii,$Rs32)", -tc_d6a805a8, TypeCR>, Enc_864a5a { +tc_4f7cd700, TypeCR>, Enc_864a5a { let Inst{2-0} = 0b000; let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b01100000111; let isPredicateLate = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; let Defs = [LC0, P3, SA0, USR]; let isExtendable = 1; let opExtendable = 0; @@ -5549,7 +5624,7 @@ def J2_trap0 : HInst< (outs), (ins u8_0Imm:$Ii), "trap0(#$Ii)", -tc_cbe45117, TypeJ>, Enc_a51a9a { +tc_14cd4cfa, TypeJ>, Enc_a51a9a { let Inst{1-0} = 0b00; let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; @@ 
-5560,7 +5635,7 @@ def J4_cmpeq_f_jumpnv_nt : HInst< (outs), (ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii), "if (!cmp.eq($Ns8.new,$Rt32)) jump:nt $Ii", -tc_580a779c, TypeNCJ>, Enc_c9a18e, PredRel { +tc_51b866be, TypeNCJ>, Enc_c9a18e, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b0; let Inst{19-19} = 0b0; @@ -5569,8 +5644,9 @@ let isPredicated = 1; let isPredicatedFalse = 1; let isTerminator = 1; let isBranch = 1; -let cofMax1 = 1; let isNewValue = 1; +let cofMax1 = 1; +let isRestrictNoSlot1Store = 1; let Defs = [PC]; let BaseOpcode = "J4_cmpeqr"; let isTaken = Inst{13}; @@ -5585,7 +5661,7 @@ def J4_cmpeq_f_jumpnv_t : HInst< (outs), (ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii), "if (!cmp.eq($Ns8.new,$Rt32)) jump:t $Ii", -tc_580a779c, TypeNCJ>, Enc_c9a18e, PredRel { +tc_51b866be, TypeNCJ>, Enc_c9a18e, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b1; let Inst{19-19} = 0b0; @@ -5594,8 +5670,9 @@ let isPredicated = 1; let isPredicatedFalse = 1; let isTerminator = 1; let isBranch = 1; -let cofMax1 = 1; let isNewValue = 1; +let cofMax1 = 1; +let isRestrictNoSlot1Store = 1; let Defs = [PC]; let BaseOpcode = "J4_cmpeqr"; let isTaken = Inst{13}; @@ -5610,7 +5687,7 @@ def J4_cmpeq_fp0_jump_nt : HInst< (outs), (ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii), "p0 = cmp.eq($Rs16,$Rt16); if (!p0.new) jump:nt $Ii", -tc_92d1833c, TypeCJ>, Enc_6a5972, PredRel { +tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel { let Inst{0-0} = 0b0; let Inst{13-12} = 0b00; let Inst{31-22} = 0b0001010001; @@ -5619,6 +5696,9 @@ let isPredicatedFalse = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [P0]; let Defs = [P0, PC]; let BaseOpcode = "J4_cmpeqp0"; @@ -5633,7 +5713,7 @@ def J4_cmpeq_fp0_jump_t : HInst< (outs), (ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii), "p0 = cmp.eq($Rs16,$Rt16); if (!p0.new) jump:t $Ii", -tc_92d1833c, TypeCJ>, Enc_6a5972, PredRel { +tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel { let Inst{0-0} = 0b0; let Inst{13-12} = 0b10; let Inst{31-22} = 0b0001010001; @@ -5642,6 +5722,9 @@ let isPredicatedFalse = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [P0]; let Defs = [P0, PC]; let BaseOpcode = "J4_cmpeqp0"; @@ -5656,7 +5739,7 @@ def J4_cmpeq_fp1_jump_nt : HInst< (outs), (ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii), "p1 = cmp.eq($Rs16,$Rt16); if (!p1.new) jump:nt $Ii", -tc_92d1833c, TypeCJ>, Enc_6a5972, PredRel { +tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel { let Inst{0-0} = 0b0; let Inst{13-12} = 0b01; let Inst{31-22} = 0b0001010001; @@ -5665,6 +5748,9 @@ let isPredicatedFalse = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [P1]; let Defs = [P1, PC]; let BaseOpcode = "J4_cmpeqp1"; @@ -5679,7 +5765,7 @@ def J4_cmpeq_fp1_jump_t : HInst< (outs), (ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii), "p1 = cmp.eq($Rs16,$Rt16); if (!p1.new) jump:t $Ii", -tc_92d1833c, TypeCJ>, Enc_6a5972, PredRel { +tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel { let Inst{0-0} = 0b0; let Inst{13-12} = 0b11; let Inst{31-22} = 0b0001010001; @@ -5688,6 +5774,9 @@ let isPredicatedFalse = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [P1]; let Defs = [P1, PC]; let BaseOpcode = "J4_cmpeqp1"; @@ -5702,7 +5791,7 @@ def 
J4_cmpeq_t_jumpnv_nt : HInst< (outs), (ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii), "if (cmp.eq($Ns8.new,$Rt32)) jump:nt $Ii", -tc_580a779c, TypeNCJ>, Enc_c9a18e, PredRel { +tc_51b866be, TypeNCJ>, Enc_c9a18e, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b0; let Inst{19-19} = 0b0; @@ -5710,8 +5799,9 @@ let Inst{31-22} = 0b0010000000; let isPredicated = 1; let isTerminator = 1; let isBranch = 1; -let cofMax1 = 1; let isNewValue = 1; +let cofMax1 = 1; +let isRestrictNoSlot1Store = 1; let Defs = [PC]; let BaseOpcode = "J4_cmpeqr"; let isTaken = Inst{13}; @@ -5726,7 +5816,7 @@ def J4_cmpeq_t_jumpnv_t : HInst< (outs), (ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii), "if (cmp.eq($Ns8.new,$Rt32)) jump:t $Ii", -tc_580a779c, TypeNCJ>, Enc_c9a18e, PredRel { +tc_51b866be, TypeNCJ>, Enc_c9a18e, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b1; let Inst{19-19} = 0b0; @@ -5734,8 +5824,9 @@ let Inst{31-22} = 0b0010000000; let isPredicated = 1; let isTerminator = 1; let isBranch = 1; -let cofMax1 = 1; let isNewValue = 1; +let cofMax1 = 1; +let isRestrictNoSlot1Store = 1; let Defs = [PC]; let BaseOpcode = "J4_cmpeqr"; let isTaken = Inst{13}; @@ -5750,7 +5841,7 @@ def J4_cmpeq_tp0_jump_nt : HInst< (outs), (ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii), "p0 = cmp.eq($Rs16,$Rt16); if (p0.new) jump:nt $Ii", -tc_92d1833c, TypeCJ>, Enc_6a5972, PredRel { +tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel { let Inst{0-0} = 0b0; let Inst{13-12} = 0b00; let Inst{31-22} = 0b0001010000; @@ -5758,6 +5849,9 @@ let isPredicated = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [P0]; let Defs = [P0, PC]; let BaseOpcode = "J4_cmpeqp0"; @@ -5772,7 +5866,7 @@ def J4_cmpeq_tp0_jump_t : HInst< (outs), (ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii), "p0 = cmp.eq($Rs16,$Rt16); if (p0.new) jump:t $Ii", -tc_92d1833c, TypeCJ>, Enc_6a5972, PredRel { +tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel { let Inst{0-0} = 0b0; let Inst{13-12} = 0b10; let Inst{31-22} = 0b0001010000; @@ -5780,6 +5874,9 @@ let isPredicated = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [P0]; let Defs = [P0, PC]; let BaseOpcode = "J4_cmpeqp0"; @@ -5794,7 +5891,7 @@ def J4_cmpeq_tp1_jump_nt : HInst< (outs), (ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii), "p1 = cmp.eq($Rs16,$Rt16); if (p1.new) jump:nt $Ii", -tc_92d1833c, TypeCJ>, Enc_6a5972, PredRel { +tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel { let Inst{0-0} = 0b0; let Inst{13-12} = 0b01; let Inst{31-22} = 0b0001010000; @@ -5802,6 +5899,9 @@ let isPredicated = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [P1]; let Defs = [P1, PC]; let BaseOpcode = "J4_cmpeqp1"; @@ -5816,7 +5916,7 @@ def J4_cmpeq_tp1_jump_t : HInst< (outs), (ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii), "p1 = cmp.eq($Rs16,$Rt16); if (p1.new) jump:t $Ii", -tc_92d1833c, TypeCJ>, Enc_6a5972, PredRel { +tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel { let Inst{0-0} = 0b0; let Inst{13-12} = 0b11; let Inst{31-22} = 0b0001010000; @@ -5824,6 +5924,9 @@ let isPredicated = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [P1]; let Defs = [P1, PC]; let BaseOpcode = "J4_cmpeqp1"; @@ -5838,7 +5941,7 @@ def J4_cmpeqi_f_jumpnv_nt : HInst< (outs), 
(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii), "if (!cmp.eq($Ns8.new,#$II)) jump:nt $Ii", -tc_09faec3b, TypeNCJ>, Enc_eafd18, PredRel { +tc_bde7aaf4, TypeNCJ>, Enc_eafd18, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b0; let Inst{19-19} = 0b0; @@ -5847,8 +5950,9 @@ let isPredicated = 1; let isPredicatedFalse = 1; let isTerminator = 1; let isBranch = 1; -let cofMax1 = 1; let isNewValue = 1; +let cofMax1 = 1; +let isRestrictNoSlot1Store = 1; let Defs = [PC]; let BaseOpcode = "J4_cmpeqi"; let isTaken = Inst{13}; @@ -5863,7 +5967,7 @@ def J4_cmpeqi_f_jumpnv_t : HInst< (outs), (ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii), "if (!cmp.eq($Ns8.new,#$II)) jump:t $Ii", -tc_09faec3b, TypeNCJ>, Enc_eafd18, PredRel { +tc_bde7aaf4, TypeNCJ>, Enc_eafd18, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b1; let Inst{19-19} = 0b0; @@ -5872,8 +5976,9 @@ let isPredicated = 1; let isPredicatedFalse = 1; let isTerminator = 1; let isBranch = 1; -let cofMax1 = 1; let isNewValue = 1; +let cofMax1 = 1; +let isRestrictNoSlot1Store = 1; let Defs = [PC]; let BaseOpcode = "J4_cmpeqi"; let isTaken = Inst{13}; @@ -5888,7 +5993,7 @@ def J4_cmpeqi_fp0_jump_nt : HInst< (outs), (ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii), "p0 = cmp.eq($Rs16,#$II); if (!p0.new) jump:nt $Ii", -tc_d108a090, TypeCJ>, Enc_14d27a, PredRel { +tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b0; let Inst{31-22} = 0b0001000001; @@ -5897,6 +6002,9 @@ let isPredicatedFalse = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [P0]; let Defs = [P0, PC]; let BaseOpcode = "J4_cmpeqip0"; @@ -5911,7 +6019,7 @@ def J4_cmpeqi_fp0_jump_t : HInst< (outs), (ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii), "p0 = cmp.eq($Rs16,#$II); if (!p0.new) jump:t $Ii", -tc_d108a090, TypeCJ>, Enc_14d27a, PredRel { +tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b1; let Inst{31-22} = 0b0001000001; @@ -5920,6 +6028,9 @@ let isPredicatedFalse = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [P0]; let Defs = [P0, PC]; let BaseOpcode = "J4_cmpeqip0"; @@ -5934,7 +6045,7 @@ def J4_cmpeqi_fp1_jump_nt : HInst< (outs), (ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii), "p1 = cmp.eq($Rs16,#$II); if (!p1.new) jump:nt $Ii", -tc_d108a090, TypeCJ>, Enc_14d27a, PredRel { +tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b0; let Inst{31-22} = 0b0001001001; @@ -5943,6 +6054,9 @@ let isPredicatedFalse = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [P1]; let Defs = [P1, PC]; let BaseOpcode = "J4_cmpeqip1"; @@ -5957,7 +6071,7 @@ def J4_cmpeqi_fp1_jump_t : HInst< (outs), (ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii), "p1 = cmp.eq($Rs16,#$II); if (!p1.new) jump:t $Ii", -tc_d108a090, TypeCJ>, Enc_14d27a, PredRel { +tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b1; let Inst{31-22} = 0b0001001001; @@ -5966,6 +6080,9 @@ let isPredicatedFalse = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [P1]; let Defs = [P1, PC]; let BaseOpcode = "J4_cmpeqip1"; @@ -5980,7 +6097,7 @@ def J4_cmpeqi_t_jumpnv_nt : HInst< (outs), (ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii), "if 
(cmp.eq($Ns8.new,#$II)) jump:nt $Ii", -tc_09faec3b, TypeNCJ>, Enc_eafd18, PredRel { +tc_bde7aaf4, TypeNCJ>, Enc_eafd18, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b0; let Inst{19-19} = 0b0; @@ -5988,8 +6105,9 @@ let Inst{31-22} = 0b0010010000; let isPredicated = 1; let isTerminator = 1; let isBranch = 1; -let cofMax1 = 1; let isNewValue = 1; +let cofMax1 = 1; +let isRestrictNoSlot1Store = 1; let Defs = [PC]; let BaseOpcode = "J4_cmpeqi"; let isTaken = Inst{13}; @@ -6004,7 +6122,7 @@ def J4_cmpeqi_t_jumpnv_t : HInst< (outs), (ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii), "if (cmp.eq($Ns8.new,#$II)) jump:t $Ii", -tc_09faec3b, TypeNCJ>, Enc_eafd18, PredRel { +tc_bde7aaf4, TypeNCJ>, Enc_eafd18, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b1; let Inst{19-19} = 0b0; @@ -6012,8 +6130,9 @@ let Inst{31-22} = 0b0010010000; let isPredicated = 1; let isTerminator = 1; let isBranch = 1; -let cofMax1 = 1; let isNewValue = 1; +let cofMax1 = 1; +let isRestrictNoSlot1Store = 1; let Defs = [PC]; let BaseOpcode = "J4_cmpeqi"; let isTaken = Inst{13}; @@ -6028,7 +6147,7 @@ def J4_cmpeqi_tp0_jump_nt : HInst< (outs), (ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii), "p0 = cmp.eq($Rs16,#$II); if (p0.new) jump:nt $Ii", -tc_d108a090, TypeCJ>, Enc_14d27a, PredRel { +tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b0; let Inst{31-22} = 0b0001000000; @@ -6036,6 +6155,9 @@ let isPredicated = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [P0]; let Defs = [P0, PC]; let BaseOpcode = "J4_cmpeqip0"; @@ -6050,7 +6172,7 @@ def J4_cmpeqi_tp0_jump_t : HInst< (outs), (ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii), "p0 = cmp.eq($Rs16,#$II); if (p0.new) jump:t $Ii", -tc_d108a090, TypeCJ>, Enc_14d27a, PredRel { +tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b1; let Inst{31-22} = 0b0001000000; @@ -6058,6 +6180,9 @@ let isPredicated = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [P0]; let Defs = [P0, PC]; let BaseOpcode = "J4_cmpeqip0"; @@ -6072,7 +6197,7 @@ def J4_cmpeqi_tp1_jump_nt : HInst< (outs), (ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii), "p1 = cmp.eq($Rs16,#$II); if (p1.new) jump:nt $Ii", -tc_d108a090, TypeCJ>, Enc_14d27a, PredRel { +tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b0; let Inst{31-22} = 0b0001001000; @@ -6080,6 +6205,9 @@ let isPredicated = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [P1]; let Defs = [P1, PC]; let BaseOpcode = "J4_cmpeqip1"; @@ -6094,7 +6222,7 @@ def J4_cmpeqi_tp1_jump_t : HInst< (outs), (ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii), "p1 = cmp.eq($Rs16,#$II); if (p1.new) jump:t $Ii", -tc_d108a090, TypeCJ>, Enc_14d27a, PredRel { +tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b1; let Inst{31-22} = 0b0001001000; @@ -6102,6 +6230,9 @@ let isPredicated = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [P1]; let Defs = [P1, PC]; let BaseOpcode = "J4_cmpeqip1"; @@ -6116,7 +6247,7 @@ def J4_cmpeqn1_f_jumpnv_nt : HInst< (outs), (ins IntRegs:$Ns8, n1Const:$n1, b30_2Imm:$Ii), "if (!cmp.eq($Ns8.new,#$n1)) jump:nt $Ii", -tc_09faec3b, TypeNCJ>, Enc_e90a15, 
PredRel { +tc_bde7aaf4, TypeNCJ>, Enc_e90a15, PredRel { let Inst{0-0} = 0b0; let Inst{13-8} = 0b000000; let Inst{19-19} = 0b0; @@ -6125,8 +6256,9 @@ let isPredicated = 1; let isPredicatedFalse = 1; let isTerminator = 1; let isBranch = 1; -let cofMax1 = 1; let isNewValue = 1; +let cofMax1 = 1; +let isRestrictNoSlot1Store = 1; let Defs = [PC]; let BaseOpcode = "J4_cmpeqn1r"; let isTaken = Inst{13}; @@ -6141,7 +6273,7 @@ def J4_cmpeqn1_f_jumpnv_t : HInst< (outs), (ins IntRegs:$Ns8, n1Const:$n1, b30_2Imm:$Ii), "if (!cmp.eq($Ns8.new,#$n1)) jump:t $Ii", -tc_09faec3b, TypeNCJ>, Enc_5a18b3, PredRel { +tc_bde7aaf4, TypeNCJ>, Enc_5a18b3, PredRel { let Inst{0-0} = 0b0; let Inst{13-8} = 0b100000; let Inst{19-19} = 0b0; @@ -6150,8 +6282,9 @@ let isPredicated = 1; let isPredicatedFalse = 1; let isTerminator = 1; let isBranch = 1; -let cofMax1 = 1; let isNewValue = 1; +let cofMax1 = 1; +let isRestrictNoSlot1Store = 1; let Defs = [PC]; let BaseOpcode = "J4_cmpeqn1r"; let isTaken = Inst{13}; @@ -6166,7 +6299,7 @@ def J4_cmpeqn1_fp0_jump_nt : HInst< (outs), (ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii), "p0 = cmp.eq($Rs16,#$n1); if (!p0.new) jump:nt $Ii", -tc_d108a090, TypeCJ>, Enc_1de724, PredRel { +tc_99be14ca, TypeCJ>, Enc_1de724, PredRel { let Inst{0-0} = 0b0; let Inst{13-8} = 0b000000; let Inst{31-22} = 0b0001000111; @@ -6175,6 +6308,9 @@ let isPredicatedFalse = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [P0]; let Defs = [P0, PC]; let BaseOpcode = "J4_cmpeqn1p0"; @@ -6189,7 +6325,7 @@ def J4_cmpeqn1_fp0_jump_t : HInst< (outs), (ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii), "p0 = cmp.eq($Rs16,#$n1); if (!p0.new) jump:t $Ii", -tc_d108a090, TypeCJ>, Enc_14640c, PredRel { +tc_99be14ca, TypeCJ>, Enc_14640c, PredRel { let Inst{0-0} = 0b0; let Inst{13-8} = 0b100000; let Inst{31-22} = 0b0001000111; @@ -6198,6 +6334,9 @@ let isPredicatedFalse = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [P0]; let Defs = [P0, PC]; let BaseOpcode = "J4_cmpeqn1p0"; @@ -6212,7 +6351,7 @@ def J4_cmpeqn1_fp1_jump_nt : HInst< (outs), (ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii), "p1 = cmp.eq($Rs16,#$n1); if (!p1.new) jump:nt $Ii", -tc_d108a090, TypeCJ>, Enc_668704, PredRel { +tc_99be14ca, TypeCJ>, Enc_668704, PredRel { let Inst{0-0} = 0b0; let Inst{13-8} = 0b000000; let Inst{31-22} = 0b0001001111; @@ -6221,6 +6360,9 @@ let isPredicatedFalse = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [P1]; let Defs = [P1, PC]; let BaseOpcode = "J4_cmpeqn1p1"; @@ -6235,7 +6377,7 @@ def J4_cmpeqn1_fp1_jump_t : HInst< (outs), (ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii), "p1 = cmp.eq($Rs16,#$n1); if (!p1.new) jump:t $Ii", -tc_d108a090, TypeCJ>, Enc_800e04, PredRel { +tc_99be14ca, TypeCJ>, Enc_800e04, PredRel { let Inst{0-0} = 0b0; let Inst{13-8} = 0b100000; let Inst{31-22} = 0b0001001111; @@ -6244,6 +6386,9 @@ let isPredicatedFalse = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [P1]; let Defs = [P1, PC]; let BaseOpcode = "J4_cmpeqn1p1"; @@ -6258,7 +6403,7 @@ def J4_cmpeqn1_t_jumpnv_nt : HInst< (outs), (ins IntRegs:$Ns8, n1Const:$n1, b30_2Imm:$Ii), "if (cmp.eq($Ns8.new,#$n1)) jump:nt $Ii", -tc_09faec3b, TypeNCJ>, Enc_4aca3a, PredRel { +tc_bde7aaf4, 
TypeNCJ>, Enc_4aca3a, PredRel { let Inst{0-0} = 0b0; let Inst{13-8} = 0b000000; let Inst{19-19} = 0b0; @@ -6266,8 +6411,9 @@ let Inst{31-22} = 0b0010011000; let isPredicated = 1; let isTerminator = 1; let isBranch = 1; -let cofMax1 = 1; let isNewValue = 1; +let cofMax1 = 1; +let isRestrictNoSlot1Store = 1; let Defs = [PC]; let BaseOpcode = "J4_cmpeqn1r"; let isTaken = Inst{13}; @@ -6282,7 +6428,7 @@ def J4_cmpeqn1_t_jumpnv_t : HInst< (outs), (ins IntRegs:$Ns8, n1Const:$n1, b30_2Imm:$Ii), "if (cmp.eq($Ns8.new,#$n1)) jump:t $Ii", -tc_09faec3b, TypeNCJ>, Enc_f7ea77, PredRel { +tc_bde7aaf4, TypeNCJ>, Enc_f7ea77, PredRel { let Inst{0-0} = 0b0; let Inst{13-8} = 0b100000; let Inst{19-19} = 0b0; @@ -6290,8 +6436,9 @@ let Inst{31-22} = 0b0010011000; let isPredicated = 1; let isTerminator = 1; let isBranch = 1; -let cofMax1 = 1; let isNewValue = 1; +let cofMax1 = 1; +let isRestrictNoSlot1Store = 1; let Defs = [PC]; let BaseOpcode = "J4_cmpeqn1r"; let isTaken = Inst{13}; @@ -6306,7 +6453,7 @@ def J4_cmpeqn1_tp0_jump_nt : HInst< (outs), (ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii), "p0 = cmp.eq($Rs16,#$n1); if (p0.new) jump:nt $Ii", -tc_d108a090, TypeCJ>, Enc_405228, PredRel { +tc_99be14ca, TypeCJ>, Enc_405228, PredRel { let Inst{0-0} = 0b0; let Inst{13-8} = 0b000000; let Inst{31-22} = 0b0001000110; @@ -6314,6 +6461,9 @@ let isPredicated = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [P0]; let Defs = [P0, PC]; let BaseOpcode = "J4_cmpeqn1p0"; @@ -6328,7 +6478,7 @@ def J4_cmpeqn1_tp0_jump_t : HInst< (outs), (ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii), "p0 = cmp.eq($Rs16,#$n1); if (p0.new) jump:t $Ii", -tc_d108a090, TypeCJ>, Enc_3a2484, PredRel { +tc_99be14ca, TypeCJ>, Enc_3a2484, PredRel { let Inst{0-0} = 0b0; let Inst{13-8} = 0b100000; let Inst{31-22} = 0b0001000110; @@ -6336,6 +6486,9 @@ let isPredicated = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [P0]; let Defs = [P0, PC]; let BaseOpcode = "J4_cmpeqn1p0"; @@ -6350,7 +6503,7 @@ def J4_cmpeqn1_tp1_jump_nt : HInst< (outs), (ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii), "p1 = cmp.eq($Rs16,#$n1); if (p1.new) jump:nt $Ii", -tc_d108a090, TypeCJ>, Enc_736575, PredRel { +tc_99be14ca, TypeCJ>, Enc_736575, PredRel { let Inst{0-0} = 0b0; let Inst{13-8} = 0b000000; let Inst{31-22} = 0b0001001110; @@ -6358,6 +6511,9 @@ let isPredicated = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [P1]; let Defs = [P1, PC]; let BaseOpcode = "J4_cmpeqn1p1"; @@ -6372,7 +6528,7 @@ def J4_cmpeqn1_tp1_jump_t : HInst< (outs), (ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii), "p1 = cmp.eq($Rs16,#$n1); if (p1.new) jump:t $Ii", -tc_d108a090, TypeCJ>, Enc_8e583a, PredRel { +tc_99be14ca, TypeCJ>, Enc_8e583a, PredRel { let Inst{0-0} = 0b0; let Inst{13-8} = 0b100000; let Inst{31-22} = 0b0001001110; @@ -6380,6 +6536,9 @@ let isPredicated = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [P1]; let Defs = [P1, PC]; let BaseOpcode = "J4_cmpeqn1p1"; @@ -6394,7 +6553,7 @@ def J4_cmpgt_f_jumpnv_nt : HInst< (outs), (ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii), "if (!cmp.gt($Ns8.new,$Rt32)) jump:nt $Ii", -tc_580a779c, TypeNCJ>, Enc_c9a18e, PredRel { +tc_51b866be, TypeNCJ>, Enc_c9a18e, PredRel { let 
Inst{0-0} = 0b0; let Inst{13-13} = 0b0; let Inst{19-19} = 0b0; @@ -6403,8 +6562,9 @@ let isPredicated = 1; let isPredicatedFalse = 1; let isTerminator = 1; let isBranch = 1; -let cofMax1 = 1; let isNewValue = 1; +let cofMax1 = 1; +let isRestrictNoSlot1Store = 1; let Defs = [PC]; let BaseOpcode = "J4_cmpgtr"; let isTaken = Inst{13}; @@ -6419,7 +6579,7 @@ def J4_cmpgt_f_jumpnv_t : HInst< (outs), (ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii), "if (!cmp.gt($Ns8.new,$Rt32)) jump:t $Ii", -tc_580a779c, TypeNCJ>, Enc_c9a18e, PredRel { +tc_51b866be, TypeNCJ>, Enc_c9a18e, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b1; let Inst{19-19} = 0b0; @@ -6428,8 +6588,9 @@ let isPredicated = 1; let isPredicatedFalse = 1; let isTerminator = 1; let isBranch = 1; -let cofMax1 = 1; let isNewValue = 1; +let cofMax1 = 1; +let isRestrictNoSlot1Store = 1; let Defs = [PC]; let BaseOpcode = "J4_cmpgtr"; let isTaken = Inst{13}; @@ -6444,7 +6605,7 @@ def J4_cmpgt_fp0_jump_nt : HInst< (outs), (ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii), "p0 = cmp.gt($Rs16,$Rt16); if (!p0.new) jump:nt $Ii", -tc_92d1833c, TypeCJ>, Enc_6a5972, PredRel { +tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel { let Inst{0-0} = 0b0; let Inst{13-12} = 0b00; let Inst{31-22} = 0b0001010011; @@ -6453,6 +6614,9 @@ let isPredicatedFalse = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [P0]; let Defs = [P0, PC]; let BaseOpcode = "J4_cmpgtp0"; @@ -6467,7 +6631,7 @@ def J4_cmpgt_fp0_jump_t : HInst< (outs), (ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii), "p0 = cmp.gt($Rs16,$Rt16); if (!p0.new) jump:t $Ii", -tc_92d1833c, TypeCJ>, Enc_6a5972, PredRel { +tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel { let Inst{0-0} = 0b0; let Inst{13-12} = 0b10; let Inst{31-22} = 0b0001010011; @@ -6476,6 +6640,9 @@ let isPredicatedFalse = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [P0]; let Defs = [P0, PC]; let BaseOpcode = "J4_cmpgtp0"; @@ -6490,7 +6657,7 @@ def J4_cmpgt_fp1_jump_nt : HInst< (outs), (ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii), "p1 = cmp.gt($Rs16,$Rt16); if (!p1.new) jump:nt $Ii", -tc_92d1833c, TypeCJ>, Enc_6a5972, PredRel { +tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel { let Inst{0-0} = 0b0; let Inst{13-12} = 0b01; let Inst{31-22} = 0b0001010011; @@ -6499,6 +6666,9 @@ let isPredicatedFalse = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [P1]; let Defs = [P1, PC]; let BaseOpcode = "J4_cmpgtp1"; @@ -6513,7 +6683,7 @@ def J4_cmpgt_fp1_jump_t : HInst< (outs), (ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii), "p1 = cmp.gt($Rs16,$Rt16); if (!p1.new) jump:t $Ii", -tc_92d1833c, TypeCJ>, Enc_6a5972, PredRel { +tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel { let Inst{0-0} = 0b0; let Inst{13-12} = 0b11; let Inst{31-22} = 0b0001010011; @@ -6522,6 +6692,9 @@ let isPredicatedFalse = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [P1]; let Defs = [P1, PC]; let BaseOpcode = "J4_cmpgtp1"; @@ -6536,7 +6709,7 @@ def J4_cmpgt_t_jumpnv_nt : HInst< (outs), (ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii), "if (cmp.gt($Ns8.new,$Rt32)) jump:nt $Ii", -tc_580a779c, TypeNCJ>, Enc_c9a18e, PredRel { +tc_51b866be, TypeNCJ>, Enc_c9a18e, PredRel { let Inst{0-0} = 0b0; let 
Inst{13-13} = 0b0; let Inst{19-19} = 0b0; @@ -6544,8 +6717,9 @@ let Inst{31-22} = 0b0010000010; let isPredicated = 1; let isTerminator = 1; let isBranch = 1; -let cofMax1 = 1; let isNewValue = 1; +let cofMax1 = 1; +let isRestrictNoSlot1Store = 1; let Defs = [PC]; let BaseOpcode = "J4_cmpgtr"; let isTaken = Inst{13}; @@ -6560,7 +6734,7 @@ def J4_cmpgt_t_jumpnv_t : HInst< (outs), (ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii), "if (cmp.gt($Ns8.new,$Rt32)) jump:t $Ii", -tc_580a779c, TypeNCJ>, Enc_c9a18e, PredRel { +tc_51b866be, TypeNCJ>, Enc_c9a18e, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b1; let Inst{19-19} = 0b0; @@ -6568,8 +6742,9 @@ let Inst{31-22} = 0b0010000010; let isPredicated = 1; let isTerminator = 1; let isBranch = 1; -let cofMax1 = 1; let isNewValue = 1; +let cofMax1 = 1; +let isRestrictNoSlot1Store = 1; let Defs = [PC]; let BaseOpcode = "J4_cmpgtr"; let isTaken = Inst{13}; @@ -6584,7 +6759,7 @@ def J4_cmpgt_tp0_jump_nt : HInst< (outs), (ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii), "p0 = cmp.gt($Rs16,$Rt16); if (p0.new) jump:nt $Ii", -tc_92d1833c, TypeCJ>, Enc_6a5972, PredRel { +tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel { let Inst{0-0} = 0b0; let Inst{13-12} = 0b00; let Inst{31-22} = 0b0001010010; @@ -6592,6 +6767,9 @@ let isPredicated = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [P0]; let Defs = [P0, PC]; let BaseOpcode = "J4_cmpgtp0"; @@ -6606,7 +6784,7 @@ def J4_cmpgt_tp0_jump_t : HInst< (outs), (ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii), "p0 = cmp.gt($Rs16,$Rt16); if (p0.new) jump:t $Ii", -tc_92d1833c, TypeCJ>, Enc_6a5972, PredRel { +tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel { let Inst{0-0} = 0b0; let Inst{13-12} = 0b10; let Inst{31-22} = 0b0001010010; @@ -6614,6 +6792,9 @@ let isPredicated = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [P0]; let Defs = [P0, PC]; let BaseOpcode = "J4_cmpgtp0"; @@ -6628,7 +6809,7 @@ def J4_cmpgt_tp1_jump_nt : HInst< (outs), (ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii), "p1 = cmp.gt($Rs16,$Rt16); if (p1.new) jump:nt $Ii", -tc_92d1833c, TypeCJ>, Enc_6a5972, PredRel { +tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel { let Inst{0-0} = 0b0; let Inst{13-12} = 0b01; let Inst{31-22} = 0b0001010010; @@ -6636,6 +6817,9 @@ let isPredicated = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [P1]; let Defs = [P1, PC]; let BaseOpcode = "J4_cmpgtp1"; @@ -6650,7 +6834,7 @@ def J4_cmpgt_tp1_jump_t : HInst< (outs), (ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii), "p1 = cmp.gt($Rs16,$Rt16); if (p1.new) jump:t $Ii", -tc_92d1833c, TypeCJ>, Enc_6a5972, PredRel { +tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel { let Inst{0-0} = 0b0; let Inst{13-12} = 0b11; let Inst{31-22} = 0b0001010010; @@ -6658,6 +6842,9 @@ let isPredicated = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [P1]; let Defs = [P1, PC]; let BaseOpcode = "J4_cmpgtp1"; @@ -6672,7 +6859,7 @@ def J4_cmpgti_f_jumpnv_nt : HInst< (outs), (ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii), "if (!cmp.gt($Ns8.new,#$II)) jump:nt $Ii", -tc_09faec3b, TypeNCJ>, Enc_eafd18, PredRel { +tc_bde7aaf4, TypeNCJ>, Enc_eafd18, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b0; let Inst{19-19} = 0b0; 
@@ -6681,8 +6868,9 @@ let isPredicated = 1; let isPredicatedFalse = 1; let isTerminator = 1; let isBranch = 1; -let cofMax1 = 1; let isNewValue = 1; +let cofMax1 = 1; +let isRestrictNoSlot1Store = 1; let Defs = [PC]; let BaseOpcode = "J4_cmpgtir"; let isTaken = Inst{13}; @@ -6697,7 +6885,7 @@ def J4_cmpgti_f_jumpnv_t : HInst< (outs), (ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii), "if (!cmp.gt($Ns8.new,#$II)) jump:t $Ii", -tc_09faec3b, TypeNCJ>, Enc_eafd18, PredRel { +tc_bde7aaf4, TypeNCJ>, Enc_eafd18, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b1; let Inst{19-19} = 0b0; @@ -6706,8 +6894,9 @@ let isPredicated = 1; let isPredicatedFalse = 1; let isTerminator = 1; let isBranch = 1; -let cofMax1 = 1; let isNewValue = 1; +let cofMax1 = 1; +let isRestrictNoSlot1Store = 1; let Defs = [PC]; let BaseOpcode = "J4_cmpgtir"; let isTaken = Inst{13}; @@ -6722,7 +6911,7 @@ def J4_cmpgti_fp0_jump_nt : HInst< (outs), (ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii), "p0 = cmp.gt($Rs16,#$II); if (!p0.new) jump:nt $Ii", -tc_d108a090, TypeCJ>, Enc_14d27a, PredRel { +tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b0; let Inst{31-22} = 0b0001000011; @@ -6731,6 +6920,9 @@ let isPredicatedFalse = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [P0]; let Defs = [P0, PC]; let BaseOpcode = "J4_cmpgtip0"; @@ -6745,7 +6937,7 @@ def J4_cmpgti_fp0_jump_t : HInst< (outs), (ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii), "p0 = cmp.gt($Rs16,#$II); if (!p0.new) jump:t $Ii", -tc_d108a090, TypeCJ>, Enc_14d27a, PredRel { +tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b1; let Inst{31-22} = 0b0001000011; @@ -6754,6 +6946,9 @@ let isPredicatedFalse = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [P0]; let Defs = [P0, PC]; let BaseOpcode = "J4_cmpgtip0"; @@ -6768,7 +6963,7 @@ def J4_cmpgti_fp1_jump_nt : HInst< (outs), (ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii), "p1 = cmp.gt($Rs16,#$II); if (!p1.new) jump:nt $Ii", -tc_d108a090, TypeCJ>, Enc_14d27a, PredRel { +tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b0; let Inst{31-22} = 0b0001001011; @@ -6777,6 +6972,9 @@ let isPredicatedFalse = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [P1]; let Defs = [P1, PC]; let BaseOpcode = "J4_cmpgtip1"; @@ -6791,7 +6989,7 @@ def J4_cmpgti_fp1_jump_t : HInst< (outs), (ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii), "p1 = cmp.gt($Rs16,#$II); if (!p1.new) jump:t $Ii", -tc_d108a090, TypeCJ>, Enc_14d27a, PredRel { +tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b1; let Inst{31-22} = 0b0001001011; @@ -6800,6 +6998,9 @@ let isPredicatedFalse = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [P1]; let Defs = [P1, PC]; let BaseOpcode = "J4_cmpgtip1"; @@ -6814,7 +7015,7 @@ def J4_cmpgti_t_jumpnv_nt : HInst< (outs), (ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii), "if (cmp.gt($Ns8.new,#$II)) jump:nt $Ii", -tc_09faec3b, TypeNCJ>, Enc_eafd18, PredRel { +tc_bde7aaf4, TypeNCJ>, Enc_eafd18, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b0; let Inst{19-19} = 0b0; @@ -6822,8 +7023,9 @@ let Inst{31-22} = 0b0010010010; let 
isPredicated = 1; let isTerminator = 1; let isBranch = 1; -let cofMax1 = 1; let isNewValue = 1; +let cofMax1 = 1; +let isRestrictNoSlot1Store = 1; let Defs = [PC]; let BaseOpcode = "J4_cmpgtir"; let isTaken = Inst{13}; @@ -6838,7 +7040,7 @@ def J4_cmpgti_t_jumpnv_t : HInst< (outs), (ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii), "if (cmp.gt($Ns8.new,#$II)) jump:t $Ii", -tc_09faec3b, TypeNCJ>, Enc_eafd18, PredRel { +tc_bde7aaf4, TypeNCJ>, Enc_eafd18, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b1; let Inst{19-19} = 0b0; @@ -6846,8 +7048,9 @@ let Inst{31-22} = 0b0010010010; let isPredicated = 1; let isTerminator = 1; let isBranch = 1; -let cofMax1 = 1; let isNewValue = 1; +let cofMax1 = 1; +let isRestrictNoSlot1Store = 1; let Defs = [PC]; let BaseOpcode = "J4_cmpgtir"; let isTaken = Inst{13}; @@ -6862,7 +7065,7 @@ def J4_cmpgti_tp0_jump_nt : HInst< (outs), (ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii), "p0 = cmp.gt($Rs16,#$II); if (p0.new) jump:nt $Ii", -tc_d108a090, TypeCJ>, Enc_14d27a, PredRel { +tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b0; let Inst{31-22} = 0b0001000010; @@ -6870,6 +7073,9 @@ let isPredicated = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [P0]; let Defs = [P0, PC]; let BaseOpcode = "J4_cmpgtip0"; @@ -6884,7 +7090,7 @@ def J4_cmpgti_tp0_jump_t : HInst< (outs), (ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii), "p0 = cmp.gt($Rs16,#$II); if (p0.new) jump:t $Ii", -tc_d108a090, TypeCJ>, Enc_14d27a, PredRel { +tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b1; let Inst{31-22} = 0b0001000010; @@ -6892,6 +7098,9 @@ let isPredicated = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [P0]; let Defs = [P0, PC]; let BaseOpcode = "J4_cmpgtip0"; @@ -6906,7 +7115,7 @@ def J4_cmpgti_tp1_jump_nt : HInst< (outs), (ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii), "p1 = cmp.gt($Rs16,#$II); if (p1.new) jump:nt $Ii", -tc_d108a090, TypeCJ>, Enc_14d27a, PredRel { +tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b0; let Inst{31-22} = 0b0001001010; @@ -6914,6 +7123,9 @@ let isPredicated = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [P1]; let Defs = [P1, PC]; let BaseOpcode = "J4_cmpgtip1"; @@ -6928,7 +7140,7 @@ def J4_cmpgti_tp1_jump_t : HInst< (outs), (ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii), "p1 = cmp.gt($Rs16,#$II); if (p1.new) jump:t $Ii", -tc_d108a090, TypeCJ>, Enc_14d27a, PredRel { +tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b1; let Inst{31-22} = 0b0001001010; @@ -6936,6 +7148,9 @@ let isPredicated = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [P1]; let Defs = [P1, PC]; let BaseOpcode = "J4_cmpgtip1"; @@ -6950,7 +7165,7 @@ def J4_cmpgtn1_f_jumpnv_nt : HInst< (outs), (ins IntRegs:$Ns8, n1Const:$n1, b30_2Imm:$Ii), "if (!cmp.gt($Ns8.new,#$n1)) jump:nt $Ii", -tc_09faec3b, TypeNCJ>, Enc_3694bd, PredRel { +tc_bde7aaf4, TypeNCJ>, Enc_3694bd, PredRel { let Inst{0-0} = 0b0; let Inst{13-8} = 0b000000; let Inst{19-19} = 0b0; @@ -6959,8 +7174,9 @@ let isPredicated = 1; let isPredicatedFalse = 1; let isTerminator = 1; let isBranch = 1; -let cofMax1 = 1; let 
isNewValue = 1; +let cofMax1 = 1; +let isRestrictNoSlot1Store = 1; let Defs = [PC]; let BaseOpcode = "J4_cmpgtn1r"; let isTaken = Inst{13}; @@ -6975,7 +7191,7 @@ def J4_cmpgtn1_f_jumpnv_t : HInst< (outs), (ins IntRegs:$Ns8, n1Const:$n1, b30_2Imm:$Ii), "if (!cmp.gt($Ns8.new,#$n1)) jump:t $Ii", -tc_09faec3b, TypeNCJ>, Enc_a6853f, PredRel { +tc_bde7aaf4, TypeNCJ>, Enc_a6853f, PredRel { let Inst{0-0} = 0b0; let Inst{13-8} = 0b100000; let Inst{19-19} = 0b0; @@ -6984,8 +7200,9 @@ let isPredicated = 1; let isPredicatedFalse = 1; let isTerminator = 1; let isBranch = 1; -let cofMax1 = 1; let isNewValue = 1; +let cofMax1 = 1; +let isRestrictNoSlot1Store = 1; let Defs = [PC]; let BaseOpcode = "J4_cmpgtn1r"; let isTaken = Inst{13}; @@ -7000,7 +7217,7 @@ def J4_cmpgtn1_fp0_jump_nt : HInst< (outs), (ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii), "p0 = cmp.gt($Rs16,#$n1); if (!p0.new) jump:nt $Ii", -tc_d108a090, TypeCJ>, Enc_a42857, PredRel { +tc_99be14ca, TypeCJ>, Enc_a42857, PredRel { let Inst{0-0} = 0b0; let Inst{13-8} = 0b000001; let Inst{31-22} = 0b0001000111; @@ -7009,6 +7226,9 @@ let isPredicatedFalse = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [P0]; let Defs = [P0, PC]; let BaseOpcode = "J4_cmpgtn1p0"; @@ -7023,7 +7243,7 @@ def J4_cmpgtn1_fp0_jump_t : HInst< (outs), (ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii), "p0 = cmp.gt($Rs16,#$n1); if (!p0.new) jump:t $Ii", -tc_d108a090, TypeCJ>, Enc_f6fe0b, PredRel { +tc_99be14ca, TypeCJ>, Enc_f6fe0b, PredRel { let Inst{0-0} = 0b0; let Inst{13-8} = 0b100001; let Inst{31-22} = 0b0001000111; @@ -7032,6 +7252,9 @@ let isPredicatedFalse = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [P0]; let Defs = [P0, PC]; let BaseOpcode = "J4_cmpgtn1p0"; @@ -7046,7 +7269,7 @@ def J4_cmpgtn1_fp1_jump_nt : HInst< (outs), (ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii), "p1 = cmp.gt($Rs16,#$n1); if (!p1.new) jump:nt $Ii", -tc_d108a090, TypeCJ>, Enc_3e3989, PredRel { +tc_99be14ca, TypeCJ>, Enc_3e3989, PredRel { let Inst{0-0} = 0b0; let Inst{13-8} = 0b000001; let Inst{31-22} = 0b0001001111; @@ -7055,6 +7278,9 @@ let isPredicatedFalse = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [P1]; let Defs = [P1, PC]; let BaseOpcode = "J4_cmpgtn1p1"; @@ -7069,7 +7295,7 @@ def J4_cmpgtn1_fp1_jump_t : HInst< (outs), (ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii), "p1 = cmp.gt($Rs16,#$n1); if (!p1.new) jump:t $Ii", -tc_d108a090, TypeCJ>, Enc_b909d2, PredRel { +tc_99be14ca, TypeCJ>, Enc_b909d2, PredRel { let Inst{0-0} = 0b0; let Inst{13-8} = 0b100001; let Inst{31-22} = 0b0001001111; @@ -7078,6 +7304,9 @@ let isPredicatedFalse = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [P1]; let Defs = [P1, PC]; let BaseOpcode = "J4_cmpgtn1p1"; @@ -7092,7 +7321,7 @@ def J4_cmpgtn1_t_jumpnv_nt : HInst< (outs), (ins IntRegs:$Ns8, n1Const:$n1, b30_2Imm:$Ii), "if (cmp.gt($Ns8.new,#$n1)) jump:nt $Ii", -tc_09faec3b, TypeNCJ>, Enc_f82302, PredRel { +tc_bde7aaf4, TypeNCJ>, Enc_f82302, PredRel { let Inst{0-0} = 0b0; let Inst{13-8} = 0b000000; let Inst{19-19} = 0b0; @@ -7100,8 +7329,9 @@ let Inst{31-22} = 0b0010011010; let isPredicated = 1; let isTerminator = 1; let isBranch = 1; -let cofMax1 = 1; let isNewValue = 1; +let 
cofMax1 = 1; +let isRestrictNoSlot1Store = 1; let Defs = [PC]; let BaseOpcode = "J4_cmpgtn1r"; let isTaken = Inst{13}; @@ -7116,7 +7346,7 @@ def J4_cmpgtn1_t_jumpnv_t : HInst< (outs), (ins IntRegs:$Ns8, n1Const:$n1, b30_2Imm:$Ii), "if (cmp.gt($Ns8.new,#$n1)) jump:t $Ii", -tc_09faec3b, TypeNCJ>, Enc_6413b6, PredRel { +tc_bde7aaf4, TypeNCJ>, Enc_6413b6, PredRel { let Inst{0-0} = 0b0; let Inst{13-8} = 0b100000; let Inst{19-19} = 0b0; @@ -7124,8 +7354,9 @@ let Inst{31-22} = 0b0010011010; let isPredicated = 1; let isTerminator = 1; let isBranch = 1; -let cofMax1 = 1; let isNewValue = 1; +let cofMax1 = 1; +let isRestrictNoSlot1Store = 1; let Defs = [PC]; let BaseOpcode = "J4_cmpgtn1r"; let isTaken = Inst{13}; @@ -7140,7 +7371,7 @@ def J4_cmpgtn1_tp0_jump_nt : HInst< (outs), (ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii), "p0 = cmp.gt($Rs16,#$n1); if (p0.new) jump:nt $Ii", -tc_d108a090, TypeCJ>, Enc_b78edd, PredRel { +tc_99be14ca, TypeCJ>, Enc_b78edd, PredRel { let Inst{0-0} = 0b0; let Inst{13-8} = 0b000001; let Inst{31-22} = 0b0001000110; @@ -7148,6 +7379,9 @@ let isPredicated = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [P0]; let Defs = [P0, PC]; let BaseOpcode = "J4_cmpgtn1p0"; @@ -7162,7 +7396,7 @@ def J4_cmpgtn1_tp0_jump_t : HInst< (outs), (ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii), "p0 = cmp.gt($Rs16,#$n1); if (p0.new) jump:t $Ii", -tc_d108a090, TypeCJ>, Enc_041d7b, PredRel { +tc_99be14ca, TypeCJ>, Enc_041d7b, PredRel { let Inst{0-0} = 0b0; let Inst{13-8} = 0b100001; let Inst{31-22} = 0b0001000110; @@ -7170,6 +7404,9 @@ let isPredicated = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [P0]; let Defs = [P0, PC]; let BaseOpcode = "J4_cmpgtn1p0"; @@ -7184,7 +7421,7 @@ def J4_cmpgtn1_tp1_jump_nt : HInst< (outs), (ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii), "p1 = cmp.gt($Rs16,#$n1); if (p1.new) jump:nt $Ii", -tc_d108a090, TypeCJ>, Enc_b1e1fb, PredRel { +tc_99be14ca, TypeCJ>, Enc_b1e1fb, PredRel { let Inst{0-0} = 0b0; let Inst{13-8} = 0b000001; let Inst{31-22} = 0b0001001110; @@ -7192,6 +7429,9 @@ let isPredicated = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [P1]; let Defs = [P1, PC]; let BaseOpcode = "J4_cmpgtn1p1"; @@ -7206,7 +7446,7 @@ def J4_cmpgtn1_tp1_jump_t : HInst< (outs), (ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii), "p1 = cmp.gt($Rs16,#$n1); if (p1.new) jump:t $Ii", -tc_d108a090, TypeCJ>, Enc_178717, PredRel { +tc_99be14ca, TypeCJ>, Enc_178717, PredRel { let Inst{0-0} = 0b0; let Inst{13-8} = 0b100001; let Inst{31-22} = 0b0001001110; @@ -7214,6 +7454,9 @@ let isPredicated = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [P1]; let Defs = [P1, PC]; let BaseOpcode = "J4_cmpgtn1p1"; @@ -7228,7 +7471,7 @@ def J4_cmpgtu_f_jumpnv_nt : HInst< (outs), (ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii), "if (!cmp.gtu($Ns8.new,$Rt32)) jump:nt $Ii", -tc_580a779c, TypeNCJ>, Enc_c9a18e, PredRel { +tc_51b866be, TypeNCJ>, Enc_c9a18e, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b0; let Inst{19-19} = 0b0; @@ -7237,8 +7480,9 @@ let isPredicated = 1; let isPredicatedFalse = 1; let isTerminator = 1; let isBranch = 1; -let cofMax1 = 1; let isNewValue = 1; +let cofMax1 = 1; +let isRestrictNoSlot1Store = 1; let 
Defs = [PC]; let BaseOpcode = "J4_cmpgtur"; let isTaken = Inst{13}; @@ -7253,7 +7497,7 @@ def J4_cmpgtu_f_jumpnv_t : HInst< (outs), (ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii), "if (!cmp.gtu($Ns8.new,$Rt32)) jump:t $Ii", -tc_580a779c, TypeNCJ>, Enc_c9a18e, PredRel { +tc_51b866be, TypeNCJ>, Enc_c9a18e, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b1; let Inst{19-19} = 0b0; @@ -7262,8 +7506,9 @@ let isPredicated = 1; let isPredicatedFalse = 1; let isTerminator = 1; let isBranch = 1; -let cofMax1 = 1; let isNewValue = 1; +let cofMax1 = 1; +let isRestrictNoSlot1Store = 1; let Defs = [PC]; let BaseOpcode = "J4_cmpgtur"; let isTaken = Inst{13}; @@ -7278,7 +7523,7 @@ def J4_cmpgtu_fp0_jump_nt : HInst< (outs), (ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii), "p0 = cmp.gtu($Rs16,$Rt16); if (!p0.new) jump:nt $Ii", -tc_92d1833c, TypeCJ>, Enc_6a5972, PredRel { +tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel { let Inst{0-0} = 0b0; let Inst{13-12} = 0b00; let Inst{31-22} = 0b0001010101; @@ -7287,6 +7532,9 @@ let isPredicatedFalse = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [P0]; let Defs = [P0, PC]; let BaseOpcode = "J4_cmpgtup0"; @@ -7301,7 +7549,7 @@ def J4_cmpgtu_fp0_jump_t : HInst< (outs), (ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii), "p0 = cmp.gtu($Rs16,$Rt16); if (!p0.new) jump:t $Ii", -tc_92d1833c, TypeCJ>, Enc_6a5972, PredRel { +tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel { let Inst{0-0} = 0b0; let Inst{13-12} = 0b10; let Inst{31-22} = 0b0001010101; @@ -7310,6 +7558,9 @@ let isPredicatedFalse = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [P0]; let Defs = [P0, PC]; let BaseOpcode = "J4_cmpgtup0"; @@ -7324,7 +7575,7 @@ def J4_cmpgtu_fp1_jump_nt : HInst< (outs), (ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii), "p1 = cmp.gtu($Rs16,$Rt16); if (!p1.new) jump:nt $Ii", -tc_92d1833c, TypeCJ>, Enc_6a5972, PredRel { +tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel { let Inst{0-0} = 0b0; let Inst{13-12} = 0b01; let Inst{31-22} = 0b0001010101; @@ -7333,6 +7584,9 @@ let isPredicatedFalse = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [P1]; let Defs = [P1, PC]; let BaseOpcode = "J4_cmpgtup1"; @@ -7347,7 +7601,7 @@ def J4_cmpgtu_fp1_jump_t : HInst< (outs), (ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii), "p1 = cmp.gtu($Rs16,$Rt16); if (!p1.new) jump:t $Ii", -tc_92d1833c, TypeCJ>, Enc_6a5972, PredRel { +tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel { let Inst{0-0} = 0b0; let Inst{13-12} = 0b11; let Inst{31-22} = 0b0001010101; @@ -7356,6 +7610,9 @@ let isPredicatedFalse = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [P1]; let Defs = [P1, PC]; let BaseOpcode = "J4_cmpgtup1"; @@ -7370,7 +7627,7 @@ def J4_cmpgtu_t_jumpnv_nt : HInst< (outs), (ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii), "if (cmp.gtu($Ns8.new,$Rt32)) jump:nt $Ii", -tc_580a779c, TypeNCJ>, Enc_c9a18e, PredRel { +tc_51b866be, TypeNCJ>, Enc_c9a18e, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b0; let Inst{19-19} = 0b0; @@ -7378,8 +7635,9 @@ let Inst{31-22} = 0b0010000100; let isPredicated = 1; let isTerminator = 1; let isBranch = 1; -let cofMax1 = 1; let isNewValue = 1; +let cofMax1 = 1; +let isRestrictNoSlot1Store = 1; let Defs 
= [PC]; let BaseOpcode = "J4_cmpgtur"; let isTaken = Inst{13}; @@ -7394,7 +7652,7 @@ def J4_cmpgtu_t_jumpnv_t : HInst< (outs), (ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii), "if (cmp.gtu($Ns8.new,$Rt32)) jump:t $Ii", -tc_580a779c, TypeNCJ>, Enc_c9a18e, PredRel { +tc_51b866be, TypeNCJ>, Enc_c9a18e, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b1; let Inst{19-19} = 0b0; @@ -7402,8 +7660,9 @@ let Inst{31-22} = 0b0010000100; let isPredicated = 1; let isTerminator = 1; let isBranch = 1; -let cofMax1 = 1; let isNewValue = 1; +let cofMax1 = 1; +let isRestrictNoSlot1Store = 1; let Defs = [PC]; let BaseOpcode = "J4_cmpgtur"; let isTaken = Inst{13}; @@ -7418,7 +7677,7 @@ def J4_cmpgtu_tp0_jump_nt : HInst< (outs), (ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii), "p0 = cmp.gtu($Rs16,$Rt16); if (p0.new) jump:nt $Ii", -tc_92d1833c, TypeCJ>, Enc_6a5972, PredRel { +tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel { let Inst{0-0} = 0b0; let Inst{13-12} = 0b00; let Inst{31-22} = 0b0001010100; @@ -7426,6 +7685,9 @@ let isPredicated = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [P0]; let Defs = [P0, PC]; let BaseOpcode = "J4_cmpgtup0"; @@ -7440,7 +7702,7 @@ def J4_cmpgtu_tp0_jump_t : HInst< (outs), (ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii), "p0 = cmp.gtu($Rs16,$Rt16); if (p0.new) jump:t $Ii", -tc_92d1833c, TypeCJ>, Enc_6a5972, PredRel { +tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel { let Inst{0-0} = 0b0; let Inst{13-12} = 0b10; let Inst{31-22} = 0b0001010100; @@ -7448,6 +7710,9 @@ let isPredicated = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [P0]; let Defs = [P0, PC]; let BaseOpcode = "J4_cmpgtup0"; @@ -7462,7 +7727,7 @@ def J4_cmpgtu_tp1_jump_nt : HInst< (outs), (ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii), "p1 = cmp.gtu($Rs16,$Rt16); if (p1.new) jump:nt $Ii", -tc_92d1833c, TypeCJ>, Enc_6a5972, PredRel { +tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel { let Inst{0-0} = 0b0; let Inst{13-12} = 0b01; let Inst{31-22} = 0b0001010100; @@ -7470,6 +7735,9 @@ let isPredicated = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [P1]; let Defs = [P1, PC]; let BaseOpcode = "J4_cmpgtup1"; @@ -7484,7 +7752,7 @@ def J4_cmpgtu_tp1_jump_t : HInst< (outs), (ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii), "p1 = cmp.gtu($Rs16,$Rt16); if (p1.new) jump:t $Ii", -tc_92d1833c, TypeCJ>, Enc_6a5972, PredRel { +tc_855b0b61, TypeCJ>, Enc_6a5972, PredRel { let Inst{0-0} = 0b0; let Inst{13-12} = 0b11; let Inst{31-22} = 0b0001010100; @@ -7492,6 +7760,9 @@ let isPredicated = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [P1]; let Defs = [P1, PC]; let BaseOpcode = "J4_cmpgtup1"; @@ -7506,7 +7777,7 @@ def J4_cmpgtui_f_jumpnv_nt : HInst< (outs), (ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii), "if (!cmp.gtu($Ns8.new,#$II)) jump:nt $Ii", -tc_09faec3b, TypeNCJ>, Enc_eafd18, PredRel { +tc_bde7aaf4, TypeNCJ>, Enc_eafd18, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b0; let Inst{19-19} = 0b0; @@ -7515,8 +7786,9 @@ let isPredicated = 1; let isPredicatedFalse = 1; let isTerminator = 1; let isBranch = 1; -let cofMax1 = 1; let isNewValue = 1; +let cofMax1 = 1; +let isRestrictNoSlot1Store = 1; let Defs = [PC]; let BaseOpcode = 
"J4_cmpgtuir"; let isTaken = Inst{13}; @@ -7531,7 +7803,7 @@ def J4_cmpgtui_f_jumpnv_t : HInst< (outs), (ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii), "if (!cmp.gtu($Ns8.new,#$II)) jump:t $Ii", -tc_09faec3b, TypeNCJ>, Enc_eafd18, PredRel { +tc_bde7aaf4, TypeNCJ>, Enc_eafd18, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b1; let Inst{19-19} = 0b0; @@ -7540,8 +7812,9 @@ let isPredicated = 1; let isPredicatedFalse = 1; let isTerminator = 1; let isBranch = 1; -let cofMax1 = 1; let isNewValue = 1; +let cofMax1 = 1; +let isRestrictNoSlot1Store = 1; let Defs = [PC]; let BaseOpcode = "J4_cmpgtuir"; let isTaken = Inst{13}; @@ -7556,7 +7829,7 @@ def J4_cmpgtui_fp0_jump_nt : HInst< (outs), (ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii), "p0 = cmp.gtu($Rs16,#$II); if (!p0.new) jump:nt $Ii", -tc_d108a090, TypeCJ>, Enc_14d27a, PredRel { +tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b0; let Inst{31-22} = 0b0001000101; @@ -7565,6 +7838,9 @@ let isPredicatedFalse = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [P0]; let Defs = [P0, PC]; let BaseOpcode = "J4_cmpgtuip0"; @@ -7579,7 +7855,7 @@ def J4_cmpgtui_fp0_jump_t : HInst< (outs), (ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii), "p0 = cmp.gtu($Rs16,#$II); if (!p0.new) jump:t $Ii", -tc_d108a090, TypeCJ>, Enc_14d27a, PredRel { +tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b1; let Inst{31-22} = 0b0001000101; @@ -7588,6 +7864,9 @@ let isPredicatedFalse = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [P0]; let Defs = [P0, PC]; let BaseOpcode = "J4_cmpgtuip0"; @@ -7602,7 +7881,7 @@ def J4_cmpgtui_fp1_jump_nt : HInst< (outs), (ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii), "p1 = cmp.gtu($Rs16,#$II); if (!p1.new) jump:nt $Ii", -tc_d108a090, TypeCJ>, Enc_14d27a, PredRel { +tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b0; let Inst{31-22} = 0b0001001101; @@ -7611,6 +7890,9 @@ let isPredicatedFalse = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [P1]; let Defs = [P1, PC]; let BaseOpcode = "J4_cmpgtuip1"; @@ -7625,7 +7907,7 @@ def J4_cmpgtui_fp1_jump_t : HInst< (outs), (ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii), "p1 = cmp.gtu($Rs16,#$II); if (!p1.new) jump:t $Ii", -tc_d108a090, TypeCJ>, Enc_14d27a, PredRel { +tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b1; let Inst{31-22} = 0b0001001101; @@ -7634,6 +7916,9 @@ let isPredicatedFalse = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [P1]; let Defs = [P1, PC]; let BaseOpcode = "J4_cmpgtuip1"; @@ -7648,7 +7933,7 @@ def J4_cmpgtui_t_jumpnv_nt : HInst< (outs), (ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii), "if (cmp.gtu($Ns8.new,#$II)) jump:nt $Ii", -tc_09faec3b, TypeNCJ>, Enc_eafd18, PredRel { +tc_bde7aaf4, TypeNCJ>, Enc_eafd18, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b0; let Inst{19-19} = 0b0; @@ -7656,8 +7941,9 @@ let Inst{31-22} = 0b0010010100; let isPredicated = 1; let isTerminator = 1; let isBranch = 1; -let cofMax1 = 1; let isNewValue = 1; +let cofMax1 = 1; +let isRestrictNoSlot1Store = 1; let Defs = [PC]; let BaseOpcode = "J4_cmpgtuir"; let isTaken = Inst{13}; @@ 
-7672,7 +7958,7 @@ def J4_cmpgtui_t_jumpnv_t : HInst< (outs), (ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii), "if (cmp.gtu($Ns8.new,#$II)) jump:t $Ii", -tc_09faec3b, TypeNCJ>, Enc_eafd18, PredRel { +tc_bde7aaf4, TypeNCJ>, Enc_eafd18, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b1; let Inst{19-19} = 0b0; @@ -7680,8 +7966,9 @@ let Inst{31-22} = 0b0010010100; let isPredicated = 1; let isTerminator = 1; let isBranch = 1; -let cofMax1 = 1; let isNewValue = 1; +let cofMax1 = 1; +let isRestrictNoSlot1Store = 1; let Defs = [PC]; let BaseOpcode = "J4_cmpgtuir"; let isTaken = Inst{13}; @@ -7696,7 +7983,7 @@ def J4_cmpgtui_tp0_jump_nt : HInst< (outs), (ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii), "p0 = cmp.gtu($Rs16,#$II); if (p0.new) jump:nt $Ii", -tc_d108a090, TypeCJ>, Enc_14d27a, PredRel { +tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b0; let Inst{31-22} = 0b0001000100; @@ -7704,6 +7991,9 @@ let isPredicated = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [P0]; let Defs = [P0, PC]; let BaseOpcode = "J4_cmpgtuip0"; @@ -7718,7 +8008,7 @@ def J4_cmpgtui_tp0_jump_t : HInst< (outs), (ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii), "p0 = cmp.gtu($Rs16,#$II); if (p0.new) jump:t $Ii", -tc_d108a090, TypeCJ>, Enc_14d27a, PredRel { +tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b1; let Inst{31-22} = 0b0001000100; @@ -7726,6 +8016,9 @@ let isPredicated = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [P0]; let Defs = [P0, PC]; let BaseOpcode = "J4_cmpgtuip0"; @@ -7740,7 +8033,7 @@ def J4_cmpgtui_tp1_jump_nt : HInst< (outs), (ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii), "p1 = cmp.gtu($Rs16,#$II); if (p1.new) jump:nt $Ii", -tc_d108a090, TypeCJ>, Enc_14d27a, PredRel { +tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b0; let Inst{31-22} = 0b0001001100; @@ -7748,6 +8041,9 @@ let isPredicated = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [P1]; let Defs = [P1, PC]; let BaseOpcode = "J4_cmpgtuip1"; @@ -7762,7 +8058,7 @@ def J4_cmpgtui_tp1_jump_t : HInst< (outs), (ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii), "p1 = cmp.gtu($Rs16,#$II); if (p1.new) jump:t $Ii", -tc_d108a090, TypeCJ>, Enc_14d27a, PredRel { +tc_99be14ca, TypeCJ>, Enc_14d27a, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b1; let Inst{31-22} = 0b0001001100; @@ -7770,6 +8066,9 @@ let isPredicated = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [P1]; let Defs = [P1, PC]; let BaseOpcode = "J4_cmpgtuip1"; @@ -7784,7 +8083,7 @@ def J4_cmplt_f_jumpnv_nt : HInst< (outs), (ins IntRegs:$Rt32, IntRegs:$Ns8, b30_2Imm:$Ii), "if (!cmp.gt($Rt32,$Ns8.new)) jump:nt $Ii", -tc_3e61d314, TypeNCJ>, Enc_5de85f, PredRel { +tc_5eb851fc, TypeNCJ>, Enc_5de85f, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b0; let Inst{19-19} = 0b0; @@ -7793,8 +8092,9 @@ let isPredicated = 1; let isPredicatedFalse = 1; let isTerminator = 1; let isBranch = 1; -let cofMax1 = 1; let isNewValue = 1; +let cofMax1 = 1; +let isRestrictNoSlot1Store = 1; let Defs = [PC]; let BaseOpcode = "J4_cmpltr"; let isTaken = Inst{13}; @@ -7809,7 +8109,7 @@ def J4_cmplt_f_jumpnv_t : HInst< (outs), (ins 
IntRegs:$Rt32, IntRegs:$Ns8, b30_2Imm:$Ii), "if (!cmp.gt($Rt32,$Ns8.new)) jump:t $Ii", -tc_3e61d314, TypeNCJ>, Enc_5de85f, PredRel { +tc_5eb851fc, TypeNCJ>, Enc_5de85f, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b1; let Inst{19-19} = 0b0; @@ -7818,8 +8118,9 @@ let isPredicated = 1; let isPredicatedFalse = 1; let isTerminator = 1; let isBranch = 1; -let cofMax1 = 1; let isNewValue = 1; +let cofMax1 = 1; +let isRestrictNoSlot1Store = 1; let Defs = [PC]; let BaseOpcode = "J4_cmpltr"; let isTaken = Inst{13}; @@ -7834,7 +8135,7 @@ def J4_cmplt_t_jumpnv_nt : HInst< (outs), (ins IntRegs:$Rt32, IntRegs:$Ns8, b30_2Imm:$Ii), "if (cmp.gt($Rt32,$Ns8.new)) jump:nt $Ii", -tc_3e61d314, TypeNCJ>, Enc_5de85f, PredRel { +tc_5eb851fc, TypeNCJ>, Enc_5de85f, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b0; let Inst{19-19} = 0b0; @@ -7842,8 +8143,9 @@ let Inst{31-22} = 0b0010000110; let isPredicated = 1; let isTerminator = 1; let isBranch = 1; -let cofMax1 = 1; let isNewValue = 1; +let cofMax1 = 1; +let isRestrictNoSlot1Store = 1; let Defs = [PC]; let BaseOpcode = "J4_cmpltr"; let isTaken = Inst{13}; @@ -7858,7 +8160,7 @@ def J4_cmplt_t_jumpnv_t : HInst< (outs), (ins IntRegs:$Rt32, IntRegs:$Ns8, b30_2Imm:$Ii), "if (cmp.gt($Rt32,$Ns8.new)) jump:t $Ii", -tc_3e61d314, TypeNCJ>, Enc_5de85f, PredRel { +tc_5eb851fc, TypeNCJ>, Enc_5de85f, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b1; let Inst{19-19} = 0b0; @@ -7866,8 +8168,9 @@ let Inst{31-22} = 0b0010000110; let isPredicated = 1; let isTerminator = 1; let isBranch = 1; -let cofMax1 = 1; let isNewValue = 1; +let cofMax1 = 1; +let isRestrictNoSlot1Store = 1; let Defs = [PC]; let BaseOpcode = "J4_cmpltr"; let isTaken = Inst{13}; @@ -7882,7 +8185,7 @@ def J4_cmpltu_f_jumpnv_nt : HInst< (outs), (ins IntRegs:$Rt32, IntRegs:$Ns8, b30_2Imm:$Ii), "if (!cmp.gtu($Rt32,$Ns8.new)) jump:nt $Ii", -tc_3e61d314, TypeNCJ>, Enc_5de85f, PredRel { +tc_5eb851fc, TypeNCJ>, Enc_5de85f, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b0; let Inst{19-19} = 0b0; @@ -7891,8 +8194,9 @@ let isPredicated = 1; let isPredicatedFalse = 1; let isTerminator = 1; let isBranch = 1; -let cofMax1 = 1; let isNewValue = 1; +let cofMax1 = 1; +let isRestrictNoSlot1Store = 1; let Defs = [PC]; let BaseOpcode = "J4_cmpltur"; let isTaken = Inst{13}; @@ -7907,7 +8211,7 @@ def J4_cmpltu_f_jumpnv_t : HInst< (outs), (ins IntRegs:$Rt32, IntRegs:$Ns8, b30_2Imm:$Ii), "if (!cmp.gtu($Rt32,$Ns8.new)) jump:t $Ii", -tc_3e61d314, TypeNCJ>, Enc_5de85f, PredRel { +tc_5eb851fc, TypeNCJ>, Enc_5de85f, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b1; let Inst{19-19} = 0b0; @@ -7916,8 +8220,9 @@ let isPredicated = 1; let isPredicatedFalse = 1; let isTerminator = 1; let isBranch = 1; -let cofMax1 = 1; let isNewValue = 1; +let cofMax1 = 1; +let isRestrictNoSlot1Store = 1; let Defs = [PC]; let BaseOpcode = "J4_cmpltur"; let isTaken = Inst{13}; @@ -7932,7 +8237,7 @@ def J4_cmpltu_t_jumpnv_nt : HInst< (outs), (ins IntRegs:$Rt32, IntRegs:$Ns8, b30_2Imm:$Ii), "if (cmp.gtu($Rt32,$Ns8.new)) jump:nt $Ii", -tc_3e61d314, TypeNCJ>, Enc_5de85f, PredRel { +tc_5eb851fc, TypeNCJ>, Enc_5de85f, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b0; let Inst{19-19} = 0b0; @@ -7940,8 +8245,9 @@ let Inst{31-22} = 0b0010001000; let isPredicated = 1; let isTerminator = 1; let isBranch = 1; -let cofMax1 = 1; let isNewValue = 1; +let cofMax1 = 1; +let isRestrictNoSlot1Store = 1; let Defs = [PC]; let BaseOpcode = "J4_cmpltur"; let isTaken = Inst{13}; @@ -7956,7 +8262,7 @@ def J4_cmpltu_t_jumpnv_t : HInst< (outs), (ins IntRegs:$Rt32, 
IntRegs:$Ns8, b30_2Imm:$Ii), "if (cmp.gtu($Rt32,$Ns8.new)) jump:t $Ii", -tc_3e61d314, TypeNCJ>, Enc_5de85f, PredRel { +tc_5eb851fc, TypeNCJ>, Enc_5de85f, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b1; let Inst{19-19} = 0b0; @@ -7964,8 +8270,9 @@ let Inst{31-22} = 0b0010001000; let isPredicated = 1; let isTerminator = 1; let isBranch = 1; -let cofMax1 = 1; let isNewValue = 1; +let cofMax1 = 1; +let isRestrictNoSlot1Store = 1; let Defs = [PC]; let BaseOpcode = "J4_cmpltur"; let isTaken = Inst{13}; @@ -7980,7 +8287,7 @@ def J4_hintjumpr : HInst< (outs), (ins IntRegs:$Rs32), "hintjr($Rs32)", -tc_b08b653e, TypeJ>, Enc_ecbcc8 { +tc_9faf76ae, TypeJ>, Enc_ecbcc8 { let Inst{13-0} = 0b00000000000000; let Inst{31-21} = 0b01010010101; let isTerminator = 1; @@ -7992,13 +8299,15 @@ def J4_jumpseti : HInst< (outs GeneralSubRegs:$Rd16), (ins u6_0Imm:$II, b30_2Imm:$Ii), "$Rd16 = #$II ; jump $Ii", -tc_1e062b18, TypeCJ>, Enc_9e4c3f { +tc_49eb22c8, TypeCJ>, Enc_9e4c3f { let Inst{0-0} = 0b0; let Inst{31-22} = 0b0001011000; let hasNewValue = 1; let opNewValue = 0; let isTerminator = 1; let isBranch = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Defs = [PC]; let isExtendable = 1; let opExtendable = 2; @@ -8010,7 +8319,7 @@ def J4_jumpsetr : HInst< (outs GeneralSubRegs:$Rd16), (ins GeneralSubRegs:$Rs16, b30_2Imm:$Ii), "$Rd16 = $Rs16 ; jump $Ii", -tc_1e062b18, TypeCJ>, Enc_66bce1 { +tc_49eb22c8, TypeCJ>, Enc_66bce1 { let Inst{0-0} = 0b0; let Inst{13-12} = 0b00; let Inst{31-22} = 0b0001011100; @@ -8018,6 +8327,8 @@ let hasNewValue = 1; let opNewValue = 0; let isTerminator = 1; let isBranch = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Defs = [PC]; let isExtendable = 1; let opExtendable = 2; @@ -8029,7 +8340,7 @@ def J4_tstbit0_f_jumpnv_nt : HInst< (outs), (ins IntRegs:$Ns8, b30_2Imm:$Ii), "if (!tstbit($Ns8.new,#0)) jump:nt $Ii", -tc_dbe218dd, TypeNCJ>, Enc_69d63b { +tc_746baa8e, TypeNCJ>, Enc_69d63b { let Inst{0-0} = 0b0; let Inst{13-8} = 0b000000; let Inst{19-19} = 0b0; @@ -8038,8 +8349,9 @@ let isPredicated = 1; let isPredicatedFalse = 1; let isTerminator = 1; let isBranch = 1; -let cofMax1 = 1; let isNewValue = 1; +let cofMax1 = 1; +let isRestrictNoSlot1Store = 1; let Defs = [PC]; let isTaken = Inst{13}; let isExtendable = 1; @@ -8053,7 +8365,7 @@ def J4_tstbit0_f_jumpnv_t : HInst< (outs), (ins IntRegs:$Ns8, b30_2Imm:$Ii), "if (!tstbit($Ns8.new,#0)) jump:t $Ii", -tc_dbe218dd, TypeNCJ>, Enc_69d63b { +tc_746baa8e, TypeNCJ>, Enc_69d63b { let Inst{0-0} = 0b0; let Inst{13-8} = 0b100000; let Inst{19-19} = 0b0; @@ -8062,8 +8374,9 @@ let isPredicated = 1; let isPredicatedFalse = 1; let isTerminator = 1; let isBranch = 1; -let cofMax1 = 1; let isNewValue = 1; +let cofMax1 = 1; +let isRestrictNoSlot1Store = 1; let Defs = [PC]; let isTaken = Inst{13}; let isExtendable = 1; @@ -8077,7 +8390,7 @@ def J4_tstbit0_fp0_jump_nt : HInst< (outs), (ins GeneralSubRegs:$Rs16, b30_2Imm:$Ii), "p0 = tstbit($Rs16,#0); if (!p0.new) jump:nt $Ii", -tc_eb07ef6f, TypeCJ>, Enc_ad1c74 { +tc_3cb8ea06, TypeCJ>, Enc_ad1c74 { let Inst{0-0} = 0b0; let Inst{13-8} = 0b000011; let Inst{31-22} = 0b0001000111; @@ -8086,6 +8399,9 @@ let isPredicatedFalse = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [P0]; let Defs = [P0, PC]; let isTaken = Inst{13}; @@ -8099,7 +8415,7 @@ def J4_tstbit0_fp0_jump_t : HInst< (outs), (ins GeneralSubRegs:$Rs16, b30_2Imm:$Ii), "p0 = tstbit($Rs16,#0); if (!p0.new) jump:t $Ii", -tc_eb07ef6f, TypeCJ>, Enc_ad1c74 { +tc_3cb8ea06, 
TypeCJ>, Enc_ad1c74 { let Inst{0-0} = 0b0; let Inst{13-8} = 0b100011; let Inst{31-22} = 0b0001000111; @@ -8108,6 +8424,9 @@ let isPredicatedFalse = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [P0]; let Defs = [P0, PC]; let isTaken = Inst{13}; @@ -8121,7 +8440,7 @@ def J4_tstbit0_fp1_jump_nt : HInst< (outs), (ins GeneralSubRegs:$Rs16, b30_2Imm:$Ii), "p1 = tstbit($Rs16,#0); if (!p1.new) jump:nt $Ii", -tc_eb07ef6f, TypeCJ>, Enc_ad1c74 { +tc_3cb8ea06, TypeCJ>, Enc_ad1c74 { let Inst{0-0} = 0b0; let Inst{13-8} = 0b000011; let Inst{31-22} = 0b0001001111; @@ -8130,6 +8449,9 @@ let isPredicatedFalse = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [P1]; let Defs = [P1, PC]; let isTaken = Inst{13}; @@ -8143,7 +8465,7 @@ def J4_tstbit0_fp1_jump_t : HInst< (outs), (ins GeneralSubRegs:$Rs16, b30_2Imm:$Ii), "p1 = tstbit($Rs16,#0); if (!p1.new) jump:t $Ii", -tc_eb07ef6f, TypeCJ>, Enc_ad1c74 { +tc_3cb8ea06, TypeCJ>, Enc_ad1c74 { let Inst{0-0} = 0b0; let Inst{13-8} = 0b100011; let Inst{31-22} = 0b0001001111; @@ -8152,6 +8474,9 @@ let isPredicatedFalse = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [P1]; let Defs = [P1, PC]; let isTaken = Inst{13}; @@ -8165,7 +8490,7 @@ def J4_tstbit0_t_jumpnv_nt : HInst< (outs), (ins IntRegs:$Ns8, b30_2Imm:$Ii), "if (tstbit($Ns8.new,#0)) jump:nt $Ii", -tc_dbe218dd, TypeNCJ>, Enc_69d63b { +tc_746baa8e, TypeNCJ>, Enc_69d63b { let Inst{0-0} = 0b0; let Inst{13-8} = 0b000000; let Inst{19-19} = 0b0; @@ -8173,8 +8498,9 @@ let Inst{31-22} = 0b0010010110; let isPredicated = 1; let isTerminator = 1; let isBranch = 1; -let cofMax1 = 1; let isNewValue = 1; +let cofMax1 = 1; +let isRestrictNoSlot1Store = 1; let Defs = [PC]; let isTaken = Inst{13}; let isExtendable = 1; @@ -8188,7 +8514,7 @@ def J4_tstbit0_t_jumpnv_t : HInst< (outs), (ins IntRegs:$Ns8, b30_2Imm:$Ii), "if (tstbit($Ns8.new,#0)) jump:t $Ii", -tc_dbe218dd, TypeNCJ>, Enc_69d63b { +tc_746baa8e, TypeNCJ>, Enc_69d63b { let Inst{0-0} = 0b0; let Inst{13-8} = 0b100000; let Inst{19-19} = 0b0; @@ -8196,8 +8522,9 @@ let Inst{31-22} = 0b0010010110; let isPredicated = 1; let isTerminator = 1; let isBranch = 1; -let cofMax1 = 1; let isNewValue = 1; +let cofMax1 = 1; +let isRestrictNoSlot1Store = 1; let Defs = [PC]; let isTaken = Inst{13}; let isExtendable = 1; @@ -8211,7 +8538,7 @@ def J4_tstbit0_tp0_jump_nt : HInst< (outs), (ins GeneralSubRegs:$Rs16, b30_2Imm:$Ii), "p0 = tstbit($Rs16,#0); if (p0.new) jump:nt $Ii", -tc_eb07ef6f, TypeCJ>, Enc_ad1c74 { +tc_3cb8ea06, TypeCJ>, Enc_ad1c74 { let Inst{0-0} = 0b0; let Inst{13-8} = 0b000011; let Inst{31-22} = 0b0001000110; @@ -8219,6 +8546,9 @@ let isPredicated = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [P0]; let Defs = [P0, PC]; let isTaken = Inst{13}; @@ -8232,7 +8562,7 @@ def J4_tstbit0_tp0_jump_t : HInst< (outs), (ins GeneralSubRegs:$Rs16, b30_2Imm:$Ii), "p0 = tstbit($Rs16,#0); if (p0.new) jump:t $Ii", -tc_eb07ef6f, TypeCJ>, Enc_ad1c74 { +tc_3cb8ea06, TypeCJ>, Enc_ad1c74 { let Inst{0-0} = 0b0; let Inst{13-8} = 0b100011; let Inst{31-22} = 0b0001000110; @@ -8240,6 +8570,9 @@ let isPredicated = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = 
[P0]; let Defs = [P0, PC]; let isTaken = Inst{13}; @@ -8253,7 +8586,7 @@ def J4_tstbit0_tp1_jump_nt : HInst< (outs), (ins GeneralSubRegs:$Rs16, b30_2Imm:$Ii), "p1 = tstbit($Rs16,#0); if (p1.new) jump:nt $Ii", -tc_eb07ef6f, TypeCJ>, Enc_ad1c74 { +tc_3cb8ea06, TypeCJ>, Enc_ad1c74 { let Inst{0-0} = 0b0; let Inst{13-8} = 0b000011; let Inst{31-22} = 0b0001001110; @@ -8261,6 +8594,9 @@ let isPredicated = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [P1]; let Defs = [P1, PC]; let isTaken = Inst{13}; @@ -8274,7 +8610,7 @@ def J4_tstbit0_tp1_jump_t : HInst< (outs), (ins GeneralSubRegs:$Rs16, b30_2Imm:$Ii), "p1 = tstbit($Rs16,#0); if (p1.new) jump:t $Ii", -tc_eb07ef6f, TypeCJ>, Enc_ad1c74 { +tc_3cb8ea06, TypeCJ>, Enc_ad1c74 { let Inst{0-0} = 0b0; let Inst{13-8} = 0b100011; let Inst{31-22} = 0b0001001110; @@ -8282,6 +8618,9 @@ let isPredicated = 1; let isTerminator = 1; let isBranch = 1; let isPredicatedNew = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; +let cofMax1 = 1; let Uses = [P1]; let Defs = [P1, PC]; let isTaken = Inst{13}; @@ -8292,24 +8631,22 @@ let opExtentBits = 11; let opExtentAlign = 2; } def L2_deallocframe : HInst< -(outs), -(ins), -"deallocframe", -tc_c1dbc916, TypeLD>, Enc_3a3d62 { -let Inst{4-0} = 0b11110; +(outs DoubleRegs:$Rdd32), +(ins IntRegs:$Rs32), +"$Rdd32 = deallocframe($Rs32):raw", +tc_d1090e34, TypeLD>, Enc_3a3d62 { let Inst{13-5} = 0b000000000; let Inst{31-21} = 0b10010000000; -let Inst{20-16} = 0b11110; let accessSize = DoubleWordAccess; let mayLoad = 1; -let Uses = [R30]; -let Defs = [R29, R30, R31]; +let Uses = [FRAMEKEY]; +let Defs = [R29]; } def L2_loadalignb_io : HInst< (outs DoubleRegs:$Ryy32), (ins DoubleRegs:$Ryy32in, IntRegs:$Rs32, s32_0Imm:$Ii), "$Ryy32 = memb_fifo($Rs32+#$Ii)", -tc_14da557c, TypeLD>, Enc_a27588 { +tc_ef52ed71, TypeLD>, Enc_a27588 { let Inst{24-21} = 0b0100; let Inst{31-27} = 0b10010; let addrMode = BaseImmOffset; @@ -8326,7 +8663,7 @@ def L2_loadalignb_pbr : HInst< (outs DoubleRegs:$Ryy32, IntRegs:$Rx32), (ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, ModRegs:$Mu2), "$Ryy32 = memb_fifo($Rx32++$Mu2:brev)", -tc_ae762521, TypeLD>, Enc_1f5d8f { +tc_bad2bcaf, TypeLD>, Enc_1f5d8f { let Inst{12-5} = 0b00000000; let Inst{31-21} = 0b10011110100; let accessSize = ByteAccess; @@ -8337,7 +8674,7 @@ def L2_loadalignb_pci : HInst< (outs DoubleRegs:$Ryy32, IntRegs:$Rx32), (ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, s4_0Imm:$Ii, ModRegs:$Mu2), "$Ryy32 = memb_fifo($Rx32++#$Ii:circ($Mu2))", -tc_d2a33af5, TypeLD>, Enc_74aef2 { +tc_03220ffa, TypeLD>, Enc_74aef2 { let Inst{12-9} = 0b0000; let Inst{31-21} = 0b10011000100; let addrMode = PostInc; @@ -8350,7 +8687,7 @@ def L2_loadalignb_pcr : HInst< (outs DoubleRegs:$Ryy32, IntRegs:$Rx32), (ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, ModRegs:$Mu2), "$Ryy32 = memb_fifo($Rx32++I:circ($Mu2))", -tc_ae762521, TypeLD>, Enc_1f5d8f { +tc_bad2bcaf, TypeLD>, Enc_1f5d8f { let Inst{12-5} = 0b00010000; let Inst{31-21} = 0b10011000100; let addrMode = PostInc; @@ -8363,7 +8700,7 @@ def L2_loadalignb_pi : HInst< (outs DoubleRegs:$Ryy32, IntRegs:$Rx32), (ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, s4_0Imm:$Ii), "$Ryy32 = memb_fifo($Rx32++#$Ii)", -tc_ae762521, TypeLD>, Enc_6b197f { +tc_bad2bcaf, TypeLD>, Enc_6b197f { let Inst{13-9} = 0b00000; let Inst{31-21} = 0b10011010100; let addrMode = PostInc; @@ -8375,7 +8712,7 @@ def L2_loadalignb_pr : HInst< (outs DoubleRegs:$Ryy32, IntRegs:$Rx32), (ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, ModRegs:$Mu2), "$Ryy32 
= memb_fifo($Rx32++$Mu2)", -tc_ae762521, TypeLD>, Enc_1f5d8f { +tc_bad2bcaf, TypeLD>, Enc_1f5d8f { let Inst{12-5} = 0b00000000; let Inst{31-21} = 0b10011100100; let addrMode = PostInc; @@ -8387,7 +8724,7 @@ def L2_loadalignb_zomap : HInst< (outs DoubleRegs:$Ryy32), (ins DoubleRegs:$Ryy32in, IntRegs:$Rs32), "$Ryy32 = memb_fifo($Rs32)", -tc_14da557c, TypeMAPPING> { +tc_ef52ed71, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; let Constraints = "$Ryy32 = $Ryy32in"; @@ -8396,7 +8733,7 @@ def L2_loadalignh_io : HInst< (outs DoubleRegs:$Ryy32), (ins DoubleRegs:$Ryy32in, IntRegs:$Rs32, s31_1Imm:$Ii), "$Ryy32 = memh_fifo($Rs32+#$Ii)", -tc_14da557c, TypeLD>, Enc_5cd7e9 { +tc_ef52ed71, TypeLD>, Enc_5cd7e9 { let Inst{24-21} = 0b0010; let Inst{31-27} = 0b10010; let addrMode = BaseImmOffset; @@ -8413,7 +8750,7 @@ def L2_loadalignh_pbr : HInst< (outs DoubleRegs:$Ryy32, IntRegs:$Rx32), (ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, ModRegs:$Mu2), "$Ryy32 = memh_fifo($Rx32++$Mu2:brev)", -tc_ae762521, TypeLD>, Enc_1f5d8f { +tc_bad2bcaf, TypeLD>, Enc_1f5d8f { let Inst{12-5} = 0b00000000; let Inst{31-21} = 0b10011110010; let accessSize = HalfWordAccess; @@ -8424,7 +8761,7 @@ def L2_loadalignh_pci : HInst< (outs DoubleRegs:$Ryy32, IntRegs:$Rx32), (ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, s4_1Imm:$Ii, ModRegs:$Mu2), "$Ryy32 = memh_fifo($Rx32++#$Ii:circ($Mu2))", -tc_d2a33af5, TypeLD>, Enc_9e2e1c { +tc_03220ffa, TypeLD>, Enc_9e2e1c { let Inst{12-9} = 0b0000; let Inst{31-21} = 0b10011000010; let addrMode = PostInc; @@ -8437,7 +8774,7 @@ def L2_loadalignh_pcr : HInst< (outs DoubleRegs:$Ryy32, IntRegs:$Rx32), (ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, ModRegs:$Mu2), "$Ryy32 = memh_fifo($Rx32++I:circ($Mu2))", -tc_ae762521, TypeLD>, Enc_1f5d8f { +tc_bad2bcaf, TypeLD>, Enc_1f5d8f { let Inst{12-5} = 0b00010000; let Inst{31-21} = 0b10011000010; let addrMode = PostInc; @@ -8450,7 +8787,7 @@ def L2_loadalignh_pi : HInst< (outs DoubleRegs:$Ryy32, IntRegs:$Rx32), (ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, s4_1Imm:$Ii), "$Ryy32 = memh_fifo($Rx32++#$Ii)", -tc_ae762521, TypeLD>, Enc_bd1cbc { +tc_bad2bcaf, TypeLD>, Enc_bd1cbc { let Inst{13-9} = 0b00000; let Inst{31-21} = 0b10011010010; let addrMode = PostInc; @@ -8462,7 +8799,7 @@ def L2_loadalignh_pr : HInst< (outs DoubleRegs:$Ryy32, IntRegs:$Rx32), (ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, ModRegs:$Mu2), "$Ryy32 = memh_fifo($Rx32++$Mu2)", -tc_ae762521, TypeLD>, Enc_1f5d8f { +tc_bad2bcaf, TypeLD>, Enc_1f5d8f { let Inst{12-5} = 0b00000000; let Inst{31-21} = 0b10011100010; let addrMode = PostInc; @@ -8474,7 +8811,7 @@ def L2_loadalignh_zomap : HInst< (outs DoubleRegs:$Ryy32), (ins DoubleRegs:$Ryy32in, IntRegs:$Rs32), "$Ryy32 = memh_fifo($Rs32)", -tc_14da557c, TypeMAPPING> { +tc_ef52ed71, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; let Constraints = "$Ryy32 = $Ryy32in"; @@ -8483,7 +8820,7 @@ def L2_loadbsw2_io : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, s31_1Imm:$Ii), "$Rd32 = membh($Rs32+#$Ii)", -tc_bf6fa601, TypeLD>, Enc_de0214 { +tc_7f881c76, TypeLD>, Enc_de0214 { let Inst{24-21} = 0b0001; let Inst{31-27} = 0b10010; let hasNewValue = 1; @@ -8501,7 +8838,7 @@ def L2_loadbsw2_pbr : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Rd32 = membh($Rx32++$Mu2:brev)", -tc_65dc7cc4, TypeLD>, Enc_74d4e5 { +tc_2fc0c436, TypeLD>, Enc_74d4e5 { let Inst{12-5} = 0b00000000; let Inst{31-21} = 0b10011110001; let hasNewValue = 1; @@ -8514,7 +8851,7 @@ def L2_loadbsw2_pci : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins 
IntRegs:$Rx32in, s4_1Imm:$Ii, ModRegs:$Mu2), "$Rd32 = membh($Rx32++#$Ii:circ($Mu2))", -tc_3eab77bd, TypeLD>, Enc_e83554 { +tc_4403ca65, TypeLD>, Enc_e83554 { let Inst{12-9} = 0b0000; let Inst{31-21} = 0b10011000001; let hasNewValue = 1; @@ -8529,7 +8866,7 @@ def L2_loadbsw2_pcr : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Rd32 = membh($Rx32++I:circ($Mu2))", -tc_65dc7cc4, TypeLD>, Enc_74d4e5 { +tc_2fc0c436, TypeLD>, Enc_74d4e5 { let Inst{12-5} = 0b00010000; let Inst{31-21} = 0b10011000001; let hasNewValue = 1; @@ -8544,7 +8881,7 @@ def L2_loadbsw2_pi : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s4_1Imm:$Ii), "$Rd32 = membh($Rx32++#$Ii)", -tc_65dc7cc4, TypeLD>, Enc_152467 { +tc_2fc0c436, TypeLD>, Enc_152467 { let Inst{13-9} = 0b00000; let Inst{31-21} = 0b10011010001; let hasNewValue = 1; @@ -8558,7 +8895,7 @@ def L2_loadbsw2_pr : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Rd32 = membh($Rx32++$Mu2)", -tc_65dc7cc4, TypeLD>, Enc_74d4e5 { +tc_2fc0c436, TypeLD>, Enc_74d4e5 { let Inst{12-5} = 0b00000000; let Inst{31-21} = 0b10011100001; let hasNewValue = 1; @@ -8572,7 +8909,7 @@ def L2_loadbsw2_zomap : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = membh($Rs32)", -tc_bf6fa601, TypeMAPPING> { +tc_7f881c76, TypeMAPPING> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -8582,7 +8919,7 @@ def L2_loadbsw4_io : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, s30_2Imm:$Ii), "$Rdd32 = membh($Rs32+#$Ii)", -tc_bf6fa601, TypeLD>, Enc_2d7491 { +tc_7f881c76, TypeLD>, Enc_2d7491 { let Inst{24-21} = 0b0111; let Inst{31-27} = 0b10010; let addrMode = BaseImmOffset; @@ -8598,7 +8935,7 @@ def L2_loadbsw4_pbr : HInst< (outs DoubleRegs:$Rdd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Rdd32 = membh($Rx32++$Mu2:brev)", -tc_65dc7cc4, TypeLD>, Enc_7eee72 { +tc_2fc0c436, TypeLD>, Enc_7eee72 { let Inst{12-5} = 0b00000000; let Inst{31-21} = 0b10011110111; let accessSize = WordAccess; @@ -8609,7 +8946,7 @@ def L2_loadbsw4_pci : HInst< (outs DoubleRegs:$Rdd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s4_2Imm:$Ii, ModRegs:$Mu2), "$Rdd32 = membh($Rx32++#$Ii:circ($Mu2))", -tc_3eab77bd, TypeLD>, Enc_70b24b { +tc_4403ca65, TypeLD>, Enc_70b24b { let Inst{12-9} = 0b0000; let Inst{31-21} = 0b10011000111; let addrMode = PostInc; @@ -8622,7 +8959,7 @@ def L2_loadbsw4_pcr : HInst< (outs DoubleRegs:$Rdd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Rdd32 = membh($Rx32++I:circ($Mu2))", -tc_65dc7cc4, TypeLD>, Enc_7eee72 { +tc_2fc0c436, TypeLD>, Enc_7eee72 { let Inst{12-5} = 0b00010000; let Inst{31-21} = 0b10011000111; let addrMode = PostInc; @@ -8635,7 +8972,7 @@ def L2_loadbsw4_pi : HInst< (outs DoubleRegs:$Rdd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s4_2Imm:$Ii), "$Rdd32 = membh($Rx32++#$Ii)", -tc_65dc7cc4, TypeLD>, Enc_71f1b4 { +tc_2fc0c436, TypeLD>, Enc_71f1b4 { let Inst{13-9} = 0b00000; let Inst{31-21} = 0b10011010111; let addrMode = PostInc; @@ -8647,7 +8984,7 @@ def L2_loadbsw4_pr : HInst< (outs DoubleRegs:$Rdd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Rdd32 = membh($Rx32++$Mu2)", -tc_65dc7cc4, TypeLD>, Enc_7eee72 { +tc_2fc0c436, TypeLD>, Enc_7eee72 { let Inst{12-5} = 0b00000000; let Inst{31-21} = 0b10011100111; let addrMode = PostInc; @@ -8659,7 +8996,7 @@ def L2_loadbsw4_zomap : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32), "$Rdd32 = membh($Rs32)", -tc_bf6fa601, TypeMAPPING> { +tc_7f881c76, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ 
-8667,7 +9004,7 @@ def L2_loadbzw2_io : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, s31_1Imm:$Ii), "$Rd32 = memubh($Rs32+#$Ii)", -tc_bf6fa601, TypeLD>, Enc_de0214 { +tc_7f881c76, TypeLD>, Enc_de0214 { let Inst{24-21} = 0b0011; let Inst{31-27} = 0b10010; let hasNewValue = 1; @@ -8685,7 +9022,7 @@ def L2_loadbzw2_pbr : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Rd32 = memubh($Rx32++$Mu2:brev)", -tc_65dc7cc4, TypeLD>, Enc_74d4e5 { +tc_2fc0c436, TypeLD>, Enc_74d4e5 { let Inst{12-5} = 0b00000000; let Inst{31-21} = 0b10011110011; let hasNewValue = 1; @@ -8698,7 +9035,7 @@ def L2_loadbzw2_pci : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s4_1Imm:$Ii, ModRegs:$Mu2), "$Rd32 = memubh($Rx32++#$Ii:circ($Mu2))", -tc_3eab77bd, TypeLD>, Enc_e83554 { +tc_4403ca65, TypeLD>, Enc_e83554 { let Inst{12-9} = 0b0000; let Inst{31-21} = 0b10011000011; let hasNewValue = 1; @@ -8713,7 +9050,7 @@ def L2_loadbzw2_pcr : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Rd32 = memubh($Rx32++I:circ($Mu2))", -tc_65dc7cc4, TypeLD>, Enc_74d4e5 { +tc_2fc0c436, TypeLD>, Enc_74d4e5 { let Inst{12-5} = 0b00010000; let Inst{31-21} = 0b10011000011; let hasNewValue = 1; @@ -8728,7 +9065,7 @@ def L2_loadbzw2_pi : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s4_1Imm:$Ii), "$Rd32 = memubh($Rx32++#$Ii)", -tc_65dc7cc4, TypeLD>, Enc_152467 { +tc_2fc0c436, TypeLD>, Enc_152467 { let Inst{13-9} = 0b00000; let Inst{31-21} = 0b10011010011; let hasNewValue = 1; @@ -8742,7 +9079,7 @@ def L2_loadbzw2_pr : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Rd32 = memubh($Rx32++$Mu2)", -tc_65dc7cc4, TypeLD>, Enc_74d4e5 { +tc_2fc0c436, TypeLD>, Enc_74d4e5 { let Inst{12-5} = 0b00000000; let Inst{31-21} = 0b10011100011; let hasNewValue = 1; @@ -8756,7 +9093,7 @@ def L2_loadbzw2_zomap : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = memubh($Rs32)", -tc_bf6fa601, TypeMAPPING> { +tc_7f881c76, TypeMAPPING> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -8766,7 +9103,7 @@ def L2_loadbzw4_io : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, s30_2Imm:$Ii), "$Rdd32 = memubh($Rs32+#$Ii)", -tc_bf6fa601, TypeLD>, Enc_2d7491 { +tc_7f881c76, TypeLD>, Enc_2d7491 { let Inst{24-21} = 0b0101; let Inst{31-27} = 0b10010; let addrMode = BaseImmOffset; @@ -8782,7 +9119,7 @@ def L2_loadbzw4_pbr : HInst< (outs DoubleRegs:$Rdd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Rdd32 = memubh($Rx32++$Mu2:brev)", -tc_65dc7cc4, TypeLD>, Enc_7eee72 { +tc_2fc0c436, TypeLD>, Enc_7eee72 { let Inst{12-5} = 0b00000000; let Inst{31-21} = 0b10011110101; let accessSize = WordAccess; @@ -8793,7 +9130,7 @@ def L2_loadbzw4_pci : HInst< (outs DoubleRegs:$Rdd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s4_2Imm:$Ii, ModRegs:$Mu2), "$Rdd32 = memubh($Rx32++#$Ii:circ($Mu2))", -tc_3eab77bd, TypeLD>, Enc_70b24b { +tc_4403ca65, TypeLD>, Enc_70b24b { let Inst{12-9} = 0b0000; let Inst{31-21} = 0b10011000101; let addrMode = PostInc; @@ -8806,7 +9143,7 @@ def L2_loadbzw4_pcr : HInst< (outs DoubleRegs:$Rdd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Rdd32 = memubh($Rx32++I:circ($Mu2))", -tc_65dc7cc4, TypeLD>, Enc_7eee72 { +tc_2fc0c436, TypeLD>, Enc_7eee72 { let Inst{12-5} = 0b00010000; let Inst{31-21} = 0b10011000101; let addrMode = PostInc; @@ -8819,7 +9156,7 @@ def L2_loadbzw4_pi : HInst< (outs DoubleRegs:$Rdd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s4_2Imm:$Ii), "$Rdd32 = memubh($Rx32++#$Ii)", 
-tc_65dc7cc4, TypeLD>, Enc_71f1b4 { +tc_2fc0c436, TypeLD>, Enc_71f1b4 { let Inst{13-9} = 0b00000; let Inst{31-21} = 0b10011010101; let addrMode = PostInc; @@ -8831,7 +9168,7 @@ def L2_loadbzw4_pr : HInst< (outs DoubleRegs:$Rdd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Rdd32 = memubh($Rx32++$Mu2)", -tc_65dc7cc4, TypeLD>, Enc_7eee72 { +tc_2fc0c436, TypeLD>, Enc_7eee72 { let Inst{12-5} = 0b00000000; let Inst{31-21} = 0b10011100101; let addrMode = PostInc; @@ -8843,7 +9180,7 @@ def L2_loadbzw4_zomap : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32), "$Rdd32 = memubh($Rs32)", -tc_bf6fa601, TypeMAPPING> { +tc_7f881c76, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -8851,7 +9188,7 @@ def L2_loadrb_io : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, s32_0Imm:$Ii), "$Rd32 = memb($Rs32+#$Ii)", -tc_bf6fa601, TypeLD>, Enc_211aaa, AddrModeRel { +tc_7f881c76, TypeLD>, Enc_211aaa, AddrModeRel, PostInc_BaseImm { let Inst{24-21} = 0b1000; let Inst{31-27} = 0b10010; let hasNewValue = 1; @@ -8872,7 +9209,7 @@ def L2_loadrb_pbr : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Rd32 = memb($Rx32++$Mu2:brev)", -tc_65dc7cc4, TypeLD>, Enc_74d4e5 { +tc_2fc0c436, TypeLD>, Enc_74d4e5 { let Inst{12-5} = 0b00000000; let Inst{31-21} = 0b10011111000; let hasNewValue = 1; @@ -8885,7 +9222,7 @@ def L2_loadrb_pci : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s4_0Imm:$Ii, ModRegs:$Mu2), "$Rd32 = memb($Rx32++#$Ii:circ($Mu2))", -tc_3eab77bd, TypeLD>, Enc_e0a47a { +tc_4403ca65, TypeLD>, Enc_e0a47a { let Inst{12-9} = 0b0000; let Inst{31-21} = 0b10011001000; let hasNewValue = 1; @@ -8900,7 +9237,7 @@ def L2_loadrb_pcr : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Rd32 = memb($Rx32++I:circ($Mu2))", -tc_65dc7cc4, TypeLD>, Enc_74d4e5 { +tc_2fc0c436, TypeLD>, Enc_74d4e5 { let Inst{12-5} = 0b00010000; let Inst{31-21} = 0b10011001000; let hasNewValue = 1; @@ -8915,7 +9252,7 @@ def L2_loadrb_pi : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s4_0Imm:$Ii), "$Rd32 = memb($Rx32++#$Ii)", -tc_65dc7cc4, TypeLD>, Enc_222336, PredNewRel { +tc_2fc0c436, TypeLD>, Enc_222336, PredNewRel, PostInc_BaseImm { let Inst{13-9} = 0b00000; let Inst{31-21} = 0b10011011000; let hasNewValue = 1; @@ -8923,6 +9260,7 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = ByteAccess; let mayLoad = 1; +let CextOpcode = "L2_loadrb"; let BaseOpcode = "L2_loadrb_pi"; let isPredicable = 1; let Constraints = "$Rx32 = $Rx32in"; @@ -8931,7 +9269,7 @@ def L2_loadrb_pr : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Rd32 = memb($Rx32++$Mu2)", -tc_65dc7cc4, TypeLD>, Enc_74d4e5 { +tc_2fc0c436, TypeLD>, Enc_74d4e5 { let Inst{12-5} = 0b00000000; let Inst{31-21} = 0b10011101000; let hasNewValue = 1; @@ -8945,7 +9283,7 @@ def L2_loadrb_zomap : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = memb($Rs32)", -tc_bf6fa601, TypeMAPPING> { +tc_7f881c76, TypeMAPPING> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -8955,7 +9293,7 @@ def L2_loadrbgp : HInst< (outs IntRegs:$Rd32), (ins u32_0Imm:$Ii), "$Rd32 = memb(gp+#$Ii)", -tc_70cabf66, TypeV2LDST>, Enc_25bef0, AddrModeRel { +tc_9c98e8af, TypeV2LDST>, Enc_25bef0, AddrModeRel { let Inst{24-21} = 0b1000; let Inst{31-27} = 0b01001; let hasNewValue = 1; @@ -8974,7 +9312,7 @@ def L2_loadrd_io : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, s29_3Imm:$Ii), "$Rdd32 = memd($Rs32+#$Ii)", -tc_bf6fa601, TypeLD>, 
Enc_fa3ba4, AddrModeRel { +tc_7f881c76, TypeLD>, Enc_fa3ba4, AddrModeRel, PostInc_BaseImm { let Inst{24-21} = 0b1110; let Inst{31-27} = 0b10010; let addrMode = BaseImmOffset; @@ -8993,7 +9331,7 @@ def L2_loadrd_pbr : HInst< (outs DoubleRegs:$Rdd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Rdd32 = memd($Rx32++$Mu2:brev)", -tc_65dc7cc4, TypeLD>, Enc_7eee72 { +tc_2fc0c436, TypeLD>, Enc_7eee72 { let Inst{12-5} = 0b00000000; let Inst{31-21} = 0b10011111110; let accessSize = DoubleWordAccess; @@ -9004,7 +9342,7 @@ def L2_loadrd_pci : HInst< (outs DoubleRegs:$Rdd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s4_3Imm:$Ii, ModRegs:$Mu2), "$Rdd32 = memd($Rx32++#$Ii:circ($Mu2))", -tc_3eab77bd, TypeLD>, Enc_b05839 { +tc_4403ca65, TypeLD>, Enc_b05839 { let Inst{12-9} = 0b0000; let Inst{31-21} = 0b10011001110; let addrMode = PostInc; @@ -9017,7 +9355,7 @@ def L2_loadrd_pcr : HInst< (outs DoubleRegs:$Rdd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Rdd32 = memd($Rx32++I:circ($Mu2))", -tc_65dc7cc4, TypeLD>, Enc_7eee72 { +tc_2fc0c436, TypeLD>, Enc_7eee72 { let Inst{12-5} = 0b00010000; let Inst{31-21} = 0b10011001110; let addrMode = PostInc; @@ -9030,12 +9368,13 @@ def L2_loadrd_pi : HInst< (outs DoubleRegs:$Rdd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s4_3Imm:$Ii), "$Rdd32 = memd($Rx32++#$Ii)", -tc_65dc7cc4, TypeLD>, Enc_5bdd42, PredNewRel { +tc_2fc0c436, TypeLD>, Enc_5bdd42, PredNewRel, PostInc_BaseImm { let Inst{13-9} = 0b00000; let Inst{31-21} = 0b10011011110; let addrMode = PostInc; let accessSize = DoubleWordAccess; let mayLoad = 1; +let CextOpcode = "L2_loadrd"; let BaseOpcode = "L2_loadrd_pi"; let isPredicable = 1; let Constraints = "$Rx32 = $Rx32in"; @@ -9044,7 +9383,7 @@ def L2_loadrd_pr : HInst< (outs DoubleRegs:$Rdd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Rdd32 = memd($Rx32++$Mu2)", -tc_65dc7cc4, TypeLD>, Enc_7eee72 { +tc_2fc0c436, TypeLD>, Enc_7eee72 { let Inst{12-5} = 0b00000000; let Inst{31-21} = 0b10011101110; let addrMode = PostInc; @@ -9056,7 +9395,7 @@ def L2_loadrd_zomap : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32), "$Rdd32 = memd($Rs32)", -tc_bf6fa601, TypeMAPPING> { +tc_7f881c76, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -9064,7 +9403,7 @@ def L2_loadrdgp : HInst< (outs DoubleRegs:$Rdd32), (ins u29_3Imm:$Ii), "$Rdd32 = memd(gp+#$Ii)", -tc_70cabf66, TypeV2LDST>, Enc_509701, AddrModeRel { +tc_9c98e8af, TypeV2LDST>, Enc_509701, AddrModeRel { let Inst{24-21} = 0b1110; let Inst{31-27} = 0b01001; let accessSize = DoubleWordAccess; @@ -9081,7 +9420,7 @@ def L2_loadrh_io : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, s31_1Imm:$Ii), "$Rd32 = memh($Rs32+#$Ii)", -tc_bf6fa601, TypeLD>, Enc_de0214, AddrModeRel { +tc_7f881c76, TypeLD>, Enc_de0214, AddrModeRel, PostInc_BaseImm { let Inst{24-21} = 0b1010; let Inst{31-27} = 0b10010; let hasNewValue = 1; @@ -9102,7 +9441,7 @@ def L2_loadrh_pbr : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Rd32 = memh($Rx32++$Mu2:brev)", -tc_65dc7cc4, TypeLD>, Enc_74d4e5 { +tc_2fc0c436, TypeLD>, Enc_74d4e5 { let Inst{12-5} = 0b00000000; let Inst{31-21} = 0b10011111010; let hasNewValue = 1; @@ -9115,7 +9454,7 @@ def L2_loadrh_pci : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s4_1Imm:$Ii, ModRegs:$Mu2), "$Rd32 = memh($Rx32++#$Ii:circ($Mu2))", -tc_3eab77bd, TypeLD>, Enc_e83554 { +tc_4403ca65, TypeLD>, Enc_e83554 { let Inst{12-9} = 0b0000; let Inst{31-21} = 0b10011001010; let hasNewValue = 1; @@ -9130,7 +9469,7 @@ def L2_loadrh_pcr : HInst< 
(outs IntRegs:$Rd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Rd32 = memh($Rx32++I:circ($Mu2))", -tc_65dc7cc4, TypeLD>, Enc_74d4e5 { +tc_2fc0c436, TypeLD>, Enc_74d4e5 { let Inst{12-5} = 0b00010000; let Inst{31-21} = 0b10011001010; let hasNewValue = 1; @@ -9145,7 +9484,7 @@ def L2_loadrh_pi : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s4_1Imm:$Ii), "$Rd32 = memh($Rx32++#$Ii)", -tc_65dc7cc4, TypeLD>, Enc_152467, PredNewRel { +tc_2fc0c436, TypeLD>, Enc_152467, PredNewRel, PostInc_BaseImm { let Inst{13-9} = 0b00000; let Inst{31-21} = 0b10011011010; let hasNewValue = 1; @@ -9153,6 +9492,7 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = HalfWordAccess; let mayLoad = 1; +let CextOpcode = "L2_loadrh"; let BaseOpcode = "L2_loadrh_pi"; let isPredicable = 1; let Constraints = "$Rx32 = $Rx32in"; @@ -9161,7 +9501,7 @@ def L2_loadrh_pr : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Rd32 = memh($Rx32++$Mu2)", -tc_65dc7cc4, TypeLD>, Enc_74d4e5 { +tc_2fc0c436, TypeLD>, Enc_74d4e5 { let Inst{12-5} = 0b00000000; let Inst{31-21} = 0b10011101010; let hasNewValue = 1; @@ -9175,7 +9515,7 @@ def L2_loadrh_zomap : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = memh($Rs32)", -tc_bf6fa601, TypeMAPPING> { +tc_7f881c76, TypeMAPPING> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -9185,7 +9525,7 @@ def L2_loadrhgp : HInst< (outs IntRegs:$Rd32), (ins u31_1Imm:$Ii), "$Rd32 = memh(gp+#$Ii)", -tc_70cabf66, TypeV2LDST>, Enc_8df4be, AddrModeRel { +tc_9c98e8af, TypeV2LDST>, Enc_8df4be, AddrModeRel { let Inst{24-21} = 0b1010; let Inst{31-27} = 0b01001; let hasNewValue = 1; @@ -9204,7 +9544,7 @@ def L2_loadri_io : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, s30_2Imm:$Ii), "$Rd32 = memw($Rs32+#$Ii)", -tc_bf6fa601, TypeLD>, Enc_2a3787, AddrModeRel { +tc_7f881c76, TypeLD>, Enc_2a3787, AddrModeRel, PostInc_BaseImm { let Inst{24-21} = 0b1100; let Inst{31-27} = 0b10010; let hasNewValue = 1; @@ -9225,7 +9565,7 @@ def L2_loadri_pbr : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Rd32 = memw($Rx32++$Mu2:brev)", -tc_65dc7cc4, TypeLD>, Enc_74d4e5 { +tc_2fc0c436, TypeLD>, Enc_74d4e5 { let Inst{12-5} = 0b00000000; let Inst{31-21} = 0b10011111100; let hasNewValue = 1; @@ -9238,7 +9578,7 @@ def L2_loadri_pci : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s4_2Imm:$Ii, ModRegs:$Mu2), "$Rd32 = memw($Rx32++#$Ii:circ($Mu2))", -tc_3eab77bd, TypeLD>, Enc_27fd0e { +tc_4403ca65, TypeLD>, Enc_27fd0e { let Inst{12-9} = 0b0000; let Inst{31-21} = 0b10011001100; let hasNewValue = 1; @@ -9253,7 +9593,7 @@ def L2_loadri_pcr : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Rd32 = memw($Rx32++I:circ($Mu2))", -tc_65dc7cc4, TypeLD>, Enc_74d4e5 { +tc_2fc0c436, TypeLD>, Enc_74d4e5 { let Inst{12-5} = 0b00010000; let Inst{31-21} = 0b10011001100; let hasNewValue = 1; @@ -9268,7 +9608,7 @@ def L2_loadri_pi : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s4_2Imm:$Ii), "$Rd32 = memw($Rx32++#$Ii)", -tc_65dc7cc4, TypeLD>, Enc_3d920a, PredNewRel { +tc_2fc0c436, TypeLD>, Enc_3d920a, PredNewRel, PostInc_BaseImm { let Inst{13-9} = 0b00000; let Inst{31-21} = 0b10011011100; let hasNewValue = 1; @@ -9276,6 +9616,7 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = WordAccess; let mayLoad = 1; +let CextOpcode = "L2_loadri"; let BaseOpcode = "L2_loadri_pi"; let isPredicable = 1; let Constraints = "$Rx32 = $Rx32in"; @@ -9284,7 +9625,7 
@@ def L2_loadri_pr : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Rd32 = memw($Rx32++$Mu2)", -tc_65dc7cc4, TypeLD>, Enc_74d4e5 { +tc_2fc0c436, TypeLD>, Enc_74d4e5 { let Inst{12-5} = 0b00000000; let Inst{31-21} = 0b10011101100; let hasNewValue = 1; @@ -9298,7 +9639,7 @@ def L2_loadri_zomap : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = memw($Rs32)", -tc_bf6fa601, TypeMAPPING> { +tc_7f881c76, TypeMAPPING> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -9308,7 +9649,7 @@ def L2_loadrigp : HInst< (outs IntRegs:$Rd32), (ins u30_2Imm:$Ii), "$Rd32 = memw(gp+#$Ii)", -tc_70cabf66, TypeV2LDST>, Enc_4f4ed7, AddrModeRel { +tc_9c98e8af, TypeV2LDST>, Enc_4f4ed7, AddrModeRel { let Inst{24-21} = 0b1100; let Inst{31-27} = 0b01001; let hasNewValue = 1; @@ -9327,7 +9668,7 @@ def L2_loadrub_io : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, s32_0Imm:$Ii), "$Rd32 = memub($Rs32+#$Ii)", -tc_bf6fa601, TypeLD>, Enc_211aaa, AddrModeRel { +tc_7f881c76, TypeLD>, Enc_211aaa, AddrModeRel, PostInc_BaseImm { let Inst{24-21} = 0b1001; let Inst{31-27} = 0b10010; let hasNewValue = 1; @@ -9348,7 +9689,7 @@ def L2_loadrub_pbr : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Rd32 = memub($Rx32++$Mu2:brev)", -tc_65dc7cc4, TypeLD>, Enc_74d4e5 { +tc_2fc0c436, TypeLD>, Enc_74d4e5 { let Inst{12-5} = 0b00000000; let Inst{31-21} = 0b10011111001; let hasNewValue = 1; @@ -9361,7 +9702,7 @@ def L2_loadrub_pci : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s4_0Imm:$Ii, ModRegs:$Mu2), "$Rd32 = memub($Rx32++#$Ii:circ($Mu2))", -tc_3eab77bd, TypeLD>, Enc_e0a47a { +tc_4403ca65, TypeLD>, Enc_e0a47a { let Inst{12-9} = 0b0000; let Inst{31-21} = 0b10011001001; let hasNewValue = 1; @@ -9376,7 +9717,7 @@ def L2_loadrub_pcr : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Rd32 = memub($Rx32++I:circ($Mu2))", -tc_65dc7cc4, TypeLD>, Enc_74d4e5 { +tc_2fc0c436, TypeLD>, Enc_74d4e5 { let Inst{12-5} = 0b00010000; let Inst{31-21} = 0b10011001001; let hasNewValue = 1; @@ -9391,7 +9732,7 @@ def L2_loadrub_pi : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s4_0Imm:$Ii), "$Rd32 = memub($Rx32++#$Ii)", -tc_65dc7cc4, TypeLD>, Enc_222336, PredNewRel { +tc_2fc0c436, TypeLD>, Enc_222336, PredNewRel, PostInc_BaseImm { let Inst{13-9} = 0b00000; let Inst{31-21} = 0b10011011001; let hasNewValue = 1; @@ -9399,6 +9740,7 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = ByteAccess; let mayLoad = 1; +let CextOpcode = "L2_loadrub"; let BaseOpcode = "L2_loadrub_pi"; let isPredicable = 1; let Constraints = "$Rx32 = $Rx32in"; @@ -9407,7 +9749,7 @@ def L2_loadrub_pr : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Rd32 = memub($Rx32++$Mu2)", -tc_65dc7cc4, TypeLD>, Enc_74d4e5 { +tc_2fc0c436, TypeLD>, Enc_74d4e5 { let Inst{12-5} = 0b00000000; let Inst{31-21} = 0b10011101001; let hasNewValue = 1; @@ -9421,7 +9763,7 @@ def L2_loadrub_zomap : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = memub($Rs32)", -tc_bf6fa601, TypeMAPPING> { +tc_7f881c76, TypeMAPPING> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -9431,7 +9773,7 @@ def L2_loadrubgp : HInst< (outs IntRegs:$Rd32), (ins u32_0Imm:$Ii), "$Rd32 = memub(gp+#$Ii)", -tc_70cabf66, TypeV2LDST>, Enc_25bef0, AddrModeRel { +tc_9c98e8af, TypeV2LDST>, Enc_25bef0, AddrModeRel { let Inst{24-21} = 0b1001; let Inst{31-27} = 0b01001; let hasNewValue = 1; @@ -9450,7 +9792,7 @@ def 
L2_loadruh_io : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, s31_1Imm:$Ii), "$Rd32 = memuh($Rs32+#$Ii)", -tc_bf6fa601, TypeLD>, Enc_de0214, AddrModeRel { +tc_7f881c76, TypeLD>, Enc_de0214, AddrModeRel, PostInc_BaseImm { let Inst{24-21} = 0b1011; let Inst{31-27} = 0b10010; let hasNewValue = 1; @@ -9471,7 +9813,7 @@ def L2_loadruh_pbr : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Rd32 = memuh($Rx32++$Mu2:brev)", -tc_65dc7cc4, TypeLD>, Enc_74d4e5 { +tc_2fc0c436, TypeLD>, Enc_74d4e5 { let Inst{12-5} = 0b00000000; let Inst{31-21} = 0b10011111011; let hasNewValue = 1; @@ -9484,7 +9826,7 @@ def L2_loadruh_pci : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s4_1Imm:$Ii, ModRegs:$Mu2), "$Rd32 = memuh($Rx32++#$Ii:circ($Mu2))", -tc_3eab77bd, TypeLD>, Enc_e83554 { +tc_4403ca65, TypeLD>, Enc_e83554 { let Inst{12-9} = 0b0000; let Inst{31-21} = 0b10011001011; let hasNewValue = 1; @@ -9499,7 +9841,7 @@ def L2_loadruh_pcr : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Rd32 = memuh($Rx32++I:circ($Mu2))", -tc_65dc7cc4, TypeLD>, Enc_74d4e5 { +tc_2fc0c436, TypeLD>, Enc_74d4e5 { let Inst{12-5} = 0b00010000; let Inst{31-21} = 0b10011001011; let hasNewValue = 1; @@ -9514,7 +9856,7 @@ def L2_loadruh_pi : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s4_1Imm:$Ii), "$Rd32 = memuh($Rx32++#$Ii)", -tc_65dc7cc4, TypeLD>, Enc_152467, PredNewRel { +tc_2fc0c436, TypeLD>, Enc_152467, PredNewRel, PostInc_BaseImm { let Inst{13-9} = 0b00000; let Inst{31-21} = 0b10011011011; let hasNewValue = 1; @@ -9522,6 +9864,7 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = HalfWordAccess; let mayLoad = 1; +let CextOpcode = "L2_loadruh"; let BaseOpcode = "L2_loadruh_pi"; let isPredicable = 1; let Constraints = "$Rx32 = $Rx32in"; @@ -9530,7 +9873,7 @@ def L2_loadruh_pr : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Rd32 = memuh($Rx32++$Mu2)", -tc_65dc7cc4, TypeLD>, Enc_74d4e5 { +tc_2fc0c436, TypeLD>, Enc_74d4e5 { let Inst{12-5} = 0b00000000; let Inst{31-21} = 0b10011101011; let hasNewValue = 1; @@ -9544,7 +9887,7 @@ def L2_loadruh_zomap : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = memuh($Rs32)", -tc_bf6fa601, TypeMAPPING> { +tc_7f881c76, TypeMAPPING> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -9554,7 +9897,7 @@ def L2_loadruhgp : HInst< (outs IntRegs:$Rd32), (ins u31_1Imm:$Ii), "$Rd32 = memuh(gp+#$Ii)", -tc_70cabf66, TypeV2LDST>, Enc_8df4be, AddrModeRel { +tc_9c98e8af, TypeV2LDST>, Enc_8df4be, AddrModeRel { let Inst{24-21} = 0b1011; let Inst{31-27} = 0b01001; let hasNewValue = 1; @@ -9573,7 +9916,7 @@ def L2_loadw_locked : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = memw_locked($Rs32)", -tc_29c14515, TypeLD>, Enc_5e2823 { +tc_6aa5711a, TypeLD>, Enc_5e2823 { let Inst{13-5} = 0b000000000; let Inst{31-21} = 0b10010010000; let hasNewValue = 1; @@ -9586,7 +9929,7 @@ def L2_ploadrbf_io : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32, u32_0Imm:$Ii), "if (!$Pt4) $Rd32 = memb($Rs32+#$Ii)", -tc_14da557c, TypeV2LDST>, Enc_a21d47, AddrModeRel { +tc_ef52ed71, TypeV2LDST>, Enc_a21d47, AddrModeRel { let Inst{13-13} = 0b0; let Inst{31-21} = 0b01000101000; let isPredicated = 1; @@ -9608,7 +9951,7 @@ def L2_ploadrbf_pi : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_0Imm:$Ii), "if (!$Pt4) $Rd32 = memb($Rx32++#$Ii)", -tc_ae762521, TypeLD>, Enc_f4413a, PredNewRel { +tc_bad2bcaf, 
TypeLD>, Enc_f4413a, PredNewRel { let Inst{13-11} = 0b101; let Inst{31-21} = 0b10011011000; let isPredicated = 1; @@ -9625,7 +9968,7 @@ def L2_ploadrbf_zomap : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32), "if (!$Pt4) $Rd32 = memb($Rs32)", -tc_14da557c, TypeMAPPING> { +tc_ef52ed71, TypeMAPPING> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -9635,7 +9978,7 @@ def L2_ploadrbfnew_io : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32, u32_0Imm:$Ii), "if (!$Pt4.new) $Rd32 = memb($Rs32+#$Ii)", -tc_65dc7cc4, TypeV2LDST>, Enc_a21d47, AddrModeRel { +tc_2fc0c436, TypeV2LDST>, Enc_a21d47, AddrModeRel { let Inst{13-13} = 0b0; let Inst{31-21} = 0b01000111000; let isPredicated = 1; @@ -9658,7 +10001,7 @@ def L2_ploadrbfnew_pi : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_0Imm:$Ii), "if (!$Pt4.new) $Rd32 = memb($Rx32++#$Ii)", -tc_e578178f, TypeLD>, Enc_f4413a, PredNewRel { +tc_63fe3df7, TypeLD>, Enc_f4413a, PredNewRel { let Inst{13-11} = 0b111; let Inst{31-21} = 0b10011011000; let isPredicated = 1; @@ -9676,7 +10019,7 @@ def L2_ploadrbfnew_zomap : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32), "if (!$Pt4.new) $Rd32 = memb($Rs32)", -tc_65dc7cc4, TypeMAPPING> { +tc_2fc0c436, TypeMAPPING> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -9686,7 +10029,7 @@ def L2_ploadrbt_io : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32, u32_0Imm:$Ii), "if ($Pt4) $Rd32 = memb($Rs32+#$Ii)", -tc_14da557c, TypeV2LDST>, Enc_a21d47, AddrModeRel { +tc_ef52ed71, TypeV2LDST>, Enc_a21d47, AddrModeRel { let Inst{13-13} = 0b0; let Inst{31-21} = 0b01000001000; let isPredicated = 1; @@ -9707,7 +10050,7 @@ def L2_ploadrbt_pi : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_0Imm:$Ii), "if ($Pt4) $Rd32 = memb($Rx32++#$Ii)", -tc_ae762521, TypeLD>, Enc_f4413a, PredNewRel { +tc_bad2bcaf, TypeLD>, Enc_f4413a, PredNewRel { let Inst{13-11} = 0b100; let Inst{31-21} = 0b10011011000; let isPredicated = 1; @@ -9723,7 +10066,7 @@ def L2_ploadrbt_zomap : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32), "if ($Pt4) $Rd32 = memb($Rs32)", -tc_14da557c, TypeMAPPING> { +tc_ef52ed71, TypeMAPPING> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -9733,7 +10076,7 @@ def L2_ploadrbtnew_io : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32, u32_0Imm:$Ii), "if ($Pt4.new) $Rd32 = memb($Rs32+#$Ii)", -tc_65dc7cc4, TypeV2LDST>, Enc_a21d47, AddrModeRel { +tc_2fc0c436, TypeV2LDST>, Enc_a21d47, AddrModeRel { let Inst{13-13} = 0b0; let Inst{31-21} = 0b01000011000; let isPredicated = 1; @@ -9755,7 +10098,7 @@ def L2_ploadrbtnew_pi : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_0Imm:$Ii), "if ($Pt4.new) $Rd32 = memb($Rx32++#$Ii)", -tc_e578178f, TypeLD>, Enc_f4413a, PredNewRel { +tc_63fe3df7, TypeLD>, Enc_f4413a, PredNewRel { let Inst{13-11} = 0b110; let Inst{31-21} = 0b10011011000; let isPredicated = 1; @@ -9772,7 +10115,7 @@ def L2_ploadrbtnew_zomap : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32), "if ($Pt4.new) $Rd32 = memb($Rs32)", -tc_65dc7cc4, TypeMAPPING> { +tc_2fc0c436, TypeMAPPING> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -9782,7 +10125,7 @@ def L2_ploadrdf_io : HInst< (outs DoubleRegs:$Rdd32), (ins PredRegs:$Pt4, IntRegs:$Rs32, u29_3Imm:$Ii), "if (!$Pt4) $Rdd32 = memd($Rs32+#$Ii)", -tc_14da557c, TypeV2LDST>, Enc_acd6ed, AddrModeRel { +tc_ef52ed71, TypeV2LDST>, 
Enc_acd6ed, AddrModeRel { let Inst{13-13} = 0b0; let Inst{31-21} = 0b01000101110; let isPredicated = 1; @@ -9802,7 +10145,7 @@ def L2_ploadrdf_pi : HInst< (outs DoubleRegs:$Rdd32, IntRegs:$Rx32), (ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_3Imm:$Ii), "if (!$Pt4) $Rdd32 = memd($Rx32++#$Ii)", -tc_ae762521, TypeLD>, Enc_9d1247, PredNewRel { +tc_bad2bcaf, TypeLD>, Enc_9d1247, PredNewRel { let Inst{13-11} = 0b101; let Inst{31-21} = 0b10011011110; let isPredicated = 1; @@ -9817,7 +10160,7 @@ def L2_ploadrdf_zomap : HInst< (outs DoubleRegs:$Rdd32), (ins PredRegs:$Pt4, IntRegs:$Rs32), "if (!$Pt4) $Rdd32 = memd($Rs32)", -tc_14da557c, TypeMAPPING> { +tc_ef52ed71, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -9825,7 +10168,7 @@ def L2_ploadrdfnew_io : HInst< (outs DoubleRegs:$Rdd32), (ins PredRegs:$Pt4, IntRegs:$Rs32, u29_3Imm:$Ii), "if (!$Pt4.new) $Rdd32 = memd($Rs32+#$Ii)", -tc_65dc7cc4, TypeV2LDST>, Enc_acd6ed, AddrModeRel { +tc_2fc0c436, TypeV2LDST>, Enc_acd6ed, AddrModeRel { let Inst{13-13} = 0b0; let Inst{31-21} = 0b01000111110; let isPredicated = 1; @@ -9846,7 +10189,7 @@ def L2_ploadrdfnew_pi : HInst< (outs DoubleRegs:$Rdd32, IntRegs:$Rx32), (ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_3Imm:$Ii), "if (!$Pt4.new) $Rdd32 = memd($Rx32++#$Ii)", -tc_e578178f, TypeLD>, Enc_9d1247, PredNewRel { +tc_63fe3df7, TypeLD>, Enc_9d1247, PredNewRel { let Inst{13-11} = 0b111; let Inst{31-21} = 0b10011011110; let isPredicated = 1; @@ -9862,7 +10205,7 @@ def L2_ploadrdfnew_zomap : HInst< (outs DoubleRegs:$Rdd32), (ins PredRegs:$Pt4, IntRegs:$Rs32), "if (!$Pt4.new) $Rdd32 = memd($Rs32)", -tc_65dc7cc4, TypeMAPPING> { +tc_2fc0c436, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -9870,7 +10213,7 @@ def L2_ploadrdt_io : HInst< (outs DoubleRegs:$Rdd32), (ins PredRegs:$Pt4, IntRegs:$Rs32, u29_3Imm:$Ii), "if ($Pt4) $Rdd32 = memd($Rs32+#$Ii)", -tc_14da557c, TypeV2LDST>, Enc_acd6ed, AddrModeRel { +tc_ef52ed71, TypeV2LDST>, Enc_acd6ed, AddrModeRel { let Inst{13-13} = 0b0; let Inst{31-21} = 0b01000001110; let isPredicated = 1; @@ -9889,7 +10232,7 @@ def L2_ploadrdt_pi : HInst< (outs DoubleRegs:$Rdd32, IntRegs:$Rx32), (ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_3Imm:$Ii), "if ($Pt4) $Rdd32 = memd($Rx32++#$Ii)", -tc_ae762521, TypeLD>, Enc_9d1247, PredNewRel { +tc_bad2bcaf, TypeLD>, Enc_9d1247, PredNewRel { let Inst{13-11} = 0b100; let Inst{31-21} = 0b10011011110; let isPredicated = 1; @@ -9903,7 +10246,7 @@ def L2_ploadrdt_zomap : HInst< (outs DoubleRegs:$Rdd32), (ins PredRegs:$Pt4, IntRegs:$Rs32), "if ($Pt4) $Rdd32 = memd($Rs32)", -tc_14da557c, TypeMAPPING> { +tc_ef52ed71, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -9911,7 +10254,7 @@ def L2_ploadrdtnew_io : HInst< (outs DoubleRegs:$Rdd32), (ins PredRegs:$Pt4, IntRegs:$Rs32, u29_3Imm:$Ii), "if ($Pt4.new) $Rdd32 = memd($Rs32+#$Ii)", -tc_65dc7cc4, TypeV2LDST>, Enc_acd6ed, AddrModeRel { +tc_2fc0c436, TypeV2LDST>, Enc_acd6ed, AddrModeRel { let Inst{13-13} = 0b0; let Inst{31-21} = 0b01000011110; let isPredicated = 1; @@ -9931,7 +10274,7 @@ def L2_ploadrdtnew_pi : HInst< (outs DoubleRegs:$Rdd32, IntRegs:$Rx32), (ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_3Imm:$Ii), "if ($Pt4.new) $Rdd32 = memd($Rx32++#$Ii)", -tc_e578178f, TypeLD>, Enc_9d1247, PredNewRel { +tc_63fe3df7, TypeLD>, Enc_9d1247, PredNewRel { let Inst{13-11} = 0b110; let Inst{31-21} = 0b10011011110; let isPredicated = 1; @@ -9946,7 +10289,7 @@ def L2_ploadrdtnew_zomap : HInst< (outs DoubleRegs:$Rdd32), (ins PredRegs:$Pt4, IntRegs:$Rs32), "if ($Pt4.new) $Rdd32 = memd($Rs32)", -tc_65dc7cc4, 
TypeMAPPING> { +tc_2fc0c436, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -9954,7 +10297,7 @@ def L2_ploadrhf_io : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32, u31_1Imm:$Ii), "if (!$Pt4) $Rd32 = memh($Rs32+#$Ii)", -tc_14da557c, TypeV2LDST>, Enc_a198f6, AddrModeRel { +tc_ef52ed71, TypeV2LDST>, Enc_a198f6, AddrModeRel { let Inst{13-13} = 0b0; let Inst{31-21} = 0b01000101010; let isPredicated = 1; @@ -9976,7 +10319,7 @@ def L2_ploadrhf_pi : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_1Imm:$Ii), "if (!$Pt4) $Rd32 = memh($Rx32++#$Ii)", -tc_ae762521, TypeLD>, Enc_733b27, PredNewRel { +tc_bad2bcaf, TypeLD>, Enc_733b27, PredNewRel { let Inst{13-11} = 0b101; let Inst{31-21} = 0b10011011010; let isPredicated = 1; @@ -9993,7 +10336,7 @@ def L2_ploadrhf_zomap : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32), "if (!$Pt4) $Rd32 = memh($Rs32)", -tc_14da557c, TypeMAPPING> { +tc_ef52ed71, TypeMAPPING> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -10003,7 +10346,7 @@ def L2_ploadrhfnew_io : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32, u31_1Imm:$Ii), "if (!$Pt4.new) $Rd32 = memh($Rs32+#$Ii)", -tc_65dc7cc4, TypeV2LDST>, Enc_a198f6, AddrModeRel { +tc_2fc0c436, TypeV2LDST>, Enc_a198f6, AddrModeRel { let Inst{13-13} = 0b0; let Inst{31-21} = 0b01000111010; let isPredicated = 1; @@ -10026,7 +10369,7 @@ def L2_ploadrhfnew_pi : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_1Imm:$Ii), "if (!$Pt4.new) $Rd32 = memh($Rx32++#$Ii)", -tc_e578178f, TypeLD>, Enc_733b27, PredNewRel { +tc_63fe3df7, TypeLD>, Enc_733b27, PredNewRel { let Inst{13-11} = 0b111; let Inst{31-21} = 0b10011011010; let isPredicated = 1; @@ -10044,7 +10387,7 @@ def L2_ploadrhfnew_zomap : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32), "if (!$Pt4.new) $Rd32 = memh($Rs32)", -tc_65dc7cc4, TypeMAPPING> { +tc_2fc0c436, TypeMAPPING> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -10054,7 +10397,7 @@ def L2_ploadrht_io : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32, u31_1Imm:$Ii), "if ($Pt4) $Rd32 = memh($Rs32+#$Ii)", -tc_14da557c, TypeV2LDST>, Enc_a198f6, AddrModeRel { +tc_ef52ed71, TypeV2LDST>, Enc_a198f6, AddrModeRel { let Inst{13-13} = 0b0; let Inst{31-21} = 0b01000001010; let isPredicated = 1; @@ -10075,7 +10418,7 @@ def L2_ploadrht_pi : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_1Imm:$Ii), "if ($Pt4) $Rd32 = memh($Rx32++#$Ii)", -tc_ae762521, TypeLD>, Enc_733b27, PredNewRel { +tc_bad2bcaf, TypeLD>, Enc_733b27, PredNewRel { let Inst{13-11} = 0b100; let Inst{31-21} = 0b10011011010; let isPredicated = 1; @@ -10091,7 +10434,7 @@ def L2_ploadrht_zomap : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32), "if ($Pt4) $Rd32 = memh($Rs32)", -tc_14da557c, TypeMAPPING> { +tc_ef52ed71, TypeMAPPING> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -10101,7 +10444,7 @@ def L2_ploadrhtnew_io : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32, u31_1Imm:$Ii), "if ($Pt4.new) $Rd32 = memh($Rs32+#$Ii)", -tc_65dc7cc4, TypeV2LDST>, Enc_a198f6, AddrModeRel { +tc_2fc0c436, TypeV2LDST>, Enc_a198f6, AddrModeRel { let Inst{13-13} = 0b0; let Inst{31-21} = 0b01000011010; let isPredicated = 1; @@ -10123,7 +10466,7 @@ def L2_ploadrhtnew_pi : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_1Imm:$Ii), "if ($Pt4.new) $Rd32 = memh($Rx32++#$Ii)", 
-tc_e578178f, TypeLD>, Enc_733b27, PredNewRel { +tc_63fe3df7, TypeLD>, Enc_733b27, PredNewRel { let Inst{13-11} = 0b110; let Inst{31-21} = 0b10011011010; let isPredicated = 1; @@ -10140,7 +10483,7 @@ def L2_ploadrhtnew_zomap : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32), "if ($Pt4.new) $Rd32 = memh($Rs32)", -tc_65dc7cc4, TypeMAPPING> { +tc_2fc0c436, TypeMAPPING> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -10150,7 +10493,7 @@ def L2_ploadrif_io : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32, u30_2Imm:$Ii), "if (!$Pt4) $Rd32 = memw($Rs32+#$Ii)", -tc_14da557c, TypeV2LDST>, Enc_f82eaf, AddrModeRel { +tc_ef52ed71, TypeV2LDST>, Enc_f82eaf, AddrModeRel { let Inst{13-13} = 0b0; let Inst{31-21} = 0b01000101100; let isPredicated = 1; @@ -10172,7 +10515,7 @@ def L2_ploadrif_pi : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_2Imm:$Ii), "if (!$Pt4) $Rd32 = memw($Rx32++#$Ii)", -tc_ae762521, TypeLD>, Enc_b97f71, PredNewRel { +tc_bad2bcaf, TypeLD>, Enc_b97f71, PredNewRel { let Inst{13-11} = 0b101; let Inst{31-21} = 0b10011011100; let isPredicated = 1; @@ -10189,7 +10532,7 @@ def L2_ploadrif_zomap : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32), "if (!$Pt4) $Rd32 = memw($Rs32)", -tc_14da557c, TypeMAPPING> { +tc_ef52ed71, TypeMAPPING> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -10199,7 +10542,7 @@ def L2_ploadrifnew_io : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32, u30_2Imm:$Ii), "if (!$Pt4.new) $Rd32 = memw($Rs32+#$Ii)", -tc_65dc7cc4, TypeV2LDST>, Enc_f82eaf, AddrModeRel { +tc_2fc0c436, TypeV2LDST>, Enc_f82eaf, AddrModeRel { let Inst{13-13} = 0b0; let Inst{31-21} = 0b01000111100; let isPredicated = 1; @@ -10222,7 +10565,7 @@ def L2_ploadrifnew_pi : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_2Imm:$Ii), "if (!$Pt4.new) $Rd32 = memw($Rx32++#$Ii)", -tc_e578178f, TypeLD>, Enc_b97f71, PredNewRel { +tc_63fe3df7, TypeLD>, Enc_b97f71, PredNewRel { let Inst{13-11} = 0b111; let Inst{31-21} = 0b10011011100; let isPredicated = 1; @@ -10240,7 +10583,7 @@ def L2_ploadrifnew_zomap : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32), "if (!$Pt4.new) $Rd32 = memw($Rs32)", -tc_65dc7cc4, TypeMAPPING> { +tc_2fc0c436, TypeMAPPING> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -10250,7 +10593,7 @@ def L2_ploadrit_io : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32, u30_2Imm:$Ii), "if ($Pt4) $Rd32 = memw($Rs32+#$Ii)", -tc_14da557c, TypeV2LDST>, Enc_f82eaf, AddrModeRel { +tc_ef52ed71, TypeV2LDST>, Enc_f82eaf, AddrModeRel { let Inst{13-13} = 0b0; let Inst{31-21} = 0b01000001100; let isPredicated = 1; @@ -10271,7 +10614,7 @@ def L2_ploadrit_pi : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_2Imm:$Ii), "if ($Pt4) $Rd32 = memw($Rx32++#$Ii)", -tc_ae762521, TypeLD>, Enc_b97f71, PredNewRel { +tc_bad2bcaf, TypeLD>, Enc_b97f71, PredNewRel { let Inst{13-11} = 0b100; let Inst{31-21} = 0b10011011100; let isPredicated = 1; @@ -10287,7 +10630,7 @@ def L2_ploadrit_zomap : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32), "if ($Pt4) $Rd32 = memw($Rs32)", -tc_14da557c, TypeMAPPING> { +tc_ef52ed71, TypeMAPPING> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -10297,7 +10640,7 @@ def L2_ploadritnew_io : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32, u30_2Imm:$Ii), "if ($Pt4.new) $Rd32 = memw($Rs32+#$Ii)", -tc_65dc7cc4, 
TypeV2LDST>, Enc_f82eaf, AddrModeRel { +tc_2fc0c436, TypeV2LDST>, Enc_f82eaf, AddrModeRel { let Inst{13-13} = 0b0; let Inst{31-21} = 0b01000011100; let isPredicated = 1; @@ -10319,7 +10662,7 @@ def L2_ploadritnew_pi : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_2Imm:$Ii), "if ($Pt4.new) $Rd32 = memw($Rx32++#$Ii)", -tc_e578178f, TypeLD>, Enc_b97f71, PredNewRel { +tc_63fe3df7, TypeLD>, Enc_b97f71, PredNewRel { let Inst{13-11} = 0b110; let Inst{31-21} = 0b10011011100; let isPredicated = 1; @@ -10336,7 +10679,7 @@ def L2_ploadritnew_zomap : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32), "if ($Pt4.new) $Rd32 = memw($Rs32)", -tc_65dc7cc4, TypeMAPPING> { +tc_2fc0c436, TypeMAPPING> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -10346,7 +10689,7 @@ def L2_ploadrubf_io : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32, u32_0Imm:$Ii), "if (!$Pt4) $Rd32 = memub($Rs32+#$Ii)", -tc_14da557c, TypeV2LDST>, Enc_a21d47, AddrModeRel { +tc_ef52ed71, TypeV2LDST>, Enc_a21d47, AddrModeRel { let Inst{13-13} = 0b0; let Inst{31-21} = 0b01000101001; let isPredicated = 1; @@ -10368,7 +10711,7 @@ def L2_ploadrubf_pi : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_0Imm:$Ii), "if (!$Pt4) $Rd32 = memub($Rx32++#$Ii)", -tc_ae762521, TypeLD>, Enc_f4413a, PredNewRel { +tc_bad2bcaf, TypeLD>, Enc_f4413a, PredNewRel { let Inst{13-11} = 0b101; let Inst{31-21} = 0b10011011001; let isPredicated = 1; @@ -10385,7 +10728,7 @@ def L2_ploadrubf_zomap : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32), "if (!$Pt4) $Rd32 = memub($Rs32)", -tc_14da557c, TypeMAPPING> { +tc_ef52ed71, TypeMAPPING> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -10395,7 +10738,7 @@ def L2_ploadrubfnew_io : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32, u32_0Imm:$Ii), "if (!$Pt4.new) $Rd32 = memub($Rs32+#$Ii)", -tc_65dc7cc4, TypeV2LDST>, Enc_a21d47, AddrModeRel { +tc_2fc0c436, TypeV2LDST>, Enc_a21d47, AddrModeRel { let Inst{13-13} = 0b0; let Inst{31-21} = 0b01000111001; let isPredicated = 1; @@ -10418,7 +10761,7 @@ def L2_ploadrubfnew_pi : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_0Imm:$Ii), "if (!$Pt4.new) $Rd32 = memub($Rx32++#$Ii)", -tc_e578178f, TypeLD>, Enc_f4413a, PredNewRel { +tc_63fe3df7, TypeLD>, Enc_f4413a, PredNewRel { let Inst{13-11} = 0b111; let Inst{31-21} = 0b10011011001; let isPredicated = 1; @@ -10436,7 +10779,7 @@ def L2_ploadrubfnew_zomap : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32), "if (!$Pt4.new) $Rd32 = memub($Rs32)", -tc_65dc7cc4, TypeMAPPING> { +tc_2fc0c436, TypeMAPPING> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -10446,7 +10789,7 @@ def L2_ploadrubt_io : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32, u32_0Imm:$Ii), "if ($Pt4) $Rd32 = memub($Rs32+#$Ii)", -tc_14da557c, TypeV2LDST>, Enc_a21d47, AddrModeRel { +tc_ef52ed71, TypeV2LDST>, Enc_a21d47, AddrModeRel { let Inst{13-13} = 0b0; let Inst{31-21} = 0b01000001001; let isPredicated = 1; @@ -10467,7 +10810,7 @@ def L2_ploadrubt_pi : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_0Imm:$Ii), "if ($Pt4) $Rd32 = memub($Rx32++#$Ii)", -tc_ae762521, TypeLD>, Enc_f4413a, PredNewRel { +tc_bad2bcaf, TypeLD>, Enc_f4413a, PredNewRel { let Inst{13-11} = 0b100; let Inst{31-21} = 0b10011011001; let isPredicated = 1; @@ -10483,7 +10826,7 @@ def L2_ploadrubt_zomap : HInst< (outs IntRegs:$Rd32), 
(ins PredRegs:$Pt4, IntRegs:$Rs32), "if ($Pt4) $Rd32 = memub($Rs32)", -tc_14da557c, TypeMAPPING> { +tc_ef52ed71, TypeMAPPING> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -10493,7 +10836,7 @@ def L2_ploadrubtnew_io : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32, u32_0Imm:$Ii), "if ($Pt4.new) $Rd32 = memub($Rs32+#$Ii)", -tc_65dc7cc4, TypeV2LDST>, Enc_a21d47, AddrModeRel { +tc_2fc0c436, TypeV2LDST>, Enc_a21d47, AddrModeRel { let Inst{13-13} = 0b0; let Inst{31-21} = 0b01000011001; let isPredicated = 1; @@ -10515,7 +10858,7 @@ def L2_ploadrubtnew_pi : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_0Imm:$Ii), "if ($Pt4.new) $Rd32 = memub($Rx32++#$Ii)", -tc_e578178f, TypeLD>, Enc_f4413a, PredNewRel { +tc_63fe3df7, TypeLD>, Enc_f4413a, PredNewRel { let Inst{13-11} = 0b110; let Inst{31-21} = 0b10011011001; let isPredicated = 1; @@ -10532,7 +10875,7 @@ def L2_ploadrubtnew_zomap : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32), "if ($Pt4.new) $Rd32 = memub($Rs32)", -tc_65dc7cc4, TypeMAPPING> { +tc_2fc0c436, TypeMAPPING> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -10542,7 +10885,7 @@ def L2_ploadruhf_io : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32, u31_1Imm:$Ii), "if (!$Pt4) $Rd32 = memuh($Rs32+#$Ii)", -tc_14da557c, TypeV2LDST>, Enc_a198f6, AddrModeRel { +tc_ef52ed71, TypeV2LDST>, Enc_a198f6, AddrModeRel { let Inst{13-13} = 0b0; let Inst{31-21} = 0b01000101011; let isPredicated = 1; @@ -10564,7 +10907,7 @@ def L2_ploadruhf_pi : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_1Imm:$Ii), "if (!$Pt4) $Rd32 = memuh($Rx32++#$Ii)", -tc_ae762521, TypeLD>, Enc_733b27, PredNewRel { +tc_bad2bcaf, TypeLD>, Enc_733b27, PredNewRel { let Inst{13-11} = 0b101; let Inst{31-21} = 0b10011011011; let isPredicated = 1; @@ -10581,7 +10924,7 @@ def L2_ploadruhf_zomap : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32), "if (!$Pt4) $Rd32 = memuh($Rs32)", -tc_14da557c, TypeMAPPING> { +tc_ef52ed71, TypeMAPPING> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -10591,7 +10934,7 @@ def L2_ploadruhfnew_io : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32, u31_1Imm:$Ii), "if (!$Pt4.new) $Rd32 = memuh($Rs32+#$Ii)", -tc_65dc7cc4, TypeV2LDST>, Enc_a198f6, AddrModeRel { +tc_2fc0c436, TypeV2LDST>, Enc_a198f6, AddrModeRel { let Inst{13-13} = 0b0; let Inst{31-21} = 0b01000111011; let isPredicated = 1; @@ -10614,7 +10957,7 @@ def L2_ploadruhfnew_pi : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_1Imm:$Ii), "if (!$Pt4.new) $Rd32 = memuh($Rx32++#$Ii)", -tc_e578178f, TypeLD>, Enc_733b27, PredNewRel { +tc_63fe3df7, TypeLD>, Enc_733b27, PredNewRel { let Inst{13-11} = 0b111; let Inst{31-21} = 0b10011011011; let isPredicated = 1; @@ -10632,7 +10975,7 @@ def L2_ploadruhfnew_zomap : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32), "if (!$Pt4.new) $Rd32 = memuh($Rs32)", -tc_65dc7cc4, TypeMAPPING> { +tc_2fc0c436, TypeMAPPING> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -10642,7 +10985,7 @@ def L2_ploadruht_io : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32, u31_1Imm:$Ii), "if ($Pt4) $Rd32 = memuh($Rs32+#$Ii)", -tc_14da557c, TypeV2LDST>, Enc_a198f6, AddrModeRel { +tc_ef52ed71, TypeV2LDST>, Enc_a198f6, AddrModeRel { let Inst{13-13} = 0b0; let Inst{31-21} = 0b01000001011; let isPredicated = 1; @@ -10663,7 +11006,7 @@ def L2_ploadruht_pi : HInst< (outs 
IntRegs:$Rd32, IntRegs:$Rx32), (ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_1Imm:$Ii), "if ($Pt4) $Rd32 = memuh($Rx32++#$Ii)", -tc_ae762521, TypeLD>, Enc_733b27, PredNewRel { +tc_bad2bcaf, TypeLD>, Enc_733b27, PredNewRel { let Inst{13-11} = 0b100; let Inst{31-21} = 0b10011011011; let isPredicated = 1; @@ -10679,7 +11022,7 @@ def L2_ploadruht_zomap : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32), "if ($Pt4) $Rd32 = memuh($Rs32)", -tc_14da557c, TypeMAPPING> { +tc_ef52ed71, TypeMAPPING> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -10689,7 +11032,7 @@ def L2_ploadruhtnew_io : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32, u31_1Imm:$Ii), "if ($Pt4.new) $Rd32 = memuh($Rs32+#$Ii)", -tc_65dc7cc4, TypeV2LDST>, Enc_a198f6, AddrModeRel { +tc_2fc0c436, TypeV2LDST>, Enc_a198f6, AddrModeRel { let Inst{13-13} = 0b0; let Inst{31-21} = 0b01000011011; let isPredicated = 1; @@ -10711,7 +11054,7 @@ def L2_ploadruhtnew_pi : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_1Imm:$Ii), "if ($Pt4.new) $Rd32 = memuh($Rx32++#$Ii)", -tc_e578178f, TypeLD>, Enc_733b27, PredNewRel { +tc_63fe3df7, TypeLD>, Enc_733b27, PredNewRel { let Inst{13-11} = 0b110; let Inst{31-21} = 0b10011011011; let isPredicated = 1; @@ -10728,7 +11071,7 @@ def L2_ploadruhtnew_zomap : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32), "if ($Pt4.new) $Rd32 = memuh($Rs32)", -tc_65dc7cc4, TypeMAPPING> { +tc_2fc0c436, TypeMAPPING> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -10738,13 +11081,14 @@ def L4_add_memopb_io : HInst< (outs), (ins IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Rt32), "memb($Rs32+#$Ii) += $Rt32", -tc_a9c993d9, TypeV4LDST>, Enc_d44e31 { +tc_44126683, TypeV4LDST>, Enc_d44e31 { let Inst{6-5} = 0b00; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00111110000; let addrMode = BaseImmOffset; let accessSize = ByteAccess; let mayLoad = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let isExtendable = 1; let opExtendable = 1; @@ -10756,7 +11100,7 @@ def L4_add_memopb_zomap : HInst< (outs), (ins IntRegs:$Rs32, IntRegs:$Rt32), "memb($Rs32) += $Rt32", -tc_a9c993d9, TypeMAPPING> { +tc_44126683, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -10764,13 +11108,14 @@ def L4_add_memoph_io : HInst< (outs), (ins IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32), "memh($Rs32+#$Ii) += $Rt32", -tc_a9c993d9, TypeV4LDST>, Enc_163a3c { +tc_44126683, TypeV4LDST>, Enc_163a3c { let Inst{6-5} = 0b00; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00111110001; let addrMode = BaseImmOffset; let accessSize = HalfWordAccess; let mayLoad = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let isExtendable = 1; let opExtendable = 1; @@ -10782,7 +11127,7 @@ def L4_add_memoph_zomap : HInst< (outs), (ins IntRegs:$Rs32, IntRegs:$Rt32), "memh($Rs32) += $Rt32", -tc_a9c993d9, TypeMAPPING> { +tc_44126683, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -10790,13 +11135,14 @@ def L4_add_memopw_io : HInst< (outs), (ins IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Rt32), "memw($Rs32+#$Ii) += $Rt32", -tc_a9c993d9, TypeV4LDST>, Enc_226535 { +tc_44126683, TypeV4LDST>, Enc_226535 { let Inst{6-5} = 0b00; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00111110010; let addrMode = BaseImmOffset; let accessSize = WordAccess; let mayLoad = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let isExtendable = 1; let opExtendable = 1; @@ -10808,7 +11154,7 @@ def L4_add_memopw_zomap : HInst< (outs), (ins IntRegs:$Rs32, IntRegs:$Rt32), "memw($Rs32) += $Rt32", 
-tc_a9c993d9, TypeMAPPING> { +tc_44126683, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -10816,13 +11162,14 @@ def L4_and_memopb_io : HInst< (outs), (ins IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Rt32), "memb($Rs32+#$Ii) &= $Rt32", -tc_a9c993d9, TypeV4LDST>, Enc_d44e31 { +tc_44126683, TypeV4LDST>, Enc_d44e31 { let Inst{6-5} = 0b10; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00111110000; let addrMode = BaseImmOffset; let accessSize = ByteAccess; let mayLoad = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let isExtendable = 1; let opExtendable = 1; @@ -10834,7 +11181,7 @@ def L4_and_memopb_zomap : HInst< (outs), (ins IntRegs:$Rs32, IntRegs:$Rt32), "memb($Rs32) &= $Rt32", -tc_a9c993d9, TypeMAPPING> { +tc_44126683, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -10842,13 +11189,14 @@ def L4_and_memoph_io : HInst< (outs), (ins IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32), "memh($Rs32+#$Ii) &= $Rt32", -tc_a9c993d9, TypeV4LDST>, Enc_163a3c { +tc_44126683, TypeV4LDST>, Enc_163a3c { let Inst{6-5} = 0b10; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00111110001; let addrMode = BaseImmOffset; let accessSize = HalfWordAccess; let mayLoad = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let isExtendable = 1; let opExtendable = 1; @@ -10860,7 +11208,7 @@ def L4_and_memoph_zomap : HInst< (outs), (ins IntRegs:$Rs32, IntRegs:$Rt32), "memh($Rs32) &= $Rt32", -tc_a9c993d9, TypeMAPPING> { +tc_44126683, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -10868,13 +11216,14 @@ def L4_and_memopw_io : HInst< (outs), (ins IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Rt32), "memw($Rs32+#$Ii) &= $Rt32", -tc_a9c993d9, TypeV4LDST>, Enc_226535 { +tc_44126683, TypeV4LDST>, Enc_226535 { let Inst{6-5} = 0b10; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00111110010; let addrMode = BaseImmOffset; let accessSize = WordAccess; let mayLoad = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let isExtendable = 1; let opExtendable = 1; @@ -10886,7 +11235,7 @@ def L4_and_memopw_zomap : HInst< (outs), (ins IntRegs:$Rs32, IntRegs:$Rt32), "memw($Rs32) &= $Rt32", -tc_a9c993d9, TypeMAPPING> { +tc_44126683, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -10894,13 +11243,14 @@ def L4_iadd_memopb_io : HInst< (outs), (ins IntRegs:$Rs32, u32_0Imm:$Ii, u5_0Imm:$II), "memb($Rs32+#$Ii) += #$II", -tc_da79106e, TypeV4LDST>, Enc_46c951 { +tc_44126683, TypeV4LDST>, Enc_46c951 { let Inst{6-5} = 0b00; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00111111000; let addrMode = BaseImmOffset; let accessSize = ByteAccess; let mayLoad = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let isExtendable = 1; let opExtendable = 1; @@ -10912,7 +11262,7 @@ def L4_iadd_memopb_zomap : HInst< (outs), (ins IntRegs:$Rs32, u5_0Imm:$II), "memb($Rs32) += #$II", -tc_da79106e, TypeMAPPING> { +tc_44126683, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -10920,13 +11270,14 @@ def L4_iadd_memoph_io : HInst< (outs), (ins IntRegs:$Rs32, u31_1Imm:$Ii, u5_0Imm:$II), "memh($Rs32+#$Ii) += #$II", -tc_da79106e, TypeV4LDST>, Enc_e66a97 { +tc_44126683, TypeV4LDST>, Enc_e66a97 { let Inst{6-5} = 0b00; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00111111001; let addrMode = BaseImmOffset; let accessSize = HalfWordAccess; let mayLoad = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let isExtendable = 1; let opExtendable = 1; @@ -10938,7 +11289,7 @@ def L4_iadd_memoph_zomap : HInst< (outs), (ins IntRegs:$Rs32, u5_0Imm:$II), "memh($Rs32) += #$II", -tc_da79106e, TypeMAPPING> { +tc_44126683, TypeMAPPING> { let 
isPseudo = 1; let isCodeGenOnly = 1; } @@ -10946,13 +11297,14 @@ def L4_iadd_memopw_io : HInst< (outs), (ins IntRegs:$Rs32, u30_2Imm:$Ii, u5_0Imm:$II), "memw($Rs32+#$Ii) += #$II", -tc_da79106e, TypeV4LDST>, Enc_84b2cd { +tc_44126683, TypeV4LDST>, Enc_84b2cd { let Inst{6-5} = 0b00; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00111111010; let addrMode = BaseImmOffset; let accessSize = WordAccess; let mayLoad = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let isExtendable = 1; let opExtendable = 1; @@ -10964,7 +11316,7 @@ def L4_iadd_memopw_zomap : HInst< (outs), (ins IntRegs:$Rs32, u5_0Imm:$II), "memw($Rs32) += #$II", -tc_da79106e, TypeMAPPING> { +tc_44126683, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -10972,13 +11324,14 @@ def L4_iand_memopb_io : HInst< (outs), (ins IntRegs:$Rs32, u32_0Imm:$Ii, u5_0Imm:$II), "memb($Rs32+#$Ii) = clrbit(#$II)", -tc_da79106e, TypeV4LDST>, Enc_46c951 { +tc_44126683, TypeV4LDST>, Enc_46c951 { let Inst{6-5} = 0b10; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00111111000; let addrMode = BaseImmOffset; let accessSize = ByteAccess; let mayLoad = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let isExtendable = 1; let opExtendable = 1; @@ -10990,7 +11343,7 @@ def L4_iand_memopb_zomap : HInst< (outs), (ins IntRegs:$Rs32, u5_0Imm:$II), "memb($Rs32) = clrbit(#$II)", -tc_da79106e, TypeMAPPING> { +tc_44126683, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -10998,13 +11351,14 @@ def L4_iand_memoph_io : HInst< (outs), (ins IntRegs:$Rs32, u31_1Imm:$Ii, u5_0Imm:$II), "memh($Rs32+#$Ii) = clrbit(#$II)", -tc_da79106e, TypeV4LDST>, Enc_e66a97 { +tc_44126683, TypeV4LDST>, Enc_e66a97 { let Inst{6-5} = 0b10; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00111111001; let addrMode = BaseImmOffset; let accessSize = HalfWordAccess; let mayLoad = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let isExtendable = 1; let opExtendable = 1; @@ -11016,7 +11370,7 @@ def L4_iand_memoph_zomap : HInst< (outs), (ins IntRegs:$Rs32, u5_0Imm:$II), "memh($Rs32) = clrbit(#$II)", -tc_da79106e, TypeMAPPING> { +tc_44126683, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -11024,13 +11378,14 @@ def L4_iand_memopw_io : HInst< (outs), (ins IntRegs:$Rs32, u30_2Imm:$Ii, u5_0Imm:$II), "memw($Rs32+#$Ii) = clrbit(#$II)", -tc_da79106e, TypeV4LDST>, Enc_84b2cd { +tc_44126683, TypeV4LDST>, Enc_84b2cd { let Inst{6-5} = 0b10; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00111111010; let addrMode = BaseImmOffset; let accessSize = WordAccess; let mayLoad = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let isExtendable = 1; let opExtendable = 1; @@ -11042,7 +11397,7 @@ def L4_iand_memopw_zomap : HInst< (outs), (ins IntRegs:$Rs32, u5_0Imm:$II), "memw($Rs32) = clrbit(#$II)", -tc_da79106e, TypeMAPPING> { +tc_44126683, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -11050,13 +11405,14 @@ def L4_ior_memopb_io : HInst< (outs), (ins IntRegs:$Rs32, u32_0Imm:$Ii, u5_0Imm:$II), "memb($Rs32+#$Ii) = setbit(#$II)", -tc_da79106e, TypeV4LDST>, Enc_46c951 { +tc_44126683, TypeV4LDST>, Enc_46c951 { let Inst{6-5} = 0b11; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00111111000; let addrMode = BaseImmOffset; let accessSize = ByteAccess; let mayLoad = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let isExtendable = 1; let opExtendable = 1; @@ -11068,7 +11424,7 @@ def L4_ior_memopb_zomap : HInst< (outs), (ins IntRegs:$Rs32, u5_0Imm:$II), "memb($Rs32) = setbit(#$II)", -tc_da79106e, TypeMAPPING> { +tc_44126683, TypeMAPPING> { let isPseudo = 1; let 
isCodeGenOnly = 1; } @@ -11076,13 +11432,14 @@ def L4_ior_memoph_io : HInst< (outs), (ins IntRegs:$Rs32, u31_1Imm:$Ii, u5_0Imm:$II), "memh($Rs32+#$Ii) = setbit(#$II)", -tc_da79106e, TypeV4LDST>, Enc_e66a97 { +tc_44126683, TypeV4LDST>, Enc_e66a97 { let Inst{6-5} = 0b11; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00111111001; let addrMode = BaseImmOffset; let accessSize = HalfWordAccess; let mayLoad = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let isExtendable = 1; let opExtendable = 1; @@ -11094,7 +11451,7 @@ def L4_ior_memoph_zomap : HInst< (outs), (ins IntRegs:$Rs32, u5_0Imm:$II), "memh($Rs32) = setbit(#$II)", -tc_da79106e, TypeMAPPING> { +tc_44126683, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -11102,13 +11459,14 @@ def L4_ior_memopw_io : HInst< (outs), (ins IntRegs:$Rs32, u30_2Imm:$Ii, u5_0Imm:$II), "memw($Rs32+#$Ii) = setbit(#$II)", -tc_da79106e, TypeV4LDST>, Enc_84b2cd { +tc_44126683, TypeV4LDST>, Enc_84b2cd { let Inst{6-5} = 0b11; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00111111010; let addrMode = BaseImmOffset; let accessSize = WordAccess; let mayLoad = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let isExtendable = 1; let opExtendable = 1; @@ -11120,7 +11478,7 @@ def L4_ior_memopw_zomap : HInst< (outs), (ins IntRegs:$Rs32, u5_0Imm:$II), "memw($Rs32) = setbit(#$II)", -tc_da79106e, TypeMAPPING> { +tc_44126683, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -11128,13 +11486,14 @@ def L4_isub_memopb_io : HInst< (outs), (ins IntRegs:$Rs32, u32_0Imm:$Ii, u5_0Imm:$II), "memb($Rs32+#$Ii) -= #$II", -tc_da79106e, TypeV4LDST>, Enc_46c951 { +tc_44126683, TypeV4LDST>, Enc_46c951 { let Inst{6-5} = 0b01; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00111111000; let addrMode = BaseImmOffset; let accessSize = ByteAccess; let mayLoad = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let isExtendable = 1; let opExtendable = 1; @@ -11146,7 +11505,7 @@ def L4_isub_memopb_zomap : HInst< (outs), (ins IntRegs:$Rs32, u5_0Imm:$II), "memb($Rs32) -= #$II", -tc_da79106e, TypeMAPPING> { +tc_44126683, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -11154,13 +11513,14 @@ def L4_isub_memoph_io : HInst< (outs), (ins IntRegs:$Rs32, u31_1Imm:$Ii, u5_0Imm:$II), "memh($Rs32+#$Ii) -= #$II", -tc_da79106e, TypeV4LDST>, Enc_e66a97 { +tc_44126683, TypeV4LDST>, Enc_e66a97 { let Inst{6-5} = 0b01; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00111111001; let addrMode = BaseImmOffset; let accessSize = HalfWordAccess; let mayLoad = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let isExtendable = 1; let opExtendable = 1; @@ -11172,7 +11532,7 @@ def L4_isub_memoph_zomap : HInst< (outs), (ins IntRegs:$Rs32, u5_0Imm:$II), "memh($Rs32) -= #$II", -tc_da79106e, TypeMAPPING> { +tc_44126683, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -11180,13 +11540,14 @@ def L4_isub_memopw_io : HInst< (outs), (ins IntRegs:$Rs32, u30_2Imm:$Ii, u5_0Imm:$II), "memw($Rs32+#$Ii) -= #$II", -tc_da79106e, TypeV4LDST>, Enc_84b2cd { +tc_44126683, TypeV4LDST>, Enc_84b2cd { let Inst{6-5} = 0b01; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00111111010; let addrMode = BaseImmOffset; let accessSize = WordAccess; let mayLoad = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let isExtendable = 1; let opExtendable = 1; @@ -11198,7 +11559,7 @@ def L4_isub_memopw_zomap : HInst< (outs), (ins IntRegs:$Rs32, u5_0Imm:$II), "memw($Rs32) -= #$II", -tc_da79106e, TypeMAPPING> { +tc_44126683, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -11206,12 +11567,10 @@ def 
L4_loadalignb_ap : HInst< (outs DoubleRegs:$Ryy32, IntRegs:$Re32), (ins DoubleRegs:$Ryy32in, u32_0Imm:$II), "$Ryy32 = memb_fifo($Re32=#$II)", -tc_261d9b78, TypeLD>, Enc_f394d3 { +tc_5acef64a, TypeLD>, Enc_f394d3 { let Inst{7-7} = 0b0; let Inst{13-12} = 0b01; let Inst{31-21} = 0b10011010100; -let hasNewValue = 1; -let opNewValue = 1; let addrMode = AbsoluteSet; let accessSize = ByteAccess; let mayLoad = 1; @@ -11228,7 +11587,7 @@ def L4_loadalignb_ur : HInst< (outs DoubleRegs:$Ryy32), (ins DoubleRegs:$Ryy32in, IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II), "$Ryy32 = memb_fifo($Rt32<<#$Ii+#$II)", -tc_baccf077, TypeLD>, Enc_04c959 { +tc_0cd51c76, TypeLD>, Enc_04c959 { let Inst{12-12} = 0b1; let Inst{31-21} = 0b10011100100; let addrMode = BaseLongOffset; @@ -11248,12 +11607,10 @@ def L4_loadalignh_ap : HInst< (outs DoubleRegs:$Ryy32, IntRegs:$Re32), (ins DoubleRegs:$Ryy32in, u32_0Imm:$II), "$Ryy32 = memh_fifo($Re32=#$II)", -tc_261d9b78, TypeLD>, Enc_f394d3 { +tc_5acef64a, TypeLD>, Enc_f394d3 { let Inst{7-7} = 0b0; let Inst{13-12} = 0b01; let Inst{31-21} = 0b10011010010; -let hasNewValue = 1; -let opNewValue = 1; let addrMode = AbsoluteSet; let accessSize = HalfWordAccess; let mayLoad = 1; @@ -11270,7 +11627,7 @@ def L4_loadalignh_ur : HInst< (outs DoubleRegs:$Ryy32), (ins DoubleRegs:$Ryy32in, IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II), "$Ryy32 = memh_fifo($Rt32<<#$Ii+#$II)", -tc_baccf077, TypeLD>, Enc_04c959 { +tc_0cd51c76, TypeLD>, Enc_04c959 { let Inst{12-12} = 0b1; let Inst{31-21} = 0b10011100010; let addrMode = BaseLongOffset; @@ -11290,14 +11647,12 @@ def L4_loadbsw2_ap : HInst< (outs IntRegs:$Rd32, IntRegs:$Re32), (ins u32_0Imm:$II), "$Rd32 = membh($Re32=#$II)", -tc_b5f5a094, TypeLD>, Enc_323f2d { +tc_b77c481f, TypeLD>, Enc_323f2d { let Inst{7-7} = 0b0; let Inst{13-12} = 0b01; let Inst{31-21} = 0b10011010001; let hasNewValue = 1; let opNewValue = 0; -let hasNewValue2 = 1; -let opNewValue2 = 1; let addrMode = AbsoluteSet; let accessSize = HalfWordAccess; let mayLoad = 1; @@ -11313,7 +11668,7 @@ def L4_loadbsw2_ur : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II), "$Rd32 = membh($Rt32<<#$Ii+#$II)", -tc_7d9a56cd, TypeLD>, Enc_4f677b { +tc_cf47a43f, TypeLD>, Enc_4f677b { let Inst{12-12} = 0b1; let Inst{31-21} = 0b10011100001; let hasNewValue = 1; @@ -11334,12 +11689,10 @@ def L4_loadbsw4_ap : HInst< (outs DoubleRegs:$Rdd32, IntRegs:$Re32), (ins u32_0Imm:$II), "$Rdd32 = membh($Re32=#$II)", -tc_b5f5a094, TypeLD>, Enc_7fa7f6 { +tc_b77c481f, TypeLD>, Enc_7fa7f6 { let Inst{7-7} = 0b0; let Inst{13-12} = 0b01; let Inst{31-21} = 0b10011010111; -let hasNewValue = 1; -let opNewValue = 1; let addrMode = AbsoluteSet; let accessSize = WordAccess; let mayLoad = 1; @@ -11355,7 +11708,7 @@ def L4_loadbsw4_ur : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II), "$Rdd32 = membh($Rt32<<#$Ii+#$II)", -tc_7d9a56cd, TypeLD>, Enc_6185fe { +tc_cf47a43f, TypeLD>, Enc_6185fe { let Inst{12-12} = 0b1; let Inst{31-21} = 0b10011100111; let addrMode = BaseLongOffset; @@ -11374,14 +11727,12 @@ def L4_loadbzw2_ap : HInst< (outs IntRegs:$Rd32, IntRegs:$Re32), (ins u32_0Imm:$II), "$Rd32 = memubh($Re32=#$II)", -tc_b5f5a094, TypeLD>, Enc_323f2d { +tc_b77c481f, TypeLD>, Enc_323f2d { let Inst{7-7} = 0b0; let Inst{13-12} = 0b01; let Inst{31-21} = 0b10011010011; let hasNewValue = 1; let opNewValue = 0; -let hasNewValue2 = 1; -let opNewValue2 = 1; let addrMode = AbsoluteSet; let accessSize = HalfWordAccess; let mayLoad = 1; @@ -11397,7 +11748,7 @@ def L4_loadbzw2_ur : HInst< (outs 
IntRegs:$Rd32), (ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II), "$Rd32 = memubh($Rt32<<#$Ii+#$II)", -tc_7d9a56cd, TypeLD>, Enc_4f677b { +tc_cf47a43f, TypeLD>, Enc_4f677b { let Inst{12-12} = 0b1; let Inst{31-21} = 0b10011100011; let hasNewValue = 1; @@ -11418,12 +11769,10 @@ def L4_loadbzw4_ap : HInst< (outs DoubleRegs:$Rdd32, IntRegs:$Re32), (ins u32_0Imm:$II), "$Rdd32 = memubh($Re32=#$II)", -tc_b5f5a094, TypeLD>, Enc_7fa7f6 { +tc_b77c481f, TypeLD>, Enc_7fa7f6 { let Inst{7-7} = 0b0; let Inst{13-12} = 0b01; let Inst{31-21} = 0b10011010101; -let hasNewValue = 1; -let opNewValue = 1; let addrMode = AbsoluteSet; let accessSize = WordAccess; let mayLoad = 1; @@ -11439,7 +11788,7 @@ def L4_loadbzw4_ur : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II), "$Rdd32 = memubh($Rt32<<#$Ii+#$II)", -tc_7d9a56cd, TypeLD>, Enc_6185fe { +tc_cf47a43f, TypeLD>, Enc_6185fe { let Inst{12-12} = 0b1; let Inst{31-21} = 0b10011100101; let addrMode = BaseLongOffset; @@ -11458,7 +11807,7 @@ def L4_loadd_locked : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32), "$Rdd32 = memd_locked($Rs32)", -tc_29c14515, TypeLD>, Enc_3a3d62 { +tc_6aa5711a, TypeLD>, Enc_3a3d62 { let Inst{13-5} = 0b010000000; let Inst{31-21} = 0b10010010000; let accessSize = DoubleWordAccess; @@ -11469,14 +11818,12 @@ def L4_loadrb_ap : HInst< (outs IntRegs:$Rd32, IntRegs:$Re32), (ins u32_0Imm:$II), "$Rd32 = memb($Re32=#$II)", -tc_b5f5a094, TypeLD>, Enc_323f2d { +tc_b77c481f, TypeLD>, Enc_323f2d { let Inst{7-7} = 0b0; let Inst{13-12} = 0b01; let Inst{31-21} = 0b10011011000; let hasNewValue = 1; let opNewValue = 0; -let hasNewValue2 = 1; -let opNewValue2 = 1; let addrMode = AbsoluteSet; let accessSize = ByteAccess; let mayLoad = 1; @@ -11492,7 +11839,7 @@ def L4_loadrb_rr : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii), "$Rd32 = memb($Rs32+$Rt32<<#$Ii)", -tc_5625c6c1, TypeLD>, Enc_da664b, AddrModeRel, ImmRegShl { +tc_f47d212f, TypeLD>, Enc_da664b, AddrModeRel, ImmRegShl { let Inst{6-5} = 0b00; let Inst{31-21} = 0b00111010000; let hasNewValue = 1; @@ -11509,7 +11856,7 @@ def L4_loadrb_ur : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II), "$Rd32 = memb($Rt32<<#$Ii+#$II)", -tc_7d9a56cd, TypeLD>, Enc_4f677b, AddrModeRel, ImmRegShl { +tc_cf47a43f, TypeLD>, Enc_4f677b, AddrModeRel, ImmRegShl { let Inst{12-12} = 0b1; let Inst{31-21} = 0b10011101000; let hasNewValue = 1; @@ -11531,12 +11878,10 @@ def L4_loadrd_ap : HInst< (outs DoubleRegs:$Rdd32, IntRegs:$Re32), (ins u32_0Imm:$II), "$Rdd32 = memd($Re32=#$II)", -tc_b5f5a094, TypeLD>, Enc_7fa7f6 { +tc_b77c481f, TypeLD>, Enc_7fa7f6 { let Inst{7-7} = 0b0; let Inst{13-12} = 0b01; let Inst{31-21} = 0b10011011110; -let hasNewValue = 1; -let opNewValue = 1; let addrMode = AbsoluteSet; let accessSize = DoubleWordAccess; let mayLoad = 1; @@ -11552,7 +11897,7 @@ def L4_loadrd_rr : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii), "$Rdd32 = memd($Rs32+$Rt32<<#$Ii)", -tc_5625c6c1, TypeLD>, Enc_84bff1, AddrModeRel, ImmRegShl { +tc_f47d212f, TypeLD>, Enc_84bff1, AddrModeRel, ImmRegShl { let Inst{6-5} = 0b00; let Inst{31-21} = 0b00111010110; let addrMode = BaseRegOffset; @@ -11567,7 +11912,7 @@ def L4_loadrd_ur : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II), "$Rdd32 = memd($Rt32<<#$Ii+#$II)", -tc_7d9a56cd, TypeLD>, Enc_6185fe, AddrModeRel, ImmRegShl { +tc_cf47a43f, TypeLD>, Enc_6185fe, AddrModeRel, ImmRegShl { let Inst{12-12} = 0b1; let Inst{31-21} = 0b10011101110; let 
addrMode = BaseLongOffset; @@ -11587,14 +11932,12 @@ def L4_loadrh_ap : HInst< (outs IntRegs:$Rd32, IntRegs:$Re32), (ins u32_0Imm:$II), "$Rd32 = memh($Re32=#$II)", -tc_b5f5a094, TypeLD>, Enc_323f2d { +tc_b77c481f, TypeLD>, Enc_323f2d { let Inst{7-7} = 0b0; let Inst{13-12} = 0b01; let Inst{31-21} = 0b10011011010; let hasNewValue = 1; let opNewValue = 0; -let hasNewValue2 = 1; -let opNewValue2 = 1; let addrMode = AbsoluteSet; let accessSize = HalfWordAccess; let mayLoad = 1; @@ -11610,7 +11953,7 @@ def L4_loadrh_rr : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii), "$Rd32 = memh($Rs32+$Rt32<<#$Ii)", -tc_5625c6c1, TypeLD>, Enc_da664b, AddrModeRel, ImmRegShl { +tc_f47d212f, TypeLD>, Enc_da664b, AddrModeRel, ImmRegShl { let Inst{6-5} = 0b00; let Inst{31-21} = 0b00111010010; let hasNewValue = 1; @@ -11627,7 +11970,7 @@ def L4_loadrh_ur : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II), "$Rd32 = memh($Rt32<<#$Ii+#$II)", -tc_7d9a56cd, TypeLD>, Enc_4f677b, AddrModeRel, ImmRegShl { +tc_cf47a43f, TypeLD>, Enc_4f677b, AddrModeRel, ImmRegShl { let Inst{12-12} = 0b1; let Inst{31-21} = 0b10011101010; let hasNewValue = 1; @@ -11649,14 +11992,12 @@ def L4_loadri_ap : HInst< (outs IntRegs:$Rd32, IntRegs:$Re32), (ins u32_0Imm:$II), "$Rd32 = memw($Re32=#$II)", -tc_b5f5a094, TypeLD>, Enc_323f2d { +tc_b77c481f, TypeLD>, Enc_323f2d { let Inst{7-7} = 0b0; let Inst{13-12} = 0b01; let Inst{31-21} = 0b10011011100; let hasNewValue = 1; let opNewValue = 0; -let hasNewValue2 = 1; -let opNewValue2 = 1; let addrMode = AbsoluteSet; let accessSize = WordAccess; let mayLoad = 1; @@ -11672,7 +12013,7 @@ def L4_loadri_rr : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii), "$Rd32 = memw($Rs32+$Rt32<<#$Ii)", -tc_5625c6c1, TypeLD>, Enc_da664b, AddrModeRel, ImmRegShl { +tc_f47d212f, TypeLD>, Enc_da664b, AddrModeRel, ImmRegShl { let Inst{6-5} = 0b00; let Inst{31-21} = 0b00111010100; let hasNewValue = 1; @@ -11689,7 +12030,7 @@ def L4_loadri_ur : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II), "$Rd32 = memw($Rt32<<#$Ii+#$II)", -tc_7d9a56cd, TypeLD>, Enc_4f677b, AddrModeRel, ImmRegShl { +tc_cf47a43f, TypeLD>, Enc_4f677b, AddrModeRel, ImmRegShl { let Inst{12-12} = 0b1; let Inst{31-21} = 0b10011101100; let hasNewValue = 1; @@ -11711,14 +12052,12 @@ def L4_loadrub_ap : HInst< (outs IntRegs:$Rd32, IntRegs:$Re32), (ins u32_0Imm:$II), "$Rd32 = memub($Re32=#$II)", -tc_b5f5a094, TypeLD>, Enc_323f2d { +tc_b77c481f, TypeLD>, Enc_323f2d { let Inst{7-7} = 0b0; let Inst{13-12} = 0b01; let Inst{31-21} = 0b10011011001; let hasNewValue = 1; let opNewValue = 0; -let hasNewValue2 = 1; -let opNewValue2 = 1; let addrMode = AbsoluteSet; let accessSize = ByteAccess; let mayLoad = 1; @@ -11734,7 +12073,7 @@ def L4_loadrub_rr : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii), "$Rd32 = memub($Rs32+$Rt32<<#$Ii)", -tc_5625c6c1, TypeLD>, Enc_da664b, AddrModeRel, ImmRegShl { +tc_f47d212f, TypeLD>, Enc_da664b, AddrModeRel, ImmRegShl { let Inst{6-5} = 0b00; let Inst{31-21} = 0b00111010001; let hasNewValue = 1; @@ -11751,7 +12090,7 @@ def L4_loadrub_ur : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II), "$Rd32 = memub($Rt32<<#$Ii+#$II)", -tc_7d9a56cd, TypeLD>, Enc_4f677b, AddrModeRel, ImmRegShl { +tc_cf47a43f, TypeLD>, Enc_4f677b, AddrModeRel, ImmRegShl { let Inst{12-12} = 0b1; let Inst{31-21} = 0b10011101001; let hasNewValue = 1; @@ -11773,14 +12112,12 @@ def L4_loadruh_ap : HInst< (outs 
IntRegs:$Rd32, IntRegs:$Re32), (ins u32_0Imm:$II), "$Rd32 = memuh($Re32=#$II)", -tc_b5f5a094, TypeLD>, Enc_323f2d { +tc_b77c481f, TypeLD>, Enc_323f2d { let Inst{7-7} = 0b0; let Inst{13-12} = 0b01; let Inst{31-21} = 0b10011011011; let hasNewValue = 1; let opNewValue = 0; -let hasNewValue2 = 1; -let opNewValue2 = 1; let addrMode = AbsoluteSet; let accessSize = HalfWordAccess; let mayLoad = 1; @@ -11796,7 +12133,7 @@ def L4_loadruh_rr : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii), "$Rd32 = memuh($Rs32+$Rt32<<#$Ii)", -tc_5625c6c1, TypeLD>, Enc_da664b, AddrModeRel, ImmRegShl { +tc_f47d212f, TypeLD>, Enc_da664b, AddrModeRel, ImmRegShl { let Inst{6-5} = 0b00; let Inst{31-21} = 0b00111010011; let hasNewValue = 1; @@ -11813,7 +12150,7 @@ def L4_loadruh_ur : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II), "$Rd32 = memuh($Rt32<<#$Ii+#$II)", -tc_7d9a56cd, TypeLD>, Enc_4f677b, AddrModeRel, ImmRegShl { +tc_cf47a43f, TypeLD>, Enc_4f677b, AddrModeRel, ImmRegShl { let Inst{12-12} = 0b1; let Inst{31-21} = 0b10011101011; let hasNewValue = 1; @@ -11835,13 +12172,14 @@ def L4_or_memopb_io : HInst< (outs), (ins IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Rt32), "memb($Rs32+#$Ii) |= $Rt32", -tc_a9c993d9, TypeV4LDST>, Enc_d44e31 { +tc_44126683, TypeV4LDST>, Enc_d44e31 { let Inst{6-5} = 0b11; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00111110000; let addrMode = BaseImmOffset; let accessSize = ByteAccess; let mayLoad = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let isExtendable = 1; let opExtendable = 1; @@ -11853,7 +12191,7 @@ def L4_or_memopb_zomap : HInst< (outs), (ins IntRegs:$Rs32, IntRegs:$Rt32), "memb($Rs32) |= $Rt32", -tc_a9c993d9, TypeMAPPING> { +tc_44126683, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -11861,13 +12199,14 @@ def L4_or_memoph_io : HInst< (outs), (ins IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32), "memh($Rs32+#$Ii) |= $Rt32", -tc_a9c993d9, TypeV4LDST>, Enc_163a3c { +tc_44126683, TypeV4LDST>, Enc_163a3c { let Inst{6-5} = 0b11; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00111110001; let addrMode = BaseImmOffset; let accessSize = HalfWordAccess; let mayLoad = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let isExtendable = 1; let opExtendable = 1; @@ -11879,7 +12218,7 @@ def L4_or_memoph_zomap : HInst< (outs), (ins IntRegs:$Rs32, IntRegs:$Rt32), "memh($Rs32) |= $Rt32", -tc_a9c993d9, TypeMAPPING> { +tc_44126683, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -11887,13 +12226,14 @@ def L4_or_memopw_io : HInst< (outs), (ins IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Rt32), "memw($Rs32+#$Ii) |= $Rt32", -tc_a9c993d9, TypeV4LDST>, Enc_226535 { +tc_44126683, TypeV4LDST>, Enc_226535 { let Inst{6-5} = 0b11; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00111110010; let addrMode = BaseImmOffset; let accessSize = WordAccess; let mayLoad = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let isExtendable = 1; let opExtendable = 1; @@ -11905,7 +12245,7 @@ def L4_or_memopw_zomap : HInst< (outs), (ins IntRegs:$Rs32, IntRegs:$Rt32), "memw($Rs32) |= $Rt32", -tc_a9c993d9, TypeMAPPING> { +tc_44126683, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -11913,7 +12253,7 @@ def L4_ploadrbf_abs : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, u32_0Imm:$Ii), "if (!$Pt4) $Rd32 = memb(#$Ii)", -tc_136c4786, TypeLD>, Enc_2301d6, AddrModeRel { +tc_1d5a38a8, TypeLD>, Enc_2301d6, AddrModeRel { let Inst{7-5} = 0b100; let Inst{13-11} = 0b101; let Inst{31-21} = 0b10011111000; @@ -11938,7 +12278,7 @@ def 
L4_ploadrbf_rr : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii), "if (!$Pv4) $Rd32 = memb($Rs32+$Rt32<<#$Ii)", -tc_9dafb7d3, TypeLD>, Enc_2e1979, AddrModeRel { +tc_9ef61e5c, TypeLD>, Enc_2e1979, AddrModeRel { let Inst{31-21} = 0b00110001000; let isPredicated = 1; let isPredicatedFalse = 1; @@ -11955,7 +12295,7 @@ def L4_ploadrbfnew_abs : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, u32_0Imm:$Ii), "if (!$Pt4.new) $Rd32 = memb(#$Ii)", -tc_b5f5a094, TypeLD>, Enc_2301d6, AddrModeRel { +tc_b77c481f, TypeLD>, Enc_2301d6, AddrModeRel { let Inst{7-5} = 0b100; let Inst{13-11} = 0b111; let Inst{31-21} = 0b10011111000; @@ -11981,7 +12321,7 @@ def L4_ploadrbfnew_rr : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii), "if (!$Pv4.new) $Rd32 = memb($Rs32+$Rt32<<#$Ii)", -tc_128719e8, TypeLD>, Enc_2e1979, AddrModeRel { +tc_b7dd427e, TypeLD>, Enc_2e1979, AddrModeRel { let Inst{31-21} = 0b00110011000; let isPredicated = 1; let isPredicatedFalse = 1; @@ -11999,7 +12339,7 @@ def L4_ploadrbt_abs : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, u32_0Imm:$Ii), "if ($Pt4) $Rd32 = memb(#$Ii)", -tc_136c4786, TypeLD>, Enc_2301d6, AddrModeRel { +tc_1d5a38a8, TypeLD>, Enc_2301d6, AddrModeRel { let Inst{7-5} = 0b100; let Inst{13-11} = 0b100; let Inst{31-21} = 0b10011111000; @@ -12023,7 +12363,7 @@ def L4_ploadrbt_rr : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii), "if ($Pv4) $Rd32 = memb($Rs32+$Rt32<<#$Ii)", -tc_9dafb7d3, TypeLD>, Enc_2e1979, AddrModeRel { +tc_9ef61e5c, TypeLD>, Enc_2e1979, AddrModeRel { let Inst{31-21} = 0b00110000000; let isPredicated = 1; let hasNewValue = 1; @@ -12039,7 +12379,7 @@ def L4_ploadrbtnew_abs : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, u32_0Imm:$Ii), "if ($Pt4.new) $Rd32 = memb(#$Ii)", -tc_b5f5a094, TypeLD>, Enc_2301d6, AddrModeRel { +tc_b77c481f, TypeLD>, Enc_2301d6, AddrModeRel { let Inst{7-5} = 0b100; let Inst{13-11} = 0b110; let Inst{31-21} = 0b10011111000; @@ -12064,7 +12404,7 @@ def L4_ploadrbtnew_rr : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii), "if ($Pv4.new) $Rd32 = memb($Rs32+$Rt32<<#$Ii)", -tc_128719e8, TypeLD>, Enc_2e1979, AddrModeRel { +tc_b7dd427e, TypeLD>, Enc_2e1979, AddrModeRel { let Inst{31-21} = 0b00110010000; let isPredicated = 1; let hasNewValue = 1; @@ -12081,7 +12421,7 @@ def L4_ploadrdf_abs : HInst< (outs DoubleRegs:$Rdd32), (ins PredRegs:$Pt4, u32_0Imm:$Ii), "if (!$Pt4) $Rdd32 = memd(#$Ii)", -tc_136c4786, TypeLD>, Enc_2a7b91, AddrModeRel { +tc_1d5a38a8, TypeLD>, Enc_2a7b91, AddrModeRel { let Inst{7-5} = 0b100; let Inst{13-11} = 0b101; let Inst{31-21} = 0b10011111110; @@ -12104,7 +12444,7 @@ def L4_ploadrdf_rr : HInst< (outs DoubleRegs:$Rdd32), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii), "if (!$Pv4) $Rdd32 = memd($Rs32+$Rt32<<#$Ii)", -tc_9dafb7d3, TypeLD>, Enc_98c0b8, AddrModeRel { +tc_9ef61e5c, TypeLD>, Enc_98c0b8, AddrModeRel { let Inst{31-21} = 0b00110001110; let isPredicated = 1; let isPredicatedFalse = 1; @@ -12119,7 +12459,7 @@ def L4_ploadrdfnew_abs : HInst< (outs DoubleRegs:$Rdd32), (ins PredRegs:$Pt4, u32_0Imm:$Ii), "if (!$Pt4.new) $Rdd32 = memd(#$Ii)", -tc_b5f5a094, TypeLD>, Enc_2a7b91, AddrModeRel { +tc_b77c481f, TypeLD>, Enc_2a7b91, AddrModeRel { let Inst{7-5} = 0b100; let Inst{13-11} = 0b111; let Inst{31-21} = 0b10011111110; @@ -12143,7 +12483,7 @@ def L4_ploadrdfnew_rr : HInst< (outs DoubleRegs:$Rdd32), (ins PredRegs:$Pv4, IntRegs:$Rs32, 
IntRegs:$Rt32, u2_0Imm:$Ii), "if (!$Pv4.new) $Rdd32 = memd($Rs32+$Rt32<<#$Ii)", -tc_128719e8, TypeLD>, Enc_98c0b8, AddrModeRel { +tc_b7dd427e, TypeLD>, Enc_98c0b8, AddrModeRel { let Inst{31-21} = 0b00110011110; let isPredicated = 1; let isPredicatedFalse = 1; @@ -12159,7 +12499,7 @@ def L4_ploadrdt_abs : HInst< (outs DoubleRegs:$Rdd32), (ins PredRegs:$Pt4, u32_0Imm:$Ii), "if ($Pt4) $Rdd32 = memd(#$Ii)", -tc_136c4786, TypeLD>, Enc_2a7b91, AddrModeRel { +tc_1d5a38a8, TypeLD>, Enc_2a7b91, AddrModeRel { let Inst{7-5} = 0b100; let Inst{13-11} = 0b100; let Inst{31-21} = 0b10011111110; @@ -12181,7 +12521,7 @@ def L4_ploadrdt_rr : HInst< (outs DoubleRegs:$Rdd32), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii), "if ($Pv4) $Rdd32 = memd($Rs32+$Rt32<<#$Ii)", -tc_9dafb7d3, TypeLD>, Enc_98c0b8, AddrModeRel { +tc_9ef61e5c, TypeLD>, Enc_98c0b8, AddrModeRel { let Inst{31-21} = 0b00110000110; let isPredicated = 1; let addrMode = BaseRegOffset; @@ -12195,7 +12535,7 @@ def L4_ploadrdtnew_abs : HInst< (outs DoubleRegs:$Rdd32), (ins PredRegs:$Pt4, u32_0Imm:$Ii), "if ($Pt4.new) $Rdd32 = memd(#$Ii)", -tc_b5f5a094, TypeLD>, Enc_2a7b91, AddrModeRel { +tc_b77c481f, TypeLD>, Enc_2a7b91, AddrModeRel { let Inst{7-5} = 0b100; let Inst{13-11} = 0b110; let Inst{31-21} = 0b10011111110; @@ -12218,7 +12558,7 @@ def L4_ploadrdtnew_rr : HInst< (outs DoubleRegs:$Rdd32), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii), "if ($Pv4.new) $Rdd32 = memd($Rs32+$Rt32<<#$Ii)", -tc_128719e8, TypeLD>, Enc_98c0b8, AddrModeRel { +tc_b7dd427e, TypeLD>, Enc_98c0b8, AddrModeRel { let Inst{31-21} = 0b00110010110; let isPredicated = 1; let addrMode = BaseRegOffset; @@ -12233,7 +12573,7 @@ def L4_ploadrhf_abs : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, u32_0Imm:$Ii), "if (!$Pt4) $Rd32 = memh(#$Ii)", -tc_136c4786, TypeLD>, Enc_2301d6, AddrModeRel { +tc_1d5a38a8, TypeLD>, Enc_2301d6, AddrModeRel { let Inst{7-5} = 0b100; let Inst{13-11} = 0b101; let Inst{31-21} = 0b10011111010; @@ -12258,7 +12598,7 @@ def L4_ploadrhf_rr : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii), "if (!$Pv4) $Rd32 = memh($Rs32+$Rt32<<#$Ii)", -tc_9dafb7d3, TypeLD>, Enc_2e1979, AddrModeRel { +tc_9ef61e5c, TypeLD>, Enc_2e1979, AddrModeRel { let Inst{31-21} = 0b00110001010; let isPredicated = 1; let isPredicatedFalse = 1; @@ -12275,7 +12615,7 @@ def L4_ploadrhfnew_abs : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, u32_0Imm:$Ii), "if (!$Pt4.new) $Rd32 = memh(#$Ii)", -tc_b5f5a094, TypeLD>, Enc_2301d6, AddrModeRel { +tc_b77c481f, TypeLD>, Enc_2301d6, AddrModeRel { let Inst{7-5} = 0b100; let Inst{13-11} = 0b111; let Inst{31-21} = 0b10011111010; @@ -12301,7 +12641,7 @@ def L4_ploadrhfnew_rr : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii), "if (!$Pv4.new) $Rd32 = memh($Rs32+$Rt32<<#$Ii)", -tc_128719e8, TypeLD>, Enc_2e1979, AddrModeRel { +tc_b7dd427e, TypeLD>, Enc_2e1979, AddrModeRel { let Inst{31-21} = 0b00110011010; let isPredicated = 1; let isPredicatedFalse = 1; @@ -12319,7 +12659,7 @@ def L4_ploadrht_abs : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, u32_0Imm:$Ii), "if ($Pt4) $Rd32 = memh(#$Ii)", -tc_136c4786, TypeLD>, Enc_2301d6, AddrModeRel { +tc_1d5a38a8, TypeLD>, Enc_2301d6, AddrModeRel { let Inst{7-5} = 0b100; let Inst{13-11} = 0b100; let Inst{31-21} = 0b10011111010; @@ -12343,7 +12683,7 @@ def L4_ploadrht_rr : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii), "if ($Pv4) $Rd32 = 
memh($Rs32+$Rt32<<#$Ii)", -tc_9dafb7d3, TypeLD>, Enc_2e1979, AddrModeRel { +tc_9ef61e5c, TypeLD>, Enc_2e1979, AddrModeRel { let Inst{31-21} = 0b00110000010; let isPredicated = 1; let hasNewValue = 1; @@ -12359,7 +12699,7 @@ def L4_ploadrhtnew_abs : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, u32_0Imm:$Ii), "if ($Pt4.new) $Rd32 = memh(#$Ii)", -tc_b5f5a094, TypeLD>, Enc_2301d6, AddrModeRel { +tc_b77c481f, TypeLD>, Enc_2301d6, AddrModeRel { let Inst{7-5} = 0b100; let Inst{13-11} = 0b110; let Inst{31-21} = 0b10011111010; @@ -12384,7 +12724,7 @@ def L4_ploadrhtnew_rr : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii), "if ($Pv4.new) $Rd32 = memh($Rs32+$Rt32<<#$Ii)", -tc_128719e8, TypeLD>, Enc_2e1979, AddrModeRel { +tc_b7dd427e, TypeLD>, Enc_2e1979, AddrModeRel { let Inst{31-21} = 0b00110010010; let isPredicated = 1; let hasNewValue = 1; @@ -12401,7 +12741,7 @@ def L4_ploadrif_abs : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, u32_0Imm:$Ii), "if (!$Pt4) $Rd32 = memw(#$Ii)", -tc_136c4786, TypeLD>, Enc_2301d6, AddrModeRel { +tc_1d5a38a8, TypeLD>, Enc_2301d6, AddrModeRel { let Inst{7-5} = 0b100; let Inst{13-11} = 0b101; let Inst{31-21} = 0b10011111100; @@ -12426,7 +12766,7 @@ def L4_ploadrif_rr : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii), "if (!$Pv4) $Rd32 = memw($Rs32+$Rt32<<#$Ii)", -tc_9dafb7d3, TypeLD>, Enc_2e1979, AddrModeRel { +tc_9ef61e5c, TypeLD>, Enc_2e1979, AddrModeRel { let Inst{31-21} = 0b00110001100; let isPredicated = 1; let isPredicatedFalse = 1; @@ -12443,7 +12783,7 @@ def L4_ploadrifnew_abs : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, u32_0Imm:$Ii), "if (!$Pt4.new) $Rd32 = memw(#$Ii)", -tc_b5f5a094, TypeLD>, Enc_2301d6, AddrModeRel { +tc_b77c481f, TypeLD>, Enc_2301d6, AddrModeRel { let Inst{7-5} = 0b100; let Inst{13-11} = 0b111; let Inst{31-21} = 0b10011111100; @@ -12469,7 +12809,7 @@ def L4_ploadrifnew_rr : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii), "if (!$Pv4.new) $Rd32 = memw($Rs32+$Rt32<<#$Ii)", -tc_128719e8, TypeLD>, Enc_2e1979, AddrModeRel { +tc_b7dd427e, TypeLD>, Enc_2e1979, AddrModeRel { let Inst{31-21} = 0b00110011100; let isPredicated = 1; let isPredicatedFalse = 1; @@ -12487,7 +12827,7 @@ def L4_ploadrit_abs : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, u32_0Imm:$Ii), "if ($Pt4) $Rd32 = memw(#$Ii)", -tc_136c4786, TypeLD>, Enc_2301d6, AddrModeRel { +tc_1d5a38a8, TypeLD>, Enc_2301d6, AddrModeRel { let Inst{7-5} = 0b100; let Inst{13-11} = 0b100; let Inst{31-21} = 0b10011111100; @@ -12511,7 +12851,7 @@ def L4_ploadrit_rr : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii), "if ($Pv4) $Rd32 = memw($Rs32+$Rt32<<#$Ii)", -tc_9dafb7d3, TypeLD>, Enc_2e1979, AddrModeRel { +tc_9ef61e5c, TypeLD>, Enc_2e1979, AddrModeRel { let Inst{31-21} = 0b00110000100; let isPredicated = 1; let hasNewValue = 1; @@ -12527,7 +12867,7 @@ def L4_ploadritnew_abs : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, u32_0Imm:$Ii), "if ($Pt4.new) $Rd32 = memw(#$Ii)", -tc_b5f5a094, TypeLD>, Enc_2301d6, AddrModeRel { +tc_b77c481f, TypeLD>, Enc_2301d6, AddrModeRel { let Inst{7-5} = 0b100; let Inst{13-11} = 0b110; let Inst{31-21} = 0b10011111100; @@ -12552,7 +12892,7 @@ def L4_ploadritnew_rr : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii), "if ($Pv4.new) $Rd32 = memw($Rs32+$Rt32<<#$Ii)", -tc_128719e8, TypeLD>, Enc_2e1979, AddrModeRel { +tc_b7dd427e, TypeLD>, 
Enc_2e1979, AddrModeRel { let Inst{31-21} = 0b00110010100; let isPredicated = 1; let hasNewValue = 1; @@ -12569,7 +12909,7 @@ def L4_ploadrubf_abs : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, u32_0Imm:$Ii), "if (!$Pt4) $Rd32 = memub(#$Ii)", -tc_136c4786, TypeLD>, Enc_2301d6, AddrModeRel { +tc_1d5a38a8, TypeLD>, Enc_2301d6, AddrModeRel { let Inst{7-5} = 0b100; let Inst{13-11} = 0b101; let Inst{31-21} = 0b10011111001; @@ -12594,7 +12934,7 @@ def L4_ploadrubf_rr : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii), "if (!$Pv4) $Rd32 = memub($Rs32+$Rt32<<#$Ii)", -tc_9dafb7d3, TypeLD>, Enc_2e1979, AddrModeRel { +tc_9ef61e5c, TypeLD>, Enc_2e1979, AddrModeRel { let Inst{31-21} = 0b00110001001; let isPredicated = 1; let isPredicatedFalse = 1; @@ -12611,7 +12951,7 @@ def L4_ploadrubfnew_abs : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, u32_0Imm:$Ii), "if (!$Pt4.new) $Rd32 = memub(#$Ii)", -tc_b5f5a094, TypeLD>, Enc_2301d6, AddrModeRel { +tc_b77c481f, TypeLD>, Enc_2301d6, AddrModeRel { let Inst{7-5} = 0b100; let Inst{13-11} = 0b111; let Inst{31-21} = 0b10011111001; @@ -12637,7 +12977,7 @@ def L4_ploadrubfnew_rr : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii), "if (!$Pv4.new) $Rd32 = memub($Rs32+$Rt32<<#$Ii)", -tc_128719e8, TypeLD>, Enc_2e1979, AddrModeRel { +tc_b7dd427e, TypeLD>, Enc_2e1979, AddrModeRel { let Inst{31-21} = 0b00110011001; let isPredicated = 1; let isPredicatedFalse = 1; @@ -12655,7 +12995,7 @@ def L4_ploadrubt_abs : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, u32_0Imm:$Ii), "if ($Pt4) $Rd32 = memub(#$Ii)", -tc_136c4786, TypeLD>, Enc_2301d6, AddrModeRel { +tc_1d5a38a8, TypeLD>, Enc_2301d6, AddrModeRel { let Inst{7-5} = 0b100; let Inst{13-11} = 0b100; let Inst{31-21} = 0b10011111001; @@ -12679,7 +13019,7 @@ def L4_ploadrubt_rr : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii), "if ($Pv4) $Rd32 = memub($Rs32+$Rt32<<#$Ii)", -tc_9dafb7d3, TypeLD>, Enc_2e1979, AddrModeRel { +tc_9ef61e5c, TypeLD>, Enc_2e1979, AddrModeRel { let Inst{31-21} = 0b00110000001; let isPredicated = 1; let hasNewValue = 1; @@ -12695,7 +13035,7 @@ def L4_ploadrubtnew_abs : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, u32_0Imm:$Ii), "if ($Pt4.new) $Rd32 = memub(#$Ii)", -tc_b5f5a094, TypeLD>, Enc_2301d6, AddrModeRel { +tc_b77c481f, TypeLD>, Enc_2301d6, AddrModeRel { let Inst{7-5} = 0b100; let Inst{13-11} = 0b110; let Inst{31-21} = 0b10011111001; @@ -12720,7 +13060,7 @@ def L4_ploadrubtnew_rr : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii), "if ($Pv4.new) $Rd32 = memub($Rs32+$Rt32<<#$Ii)", -tc_128719e8, TypeLD>, Enc_2e1979, AddrModeRel { +tc_b7dd427e, TypeLD>, Enc_2e1979, AddrModeRel { let Inst{31-21} = 0b00110010001; let isPredicated = 1; let hasNewValue = 1; @@ -12737,7 +13077,7 @@ def L4_ploadruhf_abs : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, u32_0Imm:$Ii), "if (!$Pt4) $Rd32 = memuh(#$Ii)", -tc_136c4786, TypeLD>, Enc_2301d6, AddrModeRel { +tc_1d5a38a8, TypeLD>, Enc_2301d6, AddrModeRel { let Inst{7-5} = 0b100; let Inst{13-11} = 0b101; let Inst{31-21} = 0b10011111011; @@ -12762,7 +13102,7 @@ def L4_ploadruhf_rr : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii), "if (!$Pv4) $Rd32 = memuh($Rs32+$Rt32<<#$Ii)", -tc_9dafb7d3, TypeLD>, Enc_2e1979, AddrModeRel { +tc_9ef61e5c, TypeLD>, Enc_2e1979, AddrModeRel { let Inst{31-21} = 0b00110001011; let isPredicated = 1; let 
isPredicatedFalse = 1; @@ -12779,7 +13119,7 @@ def L4_ploadruhfnew_abs : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, u32_0Imm:$Ii), "if (!$Pt4.new) $Rd32 = memuh(#$Ii)", -tc_b5f5a094, TypeLD>, Enc_2301d6, AddrModeRel { +tc_b77c481f, TypeLD>, Enc_2301d6, AddrModeRel { let Inst{7-5} = 0b100; let Inst{13-11} = 0b111; let Inst{31-21} = 0b10011111011; @@ -12805,7 +13145,7 @@ def L4_ploadruhfnew_rr : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii), "if (!$Pv4.new) $Rd32 = memuh($Rs32+$Rt32<<#$Ii)", -tc_128719e8, TypeLD>, Enc_2e1979, AddrModeRel { +tc_b7dd427e, TypeLD>, Enc_2e1979, AddrModeRel { let Inst{31-21} = 0b00110011011; let isPredicated = 1; let isPredicatedFalse = 1; @@ -12823,7 +13163,7 @@ def L4_ploadruht_abs : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, u32_0Imm:$Ii), "if ($Pt4) $Rd32 = memuh(#$Ii)", -tc_136c4786, TypeLD>, Enc_2301d6, AddrModeRel { +tc_1d5a38a8, TypeLD>, Enc_2301d6, AddrModeRel { let Inst{7-5} = 0b100; let Inst{13-11} = 0b100; let Inst{31-21} = 0b10011111011; @@ -12847,7 +13187,7 @@ def L4_ploadruht_rr : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii), "if ($Pv4) $Rd32 = memuh($Rs32+$Rt32<<#$Ii)", -tc_9dafb7d3, TypeLD>, Enc_2e1979, AddrModeRel { +tc_9ef61e5c, TypeLD>, Enc_2e1979, AddrModeRel { let Inst{31-21} = 0b00110000011; let isPredicated = 1; let hasNewValue = 1; @@ -12863,7 +13203,7 @@ def L4_ploadruhtnew_abs : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, u32_0Imm:$Ii), "if ($Pt4.new) $Rd32 = memuh(#$Ii)", -tc_b5f5a094, TypeLD>, Enc_2301d6, AddrModeRel { +tc_b77c481f, TypeLD>, Enc_2301d6, AddrModeRel { let Inst{7-5} = 0b100; let Inst{13-11} = 0b110; let Inst{31-21} = 0b10011111011; @@ -12888,7 +13228,7 @@ def L4_ploadruhtnew_rr : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii), "if ($Pv4.new) $Rd32 = memuh($Rs32+$Rt32<<#$Ii)", -tc_128719e8, TypeLD>, Enc_2e1979, AddrModeRel { +tc_b7dd427e, TypeLD>, Enc_2e1979, AddrModeRel { let Inst{31-21} = 0b00110010011; let isPredicated = 1; let hasNewValue = 1; @@ -12902,163 +13242,204 @@ let InputType = "reg"; let BaseOpcode = "L4_loadruh_rr"; } def L4_return : HInst< -(outs), -(ins), -"dealloc_return", -tc_dcfee7ae, TypeLD>, Enc_3a3d62, PredNewRel { -let Inst{4-0} = 0b11110; +(outs DoubleRegs:$Rdd32), +(ins IntRegs:$Rs32), +"$Rdd32 = dealloc_return($Rs32):raw", +tc_3d04548d, TypeLD>, Enc_3a3d62, PredNewRel { let Inst{13-5} = 0b000000000; let Inst{31-21} = 0b10010110000; -let Inst{20-16} = 0b11110; let isTerminator = 1; let isIndirectBranch = 1; let accessSize = DoubleWordAccess; +let mayLoad = 1; let cofMax1 = 1; +let isRestrictNoSlot1Store = 1; let isReturn = 1; -let mayLoad = 1; -let Uses = [R30]; -let Defs = [PC, R29, R30, R31]; +let Uses = [FRAMEKEY]; +let Defs = [PC, R29]; let BaseOpcode = "L4_return"; let isBarrier = 1; let isPredicable = 1; let isTaken = 1; } def L4_return_f : HInst< -(outs), -(ins PredRegs:$Pv4), -"if (!$Pv4) dealloc_return", -tc_9ce7a5ab, TypeLD>, Enc_b7fad3, PredNewRel { -let Inst{4-0} = 0b11110; +(outs DoubleRegs:$Rdd32), +(ins PredRegs:$Pv4, IntRegs:$Rs32), +"if (!$Pv4) $Rdd32 = dealloc_return($Rs32):raw", +tc_513bef45, TypeLD>, Enc_b7fad3, PredNewRel { let Inst{7-5} = 0b000; let Inst{13-10} = 0b1100; let Inst{31-21} = 0b10010110000; -let Inst{20-16} = 0b11110; let isPredicated = 1; let isPredicatedFalse = 1; let isTerminator = 1; let isIndirectBranch = 1; let accessSize = DoubleWordAccess; -let cofMax1 = 1; let mayLoad = 1; +let cofMax1 = 1; +let 
isRestrictNoSlot1Store = 1; let isReturn = 1; -let Uses = [R30]; -let Defs = [PC, R29, R30, R31]; +let Uses = [FRAMEKEY]; +let Defs = [PC, R29]; let BaseOpcode = "L4_return"; let isTaken = Inst{12}; } def L4_return_fnew_pnt : HInst< -(outs), -(ins PredRegs:$Pv4), -"if (!$Pv4.new) dealloc_return:nt", -tc_3993c58b, TypeLD>, Enc_b7fad3, PredNewRel { -let Inst{4-0} = 0b11110; +(outs DoubleRegs:$Rdd32), +(ins PredRegs:$Pv4, IntRegs:$Rs32), +"if (!$Pv4.new) $Rdd32 = dealloc_return($Rs32):nt:raw", +tc_395dc00f, TypeLD>, Enc_b7fad3, PredNewRel { let Inst{7-5} = 0b000; let Inst{13-10} = 0b1010; let Inst{31-21} = 0b10010110000; -let Inst{20-16} = 0b11110; let isPredicated = 1; let isPredicatedFalse = 1; let isTerminator = 1; let isIndirectBranch = 1; let accessSize = DoubleWordAccess; -let cofMax1 = 1; let isPredicatedNew = 1; let mayLoad = 1; +let cofMax1 = 1; +let isRestrictNoSlot1Store = 1; let isReturn = 1; -let Uses = [R30]; -let Defs = [PC, R29, R30, R31]; +let Uses = [FRAMEKEY]; +let Defs = [PC, R29]; let BaseOpcode = "L4_return"; let isTaken = Inst{12}; } def L4_return_fnew_pt : HInst< -(outs), -(ins PredRegs:$Pv4), -"if (!$Pv4.new) dealloc_return:t", -tc_3993c58b, TypeLD>, Enc_b7fad3, PredNewRel { -let Inst{4-0} = 0b11110; +(outs DoubleRegs:$Rdd32), +(ins PredRegs:$Pv4, IntRegs:$Rs32), +"if (!$Pv4.new) $Rdd32 = dealloc_return($Rs32):t:raw", +tc_395dc00f, TypeLD>, Enc_b7fad3, PredNewRel { let Inst{7-5} = 0b000; let Inst{13-10} = 0b1110; let Inst{31-21} = 0b10010110000; -let Inst{20-16} = 0b11110; let isPredicated = 1; let isPredicatedFalse = 1; let isTerminator = 1; let isIndirectBranch = 1; let accessSize = DoubleWordAccess; -let cofMax1 = 1; let isPredicatedNew = 1; let mayLoad = 1; +let cofMax1 = 1; +let isRestrictNoSlot1Store = 1; let isReturn = 1; -let Uses = [R30]; -let Defs = [PC, R29, R30, R31]; +let Uses = [FRAMEKEY]; +let Defs = [PC, R29]; let BaseOpcode = "L4_return"; let isTaken = Inst{12}; } -def L4_return_t : HInst< +def L4_return_map_to_raw_f : HInst< +(outs), +(ins PredRegs:$Pv4), +"if (!$Pv4) dealloc_return", +tc_513bef45, TypeMAPPING>, Requires<[HasV65T]> { +let isPseudo = 1; +let isCodeGenOnly = 1; +} +def L4_return_map_to_raw_fnew_pnt : HInst< +(outs), +(ins PredRegs:$Pv4), +"if (!$Pv4.new) dealloc_return:nt", +tc_395dc00f, TypeMAPPING>, Requires<[HasV65T]> { +let isPseudo = 1; +let isCodeGenOnly = 1; +} +def L4_return_map_to_raw_fnew_pt : HInst< +(outs), +(ins PredRegs:$Pv4), +"if (!$Pv4.new) dealloc_return:t", +tc_395dc00f, TypeMAPPING>, Requires<[HasV65T]> { +let isPseudo = 1; +let isCodeGenOnly = 1; +} +def L4_return_map_to_raw_t : HInst< (outs), (ins PredRegs:$Pv4), "if ($Pv4) dealloc_return", -tc_9ce7a5ab, TypeLD>, Enc_b7fad3, PredNewRel { -let Inst{4-0} = 0b11110; +tc_3bc2c5d3, TypeMAPPING>, Requires<[HasV65T]> { +let isPseudo = 1; +let isCodeGenOnly = 1; +} +def L4_return_map_to_raw_tnew_pnt : HInst< +(outs), +(ins PredRegs:$Pv4), +"if ($Pv4.new) dealloc_return:nt", +tc_e7624c08, TypeMAPPING>, Requires<[HasV65T]> { +let isPseudo = 1; +let isCodeGenOnly = 1; +} +def L4_return_map_to_raw_tnew_pt : HInst< +(outs), +(ins PredRegs:$Pv4), +"if ($Pv4.new) dealloc_return:t", +tc_e7624c08, TypeMAPPING>, Requires<[HasV65T]> { +let isPseudo = 1; +let isCodeGenOnly = 1; +} +def L4_return_t : HInst< +(outs DoubleRegs:$Rdd32), +(ins PredRegs:$Pv4, IntRegs:$Rs32), +"if ($Pv4) $Rdd32 = dealloc_return($Rs32):raw", +tc_513bef45, TypeLD>, Enc_b7fad3, PredNewRel { let Inst{7-5} = 0b000; let Inst{13-10} = 0b0100; let Inst{31-21} = 0b10010110000; -let Inst{20-16} = 0b11110; let 
isPredicated = 1; let isTerminator = 1; let isIndirectBranch = 1; let accessSize = DoubleWordAccess; -let cofMax1 = 1; let mayLoad = 1; +let cofMax1 = 1; +let isRestrictNoSlot1Store = 1; let isReturn = 1; -let Uses = [R30]; -let Defs = [PC, R29, R30, R31]; +let Uses = [FRAMEKEY]; +let Defs = [PC, R29]; let BaseOpcode = "L4_return"; let isTaken = Inst{12}; } def L4_return_tnew_pnt : HInst< -(outs), -(ins PredRegs:$Pv4), -"if ($Pv4.new) dealloc_return:nt", -tc_3993c58b, TypeLD>, Enc_b7fad3, PredNewRel { -let Inst{4-0} = 0b11110; +(outs DoubleRegs:$Rdd32), +(ins PredRegs:$Pv4, IntRegs:$Rs32), +"if ($Pv4.new) $Rdd32 = dealloc_return($Rs32):nt:raw", +tc_395dc00f, TypeLD>, Enc_b7fad3, PredNewRel { let Inst{7-5} = 0b000; let Inst{13-10} = 0b0010; let Inst{31-21} = 0b10010110000; -let Inst{20-16} = 0b11110; let isPredicated = 1; let isTerminator = 1; let isIndirectBranch = 1; let accessSize = DoubleWordAccess; -let cofMax1 = 1; let isPredicatedNew = 1; let mayLoad = 1; +let cofMax1 = 1; +let isRestrictNoSlot1Store = 1; let isReturn = 1; -let Uses = [R30]; -let Defs = [PC, R29, R30, R31]; +let Uses = [FRAMEKEY]; +let Defs = [PC, R29]; let BaseOpcode = "L4_return"; let isTaken = Inst{12}; } def L4_return_tnew_pt : HInst< -(outs), -(ins PredRegs:$Pv4), -"if ($Pv4.new) dealloc_return:t", -tc_3993c58b, TypeLD>, Enc_b7fad3, PredNewRel { -let Inst{4-0} = 0b11110; +(outs DoubleRegs:$Rdd32), +(ins PredRegs:$Pv4, IntRegs:$Rs32), +"if ($Pv4.new) $Rdd32 = dealloc_return($Rs32):t:raw", +tc_395dc00f, TypeLD>, Enc_b7fad3, PredNewRel { let Inst{7-5} = 0b000; let Inst{13-10} = 0b0110; let Inst{31-21} = 0b10010110000; -let Inst{20-16} = 0b11110; let isPredicated = 1; let isTerminator = 1; let isIndirectBranch = 1; let accessSize = DoubleWordAccess; -let cofMax1 = 1; let isPredicatedNew = 1; let mayLoad = 1; +let cofMax1 = 1; +let isRestrictNoSlot1Store = 1; let isReturn = 1; -let Uses = [R30]; -let Defs = [PC, R29, R30, R31]; +let Uses = [FRAMEKEY]; +let Defs = [PC, R29]; let BaseOpcode = "L4_return"; let isTaken = Inst{12}; } @@ -13066,13 +13447,14 @@ def L4_sub_memopb_io : HInst< (outs), (ins IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Rt32), "memb($Rs32+#$Ii) -= $Rt32", -tc_a9c993d9, TypeV4LDST>, Enc_d44e31 { +tc_44126683, TypeV4LDST>, Enc_d44e31 { let Inst{6-5} = 0b01; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00111110000; let addrMode = BaseImmOffset; let accessSize = ByteAccess; let mayLoad = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let isExtendable = 1; let opExtendable = 1; @@ -13084,7 +13466,7 @@ def L4_sub_memopb_zomap : HInst< (outs), (ins IntRegs:$Rs32, IntRegs:$Rt32), "memb($Rs32) -= $Rt32", -tc_a9c993d9, TypeMAPPING> { +tc_44126683, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -13092,13 +13474,14 @@ def L4_sub_memoph_io : HInst< (outs), (ins IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32), "memh($Rs32+#$Ii) -= $Rt32", -tc_a9c993d9, TypeV4LDST>, Enc_163a3c { +tc_44126683, TypeV4LDST>, Enc_163a3c { let Inst{6-5} = 0b01; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00111110001; let addrMode = BaseImmOffset; let accessSize = HalfWordAccess; let mayLoad = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let isExtendable = 1; let opExtendable = 1; @@ -13110,7 +13493,7 @@ def L4_sub_memoph_zomap : HInst< (outs), (ins IntRegs:$Rs32, IntRegs:$Rt32), "memh($Rs32) -= $Rt32", -tc_a9c993d9, TypeMAPPING> { +tc_44126683, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -13118,13 +13501,14 @@ def L4_sub_memopw_io : HInst< (outs), (ins IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Rt32), 
"memw($Rs32+#$Ii) -= $Rt32", -tc_a9c993d9, TypeV4LDST>, Enc_226535 { +tc_44126683, TypeV4LDST>, Enc_226535 { let Inst{6-5} = 0b01; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00111110010; let addrMode = BaseImmOffset; let accessSize = WordAccess; let mayLoad = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let isExtendable = 1; let opExtendable = 1; @@ -13136,7 +13520,23 @@ def L4_sub_memopw_zomap : HInst< (outs), (ins IntRegs:$Rs32, IntRegs:$Rt32), "memw($Rs32) -= $Rt32", -tc_a9c993d9, TypeMAPPING> { +tc_44126683, TypeMAPPING> { +let isPseudo = 1; +let isCodeGenOnly = 1; +} +def L6_deallocframe_map_to_raw : HInst< +(outs), +(ins), +"deallocframe", +tc_d1090e34, TypeMAPPING>, Requires<[HasV65T]> { +let isPseudo = 1; +let isCodeGenOnly = 1; +} +def L6_return_map_to_raw : HInst< +(outs), +(ins), +"dealloc_return", +tc_3d04548d, TypeMAPPING>, Requires<[HasV65T]> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -13144,7 +13544,7 @@ def M2_acci : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 += add($Rs32,$Rt32)", -tc_c0cd91a8, TypeM>, Enc_2ae154, ImmRegRel { +tc_c74f796f, TypeM>, Enc_2ae154, ImmRegRel { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101111000; @@ -13159,7 +13559,7 @@ def M2_accii : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, s32_0Imm:$Ii), "$Rx32 += add($Rs32,#$Ii)", -tc_c0cd91a8, TypeM>, Enc_c90aca, ImmRegRel { +tc_c74f796f, TypeM>, Enc_c90aca, ImmRegRel { let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100010000; let hasNewValue = 1; @@ -13178,7 +13578,7 @@ def M2_cmaci_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 += cmpyi($Rs32,$Rt32)", -tc_8cb685d9, TypeM>, Enc_61f0b0 { +tc_e913dc32, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100111000; @@ -13189,7 +13589,7 @@ def M2_cmacr_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 += cmpyr($Rs32,$Rt32)", -tc_8cb685d9, TypeM>, Enc_61f0b0 { +tc_e913dc32, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100111000; @@ -13200,7 +13600,7 @@ def M2_cmacs_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 += cmpy($Rs32,$Rt32):sat", -tc_8cb685d9, TypeM>, Enc_61f0b0 { +tc_e913dc32, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100111000; @@ -13212,7 +13612,7 @@ def M2_cmacs_s1 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 += cmpy($Rs32,$Rt32):<<1:sat", -tc_8cb685d9, TypeM>, Enc_61f0b0 { +tc_e913dc32, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100111100; @@ -13224,7 +13624,7 @@ def M2_cmacsc_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 += cmpy($Rs32,$Rt32*):sat", -tc_8cb685d9, TypeM>, Enc_61f0b0 { +tc_e913dc32, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100111010; @@ -13236,7 +13636,7 @@ def M2_cmacsc_s1 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 += cmpy($Rs32,$Rt32*):<<1:sat", -tc_8cb685d9, TypeM>, Enc_61f0b0 { +tc_e913dc32, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100111110; @@ -13248,7 +13648,7 @@ def M2_cmpyi_s0 : HInst< (outs DoubleRegs:$Rdd32), (ins 
IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = cmpyi($Rs32,$Rt32)", -tc_8c8041e6, TypeM>, Enc_be32a5 { +tc_8fd5f294, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100101000; @@ -13258,7 +13658,7 @@ def M2_cmpyr_s0 : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = cmpyr($Rs32,$Rt32)", -tc_8c8041e6, TypeM>, Enc_be32a5 { +tc_8fd5f294, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100101000; @@ -13268,7 +13668,7 @@ def M2_cmpyrs_s0 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = cmpy($Rs32,$Rt32):rnd:sat", -tc_8c8041e6, TypeM>, Enc_5ab2be { +tc_8fd5f294, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101101001; @@ -13281,7 +13681,7 @@ def M2_cmpyrs_s1 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = cmpy($Rs32,$Rt32):<<1:rnd:sat", -tc_8c8041e6, TypeM>, Enc_5ab2be { +tc_8fd5f294, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101101101; @@ -13294,7 +13694,7 @@ def M2_cmpyrsc_s0 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = cmpy($Rs32,$Rt32*):rnd:sat", -tc_8c8041e6, TypeM>, Enc_5ab2be { +tc_8fd5f294, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101101011; @@ -13307,7 +13707,7 @@ def M2_cmpyrsc_s1 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = cmpy($Rs32,$Rt32*):<<1:rnd:sat", -tc_8c8041e6, TypeM>, Enc_5ab2be { +tc_8fd5f294, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101101111; @@ -13320,7 +13720,7 @@ def M2_cmpys_s0 : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = cmpy($Rs32,$Rt32):sat", -tc_8c8041e6, TypeM>, Enc_be32a5 { +tc_8fd5f294, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100101000; @@ -13331,7 +13731,7 @@ def M2_cmpys_s1 : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = cmpy($Rs32,$Rt32):<<1:sat", -tc_8c8041e6, TypeM>, Enc_be32a5 { +tc_8fd5f294, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100101100; @@ -13342,7 +13742,7 @@ def M2_cmpysc_s0 : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = cmpy($Rs32,$Rt32*):sat", -tc_8c8041e6, TypeM>, Enc_be32a5 { +tc_8fd5f294, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100101010; @@ -13353,7 +13753,7 @@ def M2_cmpysc_s1 : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = cmpy($Rs32,$Rt32*):<<1:sat", -tc_8c8041e6, TypeM>, Enc_be32a5 { +tc_8fd5f294, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100101110; @@ -13364,7 +13764,7 @@ def M2_cnacs_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 -= cmpy($Rs32,$Rt32):sat", -tc_8cb685d9, TypeM>, Enc_61f0b0 { +tc_e913dc32, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100111000; @@ -13376,7 +13776,7 @@ def M2_cnacs_s1 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 -= cmpy($Rs32,$Rt32):<<1:sat", -tc_8cb685d9, TypeM>, Enc_61f0b0 { +tc_e913dc32, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100111100; @@ -13388,7 +13788,7 @@ def M2_cnacsc_s0 
: HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 -= cmpy($Rs32,$Rt32*):sat", -tc_8cb685d9, TypeM>, Enc_61f0b0 { +tc_e913dc32, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100111010; @@ -13400,7 +13800,7 @@ def M2_cnacsc_s1 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 -= cmpy($Rs32,$Rt32*):<<1:sat", -tc_8cb685d9, TypeM>, Enc_61f0b0 { +tc_e913dc32, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100111110; @@ -13412,7 +13812,7 @@ def M2_dpmpyss_acc_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 += mpy($Rs32,$Rt32)", -tc_8cb685d9, TypeM>, Enc_61f0b0 { +tc_e913dc32, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100111000; @@ -13423,7 +13823,7 @@ def M2_dpmpyss_nac_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 -= mpy($Rs32,$Rt32)", -tc_8cb685d9, TypeM>, Enc_61f0b0 { +tc_e913dc32, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100111001; @@ -13434,7 +13834,7 @@ def M2_dpmpyss_rnd_s0 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32,$Rt32):rnd", -tc_8c8041e6, TypeM>, Enc_5ab2be { +tc_8fd5f294, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101101001; @@ -13446,7 +13846,7 @@ def M2_dpmpyss_s0 : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = mpy($Rs32,$Rt32)", -tc_8c8041e6, TypeM>, Enc_be32a5 { +tc_8fd5f294, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100101000; @@ -13456,7 +13856,7 @@ def M2_dpmpyuu_acc_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 += mpyu($Rs32,$Rt32)", -tc_8cb685d9, TypeM>, Enc_61f0b0 { +tc_e913dc32, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100111010; @@ -13467,7 +13867,7 @@ def M2_dpmpyuu_nac_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 -= mpyu($Rs32,$Rt32)", -tc_8cb685d9, TypeM>, Enc_61f0b0 { +tc_e913dc32, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100111011; @@ -13478,7 +13878,7 @@ def M2_dpmpyuu_s0 : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = mpyu($Rs32,$Rt32)", -tc_8c8041e6, TypeM>, Enc_be32a5 { +tc_8fd5f294, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100101010; @@ -13488,7 +13888,7 @@ def M2_hmmpyh_rs1 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32,$Rt32.h):<<1:rnd:sat", -tc_8c8041e6, TypeM>, Enc_5ab2be { +tc_8fd5f294, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101101101; @@ -13501,7 +13901,7 @@ def M2_hmmpyh_s1 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32,$Rt32.h):<<1:sat", -tc_8c8041e6, TypeM>, Enc_5ab2be { +tc_8fd5f294, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101101101; @@ -13514,7 +13914,7 @@ def M2_hmmpyl_rs1 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32,$Rt32.l):<<1:rnd:sat", -tc_8c8041e6, TypeM>, Enc_5ab2be { +tc_8fd5f294, TypeM>, 
Enc_5ab2be { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101101111; @@ -13527,7 +13927,7 @@ def M2_hmmpyl_s1 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32,$Rt32.l):<<1:sat", -tc_8c8041e6, TypeM>, Enc_5ab2be { +tc_8fd5f294, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101101101; @@ -13540,7 +13940,7 @@ def M2_maci : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 += mpyi($Rs32,$Rt32)", -tc_8cb685d9, TypeM>, Enc_2ae154, ImmRegRel { +tc_e913dc32, TypeM>, Enc_2ae154, ImmRegRel { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101111000; @@ -13555,7 +13955,7 @@ def M2_macsin : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, u32_0Imm:$Ii), "$Rx32 -= mpyi($Rs32,#$Ii)", -tc_a12a5971, TypeM>, Enc_c90aca { +tc_16d0d8d5, TypeM>, Enc_c90aca { let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100001100; let hasNewValue = 1; @@ -13573,7 +13973,7 @@ def M2_macsip : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, u32_0Imm:$Ii), "$Rx32 += mpyi($Rs32,#$Ii)", -tc_a12a5971, TypeM>, Enc_c90aca, ImmRegRel { +tc_16d0d8d5, TypeM>, Enc_c90aca, ImmRegRel { let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100001000; let hasNewValue = 1; @@ -13592,7 +13992,7 @@ def M2_mmachs_rs0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vmpywoh($Rss32,$Rtt32):rnd:sat", -tc_8cb685d9, TypeM>, Enc_88c16c { +tc_e913dc32, TypeM>, Enc_88c16c { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010001; @@ -13604,7 +14004,7 @@ def M2_mmachs_rs1 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vmpywoh($Rss32,$Rtt32):<<1:rnd:sat", -tc_8cb685d9, TypeM>, Enc_88c16c { +tc_e913dc32, TypeM>, Enc_88c16c { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010101; @@ -13616,7 +14016,7 @@ def M2_mmachs_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vmpywoh($Rss32,$Rtt32):sat", -tc_8cb685d9, TypeM>, Enc_88c16c { +tc_e913dc32, TypeM>, Enc_88c16c { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010000; @@ -13628,7 +14028,7 @@ def M2_mmachs_s1 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vmpywoh($Rss32,$Rtt32):<<1:sat", -tc_8cb685d9, TypeM>, Enc_88c16c { +tc_e913dc32, TypeM>, Enc_88c16c { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010100; @@ -13640,7 +14040,7 @@ def M2_mmacls_rs0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vmpyweh($Rss32,$Rtt32):rnd:sat", -tc_8cb685d9, TypeM>, Enc_88c16c { +tc_e913dc32, TypeM>, Enc_88c16c { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010001; @@ -13652,7 +14052,7 @@ def M2_mmacls_rs1 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vmpyweh($Rss32,$Rtt32):<<1:rnd:sat", -tc_8cb685d9, TypeM>, Enc_88c16c { +tc_e913dc32, TypeM>, Enc_88c16c { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010101; @@ -13664,7 +14064,7 @@ def M2_mmacls_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vmpyweh($Rss32,$Rtt32):sat", -tc_8cb685d9, TypeM>, Enc_88c16c { 
+tc_e913dc32, TypeM>, Enc_88c16c { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010000; @@ -13676,7 +14076,7 @@ def M2_mmacls_s1 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vmpyweh($Rss32,$Rtt32):<<1:sat", -tc_8cb685d9, TypeM>, Enc_88c16c { +tc_e913dc32, TypeM>, Enc_88c16c { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010100; @@ -13688,7 +14088,7 @@ def M2_mmacuhs_rs0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vmpywouh($Rss32,$Rtt32):rnd:sat", -tc_8cb685d9, TypeM>, Enc_88c16c { +tc_e913dc32, TypeM>, Enc_88c16c { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010011; @@ -13700,7 +14100,7 @@ def M2_mmacuhs_rs1 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vmpywouh($Rss32,$Rtt32):<<1:rnd:sat", -tc_8cb685d9, TypeM>, Enc_88c16c { +tc_e913dc32, TypeM>, Enc_88c16c { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010111; @@ -13712,7 +14112,7 @@ def M2_mmacuhs_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vmpywouh($Rss32,$Rtt32):sat", -tc_8cb685d9, TypeM>, Enc_88c16c { +tc_e913dc32, TypeM>, Enc_88c16c { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010010; @@ -13724,7 +14124,7 @@ def M2_mmacuhs_s1 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vmpywouh($Rss32,$Rtt32):<<1:sat", -tc_8cb685d9, TypeM>, Enc_88c16c { +tc_e913dc32, TypeM>, Enc_88c16c { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010110; @@ -13736,7 +14136,7 @@ def M2_mmaculs_rs0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vmpyweuh($Rss32,$Rtt32):rnd:sat", -tc_8cb685d9, TypeM>, Enc_88c16c { +tc_e913dc32, TypeM>, Enc_88c16c { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010011; @@ -13748,7 +14148,7 @@ def M2_mmaculs_rs1 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vmpyweuh($Rss32,$Rtt32):<<1:rnd:sat", -tc_8cb685d9, TypeM>, Enc_88c16c { +tc_e913dc32, TypeM>, Enc_88c16c { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010111; @@ -13760,7 +14160,7 @@ def M2_mmaculs_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vmpyweuh($Rss32,$Rtt32):sat", -tc_8cb685d9, TypeM>, Enc_88c16c { +tc_e913dc32, TypeM>, Enc_88c16c { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010010; @@ -13772,7 +14172,7 @@ def M2_mmaculs_s1 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vmpyweuh($Rss32,$Rtt32):<<1:sat", -tc_8cb685d9, TypeM>, Enc_88c16c { +tc_e913dc32, TypeM>, Enc_88c16c { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010110; @@ -13784,7 +14184,7 @@ def M2_mmpyh_rs0 : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vmpywoh($Rss32,$Rtt32):rnd:sat", -tc_8c8041e6, TypeM>, Enc_a56825 { +tc_8fd5f294, TypeM>, Enc_a56825 { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000001; @@ -13795,7 +14195,7 @@ def M2_mmpyh_rs1 : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, 
DoubleRegs:$Rtt32), "$Rdd32 = vmpywoh($Rss32,$Rtt32):<<1:rnd:sat", -tc_8c8041e6, TypeM>, Enc_a56825 { +tc_8fd5f294, TypeM>, Enc_a56825 { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000101; @@ -13806,7 +14206,7 @@ def M2_mmpyh_s0 : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vmpywoh($Rss32,$Rtt32):sat", -tc_8c8041e6, TypeM>, Enc_a56825 { +tc_8fd5f294, TypeM>, Enc_a56825 { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000000; @@ -13817,7 +14217,7 @@ def M2_mmpyh_s1 : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vmpywoh($Rss32,$Rtt32):<<1:sat", -tc_8c8041e6, TypeM>, Enc_a56825 { +tc_8fd5f294, TypeM>, Enc_a56825 { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000100; @@ -13828,7 +14228,7 @@ def M2_mmpyl_rs0 : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vmpyweh($Rss32,$Rtt32):rnd:sat", -tc_8c8041e6, TypeM>, Enc_a56825 { +tc_8fd5f294, TypeM>, Enc_a56825 { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000001; @@ -13839,7 +14239,7 @@ def M2_mmpyl_rs1 : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vmpyweh($Rss32,$Rtt32):<<1:rnd:sat", -tc_8c8041e6, TypeM>, Enc_a56825 { +tc_8fd5f294, TypeM>, Enc_a56825 { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000101; @@ -13850,7 +14250,7 @@ def M2_mmpyl_s0 : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vmpyweh($Rss32,$Rtt32):sat", -tc_8c8041e6, TypeM>, Enc_a56825 { +tc_8fd5f294, TypeM>, Enc_a56825 { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000000; @@ -13861,7 +14261,7 @@ def M2_mmpyl_s1 : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vmpyweh($Rss32,$Rtt32):<<1:sat", -tc_8c8041e6, TypeM>, Enc_a56825 { +tc_8fd5f294, TypeM>, Enc_a56825 { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000100; @@ -13872,7 +14272,7 @@ def M2_mmpyuh_rs0 : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vmpywouh($Rss32,$Rtt32):rnd:sat", -tc_8c8041e6, TypeM>, Enc_a56825 { +tc_8fd5f294, TypeM>, Enc_a56825 { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000011; @@ -13883,7 +14283,7 @@ def M2_mmpyuh_rs1 : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vmpywouh($Rss32,$Rtt32):<<1:rnd:sat", -tc_8c8041e6, TypeM>, Enc_a56825 { +tc_8fd5f294, TypeM>, Enc_a56825 { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000111; @@ -13894,7 +14294,7 @@ def M2_mmpyuh_s0 : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vmpywouh($Rss32,$Rtt32):sat", -tc_8c8041e6, TypeM>, Enc_a56825 { +tc_8fd5f294, TypeM>, Enc_a56825 { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000010; @@ -13905,7 +14305,7 @@ def M2_mmpyuh_s1 : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vmpywouh($Rss32,$Rtt32):<<1:sat", -tc_8c8041e6, TypeM>, Enc_a56825 { +tc_8fd5f294, TypeM>, Enc_a56825 { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000110; @@ -13916,7 +14316,7 @@ def M2_mmpyul_rs0 : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vmpyweuh($Rss32,$Rtt32):rnd:sat", -tc_8c8041e6, TypeM>, Enc_a56825 { +tc_8fd5f294, 
TypeM>, Enc_a56825 { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000011; @@ -13927,7 +14327,7 @@ def M2_mmpyul_rs1 : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vmpyweuh($Rss32,$Rtt32):<<1:rnd:sat", -tc_8c8041e6, TypeM>, Enc_a56825 { +tc_8fd5f294, TypeM>, Enc_a56825 { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000111; @@ -13938,7 +14338,7 @@ def M2_mmpyul_s0 : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vmpyweuh($Rss32,$Rtt32):sat", -tc_8c8041e6, TypeM>, Enc_a56825 { +tc_8fd5f294, TypeM>, Enc_a56825 { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000010; @@ -13949,7 +14349,7 @@ def M2_mmpyul_s1 : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vmpyweuh($Rss32,$Rtt32):<<1:sat", -tc_8c8041e6, TypeM>, Enc_a56825 { +tc_8fd5f294, TypeM>, Enc_a56825 { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000110; @@ -13960,7 +14360,7 @@ def M2_mpy_acc_hh_s0 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 += mpy($Rs32.h,$Rt32.h)", -tc_8cb685d9, TypeM>, Enc_2ae154 { +tc_e913dc32, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110000; @@ -13973,7 +14373,7 @@ def M2_mpy_acc_hh_s1 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 += mpy($Rs32.h,$Rt32.h):<<1", -tc_8cb685d9, TypeM>, Enc_2ae154 { +tc_e913dc32, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110100; @@ -13986,7 +14386,7 @@ def M2_mpy_acc_hl_s0 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 += mpy($Rs32.h,$Rt32.l)", -tc_8cb685d9, TypeM>, Enc_2ae154 { +tc_e913dc32, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110000; @@ -13999,7 +14399,7 @@ def M2_mpy_acc_hl_s1 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 += mpy($Rs32.h,$Rt32.l):<<1", -tc_8cb685d9, TypeM>, Enc_2ae154 { +tc_e913dc32, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110100; @@ -14012,7 +14412,7 @@ def M2_mpy_acc_lh_s0 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 += mpy($Rs32.l,$Rt32.h)", -tc_8cb685d9, TypeM>, Enc_2ae154 { +tc_e913dc32, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110000; @@ -14025,7 +14425,7 @@ def M2_mpy_acc_lh_s1 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 += mpy($Rs32.l,$Rt32.h):<<1", -tc_8cb685d9, TypeM>, Enc_2ae154 { +tc_e913dc32, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110100; @@ -14038,7 +14438,7 @@ def M2_mpy_acc_ll_s0 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 += mpy($Rs32.l,$Rt32.l)", -tc_8cb685d9, TypeM>, Enc_2ae154 { +tc_e913dc32, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110000; @@ -14051,7 +14451,7 @@ def M2_mpy_acc_ll_s1 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 += mpy($Rs32.l,$Rt32.l):<<1", -tc_8cb685d9, TypeM>, Enc_2ae154 { +tc_e913dc32, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110100; @@ 
-14064,7 +14464,7 @@ def M2_mpy_acc_sat_hh_s0 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 += mpy($Rs32.h,$Rt32.h):sat", -tc_8cb685d9, TypeM>, Enc_2ae154 { +tc_e913dc32, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110000; @@ -14078,7 +14478,7 @@ def M2_mpy_acc_sat_hh_s1 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 += mpy($Rs32.h,$Rt32.h):<<1:sat", -tc_8cb685d9, TypeM>, Enc_2ae154 { +tc_e913dc32, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110100; @@ -14092,7 +14492,7 @@ def M2_mpy_acc_sat_hl_s0 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 += mpy($Rs32.h,$Rt32.l):sat", -tc_8cb685d9, TypeM>, Enc_2ae154 { +tc_e913dc32, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110000; @@ -14106,7 +14506,7 @@ def M2_mpy_acc_sat_hl_s1 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 += mpy($Rs32.h,$Rt32.l):<<1:sat", -tc_8cb685d9, TypeM>, Enc_2ae154 { +tc_e913dc32, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110100; @@ -14120,7 +14520,7 @@ def M2_mpy_acc_sat_lh_s0 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 += mpy($Rs32.l,$Rt32.h):sat", -tc_8cb685d9, TypeM>, Enc_2ae154 { +tc_e913dc32, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110000; @@ -14134,7 +14534,7 @@ def M2_mpy_acc_sat_lh_s1 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 += mpy($Rs32.l,$Rt32.h):<<1:sat", -tc_8cb685d9, TypeM>, Enc_2ae154 { +tc_e913dc32, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110100; @@ -14148,7 +14548,7 @@ def M2_mpy_acc_sat_ll_s0 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 += mpy($Rs32.l,$Rt32.l):sat", -tc_8cb685d9, TypeM>, Enc_2ae154 { +tc_e913dc32, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110000; @@ -14162,7 +14562,7 @@ def M2_mpy_acc_sat_ll_s1 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 += mpy($Rs32.l,$Rt32.l):<<1:sat", -tc_8cb685d9, TypeM>, Enc_2ae154 { +tc_e913dc32, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110100; @@ -14176,7 +14576,7 @@ def M2_mpy_hh_s0 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32.h,$Rt32.h)", -tc_8c8041e6, TypeM>, Enc_5ab2be { +tc_8fd5f294, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100000; @@ -14188,7 +14588,7 @@ def M2_mpy_hh_s1 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32.h,$Rt32.h):<<1", -tc_8c8041e6, TypeM>, Enc_5ab2be { +tc_8fd5f294, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100100; @@ -14200,7 +14600,7 @@ def M2_mpy_hl_s0 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32.h,$Rt32.l)", -tc_8c8041e6, TypeM>, Enc_5ab2be { +tc_8fd5f294, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100000; @@ -14212,7 +14612,7 @@ def M2_mpy_hl_s1 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = 
mpy($Rs32.h,$Rt32.l):<<1", -tc_8c8041e6, TypeM>, Enc_5ab2be { +tc_8fd5f294, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100100; @@ -14224,7 +14624,7 @@ def M2_mpy_lh_s0 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32.l,$Rt32.h)", -tc_8c8041e6, TypeM>, Enc_5ab2be { +tc_8fd5f294, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100000; @@ -14236,7 +14636,7 @@ def M2_mpy_lh_s1 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32.l,$Rt32.h):<<1", -tc_8c8041e6, TypeM>, Enc_5ab2be { +tc_8fd5f294, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100100; @@ -14248,7 +14648,7 @@ def M2_mpy_ll_s0 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32.l,$Rt32.l)", -tc_8c8041e6, TypeM>, Enc_5ab2be { +tc_8fd5f294, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100000; @@ -14260,7 +14660,7 @@ def M2_mpy_ll_s1 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32.l,$Rt32.l):<<1", -tc_8c8041e6, TypeM>, Enc_5ab2be { +tc_8fd5f294, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100100; @@ -14272,7 +14672,7 @@ def M2_mpy_nac_hh_s0 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 -= mpy($Rs32.h,$Rt32.h)", -tc_8cb685d9, TypeM>, Enc_2ae154 { +tc_e913dc32, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110001; @@ -14285,7 +14685,7 @@ def M2_mpy_nac_hh_s1 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 -= mpy($Rs32.h,$Rt32.h):<<1", -tc_8cb685d9, TypeM>, Enc_2ae154 { +tc_e913dc32, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110101; @@ -14298,7 +14698,7 @@ def M2_mpy_nac_hl_s0 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 -= mpy($Rs32.h,$Rt32.l)", -tc_8cb685d9, TypeM>, Enc_2ae154 { +tc_e913dc32, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110001; @@ -14311,7 +14711,7 @@ def M2_mpy_nac_hl_s1 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 -= mpy($Rs32.h,$Rt32.l):<<1", -tc_8cb685d9, TypeM>, Enc_2ae154 { +tc_e913dc32, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110101; @@ -14324,7 +14724,7 @@ def M2_mpy_nac_lh_s0 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 -= mpy($Rs32.l,$Rt32.h)", -tc_8cb685d9, TypeM>, Enc_2ae154 { +tc_e913dc32, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110001; @@ -14337,7 +14737,7 @@ def M2_mpy_nac_lh_s1 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 -= mpy($Rs32.l,$Rt32.h):<<1", -tc_8cb685d9, TypeM>, Enc_2ae154 { +tc_e913dc32, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110101; @@ -14350,7 +14750,7 @@ def M2_mpy_nac_ll_s0 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 -= mpy($Rs32.l,$Rt32.l)", -tc_8cb685d9, TypeM>, Enc_2ae154 { +tc_e913dc32, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110001; @@ -14363,7 +14763,7 @@ 
def M2_mpy_nac_ll_s1 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 -= mpy($Rs32.l,$Rt32.l):<<1", -tc_8cb685d9, TypeM>, Enc_2ae154 { +tc_e913dc32, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110101; @@ -14376,7 +14776,7 @@ def M2_mpy_nac_sat_hh_s0 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 -= mpy($Rs32.h,$Rt32.h):sat", -tc_8cb685d9, TypeM>, Enc_2ae154 { +tc_e913dc32, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110001; @@ -14390,7 +14790,7 @@ def M2_mpy_nac_sat_hh_s1 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 -= mpy($Rs32.h,$Rt32.h):<<1:sat", -tc_8cb685d9, TypeM>, Enc_2ae154 { +tc_e913dc32, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110101; @@ -14404,7 +14804,7 @@ def M2_mpy_nac_sat_hl_s0 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 -= mpy($Rs32.h,$Rt32.l):sat", -tc_8cb685d9, TypeM>, Enc_2ae154 { +tc_e913dc32, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110001; @@ -14418,7 +14818,7 @@ def M2_mpy_nac_sat_hl_s1 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 -= mpy($Rs32.h,$Rt32.l):<<1:sat", -tc_8cb685d9, TypeM>, Enc_2ae154 { +tc_e913dc32, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110101; @@ -14432,7 +14832,7 @@ def M2_mpy_nac_sat_lh_s0 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 -= mpy($Rs32.l,$Rt32.h):sat", -tc_8cb685d9, TypeM>, Enc_2ae154 { +tc_e913dc32, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110001; @@ -14446,7 +14846,7 @@ def M2_mpy_nac_sat_lh_s1 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 -= mpy($Rs32.l,$Rt32.h):<<1:sat", -tc_8cb685d9, TypeM>, Enc_2ae154 { +tc_e913dc32, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110101; @@ -14460,7 +14860,7 @@ def M2_mpy_nac_sat_ll_s0 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 -= mpy($Rs32.l,$Rt32.l):sat", -tc_8cb685d9, TypeM>, Enc_2ae154 { +tc_e913dc32, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110001; @@ -14474,7 +14874,7 @@ def M2_mpy_nac_sat_ll_s1 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 -= mpy($Rs32.l,$Rt32.l):<<1:sat", -tc_8cb685d9, TypeM>, Enc_2ae154 { +tc_e913dc32, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110101; @@ -14488,7 +14888,7 @@ def M2_mpy_rnd_hh_s0 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32.h,$Rt32.h):rnd", -tc_8c8041e6, TypeM>, Enc_5ab2be { +tc_8fd5f294, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100001; @@ -14500,7 +14900,7 @@ def M2_mpy_rnd_hh_s1 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32.h,$Rt32.h):<<1:rnd", -tc_8c8041e6, TypeM>, Enc_5ab2be { +tc_8fd5f294, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100101; @@ -14512,7 +14912,7 @@ def M2_mpy_rnd_hl_s0 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, 
IntRegs:$Rt32), "$Rd32 = mpy($Rs32.h,$Rt32.l):rnd", -tc_8c8041e6, TypeM>, Enc_5ab2be { +tc_8fd5f294, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100001; @@ -14524,7 +14924,7 @@ def M2_mpy_rnd_hl_s1 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32.h,$Rt32.l):<<1:rnd", -tc_8c8041e6, TypeM>, Enc_5ab2be { +tc_8fd5f294, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100101; @@ -14536,7 +14936,7 @@ def M2_mpy_rnd_lh_s0 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32.l,$Rt32.h):rnd", -tc_8c8041e6, TypeM>, Enc_5ab2be { +tc_8fd5f294, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100001; @@ -14548,7 +14948,7 @@ def M2_mpy_rnd_lh_s1 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32.l,$Rt32.h):<<1:rnd", -tc_8c8041e6, TypeM>, Enc_5ab2be { +tc_8fd5f294, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100101; @@ -14560,7 +14960,7 @@ def M2_mpy_rnd_ll_s0 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32.l,$Rt32.l):rnd", -tc_8c8041e6, TypeM>, Enc_5ab2be { +tc_8fd5f294, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100001; @@ -14572,7 +14972,7 @@ def M2_mpy_rnd_ll_s1 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32.l,$Rt32.l):<<1:rnd", -tc_8c8041e6, TypeM>, Enc_5ab2be { +tc_8fd5f294, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100101; @@ -14584,7 +14984,7 @@ def M2_mpy_sat_hh_s0 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32.h,$Rt32.h):sat", -tc_8c8041e6, TypeM>, Enc_5ab2be { +tc_8fd5f294, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100000; @@ -14597,7 +14997,7 @@ def M2_mpy_sat_hh_s1 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32.h,$Rt32.h):<<1:sat", -tc_8c8041e6, TypeM>, Enc_5ab2be { +tc_8fd5f294, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100100; @@ -14610,7 +15010,7 @@ def M2_mpy_sat_hl_s0 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32.h,$Rt32.l):sat", -tc_8c8041e6, TypeM>, Enc_5ab2be { +tc_8fd5f294, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100000; @@ -14623,7 +15023,7 @@ def M2_mpy_sat_hl_s1 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32.h,$Rt32.l):<<1:sat", -tc_8c8041e6, TypeM>, Enc_5ab2be { +tc_8fd5f294, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100100; @@ -14636,7 +15036,7 @@ def M2_mpy_sat_lh_s0 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32.l,$Rt32.h):sat", -tc_8c8041e6, TypeM>, Enc_5ab2be { +tc_8fd5f294, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100000; @@ -14649,7 +15049,7 @@ def M2_mpy_sat_lh_s1 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32.l,$Rt32.h):<<1:sat", -tc_8c8041e6, TypeM>, Enc_5ab2be { +tc_8fd5f294, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100100; @@ -14662,7 +15062,7 @@ def M2_mpy_sat_ll_s0 : HInst< (outs 
IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32.l,$Rt32.l):sat", -tc_8c8041e6, TypeM>, Enc_5ab2be { +tc_8fd5f294, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100000; @@ -14675,7 +15075,7 @@ def M2_mpy_sat_ll_s1 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32.l,$Rt32.l):<<1:sat", -tc_8c8041e6, TypeM>, Enc_5ab2be { +tc_8fd5f294, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100100; @@ -14688,7 +15088,7 @@ def M2_mpy_sat_rnd_hh_s0 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32.h,$Rt32.h):rnd:sat", -tc_8c8041e6, TypeM>, Enc_5ab2be { +tc_8fd5f294, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100001; @@ -14701,7 +15101,7 @@ def M2_mpy_sat_rnd_hh_s1 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32.h,$Rt32.h):<<1:rnd:sat", -tc_8c8041e6, TypeM>, Enc_5ab2be { +tc_8fd5f294, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100101; @@ -14714,7 +15114,7 @@ def M2_mpy_sat_rnd_hl_s0 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32.h,$Rt32.l):rnd:sat", -tc_8c8041e6, TypeM>, Enc_5ab2be { +tc_8fd5f294, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100001; @@ -14727,7 +15127,7 @@ def M2_mpy_sat_rnd_hl_s1 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32.h,$Rt32.l):<<1:rnd:sat", -tc_8c8041e6, TypeM>, Enc_5ab2be { +tc_8fd5f294, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100101; @@ -14740,7 +15140,7 @@ def M2_mpy_sat_rnd_lh_s0 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32.l,$Rt32.h):rnd:sat", -tc_8c8041e6, TypeM>, Enc_5ab2be { +tc_8fd5f294, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100001; @@ -14753,7 +15153,7 @@ def M2_mpy_sat_rnd_lh_s1 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32.l,$Rt32.h):<<1:rnd:sat", -tc_8c8041e6, TypeM>, Enc_5ab2be { +tc_8fd5f294, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100101; @@ -14766,7 +15166,7 @@ def M2_mpy_sat_rnd_ll_s0 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32.l,$Rt32.l):rnd:sat", -tc_8c8041e6, TypeM>, Enc_5ab2be { +tc_8fd5f294, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100001; @@ -14779,7 +15179,7 @@ def M2_mpy_sat_rnd_ll_s1 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32.l,$Rt32.l):<<1:rnd:sat", -tc_8c8041e6, TypeM>, Enc_5ab2be { +tc_8fd5f294, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100101; @@ -14792,7 +15192,7 @@ def M2_mpy_up : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32,$Rt32)", -tc_8c8041e6, TypeM>, Enc_5ab2be { +tc_8fd5f294, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101101000; @@ -14804,7 +15204,7 @@ def M2_mpy_up_s1 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32,$Rt32):<<1", -tc_8c8041e6, TypeM>, Enc_5ab2be { +tc_8fd5f294, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 
0b11101101101; @@ -14816,7 +15216,7 @@ def M2_mpy_up_s1_sat : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32,$Rt32):<<1:sat", -tc_8c8041e6, TypeM>, Enc_5ab2be { +tc_8fd5f294, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101101111; @@ -14829,7 +15229,7 @@ def M2_mpyd_acc_hh_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 += mpy($Rs32.h,$Rt32.h)", -tc_8cb685d9, TypeM>, Enc_61f0b0 { +tc_e913dc32, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100110000; @@ -14840,7 +15240,7 @@ def M2_mpyd_acc_hh_s1 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 += mpy($Rs32.h,$Rt32.h):<<1", -tc_8cb685d9, TypeM>, Enc_61f0b0 { +tc_e913dc32, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100110100; @@ -14851,7 +15251,7 @@ def M2_mpyd_acc_hl_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 += mpy($Rs32.h,$Rt32.l)", -tc_8cb685d9, TypeM>, Enc_61f0b0 { +tc_e913dc32, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100110000; @@ -14862,7 +15262,7 @@ def M2_mpyd_acc_hl_s1 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 += mpy($Rs32.h,$Rt32.l):<<1", -tc_8cb685d9, TypeM>, Enc_61f0b0 { +tc_e913dc32, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100110100; @@ -14873,7 +15273,7 @@ def M2_mpyd_acc_lh_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 += mpy($Rs32.l,$Rt32.h)", -tc_8cb685d9, TypeM>, Enc_61f0b0 { +tc_e913dc32, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100110000; @@ -14884,7 +15284,7 @@ def M2_mpyd_acc_lh_s1 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 += mpy($Rs32.l,$Rt32.h):<<1", -tc_8cb685d9, TypeM>, Enc_61f0b0 { +tc_e913dc32, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100110100; @@ -14895,7 +15295,7 @@ def M2_mpyd_acc_ll_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 += mpy($Rs32.l,$Rt32.l)", -tc_8cb685d9, TypeM>, Enc_61f0b0 { +tc_e913dc32, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100110000; @@ -14906,7 +15306,7 @@ def M2_mpyd_acc_ll_s1 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 += mpy($Rs32.l,$Rt32.l):<<1", -tc_8cb685d9, TypeM>, Enc_61f0b0 { +tc_e913dc32, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100110100; @@ -14917,7 +15317,7 @@ def M2_mpyd_hh_s0 : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = mpy($Rs32.h,$Rt32.h)", -tc_8c8041e6, TypeM>, Enc_be32a5 { +tc_8fd5f294, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100100000; @@ -14927,7 +15327,7 @@ def M2_mpyd_hh_s1 : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = mpy($Rs32.h,$Rt32.h):<<1", -tc_8c8041e6, TypeM>, Enc_be32a5 { +tc_8fd5f294, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100100100; @@ -14937,7 +15337,7 @@ def M2_mpyd_hl_s0 : HInst< (outs 
DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = mpy($Rs32.h,$Rt32.l)", -tc_8c8041e6, TypeM>, Enc_be32a5 { +tc_8fd5f294, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100100000; @@ -14947,7 +15347,7 @@ def M2_mpyd_hl_s1 : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = mpy($Rs32.h,$Rt32.l):<<1", -tc_8c8041e6, TypeM>, Enc_be32a5 { +tc_8fd5f294, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100100100; @@ -14957,7 +15357,7 @@ def M2_mpyd_lh_s0 : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = mpy($Rs32.l,$Rt32.h)", -tc_8c8041e6, TypeM>, Enc_be32a5 { +tc_8fd5f294, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100100000; @@ -14967,7 +15367,7 @@ def M2_mpyd_lh_s1 : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = mpy($Rs32.l,$Rt32.h):<<1", -tc_8c8041e6, TypeM>, Enc_be32a5 { +tc_8fd5f294, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100100100; @@ -14977,7 +15377,7 @@ def M2_mpyd_ll_s0 : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = mpy($Rs32.l,$Rt32.l)", -tc_8c8041e6, TypeM>, Enc_be32a5 { +tc_8fd5f294, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100100000; @@ -14987,7 +15387,7 @@ def M2_mpyd_ll_s1 : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = mpy($Rs32.l,$Rt32.l):<<1", -tc_8c8041e6, TypeM>, Enc_be32a5 { +tc_8fd5f294, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100100100; @@ -14997,7 +15397,7 @@ def M2_mpyd_nac_hh_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 -= mpy($Rs32.h,$Rt32.h)", -tc_8cb685d9, TypeM>, Enc_61f0b0 { +tc_e913dc32, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100110001; @@ -15008,7 +15408,7 @@ def M2_mpyd_nac_hh_s1 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 -= mpy($Rs32.h,$Rt32.h):<<1", -tc_8cb685d9, TypeM>, Enc_61f0b0 { +tc_e913dc32, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100110101; @@ -15019,7 +15419,7 @@ def M2_mpyd_nac_hl_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 -= mpy($Rs32.h,$Rt32.l)", -tc_8cb685d9, TypeM>, Enc_61f0b0 { +tc_e913dc32, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100110001; @@ -15030,7 +15430,7 @@ def M2_mpyd_nac_hl_s1 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 -= mpy($Rs32.h,$Rt32.l):<<1", -tc_8cb685d9, TypeM>, Enc_61f0b0 { +tc_e913dc32, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100110101; @@ -15041,7 +15441,7 @@ def M2_mpyd_nac_lh_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 -= mpy($Rs32.l,$Rt32.h)", -tc_8cb685d9, TypeM>, Enc_61f0b0 { +tc_e913dc32, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100110001; @@ -15052,7 +15452,7 @@ def M2_mpyd_nac_lh_s1 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 -= mpy($Rs32.l,$Rt32.h):<<1", -tc_8cb685d9, TypeM>, Enc_61f0b0 { 
+tc_e913dc32, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100110101; @@ -15063,7 +15463,7 @@ def M2_mpyd_nac_ll_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 -= mpy($Rs32.l,$Rt32.l)", -tc_8cb685d9, TypeM>, Enc_61f0b0 { +tc_e913dc32, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100110001; @@ -15074,7 +15474,7 @@ def M2_mpyd_nac_ll_s1 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 -= mpy($Rs32.l,$Rt32.l):<<1", -tc_8cb685d9, TypeM>, Enc_61f0b0 { +tc_e913dc32, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100110101; @@ -15085,7 +15485,7 @@ def M2_mpyd_rnd_hh_s0 : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = mpy($Rs32.h,$Rt32.h):rnd", -tc_8c8041e6, TypeM>, Enc_be32a5 { +tc_8fd5f294, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100100001; @@ -15095,7 +15495,7 @@ def M2_mpyd_rnd_hh_s1 : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = mpy($Rs32.h,$Rt32.h):<<1:rnd", -tc_8c8041e6, TypeM>, Enc_be32a5 { +tc_8fd5f294, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100100101; @@ -15105,7 +15505,7 @@ def M2_mpyd_rnd_hl_s0 : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = mpy($Rs32.h,$Rt32.l):rnd", -tc_8c8041e6, TypeM>, Enc_be32a5 { +tc_8fd5f294, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100100001; @@ -15115,7 +15515,7 @@ def M2_mpyd_rnd_hl_s1 : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = mpy($Rs32.h,$Rt32.l):<<1:rnd", -tc_8c8041e6, TypeM>, Enc_be32a5 { +tc_8fd5f294, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100100101; @@ -15125,7 +15525,7 @@ def M2_mpyd_rnd_lh_s0 : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = mpy($Rs32.l,$Rt32.h):rnd", -tc_8c8041e6, TypeM>, Enc_be32a5 { +tc_8fd5f294, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100100001; @@ -15135,7 +15535,7 @@ def M2_mpyd_rnd_lh_s1 : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = mpy($Rs32.l,$Rt32.h):<<1:rnd", -tc_8c8041e6, TypeM>, Enc_be32a5 { +tc_8fd5f294, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100100101; @@ -15145,7 +15545,7 @@ def M2_mpyd_rnd_ll_s0 : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = mpy($Rs32.l,$Rt32.l):rnd", -tc_8c8041e6, TypeM>, Enc_be32a5 { +tc_8fd5f294, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100100001; @@ -15155,7 +15555,7 @@ def M2_mpyd_rnd_ll_s1 : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = mpy($Rs32.l,$Rt32.l):<<1:rnd", -tc_8c8041e6, TypeM>, Enc_be32a5 { +tc_8fd5f294, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100100101; @@ -15165,7 +15565,7 @@ def M2_mpyi : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpyi($Rs32,$Rt32)", -tc_8c8041e6, TypeM>, Enc_5ab2be, ImmRegRel { +tc_8fd5f294, TypeM>, Enc_5ab2be, ImmRegRel { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101101000; @@ -15179,7 +15579,7 @@ def M2_mpysin : HInst< 
(outs IntRegs:$Rd32), (ins IntRegs:$Rs32, u8_0Imm:$Ii), "$Rd32 = -mpyi($Rs32,#$Ii)", -tc_ae2c2dc2, TypeM>, Enc_b8c967 { +tc_1853ea6d, TypeM>, Enc_b8c967 { let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100000100; let hasNewValue = 1; @@ -15190,7 +15590,7 @@ def M2_mpysip : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, u32_0Imm:$Ii), "$Rd32 = +mpyi($Rs32,#$Ii)", -tc_ae2c2dc2, TypeM>, Enc_b8c967 { +tc_1853ea6d, TypeM>, Enc_b8c967 { let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100000000; let hasNewValue = 1; @@ -15206,7 +15606,7 @@ def M2_mpysmi : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, m32_0Imm:$Ii), "$Rd32 = mpyi($Rs32,#$Ii)", -tc_ae2c2dc2, TypeM>, ImmRegRel { +tc_1853ea6d, TypeM>, ImmRegRel { let hasNewValue = 1; let opNewValue = 0; let CextOpcode = "M2_mpyi"; @@ -15222,7 +15622,7 @@ def M2_mpysu_up : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpysu($Rs32,$Rt32)", -tc_8c8041e6, TypeM>, Enc_5ab2be { +tc_8fd5f294, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101101011; @@ -15234,7 +15634,7 @@ def M2_mpyu_acc_hh_s0 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 += mpyu($Rs32.h,$Rt32.h)", -tc_8cb685d9, TypeM>, Enc_2ae154 { +tc_e913dc32, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110010; @@ -15247,7 +15647,7 @@ def M2_mpyu_acc_hh_s1 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 += mpyu($Rs32.h,$Rt32.h):<<1", -tc_8cb685d9, TypeM>, Enc_2ae154 { +tc_e913dc32, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110110; @@ -15260,7 +15660,7 @@ def M2_mpyu_acc_hl_s0 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 += mpyu($Rs32.h,$Rt32.l)", -tc_8cb685d9, TypeM>, Enc_2ae154 { +tc_e913dc32, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110010; @@ -15273,7 +15673,7 @@ def M2_mpyu_acc_hl_s1 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 += mpyu($Rs32.h,$Rt32.l):<<1", -tc_8cb685d9, TypeM>, Enc_2ae154 { +tc_e913dc32, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110110; @@ -15286,7 +15686,7 @@ def M2_mpyu_acc_lh_s0 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 += mpyu($Rs32.l,$Rt32.h)", -tc_8cb685d9, TypeM>, Enc_2ae154 { +tc_e913dc32, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110010; @@ -15299,7 +15699,7 @@ def M2_mpyu_acc_lh_s1 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 += mpyu($Rs32.l,$Rt32.h):<<1", -tc_8cb685d9, TypeM>, Enc_2ae154 { +tc_e913dc32, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110110; @@ -15312,7 +15712,7 @@ def M2_mpyu_acc_ll_s0 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 += mpyu($Rs32.l,$Rt32.l)", -tc_8cb685d9, TypeM>, Enc_2ae154 { +tc_e913dc32, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110010; @@ -15325,7 +15725,7 @@ def M2_mpyu_acc_ll_s1 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 += mpyu($Rs32.l,$Rt32.l):<<1", -tc_8cb685d9, TypeM>, Enc_2ae154 { +tc_e913dc32, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; 
let Inst{31-21} = 0b11101110110; @@ -15338,7 +15738,7 @@ def M2_mpyu_hh_s0 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpyu($Rs32.h,$Rt32.h)", -tc_8c8041e6, TypeM>, Enc_5ab2be { +tc_8fd5f294, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100010; @@ -15350,7 +15750,7 @@ def M2_mpyu_hh_s1 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpyu($Rs32.h,$Rt32.h):<<1", -tc_8c8041e6, TypeM>, Enc_5ab2be { +tc_8fd5f294, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100110; @@ -15362,7 +15762,7 @@ def M2_mpyu_hl_s0 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpyu($Rs32.h,$Rt32.l)", -tc_8c8041e6, TypeM>, Enc_5ab2be { +tc_8fd5f294, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100010; @@ -15374,7 +15774,7 @@ def M2_mpyu_hl_s1 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpyu($Rs32.h,$Rt32.l):<<1", -tc_8c8041e6, TypeM>, Enc_5ab2be { +tc_8fd5f294, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100110; @@ -15386,7 +15786,7 @@ def M2_mpyu_lh_s0 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpyu($Rs32.l,$Rt32.h)", -tc_8c8041e6, TypeM>, Enc_5ab2be { +tc_8fd5f294, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100010; @@ -15398,7 +15798,7 @@ def M2_mpyu_lh_s1 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpyu($Rs32.l,$Rt32.h):<<1", -tc_8c8041e6, TypeM>, Enc_5ab2be { +tc_8fd5f294, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100110; @@ -15410,7 +15810,7 @@ def M2_mpyu_ll_s0 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpyu($Rs32.l,$Rt32.l)", -tc_8c8041e6, TypeM>, Enc_5ab2be { +tc_8fd5f294, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100010; @@ -15422,7 +15822,7 @@ def M2_mpyu_ll_s1 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpyu($Rs32.l,$Rt32.l):<<1", -tc_8c8041e6, TypeM>, Enc_5ab2be { +tc_8fd5f294, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100110; @@ -15434,7 +15834,7 @@ def M2_mpyu_nac_hh_s0 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 -= mpyu($Rs32.h,$Rt32.h)", -tc_8cb685d9, TypeM>, Enc_2ae154 { +tc_e913dc32, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110011; @@ -15447,7 +15847,7 @@ def M2_mpyu_nac_hh_s1 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 -= mpyu($Rs32.h,$Rt32.h):<<1", -tc_8cb685d9, TypeM>, Enc_2ae154 { +tc_e913dc32, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110111; @@ -15460,7 +15860,7 @@ def M2_mpyu_nac_hl_s0 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 -= mpyu($Rs32.h,$Rt32.l)", -tc_8cb685d9, TypeM>, Enc_2ae154 { +tc_e913dc32, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110011; @@ -15473,7 +15873,7 @@ def M2_mpyu_nac_hl_s1 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 -= mpyu($Rs32.h,$Rt32.l):<<1", -tc_8cb685d9, TypeM>, Enc_2ae154 { +tc_e913dc32, TypeM>, 
Enc_2ae154 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110111; @@ -15486,7 +15886,7 @@ def M2_mpyu_nac_lh_s0 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 -= mpyu($Rs32.l,$Rt32.h)", -tc_8cb685d9, TypeM>, Enc_2ae154 { +tc_e913dc32, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110011; @@ -15499,7 +15899,7 @@ def M2_mpyu_nac_lh_s1 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 -= mpyu($Rs32.l,$Rt32.h):<<1", -tc_8cb685d9, TypeM>, Enc_2ae154 { +tc_e913dc32, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110111; @@ -15512,7 +15912,7 @@ def M2_mpyu_nac_ll_s0 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 -= mpyu($Rs32.l,$Rt32.l)", -tc_8cb685d9, TypeM>, Enc_2ae154 { +tc_e913dc32, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110011; @@ -15525,7 +15925,7 @@ def M2_mpyu_nac_ll_s1 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 -= mpyu($Rs32.l,$Rt32.l):<<1", -tc_8cb685d9, TypeM>, Enc_2ae154 { +tc_e913dc32, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110111; @@ -15538,7 +15938,7 @@ def M2_mpyu_up : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpyu($Rs32,$Rt32)", -tc_8c8041e6, TypeM>, Enc_5ab2be { +tc_8fd5f294, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101101010; @@ -15550,7 +15950,7 @@ def M2_mpyud_acc_hh_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 += mpyu($Rs32.h,$Rt32.h)", -tc_8cb685d9, TypeM>, Enc_61f0b0 { +tc_e913dc32, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100110010; @@ -15561,7 +15961,7 @@ def M2_mpyud_acc_hh_s1 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 += mpyu($Rs32.h,$Rt32.h):<<1", -tc_8cb685d9, TypeM>, Enc_61f0b0 { +tc_e913dc32, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100110110; @@ -15572,7 +15972,7 @@ def M2_mpyud_acc_hl_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 += mpyu($Rs32.h,$Rt32.l)", -tc_8cb685d9, TypeM>, Enc_61f0b0 { +tc_e913dc32, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100110010; @@ -15583,7 +15983,7 @@ def M2_mpyud_acc_hl_s1 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 += mpyu($Rs32.h,$Rt32.l):<<1", -tc_8cb685d9, TypeM>, Enc_61f0b0 { +tc_e913dc32, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100110110; @@ -15594,7 +15994,7 @@ def M2_mpyud_acc_lh_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 += mpyu($Rs32.l,$Rt32.h)", -tc_8cb685d9, TypeM>, Enc_61f0b0 { +tc_e913dc32, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100110010; @@ -15605,7 +16005,7 @@ def M2_mpyud_acc_lh_s1 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 += mpyu($Rs32.l,$Rt32.h):<<1", -tc_8cb685d9, TypeM>, Enc_61f0b0 { +tc_e913dc32, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; 
let Inst{31-21} = 0b11100110110; @@ -15616,7 +16016,7 @@ def M2_mpyud_acc_ll_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 += mpyu($Rs32.l,$Rt32.l)", -tc_8cb685d9, TypeM>, Enc_61f0b0 { +tc_e913dc32, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100110010; @@ -15627,7 +16027,7 @@ def M2_mpyud_acc_ll_s1 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 += mpyu($Rs32.l,$Rt32.l):<<1", -tc_8cb685d9, TypeM>, Enc_61f0b0 { +tc_e913dc32, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100110110; @@ -15638,7 +16038,7 @@ def M2_mpyud_hh_s0 : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = mpyu($Rs32.h,$Rt32.h)", -tc_8c8041e6, TypeM>, Enc_be32a5 { +tc_8fd5f294, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100100010; @@ -15648,7 +16048,7 @@ def M2_mpyud_hh_s1 : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = mpyu($Rs32.h,$Rt32.h):<<1", -tc_8c8041e6, TypeM>, Enc_be32a5 { +tc_8fd5f294, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100100110; @@ -15658,7 +16058,7 @@ def M2_mpyud_hl_s0 : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = mpyu($Rs32.h,$Rt32.l)", -tc_8c8041e6, TypeM>, Enc_be32a5 { +tc_8fd5f294, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100100010; @@ -15668,7 +16068,7 @@ def M2_mpyud_hl_s1 : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = mpyu($Rs32.h,$Rt32.l):<<1", -tc_8c8041e6, TypeM>, Enc_be32a5 { +tc_8fd5f294, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100100110; @@ -15678,7 +16078,7 @@ def M2_mpyud_lh_s0 : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = mpyu($Rs32.l,$Rt32.h)", -tc_8c8041e6, TypeM>, Enc_be32a5 { +tc_8fd5f294, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100100010; @@ -15688,7 +16088,7 @@ def M2_mpyud_lh_s1 : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = mpyu($Rs32.l,$Rt32.h):<<1", -tc_8c8041e6, TypeM>, Enc_be32a5 { +tc_8fd5f294, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100100110; @@ -15698,7 +16098,7 @@ def M2_mpyud_ll_s0 : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = mpyu($Rs32.l,$Rt32.l)", -tc_8c8041e6, TypeM>, Enc_be32a5 { +tc_8fd5f294, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100100010; @@ -15708,7 +16108,7 @@ def M2_mpyud_ll_s1 : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = mpyu($Rs32.l,$Rt32.l):<<1", -tc_8c8041e6, TypeM>, Enc_be32a5 { +tc_8fd5f294, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100100110; @@ -15718,7 +16118,7 @@ def M2_mpyud_nac_hh_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 -= mpyu($Rs32.h,$Rt32.h)", -tc_8cb685d9, TypeM>, Enc_61f0b0 { +tc_e913dc32, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100110011; @@ -15729,7 +16129,7 @@ def M2_mpyud_nac_hh_s1 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 -= 
mpyu($Rs32.h,$Rt32.h):<<1", -tc_8cb685d9, TypeM>, Enc_61f0b0 { +tc_e913dc32, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100110111; @@ -15740,7 +16140,7 @@ def M2_mpyud_nac_hl_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 -= mpyu($Rs32.h,$Rt32.l)", -tc_8cb685d9, TypeM>, Enc_61f0b0 { +tc_e913dc32, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100110011; @@ -15751,7 +16151,7 @@ def M2_mpyud_nac_hl_s1 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 -= mpyu($Rs32.h,$Rt32.l):<<1", -tc_8cb685d9, TypeM>, Enc_61f0b0 { +tc_e913dc32, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100110111; @@ -15762,7 +16162,7 @@ def M2_mpyud_nac_lh_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 -= mpyu($Rs32.l,$Rt32.h)", -tc_8cb685d9, TypeM>, Enc_61f0b0 { +tc_e913dc32, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100110011; @@ -15773,7 +16173,7 @@ def M2_mpyud_nac_lh_s1 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 -= mpyu($Rs32.l,$Rt32.h):<<1", -tc_8cb685d9, TypeM>, Enc_61f0b0 { +tc_e913dc32, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100110111; @@ -15784,7 +16184,7 @@ def M2_mpyud_nac_ll_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 -= mpyu($Rs32.l,$Rt32.l)", -tc_8cb685d9, TypeM>, Enc_61f0b0 { +tc_e913dc32, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100110011; @@ -15795,7 +16195,7 @@ def M2_mpyud_nac_ll_s1 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 -= mpyu($Rs32.l,$Rt32.l):<<1", -tc_8cb685d9, TypeM>, Enc_61f0b0 { +tc_e913dc32, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100110111; @@ -15806,7 +16206,7 @@ def M2_mpyui : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpyui($Rs32,$Rt32)", -tc_8c8041e6, TypeM> { +tc_8fd5f294, TypeM> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -15816,7 +16216,7 @@ def M2_nacci : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 -= add($Rs32,$Rt32)", -tc_c0cd91a8, TypeM>, Enc_2ae154 { +tc_c74f796f, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101111100; @@ -15830,7 +16230,7 @@ def M2_naccii : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, s32_0Imm:$Ii), "$Rx32 -= add($Rs32,#$Ii)", -tc_c0cd91a8, TypeM>, Enc_c90aca { +tc_c74f796f, TypeM>, Enc_c90aca { let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100010100; let hasNewValue = 1; @@ -15848,7 +16248,7 @@ def M2_subacc : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rt32, IntRegs:$Rs32), "$Rx32 += sub($Rt32,$Rs32)", -tc_c0cd91a8, TypeM>, Enc_a568d4 { +tc_c74f796f, TypeM>, Enc_a568d4 { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101111000; @@ -15862,7 +16262,7 @@ def M2_vabsdiffh : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = vabsdiffh($Rtt32,$Rss32)", -tc_63cd9d2d, TypeM>, Enc_ea23e4 { +tc_2b6f77c6, TypeM>, Enc_ea23e4 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 
0b11101000011; @@ -15872,7 +16272,7 @@ def M2_vabsdiffw : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = vabsdiffw($Rtt32,$Rss32)", -tc_63cd9d2d, TypeM>, Enc_ea23e4 { +tc_2b6f77c6, TypeM>, Enc_ea23e4 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000001; @@ -15882,7 +16282,7 @@ def M2_vcmac_s0_sat_i : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vcmpyi($Rss32,$Rtt32):sat", -tc_8cb685d9, TypeM>, Enc_88c16c { +tc_e913dc32, TypeM>, Enc_88c16c { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010010; @@ -15894,7 +16294,7 @@ def M2_vcmac_s0_sat_r : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vcmpyr($Rss32,$Rtt32):sat", -tc_8cb685d9, TypeM>, Enc_88c16c { +tc_e913dc32, TypeM>, Enc_88c16c { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010001; @@ -15906,7 +16306,7 @@ def M2_vcmpy_s0_sat_i : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vcmpyi($Rss32,$Rtt32):sat", -tc_8c8041e6, TypeM>, Enc_a56825 { +tc_8fd5f294, TypeM>, Enc_a56825 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000010; @@ -15917,7 +16317,7 @@ def M2_vcmpy_s0_sat_r : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vcmpyr($Rss32,$Rtt32):sat", -tc_8c8041e6, TypeM>, Enc_a56825 { +tc_8fd5f294, TypeM>, Enc_a56825 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000001; @@ -15928,7 +16328,7 @@ def M2_vcmpy_s1_sat_i : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vcmpyi($Rss32,$Rtt32):<<1:sat", -tc_8c8041e6, TypeM>, Enc_a56825 { +tc_8fd5f294, TypeM>, Enc_a56825 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000110; @@ -15939,7 +16339,7 @@ def M2_vcmpy_s1_sat_r : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vcmpyr($Rss32,$Rtt32):<<1:sat", -tc_8c8041e6, TypeM>, Enc_a56825 { +tc_8fd5f294, TypeM>, Enc_a56825 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000101; @@ -15950,7 +16350,7 @@ def M2_vdmacs_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vdmpy($Rss32,$Rtt32):sat", -tc_8cb685d9, TypeM>, Enc_88c16c { +tc_e913dc32, TypeM>, Enc_88c16c { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010000; @@ -15962,7 +16362,7 @@ def M2_vdmacs_s1 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vdmpy($Rss32,$Rtt32):<<1:sat", -tc_8cb685d9, TypeM>, Enc_88c16c { +tc_e913dc32, TypeM>, Enc_88c16c { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010100; @@ -15974,7 +16374,7 @@ def M2_vdmpyrs_s0 : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rd32 = vdmpy($Rss32,$Rtt32):rnd:sat", -tc_8c8041e6, TypeM>, Enc_d2216a { +tc_8fd5f294, TypeM>, Enc_d2216a { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101001000; @@ -15987,7 +16387,7 @@ def M2_vdmpyrs_s1 : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rd32 = vdmpy($Rss32,$Rtt32):<<1:rnd:sat", -tc_8c8041e6, TypeM>, Enc_d2216a { +tc_8fd5f294, TypeM>, Enc_d2216a { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101001100; @@ -16000,7 +16400,7 @@ def 
M2_vdmpys_s0 : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vdmpy($Rss32,$Rtt32):sat", -tc_8c8041e6, TypeM>, Enc_a56825 { +tc_8fd5f294, TypeM>, Enc_a56825 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000000; @@ -16011,7 +16411,7 @@ def M2_vdmpys_s1 : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vdmpy($Rss32,$Rtt32):<<1:sat", -tc_8c8041e6, TypeM>, Enc_a56825 { +tc_8fd5f294, TypeM>, Enc_a56825 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000100; @@ -16022,7 +16422,7 @@ def M2_vmac2 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 += vmpyh($Rs32,$Rt32)", -tc_8cb685d9, TypeM>, Enc_61f0b0 { +tc_e913dc32, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100111001; @@ -16033,7 +16433,7 @@ def M2_vmac2es : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vmpyeh($Rss32,$Rtt32)", -tc_8cb685d9, TypeM>, Enc_88c16c { +tc_e913dc32, TypeM>, Enc_88c16c { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010001; @@ -16044,7 +16444,7 @@ def M2_vmac2es_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vmpyeh($Rss32,$Rtt32):sat", -tc_8cb685d9, TypeM>, Enc_88c16c { +tc_e913dc32, TypeM>, Enc_88c16c { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010000; @@ -16056,7 +16456,7 @@ def M2_vmac2es_s1 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vmpyeh($Rss32,$Rtt32):<<1:sat", -tc_8cb685d9, TypeM>, Enc_88c16c { +tc_e913dc32, TypeM>, Enc_88c16c { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010100; @@ -16068,7 +16468,7 @@ def M2_vmac2s_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 += vmpyh($Rs32,$Rt32):sat", -tc_8cb685d9, TypeM>, Enc_61f0b0 { +tc_e913dc32, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100111000; @@ -16080,7 +16480,7 @@ def M2_vmac2s_s1 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 += vmpyh($Rs32,$Rt32):<<1:sat", -tc_8cb685d9, TypeM>, Enc_61f0b0 { +tc_e913dc32, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100111100; @@ -16092,7 +16492,7 @@ def M2_vmac2su_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 += vmpyhsu($Rs32,$Rt32):sat", -tc_8cb685d9, TypeM>, Enc_61f0b0 { +tc_e913dc32, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100111011; @@ -16104,7 +16504,7 @@ def M2_vmac2su_s1 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 += vmpyhsu($Rs32,$Rt32):<<1:sat", -tc_8cb685d9, TypeM>, Enc_61f0b0 { +tc_e913dc32, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100111111; @@ -16116,7 +16516,7 @@ def M2_vmpy2es_s0 : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vmpyeh($Rss32,$Rtt32):sat", -tc_8c8041e6, TypeM>, Enc_a56825 { +tc_8fd5f294, TypeM>, Enc_a56825 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000000; @@ -16127,7 +16527,7 @@ def M2_vmpy2es_s1 : HInst< (outs 
DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vmpyeh($Rss32,$Rtt32):<<1:sat", -tc_8c8041e6, TypeM>, Enc_a56825 { +tc_8fd5f294, TypeM>, Enc_a56825 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000100; @@ -16138,7 +16538,7 @@ def M2_vmpy2s_s0 : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = vmpyh($Rs32,$Rt32):sat", -tc_8c8041e6, TypeM>, Enc_be32a5 { +tc_8fd5f294, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100101000; @@ -16149,7 +16549,7 @@ def M2_vmpy2s_s0pack : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = vmpyh($Rs32,$Rt32):rnd:sat", -tc_8c8041e6, TypeM>, Enc_5ab2be { +tc_8fd5f294, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101101001; @@ -16162,7 +16562,7 @@ def M2_vmpy2s_s1 : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = vmpyh($Rs32,$Rt32):<<1:sat", -tc_8c8041e6, TypeM>, Enc_be32a5 { +tc_8fd5f294, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100101100; @@ -16173,7 +16573,7 @@ def M2_vmpy2s_s1pack : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = vmpyh($Rs32,$Rt32):<<1:rnd:sat", -tc_8c8041e6, TypeM>, Enc_5ab2be { +tc_8fd5f294, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101101101; @@ -16186,7 +16586,7 @@ def M2_vmpy2su_s0 : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = vmpyhsu($Rs32,$Rt32):sat", -tc_8c8041e6, TypeM>, Enc_be32a5 { +tc_8fd5f294, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100101000; @@ -16197,7 +16597,7 @@ def M2_vmpy2su_s1 : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = vmpyhsu($Rs32,$Rt32):<<1:sat", -tc_8c8041e6, TypeM>, Enc_be32a5 { +tc_8fd5f294, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100101100; @@ -16208,7 +16608,7 @@ def M2_vraddh : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rd32 = vraddh($Rss32,$Rtt32)", -tc_8c8041e6, TypeM>, Enc_d2216a { +tc_8fd5f294, TypeM>, Enc_d2216a { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101001001; @@ -16220,7 +16620,7 @@ def M2_vradduh : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rd32 = vradduh($Rss32,$Rtt32)", -tc_8c8041e6, TypeM>, Enc_d2216a { +tc_8fd5f294, TypeM>, Enc_d2216a { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101001000; @@ -16232,7 +16632,7 @@ def M2_vrcmaci_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vrcmpyi($Rss32,$Rtt32)", -tc_8cb685d9, TypeM>, Enc_88c16c { +tc_e913dc32, TypeM>, Enc_88c16c { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010000; @@ -16243,7 +16643,7 @@ def M2_vrcmaci_s0c : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vrcmpyi($Rss32,$Rtt32*)", -tc_8cb685d9, TypeM>, Enc_88c16c { +tc_e913dc32, TypeM>, Enc_88c16c { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010010; @@ -16254,7 +16654,7 @@ def M2_vrcmacr_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vrcmpyr($Rss32,$Rtt32)", -tc_8cb685d9, TypeM>, Enc_88c16c { +tc_e913dc32, TypeM>, Enc_88c16c { let 
Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010000; @@ -16265,7 +16665,7 @@ def M2_vrcmacr_s0c : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vrcmpyr($Rss32,$Rtt32*)", -tc_8cb685d9, TypeM>, Enc_88c16c { +tc_e913dc32, TypeM>, Enc_88c16c { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010011; @@ -16276,7 +16676,7 @@ def M2_vrcmpyi_s0 : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vrcmpyi($Rss32,$Rtt32)", -tc_8c8041e6, TypeM>, Enc_a56825 { +tc_8fd5f294, TypeM>, Enc_a56825 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000000; @@ -16286,7 +16686,7 @@ def M2_vrcmpyi_s0c : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vrcmpyi($Rss32,$Rtt32*)", -tc_8c8041e6, TypeM>, Enc_a56825 { +tc_8fd5f294, TypeM>, Enc_a56825 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000010; @@ -16296,7 +16696,7 @@ def M2_vrcmpyr_s0 : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vrcmpyr($Rss32,$Rtt32)", -tc_8c8041e6, TypeM>, Enc_a56825 { +tc_8fd5f294, TypeM>, Enc_a56825 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000000; @@ -16306,7 +16706,7 @@ def M2_vrcmpyr_s0c : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vrcmpyr($Rss32,$Rtt32*)", -tc_8c8041e6, TypeM>, Enc_a56825 { +tc_8fd5f294, TypeM>, Enc_a56825 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000011; @@ -16316,7 +16716,7 @@ def M2_vrcmpys_acc_s1 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rxx32 += vrcmpys($Rss32,$Rt32):<<1:sat", -tc_8cb685d9, TypeM> { +tc_e913dc32, TypeM> { let isPseudo = 1; let Constraints = "$Rxx32 = $Rxx32in"; } @@ -16324,7 +16724,7 @@ def M2_vrcmpys_acc_s1_h : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vrcmpys($Rss32,$Rtt32):<<1:sat:raw:hi", -tc_8cb685d9, TypeM>, Enc_88c16c { +tc_e913dc32, TypeM>, Enc_88c16c { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010101; @@ -16336,7 +16736,7 @@ def M2_vrcmpys_acc_s1_l : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vrcmpys($Rss32,$Rtt32):<<1:sat:raw:lo", -tc_8cb685d9, TypeM>, Enc_88c16c { +tc_e913dc32, TypeM>, Enc_88c16c { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010111; @@ -16348,14 +16748,14 @@ def M2_vrcmpys_s1 : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rdd32 = vrcmpys($Rss32,$Rt32):<<1:sat", -tc_8c8041e6, TypeM> { +tc_8fd5f294, TypeM> { let isPseudo = 1; } def M2_vrcmpys_s1_h : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vrcmpys($Rss32,$Rtt32):<<1:sat:raw:hi", -tc_8c8041e6, TypeM>, Enc_a56825 { +tc_8fd5f294, TypeM>, Enc_a56825 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000101; @@ -16366,7 +16766,7 @@ def M2_vrcmpys_s1_l : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vrcmpys($Rss32,$Rtt32):<<1:sat:raw:lo", -tc_8c8041e6, TypeM>, Enc_a56825 { +tc_8fd5f294, TypeM>, Enc_a56825 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000111; @@ -16377,7 +16777,7 @@ def M2_vrcmpys_s1rp : HInst< (outs IntRegs:$Rd32), (ins 
DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rd32 = vrcmpys($Rss32,$Rt32):<<1:rnd:sat", -tc_8c8041e6, TypeM> { +tc_8fd5f294, TypeM> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -16386,7 +16786,7 @@ def M2_vrcmpys_s1rp_h : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rd32 = vrcmpys($Rss32,$Rtt32):<<1:rnd:sat:raw:hi", -tc_8c8041e6, TypeM>, Enc_d2216a { +tc_8fd5f294, TypeM>, Enc_d2216a { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101001101; @@ -16399,7 +16799,7 @@ def M2_vrcmpys_s1rp_l : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rd32 = vrcmpys($Rss32,$Rtt32):<<1:rnd:sat:raw:lo", -tc_8c8041e6, TypeM>, Enc_d2216a { +tc_8fd5f294, TypeM>, Enc_d2216a { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101001101; @@ -16412,7 +16812,7 @@ def M2_vrmac_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vrmpyh($Rss32,$Rtt32)", -tc_8cb685d9, TypeM>, Enc_88c16c { +tc_e913dc32, TypeM>, Enc_88c16c { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010000; @@ -16423,7 +16823,7 @@ def M2_vrmpy_s0 : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vrmpyh($Rss32,$Rtt32)", -tc_8c8041e6, TypeM>, Enc_a56825 { +tc_8fd5f294, TypeM>, Enc_a56825 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000000; @@ -16433,7 +16833,7 @@ def M2_xor_xacc : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 ^= xor($Rs32,$Rt32)", -tc_3c10f809, TypeM>, Enc_2ae154 { +tc_84df2cd3, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101111100; @@ -16447,7 +16847,7 @@ def M4_and_and : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 &= and($Rs32,$Rt32)", -tc_3c10f809, TypeM>, Enc_2ae154 { +tc_84df2cd3, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101111010; @@ -16461,7 +16861,7 @@ def M4_and_andn : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 &= and($Rs32,~$Rt32)", -tc_3c10f809, TypeM>, Enc_2ae154 { +tc_84df2cd3, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101111001; @@ -16475,7 +16875,7 @@ def M4_and_or : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 &= or($Rs32,$Rt32)", -tc_3c10f809, TypeM>, Enc_2ae154 { +tc_84df2cd3, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101111010; @@ -16489,7 +16889,7 @@ def M4_and_xor : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 &= xor($Rs32,$Rt32)", -tc_3c10f809, TypeM>, Enc_2ae154 { +tc_84df2cd3, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101111010; @@ -16503,7 +16903,7 @@ def M4_cmpyi_wh : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rd32 = cmpyiwh($Rss32,$Rt32):<<1:rnd:sat", -tc_8c8041e6, TypeS_3op>, Enc_3d5b28 { +tc_8fd5f294, TypeS_3op>, Enc_3d5b28 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000101000; @@ -16516,7 +16916,7 @@ def M4_cmpyi_whc : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rd32 = cmpyiwh($Rss32,$Rt32*):<<1:rnd:sat", -tc_8c8041e6, TypeS_3op>, Enc_3d5b28, Requires<[HasV5T]> { +tc_8fd5f294, TypeS_3op>, Enc_3d5b28, Requires<[HasV5T]> { 
let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000101000; @@ -16529,7 +16929,7 @@ def M4_cmpyr_wh : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rd32 = cmpyrwh($Rss32,$Rt32):<<1:rnd:sat", -tc_8c8041e6, TypeS_3op>, Enc_3d5b28 { +tc_8fd5f294, TypeS_3op>, Enc_3d5b28 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000101000; @@ -16542,7 +16942,7 @@ def M4_cmpyr_whc : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rd32 = cmpyrwh($Rss32,$Rt32*):<<1:rnd:sat", -tc_8c8041e6, TypeS_3op>, Enc_3d5b28, Requires<[HasV5T]> { +tc_8fd5f294, TypeS_3op>, Enc_3d5b28, Requires<[HasV5T]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000101000; @@ -16555,7 +16955,7 @@ def M4_mac_up_s1_sat : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 += mpy($Rs32,$Rt32):<<1:sat", -tc_8cb685d9, TypeM>, Enc_2ae154 { +tc_e913dc32, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101111011; @@ -16570,7 +16970,7 @@ def M4_mpyri_addi : HInst< (outs IntRegs:$Rd32), (ins u32_0Imm:$Ii, IntRegs:$Rs32, u6_0Imm:$II), "$Rd32 = add(#$Ii,mpyi($Rs32,#$II))", -tc_a12a5971, TypeALU64>, Enc_322e1b, ImmRegRel { +tc_16d0d8d5, TypeALU64>, Enc_322e1b, ImmRegRel { let Inst{31-24} = 0b11011000; let hasNewValue = 1; let opNewValue = 0; @@ -16586,7 +16986,7 @@ def M4_mpyri_addr : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Ru32, IntRegs:$Rs32, u32_0Imm:$Ii), "$Rd32 = add($Ru32,mpyi($Rs32,#$Ii))", -tc_a12a5971, TypeALU64>, Enc_420cf3, ImmRegRel { +tc_16d0d8d5, TypeALU64>, Enc_420cf3, ImmRegRel { let Inst{31-23} = 0b110111111; let hasNewValue = 1; let opNewValue = 0; @@ -16603,7 +17003,7 @@ def M4_mpyri_addr_u2 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Ru32, u6_2Imm:$Ii, IntRegs:$Rs32), "$Rd32 = add($Ru32,mpyi(#$Ii,$Rs32))", -tc_69bb508b, TypeALU64>, Enc_277737 { +tc_bcc96cee, TypeALU64>, Enc_277737 { let Inst{31-23} = 0b110111110; let hasNewValue = 1; let opNewValue = 0; @@ -16613,7 +17013,7 @@ def M4_mpyrr_addi : HInst< (outs IntRegs:$Rd32), (ins u32_0Imm:$Ii, IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = add(#$Ii,mpyi($Rs32,$Rt32))", -tc_8cb685d9, TypeALU64>, Enc_a7b8e8, ImmRegRel { +tc_e913dc32, TypeALU64>, Enc_a7b8e8, ImmRegRel { let Inst{31-23} = 0b110101110; let hasNewValue = 1; let opNewValue = 0; @@ -16630,7 +17030,7 @@ def M4_mpyrr_addr : HInst< (outs IntRegs:$Ry32), (ins IntRegs:$Ru32, IntRegs:$Ry32in, IntRegs:$Rs32), "$Ry32 = add($Ru32,mpyi($Ry32in,$Rs32))", -tc_8cb685d9, TypeM>, Enc_7f1a05, ImmRegRel { +tc_e913dc32, TypeM>, Enc_7f1a05, ImmRegRel { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100011000; @@ -16645,7 +17045,7 @@ def M4_nac_up_s1_sat : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 -= mpy($Rs32,$Rt32):<<1:sat", -tc_8cb685d9, TypeM>, Enc_2ae154 { +tc_e913dc32, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101111011; @@ -16660,7 +17060,7 @@ def M4_or_and : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 |= and($Rs32,$Rt32)", -tc_3c10f809, TypeM>, Enc_2ae154 { +tc_84df2cd3, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101111010; @@ -16674,7 +17074,7 @@ def M4_or_andn : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 |= and($Rs32,~$Rt32)", -tc_3c10f809, TypeM>, Enc_2ae154 { +tc_84df2cd3, TypeM>, Enc_2ae154 { let 
Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101111001; @@ -16688,7 +17088,7 @@ def M4_or_or : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 |= or($Rs32,$Rt32)", -tc_3c10f809, TypeM>, Enc_2ae154 { +tc_84df2cd3, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101111110; @@ -16702,7 +17102,7 @@ def M4_or_xor : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 |= xor($Rs32,$Rt32)", -tc_3c10f809, TypeM>, Enc_2ae154 { +tc_84df2cd3, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101111110; @@ -16716,7 +17116,7 @@ def M4_pmpyw : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = pmpyw($Rs32,$Rt32)", -tc_8c8041e6, TypeM>, Enc_be32a5 { +tc_8fd5f294, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100101010; @@ -16726,7 +17126,7 @@ def M4_pmpyw_acc : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 ^= pmpyw($Rs32,$Rt32)", -tc_8cb685d9, TypeM>, Enc_61f0b0 { +tc_e913dc32, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100111001; @@ -16737,7 +17137,7 @@ def M4_vpmpyh : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = vpmpyh($Rs32,$Rt32)", -tc_8c8041e6, TypeM>, Enc_be32a5 { +tc_8fd5f294, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100101110; @@ -16747,7 +17147,7 @@ def M4_vpmpyh_acc : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 ^= vpmpyh($Rs32,$Rt32)", -tc_8cb685d9, TypeM>, Enc_61f0b0 { +tc_e913dc32, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100111101; @@ -16758,7 +17158,7 @@ def M4_vrmpyeh_acc_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vrmpyweh($Rss32,$Rtt32)", -tc_8cb685d9, TypeM>, Enc_88c16c { +tc_e913dc32, TypeM>, Enc_88c16c { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010001; @@ -16769,7 +17169,7 @@ def M4_vrmpyeh_acc_s1 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vrmpyweh($Rss32,$Rtt32):<<1", -tc_8cb685d9, TypeM>, Enc_88c16c { +tc_e913dc32, TypeM>, Enc_88c16c { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010101; @@ -16780,7 +17180,7 @@ def M4_vrmpyeh_s0 : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vrmpyweh($Rss32,$Rtt32)", -tc_8c8041e6, TypeM>, Enc_a56825 { +tc_8fd5f294, TypeM>, Enc_a56825 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000010; @@ -16790,7 +17190,7 @@ def M4_vrmpyeh_s1 : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vrmpyweh($Rss32,$Rtt32):<<1", -tc_8c8041e6, TypeM>, Enc_a56825 { +tc_8fd5f294, TypeM>, Enc_a56825 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000110; @@ -16800,7 +17200,7 @@ def M4_vrmpyoh_acc_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vrmpywoh($Rss32,$Rtt32)", -tc_8cb685d9, TypeM>, Enc_88c16c { +tc_e913dc32, TypeM>, Enc_88c16c { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010011; @@ -16811,7 +17211,7 @@ def M4_vrmpyoh_acc_s1 : HInst< 
(outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vrmpywoh($Rss32,$Rtt32):<<1", -tc_8cb685d9, TypeM>, Enc_88c16c { +tc_e913dc32, TypeM>, Enc_88c16c { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010111; @@ -16822,7 +17222,7 @@ def M4_vrmpyoh_s0 : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vrmpywoh($Rss32,$Rtt32)", -tc_8c8041e6, TypeM>, Enc_a56825 { +tc_8fd5f294, TypeM>, Enc_a56825 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000001; @@ -16832,7 +17232,7 @@ def M4_vrmpyoh_s1 : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vrmpywoh($Rss32,$Rtt32):<<1", -tc_8c8041e6, TypeM>, Enc_a56825 { +tc_8fd5f294, TypeM>, Enc_a56825 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000101; @@ -16842,7 +17242,7 @@ def M4_xor_and : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 ^= and($Rs32,$Rt32)", -tc_3c10f809, TypeM>, Enc_2ae154 { +tc_84df2cd3, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101111110; @@ -16856,7 +17256,7 @@ def M4_xor_andn : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 ^= and($Rs32,~$Rt32)", -tc_3c10f809, TypeM>, Enc_2ae154 { +tc_84df2cd3, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101111001; @@ -16870,7 +17270,7 @@ def M4_xor_or : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 ^= or($Rs32,$Rt32)", -tc_3c10f809, TypeM>, Enc_2ae154 { +tc_84df2cd3, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101111110; @@ -16884,7 +17284,7 @@ def M4_xor_xacc : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 ^= xor($Rss32,$Rtt32)", -tc_3c10f809, TypeS_3op>, Enc_88c16c { +tc_84df2cd3, TypeS_3op>, Enc_88c16c { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001010100; @@ -16895,7 +17295,7 @@ def M5_vdmacbsu : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vdmpybsu($Rss32,$Rtt32):sat", -tc_8cb685d9, TypeM>, Enc_88c16c, Requires<[HasV5T]> { +tc_e913dc32, TypeM>, Enc_88c16c, Requires<[HasV5T]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010001; @@ -16907,7 +17307,7 @@ def M5_vdmpybsu : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vdmpybsu($Rss32,$Rtt32):sat", -tc_8c8041e6, TypeM>, Enc_a56825, Requires<[HasV5T]> { +tc_8fd5f294, TypeM>, Enc_a56825, Requires<[HasV5T]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000101; @@ -16918,7 +17318,7 @@ def M5_vmacbsu : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 += vmpybsu($Rs32,$Rt32)", -tc_8cb685d9, TypeM>, Enc_61f0b0 { +tc_e913dc32, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100111110; @@ -16929,7 +17329,7 @@ def M5_vmacbuu : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 += vmpybu($Rs32,$Rt32)", -tc_8cb685d9, TypeM>, Enc_61f0b0 { +tc_e913dc32, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100111100; @@ -16940,7 +17340,7 @@ def M5_vmpybsu : HInst< (outs DoubleRegs:$Rdd32), (ins 
IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = vmpybsu($Rs32,$Rt32)", -tc_8c8041e6, TypeM>, Enc_be32a5 { +tc_8fd5f294, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100101010; @@ -16950,7 +17350,7 @@ def M5_vmpybuu : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = vmpybu($Rs32,$Rt32)", -tc_8c8041e6, TypeM>, Enc_be32a5 { +tc_8fd5f294, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100101100; @@ -16960,7 +17360,7 @@ def M5_vrmacbsu : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vrmpybsu($Rss32,$Rtt32)", -tc_8cb685d9, TypeM>, Enc_88c16c { +tc_e913dc32, TypeM>, Enc_88c16c { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010110; @@ -16971,7 +17371,7 @@ def M5_vrmacbuu : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vrmpybu($Rss32,$Rtt32)", -tc_8cb685d9, TypeM>, Enc_88c16c { +tc_e913dc32, TypeM>, Enc_88c16c { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010100; @@ -16982,7 +17382,7 @@ def M5_vrmpybsu : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vrmpybsu($Rss32,$Rtt32)", -tc_8c8041e6, TypeM>, Enc_a56825 { +tc_8fd5f294, TypeM>, Enc_a56825 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000110; @@ -16992,7 +17392,7 @@ def M5_vrmpybuu : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vrmpybu($Rss32,$Rtt32)", -tc_8c8041e6, TypeM>, Enc_a56825 { +tc_8fd5f294, TypeM>, Enc_a56825 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000100; @@ -17002,7 +17402,7 @@ def M6_vabsdiffb : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = vabsdiffb($Rtt32,$Rss32)", -tc_faab1248, TypeM>, Enc_ea23e4, Requires<[HasV62T]> { +tc_f49e76f4, TypeM>, Enc_ea23e4, Requires<[HasV62T]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000111; @@ -17012,7 +17412,7 @@ def M6_vabsdiffub : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = vabsdiffub($Rtt32,$Rss32)", -tc_faab1248, TypeM>, Enc_ea23e4, Requires<[HasV62T]> { +tc_f49e76f4, TypeM>, Enc_ea23e4, Requires<[HasV62T]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000101; @@ -17022,7 +17422,7 @@ def PS_loadrbabs : HInst< (outs IntRegs:$Rd32), (ins u32_0Imm:$Ii), "$Rd32 = memb(#$Ii)", -tc_70cabf66, TypeV2LDST>, Enc_25bef0, AddrModeRel { +tc_9c98e8af, TypeV2LDST>, Enc_25bef0, AddrModeRel { let Inst{24-21} = 0b1000; let Inst{31-27} = 0b01001; let hasNewValue = 1; @@ -17045,7 +17445,7 @@ def PS_loadrdabs : HInst< (outs DoubleRegs:$Rdd32), (ins u29_3Imm:$Ii), "$Rdd32 = memd(#$Ii)", -tc_70cabf66, TypeV2LDST>, Enc_509701, AddrModeRel { +tc_9c98e8af, TypeV2LDST>, Enc_509701, AddrModeRel { let Inst{24-21} = 0b1110; let Inst{31-27} = 0b01001; let addrMode = Absolute; @@ -17066,7 +17466,7 @@ def PS_loadrhabs : HInst< (outs IntRegs:$Rd32), (ins u31_1Imm:$Ii), "$Rd32 = memh(#$Ii)", -tc_70cabf66, TypeV2LDST>, Enc_8df4be, AddrModeRel { +tc_9c98e8af, TypeV2LDST>, Enc_8df4be, AddrModeRel { let Inst{24-21} = 0b1010; let Inst{31-27} = 0b01001; let hasNewValue = 1; @@ -17089,7 +17489,7 @@ def PS_loadriabs : HInst< (outs IntRegs:$Rd32), (ins u30_2Imm:$Ii), "$Rd32 = memw(#$Ii)", -tc_70cabf66, TypeV2LDST>, Enc_4f4ed7, AddrModeRel { +tc_9c98e8af, TypeV2LDST>, 
Enc_4f4ed7, AddrModeRel { let Inst{24-21} = 0b1100; let Inst{31-27} = 0b01001; let hasNewValue = 1; @@ -17112,7 +17512,7 @@ def PS_loadrubabs : HInst< (outs IntRegs:$Rd32), (ins u32_0Imm:$Ii), "$Rd32 = memub(#$Ii)", -tc_70cabf66, TypeV2LDST>, Enc_25bef0, AddrModeRel { +tc_9c98e8af, TypeV2LDST>, Enc_25bef0, AddrModeRel { let Inst{24-21} = 0b1001; let Inst{31-27} = 0b01001; let hasNewValue = 1; @@ -17135,7 +17535,7 @@ def PS_loadruhabs : HInst< (outs IntRegs:$Rd32), (ins u31_1Imm:$Ii), "$Rd32 = memuh(#$Ii)", -tc_70cabf66, TypeV2LDST>, Enc_8df4be, AddrModeRel { +tc_9c98e8af, TypeV2LDST>, Enc_8df4be, AddrModeRel { let Inst{24-21} = 0b1011; let Inst{31-27} = 0b01001; let hasNewValue = 1; @@ -17158,7 +17558,7 @@ def PS_storerbabs : HInst< (outs), (ins u32_0Imm:$Ii, IntRegs:$Rt32), "memb(#$Ii) = $Rt32", -tc_c14739d5, TypeV2LDST>, Enc_1b64fb, AddrModeRel { +tc_a788683e, TypeV2LDST>, Enc_1b64fb, AddrModeRel { let Inst{24-21} = 0b0000; let Inst{31-27} = 0b01001; let addrMode = Absolute; @@ -17180,7 +17580,7 @@ def PS_storerbnewabs : HInst< (outs), (ins u32_0Imm:$Ii, IntRegs:$Nt8), "memb(#$Ii) = $Nt8.new", -tc_9e86015f, TypeV2LDST>, Enc_ad1831, AddrModeRel { +tc_ff9ee76e, TypeV2LDST>, Enc_ad1831, AddrModeRel { let Inst{12-11} = 0b00; let Inst{24-21} = 0b0101; let Inst{31-27} = 0b01001; @@ -17189,6 +17589,7 @@ let accessSize = ByteAccess; let isNVStore = 1; let isNewValue = 1; let isExtended = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let CextOpcode = "S2_storerb"; let BaseOpcode = "S2_storerbabs"; @@ -17205,7 +17606,7 @@ def PS_storerdabs : HInst< (outs), (ins u29_3Imm:$Ii, DoubleRegs:$Rtt32), "memd(#$Ii) = $Rtt32", -tc_c14739d5, TypeV2LDST>, Enc_5c124a, AddrModeRel { +tc_a788683e, TypeV2LDST>, Enc_5c124a, AddrModeRel { let Inst{24-21} = 0b0110; let Inst{31-27} = 0b01001; let addrMode = Absolute; @@ -17226,7 +17627,7 @@ def PS_storerfabs : HInst< (outs), (ins u31_1Imm:$Ii, IntRegs:$Rt32), "memh(#$Ii) = $Rt32.h", -tc_c14739d5, TypeV2LDST>, Enc_fda92c, AddrModeRel { +tc_a788683e, TypeV2LDST>, Enc_fda92c, AddrModeRel { let Inst{24-21} = 0b0011; let Inst{31-27} = 0b01001; let addrMode = Absolute; @@ -17247,7 +17648,7 @@ def PS_storerhabs : HInst< (outs), (ins u31_1Imm:$Ii, IntRegs:$Rt32), "memh(#$Ii) = $Rt32", -tc_c14739d5, TypeV2LDST>, Enc_fda92c, AddrModeRel { +tc_a788683e, TypeV2LDST>, Enc_fda92c, AddrModeRel { let Inst{24-21} = 0b0010; let Inst{31-27} = 0b01001; let addrMode = Absolute; @@ -17269,7 +17670,7 @@ def PS_storerhnewabs : HInst< (outs), (ins u31_1Imm:$Ii, IntRegs:$Nt8), "memh(#$Ii) = $Nt8.new", -tc_9e86015f, TypeV2LDST>, Enc_bc03e5, AddrModeRel { +tc_ff9ee76e, TypeV2LDST>, Enc_bc03e5, AddrModeRel { let Inst{12-11} = 0b01; let Inst{24-21} = 0b0101; let Inst{31-27} = 0b01001; @@ -17278,6 +17679,7 @@ let accessSize = HalfWordAccess; let isNVStore = 1; let isNewValue = 1; let isExtended = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let CextOpcode = "S2_storerh"; let BaseOpcode = "S2_storerhabs"; @@ -17294,7 +17696,7 @@ def PS_storeriabs : HInst< (outs), (ins u30_2Imm:$Ii, IntRegs:$Rt32), "memw(#$Ii) = $Rt32", -tc_c14739d5, TypeV2LDST>, Enc_541f26, AddrModeRel { +tc_a788683e, TypeV2LDST>, Enc_541f26, AddrModeRel { let Inst{24-21} = 0b0100; let Inst{31-27} = 0b01001; let addrMode = Absolute; @@ -17316,7 +17718,7 @@ def PS_storerinewabs : HInst< (outs), (ins u30_2Imm:$Ii, IntRegs:$Nt8), "memw(#$Ii) = $Nt8.new", -tc_9e86015f, TypeV2LDST>, Enc_78cbf0, AddrModeRel { +tc_ff9ee76e, TypeV2LDST>, Enc_78cbf0, AddrModeRel { let Inst{12-11} = 0b10; let Inst{24-21} = 0b0101; let 
Inst{31-27} = 0b01001; @@ -17325,6 +17727,7 @@ let accessSize = WordAccess; let isNVStore = 1; let isNewValue = 1; let isExtended = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let CextOpcode = "S2_storeri"; let BaseOpcode = "S2_storeriabs"; @@ -17341,7 +17744,7 @@ def S2_addasl_rrri : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32, u3_0Imm:$Ii), "$Rd32 = addasl($Rt32,$Rs32,#$Ii)", -tc_090485bb, TypeS_3op>, Enc_47ef61 { +tc_c74f796f, TypeS_3op>, Enc_47ef61 { let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000100000; let hasNewValue = 1; @@ -17349,24 +17752,26 @@ let opNewValue = 0; let prefersSlot3 = 1; } def S2_allocframe : HInst< -(outs), -(ins u11_3Imm:$Ii), -"allocframe(#$Ii)", -tc_0cb867f2, TypeST>, Enc_22c845 { +(outs IntRegs:$Rx32), +(ins IntRegs:$Rx32in, u11_3Imm:$Ii), +"allocframe($Rx32,#$Ii):raw", +tc_e216a5db, TypeST>, Enc_22c845 { let Inst{13-11} = 0b000; let Inst{31-21} = 0b10100000100; -let Inst{20-16} = 0b11101; +let hasNewValue = 1; +let opNewValue = 0; let addrMode = BaseImmOffset; let accessSize = DoubleWordAccess; let mayStore = 1; -let Uses = [R29, R30, R31]; -let Defs = [R29, R30]; +let Uses = [FRAMEKEY, FRAMELIMIT, R30, R31]; +let Defs = [R30]; +let Constraints = "$Rx32 = $Rx32in"; } def S2_asl_i_p : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, u6_0Imm:$Ii), "$Rdd32 = asl($Rss32,#$Ii)", -tc_9c18c9a5, TypeS_2op>, Enc_5eac98 { +tc_540fdfbc, TypeS_2op>, Enc_5eac98 { let Inst{7-5} = 0b010; let Inst{31-21} = 0b10000000000; } @@ -17374,7 +17779,7 @@ def S2_asl_i_p_acc : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii), "$Rxx32 += asl($Rss32,#$Ii)", -tc_c0cd91a8, TypeS_2op>, Enc_70fb07 { +tc_c74f796f, TypeS_2op>, Enc_70fb07 { let Inst{7-5} = 0b110; let Inst{31-21} = 0b10000010000; let prefersSlot3 = 1; @@ -17384,7 +17789,7 @@ def S2_asl_i_p_and : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii), "$Rxx32 &= asl($Rss32,#$Ii)", -tc_3c10f809, TypeS_2op>, Enc_70fb07 { +tc_84df2cd3, TypeS_2op>, Enc_70fb07 { let Inst{7-5} = 0b010; let Inst{31-21} = 0b10000010010; let prefersSlot3 = 1; @@ -17394,7 +17799,7 @@ def S2_asl_i_p_nac : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii), "$Rxx32 -= asl($Rss32,#$Ii)", -tc_c0cd91a8, TypeS_2op>, Enc_70fb07 { +tc_c74f796f, TypeS_2op>, Enc_70fb07 { let Inst{7-5} = 0b010; let Inst{31-21} = 0b10000010000; let prefersSlot3 = 1; @@ -17404,7 +17809,7 @@ def S2_asl_i_p_or : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii), "$Rxx32 |= asl($Rss32,#$Ii)", -tc_3c10f809, TypeS_2op>, Enc_70fb07 { +tc_84df2cd3, TypeS_2op>, Enc_70fb07 { let Inst{7-5} = 0b110; let Inst{31-21} = 0b10000010010; let prefersSlot3 = 1; @@ -17414,7 +17819,7 @@ def S2_asl_i_p_xacc : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii), "$Rxx32 ^= asl($Rss32,#$Ii)", -tc_3c10f809, TypeS_2op>, Enc_70fb07 { +tc_84df2cd3, TypeS_2op>, Enc_70fb07 { let Inst{7-5} = 0b010; let Inst{31-21} = 0b10000010100; let prefersSlot3 = 1; @@ -17424,7 +17829,7 @@ def S2_asl_i_r : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, u5_0Imm:$Ii), "$Rd32 = asl($Rs32,#$Ii)", -tc_9c18c9a5, TypeS_2op>, Enc_a05677 { +tc_540fdfbc, TypeS_2op>, Enc_a05677 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10001100000; @@ -17435,7 +17840,7 @@ def S2_asl_i_r_acc : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii), "$Rx32 += 
asl($Rs32,#$Ii)", -tc_c0cd91a8, TypeS_2op>, Enc_28a2dc { +tc_c74f796f, TypeS_2op>, Enc_28a2dc { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10001110000; @@ -17448,7 +17853,7 @@ def S2_asl_i_r_and : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii), "$Rx32 &= asl($Rs32,#$Ii)", -tc_3c10f809, TypeS_2op>, Enc_28a2dc { +tc_84df2cd3, TypeS_2op>, Enc_28a2dc { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10001110010; @@ -17461,7 +17866,7 @@ def S2_asl_i_r_nac : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii), "$Rx32 -= asl($Rs32,#$Ii)", -tc_c0cd91a8, TypeS_2op>, Enc_28a2dc { +tc_c74f796f, TypeS_2op>, Enc_28a2dc { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10001110000; @@ -17474,7 +17879,7 @@ def S2_asl_i_r_or : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii), "$Rx32 |= asl($Rs32,#$Ii)", -tc_3c10f809, TypeS_2op>, Enc_28a2dc { +tc_84df2cd3, TypeS_2op>, Enc_28a2dc { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10001110010; @@ -17487,7 +17892,7 @@ def S2_asl_i_r_sat : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, u5_0Imm:$Ii), "$Rd32 = asl($Rs32,#$Ii):sat", -tc_47ab9233, TypeS_2op>, Enc_a05677 { +tc_b44c6e2a, TypeS_2op>, Enc_a05677 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10001100010; @@ -17500,7 +17905,7 @@ def S2_asl_i_r_xacc : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii), "$Rx32 ^= asl($Rs32,#$Ii)", -tc_3c10f809, TypeS_2op>, Enc_28a2dc { +tc_84df2cd3, TypeS_2op>, Enc_28a2dc { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10001110100; @@ -17513,7 +17918,7 @@ def S2_asl_i_vh : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, u4_0Imm:$Ii), "$Rdd32 = vaslh($Rss32,#$Ii)", -tc_9c18c9a5, TypeS_2op>, Enc_12b6e9 { +tc_540fdfbc, TypeS_2op>, Enc_12b6e9 { let Inst{7-5} = 0b010; let Inst{13-12} = 0b00; let Inst{31-21} = 0b10000000100; @@ -17522,7 +17927,7 @@ def S2_asl_i_vw : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, u5_0Imm:$Ii), "$Rdd32 = vaslw($Rss32,#$Ii)", -tc_9c18c9a5, TypeS_2op>, Enc_7e5a82 { +tc_540fdfbc, TypeS_2op>, Enc_7e5a82 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10000000010; @@ -17531,7 +17936,7 @@ def S2_asl_r_p : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rdd32 = asl($Rss32,$Rt32)", -tc_9c18c9a5, TypeS_3op>, Enc_927852 { +tc_540fdfbc, TypeS_3op>, Enc_927852 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000011100; @@ -17540,7 +17945,7 @@ def S2_asl_r_p_acc : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rxx32 += asl($Rss32,$Rt32)", -tc_c0cd91a8, TypeS_3op>, Enc_1aa186 { +tc_c74f796f, TypeS_3op>, Enc_1aa186 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001011110; @@ -17551,7 +17956,7 @@ def S2_asl_r_p_and : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rxx32 &= asl($Rss32,$Rt32)", -tc_3c10f809, TypeS_3op>, Enc_1aa186 { +tc_84df2cd3, TypeS_3op>, Enc_1aa186 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001011010; @@ -17562,7 +17967,7 @@ def S2_asl_r_p_nac : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rxx32 -= asl($Rss32,$Rt32)", -tc_c0cd91a8, TypeS_3op>, Enc_1aa186 { +tc_c74f796f, TypeS_3op>, Enc_1aa186 { let Inst{7-5} = 0b100; let 
Inst{13-13} = 0b0; let Inst{31-21} = 0b11001011100; @@ -17573,7 +17978,7 @@ def S2_asl_r_p_or : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rxx32 |= asl($Rss32,$Rt32)", -tc_3c10f809, TypeS_3op>, Enc_1aa186 { +tc_84df2cd3, TypeS_3op>, Enc_1aa186 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001011000; @@ -17584,7 +17989,7 @@ def S2_asl_r_p_xor : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rxx32 ^= asl($Rss32,$Rt32)", -tc_3c10f809, TypeS_3op>, Enc_1aa186 { +tc_84df2cd3, TypeS_3op>, Enc_1aa186 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001011011; @@ -17595,7 +18000,7 @@ def S2_asl_r_r : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = asl($Rs32,$Rt32)", -tc_9c18c9a5, TypeS_3op>, Enc_5ab2be { +tc_540fdfbc, TypeS_3op>, Enc_5ab2be { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000110010; @@ -17606,7 +18011,7 @@ def S2_asl_r_r_acc : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 += asl($Rs32,$Rt32)", -tc_c0cd91a8, TypeS_3op>, Enc_2ae154 { +tc_c74f796f, TypeS_3op>, Enc_2ae154 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001100110; @@ -17619,7 +18024,7 @@ def S2_asl_r_r_and : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 &= asl($Rs32,$Rt32)", -tc_3c10f809, TypeS_3op>, Enc_2ae154 { +tc_84df2cd3, TypeS_3op>, Enc_2ae154 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001100010; @@ -17632,7 +18037,7 @@ def S2_asl_r_r_nac : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 -= asl($Rs32,$Rt32)", -tc_c0cd91a8, TypeS_3op>, Enc_2ae154 { +tc_c74f796f, TypeS_3op>, Enc_2ae154 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001100100; @@ -17645,7 +18050,7 @@ def S2_asl_r_r_or : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 |= asl($Rs32,$Rt32)", -tc_3c10f809, TypeS_3op>, Enc_2ae154 { +tc_84df2cd3, TypeS_3op>, Enc_2ae154 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001100000; @@ -17658,7 +18063,7 @@ def S2_asl_r_r_sat : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = asl($Rs32,$Rt32):sat", -tc_47ab9233, TypeS_3op>, Enc_5ab2be { +tc_b44c6e2a, TypeS_3op>, Enc_5ab2be { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000110000; @@ -17671,7 +18076,7 @@ def S2_asl_r_vh : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rdd32 = vaslh($Rss32,$Rt32)", -tc_9c18c9a5, TypeS_3op>, Enc_927852 { +tc_540fdfbc, TypeS_3op>, Enc_927852 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000011010; @@ -17680,7 +18085,7 @@ def S2_asl_r_vw : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rdd32 = vaslw($Rss32,$Rt32)", -tc_9c18c9a5, TypeS_3op>, Enc_927852 { +tc_540fdfbc, TypeS_3op>, Enc_927852 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000011000; @@ -17689,7 +18094,7 @@ def S2_asr_i_p : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, u6_0Imm:$Ii), "$Rdd32 = asr($Rss32,#$Ii)", -tc_9c18c9a5, TypeS_2op>, Enc_5eac98 { +tc_540fdfbc, TypeS_2op>, Enc_5eac98 { let Inst{7-5} = 0b000; let Inst{31-21} = 0b10000000000; } @@ -17697,7 +18102,7 @@ def S2_asr_i_p_acc : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, 
u6_0Imm:$Ii), "$Rxx32 += asr($Rss32,#$Ii)", -tc_c0cd91a8, TypeS_2op>, Enc_70fb07 { +tc_c74f796f, TypeS_2op>, Enc_70fb07 { let Inst{7-5} = 0b100; let Inst{31-21} = 0b10000010000; let prefersSlot3 = 1; @@ -17707,7 +18112,7 @@ def S2_asr_i_p_and : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii), "$Rxx32 &= asr($Rss32,#$Ii)", -tc_3c10f809, TypeS_2op>, Enc_70fb07 { +tc_84df2cd3, TypeS_2op>, Enc_70fb07 { let Inst{7-5} = 0b000; let Inst{31-21} = 0b10000010010; let prefersSlot3 = 1; @@ -17717,7 +18122,7 @@ def S2_asr_i_p_nac : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii), "$Rxx32 -= asr($Rss32,#$Ii)", -tc_c0cd91a8, TypeS_2op>, Enc_70fb07 { +tc_c74f796f, TypeS_2op>, Enc_70fb07 { let Inst{7-5} = 0b000; let Inst{31-21} = 0b10000010000; let prefersSlot3 = 1; @@ -17727,7 +18132,7 @@ def S2_asr_i_p_or : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii), "$Rxx32 |= asr($Rss32,#$Ii)", -tc_3c10f809, TypeS_2op>, Enc_70fb07 { +tc_84df2cd3, TypeS_2op>, Enc_70fb07 { let Inst{7-5} = 0b100; let Inst{31-21} = 0b10000010010; let prefersSlot3 = 1; @@ -17737,7 +18142,7 @@ def S2_asr_i_p_rnd : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, u6_0Imm:$Ii), "$Rdd32 = asr($Rss32,#$Ii):rnd", -tc_63cd9d2d, TypeS_2op>, Enc_5eac98, Requires<[HasV5T]> { +tc_2b6f77c6, TypeS_2op>, Enc_5eac98, Requires<[HasV5T]> { let Inst{7-5} = 0b111; let Inst{31-21} = 0b10000000110; let prefersSlot3 = 1; @@ -17746,14 +18151,14 @@ def S2_asr_i_p_rnd_goodsyntax : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, u6_0Imm:$Ii), "$Rdd32 = asrrnd($Rss32,#$Ii)", -tc_63cd9d2d, TypeS_2op>, Requires<[HasV5T]> { +tc_2b6f77c6, TypeS_2op>, Requires<[HasV5T]> { let isPseudo = 1; } def S2_asr_i_r : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, u5_0Imm:$Ii), "$Rd32 = asr($Rs32,#$Ii)", -tc_9c18c9a5, TypeS_2op>, Enc_a05677 { +tc_540fdfbc, TypeS_2op>, Enc_a05677 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10001100000; @@ -17764,7 +18169,7 @@ def S2_asr_i_r_acc : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii), "$Rx32 += asr($Rs32,#$Ii)", -tc_c0cd91a8, TypeS_2op>, Enc_28a2dc { +tc_c74f796f, TypeS_2op>, Enc_28a2dc { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10001110000; @@ -17777,7 +18182,7 @@ def S2_asr_i_r_and : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii), "$Rx32 &= asr($Rs32,#$Ii)", -tc_3c10f809, TypeS_2op>, Enc_28a2dc { +tc_84df2cd3, TypeS_2op>, Enc_28a2dc { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10001110010; @@ -17790,7 +18195,7 @@ def S2_asr_i_r_nac : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii), "$Rx32 -= asr($Rs32,#$Ii)", -tc_c0cd91a8, TypeS_2op>, Enc_28a2dc { +tc_c74f796f, TypeS_2op>, Enc_28a2dc { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10001110000; @@ -17803,7 +18208,7 @@ def S2_asr_i_r_or : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii), "$Rx32 |= asr($Rs32,#$Ii)", -tc_3c10f809, TypeS_2op>, Enc_28a2dc { +tc_84df2cd3, TypeS_2op>, Enc_28a2dc { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10001110010; @@ -17816,7 +18221,7 @@ def S2_asr_i_r_rnd : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, u5_0Imm:$Ii), "$Rd32 = asr($Rs32,#$Ii):rnd", -tc_63cd9d2d, TypeS_2op>, Enc_a05677 { +tc_2b6f77c6, TypeS_2op>, Enc_a05677 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; 
let Inst{31-21} = 0b10001100010; @@ -17828,7 +18233,7 @@ def S2_asr_i_r_rnd_goodsyntax : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, u5_0Imm:$Ii), "$Rd32 = asrrnd($Rs32,#$Ii)", -tc_63cd9d2d, TypeS_2op> { +tc_2b6f77c6, TypeS_2op> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -17837,7 +18242,7 @@ def S2_asr_i_svw_trun : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32, u5_0Imm:$Ii), "$Rd32 = vasrw($Rss32,#$Ii)", -tc_7ca2ea10, TypeS_2op>, Enc_8dec2e { +tc_1b9c9ee5, TypeS_2op>, Enc_8dec2e { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10001000110; @@ -17849,7 +18254,7 @@ def S2_asr_i_vh : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, u4_0Imm:$Ii), "$Rdd32 = vasrh($Rss32,#$Ii)", -tc_9c18c9a5, TypeS_2op>, Enc_12b6e9 { +tc_540fdfbc, TypeS_2op>, Enc_12b6e9 { let Inst{7-5} = 0b000; let Inst{13-12} = 0b00; let Inst{31-21} = 0b10000000100; @@ -17858,7 +18263,7 @@ def S2_asr_i_vw : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, u5_0Imm:$Ii), "$Rdd32 = vasrw($Rss32,#$Ii)", -tc_9c18c9a5, TypeS_2op>, Enc_7e5a82 { +tc_540fdfbc, TypeS_2op>, Enc_7e5a82 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10000000010; @@ -17867,7 +18272,7 @@ def S2_asr_r_p : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rdd32 = asr($Rss32,$Rt32)", -tc_9c18c9a5, TypeS_3op>, Enc_927852 { +tc_540fdfbc, TypeS_3op>, Enc_927852 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000011100; @@ -17876,7 +18281,7 @@ def S2_asr_r_p_acc : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rxx32 += asr($Rss32,$Rt32)", -tc_c0cd91a8, TypeS_3op>, Enc_1aa186 { +tc_c74f796f, TypeS_3op>, Enc_1aa186 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001011110; @@ -17887,7 +18292,7 @@ def S2_asr_r_p_and : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rxx32 &= asr($Rss32,$Rt32)", -tc_3c10f809, TypeS_3op>, Enc_1aa186 { +tc_84df2cd3, TypeS_3op>, Enc_1aa186 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001011010; @@ -17898,7 +18303,7 @@ def S2_asr_r_p_nac : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rxx32 -= asr($Rss32,$Rt32)", -tc_c0cd91a8, TypeS_3op>, Enc_1aa186 { +tc_c74f796f, TypeS_3op>, Enc_1aa186 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001011100; @@ -17909,7 +18314,7 @@ def S2_asr_r_p_or : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rxx32 |= asr($Rss32,$Rt32)", -tc_3c10f809, TypeS_3op>, Enc_1aa186 { +tc_84df2cd3, TypeS_3op>, Enc_1aa186 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001011000; @@ -17920,7 +18325,7 @@ def S2_asr_r_p_xor : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rxx32 ^= asr($Rss32,$Rt32)", -tc_3c10f809, TypeS_3op>, Enc_1aa186 { +tc_84df2cd3, TypeS_3op>, Enc_1aa186 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001011011; @@ -17931,7 +18336,7 @@ def S2_asr_r_r : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = asr($Rs32,$Rt32)", -tc_9c18c9a5, TypeS_3op>, Enc_5ab2be { +tc_540fdfbc, TypeS_3op>, Enc_5ab2be { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000110010; @@ -17942,7 +18347,7 @@ def S2_asr_r_r_acc : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), 
"$Rx32 += asr($Rs32,$Rt32)", -tc_c0cd91a8, TypeS_3op>, Enc_2ae154 { +tc_c74f796f, TypeS_3op>, Enc_2ae154 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001100110; @@ -17955,7 +18360,7 @@ def S2_asr_r_r_and : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 &= asr($Rs32,$Rt32)", -tc_3c10f809, TypeS_3op>, Enc_2ae154 { +tc_84df2cd3, TypeS_3op>, Enc_2ae154 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001100010; @@ -17968,7 +18373,7 @@ def S2_asr_r_r_nac : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 -= asr($Rs32,$Rt32)", -tc_c0cd91a8, TypeS_3op>, Enc_2ae154 { +tc_c74f796f, TypeS_3op>, Enc_2ae154 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001100100; @@ -17981,7 +18386,7 @@ def S2_asr_r_r_or : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 |= asr($Rs32,$Rt32)", -tc_3c10f809, TypeS_3op>, Enc_2ae154 { +tc_84df2cd3, TypeS_3op>, Enc_2ae154 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001100000; @@ -17994,7 +18399,7 @@ def S2_asr_r_r_sat : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = asr($Rs32,$Rt32):sat", -tc_47ab9233, TypeS_3op>, Enc_5ab2be { +tc_b44c6e2a, TypeS_3op>, Enc_5ab2be { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000110000; @@ -18007,7 +18412,7 @@ def S2_asr_r_svw_trun : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rd32 = vasrw($Rss32,$Rt32)", -tc_7ca2ea10, TypeS_3op>, Enc_3d5b28 { +tc_1b9c9ee5, TypeS_3op>, Enc_3d5b28 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000101000; @@ -18019,7 +18424,7 @@ def S2_asr_r_vh : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rdd32 = vasrh($Rss32,$Rt32)", -tc_9c18c9a5, TypeS_3op>, Enc_927852 { +tc_540fdfbc, TypeS_3op>, Enc_927852 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000011010; @@ -18028,7 +18433,7 @@ def S2_asr_r_vw : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rdd32 = vasrw($Rss32,$Rt32)", -tc_9c18c9a5, TypeS_3op>, Enc_927852 { +tc_540fdfbc, TypeS_3op>, Enc_927852 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000011000; @@ -18037,7 +18442,7 @@ def S2_brev : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = brev($Rs32)", -tc_ab1b5e74, TypeS_2op>, Enc_5e2823 { +tc_d088982c, TypeS_2op>, Enc_5e2823 { let Inst{13-5} = 0b000000110; let Inst{31-21} = 0b10001100010; let hasNewValue = 1; @@ -18048,7 +18453,7 @@ def S2_brevp : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32), "$Rdd32 = brev($Rss32)", -tc_ab1b5e74, TypeS_2op>, Enc_b9c5fb { +tc_d088982c, TypeS_2op>, Enc_b9c5fb { let Inst{13-5} = 0b000000110; let Inst{31-21} = 0b10000000110; let prefersSlot3 = 1; @@ -18057,7 +18462,7 @@ def S2_cabacdecbin : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = decbin($Rss32,$Rtt32)", -tc_5d806107, TypeS_3op>, Enc_a56825 { +tc_c6ebf8dd, TypeS_3op>, Enc_a56825 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000001110; @@ -18069,7 +18474,7 @@ def S2_cl0 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = cl0($Rs32)", -tc_ab1b5e74, TypeS_2op>, Enc_5e2823 { +tc_d088982c, TypeS_2op>, Enc_5e2823 { let Inst{13-5} = 0b000000101; let Inst{31-21} = 0b10001100000; let hasNewValue = 1; @@ -18080,7 +18485,7 @@ def S2_cl0p : HInst< (outs IntRegs:$Rd32), (ins 
DoubleRegs:$Rss32), "$Rd32 = cl0($Rss32)", -tc_ab1b5e74, TypeS_2op>, Enc_90cd8b { +tc_d088982c, TypeS_2op>, Enc_90cd8b { let Inst{13-5} = 0b000000010; let Inst{31-21} = 0b10001000010; let hasNewValue = 1; @@ -18091,7 +18496,7 @@ def S2_cl1 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = cl1($Rs32)", -tc_ab1b5e74, TypeS_2op>, Enc_5e2823 { +tc_d088982c, TypeS_2op>, Enc_5e2823 { let Inst{13-5} = 0b000000110; let Inst{31-21} = 0b10001100000; let hasNewValue = 1; @@ -18102,7 +18507,7 @@ def S2_cl1p : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32), "$Rd32 = cl1($Rss32)", -tc_ab1b5e74, TypeS_2op>, Enc_90cd8b { +tc_d088982c, TypeS_2op>, Enc_90cd8b { let Inst{13-5} = 0b000000100; let Inst{31-21} = 0b10001000010; let hasNewValue = 1; @@ -18113,7 +18518,7 @@ def S2_clb : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = clb($Rs32)", -tc_ab1b5e74, TypeS_2op>, Enc_5e2823 { +tc_d088982c, TypeS_2op>, Enc_5e2823 { let Inst{13-5} = 0b000000100; let Inst{31-21} = 0b10001100000; let hasNewValue = 1; @@ -18124,7 +18529,7 @@ def S2_clbnorm : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = normamt($Rs32)", -tc_ab1b5e74, TypeS_2op>, Enc_5e2823 { +tc_d088982c, TypeS_2op>, Enc_5e2823 { let Inst{13-5} = 0b000000111; let Inst{31-21} = 0b10001100000; let hasNewValue = 1; @@ -18135,7 +18540,7 @@ def S2_clbp : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32), "$Rd32 = clb($Rss32)", -tc_ab1b5e74, TypeS_2op>, Enc_90cd8b { +tc_d088982c, TypeS_2op>, Enc_90cd8b { let Inst{13-5} = 0b000000000; let Inst{31-21} = 0b10001000010; let hasNewValue = 1; @@ -18146,7 +18551,7 @@ def S2_clrbit_i : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, u5_0Imm:$Ii), "$Rd32 = clrbit($Rs32,#$Ii)", -tc_9c18c9a5, TypeS_2op>, Enc_a05677 { +tc_540fdfbc, TypeS_2op>, Enc_a05677 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10001100110; @@ -18157,7 +18562,7 @@ def S2_clrbit_r : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = clrbit($Rs32,$Rt32)", -tc_9c18c9a5, TypeS_3op>, Enc_5ab2be { +tc_540fdfbc, TypeS_3op>, Enc_5ab2be { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000110100; @@ -18168,7 +18573,7 @@ def S2_ct0 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = ct0($Rs32)", -tc_ab1b5e74, TypeS_2op>, Enc_5e2823 { +tc_d088982c, TypeS_2op>, Enc_5e2823 { let Inst{13-5} = 0b000000100; let Inst{31-21} = 0b10001100010; let hasNewValue = 1; @@ -18179,7 +18584,7 @@ def S2_ct0p : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32), "$Rd32 = ct0($Rss32)", -tc_ab1b5e74, TypeS_2op>, Enc_90cd8b { +tc_d088982c, TypeS_2op>, Enc_90cd8b { let Inst{13-5} = 0b000000010; let Inst{31-21} = 0b10001000111; let hasNewValue = 1; @@ -18190,7 +18595,7 @@ def S2_ct1 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = ct1($Rs32)", -tc_ab1b5e74, TypeS_2op>, Enc_5e2823 { +tc_d088982c, TypeS_2op>, Enc_5e2823 { let Inst{13-5} = 0b000000101; let Inst{31-21} = 0b10001100010; let hasNewValue = 1; @@ -18201,7 +18606,7 @@ def S2_ct1p : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32), "$Rd32 = ct1($Rss32)", -tc_ab1b5e74, TypeS_2op>, Enc_90cd8b { +tc_d088982c, TypeS_2op>, Enc_90cd8b { let Inst{13-5} = 0b000000100; let Inst{31-21} = 0b10001000111; let hasNewValue = 1; @@ -18212,7 +18617,7 @@ def S2_deinterleave : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32), "$Rdd32 = deinterleave($Rss32)", -tc_ab1b5e74, TypeS_2op>, Enc_b9c5fb { +tc_d088982c, TypeS_2op>, Enc_b9c5fb { let Inst{13-5} = 0b000000100; let Inst{31-21} = 0b10000000110; let 
prefersSlot3 = 1; @@ -18221,7 +18626,7 @@ def S2_extractu : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, u5_0Imm:$Ii, u5_0Imm:$II), "$Rd32 = extractu($Rs32,#$Ii,#$II)", -tc_c0cd91a8, TypeS_2op>, Enc_b388cf { +tc_c74f796f, TypeS_2op>, Enc_b388cf { let Inst{13-13} = 0b0; let Inst{31-23} = 0b100011010; let hasNewValue = 1; @@ -18232,7 +18637,7 @@ def S2_extractu_rp : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, DoubleRegs:$Rtt32), "$Rd32 = extractu($Rs32,$Rtt32)", -tc_87601822, TypeS_3op>, Enc_e07374 { +tc_2b6f77c6, TypeS_3op>, Enc_e07374 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001001000; @@ -18244,7 +18649,7 @@ def S2_extractup : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, u6_0Imm:$Ii, u6_0Imm:$II), "$Rdd32 = extractu($Rss32,#$Ii,#$II)", -tc_c0cd91a8, TypeS_2op>, Enc_b84c4c { +tc_c74f796f, TypeS_2op>, Enc_b84c4c { let Inst{31-24} = 0b10000001; let prefersSlot3 = 1; } @@ -18252,7 +18657,7 @@ def S2_extractup_rp : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = extractu($Rss32,$Rtt32)", -tc_87601822, TypeS_3op>, Enc_a56825 { +tc_2b6f77c6, TypeS_3op>, Enc_a56825 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000001000; @@ -18262,7 +18667,7 @@ def S2_insert : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii, u5_0Imm:$II), "$Rx32 = insert($Rs32,#$Ii,#$II)", -tc_d95f4e98, TypeS_2op>, Enc_a1e29d { +tc_87735c3b, TypeS_2op>, Enc_a1e29d { let Inst{13-13} = 0b0; let Inst{31-23} = 0b100011110; let hasNewValue = 1; @@ -18274,7 +18679,7 @@ def S2_insert_rp : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, DoubleRegs:$Rtt32), "$Rx32 = insert($Rs32,$Rtt32)", -tc_3c10f809, TypeS_3op>, Enc_179b35 { +tc_84df2cd3, TypeS_3op>, Enc_179b35 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001000000; @@ -18287,7 +18692,7 @@ def S2_insertp : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii, u6_0Imm:$II), "$Rxx32 = insert($Rss32,#$Ii,#$II)", -tc_d95f4e98, TypeS_2op>, Enc_143a3c { +tc_87735c3b, TypeS_2op>, Enc_143a3c { let Inst{31-24} = 0b10000011; let prefersSlot3 = 1; let Constraints = "$Rxx32 = $Rxx32in"; @@ -18296,7 +18701,7 @@ def S2_insertp_rp : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 = insert($Rss32,$Rtt32)", -tc_3c10f809, TypeS_3op>, Enc_88c16c { +tc_84df2cd3, TypeS_3op>, Enc_88c16c { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001010000; @@ -18307,7 +18712,7 @@ def S2_interleave : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32), "$Rdd32 = interleave($Rss32)", -tc_ab1b5e74, TypeS_2op>, Enc_b9c5fb { +tc_d088982c, TypeS_2op>, Enc_b9c5fb { let Inst{13-5} = 0b000000101; let Inst{31-21} = 0b10000000110; let prefersSlot3 = 1; @@ -18316,7 +18721,7 @@ def S2_lfsp : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = lfs($Rss32,$Rtt32)", -tc_87601822, TypeS_3op>, Enc_a56825 { +tc_2b6f77c6, TypeS_3op>, Enc_a56825 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000001100; @@ -18326,7 +18731,7 @@ def S2_lsl_r_p : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rdd32 = lsl($Rss32,$Rt32)", -tc_9c18c9a5, TypeS_3op>, Enc_927852 { +tc_540fdfbc, TypeS_3op>, Enc_927852 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000011100; @@ -18335,7 +18740,7 @@ def S2_lsl_r_p_acc : HInst< (outs DoubleRegs:$Rxx32), (ins 
DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rxx32 += lsl($Rss32,$Rt32)", -tc_c0cd91a8, TypeS_3op>, Enc_1aa186 { +tc_c74f796f, TypeS_3op>, Enc_1aa186 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001011110; @@ -18346,7 +18751,7 @@ def S2_lsl_r_p_and : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rxx32 &= lsl($Rss32,$Rt32)", -tc_3c10f809, TypeS_3op>, Enc_1aa186 { +tc_84df2cd3, TypeS_3op>, Enc_1aa186 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001011010; @@ -18357,7 +18762,7 @@ def S2_lsl_r_p_nac : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rxx32 -= lsl($Rss32,$Rt32)", -tc_c0cd91a8, TypeS_3op>, Enc_1aa186 { +tc_c74f796f, TypeS_3op>, Enc_1aa186 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001011100; @@ -18368,7 +18773,7 @@ def S2_lsl_r_p_or : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rxx32 |= lsl($Rss32,$Rt32)", -tc_3c10f809, TypeS_3op>, Enc_1aa186 { +tc_84df2cd3, TypeS_3op>, Enc_1aa186 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001011000; @@ -18379,7 +18784,7 @@ def S2_lsl_r_p_xor : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rxx32 ^= lsl($Rss32,$Rt32)", -tc_3c10f809, TypeS_3op>, Enc_1aa186 { +tc_84df2cd3, TypeS_3op>, Enc_1aa186 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001011011; @@ -18390,7 +18795,7 @@ def S2_lsl_r_r : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = lsl($Rs32,$Rt32)", -tc_9c18c9a5, TypeS_3op>, Enc_5ab2be { +tc_540fdfbc, TypeS_3op>, Enc_5ab2be { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000110010; @@ -18401,7 +18806,7 @@ def S2_lsl_r_r_acc : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 += lsl($Rs32,$Rt32)", -tc_c0cd91a8, TypeS_3op>, Enc_2ae154 { +tc_c74f796f, TypeS_3op>, Enc_2ae154 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001100110; @@ -18414,7 +18819,7 @@ def S2_lsl_r_r_and : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 &= lsl($Rs32,$Rt32)", -tc_3c10f809, TypeS_3op>, Enc_2ae154 { +tc_84df2cd3, TypeS_3op>, Enc_2ae154 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001100010; @@ -18427,7 +18832,7 @@ def S2_lsl_r_r_nac : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 -= lsl($Rs32,$Rt32)", -tc_c0cd91a8, TypeS_3op>, Enc_2ae154 { +tc_c74f796f, TypeS_3op>, Enc_2ae154 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001100100; @@ -18440,7 +18845,7 @@ def S2_lsl_r_r_or : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 |= lsl($Rs32,$Rt32)", -tc_3c10f809, TypeS_3op>, Enc_2ae154 { +tc_84df2cd3, TypeS_3op>, Enc_2ae154 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001100000; @@ -18453,7 +18858,7 @@ def S2_lsl_r_vh : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rdd32 = vlslh($Rss32,$Rt32)", -tc_9c18c9a5, TypeS_3op>, Enc_927852 { +tc_540fdfbc, TypeS_3op>, Enc_927852 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000011010; @@ -18462,7 +18867,7 @@ def S2_lsl_r_vw : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rdd32 = vlslw($Rss32,$Rt32)", 
-tc_9c18c9a5, TypeS_3op>, Enc_927852 { +tc_540fdfbc, TypeS_3op>, Enc_927852 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000011000; @@ -18471,7 +18876,7 @@ def S2_lsr_i_p : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, u6_0Imm:$Ii), "$Rdd32 = lsr($Rss32,#$Ii)", -tc_9c18c9a5, TypeS_2op>, Enc_5eac98 { +tc_540fdfbc, TypeS_2op>, Enc_5eac98 { let Inst{7-5} = 0b001; let Inst{31-21} = 0b10000000000; } @@ -18479,7 +18884,7 @@ def S2_lsr_i_p_acc : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii), "$Rxx32 += lsr($Rss32,#$Ii)", -tc_c0cd91a8, TypeS_2op>, Enc_70fb07 { +tc_c74f796f, TypeS_2op>, Enc_70fb07 { let Inst{7-5} = 0b101; let Inst{31-21} = 0b10000010000; let prefersSlot3 = 1; @@ -18489,7 +18894,7 @@ def S2_lsr_i_p_and : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii), "$Rxx32 &= lsr($Rss32,#$Ii)", -tc_3c10f809, TypeS_2op>, Enc_70fb07 { +tc_84df2cd3, TypeS_2op>, Enc_70fb07 { let Inst{7-5} = 0b001; let Inst{31-21} = 0b10000010010; let prefersSlot3 = 1; @@ -18499,7 +18904,7 @@ def S2_lsr_i_p_nac : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii), "$Rxx32 -= lsr($Rss32,#$Ii)", -tc_c0cd91a8, TypeS_2op>, Enc_70fb07 { +tc_c74f796f, TypeS_2op>, Enc_70fb07 { let Inst{7-5} = 0b001; let Inst{31-21} = 0b10000010000; let prefersSlot3 = 1; @@ -18509,7 +18914,7 @@ def S2_lsr_i_p_or : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii), "$Rxx32 |= lsr($Rss32,#$Ii)", -tc_3c10f809, TypeS_2op>, Enc_70fb07 { +tc_84df2cd3, TypeS_2op>, Enc_70fb07 { let Inst{7-5} = 0b101; let Inst{31-21} = 0b10000010010; let prefersSlot3 = 1; @@ -18519,7 +18924,7 @@ def S2_lsr_i_p_xacc : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii), "$Rxx32 ^= lsr($Rss32,#$Ii)", -tc_3c10f809, TypeS_2op>, Enc_70fb07 { +tc_84df2cd3, TypeS_2op>, Enc_70fb07 { let Inst{7-5} = 0b001; let Inst{31-21} = 0b10000010100; let prefersSlot3 = 1; @@ -18529,7 +18934,7 @@ def S2_lsr_i_r : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, u5_0Imm:$Ii), "$Rd32 = lsr($Rs32,#$Ii)", -tc_9c18c9a5, TypeS_2op>, Enc_a05677 { +tc_540fdfbc, TypeS_2op>, Enc_a05677 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10001100000; @@ -18540,7 +18945,7 @@ def S2_lsr_i_r_acc : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii), "$Rx32 += lsr($Rs32,#$Ii)", -tc_c0cd91a8, TypeS_2op>, Enc_28a2dc { +tc_c74f796f, TypeS_2op>, Enc_28a2dc { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10001110000; @@ -18553,7 +18958,7 @@ def S2_lsr_i_r_and : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii), "$Rx32 &= lsr($Rs32,#$Ii)", -tc_3c10f809, TypeS_2op>, Enc_28a2dc { +tc_84df2cd3, TypeS_2op>, Enc_28a2dc { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10001110010; @@ -18566,7 +18971,7 @@ def S2_lsr_i_r_nac : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii), "$Rx32 -= lsr($Rs32,#$Ii)", -tc_c0cd91a8, TypeS_2op>, Enc_28a2dc { +tc_c74f796f, TypeS_2op>, Enc_28a2dc { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10001110000; @@ -18579,7 +18984,7 @@ def S2_lsr_i_r_or : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii), "$Rx32 |= lsr($Rs32,#$Ii)", -tc_3c10f809, TypeS_2op>, Enc_28a2dc { +tc_84df2cd3, TypeS_2op>, Enc_28a2dc { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let 
Inst{31-21} = 0b10001110010; @@ -18592,7 +18997,7 @@ def S2_lsr_i_r_xacc : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii), "$Rx32 ^= lsr($Rs32,#$Ii)", -tc_3c10f809, TypeS_2op>, Enc_28a2dc { +tc_84df2cd3, TypeS_2op>, Enc_28a2dc { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10001110100; @@ -18605,7 +19010,7 @@ def S2_lsr_i_vh : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, u4_0Imm:$Ii), "$Rdd32 = vlsrh($Rss32,#$Ii)", -tc_9c18c9a5, TypeS_2op>, Enc_12b6e9 { +tc_540fdfbc, TypeS_2op>, Enc_12b6e9 { let Inst{7-5} = 0b001; let Inst{13-12} = 0b00; let Inst{31-21} = 0b10000000100; @@ -18614,7 +19019,7 @@ def S2_lsr_i_vw : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, u5_0Imm:$Ii), "$Rdd32 = vlsrw($Rss32,#$Ii)", -tc_9c18c9a5, TypeS_2op>, Enc_7e5a82 { +tc_540fdfbc, TypeS_2op>, Enc_7e5a82 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10000000010; @@ -18623,7 +19028,7 @@ def S2_lsr_r_p : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rdd32 = lsr($Rss32,$Rt32)", -tc_9c18c9a5, TypeS_3op>, Enc_927852 { +tc_540fdfbc, TypeS_3op>, Enc_927852 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000011100; @@ -18632,7 +19037,7 @@ def S2_lsr_r_p_acc : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rxx32 += lsr($Rss32,$Rt32)", -tc_c0cd91a8, TypeS_3op>, Enc_1aa186 { +tc_c74f796f, TypeS_3op>, Enc_1aa186 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001011110; @@ -18643,7 +19048,7 @@ def S2_lsr_r_p_and : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rxx32 &= lsr($Rss32,$Rt32)", -tc_3c10f809, TypeS_3op>, Enc_1aa186 { +tc_84df2cd3, TypeS_3op>, Enc_1aa186 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001011010; @@ -18654,7 +19059,7 @@ def S2_lsr_r_p_nac : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rxx32 -= lsr($Rss32,$Rt32)", -tc_c0cd91a8, TypeS_3op>, Enc_1aa186 { +tc_c74f796f, TypeS_3op>, Enc_1aa186 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001011100; @@ -18665,7 +19070,7 @@ def S2_lsr_r_p_or : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rxx32 |= lsr($Rss32,$Rt32)", -tc_3c10f809, TypeS_3op>, Enc_1aa186 { +tc_84df2cd3, TypeS_3op>, Enc_1aa186 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001011000; @@ -18676,7 +19081,7 @@ def S2_lsr_r_p_xor : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rxx32 ^= lsr($Rss32,$Rt32)", -tc_3c10f809, TypeS_3op>, Enc_1aa186 { +tc_84df2cd3, TypeS_3op>, Enc_1aa186 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001011011; @@ -18687,7 +19092,7 @@ def S2_lsr_r_r : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = lsr($Rs32,$Rt32)", -tc_9c18c9a5, TypeS_3op>, Enc_5ab2be { +tc_540fdfbc, TypeS_3op>, Enc_5ab2be { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000110010; @@ -18698,7 +19103,7 @@ def S2_lsr_r_r_acc : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 += lsr($Rs32,$Rt32)", -tc_c0cd91a8, TypeS_3op>, Enc_2ae154 { +tc_c74f796f, TypeS_3op>, Enc_2ae154 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001100110; @@ -18711,7 +19116,7 @@ def S2_lsr_r_r_and : HInst< (outs 
IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 &= lsr($Rs32,$Rt32)", -tc_3c10f809, TypeS_3op>, Enc_2ae154 { +tc_84df2cd3, TypeS_3op>, Enc_2ae154 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001100010; @@ -18724,7 +19129,7 @@ def S2_lsr_r_r_nac : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 -= lsr($Rs32,$Rt32)", -tc_c0cd91a8, TypeS_3op>, Enc_2ae154 { +tc_c74f796f, TypeS_3op>, Enc_2ae154 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001100100; @@ -18737,7 +19142,7 @@ def S2_lsr_r_r_or : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 |= lsr($Rs32,$Rt32)", -tc_3c10f809, TypeS_3op>, Enc_2ae154 { +tc_84df2cd3, TypeS_3op>, Enc_2ae154 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001100000; @@ -18750,7 +19155,7 @@ def S2_lsr_r_vh : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rdd32 = vlsrh($Rss32,$Rt32)", -tc_9c18c9a5, TypeS_3op>, Enc_927852 { +tc_540fdfbc, TypeS_3op>, Enc_927852 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000011010; @@ -18759,7 +19164,7 @@ def S2_lsr_r_vw : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rdd32 = vlsrw($Rss32,$Rt32)", -tc_9c18c9a5, TypeS_3op>, Enc_927852 { +tc_540fdfbc, TypeS_3op>, Enc_927852 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000011000; @@ -18768,7 +19173,7 @@ def S2_packhl : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = packhl($Rs32,$Rt32)", -tc_548f402d, TypeALU32_3op>, Enc_be32a5 { +tc_b9488031, TypeALU32_3op>, Enc_be32a5 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11110101100; @@ -18778,7 +19183,7 @@ def S2_parityp : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rd32 = parity($Rss32,$Rtt32)", -tc_87601822, TypeALU64>, Enc_d2216a { +tc_2b6f77c6, TypeALU64>, Enc_d2216a { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010000000; @@ -18790,7 +19195,7 @@ def S2_pstorerbf_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Rt32), "if (!$Pv4) memb($Rs32+#$Ii) = $Rt32", -tc_3d905451, TypeV2LDST>, Enc_da8d43, AddrModeRel { +tc_8b15472a, TypeV2LDST>, Enc_da8d43, AddrModeRel { let Inst{2-2} = 0b0; let Inst{31-21} = 0b01000100000; let isPredicated = 1; @@ -18812,7 +19217,7 @@ def S2_pstorerbf_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Rt32), "if (!$Pv4) memb($Rx32++#$Ii) = $Rt32", -tc_9b73d261, TypeST>, Enc_cc449f, AddrModeRel { +tc_cd7374a0, TypeST>, Enc_cc449f, AddrModeRel { let Inst{2-2} = 0b1; let Inst{7-7} = 0b0; let Inst{13-13} = 0b1; @@ -18830,7 +19235,7 @@ def S2_pstorerbf_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32), "if (!$Pv4) memb($Rs32) = $Rt32", -tc_3d905451, TypeMAPPING> { +tc_8b15472a, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -18838,7 +19243,7 @@ def S2_pstorerbfnew_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Rt32), "if (!$Pv4.new) memb($Rx32++#$Ii) = $Rt32", -tc_7675c0e9, TypeST>, Enc_cc449f, AddrModeRel { +tc_74e47fd9, TypeST>, Enc_cc449f, AddrModeRel { let Inst{2-2} = 0b1; let Inst{7-7} = 0b1; let Inst{13-13} = 0b1; @@ -18857,7 +19262,7 @@ def S2_pstorerbnewf_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Nt8), "if (!$Pv4) memb($Rs32+#$Ii) = 
$Nt8.new", -tc_9da3628f, TypeV2LDST>, Enc_585242, AddrModeRel { +tc_594ab548, TypeV2LDST>, Enc_585242, AddrModeRel { let Inst{2-2} = 0b0; let Inst{12-11} = 0b00; let Inst{31-21} = 0b01000100101; @@ -18867,6 +19272,7 @@ let addrMode = BaseImmOffset; let accessSize = ByteAccess; let isNVStore = 1; let isNewValue = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let CextOpcode = "S2_storerb"; let InputType = "imm"; @@ -18882,7 +19288,7 @@ def S2_pstorerbnewf_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Nt8), "if (!$Pv4) memb($Rx32++#$Ii) = $Nt8.new", -tc_e2480a7f, TypeST>, Enc_52a5dd, AddrModeRel { +tc_d9f95eef, TypeST>, Enc_52a5dd, AddrModeRel { let Inst{2-2} = 0b1; let Inst{7-7} = 0b0; let Inst{13-11} = 0b100; @@ -18893,6 +19299,7 @@ let addrMode = PostInc; let accessSize = ByteAccess; let isNVStore = 1; let isNewValue = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let CextOpcode = "S2_storerb"; let BaseOpcode = "S2_storerb_pi"; @@ -18903,7 +19310,7 @@ def S2_pstorerbnewf_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8), "if (!$Pv4) memb($Rs32) = $Nt8.new", -tc_9da3628f, TypeMAPPING> { +tc_594ab548, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; let opNewValue = 2; @@ -18912,7 +19319,7 @@ def S2_pstorerbnewfnew_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Nt8), "if (!$Pv4.new) memb($Rx32++#$Ii) = $Nt8.new", -tc_8fab9ac3, TypeST>, Enc_52a5dd, AddrModeRel { +tc_d24b2d85, TypeST>, Enc_52a5dd, AddrModeRel { let Inst{2-2} = 0b1; let Inst{7-7} = 0b1; let Inst{13-11} = 0b100; @@ -18924,6 +19331,7 @@ let accessSize = ByteAccess; let isNVStore = 1; let isPredicatedNew = 1; let isNewValue = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let CextOpcode = "S2_storerb"; let BaseOpcode = "S2_storerb_pi"; @@ -18934,7 +19342,7 @@ def S2_pstorerbnewt_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Nt8), "if ($Pv4) memb($Rs32+#$Ii) = $Nt8.new", -tc_9da3628f, TypeV2LDST>, Enc_585242, AddrModeRel { +tc_594ab548, TypeV2LDST>, Enc_585242, AddrModeRel { let Inst{2-2} = 0b0; let Inst{12-11} = 0b00; let Inst{31-21} = 0b01000000101; @@ -18943,6 +19351,7 @@ let addrMode = BaseImmOffset; let accessSize = ByteAccess; let isNVStore = 1; let isNewValue = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let CextOpcode = "S2_storerb"; let InputType = "imm"; @@ -18958,7 +19367,7 @@ def S2_pstorerbnewt_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Nt8), "if ($Pv4) memb($Rx32++#$Ii) = $Nt8.new", -tc_e2480a7f, TypeST>, Enc_52a5dd, AddrModeRel { +tc_d9f95eef, TypeST>, Enc_52a5dd, AddrModeRel { let Inst{2-2} = 0b0; let Inst{7-7} = 0b0; let Inst{13-11} = 0b100; @@ -18968,6 +19377,7 @@ let addrMode = PostInc; let accessSize = ByteAccess; let isNVStore = 1; let isNewValue = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let CextOpcode = "S2_storerb"; let BaseOpcode = "S2_storerb_pi"; @@ -18978,7 +19388,7 @@ def S2_pstorerbnewt_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8), "if ($Pv4) memb($Rs32) = $Nt8.new", -tc_9da3628f, TypeMAPPING> { +tc_594ab548, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; let opNewValue = 2; @@ -18987,7 +19397,7 @@ def S2_pstorerbnewtnew_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Nt8), "if ($Pv4.new) memb($Rx32++#$Ii) = $Nt8.new", -tc_8fab9ac3, TypeST>, Enc_52a5dd, AddrModeRel { 
+tc_d24b2d85, TypeST>, Enc_52a5dd, AddrModeRel { let Inst{2-2} = 0b0; let Inst{7-7} = 0b1; let Inst{13-11} = 0b100; @@ -18998,6 +19408,7 @@ let accessSize = ByteAccess; let isNVStore = 1; let isPredicatedNew = 1; let isNewValue = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let CextOpcode = "S2_storerb"; let BaseOpcode = "S2_storerb_pi"; @@ -19008,7 +19419,7 @@ def S2_pstorerbt_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Rt32), "if ($Pv4) memb($Rs32+#$Ii) = $Rt32", -tc_3d905451, TypeV2LDST>, Enc_da8d43, AddrModeRel { +tc_8b15472a, TypeV2LDST>, Enc_da8d43, AddrModeRel { let Inst{2-2} = 0b0; let Inst{31-21} = 0b01000000000; let isPredicated = 1; @@ -19029,7 +19440,7 @@ def S2_pstorerbt_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Rt32), "if ($Pv4) memb($Rx32++#$Ii) = $Rt32", -tc_9b73d261, TypeST>, Enc_cc449f, AddrModeRel { +tc_cd7374a0, TypeST>, Enc_cc449f, AddrModeRel { let Inst{2-2} = 0b0; let Inst{7-7} = 0b0; let Inst{13-13} = 0b1; @@ -19046,7 +19457,7 @@ def S2_pstorerbt_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32), "if ($Pv4) memb($Rs32) = $Rt32", -tc_3d905451, TypeMAPPING> { +tc_8b15472a, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -19054,7 +19465,7 @@ def S2_pstorerbtnew_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Rt32), "if ($Pv4.new) memb($Rx32++#$Ii) = $Rt32", -tc_7675c0e9, TypeST>, Enc_cc449f, AddrModeRel { +tc_74e47fd9, TypeST>, Enc_cc449f, AddrModeRel { let Inst{2-2} = 0b0; let Inst{7-7} = 0b1; let Inst{13-13} = 0b1; @@ -19072,7 +19483,7 @@ def S2_pstorerdf_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u29_3Imm:$Ii, DoubleRegs:$Rtt32), "if (!$Pv4) memd($Rs32+#$Ii) = $Rtt32", -tc_3d905451, TypeV2LDST>, Enc_57a33e, AddrModeRel { +tc_8b15472a, TypeV2LDST>, Enc_57a33e, AddrModeRel { let Inst{2-2} = 0b0; let Inst{31-21} = 0b01000100110; let isPredicated = 1; @@ -19093,7 +19504,7 @@ def S2_pstorerdf_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_3Imm:$Ii, DoubleRegs:$Rtt32), "if (!$Pv4) memd($Rx32++#$Ii) = $Rtt32", -tc_9b73d261, TypeST>, Enc_9a33d5, AddrModeRel { +tc_cd7374a0, TypeST>, Enc_9a33d5, AddrModeRel { let Inst{2-2} = 0b1; let Inst{7-7} = 0b0; let Inst{13-13} = 0b1; @@ -19111,7 +19522,7 @@ def S2_pstorerdf_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, DoubleRegs:$Rtt32), "if (!$Pv4) memd($Rs32) = $Rtt32", -tc_3d905451, TypeMAPPING> { +tc_8b15472a, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -19119,7 +19530,7 @@ def S2_pstorerdfnew_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_3Imm:$Ii, DoubleRegs:$Rtt32), "if (!$Pv4.new) memd($Rx32++#$Ii) = $Rtt32", -tc_7675c0e9, TypeST>, Enc_9a33d5, AddrModeRel { +tc_74e47fd9, TypeST>, Enc_9a33d5, AddrModeRel { let Inst{2-2} = 0b1; let Inst{7-7} = 0b1; let Inst{13-13} = 0b1; @@ -19138,7 +19549,7 @@ def S2_pstorerdt_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u29_3Imm:$Ii, DoubleRegs:$Rtt32), "if ($Pv4) memd($Rs32+#$Ii) = $Rtt32", -tc_3d905451, TypeV2LDST>, Enc_57a33e, AddrModeRel { +tc_8b15472a, TypeV2LDST>, Enc_57a33e, AddrModeRel { let Inst{2-2} = 0b0; let Inst{31-21} = 0b01000000110; let isPredicated = 1; @@ -19158,7 +19569,7 @@ def S2_pstorerdt_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_3Imm:$Ii, DoubleRegs:$Rtt32), "if ($Pv4) memd($Rx32++#$Ii) = $Rtt32", -tc_9b73d261, TypeST>, Enc_9a33d5, AddrModeRel { +tc_cd7374a0, 
TypeST>, Enc_9a33d5, AddrModeRel { let Inst{2-2} = 0b0; let Inst{7-7} = 0b0; let Inst{13-13} = 0b1; @@ -19175,7 +19586,7 @@ def S2_pstorerdt_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, DoubleRegs:$Rtt32), "if ($Pv4) memd($Rs32) = $Rtt32", -tc_3d905451, TypeMAPPING> { +tc_8b15472a, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -19183,7 +19594,7 @@ def S2_pstorerdtnew_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_3Imm:$Ii, DoubleRegs:$Rtt32), "if ($Pv4.new) memd($Rx32++#$Ii) = $Rtt32", -tc_7675c0e9, TypeST>, Enc_9a33d5, AddrModeRel { +tc_74e47fd9, TypeST>, Enc_9a33d5, AddrModeRel { let Inst{2-2} = 0b0; let Inst{7-7} = 0b1; let Inst{13-13} = 0b1; @@ -19201,7 +19612,7 @@ def S2_pstorerff_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32), "if (!$Pv4) memh($Rs32+#$Ii) = $Rt32.h", -tc_3d905451, TypeV2LDST>, Enc_e8c45e, AddrModeRel { +tc_8b15472a, TypeV2LDST>, Enc_e8c45e, AddrModeRel { let Inst{2-2} = 0b0; let Inst{31-21} = 0b01000100011; let isPredicated = 1; @@ -19222,7 +19633,7 @@ def S2_pstorerff_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32), "if (!$Pv4) memh($Rx32++#$Ii) = $Rt32.h", -tc_9b73d261, TypeST>, Enc_b886fd, AddrModeRel { +tc_cd7374a0, TypeST>, Enc_b886fd, AddrModeRel { let Inst{2-2} = 0b1; let Inst{7-7} = 0b0; let Inst{13-13} = 0b1; @@ -19240,7 +19651,7 @@ def S2_pstorerff_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32), "if (!$Pv4) memh($Rs32) = $Rt32.h", -tc_3d905451, TypeMAPPING> { +tc_8b15472a, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -19248,7 +19659,7 @@ def S2_pstorerffnew_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32), "if (!$Pv4.new) memh($Rx32++#$Ii) = $Rt32.h", -tc_7675c0e9, TypeST>, Enc_b886fd, AddrModeRel { +tc_74e47fd9, TypeST>, Enc_b886fd, AddrModeRel { let Inst{2-2} = 0b1; let Inst{7-7} = 0b1; let Inst{13-13} = 0b1; @@ -19267,7 +19678,7 @@ def S2_pstorerft_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32), "if ($Pv4) memh($Rs32+#$Ii) = $Rt32.h", -tc_3d905451, TypeV2LDST>, Enc_e8c45e, AddrModeRel { +tc_8b15472a, TypeV2LDST>, Enc_e8c45e, AddrModeRel { let Inst{2-2} = 0b0; let Inst{31-21} = 0b01000000011; let isPredicated = 1; @@ -19287,7 +19698,7 @@ def S2_pstorerft_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32), "if ($Pv4) memh($Rx32++#$Ii) = $Rt32.h", -tc_9b73d261, TypeST>, Enc_b886fd, AddrModeRel { +tc_cd7374a0, TypeST>, Enc_b886fd, AddrModeRel { let Inst{2-2} = 0b0; let Inst{7-7} = 0b0; let Inst{13-13} = 0b1; @@ -19304,7 +19715,7 @@ def S2_pstorerft_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32), "if ($Pv4) memh($Rs32) = $Rt32.h", -tc_3d905451, TypeMAPPING> { +tc_8b15472a, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -19312,7 +19723,7 @@ def S2_pstorerftnew_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32), "if ($Pv4.new) memh($Rx32++#$Ii) = $Rt32.h", -tc_7675c0e9, TypeST>, Enc_b886fd, AddrModeRel { +tc_74e47fd9, TypeST>, Enc_b886fd, AddrModeRel { let Inst{2-2} = 0b0; let Inst{7-7} = 0b1; let Inst{13-13} = 0b1; @@ -19330,7 +19741,7 @@ def S2_pstorerhf_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32), "if (!$Pv4) memh($Rs32+#$Ii) = $Rt32", -tc_3d905451, TypeV2LDST>, Enc_e8c45e, AddrModeRel { +tc_8b15472a, TypeV2LDST>, 
Enc_e8c45e, AddrModeRel { let Inst{2-2} = 0b0; let Inst{31-21} = 0b01000100010; let isPredicated = 1; @@ -19352,7 +19763,7 @@ def S2_pstorerhf_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32), "if (!$Pv4) memh($Rx32++#$Ii) = $Rt32", -tc_9b73d261, TypeST>, Enc_b886fd, AddrModeRel { +tc_cd7374a0, TypeST>, Enc_b886fd, AddrModeRel { let Inst{2-2} = 0b1; let Inst{7-7} = 0b0; let Inst{13-13} = 0b1; @@ -19370,7 +19781,7 @@ def S2_pstorerhf_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32), "if (!$Pv4) memh($Rs32) = $Rt32", -tc_3d905451, TypeMAPPING> { +tc_8b15472a, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -19378,7 +19789,7 @@ def S2_pstorerhfnew_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32), "if (!$Pv4.new) memh($Rx32++#$Ii) = $Rt32", -tc_7675c0e9, TypeST>, Enc_b886fd, AddrModeRel { +tc_74e47fd9, TypeST>, Enc_b886fd, AddrModeRel { let Inst{2-2} = 0b1; let Inst{7-7} = 0b1; let Inst{13-13} = 0b1; @@ -19397,7 +19808,7 @@ def S2_pstorerhnewf_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Nt8), "if (!$Pv4) memh($Rs32+#$Ii) = $Nt8.new", -tc_9da3628f, TypeV2LDST>, Enc_f44229, AddrModeRel { +tc_594ab548, TypeV2LDST>, Enc_f44229, AddrModeRel { let Inst{2-2} = 0b0; let Inst{12-11} = 0b01; let Inst{31-21} = 0b01000100101; @@ -19407,6 +19818,7 @@ let addrMode = BaseImmOffset; let accessSize = HalfWordAccess; let isNVStore = 1; let isNewValue = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let CextOpcode = "S2_storerh"; let InputType = "imm"; @@ -19422,7 +19834,7 @@ def S2_pstorerhnewf_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Nt8), "if (!$Pv4) memh($Rx32++#$Ii) = $Nt8.new", -tc_e2480a7f, TypeST>, Enc_31aa6a, AddrModeRel { +tc_d9f95eef, TypeST>, Enc_31aa6a, AddrModeRel { let Inst{2-2} = 0b1; let Inst{7-7} = 0b0; let Inst{13-11} = 0b101; @@ -19433,6 +19845,7 @@ let addrMode = PostInc; let accessSize = HalfWordAccess; let isNVStore = 1; let isNewValue = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let CextOpcode = "S2_storerh"; let BaseOpcode = "S2_storerh_pi"; @@ -19443,7 +19856,7 @@ def S2_pstorerhnewf_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8), "if (!$Pv4) memh($Rs32) = $Nt8.new", -tc_9da3628f, TypeMAPPING> { +tc_594ab548, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; let opNewValue = 2; @@ -19452,7 +19865,7 @@ def S2_pstorerhnewfnew_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Nt8), "if (!$Pv4.new) memh($Rx32++#$Ii) = $Nt8.new", -tc_8fab9ac3, TypeST>, Enc_31aa6a, AddrModeRel { +tc_d24b2d85, TypeST>, Enc_31aa6a, AddrModeRel { let Inst{2-2} = 0b1; let Inst{7-7} = 0b1; let Inst{13-11} = 0b101; @@ -19464,6 +19877,7 @@ let accessSize = HalfWordAccess; let isNVStore = 1; let isPredicatedNew = 1; let isNewValue = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let CextOpcode = "S2_storerh"; let BaseOpcode = "S2_storerh_pi"; @@ -19474,7 +19888,7 @@ def S2_pstorerhnewt_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Nt8), "if ($Pv4) memh($Rs32+#$Ii) = $Nt8.new", -tc_9da3628f, TypeV2LDST>, Enc_f44229, AddrModeRel { +tc_594ab548, TypeV2LDST>, Enc_f44229, AddrModeRel { let Inst{2-2} = 0b0; let Inst{12-11} = 0b01; let Inst{31-21} = 0b01000000101; @@ -19483,6 +19897,7 @@ let addrMode = BaseImmOffset; let accessSize = HalfWordAccess; let isNVStore = 1; let 
isNewValue = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let CextOpcode = "S2_storerh"; let InputType = "imm"; @@ -19498,7 +19913,7 @@ def S2_pstorerhnewt_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Nt8), "if ($Pv4) memh($Rx32++#$Ii) = $Nt8.new", -tc_e2480a7f, TypeST>, Enc_31aa6a, AddrModeRel { +tc_d9f95eef, TypeST>, Enc_31aa6a, AddrModeRel { let Inst{2-2} = 0b0; let Inst{7-7} = 0b0; let Inst{13-11} = 0b101; @@ -19508,6 +19923,7 @@ let addrMode = PostInc; let accessSize = HalfWordAccess; let isNVStore = 1; let isNewValue = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let CextOpcode = "S2_storerh"; let BaseOpcode = "S2_storerh_pi"; @@ -19518,7 +19934,7 @@ def S2_pstorerhnewt_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8), "if ($Pv4) memh($Rs32) = $Nt8.new", -tc_9da3628f, TypeMAPPING> { +tc_594ab548, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; let opNewValue = 2; @@ -19527,7 +19943,7 @@ def S2_pstorerhnewtnew_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Nt8), "if ($Pv4.new) memh($Rx32++#$Ii) = $Nt8.new", -tc_8fab9ac3, TypeST>, Enc_31aa6a, AddrModeRel { +tc_d24b2d85, TypeST>, Enc_31aa6a, AddrModeRel { let Inst{2-2} = 0b0; let Inst{7-7} = 0b1; let Inst{13-11} = 0b101; @@ -19538,6 +19954,7 @@ let accessSize = HalfWordAccess; let isNVStore = 1; let isPredicatedNew = 1; let isNewValue = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let CextOpcode = "S2_storerh"; let BaseOpcode = "S2_storerh_pi"; @@ -19548,7 +19965,7 @@ def S2_pstorerht_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32), "if ($Pv4) memh($Rs32+#$Ii) = $Rt32", -tc_3d905451, TypeV2LDST>, Enc_e8c45e, AddrModeRel { +tc_8b15472a, TypeV2LDST>, Enc_e8c45e, AddrModeRel { let Inst{2-2} = 0b0; let Inst{31-21} = 0b01000000010; let isPredicated = 1; @@ -19569,7 +19986,7 @@ def S2_pstorerht_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32), "if ($Pv4) memh($Rx32++#$Ii) = $Rt32", -tc_9b73d261, TypeST>, Enc_b886fd, AddrModeRel { +tc_cd7374a0, TypeST>, Enc_b886fd, AddrModeRel { let Inst{2-2} = 0b0; let Inst{7-7} = 0b0; let Inst{13-13} = 0b1; @@ -19586,7 +20003,7 @@ def S2_pstorerht_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32), "if ($Pv4) memh($Rs32) = $Rt32", -tc_3d905451, TypeMAPPING> { +tc_8b15472a, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -19594,7 +20011,7 @@ def S2_pstorerhtnew_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32), "if ($Pv4.new) memh($Rx32++#$Ii) = $Rt32", -tc_7675c0e9, TypeST>, Enc_b886fd, AddrModeRel { +tc_74e47fd9, TypeST>, Enc_b886fd, AddrModeRel { let Inst{2-2} = 0b0; let Inst{7-7} = 0b1; let Inst{13-13} = 0b1; @@ -19612,7 +20029,7 @@ def S2_pstorerif_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Rt32), "if (!$Pv4) memw($Rs32+#$Ii) = $Rt32", -tc_3d905451, TypeV2LDST>, Enc_397f23, AddrModeRel { +tc_8b15472a, TypeV2LDST>, Enc_397f23, AddrModeRel { let Inst{2-2} = 0b0; let Inst{31-21} = 0b01000100100; let isPredicated = 1; @@ -19634,7 +20051,7 @@ def S2_pstorerif_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Rt32), "if (!$Pv4) memw($Rx32++#$Ii) = $Rt32", -tc_9b73d261, TypeST>, Enc_7eaeb6, AddrModeRel { +tc_cd7374a0, TypeST>, Enc_7eaeb6, AddrModeRel { let Inst{2-2} = 0b1; let Inst{7-7} = 0b0; let Inst{13-13} = 0b1; @@ 
-19652,7 +20069,7 @@ def S2_pstorerif_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32), "if (!$Pv4) memw($Rs32) = $Rt32", -tc_3d905451, TypeMAPPING> { +tc_8b15472a, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -19660,7 +20077,7 @@ def S2_pstorerifnew_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Rt32), "if (!$Pv4.new) memw($Rx32++#$Ii) = $Rt32", -tc_7675c0e9, TypeST>, Enc_7eaeb6, AddrModeRel { +tc_74e47fd9, TypeST>, Enc_7eaeb6, AddrModeRel { let Inst{2-2} = 0b1; let Inst{7-7} = 0b1; let Inst{13-13} = 0b1; @@ -19680,7 +20097,7 @@ def S2_pstorerinewf_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Nt8), "if (!$Pv4) memw($Rs32+#$Ii) = $Nt8.new", -tc_9da3628f, TypeV2LDST>, Enc_8dbdfe, AddrModeRel { +tc_594ab548, TypeV2LDST>, Enc_8dbdfe, AddrModeRel { let Inst{2-2} = 0b0; let Inst{12-11} = 0b10; let Inst{31-21} = 0b01000100101; @@ -19690,6 +20107,7 @@ let addrMode = BaseImmOffset; let accessSize = WordAccess; let isNVStore = 1; let isNewValue = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let CextOpcode = "S2_storeri"; let InputType = "imm"; @@ -19705,7 +20123,7 @@ def S2_pstorerinewf_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Nt8), "if (!$Pv4) memw($Rx32++#$Ii) = $Nt8.new", -tc_e2480a7f, TypeST>, Enc_65f095, AddrModeRel { +tc_d9f95eef, TypeST>, Enc_65f095, AddrModeRel { let Inst{2-2} = 0b1; let Inst{7-7} = 0b0; let Inst{13-11} = 0b110; @@ -19716,6 +20134,7 @@ let addrMode = PostInc; let accessSize = WordAccess; let isNVStore = 1; let isNewValue = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let CextOpcode = "S2_storeri"; let BaseOpcode = "S2_storeri_pi"; @@ -19726,7 +20145,7 @@ def S2_pstorerinewf_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8), "if (!$Pv4) memw($Rs32) = $Nt8.new", -tc_9da3628f, TypeMAPPING> { +tc_594ab548, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; let opNewValue = 2; @@ -19735,7 +20154,7 @@ def S2_pstorerinewfnew_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Nt8), "if (!$Pv4.new) memw($Rx32++#$Ii) = $Nt8.new", -tc_8fab9ac3, TypeST>, Enc_65f095, AddrModeRel { +tc_d24b2d85, TypeST>, Enc_65f095, AddrModeRel { let Inst{2-2} = 0b1; let Inst{7-7} = 0b1; let Inst{13-11} = 0b110; @@ -19747,6 +20166,7 @@ let accessSize = WordAccess; let isNVStore = 1; let isPredicatedNew = 1; let isNewValue = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let CextOpcode = "S2_storeri"; let BaseOpcode = "S2_storeri_pi"; @@ -19757,7 +20177,7 @@ def S2_pstorerinewt_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Nt8), "if ($Pv4) memw($Rs32+#$Ii) = $Nt8.new", -tc_9da3628f, TypeV2LDST>, Enc_8dbdfe, AddrModeRel { +tc_594ab548, TypeV2LDST>, Enc_8dbdfe, AddrModeRel { let Inst{2-2} = 0b0; let Inst{12-11} = 0b10; let Inst{31-21} = 0b01000000101; @@ -19766,6 +20186,7 @@ let addrMode = BaseImmOffset; let accessSize = WordAccess; let isNVStore = 1; let isNewValue = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let CextOpcode = "S2_storeri"; let InputType = "imm"; @@ -19781,7 +20202,7 @@ def S2_pstorerinewt_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Nt8), "if ($Pv4) memw($Rx32++#$Ii) = $Nt8.new", -tc_e2480a7f, TypeST>, Enc_65f095, AddrModeRel { +tc_d9f95eef, TypeST>, Enc_65f095, AddrModeRel { let Inst{2-2} = 0b0; let Inst{7-7} = 0b0; let Inst{13-11} = 
0b110; @@ -19791,6 +20212,7 @@ let addrMode = PostInc; let accessSize = WordAccess; let isNVStore = 1; let isNewValue = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let CextOpcode = "S2_storeri"; let BaseOpcode = "S2_storeri_pi"; @@ -19801,7 +20223,7 @@ def S2_pstorerinewt_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8), "if ($Pv4) memw($Rs32) = $Nt8.new", -tc_9da3628f, TypeMAPPING> { +tc_594ab548, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; let opNewValue = 2; @@ -19810,7 +20232,7 @@ def S2_pstorerinewtnew_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Nt8), "if ($Pv4.new) memw($Rx32++#$Ii) = $Nt8.new", -tc_8fab9ac3, TypeST>, Enc_65f095, AddrModeRel { +tc_d24b2d85, TypeST>, Enc_65f095, AddrModeRel { let Inst{2-2} = 0b0; let Inst{7-7} = 0b1; let Inst{13-11} = 0b110; @@ -19821,6 +20243,7 @@ let accessSize = WordAccess; let isNVStore = 1; let isPredicatedNew = 1; let isNewValue = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let CextOpcode = "S2_storeri"; let BaseOpcode = "S2_storeri_pi"; @@ -19831,7 +20254,7 @@ def S2_pstorerit_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Rt32), "if ($Pv4) memw($Rs32+#$Ii) = $Rt32", -tc_3d905451, TypeV2LDST>, Enc_397f23, AddrModeRel { +tc_8b15472a, TypeV2LDST>, Enc_397f23, AddrModeRel { let Inst{2-2} = 0b0; let Inst{31-21} = 0b01000000100; let isPredicated = 1; @@ -19852,7 +20275,7 @@ def S2_pstorerit_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Rt32), "if ($Pv4) memw($Rx32++#$Ii) = $Rt32", -tc_9b73d261, TypeST>, Enc_7eaeb6, AddrModeRel { +tc_cd7374a0, TypeST>, Enc_7eaeb6, AddrModeRel { let Inst{2-2} = 0b0; let Inst{7-7} = 0b0; let Inst{13-13} = 0b1; @@ -19869,7 +20292,7 @@ def S2_pstorerit_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32), "if ($Pv4) memw($Rs32) = $Rt32", -tc_3d905451, TypeMAPPING> { +tc_8b15472a, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -19877,7 +20300,7 @@ def S2_pstoreritnew_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Rt32), "if ($Pv4.new) memw($Rx32++#$Ii) = $Rt32", -tc_7675c0e9, TypeST>, Enc_7eaeb6, AddrModeRel { +tc_74e47fd9, TypeST>, Enc_7eaeb6, AddrModeRel { let Inst{2-2} = 0b0; let Inst{7-7} = 0b1; let Inst{13-13} = 0b1; @@ -19895,7 +20318,7 @@ def S2_setbit_i : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, u5_0Imm:$Ii), "$Rd32 = setbit($Rs32,#$Ii)", -tc_9c18c9a5, TypeS_2op>, Enc_a05677 { +tc_540fdfbc, TypeS_2op>, Enc_a05677 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10001100110; @@ -19906,7 +20329,7 @@ def S2_setbit_r : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = setbit($Rs32,$Rt32)", -tc_9c18c9a5, TypeS_3op>, Enc_5ab2be { +tc_540fdfbc, TypeS_3op>, Enc_5ab2be { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000110100; @@ -19917,7 +20340,7 @@ def S2_shuffeb : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = shuffeb($Rss32,$Rtt32)", -tc_9c18c9a5, TypeS_3op>, Enc_a56825 { +tc_540fdfbc, TypeS_3op>, Enc_a56825 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000001000; @@ -19926,7 +20349,7 @@ def S2_shuffeh : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = shuffeh($Rss32,$Rtt32)", -tc_9c18c9a5, TypeS_3op>, Enc_a56825 { +tc_540fdfbc, TypeS_3op>, Enc_a56825 { let Inst{7-5} = 0b110; let 
Inst{13-13} = 0b0; let Inst{31-21} = 0b11000001000; @@ -19935,7 +20358,7 @@ def S2_shuffob : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = shuffob($Rtt32,$Rss32)", -tc_9c18c9a5, TypeS_3op>, Enc_ea23e4 { +tc_540fdfbc, TypeS_3op>, Enc_ea23e4 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000001000; @@ -19944,7 +20367,7 @@ def S2_shuffoh : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = shuffoh($Rtt32,$Rss32)", -tc_9c18c9a5, TypeS_3op>, Enc_ea23e4 { +tc_540fdfbc, TypeS_3op>, Enc_ea23e4 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000001100; @@ -19953,7 +20376,7 @@ def S2_storerb_io : HInst< (outs), (ins IntRegs:$Rs32, s32_0Imm:$Ii, IntRegs:$Rt32), "memb($Rs32+#$Ii) = $Rt32", -tc_53ee6546, TypeST>, Enc_448f7f, AddrModeRel { +tc_05b6c987, TypeST>, Enc_448f7f, AddrModeRel, PostInc_BaseImm { let Inst{24-21} = 0b1000; let Inst{31-27} = 0b10100; let addrMode = BaseImmOffset; @@ -19974,7 +20397,7 @@ def S2_storerb_pbr : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32), "memb($Rx32++$Mu2:brev) = $Rt32", -tc_20a8e109, TypeST>, Enc_d5c73f, AddrModeRel { +tc_f86c328a, TypeST>, Enc_d5c73f, AddrModeRel { let Inst{7-0} = 0b00000000; let Inst{31-21} = 0b10101111000; let accessSize = ByteAccess; @@ -19987,7 +20410,7 @@ def S2_storerb_pci : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, s4_0Imm:$Ii, ModRegs:$Mu2, IntRegs:$Rt32), "memb($Rx32++#$Ii:circ($Mu2)) = $Rt32", -tc_251c87b2, TypeST>, Enc_b15941 { +tc_9fdb5406, TypeST>, Enc_b15941, AddrModeRel { let Inst{2-0} = 0b000; let Inst{7-7} = 0b0; let Inst{31-21} = 0b10101001000; @@ -19995,6 +20418,7 @@ let addrMode = PostInc; let accessSize = ByteAccess; let mayStore = 1; let Uses = [CS]; +let BaseOpcode = "S2_storerb_pci"; let isNVStorable = 1; let Constraints = "$Rx32 = $Rx32in"; } @@ -20002,13 +20426,14 @@ def S2_storerb_pcr : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32), "memb($Rx32++I:circ($Mu2)) = $Rt32", -tc_20a8e109, TypeST>, Enc_d5c73f { +tc_f86c328a, TypeST>, Enc_d5c73f, AddrModeRel { let Inst{7-0} = 0b00000010; let Inst{31-21} = 0b10101001000; let addrMode = PostInc; let accessSize = ByteAccess; let mayStore = 1; let Uses = [CS]; +let BaseOpcode = "S2_storerb_pcr"; let isNVStorable = 1; let Constraints = "$Rx32 = $Rx32in"; } @@ -20016,7 +20441,7 @@ def S2_storerb_pi : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Rt32), "memb($Rx32++#$Ii) = $Rt32", -tc_20a8e109, TypeST>, Enc_10bc21, AddrModeRel { +tc_f86c328a, TypeST>, Enc_10bc21, AddrModeRel, PostInc_BaseImm { let Inst{2-0} = 0b000; let Inst{7-7} = 0b0; let Inst{13-13} = 0b0; @@ -20024,6 +20449,7 @@ let Inst{31-21} = 0b10101011000; let addrMode = PostInc; let accessSize = ByteAccess; let mayStore = 1; +let CextOpcode = "S2_storerb"; let BaseOpcode = "S2_storerb_pi"; let isPredicable = 1; let isNVStorable = 1; @@ -20033,7 +20459,7 @@ def S2_storerb_pr : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32), "memb($Rx32++$Mu2) = $Rt32", -tc_20a8e109, TypeST>, Enc_d5c73f { +tc_f86c328a, TypeST>, Enc_d5c73f { let Inst{7-0} = 0b00000000; let Inst{31-21} = 0b10101101000; let addrMode = PostInc; @@ -20046,7 +20472,7 @@ def S2_storerb_zomap : HInst< (outs), (ins IntRegs:$Rs32, IntRegs:$Rt32), "memb($Rs32) = $Rt32", -tc_53ee6546, TypeMAPPING> { +tc_05b6c987, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -20054,7 +20480,7 @@ def S2_storerbgp : 
HInst< (outs), (ins u32_0Imm:$Ii, IntRegs:$Rt32), "memb(gp+#$Ii) = $Rt32", -tc_c14739d5, TypeV2LDST>, Enc_1b64fb, AddrModeRel { +tc_a788683e, TypeV2LDST>, Enc_1b64fb, AddrModeRel { let Inst{24-21} = 0b0000; let Inst{31-27} = 0b01001; let accessSize = ByteAccess; @@ -20072,7 +20498,7 @@ def S2_storerbnew_io : HInst< (outs), (ins IntRegs:$Rs32, s32_0Imm:$Ii, IntRegs:$Nt8), "memb($Rs32+#$Ii) = $Nt8.new", -tc_6c576d46, TypeST>, Enc_4df4e9, AddrModeRel { +tc_f7dd9c9f, TypeST>, Enc_4df4e9, AddrModeRel { let Inst{12-11} = 0b00; let Inst{24-21} = 0b1101; let Inst{31-27} = 0b10100; @@ -20080,6 +20506,7 @@ let addrMode = BaseImmOffset; let accessSize = ByteAccess; let isNVStore = 1; let isNewValue = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let CextOpcode = "S2_storerb"; let InputType = "imm"; @@ -20096,13 +20523,14 @@ def S2_storerbnew_pbr : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Nt8), "memb($Rx32++$Mu2:brev) = $Nt8.new", -tc_c8f9a6f6, TypeST>, Enc_8dbe85, AddrModeRel { +tc_e7d02c66, TypeST>, Enc_8dbe85, AddrModeRel { let Inst{7-0} = 0b00000000; let Inst{12-11} = 0b00; let Inst{31-21} = 0b10101111101; let accessSize = ByteAccess; let isNVStore = 1; let isNewValue = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let BaseOpcode = "S2_storerb_pbr"; let opNewValue = 3; @@ -20112,7 +20540,7 @@ def S2_storerbnew_pci : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, s4_0Imm:$Ii, ModRegs:$Mu2, IntRegs:$Nt8), "memb($Rx32++#$Ii:circ($Mu2)) = $Nt8.new", -tc_9c68db63, TypeST>, Enc_96ce4f { +tc_9d5941c7, TypeST>, Enc_96ce4f, AddrModeRel { let Inst{2-0} = 0b000; let Inst{7-7} = 0b0; let Inst{12-11} = 0b00; @@ -20121,8 +20549,10 @@ let addrMode = PostInc; let accessSize = ByteAccess; let isNVStore = 1; let isNewValue = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let Uses = [CS]; +let BaseOpcode = "S2_storerb_pci"; let opNewValue = 4; let Constraints = "$Rx32 = $Rx32in"; } @@ -20130,7 +20560,7 @@ def S2_storerbnew_pcr : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Nt8), "memb($Rx32++I:circ($Mu2)) = $Nt8.new", -tc_c8f9a6f6, TypeST>, Enc_8dbe85 { +tc_e7d02c66, TypeST>, Enc_8dbe85, AddrModeRel { let Inst{7-0} = 0b00000010; let Inst{12-11} = 0b00; let Inst{31-21} = 0b10101001101; @@ -20138,8 +20568,10 @@ let addrMode = PostInc; let accessSize = ByteAccess; let isNVStore = 1; let isNewValue = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let Uses = [CS]; +let BaseOpcode = "S2_storerb_pcr"; let opNewValue = 3; let Constraints = "$Rx32 = $Rx32in"; } @@ -20147,7 +20579,7 @@ def S2_storerbnew_pi : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Nt8), "memb($Rx32++#$Ii) = $Nt8.new", -tc_c8f9a6f6, TypeST>, Enc_c7cd90, AddrModeRel { +tc_e7d02c66, TypeST>, Enc_c7cd90, AddrModeRel { let Inst{2-0} = 0b000; let Inst{7-7} = 0b0; let Inst{13-11} = 0b000; @@ -20156,6 +20588,7 @@ let addrMode = PostInc; let accessSize = ByteAccess; let isNVStore = 1; let isNewValue = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let BaseOpcode = "S2_storerb_pi"; let isPredicable = 1; @@ -20167,7 +20600,7 @@ def S2_storerbnew_pr : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Nt8), "memb($Rx32++$Mu2) = $Nt8.new", -tc_c8f9a6f6, TypeST>, Enc_8dbe85 { +tc_e7d02c66, TypeST>, Enc_8dbe85 { let Inst{7-0} = 0b00000000; let Inst{12-11} = 0b00; let Inst{31-21} = 0b10101101101; @@ -20175,6 +20608,7 @@ let addrMode = PostInc; let accessSize = ByteAccess; let isNVStore = 1; let isNewValue = 1; +let 
isRestrictNoSlot1Store = 1; let mayStore = 1; let opNewValue = 3; let Constraints = "$Rx32 = $Rx32in"; @@ -20183,7 +20617,7 @@ def S2_storerbnew_zomap : HInst< (outs), (ins IntRegs:$Rs32, IntRegs:$Nt8), "memb($Rs32) = $Nt8.new", -tc_6c576d46, TypeMAPPING> { +tc_f7dd9c9f, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; let opNewValue = 1; @@ -20192,13 +20626,14 @@ def S2_storerbnewgp : HInst< (outs), (ins u32_0Imm:$Ii, IntRegs:$Nt8), "memb(gp+#$Ii) = $Nt8.new", -tc_9e86015f, TypeV2LDST>, Enc_ad1831, AddrModeRel { +tc_ff9ee76e, TypeV2LDST>, Enc_ad1831, AddrModeRel { let Inst{12-11} = 0b00; let Inst{24-21} = 0b0101; let Inst{31-27} = 0b01001; let accessSize = ByteAccess; let isNVStore = 1; let isNewValue = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let Uses = [GP]; let BaseOpcode = "S2_storerbabs"; @@ -20213,7 +20648,7 @@ def S2_storerd_io : HInst< (outs), (ins IntRegs:$Rs32, s29_3Imm:$Ii, DoubleRegs:$Rtt32), "memd($Rs32+#$Ii) = $Rtt32", -tc_53ee6546, TypeST>, Enc_ce6828, AddrModeRel { +tc_05b6c987, TypeST>, Enc_ce6828, AddrModeRel, PostInc_BaseImm { let Inst{24-21} = 0b1110; let Inst{31-27} = 0b10100; let addrMode = BaseImmOffset; @@ -20233,7 +20668,7 @@ def S2_storerd_pbr : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2, DoubleRegs:$Rtt32), "memd($Rx32++$Mu2:brev) = $Rtt32", -tc_20a8e109, TypeST>, Enc_928ca1 { +tc_f86c328a, TypeST>, Enc_928ca1 { let Inst{7-0} = 0b00000000; let Inst{31-21} = 0b10101111110; let accessSize = DoubleWordAccess; @@ -20244,7 +20679,7 @@ def S2_storerd_pci : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, s4_3Imm:$Ii, ModRegs:$Mu2, DoubleRegs:$Rtt32), "memd($Rx32++#$Ii:circ($Mu2)) = $Rtt32", -tc_251c87b2, TypeST>, Enc_395cc4 { +tc_9fdb5406, TypeST>, Enc_395cc4 { let Inst{2-0} = 0b000; let Inst{7-7} = 0b0; let Inst{31-21} = 0b10101001110; @@ -20258,7 +20693,7 @@ def S2_storerd_pcr : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2, DoubleRegs:$Rtt32), "memd($Rx32++I:circ($Mu2)) = $Rtt32", -tc_20a8e109, TypeST>, Enc_928ca1 { +tc_f86c328a, TypeST>, Enc_928ca1 { let Inst{7-0} = 0b00000010; let Inst{31-21} = 0b10101001110; let addrMode = PostInc; @@ -20271,7 +20706,7 @@ def S2_storerd_pi : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, s4_3Imm:$Ii, DoubleRegs:$Rtt32), "memd($Rx32++#$Ii) = $Rtt32", -tc_20a8e109, TypeST>, Enc_85bf58, AddrModeRel { +tc_f86c328a, TypeST>, Enc_85bf58, AddrModeRel, PostInc_BaseImm { let Inst{2-0} = 0b000; let Inst{7-7} = 0b0; let Inst{13-13} = 0b0; @@ -20288,7 +20723,7 @@ def S2_storerd_pr : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2, DoubleRegs:$Rtt32), "memd($Rx32++$Mu2) = $Rtt32", -tc_20a8e109, TypeST>, Enc_928ca1 { +tc_f86c328a, TypeST>, Enc_928ca1 { let Inst{7-0} = 0b00000000; let Inst{31-21} = 0b10101101110; let addrMode = PostInc; @@ -20300,7 +20735,7 @@ def S2_storerd_zomap : HInst< (outs), (ins IntRegs:$Rs32, DoubleRegs:$Rtt32), "memd($Rs32) = $Rtt32", -tc_53ee6546, TypeMAPPING> { +tc_05b6c987, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -20308,7 +20743,7 @@ def S2_storerdgp : HInst< (outs), (ins u29_3Imm:$Ii, DoubleRegs:$Rtt32), "memd(gp+#$Ii) = $Rtt32", -tc_c14739d5, TypeV2LDST>, Enc_5c124a, AddrModeRel { +tc_a788683e, TypeV2LDST>, Enc_5c124a, AddrModeRel { let Inst{24-21} = 0b0110; let Inst{31-27} = 0b01001; let accessSize = DoubleWordAccess; @@ -20325,7 +20760,7 @@ def S2_storerf_io : HInst< (outs), (ins IntRegs:$Rs32, s31_1Imm:$Ii, IntRegs:$Rt32), "memh($Rs32+#$Ii) = $Rt32.h", -tc_53ee6546, TypeST>, Enc_e957fb, AddrModeRel { 
+tc_05b6c987, TypeST>, Enc_e957fb, AddrModeRel, PostInc_BaseImm { let Inst{24-21} = 0b1011; let Inst{31-27} = 0b10100; let addrMode = BaseImmOffset; @@ -20345,7 +20780,7 @@ def S2_storerf_pbr : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32), "memh($Rx32++$Mu2:brev) = $Rt32.h", -tc_20a8e109, TypeST>, Enc_d5c73f { +tc_f86c328a, TypeST>, Enc_d5c73f { let Inst{7-0} = 0b00000000; let Inst{31-21} = 0b10101111011; let accessSize = HalfWordAccess; @@ -20356,7 +20791,7 @@ def S2_storerf_pci : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, s4_1Imm:$Ii, ModRegs:$Mu2, IntRegs:$Rt32), "memh($Rx32++#$Ii:circ($Mu2)) = $Rt32.h", -tc_251c87b2, TypeST>, Enc_935d9b { +tc_9fdb5406, TypeST>, Enc_935d9b { let Inst{2-0} = 0b000; let Inst{7-7} = 0b0; let Inst{31-21} = 0b10101001011; @@ -20370,7 +20805,7 @@ def S2_storerf_pcr : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32), "memh($Rx32++I:circ($Mu2)) = $Rt32.h", -tc_20a8e109, TypeST>, Enc_d5c73f { +tc_f86c328a, TypeST>, Enc_d5c73f { let Inst{7-0} = 0b00000010; let Inst{31-21} = 0b10101001011; let addrMode = PostInc; @@ -20383,7 +20818,7 @@ def S2_storerf_pi : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32), "memh($Rx32++#$Ii) = $Rt32.h", -tc_20a8e109, TypeST>, Enc_052c7d, AddrModeRel { +tc_f86c328a, TypeST>, Enc_052c7d, AddrModeRel, PostInc_BaseImm { let Inst{2-0} = 0b000; let Inst{7-7} = 0b0; let Inst{13-13} = 0b0; @@ -20400,7 +20835,7 @@ def S2_storerf_pr : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32), "memh($Rx32++$Mu2) = $Rt32.h", -tc_20a8e109, TypeST>, Enc_d5c73f { +tc_f86c328a, TypeST>, Enc_d5c73f { let Inst{7-0} = 0b00000000; let Inst{31-21} = 0b10101101011; let addrMode = PostInc; @@ -20412,7 +20847,7 @@ def S2_storerf_zomap : HInst< (outs), (ins IntRegs:$Rs32, IntRegs:$Rt32), "memh($Rs32) = $Rt32.h", -tc_53ee6546, TypeMAPPING> { +tc_05b6c987, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -20420,7 +20855,7 @@ def S2_storerfgp : HInst< (outs), (ins u31_1Imm:$Ii, IntRegs:$Rt32), "memh(gp+#$Ii) = $Rt32.h", -tc_c14739d5, TypeV2LDST>, Enc_fda92c, AddrModeRel { +tc_a788683e, TypeV2LDST>, Enc_fda92c, AddrModeRel { let Inst{24-21} = 0b0011; let Inst{31-27} = 0b01001; let accessSize = HalfWordAccess; @@ -20437,7 +20872,7 @@ def S2_storerh_io : HInst< (outs), (ins IntRegs:$Rs32, s31_1Imm:$Ii, IntRegs:$Rt32), "memh($Rs32+#$Ii) = $Rt32", -tc_53ee6546, TypeST>, Enc_e957fb, AddrModeRel { +tc_05b6c987, TypeST>, Enc_e957fb, AddrModeRel, PostInc_BaseImm { let Inst{24-21} = 0b1010; let Inst{31-27} = 0b10100; let addrMode = BaseImmOffset; @@ -20458,7 +20893,7 @@ def S2_storerh_pbr : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32), "memh($Rx32++$Mu2:brev) = $Rt32", -tc_20a8e109, TypeST>, Enc_d5c73f, AddrModeRel { +tc_f86c328a, TypeST>, Enc_d5c73f, AddrModeRel { let Inst{7-0} = 0b00000000; let Inst{31-21} = 0b10101111010; let accessSize = HalfWordAccess; @@ -20471,7 +20906,7 @@ def S2_storerh_pci : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, s4_1Imm:$Ii, ModRegs:$Mu2, IntRegs:$Rt32), "memh($Rx32++#$Ii:circ($Mu2)) = $Rt32", -tc_251c87b2, TypeST>, Enc_935d9b { +tc_9fdb5406, TypeST>, Enc_935d9b, AddrModeRel { let Inst{2-0} = 0b000; let Inst{7-7} = 0b0; let Inst{31-21} = 0b10101001010; @@ -20479,6 +20914,7 @@ let addrMode = PostInc; let accessSize = HalfWordAccess; let mayStore = 1; let Uses = [CS]; +let BaseOpcode = "S2_storerh_pci"; let isNVStorable = 1; let Constraints = "$Rx32 = $Rx32in"; 
} @@ -20486,13 +20922,14 @@ def S2_storerh_pcr : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32), "memh($Rx32++I:circ($Mu2)) = $Rt32", -tc_20a8e109, TypeST>, Enc_d5c73f { +tc_f86c328a, TypeST>, Enc_d5c73f, AddrModeRel { let Inst{7-0} = 0b00000010; let Inst{31-21} = 0b10101001010; let addrMode = PostInc; let accessSize = HalfWordAccess; let mayStore = 1; let Uses = [CS]; +let BaseOpcode = "S2_storerh_pcr"; let isNVStorable = 1; let Constraints = "$Rx32 = $Rx32in"; } @@ -20500,7 +20937,7 @@ def S2_storerh_pi : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32), "memh($Rx32++#$Ii) = $Rt32", -tc_20a8e109, TypeST>, Enc_052c7d, AddrModeRel { +tc_f86c328a, TypeST>, Enc_052c7d, AddrModeRel, PostInc_BaseImm { let Inst{2-0} = 0b000; let Inst{7-7} = 0b0; let Inst{13-13} = 0b0; @@ -20508,6 +20945,7 @@ let Inst{31-21} = 0b10101011010; let addrMode = PostInc; let accessSize = HalfWordAccess; let mayStore = 1; +let CextOpcode = "S2_storerh"; let BaseOpcode = "S2_storerh_pi"; let isPredicable = 1; let isNVStorable = 1; @@ -20517,7 +20955,7 @@ def S2_storerh_pr : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32), "memh($Rx32++$Mu2) = $Rt32", -tc_20a8e109, TypeST>, Enc_d5c73f { +tc_f86c328a, TypeST>, Enc_d5c73f { let Inst{7-0} = 0b00000000; let Inst{31-21} = 0b10101101010; let addrMode = PostInc; @@ -20530,7 +20968,7 @@ def S2_storerh_zomap : HInst< (outs), (ins IntRegs:$Rs32, IntRegs:$Rt32), "memh($Rs32) = $Rt32", -tc_53ee6546, TypeMAPPING> { +tc_05b6c987, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -20538,7 +20976,7 @@ def S2_storerhgp : HInst< (outs), (ins u31_1Imm:$Ii, IntRegs:$Rt32), "memh(gp+#$Ii) = $Rt32", -tc_c14739d5, TypeV2LDST>, Enc_fda92c, AddrModeRel { +tc_a788683e, TypeV2LDST>, Enc_fda92c, AddrModeRel { let Inst{24-21} = 0b0010; let Inst{31-27} = 0b01001; let accessSize = HalfWordAccess; @@ -20556,7 +20994,7 @@ def S2_storerhnew_io : HInst< (outs), (ins IntRegs:$Rs32, s31_1Imm:$Ii, IntRegs:$Nt8), "memh($Rs32+#$Ii) = $Nt8.new", -tc_6c576d46, TypeST>, Enc_0d8870, AddrModeRel { +tc_f7dd9c9f, TypeST>, Enc_0d8870, AddrModeRel { let Inst{12-11} = 0b01; let Inst{24-21} = 0b1101; let Inst{31-27} = 0b10100; @@ -20564,6 +21002,7 @@ let addrMode = BaseImmOffset; let accessSize = HalfWordAccess; let isNVStore = 1; let isNewValue = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let CextOpcode = "S2_storerh"; let InputType = "imm"; @@ -20580,13 +21019,14 @@ def S2_storerhnew_pbr : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Nt8), "memh($Rx32++$Mu2:brev) = $Nt8.new", -tc_c8f9a6f6, TypeST>, Enc_8dbe85, AddrModeRel { +tc_e7d02c66, TypeST>, Enc_8dbe85, AddrModeRel { let Inst{7-0} = 0b00000000; let Inst{12-11} = 0b01; let Inst{31-21} = 0b10101111101; let accessSize = HalfWordAccess; let isNVStore = 1; let isNewValue = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let BaseOpcode = "S2_storerh_pbr"; let opNewValue = 3; @@ -20596,7 +21036,7 @@ def S2_storerhnew_pci : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, s4_1Imm:$Ii, ModRegs:$Mu2, IntRegs:$Nt8), "memh($Rx32++#$Ii:circ($Mu2)) = $Nt8.new", -tc_9c68db63, TypeST>, Enc_91b9fe { +tc_9d5941c7, TypeST>, Enc_91b9fe, AddrModeRel { let Inst{2-0} = 0b000; let Inst{7-7} = 0b0; let Inst{12-11} = 0b01; @@ -20605,8 +21045,10 @@ let addrMode = PostInc; let accessSize = HalfWordAccess; let isNVStore = 1; let isNewValue = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let Uses = [CS]; +let BaseOpcode = 
"S2_storerh_pci"; let opNewValue = 4; let Constraints = "$Rx32 = $Rx32in"; } @@ -20614,7 +21056,7 @@ def S2_storerhnew_pcr : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Nt8), "memh($Rx32++I:circ($Mu2)) = $Nt8.new", -tc_c8f9a6f6, TypeST>, Enc_8dbe85 { +tc_e7d02c66, TypeST>, Enc_8dbe85, AddrModeRel { let Inst{7-0} = 0b00000010; let Inst{12-11} = 0b01; let Inst{31-21} = 0b10101001101; @@ -20622,8 +21064,10 @@ let addrMode = PostInc; let accessSize = HalfWordAccess; let isNVStore = 1; let isNewValue = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let Uses = [CS]; +let BaseOpcode = "S2_storerh_pcr"; let opNewValue = 3; let Constraints = "$Rx32 = $Rx32in"; } @@ -20631,7 +21075,7 @@ def S2_storerhnew_pi : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Nt8), "memh($Rx32++#$Ii) = $Nt8.new", -tc_c8f9a6f6, TypeST>, Enc_e26546, AddrModeRel { +tc_e7d02c66, TypeST>, Enc_e26546, AddrModeRel { let Inst{2-0} = 0b000; let Inst{7-7} = 0b0; let Inst{13-11} = 0b001; @@ -20640,6 +21084,7 @@ let addrMode = PostInc; let accessSize = HalfWordAccess; let isNVStore = 1; let isNewValue = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let BaseOpcode = "S2_storerh_pi"; let isNVStorable = 1; @@ -20651,7 +21096,7 @@ def S2_storerhnew_pr : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Nt8), "memh($Rx32++$Mu2) = $Nt8.new", -tc_c8f9a6f6, TypeST>, Enc_8dbe85 { +tc_e7d02c66, TypeST>, Enc_8dbe85 { let Inst{7-0} = 0b00000000; let Inst{12-11} = 0b01; let Inst{31-21} = 0b10101101101; @@ -20659,6 +21104,7 @@ let addrMode = PostInc; let accessSize = HalfWordAccess; let isNVStore = 1; let isNewValue = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let opNewValue = 3; let Constraints = "$Rx32 = $Rx32in"; @@ -20667,7 +21113,7 @@ def S2_storerhnew_zomap : HInst< (outs), (ins IntRegs:$Rs32, IntRegs:$Nt8), "memh($Rs32) = $Nt8.new", -tc_6c576d46, TypeMAPPING> { +tc_f7dd9c9f, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; let opNewValue = 1; @@ -20676,13 +21122,14 @@ def S2_storerhnewgp : HInst< (outs), (ins u31_1Imm:$Ii, IntRegs:$Nt8), "memh(gp+#$Ii) = $Nt8.new", -tc_9e86015f, TypeV2LDST>, Enc_bc03e5, AddrModeRel { +tc_ff9ee76e, TypeV2LDST>, Enc_bc03e5, AddrModeRel { let Inst{12-11} = 0b01; let Inst{24-21} = 0b0101; let Inst{31-27} = 0b01001; let accessSize = HalfWordAccess; let isNVStore = 1; let isNewValue = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let Uses = [GP]; let BaseOpcode = "S2_storerhabs"; @@ -20697,7 +21144,7 @@ def S2_storeri_io : HInst< (outs), (ins IntRegs:$Rs32, s30_2Imm:$Ii, IntRegs:$Rt32), "memw($Rs32+#$Ii) = $Rt32", -tc_53ee6546, TypeST>, Enc_143445, AddrModeRel { +tc_05b6c987, TypeST>, Enc_143445, AddrModeRel, PostInc_BaseImm { let Inst{24-21} = 0b1100; let Inst{31-27} = 0b10100; let addrMode = BaseImmOffset; @@ -20718,7 +21165,7 @@ def S2_storeri_pbr : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32), "memw($Rx32++$Mu2:brev) = $Rt32", -tc_20a8e109, TypeST>, Enc_d5c73f, AddrModeRel { +tc_f86c328a, TypeST>, Enc_d5c73f, AddrModeRel { let Inst{7-0} = 0b00000000; let Inst{31-21} = 0b10101111100; let accessSize = WordAccess; @@ -20731,7 +21178,7 @@ def S2_storeri_pci : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, s4_2Imm:$Ii, ModRegs:$Mu2, IntRegs:$Rt32), "memw($Rx32++#$Ii:circ($Mu2)) = $Rt32", -tc_251c87b2, TypeST>, Enc_79b8c8 { +tc_9fdb5406, TypeST>, Enc_79b8c8, AddrModeRel { let Inst{2-0} = 0b000; let Inst{7-7} = 0b0; let Inst{31-21} = 0b10101001100; @@ 
-20739,6 +21186,7 @@ let addrMode = PostInc; let accessSize = WordAccess; let mayStore = 1; let Uses = [CS]; +let BaseOpcode = "S2_storeri_pci"; let isNVStorable = 1; let Constraints = "$Rx32 = $Rx32in"; } @@ -20746,13 +21194,14 @@ def S2_storeri_pcr : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32), "memw($Rx32++I:circ($Mu2)) = $Rt32", -tc_20a8e109, TypeST>, Enc_d5c73f { +tc_f86c328a, TypeST>, Enc_d5c73f, AddrModeRel { let Inst{7-0} = 0b00000010; let Inst{31-21} = 0b10101001100; let addrMode = PostInc; let accessSize = WordAccess; let mayStore = 1; let Uses = [CS]; +let BaseOpcode = "S2_storeri_pcr"; let isNVStorable = 1; let Constraints = "$Rx32 = $Rx32in"; } @@ -20760,7 +21209,7 @@ def S2_storeri_pi : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Rt32), "memw($Rx32++#$Ii) = $Rt32", -tc_20a8e109, TypeST>, Enc_db40cd, AddrModeRel { +tc_f86c328a, TypeST>, Enc_db40cd, AddrModeRel, PostInc_BaseImm { let Inst{2-0} = 0b000; let Inst{7-7} = 0b0; let Inst{13-13} = 0b0; @@ -20768,6 +21217,7 @@ let Inst{31-21} = 0b10101011100; let addrMode = PostInc; let accessSize = WordAccess; let mayStore = 1; +let CextOpcode = "S2_storeri"; let BaseOpcode = "S2_storeri_pi"; let isPredicable = 1; let isNVStorable = 1; @@ -20777,7 +21227,7 @@ def S2_storeri_pr : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32), "memw($Rx32++$Mu2) = $Rt32", -tc_20a8e109, TypeST>, Enc_d5c73f { +tc_f86c328a, TypeST>, Enc_d5c73f { let Inst{7-0} = 0b00000000; let Inst{31-21} = 0b10101101100; let addrMode = PostInc; @@ -20790,7 +21240,7 @@ def S2_storeri_zomap : HInst< (outs), (ins IntRegs:$Rs32, IntRegs:$Rt32), "memw($Rs32) = $Rt32", -tc_53ee6546, TypeMAPPING> { +tc_05b6c987, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -20798,7 +21248,7 @@ def S2_storerigp : HInst< (outs), (ins u30_2Imm:$Ii, IntRegs:$Rt32), "memw(gp+#$Ii) = $Rt32", -tc_c14739d5, TypeV2LDST>, Enc_541f26, AddrModeRel { +tc_a788683e, TypeV2LDST>, Enc_541f26, AddrModeRel { let Inst{24-21} = 0b0100; let Inst{31-27} = 0b01001; let accessSize = WordAccess; @@ -20816,7 +21266,7 @@ def S2_storerinew_io : HInst< (outs), (ins IntRegs:$Rs32, s30_2Imm:$Ii, IntRegs:$Nt8), "memw($Rs32+#$Ii) = $Nt8.new", -tc_6c576d46, TypeST>, Enc_690862, AddrModeRel { +tc_f7dd9c9f, TypeST>, Enc_690862, AddrModeRel { let Inst{12-11} = 0b10; let Inst{24-21} = 0b1101; let Inst{31-27} = 0b10100; @@ -20824,6 +21274,7 @@ let addrMode = BaseImmOffset; let accessSize = WordAccess; let isNVStore = 1; let isNewValue = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let CextOpcode = "S2_storeri"; let InputType = "imm"; @@ -20840,13 +21291,14 @@ def S2_storerinew_pbr : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Nt8), "memw($Rx32++$Mu2:brev) = $Nt8.new", -tc_c8f9a6f6, TypeST>, Enc_8dbe85, AddrModeRel { +tc_e7d02c66, TypeST>, Enc_8dbe85, AddrModeRel { let Inst{7-0} = 0b00000000; let Inst{12-11} = 0b10; let Inst{31-21} = 0b10101111101; let accessSize = WordAccess; let isNVStore = 1; let isNewValue = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let BaseOpcode = "S2_storeri_pbr"; let opNewValue = 3; @@ -20856,7 +21308,7 @@ def S2_storerinew_pci : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, s4_2Imm:$Ii, ModRegs:$Mu2, IntRegs:$Nt8), "memw($Rx32++#$Ii:circ($Mu2)) = $Nt8.new", -tc_9c68db63, TypeST>, Enc_3f97c8 { +tc_9d5941c7, TypeST>, Enc_3f97c8, AddrModeRel { let Inst{2-0} = 0b000; let Inst{7-7} = 0b0; let Inst{12-11} = 0b10; @@ -20865,8 +21317,10 @@ let 
addrMode = PostInc; let accessSize = WordAccess; let isNVStore = 1; let isNewValue = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let Uses = [CS]; +let BaseOpcode = "S2_storeri_pci"; let opNewValue = 4; let Constraints = "$Rx32 = $Rx32in"; } @@ -20874,7 +21328,7 @@ def S2_storerinew_pcr : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Nt8), "memw($Rx32++I:circ($Mu2)) = $Nt8.new", -tc_c8f9a6f6, TypeST>, Enc_8dbe85 { +tc_e7d02c66, TypeST>, Enc_8dbe85, AddrModeRel { let Inst{7-0} = 0b00000010; let Inst{12-11} = 0b10; let Inst{31-21} = 0b10101001101; @@ -20882,8 +21336,10 @@ let addrMode = PostInc; let accessSize = WordAccess; let isNVStore = 1; let isNewValue = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let Uses = [CS]; +let BaseOpcode = "S2_storeri_pcr"; let opNewValue = 3; let Constraints = "$Rx32 = $Rx32in"; } @@ -20891,7 +21347,7 @@ def S2_storerinew_pi : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Nt8), "memw($Rx32++#$Ii) = $Nt8.new", -tc_c8f9a6f6, TypeST>, Enc_223005, AddrModeRel { +tc_e7d02c66, TypeST>, Enc_223005, AddrModeRel { let Inst{2-0} = 0b000; let Inst{7-7} = 0b0; let Inst{13-11} = 0b010; @@ -20900,6 +21356,7 @@ let addrMode = PostInc; let accessSize = WordAccess; let isNVStore = 1; let isNewValue = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let BaseOpcode = "S2_storeri_pi"; let isPredicable = 1; @@ -20910,7 +21367,7 @@ def S2_storerinew_pr : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Nt8), "memw($Rx32++$Mu2) = $Nt8.new", -tc_c8f9a6f6, TypeST>, Enc_8dbe85 { +tc_e7d02c66, TypeST>, Enc_8dbe85 { let Inst{7-0} = 0b00000000; let Inst{12-11} = 0b10; let Inst{31-21} = 0b10101101101; @@ -20918,6 +21375,7 @@ let addrMode = PostInc; let accessSize = WordAccess; let isNVStore = 1; let isNewValue = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let opNewValue = 3; let Constraints = "$Rx32 = $Rx32in"; @@ -20926,7 +21384,7 @@ def S2_storerinew_zomap : HInst< (outs), (ins IntRegs:$Rs32, IntRegs:$Nt8), "memw($Rs32) = $Nt8.new", -tc_6c576d46, TypeMAPPING> { +tc_f7dd9c9f, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; let opNewValue = 1; @@ -20935,13 +21393,14 @@ def S2_storerinewgp : HInst< (outs), (ins u30_2Imm:$Ii, IntRegs:$Nt8), "memw(gp+#$Ii) = $Nt8.new", -tc_9e86015f, TypeV2LDST>, Enc_78cbf0, AddrModeRel { +tc_ff9ee76e, TypeV2LDST>, Enc_78cbf0, AddrModeRel { let Inst{12-11} = 0b10; let Inst{24-21} = 0b0101; let Inst{31-27} = 0b01001; let accessSize = WordAccess; let isNVStore = 1; let isNewValue = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let Uses = [GP]; let BaseOpcode = "S2_storeriabs"; @@ -20956,7 +21415,7 @@ def S2_storew_locked : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, IntRegs:$Rt32), "memw_locked($Rs32,$Pd4) = $Rt32", -tc_7d01cbdc, TypeST>, Enc_c2b48e { +tc_1372bca1, TypeST>, Enc_c2b48e { let Inst{7-2} = 0b000000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10100000101; @@ -20969,7 +21428,7 @@ def S2_svsathb : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = vsathb($Rs32)", -tc_b86c7e8b, TypeS_2op>, Enc_5e2823 { +tc_cde8b071, TypeS_2op>, Enc_5e2823 { let Inst{13-5} = 0b000000000; let Inst{31-21} = 0b10001100100; let hasNewValue = 1; @@ -20980,7 +21439,7 @@ def S2_svsathub : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = vsathub($Rs32)", -tc_b86c7e8b, TypeS_2op>, Enc_5e2823 { +tc_cde8b071, TypeS_2op>, Enc_5e2823 { let Inst{13-5} = 0b000000010; let Inst{31-21} = 0b10001100100; let hasNewValue = 1; @@ -20991,7 
+21450,7 @@ def S2_tableidxb : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, u4_0Imm:$Ii, s6_0Imm:$II), "$Rx32 = tableidxb($Rs32,#$Ii,#$II):raw", -tc_d95f4e98, TypeS_2op>, Enc_cd82bc { +tc_87735c3b, TypeS_2op>, Enc_cd82bc { let Inst{31-22} = 0b1000011100; let hasNewValue = 1; let opNewValue = 0; @@ -21002,7 +21461,7 @@ def S2_tableidxb_goodsyntax : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, u4_0Imm:$Ii, u5_0Imm:$II), "$Rx32 = tableidxb($Rs32,#$Ii,#$II)", -tc_d95f4e98, TypeS_2op> { +tc_87735c3b, TypeS_2op> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -21013,7 +21472,7 @@ def S2_tableidxd : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, u4_0Imm:$Ii, s6_0Imm:$II), "$Rx32 = tableidxd($Rs32,#$Ii,#$II):raw", -tc_d95f4e98, TypeS_2op>, Enc_cd82bc { +tc_87735c3b, TypeS_2op>, Enc_cd82bc { let Inst{31-22} = 0b1000011111; let hasNewValue = 1; let opNewValue = 0; @@ -21024,7 +21483,7 @@ def S2_tableidxd_goodsyntax : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, u4_0Imm:$Ii, u5_0Imm:$II), "$Rx32 = tableidxd($Rs32,#$Ii,#$II)", -tc_d95f4e98, TypeS_2op> { +tc_87735c3b, TypeS_2op> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -21034,7 +21493,7 @@ def S2_tableidxh : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, u4_0Imm:$Ii, s6_0Imm:$II), "$Rx32 = tableidxh($Rs32,#$Ii,#$II):raw", -tc_d95f4e98, TypeS_2op>, Enc_cd82bc { +tc_87735c3b, TypeS_2op>, Enc_cd82bc { let Inst{31-22} = 0b1000011101; let hasNewValue = 1; let opNewValue = 0; @@ -21045,7 +21504,7 @@ def S2_tableidxh_goodsyntax : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, u4_0Imm:$Ii, u5_0Imm:$II), "$Rx32 = tableidxh($Rs32,#$Ii,#$II)", -tc_d95f4e98, TypeS_2op> { +tc_87735c3b, TypeS_2op> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -21055,7 +21514,7 @@ def S2_tableidxw : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, u4_0Imm:$Ii, s6_0Imm:$II), "$Rx32 = tableidxw($Rs32,#$Ii,#$II):raw", -tc_d95f4e98, TypeS_2op>, Enc_cd82bc { +tc_87735c3b, TypeS_2op>, Enc_cd82bc { let Inst{31-22} = 0b1000011110; let hasNewValue = 1; let opNewValue = 0; @@ -21066,7 +21525,7 @@ def S2_tableidxw_goodsyntax : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, u4_0Imm:$Ii, u5_0Imm:$II), "$Rx32 = tableidxw($Rs32,#$Ii,#$II)", -tc_d95f4e98, TypeS_2op> { +tc_87735c3b, TypeS_2op> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -21076,7 +21535,7 @@ def S2_togglebit_i : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, u5_0Imm:$Ii), "$Rd32 = togglebit($Rs32,#$Ii)", -tc_9c18c9a5, TypeS_2op>, Enc_a05677 { +tc_540fdfbc, TypeS_2op>, Enc_a05677 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10001100110; @@ -21087,7 +21546,7 @@ def S2_togglebit_r : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = togglebit($Rs32,$Rt32)", -tc_9c18c9a5, TypeS_3op>, Enc_5ab2be { +tc_540fdfbc, TypeS_3op>, Enc_5ab2be { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000110100; @@ -21098,7 +21557,7 @@ def S2_tstbit_i : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, u5_0Imm:$Ii), "$Pd4 = tstbit($Rs32,#$Ii)", -tc_5fa2857c, TypeS_2op>, Enc_83ee64 { +tc_7a830544, TypeS_2op>, Enc_83ee64 { let Inst{7-2} = 0b000000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10000101000; @@ -21107,7 +21566,7 @@ def S2_tstbit_r : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Pd4 = tstbit($Rs32,$Rt32)", -tc_c58f771a, TypeS_3op>, 
Enc_c2b48e { +tc_1e856f58, TypeS_3op>, Enc_c2b48e { let Inst{7-2} = 0b000000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000111000; @@ -21116,7 +21575,7 @@ def S2_valignib : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32, u3_0Imm:$Ii), "$Rdd32 = valignb($Rtt32,$Rss32,#$Ii)", -tc_d1b5a4b6, TypeS_3op>, Enc_729ff7 { +tc_f8eeed7a, TypeS_3op>, Enc_729ff7 { let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000000000; } @@ -21124,7 +21583,7 @@ def S2_valignrb : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32, PredRegs:$Pu4), "$Rdd32 = valignb($Rtt32,$Rss32,$Pu4)", -tc_d1b5a4b6, TypeS_3op>, Enc_8c6530 { +tc_f8eeed7a, TypeS_3op>, Enc_8c6530 { let Inst{7-7} = 0b0; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000010000; @@ -21133,7 +21592,7 @@ def S2_vcnegh : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rdd32 = vcnegh($Rss32,$Rt32)", -tc_47ab9233, TypeS_3op>, Enc_927852 { +tc_b44c6e2a, TypeS_3op>, Enc_927852 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000011110; @@ -21144,7 +21603,7 @@ def S2_vcrotate : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rdd32 = vcrotate($Rss32,$Rt32)", -tc_63cd9d2d, TypeS_3op>, Enc_927852 { +tc_2b6f77c6, TypeS_3op>, Enc_927852 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000011110; @@ -21155,7 +21614,7 @@ def S2_vrcnegh : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rxx32 += vrcnegh($Rss32,$Rt32)", -tc_8cb685d9, TypeS_3op>, Enc_1aa186 { +tc_e913dc32, TypeS_3op>, Enc_1aa186 { let Inst{7-5} = 0b111; let Inst{13-13} = 0b1; let Inst{31-21} = 0b11001011001; @@ -21166,7 +21625,7 @@ def S2_vrndpackwh : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32), "$Rd32 = vrndwh($Rss32)", -tc_88fa2da6, TypeS_2op>, Enc_90cd8b { +tc_d088982c, TypeS_2op>, Enc_90cd8b { let Inst{13-5} = 0b000000100; let Inst{31-21} = 0b10001000100; let hasNewValue = 1; @@ -21177,7 +21636,7 @@ def S2_vrndpackwhs : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32), "$Rd32 = vrndwh($Rss32):sat", -tc_94e6ffd9, TypeS_2op>, Enc_90cd8b { +tc_c2f7d806, TypeS_2op>, Enc_90cd8b { let Inst{13-5} = 0b000000110; let Inst{31-21} = 0b10001000100; let hasNewValue = 1; @@ -21189,7 +21648,7 @@ def S2_vsathb : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32), "$Rd32 = vsathb($Rss32)", -tc_b86c7e8b, TypeS_2op>, Enc_90cd8b { +tc_cde8b071, TypeS_2op>, Enc_90cd8b { let Inst{13-5} = 0b000000110; let Inst{31-21} = 0b10001000000; let hasNewValue = 1; @@ -21200,7 +21659,7 @@ def S2_vsathb_nopack : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32), "$Rdd32 = vsathb($Rss32)", -tc_b86c7e8b, TypeS_2op>, Enc_b9c5fb { +tc_cde8b071, TypeS_2op>, Enc_b9c5fb { let Inst{13-5} = 0b000000111; let Inst{31-21} = 0b10000000000; let Defs = [USR_OVF]; @@ -21209,7 +21668,7 @@ def S2_vsathub : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32), "$Rd32 = vsathub($Rss32)", -tc_b86c7e8b, TypeS_2op>, Enc_90cd8b { +tc_cde8b071, TypeS_2op>, Enc_90cd8b { let Inst{13-5} = 0b000000000; let Inst{31-21} = 0b10001000000; let hasNewValue = 1; @@ -21220,7 +21679,7 @@ def S2_vsathub_nopack : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32), "$Rdd32 = vsathub($Rss32)", -tc_b86c7e8b, TypeS_2op>, Enc_b9c5fb { +tc_cde8b071, TypeS_2op>, Enc_b9c5fb { let Inst{13-5} = 0b000000100; let Inst{31-21} = 0b10000000000; let Defs = [USR_OVF]; @@ -21229,7 +21688,7 @@ def S2_vsatwh : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32), "$Rd32 = 
vsatwh($Rss32)", -tc_b86c7e8b, TypeS_2op>, Enc_90cd8b { +tc_cde8b071, TypeS_2op>, Enc_90cd8b { let Inst{13-5} = 0b000000010; let Inst{31-21} = 0b10001000000; let hasNewValue = 1; @@ -21240,7 +21699,7 @@ def S2_vsatwh_nopack : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32), "$Rdd32 = vsatwh($Rss32)", -tc_b86c7e8b, TypeS_2op>, Enc_b9c5fb { +tc_cde8b071, TypeS_2op>, Enc_b9c5fb { let Inst{13-5} = 0b000000110; let Inst{31-21} = 0b10000000000; let Defs = [USR_OVF]; @@ -21249,7 +21708,7 @@ def S2_vsatwuh : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32), "$Rd32 = vsatwuh($Rss32)", -tc_b86c7e8b, TypeS_2op>, Enc_90cd8b { +tc_cde8b071, TypeS_2op>, Enc_90cd8b { let Inst{13-5} = 0b000000100; let Inst{31-21} = 0b10001000000; let hasNewValue = 1; @@ -21260,7 +21719,7 @@ def S2_vsatwuh_nopack : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32), "$Rdd32 = vsatwuh($Rss32)", -tc_b86c7e8b, TypeS_2op>, Enc_b9c5fb { +tc_cde8b071, TypeS_2op>, Enc_b9c5fb { let Inst{13-5} = 0b000000101; let Inst{31-21} = 0b10000000000; let Defs = [USR_OVF]; @@ -21269,7 +21728,7 @@ def S2_vsplatrb : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = vsplatb($Rs32)", -tc_b86c7e8b, TypeS_2op>, Enc_5e2823 { +tc_cde8b071, TypeS_2op>, Enc_5e2823 { let Inst{13-5} = 0b000000111; let Inst{31-21} = 0b10001100010; let hasNewValue = 1; @@ -21281,7 +21740,7 @@ def S2_vsplatrh : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32), "$Rdd32 = vsplath($Rs32)", -tc_b86c7e8b, TypeS_2op>, Enc_3a3d62 { +tc_cde8b071, TypeS_2op>, Enc_3a3d62 { let Inst{13-5} = 0b000000010; let Inst{31-21} = 0b10000100010; let isReMaterializable = 1; @@ -21291,7 +21750,7 @@ def S2_vspliceib : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32, u3_0Imm:$Ii), "$Rdd32 = vspliceb($Rss32,$Rtt32,#$Ii)", -tc_d1b5a4b6, TypeS_3op>, Enc_d50cd3 { +tc_f8eeed7a, TypeS_3op>, Enc_d50cd3 { let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000000100; } @@ -21299,7 +21758,7 @@ def S2_vsplicerb : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32, PredRegs:$Pu4), "$Rdd32 = vspliceb($Rss32,$Rtt32,$Pu4)", -tc_d1b5a4b6, TypeS_3op>, Enc_dbd70c { +tc_f8eeed7a, TypeS_3op>, Enc_dbd70c { let Inst{7-7} = 0b0; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000010100; @@ -21308,7 +21767,7 @@ def S2_vsxtbh : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32), "$Rdd32 = vsxtbh($Rs32)", -tc_b86c7e8b, TypeS_2op>, Enc_3a3d62 { +tc_cde8b071, TypeS_2op>, Enc_3a3d62 { let Inst{13-5} = 0b000000000; let Inst{31-21} = 0b10000100000; let isReMaterializable = 1; @@ -21318,7 +21777,7 @@ def S2_vsxthw : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32), "$Rdd32 = vsxthw($Rs32)", -tc_b86c7e8b, TypeS_2op>, Enc_3a3d62 { +tc_cde8b071, TypeS_2op>, Enc_3a3d62 { let Inst{13-5} = 0b000000100; let Inst{31-21} = 0b10000100000; let isReMaterializable = 1; @@ -21328,7 +21787,7 @@ def S2_vtrunehb : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32), "$Rd32 = vtrunehb($Rss32)", -tc_b86c7e8b, TypeS_2op>, Enc_90cd8b { +tc_cde8b071, TypeS_2op>, Enc_90cd8b { let Inst{13-5} = 0b000000010; let Inst{31-21} = 0b10001000100; let hasNewValue = 1; @@ -21338,7 +21797,7 @@ def S2_vtrunewh : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vtrunewh($Rss32,$Rtt32)", -tc_9c18c9a5, TypeS_3op>, Enc_a56825 { +tc_540fdfbc, TypeS_3op>, Enc_a56825 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000001100; @@ -21347,7 +21806,7 @@ def S2_vtrunohb : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32), "$Rd32 = 
vtrunohb($Rss32)", -tc_b86c7e8b, TypeS_2op>, Enc_90cd8b { +tc_cde8b071, TypeS_2op>, Enc_90cd8b { let Inst{13-5} = 0b000000000; let Inst{31-21} = 0b10001000100; let hasNewValue = 1; @@ -21357,7 +21816,7 @@ def S2_vtrunowh : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vtrunowh($Rss32,$Rtt32)", -tc_9c18c9a5, TypeS_3op>, Enc_a56825 { +tc_540fdfbc, TypeS_3op>, Enc_a56825 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000001100; @@ -21366,7 +21825,7 @@ def S2_vzxtbh : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32), "$Rdd32 = vzxtbh($Rs32)", -tc_b86c7e8b, TypeS_2op>, Enc_3a3d62 { +tc_cde8b071, TypeS_2op>, Enc_3a3d62 { let Inst{13-5} = 0b000000010; let Inst{31-21} = 0b10000100000; let isReMaterializable = 1; @@ -21376,7 +21835,7 @@ def S2_vzxthw : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32), "$Rdd32 = vzxthw($Rs32)", -tc_b86c7e8b, TypeS_2op>, Enc_3a3d62 { +tc_cde8b071, TypeS_2op>, Enc_3a3d62 { let Inst{13-5} = 0b000000110; let Inst{31-21} = 0b10000100000; let isReMaterializable = 1; @@ -21386,7 +21845,7 @@ def S4_addaddi : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Ru32, s32_0Imm:$Ii), "$Rd32 = add($Rs32,add($Ru32,#$Ii))", -tc_090485bb, TypeALU64>, Enc_8b8d61 { +tc_c74f796f, TypeALU64>, Enc_8b8d61 { let Inst{31-23} = 0b110110110; let hasNewValue = 1; let opNewValue = 0; @@ -21401,7 +21860,7 @@ def S4_addi_asl_ri : HInst< (outs IntRegs:$Rx32), (ins u32_0Imm:$Ii, IntRegs:$Rx32in, u5_0Imm:$II), "$Rx32 = add(#$Ii,asl($Rx32in,#$II))", -tc_c0cd91a8, TypeALU64>, Enc_c31910 { +tc_c74f796f, TypeALU64>, Enc_c31910 { let Inst{2-0} = 0b100; let Inst{4-4} = 0b0; let Inst{31-24} = 0b11011110; @@ -21419,7 +21878,7 @@ def S4_addi_lsr_ri : HInst< (outs IntRegs:$Rx32), (ins u32_0Imm:$Ii, IntRegs:$Rx32in, u5_0Imm:$II), "$Rx32 = add(#$Ii,lsr($Rx32in,#$II))", -tc_c0cd91a8, TypeALU64>, Enc_c31910 { +tc_c74f796f, TypeALU64>, Enc_c31910 { let Inst{2-0} = 0b100; let Inst{4-4} = 0b1; let Inst{31-24} = 0b11011110; @@ -21437,7 +21896,7 @@ def S4_andi_asl_ri : HInst< (outs IntRegs:$Rx32), (ins u32_0Imm:$Ii, IntRegs:$Rx32in, u5_0Imm:$II), "$Rx32 = and(#$Ii,asl($Rx32in,#$II))", -tc_3c10f809, TypeALU64>, Enc_c31910 { +tc_84df2cd3, TypeALU64>, Enc_c31910 { let Inst{2-0} = 0b000; let Inst{4-4} = 0b0; let Inst{31-24} = 0b11011110; @@ -21455,7 +21914,7 @@ def S4_andi_lsr_ri : HInst< (outs IntRegs:$Rx32), (ins u32_0Imm:$Ii, IntRegs:$Rx32in, u5_0Imm:$II), "$Rx32 = and(#$Ii,lsr($Rx32in,#$II))", -tc_3c10f809, TypeALU64>, Enc_c31910 { +tc_84df2cd3, TypeALU64>, Enc_c31910 { let Inst{2-0} = 0b000; let Inst{4-4} = 0b1; let Inst{31-24} = 0b11011110; @@ -21473,7 +21932,7 @@ def S4_clbaddi : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, s6_0Imm:$Ii), "$Rd32 = add(clb($Rs32),#$Ii)", -tc_87601822, TypeS_2op>, Enc_9fae8a { +tc_2b6f77c6, TypeS_2op>, Enc_9fae8a { let Inst{7-5} = 0b000; let Inst{31-21} = 0b10001100001; let hasNewValue = 1; @@ -21484,7 +21943,7 @@ def S4_clbpaddi : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32, s6_0Imm:$Ii), "$Rd32 = add(clb($Rss32),#$Ii)", -tc_87601822, TypeS_2op>, Enc_a1640c { +tc_2b6f77c6, TypeS_2op>, Enc_a1640c { let Inst{7-5} = 0b010; let Inst{31-21} = 0b10001000011; let hasNewValue = 1; @@ -21495,7 +21954,7 @@ def S4_clbpnorm : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32), "$Rd32 = normamt($Rss32)", -tc_ab1b5e74, TypeS_2op>, Enc_90cd8b { +tc_d088982c, TypeS_2op>, Enc_90cd8b { let Inst{13-5} = 0b000000000; let Inst{31-21} = 0b10001000011; let hasNewValue = 1; @@ -21506,7 +21965,7 @@ def S4_extract : 
HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, u5_0Imm:$Ii, u5_0Imm:$II), "$Rd32 = extract($Rs32,#$Ii,#$II)", -tc_c0cd91a8, TypeS_2op>, Enc_b388cf { +tc_c74f796f, TypeS_2op>, Enc_b388cf { let Inst{13-13} = 0b0; let Inst{31-23} = 0b100011011; let hasNewValue = 1; @@ -21517,7 +21976,7 @@ def S4_extract_rp : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, DoubleRegs:$Rtt32), "$Rd32 = extract($Rs32,$Rtt32)", -tc_87601822, TypeS_3op>, Enc_e07374 { +tc_2b6f77c6, TypeS_3op>, Enc_e07374 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001001000; @@ -21529,7 +21988,7 @@ def S4_extractp : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, u6_0Imm:$Ii, u6_0Imm:$II), "$Rdd32 = extract($Rss32,#$Ii,#$II)", -tc_c0cd91a8, TypeS_2op>, Enc_b84c4c { +tc_c74f796f, TypeS_2op>, Enc_b84c4c { let Inst{31-24} = 0b10001010; let prefersSlot3 = 1; } @@ -21537,7 +21996,7 @@ def S4_extractp_rp : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = extract($Rss32,$Rtt32)", -tc_87601822, TypeS_3op>, Enc_a56825 { +tc_2b6f77c6, TypeS_3op>, Enc_a56825 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000001110; @@ -21547,7 +22006,7 @@ def S4_lsli : HInst< (outs IntRegs:$Rd32), (ins s6_0Imm:$Ii, IntRegs:$Rt32), "$Rd32 = lsl(#$Ii,$Rt32)", -tc_9c18c9a5, TypeS_3op>, Enc_fef969 { +tc_540fdfbc, TypeS_3op>, Enc_fef969 { let Inst{7-6} = 0b11; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000110100; @@ -21558,7 +22017,7 @@ def S4_ntstbit_i : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, u5_0Imm:$Ii), "$Pd4 = !tstbit($Rs32,#$Ii)", -tc_5fa2857c, TypeS_2op>, Enc_83ee64 { +tc_7a830544, TypeS_2op>, Enc_83ee64 { let Inst{7-2} = 0b000000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10000101001; @@ -21567,7 +22026,7 @@ def S4_ntstbit_r : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Pd4 = !tstbit($Rs32,$Rt32)", -tc_c58f771a, TypeS_3op>, Enc_c2b48e { +tc_1e856f58, TypeS_3op>, Enc_c2b48e { let Inst{7-2} = 0b000000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000111001; @@ -21576,7 +22035,7 @@ def S4_or_andi : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, s32_0Imm:$Ii), "$Rx32 |= and($Rs32,#$Ii)", -tc_3c10f809, TypeALU64>, Enc_b0e9d8 { +tc_84df2cd3, TypeALU64>, Enc_b0e9d8 { let Inst{31-22} = 0b1101101000; let hasNewValue = 1; let opNewValue = 0; @@ -21593,7 +22052,7 @@ def S4_or_andix : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Ru32, IntRegs:$Rx32in, s32_0Imm:$Ii), "$Rx32 = or($Ru32,and($Rx32in,#$Ii))", -tc_3c10f809, TypeALU64>, Enc_b4e6cf { +tc_84df2cd3, TypeALU64>, Enc_b4e6cf { let Inst{31-22} = 0b1101101001; let hasNewValue = 1; let opNewValue = 0; @@ -21609,7 +22068,7 @@ def S4_or_ori : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, s32_0Imm:$Ii), "$Rx32 |= or($Rs32,#$Ii)", -tc_3c10f809, TypeALU64>, Enc_b0e9d8 { +tc_84df2cd3, TypeALU64>, Enc_b0e9d8 { let Inst{31-22} = 0b1101101010; let hasNewValue = 1; let opNewValue = 0; @@ -21626,7 +22085,7 @@ def S4_ori_asl_ri : HInst< (outs IntRegs:$Rx32), (ins u32_0Imm:$Ii, IntRegs:$Rx32in, u5_0Imm:$II), "$Rx32 = or(#$Ii,asl($Rx32in,#$II))", -tc_3c10f809, TypeALU64>, Enc_c31910 { +tc_84df2cd3, TypeALU64>, Enc_c31910 { let Inst{2-0} = 0b010; let Inst{4-4} = 0b0; let Inst{31-24} = 0b11011110; @@ -21644,7 +22103,7 @@ def S4_ori_lsr_ri : HInst< (outs IntRegs:$Rx32), (ins u32_0Imm:$Ii, IntRegs:$Rx32in, u5_0Imm:$II), "$Rx32 = or(#$Ii,lsr($Rx32in,#$II))", -tc_3c10f809, TypeALU64>, Enc_c31910 { +tc_84df2cd3, TypeALU64>, Enc_c31910 { let Inst{2-0} = 0b010; let 
Inst{4-4} = 0b1; let Inst{31-24} = 0b11011110; @@ -21662,7 +22121,7 @@ def S4_parity : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = parity($Rs32,$Rt32)", -tc_87601822, TypeALU64>, Enc_5ab2be { +tc_2b6f77c6, TypeALU64>, Enc_5ab2be { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010101111; @@ -21674,7 +22133,7 @@ def S4_pstorerbf_abs : HInst< (outs), (ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32), "if (!$Pv4) memb(#$Ii) = $Rt32", -tc_c85212ca, TypeST>, Enc_1cf4ca, AddrModeRel { +tc_238d91d2, TypeST>, Enc_1cf4ca, AddrModeRel { let Inst{2-2} = 0b1; let Inst{7-7} = 0b1; let Inst{13-13} = 0b0; @@ -21699,7 +22158,7 @@ def S4_pstorerbf_rr : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32), "if (!$Pv4) memb($Rs32+$Ru32<<#$Ii) = $Rt32", -tc_7bc567a7, TypeST>, Enc_6339d5, AddrModeRel { +tc_5274e61a, TypeST>, Enc_6339d5, AddrModeRel { let Inst{31-21} = 0b00110101000; let isPredicated = 1; let isPredicatedFalse = 1; @@ -21715,7 +22174,7 @@ def S4_pstorerbfnew_abs : HInst< (outs), (ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32), "if (!$Pv4.new) memb(#$Ii) = $Rt32", -tc_336e698c, TypeST>, Enc_1cf4ca, AddrModeRel { +tc_66888ded, TypeST>, Enc_1cf4ca, AddrModeRel { let Inst{2-2} = 0b1; let Inst{7-7} = 0b1; let Inst{13-13} = 0b1; @@ -21741,7 +22200,7 @@ def S4_pstorerbfnew_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Rt32), "if (!$Pv4.new) memb($Rs32+#$Ii) = $Rt32", -tc_20a8e109, TypeV2LDST>, Enc_da8d43, AddrModeRel { +tc_f86c328a, TypeV2LDST>, Enc_da8d43, AddrModeRel { let Inst{2-2} = 0b0; let Inst{31-21} = 0b01000110000; let isPredicated = 1; @@ -21764,7 +22223,7 @@ def S4_pstorerbfnew_rr : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32), "if (!$Pv4.new) memb($Rs32+$Ru32<<#$Ii) = $Rt32", -tc_7639d4b0, TypeST>, Enc_6339d5, AddrModeRel { +tc_3e07fb90, TypeST>, Enc_6339d5, AddrModeRel { let Inst{31-21} = 0b00110111000; let isPredicated = 1; let isPredicatedFalse = 1; @@ -21781,7 +22240,7 @@ def S4_pstorerbfnew_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32), "if (!$Pv4.new) memb($Rs32) = $Rt32", -tc_20a8e109, TypeMAPPING> { +tc_f86c328a, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -21789,7 +22248,7 @@ def S4_pstorerbnewf_abs : HInst< (outs), (ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8), "if (!$Pv4) memb(#$Ii) = $Nt8.new", -tc_2c8fe5ae, TypeST>, Enc_44215c, AddrModeRel { +tc_6ac37025, TypeST>, Enc_44215c, AddrModeRel { let Inst{2-2} = 0b1; let Inst{7-7} = 0b1; let Inst{13-11} = 0b000; @@ -21801,6 +22260,7 @@ let accessSize = ByteAccess; let isNVStore = 1; let isNewValue = 1; let isExtended = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let CextOpcode = "S2_storerb"; let BaseOpcode = "S2_storerbabs"; @@ -21816,7 +22276,7 @@ def S4_pstorerbnewf_rr : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8), "if (!$Pv4) memb($Rs32+$Ru32<<#$Ii) = $Nt8.new", -tc_77781686, TypeST>, Enc_47ee5e, AddrModeRel { +tc_adb14c66, TypeST>, Enc_47ee5e, AddrModeRel { let Inst{4-3} = 0b00; let Inst{31-21} = 0b00110101101; let isPredicated = 1; @@ -21825,6 +22285,7 @@ let addrMode = BaseRegOffset; let accessSize = ByteAccess; let isNVStore = 1; let isNewValue = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let CextOpcode = "S2_storerb"; let InputType = "reg"; @@ -21835,7 +22296,7 @@ def S4_pstorerbnewfnew_abs : HInst< (outs), (ins PredRegs:$Pv4, u32_0Imm:$Ii, 
IntRegs:$Nt8), "if (!$Pv4.new) memb(#$Ii) = $Nt8.new", -tc_7986ba30, TypeST>, Enc_44215c, AddrModeRel { +tc_53bdb2f6, TypeST>, Enc_44215c, AddrModeRel { let Inst{2-2} = 0b1; let Inst{7-7} = 0b1; let Inst{13-11} = 0b100; @@ -21848,6 +22309,7 @@ let isNVStore = 1; let isPredicatedNew = 1; let isNewValue = 1; let isExtended = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let CextOpcode = "S2_storerb"; let BaseOpcode = "S2_storerbabs"; @@ -21863,7 +22325,7 @@ def S4_pstorerbnewfnew_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Nt8), "if (!$Pv4.new) memb($Rs32+#$Ii) = $Nt8.new", -tc_c8f9a6f6, TypeV2LDST>, Enc_585242, AddrModeRel { +tc_e7d02c66, TypeV2LDST>, Enc_585242, AddrModeRel { let Inst{2-2} = 0b0; let Inst{12-11} = 0b00; let Inst{31-21} = 0b01000110101; @@ -21874,6 +22336,7 @@ let accessSize = ByteAccess; let isNVStore = 1; let isPredicatedNew = 1; let isNewValue = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let CextOpcode = "S2_storerb"; let InputType = "imm"; @@ -21889,7 +22352,7 @@ def S4_pstorerbnewfnew_rr : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8), "if (!$Pv4.new) memb($Rs32+$Ru32<<#$Ii) = $Nt8.new", -tc_8def9c57, TypeST>, Enc_47ee5e, AddrModeRel { +tc_e421e012, TypeST>, Enc_47ee5e, AddrModeRel { let Inst{4-3} = 0b00; let Inst{31-21} = 0b00110111101; let isPredicated = 1; @@ -21899,6 +22362,7 @@ let accessSize = ByteAccess; let isNVStore = 1; let isPredicatedNew = 1; let isNewValue = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let CextOpcode = "S2_storerb"; let InputType = "reg"; @@ -21909,7 +22373,7 @@ def S4_pstorerbnewfnew_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8), "if (!$Pv4.new) memb($Rs32) = $Nt8.new", -tc_c8f9a6f6, TypeMAPPING> { +tc_e7d02c66, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; let opNewValue = 2; @@ -21918,7 +22382,7 @@ def S4_pstorerbnewt_abs : HInst< (outs), (ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8), "if ($Pv4) memb(#$Ii) = $Nt8.new", -tc_2c8fe5ae, TypeST>, Enc_44215c, AddrModeRel { +tc_6ac37025, TypeST>, Enc_44215c, AddrModeRel { let Inst{2-2} = 0b0; let Inst{7-7} = 0b1; let Inst{13-11} = 0b000; @@ -21929,6 +22393,7 @@ let accessSize = ByteAccess; let isNVStore = 1; let isNewValue = 1; let isExtended = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let CextOpcode = "S2_storerb"; let BaseOpcode = "S2_storerbabs"; @@ -21944,7 +22409,7 @@ def S4_pstorerbnewt_rr : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8), "if ($Pv4) memb($Rs32+$Ru32<<#$Ii) = $Nt8.new", -tc_77781686, TypeST>, Enc_47ee5e, AddrModeRel { +tc_adb14c66, TypeST>, Enc_47ee5e, AddrModeRel { let Inst{4-3} = 0b00; let Inst{31-21} = 0b00110100101; let isPredicated = 1; @@ -21952,6 +22417,7 @@ let addrMode = BaseRegOffset; let accessSize = ByteAccess; let isNVStore = 1; let isNewValue = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let CextOpcode = "S2_storerb"; let InputType = "reg"; @@ -21962,7 +22428,7 @@ def S4_pstorerbnewtnew_abs : HInst< (outs), (ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8), "if ($Pv4.new) memb(#$Ii) = $Nt8.new", -tc_7986ba30, TypeST>, Enc_44215c, AddrModeRel { +tc_53bdb2f6, TypeST>, Enc_44215c, AddrModeRel { let Inst{2-2} = 0b0; let Inst{7-7} = 0b1; let Inst{13-11} = 0b100; @@ -21974,6 +22440,7 @@ let isNVStore = 1; let isPredicatedNew = 1; let isNewValue = 1; let isExtended = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let CextOpcode = "S2_storerb"; let 
BaseOpcode = "S2_storerbabs"; @@ -21989,7 +22456,7 @@ def S4_pstorerbnewtnew_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Nt8), "if ($Pv4.new) memb($Rs32+#$Ii) = $Nt8.new", -tc_c8f9a6f6, TypeV2LDST>, Enc_585242, AddrModeRel { +tc_e7d02c66, TypeV2LDST>, Enc_585242, AddrModeRel { let Inst{2-2} = 0b0; let Inst{12-11} = 0b00; let Inst{31-21} = 0b01000010101; @@ -21999,6 +22466,7 @@ let accessSize = ByteAccess; let isNVStore = 1; let isPredicatedNew = 1; let isNewValue = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let CextOpcode = "S2_storerb"; let InputType = "imm"; @@ -22014,7 +22482,7 @@ def S4_pstorerbnewtnew_rr : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8), "if ($Pv4.new) memb($Rs32+$Ru32<<#$Ii) = $Nt8.new", -tc_8def9c57, TypeST>, Enc_47ee5e, AddrModeRel { +tc_e421e012, TypeST>, Enc_47ee5e, AddrModeRel { let Inst{4-3} = 0b00; let Inst{31-21} = 0b00110110101; let isPredicated = 1; @@ -22023,6 +22491,7 @@ let accessSize = ByteAccess; let isNVStore = 1; let isPredicatedNew = 1; let isNewValue = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let CextOpcode = "S2_storerb"; let InputType = "reg"; @@ -22033,7 +22502,7 @@ def S4_pstorerbnewtnew_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8), "if ($Pv4.new) memb($Rs32) = $Nt8.new", -tc_c8f9a6f6, TypeMAPPING> { +tc_e7d02c66, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; let opNewValue = 2; @@ -22042,7 +22511,7 @@ def S4_pstorerbt_abs : HInst< (outs), (ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32), "if ($Pv4) memb(#$Ii) = $Rt32", -tc_c85212ca, TypeST>, Enc_1cf4ca, AddrModeRel { +tc_238d91d2, TypeST>, Enc_1cf4ca, AddrModeRel { let Inst{2-2} = 0b0; let Inst{7-7} = 0b1; let Inst{13-13} = 0b0; @@ -22066,7 +22535,7 @@ def S4_pstorerbt_rr : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32), "if ($Pv4) memb($Rs32+$Ru32<<#$Ii) = $Rt32", -tc_7bc567a7, TypeST>, Enc_6339d5, AddrModeRel { +tc_5274e61a, TypeST>, Enc_6339d5, AddrModeRel { let Inst{31-21} = 0b00110100000; let isPredicated = 1; let addrMode = BaseRegOffset; @@ -22081,7 +22550,7 @@ def S4_pstorerbtnew_abs : HInst< (outs), (ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32), "if ($Pv4.new) memb(#$Ii) = $Rt32", -tc_336e698c, TypeST>, Enc_1cf4ca, AddrModeRel { +tc_66888ded, TypeST>, Enc_1cf4ca, AddrModeRel { let Inst{2-2} = 0b0; let Inst{7-7} = 0b1; let Inst{13-13} = 0b1; @@ -22106,7 +22575,7 @@ def S4_pstorerbtnew_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Rt32), "if ($Pv4.new) memb($Rs32+#$Ii) = $Rt32", -tc_20a8e109, TypeV2LDST>, Enc_da8d43, AddrModeRel { +tc_f86c328a, TypeV2LDST>, Enc_da8d43, AddrModeRel { let Inst{2-2} = 0b0; let Inst{31-21} = 0b01000010000; let isPredicated = 1; @@ -22128,7 +22597,7 @@ def S4_pstorerbtnew_rr : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32), "if ($Pv4.new) memb($Rs32+$Ru32<<#$Ii) = $Rt32", -tc_7639d4b0, TypeST>, Enc_6339d5, AddrModeRel { +tc_3e07fb90, TypeST>, Enc_6339d5, AddrModeRel { let Inst{31-21} = 0b00110110000; let isPredicated = 1; let addrMode = BaseRegOffset; @@ -22144,7 +22613,7 @@ def S4_pstorerbtnew_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32), "if ($Pv4.new) memb($Rs32) = $Rt32", -tc_20a8e109, TypeMAPPING> { +tc_f86c328a, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -22152,7 +22621,7 @@ def S4_pstorerdf_abs : HInst< (outs), (ins PredRegs:$Pv4, u32_0Imm:$Ii, 
DoubleRegs:$Rtt32), "if (!$Pv4) memd(#$Ii) = $Rtt32", -tc_c85212ca, TypeST>, Enc_50b5ac, AddrModeRel { +tc_238d91d2, TypeST>, Enc_50b5ac, AddrModeRel { let Inst{2-2} = 0b1; let Inst{7-7} = 0b1; let Inst{13-13} = 0b0; @@ -22176,7 +22645,7 @@ def S4_pstorerdf_rr : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, DoubleRegs:$Rtt32), "if (!$Pv4) memd($Rs32+$Ru32<<#$Ii) = $Rtt32", -tc_7bc567a7, TypeST>, Enc_1a9974, AddrModeRel { +tc_5274e61a, TypeST>, Enc_1a9974, AddrModeRel { let Inst{31-21} = 0b00110101110; let isPredicated = 1; let isPredicatedFalse = 1; @@ -22191,7 +22660,7 @@ def S4_pstorerdfnew_abs : HInst< (outs), (ins PredRegs:$Pv4, u32_0Imm:$Ii, DoubleRegs:$Rtt32), "if (!$Pv4.new) memd(#$Ii) = $Rtt32", -tc_336e698c, TypeST>, Enc_50b5ac, AddrModeRel { +tc_66888ded, TypeST>, Enc_50b5ac, AddrModeRel { let Inst{2-2} = 0b1; let Inst{7-7} = 0b1; let Inst{13-13} = 0b1; @@ -22216,7 +22685,7 @@ def S4_pstorerdfnew_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u29_3Imm:$Ii, DoubleRegs:$Rtt32), "if (!$Pv4.new) memd($Rs32+#$Ii) = $Rtt32", -tc_20a8e109, TypeV2LDST>, Enc_57a33e, AddrModeRel { +tc_f86c328a, TypeV2LDST>, Enc_57a33e, AddrModeRel { let Inst{2-2} = 0b0; let Inst{31-21} = 0b01000110110; let isPredicated = 1; @@ -22238,7 +22707,7 @@ def S4_pstorerdfnew_rr : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, DoubleRegs:$Rtt32), "if (!$Pv4.new) memd($Rs32+$Ru32<<#$Ii) = $Rtt32", -tc_7639d4b0, TypeST>, Enc_1a9974, AddrModeRel { +tc_3e07fb90, TypeST>, Enc_1a9974, AddrModeRel { let Inst{31-21} = 0b00110111110; let isPredicated = 1; let isPredicatedFalse = 1; @@ -22254,7 +22723,7 @@ def S4_pstorerdfnew_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, DoubleRegs:$Rtt32), "if (!$Pv4.new) memd($Rs32) = $Rtt32", -tc_20a8e109, TypeMAPPING> { +tc_f86c328a, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -22262,7 +22731,7 @@ def S4_pstorerdt_abs : HInst< (outs), (ins PredRegs:$Pv4, u32_0Imm:$Ii, DoubleRegs:$Rtt32), "if ($Pv4) memd(#$Ii) = $Rtt32", -tc_c85212ca, TypeST>, Enc_50b5ac, AddrModeRel { +tc_238d91d2, TypeST>, Enc_50b5ac, AddrModeRel { let Inst{2-2} = 0b0; let Inst{7-7} = 0b1; let Inst{13-13} = 0b0; @@ -22285,7 +22754,7 @@ def S4_pstorerdt_rr : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, DoubleRegs:$Rtt32), "if ($Pv4) memd($Rs32+$Ru32<<#$Ii) = $Rtt32", -tc_7bc567a7, TypeST>, Enc_1a9974, AddrModeRel { +tc_5274e61a, TypeST>, Enc_1a9974, AddrModeRel { let Inst{31-21} = 0b00110100110; let isPredicated = 1; let addrMode = BaseRegOffset; @@ -22299,7 +22768,7 @@ def S4_pstorerdtnew_abs : HInst< (outs), (ins PredRegs:$Pv4, u32_0Imm:$Ii, DoubleRegs:$Rtt32), "if ($Pv4.new) memd(#$Ii) = $Rtt32", -tc_336e698c, TypeST>, Enc_50b5ac, AddrModeRel { +tc_66888ded, TypeST>, Enc_50b5ac, AddrModeRel { let Inst{2-2} = 0b0; let Inst{7-7} = 0b1; let Inst{13-13} = 0b1; @@ -22323,7 +22792,7 @@ def S4_pstorerdtnew_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u29_3Imm:$Ii, DoubleRegs:$Rtt32), "if ($Pv4.new) memd($Rs32+#$Ii) = $Rtt32", -tc_20a8e109, TypeV2LDST>, Enc_57a33e, AddrModeRel { +tc_f86c328a, TypeV2LDST>, Enc_57a33e, AddrModeRel { let Inst{2-2} = 0b0; let Inst{31-21} = 0b01000010110; let isPredicated = 1; @@ -22344,7 +22813,7 @@ def S4_pstorerdtnew_rr : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, DoubleRegs:$Rtt32), "if ($Pv4.new) memd($Rs32+$Ru32<<#$Ii) = $Rtt32", -tc_7639d4b0, TypeST>, Enc_1a9974, AddrModeRel { +tc_3e07fb90, TypeST>, Enc_1a9974, 
AddrModeRel { let Inst{31-21} = 0b00110110110; let isPredicated = 1; let addrMode = BaseRegOffset; @@ -22359,7 +22828,7 @@ def S4_pstorerdtnew_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, DoubleRegs:$Rtt32), "if ($Pv4.new) memd($Rs32) = $Rtt32", -tc_20a8e109, TypeMAPPING> { +tc_f86c328a, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -22367,7 +22836,7 @@ def S4_pstorerff_abs : HInst< (outs), (ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32), "if (!$Pv4) memh(#$Ii) = $Rt32.h", -tc_c85212ca, TypeST>, Enc_1cf4ca, AddrModeRel { +tc_238d91d2, TypeST>, Enc_1cf4ca, AddrModeRel { let Inst{2-2} = 0b1; let Inst{7-7} = 0b1; let Inst{13-13} = 0b0; @@ -22391,7 +22860,7 @@ def S4_pstorerff_rr : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32), "if (!$Pv4) memh($Rs32+$Ru32<<#$Ii) = $Rt32.h", -tc_7bc567a7, TypeST>, Enc_6339d5, AddrModeRel { +tc_5274e61a, TypeST>, Enc_6339d5, AddrModeRel { let Inst{31-21} = 0b00110101011; let isPredicated = 1; let isPredicatedFalse = 1; @@ -22406,7 +22875,7 @@ def S4_pstorerffnew_abs : HInst< (outs), (ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32), "if (!$Pv4.new) memh(#$Ii) = $Rt32.h", -tc_336e698c, TypeST>, Enc_1cf4ca, AddrModeRel { +tc_66888ded, TypeST>, Enc_1cf4ca, AddrModeRel { let Inst{2-2} = 0b1; let Inst{7-7} = 0b1; let Inst{13-13} = 0b1; @@ -22431,7 +22900,7 @@ def S4_pstorerffnew_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32), "if (!$Pv4.new) memh($Rs32+#$Ii) = $Rt32.h", -tc_20a8e109, TypeV2LDST>, Enc_e8c45e, AddrModeRel { +tc_f86c328a, TypeV2LDST>, Enc_e8c45e, AddrModeRel { let Inst{2-2} = 0b0; let Inst{31-21} = 0b01000110011; let isPredicated = 1; @@ -22453,7 +22922,7 @@ def S4_pstorerffnew_rr : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32), "if (!$Pv4.new) memh($Rs32+$Ru32<<#$Ii) = $Rt32.h", -tc_7639d4b0, TypeST>, Enc_6339d5, AddrModeRel { +tc_3e07fb90, TypeST>, Enc_6339d5, AddrModeRel { let Inst{31-21} = 0b00110111011; let isPredicated = 1; let isPredicatedFalse = 1; @@ -22469,7 +22938,7 @@ def S4_pstorerffnew_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32), "if (!$Pv4.new) memh($Rs32) = $Rt32.h", -tc_20a8e109, TypeMAPPING> { +tc_f86c328a, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -22477,7 +22946,7 @@ def S4_pstorerft_abs : HInst< (outs), (ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32), "if ($Pv4) memh(#$Ii) = $Rt32.h", -tc_c85212ca, TypeST>, Enc_1cf4ca, AddrModeRel { +tc_238d91d2, TypeST>, Enc_1cf4ca, AddrModeRel { let Inst{2-2} = 0b0; let Inst{7-7} = 0b1; let Inst{13-13} = 0b0; @@ -22500,7 +22969,7 @@ def S4_pstorerft_rr : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32), "if ($Pv4) memh($Rs32+$Ru32<<#$Ii) = $Rt32.h", -tc_7bc567a7, TypeST>, Enc_6339d5, AddrModeRel { +tc_5274e61a, TypeST>, Enc_6339d5, AddrModeRel { let Inst{31-21} = 0b00110100011; let isPredicated = 1; let addrMode = BaseRegOffset; @@ -22514,7 +22983,7 @@ def S4_pstorerftnew_abs : HInst< (outs), (ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32), "if ($Pv4.new) memh(#$Ii) = $Rt32.h", -tc_336e698c, TypeST>, Enc_1cf4ca, AddrModeRel { +tc_66888ded, TypeST>, Enc_1cf4ca, AddrModeRel { let Inst{2-2} = 0b0; let Inst{7-7} = 0b1; let Inst{13-13} = 0b1; @@ -22538,7 +23007,7 @@ def S4_pstorerftnew_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32), "if ($Pv4.new) memh($Rs32+#$Ii) = $Rt32.h", -tc_20a8e109, TypeV2LDST>, Enc_e8c45e, 
AddrModeRel { +tc_f86c328a, TypeV2LDST>, Enc_e8c45e, AddrModeRel { let Inst{2-2} = 0b0; let Inst{31-21} = 0b01000010011; let isPredicated = 1; @@ -22559,7 +23028,7 @@ def S4_pstorerftnew_rr : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32), "if ($Pv4.new) memh($Rs32+$Ru32<<#$Ii) = $Rt32.h", -tc_7639d4b0, TypeST>, Enc_6339d5, AddrModeRel { +tc_3e07fb90, TypeST>, Enc_6339d5, AddrModeRel { let Inst{31-21} = 0b00110110011; let isPredicated = 1; let addrMode = BaseRegOffset; @@ -22574,7 +23043,7 @@ def S4_pstorerftnew_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32), "if ($Pv4.new) memh($Rs32) = $Rt32.h", -tc_20a8e109, TypeMAPPING> { +tc_f86c328a, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -22582,7 +23051,7 @@ def S4_pstorerhf_abs : HInst< (outs), (ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32), "if (!$Pv4) memh(#$Ii) = $Rt32", -tc_c85212ca, TypeST>, Enc_1cf4ca, AddrModeRel { +tc_238d91d2, TypeST>, Enc_1cf4ca, AddrModeRel { let Inst{2-2} = 0b1; let Inst{7-7} = 0b1; let Inst{13-13} = 0b0; @@ -22607,7 +23076,7 @@ def S4_pstorerhf_rr : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32), "if (!$Pv4) memh($Rs32+$Ru32<<#$Ii) = $Rt32", -tc_7bc567a7, TypeST>, Enc_6339d5, AddrModeRel { +tc_5274e61a, TypeST>, Enc_6339d5, AddrModeRel { let Inst{31-21} = 0b00110101010; let isPredicated = 1; let isPredicatedFalse = 1; @@ -22623,7 +23092,7 @@ def S4_pstorerhfnew_abs : HInst< (outs), (ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32), "if (!$Pv4.new) memh(#$Ii) = $Rt32", -tc_336e698c, TypeST>, Enc_1cf4ca, AddrModeRel { +tc_66888ded, TypeST>, Enc_1cf4ca, AddrModeRel { let Inst{2-2} = 0b1; let Inst{7-7} = 0b1; let Inst{13-13} = 0b1; @@ -22649,7 +23118,7 @@ def S4_pstorerhfnew_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32), "if (!$Pv4.new) memh($Rs32+#$Ii) = $Rt32", -tc_20a8e109, TypeV2LDST>, Enc_e8c45e, AddrModeRel { +tc_f86c328a, TypeV2LDST>, Enc_e8c45e, AddrModeRel { let Inst{2-2} = 0b0; let Inst{31-21} = 0b01000110010; let isPredicated = 1; @@ -22672,7 +23141,7 @@ def S4_pstorerhfnew_rr : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32), "if (!$Pv4.new) memh($Rs32+$Ru32<<#$Ii) = $Rt32", -tc_7639d4b0, TypeST>, Enc_6339d5, AddrModeRel { +tc_3e07fb90, TypeST>, Enc_6339d5, AddrModeRel { let Inst{31-21} = 0b00110111010; let isPredicated = 1; let isPredicatedFalse = 1; @@ -22689,7 +23158,7 @@ def S4_pstorerhfnew_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32), "if (!$Pv4.new) memh($Rs32) = $Rt32", -tc_20a8e109, TypeMAPPING> { +tc_f86c328a, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -22697,7 +23166,7 @@ def S4_pstorerhnewf_abs : HInst< (outs), (ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8), "if (!$Pv4) memh(#$Ii) = $Nt8.new", -tc_2c8fe5ae, TypeST>, Enc_44215c, AddrModeRel { +tc_6ac37025, TypeST>, Enc_44215c, AddrModeRel { let Inst{2-2} = 0b1; let Inst{7-7} = 0b1; let Inst{13-11} = 0b001; @@ -22709,6 +23178,7 @@ let accessSize = HalfWordAccess; let isNVStore = 1; let isNewValue = 1; let isExtended = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let CextOpcode = "S2_storerh"; let BaseOpcode = "S2_storerhabs"; @@ -22724,7 +23194,7 @@ def S4_pstorerhnewf_rr : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8), "if (!$Pv4) memh($Rs32+$Ru32<<#$Ii) = $Nt8.new", -tc_77781686, TypeST>, Enc_47ee5e, AddrModeRel { +tc_adb14c66, 
TypeST>, Enc_47ee5e, AddrModeRel { let Inst{4-3} = 0b01; let Inst{31-21} = 0b00110101101; let isPredicated = 1; @@ -22733,6 +23203,7 @@ let addrMode = BaseRegOffset; let accessSize = HalfWordAccess; let isNVStore = 1; let isNewValue = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let CextOpcode = "S2_storerh"; let InputType = "reg"; @@ -22743,7 +23214,7 @@ def S4_pstorerhnewfnew_abs : HInst< (outs), (ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8), "if (!$Pv4.new) memh(#$Ii) = $Nt8.new", -tc_7986ba30, TypeST>, Enc_44215c, AddrModeRel { +tc_53bdb2f6, TypeST>, Enc_44215c, AddrModeRel { let Inst{2-2} = 0b1; let Inst{7-7} = 0b1; let Inst{13-11} = 0b101; @@ -22756,6 +23227,7 @@ let isNVStore = 1; let isPredicatedNew = 1; let isNewValue = 1; let isExtended = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let CextOpcode = "S2_storerh"; let BaseOpcode = "S2_storerhabs"; @@ -22771,7 +23243,7 @@ def S4_pstorerhnewfnew_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Nt8), "if (!$Pv4.new) memh($Rs32+#$Ii) = $Nt8.new", -tc_c8f9a6f6, TypeV2LDST>, Enc_f44229, AddrModeRel { +tc_e7d02c66, TypeV2LDST>, Enc_f44229, AddrModeRel { let Inst{2-2} = 0b0; let Inst{12-11} = 0b01; let Inst{31-21} = 0b01000110101; @@ -22782,6 +23254,7 @@ let accessSize = HalfWordAccess; let isNVStore = 1; let isPredicatedNew = 1; let isNewValue = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let CextOpcode = "S2_storerh"; let InputType = "imm"; @@ -22797,7 +23270,7 @@ def S4_pstorerhnewfnew_rr : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8), "if (!$Pv4.new) memh($Rs32+$Ru32<<#$Ii) = $Nt8.new", -tc_8def9c57, TypeST>, Enc_47ee5e, AddrModeRel { +tc_e421e012, TypeST>, Enc_47ee5e, AddrModeRel { let Inst{4-3} = 0b01; let Inst{31-21} = 0b00110111101; let isPredicated = 1; @@ -22807,6 +23280,7 @@ let accessSize = HalfWordAccess; let isNVStore = 1; let isPredicatedNew = 1; let isNewValue = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let CextOpcode = "S2_storerh"; let InputType = "reg"; @@ -22817,7 +23291,7 @@ def S4_pstorerhnewfnew_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8), "if (!$Pv4.new) memh($Rs32) = $Nt8.new", -tc_c8f9a6f6, TypeMAPPING> { +tc_e7d02c66, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; let opNewValue = 2; @@ -22826,7 +23300,7 @@ def S4_pstorerhnewt_abs : HInst< (outs), (ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8), "if ($Pv4) memh(#$Ii) = $Nt8.new", -tc_2c8fe5ae, TypeST>, Enc_44215c, AddrModeRel { +tc_6ac37025, TypeST>, Enc_44215c, AddrModeRel { let Inst{2-2} = 0b0; let Inst{7-7} = 0b1; let Inst{13-11} = 0b001; @@ -22837,6 +23311,7 @@ let accessSize = HalfWordAccess; let isNVStore = 1; let isNewValue = 1; let isExtended = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let CextOpcode = "S2_storerh"; let BaseOpcode = "S2_storerhabs"; @@ -22852,7 +23327,7 @@ def S4_pstorerhnewt_rr : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8), "if ($Pv4) memh($Rs32+$Ru32<<#$Ii) = $Nt8.new", -tc_77781686, TypeST>, Enc_47ee5e, AddrModeRel { +tc_adb14c66, TypeST>, Enc_47ee5e, AddrModeRel { let Inst{4-3} = 0b01; let Inst{31-21} = 0b00110100101; let isPredicated = 1; @@ -22860,6 +23335,7 @@ let addrMode = BaseRegOffset; let accessSize = HalfWordAccess; let isNVStore = 1; let isNewValue = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let CextOpcode = "S2_storerh"; let InputType = "reg"; @@ -22870,7 +23346,7 @@ def S4_pstorerhnewtnew_abs : 
HInst< (outs), (ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8), "if ($Pv4.new) memh(#$Ii) = $Nt8.new", -tc_7986ba30, TypeST>, Enc_44215c, AddrModeRel { +tc_53bdb2f6, TypeST>, Enc_44215c, AddrModeRel { let Inst{2-2} = 0b0; let Inst{7-7} = 0b1; let Inst{13-11} = 0b101; @@ -22882,6 +23358,7 @@ let isNVStore = 1; let isPredicatedNew = 1; let isNewValue = 1; let isExtended = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let CextOpcode = "S2_storerh"; let BaseOpcode = "S2_storerhabs"; @@ -22897,7 +23374,7 @@ def S4_pstorerhnewtnew_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Nt8), "if ($Pv4.new) memh($Rs32+#$Ii) = $Nt8.new", -tc_c8f9a6f6, TypeV2LDST>, Enc_f44229, AddrModeRel { +tc_e7d02c66, TypeV2LDST>, Enc_f44229, AddrModeRel { let Inst{2-2} = 0b0; let Inst{12-11} = 0b01; let Inst{31-21} = 0b01000010101; @@ -22907,6 +23384,7 @@ let accessSize = HalfWordAccess; let isNVStore = 1; let isPredicatedNew = 1; let isNewValue = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let CextOpcode = "S2_storerh"; let InputType = "imm"; @@ -22922,7 +23400,7 @@ def S4_pstorerhnewtnew_rr : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8), "if ($Pv4.new) memh($Rs32+$Ru32<<#$Ii) = $Nt8.new", -tc_8def9c57, TypeST>, Enc_47ee5e, AddrModeRel { +tc_e421e012, TypeST>, Enc_47ee5e, AddrModeRel { let Inst{4-3} = 0b01; let Inst{31-21} = 0b00110110101; let isPredicated = 1; @@ -22931,6 +23409,7 @@ let accessSize = HalfWordAccess; let isNVStore = 1; let isPredicatedNew = 1; let isNewValue = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let CextOpcode = "S2_storerh"; let InputType = "reg"; @@ -22941,7 +23420,7 @@ def S4_pstorerhnewtnew_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8), "if ($Pv4.new) memh($Rs32) = $Nt8.new", -tc_c8f9a6f6, TypeMAPPING> { +tc_e7d02c66, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; let opNewValue = 2; @@ -22950,7 +23429,7 @@ def S4_pstorerht_abs : HInst< (outs), (ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32), "if ($Pv4) memh(#$Ii) = $Rt32", -tc_c85212ca, TypeST>, Enc_1cf4ca, AddrModeRel { +tc_238d91d2, TypeST>, Enc_1cf4ca, AddrModeRel { let Inst{2-2} = 0b0; let Inst{7-7} = 0b1; let Inst{13-13} = 0b0; @@ -22974,7 +23453,7 @@ def S4_pstorerht_rr : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32), "if ($Pv4) memh($Rs32+$Ru32<<#$Ii) = $Rt32", -tc_7bc567a7, TypeST>, Enc_6339d5, AddrModeRel { +tc_5274e61a, TypeST>, Enc_6339d5, AddrModeRel { let Inst{31-21} = 0b00110100010; let isPredicated = 1; let addrMode = BaseRegOffset; @@ -22989,7 +23468,7 @@ def S4_pstorerhtnew_abs : HInst< (outs), (ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32), "if ($Pv4.new) memh(#$Ii) = $Rt32", -tc_336e698c, TypeST>, Enc_1cf4ca, AddrModeRel { +tc_66888ded, TypeST>, Enc_1cf4ca, AddrModeRel { let Inst{2-2} = 0b0; let Inst{7-7} = 0b1; let Inst{13-13} = 0b1; @@ -23014,7 +23493,7 @@ def S4_pstorerhtnew_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32), "if ($Pv4.new) memh($Rs32+#$Ii) = $Rt32", -tc_20a8e109, TypeV2LDST>, Enc_e8c45e, AddrModeRel { +tc_f86c328a, TypeV2LDST>, Enc_e8c45e, AddrModeRel { let Inst{2-2} = 0b0; let Inst{31-21} = 0b01000010010; let isPredicated = 1; @@ -23036,7 +23515,7 @@ def S4_pstorerhtnew_rr : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32), "if ($Pv4.new) memh($Rs32+$Ru32<<#$Ii) = $Rt32", -tc_7639d4b0, TypeST>, Enc_6339d5, AddrModeRel { +tc_3e07fb90, 
TypeST>, Enc_6339d5, AddrModeRel { let Inst{31-21} = 0b00110110010; let isPredicated = 1; let addrMode = BaseRegOffset; @@ -23052,7 +23531,7 @@ def S4_pstorerhtnew_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32), "if ($Pv4.new) memh($Rs32) = $Rt32", -tc_20a8e109, TypeMAPPING> { +tc_f86c328a, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -23060,7 +23539,7 @@ def S4_pstorerif_abs : HInst< (outs), (ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32), "if (!$Pv4) memw(#$Ii) = $Rt32", -tc_c85212ca, TypeST>, Enc_1cf4ca, AddrModeRel { +tc_238d91d2, TypeST>, Enc_1cf4ca, AddrModeRel { let Inst{2-2} = 0b1; let Inst{7-7} = 0b1; let Inst{13-13} = 0b0; @@ -23085,7 +23564,7 @@ def S4_pstorerif_rr : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32), "if (!$Pv4) memw($Rs32+$Ru32<<#$Ii) = $Rt32", -tc_7bc567a7, TypeST>, Enc_6339d5, AddrModeRel { +tc_5274e61a, TypeST>, Enc_6339d5, AddrModeRel { let Inst{31-21} = 0b00110101100; let isPredicated = 1; let isPredicatedFalse = 1; @@ -23101,7 +23580,7 @@ def S4_pstorerifnew_abs : HInst< (outs), (ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32), "if (!$Pv4.new) memw(#$Ii) = $Rt32", -tc_336e698c, TypeST>, Enc_1cf4ca, AddrModeRel { +tc_66888ded, TypeST>, Enc_1cf4ca, AddrModeRel { let Inst{2-2} = 0b1; let Inst{7-7} = 0b1; let Inst{13-13} = 0b1; @@ -23127,7 +23606,7 @@ def S4_pstorerifnew_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Rt32), "if (!$Pv4.new) memw($Rs32+#$Ii) = $Rt32", -tc_20a8e109, TypeV2LDST>, Enc_397f23, AddrModeRel { +tc_f86c328a, TypeV2LDST>, Enc_397f23, AddrModeRel { let Inst{2-2} = 0b0; let Inst{31-21} = 0b01000110100; let isPredicated = 1; @@ -23150,7 +23629,7 @@ def S4_pstorerifnew_rr : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32), "if (!$Pv4.new) memw($Rs32+$Ru32<<#$Ii) = $Rt32", -tc_7639d4b0, TypeST>, Enc_6339d5, AddrModeRel { +tc_3e07fb90, TypeST>, Enc_6339d5, AddrModeRel { let Inst{31-21} = 0b00110111100; let isPredicated = 1; let isPredicatedFalse = 1; @@ -23167,7 +23646,7 @@ def S4_pstorerifnew_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32), "if (!$Pv4.new) memw($Rs32) = $Rt32", -tc_20a8e109, TypeMAPPING> { +tc_f86c328a, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -23175,7 +23654,7 @@ def S4_pstorerinewf_abs : HInst< (outs), (ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8), "if (!$Pv4) memw(#$Ii) = $Nt8.new", -tc_2c8fe5ae, TypeST>, Enc_44215c, AddrModeRel { +tc_6ac37025, TypeST>, Enc_44215c, AddrModeRel { let Inst{2-2} = 0b1; let Inst{7-7} = 0b1; let Inst{13-11} = 0b010; @@ -23187,6 +23666,7 @@ let accessSize = WordAccess; let isNVStore = 1; let isNewValue = 1; let isExtended = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let CextOpcode = "S2_storeri"; let BaseOpcode = "S2_storeriabs"; @@ -23202,7 +23682,7 @@ def S4_pstorerinewf_rr : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8), "if (!$Pv4) memw($Rs32+$Ru32<<#$Ii) = $Nt8.new", -tc_77781686, TypeST>, Enc_47ee5e, AddrModeRel { +tc_adb14c66, TypeST>, Enc_47ee5e, AddrModeRel { let Inst{4-3} = 0b10; let Inst{31-21} = 0b00110101101; let isPredicated = 1; @@ -23211,6 +23691,7 @@ let addrMode = BaseRegOffset; let accessSize = WordAccess; let isNVStore = 1; let isNewValue = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let CextOpcode = "S2_storeri"; let InputType = "reg"; @@ -23221,7 +23702,7 @@ def S4_pstorerinewfnew_abs : HInst< (outs), 
(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8), "if (!$Pv4.new) memw(#$Ii) = $Nt8.new", -tc_7986ba30, TypeST>, Enc_44215c, AddrModeRel { +tc_53bdb2f6, TypeST>, Enc_44215c, AddrModeRel { let Inst{2-2} = 0b1; let Inst{7-7} = 0b1; let Inst{13-11} = 0b110; @@ -23234,6 +23715,7 @@ let isNVStore = 1; let isPredicatedNew = 1; let isNewValue = 1; let isExtended = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let CextOpcode = "S2_storeri"; let BaseOpcode = "S2_storeriabs"; @@ -23249,7 +23731,7 @@ def S4_pstorerinewfnew_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Nt8), "if (!$Pv4.new) memw($Rs32+#$Ii) = $Nt8.new", -tc_c8f9a6f6, TypeV2LDST>, Enc_8dbdfe, AddrModeRel { +tc_e7d02c66, TypeV2LDST>, Enc_8dbdfe, AddrModeRel { let Inst{2-2} = 0b0; let Inst{12-11} = 0b10; let Inst{31-21} = 0b01000110101; @@ -23260,6 +23742,7 @@ let accessSize = WordAccess; let isNVStore = 1; let isPredicatedNew = 1; let isNewValue = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let CextOpcode = "S2_storeri"; let InputType = "imm"; @@ -23275,7 +23758,7 @@ def S4_pstorerinewfnew_rr : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8), "if (!$Pv4.new) memw($Rs32+$Ru32<<#$Ii) = $Nt8.new", -tc_8def9c57, TypeST>, Enc_47ee5e, AddrModeRel { +tc_e421e012, TypeST>, Enc_47ee5e, AddrModeRel { let Inst{4-3} = 0b10; let Inst{31-21} = 0b00110111101; let isPredicated = 1; @@ -23285,6 +23768,7 @@ let accessSize = WordAccess; let isNVStore = 1; let isPredicatedNew = 1; let isNewValue = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let CextOpcode = "S2_storeri"; let InputType = "reg"; @@ -23295,7 +23779,7 @@ def S4_pstorerinewfnew_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8), "if (!$Pv4.new) memw($Rs32) = $Nt8.new", -tc_c8f9a6f6, TypeMAPPING> { +tc_e7d02c66, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; let opNewValue = 2; @@ -23304,7 +23788,7 @@ def S4_pstorerinewt_abs : HInst< (outs), (ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8), "if ($Pv4) memw(#$Ii) = $Nt8.new", -tc_2c8fe5ae, TypeST>, Enc_44215c, AddrModeRel { +tc_6ac37025, TypeST>, Enc_44215c, AddrModeRel { let Inst{2-2} = 0b0; let Inst{7-7} = 0b1; let Inst{13-11} = 0b010; @@ -23315,6 +23799,7 @@ let accessSize = WordAccess; let isNVStore = 1; let isNewValue = 1; let isExtended = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let CextOpcode = "S2_storeri"; let BaseOpcode = "S2_storeriabs"; @@ -23330,7 +23815,7 @@ def S4_pstorerinewt_rr : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8), "if ($Pv4) memw($Rs32+$Ru32<<#$Ii) = $Nt8.new", -tc_77781686, TypeST>, Enc_47ee5e, AddrModeRel { +tc_adb14c66, TypeST>, Enc_47ee5e, AddrModeRel { let Inst{4-3} = 0b10; let Inst{31-21} = 0b00110100101; let isPredicated = 1; @@ -23338,6 +23823,7 @@ let addrMode = BaseRegOffset; let accessSize = WordAccess; let isNVStore = 1; let isNewValue = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let CextOpcode = "S2_storeri"; let InputType = "reg"; @@ -23348,7 +23834,7 @@ def S4_pstorerinewtnew_abs : HInst< (outs), (ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8), "if ($Pv4.new) memw(#$Ii) = $Nt8.new", -tc_7986ba30, TypeST>, Enc_44215c, AddrModeRel { +tc_53bdb2f6, TypeST>, Enc_44215c, AddrModeRel { let Inst{2-2} = 0b0; let Inst{7-7} = 0b1; let Inst{13-11} = 0b110; @@ -23360,6 +23846,7 @@ let isNVStore = 1; let isPredicatedNew = 1; let isNewValue = 1; let isExtended = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let 
CextOpcode = "S2_storeri"; let BaseOpcode = "S2_storeriabs"; @@ -23375,7 +23862,7 @@ def S4_pstorerinewtnew_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Nt8), "if ($Pv4.new) memw($Rs32+#$Ii) = $Nt8.new", -tc_c8f9a6f6, TypeV2LDST>, Enc_8dbdfe, AddrModeRel { +tc_e7d02c66, TypeV2LDST>, Enc_8dbdfe, AddrModeRel { let Inst{2-2} = 0b0; let Inst{12-11} = 0b10; let Inst{31-21} = 0b01000010101; @@ -23385,6 +23872,7 @@ let accessSize = WordAccess; let isNVStore = 1; let isPredicatedNew = 1; let isNewValue = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let CextOpcode = "S2_storeri"; let InputType = "imm"; @@ -23400,7 +23888,7 @@ def S4_pstorerinewtnew_rr : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8), "if ($Pv4.new) memw($Rs32+$Ru32<<#$Ii) = $Nt8.new", -tc_8def9c57, TypeST>, Enc_47ee5e, AddrModeRel { +tc_e421e012, TypeST>, Enc_47ee5e, AddrModeRel { let Inst{4-3} = 0b10; let Inst{31-21} = 0b00110110101; let isPredicated = 1; @@ -23409,6 +23897,7 @@ let accessSize = WordAccess; let isNVStore = 1; let isPredicatedNew = 1; let isNewValue = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let CextOpcode = "S2_storeri"; let InputType = "reg"; @@ -23419,7 +23908,7 @@ def S4_pstorerinewtnew_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8), "if ($Pv4.new) memw($Rs32) = $Nt8.new", -tc_c8f9a6f6, TypeMAPPING> { +tc_e7d02c66, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; let opNewValue = 2; @@ -23428,7 +23917,7 @@ def S4_pstorerit_abs : HInst< (outs), (ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32), "if ($Pv4) memw(#$Ii) = $Rt32", -tc_c85212ca, TypeST>, Enc_1cf4ca, AddrModeRel { +tc_238d91d2, TypeST>, Enc_1cf4ca, AddrModeRel { let Inst{2-2} = 0b0; let Inst{7-7} = 0b1; let Inst{13-13} = 0b0; @@ -23452,7 +23941,7 @@ def S4_pstorerit_rr : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32), "if ($Pv4) memw($Rs32+$Ru32<<#$Ii) = $Rt32", -tc_7bc567a7, TypeST>, Enc_6339d5, AddrModeRel { +tc_5274e61a, TypeST>, Enc_6339d5, AddrModeRel { let Inst{31-21} = 0b00110100100; let isPredicated = 1; let addrMode = BaseRegOffset; @@ -23467,7 +23956,7 @@ def S4_pstoreritnew_abs : HInst< (outs), (ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32), "if ($Pv4.new) memw(#$Ii) = $Rt32", -tc_336e698c, TypeST>, Enc_1cf4ca, AddrModeRel { +tc_66888ded, TypeST>, Enc_1cf4ca, AddrModeRel { let Inst{2-2} = 0b0; let Inst{7-7} = 0b1; let Inst{13-13} = 0b1; @@ -23492,7 +23981,7 @@ def S4_pstoreritnew_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Rt32), "if ($Pv4.new) memw($Rs32+#$Ii) = $Rt32", -tc_20a8e109, TypeV2LDST>, Enc_397f23, AddrModeRel { +tc_f86c328a, TypeV2LDST>, Enc_397f23, AddrModeRel { let Inst{2-2} = 0b0; let Inst{31-21} = 0b01000010100; let isPredicated = 1; @@ -23514,7 +24003,7 @@ def S4_pstoreritnew_rr : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32), "if ($Pv4.new) memw($Rs32+$Ru32<<#$Ii) = $Rt32", -tc_7639d4b0, TypeST>, Enc_6339d5, AddrModeRel { +tc_3e07fb90, TypeST>, Enc_6339d5, AddrModeRel { let Inst{31-21} = 0b00110110100; let isPredicated = 1; let addrMode = BaseRegOffset; @@ -23530,7 +24019,7 @@ def S4_pstoreritnew_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32), "if ($Pv4.new) memw($Rs32) = $Rt32", -tc_20a8e109, TypeMAPPING> { +tc_f86c328a, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -23538,7 +24027,7 @@ def S4_stored_locked : HInst< (outs 
PredRegs:$Pd4), (ins IntRegs:$Rs32, DoubleRegs:$Rtt32), "memd_locked($Rs32,$Pd4) = $Rtt32", -tc_7d01cbdc, TypeST>, Enc_d7dc10 { +tc_1372bca1, TypeST>, Enc_d7dc10 { let Inst{7-2} = 0b000000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10100000111; @@ -23551,7 +24040,7 @@ def S4_storeirb_io : HInst< (outs), (ins IntRegs:$Rs32, u6_0Imm:$Ii, s32_0Imm:$II), "memb($Rs32+#$Ii) = #$II", -tc_fcee8723, TypeST>, Enc_8203bb, PredNewRel { +tc_05b6c987, TypeST>, Enc_8203bb, PredNewRel { let Inst{31-21} = 0b00111100000; let addrMode = BaseImmOffset; let accessSize = ByteAccess; @@ -23570,7 +24059,7 @@ def S4_storeirb_zomap : HInst< (outs), (ins IntRegs:$Rs32, s8_0Imm:$II), "memb($Rs32) = #$II", -tc_fcee8723, TypeMAPPING> { +tc_05b6c987, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -23578,7 +24067,7 @@ def S4_storeirbf_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u6_0Imm:$Ii, s32_0Imm:$II), "if (!$Pv4) memb($Rs32+#$Ii) = #$II", -tc_1e69aa99, TypeST>, Enc_d7a65e, PredNewRel { +tc_8b15472a, TypeST>, Enc_d7a65e, PredNewRel { let Inst{31-21} = 0b00111000100; let isPredicated = 1; let isPredicatedFalse = 1; @@ -23598,7 +24087,7 @@ def S4_storeirbf_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II), "if (!$Pv4) memb($Rs32) = #$II", -tc_1e69aa99, TypeMAPPING> { +tc_8b15472a, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -23606,7 +24095,7 @@ def S4_storeirbfnew_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u6_0Imm:$Ii, s32_0Imm:$II), "if (!$Pv4.new) memb($Rs32+#$Ii) = #$II", -tc_8f0a6bad, TypeST>, Enc_d7a65e, PredNewRel { +tc_f86c328a, TypeST>, Enc_d7a65e, PredNewRel { let Inst{31-21} = 0b00111001100; let isPredicated = 1; let isPredicatedFalse = 1; @@ -23627,7 +24116,7 @@ def S4_storeirbfnew_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II), "if (!$Pv4.new) memb($Rs32) = #$II", -tc_8f0a6bad, TypeMAPPING> { +tc_f86c328a, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -23635,7 +24124,7 @@ def S4_storeirbt_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u6_0Imm:$Ii, s32_0Imm:$II), "if ($Pv4) memb($Rs32+#$Ii) = #$II", -tc_1e69aa99, TypeST>, Enc_d7a65e, PredNewRel { +tc_8b15472a, TypeST>, Enc_d7a65e, PredNewRel { let Inst{31-21} = 0b00111000000; let isPredicated = 1; let addrMode = BaseImmOffset; @@ -23654,7 +24143,7 @@ def S4_storeirbt_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II), "if ($Pv4) memb($Rs32) = #$II", -tc_1e69aa99, TypeMAPPING> { +tc_8b15472a, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -23662,7 +24151,7 @@ def S4_storeirbtnew_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u6_0Imm:$Ii, s32_0Imm:$II), "if ($Pv4.new) memb($Rs32+#$Ii) = #$II", -tc_8f0a6bad, TypeST>, Enc_d7a65e, PredNewRel { +tc_f86c328a, TypeST>, Enc_d7a65e, PredNewRel { let Inst{31-21} = 0b00111001000; let isPredicated = 1; let addrMode = BaseImmOffset; @@ -23682,7 +24171,7 @@ def S4_storeirbtnew_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II), "if ($Pv4.new) memb($Rs32) = #$II", -tc_8f0a6bad, TypeMAPPING> { +tc_f86c328a, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -23690,7 +24179,7 @@ def S4_storeirh_io : HInst< (outs), (ins IntRegs:$Rs32, u6_1Imm:$Ii, s32_0Imm:$II), "memh($Rs32+#$Ii) = #$II", -tc_fcee8723, TypeST>, Enc_a803e0, PredNewRel { +tc_05b6c987, TypeST>, Enc_a803e0, PredNewRel { let Inst{31-21} = 0b00111100001; let addrMode = BaseImmOffset; let accessSize = HalfWordAccess; @@ -23709,7 +24198,7 @@ def S4_storeirh_zomap : HInst< 
(outs), (ins IntRegs:$Rs32, s8_0Imm:$II), "memh($Rs32) = #$II", -tc_fcee8723, TypeMAPPING> { +tc_05b6c987, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -23717,7 +24206,7 @@ def S4_storeirhf_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u6_1Imm:$Ii, s32_0Imm:$II), "if (!$Pv4) memh($Rs32+#$Ii) = #$II", -tc_1e69aa99, TypeST>, Enc_f20719, PredNewRel { +tc_8b15472a, TypeST>, Enc_f20719, PredNewRel { let Inst{31-21} = 0b00111000101; let isPredicated = 1; let isPredicatedFalse = 1; @@ -23737,7 +24226,7 @@ def S4_storeirhf_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II), "if (!$Pv4) memh($Rs32) = #$II", -tc_1e69aa99, TypeMAPPING> { +tc_8b15472a, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -23745,7 +24234,7 @@ def S4_storeirhfnew_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u6_1Imm:$Ii, s32_0Imm:$II), "if (!$Pv4.new) memh($Rs32+#$Ii) = #$II", -tc_8f0a6bad, TypeST>, Enc_f20719, PredNewRel { +tc_f86c328a, TypeST>, Enc_f20719, PredNewRel { let Inst{31-21} = 0b00111001101; let isPredicated = 1; let isPredicatedFalse = 1; @@ -23766,7 +24255,7 @@ def S4_storeirhfnew_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II), "if (!$Pv4.new) memh($Rs32) = #$II", -tc_8f0a6bad, TypeMAPPING> { +tc_f86c328a, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -23774,7 +24263,7 @@ def S4_storeirht_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u6_1Imm:$Ii, s32_0Imm:$II), "if ($Pv4) memh($Rs32+#$Ii) = #$II", -tc_1e69aa99, TypeST>, Enc_f20719, PredNewRel { +tc_8b15472a, TypeST>, Enc_f20719, PredNewRel { let Inst{31-21} = 0b00111000001; let isPredicated = 1; let addrMode = BaseImmOffset; @@ -23793,7 +24282,7 @@ def S4_storeirht_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II), "if ($Pv4) memh($Rs32) = #$II", -tc_1e69aa99, TypeMAPPING> { +tc_8b15472a, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -23801,7 +24290,7 @@ def S4_storeirhtnew_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u6_1Imm:$Ii, s32_0Imm:$II), "if ($Pv4.new) memh($Rs32+#$Ii) = #$II", -tc_8f0a6bad, TypeST>, Enc_f20719, PredNewRel { +tc_f86c328a, TypeST>, Enc_f20719, PredNewRel { let Inst{31-21} = 0b00111001001; let isPredicated = 1; let addrMode = BaseImmOffset; @@ -23821,7 +24310,7 @@ def S4_storeirhtnew_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II), "if ($Pv4.new) memh($Rs32) = #$II", -tc_8f0a6bad, TypeMAPPING> { +tc_f86c328a, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -23829,7 +24318,7 @@ def S4_storeiri_io : HInst< (outs), (ins IntRegs:$Rs32, u6_2Imm:$Ii, s32_0Imm:$II), "memw($Rs32+#$Ii) = #$II", -tc_fcee8723, TypeST>, Enc_f37377, PredNewRel { +tc_05b6c987, TypeST>, Enc_f37377, PredNewRel { let Inst{31-21} = 0b00111100010; let addrMode = BaseImmOffset; let accessSize = WordAccess; @@ -23848,7 +24337,7 @@ def S4_storeiri_zomap : HInst< (outs), (ins IntRegs:$Rs32, s8_0Imm:$II), "memw($Rs32) = #$II", -tc_fcee8723, TypeMAPPING> { +tc_05b6c987, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -23856,7 +24345,7 @@ def S4_storeirif_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u6_2Imm:$Ii, s32_0Imm:$II), "if (!$Pv4) memw($Rs32+#$Ii) = #$II", -tc_1e69aa99, TypeST>, Enc_5ccba9, PredNewRel { +tc_8b15472a, TypeST>, Enc_5ccba9, PredNewRel { let Inst{31-21} = 0b00111000110; let isPredicated = 1; let isPredicatedFalse = 1; @@ -23876,7 +24365,7 @@ def S4_storeirif_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II), "if (!$Pv4) 
memw($Rs32) = #$II", -tc_1e69aa99, TypeMAPPING> { +tc_8b15472a, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -23884,7 +24373,7 @@ def S4_storeirifnew_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u6_2Imm:$Ii, s32_0Imm:$II), "if (!$Pv4.new) memw($Rs32+#$Ii) = #$II", -tc_8f0a6bad, TypeST>, Enc_5ccba9, PredNewRel { +tc_f86c328a, TypeST>, Enc_5ccba9, PredNewRel { let Inst{31-21} = 0b00111001110; let isPredicated = 1; let isPredicatedFalse = 1; @@ -23905,7 +24394,7 @@ def S4_storeirifnew_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II), "if (!$Pv4.new) memw($Rs32) = #$II", -tc_8f0a6bad, TypeMAPPING> { +tc_f86c328a, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -23913,7 +24402,7 @@ def S4_storeirit_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u6_2Imm:$Ii, s32_0Imm:$II), "if ($Pv4) memw($Rs32+#$Ii) = #$II", -tc_1e69aa99, TypeST>, Enc_5ccba9, PredNewRel { +tc_8b15472a, TypeST>, Enc_5ccba9, PredNewRel { let Inst{31-21} = 0b00111000010; let isPredicated = 1; let addrMode = BaseImmOffset; @@ -23932,7 +24421,7 @@ def S4_storeirit_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II), "if ($Pv4) memw($Rs32) = #$II", -tc_1e69aa99, TypeMAPPING> { +tc_8b15472a, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -23940,7 +24429,7 @@ def S4_storeiritnew_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u6_2Imm:$Ii, s32_0Imm:$II), "if ($Pv4.new) memw($Rs32+#$Ii) = #$II", -tc_8f0a6bad, TypeST>, Enc_5ccba9, PredNewRel { +tc_f86c328a, TypeST>, Enc_5ccba9, PredNewRel { let Inst{31-21} = 0b00111001010; let isPredicated = 1; let addrMode = BaseImmOffset; @@ -23960,7 +24449,7 @@ def S4_storeiritnew_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II), "if ($Pv4.new) memw($Rs32) = #$II", -tc_8f0a6bad, TypeMAPPING> { +tc_f86c328a, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -23968,12 +24457,10 @@ def S4_storerb_ap : HInst< (outs IntRegs:$Re32), (ins u32_0Imm:$II, IntRegs:$Rt32), "memb($Re32=#$II) = $Rt32", -tc_336e698c, TypeST>, Enc_8bcba4, AddrModeRel { +tc_66888ded, TypeST>, Enc_8bcba4, AddrModeRel { let Inst{7-6} = 0b10; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10101011000; -let hasNewValue = 1; -let opNewValue = 0; let addrMode = AbsoluteSet; let accessSize = ByteAccess; let isExtended = 1; @@ -23991,7 +24478,7 @@ def S4_storerb_rr : HInst< (outs), (ins IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32), "memb($Rs32+$Ru32<<#$Ii) = $Rt32", -tc_45631a8d, TypeST>, Enc_eca7c8, AddrModeRel, ImmRegShl { +tc_d9709180, TypeST>, Enc_eca7c8, AddrModeRel, ImmRegShl { let Inst{6-5} = 0b00; let Inst{31-21} = 0b00111011000; let addrMode = BaseRegOffset; @@ -24007,7 +24494,7 @@ def S4_storerb_ur : HInst< (outs), (ins IntRegs:$Ru32, u2_0Imm:$Ii, u32_0Imm:$II, IntRegs:$Rt32), "memb($Ru32<<#$Ii+#$II) = $Rt32", -tc_a4567c39, TypeST>, Enc_9ea4cf, AddrModeRel, ImmRegShl { +tc_0dc560de, TypeST>, Enc_9ea4cf, AddrModeRel, ImmRegShl { let Inst{7-7} = 0b1; let Inst{31-21} = 0b10101101000; let addrMode = BaseLongOffset; @@ -24029,17 +24516,16 @@ def S4_storerbnew_ap : HInst< (outs IntRegs:$Re32), (ins u32_0Imm:$II, IntRegs:$Nt8), "memb($Re32=#$II) = $Nt8.new", -tc_7986ba30, TypeST>, Enc_724154, AddrModeRel { +tc_53bdb2f6, TypeST>, Enc_724154, AddrModeRel { let Inst{7-6} = 0b10; let Inst{13-11} = 0b000; let Inst{31-21} = 0b10101011101; -let hasNewValue = 1; -let opNewValue = 0; let addrMode = AbsoluteSet; let accessSize = ByteAccess; let isNVStore = 1; let isNewValue = 1; let isExtended = 1; 
+let isRestrictNoSlot1Store = 1; let mayStore = 1; let BaseOpcode = "S2_storerb_ap"; let DecoderNamespace = "MustExtend"; @@ -24054,13 +24540,14 @@ def S4_storerbnew_rr : HInst< (outs), (ins IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8), "memb($Rs32+$Ru32<<#$Ii) = $Nt8.new", -tc_be995eaf, TypeST>, Enc_c6220b, AddrModeRel { +tc_b166348b, TypeST>, Enc_c6220b, AddrModeRel { let Inst{6-3} = 0b0000; let Inst{31-21} = 0b00111011101; let addrMode = BaseRegOffset; let accessSize = ByteAccess; let isNVStore = 1; let isNewValue = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let CextOpcode = "S2_storerb"; let InputType = "reg"; @@ -24072,7 +24559,7 @@ def S4_storerbnew_ur : HInst< (outs), (ins IntRegs:$Ru32, u2_0Imm:$Ii, u32_0Imm:$II, IntRegs:$Nt8), "memb($Ru32<<#$Ii+#$II) = $Nt8.new", -tc_210b2456, TypeST>, Enc_7eb485, AddrModeRel { +tc_a8acdac0, TypeST>, Enc_7eb485, AddrModeRel { let Inst{7-7} = 0b1; let Inst{12-11} = 0b00; let Inst{31-21} = 0b10101101101; @@ -24081,6 +24568,7 @@ let accessSize = ByteAccess; let isNVStore = 1; let isNewValue = 1; let isExtended = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let CextOpcode = "S2_storerb"; let BaseOpcode = "S4_storerb_ur"; @@ -24096,12 +24584,10 @@ def S4_storerd_ap : HInst< (outs IntRegs:$Re32), (ins u32_0Imm:$II, DoubleRegs:$Rtt32), "memd($Re32=#$II) = $Rtt32", -tc_336e698c, TypeST>, Enc_c7a204 { +tc_66888ded, TypeST>, Enc_c7a204 { let Inst{7-6} = 0b10; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10101011110; -let hasNewValue = 1; -let opNewValue = 0; let addrMode = AbsoluteSet; let accessSize = DoubleWordAccess; let isExtended = 1; @@ -24118,7 +24604,7 @@ def S4_storerd_rr : HInst< (outs), (ins IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, DoubleRegs:$Rtt32), "memd($Rs32+$Ru32<<#$Ii) = $Rtt32", -tc_45631a8d, TypeST>, Enc_55355c, AddrModeRel, ImmRegShl { +tc_d9709180, TypeST>, Enc_55355c, AddrModeRel, ImmRegShl { let Inst{6-5} = 0b00; let Inst{31-21} = 0b00111011110; let addrMode = BaseRegOffset; @@ -24133,7 +24619,7 @@ def S4_storerd_ur : HInst< (outs), (ins IntRegs:$Ru32, u2_0Imm:$Ii, u32_0Imm:$II, DoubleRegs:$Rtt32), "memd($Ru32<<#$Ii+#$II) = $Rtt32", -tc_a4567c39, TypeST>, Enc_f79415, AddrModeRel, ImmRegShl { +tc_0dc560de, TypeST>, Enc_f79415, AddrModeRel, ImmRegShl { let Inst{7-7} = 0b1; let Inst{31-21} = 0b10101101110; let addrMode = BaseLongOffset; @@ -24154,12 +24640,10 @@ def S4_storerf_ap : HInst< (outs IntRegs:$Re32), (ins u32_0Imm:$II, IntRegs:$Rt32), "memh($Re32=#$II) = $Rt32.h", -tc_336e698c, TypeST>, Enc_8bcba4 { +tc_66888ded, TypeST>, Enc_8bcba4 { let Inst{7-6} = 0b10; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10101011011; -let hasNewValue = 1; -let opNewValue = 0; let addrMode = AbsoluteSet; let accessSize = HalfWordAccess; let isExtended = 1; @@ -24176,7 +24660,7 @@ def S4_storerf_rr : HInst< (outs), (ins IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32), "memh($Rs32+$Ru32<<#$Ii) = $Rt32.h", -tc_45631a8d, TypeST>, Enc_eca7c8, AddrModeRel, ImmRegShl { +tc_d9709180, TypeST>, Enc_eca7c8, AddrModeRel, ImmRegShl { let Inst{6-5} = 0b00; let Inst{31-21} = 0b00111011011; let addrMode = BaseRegOffset; @@ -24191,7 +24675,7 @@ def S4_storerf_ur : HInst< (outs), (ins IntRegs:$Ru32, u2_0Imm:$Ii, u32_0Imm:$II, IntRegs:$Rt32), "memh($Ru32<<#$Ii+#$II) = $Rt32.h", -tc_a4567c39, TypeST>, Enc_9ea4cf, AddrModeRel, ImmRegShl { +tc_0dc560de, TypeST>, Enc_9ea4cf, AddrModeRel, ImmRegShl { let Inst{7-7} = 0b1; let Inst{31-21} = 0b10101101011; let addrMode = BaseLongOffset; @@ -24212,12 +24696,10 @@ def S4_storerh_ap : 
HInst< (outs IntRegs:$Re32), (ins u32_0Imm:$II, IntRegs:$Rt32), "memh($Re32=#$II) = $Rt32", -tc_336e698c, TypeST>, Enc_8bcba4, AddrModeRel { +tc_66888ded, TypeST>, Enc_8bcba4, AddrModeRel { let Inst{7-6} = 0b10; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10101011010; -let hasNewValue = 1; -let opNewValue = 0; let addrMode = AbsoluteSet; let accessSize = HalfWordAccess; let isExtended = 1; @@ -24235,7 +24717,7 @@ def S4_storerh_rr : HInst< (outs), (ins IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32), "memh($Rs32+$Ru32<<#$Ii) = $Rt32", -tc_45631a8d, TypeST>, Enc_eca7c8, AddrModeRel, ImmRegShl { +tc_d9709180, TypeST>, Enc_eca7c8, AddrModeRel, ImmRegShl { let Inst{6-5} = 0b00; let Inst{31-21} = 0b00111011010; let addrMode = BaseRegOffset; @@ -24251,7 +24733,7 @@ def S4_storerh_ur : HInst< (outs), (ins IntRegs:$Ru32, u2_0Imm:$Ii, u32_0Imm:$II, IntRegs:$Rt32), "memh($Ru32<<#$Ii+#$II) = $Rt32", -tc_a4567c39, TypeST>, Enc_9ea4cf, AddrModeRel, ImmRegShl { +tc_0dc560de, TypeST>, Enc_9ea4cf, AddrModeRel, ImmRegShl { let Inst{7-7} = 0b1; let Inst{31-21} = 0b10101101010; let addrMode = BaseLongOffset; @@ -24273,17 +24755,16 @@ def S4_storerhnew_ap : HInst< (outs IntRegs:$Re32), (ins u32_0Imm:$II, IntRegs:$Nt8), "memh($Re32=#$II) = $Nt8.new", -tc_7986ba30, TypeST>, Enc_724154, AddrModeRel { +tc_53bdb2f6, TypeST>, Enc_724154, AddrModeRel { let Inst{7-6} = 0b10; let Inst{13-11} = 0b001; let Inst{31-21} = 0b10101011101; -let hasNewValue = 1; -let opNewValue = 0; let addrMode = AbsoluteSet; let accessSize = HalfWordAccess; let isNVStore = 1; let isNewValue = 1; let isExtended = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let BaseOpcode = "S2_storerh_ap"; let DecoderNamespace = "MustExtend"; @@ -24298,13 +24779,14 @@ def S4_storerhnew_rr : HInst< (outs), (ins IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8), "memh($Rs32+$Ru32<<#$Ii) = $Nt8.new", -tc_be995eaf, TypeST>, Enc_c6220b, AddrModeRel { +tc_b166348b, TypeST>, Enc_c6220b, AddrModeRel { let Inst{6-3} = 0b0001; let Inst{31-21} = 0b00111011101; let addrMode = BaseRegOffset; let accessSize = HalfWordAccess; let isNVStore = 1; let isNewValue = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let CextOpcode = "S2_storerh"; let InputType = "reg"; @@ -24316,7 +24798,7 @@ def S4_storerhnew_ur : HInst< (outs), (ins IntRegs:$Ru32, u2_0Imm:$Ii, u32_0Imm:$II, IntRegs:$Nt8), "memh($Ru32<<#$Ii+#$II) = $Nt8.new", -tc_210b2456, TypeST>, Enc_7eb485, AddrModeRel { +tc_a8acdac0, TypeST>, Enc_7eb485, AddrModeRel { let Inst{7-7} = 0b1; let Inst{12-11} = 0b01; let Inst{31-21} = 0b10101101101; @@ -24325,6 +24807,7 @@ let accessSize = HalfWordAccess; let isNVStore = 1; let isNewValue = 1; let isExtended = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let CextOpcode = "S2_storerh"; let BaseOpcode = "S2_storerh_ur"; @@ -24340,12 +24823,10 @@ def S4_storeri_ap : HInst< (outs IntRegs:$Re32), (ins u32_0Imm:$II, IntRegs:$Rt32), "memw($Re32=#$II) = $Rt32", -tc_336e698c, TypeST>, Enc_8bcba4, AddrModeRel { +tc_66888ded, TypeST>, Enc_8bcba4, AddrModeRel { let Inst{7-6} = 0b10; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10101011100; -let hasNewValue = 1; -let opNewValue = 0; let addrMode = AbsoluteSet; let accessSize = WordAccess; let isExtended = 1; @@ -24363,7 +24844,7 @@ def S4_storeri_rr : HInst< (outs), (ins IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32), "memw($Rs32+$Ru32<<#$Ii) = $Rt32", -tc_45631a8d, TypeST>, Enc_eca7c8, AddrModeRel, ImmRegShl { +tc_d9709180, TypeST>, Enc_eca7c8, AddrModeRel, ImmRegShl { let Inst{6-5} = 0b00; 
let Inst{31-21} = 0b00111011100; let addrMode = BaseRegOffset; @@ -24379,7 +24860,7 @@ def S4_storeri_ur : HInst< (outs), (ins IntRegs:$Ru32, u2_0Imm:$Ii, u32_0Imm:$II, IntRegs:$Rt32), "memw($Ru32<<#$Ii+#$II) = $Rt32", -tc_a4567c39, TypeST>, Enc_9ea4cf, AddrModeRel, ImmRegShl { +tc_0dc560de, TypeST>, Enc_9ea4cf, AddrModeRel, ImmRegShl { let Inst{7-7} = 0b1; let Inst{31-21} = 0b10101101100; let addrMode = BaseLongOffset; @@ -24401,17 +24882,16 @@ def S4_storerinew_ap : HInst< (outs IntRegs:$Re32), (ins u32_0Imm:$II, IntRegs:$Nt8), "memw($Re32=#$II) = $Nt8.new", -tc_7986ba30, TypeST>, Enc_724154, AddrModeRel { +tc_53bdb2f6, TypeST>, Enc_724154, AddrModeRel { let Inst{7-6} = 0b10; let Inst{13-11} = 0b010; let Inst{31-21} = 0b10101011101; -let hasNewValue = 1; -let opNewValue = 0; let addrMode = AbsoluteSet; let accessSize = WordAccess; let isNVStore = 1; let isNewValue = 1; let isExtended = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let BaseOpcode = "S2_storeri_ap"; let DecoderNamespace = "MustExtend"; @@ -24426,13 +24906,14 @@ def S4_storerinew_rr : HInst< (outs), (ins IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8), "memw($Rs32+$Ru32<<#$Ii) = $Nt8.new", -tc_be995eaf, TypeST>, Enc_c6220b, AddrModeRel { +tc_b166348b, TypeST>, Enc_c6220b, AddrModeRel { let Inst{6-3} = 0b0010; let Inst{31-21} = 0b00111011101; let addrMode = BaseRegOffset; let accessSize = WordAccess; let isNVStore = 1; let isNewValue = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let CextOpcode = "S2_storeri"; let InputType = "reg"; @@ -24444,7 +24925,7 @@ def S4_storerinew_ur : HInst< (outs), (ins IntRegs:$Ru32, u2_0Imm:$Ii, u32_0Imm:$II, IntRegs:$Nt8), "memw($Ru32<<#$Ii+#$II) = $Nt8.new", -tc_210b2456, TypeST>, Enc_7eb485, AddrModeRel { +tc_a8acdac0, TypeST>, Enc_7eb485, AddrModeRel { let Inst{7-7} = 0b1; let Inst{12-11} = 0b10; let Inst{31-21} = 0b10101101101; @@ -24453,6 +24934,7 @@ let accessSize = WordAccess; let isNVStore = 1; let isNewValue = 1; let isExtended = 1; +let isRestrictNoSlot1Store = 1; let mayStore = 1; let CextOpcode = "S2_storeri"; let BaseOpcode = "S2_storeri_ur"; @@ -24468,7 +24950,7 @@ def S4_subaddi : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, s32_0Imm:$Ii, IntRegs:$Ru32), "$Rd32 = add($Rs32,sub(#$Ii,$Ru32))", -tc_090485bb, TypeALU64>, Enc_8b8d61 { +tc_c74f796f, TypeALU64>, Enc_8b8d61 { let Inst{31-23} = 0b110110111; let hasNewValue = 1; let opNewValue = 0; @@ -24483,7 +24965,7 @@ def S4_subi_asl_ri : HInst< (outs IntRegs:$Rx32), (ins u32_0Imm:$Ii, IntRegs:$Rx32in, u5_0Imm:$II), "$Rx32 = sub(#$Ii,asl($Rx32in,#$II))", -tc_c0cd91a8, TypeALU64>, Enc_c31910 { +tc_c74f796f, TypeALU64>, Enc_c31910 { let Inst{2-0} = 0b110; let Inst{4-4} = 0b0; let Inst{31-24} = 0b11011110; @@ -24501,7 +24983,7 @@ def S4_subi_lsr_ri : HInst< (outs IntRegs:$Rx32), (ins u32_0Imm:$Ii, IntRegs:$Rx32in, u5_0Imm:$II), "$Rx32 = sub(#$Ii,lsr($Rx32in,#$II))", -tc_c0cd91a8, TypeALU64>, Enc_c31910 { +tc_c74f796f, TypeALU64>, Enc_c31910 { let Inst{2-0} = 0b110; let Inst{4-4} = 0b1; let Inst{31-24} = 0b11011110; @@ -24519,7 +25001,7 @@ def S4_vrcrotate : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, IntRegs:$Rt32, u2_0Imm:$Ii), "$Rdd32 = vrcrotate($Rss32,$Rt32,#$Ii)", -tc_6264c5e0, TypeS_3op>, Enc_645d54 { +tc_b9c0b731, TypeS_3op>, Enc_645d54 { let Inst{7-6} = 0b11; let Inst{31-21} = 0b11000011110; let prefersSlot3 = 1; @@ -24528,7 +25010,7 @@ def S4_vrcrotate_acc : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32, u2_0Imm:$Ii), "$Rxx32 += 
vrcrotate($Rss32,$Rt32,#$Ii)", -tc_bc5561d8, TypeS_3op>, Enc_b72622 { +tc_60571023, TypeS_3op>, Enc_b72622 { let Inst{7-6} = 0b00; let Inst{31-21} = 0b11001011101; let prefersSlot3 = 1; @@ -24538,7 +25020,7 @@ def S4_vxaddsubh : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vxaddsubh($Rss32,$Rtt32):sat", -tc_47ab9233, TypeS_3op>, Enc_a56825 { +tc_b44c6e2a, TypeS_3op>, Enc_a56825 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000001010; @@ -24549,7 +25031,7 @@ def S4_vxaddsubhr : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vxaddsubh($Rss32,$Rtt32):rnd:>>1:sat", -tc_63cd9d2d, TypeS_3op>, Enc_a56825 { +tc_2b6f77c6, TypeS_3op>, Enc_a56825 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000001110; @@ -24560,7 +25042,7 @@ def S4_vxaddsubw : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vxaddsubw($Rss32,$Rtt32):sat", -tc_47ab9233, TypeS_3op>, Enc_a56825 { +tc_b44c6e2a, TypeS_3op>, Enc_a56825 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000001010; @@ -24571,7 +25053,7 @@ def S4_vxsubaddh : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vxsubaddh($Rss32,$Rtt32):sat", -tc_47ab9233, TypeS_3op>, Enc_a56825 { +tc_b44c6e2a, TypeS_3op>, Enc_a56825 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000001010; @@ -24582,7 +25064,7 @@ def S4_vxsubaddhr : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vxsubaddh($Rss32,$Rtt32):rnd:>>1:sat", -tc_63cd9d2d, TypeS_3op>, Enc_a56825 { +tc_2b6f77c6, TypeS_3op>, Enc_a56825 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000001110; @@ -24593,7 +25075,7 @@ def S4_vxsubaddw : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vxsubaddw($Rss32,$Rtt32):sat", -tc_47ab9233, TypeS_3op>, Enc_a56825 { +tc_b44c6e2a, TypeS_3op>, Enc_a56825 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000001010; @@ -24604,7 +25086,7 @@ def S5_asrhub_rnd_sat : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32, u4_0Imm:$Ii), "$Rd32 = vasrhub($Rss32,#$Ii):raw", -tc_63cd9d2d, TypeS_2op>, Enc_11a146, Requires<[HasV5T]> { +tc_2b6f77c6, TypeS_2op>, Enc_11a146, Requires<[HasV5T]> { let Inst{7-5} = 0b100; let Inst{13-12} = 0b00; let Inst{31-21} = 0b10001000011; @@ -24617,7 +25099,7 @@ def S5_asrhub_rnd_sat_goodsyntax : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32, u4_0Imm:$Ii), "$Rd32 = vasrhub($Rss32,#$Ii):rnd:sat", -tc_63cd9d2d, TypeS_2op>, Requires<[HasV5T]> { +tc_2b6f77c6, TypeS_2op>, Requires<[HasV5T]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -24626,7 +25108,7 @@ def S5_asrhub_sat : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32, u4_0Imm:$Ii), "$Rd32 = vasrhub($Rss32,#$Ii):sat", -tc_63cd9d2d, TypeS_2op>, Enc_11a146, Requires<[HasV5T]> { +tc_2b6f77c6, TypeS_2op>, Enc_11a146, Requires<[HasV5T]> { let Inst{7-5} = 0b101; let Inst{13-12} = 0b00; let Inst{31-21} = 0b10001000011; @@ -24639,7 +25121,7 @@ def S5_popcountp : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32), "$Rd32 = popcount($Rss32)", -tc_ca280e8b, TypeS_2op>, Enc_90cd8b, Requires<[HasV5T]> { +tc_00afc57e, TypeS_2op>, Enc_90cd8b, Requires<[HasV5T]> { let Inst{13-5} = 0b000000011; let Inst{31-21} = 0b10001000011; let hasNewValue = 1; @@ -24650,7 +25132,7 @@ def S5_vasrhrnd : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, 
u4_0Imm:$Ii), "$Rdd32 = vasrh($Rss32,#$Ii):raw", -tc_63cd9d2d, TypeS_2op>, Enc_12b6e9, Requires<[HasV5T]> { +tc_2b6f77c6, TypeS_2op>, Enc_12b6e9, Requires<[HasV5T]> { let Inst{7-5} = 0b000; let Inst{13-12} = 0b00; let Inst{31-21} = 0b10000000001; @@ -24660,14 +25142,22 @@ def S5_vasrhrnd_goodsyntax : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, u4_0Imm:$Ii), "$Rdd32 = vasrh($Rss32,#$Ii):rnd", -tc_63cd9d2d, TypeS_2op>, Requires<[HasV5T]> { +tc_2b6f77c6, TypeS_2op>, Requires<[HasV5T]> { let isPseudo = 1; } +def S6_allocframe_to_raw : HInst< +(outs), +(ins u11_3Imm:$Ii), +"allocframe(#$Ii)", +tc_e216a5db, TypeMAPPING>, Requires<[HasV65T]> { +let isPseudo = 1; +let isCodeGenOnly = 1; +} def S6_rol_i_p : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, u6_0Imm:$Ii), "$Rdd32 = rol($Rss32,#$Ii)", -tc_9f518242, TypeS_2op>, Enc_5eac98, Requires<[HasV60T]> { +tc_55050d58, TypeS_2op>, Enc_5eac98, Requires<[HasV60T]> { let Inst{7-5} = 0b011; let Inst{31-21} = 0b10000000000; } @@ -24675,7 +25165,7 @@ def S6_rol_i_p_acc : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii), "$Rxx32 += rol($Rss32,#$Ii)", -tc_e17ce9ad, TypeS_2op>, Enc_70fb07, Requires<[HasV60T]> { +tc_41d5298e, TypeS_2op>, Enc_70fb07, Requires<[HasV60T]> { let Inst{7-5} = 0b111; let Inst{31-21} = 0b10000010000; let prefersSlot3 = 1; @@ -24685,7 +25175,7 @@ def S6_rol_i_p_and : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii), "$Rxx32 &= rol($Rss32,#$Ii)", -tc_e17ce9ad, TypeS_2op>, Enc_70fb07, Requires<[HasV60T]> { +tc_41d5298e, TypeS_2op>, Enc_70fb07, Requires<[HasV60T]> { let Inst{7-5} = 0b011; let Inst{31-21} = 0b10000010010; let prefersSlot3 = 1; @@ -24695,7 +25185,7 @@ def S6_rol_i_p_nac : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii), "$Rxx32 -= rol($Rss32,#$Ii)", -tc_e17ce9ad, TypeS_2op>, Enc_70fb07, Requires<[HasV60T]> { +tc_41d5298e, TypeS_2op>, Enc_70fb07, Requires<[HasV60T]> { let Inst{7-5} = 0b011; let Inst{31-21} = 0b10000010000; let prefersSlot3 = 1; @@ -24705,7 +25195,7 @@ def S6_rol_i_p_or : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii), "$Rxx32 |= rol($Rss32,#$Ii)", -tc_e17ce9ad, TypeS_2op>, Enc_70fb07, Requires<[HasV60T]> { +tc_41d5298e, TypeS_2op>, Enc_70fb07, Requires<[HasV60T]> { let Inst{7-5} = 0b111; let Inst{31-21} = 0b10000010010; let prefersSlot3 = 1; @@ -24715,7 +25205,7 @@ def S6_rol_i_p_xacc : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii), "$Rxx32 ^= rol($Rss32,#$Ii)", -tc_e17ce9ad, TypeS_2op>, Enc_70fb07, Requires<[HasV60T]> { +tc_41d5298e, TypeS_2op>, Enc_70fb07, Requires<[HasV60T]> { let Inst{7-5} = 0b011; let Inst{31-21} = 0b10000010100; let prefersSlot3 = 1; @@ -24725,7 +25215,7 @@ def S6_rol_i_r : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, u5_0Imm:$Ii), "$Rd32 = rol($Rs32,#$Ii)", -tc_9f518242, TypeS_2op>, Enc_a05677, Requires<[HasV60T]> { +tc_55050d58, TypeS_2op>, Enc_a05677, Requires<[HasV60T]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10001100000; @@ -24736,7 +25226,7 @@ def S6_rol_i_r_acc : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii), "$Rx32 += rol($Rs32,#$Ii)", -tc_e17ce9ad, TypeS_2op>, Enc_28a2dc, Requires<[HasV60T]> { +tc_41d5298e, TypeS_2op>, Enc_28a2dc, Requires<[HasV60T]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10001110000; @@ -24749,7 +25239,7 @@ def S6_rol_i_r_and : 
HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii), "$Rx32 &= rol($Rs32,#$Ii)", -tc_e17ce9ad, TypeS_2op>, Enc_28a2dc, Requires<[HasV60T]> { +tc_41d5298e, TypeS_2op>, Enc_28a2dc, Requires<[HasV60T]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10001110010; @@ -24762,7 +25252,7 @@ def S6_rol_i_r_nac : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii), "$Rx32 -= rol($Rs32,#$Ii)", -tc_e17ce9ad, TypeS_2op>, Enc_28a2dc, Requires<[HasV60T]> { +tc_41d5298e, TypeS_2op>, Enc_28a2dc, Requires<[HasV60T]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10001110000; @@ -24775,7 +25265,7 @@ def S6_rol_i_r_or : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii), "$Rx32 |= rol($Rs32,#$Ii)", -tc_e17ce9ad, TypeS_2op>, Enc_28a2dc, Requires<[HasV60T]> { +tc_41d5298e, TypeS_2op>, Enc_28a2dc, Requires<[HasV60T]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10001110010; @@ -24788,7 +25278,7 @@ def S6_rol_i_r_xacc : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii), "$Rx32 ^= rol($Rs32,#$Ii)", -tc_e17ce9ad, TypeS_2op>, Enc_28a2dc, Requires<[HasV60T]> { +tc_41d5298e, TypeS_2op>, Enc_28a2dc, Requires<[HasV60T]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10001110100; @@ -24801,7 +25291,7 @@ def S6_vsplatrbp : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32), "$Rdd32 = vsplatb($Rs32)", -tc_78b3c689, TypeS_2op>, Enc_3a3d62, Requires<[HasV62T]> { +tc_be706f30, TypeS_2op>, Enc_3a3d62, Requires<[HasV62T]> { let Inst{13-5} = 0b000000100; let Inst{31-21} = 0b10000100010; } @@ -24809,7 +25299,7 @@ def S6_vtrunehb_ppp : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vtrunehb($Rss32,$Rtt32)", -tc_9f518242, TypeS_3op>, Enc_a56825, Requires<[HasV62T]> { +tc_55050d58, TypeS_3op>, Enc_a56825, Requires<[HasV62T]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000001100; @@ -24818,7 +25308,7 @@ def S6_vtrunohb_ppp : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vtrunohb($Rss32,$Rtt32)", -tc_9f518242, TypeS_3op>, Enc_a56825, Requires<[HasV62T]> { +tc_55050d58, TypeS_3op>, Enc_a56825, Requires<[HasV62T]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000001100; @@ -24827,7 +25317,7 @@ def SA1_addi : HInst< (outs GeneralSubRegs:$Rx16), (ins IntRegs:$Rx16in, s32_0Imm:$Ii), "$Rx16 = add($Rx16in,#$Ii)", -tc_821c4233, TypeSUBINSN>, Enc_93af4c { +tc_609d2efe, TypeSUBINSN>, Enc_93af4c { let Inst{12-11} = 0b00; let hasNewValue = 1; let opNewValue = 0; @@ -24844,7 +25334,7 @@ def SA1_addrx : HInst< (outs GeneralSubRegs:$Rx16), (ins IntRegs:$Rx16in, GeneralSubRegs:$Rs16), "$Rx16 = add($Rx16in,$Rs16)", -tc_821c4233, TypeSUBINSN>, Enc_0527db { +tc_609d2efe, TypeSUBINSN>, Enc_0527db { let Inst{12-8} = 0b11000; let hasNewValue = 1; let opNewValue = 0; @@ -24856,7 +25346,7 @@ def SA1_addsp : HInst< (outs GeneralSubRegs:$Rd16), (ins u6_2Imm:$Ii), "$Rd16 = add(r29,#$Ii)", -tc_d2609065, TypeSUBINSN>, Enc_2df31d { +tc_a904d137, TypeSUBINSN>, Enc_2df31d { let Inst{12-10} = 0b011; let hasNewValue = 1; let opNewValue = 0; @@ -24868,7 +25358,7 @@ def SA1_and1 : HInst< (outs GeneralSubRegs:$Rd16), (ins GeneralSubRegs:$Rs16), "$Rd16 = and($Rs16,#1)", -tc_d2609065, TypeSUBINSN>, Enc_97d666 { +tc_a904d137, TypeSUBINSN>, Enc_97d666 { let Inst{12-8} = 0b10010; let hasNewValue = 1; let opNewValue = 0; @@ -24879,7 +25369,7 @@ def 
SA1_clrf : HInst< (outs GeneralSubRegs:$Rd16), (ins), "if (!p0) $Rd16 = #0", -tc_7c2dcd4d, TypeSUBINSN>, Enc_1f5ba6 { +tc_1b82a277, TypeSUBINSN>, Enc_1f5ba6 { let Inst{12-4} = 0b110100111; let isPredicated = 1; let isPredicatedFalse = 1; @@ -24893,7 +25383,7 @@ def SA1_clrfnew : HInst< (outs GeneralSubRegs:$Rd16), (ins), "if (!p0.new) $Rd16 = #0", -tc_f26aa619, TypeSUBINSN>, Enc_1f5ba6 { +tc_e9c822f7, TypeSUBINSN>, Enc_1f5ba6 { let Inst{12-4} = 0b110100101; let isPredicated = 1; let isPredicatedFalse = 1; @@ -24908,7 +25398,7 @@ def SA1_clrt : HInst< (outs GeneralSubRegs:$Rd16), (ins), "if (p0) $Rd16 = #0", -tc_7c2dcd4d, TypeSUBINSN>, Enc_1f5ba6 { +tc_1b82a277, TypeSUBINSN>, Enc_1f5ba6 { let Inst{12-4} = 0b110100110; let isPredicated = 1; let hasNewValue = 1; @@ -24921,7 +25411,7 @@ def SA1_clrtnew : HInst< (outs GeneralSubRegs:$Rd16), (ins), "if (p0.new) $Rd16 = #0", -tc_f26aa619, TypeSUBINSN>, Enc_1f5ba6 { +tc_e9c822f7, TypeSUBINSN>, Enc_1f5ba6 { let Inst{12-4} = 0b110100100; let isPredicated = 1; let hasNewValue = 1; @@ -24935,7 +25425,7 @@ def SA1_cmpeqi : HInst< (outs), (ins GeneralSubRegs:$Rs16, u2_0Imm:$Ii), "p0 = cmp.eq($Rs16,#$Ii)", -tc_e8c7a357, TypeSUBINSN>, Enc_63eaeb { +tc_90f3e30c, TypeSUBINSN>, Enc_63eaeb { let Inst{3-2} = 0b00; let Inst{12-8} = 0b11001; let AsmVariantName = "NonParsable"; @@ -24946,7 +25436,7 @@ def SA1_combine0i : HInst< (outs GeneralDoubleLow8Regs:$Rdd8), (ins u2_0Imm:$Ii), "$Rdd8 = combine(#0,#$Ii)", -tc_d2609065, TypeSUBINSN>, Enc_ed48be { +tc_a904d137, TypeSUBINSN>, Enc_ed48be { let Inst{4-3} = 0b00; let Inst{12-7} = 0b111000; let hasNewValue = 1; @@ -24958,7 +25448,7 @@ def SA1_combine1i : HInst< (outs GeneralDoubleLow8Regs:$Rdd8), (ins u2_0Imm:$Ii), "$Rdd8 = combine(#1,#$Ii)", -tc_d2609065, TypeSUBINSN>, Enc_ed48be { +tc_a904d137, TypeSUBINSN>, Enc_ed48be { let Inst{4-3} = 0b01; let Inst{12-7} = 0b111000; let hasNewValue = 1; @@ -24970,7 +25460,7 @@ def SA1_combine2i : HInst< (outs GeneralDoubleLow8Regs:$Rdd8), (ins u2_0Imm:$Ii), "$Rdd8 = combine(#2,#$Ii)", -tc_d2609065, TypeSUBINSN>, Enc_ed48be { +tc_a904d137, TypeSUBINSN>, Enc_ed48be { let Inst{4-3} = 0b10; let Inst{12-7} = 0b111000; let hasNewValue = 1; @@ -24982,7 +25472,7 @@ def SA1_combine3i : HInst< (outs GeneralDoubleLow8Regs:$Rdd8), (ins u2_0Imm:$Ii), "$Rdd8 = combine(#3,#$Ii)", -tc_d2609065, TypeSUBINSN>, Enc_ed48be { +tc_a904d137, TypeSUBINSN>, Enc_ed48be { let Inst{4-3} = 0b11; let Inst{12-7} = 0b111000; let hasNewValue = 1; @@ -24994,7 +25484,7 @@ def SA1_combinerz : HInst< (outs GeneralDoubleLow8Regs:$Rdd8), (ins GeneralSubRegs:$Rs16), "$Rdd8 = combine($Rs16,#0)", -tc_d2609065, TypeSUBINSN>, Enc_399e12 { +tc_a904d137, TypeSUBINSN>, Enc_399e12 { let Inst{3-3} = 0b1; let Inst{12-8} = 0b11101; let hasNewValue = 1; @@ -25006,7 +25496,7 @@ def SA1_combinezr : HInst< (outs GeneralDoubleLow8Regs:$Rdd8), (ins GeneralSubRegs:$Rs16), "$Rdd8 = combine(#0,$Rs16)", -tc_d2609065, TypeSUBINSN>, Enc_399e12 { +tc_a904d137, TypeSUBINSN>, Enc_399e12 { let Inst{3-3} = 0b0; let Inst{12-8} = 0b11101; let hasNewValue = 1; @@ -25018,7 +25508,7 @@ def SA1_dec : HInst< (outs GeneralSubRegs:$Rd16), (ins GeneralSubRegs:$Rs16, n1Const:$n1), "$Rd16 = add($Rs16,#$n1)", -tc_821c4233, TypeSUBINSN>, Enc_ee5ed0 { +tc_609d2efe, TypeSUBINSN>, Enc_ee5ed0 { let Inst{12-8} = 0b10011; let hasNewValue = 1; let opNewValue = 0; @@ -25029,7 +25519,7 @@ def SA1_inc : HInst< (outs GeneralSubRegs:$Rd16), (ins GeneralSubRegs:$Rs16), "$Rd16 = add($Rs16,#1)", -tc_d2609065, TypeSUBINSN>, Enc_97d666 { +tc_a904d137, TypeSUBINSN>, 
Enc_97d666 { let Inst{12-8} = 0b10001; let hasNewValue = 1; let opNewValue = 0; @@ -25040,7 +25530,7 @@ def SA1_seti : HInst< (outs GeneralSubRegs:$Rd16), (ins u32_0Imm:$Ii), "$Rd16 = #$Ii", -tc_d2609065, TypeSUBINSN>, Enc_e39bb2 { +tc_a904d137, TypeSUBINSN>, Enc_e39bb2 { let Inst{12-10} = 0b010; let hasNewValue = 1; let opNewValue = 0; @@ -25056,7 +25546,7 @@ def SA1_setin1 : HInst< (outs GeneralSubRegs:$Rd16), (ins n1Const:$n1), "$Rd16 = #$n1", -tc_d2609065, TypeSUBINSN>, Enc_7a0ea6 { +tc_a904d137, TypeSUBINSN>, Enc_7a0ea6 { let Inst{12-4} = 0b110100000; let hasNewValue = 1; let opNewValue = 0; @@ -25067,7 +25557,7 @@ def SA1_sxtb : HInst< (outs GeneralSubRegs:$Rd16), (ins GeneralSubRegs:$Rs16), "$Rd16 = sxtb($Rs16)", -tc_d2609065, TypeSUBINSN>, Enc_97d666 { +tc_a904d137, TypeSUBINSN>, Enc_97d666 { let Inst{12-8} = 0b10101; let hasNewValue = 1; let opNewValue = 0; @@ -25078,7 +25568,7 @@ def SA1_sxth : HInst< (outs GeneralSubRegs:$Rd16), (ins GeneralSubRegs:$Rs16), "$Rd16 = sxth($Rs16)", -tc_d2609065, TypeSUBINSN>, Enc_97d666 { +tc_a904d137, TypeSUBINSN>, Enc_97d666 { let Inst{12-8} = 0b10100; let hasNewValue = 1; let opNewValue = 0; @@ -25089,7 +25579,7 @@ def SA1_tfr : HInst< (outs GeneralSubRegs:$Rd16), (ins GeneralSubRegs:$Rs16), "$Rd16 = $Rs16", -tc_d2609065, TypeSUBINSN>, Enc_97d666 { +tc_a904d137, TypeSUBINSN>, Enc_97d666 { let Inst{12-8} = 0b10000; let hasNewValue = 1; let opNewValue = 0; @@ -25100,7 +25590,7 @@ def SA1_zxtb : HInst< (outs GeneralSubRegs:$Rd16), (ins GeneralSubRegs:$Rs16), "$Rd16 = and($Rs16,#255)", -tc_d2609065, TypeSUBINSN>, Enc_97d666 { +tc_a904d137, TypeSUBINSN>, Enc_97d666 { let Inst{12-8} = 0b10111; let hasNewValue = 1; let opNewValue = 0; @@ -25111,7 +25601,7 @@ def SA1_zxth : HInst< (outs GeneralSubRegs:$Rd16), (ins GeneralSubRegs:$Rs16), "$Rd16 = zxth($Rs16)", -tc_d2609065, TypeSUBINSN>, Enc_97d666 { +tc_a904d137, TypeSUBINSN>, Enc_97d666 { let Inst{12-8} = 0b10110; let hasNewValue = 1; let opNewValue = 0; @@ -25122,7 +25612,7 @@ def SL1_loadri_io : HInst< (outs GeneralSubRegs:$Rd16), (ins GeneralSubRegs:$Rs16, u4_2Imm:$Ii), "$Rd16 = memw($Rs16+#$Ii)", -tc_bf6fa601, TypeSUBINSN>, Enc_53dca9 { +tc_7f881c76, TypeSUBINSN>, Enc_53dca9 { let Inst{12-12} = 0b0; let hasNewValue = 1; let opNewValue = 0; @@ -25136,7 +25626,7 @@ def SL1_loadrub_io : HInst< (outs GeneralSubRegs:$Rd16), (ins GeneralSubRegs:$Rs16, u4_0Imm:$Ii), "$Rd16 = memub($Rs16+#$Ii)", -tc_bf6fa601, TypeSUBINSN>, Enc_c175d0 { +tc_7f881c76, TypeSUBINSN>, Enc_c175d0 { let Inst{12-12} = 0b1; let hasNewValue = 1; let opNewValue = 0; @@ -25150,12 +25640,12 @@ def SL2_deallocframe : HInst< (outs), (ins), "deallocframe", -tc_86442910, TypeSUBINSN>, Enc_e3b0c4 { +tc_36c68ad1, TypeSUBINSN>, Enc_e3b0c4 { let Inst{12-0} = 0b1111100000000; let accessSize = DoubleWordAccess; let AsmVariantName = "NonParsable"; let mayLoad = 1; -let Uses = [R30]; +let Uses = [FRAMEKEY, R30]; let Defs = [R30, R29, R31]; let DecoderNamespace = "SUBINSN_L2"; } @@ -25163,12 +25653,12 @@ def SL2_jumpr31 : HInst< (outs), (ins), "jumpr r31", -tc_35fb9d13, TypeSUBINSN>, Enc_e3b0c4 { +tc_2a160009, TypeSUBINSN>, Enc_e3b0c4 { let Inst{12-0} = 0b1111111000000; let isTerminator = 1; let isIndirectBranch = 1; -let cofMax1 = 1; let AsmVariantName = "NonParsable"; +let cofMax1 = 1; let isReturn = 1; let Uses = [R31]; let Defs = [PC]; @@ -25178,14 +25668,14 @@ def SL2_jumpr31_f : HInst< (outs), (ins), "if (!p0) jumpr r31", -tc_35fb9d13, TypeSUBINSN>, Enc_e3b0c4 { +tc_2a160009, TypeSUBINSN>, Enc_e3b0c4 { let Inst{12-0} = 0b1111111000101; let 
isPredicated = 1; let isPredicatedFalse = 1; let isTerminator = 1; let isIndirectBranch = 1; -let cofMax1 = 1; let AsmVariantName = "NonParsable"; +let cofMax1 = 1; let isReturn = 1; let Uses = [P0, R31]; let Defs = [PC]; @@ -25196,15 +25686,15 @@ def SL2_jumpr31_fnew : HInst< (outs), (ins), "if (!p0.new) jumpr:nt r31", -tc_35fb9d13, TypeSUBINSN>, Enc_e3b0c4 { +tc_2a160009, TypeSUBINSN>, Enc_e3b0c4 { let Inst{12-0} = 0b1111111000111; let isPredicated = 1; let isPredicatedFalse = 1; let isTerminator = 1; let isIndirectBranch = 1; -let cofMax1 = 1; let AsmVariantName = "NonParsable"; let isPredicatedNew = 1; +let cofMax1 = 1; let isReturn = 1; let Uses = [P0, R31]; let Defs = [PC]; @@ -25215,13 +25705,13 @@ def SL2_jumpr31_t : HInst< (outs), (ins), "if (p0) jumpr r31", -tc_35fb9d13, TypeSUBINSN>, Enc_e3b0c4 { +tc_2a160009, TypeSUBINSN>, Enc_e3b0c4 { let Inst{12-0} = 0b1111111000100; let isPredicated = 1; let isTerminator = 1; let isIndirectBranch = 1; -let cofMax1 = 1; let AsmVariantName = "NonParsable"; +let cofMax1 = 1; let isReturn = 1; let Uses = [P0, R31]; let Defs = [PC]; @@ -25232,14 +25722,14 @@ def SL2_jumpr31_tnew : HInst< (outs), (ins), "if (p0.new) jumpr:nt r31", -tc_35fb9d13, TypeSUBINSN>, Enc_e3b0c4 { +tc_2a160009, TypeSUBINSN>, Enc_e3b0c4 { let Inst{12-0} = 0b1111111000110; let isPredicated = 1; let isTerminator = 1; let isIndirectBranch = 1; -let cofMax1 = 1; let AsmVariantName = "NonParsable"; let isPredicatedNew = 1; +let cofMax1 = 1; let isReturn = 1; let Uses = [P0, R31]; let Defs = [PC]; @@ -25250,7 +25740,7 @@ def SL2_loadrb_io : HInst< (outs GeneralSubRegs:$Rd16), (ins GeneralSubRegs:$Rs16, u3_0Imm:$Ii), "$Rd16 = memb($Rs16+#$Ii)", -tc_bf6fa601, TypeSUBINSN>, Enc_2fbf3c { +tc_7f881c76, TypeSUBINSN>, Enc_2fbf3c { let Inst{12-11} = 0b10; let hasNewValue = 1; let opNewValue = 0; @@ -25264,7 +25754,7 @@ def SL2_loadrd_sp : HInst< (outs GeneralDoubleLow8Regs:$Rdd8), (ins u5_3Imm:$Ii), "$Rdd8 = memd(r29+#$Ii)", -tc_70cabf66, TypeSUBINSN>, Enc_86a14b { +tc_9c98e8af, TypeSUBINSN>, Enc_86a14b { let Inst{12-8} = 0b11110; let hasNewValue = 1; let opNewValue = 0; @@ -25279,7 +25769,7 @@ def SL2_loadrh_io : HInst< (outs GeneralSubRegs:$Rd16), (ins GeneralSubRegs:$Rs16, u3_1Imm:$Ii), "$Rd16 = memh($Rs16+#$Ii)", -tc_bf6fa601, TypeSUBINSN>, Enc_2bae10 { +tc_7f881c76, TypeSUBINSN>, Enc_2bae10 { let Inst{12-11} = 0b00; let hasNewValue = 1; let opNewValue = 0; @@ -25293,7 +25783,7 @@ def SL2_loadri_sp : HInst< (outs GeneralSubRegs:$Rd16), (ins u5_2Imm:$Ii), "$Rd16 = memw(r29+#$Ii)", -tc_70cabf66, TypeSUBINSN>, Enc_51635c { +tc_9c98e8af, TypeSUBINSN>, Enc_51635c { let Inst{12-9} = 0b1110; let hasNewValue = 1; let opNewValue = 0; @@ -25308,7 +25798,7 @@ def SL2_loadruh_io : HInst< (outs GeneralSubRegs:$Rd16), (ins GeneralSubRegs:$Rs16, u3_1Imm:$Ii), "$Rd16 = memuh($Rs16+#$Ii)", -tc_bf6fa601, TypeSUBINSN>, Enc_2bae10 { +tc_7f881c76, TypeSUBINSN>, Enc_2bae10 { let Inst{12-11} = 0b01; let hasNewValue = 1; let opNewValue = 0; @@ -25322,16 +25812,17 @@ def SL2_return : HInst< (outs), (ins), "dealloc_return", -tc_95c54f8b, TypeSUBINSN>, Enc_e3b0c4 { +tc_fcab4871, TypeSUBINSN>, Enc_e3b0c4 { let Inst{12-0} = 0b1111101000000; let isTerminator = 1; let isIndirectBranch = 1; let accessSize = DoubleWordAccess; -let cofMax1 = 1; let AsmVariantName = "NonParsable"; let mayLoad = 1; +let cofMax1 = 1; +let isRestrictNoSlot1Store = 1; let isReturn = 1; -let Uses = [R30]; +let Uses = [FRAMEKEY, R30]; let Defs = [PC, R30, R29, R31]; let DecoderNamespace = "SUBINSN_L2"; } @@ -25339,18 +25830,19 @@ def 
SL2_return_f : HInst< (outs), (ins), "if (!p0) dealloc_return", -tc_95c54f8b, TypeSUBINSN>, Enc_e3b0c4 { +tc_fcab4871, TypeSUBINSN>, Enc_e3b0c4 { let Inst{12-0} = 0b1111101000101; let isPredicated = 1; let isPredicatedFalse = 1; let isTerminator = 1; let isIndirectBranch = 1; let accessSize = DoubleWordAccess; -let cofMax1 = 1; let AsmVariantName = "NonParsable"; let mayLoad = 1; +let cofMax1 = 1; +let isRestrictNoSlot1Store = 1; let isReturn = 1; -let Uses = [P0, R30]; +let Uses = [FRAMEKEY, P0, R30]; let Defs = [PC, R30, R29, R31]; let isTaken = Inst{4}; let DecoderNamespace = "SUBINSN_L2"; @@ -25359,19 +25851,20 @@ def SL2_return_fnew : HInst< (outs), (ins), "if (!p0.new) dealloc_return:nt", -tc_95c54f8b, TypeSUBINSN>, Enc_e3b0c4 { +tc_fcab4871, TypeSUBINSN>, Enc_e3b0c4 { let Inst{12-0} = 0b1111101000111; let isPredicated = 1; let isPredicatedFalse = 1; let isTerminator = 1; let isIndirectBranch = 1; let accessSize = DoubleWordAccess; -let cofMax1 = 1; let AsmVariantName = "NonParsable"; let isPredicatedNew = 1; let mayLoad = 1; +let cofMax1 = 1; +let isRestrictNoSlot1Store = 1; let isReturn = 1; -let Uses = [P0, R30]; +let Uses = [FRAMEKEY, P0, R30]; let Defs = [PC, R30, R29, R31]; let isTaken = Inst{4}; let DecoderNamespace = "SUBINSN_L2"; @@ -25380,17 +25873,18 @@ def SL2_return_t : HInst< (outs), (ins), "if (p0) dealloc_return", -tc_95c54f8b, TypeSUBINSN>, Enc_e3b0c4 { +tc_fcab4871, TypeSUBINSN>, Enc_e3b0c4 { let Inst{12-0} = 0b1111101000100; let isPredicated = 1; let isTerminator = 1; let isIndirectBranch = 1; let accessSize = DoubleWordAccess; -let cofMax1 = 1; let AsmVariantName = "NonParsable"; let mayLoad = 1; +let cofMax1 = 1; +let isRestrictNoSlot1Store = 1; let isReturn = 1; -let Uses = [P0, R30]; +let Uses = [FRAMEKEY, P0, R30]; let Defs = [PC, R30, R29, R31]; let isTaken = Inst{4}; let DecoderNamespace = "SUBINSN_L2"; @@ -25399,18 +25893,19 @@ def SL2_return_tnew : HInst< (outs), (ins), "if (p0.new) dealloc_return:nt", -tc_95c54f8b, TypeSUBINSN>, Enc_e3b0c4 { +tc_fcab4871, TypeSUBINSN>, Enc_e3b0c4 { let Inst{12-0} = 0b1111101000110; let isPredicated = 1; let isTerminator = 1; let isIndirectBranch = 1; let accessSize = DoubleWordAccess; -let cofMax1 = 1; let AsmVariantName = "NonParsable"; let isPredicatedNew = 1; let mayLoad = 1; +let cofMax1 = 1; +let isRestrictNoSlot1Store = 1; let isReturn = 1; -let Uses = [P0, R30]; +let Uses = [FRAMEKEY, P0, R30]; let Defs = [PC, R30, R29, R31]; let isTaken = Inst{4}; let DecoderNamespace = "SUBINSN_L2"; @@ -25419,7 +25914,7 @@ def SS1_storeb_io : HInst< (outs), (ins GeneralSubRegs:$Rs16, u4_0Imm:$Ii, GeneralSubRegs:$Rt16), "memb($Rs16+#$Ii) = $Rt16", -tc_53ee6546, TypeSUBINSN>, Enc_b38ffc { +tc_05b6c987, TypeSUBINSN>, Enc_b38ffc { let Inst{12-12} = 0b1; let addrMode = BaseImmOffset; let accessSize = ByteAccess; @@ -25431,7 +25926,7 @@ def SS1_storew_io : HInst< (outs), (ins GeneralSubRegs:$Rs16, u4_2Imm:$Ii, GeneralSubRegs:$Rt16), "memw($Rs16+#$Ii) = $Rt16", -tc_53ee6546, TypeSUBINSN>, Enc_f55a0c { +tc_05b6c987, TypeSUBINSN>, Enc_f55a0c { let Inst{12-12} = 0b0; let addrMode = BaseImmOffset; let accessSize = WordAccess; @@ -25443,14 +25938,14 @@ def SS2_allocframe : HInst< (outs), (ins u5_3Imm:$Ii), "allocframe(#$Ii)", -tc_f027ebe9, TypeSUBINSN>, Enc_6f70ca { +tc_0fc1ae07, TypeSUBINSN>, Enc_6f70ca { let Inst{3-0} = 0b0000; let Inst{12-9} = 0b1110; let addrMode = BaseImmOffset; let accessSize = DoubleWordAccess; let AsmVariantName = "NonParsable"; let mayStore = 1; -let Uses = [R30, R29, R31]; +let Uses = [FRAMEKEY, FRAMELIMIT, R30, 
R29, R31]; let Defs = [R30, R29]; let DecoderNamespace = "SUBINSN_S2"; } @@ -25458,7 +25953,7 @@ def SS2_storebi0 : HInst< (outs), (ins GeneralSubRegs:$Rs16, u4_0Imm:$Ii), "memb($Rs16+#$Ii) = #0", -tc_6c52d277, TypeSUBINSN>, Enc_84d359 { +tc_57288781, TypeSUBINSN>, Enc_84d359 { let Inst{12-8} = 0b10010; let addrMode = BaseImmOffset; let accessSize = ByteAccess; @@ -25470,7 +25965,7 @@ def SS2_storebi1 : HInst< (outs), (ins GeneralSubRegs:$Rs16, u4_0Imm:$Ii), "memb($Rs16+#$Ii) = #1", -tc_6c52d277, TypeSUBINSN>, Enc_84d359 { +tc_57288781, TypeSUBINSN>, Enc_84d359 { let Inst{12-8} = 0b10011; let addrMode = BaseImmOffset; let accessSize = ByteAccess; @@ -25482,7 +25977,7 @@ def SS2_stored_sp : HInst< (outs), (ins s6_3Imm:$Ii, GeneralDoubleLow8Regs:$Rtt8), "memd(r29+#$Ii) = $Rtt8", -tc_c14739d5, TypeSUBINSN>, Enc_b8309d { +tc_a788683e, TypeSUBINSN>, Enc_b8309d { let Inst{12-9} = 0b0101; let addrMode = BaseImmOffset; let accessSize = DoubleWordAccess; @@ -25495,7 +25990,7 @@ def SS2_storeh_io : HInst< (outs), (ins GeneralSubRegs:$Rs16, u3_1Imm:$Ii, GeneralSubRegs:$Rt16), "memh($Rs16+#$Ii) = $Rt16", -tc_53ee6546, TypeSUBINSN>, Enc_625deb { +tc_05b6c987, TypeSUBINSN>, Enc_625deb { let Inst{12-11} = 0b00; let addrMode = BaseImmOffset; let accessSize = HalfWordAccess; @@ -25507,7 +26002,7 @@ def SS2_storew_sp : HInst< (outs), (ins u5_2Imm:$Ii, GeneralSubRegs:$Rt16), "memw(r29+#$Ii) = $Rt16", -tc_c14739d5, TypeSUBINSN>, Enc_87c142 { +tc_a788683e, TypeSUBINSN>, Enc_87c142 { let Inst{12-9} = 0b0100; let addrMode = BaseImmOffset; let accessSize = WordAccess; @@ -25520,7 +26015,7 @@ def SS2_storewi0 : HInst< (outs), (ins GeneralSubRegs:$Rs16, u4_2Imm:$Ii), "memw($Rs16+#$Ii) = #0", -tc_6c52d277, TypeSUBINSN>, Enc_a6ce9c { +tc_57288781, TypeSUBINSN>, Enc_a6ce9c { let Inst{12-8} = 0b10000; let addrMode = BaseImmOffset; let accessSize = WordAccess; @@ -25532,7 +26027,7 @@ def SS2_storewi1 : HInst< (outs), (ins GeneralSubRegs:$Rs16, u4_2Imm:$Ii), "memw($Rs16+#$Ii) = #1", -tc_6c52d277, TypeSUBINSN>, Enc_a6ce9c { +tc_57288781, TypeSUBINSN>, Enc_a6ce9c { let Inst{12-8} = 0b10001; let addrMode = BaseImmOffset; let accessSize = WordAccess; @@ -25544,7 +26039,7 @@ def V6_MAP_equb : HInst< (outs HvxQR:$Qd4), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Qd4 = vcmp.eq($Vu32.ub,$Vv32.ub)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -25555,9 +26050,7 @@ def V6_MAP_equb_and : HInst< (outs HvxQR:$Qx4), (ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32), "$Qx4 &= vcmp.eq($Vu32.ub,$Vv32.ub)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { -let hasNewValue = 1; -let opNewValue = 0; +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let isPseudo = 1; let isCodeGenOnly = 1; let DecoderNamespace = "EXT_mmvec"; @@ -25567,9 +26060,7 @@ def V6_MAP_equb_ior : HInst< (outs HvxQR:$Qx4), (ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32), "$Qx4 |= vcmp.eq($Vu32.ub,$Vv32.ub)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { -let hasNewValue = 1; -let opNewValue = 0; +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let isAccumulator = 1; let isPseudo = 1; let isCodeGenOnly = 1; @@ -25580,9 +26071,7 @@ def V6_MAP_equb_xor : HInst< (outs HvxQR:$Qx4), (ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32), "$Qx4 ^= vcmp.eq($Vu32.ub,$Vv32.ub)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { -let hasNewValue = 1; -let opNewValue = 0; +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let isPseudo = 1; let isCodeGenOnly = 1; let DecoderNamespace = 
"EXT_mmvec"; @@ -25592,7 +26081,7 @@ def V6_MAP_equh : HInst< (outs HvxQR:$Qd4), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Qd4 = vcmp.eq($Vu32.uh,$Vv32.uh)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -25603,9 +26092,7 @@ def V6_MAP_equh_and : HInst< (outs HvxQR:$Qx4), (ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32), "$Qx4 &= vcmp.eq($Vu32.uh,$Vv32.uh)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { -let hasNewValue = 1; -let opNewValue = 0; +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let isPseudo = 1; let isCodeGenOnly = 1; let DecoderNamespace = "EXT_mmvec"; @@ -25615,9 +26102,7 @@ def V6_MAP_equh_ior : HInst< (outs HvxQR:$Qx4), (ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32), "$Qx4 |= vcmp.eq($Vu32.uh,$Vv32.uh)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { -let hasNewValue = 1; -let opNewValue = 0; +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let isAccumulator = 1; let isPseudo = 1; let isCodeGenOnly = 1; @@ -25628,9 +26113,7 @@ def V6_MAP_equh_xor : HInst< (outs HvxQR:$Qx4), (ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32), "$Qx4 ^= vcmp.eq($Vu32.uh,$Vv32.uh)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { -let hasNewValue = 1; -let opNewValue = 0; +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let isPseudo = 1; let isCodeGenOnly = 1; let DecoderNamespace = "EXT_mmvec"; @@ -25640,7 +26123,7 @@ def V6_MAP_equw : HInst< (outs HvxQR:$Qd4), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Qd4 = vcmp.eq($Vu32.uw,$Vv32.uw)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -25651,9 +26134,7 @@ def V6_MAP_equw_and : HInst< (outs HvxQR:$Qx4), (ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32), "$Qx4 &= vcmp.eq($Vu32.uw,$Vv32.uw)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { -let hasNewValue = 1; -let opNewValue = 0; +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let isPseudo = 1; let isCodeGenOnly = 1; let DecoderNamespace = "EXT_mmvec"; @@ -25663,9 +26144,7 @@ def V6_MAP_equw_ior : HInst< (outs HvxQR:$Qx4), (ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32), "$Qx4 |= vcmp.eq($Vu32.uw,$Vv32.uw)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { -let hasNewValue = 1; -let opNewValue = 0; +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let isAccumulator = 1; let isPseudo = 1; let isCodeGenOnly = 1; @@ -25676,9 +26155,7 @@ def V6_MAP_equw_xor : HInst< (outs HvxQR:$Qx4), (ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32), "$Qx4 ^= vcmp.eq($Vu32.uw,$Vv32.uw)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { -let hasNewValue = 1; -let opNewValue = 0; +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let isPseudo = 1; let isCodeGenOnly = 1; let DecoderNamespace = "EXT_mmvec"; @@ -25688,7 +26165,7 @@ def V6_extractw : HInst< (outs IntRegs:$Rd32), (ins HvxVR:$Vu32, IntRegs:$Rs32), "$Rd32 = vextract($Vu32,$Rs32)", -tc_9777e6bf, TypeLD>, Enc_50e578, Requires<[HasV60T,UseHVX]> { +tc_9777e6bf, TypeLD>, Enc_50e578, Requires<[UseHVXV60]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10010010000; @@ -25702,7 +26179,7 @@ def V6_extractw_alt : HInst< (outs IntRegs:$Rd32), (ins HvxVR:$Vu32, IntRegs:$Rs32), "$Rd32.w = vextract($Vu32,$Rs32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -25713,7 +26190,7 @@ def V6_hi : HInst< (outs HvxVR:$Vd32), (ins HvxWR:$Vss32), 
"$Vd32 = hi($Vss32)", -CVI_VA, TypeCVI_VA>, Requires<[HasV60T,UseHVX]> { +CVI_VA, TypeCVI_VA>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -25723,7 +26200,7 @@ def V6_ld0 : HInst< (outs HvxVR:$Vd32), (ins IntRegs:$Rt32), "$Vd32 = vmem($Rt32)", -PSEUDO, TypeCVI_VM_LD>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeCVI_VM_LD>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -25734,7 +26211,7 @@ def V6_ldcnp0 : HInst< (outs HvxVR:$Vd32), (ins PredRegs:$Pv4, IntRegs:$Rt32), "if (!$Pv4) $Vd32.cur = vmem($Rt32)", -PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV62]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -25745,7 +26222,7 @@ def V6_ldcnpnt0 : HInst< (outs HvxVR:$Vd32), (ins PredRegs:$Pv4, IntRegs:$Rt32), "if (!$Pv4) $Vd32.cur = vmem($Rt32):nt", -PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV62]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -25756,7 +26233,7 @@ def V6_ldcp0 : HInst< (outs HvxVR:$Vd32), (ins PredRegs:$Pv4, IntRegs:$Rt32), "if ($Pv4) $Vd32.cur = vmem($Rt32)", -PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV62]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -25767,7 +26244,7 @@ def V6_ldcpnt0 : HInst< (outs HvxVR:$Vd32), (ins PredRegs:$Pv4, IntRegs:$Rt32), "if ($Pv4) $Vd32.cur = vmem($Rt32):nt", -PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV62]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -25778,7 +26255,7 @@ def V6_ldnp0 : HInst< (outs HvxVR:$Vd32), (ins PredRegs:$Pv4, IntRegs:$Rt32), "if (!$Pv4) $Vd32 = vmem($Rt32)", -PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV62]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -25789,7 +26266,7 @@ def V6_ldnpnt0 : HInst< (outs HvxVR:$Vd32), (ins PredRegs:$Pv4, IntRegs:$Rt32), "if (!$Pv4) $Vd32 = vmem($Rt32):nt", -PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV62]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -25800,7 +26277,18 @@ def V6_ldnt0 : HInst< (outs HvxVR:$Vd32), (ins IntRegs:$Rt32), "$Vd32 = vmem($Rt32):nt", -PSEUDO, TypeCVI_VM_LD>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeCVI_VM_LD>, Requires<[UseHVXV60]> { +let hasNewValue = 1; +let opNewValue = 0; +let isPseudo = 1; +let isCodeGenOnly = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_ldntnt0 : HInst< +(outs HvxVR:$Vd32), +(ins IntRegs:$Rt32), +"$Vd32 = vmem($Rt32):nt", +PSEUDO, TypeMAPPING>, Requires<[HasV62T]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -25811,7 +26299,7 @@ def V6_ldp0 : HInst< (outs HvxVR:$Vd32), (ins PredRegs:$Pv4, IntRegs:$Rt32), "if ($Pv4) $Vd32 = vmem($Rt32)", -PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV62]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -25822,7 +26310,7 @@ def V6_ldpnt0 : HInst< (outs HvxVR:$Vd32), (ins PredRegs:$Pv4, IntRegs:$Rt32), "if ($Pv4) $Vd32 = vmem($Rt32):nt", -PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV62]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -25833,7 +26321,7 @@ def V6_ldtnp0 : HInst< (outs HvxVR:$Vd32), (ins PredRegs:$Pv4, IntRegs:$Rt32), "if (!$Pv4) $Vd32.tmp = vmem($Rt32)", -PSEUDO, TypeMAPPING>, 
Requires<[HasV62T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV62]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -25844,7 +26332,7 @@ def V6_ldtnpnt0 : HInst< (outs HvxVR:$Vd32), (ins PredRegs:$Pv4, IntRegs:$Rt32), "if (!$Pv4) $Vd32.tmp = vmem($Rt32):nt", -PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV62]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -25855,7 +26343,7 @@ def V6_ldtp0 : HInst< (outs HvxVR:$Vd32), (ins PredRegs:$Pv4, IntRegs:$Rt32), "if ($Pv4) $Vd32.tmp = vmem($Rt32)", -PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV62]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -25866,7 +26354,7 @@ def V6_ldtpnt0 : HInst< (outs HvxVR:$Vd32), (ins PredRegs:$Pv4, IntRegs:$Rt32), "if ($Pv4) $Vd32.tmp = vmem($Rt32):nt", -PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV62]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -25877,7 +26365,7 @@ def V6_ldu0 : HInst< (outs HvxVR:$Vd32), (ins IntRegs:$Rt32), "$Vd32 = vmemu($Rt32)", -PSEUDO, TypeCVI_VM_LD>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeCVI_VM_LD>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -25888,7 +26376,7 @@ def V6_lo : HInst< (outs HvxVR:$Vd32), (ins HvxWR:$Vss32), "$Vd32 = lo($Vss32)", -CVI_VA, TypeCVI_VA>, Requires<[HasV60T,UseHVX]> { +CVI_VA, TypeCVI_VA>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -25898,7 +26386,7 @@ def V6_lvsplatb : HInst< (outs HvxVR:$Vd32), (ins IntRegs:$Rt32), "$Vd32.b = vsplat($Rt32)", -tc_6b78cf13, TypeCVI_VX>, Enc_a5ed8a, Requires<[HasV62T,UseHVX]> { +tc_6b78cf13, TypeCVI_VX>, Enc_a5ed8a, Requires<[UseHVXV62]> { let Inst{13-5} = 0b000000010; let Inst{31-21} = 0b00011001110; let hasNewValue = 1; @@ -25909,7 +26397,7 @@ def V6_lvsplath : HInst< (outs HvxVR:$Vd32), (ins IntRegs:$Rt32), "$Vd32.h = vsplat($Rt32)", -tc_6b78cf13, TypeCVI_VX>, Enc_a5ed8a, Requires<[HasV62T,UseHVX]> { +tc_6b78cf13, TypeCVI_VX>, Enc_a5ed8a, Requires<[UseHVXV62]> { let Inst{13-5} = 0b000000001; let Inst{31-21} = 0b00011001110; let hasNewValue = 1; @@ -25920,7 +26408,7 @@ def V6_lvsplatw : HInst< (outs HvxVR:$Vd32), (ins IntRegs:$Rt32), "$Vd32 = vsplat($Rt32)", -tc_6b78cf13, TypeCVI_VX_LATE>, Enc_a5ed8a, Requires<[HasV60T,UseHVX]> { +tc_6b78cf13, TypeCVI_VX_LATE>, Enc_a5ed8a, Requires<[UseHVXV60]> { let Inst{13-5} = 0b000000001; let Inst{31-21} = 0b00011001101; let hasNewValue = 1; @@ -25931,7 +26419,7 @@ def V6_pred_and : HInst< (outs HvxQR:$Qd4), (ins HvxQR:$Qs4, HvxQR:$Qt4), "$Qd4 = and($Qs4,$Qt4)", -tc_97c165b9, TypeCVI_VA_DV>, Enc_134437, Requires<[HasV60T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_134437, Requires<[UseHVXV60]> { let Inst{7-2} = 0b000000; let Inst{13-10} = 0b0000; let Inst{21-16} = 0b000011; @@ -25944,7 +26432,7 @@ def V6_pred_and_n : HInst< (outs HvxQR:$Qd4), (ins HvxQR:$Qs4, HvxQR:$Qt4), "$Qd4 = and($Qs4,!$Qt4)", -tc_97c165b9, TypeCVI_VA_DV>, Enc_134437, Requires<[HasV60T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_134437, Requires<[UseHVXV60]> { let Inst{7-2} = 0b000101; let Inst{13-10} = 0b0000; let Inst{21-16} = 0b000011; @@ -25957,7 +26445,7 @@ def V6_pred_not : HInst< (outs HvxQR:$Qd4), (ins HvxQR:$Qs4), "$Qd4 = not($Qs4)", -tc_71337255, TypeCVI_VA>, Enc_bfbf03, Requires<[HasV60T,UseHVX]> { +tc_71337255, TypeCVI_VA>, Enc_bfbf03, Requires<[UseHVXV60]> { let Inst{7-2} = 0b000010; let Inst{13-10} = 0b0000; let 
Inst{31-16} = 0b0001111000000011; @@ -25969,7 +26457,7 @@ def V6_pred_or : HInst< (outs HvxQR:$Qd4), (ins HvxQR:$Qs4, HvxQR:$Qt4), "$Qd4 = or($Qs4,$Qt4)", -tc_97c165b9, TypeCVI_VA_DV>, Enc_134437, Requires<[HasV60T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_134437, Requires<[UseHVXV60]> { let Inst{7-2} = 0b000001; let Inst{13-10} = 0b0000; let Inst{21-16} = 0b000011; @@ -25982,7 +26470,7 @@ def V6_pred_or_n : HInst< (outs HvxQR:$Qd4), (ins HvxQR:$Qs4, HvxQR:$Qt4), "$Qd4 = or($Qs4,!$Qt4)", -tc_97c165b9, TypeCVI_VA_DV>, Enc_134437, Requires<[HasV60T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_134437, Requires<[UseHVXV60]> { let Inst{7-2} = 0b000100; let Inst{13-10} = 0b0000; let Inst{21-16} = 0b000011; @@ -25995,7 +26483,7 @@ def V6_pred_scalar2 : HInst< (outs HvxQR:$Qd4), (ins IntRegs:$Rt32), "$Qd4 = vsetq($Rt32)", -tc_4105d6b5, TypeCVI_VP>, Enc_7222b7, Requires<[HasV60T,UseHVX]> { +tc_4105d6b5, TypeCVI_VP>, Enc_7222b7, Requires<[UseHVXV60]> { let Inst{13-2} = 0b000000010001; let Inst{31-21} = 0b00011001101; let hasNewValue = 1; @@ -26006,7 +26494,7 @@ def V6_pred_scalar2v2 : HInst< (outs HvxQR:$Qd4), (ins IntRegs:$Rt32), "$Qd4 = vsetq2($Rt32)", -tc_4105d6b5, TypeCVI_VP>, Enc_7222b7, Requires<[HasV62T,UseHVX]> { +tc_4105d6b5, TypeCVI_VP>, Enc_7222b7, Requires<[UseHVXV62]> { let Inst{13-2} = 0b000000010011; let Inst{31-21} = 0b00011001101; let hasNewValue = 1; @@ -26017,7 +26505,7 @@ def V6_pred_xor : HInst< (outs HvxQR:$Qd4), (ins HvxQR:$Qs4, HvxQR:$Qt4), "$Qd4 = xor($Qs4,$Qt4)", -tc_97c165b9, TypeCVI_VA_DV>, Enc_134437, Requires<[HasV60T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_134437, Requires<[UseHVXV60]> { let Inst{7-2} = 0b000011; let Inst{13-10} = 0b0000; let Inst{21-16} = 0b000011; @@ -26030,7 +26518,7 @@ def V6_shuffeqh : HInst< (outs HvxQR:$Qd4), (ins HvxQR:$Qs4, HvxQR:$Qt4), "$Qd4.b = vshuffe($Qs4.h,$Qt4.h)", -tc_97c165b9, TypeCVI_VA_DV>, Enc_134437, Requires<[HasV62T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_134437, Requires<[UseHVXV62]> { let Inst{7-2} = 0b000110; let Inst{13-10} = 0b0000; let Inst{21-16} = 0b000011; @@ -26043,7 +26531,7 @@ def V6_shuffeqw : HInst< (outs HvxQR:$Qd4), (ins HvxQR:$Qs4, HvxQR:$Qt4), "$Qd4.h = vshuffe($Qs4.w,$Qt4.w)", -tc_97c165b9, TypeCVI_VA_DV>, Enc_134437, Requires<[HasV62T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_134437, Requires<[UseHVXV62]> { let Inst{7-2} = 0b000111; let Inst{13-10} = 0b0000; let Inst{21-16} = 0b000011; @@ -26056,7 +26544,7 @@ def V6_st0 : HInst< (outs), (ins IntRegs:$Rt32, HvxVR:$Vs32), "vmem($Rt32) = $Vs32", -PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeCVI_VM_ST>, Requires<[UseHVXV60]> { let isPseudo = 1; let isCodeGenOnly = 1; let DecoderNamespace = "EXT_mmvec"; @@ -26065,7 +26553,7 @@ def V6_stn0 : HInst< (outs), (ins IntRegs:$Rt32, HvxVR:$Os8), "vmem($Rt32) = $Os8.new", -PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeCVI_VM_ST>, Requires<[UseHVXV60]> { let isPseudo = 1; let isCodeGenOnly = 1; let DecoderNamespace = "EXT_mmvec"; @@ -26075,7 +26563,7 @@ def V6_stnnt0 : HInst< (outs), (ins IntRegs:$Rt32, HvxVR:$Os8), "vmem($Rt32):nt = $Os8.new", -PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeCVI_VM_ST>, Requires<[UseHVXV60]> { let isPseudo = 1; let isCodeGenOnly = 1; let DecoderNamespace = "EXT_mmvec"; @@ -26085,7 +26573,7 @@ def V6_stnp0 : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rt32, HvxVR:$Vs32), "if (!$Pv4) vmem($Rt32) = $Vs32", -PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeCVI_VM_ST>, Requires<[UseHVXV60]> { let isPseudo 
= 1; let isCodeGenOnly = 1; let DecoderNamespace = "EXT_mmvec"; @@ -26094,7 +26582,7 @@ def V6_stnpnt0 : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rt32, HvxVR:$Vs32), "if (!$Pv4) vmem($Rt32):nt = $Vs32", -PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeCVI_VM_ST>, Requires<[UseHVXV60]> { let isPseudo = 1; let isCodeGenOnly = 1; let DecoderNamespace = "EXT_mmvec"; @@ -26103,7 +26591,7 @@ def V6_stnq0 : HInst< (outs), (ins HvxQR:$Qv4, IntRegs:$Rt32, HvxVR:$Vs32), "if (!$Qv4) vmem($Rt32) = $Vs32", -PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeCVI_VM_ST>, Requires<[UseHVXV60]> { let isPseudo = 1; let isCodeGenOnly = 1; let DecoderNamespace = "EXT_mmvec"; @@ -26112,7 +26600,7 @@ def V6_stnqnt0 : HInst< (outs), (ins HvxQR:$Qv4, IntRegs:$Rt32, HvxVR:$Vs32), "if (!$Qv4) vmem($Rt32):nt = $Vs32", -PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeCVI_VM_ST>, Requires<[UseHVXV60]> { let isPseudo = 1; let isCodeGenOnly = 1; let DecoderNamespace = "EXT_mmvec"; @@ -26121,7 +26609,7 @@ def V6_stnt0 : HInst< (outs), (ins IntRegs:$Rt32, HvxVR:$Vs32), "vmem($Rt32):nt = $Vs32", -PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeCVI_VM_ST>, Requires<[UseHVXV60]> { let isPseudo = 1; let isCodeGenOnly = 1; let DecoderNamespace = "EXT_mmvec"; @@ -26130,7 +26618,7 @@ def V6_stp0 : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rt32, HvxVR:$Vs32), "if ($Pv4) vmem($Rt32) = $Vs32", -PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeCVI_VM_ST>, Requires<[UseHVXV60]> { let isPseudo = 1; let isCodeGenOnly = 1; let DecoderNamespace = "EXT_mmvec"; @@ -26139,7 +26627,7 @@ def V6_stpnt0 : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rt32, HvxVR:$Vs32), "if ($Pv4) vmem($Rt32):nt = $Vs32", -PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeCVI_VM_ST>, Requires<[UseHVXV60]> { let isPseudo = 1; let isCodeGenOnly = 1; let DecoderNamespace = "EXT_mmvec"; @@ -26148,7 +26636,7 @@ def V6_stq0 : HInst< (outs), (ins HvxQR:$Qv4, IntRegs:$Rt32, HvxVR:$Vs32), "if ($Qv4) vmem($Rt32) = $Vs32", -PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeCVI_VM_ST>, Requires<[UseHVXV60]> { let isPseudo = 1; let isCodeGenOnly = 1; let DecoderNamespace = "EXT_mmvec"; @@ -26157,7 +26645,7 @@ def V6_stqnt0 : HInst< (outs), (ins HvxQR:$Qv4, IntRegs:$Rt32, HvxVR:$Vs32), "if ($Qv4) vmem($Rt32):nt = $Vs32", -PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeCVI_VM_ST>, Requires<[UseHVXV60]> { let isPseudo = 1; let isCodeGenOnly = 1; let DecoderNamespace = "EXT_mmvec"; @@ -26166,7 +26654,7 @@ def V6_stu0 : HInst< (outs), (ins IntRegs:$Rt32, HvxVR:$Vs32), "vmemu($Rt32) = $Vs32", -PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeCVI_VM_ST>, Requires<[UseHVXV60]> { let isPseudo = 1; let isCodeGenOnly = 1; let DecoderNamespace = "EXT_mmvec"; @@ -26175,7 +26663,7 @@ def V6_stunp0 : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rt32, HvxVR:$Vs32), "if (!$Pv4) vmemu($Rt32) = $Vs32", -PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeCVI_VM_ST>, Requires<[UseHVXV60]> { let isPseudo = 1; let isCodeGenOnly = 1; let DecoderNamespace = "EXT_mmvec"; @@ -26184,7 +26672,7 @@ def V6_stup0 : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rt32, HvxVR:$Vs32), "if ($Pv4) vmemu($Rt32) = $Vs32", -PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeCVI_VM_ST>, Requires<[UseHVXV60]> { let isPseudo = 1; let isCodeGenOnly = 1; let DecoderNamespace = "EXT_mmvec"; @@ -26193,7 +26681,7 @@ def V6_vL32Ub_ai : HInst< 
(outs HvxVR:$Vd32), (ins IntRegs:$Rt32, s4_0Imm:$Ii), "$Vd32 = vmemu($Rt32+#$Ii)", -tc_35e92f8e, TypeCVI_VM_VP_LDU>, Enc_f3f408, Requires<[HasV60T,UseHVX]> { +tc_35e92f8e, TypeCVI_VM_VP_LDU>, Enc_f3f408, Requires<[UseHVXV60]> { let Inst{7-5} = 0b111; let Inst{12-11} = 0b00; let Inst{31-21} = 0b00101000000; @@ -26203,13 +26691,14 @@ let addrMode = BaseImmOffset; let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; +let isRestrictNoSlot1Store = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vL32Ub_pi : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s3_0Imm:$Ii), "$Vd32 = vmemu($Rx32++#$Ii)", -tc_4fd8566e, TypeCVI_VM_VP_LDU>, Enc_a255dc, Requires<[HasV60T,UseHVX]> { +tc_4fd8566e, TypeCVI_VM_VP_LDU>, Enc_a255dc, Requires<[UseHVXV60]> { let Inst{7-5} = 0b111; let Inst{13-11} = 0b000; let Inst{31-21} = 0b00101001000; @@ -26219,6 +26708,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; +let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_pi"; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; @@ -26227,7 +26717,7 @@ def V6_vL32Ub_ppu : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Vd32 = vmemu($Rx32++$Mu2)", -tc_4fd8566e, TypeCVI_VM_VP_LDU>, Enc_2ebe3b, Requires<[HasV60T,UseHVX]> { +tc_4fd8566e, TypeCVI_VM_VP_LDU>, Enc_2ebe3b, Requires<[UseHVXV60]> { let Inst{12-5} = 0b00000111; let Inst{31-21} = 0b00101011000; let hasNewValue = 1; @@ -26236,6 +26726,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; +let isRestrictNoSlot1Store = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -26243,7 +26734,7 @@ def V6_vL32b_ai : HInst< (outs HvxVR:$Vd32), (ins IntRegs:$Rt32, s4_0Imm:$Ii), "$Vd32 = vmem($Rt32+#$Ii)", -tc_b712833a, TypeCVI_VM_LD>, Enc_f3f408, Requires<[HasV60T,UseHVX]>, PredRel { +tc_b712833a, TypeCVI_VM_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel { let Inst{7-5} = 0b000; let Inst{12-11} = 0b00; let Inst{31-21} = 0b00101000000; @@ -26253,16 +26744,17 @@ let addrMode = BaseImmOffset; let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; +let isRestrictNoSlot1Store = 1; +let BaseOpcode = "V6_vL32b_ai"; let isCVLoadable = 1; let isPredicable = 1; -let BaseOpcode = "V6_vL32b_ai"; let DecoderNamespace = "EXT_mmvec"; } def V6_vL32b_cur_ai : HInst< (outs HvxVR:$Vd32), (ins IntRegs:$Rt32, s4_0Imm:$Ii), "$Vd32.cur = vmem($Rt32+#$Ii)", -tc_b712833a, TypeCVI_VM_LD>, Enc_f3f408, Requires<[HasV60T,UseHVX]>, PredRel { +tc_b712833a, TypeCVI_VM_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel { let Inst{7-5} = 0b001; let Inst{12-11} = 0b00; let Inst{31-21} = 0b00101000000; @@ -26273,15 +26765,16 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let CVINew = 1; let mayLoad = 1; -let isPredicable = 1; +let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_cur_ai"; +let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vL32b_cur_npred_ai : HInst< (outs HvxVR:$Vd32), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii), "if (!$Pv4) $Vd32.cur = vmem($Rt32+#$Ii)", -tc_5cbf490b, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[HasV62T,UseHVX]>, PredRel { +tc_5cbf490b, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel { let Inst{7-5} = 0b101; let Inst{31-21} = 0b00101000100; let isPredicated = 1; @@ -26293,6 +26786,7 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let CVINew = 1; let mayLoad = 1; +let isRestrictNoSlot1Store = 1; let BaseOpcode = 
"V6_vL32b_cur_ai"; let DecoderNamespace = "EXT_mmvec"; } @@ -26300,7 +26794,7 @@ def V6_vL32b_cur_npred_pi : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii), "if (!$Pv4) $Vd32.cur = vmem($Rx32++#$Ii)", -tc_da979fb3, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[HasV62T,UseHVX]>, PredRel { +tc_da979fb3, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001100; @@ -26313,6 +26807,7 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let CVINew = 1; let mayLoad = 1; +let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_cur_pi"; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; @@ -26321,7 +26816,7 @@ def V6_vL32b_cur_npred_ppu : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2), "if (!$Pv4) $Vd32.cur = vmem($Rx32++$Mu2)", -tc_da979fb3, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[HasV62T,UseHVX]>, PredRel { +tc_da979fb3, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel { let Inst{10-5} = 0b000101; let Inst{31-21} = 0b00101011100; let isPredicated = 1; @@ -26333,6 +26828,7 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let CVINew = 1; let mayLoad = 1; +let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_cur_ppu"; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; @@ -26341,7 +26837,7 @@ def V6_vL32b_cur_pi : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s3_0Imm:$Ii), "$Vd32.cur = vmem($Rx32++#$Ii)", -tc_eb669007, TypeCVI_VM_LD>, Enc_a255dc, Requires<[HasV60T,UseHVX]>, PredRel { +tc_eb669007, TypeCVI_VM_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel { let Inst{7-5} = 0b001; let Inst{13-11} = 0b000; let Inst{31-21} = 0b00101001000; @@ -26352,8 +26848,9 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let CVINew = 1; let mayLoad = 1; -let isPredicable = 1; +let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_cur_pi"; +let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -26361,7 +26858,7 @@ def V6_vL32b_cur_ppu : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Vd32.cur = vmem($Rx32++$Mu2)", -tc_eb669007, TypeCVI_VM_LD>, Enc_2ebe3b, Requires<[HasV60T,UseHVX]>, PredRel { +tc_eb669007, TypeCVI_VM_LD>, Enc_2ebe3b, Requires<[UseHVXV60]>, PredRel { let Inst{12-5} = 0b00000001; let Inst{31-21} = 0b00101011000; let hasNewValue = 1; @@ -26371,8 +26868,9 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let CVINew = 1; let mayLoad = 1; -let isPredicable = 1; +let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_cur_ppu"; +let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -26380,7 +26878,7 @@ def V6_vL32b_cur_pred_ai : HInst< (outs HvxVR:$Vd32), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii), "if ($Pv4) $Vd32.cur = vmem($Rt32+#$Ii)", -tc_5cbf490b, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[HasV62T,UseHVX]>, PredRel { +tc_5cbf490b, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel { let Inst{7-5} = 0b100; let Inst{31-21} = 0b00101000100; let isPredicated = 1; @@ -26391,6 +26889,7 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let CVINew = 1; let mayLoad = 1; +let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_cur_ai"; let DecoderNamespace = "EXT_mmvec"; } @@ -26398,7 +26897,7 @@ def V6_vL32b_cur_pred_pi : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, 
s3_0Imm:$Ii), "if ($Pv4) $Vd32.cur = vmem($Rx32++#$Ii)", -tc_da979fb3, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[HasV62T,UseHVX]>, PredRel { +tc_da979fb3, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001100; @@ -26410,6 +26909,7 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let CVINew = 1; let mayLoad = 1; +let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_cur_pi"; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; @@ -26418,7 +26918,7 @@ def V6_vL32b_cur_pred_ppu : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2), "if ($Pv4) $Vd32.cur = vmem($Rx32++$Mu2)", -tc_da979fb3, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[HasV62T,UseHVX]>, PredRel { +tc_da979fb3, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel { let Inst{10-5} = 0b000100; let Inst{31-21} = 0b00101011100; let isPredicated = 1; @@ -26429,6 +26929,7 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let CVINew = 1; let mayLoad = 1; +let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_cur_ppu"; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; @@ -26437,7 +26938,7 @@ def V6_vL32b_npred_ai : HInst< (outs HvxVR:$Vd32), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii), "if (!$Pv4) $Vd32 = vmem($Rt32+#$Ii)", -tc_5cbf490b, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[HasV62T,UseHVX]>, PredRel { +tc_5cbf490b, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel { let Inst{7-5} = 0b011; let Inst{31-21} = 0b00101000100; let isPredicated = 1; @@ -26448,6 +26949,7 @@ let addrMode = BaseImmOffset; let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; +let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_ai"; let DecoderNamespace = "EXT_mmvec"; } @@ -26455,7 +26957,7 @@ def V6_vL32b_npred_pi : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii), "if (!$Pv4) $Vd32 = vmem($Rx32++#$Ii)", -tc_da979fb3, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[HasV62T,UseHVX]>, PredRel { +tc_da979fb3, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001100; @@ -26467,6 +26969,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; +let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_pi"; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; @@ -26475,7 +26978,7 @@ def V6_vL32b_npred_ppu : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2), "if (!$Pv4) $Vd32 = vmem($Rx32++$Mu2)", -tc_da979fb3, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[HasV62T,UseHVX]>, PredRel { +tc_da979fb3, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel { let Inst{10-5} = 0b000011; let Inst{31-21} = 0b00101011100; let isPredicated = 1; @@ -26486,6 +26989,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; +let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_ppu"; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; @@ -26494,7 +26998,7 @@ def V6_vL32b_nt_ai : HInst< (outs HvxVR:$Vd32), (ins IntRegs:$Rt32, s4_0Imm:$Ii), "$Vd32 = vmem($Rt32+#$Ii):nt", -tc_b712833a, TypeCVI_VM_LD>, Enc_f3f408, Requires<[HasV60T,UseHVX]>, PredRel { +tc_b712833a, TypeCVI_VM_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel { let Inst{7-5} = 0b000; let Inst{12-11} = 0b00; let Inst{31-21} = 
0b00101000010; @@ -26505,16 +27009,17 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; let isNonTemporal = 1; +let isRestrictNoSlot1Store = 1; +let BaseOpcode = "V6_vL32b_nt_ai"; let isCVLoadable = 1; let isPredicable = 1; -let BaseOpcode = "V6_vL32b_nt_ai"; let DecoderNamespace = "EXT_mmvec"; } def V6_vL32b_nt_cur_ai : HInst< (outs HvxVR:$Vd32), (ins IntRegs:$Rt32, s4_0Imm:$Ii), "$Vd32.cur = vmem($Rt32+#$Ii):nt", -tc_b712833a, TypeCVI_VM_LD>, Enc_f3f408, Requires<[HasV60T,UseHVX]>, PredRel { +tc_b712833a, TypeCVI_VM_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel { let Inst{7-5} = 0b001; let Inst{12-11} = 0b00; let Inst{31-21} = 0b00101000010; @@ -26526,15 +27031,16 @@ let isCVLoad = 1; let CVINew = 1; let mayLoad = 1; let isNonTemporal = 1; -let isPredicable = 1; +let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_nt_cur_ai"; +let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vL32b_nt_cur_npred_ai : HInst< (outs HvxVR:$Vd32), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii), "if (!$Pv4) $Vd32.cur = vmem($Rt32+#$Ii):nt", -tc_5cbf490b, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[HasV62T,UseHVX]>, PredRel { +tc_5cbf490b, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel { let Inst{7-5} = 0b101; let Inst{31-21} = 0b00101000110; let isPredicated = 1; @@ -26547,6 +27053,7 @@ let isCVLoad = 1; let CVINew = 1; let mayLoad = 1; let isNonTemporal = 1; +let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_nt_cur_ai"; let DecoderNamespace = "EXT_mmvec"; } @@ -26554,7 +27061,7 @@ def V6_vL32b_nt_cur_npred_pi : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii), "if (!$Pv4) $Vd32.cur = vmem($Rx32++#$Ii):nt", -tc_da979fb3, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[HasV62T,UseHVX]>, PredRel { +tc_da979fb3, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001110; @@ -26568,6 +27075,7 @@ let isCVLoad = 1; let CVINew = 1; let mayLoad = 1; let isNonTemporal = 1; +let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_nt_cur_pi"; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; @@ -26576,7 +27084,7 @@ def V6_vL32b_nt_cur_npred_ppu : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2), "if (!$Pv4) $Vd32.cur = vmem($Rx32++$Mu2):nt", -tc_da979fb3, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[HasV62T,UseHVX]>, PredRel { +tc_da979fb3, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel { let Inst{10-5} = 0b000101; let Inst{31-21} = 0b00101011110; let isPredicated = 1; @@ -26589,6 +27097,7 @@ let isCVLoad = 1; let CVINew = 1; let mayLoad = 1; let isNonTemporal = 1; +let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_nt_cur_ppu"; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; @@ -26597,7 +27106,7 @@ def V6_vL32b_nt_cur_pi : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s3_0Imm:$Ii), "$Vd32.cur = vmem($Rx32++#$Ii):nt", -tc_eb669007, TypeCVI_VM_LD>, Enc_a255dc, Requires<[HasV60T,UseHVX]>, PredRel { +tc_eb669007, TypeCVI_VM_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel { let Inst{7-5} = 0b001; let Inst{13-11} = 0b000; let Inst{31-21} = 0b00101001010; @@ -26609,8 +27118,9 @@ let isCVLoad = 1; let CVINew = 1; let mayLoad = 1; let isNonTemporal = 1; -let isPredicable = 1; +let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_nt_cur_pi"; +let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = 
$Rx32in"; } @@ -26618,7 +27128,7 @@ def V6_vL32b_nt_cur_ppu : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Vd32.cur = vmem($Rx32++$Mu2):nt", -tc_eb669007, TypeCVI_VM_LD>, Enc_2ebe3b, Requires<[HasV60T,UseHVX]>, PredRel { +tc_eb669007, TypeCVI_VM_LD>, Enc_2ebe3b, Requires<[UseHVXV60]>, PredRel { let Inst{12-5} = 0b00000001; let Inst{31-21} = 0b00101011010; let hasNewValue = 1; @@ -26629,8 +27139,9 @@ let isCVLoad = 1; let CVINew = 1; let mayLoad = 1; let isNonTemporal = 1; -let isPredicable = 1; +let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_nt_cur_ppu"; +let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -26638,7 +27149,7 @@ def V6_vL32b_nt_cur_pred_ai : HInst< (outs HvxVR:$Vd32), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii), "if ($Pv4) $Vd32.cur = vmem($Rt32+#$Ii):nt", -tc_5cbf490b, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[HasV62T,UseHVX]>, PredRel { +tc_5cbf490b, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel { let Inst{7-5} = 0b100; let Inst{31-21} = 0b00101000110; let isPredicated = 1; @@ -26650,6 +27161,7 @@ let isCVLoad = 1; let CVINew = 1; let mayLoad = 1; let isNonTemporal = 1; +let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_nt_cur_ai"; let DecoderNamespace = "EXT_mmvec"; } @@ -26657,7 +27169,7 @@ def V6_vL32b_nt_cur_pred_pi : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii), "if ($Pv4) $Vd32.cur = vmem($Rx32++#$Ii):nt", -tc_da979fb3, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[HasV62T,UseHVX]>, PredRel { +tc_da979fb3, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001110; @@ -26670,6 +27182,7 @@ let isCVLoad = 1; let CVINew = 1; let mayLoad = 1; let isNonTemporal = 1; +let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_nt_cur_pi"; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; @@ -26678,7 +27191,7 @@ def V6_vL32b_nt_cur_pred_ppu : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2), "if ($Pv4) $Vd32.cur = vmem($Rx32++$Mu2):nt", -tc_da979fb3, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[HasV62T,UseHVX]>, PredRel { +tc_da979fb3, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel { let Inst{10-5} = 0b000100; let Inst{31-21} = 0b00101011110; let isPredicated = 1; @@ -26690,6 +27203,7 @@ let isCVLoad = 1; let CVINew = 1; let mayLoad = 1; let isNonTemporal = 1; +let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_nt_cur_ppu"; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; @@ -26698,7 +27212,7 @@ def V6_vL32b_nt_npred_ai : HInst< (outs HvxVR:$Vd32), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii), "if (!$Pv4) $Vd32 = vmem($Rt32+#$Ii):nt", -tc_5cbf490b, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[HasV62T,UseHVX]>, PredRel { +tc_5cbf490b, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel { let Inst{7-5} = 0b011; let Inst{31-21} = 0b00101000110; let isPredicated = 1; @@ -26710,6 +27224,7 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; let isNonTemporal = 1; +let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_nt_ai"; let DecoderNamespace = "EXT_mmvec"; } @@ -26717,7 +27232,7 @@ def V6_vL32b_nt_npred_pi : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii), "if (!$Pv4) $Vd32 = vmem($Rx32++#$Ii):nt", -tc_da979fb3, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[HasV62T,UseHVX]>, 
PredRel { +tc_da979fb3, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001110; @@ -26730,6 +27245,7 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; let isNonTemporal = 1; +let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_nt_pi"; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; @@ -26738,7 +27254,7 @@ def V6_vL32b_nt_npred_ppu : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2), "if (!$Pv4) $Vd32 = vmem($Rx32++$Mu2):nt", -tc_da979fb3, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[HasV62T,UseHVX]>, PredRel { +tc_da979fb3, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel { let Inst{10-5} = 0b000011; let Inst{31-21} = 0b00101011110; let isPredicated = 1; @@ -26750,6 +27266,7 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; let isNonTemporal = 1; +let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_nt_ppu"; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; @@ -26758,7 +27275,7 @@ def V6_vL32b_nt_pi : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s3_0Imm:$Ii), "$Vd32 = vmem($Rx32++#$Ii):nt", -tc_eb669007, TypeCVI_VM_LD>, Enc_a255dc, Requires<[HasV60T,UseHVX]>, PredRel { +tc_eb669007, TypeCVI_VM_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel { let Inst{7-5} = 0b000; let Inst{13-11} = 0b000; let Inst{31-21} = 0b00101001010; @@ -26769,9 +27286,10 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; let isNonTemporal = 1; +let isRestrictNoSlot1Store = 1; +let BaseOpcode = "V6_vL32b_nt_pi"; let isCVLoadable = 1; let isPredicable = 1; -let BaseOpcode = "V6_vL32b_nt_pi"; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -26779,7 +27297,7 @@ def V6_vL32b_nt_ppu : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Vd32 = vmem($Rx32++$Mu2):nt", -tc_eb669007, TypeCVI_VM_LD>, Enc_2ebe3b, Requires<[HasV60T,UseHVX]>, PredRel { +tc_eb669007, TypeCVI_VM_LD>, Enc_2ebe3b, Requires<[UseHVXV60]>, PredRel { let Inst{12-5} = 0b00000000; let Inst{31-21} = 0b00101011010; let hasNewValue = 1; @@ -26789,9 +27307,10 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; let isNonTemporal = 1; +let isRestrictNoSlot1Store = 1; +let BaseOpcode = "V6_vL32b_nt_ppu"; let isCVLoadable = 1; let isPredicable = 1; -let BaseOpcode = "V6_vL32b_nt_ppu"; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -26799,7 +27318,7 @@ def V6_vL32b_nt_pred_ai : HInst< (outs HvxVR:$Vd32), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii), "if ($Pv4) $Vd32 = vmem($Rt32+#$Ii):nt", -tc_5cbf490b, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[HasV62T,UseHVX]>, PredRel { +tc_5cbf490b, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel { let Inst{7-5} = 0b010; let Inst{31-21} = 0b00101000110; let isPredicated = 1; @@ -26810,6 +27329,7 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; let isNonTemporal = 1; +let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_nt_ai"; let DecoderNamespace = "EXT_mmvec"; } @@ -26817,7 +27337,7 @@ def V6_vL32b_nt_pred_pi : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii), "if ($Pv4) $Vd32 = vmem($Rx32++#$Ii):nt", -tc_da979fb3, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[HasV62T,UseHVX]>, PredRel { +tc_da979fb3, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel { let Inst{7-5} = 
0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001110; @@ -26829,6 +27349,7 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; let isNonTemporal = 1; +let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_nt_pi"; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; @@ -26837,7 +27358,7 @@ def V6_vL32b_nt_pred_ppu : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2), "if ($Pv4) $Vd32 = vmem($Rx32++$Mu2):nt", -tc_da979fb3, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[HasV62T,UseHVX]>, PredRel { +tc_da979fb3, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel { let Inst{10-5} = 0b000010; let Inst{31-21} = 0b00101011110; let isPredicated = 1; @@ -26848,6 +27369,7 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; let isNonTemporal = 1; +let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_nt_ppu"; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; @@ -26856,7 +27378,7 @@ def V6_vL32b_nt_tmp_ai : HInst< (outs HvxVR:$Vd32), (ins IntRegs:$Rt32, s4_0Imm:$Ii), "$Vd32.tmp = vmem($Rt32+#$Ii):nt", -tc_77a4c701, TypeCVI_VM_TMP_LD>, Enc_f3f408, Requires<[HasV60T,UseHVX]>, PredRel { +tc_77a4c701, TypeCVI_VM_TMP_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel { let Inst{7-5} = 0b010; let Inst{12-11} = 0b00; let Inst{31-21} = 0b00101000010; @@ -26867,15 +27389,16 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; let isNonTemporal = 1; -let isPredicable = 1; +let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_nt_tmp_ai"; +let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vL32b_nt_tmp_npred_ai : HInst< (outs HvxVR:$Vd32), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii), "if (!$Pv4) $Vd32.tmp = vmem($Rt32+#$Ii):nt", -tc_51cd3aab, TypeCVI_VM_TMP_LD>, Enc_8d8a30, Requires<[HasV62T,UseHVX]>, PredRel { +tc_51cd3aab, TypeCVI_VM_TMP_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel { let Inst{7-5} = 0b111; let Inst{31-21} = 0b00101000110; let isPredicated = 1; @@ -26887,6 +27410,7 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; let isNonTemporal = 1; +let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_nt_tmp_ai"; let DecoderNamespace = "EXT_mmvec"; } @@ -26894,7 +27418,7 @@ def V6_vL32b_nt_tmp_npred_pi : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii), "if (!$Pv4) $Vd32.tmp = vmem($Rx32++#$Ii):nt", -tc_38208312, TypeCVI_VM_TMP_LD>, Enc_58a8bf, Requires<[HasV62T,UseHVX]>, PredRel { +tc_38208312, TypeCVI_VM_TMP_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001110; @@ -26907,6 +27431,7 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; let isNonTemporal = 1; +let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_nt_tmp_pi"; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; @@ -26915,7 +27440,7 @@ def V6_vL32b_nt_tmp_npred_ppu : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2), "if (!$Pv4) $Vd32.tmp = vmem($Rx32++$Mu2):nt", -tc_38208312, TypeCVI_VM_TMP_LD>, Enc_f8c1c4, Requires<[HasV62T,UseHVX]>, PredRel { +tc_38208312, TypeCVI_VM_TMP_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel { let Inst{10-5} = 0b000111; let Inst{31-21} = 0b00101011110; let isPredicated = 1; @@ -26927,6 +27452,7 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; let isNonTemporal = 1; +let 
isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_nt_tmp_ppu"; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; @@ -26935,7 +27461,7 @@ def V6_vL32b_nt_tmp_pi : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s3_0Imm:$Ii), "$Vd32.tmp = vmem($Rx32++#$Ii):nt", -tc_9c267309, TypeCVI_VM_TMP_LD>, Enc_a255dc, Requires<[HasV60T,UseHVX]>, PredRel { +tc_9c267309, TypeCVI_VM_TMP_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel { let Inst{7-5} = 0b010; let Inst{13-11} = 0b000; let Inst{31-21} = 0b00101001010; @@ -26946,8 +27472,9 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; let isNonTemporal = 1; -let isPredicable = 1; +let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_nt_tmp_pi"; +let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -26955,7 +27482,7 @@ def V6_vL32b_nt_tmp_ppu : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Vd32.tmp = vmem($Rx32++$Mu2):nt", -tc_9c267309, TypeCVI_VM_TMP_LD>, Enc_2ebe3b, Requires<[HasV60T,UseHVX]>, PredRel { +tc_9c267309, TypeCVI_VM_TMP_LD>, Enc_2ebe3b, Requires<[UseHVXV60]>, PredRel { let Inst{12-5} = 0b00000010; let Inst{31-21} = 0b00101011010; let hasNewValue = 1; @@ -26965,8 +27492,9 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; let isNonTemporal = 1; -let isPredicable = 1; +let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_nt_tmp_ppu"; +let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -26974,7 +27502,7 @@ def V6_vL32b_nt_tmp_pred_ai : HInst< (outs HvxVR:$Vd32), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii), "if ($Pv4) $Vd32.tmp = vmem($Rt32+#$Ii):nt", -tc_51cd3aab, TypeCVI_VM_TMP_LD>, Enc_8d8a30, Requires<[HasV62T,UseHVX]>, PredRel { +tc_51cd3aab, TypeCVI_VM_TMP_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel { let Inst{7-5} = 0b110; let Inst{31-21} = 0b00101000110; let isPredicated = 1; @@ -26985,6 +27513,7 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; let isNonTemporal = 1; +let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_nt_tmp_ai"; let DecoderNamespace = "EXT_mmvec"; } @@ -26992,7 +27521,7 @@ def V6_vL32b_nt_tmp_pred_pi : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii), "if ($Pv4) $Vd32.tmp = vmem($Rx32++#$Ii):nt", -tc_38208312, TypeCVI_VM_TMP_LD>, Enc_58a8bf, Requires<[HasV62T,UseHVX]>, PredRel { +tc_38208312, TypeCVI_VM_TMP_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001110; @@ -27004,6 +27533,7 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; let isNonTemporal = 1; +let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_nt_tmp_pi"; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; @@ -27012,7 +27542,7 @@ def V6_vL32b_nt_tmp_pred_ppu : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2), "if ($Pv4) $Vd32.tmp = vmem($Rx32++$Mu2):nt", -tc_38208312, TypeCVI_VM_TMP_LD>, Enc_f8c1c4, Requires<[HasV62T,UseHVX]>, PredRel { +tc_38208312, TypeCVI_VM_TMP_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel { let Inst{10-5} = 0b000110; let Inst{31-21} = 0b00101011110; let isPredicated = 1; @@ -27023,6 +27553,7 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; let isNonTemporal = 1; +let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_nt_tmp_ppu"; let 
DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; @@ -27031,7 +27562,7 @@ def V6_vL32b_pi : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s3_0Imm:$Ii), "$Vd32 = vmem($Rx32++#$Ii)", -tc_eb669007, TypeCVI_VM_LD>, Enc_a255dc, Requires<[HasV60T,UseHVX]>, PredRel { +tc_eb669007, TypeCVI_VM_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel { let Inst{7-5} = 0b000; let Inst{13-11} = 0b000; let Inst{31-21} = 0b00101001000; @@ -27041,6 +27572,8 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; +let isRestrictNoSlot1Store = 1; +let BaseOpcode = "V6_vL32b_pi"; let isCVLoadable = 1; let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; @@ -27050,7 +27583,7 @@ def V6_vL32b_ppu : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Vd32 = vmem($Rx32++$Mu2)", -tc_eb669007, TypeCVI_VM_LD>, Enc_2ebe3b, Requires<[HasV60T,UseHVX]>, PredRel { +tc_eb669007, TypeCVI_VM_LD>, Enc_2ebe3b, Requires<[UseHVXV60]>, PredRel { let Inst{12-5} = 0b00000000; let Inst{31-21} = 0b00101011000; let hasNewValue = 1; @@ -27059,9 +27592,10 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; +let isRestrictNoSlot1Store = 1; +let BaseOpcode = "V6_vL32b_ppu"; let isCVLoadable = 1; let isPredicable = 1; -let BaseOpcode = "V6_vL32b_ppu"; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -27069,7 +27603,7 @@ def V6_vL32b_pred_ai : HInst< (outs HvxVR:$Vd32), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii), "if ($Pv4) $Vd32 = vmem($Rt32+#$Ii)", -tc_5cbf490b, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[HasV62T,UseHVX]>, PredRel { +tc_5cbf490b, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel { let Inst{7-5} = 0b010; let Inst{31-21} = 0b00101000100; let isPredicated = 1; @@ -27079,6 +27613,7 @@ let addrMode = BaseImmOffset; let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; +let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_ai"; let DecoderNamespace = "EXT_mmvec"; } @@ -27086,7 +27621,7 @@ def V6_vL32b_pred_pi : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii), "if ($Pv4) $Vd32 = vmem($Rx32++#$Ii)", -tc_da979fb3, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[HasV62T,UseHVX]>, PredRel { +tc_da979fb3, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001100; @@ -27097,6 +27632,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; +let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_pi"; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; @@ -27105,7 +27641,7 @@ def V6_vL32b_pred_ppu : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2), "if ($Pv4) $Vd32 = vmem($Rx32++$Mu2)", -tc_da979fb3, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[HasV62T,UseHVX]>, PredRel { +tc_da979fb3, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel { let Inst{10-5} = 0b000010; let Inst{31-21} = 0b00101011100; let isPredicated = 1; @@ -27115,6 +27651,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; +let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_ppu"; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; @@ -27123,7 +27660,7 @@ def V6_vL32b_tmp_ai : HInst< (outs HvxVR:$Vd32), (ins IntRegs:$Rt32, s4_0Imm:$Ii), "$Vd32.tmp = vmem($Rt32+#$Ii)", 
-tc_77a4c701, TypeCVI_VM_TMP_LD>, Enc_f3f408, Requires<[HasV60T,UseHVX]>, PredRel { +tc_77a4c701, TypeCVI_VM_TMP_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel { let Inst{7-5} = 0b010; let Inst{12-11} = 0b00; let Inst{31-21} = 0b00101000000; @@ -27133,15 +27670,16 @@ let addrMode = BaseImmOffset; let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; -let isPredicable = 1; +let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_tmp_ai"; +let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vL32b_tmp_npred_ai : HInst< (outs HvxVR:$Vd32), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii), "if (!$Pv4) $Vd32.tmp = vmem($Rt32+#$Ii)", -tc_51cd3aab, TypeCVI_VM_TMP_LD>, Enc_8d8a30, Requires<[HasV62T,UseHVX]>, PredRel { +tc_51cd3aab, TypeCVI_VM_TMP_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel { let Inst{7-5} = 0b111; let Inst{31-21} = 0b00101000100; let isPredicated = 1; @@ -27152,6 +27690,7 @@ let addrMode = BaseImmOffset; let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; +let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_tmp_ai"; let DecoderNamespace = "EXT_mmvec"; } @@ -27159,7 +27698,7 @@ def V6_vL32b_tmp_npred_pi : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii), "if (!$Pv4) $Vd32.tmp = vmem($Rx32++#$Ii)", -tc_38208312, TypeCVI_VM_TMP_LD>, Enc_58a8bf, Requires<[HasV62T,UseHVX]>, PredRel { +tc_38208312, TypeCVI_VM_TMP_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001100; @@ -27171,6 +27710,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; +let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_tmp_pi"; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; @@ -27179,7 +27719,7 @@ def V6_vL32b_tmp_npred_ppu : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2), "if (!$Pv4) $Vd32.tmp = vmem($Rx32++$Mu2)", -tc_38208312, TypeCVI_VM_TMP_LD>, Enc_f8c1c4, Requires<[HasV62T,UseHVX]>, PredRel { +tc_38208312, TypeCVI_VM_TMP_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel { let Inst{10-5} = 0b000111; let Inst{31-21} = 0b00101011100; let isPredicated = 1; @@ -27190,6 +27730,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; +let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_tmp_ppu"; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; @@ -27198,7 +27739,7 @@ def V6_vL32b_tmp_pi : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s3_0Imm:$Ii), "$Vd32.tmp = vmem($Rx32++#$Ii)", -tc_9c267309, TypeCVI_VM_TMP_LD>, Enc_a255dc, Requires<[HasV60T,UseHVX]>, PredRel { +tc_9c267309, TypeCVI_VM_TMP_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel { let Inst{7-5} = 0b010; let Inst{13-11} = 0b000; let Inst{31-21} = 0b00101001000; @@ -27208,8 +27749,9 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; -let isPredicable = 1; +let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_tmp_pi"; +let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -27217,7 +27759,7 @@ def V6_vL32b_tmp_ppu : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Vd32.tmp = vmem($Rx32++$Mu2)", -tc_9c267309, TypeCVI_VM_TMP_LD>, Enc_2ebe3b, Requires<[HasV60T,UseHVX]>, PredRel { +tc_9c267309, TypeCVI_VM_TMP_LD>, Enc_2ebe3b, Requires<[UseHVXV60]>, PredRel { 
let Inst{12-5} = 0b00000010; let Inst{31-21} = 0b00101011000; let hasNewValue = 1; @@ -27226,8 +27768,9 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; -let isPredicable = 1; +let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_tmp_ppu"; +let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -27235,7 +27778,7 @@ def V6_vL32b_tmp_pred_ai : HInst< (outs HvxVR:$Vd32), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii), "if ($Pv4) $Vd32.tmp = vmem($Rt32+#$Ii)", -tc_51cd3aab, TypeCVI_VM_TMP_LD>, Enc_8d8a30, Requires<[HasV62T,UseHVX]>, PredRel { +tc_51cd3aab, TypeCVI_VM_TMP_LD>, Enc_8d8a30, Requires<[UseHVXV62]>, PredRel { let Inst{7-5} = 0b110; let Inst{31-21} = 0b00101000100; let isPredicated = 1; @@ -27245,6 +27788,7 @@ let addrMode = BaseImmOffset; let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; +let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_tmp_ai"; let DecoderNamespace = "EXT_mmvec"; } @@ -27252,7 +27796,7 @@ def V6_vL32b_tmp_pred_pi : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii), "if ($Pv4) $Vd32.tmp = vmem($Rx32++#$Ii)", -tc_38208312, TypeCVI_VM_TMP_LD>, Enc_58a8bf, Requires<[HasV62T,UseHVX]>, PredRel { +tc_38208312, TypeCVI_VM_TMP_LD>, Enc_58a8bf, Requires<[UseHVXV62]>, PredRel { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001100; @@ -27263,6 +27807,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; +let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_tmp_pi"; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; @@ -27271,7 +27816,7 @@ def V6_vL32b_tmp_pred_ppu : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2), "if ($Pv4) $Vd32.tmp = vmem($Rx32++$Mu2)", -tc_38208312, TypeCVI_VM_TMP_LD>, Enc_f8c1c4, Requires<[HasV62T,UseHVX]>, PredRel { +tc_38208312, TypeCVI_VM_TMP_LD>, Enc_f8c1c4, Requires<[UseHVXV62]>, PredRel { let Inst{10-5} = 0b000110; let Inst{31-21} = 0b00101011100; let isPredicated = 1; @@ -27281,6 +27826,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; +let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_tmp_ppu"; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; @@ -27289,7 +27835,7 @@ def V6_vS32Ub_ai : HInst< (outs), (ins IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Vs32), "vmemu($Rt32+#$Ii) = $Vs32", -tc_354299ad, TypeCVI_VM_STU>, Enc_c9e3bc, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_354299ad, TypeCVI_VM_STU>, Enc_c9e3bc, Requires<[UseHVXV60]>, NewValueRel { let Inst{7-5} = 0b111; let Inst{12-11} = 0b00; let Inst{31-21} = 0b00101000001; @@ -27304,7 +27850,7 @@ def V6_vS32Ub_npred_ai : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Vs32), "if (!$Pv4) vmemu($Rt32+#$Ii) = $Vs32", -tc_d642eff3, TypeCVI_VM_STU>, Enc_27b757, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_d642eff3, TypeCVI_VM_STU>, Enc_27b757, Requires<[UseHVXV60]>, NewValueRel { let Inst{7-5} = 0b111; let Inst{31-21} = 0b00101000101; let isPredicated = 1; @@ -27319,7 +27865,7 @@ def V6_vS32Ub_npred_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Vs32), "if (!$Pv4) vmemu($Rx32++#$Ii) = $Vs32", -tc_6fd9ad30, TypeCVI_VM_STU>, Enc_865390, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_6fd9ad30, TypeCVI_VM_STU>, Enc_865390, Requires<[UseHVXV60]>, NewValueRel { let 
Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001101; @@ -27336,7 +27882,7 @@ def V6_vS32Ub_npred_ppu : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Vs32), "if (!$Pv4) vmemu($Rx32++$Mu2) = $Vs32", -tc_6fd9ad30, TypeCVI_VM_STU>, Enc_1ef990, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_6fd9ad30, TypeCVI_VM_STU>, Enc_1ef990, Requires<[UseHVXV60]>, NewValueRel { let Inst{10-5} = 0b000111; let Inst{31-21} = 0b00101011101; let isPredicated = 1; @@ -27352,7 +27898,7 @@ def V6_vS32Ub_pi : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Vs32), "vmemu($Rx32++#$Ii) = $Vs32", -tc_7fa82b08, TypeCVI_VM_STU>, Enc_b62ef7, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_7fa82b08, TypeCVI_VM_STU>, Enc_b62ef7, Requires<[UseHVXV60]>, NewValueRel { let Inst{7-5} = 0b111; let Inst{13-11} = 0b000; let Inst{31-21} = 0b00101001001; @@ -27368,7 +27914,7 @@ def V6_vS32Ub_ppu : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Vs32), "vmemu($Rx32++$Mu2) = $Vs32", -tc_7fa82b08, TypeCVI_VM_STU>, Enc_d15d19, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_7fa82b08, TypeCVI_VM_STU>, Enc_d15d19, Requires<[UseHVXV60]>, NewValueRel { let Inst{12-5} = 0b00000111; let Inst{31-21} = 0b00101011001; let addrMode = PostInc; @@ -27383,7 +27929,7 @@ def V6_vS32Ub_pred_ai : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Vs32), "if ($Pv4) vmemu($Rt32+#$Ii) = $Vs32", -tc_d642eff3, TypeCVI_VM_STU>, Enc_27b757, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_d642eff3, TypeCVI_VM_STU>, Enc_27b757, Requires<[UseHVXV60]>, NewValueRel { let Inst{7-5} = 0b110; let Inst{31-21} = 0b00101000101; let isPredicated = 1; @@ -27397,7 +27943,7 @@ def V6_vS32Ub_pred_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Vs32), "if ($Pv4) vmemu($Rx32++#$Ii) = $Vs32", -tc_6fd9ad30, TypeCVI_VM_STU>, Enc_865390, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_6fd9ad30, TypeCVI_VM_STU>, Enc_865390, Requires<[UseHVXV60]>, NewValueRel { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001101; @@ -27413,7 +27959,7 @@ def V6_vS32Ub_pred_ppu : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Vs32), "if ($Pv4) vmemu($Rx32++$Mu2) = $Vs32", -tc_6fd9ad30, TypeCVI_VM_STU>, Enc_1ef990, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_6fd9ad30, TypeCVI_VM_STU>, Enc_1ef990, Requires<[UseHVXV60]>, NewValueRel { let Inst{10-5} = 0b000110; let Inst{31-21} = 0b00101011101; let isPredicated = 1; @@ -27428,7 +27974,7 @@ def V6_vS32b_ai : HInst< (outs), (ins IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Vs32), "vmem($Rt32+#$Ii) = $Vs32", -tc_e3748cdf, TypeCVI_VM_ST>, Enc_c9e3bc, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_e3748cdf, TypeCVI_VM_ST>, Enc_c9e3bc, Requires<[UseHVXV60]>, NewValueRel { let Inst{7-5} = 0b000; let Inst{12-11} = 0b00; let Inst{31-21} = 0b00101000001; @@ -27444,7 +27990,7 @@ def V6_vS32b_new_ai : HInst< (outs), (ins IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Os8), "vmem($Rt32+#$Ii) = $Os8.new", -tc_1b93bdc6, TypeCVI_VM_NEW_ST>, Enc_f77fbc, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_1b93bdc6, TypeCVI_VM_NEW_ST>, Enc_f77fbc, Requires<[UseHVXV60]>, NewValueRel { let Inst{7-3} = 0b00100; let Inst{12-11} = 0b00; let Inst{31-21} = 0b00101000001; @@ -27463,7 +28009,7 @@ def V6_vS32b_new_npred_ai : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Os8), "if (!$Pv4) vmem($Rt32+#$Ii) = $Os8.new", -tc_d5090f3e, TypeCVI_VM_NEW_ST>, Enc_f7430e, 
Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_d5090f3e, TypeCVI_VM_NEW_ST>, Enc_f7430e, Requires<[UseHVXV60]>, NewValueRel { let Inst{7-3} = 0b01101; let Inst{31-21} = 0b00101000101; let isPredicated = 1; @@ -27482,7 +28028,7 @@ def V6_vS32b_new_npred_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Os8), "if (!$Pv4) vmem($Rx32++#$Ii) = $Os8.new", -tc_8b6a873f, TypeCVI_VM_NEW_ST>, Enc_784502, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_8b6a873f, TypeCVI_VM_NEW_ST>, Enc_784502, Requires<[UseHVXV60]>, NewValueRel { let Inst{7-3} = 0b01101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001101; @@ -27503,7 +28049,7 @@ def V6_vS32b_new_npred_ppu : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Os8), "if (!$Pv4) vmem($Rx32++$Mu2) = $Os8.new", -tc_8b6a873f, TypeCVI_VM_NEW_ST>, Enc_372c9d, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_8b6a873f, TypeCVI_VM_NEW_ST>, Enc_372c9d, Requires<[UseHVXV60]>, NewValueRel { let Inst{10-3} = 0b00001101; let Inst{31-21} = 0b00101011101; let isPredicated = 1; @@ -27523,7 +28069,7 @@ def V6_vS32b_new_pi : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Os8), "vmem($Rx32++#$Ii) = $Os8.new", -tc_db5b9e2f, TypeCVI_VM_NEW_ST>, Enc_1aaec1, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_db5b9e2f, TypeCVI_VM_NEW_ST>, Enc_1aaec1, Requires<[UseHVXV60]>, NewValueRel { let Inst{7-3} = 0b00100; let Inst{13-11} = 0b000; let Inst{31-21} = 0b00101001001; @@ -27543,7 +28089,7 @@ def V6_vS32b_new_ppu : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Os8), "vmem($Rx32++$Mu2) = $Os8.new", -tc_db5b9e2f, TypeCVI_VM_NEW_ST>, Enc_cf1927, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_db5b9e2f, TypeCVI_VM_NEW_ST>, Enc_cf1927, Requires<[UseHVXV60]>, NewValueRel { let Inst{12-3} = 0b0000000100; let Inst{31-21} = 0b00101011001; let addrMode = PostInc; @@ -27562,7 +28108,7 @@ def V6_vS32b_new_pred_ai : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Os8), "if ($Pv4) vmem($Rt32+#$Ii) = $Os8.new", -tc_d5090f3e, TypeCVI_VM_NEW_ST>, Enc_f7430e, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_d5090f3e, TypeCVI_VM_NEW_ST>, Enc_f7430e, Requires<[UseHVXV60]>, NewValueRel { let Inst{7-3} = 0b01000; let Inst{31-21} = 0b00101000101; let isPredicated = 1; @@ -27580,7 +28126,7 @@ def V6_vS32b_new_pred_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Os8), "if ($Pv4) vmem($Rx32++#$Ii) = $Os8.new", -tc_8b6a873f, TypeCVI_VM_NEW_ST>, Enc_784502, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_8b6a873f, TypeCVI_VM_NEW_ST>, Enc_784502, Requires<[UseHVXV60]>, NewValueRel { let Inst{7-3} = 0b01000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001101; @@ -27600,7 +28146,7 @@ def V6_vS32b_new_pred_ppu : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Os8), "if ($Pv4) vmem($Rx32++$Mu2) = $Os8.new", -tc_8b6a873f, TypeCVI_VM_NEW_ST>, Enc_372c9d, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_8b6a873f, TypeCVI_VM_NEW_ST>, Enc_372c9d, Requires<[UseHVXV60]>, NewValueRel { let Inst{10-3} = 0b00001000; let Inst{31-21} = 0b00101011101; let isPredicated = 1; @@ -27619,7 +28165,7 @@ def V6_vS32b_npred_ai : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Vs32), "if (!$Pv4) vmem($Rt32+#$Ii) = $Vs32", -tc_85d237e3, TypeCVI_VM_ST>, Enc_27b757, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_85d237e3, TypeCVI_VM_ST>, Enc_27b757, Requires<[UseHVXV60]>, NewValueRel { let Inst{7-5} = 
0b001; let Inst{31-21} = 0b00101000101; let isPredicated = 1; @@ -27635,7 +28181,7 @@ def V6_vS32b_npred_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Vs32), "if (!$Pv4) vmem($Rx32++#$Ii) = $Vs32", -tc_0317c6ca, TypeCVI_VM_ST>, Enc_865390, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_0317c6ca, TypeCVI_VM_ST>, Enc_865390, Requires<[UseHVXV60]>, NewValueRel { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001101; @@ -27653,7 +28199,7 @@ def V6_vS32b_npred_ppu : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Vs32), "if (!$Pv4) vmem($Rx32++$Mu2) = $Vs32", -tc_0317c6ca, TypeCVI_VM_ST>, Enc_1ef990, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_0317c6ca, TypeCVI_VM_ST>, Enc_1ef990, Requires<[UseHVXV60]>, NewValueRel { let Inst{10-5} = 0b000001; let Inst{31-21} = 0b00101011101; let isPredicated = 1; @@ -27670,7 +28216,7 @@ def V6_vS32b_nqpred_ai : HInst< (outs), (ins HvxQR:$Qv4, IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Vs32), "if (!$Qv4) vmem($Rt32+#$Ii) = $Vs32", -tc_aedb9f9e, TypeCVI_VM_ST>, Enc_2ea740, Requires<[HasV60T,UseHVX]> { +tc_aedb9f9e, TypeCVI_VM_ST>, Enc_2ea740, Requires<[UseHVXV60]> { let Inst{7-5} = 0b001; let Inst{31-21} = 0b00101000100; let addrMode = BaseImmOffset; @@ -27682,7 +28228,7 @@ def V6_vS32b_nqpred_pi : HInst< (outs IntRegs:$Rx32), (ins HvxQR:$Qv4, IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Vs32), "if (!$Qv4) vmem($Rx32++#$Ii) = $Vs32", -tc_99093773, TypeCVI_VM_ST>, Enc_0b51ce, Requires<[HasV60T,UseHVX]> { +tc_99093773, TypeCVI_VM_ST>, Enc_0b51ce, Requires<[UseHVXV60]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001100; @@ -27696,7 +28242,7 @@ def V6_vS32b_nqpred_ppu : HInst< (outs IntRegs:$Rx32), (ins HvxQR:$Qv4, IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Vs32), "if (!$Qv4) vmem($Rx32++$Mu2) = $Vs32", -tc_99093773, TypeCVI_VM_ST>, Enc_4dff07, Requires<[HasV60T,UseHVX]> { +tc_99093773, TypeCVI_VM_ST>, Enc_4dff07, Requires<[UseHVXV60]> { let Inst{10-5} = 0b000001; let Inst{31-21} = 0b00101011100; let addrMode = PostInc; @@ -27709,7 +28255,7 @@ def V6_vS32b_nt_ai : HInst< (outs), (ins IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Vs32), "vmem($Rt32+#$Ii):nt = $Vs32", -tc_e3748cdf, TypeCVI_VM_ST>, Enc_c9e3bc, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_e3748cdf, TypeCVI_VM_ST>, Enc_c9e3bc, Requires<[UseHVXV60]>, NewValueRel { let Inst{7-5} = 0b000; let Inst{12-11} = 0b00; let Inst{31-21} = 0b00101000011; @@ -27726,7 +28272,7 @@ def V6_vS32b_nt_new_ai : HInst< (outs), (ins IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Os8), "vmem($Rt32+#$Ii):nt = $Os8.new", -tc_1b93bdc6, TypeCVI_VM_NEW_ST>, Enc_f77fbc, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_1b93bdc6, TypeCVI_VM_NEW_ST>, Enc_f77fbc, Requires<[UseHVXV60]>, NewValueRel { let Inst{7-3} = 0b00100; let Inst{12-11} = 0b00; let Inst{31-21} = 0b00101000011; @@ -27746,7 +28292,7 @@ def V6_vS32b_nt_new_npred_ai : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Os8), "if (!$Pv4) vmem($Rt32+#$Ii):nt = $Os8.new", -tc_d5090f3e, TypeCVI_VM_NEW_ST>, Enc_f7430e, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_d5090f3e, TypeCVI_VM_NEW_ST>, Enc_f7430e, Requires<[UseHVXV60]>, NewValueRel { let Inst{7-3} = 0b01111; let Inst{31-21} = 0b00101000111; let isPredicated = 1; @@ -27766,7 +28312,7 @@ def V6_vS32b_nt_new_npred_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Os8), "if (!$Pv4) vmem($Rx32++#$Ii):nt = $Os8.new", -tc_8b6a873f, TypeCVI_VM_NEW_ST>, Enc_784502, 
Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_8b6a873f, TypeCVI_VM_NEW_ST>, Enc_784502, Requires<[UseHVXV60]>, NewValueRel { let Inst{7-3} = 0b01111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001111; @@ -27788,7 +28334,7 @@ def V6_vS32b_nt_new_npred_ppu : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Os8), "if (!$Pv4) vmem($Rx32++$Mu2):nt = $Os8.new", -tc_8b6a873f, TypeCVI_VM_NEW_ST>, Enc_372c9d, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_8b6a873f, TypeCVI_VM_NEW_ST>, Enc_372c9d, Requires<[UseHVXV60]>, NewValueRel { let Inst{10-3} = 0b00001111; let Inst{31-21} = 0b00101011111; let isPredicated = 1; @@ -27809,7 +28355,7 @@ def V6_vS32b_nt_new_pi : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Os8), "vmem($Rx32++#$Ii):nt = $Os8.new", -tc_db5b9e2f, TypeCVI_VM_NEW_ST>, Enc_1aaec1, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_db5b9e2f, TypeCVI_VM_NEW_ST>, Enc_1aaec1, Requires<[UseHVXV60]>, NewValueRel { let Inst{7-3} = 0b00100; let Inst{13-11} = 0b000; let Inst{31-21} = 0b00101001011; @@ -27830,7 +28376,7 @@ def V6_vS32b_nt_new_ppu : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Os8), "vmem($Rx32++$Mu2):nt = $Os8.new", -tc_db5b9e2f, TypeCVI_VM_NEW_ST>, Enc_cf1927, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_db5b9e2f, TypeCVI_VM_NEW_ST>, Enc_cf1927, Requires<[UseHVXV60]>, NewValueRel { let Inst{12-3} = 0b0000000100; let Inst{31-21} = 0b00101011011; let addrMode = PostInc; @@ -27850,7 +28396,7 @@ def V6_vS32b_nt_new_pred_ai : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Os8), "if ($Pv4) vmem($Rt32+#$Ii):nt = $Os8.new", -tc_d5090f3e, TypeCVI_VM_NEW_ST>, Enc_f7430e, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_d5090f3e, TypeCVI_VM_NEW_ST>, Enc_f7430e, Requires<[UseHVXV60]>, NewValueRel { let Inst{7-3} = 0b01010; let Inst{31-21} = 0b00101000111; let isPredicated = 1; @@ -27869,7 +28415,7 @@ def V6_vS32b_nt_new_pred_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Os8), "if ($Pv4) vmem($Rx32++#$Ii):nt = $Os8.new", -tc_8b6a873f, TypeCVI_VM_NEW_ST>, Enc_784502, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_8b6a873f, TypeCVI_VM_NEW_ST>, Enc_784502, Requires<[UseHVXV60]>, NewValueRel { let Inst{7-3} = 0b01010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001111; @@ -27890,7 +28436,7 @@ def V6_vS32b_nt_new_pred_ppu : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Os8), "if ($Pv4) vmem($Rx32++$Mu2):nt = $Os8.new", -tc_8b6a873f, TypeCVI_VM_NEW_ST>, Enc_372c9d, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_8b6a873f, TypeCVI_VM_NEW_ST>, Enc_372c9d, Requires<[UseHVXV60]>, NewValueRel { let Inst{10-3} = 0b00001010; let Inst{31-21} = 0b00101011111; let isPredicated = 1; @@ -27910,7 +28456,7 @@ def V6_vS32b_nt_npred_ai : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Vs32), "if (!$Pv4) vmem($Rt32+#$Ii):nt = $Vs32", -tc_85d237e3, TypeCVI_VM_ST>, Enc_27b757, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_85d237e3, TypeCVI_VM_ST>, Enc_27b757, Requires<[UseHVXV60]>, NewValueRel { let Inst{7-5} = 0b001; let Inst{31-21} = 0b00101000111; let isPredicated = 1; @@ -27927,7 +28473,7 @@ def V6_vS32b_nt_npred_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Vs32), "if (!$Pv4) vmem($Rx32++#$Ii):nt = $Vs32", -tc_0317c6ca, TypeCVI_VM_ST>, Enc_865390, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_0317c6ca, TypeCVI_VM_ST>, Enc_865390, 
Requires<[UseHVXV60]>, NewValueRel { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001111; @@ -27946,7 +28492,7 @@ def V6_vS32b_nt_npred_ppu : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Vs32), "if (!$Pv4) vmem($Rx32++$Mu2):nt = $Vs32", -tc_0317c6ca, TypeCVI_VM_ST>, Enc_1ef990, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_0317c6ca, TypeCVI_VM_ST>, Enc_1ef990, Requires<[UseHVXV60]>, NewValueRel { let Inst{10-5} = 0b000001; let Inst{31-21} = 0b00101011111; let isPredicated = 1; @@ -27964,7 +28510,7 @@ def V6_vS32b_nt_nqpred_ai : HInst< (outs), (ins HvxQR:$Qv4, IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Vs32), "if (!$Qv4) vmem($Rt32+#$Ii):nt = $Vs32", -tc_aedb9f9e, TypeCVI_VM_ST>, Enc_2ea740, Requires<[HasV60T,UseHVX]> { +tc_aedb9f9e, TypeCVI_VM_ST>, Enc_2ea740, Requires<[UseHVXV60]> { let Inst{7-5} = 0b001; let Inst{31-21} = 0b00101000110; let addrMode = BaseImmOffset; @@ -27977,7 +28523,7 @@ def V6_vS32b_nt_nqpred_pi : HInst< (outs IntRegs:$Rx32), (ins HvxQR:$Qv4, IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Vs32), "if (!$Qv4) vmem($Rx32++#$Ii):nt = $Vs32", -tc_99093773, TypeCVI_VM_ST>, Enc_0b51ce, Requires<[HasV60T,UseHVX]> { +tc_99093773, TypeCVI_VM_ST>, Enc_0b51ce, Requires<[UseHVXV60]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001110; @@ -27992,7 +28538,7 @@ def V6_vS32b_nt_nqpred_ppu : HInst< (outs IntRegs:$Rx32), (ins HvxQR:$Qv4, IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Vs32), "if (!$Qv4) vmem($Rx32++$Mu2):nt = $Vs32", -tc_99093773, TypeCVI_VM_ST>, Enc_4dff07, Requires<[HasV60T,UseHVX]> { +tc_99093773, TypeCVI_VM_ST>, Enc_4dff07, Requires<[UseHVXV60]> { let Inst{10-5} = 0b000001; let Inst{31-21} = 0b00101011110; let addrMode = PostInc; @@ -28006,7 +28552,7 @@ def V6_vS32b_nt_pi : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Vs32), "vmem($Rx32++#$Ii):nt = $Vs32", -tc_a4c9df3b, TypeCVI_VM_ST>, Enc_b62ef7, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_a4c9df3b, TypeCVI_VM_ST>, Enc_b62ef7, Requires<[UseHVXV60]>, NewValueRel { let Inst{7-5} = 0b000; let Inst{13-11} = 0b000; let Inst{31-21} = 0b00101001011; @@ -28024,7 +28570,7 @@ def V6_vS32b_nt_ppu : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Vs32), "vmem($Rx32++$Mu2):nt = $Vs32", -tc_a4c9df3b, TypeCVI_VM_ST>, Enc_d15d19, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_a4c9df3b, TypeCVI_VM_ST>, Enc_d15d19, Requires<[UseHVXV60]>, NewValueRel { let Inst{12-5} = 0b00000000; let Inst{31-21} = 0b00101011011; let addrMode = PostInc; @@ -28041,7 +28587,7 @@ def V6_vS32b_nt_pred_ai : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Vs32), "if ($Pv4) vmem($Rt32+#$Ii):nt = $Vs32", -tc_85d237e3, TypeCVI_VM_ST>, Enc_27b757, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_85d237e3, TypeCVI_VM_ST>, Enc_27b757, Requires<[UseHVXV60]>, NewValueRel { let Inst{7-5} = 0b000; let Inst{31-21} = 0b00101000111; let isPredicated = 1; @@ -28057,7 +28603,7 @@ def V6_vS32b_nt_pred_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Vs32), "if ($Pv4) vmem($Rx32++#$Ii):nt = $Vs32", -tc_0317c6ca, TypeCVI_VM_ST>, Enc_865390, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_0317c6ca, TypeCVI_VM_ST>, Enc_865390, Requires<[UseHVXV60]>, NewValueRel { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001111; @@ -28075,7 +28621,7 @@ def V6_vS32b_nt_pred_ppu : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Vs32), "if ($Pv4) 
vmem($Rx32++$Mu2):nt = $Vs32", -tc_0317c6ca, TypeCVI_VM_ST>, Enc_1ef990, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_0317c6ca, TypeCVI_VM_ST>, Enc_1ef990, Requires<[UseHVXV60]>, NewValueRel { let Inst{10-5} = 0b000000; let Inst{31-21} = 0b00101011111; let isPredicated = 1; @@ -28092,7 +28638,7 @@ def V6_vS32b_nt_qpred_ai : HInst< (outs), (ins HvxQR:$Qv4, IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Vs32), "if ($Qv4) vmem($Rt32+#$Ii):nt = $Vs32", -tc_aedb9f9e, TypeCVI_VM_ST>, Enc_2ea740, Requires<[HasV60T,UseHVX]> { +tc_aedb9f9e, TypeCVI_VM_ST>, Enc_2ea740, Requires<[UseHVXV60]> { let Inst{7-5} = 0b000; let Inst{31-21} = 0b00101000110; let addrMode = BaseImmOffset; @@ -28105,7 +28651,7 @@ def V6_vS32b_nt_qpred_pi : HInst< (outs IntRegs:$Rx32), (ins HvxQR:$Qv4, IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Vs32), "if ($Qv4) vmem($Rx32++#$Ii):nt = $Vs32", -tc_99093773, TypeCVI_VM_ST>, Enc_0b51ce, Requires<[HasV60T,UseHVX]> { +tc_99093773, TypeCVI_VM_ST>, Enc_0b51ce, Requires<[UseHVXV60]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001110; @@ -28120,7 +28666,7 @@ def V6_vS32b_nt_qpred_ppu : HInst< (outs IntRegs:$Rx32), (ins HvxQR:$Qv4, IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Vs32), "if ($Qv4) vmem($Rx32++$Mu2):nt = $Vs32", -tc_99093773, TypeCVI_VM_ST>, Enc_4dff07, Requires<[HasV60T,UseHVX]> { +tc_99093773, TypeCVI_VM_ST>, Enc_4dff07, Requires<[UseHVXV60]> { let Inst{10-5} = 0b000000; let Inst{31-21} = 0b00101011110; let addrMode = PostInc; @@ -28134,7 +28680,7 @@ def V6_vS32b_pi : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Vs32), "vmem($Rx32++#$Ii) = $Vs32", -tc_a4c9df3b, TypeCVI_VM_ST>, Enc_b62ef7, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_a4c9df3b, TypeCVI_VM_ST>, Enc_b62ef7, Requires<[UseHVXV60]>, NewValueRel { let Inst{7-5} = 0b000; let Inst{13-11} = 0b000; let Inst{31-21} = 0b00101001001; @@ -28151,7 +28697,7 @@ def V6_vS32b_ppu : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Vs32), "vmem($Rx32++$Mu2) = $Vs32", -tc_a4c9df3b, TypeCVI_VM_ST>, Enc_d15d19, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_a4c9df3b, TypeCVI_VM_ST>, Enc_d15d19, Requires<[UseHVXV60]>, NewValueRel { let Inst{12-5} = 0b00000000; let Inst{31-21} = 0b00101011001; let addrMode = PostInc; @@ -28166,7 +28712,7 @@ def V6_vS32b_pred_ai : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Vs32), "if ($Pv4) vmem($Rt32+#$Ii) = $Vs32", -tc_85d237e3, TypeCVI_VM_ST>, Enc_27b757, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_85d237e3, TypeCVI_VM_ST>, Enc_27b757, Requires<[UseHVXV60]>, NewValueRel { let Inst{7-5} = 0b000; let Inst{31-21} = 0b00101000101; let isPredicated = 1; @@ -28181,7 +28727,7 @@ def V6_vS32b_pred_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Vs32), "if ($Pv4) vmem($Rx32++#$Ii) = $Vs32", -tc_0317c6ca, TypeCVI_VM_ST>, Enc_865390, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_0317c6ca, TypeCVI_VM_ST>, Enc_865390, Requires<[UseHVXV60]>, NewValueRel { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001101; @@ -28198,7 +28744,7 @@ def V6_vS32b_pred_ppu : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Vs32), "if ($Pv4) vmem($Rx32++$Mu2) = $Vs32", -tc_0317c6ca, TypeCVI_VM_ST>, Enc_1ef990, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_0317c6ca, TypeCVI_VM_ST>, Enc_1ef990, Requires<[UseHVXV60]>, NewValueRel { let Inst{10-5} = 0b000000; let Inst{31-21} = 0b00101011101; let isPredicated = 1; @@ -28214,7 +28760,7 @@ def 
V6_vS32b_qpred_ai : HInst< (outs), (ins HvxQR:$Qv4, IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Vs32), "if ($Qv4) vmem($Rt32+#$Ii) = $Vs32", -tc_aedb9f9e, TypeCVI_VM_ST>, Enc_2ea740, Requires<[HasV60T,UseHVX]> { +tc_aedb9f9e, TypeCVI_VM_ST>, Enc_2ea740, Requires<[UseHVXV60]> { let Inst{7-5} = 0b000; let Inst{31-21} = 0b00101000100; let addrMode = BaseImmOffset; @@ -28226,7 +28772,7 @@ def V6_vS32b_qpred_pi : HInst< (outs IntRegs:$Rx32), (ins HvxQR:$Qv4, IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Vs32), "if ($Qv4) vmem($Rx32++#$Ii) = $Vs32", -tc_99093773, TypeCVI_VM_ST>, Enc_0b51ce, Requires<[HasV60T,UseHVX]> { +tc_99093773, TypeCVI_VM_ST>, Enc_0b51ce, Requires<[UseHVXV60]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001100; @@ -28240,7 +28786,7 @@ def V6_vS32b_qpred_ppu : HInst< (outs IntRegs:$Rx32), (ins HvxQR:$Qv4, IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Vs32), "if ($Qv4) vmem($Rx32++$Mu2) = $Vs32", -tc_99093773, TypeCVI_VM_ST>, Enc_4dff07, Requires<[HasV60T,UseHVX]> { +tc_99093773, TypeCVI_VM_ST>, Enc_4dff07, Requires<[UseHVXV60]> { let Inst{10-5} = 0b000000; let Inst{31-21} = 0b00101011100; let addrMode = PostInc; @@ -28249,11 +28795,100 @@ let mayStore = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } +def V6_vS32b_srls_ai : HInst< +(outs), +(ins IntRegs:$Rt32, s4_0Imm:$Ii), +"vmem($Rt32+#$Ii):scatter_release", +tc_29841470, TypeCVI_SCATTER_NEW_RST>, Enc_ff3442, Requires<[UseHVXV65]> { +let Inst{7-0} = 0b00101000; +let Inst{12-11} = 0b00; +let Inst{31-21} = 0b00101000001; +let addrMode = BaseImmOffset; +let accessSize = HVXVectorAccess; +let CVINew = 1; +let mayStore = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vS32b_srls_pi : HInst< +(outs IntRegs:$Rx32), +(ins IntRegs:$Rx32in, s3_0Imm:$Ii), +"vmem($Rx32++#$Ii):scatter_release", +tc_5c03dc63, TypeCVI_SCATTER_NEW_RST>, Enc_6c9ee0, Requires<[UseHVXV65]> { +let Inst{7-0} = 0b00101000; +let Inst{13-11} = 0b000; +let Inst{31-21} = 0b00101001001; +let addrMode = PostInc; +let accessSize = HVXVectorAccess; +let CVINew = 1; +let mayStore = 1; +let DecoderNamespace = "EXT_mmvec"; +let Constraints = "$Rx32 = $Rx32in"; +} +def V6_vS32b_srls_ppu : HInst< +(outs IntRegs:$Rx32), +(ins IntRegs:$Rx32in, ModRegs:$Mu2), +"vmem($Rx32++$Mu2):scatter_release", +tc_5c03dc63, TypeCVI_SCATTER_NEW_RST>, Enc_44661f, Requires<[UseHVXV65]> { +let Inst{12-0} = 0b0000000101000; +let Inst{31-21} = 0b00101011001; +let addrMode = PostInc; +let accessSize = HVXVectorAccess; +let CVINew = 1; +let mayStore = 1; +let DecoderNamespace = "EXT_mmvec"; +let Constraints = "$Rx32 = $Rx32in"; +} +def V6_vabsb : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.b = vabs($Vu32.b)", +tc_71337255, TypeCVI_VA>, Enc_e7581c, Requires<[UseHVXV65]> { +let Inst{7-5} = 0b100; +let Inst{13-13} = 0b0; +let Inst{31-16} = 0b0001111000000001; +let hasNewValue = 1; +let opNewValue = 0; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vabsb_alt : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32 = vabsb($Vu32)", +PSEUDO, TypeMAPPING>, Requires<[UseHVXV65]> { +let hasNewValue = 1; +let opNewValue = 0; +let isPseudo = 1; +let isCodeGenOnly = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vabsb_sat : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.b = vabs($Vu32.b):sat", +tc_71337255, TypeCVI_VA>, Enc_e7581c, Requires<[UseHVXV65]> { +let Inst{7-5} = 0b101; +let Inst{13-13} = 0b0; +let Inst{31-16} = 0b0001111000000001; +let hasNewValue = 1; +let opNewValue = 0; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vabsb_sat_alt : 
HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32 = vabsb($Vu32):sat", +PSEUDO, TypeMAPPING>, Requires<[UseHVXV65]> { +let hasNewValue = 1; +let opNewValue = 0; +let isPseudo = 1; +let isCodeGenOnly = 1; +let DecoderNamespace = "EXT_mmvec"; +} def V6_vabsdiffh : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.uh = vabsdiff($Vu32.h,$Vv32.h)", -tc_908a4c8c, TypeCVI_VX>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_908a4c8c, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100110; @@ -28265,7 +28900,7 @@ def V6_vabsdiffh_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vabsdiffh($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -28276,7 +28911,7 @@ def V6_vabsdiffub : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.ub = vabsdiff($Vu32.ub,$Vv32.ub)", -tc_908a4c8c, TypeCVI_VX>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_908a4c8c, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100110; @@ -28288,7 +28923,7 @@ def V6_vabsdiffub_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vabsdiffub($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -28299,7 +28934,7 @@ def V6_vabsdiffuh : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.uh = vabsdiff($Vu32.uh,$Vv32.uh)", -tc_908a4c8c, TypeCVI_VX>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_908a4c8c, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100110; @@ -28311,7 +28946,7 @@ def V6_vabsdiffuh_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vabsdiffuh($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -28322,7 +28957,7 @@ def V6_vabsdiffw : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.uw = vabsdiff($Vu32.w,$Vv32.w)", -tc_908a4c8c, TypeCVI_VX>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_908a4c8c, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100110; @@ -28334,7 +28969,7 @@ def V6_vabsdiffw_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vabsdiffw($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -28345,7 +28980,7 @@ def V6_vabsh : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32), "$Vd32.h = vabs($Vu32.h)", -tc_71337255, TypeCVI_VA>, Enc_e7581c, Requires<[HasV60T,UseHVX]> { +tc_71337255, TypeCVI_VA>, Enc_e7581c, Requires<[UseHVXV60]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001111000000000; @@ -28357,7 +28992,7 @@ def V6_vabsh_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32), "$Vd32 = vabsh($Vu32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -28368,7 +29003,7 @@ def V6_vabsh_sat : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32), "$Vd32.h = vabs($Vu32.h):sat", 
-tc_71337255, TypeCVI_VA>, Enc_e7581c, Requires<[HasV60T,UseHVX]> { +tc_71337255, TypeCVI_VA>, Enc_e7581c, Requires<[UseHVXV60]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001111000000000; @@ -28380,7 +29015,40 @@ def V6_vabsh_sat_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32), "$Vd32 = vabsh($Vu32):sat", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { +let hasNewValue = 1; +let opNewValue = 0; +let isPseudo = 1; +let isCodeGenOnly = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vabsub_alt : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.ub = vabs($Vu32.b)", +tc_71337255, TypeMAPPING>, Requires<[UseHVXV65]> { +let hasNewValue = 1; +let opNewValue = 0; +let isPseudo = 1; +let isCodeGenOnly = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vabsuh_alt : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.uh = vabs($Vu32.h)", +tc_71337255, TypeMAPPING>, Requires<[UseHVXV65]> { +let hasNewValue = 1; +let opNewValue = 0; +let isPseudo = 1; +let isCodeGenOnly = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vabsuw_alt : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.uw = vabs($Vu32.w)", +tc_71337255, TypeMAPPING>, Requires<[UseHVXV65]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -28391,7 +29059,7 @@ def V6_vabsw : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32), "$Vd32.w = vabs($Vu32.w)", -tc_71337255, TypeCVI_VA>, Enc_e7581c, Requires<[HasV60T,UseHVX]> { +tc_71337255, TypeCVI_VA>, Enc_e7581c, Requires<[UseHVXV60]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001111000000000; @@ -28403,7 +29071,7 @@ def V6_vabsw_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32), "$Vd32 = vabsw($Vu32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -28414,7 +29082,7 @@ def V6_vabsw_sat : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32), "$Vd32.w = vabs($Vu32.w):sat", -tc_71337255, TypeCVI_VA>, Enc_e7581c, Requires<[HasV60T,UseHVX]> { +tc_71337255, TypeCVI_VA>, Enc_e7581c, Requires<[UseHVXV60]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001111000000000; @@ -28426,7 +29094,7 @@ def V6_vabsw_sat_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32), "$Vd32 = vabsw($Vu32):sat", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -28437,7 +29105,7 @@ def V6_vaddb : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.b = vadd($Vu32.b,$Vv32.b)", -tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111101; @@ -28449,7 +29117,7 @@ def V6_vaddb_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vaddb($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -28460,7 +29128,7 @@ def V6_vaddb_dv : HInst< (outs HvxWR:$Vdd32), (ins HvxWR:$Vuu32, HvxWR:$Vvv32), "$Vdd32.b = vadd($Vuu32.b,$Vvv32.b)", -tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[HasV60T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100011; @@ -28472,7 +29140,7 @@ def 
V6_vaddb_dv_alt : HInst< (outs HvxWR:$Vdd32), (ins HvxWR:$Vuu32, HvxWR:$Vvv32), "$Vdd32 = vaddb($Vuu32,$Vvv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -28483,7 +29151,7 @@ def V6_vaddbnq : HInst< (outs HvxVR:$Vx32), (ins HvxQR:$Qv4, HvxVR:$Vx32in, HvxVR:$Vu32), "if (!$Qv4) $Vx32.b += $Vu32.b", -tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b1; let Inst{21-16} = 0b000001; @@ -28498,7 +29166,7 @@ def V6_vaddbnq_alt : HInst< (outs HvxVR:$Vx32), (ins HvxQR:$Qv4, HvxVR:$Vx32in, HvxVR:$Vu32), "if (!$Qv4.b) $Vx32.b += $Vu32.b", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isAccumulator = 1; @@ -28511,7 +29179,7 @@ def V6_vaddbq : HInst< (outs HvxVR:$Vx32), (ins HvxQR:$Qv4, HvxVR:$Vx32in, HvxVR:$Vu32), "if ($Qv4) $Vx32.b += $Vu32.b", -tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b1; let Inst{21-16} = 0b000001; @@ -28526,7 +29194,7 @@ def V6_vaddbq_alt : HInst< (outs HvxVR:$Vx32), (ins HvxQR:$Qv4, HvxVR:$Vx32in, HvxVR:$Vu32), "if ($Qv4.b) $Vx32.b += $Vu32.b", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isAccumulator = 1; @@ -28539,7 +29207,7 @@ def V6_vaddbsat : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.b = vadd($Vu32.b,$Vv32.b):sat", -tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV62T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV62]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111000; @@ -28551,7 +29219,7 @@ def V6_vaddbsat_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vaddb($Vu32,$Vv32):sat", -PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV62]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -28562,7 +29230,7 @@ def V6_vaddbsat_dv : HInst< (outs HvxWR:$Vdd32), (ins HvxWR:$Vuu32, HvxWR:$Vvv32), "$Vdd32.b = vadd($Vuu32.b,$Vvv32.b):sat", -tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[HasV62T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV62]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011110101; @@ -28574,7 +29242,7 @@ def V6_vaddbsat_dv_alt : HInst< (outs HvxWR:$Vdd32), (ins HvxWR:$Vuu32, HvxWR:$Vvv32), "$Vdd32 = vaddb($Vuu32,$Vvv32):sat", -PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV62]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -28585,14 +29253,12 @@ def V6_vaddcarry : HInst< (outs HvxVR:$Vd32, HvxQR:$Qx4), (ins HvxVR:$Vu32, HvxVR:$Vv32, HvxQR:$Qx4in), "$Vd32.w = vadd($Vu32.w,$Vv32.w,$Qx4):carry", -tc_5a9fc4ec, TypeCVI_VA>, Enc_b43b67, Requires<[HasV62T,UseHVX]> { +tc_5a9fc4ec, TypeCVI_VA>, Enc_b43b67, Requires<[UseHVXV62]> { let Inst{7-7} = 0b0; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100101; let hasNewValue = 1; let opNewValue = 0; -let hasNewValue2 = 1; -let opNewValue2 = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -28600,7 +29266,7 @@ def V6_vaddclbh : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, 
HvxVR:$Vv32), "$Vd32.h = vadd(vclb($Vu32.h),$Vv32.h)", -tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[HasV62T,UseHVX]> { +tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV62]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011111000; @@ -28612,7 +29278,7 @@ def V6_vaddclbw : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.w = vadd(vclb($Vu32.w),$Vv32.w)", -tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[HasV62T,UseHVX]> { +tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV62]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011111000; @@ -28624,7 +29290,7 @@ def V6_vaddh : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.h = vadd($Vu32.h,$Vv32.h)", -tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111101; @@ -28636,7 +29302,7 @@ def V6_vaddh_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vaddh($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -28647,7 +29313,7 @@ def V6_vaddh_dv : HInst< (outs HvxWR:$Vdd32), (ins HvxWR:$Vuu32, HvxWR:$Vvv32), "$Vdd32.h = vadd($Vuu32.h,$Vvv32.h)", -tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[HasV60T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100011; @@ -28659,7 +29325,7 @@ def V6_vaddh_dv_alt : HInst< (outs HvxWR:$Vdd32), (ins HvxWR:$Vuu32, HvxWR:$Vvv32), "$Vdd32 = vaddh($Vuu32,$Vvv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -28670,7 +29336,7 @@ def V6_vaddhnq : HInst< (outs HvxVR:$Vx32), (ins HvxQR:$Qv4, HvxVR:$Vx32in, HvxVR:$Vu32), "if (!$Qv4) $Vx32.h += $Vu32.h", -tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b1; let Inst{21-16} = 0b000001; @@ -28685,7 +29351,7 @@ def V6_vaddhnq_alt : HInst< (outs HvxVR:$Vx32), (ins HvxQR:$Qv4, HvxVR:$Vx32in, HvxVR:$Vu32), "if (!$Qv4.h) $Vx32.h += $Vu32.h", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isAccumulator = 1; @@ -28698,7 +29364,7 @@ def V6_vaddhq : HInst< (outs HvxVR:$Vx32), (ins HvxQR:$Qv4, HvxVR:$Vx32in, HvxVR:$Vu32), "if ($Qv4) $Vx32.h += $Vu32.h", -tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b1; let Inst{21-16} = 0b000001; @@ -28713,7 +29379,7 @@ def V6_vaddhq_alt : HInst< (outs HvxVR:$Vx32), (ins HvxQR:$Qv4, HvxVR:$Vx32in, HvxVR:$Vu32), "if ($Qv4.h) $Vx32.h += $Vu32.h", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isAccumulator = 1; @@ -28726,7 +29392,7 @@ def V6_vaddhsat : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.h = vadd($Vu32.h,$Vv32.h):sat", -tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b011; let Inst{13-13} = 
0b0; let Inst{31-21} = 0b00011100010; @@ -28738,7 +29404,7 @@ def V6_vaddhsat_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vaddh($Vu32,$Vv32):sat", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -28749,7 +29415,7 @@ def V6_vaddhsat_dv : HInst< (outs HvxWR:$Vdd32), (ins HvxWR:$Vuu32, HvxWR:$Vvv32), "$Vdd32.h = vadd($Vuu32.h,$Vvv32.h):sat", -tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[HasV60T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100100; @@ -28761,7 +29427,7 @@ def V6_vaddhsat_dv_alt : HInst< (outs HvxWR:$Vdd32), (ins HvxWR:$Vuu32, HvxWR:$Vvv32), "$Vdd32 = vaddh($Vuu32,$Vvv32):sat", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -28772,7 +29438,7 @@ def V6_vaddhw : HInst< (outs HvxWR:$Vdd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vdd32.w = vadd($Vu32.h,$Vv32.h)", -tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[HasV60T,UseHVX]> { +tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100101; @@ -28784,7 +29450,7 @@ def V6_vaddhw_acc : HInst< (outs HvxWR:$Vxx32), (ins HvxWR:$Vxx32in, HvxVR:$Vu32, HvxVR:$Vv32), "$Vxx32.w += vadd($Vu32.h,$Vv32.h)", -tc_e172d86a, TypeCVI_VX_DV>, Enc_3fc427, Requires<[HasV62T,UseHVX]> { +tc_e172d86a, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV62]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100001; @@ -28798,7 +29464,7 @@ def V6_vaddhw_acc_alt : HInst< (outs HvxWR:$Vxx32), (ins HvxWR:$Vxx32in, HvxVR:$Vu32, HvxVR:$Vv32), "$Vxx32 += vaddh($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV62]> { let hasNewValue = 1; let opNewValue = 0; let isAccumulator = 1; @@ -28811,7 +29477,7 @@ def V6_vaddhw_alt : HInst< (outs HvxWR:$Vdd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vdd32 = vaddh($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -28822,7 +29488,7 @@ def V6_vaddubh : HInst< (outs HvxWR:$Vdd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vdd32.h = vadd($Vu32.ub,$Vv32.ub)", -tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[HasV60T,UseHVX]> { +tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100101; @@ -28834,7 +29500,7 @@ def V6_vaddubh_acc : HInst< (outs HvxWR:$Vxx32), (ins HvxWR:$Vxx32in, HvxVR:$Vu32, HvxVR:$Vv32), "$Vxx32.h += vadd($Vu32.ub,$Vv32.ub)", -tc_e172d86a, TypeCVI_VX_DV>, Enc_3fc427, Requires<[HasV62T,UseHVX]> { +tc_e172d86a, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV62]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100010; @@ -28848,7 +29514,7 @@ def V6_vaddubh_acc_alt : HInst< (outs HvxWR:$Vxx32), (ins HvxWR:$Vxx32in, HvxVR:$Vu32, HvxVR:$Vv32), "$Vxx32 += vaddub($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV62]> { let hasNewValue = 1; let opNewValue = 0; let isAccumulator = 1; @@ -28861,7 +29527,7 @@ def V6_vaddubh_alt : HInst< (outs HvxWR:$Vdd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vdd32 = vaddub($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, 
Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -28872,7 +29538,7 @@ def V6_vaddubsat : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.ub = vadd($Vu32.ub,$Vv32.ub):sat", -tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100010; @@ -28884,7 +29550,7 @@ def V6_vaddubsat_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vaddub($Vu32,$Vv32):sat", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -28895,7 +29561,7 @@ def V6_vaddubsat_dv : HInst< (outs HvxWR:$Vdd32), (ins HvxWR:$Vuu32, HvxWR:$Vvv32), "$Vdd32.ub = vadd($Vuu32.ub,$Vvv32.ub):sat", -tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[HasV60T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100011; @@ -28907,7 +29573,7 @@ def V6_vaddubsat_dv_alt : HInst< (outs HvxWR:$Vdd32), (ins HvxWR:$Vuu32, HvxWR:$Vvv32), "$Vdd32 = vaddub($Vuu32,$Vvv32):sat", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -28918,7 +29584,7 @@ def V6_vaddububb_sat : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.ub = vadd($Vu32.ub,$Vv32.b):sat", -tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV62T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV62]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011110101; @@ -28930,7 +29596,7 @@ def V6_vadduhsat : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.uh = vadd($Vu32.uh,$Vv32.uh):sat", -tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100010; @@ -28942,7 +29608,7 @@ def V6_vadduhsat_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vadduh($Vu32,$Vv32):sat", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -28953,7 +29619,7 @@ def V6_vadduhsat_dv : HInst< (outs HvxWR:$Vdd32), (ins HvxWR:$Vuu32, HvxWR:$Vvv32), "$Vdd32.uh = vadd($Vuu32.uh,$Vvv32.uh):sat", -tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[HasV60T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100100; @@ -28965,7 +29631,7 @@ def V6_vadduhsat_dv_alt : HInst< (outs HvxWR:$Vdd32), (ins HvxWR:$Vuu32, HvxWR:$Vvv32), "$Vdd32 = vadduh($Vuu32,$Vvv32):sat", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -28976,7 +29642,7 @@ def V6_vadduhw : HInst< (outs HvxWR:$Vdd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vdd32.w = vadd($Vu32.uh,$Vv32.uh)", -tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[HasV60T,UseHVX]> { +tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100101; @@ -28988,7 +29654,7 @@ def V6_vadduhw_acc : 
HInst< (outs HvxWR:$Vxx32), (ins HvxWR:$Vxx32in, HvxVR:$Vu32, HvxVR:$Vv32), "$Vxx32.w += vadd($Vu32.uh,$Vv32.uh)", -tc_e172d86a, TypeCVI_VX_DV>, Enc_3fc427, Requires<[HasV62T,UseHVX]> { +tc_e172d86a, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV62]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100010; @@ -29002,7 +29668,7 @@ def V6_vadduhw_acc_alt : HInst< (outs HvxWR:$Vxx32), (ins HvxWR:$Vxx32in, HvxVR:$Vu32, HvxVR:$Vv32), "$Vxx32 += vadduh($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV62]> { let hasNewValue = 1; let opNewValue = 0; let isAccumulator = 1; @@ -29015,7 +29681,7 @@ def V6_vadduhw_alt : HInst< (outs HvxWR:$Vdd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vdd32 = vadduh($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -29026,7 +29692,7 @@ def V6_vadduwsat : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.uw = vadd($Vu32.uw,$Vv32.uw):sat", -tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV62T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV62]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111011; @@ -29038,7 +29704,7 @@ def V6_vadduwsat_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vadduw($Vu32,$Vv32):sat", -PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV62]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -29049,7 +29715,7 @@ def V6_vadduwsat_dv : HInst< (outs HvxWR:$Vdd32), (ins HvxWR:$Vuu32, HvxWR:$Vvv32), "$Vdd32.uw = vadd($Vuu32.uw,$Vvv32.uw):sat", -tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[HasV62T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV62]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011110101; @@ -29061,7 +29727,7 @@ def V6_vadduwsat_dv_alt : HInst< (outs HvxWR:$Vdd32), (ins HvxWR:$Vuu32, HvxWR:$Vvv32), "$Vdd32 = vadduw($Vuu32,$Vvv32):sat", -PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV62]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -29072,7 +29738,7 @@ def V6_vaddw : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.w = vadd($Vu32.w,$Vv32.w)", -tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100010; @@ -29084,7 +29750,7 @@ def V6_vaddw_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vaddw($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -29095,7 +29761,7 @@ def V6_vaddw_dv : HInst< (outs HvxWR:$Vdd32), (ins HvxWR:$Vuu32, HvxWR:$Vvv32), "$Vdd32.w = vadd($Vuu32.w,$Vvv32.w)", -tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[HasV60T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100011; @@ -29107,7 +29773,7 @@ def V6_vaddw_dv_alt : HInst< (outs HvxWR:$Vdd32), (ins HvxWR:$Vuu32, HvxWR:$Vvv32), "$Vdd32 = vaddw($Vuu32,$Vvv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let 
isPseudo = 1; @@ -29118,7 +29784,7 @@ def V6_vaddwnq : HInst< (outs HvxVR:$Vx32), (ins HvxQR:$Qv4, HvxVR:$Vx32in, HvxVR:$Vu32), "if (!$Qv4) $Vx32.w += $Vu32.w", -tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b1; let Inst{21-16} = 0b000001; @@ -29133,7 +29799,7 @@ def V6_vaddwnq_alt : HInst< (outs HvxVR:$Vx32), (ins HvxQR:$Qv4, HvxVR:$Vx32in, HvxVR:$Vu32), "if (!$Qv4.w) $Vx32.w += $Vu32.w", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isAccumulator = 1; @@ -29146,7 +29812,7 @@ def V6_vaddwq : HInst< (outs HvxVR:$Vx32), (ins HvxQR:$Qv4, HvxVR:$Vx32in, HvxVR:$Vu32), "if ($Qv4) $Vx32.w += $Vu32.w", -tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b1; let Inst{21-16} = 0b000001; @@ -29161,7 +29827,7 @@ def V6_vaddwq_alt : HInst< (outs HvxVR:$Vx32), (ins HvxQR:$Qv4, HvxVR:$Vx32in, HvxVR:$Vu32), "if ($Qv4.w) $Vx32.w += $Vu32.w", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isAccumulator = 1; @@ -29174,7 +29840,7 @@ def V6_vaddwsat : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.w = vadd($Vu32.w,$Vv32.w):sat", -tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100010; @@ -29186,7 +29852,7 @@ def V6_vaddwsat_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vaddw($Vu32,$Vv32):sat", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -29197,7 +29863,7 @@ def V6_vaddwsat_dv : HInst< (outs HvxWR:$Vdd32), (ins HvxWR:$Vuu32, HvxWR:$Vvv32), "$Vdd32.w = vadd($Vuu32.w,$Vvv32.w):sat", -tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[HasV60T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100100; @@ -29209,7 +29875,7 @@ def V6_vaddwsat_dv_alt : HInst< (outs HvxWR:$Vdd32), (ins HvxWR:$Vuu32, HvxWR:$Vvv32), "$Vdd32 = vaddw($Vuu32,$Vvv32):sat", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -29220,7 +29886,7 @@ def V6_valignb : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8), "$Vd32 = valign($Vu32,$Vv32,$Rt8)", -tc_c4b515c5, TypeCVI_VP>, Enc_a30110, Requires<[HasV60T,UseHVX]> { +tc_c4b515c5, TypeCVI_VP>, Enc_a30110, Requires<[UseHVXV60]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-24} = 0b00011011; @@ -29232,7 +29898,7 @@ def V6_valignbi : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32, u3_0Imm:$Ii), "$Vd32 = valign($Vu32,$Vv32,#$Ii)", -tc_c4b515c5, TypeCVI_VP>, Enc_0b2e5b, Requires<[HasV60T,UseHVX]> { +tc_c4b515c5, TypeCVI_VP>, Enc_0b2e5b, Requires<[UseHVXV60]> { let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011110001; let hasNewValue = 1; @@ -29243,7 +29909,7 @@ def V6_vand : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vand($Vu32,$Vv32)", -tc_bbaf280e, TypeCVI_VA>, Enc_45364e, 
Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100001; @@ -29255,7 +29921,7 @@ def V6_vandnqrt : HInst< (outs HvxVR:$Vd32), (ins HvxQR:$Qu4, IntRegs:$Rt32), "$Vd32 = vand(!$Qu4,$Rt32)", -tc_e231aa4f, TypeCVI_VX>, Enc_7b7ba8, Requires<[HasV62T,UseHVX]> { +tc_e231aa4f, TypeCVI_VX>, Enc_7b7ba8, Requires<[UseHVXV62]> { let Inst{7-5} = 0b101; let Inst{13-10} = 0b0001; let Inst{31-21} = 0b00011001101; @@ -29267,7 +29933,7 @@ def V6_vandnqrt_acc : HInst< (outs HvxVR:$Vx32), (ins HvxVR:$Vx32in, HvxQR:$Qu4, IntRegs:$Rt32), "$Vx32 |= vand(!$Qu4,$Rt32)", -tc_9311da3f, TypeCVI_VX>, Enc_895bd9, Requires<[HasV62T,UseHVX]> { +tc_9311da3f, TypeCVI_VX>, Enc_895bd9, Requires<[UseHVXV62]> { let Inst{7-5} = 0b011; let Inst{13-10} = 0b1001; let Inst{31-21} = 0b00011001011; @@ -29281,7 +29947,7 @@ def V6_vandnqrt_acc_alt : HInst< (outs HvxVR:$Vx32), (ins HvxVR:$Vx32in, HvxQR:$Qu4, IntRegs:$Rt32), "$Vx32.ub |= vand(!$Qu4.ub,$Rt32.ub)", -PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV62]> { let hasNewValue = 1; let opNewValue = 0; let isAccumulator = 1; @@ -29294,7 +29960,7 @@ def V6_vandnqrt_alt : HInst< (outs HvxVR:$Vd32), (ins HvxQR:$Qu4, IntRegs:$Rt32), "$Vd32.ub = vand(!$Qu4.ub,$Rt32.ub)", -PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV62]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -29305,7 +29971,7 @@ def V6_vandqrt : HInst< (outs HvxVR:$Vd32), (ins HvxQR:$Qu4, IntRegs:$Rt32), "$Vd32 = vand($Qu4,$Rt32)", -tc_e231aa4f, TypeCVI_VX_LATE>, Enc_7b7ba8, Requires<[HasV60T,UseHVX]> { +tc_e231aa4f, TypeCVI_VX_LATE>, Enc_7b7ba8, Requires<[UseHVXV60]> { let Inst{7-5} = 0b101; let Inst{13-10} = 0b0000; let Inst{31-21} = 0b00011001101; @@ -29317,7 +29983,7 @@ def V6_vandqrt_acc : HInst< (outs HvxVR:$Vx32), (ins HvxVR:$Vx32in, HvxQR:$Qu4, IntRegs:$Rt32), "$Vx32 |= vand($Qu4,$Rt32)", -tc_9311da3f, TypeCVI_VX_LATE>, Enc_895bd9, Requires<[HasV60T,UseHVX]> { +tc_9311da3f, TypeCVI_VX_LATE>, Enc_895bd9, Requires<[UseHVXV60]> { let Inst{7-5} = 0b011; let Inst{13-10} = 0b1000; let Inst{31-21} = 0b00011001011; @@ -29331,7 +29997,7 @@ def V6_vandqrt_acc_alt : HInst< (outs HvxVR:$Vx32), (ins HvxVR:$Vx32in, HvxQR:$Qu4, IntRegs:$Rt32), "$Vx32.ub |= vand($Qu4.ub,$Rt32.ub)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isAccumulator = 1; @@ -29344,7 +30010,7 @@ def V6_vandqrt_alt : HInst< (outs HvxVR:$Vd32), (ins HvxQR:$Qu4, IntRegs:$Rt32), "$Vd32.ub = vand($Qu4.ub,$Rt32.ub)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -29355,7 +30021,7 @@ def V6_vandvnqv : HInst< (outs HvxVR:$Vd32), (ins HvxQR:$Qv4, HvxVR:$Vu32), "$Vd32 = vand(!$Qv4,$Vu32)", -tc_bbaf280e, TypeCVI_VA>, Enc_c4dc92, Requires<[HasV62T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_c4dc92, Requires<[UseHVXV62]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b1; let Inst{21-16} = 0b000011; @@ -29368,7 +30034,7 @@ def V6_vandvqv : HInst< (outs HvxVR:$Vd32), (ins HvxQR:$Qv4, HvxVR:$Vu32), "$Vd32 = vand($Qv4,$Vu32)", -tc_bbaf280e, TypeCVI_VA>, Enc_c4dc92, Requires<[HasV62T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_c4dc92, Requires<[UseHVXV62]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b1; let Inst{21-16} = 0b000011; @@ -29381,7 +30047,7 @@ def 
V6_vandvrt : HInst< (outs HvxQR:$Qd4), (ins HvxVR:$Vu32, IntRegs:$Rt32), "$Qd4 = vand($Vu32,$Rt32)", -tc_e231aa4f, TypeCVI_VX_LATE>, Enc_0f8bab, Requires<[HasV60T,UseHVX]> { +tc_e231aa4f, TypeCVI_VX_LATE>, Enc_0f8bab, Requires<[UseHVXV60]> { let Inst{7-2} = 0b010010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001101; @@ -29393,12 +30059,10 @@ def V6_vandvrt_acc : HInst< (outs HvxQR:$Qx4), (ins HvxQR:$Qx4in, HvxVR:$Vu32, IntRegs:$Rt32), "$Qx4 |= vand($Vu32,$Rt32)", -tc_9311da3f, TypeCVI_VX_LATE>, Enc_adf111, Requires<[HasV60T,UseHVX]> { +tc_9311da3f, TypeCVI_VX_LATE>, Enc_adf111, Requires<[UseHVXV60]> { let Inst{7-2} = 0b100000; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001011; -let hasNewValue = 1; -let opNewValue = 0; let isAccumulator = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; @@ -29407,9 +30071,7 @@ def V6_vandvrt_acc_alt : HInst< (outs HvxQR:$Qx4), (ins HvxQR:$Qx4in, HvxVR:$Vu32, IntRegs:$Rt32), "$Qx4.ub |= vand($Vu32.ub,$Rt32.ub)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { -let hasNewValue = 1; -let opNewValue = 0; +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let isAccumulator = 1; let isPseudo = 1; let isCodeGenOnly = 1; @@ -29420,7 +30082,7 @@ def V6_vandvrt_alt : HInst< (outs HvxQR:$Qd4), (ins HvxVR:$Vu32, IntRegs:$Rt32), "$Qd4.ub = vand($Vu32.ub,$Rt32.ub)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -29431,7 +30093,7 @@ def V6_vaslh : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, IntRegs:$Rt32), "$Vd32.h = vasl($Vu32.h,$Rt32)", -tc_41f4b64e, TypeCVI_VS>, Enc_b087ac, Requires<[HasV60T,UseHVX]> { +tc_41f4b64e, TypeCVI_VS>, Enc_b087ac, Requires<[UseHVXV60]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001100; @@ -29439,11 +30101,38 @@ let hasNewValue = 1; let opNewValue = 0; let DecoderNamespace = "EXT_mmvec"; } +def V6_vaslh_acc : HInst< +(outs HvxVR:$Vx32), +(ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32), +"$Vx32.h += vasl($Vu32.h,$Rt32)", +tc_c00bf9c9, TypeCVI_VS>, Enc_5138b3, Requires<[UseHVXV65]> { +let Inst{7-5} = 0b101; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011001101; +let hasNewValue = 1; +let opNewValue = 0; +let isAccumulator = 1; +let DecoderNamespace = "EXT_mmvec"; +let Constraints = "$Vx32 = $Vx32in"; +} +def V6_vaslh_acc_alt : HInst< +(outs HvxVR:$Vx32), +(ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32), +"$Vx32 += vaslh($Vu32,$Rt32)", +PSEUDO, TypeMAPPING>, Requires<[UseHVXV65]> { +let hasNewValue = 1; +let opNewValue = 0; +let isAccumulator = 1; +let isPseudo = 1; +let isCodeGenOnly = 1; +let DecoderNamespace = "EXT_mmvec"; +let Constraints = "$Vx32 = $Vx32in"; +} def V6_vaslh_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, IntRegs:$Rt32), "$Vd32 = vaslh($Vu32,$Rt32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -29454,7 +30143,7 @@ def V6_vaslhv : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.h = vasl($Vu32.h,$Vv32.h)", -tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111101; @@ -29466,7 +30155,7 @@ def V6_vaslhv_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vaslh($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, 
TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -29477,7 +30166,7 @@ def V6_vaslw : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, IntRegs:$Rt32), "$Vd32.w = vasl($Vu32.w,$Rt32)", -tc_41f4b64e, TypeCVI_VS>, Enc_b087ac, Requires<[HasV60T,UseHVX]> { +tc_41f4b64e, TypeCVI_VS>, Enc_b087ac, Requires<[UseHVXV60]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001011; @@ -29489,7 +30178,7 @@ def V6_vaslw_acc : HInst< (outs HvxVR:$Vx32), (ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32), "$Vx32.w += vasl($Vu32.w,$Rt32)", -tc_c00bf9c9, TypeCVI_VS>, Enc_5138b3, Requires<[HasV60T,UseHVX]> { +tc_c00bf9c9, TypeCVI_VS>, Enc_5138b3, Requires<[UseHVXV60]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001011; @@ -29503,7 +30192,7 @@ def V6_vaslw_acc_alt : HInst< (outs HvxVR:$Vx32), (ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32), "$Vx32 += vaslw($Vu32,$Rt32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isAccumulator = 1; @@ -29516,7 +30205,7 @@ def V6_vaslw_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, IntRegs:$Rt32), "$Vd32 = vaslw($Vu32,$Rt32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -29527,7 +30216,7 @@ def V6_vaslwv : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.w = vasl($Vu32.w,$Vv32.w)", -tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111101; @@ -29539,7 +30228,7 @@ def V6_vaslwv_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vaslw($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -29550,7 +30239,7 @@ def V6_vasrh : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, IntRegs:$Rt32), "$Vd32.h = vasr($Vu32.h,$Rt32)", -tc_41f4b64e, TypeCVI_VS>, Enc_b087ac, Requires<[HasV60T,UseHVX]> { +tc_41f4b64e, TypeCVI_VS>, Enc_b087ac, Requires<[UseHVXV60]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001011; @@ -29558,11 +30247,38 @@ let hasNewValue = 1; let opNewValue = 0; let DecoderNamespace = "EXT_mmvec"; } +def V6_vasrh_acc : HInst< +(outs HvxVR:$Vx32), +(ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32), +"$Vx32.h += vasr($Vu32.h,$Rt32)", +tc_c00bf9c9, TypeCVI_VS>, Enc_5138b3, Requires<[UseHVXV65]> { +let Inst{7-5} = 0b111; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011001100; +let hasNewValue = 1; +let opNewValue = 0; +let isAccumulator = 1; +let DecoderNamespace = "EXT_mmvec"; +let Constraints = "$Vx32 = $Vx32in"; +} +def V6_vasrh_acc_alt : HInst< +(outs HvxVR:$Vx32), +(ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32), +"$Vx32 += vasrh($Vu32,$Rt32)", +PSEUDO, TypeMAPPING>, Requires<[UseHVXV65]> { +let hasNewValue = 1; +let opNewValue = 0; +let isAccumulator = 1; +let isPseudo = 1; +let isCodeGenOnly = 1; +let DecoderNamespace = "EXT_mmvec"; +let Constraints = "$Vx32 = $Vx32in"; +} def V6_vasrh_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, IntRegs:$Rt32), "$Vd32 = vasrh($Vu32,$Rt32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; 
@@ -29573,7 +30289,7 @@ def V6_vasrhbrndsat : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8), "$Vd32.b = vasr($Vu32.h,$Vv32.h,$Rt8):rnd:sat", -tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[HasV60T,UseHVX]> { +tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV60]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b1; let Inst{31-24} = 0b00011011; @@ -29595,7 +30311,7 @@ def V6_vasrhbsat : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8), "$Vd32.b = vasr($Vu32.h,$Vv32.h,$Rt8):sat", -tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[HasV62T,UseHVX]> { +tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV62]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-24} = 0b00011000; @@ -29607,7 +30323,7 @@ def V6_vasrhubrndsat : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8), "$Vd32.ub = vasr($Vu32.h,$Vv32.h,$Rt8):rnd:sat", -tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[HasV60T,UseHVX]> { +tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV60]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-24} = 0b00011011; @@ -29629,7 +30345,7 @@ def V6_vasrhubsat : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8), "$Vd32.ub = vasr($Vu32.h,$Vv32.h,$Rt8):sat", -tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[HasV60T,UseHVX]> { +tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV60]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-24} = 0b00011011; @@ -29651,7 +30367,7 @@ def V6_vasrhv : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.h = vasr($Vu32.h,$Vv32.h)", -tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111101; @@ -29663,18 +30379,42 @@ def V6_vasrhv_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vasrh($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; let isCodeGenOnly = 1; let DecoderNamespace = "EXT_mmvec"; } +def V6_vasruhubrndsat : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8), +"$Vd32.ub = vasr($Vu32.uh,$Vv32.uh,$Rt8):rnd:sat", +tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV65]> { +let Inst{7-5} = 0b111; +let Inst{13-13} = 0b0; +let Inst{31-24} = 0b00011000; +let hasNewValue = 1; +let opNewValue = 0; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vasruhubsat : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8), +"$Vd32.ub = vasr($Vu32.uh,$Vv32.uh,$Rt8):sat", +tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV65]> { +let Inst{7-5} = 0b101; +let Inst{13-13} = 0b1; +let Inst{31-24} = 0b00011000; +let hasNewValue = 1; +let opNewValue = 0; +let DecoderNamespace = "EXT_mmvec"; +} def V6_vasruwuhrndsat : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8), "$Vd32.uh = vasr($Vu32.uw,$Vv32.uw,$Rt8):rnd:sat", -tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[HasV62T,UseHVX]> { +tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV62]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-24} = 0b00011000; @@ -29682,11 +30422,23 @@ let hasNewValue = 1; let opNewValue = 0; let DecoderNamespace = "EXT_mmvec"; } +def V6_vasruwuhsat : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8), +"$Vd32.uh = 
vasr($Vu32.uw,$Vv32.uw,$Rt8):sat", +tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV65]> { +let Inst{7-5} = 0b100; +let Inst{13-13} = 0b1; +let Inst{31-24} = 0b00011000; +let hasNewValue = 1; +let opNewValue = 0; +let DecoderNamespace = "EXT_mmvec"; +} def V6_vasrw : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, IntRegs:$Rt32), "$Vd32.w = vasr($Vu32.w,$Rt32)", -tc_41f4b64e, TypeCVI_VS>, Enc_b087ac, Requires<[HasV60T,UseHVX]> { +tc_41f4b64e, TypeCVI_VS>, Enc_b087ac, Requires<[UseHVXV60]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001011; @@ -29698,7 +30450,7 @@ def V6_vasrw_acc : HInst< (outs HvxVR:$Vx32), (ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32), "$Vx32.w += vasr($Vu32.w,$Rt32)", -tc_c00bf9c9, TypeCVI_VS>, Enc_5138b3, Requires<[HasV60T,UseHVX]> { +tc_c00bf9c9, TypeCVI_VS>, Enc_5138b3, Requires<[UseHVXV60]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001011; @@ -29712,7 +30464,7 @@ def V6_vasrw_acc_alt : HInst< (outs HvxVR:$Vx32), (ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32), "$Vx32 += vasrw($Vu32,$Rt32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isAccumulator = 1; @@ -29725,7 +30477,7 @@ def V6_vasrw_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, IntRegs:$Rt32), "$Vd32 = vasrw($Vu32,$Rt32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -29736,7 +30488,7 @@ def V6_vasrwh : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8), "$Vd32.h = vasr($Vu32.w,$Vv32.w,$Rt8)", -tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[HasV60T,UseHVX]> { +tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV60]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-24} = 0b00011011; @@ -29758,7 +30510,7 @@ def V6_vasrwhrndsat : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8), "$Vd32.h = vasr($Vu32.w,$Vv32.w,$Rt8):rnd:sat", -tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[HasV60T,UseHVX]> { +tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV60]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-24} = 0b00011011; @@ -29780,7 +30532,7 @@ def V6_vasrwhsat : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8), "$Vd32.h = vasr($Vu32.w,$Vv32.w,$Rt8):sat", -tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[HasV60T,UseHVX]> { +tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV60]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-24} = 0b00011011; @@ -29802,7 +30554,7 @@ def V6_vasrwuhrndsat : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8), "$Vd32.uh = vasr($Vu32.w,$Vv32.w,$Rt8):rnd:sat", -tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[HasV62T,UseHVX]> { +tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV62]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-24} = 0b00011000; @@ -29814,7 +30566,7 @@ def V6_vasrwuhsat : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8), "$Vd32.uh = vasr($Vu32.w,$Vv32.w,$Rt8):sat", -tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[HasV60T,UseHVX]> { +tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[UseHVXV60]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-24} = 0b00011011; @@ -29836,7 +30588,7 @@ def V6_vasrwv : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.w = vasr($Vu32.w,$Vv32.w)", 
-tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111101; @@ -29848,7 +30600,7 @@ def V6_vasrwv_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vasrw($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -29859,7 +30611,7 @@ def V6_vassign : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32), "$Vd32 = $Vu32", -tc_71337255, TypeCVI_VA>, Enc_e7581c, Requires<[HasV60T,UseHVX]> { +tc_71337255, TypeCVI_VA>, Enc_e7581c, Requires<[UseHVXV60]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b1; let Inst{31-16} = 0b0001111000000011; @@ -29871,17 +30623,63 @@ def V6_vassignp : HInst< (outs HvxWR:$Vdd32), (ins HvxWR:$Vuu32), "$Vdd32 = $Vuu32", -CVI_VA, TypeCVI_VA_DV>, Requires<[HasV60T,UseHVX]> { +CVI_VA, TypeCVI_VA_DV>, Requires<[UseHVXV60]> { +let hasNewValue = 1; +let opNewValue = 0; +let isPseudo = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vavgb : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.b = vavg($Vu32.b,$Vv32.b)", +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV65]> { +let Inst{7-5} = 0b100; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111000; +let hasNewValue = 1; +let opNewValue = 0; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vavgb_alt : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32 = vavgb($Vu32,$Vv32)", +PSEUDO, TypeMAPPING>, Requires<[UseHVXV65]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; +let isCodeGenOnly = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vavgbrnd : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.b = vavg($Vu32.b,$Vv32.b):rnd", +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV65]> { +let Inst{7-5} = 0b101; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111000; +let hasNewValue = 1; +let opNewValue = 0; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vavgbrnd_alt : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32 = vavgb($Vu32,$Vv32):rnd", +PSEUDO, TypeMAPPING>, Requires<[UseHVXV65]> { +let hasNewValue = 1; +let opNewValue = 0; +let isPseudo = 1; +let isCodeGenOnly = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vavgh : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.h = vavg($Vu32.h,$Vv32.h)", -tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100110; @@ -29893,7 +30691,7 @@ def V6_vavgh_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vavgh($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -29904,7 +30702,7 @@ def V6_vavghrnd : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.h = vavg($Vu32.h,$Vv32.h):rnd", -tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100111; @@ -29916,7 +30714,7 @@ def V6_vavghrnd_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vavgh($Vu32,$Vv32):rnd", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { 
+PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -29927,7 +30725,7 @@ def V6_vavgub : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.ub = vavg($Vu32.ub,$Vv32.ub)", -tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100110; @@ -29939,7 +30737,7 @@ def V6_vavgub_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vavgub($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -29950,7 +30748,7 @@ def V6_vavgubrnd : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.ub = vavg($Vu32.ub,$Vv32.ub):rnd", -tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100111; @@ -29962,7 +30760,7 @@ def V6_vavgubrnd_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vavgub($Vu32,$Vv32):rnd", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -29973,7 +30771,7 @@ def V6_vavguh : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.uh = vavg($Vu32.uh,$Vv32.uh)", -tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100110; @@ -29985,7 +30783,7 @@ def V6_vavguh_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vavguh($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -29996,7 +30794,7 @@ def V6_vavguhrnd : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.uh = vavg($Vu32.uh,$Vv32.uh):rnd", -tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100111; @@ -30008,7 +30806,53 @@ def V6_vavguhrnd_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vavguh($Vu32,$Vv32):rnd", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { +let hasNewValue = 1; +let opNewValue = 0; +let isPseudo = 1; +let isCodeGenOnly = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vavguw : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.uw = vavg($Vu32.uw,$Vv32.uw)", +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV65]> { +let Inst{7-5} = 0b010; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111000; +let hasNewValue = 1; +let opNewValue = 0; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vavguw_alt : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32 = vavguw($Vu32,$Vv32)", +PSEUDO, TypeMAPPING>, Requires<[UseHVXV65]> { +let hasNewValue = 1; +let opNewValue = 0; +let isPseudo = 1; +let isCodeGenOnly = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vavguwrnd : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.uw = vavg($Vu32.uw,$Vv32.uw):rnd", +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, 
Requires<[UseHVXV65]> { +let Inst{7-5} = 0b011; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111000; +let hasNewValue = 1; +let opNewValue = 0; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vavguwrnd_alt : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32 = vavguw($Vu32,$Vv32):rnd", +PSEUDO, TypeMAPPING>, Requires<[UseHVXV65]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -30019,7 +30863,7 @@ def V6_vavgw : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.w = vavg($Vu32.w,$Vv32.w)", -tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100110; @@ -30031,7 +30875,7 @@ def V6_vavgw_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vavgw($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -30042,7 +30886,7 @@ def V6_vavgwrnd : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.w = vavg($Vu32.w,$Vv32.w):rnd", -tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100111; @@ -30054,7 +30898,7 @@ def V6_vavgwrnd_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vavgw($Vu32,$Vv32):rnd", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -30065,7 +30909,7 @@ def V6_vccombine : HInst< (outs HvxWR:$Vdd32), (ins PredRegs:$Ps4, HvxVR:$Vu32, HvxVR:$Vv32), "if ($Ps4) $Vdd32 = vcombine($Vu32,$Vv32)", -tc_2171ebae, TypeCVI_VA_DV>, Enc_8c2412, Requires<[HasV60T,UseHVX]> { +tc_2171ebae, TypeCVI_VA_DV>, Enc_8c2412, Requires<[UseHVXV60]> { let Inst{7-7} = 0b0; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011010011; @@ -30078,7 +30922,7 @@ def V6_vcl0h : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32), "$Vd32.uh = vcl0($Vu32.uh)", -tc_d2cb81ea, TypeCVI_VS>, Enc_e7581c, Requires<[HasV60T,UseHVX]> { +tc_d2cb81ea, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV60]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001111000000010; @@ -30090,7 +30934,7 @@ def V6_vcl0h_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32), "$Vd32 = vcl0h($Vu32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -30101,7 +30945,7 @@ def V6_vcl0w : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32), "$Vd32.uw = vcl0($Vu32.uw)", -tc_d2cb81ea, TypeCVI_VS>, Enc_e7581c, Requires<[HasV60T,UseHVX]> { +tc_d2cb81ea, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV60]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001111000000010; @@ -30113,7 +30957,7 @@ def V6_vcl0w_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32), "$Vd32 = vcl0w($Vu32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -30124,7 +30968,7 @@ def V6_vcmov : HInst< (outs HvxVR:$Vd32), (ins PredRegs:$Ps4, HvxVR:$Vu32), "if ($Ps4) $Vd32 = $Vu32", -tc_b06ab583, TypeCVI_VA>, Enc_770858, Requires<[HasV60T,UseHVX]> { +tc_b06ab583, TypeCVI_VA>, Enc_770858, Requires<[UseHVXV60]> { let Inst{7-7} = 0b0; let 
Inst{13-13} = 0b0; let Inst{31-16} = 0b0001101000000000; @@ -30137,7 +30981,7 @@ def V6_vcombine : HInst< (outs HvxWR:$Vdd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vdd32 = vcombine($Vu32,$Vv32)", -tc_97c165b9, TypeCVI_VA_DV>, Enc_71bb9b, Requires<[HasV60T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_71bb9b, Requires<[UseHVXV60]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111010; @@ -30150,7 +30994,18 @@ def V6_vd0 : HInst< (outs HvxVR:$Vd32), (ins), "$Vd32 = #0", -CVI_VA, TypeCVI_VA>, Requires<[HasV60T,UseHVX]> { +CVI_VA, TypeCVI_VA>, Requires<[UseHVXV60]> { +let hasNewValue = 1; +let opNewValue = 0; +let isPseudo = 1; +let isCodeGenOnly = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vdd0 : HInst< +(outs HvxWR:$Vdd32), +(ins), +"$Vdd32 = #0", +tc_8a6eb39a, TypeMAPPING>, Requires<[UseHVXV65]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -30161,7 +31016,7 @@ def V6_vdeal : HInst< (outs HvxVR:$Vy32, HvxVR:$Vx32), (ins HvxVR:$Vy32in, HvxVR:$Vx32in, IntRegs:$Rt32), "vdeal($Vy32,$Vx32,$Rt32)", -tc_5c120602, TypeCVI_VP_VS>, Enc_989021, Requires<[HasV60T,UseHVX]> { +tc_5c120602, TypeCVI_VP_VS>, Enc_989021, Requires<[UseHVXV60]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001111; @@ -30176,7 +31031,7 @@ def V6_vdealb : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32), "$Vd32.b = vdeal($Vu32.b)", -tc_e6299d16, TypeCVI_VP>, Enc_e7581c, Requires<[HasV60T,UseHVX]> { +tc_e6299d16, TypeCVI_VP>, Enc_e7581c, Requires<[UseHVXV60]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001111000000000; @@ -30188,7 +31043,7 @@ def V6_vdealb4w : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.b = vdeale($Vu32.b,$Vv32.b)", -tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111001; @@ -30200,7 +31055,7 @@ def V6_vdealb4w_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vdealb4w($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -30211,7 +31066,7 @@ def V6_vdealb_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32), "$Vd32 = vdealb($Vu32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -30222,7 +31077,7 @@ def V6_vdealh : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32), "$Vd32.h = vdeal($Vu32.h)", -tc_e6299d16, TypeCVI_VP>, Enc_e7581c, Requires<[HasV60T,UseHVX]> { +tc_e6299d16, TypeCVI_VP>, Enc_e7581c, Requires<[UseHVXV60]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001111000000000; @@ -30234,7 +31089,7 @@ def V6_vdealh_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32), "$Vd32 = vdealh($Vu32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -30245,7 +31100,7 @@ def V6_vdealvdd : HInst< (outs HvxWR:$Vdd32), (ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8), "$Vdd32 = vdeal($Vu32,$Vv32,$Rt8)", -tc_4e2a5159, TypeCVI_VP_VS>, Enc_24a7dc, Requires<[HasV60T,UseHVX]> { +tc_4e2a5159, TypeCVI_VP_VS>, Enc_24a7dc, Requires<[UseHVXV60]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b1; let Inst{31-24} = 0b00011011; @@ -30257,7 +31112,7 @@ def V6_vdelta : HInst< (outs HvxVR:$Vd32), 
(ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vdelta($Vu32,$Vv32)", -tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111001; @@ -30269,7 +31124,7 @@ def V6_vdmpybus : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, IntRegs:$Rt32), "$Vd32.h = vdmpy($Vu32.ub,$Rt32.b)", -tc_69b6dd20, TypeCVI_VX>, Enc_b087ac, Requires<[HasV60T,UseHVX]> { +tc_69b6dd20, TypeCVI_VX>, Enc_b087ac, Requires<[UseHVXV60]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001000; @@ -30281,7 +31136,7 @@ def V6_vdmpybus_acc : HInst< (outs HvxVR:$Vx32), (ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32), "$Vx32.h += vdmpy($Vu32.ub,$Rt32.b)", -tc_d725e5b0, TypeCVI_VX>, Enc_5138b3, Requires<[HasV60T,UseHVX]> { +tc_d725e5b0, TypeCVI_VX>, Enc_5138b3, Requires<[UseHVXV60]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001000; @@ -30295,7 +31150,7 @@ def V6_vdmpybus_acc_alt : HInst< (outs HvxVR:$Vx32), (ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32), "$Vx32 += vdmpybus($Vu32,$Rt32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isAccumulator = 1; @@ -30308,7 +31163,7 @@ def V6_vdmpybus_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, IntRegs:$Rt32), "$Vd32 = vdmpybus($Vu32,$Rt32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -30319,7 +31174,7 @@ def V6_vdmpybus_dv : HInst< (outs HvxWR:$Vdd32), (ins HvxWR:$Vuu32, IntRegs:$Rt32), "$Vdd32.h = vdmpy($Vuu32.ub,$Rt32.b)", -tc_7c3f55c4, TypeCVI_VX_DV>, Enc_aad80c, Requires<[HasV60T,UseHVX]> { +tc_7c3f55c4, TypeCVI_VX_DV>, Enc_aad80c, Requires<[UseHVXV60]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001000; @@ -30331,7 +31186,7 @@ def V6_vdmpybus_dv_acc : HInst< (outs HvxWR:$Vxx32), (ins HvxWR:$Vxx32in, HvxWR:$Vuu32, IntRegs:$Rt32), "$Vxx32.h += vdmpy($Vuu32.ub,$Rt32.b)", -tc_d98f4d63, TypeCVI_VX_DV>, Enc_d6990d, Requires<[HasV60T,UseHVX]> { +tc_d98f4d63, TypeCVI_VX_DV>, Enc_d6990d, Requires<[UseHVXV60]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001000; @@ -30345,7 +31200,7 @@ def V6_vdmpybus_dv_acc_alt : HInst< (outs HvxWR:$Vxx32), (ins HvxWR:$Vxx32in, HvxWR:$Vuu32, IntRegs:$Rt32), "$Vxx32 += vdmpybus($Vuu32,$Rt32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isAccumulator = 1; @@ -30358,7 +31213,7 @@ def V6_vdmpybus_dv_alt : HInst< (outs HvxWR:$Vdd32), (ins HvxWR:$Vuu32, IntRegs:$Rt32), "$Vdd32 = vdmpybus($Vuu32,$Rt32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -30369,7 +31224,7 @@ def V6_vdmpyhb : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, IntRegs:$Rt32), "$Vd32.w = vdmpy($Vu32.h,$Rt32.b)", -tc_69b6dd20, TypeCVI_VX>, Enc_b087ac, Requires<[HasV60T,UseHVX]> { +tc_69b6dd20, TypeCVI_VX>, Enc_b087ac, Requires<[UseHVXV60]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001000; @@ -30381,7 +31236,7 @@ def V6_vdmpyhb_acc : HInst< (outs HvxVR:$Vx32), (ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32), "$Vx32.w += vdmpy($Vu32.h,$Rt32.b)", -tc_d725e5b0, TypeCVI_VX>, Enc_5138b3, 
Requires<[HasV60T,UseHVX]> { +tc_d725e5b0, TypeCVI_VX>, Enc_5138b3, Requires<[UseHVXV60]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001000; @@ -30395,7 +31250,7 @@ def V6_vdmpyhb_acc_alt : HInst< (outs HvxVR:$Vx32), (ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32), "$Vx32 += vdmpyhb($Vu32,$Rt32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isAccumulator = 1; @@ -30408,7 +31263,7 @@ def V6_vdmpyhb_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, IntRegs:$Rt32), "$Vd32 = vdmpyhb($Vu32,$Rt32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -30419,7 +31274,7 @@ def V6_vdmpyhb_dv : HInst< (outs HvxWR:$Vdd32), (ins HvxWR:$Vuu32, IntRegs:$Rt32), "$Vdd32.w = vdmpy($Vuu32.h,$Rt32.b)", -tc_7c3f55c4, TypeCVI_VX_DV>, Enc_aad80c, Requires<[HasV60T,UseHVX]> { +tc_7c3f55c4, TypeCVI_VX_DV>, Enc_aad80c, Requires<[UseHVXV60]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001001; @@ -30431,7 +31286,7 @@ def V6_vdmpyhb_dv_acc : HInst< (outs HvxWR:$Vxx32), (ins HvxWR:$Vxx32in, HvxWR:$Vuu32, IntRegs:$Rt32), "$Vxx32.w += vdmpy($Vuu32.h,$Rt32.b)", -tc_d98f4d63, TypeCVI_VX_DV>, Enc_d6990d, Requires<[HasV60T,UseHVX]> { +tc_d98f4d63, TypeCVI_VX_DV>, Enc_d6990d, Requires<[UseHVXV60]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001001; @@ -30445,7 +31300,7 @@ def V6_vdmpyhb_dv_acc_alt : HInst< (outs HvxWR:$Vxx32), (ins HvxWR:$Vxx32in, HvxWR:$Vuu32, IntRegs:$Rt32), "$Vxx32 += vdmpyhb($Vuu32,$Rt32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isAccumulator = 1; @@ -30458,7 +31313,7 @@ def V6_vdmpyhb_dv_alt : HInst< (outs HvxWR:$Vdd32), (ins HvxWR:$Vuu32, IntRegs:$Rt32), "$Vdd32 = vdmpyhb($Vuu32,$Rt32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -30469,7 +31324,7 @@ def V6_vdmpyhisat : HInst< (outs HvxVR:$Vd32), (ins HvxWR:$Vuu32, IntRegs:$Rt32), "$Vd32.w = vdmpy($Vuu32.h,$Rt32.h):sat", -tc_7c3f55c4, TypeCVI_VX_DV>, Enc_0e41fa, Requires<[HasV60T,UseHVX]> { +tc_7c3f55c4, TypeCVI_VX_DV>, Enc_0e41fa, Requires<[UseHVXV60]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001001; @@ -30481,7 +31336,7 @@ def V6_vdmpyhisat_acc : HInst< (outs HvxVR:$Vx32), (ins HvxVR:$Vx32in, HvxWR:$Vuu32, IntRegs:$Rt32), "$Vx32.w += vdmpy($Vuu32.h,$Rt32.h):sat", -tc_d98f4d63, TypeCVI_VX_DV>, Enc_cc857d, Requires<[HasV60T,UseHVX]> { +tc_d98f4d63, TypeCVI_VX_DV>, Enc_cc857d, Requires<[UseHVXV60]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001001; @@ -30495,7 +31350,7 @@ def V6_vdmpyhisat_acc_alt : HInst< (outs HvxVR:$Vx32), (ins HvxVR:$Vx32in, HvxWR:$Vuu32, IntRegs:$Rt32), "$Vx32 += vdmpyh($Vuu32,$Rt32):sat", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isAccumulator = 1; @@ -30508,7 +31363,7 @@ def V6_vdmpyhisat_alt : HInst< (outs HvxVR:$Vd32), (ins HvxWR:$Vuu32, IntRegs:$Rt32), "$Vd32 = vdmpyh($Vuu32,$Rt32):sat", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -30519,7 
+31374,7 @@ def V6_vdmpyhsat : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, IntRegs:$Rt32), "$Vd32.w = vdmpy($Vu32.h,$Rt32.h):sat", -tc_7c3f55c4, TypeCVI_VX_DV>, Enc_b087ac, Requires<[HasV60T,UseHVX]> { +tc_7c3f55c4, TypeCVI_VX_DV>, Enc_b087ac, Requires<[UseHVXV60]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001001; @@ -30531,7 +31386,7 @@ def V6_vdmpyhsat_acc : HInst< (outs HvxVR:$Vx32), (ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32), "$Vx32.w += vdmpy($Vu32.h,$Rt32.h):sat", -tc_d98f4d63, TypeCVI_VX_DV>, Enc_5138b3, Requires<[HasV60T,UseHVX]> { +tc_d98f4d63, TypeCVI_VX_DV>, Enc_5138b3, Requires<[UseHVXV60]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001001; @@ -30545,7 +31400,7 @@ def V6_vdmpyhsat_acc_alt : HInst< (outs HvxVR:$Vx32), (ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32), "$Vx32 += vdmpyh($Vu32,$Rt32):sat", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isAccumulator = 1; @@ -30558,7 +31413,7 @@ def V6_vdmpyhsat_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, IntRegs:$Rt32), "$Vd32 = vdmpyh($Vu32,$Rt32):sat", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -30569,7 +31424,7 @@ def V6_vdmpyhsuisat : HInst< (outs HvxVR:$Vd32), (ins HvxWR:$Vuu32, IntRegs:$Rt32), "$Vd32.w = vdmpy($Vuu32.h,$Rt32.uh,#1):sat", -tc_7c3f55c4, TypeCVI_VX_DV>, Enc_0e41fa, Requires<[HasV60T,UseHVX]> { +tc_7c3f55c4, TypeCVI_VX_DV>, Enc_0e41fa, Requires<[UseHVXV60]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001001; @@ -30581,7 +31436,7 @@ def V6_vdmpyhsuisat_acc : HInst< (outs HvxVR:$Vx32), (ins HvxVR:$Vx32in, HvxWR:$Vuu32, IntRegs:$Rt32), "$Vx32.w += vdmpy($Vuu32.h,$Rt32.uh,#1):sat", -tc_d98f4d63, TypeCVI_VX_DV>, Enc_cc857d, Requires<[HasV60T,UseHVX]> { +tc_d98f4d63, TypeCVI_VX_DV>, Enc_cc857d, Requires<[UseHVXV60]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001001; @@ -30595,7 +31450,7 @@ def V6_vdmpyhsuisat_acc_alt : HInst< (outs HvxVR:$Vx32), (ins HvxVR:$Vx32in, HvxWR:$Vuu32, IntRegs:$Rt32), "$Vx32 += vdmpyhsu($Vuu32,$Rt32,#1):sat", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isAccumulator = 1; @@ -30608,7 +31463,7 @@ def V6_vdmpyhsuisat_alt : HInst< (outs HvxVR:$Vd32), (ins HvxWR:$Vuu32, IntRegs:$Rt32), "$Vd32 = vdmpyhsu($Vuu32,$Rt32,#1):sat", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -30619,7 +31474,7 @@ def V6_vdmpyhsusat : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, IntRegs:$Rt32), "$Vd32.w = vdmpy($Vu32.h,$Rt32.uh):sat", -tc_7c3f55c4, TypeCVI_VX_DV>, Enc_b087ac, Requires<[HasV60T,UseHVX]> { +tc_7c3f55c4, TypeCVI_VX_DV>, Enc_b087ac, Requires<[UseHVXV60]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001001; @@ -30631,7 +31486,7 @@ def V6_vdmpyhsusat_acc : HInst< (outs HvxVR:$Vx32), (ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32), "$Vx32.w += vdmpy($Vu32.h,$Rt32.uh):sat", -tc_d98f4d63, TypeCVI_VX_DV>, Enc_5138b3, Requires<[HasV60T,UseHVX]> { +tc_d98f4d63, TypeCVI_VX_DV>, Enc_5138b3, Requires<[UseHVXV60]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001001; @@ -30645,7 +31500,7 @@ def 
V6_vdmpyhsusat_acc_alt : HInst< (outs HvxVR:$Vx32), (ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32), "$Vx32 += vdmpyhsu($Vu32,$Rt32):sat", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isAccumulator = 1; @@ -30658,7 +31513,7 @@ def V6_vdmpyhsusat_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, IntRegs:$Rt32), "$Vd32 = vdmpyhsu($Vu32,$Rt32):sat", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -30669,7 +31524,7 @@ def V6_vdmpyhvsat : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.w = vdmpy($Vu32.h,$Vv32.h):sat", -tc_eda67dcd, TypeCVI_VX_DV>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_eda67dcd, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100000; @@ -30681,7 +31536,7 @@ def V6_vdmpyhvsat_acc : HInst< (outs HvxVR:$Vx32), (ins HvxVR:$Vx32in, HvxVR:$Vu32, HvxVR:$Vv32), "$Vx32.w += vdmpy($Vu32.h,$Vv32.h):sat", -tc_e172d86a, TypeCVI_VX_DV>, Enc_a7341a, Requires<[HasV60T,UseHVX]> { +tc_e172d86a, TypeCVI_VX_DV>, Enc_a7341a, Requires<[UseHVXV60]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100000; @@ -30695,7 +31550,7 @@ def V6_vdmpyhvsat_acc_alt : HInst< (outs HvxVR:$Vx32), (ins HvxVR:$Vx32in, HvxVR:$Vu32, HvxVR:$Vv32), "$Vx32 += vdmpyh($Vu32,$Vv32):sat", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isAccumulator = 1; @@ -30708,7 +31563,7 @@ def V6_vdmpyhvsat_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vdmpyh($Vu32,$Vv32):sat", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -30719,7 +31574,7 @@ def V6_vdsaduh : HInst< (outs HvxWR:$Vdd32), (ins HvxWR:$Vuu32, IntRegs:$Rt32), "$Vdd32.uw = vdsad($Vuu32.uh,$Rt32.uh)", -tc_7c3f55c4, TypeCVI_VX_DV>, Enc_aad80c, Requires<[HasV60T,UseHVX]> { +tc_7c3f55c4, TypeCVI_VX_DV>, Enc_aad80c, Requires<[UseHVXV60]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001000; @@ -30731,7 +31586,7 @@ def V6_vdsaduh_acc : HInst< (outs HvxWR:$Vxx32), (ins HvxWR:$Vxx32in, HvxWR:$Vuu32, IntRegs:$Rt32), "$Vxx32.uw += vdsad($Vuu32.uh,$Rt32.uh)", -tc_d98f4d63, TypeCVI_VX_DV>, Enc_d6990d, Requires<[HasV60T,UseHVX]> { +tc_d98f4d63, TypeCVI_VX_DV>, Enc_d6990d, Requires<[UseHVXV60]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001011; @@ -30745,7 +31600,7 @@ def V6_vdsaduh_acc_alt : HInst< (outs HvxWR:$Vxx32), (ins HvxWR:$Vxx32in, HvxWR:$Vuu32, IntRegs:$Rt32), "$Vxx32 += vdsaduh($Vuu32,$Rt32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isAccumulator = 1; @@ -30758,7 +31613,7 @@ def V6_vdsaduh_alt : HInst< (outs HvxWR:$Vdd32), (ins HvxWR:$Vuu32, IntRegs:$Rt32), "$Vdd32 = vdsaduh($Vuu32,$Rt32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -30769,7 +31624,7 @@ def V6_veqb : HInst< (outs HvxQR:$Qd4), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Qd4 = vcmp.eq($Vu32.b,$Vv32.b)", -tc_bbaf280e, TypeCVI_VA>, Enc_95441f, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, 
TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV60]> { let Inst{7-2} = 0b000000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111100; @@ -30781,12 +31636,10 @@ def V6_veqb_and : HInst< (outs HvxQR:$Qx4), (ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32), "$Qx4 &= vcmp.eq($Vu32.b,$Vv32.b)", -tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> { let Inst{7-2} = 0b000000; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; -let hasNewValue = 1; -let opNewValue = 0; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -30794,12 +31647,10 @@ def V6_veqb_or : HInst< (outs HvxQR:$Qx4), (ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32), "$Qx4 |= vcmp.eq($Vu32.b,$Vv32.b)", -tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> { let Inst{7-2} = 0b010000; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; -let hasNewValue = 1; -let opNewValue = 0; let isAccumulator = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; @@ -30808,12 +31659,10 @@ def V6_veqb_xor : HInst< (outs HvxQR:$Qx4), (ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32), "$Qx4 ^= vcmp.eq($Vu32.b,$Vv32.b)", -tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> { let Inst{7-2} = 0b100000; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; -let hasNewValue = 1; -let opNewValue = 0; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -30821,7 +31670,7 @@ def V6_veqh : HInst< (outs HvxQR:$Qd4), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Qd4 = vcmp.eq($Vu32.h,$Vv32.h)", -tc_bbaf280e, TypeCVI_VA>, Enc_95441f, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV60]> { let Inst{7-2} = 0b000001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111100; @@ -30833,12 +31682,10 @@ def V6_veqh_and : HInst< (outs HvxQR:$Qx4), (ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32), "$Qx4 &= vcmp.eq($Vu32.h,$Vv32.h)", -tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> { let Inst{7-2} = 0b000001; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; -let hasNewValue = 1; -let opNewValue = 0; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -30846,12 +31693,10 @@ def V6_veqh_or : HInst< (outs HvxQR:$Qx4), (ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32), "$Qx4 |= vcmp.eq($Vu32.h,$Vv32.h)", -tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> { let Inst{7-2} = 0b010001; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; -let hasNewValue = 1; -let opNewValue = 0; let isAccumulator = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; @@ -30860,12 +31705,10 @@ def V6_veqh_xor : HInst< (outs HvxQR:$Qx4), (ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32), "$Qx4 ^= vcmp.eq($Vu32.h,$Vv32.h)", -tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> { let Inst{7-2} = 0b100001; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; -let hasNewValue = 1; -let opNewValue = 0; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -30873,7 +31716,7 @@ def V6_veqw : HInst< (outs HvxQR:$Qd4), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Qd4 = vcmp.eq($Vu32.w,$Vv32.w)", -tc_bbaf280e, 
TypeCVI_VA>, Enc_95441f, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV60]> { let Inst{7-2} = 0b000010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111100; @@ -30885,12 +31728,10 @@ def V6_veqw_and : HInst< (outs HvxQR:$Qx4), (ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32), "$Qx4 &= vcmp.eq($Vu32.w,$Vv32.w)", -tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> { let Inst{7-2} = 0b000010; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; -let hasNewValue = 1; -let opNewValue = 0; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -30898,12 +31739,10 @@ def V6_veqw_or : HInst< (outs HvxQR:$Qx4), (ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32), "$Qx4 |= vcmp.eq($Vu32.w,$Vv32.w)", -tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> { let Inst{7-2} = 0b010010; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; -let hasNewValue = 1; -let opNewValue = 0; let isAccumulator = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; @@ -30912,20 +31751,114 @@ def V6_veqw_xor : HInst< (outs HvxQR:$Qx4), (ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32), "$Qx4 ^= vcmp.eq($Vu32.w,$Vv32.w)", -tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> { let Inst{7-2} = 0b100010; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; +let DecoderNamespace = "EXT_mmvec"; +let Constraints = "$Qx4 = $Qx4in"; +} +def V6_vgathermh : HInst< +(outs), +(ins IntRegs:$Rt32, ModRegs:$Mu2, HvxVR:$Vv32), +"vtmp.h = vgather($Rt32,$Mu2,$Vv32.h).h", +tc_66bb62ea, TypeCVI_GATHER>, Enc_8b8927, Requires<[UseHVXV65]> { +let Inst{12-5} = 0b00001000; +let Inst{31-21} = 0b00101111000; let hasNewValue = 1; let opNewValue = 0; +let accessSize = HalfWordAccess; +let isCVLoad = 1; +let hasTmpDst = 1; +let mayLoad = 1; +let Defs = [VTMP]; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vgathermhq : HInst< +(outs), +(ins HvxQR:$Qs4, IntRegs:$Rt32, ModRegs:$Mu2, HvxVR:$Vv32), +"if ($Qs4) vtmp.h = vgather($Rt32,$Mu2,$Vv32.h).h", +tc_63e3d94c, TypeCVI_GATHER>, Enc_158beb, Requires<[UseHVXV65]> { +let Inst{12-7} = 0b001010; +let Inst{31-21} = 0b00101111000; +let hasNewValue = 1; +let opNewValue = 0; +let accessSize = HalfWordAccess; +let isCVLoad = 1; +let hasTmpDst = 1; +let mayLoad = 1; +let Defs = [VTMP]; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vgathermhw : HInst< +(outs), +(ins IntRegs:$Rt32, ModRegs:$Mu2, HvxWR:$Vvv32), +"vtmp.h = vgather($Rt32,$Mu2,$Vvv32.w).h", +tc_bfe309d5, TypeCVI_GATHER>, Enc_28dcbb, Requires<[UseHVXV65]> { +let Inst{12-5} = 0b00010000; +let Inst{31-21} = 0b00101111000; +let hasNewValue = 1; +let opNewValue = 0; +let accessSize = HalfWordAccess; +let isCVLoad = 1; +let hasTmpDst = 1; +let mayLoad = 1; +let Defs = [VTMP]; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vgathermhwq : HInst< +(outs), +(ins HvxQR:$Qs4, IntRegs:$Rt32, ModRegs:$Mu2, HvxWR:$Vvv32), +"if ($Qs4) vtmp.h = vgather($Rt32,$Mu2,$Vvv32.w).h", +tc_98733e9d, TypeCVI_GATHER>, Enc_4e4a80, Requires<[UseHVXV65]> { +let Inst{12-7} = 0b001100; +let Inst{31-21} = 0b00101111000; +let hasNewValue = 1; +let opNewValue = 0; +let accessSize = HalfWordAccess; +let isCVLoad = 1; +let hasTmpDst = 1; +let mayLoad = 1; +let Defs = [VTMP]; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vgathermw : HInst< +(outs), +(ins IntRegs:$Rt32, ModRegs:$Mu2, 
HvxVR:$Vv32), +"vtmp.w = vgather($Rt32,$Mu2,$Vv32.w).w", +tc_66bb62ea, TypeCVI_GATHER>, Enc_8b8927, Requires<[UseHVXV65]> { +let Inst{12-5} = 0b00000000; +let Inst{31-21} = 0b00101111000; +let hasNewValue = 1; +let opNewValue = 0; +let accessSize = WordAccess; +let isCVLoad = 1; +let hasTmpDst = 1; +let mayLoad = 1; +let Defs = [VTMP]; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vgathermwq : HInst< +(outs), +(ins HvxQR:$Qs4, IntRegs:$Rt32, ModRegs:$Mu2, HvxVR:$Vv32), +"if ($Qs4) vtmp.w = vgather($Rt32,$Mu2,$Vv32.w).w", +tc_63e3d94c, TypeCVI_GATHER>, Enc_158beb, Requires<[UseHVXV65]> { +let Inst{12-7} = 0b001000; +let Inst{31-21} = 0b00101111000; +let hasNewValue = 1; +let opNewValue = 0; +let accessSize = WordAccess; +let isCVLoad = 1; +let hasTmpDst = 1; +let mayLoad = 1; +let Defs = [VTMP]; let DecoderNamespace = "EXT_mmvec"; -let Constraints = "$Qx4 = $Qx4in"; } def V6_vgtb : HInst< (outs HvxQR:$Qd4), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Qd4 = vcmp.gt($Vu32.b,$Vv32.b)", -tc_bbaf280e, TypeCVI_VA>, Enc_95441f, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV60]> { let Inst{7-2} = 0b000100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111100; @@ -30937,12 +31870,10 @@ def V6_vgtb_and : HInst< (outs HvxQR:$Qx4), (ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32), "$Qx4 &= vcmp.gt($Vu32.b,$Vv32.b)", -tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> { let Inst{7-2} = 0b000100; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; -let hasNewValue = 1; -let opNewValue = 0; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -30950,12 +31881,10 @@ def V6_vgtb_or : HInst< (outs HvxQR:$Qx4), (ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32), "$Qx4 |= vcmp.gt($Vu32.b,$Vv32.b)", -tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> { let Inst{7-2} = 0b010100; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; -let hasNewValue = 1; -let opNewValue = 0; let isAccumulator = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; @@ -30964,12 +31893,10 @@ def V6_vgtb_xor : HInst< (outs HvxQR:$Qx4), (ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32), "$Qx4 ^= vcmp.gt($Vu32.b,$Vv32.b)", -tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> { let Inst{7-2} = 0b100100; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; -let hasNewValue = 1; -let opNewValue = 0; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -30977,7 +31904,7 @@ def V6_vgth : HInst< (outs HvxQR:$Qd4), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Qd4 = vcmp.gt($Vu32.h,$Vv32.h)", -tc_bbaf280e, TypeCVI_VA>, Enc_95441f, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV60]> { let Inst{7-2} = 0b000101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111100; @@ -30989,12 +31916,10 @@ def V6_vgth_and : HInst< (outs HvxQR:$Qx4), (ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32), "$Qx4 &= vcmp.gt($Vu32.h,$Vv32.h)", -tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> { let Inst{7-2} = 0b000101; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; -let hasNewValue = 1; -let opNewValue = 0; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -31002,12 +31927,10 @@ def V6_vgth_or : HInst< (outs 
HvxQR:$Qx4), (ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32), "$Qx4 |= vcmp.gt($Vu32.h,$Vv32.h)", -tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> { let Inst{7-2} = 0b010101; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; -let hasNewValue = 1; -let opNewValue = 0; let isAccumulator = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; @@ -31016,12 +31939,10 @@ def V6_vgth_xor : HInst< (outs HvxQR:$Qx4), (ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32), "$Qx4 ^= vcmp.gt($Vu32.h,$Vv32.h)", -tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> { let Inst{7-2} = 0b100101; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; -let hasNewValue = 1; -let opNewValue = 0; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -31029,7 +31950,7 @@ def V6_vgtub : HInst< (outs HvxQR:$Qd4), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Qd4 = vcmp.gt($Vu32.ub,$Vv32.ub)", -tc_bbaf280e, TypeCVI_VA>, Enc_95441f, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV60]> { let Inst{7-2} = 0b001000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111100; @@ -31041,12 +31962,10 @@ def V6_vgtub_and : HInst< (outs HvxQR:$Qx4), (ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32), "$Qx4 &= vcmp.gt($Vu32.ub,$Vv32.ub)", -tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> { let Inst{7-2} = 0b001000; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; -let hasNewValue = 1; -let opNewValue = 0; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -31054,12 +31973,10 @@ def V6_vgtub_or : HInst< (outs HvxQR:$Qx4), (ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32), "$Qx4 |= vcmp.gt($Vu32.ub,$Vv32.ub)", -tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> { let Inst{7-2} = 0b011000; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; -let hasNewValue = 1; -let opNewValue = 0; let isAccumulator = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; @@ -31068,12 +31985,10 @@ def V6_vgtub_xor : HInst< (outs HvxQR:$Qx4), (ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32), "$Qx4 ^= vcmp.gt($Vu32.ub,$Vv32.ub)", -tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> { let Inst{7-2} = 0b101000; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; -let hasNewValue = 1; -let opNewValue = 0; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -31081,7 +31996,7 @@ def V6_vgtuh : HInst< (outs HvxQR:$Qd4), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Qd4 = vcmp.gt($Vu32.uh,$Vv32.uh)", -tc_bbaf280e, TypeCVI_VA>, Enc_95441f, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV60]> { let Inst{7-2} = 0b001001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111100; @@ -31093,12 +32008,10 @@ def V6_vgtuh_and : HInst< (outs HvxQR:$Qx4), (ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32), "$Qx4 &= vcmp.gt($Vu32.uh,$Vv32.uh)", -tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> { let Inst{7-2} = 0b001001; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; -let hasNewValue = 1; -let opNewValue = 0; let DecoderNamespace = "EXT_mmvec"; let Constraints 
= "$Qx4 = $Qx4in"; } @@ -31106,12 +32019,10 @@ def V6_vgtuh_or : HInst< (outs HvxQR:$Qx4), (ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32), "$Qx4 |= vcmp.gt($Vu32.uh,$Vv32.uh)", -tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> { let Inst{7-2} = 0b011001; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; -let hasNewValue = 1; -let opNewValue = 0; let isAccumulator = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; @@ -31120,12 +32031,10 @@ def V6_vgtuh_xor : HInst< (outs HvxQR:$Qx4), (ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32), "$Qx4 ^= vcmp.gt($Vu32.uh,$Vv32.uh)", -tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> { let Inst{7-2} = 0b101001; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; -let hasNewValue = 1; -let opNewValue = 0; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -31133,7 +32042,7 @@ def V6_vgtuw : HInst< (outs HvxQR:$Qd4), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Qd4 = vcmp.gt($Vu32.uw,$Vv32.uw)", -tc_bbaf280e, TypeCVI_VA>, Enc_95441f, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV60]> { let Inst{7-2} = 0b001010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111100; @@ -31145,12 +32054,10 @@ def V6_vgtuw_and : HInst< (outs HvxQR:$Qx4), (ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32), "$Qx4 &= vcmp.gt($Vu32.uw,$Vv32.uw)", -tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> { let Inst{7-2} = 0b001010; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; -let hasNewValue = 1; -let opNewValue = 0; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -31158,12 +32065,10 @@ def V6_vgtuw_or : HInst< (outs HvxQR:$Qx4), (ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32), "$Qx4 |= vcmp.gt($Vu32.uw,$Vv32.uw)", -tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> { let Inst{7-2} = 0b011010; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; -let hasNewValue = 1; -let opNewValue = 0; let isAccumulator = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; @@ -31172,12 +32077,10 @@ def V6_vgtuw_xor : HInst< (outs HvxQR:$Qx4), (ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32), "$Qx4 ^= vcmp.gt($Vu32.uw,$Vv32.uw)", -tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> { let Inst{7-2} = 0b101010; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; -let hasNewValue = 1; -let opNewValue = 0; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -31185,7 +32088,7 @@ def V6_vgtw : HInst< (outs HvxQR:$Qd4), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Qd4 = vcmp.gt($Vu32.w,$Vv32.w)", -tc_bbaf280e, TypeCVI_VA>, Enc_95441f, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV60]> { let Inst{7-2} = 0b000110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111100; @@ -31197,12 +32100,10 @@ def V6_vgtw_and : HInst< (outs HvxQR:$Qx4), (ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32), "$Qx4 &= vcmp.gt($Vu32.w,$Vv32.w)", -tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> { let Inst{7-2} = 0b000110; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; -let hasNewValue 
= 1; -let opNewValue = 0; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -31210,12 +32111,10 @@ def V6_vgtw_or : HInst< (outs HvxQR:$Qx4), (ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32), "$Qx4 |= vcmp.gt($Vu32.w,$Vv32.w)", -tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> { let Inst{7-2} = 0b010110; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; -let hasNewValue = 1; -let opNewValue = 0; let isAccumulator = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; @@ -31224,12 +32123,10 @@ def V6_vgtw_xor : HInst< (outs HvxQR:$Qx4), (ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32), "$Qx4 ^= vcmp.gt($Vu32.w,$Vv32.w)", -tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV60]> { let Inst{7-2} = 0b100110; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; -let hasNewValue = 1; -let opNewValue = 0; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -31237,7 +32134,7 @@ def V6_vhist : HInst< (outs), (ins), "vhist", -tc_e5053c8f, TypeCVI_HIST>, Enc_e3b0c4, Requires<[HasV60T,UseHVX]> { +tc_e5053c8f, TypeCVI_HIST>, Enc_e3b0c4, Requires<[UseHVXV60]> { let Inst{13-0} = 0b10000010000000; let Inst{31-16} = 0b0001111000000000; let DecoderNamespace = "EXT_mmvec"; @@ -31246,7 +32143,7 @@ def V6_vhistq : HInst< (outs), (ins HvxQR:$Qv4), "vhist($Qv4)", -tc_cedf314b, TypeCVI_HIST>, Enc_217147, Requires<[HasV60T,UseHVX]> { +tc_cedf314b, TypeCVI_HIST>, Enc_217147, Requires<[UseHVXV60]> { let Inst{13-0} = 0b10000010000000; let Inst{21-16} = 0b000010; let Inst{31-24} = 0b00011110; @@ -31256,7 +32153,7 @@ def V6_vinsertwr : HInst< (outs HvxVR:$Vx32), (ins HvxVR:$Vx32in, IntRegs:$Rt32), "$Vx32.w = vinsert($Rt32)", -tc_e231aa4f, TypeCVI_VX_LATE>, Enc_569cfe, Requires<[HasV60T,UseHVX]> { +tc_e231aa4f, TypeCVI_VX_LATE>, Enc_569cfe, Requires<[UseHVXV60]> { let Inst{13-5} = 0b100000001; let Inst{31-21} = 0b00011001101; let hasNewValue = 1; @@ -31268,7 +32165,7 @@ def V6_vlalignb : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8), "$Vd32 = vlalign($Vu32,$Vv32,$Rt8)", -tc_c4b515c5, TypeCVI_VP>, Enc_a30110, Requires<[HasV60T,UseHVX]> { +tc_c4b515c5, TypeCVI_VP>, Enc_a30110, Requires<[UseHVXV60]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-24} = 0b00011011; @@ -31280,7 +32177,7 @@ def V6_vlalignbi : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32, u3_0Imm:$Ii), "$Vd32 = vlalign($Vu32,$Vv32,#$Ii)", -tc_c4b515c5, TypeCVI_VP>, Enc_0b2e5b, Requires<[HasV60T,UseHVX]> { +tc_c4b515c5, TypeCVI_VP>, Enc_0b2e5b, Requires<[UseHVXV60]> { let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011110011; let hasNewValue = 1; @@ -31291,7 +32188,7 @@ def V6_vlsrb : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, IntRegs:$Rt32), "$Vd32.ub = vlsr($Vu32.ub,$Rt32)", -tc_41f4b64e, TypeCVI_VS>, Enc_b087ac, Requires<[HasV62T,UseHVX]> { +tc_41f4b64e, TypeCVI_VS>, Enc_b087ac, Requires<[UseHVXV62]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001100; @@ -31303,7 +32200,7 @@ def V6_vlsrh : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, IntRegs:$Rt32), "$Vd32.uh = vlsr($Vu32.uh,$Rt32)", -tc_41f4b64e, TypeCVI_VS>, Enc_b087ac, Requires<[HasV60T,UseHVX]> { +tc_41f4b64e, TypeCVI_VS>, Enc_b087ac, Requires<[UseHVXV60]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001100; @@ -31315,7 +32212,7 @@ def V6_vlsrh_alt : HInst< (outs HvxVR:$Vd32), 
(ins HvxVR:$Vu32, IntRegs:$Rt32), "$Vd32 = vlsrh($Vu32,$Rt32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -31326,7 +32223,7 @@ def V6_vlsrhv : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.h = vlsr($Vu32.h,$Vv32.h)", -tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111101; @@ -31338,7 +32235,7 @@ def V6_vlsrhv_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vlsrh($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -31349,7 +32246,7 @@ def V6_vlsrw : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, IntRegs:$Rt32), "$Vd32.uw = vlsr($Vu32.uw,$Rt32)", -tc_41f4b64e, TypeCVI_VS>, Enc_b087ac, Requires<[HasV60T,UseHVX]> { +tc_41f4b64e, TypeCVI_VS>, Enc_b087ac, Requires<[UseHVXV60]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001100; @@ -31361,7 +32258,7 @@ def V6_vlsrw_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, IntRegs:$Rt32), "$Vd32 = vlsrw($Vu32,$Rt32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -31372,7 +32269,7 @@ def V6_vlsrwv : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.w = vlsr($Vu32.w,$Vv32.w)", -tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111101; @@ -31384,18 +32281,30 @@ def V6_vlsrwv_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vlsrw($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; let isCodeGenOnly = 1; let DecoderNamespace = "EXT_mmvec"; } +def V6_vlut4 : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, DoubleRegs:$Rtt32), +"$Vd32.h = vlut4($Vu32.uh,$Rtt32.h)", +tc_fa99dc24, TypeCVI_VX_DV>, Enc_263841, Requires<[UseHVXV65]> { +let Inst{7-5} = 0b100; +let Inst{13-13} = 0b0; +let Inst{31-21} = 0b00011001011; +let hasNewValue = 1; +let opNewValue = 0; +let DecoderNamespace = "EXT_mmvec"; +} def V6_vlutvvb : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8), "$Vd32.b = vlut32($Vu32.b,$Vv32.b,$Rt8)", -tc_c4b515c5, TypeCVI_VP>, Enc_a30110, Requires<[HasV60T,UseHVX]> { +tc_c4b515c5, TypeCVI_VP>, Enc_a30110, Requires<[UseHVXV60]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b1; let Inst{31-24} = 0b00011011; @@ -31407,7 +32316,7 @@ def V6_vlutvvb_nm : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8), "$Vd32.b = vlut32($Vu32.b,$Vv32.b,$Rt8):nomatch", -tc_c4b515c5, TypeCVI_VP>, Enc_a30110, Requires<[HasV62T,UseHVX]> { +tc_c4b515c5, TypeCVI_VP>, Enc_a30110, Requires<[UseHVXV62]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-24} = 0b00011000; @@ -31419,7 +32328,7 @@ def V6_vlutvvb_oracc : HInst< (outs HvxVR:$Vx32), (ins HvxVR:$Vx32in, HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8), "$Vx32.b |= vlut32($Vu32.b,$Vv32.b,$Rt8)", -tc_cbf6d1dc, TypeCVI_VP_VS>, Enc_245865, Requires<[HasV60T,UseHVX]> { +tc_cbf6d1dc, TypeCVI_VP_VS>, 
Enc_245865, Requires<[UseHVXV60]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b1; let Inst{31-24} = 0b00011011; @@ -31433,7 +32342,7 @@ def V6_vlutvvb_oracci : HInst< (outs HvxVR:$Vx32), (ins HvxVR:$Vx32in, HvxVR:$Vu32, HvxVR:$Vv32, u3_0Imm:$Ii), "$Vx32.b |= vlut32($Vu32.b,$Vv32.b,#$Ii)", -tc_cbf6d1dc, TypeCVI_VP_VS>, Enc_cd4705, Requires<[HasV62T,UseHVX]> { +tc_cbf6d1dc, TypeCVI_VP_VS>, Enc_cd4705, Requires<[UseHVXV62]> { let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100110; let hasNewValue = 1; @@ -31446,7 +32355,7 @@ def V6_vlutvvbi : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32, u3_0Imm:$Ii), "$Vd32.b = vlut32($Vu32.b,$Vv32.b,#$Ii)", -tc_c4b515c5, TypeCVI_VP>, Enc_0b2e5b, Requires<[HasV62T,UseHVX]> { +tc_c4b515c5, TypeCVI_VP>, Enc_0b2e5b, Requires<[UseHVXV62]> { let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011110001; let hasNewValue = 1; @@ -31457,7 +32366,7 @@ def V6_vlutvwh : HInst< (outs HvxWR:$Vdd32), (ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8), "$Vdd32.h = vlut16($Vu32.b,$Vv32.h,$Rt8)", -tc_4e2a5159, TypeCVI_VP_VS>, Enc_24a7dc, Requires<[HasV60T,UseHVX]> { +tc_4e2a5159, TypeCVI_VP_VS>, Enc_24a7dc, Requires<[UseHVXV60]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b1; let Inst{31-24} = 0b00011011; @@ -31469,7 +32378,7 @@ def V6_vlutvwh_nm : HInst< (outs HvxWR:$Vdd32), (ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8), "$Vdd32.h = vlut16($Vu32.b,$Vv32.h,$Rt8):nomatch", -tc_4e2a5159, TypeCVI_VP_VS>, Enc_24a7dc, Requires<[HasV62T,UseHVX]> { +tc_4e2a5159, TypeCVI_VP_VS>, Enc_24a7dc, Requires<[UseHVXV62]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-24} = 0b00011000; @@ -31481,7 +32390,7 @@ def V6_vlutvwh_oracc : HInst< (outs HvxWR:$Vxx32), (ins HvxWR:$Vxx32in, HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8), "$Vxx32.h |= vlut16($Vu32.b,$Vv32.h,$Rt8)", -tc_cbf6d1dc, TypeCVI_VP_VS>, Enc_7b523d, Requires<[HasV60T,UseHVX]> { +tc_cbf6d1dc, TypeCVI_VP_VS>, Enc_7b523d, Requires<[UseHVXV60]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b1; let Inst{31-24} = 0b00011011; @@ -31495,7 +32404,7 @@ def V6_vlutvwh_oracci : HInst< (outs HvxWR:$Vxx32), (ins HvxWR:$Vxx32in, HvxVR:$Vu32, HvxVR:$Vv32, u3_0Imm:$Ii), "$Vxx32.h |= vlut16($Vu32.b,$Vv32.h,#$Ii)", -tc_cbf6d1dc, TypeCVI_VP_VS>, Enc_1178da, Requires<[HasV62T,UseHVX]> { +tc_cbf6d1dc, TypeCVI_VP_VS>, Enc_1178da, Requires<[UseHVXV62]> { let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100111; let hasNewValue = 1; @@ -31508,7 +32417,7 @@ def V6_vlutvwhi : HInst< (outs HvxWR:$Vdd32), (ins HvxVR:$Vu32, HvxVR:$Vv32, u3_0Imm:$Ii), "$Vdd32.h = vlut16($Vu32.b,$Vv32.h,#$Ii)", -tc_4e2a5159, TypeCVI_VP_VS>, Enc_4b39e4, Requires<[HasV62T,UseHVX]> { +tc_4e2a5159, TypeCVI_VP_VS>, Enc_4b39e4, Requires<[UseHVXV62]> { let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011110011; let hasNewValue = 1; @@ -31519,7 +32428,7 @@ def V6_vmaxb : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.b = vmax($Vu32.b,$Vv32.b)", -tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV62T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV62]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111001; @@ -31531,7 +32440,7 @@ def V6_vmaxb_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vmaxb($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV62]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -31542,7 +32451,7 @@ def V6_vmaxh : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.h = 
vmax($Vu32.h,$Vv32.h)", -tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111000; @@ -31554,7 +32463,7 @@ def V6_vmaxh_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vmaxh($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -31565,7 +32474,7 @@ def V6_vmaxub : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.ub = vmax($Vu32.ub,$Vv32.ub)", -tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111000; @@ -31577,7 +32486,7 @@ def V6_vmaxub_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vmaxub($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -31588,7 +32497,7 @@ def V6_vmaxuh : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.uh = vmax($Vu32.uh,$Vv32.uh)", -tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111000; @@ -31600,7 +32509,7 @@ def V6_vmaxuh_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vmaxuh($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -31611,7 +32520,7 @@ def V6_vmaxw : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.w = vmax($Vu32.w,$Vv32.w)", -tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111001; @@ -31623,7 +32532,7 @@ def V6_vmaxw_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vmaxw($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -31634,7 +32543,7 @@ def V6_vminb : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.b = vmin($Vu32.b,$Vv32.b)", -tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV62T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV62]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111001; @@ -31646,7 +32555,7 @@ def V6_vminb_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vminb($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV62]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -31657,7 +32566,7 @@ def V6_vminh : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.h = vmin($Vu32.h,$Vv32.h)", -tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111000; @@ -31669,7 +32578,7 @@ def V6_vminh_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vminh($Vu32,$Vv32)", -PSEUDO, 
TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -31680,7 +32589,7 @@ def V6_vminub : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.ub = vmin($Vu32.ub,$Vv32.ub)", -tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111000; @@ -31692,7 +32601,7 @@ def V6_vminub_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vminub($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -31703,7 +32612,7 @@ def V6_vminuh : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.uh = vmin($Vu32.uh,$Vv32.uh)", -tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111000; @@ -31715,7 +32624,7 @@ def V6_vminuh_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vminuh($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -31726,7 +32635,7 @@ def V6_vminw : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.w = vmin($Vu32.w,$Vv32.w)", -tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111000; @@ -31738,7 +32647,7 @@ def V6_vminw_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vminw($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -31749,7 +32658,7 @@ def V6_vmpabus : HInst< (outs HvxWR:$Vdd32), (ins HvxWR:$Vuu32, IntRegs:$Rt32), "$Vdd32.h = vmpa($Vuu32.ub,$Rt32.b)", -tc_7c3f55c4, TypeCVI_VX_DV>, Enc_aad80c, Requires<[HasV60T,UseHVX]> { +tc_7c3f55c4, TypeCVI_VX_DV>, Enc_aad80c, Requires<[UseHVXV60]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001001; @@ -31761,7 +32670,7 @@ def V6_vmpabus_acc : HInst< (outs HvxWR:$Vxx32), (ins HvxWR:$Vxx32in, HvxWR:$Vuu32, IntRegs:$Rt32), "$Vxx32.h += vmpa($Vuu32.ub,$Rt32.b)", -tc_d98f4d63, TypeCVI_VX_DV>, Enc_d6990d, Requires<[HasV60T,UseHVX]> { +tc_d98f4d63, TypeCVI_VX_DV>, Enc_d6990d, Requires<[UseHVXV60]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001001; @@ -31775,7 +32684,7 @@ def V6_vmpabus_acc_alt : HInst< (outs HvxWR:$Vxx32), (ins HvxWR:$Vxx32in, HvxWR:$Vuu32, IntRegs:$Rt32), "$Vxx32 += vmpabus($Vuu32,$Rt32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isAccumulator = 1; @@ -31788,7 +32697,7 @@ def V6_vmpabus_alt : HInst< (outs HvxWR:$Vdd32), (ins HvxWR:$Vuu32, IntRegs:$Rt32), "$Vdd32 = vmpabus($Vuu32,$Rt32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -31799,7 +32708,7 @@ def V6_vmpabusv : HInst< (outs HvxWR:$Vdd32), (ins HvxWR:$Vuu32, HvxWR:$Vvv32), "$Vdd32.h = 
vmpa($Vuu32.ub,$Vvv32.b)", -tc_eda67dcd, TypeCVI_VX_DV>, Enc_f8ecf9, Requires<[HasV60T,UseHVX]> { +tc_eda67dcd, TypeCVI_VX_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100001; @@ -31811,7 +32720,57 @@ def V6_vmpabusv_alt : HInst< (outs HvxWR:$Vdd32), (ins HvxWR:$Vuu32, HvxWR:$Vvv32), "$Vdd32 = vmpabus($Vuu32,$Vvv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { +let hasNewValue = 1; +let opNewValue = 0; +let isPseudo = 1; +let isCodeGenOnly = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vmpabuu : HInst< +(outs HvxWR:$Vdd32), +(ins HvxWR:$Vuu32, IntRegs:$Rt32), +"$Vdd32.h = vmpa($Vuu32.ub,$Rt32.ub)", +tc_7c3f55c4, TypeCVI_VX_DV>, Enc_aad80c, Requires<[UseHVXV65]> { +let Inst{7-5} = 0b011; +let Inst{13-13} = 0b0; +let Inst{31-21} = 0b00011001011; +let hasNewValue = 1; +let opNewValue = 0; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vmpabuu_acc : HInst< +(outs HvxWR:$Vxx32), +(ins HvxWR:$Vxx32in, HvxWR:$Vuu32, IntRegs:$Rt32), +"$Vxx32.h += vmpa($Vuu32.ub,$Rt32.ub)", +tc_d98f4d63, TypeCVI_VX_DV>, Enc_d6990d, Requires<[UseHVXV65]> { +let Inst{7-5} = 0b100; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011001101; +let hasNewValue = 1; +let opNewValue = 0; +let isAccumulator = 1; +let DecoderNamespace = "EXT_mmvec"; +let Constraints = "$Vxx32 = $Vxx32in"; +} +def V6_vmpabuu_acc_alt : HInst< +(outs HvxWR:$Vxx32), +(ins HvxWR:$Vxx32in, HvxWR:$Vuu32, IntRegs:$Rt32), +"$Vxx32 += vmpabuu($Vuu32,$Rt32)", +PSEUDO, TypeMAPPING>, Requires<[UseHVXV65]> { +let hasNewValue = 1; +let opNewValue = 0; +let isAccumulator = 1; +let isPseudo = 1; +let isCodeGenOnly = 1; +let DecoderNamespace = "EXT_mmvec"; +let Constraints = "$Vxx32 = $Vxx32in"; +} +def V6_vmpabuu_alt : HInst< +(outs HvxWR:$Vdd32), +(ins HvxWR:$Vuu32, IntRegs:$Rt32), +"$Vdd32 = vmpabuu($Vuu32,$Rt32)", +PSEUDO, TypeMAPPING>, Requires<[UseHVXV65]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -31822,7 +32781,7 @@ def V6_vmpabuuv : HInst< (outs HvxWR:$Vdd32), (ins HvxWR:$Vuu32, HvxWR:$Vvv32), "$Vdd32.h = vmpa($Vuu32.ub,$Vvv32.ub)", -tc_eda67dcd, TypeCVI_VX_DV>, Enc_f8ecf9, Requires<[HasV60T,UseHVX]> { +tc_eda67dcd, TypeCVI_VX_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100111; @@ -31834,7 +32793,7 @@ def V6_vmpabuuv_alt : HInst< (outs HvxWR:$Vdd32), (ins HvxWR:$Vuu32, HvxWR:$Vvv32), "$Vdd32 = vmpabuu($Vuu32,$Vvv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -31845,7 +32804,7 @@ def V6_vmpahb : HInst< (outs HvxWR:$Vdd32), (ins HvxWR:$Vuu32, IntRegs:$Rt32), "$Vdd32.w = vmpa($Vuu32.h,$Rt32.b)", -tc_7c3f55c4, TypeCVI_VX_DV>, Enc_aad80c, Requires<[HasV60T,UseHVX]> { +tc_7c3f55c4, TypeCVI_VX_DV>, Enc_aad80c, Requires<[UseHVXV60]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001001; @@ -31857,7 +32816,7 @@ def V6_vmpahb_acc : HInst< (outs HvxWR:$Vxx32), (ins HvxWR:$Vxx32in, HvxWR:$Vuu32, IntRegs:$Rt32), "$Vxx32.w += vmpa($Vuu32.h,$Rt32.b)", -tc_d98f4d63, TypeCVI_VX_DV>, Enc_d6990d, Requires<[HasV60T,UseHVX]> { +tc_d98f4d63, TypeCVI_VX_DV>, Enc_d6990d, Requires<[UseHVXV60]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001001; @@ -31871,7 +32830,7 @@ def V6_vmpahb_acc_alt : HInst< (outs HvxWR:$Vxx32), (ins HvxWR:$Vxx32in, HvxWR:$Vuu32, IntRegs:$Rt32), "$Vxx32 += 
vmpahb($Vuu32,$Rt32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isAccumulator = 1; @@ -31884,18 +32843,31 @@ def V6_vmpahb_alt : HInst< (outs HvxWR:$Vdd32), (ins HvxWR:$Vuu32, IntRegs:$Rt32), "$Vdd32 = vmpahb($Vuu32,$Rt32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; let isCodeGenOnly = 1; let DecoderNamespace = "EXT_mmvec"; } +def V6_vmpahhsat : HInst< +(outs HvxVR:$Vx32), +(ins HvxVR:$Vx32in, HvxVR:$Vu32, DoubleRegs:$Rtt32), +"$Vx32.h = vmpa($Vx32in.h,$Vu32.h,$Rtt32.h):sat", +tc_7474003e, TypeCVI_VX_DV>, Enc_310ba1, Requires<[UseHVXV65]> { +let Inst{7-5} = 0b100; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011001100; +let hasNewValue = 1; +let opNewValue = 0; +let DecoderNamespace = "EXT_mmvec"; +let Constraints = "$Vx32 = $Vx32in"; +} def V6_vmpauhb : HInst< (outs HvxWR:$Vdd32), (ins HvxWR:$Vuu32, IntRegs:$Rt32), "$Vdd32.w = vmpa($Vuu32.uh,$Rt32.b)", -tc_7c3f55c4, TypeCVI_VX_DV>, Enc_aad80c, Requires<[HasV62T,UseHVX]> { +tc_7c3f55c4, TypeCVI_VX_DV>, Enc_aad80c, Requires<[UseHVXV62]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001100; @@ -31907,7 +32879,7 @@ def V6_vmpauhb_acc : HInst< (outs HvxWR:$Vxx32), (ins HvxWR:$Vxx32in, HvxWR:$Vuu32, IntRegs:$Rt32), "$Vxx32.w += vmpa($Vuu32.uh,$Rt32.b)", -tc_d98f4d63, TypeCVI_VX_DV>, Enc_d6990d, Requires<[HasV62T,UseHVX]> { +tc_d98f4d63, TypeCVI_VX_DV>, Enc_d6990d, Requires<[UseHVXV62]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001100; @@ -31921,7 +32893,7 @@ def V6_vmpauhb_acc_alt : HInst< (outs HvxWR:$Vxx32), (ins HvxWR:$Vxx32in, HvxWR:$Vuu32, IntRegs:$Rt32), "$Vxx32 += vmpauhb($Vuu32,$Rt32)", -PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV62]> { let hasNewValue = 1; let opNewValue = 0; let isAccumulator = 1; @@ -31934,18 +32906,44 @@ def V6_vmpauhb_alt : HInst< (outs HvxWR:$Vdd32), (ins HvxWR:$Vuu32, IntRegs:$Rt32), "$Vdd32 = vmpauhb($Vuu32,$Rt32)", -PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV62]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; let isCodeGenOnly = 1; let DecoderNamespace = "EXT_mmvec"; } +def V6_vmpauhuhsat : HInst< +(outs HvxVR:$Vx32), +(ins HvxVR:$Vx32in, HvxVR:$Vu32, DoubleRegs:$Rtt32), +"$Vx32.h = vmpa($Vx32in.h,$Vu32.uh,$Rtt32.uh):sat", +tc_7474003e, TypeCVI_VX_DV>, Enc_310ba1, Requires<[UseHVXV65]> { +let Inst{7-5} = 0b101; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011001100; +let hasNewValue = 1; +let opNewValue = 0; +let DecoderNamespace = "EXT_mmvec"; +let Constraints = "$Vx32 = $Vx32in"; +} +def V6_vmpsuhuhsat : HInst< +(outs HvxVR:$Vx32), +(ins HvxVR:$Vx32in, HvxVR:$Vu32, DoubleRegs:$Rtt32), +"$Vx32.h = vmps($Vx32in.h,$Vu32.uh,$Rtt32.uh):sat", +tc_7474003e, TypeCVI_VX_DV>, Enc_310ba1, Requires<[UseHVXV65]> { +let Inst{7-5} = 0b110; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011001100; +let hasNewValue = 1; +let opNewValue = 0; +let DecoderNamespace = "EXT_mmvec"; +let Constraints = "$Vx32 = $Vx32in"; +} def V6_vmpybus : HInst< (outs HvxWR:$Vdd32), (ins HvxVR:$Vu32, IntRegs:$Rt32), "$Vdd32.h = vmpy($Vu32.ub,$Rt32.b)", -tc_7c3f55c4, TypeCVI_VX_DV>, Enc_01d3d0, Requires<[HasV60T,UseHVX]> { +tc_7c3f55c4, TypeCVI_VX_DV>, Enc_01d3d0, Requires<[UseHVXV60]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001001; 
@@ -31957,7 +32955,7 @@ def V6_vmpybus_acc : HInst< (outs HvxWR:$Vxx32), (ins HvxWR:$Vxx32in, HvxVR:$Vu32, IntRegs:$Rt32), "$Vxx32.h += vmpy($Vu32.ub,$Rt32.b)", -tc_d98f4d63, TypeCVI_VX_DV>, Enc_5e8512, Requires<[HasV60T,UseHVX]> { +tc_d98f4d63, TypeCVI_VX_DV>, Enc_5e8512, Requires<[UseHVXV60]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001001; @@ -31971,7 +32969,7 @@ def V6_vmpybus_acc_alt : HInst< (outs HvxWR:$Vxx32), (ins HvxWR:$Vxx32in, HvxVR:$Vu32, IntRegs:$Rt32), "$Vxx32 += vmpybus($Vu32,$Rt32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isAccumulator = 1; @@ -31984,7 +32982,7 @@ def V6_vmpybus_alt : HInst< (outs HvxWR:$Vdd32), (ins HvxVR:$Vu32, IntRegs:$Rt32), "$Vdd32 = vmpybus($Vu32,$Rt32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -31995,7 +32993,7 @@ def V6_vmpybusv : HInst< (outs HvxWR:$Vdd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vdd32.h = vmpy($Vu32.ub,$Vv32.b)", -tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[HasV60T,UseHVX]> { +tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100000; @@ -32007,7 +33005,7 @@ def V6_vmpybusv_acc : HInst< (outs HvxWR:$Vxx32), (ins HvxWR:$Vxx32in, HvxVR:$Vu32, HvxVR:$Vv32), "$Vxx32.h += vmpy($Vu32.ub,$Vv32.b)", -tc_e172d86a, TypeCVI_VX_DV>, Enc_3fc427, Requires<[HasV60T,UseHVX]> { +tc_e172d86a, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV60]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100000; @@ -32021,7 +33019,7 @@ def V6_vmpybusv_acc_alt : HInst< (outs HvxWR:$Vxx32), (ins HvxWR:$Vxx32in, HvxVR:$Vu32, HvxVR:$Vv32), "$Vxx32 += vmpybus($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isAccumulator = 1; @@ -32034,7 +33032,7 @@ def V6_vmpybusv_alt : HInst< (outs HvxWR:$Vdd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vdd32 = vmpybus($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -32045,7 +33043,7 @@ def V6_vmpybv : HInst< (outs HvxWR:$Vdd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vdd32.h = vmpy($Vu32.b,$Vv32.b)", -tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[HasV60T,UseHVX]> { +tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100000; @@ -32057,7 +33055,7 @@ def V6_vmpybv_acc : HInst< (outs HvxWR:$Vxx32), (ins HvxWR:$Vxx32in, HvxVR:$Vu32, HvxVR:$Vv32), "$Vxx32.h += vmpy($Vu32.b,$Vv32.b)", -tc_e172d86a, TypeCVI_VX_DV>, Enc_3fc427, Requires<[HasV60T,UseHVX]> { +tc_e172d86a, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV60]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100000; @@ -32071,7 +33069,7 @@ def V6_vmpybv_acc_alt : HInst< (outs HvxWR:$Vxx32), (ins HvxWR:$Vxx32in, HvxVR:$Vu32, HvxVR:$Vv32), "$Vxx32 += vmpyb($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isAccumulator = 1; @@ -32084,7 +33082,7 @@ def V6_vmpybv_alt : HInst< (outs HvxWR:$Vdd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vdd32 = vmpyb($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, 
Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -32095,7 +33093,7 @@ def V6_vmpyewuh : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.w = vmpye($Vu32.w,$Vv32.uh)", -tc_eda67dcd, TypeCVI_VX_DV>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_eda67dcd, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111111; @@ -32107,7 +33105,7 @@ def V6_vmpyewuh_64 : HInst< (outs HvxWR:$Vdd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vdd32 = vmpye($Vu32.w,$Vv32.uh)", -tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[HasV62T,UseHVX]> { +tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV62]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011110101; @@ -32119,7 +33117,7 @@ def V6_vmpyewuh_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vmpyewuh($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -32130,7 +33128,7 @@ def V6_vmpyh : HInst< (outs HvxWR:$Vdd32), (ins HvxVR:$Vu32, IntRegs:$Rt32), "$Vdd32.w = vmpy($Vu32.h,$Rt32.h)", -tc_7c3f55c4, TypeCVI_VX_DV>, Enc_01d3d0, Requires<[HasV60T,UseHVX]> { +tc_7c3f55c4, TypeCVI_VX_DV>, Enc_01d3d0, Requires<[UseHVXV60]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001010; @@ -32138,11 +33136,38 @@ let hasNewValue = 1; let opNewValue = 0; let DecoderNamespace = "EXT_mmvec"; } +def V6_vmpyh_acc : HInst< +(outs HvxWR:$Vxx32), +(ins HvxWR:$Vxx32in, HvxVR:$Vu32, IntRegs:$Rt32), +"$Vxx32.w += vmpy($Vu32.h,$Rt32.h)", +tc_d98f4d63, TypeCVI_VX_DV>, Enc_5e8512, Requires<[UseHVXV65]> { +let Inst{7-5} = 0b110; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011001101; +let hasNewValue = 1; +let opNewValue = 0; +let isAccumulator = 1; +let DecoderNamespace = "EXT_mmvec"; +let Constraints = "$Vxx32 = $Vxx32in"; +} +def V6_vmpyh_acc_alt : HInst< +(outs HvxWR:$Vxx32), +(ins HvxWR:$Vxx32in, HvxVR:$Vu32, IntRegs:$Rt32), +"$Vxx32 += vmpyh($Vu32,$Rt32)", +PSEUDO, TypeMAPPING>, Requires<[UseHVXV65]> { +let hasNewValue = 1; +let opNewValue = 0; +let isAccumulator = 1; +let isPseudo = 1; +let isCodeGenOnly = 1; +let DecoderNamespace = "EXT_mmvec"; +let Constraints = "$Vxx32 = $Vxx32in"; +} def V6_vmpyh_alt : HInst< (outs HvxWR:$Vdd32), (ins HvxVR:$Vu32, IntRegs:$Rt32), "$Vdd32 = vmpyh($Vu32,$Rt32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -32153,7 +33178,7 @@ def V6_vmpyhsat_acc : HInst< (outs HvxWR:$Vxx32), (ins HvxWR:$Vxx32in, HvxVR:$Vu32, IntRegs:$Rt32), "$Vxx32.w += vmpy($Vu32.h,$Rt32.h):sat", -tc_d98f4d63, TypeCVI_VX_DV>, Enc_5e8512, Requires<[HasV60T,UseHVX]> { +tc_d98f4d63, TypeCVI_VX_DV>, Enc_5e8512, Requires<[UseHVXV60]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001010; @@ -32167,7 +33192,7 @@ def V6_vmpyhsat_acc_alt : HInst< (outs HvxWR:$Vxx32), (ins HvxWR:$Vxx32in, HvxVR:$Vu32, IntRegs:$Rt32), "$Vxx32 += vmpyh($Vu32,$Rt32):sat", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isAccumulator = 1; @@ -32180,7 +33205,7 @@ def V6_vmpyhsrs : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, IntRegs:$Rt32), "$Vd32.h = vmpy($Vu32.h,$Rt32.h):<<1:rnd:sat", -tc_7c3f55c4, 
TypeCVI_VX_DV>, Enc_b087ac, Requires<[HasV60T,UseHVX]> { +tc_7c3f55c4, TypeCVI_VX_DV>, Enc_b087ac, Requires<[UseHVXV60]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001010; @@ -32192,7 +33217,7 @@ def V6_vmpyhsrs_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, IntRegs:$Rt32), "$Vd32 = vmpyh($Vu32,$Rt32):<<1:rnd:sat", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -32203,7 +33228,7 @@ def V6_vmpyhss : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, IntRegs:$Rt32), "$Vd32.h = vmpy($Vu32.h,$Rt32.h):<<1:sat", -tc_7c3f55c4, TypeCVI_VX_DV>, Enc_b087ac, Requires<[HasV60T,UseHVX]> { +tc_7c3f55c4, TypeCVI_VX_DV>, Enc_b087ac, Requires<[UseHVXV60]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001010; @@ -32215,7 +33240,7 @@ def V6_vmpyhss_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, IntRegs:$Rt32), "$Vd32 = vmpyh($Vu32,$Rt32):<<1:sat", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -32226,7 +33251,7 @@ def V6_vmpyhus : HInst< (outs HvxWR:$Vdd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vdd32.w = vmpy($Vu32.h,$Vv32.uh)", -tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[HasV60T,UseHVX]> { +tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100001; @@ -32238,7 +33263,7 @@ def V6_vmpyhus_acc : HInst< (outs HvxWR:$Vxx32), (ins HvxWR:$Vxx32in, HvxVR:$Vu32, HvxVR:$Vv32), "$Vxx32.w += vmpy($Vu32.h,$Vv32.uh)", -tc_e172d86a, TypeCVI_VX_DV>, Enc_3fc427, Requires<[HasV60T,UseHVX]> { +tc_e172d86a, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV60]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100001; @@ -32252,7 +33277,7 @@ def V6_vmpyhus_acc_alt : HInst< (outs HvxWR:$Vxx32), (ins HvxWR:$Vxx32in, HvxVR:$Vu32, HvxVR:$Vv32), "$Vxx32 += vmpyhus($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isAccumulator = 1; @@ -32265,7 +33290,7 @@ def V6_vmpyhus_alt : HInst< (outs HvxWR:$Vdd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vdd32 = vmpyhus($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -32276,7 +33301,7 @@ def V6_vmpyhv : HInst< (outs HvxWR:$Vdd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vdd32.w = vmpy($Vu32.h,$Vv32.h)", -tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[HasV60T,UseHVX]> { +tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100000; @@ -32288,7 +33313,7 @@ def V6_vmpyhv_acc : HInst< (outs HvxWR:$Vxx32), (ins HvxWR:$Vxx32in, HvxVR:$Vu32, HvxVR:$Vv32), "$Vxx32.w += vmpy($Vu32.h,$Vv32.h)", -tc_e172d86a, TypeCVI_VX_DV>, Enc_3fc427, Requires<[HasV60T,UseHVX]> { +tc_e172d86a, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV60]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100000; @@ -32302,7 +33327,7 @@ def V6_vmpyhv_acc_alt : HInst< (outs HvxWR:$Vxx32), (ins HvxWR:$Vxx32in, HvxVR:$Vu32, HvxVR:$Vv32), "$Vxx32 += vmpyh($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let 
isAccumulator = 1; @@ -32315,7 +33340,7 @@ def V6_vmpyhv_alt : HInst< (outs HvxWR:$Vdd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vdd32 = vmpyh($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -32326,7 +33351,7 @@ def V6_vmpyhvsrs : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.h = vmpy($Vu32.h,$Vv32.h):<<1:rnd:sat", -tc_eda67dcd, TypeCVI_VX_DV>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_eda67dcd, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100001; @@ -32338,7 +33363,7 @@ def V6_vmpyhvsrs_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vmpyh($Vu32,$Vv32):<<1:rnd:sat", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -32349,7 +33374,7 @@ def V6_vmpyieoh : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.w = vmpyieo($Vu32.h,$Vv32.h)", -tc_908a4c8c, TypeCVI_VX>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_908a4c8c, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111011; @@ -32361,7 +33386,7 @@ def V6_vmpyiewh_acc : HInst< (outs HvxVR:$Vx32), (ins HvxVR:$Vx32in, HvxVR:$Vu32, HvxVR:$Vv32), "$Vx32.w += vmpyie($Vu32.w,$Vv32.h)", -tc_e172d86a, TypeCVI_VX_DV>, Enc_a7341a, Requires<[HasV60T,UseHVX]> { +tc_e172d86a, TypeCVI_VX_DV>, Enc_a7341a, Requires<[UseHVXV60]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100010; @@ -32375,7 +33400,7 @@ def V6_vmpyiewh_acc_alt : HInst< (outs HvxVR:$Vx32), (ins HvxVR:$Vx32in, HvxVR:$Vu32, HvxVR:$Vv32), "$Vx32 += vmpyiewh($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isAccumulator = 1; @@ -32388,7 +33413,7 @@ def V6_vmpyiewuh : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.w = vmpyie($Vu32.w,$Vv32.uh)", -tc_eda67dcd, TypeCVI_VX_DV>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_eda67dcd, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111110; @@ -32400,7 +33425,7 @@ def V6_vmpyiewuh_acc : HInst< (outs HvxVR:$Vx32), (ins HvxVR:$Vx32in, HvxVR:$Vu32, HvxVR:$Vv32), "$Vx32.w += vmpyie($Vu32.w,$Vv32.uh)", -tc_e172d86a, TypeCVI_VX_DV>, Enc_a7341a, Requires<[HasV60T,UseHVX]> { +tc_e172d86a, TypeCVI_VX_DV>, Enc_a7341a, Requires<[UseHVXV60]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100001; @@ -32414,7 +33439,7 @@ def V6_vmpyiewuh_acc_alt : HInst< (outs HvxVR:$Vx32), (ins HvxVR:$Vx32in, HvxVR:$Vu32, HvxVR:$Vv32), "$Vx32 += vmpyiewuh($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isAccumulator = 1; @@ -32427,7 +33452,7 @@ def V6_vmpyiewuh_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vmpyiewuh($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -32438,7 +33463,7 @@ def V6_vmpyih : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.h = vmpyi($Vu32.h,$Vv32.h)", -tc_eda67dcd, TypeCVI_VX_DV>, 
Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_eda67dcd, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100001; @@ -32450,7 +33475,7 @@ def V6_vmpyih_acc : HInst< (outs HvxVR:$Vx32), (ins HvxVR:$Vx32in, HvxVR:$Vu32, HvxVR:$Vv32), "$Vx32.h += vmpyi($Vu32.h,$Vv32.h)", -tc_e172d86a, TypeCVI_VX_DV>, Enc_a7341a, Requires<[HasV60T,UseHVX]> { +tc_e172d86a, TypeCVI_VX_DV>, Enc_a7341a, Requires<[UseHVXV60]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100001; @@ -32464,7 +33489,7 @@ def V6_vmpyih_acc_alt : HInst< (outs HvxVR:$Vx32), (ins HvxVR:$Vx32in, HvxVR:$Vu32, HvxVR:$Vv32), "$Vx32 += vmpyih($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isAccumulator = 1; @@ -32477,7 +33502,7 @@ def V6_vmpyih_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vmpyih($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -32488,7 +33513,7 @@ def V6_vmpyihb : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, IntRegs:$Rt32), "$Vd32.h = vmpyi($Vu32.h,$Rt32.b)", -tc_69b6dd20, TypeCVI_VX>, Enc_b087ac, Requires<[HasV60T,UseHVX]> { +tc_69b6dd20, TypeCVI_VX>, Enc_b087ac, Requires<[UseHVXV60]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001011; @@ -32500,7 +33525,7 @@ def V6_vmpyihb_acc : HInst< (outs HvxVR:$Vx32), (ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32), "$Vx32.h += vmpyi($Vu32.h,$Rt32.b)", -tc_d725e5b0, TypeCVI_VX>, Enc_5138b3, Requires<[HasV60T,UseHVX]> { +tc_d725e5b0, TypeCVI_VX>, Enc_5138b3, Requires<[UseHVXV60]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001011; @@ -32514,7 +33539,7 @@ def V6_vmpyihb_acc_alt : HInst< (outs HvxVR:$Vx32), (ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32), "$Vx32 += vmpyihb($Vu32,$Rt32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isAccumulator = 1; @@ -32527,7 +33552,7 @@ def V6_vmpyihb_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, IntRegs:$Rt32), "$Vd32 = vmpyihb($Vu32,$Rt32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -32538,7 +33563,7 @@ def V6_vmpyiowh : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.w = vmpyio($Vu32.w,$Vv32.h)", -tc_eda67dcd, TypeCVI_VX_DV>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_eda67dcd, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111110; @@ -32550,7 +33575,7 @@ def V6_vmpyiowh_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vmpyiowh($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -32561,7 +33586,7 @@ def V6_vmpyiwb : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, IntRegs:$Rt32), "$Vd32.w = vmpyi($Vu32.w,$Rt32.b)", -tc_69b6dd20, TypeCVI_VX>, Enc_b087ac, Requires<[HasV60T,UseHVX]> { +tc_69b6dd20, TypeCVI_VX>, Enc_b087ac, Requires<[UseHVXV60]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001101; @@ -32573,7 +33598,7 @@ def V6_vmpyiwb_acc : HInst< (outs 
HvxVR:$Vx32), (ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32), "$Vx32.w += vmpyi($Vu32.w,$Rt32.b)", -tc_d725e5b0, TypeCVI_VX>, Enc_5138b3, Requires<[HasV60T,UseHVX]> { +tc_d725e5b0, TypeCVI_VX>, Enc_5138b3, Requires<[UseHVXV60]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001010; @@ -32587,7 +33612,7 @@ def V6_vmpyiwb_acc_alt : HInst< (outs HvxVR:$Vx32), (ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32), "$Vx32 += vmpyiwb($Vu32,$Rt32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isAccumulator = 1; @@ -32600,7 +33625,7 @@ def V6_vmpyiwb_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, IntRegs:$Rt32), "$Vd32 = vmpyiwb($Vu32,$Rt32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -32611,7 +33636,7 @@ def V6_vmpyiwh : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, IntRegs:$Rt32), "$Vd32.w = vmpyi($Vu32.w,$Rt32.h)", -tc_7c3f55c4, TypeCVI_VX_DV>, Enc_b087ac, Requires<[HasV60T,UseHVX]> { +tc_7c3f55c4, TypeCVI_VX_DV>, Enc_b087ac, Requires<[UseHVXV60]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001100; @@ -32623,7 +33648,7 @@ def V6_vmpyiwh_acc : HInst< (outs HvxVR:$Vx32), (ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32), "$Vx32.w += vmpyi($Vu32.w,$Rt32.h)", -tc_d98f4d63, TypeCVI_VX_DV>, Enc_5138b3, Requires<[HasV60T,UseHVX]> { +tc_d98f4d63, TypeCVI_VX_DV>, Enc_5138b3, Requires<[UseHVXV60]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001010; @@ -32637,7 +33662,7 @@ def V6_vmpyiwh_acc_alt : HInst< (outs HvxVR:$Vx32), (ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32), "$Vx32 += vmpyiwh($Vu32,$Rt32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isAccumulator = 1; @@ -32650,7 +33675,7 @@ def V6_vmpyiwh_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, IntRegs:$Rt32), "$Vd32 = vmpyiwh($Vu32,$Rt32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -32661,7 +33686,7 @@ def V6_vmpyiwub : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, IntRegs:$Rt32), "$Vd32.w = vmpyi($Vu32.w,$Rt32.ub)", -tc_69b6dd20, TypeCVI_VX>, Enc_b087ac, Requires<[HasV62T,UseHVX]> { +tc_69b6dd20, TypeCVI_VX>, Enc_b087ac, Requires<[UseHVXV62]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001100; @@ -32673,7 +33698,7 @@ def V6_vmpyiwub_acc : HInst< (outs HvxVR:$Vx32), (ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32), "$Vx32.w += vmpyi($Vu32.w,$Rt32.ub)", -tc_d725e5b0, TypeCVI_VX>, Enc_5138b3, Requires<[HasV62T,UseHVX]> { +tc_d725e5b0, TypeCVI_VX>, Enc_5138b3, Requires<[UseHVXV62]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001100; @@ -32687,7 +33712,7 @@ def V6_vmpyiwub_acc_alt : HInst< (outs HvxVR:$Vx32), (ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32), "$Vx32 += vmpyiwub($Vu32,$Rt32)", -PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV62]> { let hasNewValue = 1; let opNewValue = 0; let isAccumulator = 1; @@ -32700,7 +33725,7 @@ def V6_vmpyiwub_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, IntRegs:$Rt32), "$Vd32 = vmpyiwub($Vu32,$Rt32)", -PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV62]> { 
let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -32711,7 +33736,7 @@ def V6_vmpyowh : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.w = vmpyo($Vu32.w,$Vv32.h):<<1:sat", -tc_eda67dcd, TypeCVI_VX_DV>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_eda67dcd, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111111; @@ -32723,7 +33748,7 @@ def V6_vmpyowh_64_acc : HInst< (outs HvxWR:$Vxx32), (ins HvxWR:$Vxx32in, HvxVR:$Vu32, HvxVR:$Vv32), "$Vxx32 += vmpyo($Vu32.w,$Vv32.h)", -tc_e172d86a, TypeCVI_VX_DV>, Enc_3fc427, Requires<[HasV62T,UseHVX]> { +tc_e172d86a, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV62]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100001; @@ -32737,7 +33762,7 @@ def V6_vmpyowh_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vmpyowh($Vu32,$Vv32):<<1:sat", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -32748,7 +33773,7 @@ def V6_vmpyowh_rnd : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.w = vmpyo($Vu32.w,$Vv32.h):<<1:rnd:sat", -tc_eda67dcd, TypeCVI_VX_DV>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_eda67dcd, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111010; @@ -32760,7 +33785,7 @@ def V6_vmpyowh_rnd_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vmpyowh($Vu32,$Vv32):<<1:rnd:sat", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -32771,7 +33796,7 @@ def V6_vmpyowh_rnd_sacc : HInst< (outs HvxVR:$Vx32), (ins HvxVR:$Vx32in, HvxVR:$Vu32, HvxVR:$Vv32), "$Vx32.w += vmpyo($Vu32.w,$Vv32.h):<<1:rnd:sat:shift", -tc_e172d86a, TypeCVI_VX_DV>, Enc_a7341a, Requires<[HasV60T,UseHVX]> { +tc_e172d86a, TypeCVI_VX_DV>, Enc_a7341a, Requires<[UseHVXV60]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100001; @@ -32785,7 +33810,7 @@ def V6_vmpyowh_rnd_sacc_alt : HInst< (outs HvxVR:$Vx32), (ins HvxVR:$Vx32in, HvxVR:$Vu32, HvxVR:$Vv32), "$Vx32 += vmpyowh($Vu32,$Vv32):<<1:rnd:sat:shift", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isAccumulator = 1; @@ -32797,7 +33822,7 @@ def V6_vmpyowh_sacc : HInst< (outs HvxVR:$Vx32), (ins HvxVR:$Vx32in, HvxVR:$Vu32, HvxVR:$Vv32), "$Vx32.w += vmpyo($Vu32.w,$Vv32.h):<<1:sat:shift", -tc_e172d86a, TypeCVI_VX_DV>, Enc_a7341a, Requires<[HasV60T,UseHVX]> { +tc_e172d86a, TypeCVI_VX_DV>, Enc_a7341a, Requires<[UseHVXV60]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100001; @@ -32811,7 +33836,7 @@ def V6_vmpyowh_sacc_alt : HInst< (outs HvxVR:$Vx32), (ins HvxVR:$Vx32in, HvxVR:$Vu32, HvxVR:$Vv32), "$Vx32 += vmpyowh($Vu32,$Vv32):<<1:sat:shift", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isAccumulator = 1; @@ -32823,7 +33848,7 @@ def V6_vmpyub : HInst< (outs HvxWR:$Vdd32), (ins HvxVR:$Vu32, IntRegs:$Rt32), "$Vdd32.uh = vmpy($Vu32.ub,$Rt32.ub)", -tc_7c3f55c4, TypeCVI_VX_DV>, Enc_01d3d0, Requires<[HasV60T,UseHVX]> { +tc_7c3f55c4, TypeCVI_VX_DV>, Enc_01d3d0, Requires<[UseHVXV60]> { let Inst{7-5} = 0b000; let Inst{13-13} = 
0b0; let Inst{31-21} = 0b00011001110; @@ -32835,7 +33860,7 @@ def V6_vmpyub_acc : HInst< (outs HvxWR:$Vxx32), (ins HvxWR:$Vxx32in, HvxVR:$Vu32, IntRegs:$Rt32), "$Vxx32.uh += vmpy($Vu32.ub,$Rt32.ub)", -tc_d98f4d63, TypeCVI_VX_DV>, Enc_5e8512, Requires<[HasV60T,UseHVX]> { +tc_d98f4d63, TypeCVI_VX_DV>, Enc_5e8512, Requires<[UseHVXV60]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001100; @@ -32849,7 +33874,7 @@ def V6_vmpyub_acc_alt : HInst< (outs HvxWR:$Vxx32), (ins HvxWR:$Vxx32in, HvxVR:$Vu32, IntRegs:$Rt32), "$Vxx32 += vmpyub($Vu32,$Rt32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isAccumulator = 1; @@ -32862,7 +33887,7 @@ def V6_vmpyub_alt : HInst< (outs HvxWR:$Vdd32), (ins HvxVR:$Vu32, IntRegs:$Rt32), "$Vdd32 = vmpyub($Vu32,$Rt32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -32873,7 +33898,7 @@ def V6_vmpyubv : HInst< (outs HvxWR:$Vdd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vdd32.uh = vmpy($Vu32.ub,$Vv32.ub)", -tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[HasV60T,UseHVX]> { +tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100000; @@ -32885,7 +33910,7 @@ def V6_vmpyubv_acc : HInst< (outs HvxWR:$Vxx32), (ins HvxWR:$Vxx32in, HvxVR:$Vu32, HvxVR:$Vv32), "$Vxx32.uh += vmpy($Vu32.ub,$Vv32.ub)", -tc_e172d86a, TypeCVI_VX_DV>, Enc_3fc427, Requires<[HasV60T,UseHVX]> { +tc_e172d86a, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV60]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100000; @@ -32899,7 +33924,7 @@ def V6_vmpyubv_acc_alt : HInst< (outs HvxWR:$Vxx32), (ins HvxWR:$Vxx32in, HvxVR:$Vu32, HvxVR:$Vv32), "$Vxx32 += vmpyub($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isAccumulator = 1; @@ -32912,7 +33937,7 @@ def V6_vmpyubv_alt : HInst< (outs HvxWR:$Vdd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vdd32 = vmpyub($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -32923,7 +33948,7 @@ def V6_vmpyuh : HInst< (outs HvxWR:$Vdd32), (ins HvxVR:$Vu32, IntRegs:$Rt32), "$Vdd32.uw = vmpy($Vu32.uh,$Rt32.uh)", -tc_7c3f55c4, TypeCVI_VX_DV>, Enc_01d3d0, Requires<[HasV60T,UseHVX]> { +tc_7c3f55c4, TypeCVI_VX_DV>, Enc_01d3d0, Requires<[UseHVXV60]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001010; @@ -32935,7 +33960,7 @@ def V6_vmpyuh_acc : HInst< (outs HvxWR:$Vxx32), (ins HvxWR:$Vxx32in, HvxVR:$Vu32, IntRegs:$Rt32), "$Vxx32.uw += vmpy($Vu32.uh,$Rt32.uh)", -tc_d98f4d63, TypeCVI_VX_DV>, Enc_5e8512, Requires<[HasV60T,UseHVX]> { +tc_d98f4d63, TypeCVI_VX_DV>, Enc_5e8512, Requires<[UseHVXV60]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001010; @@ -32949,7 +33974,7 @@ def V6_vmpyuh_acc_alt : HInst< (outs HvxWR:$Vxx32), (ins HvxWR:$Vxx32in, HvxVR:$Vu32, IntRegs:$Rt32), "$Vxx32 += vmpyuh($Vu32,$Rt32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isAccumulator = 1; @@ -32962,18 +33987,44 @@ def V6_vmpyuh_alt : HInst< (outs HvxWR:$Vdd32), (ins HvxVR:$Vu32, IntRegs:$Rt32), 
"$Vdd32 = vmpyuh($Vu32,$Rt32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; let isCodeGenOnly = 1; let DecoderNamespace = "EXT_mmvec"; } +def V6_vmpyuhe : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, IntRegs:$Rt32), +"$Vd32.uw = vmpye($Vu32.uh,$Rt32.uh)", +tc_69b6dd20, TypeCVI_VX>, Enc_b087ac, Requires<[UseHVXV65]> { +let Inst{7-5} = 0b010; +let Inst{13-13} = 0b0; +let Inst{31-21} = 0b00011001011; +let hasNewValue = 1; +let opNewValue = 0; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vmpyuhe_acc : HInst< +(outs HvxVR:$Vx32), +(ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32), +"$Vx32.uw += vmpye($Vu32.uh,$Rt32.uh)", +tc_d725e5b0, TypeCVI_VX>, Enc_5138b3, Requires<[UseHVXV65]> { +let Inst{7-5} = 0b011; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011001100; +let hasNewValue = 1; +let opNewValue = 0; +let isAccumulator = 1; +let DecoderNamespace = "EXT_mmvec"; +let Constraints = "$Vx32 = $Vx32in"; +} def V6_vmpyuhv : HInst< (outs HvxWR:$Vdd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vdd32.uw = vmpy($Vu32.uh,$Vv32.uh)", -tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[HasV60T,UseHVX]> { +tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100001; @@ -32985,7 +34036,7 @@ def V6_vmpyuhv_acc : HInst< (outs HvxWR:$Vxx32), (ins HvxWR:$Vxx32in, HvxVR:$Vu32, HvxVR:$Vv32), "$Vxx32.uw += vmpy($Vu32.uh,$Vv32.uh)", -tc_e172d86a, TypeCVI_VX_DV>, Enc_3fc427, Requires<[HasV60T,UseHVX]> { +tc_e172d86a, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV60]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100001; @@ -32999,7 +34050,7 @@ def V6_vmpyuhv_acc_alt : HInst< (outs HvxWR:$Vxx32), (ins HvxWR:$Vxx32in, HvxVR:$Vu32, HvxVR:$Vv32), "$Vxx32 += vmpyuh($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isAccumulator = 1; @@ -33012,7 +34063,7 @@ def V6_vmpyuhv_alt : HInst< (outs HvxWR:$Vdd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vdd32 = vmpyuh($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -33023,7 +34074,7 @@ def V6_vmux : HInst< (outs HvxVR:$Vd32), (ins HvxQR:$Qt4, HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vmux($Qt4,$Vu32,$Vv32)", -tc_a3127e12, TypeCVI_VA>, Enc_31db33, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_31db33, Requires<[UseHVXV60]> { let Inst{7-7} = 0b0; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011110111; @@ -33031,11 +34082,34 @@ let hasNewValue = 1; let opNewValue = 0; let DecoderNamespace = "EXT_mmvec"; } +def V6_vnavgb : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.b = vnavg($Vu32.b,$Vv32.b)", +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV65]> { +let Inst{7-5} = 0b110; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111000; +let hasNewValue = 1; +let opNewValue = 0; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vnavgb_alt : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32 = vnavgb($Vu32,$Vv32)", +PSEUDO, TypeMAPPING>, Requires<[UseHVXV65]> { +let hasNewValue = 1; +let opNewValue = 0; +let isPseudo = 1; +let isCodeGenOnly = 1; +let DecoderNamespace = "EXT_mmvec"; +} def V6_vnavgh : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.h = vnavg($Vu32.h,$Vv32.h)", 
-tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100111; @@ -33047,7 +34121,7 @@ def V6_vnavgh_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vnavgh($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -33058,7 +34132,7 @@ def V6_vnavgub : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.b = vnavg($Vu32.ub,$Vv32.ub)", -tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100111; @@ -33070,7 +34144,7 @@ def V6_vnavgub_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vnavgub($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -33081,7 +34155,7 @@ def V6_vnavgw : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.w = vnavg($Vu32.w,$Vv32.w)", -tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100111; @@ -33093,7 +34167,7 @@ def V6_vnavgw_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vnavgw($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -33104,7 +34178,7 @@ def V6_vnccombine : HInst< (outs HvxWR:$Vdd32), (ins PredRegs:$Ps4, HvxVR:$Vu32, HvxVR:$Vv32), "if (!$Ps4) $Vdd32 = vcombine($Vu32,$Vv32)", -tc_2171ebae, TypeCVI_VA_DV>, Enc_8c2412, Requires<[HasV60T,UseHVX]> { +tc_2171ebae, TypeCVI_VA_DV>, Enc_8c2412, Requires<[UseHVXV60]> { let Inst{7-7} = 0b0; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011010010; @@ -33118,7 +34192,7 @@ def V6_vncmov : HInst< (outs HvxVR:$Vd32), (ins PredRegs:$Ps4, HvxVR:$Vu32), "if (!$Ps4) $Vd32 = $Vu32", -tc_b06ab583, TypeCVI_VA>, Enc_770858, Requires<[HasV60T,UseHVX]> { +tc_b06ab583, TypeCVI_VA>, Enc_770858, Requires<[UseHVXV60]> { let Inst{7-7} = 0b0; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001101000100000; @@ -33132,7 +34206,7 @@ def V6_vnormamth : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32), "$Vd32.h = vnormamt($Vu32.h)", -tc_d2cb81ea, TypeCVI_VS>, Enc_e7581c, Requires<[HasV60T,UseHVX]> { +tc_d2cb81ea, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV60]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001111000000011; @@ -33144,7 +34218,7 @@ def V6_vnormamth_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32), "$Vd32 = vnormamth($Vu32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -33155,7 +34229,7 @@ def V6_vnormamtw : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32), "$Vd32.w = vnormamt($Vu32.w)", -tc_d2cb81ea, TypeCVI_VS>, Enc_e7581c, Requires<[HasV60T,UseHVX]> { +tc_d2cb81ea, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV60]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001111000000011; @@ -33167,7 +34241,7 @@ def V6_vnormamtw_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32), "$Vd32 = 
vnormamtw($Vu32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -33178,7 +34252,7 @@ def V6_vnot : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32), "$Vd32 = vnot($Vu32)", -tc_71337255, TypeCVI_VA>, Enc_e7581c, Requires<[HasV60T,UseHVX]> { +tc_71337255, TypeCVI_VA>, Enc_e7581c, Requires<[UseHVXV60]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001111000000000; @@ -33190,7 +34264,7 @@ def V6_vor : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vor($Vu32,$Vv32)", -tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100001; @@ -33202,7 +34276,7 @@ def V6_vpackeb : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.b = vpacke($Vu32.h,$Vv32.h)", -tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111110; @@ -33214,7 +34288,7 @@ def V6_vpackeb_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vpackeb($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -33225,7 +34299,7 @@ def V6_vpackeh : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.h = vpacke($Vu32.w,$Vv32.w)", -tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111110; @@ -33237,7 +34311,7 @@ def V6_vpackeh_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vpackeh($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -33248,7 +34322,7 @@ def V6_vpackhb_sat : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.b = vpack($Vu32.h,$Vv32.h):sat", -tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111110; @@ -33260,7 +34334,7 @@ def V6_vpackhb_sat_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vpackhb($Vu32,$Vv32):sat", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -33271,7 +34345,7 @@ def V6_vpackhub_sat : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.ub = vpack($Vu32.h,$Vv32.h):sat", -tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111110; @@ -33283,7 +34357,7 @@ def V6_vpackhub_sat_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vpackhub($Vu32,$Vv32):sat", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -33294,7 +34368,7 @@ def V6_vpackob : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.b = 
vpacko($Vu32.h,$Vv32.h)", -tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111111; @@ -33306,7 +34380,7 @@ def V6_vpackob_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vpackob($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -33317,7 +34391,7 @@ def V6_vpackoh : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.h = vpacko($Vu32.w,$Vv32.w)", -tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111111; @@ -33329,7 +34403,7 @@ def V6_vpackoh_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vpackoh($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -33340,7 +34414,7 @@ def V6_vpackwh_sat : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.h = vpack($Vu32.w,$Vv32.w):sat", -tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111111; @@ -33352,7 +34426,7 @@ def V6_vpackwh_sat_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vpackwh($Vu32,$Vv32):sat", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -33363,7 +34437,7 @@ def V6_vpackwuh_sat : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.uh = vpack($Vu32.w,$Vv32.w):sat", -tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111110; @@ -33375,7 +34449,7 @@ def V6_vpackwuh_sat_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vpackwuh($Vu32,$Vv32):sat", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -33386,7 +34460,7 @@ def V6_vpopcounth : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32), "$Vd32.h = vpopcount($Vu32.h)", -tc_d2cb81ea, TypeCVI_VS>, Enc_e7581c, Requires<[HasV60T,UseHVX]> { +tc_d2cb81ea, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV60]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001111000000010; @@ -33398,18 +34472,54 @@ def V6_vpopcounth_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32), "$Vd32 = vpopcounth($Vu32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; let isCodeGenOnly = 1; let DecoderNamespace = "EXT_mmvec"; } +def V6_vprefixqb : HInst< +(outs HvxVR:$Vd32), +(ins HvxQR:$Qv4), +"$Vd32.b = prefixsum($Qv4)", +tc_d2cb81ea, TypeCVI_VS>, Enc_6f83e7, Requires<[UseHVXV65]> { +let Inst{13-5} = 0b100000010; +let Inst{21-16} = 0b000011; +let Inst{31-24} = 0b00011110; +let hasNewValue = 1; +let opNewValue = 0; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vprefixqh : HInst< +(outs HvxVR:$Vd32), 
+(ins HvxQR:$Qv4), +"$Vd32.h = prefixsum($Qv4)", +tc_d2cb81ea, TypeCVI_VS>, Enc_6f83e7, Requires<[UseHVXV65]> { +let Inst{13-5} = 0b100001010; +let Inst{21-16} = 0b000011; +let Inst{31-24} = 0b00011110; +let hasNewValue = 1; +let opNewValue = 0; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vprefixqw : HInst< +(outs HvxVR:$Vd32), +(ins HvxQR:$Qv4), +"$Vd32.w = prefixsum($Qv4)", +tc_d2cb81ea, TypeCVI_VS>, Enc_6f83e7, Requires<[UseHVXV65]> { +let Inst{13-5} = 0b100010010; +let Inst{21-16} = 0b000011; +let Inst{31-24} = 0b00011110; +let hasNewValue = 1; +let opNewValue = 0; +let DecoderNamespace = "EXT_mmvec"; +} def V6_vrdelta : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vrdelta($Vu32,$Vv32)", -tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111001; @@ -33417,11 +34527,61 @@ let hasNewValue = 1; let opNewValue = 0; let DecoderNamespace = "EXT_mmvec"; } +def V6_vrmpybub_rtt : HInst< +(outs HvxWR:$Vdd32), +(ins HvxVR:$Vu32, DoubleRegs:$Rtt32), +"$Vdd32.w = vrmpy($Vu32.b,$Rtt32.ub)", +tc_a807365d, TypeCVI_VS_VX>, Enc_cb785b, Requires<[UseHVXV65]> { +let Inst{7-5} = 0b101; +let Inst{13-13} = 0b0; +let Inst{31-21} = 0b00011001110; +let hasNewValue = 1; +let opNewValue = 0; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vrmpybub_rtt_acc : HInst< +(outs HvxWR:$Vxx32), +(ins HvxWR:$Vxx32in, HvxVR:$Vu32, DoubleRegs:$Rtt32), +"$Vxx32.w += vrmpy($Vu32.b,$Rtt32.ub)", +tc_ee927c0e, TypeCVI_VS_VX>, Enc_ad9bef, Requires<[UseHVXV65]> { +let Inst{7-5} = 0b000; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011001101; +let hasNewValue = 1; +let opNewValue = 0; +let isAccumulator = 1; +let DecoderNamespace = "EXT_mmvec"; +let Constraints = "$Vxx32 = $Vxx32in"; +} +def V6_vrmpybub_rtt_acc_alt : HInst< +(outs HvxWR:$Vxx32), +(ins HvxWR:$Vxx32in, HvxVR:$Vu32, DoubleRegs:$Rtt32), +"$Vxx32.w += vrmpy($Vu32.b,$Rtt32.ub)", +PSEUDO, TypeMAPPING>, Requires<[UseHVXV65]> { +let hasNewValue = 1; +let opNewValue = 0; +let isAccumulator = 1; +let isPseudo = 1; +let isCodeGenOnly = 1; +let DecoderNamespace = "EXT_mmvec"; +let Constraints = "$Vxx32 = $Vxx32in"; +} +def V6_vrmpybub_rtt_alt : HInst< +(outs HvxWR:$Vdd32), +(ins HvxVR:$Vu32, DoubleRegs:$Rtt32), +"$Vdd32.w = vrmpy($Vu32.b,$Rtt32.ub)", +PSEUDO, TypeMAPPING>, Requires<[UseHVXV65]> { +let hasNewValue = 1; +let opNewValue = 0; +let isPseudo = 1; +let isCodeGenOnly = 1; +let DecoderNamespace = "EXT_mmvec"; +} def V6_vrmpybus : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, IntRegs:$Rt32), "$Vd32.w = vrmpy($Vu32.ub,$Rt32.b)", -tc_69b6dd20, TypeCVI_VX>, Enc_b087ac, Requires<[HasV60T,UseHVX]> { +tc_69b6dd20, TypeCVI_VX>, Enc_b087ac, Requires<[UseHVXV60]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001000; @@ -33433,7 +34593,7 @@ def V6_vrmpybus_acc : HInst< (outs HvxVR:$Vx32), (ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32), "$Vx32.w += vrmpy($Vu32.ub,$Rt32.b)", -tc_d725e5b0, TypeCVI_VX>, Enc_5138b3, Requires<[HasV60T,UseHVX]> { +tc_d725e5b0, TypeCVI_VX>, Enc_5138b3, Requires<[UseHVXV60]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001000; @@ -33447,7 +34607,7 @@ def V6_vrmpybus_acc_alt : HInst< (outs HvxVR:$Vx32), (ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32), "$Vx32 += vrmpybus($Vu32,$Rt32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let 
isAccumulator = 1; @@ -33460,7 +34620,7 @@ def V6_vrmpybus_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, IntRegs:$Rt32), "$Vd32 = vrmpybus($Vu32,$Rt32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -33471,7 +34631,7 @@ def V6_vrmpybusi : HInst< (outs HvxWR:$Vdd32), (ins HvxWR:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii), "$Vdd32.w = vrmpy($Vuu32.ub,$Rt32.b,#$Ii)", -tc_7e9f581b, TypeCVI_VX_DV>, Enc_2f2f04, Requires<[HasV60T,UseHVX]> { +tc_7e9f581b, TypeCVI_VX_DV>, Enc_2f2f04, Requires<[UseHVXV60]> { let Inst{7-6} = 0b10; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001010; @@ -33483,7 +34643,7 @@ def V6_vrmpybusi_acc : HInst< (outs HvxWR:$Vxx32), (ins HvxWR:$Vxx32in, HvxWR:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii), "$Vxx32.w += vrmpy($Vuu32.ub,$Rt32.b,#$Ii)", -tc_41f99e1c, TypeCVI_VX_DV>, Enc_d483b9, Requires<[HasV60T,UseHVX]> { +tc_41f99e1c, TypeCVI_VX_DV>, Enc_d483b9, Requires<[UseHVXV60]> { let Inst{7-6} = 0b10; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001010; @@ -33497,7 +34657,7 @@ def V6_vrmpybusi_acc_alt : HInst< (outs HvxWR:$Vxx32), (ins HvxWR:$Vxx32in, HvxWR:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii), "$Vxx32 += vrmpybus($Vuu32,$Rt32,#$Ii)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isAccumulator = 1; @@ -33510,7 +34670,7 @@ def V6_vrmpybusi_alt : HInst< (outs HvxWR:$Vdd32), (ins HvxWR:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii), "$Vdd32 = vrmpybus($Vuu32,$Rt32,#$Ii)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -33521,7 +34681,7 @@ def V6_vrmpybusv : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.w = vrmpy($Vu32.ub,$Vv32.b)", -tc_908a4c8c, TypeCVI_VX>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_908a4c8c, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100000; @@ -33533,7 +34693,7 @@ def V6_vrmpybusv_acc : HInst< (outs HvxVR:$Vx32), (ins HvxVR:$Vx32in, HvxVR:$Vu32, HvxVR:$Vv32), "$Vx32.w += vrmpy($Vu32.ub,$Vv32.b)", -tc_e172d86a, TypeCVI_VX_DV>, Enc_a7341a, Requires<[HasV60T,UseHVX]> { +tc_e172d86a, TypeCVI_VX_DV>, Enc_a7341a, Requires<[UseHVXV60]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100000; @@ -33547,7 +34707,7 @@ def V6_vrmpybusv_acc_alt : HInst< (outs HvxVR:$Vx32), (ins HvxVR:$Vx32in, HvxVR:$Vu32, HvxVR:$Vv32), "$Vx32 += vrmpybus($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isAccumulator = 1; @@ -33560,7 +34720,7 @@ def V6_vrmpybusv_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vrmpybus($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -33571,7 +34731,7 @@ def V6_vrmpybv : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.w = vrmpy($Vu32.b,$Vv32.b)", -tc_908a4c8c, TypeCVI_VX>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_908a4c8c, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100000; @@ -33583,7 +34743,7 @@ def V6_vrmpybv_acc : HInst< (outs HvxVR:$Vx32), (ins HvxVR:$Vx32in, 
HvxVR:$Vu32, HvxVR:$Vv32), "$Vx32.w += vrmpy($Vu32.b,$Vv32.b)", -tc_e172d86a, TypeCVI_VX_DV>, Enc_a7341a, Requires<[HasV60T,UseHVX]> { +tc_e172d86a, TypeCVI_VX_DV>, Enc_a7341a, Requires<[UseHVXV60]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100000; @@ -33597,7 +34757,7 @@ def V6_vrmpybv_acc_alt : HInst< (outs HvxVR:$Vx32), (ins HvxVR:$Vx32in, HvxVR:$Vu32, HvxVR:$Vv32), "$Vx32 += vrmpyb($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isAccumulator = 1; @@ -33610,7 +34770,7 @@ def V6_vrmpybv_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vrmpyb($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -33621,7 +34781,7 @@ def V6_vrmpyub : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, IntRegs:$Rt32), "$Vd32.uw = vrmpy($Vu32.ub,$Rt32.ub)", -tc_69b6dd20, TypeCVI_VX>, Enc_b087ac, Requires<[HasV60T,UseHVX]> { +tc_69b6dd20, TypeCVI_VX>, Enc_b087ac, Requires<[UseHVXV60]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001000; @@ -33633,7 +34793,7 @@ def V6_vrmpyub_acc : HInst< (outs HvxVR:$Vx32), (ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32), "$Vx32.uw += vrmpy($Vu32.ub,$Rt32.ub)", -tc_d725e5b0, TypeCVI_VX>, Enc_5138b3, Requires<[HasV60T,UseHVX]> { +tc_d725e5b0, TypeCVI_VX>, Enc_5138b3, Requires<[UseHVXV60]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001000; @@ -33647,7 +34807,7 @@ def V6_vrmpyub_acc_alt : HInst< (outs HvxVR:$Vx32), (ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32), "$Vx32 += vrmpyub($Vu32,$Rt32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isAccumulator = 1; @@ -33660,7 +34820,57 @@ def V6_vrmpyub_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, IntRegs:$Rt32), "$Vd32 = vrmpyub($Vu32,$Rt32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { +let hasNewValue = 1; +let opNewValue = 0; +let isPseudo = 1; +let isCodeGenOnly = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vrmpyub_rtt : HInst< +(outs HvxWR:$Vdd32), +(ins HvxVR:$Vu32, DoubleRegs:$Rtt32), +"$Vdd32.uw = vrmpy($Vu32.ub,$Rtt32.ub)", +tc_a807365d, TypeCVI_VS_VX>, Enc_cb785b, Requires<[UseHVXV65]> { +let Inst{7-5} = 0b100; +let Inst{13-13} = 0b0; +let Inst{31-21} = 0b00011001110; +let hasNewValue = 1; +let opNewValue = 0; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vrmpyub_rtt_acc : HInst< +(outs HvxWR:$Vxx32), +(ins HvxWR:$Vxx32in, HvxVR:$Vu32, DoubleRegs:$Rtt32), +"$Vxx32.uw += vrmpy($Vu32.ub,$Rtt32.ub)", +tc_ee927c0e, TypeCVI_VS_VX>, Enc_ad9bef, Requires<[UseHVXV65]> { +let Inst{7-5} = 0b111; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011001101; +let hasNewValue = 1; +let opNewValue = 0; +let isAccumulator = 1; +let DecoderNamespace = "EXT_mmvec"; +let Constraints = "$Vxx32 = $Vxx32in"; +} +def V6_vrmpyub_rtt_acc_alt : HInst< +(outs HvxWR:$Vxx32), +(ins HvxWR:$Vxx32in, HvxVR:$Vu32, DoubleRegs:$Rtt32), +"$Vxx32.uw += vrmpy($Vu32.ub,$Rtt32.ub)", +PSEUDO, TypeMAPPING>, Requires<[UseHVXV65]> { +let hasNewValue = 1; +let opNewValue = 0; +let isAccumulator = 1; +let isPseudo = 1; +let isCodeGenOnly = 1; +let DecoderNamespace = "EXT_mmvec"; +let Constraints = "$Vxx32 = $Vxx32in"; +} +def V6_vrmpyub_rtt_alt : HInst< +(outs 
HvxWR:$Vdd32), +(ins HvxVR:$Vu32, DoubleRegs:$Rtt32), +"$Vdd32.uw = vrmpy($Vu32.ub,$Rtt32.ub)", +PSEUDO, TypeMAPPING>, Requires<[UseHVXV65]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -33671,7 +34881,7 @@ def V6_vrmpyubi : HInst< (outs HvxWR:$Vdd32), (ins HvxWR:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii), "$Vdd32.uw = vrmpy($Vuu32.ub,$Rt32.ub,#$Ii)", -tc_7e9f581b, TypeCVI_VX_DV>, Enc_2f2f04, Requires<[HasV60T,UseHVX]> { +tc_7e9f581b, TypeCVI_VX_DV>, Enc_2f2f04, Requires<[UseHVXV60]> { let Inst{7-6} = 0b11; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001101; @@ -33683,7 +34893,7 @@ def V6_vrmpyubi_acc : HInst< (outs HvxWR:$Vxx32), (ins HvxWR:$Vxx32in, HvxWR:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii), "$Vxx32.uw += vrmpy($Vuu32.ub,$Rt32.ub,#$Ii)", -tc_41f99e1c, TypeCVI_VX_DV>, Enc_d483b9, Requires<[HasV60T,UseHVX]> { +tc_41f99e1c, TypeCVI_VX_DV>, Enc_d483b9, Requires<[UseHVXV60]> { let Inst{7-6} = 0b11; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001011; @@ -33697,7 +34907,7 @@ def V6_vrmpyubi_acc_alt : HInst< (outs HvxWR:$Vxx32), (ins HvxWR:$Vxx32in, HvxWR:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii), "$Vxx32 += vrmpyub($Vuu32,$Rt32,#$Ii)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isAccumulator = 1; @@ -33710,7 +34920,7 @@ def V6_vrmpyubi_alt : HInst< (outs HvxWR:$Vdd32), (ins HvxWR:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii), "$Vdd32 = vrmpyub($Vuu32,$Rt32,#$Ii)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -33721,7 +34931,7 @@ def V6_vrmpyubv : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.uw = vrmpy($Vu32.ub,$Vv32.ub)", -tc_908a4c8c, TypeCVI_VX>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_908a4c8c, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100000; @@ -33733,7 +34943,7 @@ def V6_vrmpyubv_acc : HInst< (outs HvxVR:$Vx32), (ins HvxVR:$Vx32in, HvxVR:$Vu32, HvxVR:$Vv32), "$Vx32.uw += vrmpy($Vu32.ub,$Vv32.ub)", -tc_e172d86a, TypeCVI_VX_DV>, Enc_a7341a, Requires<[HasV60T,UseHVX]> { +tc_e172d86a, TypeCVI_VX_DV>, Enc_a7341a, Requires<[UseHVXV60]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100000; @@ -33747,7 +34957,7 @@ def V6_vrmpyubv_acc_alt : HInst< (outs HvxVR:$Vx32), (ins HvxVR:$Vx32in, HvxVR:$Vu32, HvxVR:$Vv32), "$Vx32 += vrmpyub($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isAccumulator = 1; @@ -33760,7 +34970,7 @@ def V6_vrmpyubv_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vrmpyub($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -33771,7 +34981,7 @@ def V6_vror : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, IntRegs:$Rt32), "$Vd32 = vror($Vu32,$Rt32)", -tc_bf142ae2, TypeCVI_VP>, Enc_b087ac, Requires<[HasV60T,UseHVX]> { +tc_bf142ae2, TypeCVI_VP>, Enc_b087ac, Requires<[UseHVXV60]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001011; @@ -33783,7 +34993,7 @@ def V6_vroundhb : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.b = vround($Vu32.h,$Vv32.h):sat", -tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_45453b98, 
TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111011; @@ -33795,7 +35005,7 @@ def V6_vroundhb_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vroundhb($Vu32,$Vv32):sat", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -33806,7 +35016,7 @@ def V6_vroundhub : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.ub = vround($Vu32.h,$Vv32.h):sat", -tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111011; @@ -33818,7 +35028,7 @@ def V6_vroundhub_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vroundhub($Vu32,$Vv32):sat", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -33829,7 +35039,7 @@ def V6_vrounduhub : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.ub = vround($Vu32.uh,$Vv32.uh):sat", -tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[HasV62T,UseHVX]> { +tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV62]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111111; @@ -33841,7 +35051,7 @@ def V6_vrounduhub_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vrounduhub($Vu32,$Vv32):sat", -PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV62]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -33852,7 +35062,7 @@ def V6_vrounduwuh : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.uh = vround($Vu32.uw,$Vv32.uw):sat", -tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[HasV62T,UseHVX]> { +tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV62]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111111; @@ -33864,7 +35074,7 @@ def V6_vrounduwuh_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vrounduwuh($Vu32,$Vv32):sat", -PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV62]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -33875,7 +35085,7 @@ def V6_vroundwh : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.h = vround($Vu32.w,$Vv32.w):sat", -tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111011; @@ -33887,7 +35097,7 @@ def V6_vroundwh_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vroundwh($Vu32,$Vv32):sat", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -33898,7 +35108,7 @@ def V6_vroundwuh : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.uh = vround($Vu32.w,$Vv32.w):sat", -tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111011; @@ -33910,7 +35120,7 @@ def V6_vroundwuh_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = 
vroundwuh($Vu32,$Vv32):sat", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -33921,7 +35131,7 @@ def V6_vrsadubi : HInst< (outs HvxWR:$Vdd32), (ins HvxWR:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii), "$Vdd32.uw = vrsad($Vuu32.ub,$Rt32.ub,#$Ii)", -tc_7e9f581b, TypeCVI_VX_DV>, Enc_2f2f04, Requires<[HasV60T,UseHVX]> { +tc_7e9f581b, TypeCVI_VX_DV>, Enc_2f2f04, Requires<[UseHVXV60]> { let Inst{7-6} = 0b11; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001010; @@ -33933,7 +35143,7 @@ def V6_vrsadubi_acc : HInst< (outs HvxWR:$Vxx32), (ins HvxWR:$Vxx32in, HvxWR:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii), "$Vxx32.uw += vrsad($Vuu32.ub,$Rt32.ub,#$Ii)", -tc_41f99e1c, TypeCVI_VX_DV>, Enc_d483b9, Requires<[HasV60T,UseHVX]> { +tc_41f99e1c, TypeCVI_VX_DV>, Enc_d483b9, Requires<[UseHVXV60]> { let Inst{7-6} = 0b11; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001010; @@ -33947,7 +35157,7 @@ def V6_vrsadubi_acc_alt : HInst< (outs HvxWR:$Vxx32), (ins HvxWR:$Vxx32in, HvxWR:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii), "$Vxx32 += vrsadub($Vuu32,$Rt32,#$Ii)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isAccumulator = 1; @@ -33960,7 +35170,7 @@ def V6_vrsadubi_alt : HInst< (outs HvxWR:$Vdd32), (ins HvxWR:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii), "$Vdd32 = vrsadub($Vuu32,$Rt32,#$Ii)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -33971,7 +35181,7 @@ def V6_vsathub : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.ub = vsat($Vu32.h,$Vv32.h)", -tc_9b9642a1, TypeCVI_VINLANESAT>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_9b9642a1, TypeCVI_VINLANESAT>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111011; @@ -33983,7 +35193,7 @@ def V6_vsathub_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vsathub($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -33994,7 +35204,7 @@ def V6_vsatuwuh : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.uh = vsat($Vu32.uw,$Vv32.uw)", -tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV62T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV62]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111001; @@ -34006,7 +35216,7 @@ def V6_vsatuwuh_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vsatuwuh($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV62]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -34017,7 +35227,7 @@ def V6_vsatwh : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.h = vsat($Vu32.w,$Vv32.w)", -tc_9b9642a1, TypeCVI_VINLANESAT>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_9b9642a1, TypeCVI_VINLANESAT>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111011; @@ -34029,7 +35239,7 @@ def V6_vsatwh_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vsatwh($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let 
opNewValue = 0; let isPseudo = 1; @@ -34040,7 +35250,7 @@ def V6_vsb : HInst< (outs HvxWR:$Vdd32), (ins HvxVR:$Vu32), "$Vdd32.h = vsxt($Vu32.b)", -tc_644584f8, TypeCVI_VA_DV>, Enc_dd766a, Requires<[HasV60T,UseHVX]> { +tc_644584f8, TypeCVI_VA_DV>, Enc_dd766a, Requires<[UseHVXV60]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001111000000010; @@ -34052,18 +35262,204 @@ def V6_vsb_alt : HInst< (outs HvxWR:$Vdd32), (ins HvxVR:$Vu32), "$Vdd32 = vsxtb($Vu32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; let isCodeGenOnly = 1; let DecoderNamespace = "EXT_mmvec"; } +def V6_vscattermh : HInst< +(outs), +(ins IntRegs:$Rt32, ModRegs:$Mu2, HvxVR:$Vv32, HvxVR:$Vw32), +"vscatter($Rt32,$Mu2,$Vv32.h).h = $Vw32", +tc_4f190ba3, TypeCVI_SCATTER>, Enc_16c48b, Requires<[UseHVXV65]> { +let Inst{7-5} = 0b001; +let Inst{31-21} = 0b00101111001; +let accessSize = HalfWordAccess; +let mayStore = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vscattermh_add : HInst< +(outs), +(ins IntRegs:$Rt32, ModRegs:$Mu2, HvxVR:$Vv32, HvxVR:$Vw32), +"vscatter($Rt32,$Mu2,$Vv32.h).h += $Vw32", +tc_4f190ba3, TypeCVI_SCATTER>, Enc_16c48b, Requires<[UseHVXV65]> { +let Inst{7-5} = 0b101; +let Inst{31-21} = 0b00101111001; +let accessSize = HalfWordAccess; +let isAccumulator = 1; +let mayStore = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vscattermh_add_alt : HInst< +(outs), +(ins IntRegs:$Rt32, ModRegs:$Mu2, HvxVR:$Vv32, HvxVR:$Vw32), +"vscatter($Rt32,$Mu2,$Vv32.h) += $Vw32.h", +PSEUDO, TypeMAPPING>, Requires<[UseHVXV65]> { +let isAccumulator = 1; +let isPseudo = 1; +let isCodeGenOnly = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vscattermh_alt : HInst< +(outs), +(ins IntRegs:$Rt32, ModRegs:$Mu2, HvxVR:$Vv32, HvxVR:$Vw32), +"vscatter($Rt32,$Mu2,$Vv32.h) = $Vw32.h", +PSEUDO, TypeMAPPING>, Requires<[UseHVXV65]> { +let isPseudo = 1; +let isCodeGenOnly = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vscattermhq : HInst< +(outs), +(ins HvxQR:$Qs4, IntRegs:$Rt32, ModRegs:$Mu2, HvxVR:$Vv32, HvxVR:$Vw32), +"if ($Qs4) vscatter($Rt32,$Mu2,$Vv32.h).h = $Vw32", +tc_df54ad52, TypeCVI_SCATTER>, Enc_9be1de, Requires<[UseHVXV65]> { +let Inst{7-7} = 0b1; +let Inst{31-21} = 0b00101111100; +let accessSize = HalfWordAccess; +let mayStore = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vscattermhq_alt : HInst< +(outs), +(ins HvxQR:$Qs4, IntRegs:$Rt32, ModRegs:$Mu2, HvxVR:$Vv32, HvxVR:$Vw32), +"if ($Qs4) vscatter($Rt32,$Mu2,$Vv32.h) = $Vw32.h", +PSEUDO, TypeMAPPING>, Requires<[UseHVXV65]> { +let isPseudo = 1; +let isCodeGenOnly = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vscattermhw : HInst< +(outs), +(ins IntRegs:$Rt32, ModRegs:$Mu2, HvxWR:$Vvv32, HvxVR:$Vw32), +"vscatter($Rt32,$Mu2,$Vvv32.w).h = $Vw32", +tc_ec58f88a, TypeCVI_SCATTER_DV>, Enc_a641d0, Requires<[UseHVXV65]> { +let Inst{7-5} = 0b010; +let Inst{31-21} = 0b00101111001; +let accessSize = HalfWordAccess; +let mayStore = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vscattermhw_add : HInst< +(outs), +(ins IntRegs:$Rt32, ModRegs:$Mu2, HvxWR:$Vvv32, HvxVR:$Vw32), +"vscatter($Rt32,$Mu2,$Vvv32.w).h += $Vw32", +tc_ec58f88a, TypeCVI_SCATTER_DV>, Enc_a641d0, Requires<[UseHVXV65]> { +let Inst{7-5} = 0b110; +let Inst{31-21} = 0b00101111001; +let accessSize = HalfWordAccess; +let isAccumulator = 1; +let mayStore = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vscattermhwq : HInst< +(outs), +(ins HvxQR:$Qs4, IntRegs:$Rt32, 
ModRegs:$Mu2, HvxWR:$Vvv32, HvxVR:$Vw32), +"if ($Qs4) vscatter($Rt32,$Mu2,$Vvv32.w).h = $Vw32", +tc_94f43c04, TypeCVI_SCATTER_DV>, Enc_3d6d37, Requires<[UseHVXV65]> { +let Inst{7-7} = 0b0; +let Inst{31-21} = 0b00101111101; +let accessSize = HalfWordAccess; +let mayStore = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vscattermw : HInst< +(outs), +(ins IntRegs:$Rt32, ModRegs:$Mu2, HvxVR:$Vv32, HvxVR:$Vw32), +"vscatter($Rt32,$Mu2,$Vv32.w).w = $Vw32", +tc_4f190ba3, TypeCVI_SCATTER>, Enc_16c48b, Requires<[UseHVXV65]> { +let Inst{7-5} = 0b000; +let Inst{31-21} = 0b00101111001; +let accessSize = WordAccess; +let mayStore = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vscattermw_add : HInst< +(outs), +(ins IntRegs:$Rt32, ModRegs:$Mu2, HvxVR:$Vv32, HvxVR:$Vw32), +"vscatter($Rt32,$Mu2,$Vv32.w).w += $Vw32", +tc_4f190ba3, TypeCVI_SCATTER>, Enc_16c48b, Requires<[UseHVXV65]> { +let Inst{7-5} = 0b100; +let Inst{31-21} = 0b00101111001; +let accessSize = WordAccess; +let isAccumulator = 1; +let mayStore = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vscattermw_add_alt : HInst< +(outs), +(ins IntRegs:$Rt32, ModRegs:$Mu2, HvxVR:$Vv32, HvxVR:$Vw32), +"vscatter($Rt32,$Mu2,$Vv32.w) += $Vw32.w", +PSEUDO, TypeMAPPING>, Requires<[UseHVXV65]> { +let isAccumulator = 1; +let isPseudo = 1; +let isCodeGenOnly = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vscattermw_alt : HInst< +(outs), +(ins IntRegs:$Rt32, ModRegs:$Mu2, HvxVR:$Vv32, HvxVR:$Vw32), +"vscatter($Rt32,$Mu2,$Vv32.w) = $Vw32.w", +PSEUDO, TypeMAPPING>, Requires<[UseHVXV65]> { +let isPseudo = 1; +let isCodeGenOnly = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vscattermwh_add_alt : HInst< +(outs), +(ins IntRegs:$Rt32, ModRegs:$Mu2, HvxWR:$Vvv32, HvxVR:$Vw32), +"vscatter($Rt32,$Mu2,$Vvv32.w) += $Vw32.h", +PSEUDO, TypeMAPPING>, Requires<[UseHVXV65]> { +let isAccumulator = 1; +let isPseudo = 1; +let isCodeGenOnly = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vscattermwh_alt : HInst< +(outs), +(ins IntRegs:$Rt32, ModRegs:$Mu2, HvxWR:$Vvv32, HvxVR:$Vw32), +"vscatter($Rt32,$Mu2,$Vvv32.w) = $Vw32.h", +PSEUDO, TypeMAPPING>, Requires<[UseHVXV65]> { +let isPseudo = 1; +let isCodeGenOnly = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vscattermwhq_alt : HInst< +(outs), +(ins HvxQR:$Qs4, IntRegs:$Rt32, ModRegs:$Mu2, HvxWR:$Vvv32, HvxVR:$Vw32), +"if ($Qs4) vscatter($Rt32,$Mu2,$Vvv32.w) = $Vw32.h", +PSEUDO, TypeMAPPING>, Requires<[UseHVXV65]> { +let isPseudo = 1; +let isCodeGenOnly = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vscattermwq : HInst< +(outs), +(ins HvxQR:$Qs4, IntRegs:$Rt32, ModRegs:$Mu2, HvxVR:$Vv32, HvxVR:$Vw32), +"if ($Qs4) vscatter($Rt32,$Mu2,$Vv32.w).w = $Vw32", +tc_df54ad52, TypeCVI_SCATTER>, Enc_9be1de, Requires<[UseHVXV65]> { +let Inst{7-7} = 0b0; +let Inst{31-21} = 0b00101111100; +let accessSize = WordAccess; +let mayStore = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vscattermwq_alt : HInst< +(outs), +(ins HvxQR:$Qs4, IntRegs:$Rt32, ModRegs:$Mu2, HvxVR:$Vv32, HvxVR:$Vw32), +"if ($Qs4) vscatter($Rt32,$Mu2,$Vv32.w) = $Vw32.w", +PSEUDO, TypeMAPPING>, Requires<[UseHVXV65]> { +let isPseudo = 1; +let isCodeGenOnly = 1; +let DecoderNamespace = "EXT_mmvec"; +} def V6_vsh : HInst< (outs HvxWR:$Vdd32), (ins HvxVR:$Vu32), "$Vdd32.w = vsxt($Vu32.h)", -tc_644584f8, TypeCVI_VA_DV>, Enc_dd766a, Requires<[HasV60T,UseHVX]> { +tc_644584f8, TypeCVI_VA_DV>, Enc_dd766a, Requires<[UseHVXV60]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001111000000010; @@ -34075,7 +35471,7 @@ def V6_vsh_alt 
: HInst< (outs HvxWR:$Vdd32), (ins HvxVR:$Vu32), "$Vdd32 = vsxth($Vu32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -34086,7 +35482,7 @@ def V6_vshufeh : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.h = vshuffe($Vu32.h,$Vv32.h)", -tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111010; @@ -34098,7 +35494,7 @@ def V6_vshufeh_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vshuffeh($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -34109,7 +35505,7 @@ def V6_vshuff : HInst< (outs HvxVR:$Vy32, HvxVR:$Vx32), (ins HvxVR:$Vy32in, HvxVR:$Vx32in, IntRegs:$Rt32), "vshuff($Vy32,$Vx32,$Rt32)", -tc_5c120602, TypeCVI_VP_VS>, Enc_989021, Requires<[HasV60T,UseHVX]> { +tc_5c120602, TypeCVI_VP_VS>, Enc_989021, Requires<[UseHVXV60]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001111; @@ -34124,7 +35520,7 @@ def V6_vshuffb : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32), "$Vd32.b = vshuff($Vu32.b)", -tc_e6299d16, TypeCVI_VP>, Enc_e7581c, Requires<[HasV60T,UseHVX]> { +tc_e6299d16, TypeCVI_VP>, Enc_e7581c, Requires<[UseHVXV60]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001111000000010; @@ -34136,7 +35532,7 @@ def V6_vshuffb_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32), "$Vd32 = vshuffb($Vu32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -34147,7 +35543,7 @@ def V6_vshuffeb : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.b = vshuffe($Vu32.b,$Vv32.b)", -tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111010; @@ -34159,7 +35555,7 @@ def V6_vshuffeb_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vshuffeb($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -34170,7 +35566,7 @@ def V6_vshuffh : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32), "$Vd32.h = vshuff($Vu32.h)", -tc_e6299d16, TypeCVI_VP>, Enc_e7581c, Requires<[HasV60T,UseHVX]> { +tc_e6299d16, TypeCVI_VP>, Enc_e7581c, Requires<[UseHVXV60]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001111000000001; @@ -34182,7 +35578,7 @@ def V6_vshuffh_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32), "$Vd32 = vshuffh($Vu32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -34193,7 +35589,7 @@ def V6_vshuffob : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.b = vshuffo($Vu32.b,$Vv32.b)", -tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111010; @@ -34205,7 +35601,7 @@ def V6_vshuffob_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, 
HvxVR:$Vv32), "$Vd32 = vshuffob($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -34216,7 +35612,7 @@ def V6_vshuffvdd : HInst< (outs HvxWR:$Vdd32), (ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8), "$Vdd32 = vshuff($Vu32,$Vv32,$Rt8)", -tc_4e2a5159, TypeCVI_VP_VS>, Enc_24a7dc, Requires<[HasV60T,UseHVX]> { +tc_4e2a5159, TypeCVI_VP_VS>, Enc_24a7dc, Requires<[UseHVXV60]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b1; let Inst{31-24} = 0b00011011; @@ -34228,7 +35624,7 @@ def V6_vshufoeb : HInst< (outs HvxWR:$Vdd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vdd32.b = vshuffoe($Vu32.b,$Vv32.b)", -tc_97c165b9, TypeCVI_VA_DV>, Enc_71bb9b, Requires<[HasV60T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_71bb9b, Requires<[UseHVXV60]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111010; @@ -34240,7 +35636,7 @@ def V6_vshufoeb_alt : HInst< (outs HvxWR:$Vdd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vdd32 = vshuffoeb($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -34251,7 +35647,7 @@ def V6_vshufoeh : HInst< (outs HvxWR:$Vdd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vdd32.h = vshuffoe($Vu32.h,$Vv32.h)", -tc_97c165b9, TypeCVI_VA_DV>, Enc_71bb9b, Requires<[HasV60T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_71bb9b, Requires<[UseHVXV60]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111010; @@ -34263,7 +35659,7 @@ def V6_vshufoeh_alt : HInst< (outs HvxWR:$Vdd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vdd32 = vshuffoeh($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -34274,7 +35670,7 @@ def V6_vshufoh : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.h = vshuffo($Vu32.h,$Vv32.h)", -tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111010; @@ -34286,7 +35682,7 @@ def V6_vshufoh_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vshuffoh($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -34297,7 +35693,7 @@ def V6_vsubb : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.b = vsub($Vu32.b,$Vv32.b)", -tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100010; @@ -34309,7 +35705,7 @@ def V6_vsubb_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vsubb($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -34320,7 +35716,7 @@ def V6_vsubb_dv : HInst< (outs HvxWR:$Vdd32), (ins HvxWR:$Vuu32, HvxWR:$Vvv32), "$Vdd32.b = vsub($Vuu32.b,$Vvv32.b)", -tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[HasV60T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100100; @@ -34332,7 +35728,7 @@ def 
V6_vsubb_dv_alt : HInst< (outs HvxWR:$Vdd32), (ins HvxWR:$Vuu32, HvxWR:$Vvv32), "$Vdd32 = vsubb($Vuu32,$Vvv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -34343,7 +35739,7 @@ def V6_vsubbnq : HInst< (outs HvxVR:$Vx32), (ins HvxQR:$Qv4, HvxVR:$Vx32in, HvxVR:$Vu32), "if (!$Qv4) $Vx32.b -= $Vu32.b", -tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b1; let Inst{21-16} = 0b000010; @@ -34357,7 +35753,7 @@ def V6_vsubbnq_alt : HInst< (outs HvxVR:$Vx32), (ins HvxQR:$Qv4, HvxVR:$Vx32in, HvxVR:$Vu32), "if (!$Qv4.b) $Vx32.b -= $Vu32.b", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -34369,7 +35765,7 @@ def V6_vsubbq : HInst< (outs HvxVR:$Vx32), (ins HvxQR:$Qv4, HvxVR:$Vx32in, HvxVR:$Vu32), "if ($Qv4) $Vx32.b -= $Vu32.b", -tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b1; let Inst{21-16} = 0b000001; @@ -34383,7 +35779,7 @@ def V6_vsubbq_alt : HInst< (outs HvxVR:$Vx32), (ins HvxQR:$Qv4, HvxVR:$Vx32in, HvxVR:$Vu32), "if ($Qv4.b) $Vx32.b -= $Vu32.b", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -34395,7 +35791,7 @@ def V6_vsubbsat : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.b = vsub($Vu32.b,$Vv32.b):sat", -tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV62T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV62]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111001; @@ -34407,7 +35803,7 @@ def V6_vsubbsat_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vsubb($Vu32,$Vv32):sat", -PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV62]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -34418,7 +35814,7 @@ def V6_vsubbsat_dv : HInst< (outs HvxWR:$Vdd32), (ins HvxWR:$Vuu32, HvxWR:$Vvv32), "$Vdd32.b = vsub($Vuu32.b,$Vvv32.b):sat", -tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[HasV62T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV62]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011110101; @@ -34430,7 +35826,7 @@ def V6_vsubbsat_dv_alt : HInst< (outs HvxWR:$Vdd32), (ins HvxWR:$Vuu32, HvxWR:$Vvv32), "$Vdd32 = vsubb($Vuu32,$Vvv32):sat", -PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV62]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -34441,14 +35837,12 @@ def V6_vsubcarry : HInst< (outs HvxVR:$Vd32, HvxQR:$Qx4), (ins HvxVR:$Vu32, HvxVR:$Vv32, HvxQR:$Qx4in), "$Vd32.w = vsub($Vu32.w,$Vv32.w,$Qx4):carry", -tc_5a9fc4ec, TypeCVI_VA>, Enc_b43b67, Requires<[HasV62T,UseHVX]> { +tc_5a9fc4ec, TypeCVI_VA>, Enc_b43b67, Requires<[UseHVXV62]> { let Inst{7-7} = 0b1; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100101; let hasNewValue = 1; let opNewValue = 0; -let hasNewValue2 = 1; -let opNewValue2 = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -34456,7 +35850,7 @@ def V6_vsubh : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), 
"$Vd32.h = vsub($Vu32.h,$Vv32.h)", -tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100010; @@ -34468,7 +35862,7 @@ def V6_vsubh_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vsubh($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -34479,7 +35873,7 @@ def V6_vsubh_dv : HInst< (outs HvxWR:$Vdd32), (ins HvxWR:$Vuu32, HvxWR:$Vvv32), "$Vdd32.h = vsub($Vuu32.h,$Vvv32.h)", -tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[HasV60T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100100; @@ -34491,7 +35885,7 @@ def V6_vsubh_dv_alt : HInst< (outs HvxWR:$Vdd32), (ins HvxWR:$Vuu32, HvxWR:$Vvv32), "$Vdd32 = vsubh($Vuu32,$Vvv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -34502,7 +35896,7 @@ def V6_vsubhnq : HInst< (outs HvxVR:$Vx32), (ins HvxQR:$Qv4, HvxVR:$Vx32in, HvxVR:$Vu32), "if (!$Qv4) $Vx32.h -= $Vu32.h", -tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b1; let Inst{21-16} = 0b000010; @@ -34516,7 +35910,7 @@ def V6_vsubhnq_alt : HInst< (outs HvxVR:$Vx32), (ins HvxQR:$Qv4, HvxVR:$Vx32in, HvxVR:$Vu32), "if (!$Qv4.h) $Vx32.h -= $Vu32.h", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -34528,7 +35922,7 @@ def V6_vsubhq : HInst< (outs HvxVR:$Vx32), (ins HvxQR:$Qv4, HvxVR:$Vx32in, HvxVR:$Vu32), "if ($Qv4) $Vx32.h -= $Vu32.h", -tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b1; let Inst{21-16} = 0b000001; @@ -34542,7 +35936,7 @@ def V6_vsubhq_alt : HInst< (outs HvxVR:$Vx32), (ins HvxQR:$Qv4, HvxVR:$Vx32in, HvxVR:$Vu32), "if ($Qv4.h) $Vx32.h -= $Vu32.h", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -34554,7 +35948,7 @@ def V6_vsubhsat : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.h = vsub($Vu32.h,$Vv32.h):sat", -tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100011; @@ -34566,7 +35960,7 @@ def V6_vsubhsat_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vsubh($Vu32,$Vv32):sat", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -34577,7 +35971,7 @@ def V6_vsubhsat_dv : HInst< (outs HvxWR:$Vdd32), (ins HvxWR:$Vuu32, HvxWR:$Vvv32), "$Vdd32.h = vsub($Vuu32.h,$Vvv32.h):sat", -tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[HasV60T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100101; @@ -34589,7 +35983,7 @@ def 
V6_vsubhsat_dv_alt : HInst< (outs HvxWR:$Vdd32), (ins HvxWR:$Vuu32, HvxWR:$Vvv32), "$Vdd32 = vsubh($Vuu32,$Vvv32):sat", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -34600,7 +35994,7 @@ def V6_vsubhw : HInst< (outs HvxWR:$Vdd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vdd32.w = vsub($Vu32.h,$Vv32.h)", -tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[HasV60T,UseHVX]> { +tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100101; @@ -34612,7 +36006,7 @@ def V6_vsubhw_alt : HInst< (outs HvxWR:$Vdd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vdd32 = vsubh($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -34623,7 +36017,7 @@ def V6_vsububh : HInst< (outs HvxWR:$Vdd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vdd32.h = vsub($Vu32.ub,$Vv32.ub)", -tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[HasV60T,UseHVX]> { +tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100101; @@ -34635,7 +36029,7 @@ def V6_vsububh_alt : HInst< (outs HvxWR:$Vdd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vdd32 = vsubub($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -34646,7 +36040,7 @@ def V6_vsububsat : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.ub = vsub($Vu32.ub,$Vv32.ub):sat", -tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100011; @@ -34658,7 +36052,7 @@ def V6_vsububsat_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vsubub($Vu32,$Vv32):sat", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -34669,7 +36063,7 @@ def V6_vsububsat_dv : HInst< (outs HvxWR:$Vdd32), (ins HvxWR:$Vuu32, HvxWR:$Vvv32), "$Vdd32.ub = vsub($Vuu32.ub,$Vvv32.ub):sat", -tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[HasV60T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100100; @@ -34681,7 +36075,7 @@ def V6_vsububsat_dv_alt : HInst< (outs HvxWR:$Vdd32), (ins HvxWR:$Vuu32, HvxWR:$Vvv32), "$Vdd32 = vsubub($Vuu32,$Vvv32):sat", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -34692,7 +36086,7 @@ def V6_vsubububb_sat : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.ub = vsub($Vu32.ub,$Vv32.b):sat", -tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV62T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV62]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011110101; @@ -34704,7 +36098,7 @@ def V6_vsubuhsat : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.uh = vsub($Vu32.uh,$Vv32.uh):sat", -tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 
0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100011; @@ -34716,7 +36110,7 @@ def V6_vsubuhsat_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vsubuh($Vu32,$Vv32):sat", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -34727,7 +36121,7 @@ def V6_vsubuhsat_dv : HInst< (outs HvxWR:$Vdd32), (ins HvxWR:$Vuu32, HvxWR:$Vvv32), "$Vdd32.uh = vsub($Vuu32.uh,$Vvv32.uh):sat", -tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[HasV60T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100100; @@ -34739,7 +36133,7 @@ def V6_vsubuhsat_dv_alt : HInst< (outs HvxWR:$Vdd32), (ins HvxWR:$Vuu32, HvxWR:$Vvv32), "$Vdd32 = vsubuh($Vuu32,$Vvv32):sat", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -34750,7 +36144,7 @@ def V6_vsubuhw : HInst< (outs HvxWR:$Vdd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vdd32.w = vsub($Vu32.uh,$Vv32.uh)", -tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[HasV60T,UseHVX]> { +tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV60]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100101; @@ -34762,7 +36156,7 @@ def V6_vsubuhw_alt : HInst< (outs HvxWR:$Vdd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vdd32 = vsubuh($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -34773,7 +36167,7 @@ def V6_vsubuwsat : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.uw = vsub($Vu32.uw,$Vv32.uw):sat", -tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV62T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV62]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111110; @@ -34785,7 +36179,7 @@ def V6_vsubuwsat_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vsubuw($Vu32,$Vv32):sat", -PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV62]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -34796,7 +36190,7 @@ def V6_vsubuwsat_dv : HInst< (outs HvxWR:$Vdd32), (ins HvxWR:$Vuu32, HvxWR:$Vvv32), "$Vdd32.uw = vsub($Vuu32.uw,$Vvv32.uw):sat", -tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[HasV62T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV62]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011110101; @@ -34808,7 +36202,7 @@ def V6_vsubuwsat_dv_alt : HInst< (outs HvxWR:$Vdd32), (ins HvxWR:$Vuu32, HvxWR:$Vvv32), "$Vdd32 = vsubuw($Vuu32,$Vvv32):sat", -PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV62]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -34819,7 +36213,7 @@ def V6_vsubw : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.w = vsub($Vu32.w,$Vv32.w)", -tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100010; @@ -34831,7 +36225,7 @@ def V6_vsubw_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vsubw($Vu32,$Vv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, 
TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -34842,7 +36236,7 @@ def V6_vsubw_dv : HInst< (outs HvxWR:$Vdd32), (ins HvxWR:$Vuu32, HvxWR:$Vvv32), "$Vdd32.w = vsub($Vuu32.w,$Vvv32.w)", -tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[HasV60T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100100; @@ -34854,7 +36248,7 @@ def V6_vsubw_dv_alt : HInst< (outs HvxWR:$Vdd32), (ins HvxWR:$Vuu32, HvxWR:$Vvv32), "$Vdd32 = vsubw($Vuu32,$Vvv32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -34865,7 +36259,7 @@ def V6_vsubwnq : HInst< (outs HvxVR:$Vx32), (ins HvxQR:$Qv4, HvxVR:$Vx32in, HvxVR:$Vu32), "if (!$Qv4) $Vx32.w -= $Vu32.w", -tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b1; let Inst{21-16} = 0b000010; @@ -34879,7 +36273,7 @@ def V6_vsubwnq_alt : HInst< (outs HvxVR:$Vx32), (ins HvxQR:$Qv4, HvxVR:$Vx32in, HvxVR:$Vu32), "if (!$Qv4.w) $Vx32.w -= $Vu32.w", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -34891,7 +36285,7 @@ def V6_vsubwq : HInst< (outs HvxVR:$Vx32), (ins HvxQR:$Qv4, HvxVR:$Vx32in, HvxVR:$Vu32), "if ($Qv4) $Vx32.w -= $Vu32.w", -tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[UseHVXV60]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b1; let Inst{21-16} = 0b000010; @@ -34905,7 +36299,7 @@ def V6_vsubwq_alt : HInst< (outs HvxVR:$Vx32), (ins HvxQR:$Qv4, HvxVR:$Vx32in, HvxVR:$Vu32), "if ($Qv4.w) $Vx32.w -= $Vu32.w", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -34917,7 +36311,7 @@ def V6_vsubwsat : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.w = vsub($Vu32.w,$Vv32.w):sat", -tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100011; @@ -34929,7 +36323,7 @@ def V6_vsubwsat_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vsubw($Vu32,$Vv32):sat", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -34940,7 +36334,7 @@ def V6_vsubwsat_dv : HInst< (outs HvxWR:$Vdd32), (ins HvxWR:$Vuu32, HvxWR:$Vvv32), "$Vdd32.w = vsub($Vuu32.w,$Vvv32.w):sat", -tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[HasV60T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[UseHVXV60]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100101; @@ -34952,7 +36346,7 @@ def V6_vsubwsat_dv_alt : HInst< (outs HvxWR:$Vdd32), (ins HvxWR:$Vuu32, HvxWR:$Vvv32), "$Vdd32 = vsubw($Vuu32,$Vvv32):sat", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -34963,7 +36357,7 @@ def V6_vswap : HInst< (outs HvxWR:$Vdd32), (ins HvxQR:$Qt4, HvxVR:$Vu32, HvxVR:$Vv32), "$Vdd32 = vswap($Qt4,$Vu32,$Vv32)", -tc_316c637c, 
TypeCVI_VA_DV>, Enc_3dac0b, Requires<[HasV60T,UseHVX]> { +tc_316c637c, TypeCVI_VA_DV>, Enc_3dac0b, Requires<[UseHVXV60]> { let Inst{7-7} = 0b0; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011110101; @@ -34975,7 +36369,7 @@ def V6_vtmpyb : HInst< (outs HvxWR:$Vdd32), (ins HvxWR:$Vuu32, IntRegs:$Rt32), "$Vdd32.h = vtmpy($Vuu32.b,$Rt32.b)", -tc_7c3f55c4, TypeCVI_VX_DV>, Enc_aad80c, Requires<[HasV60T,UseHVX]> { +tc_7c3f55c4, TypeCVI_VX_DV>, Enc_aad80c, Requires<[UseHVXV60]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001000; @@ -34987,7 +36381,7 @@ def V6_vtmpyb_acc : HInst< (outs HvxWR:$Vxx32), (ins HvxWR:$Vxx32in, HvxWR:$Vuu32, IntRegs:$Rt32), "$Vxx32.h += vtmpy($Vuu32.b,$Rt32.b)", -tc_d98f4d63, TypeCVI_VX_DV>, Enc_d6990d, Requires<[HasV60T,UseHVX]> { +tc_d98f4d63, TypeCVI_VX_DV>, Enc_d6990d, Requires<[UseHVXV60]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001000; @@ -35001,7 +36395,7 @@ def V6_vtmpyb_acc_alt : HInst< (outs HvxWR:$Vxx32), (ins HvxWR:$Vxx32in, HvxWR:$Vuu32, IntRegs:$Rt32), "$Vxx32 += vtmpyb($Vuu32,$Rt32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isAccumulator = 1; @@ -35014,7 +36408,7 @@ def V6_vtmpyb_alt : HInst< (outs HvxWR:$Vdd32), (ins HvxWR:$Vuu32, IntRegs:$Rt32), "$Vdd32 = vtmpyb($Vuu32,$Rt32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -35025,7 +36419,7 @@ def V6_vtmpybus : HInst< (outs HvxWR:$Vdd32), (ins HvxWR:$Vuu32, IntRegs:$Rt32), "$Vdd32.h = vtmpy($Vuu32.ub,$Rt32.b)", -tc_7c3f55c4, TypeCVI_VX_DV>, Enc_aad80c, Requires<[HasV60T,UseHVX]> { +tc_7c3f55c4, TypeCVI_VX_DV>, Enc_aad80c, Requires<[UseHVXV60]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001000; @@ -35037,7 +36431,7 @@ def V6_vtmpybus_acc : HInst< (outs HvxWR:$Vxx32), (ins HvxWR:$Vxx32in, HvxWR:$Vuu32, IntRegs:$Rt32), "$Vxx32.h += vtmpy($Vuu32.ub,$Rt32.b)", -tc_d98f4d63, TypeCVI_VX_DV>, Enc_d6990d, Requires<[HasV60T,UseHVX]> { +tc_d98f4d63, TypeCVI_VX_DV>, Enc_d6990d, Requires<[UseHVXV60]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001000; @@ -35051,7 +36445,7 @@ def V6_vtmpybus_acc_alt : HInst< (outs HvxWR:$Vxx32), (ins HvxWR:$Vxx32in, HvxWR:$Vuu32, IntRegs:$Rt32), "$Vxx32 += vtmpybus($Vuu32,$Rt32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isAccumulator = 1; @@ -35064,7 +36458,7 @@ def V6_vtmpybus_alt : HInst< (outs HvxWR:$Vdd32), (ins HvxWR:$Vuu32, IntRegs:$Rt32), "$Vdd32 = vtmpybus($Vuu32,$Rt32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -35075,7 +36469,7 @@ def V6_vtmpyhb : HInst< (outs HvxWR:$Vdd32), (ins HvxWR:$Vuu32, IntRegs:$Rt32), "$Vdd32.w = vtmpy($Vuu32.h,$Rt32.b)", -tc_7c3f55c4, TypeCVI_VX_DV>, Enc_aad80c, Requires<[HasV60T,UseHVX]> { +tc_7c3f55c4, TypeCVI_VX_DV>, Enc_aad80c, Requires<[UseHVXV60]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001101; @@ -35087,7 +36481,7 @@ def V6_vtmpyhb_acc : HInst< (outs HvxWR:$Vxx32), (ins HvxWR:$Vxx32in, HvxWR:$Vuu32, IntRegs:$Rt32), "$Vxx32.w += vtmpy($Vuu32.h,$Rt32.b)", -tc_d98f4d63, TypeCVI_VX_DV>, Enc_d6990d, Requires<[HasV60T,UseHVX]> { +tc_d98f4d63, TypeCVI_VX_DV>, 
Enc_d6990d, Requires<[UseHVXV60]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001000; @@ -35101,7 +36495,7 @@ def V6_vtmpyhb_acc_alt : HInst< (outs HvxWR:$Vxx32), (ins HvxWR:$Vxx32in, HvxWR:$Vuu32, IntRegs:$Rt32), "$Vxx32 += vtmpyhb($Vuu32,$Rt32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isAccumulator = 1; @@ -35114,7 +36508,7 @@ def V6_vtmpyhb_alt : HInst< (outs HvxWR:$Vdd32), (ins HvxWR:$Vuu32, IntRegs:$Rt32), "$Vdd32 = vtmpyhb($Vuu32,$Rt32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -35125,7 +36519,7 @@ def V6_vtran2x2_map : HInst< (outs HvxVR:$Vy32, HvxVR:$Vx32), (ins HvxVR:$Vy32in, HvxVR:$Vx32in, IntRegs:$Rt32), "vtrans2x2($Vy32,$Vx32,$Rt32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let hasNewValue2 = 1; @@ -35139,7 +36533,7 @@ def V6_vunpackb : HInst< (outs HvxWR:$Vdd32), (ins HvxVR:$Vu32), "$Vdd32.h = vunpack($Vu32.b)", -tc_d7bea0ec, TypeCVI_VP_VS>, Enc_dd766a, Requires<[HasV60T,UseHVX]> { +tc_d7bea0ec, TypeCVI_VP_VS>, Enc_dd766a, Requires<[UseHVXV60]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001111000000001; @@ -35151,7 +36545,7 @@ def V6_vunpackb_alt : HInst< (outs HvxWR:$Vdd32), (ins HvxVR:$Vu32), "$Vdd32 = vunpackb($Vu32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -35162,7 +36556,7 @@ def V6_vunpackh : HInst< (outs HvxWR:$Vdd32), (ins HvxVR:$Vu32), "$Vdd32.w = vunpack($Vu32.h)", -tc_d7bea0ec, TypeCVI_VP_VS>, Enc_dd766a, Requires<[HasV60T,UseHVX]> { +tc_d7bea0ec, TypeCVI_VP_VS>, Enc_dd766a, Requires<[UseHVXV60]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001111000000001; @@ -35174,7 +36568,7 @@ def V6_vunpackh_alt : HInst< (outs HvxWR:$Vdd32), (ins HvxVR:$Vu32), "$Vdd32 = vunpackh($Vu32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -35185,7 +36579,7 @@ def V6_vunpackob : HInst< (outs HvxWR:$Vxx32), (ins HvxWR:$Vxx32in, HvxVR:$Vu32), "$Vxx32.h |= vunpacko($Vu32.b)", -tc_72ad7b54, TypeCVI_VP_VS>, Enc_500cb0, Requires<[HasV60T,UseHVX]> { +tc_72ad7b54, TypeCVI_VP_VS>, Enc_500cb0, Requires<[UseHVXV60]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b1; let Inst{31-16} = 0b0001111000000000; @@ -35199,7 +36593,7 @@ def V6_vunpackob_alt : HInst< (outs HvxWR:$Vxx32), (ins HvxWR:$Vxx32in, HvxVR:$Vu32), "$Vxx32 |= vunpackob($Vu32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isAccumulator = 1; @@ -35211,7 +36605,7 @@ def V6_vunpackoh : HInst< (outs HvxWR:$Vxx32), (ins HvxWR:$Vxx32in, HvxVR:$Vu32), "$Vxx32.w |= vunpacko($Vu32.h)", -tc_72ad7b54, TypeCVI_VP_VS>, Enc_500cb0, Requires<[HasV60T,UseHVX]> { +tc_72ad7b54, TypeCVI_VP_VS>, Enc_500cb0, Requires<[UseHVXV60]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b1; let Inst{31-16} = 0b0001111000000000; @@ -35225,7 +36619,7 @@ def V6_vunpackoh_alt : HInst< (outs HvxWR:$Vxx32), (ins HvxWR:$Vxx32in, HvxVR:$Vu32), "$Vxx32 |= vunpackoh($Vu32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, 
Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isAccumulator = 1; @@ -35238,7 +36632,7 @@ def V6_vunpackub : HInst< (outs HvxWR:$Vdd32), (ins HvxVR:$Vu32), "$Vdd32.uh = vunpack($Vu32.ub)", -tc_d7bea0ec, TypeCVI_VP_VS>, Enc_dd766a, Requires<[HasV60T,UseHVX]> { +tc_d7bea0ec, TypeCVI_VP_VS>, Enc_dd766a, Requires<[UseHVXV60]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001111000000001; @@ -35250,7 +36644,7 @@ def V6_vunpackub_alt : HInst< (outs HvxWR:$Vdd32), (ins HvxVR:$Vu32), "$Vdd32 = vunpackub($Vu32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -35261,7 +36655,7 @@ def V6_vunpackuh : HInst< (outs HvxWR:$Vdd32), (ins HvxVR:$Vu32), "$Vdd32.uw = vunpack($Vu32.uh)", -tc_d7bea0ec, TypeCVI_VP_VS>, Enc_dd766a, Requires<[HasV60T,UseHVX]> { +tc_d7bea0ec, TypeCVI_VP_VS>, Enc_dd766a, Requires<[UseHVXV60]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001111000000001; @@ -35273,7 +36667,7 @@ def V6_vunpackuh_alt : HInst< (outs HvxWR:$Vdd32), (ins HvxVR:$Vu32), "$Vdd32 = vunpackuh($Vu32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -35284,7 +36678,7 @@ def V6_vwhist128 : HInst< (outs), (ins), "vwhist128", -tc_e5053c8f, TypeCVI_HIST>, Enc_e3b0c4, Requires<[HasV62T,UseHVX]> { +tc_e5053c8f, TypeCVI_HIST>, Enc_e3b0c4, Requires<[UseHVXV62]> { let Inst{13-0} = 0b10010010000000; let Inst{31-16} = 0b0001111000000000; let DecoderNamespace = "EXT_mmvec"; @@ -35293,7 +36687,7 @@ def V6_vwhist128m : HInst< (outs), (ins u1_0Imm:$Ii), "vwhist128(#$Ii)", -tc_b77635b4, TypeCVI_HIST>, Enc_efaed8, Requires<[HasV62T,UseHVX]> { +tc_b77635b4, TypeCVI_HIST>, Enc_efaed8, Requires<[UseHVXV62]> { let Inst{7-0} = 0b10000000; let Inst{13-9} = 0b10011; let Inst{31-16} = 0b0001111000000000; @@ -35303,7 +36697,7 @@ def V6_vwhist128q : HInst< (outs), (ins HvxQR:$Qv4), "vwhist128($Qv4)", -tc_cedf314b, TypeCVI_HIST>, Enc_217147, Requires<[HasV62T,UseHVX]> { +tc_cedf314b, TypeCVI_HIST>, Enc_217147, Requires<[UseHVXV62]> { let Inst{13-0} = 0b10010010000000; let Inst{21-16} = 0b000010; let Inst{31-24} = 0b00011110; @@ -35313,7 +36707,7 @@ def V6_vwhist128qm : HInst< (outs), (ins HvxQR:$Qv4, u1_0Imm:$Ii), "vwhist128($Qv4,#$Ii)", -tc_28978789, TypeCVI_HIST>, Enc_802dc0, Requires<[HasV62T,UseHVX]> { +tc_28978789, TypeCVI_HIST>, Enc_802dc0, Requires<[UseHVXV62]> { let Inst{7-0} = 0b10000000; let Inst{13-9} = 0b10011; let Inst{21-16} = 0b000010; @@ -35324,7 +36718,7 @@ def V6_vwhist256 : HInst< (outs), (ins), "vwhist256", -tc_e5053c8f, TypeCVI_HIST>, Enc_e3b0c4, Requires<[HasV62T,UseHVX]> { +tc_e5053c8f, TypeCVI_HIST>, Enc_e3b0c4, Requires<[UseHVXV62]> { let Inst{13-0} = 0b10001010000000; let Inst{31-16} = 0b0001111000000000; let DecoderNamespace = "EXT_mmvec"; @@ -35333,7 +36727,7 @@ def V6_vwhist256_sat : HInst< (outs), (ins), "vwhist256:sat", -tc_e5053c8f, TypeCVI_HIST>, Enc_e3b0c4, Requires<[HasV62T,UseHVX]> { +tc_e5053c8f, TypeCVI_HIST>, Enc_e3b0c4, Requires<[UseHVXV62]> { let Inst{13-0} = 0b10001110000000; let Inst{31-16} = 0b0001111000000000; let DecoderNamespace = "EXT_mmvec"; @@ -35342,7 +36736,7 @@ def V6_vwhist256q : HInst< (outs), (ins HvxQR:$Qv4), "vwhist256($Qv4)", -tc_cedf314b, TypeCVI_HIST>, Enc_217147, Requires<[HasV62T,UseHVX]> { +tc_cedf314b, TypeCVI_HIST>, Enc_217147, Requires<[UseHVXV62]> { let Inst{13-0} = 
0b10001010000000; let Inst{21-16} = 0b000010; let Inst{31-24} = 0b00011110; @@ -35352,7 +36746,7 @@ def V6_vwhist256q_sat : HInst< (outs), (ins HvxQR:$Qv4), "vwhist256($Qv4):sat", -tc_cedf314b, TypeCVI_HIST>, Enc_217147, Requires<[HasV62T,UseHVX]> { +tc_cedf314b, TypeCVI_HIST>, Enc_217147, Requires<[UseHVXV62]> { let Inst{13-0} = 0b10001110000000; let Inst{21-16} = 0b000010; let Inst{31-24} = 0b00011110; @@ -35362,7 +36756,7 @@ def V6_vxor : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32 = vxor($Vu32,$Vv32)", -tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100001; @@ -35374,7 +36768,7 @@ def V6_vzb : HInst< (outs HvxWR:$Vdd32), (ins HvxVR:$Vu32), "$Vdd32.uh = vzxt($Vu32.ub)", -tc_644584f8, TypeCVI_VA_DV>, Enc_dd766a, Requires<[HasV60T,UseHVX]> { +tc_644584f8, TypeCVI_VA_DV>, Enc_dd766a, Requires<[UseHVXV60]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001111000000010; @@ -35386,7 +36780,7 @@ def V6_vzb_alt : HInst< (outs HvxWR:$Vdd32), (ins HvxVR:$Vu32), "$Vdd32 = vzxtb($Vu32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -35397,7 +36791,7 @@ def V6_vzh : HInst< (outs HvxWR:$Vdd32), (ins HvxVR:$Vu32), "$Vdd32.uw = vzxt($Vu32.uh)", -tc_644584f8, TypeCVI_VA_DV>, Enc_dd766a, Requires<[HasV60T,UseHVX]> { +tc_644584f8, TypeCVI_VA_DV>, Enc_dd766a, Requires<[UseHVXV60]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001111000000010; @@ -35409,7 +36803,7 @@ def V6_vzh_alt : HInst< (outs HvxWR:$Vdd32), (ins HvxVR:$Vu32), "$Vdd32 = vzxth($Vu32)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> { +PSEUDO, TypeMAPPING>, Requires<[UseHVXV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -35420,7 +36814,7 @@ def Y2_barrier : HInst< (outs), (ins), "barrier", -tc_ef2676fd, TypeST>, Enc_e3b0c4 { +tc_367f7f3d, TypeST>, Enc_e3b0c4 { let Inst{13-0} = 0b00000000000000; let Inst{31-16} = 0b1010100000000000; let isSoloAX = 1; @@ -35430,7 +36824,7 @@ def Y2_break : HInst< (outs), (ins), "brkpt", -tc_bcf0e36e, TypeCR>, Enc_e3b0c4 { +tc_4ca572d4, TypeCR>, Enc_e3b0c4 { let Inst{13-0} = 0b00000000000000; let Inst{31-16} = 0b0110110000100000; let isSolo = 1; @@ -35439,27 +36833,27 @@ def Y2_dccleana : HInst< (outs), (ins IntRegs:$Rs32), "dccleana($Rs32)", -tc_30665cb0, TypeST>, Enc_ecbcc8 { +tc_00e7c26e, TypeST>, Enc_ecbcc8 { let Inst{13-0} = 0b00000000000000; let Inst{31-21} = 0b10100000000; -let isSoloAin1 = 1; +let isRestrictSlot1AOK = 1; let hasSideEffects = 1; } def Y2_dccleaninva : HInst< (outs), (ins IntRegs:$Rs32), "dccleaninva($Rs32)", -tc_30665cb0, TypeST>, Enc_ecbcc8 { +tc_00e7c26e, TypeST>, Enc_ecbcc8 { let Inst{13-0} = 0b00000000000000; let Inst{31-21} = 0b10100000010; -let isSoloAin1 = 1; +let isRestrictSlot1AOK = 1; let hasSideEffects = 1; } def Y2_dcfetch : HInst< (outs), (ins IntRegs:$Rs32), "dcfetch($Rs32)", -tc_34e882a4, TypeMAPPING> { +tc_3da80ba5, TypeMAPPING> { let hasSideEffects = 1; let isPseudo = 1; let isCodeGenOnly = 1; @@ -35468,38 +36862,39 @@ def Y2_dcfetchbo : HInst< (outs), (ins IntRegs:$Rs32, u11_3Imm:$Ii), "dcfetch($Rs32+#$Ii)", -tc_ef0ebaaa, TypeLD>, Enc_2d829e { +tc_4d9914c9, TypeLD>, Enc_2d829e { let Inst{13-11} = 0b000; let Inst{31-21} = 0b10010100000; let addrMode = BaseImmOffset; +let isRestrictNoSlot1Store = 1; let 
hasSideEffects = 1; } def Y2_dcinva : HInst< (outs), (ins IntRegs:$Rs32), "dcinva($Rs32)", -tc_30665cb0, TypeST>, Enc_ecbcc8 { +tc_00e7c26e, TypeST>, Enc_ecbcc8 { let Inst{13-0} = 0b00000000000000; let Inst{31-21} = 0b10100000001; -let isSoloAin1 = 1; +let isRestrictSlot1AOK = 1; let hasSideEffects = 1; } def Y2_dczeroa : HInst< (outs), (ins IntRegs:$Rs32), "dczeroa($Rs32)", -tc_30665cb0, TypeST>, Enc_ecbcc8 { +tc_00e7c26e, TypeST>, Enc_ecbcc8 { let Inst{13-0} = 0b00000000000000; let Inst{31-21} = 0b10100000110; -let isSoloAin1 = 1; -let hasSideEffects = 1; +let isRestrictSlot1AOK = 1; let mayStore = 1; +let hasSideEffects = 1; } def Y2_icinva : HInst< (outs), (ins IntRegs:$Rs32), "icinva($Rs32)", -tc_049dfb74, TypeJ>, Enc_ecbcc8 { +tc_999d32db, TypeJ>, Enc_ecbcc8 { let Inst{13-0} = 0b00000000000000; let Inst{31-21} = 0b01010110110; let isSolo = 1; @@ -35508,7 +36903,7 @@ def Y2_isync : HInst< (outs), (ins), "isync", -tc_d267fa19, TypeJ>, Enc_e3b0c4 { +tc_b13761ae, TypeJ>, Enc_e3b0c4 { let Inst{13-0} = 0b00000000000010; let Inst{31-16} = 0b0101011111000000; let isSolo = 1; @@ -35517,7 +36912,7 @@ def Y2_syncht : HInst< (outs), (ins), "syncht", -tc_ef2676fd, TypeST>, Enc_e3b0c4 { +tc_367f7f3d, TypeST>, Enc_e3b0c4 { let Inst{13-0} = 0b00000000000000; let Inst{31-16} = 0b1010100001000000; let isSolo = 1; @@ -35526,7 +36921,7 @@ def Y4_l2fetch : HInst< (outs), (ins IntRegs:$Rs32, IntRegs:$Rt32), "l2fetch($Rs32,$Rt32)", -tc_f4608adc, TypeST>, Enc_ca3887 { +tc_daa058fa, TypeST>, Enc_ca3887 { let Inst{7-0} = 0b00000000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10100110000; @@ -35538,7 +36933,7 @@ def Y4_trace : HInst< (outs), (ins IntRegs:$Rs32), "trace($Rs32)", -tc_4997da4a, TypeCR>, Enc_ecbcc8 { +tc_c82dc1ff, TypeCR>, Enc_ecbcc8 { let Inst{13-0} = 0b00000000000000; let Inst{31-21} = 0b01100010010; let isSoloAX = 1; @@ -35547,7 +36942,7 @@ def Y5_l2fetch : HInst< (outs), (ins IntRegs:$Rs32, DoubleRegs:$Rtt32), "l2fetch($Rs32,$Rtt32)", -tc_f4608adc, TypeST>, Enc_e6abcf, Requires<[HasV5T]> { +tc_daa058fa, TypeST>, Enc_e6abcf, Requires<[HasV5T]> { let Inst{7-0} = 0b00000000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10100110100; @@ -35559,7 +36954,7 @@ def dep_A2_addsat : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = add($Rs32,$Rt32):sat:deprecated", -tc_47ab9233, TypeALU64>, Enc_5ab2be { +tc_b44c6e2a, TypeALU64>, Enc_5ab2be { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010101100; @@ -35572,7 +36967,7 @@ def dep_A2_subsat : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = sub($Rt32,$Rs32):sat:deprecated", -tc_47ab9233, TypeALU64>, Enc_bd6011 { +tc_b44c6e2a, TypeALU64>, Enc_bd6011 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010101100; @@ -35585,7 +36980,7 @@ def dep_S2_packhl : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = packhl($Rs32,$Rt32):deprecated", -tc_9c18c9a5, TypeALU64>, Enc_be32a5 { +tc_540fdfbc, TypeALU64>, Enc_be32a5 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010100000; diff --git a/lib/Target/Hexagon/HexagonDepMappings.td b/lib/Target/Hexagon/HexagonDepMappings.td index ebef4f10acb8..7a156c39da9c 100644 --- a/lib/Target/Hexagon/HexagonDepMappings.td +++ b/lib/Target/Hexagon/HexagonDepMappings.td @@ -1,4 +1,4 @@ -//===--- HexagonDepMappings.td --------------------------------------------===// +//===- HexagonDepMappings.td ----------------------------------------------===// // // The LLVM Compiler Infrastructure // @@ -6,397 
+6,470 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// +// Automatically generated file, please consult code owner before editing. +//===----------------------------------------------------------------------===// + -def A2_negAlias : InstAlias<"$Rd32=neg($Rs32)", (A2_subri IntRegs:$Rd32, 0, IntRegs:$Rs32)>; -def A2_notAlias : InstAlias<"$Rd32=not($Rs32)", (A2_subri IntRegs:$Rd32, -1, IntRegs:$Rs32)>; -def A2_tfrfAlias : InstAlias<"if (!$Pu4) $Rd32=$Rs32", (A2_paddif IntRegs:$Rd32, PredRegs:$Pu4, IntRegs:$Rs32, 0)>; -def A2_tfrfnewAlias : InstAlias<"if (!$Pu4.new) $Rd32=$Rs32", (A2_paddifnew IntRegs:$Rd32, PredRegs:$Pu4, IntRegs:$Rs32, 0)>; -def A2_tfrtAlias : InstAlias<"if ($Pu4) $Rd32=$Rs32", (A2_paddit IntRegs:$Rd32, PredRegs:$Pu4, IntRegs:$Rs32, 0)>; -def A2_tfrtnewAlias : InstAlias<"if ($Pu4.new) $Rd32=$Rs32", (A2_padditnew IntRegs:$Rd32, PredRegs:$Pu4, IntRegs:$Rs32, 0)>; -def A2_vaddb_mapAlias : InstAlias<"$Rdd32=vaddb($Rss32,$Rtt32)", (A2_vaddub DoubleRegs:$Rdd32, DoubleRegs:$Rss32, DoubleRegs:$Rtt32)>; -def A2_vsubb_mapAlias : InstAlias<"$Rdd32=vsubb($Rss32,$Rtt32)", (A2_vsubub DoubleRegs:$Rdd32, DoubleRegs:$Rss32, DoubleRegs:$Rtt32)>; -def A2_zxtbAlias : InstAlias<"$Rd32=zxtb($Rs32)", (A2_andir IntRegs:$Rd32, IntRegs:$Rs32, 255)>; -def C2_cmpltAlias : InstAlias<"$Pd4=cmp.lt($Rs32,$Rt32)", (C2_cmpgt PredRegs:$Pd4, IntRegs:$Rt32, IntRegs:$Rs32)>; -def C2_cmpltuAlias : InstAlias<"$Pd4=cmp.ltu($Rs32,$Rt32)", (C2_cmpgtu PredRegs:$Pd4, IntRegs:$Rt32, IntRegs:$Rs32)>; -def C2_pxfer_mapAlias : InstAlias<"$Pd4=$Ps4", (C2_or PredRegs:$Pd4, PredRegs:$Ps4, PredRegs:$Ps4)>; +def A2_negAlias : InstAlias<"$Rd32 = neg($Rs32)", (A2_subri IntRegs:$Rd32, 0, IntRegs:$Rs32)>; +def A2_notAlias : InstAlias<"$Rd32 = not($Rs32)", (A2_subri IntRegs:$Rd32, -1, IntRegs:$Rs32)>; +def A2_tfrfAlias : InstAlias<"if (!$Pu4) $Rd32 = $Rs32", (A2_paddif IntRegs:$Rd32, PredRegs:$Pu4, IntRegs:$Rs32, 0)>; +def A2_tfrfnewAlias : InstAlias<"if (!$Pu4.new) $Rd32 = $Rs32", (A2_paddifnew IntRegs:$Rd32, PredRegs:$Pu4, IntRegs:$Rs32, 0)>; +def A2_tfrtAlias : InstAlias<"if ($Pu4) $Rd32 = $Rs32", (A2_paddit IntRegs:$Rd32, PredRegs:$Pu4, IntRegs:$Rs32, 0)>; +def A2_tfrtnewAlias : InstAlias<"if ($Pu4.new) $Rd32 = $Rs32", (A2_padditnew IntRegs:$Rd32, PredRegs:$Pu4, IntRegs:$Rs32, 0)>; +def A2_vaddb_mapAlias : InstAlias<"$Rdd32 = vaddb($Rss32,$Rtt32)", (A2_vaddub DoubleRegs:$Rdd32, DoubleRegs:$Rss32, DoubleRegs:$Rtt32)>; +def A2_vsubb_mapAlias : InstAlias<"$Rdd32 = vsubb($Rss32,$Rtt32)", (A2_vsubub DoubleRegs:$Rdd32, DoubleRegs:$Rss32, DoubleRegs:$Rtt32)>; +def A2_zxtbAlias : InstAlias<"$Rd32 = zxtb($Rs32)", (A2_andir IntRegs:$Rd32, IntRegs:$Rs32, 255)>; +def C2_cmpltAlias : InstAlias<"$Pd4 = cmp.lt($Rs32,$Rt32)", (C2_cmpgt PredRegs:$Pd4, IntRegs:$Rt32, IntRegs:$Rs32)>; +def C2_cmpltuAlias : InstAlias<"$Pd4 = cmp.ltu($Rs32,$Rt32)", (C2_cmpgtu PredRegs:$Pd4, IntRegs:$Rt32, IntRegs:$Rs32)>; +def C2_pxfer_mapAlias : InstAlias<"$Pd4 = $Ps4", (C2_or PredRegs:$Pd4, PredRegs:$Ps4, PredRegs:$Ps4)>; def J2_jumpf_nopred_mapAlias : InstAlias<"if (!$Pu4) jump $Ii", (J2_jumpf PredRegs:$Pu4, b30_2Imm:$Ii)>; def J2_jumprf_nopred_mapAlias : InstAlias<"if (!$Pu4) jumpr $Rs32", (J2_jumprf PredRegs:$Pu4, IntRegs:$Rs32)>; def J2_jumprt_nopred_mapAlias : InstAlias<"if ($Pu4) jumpr $Rs32", (J2_jumprt PredRegs:$Pu4, IntRegs:$Rs32)>; def J2_jumpt_nopred_mapAlias : InstAlias<"if ($Pu4) jump $Ii", (J2_jumpt PredRegs:$Pu4, b30_2Imm:$Ii)>; -def L2_loadalignb_zomapAlias : 
InstAlias<"$Ryy32=memb_fifo($Rs32)", (L2_loadalignb_io DoubleRegs:$Ryy32, IntRegs:$Rs32, 0)>; -def L2_loadalignh_zomapAlias : InstAlias<"$Ryy32=memh_fifo($Rs32)", (L2_loadalignh_io DoubleRegs:$Ryy32, IntRegs:$Rs32, 0)>; -def L2_loadbsw2_zomapAlias : InstAlias<"$Rd32=membh($Rs32)", (L2_loadbsw2_io IntRegs:$Rd32, IntRegs:$Rs32, 0)>; -def L2_loadbsw4_zomapAlias : InstAlias<"$Rdd32=membh($Rs32)", (L2_loadbsw4_io DoubleRegs:$Rdd32, IntRegs:$Rs32, 0)>; -def L2_loadbzw2_zomapAlias : InstAlias<"$Rd32=memubh($Rs32)", (L2_loadbzw2_io IntRegs:$Rd32, IntRegs:$Rs32, 0)>; -def L2_loadbzw4_zomapAlias : InstAlias<"$Rdd32=memubh($Rs32)", (L2_loadbzw4_io DoubleRegs:$Rdd32, IntRegs:$Rs32, 0)>; -def L2_loadrb_zomapAlias : InstAlias<"$Rd32=memb($Rs32)", (L2_loadrb_io IntRegs:$Rd32, IntRegs:$Rs32, 0)>; -def L2_loadrd_zomapAlias : InstAlias<"$Rdd32=memd($Rs32)", (L2_loadrd_io DoubleRegs:$Rdd32, IntRegs:$Rs32, 0)>; -def L2_loadrh_zomapAlias : InstAlias<"$Rd32=memh($Rs32)", (L2_loadrh_io IntRegs:$Rd32, IntRegs:$Rs32, 0)>; -def L2_loadri_zomapAlias : InstAlias<"$Rd32=memw($Rs32)", (L2_loadri_io IntRegs:$Rd32, IntRegs:$Rs32, 0)>; -def L2_loadrub_zomapAlias : InstAlias<"$Rd32=memub($Rs32)", (L2_loadrub_io IntRegs:$Rd32, IntRegs:$Rs32, 0)>; -def L2_loadruh_zomapAlias : InstAlias<"$Rd32=memuh($Rs32)", (L2_loadruh_io IntRegs:$Rd32, IntRegs:$Rs32, 0)>; -def L2_ploadrbf_zomapAlias : InstAlias<"if (!$Pt4) $Rd32=memb($Rs32)", (L2_ploadrbf_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>; -def L2_ploadrbfnew_zomapAlias : InstAlias<"if (!$Pt4.new) $Rd32=memb($Rs32)", (L2_ploadrbfnew_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>; -def L2_ploadrbt_zomapAlias : InstAlias<"if ($Pt4) $Rd32=memb($Rs32)", (L2_ploadrbt_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>; -def L2_ploadrbtnew_zomapAlias : InstAlias<"if ($Pt4.new) $Rd32=memb($Rs32)", (L2_ploadrbtnew_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>; -def L2_ploadrdf_zomapAlias : InstAlias<"if (!$Pt4) $Rdd32=memd($Rs32)", (L2_ploadrdf_io DoubleRegs:$Rdd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>; -def L2_ploadrdfnew_zomapAlias : InstAlias<"if (!$Pt4.new) $Rdd32=memd($Rs32)", (L2_ploadrdfnew_io DoubleRegs:$Rdd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>; -def L2_ploadrdt_zomapAlias : InstAlias<"if ($Pt4) $Rdd32=memd($Rs32)", (L2_ploadrdt_io DoubleRegs:$Rdd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>; -def L2_ploadrdtnew_zomapAlias : InstAlias<"if ($Pt4.new) $Rdd32=memd($Rs32)", (L2_ploadrdtnew_io DoubleRegs:$Rdd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>; -def L2_ploadrhf_zomapAlias : InstAlias<"if (!$Pt4) $Rd32=memh($Rs32)", (L2_ploadrhf_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>; -def L2_ploadrhfnew_zomapAlias : InstAlias<"if (!$Pt4.new) $Rd32=memh($Rs32)", (L2_ploadrhfnew_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>; -def L2_ploadrht_zomapAlias : InstAlias<"if ($Pt4) $Rd32=memh($Rs32)", (L2_ploadrht_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>; -def L2_ploadrhtnew_zomapAlias : InstAlias<"if ($Pt4.new) $Rd32=memh($Rs32)", (L2_ploadrhtnew_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>; -def L2_ploadrif_zomapAlias : InstAlias<"if (!$Pt4) $Rd32=memw($Rs32)", (L2_ploadrif_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>; -def L2_ploadrifnew_zomapAlias : InstAlias<"if (!$Pt4.new) $Rd32=memw($Rs32)", (L2_ploadrifnew_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>; -def L2_ploadrit_zomapAlias : InstAlias<"if ($Pt4) $Rd32=memw($Rs32)", (L2_ploadrit_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>; -def L2_ploadritnew_zomapAlias : InstAlias<"if ($Pt4.new) 
$Rd32=memw($Rs32)", (L2_ploadritnew_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>; -def L2_ploadrubf_zomapAlias : InstAlias<"if (!$Pt4) $Rd32=memub($Rs32)", (L2_ploadrubf_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>; -def L2_ploadrubfnew_zomapAlias : InstAlias<"if (!$Pt4.new) $Rd32=memub($Rs32)", (L2_ploadrubfnew_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>; -def L2_ploadrubt_zomapAlias : InstAlias<"if ($Pt4) $Rd32=memub($Rs32)", (L2_ploadrubt_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>; -def L2_ploadrubtnew_zomapAlias : InstAlias<"if ($Pt4.new) $Rd32=memub($Rs32)", (L2_ploadrubtnew_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>; -def L2_ploadruhf_zomapAlias : InstAlias<"if (!$Pt4) $Rd32=memuh($Rs32)", (L2_ploadruhf_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>; -def L2_ploadruhfnew_zomapAlias : InstAlias<"if (!$Pt4.new) $Rd32=memuh($Rs32)", (L2_ploadruhfnew_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>; -def L2_ploadruht_zomapAlias : InstAlias<"if ($Pt4) $Rd32=memuh($Rs32)", (L2_ploadruht_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>; -def L2_ploadruhtnew_zomapAlias : InstAlias<"if ($Pt4.new) $Rd32=memuh($Rs32)", (L2_ploadruhtnew_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>; -def L4_add_memopb_zomapAlias : InstAlias<"memb($Rs32)+=$Rt32", (L4_add_memopb_io IntRegs:$Rs32, 0, IntRegs:$Rt32)>; -def L4_add_memoph_zomapAlias : InstAlias<"memh($Rs32)+=$Rt32", (L4_add_memoph_io IntRegs:$Rs32, 0, IntRegs:$Rt32)>; -def L4_add_memopw_zomapAlias : InstAlias<"memw($Rs32)+=$Rt32", (L4_add_memopw_io IntRegs:$Rs32, 0, IntRegs:$Rt32)>; -def L4_and_memopb_zomapAlias : InstAlias<"memb($Rs32)&=$Rt32", (L4_and_memopb_io IntRegs:$Rs32, 0, IntRegs:$Rt32)>; -def L4_and_memoph_zomapAlias : InstAlias<"memh($Rs32)&=$Rt32", (L4_and_memoph_io IntRegs:$Rs32, 0, IntRegs:$Rt32)>; -def L4_and_memopw_zomapAlias : InstAlias<"memw($Rs32)&=$Rt32", (L4_and_memopw_io IntRegs:$Rs32, 0, IntRegs:$Rt32)>; -def L4_iadd_memopb_zomapAlias : InstAlias<"memb($Rs32)+=#$II", (L4_iadd_memopb_io IntRegs:$Rs32, 0, u5_0Imm:$II)>; -def L4_iadd_memoph_zomapAlias : InstAlias<"memh($Rs32)+=#$II", (L4_iadd_memoph_io IntRegs:$Rs32, 0, u5_0Imm:$II)>; -def L4_iadd_memopw_zomapAlias : InstAlias<"memw($Rs32)+=#$II", (L4_iadd_memopw_io IntRegs:$Rs32, 0, u5_0Imm:$II)>; -def L4_iand_memopb_zomapAlias : InstAlias<"memb($Rs32)=clrbit(#$II)", (L4_iand_memopb_io IntRegs:$Rs32, 0, u5_0Imm:$II)>; -def L4_iand_memoph_zomapAlias : InstAlias<"memh($Rs32)=clrbit(#$II)", (L4_iand_memoph_io IntRegs:$Rs32, 0, u5_0Imm:$II)>; -def L4_iand_memopw_zomapAlias : InstAlias<"memw($Rs32)=clrbit(#$II)", (L4_iand_memopw_io IntRegs:$Rs32, 0, u5_0Imm:$II)>; -def L4_ior_memopb_zomapAlias : InstAlias<"memb($Rs32)=setbit(#$II)", (L4_ior_memopb_io IntRegs:$Rs32, 0, u5_0Imm:$II)>; -def L4_ior_memoph_zomapAlias : InstAlias<"memh($Rs32)=setbit(#$II)", (L4_ior_memoph_io IntRegs:$Rs32, 0, u5_0Imm:$II)>; -def L4_ior_memopw_zomapAlias : InstAlias<"memw($Rs32)=setbit(#$II)", (L4_ior_memopw_io IntRegs:$Rs32, 0, u5_0Imm:$II)>; -def L4_isub_memopb_zomapAlias : InstAlias<"memb($Rs32)-=#$II", (L4_isub_memopb_io IntRegs:$Rs32, 0, u5_0Imm:$II)>; -def L4_isub_memoph_zomapAlias : InstAlias<"memh($Rs32)-=#$II", (L4_isub_memoph_io IntRegs:$Rs32, 0, u5_0Imm:$II)>; -def L4_isub_memopw_zomapAlias : InstAlias<"memw($Rs32)-=#$II", (L4_isub_memopw_io IntRegs:$Rs32, 0, u5_0Imm:$II)>; -def L4_or_memopb_zomapAlias : InstAlias<"memb($Rs32)|=$Rt32", (L4_or_memopb_io IntRegs:$Rs32, 0, IntRegs:$Rt32)>; -def L4_or_memoph_zomapAlias : 
InstAlias<"memh($Rs32)|=$Rt32", (L4_or_memoph_io IntRegs:$Rs32, 0, IntRegs:$Rt32)>; -def L4_or_memopw_zomapAlias : InstAlias<"memw($Rs32)|=$Rt32", (L4_or_memopw_io IntRegs:$Rs32, 0, IntRegs:$Rt32)>; -def L4_sub_memopb_zomapAlias : InstAlias<"memb($Rs32)-=$Rt32", (L4_sub_memopb_io IntRegs:$Rs32, 0, IntRegs:$Rt32)>; -def L4_sub_memoph_zomapAlias : InstAlias<"memh($Rs32)-=$Rt32", (L4_sub_memoph_io IntRegs:$Rs32, 0, IntRegs:$Rt32)>; -def L4_sub_memopw_zomapAlias : InstAlias<"memw($Rs32)-=$Rt32", (L4_sub_memopw_io IntRegs:$Rs32, 0, IntRegs:$Rt32)>; -def M2_mpyuiAlias : InstAlias<"$Rd32=mpyui($Rs32,$Rt32)", (M2_mpyi IntRegs:$Rd32, IntRegs:$Rs32, IntRegs:$Rt32)>; -def S2_pstorerbf_zomapAlias : InstAlias<"if (!$Pv4) memb($Rs32)=$Rt32", (S2_pstorerbf_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Rt32)>; -def S2_pstorerbnewf_zomapAlias : InstAlias<"if (!$Pv4) memb($Rs32)=$Nt8.new", (S2_pstorerbnewf_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Nt8)>; -def S2_pstorerbnewt_zomapAlias : InstAlias<"if ($Pv4) memb($Rs32)=$Nt8.new", (S2_pstorerbnewt_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Nt8)>; -def S2_pstorerbt_zomapAlias : InstAlias<"if ($Pv4) memb($Rs32)=$Rt32", (S2_pstorerbt_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Rt32)>; -def S2_pstorerdf_zomapAlias : InstAlias<"if (!$Pv4) memd($Rs32)=$Rtt32", (S2_pstorerdf_io PredRegs:$Pv4, IntRegs:$Rs32, 0, DoubleRegs:$Rtt32)>; -def S2_pstorerdt_zomapAlias : InstAlias<"if ($Pv4) memd($Rs32)=$Rtt32", (S2_pstorerdt_io PredRegs:$Pv4, IntRegs:$Rs32, 0, DoubleRegs:$Rtt32)>; -def S2_pstorerff_zomapAlias : InstAlias<"if (!$Pv4) memh($Rs32)=$Rt32.h", (S2_pstorerff_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Rt32)>; -def S2_pstorerft_zomapAlias : InstAlias<"if ($Pv4) memh($Rs32)=$Rt32.h", (S2_pstorerft_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Rt32)>; -def S2_pstorerhf_zomapAlias : InstAlias<"if (!$Pv4) memh($Rs32)=$Rt32", (S2_pstorerhf_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Rt32)>; -def S2_pstorerhnewf_zomapAlias : InstAlias<"if (!$Pv4) memh($Rs32)=$Nt8.new", (S2_pstorerhnewf_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Nt8)>; -def S2_pstorerhnewt_zomapAlias : InstAlias<"if ($Pv4) memh($Rs32)=$Nt8.new", (S2_pstorerhnewt_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Nt8)>; -def S2_pstorerht_zomapAlias : InstAlias<"if ($Pv4) memh($Rs32)=$Rt32", (S2_pstorerht_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Rt32)>; -def S2_pstorerif_zomapAlias : InstAlias<"if (!$Pv4) memw($Rs32)=$Rt32", (S2_pstorerif_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Rt32)>; -def S2_pstorerinewf_zomapAlias : InstAlias<"if (!$Pv4) memw($Rs32)=$Nt8.new", (S2_pstorerinewf_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Nt8)>; -def S2_pstorerinewt_zomapAlias : InstAlias<"if ($Pv4) memw($Rs32)=$Nt8.new", (S2_pstorerinewt_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Nt8)>; -def S2_pstorerit_zomapAlias : InstAlias<"if ($Pv4) memw($Rs32)=$Rt32", (S2_pstorerit_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Rt32)>; -def S2_storerb_zomapAlias : InstAlias<"memb($Rs32)=$Rt32", (S2_storerb_io IntRegs:$Rs32, 0, IntRegs:$Rt32)>; -def S2_storerbnew_zomapAlias : InstAlias<"memb($Rs32)=$Nt8.new", (S2_storerbnew_io IntRegs:$Rs32, 0, IntRegs:$Nt8)>; -def S2_storerd_zomapAlias : InstAlias<"memd($Rs32)=$Rtt32", (S2_storerd_io IntRegs:$Rs32, 0, DoubleRegs:$Rtt32)>; -def S2_storerf_zomapAlias : InstAlias<"memh($Rs32)=$Rt32.h", (S2_storerf_io IntRegs:$Rs32, 0, IntRegs:$Rt32)>; -def S2_storerh_zomapAlias : InstAlias<"memh($Rs32)=$Rt32", (S2_storerh_io IntRegs:$Rs32, 0, IntRegs:$Rt32)>; -def S2_storerhnew_zomapAlias 
: InstAlias<"memh($Rs32)=$Nt8.new", (S2_storerhnew_io IntRegs:$Rs32, 0, IntRegs:$Nt8)>; -def S2_storeri_zomapAlias : InstAlias<"memw($Rs32)=$Rt32", (S2_storeri_io IntRegs:$Rs32, 0, IntRegs:$Rt32)>; -def S2_storerinew_zomapAlias : InstAlias<"memw($Rs32)=$Nt8.new", (S2_storerinew_io IntRegs:$Rs32, 0, IntRegs:$Nt8)>; -def S2_tableidxb_goodsyntaxAlias : InstAlias<"$Rx32=tableidxb($Rs32,#$Ii,#$II)", (S2_tableidxb IntRegs:$Rx32, IntRegs:$Rs32, u4_0Imm:$Ii, u5_0Imm:$II)>; -def S4_pstorerbfnew_zomapAlias : InstAlias<"if (!$Pv4.new) memb($Rs32)=$Rt32", (S4_pstorerbfnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Rt32)>; -def S4_pstorerbnewfnew_zomapAlias : InstAlias<"if (!$Pv4.new) memb($Rs32)=$Nt8.new", (S4_pstorerbnewfnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Nt8)>; -def S4_pstorerbnewtnew_zomapAlias : InstAlias<"if ($Pv4.new) memb($Rs32)=$Nt8.new", (S4_pstorerbnewtnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Nt8)>; -def S4_pstorerbtnew_zomapAlias : InstAlias<"if ($Pv4.new) memb($Rs32)=$Rt32", (S4_pstorerbtnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Rt32)>; -def S4_pstorerdfnew_zomapAlias : InstAlias<"if (!$Pv4.new) memd($Rs32)=$Rtt32", (S4_pstorerdfnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, DoubleRegs:$Rtt32)>; -def S4_pstorerdtnew_zomapAlias : InstAlias<"if ($Pv4.new) memd($Rs32)=$Rtt32", (S4_pstorerdtnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, DoubleRegs:$Rtt32)>; -def S4_pstorerffnew_zomapAlias : InstAlias<"if (!$Pv4.new) memh($Rs32)=$Rt32.h", (S4_pstorerffnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Rt32)>; -def S4_pstorerftnew_zomapAlias : InstAlias<"if ($Pv4.new) memh($Rs32)=$Rt32.h", (S4_pstorerftnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Rt32)>; -def S4_pstorerhfnew_zomapAlias : InstAlias<"if (!$Pv4.new) memh($Rs32)=$Rt32", (S4_pstorerhfnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Rt32)>; -def S4_pstorerhnewfnew_zomapAlias : InstAlias<"if (!$Pv4.new) memh($Rs32)=$Nt8.new", (S4_pstorerhnewfnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Nt8)>; -def S4_pstorerhnewtnew_zomapAlias : InstAlias<"if ($Pv4.new) memh($Rs32)=$Nt8.new", (S4_pstorerhnewtnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Nt8)>; -def S4_pstorerhtnew_zomapAlias : InstAlias<"if ($Pv4.new) memh($Rs32)=$Rt32", (S4_pstorerhtnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Rt32)>; -def S4_pstorerifnew_zomapAlias : InstAlias<"if (!$Pv4.new) memw($Rs32)=$Rt32", (S4_pstorerifnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Rt32)>; -def S4_pstorerinewfnew_zomapAlias : InstAlias<"if (!$Pv4.new) memw($Rs32)=$Nt8.new", (S4_pstorerinewfnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Nt8)>; -def S4_pstorerinewtnew_zomapAlias : InstAlias<"if ($Pv4.new) memw($Rs32)=$Nt8.new", (S4_pstorerinewtnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Nt8)>; -def S4_pstoreritnew_zomapAlias : InstAlias<"if ($Pv4.new) memw($Rs32)=$Rt32", (S4_pstoreritnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Rt32)>; -def S4_storeirb_zomapAlias : InstAlias<"memb($Rs32)=#$II", (S4_storeirb_io IntRegs:$Rs32, 0, s32_0Imm:$II)>; -def S4_storeirbf_zomapAlias : InstAlias<"if (!$Pv4) memb($Rs32)=#$II", (S4_storeirbf_io PredRegs:$Pv4, IntRegs:$Rs32, 0, s32_0Imm:$II)>; -def S4_storeirbfnew_zomapAlias : InstAlias<"if (!$Pv4.new) memb($Rs32)=#$II", (S4_storeirbfnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, s32_0Imm:$II)>; -def S4_storeirbt_zomapAlias : InstAlias<"if ($Pv4) memb($Rs32)=#$II", (S4_storeirbt_io PredRegs:$Pv4, IntRegs:$Rs32, 0, s32_0Imm:$II)>; -def S4_storeirbtnew_zomapAlias : InstAlias<"if ($Pv4.new) memb($Rs32)=#$II", (S4_storeirbtnew_io 
PredRegs:$Pv4, IntRegs:$Rs32, 0, s32_0Imm:$II)>; -def S4_storeirh_zomapAlias : InstAlias<"memh($Rs32)=#$II", (S4_storeirh_io IntRegs:$Rs32, 0, s32_0Imm:$II)>; -def S4_storeirhf_zomapAlias : InstAlias<"if (!$Pv4) memh($Rs32)=#$II", (S4_storeirhf_io PredRegs:$Pv4, IntRegs:$Rs32, 0, s32_0Imm:$II)>; -def S4_storeirhfnew_zomapAlias : InstAlias<"if (!$Pv4.new) memh($Rs32)=#$II", (S4_storeirhfnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, s32_0Imm:$II)>; -def S4_storeirht_zomapAlias : InstAlias<"if ($Pv4) memh($Rs32)=#$II", (S4_storeirht_io PredRegs:$Pv4, IntRegs:$Rs32, 0, s32_0Imm:$II)>; -def S4_storeirhtnew_zomapAlias : InstAlias<"if ($Pv4.new) memh($Rs32)=#$II", (S4_storeirhtnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, s32_0Imm:$II)>; -def S4_storeiri_zomapAlias : InstAlias<"memw($Rs32)=#$II", (S4_storeiri_io IntRegs:$Rs32, 0, s32_0Imm:$II)>; -def S4_storeirif_zomapAlias : InstAlias<"if (!$Pv4) memw($Rs32)=#$II", (S4_storeirif_io PredRegs:$Pv4, IntRegs:$Rs32, 0, s32_0Imm:$II)>; -def S4_storeirifnew_zomapAlias : InstAlias<"if (!$Pv4.new) memw($Rs32)=#$II", (S4_storeirifnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, s32_0Imm:$II)>; -def S4_storeirit_zomapAlias : InstAlias<"if ($Pv4) memw($Rs32)=#$II", (S4_storeirit_io PredRegs:$Pv4, IntRegs:$Rs32, 0, s32_0Imm:$II)>; -def S4_storeiritnew_zomapAlias : InstAlias<"if ($Pv4.new) memw($Rs32)=#$II", (S4_storeiritnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, s32_0Imm:$II)>; -def V6_MAP_equbAlias : InstAlias<"$Qd4=vcmp.eq($Vu32.ub,$Vv32.ub)", (V6_veqb HvxQR:$Qd4, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_MAP_equb_andAlias : InstAlias<"$Qx4&=vcmp.eq($Vu32.ub,$Vv32.ub)", (V6_veqb_and HvxQR:$Qx4, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_MAP_equb_iorAlias : InstAlias<"$Qx4|=vcmp.eq($Vu32.ub,$Vv32.ub)", (V6_veqb_or HvxQR:$Qx4, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_MAP_equb_xorAlias : InstAlias<"$Qx4^=vcmp.eq($Vu32.ub,$Vv32.ub)", (V6_veqb_xor HvxQR:$Qx4, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_MAP_equhAlias : InstAlias<"$Qd4=vcmp.eq($Vu32.uh,$Vv32.uh)", (V6_veqh HvxQR:$Qd4, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_MAP_equh_andAlias : InstAlias<"$Qx4&=vcmp.eq($Vu32.uh,$Vv32.uh)", (V6_veqh_and HvxQR:$Qx4, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_MAP_equh_iorAlias : InstAlias<"$Qx4|=vcmp.eq($Vu32.uh,$Vv32.uh)", (V6_veqh_or HvxQR:$Qx4, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_MAP_equh_xorAlias : InstAlias<"$Qx4^=vcmp.eq($Vu32.uh,$Vv32.uh)", (V6_veqh_xor HvxQR:$Qx4, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_MAP_equwAlias : InstAlias<"$Qd4=vcmp.eq($Vu32.uw,$Vv32.uw)", (V6_veqw HvxQR:$Qd4, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_MAP_equw_andAlias : InstAlias<"$Qx4&=vcmp.eq($Vu32.uw,$Vv32.uw)", (V6_veqw_and HvxQR:$Qx4, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_MAP_equw_iorAlias : InstAlias<"$Qx4|=vcmp.eq($Vu32.uw,$Vv32.uw)", (V6_veqw_or HvxQR:$Qx4, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_MAP_equw_xorAlias : InstAlias<"$Qx4^=vcmp.eq($Vu32.uw,$Vv32.uw)", (V6_veqw_xor HvxQR:$Qx4, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_extractw_altAlias : InstAlias<"$Rd32.w=vextract($Vu32,$Rs32)", (V6_extractw IntRegs:$Rd32, HvxVR:$Vu32, IntRegs:$Rs32)>, Requires<[UseHVX]>; -def V6_ld0Alias : InstAlias<"$Vd32=vmem($Rt32)", (V6_vL32b_ai HvxVR:$Vd32, IntRegs:$Rt32, 0)>, Requires<[UseHVX]>; -def V6_ldnt0Alias : InstAlias<"$Vd32=vmem($Rt32):nt", (V6_vL32b_nt_ai HvxVR:$Vd32, IntRegs:$Rt32, 0)>, Requires<[UseHVX]>; -def V6_ldu0Alias : 
InstAlias<"$Vd32=vmemu($Rt32)", (V6_vL32Ub_ai HvxVR:$Vd32, IntRegs:$Rt32, 0)>, Requires<[UseHVX]>; -def V6_st0Alias : InstAlias<"vmem($Rt32)=$Vs32", (V6_vS32b_ai IntRegs:$Rt32, 0, HvxVR:$Vs32)>, Requires<[UseHVX]>; -def V6_stn0Alias : InstAlias<"vmem($Rt32)=$Os8.new", (V6_vS32b_new_ai IntRegs:$Rt32, 0, HvxVR:$Os8)>, Requires<[UseHVX]>; -def V6_stnnt0Alias : InstAlias<"vmem($Rt32):nt=$Os8.new", (V6_vS32b_nt_new_ai IntRegs:$Rt32, 0, HvxVR:$Os8)>, Requires<[UseHVX]>; -def V6_stnp0Alias : InstAlias<"if (!$Pv4) vmem($Rt32)=$Vs32", (V6_vS32b_npred_ai PredRegs:$Pv4, IntRegs:$Rt32, 0, HvxVR:$Vs32)>, Requires<[UseHVX]>; -def V6_stnpnt0Alias : InstAlias<"if (!$Pv4) vmem($Rt32):nt=$Vs32", (V6_vS32b_nt_npred_ai PredRegs:$Pv4, IntRegs:$Rt32, 0, HvxVR:$Vs32)>, Requires<[UseHVX]>; -def V6_stnq0Alias : InstAlias<"if (!$Qv4) vmem($Rt32)=$Vs32", (V6_vS32b_nqpred_ai HvxQR:$Qv4, IntRegs:$Rt32, 0, HvxVR:$Vs32)>, Requires<[UseHVX]>; -def V6_stnqnt0Alias : InstAlias<"if (!$Qv4) vmem($Rt32):nt=$Vs32", (V6_vS32b_nt_nqpred_ai HvxQR:$Qv4, IntRegs:$Rt32, 0, HvxVR:$Vs32)>, Requires<[UseHVX]>; -def V6_stnt0Alias : InstAlias<"vmem($Rt32):nt=$Vs32", (V6_vS32b_nt_ai IntRegs:$Rt32, 0, HvxVR:$Vs32)>, Requires<[UseHVX]>; -def V6_stp0Alias : InstAlias<"if ($Pv4) vmem($Rt32)=$Vs32", (V6_vS32b_pred_ai PredRegs:$Pv4, IntRegs:$Rt32, 0, HvxVR:$Vs32)>, Requires<[UseHVX]>; -def V6_stpnt0Alias : InstAlias<"if ($Pv4) vmem($Rt32):nt=$Vs32", (V6_vS32b_nt_pred_ai PredRegs:$Pv4, IntRegs:$Rt32, 0, HvxVR:$Vs32)>, Requires<[UseHVX]>; -def V6_stq0Alias : InstAlias<"if ($Qv4) vmem($Rt32)=$Vs32", (V6_vS32b_qpred_ai HvxQR:$Qv4, IntRegs:$Rt32, 0, HvxVR:$Vs32)>, Requires<[UseHVX]>; -def V6_stqnt0Alias : InstAlias<"if ($Qv4) vmem($Rt32):nt=$Vs32", (V6_vS32b_nt_qpred_ai HvxQR:$Qv4, IntRegs:$Rt32, 0, HvxVR:$Vs32)>, Requires<[UseHVX]>; -def V6_stu0Alias : InstAlias<"vmemu($Rt32)=$Vs32", (V6_vS32Ub_ai IntRegs:$Rt32, 0, HvxVR:$Vs32)>, Requires<[UseHVX]>; -def V6_stunp0Alias : InstAlias<"if (!$Pv4) vmemu($Rt32)=$Vs32", (V6_vS32Ub_npred_ai PredRegs:$Pv4, IntRegs:$Rt32, 0, HvxVR:$Vs32)>, Requires<[UseHVX]>; -def V6_stup0Alias : InstAlias<"if ($Pv4) vmemu($Rt32)=$Vs32", (V6_vS32Ub_pred_ai PredRegs:$Pv4, IntRegs:$Rt32, 0, HvxVR:$Vs32)>, Requires<[UseHVX]>; -def V6_vabsdiffh_altAlias : InstAlias<"$Vd32=vabsdiffh($Vu32,$Vv32)", (V6_vabsdiffh HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vabsdiffub_altAlias : InstAlias<"$Vd32=vabsdiffub($Vu32,$Vv32)", (V6_vabsdiffub HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vabsdiffuh_altAlias : InstAlias<"$Vd32=vabsdiffuh($Vu32,$Vv32)", (V6_vabsdiffuh HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vabsdiffw_altAlias : InstAlias<"$Vd32=vabsdiffw($Vu32,$Vv32)", (V6_vabsdiffw HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vabsh_altAlias : InstAlias<"$Vd32=vabsh($Vu32)", (V6_vabsh HvxVR:$Vd32, HvxVR:$Vu32)>, Requires<[UseHVX]>; -def V6_vabsh_sat_altAlias : InstAlias<"$Vd32=vabsh($Vu32):sat", (V6_vabsh_sat HvxVR:$Vd32, HvxVR:$Vu32)>, Requires<[UseHVX]>; -def V6_vabsuh_altAlias : InstAlias<"$Vd32.uh=vabs($Vu32.h)", (V6_vabsh HvxVR:$Vd32, HvxVR:$Vu32)>, Requires<[UseHVX]>; -def V6_vabsuw_altAlias : InstAlias<"$Vd32.uw=vabs($Vu32.w)", (V6_vabsw HvxVR:$Vd32, HvxVR:$Vu32)>, Requires<[UseHVX]>; -def V6_vabsw_altAlias : InstAlias<"$Vd32=vabsw($Vu32)", (V6_vabsw HvxVR:$Vd32, HvxVR:$Vu32)>, Requires<[UseHVX]>; -def V6_vabsw_sat_altAlias : InstAlias<"$Vd32=vabsw($Vu32):sat", (V6_vabsw_sat HvxVR:$Vd32, HvxVR:$Vu32)>, Requires<[UseHVX]>; -def 
V6_vaddb_altAlias : InstAlias<"$Vd32=vaddb($Vu32,$Vv32)", (V6_vaddb HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vaddb_dv_altAlias : InstAlias<"$Vdd32=vaddb($Vuu32,$Vvv32)", (V6_vaddb_dv HvxWR:$Vdd32, HvxWR:$Vuu32, HvxWR:$Vvv32)>, Requires<[UseHVX]>; -def V6_vaddbnq_altAlias : InstAlias<"if (!$Qv4.b) $Vx32.b+=$Vu32.b", (V6_vaddbnq HvxVR:$Vx32, HvxQR:$Qv4, HvxVR:$Vu32)>, Requires<[UseHVX]>; -def V6_vaddbq_altAlias : InstAlias<"if ($Qv4.b) $Vx32.b+=$Vu32.b", (V6_vaddbq HvxVR:$Vx32, HvxQR:$Qv4, HvxVR:$Vu32)>, Requires<[UseHVX]>; -def V6_vaddh_altAlias : InstAlias<"$Vd32=vaddh($Vu32,$Vv32)", (V6_vaddh HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vaddh_dv_altAlias : InstAlias<"$Vdd32=vaddh($Vuu32,$Vvv32)", (V6_vaddh_dv HvxWR:$Vdd32, HvxWR:$Vuu32, HvxWR:$Vvv32)>, Requires<[UseHVX]>; -def V6_vaddhnq_altAlias : InstAlias<"if (!$Qv4.h) $Vx32.h+=$Vu32.h", (V6_vaddhnq HvxVR:$Vx32, HvxQR:$Qv4, HvxVR:$Vu32)>, Requires<[UseHVX]>; -def V6_vaddhq_altAlias : InstAlias<"if ($Qv4.h) $Vx32.h+=$Vu32.h", (V6_vaddhq HvxVR:$Vx32, HvxQR:$Qv4, HvxVR:$Vu32)>, Requires<[UseHVX]>; -def V6_vaddhsat_altAlias : InstAlias<"$Vd32=vaddh($Vu32,$Vv32):sat", (V6_vaddhsat HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vaddhsat_dv_altAlias : InstAlias<"$Vdd32=vaddh($Vuu32,$Vvv32):sat", (V6_vaddhsat_dv HvxWR:$Vdd32, HvxWR:$Vuu32, HvxWR:$Vvv32)>, Requires<[UseHVX]>; -def V6_vaddhw_altAlias : InstAlias<"$Vdd32=vaddh($Vu32,$Vv32)", (V6_vaddhw HvxWR:$Vdd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vaddubh_altAlias : InstAlias<"$Vdd32=vaddub($Vu32,$Vv32)", (V6_vaddubh HvxWR:$Vdd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vaddubsat_altAlias : InstAlias<"$Vd32=vaddub($Vu32,$Vv32):sat", (V6_vaddubsat HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vaddubsat_dv_altAlias : InstAlias<"$Vdd32=vaddub($Vuu32,$Vvv32):sat", (V6_vaddubsat_dv HvxWR:$Vdd32, HvxWR:$Vuu32, HvxWR:$Vvv32)>, Requires<[UseHVX]>; -def V6_vadduhsat_altAlias : InstAlias<"$Vd32=vadduh($Vu32,$Vv32):sat", (V6_vadduhsat HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vadduhsat_dv_altAlias : InstAlias<"$Vdd32=vadduh($Vuu32,$Vvv32):sat", (V6_vadduhsat_dv HvxWR:$Vdd32, HvxWR:$Vuu32, HvxWR:$Vvv32)>, Requires<[UseHVX]>; -def V6_vadduhw_altAlias : InstAlias<"$Vdd32=vadduh($Vu32,$Vv32)", (V6_vadduhw HvxWR:$Vdd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vaddw_altAlias : InstAlias<"$Vd32=vaddw($Vu32,$Vv32)", (V6_vaddw HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vaddw_dv_altAlias : InstAlias<"$Vdd32=vaddw($Vuu32,$Vvv32)", (V6_vaddw_dv HvxWR:$Vdd32, HvxWR:$Vuu32, HvxWR:$Vvv32)>, Requires<[UseHVX]>; -def V6_vaddwnq_altAlias : InstAlias<"if (!$Qv4.w) $Vx32.w+=$Vu32.w", (V6_vaddwnq HvxVR:$Vx32, HvxQR:$Qv4, HvxVR:$Vu32)>, Requires<[UseHVX]>; -def V6_vaddwq_altAlias : InstAlias<"if ($Qv4.w) $Vx32.w+=$Vu32.w", (V6_vaddwq HvxVR:$Vx32, HvxQR:$Qv4, HvxVR:$Vu32)>, Requires<[UseHVX]>; -def V6_vaddwsat_altAlias : InstAlias<"$Vd32=vaddw($Vu32,$Vv32):sat", (V6_vaddwsat HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vaddwsat_dv_altAlias : InstAlias<"$Vdd32=vaddw($Vuu32,$Vvv32):sat", (V6_vaddwsat_dv HvxWR:$Vdd32, HvxWR:$Vuu32, HvxWR:$Vvv32)>, Requires<[UseHVX]>; -def V6_vandqrt_acc_altAlias : InstAlias<"$Vx32.ub|=vand($Qu4.ub,$Rt32.ub)", (V6_vandqrt_acc HvxVR:$Vx32, HvxQR:$Qu4, IntRegs:$Rt32)>, Requires<[UseHVX]>; -def V6_vandqrt_altAlias : InstAlias<"$Vd32.ub=vand($Qu4.ub,$Rt32.ub)", (V6_vandqrt 
HvxVR:$Vd32, HvxQR:$Qu4, IntRegs:$Rt32)>, Requires<[UseHVX]>; -def V6_vandvrt_acc_altAlias : InstAlias<"$Qx4.ub|=vand($Vu32.ub,$Rt32.ub)", (V6_vandvrt_acc HvxQR:$Qx4, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; -def V6_vandvrt_altAlias : InstAlias<"$Qd4.ub=vand($Vu32.ub,$Rt32.ub)", (V6_vandvrt HvxQR:$Qd4, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; -def V6_vaslh_altAlias : InstAlias<"$Vd32=vaslh($Vu32,$Rt32)", (V6_vaslh HvxVR:$Vd32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; -def V6_vaslhv_altAlias : InstAlias<"$Vd32=vaslh($Vu32,$Vv32)", (V6_vaslhv HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vaslw_acc_altAlias : InstAlias<"$Vx32+=vaslw($Vu32,$Rt32)", (V6_vaslw_acc HvxVR:$Vx32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; -def V6_vaslw_altAlias : InstAlias<"$Vd32=vaslw($Vu32,$Rt32)", (V6_vaslw HvxVR:$Vd32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; -def V6_vaslwv_altAlias : InstAlias<"$Vd32=vaslw($Vu32,$Vv32)", (V6_vaslwv HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vasrh_altAlias : InstAlias<"$Vd32=vasrh($Vu32,$Rt32)", (V6_vasrh HvxVR:$Vd32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; -def V6_vasrhbrndsat_altAlias : InstAlias<"$Vd32=vasrhb($Vu32,$Vv32,$Rt8):rnd:sat", (V6_vasrhbrndsat HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8)>; -def V6_vasrhubrndsat_altAlias : InstAlias<"$Vd32=vasrhub($Vu32,$Vv32,$Rt8):rnd:sat", (V6_vasrhubrndsat HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8)>; -def V6_vasrhubsat_altAlias : InstAlias<"$Vd32=vasrhub($Vu32,$Vv32,$Rt8):sat", (V6_vasrhubsat HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8)>; -def V6_vasrhv_altAlias : InstAlias<"$Vd32=vasrh($Vu32,$Vv32)", (V6_vasrhv HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vasrw_acc_altAlias : InstAlias<"$Vx32+=vasrw($Vu32,$Rt32)", (V6_vasrw_acc HvxVR:$Vx32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; -def V6_vasrw_altAlias : InstAlias<"$Vd32=vasrw($Vu32,$Rt32)", (V6_vasrw HvxVR:$Vd32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; -def V6_vasrwh_altAlias : InstAlias<"$Vd32=vasrwh($Vu32,$Vv32,$Rt8)", (V6_vasrwhsat HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8)>; -def V6_vasrwhrndsat_altAlias : InstAlias<"$Vd32=vasrwh($Vu32,$Vv32,$Rt8):rnd:sat", (V6_vasrwhrndsat HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8)>; -def V6_vasrwhsat_altAlias : InstAlias<"$Vd32=vasrwh($Vu32,$Vv32,$Rt8):sat", (V6_vasrwhsat HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8)>; -def V6_vasrwuhsat_altAlias : InstAlias<"$Vd32=vasrwuh($Vu32,$Vv32,$Rt8):sat", (V6_vasrwuhsat HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8)>; -def V6_vasrwv_altAlias : InstAlias<"$Vd32=vasrw($Vu32,$Vv32)", (V6_vasrwv HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vavgh_altAlias : InstAlias<"$Vd32=vavgh($Vu32,$Vv32)", (V6_vavgh HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vavghrnd_altAlias : InstAlias<"$Vd32=vavgh($Vu32,$Vv32):rnd", (V6_vavghrnd HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vavgub_altAlias : InstAlias<"$Vd32=vavgub($Vu32,$Vv32)", (V6_vavgub HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vavgubrnd_altAlias : InstAlias<"$Vd32=vavgub($Vu32,$Vv32):rnd", (V6_vavgubrnd HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vavguh_altAlias : InstAlias<"$Vd32=vavguh($Vu32,$Vv32)", (V6_vavguh HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vavguhrnd_altAlias : 
InstAlias<"$Vd32=vavguh($Vu32,$Vv32):rnd", (V6_vavguhrnd HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vavgw_altAlias : InstAlias<"$Vd32=vavgw($Vu32,$Vv32)", (V6_vavgw HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vavgwrnd_altAlias : InstAlias<"$Vd32=vavgw($Vu32,$Vv32):rnd", (V6_vavgwrnd HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vcl0h_altAlias : InstAlias<"$Vd32=vcl0h($Vu32)", (V6_vcl0h HvxVR:$Vd32, HvxVR:$Vu32)>, Requires<[UseHVX]>; -def V6_vcl0w_altAlias : InstAlias<"$Vd32=vcl0w($Vu32)", (V6_vcl0w HvxVR:$Vd32, HvxVR:$Vu32)>, Requires<[UseHVX]>; -def V6_vd0Alias : InstAlias<"$Vd32=#0", (V6_vxor HvxVR:$Vd32, HvxVR:$Vd32, HvxVR:$Vd32)>, Requires<[UseHVX]>; -def V6_vdd0Alias : InstAlias<"$Vdd32=#0", (V6_vsubw_dv HvxWR:$Vdd32, W15, W15)>, Requires<[UseHVX]>; -def V6_vdealb4w_altAlias : InstAlias<"$Vd32=vdealb4w($Vu32,$Vv32)", (V6_vdealb4w HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vdealb_altAlias : InstAlias<"$Vd32=vdealb($Vu32)", (V6_vdealb HvxVR:$Vd32, HvxVR:$Vu32)>, Requires<[UseHVX]>; -def V6_vdealh_altAlias : InstAlias<"$Vd32=vdealh($Vu32)", (V6_vdealh HvxVR:$Vd32, HvxVR:$Vu32)>, Requires<[UseHVX]>; -def V6_vdmpybus_acc_altAlias : InstAlias<"$Vx32+=vdmpybus($Vu32,$Rt32)", (V6_vdmpybus_acc HvxVR:$Vx32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; -def V6_vdmpybus_altAlias : InstAlias<"$Vd32=vdmpybus($Vu32,$Rt32)", (V6_vdmpybus HvxVR:$Vd32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; -def V6_vdmpybus_dv_acc_altAlias : InstAlias<"$Vxx32+=vdmpybus($Vuu32,$Rt32)", (V6_vdmpybus_dv_acc HvxWR:$Vxx32, HvxWR:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; -def V6_vdmpybus_dv_altAlias : InstAlias<"$Vdd32=vdmpybus($Vuu32,$Rt32)", (V6_vdmpybus_dv HvxWR:$Vdd32, HvxWR:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; -def V6_vdmpyhb_acc_altAlias : InstAlias<"$Vx32+=vdmpyhb($Vu32,$Rt32)", (V6_vdmpyhb_acc HvxVR:$Vx32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; -def V6_vdmpyhb_altAlias : InstAlias<"$Vd32=vdmpyhb($Vu32,$Rt32)", (V6_vdmpyhb HvxVR:$Vd32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; -def V6_vdmpyhb_dv_acc_altAlias : InstAlias<"$Vxx32+=vdmpyhb($Vuu32,$Rt32)", (V6_vdmpyhb_dv_acc HvxWR:$Vxx32, HvxWR:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; -def V6_vdmpyhb_dv_altAlias : InstAlias<"$Vdd32=vdmpyhb($Vuu32,$Rt32)", (V6_vdmpyhb_dv HvxWR:$Vdd32, HvxWR:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; -def V6_vdmpyhisat_acc_altAlias : InstAlias<"$Vx32+=vdmpyh($Vuu32,$Rt32):sat", (V6_vdmpyhisat_acc HvxVR:$Vx32, HvxWR:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; -def V6_vdmpyhisat_altAlias : InstAlias<"$Vd32=vdmpyh($Vuu32,$Rt32):sat", (V6_vdmpyhisat HvxVR:$Vd32, HvxWR:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; -def V6_vdmpyhsat_acc_altAlias : InstAlias<"$Vx32+=vdmpyh($Vu32,$Rt32):sat", (V6_vdmpyhsat_acc HvxVR:$Vx32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; -def V6_vdmpyhsat_altAlias : InstAlias<"$Vd32=vdmpyh($Vu32,$Rt32):sat", (V6_vdmpyhsat HvxVR:$Vd32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; -def V6_vdmpyhsuisat_acc_altAlias : InstAlias<"$Vx32+=vdmpyhsu($Vuu32,$Rt32,#1):sat", (V6_vdmpyhsuisat_acc HvxVR:$Vx32, HvxWR:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; -def V6_vdmpyhsuisat_altAlias : InstAlias<"$Vd32=vdmpyhsu($Vuu32,$Rt32,#1):sat", (V6_vdmpyhsuisat HvxVR:$Vd32, HvxWR:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; -def V6_vdmpyhsusat_acc_altAlias : InstAlias<"$Vx32+=vdmpyhsu($Vu32,$Rt32):sat", (V6_vdmpyhsusat_acc HvxVR:$Vx32, HvxVR:$Vu32, IntRegs:$Rt32)>, 
Requires<[UseHVX]>; -def V6_vdmpyhsusat_altAlias : InstAlias<"$Vd32=vdmpyhsu($Vu32,$Rt32):sat", (V6_vdmpyhsusat HvxVR:$Vd32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; -def V6_vdmpyhvsat_acc_altAlias : InstAlias<"$Vx32+=vdmpyh($Vu32,$Vv32):sat", (V6_vdmpyhvsat_acc HvxVR:$Vx32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vdmpyhvsat_altAlias : InstAlias<"$Vd32=vdmpyh($Vu32,$Vv32):sat", (V6_vdmpyhvsat HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vdsaduh_acc_altAlias : InstAlias<"$Vxx32+=vdsaduh($Vuu32,$Rt32)", (V6_vdsaduh_acc HvxWR:$Vxx32, HvxWR:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; -def V6_vdsaduh_altAlias : InstAlias<"$Vdd32=vdsaduh($Vuu32,$Rt32)", (V6_vdsaduh HvxWR:$Vdd32, HvxWR:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; -def V6_vlsrh_altAlias : InstAlias<"$Vd32=vlsrh($Vu32,$Rt32)", (V6_vlsrh HvxVR:$Vd32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; -def V6_vlsrhv_altAlias : InstAlias<"$Vd32=vlsrh($Vu32,$Vv32)", (V6_vlsrhv HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vlsrw_altAlias : InstAlias<"$Vd32=vlsrw($Vu32,$Rt32)", (V6_vlsrw HvxVR:$Vd32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; -def V6_vlsrwv_altAlias : InstAlias<"$Vd32=vlsrw($Vu32,$Vv32)", (V6_vlsrwv HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vmaxh_altAlias : InstAlias<"$Vd32=vmaxh($Vu32,$Vv32)", (V6_vmaxh HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vmaxub_altAlias : InstAlias<"$Vd32=vmaxub($Vu32,$Vv32)", (V6_vmaxub HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vmaxuh_altAlias : InstAlias<"$Vd32=vmaxuh($Vu32,$Vv32)", (V6_vmaxuh HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vmaxw_altAlias : InstAlias<"$Vd32=vmaxw($Vu32,$Vv32)", (V6_vmaxw HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vminh_altAlias : InstAlias<"$Vd32=vminh($Vu32,$Vv32)", (V6_vminh HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vminub_altAlias : InstAlias<"$Vd32=vminub($Vu32,$Vv32)", (V6_vminub HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vminuh_altAlias : InstAlias<"$Vd32=vminuh($Vu32,$Vv32)", (V6_vminuh HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vminw_altAlias : InstAlias<"$Vd32=vminw($Vu32,$Vv32)", (V6_vminw HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vmpabus_acc_altAlias : InstAlias<"$Vxx32+=vmpabus($Vuu32,$Rt32)", (V6_vmpabus_acc HvxWR:$Vxx32, HvxWR:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; -def V6_vmpabus_altAlias : InstAlias<"$Vdd32=vmpabus($Vuu32,$Rt32)", (V6_vmpabus HvxWR:$Vdd32, HvxWR:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; -def V6_vmpabusv_altAlias : InstAlias<"$Vdd32=vmpabus($Vuu32,$Vvv32)", (V6_vmpabusv HvxWR:$Vdd32, HvxWR:$Vuu32, HvxWR:$Vvv32)>, Requires<[UseHVX]>; -def V6_vmpabuuv_altAlias : InstAlias<"$Vdd32=vmpabuu($Vuu32,$Vvv32)", (V6_vmpabuuv HvxWR:$Vdd32, HvxWR:$Vuu32, HvxWR:$Vvv32)>, Requires<[UseHVX]>; -def V6_vmpahb_acc_altAlias : InstAlias<"$Vxx32+=vmpahb($Vuu32,$Rt32)", (V6_vmpahb_acc HvxWR:$Vxx32, HvxWR:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; -def V6_vmpahb_altAlias : InstAlias<"$Vdd32=vmpahb($Vuu32,$Rt32)", (V6_vmpahb HvxWR:$Vdd32, HvxWR:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; -def V6_vmpybus_acc_altAlias : InstAlias<"$Vxx32+=vmpybus($Vu32,$Rt32)", (V6_vmpybus_acc HvxWR:$Vxx32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; -def V6_vmpybus_altAlias : InstAlias<"$Vdd32=vmpybus($Vu32,$Rt32)", (V6_vmpybus HvxWR:$Vdd32, 
HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; -def V6_vmpybusv_acc_altAlias : InstAlias<"$Vxx32+=vmpybus($Vu32,$Vv32)", (V6_vmpybusv_acc HvxWR:$Vxx32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vmpybusv_altAlias : InstAlias<"$Vdd32=vmpybus($Vu32,$Vv32)", (V6_vmpybusv HvxWR:$Vdd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vmpybv_acc_altAlias : InstAlias<"$Vxx32+=vmpyb($Vu32,$Vv32)", (V6_vmpybv_acc HvxWR:$Vxx32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vmpybv_altAlias : InstAlias<"$Vdd32=vmpyb($Vu32,$Vv32)", (V6_vmpybv HvxWR:$Vdd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vmpyewuh_altAlias : InstAlias<"$Vd32=vmpyewuh($Vu32,$Vv32)", (V6_vmpyewuh HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vmpyh_altAlias : InstAlias<"$Vdd32=vmpyh($Vu32,$Rt32)", (V6_vmpyh HvxWR:$Vdd32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; -def V6_vmpyhsat_acc_altAlias : InstAlias<"$Vxx32+=vmpyh($Vu32,$Rt32):sat", (V6_vmpyhsat_acc HvxWR:$Vxx32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; -def V6_vmpyhsrs_altAlias : InstAlias<"$Vd32=vmpyh($Vu32,$Rt32):<<1:rnd:sat", (V6_vmpyhsrs HvxVR:$Vd32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; -def V6_vmpyhss_altAlias : InstAlias<"$Vd32=vmpyh($Vu32,$Rt32):<<1:sat", (V6_vmpyhss HvxVR:$Vd32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; -def V6_vmpyhus_acc_altAlias : InstAlias<"$Vxx32+=vmpyhus($Vu32,$Vv32)", (V6_vmpyhus_acc HvxWR:$Vxx32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vmpyhus_altAlias : InstAlias<"$Vdd32=vmpyhus($Vu32,$Vv32)", (V6_vmpyhus HvxWR:$Vdd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vmpyhv_acc_altAlias : InstAlias<"$Vxx32+=vmpyh($Vu32,$Vv32)", (V6_vmpyhv_acc HvxWR:$Vxx32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vmpyhv_altAlias : InstAlias<"$Vdd32=vmpyh($Vu32,$Vv32)", (V6_vmpyhv HvxWR:$Vdd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vmpyhvsrs_altAlias : InstAlias<"$Vd32=vmpyh($Vu32,$Vv32):<<1:rnd:sat", (V6_vmpyhvsrs HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vmpyiewh_acc_altAlias : InstAlias<"$Vx32+=vmpyiewh($Vu32,$Vv32)", (V6_vmpyiewh_acc HvxVR:$Vx32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vmpyiewuh_acc_altAlias : InstAlias<"$Vx32+=vmpyiewuh($Vu32,$Vv32)", (V6_vmpyiewuh_acc HvxVR:$Vx32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vmpyiewuh_altAlias : InstAlias<"$Vd32=vmpyiewuh($Vu32,$Vv32)", (V6_vmpyiewuh HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vmpyih_acc_altAlias : InstAlias<"$Vx32+=vmpyih($Vu32,$Vv32)", (V6_vmpyih_acc HvxVR:$Vx32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vmpyih_altAlias : InstAlias<"$Vd32=vmpyih($Vu32,$Vv32)", (V6_vmpyih HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vmpyihb_acc_altAlias : InstAlias<"$Vx32+=vmpyihb($Vu32,$Rt32)", (V6_vmpyihb_acc HvxVR:$Vx32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; -def V6_vmpyihb_altAlias : InstAlias<"$Vd32=vmpyihb($Vu32,$Rt32)", (V6_vmpyihb HvxVR:$Vd32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; -def V6_vmpyiowh_altAlias : InstAlias<"$Vd32=vmpyiowh($Vu32,$Vv32)", (V6_vmpyiowh HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vmpyiwb_acc_altAlias : InstAlias<"$Vx32+=vmpyiwb($Vu32,$Rt32)", (V6_vmpyiwb_acc HvxVR:$Vx32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; -def V6_vmpyiwb_altAlias : InstAlias<"$Vd32=vmpyiwb($Vu32,$Rt32)", (V6_vmpyiwb HvxVR:$Vd32, HvxVR:$Vu32, IntRegs:$Rt32)>, 
Requires<[UseHVX]>; -def V6_vmpyiwh_acc_altAlias : InstAlias<"$Vx32+=vmpyiwh($Vu32,$Rt32)", (V6_vmpyiwh_acc HvxVR:$Vx32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; -def V6_vmpyiwh_altAlias : InstAlias<"$Vd32=vmpyiwh($Vu32,$Rt32)", (V6_vmpyiwh HvxVR:$Vd32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; -def V6_vmpyowh_altAlias : InstAlias<"$Vd32=vmpyowh($Vu32,$Vv32):<<1:sat", (V6_vmpyowh HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vmpyowh_rnd_altAlias : InstAlias<"$Vd32=vmpyowh($Vu32,$Vv32):<<1:rnd:sat", (V6_vmpyowh_rnd HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vmpyub_acc_altAlias : InstAlias<"$Vxx32+=vmpyub($Vu32,$Rt32)", (V6_vmpyub_acc HvxWR:$Vxx32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; -def V6_vmpyub_altAlias : InstAlias<"$Vdd32=vmpyub($Vu32,$Rt32)", (V6_vmpyub HvxWR:$Vdd32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; -def V6_vmpyubv_acc_altAlias : InstAlias<"$Vxx32+=vmpyub($Vu32,$Vv32)", (V6_vmpyubv_acc HvxWR:$Vxx32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vmpyubv_altAlias : InstAlias<"$Vdd32=vmpyub($Vu32,$Vv32)", (V6_vmpyubv HvxWR:$Vdd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vmpyuh_acc_altAlias : InstAlias<"$Vxx32+=vmpyuh($Vu32,$Rt32)", (V6_vmpyuh_acc HvxWR:$Vxx32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; -def V6_vmpyuh_altAlias : InstAlias<"$Vdd32=vmpyuh($Vu32,$Rt32)", (V6_vmpyuh HvxWR:$Vdd32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; -def V6_vmpyuhv_acc_altAlias : InstAlias<"$Vxx32+=vmpyuh($Vu32,$Vv32)", (V6_vmpyuhv_acc HvxWR:$Vxx32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vmpyuhv_altAlias : InstAlias<"$Vdd32=vmpyuh($Vu32,$Vv32)", (V6_vmpyuhv HvxWR:$Vdd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vnavgh_altAlias : InstAlias<"$Vd32=vnavgh($Vu32,$Vv32)", (V6_vnavgh HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vnavgub_altAlias : InstAlias<"$Vd32=vnavgub($Vu32,$Vv32)", (V6_vnavgub HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vnavgw_altAlias : InstAlias<"$Vd32=vnavgw($Vu32,$Vv32)", (V6_vnavgw HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vnormamth_altAlias : InstAlias<"$Vd32=vnormamth($Vu32)", (V6_vnormamth HvxVR:$Vd32, HvxVR:$Vu32)>, Requires<[UseHVX]>; -def V6_vnormamtw_altAlias : InstAlias<"$Vd32=vnormamtw($Vu32)", (V6_vnormamtw HvxVR:$Vd32, HvxVR:$Vu32)>, Requires<[UseHVX]>; -def V6_vpackeb_altAlias : InstAlias<"$Vd32=vpackeb($Vu32,$Vv32)", (V6_vpackeb HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vpackeh_altAlias : InstAlias<"$Vd32=vpackeh($Vu32,$Vv32)", (V6_vpackeh HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vpackhb_sat_altAlias : InstAlias<"$Vd32=vpackhb($Vu32,$Vv32):sat", (V6_vpackhb_sat HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vpackhub_sat_altAlias : InstAlias<"$Vd32=vpackhub($Vu32,$Vv32):sat", (V6_vpackhub_sat HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vpackob_altAlias : InstAlias<"$Vd32=vpackob($Vu32,$Vv32)", (V6_vpackob HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vpackoh_altAlias : InstAlias<"$Vd32=vpackoh($Vu32,$Vv32)", (V6_vpackoh HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vpackwh_sat_altAlias : InstAlias<"$Vd32=vpackwh($Vu32,$Vv32):sat", (V6_vpackwh_sat HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vpackwuh_sat_altAlias : InstAlias<"$Vd32=vpackwuh($Vu32,$Vv32):sat", 
(V6_vpackwuh_sat HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vpopcounth_altAlias : InstAlias<"$Vd32=vpopcounth($Vu32)", (V6_vpopcounth HvxVR:$Vd32, HvxVR:$Vu32)>, Requires<[UseHVX]>; -def V6_vrmpybus_acc_altAlias : InstAlias<"$Vx32+=vrmpybus($Vu32,$Rt32)", (V6_vrmpybus_acc HvxVR:$Vx32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; -def V6_vrmpybus_altAlias : InstAlias<"$Vd32=vrmpybus($Vu32,$Rt32)", (V6_vrmpybus HvxVR:$Vd32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; -def V6_vrmpybusi_acc_altAlias : InstAlias<"$Vxx32+=vrmpybus($Vuu32,$Rt32,#$Ii)", (V6_vrmpybusi_acc HvxWR:$Vxx32, HvxWR:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii)>, Requires<[UseHVX]>; -def V6_vrmpybusi_altAlias : InstAlias<"$Vdd32=vrmpybus($Vuu32,$Rt32,#$Ii)", (V6_vrmpybusi HvxWR:$Vdd32, HvxWR:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii)>, Requires<[UseHVX]>; -def V6_vrmpybusv_acc_altAlias : InstAlias<"$Vx32+=vrmpybus($Vu32,$Vv32)", (V6_vrmpybusv_acc HvxVR:$Vx32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vrmpybusv_altAlias : InstAlias<"$Vd32=vrmpybus($Vu32,$Vv32)", (V6_vrmpybusv HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vrmpybv_acc_altAlias : InstAlias<"$Vx32+=vrmpyb($Vu32,$Vv32)", (V6_vrmpybv_acc HvxVR:$Vx32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vrmpybv_altAlias : InstAlias<"$Vd32=vrmpyb($Vu32,$Vv32)", (V6_vrmpybv HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vrmpyub_acc_altAlias : InstAlias<"$Vx32+=vrmpyub($Vu32,$Rt32)", (V6_vrmpyub_acc HvxVR:$Vx32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; -def V6_vrmpyub_altAlias : InstAlias<"$Vd32=vrmpyub($Vu32,$Rt32)", (V6_vrmpyub HvxVR:$Vd32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; -def V6_vrmpyubi_acc_altAlias : InstAlias<"$Vxx32+=vrmpyub($Vuu32,$Rt32,#$Ii)", (V6_vrmpyubi_acc HvxWR:$Vxx32, HvxWR:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii)>, Requires<[UseHVX]>; -def V6_vrmpyubi_altAlias : InstAlias<"$Vdd32=vrmpyub($Vuu32,$Rt32,#$Ii)", (V6_vrmpyubi HvxWR:$Vdd32, HvxWR:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii)>, Requires<[UseHVX]>; -def V6_vrmpyubv_acc_altAlias : InstAlias<"$Vx32+=vrmpyub($Vu32,$Vv32)", (V6_vrmpyubv_acc HvxVR:$Vx32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vrmpyubv_altAlias : InstAlias<"$Vd32=vrmpyub($Vu32,$Vv32)", (V6_vrmpyubv HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vroundhb_altAlias : InstAlias<"$Vd32=vroundhb($Vu32,$Vv32):sat", (V6_vroundhb HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vroundhub_altAlias : InstAlias<"$Vd32=vroundhub($Vu32,$Vv32):sat", (V6_vroundhub HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vroundwh_altAlias : InstAlias<"$Vd32=vroundwh($Vu32,$Vv32):sat", (V6_vroundwh HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vroundwuh_altAlias : InstAlias<"$Vd32=vroundwuh($Vu32,$Vv32):sat", (V6_vroundwuh HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vrsadubi_acc_altAlias : InstAlias<"$Vxx32+=vrsadub($Vuu32,$Rt32,#$Ii)", (V6_vrsadubi_acc HvxWR:$Vxx32, HvxWR:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii)>, Requires<[UseHVX]>; -def V6_vrsadubi_altAlias : InstAlias<"$Vdd32=vrsadub($Vuu32,$Rt32,#$Ii)", (V6_vrsadubi HvxWR:$Vdd32, HvxWR:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii)>, Requires<[UseHVX]>; -def V6_vsathub_altAlias : InstAlias<"$Vd32=vsathub($Vu32,$Vv32)", (V6_vsathub HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vsatwh_altAlias : InstAlias<"$Vd32=vsatwh($Vu32,$Vv32)", (V6_vsatwh HvxVR:$Vd32, HvxVR:$Vu32, 
HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vsb_altAlias : InstAlias<"$Vdd32=vsxtb($Vu32)", (V6_vsb HvxWR:$Vdd32, HvxVR:$Vu32)>, Requires<[UseHVX]>; -def V6_vsh_altAlias : InstAlias<"$Vdd32=vsxth($Vu32)", (V6_vsh HvxWR:$Vdd32, HvxVR:$Vu32)>, Requires<[UseHVX]>; -def V6_vshufeh_altAlias : InstAlias<"$Vd32=vshuffeh($Vu32,$Vv32)", (V6_vshufeh HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vshuffb_altAlias : InstAlias<"$Vd32=vshuffb($Vu32)", (V6_vshuffb HvxVR:$Vd32, HvxVR:$Vu32)>, Requires<[UseHVX]>; -def V6_vshuffeb_altAlias : InstAlias<"$Vd32=vshuffeb($Vu32,$Vv32)", (V6_vshuffeb HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vshuffh_altAlias : InstAlias<"$Vd32=vshuffh($Vu32)", (V6_vshuffh HvxVR:$Vd32, HvxVR:$Vu32)>, Requires<[UseHVX]>; -def V6_vshuffob_altAlias : InstAlias<"$Vd32=vshuffob($Vu32,$Vv32)", (V6_vshuffob HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vshufoeb_altAlias : InstAlias<"$Vdd32=vshuffoeb($Vu32,$Vv32)", (V6_vshufoeb HvxWR:$Vdd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vshufoeh_altAlias : InstAlias<"$Vdd32=vshuffoeh($Vu32,$Vv32)", (V6_vshufoeh HvxWR:$Vdd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vshufoh_altAlias : InstAlias<"$Vd32=vshuffoh($Vu32,$Vv32)", (V6_vshufoh HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vsubb_altAlias : InstAlias<"$Vd32=vsubb($Vu32,$Vv32)", (V6_vsubb HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vsubb_dv_altAlias : InstAlias<"$Vdd32=vsubb($Vuu32,$Vvv32)", (V6_vsubb_dv HvxWR:$Vdd32, HvxWR:$Vuu32, HvxWR:$Vvv32)>, Requires<[UseHVX]>; -def V6_vsubbnq_altAlias : InstAlias<"if (!$Qv4.b) $Vx32.b-=$Vu32.b", (V6_vsubbnq HvxVR:$Vx32, HvxQR:$Qv4, HvxVR:$Vu32)>, Requires<[UseHVX]>; -def V6_vsubbq_altAlias : InstAlias<"if ($Qv4.b) $Vx32.b-=$Vu32.b", (V6_vsubbq HvxVR:$Vx32, HvxQR:$Qv4, HvxVR:$Vu32)>, Requires<[UseHVX]>; -def V6_vsubh_altAlias : InstAlias<"$Vd32=vsubh($Vu32,$Vv32)", (V6_vsubh HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vsubh_dv_altAlias : InstAlias<"$Vdd32=vsubh($Vuu32,$Vvv32)", (V6_vsubh_dv HvxWR:$Vdd32, HvxWR:$Vuu32, HvxWR:$Vvv32)>, Requires<[UseHVX]>; -def V6_vsubhnq_altAlias : InstAlias<"if (!$Qv4.h) $Vx32.h-=$Vu32.h", (V6_vsubhnq HvxVR:$Vx32, HvxQR:$Qv4, HvxVR:$Vu32)>, Requires<[UseHVX]>; -def V6_vsubhq_altAlias : InstAlias<"if ($Qv4.h) $Vx32.h-=$Vu32.h", (V6_vsubhq HvxVR:$Vx32, HvxQR:$Qv4, HvxVR:$Vu32)>, Requires<[UseHVX]>; -def V6_vsubhsat_altAlias : InstAlias<"$Vd32=vsubh($Vu32,$Vv32):sat", (V6_vsubhsat HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vsubhsat_dv_altAlias : InstAlias<"$Vdd32=vsubh($Vuu32,$Vvv32):sat", (V6_vsubhsat_dv HvxWR:$Vdd32, HvxWR:$Vuu32, HvxWR:$Vvv32)>, Requires<[UseHVX]>; -def V6_vsubhw_altAlias : InstAlias<"$Vdd32=vsubh($Vu32,$Vv32)", (V6_vsubhw HvxWR:$Vdd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vsububh_altAlias : InstAlias<"$Vdd32=vsubub($Vu32,$Vv32)", (V6_vsububh HvxWR:$Vdd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vsububsat_altAlias : InstAlias<"$Vd32=vsubub($Vu32,$Vv32):sat", (V6_vsububsat HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vsububsat_dv_altAlias : InstAlias<"$Vdd32=vsubub($Vuu32,$Vvv32):sat", (V6_vsububsat_dv HvxWR:$Vdd32, HvxWR:$Vuu32, HvxWR:$Vvv32)>, Requires<[UseHVX]>; -def V6_vsubuhsat_altAlias : InstAlias<"$Vd32=vsubuh($Vu32,$Vv32):sat", (V6_vsubuhsat HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vsubuhsat_dv_altAlias : 
InstAlias<"$Vdd32=vsubuh($Vuu32,$Vvv32):sat", (V6_vsubuhsat_dv HvxWR:$Vdd32, HvxWR:$Vuu32, HvxWR:$Vvv32)>, Requires<[UseHVX]>; -def V6_vsubuhw_altAlias : InstAlias<"$Vdd32=vsubuh($Vu32,$Vv32)", (V6_vsubuhw HvxWR:$Vdd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vsubw_altAlias : InstAlias<"$Vd32=vsubw($Vu32,$Vv32)", (V6_vsubw HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vsubw_dv_altAlias : InstAlias<"$Vdd32=vsubw($Vuu32,$Vvv32)", (V6_vsubw_dv HvxWR:$Vdd32, HvxWR:$Vuu32, HvxWR:$Vvv32)>, Requires<[UseHVX]>; -def V6_vsubwnq_altAlias : InstAlias<"if (!$Qv4.w) $Vx32.w-=$Vu32.w", (V6_vsubwnq HvxVR:$Vx32, HvxQR:$Qv4, HvxVR:$Vu32)>, Requires<[UseHVX]>; -def V6_vsubwq_altAlias : InstAlias<"if ($Qv4.w) $Vx32.w-=$Vu32.w", (V6_vsubwq HvxVR:$Vx32, HvxQR:$Qv4, HvxVR:$Vu32)>, Requires<[UseHVX]>; -def V6_vsubwsat_altAlias : InstAlias<"$Vd32=vsubw($Vu32,$Vv32):sat", (V6_vsubwsat HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; -def V6_vsubwsat_dv_altAlias : InstAlias<"$Vdd32=vsubw($Vuu32,$Vvv32):sat", (V6_vsubwsat_dv HvxWR:$Vdd32, HvxWR:$Vuu32, HvxWR:$Vvv32)>, Requires<[UseHVX]>; -def V6_vtmpyb_acc_altAlias : InstAlias<"$Vxx32+=vtmpyb($Vuu32,$Rt32)", (V6_vtmpyb_acc HvxWR:$Vxx32, HvxWR:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; -def V6_vtmpyb_altAlias : InstAlias<"$Vdd32=vtmpyb($Vuu32,$Rt32)", (V6_vtmpyb HvxWR:$Vdd32, HvxWR:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; -def V6_vtmpybus_acc_altAlias : InstAlias<"$Vxx32+=vtmpybus($Vuu32,$Rt32)", (V6_vtmpybus_acc HvxWR:$Vxx32, HvxWR:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; -def V6_vtmpybus_altAlias : InstAlias<"$Vdd32=vtmpybus($Vuu32,$Rt32)", (V6_vtmpybus HvxWR:$Vdd32, HvxWR:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; -def V6_vtmpyhb_acc_altAlias : InstAlias<"$Vxx32+=vtmpyhb($Vuu32,$Rt32)", (V6_vtmpyhb_acc HvxWR:$Vxx32, HvxWR:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; -def V6_vtmpyhb_altAlias : InstAlias<"$Vdd32=vtmpyhb($Vuu32,$Rt32)", (V6_vtmpyhb HvxWR:$Vdd32, HvxWR:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; +def L2_loadalignb_zomapAlias : InstAlias<"$Ryy32 = memb_fifo($Rs32)", (L2_loadalignb_io DoubleRegs:$Ryy32, IntRegs:$Rs32, 0)>; +def L2_loadalignh_zomapAlias : InstAlias<"$Ryy32 = memh_fifo($Rs32)", (L2_loadalignh_io DoubleRegs:$Ryy32, IntRegs:$Rs32, 0)>; +def L2_loadbsw2_zomapAlias : InstAlias<"$Rd32 = membh($Rs32)", (L2_loadbsw2_io IntRegs:$Rd32, IntRegs:$Rs32, 0)>; +def L2_loadbsw4_zomapAlias : InstAlias<"$Rdd32 = membh($Rs32)", (L2_loadbsw4_io DoubleRegs:$Rdd32, IntRegs:$Rs32, 0)>; +def L2_loadbzw2_zomapAlias : InstAlias<"$Rd32 = memubh($Rs32)", (L2_loadbzw2_io IntRegs:$Rd32, IntRegs:$Rs32, 0)>; +def L2_loadbzw4_zomapAlias : InstAlias<"$Rdd32 = memubh($Rs32)", (L2_loadbzw4_io DoubleRegs:$Rdd32, IntRegs:$Rs32, 0)>; +def L2_loadrb_zomapAlias : InstAlias<"$Rd32 = memb($Rs32)", (L2_loadrb_io IntRegs:$Rd32, IntRegs:$Rs32, 0)>; +def L2_loadrd_zomapAlias : InstAlias<"$Rdd32 = memd($Rs32)", (L2_loadrd_io DoubleRegs:$Rdd32, IntRegs:$Rs32, 0)>; +def L2_loadrh_zomapAlias : InstAlias<"$Rd32 = memh($Rs32)", (L2_loadrh_io IntRegs:$Rd32, IntRegs:$Rs32, 0)>; +def L2_loadri_zomapAlias : InstAlias<"$Rd32 = memw($Rs32)", (L2_loadri_io IntRegs:$Rd32, IntRegs:$Rs32, 0)>; +def L2_loadrub_zomapAlias : InstAlias<"$Rd32 = memub($Rs32)", (L2_loadrub_io IntRegs:$Rd32, IntRegs:$Rs32, 0)>; +def L2_loadruh_zomapAlias : InstAlias<"$Rd32 = memuh($Rs32)", (L2_loadruh_io IntRegs:$Rd32, IntRegs:$Rs32, 0)>; +def L2_ploadrbf_zomapAlias : InstAlias<"if (!$Pt4) $Rd32 = memb($Rs32)", (L2_ploadrbf_io IntRegs:$Rd32, PredRegs:$Pt4, 
IntRegs:$Rs32, 0)>; +def L2_ploadrbfnew_zomapAlias : InstAlias<"if (!$Pt4.new) $Rd32 = memb($Rs32)", (L2_ploadrbfnew_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>; +def L2_ploadrbt_zomapAlias : InstAlias<"if ($Pt4) $Rd32 = memb($Rs32)", (L2_ploadrbt_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>; +def L2_ploadrbtnew_zomapAlias : InstAlias<"if ($Pt4.new) $Rd32 = memb($Rs32)", (L2_ploadrbtnew_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>; +def L2_ploadrdf_zomapAlias : InstAlias<"if (!$Pt4) $Rdd32 = memd($Rs32)", (L2_ploadrdf_io DoubleRegs:$Rdd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>; +def L2_ploadrdfnew_zomapAlias : InstAlias<"if (!$Pt4.new) $Rdd32 = memd($Rs32)", (L2_ploadrdfnew_io DoubleRegs:$Rdd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>; +def L2_ploadrdt_zomapAlias : InstAlias<"if ($Pt4) $Rdd32 = memd($Rs32)", (L2_ploadrdt_io DoubleRegs:$Rdd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>; +def L2_ploadrdtnew_zomapAlias : InstAlias<"if ($Pt4.new) $Rdd32 = memd($Rs32)", (L2_ploadrdtnew_io DoubleRegs:$Rdd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>; +def L2_ploadrhf_zomapAlias : InstAlias<"if (!$Pt4) $Rd32 = memh($Rs32)", (L2_ploadrhf_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>; +def L2_ploadrhfnew_zomapAlias : InstAlias<"if (!$Pt4.new) $Rd32 = memh($Rs32)", (L2_ploadrhfnew_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>; +def L2_ploadrht_zomapAlias : InstAlias<"if ($Pt4) $Rd32 = memh($Rs32)", (L2_ploadrht_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>; +def L2_ploadrhtnew_zomapAlias : InstAlias<"if ($Pt4.new) $Rd32 = memh($Rs32)", (L2_ploadrhtnew_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>; +def L2_ploadrif_zomapAlias : InstAlias<"if (!$Pt4) $Rd32 = memw($Rs32)", (L2_ploadrif_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>; +def L2_ploadrifnew_zomapAlias : InstAlias<"if (!$Pt4.new) $Rd32 = memw($Rs32)", (L2_ploadrifnew_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>; +def L2_ploadrit_zomapAlias : InstAlias<"if ($Pt4) $Rd32 = memw($Rs32)", (L2_ploadrit_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>; +def L2_ploadritnew_zomapAlias : InstAlias<"if ($Pt4.new) $Rd32 = memw($Rs32)", (L2_ploadritnew_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>; +def L2_ploadrubf_zomapAlias : InstAlias<"if (!$Pt4) $Rd32 = memub($Rs32)", (L2_ploadrubf_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>; +def L2_ploadrubfnew_zomapAlias : InstAlias<"if (!$Pt4.new) $Rd32 = memub($Rs32)", (L2_ploadrubfnew_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>; +def L2_ploadrubt_zomapAlias : InstAlias<"if ($Pt4) $Rd32 = memub($Rs32)", (L2_ploadrubt_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>; +def L2_ploadrubtnew_zomapAlias : InstAlias<"if ($Pt4.new) $Rd32 = memub($Rs32)", (L2_ploadrubtnew_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>; +def L2_ploadruhf_zomapAlias : InstAlias<"if (!$Pt4) $Rd32 = memuh($Rs32)", (L2_ploadruhf_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>; +def L2_ploadruhfnew_zomapAlias : InstAlias<"if (!$Pt4.new) $Rd32 = memuh($Rs32)", (L2_ploadruhfnew_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>; +def L2_ploadruht_zomapAlias : InstAlias<"if ($Pt4) $Rd32 = memuh($Rs32)", (L2_ploadruht_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>; +def L2_ploadruhtnew_zomapAlias : InstAlias<"if ($Pt4.new) $Rd32 = memuh($Rs32)", (L2_ploadruhtnew_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>; +def L4_add_memopb_zomapAlias : InstAlias<"memb($Rs32) += $Rt32", (L4_add_memopb_io IntRegs:$Rs32, 0, IntRegs:$Rt32)>; +def L4_add_memoph_zomapAlias : 
InstAlias<"memh($Rs32) += $Rt32", (L4_add_memoph_io IntRegs:$Rs32, 0, IntRegs:$Rt32)>; +def L4_add_memopw_zomapAlias : InstAlias<"memw($Rs32) += $Rt32", (L4_add_memopw_io IntRegs:$Rs32, 0, IntRegs:$Rt32)>; +def L4_and_memopb_zomapAlias : InstAlias<"memb($Rs32) &= $Rt32", (L4_and_memopb_io IntRegs:$Rs32, 0, IntRegs:$Rt32)>; +def L4_and_memoph_zomapAlias : InstAlias<"memh($Rs32) &= $Rt32", (L4_and_memoph_io IntRegs:$Rs32, 0, IntRegs:$Rt32)>; +def L4_and_memopw_zomapAlias : InstAlias<"memw($Rs32) &= $Rt32", (L4_and_memopw_io IntRegs:$Rs32, 0, IntRegs:$Rt32)>; +def L4_iadd_memopb_zomapAlias : InstAlias<"memb($Rs32) += #$II", (L4_iadd_memopb_io IntRegs:$Rs32, 0, u5_0Imm:$II)>; +def L4_iadd_memoph_zomapAlias : InstAlias<"memh($Rs32) += #$II", (L4_iadd_memoph_io IntRegs:$Rs32, 0, u5_0Imm:$II)>; +def L4_iadd_memopw_zomapAlias : InstAlias<"memw($Rs32) += #$II", (L4_iadd_memopw_io IntRegs:$Rs32, 0, u5_0Imm:$II)>; +def L4_iand_memopb_zomapAlias : InstAlias<"memb($Rs32) = clrbit(#$II)", (L4_iand_memopb_io IntRegs:$Rs32, 0, u5_0Imm:$II)>; +def L4_iand_memoph_zomapAlias : InstAlias<"memh($Rs32) = clrbit(#$II)", (L4_iand_memoph_io IntRegs:$Rs32, 0, u5_0Imm:$II)>; +def L4_iand_memopw_zomapAlias : InstAlias<"memw($Rs32) = clrbit(#$II)", (L4_iand_memopw_io IntRegs:$Rs32, 0, u5_0Imm:$II)>; +def L4_ior_memopb_zomapAlias : InstAlias<"memb($Rs32) = setbit(#$II)", (L4_ior_memopb_io IntRegs:$Rs32, 0, u5_0Imm:$II)>; +def L4_ior_memoph_zomapAlias : InstAlias<"memh($Rs32) = setbit(#$II)", (L4_ior_memoph_io IntRegs:$Rs32, 0, u5_0Imm:$II)>; +def L4_ior_memopw_zomapAlias : InstAlias<"memw($Rs32) = setbit(#$II)", (L4_ior_memopw_io IntRegs:$Rs32, 0, u5_0Imm:$II)>; +def L4_isub_memopb_zomapAlias : InstAlias<"memb($Rs32) -= #$II", (L4_isub_memopb_io IntRegs:$Rs32, 0, u5_0Imm:$II)>; +def L4_isub_memoph_zomapAlias : InstAlias<"memh($Rs32) -= #$II", (L4_isub_memoph_io IntRegs:$Rs32, 0, u5_0Imm:$II)>; +def L4_isub_memopw_zomapAlias : InstAlias<"memw($Rs32) -= #$II", (L4_isub_memopw_io IntRegs:$Rs32, 0, u5_0Imm:$II)>; +def L4_or_memopb_zomapAlias : InstAlias<"memb($Rs32) |= $Rt32", (L4_or_memopb_io IntRegs:$Rs32, 0, IntRegs:$Rt32)>; +def L4_or_memoph_zomapAlias : InstAlias<"memh($Rs32) |= $Rt32", (L4_or_memoph_io IntRegs:$Rs32, 0, IntRegs:$Rt32)>; +def L4_or_memopw_zomapAlias : InstAlias<"memw($Rs32) |= $Rt32", (L4_or_memopw_io IntRegs:$Rs32, 0, IntRegs:$Rt32)>; +def L4_return_map_to_raw_fAlias : InstAlias<"if (!$Pv4) dealloc_return", (L4_return_f D15, PredRegs:$Pv4, R30)>; +def L4_return_map_to_raw_fnew_pntAlias : InstAlias<"if (!$Pv4.new) dealloc_return:nt", (L4_return_fnew_pnt D15, PredRegs:$Pv4, R30)>; +def L4_return_map_to_raw_fnew_ptAlias : InstAlias<"if (!$Pv4.new) dealloc_return:t", (L4_return_fnew_pt D15, PredRegs:$Pv4, R30)>; +def L4_return_map_to_raw_tAlias : InstAlias<"if ($Pv4) dealloc_return", (L4_return_t D15, PredRegs:$Pv4, R30)>; +def L4_return_map_to_raw_tnew_pntAlias : InstAlias<"if ($Pv4.new) dealloc_return:nt", (L4_return_tnew_pnt D15, PredRegs:$Pv4, R30)>; +def L4_return_map_to_raw_tnew_ptAlias : InstAlias<"if ($Pv4.new) dealloc_return:t", (L4_return_tnew_pt D15, PredRegs:$Pv4, R30)>; +def L4_sub_memopb_zomapAlias : InstAlias<"memb($Rs32) -= $Rt32", (L4_sub_memopb_io IntRegs:$Rs32, 0, IntRegs:$Rt32)>; +def L4_sub_memoph_zomapAlias : InstAlias<"memh($Rs32) -= $Rt32", (L4_sub_memoph_io IntRegs:$Rs32, 0, IntRegs:$Rt32)>; +def L4_sub_memopw_zomapAlias : InstAlias<"memw($Rs32) -= $Rt32", (L4_sub_memopw_io IntRegs:$Rs32, 0, IntRegs:$Rt32)>; +def L6_deallocframe_map_to_rawAlias : InstAlias<"deallocframe", 
(L2_deallocframe D15, R30)>; +def L6_return_map_to_rawAlias : InstAlias<"dealloc_return", (L4_return D15, R30)>; +def M2_mpyuiAlias : InstAlias<"$Rd32 = mpyui($Rs32,$Rt32)", (M2_mpyi IntRegs:$Rd32, IntRegs:$Rs32, IntRegs:$Rt32)>; +def S2_pstorerbf_zomapAlias : InstAlias<"if (!$Pv4) memb($Rs32) = $Rt32", (S2_pstorerbf_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Rt32)>; +def S2_pstorerbnewf_zomapAlias : InstAlias<"if (!$Pv4) memb($Rs32) = $Nt8.new", (S2_pstorerbnewf_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Nt8)>; +def S2_pstorerbnewt_zomapAlias : InstAlias<"if ($Pv4) memb($Rs32) = $Nt8.new", (S2_pstorerbnewt_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Nt8)>; +def S2_pstorerbt_zomapAlias : InstAlias<"if ($Pv4) memb($Rs32) = $Rt32", (S2_pstorerbt_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Rt32)>; +def S2_pstorerdf_zomapAlias : InstAlias<"if (!$Pv4) memd($Rs32) = $Rtt32", (S2_pstorerdf_io PredRegs:$Pv4, IntRegs:$Rs32, 0, DoubleRegs:$Rtt32)>; +def S2_pstorerdt_zomapAlias : InstAlias<"if ($Pv4) memd($Rs32) = $Rtt32", (S2_pstorerdt_io PredRegs:$Pv4, IntRegs:$Rs32, 0, DoubleRegs:$Rtt32)>; +def S2_pstorerff_zomapAlias : InstAlias<"if (!$Pv4) memh($Rs32) = $Rt32.h", (S2_pstorerff_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Rt32)>; +def S2_pstorerft_zomapAlias : InstAlias<"if ($Pv4) memh($Rs32) = $Rt32.h", (S2_pstorerft_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Rt32)>; +def S2_pstorerhf_zomapAlias : InstAlias<"if (!$Pv4) memh($Rs32) = $Rt32", (S2_pstorerhf_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Rt32)>; +def S2_pstorerhnewf_zomapAlias : InstAlias<"if (!$Pv4) memh($Rs32) = $Nt8.new", (S2_pstorerhnewf_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Nt8)>; +def S2_pstorerhnewt_zomapAlias : InstAlias<"if ($Pv4) memh($Rs32) = $Nt8.new", (S2_pstorerhnewt_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Nt8)>; +def S2_pstorerht_zomapAlias : InstAlias<"if ($Pv4) memh($Rs32) = $Rt32", (S2_pstorerht_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Rt32)>; +def S2_pstorerif_zomapAlias : InstAlias<"if (!$Pv4) memw($Rs32) = $Rt32", (S2_pstorerif_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Rt32)>; +def S2_pstorerinewf_zomapAlias : InstAlias<"if (!$Pv4) memw($Rs32) = $Nt8.new", (S2_pstorerinewf_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Nt8)>; +def S2_pstorerinewt_zomapAlias : InstAlias<"if ($Pv4) memw($Rs32) = $Nt8.new", (S2_pstorerinewt_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Nt8)>; +def S2_pstorerit_zomapAlias : InstAlias<"if ($Pv4) memw($Rs32) = $Rt32", (S2_pstorerit_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Rt32)>; +def S2_storerb_zomapAlias : InstAlias<"memb($Rs32) = $Rt32", (S2_storerb_io IntRegs:$Rs32, 0, IntRegs:$Rt32)>; +def S2_storerbnew_zomapAlias : InstAlias<"memb($Rs32) = $Nt8.new", (S2_storerbnew_io IntRegs:$Rs32, 0, IntRegs:$Nt8)>; +def S2_storerd_zomapAlias : InstAlias<"memd($Rs32) = $Rtt32", (S2_storerd_io IntRegs:$Rs32, 0, DoubleRegs:$Rtt32)>; +def S2_storerf_zomapAlias : InstAlias<"memh($Rs32) = $Rt32.h", (S2_storerf_io IntRegs:$Rs32, 0, IntRegs:$Rt32)>; +def S2_storerh_zomapAlias : InstAlias<"memh($Rs32) = $Rt32", (S2_storerh_io IntRegs:$Rs32, 0, IntRegs:$Rt32)>; +def S2_storerhnew_zomapAlias : InstAlias<"memh($Rs32) = $Nt8.new", (S2_storerhnew_io IntRegs:$Rs32, 0, IntRegs:$Nt8)>; +def S2_storeri_zomapAlias : InstAlias<"memw($Rs32) = $Rt32", (S2_storeri_io IntRegs:$Rs32, 0, IntRegs:$Rt32)>; +def S2_storerinew_zomapAlias : InstAlias<"memw($Rs32) = $Nt8.new", (S2_storerinew_io IntRegs:$Rs32, 0, IntRegs:$Nt8)>; +def S2_tableidxb_goodsyntaxAlias : InstAlias<"$Rx32 = 
tableidxb($Rs32,#$Ii,#$II)", (S2_tableidxb IntRegs:$Rx32, IntRegs:$Rs32, u4_0Imm:$Ii, u5_0Imm:$II)>; +def S4_pstorerbfnew_zomapAlias : InstAlias<"if (!$Pv4.new) memb($Rs32) = $Rt32", (S4_pstorerbfnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Rt32)>; +def S4_pstorerbnewfnew_zomapAlias : InstAlias<"if (!$Pv4.new) memb($Rs32) = $Nt8.new", (S4_pstorerbnewfnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Nt8)>; +def S4_pstorerbnewtnew_zomapAlias : InstAlias<"if ($Pv4.new) memb($Rs32) = $Nt8.new", (S4_pstorerbnewtnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Nt8)>; +def S4_pstorerbtnew_zomapAlias : InstAlias<"if ($Pv4.new) memb($Rs32) = $Rt32", (S4_pstorerbtnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Rt32)>; +def S4_pstorerdfnew_zomapAlias : InstAlias<"if (!$Pv4.new) memd($Rs32) = $Rtt32", (S4_pstorerdfnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, DoubleRegs:$Rtt32)>; +def S4_pstorerdtnew_zomapAlias : InstAlias<"if ($Pv4.new) memd($Rs32) = $Rtt32", (S4_pstorerdtnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, DoubleRegs:$Rtt32)>; +def S4_pstorerffnew_zomapAlias : InstAlias<"if (!$Pv4.new) memh($Rs32) = $Rt32.h", (S4_pstorerffnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Rt32)>; +def S4_pstorerftnew_zomapAlias : InstAlias<"if ($Pv4.new) memh($Rs32) = $Rt32.h", (S4_pstorerftnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Rt32)>; +def S4_pstorerhfnew_zomapAlias : InstAlias<"if (!$Pv4.new) memh($Rs32) = $Rt32", (S4_pstorerhfnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Rt32)>; +def S4_pstorerhnewfnew_zomapAlias : InstAlias<"if (!$Pv4.new) memh($Rs32) = $Nt8.new", (S4_pstorerhnewfnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Nt8)>; +def S4_pstorerhnewtnew_zomapAlias : InstAlias<"if ($Pv4.new) memh($Rs32) = $Nt8.new", (S4_pstorerhnewtnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Nt8)>; +def S4_pstorerhtnew_zomapAlias : InstAlias<"if ($Pv4.new) memh($Rs32) = $Rt32", (S4_pstorerhtnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Rt32)>; +def S4_pstorerifnew_zomapAlias : InstAlias<"if (!$Pv4.new) memw($Rs32) = $Rt32", (S4_pstorerifnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Rt32)>; +def S4_pstorerinewfnew_zomapAlias : InstAlias<"if (!$Pv4.new) memw($Rs32) = $Nt8.new", (S4_pstorerinewfnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Nt8)>; +def S4_pstorerinewtnew_zomapAlias : InstAlias<"if ($Pv4.new) memw($Rs32) = $Nt8.new", (S4_pstorerinewtnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Nt8)>; +def S4_pstoreritnew_zomapAlias : InstAlias<"if ($Pv4.new) memw($Rs32) = $Rt32", (S4_pstoreritnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Rt32)>; +def S4_storeirb_zomapAlias : InstAlias<"memb($Rs32) = #$II", (S4_storeirb_io IntRegs:$Rs32, 0, s32_0Imm:$II)>; +def S4_storeirbf_zomapAlias : InstAlias<"if (!$Pv4) memb($Rs32) = #$II", (S4_storeirbf_io PredRegs:$Pv4, IntRegs:$Rs32, 0, s32_0Imm:$II)>; +def S4_storeirbfnew_zomapAlias : InstAlias<"if (!$Pv4.new) memb($Rs32) = #$II", (S4_storeirbfnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, s32_0Imm:$II)>; +def S4_storeirbt_zomapAlias : InstAlias<"if ($Pv4) memb($Rs32) = #$II", (S4_storeirbt_io PredRegs:$Pv4, IntRegs:$Rs32, 0, s32_0Imm:$II)>; +def S4_storeirbtnew_zomapAlias : InstAlias<"if ($Pv4.new) memb($Rs32) = #$II", (S4_storeirbtnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, s32_0Imm:$II)>; +def S4_storeirh_zomapAlias : InstAlias<"memh($Rs32) = #$II", (S4_storeirh_io IntRegs:$Rs32, 0, s32_0Imm:$II)>; +def S4_storeirhf_zomapAlias : InstAlias<"if (!$Pv4) memh($Rs32) = #$II", (S4_storeirhf_io PredRegs:$Pv4, IntRegs:$Rs32, 0, s32_0Imm:$II)>; +def 
S4_storeirhfnew_zomapAlias : InstAlias<"if (!$Pv4.new) memh($Rs32) = #$II", (S4_storeirhfnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, s32_0Imm:$II)>; +def S4_storeirht_zomapAlias : InstAlias<"if ($Pv4) memh($Rs32) = #$II", (S4_storeirht_io PredRegs:$Pv4, IntRegs:$Rs32, 0, s32_0Imm:$II)>; +def S4_storeirhtnew_zomapAlias : InstAlias<"if ($Pv4.new) memh($Rs32) = #$II", (S4_storeirhtnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, s32_0Imm:$II)>; +def S4_storeiri_zomapAlias : InstAlias<"memw($Rs32) = #$II", (S4_storeiri_io IntRegs:$Rs32, 0, s32_0Imm:$II)>; +def S4_storeirif_zomapAlias : InstAlias<"if (!$Pv4) memw($Rs32) = #$II", (S4_storeirif_io PredRegs:$Pv4, IntRegs:$Rs32, 0, s32_0Imm:$II)>; +def S4_storeirifnew_zomapAlias : InstAlias<"if (!$Pv4.new) memw($Rs32) = #$II", (S4_storeirifnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, s32_0Imm:$II)>; +def S4_storeirit_zomapAlias : InstAlias<"if ($Pv4) memw($Rs32) = #$II", (S4_storeirit_io PredRegs:$Pv4, IntRegs:$Rs32, 0, s32_0Imm:$II)>; +def S4_storeiritnew_zomapAlias : InstAlias<"if ($Pv4.new) memw($Rs32) = #$II", (S4_storeiritnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, s32_0Imm:$II)>; +def S6_allocframe_to_rawAlias : InstAlias<"allocframe(#$Ii)", (S2_allocframe R29, u11_3Imm:$Ii)>; +def V6_MAP_equbAlias : InstAlias<"$Qd4 = vcmp.eq($Vu32.ub,$Vv32.ub)", (V6_veqb HvxQR:$Qd4, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_MAP_equb_andAlias : InstAlias<"$Qx4 &= vcmp.eq($Vu32.ub,$Vv32.ub)", (V6_veqb_and HvxQR:$Qx4, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_MAP_equb_iorAlias : InstAlias<"$Qx4 |= vcmp.eq($Vu32.ub,$Vv32.ub)", (V6_veqb_or HvxQR:$Qx4, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_MAP_equb_xorAlias : InstAlias<"$Qx4 ^= vcmp.eq($Vu32.ub,$Vv32.ub)", (V6_veqb_xor HvxQR:$Qx4, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_MAP_equhAlias : InstAlias<"$Qd4 = vcmp.eq($Vu32.uh,$Vv32.uh)", (V6_veqh HvxQR:$Qd4, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_MAP_equh_andAlias : InstAlias<"$Qx4 &= vcmp.eq($Vu32.uh,$Vv32.uh)", (V6_veqh_and HvxQR:$Qx4, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_MAP_equh_iorAlias : InstAlias<"$Qx4 |= vcmp.eq($Vu32.uh,$Vv32.uh)", (V6_veqh_or HvxQR:$Qx4, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_MAP_equh_xorAlias : InstAlias<"$Qx4 ^= vcmp.eq($Vu32.uh,$Vv32.uh)", (V6_veqh_xor HvxQR:$Qx4, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_MAP_equwAlias : InstAlias<"$Qd4 = vcmp.eq($Vu32.uw,$Vv32.uw)", (V6_veqw HvxQR:$Qd4, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_MAP_equw_andAlias : InstAlias<"$Qx4 &= vcmp.eq($Vu32.uw,$Vv32.uw)", (V6_veqw_and HvxQR:$Qx4, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_MAP_equw_iorAlias : InstAlias<"$Qx4 |= vcmp.eq($Vu32.uw,$Vv32.uw)", (V6_veqw_or HvxQR:$Qx4, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_MAP_equw_xorAlias : InstAlias<"$Qx4 ^= vcmp.eq($Vu32.uw,$Vv32.uw)", (V6_veqw_xor HvxQR:$Qx4, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_extractw_altAlias : InstAlias<"$Rd32.w = vextract($Vu32,$Rs32)", (V6_extractw IntRegs:$Rd32, HvxVR:$Vu32, IntRegs:$Rs32)>, Requires<[UseHVX]>; +def V6_ld0Alias : InstAlias<"$Vd32 = vmem($Rt32)", (V6_vL32b_ai HvxVR:$Vd32, IntRegs:$Rt32, 0)>, Requires<[UseHVX]>; +def V6_ldcnp0Alias : InstAlias<"if (!$Pv4) $Vd32.cur = vmem($Rt32)", (V6_vL32b_cur_npred_pi HvxVR:$Vd32, IntRegs:$Rt32, PredRegs:$Pv4, 0)>, Requires<[UseHVX]>; +def V6_ldcnpnt0Alias : InstAlias<"if (!$Pv4) $Vd32.cur = vmem($Rt32):nt", (V6_vL32b_nt_cur_npred_pi HvxVR:$Vd32, IntRegs:$Rt32, PredRegs:$Pv4, 
0)>, Requires<[UseHVX]>; +def V6_ldcp0Alias : InstAlias<"if ($Pv4) $Vd32.cur = vmem($Rt32)", (V6_vL32b_cur_pred_pi HvxVR:$Vd32, IntRegs:$Rt32, PredRegs:$Pv4, 0)>, Requires<[UseHVX]>; +def V6_ldcpnt0Alias : InstAlias<"if ($Pv4) $Vd32.cur = vmem($Rt32):nt", (V6_vL32b_nt_cur_pred_pi HvxVR:$Vd32, IntRegs:$Rt32, PredRegs:$Pv4, 0)>, Requires<[UseHVX]>; +def V6_ldnp0Alias : InstAlias<"if (!$Pv4) $Vd32 = vmem($Rt32)", (V6_vL32b_npred_pi HvxVR:$Vd32, IntRegs:$Rt32, PredRegs:$Pv4, 0)>, Requires<[UseHVX]>; +def V6_ldnpnt0Alias : InstAlias<"if (!$Pv4) $Vd32 = vmem($Rt32):nt", (V6_vL32b_nt_npred_pi HvxVR:$Vd32, IntRegs:$Rt32, PredRegs:$Pv4, 0)>, Requires<[UseHVX]>; +def V6_ldnt0Alias : InstAlias<"$Vd32 = vmem($Rt32):nt", (V6_vL32b_nt_ai HvxVR:$Vd32, IntRegs:$Rt32, 0)>, Requires<[UseHVX]>; +def V6_ldntnt0Alias : InstAlias<"$Vd32 = vmem($Rt32):nt", (V6_vL32b_nt_ai HvxVR:$Vd32, IntRegs:$Rt32, 0)>; +def V6_ldp0Alias : InstAlias<"if ($Pv4) $Vd32 = vmem($Rt32)", (V6_vL32b_pred_ai HvxVR:$Vd32, PredRegs:$Pv4, IntRegs:$Rt32, 0)>, Requires<[UseHVX]>; +def V6_ldpnt0Alias : InstAlias<"if ($Pv4) $Vd32 = vmem($Rt32):nt", (V6_vL32b_nt_pred_ai HvxVR:$Vd32, PredRegs:$Pv4, IntRegs:$Rt32, 0)>, Requires<[UseHVX]>; +def V6_ldtnp0Alias : InstAlias<"if (!$Pv4) $Vd32.tmp = vmem($Rt32)", (V6_vL32b_npred_ai HvxVR:$Vd32, PredRegs:$Pv4, IntRegs:$Rt32, 0)>, Requires<[UseHVX]>; +def V6_ldtnpnt0Alias : InstAlias<"if (!$Pv4) $Vd32.tmp = vmem($Rt32):nt", (V6_vL32b_nt_npred_ai HvxVR:$Vd32, PredRegs:$Pv4, IntRegs:$Rt32, 0)>, Requires<[UseHVX]>; +def V6_ldtp0Alias : InstAlias<"if ($Pv4) $Vd32.tmp = vmem($Rt32)", (V6_vL32b_tmp_pred_ai HvxVR:$Vd32, PredRegs:$Pv4, IntRegs:$Rt32, 0)>, Requires<[UseHVX]>; +def V6_ldtpnt0Alias : InstAlias<"if ($Pv4) $Vd32.tmp = vmem($Rt32):nt", (V6_vL32b_nt_tmp_pred_ai HvxVR:$Vd32, PredRegs:$Pv4, IntRegs:$Rt32, 0)>, Requires<[UseHVX]>; +def V6_ldu0Alias : InstAlias<"$Vd32 = vmemu($Rt32)", (V6_vL32Ub_ai HvxVR:$Vd32, IntRegs:$Rt32, 0)>, Requires<[UseHVX]>; +def V6_st0Alias : InstAlias<"vmem($Rt32) = $Vs32", (V6_vS32b_ai IntRegs:$Rt32, 0, HvxVR:$Vs32)>, Requires<[UseHVX]>; +def V6_stn0Alias : InstAlias<"vmem($Rt32) = $Os8.new", (V6_vS32b_new_ai IntRegs:$Rt32, 0, HvxVR:$Os8)>, Requires<[UseHVX]>; +def V6_stnnt0Alias : InstAlias<"vmem($Rt32):nt = $Os8.new", (V6_vS32b_nt_new_ai IntRegs:$Rt32, 0, HvxVR:$Os8)>, Requires<[UseHVX]>; +def V6_stnp0Alias : InstAlias<"if (!$Pv4) vmem($Rt32) = $Vs32", (V6_vS32b_npred_ai PredRegs:$Pv4, IntRegs:$Rt32, 0, HvxVR:$Vs32)>, Requires<[UseHVX]>; +def V6_stnpnt0Alias : InstAlias<"if (!$Pv4) vmem($Rt32):nt = $Vs32", (V6_vS32b_nt_npred_ai PredRegs:$Pv4, IntRegs:$Rt32, 0, HvxVR:$Vs32)>, Requires<[UseHVX]>; +def V6_stnq0Alias : InstAlias<"if (!$Qv4) vmem($Rt32) = $Vs32", (V6_vS32b_nqpred_ai HvxQR:$Qv4, IntRegs:$Rt32, 0, HvxVR:$Vs32)>, Requires<[UseHVX]>; +def V6_stnqnt0Alias : InstAlias<"if (!$Qv4) vmem($Rt32):nt = $Vs32", (V6_vS32b_nt_nqpred_ai HvxQR:$Qv4, IntRegs:$Rt32, 0, HvxVR:$Vs32)>, Requires<[UseHVX]>; +def V6_stnt0Alias : InstAlias<"vmem($Rt32):nt = $Vs32", (V6_vS32b_nt_ai IntRegs:$Rt32, 0, HvxVR:$Vs32)>, Requires<[UseHVX]>; +def V6_stp0Alias : InstAlias<"if ($Pv4) vmem($Rt32) = $Vs32", (V6_vS32b_pred_ai PredRegs:$Pv4, IntRegs:$Rt32, 0, HvxVR:$Vs32)>, Requires<[UseHVX]>; +def V6_stpnt0Alias : InstAlias<"if ($Pv4) vmem($Rt32):nt = $Vs32", (V6_vS32b_nt_pred_ai PredRegs:$Pv4, IntRegs:$Rt32, 0, HvxVR:$Vs32)>, Requires<[UseHVX]>; +def V6_stq0Alias : InstAlias<"if ($Qv4) vmem($Rt32) = $Vs32", (V6_vS32b_qpred_ai HvxQR:$Qv4, IntRegs:$Rt32, 0, HvxVR:$Vs32)>, Requires<[UseHVX]>; +def 
V6_stqnt0Alias : InstAlias<"if ($Qv4) vmem($Rt32):nt = $Vs32", (V6_vS32b_nt_qpred_ai HvxQR:$Qv4, IntRegs:$Rt32, 0, HvxVR:$Vs32)>, Requires<[UseHVX]>; +def V6_stu0Alias : InstAlias<"vmemu($Rt32) = $Vs32", (V6_vS32Ub_ai IntRegs:$Rt32, 0, HvxVR:$Vs32)>, Requires<[UseHVX]>; +def V6_stunp0Alias : InstAlias<"if (!$Pv4) vmemu($Rt32) = $Vs32", (V6_vS32Ub_npred_ai PredRegs:$Pv4, IntRegs:$Rt32, 0, HvxVR:$Vs32)>, Requires<[UseHVX]>; +def V6_stup0Alias : InstAlias<"if ($Pv4) vmemu($Rt32) = $Vs32", (V6_vS32Ub_pred_ai PredRegs:$Pv4, IntRegs:$Rt32, 0, HvxVR:$Vs32)>, Requires<[UseHVX]>; +def V6_vabsb_altAlias : InstAlias<"$Vd32 = vabsb($Vu32)", (V6_vabsb HvxVR:$Vd32, HvxVR:$Vu32)>, Requires<[UseHVX]>; +def V6_vabsb_sat_altAlias : InstAlias<"$Vd32 = vabsb($Vu32):sat", (V6_vabsb_sat HvxVR:$Vd32, HvxVR:$Vu32)>, Requires<[UseHVX]>; +def V6_vabsdiffh_altAlias : InstAlias<"$Vd32 = vabsdiffh($Vu32,$Vv32)", (V6_vabsdiffh HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vabsdiffub_altAlias : InstAlias<"$Vd32 = vabsdiffub($Vu32,$Vv32)", (V6_vabsdiffub HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vabsdiffuh_altAlias : InstAlias<"$Vd32 = vabsdiffuh($Vu32,$Vv32)", (V6_vabsdiffuh HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vabsdiffw_altAlias : InstAlias<"$Vd32 = vabsdiffw($Vu32,$Vv32)", (V6_vabsdiffw HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vabsh_altAlias : InstAlias<"$Vd32 = vabsh($Vu32)", (V6_vabsh HvxVR:$Vd32, HvxVR:$Vu32)>, Requires<[UseHVX]>; +def V6_vabsh_sat_altAlias : InstAlias<"$Vd32 = vabsh($Vu32):sat", (V6_vabsh_sat HvxVR:$Vd32, HvxVR:$Vu32)>, Requires<[UseHVX]>; +def V6_vabsub_altAlias : InstAlias<"$Vd32.ub = vabs($Vu32.b)", (V6_vabsb HvxVR:$Vd32, HvxVR:$Vu32)>, Requires<[UseHVX]>; +def V6_vabsuh_altAlias : InstAlias<"$Vd32.uh = vabs($Vu32.h)", (V6_vabsh HvxVR:$Vd32, HvxVR:$Vu32)>, Requires<[UseHVX]>; +def V6_vabsuw_altAlias : InstAlias<"$Vd32.uw = vabs($Vu32.w)", (V6_vabsw HvxVR:$Vd32, HvxVR:$Vu32)>, Requires<[UseHVX]>; +def V6_vabsw_altAlias : InstAlias<"$Vd32 = vabsw($Vu32)", (V6_vabsw HvxVR:$Vd32, HvxVR:$Vu32)>, Requires<[UseHVX]>; +def V6_vabsw_sat_altAlias : InstAlias<"$Vd32 = vabsw($Vu32):sat", (V6_vabsw_sat HvxVR:$Vd32, HvxVR:$Vu32)>, Requires<[UseHVX]>; +def V6_vaddb_altAlias : InstAlias<"$Vd32 = vaddb($Vu32,$Vv32)", (V6_vaddb HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vaddb_dv_altAlias : InstAlias<"$Vdd32 = vaddb($Vuu32,$Vvv32)", (V6_vaddb_dv HvxWR:$Vdd32, HvxWR:$Vuu32, HvxWR:$Vvv32)>, Requires<[UseHVX]>; +def V6_vaddbnq_altAlias : InstAlias<"if (!$Qv4.b) $Vx32.b += $Vu32.b", (V6_vaddbnq HvxVR:$Vx32, HvxQR:$Qv4, HvxVR:$Vu32)>, Requires<[UseHVX]>; +def V6_vaddbq_altAlias : InstAlias<"if ($Qv4.b) $Vx32.b += $Vu32.b", (V6_vaddbq HvxVR:$Vx32, HvxQR:$Qv4, HvxVR:$Vu32)>, Requires<[UseHVX]>; +def V6_vaddbsat_altAlias : InstAlias<"$Vd32 = vaddb($Vu32,$Vv32):sat", (V6_vaddbsat HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vaddbsat_dv_altAlias : InstAlias<"$Vdd32 = vaddb($Vuu32,$Vvv32):sat", (V6_vaddbsat_dv HvxWR:$Vdd32, HvxWR:$Vuu32, HvxWR:$Vvv32)>, Requires<[UseHVX]>; +def V6_vaddh_altAlias : InstAlias<"$Vd32 = vaddh($Vu32,$Vv32)", (V6_vaddh HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vaddh_dv_altAlias : InstAlias<"$Vdd32 = vaddh($Vuu32,$Vvv32)", (V6_vaddh_dv HvxWR:$Vdd32, HvxWR:$Vuu32, HvxWR:$Vvv32)>, Requires<[UseHVX]>; +def V6_vaddhnq_altAlias : InstAlias<"if (!$Qv4.h) $Vx32.h += $Vu32.h", (V6_vaddhnq HvxVR:$Vx32, HvxQR:$Qv4, 
HvxVR:$Vu32)>, Requires<[UseHVX]>; +def V6_vaddhq_altAlias : InstAlias<"if ($Qv4.h) $Vx32.h += $Vu32.h", (V6_vaddhq HvxVR:$Vx32, HvxQR:$Qv4, HvxVR:$Vu32)>, Requires<[UseHVX]>; +def V6_vaddhsat_altAlias : InstAlias<"$Vd32 = vaddh($Vu32,$Vv32):sat", (V6_vaddhsat HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vaddhsat_dv_altAlias : InstAlias<"$Vdd32 = vaddh($Vuu32,$Vvv32):sat", (V6_vaddhsat_dv HvxWR:$Vdd32, HvxWR:$Vuu32, HvxWR:$Vvv32)>, Requires<[UseHVX]>; +def V6_vaddhw_acc_altAlias : InstAlias<"$Vxx32 += vaddh($Vu32,$Vv32)", (V6_vaddhw_acc HvxWR:$Vxx32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vaddhw_altAlias : InstAlias<"$Vdd32 = vaddh($Vu32,$Vv32)", (V6_vaddhw HvxWR:$Vdd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vaddubh_acc_altAlias : InstAlias<"$Vxx32 += vaddub($Vu32,$Vv32)", (V6_vaddubh_acc HvxWR:$Vxx32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vaddubh_altAlias : InstAlias<"$Vdd32 = vaddub($Vu32,$Vv32)", (V6_vaddubh HvxWR:$Vdd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vaddubsat_altAlias : InstAlias<"$Vd32 = vaddub($Vu32,$Vv32):sat", (V6_vaddubsat HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vaddubsat_dv_altAlias : InstAlias<"$Vdd32 = vaddub($Vuu32,$Vvv32):sat", (V6_vaddubsat_dv HvxWR:$Vdd32, HvxWR:$Vuu32, HvxWR:$Vvv32)>, Requires<[UseHVX]>; +def V6_vadduhsat_altAlias : InstAlias<"$Vd32 = vadduh($Vu32,$Vv32):sat", (V6_vadduhsat HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vadduhsat_dv_altAlias : InstAlias<"$Vdd32 = vadduh($Vuu32,$Vvv32):sat", (V6_vadduhsat_dv HvxWR:$Vdd32, HvxWR:$Vuu32, HvxWR:$Vvv32)>, Requires<[UseHVX]>; +def V6_vadduhw_acc_altAlias : InstAlias<"$Vxx32 += vadduh($Vu32,$Vv32)", (V6_vadduhw_acc HvxWR:$Vxx32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vadduhw_altAlias : InstAlias<"$Vdd32 = vadduh($Vu32,$Vv32)", (V6_vadduhw HvxWR:$Vdd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vadduwsat_altAlias : InstAlias<"$Vd32 = vadduw($Vu32,$Vv32):sat", (V6_vadduwsat HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vadduwsat_dv_altAlias : InstAlias<"$Vdd32 = vadduw($Vuu32,$Vvv32):sat", (V6_vadduwsat_dv HvxWR:$Vdd32, HvxWR:$Vuu32, HvxWR:$Vvv32)>, Requires<[UseHVX]>; +def V6_vaddw_altAlias : InstAlias<"$Vd32 = vaddw($Vu32,$Vv32)", (V6_vaddw HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vaddw_dv_altAlias : InstAlias<"$Vdd32 = vaddw($Vuu32,$Vvv32)", (V6_vaddw_dv HvxWR:$Vdd32, HvxWR:$Vuu32, HvxWR:$Vvv32)>, Requires<[UseHVX]>; +def V6_vaddwnq_altAlias : InstAlias<"if (!$Qv4.w) $Vx32.w += $Vu32.w", (V6_vaddwnq HvxVR:$Vx32, HvxQR:$Qv4, HvxVR:$Vu32)>, Requires<[UseHVX]>; +def V6_vaddwq_altAlias : InstAlias<"if ($Qv4.w) $Vx32.w += $Vu32.w", (V6_vaddwq HvxVR:$Vx32, HvxQR:$Qv4, HvxVR:$Vu32)>, Requires<[UseHVX]>; +def V6_vaddwsat_altAlias : InstAlias<"$Vd32 = vaddw($Vu32,$Vv32):sat", (V6_vaddwsat HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vaddwsat_dv_altAlias : InstAlias<"$Vdd32 = vaddw($Vuu32,$Vvv32):sat", (V6_vaddwsat_dv HvxWR:$Vdd32, HvxWR:$Vuu32, HvxWR:$Vvv32)>, Requires<[UseHVX]>; +def V6_vandnqrt_acc_altAlias : InstAlias<"$Vx32.ub |= vand(!$Qu4.ub,$Rt32.ub)", (V6_vandnqrt_acc HvxVR:$Vx32, HvxQR:$Qu4, IntRegs:$Rt32)>, Requires<[UseHVX]>; +def V6_vandnqrt_altAlias : InstAlias<"$Vd32.ub = vand(!$Qu4.ub,$Rt32.ub)", (V6_vandnqrt HvxVR:$Vd32, HvxQR:$Qu4, IntRegs:$Rt32)>, Requires<[UseHVX]>; +def V6_vandqrt_acc_altAlias : InstAlias<"$Vx32.ub |= vand($Qu4.ub,$Rt32.ub)", 
(V6_vandqrt_acc HvxVR:$Vx32, HvxQR:$Qu4, IntRegs:$Rt32)>, Requires<[UseHVX]>; +def V6_vandqrt_altAlias : InstAlias<"$Vd32.ub = vand($Qu4.ub,$Rt32.ub)", (V6_vandqrt HvxVR:$Vd32, HvxQR:$Qu4, IntRegs:$Rt32)>, Requires<[UseHVX]>; +def V6_vandvrt_acc_altAlias : InstAlias<"$Qx4.ub |= vand($Vu32.ub,$Rt32.ub)", (V6_vandvrt_acc HvxQR:$Qx4, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; +def V6_vandvrt_altAlias : InstAlias<"$Qd4.ub = vand($Vu32.ub,$Rt32.ub)", (V6_vandvrt HvxQR:$Qd4, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; +def V6_vaslh_acc_altAlias : InstAlias<"$Vx32 += vaslh($Vu32,$Rt32)", (V6_vaslh_acc HvxVR:$Vx32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; +def V6_vaslh_altAlias : InstAlias<"$Vd32 = vaslh($Vu32,$Rt32)", (V6_vaslh HvxVR:$Vd32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; +def V6_vaslhv_altAlias : InstAlias<"$Vd32 = vaslh($Vu32,$Vv32)", (V6_vaslhv HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vaslw_acc_altAlias : InstAlias<"$Vx32 += vaslw($Vu32,$Rt32)", (V6_vaslw_acc HvxVR:$Vx32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; +def V6_vaslw_altAlias : InstAlias<"$Vd32 = vaslw($Vu32,$Rt32)", (V6_vaslw HvxVR:$Vd32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; +def V6_vaslwv_altAlias : InstAlias<"$Vd32 = vaslw($Vu32,$Vv32)", (V6_vaslwv HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vasrh_acc_altAlias : InstAlias<"$Vx32 += vasrh($Vu32,$Rt32)", (V6_vasrh_acc HvxVR:$Vx32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; +def V6_vasrh_altAlias : InstAlias<"$Vd32 = vasrh($Vu32,$Rt32)", (V6_vasrh HvxVR:$Vd32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; +def V6_vasrhbrndsat_altAlias : InstAlias<"$Vd32 = vasrhb($Vu32,$Vv32,$Rt8):rnd:sat", (V6_vasrhbrndsat HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8)>; +def V6_vasrhubrndsat_altAlias : InstAlias<"$Vd32 = vasrhub($Vu32,$Vv32,$Rt8):rnd:sat", (V6_vasrhubrndsat HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8)>; +def V6_vasrhubsat_altAlias : InstAlias<"$Vd32 = vasrhub($Vu32,$Vv32,$Rt8):sat", (V6_vasrhubsat HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8)>; +def V6_vasrhv_altAlias : InstAlias<"$Vd32 = vasrh($Vu32,$Vv32)", (V6_vasrhv HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vasrw_acc_altAlias : InstAlias<"$Vx32 += vasrw($Vu32,$Rt32)", (V6_vasrw_acc HvxVR:$Vx32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; +def V6_vasrw_altAlias : InstAlias<"$Vd32 = vasrw($Vu32,$Rt32)", (V6_vasrw HvxVR:$Vd32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; +def V6_vasrwh_altAlias : InstAlias<"$Vd32 = vasrwh($Vu32,$Vv32,$Rt8)", (V6_vasrwhsat HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8)>; +def V6_vasrwhrndsat_altAlias : InstAlias<"$Vd32 = vasrwh($Vu32,$Vv32,$Rt8):rnd:sat", (V6_vasrwhrndsat HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8)>; +def V6_vasrwhsat_altAlias : InstAlias<"$Vd32 = vasrwh($Vu32,$Vv32,$Rt8):sat", (V6_vasrwhsat HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8)>; +def V6_vasrwuhsat_altAlias : InstAlias<"$Vd32 = vasrwuh($Vu32,$Vv32,$Rt8):sat", (V6_vasrwuhsat HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8)>; +def V6_vasrwv_altAlias : InstAlias<"$Vd32 = vasrw($Vu32,$Vv32)", (V6_vasrwv HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vavgb_altAlias : InstAlias<"$Vd32 = vavgb($Vu32,$Vv32)", (V6_vavgb HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vavgbrnd_altAlias : InstAlias<"$Vd32 = vavgb($Vu32,$Vv32):rnd", (V6_vavgbrnd HvxVR:$Vd32, 
HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vavgh_altAlias : InstAlias<"$Vd32 = vavgh($Vu32,$Vv32)", (V6_vavgh HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vavghrnd_altAlias : InstAlias<"$Vd32 = vavgh($Vu32,$Vv32):rnd", (V6_vavghrnd HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vavgub_altAlias : InstAlias<"$Vd32 = vavgub($Vu32,$Vv32)", (V6_vavgub HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vavgubrnd_altAlias : InstAlias<"$Vd32 = vavgub($Vu32,$Vv32):rnd", (V6_vavgubrnd HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vavguh_altAlias : InstAlias<"$Vd32 = vavguh($Vu32,$Vv32)", (V6_vavguh HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vavguhrnd_altAlias : InstAlias<"$Vd32 = vavguh($Vu32,$Vv32):rnd", (V6_vavguhrnd HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vavguw_altAlias : InstAlias<"$Vd32 = vavguw($Vu32,$Vv32)", (V6_vavguw HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vavguwrnd_altAlias : InstAlias<"$Vd32 = vavguw($Vu32,$Vv32):rnd", (V6_vavguwrnd HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vavgw_altAlias : InstAlias<"$Vd32 = vavgw($Vu32,$Vv32)", (V6_vavgw HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vavgwrnd_altAlias : InstAlias<"$Vd32 = vavgw($Vu32,$Vv32):rnd", (V6_vavgwrnd HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vcl0h_altAlias : InstAlias<"$Vd32 = vcl0h($Vu32)", (V6_vcl0h HvxVR:$Vd32, HvxVR:$Vu32)>, Requires<[UseHVX]>; +def V6_vcl0w_altAlias : InstAlias<"$Vd32 = vcl0w($Vu32)", (V6_vcl0w HvxVR:$Vd32, HvxVR:$Vu32)>, Requires<[UseHVX]>; +def V6_vd0Alias : InstAlias<"$Vd32 = #0", (V6_vxor HvxVR:$Vd32, HvxVR:$Vd32, HvxVR:$Vd32)>, Requires<[UseHVX]>; +def V6_vdd0Alias : InstAlias<"$Vdd32 = #0", (V6_vsubw_dv HvxWR:$Vdd32, W15, W15)>, Requires<[UseHVX]>; +def V6_vdealb4w_altAlias : InstAlias<"$Vd32 = vdealb4w($Vu32,$Vv32)", (V6_vdealb4w HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vdealb_altAlias : InstAlias<"$Vd32 = vdealb($Vu32)", (V6_vdealb HvxVR:$Vd32, HvxVR:$Vu32)>, Requires<[UseHVX]>; +def V6_vdealh_altAlias : InstAlias<"$Vd32 = vdealh($Vu32)", (V6_vdealh HvxVR:$Vd32, HvxVR:$Vu32)>, Requires<[UseHVX]>; +def V6_vdmpybus_acc_altAlias : InstAlias<"$Vx32 += vdmpybus($Vu32,$Rt32)", (V6_vdmpybus_acc HvxVR:$Vx32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; +def V6_vdmpybus_altAlias : InstAlias<"$Vd32 = vdmpybus($Vu32,$Rt32)", (V6_vdmpybus HvxVR:$Vd32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; +def V6_vdmpybus_dv_acc_altAlias : InstAlias<"$Vxx32 += vdmpybus($Vuu32,$Rt32)", (V6_vdmpybus_dv_acc HvxWR:$Vxx32, HvxWR:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; +def V6_vdmpybus_dv_altAlias : InstAlias<"$Vdd32 = vdmpybus($Vuu32,$Rt32)", (V6_vdmpybus_dv HvxWR:$Vdd32, HvxWR:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; +def V6_vdmpyhb_acc_altAlias : InstAlias<"$Vx32 += vdmpyhb($Vu32,$Rt32)", (V6_vdmpyhb_acc HvxVR:$Vx32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; +def V6_vdmpyhb_altAlias : InstAlias<"$Vd32 = vdmpyhb($Vu32,$Rt32)", (V6_vdmpyhb HvxVR:$Vd32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; +def V6_vdmpyhb_dv_acc_altAlias : InstAlias<"$Vxx32 += vdmpyhb($Vuu32,$Rt32)", (V6_vdmpyhb_dv_acc HvxWR:$Vxx32, HvxWR:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; +def V6_vdmpyhb_dv_altAlias : InstAlias<"$Vdd32 = vdmpyhb($Vuu32,$Rt32)", (V6_vdmpyhb_dv HvxWR:$Vdd32, HvxWR:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; 
+def V6_vdmpyhisat_acc_altAlias : InstAlias<"$Vx32 += vdmpyh($Vuu32,$Rt32):sat", (V6_vdmpyhisat_acc HvxVR:$Vx32, HvxWR:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; +def V6_vdmpyhisat_altAlias : InstAlias<"$Vd32 = vdmpyh($Vuu32,$Rt32):sat", (V6_vdmpyhisat HvxVR:$Vd32, HvxWR:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; +def V6_vdmpyhsat_acc_altAlias : InstAlias<"$Vx32 += vdmpyh($Vu32,$Rt32):sat", (V6_vdmpyhsat_acc HvxVR:$Vx32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; +def V6_vdmpyhsat_altAlias : InstAlias<"$Vd32 = vdmpyh($Vu32,$Rt32):sat", (V6_vdmpyhsat HvxVR:$Vd32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; +def V6_vdmpyhsuisat_acc_altAlias : InstAlias<"$Vx32 += vdmpyhsu($Vuu32,$Rt32,#1):sat", (V6_vdmpyhsuisat_acc HvxVR:$Vx32, HvxWR:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; +def V6_vdmpyhsuisat_altAlias : InstAlias<"$Vd32 = vdmpyhsu($Vuu32,$Rt32,#1):sat", (V6_vdmpyhsuisat HvxVR:$Vd32, HvxWR:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; +def V6_vdmpyhsusat_acc_altAlias : InstAlias<"$Vx32 += vdmpyhsu($Vu32,$Rt32):sat", (V6_vdmpyhsusat_acc HvxVR:$Vx32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; +def V6_vdmpyhsusat_altAlias : InstAlias<"$Vd32 = vdmpyhsu($Vu32,$Rt32):sat", (V6_vdmpyhsusat HvxVR:$Vd32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; +def V6_vdmpyhvsat_acc_altAlias : InstAlias<"$Vx32 += vdmpyh($Vu32,$Vv32):sat", (V6_vdmpyhvsat_acc HvxVR:$Vx32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vdmpyhvsat_altAlias : InstAlias<"$Vd32 = vdmpyh($Vu32,$Vv32):sat", (V6_vdmpyhvsat HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vdsaduh_acc_altAlias : InstAlias<"$Vxx32 += vdsaduh($Vuu32,$Rt32)", (V6_vdsaduh_acc HvxWR:$Vxx32, HvxWR:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; +def V6_vdsaduh_altAlias : InstAlias<"$Vdd32 = vdsaduh($Vuu32,$Rt32)", (V6_vdsaduh HvxWR:$Vdd32, HvxWR:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; +def V6_vlsrh_altAlias : InstAlias<"$Vd32 = vlsrh($Vu32,$Rt32)", (V6_vlsrh HvxVR:$Vd32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; +def V6_vlsrhv_altAlias : InstAlias<"$Vd32 = vlsrh($Vu32,$Vv32)", (V6_vlsrhv HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vlsrw_altAlias : InstAlias<"$Vd32 = vlsrw($Vu32,$Rt32)", (V6_vlsrw HvxVR:$Vd32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; +def V6_vlsrwv_altAlias : InstAlias<"$Vd32 = vlsrw($Vu32,$Vv32)", (V6_vlsrwv HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vmaxb_altAlias : InstAlias<"$Vd32 = vmaxb($Vu32,$Vv32)", (V6_vmaxb HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vmaxh_altAlias : InstAlias<"$Vd32 = vmaxh($Vu32,$Vv32)", (V6_vmaxh HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vmaxub_altAlias : InstAlias<"$Vd32 = vmaxub($Vu32,$Vv32)", (V6_vmaxub HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vmaxuh_altAlias : InstAlias<"$Vd32 = vmaxuh($Vu32,$Vv32)", (V6_vmaxuh HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vmaxw_altAlias : InstAlias<"$Vd32 = vmaxw($Vu32,$Vv32)", (V6_vmaxw HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vminb_altAlias : InstAlias<"$Vd32 = vminb($Vu32,$Vv32)", (V6_vminb HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vminh_altAlias : InstAlias<"$Vd32 = vminh($Vu32,$Vv32)", (V6_vminh HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vminub_altAlias : InstAlias<"$Vd32 = vminub($Vu32,$Vv32)", (V6_vminub HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; 
+def V6_vminuh_altAlias : InstAlias<"$Vd32 = vminuh($Vu32,$Vv32)", (V6_vminuh HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vminw_altAlias : InstAlias<"$Vd32 = vminw($Vu32,$Vv32)", (V6_vminw HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vmpabus_acc_altAlias : InstAlias<"$Vxx32 += vmpabus($Vuu32,$Rt32)", (V6_vmpabus_acc HvxWR:$Vxx32, HvxWR:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; +def V6_vmpabus_altAlias : InstAlias<"$Vdd32 = vmpabus($Vuu32,$Rt32)", (V6_vmpabus HvxWR:$Vdd32, HvxWR:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; +def V6_vmpabusv_altAlias : InstAlias<"$Vdd32 = vmpabus($Vuu32,$Vvv32)", (V6_vmpabusv HvxWR:$Vdd32, HvxWR:$Vuu32, HvxWR:$Vvv32)>, Requires<[UseHVX]>; +def V6_vmpabuu_acc_altAlias : InstAlias<"$Vxx32 += vmpabuu($Vuu32,$Rt32)", (V6_vmpabuu_acc HvxWR:$Vxx32, HvxWR:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; +def V6_vmpabuu_altAlias : InstAlias<"$Vdd32 = vmpabuu($Vuu32,$Rt32)", (V6_vmpabuu HvxWR:$Vdd32, HvxWR:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; +def V6_vmpabuuv_altAlias : InstAlias<"$Vdd32 = vmpabuu($Vuu32,$Vvv32)", (V6_vmpabuuv HvxWR:$Vdd32, HvxWR:$Vuu32, HvxWR:$Vvv32)>, Requires<[UseHVX]>; +def V6_vmpahb_acc_altAlias : InstAlias<"$Vxx32 += vmpahb($Vuu32,$Rt32)", (V6_vmpahb_acc HvxWR:$Vxx32, HvxWR:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; +def V6_vmpahb_altAlias : InstAlias<"$Vdd32 = vmpahb($Vuu32,$Rt32)", (V6_vmpahb HvxWR:$Vdd32, HvxWR:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; +def V6_vmpauhb_acc_altAlias : InstAlias<"$Vxx32 += vmpauhb($Vuu32,$Rt32)", (V6_vmpauhb_acc HvxWR:$Vxx32, HvxWR:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; +def V6_vmpauhb_altAlias : InstAlias<"$Vdd32 = vmpauhb($Vuu32,$Rt32)", (V6_vmpauhb HvxWR:$Vdd32, HvxWR:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; +def V6_vmpybus_acc_altAlias : InstAlias<"$Vxx32 += vmpybus($Vu32,$Rt32)", (V6_vmpybus_acc HvxWR:$Vxx32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; +def V6_vmpybus_altAlias : InstAlias<"$Vdd32 = vmpybus($Vu32,$Rt32)", (V6_vmpybus HvxWR:$Vdd32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; +def V6_vmpybusv_acc_altAlias : InstAlias<"$Vxx32 += vmpybus($Vu32,$Vv32)", (V6_vmpybusv_acc HvxWR:$Vxx32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vmpybusv_altAlias : InstAlias<"$Vdd32 = vmpybus($Vu32,$Vv32)", (V6_vmpybusv HvxWR:$Vdd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vmpybv_acc_altAlias : InstAlias<"$Vxx32 += vmpyb($Vu32,$Vv32)", (V6_vmpybv_acc HvxWR:$Vxx32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vmpybv_altAlias : InstAlias<"$Vdd32 = vmpyb($Vu32,$Vv32)", (V6_vmpybv HvxWR:$Vdd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vmpyewuh_altAlias : InstAlias<"$Vd32 = vmpyewuh($Vu32,$Vv32)", (V6_vmpyewuh HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vmpyh_acc_altAlias : InstAlias<"$Vxx32 += vmpyh($Vu32,$Rt32)", (V6_vmpyh_acc HvxWR:$Vxx32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; +def V6_vmpyh_altAlias : InstAlias<"$Vdd32 = vmpyh($Vu32,$Rt32)", (V6_vmpyh HvxWR:$Vdd32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; +def V6_vmpyhsat_acc_altAlias : InstAlias<"$Vxx32 += vmpyh($Vu32,$Rt32):sat", (V6_vmpyhsat_acc HvxWR:$Vxx32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; +def V6_vmpyhsrs_altAlias : InstAlias<"$Vd32 = vmpyh($Vu32,$Rt32):<<1:rnd:sat", (V6_vmpyhsrs HvxVR:$Vd32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; +def V6_vmpyhss_altAlias : InstAlias<"$Vd32 = vmpyh($Vu32,$Rt32):<<1:sat", (V6_vmpyhss HvxVR:$Vd32, HvxVR:$Vu32, 
IntRegs:$Rt32)>, Requires<[UseHVX]>; +def V6_vmpyhus_acc_altAlias : InstAlias<"$Vxx32 += vmpyhus($Vu32,$Vv32)", (V6_vmpyhus_acc HvxWR:$Vxx32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vmpyhus_altAlias : InstAlias<"$Vdd32 = vmpyhus($Vu32,$Vv32)", (V6_vmpyhus HvxWR:$Vdd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vmpyhv_acc_altAlias : InstAlias<"$Vxx32 += vmpyh($Vu32,$Vv32)", (V6_vmpyhv_acc HvxWR:$Vxx32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vmpyhv_altAlias : InstAlias<"$Vdd32 = vmpyh($Vu32,$Vv32)", (V6_vmpyhv HvxWR:$Vdd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vmpyhvsrs_altAlias : InstAlias<"$Vd32 = vmpyh($Vu32,$Vv32):<<1:rnd:sat", (V6_vmpyhvsrs HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vmpyiewh_acc_altAlias : InstAlias<"$Vx32 += vmpyiewh($Vu32,$Vv32)", (V6_vmpyiewh_acc HvxVR:$Vx32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vmpyiewuh_acc_altAlias : InstAlias<"$Vx32 += vmpyiewuh($Vu32,$Vv32)", (V6_vmpyiewuh_acc HvxVR:$Vx32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vmpyiewuh_altAlias : InstAlias<"$Vd32 = vmpyiewuh($Vu32,$Vv32)", (V6_vmpyiewuh HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vmpyih_acc_altAlias : InstAlias<"$Vx32 += vmpyih($Vu32,$Vv32)", (V6_vmpyih_acc HvxVR:$Vx32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vmpyih_altAlias : InstAlias<"$Vd32 = vmpyih($Vu32,$Vv32)", (V6_vmpyih HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vmpyihb_acc_altAlias : InstAlias<"$Vx32 += vmpyihb($Vu32,$Rt32)", (V6_vmpyihb_acc HvxVR:$Vx32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; +def V6_vmpyihb_altAlias : InstAlias<"$Vd32 = vmpyihb($Vu32,$Rt32)", (V6_vmpyihb HvxVR:$Vd32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; +def V6_vmpyiowh_altAlias : InstAlias<"$Vd32 = vmpyiowh($Vu32,$Vv32)", (V6_vmpyiowh HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vmpyiwb_acc_altAlias : InstAlias<"$Vx32 += vmpyiwb($Vu32,$Rt32)", (V6_vmpyiwb_acc HvxVR:$Vx32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; +def V6_vmpyiwb_altAlias : InstAlias<"$Vd32 = vmpyiwb($Vu32,$Rt32)", (V6_vmpyiwb HvxVR:$Vd32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; +def V6_vmpyiwh_acc_altAlias : InstAlias<"$Vx32 += vmpyiwh($Vu32,$Rt32)", (V6_vmpyiwh_acc HvxVR:$Vx32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; +def V6_vmpyiwh_altAlias : InstAlias<"$Vd32 = vmpyiwh($Vu32,$Rt32)", (V6_vmpyiwh HvxVR:$Vd32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; +def V6_vmpyiwub_acc_altAlias : InstAlias<"$Vx32 += vmpyiwub($Vu32,$Rt32)", (V6_vmpyiwub_acc HvxVR:$Vx32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; +def V6_vmpyiwub_altAlias : InstAlias<"$Vd32 = vmpyiwub($Vu32,$Rt32)", (V6_vmpyiwub HvxVR:$Vd32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; +def V6_vmpyowh_altAlias : InstAlias<"$Vd32 = vmpyowh($Vu32,$Vv32):<<1:sat", (V6_vmpyowh HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vmpyowh_rnd_altAlias : InstAlias<"$Vd32 = vmpyowh($Vu32,$Vv32):<<1:rnd:sat", (V6_vmpyowh_rnd HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vmpyub_acc_altAlias : InstAlias<"$Vxx32 += vmpyub($Vu32,$Rt32)", (V6_vmpyub_acc HvxWR:$Vxx32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; +def V6_vmpyub_altAlias : InstAlias<"$Vdd32 = vmpyub($Vu32,$Rt32)", (V6_vmpyub HvxWR:$Vdd32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; +def V6_vmpyubv_acc_altAlias : InstAlias<"$Vxx32 += vmpyub($Vu32,$Vv32)", 
(V6_vmpyubv_acc HvxWR:$Vxx32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vmpyubv_altAlias : InstAlias<"$Vdd32 = vmpyub($Vu32,$Vv32)", (V6_vmpyubv HvxWR:$Vdd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vmpyuh_acc_altAlias : InstAlias<"$Vxx32 += vmpyuh($Vu32,$Rt32)", (V6_vmpyuh_acc HvxWR:$Vxx32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; +def V6_vmpyuh_altAlias : InstAlias<"$Vdd32 = vmpyuh($Vu32,$Rt32)", (V6_vmpyuh HvxWR:$Vdd32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; +def V6_vmpyuhv_acc_altAlias : InstAlias<"$Vxx32 += vmpyuh($Vu32,$Vv32)", (V6_vmpyuhv_acc HvxWR:$Vxx32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vmpyuhv_altAlias : InstAlias<"$Vdd32 = vmpyuh($Vu32,$Vv32)", (V6_vmpyuhv HvxWR:$Vdd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vnavgb_altAlias : InstAlias<"$Vd32 = vnavgb($Vu32,$Vv32)", (V6_vnavgb HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vnavgh_altAlias : InstAlias<"$Vd32 = vnavgh($Vu32,$Vv32)", (V6_vnavgh HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vnavgub_altAlias : InstAlias<"$Vd32 = vnavgub($Vu32,$Vv32)", (V6_vnavgub HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vnavgw_altAlias : InstAlias<"$Vd32 = vnavgw($Vu32,$Vv32)", (V6_vnavgw HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vnormamth_altAlias : InstAlias<"$Vd32 = vnormamth($Vu32)", (V6_vnormamth HvxVR:$Vd32, HvxVR:$Vu32)>, Requires<[UseHVX]>; +def V6_vnormamtw_altAlias : InstAlias<"$Vd32 = vnormamtw($Vu32)", (V6_vnormamtw HvxVR:$Vd32, HvxVR:$Vu32)>, Requires<[UseHVX]>; +def V6_vpackeb_altAlias : InstAlias<"$Vd32 = vpackeb($Vu32,$Vv32)", (V6_vpackeb HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vpackeh_altAlias : InstAlias<"$Vd32 = vpackeh($Vu32,$Vv32)", (V6_vpackeh HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vpackhb_sat_altAlias : InstAlias<"$Vd32 = vpackhb($Vu32,$Vv32):sat", (V6_vpackhb_sat HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vpackhub_sat_altAlias : InstAlias<"$Vd32 = vpackhub($Vu32,$Vv32):sat", (V6_vpackhub_sat HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vpackob_altAlias : InstAlias<"$Vd32 = vpackob($Vu32,$Vv32)", (V6_vpackob HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vpackoh_altAlias : InstAlias<"$Vd32 = vpackoh($Vu32,$Vv32)", (V6_vpackoh HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vpackwh_sat_altAlias : InstAlias<"$Vd32 = vpackwh($Vu32,$Vv32):sat", (V6_vpackwh_sat HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vpackwuh_sat_altAlias : InstAlias<"$Vd32 = vpackwuh($Vu32,$Vv32):sat", (V6_vpackwuh_sat HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vpopcounth_altAlias : InstAlias<"$Vd32 = vpopcounth($Vu32)", (V6_vpopcounth HvxVR:$Vd32, HvxVR:$Vu32)>, Requires<[UseHVX]>; +def V6_vrmpybub_rtt_acc_altAlias : InstAlias<"$Vxx32.w += vrmpy($Vu32.b,$Rtt32.ub)", (V6_vrmpybub_rtt_acc HvxWR:$Vxx32, HvxVR:$Vu32, DoubleRegs:$Rtt32)>, Requires<[UseHVX]>; +def V6_vrmpybub_rtt_altAlias : InstAlias<"$Vdd32.w = vrmpy($Vu32.b,$Rtt32.ub)", (V6_vrmpybub_rtt HvxWR:$Vdd32, HvxVR:$Vu32, DoubleRegs:$Rtt32)>, Requires<[UseHVX]>; +def V6_vrmpybus_acc_altAlias : InstAlias<"$Vx32 += vrmpybus($Vu32,$Rt32)", (V6_vrmpybus_acc HvxVR:$Vx32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; +def V6_vrmpybus_altAlias : InstAlias<"$Vd32 = vrmpybus($Vu32,$Rt32)", (V6_vrmpybus HvxVR:$Vd32, 
HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; +def V6_vrmpybusi_acc_altAlias : InstAlias<"$Vxx32 += vrmpybus($Vuu32,$Rt32,#$Ii)", (V6_vrmpybusi_acc HvxWR:$Vxx32, HvxWR:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii)>, Requires<[UseHVX]>; +def V6_vrmpybusi_altAlias : InstAlias<"$Vdd32 = vrmpybus($Vuu32,$Rt32,#$Ii)", (V6_vrmpybusi HvxWR:$Vdd32, HvxWR:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii)>, Requires<[UseHVX]>; +def V6_vrmpybusv_acc_altAlias : InstAlias<"$Vx32 += vrmpybus($Vu32,$Vv32)", (V6_vrmpybusv_acc HvxVR:$Vx32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vrmpybusv_altAlias : InstAlias<"$Vd32 = vrmpybus($Vu32,$Vv32)", (V6_vrmpybusv HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vrmpybv_acc_altAlias : InstAlias<"$Vx32 += vrmpyb($Vu32,$Vv32)", (V6_vrmpybv_acc HvxVR:$Vx32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vrmpybv_altAlias : InstAlias<"$Vd32 = vrmpyb($Vu32,$Vv32)", (V6_vrmpybv HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vrmpyub_acc_altAlias : InstAlias<"$Vx32 += vrmpyub($Vu32,$Rt32)", (V6_vrmpyub_acc HvxVR:$Vx32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; +def V6_vrmpyub_altAlias : InstAlias<"$Vd32 = vrmpyub($Vu32,$Rt32)", (V6_vrmpyub HvxVR:$Vd32, HvxVR:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; +def V6_vrmpyub_rtt_acc_altAlias : InstAlias<"$Vxx32.uw += vrmpy($Vu32.ub,$Rtt32.ub)", (V6_vrmpyub_rtt_acc HvxWR:$Vxx32, HvxVR:$Vu32, DoubleRegs:$Rtt32)>, Requires<[UseHVX]>; +def V6_vrmpyub_rtt_altAlias : InstAlias<"$Vdd32.uw = vrmpy($Vu32.ub,$Rtt32.ub)", (V6_vrmpyub_rtt HvxWR:$Vdd32, HvxVR:$Vu32, DoubleRegs:$Rtt32)>, Requires<[UseHVX]>; +def V6_vrmpyubi_acc_altAlias : InstAlias<"$Vxx32 += vrmpyub($Vuu32,$Rt32,#$Ii)", (V6_vrmpyubi_acc HvxWR:$Vxx32, HvxWR:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii)>, Requires<[UseHVX]>; +def V6_vrmpyubi_altAlias : InstAlias<"$Vdd32 = vrmpyub($Vuu32,$Rt32,#$Ii)", (V6_vrmpyubi HvxWR:$Vdd32, HvxWR:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii)>, Requires<[UseHVX]>; +def V6_vrmpyubv_acc_altAlias : InstAlias<"$Vx32 += vrmpyub($Vu32,$Vv32)", (V6_vrmpyubv_acc HvxVR:$Vx32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vrmpyubv_altAlias : InstAlias<"$Vd32 = vrmpyub($Vu32,$Vv32)", (V6_vrmpyubv HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vroundhb_altAlias : InstAlias<"$Vd32 = vroundhb($Vu32,$Vv32):sat", (V6_vroundhb HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vroundhub_altAlias : InstAlias<"$Vd32 = vroundhub($Vu32,$Vv32):sat", (V6_vroundhub HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vrounduhub_altAlias : InstAlias<"$Vd32 = vrounduhub($Vu32,$Vv32):sat", (V6_vrounduhub HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vrounduwuh_altAlias : InstAlias<"$Vd32 = vrounduwuh($Vu32,$Vv32):sat", (V6_vrounduwuh HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vroundwh_altAlias : InstAlias<"$Vd32 = vroundwh($Vu32,$Vv32):sat", (V6_vroundwh HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vroundwuh_altAlias : InstAlias<"$Vd32 = vroundwuh($Vu32,$Vv32):sat", (V6_vroundwuh HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vrsadubi_acc_altAlias : InstAlias<"$Vxx32 += vrsadub($Vuu32,$Rt32,#$Ii)", (V6_vrsadubi_acc HvxWR:$Vxx32, HvxWR:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii)>, Requires<[UseHVX]>; +def V6_vrsadubi_altAlias : InstAlias<"$Vdd32 = vrsadub($Vuu32,$Rt32,#$Ii)", (V6_vrsadubi HvxWR:$Vdd32, HvxWR:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii)>, Requires<[UseHVX]>; +def 
V6_vsathub_altAlias : InstAlias<"$Vd32 = vsathub($Vu32,$Vv32)", (V6_vsathub HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vsatuwuh_altAlias : InstAlias<"$Vd32 = vsatuwuh($Vu32,$Vv32)", (V6_vsatuwuh HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vsatwh_altAlias : InstAlias<"$Vd32 = vsatwh($Vu32,$Vv32)", (V6_vsatwh HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vsb_altAlias : InstAlias<"$Vdd32 = vsxtb($Vu32)", (V6_vsb HvxWR:$Vdd32, HvxVR:$Vu32)>, Requires<[UseHVX]>; +def V6_vscattermh_add_altAlias : InstAlias<"vscatter($Rt32,$Mu2,$Vv32.h) += $Vw32.h", (V6_vscattermh_add IntRegs:$Rt32, ModRegs:$Mu2, HvxVR:$Vv32, HvxVR:$Vw32)>, Requires<[UseHVX]>; +def V6_vscattermh_altAlias : InstAlias<"vscatter($Rt32,$Mu2,$Vv32.h) = $Vw32.h", (V6_vscattermh IntRegs:$Rt32, ModRegs:$Mu2, HvxVR:$Vv32, HvxVR:$Vw32)>, Requires<[UseHVX]>; +def V6_vscattermhq_altAlias : InstAlias<"if ($Qs4) vscatter($Rt32,$Mu2,$Vv32.h) = $Vw32.h", (V6_vscattermhq HvxQR:$Qs4, IntRegs:$Rt32, ModRegs:$Mu2, HvxVR:$Vv32, HvxVR:$Vw32)>, Requires<[UseHVX]>; +def V6_vscattermw_add_altAlias : InstAlias<"vscatter($Rt32,$Mu2,$Vv32.w) += $Vw32.w", (V6_vscattermw_add IntRegs:$Rt32, ModRegs:$Mu2, HvxVR:$Vv32, HvxVR:$Vw32)>, Requires<[UseHVX]>; +def V6_vscattermw_altAlias : InstAlias<"vscatter($Rt32,$Mu2,$Vv32.w) = $Vw32.w", (V6_vscattermw IntRegs:$Rt32, ModRegs:$Mu2, HvxVR:$Vv32, HvxVR:$Vw32)>, Requires<[UseHVX]>; +def V6_vscattermwh_add_altAlias : InstAlias<"vscatter($Rt32,$Mu2,$Vvv32.w) += $Vw32.h", (V6_vscattermhw_add IntRegs:$Rt32, ModRegs:$Mu2, HvxWR:$Vvv32, HvxVR:$Vw32)>, Requires<[UseHVX]>; +def V6_vscattermwh_altAlias : InstAlias<"vscatter($Rt32,$Mu2,$Vvv32.w) = $Vw32.h", (V6_vscattermhw IntRegs:$Rt32, ModRegs:$Mu2, HvxWR:$Vvv32, HvxVR:$Vw32)>, Requires<[UseHVX]>; +def V6_vscattermwhq_altAlias : InstAlias<"if ($Qs4) vscatter($Rt32,$Mu2,$Vvv32.w) = $Vw32.h", (V6_vscattermhwq HvxQR:$Qs4, IntRegs:$Rt32, ModRegs:$Mu2, HvxWR:$Vvv32, HvxVR:$Vw32)>, Requires<[UseHVX]>; +def V6_vscattermwq_altAlias : InstAlias<"if ($Qs4) vscatter($Rt32,$Mu2,$Vv32.w) = $Vw32.w", (V6_vscattermwq HvxQR:$Qs4, IntRegs:$Rt32, ModRegs:$Mu2, HvxVR:$Vv32, HvxVR:$Vw32)>, Requires<[UseHVX]>; +def V6_vsh_altAlias : InstAlias<"$Vdd32 = vsxth($Vu32)", (V6_vsh HvxWR:$Vdd32, HvxVR:$Vu32)>, Requires<[UseHVX]>; +def V6_vshufeh_altAlias : InstAlias<"$Vd32 = vshuffeh($Vu32,$Vv32)", (V6_vshufeh HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vshuffb_altAlias : InstAlias<"$Vd32 = vshuffb($Vu32)", (V6_vshuffb HvxVR:$Vd32, HvxVR:$Vu32)>, Requires<[UseHVX]>; +def V6_vshuffeb_altAlias : InstAlias<"$Vd32 = vshuffeb($Vu32,$Vv32)", (V6_vshuffeb HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vshuffh_altAlias : InstAlias<"$Vd32 = vshuffh($Vu32)", (V6_vshuffh HvxVR:$Vd32, HvxVR:$Vu32)>, Requires<[UseHVX]>; +def V6_vshuffob_altAlias : InstAlias<"$Vd32 = vshuffob($Vu32,$Vv32)", (V6_vshuffob HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vshufoeb_altAlias : InstAlias<"$Vdd32 = vshuffoeb($Vu32,$Vv32)", (V6_vshufoeb HvxWR:$Vdd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vshufoeh_altAlias : InstAlias<"$Vdd32 = vshuffoeh($Vu32,$Vv32)", (V6_vshufoeh HvxWR:$Vdd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vshufoh_altAlias : InstAlias<"$Vd32 = vshuffoh($Vu32,$Vv32)", (V6_vshufoh HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vsubb_altAlias : InstAlias<"$Vd32 = vsubb($Vu32,$Vv32)", (V6_vsubb HvxVR:$Vd32, HvxVR:$Vu32, 
HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vsubb_dv_altAlias : InstAlias<"$Vdd32 = vsubb($Vuu32,$Vvv32)", (V6_vsubb_dv HvxWR:$Vdd32, HvxWR:$Vuu32, HvxWR:$Vvv32)>, Requires<[UseHVX]>; +def V6_vsubbnq_altAlias : InstAlias<"if (!$Qv4.b) $Vx32.b -= $Vu32.b", (V6_vsubbnq HvxVR:$Vx32, HvxQR:$Qv4, HvxVR:$Vu32)>, Requires<[UseHVX]>; +def V6_vsubbq_altAlias : InstAlias<"if ($Qv4.b) $Vx32.b -= $Vu32.b", (V6_vsubbq HvxVR:$Vx32, HvxQR:$Qv4, HvxVR:$Vu32)>, Requires<[UseHVX]>; +def V6_vsubbsat_altAlias : InstAlias<"$Vd32 = vsubb($Vu32,$Vv32):sat", (V6_vsubbsat HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vsubbsat_dv_altAlias : InstAlias<"$Vdd32 = vsubb($Vuu32,$Vvv32):sat", (V6_vsubbsat_dv HvxWR:$Vdd32, HvxWR:$Vuu32, HvxWR:$Vvv32)>, Requires<[UseHVX]>; +def V6_vsubh_altAlias : InstAlias<"$Vd32 = vsubh($Vu32,$Vv32)", (V6_vsubh HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vsubh_dv_altAlias : InstAlias<"$Vdd32 = vsubh($Vuu32,$Vvv32)", (V6_vsubh_dv HvxWR:$Vdd32, HvxWR:$Vuu32, HvxWR:$Vvv32)>, Requires<[UseHVX]>; +def V6_vsubhnq_altAlias : InstAlias<"if (!$Qv4.h) $Vx32.h -= $Vu32.h", (V6_vsubhnq HvxVR:$Vx32, HvxQR:$Qv4, HvxVR:$Vu32)>, Requires<[UseHVX]>; +def V6_vsubhq_altAlias : InstAlias<"if ($Qv4.h) $Vx32.h -= $Vu32.h", (V6_vsubhq HvxVR:$Vx32, HvxQR:$Qv4, HvxVR:$Vu32)>, Requires<[UseHVX]>; +def V6_vsubhsat_altAlias : InstAlias<"$Vd32 = vsubh($Vu32,$Vv32):sat", (V6_vsubhsat HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vsubhsat_dv_altAlias : InstAlias<"$Vdd32 = vsubh($Vuu32,$Vvv32):sat", (V6_vsubhsat_dv HvxWR:$Vdd32, HvxWR:$Vuu32, HvxWR:$Vvv32)>, Requires<[UseHVX]>; +def V6_vsubhw_altAlias : InstAlias<"$Vdd32 = vsubh($Vu32,$Vv32)", (V6_vsubhw HvxWR:$Vdd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vsububh_altAlias : InstAlias<"$Vdd32 = vsubub($Vu32,$Vv32)", (V6_vsububh HvxWR:$Vdd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vsububsat_altAlias : InstAlias<"$Vd32 = vsubub($Vu32,$Vv32):sat", (V6_vsububsat HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vsububsat_dv_altAlias : InstAlias<"$Vdd32 = vsubub($Vuu32,$Vvv32):sat", (V6_vsububsat_dv HvxWR:$Vdd32, HvxWR:$Vuu32, HvxWR:$Vvv32)>, Requires<[UseHVX]>; +def V6_vsubuhsat_altAlias : InstAlias<"$Vd32 = vsubuh($Vu32,$Vv32):sat", (V6_vsubuhsat HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vsubuhsat_dv_altAlias : InstAlias<"$Vdd32 = vsubuh($Vuu32,$Vvv32):sat", (V6_vsubuhsat_dv HvxWR:$Vdd32, HvxWR:$Vuu32, HvxWR:$Vvv32)>, Requires<[UseHVX]>; +def V6_vsubuhw_altAlias : InstAlias<"$Vdd32 = vsubuh($Vu32,$Vv32)", (V6_vsubuhw HvxWR:$Vdd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vsubuwsat_altAlias : InstAlias<"$Vd32 = vsubuw($Vu32,$Vv32):sat", (V6_vsubuwsat HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vsubuwsat_dv_altAlias : InstAlias<"$Vdd32 = vsubuw($Vuu32,$Vvv32):sat", (V6_vsubuwsat_dv HvxWR:$Vdd32, HvxWR:$Vuu32, HvxWR:$Vvv32)>, Requires<[UseHVX]>; +def V6_vsubw_altAlias : InstAlias<"$Vd32 = vsubw($Vu32,$Vv32)", (V6_vsubw HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vsubw_dv_altAlias : InstAlias<"$Vdd32 = vsubw($Vuu32,$Vvv32)", (V6_vsubw_dv HvxWR:$Vdd32, HvxWR:$Vuu32, HvxWR:$Vvv32)>, Requires<[UseHVX]>; +def V6_vsubwnq_altAlias : InstAlias<"if (!$Qv4.w) $Vx32.w -= $Vu32.w", (V6_vsubwnq HvxVR:$Vx32, HvxQR:$Qv4, HvxVR:$Vu32)>, Requires<[UseHVX]>; +def V6_vsubwq_altAlias : InstAlias<"if ($Qv4.w) $Vx32.w -= $Vu32.w", (V6_vsubwq HvxVR:$Vx32, HvxQR:$Qv4, HvxVR:$Vu32)>, 
Requires<[UseHVX]>; +def V6_vsubwsat_altAlias : InstAlias<"$Vd32 = vsubw($Vu32,$Vv32):sat", (V6_vsubwsat HvxVR:$Vd32, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_vsubwsat_dv_altAlias : InstAlias<"$Vdd32 = vsubw($Vuu32,$Vvv32):sat", (V6_vsubwsat_dv HvxWR:$Vdd32, HvxWR:$Vuu32, HvxWR:$Vvv32)>, Requires<[UseHVX]>; +def V6_vtmpyb_acc_altAlias : InstAlias<"$Vxx32 += vtmpyb($Vuu32,$Rt32)", (V6_vtmpyb_acc HvxWR:$Vxx32, HvxWR:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; +def V6_vtmpyb_altAlias : InstAlias<"$Vdd32 = vtmpyb($Vuu32,$Rt32)", (V6_vtmpyb HvxWR:$Vdd32, HvxWR:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; +def V6_vtmpybus_acc_altAlias : InstAlias<"$Vxx32 += vtmpybus($Vuu32,$Rt32)", (V6_vtmpybus_acc HvxWR:$Vxx32, HvxWR:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; +def V6_vtmpybus_altAlias : InstAlias<"$Vdd32 = vtmpybus($Vuu32,$Rt32)", (V6_vtmpybus HvxWR:$Vdd32, HvxWR:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; +def V6_vtmpyhb_acc_altAlias : InstAlias<"$Vxx32 += vtmpyhb($Vuu32,$Rt32)", (V6_vtmpyhb_acc HvxWR:$Vxx32, HvxWR:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; +def V6_vtmpyhb_altAlias : InstAlias<"$Vdd32 = vtmpyhb($Vuu32,$Rt32)", (V6_vtmpyhb HvxWR:$Vdd32, HvxWR:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>; def V6_vtran2x2_mapAlias : InstAlias<"vtrans2x2($Vy32,$Vx32,$Rt32)", (V6_vshuff HvxVR:$Vy32, HvxVR:$Vx32, IntRegs:$Rt32)>, Requires<[UseHVX]>; -def V6_vunpackb_altAlias : InstAlias<"$Vdd32=vunpackb($Vu32)", (V6_vunpackb HvxWR:$Vdd32, HvxVR:$Vu32)>, Requires<[UseHVX]>; -def V6_vunpackh_altAlias : InstAlias<"$Vdd32=vunpackh($Vu32)", (V6_vunpackh HvxWR:$Vdd32, HvxVR:$Vu32)>, Requires<[UseHVX]>; -def V6_vunpackoh_altAlias : InstAlias<"$Vxx32|=vunpackoh($Vu32)", (V6_vunpackoh HvxWR:$Vxx32, HvxVR:$Vu32)>, Requires<[UseHVX]>; -def V6_vunpackub_altAlias : InstAlias<"$Vdd32=vunpackub($Vu32)", (V6_vunpackub HvxWR:$Vdd32, HvxVR:$Vu32)>, Requires<[UseHVX]>; -def V6_vunpackuh_altAlias : InstAlias<"$Vdd32=vunpackuh($Vu32)", (V6_vunpackuh HvxWR:$Vdd32, HvxVR:$Vu32)>, Requires<[UseHVX]>; -def V6_vzb_altAlias : InstAlias<"$Vdd32=vzxtb($Vu32)", (V6_vzb HvxWR:$Vdd32, HvxVR:$Vu32)>, Requires<[UseHVX]>; -def V6_vzh_altAlias : InstAlias<"$Vdd32=vzxth($Vu32)", (V6_vzh HvxWR:$Vdd32, HvxVR:$Vu32)>, Requires<[UseHVX]>; +def V6_vunpackb_altAlias : InstAlias<"$Vdd32 = vunpackb($Vu32)", (V6_vunpackb HvxWR:$Vdd32, HvxVR:$Vu32)>, Requires<[UseHVX]>; +def V6_vunpackh_altAlias : InstAlias<"$Vdd32 = vunpackh($Vu32)", (V6_vunpackh HvxWR:$Vdd32, HvxVR:$Vu32)>, Requires<[UseHVX]>; +def V6_vunpackoh_altAlias : InstAlias<"$Vxx32 |= vunpackoh($Vu32)", (V6_vunpackoh HvxWR:$Vxx32, HvxVR:$Vu32)>, Requires<[UseHVX]>; +def V6_vunpackub_altAlias : InstAlias<"$Vdd32 = vunpackub($Vu32)", (V6_vunpackub HvxWR:$Vdd32, HvxVR:$Vu32)>, Requires<[UseHVX]>; +def V6_vunpackuh_altAlias : InstAlias<"$Vdd32 = vunpackuh($Vu32)", (V6_vunpackuh HvxWR:$Vdd32, HvxVR:$Vu32)>, Requires<[UseHVX]>; +def V6_vzb_altAlias : InstAlias<"$Vdd32 = vzxtb($Vu32)", (V6_vzb HvxWR:$Vdd32, HvxVR:$Vu32)>, Requires<[UseHVX]>; +def V6_vzh_altAlias : InstAlias<"$Vdd32 = vzxth($Vu32)", (V6_vzh HvxWR:$Vdd32, HvxVR:$Vu32)>, Requires<[UseHVX]>; def Y2_dcfetchAlias : InstAlias<"dcfetch($Rs32)", (Y2_dcfetchbo IntRegs:$Rs32, 0)>; diff --git a/lib/Target/Hexagon/HexagonDepOperands.td b/lib/Target/Hexagon/HexagonDepOperands.td index 0e83b2678732..9d960953f8f5 100644 --- a/lib/Target/Hexagon/HexagonDepOperands.td +++ b/lib/Target/Hexagon/HexagonDepOperands.td @@ -1,4 +1,4 @@ -//===--- HexagonDepOperands.td --------------------------------------------===// +//===- 
HexagonDepOperands.td ----------------------------------------------===// // // The LLVM Compiler Infrastructure // @@ -6,10 +6,10 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// +// Automatically generated file, please consult code owner before editing. +//===----------------------------------------------------------------------===// + -def s3_0ImmOperand : AsmOperandClass { let Name = "s3_0Imm"; let RenderMethod = "addSignedImmOperands"; } -def s3_0Imm : Operand { let ParserMatchClass = s3_0ImmOperand; let DecoderMethod = "s3_0ImmDecoder"; } -def s3_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<3, 0>(N->getSExtValue());}]>; def s4_0ImmOperand : AsmOperandClass { let Name = "s4_0Imm"; let RenderMethod = "addSignedImmOperands"; } def s4_0Imm : Operand { let ParserMatchClass = s4_0ImmOperand; let DecoderMethod = "s4_0ImmDecoder"; } def s4_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<4, 0>(N->getSExtValue());}]>; @@ -61,6 +61,9 @@ def u1_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<1, 0>(N->getSExtVal def s31_1ImmOperand : AsmOperandClass { let Name = "s31_1Imm"; let RenderMethod = "addSignedImmOperands"; } def s31_1Imm : Operand { let ParserMatchClass = s31_1ImmOperand; let DecoderMethod = "s31_1ImmDecoder"; } def s31_1ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<32, 1>(N->getSExtValue());}]>; +def s3_0ImmOperand : AsmOperandClass { let Name = "s3_0Imm"; let RenderMethod = "addSignedImmOperands"; } +def s3_0Imm : Operand { let ParserMatchClass = s3_0ImmOperand; let DecoderMethod = "s3_0ImmDecoder"; } +def s3_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<3, 0>(N->getSExtValue());}]>; def s30_2ImmOperand : AsmOperandClass { let Name = "s30_2Imm"; let RenderMethod = "addSignedImmOperands"; } def s30_2Imm : Operand { let ParserMatchClass = s30_2ImmOperand; let DecoderMethod = "s30_2ImmDecoder"; } def s30_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<32, 2>(N->getSExtValue());}]>; diff --git a/lib/Target/Hexagon/HexagonDepTimingClasses.h b/lib/Target/Hexagon/HexagonDepTimingClasses.h index 2a3fb832733b..656c83f2d0c4 100644 --- a/lib/Target/Hexagon/HexagonDepTimingClasses.h +++ b/lib/Target/Hexagon/HexagonDepTimingClasses.h @@ -1,4 +1,4 @@ -//===--- HexagonDepTimingClasses.h ----------------------------------------===// +//===- HexagonDepTimingClasses.h ------------------------------------------===// // // The LLVM Compiler Infrastructure // @@ -6,6 +6,11 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// +// Automatically generated file, please consult code owner before editing. 
+//===----------------------------------------------------------------------===// + + + #ifndef TARGET_HEXAGON_HEXAGON_DEP_TIMING_CLASSES_H #define TARGET_HEXAGON_HEXAGON_DEP_TIMING_CLASSES_H @@ -15,21 +20,19 @@ namespace llvm { inline bool is_TC3x(unsigned SchedClass) { switch (SchedClass) { - case Hexagon::Sched::tc_1000eb10: - case Hexagon::Sched::tc_2aaab1e0: - case Hexagon::Sched::tc_4997da4a: - case Hexagon::Sched::tc_5d806107: - case Hexagon::Sched::tc_6264c5e0: - case Hexagon::Sched::tc_69bb508b: - case Hexagon::Sched::tc_8c8041e6: - case Hexagon::Sched::tc_8cb685d9: - case Hexagon::Sched::tc_a12a5971: - case Hexagon::Sched::tc_ae0722f7: - case Hexagon::Sched::tc_ae2c2dc2: - case Hexagon::Sched::tc_bc5561d8: - case Hexagon::Sched::tc_d6a805a8: - case Hexagon::Sched::tc_f055fbb6: - case Hexagon::Sched::tc_feb4974b: + case Hexagon::Sched::tc_16d0d8d5: + case Hexagon::Sched::tc_1853ea6d: + case Hexagon::Sched::tc_60571023: + case Hexagon::Sched::tc_7934b9df: + case Hexagon::Sched::tc_8fd5f294: + case Hexagon::Sched::tc_b9c0b731: + case Hexagon::Sched::tc_bcc96cee: + case Hexagon::Sched::tc_c6ce9b3f: + case Hexagon::Sched::tc_c6ebf8dd: + case Hexagon::Sched::tc_c82dc1ff: + case Hexagon::Sched::tc_caaebcba: + case Hexagon::Sched::tc_cf59f215: + case Hexagon::Sched::tc_e913dc32: return true; default: return false; @@ -38,8 +41,8 @@ inline bool is_TC3x(unsigned SchedClass) { inline bool is_TC2early(unsigned SchedClass) { switch (SchedClass) { - case Hexagon::Sched::tc_35fb9d13: - case Hexagon::Sched::tc_cbe45117: + case Hexagon::Sched::tc_14cd4cfa: + case Hexagon::Sched::tc_2a160009: return true; default: return false; @@ -48,12 +51,12 @@ inline bool is_TC2early(unsigned SchedClass) { inline bool is_TC4x(unsigned SchedClass) { switch (SchedClass) { - case Hexagon::Sched::tc_09c86199: - case Hexagon::Sched::tc_2d1e6f5c: - case Hexagon::Sched::tc_2e55aa16: - case Hexagon::Sched::tc_3bea1824: - case Hexagon::Sched::tc_e836c161: - case Hexagon::Sched::tc_f1aa2cdb: + case Hexagon::Sched::tc_038a1342: + case Hexagon::Sched::tc_4d99bca9: + case Hexagon::Sched::tc_6792d5ff: + case Hexagon::Sched::tc_9c00ce8d: + case Hexagon::Sched::tc_d580173f: + case Hexagon::Sched::tc_f3eaa14b: return true; default: return false; @@ -62,30 +65,23 @@ inline bool is_TC4x(unsigned SchedClass) { inline bool is_TC2(unsigned SchedClass) { switch (SchedClass) { - case Hexagon::Sched::tc_090485bb: - case Hexagon::Sched::tc_1fe8323c: - case Hexagon::Sched::tc_37326008: - case Hexagon::Sched::tc_3c10f809: - case Hexagon::Sched::tc_47ab9233: - case Hexagon::Sched::tc_485bb57c: - case Hexagon::Sched::tc_511f28f6: - case Hexagon::Sched::tc_583510c7: - case Hexagon::Sched::tc_63cd9d2d: - case Hexagon::Sched::tc_76c4c5ef: - case Hexagon::Sched::tc_7ca2ea10: - case Hexagon::Sched::tc_87601822: - case Hexagon::Sched::tc_88fa2da6: - case Hexagon::Sched::tc_94e6ffd9: - case Hexagon::Sched::tc_ab1b5e74: - case Hexagon::Sched::tc_b0f50e3c: - case Hexagon::Sched::tc_bd16579e: - case Hexagon::Sched::tc_c0cd91a8: - case Hexagon::Sched::tc_ca280e8b: - case Hexagon::Sched::tc_cd321066: - case Hexagon::Sched::tc_d95f4e98: - case Hexagon::Sched::tc_e17ce9ad: - case Hexagon::Sched::tc_f1240c08: - case Hexagon::Sched::tc_faab1248: + case Hexagon::Sched::tc_00afc57e: + case Hexagon::Sched::tc_1b9c9ee5: + case Hexagon::Sched::tc_234a11a5: + case Hexagon::Sched::tc_2b6f77c6: + case Hexagon::Sched::tc_41d5298e: + case Hexagon::Sched::tc_5ba5997d: + case Hexagon::Sched::tc_84df2cd3: + case Hexagon::Sched::tc_87735c3b: + case 
Hexagon::Sched::tc_897d1a9d: + case Hexagon::Sched::tc_976ddc4f: + case Hexagon::Sched::tc_b44c6e2a: + case Hexagon::Sched::tc_b9c4623f: + case Hexagon::Sched::tc_c2f7d806: + case Hexagon::Sched::tc_c74f796f: + case Hexagon::Sched::tc_d088982c: + case Hexagon::Sched::tc_ef84f62f: + case Hexagon::Sched::tc_f49e76f4: return true; default: return false; @@ -94,43 +90,45 @@ inline bool is_TC2(unsigned SchedClass) { inline bool is_TC1(unsigned SchedClass) { switch (SchedClass) { - case Hexagon::Sched::tc_07ac815d: - case Hexagon::Sched::tc_1b6011fb: - case Hexagon::Sched::tc_1b834fe7: - case Hexagon::Sched::tc_1e062b18: - case Hexagon::Sched::tc_1f9668cc: - case Hexagon::Sched::tc_43068634: - case Hexagon::Sched::tc_47f0b7ad: - case Hexagon::Sched::tc_537e2013: - case Hexagon::Sched::tc_548f402d: - case Hexagon::Sched::tc_5fa2857c: - case Hexagon::Sched::tc_5fe9fcd0: - case Hexagon::Sched::tc_78b3c689: - case Hexagon::Sched::tc_7c2dcd4d: - case Hexagon::Sched::tc_81a23d44: - case Hexagon::Sched::tc_821c4233: - case Hexagon::Sched::tc_92d1833c: - case Hexagon::Sched::tc_9a13af9d: - case Hexagon::Sched::tc_9c18c9a5: - case Hexagon::Sched::tc_9df8b0dc: - case Hexagon::Sched::tc_9f518242: - case Hexagon::Sched::tc_a1fb80e1: - case Hexagon::Sched::tc_a333d2a9: - case Hexagon::Sched::tc_a87879e8: - case Hexagon::Sched::tc_aad55963: - case Hexagon::Sched::tc_b08b653e: - case Hexagon::Sched::tc_b324366f: - case Hexagon::Sched::tc_b5bfaa60: - case Hexagon::Sched::tc_b86c7e8b: - case Hexagon::Sched::tc_c58f771a: - case Hexagon::Sched::tc_d108a090: - case Hexagon::Sched::tc_d1b5a4b6: - case Hexagon::Sched::tc_d2609065: - case Hexagon::Sched::tc_d63b71d1: - case Hexagon::Sched::tc_e2c31426: - case Hexagon::Sched::tc_e8c7a357: - case Hexagon::Sched::tc_eb07ef6f: - case Hexagon::Sched::tc_f16d5b17: + case Hexagon::Sched::tc_181af5d0: + case Hexagon::Sched::tc_1b82a277: + case Hexagon::Sched::tc_1e856f58: + case Hexagon::Sched::tc_351fed2d: + case Hexagon::Sched::tc_3669266a: + case Hexagon::Sched::tc_3cb8ea06: + case Hexagon::Sched::tc_452f85af: + case Hexagon::Sched::tc_481e5e5c: + case Hexagon::Sched::tc_49eb22c8: + case Hexagon::Sched::tc_523fcf30: + case Hexagon::Sched::tc_52d7bbea: + case Hexagon::Sched::tc_53bc8a6a: + case Hexagon::Sched::tc_540fdfbc: + case Hexagon::Sched::tc_55050d58: + case Hexagon::Sched::tc_609d2efe: + case Hexagon::Sched::tc_68cb12ce: + case Hexagon::Sched::tc_6ebb4a12: + case Hexagon::Sched::tc_6efc556e: + case Hexagon::Sched::tc_73043bf4: + case Hexagon::Sched::tc_7a830544: + case Hexagon::Sched::tc_855b0b61: + case Hexagon::Sched::tc_8fe6b782: + case Hexagon::Sched::tc_90f3e30c: + case Hexagon::Sched::tc_97743097: + case Hexagon::Sched::tc_99be14ca: + case Hexagon::Sched::tc_9faf76ae: + case Hexagon::Sched::tc_a46f0df5: + case Hexagon::Sched::tc_a904d137: + case Hexagon::Sched::tc_b9488031: + case Hexagon::Sched::tc_be706f30: + case Hexagon::Sched::tc_c6aa82f7: + case Hexagon::Sched::tc_cde8b071: + case Hexagon::Sched::tc_d6bf0472: + case Hexagon::Sched::tc_dbdffe3d: + case Hexagon::Sched::tc_e0739b8c: + case Hexagon::Sched::tc_e1e99bfa: + case Hexagon::Sched::tc_e9fae2d6: + case Hexagon::Sched::tc_f2704b9a: + case Hexagon::Sched::tc_f8eeed7a: return true; default: return false; diff --git a/lib/Target/Hexagon/HexagonEarlyIfConv.cpp b/lib/Target/Hexagon/HexagonEarlyIfConv.cpp index bec759a826d9..0f1b9a4733c5 100644 --- a/lib/Target/Hexagon/HexagonEarlyIfConv.cpp +++ b/lib/Target/Hexagon/HexagonEarlyIfConv.cpp @@ -25,39 +25,39 @@ // // Example: // -// %vreg40 = L2_loadrub_io 
%vreg39, 1 -// %vreg41 = S2_tstbit_i %vreg40, 0 -// J2_jumpt %vreg41, , %PC -// J2_jump , %PC -// Successors according to CFG: BB#4(62) BB#5(62) +// %40 = L2_loadrub_io killed %39, 1 +// %41 = S2_tstbit_i killed %40, 0 +// J2_jumpt killed %41, <%bb.5>, implicit dead %pc +// J2_jump <%bb.4>, implicit dead %pc +// Successors according to CFG: %bb.4(62) %bb.5(62) // -// BB#4: derived from LLVM BB %if.then -// Predecessors according to CFG: BB#3 -// %vreg11 = A2_addp %vreg6, %vreg10 -// S2_storerd_io %vreg32, 16, %vreg11 -// Successors according to CFG: BB#5 +// %bb.4: derived from LLVM BB %if.then +// Predecessors according to CFG: %bb.3 +// %11 = A2_addp %6, %10 +// S2_storerd_io %32, 16, %11 +// Successors according to CFG: %bb.5 // -// BB#5: derived from LLVM BB %if.end -// Predecessors according to CFG: BB#3 BB#4 -// %vreg12 = PHI %vreg6, , %vreg11, -// %vreg13 = A2_addp %vreg7, %vreg12 -// %vreg42 = C2_cmpeqi %vreg9, 10 -// J2_jumpf %vreg42, , %PC -// J2_jump , %PC -// Successors according to CFG: BB#6(4) BB#3(124) +// %bb.5: derived from LLVM BB %if.end +// Predecessors according to CFG: %bb.3 %bb.4 +// %12 = PHI %6, <%bb.3>, %11, <%bb.4> +// %13 = A2_addp %7, %12 +// %42 = C2_cmpeqi %9, 10 +// J2_jumpf killed %42, <%bb.3>, implicit dead %pc +// J2_jump <%bb.6>, implicit dead %pc +// Successors according to CFG: %bb.6(4) %bb.3(124) // // would become: // -// %vreg40 = L2_loadrub_io %vreg39, 1 -// %vreg41 = S2_tstbit_i %vreg40, 0 -// spec-> %vreg11 = A2_addp %vreg6, %vreg10 -// pred-> S2_pstorerdf_io %vreg41, %vreg32, 16, %vreg11 -// %vreg46 = PS_pselect %vreg41, %vreg6, %vreg11 -// %vreg13 = A2_addp %vreg7, %vreg46 -// %vreg42 = C2_cmpeqi %vreg9, 10 -// J2_jumpf %vreg42, , %PC -// J2_jump , %PC -// Successors according to CFG: BB#6 BB#3 +// %40 = L2_loadrub_io killed %39, 1 +// %41 = S2_tstbit_i killed %40, 0 +// spec-> %11 = A2_addp %6, %10 +// pred-> S2_pstorerdf_io %41, %32, 16, %11 +// %46 = PS_pselect %41, %6, %11 +// %13 = A2_addp %7, %46 +// %42 = C2_cmpeqi %9, 10 +// J2_jumpf killed %42, <%bb.3>, implicit dead %pc +// J2_jump <%bb.6>, implicit dead %pc +// Successors according to CFG: %bb.6 %bb.3 #include "Hexagon.h" #include "HexagonInstrInfo.h" @@ -238,7 +238,7 @@ bool HexagonEarlyIfConversion::isPreheader(const MachineBasicBlock *B) const { bool HexagonEarlyIfConversion::matchFlowPattern(MachineBasicBlock *B, MachineLoop *L, FlowPattern &FP) { - DEBUG(dbgs() << "Checking flow pattern at BB#" << B->getNumber() << "\n"); + DEBUG(dbgs() << "Checking flow pattern at " << printMBBReference(*B) << "\n"); // Interested only in conditional branches, no .new, no new-value, etc. // Check the terminators directly, it's easier than handling all responses @@ -1047,7 +1047,7 @@ void HexagonEarlyIfConversion::simplifyFlowGraph(const FlowPattern &FP) { } bool HexagonEarlyIfConversion::runOnMachineFunction(MachineFunction &MF) { - if (skipFunction(*MF.getFunction())) + if (skipFunction(MF.getFunction())) return false; auto &ST = MF.getSubtarget(); diff --git a/lib/Target/Hexagon/HexagonExpandCondsets.cpp b/lib/Target/Hexagon/HexagonExpandCondsets.cpp index 51c3b7843700..c2feaf5737b2 100644 --- a/lib/Target/Hexagon/HexagonExpandCondsets.cpp +++ b/lib/Target/Hexagon/HexagonExpandCondsets.cpp @@ -17,33 +17,33 @@ // // Liveness tracking aside, the main functionality of this pass is divided // into two steps. 
The first step is to replace an instruction -// vreg0 = C2_mux vreg1, vreg2, vreg3 +// %0 = C2_mux %1, %2, %3 // with a pair of conditional transfers -// vreg0 = A2_tfrt vreg1, vreg2 -// vreg0 = A2_tfrf vreg1, vreg3 +// %0 = A2_tfrt %1, %2 +// %0 = A2_tfrf %1, %3 // It is the intention that the execution of this pass could be terminated // after this step, and the code generated would be functionally correct. // -// If the uses of the source values vreg1 and vreg2 are kills, and their +// If the uses of the source values %1 and %2 are kills, and their // definitions are predicable, then in the second step, the conditional // transfers will then be rewritten as predicated instructions. E.g. -// vreg0 = A2_or vreg1, vreg2 -// vreg3 = A2_tfrt vreg99, vreg0 +// %0 = A2_or %1, %2 +// %3 = A2_tfrt %99, killed %0 // will be rewritten as -// vreg3 = A2_port vreg99, vreg1, vreg2 +// %3 = A2_port %99, %1, %2 // // This replacement has two variants: "up" and "down". Consider this case: -// vreg0 = A2_or vreg1, vreg2 +// %0 = A2_or %1, %2 // ... [intervening instructions] ... -// vreg3 = A2_tfrt vreg99, vreg0 +// %3 = A2_tfrt %99, killed %0 // variant "up": -// vreg3 = A2_port vreg99, vreg1, vreg2 -// ... [intervening instructions, vreg0->vreg3] ... +// %3 = A2_port %99, %1, %2 +// ... [intervening instructions, %0->vreg3] ... // [deleted] // variant "down": // [deleted] // ... [intervening instructions] ... -// vreg3 = A2_port vreg99, vreg1, vreg2 +// %3 = A2_port %99, %1, %2 // // Both, one or none of these variants may be valid, and checks are made // to rule out inapplicable variants. @@ -51,13 +51,13 @@ // As an additional optimization, before either of the two steps above is // executed, the pass attempts to coalesce the target register with one of // the source registers, e.g. given an instruction -// vreg3 = C2_mux vreg0, vreg1, vreg2 -// vreg3 will be coalesced with either vreg1 or vreg2. If this succeeds, +// %3 = C2_mux %0, %1, %2 +// %3 will be coalesced with either %1 or %2. If this succeeds, // the instruction would then be (for example) -// vreg3 = C2_mux vreg0, vreg3, vreg2 +// %3 = C2_mux %0, %3, %2 // and, under certain circumstances, this could result in only one predicated // instruction: -// vreg3 = A2_tfrf vreg0, vreg2 +// %3 = A2_tfrf %0, %2 // // Splitting a definition of a register into two predicated transfers @@ -65,18 +65,18 @@ // will see both instructions as actual definitions, and will mark the // first one as dead. The definition is not actually dead, and this // situation will need to be fixed. For example: -// vreg1 = A2_tfrt ... ; marked as dead -// vreg1 = A2_tfrf ... +// dead %1 = A2_tfrt ... ; marked as dead +// %1 = A2_tfrf ... // // Since any of the individual predicated transfers may end up getting // removed (in case it is an identity copy), some pre-existing def may // be marked as dead after live interval recomputation: -// vreg1 = ... ; marked as dead +// dead %1 = ... ; marked as dead // ... -// vreg1 = A2_tfrf ... ; if A2_tfrt is removed -// This case happens if vreg1 was used as a source in A2_tfrt, which means +// %1 = A2_tfrf ... ; if A2_tfrt is removed +// This case happens if %1 was used as a source in A2_tfrt, which means // that is it actually live at the A2_tfrf, and so the now dead definition -// of vreg1 will need to be updated to non-dead at some point. +// of %1 will need to be updated to non-dead at some point. 
// // This issue could be remedied by adding implicit uses to the predicated // transfers, but this will create a problem with subsequent predication, @@ -93,7 +93,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/LiveInterval.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" @@ -654,7 +654,7 @@ bool HexagonExpandCondsets::split(MachineInstr &MI, return false; TfrCounter++; } - DEBUG(dbgs() << "\nsplitting BB#" << MI.getParent()->getNumber() << ": " + DEBUG(dbgs() << "\nsplitting " << printMBBReference(*MI.getParent()) << ": " << MI); MachineOperand &MD = MI.getOperand(0); // Definition MachineOperand &MP = MI.getOperand(1); // Predicate register @@ -760,8 +760,8 @@ MachineInstr *HexagonExpandCondsets::getReachingDefForPred(RegisterRef RD, if (RR.Reg != RD.Reg) continue; // If the "Reg" part agrees, there is still the subregister to check. - // If we are looking for vreg1:loreg, we can skip vreg1:hireg, but - // not vreg1 (w/o subregisters). + // If we are looking for %1:loreg, we can skip %1:hireg, but + // not %1 (w/o subregisters). if (RR.Sub == RD.Sub) return MI; if (RR.Sub == 0 || RD.Sub == 0) @@ -1071,7 +1071,7 @@ bool HexagonExpandCondsets::predicateInBlock(MachineBasicBlock &B, bool Done = predicate(*I, (Opc == Hexagon::A2_tfrt), UpdRegs); if (!Done) { // If we didn't predicate I, we may need to remove it in case it is - // an "identity" copy, e.g. vreg1 = A2_tfrt vreg2, vreg1. + // an "identity" copy, e.g. %1 = A2_tfrt %2, %1. if (RegisterRef(I->getOperand(0)) == RegisterRef(I->getOperand(2))) { for (auto &Op : I->operands()) if (Op.isReg()) @@ -1198,18 +1198,18 @@ bool HexagonExpandCondsets::coalesceSegments( MachineOperand &S1 = CI->getOperand(2), &S2 = CI->getOperand(3); bool Done = false; // Consider this case: - // vreg1 = instr1 ... - // vreg2 = instr2 ... - // vreg0 = C2_mux ..., vreg1, vreg2 - // If vreg0 was coalesced with vreg1, we could end up with the following + // %1 = instr1 ... + // %2 = instr2 ... + // %0 = C2_mux ..., %1, %2 + // If %0 was coalesced with %1, we could end up with the following // code: - // vreg0 = instr1 ... - // vreg2 = instr2 ... - // vreg0 = A2_tfrf ..., vreg2 + // %0 = instr1 ... + // %2 = instr2 ... + // %0 = A2_tfrf ..., %2 // which will later become: - // vreg0 = instr1 ... - // vreg0 = instr2_cNotPt ... - // i.e. there will be an unconditional definition (instr1) of vreg0 + // %0 = instr1 ... + // %0 = instr2_cNotPt ... + // i.e. there will be an unconditional definition (instr1) of %0 // followed by a conditional one. The output dependency was there before // and it unavoidable, but if instr1 is predicable, we will no longer be // able to predicate it here. 
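Aside (illustration only, not code from this patch): the first step described in the header comment above — replacing "%0 = C2_mux %1, %2, %3" with an A2_tfrt/A2_tfrf pair — can be sketched with the usual MachineInstrBuilder idiom. The helper name is hypothetical, the operand layout is taken from the comment, and the liveness/kill-flag and coalescing work the real pass performs is omitted.

#include "HexagonInstrInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
using namespace llvm;

// Minimal sketch: split one C2_mux into two conditional transfers.
static void splitMuxSketch(MachineInstr &MI, const HexagonInstrInfo &HII) {
  MachineBasicBlock &B = *MI.getParent();
  const DebugLoc &DL = MI.getDebugLoc();
  unsigned DefR   = MI.getOperand(0).getReg(); // %0
  unsigned PredR  = MI.getOperand(1).getReg(); // %1
  unsigned TrueR  = MI.getOperand(2).getReg(); // %2
  unsigned FalseR = MI.getOperand(3).getReg(); // %3
  // %0 = A2_tfrt %1, %2
  BuildMI(B, MI, DL, HII.get(Hexagon::A2_tfrt), DefR)
      .addReg(PredR).addReg(TrueR);
  // %0 = A2_tfrf %1, %3
  BuildMI(B, MI, DL, HII.get(Hexagon::A2_tfrf), DefR)
      .addReg(PredR).addReg(FalseR);
  MI.eraseFromParent();
}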
@@ -1243,7 +1243,7 @@ bool HexagonExpandCondsets::coalesceSegments( } bool HexagonExpandCondsets::runOnMachineFunction(MachineFunction &MF) { - if (skipFunction(*MF.getFunction())) + if (skipFunction(MF.getFunction())) return false; HII = static_cast(MF.getSubtarget().getInstrInfo()); @@ -1253,7 +1253,7 @@ bool HexagonExpandCondsets::runOnMachineFunction(MachineFunction &MF) { MRI = &MF.getRegInfo(); DEBUG(LIS->print(dbgs() << "Before expand-condsets\n", - MF.getFunction()->getParent())); + MF.getFunction().getParent())); bool Changed = false; std::set CoalUpd, PredUpd; @@ -1281,7 +1281,7 @@ bool HexagonExpandCondsets::runOnMachineFunction(MachineFunction &MF) { KillUpd.insert(Op.getReg()); updateLiveness(KillUpd, false, true, false); DEBUG(LIS->print(dbgs() << "After coalescing\n", - MF.getFunction()->getParent())); + MF.getFunction().getParent())); // First, simply split all muxes into a pair of conditional transfers // and update the live intervals to reflect the new arrangement. The @@ -1298,7 +1298,7 @@ bool HexagonExpandCondsets::runOnMachineFunction(MachineFunction &MF) { // (because of predicated defs), so make sure they are left untouched. // Predication does not use live intervals. DEBUG(LIS->print(dbgs() << "After splitting\n", - MF.getFunction()->getParent())); + MF.getFunction().getParent())); // Traverse all blocks and collapse predicable instructions feeding // conditional transfers into predicated instructions. @@ -1307,7 +1307,7 @@ bool HexagonExpandCondsets::runOnMachineFunction(MachineFunction &MF) { for (auto &B : MF) Changed |= predicateInBlock(B, PredUpd); DEBUG(LIS->print(dbgs() << "After predicating\n", - MF.getFunction()->getParent())); + MF.getFunction().getParent())); PredUpd.insert(CoalUpd.begin(), CoalUpd.end()); updateLiveness(PredUpd, true, true, true); @@ -1315,7 +1315,7 @@ bool HexagonExpandCondsets::runOnMachineFunction(MachineFunction &MF) { DEBUG({ if (Changed) LIS->print(dbgs() << "After expand-condsets\n", - MF.getFunction()->getParent()); + MF.getFunction().getParent()); }); return Changed; diff --git a/lib/Target/Hexagon/HexagonFixupHwLoops.cpp b/lib/Target/Hexagon/HexagonFixupHwLoops.cpp index 6336075917e5..a842b672736c 100644 --- a/lib/Target/Hexagon/HexagonFixupHwLoops.cpp +++ b/lib/Target/Hexagon/HexagonFixupHwLoops.cpp @@ -89,7 +89,7 @@ static bool isHardwareLoop(const MachineInstr &MI) { } bool HexagonFixupHwLoops::runOnMachineFunction(MachineFunction &MF) { - if (skipFunction(*MF.getFunction())) + if (skipFunction(MF.getFunction())) return false; return fixupLoopInstrs(MF); } diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp index ebb7add82e16..65a2fc35b11b 100644 --- a/lib/Target/Hexagon/HexagonFrameLowering.cpp +++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp @@ -225,7 +225,7 @@ namespace { bool HexagonCallFrameInformation::runOnMachineFunction(MachineFunction &MF) { auto &HFI = *MF.getSubtarget().getFrameLowering(); bool NeedCFI = MF.getMMI().hasDebugInfo() || - MF.getFunction()->needsUnwindTableEntry(); + MF.getFunction().needsUnwindTableEntry(); if (!NeedCFI) return false; @@ -336,6 +336,8 @@ static bool needsStackFrame(const MachineBasicBlock &MBB, const BitVector &CSR, /// in the block. 
static bool hasTailCall(const MachineBasicBlock &MBB) { MachineBasicBlock::const_iterator I = MBB.getLastNonDebugInstr(); + if (I == MBB.end()) + return false; unsigned RetOpc = I->getOpcode(); return RetOpc == Hexagon::PS_tailcall_i || RetOpc == Hexagon::PS_tailcall_r; } @@ -373,17 +375,17 @@ static bool isRestoreCall(unsigned Opc) { } static inline bool isOptNone(const MachineFunction &MF) { - return MF.getFunction()->hasFnAttribute(Attribute::OptimizeNone) || + return MF.getFunction().hasFnAttribute(Attribute::OptimizeNone) || MF.getTarget().getOptLevel() == CodeGenOpt::None; } static inline bool isOptSize(const MachineFunction &MF) { - const Function &F = *MF.getFunction(); + const Function &F = MF.getFunction(); return F.optForSize() && !F.optForMinSize(); } static inline bool isMinSize(const MachineFunction &MF) { - return MF.getFunction()->optForMinSize(); + return MF.getFunction().optForMinSize(); } /// Implements shrink-wrapping of the stack frame. By default, stack frame @@ -443,7 +445,7 @@ void HexagonFrameLowering::findShrunkPrologEpilog(MachineFunction &MF, DEBUG({ dbgs() << "Blocks needing SF: {"; for (auto &B : SFBlocks) - dbgs() << " BB#" << B->getNumber(); + dbgs() << " " << printMBBReference(*B); dbgs() << " }\n"; }); // No frame needed? @@ -464,12 +466,16 @@ void HexagonFrameLowering::findShrunkPrologEpilog(MachineFunction &MF, break; } DEBUG({ - dbgs() << "Computed dom block: BB#"; - if (DomB) dbgs() << DomB->getNumber(); - else dbgs() << ""; - dbgs() << ", computed pdom block: BB#"; - if (PDomB) dbgs() << PDomB->getNumber(); - else dbgs() << ""; + dbgs() << "Computed dom block: "; + if (DomB) + dbgs() << printMBBReference(*DomB); + else + dbgs() << ""; + dbgs() << ", computed pdom block: "; + if (PDomB) + dbgs() << printMBBReference(*PDomB); + else + dbgs() << ""; dbgs() << "\n"; }); if (!DomB || !PDomB) @@ -632,7 +638,9 @@ void HexagonFrameLowering::insertEpilogueInBlock(MachineBasicBlock &MBB) const { // Handle EH_RETURN. if (RetOpc == Hexagon::EH_RETURN_JMPR) { - BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::L2_deallocframe)); + BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::L2_deallocframe)) + .addDef(Hexagon::D15) + .addReg(Hexagon::R30); BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_add), SP) .addReg(SP) .addReg(Hexagon::R28); @@ -678,11 +686,15 @@ void HexagonFrameLowering::insertEpilogueInBlock(MachineBasicBlock &MBB) const { // otherwise just add deallocframe. The function could be returning via a // tail call. if (RetOpc != Hexagon::PS_jmpret || DisableDeallocRet) { - BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::L2_deallocframe)); + BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::L2_deallocframe)) + .addDef(Hexagon::D15) + .addReg(Hexagon::R30); return; } unsigned NewOpc = Hexagon::L4_return; - MachineInstr *NewI = BuildMI(MBB, RetI, dl, HII.get(NewOpc)); + MachineInstr *NewI = BuildMI(MBB, RetI, dl, HII.get(NewOpc)) + .addDef(Hexagon::D15) + .addReg(Hexagon::R30); // Transfer the function live-out registers. NewI->copyImplicitOps(MF, *RetI); MBB.erase(RetI); @@ -705,10 +717,13 @@ void HexagonFrameLowering::insertAllocframe(MachineBasicBlock &MBB, MachineMemOperand::MOStore, 4, 4); DebugLoc dl = MBB.findDebugLoc(InsertPt); + unsigned SP = HRI.getStackRegister(); if (NumBytes >= ALLOCFRAME_MAX) { // Emit allocframe(#0). 
BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::S2_allocframe)) + .addDef(SP) + .addReg(SP) .addImm(0) .addMemOperand(MMO); @@ -719,6 +734,8 @@ void HexagonFrameLowering::insertAllocframe(MachineBasicBlock &MBB, .addImm(-int(NumBytes)); } else { BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::S2_allocframe)) + .addDef(SP) + .addReg(SP) .addImm(NumBytes) .addMemOperand(MMO); } @@ -943,7 +960,7 @@ void HexagonFrameLowering::insertCFIInstructionsAt(MachineBasicBlock &MBB, } bool HexagonFrameLowering::hasFP(const MachineFunction &MF) const { - if (MF.getFunction()->hasFnAttribute(Attribute::Naked)) + if (MF.getFunction().hasFnAttribute(Attribute::Naked)) return false; auto &MFI = MF.getFrameInfo(); @@ -1379,8 +1396,7 @@ static void dump_registers(BitVector &Regs, const TargetRegisterInfo &TRI) { bool HexagonFrameLowering::assignCalleeSavedSpillSlots(MachineFunction &MF, const TargetRegisterInfo *TRI, std::vector &CSI) const { - DEBUG(dbgs() << __func__ << " on " - << MF.getFunction()->getName() << '\n'); + DEBUG(dbgs() << __func__ << " on " << MF.getName() << '\n'); MachineFrameInfo &MFI = MF.getFrameInfo(); BitVector SRegs(Hexagon::NUM_TARGET_REGS); @@ -2010,7 +2026,7 @@ void HexagonFrameLowering::optimizeSpillSlots(MachineFunction &MF, auto P = BlockIndexes.insert( std::make_pair(&B, HexagonBlockRanges::InstrIndexMap(B))); auto &IndexMap = P.first->second; - DEBUG(dbgs() << "Index map for BB#" << B.getNumber() << "\n" + DEBUG(dbgs() << "Index map for " << printMBBReference(B) << "\n" << IndexMap << '\n'); for (auto &In : B) { @@ -2129,7 +2145,8 @@ void HexagonFrameLowering::optimizeSpillSlots(MachineFunction &MF, else dbgs() << "\n"; for (auto &R : P.second.Map) - dbgs() << " BB#" << R.first->getNumber() << " { " << R.second << "}\n"; + dbgs() << " " << printMBBReference(*R.first) << " { " << R.second + << "}\n"; } }); @@ -2162,7 +2179,7 @@ void HexagonFrameLowering::optimizeSpillSlots(MachineFunction &MF, auto &FIs = P.second; if (FIs.empty()) continue; - dbgs() << " BB#" << P.first->getNumber() << ": {"; + dbgs() << " " << printMBBReference(*P.first) << ": {"; for (auto I : FIs) { dbgs() << " fi#" << I; if (LoxFIs.count(I)) @@ -2183,7 +2200,7 @@ void HexagonFrameLowering::optimizeSpillSlots(MachineFunction &MF, HexagonBlockRanges::InstrIndexMap &IM = F->second; HexagonBlockRanges::RegToRangeMap LM = HBR.computeLiveMap(IM); HexagonBlockRanges::RegToRangeMap DM = HBR.computeDeadMap(IM, LM); - DEBUG(dbgs() << "BB#" << B.getNumber() << " dead map\n" + DEBUG(dbgs() << printMBBReference(B) << " dead map\n" << HexagonBlockRanges::PrintRangeMap(DM, HRI)); for (auto FI : BlockFIMap[&B]) { diff --git a/lib/Target/Hexagon/HexagonGatherPacketize.cpp b/lib/Target/Hexagon/HexagonGatherPacketize.cpp new file mode 100644 index 000000000000..253f09d12839 --- /dev/null +++ b/lib/Target/Hexagon/HexagonGatherPacketize.cpp @@ -0,0 +1,104 @@ +//===- HexagonGatherPacketize.cpp -----------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This pass ensures that producer and consumer of VTMP are paired in a bundle. 
+//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "gather-packetize" + +#include "HexagonTargetMachine.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBundle.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +using namespace llvm; + +cl::opt EnableGatherPacketize( + "hexagon-enable-gather-packetize", cl::Hidden, cl::init(true), + cl::desc("Generate gather packets before packetization")); + +namespace llvm { +FunctionPass *createHexagonGatherPacketize(); +void initializeHexagonGatherPacketizePass(PassRegistry &); +} + +namespace { +class HexagonGatherPacketize : public MachineFunctionPass { +public: + static char ID; + HexagonGatherPacketize() : MachineFunctionPass(ID) { + PassRegistry &Registry = *PassRegistry::getPassRegistry(); + initializeHexagonGatherPacketizePass(Registry); + } + + StringRef getPassName() const override { + return "Hexagon Gather Packetize Code"; + } + bool runOnMachineFunction(MachineFunction &Fn) override; +}; + +char HexagonGatherPacketize::ID = 0; + +static inline bool isVtmpDef(const MachineInstr &MI) { + for (const MachineOperand &MO : MI.operands()) + if (MO.isReg() && MO.isDef() && MO.isImplicit() && + (MO.getReg() == Hexagon::VTMP)) { + return true; + } + return false; +} + +static inline bool isVtmpUse(const MachineInstr &MI) { + return (MI.mayStore() && (MI.getOperand(2)).isReg() && + ((MI.getOperand(2)).getReg() == Hexagon::VTMP)); +} + +bool HexagonGatherPacketize::runOnMachineFunction(MachineFunction &Fn) { + if (!EnableGatherPacketize) + return false; + auto &ST = Fn.getSubtarget(); + bool HasV65 = ST.hasV65TOps(); + bool UseHVX = ST.useHVXOps(); + if (!(HasV65 & UseHVX)) + return false; + + for (auto &MBB : Fn) { + bool VtmpDef = false; + MachineBasicBlock::iterator MII, MIE, DefMII; + for (MII = MBB.begin(), MIE = MBB.end(); MII != MIE; ++MII) { + MachineInstr &MI = *MII; + if (VtmpDef) { + if (!isVtmpUse(MI)) + continue; + MBB.splice(std::next(DefMII), &MBB, MII); + finalizeBundle(MBB, DefMII.getInstrIterator(), + std::next(MII).getInstrIterator()); + VtmpDef = false; + continue; + } + if (!(isVtmpDef(MI))) + continue; + VtmpDef = true; + DefMII = MII; + } + assert(!VtmpDef && "VTMP producer and consumer not in same block"); + } + return true; +} +} + +//===----------------------------------------------------------------------===// +// Public Constructor Functions +//===----------------------------------------------------------------------===// + +INITIALIZE_PASS(HexagonGatherPacketize, "hexagon-gather-packetize", + "Hexagon gather packetize Code", false, false) + +FunctionPass *llvm::createHexagonGatherPacketize() { + return new HexagonGatherPacketize(); +} diff --git a/lib/Target/Hexagon/HexagonGenInsert.cpp b/lib/Target/Hexagon/HexagonGenInsert.cpp index 09d3e6d4a154..9fb7d26598a7 100644 --- a/lib/Target/Hexagon/HexagonGenInsert.cpp +++ b/lib/Target/Hexagon/HexagonGenInsert.cpp @@ -55,6 +55,12 @@ static cl::opt VRegDistCutoff("insert-dist-cutoff", cl::init(30U), cl::Hidden, cl::ZeroOrMore, cl::desc("Vreg distance cutoff for insert " "generation.")); +// Limit the container sizes for extreme cases where we run out of memory. 
+static cl::opt MaxORLSize("insert-max-orl", cl::init(4096), + cl::Hidden, cl::ZeroOrMore, cl::desc("Maximum size of OrderedRegisterList")); +static cl::opt MaxIFMSize("insert-max-ifmap", cl::init(1024), + cl::Hidden, cl::ZeroOrMore, cl::desc("Maximum size of IFMap")); + static cl::opt OptTiming("insert-timing", cl::init(false), cl::Hidden, cl::ZeroOrMore, cl::desc("Enable timing of insert generation")); static cl::opt OptTimingDetail("insert-timing-detail", cl::init(false), @@ -86,6 +92,7 @@ namespace { struct RegisterSet : private BitVector { RegisterSet() = default; explicit RegisterSet(unsigned s, bool t = false) : BitVector(s, t) {} + RegisterSet(const RegisterSet &RS) : BitVector(RS) {} using BitVector::clear; @@ -370,9 +377,11 @@ namespace { class OrderedRegisterList { using ListType = std::vector; + const unsigned MaxSize; public: - OrderedRegisterList(const RegisterOrdering &RO) : Ord(RO) {} + OrderedRegisterList(const RegisterOrdering &RO) + : MaxSize(MaxORLSize), Ord(RO) {} void insert(unsigned VR); void remove(unsigned VR); @@ -433,12 +442,17 @@ void OrderedRegisterList::insert(unsigned VR) { Seq.push_back(VR); else Seq.insert(L, VR); + + unsigned S = Seq.size(); + if (S > MaxSize) + Seq.resize(MaxSize); + assert(Seq.size() <= MaxSize); } void OrderedRegisterList::remove(unsigned VR) { iterator L = std::lower_bound(Seq.begin(), Seq.end(), VR, Ord); - assert(L != Seq.end()); - Seq.erase(L); + if (L != Seq.end()) + Seq.erase(L); } namespace { @@ -915,7 +929,7 @@ bool HexagonGenInsert::findRecordInsertForms(unsigned VR, void HexagonGenInsert::collectInBlock(MachineBasicBlock *B, OrderedRegisterList &AVs) { if (isDebug()) - dbgs() << "visiting block BB#" << B->getNumber() << "\n"; + dbgs() << "visiting block " << printMBBReference(*B) << "\n"; // First, check if this block is reachable at all. If not, the bit tracker // will not have any information about registers in it. @@ -950,6 +964,9 @@ void HexagonGenInsert::collectInBlock(MachineBasicBlock *B, continue; findRecordInsertForms(VR, AVs); + // Stop if the map size is too large. + if (IFMap.size() > MaxIFMSize) + return; } } @@ -1106,10 +1123,10 @@ void HexagonGenInsert::pruneCoveredSets(unsigned VR) { // Now, remove those whose sets of potentially removable registers are // contained in another IF candidate for VR. For example, given these - // candidates for vreg45, - // %vreg45: - // (%vreg44,%vreg41,#9,#8), { %vreg42 } - // (%vreg43,%vreg41,#9,#8), { %vreg42 %vreg44 } + // candidates for %45, + // %45: + // (%44,%41,#9,#8), { %42 } + // (%43,%41,#9,#8), { %42 %44 } // remove the first one, since it is contained in the second one. 
for (unsigned i = 0, n = LL.size(); i < n; ) { const RegisterSet &RMi = LL[i].second; @@ -1482,7 +1499,7 @@ bool HexagonGenInsert::removeDeadCode(MachineDomTreeNode *N) { } bool HexagonGenInsert::runOnMachineFunction(MachineFunction &MF) { - if (skipFunction(*MF.getFunction())) + if (skipFunction(MF.getFunction())) return false; bool Timing = OptTiming, TimingDetail = Timing && OptTimingDetail; diff --git a/lib/Target/Hexagon/HexagonGenMux.cpp b/lib/Target/Hexagon/HexagonGenMux.cpp index dc1cdc8d0967..5a001d6ed9c1 100644 --- a/lib/Target/Hexagon/HexagonGenMux.cpp +++ b/lib/Target/Hexagon/HexagonGenMux.cpp @@ -368,7 +368,7 @@ bool HexagonGenMux::genMuxInBlock(MachineBasicBlock &B) { } bool HexagonGenMux::runOnMachineFunction(MachineFunction &MF) { - if (skipFunction(*MF.getFunction())) + if (skipFunction(MF.getFunction())) return false; HII = MF.getSubtarget().getInstrInfo(); HRI = MF.getSubtarget().getRegisterInfo(); diff --git a/lib/Target/Hexagon/HexagonGenPredicate.cpp b/lib/Target/Hexagon/HexagonGenPredicate.cpp index 4eb24e07be4b..9288ed03d4d2 100644 --- a/lib/Target/Hexagon/HexagonGenPredicate.cpp +++ b/lib/Target/Hexagon/HexagonGenPredicate.cpp @@ -492,7 +492,7 @@ bool HexagonGenPredicate::eliminatePredCopies(MachineFunction &MF) { } bool HexagonGenPredicate::runOnMachineFunction(MachineFunction &MF) { - if (skipFunction(*MF.getFunction())) + if (skipFunction(MF.getFunction())) return false; TII = MF.getSubtarget().getInstrInfo(); diff --git a/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/lib/Target/Hexagon/HexagonHardwareLoops.cpp index 5ca8b0f30e01..715fd52f3acd 100644 --- a/lib/Target/Hexagon/HexagonHardwareLoops.cpp +++ b/lib/Target/Hexagon/HexagonHardwareLoops.cpp @@ -377,7 +377,7 @@ FunctionPass *llvm::createHexagonHardwareLoops() { bool HexagonHardwareLoops::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "********* Hexagon Hardware Loops *********\n"); - if (skipFunction(*MF.getFunction())) + if (skipFunction(MF.getFunction())) return false; bool Changed = false; @@ -1011,7 +1011,7 @@ bool HexagonHardwareLoops::isInvalidLoopOperation(const MachineInstr *MI, bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L, bool IsInnerHWLoop) const { const std::vector &Blocks = L->getBlocks(); - DEBUG(dbgs() << "\nhw_loop head, BB#" << Blocks[0]->getNumber();); + DEBUG(dbgs() << "\nhw_loop head, " << printMBBReference(*Blocks[0])); for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { MachineBasicBlock *MBB = Blocks[i]; for (MachineBasicBlock::iterator @@ -1367,7 +1367,7 @@ bool HexagonHardwareLoops::isLoopFeeder(MachineLoop *L, MachineBasicBlock *A, LoopFeederMap &LoopFeederPhi) const { if (LoopFeederPhi.find(MO->getReg()) == LoopFeederPhi.end()) { const std::vector &Blocks = L->getBlocks(); - DEBUG(dbgs() << "\nhw_loop head, BB#" << Blocks[0]->getNumber();); + DEBUG(dbgs() << "\nhw_loop head, " << printMBBReference(*Blocks[0])); // Ignore all BBs that form Loop. 
for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { MachineBasicBlock *MBB = Blocks[i]; @@ -1622,8 +1622,8 @@ bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) { RegisterInductionSet IndRegs; // Look for induction patterns: - // vreg1 = PHI ..., [ latch, vreg2 ] - // vreg2 = ADD vreg1, imm + // %1 = PHI ..., [ latch, %2 ] + // %2 = ADD %1, imm using instr_iterator = MachineBasicBlock::instr_iterator; for (instr_iterator I = Header->instr_begin(), E = Header->instr_end(); @@ -1720,7 +1720,7 @@ bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) { MachineOperand &MO = PredDef->getOperand(i); if (MO.isReg()) { // Skip all implicit references. In one case there was: - // %vreg140 = FCMPUGT32_rr %vreg138, %vreg139, %USR + // %140 = FCMPUGT32_rr %138, %139, implicit %usr if (MO.isImplicit()) continue; if (MO.isUse()) { diff --git a/lib/Target/Hexagon/HexagonIICHVX.td b/lib/Target/Hexagon/HexagonIICHVX.td index 1493d52f08e8..a804c5a80d03 100644 --- a/lib/Target/Hexagon/HexagonIICHVX.td +++ b/lib/Target/Hexagon/HexagonIICHVX.td @@ -7,6 +7,7 @@ // //===----------------------------------------------------------------------===// +def CVI_GATHER_PSEUDO : InstrItinClass; def CVI_VA : InstrItinClass; class HVXItin { @@ -14,5 +15,14 @@ class HVXItin { InstrItinData, InstrStage<1, [CVI_XLANE,CVI_SHIFT, CVI_MPY0, CVI_MPY1]>], - [9, 7, 7, 7], [HVX_FWD, HVX_FWD, HVX_FWD]>]; + [9, 7, 7, 7], [HVX_FWD, HVX_FWD, HVX_FWD]>, + + // Used by Gather Pseudo Instructions which are expanded into + // V6_vgather* and V6_vS32b_new_ai. Even though these instructions + // use CVI_ST resource, it's not included below to avoid having more than + // 4 InstrStages and thus changing 'MaxResTerms' to 5. + InstrItinData , + InstrStage<1, [CVI_LD], 0>, InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>]>]; } diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp index 2551fe5a1406..a6ac4e3df745 100644 --- a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp +++ b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp @@ -641,6 +641,27 @@ void HexagonDAGToDAGISel::SelectIntrinsicWChain(SDNode *N) { CurDAG->RemoveDeadNode(N); return; } + + unsigned IntNo = cast(N->getOperand(1))->getZExtValue(); + if (IntNo == Intrinsic::hexagon_V6_vgathermw || + IntNo == Intrinsic::hexagon_V6_vgathermw_128B || + IntNo == Intrinsic::hexagon_V6_vgathermh || + IntNo == Intrinsic::hexagon_V6_vgathermh_128B || + IntNo == Intrinsic::hexagon_V6_vgathermhw || + IntNo == Intrinsic::hexagon_V6_vgathermhw_128B) { + SelectV65Gather(N); + return; + } + if (IntNo == Intrinsic::hexagon_V6_vgathermwq || + IntNo == Intrinsic::hexagon_V6_vgathermwq_128B || + IntNo == Intrinsic::hexagon_V6_vgathermhq || + IntNo == Intrinsic::hexagon_V6_vgathermhq_128B || + IntNo == Intrinsic::hexagon_V6_vgathermhwq || + IntNo == Intrinsic::hexagon_V6_vgathermhwq_128B) { + SelectV65GatherPred(N); + return; + } + SelectCode(N); } @@ -654,6 +675,12 @@ void HexagonDAGToDAGISel::SelectIntrinsicWOChain(SDNode *N) { case Intrinsic::hexagon_S2_vsplatrh: Bits = 16; break; + case Intrinsic::hexagon_V6_vaddcarry: + case Intrinsic::hexagon_V6_vaddcarry_128B: + case Intrinsic::hexagon_V6_vsubcarry: + case Intrinsic::hexagon_V6_vsubcarry_128B: + SelectHVXDualOutput(N); + return; default: SelectCode(N); return; @@ -754,7 +781,6 @@ void HexagonDAGToDAGISel::SelectBitcast(SDNode *N) { CurDAG->RemoveDeadNode(N); } - void HexagonDAGToDAGISel::Select(SDNode *N) { if (N->isMachineOpcode()) return N->setNodeId(-1); // Already 
selected. @@ -772,6 +798,13 @@ void HexagonDAGToDAGISel::Select(SDNode *N) { case ISD::INTRINSIC_WO_CHAIN: return SelectIntrinsicWOChain(N); } + if (HST->useHVXOps()) { + switch (N->getOpcode()) { + case ISD::VECTOR_SHUFFLE: return SelectHvxShuffle(N); + case HexagonISD::VROR: return SelectHvxRor(N); + } + } + SelectCode(N); } @@ -1415,26 +1448,6 @@ bool HexagonDAGToDAGISel::keepsLowBits(const SDValue &Val, unsigned NumBits, return false; } - -bool HexagonDAGToDAGISel::isOrEquivalentToAdd(const SDNode *N) const { - assert(N->getOpcode() == ISD::OR); - auto *C = dyn_cast(N->getOperand(1)); - if (!C) - return false; - - // Detect when "or" is used to add an offset to a stack object. - if (auto *FN = dyn_cast(N->getOperand(0))) { - MachineFrameInfo &MFI = MF->getFrameInfo(); - unsigned A = MFI.getObjectAlignment(FN->getIndex()); - assert(isPowerOf2_32(A)); - int32_t Off = C->getSExtValue(); - // If the alleged offset fits in the zero bits guaranteed by - // the alignment, then this or is really an add. - return (Off >= 0) && (((A-1) & Off) == unsigned(Off)); - } - return false; -} - bool HexagonDAGToDAGISel::isAlignedMemNode(const MemSDNode *N) const { return N->getAlignment() >= N->getMemoryVT().getStoreSize(); } @@ -1733,10 +1746,10 @@ unsigned HexagonDAGToDAGISel::getUsesInFunction(const Value *V) { return GAUsesInFunction[V]; unsigned Result = 0; - const Function *CurF = CurDAG->getMachineFunction().getFunction(); + const Function &CurF = CurDAG->getMachineFunction().getFunction(); for (const User *U : V->users()) { if (isa(U) && - cast(U)->getParent()->getParent() == CurF) + cast(U)->getParent()->getParent() == &CurF) ++Result; } diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAG.h b/lib/Target/Hexagon/HexagonISelDAGToDAG.h index 4a7f4b79f8fb..fc66940ee52d 100644 --- a/lib/Target/Hexagon/HexagonISelDAGToDAG.h +++ b/lib/Target/Hexagon/HexagonISelDAGToDAG.h @@ -26,6 +26,7 @@ namespace llvm { class MachineFunction; class HexagonInstrInfo; class HexagonRegisterInfo; +class HexagonTargetLowering; class HexagonDAGToDAGISel : public SelectionDAGISel { const HexagonSubtarget *HST; @@ -100,15 +101,29 @@ class HexagonDAGToDAGISel : public SelectionDAGISel { void SelectConstant(SDNode *N); void SelectConstantFP(SDNode *N); void SelectBitcast(SDNode *N); - void SelectVectorShuffle(SDNode *N); + void SelectV65Gather(SDNode *N); + void SelectV65GatherPred(SDNode *N); + void SelectHVXDualOutput(SDNode *N); - // Include the pieces autogenerated from the target description. + // Include the declarations autogenerated from the selection patterns. #define GET_DAGISEL_DECL #include "HexagonGenDAGISel.inc" private: + // This is really only to get access to ReplaceNode (which is a protected + // member). Any other members used by HvxSelector can be moved around to + // make them accessible). 
+ friend struct HvxSelector; + + SDValue selectUndef(const SDLoc &dl, MVT ResTy) { + SDNode *U = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy); + return SDValue(U, 0); + } + + void SelectHvxShuffle(SDNode *N); + void SelectHvxRor(SDNode *N); + bool keepsLowBits(const SDValue &Val, unsigned NumBits, SDValue &Src); - bool isOrEquivalentToAdd(const SDNode *N) const; bool isAlignedMemNode(const MemSDNode *N) const; bool isSmallStackStore(const StoreSDNode *N) const; bool isPositiveHalfWord(const SDNode *N) const; diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp new file mode 100644 index 000000000000..de3741e507e4 --- /dev/null +++ b/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp @@ -0,0 +1,2111 @@ +//===-- HexagonISelDAGToDAGHVX.cpp ----------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "Hexagon.h" +#include "HexagonISelDAGToDAG.h" +#include "HexagonISelLowering.h" +#include "HexagonTargetMachine.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" + +#include +#include +#include +#include +#include + +#define DEBUG_TYPE "hexagon-isel" + +using namespace llvm; + +namespace { + +// -------------------------------------------------------------------- +// Implementation of permutation networks. + +// Implementation of the node routing through butterfly networks: +// - Forward delta. +// - Reverse delta. +// - Benes. +// +// +// Forward delta network consists of log(N) steps, where N is the number +// of inputs. In each step, an input can stay in place, or it can get +// routed to another position[1]. The step after that consists of two +// networks, each half in size in terms of the number of nodes. In those +// terms, in the given step, an input can go to either the upper or the +// lower network in the next step. +// +// [1] Hexagon's vdelta/vrdelta allow an element to be routed to both +// positions as long as there is no conflict. + +// Here's a delta network for 8 inputs, only the switching routes are +// shown: +// +// Steps: +// |- 1 ---------------|- 2 -----|- 3 -| +// +// Inp[0] *** *** *** *** Out[0] +// \ / \ / \ / +// \ / \ / X +// \ / \ / / \ +// Inp[1] *** \ / *** X *** *** Out[1] +// \ \ / / \ / \ / +// \ \ / / X X +// \ \ / / / \ / \ +// Inp[2] *** \ \ / / *** X *** *** Out[2] +// \ \ X / / / \ \ / +// \ \ / \ / / / \ X +// \ X X / / \ / \ +// Inp[3] *** \ / \ / \ / *** *** *** Out[3] +// \ X X X / +// \ / \ / \ / \ / +// X X X X +// / \ / \ / \ / \ +// / X X X \ +// Inp[4] *** / \ / \ / \ *** *** *** Out[4] +// / X X \ \ / \ / +// / / \ / \ \ \ / X +// / / X \ \ \ / / \ +// Inp[5] *** / / \ \ *** X *** *** Out[5] +// / / \ \ \ / \ / +// / / \ \ X X +// / / \ \ / \ / \ +// Inp[6] *** / \ *** X *** *** Out[6] +// / \ / \ \ / +// / \ / \ X +// / \ / \ / \ +// Inp[7] *** *** *** *** Out[7] +// +// +// Reverse delta network is same as delta network, with the steps in +// the opposite order. +// +// +// Benes network is a forward delta network immediately followed by +// a reverse delta network. 
+ +enum class ColorKind { None, Red, Black }; + +// Graph coloring utility used to partition nodes into two groups: +// they will correspond to nodes routed to the upper and lower networks. +struct Coloring { + using Node = int; + using MapType = std::map; + static constexpr Node Ignore = Node(-1); + + Coloring(ArrayRef Ord) : Order(Ord) { + build(); + if (!color()) + Colors.clear(); + } + + const MapType &colors() const { + return Colors; + } + + ColorKind other(ColorKind Color) { + if (Color == ColorKind::None) + return ColorKind::Red; + return Color == ColorKind::Red ? ColorKind::Black : ColorKind::Red; + } + + void dump() const; + +private: + ArrayRef Order; + MapType Colors; + std::set Needed; + + using NodeSet = std::set; + std::map Edges; + + Node conj(Node Pos) { + Node Num = Order.size(); + return (Pos < Num/2) ? Pos + Num/2 : Pos - Num/2; + } + + ColorKind getColor(Node N) { + auto F = Colors.find(N); + return F != Colors.end() ? F->second : ColorKind::None; + } + + std::pair getUniqueColor(const NodeSet &Nodes); + + void build(); + bool color(); +}; +} // namespace + +std::pair Coloring::getUniqueColor(const NodeSet &Nodes) { + auto Color = ColorKind::None; + for (Node N : Nodes) { + ColorKind ColorN = getColor(N); + if (ColorN == ColorKind::None) + continue; + if (Color == ColorKind::None) + Color = ColorN; + else if (Color != ColorKind::None && Color != ColorN) + return { false, ColorKind::None }; + } + return { true, Color }; +} + +void Coloring::build() { + // Add Order[P] and Order[conj(P)] to Edges. + for (unsigned P = 0; P != Order.size(); ++P) { + Node I = Order[P]; + if (I != Ignore) { + Needed.insert(I); + Node PC = Order[conj(P)]; + if (PC != Ignore && PC != I) + Edges[I].insert(PC); + } + } + // Add I and conj(I) to Edges. + for (unsigned I = 0; I != Order.size(); ++I) { + if (!Needed.count(I)) + continue; + Node C = conj(I); + // This will create an entry in the edge table, even if I is not + // connected to any other node. This is necessary, because it still + // needs to be colored. + NodeSet &Is = Edges[I]; + if (Needed.count(C)) + Is.insert(C); + } +} + +bool Coloring::color() { + SetVector FirstQ; + auto Enqueue = [this,&FirstQ] (Node N) { + SetVector Q; + Q.insert(N); + for (unsigned I = 0; I != Q.size(); ++I) { + NodeSet &Ns = Edges[Q[I]]; + Q.insert(Ns.begin(), Ns.end()); + } + FirstQ.insert(Q.begin(), Q.end()); + }; + for (Node N : Needed) + Enqueue(N); + + for (Node N : FirstQ) { + if (Colors.count(N)) + continue; + NodeSet &Ns = Edges[N]; + auto P = getUniqueColor(Ns); + if (!P.first) + return false; + Colors[N] = other(P.second); + } + + // First, color nodes that don't have any dups. + for (auto E : Edges) { + Node N = E.first; + if (!Needed.count(conj(N)) || Colors.count(N)) + continue; + auto P = getUniqueColor(E.second); + if (!P.first) + return false; + Colors[N] = other(P.second); + } + + // Now, nodes that are still uncolored. Since the graph can be modified + // in this step, create a work queue. + std::vector WorkQ; + for (auto E : Edges) { + Node N = E.first; + if (!Colors.count(N)) + WorkQ.push_back(N); + } + + for (unsigned I = 0; I < WorkQ.size(); ++I) { + Node N = WorkQ[I]; + NodeSet &Ns = Edges[N]; + auto P = getUniqueColor(Ns); + if (P.first) { + Colors[N] = other(P.second); + continue; + } + + // Coloring failed. Split this node. 
+ Node C = conj(N); + ColorKind ColorN = other(ColorKind::None); + ColorKind ColorC = other(ColorN); + NodeSet &Cs = Edges[C]; + NodeSet CopyNs = Ns; + for (Node M : CopyNs) { + ColorKind ColorM = getColor(M); + if (ColorM == ColorC) { + // Connect M with C, disconnect M from N. + Cs.insert(M); + Edges[M].insert(C); + Ns.erase(M); + Edges[M].erase(N); + } + } + Colors[N] = ColorN; + Colors[C] = ColorC; + } + + // Explicitly assign "None" all all uncolored nodes. + for (unsigned I = 0; I != Order.size(); ++I) + if (Colors.count(I) == 0) + Colors[I] = ColorKind::None; + + return true; +} + +LLVM_DUMP_METHOD +void Coloring::dump() const { + dbgs() << "{ Order: {"; + for (unsigned I = 0; I != Order.size(); ++I) { + Node P = Order[I]; + if (P != Ignore) + dbgs() << ' ' << P; + else + dbgs() << " -"; + } + dbgs() << " }\n"; + dbgs() << " Needed: {"; + for (Node N : Needed) + dbgs() << ' ' << N; + dbgs() << " }\n"; + + dbgs() << " Edges: {\n"; + for (auto E : Edges) { + dbgs() << " " << E.first << " -> {"; + for (auto N : E.second) + dbgs() << ' ' << N; + dbgs() << " }\n"; + } + dbgs() << " }\n"; + + auto ColorKindToName = [](ColorKind C) { + switch (C) { + case ColorKind::None: + return "None"; + case ColorKind::Red: + return "Red"; + case ColorKind::Black: + return "Black"; + } + llvm_unreachable("all ColorKinds should be handled by the switch above"); + }; + + dbgs() << " Colors: {\n"; + for (auto C : Colors) + dbgs() << " " << C.first << " -> " << ColorKindToName(C.second) << "\n"; + dbgs() << " }\n}\n"; +} + +namespace { +// Base class of for reordering networks. They don't strictly need to be +// permutations, as outputs with repeated occurrences of an input element +// are allowed. +struct PermNetwork { + using Controls = std::vector; + using ElemType = int; + static constexpr ElemType Ignore = ElemType(-1); + + enum : uint8_t { + None, + Pass, + Switch + }; + enum : uint8_t { + Forward, + Reverse + }; + + PermNetwork(ArrayRef Ord, unsigned Mult = 1) { + Order.assign(Ord.data(), Ord.data()+Ord.size()); + Log = 0; + + unsigned S = Order.size(); + while (S >>= 1) + ++Log; + + Table.resize(Order.size()); + for (RowType &Row : Table) + Row.resize(Mult*Log, None); + } + + void getControls(Controls &V, unsigned StartAt, uint8_t Dir) const { + unsigned Size = Order.size(); + V.resize(Size); + for (unsigned I = 0; I != Size; ++I) { + unsigned W = 0; + for (unsigned L = 0; L != Log; ++L) { + unsigned C = ctl(I, StartAt+L) == Switch; + if (Dir == Forward) + W |= C << (Log-1-L); + else + W |= C << L; + } + assert(isUInt<8>(W)); + V[I] = uint8_t(W); + } + } + + uint8_t ctl(ElemType Pos, unsigned Step) const { + return Table[Pos][Step]; + } + unsigned size() const { + return Order.size(); + } + unsigned steps() const { + return Log; + } + +protected: + unsigned Log; + std::vector Order; + using RowType = std::vector; + std::vector Table; +}; + +struct ForwardDeltaNetwork : public PermNetwork { + ForwardDeltaNetwork(ArrayRef Ord) : PermNetwork(Ord) {} + + bool run(Controls &V) { + if (!route(Order.data(), Table.data(), size(), 0)) + return false; + getControls(V, 0, Forward); + return true; + } + +private: + bool route(ElemType *P, RowType *T, unsigned Size, unsigned Step); +}; + +struct ReverseDeltaNetwork : public PermNetwork { + ReverseDeltaNetwork(ArrayRef Ord) : PermNetwork(Ord) {} + + bool run(Controls &V) { + if (!route(Order.data(), Table.data(), size(), 0)) + return false; + getControls(V, 0, Reverse); + return true; + } + +private: + bool route(ElemType *P, RowType *T, unsigned Size, unsigned 
Step); +}; + +struct BenesNetwork : public PermNetwork { + BenesNetwork(ArrayRef Ord) : PermNetwork(Ord, 2) {} + + bool run(Controls &F, Controls &R) { + if (!route(Order.data(), Table.data(), size(), 0)) + return false; + + getControls(F, 0, Forward); + getControls(R, Log, Reverse); + return true; + } + +private: + bool route(ElemType *P, RowType *T, unsigned Size, unsigned Step); +}; +} // namespace + +bool ForwardDeltaNetwork::route(ElemType *P, RowType *T, unsigned Size, + unsigned Step) { + bool UseUp = false, UseDown = false; + ElemType Num = Size; + + // Cannot use coloring here, because coloring is used to determine + // the "big" switch, i.e. the one that changes halves, and in a forward + // network, a color can be simultaneously routed to both halves in the + // step we're working on. + for (ElemType J = 0; J != Num; ++J) { + ElemType I = P[J]; + // I is the position in the input, + // J is the position in the output. + if (I == Ignore) + continue; + uint8_t S; + if (I < Num/2) + S = (J < Num/2) ? Pass : Switch; + else + S = (J < Num/2) ? Switch : Pass; + + // U is the element in the table that needs to be updated. + ElemType U = (S == Pass) ? I : (I < Num/2 ? I+Num/2 : I-Num/2); + if (U < Num/2) + UseUp = true; + else + UseDown = true; + if (T[U][Step] != S && T[U][Step] != None) + return false; + T[U][Step] = S; + } + + for (ElemType J = 0; J != Num; ++J) + if (P[J] != Ignore && P[J] >= Num/2) + P[J] -= Num/2; + + if (Step+1 < Log) { + if (UseUp && !route(P, T, Size/2, Step+1)) + return false; + if (UseDown && !route(P+Size/2, T+Size/2, Size/2, Step+1)) + return false; + } + return true; +} + +bool ReverseDeltaNetwork::route(ElemType *P, RowType *T, unsigned Size, + unsigned Step) { + unsigned Pets = Log-1 - Step; + bool UseUp = false, UseDown = false; + ElemType Num = Size; + + // In this step half-switching occurs, so coloring can be used. + Coloring G({P,Size}); + const Coloring::MapType &M = G.colors(); + if (M.empty()) + return false; + + ColorKind ColorUp = ColorKind::None; + for (ElemType J = 0; J != Num; ++J) { + ElemType I = P[J]; + // I is the position in the input, + // J is the position in the output. + if (I == Ignore) + continue; + ColorKind C = M.at(I); + if (C == ColorKind::None) + continue; + // During "Step", inputs cannot switch halves, so if the "up" color + // is still unknown, make sure that it is selected in such a way that + // "I" will stay in the same half. + bool InpUp = I < Num/2; + if (ColorUp == ColorKind::None) + ColorUp = InpUp ? C : G.other(C); + if ((C == ColorUp) != InpUp) { + // If I should go to a different half than where is it now, give up. + return false; + } + + uint8_t S; + if (InpUp) { + S = (J < Num/2) ? Pass : Switch; + UseUp = true; + } else { + S = (J < Num/2) ? Switch : Pass; + UseDown = true; + } + T[J][Pets] = S; + } + + // Reorder the working permutation according to the computed switch table + // for the last step (i.e. Pets). 
+ for (ElemType J = 0, E = Size / 2; J != E; ++J) { + ElemType PJ = P[J]; // Current values of P[J] + ElemType PC = P[J+Size/2]; // and P[conj(J)] + ElemType QJ = PJ; // New values of P[J] + ElemType QC = PC; // and P[conj(J)] + if (T[J][Pets] == Switch) + QC = PJ; + if (T[J+Size/2][Pets] == Switch) + QJ = PC; + P[J] = QJ; + P[J+Size/2] = QC; + } + + for (ElemType J = 0; J != Num; ++J) + if (P[J] != Ignore && P[J] >= Num/2) + P[J] -= Num/2; + + if (Step+1 < Log) { + if (UseUp && !route(P, T, Size/2, Step+1)) + return false; + if (UseDown && !route(P+Size/2, T+Size/2, Size/2, Step+1)) + return false; + } + return true; +} + +bool BenesNetwork::route(ElemType *P, RowType *T, unsigned Size, + unsigned Step) { + Coloring G({P,Size}); + const Coloring::MapType &M = G.colors(); + if (M.empty()) + return false; + ElemType Num = Size; + + unsigned Pets = 2*Log-1 - Step; + bool UseUp = false, UseDown = false; + + // Both assignments, i.e. Red->Up and Red->Down are valid, but they will + // result in different controls. Let's pick the one where the first + // control will be "Pass". + ColorKind ColorUp = ColorKind::None; + for (ElemType J = 0; J != Num; ++J) { + ElemType I = P[J]; + if (I == Ignore) + continue; + ColorKind C = M.at(I); + if (C == ColorKind::None) + continue; + if (ColorUp == ColorKind::None) { + ColorUp = (I < Num / 2) ? ColorKind::Red : ColorKind::Black; + } + unsigned CI = (I < Num/2) ? I+Num/2 : I-Num/2; + if (C == ColorUp) { + if (I < Num/2) + T[I][Step] = Pass; + else + T[CI][Step] = Switch; + T[J][Pets] = (J < Num/2) ? Pass : Switch; + UseUp = true; + } else { // Down + if (I < Num/2) + T[CI][Step] = Switch; + else + T[I][Step] = Pass; + T[J][Pets] = (J < Num/2) ? Switch : Pass; + UseDown = true; + } + } + + // Reorder the working permutation according to the computed switch table + // for the last step (i.e. Pets). + for (ElemType J = 0; J != Num/2; ++J) { + ElemType PJ = P[J]; // Current values of P[J] + ElemType PC = P[J+Num/2]; // and P[conj(J)] + ElemType QJ = PJ; // New values of P[J] + ElemType QC = PC; // and P[conj(J)] + if (T[J][Pets] == Switch) + QC = PJ; + if (T[J+Num/2][Pets] == Switch) + QJ = PC; + P[J] = QJ; + P[J+Num/2] = QC; + } + + for (ElemType J = 0; J != Num; ++J) + if (P[J] != Ignore && P[J] >= Num/2) + P[J] -= Num/2; + + if (Step+1 < Log) { + if (UseUp && !route(P, T, Size/2, Step+1)) + return false; + if (UseDown && !route(P+Size/2, T+Size/2, Size/2, Step+1)) + return false; + } + return true; +} + +// -------------------------------------------------------------------- +// Support for building selection results (output instructions that are +// parts of the final selection). + +namespace { +struct OpRef { + OpRef(SDValue V) : OpV(V) {} + bool isValue() const { return OpV.getNode() != nullptr; } + bool isValid() const { return isValue() || !(OpN & Invalid); } + static OpRef res(int N) { return OpRef(Whole | (N & Index)); } + static OpRef fail() { return OpRef(Invalid); } + + static OpRef lo(const OpRef &R) { + assert(!R.isValue()); + return OpRef(R.OpN & (Undef | Index | LoHalf)); + } + static OpRef hi(const OpRef &R) { + assert(!R.isValue()); + return OpRef(R.OpN & (Undef | Index | HiHalf)); + } + static OpRef undef(MVT Ty) { return OpRef(Undef | Ty.SimpleTy); } + + // Direct value. + SDValue OpV = SDValue(); + + // Reference to the operand of the input node: + // If the 31st bit is 1, it's undef, otherwise, bits 28..0 are the + // operand index: + // If bit 30 is set, it's the high half of the operand. 
+ // If bit 29 is set, it's the low half of the operand. + unsigned OpN = 0; + + enum : unsigned { + Invalid = 0x10000000, + LoHalf = 0x20000000, + HiHalf = 0x40000000, + Whole = LoHalf | HiHalf, + Undef = 0x80000000, + Index = 0x0FFFFFFF, // Mask of the index value. + IndexBits = 28, + }; + + void print(raw_ostream &OS, const SelectionDAG &G) const; + +private: + OpRef(unsigned N) : OpN(N) {} +}; + +struct NodeTemplate { + NodeTemplate() = default; + unsigned Opc = 0; + MVT Ty = MVT::Other; + std::vector Ops; + + void print(raw_ostream &OS, const SelectionDAG &G) const; +}; + +struct ResultStack { + ResultStack(SDNode *Inp) + : InpNode(Inp), InpTy(Inp->getValueType(0).getSimpleVT()) {} + SDNode *InpNode; + MVT InpTy; + unsigned push(const NodeTemplate &Res) { + List.push_back(Res); + return List.size()-1; + } + unsigned push(unsigned Opc, MVT Ty, std::vector &&Ops) { + NodeTemplate Res; + Res.Opc = Opc; + Res.Ty = Ty; + Res.Ops = Ops; + return push(Res); + } + bool empty() const { return List.empty(); } + unsigned size() const { return List.size(); } + unsigned top() const { return size()-1; } + const NodeTemplate &operator[](unsigned I) const { return List[I]; } + unsigned reset(unsigned NewTop) { + List.resize(NewTop+1); + return NewTop; + } + + using BaseType = std::vector; + BaseType::iterator begin() { return List.begin(); } + BaseType::iterator end() { return List.end(); } + BaseType::const_iterator begin() const { return List.begin(); } + BaseType::const_iterator end() const { return List.end(); } + + BaseType List; + + void print(raw_ostream &OS, const SelectionDAG &G) const; +}; +} // namespace + +void OpRef::print(raw_ostream &OS, const SelectionDAG &G) const { + if (isValue()) { + OpV.getNode()->print(OS, &G); + return; + } + if (OpN & Invalid) { + OS << "invalid"; + return; + } + if (OpN & Undef) { + OS << "undef"; + return; + } + if ((OpN & Whole) != Whole) { + assert((OpN & Whole) == LoHalf || (OpN & Whole) == HiHalf); + if (OpN & LoHalf) + OS << "lo "; + else + OS << "hi "; + } + OS << '#' << SignExtend32(OpN & Index, IndexBits); +} + +void NodeTemplate::print(raw_ostream &OS, const SelectionDAG &G) const { + const TargetInstrInfo &TII = *G.getSubtarget().getInstrInfo(); + OS << format("%8s", EVT(Ty).getEVTString().c_str()) << " " + << TII.getName(Opc); + bool Comma = false; + for (const auto &R : Ops) { + if (Comma) + OS << ','; + Comma = true; + OS << ' '; + R.print(OS, G); + } +} + +void ResultStack::print(raw_ostream &OS, const SelectionDAG &G) const { + OS << "Input node:\n"; +#ifndef NDEBUG + InpNode->dumpr(&G); +#endif + OS << "Result templates:\n"; + for (unsigned I = 0, E = List.size(); I != E; ++I) { + OS << '[' << I << "] "; + List[I].print(OS, G); + OS << '\n'; + } +} + +namespace { +struct ShuffleMask { + ShuffleMask(ArrayRef M) : Mask(M) { + for (unsigned I = 0, E = Mask.size(); I != E; ++I) { + int M = Mask[I]; + if (M == -1) + continue; + MinSrc = (MinSrc == -1) ? M : std::min(MinSrc, M); + MaxSrc = (MaxSrc == -1) ? M : std::max(MaxSrc, M); + } + } + + ArrayRef Mask; + int MinSrc = -1, MaxSrc = -1; + + ShuffleMask lo() const { + size_t H = Mask.size()/2; + return ShuffleMask(Mask.take_front(H)); + } + ShuffleMask hi() const { + size_t H = Mask.size()/2; + return ShuffleMask(Mask.take_back(H)); + } +}; +} // namespace + +// -------------------------------------------------------------------- +// The HvxSelector class. 
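Before the class itself, a side illustration (not from the patch; describeOpRef is a made-up name): the packed OpRef layout defined above can be decoded the same way OpRef::print does. Bit 31 marks undef, bit 30 the high half, bit 29 the low half (both set meaning the whole value), bit 28 invalid, and the low 28 bits hold a sign-extended index into the result stack. For example, OpRef::res(-1) packs to 0x6FFFFFFF and reads back as the whole value of result #-1, i.e. the template pushed immediately before the one being materialized.

#include <cstdint>
#include <string>

// Decode a packed (non-SDValue) OpRef, mirroring OpRef::print.
std::string describeOpRef(uint32_t OpN) {
  const uint32_t Invalid = 0x10000000, LoHalf = 0x20000000,
                 HiHalf = 0x40000000, Undef = 0x80000000;
  if (OpN & Invalid)
    return "invalid";
  if (OpN & Undef)
    return "undef";
  int32_t Idx = int32_t(OpN & 0x0FFFFFFF);
  if (Idx & 0x08000000)                 // Sign-extend the 28-bit index.
    Idx -= 0x10000000;
  std::string Prefix;
  if ((OpN & (LoHalf | HiHalf)) == LoHalf)
    Prefix = "lo ";
  else if ((OpN & (LoHalf | HiHalf)) == HiHalf)
    Prefix = "hi ";
  return Prefix + "#" + std::to_string(Idx);
}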
+ +static const HexagonTargetLowering &getHexagonLowering(SelectionDAG &G) { + return static_cast(G.getTargetLoweringInfo()); +} +static const HexagonSubtarget &getHexagonSubtarget(SelectionDAG &G) { + return static_cast(G.getSubtarget()); +} + +namespace llvm { + struct HvxSelector { + const HexagonTargetLowering &Lower; + HexagonDAGToDAGISel &ISel; + SelectionDAG &DAG; + const HexagonSubtarget &HST; + const unsigned HwLen; + + HvxSelector(HexagonDAGToDAGISel &HS, SelectionDAG &G) + : Lower(getHexagonLowering(G)), ISel(HS), DAG(G), + HST(getHexagonSubtarget(G)), HwLen(HST.getVectorLength()) {} + + MVT getSingleVT(MVT ElemTy) const { + unsigned NumElems = HwLen / (ElemTy.getSizeInBits()/8); + return MVT::getVectorVT(ElemTy, NumElems); + } + + MVT getPairVT(MVT ElemTy) const { + unsigned NumElems = (2*HwLen) / (ElemTy.getSizeInBits()/8); + return MVT::getVectorVT(ElemTy, NumElems); + } + + void selectShuffle(SDNode *N); + void selectRor(SDNode *N); + + private: + void materialize(const ResultStack &Results); + + SDValue getVectorConstant(ArrayRef Data, const SDLoc &dl); + + enum : unsigned { + None, + PackMux, + }; + OpRef concat(OpRef Va, OpRef Vb, ResultStack &Results); + OpRef packs(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results, + MutableArrayRef NewMask, unsigned Options = None); + OpRef packp(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results, + MutableArrayRef NewMask); + OpRef vmuxs(ArrayRef Bytes, OpRef Va, OpRef Vb, + ResultStack &Results); + OpRef vmuxp(ArrayRef Bytes, OpRef Va, OpRef Vb, + ResultStack &Results); + + OpRef shuffs1(ShuffleMask SM, OpRef Va, ResultStack &Results); + OpRef shuffs2(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results); + OpRef shuffp1(ShuffleMask SM, OpRef Va, ResultStack &Results); + OpRef shuffp2(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results); + + OpRef butterfly(ShuffleMask SM, OpRef Va, ResultStack &Results); + OpRef contracting(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results); + OpRef expanding(ShuffleMask SM, OpRef Va, ResultStack &Results); + OpRef perfect(ShuffleMask SM, OpRef Va, ResultStack &Results); + + bool selectVectorConstants(SDNode *N); + bool scalarizeShuffle(ArrayRef Mask, const SDLoc &dl, MVT ResTy, + SDValue Va, SDValue Vb, SDNode *N); + + }; +} + +static void splitMask(ArrayRef Mask, MutableArrayRef MaskL, + MutableArrayRef MaskR) { + unsigned VecLen = Mask.size(); + assert(MaskL.size() == VecLen && MaskR.size() == VecLen); + for (unsigned I = 0; I != VecLen; ++I) { + int M = Mask[I]; + if (M < 0) { + MaskL[I] = MaskR[I] = -1; + } else if (unsigned(M) < VecLen) { + MaskL[I] = M; + MaskR[I] = -1; + } else { + MaskL[I] = -1; + MaskR[I] = M-VecLen; + } + } +} + +static std::pair findStrip(ArrayRef A, int Inc, + unsigned MaxLen) { + assert(A.size() > 0 && A.size() >= MaxLen); + int F = A[0]; + int E = F; + for (unsigned I = 1; I != MaxLen; ++I) { + if (A[I] - E != Inc) + return { F, I }; + E = A[I]; + } + return { F, MaxLen }; +} + +static bool isUndef(ArrayRef Mask) { + for (int Idx : Mask) + if (Idx != -1) + return false; + return true; +} + +static bool isIdentity(ArrayRef Mask) { + for (int I = 0, E = Mask.size(); I != E; ++I) { + int M = Mask[I]; + if (M >= 0 && M != I) + return false; + } + return true; +} + +static bool isPermutation(ArrayRef Mask) { + // Check by adding all numbers only works if there is no overflow. 
+ assert(Mask.size() < 0x00007FFF && "Sanity failure"); + int Sum = 0; + for (int Idx : Mask) { + if (Idx == -1) + return false; + Sum += Idx; + } + int N = Mask.size(); + return 2*Sum == N*(N-1); +} + +bool HvxSelector::selectVectorConstants(SDNode *N) { + // Constant vectors are generated as loads from constant pools. + // Since they are generated during the selection process, the main + // selection algorithm is not aware of them. Select them directly + // here. + SmallVector Loads; + SetVector WorkQ; + + // The DAG can change (due to CSE) during selection, so cache all the + // unselected nodes first to avoid traversing a mutating DAG. + + auto IsLoadToSelect = [] (SDNode *N) { + if (!N->isMachineOpcode() && N->getOpcode() == ISD::LOAD) { + SDValue Addr = cast(N)->getBasePtr(); + unsigned AddrOpc = Addr.getOpcode(); + if (AddrOpc == HexagonISD::AT_PCREL || AddrOpc == HexagonISD::CP) + if (Addr.getOperand(0).getOpcode() == ISD::TargetConstantPool) + return true; + } + return false; + }; + + WorkQ.insert(N); + for (unsigned i = 0; i != WorkQ.size(); ++i) { + SDNode *W = WorkQ[i]; + if (IsLoadToSelect(W)) { + Loads.push_back(W); + continue; + } + for (unsigned j = 0, f = W->getNumOperands(); j != f; ++j) + WorkQ.insert(W->getOperand(j).getNode()); + } + + for (SDNode *L : Loads) + ISel.Select(L); + + return !Loads.empty(); +} + +void HvxSelector::materialize(const ResultStack &Results) { + DEBUG_WITH_TYPE("isel", { + dbgs() << "Materializing\n"; + Results.print(dbgs(), DAG); + }); + if (Results.empty()) + return; + const SDLoc &dl(Results.InpNode); + std::vector Output; + + for (unsigned I = 0, E = Results.size(); I != E; ++I) { + const NodeTemplate &Node = Results[I]; + std::vector Ops; + for (const OpRef &R : Node.Ops) { + assert(R.isValid()); + if (R.isValue()) { + Ops.push_back(R.OpV); + continue; + } + if (R.OpN & OpRef::Undef) { + MVT::SimpleValueType SVT = MVT::SimpleValueType(R.OpN & OpRef::Index); + Ops.push_back(ISel.selectUndef(dl, MVT(SVT))); + continue; + } + // R is an index of a result. + unsigned Part = R.OpN & OpRef::Whole; + int Idx = SignExtend32(R.OpN & OpRef::Index, OpRef::IndexBits); + if (Idx < 0) + Idx += I; + assert(Idx >= 0 && unsigned(Idx) < Output.size()); + SDValue Op = Output[Idx]; + MVT OpTy = Op.getValueType().getSimpleVT(); + if (Part != OpRef::Whole) { + assert(Part == OpRef::LoHalf || Part == OpRef::HiHalf); + if (Op.getOpcode() == HexagonISD::VCOMBINE) { + Op = (Part == OpRef::HiHalf) ? Op.getOperand(0) : Op.getOperand(1); + } else { + MVT HalfTy = MVT::getVectorVT(OpTy.getVectorElementType(), + OpTy.getVectorNumElements()/2); + unsigned Sub = (Part == OpRef::LoHalf) ? Hexagon::vsub_lo + : Hexagon::vsub_hi; + Op = DAG.getTargetExtractSubreg(Sub, dl, HalfTy, Op); + } + } + Ops.push_back(Op); + } // for (Node : Results) + + assert(Node.Ty != MVT::Other); + SDNode *ResN = (Node.Opc == TargetOpcode::COPY) + ? 
Ops.front().getNode() + : DAG.getMachineNode(Node.Opc, dl, Node.Ty, Ops); + Output.push_back(SDValue(ResN, 0)); + } + + SDNode *OutN = Output.back().getNode(); + SDNode *InpN = Results.InpNode; + DEBUG_WITH_TYPE("isel", { + dbgs() << "Generated node:\n"; + OutN->dumpr(&DAG); + }); + + ISel.ReplaceNode(InpN, OutN); + selectVectorConstants(OutN); + DAG.RemoveDeadNodes(); +} + +OpRef HvxSelector::concat(OpRef Lo, OpRef Hi, ResultStack &Results) { + DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';}); + const SDLoc &dl(Results.InpNode); + Results.push(TargetOpcode::REG_SEQUENCE, getPairVT(MVT::i8), { + DAG.getTargetConstant(Hexagon::HvxWRRegClassID, dl, MVT::i32), + Lo, DAG.getTargetConstant(Hexagon::vsub_lo, dl, MVT::i32), + Hi, DAG.getTargetConstant(Hexagon::vsub_hi, dl, MVT::i32), + }); + return OpRef::res(Results.top()); +} + +// Va, Vb are single vectors, SM can be arbitrarily long. +OpRef HvxSelector::packs(ShuffleMask SM, OpRef Va, OpRef Vb, + ResultStack &Results, MutableArrayRef NewMask, + unsigned Options) { + DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';}); + if (!Va.isValid() || !Vb.isValid()) + return OpRef::fail(); + + int VecLen = SM.Mask.size(); + MVT Ty = getSingleVT(MVT::i8); + + if (SM.MaxSrc - SM.MinSrc < int(HwLen)) { + if (SM.MaxSrc < int(HwLen)) { + memcpy(NewMask.data(), SM.Mask.data(), sizeof(int)*VecLen); + return Va; + } + if (SM.MinSrc >= int(HwLen)) { + for (int I = 0; I != VecLen; ++I) { + int M = SM.Mask[I]; + if (M != -1) + M -= HwLen; + NewMask[I] = M; + } + return Vb; + } + const SDLoc &dl(Results.InpNode); + SDValue S = DAG.getTargetConstant(SM.MinSrc, dl, MVT::i32); + if (isUInt<3>(SM.MinSrc)) { + Results.push(Hexagon::V6_valignbi, Ty, {Vb, Va, S}); + } else { + Results.push(Hexagon::A2_tfrsi, MVT::i32, {S}); + unsigned Top = Results.top(); + Results.push(Hexagon::V6_valignb, Ty, {Vb, Va, OpRef::res(Top)}); + } + for (int I = 0; I != VecLen; ++I) { + int M = SM.Mask[I]; + if (M != -1) + M -= SM.MinSrc; + NewMask[I] = M; + } + return OpRef::res(Results.top()); + } + + if (Options & PackMux) { + // If elements picked from Va and Vb have all different (source) indexes + // (relative to the start of the argument), do a mux, and update the mask. + BitVector Picked(HwLen); + SmallVector MuxBytes(HwLen); + bool CanMux = true; + for (int I = 0; I != VecLen; ++I) { + int M = SM.Mask[I]; + if (M == -1) + continue; + if (M >= int(HwLen)) + M -= HwLen; + else + MuxBytes[M] = 0xFF; + if (Picked[M]) { + CanMux = false; + break; + } + NewMask[I] = M; + } + if (CanMux) + return vmuxs(MuxBytes, Va, Vb, Results); + } + + return OpRef::fail(); +} + +OpRef HvxSelector::packp(ShuffleMask SM, OpRef Va, OpRef Vb, + ResultStack &Results, MutableArrayRef NewMask) { + DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';}); + unsigned HalfMask = 0; + unsigned LogHw = Log2_32(HwLen); + for (int M : SM.Mask) { + if (M == -1) + continue; + HalfMask |= (1u << (M >> LogHw)); + } + + if (HalfMask == 0) + return OpRef::undef(getPairVT(MVT::i8)); + + // If more than two halves are used, bail. + // TODO: be more aggressive here? + if (countPopulation(HalfMask) > 2) + return OpRef::fail(); + + MVT HalfTy = getSingleVT(MVT::i8); + + OpRef Inp[2] = { Va, Vb }; + OpRef Out[2] = { OpRef::undef(HalfTy), OpRef::undef(HalfTy) }; + + uint8_t HalfIdx[4] = { 0xFF, 0xFF, 0xFF, 0xFF }; + unsigned Idx = 0; + for (unsigned I = 0; I != 4; ++I) { + if ((HalfMask & (1u << I)) == 0) + continue; + assert(Idx < 2); + OpRef Op = Inp[I/2]; + Out[Idx] = (I & 1) ? 
OpRef::hi(Op) : OpRef::lo(Op); + HalfIdx[I] = Idx++; + } + + int VecLen = SM.Mask.size(); + for (int I = 0; I != VecLen; ++I) { + int M = SM.Mask[I]; + if (M >= 0) { + uint8_t Idx = HalfIdx[M >> LogHw]; + assert(Idx == 0 || Idx == 1); + M = (M & (HwLen-1)) + HwLen*Idx; + } + NewMask[I] = M; + } + + return concat(Out[0], Out[1], Results); +} + +OpRef HvxSelector::vmuxs(ArrayRef Bytes, OpRef Va, OpRef Vb, + ResultStack &Results) { + DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';}); + MVT ByteTy = getSingleVT(MVT::i8); + MVT BoolTy = MVT::getVectorVT(MVT::i1, 8*HwLen); // XXX + const SDLoc &dl(Results.InpNode); + SDValue B = getVectorConstant(Bytes, dl); + Results.push(Hexagon::V6_vd0, ByteTy, {}); + Results.push(Hexagon::V6_veqb, BoolTy, {OpRef(B), OpRef::res(-1)}); + Results.push(Hexagon::V6_vmux, ByteTy, {OpRef::res(-1), Vb, Va}); + return OpRef::res(Results.top()); +} + +OpRef HvxSelector::vmuxp(ArrayRef Bytes, OpRef Va, OpRef Vb, + ResultStack &Results) { + DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';}); + size_t S = Bytes.size() / 2; + OpRef L = vmuxs(Bytes.take_front(S), OpRef::lo(Va), OpRef::lo(Vb), Results); + OpRef H = vmuxs(Bytes.drop_front(S), OpRef::hi(Va), OpRef::hi(Vb), Results); + return concat(L, H, Results); +} + +OpRef HvxSelector::shuffs1(ShuffleMask SM, OpRef Va, ResultStack &Results) { + DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';}); + unsigned VecLen = SM.Mask.size(); + assert(HwLen == VecLen); + (void)VecLen; + assert(all_of(SM.Mask, [this](int M) { return M == -1 || M < int(HwLen); })); + + if (isIdentity(SM.Mask)) + return Va; + if (isUndef(SM.Mask)) + return OpRef::undef(getSingleVT(MVT::i8)); + + OpRef P = perfect(SM, Va, Results); + if (P.isValid()) + return P; + return butterfly(SM, Va, Results); +} + +OpRef HvxSelector::shuffs2(ShuffleMask SM, OpRef Va, OpRef Vb, + ResultStack &Results) { + DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';}); + if (isUndef(SM.Mask)) + return OpRef::undef(getSingleVT(MVT::i8)); + + OpRef C = contracting(SM, Va, Vb, Results); + if (C.isValid()) + return C; + + int VecLen = SM.Mask.size(); + SmallVector NewMask(VecLen); + OpRef P = packs(SM, Va, Vb, Results, NewMask); + if (P.isValid()) + return shuffs1(ShuffleMask(NewMask), P, Results); + + SmallVector MaskL(VecLen), MaskR(VecLen); + splitMask(SM.Mask, MaskL, MaskR); + + OpRef L = shuffs1(ShuffleMask(MaskL), Va, Results); + OpRef R = shuffs1(ShuffleMask(MaskR), Vb, Results); + if (!L.isValid() || !R.isValid()) + return OpRef::fail(); + + SmallVector Bytes(VecLen); + for (int I = 0; I != VecLen; ++I) { + if (MaskL[I] != -1) + Bytes[I] = 0xFF; + } + return vmuxs(Bytes, L, R, Results); +} + +OpRef HvxSelector::shuffp1(ShuffleMask SM, OpRef Va, ResultStack &Results) { + DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';}); + int VecLen = SM.Mask.size(); + + if (isIdentity(SM.Mask)) + return Va; + if (isUndef(SM.Mask)) + return OpRef::undef(getPairVT(MVT::i8)); + + SmallVector PackedMask(VecLen); + OpRef P = packs(SM, OpRef::lo(Va), OpRef::hi(Va), Results, PackedMask); + if (P.isValid()) { + ShuffleMask PM(PackedMask); + OpRef E = expanding(PM, P, Results); + if (E.isValid()) + return E; + + OpRef L = shuffs1(PM.lo(), P, Results); + OpRef H = shuffs1(PM.hi(), P, Results); + if (L.isValid() && H.isValid()) + return concat(L, H, Results); + } + + OpRef R = perfect(SM, Va, Results); + if (R.isValid()) + return R; + // TODO commute the mask and try the opposite order of the halves. 
+ + OpRef L = shuffs2(SM.lo(), OpRef::lo(Va), OpRef::hi(Va), Results); + OpRef H = shuffs2(SM.hi(), OpRef::lo(Va), OpRef::hi(Va), Results); + if (L.isValid() && H.isValid()) + return concat(L, H, Results); + + return OpRef::fail(); +} + +OpRef HvxSelector::shuffp2(ShuffleMask SM, OpRef Va, OpRef Vb, + ResultStack &Results) { + DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';}); + if (isUndef(SM.Mask)) + return OpRef::undef(getPairVT(MVT::i8)); + + int VecLen = SM.Mask.size(); + SmallVector PackedMask(VecLen); + OpRef P = packp(SM, Va, Vb, Results, PackedMask); + if (P.isValid()) + return shuffp1(ShuffleMask(PackedMask), P, Results); + + SmallVector MaskL(VecLen), MaskR(VecLen); + OpRef L = shuffp1(ShuffleMask(MaskL), Va, Results); + OpRef R = shuffp1(ShuffleMask(MaskR), Vb, Results); + if (!L.isValid() || !R.isValid()) + return OpRef::fail(); + + // Mux the results. + SmallVector Bytes(VecLen); + for (int I = 0; I != VecLen; ++I) { + if (MaskL[I] != -1) + Bytes[I] = 0xFF; + } + return vmuxp(Bytes, L, R, Results); +} + +bool HvxSelector::scalarizeShuffle(ArrayRef Mask, const SDLoc &dl, + MVT ResTy, SDValue Va, SDValue Vb, + SDNode *N) { + DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';}); + MVT ElemTy = ResTy.getVectorElementType(); + assert(ElemTy == MVT::i8); + unsigned VecLen = Mask.size(); + bool HavePairs = (2*HwLen == VecLen); + MVT SingleTy = getSingleVT(MVT::i8); + + SmallVector Ops; + for (int I : Mask) { + if (I < 0) { + Ops.push_back(ISel.selectUndef(dl, ElemTy)); + continue; + } + SDValue Vec; + unsigned M = I; + if (M < VecLen) { + Vec = Va; + } else { + Vec = Vb; + M -= VecLen; + } + if (HavePairs) { + if (M < HwLen) { + Vec = DAG.getTargetExtractSubreg(Hexagon::vsub_lo, dl, SingleTy, Vec); + } else { + Vec = DAG.getTargetExtractSubreg(Hexagon::vsub_hi, dl, SingleTy, Vec); + M -= HwLen; + } + } + SDValue Idx = DAG.getConstant(M, dl, MVT::i32); + SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ElemTy, {Vec, Idx}); + SDValue L = Lower.LowerOperation(Ex, DAG); + assert(L.getNode()); + Ops.push_back(L); + } + + SDValue LV; + if (2*HwLen == VecLen) { + SDValue B0 = DAG.getBuildVector(SingleTy, dl, {Ops.data(), HwLen}); + SDValue L0 = Lower.LowerOperation(B0, DAG); + SDValue B1 = DAG.getBuildVector(SingleTy, dl, {Ops.data()+HwLen, HwLen}); + SDValue L1 = Lower.LowerOperation(B1, DAG); + // XXX CONCAT_VECTORS is legal for HVX vectors. Legalizing (lowering) + // functions may expect to be called only for illegal operations, so + // make sure that they are not called for legal ones. Develop a better + // mechanism for dealing with this. + LV = DAG.getNode(ISD::CONCAT_VECTORS, dl, ResTy, {L0, L1}); + } else { + SDValue BV = DAG.getBuildVector(ResTy, dl, Ops); + LV = Lower.LowerOperation(BV, DAG); + } + + assert(!N->use_empty()); + ISel.ReplaceNode(N, LV.getNode()); + DAG.RemoveDeadNodes(); + + std::deque SubNodes; + SubNodes.push_back(LV.getNode()); + for (unsigned I = 0; I != SubNodes.size(); ++I) { + for (SDValue Op : SubNodes[I]->ops()) + SubNodes.push_back(Op.getNode()); + } + while (!SubNodes.empty()) { + SDNode *S = SubNodes.front(); + SubNodes.pop_front(); + if (S->use_empty()) + continue; + // This isn't great, but users need to be selected before any nodes that + // they use. (The reason is to match larger patterns, and avoid nodes that + // cannot be matched on their own, e.g. ValueType, TokenFactor, etc.). 
+ bool PendingUser = llvm::any_of(S->uses(), [&SubNodes](const SDNode *U) { + return llvm::any_of(SubNodes, [U](const SDNode *T) { + return T == U; + }); + }); + if (PendingUser) + SubNodes.push_back(S); + else + ISel.Select(S); + } + + DAG.RemoveDeadNodes(); + return true; +} + +OpRef HvxSelector::contracting(ShuffleMask SM, OpRef Va, OpRef Vb, + ResultStack &Results) { + DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';}); + if (!Va.isValid() || !Vb.isValid()) + return OpRef::fail(); + + // Contracting shuffles, i.e. instructions that always discard some bytes + // from the operand vectors. + // + // V6_vshuff{e,o}b + // V6_vdealb4w + // V6_vpack{e,o}{b,h} + + int VecLen = SM.Mask.size(); + std::pair Strip = findStrip(SM.Mask, 1, VecLen); + MVT ResTy = getSingleVT(MVT::i8); + + // The following shuffles only work for bytes and halfwords. This requires + // the strip length to be 1 or 2. + if (Strip.second != 1 && Strip.second != 2) + return OpRef::fail(); + + // The patterns for the shuffles, in terms of the starting offsets of the + // consecutive strips (L = length of the strip, N = VecLen): + // + // vpacke: 0, 2L, 4L ... N+0, N+2L, N+4L ... L = 1 or 2 + // vpacko: L, 3L, 5L ... N+L, N+3L, N+5L ... L = 1 or 2 + // + // vshuffe: 0, N+0, 2L, N+2L, 4L ... L = 1 or 2 + // vshuffo: L, N+L, 3L, N+3L, 5L ... L = 1 or 2 + // + // vdealb4w: 0, 4, 8 ... 2, 6, 10 ... N+0, N+4, N+8 ... N+2, N+6, N+10 ... + + // The value of the element in the mask following the strip will decide + // what kind of a shuffle this can be. + int NextInMask = SM.Mask[Strip.second]; + + // Check if NextInMask could be 2L, 3L or 4, i.e. if it could be a mask + // for vpack or vdealb4w. VecLen > 4, so NextInMask for vdealb4w would + // satisfy this. + if (NextInMask < VecLen) { + // vpack{e,o} or vdealb4w + if (Strip.first == 0 && Strip.second == 1 && NextInMask == 4) { + int N = VecLen; + // Check if this is vdealb4w (L=1). + for (int I = 0; I != N/4; ++I) + if (SM.Mask[I] != 4*I) + return OpRef::fail(); + for (int I = 0; I != N/4; ++I) + if (SM.Mask[I+N/4] != 2 + 4*I) + return OpRef::fail(); + for (int I = 0; I != N/4; ++I) + if (SM.Mask[I+N/2] != N + 4*I) + return OpRef::fail(); + for (int I = 0; I != N/4; ++I) + if (SM.Mask[I+3*N/4] != N+2 + 4*I) + return OpRef::fail(); + // Matched mask for vdealb4w. + Results.push(Hexagon::V6_vdealb4w, ResTy, {Vb, Va}); + return OpRef::res(Results.top()); + } + + // Check if this is vpack{e,o}. + int N = VecLen; + int L = Strip.second; + // Check if the first strip starts at 0 or at L. + if (Strip.first != 0 && Strip.first != L) + return OpRef::fail(); + // Examine the rest of the mask. + for (int I = L; I < N; I += L) { + auto S = findStrip(SM.Mask.drop_front(I), 1, N-I); + // Check whether the mask element at the beginning of each strip + // increases by 2L each time. + if (S.first - Strip.first != 2*I) + return OpRef::fail(); + // Check whether each strip is of the same length. + if (S.second != unsigned(L)) + return OpRef::fail(); + } + + // Strip.first == 0 => vpacke + // Strip.first == L => vpacko + assert(Strip.first == 0 || Strip.first == L); + using namespace Hexagon; + NodeTemplate Res; + Res.Opc = Strip.second == 1 // Number of bytes. + ? (Strip.first == 0 ? V6_vpackeb : V6_vpackob) + : (Strip.first == 0 ? V6_vpackeh : V6_vpackoh); + Res.Ty = ResTy; + Res.Ops = { Vb, Va }; + Results.push(Res); + return OpRef::res(Results.top()); + } + + // Check if this is vshuff{e,o}. 
+ int N = VecLen; + int L = Strip.second; + std::pair PrevS = Strip; + bool Flip = false; + for (int I = L; I < N; I += L) { + auto S = findStrip(SM.Mask.drop_front(I), 1, N-I); + if (S.second != PrevS.second) + return OpRef::fail(); + int Diff = Flip ? PrevS.first - S.first + 2*L + : S.first - PrevS.first; + if (Diff != N) + return OpRef::fail(); + Flip ^= true; + PrevS = S; + } + // Strip.first == 0 => vshuffe + // Strip.first == L => vshuffo + assert(Strip.first == 0 || Strip.first == L); + using namespace Hexagon; + NodeTemplate Res; + Res.Opc = Strip.second == 1 // Number of bytes. + ? (Strip.first == 0 ? V6_vshuffeb : V6_vshuffob) + : (Strip.first == 0 ? V6_vshufeh : V6_vshufoh); + Res.Ty = ResTy; + Res.Ops = { Vb, Va }; + Results.push(Res); + return OpRef::res(Results.top()); +} + +OpRef HvxSelector::expanding(ShuffleMask SM, OpRef Va, ResultStack &Results) { + DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';}); + // Expanding shuffles (using all elements and inserting into larger vector): + // + // V6_vunpacku{b,h} [*] + // + // [*] Only if the upper elements (filled with 0s) are "don't care" in Mask. + // + // Note: V6_vunpacko{b,h} are or-ing the high byte/half in the result, so + // they are not shuffles. + // + // The argument is a single vector. + + int VecLen = SM.Mask.size(); + assert(2*HwLen == unsigned(VecLen) && "Expecting vector-pair type"); + + std::pair Strip = findStrip(SM.Mask, 1, VecLen); + + // The patterns for the unpacks, in terms of the starting offsets of the + // consecutive strips (L = length of the strip, N = VecLen): + // + // vunpacku: 0, -1, L, -1, 2L, -1 ... + + if (Strip.first != 0) + return OpRef::fail(); + + // The vunpackus only handle byte and half-word. + if (Strip.second != 1 && Strip.second != 2) + return OpRef::fail(); + + int N = VecLen; + int L = Strip.second; + + // First, check the non-ignored strips. + for (int I = 2*L; I < 2*N; I += 2*L) { + auto S = findStrip(SM.Mask.drop_front(I), 1, N-I); + if (S.second != unsigned(L)) + return OpRef::fail(); + if (2*S.first != I) + return OpRef::fail(); + } + // Check the -1s. + for (int I = L; I < 2*N; I += 2*L) { + auto S = findStrip(SM.Mask.drop_front(I), 0, N-I); + if (S.first != -1 || S.second != unsigned(L)) + return OpRef::fail(); + } + + unsigned Opc = Strip.second == 1 ? Hexagon::V6_vunpackub + : Hexagon::V6_vunpackuh; + Results.push(Opc, getPairVT(MVT::i8), {Va}); + return OpRef::res(Results.top()); +} + +OpRef HvxSelector::perfect(ShuffleMask SM, OpRef Va, ResultStack &Results) { + DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';}); + // V6_vdeal{b,h} + // V6_vshuff{b,h} + + // V6_vshufoe{b,h} those are quivalent to vshuffvdd(..,{1,2}) + // V6_vshuffvdd (V6_vshuff) + // V6_dealvdd (V6_vdeal) + + int VecLen = SM.Mask.size(); + assert(isPowerOf2_32(VecLen) && Log2_32(VecLen) <= 8); + unsigned LogLen = Log2_32(VecLen); + unsigned HwLog = Log2_32(HwLen); + // The result length must be the same as the length of a single vector, + // or a vector pair. + assert(LogLen == HwLog || LogLen == HwLog+1); + bool Extend = (LogLen == HwLog); + + if (!isPermutation(SM.Mask)) + return OpRef::fail(); + + SmallVector Perm(LogLen); + + // Check if this could be a perfect shuffle, or a combination of perfect + // shuffles. + // + // Consider this permutation (using hex digits to make the ASCII diagrams + // easier to read): + // { 0, 8, 1, 9, 2, A, 3, B, 4, C, 5, D, 6, E, 7, F }. 
+ // This is a "deal" operation: divide the input into two halves, and + // create the output by picking elements by alternating between these two + // halves: + // 0 1 2 3 4 5 6 7 --> 0 8 1 9 2 A 3 B 4 C 5 D 6 E 7 F [*] + // 8 9 A B C D E F + // + // Aside from a few special explicit cases (V6_vdealb, etc.), HVX provides + // a somwehat different mechanism that could be used to perform shuffle/ + // deal operations: a 2x2 transpose. + // Consider the halves of inputs again, they can be interpreted as a 2x8 + // matrix. A 2x8 matrix can be looked at four 2x2 matrices concatenated + // together. Now, when considering 2 elements at a time, it will be a 2x4 + // matrix (with elements 01, 23, 45, etc.), or two 2x2 matrices: + // 01 23 45 67 + // 89 AB CD EF + // With groups of 4, this will become a single 2x2 matrix, and so on. + // + // The 2x2 transpose instruction works by transposing each of the 2x2 + // matrices (or "sub-matrices"), given a specific group size. For example, + // if the group size is 1 (i.e. each element is its own group), there + // will be four transposes of the four 2x2 matrices that form the 2x8. + // For example, with the inputs as above, the result will be: + // 0 8 2 A 4 C 6 E + // 1 9 3 B 5 D 7 F + // Now, this result can be tranposed again, but with the group size of 2: + // 08 19 4C 5D + // 2A 3B 6E 7F + // If we then transpose that result, but with the group size of 4, we get: + // 0819 2A3B + // 4C5D 6E7F + // If we concatenate these two rows, it will be + // 0 8 1 9 2 A 3 B 4 C 5 D 6 E 7 F + // which is the same as the "deal" [*] above. + // + // In general, a "deal" of individual elements is a series of 2x2 transposes, + // with changing group size. HVX has two instructions: + // Vdd = V6_vdealvdd Vu, Vv, Rt + // Vdd = V6_shufvdd Vu, Vv, Rt + // that perform exactly that. The register Rt controls which transposes are + // going to happen: a bit at position n (counting from 0) indicates that a + // transpose with a group size of 2^n will take place. If multiple bits are + // set, multiple transposes will happen: vdealvdd will perform them starting + // with the largest group size, vshuffvdd will do them in the reverse order. + // + // The main observation is that each 2x2 transpose corresponds to swapping + // columns of bits in the binary representation of the values. + // + // The numbers {3,2,1,0} and the log2 of the number of contiguous 1 bits + // in a given column. The * denote the columns that will be swapped. + // The transpose with the group size 2^n corresponds to swapping columns + // 3 (the highest log) and log2(n): + // + // 3 2 1 0 0 2 1 3 0 2 3 1 + // * * * * * * + // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + // 1 0 0 0 1 8 1 0 0 0 8 1 0 0 0 8 1 0 0 0 + // 2 0 0 1 0 2 0 0 1 0 1 0 0 0 1 1 0 0 0 1 + // 3 0 0 1 1 A 1 0 1 0 9 1 0 0 1 9 1 0 0 1 + // 4 0 1 0 0 4 0 1 0 0 4 0 1 0 0 2 0 0 1 0 + // 5 0 1 0 1 C 1 1 0 0 C 1 1 0 0 A 1 0 1 0 + // 6 0 1 1 0 6 0 1 1 0 5 0 1 0 1 3 0 0 1 1 + // 7 0 1 1 1 E 1 1 1 0 D 1 1 0 1 B 1 0 1 1 + // 8 1 0 0 0 1 0 0 0 1 2 0 0 1 0 4 0 1 0 0 + // 9 1 0 0 1 9 1 0 0 1 A 1 0 1 0 C 1 1 0 0 + // A 1 0 1 0 3 0 0 1 1 3 0 0 1 1 5 0 1 0 1 + // B 1 0 1 1 B 1 0 1 1 B 1 0 1 1 D 1 1 0 1 + // C 1 1 0 0 5 0 1 0 1 6 0 1 1 0 6 0 1 1 0 + // D 1 1 0 1 D 1 1 0 1 E 1 1 1 0 E 1 1 1 0 + // E 1 1 1 0 7 0 1 1 1 7 0 1 1 1 7 0 1 1 1 + // F 1 1 1 1 F 1 1 1 1 F 1 1 1 1 F 1 1 1 1 + + auto XorPow2 = [] (ArrayRef Mask, unsigned Num) { + unsigned X = Mask[0] ^ Mask[Num/2]; + // Check that the first half has the X's bits clear. 
+ if ((Mask[0] & X) != 0) + return 0u; + for (unsigned I = 1; I != Num/2; ++I) { + if (unsigned(Mask[I] ^ Mask[I+Num/2]) != X) + return 0u; + if ((Mask[I] & X) != 0) + return 0u; + } + return X; + }; + + // Create a vector of log2's for each column: Perm[i] corresponds to + // the i-th bit (lsb is 0). + assert(VecLen > 2); + for (unsigned I = VecLen; I >= 2; I >>= 1) { + // Examine the initial segment of Mask of size I. + unsigned X = XorPow2(SM.Mask, I); + if (!isPowerOf2_32(X)) + return OpRef::fail(); + // Check the other segments of Mask. + for (int J = I; J < VecLen; J += I) { + if (XorPow2(SM.Mask.slice(J, I), I) != X) + return OpRef::fail(); + } + Perm[Log2_32(X)] = Log2_32(I)-1; + } + + // Once we have Perm, represent it as cycles. Denote the maximum log2 + // (equal to log2(VecLen)-1) as M. The cycle containing M can then be + // written as (M a1 a2 a3 ... an). That cycle can be broken up into + // simple swaps as (M a1)(M a2)(M a3)...(M an), with the composition + // order being from left to right. Any (contiguous) segment where the + // values ai, ai+1...aj are either all increasing or all decreasing, + // can be implemented via a single vshuffvdd/vdealvdd respectively. + // + // If there is a cycle (a1 a2 ... an) that does not involve M, it can + // be written as (M an)(a1 a2 ... an)(M a1). The first two cycles can + // then be folded to get (M a1 a2 ... an)(M a1), and the above procedure + // can be used to generate a sequence of vshuffvdd/vdealvdd. + // + // Example: + // Assume M = 4 and consider a permutation (0 1)(2 3). It can be written + // as (4 0 1)(4 0) composed with (4 2 3)(4 2), or simply + // (4 0 1)(4 0)(4 2 3)(4 2). + // It can then be expanded into swaps as + // (4 0)(4 1)(4 0)(4 2)(4 3)(4 2), + // and broken up into "increasing" segments as + // [(4 0)(4 1)] [(4 0)(4 2)(4 3)] [(4 2)]. + // This is equivalent to + // (4 0 1)(4 0 2 3)(4 2), + // which can be implemented as 3 vshufvdd instructions. + + using CycleType = SmallVector; + std::set Cycles; + std::set All; + + for (unsigned I : Perm) + All.insert(I); + + // If the cycle contains LogLen-1, move it to the front of the cycle. + // Otherwise, return the cycle unchanged. + auto canonicalize = [LogLen](const CycleType &C) -> CycleType { + unsigned LogPos, N = C.size(); + for (LogPos = 0; LogPos != N; ++LogPos) + if (C[LogPos] == LogLen-1) + break; + if (LogPos == N) + return C; + + CycleType NewC(C.begin()+LogPos, C.end()); + NewC.append(C.begin(), C.begin()+LogPos); + return NewC; + }; + + auto pfs = [](const std::set &Cs, unsigned Len) { + // Ordering: shuff: 5 0 1 2 3 4, deal: 5 4 3 2 1 0 (for Log=6), + // for bytes zero is included, for halfwords is not. + if (Cs.size() != 1) + return 0u; + const CycleType &C = *Cs.begin(); + if (C[0] != Len-1) + return 0u; + int D = Len - C.size(); + if (D != 0 && D != 1) + return 0u; + + bool IsDeal = true, IsShuff = true; + for (unsigned I = 1; I != Len-D; ++I) { + if (C[I] != Len-1-I) + IsDeal = false; + if (C[I] != I-(1-D)) // I-1, I + IsShuff = false; + } + // At most one, IsDeal or IsShuff, can be non-zero. + assert(!(IsDeal || IsShuff) || IsDeal != IsShuff); + static unsigned Deals[] = { Hexagon::V6_vdealb, Hexagon::V6_vdealh }; + static unsigned Shufs[] = { Hexagon::V6_vshuffb, Hexagon::V6_vshuffh }; + return IsDeal ? Deals[D] : (IsShuff ? 
Shufs[D] : 0); + }; + + while (!All.empty()) { + unsigned A = *All.begin(); + All.erase(A); + CycleType C; + C.push_back(A); + for (unsigned B = Perm[A]; B != A; B = Perm[B]) { + C.push_back(B); + All.erase(B); + } + if (C.size() <= 1) + continue; + Cycles.insert(canonicalize(C)); + } + + MVT SingleTy = getSingleVT(MVT::i8); + MVT PairTy = getPairVT(MVT::i8); + + // Recognize patterns for V6_vdeal{b,h} and V6_vshuff{b,h}. + if (unsigned(VecLen) == HwLen) { + if (unsigned SingleOpc = pfs(Cycles, LogLen)) { + Results.push(SingleOpc, SingleTy, {Va}); + return OpRef::res(Results.top()); + } + } + + SmallVector SwapElems; + if (HwLen == unsigned(VecLen)) + SwapElems.push_back(LogLen-1); + + for (const CycleType &C : Cycles) { + unsigned First = (C[0] == LogLen-1) ? 1 : 0; + SwapElems.append(C.begin()+First, C.end()); + if (First == 0) + SwapElems.push_back(C[0]); + } + + const SDLoc &dl(Results.InpNode); + OpRef Arg = !Extend ? Va + : concat(Va, OpRef::undef(SingleTy), Results); + + for (unsigned I = 0, E = SwapElems.size(); I != E; ) { + bool IsInc = I == E-1 || SwapElems[I] < SwapElems[I+1]; + unsigned S = (1u << SwapElems[I]); + if (I < E-1) { + while (++I < E-1 && IsInc == (SwapElems[I] < SwapElems[I+1])) + S |= 1u << SwapElems[I]; + // The above loop will not add a bit for the final SwapElems[I+1], + // so add it here. + S |= 1u << SwapElems[I]; + } + ++I; + + NodeTemplate Res; + Results.push(Hexagon::A2_tfrsi, MVT::i32, + { DAG.getTargetConstant(S, dl, MVT::i32) }); + Res.Opc = IsInc ? Hexagon::V6_vshuffvdd : Hexagon::V6_vdealvdd; + Res.Ty = PairTy; + Res.Ops = { OpRef::hi(Arg), OpRef::lo(Arg), OpRef::res(-1) }; + Results.push(Res); + Arg = OpRef::res(Results.top()); + } + + return !Extend ? Arg : OpRef::lo(Arg); +} + +OpRef HvxSelector::butterfly(ShuffleMask SM, OpRef Va, ResultStack &Results) { + DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';}); + // Butterfly shuffles. + // + // V6_vdelta + // V6_vrdelta + // V6_vror + + // The assumption here is that all elements picked by Mask are in the + // first operand to the vector_shuffle. This assumption is enforced + // by the caller. + + MVT ResTy = getSingleVT(MVT::i8); + PermNetwork::Controls FC, RC; + const SDLoc &dl(Results.InpNode); + int VecLen = SM.Mask.size(); + + for (int M : SM.Mask) { + if (M != -1 && M >= VecLen) + return OpRef::fail(); + } + + // Try the deltas/benes for both single vectors and vector pairs. + ForwardDeltaNetwork FN(SM.Mask); + if (FN.run(FC)) { + SDValue Ctl = getVectorConstant(FC, dl); + Results.push(Hexagon::V6_vdelta, ResTy, {Va, OpRef(Ctl)}); + return OpRef::res(Results.top()); + } + + // Try reverse delta. + ReverseDeltaNetwork RN(SM.Mask); + if (RN.run(RC)) { + SDValue Ctl = getVectorConstant(RC, dl); + Results.push(Hexagon::V6_vrdelta, ResTy, {Va, OpRef(Ctl)}); + return OpRef::res(Results.top()); + } + + // Do Benes. 
+ BenesNetwork BN(SM.Mask); + if (BN.run(FC, RC)) { + SDValue CtlF = getVectorConstant(FC, dl); + SDValue CtlR = getVectorConstant(RC, dl); + Results.push(Hexagon::V6_vdelta, ResTy, {Va, OpRef(CtlF)}); + Results.push(Hexagon::V6_vrdelta, ResTy, + {OpRef::res(-1), OpRef(CtlR)}); + return OpRef::res(Results.top()); + } + + return OpRef::fail(); +} + +SDValue HvxSelector::getVectorConstant(ArrayRef Data, + const SDLoc &dl) { + SmallVector Elems; + for (uint8_t C : Data) + Elems.push_back(DAG.getConstant(C, dl, MVT::i8)); + MVT VecTy = MVT::getVectorVT(MVT::i8, Data.size()); + SDValue BV = DAG.getBuildVector(VecTy, dl, Elems); + SDValue LV = Lower.LowerOperation(BV, DAG); + DAG.RemoveDeadNode(BV.getNode()); + return LV; +} + +void HvxSelector::selectShuffle(SDNode *N) { + DEBUG_WITH_TYPE("isel", { + dbgs() << "Starting " << __func__ << " on node:\n"; + N->dump(&DAG); + }); + MVT ResTy = N->getValueType(0).getSimpleVT(); + // Assume that vector shuffles operate on vectors of bytes. + assert(ResTy.isVector() && ResTy.getVectorElementType() == MVT::i8); + + auto *SN = cast(N); + std::vector Mask(SN->getMask().begin(), SN->getMask().end()); + // This shouldn't really be necessary. Is it? + for (int &Idx : Mask) + if (Idx != -1 && Idx < 0) + Idx = -1; + + unsigned VecLen = Mask.size(); + bool HavePairs = (2*HwLen == VecLen); + assert(ResTy.getSizeInBits() / 8 == VecLen); + + // Vd = vector_shuffle Va, Vb, Mask + // + + bool UseLeft = false, UseRight = false; + for (unsigned I = 0; I != VecLen; ++I) { + if (Mask[I] == -1) + continue; + unsigned Idx = Mask[I]; + assert(Idx < 2*VecLen); + if (Idx < VecLen) + UseLeft = true; + else + UseRight = true; + } + + DEBUG_WITH_TYPE("isel", { + dbgs() << "VecLen=" << VecLen << " HwLen=" << HwLen << " UseLeft=" + << UseLeft << " UseRight=" << UseRight << " HavePairs=" + << HavePairs << '\n'; + }); + // If the mask is all -1's, generate "undef". + if (!UseLeft && !UseRight) { + ISel.ReplaceNode(N, ISel.selectUndef(SDLoc(SN), ResTy).getNode()); + DAG.RemoveDeadNode(N); + return; + } + + SDValue Vec0 = N->getOperand(0); + SDValue Vec1 = N->getOperand(1); + ResultStack Results(SN); + Results.push(TargetOpcode::COPY, ResTy, {Vec0}); + Results.push(TargetOpcode::COPY, ResTy, {Vec1}); + OpRef Va = OpRef::res(Results.top()-1); + OpRef Vb = OpRef::res(Results.top()); + + OpRef Res = !HavePairs ? shuffs2(ShuffleMask(Mask), Va, Vb, Results) + : shuffp2(ShuffleMask(Mask), Va, Vb, Results); + + bool Done = Res.isValid(); + if (Done) { + // Make sure that Res is on the stack before materializing. + Results.push(TargetOpcode::COPY, ResTy, {Res}); + materialize(Results); + } else { + Done = scalarizeShuffle(Mask, SDLoc(N), ResTy, Vec0, Vec1, N); + } + + if (!Done) { +#ifndef NDEBUG + dbgs() << "Unhandled shuffle:\n"; + SN->dumpr(&DAG); +#endif + llvm_unreachable("Failed to select vector shuffle"); + } +} + +void HvxSelector::selectRor(SDNode *N) { + // If this is a rotation by less than 8, use V6_valignbi. 
+ MVT Ty = N->getValueType(0).getSimpleVT(); + const SDLoc &dl(N); + SDValue VecV = N->getOperand(0); + SDValue RotV = N->getOperand(1); + SDNode *NewN = nullptr; + + if (auto *CN = dyn_cast(RotV.getNode())) { + unsigned S = CN->getZExtValue(); + if (S % HST.getVectorLength() == 0) { + NewN = VecV.getNode(); + } else if (isUInt<3>(S)) { + SDValue C = DAG.getTargetConstant(S, dl, MVT::i32); + NewN = DAG.getMachineNode(Hexagon::V6_valignbi, dl, Ty, + {VecV, VecV, C}); + } + } + + if (!NewN) + NewN = DAG.getMachineNode(Hexagon::V6_vror, dl, Ty, {VecV, RotV}); + + ISel.ReplaceNode(N, NewN); + DAG.RemoveDeadNode(N); +} + +void HexagonDAGToDAGISel::SelectHvxShuffle(SDNode *N) { + HvxSelector(*this, *CurDAG).selectShuffle(N); +} + +void HexagonDAGToDAGISel::SelectHvxRor(SDNode *N) { + HvxSelector(*this, *CurDAG).selectRor(N); +} + +void HexagonDAGToDAGISel::SelectV65GatherPred(SDNode *N) { + const SDLoc &dl(N); + SDValue Chain = N->getOperand(0); + SDValue Address = N->getOperand(2); + SDValue Predicate = N->getOperand(3); + SDValue Base = N->getOperand(4); + SDValue Modifier = N->getOperand(5); + SDValue Offset = N->getOperand(6); + + unsigned Opcode; + unsigned IntNo = cast(N->getOperand(1))->getZExtValue(); + switch (IntNo) { + default: + llvm_unreachable("Unexpected HVX gather intrinsic."); + case Intrinsic::hexagon_V6_vgathermhq: + case Intrinsic::hexagon_V6_vgathermhq_128B: + Opcode = Hexagon::V6_vgathermhq_pseudo; + break; + case Intrinsic::hexagon_V6_vgathermwq: + case Intrinsic::hexagon_V6_vgathermwq_128B: + Opcode = Hexagon::V6_vgathermwq_pseudo; + break; + case Intrinsic::hexagon_V6_vgathermhwq: + case Intrinsic::hexagon_V6_vgathermhwq_128B: + Opcode = Hexagon::V6_vgathermhwq_pseudo; + break; + } + + SDVTList VTs = CurDAG->getVTList(MVT::Other); + SDValue Ops[] = { Address, Predicate, Base, Modifier, Offset, Chain }; + SDNode *Result = CurDAG->getMachineNode(Opcode, dl, VTs, Ops); + + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = cast(N)->getMemOperand(); + cast(Result)->setMemRefs(MemOp, MemOp + 1); + + ReplaceUses(N, Result); + CurDAG->RemoveDeadNode(N); +} + +void HexagonDAGToDAGISel::SelectV65Gather(SDNode *N) { + const SDLoc &dl(N); + SDValue Chain = N->getOperand(0); + SDValue Address = N->getOperand(2); + SDValue Base = N->getOperand(3); + SDValue Modifier = N->getOperand(4); + SDValue Offset = N->getOperand(5); + + unsigned Opcode; + unsigned IntNo = cast(N->getOperand(1))->getZExtValue(); + switch (IntNo) { + default: + llvm_unreachable("Unexpected HVX gather intrinsic."); + case Intrinsic::hexagon_V6_vgathermh: + case Intrinsic::hexagon_V6_vgathermh_128B: + Opcode = Hexagon::V6_vgathermh_pseudo; + break; + case Intrinsic::hexagon_V6_vgathermw: + case Intrinsic::hexagon_V6_vgathermw_128B: + Opcode = Hexagon::V6_vgathermw_pseudo; + break; + case Intrinsic::hexagon_V6_vgathermhw: + case Intrinsic::hexagon_V6_vgathermhw_128B: + Opcode = Hexagon::V6_vgathermhw_pseudo; + break; + } + + SDVTList VTs = CurDAG->getVTList(MVT::Other); + SDValue Ops[] = { Address, Base, Modifier, Offset, Chain }; + SDNode *Result = CurDAG->getMachineNode(Opcode, dl, VTs, Ops); + + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = cast(N)->getMemOperand(); + cast(Result)->setMemRefs(MemOp, MemOp + 1); + + ReplaceUses(N, Result); + CurDAG->RemoveDeadNode(N); +} + +void HexagonDAGToDAGISel::SelectHVXDualOutput(SDNode *N) { + unsigned IID = cast(N->getOperand(0))->getZExtValue(); + SDNode *Result; + switch (IID) { + case 
Intrinsic::hexagon_V6_vaddcarry: { + SmallVector Ops = { N->getOperand(1), N->getOperand(2), + N->getOperand(3) }; + SDVTList VTs = CurDAG->getVTList(MVT::v16i32, MVT::v512i1); + Result = CurDAG->getMachineNode(Hexagon::V6_vaddcarry, SDLoc(N), VTs, Ops); + break; + } + case Intrinsic::hexagon_V6_vaddcarry_128B: { + SmallVector Ops = { N->getOperand(1), N->getOperand(2), + N->getOperand(3) }; + SDVTList VTs = CurDAG->getVTList(MVT::v32i32, MVT::v1024i1); + Result = CurDAG->getMachineNode(Hexagon::V6_vaddcarry, SDLoc(N), VTs, Ops); + break; + } + case Intrinsic::hexagon_V6_vsubcarry: { + SmallVector Ops = { N->getOperand(1), N->getOperand(2), + N->getOperand(3) }; + SDVTList VTs = CurDAG->getVTList(MVT::v16i32, MVT::v512i1); + Result = CurDAG->getMachineNode(Hexagon::V6_vsubcarry, SDLoc(N), VTs, Ops); + break; + } + case Intrinsic::hexagon_V6_vsubcarry_128B: { + SmallVector Ops = { N->getOperand(1), N->getOperand(2), + N->getOperand(3) }; + SDVTList VTs = CurDAG->getVTList(MVT::v32i32, MVT::v1024i1); + Result = CurDAG->getMachineNode(Hexagon::V6_vsubcarry, SDLoc(N), VTs, Ops); + break; + } + default: + llvm_unreachable("Unexpected HVX dual output intrinsic."); + } + ReplaceUses(N, Result); + ReplaceUses(SDValue(N, 0), SDValue(Result, 0)); + ReplaceUses(SDValue(N, 1), SDValue(Result, 1)); + CurDAG->RemoveDeadNode(N); +} + + diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp index 3ecc28679077..f2ab1ec51a9d 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -129,6 +129,11 @@ namespace { // Implement calling convention for Hexagon. +static const MVT LegalV64[] = { MVT::v64i8, MVT::v32i16, MVT::v16i32 }; +static const MVT LegalW64[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 }; +static const MVT LegalV128[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 }; +static const MVT LegalW128[] = { MVT::v256i8, MVT::v128i16, MVT::v64i32 }; + static bool CC_Hexagon(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, @@ -222,19 +227,19 @@ CC_Hexagon_VarArg (unsigned ValNo, MVT ValVT, State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); return false; } - if (LocVT == MVT::v8i64 || LocVT == MVT::v16i32 || LocVT == MVT::v32i16 || + if (LocVT == MVT::v16i32 || LocVT == MVT::v32i16 || LocVT == MVT::v64i8 || LocVT == MVT::v512i1) { Offset = State.AllocateStack(64, 64); State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); return false; } - if (LocVT == MVT::v16i64 || LocVT == MVT::v32i32 || LocVT == MVT::v64i16 || + if (LocVT == MVT::v32i32 || LocVT == MVT::v64i16 || LocVT == MVT::v128i8 || LocVT == MVT::v1024i1) { Offset = State.AllocateStack(128, 128); State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); return false; } - if (LocVT == MVT::v32i64 || LocVT == MVT::v64i32 || LocVT == MVT::v128i16 || + if (LocVT == MVT::v64i32 || LocVT == MVT::v128i16 || LocVT == MVT::v256i8) { Offset = State.AllocateStack(256, 256); State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); @@ -357,7 +362,7 @@ static bool CC_HexagonVector(unsigned ValNo, MVT ValVT, auto &HST = MF.getSubtarget(); if (HST.useHVX64BOps() && - (LocVT == MVT::v8i64 || LocVT == MVT::v16i32 || LocVT == MVT::v32i16 || + (LocVT == MVT::v16i32 || LocVT == MVT::v32i16 || LocVT == MVT::v64i8 || LocVT == MVT::v512i1)) { if (unsigned Reg = State.AllocateReg(VecLstS)) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); @@ -367,7 +372,7 @@ static bool 
CC_HexagonVector(unsigned ValNo, MVT ValVT, State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); return false; } - if (HST.useHVX64BOps() && (LocVT == MVT::v16i64 || LocVT == MVT::v32i32 || + if (HST.useHVX64BOps() && (LocVT == MVT::v32i32 || LocVT == MVT::v64i16 || LocVT == MVT::v128i8)) { if (unsigned Reg = State.AllocateReg(VecLstD)) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); @@ -378,7 +383,7 @@ static bool CC_HexagonVector(unsigned ValNo, MVT ValVT, return false; } // 128B Mode - if (HST.useHVX128BOps() && (LocVT == MVT::v32i64 || LocVT == MVT::v64i32 || + if (HST.useHVX128BOps() && (LocVT == MVT::v64i32 || LocVT == MVT::v128i16 || LocVT == MVT::v256i8)) { if (unsigned Reg = State.AllocateReg(VecLstD)) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); @@ -389,7 +394,7 @@ static bool CC_HexagonVector(unsigned ValNo, MVT ValVT, return false; } if (HST.useHVX128BOps() && - (LocVT == MVT::v16i64 || LocVT == MVT::v32i32 || LocVT == MVT::v64i16 || + (LocVT == MVT::v32i32 || LocVT == MVT::v64i16 || LocVT == MVT::v128i8 || LocVT == MVT::v1024i1)) { if (unsigned Reg = State.AllocateReg(VecLstS)) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); @@ -429,19 +434,18 @@ static bool RetCC_Hexagon(unsigned ValNo, MVT ValVT, LocVT = MVT::i64; LocInfo = CCValAssign::BCvt; } else if (LocVT == MVT::v64i8 || LocVT == MVT::v32i16 || - LocVT == MVT::v16i32 || LocVT == MVT::v8i64 || - LocVT == MVT::v512i1) { + LocVT == MVT::v16i32 || LocVT == MVT::v512i1) { LocVT = MVT::v16i32; ValVT = MVT::v16i32; LocInfo = CCValAssign::Full; } else if (LocVT == MVT::v128i8 || LocVT == MVT::v64i16 || - LocVT == MVT::v32i32 || LocVT == MVT::v16i64 || + LocVT == MVT::v32i32 || (LocVT == MVT::v1024i1 && HST.useHVX128BOps())) { LocVT = MVT::v32i32; ValVT = MVT::v32i32; LocInfo = CCValAssign::Full; } else if (LocVT == MVT::v256i8 || LocVT == MVT::v128i16 || - LocVT == MVT::v64i32 || LocVT == MVT::v32i64) { + LocVT == MVT::v64i32) { LocVT = MVT::v64i32; ValVT = MVT::v64i32; LocInfo = CCValAssign::Full; @@ -713,12 +717,12 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, else CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon); - auto Attr = MF.getFunction()->getFnAttribute("disable-tail-calls"); + auto Attr = MF.getFunction().getFnAttribute("disable-tail-calls"); if (Attr.getValueAsString() == "true") IsTailCall = false; if (IsTailCall) { - bool StructAttrFlag = MF.getFunction()->hasStructRetAttr(); + bool StructAttrFlag = MF.getFunction().hasStructRetAttr(); IsTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, IsVarArg, IsStructRet, StructAttrFlag, @@ -757,11 +761,13 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Promote the value if needed. switch (VA.getLocInfo()) { default: - // Loc info must be one of Full, SExt, ZExt, or AExt. + // Loc info must be one of Full, BCvt, SExt, ZExt, or AExt. 
llvm_unreachable("Unknown loc info!"); - case CCValAssign::BCvt: case CCValAssign::Full: break; + case CCValAssign::BCvt: + Arg = DAG.getBitcast(VA.getLocVT(), Arg); + break; case CCValAssign::SExt: Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg); break; @@ -919,10 +925,10 @@ static bool getIndexedAddressParts(SDNode *Ptr, EVT VT, auto &HST = static_cast(DAG.getSubtarget()); bool ValidHVX128BType = - HST.useHVX128BOps() && (VT == MVT::v32i32 || VT == MVT::v16i64 || + HST.useHVX128BOps() && (VT == MVT::v32i32 || VT == MVT::v64i16 || VT == MVT::v128i8); bool ValidHVXType = - HST.useHVX64BOps() && (VT == MVT::v16i32 || VT == MVT::v8i64 || + HST.useHVX64BOps() && (VT == MVT::v16i32 || VT == MVT::v32i16 || VT == MVT::v64i8); if (ValidHVX128BType || ValidHVXType || VT == MVT::i64 || VT == MVT::i32 || @@ -1131,6 +1137,8 @@ SDValue HexagonTargetLowering::LowerFormalArguments( unsigned VReg = RegInfo.createVirtualRegister(&Hexagon::IntRegsRegClass); RegInfo.addLiveIn(VA.getLocReg(), VReg); + if (VA.getLocInfo() == CCValAssign::BCvt) + RegVT = VA.getValVT(); SDValue Copy = DAG.getCopyFromReg(Chain, dl, VReg, RegVT); // Treat values of type MVT::i1 specially: they are passed in // registers of type i32, but they need to remain as values of @@ -1151,17 +1159,19 @@ SDValue HexagonTargetLowering::LowerFormalArguments( unsigned VReg = RegInfo.createVirtualRegister(&Hexagon::DoubleRegsRegClass); RegInfo.addLiveIn(VA.getLocReg(), VReg); + if (VA.getLocInfo() == CCValAssign::BCvt) + RegVT = VA.getValVT(); InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT)); // Single Vector - } else if ((RegVT == MVT::v8i64 || RegVT == MVT::v16i32 || + } else if ((RegVT == MVT::v16i32 || RegVT == MVT::v32i16 || RegVT == MVT::v64i8)) { unsigned VReg = RegInfo.createVirtualRegister(&Hexagon::HvxVRRegClass); RegInfo.addLiveIn(VA.getLocReg(), VReg); InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT)); } else if (Subtarget.useHVX128BOps() && - ((RegVT == MVT::v16i64 || RegVT == MVT::v32i32 || + ((RegVT == MVT::v32i32 || RegVT == MVT::v64i16 || RegVT == MVT::v128i8))) { unsigned VReg = RegInfo.createVirtualRegister(&Hexagon::HvxVRRegClass); @@ -1169,14 +1179,14 @@ SDValue HexagonTargetLowering::LowerFormalArguments( InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT)); // Double Vector - } else if ((RegVT == MVT::v16i64 || RegVT == MVT::v32i32 || + } else if ((RegVT == MVT::v32i32 || RegVT == MVT::v64i16 || RegVT == MVT::v128i8)) { unsigned VReg = RegInfo.createVirtualRegister(&Hexagon::HvxWRRegClass); RegInfo.addLiveIn(VA.getLocReg(), VReg); InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT)); } else if (Subtarget.useHVX128BOps() && - ((RegVT == MVT::v32i64 || RegVT == MVT::v64i32 || + ((RegVT == MVT::v64i32 || RegVT == MVT::v128i16 || RegVT == MVT::v256i8))) { unsigned VReg = RegInfo.createVirtualRegister(&Hexagon::HvxWRRegClass); @@ -1268,6 +1278,9 @@ SDValue HexagonTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); + if (Subtarget.useHVXOps() && Subtarget.isHVXVectorType(ty(LHS))) + return LowerHvxSetCC(Op, DAG); + SDValue Cmp = Op.getOperand(2); ISD::CondCode CC = cast(Cmp)->get(); @@ -1682,6 +1695,8 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, setPrefFunctionAlignment(4); setMinFunctionAlignment(2); setStackPointerRegisterToSaveRestore(HRI.getStackRegister()); + setBooleanContents(TargetLoweringBase::UndefinedBooleanContent); + 
setBooleanVectorContents(TargetLoweringBase::UndefinedBooleanContent); setMaxAtomicSizeInBitsSupported(64); setMinCmpXchgSizeInBits(32); @@ -1708,8 +1723,8 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, addRegisterClass(MVT::v4i1, &Hexagon::PredRegsRegClass); // ddccbbaa addRegisterClass(MVT::v8i1, &Hexagon::PredRegsRegClass); // hgfedcba addRegisterClass(MVT::i32, &Hexagon::IntRegsRegClass); - addRegisterClass(MVT::v4i8, &Hexagon::IntRegsRegClass); addRegisterClass(MVT::v2i16, &Hexagon::IntRegsRegClass); + addRegisterClass(MVT::v4i8, &Hexagon::IntRegsRegClass); addRegisterClass(MVT::i64, &Hexagon::DoubleRegsRegClass); addRegisterClass(MVT::v8i8, &Hexagon::DoubleRegsRegClass); addRegisterClass(MVT::v4i16, &Hexagon::DoubleRegsRegClass); @@ -1725,21 +1740,31 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, addRegisterClass(MVT::v64i8, &Hexagon::HvxVRRegClass); addRegisterClass(MVT::v32i16, &Hexagon::HvxVRRegClass); addRegisterClass(MVT::v16i32, &Hexagon::HvxVRRegClass); - addRegisterClass(MVT::v8i64, &Hexagon::HvxVRRegClass); addRegisterClass(MVT::v128i8, &Hexagon::HvxWRRegClass); addRegisterClass(MVT::v64i16, &Hexagon::HvxWRRegClass); addRegisterClass(MVT::v32i32, &Hexagon::HvxWRRegClass); - addRegisterClass(MVT::v16i64, &Hexagon::HvxWRRegClass); + // These "short" boolean vector types should be legal because + // they will appear as results of vector compares. If they were + // not legal, type legalization would try to make them legal + // and that would require using operations that do not use or + // produce such types. That, in turn, would imply using custom + // nodes, which would be unoptimizable by the DAG combiner. + // The idea is to rely on target-independent operations as much + // as possible. 
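// Aside (illustrative, not part of the patch): the comment above refers to
// vector compares producing vectors of i1. For example, with 64-byte HVX a
// compare of two v16i32 operands would be built roughly as
//   SDValue C = DAG.getSetCC(dl, MVT::v16i1, A, B, ISD::SETEQ);
// and selecting that node directly requires v16i1 (and the other short i1
// vector types registered below) to be legal.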
+ addRegisterClass(MVT::v16i1, &Hexagon::HvxQRRegClass); + addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass); + addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass); addRegisterClass(MVT::v512i1, &Hexagon::HvxQRRegClass); } else if (Subtarget.useHVX128BOps()) { addRegisterClass(MVT::v128i8, &Hexagon::HvxVRRegClass); addRegisterClass(MVT::v64i16, &Hexagon::HvxVRRegClass); addRegisterClass(MVT::v32i32, &Hexagon::HvxVRRegClass); - addRegisterClass(MVT::v16i64, &Hexagon::HvxVRRegClass); addRegisterClass(MVT::v256i8, &Hexagon::HvxWRRegClass); addRegisterClass(MVT::v128i16, &Hexagon::HvxWRRegClass); addRegisterClass(MVT::v64i32, &Hexagon::HvxWRRegClass); - addRegisterClass(MVT::v32i64, &Hexagon::HvxWRRegClass); + addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass); + addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass); + addRegisterClass(MVT::v128i1, &Hexagon::HvxQRRegClass); addRegisterClass(MVT::v1024i1, &Hexagon::HvxQRRegClass); } } @@ -1955,9 +1980,8 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, MVT::v4i8, Legal); // Types natively supported: - for (MVT NativeVT : {MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v32i1, MVT::v64i1, - MVT::v4i8, MVT::v8i8, MVT::v2i16, MVT::v4i16, MVT::v1i32, - MVT::v2i32, MVT::v1i64}) { + for (MVT NativeVT : {MVT::v32i1, MVT::v64i1, MVT::v4i8, MVT::v8i8, MVT::v2i16, + MVT::v4i16, MVT::v1i32, MVT::v2i32, MVT::v1i64}) { setOperationAction(ISD::BUILD_VECTOR, NativeVT, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, NativeVT, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, NativeVT, Custom); @@ -1975,39 +1999,15 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SETCC, MVT::v2i16, Custom); setOperationAction(ISD::VSELECT, MVT::v2i16, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i8, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom); - if (Subtarget.useHVXOps()) { - if (Subtarget.useHVX64BOps()) { - setOperationAction(ISD::CONCAT_VECTORS, MVT::v128i8, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i16, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i32, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i64, Custom); - // We try to generate the vpack{e/o} instructions. If we fail - // we fall back upon ExpandOp. - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v64i8, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32i16, Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v64i8, Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v32i16, Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v16i32, Custom); - } else if (Subtarget.useHVX128BOps()) { - setOperationAction(ISD::CONCAT_VECTORS, MVT::v256i8, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v128i16, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i32, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i64, Custom); - // We try to generate the vpack{e/o} instructions. If we fail - // we fall back upon ExpandOp. 
- setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v128i8, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v64i16, Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i32, Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v128i8, Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v64i16, Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v32i32, Custom); - } else { - llvm_unreachable("Unrecognized HVX mode"); - } - } + auto setPromoteTo = [this] (unsigned Opc, MVT FromTy, MVT ToTy) { + setOperationAction(Opc, FromTy, Promote); + AddPromotedToType(Opc, FromTy, ToTy); + }; + // Subtarget-specific operation actions. // if (Subtarget.hasV5TOps()) { @@ -2069,17 +2069,66 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, setIndexedStoreAction(ISD::POST_INC, VT, Legal); } - if (Subtarget.useHVX64BOps()) { - for (MVT VT : {MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64, - MVT::v128i8, MVT::v64i16, MVT::v32i32, MVT::v16i64}) { - setIndexedLoadAction(ISD::POST_INC, VT, Legal); - setIndexedStoreAction(ISD::POST_INC, VT, Legal); + if (Subtarget.useHVXOps()) { + bool Use64b = Subtarget.useHVX64BOps(); + ArrayRef LegalV = Use64b ? LegalV64 : LegalV128; + ArrayRef LegalW = Use64b ? LegalW64 : LegalW128; + MVT ByteV = Use64b ? MVT::v64i8 : MVT::v128i8; + MVT ByteW = Use64b ? MVT::v128i8 : MVT::v256i8; + + setOperationAction(ISD::VECTOR_SHUFFLE, ByteV, Legal); + setOperationAction(ISD::VECTOR_SHUFFLE, ByteW, Legal); + setOperationAction(ISD::CONCAT_VECTORS, ByteW, Legal); + setOperationAction(ISD::AND, ByteV, Legal); + setOperationAction(ISD::OR, ByteV, Legal); + setOperationAction(ISD::XOR, ByteV, Legal); + + for (MVT T : LegalV) { + setIndexedLoadAction(ISD::POST_INC, T, Legal); + setIndexedStoreAction(ISD::POST_INC, T, Legal); + + setOperationAction(ISD::ADD, T, Legal); + setOperationAction(ISD::SUB, T, Legal); + if (T != ByteV) { + setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, T, Legal); + setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, T, Legal); + } + + setOperationAction(ISD::MUL, T, Custom); + setOperationAction(ISD::MULHS, T, Custom); + setOperationAction(ISD::MULHU, T, Custom); + setOperationAction(ISD::SETCC, T, Custom); + setOperationAction(ISD::BUILD_VECTOR, T, Custom); + setOperationAction(ISD::INSERT_SUBVECTOR, T, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, T, Custom); + setOperationAction(ISD::EXTRACT_SUBVECTOR, T, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, T, Custom); + if (T != ByteV) + setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG, T, Custom); } - } else if (Subtarget.useHVX128BOps()) { - for (MVT VT : {MVT::v128i8, MVT::v64i16, MVT::v32i32, MVT::v16i64, - MVT::v256i8, MVT::v128i16, MVT::v64i32, MVT::v32i64}) { - setIndexedLoadAction(ISD::POST_INC, VT, Legal); - setIndexedStoreAction(ISD::POST_INC, VT, Legal); + + for (MVT T : LegalV) { + if (T == ByteV) + continue; + // Promote all shuffles and concats to operate on vectors of bytes. + setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteV); + setPromoteTo(ISD::CONCAT_VECTORS, T, ByteV); + setPromoteTo(ISD::AND, T, ByteV); + setPromoteTo(ISD::OR, T, ByteV); + setPromoteTo(ISD::XOR, T, ByteV); + } + + for (MVT T : LegalW) { + // Custom-lower BUILD_VECTOR for vector pairs. The standard (target- + // independent) handling of it would convert it to a load, which is + // not always the optimal choice. + setOperationAction(ISD::BUILD_VECTOR, T, Custom); + + if (T == ByteW) + continue; + // Promote all shuffles and concats to operate on vectors of bytes. 
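// Aside: a concrete reading of the promotion loops here for the 64-byte mode
// (illustrative, not part of the patch). In that mode LegalV = LegalV64 =
// {v64i8, v32i16, v16i32} and ByteV = v64i8, so VECTOR_SHUFFLE,
// CONCAT_VECTORS, AND, OR and XOR on v32i16 and v16i32 are marked Promote
// with v64i8 as the promoted type, while v64i8 itself keeps the Legal action
// set earlier. The pair loop does the same for LegalW64 = {v128i8, v64i16,
// v32i32} with ByteW = v128i8, after custom-lowering BUILD_VECTOR on pairs.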
+ setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteW); + setPromoteTo(ISD::CONCAT_VECTORS, T, ByteW); } } @@ -2212,11 +2261,8 @@ const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const { case HexagonISD::DCFETCH: return "HexagonISD::DCFETCH"; case HexagonISD::EH_RETURN: return "HexagonISD::EH_RETURN"; case HexagonISD::EXTRACTU: return "HexagonISD::EXTRACTU"; - case HexagonISD::EXTRACTURP: return "HexagonISD::EXTRACTURP"; case HexagonISD::INSERT: return "HexagonISD::INSERT"; - case HexagonISD::INSERTRP: return "HexagonISD::INSERTRP"; case HexagonISD::JT: return "HexagonISD::JT"; - case HexagonISD::PACKHL: return "HexagonISD::PACKHL"; case HexagonISD::RET_FLAG: return "HexagonISD::RET_FLAG"; case HexagonISD::TC_RETURN: return "HexagonISD::TC_RETURN"; case HexagonISD::VCOMBINE: return "HexagonISD::VCOMBINE"; @@ -2226,12 +2272,55 @@ const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const { case HexagonISD::VASR: return "HexagonISD::VASR"; case HexagonISD::VLSR: return "HexagonISD::VLSR"; case HexagonISD::VSPLAT: return "HexagonISD::VSPLAT"; + case HexagonISD::VEXTRACTW: return "HexagonISD::VEXTRACTW"; + case HexagonISD::VINSERTW0: return "HexagonISD::VINSERTW0"; + case HexagonISD::VROR: return "HexagonISD::VROR"; case HexagonISD::READCYCLE: return "HexagonISD::READCYCLE"; + case HexagonISD::VZERO: return "HexagonISD::VZERO"; case HexagonISD::OP_END: break; } return nullptr; } +/// Given an intrinsic, checks if on the target the intrinsic will need to map +/// to a MemIntrinsicNode (touches memory). If this is the case, it returns +/// true and store the intrinsic information into the IntrinsicInfo that was +/// passed to the function. +bool HexagonTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, + const CallInst &I, + MachineFunction &MF, + unsigned Intrinsic) const { + switch (Intrinsic) { + case Intrinsic::hexagon_V6_vgathermw: + case Intrinsic::hexagon_V6_vgathermw_128B: + case Intrinsic::hexagon_V6_vgathermh: + case Intrinsic::hexagon_V6_vgathermh_128B: + case Intrinsic::hexagon_V6_vgathermhw: + case Intrinsic::hexagon_V6_vgathermhw_128B: + case Intrinsic::hexagon_V6_vgathermwq: + case Intrinsic::hexagon_V6_vgathermwq_128B: + case Intrinsic::hexagon_V6_vgathermhq: + case Intrinsic::hexagon_V6_vgathermhq_128B: + case Intrinsic::hexagon_V6_vgathermhwq: + case Intrinsic::hexagon_V6_vgathermhwq_128B: { + const Module &M = *I.getParent()->getParent()->getParent(); + Info.opc = ISD::INTRINSIC_W_CHAIN; + Type *VecTy = I.getArgOperand(1)->getType(); + Info.memVT = MVT::getVT(VecTy); + Info.ptrVal = I.getArgOperand(0); + Info.offset = 0; + Info.align = M.getDataLayout().getTypeAllocSizeInBits(VecTy) / 8; + Info.flags = MachineMemOperand::MOLoad | + MachineMemOperand::MOStore | + MachineMemOperand::MOVolatile; + return true; + } + default: + break; + } + return false; +} + bool HexagonTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const { EVT MTy1 = EVT::getEVT(Ty1); EVT MTy2 = EVT::getEVT(Ty2); @@ -2253,116 +2342,163 @@ bool HexagonTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const { // Should we expand the build vector with shuffles? 
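// Aside (illustrative, not part of the patch): the getPreferredVectorAction
// hook added in the next hunk widens an illegal vector of HVX element type
// whose width is at least half of the hardware vector. For example, with
// 64-byte HVX (HwWidth = 512 bits) a v16i16 at 256 bits is widened (ending
// up as v32i16), while a v8i16 at 128 bits is split; vectors of i1 are
// always widened regardless of size.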
bool HexagonTargetLowering::shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const { - // Hexagon vector shuffle operates on element sizes of bytes or halfwords - EVT EltVT = VT.getVectorElementType(); - int EltBits = EltVT.getSizeInBits(); - if ((EltBits != 8) && (EltBits != 16)) - return false; - - return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues); -} - -static StridedLoadKind isStridedLoad(const ArrayRef &Mask) { - int even_start = -2; - int odd_start = -1; - size_t mask_len = Mask.size(); - for (auto idx : Mask) { - if ((idx - even_start) == 2) - even_start = idx; - else - break; - } - if (even_start == (int)(mask_len * 2) - 2) - return StridedLoadKind::Even; - for (auto idx : Mask) { - if ((idx - odd_start) == 2) - odd_start = idx; - else - break; - } - if (odd_start == (int)(mask_len * 2) - 1) - return StridedLoadKind::Odd; - - return StridedLoadKind::NoPattern; + return false; } bool HexagonTargetLowering::isShuffleMaskLegal(ArrayRef Mask, EVT VT) const { - if (Subtarget.useHVXOps()) - return isStridedLoad(Mask) != StridedLoadKind::NoPattern; return true; } +TargetLoweringBase::LegalizeTypeAction +HexagonTargetLowering::getPreferredVectorAction(EVT VT) const { + if (VT.getVectorNumElements() == 1) + return TargetLoweringBase::TypeScalarizeVector; + + // Always widen vectors of i1. + MVT ElemTy = VT.getSimpleVT().getVectorElementType(); + if (ElemTy == MVT::i1) + return TargetLoweringBase::TypeWidenVector; + + if (Subtarget.useHVXOps()) { + // If the size of VT is at least half of the vector length, + // widen the vector. Note: the threshold was not selected in + // any scientific way. + ArrayRef Tys = Subtarget.getHVXElementTypes(); + if (llvm::find(Tys, ElemTy) != Tys.end()) { + unsigned HwWidth = 8*Subtarget.getVectorLength(); + unsigned VecWidth = VT.getSizeInBits(); + if (VecWidth >= HwWidth/2 && VecWidth < HwWidth) + return TargetLoweringBase::TypeWidenVector; + } + } + return TargetLoweringBase::TypeSplitVector; +} + // Lower a vector shuffle (V1, V2, V3). V1 and V2 are the two vectors // to select data from, V3 is the permutation. SDValue HexagonTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { - const ShuffleVectorSDNode *SVN = cast(Op); - SDValue V1 = Op.getOperand(0); - SDValue V2 = Op.getOperand(1); - SDLoc dl(Op); - EVT VT = Op.getValueType(); - bool UseHVX = Subtarget.useHVXOps(); - - if (V2.isUndef()) - V2 = V1; - - if (SVN->isSplat()) { - int Lane = SVN->getSplatIndex(); - if (Lane == -1) Lane = 0; - - // Test if V1 is a SCALAR_TO_VECTOR. - if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) - return DAG.getNode(HexagonISD::VSPLAT, dl, VT, V1.getOperand(0)); - - // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR - // (and probably will turn into a SCALAR_TO_VECTOR once legalization - // reaches it). 
- if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR && - !isa(V1.getOperand(0))) { - bool IsScalarToVector = true; - for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i) { - if (!V1.getOperand(i).isUndef()) { - IsScalarToVector = false; - break; - } - } - if (IsScalarToVector) - return DAG.getNode(HexagonISD::VSPLAT, dl, VT, V1.getOperand(0)); + const auto *SVN = cast(Op); + ArrayRef AM = SVN->getMask(); + assert(AM.size() <= 8 && "Unexpected shuffle mask"); + unsigned VecLen = AM.size(); + + MVT VecTy = ty(Op); + assert(VecTy.getSizeInBits() <= 64 && "Unexpected vector length"); + + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + // If the inputs are not the same as the output, bail. This is not an + // error situation, but complicates the handling and the default expansion + // (into BUILD_VECTOR) should be adequate. + if (ty(Op0) != VecTy || ty(Op1) != VecTy) + return SDValue(); + + // Normalize the mask so that the first non-negative index comes from + // the first operand. + SmallVector Mask(AM.begin(), AM.end()); + unsigned F = llvm::find_if(AM, [](int M) { return M >= 0; }) - AM.data(); + if (F == AM.size()) + return DAG.getUNDEF(VecTy); + if (AM[F] >= int(VecLen)) { + ShuffleVectorSDNode::commuteMask(Mask); + std::swap(Op0, Op1); + } + + // Express the shuffle mask in terms of bytes. + SmallVector ByteMask; + unsigned ElemBytes = VecTy.getVectorElementType().getSizeInBits() / 8; + for (unsigned i = 0, e = Mask.size(); i != e; ++i) { + int M = Mask[i]; + if (M < 0) { + for (unsigned j = 0; j != ElemBytes; ++j) + ByteMask.push_back(-1); + } else { + for (unsigned j = 0; j != ElemBytes; ++j) + ByteMask.push_back(M*ElemBytes + j); } - return DAG.getNode(HexagonISD::VSPLAT, dl, VT, - DAG.getConstant(Lane, dl, MVT::i32)); } + assert(ByteMask.size() <= 8); + + // All non-undef (non-negative) indexes are well within [0..127], so they + // fit in a single byte. Build two 64-bit words: + // - MaskIdx where each byte is the corresponding index (for non-negative + // indexes), and 0xFF for negative indexes, and + // - MaskUnd that has 0xFF for each negative index. + uint64_t MaskIdx = 0; + uint64_t MaskUnd = 0; + for (unsigned i = 0, e = ByteMask.size(); i != e; ++i) { + unsigned S = 8*i; + uint64_t M = ByteMask[i] & 0xFF; + if (M == 0xFF) + MaskUnd |= M << S; + MaskIdx |= M << S; + } + + const SDLoc &dl(Op); + + if (ByteMask.size() == 4) { + // Identity. + if (MaskIdx == (0x03020100 | MaskUnd)) + return Op0; + // Byte swap. + if (MaskIdx == (0x00010203 | MaskUnd)) { + SDValue T0 = DAG.getBitcast(MVT::i32, Op0); + SDValue T1 = DAG.getNode(ISD::BSWAP, dl, MVT::i32, T0); + return DAG.getBitcast(VecTy, T1); + } - if (UseHVX) { - ArrayRef Mask = SVN->getMask(); - size_t MaskLen = Mask.size(); - unsigned SizeInBits = VT.getScalarSizeInBits() * MaskLen; - - if ((Subtarget.useHVX64BOps() && SizeInBits == 64 * 8) || - (Subtarget.useHVX128BOps() && SizeInBits == 128 * 8)) { - StridedLoadKind Pattern = isStridedLoad(Mask); - if (Pattern == StridedLoadKind::NoPattern) - return SDValue(); + // Byte packs. 
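// Aside: a worked example of the MaskIdx/MaskUnd encoding above
// (illustrative, not part of the patch). For a v4i8 shuffle mask {3, -1, 1, 0}
// the byte mask is {3, 0xFF, 1, 0}, giving
//   MaskIdx = 0x0001FF03 and MaskUnd = 0x0000FF00,
// so the byte-swap test "MaskIdx == (0x00010203 | MaskUnd)" succeeds and the
// shuffle is emitted as a BSWAP of the 32-bit bitcast. A standalone sketch of
// the encoding loop (plain C++):

#include <cstdint>
#include <vector>

void encodeByteMask(const std::vector<int> &ByteMask,
                    uint64_t &MaskIdx, uint64_t &MaskUnd) {
  MaskIdx = MaskUnd = 0;
  for (unsigned i = 0, e = ByteMask.size(); i != e; ++i) {
    uint64_t M = ByteMask[i] & 0xFF;   // -1 (undef) becomes 0xFF
    if (M == 0xFF)
      MaskUnd |= M << (8 * i);
    MaskIdx |= M << (8 * i);
  }
}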
+ SDValue Concat10 = DAG.getNode(HexagonISD::COMBINE, dl, + typeJoin({ty(Op1), ty(Op0)}), {Op1, Op0}); + if (MaskIdx == (0x06040200 | MaskUnd)) + return getNode(Hexagon::S2_vtrunehb, dl, VecTy, {Concat10}, DAG); + if (MaskIdx == (0x07050301 | MaskUnd)) + return getNode(Hexagon::S2_vtrunohb, dl, VecTy, {Concat10}, DAG); + + SDValue Concat01 = DAG.getNode(HexagonISD::COMBINE, dl, + typeJoin({ty(Op0), ty(Op1)}), {Op0, Op1}); + if (MaskIdx == (0x02000604 | MaskUnd)) + return getNode(Hexagon::S2_vtrunehb, dl, VecTy, {Concat01}, DAG); + if (MaskIdx == (0x03010705 | MaskUnd)) + return getNode(Hexagon::S2_vtrunohb, dl, VecTy, {Concat01}, DAG); + } + + if (ByteMask.size() == 8) { + // Identity. + if (MaskIdx == (0x0706050403020100ull | MaskUnd)) + return Op0; + // Byte swap. + if (MaskIdx == (0x0001020304050607ull | MaskUnd)) { + SDValue T0 = DAG.getBitcast(MVT::i64, Op0); + SDValue T1 = DAG.getNode(ISD::BSWAP, dl, MVT::i64, T0); + return DAG.getBitcast(VecTy, T1); + } - unsigned Opc = Pattern == StridedLoadKind::Even ? HexagonISD::VPACKE - : HexagonISD::VPACKO; - return DAG.getNode(Opc, dl, VT, {Op.getOperand(1), Op.getOperand(0)}); + // Halfword picks. + if (MaskIdx == (0x0d0c050409080100ull | MaskUnd)) + return getNode(Hexagon::S2_shuffeh, dl, VecTy, {Op1, Op0}, DAG); + if (MaskIdx == (0x0f0e07060b0a0302ull | MaskUnd)) + return getNode(Hexagon::S2_shuffoh, dl, VecTy, {Op1, Op0}, DAG); + if (MaskIdx == (0x0d0c090805040100ull | MaskUnd)) + return getNode(Hexagon::S2_vtrunewh, dl, VecTy, {Op1, Op0}, DAG); + if (MaskIdx == (0x0f0e0b0a07060302ull | MaskUnd)) + return getNode(Hexagon::S2_vtrunowh, dl, VecTy, {Op1, Op0}, DAG); + if (MaskIdx == (0x0706030205040100ull | MaskUnd)) { + VectorPair P = opSplit(Op0, dl, DAG); + return getNode(Hexagon::S2_packhl, dl, VecTy, {P.second, P.first}, DAG); } - // We used to assert in the "else" part here, but that is bad for Halide - // Halide creates intermediate double registers by interleaving two - // concatenated vector registers. The interleaving requires vector_shuffle - // nodes and we shouldn't barf on a double register result of a - // vector_shuffle because it is most likely an intermediate result. - } - // FIXME: We need to support more general vector shuffles. See - // below the comment from the ARM backend that deals in the general - // case with the vector shuffles. For now, let expand handle these. - return SDValue(); - // If the shuffle is not directly supported and it has 4 elements, use - // the PerfectShuffle-generated table to synthesize it from other shuffles. + // Byte packs. 
+ if (MaskIdx == (0x0e060c040a020800ull | MaskUnd)) + return getNode(Hexagon::S2_shuffeb, dl, VecTy, {Op1, Op0}, DAG); + if (MaskIdx == (0x0f070d050b030901ull | MaskUnd)) + return getNode(Hexagon::S2_shuffob, dl, VecTy, {Op1, Op0}, DAG); + } + + return SDValue(); } // If BUILD_VECTOR has same base element repeated several times, @@ -2437,29 +2573,56 @@ HexagonTargetLowering::LowerVECTOR_SHIFT(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(ISD::BITCAST, dl, VT, Result); } +bool +HexagonTargetLowering::getBuildVectorConstInts(ArrayRef Values, + MVT VecTy, SelectionDAG &DAG, + MutableArrayRef Consts) const { + MVT ElemTy = VecTy.getVectorElementType(); + unsigned ElemWidth = ElemTy.getSizeInBits(); + IntegerType *IntTy = IntegerType::get(*DAG.getContext(), ElemWidth); + bool AllConst = true; + + for (unsigned i = 0, e = Values.size(); i != e; ++i) { + SDValue V = Values[i]; + if (V.isUndef()) { + Consts[i] = ConstantInt::get(IntTy, 0); + continue; + } + // Make sure to always cast to IntTy. + if (auto *CN = dyn_cast(V.getNode())) { + const ConstantInt *CI = CN->getConstantIntValue(); + Consts[i] = ConstantInt::get(IntTy, CI->getValue().getSExtValue()); + } else if (auto *CN = dyn_cast(V.getNode())) { + const ConstantFP *CF = CN->getConstantFPValue(); + APInt A = CF->getValueAPF().bitcastToAPInt(); + Consts[i] = ConstantInt::get(IntTy, A.getZExtValue()); + } else { + AllConst = false; + } + } + return AllConst; +} + SDValue HexagonTargetLowering::buildVector32(ArrayRef Elem, const SDLoc &dl, MVT VecTy, SelectionDAG &DAG) const { MVT ElemTy = VecTy.getVectorElementType(); assert(VecTy.getVectorNumElements() == Elem.size()); - SmallVector Consts; - bool AllConst = true; - for (SDValue V : Elem) { - if (V.getOpcode() == ISD::UNDEF) - V = DAG.getConstant(0, dl, ElemTy); - auto *C = dyn_cast(V.getNode()); - Consts.push_back(C); - AllConst = AllConst && C != nullptr; - } + SmallVector Consts(Elem.size()); + bool AllConst = getBuildVectorConstInts(Elem, VecTy, DAG, Consts); unsigned First, Num = Elem.size(); for (First = 0; First != Num; ++First) - if (Elem[First].getOpcode() != ISD::UNDEF) + if (!isUndef(Elem[First])) break; if (First == Num) return DAG.getUNDEF(VecTy); + if (AllConst && + llvm::all_of(Consts, [](ConstantInt *CI) { return CI->isZero(); })) + return getZero(dl, VecTy, DAG); + if (ElemTy == MVT::i16) { assert(Elem.size() == 2); if (AllConst) { @@ -2467,48 +2630,60 @@ HexagonTargetLowering::buildVector32(ArrayRef Elem, const SDLoc &dl, Consts[1]->getZExtValue() << 16; return DAG.getBitcast(MVT::v2i16, DAG.getConstant(V, dl, MVT::i32)); } - SDNode *N = DAG.getMachineNode(Hexagon::A2_combine_ll, dl, MVT::i32, - { Elem[1], Elem[0] }); - return DAG.getBitcast(MVT::v2i16, SDValue(N,0)); + SDValue N = getNode(Hexagon::A2_combine_ll, dl, MVT::i32, + {Elem[1], Elem[0]}, DAG); + return DAG.getBitcast(MVT::v2i16, N); } - // First try generating a constant. - assert(ElemTy == MVT::i8 && Num == 4); - if (AllConst) { - int32_t V = (Consts[0]->getZExtValue() & 0xFF) | - (Consts[1]->getZExtValue() & 0xFF) << 8 | - (Consts[1]->getZExtValue() & 0xFF) << 16 | - Consts[2]->getZExtValue() << 24; - return DAG.getBitcast(MVT::v4i8, DAG.getConstant(V, dl, MVT::i32)); - } + if (ElemTy == MVT::i8) { + // First try generating a constant. 
+ if (AllConst) { + int32_t V = (Consts[0]->getZExtValue() & 0xFF) | + (Consts[1]->getZExtValue() & 0xFF) << 8 | + (Consts[1]->getZExtValue() & 0xFF) << 16 | + Consts[2]->getZExtValue() << 24; + return DAG.getBitcast(MVT::v4i8, DAG.getConstant(V, dl, MVT::i32)); + } - // Then try splat. - bool IsSplat = true; - for (unsigned i = 0; i != Num; ++i) { - if (i == First) - continue; - if (Elem[i] == Elem[First] || Elem[i].getOpcode() == ISD::UNDEF) - continue; - IsSplat = false; - break; + // Then try splat. + bool IsSplat = true; + for (unsigned i = 0; i != Num; ++i) { + if (i == First) + continue; + if (Elem[i] == Elem[First] || isUndef(Elem[i])) + continue; + IsSplat = false; + break; + } + if (IsSplat) { + // Legalize the operand to VSPLAT. + SDValue Ext = DAG.getZExtOrTrunc(Elem[First], dl, MVT::i32); + return DAG.getNode(HexagonISD::VSPLAT, dl, VecTy, Ext); + } + + // Generate + // (zxtb(Elem[0]) | (zxtb(Elem[1]) << 8)) | + // (zxtb(Elem[2]) | (zxtb(Elem[3]) << 8)) << 16 + assert(Elem.size() == 4); + SDValue Vs[4]; + for (unsigned i = 0; i != 4; ++i) { + Vs[i] = DAG.getZExtOrTrunc(Elem[i], dl, MVT::i32); + Vs[i] = DAG.getZeroExtendInReg(Vs[i], dl, MVT::i8); + } + SDValue S8 = DAG.getConstant(8, dl, MVT::i32); + SDValue T0 = DAG.getNode(ISD::SHL, dl, MVT::i32, {Vs[1], S8}); + SDValue T1 = DAG.getNode(ISD::SHL, dl, MVT::i32, {Vs[3], S8}); + SDValue B0 = DAG.getNode(ISD::OR, dl, MVT::i32, {Vs[0], T0}); + SDValue B1 = DAG.getNode(ISD::OR, dl, MVT::i32, {Vs[2], T1}); + + SDValue R = getNode(Hexagon::A2_combine_ll, dl, MVT::i32, {B1, B0}, DAG); + return DAG.getBitcast(MVT::v4i8, R); } - if (IsSplat) - return DAG.getNode(HexagonISD::VSPLAT, dl, VecTy, Elem[First]); - - // Generate - // (zxtb(Elem[0]) | (zxtb(Elem[1]) << 8)) | - // (zxtb(Elem[2]) | (zxtb(Elem[3]) << 8)) << 16 - SDValue S8 = DAG.getConstant(8, dl, MVT::i32); - SDValue S16 = DAG.getConstant(16, dl, MVT::i32); - SDValue V0 = DAG.getZExtOrTrunc(Elem[0], dl, MVT::i32); - SDValue V1 = DAG.getZExtOrTrunc(Elem[2], dl, MVT::i32); - SDValue V2 = DAG.getNode(ISD::SHL, dl, MVT::i32, {Elem[1], S8}); - SDValue V3 = DAG.getNode(ISD::SHL, dl, MVT::i32, {Elem[3], S8}); - SDValue V4 = DAG.getNode(ISD::OR, dl, MVT::i32, {V0, V2}); - SDValue V5 = DAG.getNode(ISD::OR, dl, MVT::i32, {V1, V3}); - SDValue V6 = DAG.getNode(ISD::SHL, dl, MVT::i32, {V5, S16}); - SDValue V7 = DAG.getNode(ISD::OR, dl, MVT::i32, {V4, V6}); - return DAG.getBitcast(MVT::v4i8, V7); + +#ifndef NDEBUG + dbgs() << "VecTy: " << EVT(VecTy).getEVTString() << '\n'; +#endif + llvm_unreachable("Unexpected vector element type"); } SDValue @@ -2517,36 +2692,36 @@ HexagonTargetLowering::buildVector64(ArrayRef Elem, const SDLoc &dl, MVT ElemTy = VecTy.getVectorElementType(); assert(VecTy.getVectorNumElements() == Elem.size()); - SmallVector Consts; - bool AllConst = true; - for (SDValue V : Elem) { - if (V.getOpcode() == ISD::UNDEF) - V = DAG.getConstant(0, dl, ElemTy); - auto *C = dyn_cast(V.getNode()); - Consts.push_back(C); - AllConst = AllConst && C != nullptr; - } + SmallVector Consts(Elem.size()); + bool AllConst = getBuildVectorConstInts(Elem, VecTy, DAG, Consts); unsigned First, Num = Elem.size(); for (First = 0; First != Num; ++First) - if (Elem[First].getOpcode() != ISD::UNDEF) + if (!isUndef(Elem[First])) break; if (First == Num) return DAG.getUNDEF(VecTy); + if (AllConst && + llvm::all_of(Consts, [](ConstantInt *CI) { return CI->isZero(); })) + return getZero(dl, VecTy, DAG); + // First try splat if possible. 
if (ElemTy == MVT::i16) { bool IsSplat = true; for (unsigned i = 0; i != Num; ++i) { if (i == First) continue; - if (Elem[i] == Elem[First] || Elem[i].getOpcode() == ISD::UNDEF) + if (Elem[i] == Elem[First] || isUndef(Elem[i])) continue; IsSplat = false; break; } - if (IsSplat) - return DAG.getNode(HexagonISD::VSPLAT, dl, VecTy, Elem[First]); + if (IsSplat) { + // Legalize the operand to VSPLAT. + SDValue Ext = DAG.getZExtOrTrunc(Elem[First], dl, MVT::i32); + return DAG.getNode(HexagonISD::VSPLAT, dl, VecTy, Ext); + } } // Then try constant. @@ -2556,7 +2731,7 @@ HexagonTargetLowering::buildVector64(ArrayRef Elem, const SDLoc &dl, uint64_t Mask = (ElemTy == MVT::i8) ? 0xFFull : (ElemTy == MVT::i16) ? 0xFFFFull : 0xFFFFFFFFull; for (unsigned i = 0; i != Num; ++i) - Val = (Val << W) | (Consts[i]->getZExtValue() & Mask); + Val = (Val << W) | (Consts[Num-1-i]->getZExtValue() & Mask); SDValue V0 = DAG.getConstant(Val, dl, MVT::i64); return DAG.getBitcast(VecTy, V0); } @@ -2565,279 +2740,199 @@ HexagonTargetLowering::buildVector64(ArrayRef Elem, const SDLoc &dl, MVT HalfTy = MVT::getVectorVT(ElemTy, Num/2); SDValue L = (ElemTy == MVT::i32) ? Elem[0] - : buildVector32({Elem.data(), Num/2}, dl, HalfTy, DAG); + : buildVector32(Elem.take_front(Num/2), dl, HalfTy, DAG); SDValue H = (ElemTy == MVT::i32) ? Elem[1] - : buildVector32({Elem.data()+Num/2, Num/2}, dl, HalfTy, DAG); - unsigned Id = Hexagon::DoubleRegsRegClassID; - SDNode *N = DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VecTy, - { DAG.getTargetConstant(Id, dl, MVT::i32), - L, DAG.getTargetConstant(Hexagon::isub_lo, dl, MVT::i32), - H, DAG.getTargetConstant(Hexagon::isub_hi, dl, MVT::i32) }); - return SDValue(N, 0); + : buildVector32(Elem.drop_front(Num/2), dl, HalfTy, DAG); + return DAG.getNode(HexagonISD::COMBINE, dl, VecTy, {H, L}); } SDValue -HexagonTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { - MVT VT = Op.getValueType().getSimpleVT(); - unsigned BW = VT.getSizeInBits(); - if (BW == 32 || BW == 64) { - SmallVector Ops; - for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) - Ops.push_back(Op.getOperand(i)); - if (BW == 32) - return buildVector32(Ops, SDLoc(Op), VT, DAG); - return buildVector64(Ops, SDLoc(Op), VT, DAG); +HexagonTargetLowering::extractVector(SDValue VecV, SDValue IdxV, + const SDLoc &dl, MVT ValTy, MVT ResTy, + SelectionDAG &DAG) const { + MVT VecTy = ty(VecV); + assert(!ValTy.isVector() || + VecTy.getVectorElementType() == ValTy.getVectorElementType()); + unsigned VecWidth = VecTy.getSizeInBits(); + unsigned ValWidth = ValTy.getSizeInBits(); + unsigned ElemWidth = VecTy.getVectorElementType().getSizeInBits(); + assert(VecWidth == 32 || VecWidth == 64); + assert((VecWidth % ElemWidth) == 0); + + // Cast everything to scalar integer types. + MVT ScalarTy = tyScalar(VecTy); + VecV = DAG.getBitcast(ScalarTy, VecV); + + SDValue WidthV = DAG.getConstant(ValWidth, dl, MVT::i32); + SDValue ExtV; + + if (ConstantSDNode *C = dyn_cast(IdxV)) { + unsigned Off = C->getZExtValue() * ElemWidth; + if (VecWidth == 64 && ValWidth == 32) { + assert(Off == 0 || Off == 32); + unsigned SubIdx = Off == 0 ? Hexagon::isub_lo : Hexagon::isub_hi; + ExtV = DAG.getTargetExtractSubreg(SubIdx, dl, MVT::i32, VecV); + } else if (Off == 0 && (ValWidth % 8) == 0) { + ExtV = DAG.getZeroExtendInReg(VecV, dl, tyScalar(ValTy)); + } else { + SDValue OffV = DAG.getConstant(Off, dl, MVT::i32); + // The return type of EXTRACTU must be the same as the type of the + // input vector. 
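// Aside: a worked example of the constant packing in buildVector64 above
// (illustrative, not part of the patch). For a v4i16 constant {1, 2, 3, 4}
// the loop accumulates
//   Val = ((((4 << 16 | 3) << 16) | 2) << 16) | 1 = 0x0004000300020001,
// i.e. element 0 ends up in the least-significant halfword, which matches
// Hexagon's little-endian lane order when the i64 constant is bitcast back
// to v4i16.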
+ ExtV = DAG.getNode(HexagonISD::EXTRACTU, dl, ScalarTy, + {VecV, WidthV, OffV}); + } + } else { + if (ty(IdxV) != MVT::i32) + IdxV = DAG.getZExtOrTrunc(IdxV, dl, MVT::i32); + SDValue OffV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, + DAG.getConstant(ElemWidth, dl, MVT::i32)); + ExtV = DAG.getNode(HexagonISD::EXTRACTU, dl, ScalarTy, + {VecV, WidthV, OffV}); } - return SDValue(); + // Cast ExtV to the requested result type. + ExtV = DAG.getZExtOrTrunc(ExtV, dl, tyScalar(ResTy)); + ExtV = DAG.getBitcast(ResTy, ExtV); + return ExtV; } SDValue -HexagonTargetLowering::LowerCONCAT_VECTORS(SDValue Op, - SelectionDAG &DAG) const { - SDLoc dl(Op); - bool UseHVX = Subtarget.useHVXOps(); - EVT VT = Op.getValueType(); - unsigned NElts = Op.getNumOperands(); - SDValue Vec0 = Op.getOperand(0); - EVT VecVT = Vec0.getValueType(); - unsigned Width = VecVT.getSizeInBits(); - - if (NElts == 2) { - MVT ST = VecVT.getSimpleVT(); - // We are trying to concat two v2i16 to a single v4i16, or two v4i8 - // into a single v8i8. - if (ST == MVT::v2i16 || ST == MVT::v4i8) - return DAG.getNode(HexagonISD::COMBINE, dl, VT, Op.getOperand(1), Vec0); - - if (UseHVX) { - assert((Width == 64 * 8 && Subtarget.useHVX64BOps()) || - (Width == 128 * 8 && Subtarget.useHVX128BOps())); - SDValue Vec1 = Op.getOperand(1); - MVT OpTy = Subtarget.useHVX64BOps() ? MVT::v16i32 : MVT::v32i32; - MVT ReTy = Subtarget.useHVX64BOps() ? MVT::v32i32 : MVT::v64i32; - SDValue B0 = DAG.getNode(ISD::BITCAST, dl, OpTy, Vec0); - SDValue B1 = DAG.getNode(ISD::BITCAST, dl, OpTy, Vec1); - SDValue VC = DAG.getNode(HexagonISD::VCOMBINE, dl, ReTy, B1, B0); - return DAG.getNode(ISD::BITCAST, dl, VT, VC); - } +HexagonTargetLowering::insertVector(SDValue VecV, SDValue ValV, SDValue IdxV, + const SDLoc &dl, MVT ValTy, + SelectionDAG &DAG) const { + MVT VecTy = ty(VecV); + unsigned VecWidth = VecTy.getSizeInBits(); + unsigned ValWidth = ValTy.getSizeInBits(); + assert(VecWidth == 32 || VecWidth == 64); + assert((VecWidth % ValWidth) == 0); + + // Cast everything to scalar integer types. + MVT ScalarTy = MVT::getIntegerVT(VecWidth); + // The actual type of ValV may be different than ValTy (which is related + // to the vector type). + unsigned VW = ty(ValV).getSizeInBits(); + ValV = DAG.getBitcast(MVT::getIntegerVT(VW), ValV); + VecV = DAG.getBitcast(ScalarTy, VecV); + if (VW != VecWidth) + ValV = DAG.getAnyExtOrTrunc(ValV, dl, ScalarTy); + + SDValue WidthV = DAG.getConstant(ValWidth, dl, MVT::i32); + SDValue InsV; + + if (ConstantSDNode *C = dyn_cast(IdxV)) { + unsigned W = C->getZExtValue() * ValWidth; + SDValue OffV = DAG.getConstant(W, dl, MVT::i32); + InsV = DAG.getNode(HexagonISD::INSERT, dl, ScalarTy, + {VecV, ValV, WidthV, OffV}); + } else { + if (ty(IdxV) != MVT::i32) + IdxV = DAG.getZExtOrTrunc(IdxV, dl, MVT::i32); + SDValue OffV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, WidthV); + InsV = DAG.getNode(HexagonISD::INSERT, dl, ScalarTy, + {VecV, ValV, WidthV, OffV}); } - if (VT.getSizeInBits() != 32 && VT.getSizeInBits() != 64) - return SDValue(); - - SDValue C0 = DAG.getConstant(0, dl, MVT::i64); - SDValue C32 = DAG.getConstant(32, dl, MVT::i64); - SDValue W = DAG.getConstant(Width, dl, MVT::i64); - // Create the "width" part of the argument to insert_rp/insertp_rp. 
- SDValue S = DAG.getNode(ISD::SHL, dl, MVT::i64, W, C32); - SDValue V = C0; - - for (unsigned i = 0, e = NElts; i != e; ++i) { - unsigned N = NElts-i-1; - SDValue OpN = Op.getOperand(N); + return DAG.getNode(ISD::BITCAST, dl, VecTy, InsV); +} - if (VT.getSizeInBits() == 64 && OpN.getValueSizeInBits() == 32) { - SDValue C = DAG.getConstant(0, dl, MVT::i32); - OpN = DAG.getNode(HexagonISD::COMBINE, dl, VT, C, OpN); - } - SDValue Idx = DAG.getConstant(N, dl, MVT::i64); - SDValue Offset = DAG.getNode(ISD::MUL, dl, MVT::i64, Idx, W); - SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, S, Offset); - if (VT.getSizeInBits() == 32) - V = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i32, {V, OpN, Or}); - else if (VT.getSizeInBits() == 64) - V = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i64, {V, OpN, Or}); - else - return SDValue(); +SDValue +HexagonTargetLowering::getZero(const SDLoc &dl, MVT Ty, SelectionDAG &DAG) + const { + if (Ty.isVector()) { + assert(Ty.isInteger() && "Only integer vectors are supported here"); + unsigned W = Ty.getSizeInBits(); + if (W <= 64) + return DAG.getBitcast(Ty, DAG.getConstant(0, dl, MVT::getIntegerVT(W))); + return DAG.getNode(HexagonISD::VZERO, dl, Ty); } - return DAG.getNode(ISD::BITCAST, dl, VT, V); + if (Ty.isInteger()) + return DAG.getConstant(0, dl, Ty); + if (Ty.isFloatingPoint()) + return DAG.getConstantFP(0.0, dl, Ty); + llvm_unreachable("Invalid type for zero"); } SDValue -HexagonTargetLowering::LowerEXTRACT_SUBVECTOR_HVX(SDValue Op, - SelectionDAG &DAG) const { - EVT VT = Op.getOperand(0).getValueType(); - SDLoc dl(Op); - bool UseHVX = Subtarget.useHVXOps(); - bool UseHVX64B = Subtarget.useHVX64BOps(); - // Just in case... - - if (!VT.isVector() || !UseHVX) - return SDValue(); - - EVT ResVT = Op.getValueType(); - unsigned ResSize = ResVT.getSizeInBits(); - unsigned VectorSizeInBits = UseHVX64B ? (64 * 8) : (128 * 8); - unsigned OpSize = VT.getSizeInBits(); - - // We deal only with cases where the result is the vector size - // and the vector operand is a double register. - if (!(ResVT.isByteSized() && ResSize == VectorSizeInBits) || - !(VT.isByteSized() && OpSize == 2 * VectorSizeInBits)) - return SDValue(); - - ConstantSDNode *Cst = dyn_cast(Op.getOperand(1)); - if (!Cst) - return SDValue(); - unsigned Val = Cst->getZExtValue(); +HexagonTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { + MVT VecTy = ty(Op); + unsigned BW = VecTy.getSizeInBits(); - // These two will get lowered to an appropriate EXTRACT_SUBREG in ISel. - if (Val == 0) { - SDValue Vec = Op.getOperand(0); - return DAG.getTargetExtractSubreg(Hexagon::vsub_lo, dl, ResVT, Vec); - } + if (Subtarget.useHVXOps() && Subtarget.isHVXVectorType(VecTy, true)) + return LowerHvxBuildVector(Op, DAG); - if (ResVT.getVectorNumElements() == Val) { - SDValue Vec = Op.getOperand(0); - return DAG.getTargetExtractSubreg(Hexagon::vsub_hi, dl, ResVT, Vec); + if (BW == 32 || BW == 64) { + const SDLoc &dl(Op); + SmallVector Ops; + for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) + Ops.push_back(Op.getOperand(i)); + if (BW == 32) + return buildVector32(Ops, dl, VecTy, DAG); + return buildVector64(Ops, dl, VecTy, DAG); } return SDValue(); } SDValue -HexagonTargetLowering::LowerEXTRACT_VECTOR(SDValue Op, +HexagonTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const { - // If we are dealing with EXTRACT_SUBVECTOR on a HVX type, we may - // be able to simplify it to an EXTRACT_SUBREG. 
- if (Op.getOpcode() == ISD::EXTRACT_SUBVECTOR && Subtarget.useHVXOps() && - Subtarget.isHVXVectorType(Op.getValueType().getSimpleVT())) - return LowerEXTRACT_SUBVECTOR_HVX(Op, DAG); + MVT VecTy = ty(Op); + assert(!Subtarget.useHVXOps() || !Subtarget.isHVXVectorType(VecTy)); - EVT VT = Op.getValueType(); - int VTN = VT.isVector() ? VT.getVectorNumElements() : 1; - SDLoc dl(Op); - SDValue Idx = Op.getOperand(1); - SDValue Vec = Op.getOperand(0); - EVT VecVT = Vec.getValueType(); - EVT EltVT = VecVT.getVectorElementType(); - int EltSize = EltVT.getSizeInBits(); - SDValue Width = DAG.getConstant(Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT ? - EltSize : VTN * EltSize, dl, MVT::i64); - - // Constant element number. - if (ConstantSDNode *CI = dyn_cast(Idx)) { - uint64_t X = CI->getZExtValue(); - SDValue Offset = DAG.getConstant(X * EltSize, dl, MVT::i32); - const SDValue Ops[] = {Vec, Width, Offset}; - - ConstantSDNode *CW = dyn_cast(Width); - assert(CW && "Non constant width in LowerEXTRACT_VECTOR"); - - SDValue N; - MVT SVT = VecVT.getSimpleVT(); - uint64_t W = CW->getZExtValue(); - - if (W == 1) { - MVT LocVT = MVT::getIntegerVT(SVT.getSizeInBits()); - SDValue VecCast = DAG.getNode(ISD::BITCAST, dl, LocVT, Vec); - SDValue Shifted = DAG.getNode(ISD::SRA, dl, LocVT, VecCast, Offset); - return DAG.getNode(ISD::AND, dl, LocVT, Shifted, - DAG.getConstant(1, dl, LocVT)); - } else if (W == 32) { - // Translate this node into EXTRACT_SUBREG. - unsigned Subreg = (X == 0) ? Hexagon::isub_lo : 0; - - if (X == 0) - Subreg = Hexagon::isub_lo; - else if (SVT == MVT::v2i32 && X == 1) - Subreg = Hexagon::isub_hi; - else if (SVT == MVT::v4i16 && X == 2) - Subreg = Hexagon::isub_hi; - else if (SVT == MVT::v8i8 && X == 4) - Subreg = Hexagon::isub_hi; - else - llvm_unreachable("Bad offset"); - N = DAG.getTargetExtractSubreg(Subreg, dl, MVT::i32, Vec); - - } else if (SVT.getSizeInBits() == 32) { - N = DAG.getNode(HexagonISD::EXTRACTU, dl, MVT::i32, Ops); - } else if (SVT.getSizeInBits() == 64) { - N = DAG.getNode(HexagonISD::EXTRACTU, dl, MVT::i64, Ops); - if (VT.getSizeInBits() == 32) - N = DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, N); - } else - return SDValue(); - - return DAG.getNode(ISD::BITCAST, dl, VT, N); + if (VecTy.getSizeInBits() == 64) { + assert(Op.getNumOperands() == 2); + return DAG.getNode(HexagonISD::COMBINE, SDLoc(Op), VecTy, Op.getOperand(1), + Op.getOperand(0)); } - // Variable element number. 
- SDValue Offset = DAG.getNode(ISD::MUL, dl, MVT::i32, Idx, - DAG.getConstant(EltSize, dl, MVT::i32)); - SDValue Shifted = DAG.getNode(ISD::SHL, dl, MVT::i64, Width, - DAG.getConstant(32, dl, MVT::i64)); - SDValue Combined = DAG.getNode(ISD::OR, dl, MVT::i64, Shifted, Offset); + return SDValue(); +} - const SDValue Ops[] = {Vec, Combined}; +SDValue +HexagonTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, + SelectionDAG &DAG) const { + SDValue Vec = Op.getOperand(0); + MVT VecTy = ty(Vec); + if (Subtarget.useHVXOps() && Subtarget.isHVXVectorType(VecTy)) + return LowerHvxExtractElement(Op, DAG); - SDValue N; - if (VecVT.getSizeInBits() == 32) { - N = DAG.getNode(HexagonISD::EXTRACTURP, dl, MVT::i32, Ops); - } else { - N = DAG.getNode(HexagonISD::EXTRACTURP, dl, MVT::i64, Ops); - if (VT.getSizeInBits() == 32) - N = DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, N); - } - return DAG.getNode(ISD::BITCAST, dl, VT, N); + MVT ElemTy = ty(Vec).getVectorElementType(); + return extractVector(Vec, Op.getOperand(1), SDLoc(Op), ElemTy, ty(Op), DAG); } SDValue -HexagonTargetLowering::LowerINSERT_VECTOR(SDValue Op, - SelectionDAG &DAG) const { - EVT VT = Op.getValueType(); - int VTN = VT.isVector() ? VT.getVectorNumElements() : 1; - SDLoc dl(Op); +HexagonTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op, + SelectionDAG &DAG) const { SDValue Vec = Op.getOperand(0); - SDValue Val = Op.getOperand(1); - SDValue Idx = Op.getOperand(2); - EVT VecVT = Vec.getValueType(); - EVT EltVT = VecVT.getVectorElementType(); - int EltSize = EltVT.getSizeInBits(); - SDValue Width = DAG.getConstant(Op.getOpcode() == ISD::INSERT_VECTOR_ELT ? - EltSize : VTN * EltSize, dl, MVT::i64); - - if (ConstantSDNode *C = cast(Idx)) { - SDValue Offset = DAG.getConstant(C->getSExtValue() * EltSize, dl, MVT::i32); - const SDValue Ops[] = {Vec, Val, Width, Offset}; - - SDValue N; - if (VT.getSizeInBits() == 32) - N = DAG.getNode(HexagonISD::INSERT, dl, MVT::i32, Ops); - else if (VT.getSizeInBits() == 64) - N = DAG.getNode(HexagonISD::INSERT, dl, MVT::i64, Ops); - else - return SDValue(); - - return DAG.getNode(ISD::BITCAST, dl, VT, N); - } + MVT VecTy = ty(Vec); + if (Subtarget.useHVXOps() && Subtarget.isHVXVectorType(VecTy)) + return LowerHvxExtractSubvector(Op, DAG); - // Variable element number. 
- SDValue Offset = DAG.getNode(ISD::MUL, dl, MVT::i32, Idx, - DAG.getConstant(EltSize, dl, MVT::i32)); - SDValue Shifted = DAG.getNode(ISD::SHL, dl, MVT::i64, Width, - DAG.getConstant(32, dl, MVT::i64)); - SDValue Combined = DAG.getNode(ISD::OR, dl, MVT::i64, Shifted, Offset); + return extractVector(Vec, Op.getOperand(1), SDLoc(Op), ty(Op), ty(Op), DAG); +} - if (VT.getSizeInBits() == 64 && Val.getValueSizeInBits() == 32) { - SDValue C = DAG.getConstant(0, dl, MVT::i32); - Val = DAG.getNode(HexagonISD::COMBINE, dl, VT, C, Val); - } +SDValue +HexagonTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, + SelectionDAG &DAG) const { + MVT VecTy = ty(Op); + if (Subtarget.useHVXOps() && Subtarget.isHVXVectorType(VecTy)) + return LowerHvxInsertElement(Op, DAG); - const SDValue Ops[] = {Vec, Val, Combined}; + return insertVector(Op.getOperand(0), Op.getOperand(1), Op.getOperand(2), + SDLoc(Op), VecTy.getVectorElementType(), DAG); +} - SDValue N; - if (VT.getSizeInBits() == 32) - N = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i32, Ops); - else if (VT.getSizeInBits() == 64) - N = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i64, Ops); - else - return SDValue(); +SDValue +HexagonTargetLowering::LowerINSERT_SUBVECTOR(SDValue Op, + SelectionDAG &DAG) const { + if (Subtarget.useHVXOps() && Subtarget.isHVXVectorType(ty(Op))) + return LowerHvxInsertSubvector(Op, DAG); - return DAG.getNode(ISD::BITCAST, dl, VT, N); + SDValue ValV = Op.getOperand(1); + return insertVector(Op.getOperand(0), ValV, Op.getOperand(2), + SDLoc(Op), ty(ValV), DAG); } bool @@ -2888,14 +2983,14 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { #ifndef NDEBUG Op.getNode()->dumpr(&DAG); if (Opc > HexagonISD::OP_BEGIN && Opc < HexagonISD::OP_END) - errs() << "Check for a non-legal type in this operation\n"; + errs() << "Error: check for a non-legal type in this operation\n"; #endif llvm_unreachable("Should not custom lower this!"); case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); - case ISD::INSERT_SUBVECTOR: return LowerINSERT_VECTOR(Op, DAG); - case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR(Op, DAG); - case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_VECTOR(Op, DAG); - case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR(Op, DAG); + case ISD::INSERT_SUBVECTOR: return LowerINSERT_SUBVECTOR(Op, DAG); + case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); + case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG); + case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); case ISD::SRA: @@ -2921,7 +3016,17 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::INLINEASM: return LowerINLINEASM(Op, DAG); case ISD::PREFETCH: return LowerPREFETCH(Op, DAG); case ISD::READCYCLECOUNTER: return LowerREADCYCLECOUNTER(Op, DAG); + case ISD::MUL: + if (Subtarget.useHVXOps()) + return LowerHvxMul(Op, DAG); + break; + case ISD::MULHS: + case ISD::MULHU: + if (Subtarget.useHVXOps()) + return LowerHvxMulh(Op, DAG); + break; } + return SDValue(); } /// Returns relocation base for the given PIC jumptable. 
@@ -3085,8 +3190,8 @@ bool HexagonTargetLowering::IsEligibleForTailCallOptimization( const SmallVectorImpl &OutVals, const SmallVectorImpl &Ins, SelectionDAG& DAG) const { - const Function *CallerF = DAG.getMachineFunction().getFunction(); - CallingConv::ID CallerCC = CallerF->getCallingConv(); + const Function &CallerF = DAG.getMachineFunction().getFunction(); + CallingConv::ID CallerCC = CallerF.getCallingConv(); bool CCMatch = CallerCC == CalleeCC; // *************************************************************************** @@ -3172,9 +3277,6 @@ bool HexagonTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, case MVT::v16i32: case MVT::v32i32: case MVT::v64i32: - case MVT::v8i64: - case MVT::v16i64: - case MVT::v32i64: return true; } return false; @@ -3192,13 +3294,11 @@ HexagonTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI, case MVT::v64i8: case MVT::v32i16: case MVT::v16i32: - case MVT::v8i64: RRC = &Hexagon::HvxVRRegClass; break; case MVT::v128i8: case MVT::v64i16: case MVT::v32i32: - case MVT::v16i64: if (Subtarget.hasV60TOps() && Subtarget.useHVXOps() && Subtarget.useHVX128BOps()) RRC = &Hexagon::HvxVRRegClass; @@ -3208,7 +3308,6 @@ HexagonTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI, case MVT::v256i8: case MVT::v128i16: case MVT::v64i32: - case MVT::v32i64: RRC = &Hexagon::HvxWRRegClass; break; } diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h index b76fd0c15923..4330cfb7302f 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.h +++ b/lib/Target/Hexagon/HexagonISelLowering.h @@ -51,23 +51,24 @@ namespace HexagonISD { CP, // Constant pool. COMBINE, - PACKHL, VSPLAT, VASL, VASR, VLSR, INSERT, - INSERTRP, EXTRACTU, - EXTRACTURP, VCOMBINE, VPACKE, VPACKO, + VEXTRACTW, + VINSERTW0, + VROR, TC_RETURN, EH_RETURN, DCFETCH, READCYCLE, + VZERO, OP_END }; @@ -89,6 +90,8 @@ namespace HexagonISD { explicit HexagonTargetLowering(const TargetMachine &TM, const HexagonSubtarget &ST); + bool isHVXVectorType(MVT Ty) const; + /// IsEligibleForTailCallOptimization - Check whether the call is eligible /// for tail call optimization. Targets which want to do tail call /// optimization should implement this function. 
@@ -98,6 +101,10 @@ namespace HexagonISD { const SmallVectorImpl &OutVals, const SmallVectorImpl &Ins, SelectionDAG& DAG) const; + bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, + MachineFunction &MF, + unsigned Intrinsic) const override; + bool isTruncateFree(Type *Ty1, Type *Ty2) const override; bool isTruncateFree(EVT VT1, EVT VT2) const override; @@ -114,16 +121,21 @@ namespace HexagonISD { unsigned DefinedValues) const override; bool isShuffleMaskLegal(ArrayRef Mask, EVT VT) const override; + TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(EVT VT) + const override; SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; const char *getTargetNodeName(unsigned Opcode) const override; + + SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerEXTRACT_VECTOR(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerEXTRACT_SUBVECTOR_HVX(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerINSERT_VECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVECTOR_SHIFT(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const; SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) const; @@ -269,11 +281,88 @@ namespace HexagonISD { return AtomicExpansionKind::LLSC; } - protected: + private: + bool getBuildVectorConstInts(ArrayRef Values, MVT VecTy, + SelectionDAG &DAG, + MutableArrayRef Consts) const; SDValue buildVector32(ArrayRef Elem, const SDLoc &dl, MVT VecTy, SelectionDAG &DAG) const; SDValue buildVector64(ArrayRef Elem, const SDLoc &dl, MVT VecTy, SelectionDAG &DAG) const; + SDValue extractVector(SDValue VecV, SDValue IdxV, const SDLoc &dl, + MVT ValTy, MVT ResTy, SelectionDAG &DAG) const; + SDValue insertVector(SDValue VecV, SDValue ValV, SDValue IdxV, + const SDLoc &dl, MVT ValTy, SelectionDAG &DAG) const; + bool isUndef(SDValue Op) const { + if (Op.isMachineOpcode()) + return Op.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF; + return Op.getOpcode() == ISD::UNDEF; + } + SDValue getNode(unsigned MachineOpc, const SDLoc &dl, MVT Ty, + ArrayRef Ops, SelectionDAG &DAG) const { + SDNode *N = DAG.getMachineNode(MachineOpc, dl, Ty, Ops); + return SDValue(N, 0); + } + SDValue getZero(const SDLoc &dl, MVT Ty, SelectionDAG &DAG) const; + + using VectorPair = std::pair; + using TypePair = std::pair; + + SDValue getInt(unsigned IntId, MVT ResTy, ArrayRef Ops, + const SDLoc &dl, SelectionDAG &DAG) const; + + MVT ty(SDValue Op) const { + return Op.getValueType().getSimpleVT(); + } + TypePair ty(const VectorPair &Ops) const { + return { Ops.first.getValueType().getSimpleVT(), + Ops.second.getValueType().getSimpleVT() }; + } + MVT tyScalar(MVT Ty) const { + if (!Ty.isVector()) + return Ty; + return MVT::getIntegerVT(Ty.getSizeInBits()); + } + MVT tyVector(MVT Ty, MVT ElemTy) const { + if (Ty.isVector() && Ty.getVectorElementType() == ElemTy) + return Ty; + unsigned TyWidth = Ty.getSizeInBits(), ElemWidth = ElemTy.getSizeInBits(); + assert((TyWidth % 
ElemWidth) == 0); + return MVT::getVectorVT(ElemTy, TyWidth/ElemWidth); + } + + MVT typeJoin(const TypePair &Tys) const; + TypePair typeSplit(MVT Ty) const; + MVT typeExtElem(MVT VecTy, unsigned Factor) const; + MVT typeTruncElem(MVT VecTy, unsigned Factor) const; + + SDValue opJoin(const VectorPair &Ops, const SDLoc &dl, + SelectionDAG &DAG) const; + VectorPair opSplit(SDValue Vec, const SDLoc &dl, SelectionDAG &DAG) const; + SDValue opCastElem(SDValue Vec, MVT ElemTy, SelectionDAG &DAG) const; + + SDValue convertToByteIndex(SDValue ElemIdx, MVT ElemTy, + SelectionDAG &DAG) const; + SDValue getIndexInWord32(SDValue Idx, MVT ElemTy, SelectionDAG &DAG) const; + SDValue getByteShuffle(const SDLoc &dl, SDValue Op0, SDValue Op1, + ArrayRef Mask, SelectionDAG &DAG) const; + + MVT getVecBoolVT() const; + + SDValue buildHvxVectorSingle(ArrayRef Values, const SDLoc &dl, + MVT VecTy, SelectionDAG &DAG) const; + SDValue buildHvxVectorPred(ArrayRef Values, const SDLoc &dl, + MVT VecTy, SelectionDAG &DAG) const; + + SDValue LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerHvxExtractElement(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerHvxExtractSubvector(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerHvxInsertSubvector(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerHvxMul(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerHvxMulh(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerHvxSetCC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerHvxExtend(SDValue Op, SelectionDAG &DAG) const; std::pair findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) diff --git a/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp new file mode 100644 index 000000000000..acf8b3e1f27f --- /dev/null +++ b/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp @@ -0,0 +1,651 @@ +//===-- HexagonISelLoweringHVX.cpp --- Lowering HVX operations ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "HexagonISelLowering.h" +#include "HexagonRegisterInfo.h" +#include "HexagonSubtarget.h" + +using namespace llvm; + +SDValue +HexagonTargetLowering::getInt(unsigned IntId, MVT ResTy, ArrayRef Ops, + const SDLoc &dl, SelectionDAG &DAG) const { + SmallVector IntOps; + IntOps.push_back(DAG.getConstant(IntId, dl, MVT::i32)); + for (const SDValue &Op : Ops) + IntOps.push_back(Op); + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, ResTy, IntOps); +} + +MVT +HexagonTargetLowering::typeJoin(const TypePair &Tys) const { + assert(Tys.first.getVectorElementType() == Tys.second.getVectorElementType()); + + MVT ElemTy = Tys.first.getVectorElementType(); + return MVT::getVectorVT(ElemTy, Tys.first.getVectorNumElements() + + Tys.second.getVectorNumElements()); +} + +HexagonTargetLowering::TypePair +HexagonTargetLowering::typeSplit(MVT VecTy) const { + assert(VecTy.isVector()); + unsigned NumElem = VecTy.getVectorNumElements(); + assert((NumElem % 2) == 0 && "Expecting even-sized vector type"); + MVT HalfTy = MVT::getVectorVT(VecTy.getVectorElementType(), NumElem/2); + return { HalfTy, HalfTy }; +} + +MVT +HexagonTargetLowering::typeExtElem(MVT VecTy, unsigned Factor) const { + MVT ElemTy = VecTy.getVectorElementType(); + MVT NewElemTy = MVT::getIntegerVT(ElemTy.getSizeInBits() * Factor); + return MVT::getVectorVT(NewElemTy, VecTy.getVectorNumElements()); +} + +MVT +HexagonTargetLowering::typeTruncElem(MVT VecTy, unsigned Factor) const { + MVT ElemTy = VecTy.getVectorElementType(); + MVT NewElemTy = MVT::getIntegerVT(ElemTy.getSizeInBits() / Factor); + return MVT::getVectorVT(NewElemTy, VecTy.getVectorNumElements()); +} + +SDValue +HexagonTargetLowering::opCastElem(SDValue Vec, MVT ElemTy, + SelectionDAG &DAG) const { + if (ty(Vec).getVectorElementType() == ElemTy) + return Vec; + MVT CastTy = tyVector(Vec.getValueType().getSimpleVT(), ElemTy); + return DAG.getBitcast(CastTy, Vec); +} + +SDValue +HexagonTargetLowering::opJoin(const VectorPair &Ops, const SDLoc &dl, + SelectionDAG &DAG) const { + return DAG.getNode(ISD::CONCAT_VECTORS, dl, typeJoin(ty(Ops)), + Ops.second, Ops.first); +} + +HexagonTargetLowering::VectorPair +HexagonTargetLowering::opSplit(SDValue Vec, const SDLoc &dl, + SelectionDAG &DAG) const { + TypePair Tys = typeSplit(ty(Vec)); + return DAG.SplitVector(Vec, dl, Tys.first, Tys.second); +} + +SDValue +HexagonTargetLowering::convertToByteIndex(SDValue ElemIdx, MVT ElemTy, + SelectionDAG &DAG) const { + if (ElemIdx.getValueType().getSimpleVT() != MVT::i32) + ElemIdx = DAG.getBitcast(MVT::i32, ElemIdx); + + unsigned ElemWidth = ElemTy.getSizeInBits(); + if (ElemWidth == 8) + return ElemIdx; + + unsigned L = Log2_32(ElemWidth/8); + const SDLoc &dl(ElemIdx); + return DAG.getNode(ISD::SHL, dl, MVT::i32, + {ElemIdx, DAG.getConstant(L, dl, MVT::i32)}); +} + +SDValue +HexagonTargetLowering::getIndexInWord32(SDValue Idx, MVT ElemTy, + SelectionDAG &DAG) const { + unsigned ElemWidth = ElemTy.getSizeInBits(); + assert(ElemWidth >= 8 && ElemWidth <= 32); + if (ElemWidth == 32) + return Idx; + + if (ty(Idx) != MVT::i32) + Idx = DAG.getBitcast(MVT::i32, Idx); + const SDLoc &dl(Idx); + SDValue Mask = DAG.getConstant(32/ElemWidth - 1, dl, MVT::i32); + SDValue SubIdx = DAG.getNode(ISD::AND, dl, MVT::i32, {Idx, Mask}); + return SubIdx; +} + +SDValue +HexagonTargetLowering::getByteShuffle(const SDLoc &dl, SDValue Op0, + SDValue Op1, ArrayRef Mask, + SelectionDAG &DAG) const { + MVT OpTy = ty(Op0); + assert(OpTy 
== ty(Op1)); + + MVT ElemTy = OpTy.getVectorElementType(); + if (ElemTy == MVT::i8) + return DAG.getVectorShuffle(OpTy, dl, Op0, Op1, Mask); + assert(ElemTy.getSizeInBits() >= 8); + + MVT ResTy = tyVector(OpTy, MVT::i8); + unsigned ElemSize = ElemTy.getSizeInBits() / 8; + + SmallVector ByteMask; + for (int M : Mask) { + if (M < 0) { + for (unsigned I = 0; I != ElemSize; ++I) + ByteMask.push_back(-1); + } else { + int NewM = M*ElemSize; + for (unsigned I = 0; I != ElemSize; ++I) + ByteMask.push_back(NewM+I); + } + } + assert(ResTy.getVectorNumElements() == ByteMask.size()); + return DAG.getVectorShuffle(ResTy, dl, opCastElem(Op0, MVT::i8, DAG), + opCastElem(Op1, MVT::i8, DAG), ByteMask); +} + +MVT +HexagonTargetLowering::getVecBoolVT() const { + return MVT::getVectorVT(MVT::i1, 8*Subtarget.getVectorLength()); +} + +SDValue +HexagonTargetLowering::buildHvxVectorSingle(ArrayRef Values, + const SDLoc &dl, MVT VecTy, + SelectionDAG &DAG) const { + unsigned VecLen = Values.size(); + MachineFunction &MF = DAG.getMachineFunction(); + MVT ElemTy = VecTy.getVectorElementType(); + unsigned ElemWidth = ElemTy.getSizeInBits(); + unsigned HwLen = Subtarget.getVectorLength(); + + SmallVector Consts(VecLen); + bool AllConst = getBuildVectorConstInts(Values, VecTy, DAG, Consts); + if (AllConst) { + if (llvm::all_of(Consts, [](ConstantInt *CI) { return CI->isZero(); })) + return getZero(dl, VecTy, DAG); + + ArrayRef Tmp((Constant**)Consts.begin(), + (Constant**)Consts.end()); + Constant *CV = ConstantVector::get(Tmp); + unsigned Align = HwLen; + SDValue CP = LowerConstantPool(DAG.getConstantPool(CV, VecTy, Align), DAG); + return DAG.getLoad(VecTy, dl, DAG.getEntryNode(), CP, + MachinePointerInfo::getConstantPool(MF), Align); + } + + unsigned ElemSize = ElemWidth / 8; + assert(ElemSize*VecLen == HwLen); + SmallVector Words; + + if (VecTy.getVectorElementType() != MVT::i32) { + assert((ElemSize == 1 || ElemSize == 2) && "Invalid element size"); + unsigned OpsPerWord = (ElemSize == 1) ? 4 : 2; + MVT PartVT = MVT::getVectorVT(VecTy.getVectorElementType(), OpsPerWord); + for (unsigned i = 0; i != VecLen; i += OpsPerWord) { + SDValue W = buildVector32(Values.slice(i, OpsPerWord), dl, PartVT, DAG); + Words.push_back(DAG.getBitcast(MVT::i32, W)); + } + } else { + Words.assign(Values.begin(), Values.end()); + } + + // Construct two halves in parallel, then or them together. + assert(4*Words.size() == Subtarget.getVectorLength()); + SDValue HalfV0 = getNode(Hexagon::V6_vd0, dl, VecTy, {}, DAG); + SDValue HalfV1 = getNode(Hexagon::V6_vd0, dl, VecTy, {}, DAG); + SDValue S = DAG.getConstant(4, dl, MVT::i32); + unsigned NumWords = Words.size(); + for (unsigned i = 0; i != NumWords/2; ++i) { + SDValue N = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy, + {HalfV0, Words[i]}); + SDValue M = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy, + {HalfV1, Words[i+NumWords/2]}); + HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {N, S}); + HalfV1 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {M, S}); + } + + HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, + {HalfV0, DAG.getConstant(HwLen/2, dl, MVT::i32)}); + SDValue DstV = DAG.getNode(ISD::OR, dl, VecTy, {HalfV0, HalfV1}); + return DstV; +} + +SDValue +HexagonTargetLowering::buildHvxVectorPred(ArrayRef Values, + const SDLoc &dl, MVT VecTy, + SelectionDAG &DAG) const { + // Construct a vector V of bytes, such that a comparison V >u 0 would + // produce the required vector predicate. 
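The non-constant path of buildHvxVectorSingle above fills two half-vectors by repeatedly inserting a word into lane 0 (VINSERTW0) and rotating by one word (VROR by 4 bytes), then rotates one half by half the register and ORs the two together. A standalone scalar check of that scheme, assuming a 64-byte register viewed as 16 x i32 and rotating by whole words rather than bytes (the function names are illustrative, not the HVX nodes):

#include <array>
#include <cassert>
#include <cstdint>

constexpr unsigned NumWords = 16;                 // 64-byte register as words
using Vec = std::array<uint32_t, NumWords>;

static Vec insertw0(Vec V, uint32_t W) {          // models VINSERTW0
  V[0] = W;
  return V;
}

static Vec ror(const Vec &V, unsigned K) {        // rotate right by K words
  Vec R{};
  for (unsigned I = 0; I != NumWords; ++I)
    R[I] = V[(I + K) % NumWords];
  return R;
}

int main() {
  Vec Words;
  for (unsigned I = 0; I != NumWords; ++I)
    Words[I] = 0x1000 + I;

  Vec Half0{}, Half1{};                           // start from zero vectors
  for (unsigned I = 0; I != NumWords / 2; ++I) {
    Half0 = ror(insertw0(Half0, Words[I]), 1);
    Half1 = ror(insertw0(Half1, Words[I + NumWords / 2]), 1);
  }
  Half0 = ror(Half0, NumWords / 2);               // align the low half

  for (unsigned I = 0; I != NumWords; ++I)        // halves do not overlap
    assert((Half0[I] | Half1[I]) == Words[I]);
}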
+ unsigned VecLen = Values.size(); + unsigned HwLen = Subtarget.getVectorLength(); + assert(VecLen <= HwLen || VecLen == 8*HwLen); + SmallVector Bytes; + + if (VecLen <= HwLen) { + // In the hardware, each bit of a vector predicate corresponds to a byte + // of a vector register. Calculate how many bytes does a bit of VecTy + // correspond to. + assert(HwLen % VecLen == 0); + unsigned BitBytes = HwLen / VecLen; + for (SDValue V : Values) { + SDValue Ext = !V.isUndef() ? DAG.getZExtOrTrunc(V, dl, MVT::i8) + : DAG.getConstant(0, dl, MVT::i8); + for (unsigned B = 0; B != BitBytes; ++B) + Bytes.push_back(Ext); + } + } else { + // There are as many i1 values, as there are bits in a vector register. + // Divide the values into groups of 8 and check that each group consists + // of the same value (ignoring undefs). + for (unsigned I = 0; I != VecLen; I += 8) { + unsigned B = 0; + // Find the first non-undef value in this group. + for (; B != 8; ++B) { + if (!Values[I+B].isUndef()) + break; + } + SDValue F = Values[I+B]; + SDValue Ext = (B < 8) ? DAG.getZExtOrTrunc(F, dl, MVT::i8) + : DAG.getConstant(0, dl, MVT::i8); + Bytes.push_back(Ext); + // Verify that the rest of values in the group are the same as the + // first. + for (; B != 8; ++B) + assert(Values[I+B].isUndef() || Values[I+B] == F); + } + } + + MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen); + SDValue ByteVec = buildHvxVectorSingle(Bytes, dl, ByteTy, DAG); + SDValue Cmp = DAG.getSetCC(dl, VecTy, ByteVec, getZero(dl, ByteTy, DAG), + ISD::SETUGT); + return Cmp; +} + +SDValue +HexagonTargetLowering::LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG) + const { + const SDLoc &dl(Op); + MVT VecTy = ty(Op); + + unsigned Size = Op.getNumOperands(); + SmallVector Ops; + for (unsigned i = 0; i != Size; ++i) + Ops.push_back(Op.getOperand(i)); + + if (VecTy.getVectorElementType() == MVT::i1) + return buildHvxVectorPred(Ops, dl, VecTy, DAG); + + if (VecTy.getSizeInBits() == 16*Subtarget.getVectorLength()) { + ArrayRef A(Ops); + MVT SingleTy = typeSplit(VecTy).first; + SDValue V0 = buildHvxVectorSingle(A.take_front(Size/2), dl, SingleTy, DAG); + SDValue V1 = buildHvxVectorSingle(A.drop_front(Size/2), dl, SingleTy, DAG); + return DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, V0, V1); + } + + return buildHvxVectorSingle(Ops, dl, VecTy, DAG); +} + +SDValue +HexagonTargetLowering::LowerHvxExtractElement(SDValue Op, SelectionDAG &DAG) + const { + // Change the type of the extracted element to i32. + SDValue VecV = Op.getOperand(0); + MVT ElemTy = ty(VecV).getVectorElementType(); + unsigned ElemWidth = ElemTy.getSizeInBits(); + assert(ElemWidth >= 8 && ElemWidth <= 32); + (void)ElemWidth; + + const SDLoc &dl(Op); + SDValue IdxV = Op.getOperand(1); + if (ty(IdxV) != MVT::i32) + IdxV = DAG.getBitcast(MVT::i32, IdxV); + + SDValue ByteIdx = convertToByteIndex(IdxV, ElemTy, DAG); + SDValue ExWord = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32, + {VecV, ByteIdx}); + if (ElemTy == MVT::i32) + return ExWord; + + // Have an extracted word, need to extract the smaller element out of it. + // 1. Extract the bits of (the original) IdxV that correspond to the index + // of the desired element in the 32-bit word. + SDValue SubIdx = getIndexInWord32(IdxV, ElemTy, DAG); + // 2. Extract the element from the word. 
+ SDValue ExVec = DAG.getBitcast(tyVector(ty(ExWord), ElemTy), ExWord); + return extractVector(ExVec, SubIdx, dl, ElemTy, MVT::i32, DAG); +} + +SDValue +HexagonTargetLowering::LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG) + const { + const SDLoc &dl(Op); + SDValue VecV = Op.getOperand(0); + SDValue ValV = Op.getOperand(1); + SDValue IdxV = Op.getOperand(2); + MVT ElemTy = ty(VecV).getVectorElementType(); + unsigned ElemWidth = ElemTy.getSizeInBits(); + assert(ElemWidth >= 8 && ElemWidth <= 32); + (void)ElemWidth; + + auto InsertWord = [&DAG,&dl,this] (SDValue VecV, SDValue ValV, + SDValue ByteIdxV) { + MVT VecTy = ty(VecV); + unsigned HwLen = Subtarget.getVectorLength(); + SDValue MaskV = DAG.getNode(ISD::AND, dl, MVT::i32, + {ByteIdxV, DAG.getConstant(-4, dl, MVT::i32)}); + SDValue RotV = DAG.getNode(HexagonISD::VROR, dl, VecTy, {VecV, MaskV}); + SDValue InsV = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy, {RotV, ValV}); + SDValue SubV = DAG.getNode(ISD::SUB, dl, MVT::i32, + {DAG.getConstant(HwLen/4, dl, MVT::i32), MaskV}); + SDValue TorV = DAG.getNode(HexagonISD::VROR, dl, VecTy, {InsV, SubV}); + return TorV; + }; + + SDValue ByteIdx = convertToByteIndex(IdxV, ElemTy, DAG); + if (ElemTy == MVT::i32) + return InsertWord(VecV, ValV, ByteIdx); + + // If this is not inserting a 32-bit word, convert it into such a thing. + // 1. Extract the existing word from the target vector. + SDValue WordIdx = DAG.getNode(ISD::SRL, dl, MVT::i32, + {ByteIdx, DAG.getConstant(2, dl, MVT::i32)}); + SDValue Ex0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, + {opCastElem(VecV, MVT::i32, DAG), WordIdx}); + SDValue Ext = LowerHvxExtractElement(Ex0, DAG); + + // 2. Treating the extracted word as a 32-bit vector, insert the given + // value into it. + SDValue SubIdx = getIndexInWord32(IdxV, ElemTy, DAG); + MVT SubVecTy = tyVector(ty(Ext), ElemTy); + SDValue Ins = insertVector(DAG.getBitcast(SubVecTy, Ext), + ValV, SubIdx, dl, ElemTy, DAG); + + // 3. Insert the 32-bit word back into the original vector. + return InsertWord(VecV, Ins, ByteIdx); +} + +SDValue +HexagonTargetLowering::LowerHvxExtractSubvector(SDValue Op, SelectionDAG &DAG) + const { + SDValue SrcV = Op.getOperand(0); + MVT SrcTy = ty(SrcV); + unsigned SrcElems = SrcTy.getVectorNumElements(); + SDValue IdxV = Op.getOperand(1); + unsigned Idx = cast(IdxV.getNode())->getZExtValue(); + MVT DstTy = ty(Op); + assert(Idx == 0 || DstTy.getVectorNumElements() % Idx == 0); + const SDLoc &dl(Op); + if (Idx == 0) + return DAG.getTargetExtractSubreg(Hexagon::vsub_lo, dl, DstTy, SrcV); + if (Idx == SrcElems/2) + return DAG.getTargetExtractSubreg(Hexagon::vsub_hi, dl, DstTy, SrcV); + return SDValue(); +} + +SDValue +HexagonTargetLowering::LowerHvxInsertSubvector(SDValue Op, SelectionDAG &DAG) + const { + // Idx may be variable. 
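LowerHvxExtractElement and LowerHvxInsertElement above both route sub-word elements through the 32-bit word that contains them: scale the element index to a byte index, pick the word, then use the index within the word to slice or splice the element. A small scalar model of that index arithmetic for i16 elements, assuming little-endian lane order inside each word (a sketch of the arithmetic only, not of the VEXTRACTW/VINSERTW0 semantics):

#include <cassert>
#include <cstdint>
#include <vector>

// Extract element Idx from a vector of i16 stored as 32-bit words.
static uint16_t extractH(const std::vector<uint32_t> &Words, unsigned Idx) {
  unsigned ByteIdx = Idx * 2;                   // convertToByteIndex for i16
  unsigned WordIdx = ByteIdx >> 2;              // word holding the element
  unsigned SubIdx = Idx & (32 / 16 - 1);        // getIndexInWord32
  uint32_t W = Words[WordIdx];                  // stands in for VEXTRACTW
  return uint16_t(W >> (SubIdx * 16));          // slice the halfword out
}

int main() {
  // Words[0] holds elements 0..1, Words[1] holds 2..3, and so on.
  std::vector<uint32_t> Words = {0x11110000u, 0x33332222u, 0x55554444u};
  for (unsigned I = 0; I != 6; ++I)
    assert(extractH(Words, I) == 0x1111u * I);
}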
+ SDValue IdxV = Op.getOperand(2); + auto *IdxN = dyn_cast(IdxV.getNode()); + if (!IdxN) + return SDValue(); + unsigned Idx = IdxN->getZExtValue(); + + SDValue DstV = Op.getOperand(0); + SDValue SrcV = Op.getOperand(1); + MVT DstTy = ty(DstV); + MVT SrcTy = ty(SrcV); + unsigned DstElems = DstTy.getVectorNumElements(); + unsigned SrcElems = SrcTy.getVectorNumElements(); + if (2*SrcElems != DstElems) + return SDValue(); + + const SDLoc &dl(Op); + if (Idx == 0) + return DAG.getTargetInsertSubreg(Hexagon::vsub_lo, dl, DstTy, DstV, SrcV); + if (Idx == SrcElems) + return DAG.getTargetInsertSubreg(Hexagon::vsub_hi, dl, DstTy, DstV, SrcV); + return SDValue(); +} + +SDValue +HexagonTargetLowering::LowerHvxMul(SDValue Op, SelectionDAG &DAG) const { + MVT ResTy = ty(Op); + assert(ResTy.isVector()); + const SDLoc &dl(Op); + SmallVector ShuffMask; + + MVT ElemTy = ResTy.getVectorElementType(); + unsigned VecLen = ResTy.getVectorNumElements(); + SDValue Vs = Op.getOperand(0); + SDValue Vt = Op.getOperand(1); + + switch (ElemTy.SimpleTy) { + case MVT::i8: + case MVT::i16: { // V6_vmpyih + // For i8 vectors Vs = (a0, a1, ...), Vt = (b0, b1, ...), + // V6_vmpybv Vs, Vt produces a pair of i16 vectors Hi:Lo, + // where Lo = (a0*b0, a2*b2, ...), Hi = (a1*b1, a3*b3, ...). + // For i16, use V6_vmpyhv, which behaves in an analogous way to + // V6_vmpybv: results Lo and Hi are products of even/odd elements + // respectively. + MVT ExtTy = typeExtElem(ResTy, 2); + unsigned MpyOpc = ElemTy == MVT::i8 ? Hexagon::V6_vmpybv + : Hexagon::V6_vmpyhv; + SDValue M = getNode(MpyOpc, dl, ExtTy, {Vs, Vt}, DAG); + + // Discard high halves of the resulting values, collect the low halves. + for (unsigned I = 0; I < VecLen; I += 2) { + ShuffMask.push_back(I); // Pick even element. + ShuffMask.push_back(I+VecLen); // Pick odd element. + } + VectorPair P = opSplit(opCastElem(M, ElemTy, DAG), dl, DAG); + SDValue BS = getByteShuffle(dl, P.first, P.second, ShuffMask, DAG); + return DAG.getBitcast(ResTy, BS); + } + case MVT::i32: { + // Use the following sequence for signed word multiply: + // T0 = V6_vmpyiowh Vs, Vt + // T1 = V6_vaslw T0, 16 + // T2 = V6_vmpyiewuh_acc T1, Vs, Vt + SDValue S16 = DAG.getConstant(16, dl, MVT::i32); + SDValue T0 = getNode(Hexagon::V6_vmpyiowh, dl, ResTy, {Vs, Vt}, DAG); + SDValue T1 = getNode(Hexagon::V6_vaslw, dl, ResTy, {T0, S16}, DAG); + SDValue T2 = getNode(Hexagon::V6_vmpyiewuh_acc, dl, ResTy, + {T1, Vs, Vt}, DAG); + return T2; + } + default: + break; + } + return SDValue(); +} + +SDValue +HexagonTargetLowering::LowerHvxMulh(SDValue Op, SelectionDAG &DAG) const { + MVT ResTy = ty(Op); + assert(ResTy.isVector()); + const SDLoc &dl(Op); + SmallVector ShuffMask; + + MVT ElemTy = ResTy.getVectorElementType(); + unsigned VecLen = ResTy.getVectorNumElements(); + SDValue Vs = Op.getOperand(0); + SDValue Vt = Op.getOperand(1); + bool IsSigned = Op.getOpcode() == ISD::MULHS; + + if (ElemTy == MVT::i8 || ElemTy == MVT::i16) { + // For i8 vectors Vs = (a0, a1, ...), Vt = (b0, b1, ...), + // V6_vmpybv Vs, Vt produces a pair of i16 vectors Hi:Lo, + // where Lo = (a0*b0, a2*b2, ...), Hi = (a1*b1, a3*b3, ...). + // For i16, use V6_vmpyhv, which behaves in an analogous way to + // V6_vmpybv: results Lo and Hi are products of even/odd elements + // respectively. + MVT ExtTy = typeExtElem(ResTy, 2); + unsigned MpyOpc = ElemTy == MVT::i8 + ? (IsSigned ? Hexagon::V6_vmpybv : Hexagon::V6_vmpyubv) + : (IsSigned ? 
Hexagon::V6_vmpyhv : Hexagon::V6_vmpyuhv); + SDValue M = getNode(MpyOpc, dl, ExtTy, {Vs, Vt}, DAG); + + // Discard low halves of the resulting values, collect the high halves. + for (unsigned I = 0; I < VecLen; I += 2) { + ShuffMask.push_back(I+1); // Pick even element. + ShuffMask.push_back(I+VecLen+1); // Pick odd element. + } + VectorPair P = opSplit(opCastElem(M, ElemTy, DAG), dl, DAG); + SDValue BS = getByteShuffle(dl, P.first, P.second, ShuffMask, DAG); + return DAG.getBitcast(ResTy, BS); + } + + assert(ElemTy == MVT::i32); + SDValue S16 = DAG.getConstant(16, dl, MVT::i32); + + if (IsSigned) { + // mulhs(Vs,Vt) = + // = [(Hi(Vs)*2^16 + Lo(Vs)) *s (Hi(Vt)*2^16 + Lo(Vt))] >> 32 + // = [Hi(Vs)*2^16 *s Hi(Vt)*2^16 + Hi(Vs) *su Lo(Vt)*2^16 + // + Lo(Vs) *us (Hi(Vt)*2^16 + Lo(Vt))] >> 32 + // = [Hi(Vs) *s Hi(Vt)*2^32 + Hi(Vs) *su Lo(Vt)*2^16 + // + Lo(Vs) *us Vt] >> 32 + // The low half of Lo(Vs)*Lo(Vt) will be discarded (it's not added to + // anything, so it cannot produce any carry over to higher bits), + // so everything in [] can be shifted by 16 without loss of precision. + // = [Hi(Vs) *s Hi(Vt)*2^16 + Hi(Vs)*su Lo(Vt) + Lo(Vs)*Vt >> 16] >> 16 + // = [Hi(Vs) *s Hi(Vt)*2^16 + Hi(Vs)*su Lo(Vt) + V6_vmpyewuh(Vs,Vt)] >> 16 + // Denote Hi(Vs) = Vs': + // = [Vs'*s Hi(Vt)*2^16 + Vs' *su Lo(Vt) + V6_vmpyewuh(Vt,Vs)] >> 16 + // = Vs'*s Hi(Vt) + (V6_vmpyiewuh(Vs',Vt) + V6_vmpyewuh(Vt,Vs)) >> 16 + SDValue T0 = getNode(Hexagon::V6_vmpyewuh, dl, ResTy, {Vt, Vs}, DAG); + // Get Vs': + SDValue S0 = getNode(Hexagon::V6_vasrw, dl, ResTy, {Vs, S16}, DAG); + SDValue T1 = getNode(Hexagon::V6_vmpyiewuh_acc, dl, ResTy, + {T0, S0, Vt}, DAG); + // Shift by 16: + SDValue S2 = getNode(Hexagon::V6_vasrw, dl, ResTy, {T1, S16}, DAG); + // Get Vs'*Hi(Vt): + SDValue T2 = getNode(Hexagon::V6_vmpyiowh, dl, ResTy, {S0, Vt}, DAG); + // Add: + SDValue T3 = DAG.getNode(ISD::ADD, dl, ResTy, {S2, T2}); + return T3; + } + + // Unsigned mulhw. (Would expansion using signed mulhw be better?) + + auto LoVec = [&DAG,ResTy,dl] (SDValue Pair) { + return DAG.getTargetExtractSubreg(Hexagon::vsub_lo, dl, ResTy, Pair); + }; + auto HiVec = [&DAG,ResTy,dl] (SDValue Pair) { + return DAG.getTargetExtractSubreg(Hexagon::vsub_hi, dl, ResTy, Pair); + }; + + MVT PairTy = typeJoin({ResTy, ResTy}); + SDValue P = getNode(Hexagon::V6_lvsplatw, dl, ResTy, + {DAG.getConstant(0x02020202, dl, MVT::i32)}, DAG); + // Multiply-unsigned halfwords: + // LoVec = Vs.uh[2i] * Vt.uh[2i], + // HiVec = Vs.uh[2i+1] * Vt.uh[2i+1] + SDValue T0 = getNode(Hexagon::V6_vmpyuhv, dl, PairTy, {Vs, Vt}, DAG); + // The low halves in the LoVec of the pair can be discarded. They are + // not added to anything (in the full-precision product), so they cannot + // produce a carry into the higher bits. + SDValue T1 = getNode(Hexagon::V6_vlsrw, dl, ResTy, {LoVec(T0), S16}, DAG); + // Swap low and high halves in Vt, and do the halfword multiplication + // to get products Vs.uh[2i] * Vt.uh[2i+1] and Vs.uh[2i+1] * Vt.uh[2i]. + SDValue D0 = getNode(Hexagon::V6_vdelta, dl, ResTy, {Vt, P}, DAG); + SDValue T2 = getNode(Hexagon::V6_vmpyuhv, dl, PairTy, {Vs, D0}, DAG); + // T2 has mixed products of halfwords: Lo(Vt)*Hi(Vs) and Hi(Vt)*Lo(Vs). + // These products are words, but cannot be added directly because the + // sums could overflow. Add these products, by halfwords, where each sum + // of a pair of halfwords gives a word. + SDValue T3 = getNode(Hexagon::V6_vadduhw, dl, PairTy, + {LoVec(T2), HiVec(T2)}, DAG); + // Add the high halfwords from the products of the low halfwords. 
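The signed multiply-high derivation in the comment above reduces to plain 16-bit-half arithmetic: split each operand into a signed high half and an unsigned low half, drop the low 16 bits of the low-by-low product (they never reach the high word), and fold in the cross products. A standalone scalar check of that algebra against the exact 64-bit product; this validates the derivation only, not the HVX instruction sequence:

#include <cassert>
#include <cstdint>
#include <cstdio>

// High 32 bits of a signed 32x32 multiply, computed from 16-bit halves as in
// the derivation above. Hi() is signed, Lo() is unsigned.
static int32_t mulhs32(int32_t A, int32_t B) {
  int64_t AH = A >> 16, BH = B >> 16;            // signed high halves
  int64_t AL = uint16_t(A), BL = uint16_t(B);    // unsigned low halves

  int64_t LL = AL * BL;                          // unsigned * unsigned
  int64_t Mid = AH * BL + AL * BH + (LL >> 16);  // cross products + carry-in
  return int32_t((AH * BH) + (Mid >> 16));       // fold into the high word
}

int main() {
  int32_t Tests[] = {0, 1, -1, 2, -7, 123456789, -987654321,
                     0x7fffffff, int32_t(0x80000000), int32_t(0xdeadbeef)};
  for (int32_t A : Tests)
    for (int32_t B : Tests)
      assert(mulhs32(A, B) == int32_t((int64_t(A) * B) >> 32));
  std::puts("mulhs model matches the 64-bit reference");
}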
+ SDValue T4 = DAG.getNode(ISD::ADD, dl, ResTy, {T1, LoVec(T3)}); + SDValue T5 = getNode(Hexagon::V6_vlsrw, dl, ResTy, {T4, S16}, DAG); + SDValue T6 = DAG.getNode(ISD::ADD, dl, ResTy, {HiVec(T0), HiVec(T3)}); + SDValue T7 = DAG.getNode(ISD::ADD, dl, ResTy, {T5, T6}); + return T7; +} + +SDValue +HexagonTargetLowering::LowerHvxSetCC(SDValue Op, SelectionDAG &DAG) const { + MVT VecTy = ty(Op.getOperand(0)); + assert(VecTy == ty(Op.getOperand(1))); + + SDValue Cmp = Op.getOperand(2); + ISD::CondCode CC = cast(Cmp)->get(); + bool Negate = false, Swap = false; + + // HVX has instructions for SETEQ, SETGT, SETUGT. The other comparisons + // can be arranged as operand-swapped/negated versions of these. Since + // the generated code will have the original CC expressed as + // (negate (swap-op NewCmp)), + // the condition code for the NewCmp should be calculated from the original + // CC by applying these operations in the reverse order. + // + // This could also be done through setCondCodeAction, but for negation it + // uses a xor with a vector of -1s, which it obtains from BUILD_VECTOR. + // That is far too expensive for what can be done with a single instruction. + + switch (CC) { + case ISD::SETNE: // !eq + case ISD::SETLE: // !gt + case ISD::SETGE: // !lt + case ISD::SETULE: // !ugt + case ISD::SETUGE: // !ult + CC = ISD::getSetCCInverse(CC, true); + Negate = true; + break; + default: + break; + } + + switch (CC) { + case ISD::SETLT: // swap gt + case ISD::SETULT: // swap ugt + CC = ISD::getSetCCSwappedOperands(CC); + Swap = true; + break; + default: + break; + } + + assert(CC == ISD::SETEQ || CC == ISD::SETGT || CC == ISD::SETUGT); + + MVT ElemTy = VecTy.getVectorElementType(); + unsigned ElemWidth = ElemTy.getSizeInBits(); + assert(isPowerOf2_32(ElemWidth)); + + auto getIdx = [] (unsigned Code) { + static const unsigned Idx[] = { ISD::SETEQ, ISD::SETGT, ISD::SETUGT }; + for (unsigned I = 0, E = array_lengthof(Idx); I != E; ++I) + if (Code == Idx[I]) + return I; + llvm_unreachable("Unhandled CondCode"); + }; + + static unsigned OpcTable[3][3] = { + // SETEQ SETGT, SETUGT + /* Byte */ { Hexagon::V6_veqb, Hexagon::V6_vgtb, Hexagon::V6_vgtub }, + /* Half */ { Hexagon::V6_veqh, Hexagon::V6_vgth, Hexagon::V6_vgtuh }, + /* Word */ { Hexagon::V6_veqw, Hexagon::V6_vgtw, Hexagon::V6_vgtuw } + }; + + unsigned CmpOpc = OpcTable[Log2_32(ElemWidth)-3][getIdx(CC)]; + + MVT ResTy = ty(Op); + const SDLoc &dl(Op); + SDValue OpL = Swap ? Op.getOperand(1) : Op.getOperand(0); + SDValue OpR = Swap ? Op.getOperand(0) : Op.getOperand(1); + SDValue CmpV = getNode(CmpOpc, dl, ResTy, {OpL, OpR}, DAG); + return Negate ? getNode(Hexagon::V6_pred_not, dl, ResTy, {CmpV}, DAG) + : CmpV; +} + +SDValue +HexagonTargetLowering::LowerHvxExtend(SDValue Op, SelectionDAG &DAG) const { + // Sign- and zero-extends are legal. + assert(Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG); + return DAG.getZeroExtendVectorInReg(Op.getOperand(0), SDLoc(Op), ty(Op)); +} diff --git a/lib/Target/Hexagon/HexagonInstrFormats.td b/lib/Target/Hexagon/HexagonInstrFormats.td index 4da2edc24f3a..1bb3bc1ea31b 100644 --- a/lib/Target/Hexagon/HexagonInstrFormats.td +++ b/lib/Target/Hexagon/HexagonInstrFormats.td @@ -77,9 +77,9 @@ class InstHexagon pattern, // Packed only with A or X-type instructions. bits<1> isSoloAX = 0; let TSFlags{7} = isSoloAX; - // Only A-type instruction in first slot or nothing. - bits<1> isSoloAin1 = 0; - let TSFlags{8} = isSoloAin1; + // Restricts slot 1 to ALU-only instructions. 
+ bits<1> isRestrictSlot1AOK = 0; + let TSFlags{8} = isRestrictSlot1AOK; // Predicated instructions. bits<1> isPredicated = 0; @@ -121,6 +121,16 @@ class InstHexagon pattern, bits<2> opExtentAlign = 0; let TSFlags{34-33} = opExtentAlign; // Alignment exponent before extending. + bit cofMax1 = 0; + let TSFlags{35} = cofMax1; + bit cofRelax1 = 0; + let TSFlags{36} = cofRelax1; + bit cofRelax2 = 0; + let TSFlags{37} = cofRelax2; + + bit isRestrictNoSlot1Store = 0; + let TSFlags{38} = isRestrictNoSlot1Store; + // Addressing mode for load/store instructions. AddrModeType addrMode = NoAddrMode; let TSFlags{43-41} = addrMode.Value; @@ -135,6 +145,9 @@ class InstHexagon pattern, bits<1> isFP = 0; let TSFlags {49} = isFP; // Floating-point. + bits<1> isSomeOK = 0; + let TSFlags {50} = isSomeOK; // Relax some grouping constraints. + bits<1> hasNewValue2 = 0; let TSFlags{51} = hasNewValue2; // Second New-value producer insn. bits<3> opNewValue2 = 0; @@ -146,8 +159,8 @@ class InstHexagon pattern, bits<1> prefersSlot3 = 0; let TSFlags{56} = prefersSlot3; // Complex XU - bit cofMax1 = 0; - let TSFlags{60} = cofMax1; + bits<1> hasTmpDst = 0; + let TSFlags{59} = hasTmpDst; // v65 : 'fake" register VTMP is set bit CVINew = 0; let TSFlags{61} = CVINew; @@ -229,15 +242,8 @@ class PseudoM pattern = [], include "HexagonInstrFormatsV4.td" //===----------------------------------------------------------------------===// -// V55 Instruction Format Definitions + -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// V60 Instruction Format Definitions + +// V60+ Instruction Format Definitions + //===----------------------------------------------------------------------===// include "HexagonInstrFormatsV60.td" - -//===----------------------------------------------------------------------===// -// V62 Instruction Format Definitions + -//===----------------------------------------------------------------------===// +include "HexagonInstrFormatsV65.td" diff --git a/lib/Target/Hexagon/HexagonInstrFormatsV65.td b/lib/Target/Hexagon/HexagonInstrFormatsV65.td new file mode 100644 index 000000000000..cddb8777b417 --- /dev/null +++ b/lib/Target/Hexagon/HexagonInstrFormatsV65.td @@ -0,0 +1,32 @@ +//==- HexagonInstrFormatsV65.td - Hexagon Instruction Formats -*- tablegen -==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the Hexagon V60 instruction classes in TableGen format. 
+// +//===----------------------------------------------------------------------===// + +//----------------------------------------------------------------------------// +// Hexagon Intruction Flags + +// +// *** Must match BaseInfo.h *** +//----------------------------------------------------------------------------// + +//----------------------------------------------------------------------------// +// Intruction Classes Definitions + +//----------------------------------------------------------------------------// + +class CVI_VA_Resource_NoOpcode pattern = [], string cstr = "", + InstrItinClass itin = CVI_VA> + : InstHexagon; + +class CVI_GATHER_TMP_LD_Resource_NoOpcode pattern = [], string cstr = "", + InstrItinClass itin = CVI_GATHER_PSEUDO> + : InstHexagon; diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp index 3c0b30616884..b82a0157e81f 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -463,7 +463,7 @@ bool HexagonInstrInfo::analyzeBranch(MachineBasicBlock &MBB, Cond.push_back(LastInst->getOperand(1)); return false; } - DEBUG(dbgs() << "\nCant analyze BB#" << MBB.getNumber() + DEBUG(dbgs() << "\nCant analyze " << printMBBReference(MBB) << " with one jump\n";); // Otherwise, don't know what this is. return true; @@ -511,7 +511,7 @@ bool HexagonInstrInfo::analyzeBranch(MachineBasicBlock &MBB, FBB = LastInst->getOperand(0).getMBB(); return false; } - DEBUG(dbgs() << "\nCant analyze BB#" << MBB.getNumber() + DEBUG(dbgs() << "\nCant analyze " << printMBBReference(MBB) << " with two jumps";); // Otherwise, can't handle this. return true; @@ -521,7 +521,7 @@ unsigned HexagonInstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved) const { assert(!BytesRemoved && "code size not handled"); - DEBUG(dbgs() << "\nRemoving branches out of BB#" << MBB.getNumber()); + DEBUG(dbgs() << "\nRemoving branches out of " << printMBBReference(MBB)); MachineBasicBlock::iterator I = MBB.end(); unsigned Count = 0; while (I != MBB.begin()) { @@ -593,7 +593,7 @@ unsigned HexagonInstrInfo::insertBranch(MachineBasicBlock &MBB, // (ins IntRegs:$src1, IntRegs:$src2, brtarget:$offset) // (ins IntRegs:$src1, u5Imm:$src2, brtarget:$offset) unsigned Flags1 = getUndefRegState(Cond[1].isUndef()); - DEBUG(dbgs() << "\nInserting NVJump for BB#" << MBB.getNumber();); + DEBUG(dbgs() << "\nInserting NVJump for " << printMBBReference(MBB);); if (Cond[2].isReg()) { unsigned Flags2 = getUndefRegState(Cond[2].isUndef()); BuildMI(&MBB, DL, get(BccOpc)).addReg(Cond[1].getReg(), Flags1). @@ -829,9 +829,8 @@ void HexagonInstrInfo::copyPhysReg(MachineBasicBlock &MBB, #ifndef NDEBUG // Show the invalid registers to ease debugging. 
- dbgs() << "Invalid registers for copy in BB#" << MBB.getNumber() - << ": " << printReg(DestReg, &HRI) - << " = " << printReg(SrcReg, &HRI) << '\n'; + dbgs() << "Invalid registers for copy in " << printMBBReference(MBB) << ": " + << printReg(DestReg, &HRI) << " = " << printReg(SrcReg, &HRI) << '\n'; #endif llvm_unreachable("Unimplemented"); } @@ -1243,6 +1242,7 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { MBB.erase(MI); return true; } + case Hexagon::PS_tailcall_i: MI.setDesc(get(Hexagon::J2_jump)); return true; @@ -1268,6 +1268,82 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { case Hexagon::PS_jmpretfnew: MI.setDesc(get(Hexagon::J2_jumprfnew)); return true; + + case Hexagon::V6_vgathermh_pseudo: + BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermh)) + .add(MI.getOperand(1)) + .add(MI.getOperand(2)) + .add(MI.getOperand(3)); + BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai)) + .add(MI.getOperand(0)) + .addImm(0) + .addReg(Hexagon::VTMP); + MBB.erase(MI); + return true; + + case Hexagon::V6_vgathermw_pseudo: + BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermw)) + .add(MI.getOperand(1)) + .add(MI.getOperand(2)) + .add(MI.getOperand(3)); + BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai)) + .add(MI.getOperand(0)) + .addImm(0) + .addReg(Hexagon::VTMP); + MBB.erase(MI); + return true; + + case Hexagon::V6_vgathermhw_pseudo: + BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermhw)) + .add(MI.getOperand(1)) + .add(MI.getOperand(2)) + .add(MI.getOperand(3)); + BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai)) + .add(MI.getOperand(0)) + .addImm(0) + .addReg(Hexagon::VTMP); + MBB.erase(MI); + return true; + + case Hexagon::V6_vgathermhq_pseudo: + BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermhq)) + .add(MI.getOperand(1)) + .add(MI.getOperand(2)) + .add(MI.getOperand(3)) + .add(MI.getOperand(4)); + BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai)) + .add(MI.getOperand(0)) + .addImm(0) + .addReg(Hexagon::VTMP); + MBB.erase(MI); + return true; + + case Hexagon::V6_vgathermwq_pseudo: + BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermwq)) + .add(MI.getOperand(1)) + .add(MI.getOperand(2)) + .add(MI.getOperand(3)) + .add(MI.getOperand(4)); + BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai)) + .add(MI.getOperand(0)) + .addImm(0) + .addReg(Hexagon::VTMP); + MBB.erase(MI); + return true; + + case Hexagon::V6_vgathermhwq_pseudo: + BuildMI(MBB, MI, DL, get(Hexagon::V6_vgathermhwq)) + .add(MI.getOperand(1)) + .add(MI.getOperand(2)) + .add(MI.getOperand(3)) + .add(MI.getOperand(4)); + BuildMI(MBB, MI, DL, get(Hexagon::V6_vS32b_new_ai)) + .add(MI.getOperand(0)) + .addImm(0) + .addReg(Hexagon::VTMP); + MBB.erase(MI); + return true; + } return false; @@ -1616,8 +1692,8 @@ DFAPacketizer *HexagonInstrInfo::CreateTargetScheduleState( } // Inspired by this pair: -// %R13 = L2_loadri_io %R29, 136; mem:LD4[FixedStack0] -// S2_storeri_io %R29, 132, %R1; flags: mem:ST4[FixedStack1] +// %r13 = L2_loadri_io %r29, 136; mem:LD4[FixedStack0] +// S2_storeri_io %r29, 132, killed %r1; flags: mem:ST4[FixedStack1] // Currently AA considers the addresses in these instructions to be aliasing. bool HexagonInstrInfo::areMemAccessesTriviallyDisjoint( MachineInstr &MIa, MachineInstr &MIb, AliasAnalysis *AA) const { @@ -2052,6 +2128,8 @@ bool HexagonInstrInfo::isJumpWithinBranchRange(const MachineInstr &MI, // TODO: Add all the compound branches here. Can we do this in Relation model? 
case Hexagon::J4_cmpeqi_tp0_jump_nt: case Hexagon::J4_cmpeqi_tp1_jump_nt: + case Hexagon::J4_cmpeqn1_tp0_jump_nt: + case Hexagon::J4_cmpeqn1_tp1_jump_nt: return isInt<11>(offset); } } @@ -2818,10 +2896,8 @@ bool HexagonInstrInfo::producesStall(const MachineInstr &MI, MachineBasicBlock::const_instr_iterator MII = BII; MachineBasicBlock::const_instr_iterator MIE = MII->getParent()->instr_end(); - if (!(*MII).isBundle()) { - const MachineInstr &J = *MII; - return producesStall(J, MI); - } + if (!MII->isBundle()) + return producesStall(*MII, MI); for (++MII; MII != MIE && MII->isInsideBundle(); ++MII) { const MachineInstr &J = *MII; @@ -2907,6 +2983,9 @@ unsigned HexagonInstrInfo::getBaseAndOffset(const MachineInstr &MI, /// Return the position of the base and offset operands for this instruction. bool HexagonInstrInfo::getBaseAndOffsetPosition(const MachineInstr &MI, unsigned &BasePos, unsigned &OffsetPos) const { + if (!isAddrModeWithOffset(MI) && !isPostIncrement(MI)) + return false; + // Deal with memops first. if (isMemOp(MI)) { BasePos = 0; @@ -3098,15 +3177,24 @@ unsigned HexagonInstrInfo::getCompoundOpcode(const MachineInstr &GA, assert(getCompoundCandidateGroup(GB) == HexagonII::HCG_B); if ((GA.getOpcode() != Hexagon::C2_cmpeqi) || (GB.getOpcode() != Hexagon::J2_jumptnew)) - return -1; + return -1u; unsigned DestReg = GA.getOperand(0).getReg(); if (!GB.readsRegister(DestReg)) - return -1; - if (DestReg == Hexagon::P0) - return Hexagon::J4_cmpeqi_tp0_jump_nt; - if (DestReg == Hexagon::P1) - return Hexagon::J4_cmpeqi_tp1_jump_nt; - return -1; + return -1u; + if (DestReg != Hexagon::P0 && DestReg != Hexagon::P1) + return -1u; + // The value compared against must be either u5 or -1. + const MachineOperand &CmpOp = GA.getOperand(2); + if (!CmpOp.isImm()) + return -1u; + int V = CmpOp.getImm(); + if (V == -1) + return DestReg == Hexagon::P0 ? Hexagon::J4_cmpeqn1_tp0_jump_nt + : Hexagon::J4_cmpeqn1_tp1_jump_nt; + if (!isUInt<5>(V)) + return -1u; + return DestReg == Hexagon::P0 ? Hexagon::J4_cmpeqi_tp0_jump_nt + : Hexagon::J4_cmpeqi_tp1_jump_nt; } int HexagonInstrInfo::getCondOpcode(int Opc, bool invertPredicate) const { @@ -3515,8 +3603,9 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup( return HexagonII::HSIG_L2; case Hexagon::EH_RETURN_JMPR: case Hexagon::PS_jmpret: + case Hexagon::SL2_jumpr31: // jumpr r31 - // Actual form JMPR %PC, %R31, %R0. 
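The reworked getCompoundOpcode above now also admits compares against -1 (mapping to the new J4_cmpeqn1 compound jumps) and otherwise requires a u5 immediate for the existing J4_cmpeqi forms, with the predicate register selecting the _tp0/_tp1 variant. A schematic restatement of just that selection rule; the opcode names are the ones in the patch, everything else is illustrative:

#include <cassert>
#include <cstdint>

enum Pred { P0, P1 };

// Returns the compound-jump opcode name for "if (Pd = cmp.eq(Rs, #Imm))
// jump", or nullptr when no compound form applies.
static const char *compoundOpcode(Pred P, int64_t Imm) {
  if (Imm == -1)
    return P == P0 ? "J4_cmpeqn1_tp0_jump_nt" : "J4_cmpeqn1_tp1_jump_nt";
  if (Imm < 0 || Imm > 31)                      // !isUInt<5>(Imm)
    return nullptr;
  return P == P0 ? "J4_cmpeqi_tp0_jump_nt" : "J4_cmpeqi_tp1_jump_nt";
}

int main() {
  assert(compoundOpcode(P0, -1) != nullptr);    // new -1 form
  assert(compoundOpcode(P1, 31) != nullptr);    // largest u5
  assert(compoundOpcode(P0, 32) == nullptr);    // out of u5 range
  assert(compoundOpcode(P1, -2) == nullptr);    // only -1 is special-cased
}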
+ // Actual form JMPR implicit-def %pc, implicit %r31, implicit internal %r0 DstReg = MI.getOperand(0).getReg(); if (Hexagon::IntRegsRegClass.contains(DstReg) && (Hexagon::R31 == DstReg)) return HexagonII::HSIG_L2; @@ -3527,6 +3616,9 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup( case Hexagon::PS_jmpretfnewpt: case Hexagon::PS_jmprettnew: case Hexagon::PS_jmpretfnew: + case Hexagon::SL2_jumpr31_t: + case Hexagon::SL2_jumpr31_f: + case Hexagon::SL2_jumpr31_tnew: DstReg = MI.getOperand(1).getReg(); SrcReg = MI.getOperand(0).getReg(); // [if ([!]p0[.new])] jumpr r31 @@ -3620,8 +3712,8 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup( return HexagonII::HSIG_S2; break; case Hexagon::S2_allocframe: - if (MI.getOperand(0).isImm() && - isShiftedUInt<5,3>(MI.getOperand(0).getImm())) + if (MI.getOperand(2).isImm() && + isShiftedUInt<5,3>(MI.getOperand(2).getImm())) return HexagonII::HSIG_S1; break; // @@ -3706,7 +3798,7 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup( case Hexagon::C2_cmovenewif: // if ([!]P0[.new]) Rd = #0 // Actual form: - // %R16 = C2_cmovenewit %P0, 0, %R16; + // %r16 = C2_cmovenewit internal %p0, 0, implicit undef %r16; DstReg = MI.getOperand(0).getReg(); SrcReg = MI.getOperand(1).getReg(); if (isIntRegForSubInst(DstReg) && @@ -3826,8 +3918,14 @@ int HexagonInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, } } - return TargetInstrInfo::getOperandLatency(ItinData, DefMI, DefIdx, - UseMI, UseIdx); + int Latency = TargetInstrInfo::getOperandLatency(ItinData, DefMI, DefIdx, + UseMI, UseIdx); + if (!Latency) + // We should never have 0 cycle latency between two instructions unless + // they can be packetized together. However, this decision can't be made + // here. + Latency = 1; + return Latency; } // inverts the predication logic. @@ -3866,6 +3964,35 @@ int HexagonInstrInfo::getMaxValue(const MachineInstr &MI) const { return ~(-1U << bits); } + +bool HexagonInstrInfo::isAddrModeWithOffset(const MachineInstr &MI) const { + switch (MI.getOpcode()) { + case Hexagon::L2_loadrbgp: + case Hexagon::L2_loadrdgp: + case Hexagon::L2_loadrhgp: + case Hexagon::L2_loadrigp: + case Hexagon::L2_loadrubgp: + case Hexagon::L2_loadruhgp: + case Hexagon::S2_storerbgp: + case Hexagon::S2_storerbnewgp: + case Hexagon::S2_storerhgp: + case Hexagon::S2_storerhnewgp: + case Hexagon::S2_storerigp: + case Hexagon::S2_storerinewgp: + case Hexagon::S2_storerdgp: + case Hexagon::S2_storerfgp: + return true; + } + const uint64_t F = MI.getDesc().TSFlags; + unsigned addrMode = + ((F >> HexagonII::AddrModePos) & HexagonII::AddrModeMask); + // Disallow any base+offset instruction. The assembler does not yet reorder + // based up any zero offset instruction. 
+ return (addrMode == HexagonII::BaseRegOffset || + addrMode == HexagonII::BaseImmOffset || + addrMode == HexagonII::BaseLongOffset); +} + unsigned HexagonInstrInfo::getMemAccessSize(const MachineInstr &MI) const { using namespace HexagonII; @@ -4032,8 +4159,9 @@ void HexagonInstrInfo::immediateExtend(MachineInstr &MI) const { bool HexagonInstrInfo::invertAndChangeJumpTarget( MachineInstr &MI, MachineBasicBlock *NewTarget) const { - DEBUG(dbgs() << "\n[invertAndChangeJumpTarget] to BB#" - << NewTarget->getNumber(); MI.dump();); + DEBUG(dbgs() << "\n[invertAndChangeJumpTarget] to " + << printMBBReference(*NewTarget); + MI.dump();); assert(MI.isBranch()); unsigned NewOpcode = getInvertedPredicatedOpcode(MI.getOpcode()); int TargetPos = MI.getNumOperands() - 1; @@ -4094,6 +4222,22 @@ bool HexagonInstrInfo::validateBranchCond(const ArrayRef &Cond) return Cond.empty() || (Cond[0].isImm() && (Cond.size() != 1)); } +void HexagonInstrInfo:: +setBundleNoShuf(MachineBasicBlock::instr_iterator MIB) const { + assert(MIB->isBundle()); + MachineOperand &Operand = MIB->getOperand(0); + if (Operand.isImm()) + Operand.setImm(Operand.getImm() | memShufDisabledMask); + else + MIB->addOperand(MachineOperand::CreateImm(memShufDisabledMask)); +} + +bool HexagonInstrInfo::getBundleNoShuf(const MachineInstr &MIB) const { + assert(MIB.isBundle()); + const MachineOperand &Operand = MIB.getOperand(0); + return (Operand.isImm() && (Operand.getImm() & memShufDisabledMask) != 0); +} + // Addressing mode relations. short HexagonInstrInfo::changeAddrMode_abs_io(short Opc) const { return Opc >= 0 ? Hexagon::changeAddrMode_abs_io(Opc) : Opc; @@ -4103,10 +4247,18 @@ short HexagonInstrInfo::changeAddrMode_io_abs(short Opc) const { return Opc >= 0 ? Hexagon::changeAddrMode_io_abs(Opc) : Opc; } +short HexagonInstrInfo::changeAddrMode_io_pi(short Opc) const { + return Opc >= 0 ? Hexagon::changeAddrMode_io_pi(Opc) : Opc; +} + short HexagonInstrInfo::changeAddrMode_io_rr(short Opc) const { return Opc >= 0 ? Hexagon::changeAddrMode_io_rr(Opc) : Opc; } +short HexagonInstrInfo::changeAddrMode_pi_io(short Opc) const { + return Opc >= 0 ? Hexagon::changeAddrMode_pi_io(Opc) : Opc; +} + short HexagonInstrInfo::changeAddrMode_rr_io(short Opc) const { return Opc >= 0 ? Hexagon::changeAddrMode_rr_io(Opc) : Opc; } diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h index 1558c2e98508..7c095d9c2779 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.h +++ b/lib/Target/Hexagon/HexagonInstrInfo.h @@ -38,6 +38,11 @@ class TargetRegisterInfo; class HexagonInstrInfo : public HexagonGenInstrInfo { const HexagonSubtarget &Subtarget; + + enum BundleAttribute { + memShufDisabledMask = 0x4 + }; + virtual void anchor(); public: @@ -117,8 +122,8 @@ class HexagonInstrInfo : public HexagonGenInstrInfo { bool analyzeLoop(MachineLoop &L, MachineInstr *&IndVarInst, MachineInstr *&CmpInst) const override; - /// Generate code to reduce the loop iteration by one and check if the loop is - /// finished. Return the value/register of the the new loop count. We need + /// Generate code to reduce the loop iteration by one and check if the loop + /// is finished. Return the value/register of the new loop count. We need /// this function when peeling off one or more iterations of a loop. This /// function assumes the nth iteration is peeled first. unsigned reduceLoopCount(MachineBasicBlock &MBB, @@ -326,10 +331,11 @@ class HexagonInstrInfo : public HexagonGenInstrInfo { /// HexagonInstrInfo specifics. 
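isAddrModeWithOffset above decodes the addressing-mode field out of the instruction's TSFlags; the "let TSFlags{43-41} = addrMode.Value" line earlier in this patch places that field as three bits starting at bit 41. A sketch of the decode under that assumption; the enumerator values below are placeholders, not the real constants from the target's BaseInfo header:

#include <cassert>
#include <cstdint>

constexpr unsigned AddrModePos = 41;            // TSFlags{43-41}
constexpr uint64_t AddrModeMask = 0x7;          // three-bit field

// Placeholder ordering, for illustration only.
enum AddrMode { NoAddrMode = 0, BaseImmOffset, BaseLongOffset, BaseRegOffset };

static AddrMode addrModeOf(uint64_t TSFlags) {
  return AddrMode((TSFlags >> AddrModePos) & AddrModeMask);
}

static bool hasBaseAndOffset(uint64_t TSFlags) {
  AddrMode M = addrModeOf(TSFlags);
  return M == BaseRegOffset || M == BaseImmOffset || M == BaseLongOffset;
}

int main() {
  uint64_t Flags = uint64_t(BaseImmOffset) << AddrModePos;   // encode
  assert(addrModeOf(Flags) == BaseImmOffset);
  assert(hasBaseAndOffset(Flags));
  assert(!hasBaseAndOffset(0));                              // NoAddrMode
}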
- unsigned createVR(MachineFunction* MF, MVT VT) const; + unsigned createVR(MachineFunction *MF, MVT VT) const; bool isAbsoluteSet(const MachineInstr &MI) const; bool isAccumulator(const MachineInstr &MI) const; + bool isAddrModeWithOffset(const MachineInstr &MI) const; bool isComplex(const MachineInstr &MI) const; bool isCompoundBranchInstr(const MachineInstr &MI) const; bool isConstExtended(const MachineInstr &MI) const; @@ -432,7 +438,6 @@ class HexagonInstrInfo : public HexagonGenInstrInfo { HexagonII::SubInstructionGroup getDuplexCandidateGroup(const MachineInstr &MI) const; short getEquivalentHWInstr(const MachineInstr &MI) const; - MachineInstr *getFirstNonDbgInst(MachineBasicBlock *BB) const; unsigned getInstrTimingClassLatency(const InstrItineraryData *ItinData, const MachineInstr &MI) const; bool getInvertedPredSense(SmallVectorImpl &Cond) const; @@ -456,16 +461,20 @@ class HexagonInstrInfo : public HexagonGenInstrInfo { void immediateExtend(MachineInstr &MI) const; bool invertAndChangeJumpTarget(MachineInstr &MI, - MachineBasicBlock* NewTarget) const; + MachineBasicBlock *NewTarget) const; void genAllInsnTimingClasses(MachineFunction &MF) const; bool reversePredSense(MachineInstr &MI) const; unsigned reversePrediction(unsigned Opcode) const; bool validateBranchCond(const ArrayRef &Cond) const; + void setBundleNoShuf(MachineBasicBlock::instr_iterator MIB) const; + bool getBundleNoShuf(const MachineInstr &MIB) const; // Addressing mode relations. short changeAddrMode_abs_io(short Opc) const; short changeAddrMode_io_abs(short Opc) const; + short changeAddrMode_io_pi(short Opc) const; short changeAddrMode_io_rr(short Opc) const; + short changeAddrMode_pi_io(short Opc) const; short changeAddrMode_rr_io(short Opc) const; short changeAddrMode_rr_ur(short Opc) const; short changeAddrMode_ur_rr(short Opc) const; diff --git a/lib/Target/Hexagon/HexagonIntrinsics.td b/lib/Target/Hexagon/HexagonIntrinsics.td index 8a77b7760df1..1df143de6e80 100644 --- a/lib/Target/Hexagon/HexagonIntrinsics.td +++ b/lib/Target/Hexagon/HexagonIntrinsics.td @@ -735,6 +735,28 @@ def : Pat <(int_hexagon_A2_not I32:$Rs), def : Pat <(int_hexagon_A2_neg I32:$Rs), (A2_subri 0, I32:$Rs)>; +// Make sure the patterns with zero immediate value has higher complexity +// otherwise, we need to updated the predicates for immediates to exclude zero +let AddedComplexity = 200 in { +def : Pat <(int_hexagon_S2_asr_i_r_rnd_goodsyntax I32:$Rs, (i32 0)), + (A2_tfr I32:$Rs)>; +def : Pat <(int_hexagon_S2_asr_i_p_rnd_goodsyntax I64:$Rs, (i32 0)), + (A2_combinew (HiReg I64:$Rs), (LoReg I64:$Rs))>; +def : Pat <(int_hexagon_S5_vasrhrnd_goodsyntax I64:$Rs, (i32 0)), + (A2_combinew (HiReg I64:$Rs), (LoReg I64:$Rs))>; +def : Pat <(int_hexagon_S5_asrhub_rnd_sat_goodsyntax I64:$Rs, (i32 0)), + (S2_vsathub I64:$Rs)>; +} + +def : Pat <(int_hexagon_S2_asr_i_r_rnd_goodsyntax I32:$Rs, u5_0ImmPred:$imm), + (S2_asr_i_r_rnd I32:$Rs, (UDEC1 u5_0ImmPred:$imm))>; +def : Pat <(int_hexagon_S2_asr_i_p_rnd_goodsyntax I64:$Rs, u6_0ImmPred:$imm), + (S2_asr_i_p_rnd I64:$Rs, (UDEC1 u6_0ImmPred:$imm))>; +def : Pat <(int_hexagon_S5_vasrhrnd_goodsyntax I64:$Rs, u4_0ImmPred:$imm), + (S5_vasrhrnd I64:$Rs, (UDEC1 u4_0ImmPred:$imm))>; +def : Pat <(int_hexagon_S5_asrhub_rnd_sat_goodsyntax I64:$Rs, u4_0ImmPred:$imm), + (S5_asrhub_rnd_sat I64:$Rs, (UDEC1 u4_0ImmPred:$imm))>; + // Transfer immediate def : Pat <(int_hexagon_A2_tfril I32:$Rs, u16_0ImmPred:$Is), (A2_tfril I32:$Rs, u16_0ImmPred:$Is)>; @@ -1360,6 +1382,11 @@ defm : MaskedStore ; defm : MaskedStore ; defm : 
MaskedStore ; +defm : MaskedStore ; +defm : MaskedStore ; +defm : MaskedStore ; +defm : MaskedStore ; + //******************************************************************* // SYSTEM //******************************************************************* diff --git a/lib/Target/Hexagon/HexagonIntrinsicsV60.td b/lib/Target/Hexagon/HexagonIntrinsicsV60.td index d26a3d1ae540..5e5c77b38e8e 100644 --- a/lib/Target/Hexagon/HexagonIntrinsicsV60.td +++ b/lib/Target/Hexagon/HexagonIntrinsicsV60.td @@ -35,9 +35,6 @@ def : Pat <(v512i1 (bitconvert (v32i16 HvxVR:$src1))), def : Pat <(v512i1 (bitconvert (v64i8 HvxVR:$src1))), (v512i1 (V6_vandvrt(v64i8 HvxVR:$src1), (A2_tfrsi 0x01010101)))>; -def : Pat <(v512i1 (bitconvert (v8i64 HvxVR:$src1))), - (v512i1 (V6_vandvrt(v8i64 HvxVR:$src1), (A2_tfrsi 0x01010101)))>; - def : Pat <(v16i32 (bitconvert (v512i1 HvxQR:$src1))), (v16i32 (V6_vandqrt(v512i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>; @@ -47,9 +44,6 @@ def : Pat <(v32i16 (bitconvert (v512i1 HvxQR:$src1))), def : Pat <(v64i8 (bitconvert (v512i1 HvxQR:$src1))), (v64i8 (V6_vandqrt(v512i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>; -def : Pat <(v8i64 (bitconvert (v512i1 HvxQR:$src1))), - (v8i64 (V6_vandqrt(v512i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>; - def : Pat <(v1024i1 (bitconvert (v32i32 HvxVR:$src1))), (v1024i1 (V6_vandvrt (v32i32 HvxVR:$src1), (A2_tfrsi 0x01010101)))>; @@ -59,9 +53,6 @@ def : Pat <(v1024i1 (bitconvert (v64i16 HvxVR:$src1))), def : Pat <(v1024i1 (bitconvert (v128i8 HvxVR:$src1))), (v1024i1 (V6_vandvrt (v128i8 HvxVR:$src1), (A2_tfrsi 0x01010101)))>; -def : Pat <(v1024i1 (bitconvert (v16i64 HvxVR:$src1))), - (v1024i1 (V6_vandvrt (v16i64 HvxVR:$src1), (A2_tfrsi 0x01010101)))>; - def : Pat <(v32i32 (bitconvert (v1024i1 HvxQR:$src1))), (v32i32 (V6_vandqrt (v1024i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>; @@ -71,9 +62,6 @@ def : Pat <(v64i16 (bitconvert (v1024i1 HvxQR:$src1))), def : Pat <(v128i8 (bitconvert (v1024i1 HvxQR:$src1))), (v128i8 (V6_vandqrt (v1024i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>; -def : Pat <(v16i64 (bitconvert (v1024i1 HvxQR:$src1))), - (v16i64 (V6_vandqrt (v1024i1 HvxQR:$src1), (A2_tfrsi 0x01010101)))>; - let AddedComplexity = 140 in { def : Pat <(store (v512i1 HvxQR:$src1), (i32 IntRegs:$addr)), (V6_vS32b_ai IntRegs:$addr, 0, diff --git a/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp b/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp index 2154a485dc69..2646d0bcbf47 100644 --- a/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp +++ b/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp @@ -140,7 +140,6 @@ namespace { bool runOnLoop(Loop *L, LPPassManager &LPM) override; private: - unsigned getStoreSizeInBytes(StoreInst *SI); int getSCEVStride(const SCEVAddRecExpr *StoreEv); bool isLegalStore(Loop *CurLoop, StoreInst *SI); void collectStores(Loop *CurLoop, BasicBlock *BB, @@ -1051,14 +1050,11 @@ bool PolynomialMultiplyRecognize::promoteTypes(BasicBlock *LoopB, // Check if the exit values have types that are no wider than the type // that we want to promote to. 
unsigned DestBW = DestTy->getBitWidth(); - for (Instruction &In : *ExitB) { - PHINode *P = dyn_cast(&In); - if (!P) - break; - if (P->getNumIncomingValues() != 1) + for (PHINode &P : ExitB->phis()) { + if (P.getNumIncomingValues() != 1) return false; - assert(P->getIncomingBlock(0) == LoopB); - IntegerType *T = dyn_cast(P->getType()); + assert(P.getIncomingBlock(0) == LoopB); + IntegerType *T = dyn_cast(P.getType()); if (!T || T->getBitWidth() > DestBW) return false; } @@ -1847,13 +1843,6 @@ bool PolynomialMultiplyRecognize::recognize() { return true; } -unsigned HexagonLoopIdiomRecognize::getStoreSizeInBytes(StoreInst *SI) { - uint64_t SizeInBits = DL->getTypeSizeInBits(SI->getValueOperand()->getType()); - assert(((SizeInBits & 7) || (SizeInBits >> 32) == 0) && - "Don't overflow unsigned."); - return (unsigned)SizeInBits >> 3; -} - int HexagonLoopIdiomRecognize::getSCEVStride(const SCEVAddRecExpr *S) { if (const SCEVConstant *SC = dyn_cast(S->getOperand(1))) return SC->getAPInt().getSExtValue(); @@ -1885,7 +1874,7 @@ bool HexagonLoopIdiomRecognize::isLegalStore(Loop *CurLoop, StoreInst *SI) { int Stride = getSCEVStride(StoreEv); if (Stride == 0) return false; - unsigned StoreSize = getStoreSizeInBytes(SI); + unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType()); if (StoreSize != unsigned(std::abs(Stride))) return false; @@ -1936,7 +1925,9 @@ mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L, for (auto *B : L->blocks()) for (auto &I : *B) - if (Ignored.count(&I) == 0 && (AA.getModRefInfo(&I, StoreLoc) & Access)) + if (Ignored.count(&I) == 0 && + isModOrRefSet( + intersectModRef(AA.getModRefInfo(&I, StoreLoc), Access))) return true; return false; @@ -1960,7 +1951,7 @@ bool HexagonLoopIdiomRecognize::processCopyingStore(Loop *CurLoop, Value *StorePtr = SI->getPointerOperand(); auto *StoreEv = cast(SE->getSCEV(StorePtr)); unsigned Stride = getSCEVStride(StoreEv); - unsigned StoreSize = getStoreSizeInBytes(SI); + unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType()); if (Stride != StoreSize) return false; @@ -2015,12 +2006,12 @@ bool HexagonLoopIdiomRecognize::processCopyingStore(Loop *CurLoop, SmallPtrSet Ignore1; Ignore1.insert(SI); - if (mayLoopAccessLocation(StoreBasePtr, MRI_ModRef, CurLoop, BECount, + if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop, BECount, StoreSize, *AA, Ignore1)) { // Check if the load is the offending instruction. Ignore1.insert(LI); - if (mayLoopAccessLocation(StoreBasePtr, MRI_ModRef, CurLoop, BECount, - StoreSize, *AA, Ignore1)) { + if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop, + BECount, StoreSize, *AA, Ignore1)) { // Still bad. Nothing we can do. goto CleanupAndExit; } @@ -2062,8 +2053,8 @@ bool HexagonLoopIdiomRecognize::processCopyingStore(Loop *CurLoop, SmallPtrSet Ignore2; Ignore2.insert(SI); - if (mayLoopAccessLocation(LoadBasePtr, MRI_Mod, CurLoop, BECount, StoreSize, - *AA, Ignore2)) + if (mayLoopAccessLocation(LoadBasePtr, ModRefInfo::Mod, CurLoop, BECount, + StoreSize, *AA, Ignore2)) goto CleanupAndExit; // Check the stride. diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/lib/Target/Hexagon/HexagonMachineScheduler.cpp index 5daceac6496d..b1c549aa13fa 100644 --- a/lib/Target/Hexagon/HexagonMachineScheduler.cpp +++ b/lib/Target/Hexagon/HexagonMachineScheduler.cpp @@ -186,12 +186,10 @@ bool VLIWResourceModel::reserveResources(SUnit *SU) { /// after setting up the current scheduling region. 
[RegionBegin, RegionEnd) /// only includes instructions that have DAG nodes, not scheduling boundaries. void VLIWMachineScheduler::schedule() { - DEBUG(dbgs() - << "********** MI Converging Scheduling VLIW BB#" << BB->getNumber() - << " " << BB->getName() - << " in_func " << BB->getParent()->getFunction()->getName() - << " at loop depth " << MLI->getLoopDepth(BB) - << " \n"); + DEBUG(dbgs() << "********** MI Converging Scheduling VLIW " + << printMBBReference(*BB) << " " << BB->getName() << " in_func " + << BB->getParent()->getName() << " at loop depth " + << MLI->getLoopDepth(BB) << " \n"); buildDAGWithRegPressure(); @@ -237,8 +235,8 @@ void VLIWMachineScheduler::schedule() { placeDebugValues(); DEBUG({ - unsigned BBNum = begin()->getParent()->getNumber(); - dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n"; + dbgs() << "*** Final schedule for " + << printMBBReference(*begin()->getParent()) << " ***\n"; dumpSchedule(); dbgs() << '\n'; }); diff --git a/lib/Target/Hexagon/HexagonMapAsm2IntrinV65.gen.td b/lib/Target/Hexagon/HexagonMapAsm2IntrinV65.gen.td new file mode 100644 index 000000000000..718d3ac7d45a --- /dev/null +++ b/lib/Target/Hexagon/HexagonMapAsm2IntrinV65.gen.td @@ -0,0 +1,86 @@ +//===--- HexagonMapAsm2IntrinV65.gen.td -----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +def: Pat<(int_hexagon_A6_vcmpbeq_notany DoubleRegs:$src1, DoubleRegs:$src2), (A6_vcmpbeq_notany DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV65T]>; +def: Pat<(int_hexagon_V6_vasruwuhsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3), (V6_vasruwuhsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vasruwuhsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3), (V6_vasruwuhsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vasruhubsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3), (V6_vasruhubsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vasruhubsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3), (V6_vasruhubsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vasruhubrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3), (V6_vasruhubrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vasruhubrndsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3), (V6_vasruhubrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vabsb HvxVR:$src1), (V6_vabsb HvxVR:$src1)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vabsb_128B HvxVR:$src1), (V6_vabsb HvxVR:$src1)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vabsb_sat HvxVR:$src1), (V6_vabsb_sat HvxVR:$src1)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vabsb_sat_128B HvxVR:$src1), (V6_vabsb_sat HvxVR:$src1)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vaslh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3), (V6_vaslh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vaslh_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3), (V6_vaslh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65T, UseHVX]>; +def: 
Pat<(int_hexagon_V6_vasrh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3), (V6_vasrh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vasrh_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3), (V6_vasrh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vavguw HvxVR:$src1, HvxVR:$src2), (V6_vavguw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vavguw_128B HvxVR:$src1, HvxVR:$src2), (V6_vavguw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vavguwrnd HvxVR:$src1, HvxVR:$src2), (V6_vavguwrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vavguwrnd_128B HvxVR:$src1, HvxVR:$src2), (V6_vavguwrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vavgb HvxVR:$src1, HvxVR:$src2), (V6_vavgb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vavgb_128B HvxVR:$src1, HvxVR:$src2), (V6_vavgb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vavgbrnd HvxVR:$src1, HvxVR:$src2), (V6_vavgbrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vavgbrnd_128B HvxVR:$src1, HvxVR:$src2), (V6_vavgbrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vlut4 HvxVR:$src1, DoubleRegs:$src2), (V6_vlut4 HvxVR:$src1, DoubleRegs:$src2)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vlut4_128B HvxVR:$src1, DoubleRegs:$src2), (V6_vlut4 HvxVR:$src1, DoubleRegs:$src2)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vnavgb HvxVR:$src1, HvxVR:$src2), (V6_vnavgb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vnavgb_128B HvxVR:$src1, HvxVR:$src2), (V6_vnavgb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vmpabuu HvxWR:$src1, IntRegs:$src2), (V6_vmpabuu HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vmpabuu_128B HvxWR:$src1, IntRegs:$src2), (V6_vmpabuu HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vmpabuu_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3), (V6_vmpabuu_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vmpabuu_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3), (V6_vmpabuu_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vmpahhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3), (V6_vmpahhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vmpahhsat_128B HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3), (V6_vmpahhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vmpauhuhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3), (V6_vmpauhuhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vmpauhuhsat_128B HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3), (V6_vmpauhuhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vmpsuhuhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3), (V6_vmpsuhuhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vmpsuhuhsat_128B HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3), (V6_vmpsuhuhsat HvxVR:$src1, HvxVR:$src2, 
DoubleRegs:$src3)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vmpyh_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3), (V6_vmpyh_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vmpyh_acc_128B HvxWR:$src1, HvxVR:$src2, IntRegs:$src3), (V6_vmpyh_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vmpyuhe HvxVR:$src1, IntRegs:$src2), (V6_vmpyuhe HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vmpyuhe_128B HvxVR:$src1, IntRegs:$src2), (V6_vmpyuhe HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vmpyuhe_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3), (V6_vmpyuhe_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vmpyuhe_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3), (V6_vmpyuhe_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vprefixqb HvxQR:$src1), (V6_vprefixqb HvxQR:$src1)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vprefixqb_128B HvxQR:$src1), (V6_vprefixqb HvxQR:$src1)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vprefixqh HvxQR:$src1), (V6_vprefixqh HvxQR:$src1)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vprefixqh_128B HvxQR:$src1), (V6_vprefixqh HvxQR:$src1)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vprefixqw HvxQR:$src1), (V6_vprefixqw HvxQR:$src1)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vprefixqw_128B HvxQR:$src1), (V6_vprefixqw HvxQR:$src1)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vrmpyub_rtt HvxVR:$src1, DoubleRegs:$src2), (V6_vrmpyub_rtt HvxVR:$src1, DoubleRegs:$src2)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vrmpyub_rtt_128B HvxVR:$src1, DoubleRegs:$src2), (V6_vrmpyub_rtt HvxVR:$src1, DoubleRegs:$src2)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vrmpyub_rtt_acc HvxWR:$src1, HvxVR:$src2, DoubleRegs:$src3), (V6_vrmpyub_rtt_acc HvxWR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vrmpyub_rtt_acc_128B HvxWR:$src1, HvxVR:$src2, DoubleRegs:$src3), (V6_vrmpyub_rtt_acc HvxWR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vrmpybub_rtt HvxVR:$src1, DoubleRegs:$src2), (V6_vrmpybub_rtt HvxVR:$src1, DoubleRegs:$src2)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vrmpybub_rtt_128B HvxVR:$src1, DoubleRegs:$src2), (V6_vrmpybub_rtt HvxVR:$src1, DoubleRegs:$src2)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vrmpybub_rtt_acc HvxWR:$src1, HvxVR:$src2, DoubleRegs:$src3), (V6_vrmpybub_rtt_acc HvxWR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vrmpybub_rtt_acc_128B HvxWR:$src1, HvxVR:$src2, DoubleRegs:$src3), (V6_vrmpybub_rtt_acc HvxWR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vscattermw IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4), (V6_vscattermw IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vscattermh IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4), (V6_vscattermh IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vscattermw_add IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4), (V6_vscattermw_add IntRegs:$src1, ModRegs:$src2, 
HvxVR:$src3, HvxVR:$src4)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vscattermh_add IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4), (V6_vscattermh_add IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vscattermwq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxVR:$src4, HvxVR:$src5), (V6_vscattermwq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxVR:$src4, HvxVR:$src5)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vscattermhq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxVR:$src4, HvxVR:$src5), (V6_vscattermhq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxVR:$src4, HvxVR:$src5)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vscattermhw IntRegs:$src1, ModRegs:$src2, HvxWR:$src3, HvxVR:$src4), (V6_vscattermhw IntRegs:$src1, ModRegs:$src2, HvxWR:$src3, HvxVR:$src4)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vscattermhw_add IntRegs:$src1, ModRegs:$src2, HvxWR:$src3, HvxVR:$src4), (V6_vscattermhw_add IntRegs:$src1, ModRegs:$src2, HvxWR:$src3, HvxVR:$src4)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vscattermhwq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxWR:$src4, HvxVR:$src5), (V6_vscattermhwq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxWR:$src4, HvxVR:$src5)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vscattermw_128B IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4), (V6_vscattermw IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vscattermh_128B IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4), (V6_vscattermh IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vscattermw_add_128B IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4), (V6_vscattermw_add IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vscattermh_add_128B IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4), (V6_vscattermh_add IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vscattermwq_128B HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxVR:$src4, HvxVR:$src5), (V6_vscattermwq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxVR:$src4, HvxVR:$src5)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vscattermhq_128B HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxVR:$src4, HvxVR:$src5), (V6_vscattermhq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxVR:$src4, HvxVR:$src5)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vscattermhw_128B IntRegs:$src1, ModRegs:$src2, HvxWR:$src3, HvxVR:$src4), (V6_vscattermhw IntRegs:$src1, ModRegs:$src2, HvxWR:$src3, HvxVR:$src4)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vscattermhw_add_128B IntRegs:$src1, ModRegs:$src2, HvxWR:$src3, HvxVR:$src4), (V6_vscattermhw_add IntRegs:$src1, ModRegs:$src2, HvxWR:$src3, HvxVR:$src4)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vscattermhwq_128B HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxWR:$src4, HvxVR:$src5), (V6_vscattermhwq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxWR:$src4, HvxVR:$src5)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vdd0), (V6_vdd0)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_V6_vdd0_128B), (V6_vdd0)>, Requires<[HasV65T, UseHVX]>; diff --git a/lib/Target/Hexagon/HexagonNewValueJump.cpp b/lib/Target/Hexagon/HexagonNewValueJump.cpp index cf7a5fff1496..ffa447cc1311 
100644 --- a/lib/Target/Hexagon/HexagonNewValueJump.cpp +++ b/lib/Target/Hexagon/HexagonNewValueJump.cpp @@ -129,9 +129,9 @@ static bool canBeFeederToNewValueJump(const HexagonInstrInfo *QII, // using -- if (QRI->isSubRegister(feederReg, cmpReg1) logic // before the callsite of this function // But we can not as it comes in the following fashion. - // %D0 = Hexagon_S2_lsr_r_p %D0, %R2 - // %R0 = KILL %R0, %D0 - // %P0 = CMPEQri %R0, 0 + // %d0 = Hexagon_S2_lsr_r_p killed %d0, killed %r2 + // %r0 = KILL %r0, implicit killed %d0 + // %p0 = CMPEQri killed %r0, 0 // Hence, we need to check if it's a KILL instruction. if (II->getOpcode() == TargetOpcode::KILL) return false; @@ -139,6 +139,9 @@ static bool canBeFeederToNewValueJump(const HexagonInstrInfo *QII, if (II->isImplicitDef()) return false; + if (QII->isSolo(*II)) + return false; + // Make sure there there is no 'def' or 'use' of any of the uses of // feeder insn between it's definition, this MI and jump, jmpInst // skipping compare, cmpInst. @@ -193,9 +196,9 @@ static bool commonChecksToProhibitNewValueJump(bool afterRA, // to new value jump. If they are in the path, bail out. // KILL sets kill flag on the opcode. It also sets up a // single register, out of pair. - // %D0 = S2_lsr_r_p %D0, %R2 - // %R0 = KILL %R0, %D0 - // %P0 = C2_cmpeqi %R0, 0 + // %d0 = S2_lsr_r_p killed %d0, killed %r2 + // %r0 = KILL %r0, implicit killed %d0 + // %p0 = C2_cmpeqi killed %r0, 0 // PHI can be anything after RA. // COPY can remateriaze things in between feeder, compare and nvj. if (MII->getOpcode() == TargetOpcode::KILL || @@ -431,7 +434,7 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "********** Hexagon New Value Jump **********\n" << "********** Function: " << MF.getName() << "\n"); - if (skipFunction(*MF.getFunction())) + if (skipFunction(MF.getFunction())) return false; // If we move NewValueJump before register allocation we'll need live variable diff --git a/lib/Target/Hexagon/HexagonOptAddrMode.cpp b/lib/Target/Hexagon/HexagonOptAddrMode.cpp index f42b6ed99357..4738a4d32409 100644 --- a/lib/Target/Hexagon/HexagonOptAddrMode.cpp +++ b/lib/Target/Hexagon/HexagonOptAddrMode.cpp @@ -461,7 +461,7 @@ bool HexagonOptAddrMode::changeAddAsl(NodeAddr AddAslUN, DEBUG(dbgs() << "[InstrNode]: " << Print>(UseIA, *DFG) << "\n"); MachineInstr *UseMI = UseIA.Addr->getCode(); - DEBUG(dbgs() << "[MI getParent()->getNumber() + DEBUG(dbgs() << "[MI <" << printMBBReference(*UseMI->getParent()) << ">]: " << *UseMI << "\n"); const MCInstrDesc &UseMID = UseMI->getDesc(); assert(HII->getAddrMode(*UseMI) == HexagonII::BaseImmOffset); @@ -570,7 +570,7 @@ bool HexagonOptAddrMode::processBlock(NodeAddr BA) { NodeAddr OwnerN = UseN.Addr->getOwner(*DFG); MachineInstr *UseMI = OwnerN.Addr->getCode(); - DEBUG(dbgs() << "\t\t[MI getParent()->getNumber() + DEBUG(dbgs() << "\t\t[MI <" << printMBBReference(*UseMI->getParent()) << ">]: " << *UseMI << "\n"); int UseMOnum = -1; @@ -595,7 +595,7 @@ bool HexagonOptAddrMode::processBlock(NodeAddr BA) { } bool HexagonOptAddrMode::runOnMachineFunction(MachineFunction &MF) { - if (skipFunction(*MF.getFunction())) + if (skipFunction(MF.getFunction())) return false; bool Changed = false; diff --git a/lib/Target/Hexagon/HexagonPatterns.td b/lib/Target/Hexagon/HexagonPatterns.td index 00ce6916fbdf..bf1b55b7b891 100644 --- a/lib/Target/Hexagon/HexagonPatterns.td +++ b/lib/Target/Hexagon/HexagonPatterns.td @@ -19,10 +19,10 @@ // (8) Shift/permute // (9) Arithmetic/bitwise // (10) Bit -// (11) Load -// 
(12) Store -// (13) Memop -// (14) PIC +// (11) PIC +// (12) Load +// (13) Store +// (14) Memop // (15) Call // (16) Branch // (17) Misc @@ -88,15 +88,17 @@ def V8I8: PatLeaf<(v8i8 DoubleRegs:$R)>; def V4I16: PatLeaf<(v4i16 DoubleRegs:$R)>; def V2I32: PatLeaf<(v2i32 DoubleRegs:$R)>; +def HQ8: PatLeaf<(VecQ8 HvxQR:$R)>; +def HQ16: PatLeaf<(VecQ16 HvxQR:$R)>; +def HQ32: PatLeaf<(VecQ32 HvxQR:$R)>; + def HVI8: PatLeaf<(VecI8 HvxVR:$R)>; def HVI16: PatLeaf<(VecI16 HvxVR:$R)>; def HVI32: PatLeaf<(VecI32 HvxVR:$R)>; -def HVI64: PatLeaf<(VecI64 HvxVR:$R)>; def HWI8: PatLeaf<(VecPI8 HvxWR:$R)>; def HWI16: PatLeaf<(VecPI16 HvxWR:$R)>; def HWI32: PatLeaf<(VecPI32 HvxWR:$R)>; -def HWI64: PatLeaf<(VecPI64 HvxWR:$R)>; // Pattern fragments to extract the low and high subregisters from a // 64-bit value. @@ -340,6 +342,8 @@ def: Pat<(HexagonCONST32_GP tblockaddress:$A), (A2_tfrsi imm:$A)>; def: Pat<(HexagonCONST32_GP tglobaladdr:$A), (A2_tfrsi imm:$A)>; def: Pat<(HexagonJT tjumptable:$A), (A2_tfrsi imm:$A)>; def: Pat<(HexagonCP tconstpool:$A), (A2_tfrsi imm:$A)>; +// The HVX load patterns also match CP directly. Make sure that if +// the selection of this opcode changes, it's updated in all places. def: Pat<(i1 0), (PS_false)>; def: Pat<(i1 1), (PS_true)>; @@ -464,7 +468,7 @@ def: Pat<(v4i8 (trunc V4I16:$Rs)), // S2_vtruneh def: Pat<(v2i16 (trunc V2I32:$Rs)), - (LoReg (S2_packhl (HiReg $Rs), (LoReg $Rs)))>; + (A2_combine_ll (HiReg $Rs), (LoReg $Rs))>; // --(4) Logical --------------------------------------------------------- @@ -818,11 +822,9 @@ let Predicates = [HasV60T,UseHVX] in { def: HvxSel_pat; def: HvxSel_pat; def: HvxSel_pat; - def: HvxSel_pat; def: HvxSel_pat; def: HvxSel_pat; def: HvxSel_pat; - def: HvxSel_pat; } // From LegalizeDAG.cpp: (Pu ? Pv : Pw) <=> (Pu & Pv) | (!Pu & Pw). 
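[Editor's sketch, not part of the patch] The LegalizeDAG identity quoted just above is what allows a select on predicate values to be expressed with and/or/not once both arms are in predicate registers. A quick plain-C++ check of the same identity on bitmask "lanes", purely as a worked example:

#include <cassert>
#include <cstdint>

// (Pu ? Pv : Pw) per lane == (Pu & Pv) | (~Pu & Pw): lanes where Pu is set
// take their bit from Pv, all other lanes take it from Pw.
static uint64_t selectBits(uint64_t Pu, uint64_t Pv, uint64_t Pw) {
  return (Pu & Pv) | (~Pu & Pw);
}

int main() {
  // Pu = 1100b selects the two high lanes from Pv = 1010b (-> 10..)
  // and the two low lanes from Pw = 0110b (-> ..10), giving 1010b.
  assert(selectBits(0b1100, 0b1010, 0b0110) == 0b1010);
  return 0;
}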
@@ -890,40 +892,34 @@ let AddedComplexity = 100, Predicates = [HasV5T] in { def SDTHexagonINSERT: SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisVT<3, i32>, SDTCisVT<4, i32>]>; -def SDTHexagonINSERTRP: - SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, - SDTCisInt<0>, SDTCisVT<3, i64>]>; - def HexagonINSERT: SDNode<"HexagonISD::INSERT", SDTHexagonINSERT>; -def HexagonINSERTRP: SDNode<"HexagonISD::INSERTRP", SDTHexagonINSERTRP>; -def: Pat<(HexagonINSERT I32:$Rs, I32:$Rt, u5_0ImmPred:$u1, u5_0ImmPred:$u2), - (S2_insert I32:$Rs, I32:$Rt, imm:$u1, imm:$u2)>; -def: Pat<(HexagonINSERT I64:$Rs, I64:$Rt, u6_0ImmPred:$u1, u6_0ImmPred:$u2), - (S2_insertp I64:$Rs, I64:$Rt, imm:$u1, imm:$u2)>; -def: Pat<(HexagonINSERTRP I32:$Rs, I32:$Rt, I64:$Ru), - (S2_insert_rp I32:$Rs, I32:$Rt, I64:$Ru)>; -def: Pat<(HexagonINSERTRP I64:$Rs, I64:$Rt, I64:$Ru), - (S2_insertp_rp I64:$Rs, I64:$Rt, I64:$Ru)>; +let AddedComplexity = 10 in { + def: Pat<(HexagonINSERT I32:$Rs, I32:$Rt, u5_0ImmPred:$u1, u5_0ImmPred:$u2), + (S2_insert I32:$Rs, I32:$Rt, imm:$u1, imm:$u2)>; + def: Pat<(HexagonINSERT I64:$Rs, I64:$Rt, u6_0ImmPred:$u1, u6_0ImmPred:$u2), + (S2_insertp I64:$Rs, I64:$Rt, imm:$u1, imm:$u2)>; +} +def: Pat<(HexagonINSERT I32:$Rs, I32:$Rt, I32:$Width, I32:$Off), + (S2_insert_rp I32:$Rs, I32:$Rt, (Combinew $Width, $Off))>; +def: Pat<(HexagonINSERT I64:$Rs, I64:$Rt, I32:$Width, I32:$Off), + (S2_insertp_rp I64:$Rs, I64:$Rt, (Combinew $Width, $Off))>; def SDTHexagonEXTRACTU : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<1>, SDTCisVT<2, i32>, SDTCisVT<3, i32>]>; -def SDTHexagonEXTRACTURP - : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<1>, - SDTCisVT<2, i64>]>; - def HexagonEXTRACTU: SDNode<"HexagonISD::EXTRACTU", SDTHexagonEXTRACTU>; -def HexagonEXTRACTURP: SDNode<"HexagonISD::EXTRACTURP", SDTHexagonEXTRACTURP>; -def: Pat<(HexagonEXTRACTU I32:$Rs, u5_0ImmPred:$u5, u5_0ImmPred:$U5), - (S2_extractu I32:$Rs, imm:$u5, imm:$U5)>; -def: Pat<(HexagonEXTRACTU I64:$Rs, u6_0ImmPred:$u6, u6_0ImmPred:$U6), - (S2_extractup I64:$Rs, imm:$u6, imm:$U6)>; -def: Pat<(HexagonEXTRACTURP I32:$Rs, I64:$Rt), - (S2_extractu_rp I32:$Rs, I64:$Rt)>; -def: Pat<(HexagonEXTRACTURP I64:$Rs, I64:$Rt), - (S2_extractup_rp I64:$Rs, I64:$Rt)>; +let AddedComplexity = 10 in { + def: Pat<(HexagonEXTRACTU I32:$Rs, u5_0ImmPred:$u5, u5_0ImmPred:$U5), + (S2_extractu I32:$Rs, imm:$u5, imm:$U5)>; + def: Pat<(HexagonEXTRACTU I64:$Rs, u6_0ImmPred:$u6, u6_0ImmPred:$U6), + (S2_extractup I64:$Rs, imm:$u6, imm:$U6)>; +} +def: Pat<(HexagonEXTRACTU I32:$Rs, I32:$Width, I32:$Off), + (S2_extractu_rp I32:$Rs, (Combinew $Width, $Off))>; +def: Pat<(HexagonEXTRACTU I64:$Rs, I32:$Width, I32:$Off), + (S2_extractup_rp I64:$Rs, (Combinew $Width, $Off))>; def SDTHexagonVSPLAT: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>; @@ -936,6 +932,11 @@ def: Pat<(v2i32 (HexagonVSPLAT s8_0ImmPred:$s8)), (A2_combineii imm:$s8, imm:$s8)>; def: Pat<(v2i32 (HexagonVSPLAT I32:$Rs)), (Combinew I32:$Rs, I32:$Rs)>; +let AddedComplexity = 10 in +def: Pat<(v8i8 (HexagonVSPLAT I32:$Rs)), (S6_vsplatrbp I32:$Rs)>, + Requires<[HasV62T]>; +def: Pat<(v8i8 (HexagonVSPLAT I32:$Rs)), + (Combinew (S2_vsplatrb I32:$Rs), (S2_vsplatrb I32:$Rs))>; // --(8) Shift/permute --------------------------------------------------- // @@ -946,14 +947,11 @@ def SDTHexagonVCOMBINE: SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>, SDTCisSubVecOfVec<1, 0>]>; def SDTHexagonVPACK: SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>, SDTCisVec<1>]>; -def 
HexagonPACKHL: SDNode<"HexagonISD::PACKHL", SDTHexagonI64I32I32>; def HexagonCOMBINE: SDNode<"HexagonISD::COMBINE", SDTHexagonI64I32I32>; def HexagonVCOMBINE: SDNode<"HexagonISD::VCOMBINE", SDTHexagonVCOMBINE>; def HexagonVPACKE: SDNode<"HexagonISD::VPACKE", SDTHexagonVPACK>; def HexagonVPACKO: SDNode<"HexagonISD::VPACKO", SDTHexagonVPACK>; -def: OpR_RR_pat, i64, I32>; - def: Pat<(HexagonCOMBINE I32:$Rs, I32:$Rt), (Combinew $Rs, $Rt)>; // The complexity of the combines involving immediates should be greater @@ -974,14 +972,6 @@ let AddedComplexity = 75 in { (A2_combineii imm:$s8, imm:$S8)>; } -let Predicates = [UseHVX] in { - def: OpR_RR_pat, VecPI32, HVI32>; - def: OpR_RR_pat, VecI8, HVI8>; - def: OpR_RR_pat, VecI8, HVI8>; - def: OpR_RR_pat, VecI16, HVI16>; - def: OpR_RR_pat, VecI16, HVI16>; -} - def: Pat<(bswap I32:$Rs), (A2_swiz I32:$Rs)>; def: Pat<(bswap I64:$Rss), (Combinew (A2_swiz (LoReg $Rss)), (A2_swiz (HiReg $Rss)))>; @@ -1284,6 +1274,56 @@ def: AccRRI_pat, I32, s32_0ImmPred>; def: AccRRI_pat, I32, s32_0ImmPred>; def: AccRRR_pat, I32, I32>; +// Mulh for vectors +// +def: Pat<(v2i32 (mulhu V2I32:$Rss, V2I32:$Rtt)), + (Combinew (M2_mpyu_up (HiReg $Rss), (HiReg $Rtt)), + (M2_mpyu_up (LoReg $Rss), (LoReg $Rtt)))>; + +def: Pat<(v2i32 (mulhs V2I32:$Rs, V2I32:$Rt)), + (Combinew (M2_mpy_up (HiReg $Rs), (HiReg $Rt)), + (M2_mpy_up (LoReg $Rt), (LoReg $Rt)))>; + +def Mulhub: + OutPatFrag<(ops node:$Rss, node:$Rtt), + (Combinew (S2_vtrunohb (M5_vmpybuu (HiReg $Rss), (HiReg $Rtt))), + (S2_vtrunohb (M5_vmpybuu (LoReg $Rss), (LoReg $Rtt))))>; + +// Equivalent of byte-wise arithmetic shift right by 7 in v8i8. +def Asr7: + OutPatFrag<(ops node:$Rss), (C2_mask (C2_not (A4_vcmpbgti $Rss, 0)))>; + +def: Pat<(v8i8 (mulhu V8I8:$Rss, V8I8:$Rtt)), + (Mulhub $Rss, $Rtt)>; + +def: Pat<(v8i8 (mulhs V8I8:$Rss, V8I8:$Rtt)), + (A2_vsubub + (Mulhub $Rss, $Rtt), + (A2_vaddub (A2_andp V8I8:$Rss, (Asr7 $Rtt)), + (A2_andp V8I8:$Rtt, (Asr7 $Rss))))>; + +def Mpysh: + OutPatFrag<(ops node:$Rs, node:$Rt), (M2_vmpy2s_s0 $Rs, $Rt)>; +def Mpyshh: + OutPatFrag<(ops node:$Rss, node:$Rtt), (Mpysh (HiReg $Rss), (HiReg $Rtt))>; +def Mpyshl: + OutPatFrag<(ops node:$Rss, node:$Rtt), (Mpysh (LoReg $Rss), (LoReg $Rtt))>; + +def Mulhsh: + OutPatFrag<(ops node:$Rss, node:$Rtt), + (Combinew (A2_combine_hh (HiReg (Mpyshh $Rss, $Rtt)), + (LoReg (Mpyshh $Rss, $Rtt))), + (A2_combine_hh (HiReg (Mpyshl $Rss, $Rtt)), + (LoReg (Mpyshl $Rss, $Rtt))))>; + +def: Pat<(v4i16 (mulhs V4I16:$Rss, V4I16:$Rtt)), (Mulhsh $Rss, $Rtt)>; + +def: Pat<(v4i16 (mulhu V4I16:$Rss, V4I16:$Rtt)), + (A2_vaddh + (Mulhsh $Rss, $Rtt), + (A2_vaddh (A2_andp V4I16:$Rss, (S2_asr_i_vh $Rtt, 15)), + (A2_andp V4I16:$Rtt, (S2_asr_i_vh $Rss, 15))))>; + def: Pat<(ineg (mul I32:$Rs, u8_0ImmPred:$u8)), (M2_mpysin IntRegs:$Rs, imm:$u8)>; @@ -1633,7 +1673,31 @@ let AddedComplexity = 100 in { (I1toI32 (S4_ntstbit_r IntRegs:$Rs, IntRegs:$Rt))>; } -// --(11) Load ----------------------------------------------------------- +// --(11) PIC ------------------------------------------------------------ +// + +def SDT_HexagonAtGot + : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i32>]>; +def SDT_HexagonAtPcrel + : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; + +// AT_GOT address-of-GOT, address-of-global, offset-in-global +def HexagonAtGot : SDNode<"HexagonISD::AT_GOT", SDT_HexagonAtGot>; +// AT_PCREL address-of-global +def HexagonAtPcrel : SDNode<"HexagonISD::AT_PCREL", SDT_HexagonAtPcrel>; + +def: Pat<(HexagonAtGot I32:$got, I32:$addr, (i32 0)), + 
(L2_loadri_io I32:$got, imm:$addr)>; +def: Pat<(HexagonAtGot I32:$got, I32:$addr, s30_2ImmPred:$off), + (A2_addi (L2_loadri_io I32:$got, imm:$addr), imm:$off)>; +def: Pat<(HexagonAtPcrel I32:$addr), + (C4_addipc imm:$addr)>; + +// The HVX load patterns also match AT_PCREL directly. Make sure that +// if the selection of this opcode changes, it's updated in all places. + + +// --(12) Load ----------------------------------------------------------- // def extloadv2i8: PatFrag<(ops node:$ptr), (extload node:$ptr), [{ @@ -1974,6 +2038,12 @@ multiclass HvxLd_pat { def: Pat<(VT (Load I32:$Rt)), (MI I32:$Rt, 0)>; def: Pat<(VT (Load (add I32:$Rt, ImmPred:$s))), (MI I32:$Rt, imm:$s)>; + // The HVX selection code for shuffles can generate vector constants. + // Calling "Select" on the resulting loads from CP fails without these + // patterns. + def: Pat<(VT (Load (HexagonCP tconstpool:$A))), (MI (A2_tfrsi imm:$A), 0)>; + def: Pat<(VT (Load (HexagonAtPcrel tconstpool:$A))), + (MI (C4_addipc imm:$A), 0)>; } @@ -1982,7 +2052,6 @@ let Predicates = [UseHVX] in { defm: HvxLd_pat; defm: HvxLd_pat; defm: HvxLd_pat; - defm: HvxLd_pat; } defm: HvxLdVs_pat; defm: HvxLdVs_pat; @@ -1992,7 +2061,6 @@ let Predicates = [UseHVX] in { defm: HvxLd_pat; defm: HvxLd_pat; defm: HvxLd_pat; - defm: HvxLd_pat; } defm: HvxLdWs_pat; defm: HvxLdWs_pat; @@ -2000,7 +2068,7 @@ let Predicates = [UseHVX] in { } -// --(12) Store ---------------------------------------------------------- +// --(13) Store ---------------------------------------------------------- // @@ -2115,9 +2183,13 @@ class Stoream_pat +class AtomSt : PatFrag<(ops node:$val, node:$ptr), F.Fragment, F.PredicateCode, - F.OperandTransform>; + F.OperandTransform> { + let IsAtomic = F.IsAtomic; + let MemoryVT = F.MemoryVT; +} + def IMM_BYTE : SDNodeXForm; def: Storea_pat; def: Storea_pat; - def: Storea_pat, I32, addrgp, S2_storerbgp>; - def: Storea_pat, I32, addrgp, S2_storerhgp>; - def: Storea_pat, I32, addrgp, S2_storerigp>; - def: Storea_pat, I64, addrgp, S2_storerdgp>; + def: Storea_pat, I32, addrgp, S2_storerbgp>; + def: Storea_pat, I32, addrgp, S2_storerhgp>; + def: Storea_pat, I32, addrgp, S2_storerigp>; + def: Storea_pat, I64, addrgp, S2_storerdgp>; def: Stoream_pat; def: Stoream_pat; @@ -2253,10 +2325,10 @@ let AddedComplexity = 110 in { def: Storea_pat; def: Storea_pat; def: Storea_pat; - def: Storea_pat, I32, anyimm0, PS_storerbabs>; - def: Storea_pat, I32, anyimm1, PS_storerhabs>; - def: Storea_pat, I32, anyimm2, PS_storeriabs>; - def: Storea_pat, I64, anyimm3, PS_storerdabs>; + def: Storea_pat, I32, anyimm0, PS_storerbabs>; + def: Storea_pat, I32, anyimm1, PS_storerhabs>; + def: Storea_pat, I32, anyimm2, PS_storeriabs>; + def: Storea_pat, I64, anyimm3, PS_storerdabs>; def: Stoream_pat; def: Stoream_pat; @@ -2386,10 +2458,10 @@ let AddedComplexity = 40 in { defm: Storexim_pat; defm: Storexim_pat; - defm: Storexi_pat, I32, anyimm0, S2_storerb_io>; - defm: Storexi_pat, I32, anyimm1, S2_storerh_io>; - defm: Storexi_pat, I32, anyimm2, S2_storeri_io>; - defm: Storexi_pat, I64, anyimm3, S2_storerd_io>; + defm: Storexi_pat, I32, anyimm0, S2_storerb_io>; + defm: Storexi_pat, I32, anyimm1, S2_storerh_io>; + defm: Storexi_pat, I32, anyimm2, S2_storeri_io>; + defm: Storexi_pat, I64, anyimm3, S2_storerd_io>; } // Reg+Reg @@ -2430,10 +2502,10 @@ let AddedComplexity = 10 in { def: Storexim_base_pat; def: Storexim_base_pat; - def: Storexi_base_pat, I32, S2_storerb_io>; - def: Storexi_base_pat, I32, S2_storerh_io>; - def: Storexi_base_pat, I32, S2_storeri_io>; - def: 
Storexi_base_pat, I64, S2_storerd_io>; + def: Storexi_base_pat, I32, S2_storerb_io>; + def: Storexi_base_pat, I32, S2_storerh_io>; + def: Storexi_base_pat, I32, S2_storeri_io>; + def: Storexi_base_pat, I64, S2_storerd_io>; } // HVX stores @@ -2451,7 +2523,6 @@ let Predicates = [UseHVX] in { defm: HvxSt_pat; defm: HvxSt_pat; defm: HvxSt_pat; - defm: HvxSt_pat; } defm: HvxStVs_pat; defm: HvxStVs_pat; @@ -2461,7 +2532,6 @@ let Predicates = [UseHVX] in { defm: HvxSt_pat; defm: HvxSt_pat; defm: HvxSt_pat; - defm: HvxSt_pat; } defm: HvxStWs_pat; defm: HvxStWs_pat; @@ -2469,7 +2539,7 @@ let Predicates = [UseHVX] in { } -// --(13) Memop ---------------------------------------------------------- +// --(14) Memop ---------------------------------------------------------- // def m5_0Imm8Pred : PatLeaf<(i32 imm), [{ @@ -2747,27 +2817,6 @@ let AddedComplexity = 220 in { } -// --(14) PIC ------------------------------------------------------------ -// - -def SDT_HexagonAtGot - : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i32>]>; -def SDT_HexagonAtPcrel - : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; - -// AT_GOT address-of-GOT, address-of-global, offset-in-global -def HexagonAtGot : SDNode<"HexagonISD::AT_GOT", SDT_HexagonAtGot>; -// AT_PCREL address-of-global -def HexagonAtPcrel : SDNode<"HexagonISD::AT_PCREL", SDT_HexagonAtPcrel>; - -def: Pat<(HexagonAtGot I32:$got, I32:$addr, (i32 0)), - (L2_loadri_io I32:$got, imm:$addr)>; -def: Pat<(HexagonAtGot I32:$got, I32:$addr, s30_2ImmPred:$off), - (A2_addi (L2_loadri_io I32:$got, imm:$addr), imm:$off)>; -def: Pat<(HexagonAtPcrel I32:$addr), - (C4_addipc imm:$addr)>; - - // --(15) Call ----------------------------------------------------------- // @@ -2897,3 +2946,101 @@ def HexagonREADCYCLE: SDNode<"HexagonISD::READCYCLE", SDTInt64Leaf, [SDNPHasChain]>; def: Pat<(HexagonREADCYCLE), (A4_tfrcpp UPCYCLE)>; + + +def SDTVecLeaf: SDTypeProfile<1, 0, [SDTCisVec<0>]>; + +def SDTHexagonVEXTRACTW: SDTypeProfile<1, 2, + [SDTCisVT<0, i32>, SDTCisVec<1>, SDTCisVT<2, i32>]>; +def HexagonVEXTRACTW : SDNode<"HexagonISD::VEXTRACTW", SDTHexagonVEXTRACTW>; + +def SDTHexagonVINSERTW0: SDTypeProfile<1, 2, + [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisVT<2, i32>]>; +def HexagonVINSERTW0 : SDNode<"HexagonISD::VINSERTW0", SDTHexagonVINSERTW0>; + +def Combinev: OutPatFrag<(ops node:$Rs, node:$Rt), + (REG_SEQUENCE HvxWR, $Rs, vsub_hi, $Rt, vsub_lo)>; + +def LoVec: OutPatFrag<(ops node:$Vs), (EXTRACT_SUBREG $Vs, vsub_lo)>; +def HiVec: OutPatFrag<(ops node:$Vs), (EXTRACT_SUBREG $Vs, vsub_hi)>; + +let Predicates = [UseHVX] in { + def: OpR_RR_pat, VecI8, HVI8>; + def: OpR_RR_pat, VecI8, HVI8>; + def: OpR_RR_pat, VecI16, HVI16>; + def: OpR_RR_pat, VecI16, HVI16>; +} + +def HexagonVZERO: SDNode<"HexagonISD::VZERO", SDTVecLeaf>; +def vzero: PatFrag<(ops), (HexagonVZERO)>; + +def VSxtb: OutPatFrag<(ops node:$Vs), (V6_vunpackb $Vs)>; +def VSxth: OutPatFrag<(ops node:$Vs), (V6_vunpackh $Vs)>; +def VZxtb: OutPatFrag<(ops node:$Vs), (V6_vunpackub $Vs)>; +def VZxth: OutPatFrag<(ops node:$Vs), (V6_vunpackuh $Vs)>; + +let Predicates = [UseHVX] in { + def: Pat<(VecI8 vzero), (V6_vd0)>; + def: Pat<(VecI16 vzero), (V6_vd0)>; + def: Pat<(VecI32 vzero), (V6_vd0)>; + + def: Pat<(VecPI8 (concat_vectors HVI8:$Vs, HVI8:$Vt)), + (Combinev HvxVR:$Vt, HvxVR:$Vs)>; + def: Pat<(VecPI16 (concat_vectors HVI16:$Vs, HVI16:$Vt)), + (Combinev HvxVR:$Vt, HvxVR:$Vs)>; + def: Pat<(VecPI32 (concat_vectors HVI32:$Vs, HVI32:$Vt)), + (Combinev HvxVR:$Vt, HvxVR:$Vs)>; + + def: 
Pat<(HexagonVEXTRACTW HVI8:$Vu, I32:$Rs), + (V6_extractw HvxVR:$Vu, I32:$Rs)>; + def: Pat<(HexagonVEXTRACTW HVI16:$Vu, I32:$Rs), + (V6_extractw HvxVR:$Vu, I32:$Rs)>; + def: Pat<(HexagonVEXTRACTW HVI32:$Vu, I32:$Rs), + (V6_extractw HvxVR:$Vu, I32:$Rs)>; + + def: Pat<(HexagonVINSERTW0 HVI8:$Vu, I32:$Rt), + (V6_vinsertwr HvxVR:$Vu, I32:$Rt)>; + def: Pat<(HexagonVINSERTW0 HVI16:$Vu, I32:$Rt), + (V6_vinsertwr HvxVR:$Vu, I32:$Rt)>; + def: Pat<(HexagonVINSERTW0 HVI32:$Vu, I32:$Rt), + (V6_vinsertwr HvxVR:$Vu, I32:$Rt)>; + + def: Pat<(add HVI8:$Vs, HVI8:$Vt), (V6_vaddb HvxVR:$Vs, HvxVR:$Vt)>; + def: Pat<(add HVI16:$Vs, HVI16:$Vt), (V6_vaddh HvxVR:$Vs, HvxVR:$Vt)>; + def: Pat<(add HVI32:$Vs, HVI32:$Vt), (V6_vaddw HvxVR:$Vs, HvxVR:$Vt)>; + + def: Pat<(sub HVI8:$Vs, HVI8:$Vt), (V6_vsubb HvxVR:$Vs, HvxVR:$Vt)>; + def: Pat<(sub HVI16:$Vs, HVI16:$Vt), (V6_vsubh HvxVR:$Vs, HvxVR:$Vt)>; + def: Pat<(sub HVI32:$Vs, HVI32:$Vt), (V6_vsubw HvxVR:$Vs, HvxVR:$Vt)>; + + def: Pat<(and HVI8:$Vs, HVI8:$Vt), (V6_vand HvxVR:$Vs, HvxVR:$Vt)>; + def: Pat<(or HVI8:$Vs, HVI8:$Vt), (V6_vor HvxVR:$Vs, HvxVR:$Vt)>; + def: Pat<(xor HVI8:$Vs, HVI8:$Vt), (V6_vxor HvxVR:$Vs, HvxVR:$Vt)>; + + def: Pat<(vselect HQ8:$Qu, HVI8:$Vs, HVI8:$Vt), + (V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>; + def: Pat<(vselect HQ16:$Qu, HVI16:$Vs, HVI16:$Vt), + (V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>; + def: Pat<(vselect HQ32:$Qu, HVI32:$Vs, HVI32:$Vt), + (V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>; + + def: Pat<(VecPI16 (sext HVI8:$Vs)), (VSxtb $Vs)>; + def: Pat<(VecPI32 (sext HVI16:$Vs)), (VSxth $Vs)>; + def: Pat<(VecPI16 (zext HVI8:$Vs)), (VZxtb $Vs)>; + def: Pat<(VecPI32 (zext HVI16:$Vs)), (VZxth $Vs)>; + + def: Pat<(VecI16 (sext_invec HVI8:$Vs)), (LoVec (VSxtb $Vs))>; + def: Pat<(VecI32 (sext_invec HVI16:$Vs)), (LoVec (VSxth $Vs))>; + def: Pat<(VecI32 (sext_invec HVI8:$Vs)), + (LoVec (VSxth (LoVec (VSxtb $Vs))))>; + + def: Pat<(VecI16 (zext_invec HVI8:$Vs)), (LoVec (VZxtb $Vs))>; + def: Pat<(VecI32 (zext_invec HVI16:$Vs)), (LoVec (VZxth $Vs))>; + def: Pat<(VecI32 (zext_invec HVI8:$Vs)), + (LoVec (VZxth (LoVec (VZxtb $Vs))))>; + + def: Pat<(VecI8 (trunc HWI16:$Vss)), + (V6_vpackeb (HiVec $Vss), (LoVec $Vss))>; + def: Pat<(VecI16 (trunc HWI32:$Vss)), + (V6_vpackeh (HiVec $Vss), (LoVec $Vss))>; +} diff --git a/lib/Target/Hexagon/HexagonPatternsV65.td b/lib/Target/Hexagon/HexagonPatternsV65.td new file mode 100644 index 000000000000..50b76847b563 --- /dev/null +++ b/lib/Target/Hexagon/HexagonPatternsV65.td @@ -0,0 +1,70 @@ +//==- HexagonPatternsV65.td -------------------------------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +multiclass vgathermh { + let isCodeGenOnly = 1, isPseudo = 1, mayLoad = 1, mayStore = 1 in + def NAME : CVI_GATHER_TMP_LD_Resource_NoOpcode<(outs ), + (ins IntRegs:$_dst_, IntRegs:$Rt, + ModRegs:$Mu, RC:$Vv), + ".error \"should not emit\" ", + []>; +} + +multiclass vgathermw { + let isCodeGenOnly = 1, isPseudo = 1, mayLoad = 1, mayStore = 1 in + def NAME : CVI_GATHER_TMP_LD_Resource_NoOpcode<(outs ), + (ins IntRegs:$_dst_, IntRegs:$Rt, + ModRegs:$Mu, RC:$Vv), + ".error \"should not emit\" ", + []>; +} + +multiclass vgathermhw { + let isCodeGenOnly = 1, isPseudo = 1, mayLoad = 1, mayStore = 1 in + def NAME : CVI_GATHER_TMP_LD_Resource_NoOpcode<(outs ), + (ins IntRegs:$_dst_, IntRegs:$Rt, + ModRegs:$Mu, RC:$Vv), + ".error \"should not emit\" ", + []>; +} + +defm V6_vgathermh_pseudo : vgathermh; +defm V6_vgathermw_pseudo : vgathermw; +defm V6_vgathermhw_pseudo : vgathermhw; + +multiclass vgathermhq { + let isCodeGenOnly = 1, isPseudo = 1, mayLoad = 1, mayStore = 1 in + def NAME : CVI_GATHER_TMP_LD_Resource_NoOpcode<(outs ), + (ins IntRegs:$_dst_, RC2:$Vq, IntRegs:$Rt, + ModRegs:$Mu, RC1:$Vv), + ".error \"should not emit\" ", + []>; +} + +multiclass vgathermwq { + let isCodeGenOnly = 1, isPseudo = 1, mayLoad = 1, mayStore = 1 in + def NAME : CVI_GATHER_TMP_LD_Resource_NoOpcode<(outs ), + (ins IntRegs:$_dst_, RC2:$Vq, IntRegs:$Rt, + ModRegs:$Mu, RC1:$Vv), + ".error \"should not emit\" ", + []>; +} + +multiclass vgathermhwq { + let isCodeGenOnly = 1, isPseudo = 1, mayLoad = 1, mayStore = 1 in + def NAME : CVI_GATHER_TMP_LD_Resource_NoOpcode<(outs ), + (ins IntRegs:$_dst_, RC2:$Vq, IntRegs:$Rt, + ModRegs:$Mu, RC1:$Vv), + ".error \"should not emit\" ", + []>; +} + +defm V6_vgathermhq_pseudo : vgathermhq; +defm V6_vgathermwq_pseudo : vgathermwq; +defm V6_vgathermhwq_pseudo : vgathermhwq; diff --git a/lib/Target/Hexagon/HexagonPeephole.cpp b/lib/Target/Hexagon/HexagonPeephole.cpp index d794f83aaa49..3c588a89b0da 100644 --- a/lib/Target/Hexagon/HexagonPeephole.cpp +++ b/lib/Target/Hexagon/HexagonPeephole.cpp @@ -8,31 +8,30 @@ // This peephole pass optimizes in the following cases. // 1. Optimizes redundant sign extends for the following case // Transform the following pattern -// %vreg170 = SXTW %vreg166 +// %170 = SXTW %166 // ... -// %vreg176 = COPY %vreg170:isub_lo +// %176 = COPY %170:isub_lo // // Into -// %vreg176 = COPY vreg166 +// %176 = COPY %166 // // 2. Optimizes redundant negation of predicates. -// %vreg15 = CMPGTrr %vreg6, %vreg2 +// %15 = CMPGTrr %6, %2 // ... -// %vreg16 = NOT_p %vreg15 +// %16 = NOT_p killed %15 // ... -// JMP_c %vreg16, , %PC +// JMP_c killed %16, <%bb.1>, implicit dead %pc // // Into -// %vreg15 = CMPGTrr %vreg6, %vreg2; +// %15 = CMPGTrr %6, %2; // ... -// JMP_cNot %vreg15, , %PC; +// JMP_cNot killed %15, <%bb.1>, implicit dead %pc; // // Note: The peephole pass makes the instrucstions like -// %vreg170 = SXTW %vreg166 or %vreg16 = NOT_p %vreg15 +// %170 = SXTW %166 or %16 = NOT_p killed %15 // redundant and relies on some form of dead removal instructions, like // DCE or DIE to actually eliminate them. 
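[Editor's sketch, not part of the patch] The rewritten header comment above describes the pass's approach: it deletes nothing itself, it only records what a redundant sign-extend (or predicate negation) was fed by and rewrites later users, leaving the now-dead definition for DCE/DIE. A rough model of that bookkeeping for the sign-extend case, using plain std types and hypothetical helper names (the real pass keys a map called PeepholeMap by virtual register, as the hunks further down show):

#include <map>

// Simplified model of case 1 from the comment above.
struct RedundantSextModel {
  // Maps the result register of "%dst = SXTW %src" to %src.
  std::map<unsigned, unsigned> PeepholeMap;

  void noteSextW(unsigned DstReg, unsigned SrcReg) {
    PeepholeMap[DstReg] = SrcReg;
  }

  // For a later "%x = COPY %reg:isub_lo", return the register the COPY can
  // read instead; the SXTW then loses its last user and DCE removes it.
  unsigned copySourceFor(unsigned Reg) const {
    auto It = PeepholeMap.find(Reg);
    return It == PeepholeMap.end() ? Reg : It->second;
  }
};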
- //===----------------------------------------------------------------------===// #include "Hexagon.h" @@ -109,7 +108,7 @@ INITIALIZE_PASS(HexagonPeephole, "hexagon-peephole", "Hexagon Peephole", false, false) bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { - if (skipFunction(*MF.getFunction())) + if (skipFunction(MF.getFunction())) return false; QII = static_cast(MF.getSubtarget().getInstrInfo()); @@ -133,7 +132,7 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { NextI = std::next(I); MachineInstr &MI = *I; // Look for sign extends: - // %vreg170 = SXTW %vreg166 + // %170 = SXTW %166 if (!DisableOptSZExt && MI.getOpcode() == Hexagon::A2_sxtw) { assert(MI.getNumOperands() == 2); MachineOperand &Dst = MI.getOperand(0); @@ -144,14 +143,14 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { if (TargetRegisterInfo::isVirtualRegister(DstReg) && TargetRegisterInfo::isVirtualRegister(SrcReg)) { // Map the following: - // %vreg170 = SXTW %vreg166 - // PeepholeMap[170] = vreg166 + // %170 = SXTW %166 + // PeepholeMap[170] = %166 PeepholeMap[DstReg] = SrcReg; } } - // Look for %vreg170 = COMBINE_ir_V4 (0, %vreg169) - // %vreg170:DoublRegs, %vreg169:IntRegs + // Look for %170 = COMBINE_ir_V4 (0, %169) + // %170:DoublRegs, %169:IntRegs if (!DisableOptExtTo64 && MI.getOpcode() == Hexagon::A4_combineir) { assert(MI.getNumOperands() == 3); MachineOperand &Dst = MI.getOperand(0); @@ -165,10 +164,10 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { } // Look for this sequence below - // %vregDoubleReg1 = LSRd_ri %vregDoubleReg0, 32 - // %vregIntReg = COPY %vregDoubleReg1:isub_lo. + // %DoubleReg1 = LSRd_ri %DoubleReg0, 32 + // %IntReg = COPY %DoubleReg1:isub_lo. // and convert into - // %vregIntReg = COPY %vregDoubleReg0:isub_hi. + // %IntReg = COPY %DoubleReg0:isub_hi. if (MI.getOpcode() == Hexagon::S2_lsr_i_p) { assert(MI.getNumOperands() == 3); MachineOperand &Dst = MI.getOperand(0); @@ -193,14 +192,14 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { if (TargetRegisterInfo::isVirtualRegister(DstReg) && TargetRegisterInfo::isVirtualRegister(SrcReg)) { // Map the following: - // %vreg170 = NOT_xx %vreg166 - // PeepholeMap[170] = vreg166 + // %170 = NOT_xx %166 + // PeepholeMap[170] = %166 PeepholeMap[DstReg] = SrcReg; } } // Look for copy: - // %vreg176 = COPY %vreg170:isub_lo + // %176 = COPY %170:isub_lo if (!DisableOptSZExt && MI.isCopy()) { assert(MI.getNumOperands() == 2); MachineOperand &Dst = MI.getOperand(0); diff --git a/lib/Target/Hexagon/HexagonRDFOpt.cpp b/lib/Target/Hexagon/HexagonRDFOpt.cpp index c73a2304e07d..413bc8edf2b6 100644 --- a/lib/Target/Hexagon/HexagonRDFOpt.cpp +++ b/lib/Target/Hexagon/HexagonRDFOpt.cpp @@ -280,7 +280,7 @@ bool HexagonDCE::rewrite(NodeAddr IA, SetVector &Remove) { } bool HexagonRDFOpt::runOnMachineFunction(MachineFunction &MF) { - if (skipFunction(*MF.getFunction())) + if (skipFunction(MF.getFunction())) return false; if (RDFLimit.getPosition()) { diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/lib/Target/Hexagon/HexagonRegisterInfo.cpp index f29f321214c5..85d6a6b4089e 100644 --- a/lib/Target/Hexagon/HexagonRegisterInfo.cpp +++ b/lib/Target/Hexagon/HexagonRegisterInfo.cpp @@ -123,6 +123,7 @@ HexagonRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { case Hexagon::ArchEnum::V55: case Hexagon::ArchEnum::V60: case Hexagon::ArchEnum::V62: + case Hexagon::ArchEnum::V65: return HasEHReturn ? 
CalleeSavedRegsV3EHReturn : CalleeSavedRegsV3; } @@ -143,6 +144,7 @@ BitVector HexagonRegisterInfo::getReservedRegs(const MachineFunction &MF) Reserved.set(Hexagon::R29); Reserved.set(Hexagon::R30); Reserved.set(Hexagon::R31); + Reserved.set(Hexagon::VTMP); // Control registers. Reserved.set(Hexagon::SA0); // C0 Reserved.set(Hexagon::LC0); // C1 diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.td b/lib/Target/Hexagon/HexagonRegisterInfo.td index afd63c691014..1d1e85e7ac7e 100644 --- a/lib/Target/Hexagon/HexagonRegisterInfo.td +++ b/lib/Target/Hexagon/HexagonRegisterInfo.td @@ -15,7 +15,6 @@ let Namespace = "Hexagon" in { class HexagonReg num, string n, list alt = [], list alias = []> : Register { - field bits<5> Num; let Aliases = alias; let HWEncoding{4-0} = num; } @@ -23,8 +22,6 @@ let Namespace = "Hexagon" in { class HexagonDoubleReg num, string n, list subregs, list alt = []> : RegisterWithSubRegs { - field bits<5> Num; - let AltNames = alt; let HWEncoding{4-0} = num; } @@ -32,28 +29,20 @@ let Namespace = "Hexagon" in { // Registers are identified with 5-bit ID numbers. // Ri - 32-bit integer registers. class Ri num, string n, list alt = []> : - HexagonReg { - let Num = num; - } + HexagonReg; // Rf - 32-bit floating-point registers. - class Rf num, string n> : HexagonReg { - let Num = num; - } - + class Rf num, string n> : HexagonReg; // Rd - 64-bit registers. class Rd num, string n, list subregs, list alt = []> : HexagonDoubleReg { - let Num = num; let SubRegs = subregs; } // Rp - predicate registers - class Rp num, string n> : HexagonReg { - let Num = num; - } + class Rp num, string n> : HexagonReg; // Rq - vector predicate registers @@ -64,21 +53,18 @@ let Namespace = "Hexagon" in { // Rc - control registers class Rc num, string n, list alt = [], list alias = []> : - HexagonReg { - let Num = num; - } + HexagonReg; // Rcc - 64-bit control registers. class Rcc num, string n, list subregs, list alt = []> : HexagonDoubleReg { - let Num = num; let SubRegs = subregs; } // Mx - address modifier registers - class Mx num, string n> : HexagonReg<{0b0000, num}, n> { - let Num = !cast>(num); + class Mx num, string n> : Register { + let HWEncoding{0} = num; } def isub_lo : SubRegIndex<32>; @@ -167,25 +153,27 @@ let Namespace = "Hexagon" in { // Control registers pairs. let SubRegIndices = [isub_lo, isub_hi], CoveredBySubRegs = 1 in { - def C1_0: Rcc<0, "c1:0", [SA0, LC0], ["lc0:sa0"]>, DwarfRegNum<[67]>; - def C3_2: Rcc<2, "c3:2", [SA1, LC1], ["lc1:sa1"]>, DwarfRegNum<[69]>; - def C5_4: Rcc<4, "c5:4", [P3_0, C5]>, DwarfRegNum<[71]>; - def C7_6: Rcc<6, "c7:6", [M0, M1], ["m1:0"]>, DwarfRegNum<[72]>; + def C1_0 : Rcc<0, "c1:0", [SA0, LC0], ["lc0:sa0"]>, DwarfRegNum<[67]>; + def C3_2 : Rcc<2, "c3:2", [SA1, LC1], ["lc1:sa1"]>, DwarfRegNum<[69]>; + def C5_4 : Rcc<4, "c5:4", [P3_0, C5]>, DwarfRegNum<[71]>; + def C7_6 : Rcc<6, "c7:6", [M0, M1], ["m1:0"]>, DwarfRegNum<[72]>; // Use C8 instead of USR as a subregister of C9_8. 
- def C9_8: Rcc<8, "c9:8", [C8, PC]>, DwarfRegNum<[74]>; - def C11_10: Rcc<10, "c11:10", [UGP, GP]>, DwarfRegNum<[76]>; - def CS: Rcc<12, "c13:12", [CS0, CS1], ["cs1:0"]>, DwarfRegNum<[78]>; - def UPCYCLE: Rcc<14, "c15:14", [UPCYCLELO, UPCYCLEHI]>, DwarfRegNum<[80]>; - def C17_16: Rcc<16, "c17:16", [FRAMELIMIT, FRAMEKEY]>, DwarfRegNum<[83]>; - def PKTCOUNT: Rcc<18, "c19:18", [PKTCOUNTLO, PKTCOUNTHI], ["pktcount"]>, + def C9_8 : Rcc<8, "c9:8", [C8, PC]>, DwarfRegNum<[74]>; + def C11_10 : Rcc<10, "c11:10", [UGP, GP]>, DwarfRegNum<[76]>; + def CS : Rcc<12, "c13:12", [CS0, CS1], ["cs1:0"]>, DwarfRegNum<[78]>; + def UPCYCLE: Rcc<14, "c15:14", [UPCYCLELO, UPCYCLEHI], ["upcycle"]>, + DwarfRegNum<[80]>; + def C17_16 : Rcc<16, "c17:16", [FRAMELIMIT, FRAMEKEY]>, DwarfRegNum<[83]>; + def PKTCOUNT : Rcc<18, "c19:18", [PKTCOUNTLO, PKTCOUNTHI], ["pktcount"]>, DwarfRegNum<[85]>; - def UTIMER: Rcc<30, "c31:30", [UTIMERLO, UTIMERHI], ["utimer"]>, + def UTIMER : Rcc<30, "c31:30", [UTIMERLO, UTIMERHI], ["utimer"]>, DwarfRegNum<[97]>; } foreach i = 0-31 in { def V#i : Ri, DwarfRegNum<[!add(i, 99)]>; } + def VTMP : Ri<0, "vtmp">, DwarfRegNum<[131]>; // Aliases of the V* registers used to hold double vec values. let SubRegIndices = [vsub_lo, vsub_hi], CoveredBySubRegs = 1 in { @@ -228,9 +216,6 @@ def VecI16 def VecI32 : ValueTypeByHwMode<[Hvx64, Hvx64old, Hvx128, Hvx128old, DefaultMode], [v16i32, v16i32, v32i32, v32i32, v16i32]>; -def VecI64 - : ValueTypeByHwMode<[Hvx64, Hvx64old, Hvx128, Hvx128old, DefaultMode], - [v8i64, v8i64, v16i64, v16i64, v8i64]>; def VecPI8 : ValueTypeByHwMode<[Hvx64, Hvx64old, Hvx128, Hvx128old, DefaultMode], [v128i8, v128i8, v256i8, v256i8, v128i8]>; @@ -240,16 +225,24 @@ def VecPI16 def VecPI32 : ValueTypeByHwMode<[Hvx64, Hvx64old, Hvx128, Hvx128old, DefaultMode], [v32i32, v32i32, v64i32, v64i32, v32i32]>; -def VecPI64 +def VecQ8 : ValueTypeByHwMode<[Hvx64, Hvx64old, Hvx128, Hvx128old, DefaultMode], - [v16i64, v16i64, v32i64, v32i64, v16i64]>; + [v64i1, v64i1, v128i1, v128i1, v64i1]>; +def VecQ16 + : ValueTypeByHwMode<[Hvx64, Hvx64old, Hvx128, Hvx128old, DefaultMode], + [v32i1, v32i1, v64i1, v64i1, v32i1]>; +def VecQ32 + : ValueTypeByHwMode<[Hvx64, Hvx64old, Hvx128, Hvx128old, DefaultMode], + [v16i1, v16i1, v32i1, v32i1, v16i1]>; + +// HVX register classes // Register classes. // // FIXME: the register order should be defined in terms of the preferred // allocation order... 
// -def IntRegs : RegisterClass<"Hexagon", [i32, f32, v4i8, v2i16], 32, +def IntRegs : RegisterClass<"Hexagon", [i32, f32, v32i1, v4i8, v2i16], 32, (add (sequence "R%u", 0, 9), (sequence "R%u", 12, 28), R10, R11, R29, R30, R31)>; @@ -261,25 +254,27 @@ def GeneralSubRegs : RegisterClass<"Hexagon", [i32], 32, def IntRegsLow8 : RegisterClass<"Hexagon", [i32], 32, (add R7, R6, R5, R4, R3, R2, R1, R0)> ; -def DoubleRegs : RegisterClass<"Hexagon", [i64, f64, v8i8, v4i16, v2i32], 64, +def DoubleRegs : RegisterClass<"Hexagon", + [i64, f64, v64i1, v8i8, v4i16, v2i32], 64, (add (sequence "D%u", 0, 4), (sequence "D%u", 6, 13), D5, D14, D15)>; def GeneralDoubleLow8Regs : RegisterClass<"Hexagon", [i64], 64, (add D11, D10, D9, D8, D3, D2, D1, D0)>; -def HvxVR : RegisterClass<"Hexagon", [VecI8, VecI16, VecI32, VecI64], 512, - (add (sequence "V%u", 0, 31))> { +def HvxVR : RegisterClass<"Hexagon", [VecI8, VecI16, VecI32], 512, + (add (sequence "V%u", 0, 31), VTMP)> { let RegInfos = RegInfoByHwMode<[Hvx64, Hvx128, DefaultMode], [RegInfo<512,512,512>, RegInfo<1024,1024,1024>, RegInfo<512,512,512>]>; } -def HvxWR : RegisterClass<"Hexagon", [VecPI8, VecPI16, VecPI32, VecPI64], 1024, +def HvxWR : RegisterClass<"Hexagon", [VecPI8, VecPI16, VecPI32], 1024, (add (sequence "W%u", 0, 15))> { let RegInfos = RegInfoByHwMode<[Hvx64, Hvx128, DefaultMode], [RegInfo<1024,1024,1024>, RegInfo<2048,2048,2048>, RegInfo<1024,1024,1024>]>; } -def HvxQR : RegisterClass<"Hexagon", [VecI1], 512, (add Q0, Q1, Q2, Q3)> { +def HvxQR : RegisterClass<"Hexagon", [VecI1, VecQ8, VecQ16, VecQ32], 512, + (add Q0, Q1, Q2, Q3)> { let RegInfos = RegInfoByHwMode<[Hvx64, Hvx128, DefaultMode], [RegInfo<512,512,512>, RegInfo<1024,1024,1024>, RegInfo<512,512,512>]>; } @@ -313,6 +308,11 @@ def V62Regs : RegisterClass<"Hexagon", [i32], 32, (add FRAMELIMIT, FRAMEKEY, C17_16, PKTCOUNTLO, PKTCOUNTHI, PKTCOUNT, UTIMERLO, UTIMERHI, UTIMER)>; +// These registers are new for v65 and onward. +let Size = 32, isAllocatable = 0 in +def V65Regs : RegisterClass<"Hexagon", [i32], 32, (add VTMP)>; + + def HexagonCSR : CalleeSavedRegs<(add R16, R17, R18, R19, R20, R21, R22, R23, diff --git a/lib/Target/Hexagon/HexagonSchedule.td b/lib/Target/Hexagon/HexagonSchedule.td index ffee03e72639..a1dfb66017a5 100644 --- a/lib/Target/Hexagon/HexagonSchedule.td +++ b/lib/Target/Hexagon/HexagonSchedule.td @@ -79,3 +79,8 @@ include "HexagonScheduleV60.td" include "HexagonScheduleV62.td" +//===----------------------------------------------------------------------===// +// V65 Machine Info + +//===----------------------------------------------------------------------===// + +include "HexagonScheduleV65.td" diff --git a/lib/Target/Hexagon/HexagonScheduleV65.td b/lib/Target/Hexagon/HexagonScheduleV65.td new file mode 100644 index 000000000000..e3b1313923f5 --- /dev/null +++ b/lib/Target/Hexagon/HexagonScheduleV65.td @@ -0,0 +1,40 @@ +//=-HexagonScheduleV65.td - HexagonV65 Scheduling Definitions *- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// +// ScalarItin and HVXItin contain some old itineraries +// still used by a handful of instructions. Hopefully, we will be able +// to get rid of them soon. 
+ +def HexagonV65ItinList : DepScalarItinV65, ScalarItin, + DepHVXItinV65, HVXItin, PseudoItin { + list ItinList = + !listconcat(DepScalarItinV65_list, ScalarItin_list, + DepHVXItinV65_list, HVXItin_list, PseudoItin_list); +} + +def HexagonItinerariesV65 : + ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3, SLOT_ENDLOOP, + CVI_ST, CVI_XLANE, CVI_SHIFT, CVI_MPY0, CVI_MPY1, + CVI_LD, CVI_XLSHF, CVI_MPY01, CVI_ALL, + CVI_ALL_NOMEM], + [Hex_FWD, HVX_FWD], + HexagonV65ItinList.ItinList>; + +def HexagonModelV65 : SchedMachineModel { + // Max issue per cycle == bundle width. + let IssueWidth = 4; + let Itineraries = HexagonItinerariesV65; + let LoadLatency = 1; + let CompleteModel = 0; +} + +//===----------------------------------------------------------------------===// +// Hexagon V65 Resource Definitions - +//===----------------------------------------------------------------------===// diff --git a/lib/Target/Hexagon/HexagonSplitDouble.cpp b/lib/Target/Hexagon/HexagonSplitDouble.cpp index 75d6750322b0..c9f5400018e8 100644 --- a/lib/Target/Hexagon/HexagonSplitDouble.cpp +++ b/lib/Target/Hexagon/HexagonSplitDouble.cpp @@ -536,7 +536,7 @@ void HexagonSplitDoubleRegs::collectIndRegsForLoop(const MachineLoop *L, Rs.insert(CmpR2); DEBUG({ - dbgs() << "For loop at BB#" << HB->getNumber() << " ind regs: "; + dbgs() << "For loop at " << printMBBReference(*HB) << " ind regs: "; dump_partition(dbgs(), Rs, *TRI); dbgs() << '\n'; }); @@ -1163,7 +1163,7 @@ bool HexagonSplitDoubleRegs::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "Splitting double registers in function: " << MF.getName() << '\n'); - if (skipFunction(*MF.getFunction())) + if (skipFunction(MF.getFunction())) return false; auto &ST = MF.getSubtarget(); diff --git a/lib/Target/Hexagon/HexagonStoreWidening.cpp b/lib/Target/Hexagon/HexagonStoreWidening.cpp index d1816cbc7528..300f6de33552 100644 --- a/lib/Target/Hexagon/HexagonStoreWidening.cpp +++ b/lib/Target/Hexagon/HexagonStoreWidening.cpp @@ -9,10 +9,10 @@ // Replace sequences of "narrow" stores to adjacent memory locations with // a fewer "wide" stores that have the same effect. // For example, replace: -// S4_storeirb_io %vreg100, 0, 0 ; store-immediate-byte -// S4_storeirb_io %vreg100, 1, 0 ; store-immediate-byte +// S4_storeirb_io %100, 0, 0 ; store-immediate-byte +// S4_storeirb_io %100, 1, 0 ; store-immediate-byte // with -// S4_storeirh_io %vreg100, 0, 0 ; store-immediate-halfword +// S4_storeirh_io %100, 0, 0 ; store-immediate-halfword // The above is the general idea. The actual cases handled by the code // may be a bit more complex. 
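[Editor's sketch, not part of the patch] The store-widening header comment above gives the canonical case: two store-immediate-byte instructions at adjacent offsets become one store-immediate-halfword. The value part of that rewrite is a little-endian byte merge (Hexagon is little-endian); the sketch below shows only that arithmetic, not the MI-level legality and alias checks the pass performs, and the example operand values are made up:

#include <cassert>
#include <cstdint>

// The byte stored at the lower address becomes the low 8 bits of the
// halfword that replaces the pair of byte stores.
static uint16_t widenBytePair(uint8_t AtOffset0, uint8_t AtOffset1) {
  return static_cast<uint16_t>(AtOffset0 | (AtOffset1 << 8));
}

int main() {
  // S4_storeirb_io %100, 0, 0x12  and  S4_storeirb_io %100, 1, 0x34
  // could be replaced by  S4_storeirh_io %100, 0, 0x3412.
  assert(widenBytePair(0x12, 0x34) == 0x3412);
  return 0;
}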
// The purpose of this pass is to reduce the number of outstanding stores, @@ -585,7 +585,7 @@ bool HexagonStoreWidening::processBasicBlock(MachineBasicBlock &MBB) { } bool HexagonStoreWidening::runOnMachineFunction(MachineFunction &MFn) { - if (skipFunction(*MFn.getFunction())) + if (skipFunction(MFn.getFunction())) return false; MF = &MFn; diff --git a/lib/Target/Hexagon/HexagonSubtarget.cpp b/lib/Target/Hexagon/HexagonSubtarget.cpp index 7ec4c34504bd..6f1f6c46a107 100644 --- a/lib/Target/Hexagon/HexagonSubtarget.cpp +++ b/lib/Target/Hexagon/HexagonSubtarget.cpp @@ -92,8 +92,8 @@ static cl::opt EnableCheckBankConflict("hexagon-check-bank-conflict", HexagonSubtarget::HexagonSubtarget(const Triple &TT, StringRef CPU, StringRef FS, const TargetMachine &TM) - : HexagonGenSubtargetInfo(TT, CPU, FS), - CPUString(Hexagon_MC::selectHexagonCPU(TT, CPU)), + : HexagonGenSubtargetInfo(TT, CPU, FS), OptLevel(TM.getOptLevel()), + CPUString(Hexagon_MC::selectHexagonCPU(CPU)), InstrInfo(initializeSubtargetDependencies(CPU, FS)), RegInfo(getHwMode()), TLInfo(TM, *this), InstrItins(getInstrItineraryForCPU(CPUString)) { @@ -110,6 +110,7 @@ HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { {"hexagonv55", Hexagon::ArchEnum::V55}, {"hexagonv60", Hexagon::ArchEnum::V60}, {"hexagonv62", Hexagon::ArchEnum::V62}, + {"hexagonv65", Hexagon::ArchEnum::V65}, }; auto FoundIt = CpuTable.find(CPUString); @@ -131,6 +132,11 @@ HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { if (OverrideLongCalls.getPosition()) UseLongCalls = OverrideLongCalls; + FeatureBitset Features = getFeatureBits(); + if (HexagonDisableDuplex) + setFeatureBits(Features.set(Hexagon::FeatureDuplex, false)); + setFeatureBits(Hexagon_MC::completeHVXFeatures(Features)); + return *this; } @@ -220,29 +226,29 @@ void HexagonSubtarget::CallMutation::apply(ScheduleDAGInstrs *DAG) { shouldTFRICallBind(HII, DAG->SUnits[su], DAG->SUnits[su+1])) DAG->SUnits[su].addPred(SDep(&DAG->SUnits[su-1], SDep::Barrier)); // Prevent redundant register copies between two calls, which are caused by - // both the return value and the argument for the next call being in %R0. + // both the return value and the argument for the next call being in %r0. // Example: // 1: - // 2: %VregX = COPY %R0 - // 3: - // 4: %R0 = ... + // 2: %vreg = COPY %r0 + // 3: + // 4: %r0 = ... // 5: // The scheduler would often swap 3 and 4, so an additional register is // needed. This code inserts a Barrier dependence between 3 & 4 to prevent - // this. The same applies for %D0 and %V0/%W0, which are also handled. + // this. The same applies for %d0 and %v0/%w0, which are also handled. else if (SchedRetvalOptimization) { const MachineInstr *MI = DAG->SUnits[su].getInstr(); if (MI->isCopy() && (MI->readsRegister(Hexagon::R0, &TRI) || MI->readsRegister(Hexagon::V0, &TRI))) { - // %vregX = COPY %R0 + // %vreg = COPY %r0 VRegHoldingRet = MI->getOperand(0).getReg(); RetRegister = MI->getOperand(1).getReg(); LastUseOfRet = nullptr; } else if (VRegHoldingRet && MI->readsVirtualRegister(VRegHoldingRet)) - // + // LastUseOfRet = &DAG->SUnits[su]; else if (LastUseOfRet && MI->definesRegister(RetRegister, &TRI)) - // %R0 = ... + // %r0 = ... DAG->SUnits[su].addPred(SDep(LastUseOfRet, SDep::Barrier)); } } @@ -294,6 +300,14 @@ void HexagonSubtarget::BankConflictMutation::apply(ScheduleDAGInstrs *DAG) { } } +/// \brief Enable use of alias analysis during code generation (during MI +/// scheduling, DAGCombine, etc.). 
+bool HexagonSubtarget::useAA() const { + if (OptLevel != CodeGenOpt::None) + return true; + return false; +} + /// \brief Perform target specific adjustments to the latency of a schedule /// dependency. void HexagonSubtarget::adjustSchedDependency(SUnit *Src, SUnit *Dst, diff --git a/lib/Target/Hexagon/HexagonSubtarget.h b/lib/Target/Hexagon/HexagonSubtarget.h index 76892454d8a6..af93f20d97fc 100644 --- a/lib/Target/Hexagon/HexagonSubtarget.h +++ b/lib/Target/Hexagon/HexagonSubtarget.h @@ -50,9 +50,12 @@ class HexagonSubtarget : public HexagonGenSubtargetInfo { bool UseLongCalls; bool ModeIEEERndNear; + bool HasMemNoShuf = false; + bool EnableDuplex = false; public: Hexagon::ArchEnum HexagonArchVersion; Hexagon::ArchEnum HexagonHVXVersion = Hexagon::ArchEnum::V4; + CodeGenOpt::Level OptLevel; /// True if the target should use Back-Skip-Back scheduling. This is the /// default for V60. bool UseBSBScheduling; @@ -137,11 +140,18 @@ class HexagonSubtarget : public HexagonGenSubtargetInfo { bool hasV62TOpsOnly() const { return getHexagonArchVersion() == Hexagon::ArchEnum::V62; } + bool hasV65TOps() const { + return getHexagonArchVersion() >= Hexagon::ArchEnum::V65; + } + bool hasV65TOpsOnly() const { + return getHexagonArchVersion() == Hexagon::ArchEnum::V65; + } bool modeIEEERndNear() const { return ModeIEEERndNear; } bool useHVXOps() const { return HexagonHVXVersion > Hexagon::ArchEnum::V4; } bool useHVX128BOps() const { return useHVXOps() && UseHVX128BOps; } bool useHVX64BOps() const { return useHVXOps() && UseHVX64BOps; } + bool hasMemNoShuf() const { return HasMemNoShuf; } bool useLongCalls() const { return UseLongCalls; } bool usePredicatedCalls() const; @@ -177,6 +187,10 @@ class HexagonSubtarget : public HexagonGenSubtargetInfo { std::vector> &Mutations) const override; + /// \brief Enable use of alias analysis during code generation (during MI + /// scheduling, DAGCombine, etc.). + bool useAA() const override; + /// \brief Perform target specific adjustments to the latency of a schedule /// dependency. void adjustSchedDependency(SUnit *def, SUnit *use, SDep& dep) const override; @@ -190,14 +204,38 @@ class HexagonSubtarget : public HexagonGenSubtargetInfo { llvm_unreachable("Invalid HVX vector length settings"); } - bool isHVXVectorType(MVT VecTy) const { + ArrayRef getHVXElementTypes() const { + static MVT Types[] = { MVT::i8, MVT::i16, MVT::i32 }; + return makeArrayRef(Types); + } + + bool isHVXVectorType(MVT VecTy, bool IncludeBool = false) const { if (!VecTy.isVector() || !useHVXOps()) return false; - unsigned ElemWidth = VecTy.getVectorElementType().getSizeInBits(); - if (ElemWidth < 8 || ElemWidth > 64) + MVT ElemTy = VecTy.getVectorElementType(); + if (!IncludeBool && ElemTy == MVT::i1) return false; + + unsigned HwLen = getVectorLength(); + unsigned NumElems = VecTy.getVectorNumElements(); + ArrayRef ElemTypes = getHVXElementTypes(); + + if (IncludeBool && ElemTy == MVT::i1) { + // Special case for the v512i1, etc. + if (8*HwLen == NumElems) + return true; + // Boolean HVX vector types are formed from regular HVX vector types + // by replacing the element type with i1. 
+ for (MVT T : ElemTypes) + if (NumElems * T.getSizeInBits() == 8*HwLen) + return true; + return false; + } + unsigned VecWidth = VecTy.getSizeInBits(); - return VecWidth == 8*getVectorLength() || VecWidth == 16*getVectorLength(); + if (VecWidth != 8*HwLen && VecWidth != 16*HwLen) + return false; + return llvm::any_of(ElemTypes, [ElemTy] (MVT T) { return ElemTy == T; }); } unsigned getL1CacheLineSize() const; diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp index 683fdea6122a..363b703fef28 100644 --- a/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -146,6 +146,7 @@ namespace llvm { FunctionPass *createHexagonCopyToCombine(); FunctionPass *createHexagonEarlyIfConversion(); FunctionPass *createHexagonFixupHwLoops(); + FunctionPass *createHexagonGatherPacketize(); FunctionPass *createHexagonGenExtract(); FunctionPass *createHexagonGenInsert(); FunctionPass *createHexagonGenMux(); @@ -257,10 +258,9 @@ void HexagonTargetMachine::adjustPassManager(PassManagerBuilder &PMB) { }); } -TargetIRAnalysis HexagonTargetMachine::getTargetIRAnalysis() { - return TargetIRAnalysis([this](const Function &F) { - return TargetTransformInfo(HexagonTTIImpl(this, F)); - }); +TargetTransformInfo +HexagonTargetMachine::getTargetTransformInfo(const Function &F) { + return TargetTransformInfo(HexagonTTIImpl(this, F)); } @@ -396,9 +396,15 @@ void HexagonPassConfig::addPreEmitPass() { // Generate MUX from pairs of conditional transfers. if (EnableGenMux) addPass(createHexagonGenMux()); + } + + // Create packets for 2 instructions that constitute a gather instruction. + // Do this regardless of the opt level. + addPass(createHexagonGatherPacketize(), false); + if (!NoOpt) addPass(createHexagonPacketizer(), false); - } + if (EnableVectorPrint) addPass(createHexagonVectorPrint(), false); diff --git a/lib/Target/Hexagon/HexagonTargetMachine.h b/lib/Target/Hexagon/HexagonTargetMachine.h index acd41f920b53..a7c6a3437fbc 100644 --- a/lib/Target/Hexagon/HexagonTargetMachine.h +++ b/lib/Target/Hexagon/HexagonTargetMachine.h @@ -39,7 +39,7 @@ class HexagonTargetMachine : public LLVMTargetMachine { void adjustPassManager(PassManagerBuilder &PMB) override; TargetPassConfig *createPassConfig(PassManagerBase &PM) override; - TargetIRAnalysis getTargetIRAnalysis() override; + TargetTransformInfo getTargetTransformInfo(const Function &F) override; HexagonTargetObjectFile *getObjFileLowering() const override { return static_cast<HexagonTargetObjectFile*>(TLOF.get()); diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp index deb46f01c284..c2404235091c 100644 --- a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp +++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp @@ -33,6 +33,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/ScheduleDAG.h" @@ -57,6 +58,10 @@ static cl::opt<bool> DisablePacketizer("disable-packetizer", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Disable Hexagon packetizer pass")); +cl::opt<bool> Slot1Store("slot1-store-slot0-load", cl::Hidden, + cl::ZeroOrMore, cl::init(true), + cl::desc("Allow slot1 store and slot0 load")); + static cl::opt<bool> PacketizeVolatiles("hexagon-packetize-volatiles", cl::ZeroOrMore, cl::Hidden, cl::init(true), cl::desc("Allow non-solo 
packetization of volatile memory references")); @@ -194,7 +199,7 @@ static MachineBasicBlock::iterator moveInstrOut(MachineInstr &MI, } bool HexagonPacketizer::runOnMachineFunction(MachineFunction &MF) { - if (DisablePacketizer || skipFunction(*MF.getFunction())) + if (DisablePacketizer || skipFunction(MF.getFunction())) return false; HII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo(); @@ -772,8 +777,8 @@ bool HexagonPacketizerList::canPromoteToNewValueStore(const MachineInstr &MI, // If data definition is because of implicit definition of the register, // do not newify the store. Eg. - // %R9 = ZXTH %R12, %D6, %R12 - // S2_storerh_io %R8, 2, %R12; mem:ST2[%scevgep343] + // %r9 = ZXTH %r12, implicit %d6, implicit-def %r12 + // S2_storerh_io %r8, 2, killed %r12; mem:ST2[%scevgep343] for (auto &MO : PacketMI.operands()) { if (MO.isRegMask() && MO.clobbersPhysReg(DepReg)) return false; @@ -787,8 +792,8 @@ bool HexagonPacketizerList::canPromoteToNewValueStore(const MachineInstr &MI, // Handle imp-use of super reg case. There is a target independent side // change that should prevent this situation but I am handling it for // just-in-case. For example, we cannot newify R2 in the following case: - // %R3 = A2_tfrsi 0; - // S2_storeri_io %R0, 0, %R2, %D1; + // %r3 = A2_tfrsi 0; + // S2_storeri_io killed %r0, 0, killed %r2, implicit killed %d1; for (auto &MO : MI.operands()) { if (MO.isReg() && MO.isUse() && MO.isImplicit() && MO.getReg() == DepReg) return false; @@ -892,12 +897,12 @@ bool HexagonPacketizerList::canPromoteToDotNew(const MachineInstr &MI, // Go through the packet instructions and search for an anti dependency between // them and DepReg from MI. Consider this case: // Trying to add -// a) %R1 = TFRI_cdNotPt %P3, 2 +// a) %r1 = TFRI_cdNotPt %p3, 2 // to this packet: // { -// b) %P0 = C2_or %P3, %P0 -// c) %P3 = C2_tfrrp %R23 -// d) %R1 = C2_cmovenewit %P3, 4 +// b) %p0 = C2_or killed %p3, killed %p0 +// c) %p3 = C2_tfrrp %r23 +// d) %r1 = C2_cmovenewit %p3, 4 // } // The P3 from a) and d) will be complements after // a)'s P3 is converted to .new form @@ -962,11 +967,11 @@ bool HexagonPacketizerList::arePredicatesComplements(MachineInstr &MI1, // One corner case deals with the following scenario: // Trying to add - // a) %R24 = A2_tfrt %P0, %R25 + // a) %r24 = A2_tfrt %p0, %r25 // to this packet: // { - // b) %R25 = A2_tfrf %P0, %R24 - // c) %P0 = C2_cmpeqi %R26, 1 + // b) %r25 = A2_tfrf %p0, %r24 + // c) %p0 = C2_cmpeqi %r26, 1 // } // // On general check a) and b) are complements, but presence of c) will @@ -1050,6 +1055,10 @@ bool HexagonPacketizerList::ignorePseudoInstruction(const MachineInstr &MI, } bool HexagonPacketizerList::isSoloInstruction(const MachineInstr &MI) { + // Ensure any bundles created by gather packetize remain separate. + if (MI.isBundle()) + return true; + if (MI.isEHLabel() || MI.isCFIInstruction()) return true; @@ -1099,11 +1108,12 @@ static bool cannotCoexistAsymm(const MachineInstr &MI, const MachineInstr &MJ, MJ.isCall() || MJ.isTerminator(); switch (MI.getOpcode()) { - case (Hexagon::S2_storew_locked): - case (Hexagon::S4_stored_locked): - case (Hexagon::L2_loadw_locked): - case (Hexagon::L4_loadd_locked): - case (Hexagon::Y4_l2fetch): { + case Hexagon::S2_storew_locked: + case Hexagon::S4_stored_locked: + case Hexagon::L2_loadw_locked: + case Hexagon::L4_loadd_locked: + case Hexagon::Y4_l2fetch: + case Hexagon::Y5_l2fetch: { // These instructions can only be grouped with ALU32 or non-floating-point // XTYPE instructions. 
Since there is no convenient way of identifying fp // XTYPE instructions, only allow grouping with ALU32 for now. @@ -1166,6 +1176,8 @@ static bool isSystemInstr(const MachineInstr &MI) { switch (Opc) { case Hexagon::Y2_barrier: case Hexagon::Y2_dcfetchbo: + case Hexagon::Y4_l2fetch: + case Hexagon::Y5_l2fetch: return true; } return false; @@ -1496,19 +1508,33 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) { // J is first, I is second. bool LoadJ = J.mayLoad(), StoreJ = J.mayStore(); bool LoadI = I.mayLoad(), StoreI = I.mayStore(); - if (StoreJ) { - // Two stores are only allowed on V4+. Load following store is never - // allowed. - if (LoadI && alias(J, I)) { + bool NVStoreJ = HII->isNewValueStore(J); + bool NVStoreI = HII->isNewValueStore(I); + bool IsVecJ = HII->isHVXVec(J); + bool IsVecI = HII->isHVXVec(I); + + if (Slot1Store && MF.getSubtarget().hasV65TOps() && + ((LoadJ && StoreI && !NVStoreI) || + (StoreJ && LoadI && !NVStoreJ)) && + (J.getOpcode() != Hexagon::S2_allocframe && + I.getOpcode() != Hexagon::S2_allocframe) && + (J.getOpcode() != Hexagon::L2_deallocframe && + I.getOpcode() != Hexagon::L2_deallocframe) && + (!HII->isMemOp(J) && !HII->isMemOp(I)) && (!IsVecJ && !IsVecI)) + setmemShufDisabled(true); + else + if (StoreJ && LoadI && alias(J, I)) { + FoundSequentialDependence = true; + break; + } + + if (!StoreJ) + if (!LoadJ || (!LoadI && !StoreI)) { + // If J is neither load nor store, assume a dependency. + // If J is a load, but I is neither, also assume a dependency. FoundSequentialDependence = true; break; } - } else if (!LoadJ || (!LoadI && !StoreI)) { - // If J is neither load nor store, assume a dependency. - // If J is a load, but I is neither, also assume a dependency. - FoundSequentialDependence = true; - break; - } // Store followed by store: not OK on V2. // Store followed by load: not OK on all. // Load followed by store: OK on all. @@ -1543,7 +1569,7 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) { // There are certain anti-dependencies that cannot be ignored. // Specifically: - // J2_call ... %R0 ; SUJ + // J2_call ... implicit-def %r0 ; SUJ // R0 = ... ; SUI // Those cannot be packetized together, since the call will observe // the effect of the assignment to R0. @@ -1628,6 +1654,26 @@ bool HexagonPacketizerList::isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) { return false; } + +bool HexagonPacketizerList::foundLSInPacket() { + bool FoundLoad = false; + bool FoundStore = false; + + for (auto MJ : CurrentPacketMIs) { + unsigned Opc = MJ->getOpcode(); + if (Opc == Hexagon::S2_allocframe || Opc == Hexagon::L2_deallocframe) + continue; + if (HII->isMemOp(*MJ)) + continue; + if (MJ->mayLoad()) + FoundLoad = true; + if (MJ->mayStore() && !HII->isNewValueStore(*MJ)) + FoundStore = true; + } + return FoundLoad && FoundStore; +} + + MachineBasicBlock::iterator HexagonPacketizerList::addToPacket(MachineInstr &MI) { MachineBasicBlock::iterator MII = MI.getIterator(); @@ -1703,8 +1749,31 @@ HexagonPacketizerList::addToPacket(MachineInstr &MI) { void HexagonPacketizerList::endPacket(MachineBasicBlock *MBB, MachineBasicBlock::iterator MI) { + // Replace VLIWPacketizerList::endPacket(MBB, MI). 
+ + bool memShufDisabled = getmemShufDisabled(); + if (memShufDisabled && !foundLSInPacket()) { + setmemShufDisabled(false); + DEBUG(dbgs() << " Not added to NoShufPacket\n"); + } + memShufDisabled = getmemShufDisabled(); + + if (CurrentPacketMIs.size() > 1) { + MachineBasicBlock::instr_iterator FirstMI(CurrentPacketMIs.front()); + MachineBasicBlock::instr_iterator LastMI(MI.getInstrIterator()); + finalizeBundle(*MBB, FirstMI, LastMI); + + auto BundleMII = std::prev(FirstMI); + if (memShufDisabled) + HII->setBundleNoShuf(BundleMII); + + setmemShufDisabled(false); + } OldPacketMIs = CurrentPacketMIs; - VLIWPacketizerList::endPacket(MBB, MI); + CurrentPacketMIs.clear(); + + ResourceTracker->clearResources(); + DEBUG(dbgs() << "End packet\n"); } bool HexagonPacketizerList::shouldAddToPacket(const MachineInstr &MI) { diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.h b/lib/Target/Hexagon/HexagonVLIWPacketizer.h index cbdd2367429d..764d9ae9059a 100644 --- a/lib/Target/Hexagon/HexagonVLIWPacketizer.h +++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.h @@ -49,6 +49,8 @@ class HexagonPacketizerList : public VLIWPacketizerList { // schedule this instruction. bool FoundSequentialDependence; + bool MemShufDisabled = false; + // Track MIs with ignored dependence. std::vector IgnoreDepMIs; @@ -89,6 +91,7 @@ class HexagonPacketizerList : public VLIWPacketizerList { // and SUJ. bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) override; + bool foundLSInPacket(); MachineBasicBlock::iterator addToPacket(MachineInstr &MI) override; void endPacket(MachineBasicBlock *MBB, MachineBasicBlock::iterator MI) override; @@ -97,6 +100,12 @@ class HexagonPacketizerList : public VLIWPacketizerList { void unpacketizeSoloInstrs(MachineFunction &MF); protected: + bool getmemShufDisabled() { + return MemShufDisabled; + }; + void setmemShufDisabled(bool val) { + MemShufDisabled = val; + }; bool isCallDependent(const MachineInstr &MI, SDep::Kind DepType, unsigned DepReg); bool promoteToDotCur(MachineInstr &MI, SDep::Kind DepType, diff --git a/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp b/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp index 52e5dcd46388..39395dbd3aec 100644 --- a/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp +++ b/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp @@ -548,13 +548,13 @@ bool HexagonVectorLoopCarriedReuse::doVLCR() { findValueToReuse(); if (ReuseCandidate.isDefined()) { reuseValue(); - Changed = true; - Continue = true; - } - llvm::for_each(Dependences, std::default_delete()); - } while (Continue); - return Changed; -} + Changed = true; + Continue = true; + } + llvm::for_each(Dependences, std::default_delete()); + } while (Continue); + return Changed; +} void HexagonVectorLoopCarriedReuse::findDepChainFromPHI(Instruction *I, DepChain &D) { diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp index 31da9fa06d00..fe54c19370b3 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp @@ -655,7 +655,8 @@ class HexagonAsmBackend : public MCAsmBackend { assert(HexagonMCInstrInfo::isBundle(Inst) && "Hexagon relaxInstruction only works on bundles"); - Res = HexagonMCInstrInfo::createBundle(); + Res.setOpcode(Hexagon::BUNDLE); + Res.addOperand(MCOperand::createImm(Inst.getOperand(0).getImm())); // Copy the results into the bundle. 
bool Update = false; for (auto &I : HexagonMCInstrInfo::bundleInstructions(Inst)) { @@ -764,11 +765,12 @@ class HexagonAsmBackend : public MCAsmBackend { // MCAsmBackend MCAsmBackend *llvm::createHexagonAsmBackend(Target const &T, - MCRegisterInfo const & /*MRI*/, - const Triple &TT, StringRef CPU, - const MCTargetOptions &Options) { + const MCSubtargetInfo &STI, + MCRegisterInfo const & /*MRI*/, + const MCTargetOptions &Options) { + const Triple &TT = STI.getTargetTriple(); uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TT.getOS()); - StringRef CPUString = Hexagon_MC::selectHexagonCPU(TT, CPU); + StringRef CPUString = Hexagon_MC::selectHexagonCPU(STI.getCPU()); return new HexagonAsmBackend(T, TT, OSABI, CPUString); } diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h index d1a6d38797d7..f5a376033757 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h @@ -68,8 +68,8 @@ namespace HexagonII { SoloAXPos = 7, SoloAXMask = 0x1, // Only A-type instruction in first slot or nothing. - SoloAin1Pos = 8, - SoloAin1Mask = 0x1, + RestrictSlot1AOKPos = 8, + RestrictSlot1AOKMask = 0x1, // Predicated instructions. PredicatedPos = 9, @@ -122,6 +122,16 @@ namespace HexagonII { ExtentAlignPos = 33, ExtentAlignMask = 0x3, + CofMax1Pos = 35, + CofMax1Mask = 0x1, + CofRelax1Pos = 36, + CofRelax1Mask = 0x1, + CofRelax2Pos = 37, + CofRelax2Mask = 0x1, + + RestrictNoSlot1StorePos = 38, + RestrictNoSlot1StoreMask = 0x1, + // Addressing mode for load/store instructions. AddrModePos = 41, AddrModeMask = 0x7, @@ -152,8 +162,9 @@ namespace HexagonII { PrefersSlot3Pos = 56, PrefersSlot3Mask = 0x1, - CofMax1Pos = 60, - CofMax1Mask = 0x1, + // v65 + HasTmpDstPos = 59, + HasTmpDstMask = 0x1, CVINewPos = 61, CVINewMask = 0x1 diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp index 142070ad73b6..53f3cba052bc 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp @@ -179,35 +179,6 @@ void HexagonMCChecker::init(MCInst const &MCI) { } } - // Figure out register definitions that produce new values. - if (HexagonMCInstrInfo::hasNewValue(MCII, MCI)) { - unsigned R = HexagonMCInstrInfo::getNewValueOperand(MCII, MCI).getReg(); - - if (HexagonMCInstrInfo::isCompound(MCII, MCI)) - compoundRegisterMap(R); // Compound insns have a limited register range. - - for (MCRegAliasIterator SRI(R, &RI, !MCSubRegIterator(R, &RI).isValid()); - SRI.isValid(); ++SRI) - if (!MCSubRegIterator(*SRI, &RI).isValid()) - // No super-registers defined indirectly. - NewDefs[*SRI].push_back(NewSense::Def( - PredReg, HexagonMCInstrInfo::isPredicatedTrue(MCII, MCI), - HexagonMCInstrInfo::isFloat(MCII, MCI))); - - // For fairly unique 2-dot-new producers, example: - // vdeal(V1, V9, R0) V1.new and V9.new can be used by consumers. - if (HexagonMCInstrInfo::hasNewValue2(MCII, MCI)) { - unsigned R2 = HexagonMCInstrInfo::getNewValueOperand2(MCII, MCI).getReg(); - - bool HasSubRegs = MCSubRegIterator(R2, &RI).isValid(); - for (MCRegAliasIterator SRI(R2, &RI, !HasSubRegs); SRI.isValid(); ++SRI) - if (!MCSubRegIterator(*SRI, &RI).isValid()) - NewDefs[*SRI].push_back(NewSense::Def( - PredReg, HexagonMCInstrInfo::isPredicatedTrue(MCII, MCI), - HexagonMCInstrInfo::isFloat(MCII, MCI))); - } - } - // Figure out definitions of new predicate registers. 
if (HexagonMCInstrInfo::isPredicatedNew(MCII, MCI)) for (unsigned i = MCID.getNumDefs(); i < MCID.getNumOperands(); ++i) @@ -217,21 +188,6 @@ void HexagonMCChecker::init(MCInst const &MCI) { if (isPredicateRegister(P)) NewPreds.insert(P); } - - // Figure out uses of new values. - if (HexagonMCInstrInfo::isNewValue(MCII, MCI)) { - unsigned N = HexagonMCInstrInfo::getNewValueOperand(MCII, MCI).getReg(); - - if (!MCSubRegIterator(N, &RI).isValid()) { - // Super-registers cannot use new values. - if (MCID.isBranch()) - NewUses[N] = NewSense::Jmp( - HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeNCJ); - else - NewUses[N] = NewSense::Use( - PredReg, HexagonMCInstrInfo::isPredicatedTrue(MCII, MCI)); - } - } } HexagonMCChecker::HexagonMCChecker(MCContext &Context, MCInstrInfo const &MCII, @@ -242,13 +198,17 @@ HexagonMCChecker::HexagonMCChecker(MCContext &Context, MCInstrInfo const &MCII, init(); } +HexagonMCChecker::HexagonMCChecker(HexagonMCChecker const &Other, + MCSubtargetInfo const &STI, + bool CopyReportErrors) + : Context(Other.Context), MCB(Other.MCB), RI(Other.RI), MCII(Other.MCII), + STI(STI), ReportErrors(CopyReportErrors ? Other.ReportErrors : false) {} + bool HexagonMCChecker::check(bool FullCheck) { - bool chkB = checkBranches(); bool chkP = checkPredicates(); bool chkNV = checkNewValues(); bool chkR = checkRegisters(); bool chkRRO = checkRegistersReadOnly(); - bool chkELB = checkEndloopBranches(); checkRegisterCurDefs(); bool chkS = checkSolo(); bool chkSh = true; @@ -258,30 +218,14 @@ bool HexagonMCChecker::check(bool FullCheck) { if (FullCheck) chkSl = checkSlots(); bool chkAXOK = checkAXOK(); - bool chk = chkB && chkP && chkNV && chkR && chkRRO && chkELB && chkS && - chkSh && chkSl && chkAXOK; + bool chkCofMax1 = checkCOFMax1(); + bool chkHWLoop = checkHWLoop(); + bool chk = chkP && chkNV && chkR && chkRRO && chkS && chkSh && chkSl && + chkAXOK && chkCofMax1 && chkHWLoop; return chk; } -bool HexagonMCChecker::checkEndloopBranches() { - for (auto const &I : HexagonMCInstrInfo::bundleInstructions(MCII, MCB)) { - MCInstrDesc const &Desc = HexagonMCInstrInfo::getDesc(MCII, I); - if (Desc.isBranch() || Desc.isCall()) { - auto Inner = HexagonMCInstrInfo::isInnerLoop(MCB); - if (Inner || HexagonMCInstrInfo::isOuterLoop(MCB)) { - reportError(I.getLoc(), - Twine("packet marked with `:endloop") + - (Inner ? 
"0" : "1") + "' " + - "cannot contain instructions that modify register " + "`" + - Twine(RI.getName(Hexagon::PC)) + "'"); - return false; - } - } - } - return true; -} - static bool isDuplexAGroup(unsigned Opcode) { switch (Opcode) { case Hexagon::SA1_addi: @@ -355,6 +299,65 @@ bool HexagonMCChecker::checkAXOK() { return true; } +void HexagonMCChecker::reportBranchErrors() { + for (auto const &I : HexagonMCInstrInfo::bundleInstructions(MCII, MCB)) { + MCInstrDesc const &Desc = HexagonMCInstrInfo::getDesc(MCII, I); + if (Desc.isBranch() || Desc.isCall() || Desc.isReturn()) + reportNote(I.getLoc(), "Branching instruction"); + } +} + +bool HexagonMCChecker::checkHWLoop() { + if (!HexagonMCInstrInfo::isInnerLoop(MCB) && + !HexagonMCInstrInfo::isOuterLoop(MCB)) + return true; + for (auto const &I : HexagonMCInstrInfo::bundleInstructions(MCII, MCB)) { + MCInstrDesc const &Desc = HexagonMCInstrInfo::getDesc(MCII, I); + if (Desc.isBranch() || Desc.isCall() || Desc.isReturn()) { + reportError(MCB.getLoc(), + "Branches cannot be in a packet with hardware loops"); + reportBranchErrors(); + return false; + } + } + return true; +} + +bool HexagonMCChecker::checkCOFMax1() { + SmallVector BranchLocations; + for (auto const &I : HexagonMCInstrInfo::bundleInstructions(MCII, MCB)) { + MCInstrDesc const &Desc = HexagonMCInstrInfo::getDesc(MCII, I); + if (Desc.isBranch() || Desc.isCall() || Desc.isReturn()) + BranchLocations.push_back(&I); + } + for (unsigned J = 0, N = BranchLocations.size(); J < N; ++J) { + MCInst const &I = *BranchLocations[J]; + if (HexagonMCInstrInfo::isCofMax1(MCII, I)) { + bool Relax1 = HexagonMCInstrInfo::isCofRelax1(MCII, I); + bool Relax2 = HexagonMCInstrInfo::isCofRelax2(MCII, I); + if (N > 1 && !Relax1 && !Relax2) { + reportError(I.getLoc(), + "Instruction may not be in a packet with other branches"); + reportBranchErrors(); + return false; + } + if (N > 1 && J == 0 && !Relax1) { + reportError(I.getLoc(), + "Instruction may not be the first branch in packet"); + reportBranchErrors(); + return false; + } + if (N > 1 && J == 1 && !Relax2) { + reportError(I.getLoc(), + "Instruction may not be the second branch in packet"); + reportBranchErrors(); + return false; + } + } + } + return true; +} + bool HexagonMCChecker::checkSlots() { unsigned slotsUsed = 0; for (auto HMI : HexagonMCInstrInfo::bundleInstructions(MCB)) { @@ -374,45 +377,6 @@ bool HexagonMCChecker::checkSlots() { return true; } -// Check legal use of branches. -bool HexagonMCChecker::checkBranches() { - if (HexagonMCInstrInfo::isBundle(MCB)) { - bool hasConditional = false; - unsigned Branches = 0, Conditional = HEXAGON_PRESHUFFLE_PACKET_SIZE, - Unconditional = HEXAGON_PRESHUFFLE_PACKET_SIZE; - - for (unsigned i = HexagonMCInstrInfo::bundleInstructionsOffset; - i < MCB.size(); ++i) { - MCInst const &MCI = *MCB.begin()[i].getInst(); - - if (HexagonMCInstrInfo::isImmext(MCI)) - continue; - if (HexagonMCInstrInfo::getDesc(MCII, MCI).isBranch() || - HexagonMCInstrInfo::getDesc(MCII, MCI).isCall()) { - ++Branches; - if (HexagonMCInstrInfo::isPredicated(MCII, MCI) || - HexagonMCInstrInfo::isPredicatedNew(MCII, MCI)) { - hasConditional = true; - Conditional = i; // Record the position of the conditional branch. - } else { - Unconditional = i; // Record the position of the unconditional branch. - } - } - } - - if (Branches > 1) - if (!hasConditional || Conditional > Unconditional) { - // Error out if more than one unconditional branch or - // the conditional branch appears after the unconditional one. 
- reportError( - "unconditional branch cannot precede another branch in packet"); - return false; - } - } - - return true; -} - // Check legal use of predicate registers. bool HexagonMCChecker::checkPredicates() { // Check for proper use of new predicate registers. @@ -446,16 +410,85 @@ bool HexagonMCChecker::checkPredicates() { // Check legal use of new values. bool HexagonMCChecker::checkNewValues() { - for (auto &I : NewUses) { - unsigned R = I.first; - NewSense &US = I.second; - - if (!hasValidNewValueDef(US, NewDefs[R])) { - reportErrorNewValue(R); + for (auto const &I : HexagonMCInstrInfo::bundleInstructions(MCII, MCB)) { + if (!HexagonMCInstrInfo::isNewValue(MCII, I)) + continue; + auto Consumer = HexagonMCInstrInfo::predicateInfo(MCII, I); + bool Branch = HexagonMCInstrInfo::getDesc(MCII, I).isBranch(); + MCOperand const &Op = HexagonMCInstrInfo::getNewValueOperand(MCII, I); + assert(Op.isReg()); + auto Producer = registerProducer(Op.getReg(), Consumer); + if (std::get<0>(Producer) == nullptr) { + reportError(I.getLoc(), "New value register consumer has no producer"); + return false; + } + if (!RelaxNVChecks) { + // Checks that statically prove correct new value consumption + if (std::get<2>(Producer).isPredicated() && + (!Consumer.isPredicated() || + llvm::HexagonMCInstrInfo::getType(MCII, I) == HexagonII::TypeNCJ)) { + reportNote( + std::get<0>(Producer)->getLoc(), + "Register producer is predicated and consumer is unconditional"); + reportError(I.getLoc(), + "Instruction does not have a valid new register producer"); + return false; + } + if (std::get<2>(Producer).Register != Hexagon::NoRegister && + std::get<2>(Producer).Register != Consumer.Register) { + reportNote(std::get<0>(Producer)->getLoc(), + "Register producer does not use the same predicate " + "register as the consumer"); + reportError(I.getLoc(), + "Instruction does not have a valid new register producer"); + return false; + } + } + if (std::get<2>(Producer).Register == Consumer.Register && + Consumer.PredicatedTrue != std::get<2>(Producer).PredicatedTrue) { + reportNote( + std::get<0>(Producer)->getLoc(), + "Register producer has the opposite predicate sense as consumer"); + reportError(I.getLoc(), + "Instruction does not have a valid new register producer"); + return false; + } + MCInstrDesc const &Desc = + HexagonMCInstrInfo::getDesc(MCII, *std::get<0>(Producer)); + if (Desc.OpInfo[std::get<1>(Producer)].RegClass == + Hexagon::DoubleRegsRegClassID) { + reportNote(std::get<0>(Producer)->getLoc(), + "Double registers cannot be new-value producers"); + reportError(I.getLoc(), + "Instruction does not have a valid new register producer"); + return false; + } + if ((Desc.mayLoad() && std::get<1>(Producer) == 1) || + (Desc.mayStore() && std::get<1>(Producer) == 0)) { + unsigned Mode = + HexagonMCInstrInfo::getAddrMode(MCII, *std::get<0>(Producer)); + StringRef ModeError; + if (Mode == HexagonII::AbsoluteSet) + ModeError = "Absolute-set"; + if (Mode == HexagonII::PostInc) + ModeError = "Auto-increment"; + if (!ModeError.empty()) { + reportNote(std::get<0>(Producer)->getLoc(), + ModeError + " registers cannot be a new-value " + "producer"); + reportError(I.getLoc(), + "Instruction does not have a valid new register producer"); + return false; + } + } + if (Branch && HexagonMCInstrInfo::isFloat(MCII, *std::get<0>(Producer))) { + reportNote(std::get<0>(Producer)->getLoc(), + "FPU instructions cannot be new-value producers for jumps"); + reportError(I.getLoc(), + "Instruction does not have a valid new register producer"); 
return false; } } - return true; } @@ -489,6 +522,34 @@ bool HexagonMCChecker::registerUsed(unsigned Register) { return false; } +std::tuple +HexagonMCChecker::registerProducer( + unsigned Register, HexagonMCInstrInfo::PredicateInfo ConsumerPredicate) { + std::tuple + WrongSense; + for (auto const &I : HexagonMCInstrInfo::bundleInstructions(MCII, MCB)) { + MCInstrDesc const &Desc = HexagonMCInstrInfo::getDesc(MCII, I); + auto ProducerPredicate = HexagonMCInstrInfo::predicateInfo(MCII, I); + for (unsigned J = 0, N = Desc.getNumDefs(); J < N; ++J) + for (auto K = MCRegAliasIterator(I.getOperand(J).getReg(), &RI, true); + K.isValid(); ++K) + if (*K == Register) { + if (RelaxNVChecks || + (ProducerPredicate.Register == ConsumerPredicate.Register && + (ProducerPredicate.Register == Hexagon::NoRegister || + ProducerPredicate.PredicatedTrue == + ConsumerPredicate.PredicatedTrue))) + return std::make_tuple(&I, J, ProducerPredicate); + std::get<0>(WrongSense) = &I; + std::get<1>(WrongSense) = J; + std::get<2>(WrongSense) = ProducerPredicate; + } + if (Register == Hexagon::VTMP && HexagonMCInstrInfo::hasTmpDst(MCII, I)) + return std::make_tuple(&I, 0, HexagonMCInstrInfo::PredicateInfo()); + } + return WrongSense; +} + void HexagonMCChecker::checkRegisterCurDefs() { for (auto const &I : HexagonMCInstrInfo::bundleInstructions(MCII, MCB)) { if (HexagonMCInstrInfo::isCVINew(MCII, I) && @@ -638,35 +699,6 @@ void HexagonMCChecker::compoundRegisterMap(unsigned &Register) { } } -bool HexagonMCChecker::hasValidNewValueDef(const NewSense &Use, - const NewSenseList &Defs) const { - bool Strict = !RelaxNVChecks; - - for (unsigned i = 0, n = Defs.size(); i < n; ++i) { - const NewSense &Def = Defs[i]; - // NVJ cannot use a new FP value [7.6.1] - if (Use.IsNVJ && (Def.IsFloat || Def.PredReg != 0)) - continue; - // If the definition was not predicated, then it does not matter if - // the use is. - if (Def.PredReg == 0) - return true; - // With the strict checks, both the definition and the use must be - // predicated on the same register and condition. - if (Strict) { - if (Def.PredReg == Use.PredReg && Def.Cond == Use.Cond) - return true; - } else { - // With the relaxed checks, if the definition was predicated, the only - // detectable violation is if the use is predicated on the opposing - // condition, otherwise, it's ok. 
- if (Def.PredReg != Use.PredReg || Def.Cond == Use.Cond) - return true; - } - } - return false; -} - void HexagonMCChecker::reportErrorRegisters(unsigned Register) { reportError("register `" + Twine(RI.getName(Register)) + "' modified more than once"); @@ -687,6 +719,14 @@ void HexagonMCChecker::reportError(SMLoc Loc, Twine const &Msg) { Context.reportError(Loc, Msg); } +void HexagonMCChecker::reportNote(SMLoc Loc, llvm::Twine const &Msg) { + if (ReportErrors) { + auto SM = Context.getSourceManager(); + if (SM) + SM->PrintMessage(Loc, SourceMgr::DK_Note, Msg); + } +} + void HexagonMCChecker::reportWarning(Twine const &Msg) { if (ReportErrors) { auto SM = Context.getSourceManager(); diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h index 957950156e85..7577baace20c 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h @@ -15,6 +15,7 @@ #ifndef LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCCHECKER_H #define LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCCHECKER_H +#include "MCTargetDesc/HexagonMCInstrInfo.h" #include "MCTargetDesc/HexagonMCTargetDesc.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" @@ -48,40 +49,6 @@ class HexagonMCChecker { using DefsIterator = DenseMap::iterator; DenseMap Defs; - /// Information about how a new-value register is defined or used: - /// PredReg = predicate register, 0 if use/def not predicated, - /// Cond = true/false for if(PredReg)/if(!PredReg) respectively, - /// IsFloat = true if definition produces a floating point value - /// (not valid for uses), - /// IsNVJ = true if the use is a new-value branch (not valid for - /// definitions). - struct NewSense { - unsigned PredReg; - bool IsFloat, IsNVJ, Cond; - - // The special-case "constructors": - static NewSense Jmp(bool isNVJ) { - NewSense NS = {/*PredReg=*/0, /*IsFloat=*/false, /*IsNVJ=*/isNVJ, - /*Cond=*/false}; - return NS; - } - static NewSense Use(unsigned PR, bool True) { - NewSense NS = {/*PredReg=*/PR, /*IsFloat=*/false, /*IsNVJ=*/false, - /*Cond=*/True}; - return NS; - } - static NewSense Def(unsigned PR, bool True, bool Float) { - NewSense NS = {/*PredReg=*/PR, /*IsFloat=*/Float, /*IsNVJ=*/false, - /*Cond=*/True}; - return NS; - } - }; - - /// Set of definitions that produce new register: - using NewSenseList = SmallVector; - using NewDefsIterator = DenseMap::iterator; - DenseMap NewDefs; - /// Set of weak definitions whose clashes should be enforced selectively. using SoftDefsIterator = std::set::iterator; std::set SoftDefs; @@ -102,10 +69,6 @@ class HexagonMCChecker { using UsesIterator = std::set::iterator; std::set Uses; - /// Set of new values used: new register, if new-value jump. - using NewUsesIterator = DenseMap::iterator; - DenseMap NewUses; - /// Pre-defined set of read-only registers. using ReadOnlyIterator = std::set::iterator; std::set ReadOnly; @@ -115,6 +78,9 @@ class HexagonMCChecker { void initReg(MCInst const &, unsigned, unsigned &PredReg, bool &isTrue); bool registerUsed(unsigned Register); + std::tuple + registerProducer(unsigned Register, + HexagonMCInstrInfo::PredicateInfo Predicated); // Checks performed. 
bool checkBranches(); @@ -122,12 +88,13 @@ class HexagonMCChecker { bool checkNewValues(); bool checkRegisters(); bool checkRegistersReadOnly(); - bool checkEndloopBranches(); void checkRegisterCurDefs(); bool checkSolo(); bool checkShuffle(); bool checkSlots(); bool checkAXOK(); + bool checkHWLoop(); + bool checkCOFMax1(); static void compoundRegisterMap(unsigned &); @@ -141,19 +108,21 @@ class HexagonMCChecker { Hexagon::LC1 == R); } - bool hasValidNewValueDef(const NewSense &Use, const NewSenseList &Defs) const; - public: explicit HexagonMCChecker(MCContext &Context, MCInstrInfo const &MCII, MCSubtargetInfo const &STI, MCInst &mcb, const MCRegisterInfo &ri, bool ReportErrors = true); + explicit HexagonMCChecker(HexagonMCChecker const &Check, + MCSubtargetInfo const &STI, bool CopyReportErrors); bool check(bool FullCheck = true); void reportErrorRegisters(unsigned Register); void reportErrorNewValue(unsigned Register); void reportError(SMLoc Loc, Twine const &Msg); + void reportNote(SMLoc Loc, Twine const &Msg); void reportError(Twine const &Msg); void reportWarning(Twine const &Msg); + void reportBranchErrors(); }; } // end namespace llvm diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp index a39b178805e7..4c18af60efd1 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp @@ -272,7 +272,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) { case Hexagon::J2_jumpr: case Hexagon::PS_jmpret: // jumpr r31 - // Actual form JMPR %PC, %R31, %R0. + // Actual form JMPR implicit-def %pc, implicit %r31, implicit internal %r0. DstReg = MCI.getOperand(0).getReg(); if (Hexagon::R31 == DstReg) return HexagonII::HSIG_L2; @@ -305,7 +305,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) { case Hexagon::L4_return_tnew_pt: case Hexagon::L4_return_fnew_pt: // [if ([!]p0[.new])] dealloc_return - SrcReg = MCI.getOperand(0).getReg(); + SrcReg = MCI.getOperand(1).getReg(); if (Hexagon::P0 == SrcReg) { return HexagonII::HSIG_L2; } @@ -388,7 +388,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) { } break; case Hexagon::S2_allocframe: - if (inRange<5, 3>(MCI, 0)) + if (inRange<5, 3>(MCI, 2)) return HexagonII::HSIG_S2; break; // @@ -471,7 +471,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) { case Hexagon::C2_cmovenewif: // if ([!]P0[.new]) Rd = #0 // Actual form: - // %R16 = C2_cmovenewit %P0, 0, %R16; + // %r16 = C2_cmovenewit internal %p0, 0, implicit undef %r16; DstReg = MCI.getOperand(0).getReg(); // Rd PredReg = MCI.getOperand(1).getReg(); // P0 if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) && @@ -742,7 +742,7 @@ MCInst HexagonMCInstrInfo::deriveSubInst(MCInst const &Inst) { break; // 1,2,3 SUBInst $Rx = add($_src_, $Rs) case Hexagon::S2_allocframe: Result.setOpcode(Hexagon::SS2_allocframe); - addOps(Result, Inst, 0); + addOps(Result, Inst, 2); break; // 1 SUBInst allocframe(#$u5_3) case Hexagon::A2_andir: if (minConstant(Inst, 2) == 255) { diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp index 691e269cb91f..454219945e14 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp @@ -124,7 +124,7 @@ void HexagonMCELFStreamer::HexagonMCEmitCommonSymbol(MCSymbol *Symbol, MCSectionSubPair P = 
getCurrentSection(); SwitchSection(&Section); - if (ELFSymbol->isUndefined(false)) { + if (ELFSymbol->isUndefined()) { EmitValueToAlignment(ByteAlignment, 0, 1, 0); EmitLabel(Symbol); EmitZeros(Size); diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp index 94919b1e4869..19308cd425e8 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp @@ -33,6 +33,10 @@ using namespace llvm; +bool HexagonMCInstrInfo::PredicateInfo::isPredicated() const { + return Register != Hexagon::NoRegister; +} + Hexagon::PacketIterator::PacketIterator(MCInstrInfo const &MCII, MCInst const &Inst) : MCII(MCII), BundleCurrent(Inst.begin() + @@ -50,6 +54,7 @@ Hexagon::PacketIterator &Hexagon::PacketIterator::operator++() { if (DuplexCurrent == DuplexEnd) { DuplexCurrent = BundleEnd; DuplexEnd = BundleEnd; + ++BundleCurrent; } return *this; } @@ -90,6 +95,7 @@ void HexagonMCInstrInfo::addConstExtender(MCContext &Context, // Create the extender. MCInst *XMCI = new (Context) MCInst(HexagonMCInstrInfo::deriveExtender(MCII, MCI, exOp)); + XMCI->setLoc(MCI.getLoc()); MCB.addOperand(MCOperand::createInst(XMCI)); } @@ -131,7 +137,7 @@ bool HexagonMCInstrInfo::canonicalizePacket(MCInstrInfo const &MCII, // Examine the packet and convert pairs of instructions to duplex // instructions when possible. MCInst InstBundlePreDuplex = MCInst(MCB); - if (!HexagonDisableDuplex) { + if (STI.getFeatureBits() [Hexagon::FeatureDuplex]) { SmallVector possibleDuplexes; possibleDuplexes = HexagonMCInstrInfo::getDuplexPossibilties(MCII, STI, MCB); @@ -169,13 +175,6 @@ void HexagonMCInstrInfo::clampExtended(MCInstrInfo const &MCII, } } -MCInst HexagonMCInstrInfo::createBundle() { - MCInst Result; - Result.setOpcode(Hexagon::BUNDLE); - Result.addOperand(MCOperand::createImm(0)); - return Result; -} - MCInst HexagonMCInstrInfo::deriveExtender(MCInstrInfo const &MCII, MCInst const &Inst, MCOperand const &MO) { @@ -233,6 +232,13 @@ unsigned HexagonMCInstrInfo::getMemAccessSize(MCInstrInfo const &MCII, return HexagonII::getMemAccessSizeInBytes(HexagonII::MemAccessSize(S)); } +unsigned HexagonMCInstrInfo::getAddrMode(MCInstrInfo const &MCII, + MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return static_cast((F >> HexagonII::AddrModePos) & + HexagonII::AddrModeMask); +} + MCInstrDesc const &HexagonMCInstrInfo::getDesc(MCInstrInfo const &MCII, MCInst const &MCI) { return MCII.get(MCI.getOpcode()); @@ -365,13 +371,20 @@ unsigned short HexagonMCInstrInfo::getNewValueOp(MCInstrInfo const &MCII, MCOperand const &HexagonMCInstrInfo::getNewValueOperand(MCInstrInfo const &MCII, MCInst const &MCI) { - unsigned O = HexagonMCInstrInfo::getNewValueOp(MCII, MCI); - MCOperand const &MCO = MCI.getOperand(O); - - assert((HexagonMCInstrInfo::isNewValue(MCII, MCI) || - HexagonMCInstrInfo::hasNewValue(MCII, MCI)) && - MCO.isReg()); - return (MCO); + if (HexagonMCInstrInfo::hasTmpDst(MCII, MCI)) { + // VTMP doesn't actually exist in the encodings for these 184 + // 3 instructions so go ahead and create it here. 
+ static MCOperand MCO = MCOperand::createReg(Hexagon::VTMP); + return (MCO); + } else { + unsigned O = HexagonMCInstrInfo::getNewValueOp(MCII, MCI); + MCOperand const &MCO = MCI.getOperand(O); + + assert((HexagonMCInstrInfo::isNewValue(MCII, MCI) || + HexagonMCInstrInfo::hasNewValue(MCII, MCI)) && + MCO.isReg()); + return (MCO); + } } /// Return the new value or the newly produced value. @@ -439,8 +452,8 @@ bool HexagonMCInstrInfo::hasDuplex(MCInstrInfo const &MCII, MCInst const &MCI) { if (!HexagonMCInstrInfo::isBundle(MCI)) return false; - for (auto const &I : HexagonMCInstrInfo::bundleInstructions(MCII, MCI)) { - if (HexagonMCInstrInfo::isDuplex(MCII, I)) + for (auto const &I : HexagonMCInstrInfo::bundleInstructions(MCI)) { + if (HexagonMCInstrInfo::isDuplex(MCII, *I.getInst())) return true; } @@ -451,7 +464,7 @@ bool HexagonMCInstrInfo::hasExtenderForIndex(MCInst const &MCB, size_t Index) { return extenderForIndex(MCB, Index) != nullptr; } -bool HexagonMCInstrInfo::hasImmExt( MCInst const &MCI) { +bool HexagonMCInstrInfo::hasImmExt(MCInst const &MCI) { if (!HexagonMCInstrInfo::isBundle(MCI)) return false; @@ -540,6 +553,18 @@ bool HexagonMCInstrInfo::isCofMax1(MCInstrInfo const &MCII, MCInst const &MCI) { return ((F >> HexagonII::CofMax1Pos) & HexagonII::CofMax1Mask); } +bool HexagonMCInstrInfo::isCofRelax1(MCInstrInfo const &MCII, + MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ((F >> HexagonII::CofRelax1Pos) & HexagonII::CofRelax1Mask); +} + +bool HexagonMCInstrInfo::isCofRelax2(MCInstrInfo const &MCII, + MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ((F >> HexagonII::CofRelax2Pos) & HexagonII::CofRelax2Mask); +} + bool HexagonMCInstrInfo::isCompound(MCInstrInfo const &MCII, MCInst const &MCI) { return (getType(MCII, MCI) == HexagonII::TypeCJ); @@ -576,6 +601,11 @@ bool HexagonMCInstrInfo::isFloat(MCInstrInfo const &MCII, MCInst const &MCI) { return ((F >> HexagonII::FPPos) & HexagonII::FPMask); } +bool HexagonMCInstrInfo::isHVX(MCInstrInfo const &MCII, MCInst const &MCI) { + const uint64_t V = getType(MCII, MCI); + return HexagonII::TypeCVI_FIRST <= V && V <= HexagonII::TypeCVI_LAST; +} + bool HexagonMCInstrInfo::isImmext(MCInst const &MCI) { return MCI.getOpcode() == Hexagon::A4_ext; } @@ -655,10 +685,18 @@ bool HexagonMCInstrInfo::isSoloAX(MCInstrInfo const &MCII, MCInst const &MCI) { } /// Return whether the insn can be packaged only with an A-type insn in slot #1. -bool HexagonMCInstrInfo::isSoloAin1(MCInstrInfo const &MCII, - MCInst const &MCI) { +bool HexagonMCInstrInfo::isRestrictSlot1AOK(MCInstrInfo const &MCII, + MCInst const &MCI) { const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; - return ((F >> HexagonII::SoloAin1Pos) & HexagonII::SoloAin1Mask); + return ((F >> HexagonII::RestrictSlot1AOKPos) & + HexagonII::RestrictSlot1AOKMask); +} + +bool HexagonMCInstrInfo::isRestrictNoSlot1Store(MCInstrInfo const &MCII, + MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ((F >> HexagonII::RestrictNoSlot1StorePos) & + HexagonII::RestrictNoSlot1StoreMask); } /// Return whether the insn is solo, i.e., cannot be in a packet. 
@@ -673,12 +711,6 @@ bool HexagonMCInstrInfo::isMemReorderDisabled(MCInst const &MCI) { return (Flags & memReorderDisabledMask) != 0; } -bool HexagonMCInstrInfo::isMemStoreReorderEnabled(MCInst const &MCI) { - assert(isBundle(MCI)); - auto Flags = MCI.getOperand(0).getImm(); - return (Flags & memStoreReorderEnabledMask) != 0; -} - bool HexagonMCInstrInfo::isSubInstruction(MCInst const &MCI) { switch (MCI.getOpcode()) { default: @@ -800,12 +832,29 @@ void HexagonMCInstrInfo::padEndloop(MCInst &MCB, MCContext &Context) { MCB.addOperand(MCOperand::createInst(new (Context) MCInst(Nop))); } +HexagonMCInstrInfo::PredicateInfo +HexagonMCInstrInfo::predicateInfo(MCInstrInfo const &MCII, MCInst const &MCI) { + if (!isPredicated(MCII, MCI)) + return {0, 0, false}; + MCInstrDesc const &Desc = getDesc(MCII, MCI); + for (auto I = Desc.getNumDefs(), N = Desc.getNumOperands(); I != N; ++I) + if (Desc.OpInfo[I].RegClass == Hexagon::PredRegsRegClassID) + return {MCI.getOperand(I).getReg(), I, isPredicatedTrue(MCII, MCI)}; + return {0, 0, false}; +} + bool HexagonMCInstrInfo::prefersSlot3(MCInstrInfo const &MCII, MCInst const &MCI) { const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; return (F >> HexagonII::PrefersSlot3Pos) & HexagonII::PrefersSlot3Mask; } +/// return true if instruction has hasTmpDst attribute. +bool HexagonMCInstrInfo::hasTmpDst(MCInstrInfo const &MCII, MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return (F >> HexagonII::HasTmpDstPos) & HexagonII::HasTmpDstMask; +} + void HexagonMCInstrInfo::replaceDuplex(MCContext &Context, MCInst &MCB, DuplexCandidate Candidate) { assert(Candidate.packetIndexI < MCB.size()); @@ -833,13 +882,6 @@ void HexagonMCInstrInfo::setMemReorderDisabled(MCInst &MCI) { assert(isMemReorderDisabled(MCI)); } -void HexagonMCInstrInfo::setMemStoreReorderEnabled(MCInst &MCI) { - assert(isBundle(MCI)); - MCOperand &Operand = MCI.getOperand(0); - Operand.setImm(Operand.getImm() | memStoreReorderEnabledMask); - assert(isMemStoreReorderEnabled(MCI)); -} - void HexagonMCInstrInfo::setOuterLoop(MCInst &MCI) { assert(isBundle(MCI)); MCOperand &Operand = MCI.getOperand(0); @@ -854,7 +896,7 @@ unsigned HexagonMCInstrInfo::SubregisterBit(unsigned Consumer, if (Producer >= Hexagon::W0 && Producer <= Hexagon::W15) if (Consumer >= Hexagon::V0 && Consumer <= Hexagon::V31) return (Consumer - Hexagon::V0) & 0x1; - if (Consumer == Producer2) - return 0x1; + if (Producer2 != Hexagon::NoRegister) + return Consumer == Producer; return 0; } diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h index b6b01709a6ca..28d89429266b 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h @@ -75,10 +75,6 @@ int64_t const outerLoopMask = 1 << outerLoopOffset; size_t const memReorderDisabledOffset = 2; int64_t const memReorderDisabledMask = 1 << memReorderDisabledOffset; -// allow re-ordering of memory stores by default stores cannot be re-ordered -size_t const memStoreReorderEnabledOffset = 3; -int64_t const memStoreReorderEnabledMask = 1 << memStoreReorderEnabledOffset; - size_t const bundleInstructionsOffset = 1; void addConstant(MCInst &MI, uint64_t Value, MCContext &Context); @@ -110,8 +106,6 @@ MCInst deriveSubInst(MCInst const &Inst); // Clamp off upper 26 bits of extendable operand for emission void clampExtended(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI); -MCInst createBundle(); - // 
Return the extender for instruction at Index or nullptr if none MCInst const *extenderForIndex(MCInst const &MCB, size_t Index); void extendIfNeeded(MCContext &Context, MCInstrInfo const &MCII, MCInst &MCB, @@ -120,6 +114,9 @@ void extendIfNeeded(MCContext &Context, MCInstrInfo const &MCII, MCInst &MCB, // Return memory access size in bytes unsigned getMemAccessSize(MCInstrInfo const &MCII, MCInst const &MCI); +// Return memory access size +unsigned getAddrMode(MCInstrInfo const &MCII, MCInst const &MCI); + MCInstrDesc const &getDesc(MCInstrInfo const &MCII, MCInst const &MCI); // Return which duplex group this instruction belongs to @@ -184,6 +181,7 @@ bool hasImmExt(MCInst const &MCI); // Return whether the instruction is a legal new-value producer. bool hasNewValue(MCInstrInfo const &MCII, MCInst const &MCI); bool hasNewValue2(MCInstrInfo const &MCII, MCInst const &MCI); +bool hasTmpDst(MCInstrInfo const &MCII, MCInst const &MCI); unsigned iClassOfDuplexPair(unsigned Ga, unsigned Gb); int64_t minConstant(MCInst const &MCI, size_t Index); @@ -209,6 +207,8 @@ bool isBundle(MCInst const &MCI); // Return whether the insn is an actual insn. bool isCanon(MCInstrInfo const &MCII, MCInst const &MCI); bool isCofMax1(MCInstrInfo const &MCII, MCInst const &MCI); +bool isCofRelax1(MCInstrInfo const &MCII, MCInst const &MCI); +bool isCofRelax2(MCInstrInfo const &MCII, MCInst const &MCI); bool isCompound(MCInstrInfo const &MCII, MCInst const &MCI); // Return whether the instruction needs to be constant extended. @@ -236,6 +236,8 @@ bool isExtended(MCInstrInfo const &MCII, MCInst const &MCI); /// Return whether it is a floating-point insn. bool isFloat(MCInstrInfo const &MCII, MCInst const &MCI); +bool isHVX(MCInstrInfo const &MCII, MCInst const &MCI); + // Returns whether this instruction is an immediate extender bool isImmext(MCInst const &MCI); @@ -248,7 +250,6 @@ bool isIntReg(unsigned Reg); // Is this register suitable for use in a duplex subinst bool isIntRegForSubInst(unsigned Reg); bool isMemReorderDisabled(MCInst const &MCI); -bool isMemStoreReorderEnabled(MCInst const &MCI); // Return whether the insn is a new-value consumer. bool isNewValue(MCInstrInfo const &MCII, MCInst const &MCI); @@ -283,7 +284,8 @@ bool isSolo(MCInstrInfo const &MCII, MCInst const &MCI); bool isSoloAX(MCInstrInfo const &MCII, MCInst const &MCI); /// Return whether the insn can be packaged only with an A-type insn in slot #1. 
-bool isSoloAin1(MCInstrInfo const &MCII, MCInst const &MCI); +bool isRestrictSlot1AOK(MCInstrInfo const &MCII, MCInst const &MCI); +bool isRestrictNoSlot1Store(MCInstrInfo const &MCII, MCInst const &MCI); bool isSubInstruction(MCInst const &MCI); bool isVector(MCInstrInfo const &MCII, MCInst const &MCI); bool mustExtend(MCExpr const &Expr); @@ -291,6 +293,17 @@ bool mustNotExtend(MCExpr const &Expr); // Pad the bundle with nops to satisfy endloop requirements void padEndloop(MCInst &MCI, MCContext &Context); +class PredicateInfo { +public: + PredicateInfo() : Register(0), Operand(0), PredicatedTrue(false) {} + PredicateInfo(unsigned Register, unsigned Operand, bool PredicatedTrue) + : Register(Register), Operand(Operand), PredicatedTrue(PredicatedTrue) {} + bool isPredicated() const; + unsigned Register; + unsigned Operand; + bool PredicatedTrue; +}; +PredicateInfo predicateInfo(MCInstrInfo const &MCII, MCInst const &MCI); bool prefersSlot3(MCInstrInfo const &MCII, MCInst const &MCI); // Replace the instructions inside MCB, represented by Candidate @@ -300,7 +313,6 @@ bool s27_2_reloc(MCExpr const &Expr); // Marks a bundle as endloop0 void setInnerLoop(MCInst &MCI); void setMemReorderDisabled(MCInst &MCI); -void setMemStoreReorderEnabled(MCInst &MCI); void setMustExtend(MCExpr const &Expr, bool Val = true); void setMustNotExtend(MCExpr const &Expr, bool Val = true); void setS27_2_reloc(MCExpr const &Expr, bool Val = true); diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp index ea589c7a82ab..7bd54fdfa3d5 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp @@ -113,9 +113,10 @@ bool llvm::HexagonMCShuffle(MCContext &Context, bool Fatal, if (!HexagonMCInstrInfo::bundleSize(MCB)) { // There once was a bundle: - // BUNDLE %D2, %R4, %R5, %D7, ... - // * %D2 = IMPLICIT_DEF; flags: - // * %D7 = IMPLICIT_DEF; flags: + // BUNDLE implicit-def %d2, implicit-def %r4, implicit-def %r5, + // implicit-def %d7, ... + // * %d2 = IMPLICIT_DEF; flags: + // * %d7 = IMPLICIT_DEF; flags: // After the IMPLICIT_DEFs were removed by the asm printer, the bundle // became empty. DEBUG(dbgs() << "Skipping empty bundle"); @@ -137,9 +138,10 @@ llvm::HexagonMCShuffle(MCContext &Context, MCInstrInfo const &MCII, if (!HexagonMCInstrInfo::bundleSize(MCB)) { // There once was a bundle: - // BUNDLE %D2, %R4, %R5, %D7, ... - // * %D2 = IMPLICIT_DEF; flags: - // * %D7 = IMPLICIT_DEF; flags: + // BUNDLE implicit-def %d2, implicit-def %r4, implicit-def %r5, + // implicit-def %d7, ... + // * %d2 = IMPLICIT_DEF; flags: + // * %d7 = IMPLICIT_DEF; flags: // After the IMPLICIT_DEFs were removed by the asm printer, the bundle // became empty. 
DEBUG(dbgs() << "Skipping empty bundle"); diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp index 6f48169be8cf..3fbe2197f937 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp @@ -13,11 +13,13 @@ #include "MCTargetDesc/HexagonMCTargetDesc.h" #include "Hexagon.h" +#include "HexagonDepArch.h" #include "HexagonTargetStreamer.h" #include "MCTargetDesc/HexagonInstPrinter.h" #include "MCTargetDesc/HexagonMCAsmInfo.h" #include "MCTargetDesc/HexagonMCELFStreamer.h" #include "MCTargetDesc/HexagonMCInstrInfo.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAsmBackend.h" @@ -57,41 +59,55 @@ cl::opt<bool> llvm::HexagonDisableDuplex ("mno-pairing", cl::desc("Disable looking for duplex instructions for Hexagon")); -static cl::opt<bool> HexagonV4ArchVariant("mv4", cl::Hidden, cl::init(false), - cl::desc("Build for Hexagon V4")); +namespace { // These flags are to be deprecated +cl::opt<bool> MV4("mv4", cl::Hidden, cl::desc("Build for Hexagon V4"), + cl::init(false)); +cl::opt<bool> MV5("mv5", cl::Hidden, cl::desc("Build for Hexagon V5"), + cl::init(false)); +cl::opt<bool> MV55("mv55", cl::Hidden, cl::desc("Build for Hexagon V55"), + cl::init(false)); +cl::opt<bool> MV60("mv60", cl::Hidden, cl::desc("Build for Hexagon V60"), + cl::init(false)); +cl::opt<bool> MV62("mv62", cl::Hidden, cl::desc("Build for Hexagon V62"), + cl::init(false)); +cl::opt<bool> MV65("mv65", cl::Hidden, cl::desc("Build for Hexagon V65"), + cl::init(false)); +} // namespace + +cl::opt<Hexagon::ArchEnum> + EnableHVX("mhvx", + cl::desc("Enable Hexagon Vector eXtensions"), + cl::values( + clEnumValN(Hexagon::ArchEnum::V60, "v60", "Build for HVX v60"), + clEnumValN(Hexagon::ArchEnum::V62, "v62", "Build for HVX v62"), + clEnumValN(Hexagon::ArchEnum::V65, "v65", "Build for HVX v65"), + // Sentinel for no value specified + clEnumValN(Hexagon::ArchEnum::V5, "", "")), + // Sentinel for flag not present + cl::init(Hexagon::ArchEnum::V4), cl::ValueOptional); +static cl::opt<bool> + DisableHVX("mno-hvx", cl::Hidden, cl::desc("Disable Hexagon Vector eXtensions")); -static cl::opt<bool> HexagonV5ArchVariant("mv5", cl::Hidden, cl::init(false), - cl::desc("Build for Hexagon V5")); - -static cl::opt<bool> HexagonV55ArchVariant("mv55", cl::Hidden, cl::init(false), - cl::desc("Build for Hexagon V55")); - -static cl::opt<bool> HexagonV60ArchVariant("mv60", cl::Hidden, cl::init(false), - cl::desc("Build for Hexagon V60")); - -static cl::opt<bool> HexagonV62ArchVariant("mv62", cl::Hidden, cl::init(false), - cl::desc("Build for Hexagon V62")); - -static cl::opt<bool> EnableHVX("mhvx", cl::Hidden, cl::init(false), - cl::desc("Enable Hexagon Vector Extension (HVX)")); static StringRef DefaultArch = "hexagonv60"; static StringRef HexagonGetArchVariant() { - if (HexagonV4ArchVariant) + if (MV4) return "hexagonv4"; - if (HexagonV5ArchVariant) + if (MV5) return "hexagonv5"; - if (HexagonV55ArchVariant) + if (MV55) return "hexagonv55"; - if (HexagonV60ArchVariant) + if (MV60) return "hexagonv60"; - if (HexagonV62ArchVariant) + if (MV62) return "hexagonv62"; + if (MV65) + return "hexagonv65"; return ""; } -StringRef Hexagon_MC::selectHexagonCPU(const Triple &TT, StringRef CPU) { +StringRef Hexagon_MC::selectHexagonCPU(StringRef CPU) { StringRef ArchV = HexagonGetArchVariant(); if (!ArchV.empty() && !CPU.empty()) { if (ArchV != CPU) @@ -146,7 +162,11 @@ class HexagonTargetAsmStreamer : public HexagonTargetStreamer { OS << Indent << 
InstTxt << Separator; HeadTail = HeadTail.second.split('\n'); } - OS << "\t}" << PacketBundle.second; + + if (HexagonMCInstrInfo::isMemReorderDisabled(Inst)) + OS << "\n\t}:mem_noshuf" << PacketBundle.second; + else + OS << "\t}" << PacketBundle.second; } }; @@ -251,15 +271,37 @@ static bool LLVM_ATTRIBUTE_UNUSED checkFeature(MCSubtargetInfo* STI, uint64_t F) return (FB & (1ULL << F)) != 0; } -StringRef Hexagon_MC::ParseHexagonTriple(const Triple &TT, StringRef CPU) { - StringRef CPUName = Hexagon_MC::selectHexagonCPU(TT, CPU); - StringRef FS = ""; - if (EnableHVX) { - if (CPUName.equals_lower("hexagonv60") || - CPUName.equals_lower("hexagonv62")) - FS = "+hvx"; +namespace { +std::string selectHexagonFS(StringRef CPU, StringRef FS) { + SmallVector<StringRef, 3> Result; + if (!FS.empty()) + Result.push_back(FS); + + switch (EnableHVX) { + case Hexagon::ArchEnum::V55: + break; + case Hexagon::ArchEnum::V60: + Result.push_back("+hvxv60"); + break; + case Hexagon::ArchEnum::V62: + Result.push_back("+hvxv62"); + break; + case Hexagon::ArchEnum::V65: + Result.push_back("+hvxv65"); + break; + case Hexagon::ArchEnum::V5:{ + Result.push_back(StringSwitch<StringRef>(CPU) + .Case("hexagonv60", "+hvxv60") + .Case("hexagonv62", "+hvxv62") + .Case("hexagonv65", "+hvxv65")); + break; } - return FS; + case Hexagon::ArchEnum::V4: + // Sentinel if -mhvx isn't specified + break; + } + return join(Result.begin(), Result.end(), ","); +} } static bool isCPUValid(std::string CPU) @@ -271,16 +313,76 @@ static bool isCPUValid(std::string CPU) "hexagonv55", "hexagonv60", "hexagonv62", + "hexagonv65", }; return std::find(table.begin(), table.end(), CPU) != table.end(); } +namespace { +std::pair<std::string, std::string> selectCPUAndFS(StringRef CPU, + StringRef FS) { + std::pair<std::string, std::string> Result; + Result.first = Hexagon_MC::selectHexagonCPU(CPU); + Result.second = selectHexagonFS(Result.first, FS); + return Result; +} +} + +FeatureBitset Hexagon_MC::completeHVXFeatures(const FeatureBitset &S) { + using namespace Hexagon; + // Make sure that +hvx-length turns hvx on, and that "hvx" alone + // turns on hvxvNN, corresponding to the existing ArchVNN. + FeatureBitset FB = S; + unsigned CpuArch = ArchV4; + for (unsigned F : {ArchV65, ArchV62, ArchV60, ArchV55, ArchV5, ArchV4}) { + if (!FB.test(F)) + continue; + CpuArch = F; + break; + } + bool UseHvx = false; + for (unsigned F : {ExtensionHVX, ExtensionHVX64B, ExtensionHVX128B, + ExtensionHVXDbl}) { + if (!FB.test(F)) + continue; + UseHvx = true; + break; + } + bool HasHvxVer = false; + for (unsigned F : {ExtensionHVXV60, ExtensionHVXV62, ExtensionHVXV65}) { + if (!FB.test(F)) + continue; + HasHvxVer = true; + UseHvx = true; + break; + } + + if (!UseHvx || HasHvxVer) + return FB; + + // HasHvxVer is false, and UseHvx is true. + switch (CpuArch) { + case ArchV65: + FB.set(ExtensionHVXV65); + LLVM_FALLTHROUGH; + case ArchV62: + FB.set(ExtensionHVXV62); + LLVM_FALLTHROUGH; + case ArchV60: + FB.set(ExtensionHVXV60); + break; + } + return FB; +} + MCSubtargetInfo *Hexagon_MC::createHexagonMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { - StringRef ArchFS = (FS.size()) ?
FS : Hexagon_MC::ParseHexagonTriple(TT, CPU); - StringRef CPUName = Hexagon_MC::selectHexagonCPU(TT, CPU); + std::pair Features = selectCPUAndFS(CPU, FS); + StringRef CPUName = Features.first; + StringRef ArchFS = Features.second; + if (!isCPUValid(CPUName.str())) { errs() << "error: invalid CPU \"" << CPUName.str().c_str() << "\" specified\n"; @@ -288,10 +390,12 @@ MCSubtargetInfo *Hexagon_MC::createHexagonMCSubtargetInfo(const Triple &TT, } MCSubtargetInfo *X = createHexagonMCSubtargetInfoImpl(TT, CPUName, ArchFS); - if (X->getFeatureBits()[Hexagon::ExtensionHVX128B]) { + if (HexagonDisableDuplex) { llvm::FeatureBitset Features = X->getFeatureBits(); - X->setFeatureBits(Features.set(Hexagon::ExtensionHVX)); + X->setFeatureBits(Features.set(Hexagon::FeatureDuplex, false)); } + + X->setFeatureBits(completeHVXFeatures(X->getFeatureBits())); return X; } @@ -302,6 +406,7 @@ unsigned Hexagon_MC::GetELFFlags(const MCSubtargetInfo &STI) { {"hexagonv55", ELF::EF_HEXAGON_MACH_V55}, {"hexagonv60", ELF::EF_HEXAGON_MACH_V60}, {"hexagonv62", ELF::EF_HEXAGON_MACH_V62}, + {"hexagonv65", ELF::EF_HEXAGON_MACH_V65}, }; auto F = ElfFlags.find(STI.getCPU()); diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h index 30d75dbc84e2..71545a5c02c9 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h @@ -16,11 +16,13 @@ #include "llvm/Support/CommandLine.h" #include +#include namespace llvm { struct InstrItinerary; struct InstrStage; +class FeatureBitset; class MCAsmBackend; class MCCodeEmitter; class MCContext; @@ -44,9 +46,9 @@ MCInstrInfo *createHexagonMCInstrInfo(); MCRegisterInfo *createHexagonMCRegisterInfo(StringRef TT); namespace Hexagon_MC { - StringRef ParseHexagonTriple(const Triple &TT, StringRef CPU); - StringRef selectHexagonCPU(const Triple &TT, StringRef CPU); + StringRef selectHexagonCPU(StringRef CPU); + FeatureBitset completeHVXFeatures(const FeatureBitset &FB); /// Create a Hexagon MCSubtargetInfo instance. This is exposed so Asm parser, /// etc. do not need to go through TargetRegistry. MCSubtargetInfo *createHexagonMCSubtargetInfo(const Triple &TT, StringRef CPU, @@ -59,8 +61,8 @@ MCCodeEmitter *createHexagonMCCodeEmitter(const MCInstrInfo &MCII, MCContext &MCT); MCAsmBackend *createHexagonAsmBackend(const Target &T, + const MCSubtargetInfo &STI, const MCRegisterInfo &MRI, - const Triple &TT, StringRef CPU, const MCTargetOptions &Options); std::unique_ptr @@ -80,6 +82,7 @@ unsigned HexagonGetLastSlot(); // Defines symbolic names for the Hexagon instructions. 
// #define GET_INSTRINFO_ENUM +#define GET_INSTRINFO_SCHED_ENUM #include "HexagonGenInstrInfo.inc" #define GET_SUBTARGETINFO_ENUM diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp index cdf1cabe65c5..7709a0f61624 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp @@ -27,6 +27,7 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/SourceMgr.h" #include "llvm/Support/raw_ostream.h" #include #include @@ -115,6 +116,7 @@ void HexagonCVIResource::SetupTUL(TypeUnitsAndLanes *TUL, StringRef CPU) { (*TUL)[HexagonII::TypeCVI_VP] = UnitsAndLanes(CVI_XLANE, 1); (*TUL)[HexagonII::TypeCVI_VP_VS] = UnitsAndLanes(CVI_XLANE, 2); (*TUL)[HexagonII::TypeCVI_VS] = UnitsAndLanes(CVI_SHIFT, 1); + (*TUL)[HexagonII::TypeCVI_VS_VX] = UnitsAndLanes(CVI_XLANE | CVI_SHIFT, 1); (*TUL)[HexagonII::TypeCVI_VINLANESAT] = (CPU == "hexagonv60") ? UnitsAndLanes(CVI_SHIFT, 1) @@ -128,6 +130,14 @@ void HexagonCVIResource::SetupTUL(TypeUnitsAndLanes *TUL, StringRef CPU) { (*TUL)[HexagonII::TypeCVI_VM_NEW_ST] = UnitsAndLanes(CVI_NONE, 0); (*TUL)[HexagonII::TypeCVI_VM_STU] = UnitsAndLanes(CVI_XLANE, 1); (*TUL)[HexagonII::TypeCVI_HIST] = UnitsAndLanes(CVI_XLANE, 4); + (*TUL)[HexagonII::TypeCVI_GATHER] = + UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1); + (*TUL)[HexagonII::TypeCVI_SCATTER] = + UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1); + (*TUL)[HexagonII::TypeCVI_SCATTER_DV] = + UnitsAndLanes(CVI_XLANE | CVI_MPY0, 2); + (*TUL)[HexagonII::TypeCVI_SCATTER_NEW_ST] = + UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1); } HexagonCVIResource::HexagonCVIResource(TypeUnitsAndLanes *TUL, @@ -211,30 +221,89 @@ static struct { } jumpSlots[] = {{8, 4}, {8, 2}, {8, 1}, {4, 2}, {4, 1}, {2, 1}}; #define MAX_JUMP_SLOTS (sizeof(jumpSlots) / sizeof(jumpSlots[0])) +void HexagonShuffler::restrictSlot1AOK() { + bool HasRestrictSlot1AOK = false; + SMLoc RestrictLoc; + for (iterator ISJ = begin(); ISJ != end(); ++ISJ) { + MCInst const &Inst = ISJ->getDesc(); + if (HexagonMCInstrInfo::isRestrictSlot1AOK(MCII, Inst)) { + HasRestrictSlot1AOK = true; + RestrictLoc = Inst.getLoc(); + } + } + if (HasRestrictSlot1AOK) + for (iterator ISJ = begin(); ISJ != end(); ++ISJ) { + MCInst const &Inst = ISJ->getDesc(); + unsigned Type = HexagonMCInstrInfo::getType(MCII, Inst); + if (Type != HexagonII::TypeALU32_2op && + Type != HexagonII::TypeALU32_3op && + Type != HexagonII::TypeALU32_ADDI) { + unsigned Units = ISJ->Core.getUnits(); + if (Units & 2U) { + AppliedRestrictions.push_back(std::make_pair( + Inst.getLoc(), + "Instruction was restricted from being in slot 1")); + AppliedRestrictions.push_back( + std::make_pair(RestrictLoc, "Instruction can only be combine " + "with an ALU instruction in slot 1")); + ISJ->Core.setUnits(Units & ~2U); + } + } + } +} + +void HexagonShuffler::restrictNoSlot1Store() { + bool HasRestrictNoSlot1Store = false; + SMLoc RestrictLoc; + for (iterator ISJ = begin(); ISJ != end(); ++ISJ) { + MCInst const &Inst = ISJ->getDesc(); + if (HexagonMCInstrInfo::isRestrictNoSlot1Store(MCII, Inst)) { + HasRestrictNoSlot1Store = true; + RestrictLoc = Inst.getLoc(); + } + } + if (HasRestrictNoSlot1Store) { + bool AppliedRestriction = false; + for (iterator ISJ = begin(); ISJ != end(); ++ISJ) { + MCInst const &Inst = ISJ->getDesc(); + if (HexagonMCInstrInfo::getDesc(MCII, Inst).mayStore()) { + 
unsigned Units = ISJ->Core.getUnits(); + if (Units & 2U) { + AppliedRestriction = true; + AppliedRestrictions.push_back(std::make_pair( + Inst.getLoc(), + "Instruction was restricted from being in slot 1")); + ISJ->Core.setUnits(Units & ~2U); + } + } + } + if (AppliedRestriction) + AppliedRestrictions.push_back(std::make_pair( + RestrictLoc, "Instruction does not allow a store in slot 1")); + } +} + +void HexagonShuffler::applySlotRestrictions() { + restrictSlot1AOK(); + restrictNoSlot1Store(); +} + /// Check that the packet is legal and enforce relative insn order. bool HexagonShuffler::check() { // Descriptive slot masks. - const unsigned slotSingleLoad = 0x1, slotSingleStore = 0x1, slotOne = 0x2, + const unsigned slotSingleLoad = 0x1, slotSingleStore = 0x1, slotThree = 0x8, // slotFirstJump = 0x8, slotFirstLoadStore = 0x2, slotLastLoadStore = 0x1; // Highest slots for branches and stores used to keep their original order. // unsigned slotJump = slotFirstJump; unsigned slotLoadStore = slotFirstLoadStore; - // Number of branches, solo branches, indirect branches. - unsigned jumps = 0, jump1 = 0; // Number of memory operations, loads, solo loads, stores, solo stores, single // stores. unsigned memory = 0, loads = 0, load0 = 0, stores = 0, store0 = 0, store1 = 0; // Number of duplex insns unsigned duplex = 0; - // Number of insns restricting other insns in slot #1 to A type. - unsigned onlyAin1 = 0; - // Number of insns restricting any insn in slot #1, except A2_nop. - unsigned onlyNo1 = 0; unsigned pSlot3Cnt = 0; - unsigned nvstores = 0; unsigned memops = 0; - unsigned deallocs = 0; iterator slot3ISJ = end(); std::vector foundBranches; unsigned reservedSlots = 0; @@ -243,15 +312,11 @@ bool HexagonShuffler::check() { for (iterator ISJ = begin(); ISJ != end(); ++ISJ) { MCInst const &ID = ISJ->getDesc(); - if (HexagonMCInstrInfo::isSoloAin1(MCII, ID)) - ++onlyAin1; if (HexagonMCInstrInfo::prefersSlot3(MCII, ID)) { ++pSlot3Cnt; slot3ISJ = ISJ; } reservedSlots |= HexagonMCInstrInfo::getOtherReservedSlots(MCII, STI, ID); - if (HexagonMCInstrInfo::isCofMax1(MCII, ID)) - ++jump1; switch (HexagonMCInstrInfo::getType(MCII, ID)) { case HexagonII::TypeS_2op: @@ -259,30 +324,30 @@ bool HexagonShuffler::check() { case HexagonII::TypeALU64: break; case HexagonII::TypeJ: - ++jumps; foundBranches.push_back(ISJ); break; case HexagonII::TypeCVI_VM_VP_LDU: - ++onlyNo1; - LLVM_FALLTHROUGH; case HexagonII::TypeCVI_VM_LD: case HexagonII::TypeCVI_VM_TMP_LD: + case HexagonII::TypeCVI_GATHER: + case HexagonII::TypeCVI_GATHER_RST: case HexagonII::TypeLD: ++loads; ++memory; if (ISJ->Core.getUnits() == slotSingleLoad || HexagonMCInstrInfo::getType(MCII, ID) == HexagonII::TypeCVI_VM_VP_LDU) ++load0; - if (HexagonMCInstrInfo::getDesc(MCII, ID).isReturn()) { - ++deallocs, ++jumps, ++jump1; // DEALLOC_RETURN is of type LD. + if (HexagonMCInstrInfo::getDesc(MCII, ID).isReturn()) foundBranches.push_back(ISJ); - } break; case HexagonII::TypeCVI_VM_STU: - ++onlyNo1; - LLVM_FALLTHROUGH; case HexagonII::TypeCVI_VM_ST: case HexagonII::TypeCVI_VM_NEW_ST: + case HexagonII::TypeCVI_SCATTER: + case HexagonII::TypeCVI_SCATTER_DV: + case HexagonII::TypeCVI_SCATTER_RST: + case HexagonII::TypeCVI_SCATTER_NEW_RST: + case HexagonII::TypeCVI_SCATTER_NEW_ST: case HexagonII::TypeST: ++stores; ++memory; @@ -299,7 +364,6 @@ bool HexagonShuffler::check() { break; case HexagonII::TypeNCJ: ++memory; // NV insns are memory-like. 
- ++jumps, ++jump1; foundBranches.push_back(ISJ); break; case HexagonII::TypeV2LDST: @@ -314,65 +378,35 @@ bool HexagonShuffler::check() { assert(HexagonMCInstrInfo::getDesc(MCII, ID).mayStore()); ++memory; ++stores; - if (HexagonMCInstrInfo::isNewValue(MCII, ID)) - ++nvstores; } break; case HexagonII::TypeCR: // Legacy conditional branch predicated on a register. case HexagonII::TypeCJ: - if (HexagonMCInstrInfo::getDesc(MCII, ID).isBranch()) { - ++jumps; + if (HexagonMCInstrInfo::getDesc(MCII, ID).isBranch()) foundBranches.push_back(ISJ); - } break; case HexagonII::TypeDUPLEX: { ++duplex; MCInst const &Inst0 = *ID.getOperand(0).getInst(); MCInst const &Inst1 = *ID.getOperand(1).getInst(); - if (HexagonMCInstrInfo::isCofMax1(MCII, Inst0)) - ++jump1; - if (HexagonMCInstrInfo::isCofMax1(MCII, Inst1)) - ++jump1; - if (HexagonMCInstrInfo::getDesc(MCII, Inst0).isBranch()) { - ++jumps; + if (HexagonMCInstrInfo::getDesc(MCII, Inst0).isBranch()) foundBranches.push_back(ISJ); - } - if (HexagonMCInstrInfo::getDesc(MCII, Inst1).isBranch()) { - ++jumps; + if (HexagonMCInstrInfo::getDesc(MCII, Inst1).isBranch()) foundBranches.push_back(ISJ); - } - if (HexagonMCInstrInfo::getDesc(MCII, Inst0).isReturn()) { - ++deallocs, ++jumps, ++jump1; // DEALLOC_RETURN is of type LD. + if (HexagonMCInstrInfo::getDesc(MCII, Inst0).isReturn()) foundBranches.push_back(ISJ); - } - if (HexagonMCInstrInfo::getDesc(MCII, Inst1).isReturn()) { - ++deallocs, ++jumps, ++jump1; // DEALLOC_RETURN is of type LD. + if (HexagonMCInstrInfo::getDesc(MCII, Inst1).isReturn()) foundBranches.push_back(ISJ); - } break; } } } + applySlotRestrictions(); // Check if the packet is legal. - if ((load0 > 1 || store0 > 1) || - (duplex > 1 || (duplex && memory))) { - reportError(Twine("invalid instruction packet")); - return false; - } - - if (jump1 && jumps > 1) { - // Error if single branch with another branch. - reportError(Twine("too many branches in packet")); - return false; - } - if ((nvstores || memops) && stores > 1) { - reportError(Twine("slot 0 instruction does not allow slot 1 store")); - return false; - } - if (deallocs && stores) { - reportError(Twine("slot 0 instruction does not allow slot 1 store")); + if ((load0 > 1 || store0 > 1) || (duplex > 1 || (duplex && memory))) { + reportError(llvm::Twine("invalid instruction packet")); return false; } @@ -387,31 +421,46 @@ bool HexagonShuffler::check() { return false; } - // Exclude from slot #1 any insn but A2_nop. - if (HexagonMCInstrInfo::getDesc(MCII, ID).getOpcode() != Hexagon::A2_nop) - if (onlyNo1) - ISJ->Core.setUnits(ISJ->Core.getUnits() & ~slotOne); - - // Exclude from slot #1 any insn but A-type. - if (HexagonMCInstrInfo::getType(MCII, ID) != HexagonII::TypeALU32_2op && - HexagonMCInstrInfo::getType(MCII, ID) != HexagonII::TypeALU32_3op && - HexagonMCInstrInfo::getType(MCII, ID) != HexagonII::TypeALU32_ADDI) - if (onlyAin1) - ISJ->Core.setUnits(ISJ->Core.getUnits() & ~slotOne); - // A single load must use slot #0. if (HexagonMCInstrInfo::getDesc(MCII, ID).mayLoad()) { if (loads == 1 && loads == memory && memops == 0) // Pin the load to slot #0. 
- ISJ->Core.setUnits(ISJ->Core.getUnits() & slotSingleLoad); + switch (ID.getOpcode()) { + case Hexagon::V6_vgathermw: + case Hexagon::V6_vgathermh: + case Hexagon::V6_vgathermhw: + case Hexagon::V6_vgathermwq: + case Hexagon::V6_vgathermhq: + case Hexagon::V6_vgathermhwq: + // Slot1 only loads + break; + default: + ISJ->Core.setUnits(ISJ->Core.getUnits() & slotSingleLoad); + break; + } + else if (loads >= 1 && isMemReorderDisabled()) { // }:mem_noshuf + // Loads must keep the original order ONLY if + // isMemReorderDisabled() == true + if (slotLoadStore < slotLastLoadStore) { + // Error if no more slots available for loads. + reportError( + llvm::Twine("invalid instruction packet: too many loads")); + return false; + } + // Pin the load to the highest slot available to it. + ISJ->Core.setUnits(ISJ->Core.getUnits() & slotLoadStore); + // Update the next highest slot available to loads. + slotLoadStore >>= 1; + } } // A single store must use slot #0. if (HexagonMCInstrInfo::getDesc(MCII, ID).mayStore()) { if (!store0) { - if (stores == 1) + if (stores == 1 && (loads == 0 || !isMemReorderDisabled())) + // Pin the store to slot #0 only if isMemReorderDisabled() == false ISJ->Core.setUnits(ISJ->Core.getUnits() & slotSingleStore); - else if (stores > 1) { + else if (stores >= 1) { if (slotLoadStore < slotLastLoadStore) { // Error if no more slots available for stores. reportError(Twine("invalid instruction packet: too many stores")); @@ -443,7 +492,7 @@ bool HexagonShuffler::check() { // preserve branch order bool validateSlots = true; - if (jumps > 1) { + if (foundBranches.size() > 1) { if (foundBranches.size() > 2) { reportError(Twine("too many branches in packet")); return false; @@ -465,7 +514,7 @@ bool HexagonShuffler::check() { foundBranches[1]->Core.setUnits(jumpSlots[i].second); HexagonUnitAuction AuctionCore(reservedSlots); - std::sort(begin(), end(), HexagonInstr::lessCore); + std::stable_sort(begin(), end(), HexagonInstr::lessCore); // see if things ok with that instruction being pinned to slot "slotJump" bool bFail = false; @@ -487,7 +536,8 @@ bool HexagonShuffler::check() { } } - if (jumps <= 1 && !bOnlySlot3 && pSlot3Cnt == 1 && slot3ISJ != end()) { + if (foundBranches.size() <= 1 && bOnlySlot3 == false && pSlot3Cnt == 1 && + slot3ISJ != end()) { validateSlots = true; // save off slot mask of instruction marked with A_PREFER_SLOT3 // and then pin it to slot #3 @@ -495,7 +545,7 @@ bool HexagonShuffler::check() { slot3ISJ->Core.setUnits(saveUnits & slotThree); HexagonUnitAuction AuctionCore(reservedSlots); - std::sort(begin(), end(), HexagonInstr::lessCore); + std::stable_sort(begin(), end(), HexagonInstr::lessCore); // see if things ok with that instruction being pinned to slot #3 bool bFail = false; @@ -519,7 +569,7 @@ bool HexagonShuffler::check() { if (validateSlots) { HexagonUnitAuction AuctionCore(reservedSlots); - std::sort(begin(), end(), HexagonInstr::lessCore); + std::stable_sort(begin(), end(), HexagonInstr::lessCore); for (iterator I = begin(); I != end(); ++I) if (!AuctionCore.bid(I->Core.getUnits())) { @@ -528,7 +578,7 @@ bool HexagonShuffler::check() { } } // Verify the CVI slot subscriptions. - std::sort(begin(), end(), HexagonInstr::lessCVI); + std::stable_sort(begin(), end(), HexagonInstr::lessCVI); // create vector of hvx instructions to check HVXInstsT hvxInsts; hvxInsts.clear(); @@ -584,7 +634,7 @@ bool HexagonShuffler::shuffle() { if (slotWeight) // Sort the packet, favoring source order, // beginning after the previous slot. 
- std::sort(ISJ, Packet.end()); + std::stable_sort(ISJ, Packet.end()); else // Skip unused slot. ++emptySlots; @@ -604,6 +654,12 @@ bool HexagonShuffler::shuffle() { } void HexagonShuffler::reportError(Twine const &Msg) { - if (ReportErrors) + if (ReportErrors) { + for (auto const &I : AppliedRestrictions) { + auto SM = Context.getSourceManager(); + if (SM) + SM->PrintMessage(I.first, SourceMgr::DK_Note, I.second); + } Context.reportError(Loc, Msg); + } } diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h index df3fb0a1efb3..37f90bc46ac7 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h @@ -16,6 +16,7 @@ #define LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONSHUFFLER_H #include "Hexagon.h" +#include "MCTargetDesc/HexagonMCInstrInfo.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" @@ -152,6 +153,10 @@ class HexagonShuffler { MCSubtargetInfo const &STI; SMLoc Loc; bool ReportErrors; + std::vector> AppliedRestrictions; + void applySlotRestrictions(); + void restrictSlot1AOK(); + void restrictNoSlot1Store(); public: using iterator = HexagonPacket::iterator; @@ -168,6 +173,10 @@ class HexagonShuffler { unsigned size() const { return (Packet.size()); } + bool isMemReorderDisabled() const { + return (BundleFlags & HexagonMCInstrInfo::memReorderDisabledMask) != 0; + } + iterator begin() { return (Packet.begin()); } iterator end() { return (Packet.end()); } diff --git a/lib/Target/Hexagon/RDFGraph.cpp b/lib/Target/Hexagon/RDFGraph.cpp index 50ebcd5302c4..d1f6e5a4c8ef 100644 --- a/lib/Target/Hexagon/RDFGraph.cpp +++ b/lib/Target/Hexagon/RDFGraph.cpp @@ -247,7 +247,7 @@ raw_ostream &operator<< (raw_ostream &OS, if (T != MI.operands_end()) { OS << ' '; if (T->isMBB()) - OS << "BB#" << T->getMBB()->getNumber(); + OS << printMBBReference(*T->getMBB()); else if (T->isGlobal()) OS << T->getGlobal()->getName(); else if (T->isSymbol()) @@ -284,13 +284,13 @@ raw_ostream &operator<< (raw_ostream &OS, auto PrintBBs = [&OS] (std::vector Ns) -> void { unsigned N = Ns.size(); for (int I : Ns) { - OS << "BB#" << I; + OS << "%bb." << I; if (--N) OS << ", "; } }; - OS << Print(P.Obj.Id, P.G) << ": --- BB#" << BB->getNumber() + OS << Print(P.Obj.Id, P.G) << ": --- " << printMBBReference(*BB) << " --- preds(" << NP << "): "; for (MachineBasicBlock *B : BB->predecessors()) Ns.push_back(B->getNumber()); @@ -766,7 +766,7 @@ unsigned DataFlowGraph::DefStack::nextDown(unsigned P) const { RegisterSet DataFlowGraph::getLandingPadLiveIns() const { RegisterSet LR; - const Function &F = *MF.getFunction(); + const Function &F = MF.getFunction(); const Constant *PF = F.hasPersonalityFn() ? 
F.getPersonalityFn() : nullptr; const TargetLowering &TLI = *MF.getSubtarget().getTargetLowering(); @@ -1123,8 +1123,8 @@ void DataFlowGraph::pushDefs(NodeAddr IA, DefStackMap &DefM) { if (!Defined.insert(RR.Reg).second) { MachineInstr *MI = NodeAddr(IA).Addr->getCode(); dbgs() << "Multiple definitions of register: " - << Print(RR, *this) << " in\n " << *MI - << "in BB#" << MI->getParent()->getNumber() << '\n'; + << Print(RR, *this) << " in\n " << *MI << "in " + << printMBBReference(*MI->getParent()) << '\n'; llvm_unreachable(nullptr); } #endif diff --git a/lib/Target/Hexagon/RDFGraph.h b/lib/Target/Hexagon/RDFGraph.h index 399b401c5ff6..e3abb0e22f76 100644 --- a/lib/Target/Hexagon/RDFGraph.h +++ b/lib/Target/Hexagon/RDFGraph.h @@ -111,7 +111,7 @@ // // DFG dump:[ // f1: Function foo -// b2: === BB#0 === preds(0), succs(0): +// b2: === %bb.0 === preds(0), succs(0): // p3: phi [d4(,d12,u9):] // p5: phi [d6(,,u10):] // s7: add [d8(,,u13):, u9(d4):, u10(d6):] @@ -183,7 +183,7 @@ // This is typically used to prevent keeping registers artificially live // in cases when they are defined via predicated instructions. For example: // r0 = add-if-true cond, r10, r11 (1) -// r0 = add-if-false cond, r12, r13, r0 (2) +// r0 = add-if-false cond, r12, r13, implicit r0 (2) // ... = r0 (3) // Before (1), r0 is not intended to be live, and the use of r0 in (3) is // not meant to be reached by any def preceding (1). However, since the diff --git a/lib/Target/Hexagon/RDFLiveness.cpp b/lib/Target/Hexagon/RDFLiveness.cpp index 740cd11136b4..67150d536bc9 100644 --- a/lib/Target/Hexagon/RDFLiveness.cpp +++ b/lib/Target/Hexagon/RDFLiveness.cpp @@ -628,7 +628,7 @@ void Liveness::computePhiInfo() { // Collect the set PropUp of uses that are reached by the current // phi PA, and are not covered by any intervening def between the - // currently visited use UA and the the upward phi P. + // currently visited use UA and the upward phi P. if (MidDefs.hasCoverOf(UR)) continue; @@ -814,7 +814,7 @@ void Liveness::computeLiveIns() { for (auto I = B.livein_begin(), E = B.livein_end(); I != E; ++I) LV.push_back(RegisterRef(I->PhysReg, I->LaneMask)); std::sort(LV.begin(), LV.end()); - dbgs() << "BB#" << B.getNumber() << "\t rec = {"; + dbgs() << printMBBReference(B) << "\t rec = {"; for (auto I : LV) dbgs() << ' ' << Print(I, DFG); dbgs() << " }\n"; @@ -963,7 +963,7 @@ void Liveness::traverse(MachineBasicBlock *B, RefMap &LiveIn) { } if (Trace) { - dbgs() << "\n-- BB#" << B->getNumber() << ": " << __func__ + dbgs() << "\n-- " << printMBBReference(*B) << ": " << __func__ << " after recursion into: {"; for (auto I : *N) dbgs() << ' ' << I->getBlock()->getNumber(); diff --git a/lib/Target/Lanai/LanaiISelLowering.cpp b/lib/Target/Lanai/LanaiISelLowering.cpp index 7e4fd24b60e6..17567436384e 100644 --- a/lib/Target/Lanai/LanaiISelLowering.cpp +++ b/lib/Target/Lanai/LanaiISelLowering.cpp @@ -513,7 +513,7 @@ SDValue LanaiTargetLowering::LowerCCCArguments( // The Lanai ABI for returning structs by value requires that we copy // the sret argument into rv for the return. Save the argument into // a virtual register so that we can access it from the return points. - if (MF.getFunction()->hasStructRetAttr()) { + if (MF.getFunction().hasStructRetAttr()) { unsigned Reg = LanaiMFI->getSRetReturnReg(); if (!Reg) { Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i32)); @@ -568,7 +568,7 @@ LanaiTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, // the sret argument into rv for the return. 
We saved the argument into // a virtual register in the entry block, so now we copy the value out // and into rv. - if (DAG.getMachineFunction().getFunction()->hasStructRetAttr()) { + if (DAG.getMachineFunction().getFunction().hasStructRetAttr()) { MachineFunction &MF = DAG.getMachineFunction(); LanaiMachineFunctionInfo *LanaiMFI = MF.getInfo(); unsigned Reg = LanaiMFI->getSRetReturnReg(); diff --git a/lib/Target/Lanai/LanaiInstrFormats.td b/lib/Target/Lanai/LanaiInstrFormats.td index 30289ea4ac0b..1bb6b3d26a49 100644 --- a/lib/Target/Lanai/LanaiInstrFormats.td +++ b/lib/Target/Lanai/LanaiInstrFormats.td @@ -482,7 +482,7 @@ class InstSLI pattern> // Memory(ea) <- (least significant half-word of Rr) // If `YS' = 10 (bYte load): Rr <- Memory(ea) // If `YS' = 00 (halfword load): Rr <- Memory(ea) -// [Note: here ea is determined as in the the RM instruction. ] +// [Note: here ea is determined as in the RM instruction. ] // If `SE' = 01 then the value is zEro extended // before being loaded into Rd. // If `SE' = 00 then the value is sign extended diff --git a/lib/Target/Lanai/LanaiTargetMachine.cpp b/lib/Target/Lanai/LanaiTargetMachine.cpp index 9a73c95d6516..2c21a53b13bb 100644 --- a/lib/Target/Lanai/LanaiTargetMachine.cpp +++ b/lib/Target/Lanai/LanaiTargetMachine.cpp @@ -74,10 +74,9 @@ LanaiTargetMachine::LanaiTargetMachine(const Target &T, const Triple &TT, initAsmInfo(); } -TargetIRAnalysis LanaiTargetMachine::getTargetIRAnalysis() { - return TargetIRAnalysis([this](const Function &F) { - return TargetTransformInfo(LanaiTTIImpl(this, F)); - }); +TargetTransformInfo +LanaiTargetMachine::getTargetTransformInfo(const Function &F) { + return TargetTransformInfo(LanaiTTIImpl(this, F)); } namespace { diff --git a/lib/Target/Lanai/LanaiTargetMachine.h b/lib/Target/Lanai/LanaiTargetMachine.h index 2fb1a0536104..0db286ec13e7 100644 --- a/lib/Target/Lanai/LanaiTargetMachine.h +++ b/lib/Target/Lanai/LanaiTargetMachine.h @@ -42,7 +42,7 @@ class LanaiTargetMachine : public LLVMTargetMachine { return &Subtarget; } - TargetIRAnalysis getTargetIRAnalysis() override; + TargetTransformInfo getTargetTransformInfo(const Function &F) override; // Pass Pipeline Configuration TargetPassConfig *createPassConfig(PassManagerBase &pass_manager) override; diff --git a/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp b/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp index c4935746f5ad..e3eaa4d30a90 100644 --- a/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp +++ b/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp @@ -165,9 +165,10 @@ LanaiAsmBackend::getFixupKindInfo(MCFixupKind Kind) const { } // namespace MCAsmBackend *llvm::createLanaiAsmBackend(const Target &T, + const MCSubtargetInfo &STI, const MCRegisterInfo & /*MRI*/, - const Triple &TT, StringRef /*CPU*/, const MCTargetOptions & /*Options*/) { + const Triple &TT = STI.getTargetTriple(); if (!TT.isOSBinFormatELF()) llvm_unreachable("OS not supported"); diff --git a/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.h b/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.h index 5bc84ad83870..ddb4e9b0d728 100644 --- a/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.h +++ b/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.h @@ -38,8 +38,8 @@ MCCodeEmitter *createLanaiMCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, MCContext &Ctx); -MCAsmBackend *createLanaiAsmBackend(const Target &T, const MCRegisterInfo &MRI, - const Triple &TheTriple, StringRef CPU, +MCAsmBackend *createLanaiAsmBackend(const Target &T, const MCSubtargetInfo &STI, + const 
MCRegisterInfo &MRI, const MCTargetOptions &Options); std::unique_ptr diff --git a/lib/Target/MSP430/MSP430BranchSelector.cpp b/lib/Target/MSP430/MSP430BranchSelector.cpp index 424b5ae418f7..87c320aa76aa 100644 --- a/lib/Target/MSP430/MSP430BranchSelector.cpp +++ b/lib/Target/MSP430/MSP430BranchSelector.cpp @@ -138,15 +138,15 @@ bool MSP430BSel::expandBranches(OffsetVector &BlockOffsets) { continue; } - DEBUG(dbgs() << " Found a branch that needs expanding, BB#" - << DestBB->getNumber() << ", Distance " << BranchDistance - << "\n"); + DEBUG(dbgs() << " Found a branch that needs expanding, " + << printMBBReference(*DestBB) << ", Distance " + << BranchDistance << "\n"); // If JCC is not the last instruction we need to split the MBB. if (MI->getOpcode() == MSP430::JCC && std::next(MI) != EE) { - DEBUG(dbgs() << " Found a basic block that needs to be split, BB#" - << MBB->getNumber() << "\n"); + DEBUG(dbgs() << " Found a basic block that needs to be split, " + << printMBBReference(*MBB) << "\n"); // Create a new basic block. MachineBasicBlock *NewBB = diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp index 7cfcb965899f..f5b2bda5d1e4 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -746,7 +746,7 @@ MSP430TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); } - if (MF.getFunction()->hasStructRetAttr()) { + if (MF.getFunction().hasStructRetAttr()) { MSP430MachineFunctionInfo *FuncInfo = MF.getInfo(); unsigned Reg = FuncInfo->getSRetReturnReg(); diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp index 7a3b7a8bd5ff..54e53e19eb54 100644 --- a/lib/Target/MSP430/MSP430RegisterInfo.cpp +++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp @@ -38,7 +38,7 @@ MSP430RegisterInfo::MSP430RegisterInfo() const MCPhysReg* MSP430RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { const MSP430FrameLowering *TFI = getFrameLowering(*MF); - const Function* F = MF->getFunction(); + const Function* F = &MF->getFunction(); static const MCPhysReg CalleeSavedRegs[] = { MSP430::FP, MSP430::R5, MSP430::R6, MSP430::R7, MSP430::R8, MSP430::R9, MSP430::R10, diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 4db5e3c8cca5..345b081500a4 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -512,6 +512,9 @@ class MipsAsmParser : public MCTargetAsmParser { IsLittleEndian = false; else IsLittleEndian = true; + + if (getSTI().getCPU() == "mips64r6" && inMicroMipsMode()) + report_fatal_error("microMIPS64R6 is not supported", false); } /// True if all of $fcc0 - $fcc7 exist for the current ISA. 
@@ -1987,9 +1990,7 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, case Mips::DDIV: case Mips::DDIVU: case Mips::DIVU_MMR6: - case Mips::DDIVU_MM64R6: case Mips::DIV_MMR6: - case Mips::DDIV_MM64R6: if (Inst.getOperand(SecondOp).getReg() == Mips::ZERO || Inst.getOperand(SecondOp).getReg() == Mips::ZERO_64) { if (Inst.getOperand(FirstOp).getReg() == Mips::ZERO || @@ -5114,8 +5115,6 @@ MipsAsmParser::checkEarlyTargetMatchPredicate(MCInst &Inst, return Match_Success; case Mips::DATI: case Mips::DAHI: - case Mips::DATI_MM64R6: - case Mips::DAHI_MM64R6: if (static_cast(*Operands[1]) .isValidForTie(static_cast(*Operands[2]))) return Match_Success; @@ -5128,7 +5127,6 @@ unsigned MipsAsmParser::checkTargetMatchPredicate(MCInst &Inst) { // As described by the MIPSR6 spec, daui must not use the zero operand for // its source operand. case Mips::DAUI: - case Mips::DAUI_MM64R6: if (Inst.getOperand(1).getReg() == Mips::ZERO || Inst.getOperand(1).getReg() == Mips::ZERO_64) return Match_RequiresNoZeroRegister; @@ -5201,8 +5199,7 @@ unsigned MipsAsmParser::checkTargetMatchPredicate(MCInst &Inst) { if (Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg()) return Match_RequiresDifferentOperands; return Match_Success; - case Mips::DINS: - case Mips::DINS_MM64R6: { + case Mips::DINS: { assert(Inst.getOperand(2).isImm() && Inst.getOperand(3).isImm() && "Operands must be immediates for dins!"); const signed Pos = Inst.getOperand(2).getImm(); @@ -5212,9 +5209,7 @@ unsigned MipsAsmParser::checkTargetMatchPredicate(MCInst &Inst) { return Match_Success; } case Mips::DINSM: - case Mips::DINSM_MM64R6: - case Mips::DINSU: - case Mips::DINSU_MM64R6: { + case Mips::DINSU: { assert(Inst.getOperand(2).isImm() && Inst.getOperand(3).isImm() && "Operands must be immediates for dinsm/dinsu!"); const signed Pos = Inst.getOperand(2).getImm(); @@ -5223,8 +5218,7 @@ unsigned MipsAsmParser::checkTargetMatchPredicate(MCInst &Inst) { return Match_RequiresPosSizeRange33_64; return Match_Success; } - case Mips::DEXT: - case Mips::DEXT_MM64R6: { + case Mips::DEXT: { assert(Inst.getOperand(2).isImm() && Inst.getOperand(3).isImm() && "Operands must be immediates for DEXTM!"); const signed Pos = Inst.getOperand(2).getImm(); @@ -5234,9 +5228,7 @@ unsigned MipsAsmParser::checkTargetMatchPredicate(MCInst &Inst) { return Match_Success; } case Mips::DEXTM: - case Mips::DEXTU: - case Mips::DEXTM_MM64R6: - case Mips::DEXTU_MM64R6: { + case Mips::DEXTU: { assert(Inst.getOperand(2).isImm() && Inst.getOperand(3).isImm() && "Operands must be immediates for dextm/dextu!"); const signed Pos = Inst.getOperand(2).getImm(); @@ -6794,6 +6786,9 @@ bool MipsAsmParser::parseSetArchDirective() { if (ArchFeatureName.empty()) return reportParseError("unsupported architecture"); + if (ArchFeatureName == "mips64r6" && inMicroMipsMode()) + return reportParseError("mips64r6 does not support microMIPS"); + selectArch(ArchFeatureName); getTargetStreamer().emitDirectiveSetArch(Arch); return false; @@ -7125,6 +7120,10 @@ bool MipsAsmParser::parseDirectiveSet() { Parser.eatToEndOfStatement(); return false; } else if (Tok.getString() == "micromips") { + if (hasMips64r6()) { + Error(Tok.getLoc(), ".set micromips directive is not supported with MIPS64R6"); + return false; + } return parseSetFeature(Mips::FeatureMicroMips); } else if (Tok.getString() == "mips0") { return parseSetMips0Directive(); @@ -7157,6 +7156,10 @@ bool MipsAsmParser::parseDirectiveSet() { } else if (Tok.getString() == "mips64r5") { return 
parseSetFeature(Mips::FeatureMips64r5); } else if (Tok.getString() == "mips64r6") { + if (inMicroMipsMode()) { + Error(Tok.getLoc(), "MIPS64R6 is not supported with microMIPS"); + return false; + } return parseSetFeature(Mips::FeatureMips64r6); } else if (Tok.getString() == "dsp") { return parseSetFeature(Mips::FeatureDSP); diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp index d8e2eef6a9fd..3d29a0dac25f 100644 --- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp +++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp @@ -277,11 +277,6 @@ static DecodeStatus DecodeMemEVA(MCInst &Inst, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeLoadByte9(MCInst &Inst, - unsigned Insn, - uint64_t Address, - const void *Decoder); - static DecodeStatus DecodeLoadByte15(MCInst &Inst, unsigned Insn, uint64_t Address, @@ -300,11 +295,6 @@ static DecodeStatus DecodeCacheOpMM(MCInst &Inst, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeStoreEvaOpMM(MCInst &Inst, - unsigned Insn, - uint64_t Address, - const void *Decoder); - static DecodeStatus DecodePrefeOpMM(MCInst &Inst, unsigned Insn, uint64_t Address, @@ -1068,26 +1058,16 @@ static DecodeStatus DecodeDEXT(MCInst &MI, InsnType Insn, uint64_t Address, unsigned Lsb = fieldFromInstruction(Insn, 6, 5); unsigned Size = 0; unsigned Pos = 0; - bool IsMicroMips = false; switch (MI.getOpcode()) { - case Mips::DEXT_MM64R6: - IsMicroMips = true; - LLVM_FALLTHROUGH; case Mips::DEXT: Pos = Lsb; Size = Msbd + 1; break; - case Mips::DEXTM_MM64R6: - IsMicroMips = true; - LLVM_FALLTHROUGH; case Mips::DEXTM: Pos = Lsb; Size = Msbd + 1 + 32; break; - case Mips::DEXTU_MM64R6: - IsMicroMips = true; - LLVM_FALLTHROUGH; case Mips::DEXTU: Pos = Lsb + 32; Size = Msbd + 1; @@ -1096,14 +1076,10 @@ static DecodeStatus DecodeDEXT(MCInst &MI, InsnType Insn, uint64_t Address, llvm_unreachable("Unknown DEXT instruction!"); } - MI.setOpcode(IsMicroMips ? Mips::DEXT_MM64R6 : Mips::DEXT); + MI.setOpcode(Mips::DEXT); - // Although the format of the instruction is similar, rs and rt are swapped - // for microMIPS64R6. InsnType Rs = fieldFromInstruction(Insn, 21, 5); InsnType Rt = fieldFromInstruction(Insn, 16, 5); - if (IsMicroMips) - std::swap(Rs, Rt); MI.addOperand(MCOperand::createReg(getReg(Decoder, Mips::GPR64RegClassID, Rt))); MI.addOperand(MCOperand::createReg(getReg(Decoder, Mips::GPR64RegClassID, Rs))); @@ -1122,26 +1098,16 @@ static DecodeStatus DecodeDINS(MCInst &MI, InsnType Insn, uint64_t Address, unsigned Lsb = fieldFromInstruction(Insn, 6, 5); unsigned Size = 0; unsigned Pos = 0; - bool IsMicroMips = false; switch (MI.getOpcode()) { - case Mips::DINS_MM64R6: - IsMicroMips = true; - LLVM_FALLTHROUGH; case Mips::DINS: Pos = Lsb; Size = Msbd + 1 - Pos; break; - case Mips::DINSM_MM64R6: - IsMicroMips = true; - LLVM_FALLTHROUGH; case Mips::DINSM: Pos = Lsb; Size = Msbd + 33 - Pos; break; - case Mips::DINSU_MM64R6: - IsMicroMips = true; - LLVM_FALLTHROUGH; case Mips::DINSU: Pos = Lsb + 32; // mbsd = pos + size - 33 @@ -1152,14 +1118,10 @@ static DecodeStatus DecodeDINS(MCInst &MI, InsnType Insn, uint64_t Address, llvm_unreachable("Unknown DINS instruction!"); } - // Although the format of the instruction is similar, rs and rt are swapped - // for microMIPS64R6. InsnType Rs = fieldFromInstruction(Insn, 21, 5); InsnType Rt = fieldFromInstruction(Insn, 16, 5); - if (IsMicroMips) - std::swap(Rs, Rt); - MI.setOpcode(IsMicroMips ? 
Mips::DINS_MM64R6 : Mips::DINS); + MI.setOpcode(Mips::DINS); MI.addOperand(MCOperand::createReg(getReg(Decoder, Mips::GPR64RegClassID, Rt))); MI.addOperand(MCOperand::createReg(getReg(Decoder, Mips::GPR64RegClassID, Rs))); MI.addOperand(MCOperand::createImm(Pos)); @@ -1240,7 +1202,7 @@ DecodeStatus MipsDisassembler::getInstruction(MCInst &Instr, uint64_t &Size, if (hasMips32r6()) { DEBUG(dbgs() << "Trying MicroMipsR616 table (16-bit instructions):\n"); // Calling the auto-generated decoder function for microMIPS32R6 - // (and microMIPS64R6) 16-bit instructions. + // 16-bit instructions. Result = decodeInstruction(DecoderTableMicroMipsR616, Instr, Insn, Address, this, STI); if (Result != MCDisassembler::Fail) { @@ -1566,24 +1528,6 @@ static DecodeStatus DecodeMemEVA(MCInst &Inst, return MCDisassembler::Success; } -static DecodeStatus DecodeLoadByte9(MCInst &Inst, - unsigned Insn, - uint64_t Address, - const void *Decoder) { - int Offset = SignExtend32<9>(Insn & 0x1ff); - unsigned Base = fieldFromInstruction(Insn, 16, 5); - unsigned Reg = fieldFromInstruction(Insn, 21, 5); - - Base = getReg(Decoder, Mips::GPR32RegClassID, Base); - Reg = getReg(Decoder, Mips::GPR32RegClassID, Reg); - - Inst.addOperand(MCOperand::createReg(Reg)); - Inst.addOperand(MCOperand::createReg(Base)); - Inst.addOperand(MCOperand::createImm(Offset)); - - return MCDisassembler::Success; -} - static DecodeStatus DecodeLoadByte15(MCInst &Inst, unsigned Insn, uint64_t Address, @@ -1670,24 +1614,6 @@ static DecodeStatus DecodeCacheeOp_CacheOpR6(MCInst &Inst, return MCDisassembler::Success; } -static DecodeStatus DecodeStoreEvaOpMM(MCInst &Inst, - unsigned Insn, - uint64_t Address, - const void *Decoder) { - int Offset = SignExtend32<9>(Insn & 0x1ff); - unsigned Reg = fieldFromInstruction(Insn, 21, 5); - unsigned Base = fieldFromInstruction(Insn, 16, 5); - - Reg = getReg(Decoder, Mips::GPR32RegClassID, Reg); - Base = getReg(Decoder, Mips::GPR32RegClassID, Base); - - Inst.addOperand(MCOperand::createReg(Reg)); - Inst.addOperand(MCOperand::createReg(Base)); - Inst.addOperand(MCOperand::createImm(Offset)); - - return MCDisassembler::Success; -} - static DecodeStatus DecodeSyncI(MCInst &Inst, unsigned Insn, uint64_t Address, diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp index 1ad524c06969..acbc6d37e24b 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp @@ -476,8 +476,9 @@ bool MipsAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { } MCAsmBackend *llvm::createMipsAsmBackend(const Target &T, + const MCSubtargetInfo &STI, const MCRegisterInfo &MRI, - const Triple &TT, StringRef CPU, const MCTargetOptions &Options) { - return new MipsAsmBackend(T, MRI, TT, CPU, Options.ABIName == "n32"); + return new MipsAsmBackend(T, MRI, STI.getTargetTriple(), STI.getCPU(), + Options.ABIName == "n32"); } diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp index 6d2f098a6b32..3c67743947cb 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp @@ -225,6 +225,8 @@ unsigned MipsELFObjectWriter::getRelocType(MCContext &Ctx, switch (Kind) { case Mips::fixup_Mips_NONE: return ELF::R_MIPS_NONE; + case FK_Data_1: + report_fatal_error("MIPS does not support one byte relocations"); case Mips::fixup_Mips_16: case FK_Data_2: return IsPCRel ? 
ELF::R_MIPS_PC16 : ELF::R_MIPS_16; diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp index eae0f975080b..2f6dd0035de3 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp @@ -86,18 +86,6 @@ static void LowerLargeShift(MCInst& Inst) { case Mips::DROTR: Inst.setOpcode(Mips::DROTR32); return; - case Mips::DSLL_MM64R6: - Inst.setOpcode(Mips::DSLL32_MM64R6); - return; - case Mips::DSRL_MM64R6: - Inst.setOpcode(Mips::DSRL32_MM64R6); - return; - case Mips::DSRA_MM64R6: - Inst.setOpcode(Mips::DSRA32_MM64R6); - return; - case Mips::DROTR_MM64R6: - Inst.setOpcode(Mips::DROTR32_MM64R6); - return; } } @@ -178,10 +166,6 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS, case Mips::DSRL: case Mips::DSRA: case Mips::DROTR: - case Mips::DSLL_MM64R6: - case Mips::DSRL_MM64R6: - case Mips::DSRA_MM64R6: - case Mips::DROTR_MM64R6: LowerLargeShift(TmpInst); break; // Compact branches, enforce encoding restrictions. @@ -204,7 +188,7 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS, // so we have to special check for them. unsigned Opcode = TmpInst.getOpcode(); if ((Opcode != Mips::NOP) && (Opcode != Mips::SLL) && - (Opcode != Mips::SLL_MM) && !Binary) + (Opcode != Mips::SLL_MM) && (Opcode != Mips::SLL_MMR6) && !Binary) llvm_unreachable("unimplemented opcode in encodeInstruction()"); int NewOpcode = -1; diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h index abbf08ed212f..5dab6c3e81d6 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h +++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h @@ -45,8 +45,8 @@ MCCodeEmitter *createMipsMCCodeEmitterEL(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, MCContext &Ctx); -MCAsmBackend *createMipsAsmBackend(const Target &T, const MCRegisterInfo &MRI, - const Triple &TT, StringRef CPU, +MCAsmBackend *createMipsAsmBackend(const Target &T, const MCSubtargetInfo &STI, + const MCRegisterInfo &MRI, const MCTargetOptions &Options); std::unique_ptr diff --git a/lib/Target/Mips/MicroMips32r6InstrFormats.td b/lib/Target/Mips/MicroMips32r6InstrFormats.td index e1f1f9262b90..1f4d8d26bbd7 100644 --- a/lib/Target/Mips/MicroMips32r6InstrFormats.td +++ b/lib/Target/Mips/MicroMips32r6InstrFormats.td @@ -17,7 +17,7 @@ class MMR6Arch { string DecoderNamespace = "MicroMipsR6"; } -// Class used for microMIPS32r6 and microMIPS64r6 instructions. +// Class used for microMIPS32r6 instructions. 
class MicroMipsR6Inst16 : PredicateControl { string DecoderNamespace = "MicroMipsR6"; let InsnPredicates = [HasMicroMips32r6]; @@ -174,22 +174,6 @@ class ADDI_FM_MMR6 op> : MMR6Arch { let Inst{15-0} = imm16; } -class POOL32C_ST_EVA_FM_MMR6 op, bits<3> funct> : MipsR6Inst { - bits<21> addr; - bits<5> hint; - bits<5> base = addr{20-16}; - bits<9> offset = addr{8-0}; - - bits<32> Inst; - - let Inst{31-26} = op; - let Inst{25-21} = hint; - let Inst{20-16} = base; - let Inst{15-12} = 0b1010; - let Inst{11-9} = funct; - let Inst{8-0} = offset; -} - class LB32_FM_MMR6 : MipsR6Inst { bits<21> addr; bits<5> rt; @@ -218,20 +202,6 @@ class LBU32_FM_MMR6 : MipsR6Inst { let Inst{15-0} = offset; } -class POOL32C_LB_LBU_FM_MMR6 funct> : MipsR6Inst { - bits<21> addr; - bits<5> rt; - - bits<32> Inst; - - let Inst{31-26} = 0b011000; - let Inst{25-21} = rt; - let Inst{20-16} = addr{20-16}; - let Inst{15-12} = 0b0110; - let Inst{11-9} = funct; - let Inst{8-0} = addr{8-0}; -} - class SIGN_EXTEND_FM_MMR6 funct> : MMR6Arch { bits<5> rd; @@ -436,38 +406,6 @@ class SB32_SH32_STORE_FM_MMR6 op> { let Inst{15-0} = offset; } -class POOL32C_STORE_EVA_FM_MMR6 funct> { - bits<5> rt; - bits<21> addr; - bits<5> base = addr{20-16}; - bits<9> offset = addr{8-0}; - - bits<32> Inst; - - let Inst{31-26} = 0b011000; - let Inst{25-21} = rt; - let Inst{20-16} = base; - let Inst{15-12} = 0b1010; - let Inst{11-9} = funct; - let Inst{8-0} = offset; -} - -class LOAD_WORD_EVA_FM_MMR6 funct> { - bits<5> rt; - bits<21> addr; - bits<5> base = addr{20-16}; - bits<9> offset = addr{8-0}; - - bits<32> Inst; - - let Inst{31-26} = 0b011000; - let Inst{25-21} = rt; - let Inst{20-16} = base; - let Inst{15-12} = 0b0110; - let Inst{11-9} = funct; - let Inst{8-0} = offset; -} - class LOAD_WORD_FM_MMR6 { bits<5> rt; bits<21> addr; @@ -631,23 +569,6 @@ class SW32_FM_MMR6 op> : MMR6Arch { let Inst{15-0} = addr{15-0}; } -class POOL32C_SWE_FM_MMR6 op, bits<4> fmt, - bits<3> funct> : MMR6Arch { - bits<5> rt; - bits<21> addr; - bits<5> base = addr{20-16}; - bits<9> offset = addr{8-0}; - - bits<32> Inst; - - let Inst{31-26} = op; - let Inst{25-21} = rt; - let Inst{20-16} = base; - let Inst{15-12} = fmt; - let Inst{11-9} = funct; - let Inst{8-0} = offset; -} - class POOL32F_ARITH_FM_MMR6 fmt, bits<8> funct> : MMR6Arch, MipsR6Inst { bits<5> ft; diff --git a/lib/Target/Mips/MicroMips32r6InstrInfo.td b/lib/Target/Mips/MicroMips32r6InstrInfo.td index 3ff3f07654d9..138ea7b58f7a 100644 --- a/lib/Target/Mips/MicroMips32r6InstrInfo.td +++ b/lib/Target/Mips/MicroMips32r6InstrInfo.td @@ -147,19 +147,14 @@ class SLL_MMR6_ENC : SHIFT_MMR6_ENC<"sll", 0x00, 0b0>; class SUB_MMR6_ENC : ARITH_FM_MMR6<"sub", 0x190>; class SUBU_MMR6_ENC : ARITH_FM_MMR6<"subu", 0x1d0>; class SW_MMR6_ENC : SW32_FM_MMR6<"sw", 0x3e>; -class SWE_MMR6_ENC : POOL32C_SWE_FM_MMR6<"swe", 0x18, 0xa, 0x7>; class SW16_MMR6_ENC : LOAD_STORE_FM_MM16<0x3a>; class SWM16_MMR6_ENC : POOL16C_LWM_SWM_FM_MM16R6<0xa>; class SWSP_MMR6_ENC : LOAD_STORE_SP_FM_MM16<0x32>; class SWP_MMR6_ENC : POOL32B_LWP_SWP_FM_MMR6<0x9>; -class PREFE_MMR6_ENC : POOL32C_ST_EVA_FM_MMR6<0b011000, 0b010>; -class CACHEE_MMR6_ENC : POOL32C_ST_EVA_FM_MMR6<0b011000, 0b011>; class WRPGPR_MMR6_ENC : POOL32A_WRPGPR_WSBH_FM_MMR6<0x3c5>; class WSBH_MMR6_ENC : POOL32A_WRPGPR_WSBH_FM_MMR6<0x1ec>; class LB_MMR6_ENC : LB32_FM_MMR6; class LBU_MMR6_ENC : LBU32_FM_MMR6; -class LBE_MMR6_ENC : POOL32C_LB_LBU_FM_MMR6<0b100>; -class LBUE_MMR6_ENC : POOL32C_LB_LBU_FM_MMR6<0b000>; class PAUSE_MMR6_ENC : POOL32A_PAUSE_FM_MMR6<"pause", 0b00101>; class 
RDHWR_MMR6_ENC : POOL32A_RDHWR_FM_MMR6; class WAIT_MMR6_ENC : WAIT_FM_MM, MMR6Arch<"wait">; @@ -187,12 +182,7 @@ class TRUNC_W_D_MMR6_ENC : POOL32F_MATH_FM_MMR6<"trunc.w.d", 1, 0b10101100>; class SQRT_S_MMR6_ENC : POOL32F_MATH_FM_MMR6<"sqrt.s", 0, 0b00101000>; class SQRT_D_MMR6_ENC : POOL32F_MATH_FM_MMR6<"sqrt.d", 1, 0b00101000>; class SB_MMR6_ENC : SB32_SH32_STORE_FM_MMR6<0b000110>; -class SBE_MMR6_ENC : POOL32C_STORE_EVA_FM_MMR6<0b100>; -class SCE_MMR6_ENC : POOL32C_STORE_EVA_FM_MMR6<0b110>; class SH_MMR6_ENC : SB32_SH32_STORE_FM_MMR6<0b001110>; -class SHE_MMR6_ENC : POOL32C_STORE_EVA_FM_MMR6<0b101>; -class LLE_MMR6_ENC : LOAD_WORD_EVA_FM_MMR6<0b110>; -class LWE_MMR6_ENC : LOAD_WORD_EVA_FM_MMR6<0b111>; class LW_MMR6_ENC : LOAD_WORD_FM_MMR6; class LUI_MMR6_ENC : LOAD_UPPER_IMM_FM_MMR6; class JALRC_HB_MMR6_ENC : POOL32A_JALRC_FM_MMR6<"jalrc.hb", 0b0001111100>; @@ -441,17 +431,6 @@ class CACHE_MMR6_DESC : CACHE_HINT_MMR6_DESC<"cache", mem_mm_12, GPR32Opnd, class PREF_MMR6_DESC : CACHE_HINT_MMR6_DESC<"pref", mem_mm_12, GPR32Opnd, II_PREF>; -class PREFE_CACHEE_MMR6_DESC_BASE - : CACHE_HINT_MMR6_DESC { - string DecoderMethod = "DecodePrefeOpMM"; -} - -class PREFE_MMR6_DESC : PREFE_CACHEE_MMR6_DESC_BASE<"prefe", mem_mm_9, - GPR32Opnd, II_PREFE>; -class CACHEE_MMR6_DESC : PREFE_CACHEE_MMR6_DESC_BASE<"cachee", mem_mm_9, - GPR32Opnd, II_CACHEE>; - class LB_LBU_MMR6_DESC_BASE : MMR6Arch { @@ -466,16 +445,6 @@ class LB_MMR6_DESC : LB_LBU_MMR6_DESC_BASE<"lb", mem_mm_16, GPR32Opnd, II_LB>; class LBU_MMR6_DESC : LB_LBU_MMR6_DESC_BASE<"lbu", mem_mm_16, GPR32Opnd, II_LBU>; -class LBE_LBUE_MMR6_DESC_BASE - : LB_LBU_MMR6_DESC_BASE { - let DecoderMethod = "DecodeLoadByte9"; -} -class LBE_MMR6_DESC : LBE_LBUE_MMR6_DESC_BASE<"lbe", mem_mm_9, GPR32Opnd, - II_LBE>; -class LBUE_MMR6_DESC : LBE_LBUE_MMR6_DESC_BASE<"lbue", mem_mm_9, GPR32Opnd, - II_LBUE>; - class CLO_CLZ_MMR6_DESC_BASE : MMR6Arch { dag OutOperandList = (outs GPROpnd:$rt); @@ -704,21 +673,9 @@ class ORI_MMR6_DESC : ArithLogicI<"ori", uimm16, GPR32Opnd, II_ORI, immZExt16, class XOR_MMR6_DESC : ArithLogicR<"xor", GPR32Opnd, 1, II_XOR, xor>; class XORI_MMR6_DESC : ArithLogicI<"xori", uimm16, GPR32Opnd, II_XORI, immZExt16, xor>; - -class SWE_MMR6_DESC_BASE : - InstSE<(outs), (ins RO:$rt, MO:$addr), !strconcat(opstr, "\t$rt, $addr"), - [(OpNode RO:$rt, Addr:$addr)], Itin, FrmI, opstr> { - let DecoderMethod = "DecodeMem"; - let mayStore = 1; -} class SW_MMR6_DESC : Store<"sw", GPR32Opnd> { InstrItinClass Itinerary = II_SW; } -class SWE_MMR6_DESC : SWE_MMR6_DESC_BASE<"swe", GPR32Opnd, mem_simm9, II_SWE>; - class WRPGPR_WSBH_MMR6_DESC_BASE : MMR6Arch { dag InOperandList = (ins RO:$rs); @@ -1154,32 +1111,7 @@ class STORE_MMR6_DESC_BASE; -class STORE_EVA_MMR6_DESC_BASE - : MMR6Arch, MipsR6Inst { - dag OutOperandList = (outs); - dag InOperandList = (ins RO:$rt, mem_simm9:$addr); - string AsmString = !strconcat(instr_asm, "\t$rt, $addr"); - string DecoderMethod = "DecodeStoreEvaOpMM"; - bit mayStore = 1; - InstrItinClass Itinerary = Itin; -} -class SBE_MMR6_DESC : STORE_EVA_MMR6_DESC_BASE<"sbe", GPR32Opnd, II_SBE>; -class SCE_MMR6_DESC : STORE_EVA_MMR6_DESC_BASE<"sce", GPR32Opnd, II_SCE>; class SH_MMR6_DESC : STORE_MMR6_DESC_BASE<"sh", GPR32Opnd, II_SH>; -class SHE_MMR6_DESC : STORE_EVA_MMR6_DESC_BASE<"she", GPR32Opnd, II_SHE>; -class LOAD_WORD_EVA_MMR6_DESC_BASE - : MMR6Arch, MipsR6Inst { - dag OutOperandList = (outs RO:$rt); - dag InOperandList = (ins mem_simm9:$addr); - string AsmString = !strconcat(instr_asm, "\t$rt, $addr"); - string DecoderMethod = 
"DecodeMemMMImm9"; - bit mayLoad = 1; - InstrItinClass Itinerary = Itin; -} -class LLE_MMR6_DESC : LOAD_WORD_EVA_MMR6_DESC_BASE<"lle", GPR32Opnd, II_LLE>; -class LWE_MMR6_DESC : LOAD_WORD_EVA_MMR6_DESC_BASE<"lwe", GPR32Opnd, II_LWE>; class ADDU16_MMR6_DESC : ArithRMM16<"addu16", GPRMM16Opnd, 1, II_ADDU, add>, MMR6Arch<"addu16"> { int AddedComplexity = 1; @@ -1530,16 +1462,11 @@ def SW16_MMR6 : StdMMR6Rel, SW16_MMR6_DESC, SW16_MMR6_ENC, ISA_MICROMIPS32R6; def SWM16_MMR6 : StdMMR6Rel, SWM16_MMR6_DESC, SWM16_MMR6_ENC, ISA_MICROMIPS32R6; def SWSP_MMR6 : StdMMR6Rel, SWSP_MMR6_DESC, SWSP_MMR6_ENC, ISA_MICROMIPS32R6; def SWP_MMR6 : StdMMR6Rel, SWP_MMR6_ENC, SWP_MMR6_DESC, ISA_MICROMIPS32R6; -def PREFE_MMR6 : StdMMR6Rel, PREFE_MMR6_ENC, PREFE_MMR6_DESC, ISA_MICROMIPS32R6; -def CACHEE_MMR6 : StdMMR6Rel, CACHEE_MMR6_ENC, CACHEE_MMR6_DESC, - ISA_MICROMIPS32R6; def WRPGPR_MMR6 : StdMMR6Rel, WRPGPR_MMR6_ENC, WRPGPR_MMR6_DESC, ISA_MICROMIPS32R6; def WSBH_MMR6 : StdMMR6Rel, WSBH_MMR6_ENC, WSBH_MMR6_DESC, ISA_MICROMIPS32R6; def LB_MMR6 : R6MMR6Rel, LB_MMR6_ENC, LB_MMR6_DESC, ISA_MICROMIPS32R6; def LBU_MMR6 : R6MMR6Rel, LBU_MMR6_ENC, LBU_MMR6_DESC, ISA_MICROMIPS32R6; -def LBE_MMR6 : R6MMR6Rel, LBE_MMR6_ENC, LBE_MMR6_DESC, ISA_MICROMIPS32R6; -def LBUE_MMR6 : R6MMR6Rel, LBUE_MMR6_ENC, LBUE_MMR6_DESC, ISA_MICROMIPS32R6; def PAUSE_MMR6 : StdMMR6Rel, PAUSE_MMR6_DESC, PAUSE_MMR6_ENC, ISA_MICROMIPS32R6; def RDHWR_MMR6 : R6MMR6Rel, RDHWR_MMR6_DESC, RDHWR_MMR6_ENC, ISA_MICROMIPS32R6; def WAIT_MMR6 : StdMMR6Rel, WAIT_MMR6_DESC, WAIT_MMR6_ENC, ISA_MICROMIPS32R6; @@ -1554,9 +1481,6 @@ def XORI_MMR6 : StdMMR6Rel, XORI_MMR6_DESC, XORI_MMR6_ENC, ISA_MICROMIPS32R6; let DecoderMethod = "DecodeMemMMImm16" in { def SW_MMR6 : StdMMR6Rel, SW_MMR6_DESC, SW_MMR6_ENC, ISA_MICROMIPS32R6; } -let DecoderMethod = "DecodeMemMMImm9" in { - def SWE_MMR6 : StdMMR6Rel, SWE_MMR6_DESC, SWE_MMR6_ENC, ISA_MICROMIPS32R6; -} /// Floating Point Instructions def FADD_S_MMR6 : StdMMR6Rel, FADD_S_MMR6_ENC, FADD_S_MMR6_DESC, ISA_MICROMIPS32R6; @@ -1655,12 +1579,7 @@ def SQRT_S_MMR6 : StdMMR6Rel, SQRT_S_MMR6_ENC, SQRT_S_MMR6_DESC, def SQRT_D_MMR6 : StdMMR6Rel, SQRT_D_MMR6_ENC, SQRT_D_MMR6_DESC, ISA_MICROMIPS32R6; def SB_MMR6 : StdMMR6Rel, SB_MMR6_DESC, SB_MMR6_ENC, ISA_MICROMIPS32R6; -def SBE_MMR6 : StdMMR6Rel, SBE_MMR6_DESC, SBE_MMR6_ENC, ISA_MICROMIPS32R6; -def SCE_MMR6 : StdMMR6Rel, SCE_MMR6_DESC, SCE_MMR6_ENC, ISA_MICROMIPS32R6; def SH_MMR6 : StdMMR6Rel, SH_MMR6_DESC, SH_MMR6_ENC, ISA_MICROMIPS32R6; -def SHE_MMR6 : StdMMR6Rel, SHE_MMR6_DESC, SHE_MMR6_ENC, ISA_MICROMIPS32R6; -def LLE_MMR6 : StdMMR6Rel, LLE_MMR6_DESC, LLE_MMR6_ENC, ISA_MICROMIPS32R6; -def LWE_MMR6 : StdMMR6Rel, LWE_MMR6_DESC, LWE_MMR6_ENC, ISA_MICROMIPS32R6; def LW_MMR6 : StdMMR6Rel, LW_MMR6_DESC, LW_MMR6_ENC, ISA_MICROMIPS32R6; def LUI_MMR6 : R6MMR6Rel, LUI_MMR6_DESC, LUI_MMR6_ENC, ISA_MICROMIPS32R6; def ADDU16_MMR6 : StdMMR6Rel, ADDU16_MMR6_DESC, ADDU16_MMR6_ENC, diff --git a/lib/Target/Mips/MicroMips64r6InstrFormats.td b/lib/Target/Mips/MicroMips64r6InstrFormats.td deleted file mode 100644 index 26062bfb2b8e..000000000000 --- a/lib/Target/Mips/MicroMips64r6InstrFormats.td +++ /dev/null @@ -1,267 +0,0 @@ -//=- MicroMips64r6InstrFormats.td - Instruction Formats -*- tablegen -* -=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// -// This file describes microMIPS64r6 instruction formats. -// -//===----------------------------------------------------------------------===// - -class DAUI_FM_MMR6 { - bits<5> rt; - bits<5> rs; - bits<16> imm; - - bits<32> Inst; - - let Inst{31-26} = 0b111100; - let Inst{25-21} = rt; - let Inst{20-16} = rs; - let Inst{15-0} = imm; -} - -class POOL32I_ADD_IMM_FM_MMR6 funct> { - bits<5> rs; - bits<16> imm; - - bits<32> Inst; - - let Inst{31-26} = 0b010000; - let Inst{25-21} = funct; - let Inst{20-16} = rs; - let Inst{15-0} = imm; -} - -class POOL32S_EXTBITS_FM_MMR6 funct> { - bits<5> rt; - bits<5> rs; - bits<5> size; - bits<5> pos; - - bits<32> Inst; - - let Inst{31-26} = 0b010110; - let Inst{25-21} = rt; - let Inst{20-16} = rs; - let Inst{15-11} = size; - let Inst{10-6} = pos; - let Inst{5-0} = funct; -} - -class POOL32S_DALIGN_FM_MMR6 { - bits<5> rs; - bits<5> rt; - bits<5> rd; - bits<3> bp; - - bits<32> Inst; - - let Inst{31-26} = 0b010110; - let Inst{25-21} = rs; - let Inst{20-16} = rt; - let Inst{15-11} = rd; - let Inst{10-8} = bp; - let Inst{7-6} = 0b00; - let Inst{5-0} = 0b011100; -} - -class POOL32A_DIVMOD_FM_MMR6 funct> - : MMR6Arch { - bits<5> rt; - bits<5> rs; - bits<5> rd; - - bits<32> Inst; - - let Inst{31-26} = 0b010110; - let Inst{25-21} = rt; - let Inst{20-16} = rs; - let Inst{15-11} = rd; - let Inst{10-9} = 0b00; - let Inst{8-0} = funct; -} - -class POOL32S_DMFTC0_FM_MMR6 funct> - : MMR6Arch, MipsR6Inst { - bits<5> rt; - bits<5> rs; - bits<3> sel; - - bits<32> Inst; - - let Inst{31-26} = 0b010110; - let Inst{25-21} = rt; - let Inst{20-16} = rs; - let Inst{15-14} = 0; - let Inst{13-11} = sel; - let Inst{10-6} = funct; - let Inst{5-0} = 0b111100; -} - -class POOL32S_ARITH_FM_MMR6 funct> - : MMR6Arch { - bits<5> rt; - bits<5> rs; - bits<5> rd; - - bits<32> Inst; - - let Inst{31-26} = 0b010110; - let Inst{25-21} = rt; - let Inst{20-16} = rs; - let Inst{15-11} = rd; - let Inst{10-9} = 0b00; - let Inst{8-0} = funct; -} - -class DADDIU_FM_MMR6 : MMR6Arch { - bits<5> rt; - bits<5> rs; - bits<16> imm16; - - bits<32> Inst; - - let Inst{31-26} = 0b010111; - let Inst{25-21} = rt; - let Inst{20-16} = rs; - let Inst{15-0} = imm16; -} - -class PCREL18_FM_MMR6 funct> : MipsR6Inst { - bits<5> rt; - bits<18> imm; - - bits<32> Inst; - - let Inst{31-26} = 0b011110; - let Inst{25-21} = rt; - let Inst{20-18} = funct; - let Inst{17-0} = imm; -} - -class POOL32S_2R_FM_MMR6 funct> - : MMR6Arch, MipsR6Inst { - bits<5> rt; - bits<5> rs; - - bits<32> Inst; - - let Inst{31-26} = 0b010110; - let Inst{25-21} = rt; - let Inst{20-16} = rs; - let Inst{15-6} = funct; - let Inst{5-0} = 0b111100; -} - -class POOL32S_2RSA5B0_FM_MMR6 funct> - : MMR6Arch, MipsR6Inst { - bits<5> rt; - bits<5> rs; - bits<5> sa; - - bits<32> Inst; - - let Inst{31-26} = 0b010110; - let Inst{25-21} = rt; - let Inst{20-16} = rs; - let Inst{15-11} = sa; - let Inst{10-9} = 0b00; - let Inst{8-0} = funct; -} - -class LD_SD_32_2R_OFFSET16_FM_MMR6 op> - : MMR6Arch, MipsR6Inst { - bits<5> rt; - bits<21> addr; - bits<5> base = addr{20-16}; - bits<16> offset = addr{15-0}; - - bits<32> Inst; - - let Inst{31-26} = op; - let Inst{25-21} = rt; - let Inst{20-16} = base; - let Inst{15-0} = offset; -} - -class POOL32C_2R_OFFSET12_FM_MMR6 funct> - : MMR6Arch, MipsR6Inst { - bits<5> rt; - bits<21> addr; - bits<5> base = addr{20-16}; - bits<12> offset = addr{11-0}; - - bits<32> Inst; - - let Inst{31-26} = 0b011000; - let Inst{25-21} = rt; - let Inst{20-16} = base; 
- let Inst{15-12} = funct; - let Inst{11-0} = offset; -} - -class POOL32S_3R_FM_MMR6 funct> - : MMR6Arch, MipsR6Inst { - bits<5> rt; - bits<5> rs; - bits<5> rd; - - bits<32> Inst; - - let Inst{31-26} = 0b010110; - let Inst{25-21} = rt; - let Inst{20-16} = rs; - let Inst{15-11} = rd; - let Inst{10-9} = 0b00; - let Inst{8-0} = funct; -} - -class POOL32S_DBITSWAP_FM_MMR6 : MMR6Arch, - MipsR6Inst { - bits<5> rt; - bits<5> rd; - - bits<32> Inst; - - let Inst{31-26} = 0b010110; - let Inst{25-21} = rt; - let Inst{20-16} = rd; - let Inst{15-12} = 0b0000; - let Inst{11-6} = 0b101100; - let Inst{5-0} = 0b111100; -} - -class POOL32S_3RSA_FM_MMR6 : MMR6Arch, - MipsR6Inst { - bits<5> rt; - bits<5> rs; - bits<5> rd; - bits<2> sa; - - bits<32> Inst; - - let Inst{31-26} = 0b010110; - let Inst{25-21} = rt; - let Inst{20-16} = rs; - let Inst{15-11} = rd; - let Inst{10-9} = sa; - let Inst{8-6} = 0b100; - let Inst{5-0} = 0b000100; -} - -class PCREL_1ROFFSET19_FM_MMR6 : MMR6Arch, - MipsR6Inst { - bits<5> rt; - bits<19> offset; - - bits<32> Inst; - - let Inst{31-26} = 0b011110; - let Inst{25-21} = rt; - let Inst{20-19} = 0b10; - let Inst{18-0} = offset; -} diff --git a/lib/Target/Mips/MicroMips64r6InstrInfo.td b/lib/Target/Mips/MicroMips64r6InstrInfo.td deleted file mode 100644 index 4f705feed0aa..000000000000 --- a/lib/Target/Mips/MicroMips64r6InstrInfo.td +++ /dev/null @@ -1,581 +0,0 @@ -//=- MicroMips64r6InstrInfo.td - Instruction Information -*- tablegen -*- -=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file describes MicroMips64r6 instructions. -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// -// Instruction Encodings -// -//===----------------------------------------------------------------------===// - -class DAUI_MMR6_ENC : DAUI_FM_MMR6; -class DAHI_MMR6_ENC : POOL32I_ADD_IMM_FM_MMR6<0b10001>; -class DATI_MMR6_ENC : POOL32I_ADD_IMM_FM_MMR6<0b10000>; -class DEXT_MMR6_ENC : POOL32S_EXTBITS_FM_MMR6<0b101100>; -class DEXTM_MMR6_ENC : POOL32S_EXTBITS_FM_MMR6<0b100100>; -class DEXTU_MMR6_ENC : POOL32S_EXTBITS_FM_MMR6<0b010100>; -class DALIGN_MMR6_ENC : POOL32S_DALIGN_FM_MMR6; -class DDIV_MM64R6_ENC : POOL32A_DIVMOD_FM_MMR6<"ddiv", 0b100011000>; -class DMOD_MM64R6_ENC : POOL32A_DIVMOD_FM_MMR6<"dmod", 0b101011000>; -class DDIVU_MM64R6_ENC : POOL32A_DIVMOD_FM_MMR6<"ddivu", 0b110011000>; -class DMODU_MM64R6_ENC : POOL32A_DIVMOD_FM_MMR6<"dmodu", 0b111011000>; -class DINSU_MM64R6_ENC : POOL32S_EXTBITS_FM_MMR6<0b110100>; -class DINSM_MM64R6_ENC : POOL32S_EXTBITS_FM_MMR6<0b000100>; -class DINS_MM64R6_ENC : POOL32S_EXTBITS_FM_MMR6<0b001100>; -class DMTC0_MM64R6_ENC : POOL32S_DMFTC0_FM_MMR6<"dmtc0", 0b01011>; -class DMTC1_MM64R6_ENC : POOL32F_MFTC1_FM_MMR6<"dmtc1", 0b10110000>; -class DMTC2_MM64R6_ENC : POOL32A_MFTC2_FM_MMR6<"dmtc2", 0b0111110100>; -class DMFC0_MM64R6_ENC : POOL32S_DMFTC0_FM_MMR6<"dmfc0", 0b00011>; -class DMFC1_MM64R6_ENC : POOL32F_MFTC1_FM_MMR6<"dmfc1", 0b10010000>; -class DMFC2_MM64R6_ENC : POOL32A_MFTC2_FM_MMR6<"dmfc2", 0b0110110100>; -class DADD_MM64R6_ENC : POOL32S_ARITH_FM_MMR6<"dadd", 0b100010000>; -class DADDIU_MM64R6_ENC : DADDIU_FM_MMR6<"daddiu">; -class DADDU_MM64R6_ENC : POOL32S_ARITH_FM_MMR6<"daddu", 0b101010000>; -class LDPC_MMR646_ENC : 
PCREL18_FM_MMR6<0b110>; -class DSUB_MM64R6_ENC : POOL32S_ARITH_FM_MMR6<"dsub", 0b110010000>; -class DSUBU_MM64R6_ENC : POOL32S_ARITH_FM_MMR6<"dsubu", 0b111010000>; -class DMUL_MM64R6_ENC : POOL32S_ARITH_FM_MMR6<"dmul", 0b000011000>; -class DMUH_MM64R6_ENC : POOL32S_ARITH_FM_MMR6<"dmuh", 0b001011000>; -class DMULU_MM64R6_ENC : POOL32S_ARITH_FM_MMR6<"dmulu", 0b010011000>; -class DMUHU_MM64R6_ENC : POOL32S_ARITH_FM_MMR6<"dmuhu", 0b011011000>; -class DSBH_MM64R6_ENC : POOL32S_2R_FM_MMR6<"dsbh", 0b0111101100>; -class DSHD_MM64R6_ENC : POOL32S_2R_FM_MMR6<"dshd", 0b1111101100>; -class DSLL_MM64R6_ENC : POOL32S_2RSA5B0_FM_MMR6<"dsll", 0b000000000>; -class DSLL32_MM64R6_ENC : POOL32S_2RSA5B0_FM_MMR6<"dsll32", 0b000001000>; -class DSLLV_MM64R6_ENC : POOL32S_3R_FM_MMR6<"dsllv", 0b000010000>; -class DSRAV_MM64R6_ENC : POOL32S_3R_FM_MMR6<"dsrav", 0b010010000>; -class DSRA_MM64R6_ENC : POOL32S_2RSA5B0_FM_MMR6<"dsra", 0b010000000>; -class DSRA32_MM64R6_ENC : POOL32S_2RSA5B0_FM_MMR6<"dsra32", 0b010000100>; -class DCLO_MM64R6_ENC : POOL32S_2R_FM_MMR6<"dclo", 0b0100101100>; -class DCLZ_MM64R6_ENC : POOL32S_2R_FM_MMR6<"dclz", 0b0101101100>; -class DROTR_MM64R6_ENC : POOL32S_2RSA5B0_FM_MMR6<"drotr", 0b011000000>; -class DROTR32_MM64R6_ENC : POOL32S_2RSA5B0_FM_MMR6<"drotr32", 0b011001000>; -class DROTRV_MM64R6_ENC : POOL32S_3R_FM_MMR6<"drotrv", 0b011010000>; -class LD_MM64R6_ENC : LD_SD_32_2R_OFFSET16_FM_MMR6<"ld", 0b110111>; -class LLD_MM64R6_ENC : POOL32C_2R_OFFSET12_FM_MMR6<"lld", 0b0111>; -class LWU_MM64R6_ENC : POOL32C_2R_OFFSET12_FM_MMR6<"lwu", 0b1110>; -class SD_MM64R6_ENC : LD_SD_32_2R_OFFSET16_FM_MMR6<"sd", 0b110110>; -class DSRL_MM64R6_ENC : POOL32S_2RSA5B0_FM_MMR6<"dsrl", 0b001000000>; -class DSRL32_MM64R6_ENC : POOL32S_2RSA5B0_FM_MMR6<"dsrl32", 0b001001000>; -class DSRLV_MM64R6_ENC : POOL32S_3R_FM_MMR6<"dsrlv", 0b001010000>; -class DBITSWAP_MM64R6_ENC : POOL32S_DBITSWAP_FM_MMR6<"dbitswap">; -class DLSA_MM64R6_ENC : POOL32S_3RSA_FM_MMR6<"dlsa">; -class LWUPC_MM64R6_ENC : PCREL_1ROFFSET19_FM_MMR6<"lwupc">; - -//===----------------------------------------------------------------------===// -// -// Instruction Descriptions -// -//===----------------------------------------------------------------------===// - -class DAUI_MMR6_DESC_BASE - : MMR6Arch, MipsR6Inst { - dag OutOperandList = (outs GPROpnd:$rt); - dag InOperandList = (ins GPROpnd:$rs, uimm16:$imm); - string AsmString = !strconcat(instr_asm, "\t$rt, $rs, $imm"); - list Pattern = []; - InstrItinClass Itinerary = Itin; -} -class DAUI_MMR6_DESC : DAUI_MMR6_DESC_BASE<"daui", GPR64Opnd, II_DAUI>; - -class DAHI_DATI_DESC_BASE - : MMR6Arch, MipsR6Inst { - dag OutOperandList = (outs GPROpnd:$rs); - dag InOperandList = (ins GPROpnd:$rt, uimm16:$imm); - string AsmString = !strconcat(instr_asm, "\t$rt, $rs, $imm"); - string Constraints = "$rs = $rt"; - InstrItinClass Itinerary = Itin; -} -class DAHI_MMR6_DESC : DAHI_DATI_DESC_BASE<"dahi", GPR64Opnd, II_DAHI>; -class DATI_MMR6_DESC : DAHI_DATI_DESC_BASE<"dati", GPR64Opnd, II_DATI>; - -class EXTBITS_DESC_BASE - : MMR6Arch, MipsR6Inst { - dag OutOperandList = (outs RO:$rt); - dag InOperandList = (ins RO:$rs, PosOpnd:$pos, SizeOpnd:$size); - string AsmString = !strconcat(instr_asm, "\t$rt, $rs, $pos, $size"); - list Pattern = [(set RO:$rt, (Op RO:$rs, imm:$pos, imm:$size))]; - InstrItinClass Itinerary = II_EXT; - Format Form = FrmR; - string BaseOpcode = instr_asm; -} -class DEXT_MMR6_DESC : EXTBITS_DESC_BASE<"dext", GPR64Opnd, uimm5_report_uimm6, - uimm5_plus1_report_uimm6, MipsExt>; -class DEXTM_MMR6_DESC : 
EXTBITS_DESC_BASE<"dextm", GPR64Opnd, uimm5, - uimm5_plus33, MipsExt>; -class DEXTU_MMR6_DESC : EXTBITS_DESC_BASE<"dextu", GPR64Opnd, uimm5_plus32, - uimm5_plus1, MipsExt>; - -class DALIGN_DESC_BASE - : MMR6Arch, MipsR6Inst { - dag OutOperandList = (outs GPROpnd:$rd); - dag InOperandList = (ins GPROpnd:$rs, GPROpnd:$rt, ImmOpnd:$bp); - string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt, $bp"); - list Pattern = []; - InstrItinClass Itinerary = itin; -} - -class DALIGN_MMR6_DESC : DALIGN_DESC_BASE<"dalign", GPR64Opnd, uimm3, - II_DALIGN>; - -class DDIV_MM64R6_DESC : DIVMOD_MMR6_DESC_BASE<"ddiv", GPR64Opnd, II_DDIV, - sdiv>; -class DMOD_MM64R6_DESC : DIVMOD_MMR6_DESC_BASE<"dmod", GPR64Opnd, II_DMOD, - srem>; -class DDIVU_MM64R6_DESC : DIVMOD_MMR6_DESC_BASE<"ddivu", GPR64Opnd, II_DDIVU, - udiv>; -class DMODU_MM64R6_DESC : DIVMOD_MMR6_DESC_BASE<"dmodu", GPR64Opnd, II_DMODU, - urem>; - -class DCLO_MM64R6_DESC { - dag OutOperandList = (outs GPR64Opnd:$rt); - dag InOperandList = (ins GPR64Opnd:$rs); - string AsmString = !strconcat("dclo", "\t$rt, $rs"); - list Pattern = [(set GPR64Opnd:$rt, (ctlz (not GPR64Opnd:$rs)))]; - InstrItinClass Itinerary = II_DCLO; - Format Form = FrmR; - string BaseOpcode = "dclo"; -} - -class DCLZ_MM64R6_DESC { - dag OutOperandList = (outs GPR64Opnd:$rt); - dag InOperandList = (ins GPR64Opnd:$rs); - string AsmString = !strconcat("dclz", "\t$rt, $rs"); - list Pattern = [(set GPR64Opnd:$rt, (ctlz GPR64Opnd:$rs))]; - InstrItinClass Itinerary = II_DCLZ; - Format Form = FrmR; - string BaseOpcode = "dclz"; -} - -class DINSU_MM64R6_DESC : InsBase<"dinsu", GPR64Opnd, uimm5_plus32, - uimm5_inssize_plus1, immZExt5Plus32, - immZExt5Plus1>; -class DINSM_MM64R6_DESC : InsBase<"dinsm", GPR64Opnd, uimm5, uimm_range_2_64, - immZExt5, immZExtRange2To64>; -class DINS_MM64R6_DESC : InsBase<"dins", GPR64Opnd, uimm5_report_uimm6, - uimm5_inssize_plus1, immZExt5, immZExt5Plus1>; -class DMTC0_MM64R6_DESC : MTC0_MMR6_DESC_BASE<"dmtc0", COP0Opnd, GPR64Opnd, - II_DMTC0>; -class DMTC1_MM64R6_DESC : MTC1_MMR6_DESC_BASE<"dmtc1", FGR64Opnd, GPR64Opnd, - II_DMTC1, bitconvert>; -class DMTC2_MM64R6_DESC : MTC2_MMR6_DESC_BASE<"dmtc2", COP2Opnd, GPR64Opnd, - II_DMTC2>; -class DMFC0_MM64R6_DESC : MFC0_MMR6_DESC_BASE<"dmfc0", GPR64Opnd, COP0Opnd, - II_DMFC0>; -class DMFC1_MM64R6_DESC : MFC1_MMR6_DESC_BASE<"dmfc1", GPR64Opnd, FGR64Opnd, - II_DMFC1, bitconvert>; -class DMFC2_MM64R6_DESC : MFC2_MMR6_DESC_BASE<"dmfc2", GPR64Opnd, COP2Opnd, - II_DMFC2>; -class DADD_MM64R6_DESC : ArithLogicR<"dadd", GPR64Opnd, 1, II_DADD>; -class DADDIU_MM64R6_DESC : ArithLogicI<"daddiu", simm16_64, GPR64Opnd, - II_DADDIU, immSExt16, add>, - IsAsCheapAsAMove; -class DADDU_MM64R6_DESC : ArithLogicR<"daddu", GPR64Opnd, 1, II_DADDU, add>; - -class DSUB_DESC_BASE - : MipsR6Inst { - dag OutOperandList = (outs RO:$rd); - dag InOperandList = (ins RO:$rs, RO:$rt); - string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt"); - list Pattern = [(set RO:$rd, (OpNode RO:$rs, RO:$rt))]; - InstrItinClass Itinerary = Itin; - Format Form = FrmR; - string BaseOpcode = instr_asm; - let isCommutable = 0; - let isReMaterializable = 1; - let TwoOperandAliasConstraint = "$rd = $rs"; -} -class DSUB_MM64R6_DESC : DSUB_DESC_BASE<"dsub", GPR64Opnd, II_DSUB>; -class DSUBU_MM64R6_DESC : DSUB_DESC_BASE<"dsubu", GPR64Opnd, II_DSUBU, sub>; - -class LDPC_MM64R6_DESC : PCREL_MMR6_DESC_BASE<"ldpc", GPR64Opnd, simm18_lsl3, - II_LDPC>; - -class MUL_MM64R6_DESC_BASE : MipsR6Inst { - dag OutOperandList = (outs GPROpnd:$rd); - dag InOperandList = (ins 
GPROpnd:$rs, GPROpnd:$rt); - string AsmString = !strconcat(opstr, "\t$rd, $rs, $rt"); - InstrItinClass Itinerary = Itin; - list Pattern = [(set GPROpnd:$rd, (Op GPROpnd:$rs, GPROpnd:$rt))]; -} - -class DMUL_MM64R6_DESC : MUL_MM64R6_DESC_BASE<"dmul", GPR64Opnd, II_DMUL, mul>; -class DMUH_MM64R6_DESC : MUL_MM64R6_DESC_BASE<"dmuh", GPR64Opnd, II_DMUH, - mulhs>; -class DMULU_MM64R6_DESC : MUL_MM64R6_DESC_BASE<"dmulu", GPR64Opnd, II_DMULU>; -class DMUHU_MM64R6_DESC : MUL_MM64R6_DESC_BASE<"dmuhu", GPR64Opnd, II_DMUHU, - mulhu>; - -class DSBH_DSHD_DESC_BASE { - dag OutOperandList = (outs GPROpnd:$rt); - dag InOperandList = (ins GPROpnd:$rs); - string AsmString = !strconcat(instr_asm, "\t$rt, $rs"); - bit hasSideEffects = 0; - list Pattern = []; - InstrItinClass Itinerary = Itin; - Format Form = FrmR; - string BaseOpcode = instr_asm; -} - -class DSBH_MM64R6_DESC : DSBH_DSHD_DESC_BASE<"dsbh", GPR64Opnd, II_DSBH>; -class DSHD_MM64R6_DESC : DSBH_DSHD_DESC_BASE<"dshd", GPR64Opnd, II_DSHD>; - -class SHIFT_ROTATE_IMM_MM64R6 { - dag OutOperandList = (outs GPR64Opnd:$rt); - dag InOperandList = (ins GPR64Opnd:$rs, ImmOpnd:$sa); - string AsmString = !strconcat(instr_asm, "\t$rt, $rs, $sa"); - list Pattern = [(set GPR64Opnd:$rt, (OpNode GPR64Opnd:$rs, PO:$sa))]; - InstrItinClass Itinerary = itin; - Format Form = FrmR; - string TwoOperandAliasConstraint = "$rs = $rt"; - string BaseOpcode = instr_asm; -} - -class SHIFT_ROTATE_REG_MM64R6 { - dag OutOperandList = (outs GPR64Opnd:$rd); - dag InOperandList = (ins GPR64Opnd:$rt, GPR32Opnd:$rs); - string AsmString = !strconcat(instr_asm, "\t$rd, $rt, $rs"); - list Pattern = [(set GPR64Opnd:$rd, - (OpNode GPR64Opnd:$rt, GPR32Opnd:$rs))]; - InstrItinClass Itinerary = itin; - Format Form = FrmR; - string BaseOpcode = instr_asm; -} - -class DSLL_MM64R6_DESC : SHIFT_ROTATE_IMM_MM64R6<"dsll", uimm6, II_DSLL, shl, - immZExt6>; -class DSLL32_MM64R6_DESC : SHIFT_ROTATE_IMM_MM64R6<"dsll32", uimm5, II_DSLL32>; -class DSLLV_MM64R6_DESC : SHIFT_ROTATE_REG_MM64R6<"dsllv", II_DSLLV, shl>; -class DSRAV_MM64R6_DESC : SHIFT_ROTATE_REG_MM64R6<"dsrav", II_DSRAV, sra>; -class DSRA_MM64R6_DESC : SHIFT_ROTATE_IMM_MM64R6<"dsra", uimm6, II_DSRA, sra, - immZExt6>; -class DSRA32_MM64R6_DESC : SHIFT_ROTATE_IMM_MM64R6<"dsra32", uimm5, II_DSRA32>; -class DROTR_MM64R6_DESC : SHIFT_ROTATE_IMM_MM64R6<"drotr", uimm6, II_DROTR, - rotr, immZExt6>; -class DROTR32_MM64R6_DESC : SHIFT_ROTATE_IMM_MM64R6<"drotr32", uimm5, - II_DROTR32>; -class DROTRV_MM64R6_DESC : SHIFT_ROTATE_REG_MM64R6<"drotrv", II_DROTRV, rotr>; -class DSRL_MM64R6_DESC : SHIFT_ROTATE_IMM_MM64R6<"dsrl", uimm6, II_DSRL, srl, - immZExt6>; -class DSRL32_MM64R6_DESC : SHIFT_ROTATE_IMM_MM64R6<"dsrl32", uimm5, II_DSRL32>; -class DSRLV_MM64R6_DESC : SHIFT_ROTATE_REG_MM64R6<"dsrlv", II_DSRLV, srl>; - -class Load_MM64R6 { - dag OutOperandList = (outs GPR64Opnd:$rt); - dag InOperandList = (ins MemOpnd:$addr); - string AsmString = !strconcat(instr_asm, "\t$rt, $addr"); - list Pattern = [(set GPR64Opnd:$rt, (OpNode addr:$addr))]; - InstrItinClass Itinerary = itin; - Format Form = FrmI; - bit mayLoad = 1; - bit canFoldAsLoad = 1; - string BaseOpcode = instr_asm; -} - -class LD_MM64R6_DESC : Load_MM64R6<"ld", mem_simm16, II_LD, load> { - string DecoderMethod = "DecodeMemMMImm16"; -} -class LWU_MM64R6_DESC : Load_MM64R6<"lwu", mem_simm12, II_LWU, zextloadi32>{ - string DecoderMethod = "DecodeMemMMImm12"; -} - -class LLD_MM64R6_DESC { - dag OutOperandList = (outs GPR64Opnd:$rt); - dag InOperandList = (ins mem_simm12:$addr); - string AsmString = 
"lld\t$rt, $addr"; - list Pattern = []; - bit mayLoad = 1; - InstrItinClass Itinerary = II_LLD; - string BaseOpcode = "lld"; - string DecoderMethod = "DecodeMemMMImm12"; -} - -class SD_MM64R6_DESC { - dag OutOperandList = (outs); - dag InOperandList = (ins GPR64Opnd:$rt, mem_simm16:$addr); - string AsmString = "sd\t$rt, $addr"; - list Pattern = [(store GPR64Opnd:$rt, addr:$addr)]; - InstrItinClass Itinerary = II_SD; - Format Form = FrmI; - bit mayStore = 1; - string BaseOpcode = "sd"; - string DecoderMethod = "DecodeMemMMImm16"; -} - -class DBITSWAP_MM64R6_DESC { - dag OutOperandList = (outs GPR64Opnd:$rd); - dag InOperandList = (ins GPR64Opnd:$rt); - string AsmString = !strconcat("dbitswap", "\t$rd, $rt"); - list Pattern = []; - InstrItinClass Itinerary = II_DBITSWAP; -} - -class DLSA_MM64R6_DESC { - dag OutOperandList = (outs GPR64Opnd:$rd); - dag InOperandList = (ins GPR64Opnd:$rt, GPR64Opnd:$rs, uimm2_plus1:$sa); - string AsmString = "dlsa\t$rt, $rs, $rd, $sa"; - list Pattern = []; - InstrItinClass Itinerary = II_DLSA; -} - -class LWUPC_MM64R6_DESC { - dag OutOperandList = (outs GPR64Opnd:$rt); - dag InOperandList = (ins simm19_lsl2:$offset); - string AsmString = "lwupc\t$rt, $offset"; - list Pattern = []; - InstrItinClass Itinerary = II_LWUPC; - bit mayLoad = 1; - bit IsPCRelativeLoad = 1; -} - -//===----------------------------------------------------------------------===// -// -// Instruction Definitions -// -//===----------------------------------------------------------------------===// - -let DecoderNamespace = "MicroMipsR6" in { - def DAUI_MM64R6 : StdMMR6Rel, DAUI_MMR6_DESC, DAUI_MMR6_ENC, ISA_MICROMIPS64R6; - let DecoderMethod = "DecodeDAHIDATIMMR6" in { - def DAHI_MM64R6 : StdMMR6Rel, DAHI_MMR6_DESC, DAHI_MMR6_ENC, ISA_MICROMIPS64R6; - def DATI_MM64R6 : StdMMR6Rel, DATI_MMR6_DESC, DATI_MMR6_ENC, ISA_MICROMIPS64R6; - } - let DecoderMethod = "DecodeDEXT" in { - def DEXT_MM64R6 : StdMMR6Rel, DEXT_MMR6_DESC, DEXT_MMR6_ENC, - ISA_MICROMIPS64R6; - def DEXTM_MM64R6 : StdMMR6Rel, DEXTM_MMR6_DESC, DEXTM_MMR6_ENC, - ISA_MICROMIPS64R6; - def DEXTU_MM64R6 : StdMMR6Rel, DEXTU_MMR6_DESC, DEXTU_MMR6_ENC, - ISA_MICROMIPS64R6; - } - def DALIGN_MM64R6 : StdMMR6Rel, DALIGN_MMR6_DESC, DALIGN_MMR6_ENC, - ISA_MICROMIPS64R6; - def DDIV_MM64R6 : R6MMR6Rel, DDIV_MM64R6_DESC, DDIV_MM64R6_ENC, - ISA_MICROMIPS64R6; - def DMOD_MM64R6 : R6MMR6Rel, DMOD_MM64R6_DESC, DMOD_MM64R6_ENC, - ISA_MICROMIPS64R6; - def DDIVU_MM64R6 : R6MMR6Rel, DDIVU_MM64R6_DESC, DDIVU_MM64R6_ENC, - ISA_MICROMIPS64R6; - def DMODU_MM64R6 : R6MMR6Rel, DMODU_MM64R6_DESC, DMODU_MM64R6_ENC, - ISA_MICROMIPS64R6; - let DecoderMethod = "DecodeDINS" in { - def DINSU_MM64R6: R6MMR6Rel, DINSU_MM64R6_DESC, DINSU_MM64R6_ENC, - ISA_MICROMIPS64R6; - def DINSM_MM64R6: R6MMR6Rel, DINSM_MM64R6_DESC, DINSM_MM64R6_ENC, - ISA_MICROMIPS64R6; - def DINS_MM64R6: R6MMR6Rel, DINS_MM64R6_DESC, DINS_MM64R6_ENC, - ISA_MICROMIPS64R6; - } - def DMTC0_MM64R6 : StdMMR6Rel, DMTC0_MM64R6_ENC, DMTC0_MM64R6_DESC, - ISA_MICROMIPS64R6; - def DMTC1_MM64R6 : StdMMR6Rel, DMTC1_MM64R6_DESC, DMTC1_MM64R6_ENC, - ISA_MICROMIPS64R6; - def DMTC2_MM64R6 : StdMMR6Rel, DMTC2_MM64R6_ENC, DMTC2_MM64R6_DESC, - ISA_MICROMIPS64R6; - def DMFC0_MM64R6 : StdMMR6Rel, DMFC0_MM64R6_ENC, DMFC0_MM64R6_DESC, - ISA_MICROMIPS64R6; - def DMFC1_MM64R6 : StdMMR6Rel, DMFC1_MM64R6_DESC, DMFC1_MM64R6_ENC, - ISA_MICROMIPS64R6; - def DMFC2_MM64R6 : StdMMR6Rel, DMFC2_MM64R6_ENC, DMFC2_MM64R6_DESC, - ISA_MICROMIPS64R6; - def DADD_MM64R6: StdMMR6Rel, DADD_MM64R6_DESC, DADD_MM64R6_ENC, - ISA_MICROMIPS64R6; - 
def DADDIU_MM64R6: StdMMR6Rel, DADDIU_MM64R6_DESC, DADDIU_MM64R6_ENC, - ISA_MICROMIPS64R6; - def DADDU_MM64R6: StdMMR6Rel, DADDU_MM64R6_DESC, DADDU_MM64R6_ENC, - ISA_MICROMIPS64R6; - def LDPC_MM64R6 : R6MMR6Rel, LDPC_MMR646_ENC, LDPC_MM64R6_DESC, - ISA_MICROMIPS64R6; - def DSUB_MM64R6 : StdMMR6Rel, DSUB_MM64R6_DESC, DSUB_MM64R6_ENC, - ISA_MICROMIPS64R6; - def DSUBU_MM64R6 : StdMMR6Rel, DSUBU_MM64R6_DESC, DSUBU_MM64R6_ENC, - ISA_MICROMIPS64R6; - def DMUL_MM64R6 : R6MMR6Rel, DMUL_MM64R6_DESC, DMUL_MM64R6_ENC, - ISA_MICROMIPS64R6; - def DMUH_MM64R6 : R6MMR6Rel, DMUH_MM64R6_DESC, DMUH_MM64R6_ENC, - ISA_MICROMIPS64R6; - def DMULU_MM64R6 : R6MMR6Rel, DMULU_MM64R6_DESC, DMULU_MM64R6_ENC, - ISA_MICROMIPS64R6; - def DMUHU_MM64R6 : R6MMR6Rel, DMUHU_MM64R6_DESC, DMUHU_MM64R6_ENC, - ISA_MICROMIPS64R6; - def DSBH_MM64R6 : R6MMR6Rel, DSBH_MM64R6_ENC, DSBH_MM64R6_DESC, - ISA_MICROMIPS64R6; - def DSHD_MM64R6 : R6MMR6Rel, DSHD_MM64R6_ENC, DSHD_MM64R6_DESC, - ISA_MICROMIPS64R6; - def DSLL_MM64R6 : StdMMR6Rel, DSLL_MM64R6_ENC, DSLL_MM64R6_DESC, - ISA_MICROMIPS64R6; - def DSLL32_MM64R6 : StdMMR6Rel, DSLL32_MM64R6_ENC, DSLL32_MM64R6_DESC, - ISA_MICROMIPS64R6; - def DSLLV_MM64R6 : StdMMR6Rel, DSLLV_MM64R6_ENC, DSLLV_MM64R6_DESC, - ISA_MICROMIPS64R6; - def DSRAV_MM64R6 : StdMMR6Rel, DSRAV_MM64R6_ENC, DSRAV_MM64R6_DESC, - ISA_MICROMIPS64R6; - def DSRA_MM64R6 : StdMMR6Rel, DSRA_MM64R6_ENC, DSRA_MM64R6_DESC, - ISA_MICROMIPS64R6; - def DSRA32_MM64R6 : StdMMR6Rel, DSRA32_MM64R6_ENC, DSRA32_MM64R6_DESC, - ISA_MICROMIPS64R6; - def DCLO_MM64R6 : StdMMR6Rel, R6MMR6Rel, DCLO_MM64R6_ENC, DCLO_MM64R6_DESC, - ISA_MICROMIPS64R6; - def DCLZ_MM64R6 : StdMMR6Rel, R6MMR6Rel, DCLZ_MM64R6_ENC, DCLZ_MM64R6_DESC, - ISA_MICROMIPS64R6; - def DROTR_MM64R6 : StdMMR6Rel, DROTR_MM64R6_ENC, DROTR_MM64R6_DESC, - ISA_MICROMIPS64R6; - def DROTR32_MM64R6 : StdMMR6Rel, DROTR32_MM64R6_ENC, DROTR32_MM64R6_DESC, - ISA_MICROMIPS64R6; - def DROTRV_MM64R6 : StdMMR6Rel, DROTRV_MM64R6_ENC, DROTRV_MM64R6_DESC, - ISA_MICROMIPS64R6; - def LD_MM64R6 : StdMMR6Rel, LD_MM64R6_ENC, LD_MM64R6_DESC, - ISA_MICROMIPS64R6; - def LLD_MM64R6 : StdMMR6Rel, R6MMR6Rel, LLD_MM64R6_ENC, LLD_MM64R6_DESC, - ISA_MICROMIPS64R6; - def LWU_MM64R6 : StdMMR6Rel, LWU_MM64R6_ENC, LWU_MM64R6_DESC, - ISA_MICROMIPS64R6; - def SD_MM64R6 : StdMMR6Rel, SD_MM64R6_ENC, SD_MM64R6_DESC, - ISA_MICROMIPS64R6; - def DSRL_MM64R6 : StdMMR6Rel, DSRL_MM64R6_ENC, DSRL_MM64R6_DESC, - ISA_MICROMIPS64R6; - def DSRL32_MM64R6 : StdMMR6Rel, DSRL32_MM64R6_ENC, DSRL32_MM64R6_DESC, - ISA_MICROMIPS64R6; - def DSRLV_MM64R6 : StdMMR6Rel, DSRLV_MM64R6_ENC, DSRLV_MM64R6_DESC, - ISA_MICROMIPS64R6; - def DBITSWAP_MM64R6 : R6MMR6Rel, DBITSWAP_MM64R6_ENC, DBITSWAP_MM64R6_DESC, - ISA_MICROMIPS64R6; - def DLSA_MM64R6 : R6MMR6Rel, DLSA_MM64R6_ENC, DLSA_MM64R6_DESC, - ISA_MICROMIPS64R6; - def LWUPC_MM64R6 : R6MMR6Rel, LWUPC_MM64R6_ENC, LWUPC_MM64R6_DESC, - ISA_MICROMIPS64R6; -} - -let AdditionalPredicates = [InMicroMips] in -defm : MaterializeImms; - -//===----------------------------------------------------------------------===// -// -// Arbitrary patterns that map to one or more instructions -// -//===----------------------------------------------------------------------===// - -defm : MipsHiLoRelocs, SYM_32, - ISA_MICROMIPS64R6; - -defm : MipsHighestHigherHiLoRelocs, SYM_64, - ISA_MICROMIPS64R6; - -def : MipsPat<(addc GPR64:$lhs, GPR64:$rhs), - (DADDU_MM64R6 GPR64:$lhs, GPR64:$rhs)>, ISA_MICROMIPS64R6; -def : MipsPat<(addc GPR64:$lhs, immSExt16:$imm), - (DADDIU_MM64R6 GPR64:$lhs, imm:$imm)>, ISA_MICROMIPS64R6; - - -def 
: MipsPat<(rotr GPR64:$rt, (i32 (trunc GPR64:$rs))), - (DROTRV_MM64R6 GPR64:$rt, (EXTRACT_SUBREG GPR64:$rs, sub_32))>, - ISA_MICROMIPS64R6; - - -def : WrapperPat, ISA_MICROMIPS64R6; -def : WrapperPat, ISA_MICROMIPS64R6; -def : WrapperPat, ISA_MICROMIPS64R6; -def : WrapperPat, ISA_MICROMIPS64R6; -def : WrapperPat, ISA_MICROMIPS64R6; -def : WrapperPat, ISA_MICROMIPS64R6; - -// Carry pattern -def : MipsPat<(subc GPR64:$lhs, GPR64:$rhs), - (DSUBU_MM64R6 GPR64:$lhs, GPR64:$rhs)>, ISA_MICROMIPS64R6; - -def : MipsPat<(atomic_load_64 addr:$a), (LD_MM64R6 addr:$a)>, ISA_MICROMIPS64R6; - -//===----------------------------------------------------------------------===// -// -// Instruction aliases -// -//===----------------------------------------------------------------------===// - -def : MipsInstAlias<"dmtc0 $rt, $rd", - (DMTC0_MM64R6 COP0Opnd:$rd, GPR64Opnd:$rt, 0), 0>; -def : MipsInstAlias<"dmfc0 $rt, $rd", - (DMFC0_MM64R6 GPR64Opnd:$rt, COP0Opnd:$rd, 0), 0>, - ISA_MICROMIPS64R6; -def : MipsInstAlias<"daddu $rs, $rt, $imm", - (DADDIU_MM64R6 GPR64Opnd:$rs, - GPR64Opnd:$rt, - simm16_64:$imm), - 0>, ISA_MICROMIPS64R6; -def : MipsInstAlias<"daddu $rs, $imm", - (DADDIU_MM64R6 GPR64Opnd:$rs, - GPR64Opnd:$rs, - simm16_64:$imm), - 0>, ISA_MICROMIPS64R6; -def : MipsInstAlias<"dsubu $rt, $rs, $imm", - (DADDIU_MM64R6 GPR64Opnd:$rt, - GPR64Opnd:$rs, - InvertedImOperand64:$imm), - 0>, ISA_MICROMIPS64R6; -def : MipsInstAlias<"dsubu $rs, $imm", - (DADDIU_MM64R6 GPR64Opnd:$rs, - GPR64Opnd:$rs, - InvertedImOperand64:$imm), - 0>, ISA_MICROMIPS64R6; -def : MipsInstAlias<"dneg $rt, $rs", - (DSUB_MM64R6 GPR64Opnd:$rt, ZERO_64, GPR64Opnd:$rs), 1>, - ISA_MICROMIPS64R6; -def : MipsInstAlias<"dneg $rt", - (DSUB_MM64R6 GPR64Opnd:$rt, ZERO_64, GPR64Opnd:$rt), 1>, - ISA_MICROMIPS64R6; -def : MipsInstAlias<"dnegu $rt, $rs", - (DSUBU_MM64R6 GPR64Opnd:$rt, ZERO_64, GPR64Opnd:$rs), 1>, - ISA_MICROMIPS64R6; -def : MipsInstAlias<"dnegu $rt", - (DSUBU_MM64R6 GPR64Opnd:$rt, ZERO_64, GPR64Opnd:$rt), 1>, - ISA_MICROMIPS64R6; -def : MipsInstAlias<"dsll $rd, $rt, $rs", - (DSLLV_MM64R6 GPR64Opnd:$rd, GPR64Opnd:$rt, - GPR32Opnd:$rs), 0>, ISA_MICROMIPS64R6; -def : MipsInstAlias<"dsrl $rd, $rt, $rs", - (DSRLV_MM64R6 GPR64Opnd:$rd, GPR64Opnd:$rt, - GPR32Opnd:$rs), 0>, ISA_MICROMIPS64R6; -def : MipsInstAlias<"dsrl $rd, $rt", - (DSRLV_MM64R6 GPR64Opnd:$rd, GPR64Opnd:$rd, - GPR32Opnd:$rt), 0>, ISA_MICROMIPS64R6; -def : MipsInstAlias<"dsll $rd, $rt", - (DSLLV_MM64R6 GPR64Opnd:$rd, GPR64Opnd:$rd, - GPR32Opnd:$rt), 0>, ISA_MICROMIPS64R6; -def : MipsInstAlias<"dins $rt, $rs, $pos, $size", - (DINSM_MM64R6 GPR64Opnd:$rt, GPR64Opnd:$rs, uimm5:$pos, - uimm_range_2_64:$size), 0>, ISA_MICROMIPS64R6; -def : MipsInstAlias<"dins $rt, $rs, $pos, $size", - (DINSU_MM64R6 GPR64Opnd:$rt, GPR64Opnd:$rs, - uimm5_plus32:$pos, uimm5_plus1:$size), 0>, - ISA_MICROMIPS64R6; -def : MipsInstAlias<"dext $rt, $rs, $pos, $size", - (DEXTM_MM64R6 GPR64Opnd:$rt, GPR64Opnd:$rs, uimm5:$pos, - uimm5_plus33:$size), 0>, - ISA_MICROMIPS64R6; -def : MipsInstAlias<"dext $rt, $rs, $pos, $size", - (DEXTU_MM64R6 GPR64Opnd:$rt, GPR64Opnd:$rs, - uimm5_plus32:$pos, uimm5_plus1:$size), 0>, - ISA_MICROMIPS64R6; - diff --git a/lib/Target/Mips/MicroMipsInstrInfo.td b/lib/Target/Mips/MicroMipsInstrInfo.td index 19af30d4fbbf..64fe55e9776b 100644 --- a/lib/Target/Mips/MicroMipsInstrInfo.td +++ b/lib/Target/Mips/MicroMipsInstrInfo.td @@ -587,24 +587,24 @@ class UncondBranchMM16 : } def ADDU16_MM : ArithRMM16<"addu16", GPRMM16Opnd, 1, II_ADDU, add>, - ARITH_FM_MM16<0>, ISA_MICROMIPS_NOT_32R6_64R6; 
+ ARITH_FM_MM16<0>, ISA_MICROMIPS_NOT_32R6; def AND16_MM : LogicRMM16<"and16", GPRMM16Opnd, II_AND, and>, - LOGIC_FM_MM16<0x2>, ISA_MICROMIPS_NOT_32R6_64R6; + LOGIC_FM_MM16<0x2>, ISA_MICROMIPS_NOT_32R6; def ANDI16_MM : AndImmMM16<"andi16", GPRMM16Opnd, II_AND>, ANDI_FM_MM16<0x0b>, - ISA_MICROMIPS_NOT_32R6_64R6; + ISA_MICROMIPS_NOT_32R6; def NOT16_MM : NotMM16<"not16", GPRMM16Opnd>, LOGIC_FM_MM16<0x0>, - ISA_MICROMIPS_NOT_32R6_64R6; + ISA_MICROMIPS_NOT_32R6; def OR16_MM : LogicRMM16<"or16", GPRMM16Opnd, II_OR, or>, LOGIC_FM_MM16<0x3>, - ISA_MICROMIPS_NOT_32R6_64R6; + ISA_MICROMIPS_NOT_32R6; def SLL16_MM : ShiftIMM16<"sll16", uimm3_shift, GPRMM16Opnd, II_SLL>, - SHIFT_FM_MM16<0>, ISA_MICROMIPS_NOT_32R6_64R6; + SHIFT_FM_MM16<0>, ISA_MICROMIPS_NOT_32R6; def SRL16_MM : ShiftIMM16<"srl16", uimm3_shift, GPRMM16Opnd, II_SRL>, - SHIFT_FM_MM16<1>, ISA_MICROMIPS_NOT_32R6_64R6; + SHIFT_FM_MM16<1>, ISA_MICROMIPS_NOT_32R6; def SUBU16_MM : ArithRMM16<"subu16", GPRMM16Opnd, 0, II_SUBU, sub>, - ARITH_FM_MM16<1>, ISA_MICROMIPS_NOT_32R6_64R6; + ARITH_FM_MM16<1>, ISA_MICROMIPS_NOT_32R6; def XOR16_MM : LogicRMM16<"xor16", GPRMM16Opnd, II_XOR, xor>, - LOGIC_FM_MM16<0x1>, ISA_MICROMIPS_NOT_32R6_64R6; + LOGIC_FM_MM16<0x1>, ISA_MICROMIPS_NOT_32R6; def LBU16_MM : LoadMM16<"lbu16", GPRMM16Opnd, zextloadi8, II_LBU, mem_mm_4>, LOAD_STORE_FM_MM16<0x02>; def LHU16_MM : LoadMM16<"lhu16", GPRMM16Opnd, zextloadi16, II_LHU, @@ -632,7 +632,7 @@ def MFHI16_MM : MoveFromHILOMM<"mfhi", GPR32Opnd, AC0>, MFHILO_FM_MM16<0x10>; def MFLO16_MM : MoveFromHILOMM<"mflo", GPR32Opnd, AC0>, MFHILO_FM_MM16<0x12>; def MOVE16_MM : MoveMM16<"move", GPR32Opnd>, MOVE_FM_MM16<0x03>; def MOVEP_MM : MovePMM16<"movep", GPRMM16OpndMoveP>, MOVEP_FM_MM16, - ISA_MICROMIPS_NOT_32R6_64R6; + ISA_MICROMIPS_NOT_32R6; def LI16_MM : LoadImmMM16<"li16", li16_imm, GPRMM16Opnd>, LI_FM_MM16, IsAsCheapAsAMove; def JALR16_MM : JumpLinkRegMM16<"jalr", GPR32Opnd>, JALR_FM_MM16<0x0e>, @@ -647,9 +647,9 @@ def BNEZ16_MM : CBranchZeroMM<"bnez16", brtarget7_mm, GPRMM16Opnd>, BEQNEZ_FM_MM16<0x2b>; def B16_MM : UncondBranchMM16<"b16">, B16_FM; def BREAK16_MM : BrkSdbbp16MM<"break16", II_BREAK>, BRKSDBBP16_FM_MM<0x28>, - ISA_MICROMIPS_NOT_32R6_64R6; + ISA_MICROMIPS_NOT_32R6; def SDBBP16_MM : BrkSdbbp16MM<"sdbbp16", II_SDBBP>, BRKSDBBP16_FM_MM<0x2C>, - ISA_MICROMIPS_NOT_32R6_64R6; + ISA_MICROMIPS_NOT_32R6; let DecoderNamespace = "MicroMips" in { /// Load and Store Instructions - multiple diff --git a/lib/Target/Mips/MicroMipsSizeReduction.cpp b/lib/Target/Mips/MicroMipsSizeReduction.cpp index cf2bf0be556c..f2e014084e46 100644 --- a/lib/Target/Mips/MicroMipsSizeReduction.cpp +++ b/lib/Target/Mips/MicroMipsSizeReduction.cpp @@ -495,8 +495,7 @@ bool MicroMipsSizeReduce::runOnMachineFunction(MachineFunction &MF) { Subtarget = &static_cast(MF.getSubtarget()); - // TODO: Add support for other subtargets: - // microMIPS32r6 and microMIPS64r6 + // TODO: Add support for the subtarget microMIPS32R6. 
if (!Subtarget->inMicroMipsMode() || !Subtarget->hasMips32r2() || Subtarget->hasMips32r6()) return false; diff --git a/lib/Target/Mips/Mips32r6InstrInfo.td b/lib/Target/Mips/Mips32r6InstrInfo.td index 28b911e69be2..62f045e77fdb 100644 --- a/lib/Target/Mips/Mips32r6InstrInfo.td +++ b/lib/Target/Mips/Mips32r6InstrInfo.td @@ -822,9 +822,7 @@ let AdditionalPredicates = [NotInMicroMips] in { def BC1NEZ : BC1NEZ_ENC, BC1NEZ_DESC, ISA_MIPS32R6, HARDFLOAT; def BC2EQZ : BC2EQZ_ENC, BC2EQZ_DESC, ISA_MIPS32R6; def BC2NEZ : BC2NEZ_ENC, BC2NEZ_DESC, ISA_MIPS32R6; -} -def BC : R6MMR6Rel, BC_ENC, BC_DESC, ISA_MIPS32R6; -let AdditionalPredicates = [NotInMicroMips] in { + def BC : R6MMR6Rel, BC_ENC, BC_DESC, ISA_MIPS32R6; def BEQC : R6MMR6Rel, BEQC_ENC, BEQC_DESC, ISA_MIPS32R6; def BEQZALC : R6MMR6Rel, BEQZALC_ENC, BEQZALC_DESC, ISA_MIPS32R6; def BEQZC : R6MMR6Rel, BEQZC_ENC, BEQZC_DESC, ISA_MIPS32R6; diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td index dbd47de4dad1..e008aeafaa2b 100644 --- a/lib/Target/Mips/Mips64InstrInfo.td +++ b/lib/Target/Mips/Mips64InstrInfo.td @@ -99,8 +99,8 @@ let DecoderNamespace = "Mips64" in { def DADDi : ArithLogicI<"daddi", simm16_64, GPR64Opnd, II_DADDI>, ADDI_FM<0x18>, ISA_MIPS3_NOT_32R6_64R6; let AdditionalPredicates = [NotInMicroMips] in { - def DADDiu : StdMMR6Rel, ArithLogicI<"daddiu", simm16_64, GPR64Opnd, - II_DADDIU, immSExt16, add>, + def DADDiu : ArithLogicI<"daddiu", simm16_64, GPR64Opnd, II_DADDIU, + immSExt16, add>, ADDI_FM<0x19>, IsAsCheapAsAMove, ISA_MIPS3; } @@ -120,13 +120,13 @@ def LUi64 : LoadUpper<"lui", GPR64Opnd, uimm16_64_relaxed>, LUI_FM; /// Arithmetic Instructions (3-Operand, R-Type) let AdditionalPredicates = [NotInMicroMips] in { - def DADD : StdMMR6Rel, ArithLogicR<"dadd", GPR64Opnd, 1, II_DADD>, - ADD_FM<0, 0x2c>, ISA_MIPS3; - def DADDu : StdMMR6Rel, ArithLogicR<"daddu", GPR64Opnd, 1, II_DADDU, add>, - ADD_FM<0, 0x2d>, ISA_MIPS3; - def DSUBu : StdMMR6Rel, ArithLogicR<"dsubu", GPR64Opnd, 0, II_DSUBU, sub>, ADD_FM<0, 0x2f>, + def DADD : ArithLogicR<"dadd", GPR64Opnd, 1, II_DADD>, ADD_FM<0, 0x2c>, ISA_MIPS3; - def DSUB : StdMMR6Rel, ArithLogicR<"dsub", GPR64Opnd, 0, II_DSUB>, ADD_FM<0, 0x2e>, + def DADDu : ArithLogicR<"daddu", GPR64Opnd, 1, II_DADDU, add>, + ADD_FM<0, 0x2d>, ISA_MIPS3; + def DSUBu : ArithLogicR<"dsubu", GPR64Opnd, 0, II_DSUBU, sub>, + ADD_FM<0, 0x2f>, ISA_MIPS3; + def DSUB : ArithLogicR<"dsub", GPR64Opnd, 0, II_DSUB>, ADD_FM<0, 0x2e>, ISA_MIPS3; } @@ -141,40 +141,35 @@ def NOR64 : LogicNOR<"nor", GPR64Opnd>, ADD_FM<0, 0x27>; /// Shift Instructions let AdditionalPredicates = [NotInMicroMips] in { - def DSLL : StdMMR6Rel, shift_rotate_imm<"dsll", uimm6, GPR64Opnd, II_DSLL, - shl, immZExt6>, + def DSLL : shift_rotate_imm<"dsll", uimm6, GPR64Opnd, II_DSLL, shl, + immZExt6>, SRA_FM<0x38, 0>, ISA_MIPS3; - def DSRL : StdMMR6Rel, shift_rotate_imm<"dsrl", uimm6, GPR64Opnd, II_DSRL, - srl, immZExt6>, + def DSRL : shift_rotate_imm<"dsrl", uimm6, GPR64Opnd, II_DSRL, srl, + immZExt6>, SRA_FM<0x3a, 0>, ISA_MIPS3; - def DSRA : StdMMR6Rel, shift_rotate_imm<"dsra", uimm6, GPR64Opnd, II_DSRA, - sra, immZExt6>, + def DSRA : shift_rotate_imm<"dsra", uimm6, GPR64Opnd, II_DSRA, sra, + immZExt6>, SRA_FM<0x3b, 0>, ISA_MIPS3; - def DSLLV : StdMMR6Rel, shift_rotate_reg<"dsllv", GPR64Opnd, II_DSLLV, shl>, + def DSLLV : shift_rotate_reg<"dsllv", GPR64Opnd, II_DSLLV, shl>, SRLV_FM<0x14, 0>, ISA_MIPS3; - def DSRAV : StdMMR6Rel, shift_rotate_reg<"dsrav", GPR64Opnd, II_DSRAV, sra>, + def DSRAV : shift_rotate_reg<"dsrav", 
GPR64Opnd, II_DSRAV, sra>, SRLV_FM<0x17, 0>, ISA_MIPS3; - def DSRLV : StdMMR6Rel, shift_rotate_reg<"dsrlv", GPR64Opnd, II_DSRLV, srl>, + def DSRLV : shift_rotate_reg<"dsrlv", GPR64Opnd, II_DSRLV, srl>, SRLV_FM<0x16, 0>, ISA_MIPS3; - def DSLL32 : StdMMR6Rel, shift_rotate_imm<"dsll32", uimm5, GPR64Opnd, - II_DSLL32>, + def DSLL32 : shift_rotate_imm<"dsll32", uimm5, GPR64Opnd, II_DSLL32>, SRA_FM<0x3c, 0>, ISA_MIPS3; - def DSRL32 : StdMMR6Rel, shift_rotate_imm<"dsrl32", uimm5, GPR64Opnd, - II_DSRL32>, + def DSRL32 : shift_rotate_imm<"dsrl32", uimm5, GPR64Opnd, II_DSRL32>, SRA_FM<0x3e, 0>, ISA_MIPS3; - def DSRA32 : StdMMR6Rel, shift_rotate_imm<"dsra32", uimm5, GPR64Opnd, - II_DSRA32>, + def DSRA32 : shift_rotate_imm<"dsra32", uimm5, GPR64Opnd, II_DSRA32>, SRA_FM<0x3f, 0>, ISA_MIPS3; // Rotate Instructions - def DROTR : StdMMR6Rel, shift_rotate_imm<"drotr", uimm6, GPR64Opnd, II_DROTR, - rotr, immZExt6>, + def DROTR : shift_rotate_imm<"drotr", uimm6, GPR64Opnd, II_DROTR, rotr, + immZExt6>, SRA_FM<0x3a, 1>, ISA_MIPS64R2; - def DROTRV : StdMMR6Rel, shift_rotate_reg<"drotrv", GPR64Opnd, II_DROTRV, - rotr>, + def DROTRV : shift_rotate_reg<"drotrv", GPR64Opnd, II_DROTRV, rotr>, SRLV_FM<0x16, 1>, ISA_MIPS64R2; - def DROTR32 : StdMMR6Rel, shift_rotate_imm<"drotr32", uimm5, GPR64Opnd, - II_DROTR32>, + def DROTR32 : shift_rotate_imm<"drotr32", uimm5, GPR64Opnd, II_DROTR32>, SRA_FM<0x3e, 1>, ISA_MIPS64R2; } @@ -192,11 +187,11 @@ def SW64 : Store<"sw", GPR64Opnd, truncstorei32, II_SW>, LW_FM<0x2b>; } let AdditionalPredicates = [NotInMicroMips] in { - def LWu : StdMMR6Rel, MMRel, Load<"lwu", GPR64Opnd, zextloadi32, II_LWU>, + def LWu : MMRel, Load<"lwu", GPR64Opnd, zextloadi32, II_LWU>, LW_FM<0x27>, ISA_MIPS3; - def LD : StdMMR6Rel, LoadMemory<"ld", GPR64Opnd, mem_simm16, load, II_LD>, + def LD : LoadMemory<"ld", GPR64Opnd, mem_simm16, load, II_LD>, LW_FM<0x37>, ISA_MIPS3; - def SD : StdMMR6Rel, StoreMemory<"sd", GPR64Opnd, mem_simm16, store, II_SD>, + def SD : StoreMemory<"sd", GPR64Opnd, mem_simm16, store, II_SD>, LW_FM<0x3f>, ISA_MIPS3; } @@ -221,7 +216,7 @@ def SDR : StoreLeftRight<"sdr", MipsSDR, GPR64Opnd, II_SDR>, LW_FM<0x2d>, /// Load-linked, Store-conditional let AdditionalPredicates = [NotInMicroMips] in { - def LLD : StdMMR6Rel, LLBase<"lld", GPR64Opnd, mem_simm16>, LW_FM<0x34>, + def LLD : LLBase<"lld", GPR64Opnd, mem_simm16>, LW_FM<0x34>, ISA_MIPS3_NOT_32R6_64R6; } def SCD : SCBase<"scd", GPR64Opnd>, LW_FM<0x3c>, ISA_MIPS3_NOT_32R6_64R6; @@ -299,10 +294,10 @@ def SEH64 : SignExtInReg<"seh", i16, GPR64Opnd, II_SEH>, SEB_FM<0x18, 0x20>, /// Count Leading let AdditionalPredicates = [NotInMicroMips] in { - def DCLZ : StdMMR6Rel, CountLeading0<"dclz", GPR64Opnd, II_DCLZ>, - CLO_FM<0x24>, ISA_MIPS64_NOT_64R6; - def DCLO : StdMMR6Rel, CountLeading1<"dclo", GPR64Opnd, II_DCLO>, - CLO_FM<0x25>, ISA_MIPS64_NOT_64R6; + def DCLZ : CountLeading0<"dclz", GPR64Opnd, II_DCLZ>, CLO_FM<0x24>, + ISA_MIPS64_NOT_64R6; + def DCLO : CountLeading1<"dclo", GPR64Opnd, II_DCLO>, CLO_FM<0x25>, + ISA_MIPS64_NOT_64R6; /// Double Word Swap Bytes/HalfWords def DSBH : SubwordSwap<"dsbh", GPR64Opnd, II_DSBH>, SEB_FM<2, 0x24>, @@ -568,74 +563,70 @@ defm : MipsHiLoRelocs, SYM_32; def : MipsPat<(MipsGotHi tglobaladdr:$in), (LUi64 tglobaladdr:$in)>; def : MipsPat<(MipsGotHi texternalsym:$in), (LUi64 texternalsym:$in)>; -multiclass MipsHighestHigherHiLoRelocs { +// highest/higher/hi/lo relocs +let AdditionalPredicates = [NotInMicroMips] in { def : MipsPat<(MipsJmpLink (i64 texternalsym:$dst)), - (JAL texternalsym:$dst)>; + (JAL 
texternalsym:$dst)>, SYM_64; def : MipsPat<(MipsHighest (i64 tglobaladdr:$in)), - (Lui tglobaladdr:$in)>; + (LUi64 tglobaladdr:$in)>, SYM_64; def : MipsPat<(MipsHighest (i64 tblockaddress:$in)), - (Lui tblockaddress:$in)>; + (LUi64 tblockaddress:$in)>, SYM_64; def : MipsPat<(MipsHighest (i64 tjumptable:$in)), - (Lui tjumptable:$in)>; + (LUi64 tjumptable:$in)>, SYM_64; def : MipsPat<(MipsHighest (i64 tconstpool:$in)), - (Lui tconstpool:$in)>; + (LUi64 tconstpool:$in)>, SYM_64; def : MipsPat<(MipsHighest (i64 tglobaltlsaddr:$in)), - (Lui tglobaltlsaddr:$in)>; + (LUi64 tglobaltlsaddr:$in)>, SYM_64; def : MipsPat<(MipsHighest (i64 texternalsym:$in)), - (Lui texternalsym:$in)>; + (LUi64 texternalsym:$in)>, SYM_64; def : MipsPat<(MipsHigher (i64 tglobaladdr:$in)), - (Daddiu ZERO_64, tglobaladdr:$in)>; + (DADDiu ZERO_64, tglobaladdr:$in)>, SYM_64; def : MipsPat<(MipsHigher (i64 tblockaddress:$in)), - (Daddiu ZERO_64, tblockaddress:$in)>; + (DADDiu ZERO_64, tblockaddress:$in)>, SYM_64; def : MipsPat<(MipsHigher (i64 tjumptable:$in)), - (Daddiu ZERO_64, tjumptable:$in)>; + (DADDiu ZERO_64, tjumptable:$in)>, SYM_64; def : MipsPat<(MipsHigher (i64 tconstpool:$in)), - (Daddiu ZERO_64, tconstpool:$in)>; + (DADDiu ZERO_64, tconstpool:$in)>, SYM_64; def : MipsPat<(MipsHigher (i64 tglobaltlsaddr:$in)), - (Daddiu ZERO_64, tglobaltlsaddr:$in)>; + (DADDiu ZERO_64, tglobaltlsaddr:$in)>, SYM_64; def : MipsPat<(MipsHigher (i64 texternalsym:$in)), - (Daddiu ZERO_64, texternalsym:$in)>; + (DADDiu ZERO_64, texternalsym:$in)>, SYM_64; def : MipsPat<(add GPR64:$hi, (MipsHigher (i64 tglobaladdr:$lo))), - (Daddiu GPR64:$hi, tglobaladdr:$lo)>; + (DADDiu GPR64:$hi, tglobaladdr:$lo)>, SYM_64; def : MipsPat<(add GPR64:$hi, (MipsHigher (i64 tblockaddress:$lo))), - (Daddiu GPR64:$hi, tblockaddress:$lo)>; + (DADDiu GPR64:$hi, tblockaddress:$lo)>, SYM_64; def : MipsPat<(add GPR64:$hi, (MipsHigher (i64 tjumptable:$lo))), - (Daddiu GPR64:$hi, tjumptable:$lo)>; + (DADDiu GPR64:$hi, tjumptable:$lo)>, SYM_64; def : MipsPat<(add GPR64:$hi, (MipsHigher (i64 tconstpool:$lo))), - (Daddiu GPR64:$hi, tconstpool:$lo)>; + (DADDiu GPR64:$hi, tconstpool:$lo)>, SYM_64; def : MipsPat<(add GPR64:$hi, (MipsHigher (i64 tglobaltlsaddr:$lo))), - (Daddiu GPR64:$hi, tglobaltlsaddr:$lo)>; + (DADDiu GPR64:$hi, tglobaltlsaddr:$lo)>, SYM_64; def : MipsPat<(add GPR64:$hi, (MipsHi (i64 tglobaladdr:$lo))), - (Daddiu GPR64:$hi, tglobaladdr:$lo)>; + (DADDiu GPR64:$hi, tglobaladdr:$lo)>, SYM_64; def : MipsPat<(add GPR64:$hi, (MipsHi (i64 tblockaddress:$lo))), - (Daddiu GPR64:$hi, tblockaddress:$lo)>; + (DADDiu GPR64:$hi, tblockaddress:$lo)>, SYM_64; def : MipsPat<(add GPR64:$hi, (MipsHi (i64 tjumptable:$lo))), - (Daddiu GPR64:$hi, tjumptable:$lo)>; + (DADDiu GPR64:$hi, tjumptable:$lo)>, SYM_64; def : MipsPat<(add GPR64:$hi, (MipsHi (i64 tconstpool:$lo))), - (Daddiu GPR64:$hi, tconstpool:$lo)>; + (DADDiu GPR64:$hi, tconstpool:$lo)>, SYM_64; def : MipsPat<(add GPR64:$hi, (MipsHi (i64 tglobaltlsaddr:$lo))), - (Daddiu GPR64:$hi, tglobaltlsaddr:$lo)>; + (DADDiu GPR64:$hi, tglobaltlsaddr:$lo)>, SYM_64; def : MipsPat<(add GPR64:$hi, (MipsLo (i64 tglobaladdr:$lo))), - (Daddiu GPR64:$hi, tglobaladdr:$lo)>; + (DADDiu GPR64:$hi, tglobaladdr:$lo)>, SYM_64; def : MipsPat<(add GPR64:$hi, (MipsLo (i64 tblockaddress:$lo))), - (Daddiu GPR64:$hi, tblockaddress:$lo)>; + (DADDiu GPR64:$hi, tblockaddress:$lo)>, SYM_64; def : MipsPat<(add GPR64:$hi, (MipsLo (i64 tjumptable:$lo))), - (Daddiu GPR64:$hi, tjumptable:$lo)>; + (DADDiu GPR64:$hi, tjumptable:$lo)>, SYM_64; def : 
MipsPat<(add GPR64:$hi, (MipsLo (i64 tconstpool:$lo))), - (Daddiu GPR64:$hi, tconstpool:$lo)>; + (DADDiu GPR64:$hi, tconstpool:$lo)>, SYM_64; def : MipsPat<(add GPR64:$hi, (MipsLo (i64 tglobaltlsaddr:$lo))), - (Daddiu GPR64:$hi, tglobaltlsaddr:$lo)>; - + (DADDiu GPR64:$hi, tglobaltlsaddr:$lo)>, SYM_64; } -// highest/higher/hi/lo relocs -let AdditionalPredicates = [NotInMicroMips] in -defm : MipsHighestHigherHiLoRelocs, SYM_64; - // gp_rel relocs def : MipsPat<(add GPR64:$gp, (MipsGPRel tglobaladdr:$in)), (DADDiu GPR64:$gp, tglobaladdr:$in)>, ABI_N64; diff --git a/lib/Target/Mips/Mips64r6InstrInfo.td b/lib/Target/Mips/Mips64r6InstrInfo.td index dabf4e0a52e2..1cd43ee6f1c3 100644 --- a/lib/Target/Mips/Mips64r6InstrInfo.td +++ b/lib/Target/Mips/Mips64r6InstrInfo.td @@ -117,21 +117,21 @@ let AdditionalPredicates = [NotInMicroMips] in { } def DAUI : DAUI_ENC, DAUI_DESC, ISA_MIPS64R6; def DALIGN : DALIGN_ENC, DALIGN_DESC, ISA_MIPS64R6; - def DBITSWAP : R6MMR6Rel, DBITSWAP_ENC, DBITSWAP_DESC, ISA_MIPS64R6; - def DCLO_R6 : R6MMR6Rel, DCLO_R6_ENC, DCLO_R6_DESC, ISA_MIPS64R6; - def DCLZ_R6 : R6MMR6Rel, DCLZ_R6_ENC, DCLZ_R6_DESC, ISA_MIPS64R6; + def DBITSWAP : DBITSWAP_ENC, DBITSWAP_DESC, ISA_MIPS64R6; + def DCLO_R6 : DCLO_R6_ENC, DCLO_R6_DESC, ISA_MIPS64R6; + def DCLZ_R6 : DCLZ_R6_ENC, DCLZ_R6_DESC, ISA_MIPS64R6; def DDIV : DDIV_ENC, DDIV_DESC, ISA_MIPS64R6; def DDIVU : DDIVU_ENC, DDIVU_DESC, ISA_MIPS64R6; def DMOD : DMOD_ENC, DMOD_DESC, ISA_MIPS64R6; def DMODU : DMODU_ENC, DMODU_DESC, ISA_MIPS64R6; - def DLSA_R6 : R6MMR6Rel, DLSA_R6_ENC, DLSA_R6_DESC, ISA_MIPS64R6; + def DLSA_R6 : DLSA_R6_ENC, DLSA_R6_DESC, ISA_MIPS64R6; def DMUH: DMUH_ENC, DMUH_DESC, ISA_MIPS64R6; def DMUHU: DMUHU_ENC, DMUHU_DESC, ISA_MIPS64R6; def DMUL_R6: DMUL_R6_ENC, DMUL_R6_DESC, ISA_MIPS64R6; def DMULU: DMULU_ENC, DMULU_DESC, ISA_MIPS64R6; - def LLD_R6 : R6MMR6Rel, LLD_R6_ENC, LLD_R6_DESC, ISA_MIPS64R6; + def LLD_R6 : LLD_R6_ENC, LLD_R6_DESC, ISA_MIPS64R6; } -def LDPC: R6MMR6Rel, LDPC_ENC, LDPC_DESC, ISA_MIPS64R6; +def LDPC: LDPC_ENC, LDPC_DESC, ISA_MIPS64R6; def SCD_R6 : SCD_R6_ENC, SCD_R6_DESC, ISA_MIPS32R6; let DecoderNamespace = "Mips32r6_64r6_GP64" in { def SELEQZ64 : SELEQZ_ENC, SELEQZ64_DESC, ISA_MIPS32R6, GPR_64; diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp index fbf7b5e28b7c..f9de78dc281f 100644 --- a/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/MipsAsmPrinter.cpp @@ -381,7 +381,7 @@ void MipsAsmPrinter::EmitFunctionBodyStart() { MCInstLowering.Initialize(&MF->getContext()); - bool IsNakedFunction = MF->getFunction()->hasFnAttribute(Attribute::Naked); + bool IsNakedFunction = MF->getFunction().hasFnAttribute(Attribute::Naked); if (!IsNakedFunction) emitFrameDirective(); diff --git a/lib/Target/Mips/MipsAsmPrinter.h b/lib/Target/Mips/MipsAsmPrinter.h index c53d80e4b0f6..999b6f896bae 100644 --- a/lib/Target/Mips/MipsAsmPrinter.h +++ b/lib/Target/Mips/MipsAsmPrinter.h @@ -54,9 +54,6 @@ class LLVM_LIBRARY_VISIBILITY MipsAsmPrinter : public AsmPrinter { void LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI); void LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI); void LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI); - // Helper function that emits the XRay sleds we've collected for a particular - // function. 
- void EmitXRayTable(); private: /// MCP - Keep a pointer to constantpool entries of the current diff --git a/lib/Target/Mips/MipsCCState.cpp b/lib/Target/Mips/MipsCCState.cpp index 6a03ee9927d7..81a1cced93b7 100644 --- a/lib/Target/Mips/MipsCCState.cpp +++ b/lib/Target/Mips/MipsCCState.cpp @@ -101,9 +101,9 @@ void MipsCCState::PreAnalyzeReturnForF128( const MachineFunction &MF = getMachineFunction(); for (unsigned i = 0; i < Outs.size(); ++i) { OriginalArgWasF128.push_back( - originalTypeIsF128(MF.getFunction()->getReturnType(), nullptr)); + originalTypeIsF128(MF.getFunction().getReturnType(), nullptr)); OriginalArgWasFloat.push_back( - MF.getFunction()->getReturnType()->isFloatingPointTy()); + MF.getFunction().getReturnType()->isFloatingPointTy()); } } @@ -149,7 +149,7 @@ void MipsCCState::PreAnalyzeFormalArgumentsForF128( const SmallVectorImpl &Ins) { const MachineFunction &MF = getMachineFunction(); for (unsigned i = 0; i < Ins.size(); ++i) { - Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin(); + Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin(); // SRet arguments cannot originate from f128 or {f128} returns so we just // push false. We have to handle this specially since SRet arguments @@ -161,7 +161,7 @@ void MipsCCState::PreAnalyzeFormalArgumentsForF128( continue; } - assert(Ins[i].getOrigArgIndex() < MF.getFunction()->arg_size()); + assert(Ins[i].getOrigArgIndex() < MF.getFunction().arg_size()); std::advance(FuncArg, Ins[i].getOrigArgIndex()); OriginalArgWasF128.push_back( diff --git a/lib/Target/Mips/MipsConstantIslandPass.cpp b/lib/Target/Mips/MipsConstantIslandPass.cpp index 257e8f45a70e..a9abc171b423 100644 --- a/lib/Target/Mips/MipsConstantIslandPass.cpp +++ b/lib/Target/Mips/MipsConstantIslandPass.cpp @@ -430,7 +430,7 @@ bool MipsConstantIslands::isOffsetInRange LLVM_DUMP_METHOD void MipsConstantIslands::dumpBBs() { for (unsigned J = 0, E = BBInfo.size(); J !=E; ++J) { const BasicBlockInfo &BBI = BBInfo[J]; - dbgs() << format("%08x BB#%u\t", BBI.Offset, J) + dbgs() << format("%08x %bb.%u\t", BBI.Offset, J) << format(" size=%#x\n", BBInfo[J].Size); } } @@ -991,11 +991,11 @@ bool MipsConstantIslands::isCPEntryInRange const BasicBlockInfo &BBI = BBInfo[Block]; dbgs() << "User of CPE#" << CPEMI->getOperand(0).getImm() << " max delta=" << MaxDisp - << format(" insn address=%#x", UserOffset) - << " in BB#" << Block << ": " + << format(" insn address=%#x", UserOffset) << " in " + << printMBBReference(*MI->getParent()) << ": " << format("%#x-%x\t", BBI.Offset, BBI.postOffset()) << *MI << format("CPE address=%#x offset=%+d: ", CPEOffset, - int(CPEOffset-UserOffset)); + int(CPEOffset - UserOffset)); }); } @@ -1197,7 +1197,7 @@ bool MipsConstantIslands::findAvailableWater(CPUser &U, unsigned UserOffset, // This is the least amount of required padding seen so far. BestGrowth = Growth; WaterIter = IP; - DEBUG(dbgs() << "Found water after BB#" << WaterBB->getNumber() + DEBUG(dbgs() << "Found water after " << printMBBReference(*WaterBB) << " Growth=" << Growth << '\n'); // Keep looking unless it is perfect. 
@@ -1236,8 +1236,8 @@ void MipsConstantIslands::createNewWater(unsigned CPUserIndex, unsigned CPEOffset = UserBBI.postOffset(CPELogAlign) + Delta; if (isOffsetInRange(UserOffset, CPEOffset, U)) { - DEBUG(dbgs() << "Split at end of BB#" << UserMBB->getNumber() - << format(", expected CPE offset %#x\n", CPEOffset)); + DEBUG(dbgs() << "Split at end of " << printMBBReference(*UserMBB) + << format(", expected CPE offset %#x\n", CPEOffset)); NewMBB = &*++UserMBB->getIterator(); // Add an unconditional branch from UserMBB to fallthrough block. Record // it for branch lengthening; this new branch will not get out of range, @@ -1470,11 +1470,11 @@ bool MipsConstantIslands::isBBInRange unsigned BrOffset = getOffsetOf(MI) + PCAdj; unsigned DestOffset = BBInfo[DestBB->getNumber()].Offset; - DEBUG(dbgs() << "Branch of destination BB#" << DestBB->getNumber() - << " from BB#" << MI->getParent()->getNumber() - << " max delta=" << MaxDisp - << " from " << getOffsetOf(MI) << " to " << DestOffset - << " offset " << int(DestOffset-BrOffset) << "\t" << *MI); + DEBUG(dbgs() << "Branch of destination " << printMBBReference(*DestBB) + << " from " << printMBBReference(*MI->getParent()) + << " max delta=" << MaxDisp << " from " << getOffsetOf(MI) + << " to " << DestOffset << " offset " + << int(DestOffset - BrOffset) << "\t" << *MI); if (BrOffset <= DestOffset) { // Branch before the Dest. @@ -1615,9 +1615,9 @@ MipsConstantIslands::fixupConditionalBr(ImmBranch &Br) { } MachineBasicBlock *NextBB = &*++MBB->getIterator(); - DEBUG(dbgs() << " Insert B to BB#" << DestBB->getNumber() - << " also invert condition and change dest. to BB#" - << NextBB->getNumber() << "\n"); + DEBUG(dbgs() << " Insert B to " << printMBBReference(*DestBB) + << " also invert condition and change dest. to " + << printMBBReference(*NextBB) << "\n"); // Insert a new conditional branch and a new unconditional branch. // Also update the ImmBranch as well as adding a new entry for the new branch. @@ -1661,7 +1661,7 @@ void MipsConstantIslands::prescanForConstants() { int64_t V = Literal.getImm(); DEBUG(dbgs() << "literal " << V << "\n"); Type *Int32Ty = - Type::getInt32Ty(MF->getFunction()->getContext()); + Type::getInt32Ty(MF->getFunction().getContext()); const Constant *C = ConstantInt::get(Int32Ty, V); unsigned index = MCP->getConstantPoolIndex(C, 4); I->getOperand(2).ChangeToImmediate(index); diff --git a/lib/Target/Mips/MipsDSPInstrInfo.td b/lib/Target/Mips/MipsDSPInstrInfo.td index 2595333188a4..871135e3a22b 100644 --- a/lib/Target/Mips/MipsDSPInstrInfo.td +++ b/lib/Target/Mips/MipsDSPInstrInfo.td @@ -1325,6 +1325,10 @@ def : BitconvertPat; def : BitconvertPat; def : BitconvertPat; def : BitconvertPat; +def : BitconvertPat; +def : BitconvertPat; +def : BitconvertPat; +def : BitconvertPat; def : DSPPat<(v2i16 (load addr:$a)), (v2i16 (COPY_TO_REGCLASS (LW addr:$a), DSPR))>; diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp index e06b57e41834..b12c7e7760ab 100644 --- a/lib/Target/Mips/MipsDelaySlotFiller.cpp +++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp @@ -632,7 +632,7 @@ bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) { // TODO: Implement an instruction mapping table of 16bit opcodes to // 32bit opcodes so that an instruction can be expanded. This would // save 16 bits as a TAILCALL_MM pseudo requires a fullsized nop. - // TODO: Permit b16 when branching backwards to the the same function + // TODO: Permit b16 when branching backwards to the same function // if it is in range. 
DSI->setDesc(TII->get(getEquivalentCallShort(DSI->getOpcode()))); } diff --git a/lib/Target/Mips/MipsFastISel.cpp b/lib/Target/Mips/MipsFastISel.cpp index 8bbac3ed7cfb..65dfbdc45648 100644 --- a/lib/Target/Mips/MipsFastISel.cpp +++ b/lib/Target/Mips/MipsFastISel.cpp @@ -1628,7 +1628,7 @@ bool MipsFastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { if (!MTI->getLength()->getType()->isIntegerTy(32)) return false; const char *IntrMemName = isa(II) ? "memcpy" : "memmove"; - return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 2); + return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 1); } case Intrinsic::memset: { const MemSetInst *MSI = cast(II); @@ -1637,7 +1637,7 @@ bool MipsFastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { return false; if (!MSI->getLength()->getType()->isIntegerTy(32)) return false; - return lowerCallTo(II, "memset", II->getNumArgOperands() - 2); + return lowerCallTo(II, "memset", II->getNumArgOperands() - 1); } } return false; diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index a0fa240901bf..79ca9cc6b800 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -1395,11 +1395,6 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, case Mips::DMOD: case Mips::DMODU: return insertDivByZeroTrap(MI, *BB, *Subtarget.getInstrInfo(), true, false); - case Mips::DDIV_MM64R6: - case Mips::DDIVU_MM64R6: - case Mips::DMOD_MM64R6: - case Mips::DMODU_MM64R6: - return insertDivByZeroTrap(MI, *BB, *Subtarget.getInstrInfo(), true, true); case Mips::PseudoSELECT_I: case Mips::PseudoSELECT_I64: @@ -2812,8 +2807,7 @@ static bool CC_MipsO32(unsigned ValNo, MVT ValVT, MVT LocVT, llvm_unreachable("Cannot handle this ValVT."); if (!Reg) { - unsigned Offset = State.AllocateStack(ValVT.getSizeInBits() >> 3, - OrigAlign); + unsigned Offset = State.AllocateStack(ValVT.getStoreSize(), OrigAlign); State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); } else State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); @@ -3365,10 +3359,10 @@ SDValue MipsTargetLowering::LowerFormalArguments( MipsCCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs, *DAG.getContext()); CCInfo.AllocateStack(ABI.GetCalleeAllocdArgSizeInBytes(CallConv), 1); - const Function *Func = DAG.getMachineFunction().getFunction(); - Function::const_arg_iterator FuncArg = Func->arg_begin(); + const Function &Func = DAG.getMachineFunction().getFunction(); + Function::const_arg_iterator FuncArg = Func.arg_begin(); - if (Func->hasFnAttribute("interrupt") && !Func->arg_empty()) + if (Func.hasFnAttribute("interrupt") && !Func.arg_empty()) report_fatal_error( "Functions with the interrupt attribute cannot have arguments!"); @@ -3606,7 +3600,7 @@ MipsTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, // the sret argument into $v0 for the return. We saved the argument into // a virtual register in the entry block, so now we copy the value out // and into $v0. - if (MF.getFunction()->hasStructRetAttr()) { + if (MF.getFunction().hasStructRetAttr()) { MipsFunctionInfo *MipsFI = MF.getInfo(); unsigned Reg = MipsFI->getSRetReturnReg(); @@ -3628,7 +3622,7 @@ MipsTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, RetOps.push_back(Flag); // ISRs must use "eret". 
- if (DAG.getMachineFunction().getFunction()->hasFnAttribute("interrupt"))
+ if (DAG.getMachineFunction().getFunction().hasFnAttribute("interrupt"))
return LowerInterruptReturn(RetOps, DL, DAG);
// Standard return on Mips is a "jr $ra"
@@ -3869,13 +3863,17 @@ MipsTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
case 'c': // register suitable for indirect jump
if (VT == MVT::i32)
return std::make_pair((unsigned)Mips::T9, &Mips::GPR32RegClass);
- assert(VT == MVT::i64 && "Unexpected type.");
- return std::make_pair((unsigned)Mips::T9_64, &Mips::GPR64RegClass);
- case 'l': // register suitable for indirect jump
+ if (VT == MVT::i64)
+ return std::make_pair((unsigned)Mips::T9_64, &Mips::GPR64RegClass);
+ // This will generate an error message
+ return std::make_pair(0U, nullptr);
+ case 'l': // use the `lo` register to store values
+ // that are no bigger than a word
if (VT == MVT::i32)
return std::make_pair((unsigned)Mips::LO0, &Mips::LO32RegClass);
return std::make_pair((unsigned)Mips::LO0_64, &Mips::LO64RegClass);
- case 'x': // register suitable for indirect jump
+ case 'x': // use the concatenated `hi` and `lo` registers
+ // to store doubleword values
// Fixme: Not triggering the use of both hi and low
// This will generate an error message
return std::make_pair(0U, nullptr);
diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp
index 878497ca76fc..51ddc0d44c00 100644
--- a/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/lib/Target/Mips/MipsInstrInfo.cpp
@@ -480,7 +480,7 @@ MipsInstrInfo::genInstrWithNewOpc(unsigned NewOpc,
MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), get(NewOpc));
// For MIPSR6 JI*C requires an immediate 0 as an operand, JIALC(64) an
- // immediate 0 as an operand and requires the removal of it's %RA
+ // immediate 0 as an operand and requires the removal of its implicit-def %ra
// implicit operand as copying the implicit operations of the instructio we're
// looking at will give us the correct flags.
if (NewOpc == Mips::JIC || NewOpc == Mips::JIALC || NewOpc == Mips::JIC64 ||
@@ -538,15 +538,19 @@ bool MipsInstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1,
}
// ins, ext, dext*, dins have the following constraints:
-// 0 <= pos < X
-// 0 < size <= X
-// 0 < pos+size <= x
+// X <= pos < Y
+// X < size <= Y
+// X < pos+size <= Y
//
-// dinsm and dinsm have the following contraints:
-// 0 <= pos < X
-// 0 <= size <= X
-// 0 < pos+size <= x
-
+// dinsm and dinsu have the following constraints:
+// X <= pos < Y
+// X <= size <= Y
+// X < pos+size <= Y
+//
+// The callee of verifyInsExtInstruction however gives the bounds of
+// dins[um] like the other (d)ins (d)ext(um) instructions, so that this
+// function doesn't have to vary its behaviour based on the instruction
+// being checked.
static bool verifyInsExtInstruction(const MachineInstr &MI, StringRef &ErrInfo,
const int64_t PosLow, const int64_t PosHigh,
const int64_t SizeLow,
@@ -593,28 +597,25 @@ bool MipsInstrInfo::verifyInstruction(const MachineInstr &MI,
case Mips::INS:
case Mips::INS_MM:
case Mips::DINS:
- case Mips::DINS_MM64R6:
return verifyInsExtInstruction(MI, ErrInfo, 0, 32, 0, 32, 0, 32);
case Mips::DINSM:
- case Mips::DINSM_MM64R6:
- // The ISA spec has a subtle difference here in that it says:
- // 2 <= size <= 64 for 'dinsm', so we change the bounds so that it
- // is in line with the rest of instructions.
+ // The ISA spec has a subtle difference between dinsm and dextm
+ // in that it says:
+ // 2 <= size <= 64 for 'dinsm' but 'dextm' has 32 < size <= 64.
+ // To make the bounds checks similar, the range 1 < size <= 64 is checked
+ // for 'dinsm'.
return verifyInsExtInstruction(MI, ErrInfo, 0, 32, 1, 64, 32, 64);
case Mips::DINSU:
- case Mips::DINSU_MM64R6:
- // The ISA spec has a subtle difference here in that it says:
- // 2 <= size <= 64 for 'dinsm', so we change the bounds so that it
- // is in line with the rest of instructions.
- return verifyInsExtInstruction(MI, ErrInfo, 32, 64, 1, 32, 32, 64);
+ // The ISA spec has a subtle difference between dinsu and dextu in that
+ // the size range of dinsu is specified as 1 <= size <= 32 whereas size
+ // for dextu is 0 < size <= 32. The range checked for dinsu here is
+ // 0 < size <= 32, which is equivalent and similar to dextu.
+ return verifyInsExtInstruction(MI, ErrInfo, 32, 64, 0, 32, 32, 64);
case Mips::DEXT:
- case Mips::DEXT_MM64R6:
return verifyInsExtInstruction(MI, ErrInfo, 0, 32, 0, 32, 0, 63);
case Mips::DEXTM:
- case Mips::DEXTM_MM64R6:
return verifyInsExtInstruction(MI, ErrInfo, 0, 32, 32, 64, 32, 64);
case Mips::DEXTU:
- case Mips::DEXTU_MM64R6:
return verifyInsExtInstruction(MI, ErrInfo, 32, 64, 0, 32, 32, 64);
default:
return true;
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index e16059d2adcd..e0d818b749df 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -208,8 +208,6 @@ def NotMips64r6 : Predicate<"!Subtarget->hasMips64r6()">,
AssemblerPredicate<"!FeatureMips64r6">;
def HasMicroMips32r6 : Predicate<"Subtarget->inMicroMips32r6Mode()">,
AssemblerPredicate<"FeatureMicroMips,FeatureMips32r6">;
-def HasMicroMips64r6 : Predicate<"Subtarget->inMicroMips64r6Mode()">,
- AssemblerPredicate<"FeatureMicroMips,FeatureMips64r6">;
def InMips16Mode : Predicate<"Subtarget->inMips16Mode()">,
AssemblerPredicate<"FeatureMips16">;
def NotInMips16Mode : Predicate<"!Subtarget->inMips16Mode()">,
@@ -313,9 +311,6 @@ class ISA_MICROMIPS { list InsnPredicates = [InMicroMips]; }
class ISA_MICROMIPS32R6 {
list InsnPredicates = [HasMicroMips32r6];
}
-class ISA_MICROMIPS64R6 {
- list InsnPredicates = [HasMicroMips64r6];
-}
class ISA_MICROMIPS32_NOT_MIPS32R6 {
list InsnPredicates = [InMicroMips, NotMips32r6];
}
@@ -393,8 +388,8 @@ class ASE_MT {
// Class used for separating microMIPSr6 and microMIPS (r3) instruction.
// It can be used only on instructions that doesn't inherit PredicateControl.
-class ISA_MICROMIPS_NOT_32R6_64R6 : PredicateControl { - let InsnPredicates = [InMicroMips, NotMips32r6, NotMips64r6]; +class ISA_MICROMIPS_NOT_32R6 : PredicateControl { + let InsnPredicates = [InMicroMips, NotMips32r6]; } class ASE_NOT_DSP { @@ -3014,10 +3009,6 @@ include "MicroMipsInstrFPU.td" include "MicroMips32r6InstrFormats.td" include "MicroMips32r6InstrInfo.td" -// Micromips64 r6 -include "MicroMips64r6InstrFormats.td" -include "MicroMips64r6InstrInfo.td" - // Micromips DSP include "MicroMipsDSPInstrFormats.td" include "MicroMipsDSPInstrInfo.td" diff --git a/lib/Target/Mips/MipsLongBranch.cpp b/lib/Target/Mips/MipsLongBranch.cpp index 9af4f4b1cd42..bbf2050ce1eb 100644 --- a/lib/Target/Mips/MipsLongBranch.cpp +++ b/lib/Target/Mips/MipsLongBranch.cpp @@ -279,12 +279,16 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) { LongBrMBB->addSuccessor(BalTgtMBB); BalTgtMBB->addSuccessor(TgtMBB); - // We must select between the MIPS32r6/MIPS64r6 BAL (which is a normal + // We must select between the MIPS32r6/MIPS64r6 BALC (which is a normal // instruction) and the pre-MIPS32r6/MIPS64r6 definition (which is an // pseudo-instruction wrapping BGEZAL). - unsigned BalOp = Subtarget.hasMips32r6() ? Mips::BAL : Mips::BAL_BR; + const unsigned BalOp = + Subtarget.hasMips32r6() + ? Subtarget.inMicroMipsMode() ? Mips::BALC_MMR6 : Mips::BALC + : Mips::BAL_BR; if (!ABI.IsN64()) { + // Pre R6: // $longbr: // addiu $sp, $sp, -8 // sw $ra, 0($sp) @@ -299,6 +303,20 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) { // $fallthrough: // + // R6: + // $longbr: + // addiu $sp, $sp, -8 + // sw $ra, 0($sp) + // lui $at, %hi($tgt - $baltgt) + // addiu $at, $at, %lo($tgt - $baltgt) + // balc $baltgt + // $baltgt: + // addu $at, $ra, $at + // lw $ra, 0($sp) + // addiu $sp, $sp, 8 + // jic $at, 0 + // $fallthrough: + Pos = LongBrMBB->begin(); BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::ADDiu), Mips::SP) @@ -307,7 +325,7 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) { .addReg(Mips::SP).addImm(0); // LUi and ADDiu instructions create 32-bit offset of the target basic - // block from the target of BAL instruction. We cannot use immediate + // block from the target of BAL(C) instruction. We cannot use immediate // value for this offset because it cannot be determined accurately when // the program has inline assembly statements. 
We therefore use the // relocation expressions %hi($tgt-$baltgt) and %lo($tgt-$baltgt) which @@ -324,12 +342,22 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) { BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::LONG_BRANCH_LUi), Mips::AT) .addMBB(TgtMBB).addMBB(BalTgtMBB); - MIBundleBuilder(*LongBrMBB, Pos) - .append(BuildMI(*MF, DL, TII->get(BalOp)).addMBB(BalTgtMBB)) - .append(BuildMI(*MF, DL, TII->get(Mips::LONG_BRANCH_ADDiu), Mips::AT) - .addReg(Mips::AT) - .addMBB(TgtMBB) - .addMBB(BalTgtMBB)); + + MachineInstrBuilder BalInstr = + BuildMI(*MF, DL, TII->get(BalOp)).addMBB(BalTgtMBB); + MachineInstrBuilder ADDiuInstr = + BuildMI(*MF, DL, TII->get(Mips::LONG_BRANCH_ADDiu), Mips::AT) + .addReg(Mips::AT) + .addMBB(TgtMBB) + .addMBB(BalTgtMBB); + if (Subtarget.hasMips32r6()) { + LongBrMBB->insert(Pos, ADDiuInstr); + LongBrMBB->insert(Pos, BalInstr); + } else { + LongBrMBB->insert(Pos, BalInstr); + LongBrMBB->insert(Pos, ADDiuInstr); + LongBrMBB->rbegin()->bundleWithPred(); + } Pos = BalTgtMBB->begin(); @@ -337,28 +365,37 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) { .addReg(Mips::RA).addReg(Mips::AT); BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::LW), Mips::RA) .addReg(Mips::SP).addImm(0); + if (Subtarget.isTargetNaCl()) + // Bundle-align the target of indirect branch JR. + TgtMBB->setAlignment(MIPS_NACL_BUNDLE_ALIGN); // In NaCl, modifying the sp is not allowed in branch delay slot. - if (Subtarget.isTargetNaCl()) + // For MIPS32R6, we can skip using a delay slot branch. + if (Subtarget.isTargetNaCl() || Subtarget.hasMips32r6()) BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::ADDiu), Mips::SP) .addReg(Mips::SP).addImm(8); - if (Subtarget.hasMips32r6()) - BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::JALR)) - .addReg(Mips::ZERO).addReg(Mips::AT); - else + if (Subtarget.hasMips32r6()) { + const unsigned JICOp = + Subtarget.inMicroMipsMode() ? Mips::JIC_MMR6 : Mips::JIC; + BuildMI(*BalTgtMBB, Pos, DL, TII->get(JICOp)) + .addReg(Mips::AT) + .addImm(0); + + } else { BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::JR)).addReg(Mips::AT); - if (Subtarget.isTargetNaCl()) { - BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::NOP)); - // Bundle-align the target of indirect branch JR. - TgtMBB->setAlignment(MIPS_NACL_BUNDLE_ALIGN); - } else - BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::ADDiu), Mips::SP) - .addReg(Mips::SP).addImm(8); + if (Subtarget.isTargetNaCl()) { + BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::NOP)); + } else + BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::ADDiu), Mips::SP) + .addReg(Mips::SP) + .addImm(8); - BalTgtMBB->rbegin()->bundleWithPred(); + BalTgtMBB->rbegin()->bundleWithPred(); + } } else { + // Pre R6: // $longbr: // daddiu $sp, $sp, -16 // sd $ra, 0($sp) @@ -372,7 +409,21 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) { // jr64 $at // daddiu $sp, $sp, 16 // $fallthrough: - // + + // R6: + // $longbr: + // daddiu $sp, $sp, -16 + // sd $ra, 0($sp) + // daddiu $at, $zero, %hi($tgt - $baltgt) + // dsll $at, $at, 16 + // daddiu $at, $at, %lo($tgt - $baltgt) + // balc $baltgt + // $baltgt: + // daddu $at, $ra, $at + // ld $ra, 0($sp) + // daddiu $sp, $sp, 16 + // jic $at, 0 + // $fallthrough: // We assume the branch is within-function, and that offset is within // +/- 2GB. High 32 bits will therefore always be zero. 
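The LONG_BRANCH_LUi / LONG_BRANCH_ADDiu (and DADDiu) pair above reconstitutes the full 32-bit $tgt-$baltgt offset even though ADDiu sign-extends its 16-bit immediate: the %hi half is rounded up by 0x8000 so the sign-extension of the %lo half cancels back out. A small standalone C++ sketch of that split-and-recombine arithmetic (illustrative only, not part of this patch):

    #include <cassert>
    #include <cstdint>

    // %hi: round up by 0x8000 to compensate for the sign-extended %lo half.
    static uint32_t hi16(uint32_t Off) { return ((Off + 0x8000) >> 16) & 0xffff; }
    // %lo: low 16 bits, sign-extended the way ADDiu treats its immediate.
    static int32_t lo16(uint32_t Off) { return static_cast<int16_t>(Off & 0xffff); }

    int main() {
      for (uint32_t Off : {0x12345678u, 0x0000ffffu, 0x00008000u, 0xffff8000u}) {
        // LUi materializes hi16 << 16; ADDiu then adds the sign-extended lo16.
        uint32_t Rebuilt = (hi16(Off) << 16) + lo16(Off);
        assert(Rebuilt == Off);
      }
      return 0;
    }

The same +0x8000 rounding appears wherever a %hi/%lo pair is folded back together, for example in the fixup_Nios2_HI16 handling added later in this patch.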
@@ -401,13 +452,21 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) { BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::DSLL), Mips::AT_64) .addReg(Mips::AT_64).addImm(16); - MIBundleBuilder(*LongBrMBB, Pos) - .append(BuildMI(*MF, DL, TII->get(BalOp)).addMBB(BalTgtMBB)) - .append( - BuildMI(*MF, DL, TII->get(Mips::LONG_BRANCH_DADDiu), Mips::AT_64) - .addReg(Mips::AT_64) - .addMBB(TgtMBB, MipsII::MO_ABS_LO) - .addMBB(BalTgtMBB)); + MachineInstrBuilder BalInstr = + BuildMI(*MF, DL, TII->get(BalOp)).addMBB(BalTgtMBB); + MachineInstrBuilder DADDiuInstr = + BuildMI(*MF, DL, TII->get(Mips::LONG_BRANCH_DADDiu), Mips::AT_64) + .addReg(Mips::AT_64) + .addMBB(TgtMBB, MipsII::MO_ABS_LO) + .addMBB(BalTgtMBB); + if (Subtarget.hasMips32r6()) { + LongBrMBB->insert(Pos, DADDiuInstr); + LongBrMBB->insert(Pos, BalInstr); + } else { + LongBrMBB->insert(Pos, BalInstr); + LongBrMBB->insert(Pos, DADDiuInstr); + LongBrMBB->rbegin()->bundleWithPred(); + } Pos = BalTgtMBB->begin(); @@ -416,29 +475,40 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) { BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::LD), Mips::RA_64) .addReg(Mips::SP_64).addImm(0); - if (Subtarget.hasMips64r6()) - BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::JALR64)) - .addReg(Mips::ZERO_64).addReg(Mips::AT_64); - else + if (Subtarget.hasMips64r6()) { + BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::DADDiu), Mips::SP_64) + .addReg(Mips::SP_64) + .addImm(16); + BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::JIC64)) + .addReg(Mips::AT_64) + .addImm(0); + } else { BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::JR64)).addReg(Mips::AT_64); - - BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::DADDiu), Mips::SP_64) - .addReg(Mips::SP_64).addImm(16); - BalTgtMBB->rbegin()->bundleWithPred(); + BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::DADDiu), Mips::SP_64) + .addReg(Mips::SP_64) + .addImm(16); + BalTgtMBB->rbegin()->bundleWithPred(); + } } assert(LongBrMBB->size() + BalTgtMBB->size() == LongBranchSeqSize); } else { - // $longbr: - // j $tgt - // nop + // Pre R6: R6: + // $longbr: $longbr: + // j $tgt bc $tgt + // nop $fallthrough // $fallthrough: // Pos = LongBrMBB->begin(); LongBrMBB->addSuccessor(TgtMBB); - MIBundleBuilder(*LongBrMBB, Pos) - .append(BuildMI(*MF, DL, TII->get(Mips::J)).addMBB(TgtMBB)) - .append(BuildMI(*MF, DL, TII->get(Mips::NOP))); + if (Subtarget.hasMips32r6()) + BuildMI(*LongBrMBB, Pos, DL, + TII->get(Subtarget.inMicroMipsMode() ? Mips::BC_MMR6 : Mips::BC)) + .addMBB(TgtMBB); + else + MIBundleBuilder(*LongBrMBB, Pos) + .append(BuildMI(*MF, DL, TII->get(Mips::J)).addMBB(TgtMBB)) + .append(BuildMI(*MF, DL, TII->get(Mips::NOP))); assert(LongBrMBB->size() == LongBranchSeqSize); } @@ -474,8 +544,8 @@ bool MipsLongBranch::runOnMachineFunction(MachineFunction &F) { IsPIC = TM.isPositionIndependent(); ABI = static_cast(TM).getABI(); - LongBranchSeqSize = - !IsPIC ? 2 : (ABI.IsN64() ? 10 : (!STI.isTargetNaCl() ? 9 : 10)); + LongBranchSeqSize = IsPIC ? ((ABI.IsN64() || STI.isTargetNaCl()) ? 10 : 9) + : (STI.hasMips32r6() ? 1 : 2); if (STI.inMips16Mode() || !STI.enableLongBranchPass()) return false; diff --git a/lib/Target/Mips/MipsMachineFunction.cpp b/lib/Target/Mips/MipsMachineFunction.cpp index 48d266fcc0cb..1ee56d830090 100644 --- a/lib/Target/Mips/MipsMachineFunction.cpp +++ b/lib/Target/Mips/MipsMachineFunction.cpp @@ -41,9 +41,7 @@ unsigned MipsFunctionInfo::getGlobalBaseReg() { STI.inMips16Mode() ? &Mips::CPU16RegsRegClass : STI.inMicroMipsMode() - ? STI.hasMips64() - ? &Mips::GPRMM16_64RegClass - : &Mips::GPRMM16RegClass + ? 
&Mips::GPRMM16RegClass : static_cast(MF.getTarget()) .getABI() .IsN64() diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp index a783facae019..0e0d82270c89 100644 --- a/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/lib/Target/Mips/MipsRegisterInfo.cpp @@ -54,8 +54,7 @@ MipsRegisterInfo::getPointerRegClass(const MachineFunction &MF, case MipsPtrClass::Default: return ABI.ArePtrs64bit() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass; case MipsPtrClass::GPR16MM: - return ABI.ArePtrs64bit() ? &Mips::GPRMM16_64RegClass - : &Mips::GPRMM16RegClass; + return &Mips::GPRMM16RegClass; case MipsPtrClass::StackPointer: return ABI.ArePtrs64bit() ? &Mips::SP64RegClass : &Mips::SP32RegClass; case MipsPtrClass::GlobalPointer: @@ -94,8 +93,8 @@ MipsRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, const MCPhysReg * MipsRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { const MipsSubtarget &Subtarget = MF->getSubtarget(); - const Function *F = MF->getFunction(); - if (F->hasFnAttribute("interrupt")) { + const Function &F = MF->getFunction(); + if (F.hasFnAttribute("interrupt")) { if (Subtarget.hasMips64()) return Subtarget.hasMips64r6() ? CSR_Interrupt_64R6_SaveList : CSR_Interrupt_64_SaveList; @@ -239,7 +238,7 @@ getReservedRegs(const MachineFunction &MF) const { Reserved.set(Mips::RA_64); Reserved.set(Mips::T0); Reserved.set(Mips::T1); - if (MF.getFunction()->hasFnAttribute("saveS2") || MipsFI->hasSaveS2()) + if (MF.getFunction().hasFnAttribute("saveS2") || MipsFI->hasSaveS2()) Reserved.set(Mips::S2); } diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td index f64d91aad858..c85ee20273c0 100644 --- a/lib/Target/Mips/MipsRegisterInfo.td +++ b/lib/Target/Mips/MipsRegisterInfo.td @@ -38,7 +38,7 @@ class MipsRegWithSubRegs Enc, string n, list subregs> let Namespace = "Mips"; } -// Mips CPU Registers +// Mips CPU Registers. 
class MipsGPRReg Enc, string n> : MipsReg; // Mips 64-bit CPU Registers @@ -349,12 +349,6 @@ def GPR64 : RegisterClass<"Mips", [i64], 64, (add // Reserved K0_64, K1_64, GP_64, SP_64, FP_64, RA_64)>; -def GPRMM16_64 : RegisterClass<"Mips", [i64], 64, (add - // Callee save - S0_64, S1_64, - // Return Values and Arguments - V0_64, V1_64, A0_64, A1_64, A2_64, A3_64)>; - def CPU16Regs : RegisterClass<"Mips", [i32], 32, (add // Return Values and Arguments V0, V1, A0, A1, A2, A3, diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp index 5d4fbffa20a8..eb1eea7925c0 100644 --- a/lib/Target/Mips/MipsSEFrameLowering.cpp +++ b/lib/Target/Mips/MipsSEFrameLowering.cpp @@ -434,7 +434,7 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF, BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); - if (MF.getFunction()->hasFnAttribute("interrupt")) + if (MF.getFunction().hasFnAttribute("interrupt")) emitInterruptPrologueStub(MF, MBB); const std::vector &CSI = MFI.getCalleeSavedInfo(); @@ -582,7 +582,7 @@ void MipsSEFrameLowering::emitInterruptPrologueStub( // Perform ISR handling like GCC StringRef IntKind = - MF.getFunction()->getFnAttribute("interrupt").getValueAsString(); + MF.getFunction().getFnAttribute("interrupt").getValueAsString(); const TargetRegisterClass *PtrRC = &Mips::GPR32RegClass; // EIC interrupt handling needs to read the Cause register to disable @@ -726,7 +726,7 @@ void MipsSEFrameLowering::emitEpilogue(MachineFunction &MF, } } - if (MF.getFunction()->hasFnAttribute("interrupt")) + if (MF.getFunction().hasFnAttribute("interrupt")) emitInterruptEpilogueStub(MF, MBB); // Get the number of bytes from FrameInfo @@ -809,8 +809,8 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB, // spilled to the stack frame. 
bool IsLOHI = (Reg == Mips::LO0 || Reg == Mips::LO0_64 || Reg == Mips::HI0 || Reg == Mips::HI0_64); - const Function *Func = MBB.getParent()->getFunction(); - if (IsLOHI && Func->hasFnAttribute("interrupt")) { + const Function &Func = MBB.getParent()->getFunction(); + if (IsLOHI && Func.hasFnAttribute("interrupt")) { DebugLoc DL = MI->getDebugLoc(); unsigned Op = 0; diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp index 3c6a7d7a6651..97e9a83d7dfe 100644 --- a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp @@ -161,7 +161,7 @@ void MipsSEDAGToDAGISel::initGlobalBaseReg(MachineFunction &MF) { // lui $v0, %hi(%neg(%gp_rel(fname))) // daddu $v1, $v0, $t9 // daddiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname))) - const GlobalValue *FName = MF.getFunction(); + const GlobalValue *FName = &MF.getFunction(); BuildMI(MBB, I, DL, TII.get(Mips::LUi64), V0) .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI); BuildMI(MBB, I, DL, TII.get(Mips::DADDu), V1).addReg(V0) @@ -190,7 +190,7 @@ void MipsSEDAGToDAGISel::initGlobalBaseReg(MachineFunction &MF) { // lui $v0, %hi(%neg(%gp_rel(fname))) // addu $v1, $v0, $t9 // addiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname))) - const GlobalValue *FName = MF.getFunction(); + const GlobalValue *FName = &MF.getFunction(); BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0) .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI); BuildMI(MBB, I, DL, TII.get(Mips::ADDu), V1).addReg(V0).addReg(Mips::T9); @@ -288,7 +288,7 @@ void MipsSEDAGToDAGISel::selectAddE(SDNode *Node, const SDLoc &DL) const { SDValue(Carry, 0)}; SDNode *DSPCFWithCarry = CurDAG->getMachineNode(Mips::INS, DL, MVT::i32, Ops); - // My reading of the the MIPS DSP 3.01 specification isn't as clear as I + // My reading of the MIPS DSP 3.01 specification isn't as clear as I // would like about whether bit 20 always gets overwritten by addwc. // Hence take an extremely conservative view and presume it's sticky. We // therefore need to clear it. @@ -1247,7 +1247,7 @@ bool MipsSEDAGToDAGISel::trySelect(SDNode *Node) { // handled by the ldi case. if (ResNonZero) { IntegerType *Int32Ty = - IntegerType::get(MF->getFunction()->getContext(), 32); + IntegerType::get(MF->getFunction().getContext(), 32); const ConstantInt *Const32 = ConstantInt::get(Int32Ty, 32); SDValue Ops[4] = {HiResNonZero ? SDValue(HiRes, 0) : Zero64Val, CurDAG->getConstant(*Const32, DL, MVT::i32), diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp index 798d86622e5d..59b7679971cd 100644 --- a/lib/Target/Mips/MipsSEInstrInfo.cpp +++ b/lib/Target/Mips/MipsSEInstrInfo.cpp @@ -231,8 +231,8 @@ storeRegToStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, // Hi, Lo are normally caller save but they are callee save // for interrupt handling. 
- const Function *Func = MBB.getParent()->getFunction(); - if (Func->hasFnAttribute("interrupt")) { + const Function &Func = MBB.getParent()->getFunction(); + if (Func.hasFnAttribute("interrupt")) { if (Mips::HI32RegClass.hasSubClassEq(RC)) { BuildMI(MBB, I, DL, get(Mips::MFHI), Mips::K0); SrcReg = Mips::K0; @@ -262,8 +262,8 @@ loadRegFromStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOLoad); unsigned Opc = 0; - const Function *Func = MBB.getParent()->getFunction(); - bool ReqIndirectLoad = Func->hasFnAttribute("interrupt") && + const Function &Func = MBB.getParent()->getFunction(); + bool ReqIndirectLoad = Func.hasFnAttribute("interrupt") && (DestReg == Mips::LO0 || DestReg == Mips::LO0_64 || DestReg == Mips::HI0 || DestReg == Mips::HI0_64); diff --git a/lib/Target/Mips/MipsSERegisterInfo.cpp b/lib/Target/Mips/MipsSERegisterInfo.cpp index 2ff6b99e78ff..9b89d4077a77 100644 --- a/lib/Target/Mips/MipsSERegisterInfo.cpp +++ b/lib/Target/Mips/MipsSERegisterInfo.cpp @@ -88,10 +88,8 @@ static inline unsigned getLoadStoreOffsetSizeInBits(const unsigned Opcode, case Mips::SCE: return 16; case Mips::LLE_MM: - case Mips::LLE_MMR6: case Mips::LL_MM: case Mips::SCE_MM: - case Mips::SCE_MMR6: case Mips::SC_MM: return 12; case Mips::LL64_R6: diff --git a/lib/Target/Mips/MipsScheduleP5600.td b/lib/Target/Mips/MipsScheduleP5600.td index fedfac24e4e7..440f93d5b7eb 100644 --- a/lib/Target/Mips/MipsScheduleP5600.td +++ b/lib/Target/Mips/MipsScheduleP5600.td @@ -18,8 +18,8 @@ def MipsP5600Model : SchedMachineModel { list UnsupportedFeatures = [HasMips32r6, HasMips64r6, HasMips64, HasMips64r2, HasCnMips, InMicroMips, InMips16Mode, - HasMicroMips32r6, HasMicroMips64r6, - HasDSP, HasDSPR2, HasMT]; + HasMicroMips32r6, HasDSP, + HasDSPR2, HasMT]; } diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp index cd462c75e7a0..cbc2ef79e4fe 100644 --- a/lib/Target/Mips/MipsSubtarget.cpp +++ b/lib/Target/Mips/MipsSubtarget.cpp @@ -57,6 +57,10 @@ static cl::opt GPOpt("mgpopt", cl::Hidden, cl::desc("Enable gp-relative addressing of mips small data items")); +bool MipsSubtarget::DspWarningPrinted = false; + +bool MipsSubtarget::MSAWarningPrinted = false; + void MipsSubtarget::anchor() {} MipsSubtarget::MipsSubtarget(const Triple &TT, StringRef CPU, StringRef FS, @@ -104,6 +108,9 @@ MipsSubtarget::MipsSubtarget(const Triple &TT, StringRef CPU, StringRef FS, if (IsFPXX && (isABI_N32() || isABI_N64())) report_fatal_error("FPXX is not permitted for the N32/N64 ABI's.", false); + if (hasMips64r6() && InMicroMipsMode) + report_fatal_error("microMIPS64R6 is not supported", false); + if (hasMips32r6()) { StringRef ISA = hasMips64r6() ? 
"MIPS64r6" : "MIPS32r6"; @@ -126,6 +133,40 @@ MipsSubtarget::MipsSubtarget(const Triple &TT, StringRef CPU, StringRef FS, << "\n"; UseSmallSection = false; } + + if (hasDSPR2() && !DspWarningPrinted) { + if (hasMips64() && !hasMips64r2()) { + errs() << "warning: the 'dspr2' ASE requires MIPS64 revision 2 or " + << "greater\n"; + DspWarningPrinted = true; + } else if (hasMips32() && !hasMips32r2()) { + errs() << "warning: the 'dspr2' ASE requires MIPS32 revision 2 or " + << "greater\n"; + DspWarningPrinted = true; + } + } else if (hasDSP() && !DspWarningPrinted) { + if (hasMips64() && !hasMips64r2()) { + errs() << "warning: the 'dsp' ASE requires MIPS64 revision 2 or " + << "greater\n"; + DspWarningPrinted = true; + } else if (hasMips32() && !hasMips32r2()) { + errs() << "warning: the 'dsp' ASE requires MIPS32 revision 2 or " + << "greater\n"; + DspWarningPrinted = true; + } + } + + if (hasMSA() && !MSAWarningPrinted) { + if (hasMips64() && !hasMips64r5()) { + errs() << "warning: the 'msa' ASE requires MIPS64 revision 5 or " + << "greater\n"; + MSAWarningPrinted = true; + } else if (hasMips32() && !hasMips32r5()) { + errs() << "warning: the 'msa' ASE requires MIPS32 revision 5 or " + << "greater\n"; + MSAWarningPrinted = true; + } + } } bool MipsSubtarget::isPositionIndependent() const { diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h index deea4af521c1..bdf71fce85a7 100644 --- a/lib/Target/Mips/MipsSubtarget.h +++ b/lib/Target/Mips/MipsSubtarget.h @@ -44,6 +44,12 @@ class MipsSubtarget : public MipsGenSubtargetInfo { enum class CPU { P5600 }; + // Used to avoid printing dsp warnings multiple times. + static bool DspWarningPrinted; + + // Used to avoid printing msa warnings multiple times. + static bool MSAWarningPrinted; + // Mips architecture version MipsArchEnum MipsArchVersion; @@ -265,7 +271,6 @@ class MipsSubtarget : public MipsGenSubtargetInfo { } bool inMicroMipsMode() const { return InMicroMipsMode; } bool inMicroMips32r6Mode() const { return InMicroMipsMode && hasMips32r6(); } - bool inMicroMips64r6Mode() const { return InMicroMipsMode && hasMips64r6(); } bool hasDSP() const { return HasDSP; } bool hasDSPR2() const { return HasDSPR2; } bool hasDSPR3() const { return HasDSPR3; } diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp index 9a12b98984e8..fb79a4bf40c5 100644 --- a/lib/Target/Mips/MipsTargetMachine.cpp +++ b/lib/Target/Mips/MipsTargetMachine.cpp @@ -200,7 +200,7 @@ MipsTargetMachine::getSubtargetImpl(const Function &F) const { void MipsTargetMachine::resetSubtarget(MachineFunction *MF) { DEBUG(dbgs() << "resetSubtarget\n"); - Subtarget = const_cast(getSubtargetImpl(*MF->getFunction())); + Subtarget = const_cast(getSubtargetImpl(MF->getFunction())); MF->setSubtarget(Subtarget); } @@ -259,17 +259,16 @@ void MipsPassConfig::addPreRegAlloc() { addPass(createMipsOptimizePICCallPass()); } -TargetIRAnalysis MipsTargetMachine::getTargetIRAnalysis() { - return TargetIRAnalysis([this](const Function &F) { - if (Subtarget->allowMixed16_32()) { - DEBUG(errs() << "No Target Transform Info Pass Added\n"); - // FIXME: This is no longer necessary as the TTI returned is per-function. 
- return TargetTransformInfo(F.getParent()->getDataLayout()); - } - - DEBUG(errs() << "Target Transform Info Pass Added\n"); - return TargetTransformInfo(BasicTTIImpl(this, F)); - }); +TargetTransformInfo +MipsTargetMachine::getTargetTransformInfo(const Function &F) { + if (Subtarget->allowMixed16_32()) { + DEBUG(errs() << "No Target Transform Info Pass Added\n"); + // FIXME: This is no longer necessary as the TTI returned is per-function. + return TargetTransformInfo(F.getParent()->getDataLayout()); + } + + DEBUG(errs() << "Target Transform Info Pass Added\n"); + return TargetTransformInfo(BasicTTIImpl(this, F)); } // Implemented by targets that want to run passes immediately before diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h index ccfc9a938d9c..56e6e5d8daa2 100644 --- a/lib/Target/Mips/MipsTargetMachine.h +++ b/lib/Target/Mips/MipsTargetMachine.h @@ -44,7 +44,7 @@ class MipsTargetMachine : public LLVMTargetMachine { CodeGenOpt::Level OL, bool JIT, bool isLittle); ~MipsTargetMachine() override; - TargetIRAnalysis getTargetIRAnalysis() override; + TargetTransformInfo getTargetTransformInfo(const Function &F) override; const MipsSubtarget *getSubtargetImpl() const { if (Subtarget) diff --git a/lib/Target/Mips/Relocation.txt b/lib/Target/Mips/Relocation.txt index f1a6fd8645f6..2f98e16886a1 100644 --- a/lib/Target/Mips/Relocation.txt +++ b/lib/Target/Mips/Relocation.txt @@ -69,40 +69,7 @@ to MIPS32 to compute addresses for the static relocation model. The instantiation in Mips64InstrInfo.td is used for MIPS64 in ILP32 mode, as guarded by the predicate "SYM_32" and also for a submode of -LP64 where symbols are assumed to be 32 bits wide. A similar -multiclass for MIPS64 in LP64 mode is also defined: - - // lib/Target/Mips/Mips64InstrInfo.td - multiclass MipsHighestHigherHiLoRelocs { - ... - def : MipsPat<(MipsHighest (i64 tglobaladdr:$in)), - (Lui tglobaladdr:$in)>; - ... - def : MipsPat<(MipsHigher (i64 tglobaladdr:$in)), - (Daddiu ZERO_64, tglobaladdr:$in)>; - ... - def : MipsPat<(add GPR64:$hi, (MipsHigher (i64 tglobaladdr:$lo))), - (Daddiu GPR64:$hi, tglobaladdr:$lo)>; - ... - def : MipsPat<(add GPR64:$hi, (MipsHi (i64 tglobaladdr:$lo))), - (Daddiu GPR64:$hi, tglobaladdr:$lo)>; - ... - def : MipsPat<(add GPR64:$hi, (MipsLo (i64 tglobaladdr:$lo))), - (Daddiu GPR64:$hi, tglobaladdr:$lo)>; - } - -and it is instantiated twice: - - // lib/Target/Mips/Mips64InstrInfo.td - defm : MipsHighestHigherHiLoRelocs, SYM_64; - // lib/Target/Mips/MicroMips64r6InstrInfo.td - defm : MipsHighestHigherHiLoRelocs, SYM_64, - ISA_MICROMIPS64R6; - -These patterns are used during instruction selection to match -MipsISD::{Highest, Higher, Hi, Lo} to a specific machine instruction -and operands. +LP64 where symbols are assumed to be 32 bits wide. 
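The LP64 patterns quoted in the removed text recombine four relocation pieces through a lui/daddiu/dsll sequence; each piece is pre-biased so that the sign extension of the pieces below it cancels out. A standalone C++ sketch of that recombination, assuming the usual carry-biased definitions of %highest/%higher/%hi/%lo (illustrative only, not part of any LLVM source file):

    #include <cassert>
    #include <cstdint>

    // Sign-extend the low 16 bits, as daddiu does with its immediate.
    static int64_t sext16(uint64_t V) { return static_cast<int16_t>(V & 0xffff); }

    // Carry-biased pieces (assumed binutils-style definitions): each bias
    // pre-adds the borrow produced by the sign-extended pieces below it.
    static int64_t lo(uint64_t A)      { return sext16(A); }
    static int64_t hi(uint64_t A)      { return sext16((A + 0x8000) >> 16); }
    static int64_t higher(uint64_t A)  { return sext16((A + 0x80008000ULL) >> 32); }
    static int64_t highest(uint64_t A) { return sext16((A + 0x800080008000ULL) >> 48); }

    int main() {
      for (uint64_t Addr : {0x123456789abcdef0ULL, 0x0000000080008000ULL,
                            0xffffffffffffffffULL}) {
        // lui %highest; daddiu %higher; dsll 16; daddiu %hi; dsll 16; daddiu %lo
        uint64_t V = static_cast<uint64_t>(highest(Addr));
        V = (V << 16) + static_cast<uint64_t>(higher(Addr));
        V = (V << 16) + static_cast<uint64_t>(hi(Addr));
        V = (V << 16) + static_cast<uint64_t>(lo(Addr));
        assert(V == Addr);
      }
      return 0;
    }
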
More details on how multiclasses in TableGen work can be found in the section "Multiclass definitions and instances" in the document diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index d0b47f61e114..753cfff4cdae 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -457,8 +457,8 @@ void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) { void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF, raw_ostream &O) { - const Function *F = MF.getFunction(); - printReturnValStr(F, O); + const Function &F = MF.getFunction(); + printReturnValStr(&F, O); } // Return true if MBB is the header of a loop marked with @@ -502,13 +502,13 @@ void NVPTXAsmPrinter::EmitFunctionEntryLabel() { raw_svector_ostream O(Str); if (!GlobalsEmitted) { - emitGlobals(*MF->getFunction()->getParent()); + emitGlobals(*MF->getFunction().getParent()); GlobalsEmitted = true; } // Set up MRI = &MF->getRegInfo(); - F = MF->getFunction(); + F = &MF->getFunction(); emitLinkageDirective(F, O); if (isKernelFunction(*F)) O << ".entry "; @@ -536,7 +536,7 @@ void NVPTXAsmPrinter::EmitFunctionBodyStart() { SmallString<128> Str; raw_svector_ostream O(Str); - emitDemotedVars(MF->getFunction(), O); + emitDemotedVars(&MF->getFunction(), O); OutStreamer->EmitRawText(O.str()); } @@ -1708,8 +1708,8 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { void NVPTXAsmPrinter::emitFunctionParamList(const MachineFunction &MF, raw_ostream &O) { - const Function *F = MF.getFunction(); - emitFunctionParamList(F, O); + const Function &F = MF.getFunction(); + emitFunctionParamList(&F, O); } void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters( @@ -1797,11 +1797,7 @@ void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) { llvm_unreachable("unsupported fp type"); APInt API = APF.bitcastToAPInt(); - std::string hexstr(utohexstr(API.getZExtValue())); - O << lead; - if (hexstr.length() < numHex) - O << std::string(numHex - hexstr.length(), '0'); - O << utohexstr(API.getZExtValue()); + O << lead << format_hex_no_prefix(API.getZExtValue(), numHex, /*Upper=*/true); } void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) { @@ -2156,7 +2152,7 @@ NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric) raw_string_ostream OS(S); OS << "Unsupported expression in static initializer: "; CE->printAsOperand(OS, /*PrintType=*/false, - !MF ? nullptr : MF->getFunction()->getParent()); + !MF ? nullptr : MF->getFunction().getParent()); report_fatal_error(OS.str()); } @@ -2170,7 +2166,7 @@ NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric) raw_string_ostream OS(S); OS << "Unsupported expression in static initializer: "; CE->printAsOperand(OS, /*PrintType=*/ false, - !MF ? nullptr : MF->getFunction()->getParent()); + !MF ? 
nullptr : MF->getFunction().getParent()); report_fatal_error(OS.str()); } diff --git a/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp b/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp index 7d4be8e809cf..f02c33f9249a 100644 --- a/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp +++ b/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp @@ -18,6 +18,7 @@ //===----------------------------------------------------------------------===// #include "NVPTX.h" +#include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Module.h" @@ -61,6 +62,11 @@ bool NVPTXAssignValidGlobalNames::runOnModule(Module &M) { } } + // Do the same for local functions. + for (Function &F : M.functions()) + if (F.hasLocalLinkage()) + F.setName(cleanUpName(F.getName())); + return true; } diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index 714260d372b7..57e2acc0d7e0 100644 --- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -1003,7 +1003,7 @@ static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget, return true; // Load wasn't explicitly invariant. Attempt to infer invariance. - if (!isKernelFunction(*F->getFunction())) + if (!isKernelFunction(F->getFunction())) return false; // We use GetUnderlyingObjects() here instead of diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp index d6c1e9c1645e..f1e4251a44b5 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -123,10 +123,10 @@ bool NVPTXTargetLowering::useF32FTZ(const MachineFunction &MF) const { // If nvptx-f32ftz is used on the command-line, always honor it return FtzEnabled; } else { - const Function *F = MF.getFunction(); + const Function &F = MF.getFunction(); // Otherwise, check for an nvptx-f32ftz attribute on the function - if (F->hasFnAttribute("nvptx-f32ftz")) - return F->getFnAttribute("nvptx-f32ftz").getValueAsString() == "true"; + if (F.hasFnAttribute("nvptx-f32ftz")) + return F.getFnAttribute("nvptx-f32ftz").getValueAsString() == "true"; else return false; } @@ -1561,8 +1561,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, Chain = DAG.getMemIntrinsicNode( Op, dl, DAG.getVTList(MVT::Other, MVT::Glue), StoreOperands, TheStoreType, MachinePointerInfo(), EltAlign, - /* Volatile */ false, /* ReadMem */ false, - /* WriteMem */ true, /* Size */ 0); + MachineMemOperand::MOStore); InFlag = Chain.getValue(1); // Cleanup. 
@@ -1623,8 +1622,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl, CopyParamVTs, CopyParamOps, elemtype, MachinePointerInfo(), /* Align */ 0, - /* Volatile */ false, /* ReadMem */ false, - /* WriteMem */ true, /* Size */ 0); + MachineMemOperand::MOStore); InFlag = Chain.getValue(1); } @@ -1810,8 +1808,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, DAG.getConstant(Offsets[VecIdx], dl, MVT::i32), InFlag}; SDValue RetVal = DAG.getMemIntrinsicNode( Op, dl, DAG.getVTList(LoadVTs), LoadOperands, TheLoadType, - MachinePointerInfo(), EltAlign, /* Volatile */ false, - /* ReadMem */ true, /* WriteMem */ false, /* Size */ 0); + MachinePointerInfo(), EltAlign, + MachineMemOperand::MOLoad); for (unsigned j = 0; j < NumElts; ++j) { SDValue Ret = RetVal.getValue(j); @@ -2331,7 +2329,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( const DataLayout &DL = DAG.getDataLayout(); auto PtrVT = getPointerTy(DAG.getDataLayout()); - const Function *F = MF.getFunction(); + const Function *F = &MF.getFunction(); const AttributeList &PAL = F->getAttributes(); const TargetLowering *TLI = STI.getTargetLowering(); @@ -2527,7 +2525,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, const SmallVectorImpl &OutVals, const SDLoc &dl, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); - Type *RetTy = MF.getFunction()->getReturnType(); + Type *RetTy = MF.getFunction().getReturnType(); bool isABI = (STI.getSmVersion() >= 20); assert(isABI && "Non-ABI compilation is not supported"); @@ -2596,8 +2594,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, Chain = DAG.getMemIntrinsicNode(Op, dl, DAG.getVTList(MVT::Other), StoreOperands, TheStoreType, MachinePointerInfo(), /* Align */ 1, - /* Volatile */ false, /* ReadMem */ false, - /* WriteMem */ true, /* Size */ 0); + MachineMemOperand::MOStore); // Cleanup vector state. StoreOperands.clear(); } @@ -3317,7 +3314,8 @@ static unsigned getOpcForSurfaceInstr(unsigned Intrinsic) { // of destination // pointer. In particular, the address space information. bool NVPTXTargetLowering::getTgtMemIntrinsic( - IntrinsicInfo &Info, const CallInst &I, unsigned Intrinsic) const { + IntrinsicInfo &Info, const CallInst &I, + MachineFunction &MF, unsigned Intrinsic) const { switch (Intrinsic) { default: return false; @@ -3328,8 +3326,9 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( // in order to model data exchange with other threads, but perform no real // memory accesses. Info.memVT = MVT::i1; - Info.readMem = true; // Our result depends on other thread's arguments. - Info.writeMem = true; // Other threads depend on our thread's argument. + + // Our result depends on both our and other thread's arguments. 
+ Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; return true; case Intrinsic::nvvm_wmma_load_a_f16_col: case Intrinsic::nvvm_wmma_load_a_f16_row: @@ -3359,9 +3358,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.memVT = MVT::v8f16; Info.ptrVal = I.getArgOperand(0); Info.offset = 0; - Info.vol = false; - Info.readMem = true; - Info.writeMem = false; + Info.flags = MachineMemOperand::MOLoad; Info.align = 16; return true; } @@ -3382,9 +3379,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.memVT = MVT::v4f16; Info.ptrVal = I.getArgOperand(0); Info.offset = 0; - Info.vol = false; - Info.readMem = true; - Info.writeMem = false; + Info.flags = MachineMemOperand::MOLoad; Info.align = 16; return true; } @@ -3405,9 +3400,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.memVT = MVT::v8f32; Info.ptrVal = I.getArgOperand(0); Info.offset = 0; - Info.vol = false; - Info.readMem = true; - Info.writeMem = false; + Info.flags = MachineMemOperand::MOLoad; Info.align = 16; return true; } @@ -3428,9 +3421,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.memVT = MVT::v4f16; Info.ptrVal = I.getArgOperand(0); Info.offset = 0; - Info.vol = false; - Info.readMem = false; - Info.writeMem = true; + Info.flags = MachineMemOperand::MOStore; Info.align = 16; return true; } @@ -3451,9 +3442,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.memVT = MVT::v8f32; Info.ptrVal = I.getArgOperand(0); Info.offset = 0; - Info.vol = false; - Info.readMem = false; - Info.writeMem = true; + Info.flags = MachineMemOperand::MOStore; Info.align = 16; return true; } @@ -3490,9 +3479,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.memVT = getValueType(DL, I.getType()); Info.ptrVal = I.getArgOperand(0); Info.offset = 0; - Info.vol = false; - Info.readMem = true; - Info.writeMem = true; + Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; Info.align = 0; return true; } @@ -3510,9 +3497,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.memVT = getValueType(DL, I.getType()); Info.ptrVal = I.getArgOperand(0); Info.offset = 0; - Info.vol = false; - Info.readMem = true; - Info.writeMem = false; + Info.flags = MachineMemOperand::MOLoad; Info.align = cast(I.getArgOperand(1))->getZExtValue(); return true; @@ -3531,9 +3516,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.memVT = getValueType(DL, I.getType()); Info.ptrVal = I.getArgOperand(0); Info.offset = 0; - Info.vol = false; - Info.readMem = true; - Info.writeMem = false; + Info.flags = MachineMemOperand::MOLoad; Info.align = cast(I.getArgOperand(1))->getZExtValue(); return true; @@ -3599,9 +3582,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.memVT = MVT::v4f32; Info.ptrVal = nullptr; Info.offset = 0; - Info.vol = false; - Info.readMem = true; - Info.writeMem = false; + Info.flags = MachineMemOperand::MOLoad; Info.align = 16; return true; @@ -3721,9 +3702,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.memVT = MVT::v4i32; Info.ptrVal = nullptr; Info.offset = 0; - Info.vol = false; - Info.readMem = true; - Info.writeMem = false; + Info.flags = MachineMemOperand::MOLoad; Info.align = 16; return true; @@ -3776,9 +3755,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.memVT = MVT::i8; Info.ptrVal = nullptr; Info.offset = 0; - Info.vol = false; - Info.readMem = true; - Info.writeMem = false; + Info.flags = MachineMemOperand::MOLoad; Info.align = 16; return true; @@ -3831,9 +3808,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.memVT = MVT::i16; Info.ptrVal = nullptr; 
Info.offset = 0; - Info.vol = false; - Info.readMem = true; - Info.writeMem = false; + Info.flags = MachineMemOperand::MOLoad; Info.align = 16; return true; @@ -3886,9 +3861,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.memVT = MVT::i32; Info.ptrVal = nullptr; Info.offset = 0; - Info.vol = false; - Info.readMem = true; - Info.writeMem = false; + Info.flags = MachineMemOperand::MOLoad; Info.align = 16; return true; @@ -3926,9 +3899,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.memVT = MVT::i64; Info.ptrVal = nullptr; Info.offset = 0; - Info.vol = false; - Info.readMem = true; - Info.writeMem = false; + Info.flags = MachineMemOperand::MOLoad; Info.align = 16; return true; } @@ -4051,9 +4022,9 @@ bool NVPTXTargetLowering::allowUnsafeFPMath(MachineFunction &MF) const { return true; // Allow unsafe math if unsafe-fp-math attribute explicitly says so. - const Function *F = MF.getFunction(); - if (F->hasFnAttribute("unsafe-fp-math")) { - Attribute Attr = F->getFnAttribute("unsafe-fp-math"); + const Function &F = MF.getFunction(); + if (F.hasFnAttribute("unsafe-fp-math")) { + Attribute Attr = F.getFnAttribute("unsafe-fp-math"); StringRef Val = Attr.getValueAsString(); if (Val == "true") return true; diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h index 971945dedb3e..ef04a8573d45 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.h +++ b/lib/Target/NVPTX/NVPTXISelLowering.h @@ -448,6 +448,7 @@ class NVPTXTargetLowering : public TargetLowering { const char *getTargetNodeName(unsigned Opcode) const override; bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, + MachineFunction &MF, unsigned Intrinsic) const override; /// isLegalAddressingMode - Return true if the addressing mode represented diff --git a/lib/Target/NVPTX/NVPTXIntrinsics.td b/lib/Target/NVPTX/NVPTXIntrinsics.td index 478f3e9d0577..c932758bd0ae 100644 --- a/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -979,6 +979,33 @@ def INT_NVVM_BITCAST_LL2D : F_MATH_1<"mov.b64 \t$dst, $src0;", Float64Regs, def INT_NVVM_BITCAST_D2LL : F_MATH_1<"mov.b64 \t$dst, $src0;", Int64Regs, Float64Regs, int_nvvm_bitcast_d2ll>; +// +// FNS +// + +class INT_FNS_MBO + : NVPTXInst<(outs Int32Regs:$dst), ins, + "fns.b32 \t$dst, $mask, $base, $offset;", + [(set Int32Regs:$dst, Operands )]>, + Requires<[hasPTX60, hasSM30]>; + +def INT_FNS_rrr : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset), + (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset)>; +def INT_FNS_rri : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, i32imm:$offset), + (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, imm:$offset)>; +def INT_FNS_rir : INT_FNS_MBO<(ins Int32Regs:$mask, i32imm:$base, Int32Regs:$offset), + (int_nvvm_fns Int32Regs:$mask, imm:$base, Int32Regs:$offset)>; +def INT_FNS_rii : INT_FNS_MBO<(ins Int32Regs:$mask, i32imm:$base, i32imm:$offset), + (int_nvvm_fns Int32Regs:$mask, imm:$base, imm:$offset)>; +def INT_FNS_irr : INT_FNS_MBO<(ins i32imm:$mask, Int32Regs:$base, Int32Regs:$offset), + (int_nvvm_fns imm:$mask, Int32Regs:$base, Int32Regs:$offset)>; +def INT_FNS_iri : INT_FNS_MBO<(ins i32imm:$mask, Int32Regs:$base, i32imm:$offset), + (int_nvvm_fns imm:$mask, Int32Regs:$base, imm:$offset)>; +def INT_FNS_iir : INT_FNS_MBO<(ins i32imm:$mask, i32imm:$base, Int32Regs:$offset), + (int_nvvm_fns imm:$mask, imm:$base, Int32Regs:$offset)>; +def INT_FNS_iii : INT_FNS_MBO<(ins i32imm:$mask, i32imm:$base, i32imm:$offset), + (int_nvvm_fns imm:$mask, 
imm:$base, imm:$offset)>; + //----------------------------------- // Atomic Functions //----------------------------------- diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp index 989f0a3aba2f..52ced266b91c 100644 --- a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp +++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp @@ -111,23 +111,13 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) { ConstantInt *CopyLen = ConstantInt::get(Type::getInt32Ty(Context), NumLoads); - if (!TTI.useWideIRMemcpyLoopLowering()) { - createMemCpyLoop(/* ConvertedInst */ SI, - /* SrcAddr */ SrcAddr, /* DstAddr */ DstAddr, - /* CopyLen */ CopyLen, - /* SrcAlign */ LI->getAlignment(), - /* DestAlign */ SI->getAlignment(), - /* SrcIsVolatile */ LI->isVolatile(), - /* DstIsVolatile */ SI->isVolatile()); - } else { - createMemCpyLoopKnownSize(/* ConvertedInst */ SI, - /* SrcAddr */ SrcAddr, /* DstAddr */ DstAddr, - /* CopyLen */ CopyLen, - /* SrcAlign */ LI->getAlignment(), - /* DestAlign */ SI->getAlignment(), - /* SrcIsVolatile */ LI->isVolatile(), - /* DstIsVolatile */ SI->isVolatile(), TTI); - } + createMemCpyLoopKnownSize(/* ConvertedInst */ SI, + /* SrcAddr */ SrcAddr, /* DstAddr */ DstAddr, + /* CopyLen */ CopyLen, + /* SrcAlign */ LI->getAlignment(), + /* DestAlign */ SI->getAlignment(), + /* SrcIsVolatile */ LI->isVolatile(), + /* DstIsVolatile */ SI->isVolatile(), TTI); SI->eraseFromParent(); LI->eraseFromParent(); diff --git a/lib/Target/NVPTX/NVPTXMCExpr.cpp b/lib/Target/NVPTX/NVPTXMCExpr.cpp index 86a28f7d0700..a754a6a36dab 100644 --- a/lib/Target/NVPTX/NVPTXMCExpr.cpp +++ b/lib/Target/NVPTX/NVPTXMCExpr.cpp @@ -11,6 +11,7 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" +#include "llvm/Support/Format.h" using namespace llvm; #define DEBUG_TYPE "nvptx-mcexpr" @@ -47,10 +48,7 @@ void NVPTXFloatMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const { } APInt API = APF.bitcastToAPInt(); - std::string HexStr(utohexstr(API.getZExtValue())); - if (HexStr.length() < NumHex) - OS << std::string(NumHex - HexStr.length(), '0'); - OS << utohexstr(API.getZExtValue()); + OS << format_hex_no_prefix(API.getZExtValue(), NumHex, /*Upper=*/true); } const NVPTXGenericMCSymbolRefExpr* diff --git a/lib/Target/NVPTX/NVPTXPeephole.cpp b/lib/Target/NVPTX/NVPTXPeephole.cpp index 7258e818e728..02c32c68ee2c 100644 --- a/lib/Target/NVPTX/NVPTXPeephole.cpp +++ b/lib/Target/NVPTX/NVPTXPeephole.cpp @@ -22,11 +22,11 @@ // This peephole pass optimizes these cases, for example // // It will transform the following pattern -// %vreg0 = LEA_ADDRi64 %VRFrame, 4 -// %vreg1 = cvta_to_local_yes_64 %vreg0 +// %0 = LEA_ADDRi64 %VRFrame, 4 +// %1 = cvta_to_local_yes_64 %0 // // into -// %vreg1 = LEA_ADDRi64 %VRFrameLocal, 4 +// %1 = LEA_ADDRi64 %VRFrameLocal, 4 // // %VRFrameLocal is the virtual register name of %SPL // @@ -125,7 +125,7 @@ static void CombineCVTAToLocal(MachineInstr &Root) { } bool NVPTXPeephole::runOnMachineFunction(MachineFunction &MF) { - if (skipFunction(*MF.getFunction())) + if (skipFunction(MF.getFunction())) return false; bool Changed = false; diff --git a/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp b/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp index 2022caca76ee..82befe4b101b 100644 --- a/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp +++ b/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp @@ -158,7 +158,7 @@ findIndexForHandle(MachineOperand &Op, MachineFunction &MF, unsigned &Idx) { unsigned Param = 
atoi(Sym.data()+ParamBaseName.size()); std::string NewSym; raw_string_ostream NewSymStr(NewSym); - NewSymStr << MF.getFunction()->getName() << "_param_" << Param; + NewSymStr << MF.getName() << "_param_" << Param; InstrsToRemove.insert(&TexHandleDef); Idx = MFI->getImageHandleSymbolIndex(NewSymStr.str().c_str()); diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp index 85f757878f94..50c3e279f3ae 100644 --- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -180,10 +180,9 @@ void NVPTXTargetMachine::adjustPassManager(PassManagerBuilder &Builder) { }); } -TargetIRAnalysis NVPTXTargetMachine::getTargetIRAnalysis() { - return TargetIRAnalysis([this](const Function &F) { - return TargetTransformInfo(NVPTXTTIImpl(this, F)); - }); +TargetTransformInfo +NVPTXTargetMachine::getTargetTransformInfo(const Function &F) { + return TargetTransformInfo(NVPTXTTIImpl(this, F)); } void NVPTXPassConfig::addEarlyCSEOrGVNPass() { @@ -324,7 +323,7 @@ void NVPTXPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { addPass(&StackSlotColoringID); // FIXME: Needs physical registers - //addPass(&PostRAMachineLICMID); + //addPass(&MachineLICMID); printAndVerify("After StackSlotColoring"); } @@ -359,7 +358,7 @@ void NVPTXPassConfig::addMachineSSAOptimization() { if (addILPOpts()) printAndVerify("After ILP optimizations"); - addPass(&MachineLICMID); + addPass(&EarlyMachineLICMID); addPass(&MachineCSEID); addPass(&MachineSinkingID); diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.h b/lib/Target/NVPTX/NVPTXTargetMachine.h index 54a72a688ee3..eeebf64d39c3 100644 --- a/lib/Target/NVPTX/NVPTXTargetMachine.h +++ b/lib/Target/NVPTX/NVPTXTargetMachine.h @@ -63,7 +63,7 @@ class NVPTXTargetMachine : public LLVMTargetMachine { void adjustPassManager(PassManagerBuilder &) override; - TargetIRAnalysis getTargetIRAnalysis() override; + TargetTransformInfo getTargetTransformInfo(const Function &F) override; bool isMachineVerifierClean() const override { return false; diff --git a/lib/Target/Nios2/CMakeLists.txt b/lib/Target/Nios2/CMakeLists.txt index eaf7a0b6f4c4..7cad3c5ba9c1 100644 --- a/lib/Target/Nios2/CMakeLists.txt +++ b/lib/Target/Nios2/CMakeLists.txt @@ -4,7 +4,10 @@ set(LLVM_TARGET_DEFINITIONS Nios2.td) #your hand code C++ files. #Nios2GenRegisterInfo.inc came from Nios2RegisterInfo.td, Nios2GenInstrInfo.inc #came from Nios2InstrInfo.td. 
+tablegen(LLVM Nios2GenAsmWriter.inc -gen-asm-writer) +tablegen(LLVM Nios2GenDAGISel.inc -gen-dag-isel) tablegen(LLVM Nios2GenRegisterInfo.inc -gen-register-info) +tablegen(LLVM Nios2GenCallingConv.inc -gen-callingconv) tablegen(LLVM Nios2GenInstrInfo.inc -gen-instr-info) tablegen(LLVM Nios2GenSubtargetInfo.inc -gen-subtarget) @@ -13,13 +16,20 @@ add_public_tablegen_target(Nios2CommonTableGen) #Nios2CodeGen should match with LLVMBuild.txt Nios2CodeGen add_llvm_target(Nios2CodeGen - Nios2InstrInfo.cpp + Nios2AsmPrinter.cpp Nios2FrameLowering.cpp + Nios2InstrInfo.cpp + Nios2ISelDAGToDAG.cpp + Nios2ISelLowering.cpp + Nios2MachineFunction.cpp + Nios2MCInstLower.cpp Nios2RegisterInfo.cpp Nios2Subtarget.cpp Nios2TargetMachine.cpp + Nios2TargetObjectFile.cpp ) -#Should match with "subdirectories = MCTargetDesc TargetInfo" in LLVMBuild.txt -add_subdirectory(TargetInfo) +#Should match with "subdirectories = InstPrinter MCTargetDesc TargetInfo" in LLVMBuild.txt +add_subdirectory(InstPrinter) add_subdirectory(MCTargetDesc) +add_subdirectory(TargetInfo) diff --git a/lib/Target/Nios2/InstPrinter/CMakeLists.txt b/lib/Target/Nios2/InstPrinter/CMakeLists.txt new file mode 100644 index 000000000000..dc50be755754 --- /dev/null +++ b/lib/Target/Nios2/InstPrinter/CMakeLists.txt @@ -0,0 +1 @@ +add_llvm_library(LLVMNios2AsmPrinter Nios2InstPrinter.cpp) diff --git a/lib/Target/Nios2/InstPrinter/LLVMBuild.txt b/lib/Target/Nios2/InstPrinter/LLVMBuild.txt new file mode 100644 index 000000000000..bc7882dd3577 --- /dev/null +++ b/lib/Target/Nios2/InstPrinter/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===- ./lib/Target/Nios2/InstPrinter/LLVMBuild.txt -------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = Nios2AsmPrinter +parent = Nios2 +required_libraries = MC Support +add_to_library_groups = Nios2 diff --git a/lib/Target/Nios2/InstPrinter/Nios2InstPrinter.cpp b/lib/Target/Nios2/InstPrinter/Nios2InstPrinter.cpp new file mode 100644 index 000000000000..de0a5f9e84ea --- /dev/null +++ b/lib/Target/Nios2/InstPrinter/Nios2InstPrinter.cpp @@ -0,0 +1,66 @@ +//===-- Nios2InstPrinter.cpp - Convert Nios2 MCInst to assembly syntax-----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class prints an Nios2 MCInst to a .s file. 
+// +//===----------------------------------------------------------------------===// + +#include "Nios2InstPrinter.h" + +#include "Nios2InstrInfo.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "asm-printer" + +#define PRINT_ALIAS_INSTR +#include "Nios2GenAsmWriter.inc" + +void Nios2InstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { + OS << getRegisterName(RegNo); +} + +void Nios2InstPrinter::printInst(const MCInst *MI, raw_ostream &O, + StringRef Annot, const MCSubtargetInfo &STI) { + // Try to print any aliases first. + if (!printAliasInstr(MI, STI, O)) + printInstruction(MI, STI, O); + printAnnotation(O, Annot); +} + +void Nios2InstPrinter::printOperand(const MCInst *MI, int OpNo, + const MCSubtargetInfo &STI, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNo); + if (Op.isReg()) { + printRegName(O, Op.getReg()); + return; + } + + if (Op.isImm()) { + O << Op.getImm(); + return; + } + + assert(Op.isExpr() && "unknown operand kind in printOperand"); + Op.getExpr()->print(O, &MAI, true); +} + +void Nios2InstPrinter::printMemOperand(const MCInst *MI, int opNum, + const MCSubtargetInfo &STI, + raw_ostream &O, const char *Modifier) { + // Load/Store memory operands -- imm($reg) + printOperand(MI, opNum + 1, STI, O); + O << "("; + printOperand(MI, opNum, STI, O); + O << ")"; +} diff --git a/lib/Target/Nios2/InstPrinter/Nios2InstPrinter.h b/lib/Target/Nios2/InstPrinter/Nios2InstPrinter.h new file mode 100644 index 000000000000..43a12951baea --- /dev/null +++ b/lib/Target/Nios2/InstPrinter/Nios2InstPrinter.h @@ -0,0 +1,49 @@ +//= Nios2InstPrinter.h - Convert Nios2 MCInst to assembly syntax -*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class prints a Nios2 MCInst to a .s file. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_NIOS2_INSTPRINTER_NIOS2INSTPRINTER_H +#define LLVM_LIB_TARGET_NIOS2_INSTPRINTER_NIOS2INSTPRINTER_H + +#include "llvm/MC/MCInstPrinter.h" + +namespace llvm { + +class Nios2InstPrinter : public MCInstPrinter { +public: + Nios2InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, + const MCRegisterInfo &MRI) + : MCInstPrinter(MAI, MII, MRI) {} + + void printRegName(raw_ostream &OS, unsigned RegNo) const override; + void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot, + const MCSubtargetInfo &STI) override; + + // Autogenerated by tblgen. 
+ void printInstruction(const MCInst *MI, const MCSubtargetInfo &STI, + raw_ostream &O); + static const char *getRegisterName(unsigned RegNo); + + bool printAliasInstr(const MCInst *MI, const MCSubtargetInfo &STI, + raw_ostream &O); + + void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx, + unsigned PrintMethodIdx, + const MCSubtargetInfo &STI, raw_ostream &O); + void printOperand(const MCInst *MI, int opNum, const MCSubtargetInfo &STI, + raw_ostream &OS); + void printMemOperand(const MCInst *MI, int opNum, const MCSubtargetInfo &STI, + raw_ostream &OS, const char *Modifier = nullptr); +}; +} // end namespace llvm + +#endif diff --git a/lib/Target/Nios2/LLVMBuild.txt b/lib/Target/Nios2/LLVMBuild.txt index b40a76379706..0125bbedea58 100644 --- a/lib/Target/Nios2/LLVMBuild.txt +++ b/lib/Target/Nios2/LLVMBuild.txt @@ -19,6 +19,7 @@ [common] subdirectories = + InstPrinter MCTargetDesc TargetInfo @@ -33,6 +34,7 @@ name = Nios2 parent = Target #Whether this target defines an assembly parser, assembly printer, disassembler #, and supports JIT compilation.They are optional. +has_asmprinter = 1 [component_1] #component_1 is a Library type and name is Nios2CodeGen.After build it will @@ -46,12 +48,14 @@ parent = Nios2 #dependencies for this component.When tools are built, the build system will #include the transitive closure of all required_libraries for the components #the tool needs. -required_libraries = CodeGen +required_libraries = AsmPrinter + CodeGen Core GlobalISel MC Nios2Desc Nios2Info + SelectionDAG Support Target #end of required_libraries diff --git a/lib/Target/Nios2/MCTargetDesc/CMakeLists.txt b/lib/Target/Nios2/MCTargetDesc/CMakeLists.txt index 21def509a232..138832d33abf 100644 --- a/lib/Target/Nios2/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/Nios2/MCTargetDesc/CMakeLists.txt @@ -1,2 +1,9 @@ #MCTargetDesc / CMakeLists.txt -add_llvm_library(LLVMNios2Desc Nios2MCTargetDesc.cpp) +add_llvm_library(LLVMNios2Desc + Nios2AsmBackend.cpp + Nios2ELFObjectWriter.cpp + Nios2MCAsmInfo.cpp + Nios2MCExpr.cpp + Nios2MCTargetDesc.cpp + Nios2TargetStreamer.cpp) + diff --git a/lib/Target/Nios2/MCTargetDesc/LLVMBuild.txt b/lib/Target/Nios2/MCTargetDesc/LLVMBuild.txt index 4dc6995e7f5c..3794c83e504d 100644 --- a/lib/Target/Nios2/MCTargetDesc/LLVMBuild.txt +++ b/lib/Target/Nios2/MCTargetDesc/LLVMBuild.txt @@ -19,7 +19,8 @@ type = Library name = Nios2Desc parent = Nios2 -required_libraries = MC - Nios2Info +required_libraries = MC + Nios2AsmPrinter + Nios2Info Support add_to_library_groups = Nios2 diff --git a/lib/Target/Nios2/MCTargetDesc/Nios2AsmBackend.cpp b/lib/Target/Nios2/MCTargetDesc/Nios2AsmBackend.cpp new file mode 100644 index 000000000000..3971630c6beb --- /dev/null +++ b/lib/Target/Nios2/MCTargetDesc/Nios2AsmBackend.cpp @@ -0,0 +1,131 @@ +//===-- Nios2AsmBackend.cpp - Nios2 Asm Backend --------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Nios2AsmBackend class. 
+// +//===----------------------------------------------------------------------===// +// + +#include "MCTargetDesc/Nios2AsmBackend.h" +#include "MCTargetDesc/Nios2FixupKinds.h" +#include "MCTargetDesc/Nios2MCTargetDesc.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCELFObjectWriter.h" +#include "llvm/MC/MCFixupKindInfo.h" +#include "llvm/MC/MCObjectWriter.h" + +using namespace llvm; + +// Prepare value for the target space for it +static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value) { + + unsigned Kind = Fixup.getKind(); + + // Add/subtract and shift + switch (Kind) { + default: + return 0; + case Nios2::fixup_Nios2_LO16: + break; + case Nios2::fixup_Nios2_HI16: + // Get the higher 16-bits. Also add 1 if bit 15 is 1. + Value = ((Value + 0x8000) >> 16) & 0xffff; + break; + } + + return Value; +} + +// Calculate index for Nios2 specific little endian byte order +static unsigned calculateLEIndex(unsigned i) { + assert(i <= 3 && "Index out of range!"); + + return (1 - i / 2) * 2 + i % 2; +} + +/// ApplyFixup - Apply the \p Value for given \p Fixup into the provided +/// data fragment, at the offset specified by the fixup and following the +/// fixup kind as appropriate. +void Nios2AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, + MutableArrayRef Data, uint64_t Value, + bool IsResolved) const { + MCFixupKind Kind = Fixup.getKind(); + Value = adjustFixupValue(Fixup, Value); + + if (!Value) + return; // Doesn't change encoding. + + // Where do we start in the object + unsigned Offset = Fixup.getOffset(); + // Number of bytes we need to fixup + unsigned NumBytes = (getFixupKindInfo(Kind).TargetSize + 7) / 8; + // Grab current value, if any, from bits. + uint64_t CurVal = 0; + + for (unsigned i = 0; i != NumBytes; ++i) { + unsigned Idx = calculateLEIndex(i); + CurVal |= (uint64_t)((uint8_t)Data[Offset + Idx]) << (i * 8); + } + + uint64_t Mask = ((uint64_t)(-1) >> (64 - getFixupKindInfo(Kind).TargetSize)); + CurVal |= Value & Mask; + + // Write out the fixed up bytes back to the code/data bits. + for (unsigned i = 0; i != NumBytes; ++i) { + unsigned Idx = calculateLEIndex(i); + Data[Offset + Idx] = (uint8_t)((CurVal >> (i * 8)) & 0xff); + } +} + +Optional Nios2AsmBackend::getFixupKind(StringRef Name) const { + return StringSwitch>(Name) + .Case("R_NIOS2_NONE", (MCFixupKind)Nios2::fixup_Nios2_32) + .Case("R_NIOS2_32", FK_Data_4) + .Default(MCAsmBackend::getFixupKind(Name)); +} + +//@getFixupKindInfo { +const MCFixupKindInfo & +Nios2AsmBackend::getFixupKindInfo(MCFixupKind Kind) const { + const static MCFixupKindInfo Infos[Nios2::NumTargetFixupKinds] = { + // This table *must* be in same the order of fixup_* kinds in + // Nios2FixupKinds.h. 
+ // + // name offset bits flags + {"fixup_Nios2_32", 0, 32, 0}, + {"fixup_Nios2_HI16", 0, 16, 0}, + {"fixup_Nios2_LO16", 0, 16, 0}}; + + if (Kind < FirstTargetFixupKind) + return MCAsmBackend::getFixupKindInfo(Kind); + + assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && + "Invalid kind!"); + return Infos[Kind - FirstTargetFixupKind]; +} + +std::unique_ptr +Nios2AsmBackend::createObjectWriter(raw_pwrite_stream &OS) const { + return createNios2ELFObjectWriter(OS, + MCELFObjectTargetWriter::getOSABI(OSType)); +} + +bool Nios2AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { + return true; +} + +// MCAsmBackend +MCAsmBackend *llvm::createNios2AsmBackend(const Target &T, + const MCRegisterInfo &MRI, + const Triple &TT, StringRef CPU, + const MCTargetOptions &Options) { + + return new Nios2AsmBackend(T, TT.getOS()); +} diff --git a/lib/Target/Nios2/MCTargetDesc/Nios2AsmBackend.h b/lib/Target/Nios2/MCTargetDesc/Nios2AsmBackend.h new file mode 100644 index 000000000000..0aa42043ee2a --- /dev/null +++ b/lib/Target/Nios2/MCTargetDesc/Nios2AsmBackend.h @@ -0,0 +1,81 @@ +//===-- Nios2AsmBackend.h - Nios2 Asm Backend ----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the Nios2AsmBackend class. +// +//===----------------------------------------------------------------------===// +// + +#ifndef LLVM_LIB_TARGET_NIOS2_MCTARGETDESC_NIOS2ASMBACKEND_H +#define LLVM_LIB_TARGET_NIOS2_MCTARGETDESC_NIOS2ASMBACKEND_H + +#include "MCTargetDesc/Nios2FixupKinds.h" +#include "llvm/ADT/Triple.h" +#include "llvm/MC/MCAsmBackend.h" + +namespace llvm { + +class MCAssembler; +struct MCFixupKindInfo; +class Target; +class MCObjectWriter; + +class Nios2AsmBackend : public MCAsmBackend { + Triple::OSType OSType; + +public: + Nios2AsmBackend(const Target &T, Triple::OSType OSType) + : MCAsmBackend(), OSType(OSType) {} + + std::unique_ptr + createObjectWriter(raw_pwrite_stream &OS) const override; + + bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override; + + void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, MutableArrayRef Data, + uint64_t Value, bool IsResolved) const override; + + Optional getFixupKind(StringRef Name) const override; + const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override; + + unsigned getNumFixupKinds() const override { + return Nios2::NumTargetFixupKinds; + } + + /// MayNeedRelaxation - Check whether the given instruction may need + /// relaxation. + /// + /// \param Inst - The instruction to test. + bool mayNeedRelaxation(const MCInst &Inst) const override { return false; } + + /// fixupNeedsRelaxation - Target specific predicate for whether a given + /// fixup requires the associated instruction to be relaxed. + bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, + const MCRelaxableFragment *DF, + const MCAsmLayout &Layout) const override { + // FIXME. + llvm_unreachable("RelaxInstruction() unimplemented"); + return false; + } + + /// RelaxInstruction - Relax the instruction in the given fragment + /// to the next wider instruction. + /// + /// \param Inst - The instruction to relax, which may be the same + /// as the output. + /// \param [out] Res On return, the relaxed instruction. 
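+  ///
+  /// No relaxation is implemented for Nios2 here (mayNeedRelaxation always
+  /// returns false), so this override is intentionally empty.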
+ void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI, + MCInst &Res) const override {} + +}; // class Nios2AsmBackend + +} // namespace llvm + +#endif diff --git a/lib/Target/Nios2/MCTargetDesc/Nios2BaseInfo.h b/lib/Target/Nios2/MCTargetDesc/Nios2BaseInfo.h new file mode 100644 index 000000000000..225671ebc8d8 --- /dev/null +++ b/lib/Target/Nios2/MCTargetDesc/Nios2BaseInfo.h @@ -0,0 +1,38 @@ +//===-- Nios2BaseInfo.h - Top level definitions for NIOS2 MC ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains small standalone helper functions and enum definitions for +// the Nios2 target useful for the compiler back-end and the MC libraries. +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_LIB_TARGET_NIOS2_MCTARGETDESC_NIOS2BASEINFO_H +#define LLVM_LIB_TARGET_NIOS2_MCTARGETDESC_NIOS2BASEINFO_H + +namespace llvm { + +/// Nios2FG - This namespace holds all of the target specific flags that +/// instruction info tracks. +namespace Nios2FG { +/// Target Operand Flag enum. +enum TOF { + //===------------------------------------------------------------------===// + // Nios2 Specific MachineOperand flags. + + MO_NO_FLAG, + + /// MO_ABS_HI/LO - Represents the hi or low part of an absolute symbol + /// address. + MO_ABS_HI, + MO_ABS_LO, + +}; +} // namespace Nios2FG +} // namespace llvm + +#endif diff --git a/lib/Target/Nios2/MCTargetDesc/Nios2ELFObjectWriter.cpp b/lib/Target/Nios2/MCTargetDesc/Nios2ELFObjectWriter.cpp new file mode 100644 index 000000000000..04f727ad390c --- /dev/null +++ b/lib/Target/Nios2/MCTargetDesc/Nios2ELFObjectWriter.cpp @@ -0,0 +1,44 @@ +//===-- Nios2ELFObjectWriter.cpp - Nios2 ELF Writer -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
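+//
+// getRelocType below is still a stub that returns relocation type 0 for every
+// fixup; a complete writer would map each Nios2 fixup kind onto the matching
+// ELF relocation for EM_ALTERA_NIOS2.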
+// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/Nios2FixupKinds.h" +#include "MCTargetDesc/Nios2MCExpr.h" +#include "MCTargetDesc/Nios2MCTargetDesc.h" +#include "llvm/MC/MCELFObjectWriter.h" +#include "llvm/MC/MCObjectWriter.h" + +using namespace llvm; + +namespace { +class Nios2ELFObjectWriter : public MCELFObjectTargetWriter { +public: + Nios2ELFObjectWriter(uint8_t OSABI) + : MCELFObjectTargetWriter(false, OSABI, ELF::EM_ALTERA_NIOS2, false) {} + + ~Nios2ELFObjectWriter() override; + + unsigned getRelocType(MCContext &Ctx, const MCValue &Target, + const MCFixup &Fixup, bool IsPCRel) const override; +}; +} // namespace + +Nios2ELFObjectWriter::~Nios2ELFObjectWriter() {} + +unsigned Nios2ELFObjectWriter::getRelocType(MCContext &Ctx, + const MCValue &Target, + const MCFixup &Fixup, + bool IsPCRel) const { + return 0; +} + +std::unique_ptr +llvm::createNios2ELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI) { + auto MOTW = llvm::make_unique(OSABI); + return createELFObjectWriter(std::move(MOTW), OS, true); +} diff --git a/lib/Target/Nios2/MCTargetDesc/Nios2FixupKinds.h b/lib/Target/Nios2/MCTargetDesc/Nios2FixupKinds.h new file mode 100644 index 000000000000..c169a1b19371 --- /dev/null +++ b/lib/Target/Nios2/MCTargetDesc/Nios2FixupKinds.h @@ -0,0 +1,41 @@ +//===-- Nios2FixupKinds.h - Nios2 Specific Fixup Entries --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_NIOS2_MCTARGETDESC_NIOS2FIXUPKINDS_H +#define LLVM_LIB_TARGET_NIOS2_MCTARGETDESC_NIOS2FIXUPKINDS_H + +#include "llvm/MC/MCFixup.h" + +namespace llvm { +namespace Nios2 { +// Although most of the current fixup types reflect a unique relocation +// one can have multiple fixup types for a given relocation and thus need +// to be uniquely named. +// +// This table *must* be in the save order of +// MCFixupKindInfo Infos[Nios2::NumTargetFixupKinds] +// in Nios2AsmBackend.cpp. +enum Fixups { + // Pure upper 32 bit fixup resulting in - R_NIOS2_32. + fixup_Nios2_32 = FirstTargetFixupKind, + + // Pure upper 16 bit fixup resulting in - R_NIOS2_HI16. + fixup_Nios2_HI16, + + // Pure lower 16 bit fixup resulting in - R_NIOS2_LO16. + fixup_Nios2_LO16, + + // Marker + LastTargetFixupKind, + NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind +}; +} // namespace Nios2 +} // namespace llvm + +#endif // LLVM_NIOS2_NIOS2FIXUPKINDS_H diff --git a/lib/Target/Nios2/MCTargetDesc/Nios2MCAsmInfo.cpp b/lib/Target/Nios2/MCTargetDesc/Nios2MCAsmInfo.cpp new file mode 100644 index 000000000000..e3c66e6776c2 --- /dev/null +++ b/lib/Target/Nios2/MCTargetDesc/Nios2MCAsmInfo.cpp @@ -0,0 +1,44 @@ +//===-- Nios2MCAsmInfo.cpp - Nios2 Asm Properties -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declarations of the Nios2MCAsmInfo properties. 
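+// With the settings below, 32-bit data constants are emitted with ".4byte",
+// comments start with '#', and zero fill uses ".space".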
+// +//===----------------------------------------------------------------------===// + +#include "Nios2MCAsmInfo.h" + +#include "llvm/ADT/Triple.h" + +using namespace llvm; + +void Nios2MCAsmInfo::anchor() {} + +Nios2MCAsmInfo::Nios2MCAsmInfo(const Triple &TheTriple) { + if ((TheTriple.getArch() == Triple::nios2)) + IsLittleEndian = true; // the default of IsLittleEndian is true + + AlignmentIsInBytes = false; + Data16bitsDirective = "\t.2byte\t"; + Data32bitsDirective = "\t.4byte\t"; + Data64bitsDirective = "\t.8byte\t"; + PrivateLabelPrefix = ".LC"; + CommentString = "#"; + ZeroDirective = "\t.space\t"; + GPRel32Directive = "\t.gpword\t"; + GPRel64Directive = "\t.gpdword\t"; + WeakRefDirective = "\t.weak\t"; + GlobalDirective = "\t.global\t"; + AscizDirective = "\t.string\t"; + UseAssignmentForEHBegin = true; + + SupportsDebugInformation = true; + ExceptionsType = ExceptionHandling::DwarfCFI; + DwarfRegNumForCFI = true; + UsesELFSectionDirectiveForBSS = true; +} diff --git a/lib/Target/Nios2/MCTargetDesc/Nios2MCAsmInfo.h b/lib/Target/Nios2/MCTargetDesc/Nios2MCAsmInfo.h new file mode 100644 index 000000000000..0c81276f84d8 --- /dev/null +++ b/lib/Target/Nios2/MCTargetDesc/Nios2MCAsmInfo.h @@ -0,0 +1,31 @@ +//===-- Nios2MCAsmInfo.h - Nios2 Asm Info ----------------------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the Nios2MCAsmInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_NIOS2_MCTARGETDESC_NIOS2MCASMINFO_H +#define LLVM_LIB_TARGET_NIOS2_MCTARGETDESC_NIOS2MCASMINFO_H + +#include "llvm/MC/MCAsmInfoELF.h" + +namespace llvm { +class Triple; + +class Nios2MCAsmInfo : public MCAsmInfoELF { + void anchor() override; + +public: + explicit Nios2MCAsmInfo(const Triple &TheTriple); +}; + +} // namespace llvm + +#endif diff --git a/lib/Target/Nios2/MCTargetDesc/Nios2MCExpr.cpp b/lib/Target/Nios2/MCTargetDesc/Nios2MCExpr.cpp new file mode 100644 index 000000000000..0f12c9e93378 --- /dev/null +++ b/lib/Target/Nios2/MCTargetDesc/Nios2MCExpr.cpp @@ -0,0 +1,76 @@ +//===-- Nios2MCExpr.cpp - Nios2 specific MC expression classes ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
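+//
+// These expressions print as %hiadj(...) and %lo(...). For illustration, a
+// 32-bit absolute address is typically materialized as a pair such as
+//   movhi r2, %hiadj(sym)
+//   addi  r2, r2, %lo(sym)
+// (illustrative Nios II assembly, not emitted by this file itself).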
+// +//===----------------------------------------------------------------------===// + +#include "Nios2.h" + +#include "Nios2MCExpr.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCObjectStreamer.h" +#include "llvm/MC/MCSymbolELF.h" + +using namespace llvm; + +#define DEBUG_TYPE "nios2mcexpr" + +const Nios2MCExpr *Nios2MCExpr::create(Nios2MCExpr::Nios2ExprKind Kind, + const MCExpr *Expr, MCContext &Ctx) { + return new (Ctx) Nios2MCExpr(Kind, Expr); +} + +const Nios2MCExpr *Nios2MCExpr::create(const MCSymbol *Symbol, + Nios2MCExpr::Nios2ExprKind Kind, + MCContext &Ctx) { + const MCSymbolRefExpr *MCSym = + MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None, Ctx); + return new (Ctx) Nios2MCExpr(Kind, MCSym); +} + +void Nios2MCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const { + + switch (Kind) { + case CEK_None: + case CEK_Special: + llvm_unreachable("CEK_None and CEK_Special are invalid"); + break; + case CEK_ABS_HI: + OS << "%hiadj"; + break; + case CEK_ABS_LO: + OS << "%lo"; + break; + } + + OS << '('; + Expr->print(OS, MAI, true); + OS << ')'; +} + +bool Nios2MCExpr::evaluateAsRelocatableImpl(MCValue &Res, + const MCAsmLayout *Layout, + const MCFixup *Fixup) const { + return getSubExpr()->evaluateAsRelocatable(Res, Layout, Fixup); +} + +void Nios2MCExpr::visitUsedExpr(MCStreamer &Streamer) const { + Streamer.visitUsedExpr(*getSubExpr()); +} + +void Nios2MCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const { + switch (getKind()) { + case CEK_None: + case CEK_Special: + llvm_unreachable("CEK_None and CEK_Special are invalid"); + break; + case CEK_ABS_HI: + case CEK_ABS_LO: + break; + } +} diff --git a/lib/Target/Nios2/MCTargetDesc/Nios2MCExpr.h b/lib/Target/Nios2/MCTargetDesc/Nios2MCExpr.h new file mode 100644 index 000000000000..5b49005eb648 --- /dev/null +++ b/lib/Target/Nios2/MCTargetDesc/Nios2MCExpr.h @@ -0,0 +1,60 @@ +//===-- Nios2MCExpr.h - Nios2 specific MC expression classes ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_NIOS2_MCTARGETDESC_NIOS2MCEXPR_H +#define LLVM_LIB_TARGET_NIOS2_MCTARGETDESC_NIOS2MCEXPR_H + +#include "llvm/MC/MCAsmLayout.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCValue.h" + +namespace llvm { + +class Nios2MCExpr : public MCTargetExpr { +public: + enum Nios2ExprKind { + CEK_None, + CEK_ABS_HI, + CEK_ABS_LO, + CEK_Special, + }; + +private: + const Nios2ExprKind Kind; + const MCExpr *Expr; + + explicit Nios2MCExpr(Nios2ExprKind Kind, const MCExpr *Expr) + : Kind(Kind), Expr(Expr) {} + +public: + static const Nios2MCExpr *create(Nios2ExprKind Kind, const MCExpr *Expr, + MCContext &Ctx); + static const Nios2MCExpr *create(const MCSymbol *Symbol, + Nios2MCExpr::Nios2ExprKind Kind, + MCContext &Ctx); + + /// Get the kind of this expression. + Nios2ExprKind getKind() const { return Kind; } + + /// Get the child of this expression. 
+ const MCExpr *getSubExpr() const { return Expr; } + + void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override; + bool evaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout, + const MCFixup *Fixup) const override; + void visitUsedExpr(MCStreamer &Streamer) const override; + MCFragment *findAssociatedFragment() const override { + return getSubExpr()->findAssociatedFragment(); + } + + void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override; +}; +} // end namespace llvm + +#endif diff --git a/lib/Target/Nios2/MCTargetDesc/Nios2MCTargetDesc.cpp b/lib/Target/Nios2/MCTargetDesc/Nios2MCTargetDesc.cpp index 0c70dc0bedc9..e57b44d3cfdc 100644 --- a/lib/Target/Nios2/MCTargetDesc/Nios2MCTargetDesc.cpp +++ b/lib/Target/Nios2/MCTargetDesc/Nios2MCTargetDesc.cpp @@ -12,8 +12,13 @@ //===----------------------------------------------------------------------===// #include "Nios2MCTargetDesc.h" +#include "InstPrinter/Nios2InstPrinter.h" +#include "Nios2MCAsmInfo.h" +#include "Nios2TargetStreamer.h" #include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/TargetRegistry.h" using namespace llvm; @@ -26,4 +31,72 @@ using namespace llvm; #define GET_REGINFO_MC_DESC #include "Nios2GenRegisterInfo.inc" -extern "C" void LLVMInitializeNios2TargetMC() {} +static MCInstrInfo *createNios2MCInstrInfo() { + MCInstrInfo *X = new MCInstrInfo(); + InitNios2MCInstrInfo(X); // defined in Nios2GenInstrInfo.inc + return X; +} + +static MCRegisterInfo *createNios2MCRegisterInfo(const Triple &TT) { + MCRegisterInfo *X = new MCRegisterInfo(); + InitNios2MCRegisterInfo(X, Nios2::R15); // defined in Nios2GenRegisterInfo.inc + return X; +} + +static MCSubtargetInfo * +createNios2MCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { + if (CPU.empty() || CPU == "generic") + CPU = "nios2r1"; + return createNios2MCSubtargetInfoImpl(TT, CPU, FS); + // createNios2MCSubtargetInfoImpl defined in Nios2GenSubtargetInfo.inc +} + +static MCAsmInfo *createNios2MCAsmInfo(const MCRegisterInfo &MRI, + const Triple &TT) { + MCAsmInfo *MAI = new Nios2MCAsmInfo(TT); + + unsigned SP = MRI.getDwarfRegNum(Nios2::SP, true); + MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(nullptr, SP, 0); + MAI->addInitialFrameState(Inst); + + return MAI; +} + +static MCInstPrinter *createNios2MCInstPrinter(const Triple &T, + unsigned SyntaxVariant, + const MCAsmInfo &MAI, + const MCInstrInfo &MII, + const MCRegisterInfo &MRI) { + return new Nios2InstPrinter(MAI, MII, MRI); +} + +static MCTargetStreamer *createNios2AsmTargetStreamer(MCStreamer &S, + formatted_raw_ostream &OS, + MCInstPrinter *InstPrint, + bool isVerboseAsm) { + return new Nios2TargetAsmStreamer(S, OS); +} + +extern "C" void LLVMInitializeNios2TargetMC() { + Target *T = &getTheNios2Target(); + + // Register the MC asm info. + RegisterMCAsmInfoFn X(*T, createNios2MCAsmInfo); + + // Register the MC instruction info. + TargetRegistry::RegisterMCInstrInfo(*T, createNios2MCInstrInfo); + + // Register the MC register info. + TargetRegistry::RegisterMCRegInfo(*T, createNios2MCRegisterInfo); + + // Register the asm target streamer. + TargetRegistry::RegisterAsmTargetStreamer(*T, createNios2AsmTargetStreamer); + + // Register the MC subtarget info. + TargetRegistry::RegisterMCSubtargetInfo(*T, createNios2MCSubtargetInfo); + // Register the MCInstPrinter. + TargetRegistry::RegisterMCInstPrinter(*T, createNios2MCInstPrinter); + + // Register the asm backend. 
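+  // Note that no MCCodeEmitter is registered here, so only the textual
+  // assembly path is wired up; direct object emission would additionally
+  // need a code emitter.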
+ TargetRegistry::RegisterMCAsmBackend(*T, createNios2AsmBackend); +} diff --git a/lib/Target/Nios2/MCTargetDesc/Nios2MCTargetDesc.h b/lib/Target/Nios2/MCTargetDesc/Nios2MCTargetDesc.h index e8fe865fadb2..d918a066acae 100644 --- a/lib/Target/Nios2/MCTargetDesc/Nios2MCTargetDesc.h +++ b/lib/Target/Nios2/MCTargetDesc/Nios2MCTargetDesc.h @@ -14,12 +14,27 @@ #ifndef LLVM_LIB_TARGET_NIOS2_MCTARGETDESC_NIOS2MCTARGETDESC_H #define LLVM_LIB_TARGET_NIOS2_MCTARGETDESC_NIOS2MCTARGETDESC_H +#include + namespace llvm { +class MCAsmBackend; +class MCObjectWriter; +class MCRegisterInfo; +class MCTargetOptions; class Target; class Triple; +class StringRef; +class raw_pwrite_stream; Target &getTheNios2Target(); +MCAsmBackend *createNios2AsmBackend(const Target &T, const MCRegisterInfo &MRI, + const Triple &TT, StringRef CPU, + const MCTargetOptions &Options); + +std::unique_ptr +createNios2ELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI); + } // namespace llvm // Defines symbolic names for Nios2 registers. This defines a mapping from diff --git a/lib/Target/Nios2/MCTargetDesc/Nios2TargetStreamer.cpp b/lib/Target/Nios2/MCTargetDesc/Nios2TargetStreamer.cpp new file mode 100644 index 000000000000..b7e1bc36a6d3 --- /dev/null +++ b/lib/Target/Nios2/MCTargetDesc/Nios2TargetStreamer.cpp @@ -0,0 +1,22 @@ +//===-- Nios2TargetStreamer.cpp - Nios2 Target Streamer Methods -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides Nios2 specific target streamer methods. +// +//===----------------------------------------------------------------------===// + +#include "Nios2TargetStreamer.h" + +using namespace llvm; + +Nios2TargetStreamer::Nios2TargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {} + +Nios2TargetAsmStreamer::Nios2TargetAsmStreamer(MCStreamer &S, + formatted_raw_ostream &OS) + : Nios2TargetStreamer(S), OS(OS) {} diff --git a/lib/Target/Nios2/Nios2.h b/lib/Target/Nios2/Nios2.h index 87202f48cfbe..d6c5c1e49662 100644 --- a/lib/Target/Nios2/Nios2.h +++ b/lib/Target/Nios2/Nios2.h @@ -19,7 +19,17 @@ #include "llvm/Target/TargetMachine.h" namespace llvm { +class FunctionPass; +class formatted_raw_ostream; class Nios2TargetMachine; +class AsmPrinter; +class MachineInstr; +class MCInst; + +FunctionPass *createNios2ISelDag(Nios2TargetMachine &TM, + CodeGenOpt::Level OptLevel); +void LowerNios2MachineInstToMCInst(const MachineInstr *MI, MCInst &OutMI, + AsmPrinter &AP); } // namespace llvm #endif diff --git a/lib/Target/Nios2/Nios2.td b/lib/Target/Nios2/Nios2.td index c2b54caf38ca..1acf4c70c42c 100644 --- a/lib/Target/Nios2/Nios2.td +++ b/lib/Target/Nios2/Nios2.td @@ -13,12 +13,9 @@ include "llvm/Target/Target.td" include "Nios2RegisterInfo.td" -include "Nios2InstrInfo.td" include "Nios2Schedule.td" - -def Nios2InstrInfo : InstrInfo; - -def Nios2 : Target { let InstructionSet = Nios2InstrInfo; } +include "Nios2InstrInfo.td" +include "Nios2CallingConv.td" //===----------------------------------------------------------------------===// // Nios2 Subtarget features @@ -37,3 +34,26 @@ class Proc Features> def : Proc<"nios2r1", [FeatureNios2r1]>; def : Proc<"nios2r2", [FeatureNios2r2]>; + +def Nios2InstrInfo : InstrInfo; + +def Nios2AsmParser : AsmParser { + let ShouldEmitMatchRegisterName = 0; +} + +//===----------------------------------------------------------------------===// +// 
Declare the target which we are implementing +//===----------------------------------------------------------------------===// + +def Nios2AsmWriter : AsmWriter { + string AsmWriterClassName = "InstPrinter"; + int PassSubtarget = 1; + int Variant = 0; +} + +def Nios2 : Target { +// def Nios2InstrInfo : InstrInfo as before. + let InstructionSet = Nios2InstrInfo; + let AssemblyParsers = [Nios2AsmParser]; + let AssemblyWriters = [Nios2AsmWriter]; +} diff --git a/lib/Target/Nios2/Nios2AsmPrinter.cpp b/lib/Target/Nios2/Nios2AsmPrinter.cpp new file mode 100644 index 000000000000..1abf19591774 --- /dev/null +++ b/lib/Target/Nios2/Nios2AsmPrinter.cpp @@ -0,0 +1,153 @@ +//===-- Nios2AsmPrinter.cpp - Nios2 LLVM Assembly Printer -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a printer that converts from our internal representation +// of machine-dependent LLVM code to GAS-format NIOS2 assembly language. +// +//===----------------------------------------------------------------------===// + +#include "InstPrinter/Nios2InstPrinter.h" +#include "MCTargetDesc/Nios2BaseInfo.h" +#include "Nios2.h" +#include "Nios2TargetMachine.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/Support/TargetRegistry.h" + +using namespace llvm; + +#define DEBUG_TYPE "nios2-asm-printer" + +namespace { + +class Nios2AsmPrinter : public AsmPrinter { + +public: + explicit Nios2AsmPrinter(TargetMachine &TM, + std::unique_ptr Streamer) + : AsmPrinter(TM, std::move(Streamer)) {} + + StringRef getPassName() const override { return "Nios2 Assembly Printer"; } + + //- EmitInstruction() must exists or will have run time error. + void EmitInstruction(const MachineInstr *MI) override; + bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &O) override; + bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum, + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &O) override; + void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O); + void EmitFunctionEntryLabel() override; +}; +} // namespace + +//- EmitInstruction() must exists or will have run time error. +void Nios2AsmPrinter::EmitInstruction(const MachineInstr *MI) { + + // Print out both ordinary instruction and boudle instruction + MachineBasicBlock::const_instr_iterator I = MI->getIterator(); + MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end(); + + do { + + if (I->isPseudo()) { + llvm_unreachable("Pseudo opcode found in EmitInstruction()"); + } + + MCInst TmpInst0; + LowerNios2MachineInstToMCInst(&*I, TmpInst0, *this); + EmitToStreamer(*OutStreamer, TmpInst0); + } while ((++I != E) && I->isInsideBundle()); // Delay slot check +} + +// .type main,@function +//-> .ent main # @main +// main: +void Nios2AsmPrinter::EmitFunctionEntryLabel() { + OutStreamer->EmitLabel(CurrentFnSym); +} + +// Print out an operand for an inline asm expression. 
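+// For illustration, this is the hook that substitutes the "%0"/"%1" operands
+// in something like: asm volatile("add %0, %1, %2" : "=r"(d) : "r"(a), "r"(b));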
+bool Nios2AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, + unsigned AsmVariant, + const char *ExtraCode, raw_ostream &O) { + printOperand(MI, OpNum, O); + return false; +} + +bool Nios2AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, + unsigned OpNum, unsigned AsmVariant, + const char *ExtraCode, + raw_ostream &O) { + if (ExtraCode && ExtraCode[0]) + return true; // Unknown modifier + + const MachineOperand &MO = MI->getOperand(OpNum); + assert(MO.isReg() && "unexpected inline asm memory operand"); + O << "($" << Nios2InstPrinter::getRegisterName(MO.getReg()) << ")"; + + return false; +} + +void Nios2AsmPrinter::printOperand(const MachineInstr *MI, int opNum, + raw_ostream &O) { + const MachineOperand &MO = MI->getOperand(opNum); + bool closeP = false; + + if (MO.getTargetFlags()) + closeP = true; + + switch (MO.getTargetFlags()) { + case Nios2FG::MO_ABS_HI: + O << "%hiadj("; + break; + case Nios2FG::MO_ABS_LO: + O << "%lo("; + break; + } + + switch (MO.getType()) { + case MachineOperand::MO_Register: + O << '$' + << StringRef(Nios2InstPrinter::getRegisterName(MO.getReg())).lower(); + break; + + case MachineOperand::MO_Immediate: + O << MO.getImm(); + break; + + case MachineOperand::MO_MachineBasicBlock: + MO.getMBB()->getSymbol()->print(O, MAI); + return; + + case MachineOperand::MO_GlobalAddress: + getSymbol(MO.getGlobal())->print(O, MAI); + break; + + case MachineOperand::MO_BlockAddress: + O << GetBlockAddressSymbol(MO.getBlockAddress())->getName(); + break; + + case MachineOperand::MO_ExternalSymbol: + O << MO.getSymbolName(); + break; + + default: + llvm_unreachable(""); + } + + if (closeP) + O << ")"; +} + +// Force static initialization. +extern "C" void LLVMInitializeNios2AsmPrinter() { + RegisterAsmPrinter X(getTheNios2Target()); +} diff --git a/lib/Target/Nios2/Nios2CallingConv.td b/lib/Target/Nios2/Nios2CallingConv.td new file mode 100644 index 000000000000..f0b172f8422d --- /dev/null +++ b/lib/Target/Nios2/Nios2CallingConv.td @@ -0,0 +1,34 @@ +//===- Nios2CallingConv.td - Calling Conventions for Nios2 -*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This describes the calling conventions for Nios2 architecture. +//===----------------------------------------------------------------------===// + +/// CCIfSubtarget - Match if the current subtarget has a feature F. +class CCIfSubtarget: + CCIf().", F), A>; + +def CC_Nios2 : CallingConv<[ + // i32 f32 arguments get passed in integer registers if there is space. + CCIfType<[i32, f32], CCAssignToReg<[R4, R5, R6, R7]>>, + + // Alternatively, they are assigned to the stack in 4-byte aligned units. + CCAssignToStack<4, 4> +]>; + +def RetCC_Nios2EABI : CallingConv<[ + // i32 are returned in registers R2, R3 + CCIfType<[i32], CCAssignToReg<[R2, R3]>>, + // In case of floating point (FPH2 instr.) also use the same register set + CCIfType<[f32], CCAssignToReg<[R2, R3]>>, + CCIfByVal>, + // Stack parameter slots for i32 is 32-bit words and 4-byte aligned. 
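+  // (Anything not assigned to r2/r3 above falls through to a 4-byte aligned
+  // stack slot.)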
+ CCIfType<[i32], CCAssignToStack<4, 4>> +]>; + +def CSR : CalleeSavedRegs<(add RA, FP, (sequence "R%u", 16, 23))>; diff --git a/lib/Target/Nios2/Nios2FrameLowering.cpp b/lib/Target/Nios2/Nios2FrameLowering.cpp index f278d80f8054..6fb28a6fd638 100644 --- a/lib/Target/Nios2/Nios2FrameLowering.cpp +++ b/lib/Target/Nios2/Nios2FrameLowering.cpp @@ -25,7 +25,3 @@ void Nios2FrameLowering::emitPrologue(MachineFunction &MF, void Nios2FrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const {} - -const Nios2FrameLowering *Nios2FrameLowering::create(const Nios2Subtarget &ST) { - return new Nios2FrameLowering(ST, 4); -} diff --git a/lib/Target/Nios2/Nios2FrameLowering.h b/lib/Target/Nios2/Nios2FrameLowering.h index 2d9e84b2c72b..4ffb01dda36a 100644 --- a/lib/Target/Nios2/Nios2FrameLowering.h +++ b/lib/Target/Nios2/Nios2FrameLowering.h @@ -24,11 +24,10 @@ class Nios2FrameLowering : public TargetFrameLowering { const Nios2Subtarget &STI; public: - explicit Nios2FrameLowering(const Nios2Subtarget &sti, unsigned Alignment) - : TargetFrameLowering(StackGrowsDown, Alignment, 0, Alignment), STI(sti) { - } + explicit Nios2FrameLowering(const Nios2Subtarget &sti) + : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 4, 0, 4), + STI(sti) {} - static const Nios2FrameLowering *create(const Nios2Subtarget &ST); bool hasFP(const MachineFunction &MF) const override; /// emitProlog/emitEpilog - These methods insert prolog and epilog code into /// the function. diff --git a/lib/Target/Nios2/Nios2ISelDAGToDAG.cpp b/lib/Target/Nios2/Nios2ISelDAGToDAG.cpp new file mode 100644 index 000000000000..31d04ebe447e --- /dev/null +++ b/lib/Target/Nios2/Nios2ISelDAGToDAG.cpp @@ -0,0 +1,79 @@ +//===-- Nios2ISelDAGToDAG.cpp - A Dag to Dag Inst Selector for Nios2 ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines an instruction selector for the NIOS2 target. +// +//===----------------------------------------------------------------------===// + +#include "Nios2.h" +#include "Nios2TargetMachine.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/Support/Debug.h" +using namespace llvm; + +#define DEBUG_TYPE "nios2-isel" + +//===----------------------------------------------------------------------===// +// Instruction Selector Implementation +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Nios2DAGToDAGISel - NIOS2 specific code to select NIOS2 machine +// instructions for SelectionDAG operations. +//===----------------------------------------------------------------------===// + +namespace { + +class Nios2DAGToDAGISel : public SelectionDAGISel { + /// Subtarget - Keep a pointer to the Nios2 Subtarget around so that we can + /// make the right decision when generating code for different targets. 
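+  /// It is refreshed in runOnMachineFunction for each function being
+  /// selected.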
+ const Nios2Subtarget *Subtarget; + +public: + explicit Nios2DAGToDAGISel(Nios2TargetMachine &TM, CodeGenOpt::Level OL) + : SelectionDAGISel(TM, OL) {} + + bool runOnMachineFunction(MachineFunction &MF) override { + Subtarget = &MF.getSubtarget(); + return SelectionDAGISel::runOnMachineFunction(MF); + } + + void Select(SDNode *N) override; + + // Pass Name + StringRef getPassName() const override { + return "NIOS2 DAG->DAG Pattern Instruction Selection"; + } + +#include "Nios2GenDAGISel.inc" +}; +} // namespace + +// Select instructions not customized! Used for +// expanded, promoted and normal instructions +void Nios2DAGToDAGISel::Select(SDNode *Node) { + + // Dump information about the Node being selected + DEBUG(errs() << "Selecting: "; Node->dump(CurDAG); errs() << "\n"); + + // If we have a custom node, we already have selected! + if (Node->isMachineOpcode()) { + DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n"); + Node->setNodeId(-1); + return; + } + + // Select the default instruction + SelectCode(Node); +} + +FunctionPass *llvm::createNios2ISelDag(Nios2TargetMachine &TM, + CodeGenOpt::Level OptLevel) { + return new Nios2DAGToDAGISel(TM, OptLevel); +} diff --git a/lib/Target/Nios2/Nios2ISelLowering.cpp b/lib/Target/Nios2/Nios2ISelLowering.cpp new file mode 100644 index 000000000000..008ce1570722 --- /dev/null +++ b/lib/Target/Nios2/Nios2ISelLowering.cpp @@ -0,0 +1,188 @@ +//===-- Nios2ISelLowering.cpp - Nios2 DAG Lowering Implementation ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the interfaces that Nios2 uses to lower LLVM code into a +// selection DAG. +// +//===----------------------------------------------------------------------===// + +#include "Nios2ISelLowering.h" +#include "Nios2MachineFunction.h" +#include "Nios2TargetMachine.h" +#include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" + +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Calling Convention Implementation +//===----------------------------------------------------------------------===// + +#include "Nios2GenCallingConv.inc" + +SDValue +Nios2TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, + bool IsVarArg, + const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, + const SDLoc &DL, SelectionDAG &DAG) const { + // CCValAssign - represent the assignment of + // the return value to a location + SmallVector RVLocs; + MachineFunction &MF = DAG.getMachineFunction(); + + // CCState - Info about the registers and stack slot. + CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext()); + // Analyze return values. + CCInfo.CheckReturn(Outs, RetCC_Nios2EABI); + + SDValue Flag; + SmallVector RetOps(1, Chain); + + // Copy the result values into the output registers. + for (unsigned i = 0; i != RVLocs.size(); ++i) { + SDValue Val = OutVals[i]; + CCValAssign &VA = RVLocs[i]; + assert(VA.isRegLoc() && "Can only return in registers!"); + + if (RVLocs[i].getValVT() != RVLocs[i].getLocVT()) + Val = DAG.getNode(ISD::BITCAST, DL, RVLocs[i].getLocVT(), Val); + + Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Flag); + + // Guarantee that all emitted copies are stuck together with flags. 
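+    // (The glue value keeps the scheduler from separating or reordering the
+    // successive CopyToReg nodes.)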
+ Flag = Chain.getValue(1); + RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); + } + + if (Flag.getNode()) + RetOps.push_back(Flag); + + return DAG.getNode(Nios2ISD::Ret, DL, MVT::Other, RetOps); +} + +// addLiveIn - This helper function adds the specified physical register to the +// MachineFunction as a live in value. It also creates a corresponding +// virtual register for it. +static unsigned addLiveIn(MachineFunction &MF, unsigned PReg, + const TargetRegisterClass *RC) { + unsigned VReg = MF.getRegInfo().createVirtualRegister(RC); + MF.getRegInfo().addLiveIn(PReg, VReg); + return VReg; +} + +//===----------------------------------------------------------------------===// +// Formal Arguments Calling Convention Implementation +//===----------------------------------------------------------------------===// + +// LowerFormalArguments - transform physical registers into virtual registers +// and generate load operations for arguments places on the stack. +SDValue Nios2TargetLowering::LowerFormalArguments( + SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, + const SmallVectorImpl &Ins, const SDLoc &DL, + SelectionDAG &DAG, SmallVectorImpl &InVals) const { + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo &MFI = MF.getFrameInfo(); + + // Assign locations to all of the incoming arguments. + SmallVector ArgLocs; + CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs, + *DAG.getContext()); + + CCInfo.AnalyzeFormalArguments(Ins, CC_Nios2); + + // Used with vargs to acumulate store chains. + std::vector OutChains; + + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + + EVT ValVT = VA.getValVT(); + + // Arguments stored on registers + if (VA.isRegLoc()) { + MVT RegVT = VA.getLocVT(); + unsigned ArgReg = VA.getLocReg(); + const TargetRegisterClass *RC = getRegClassFor(RegVT); + + // Transform the arguments stored on + // physical registers into virtual ones + unsigned Reg = addLiveIn(MF, ArgReg, RC); + SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT); + + // If this is an 8 or 16-bit value, it has been passed promoted + // to 32 bits. Insert an assert[sz]ext to capture this, then + // truncate to the right size. + if (VA.getLocInfo() != CCValAssign::Full) { + unsigned Opcode = 0; + if (VA.getLocInfo() == CCValAssign::SExt) + Opcode = ISD::AssertSext; + else if (VA.getLocInfo() == CCValAssign::ZExt) + Opcode = ISD::AssertZext; + if (Opcode) + ArgValue = + DAG.getNode(Opcode, DL, RegVT, ArgValue, DAG.getValueType(ValVT)); + ArgValue = DAG.getNode(ISD::TRUNCATE, DL, ValVT, ArgValue); + } + + // Handle floating point arguments passed in integer registers. + if ((RegVT == MVT::i32 && ValVT == MVT::f32) || + (RegVT == MVT::i64 && ValVT == MVT::f64)) + ArgValue = DAG.getNode(ISD::BITCAST, DL, ValVT, ArgValue); + InVals.push_back(ArgValue); + } else { // VA.isRegLoc() + MVT LocVT = VA.getLocVT(); + + // sanity check + assert(VA.isMemLoc()); + + // The stack pointer offset is relative to the caller stack frame. 
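+      // The trailing 'true' marks the fixed object immutable, as expected for
+      // an incoming argument slot owned by the caller.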
+ int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8, + VA.getLocMemOffset(), true); + + // Create load nodes to retrieve arguments from the stack + SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); + SDValue Load = DAG.getLoad( + LocVT, DL, Chain, FIN, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)); + InVals.push_back(Load); + OutChains.push_back(Load.getValue(1)); + } + } + if (!OutChains.empty()) { + OutChains.push_back(Chain); + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains); + } + + return Chain; +} + +//===----------------------------------------------------------------------===// +// TargetLowering Implementation +//===----------------------------------------------------------------------===// + +Nios2TargetLowering::Nios2TargetLowering(const TargetMachine &TM, + const Nios2Subtarget &STI) + : TargetLowering(TM), Subtarget(&STI) { + + addRegisterClass(MVT::i32, &Nios2::CPURegsRegClass); + computeRegisterProperties(Subtarget->getRegisterInfo()); +} + +const char *Nios2TargetLowering::getTargetNodeName(unsigned Opcode) const { + switch (Opcode) { + case Nios2ISD::Hi: + return "Nios2ISD::Hi"; + case Nios2ISD::Lo: + return "Nios2ISD::Lo"; + case Nios2ISD::Ret: + return "Nios2ISD::Ret"; + } + return nullptr; +} diff --git a/lib/Target/Nios2/Nios2ISelLowering.h b/lib/Target/Nios2/Nios2ISelLowering.h new file mode 100644 index 000000000000..c3c8179054bb --- /dev/null +++ b/lib/Target/Nios2/Nios2ISelLowering.h @@ -0,0 +1,63 @@ +//===-- Nios2ISelLowering.h - Nios2 DAG Lowering Interface ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interfaces that Nios2 uses to lower LLVM code into a +// selection DAG. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_NIOS2_NIOS2ISELLOWERING_H +#define LLVM_LIB_TARGET_NIOS2_NIOS2ISELLOWERING_H + +#include "Nios2.h" +#include "llvm/CodeGen/TargetLowering.h" + +namespace llvm { +class Nios2Subtarget; + +namespace Nios2ISD { +enum NodeType { + // Start the numbering from where ISD NodeType finishes. + FIRST_NUMBER = ISD::BUILTIN_OP_END, + + // Get the Higher 16 bits from a 32-bit immediate + // No relation with Nios2 Hi register + Hi, + // Get the Lower 16 bits from a 32-bit immediate + // No relation with Nios2 Lo register + Lo, + // Return + Ret +}; +} + +class Nios2TargetLowering : public TargetLowering { + const Nios2Subtarget *Subtarget; + +public: + Nios2TargetLowering(const TargetMachine &TM, const Nios2Subtarget &STI); + + /// getTargetNodeName - This method returns the name of a target specific + // DAG node. 
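+  /// (These names only show up in debug dumps of the selection DAG.)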
+ const char *getTargetNodeName(unsigned Opcode) const override; + + SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, + bool IsVarArg, + const SmallVectorImpl &Ins, + const SDLoc &dl, SelectionDAG &DAG, + SmallVectorImpl &InVals) const override; + + SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, const SDLoc &dl, + SelectionDAG &DAG) const override; +}; +} // end namespace llvm + +#endif // NIOS2_ISELLOWERING_H diff --git a/lib/Target/Nios2/Nios2InstrFormats.td b/lib/Target/Nios2/Nios2InstrFormats.td index 79868be48a48..f57bf03bba3c 100644 --- a/lib/Target/Nios2/Nios2InstrFormats.td +++ b/lib/Target/Nios2/Nios2InstrFormats.td @@ -16,102 +16,220 @@ // Format specifies the encoding used by the instruction. This is part of the // ad-hoc solution used to emit machine instruction encodings by our machine // code emitter. -class Format val> { - bits<3> Value = val; +class Format val> { + bits<6> Value = val; } -def Pseudo : Format<0>; -def FrmI : Format<1>; -def FrmR : Format<2>; -def FrmJ : Format<3>; -def FrmOther : Format<4>; // Instruction w/ a custom format +def Pseudo : Format<0>; +// Nios2 R1 instr formats: +def FrmI : Format<1>; +def FrmR : Format<2>; +def FrmJ : Format<3>; +def FrmOther : Format<4>; // Instruction w/ a custom format +// Nios2 R2 instr 32-bit formats: +def FrmL26 : Format<5>; // corresponds to J format in R1 +def FrmF2I16 : Format<6>; // corresponds to I format in R1 +def FrmF2X4I12 : Format<7>; +def FrmF1X4I12 : Format<8>; +def FrmF1X4L17 : Format<9>; +def FrmF3X6L5 : Format<10>; // corresponds to R format in R1 +def FrmF2X6L10 : Format<11>; +def FrmF3X6 : Format<12>; // corresponds to R format in R1 +def FrmF3X8 : Format<13>; // corresponds to custom format in R1 +// Nios2 R2 instr 16-bit formats: +def FrmI10 : Format<14>; +def FrmT1I7 : Format<15>; +def FrmT2I4 : Format<16>; +def FrmT1X1I6 : Format<17>; +def FrmX1I7 : Format<18>; +def FrmL5I4X1 : Format<19>; +def FrmT2X1L3 : Format<20>; +def FrmT2X1I3 : Format<21>; +def FrmT3X1 : Format<22>; +def FrmT2X3 : Format<23>; +def FrmF1X1 : Format<24>; +def FrmX2L5 : Format<25>; +def FrmF1I5 : Format<26>; +def FrmF2 : Format<27>; -// Generic Nios2 Format -class Nios2Inst pattern, Format f> - : Instruction { +//===----------------------------------------------------------------------===// +// Instruction Predicates: +//===----------------------------------------------------------------------===// + +def isNios2r1 : Predicate<"Subtarget->isNios2r1()">; +def isNios2r2 : Predicate<"Subtarget->isNios2r2()">; + +class PredicateControl { + // Predicates related to specific target CPU features + list FeaturePredicates = []; + // Predicates for the instruction group membership in given ISA + list InstrPredicates = []; + + list Predicates = !listconcat(FeaturePredicates, InstrPredicates); +} + +//===----------------------------------------------------------------------===// +// Base classes for 32-bit, 16-bit and pseudo instructions +//===----------------------------------------------------------------------===// + +class Nios2Inst32 pattern, + InstrItinClass itin, Format f>: Instruction, + PredicateControl { field bits<32> Inst; Format Form = f; let Namespace = "Nios2"; - let Size = 4; bits<6> Opcode = 0; // Bottom 6 bits are the 'opcode' field - let Inst{5 - 0} = Opcode; + let Inst{5-0} = Opcode; let OutOperandList = outs; - let InOperandList = ins; + let InOperandList = ins; let AsmString = asmstr; - let Pattern = 
pattern; + let Pattern = pattern; + let Itinerary = itin; - // // Attributes specific to Nios2 instructions: - // - bits<3> FormBits = Form.Value; // TSFlags layout should be kept in sync with Nios2InstrInfo.h. - let TSFlags{2 - 0} = FormBits; + let TSFlags{5-0} = Form.Value; + let DecoderNamespace = "Nios2"; + field bits<32> SoftFail = 0; +} + +class Nios2Pseudo pattern, + InstrItinClass Itin = IIPseudo>: + Nios2Inst32 { + + let isCodeGenOnly = 1; + let isPseudo = 1; +} +//===----------------------------------------------------------------------===// +// Base classes for R1 and R2 instructions +//===----------------------------------------------------------------------===// + +class Nios2R1Inst32 pattern, + InstrItinClass itin, Format f>: + Nios2Inst32 { let DecoderNamespace = "Nios2"; + let InstrPredicates = [isNios2r1]; } -// Nios2 Instruction Format -class InstSE pattern, Format f> - : Nios2Inst { +class Nios2R2Inst32 pattern, + InstrItinClass itin, Format f>: + Nios2Inst32 { + let DecoderNamespace = "Nios2r2"; + let InstrPredicates = [isNios2r2]; } //===----------------------------------------------------------------------===// // Format I instruction class in Nios2 : <|A|B|immediate|opcode|> //===----------------------------------------------------------------------===// -class FI op, dag outs, dag ins, string asmstr, list pattern> - : InstSE { - bits<5> rA; - bits<5> rB; +class FI op, dag outs, dag ins, string asmstr, list pattern, + InstrItinClass itin>: Nios2R1Inst32 { + + bits<5> rA; + bits<5> rB; bits<16> imm; let Opcode = op; - let Inst{31 - 27} = rA; - let Inst{26 - 22} = rB; - let Inst{21 - 6} = imm; + let Inst{31-27} = rA; + let Inst{26-22} = rB; + let Inst{21-6} = imm; } + //===----------------------------------------------------------------------===// // Format R instruction : <|A|B|C|opx|imm|opcode|> //===----------------------------------------------------------------------===// -class FR opx, dag outs, dag ins, string asmstr, list pattern> - : InstSE { +class FR opx, dag outs, dag ins, string asmstr, list pattern, + InstrItinClass itin>: Nios2R1Inst32 { bits<5> rA; bits<5> rB; bits<5> rC; bits<5> imm = 0; - // opcode is always 0x3a for R instr. - let Opcode = 0x3a; + let Opcode = 0x3a; /* opcode is always 0x3a for R instr. 
*/ - let Inst{31 - 27} = rA; - let Inst{26 - 22} = rB; - let Inst{21 - 17} = rC; - // opx stands for opcode extension - let Inst{16 - 11} = opx; - // optional 5-bit immediate value - let Inst{10 - 6} = imm; + let Inst{31-27} = rA; + let Inst{26-22} = rB; + let Inst{21-17} = rC; + let Inst{16-11} = opx; /* opx stands for opcode extension */ + let Inst{10-6} = imm; /* optional 5-bit immediate value */ } //===----------------------------------------------------------------------===// // Format J instruction class in Nios2 : <|address|opcode|> //===----------------------------------------------------------------------===// -class FJ op, dag outs, dag ins, string asmstr, list pattern> - : InstSE { +class FJ op, dag outs, dag ins, string asmstr, list pattern, + InstrItinClass itin>: + Nios2R1Inst32 { bits<26> addr; - let Opcode = op; + let Inst{31-6} = addr; +} + +//===----------------------------------------------------------------------===// +// Format F3X6 (R2) instruction : <|opx|RSV|C|B|A|opcode|> +//===----------------------------------------------------------------------===// + +class F3X6 opx, dag outs, dag ins, string asmstr, list pattern, + InstrItinClass itin>: + Nios2R2Inst32 { + bits<5> rC; + bits<5> rB; + bits<5> rA; + bits<5> rsv = 0; + + let Opcode = 0x20; /* opcode is always 0x20 (OPX group) for F3X6 instr. */ + + let Inst{31-26} = opx; /* opx stands for opcode extension */ + let Inst{25-21} = rsv; + let Inst{20-16} = rC; + let Inst{15-11} = rB; + let Inst{10-6} = rA; +} + +//===----------------------------------------------------------------------===// +// Multiclasses for common instructions of both R1 and R2: +//===----------------------------------------------------------------------===// - let Inst{31 - 6} = addr; +// Multiclass for instructions that have R format in R1 and F3X6 format in R2 +// and their opx values differ between R1 and R2 +multiclass CommonInstr_R_F3X6_opx opxR1, bits<6> opxR2, dag outs, + dag ins, string asmstr, list pattern, + InstrItinClass itin> { + def NAME#_R1 : FR; + def NAME#_R2 : F3X6; } + +// Multiclass for instructions that have R format in R1 and F3X6 format in R2 +// and their opx values are the same in R1 and R2 +multiclass CommonInstr_R_F3X6 opx, dag outs, dag ins, string asmstr, + list pattern, InstrItinClass itin> : + CommonInstr_R_F3X6_opx; + +// Multiclass for instructions that have I format in R1 and F2I16 format in R2 +// and their op code values differ between R1 and R2 +multiclass CommonInstr_I_F2I16_op opR1, bits<6> opR2, dag outs, dag ins, + string asmstr, list pattern, + InstrItinClass itin> { + def NAME#_R1 : FI; +} + +// Multiclass for instructions that have I format in R1 and F2I16 format in R2 +// and their op code values are the same in R1 and R2 +multiclass CommonInstr_I_F2I16 op, dag outs, dag ins, string asmstr, + list pattern, InstrItinClass itin> : + CommonInstr_I_F2I16_op; diff --git a/lib/Target/Nios2/Nios2InstrInfo.cpp b/lib/Target/Nios2/Nios2InstrInfo.cpp index 412cfcef3afb..9700cba3595b 100644 --- a/lib/Target/Nios2/Nios2InstrInfo.cpp +++ b/lib/Target/Nios2/Nios2InstrInfo.cpp @@ -13,14 +13,42 @@ #include "Nios2InstrInfo.h" #include "Nios2TargetMachine.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" using namespace llvm; #define GET_INSTRINFO_CTOR_DTOR #include "Nios2GenInstrInfo.inc" -const Nios2InstrInfo *Nios2InstrInfo::create(Nios2Subtarget &STI) { - return new Nios2InstrInfo(STI); +// Pin the vtable to this file. 
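+// (Defining one virtual method out of line gives the vtable a single home
+// instead of emitting it in every translation unit that uses the class.)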
+void Nios2InstrInfo::anchor() {} + +Nios2InstrInfo::Nios2InstrInfo(Nios2Subtarget &ST) + : Nios2GenInstrInfo(), RI(ST), Subtarget(ST) {} + +/// Expand Pseudo instructions into real backend instructions +bool Nios2InstrInfo::expandPostRAPseudo(MachineInstr &MI) const { + MachineBasicBlock &MBB = *MI.getParent(); + + switch (MI.getDesc().getOpcode()) { + default: + return false; + case Nios2::RetRA: + BuildMI(MBB, MI, MI.getDebugLoc(), get(Nios2::RET_R1)).addReg(Nios2::RA); + break; + } + + MBB.erase(MI); + return true; } -const Nios2RegisterInfo &Nios2InstrInfo::getRegisterInfo() const { return RI; } +void Nios2InstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + const DebugLoc &DL, unsigned DestReg, + unsigned SrcReg, bool KillSrc) const { + unsigned opc = Subtarget.hasNios2r2() ? Nios2::ADD_R2 : Nios2::ADD_R1; + BuildMI(MBB, I, DL, get(opc)) + .addReg(DestReg, RegState::Define) + .addReg(Nios2::ZERO) + .addReg(SrcReg, getKillRegState(KillSrc)); +} diff --git a/lib/Target/Nios2/Nios2InstrInfo.h b/lib/Target/Nios2/Nios2InstrInfo.h index 6a0a050c839e..52f6e7e9c7c8 100644 --- a/lib/Target/Nios2/Nios2InstrInfo.h +++ b/lib/Target/Nios2/Nios2InstrInfo.h @@ -14,10 +14,7 @@ #ifndef LLVM_LIB_TARGET_NIOS2_NIOS2INSTRINFO_H #define LLVM_LIB_TARGET_NIOS2_NIOS2INSTRINFO_H -#include "Nios2.h" #include "Nios2RegisterInfo.h" - -#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/TargetInstrInfo.h" #define GET_INSTRINFO_HEADER @@ -25,22 +22,27 @@ namespace llvm { +class Nios2Subtarget; + class Nios2InstrInfo : public Nios2GenInstrInfo { -protected: - const Nios2Subtarget &Subtarget; const Nios2RegisterInfo RI; + const Nios2Subtarget &Subtarget; + virtual void anchor(); public: - explicit Nios2InstrInfo(const Nios2Subtarget &STI) - : Nios2GenInstrInfo(), Subtarget(STI), RI(STI) {} - - static const Nios2InstrInfo *create(Nios2Subtarget &STI); + explicit Nios2InstrInfo(Nios2Subtarget &ST); /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As /// such, whenever a client has an instance of instruction info, it should /// always be able to get register info as well (through this method). /// - const Nios2RegisterInfo &getRegisterInfo() const; + const Nios2RegisterInfo &getRegisterInfo() const { return RI; }; + + bool expandPostRAPseudo(MachineInstr &MI) const override; + + void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, + bool KillSrc) const override; }; } // namespace llvm diff --git a/lib/Target/Nios2/Nios2InstrInfo.td b/lib/Target/Nios2/Nios2InstrInfo.td index 5e4815ab3e16..dee84f74bcbe 100644 --- a/lib/Target/Nios2/Nios2InstrInfo.td +++ b/lib/Target/Nios2/Nios2InstrInfo.td @@ -17,11 +17,12 @@ include "Nios2InstrFormats.td" + //===----------------------------------------------------------------------===// // Nios2 Operand, Complex Patterns and Transformations Definitions. //===----------------------------------------------------------------------===// -def simm16 : Operand { +def simm16 : Operand { let DecoderMethod= "DecodeSimm16"; } @@ -29,22 +30,80 @@ def simm16 : Operand { // e.g. 
addi, andi def immSExt16 : PatLeaf<(imm), [{ return isInt<16>(N->getSExtValue()); }]>; +// Custom return SDNode +def Nios2Ret : SDNode<"Nios2ISD::Ret", SDTNone, + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; + //===----------------------------------------------------------------------===// // Instructions specific format //===----------------------------------------------------------------------===// -// Arithmetic and logical instructions with 2 register operands. -class ArithLogicI op, string instr_asm, SDNode OpNode, - Operand Od, PatLeaf imm_type, RegisterClass RC> : - FI { - let isReMaterializable = 1; +// Arithmetic and logical instructions with 2 registers and 16-bit immediate +// value. +multiclass ArithLogicRegImm16 op, string mnemonic, SDNode opNode, + Operand immOp, PatLeaf immType>: + CommonInstr_I_F2I16; + +// Arithmetic and logical instructions with 3 register operands. +// Defines R1 and R2 instruction at the same time. +multiclass ArithLogicReg opx, string mnemonic, + SDNode opNode>: + CommonInstr_R_F3X6; + +multiclass Return opx, dag outs, dag ins, string mnemonic> { + let rB = 0, rC = 0, + isReturn = 1, + isCodeGenOnly = 1, + hasCtrlDep = 1, + hasExtraSrcRegAllocReq = 1 in { + defm NAME# : CommonInstr_R_F3X6; + } } //===----------------------------------------------------------------------===// -// Nios2 R1 Instructions +// Nios2 Instructions //===----------------------------------------------------------------------===// +/// Arithmetic instructions operating on registers. +let isCommutable = 1 , + isReMaterializable = 1 in { + defm ADD : ArithLogicReg<0x31, "add", add>; + defm AND : ArithLogicReg<0x0e, "and", and>; + defm OR : ArithLogicReg<0x16, "or", or>; + defm XOR : ArithLogicReg<0x1e, "xor", xor>; + defm MUL : ArithLogicReg<0x27, "mul", mul>; +} + +let isReMaterializable = 1 in { + defm SUB : ArithLogicReg<0x39, "sub", sub>; +} + +defm DIVU : ArithLogicReg<0x24, "divu", udiv>; +defm DIV : ArithLogicReg<0x25, "div", sdiv>; + +defm SLL : ArithLogicReg<0x13, "sll", shl>; +defm SRL : ArithLogicReg<0x1b, "srl", srl>; +defm SRA : ArithLogicReg<0x3b, "sra", sra>; + /// Arithmetic Instructions (ALU Immediate) -def ADDi : ArithLogicI<0x04, "addi", add, simm16, immSExt16, CPURegs>; +defm ADDI : ArithLogicRegImm16<0x04, "addi", add, simm16, immSExt16>; + +// Returns: +defm RET : Return<0x05, (outs), (ins CPURegs:$rA), "ret">; + +//===----------------------------------------------------------------------===// +// Pseudo instructions +//===----------------------------------------------------------------------===// + +// Return RA. +let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1, hasCtrlDep=1 in +def RetRA : Nios2Pseudo<(outs), (ins), "", [(Nios2Ret)]>; diff --git a/lib/Target/Nios2/Nios2MCInstLower.cpp b/lib/Target/Nios2/Nios2MCInstLower.cpp new file mode 100644 index 000000000000..c43af879b8a6 --- /dev/null +++ b/lib/Target/Nios2/Nios2MCInstLower.cpp @@ -0,0 +1,117 @@ +//===-- Nios2MCInstLower.cpp - Convert Nios2 MachineInstr to MCInst -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains code to lower Nios2 MachineInstrs to their corresponding +// MCInst records. 
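+//
+// Roughly: opcodes and register/immediate operands are copied over one to
+// one, while symbol operands carrying MO_ABS_HI/MO_ABS_LO target flags are
+// wrapped in Nios2MCExpr nodes so they later print as %hiadj/%lo.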
+// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/Nios2BaseInfo.h" +#include "MCTargetDesc/Nios2MCExpr.h" +#include "Nios2.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOperand.h" + +using namespace llvm; + +static MCOperand LowerSymbolOperand(const MachineOperand &MO, AsmPrinter &AP) { + MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VK_None; + Nios2MCExpr::Nios2ExprKind TargetKind = Nios2MCExpr::CEK_None; + const MCSymbol *Symbol; + + switch (MO.getTargetFlags()) { + default: + llvm_unreachable("Invalid target flag!"); + case Nios2FG::MO_NO_FLAG: + break; + case Nios2FG::MO_ABS_HI: + TargetKind = Nios2MCExpr::CEK_ABS_HI; + break; + case Nios2FG::MO_ABS_LO: + TargetKind = Nios2MCExpr::CEK_ABS_LO; + break; + } + + switch (MO.getType()) { + case MachineOperand::MO_GlobalAddress: + Symbol = AP.getSymbol(MO.getGlobal()); + break; + + case MachineOperand::MO_MachineBasicBlock: + Symbol = MO.getMBB()->getSymbol(); + break; + + case MachineOperand::MO_BlockAddress: + Symbol = AP.GetBlockAddressSymbol(MO.getBlockAddress()); + break; + + case MachineOperand::MO_ExternalSymbol: + Symbol = AP.GetExternalSymbolSymbol(MO.getSymbolName()); + break; + + case MachineOperand::MO_JumpTableIndex: + Symbol = AP.GetJTISymbol(MO.getIndex()); + break; + + case MachineOperand::MO_ConstantPoolIndex: + Symbol = AP.GetCPISymbol(MO.getIndex()); + break; + + default: + llvm_unreachable(""); + } + + const MCExpr *Expr = MCSymbolRefExpr::create(Symbol, Kind, AP.OutContext); + + if (TargetKind != Nios2MCExpr::CEK_None) + Expr = Nios2MCExpr::create(TargetKind, Expr, AP.OutContext); + + return MCOperand::createExpr(Expr); +} + +static MCOperand LowerOperand(const MachineOperand &MO, AsmPrinter &AP) { + + switch (MO.getType()) { + default: + llvm_unreachable("unknown operand type"); + case MachineOperand::MO_Register: + // Ignore all implicit register operands. + if (MO.isImplicit()) + break; + return MCOperand::createReg(MO.getReg()); + case MachineOperand::MO_Immediate: + return MCOperand::createImm(MO.getImm()); + case MachineOperand::MO_MachineBasicBlock: + case MachineOperand::MO_ExternalSymbol: + case MachineOperand::MO_JumpTableIndex: + case MachineOperand::MO_BlockAddress: + case MachineOperand::MO_GlobalAddress: + case MachineOperand::MO_ConstantPoolIndex: + return LowerSymbolOperand(MO, AP); + case MachineOperand::MO_RegisterMask: + break; + } + + return MCOperand(); +} + +void llvm::LowerNios2MachineInstToMCInst(const MachineInstr *MI, MCInst &OutMI, + AsmPrinter &AP) { + + OutMI.setOpcode(MI->getOpcode()); + + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + MCOperand MCOp = LowerOperand(MO, AP); + + if (MCOp.isValid()) + OutMI.addOperand(MCOp); + } +} diff --git a/lib/Target/Nios2/Nios2MachineFunction.cpp b/lib/Target/Nios2/Nios2MachineFunction.cpp new file mode 100644 index 000000000000..be5b8829fe36 --- /dev/null +++ b/lib/Target/Nios2/Nios2MachineFunction.cpp @@ -0,0 +1,14 @@ +//===-- Nios2MachineFunctionInfo.cpp - Private data used for Nios2 --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
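+//
+// The only out-of-line definition here is the Nios2FunctionInfo anchor.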
+// +//===----------------------------------------------------------------------===// + +#include "Nios2MachineFunction.h" + +using namespace llvm; + +void Nios2FunctionInfo::anchor() {} diff --git a/lib/Target/Nios2/Nios2MachineFunction.h b/lib/Target/Nios2/Nios2MachineFunction.h new file mode 100644 index 000000000000..73baf9694790 --- /dev/null +++ b/lib/Target/Nios2/Nios2MachineFunction.h @@ -0,0 +1,62 @@ +//===-- Nios2MachineFunctionInfo.h - Private data used for Nios2 --*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the Nios2 specific subclass of MachineFunctionInfo. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_NIOS2_NIOS2MACHINEFUNCTION_H +#define LLVM_LIB_TARGET_NIOS2_NIOS2MACHINEFUNCTION_H + +#include "llvm/CodeGen/MachineFunction.h" + +namespace llvm { + +/// Nios2FunctionInfo - This class is derived from MachineFunction private +/// Nios2 target-specific information for each MachineFunction. +class Nios2FunctionInfo : public MachineFunctionInfo { + virtual void anchor(); + +private: + unsigned GlobalBaseReg; + + /// VarArgsFrameOffset - Frame offset to start of varargs area. + int VarArgsFrameOffset; + + /// SRetReturnReg - Holds the virtual register into which the sret + /// argument is passed. + unsigned SRetReturnReg; + + /// IsLeafProc - True if the function is a leaf procedure. + bool IsLeafProc; + +public: + Nios2FunctionInfo() + : GlobalBaseReg(0), VarArgsFrameOffset(0), SRetReturnReg(0), + IsLeafProc(false) {} + explicit Nios2FunctionInfo(MachineFunction &MF) + : GlobalBaseReg(0), VarArgsFrameOffset(0), SRetReturnReg(0), + IsLeafProc(false) {} + + unsigned getGlobalBaseReg() const { return GlobalBaseReg; } + void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; } + + int getVarArgsFrameOffset() const { return VarArgsFrameOffset; } + void setVarArgsFrameOffset(int Offset) { VarArgsFrameOffset = Offset; } + + unsigned getSRetReturnReg() const { return SRetReturnReg; } + void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; } + + void setLeafProc(bool rhs) { IsLeafProc = rhs; } + bool isLeafProc() const { return IsLeafProc; } +}; + +} // end of namespace llvm + +#endif // NIOS2_MACHINE_FUNCTION_INFO_H diff --git a/lib/Target/Nios2/Nios2RegisterInfo.cpp b/lib/Target/Nios2/Nios2RegisterInfo.cpp index b938c48e37c8..9b892f917535 100644 --- a/lib/Target/Nios2/Nios2RegisterInfo.cpp +++ b/lib/Target/Nios2/Nios2RegisterInfo.cpp @@ -32,11 +32,16 @@ const TargetRegisterClass *Nios2RegisterInfo::intRegClass(unsigned Size) const { const MCPhysReg * Nios2RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { - return nullptr; + return CSR_SaveList; } BitVector Nios2RegisterInfo::getReservedRegs(const MachineFunction &MF) const { - BitVector Reserved(1); + static const MCPhysReg ReservedCPURegs[] = {Nios2::ZERO, Nios2::AT, Nios2::SP, + Nios2::RA, Nios2::PC, Nios2::GP}; + BitVector Reserved(getNumRegs()); + + for (unsigned I = 0; I < array_lengthof(ReservedCPURegs); ++I) + Reserved.set(ReservedCPURegs[I]); return Reserved; } @@ -46,5 +51,5 @@ void Nios2RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, RegScavenger *RS) const {} unsigned Nios2RegisterInfo::getFrameRegister(const MachineFunction &MF) const { - return 0; + return Nios2::SP; } diff 
--git a/lib/Target/Nios2/Nios2Subtarget.cpp b/lib/Target/Nios2/Nios2Subtarget.cpp index 6176d37fea0e..196bed20cdcc 100644 --- a/lib/Target/Nios2/Nios2Subtarget.cpp +++ b/lib/Target/Nios2/Nios2Subtarget.cpp @@ -12,10 +12,7 @@ //===----------------------------------------------------------------------===// #include "Nios2Subtarget.h" - #include "Nios2.h" -#include "Nios2RegisterInfo.h" -#include "Nios2TargetMachine.h" using namespace llvm; @@ -28,19 +25,17 @@ using namespace llvm; void Nios2Subtarget::anchor() {} Nios2Subtarget::Nios2Subtarget(const Triple &TT, const std::string &CPU, - const std::string &FS, - const Nios2TargetMachine &_TM) + const std::string &FS, const TargetMachine &TM) : // Nios2GenSubtargetInfo will display features by llc -march=nios2 // -mcpu=help - Nios2GenSubtargetInfo(TT, CPU, FS), TM(_TM), TargetTriple(TT), - InstrInfo(Nios2InstrInfo::create( - initializeSubtargetDependencies(CPU, FS, TM))) {} + Nios2GenSubtargetInfo(TT, CPU, FS), TargetTriple(TT), + InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this), + FrameLowering(*this) {} -Nios2Subtarget & -Nios2Subtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS, - const TargetMachine &TM) { +Nios2Subtarget &Nios2Subtarget::initializeSubtargetDependencies(StringRef CPU, + StringRef FS) { if (TargetTriple.getArch() == Triple::nios2) { if (CPU != "nios2r2") { CPU = "nios2r1"; diff --git a/lib/Target/Nios2/Nios2Subtarget.h b/lib/Target/Nios2/Nios2Subtarget.h index b03a291946db..a822dff33b5b 100644 --- a/lib/Target/Nios2/Nios2Subtarget.h +++ b/lib/Target/Nios2/Nios2Subtarget.h @@ -15,7 +15,10 @@ #define LLVM_LIB_TARGET_NIOS2_NIOS2SUBTARGET_H #include "Nios2FrameLowering.h" +#include "Nios2ISelLowering.h" #include "Nios2InstrInfo.h" +#include "llvm/CodeGen/SelectionDAGTargetInfo.h" +#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #define GET_SUBTARGETINFO_HEADER @@ -51,18 +54,18 @@ class Nios2Subtarget : public Nios2GenSubtargetInfo { // Nios2 architecture version Nios2ArchEnum Nios2ArchVersion; - const Nios2TargetMachine &TM; - Triple TargetTriple; - std::unique_ptr InstrInfo; - std::unique_ptr FrameLowering; + Nios2InstrInfo InstrInfo; + Nios2TargetLowering TLInfo; + SelectionDAGTargetInfo TSInfo; + Nios2FrameLowering FrameLowering; public: /// This constructor initializes the data members to match that /// of the specified triple. Nios2Subtarget(const Triple &TT, const std::string &CPU, - const std::string &FS, const Nios2TargetMachine &_TM); + const std::string &FS, const TargetMachine &TM); /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. 
@@ -73,14 +76,20 @@ class Nios2Subtarget : public Nios2GenSubtargetInfo { bool hasNios2r2() const { return Nios2ArchVersion >= Nios2r2; } bool isNios2r2() const { return Nios2ArchVersion == Nios2r2; } - Nios2Subtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS, - const TargetMachine &TM); + Nios2Subtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS); + const Nios2InstrInfo *getInstrInfo() const override { return &InstrInfo; } const TargetFrameLowering *getFrameLowering() const override { - return FrameLowering.get(); + return &FrameLowering; } const Nios2RegisterInfo *getRegisterInfo() const override { - return &InstrInfo->getRegisterInfo(); + return &InstrInfo.getRegisterInfo(); + } + const Nios2TargetLowering *getTargetLowering() const override { + return &TLInfo; + } + const SelectionDAGTargetInfo *getSelectionDAGInfo() const override { + return &TSInfo; } }; } // namespace llvm diff --git a/lib/Target/Nios2/Nios2TargetMachine.cpp b/lib/Target/Nios2/Nios2TargetMachine.cpp index 7370dac2ce38..b7594dde709d 100644 --- a/lib/Target/Nios2/Nios2TargetMachine.cpp +++ b/lib/Target/Nios2/Nios2TargetMachine.cpp @@ -13,6 +13,7 @@ #include "Nios2TargetMachine.h" #include "Nios2.h" +#include "Nios2TargetObjectFile.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/Support/TargetRegistry.h" @@ -36,14 +37,27 @@ static Reloc::Model getEffectiveRelocModel(Optional RM) { return *RM; } +static CodeModel::Model getEffectiveCodeModel(Optional CM, + Reloc::Model RM, bool JIT) { + if (CM) + return *CM; + return CodeModel::Small; +} + Nios2TargetMachine::Nios2TargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, Optional RM, Optional CM, CodeGenOpt::Level OL, bool JIT) - : LLVMTargetMachine(T, computeDataLayout(), TT, CPU, FS, Options, - getEffectiveRelocModel(RM), *CM, OL) {} + : LLVMTargetMachine( + T, computeDataLayout(), TT, CPU, FS, Options, + getEffectiveRelocModel(RM), + getEffectiveCodeModel(CM, getEffectiveRelocModel(RM), JIT), OL), + TLOF(make_unique()), + Subtarget(TT, CPU, FS, *this) { + initAsmInfo(); +} Nios2TargetMachine::~Nios2TargetMachine() {} @@ -82,6 +96,7 @@ class Nios2PassConfig : public TargetPassConfig { } void addCodeGenPrepare() override; + bool addInstSelector() override; void addIRPasses() override; }; } // namespace @@ -95,3 +110,10 @@ void Nios2PassConfig::addCodeGenPrepare() { } void Nios2PassConfig::addIRPasses() { TargetPassConfig::addIRPasses(); } + +// Install an instruction selector pass using +// the ISelDag to gen Nios2 code. 
+bool Nios2PassConfig::addInstSelector() { + addPass(createNios2ISelDag(getNios2TargetMachine(), getOptLevel())); + return false; +} diff --git a/lib/Target/Nios2/Nios2TargetMachine.h b/lib/Target/Nios2/Nios2TargetMachine.h index 503187778c1b..1ebfb397383e 100644 --- a/lib/Target/Nios2/Nios2TargetMachine.h +++ b/lib/Target/Nios2/Nios2TargetMachine.h @@ -20,6 +20,8 @@ namespace llvm { class Nios2TargetMachine : public LLVMTargetMachine { mutable StringMap> SubtargetMap; + std::unique_ptr TLOF; + Nios2Subtarget Subtarget; public: Nios2TargetMachine(const Target &T, const Triple &TT, StringRef CPU, @@ -28,8 +30,13 @@ class Nios2TargetMachine : public LLVMTargetMachine { CodeGenOpt::Level OL, bool JIT); ~Nios2TargetMachine() override; + const Nios2Subtarget *getSubtargetImpl() const { return &Subtarget; } const Nios2Subtarget *getSubtargetImpl(const Function &F) const override; + TargetLoweringObjectFile *getObjFileLowering() const override { + return TLOF.get(); + } + // Pass Pipeline Configuration TargetPassConfig *createPassConfig(PassManagerBase &PM) override; }; diff --git a/lib/Target/Nios2/Nios2TargetObjectFile.cpp b/lib/Target/Nios2/Nios2TargetObjectFile.cpp new file mode 100644 index 000000000000..5fc85ef487e6 --- /dev/null +++ b/lib/Target/Nios2/Nios2TargetObjectFile.cpp @@ -0,0 +1,18 @@ +//===-- Nios2TargetObjectFile.cpp - Nios2 Object Files --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "Nios2TargetObjectFile.h" + +using namespace llvm; + +void Nios2TargetObjectFile::Initialize(MCContext &Ctx, + const TargetMachine &TM) { + TargetLoweringObjectFileELF::Initialize(Ctx, TM); + InitializeELF(TM.Options.UseInitArray); +} diff --git a/lib/Target/Nios2/Nios2TargetObjectFile.h b/lib/Target/Nios2/Nios2TargetObjectFile.h new file mode 100644 index 000000000000..28d7ff0ec668 --- /dev/null +++ b/lib/Target/Nios2/Nios2TargetObjectFile.h @@ -0,0 +1,28 @@ +//===-- llvm/Target/Nios2TargetObjectFile.h - Nios2 Object Info -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_NIOS2_NIOS2TARGETOBJECTFILE_H +#define LLVM_LIB_TARGET_NIOS2_NIOS2TARGETOBJECTFILE_H + +#include "Nios2TargetMachine.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" + +namespace llvm { + +class Nios2TargetObjectFile : public TargetLoweringObjectFileELF { + const Nios2TargetMachine *TM; + +public: + Nios2TargetObjectFile() : TargetLoweringObjectFileELF() {} + + void Initialize(MCContext &Ctx, const TargetMachine &TM) override; +}; +} // end namespace llvm + +#endif diff --git a/lib/Target/Nios2/Nios2TargetStreamer.h b/lib/Target/Nios2/Nios2TargetStreamer.h new file mode 100644 index 000000000000..63e4e3ccdc64 --- /dev/null +++ b/lib/Target/Nios2/Nios2TargetStreamer.h @@ -0,0 +1,32 @@ +//===-- Nios2TargetStreamer.h - Nios2 Target Streamer ----------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_NIOS2_NIOS2TARGETSTREAMER_H +#define LLVM_LIB_TARGET_NIOS2_NIOS2TARGETSTREAMER_H + +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCStreamer.h" + +namespace llvm { + +class Nios2TargetStreamer : public MCTargetStreamer { +public: + Nios2TargetStreamer(MCStreamer &S); +}; + +// This part is for ascii assembly output +class Nios2TargetAsmStreamer : public Nios2TargetStreamer { + formatted_raw_ostream &OS; + +public: + Nios2TargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS); +}; + +} // namespace llvm +#endif diff --git a/lib/Target/Nios2/TargetInfo/Nios2TargetInfo.cpp b/lib/Target/Nios2/TargetInfo/Nios2TargetInfo.cpp index e317686140f7..d808a96db772 100644 --- a/lib/Target/Nios2/TargetInfo/Nios2TargetInfo.cpp +++ b/lib/Target/Nios2/TargetInfo/Nios2TargetInfo.cpp @@ -20,5 +20,5 @@ Target &llvm::getTheNios2Target() { extern "C" void LLVMInitializeNios2TargetInfo() { RegisterTarget - X(getTheNios2Target(), "nios2", "Nios2"); + X(getTheNios2Target(), "nios2", "Nios2", "Nios2"); } diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index 2fbf51007c3d..d6db354e0215 100644 --- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -393,6 +393,10 @@ struct PPCOperand : public MCParsedAsmOperand { /// getEndLoc - Get the location of the last token of this operand. SMLoc getEndLoc() const override { return EndLoc; } + /// getLocRange - Get the range between the first and last token of this + /// operand. + SMRange getLocRange() const { return SMRange(StartLoc, EndLoc); } + /// isPPC64 - True if this operand is for an instruction in 64-bit mode. 
bool isPPC64() const { return IsPPC64; } @@ -1268,6 +1272,9 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst, } } +static std::string PPCMnemonicSpellCheck(StringRef S, uint64_t FBS, + unsigned VariantID = 0); + bool PPCAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, uint64_t &ErrorInfo, @@ -1283,8 +1290,13 @@ bool PPCAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, return false; case Match_MissingFeature: return Error(IDLoc, "instruction use requires an option to be enabled"); - case Match_MnemonicFail: - return Error(IDLoc, "unrecognized instruction mnemonic"); + case Match_MnemonicFail: { + uint64_t FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); + std::string Suggestion = PPCMnemonicSpellCheck( + ((PPCOperand &)*Operands[0]).getToken(), FBS); + return Error(IDLoc, "invalid instruction" + Suggestion, + ((PPCOperand &)*Operands[0]).getLocRange()); + } case Match_InvalidOperand: { SMLoc ErrorLoc = IDLoc; if (ErrorInfo != ~0ULL) { @@ -1920,6 +1932,7 @@ extern "C" void LLVMInitializePowerPCAsmParser() { #define GET_REGISTER_MATCHER #define GET_MATCHER_IMPLEMENTATION +#define GET_MNEMONIC_SPELL_CHECKER #include "PPCGenAsmMatcher.inc" // Define this matcher function after the auto-generated include so we diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt index 4aa6dfab5257..3f173787114d 100644 --- a/lib/Target/PowerPC/CMakeLists.txt +++ b/lib/Target/PowerPC/CMakeLists.txt @@ -39,9 +39,11 @@ add_llvm_target(PowerPCCodeGen PPCTOCRegDeps.cpp PPCTLSDynamicCall.cpp PPCVSXCopy.cpp + PPCReduceCRLogicals.cpp PPCVSXFMAMutate.cpp PPCVSXSwapRemoval.cpp PPCExpandISEL.cpp + PPCPreEmitPeephole.cpp ) add_subdirectory(AsmParser) diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp index be6fe7b7ad65..ea709a73ebf2 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp @@ -39,6 +39,12 @@ static cl::opt ShowVSRNumsAsVR("ppc-vsr-nums-as-vr", cl::Hidden, cl::init(false), cl::desc("Prints full register names with vs{31-63} as v{0-31}")); +// Prints full register names with percent symbol. +static cl::opt +FullRegNamesWithPercent("ppc-reg-with-percent-prefix", cl::Hidden, + cl::init(false), + cl::desc("Prints full register names with percent")); + #define PRINT_ALIAS_INSTR #include "PPCGenAsmWriter.inc" @@ -445,28 +451,57 @@ void PPCInstPrinter::printTLSCall(const MCInst *MI, unsigned OpNo, O << '@' << MCSymbolRefExpr::getVariantKindName(refExp.getKind()); } +/// showRegistersWithPercentPrefix - Check if this register name should be +/// printed with a percentage symbol as prefix. +bool PPCInstPrinter::showRegistersWithPercentPrefix(const char *RegName) const { + if (!FullRegNamesWithPercent || TT.isOSDarwin() || TT.getOS() == Triple::AIX) + return false; -/// stripRegisterPrefix - This method strips the character prefix from a -/// register name so that only the number is left. Used by for linux asm. 
-static const char *stripRegisterPrefix(const char *RegName, unsigned RegNum, - unsigned RegEncoding) { - if (FullRegNames) { - if (RegNum >= PPC::CR0EQ && RegNum <= PPC::CR7UN) { - const char *CRBits[] = - { "lt", "gt", "eq", "un", - "4*cr1+lt", "4*cr1+gt", "4*cr1+eq", "4*cr1+un", - "4*cr2+lt", "4*cr2+gt", "4*cr2+eq", "4*cr2+un", - "4*cr3+lt", "4*cr3+gt", "4*cr3+eq", "4*cr3+un", - "4*cr4+lt", "4*cr4+gt", "4*cr4+eq", "4*cr4+un", - "4*cr5+lt", "4*cr5+gt", "4*cr5+eq", "4*cr5+un", - "4*cr6+lt", "4*cr6+gt", "4*cr6+eq", "4*cr6+un", - "4*cr7+lt", "4*cr7+gt", "4*cr7+eq", "4*cr7+un" - }; - return CRBits[RegEncoding]; - } - return RegName; + switch (RegName[0]) { + default: + return false; + case 'r': + case 'f': + case 'q': + case 'v': + case 'c': + return true; } +} + +/// getVerboseConditionalRegName - This method expands the condition register +/// when requested explicitly or targetting Darwin. +const char *PPCInstPrinter::getVerboseConditionRegName(unsigned RegNum, + unsigned RegEncoding) + const { + if (!TT.isOSDarwin() && !FullRegNames) + return nullptr; + if (RegNum < PPC::CR0EQ || RegNum > PPC::CR7UN) + return nullptr; + const char *CRBits[] = { + "lt", "gt", "eq", "un", + "4*cr1+lt", "4*cr1+gt", "4*cr1+eq", "4*cr1+un", + "4*cr2+lt", "4*cr2+gt", "4*cr2+eq", "4*cr2+un", + "4*cr3+lt", "4*cr3+gt", "4*cr3+eq", "4*cr3+un", + "4*cr4+lt", "4*cr4+gt", "4*cr4+eq", "4*cr4+un", + "4*cr5+lt", "4*cr5+gt", "4*cr5+eq", "4*cr5+un", + "4*cr6+lt", "4*cr6+gt", "4*cr6+eq", "4*cr6+un", + "4*cr7+lt", "4*cr7+gt", "4*cr7+eq", "4*cr7+un" + }; + return CRBits[RegEncoding]; +} +// showRegistersWithPrefix - This method determines whether registers +// should be number-only or include the prefix. +bool PPCInstPrinter::showRegistersWithPrefix() const { + if (TT.getOS() == Triple::AIX) + return false; + return TT.isOSDarwin() || FullRegNamesWithPercent || FullRegNames; +} + +/// stripRegisterPrefix - This method strips the character prefix from a +/// register name so that only the number is left. +static const char *stripRegisterPrefix(const char *RegName) { switch (RegName[0]) { case 'r': case 'f': @@ -502,10 +537,14 @@ void PPCInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, Reg = PPC::VSX32 + (Reg - PPC::VF0); } - const char *RegName = getRegisterName(Reg); - // The linux and AIX assembler does not take register prefixes. 
- if (!isDarwinSyntax()) - RegName = stripRegisterPrefix(RegName, Reg, MRI.getEncodingValue(Reg)); + const char *RegName; + RegName = getVerboseConditionRegName(Reg, MRI.getEncodingValue(Reg)); + if (RegName == nullptr) + RegName = getRegisterName(Reg); + if (showRegistersWithPercentPrefix(RegName)) + O << "%"; + if (!showRegistersWithPrefix()) + RegName = stripRegisterPrefix(RegName); O << RegName; return; diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h index 9c79ffb1176c..f000fbb98110 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h @@ -14,21 +14,24 @@ #ifndef LLVM_LIB_TARGET_POWERPC_INSTPRINTER_PPCINSTPRINTER_H #define LLVM_LIB_TARGET_POWERPC_INSTPRINTER_PPCINSTPRINTER_H +#include "llvm/ADT/Triple.h" #include "llvm/MC/MCInstPrinter.h" namespace llvm { class PPCInstPrinter : public MCInstPrinter { - bool IsDarwin; + Triple TT; +private: + bool showRegistersWithPercentPrefix(const char *RegName) const; + bool showRegistersWithPrefix() const; + const char *getVerboseConditionRegName(unsigned RegNum, + unsigned RegEncoding) const; + public: PPCInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, bool isDarwin) - : MCInstPrinter(MAI, MII, MRI), IsDarwin(isDarwin) {} - - bool isDarwinSyntax() const { - return IsDarwin; - } - + const MCRegisterInfo &MRI, Triple T) + : MCInstPrinter(MAI, MII, MRI), TT(T) {} + void printRegName(raw_ostream &OS, unsigned RegNo) const override; void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot, const MCSubtargetInfo &STI) override; diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp index 2a1de244da92..728e7757fd28 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -18,6 +18,7 @@ #include "llvm/MC/MCMachObjectWriter.h" #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbolELF.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/ErrorHandling.h" @@ -231,9 +232,10 @@ namespace { } // end anonymous namespace MCAsmBackend *llvm::createPPCAsmBackend(const Target &T, + const MCSubtargetInfo &STI, const MCRegisterInfo &MRI, - const Triple &TT, StringRef CPU, const MCTargetOptions &Options) { + const Triple &TT = STI.getTargetTriple(); if (TT.isOSDarwin()) return new DarwinPPCAsmBackend(T); diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp index 7044835cb8a9..a1e4e07b25af 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp @@ -239,7 +239,7 @@ static MCInstPrinter *createPPCMCInstPrinter(const Triple &T, const MCAsmInfo &MAI, const MCInstrInfo &MII, const MCRegisterInfo &MRI) { - return new PPCInstPrinter(MAI, MII, MRI, T.isOSDarwin()); + return new PPCInstPrinter(MAI, MII, MRI, T); } extern "C" void LLVMInitializePowerPCTargetMC() { diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h index 99fec6c554b0..d47b9a6e452c 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h @@ -29,6 +29,7 @@ class MCContext; class MCInstrInfo; class MCObjectWriter; class MCRegisterInfo; +class MCSubtargetInfo; class MCTargetOptions; class 
Target; class Triple; @@ -43,8 +44,8 @@ MCCodeEmitter *createPPCMCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, MCContext &Ctx); -MCAsmBackend *createPPCAsmBackend(const Target &T, const MCRegisterInfo &MRI, - const Triple &TT, StringRef CPU, +MCAsmBackend *createPPCAsmBackend(const Target &T, const MCSubtargetInfo &STI, + const MCRegisterInfo &MRI, const MCTargetOptions &Options); /// Construct an PPC ELF object writer. @@ -101,6 +102,7 @@ static inline bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) { // Defines symbolic names for the PowerPC instructions. // #define GET_INSTRINFO_ENUM +#define GET_INSTRINFO_SCHED_ENUM #include "PPCGenInstrInfo.inc" #define GET_SUBTARGETINFO_ENUM diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h index 4b9f0e1b1b75..dfdec246e868 100644 --- a/lib/Target/PowerPC/PPC.h +++ b/lib/Target/PowerPC/PPC.h @@ -41,6 +41,7 @@ namespace llvm { FunctionPass *createPPCVSXCopyPass(); FunctionPass *createPPCVSXFMAMutatePass(); FunctionPass *createPPCVSXSwapRemovalPass(); + FunctionPass *createPPCReduceCRLogicalsPass(); FunctionPass *createPPCMIPeepholePass(); FunctionPass *createPPCBranchSelectionPass(); FunctionPass *createPPCBranchCoalescingPass(); @@ -49,6 +50,7 @@ namespace llvm { FunctionPass *createPPCTLSDynamicCallPass(); FunctionPass *createPPCBoolRetToIntPass(); FunctionPass *createPPCExpandISELPass(); + FunctionPass *createPPCPreEmitPeepholePass(); void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, AsmPrinter &AP, bool isDarwin); bool LowerPPCMachineOperandToMCOperand(const MachineOperand &MO, @@ -58,7 +60,9 @@ namespace llvm { void initializePPCVSXFMAMutatePass(PassRegistry&); void initializePPCBoolRetToIntPass(PassRegistry&); void initializePPCExpandISELPass(PassRegistry &); + void initializePPCPreEmitPeepholePass(PassRegistry &); void initializePPCTLSDynamicCallPass(PassRegistry &); + void initializePPCMIPeepholePass(PassRegistry&); extern char &PPCVSXFMAMutateID; namespace PPCII { diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index 7fee5ff1bf8f..17451900840a 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -507,7 +507,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { MCInst TmpInst; bool isPPC64 = Subtarget->isPPC64(); bool isDarwin = TM.getTargetTriple().isOSDarwin(); - const Module *M = MF->getFunction()->getParent(); + const Module *M = MF->getFunction().getParent(); PICLevel::Level PL = M->getPICLevel(); // Lower multi-instruction pseudo operations. 
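Several of the PowerPC hunks in this section (PPCAsmPrinter, PPCBranchCoalescing, PPCEarlyReturn, PPCFrameLowering, PPCISelDAGToDAG) apply the same mechanical update: MachineFunction::getFunction() now returns a const Function reference rather than a pointer, so each call site drops one dereference. A minimal sketch of the pattern, with the old form shown in comments and MF taken to be a MachineFunction reference:

    // Before this change, getFunction() returned const Function *:
    //   if (skipFunction(*MF.getFunction())) return false;
    //   const Module *M = MF.getFunction()->getParent();
    // After, it returns const Function &, so the call sites simplify to:
    if (skipFunction(MF.getFunction()))
      return false;
    const Module *M = MF.getFunction().getParent();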
@@ -521,7 +521,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { return LowerPATCHPOINT(SM, *MI); case PPC::MoveGOTtoLR: { - // Transform %LR = MoveGOTtoLR + // Transform %lr = MoveGOTtoLR // Into this: bl _GLOBAL_OFFSET_TABLE_@local-4 // _GLOBAL_OFFSET_TABLE_@local-4 (instruction preceding // _GLOBAL_OFFSET_TABLE_) has exactly one instruction: @@ -542,7 +542,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { } case PPC::MovePCtoLR: case PPC::MovePCtoLR8: { - // Transform %LR = MovePCtoLR + // Transform %lr = MovePCtoLR // Into this, where the label is the PIC base: // bl L1$pb // L1$pb: @@ -560,9 +560,9 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } case PPC::UpdateGBR: { - // Transform %Rd = UpdateGBR(%Rt, %Ri) - // Into: lwz %Rt, .L0$poff - .L0$pb(%Ri) - // add %Rd, %Rt, %Ri + // Transform %rd = UpdateGBR(%rt, %ri) + // Into: lwz %rt, .L0$poff - .L0$pb(%ri) + // add %rd, %rt, %ri // Get the offset from the GOT Base Register to the GOT LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin); MCSymbol *PICOffset = @@ -577,7 +577,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const MCOperand TR = TmpInst.getOperand(1); const MCOperand PICR = TmpInst.getOperand(0); - // Step 1: lwz %Rt, .L$poff - .L$pb(%Ri) + // Step 1: lwz %rt, .L$poff - .L$pb(%ri) TmpInst.getOperand(1) = MCOperand::createExpr(MCBinaryExpr::createSub(Exp, PB, OutContext)); TmpInst.getOperand(0) = TR; @@ -592,7 +592,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } case PPC::LWZtoc: { - // Transform %R3 = LWZtoc , %R2 + // Transform %r3 = LWZtoc @min1, %r2 LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin); // Change the opcode to LWZ, and the global address operand to be a @@ -636,7 +636,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { case PPC::LDtocCPT: case PPC::LDtocBA: case PPC::LDtoc: { - // Transform %X3 = LDtoc , %X2 + // Transform %x3 = LDtoc @min1, %x2 LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin); // Change the opcode to LD, and the global address operand to be a @@ -667,7 +667,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { } case PPC::ADDIStocHA: { - // Transform %Xd = ADDIStocHA %X2, + // Transform %xd = ADDIStocHA %x2, @sym LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin); // Change the opcode to ADDIS8. If the global address is external, has @@ -714,7 +714,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } case PPC::LDtocL: { - // Transform %Xd = LDtocL , %Xs + // Transform %xd = LDtocL @sym, %xs LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin); // Change the opcode to LD. If the global address is external, has @@ -757,7 +757,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } case PPC::ADDItocL: { - // Transform %Xd = ADDItocL %Xs, + // Transform %xd = ADDItocL %xs, @sym LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin); // Change the opcode to ADDI8. 
If the global address is external, then @@ -788,8 +788,8 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } case PPC::ADDISgotTprelHA: { - // Transform: %Xd = ADDISgotTprelHA %X2, - // Into: %Xd = ADDIS8 %X2, sym@got@tlsgd@ha + // Transform: %xd = ADDISgotTprelHA %x2, @sym + // Into: %xd = ADDIS8 %x2, sym@got@tlsgd@ha assert(Subtarget->isPPC64() && "Not supported for 32-bit PowerPC"); const MachineOperand &MO = MI->getOperand(2); const GlobalValue *GValue = MO.getGlobal(); @@ -805,7 +805,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { } case PPC::LDgotTprelL: case PPC::LDgotTprelL32: { - // Transform %Xd = LDgotTprelL , %Xs + // Transform %xd = LDgotTprelL @sym, %xs LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin); // Change the opcode to LD. @@ -866,8 +866,8 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } case PPC::ADDIStlsgdHA: { - // Transform: %Xd = ADDIStlsgdHA %X2, - // Into: %Xd = ADDIS8 %X2, sym@got@tlsgd@ha + // Transform: %xd = ADDIStlsgdHA %x2, @sym + // Into: %xd = ADDIS8 %x2, sym@got@tlsgd@ha assert(Subtarget->isPPC64() && "Not supported for 32-bit PowerPC"); const MachineOperand &MO = MI->getOperand(2); const GlobalValue *GValue = MO.getGlobal(); @@ -882,11 +882,11 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } case PPC::ADDItlsgdL: - // Transform: %Xd = ADDItlsgdL %Xs, - // Into: %Xd = ADDI8 %Xs, sym@got@tlsgd@l + // Transform: %xd = ADDItlsgdL %xs, @sym + // Into: %xd = ADDI8 %xs, sym@got@tlsgd@l case PPC::ADDItlsgdL32: { - // Transform: %Rd = ADDItlsgdL32 %Rs, - // Into: %Rd = ADDI %Rs, sym@got@tlsgd + // Transform: %rd = ADDItlsgdL32 %rs, @sym + // Into: %rd = ADDI %rs, sym@got@tlsgd const MachineOperand &MO = MI->getOperand(2); const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = getSymbol(GValue); @@ -902,17 +902,17 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } case PPC::GETtlsADDR: - // Transform: %X3 = GETtlsADDR %X3, + // Transform: %x3 = GETtlsADDR %x3, @sym // Into: BL8_NOP_TLS __tls_get_addr(sym at tlsgd) case PPC::GETtlsADDR32: { - // Transform: %R3 = GETtlsADDR32 %R3, + // Transform: %r3 = GETtlsADDR32 %r3, @sym // Into: BL_TLS __tls_get_addr(sym at tlsgd)@PLT EmitTlsCall(MI, MCSymbolRefExpr::VK_PPC_TLSGD); return; } case PPC::ADDIStlsldHA: { - // Transform: %Xd = ADDIStlsldHA %X2, - // Into: %Xd = ADDIS8 %X2, sym@got@tlsld@ha + // Transform: %xd = ADDIStlsldHA %x2, @sym + // Into: %xd = ADDIS8 %x2, sym@got@tlsld@ha assert(Subtarget->isPPC64() && "Not supported for 32-bit PowerPC"); const MachineOperand &MO = MI->getOperand(2); const GlobalValue *GValue = MO.getGlobal(); @@ -927,11 +927,11 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } case PPC::ADDItlsldL: - // Transform: %Xd = ADDItlsldL %Xs, - // Into: %Xd = ADDI8 %Xs, sym@got@tlsld@l + // Transform: %xd = ADDItlsldL %xs, @sym + // Into: %xd = ADDI8 %xs, sym@got@tlsld@l case PPC::ADDItlsldL32: { - // Transform: %Rd = ADDItlsldL32 %Rs, - // Into: %Rd = ADDI %Rs, sym@got@tlsld + // Transform: %rd = ADDItlsldL32 %rs, @sym + // Into: %rd = ADDI %rs, sym@got@tlsld const MachineOperand &MO = MI->getOperand(2); const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = getSymbol(GValue); @@ -947,20 +947,20 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } case PPC::GETtlsldADDR: - // Transform: %X3 = GETtlsldADDR %X3, + // Transform: %x3 = GETtlsldADDR %x3, @sym // Into: BL8_NOP_TLS __tls_get_addr(sym at tlsld) 
case PPC::GETtlsldADDR32: { - // Transform: %R3 = GETtlsldADDR32 %R3, + // Transform: %r3 = GETtlsldADDR32 %r3, @sym // Into: BL_TLS __tls_get_addr(sym at tlsld)@PLT EmitTlsCall(MI, MCSymbolRefExpr::VK_PPC_TLSLD); return; } case PPC::ADDISdtprelHA: - // Transform: %Xd = ADDISdtprelHA %Xs, - // Into: %Xd = ADDIS8 %Xs, sym@dtprel@ha + // Transform: %xd = ADDISdtprelHA %xs, @sym + // Into: %xd = ADDIS8 %xs, sym@dtprel@ha case PPC::ADDISdtprelHA32: { - // Transform: %Rd = ADDISdtprelHA32 %Rs, - // Into: %Rd = ADDIS %Rs, sym@dtprel@ha + // Transform: %rd = ADDISdtprelHA32 %rs, @sym + // Into: %rd = ADDIS %rs, sym@dtprel@ha const MachineOperand &MO = MI->getOperand(2); const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = getSymbol(GValue); @@ -976,11 +976,11 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } case PPC::ADDIdtprelL: - // Transform: %Xd = ADDIdtprelL %Xs, - // Into: %Xd = ADDI8 %Xs, sym@dtprel@l + // Transform: %xd = ADDIdtprelL %xs, @sym + // Into: %xd = ADDI8 %xs, sym@dtprel@l case PPC::ADDIdtprelL32: { - // Transform: %Rd = ADDIdtprelL32 %Rs, - // Into: %Rd = ADDI %Rs, sym@dtprel@l + // Transform: %rd = ADDIdtprelL32 %rs, @sym + // Into: %rd = ADDI %rs, sym@dtprel@l const MachineOperand &MO = MI->getOperand(2); const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = getSymbol(GValue); @@ -997,8 +997,8 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { case PPC::MFOCRF: case PPC::MFOCRF8: if (!Subtarget->hasMFOCRF()) { - // Transform: %R3 = MFOCRF %CR7 - // Into: %R3 = MFCR ;; cr7 + // Transform: %r3 = MFOCRF %cr7 + // Into: %r3 = MFCR ;; cr7 unsigned NewOpcode = MI->getOpcode() == PPC::MFOCRF ? PPC::MFCR : PPC::MFCR8; OutStreamer->AddComment(PPCInstPrinter:: @@ -1011,8 +1011,8 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { case PPC::MTOCRF: case PPC::MTOCRF8: if (!Subtarget->hasMFOCRF()) { - // Transform: %CR7 = MTOCRF %R3 - // Into: MTCRF mask, %R3 ;; cr7 + // Transform: %cr7 = MTOCRF %r3 + // Into: MTCRF mask, %r3 ;; cr7 unsigned NewOpcode = MI->getOpcode() == PPC::MTOCRF ? PPC::MTCRF : PPC::MTCRF8; unsigned Mask = 0x80 >> OutContext.getRegisterInfo() @@ -1228,7 +1228,7 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() { // linux/ppc32 - Normal entry label. 
if (!Subtarget->isPPC64() && (!isPositionIndependent() || - MF->getFunction()->getParent()->getPICLevel() == PICLevel::SmallPIC)) + MF->getFunction().getParent()->getPICLevel() == PICLevel::SmallPIC)) return AsmPrinter::EmitFunctionEntryLabel(); if (!Subtarget->isPPC64()) { diff --git a/lib/Target/PowerPC/PPCBranchCoalescing.cpp b/lib/Target/PowerPC/PPCBranchCoalescing.cpp index 6e1cd1323e6c..32d801b13ded 100644 --- a/lib/Target/PowerPC/PPCBranchCoalescing.cpp +++ b/lib/Target/PowerPC/PPCBranchCoalescing.cpp @@ -59,68 +59,68 @@ namespace llvm { /// /// expands to the following machine code: /// -/// BB#0: derived from LLVM BB %entry -/// Live Ins: %F1 %F3 %X6 +/// %bb.0: derived from LLVM BB %entry +/// Live Ins: %f1 %f3 %x6 /// -/// %vreg0 = COPY %F1; F8RC:%vreg0 -/// %vreg5 = CMPLWI %vreg4, 0; CRRC:%vreg5 GPRC:%vreg4 -/// %vreg8 = LXSDX %ZERO8, %vreg7, %RM; -/// mem:LD8[ConstantPool] F8RC:%vreg8 G8RC:%vreg7 -/// BCC 76, %vreg5, ; CRRC:%vreg5 -/// Successors according to CFG: BB#1(?%) BB#2(?%) -/// -/// BB#1: derived from LLVM BB %entry -/// Predecessors according to CFG: BB#0 -/// Successors according to CFG: BB#2(?%) -/// -/// BB#2: derived from LLVM BB %entry -/// Predecessors according to CFG: BB#0 BB#1 -/// %vreg9 = PHI %vreg8, , %vreg0, ; -/// F8RC:%vreg9,%vreg8,%vreg0 +/// %0 = COPY %f1; F8RC:%0 +/// %5 = CMPLWI killed %4, 0; CRRC:%5 GPRC:%4 +/// %8 = LXSDX %zero8, killed %7, implicit %rm; +/// mem:LD8[ConstantPool] F8RC:%8 G8RC:%7 +/// BCC 76, %5, <%bb.2>; CRRC:%5 +/// Successors according to CFG: %bb.1(?%) %bb.2(?%) +/// +/// %bb.1: derived from LLVM BB %entry +/// Predecessors according to CFG: %bb.0 +/// Successors according to CFG: %bb.2(?%) +/// +/// %bb.2: derived from LLVM BB %entry +/// Predecessors according to CFG: %bb.0 %bb.1 +/// %9 = PHI %8, <%bb.1>, %0, <%bb.0>; +/// F8RC:%9,%8,%0 /// -/// BCC 76, %vreg5, ; CRRC:%vreg5 -/// Successors according to CFG: BB#3(?%) BB#4(?%) +/// BCC 76, %5, <%bb.4>; CRRC:%5 +/// Successors according to CFG: %bb.3(?%) %bb.4(?%) /// -/// BB#3: derived from LLVM BB %entry -/// Predecessors according to CFG: BB#2 -/// Successors according to CFG: BB#4(?%) +/// %bb.3: derived from LLVM BB %entry +/// Predecessors according to CFG: %bb.2 +/// Successors according to CFG: %bb.4(?%) /// -/// BB#4: derived from LLVM BB %entry -/// Predecessors according to CFG: BB#2 BB#3 -/// %vreg13 = PHI %vreg12, , %vreg2, ; -/// F8RC:%vreg13,%vreg12,%vreg2 +/// %bb.4: derived from LLVM BB %entry +/// Predecessors according to CFG: %bb.2 %bb.3 +/// %13 = PHI %12, <%bb.3>, %2, <%bb.2>; +/// F8RC:%13,%12,%2 /// -/// BLR8 %LR8, %RM, %F1 +/// BLR8 implicit %lr8, implicit %rm, implicit %f1 /// /// When this pattern is detected, branch coalescing will try to collapse -/// it by moving code in BB#2 to BB#0 and/or BB#4 and removing BB#3. +/// it by moving code in %bb.2 to %bb.0 and/or %bb.4 and removing %bb.3. 
/// /// If all conditions are meet, IR should collapse to: /// -/// BB#0: derived from LLVM BB %entry -/// Live Ins: %F1 %F3 %X6 +/// %bb.0: derived from LLVM BB %entry +/// Live Ins: %f1 %f3 %x6 /// -/// %vreg0 = COPY %F1; F8RC:%vreg0 -/// %vreg5 = CMPLWI %vreg4, 0; CRRC:%vreg5 GPRC:%vreg4 -/// %vreg8 = LXSDX %ZERO8, %vreg7, %RM; -/// mem:LD8[ConstantPool] F8RC:%vreg8 G8RC:%vreg7 +/// %0 = COPY %f1; F8RC:%0 +/// %5 = CMPLWI killed %4, 0; CRRC:%5 GPRC:%4 +/// %8 = LXSDX %zero8, killed %7, implicit %rm; +/// mem:LD8[ConstantPool] F8RC:%8 G8RC:%7 /// -/// BCC 76, %vreg5, ; CRRC:%vreg5 -/// Successors according to CFG: BB#1(0x2aaaaaaa / 0x80000000 = 33.33%) -/// BB#4(0x55555554 / 0x80000000 = 66.67%) -/// -/// BB#1: derived from LLVM BB %entry -/// Predecessors according to CFG: BB#0 -/// Successors according to CFG: BB#4(0x40000000 / 0x80000000 = 50.00%) -/// -/// BB#4: derived from LLVM BB %entry -/// Predecessors according to CFG: BB#0 BB#1 -/// %vreg9 = PHI %vreg8, , %vreg0, ; -/// F8RC:%vreg9,%vreg8,%vreg0 -/// %vreg13 = PHI %vreg12, , %vreg2, ; -/// F8RC:%vreg13,%vreg12,%vreg2 +/// BCC 76, %5, <%bb.4>; CRRC:%5 +/// Successors according to CFG: %bb.1(0x2aaaaaaa / 0x80000000 = 33.33%) +/// %bb.4(0x55555554 / 0x80000000 = 66.67%) +/// +/// %bb.1: derived from LLVM BB %entry +/// Predecessors according to CFG: %bb.0 +/// Successors according to CFG: %bb.4(0x40000000 / 0x80000000 = 50.00%) +/// +/// %bb.4: derived from LLVM BB %entry +/// Predecessors according to CFG: %bb.0 %bb.1 +/// %9 = PHI %8, <%bb.1>, %0, <%bb.0>; +/// F8RC:%9,%8,%0 +/// %13 = PHI %12, <%bb.1>, %2, <%bb.0>; +/// F8RC:%13,%12,%2 /// -/// BLR8 %LR8, %RM, %F1 +/// BLR8 implicit %lr8, implicit %rm, implicit %f1 /// /// Branch Coalescing does not split blocks, it moves everything in the same /// direction ensuring it does not break use/definition semantics. 
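As a rough source-level intuition for the machine code above, consider two conditional expressions guarded by the same condition: they initially lower to two back-to-back diamonds, and the pass merges them so that both PHIs end up in a single successor block. The function below is an illustrative reduction only, not code taken from the patch, and the argument mapping is an assumption:

    double test(double a, double b, double c, unsigned x) {
      double r1 = (x == 0) ? 0.0 : a; // first diamond (%bb.0 -> %bb.1/%bb.2)
      double r2 = (x == 0) ? 0.0 : c; // second diamond on the same condition
      return r1 + r2;                 // after coalescing, one diamond remains
    }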
@@ -714,7 +714,7 @@ bool PPCBranchCoalescing::mergeCandidates(CoalescingCandidateInfo &SourceRegion, bool PPCBranchCoalescing::runOnMachineFunction(MachineFunction &MF) { - if (skipFunction(*MF.getFunction()) || MF.empty()) + if (skipFunction(MF.getFunction()) || MF.empty()) return false; bool didSomething = false; diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp index 8784a8319029..96ad1c6d4036 100644 --- a/lib/Target/PowerPC/PPCCTRLoops.cpp +++ b/lib/Target/PowerPC/PPCCTRLoops.cpp @@ -403,15 +403,16 @@ bool PPCCTRLoops::mightUseCTR(BasicBlock *BB) { } if (Opcode) { - MVT VTy = TLI->getSimpleValueType( - *DL, CI->getArgOperand(0)->getType(), true); - if (VTy == MVT::Other) + EVT EVTy = + TLI->getValueType(*DL, CI->getArgOperand(0)->getType(), true); + + if (EVTy == MVT::Other) return true; - if (TLI->isOperationLegalOrCustom(Opcode, VTy)) + if (TLI->isOperationLegalOrCustom(Opcode, EVTy)) continue; - else if (VTy.isVector() && - TLI->isOperationLegalOrCustom(Opcode, VTy.getScalarType())) + else if (EVTy.isVector() && + TLI->isOperationLegalOrCustom(Opcode, EVTy.getScalarType())) continue; return true; @@ -690,12 +691,11 @@ static bool verifyCTRBranch(MachineBasicBlock *MBB, } if (I != BI && clobbersCTR(*I)) { - DEBUG(dbgs() << "BB#" << MBB->getNumber() << " (" << - MBB->getFullName() << ") instruction " << *I << - " clobbers CTR, invalidating " << "BB#" << - BI->getParent()->getNumber() << " (" << - BI->getParent()->getFullName() << ") instruction " << - *BI << "\n"); + DEBUG(dbgs() << printMBBReference(*MBB) << " (" << MBB->getFullName() + << ") instruction " << *I << " clobbers CTR, invalidating " + << printMBBReference(*BI->getParent()) << " (" + << BI->getParent()->getFullName() << ") instruction " << *BI + << "\n"); return false; } @@ -709,10 +709,10 @@ static bool verifyCTRBranch(MachineBasicBlock *MBB, if (CheckPreds) { queue_preds: if (MachineFunction::iterator(MBB) == MBB->getParent()->begin()) { - DEBUG(dbgs() << "Unable to find a MTCTR instruction for BB#" << - BI->getParent()->getNumber() << " (" << - BI->getParent()->getFullName() << ") instruction " << - *BI << "\n"); + DEBUG(dbgs() << "Unable to find a MTCTR instruction for " + << printMBBReference(*BI->getParent()) << " (" + << BI->getParent()->getFullName() << ") instruction " << *BI + << "\n"); return false; } diff --git a/lib/Target/PowerPC/PPCEarlyReturn.cpp b/lib/Target/PowerPC/PPCEarlyReturn.cpp index 811e4dd9dfe1..1699463c0a4b 100644 --- a/lib/Target/PowerPC/PPCEarlyReturn.cpp +++ b/lib/Target/PowerPC/PPCEarlyReturn.cpp @@ -173,7 +173,7 @@ namespace { public: bool runOnMachineFunction(MachineFunction &MF) override { - if (skipFunction(*MF.getFunction())) + if (skipFunction(MF.getFunction())) return false; TII = MF.getSubtarget().getInstrInfo(); diff --git a/lib/Target/PowerPC/PPCExpandISEL.cpp b/lib/Target/PowerPC/PPCExpandISEL.cpp index 41e3190c3eec..b00e98b63e34 100644 --- a/lib/Target/PowerPC/PPCExpandISEL.cpp +++ b/lib/Target/PowerPC/PPCExpandISEL.cpp @@ -59,6 +59,8 @@ class PPCExpandISEL : public MachineFunctionPass { typedef SmallDenseMap ISELInstructionList; // A map of MBB numbers to their lists of contained ISEL instructions. + // Please note when we traverse this list and expand ISEL, we only remove + // the ISEL from the MBB not from this list. ISELInstructionList ISELInstructions; /// Initialize the object. 
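The ISEL special cases introduced in the hunks that follow are easier to read with the instruction's value semantics spelled out. The model below is a reference sketch only, not code from the patch:

    // isel rT, rA, rB, bc  computes:  rT = CR[bc] ? rA : rB
    //
    // Special case 1: rT, rA and rB are all the same register, so the result
    //   can never change and the ISEL is simply erased.
    // Special case 2: rA == rB, so both arms yield the same value; the ISEL
    //   degenerates into an unconditional copy and is folded to OR (mr),
    //   which preserves any kill flag on the source operands.
    static unsigned iselModel(bool CRBit, unsigned RA, unsigned RB) {
      return CRBit ? RA : RB; // value-level model of the instruction
    }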
@@ -124,9 +126,6 @@ class PPCExpandISEL : public MachineFunctionPass { #endif bool runOnMachineFunction(MachineFunction &MF) override { - if (!isExpandISELEnabled(MF)) - return false; - DEBUG(dbgs() << "Function: "; MF.dump(); dbgs() << "\n"); initialize(MF); @@ -171,7 +170,7 @@ bool PPCExpandISEL::collectISELInstructions() { #ifndef NDEBUG void PPCExpandISEL::DumpISELInstructions() const { for (const auto &I : ISELInstructions) { - DEBUG(dbgs() << "BB#" << I.first << ":\n"); + DEBUG(dbgs() << printMBBReference(*MF->getBlockNumbered(I.first)) << ":\n"); for (const auto &VI : I.second) DEBUG(dbgs() << " "; VI->print(dbgs())); } @@ -190,26 +189,71 @@ bool PPCExpandISEL::canMerge(MachineInstr *PrevPushedMI, MachineInstr *MI) { } void PPCExpandISEL::expandAndMergeISELs() { + bool ExpandISELEnabled = isExpandISELEnabled(*MF); + for (auto &BlockList : ISELInstructions) { - DEBUG(dbgs() << "Expanding ISEL instructions in BB#" << BlockList.first + DEBUG(dbgs() << "Expanding ISEL instructions in " + << printMBBReference(*MF->getBlockNumbered(BlockList.first)) << "\n"); - BlockISELList &CurrentISELList = BlockList.second; auto I = CurrentISELList.begin(); auto E = CurrentISELList.end(); while (I != E) { - BlockISELList SubISELList; - - SubISELList.push_back(*I++); - - // Collect the ISELs that can be merged together. - while (I != E && canMerge(SubISELList.back(), *I)) + assert(isISEL(**I) && "Expecting an ISEL instruction"); + MachineOperand &Dest = (*I)->getOperand(0); + MachineOperand &TrueValue = (*I)->getOperand(1); + MachineOperand &FalseValue = (*I)->getOperand(2); + + // Special case 1, all registers used by ISEL are the same one. + // The non-redundant isel 0, 0, 0, N would not satisfy these conditions + // as it would be ISEL %R0, %ZERO, %R0, %CRN. + if (useSameRegister(Dest, TrueValue) && + useSameRegister(Dest, FalseValue)) { + DEBUG(dbgs() << "Remove redudant ISEL instruction: " << **I << "\n"); + // FIXME: if the CR field used has no other uses, we could eliminate the + // instruction that defines it. This would have to be done manually + // since this pass runs too late to run DCE after it. + NumRemoved++; + (*I)->eraseFromParent(); + I++; + } else if (useSameRegister(TrueValue, FalseValue)) { + // Special case 2, the two input registers used by ISEL are the same. + // Note: the non-foldable isel RX, 0, 0, N would not satisfy this + // condition as it would be ISEL %RX, %ZERO, %R0, %CRN, which makes it + // safe to fold ISEL to MR(OR) instead of ADDI. + MachineBasicBlock *MBB = (*I)->getParent(); + DEBUG(dbgs() << "Fold the ISEL instruction to an unconditonal copy:\n"); + DEBUG(dbgs() << "ISEL: " << **I << "\n"); + NumFolded++; + // Note: we're using both the TrueValue and FalseValue operands so as + // not to lose the kill flag if it is set on either of them. + BuildMI(*MBB, (*I), dl, TII->get(isISEL8(**I) ? PPC::OR8 : PPC::OR)) + .add(Dest) + .add(TrueValue) + .add(FalseValue); + (*I)->eraseFromParent(); + I++; + } else if (ExpandISELEnabled) { // Normal cases expansion enabled + DEBUG(dbgs() << "Expand ISEL instructions:\n"); + DEBUG(dbgs() << "ISEL: " << **I << "\n"); + BlockISELList SubISELList; SubISELList.push_back(*I++); - - expandMergeableISELs(SubISELList); - } - } + // Collect the ISELs that can be merged together. + // This will eat up ISEL instructions without considering whether they + // may be redundant or foldable to a register copy. So we still keep + // the handleSpecialCases() downstream to handle them. 
+ while (I != E && canMerge(SubISELList.back(), *I)) { + DEBUG(dbgs() << "ISEL: " << **I << "\n"); + SubISELList.push_back(*I++); + } + + expandMergeableISELs(SubISELList); + } else { // Normal cases expansion disabled + I++; // leave the ISEL as it is + } + } // end while + } // end for } void PPCExpandISEL::handleSpecialCases(BlockISELList &BIL, @@ -232,13 +276,15 @@ void PPCExpandISEL::handleSpecialCases(BlockISELList &BIL, // Similarly, if at least one of the ISEL instructions satisfy the // following condition, we need the False Block: // The Dest Register and False Value Register are not the same. - bool IsADDIInstRequired = !useSameRegister(Dest, TrueValue); bool IsORIInstRequired = !useSameRegister(Dest, FalseValue); // Special case 1, all registers used by ISEL are the same one. if (!IsADDIInstRequired && !IsORIInstRequired) { DEBUG(dbgs() << "Remove redudant ISEL instruction."); + // FIXME: if the CR field used has no other uses, we could eliminate the + // instruction that defines it. This would have to be done manually + // since this pass runs too late to run DCE after it. NumRemoved++; (*MI)->eraseFromParent(); // Setting MI to the erase result keeps the iterator valid and increased. @@ -253,14 +299,15 @@ void PPCExpandISEL::handleSpecialCases(BlockISELList &BIL, // PPC::ZERO8 will be used for the first operand if the value is meant to // be zero. In this case, the useSameRegister method will return false, // thereby preventing this ISEL from being folded. - if (useSameRegister(TrueValue, FalseValue) && (BIL.size() == 1)) { DEBUG(dbgs() << "Fold the ISEL instruction to an unconditonal copy."); NumFolded++; - BuildMI(*MBB, (*MI), dl, TII->get(isISEL8(**MI) ? PPC::ADDI8 : PPC::ADDI)) + // Note: we're using both the TrueValue and FalseValue operands so as + // not to lose the kill flag if it is set on either of them. + BuildMI(*MBB, (*MI), dl, TII->get(isISEL8(**MI) ? PPC::OR8 : PPC::OR)) .add(Dest) .add(TrueValue) - .add(MachineOperand::CreateImm(0)); + .add(FalseValue); (*MI)->eraseFromParent(); // Setting MI to the erase result keeps the iterator valid and increased. 
MI = BIL.erase(MI); diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp index f34c2cd42852..402e29cdff72 100644 --- a/lib/Target/PowerPC/PPCFastISel.cpp +++ b/lib/Target/PowerPC/PPCFastISel.cpp @@ -1991,9 +1991,9 @@ unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) { // or externally available linkage, a non-local function address, or a // jump table address (not yet needed), or if we are generating code // for large code model, we generate: - // LDtocL(GV, ADDIStocHA(%X2, GV)) + // LDtocL(GV, ADDIStocHA(%x2, GV)) // Otherwise we generate: - // ADDItocL(ADDIStocHA(%X2, GV), GV) + // ADDItocL(ADDIStocHA(%x2, GV), GV) // Either way, start with the ADDIStocHA: unsigned HighPartReg = createResultReg(RC); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA), diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp index 0a01fdf9e676..7902da20a010 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -434,7 +434,7 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF, const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); unsigned LR = RegInfo->getRARegister(); - bool DisableRedZone = MF.getFunction()->hasFnAttribute(Attribute::NoRedZone); + bool DisableRedZone = MF.getFunction().hasFnAttribute(Attribute::NoRedZone); bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca. !MFI.adjustsStack() && // No calls. !MustSaveLR(MF, LR) && // No need to save LR. @@ -499,7 +499,7 @@ bool PPCFrameLowering::needsFP(const MachineFunction &MF) const { // Naked functions have no stack frame pushed, so we don't have a frame // pointer. - if (MF.getFunction()->hasFnAttribute(Attribute::Naked)) + if (MF.getFunction().hasFnAttribute(Attribute::Naked)) return false; return MF.getTarget().Options.DisableFramePointerElim(MF) || @@ -692,7 +692,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); DebugLoc dl; bool needsCFI = MMI.hasDebugInfo() || - MF.getFunction()->needsUnwindTableEntry(); + MF.getFunction().needsUnwindTableEntry(); // Get processor type. bool isPPC64 = Subtarget.isPPC64(); @@ -1505,7 +1505,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, unsigned RetOpcode = MBBI->getOpcode(); if (MF.getTarget().Options.GuaranteedTailCallOpt && (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) && - MF.getFunction()->getCallingConv() == CallingConv::Fast) { + MF.getFunction().getCallingConv() == CallingConv::Fast) { PPCFunctionInfo *FI = MF.getInfo(); unsigned CallerAllocatedAmt = FI->getMinReservedArea(); @@ -1531,11 +1531,11 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); - DebugLoc dl; - if (MBBI != MBB.end()) - dl = MBBI->getDebugLoc(); + // If we got this far a first terminator should exist. 
+ assert(MBBI != MBB.end() && "Failed to find the first terminator."); + DebugLoc dl = MBBI->getDebugLoc(); const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); // Create branch instruction for pseudo tail call return instruction diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 531b95a662e7..d3a223fe03e0 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -69,6 +69,19 @@ using namespace llvm; #define DEBUG_TYPE "ppc-codegen" +STATISTIC(NumSextSetcc, + "Number of (sext(setcc)) nodes expanded into GPR sequence."); +STATISTIC(NumZextSetcc, + "Number of (zext(setcc)) nodes expanded into GPR sequence."); +STATISTIC(SignExtensionsAdded, + "Number of sign extensions for compare inputs added."); +STATISTIC(ZeroExtensionsAdded, + "Number of zero extensions for compare inputs added."); +STATISTIC(NumLogicOpsOnComparison, + "Number of logical ops on i1 values calculated in GPR."); +STATISTIC(OmittedForNonExtendUses, + "Number of compares not eliminated as they have non-extending uses."); + // FIXME: Remove this once the bug has been fixed! cl::opt ANDIGlueBug("expose-ppc-andi-glue-bug", cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden); @@ -88,6 +101,29 @@ static cl::opt EnableBranchHint( cl::desc("Enable static hinting of branches on ppc"), cl::Hidden); +enum ICmpInGPRType { ICGPR_All, ICGPR_None, ICGPR_I32, ICGPR_I64, + ICGPR_NonExtIn, ICGPR_Zext, ICGPR_Sext, ICGPR_ZextI32, + ICGPR_SextI32, ICGPR_ZextI64, ICGPR_SextI64 }; + +static cl::opt CmpInGPR( + "ppc-gpr-icmps", cl::Hidden, cl::init(ICGPR_All), + cl::desc("Specify the types of comparisons to emit GPR-only code for."), + cl::values(clEnumValN(ICGPR_None, "none", "Do not modify integer comparisons."), + clEnumValN(ICGPR_All, "all", "All possible int comparisons in GPRs."), + clEnumValN(ICGPR_I32, "i32", "Only i32 comparisons in GPRs."), + clEnumValN(ICGPR_I64, "i64", "Only i64 comparisons in GPRs."), + clEnumValN(ICGPR_NonExtIn, "nonextin", + "Only comparisons where inputs don't need [sz]ext."), + clEnumValN(ICGPR_Zext, "zext", "Only comparisons with zext result."), + clEnumValN(ICGPR_ZextI32, "zexti32", + "Only i32 comparisons with zext result."), + clEnumValN(ICGPR_ZextI64, "zexti64", + "Only i64 comparisons with zext result."), + clEnumValN(ICGPR_Sext, "sext", "Only comparisons with sext result."), + clEnumValN(ICGPR_SextI32, "sexti32", + "Only i32 comparisons with sext result."), + clEnumValN(ICGPR_SextI64, "sexti64", + "Only i64 comparisons with sext result."))); namespace { //===--------------------------------------------------------------------===// @@ -161,6 +197,7 @@ namespace { bool tryBitfieldInsert(SDNode *N); bool tryBitPermutation(SDNode *N); + bool tryIntCompareInGPR(SDNode *N); /// SelectCC - Select a comparison of the specified values with the /// specified condition code, returning the CR# of the expression. @@ -354,7 +391,7 @@ SDNode *PPCDAGToDAGISel::getGlobalBaseReg() { // Insert the set of GlobalBaseReg into the first MBB of the function MachineBasicBlock &FirstMBB = MF->front(); MachineBasicBlock::iterator MBBI = FirstMBB.begin(); - const Module *M = MF->getFunction()->getParent(); + const Module *M = MF->getFunction().getParent(); DebugLoc dl; if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) == MVT::i32) { @@ -749,8 +786,10 @@ static SDNode *selectI64ImmDirect(SelectionDAG *CurDAG, const SDLoc &dl, // Simple value. 
if (isInt<16>(Imm)) { + uint64_t SextImm = SignExtend64(Lo, 16); + SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64); // Just the Lo bits. - Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, getI32Imm(Lo)); + Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm); } else if (Lo) { // Handle the Hi bits. unsigned OpC = Hi ? PPC::LIS8 : PPC::LI8; @@ -855,12 +894,74 @@ static SDNode *selectI64Imm(SelectionDAG *CurDAG, const SDLoc &dl, getI32Imm(64 - RMin), getI32Imm(MaskEnd)); } +static unsigned allUsesTruncate(SelectionDAG *CurDAG, SDNode *N) { + unsigned MaxTruncation = 0; + // Cannot use range-based for loop here as we need the actual use (i.e. we + // need the operand number corresponding to the use). A range-based for + // will unbox the use and provide an SDNode*. + for (SDNode::use_iterator Use = N->use_begin(), UseEnd = N->use_end(); + Use != UseEnd; ++Use) { + unsigned Opc = + Use->isMachineOpcode() ? Use->getMachineOpcode() : Use->getOpcode(); + switch (Opc) { + default: return 0; + case ISD::TRUNCATE: + if (Use->isMachineOpcode()) + return 0; + MaxTruncation = + std::max(MaxTruncation, Use->getValueType(0).getSizeInBits()); + continue; + case ISD::STORE: { + if (Use->isMachineOpcode()) + return 0; + StoreSDNode *STN = cast(*Use); + unsigned MemVTSize = STN->getMemoryVT().getSizeInBits(); + if (MemVTSize == 64 || Use.getOperandNo() != 0) + return 0; + MaxTruncation = std::max(MaxTruncation, MemVTSize); + continue; + } + case PPC::STW8: + case PPC::STWX8: + case PPC::STWU8: + case PPC::STWUX8: + if (Use.getOperandNo() != 0) + return 0; + MaxTruncation = std::max(MaxTruncation, 32u); + continue; + case PPC::STH8: + case PPC::STHX8: + case PPC::STHU8: + case PPC::STHUX8: + if (Use.getOperandNo() != 0) + return 0; + MaxTruncation = std::max(MaxTruncation, 16u); + continue; + case PPC::STB8: + case PPC::STBX8: + case PPC::STBU8: + case PPC::STBUX8: + if (Use.getOperandNo() != 0) + return 0; + MaxTruncation = std::max(MaxTruncation, 8u); + continue; + } + } + return MaxTruncation; +} + // Select a 64-bit constant. static SDNode *selectI64Imm(SelectionDAG *CurDAG, SDNode *N) { SDLoc dl(N); // Get 64 bit value. int64_t Imm = cast(N)->getZExtValue(); + if (unsigned MinSize = allUsesTruncate(CurDAG, N)) { + uint64_t SextImm = SignExtend64(Imm, MinSize); + SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64); + if (isInt<16>(SextImm)) + return CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm); + } return selectI64Imm(CurDAG, dl, Imm); } @@ -2064,8 +2165,1204 @@ class BitPermutationSelector { } }; +class IntegerCompareEliminator { + SelectionDAG *CurDAG; + PPCDAGToDAGISel *S; + // Conversion type for interpreting results of a 32-bit instruction as + // a 64-bit value or vice versa. + enum ExtOrTruncConversion { Ext, Trunc }; + + // Modifiers to guide how an ISD::SETCC node's result is to be computed + // in a GPR. + // ZExtOrig - use the original condition code, zero-extend value + // ZExtInvert - invert the condition code, zero-extend value + // SExtOrig - use the original condition code, sign-extend value + // SExtInvert - invert the condition code, sign-extend value + enum SetccInGPROpts { ZExtOrig, ZExtInvert, SExtOrig, SExtInvert }; + + // Comparisons against zero to emit GPR code sequences for. Each of these + // sequences may need to be emitted for two or more equivalent patterns. + // For example (a >= 0) == (a > -1). The direction of the comparison () + // matters as well as the extension type: sext (-1/0), zext (1/0). 
+ // GEZExt - (zext (LHS >= 0)) + // GESExt - (sext (LHS >= 0)) + // LEZExt - (zext (LHS <= 0)) + // LESExt - (sext (LHS <= 0)) + enum ZeroCompare { GEZExt, GESExt, LEZExt, LESExt }; + + SDNode *tryEXTEND(SDNode *N); + SDNode *tryLogicOpOfCompares(SDNode *N); + SDValue computeLogicOpInGPR(SDValue LogicOp); + SDValue signExtendInputIfNeeded(SDValue Input); + SDValue zeroExtendInputIfNeeded(SDValue Input); + SDValue addExtOrTrunc(SDValue NatWidthRes, ExtOrTruncConversion Conv); + SDValue getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl, + ZeroCompare CmpTy); + SDValue get32BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC, + int64_t RHSValue, SDLoc dl); + SDValue get32BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC, + int64_t RHSValue, SDLoc dl); + SDValue get64BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC, + int64_t RHSValue, SDLoc dl); + SDValue get64BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC, + int64_t RHSValue, SDLoc dl); + SDValue getSETCCInGPR(SDValue Compare, SetccInGPROpts ConvOpts); + +public: + IntegerCompareEliminator(SelectionDAG *DAG, + PPCDAGToDAGISel *Sel) : CurDAG(DAG), S(Sel) { + assert(CurDAG->getTargetLoweringInfo() + .getPointerTy(CurDAG->getDataLayout()).getSizeInBits() == 64 && + "Only expecting to use this on 64 bit targets."); + } + SDNode *Select(SDNode *N) { + if (CmpInGPR == ICGPR_None) + return nullptr; + switch (N->getOpcode()) { + default: break; + case ISD::ZERO_EXTEND: + if (CmpInGPR == ICGPR_Sext || CmpInGPR == ICGPR_SextI32 || + CmpInGPR == ICGPR_SextI64) + return nullptr; + LLVM_FALLTHROUGH; + case ISD::SIGN_EXTEND: + if (CmpInGPR == ICGPR_Zext || CmpInGPR == ICGPR_ZextI32 || + CmpInGPR == ICGPR_ZextI64) + return nullptr; + return tryEXTEND(N); + case ISD::AND: + case ISD::OR: + case ISD::XOR: + return tryLogicOpOfCompares(N); + } + return nullptr; + } +}; + +static bool isLogicOp(unsigned Opc) { + return Opc == ISD::AND || Opc == ISD::OR || Opc == ISD::XOR; +} +// The obvious case for wanting to keep the value in a GPR. Namely, the +// result of the comparison is actually needed in a GPR. +SDNode *IntegerCompareEliminator::tryEXTEND(SDNode *N) { + assert((N->getOpcode() == ISD::ZERO_EXTEND || + N->getOpcode() == ISD::SIGN_EXTEND) && + "Expecting a zero/sign extend node!"); + SDValue WideRes; + // If we are zero-extending the result of a logical operation on i1 + // values, we can keep the values in GPRs. + if (isLogicOp(N->getOperand(0).getOpcode()) && + N->getOperand(0).getValueType() == MVT::i1 && + N->getOpcode() == ISD::ZERO_EXTEND) + WideRes = computeLogicOpInGPR(N->getOperand(0)); + else if (N->getOperand(0).getOpcode() != ISD::SETCC) + return nullptr; + else + WideRes = + getSETCCInGPR(N->getOperand(0), + N->getOpcode() == ISD::SIGN_EXTEND ? + SetccInGPROpts::SExtOrig : SetccInGPROpts::ZExtOrig); + + if (!WideRes) + return nullptr; + + SDLoc dl(N); + bool Input32Bit = WideRes.getValueType() == MVT::i32; + bool Output32Bit = N->getValueType(0) == MVT::i32; + + NumSextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 1 : 0; + NumZextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 0 : 1; + + SDValue ConvOp = WideRes; + if (Input32Bit != Output32Bit) + ConvOp = addExtOrTrunc(WideRes, Input32Bit ? ExtOrTruncConversion::Ext : + ExtOrTruncConversion::Trunc); + return ConvOp.getNode(); +} + +// Attempt to perform logical operations on the results of comparisons while +// keeping the values in GPRs. 
Without doing so, these would end up being +// lowered to CR-logical operations which suffer from significant latency and +// low ILP. +SDNode *IntegerCompareEliminator::tryLogicOpOfCompares(SDNode *N) { + if (N->getValueType(0) != MVT::i1) + return nullptr; + assert(isLogicOp(N->getOpcode()) && + "Expected a logic operation on setcc results."); + SDValue LoweredLogical = computeLogicOpInGPR(SDValue(N, 0)); + if (!LoweredLogical) + return nullptr; + + SDLoc dl(N); + bool IsBitwiseNegate = LoweredLogical.getMachineOpcode() == PPC::XORI8; + unsigned SubRegToExtract = IsBitwiseNegate ? PPC::sub_eq : PPC::sub_gt; + SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32); + SDValue LHS = LoweredLogical.getOperand(0); + SDValue RHS = LoweredLogical.getOperand(1); + SDValue WideOp; + SDValue OpToConvToRecForm; + + // Look through any 32-bit to 64-bit implicit extend nodes to find the + // opcode that is input to the XORI. + if (IsBitwiseNegate && + LoweredLogical.getOperand(0).getMachineOpcode() == PPC::INSERT_SUBREG) + OpToConvToRecForm = LoweredLogical.getOperand(0).getOperand(1); + else if (IsBitwiseNegate) + // If the input to the XORI isn't an extension, that's what we're after. + OpToConvToRecForm = LoweredLogical.getOperand(0); + else + // If this is not an XORI, it is a reg-reg logical op and we can convert + // it to record-form. + OpToConvToRecForm = LoweredLogical; + + // Get the record-form version of the node we're looking to use to get the + // CR result from. + uint16_t NonRecOpc = OpToConvToRecForm.getMachineOpcode(); + int NewOpc = PPCInstrInfo::getRecordFormOpcode(NonRecOpc); + + // Convert the right node to record-form. This is either the logical we're + // looking at or it is the input node to the negation (if we're looking at + // a bitwise negation). + if (NewOpc != -1 && IsBitwiseNegate) { + // The input to the XORI has a record-form. Use it. + assert(LoweredLogical.getConstantOperandVal(1) == 1 && + "Expected a PPC::XORI8 only for bitwise negation."); + // Emit the record-form instruction. + std::vector Ops; + for (int i = 0, e = OpToConvToRecForm.getNumOperands(); i < e; i++) + Ops.push_back(OpToConvToRecForm.getOperand(i)); + + WideOp = + SDValue(CurDAG->getMachineNode(NewOpc, dl, + OpToConvToRecForm.getValueType(), + MVT::Glue, Ops), 0); + } else { + assert((NewOpc != -1 || !IsBitwiseNegate) && + "No record form available for AND8/OR8/XOR8?"); + WideOp = + SDValue(CurDAG->getMachineNode(NewOpc == -1 ? PPC::ANDIo8 : NewOpc, dl, + MVT::i64, MVT::Glue, LHS, RHS), 0); + } + + // Select this node to a single bit from CR0 set by the record-form node + // just created. For bitwise negation, use the EQ bit which is the equivalent + // of negating the result (i.e. it is a bit set when the result of the + // operation is zero). + SDValue SRIdxVal = + CurDAG->getTargetConstant(SubRegToExtract, dl, MVT::i32); + SDValue CRBit = + SDValue(CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, + MVT::i1, CR0Reg, SRIdxVal, + WideOp.getValue(1)), 0); + return CRBit.getNode(); +} + +// Lower a logical operation on i1 values into a GPR sequence if possible. +// The result can be kept in a GPR if requested. +// Three types of inputs can be handled: +// - SETCC +// - TRUNCATE +// - Logical operation (AND/OR/XOR) +// There is also a special case that is handled (namely a complement operation +// achieved with xor %a, -1). 
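+// As an illustrative example, for C code along the lines of
+//   int f(int a, int b, int c, int d) { return (a == b) & (c < d); }
+// both comparisons are materialized as 0/1 values in GPRs and combined with
+// a single AND8, instead of computing the CR bits and combining them with a
+// CR-logical instruction.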
+SDValue IntegerCompareEliminator::computeLogicOpInGPR(SDValue LogicOp) { + assert(isLogicOp(LogicOp.getOpcode()) && + "Can only handle logic operations here."); + assert(LogicOp.getValueType() == MVT::i1 && + "Can only handle logic operations on i1 values here."); + SDLoc dl(LogicOp); + SDValue LHS, RHS; + + // Special case: xor %a, -1 + bool IsBitwiseNegation = isBitwiseNot(LogicOp); + + // Produces a GPR sequence for each operand of the binary logic operation. + // For SETCC, it produces the respective comparison, for TRUNCATE it truncates + // the value in a GPR and for logic operations, it will recursively produce + // a GPR sequence for the operation. + auto getLogicOperand = [&] (SDValue Operand) -> SDValue { + unsigned OperandOpcode = Operand.getOpcode(); + if (OperandOpcode == ISD::SETCC) + return getSETCCInGPR(Operand, SetccInGPROpts::ZExtOrig); + else if (OperandOpcode == ISD::TRUNCATE) { + SDValue InputOp = Operand.getOperand(0); + EVT InVT = InputOp.getValueType(); + return SDValue(CurDAG->getMachineNode(InVT == MVT::i32 ? PPC::RLDICL_32 : + PPC::RLDICL, dl, InVT, InputOp, + S->getI64Imm(0, dl), + S->getI64Imm(63, dl)), 0); + } else if (isLogicOp(OperandOpcode)) + return computeLogicOpInGPR(Operand); + return SDValue(); + }; + LHS = getLogicOperand(LogicOp.getOperand(0)); + RHS = getLogicOperand(LogicOp.getOperand(1)); + + // If a GPR sequence can't be produced for the LHS we can't proceed. + // Not producing a GPR sequence for the RHS is only a problem if this isn't + // a bitwise negation operation. + if (!LHS || (!RHS && !IsBitwiseNegation)) + return SDValue(); + + NumLogicOpsOnComparison++; + + // We will use the inputs as 64-bit values. + if (LHS.getValueType() == MVT::i32) + LHS = addExtOrTrunc(LHS, ExtOrTruncConversion::Ext); + if (!IsBitwiseNegation && RHS.getValueType() == MVT::i32) + RHS = addExtOrTrunc(RHS, ExtOrTruncConversion::Ext); + + unsigned NewOpc; + switch (LogicOp.getOpcode()) { + default: llvm_unreachable("Unknown logic operation."); + case ISD::AND: NewOpc = PPC::AND8; break; + case ISD::OR: NewOpc = PPC::OR8; break; + case ISD::XOR: NewOpc = PPC::XOR8; break; + } + + if (IsBitwiseNegation) { + RHS = S->getI64Imm(1, dl); + NewOpc = PPC::XORI8; + } + + return SDValue(CurDAG->getMachineNode(NewOpc, dl, MVT::i64, LHS, RHS), 0); + +} + +/// If the value isn't guaranteed to be sign-extended to 64-bits, extend it. +/// Otherwise just reinterpret it as a 64-bit value. +/// Useful when emitting comparison code for 32-bit values without using +/// the compare instruction (which only considers the lower 32-bits). +SDValue IntegerCompareEliminator::signExtendInputIfNeeded(SDValue Input) { + assert(Input.getValueType() == MVT::i32 && + "Can only sign-extend 32-bit values here."); + unsigned Opc = Input.getOpcode(); + + // The value was sign extended and then truncated to 32-bits. No need to + // sign extend it again. + if (Opc == ISD::TRUNCATE && + (Input.getOperand(0).getOpcode() == ISD::AssertSext || + Input.getOperand(0).getOpcode() == ISD::SIGN_EXTEND)) + return addExtOrTrunc(Input, ExtOrTruncConversion::Ext); + + LoadSDNode *InputLoad = dyn_cast(Input); + // The input is a sign-extending load. All ppc sign-extending loads + // sign-extend to the full 64-bits. + if (InputLoad && InputLoad->getExtensionType() == ISD::SEXTLOAD) + return addExtOrTrunc(Input, ExtOrTruncConversion::Ext); + + ConstantSDNode *InputConst = dyn_cast(Input); + // We don't sign-extend constants. 
+ if (InputConst) + return addExtOrTrunc(Input, ExtOrTruncConversion::Ext); + + SDLoc dl(Input); + SignExtensionsAdded++; + return SDValue(CurDAG->getMachineNode(PPC::EXTSW_32_64, dl, + MVT::i64, Input), 0); +} + +/// If the value isn't guaranteed to be zero-extended to 64-bits, extend it. +/// Otherwise just reinterpret it as a 64-bit value. +/// Useful when emitting comparison code for 32-bit values without using +/// the compare instruction (which only considers the lower 32-bits). +SDValue IntegerCompareEliminator::zeroExtendInputIfNeeded(SDValue Input) { + assert(Input.getValueType() == MVT::i32 && + "Can only zero-extend 32-bit values here."); + unsigned Opc = Input.getOpcode(); + + // The only condition under which we can omit the actual extend instruction: + // - The value is a positive constant + // - The value comes from a load that isn't a sign-extending load + // An ISD::TRUNCATE needs to be zero-extended unless it is fed by a zext. + bool IsTruncateOfZExt = Opc == ISD::TRUNCATE && + (Input.getOperand(0).getOpcode() == ISD::AssertZext || + Input.getOperand(0).getOpcode() == ISD::ZERO_EXTEND); + if (IsTruncateOfZExt) + return addExtOrTrunc(Input, ExtOrTruncConversion::Ext); + + ConstantSDNode *InputConst = dyn_cast(Input); + if (InputConst && InputConst->getSExtValue() >= 0) + return addExtOrTrunc(Input, ExtOrTruncConversion::Ext); + + LoadSDNode *InputLoad = dyn_cast(Input); + // The input is a load that doesn't sign-extend (it will be zero-extended). + if (InputLoad && InputLoad->getExtensionType() != ISD::SEXTLOAD) + return addExtOrTrunc(Input, ExtOrTruncConversion::Ext); + + // None of the above, need to zero-extend. + SDLoc dl(Input); + ZeroExtensionsAdded++; + return SDValue(CurDAG->getMachineNode(PPC::RLDICL_32_64, dl, MVT::i64, Input, + S->getI64Imm(0, dl), + S->getI64Imm(32, dl)), 0); +} + +// Handle a 32-bit value in a 64-bit register and vice-versa. These are of +// course not actual zero/sign extensions that will generate machine code, +// they're just a way to reinterpret a 32 bit value in a register as a +// 64 bit value and vice-versa. +SDValue IntegerCompareEliminator::addExtOrTrunc(SDValue NatWidthRes, + ExtOrTruncConversion Conv) { + SDLoc dl(NatWidthRes); + + // For reinterpreting 32-bit values as 64 bit values, we generate + // INSERT_SUBREG IMPLICIT_DEF:i64, , TargetConstant:i32<1> + if (Conv == ExtOrTruncConversion::Ext) { + SDValue ImDef(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, MVT::i64), 0); + SDValue SubRegIdx = + CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32); + return SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, MVT::i64, + ImDef, NatWidthRes, SubRegIdx), 0); + } + + assert(Conv == ExtOrTruncConversion::Trunc && + "Unknown convertion between 32 and 64 bit values."); + // For reinterpreting 64-bit values as 32-bit values, we just need to + // EXTRACT_SUBREG (i.e. extract the low word). + SDValue SubRegIdx = + CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32); + return SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl, MVT::i32, + NatWidthRes, SubRegIdx), 0); +} + +// Produce a GPR sequence for compound comparisons (<=, >=) against zero. +// Handle both zero-extensions and sign-extensions. +SDValue +IntegerCompareEliminator::getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl, + ZeroCompare CmpTy) { + EVT InVT = LHS.getValueType(); + bool Is32Bit = InVT == MVT::i32; + SDValue ToExtend; + + // Produce the value that needs to be either zero or sign extended. 
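+  // For instance, the GE cases compute (nor %a, %a) and later extract its
+  // sign bit, which is 1 exactly when %a >= 0 (the (lshr (~ %a), 31) form
+  // documented in the callers). The LE cases first negate or decrement the
+  // input, so a 32-bit input must be valid in all 64 bits of the register.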
+ switch (CmpTy) { + case ZeroCompare::GEZExt: + case ZeroCompare::GESExt: + ToExtend = SDValue(CurDAG->getMachineNode(Is32Bit ? PPC::NOR : PPC::NOR8, + dl, InVT, LHS, LHS), 0); + break; + case ZeroCompare::LEZExt: + case ZeroCompare::LESExt: { + if (Is32Bit) { + // Upper 32 bits cannot be undefined for this sequence. + LHS = signExtendInputIfNeeded(LHS); + SDValue Neg = + SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0); + ToExtend = + SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, + Neg, S->getI64Imm(1, dl), + S->getI64Imm(63, dl)), 0); + } else { + SDValue Addi = + SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS, + S->getI64Imm(~0ULL, dl)), 0); + ToExtend = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64, + Addi, LHS), 0); + } + break; + } + } + + // For 64-bit sequences, the extensions are the same for the GE/LE cases. + if (!Is32Bit && + (CmpTy == ZeroCompare::GEZExt || CmpTy == ZeroCompare::LEZExt)) + return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, + ToExtend, S->getI64Imm(1, dl), + S->getI64Imm(63, dl)), 0); + if (!Is32Bit && + (CmpTy == ZeroCompare::GESExt || CmpTy == ZeroCompare::LESExt)) + return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, ToExtend, + S->getI64Imm(63, dl)), 0); + + assert(Is32Bit && "Should have handled the 32-bit sequences above."); + // For 32-bit sequences, the extensions differ between GE/LE cases. + switch (CmpTy) { + case ZeroCompare::GEZExt: { + SDValue ShiftOps[] = { ToExtend, S->getI32Imm(1, dl), S->getI32Imm(31, dl), + S->getI32Imm(31, dl) }; + return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, + ShiftOps), 0); + } + case ZeroCompare::GESExt: + return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, ToExtend, + S->getI32Imm(31, dl)), 0); + case ZeroCompare::LEZExt: + return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, ToExtend, + S->getI32Imm(1, dl)), 0); + case ZeroCompare::LESExt: + return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, ToExtend, + S->getI32Imm(-1, dl)), 0); + } + + // The above case covers all the enumerators so it can't have a default clause + // to avoid compiler warnings. + llvm_unreachable("Unknown zero-comparison type."); +} + +/// Produces a zero-extended result of comparing two 32-bit values according to +/// the passed condition code. +SDValue +IntegerCompareEliminator::get32BitZExtCompare(SDValue LHS, SDValue RHS, + ISD::CondCode CC, + int64_t RHSValue, SDLoc dl) { + if (CmpInGPR == ICGPR_I64 || CmpInGPR == ICGPR_SextI64 || + CmpInGPR == ICGPR_ZextI64 || CmpInGPR == ICGPR_Sext) + return SDValue(); + bool IsRHSZero = RHSValue == 0; + bool IsRHSOne = RHSValue == 1; + bool IsRHSNegOne = RHSValue == -1LL; + switch (CC) { + default: return SDValue(); + case ISD::SETEQ: { + // (zext (setcc %a, %b, seteq)) -> (lshr (cntlzw (xor %a, %b)), 5) + // (zext (setcc %a, 0, seteq)) -> (lshr (cntlzw %a), 5) + SDValue Xor = IsRHSZero ? LHS : + SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0); + SDValue Clz = + SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0); + SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl), + S->getI32Imm(31, dl) }; + return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, + ShiftOps), 0); + } + case ISD::SETNE: { + // (zext (setcc %a, %b, setne)) -> (xor (lshr (cntlzw (xor %a, %b)), 5), 1) + // (zext (setcc %a, 0, setne)) -> (xor (lshr (cntlzw %a), 5), 1) + SDValue Xor = IsRHSZero ? 
LHS : + SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0); + SDValue Clz = + SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0); + SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl), + S->getI32Imm(31, dl) }; + SDValue Shift = + SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0); + return SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift, + S->getI32Imm(1, dl)), 0); + } + case ISD::SETGE: { + // (zext (setcc %a, %b, setge)) -> (xor (lshr (sub %a, %b), 63), 1) + // (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 31) + if(IsRHSZero) + return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt); + + // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a) + // by swapping inputs and falling through. + std::swap(LHS, RHS); + ConstantSDNode *RHSConst = dyn_cast(RHS); + IsRHSZero = RHSConst && RHSConst->isNullValue(); + LLVM_FALLTHROUGH; + } + case ISD::SETLE: { + if (CmpInGPR == ICGPR_NonExtIn) + return SDValue(); + // (zext (setcc %a, %b, setle)) -> (xor (lshr (sub %b, %a), 63), 1) + // (zext (setcc %a, 0, setle)) -> (xor (lshr (- %a), 63), 1) + if(IsRHSZero) { + if (CmpInGPR == ICGPR_NonExtIn) + return SDValue(); + return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt); + } + + // The upper 32-bits of the register can't be undefined for this sequence. + LHS = signExtendInputIfNeeded(LHS); + RHS = signExtendInputIfNeeded(RHS); + SDValue Sub = + SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0); + SDValue Shift = + SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Sub, + S->getI64Imm(1, dl), S->getI64Imm(63, dl)), + 0); + return + SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, + MVT::i64, Shift, S->getI32Imm(1, dl)), 0); + } + case ISD::SETGT: { + // (zext (setcc %a, %b, setgt)) -> (lshr (sub %b, %a), 63) + // (zext (setcc %a, -1, setgt)) -> (lshr (~ %a), 31) + // (zext (setcc %a, 0, setgt)) -> (lshr (- %a), 63) + // Handle SETLT -1 (which is equivalent to SETGE 0). + if (IsRHSNegOne) + return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt); + + if (IsRHSZero) { + if (CmpInGPR == ICGPR_NonExtIn) + return SDValue(); + // The upper 32-bits of the register can't be undefined for this sequence. + LHS = signExtendInputIfNeeded(LHS); + RHS = signExtendInputIfNeeded(RHS); + SDValue Neg = + SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0); + return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, + Neg, S->getI32Imm(1, dl), S->getI32Imm(63, dl)), 0); + } + // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as + // (%b < %a) by swapping inputs and falling through. + std::swap(LHS, RHS); + ConstantSDNode *RHSConst = dyn_cast(RHS); + IsRHSZero = RHSConst && RHSConst->isNullValue(); + IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1; + LLVM_FALLTHROUGH; + } + case ISD::SETLT: { + // (zext (setcc %a, %b, setlt)) -> (lshr (sub %a, %b), 63) + // (zext (setcc %a, 1, setlt)) -> (xor (lshr (- %a), 63), 1) + // (zext (setcc %a, 0, setlt)) -> (lshr %a, 31) + // Handle SETLT 1 (which is equivalent to SETLE 0). 
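+    // The general case below is safe because both inputs are sign-extended to
+    // 64 bits first: the 64-bit subtraction cannot overflow, so the sign bit
+    // of (sub %a, %b) is exactly (%a < %b).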
+ if (IsRHSOne) { + if (CmpInGPR == ICGPR_NonExtIn) + return SDValue(); + return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt); + } + + if (IsRHSZero) { + SDValue ShiftOps[] = { LHS, S->getI32Imm(1, dl), S->getI32Imm(31, dl), + S->getI32Imm(31, dl) }; + return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, + ShiftOps), 0); + } + + if (CmpInGPR == ICGPR_NonExtIn) + return SDValue(); + // The upper 32-bits of the register can't be undefined for this sequence. + LHS = signExtendInputIfNeeded(LHS); + RHS = signExtendInputIfNeeded(RHS); + SDValue SUBFNode = + SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0); + return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, + SUBFNode, S->getI64Imm(1, dl), + S->getI64Imm(63, dl)), 0); + } + case ISD::SETUGE: + // (zext (setcc %a, %b, setuge)) -> (xor (lshr (sub %b, %a), 63), 1) + // (zext (setcc %a, %b, setule)) -> (xor (lshr (sub %a, %b), 63), 1) + std::swap(LHS, RHS); + LLVM_FALLTHROUGH; + case ISD::SETULE: { + if (CmpInGPR == ICGPR_NonExtIn) + return SDValue(); + // The upper 32-bits of the register can't be undefined for this sequence. + LHS = zeroExtendInputIfNeeded(LHS); + RHS = zeroExtendInputIfNeeded(RHS); + SDValue Subtract = + SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0); + SDValue SrdiNode = + SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, + Subtract, S->getI64Imm(1, dl), + S->getI64Imm(63, dl)), 0); + return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, SrdiNode, + S->getI32Imm(1, dl)), 0); + } + case ISD::SETUGT: + // (zext (setcc %a, %b, setugt)) -> (lshr (sub %b, %a), 63) + // (zext (setcc %a, %b, setult)) -> (lshr (sub %a, %b), 63) + std::swap(LHS, RHS); + LLVM_FALLTHROUGH; + case ISD::SETULT: { + if (CmpInGPR == ICGPR_NonExtIn) + return SDValue(); + // The upper 32-bits of the register can't be undefined for this sequence. + LHS = zeroExtendInputIfNeeded(LHS); + RHS = zeroExtendInputIfNeeded(RHS); + SDValue Subtract = + SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0); + return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, + Subtract, S->getI64Imm(1, dl), + S->getI64Imm(63, dl)), 0); + } + } +} + +/// Produces a sign-extended result of comparing two 32-bit values according to +/// the passed condition code. +SDValue +IntegerCompareEliminator::get32BitSExtCompare(SDValue LHS, SDValue RHS, + ISD::CondCode CC, + int64_t RHSValue, SDLoc dl) { + if (CmpInGPR == ICGPR_I64 || CmpInGPR == ICGPR_SextI64 || + CmpInGPR == ICGPR_ZextI64 || CmpInGPR == ICGPR_Zext) + return SDValue(); + bool IsRHSZero = RHSValue == 0; + bool IsRHSOne = RHSValue == 1; + bool IsRHSNegOne = RHSValue == -1LL; + + switch (CC) { + default: return SDValue(); + case ISD::SETEQ: { + // (sext (setcc %a, %b, seteq)) -> + // (ashr (shl (ctlz (xor %a, %b)), 58), 63) + // (sext (setcc %a, 0, seteq)) -> + // (ashr (shl (ctlz %a), 58), 63) + SDValue CountInput = IsRHSZero ? 
LHS : + SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0); + SDValue Cntlzw = + SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, CountInput), 0); + SDValue SHLOps[] = { Cntlzw, S->getI32Imm(27, dl), + S->getI32Imm(5, dl), S->getI32Imm(31, dl) }; + SDValue Slwi = + SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, SHLOps), 0); + return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Slwi), 0); + } + case ISD::SETNE: { + // Bitwise xor the operands, count leading zeros, shift right by 5 bits and + // flip the bit, finally take 2's complement. + // (sext (setcc %a, %b, setne)) -> + // (neg (xor (lshr (ctlz (xor %a, %b)), 5), 1)) + // Same as above, but the first xor is not needed. + // (sext (setcc %a, 0, setne)) -> + // (neg (xor (lshr (ctlz %a), 5), 1)) + SDValue Xor = IsRHSZero ? LHS : + SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0); + SDValue Clz = + SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0); + SDValue ShiftOps[] = + { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl), S->getI32Imm(31, dl) }; + SDValue Shift = + SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0); + SDValue Xori = + SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift, + S->getI32Imm(1, dl)), 0); + return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Xori), 0); + } + case ISD::SETGE: { + // (sext (setcc %a, %b, setge)) -> (add (lshr (sub %a, %b), 63), -1) + // (sext (setcc %a, 0, setge)) -> (ashr (~ %a), 31) + if (IsRHSZero) + return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt); + + // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a) + // by swapping inputs and falling through. + std::swap(LHS, RHS); + ConstantSDNode *RHSConst = dyn_cast(RHS); + IsRHSZero = RHSConst && RHSConst->isNullValue(); + LLVM_FALLTHROUGH; + } + case ISD::SETLE: { + if (CmpInGPR == ICGPR_NonExtIn) + return SDValue(); + // (sext (setcc %a, %b, setge)) -> (add (lshr (sub %b, %a), 63), -1) + // (sext (setcc %a, 0, setle)) -> (add (lshr (- %a), 63), -1) + if (IsRHSZero) + return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt); + + // The upper 32-bits of the register can't be undefined for this sequence. + LHS = signExtendInputIfNeeded(LHS); + RHS = signExtendInputIfNeeded(RHS); + SDValue SUBFNode = + SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, MVT::Glue, + LHS, RHS), 0); + SDValue Srdi = + SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, + SUBFNode, S->getI64Imm(1, dl), + S->getI64Imm(63, dl)), 0); + return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Srdi, + S->getI32Imm(-1, dl)), 0); + } + case ISD::SETGT: { + // (sext (setcc %a, %b, setgt)) -> (ashr (sub %b, %a), 63) + // (sext (setcc %a, -1, setgt)) -> (ashr (~ %a), 31) + // (sext (setcc %a, 0, setgt)) -> (ashr (- %a), 63) + if (IsRHSNegOne) + return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt); + if (IsRHSZero) { + if (CmpInGPR == ICGPR_NonExtIn) + return SDValue(); + // The upper 32-bits of the register can't be undefined for this sequence. + LHS = signExtendInputIfNeeded(LHS); + RHS = signExtendInputIfNeeded(RHS); + SDValue Neg = + SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0); + return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Neg, + S->getI64Imm(63, dl)), 0); + } + // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as + // (%b < %a) by swapping inputs and falling through. 
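+    // After the swap the constant (if any) is in the new RHS, so the
+    // special-case flags must be recomputed before falling through to SETLT.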
+ std::swap(LHS, RHS); + ConstantSDNode *RHSConst = dyn_cast(RHS); + IsRHSZero = RHSConst && RHSConst->isNullValue(); + IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1; + LLVM_FALLTHROUGH; + } + case ISD::SETLT: { + // (sext (setcc %a, %b, setgt)) -> (ashr (sub %a, %b), 63) + // (sext (setcc %a, 1, setgt)) -> (add (lshr (- %a), 63), -1) + // (sext (setcc %a, 0, setgt)) -> (ashr %a, 31) + if (IsRHSOne) { + if (CmpInGPR == ICGPR_NonExtIn) + return SDValue(); + return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt); + } + if (IsRHSZero) + return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, LHS, + S->getI32Imm(31, dl)), 0); + + if (CmpInGPR == ICGPR_NonExtIn) + return SDValue(); + // The upper 32-bits of the register can't be undefined for this sequence. + LHS = signExtendInputIfNeeded(LHS); + RHS = signExtendInputIfNeeded(RHS); + SDValue SUBFNode = + SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0); + return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, + SUBFNode, S->getI64Imm(63, dl)), 0); + } + case ISD::SETUGE: + // (sext (setcc %a, %b, setuge)) -> (add (lshr (sub %a, %b), 63), -1) + // (sext (setcc %a, %b, setule)) -> (add (lshr (sub %b, %a), 63), -1) + std::swap(LHS, RHS); + LLVM_FALLTHROUGH; + case ISD::SETULE: { + if (CmpInGPR == ICGPR_NonExtIn) + return SDValue(); + // The upper 32-bits of the register can't be undefined for this sequence. + LHS = zeroExtendInputIfNeeded(LHS); + RHS = zeroExtendInputIfNeeded(RHS); + SDValue Subtract = + SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0); + SDValue Shift = + SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Subtract, + S->getI32Imm(1, dl), S->getI32Imm(63,dl)), + 0); + return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Shift, + S->getI32Imm(-1, dl)), 0); + } + case ISD::SETUGT: + // (sext (setcc %a, %b, setugt)) -> (ashr (sub %b, %a), 63) + // (sext (setcc %a, %b, setugt)) -> (ashr (sub %a, %b), 63) + std::swap(LHS, RHS); + LLVM_FALLTHROUGH; + case ISD::SETULT: { + if (CmpInGPR == ICGPR_NonExtIn) + return SDValue(); + // The upper 32-bits of the register can't be undefined for this sequence. + LHS = zeroExtendInputIfNeeded(LHS); + RHS = zeroExtendInputIfNeeded(RHS); + SDValue Subtract = + SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0); + return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, + Subtract, S->getI64Imm(63, dl)), 0); + } + } +} + +/// Produces a zero-extended result of comparing two 64-bit values according to +/// the passed condition code. +SDValue +IntegerCompareEliminator::get64BitZExtCompare(SDValue LHS, SDValue RHS, + ISD::CondCode CC, + int64_t RHSValue, SDLoc dl) { + if (CmpInGPR == ICGPR_I32 || CmpInGPR == ICGPR_SextI32 || + CmpInGPR == ICGPR_ZextI32 || CmpInGPR == ICGPR_Sext) + return SDValue(); + bool IsRHSZero = RHSValue == 0; + bool IsRHSOne = RHSValue == 1; + bool IsRHSNegOne = RHSValue == -1LL; + switch (CC) { + default: return SDValue(); + case ISD::SETEQ: { + // (zext (setcc %a, %b, seteq)) -> (lshr (ctlz (xor %a, %b)), 6) + // (zext (setcc %a, 0, seteq)) -> (lshr (ctlz %a), 6) + SDValue Xor = IsRHSZero ? 
LHS : + SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0); + SDValue Clz = + SDValue(CurDAG->getMachineNode(PPC::CNTLZD, dl, MVT::i64, Xor), 0); + return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Clz, + S->getI64Imm(58, dl), + S->getI64Imm(63, dl)), 0); + } + case ISD::SETNE: { + // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1) + // (zext (setcc %a, %b, setne)) -> (sube addc.reg, addc.reg, addc.CA) + // {addcz.reg, addcz.CA} = (addcarry %a, -1) + // (zext (setcc %a, 0, setne)) -> (sube addcz.reg, addcz.reg, addcz.CA) + SDValue Xor = IsRHSZero ? LHS : + SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0); + SDValue AC = + SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue, + Xor, S->getI32Imm(~0U, dl)), 0); + return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, AC, + Xor, AC.getValue(1)), 0); + } + case ISD::SETGE: { + // {subc.reg, subc.CA} = (subcarry %a, %b) + // (zext (setcc %a, %b, setge)) -> + // (adde (lshr %b, 63), (ashr %a, 63), subc.CA) + // (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 63) + if (IsRHSZero) + return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt); + std::swap(LHS, RHS); + ConstantSDNode *RHSConst = dyn_cast(RHS); + IsRHSZero = RHSConst && RHSConst->isNullValue(); + LLVM_FALLTHROUGH; + } + case ISD::SETLE: { + // {subc.reg, subc.CA} = (subcarry %b, %a) + // (zext (setcc %a, %b, setge)) -> + // (adde (lshr %a, 63), (ashr %b, 63), subc.CA) + // (zext (setcc %a, 0, setge)) -> (lshr (or %a, (add %a, -1)), 63) + if (IsRHSZero) + return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt); + SDValue ShiftL = + SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS, + S->getI64Imm(1, dl), + S->getI64Imm(63, dl)), 0); + SDValue ShiftR = + SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS, + S->getI64Imm(63, dl)), 0); + SDValue SubtractCarry = + SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue, + LHS, RHS), 1); + return SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue, + ShiftR, ShiftL, SubtractCarry), 0); + } + case ISD::SETGT: { + // {subc.reg, subc.CA} = (subcarry %b, %a) + // (zext (setcc %a, %b, setgt)) -> + // (xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1) + // (zext (setcc %a, 0, setgt)) -> (lshr (nor (add %a, -1), %a), 63) + if (IsRHSNegOne) + return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt); + if (IsRHSZero) { + SDValue Addi = + SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS, + S->getI64Imm(~0ULL, dl)), 0); + SDValue Nor = + SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Addi, LHS), 0); + return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Nor, + S->getI64Imm(1, dl), + S->getI64Imm(63, dl)), 0); + } + std::swap(LHS, RHS); + ConstantSDNode *RHSConst = dyn_cast(RHS); + IsRHSZero = RHSConst && RHSConst->isNullValue(); + IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1; + LLVM_FALLTHROUGH; + } + case ISD::SETLT: { + // {subc.reg, subc.CA} = (subcarry %a, %b) + // (zext (setcc %a, %b, setlt)) -> + // (xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1) + // (zext (setcc %a, 0, setlt)) -> (lshr %a, 63) + if (IsRHSOne) + return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt); + if (IsRHSZero) + return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS, + S->getI64Imm(1, dl), + S->getI64Imm(63, dl)), 0); + SDValue SRADINode = + SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, + LHS, S->getI64Imm(63, dl)), 0); + 
SDValue SRDINode = + SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, + RHS, S->getI64Imm(1, dl), + S->getI64Imm(63, dl)), 0); + SDValue SUBFC8Carry = + SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue, + RHS, LHS), 1); + SDValue ADDE8Node = + SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue, + SRDINode, SRADINode, SUBFC8Carry), 0); + return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, + ADDE8Node, S->getI64Imm(1, dl)), 0); + } + case ISD::SETUGE: + // {subc.reg, subc.CA} = (subcarry %a, %b) + // (zext (setcc %a, %b, setuge)) -> (add (sube %b, %b, subc.CA), 1) + std::swap(LHS, RHS); + LLVM_FALLTHROUGH; + case ISD::SETULE: { + // {subc.reg, subc.CA} = (subcarry %b, %a) + // (zext (setcc %a, %b, setule)) -> (add (sube %a, %a, subc.CA), 1) + SDValue SUBFC8Carry = + SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue, + LHS, RHS), 1); + SDValue SUBFE8Node = + SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue, + LHS, LHS, SUBFC8Carry), 0); + return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, + SUBFE8Node, S->getI64Imm(1, dl)), 0); + } + case ISD::SETUGT: + // {subc.reg, subc.CA} = (subcarry %b, %a) + // (zext (setcc %a, %b, setugt)) -> -(sube %b, %b, subc.CA) + std::swap(LHS, RHS); + LLVM_FALLTHROUGH; + case ISD::SETULT: { + // {subc.reg, subc.CA} = (subcarry %a, %b) + // (zext (setcc %a, %b, setult)) -> -(sube %a, %a, subc.CA) + SDValue SubtractCarry = + SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue, + RHS, LHS), 1); + SDValue ExtSub = + SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, + LHS, LHS, SubtractCarry), 0); + return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, + ExtSub), 0); + } + } +} + +/// Produces a sign-extended result of comparing two 64-bit values according to +/// the passed condition code. +SDValue +IntegerCompareEliminator::get64BitSExtCompare(SDValue LHS, SDValue RHS, + ISD::CondCode CC, + int64_t RHSValue, SDLoc dl) { + if (CmpInGPR == ICGPR_I32 || CmpInGPR == ICGPR_SextI32 || + CmpInGPR == ICGPR_ZextI32 || CmpInGPR == ICGPR_Zext) + return SDValue(); + bool IsRHSZero = RHSValue == 0; + bool IsRHSOne = RHSValue == 1; + bool IsRHSNegOne = RHSValue == -1LL; + switch (CC) { + default: return SDValue(); + case ISD::SETEQ: { + // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1) + // (sext (setcc %a, %b, seteq)) -> (sube addc.reg, addc.reg, addc.CA) + // {addcz.reg, addcz.CA} = (addcarry %a, -1) + // (sext (setcc %a, 0, seteq)) -> (sube addcz.reg, addcz.reg, addcz.CA) + SDValue AddInput = IsRHSZero ? LHS : + SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0); + SDValue Addic = + SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue, + AddInput, S->getI32Imm(~0U, dl)), 0); + return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, Addic, + Addic, Addic.getValue(1)), 0); + } + case ISD::SETNE: { + // {subfc.reg, subfc.CA} = (subcarry 0, (xor %a, %b)) + // (sext (setcc %a, %b, setne)) -> (sube subfc.reg, subfc.reg, subfc.CA) + // {subfcz.reg, subfcz.CA} = (subcarry 0, %a) + // (sext (setcc %a, 0, setne)) -> (sube subfcz.reg, subfcz.reg, subfcz.CA) + SDValue Xor = IsRHSZero ? 
LHS : + SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0); + SDValue SC = + SDValue(CurDAG->getMachineNode(PPC::SUBFIC8, dl, MVT::i64, MVT::Glue, + Xor, S->getI32Imm(0, dl)), 0); + return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, SC, + SC, SC.getValue(1)), 0); + } + case ISD::SETGE: { + // {subc.reg, subc.CA} = (subcarry %a, %b) + // (zext (setcc %a, %b, setge)) -> + // (- (adde (lshr %b, 63), (ashr %a, 63), subc.CA)) + // (zext (setcc %a, 0, setge)) -> (~ (ashr %a, 63)) + if (IsRHSZero) + return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt); + std::swap(LHS, RHS); + ConstantSDNode *RHSConst = dyn_cast(RHS); + IsRHSZero = RHSConst && RHSConst->isNullValue(); + LLVM_FALLTHROUGH; + } + case ISD::SETLE: { + // {subc.reg, subc.CA} = (subcarry %b, %a) + // (zext (setcc %a, %b, setge)) -> + // (- (adde (lshr %a, 63), (ashr %b, 63), subc.CA)) + // (zext (setcc %a, 0, setge)) -> (ashr (or %a, (add %a, -1)), 63) + if (IsRHSZero) + return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt); + SDValue ShiftR = + SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS, + S->getI64Imm(63, dl)), 0); + SDValue ShiftL = + SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS, + S->getI64Imm(1, dl), + S->getI64Imm(63, dl)), 0); + SDValue SubtractCarry = + SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue, + LHS, RHS), 1); + SDValue Adde = + SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue, + ShiftR, ShiftL, SubtractCarry), 0); + return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, Adde), 0); + } + case ISD::SETGT: { + // {subc.reg, subc.CA} = (subcarry %b, %a) + // (zext (setcc %a, %b, setgt)) -> + // -(xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1) + // (zext (setcc %a, 0, setgt)) -> (ashr (nor (add %a, -1), %a), 63) + if (IsRHSNegOne) + return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt); + if (IsRHSZero) { + SDValue Add = + SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS, + S->getI64Imm(-1, dl)), 0); + SDValue Nor = + SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Add, LHS), 0); + return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Nor, + S->getI64Imm(63, dl)), 0); + } + std::swap(LHS, RHS); + ConstantSDNode *RHSConst = dyn_cast(RHS); + IsRHSZero = RHSConst && RHSConst->isNullValue(); + IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1; + LLVM_FALLTHROUGH; + } + case ISD::SETLT: { + // {subc.reg, subc.CA} = (subcarry %a, %b) + // (zext (setcc %a, %b, setlt)) -> + // -(xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1) + // (zext (setcc %a, 0, setlt)) -> (ashr %a, 63) + if (IsRHSOne) + return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt); + if (IsRHSZero) { + return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, LHS, + S->getI64Imm(63, dl)), 0); + } + SDValue SRADINode = + SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, + LHS, S->getI64Imm(63, dl)), 0); + SDValue SRDINode = + SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, + RHS, S->getI64Imm(1, dl), + S->getI64Imm(63, dl)), 0); + SDValue SUBFC8Carry = + SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue, + RHS, LHS), 1); + SDValue ADDE8Node = + SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, + SRDINode, SRADINode, SUBFC8Carry), 0); + SDValue XORI8Node = + SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, + ADDE8Node, S->getI64Imm(1, dl)), 0); + return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, 
MVT::i64, + XORI8Node), 0); + } + case ISD::SETUGE: + // {subc.reg, subc.CA} = (subcarry %a, %b) + // (sext (setcc %a, %b, setuge)) -> ~(sube %b, %b, subc.CA) + std::swap(LHS, RHS); + LLVM_FALLTHROUGH; + case ISD::SETULE: { + // {subc.reg, subc.CA} = (subcarry %b, %a) + // (sext (setcc %a, %b, setule)) -> ~(sube %a, %a, subc.CA) + SDValue SubtractCarry = + SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue, + LHS, RHS), 1); + SDValue ExtSub = + SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue, LHS, + LHS, SubtractCarry), 0); + return SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, + ExtSub, ExtSub), 0); + } + case ISD::SETUGT: + // {subc.reg, subc.CA} = (subcarry %b, %a) + // (sext (setcc %a, %b, setugt)) -> (sube %b, %b, subc.CA) + std::swap(LHS, RHS); + LLVM_FALLTHROUGH; + case ISD::SETULT: { + // {subc.reg, subc.CA} = (subcarry %a, %b) + // (sext (setcc %a, %b, setult)) -> (sube %a, %a, subc.CA) + SDValue SubCarry = + SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue, + RHS, LHS), 1); + return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, + LHS, LHS, SubCarry), 0); + } + } +} + +/// Do all uses of this SDValue need the result in a GPR? +/// This is meant to be used on values that have type i1 since +/// it is somewhat meaningless to ask if values of other types +/// should be kept in GPR's. +static bool allUsesExtend(SDValue Compare, SelectionDAG *CurDAG) { + assert(Compare.getOpcode() == ISD::SETCC && + "An ISD::SETCC node required here."); + + // For values that have a single use, the caller should obviously already have + // checked if that use is an extending use. We check the other uses here. + if (Compare.hasOneUse()) + return true; + // We want the value in a GPR if it is being extended, used for a select, or + // used in logical operations. + for (auto CompareUse : Compare.getNode()->uses()) + if (CompareUse->getOpcode() != ISD::SIGN_EXTEND && + CompareUse->getOpcode() != ISD::ZERO_EXTEND && + CompareUse->getOpcode() != ISD::SELECT && + !isLogicOp(CompareUse->getOpcode())) { + OmittedForNonExtendUses++; + return false; + } + return true; +} + +/// Returns an equivalent of a SETCC node but with the result the same width as +/// the inputs. This can nalso be used for SELECT_CC if either the true or false +/// values is a power of two while the other is zero. +SDValue IntegerCompareEliminator::getSETCCInGPR(SDValue Compare, + SetccInGPROpts ConvOpts) { + assert((Compare.getOpcode() == ISD::SETCC || + Compare.getOpcode() == ISD::SELECT_CC) && + "An ISD::SETCC node required here."); + + // Don't convert this comparison to a GPR sequence because there are uses + // of the i1 result (i.e. uses that require the result in the CR). + if ((Compare.getOpcode() == ISD::SETCC) && !allUsesExtend(Compare, CurDAG)) + return SDValue(); + + SDValue LHS = Compare.getOperand(0); + SDValue RHS = Compare.getOperand(1); + + // The condition code is operand 2 for SETCC and operand 4 for SELECT_CC. + int CCOpNum = Compare.getOpcode() == ISD::SELECT_CC ? 4 : 2; + ISD::CondCode CC = + cast(Compare.getOperand(CCOpNum))->get(); + EVT InputVT = LHS.getValueType(); + if (InputVT != MVT::i32 && InputVT != MVT::i64) + return SDValue(); + + if (ConvOpts == SetccInGPROpts::ZExtInvert || + ConvOpts == SetccInGPROpts::SExtInvert) + CC = ISD::getSetCCInverse(CC, true); + + bool Inputs32Bit = InputVT == MVT::i32; + + SDLoc dl(Compare); + ConstantSDNode *RHSConst = dyn_cast(RHS); + int64_t RHSValue = RHSConst ? 
RHSConst->getSExtValue() : INT64_MAX; + bool IsSext = ConvOpts == SetccInGPROpts::SExtOrig || + ConvOpts == SetccInGPROpts::SExtInvert; + + if (IsSext && Inputs32Bit) + return get32BitSExtCompare(LHS, RHS, CC, RHSValue, dl); + else if (Inputs32Bit) + return get32BitZExtCompare(LHS, RHS, CC, RHSValue, dl); + else if (IsSext) + return get64BitSExtCompare(LHS, RHS, CC, RHSValue, dl); + return get64BitZExtCompare(LHS, RHS, CC, RHSValue, dl); +} + } // end anonymous namespace +bool PPCDAGToDAGISel::tryIntCompareInGPR(SDNode *N) { + if (N->getValueType(0) != MVT::i32 && + N->getValueType(0) != MVT::i64) + return false; + + // This optimization will emit code that assumes 64-bit registers + // so we don't want to run it in 32-bit mode. Also don't run it + // on functions that are not to be optimized. + if (TM.getOptLevel() == CodeGenOpt::None || !TM.isPPC64()) + return false; + + switch (N->getOpcode()) { + default: break; + case ISD::ZERO_EXTEND: + case ISD::SIGN_EXTEND: + case ISD::AND: + case ISD::OR: + case ISD::XOR: { + IntegerCompareEliminator ICmpElim(CurDAG, this); + if (SDNode *New = ICmpElim.Select(N)) { + ReplaceNode(N, New); + return true; + } + } + } + return false; +} + bool PPCDAGToDAGISel::tryBitPermutation(SDNode *N) { if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64) @@ -2578,6 +3875,10 @@ void PPCDAGToDAGISel::Select(SDNode *N) { if (tryBitPermutation(N)) return; + // Try to emit integer compares as GPR-only sequences (i.e. no use of CR). + if (tryIntCompareInGPR(N)) + return; + switch (N->getOpcode()) { default: break; @@ -3218,9 +4519,9 @@ void PPCDAGToDAGISel::Select(SDNode *N) { // The first source operand is a TargetGlobalAddress or a TargetJumpTable. // If it must be toc-referenced according to PPCSubTarget, we generate: - // LDtocL(, ADDIStocHA(%X2, )) + // LDtocL(@sym, ADDIStocHA(%x2, @sym)) // Otherwise we generate: - // ADDItocL(ADDIStocHA(%X2, ), ) + // ADDItocL(ADDIStocHA(%x2, @sym), @sym) SDValue GA = N->getOperand(0); SDValue TOCbase = N->getOperand(1); SDNode *Tmp = CurDAG->getMachineNode(PPC::ADDIStocHA, dl, MVT::i64, diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 3fe9fe734993..3c09ab8d7555 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -142,6 +142,9 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::BITREVERSE, MVT::i32, Legal); setOperationAction(ISD::BITREVERSE, MVT::i64, Legal); + // Sub-word ATOMIC_CMP_SWAP need to ensure that the input is zero-extended. + setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom); + // PowerPC has an i16 but no i8 (or i1) SEXTLOAD. 
for (MVT VT : MVT::integer_valuetypes()) { setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); @@ -1154,6 +1157,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::Hi: return "PPCISD::Hi"; case PPCISD::Lo: return "PPCISD::Lo"; case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY"; + case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8"; + case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16"; case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC"; case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET"; case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg"; @@ -2428,8 +2433,8 @@ static SDValue getTOCEntry(SelectionDAG &DAG, const SDLoc &dl, bool Is64Bit, SDValue Ops[] = { GA, Reg }; return DAG.getMemIntrinsicNode( PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT, - MachinePointerInfo::getGOT(DAG.getMachineFunction()), 0, false, true, - false, 0); + MachinePointerInfo::getGOT(DAG.getMachineFunction()), 0, + MachineMemOperand::MOLoad); } SDValue PPCTargetLowering::LowerConstantPool(SDValue Op, @@ -2573,7 +2578,7 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, const GlobalValue *GV = GA->getGlobal(); EVT PtrVT = getPointerTy(DAG.getDataLayout()); bool is64bit = Subtarget.isPPC64(); - const Module *M = DAG.getMachineFunction().getFunction()->getParent(); + const Module *M = DAG.getMachineFunction().getFunction().getParent(); PICLevel::Level picLevel = M->getPICLevel(); TLSModel::Model Model = getTargetMachine().getTLSModel(GV); @@ -3542,7 +3547,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_64SVR4( unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; unsigned &QFPR_idx = FPR_idx; SmallVector MemOps; - Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin(); + Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin(); unsigned CurArgIdx = 0; for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) { SDValue ArgVal; @@ -3986,7 +3991,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_Darwin( SmallVector MemOps; unsigned nAltivecParamsAtEnd = 0; - Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin(); + Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin(); unsigned CurArgIdx = 0; for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) { SDValue ArgVal; @@ -4397,13 +4402,18 @@ hasSameArgumentList(const Function *CallerFn, ImmutableCallSite CS) { static bool areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC, CallingConv::ID CalleeCC) { - // Tail or Sibling call optimization (TCO/SCO) needs callee and caller to - // have the same calling convention. - if (CallerCC != CalleeCC) + // Tail calls are possible with fastcc and ccc. + auto isTailCallableCC = [] (CallingConv::ID CC){ + return CC == CallingConv::C || CC == CallingConv::Fast; + }; + if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC)) return false; - // Tail or Sibling calls can be done with fastcc/ccc. - return (CallerCC == CallingConv::Fast || CallerCC == CallingConv::C); + // We can safely tail call both fastcc and ccc callees from a c calling + // convention caller. If the caller is fastcc, we may have less stack space + // than a non-fastcc caller with the same signature so disable tail-calls in + // that case. + return CallerCC == CallingConv::C || CallerCC == CalleeCC; } bool @@ -4422,9 +4432,9 @@ PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4( // Variadic argument functions are not supported. 
if (isVarArg) return false; - auto *Caller = DAG.getMachineFunction().getFunction(); + auto &Caller = DAG.getMachineFunction().getFunction(); // Check that the calling conventions are compatible for tco. - if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(), CalleeCC)) + if (!areCallingConvEligibleForTCO_64SVR4(Caller.getCallingConv(), CalleeCC)) return false; // Caller contains any byval parameter is not supported. @@ -4434,10 +4444,28 @@ PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4( // Callee contains any byval parameter is not supported, too. // Note: This is a quick work around, because in some cases, e.g. // caller's stack size > callee's stack size, we are still able to apply - // sibling call optimization. See: https://reviews.llvm.org/D23441#513574 + // sibling call optimization. For example, gcc is able to do SCO for caller1 + // in the following example, but not for caller2. + // struct test { + // long int a; + // char ary[56]; + // } gTest; + // __attribute__((noinline)) int callee(struct test v, struct test *b) { + // b->a = v.a; + // return 0; + // } + // void caller1(struct test a, struct test c, struct test *b) { + // callee(gTest, b); } + // void caller2(struct test *b) { callee(gTest, b); } if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); })) return false; + // If callee and caller use different calling conventions, we cannot pass + // parameters on stack since offsets for the parameter area may be different. + if (Caller.getCallingConv() != CalleeCC && + needStackSlotPassParameters(Subtarget, Outs)) + return false; + // No TCO/SCO on indirect call because Caller have to restore its TOC if (!isFunctionGlobalAddress(Callee) && !isa(Callee)) @@ -4446,7 +4474,7 @@ PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4( // If the caller and callee potentially have different TOC bases then we // cannot tail call since we need to restore the TOC pointer after the call. // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977 - if (!callsShareTOCBase(Caller, Callee, getTargetMachine())) + if (!callsShareTOCBase(&Caller, Callee, getTargetMachine())) return false; // TCO allows altering callee ABI, so we don't have to check further. @@ -4458,7 +4486,7 @@ PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4( // If callee use the same argument list that caller is using, then we can // apply SCO on this case. If it is not, then we need to check if callee needs // stack for passing arguments. - if (!hasSameArgumentList(Caller, CS) && + if (!hasSameArgumentList(&Caller, CS) && needStackSlotPassParameters(Subtarget, Outs)) { return false; } @@ -4483,7 +4511,7 @@ PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, return false; MachineFunction &MF = DAG.getMachineFunction(); - CallingConv::ID CallerCC = MF.getFunction()->getCallingConv(); + CallingConv::ID CallerCC = MF.getFunction().getCallingConv(); if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) { // Functions containing by val parameters are not supported. for (unsigned i = 0; i != Ins.size(); i++) { @@ -4735,7 +4763,7 @@ PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain, // we're building with the leopard linker or later, which automatically // synthesizes these stubs. 
const TargetMachine &TM = DAG.getTarget(); - const Module *Mod = DAG.getMachineFunction().getFunction()->getParent(); + const Module *Mod = DAG.getMachineFunction().getFunction().getParent(); const GlobalValue *GV = nullptr; if (auto *G = dyn_cast(Callee)) GV = G->getGlobal(); @@ -5028,7 +5056,7 @@ SDValue PPCTargetLowering::FinishCall( // any other variadic arguments). Ops.insert(std::next(Ops.begin()), AddTOC); } else if (CallOpc == PPCISD::CALL && - !callsShareTOCBase(MF.getFunction(), Callee, DAG.getTarget())) { + !callsShareTOCBase(&MF.getFunction(), Callee, DAG.getTarget())) { // Otherwise insert NOP for non-local calls. CallOpc = PPCISD::CALL_NOP; } @@ -8811,6 +8839,42 @@ SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const { return Op; } +// ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be +// compared to a value that is atomically loaded (atomic loads zero-extend). +SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op, + SelectionDAG &DAG) const { + assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP && + "Expecting an atomic compare-and-swap here."); + SDLoc dl(Op); + auto *AtomicNode = cast(Op.getNode()); + EVT MemVT = AtomicNode->getMemoryVT(); + if (MemVT.getSizeInBits() >= 32) + return Op; + + SDValue CmpOp = Op.getOperand(2); + // If this is already correctly zero-extended, leave it alone. + auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits()); + if (DAG.MaskedValueIsZero(CmpOp, HighBits)) + return Op; + + // Clear the high bits of the compare operand. + unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1; + SDValue NewCmpOp = + DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp, + DAG.getConstant(MaskVal, dl, MVT::i32)); + + // Replace the existing compare operand with the properly zero-extended one. + SmallVector Ops; + for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++) + Ops.push_back(AtomicNode->getOperand(i)); + Ops[2] = NewCmpOp; + MachineMemOperand *MMO = AtomicNode->getMemOperand(); + SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other); + auto NodeTy = + (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16; + return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO); +} + SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); @@ -9302,6 +9366,8 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { return LowerREM(Op, DAG); case ISD::BSWAP: return LowerBSWAP(Op, DAG); + case ISD::ATOMIC_CMP_SWAP: + return LowerATOMIC_CMP_SWAP(Op, DAG); } } @@ -9334,7 +9400,7 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N, SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0), N->getOperand(1)); - Results.push_back(NewInt); + Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt)); Results.push_back(NewInt.getValue(1)); break; } @@ -9797,7 +9863,7 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI, // Naked functions never have a base pointer, and so we use r1. For all // other functions, this decision must be delayed until during PEI. unsigned BaseReg; - if (MF->getFunction()->hasFnAttribute(Attribute::Naked)) + if (MF->getFunction().hasFnAttribute(Attribute::Naked)) BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1; else BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP; @@ -11882,6 +11948,12 @@ SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N, SDLoc dl(N); SDValue Op(N, 0); + // Don't handle ppc_fp128 here or i1 conversions. 
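+  // These early exits are moved ahead of the sub-word load handling below so
+  // that non-f32/f64 results (e.g. ppc_fp128) and i1 inputs bail out before
+  // reaching it; the same checks are removed from their old position further
+  // down in this function.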
+ if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64) + return SDValue(); + if (Op.getOperand(0).getValueType() == MVT::i1) + return SDValue(); + SDValue FirstOperand(Op.getOperand(0)); bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD && (FirstOperand.getValueType() == MVT::i8 || @@ -11910,11 +11982,6 @@ SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N, return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld); } - // Don't handle ppc_fp128 here or i1 conversions. - if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64) - return SDValue(); - if (Op.getOperand(0).getValueType() == MVT::i1) - return SDValue(); // For i32 intermediate values, unfortunately, the conversion functions // leave the upper 32 bits of the value are undefined. Within the set of @@ -12228,8 +12295,12 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, EVT VT = N->getOperand(1).getValueType(); if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() && isa(N->getOperand(1)) && VT == MVT::i32) { - SDValue Const64 = DAG.getConstant(N->getConstantOperandVal(1), dl, - MVT::i64); + // Need to sign-extended to 64-bits to handle negative values. + EVT MemVT = cast(N)->getMemoryVT(); + uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1), + MemVT.getSizeInBits()); + SDValue Const64 = DAG.getConstant(Val64, dl, MVT::i64); + // DAG.getTruncStore() can't be used here because it doesn't accept // the general (base + offset) addressing mode. // So we use UpdateNodeOperands and setTruncatingStore instead. @@ -13041,6 +13112,7 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, return std::make_pair(0U, &PPC::QSRCRegClass); if (Subtarget.hasAltivec()) return std::make_pair(0U, &PPC::VRRCRegClass); + break; case 'y': // crrc return std::make_pair(0U, &PPC::CRRCRegClass); } @@ -13246,7 +13318,7 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, // Naked functions never have a frame pointer, and so we use r1. For all // other functions, this decision must be delayed until during PEI. unsigned FrameReg; - if (MF.getFunction()->hasFnAttribute(Attribute::Naked)) + if (MF.getFunction().hasFnAttribute(Attribute::Naked)) FrameReg = isPPC64 ? PPC::X1 : PPC::R1; else FrameReg = isPPC64 ? 
PPC::FP8 : PPC::FP; @@ -13291,6 +13363,7 @@ PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, + MachineFunction &MF, unsigned Intrinsic) const { switch (Intrinsic) { case Intrinsic::ppc_qpx_qvlfd: @@ -13343,9 +13416,7 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.offset = -VT.getStoreSize()+1; Info.size = 2*VT.getStoreSize()-1; Info.align = 1; - Info.vol = false; - Info.readMem = true; - Info.writeMem = false; + Info.flags = MachineMemOperand::MOLoad; return true; } case Intrinsic::ppc_qpx_qvlfda: @@ -13379,9 +13450,7 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.offset = 0; Info.size = VT.getStoreSize(); Info.align = 1; - Info.vol = false; - Info.readMem = true; - Info.writeMem = false; + Info.flags = MachineMemOperand::MOLoad; return true; } case Intrinsic::ppc_qpx_qvstfd: @@ -13433,9 +13502,7 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.offset = -VT.getStoreSize()+1; Info.size = 2*VT.getStoreSize()-1; Info.align = 1; - Info.vol = false; - Info.readMem = false; - Info.writeMem = true; + Info.flags = MachineMemOperand::MOStore; return true; } case Intrinsic::ppc_qpx_qvstfda: @@ -13468,9 +13535,7 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.offset = 0; Info.size = VT.getStoreSize(); Info.align = 1; - Info.vol = false; - Info.readMem = false; - Info.writeMem = true; + Info.flags = MachineMemOperand::MOStore; return true; } default: @@ -13497,12 +13562,12 @@ EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size, bool MemcpyStrSrc, MachineFunction &MF) const { if (getTargetMachine().getOptLevel() != CodeGenOpt::None) { - const Function *F = MF.getFunction(); + const Function &F = MF.getFunction(); // When expanding a memset, require at least two QPX instructions to cover // the cost of loading the value to be stored from the constant pool. if (Subtarget.hasQPX() && Size >= 32 && (!IsMemset || Size >= 64) && (!SrcAlign || SrcAlign >= 32) && (!DstAlign || DstAlign >= 32) && - !F->hasFnAttribute(Attribute::NoImplicitFloat)) { + !F.hasFnAttribute(Attribute::NoImplicitFloat)) { return MVT::v4f64; } @@ -13721,7 +13786,7 @@ void PPCTargetLowering::insertCopiesSplitCSR( // fine for CXX_FAST_TLS since the C++-style TLS access functions should be // nounwind. If we want to generalize this later, we may need to emit // CFI pseudo-instructions. - assert(Entry->getParent()->getFunction()->hasFnAttribute( + assert(Entry->getParent()->getFunction().hasFnAttribute( Attribute::NoUnwind) && "Function should be nounwind in insertCopiesSplitCSR!"); Entry->addLiveIn(*I); diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 22dd56b33383..b3215a84829e 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -262,7 +262,7 @@ namespace llvm { /// local dynamic TLS on PPC32. PPC32_PICGOT, - /// G8RC = ADDIS_GOT_TPREL_HA %X2, Symbol - Used by the initial-exec + /// G8RC = ADDIS_GOT_TPREL_HA %x2, Symbol - Used by the initial-exec /// TLS model, produces an ADDIS8 instruction that adds the GOT /// base to sym\@got\@tprel\@ha. ADDIS_GOT_TPREL_HA, @@ -281,18 +281,18 @@ namespace llvm { /// TLS sequence. 
ADD_TLS, - /// G8RC = ADDIS_TLSGD_HA %X2, Symbol - For the general-dynamic TLS + /// G8RC = ADDIS_TLSGD_HA %x2, Symbol - For the general-dynamic TLS /// model, produces an ADDIS8 instruction that adds the GOT base /// register to sym\@got\@tlsgd\@ha. ADDIS_TLSGD_HA, - /// %X3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS + /// %x3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS /// model, produces an ADDI8 instruction that adds G8RReg to /// sym\@got\@tlsgd\@l and stores the result in X3. Hidden by /// ADDIS_TLSGD_L_ADDR until after register assignment. ADDI_TLSGD_L, - /// %X3 = GET_TLS_ADDR %X3, Symbol - For the general-dynamic TLS + /// %x3 = GET_TLS_ADDR %x3, Symbol - For the general-dynamic TLS /// model, produces a call to __tls_get_addr(sym\@tlsgd). Hidden by /// ADDIS_TLSGD_L_ADDR until after register assignment. GET_TLS_ADDR, @@ -302,18 +302,18 @@ namespace llvm { /// register assignment. ADDI_TLSGD_L_ADDR, - /// G8RC = ADDIS_TLSLD_HA %X2, Symbol - For the local-dynamic TLS + /// G8RC = ADDIS_TLSLD_HA %x2, Symbol - For the local-dynamic TLS /// model, produces an ADDIS8 instruction that adds the GOT base /// register to sym\@got\@tlsld\@ha. ADDIS_TLSLD_HA, - /// %X3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS + /// %x3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS /// model, produces an ADDI8 instruction that adds G8RReg to /// sym\@got\@tlsld\@l and stores the result in X3. Hidden by /// ADDIS_TLSLD_L_ADDR until after register assignment. ADDI_TLSLD_L, - /// %X3 = GET_TLSLD_ADDR %X3, Symbol - For the local-dynamic TLS + /// %x3 = GET_TLSLD_ADDR %x3, Symbol - For the local-dynamic TLS /// model, produces a call to __tls_get_addr(sym\@tlsld). Hidden by /// ADDIS_TLSLD_L_ADDR until after register assignment. GET_TLSLD_ADDR, @@ -323,7 +323,7 @@ namespace llvm { /// following register assignment. ADDI_TLSLD_L_ADDR, - /// G8RC = ADDIS_DTPREL_HA %X3, Symbol - For the local-dynamic TLS + /// G8RC = ADDIS_DTPREL_HA %x3, Symbol - For the local-dynamic TLS /// model, produces an ADDIS8 instruction that adds X3 to /// sym\@dtprel\@ha. ADDIS_DTPREL_HA, @@ -430,6 +430,11 @@ namespace llvm { /// The 4xf32 load used for v4i1 constants. QVLFSb, + /// ATOMIC_CMP_SWAP - the exact same as the target-independent nodes + /// except they ensure that the compare input is zero-extended for + /// sub-word versions because the atomic loads zero-extend. + ATOMIC_CMP_SWAP_8, ATOMIC_CMP_SWAP_16, + /// GPRC = TOC_ENTRY GA, TOC /// Loads the entry for GA from the TOC, where the TOC base is given by /// the last operand. 
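// Illustrative standalone sketch (not part of this patch; cmpxchg8 and its
// behaviour are simplified stand-ins for the lbarx/stbcx.-based expansion)
// of why the compare operand of a sub-word ATOMIC_CMP_SWAP has to be
// zero-extended: the atomic load always produces a zero-extended value, so a
// sign-extended "expected" value can never compare equal even when memory
// actually contains it.
#include <cassert>
#include <cstdint>

static uint32_t cmpxchg8(uint8_t &Mem, uint32_t Expected, uint8_t New) {
  uint32_t Loaded = Mem; // zero-extended, as the hardware load is
  if (Loaded == Expected)
    Mem = New;
  return Loaded;
}

int main() {
  uint8_t Byte = 0xFF;
  // A sign-extended i8 compare value (0xFFFFFFFF) never matches the
  // zero-extended load (0x000000FF), so the swap is (wrongly) skipped.
  cmpxchg8(Byte, 0xFFFFFFFFu, 0x00);
  assert(Byte == 0xFF);
  // Masking the compare value to the memory width, as LowerATOMIC_CMP_SWAP
  // does with an AND, lets the exchange succeed.
  cmpxchg8(Byte, 0xFFFFFFFFu & 0xFFu, 0x00);
  assert(Byte == 0x00);
  return 0;
}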
@@ -586,8 +591,8 @@ namespace llvm { bool supportSplitCSR(MachineFunction *MF) const override { return - MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS && - MF->getFunction()->hasFnAttribute(Attribute::NoUnwind); + MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS && + MF->getFunction().hasFnAttribute(Attribute::NoUnwind); } void initializeSplitCSR(MachineBasicBlock *Entry) const override; @@ -773,6 +778,7 @@ namespace llvm { bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, + MachineFunction &MF, unsigned Intrinsic) const override; /// getOptimalMemOpType - Returns the target specific optimal type for load @@ -954,6 +960,7 @@ namespace llvm { SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const; SDValue LowerREM(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBSWAP(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const; SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index d5b5f69e0096..fdd28c2ff03f 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -194,6 +194,11 @@ def : Pat<(PPCcall_nop (i64 texternalsym:$dst)), (BL8_NOP texternalsym:$dst)>; // Atomic operations +// FIXME: some of these might be used with constant operands. This will result +// in constant materialization instructions that may be redundant. We currently +// clean this up in PPCMIPeephole with calls to +// PPCInstrInfo::convertToImmediateForm() but we should probably not emit them +// in the first place. 
let usesCustomInserter = 1 in { let Defs = [CR0] in { def ATOMIC_LOAD_ADD_I64 : Pseudo< diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index f25b929c8083..ec74d309f68a 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -20,7 +20,7 @@ #include "PPCTargetMachine.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -51,6 +51,10 @@ STATISTIC(NumStoreSPILLVSRRCAsVec, STATISTIC(NumStoreSPILLVSRRCAsGpr, "Number of spillvsrrc spilled to stack as gpr"); STATISTIC(NumGPRtoVSRSpill, "Number of gpr spills to spillvsrrc"); +STATISTIC(CmpIselsConverted, + "Number of ISELs that depend on comparison of constants converted"); +STATISTIC(MissedConvertibleImmediateInstrs, + "Number of compare-immediate instructions fed by constants"); static cl:: opt DisableCTRLoopAnal("disable-ppc-ctrloop-analysis", cl::Hidden, @@ -2147,6 +2151,877 @@ bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { return false; } +unsigned PPCInstrInfo::lookThruCopyLike(unsigned SrcReg, + const MachineRegisterInfo *MRI) { + while (true) { + MachineInstr *MI = MRI->getVRegDef(SrcReg); + if (!MI->isCopyLike()) + return SrcReg; + + unsigned CopySrcReg; + if (MI->isCopy()) + CopySrcReg = MI->getOperand(1).getReg(); + else { + assert(MI->isSubregToReg() && "Bad opcode for lookThruCopyLike"); + CopySrcReg = MI->getOperand(2).getReg(); + } + + if (!TargetRegisterInfo::isVirtualRegister(CopySrcReg)) + return CopySrcReg; + + SrcReg = CopySrcReg; + } +} + +// Essentially a compile-time implementation of a compare->isel sequence. +// It takes two constants to compare, along with the true/false registers +// and the comparison type (as a subreg to a CR field) and returns one +// of the true/false registers, depending on the comparison results. +static unsigned selectReg(int64_t Imm1, int64_t Imm2, unsigned CompareOpc, + unsigned TrueReg, unsigned FalseReg, + unsigned CRSubReg) { + // Signed comparisons. The immediates are assumed to be sign-extended. + if (CompareOpc == PPC::CMPWI || CompareOpc == PPC::CMPDI) { + switch (CRSubReg) { + default: llvm_unreachable("Unknown integer comparison type."); + case PPC::sub_lt: + return Imm1 < Imm2 ? TrueReg : FalseReg; + case PPC::sub_gt: + return Imm1 > Imm2 ? TrueReg : FalseReg; + case PPC::sub_eq: + return Imm1 == Imm2 ? TrueReg : FalseReg; + } + } + // Unsigned comparisons. + else if (CompareOpc == PPC::CMPLWI || CompareOpc == PPC::CMPLDI) { + switch (CRSubReg) { + default: llvm_unreachable("Unknown integer comparison type."); + case PPC::sub_lt: + return (uint64_t)Imm1 < (uint64_t)Imm2 ? TrueReg : FalseReg; + case PPC::sub_gt: + return (uint64_t)Imm1 > (uint64_t)Imm2 ? TrueReg : FalseReg; + case PPC::sub_eq: + return Imm1 == Imm2 ? TrueReg : FalseReg; + } + } + return PPC::NoRegister; +} + +// Replace an instruction with one that materializes a constant (and sets +// CR0 if the original instruction was a record-form instruction). +void PPCInstrInfo::replaceInstrWithLI(MachineInstr &MI, + const LoadImmediateInfo &LII) const { + // Remove existing operands. + int OperandToKeep = LII.SetCR ? 1 : 0; + for (int i = MI.getNumOperands() - 1; i > OperandToKeep; i--) + MI.RemoveOperand(i); + + // Replace the instruction. + if (LII.SetCR) { + MI.setDesc(get(LII.Is64Bit ? 
PPC::ANDIo8 : PPC::ANDIo)); + // Set the immediate. + MachineInstrBuilder(*MI.getParent()->getParent(), MI) + .addImm(LII.Imm).addReg(PPC::CR0, RegState::ImplicitDefine); + return; + } + else + MI.setDesc(get(LII.Is64Bit ? PPC::LI8 : PPC::LI)); + + // Set the immediate. + MachineInstrBuilder(*MI.getParent()->getParent(), MI) + .addImm(LII.Imm); +} + +MachineInstr *PPCInstrInfo::getConstantDefMI(MachineInstr &MI, + unsigned &ConstOp, + bool &SeenIntermediateUse) const { + ConstOp = ~0U; + MachineInstr *DefMI = nullptr; + MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo(); + // If we'ere in SSA, get the defs through the MRI. Otherwise, only look + // within the basic block to see if the register is defined using an LI/LI8. + if (MRI->isSSA()) { + for (int i = 1, e = MI.getNumOperands(); i < e; i++) { + if (!MI.getOperand(i).isReg()) + continue; + unsigned Reg = MI.getOperand(i).getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + unsigned TrueReg = lookThruCopyLike(Reg, MRI); + if (TargetRegisterInfo::isVirtualRegister(TrueReg)) { + DefMI = MRI->getVRegDef(TrueReg); + if (DefMI->getOpcode() == PPC::LI || DefMI->getOpcode() == PPC::LI8) { + ConstOp = i; + break; + } + } + } + } else { + // Looking back through the definition for each operand could be expensive, + // so exit early if this isn't an instruction that either has an immediate + // form or is already an immediate form that we can handle. + ImmInstrInfo III; + unsigned Opc = MI.getOpcode(); + bool ConvertibleImmForm = + Opc == PPC::CMPWI || Opc == PPC::CMPLWI || + Opc == PPC::CMPDI || Opc == PPC::CMPLDI || + Opc == PPC::ADDI || Opc == PPC::ADDI8 || + Opc == PPC::ORI || Opc == PPC::ORI8 || + Opc == PPC::XORI || Opc == PPC::XORI8 || + Opc == PPC::RLDICL || Opc == PPC::RLDICLo || + Opc == PPC::RLDICL_32 || Opc == PPC::RLDICL_32_64 || + Opc == PPC::RLWINM || Opc == PPC::RLWINMo || + Opc == PPC::RLWINM8 || Opc == PPC::RLWINM8o; + if (!instrHasImmForm(MI, III) && !ConvertibleImmForm) + return nullptr; + + // Don't convert or %X, %Y, %Y since that's just a register move. + if ((Opc == PPC::OR || Opc == PPC::OR8) && + MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) + return nullptr; + for (int i = 1, e = MI.getNumOperands(); i < e; i++) { + MachineOperand &MO = MI.getOperand(i); + SeenIntermediateUse = false; + if (MO.isReg() && MO.isUse() && !MO.isImplicit()) { + MachineBasicBlock::reverse_iterator E = MI.getParent()->rend(), It = MI; + It++; + unsigned Reg = MI.getOperand(i).getReg(); + // MachineInstr::readsRegister only returns true if the machine + // instruction reads the exact register or its super-register. It + // does not consider uses of sub-registers which seems like strange + // behaviour. Nonetheless, if we end up with a 64-bit register here, + // get the corresponding 32-bit register to check. + if (PPC::G8RCRegClass.contains(Reg)) + Reg = Reg - PPC::X0 + PPC::R0; + + // Is this register defined by a load-immediate in this block? + for ( ; It != E; ++It) { + if (It->modifiesRegister(Reg, &getRegisterInfo())) { + if (It->getOpcode() == PPC::LI || It->getOpcode() == PPC::LI8) { + ConstOp = i; + return &*It; + } else + break; + } else if (It->readsRegister(Reg, &getRegisterInfo())) + // If we see another use of this reg between the def and the MI, + // we want to flat it so the def isn't deleted. + SeenIntermediateUse = true; + } + } + } + } + return ConstOp == ~0U ? 
nullptr : DefMI; +} + +// If this instruction has an immediate form and one of its operands is a +// result of a load-immediate, convert it to the immediate form if the constant +// is in range. +bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI, + MachineInstr **KilledDef) const { + MachineFunction *MF = MI.getParent()->getParent(); + MachineRegisterInfo *MRI = &MF->getRegInfo(); + bool PostRA = !MRI->isSSA(); + bool SeenIntermediateUse = true; + unsigned ConstantOperand = ~0U; + MachineInstr *DefMI = getConstantDefMI(MI, ConstantOperand, + SeenIntermediateUse); + if (!DefMI || !DefMI->getOperand(1).isImm()) + return false; + assert(ConstantOperand < MI.getNumOperands() && + "The constant operand needs to be valid at this point"); + + int64_t Immediate = DefMI->getOperand(1).getImm(); + // Sign-extend to 64-bits. + int64_t SExtImm = ((uint64_t)Immediate & ~0x7FFFuLL) != 0 ? + (Immediate | 0xFFFFFFFFFFFF0000) : Immediate; + + if (KilledDef && MI.getOperand(ConstantOperand).isKill() && + !SeenIntermediateUse) + *KilledDef = DefMI; + + // If this is a reg+reg instruction that has a reg+imm form, convert it now. + ImmInstrInfo III; + if (instrHasImmForm(MI, III)) + return transformToImmForm(MI, III, ConstantOperand, SExtImm); + + bool ReplaceWithLI = false; + bool Is64BitLI = false; + int64_t NewImm = 0; + bool SetCR = false; + unsigned Opc = MI.getOpcode(); + switch (Opc) { + default: return false; + + // FIXME: Any branches conditional on such a comparison can be made + // unconditional. At this time, this happens too infrequently to be worth + // the implementation effort, but if that ever changes, we could convert + // such a pattern here. + case PPC::CMPWI: + case PPC::CMPLWI: + case PPC::CMPDI: + case PPC::CMPLDI: { + // Doing this post-RA would require dataflow analysis to reliably find uses + // of the CR register set by the compare. + if (PostRA) + return false; + // If a compare-immediate is fed by an immediate and is itself an input of + // an ISEL (the most common case) into a COPY of the correct register. + bool Changed = false; + unsigned DefReg = MI.getOperand(0).getReg(); + int64_t Comparand = MI.getOperand(2).getImm(); + int64_t SExtComparand = ((uint64_t)Comparand & ~0x7FFFuLL) != 0 ? + (Comparand | 0xFFFFFFFFFFFF0000) : Comparand; + + for (auto &CompareUseMI : MRI->use_instructions(DefReg)) { + unsigned UseOpc = CompareUseMI.getOpcode(); + if (UseOpc != PPC::ISEL && UseOpc != PPC::ISEL8) + continue; + unsigned CRSubReg = CompareUseMI.getOperand(3).getSubReg(); + unsigned TrueReg = CompareUseMI.getOperand(1).getReg(); + unsigned FalseReg = CompareUseMI.getOperand(2).getReg(); + unsigned RegToCopy = selectReg(SExtImm, SExtComparand, Opc, TrueReg, + FalseReg, CRSubReg); + if (RegToCopy == PPC::NoRegister) + continue; + // Can't use PPC::COPY to copy PPC::ZERO[8]. Convert it to LI[8] 0. + if (RegToCopy == PPC::ZERO || RegToCopy == PPC::ZERO8) { + CompareUseMI.setDesc(get(UseOpc == PPC::ISEL8 ? PPC::LI8 : PPC::LI)); + CompareUseMI.getOperand(1).ChangeToImmediate(0); + CompareUseMI.RemoveOperand(3); + CompareUseMI.RemoveOperand(2); + continue; + } + DEBUG(dbgs() << "Found LI -> CMPI -> ISEL, replacing with a copy.\n"); + DEBUG(DefMI->dump(); MI.dump(); CompareUseMI.dump()); + DEBUG(dbgs() << "Is converted to:\n"); + // Convert to copy and remove unneeded operands. + CompareUseMI.setDesc(get(PPC::COPY)); + CompareUseMI.RemoveOperand(3); + CompareUseMI.RemoveOperand(RegToCopy == TrueReg ? 
2 : 1); + CmpIselsConverted++; + Changed = true; + DEBUG(CompareUseMI.dump()); + } + if (Changed) + return true; + // This may end up incremented multiple times since this function is called + // during a fixed-point transformation, but it is only meant to indicate the + // presence of this opportunity. + MissedConvertibleImmediateInstrs++; + return false; + } + + // Immediate forms - may simply be convertable to an LI. + case PPC::ADDI: + case PPC::ADDI8: { + // Does the sum fit in a 16-bit signed field? + int64_t Addend = MI.getOperand(2).getImm(); + if (isInt<16>(Addend + SExtImm)) { + ReplaceWithLI = true; + Is64BitLI = Opc == PPC::ADDI8; + NewImm = Addend + SExtImm; + break; + } + return false; + } + case PPC::RLDICL: + case PPC::RLDICLo: + case PPC::RLDICL_32: + case PPC::RLDICL_32_64: { + // Use APInt's rotate function. + int64_t SH = MI.getOperand(2).getImm(); + int64_t MB = MI.getOperand(3).getImm(); + APInt InVal(Opc == PPC::RLDICL ? 64 : 32, SExtImm, true); + InVal = InVal.rotl(SH); + uint64_t Mask = (1LLU << (63 - MB + 1)) - 1; + InVal &= Mask; + // Can't replace negative values with an LI as that will sign-extend + // and not clear the left bits. If we're setting the CR bit, we will use + // ANDIo which won't sign extend, so that's safe. + if (isUInt<15>(InVal.getSExtValue()) || + (Opc == PPC::RLDICLo && isUInt<16>(InVal.getSExtValue()))) { + ReplaceWithLI = true; + Is64BitLI = Opc != PPC::RLDICL_32; + NewImm = InVal.getSExtValue(); + SetCR = Opc == PPC::RLDICLo; + if (SetCR && (SExtImm & NewImm) != NewImm) + return false; + break; + } + return false; + } + case PPC::RLWINM: + case PPC::RLWINM8: + case PPC::RLWINMo: + case PPC::RLWINM8o: { + int64_t SH = MI.getOperand(2).getImm(); + int64_t MB = MI.getOperand(3).getImm(); + int64_t ME = MI.getOperand(4).getImm(); + APInt InVal(32, SExtImm, true); + InVal = InVal.rotl(SH); + // Set the bits ( MB + 32 ) to ( ME + 32 ). + uint64_t Mask = ((1LLU << (32 - MB)) - 1) & ~((1LLU << (31 - ME)) - 1); + InVal &= Mask; + // Can't replace negative values with an LI as that will sign-extend + // and not clear the left bits. If we're setting the CR bit, we will use + // ANDIo which won't sign extend, so that's safe. + bool ValueFits = isUInt<15>(InVal.getSExtValue()); + ValueFits |= ((Opc == PPC::RLWINMo || Opc == PPC::RLWINM8o) && + isUInt<16>(InVal.getSExtValue())); + if (ValueFits) { + ReplaceWithLI = true; + Is64BitLI = Opc == PPC::RLWINM8 || Opc == PPC::RLWINM8o; + NewImm = InVal.getSExtValue(); + SetCR = Opc == PPC::RLWINMo || Opc == PPC::RLWINM8o; + if (SetCR && (SExtImm & NewImm) != NewImm) + return false; + break; + } + return false; + } + case PPC::ORI: + case PPC::ORI8: + case PPC::XORI: + case PPC::XORI8: { + int64_t LogicalImm = MI.getOperand(2).getImm(); + int64_t Result = 0; + if (Opc == PPC::ORI || Opc == PPC::ORI8) + Result = LogicalImm | SExtImm; + else + Result = LogicalImm ^ SExtImm; + if (isInt<16>(Result)) { + ReplaceWithLI = true; + Is64BitLI = Opc == PPC::ORI8 || Opc == PPC::XORI8; + NewImm = Result; + break; + } + return false; + } + } + + if (ReplaceWithLI) { + DEBUG(dbgs() << "Replacing instruction:\n"); + DEBUG(MI.dump()); + DEBUG(dbgs() << "Fed by:\n"); + DEBUG(DefMI->dump()); + LoadImmediateInfo LII; + LII.Imm = NewImm; + LII.Is64Bit = Is64BitLI; + LII.SetCR = SetCR; + // If we're setting the CR, the original load-immediate must be kept (as an + // operand to ANDIo/ANDI8o). 
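// Standalone sketch (not from the patch; selectConstantCompare is an
// illustrative stand-in for selectReg above) of the LI -> CMPI -> ISEL
// rewrite: when both inputs of the compare are known constants, the ISEL's
// result is known too, so the whole sequence degenerates to a copy of either
// the true or the false operand.
#include <cassert>
#include <cstdint>

enum class CmpKind { SignedLT, SignedGT, Equal };

static int selectConstantCompare(int64_t Imm1, int64_t Imm2, CmpKind Kind,
                                 int TrueVal, int FalseVal) {
  switch (Kind) {
  case CmpKind::SignedLT: return Imm1 < Imm2 ? TrueVal : FalseVal;
  case CmpKind::SignedGT: return Imm1 > Imm2 ? TrueVal : FalseVal;
  case CmpKind::Equal:    return Imm1 == Imm2 ? TrueVal : FalseVal;
  }
  return FalseVal;
}

int main() {
  // "li 5; cmpwi 7; isel lt, a, b" is just "copy a".
  assert(selectConstantCompare(5, 7, CmpKind::SignedLT, /*a*/ 1, /*b*/ 2) == 1);
  // Under an unsigned compare -1 would be huge, which is why selectReg keys
  // the evaluation on the compare opcode and not only on the CR subregister.
  assert(selectConstantCompare(-1, 7, CmpKind::SignedLT, 1, 2) == 1);
  return 0;
}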
+ if (KilledDef && SetCR) + *KilledDef = nullptr; + replaceInstrWithLI(MI, LII); + DEBUG(dbgs() << "With:\n"); + DEBUG(MI.dump()); + return true; + } + return false; +} + +bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI, + ImmInstrInfo &III) const { + unsigned Opc = MI.getOpcode(); + // The vast majority of the instructions would need their operand 2 replaced + // with an immediate when switching to the reg+imm form. A marked exception + // are the update form loads/stores for which a constant operand 2 would need + // to turn into a displacement and move operand 1 to the operand 2 position. + III.ImmOpNo = 2; + III.ConstantOpNo = 2; + III.ImmWidth = 16; + III.ImmMustBeMultipleOf = 1; + III.TruncateImmTo = 0; + switch (Opc) { + default: return false; + case PPC::ADD4: + case PPC::ADD8: + III.SignedImm = true; + III.ZeroIsSpecialOrig = 0; + III.ZeroIsSpecialNew = 1; + III.IsCommutative = true; + III.ImmOpcode = Opc == PPC::ADD4 ? PPC::ADDI : PPC::ADDI8; + break; + case PPC::ADDC: + case PPC::ADDC8: + III.SignedImm = true; + III.ZeroIsSpecialOrig = 0; + III.ZeroIsSpecialNew = 0; + III.IsCommutative = true; + III.ImmOpcode = Opc == PPC::ADDC ? PPC::ADDIC : PPC::ADDIC8; + break; + case PPC::ADDCo: + III.SignedImm = true; + III.ZeroIsSpecialOrig = 0; + III.ZeroIsSpecialNew = 0; + III.IsCommutative = true; + III.ImmOpcode = PPC::ADDICo; + break; + case PPC::SUBFC: + case PPC::SUBFC8: + III.SignedImm = true; + III.ZeroIsSpecialOrig = 0; + III.ZeroIsSpecialNew = 0; + III.IsCommutative = false; + III.ImmOpcode = Opc == PPC::SUBFC ? PPC::SUBFIC : PPC::SUBFIC8; + break; + case PPC::CMPW: + case PPC::CMPD: + III.SignedImm = true; + III.ZeroIsSpecialOrig = 0; + III.ZeroIsSpecialNew = 0; + III.IsCommutative = false; + III.ImmOpcode = Opc == PPC::CMPW ? PPC::CMPWI : PPC::CMPDI; + break; + case PPC::CMPLW: + case PPC::CMPLD: + III.SignedImm = false; + III.ZeroIsSpecialOrig = 0; + III.ZeroIsSpecialNew = 0; + III.IsCommutative = false; + III.ImmOpcode = Opc == PPC::CMPLW ? PPC::CMPLWI : PPC::CMPLDI; + break; + case PPC::ANDo: + case PPC::AND8o: + case PPC::OR: + case PPC::OR8: + case PPC::XOR: + case PPC::XOR8: + III.SignedImm = false; + III.ZeroIsSpecialOrig = 0; + III.ZeroIsSpecialNew = 0; + III.IsCommutative = true; + switch(Opc) { + default: llvm_unreachable("Unknown opcode"); + case PPC::ANDo: III.ImmOpcode = PPC::ANDIo; break; + case PPC::AND8o: III.ImmOpcode = PPC::ANDIo8; break; + case PPC::OR: III.ImmOpcode = PPC::ORI; break; + case PPC::OR8: III.ImmOpcode = PPC::ORI8; break; + case PPC::XOR: III.ImmOpcode = PPC::XORI; break; + case PPC::XOR8: III.ImmOpcode = PPC::XORI8; break; + } + break; + case PPC::RLWNM: + case PPC::RLWNM8: + case PPC::RLWNMo: + case PPC::RLWNM8o: + case PPC::SLW: + case PPC::SLW8: + case PPC::SLWo: + case PPC::SLW8o: + case PPC::SRW: + case PPC::SRW8: + case PPC::SRWo: + case PPC::SRW8o: + case PPC::SRAW: + case PPC::SRAWo: + III.SignedImm = false; + III.ZeroIsSpecialOrig = 0; + III.ZeroIsSpecialNew = 0; + III.IsCommutative = false; + // This isn't actually true, but the instructions ignore any of the + // upper bits, so any immediate loaded with an LI is acceptable. + // This does not apply to shift right algebraic because a value + // out of range will produce a -1/0. 
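// Worked standalone example (simplified from the RLWINM/RLWINM8 case above;
// the helper names are illustrative) of folding a rotate-and-mask whose
// input is a known constant: rotate the 32-bit value left by SH, keep bits
// MB..ME (PowerPC numbering, 0 = most significant bit), and if the result
// fits, the whole pair collapses to a single load-immediate.
#include <cassert>
#include <cstdint>

static uint32_t rotl32(uint32_t V, unsigned SH) {
  return SH == 0 ? V : (V << SH) | (V >> (32 - SH));
}

static uint32_t rlwinmMask(unsigned MB, unsigned ME) {
  // Same formula as the Mask computation in the RLWINM case.
  return uint32_t(((1ULL << (32 - MB)) - 1) & ~((1ULL << (31 - ME)) - 1));
}

int main() {
  // li r3, 0xF0 ; rlwinm r4, r3, 28, 28, 31   (a logical shift right by 4)
  uint32_t Folded = rotl32(0xF0, 28) & rlwinmMask(28, 31);
  assert(Folded == 0xF); // so the pair can be replaced by li r4, 15
  return 0;
}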
+ III.ImmWidth = 16; + if (Opc == PPC::RLWNM || Opc == PPC::RLWNM8 || + Opc == PPC::RLWNMo || Opc == PPC::RLWNM8o) + III.TruncateImmTo = 5; + else + III.TruncateImmTo = 6; + switch(Opc) { + default: llvm_unreachable("Unknown opcode"); + case PPC::RLWNM: III.ImmOpcode = PPC::RLWINM; break; + case PPC::RLWNM8: III.ImmOpcode = PPC::RLWINM8; break; + case PPC::RLWNMo: III.ImmOpcode = PPC::RLWINMo; break; + case PPC::RLWNM8o: III.ImmOpcode = PPC::RLWINM8o; break; + case PPC::SLW: III.ImmOpcode = PPC::RLWINM; break; + case PPC::SLW8: III.ImmOpcode = PPC::RLWINM8; break; + case PPC::SLWo: III.ImmOpcode = PPC::RLWINMo; break; + case PPC::SLW8o: III.ImmOpcode = PPC::RLWINM8o; break; + case PPC::SRW: III.ImmOpcode = PPC::RLWINM; break; + case PPC::SRW8: III.ImmOpcode = PPC::RLWINM8; break; + case PPC::SRWo: III.ImmOpcode = PPC::RLWINMo; break; + case PPC::SRW8o: III.ImmOpcode = PPC::RLWINM8o; break; + case PPC::SRAW: + III.ImmWidth = 5; + III.TruncateImmTo = 0; + III.ImmOpcode = PPC::SRAWI; + break; + case PPC::SRAWo: + III.ImmWidth = 5; + III.TruncateImmTo = 0; + III.ImmOpcode = PPC::SRAWIo; + break; + } + break; + case PPC::RLDCL: + case PPC::RLDCLo: + case PPC::RLDCR: + case PPC::RLDCRo: + case PPC::SLD: + case PPC::SLDo: + case PPC::SRD: + case PPC::SRDo: + case PPC::SRAD: + case PPC::SRADo: + III.SignedImm = false; + III.ZeroIsSpecialOrig = 0; + III.ZeroIsSpecialNew = 0; + III.IsCommutative = false; + // This isn't actually true, but the instructions ignore any of the + // upper bits, so any immediate loaded with an LI is acceptable. + // This does not apply to shift right algebraic because a value + // out of range will produce a -1/0. + III.ImmWidth = 16; + if (Opc == PPC::RLDCL || Opc == PPC::RLDCLo || + Opc == PPC::RLDCR || Opc == PPC::RLDCRo) + III.TruncateImmTo = 6; + else + III.TruncateImmTo = 7; + switch(Opc) { + default: llvm_unreachable("Unknown opcode"); + case PPC::RLDCL: III.ImmOpcode = PPC::RLDICL; break; + case PPC::RLDCLo: III.ImmOpcode = PPC::RLDICLo; break; + case PPC::RLDCR: III.ImmOpcode = PPC::RLDICR; break; + case PPC::RLDCRo: III.ImmOpcode = PPC::RLDICRo; break; + case PPC::SLD: III.ImmOpcode = PPC::RLDICR; break; + case PPC::SLDo: III.ImmOpcode = PPC::RLDICRo; break; + case PPC::SRD: III.ImmOpcode = PPC::RLDICL; break; + case PPC::SRDo: III.ImmOpcode = PPC::RLDICLo; break; + case PPC::SRAD: + III.ImmWidth = 6; + III.TruncateImmTo = 0; + III.ImmOpcode = PPC::SRADI; + break; + case PPC::SRADo: + III.ImmWidth = 6; + III.TruncateImmTo = 0; + III.ImmOpcode = PPC::SRADIo; + break; + } + break; + // Loads and stores: + case PPC::LBZX: + case PPC::LBZX8: + case PPC::LHZX: + case PPC::LHZX8: + case PPC::LHAX: + case PPC::LHAX8: + case PPC::LWZX: + case PPC::LWZX8: + case PPC::LWAX: + case PPC::LDX: + case PPC::LFSX: + case PPC::LFDX: + case PPC::STBX: + case PPC::STBX8: + case PPC::STHX: + case PPC::STHX8: + case PPC::STWX: + case PPC::STWX8: + case PPC::STDX: + case PPC::STFSX: + case PPC::STFDX: + III.SignedImm = true; + III.ZeroIsSpecialOrig = 1; + III.ZeroIsSpecialNew = 2; + III.IsCommutative = true; + III.ImmOpNo = 1; + III.ConstantOpNo = 2; + switch(Opc) { + default: llvm_unreachable("Unknown opcode"); + case PPC::LBZX: III.ImmOpcode = PPC::LBZ; break; + case PPC::LBZX8: III.ImmOpcode = PPC::LBZ8; break; + case PPC::LHZX: III.ImmOpcode = PPC::LHZ; break; + case PPC::LHZX8: III.ImmOpcode = PPC::LHZ8; break; + case PPC::LHAX: III.ImmOpcode = PPC::LHA; break; + case PPC::LHAX8: III.ImmOpcode = PPC::LHA8; break; + case PPC::LWZX: III.ImmOpcode = PPC::LWZ; break; + case 
PPC::LWZX8: III.ImmOpcode = PPC::LWZ8; break; + case PPC::LWAX: + III.ImmOpcode = PPC::LWA; + III.ImmMustBeMultipleOf = 4; + break; + case PPC::LDX: III.ImmOpcode = PPC::LD; III.ImmMustBeMultipleOf = 4; break; + case PPC::LFSX: III.ImmOpcode = PPC::LFS; break; + case PPC::LFDX: III.ImmOpcode = PPC::LFD; break; + case PPC::STBX: III.ImmOpcode = PPC::STB; break; + case PPC::STBX8: III.ImmOpcode = PPC::STB8; break; + case PPC::STHX: III.ImmOpcode = PPC::STH; break; + case PPC::STHX8: III.ImmOpcode = PPC::STH8; break; + case PPC::STWX: III.ImmOpcode = PPC::STW; break; + case PPC::STWX8: III.ImmOpcode = PPC::STW8; break; + case PPC::STDX: + III.ImmOpcode = PPC::STD; + III.ImmMustBeMultipleOf = 4; + break; + case PPC::STFSX: III.ImmOpcode = PPC::STFS; break; + case PPC::STFDX: III.ImmOpcode = PPC::STFD; break; + } + break; + case PPC::LBZUX: + case PPC::LBZUX8: + case PPC::LHZUX: + case PPC::LHZUX8: + case PPC::LHAUX: + case PPC::LHAUX8: + case PPC::LWZUX: + case PPC::LWZUX8: + case PPC::LDUX: + case PPC::LFSUX: + case PPC::LFDUX: + case PPC::STBUX: + case PPC::STBUX8: + case PPC::STHUX: + case PPC::STHUX8: + case PPC::STWUX: + case PPC::STWUX8: + case PPC::STDUX: + case PPC::STFSUX: + case PPC::STFDUX: + III.SignedImm = true; + III.ZeroIsSpecialOrig = 2; + III.ZeroIsSpecialNew = 3; + III.IsCommutative = false; + III.ImmOpNo = 2; + III.ConstantOpNo = 3; + switch(Opc) { + default: llvm_unreachable("Unknown opcode"); + case PPC::LBZUX: III.ImmOpcode = PPC::LBZU; break; + case PPC::LBZUX8: III.ImmOpcode = PPC::LBZU8; break; + case PPC::LHZUX: III.ImmOpcode = PPC::LHZU; break; + case PPC::LHZUX8: III.ImmOpcode = PPC::LHZU8; break; + case PPC::LHAUX: III.ImmOpcode = PPC::LHAU; break; + case PPC::LHAUX8: III.ImmOpcode = PPC::LHAU8; break; + case PPC::LWZUX: III.ImmOpcode = PPC::LWZU; break; + case PPC::LWZUX8: III.ImmOpcode = PPC::LWZU8; break; + case PPC::LDUX: + III.ImmOpcode = PPC::LDU; + III.ImmMustBeMultipleOf = 4; + break; + case PPC::LFSUX: III.ImmOpcode = PPC::LFSU; break; + case PPC::LFDUX: III.ImmOpcode = PPC::LFDU; break; + case PPC::STBUX: III.ImmOpcode = PPC::STBU; break; + case PPC::STBUX8: III.ImmOpcode = PPC::STBU8; break; + case PPC::STHUX: III.ImmOpcode = PPC::STHU; break; + case PPC::STHUX8: III.ImmOpcode = PPC::STHU8; break; + case PPC::STWUX: III.ImmOpcode = PPC::STWU; break; + case PPC::STWUX8: III.ImmOpcode = PPC::STWU8; break; + case PPC::STDUX: + III.ImmOpcode = PPC::STDU; + III.ImmMustBeMultipleOf = 4; + break; + case PPC::STFSUX: III.ImmOpcode = PPC::STFSU; break; + case PPC::STFDUX: III.ImmOpcode = PPC::STFDU; break; + } + break; + // Power9 only. 
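// Small standalone illustration (assumption: simplified view of the DS
// instruction format) of why LD, STD and LWA carry ImmMustBeMultipleOf = 4
// above: a DS-form displacement only encodes bits [15:2] of the offset, so
// an indexed load whose known offset is not a multiple of four has to stay
// in the X-form.
#include <cassert>
#include <cstdint>

static bool fitsDSForm(int64_t Offset) {
  return (Offset % 4) == 0 && Offset >= -32768 && Offset <= 32764;
}

int main() {
  assert(fitsDSForm(8));  // ldx rD, rA, rB with rB known to be 8 -> ld rD, 8(rA)
  assert(!fitsDSForm(6)); // an offset of 6 is not encodable, keep the ldx
  return 0;
}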
+ case PPC::LXVX: + case PPC::LXSSPX: + case PPC::LXSDX: + case PPC::STXVX: + case PPC::STXSSPX: + case PPC::STXSDX: + if (!Subtarget.hasP9Vector()) + return false; + III.SignedImm = true; + III.ZeroIsSpecialOrig = 1; + III.ZeroIsSpecialNew = 2; + III.IsCommutative = true; + III.ImmOpNo = 1; + III.ConstantOpNo = 2; + switch(Opc) { + default: llvm_unreachable("Unknown opcode"); + case PPC::LXVX: + III.ImmOpcode = PPC::LXV; + III.ImmMustBeMultipleOf = 16; + break; + case PPC::LXSSPX: + III.ImmOpcode = PPC::LXSSP; + III.ImmMustBeMultipleOf = 4; + break; + case PPC::LXSDX: + III.ImmOpcode = PPC::LXSD; + III.ImmMustBeMultipleOf = 4; + break; + case PPC::STXVX: + III.ImmOpcode = PPC::STXV; + III.ImmMustBeMultipleOf = 16; + break; + case PPC::STXSSPX: + III.ImmOpcode = PPC::STXSSP; + III.ImmMustBeMultipleOf = 4; + break; + case PPC::STXSDX: + III.ImmOpcode = PPC::STXSD; + III.ImmMustBeMultipleOf = 4; + break; + } + break; + } + return true; +} + +// Utility function for swaping two arbitrary operands of an instruction. +static void swapMIOperands(MachineInstr &MI, unsigned Op1, unsigned Op2) { + assert(Op1 != Op2 && "Cannot swap operand with itself."); + + unsigned MaxOp = std::max(Op1, Op2); + unsigned MinOp = std::min(Op1, Op2); + MachineOperand MOp1 = MI.getOperand(MinOp); + MachineOperand MOp2 = MI.getOperand(MaxOp); + MI.RemoveOperand(std::max(Op1, Op2)); + MI.RemoveOperand(std::min(Op1, Op2)); + + // If the operands we are swapping are the two at the end (the common case) + // we can just remove both and add them in the opposite order. + if (MaxOp - MinOp == 1 && MI.getNumOperands() == MinOp) { + MI.addOperand(MOp2); + MI.addOperand(MOp1); + } else { + // Store all operands in a temporary vector, remove them and re-add in the + // right order. + SmallVector MOps; + unsigned TotalOps = MI.getNumOperands() + 2; // We've already removed 2 ops. + for (unsigned i = MI.getNumOperands() - 1; i >= MinOp; i--) { + MOps.push_back(MI.getOperand(i)); + MI.RemoveOperand(i); + } + // MOp2 needs to be added next. + MI.addOperand(MOp2); + // Now add the rest. + for (unsigned i = MI.getNumOperands(); i < TotalOps; i++) { + if (i == MaxOp) + MI.addOperand(MOp1); + else { + MI.addOperand(MOps.back()); + MOps.pop_back(); + } + } + } +} + +bool PPCInstrInfo::transformToImmForm(MachineInstr &MI, const ImmInstrInfo &III, + unsigned ConstantOpNo, + int64_t Imm) const { + MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); + bool PostRA = !MRI.isSSA(); + // Exit early if we can't convert this. + if ((ConstantOpNo != III.ConstantOpNo) && !III.IsCommutative) + return false; + if (Imm % III.ImmMustBeMultipleOf) + return false; + if (III.TruncateImmTo) + Imm &= ((1 << III.TruncateImmTo) - 1); + if (III.SignedImm) { + APInt ActualValue(64, Imm, true); + if (!ActualValue.isSignedIntN(III.ImmWidth)) + return false; + } else { + uint64_t UnsignedMax = (1 << III.ImmWidth) - 1; + if ((uint64_t)Imm > UnsignedMax) + return false; + } + + // If we're post-RA, the instructions don't agree on whether register zero is + // special, we can transform this as long as the register operand that will + // end up in the location where zero is special isn't R0. + if (PostRA && III.ZeroIsSpecialOrig != III.ZeroIsSpecialNew) { + unsigned PosForOrigZero = III.ZeroIsSpecialOrig ? 
III.ZeroIsSpecialOrig : + III.ZeroIsSpecialNew + 1; + unsigned OrigZeroReg = MI.getOperand(PosForOrigZero).getReg(); + unsigned NewZeroReg = MI.getOperand(III.ZeroIsSpecialNew).getReg(); + // If R0 is in the operand where zero is special for the new instruction, + // it is unsafe to transform if the constant operand isn't that operand. + if ((NewZeroReg == PPC::R0 || NewZeroReg == PPC::X0) && + ConstantOpNo != III.ZeroIsSpecialNew) + return false; + if ((OrigZeroReg == PPC::R0 || OrigZeroReg == PPC::X0) && + ConstantOpNo != PosForOrigZero) + return false; + } + + unsigned Opc = MI.getOpcode(); + bool SpecialShift32 = + Opc == PPC::SLW || Opc == PPC::SLWo || Opc == PPC::SRW || Opc == PPC::SRWo; + bool SpecialShift64 = + Opc == PPC::SLD || Opc == PPC::SLDo || Opc == PPC::SRD || Opc == PPC::SRDo; + bool SetCR = Opc == PPC::SLWo || Opc == PPC::SRWo || + Opc == PPC::SLDo || Opc == PPC::SRDo; + bool RightShift = + Opc == PPC::SRW || Opc == PPC::SRWo || Opc == PPC::SRD || Opc == PPC::SRDo; + + MI.setDesc(get(III.ImmOpcode)); + if (ConstantOpNo == III.ConstantOpNo) { + // Converting shifts to immediate form is a bit tricky since they may do + // one of three things: + // 1. If the shift amount is between OpSize and 2*OpSize, the result is zero + // 2. If the shift amount is zero, the result is unchanged (save for maybe + // setting CR0) + // 3. If the shift amount is in [1, OpSize), it's just a shift + if (SpecialShift32 || SpecialShift64) { + LoadImmediateInfo LII; + LII.Imm = 0; + LII.SetCR = SetCR; + LII.Is64Bit = SpecialShift64; + uint64_t ShAmt = Imm & (SpecialShift32 ? 0x1F : 0x3F); + if (Imm & (SpecialShift32 ? 0x20 : 0x40)) + replaceInstrWithLI(MI, LII); + // Shifts by zero don't change the value. If we don't need to set CR0, + // just convert this to a COPY. Can't do this post-RA since we've already + // cleaned up the copies. + else if (!SetCR && ShAmt == 0 && !PostRA) { + MI.RemoveOperand(2); + MI.setDesc(get(PPC::COPY)); + } else { + // The 32 bit and 64 bit instructions are quite different. + if (SpecialShift32) { + // Left shifts use (N, 0, 31-N), right shifts use (32-N, N, 31). + uint64_t SH = RightShift ? 32 - ShAmt : ShAmt; + uint64_t MB = RightShift ? ShAmt : 0; + uint64_t ME = RightShift ? 31 : 31 - ShAmt; + MI.getOperand(III.ConstantOpNo).ChangeToImmediate(SH); + MachineInstrBuilder(*MI.getParent()->getParent(), MI).addImm(MB) + .addImm(ME); + } else { + // Left shifts use (N, 63-N), right shifts use (64-N, N). + uint64_t SH = RightShift ? 64 - ShAmt : ShAmt; + uint64_t ME = RightShift ? ShAmt : 63 - ShAmt; + MI.getOperand(III.ConstantOpNo).ChangeToImmediate(SH); + MachineInstrBuilder(*MI.getParent()->getParent(), MI).addImm(ME); + } + } + } else + MI.getOperand(ConstantOpNo).ChangeToImmediate(Imm); + } + // Convert commutative instructions (switch the operands and convert the + // desired one to an immediate. + else if (III.IsCommutative) { + MI.getOperand(ConstantOpNo).ChangeToImmediate(Imm); + swapMIOperands(MI, ConstantOpNo, III.ConstantOpNo); + } else + llvm_unreachable("Should have exited early!"); + + // For instructions for which the constant register replaces a different + // operand than where the immediate goes, we need to swap them. + if (III.ConstantOpNo != III.ImmOpNo) + swapMIOperands(MI, III.ConstantOpNo, III.ImmOpNo); + + // If the R0/X0 register is special for the original instruction and not for + // the new instruction (or vice versa), we need to fix up the register class. 
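// Standalone check (helper names are illustrative, not from the patch) of
// the rotate/mask parameters used above when a 32-bit shift by a known
// amount is rewritten as RLWINM: left shifts become (N, 0, 31-N) and right
// shifts become (32-N, N, 31), while an amount with the 0x20 bit set simply
// produces zero.
#include <cassert>
#include <cstdint>

static uint32_t rotl32(uint32_t V, unsigned SH) {
  SH &= 31;
  return SH == 0 ? V : (V << SH) | (V >> (32 - SH));
}

static uint32_t mask32(unsigned MB, unsigned ME) {
  return uint32_t(((1ULL << (32 - MB)) - 1) & ~((1ULL << (31 - ME)) - 1));
}

int main() {
  uint32_t V = 0x12345678u;
  for (unsigned N = 1; N < 32; ++N) {
    assert((V << N) == (rotl32(V, N) & mask32(0, 31 - N)));  // slw by N
    assert((V >> N) == (rotl32(V, 32 - N) & mask32(N, 31))); // srw by N
  }
  return 0;
}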
+ if (!PostRA && III.ZeroIsSpecialOrig != III.ZeroIsSpecialNew) { + if (!III.ZeroIsSpecialOrig) { + unsigned RegToModify = MI.getOperand(III.ZeroIsSpecialNew).getReg(); + const TargetRegisterClass *NewRC = + MRI.getRegClass(RegToModify)->hasSuperClassEq(&PPC::GPRCRegClass) ? + &PPC::GPRC_and_GPRC_NOR0RegClass : &PPC::G8RC_and_G8RC_NOX0RegClass; + MRI.setRegClass(RegToModify, NewRC); + } + } + return true; +} + const TargetRegisterClass * PPCInstrInfo::updatedRC(const TargetRegisterClass *RC) const { if (Subtarget.hasVSX() && RC == &PPC::VRRCRegClass) @@ -2306,7 +3181,7 @@ PPCInstrInfo::isSignOrZeroExtended(const MachineInstr &MI, bool SignExt, const PPCFunctionInfo *FuncInfo = MF->getInfo(); // We check the ZExt/SExt flags for a method parameter. if (MI.getParent()->getBasicBlock() == - &MF->getFunction()->getEntryBlock()) { + &MF->getFunction().getEntryBlock()) { unsigned VReg = MI.getOperand(0).getReg(); if (MF->getRegInfo().isLiveIn(VReg)) return SignExt ? FuncInfo->isLiveInSExt(VReg) : @@ -2315,10 +3190,10 @@ PPCInstrInfo::isSignOrZeroExtended(const MachineInstr &MI, bool SignExt, // For a method return value, we check the ZExt/SExt flags in attribute. // We assume the following code sequence for method call. - // ADJCALLSTACKDOWN 32, %R1, %R1 - // BL8_NOP ,... - // ADJCALLSTACKUP 32, 0, %R1, %R1 - // %vreg5 = COPY %X3; G8RC:%vreg5 + // ADJCALLSTACKDOWN 32, implicit dead %r1, implicit %r1 + // BL8_NOP @func,... + // ADJCALLSTACKUP 32, 0, implicit dead %r1, implicit %r1 + // %5 = COPY %x3; G8RC:%5 if (SrcReg == PPC::X3) { const MachineBasicBlock *MBB = MI.getParent(); MachineBasicBlock::const_instr_iterator II = @@ -2378,9 +3253,7 @@ PPCInstrInfo::isSignOrZeroExtended(const MachineInstr &MI, bool SignExt, } // If all incoming values are sign-/zero-extended, - // the output of AND, OR, ISEL or PHI is also sign-/zero-extended. - case PPC::AND: - case PPC::AND8: + // the output of OR, ISEL or PHI is also sign-/zero-extended. case PPC::OR: case PPC::OR8: case PPC::ISEL: @@ -2411,6 +3284,36 @@ PPCInstrInfo::isSignOrZeroExtended(const MachineInstr &MI, bool SignExt, return true; } + // If at least one of the incoming values of an AND is zero extended + // then the output is also zero-extended. If both of the incoming values + // are sign-extended then the output is also sign extended. + case PPC::AND: + case PPC::AND8: { + if (Depth >= MAX_DEPTH) + return false; + + assert(MI.getOperand(1).isReg() && MI.getOperand(2).isReg()); + + unsigned SrcReg1 = MI.getOperand(1).getReg(); + unsigned SrcReg2 = MI.getOperand(2).getReg(); + + if (!TargetRegisterInfo::isVirtualRegister(SrcReg1) || + !TargetRegisterInfo::isVirtualRegister(SrcReg2)) + return false; + + const MachineInstr *MISrc1 = MRI->getVRegDef(SrcReg1); + const MachineInstr *MISrc2 = MRI->getVRegDef(SrcReg2); + if (!MISrc1 || !MISrc2) + return false; + + if(SignExt) + return isSignOrZeroExtended(*MISrc1, SignExt, Depth+1) && + isSignOrZeroExtended(*MISrc2, SignExt, Depth+1); + else + return isSignOrZeroExtended(*MISrc1, SignExt, Depth+1) || + isSignOrZeroExtended(*MISrc2, SignExt, Depth+1); + } + default: break; } diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h index 097faf7873c5..8bfb8bc88097 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.h +++ b/lib/Target/PowerPC/PPCInstrInfo.h @@ -72,6 +72,43 @@ enum { }; } // end namespace PPCII +// Instructions that have an immediate form might be convertible to that +// form if the correct input is a result of a load immediate. 
In order to +// know whether the transformation is special, we might need to know some +// of the details of the two forms. +struct ImmInstrInfo { + // Is the immediate field in the immediate form signed or unsigned? + uint64_t SignedImm : 1; + // Does the immediate need to be a multiple of some value? + uint64_t ImmMustBeMultipleOf : 5; + // Is R0/X0 treated specially by the original r+r instruction? + // If so, in which operand? + uint64_t ZeroIsSpecialOrig : 3; + // Is R0/X0 treated specially by the new r+i instruction? + // If so, in which operand? + uint64_t ZeroIsSpecialNew : 3; + // Is the operation commutative? + uint64_t IsCommutative : 1; + // The operand number to check for load immediate. + uint64_t ConstantOpNo : 3; + // The operand number for the immediate. + uint64_t ImmOpNo : 3; + // The opcode of the new instruction. + uint64_t ImmOpcode : 16; + // The size of the immediate. + uint64_t ImmWidth : 5; + // The immediate should be truncated to N bits. + uint64_t TruncateImmTo : 5; +}; + +// Information required to convert an instruction to just a materialized +// immediate. +struct LoadImmediateInfo { + unsigned Imm : 16; + unsigned Is64Bit : 1; + unsigned SetCR : 1; +}; + class PPCSubtarget; class PPCInstrInfo : public PPCGenInstrInfo { PPCSubtarget &Subtarget; @@ -87,6 +124,10 @@ class PPCInstrInfo : public PPCGenInstrInfo { const TargetRegisterClass *RC, SmallVectorImpl &NewMIs, bool &NonRI, bool &SpillsVRS) const; + bool transformToImmForm(MachineInstr &MI, const ImmInstrInfo &III, + unsigned ConstantOpNo, int64_t Imm) const; + MachineInstr *getConstantDefMI(MachineInstr &MI, unsigned &ConstOp, + bool &SeenIntermediateUse) const; virtual void anchor(); protected: @@ -313,6 +354,19 @@ class PPCInstrInfo : public PPCGenInstrInfo { bool isZeroExtended(const MachineInstr &MI, const unsigned depth = 0) const { return isSignOrZeroExtended(MI, false, depth); } + + bool convertToImmediateForm(MachineInstr &MI, + MachineInstr **KilledDef = nullptr) const; + void replaceInstrWithLI(MachineInstr &MI, const LoadImmediateInfo &LII) const; + + // This is used to find the "true" source register for n + // Machine instruction. Returns the original SrcReg unless it is the target + // of a copy-like operation, in which case we chain backwards through all + // such operations to the ultimate source register. If a + // physical register is encountered, we stop the search. + static unsigned lookThruCopyLike(unsigned SrcReg, + const MachineRegisterInfo *MRI); + bool instrHasImmForm(const MachineInstr &MI, ImmInstrInfo &III) const; }; } diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index a5c479edeb8e..43dcc4479cf0 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -257,6 +257,13 @@ def PPCvcmp_o : SDNode<"PPCISD::VCMPo", SDT_PPCvcmp, [SDNPOutGlue]>; def PPCcondbranch : SDNode<"PPCISD::COND_BRANCH", SDT_PPCcondbr, [SDNPHasChain, SDNPOptInGlue]>; +// PPC-specific atomic operations. 
+def PPCatomicCmpSwap_8 : + SDNode<"PPCISD::ATOMIC_CMP_SWAP_8", SDTAtomic3, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; +def PPCatomicCmpSwap_16 : + SDNode<"PPCISD::ATOMIC_CMP_SWAP_16", SDTAtomic3, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; def PPClbrx : SDNode<"PPCISD::LBRX", SDT_PPClbrx, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; def PPCstbrx : SDNode<"PPCISD::STBRX", SDT_PPCstbrx, @@ -1590,6 +1597,11 @@ def : Pat<(prefetch xoaddr:$dst, (i32 0), imm, (i32 0)), (ICBT 0, xoaddr:$dst)>, Requires<[HasICBT]>; // inst prefetch (for read) // Atomic operations +// FIXME: some of these might be used with constant operands. This will result +// in constant materialization instructions that may be redundant. We currently +// clean this up in PPCMIPeephole with calls to +// PPCInstrInfo::convertToImmediateForm() but we should probably not emit them +// in the first place. let usesCustomInserter = 1 in { let Defs = [CR0] in { def ATOMIC_LOAD_ADD_I8 : Pseudo< @@ -1705,6 +1717,11 @@ let usesCustomInserter = 1 in { } } +def : Pat<(PPCatomicCmpSwap_8 xoaddr:$ptr, i32:$old, i32:$new), + (ATOMIC_CMP_SWAP_I8 xoaddr:$ptr, i32:$old, i32:$new)>; +def : Pat<(PPCatomicCmpSwap_16 xoaddr:$ptr, i32:$old, i32:$new), + (ATOMIC_CMP_SWAP_I16 xoaddr:$ptr, i32:$old, i32:$new)>; + // Instructions to support atomic operations let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in { def LBARX : XForm_1<31, 52, (outs gprc:$rD), (ins memrr:$src), @@ -3933,6 +3950,63 @@ def STWCIX : XForm_base_r3xo<31, 917, (outs), (ins gprc:$RST, gprc:$A, gprc:$B), def STDCIX : XForm_base_r3xo<31, 1013, (outs), (ins gprc:$RST, gprc:$A, gprc:$B), "stdcix $RST, $A, $B", IIC_LdStLoad, []>; +// External PID Load Store Instructions + +def LBEPX : XForm_1<31, 95, (outs gprc:$rD), (ins memrr:$src), + "lbepx $rD, $src", IIC_LdStLoad, []>, + Requires<[IsE500]>; + +def LFDEPX : XForm_25<31, 607, (outs f8rc:$frD), (ins memrr:$src), + "lfdepx $frD, $src", IIC_LdStLFD, []>, + Requires<[IsE500]>; + +def LHEPX : XForm_1<31, 287, (outs gprc:$rD), (ins memrr:$src), + "lhepx $rD, $src", IIC_LdStLoad, []>, + Requires<[IsE500]>; + +def LWEPX : XForm_1<31, 31, (outs gprc:$rD), (ins memrr:$src), + "lwepx $rD, $src", IIC_LdStLoad, []>, + Requires<[IsE500]>; + +def STBEPX : XForm_8<31, 223, (outs), (ins gprc:$rS, memrr:$dst), + "stbepx $rS, $dst", IIC_LdStStore, []>, + Requires<[IsE500]>; + +def STFDEPX : XForm_28<31, 735, (outs), (ins f8rc:$frS, memrr:$dst), + "stfdepx $frS, $dst", IIC_LdStSTFD, []>, + Requires<[IsE500]>; + +def STHEPX : XForm_8<31, 415, (outs), (ins gprc:$rS, memrr:$dst), + "sthepx $rS, $dst", IIC_LdStStore, []>, + Requires<[IsE500]>; + +def STWEPX : XForm_8<31, 159, (outs), (ins gprc:$rS, memrr:$dst), + "stwepx $rS, $dst", IIC_LdStStore, []>, + Requires<[IsE500]>; + +def DCBFEP : DCB_Form<127, 0, (outs), (ins memrr:$dst), "dcbfep $dst", + IIC_LdStDCBF, []>, Requires<[IsE500]>; + +def DCBSTEP : DCB_Form<63, 0, (outs), (ins memrr:$dst), "dcbstep $dst", + IIC_LdStDCBF, []>, Requires<[IsE500]>; + +def DCBTEP : DCB_Form_hint<319, (outs), (ins memrr:$dst, u5imm:$TH), + "dcbtep $TH, $dst", IIC_LdStDCBF, []>, + Requires<[IsE500]>; + +def DCBTSTEP : DCB_Form_hint<255, (outs), (ins memrr:$dst, u5imm:$TH), + "dcbtstep $TH, $dst", IIC_LdStDCBF, []>, + Requires<[IsE500]>; + +def DCBZEP : DCB_Form<1023, 0, (outs), (ins memrr:$dst), "dcbzep $dst", + IIC_LdStDCBF, []>, Requires<[IsE500]>; + +def DCBZLEP : DCB_Form<1023, 1, (outs), (ins memrr:$dst), "dcbzlep $dst", + IIC_LdStDCBF, []>, Requires<[IsE500]>; + +def ICBIEP : 
XForm_1a<31, 991, (outs), (ins memrr:$src), "icbiep $src", + IIC_LdStICBI, []>, Requires<[IsE500]>; + //===----------------------------------------------------------------------===// // PowerPC Assembler Instruction Aliases // @@ -4279,7 +4353,7 @@ def RLWNMobm : PPCAsmPseudo<"rlwnm. $rA, $rS, $n, $b", // These generic branch instruction forms are used for the assembler parser only. // Defs and Uses are conservative, since we don't know the BO value. -let PPC970_Unit = 7 in { +let PPC970_Unit = 7, isBranch = 1 in { let Defs = [CTR], Uses = [CTR, RM] in { def gBC : BForm_3<16, 0, 0, (outs), (ins u5imm:$bo, crbitrc:$bi, condbrtarget:$dst), diff --git a/lib/Target/PowerPC/PPCMIPeephole.cpp b/lib/Target/PowerPC/PPCMIPeephole.cpp index a8d98133afcf..a4c7a030389b 100644 --- a/lib/Target/PowerPC/PPCMIPeephole.cpp +++ b/lib/Target/PowerPC/PPCMIPeephole.cpp @@ -41,6 +41,22 @@ STATISTIC(MultiTOCSaves, STATISTIC(NumEliminatedSExt, "Number of eliminated sign-extensions"); STATISTIC(NumEliminatedZExt, "Number of eliminated zero-extensions"); STATISTIC(NumOptADDLIs, "Number of optimized ADD instruction fed by LI"); +STATISTIC(NumConvertedToImmediateForm, + "Number of instructions converted to their immediate form"); +STATISTIC(NumFunctionsEnteredInMIPeephole, + "Number of functions entered in PPC MI Peepholes"); +STATISTIC(NumFixedPointIterations, + "Number of fixed-point iterations converting reg-reg instructions " + "to reg-imm ones"); + +static cl::opt +FixedPointRegToImm("ppc-reg-to-imm-fixed-point", cl::Hidden, cl::init(true), + cl::desc("Iterate to a fixed point when attempting to " + "convert reg-reg instructions to reg-imm")); + +static cl::opt +ConvertRegReg("ppc-convert-rr-to-ri", cl::Hidden, cl::init(true), + cl::desc("Convert eligible reg+reg instructions to reg+imm")); static cl::opt EnableSExtElimination("ppc-eliminate-signext", @@ -52,10 +68,6 @@ static cl::opt cl::desc("enable elimination of zero-extensions"), cl::init(false), cl::Hidden); -namespace llvm { - void initializePPCMIPeepholePass(PassRegistry&); -} - namespace { struct PPCMIPeephole : public MachineFunctionPass { @@ -83,9 +95,6 @@ struct PPCMIPeephole : public MachineFunctionPass { bool eliminateRedundantTOCSaves(std::map &TOCSaves); void UpdateTOCSaves(std::map &TOCSaves, MachineInstr *MI); - // Find the "true" register represented by SrcReg (following chains - // of copies and subreg_to_reg operations). - unsigned lookThruCopyLike(unsigned SrcReg); public: @@ -97,7 +106,7 @@ struct PPCMIPeephole : public MachineFunctionPass { // Main entry point for this pass. bool runOnMachineFunction(MachineFunction &MF) override { - if (skipFunction(*MF.getFunction())) + if (skipFunction(MF.getFunction())) return false; initialize(MF); return simplifyCode(); @@ -212,6 +221,35 @@ bool PPCMIPeephole::simplifyCode(void) { MachineInstr* ToErase = nullptr; std::map TOCSaves; + NumFunctionsEnteredInMIPeephole++; + if (ConvertRegReg) { + // Fixed-point conversion of reg/reg instructions fed by load-immediate + // into reg/imm instructions. FIXME: This is expensive, control it with + // an option. + bool SomethingChanged = false; + do { + NumFixedPointIterations++; + SomethingChanged = false; + for (MachineBasicBlock &MBB : *MF) { + for (MachineInstr &MI : MBB) { + if (MI.isDebugValue()) + continue; + + if (TII->convertToImmediateForm(MI)) { + // We don't erase anything in case the def has other uses. Let DCE + // remove it if it can be removed. 
+ DEBUG(dbgs() << "Converted instruction to imm form: "); + DEBUG(MI.dump()); + NumConvertedToImmediateForm++; + SomethingChanged = true; + Simplified = true; + continue; + } + } + } + } while (SomethingChanged && FixedPointRegToImm); + } + for (MachineBasicBlock &MBB : *MF) { for (MachineInstr &MI : MBB) { @@ -258,8 +296,10 @@ bool PPCMIPeephole::simplifyCode(void) { // XXPERMDI t, SUBREG_TO_REG(s), SUBREG_TO_REG(s), immed. // We have to look through chains of COPY and SUBREG_TO_REG // to find the real source values for comparison. - unsigned TrueReg1 = lookThruCopyLike(MI.getOperand(1).getReg()); - unsigned TrueReg2 = lookThruCopyLike(MI.getOperand(2).getReg()); + unsigned TrueReg1 = + TII->lookThruCopyLike(MI.getOperand(1).getReg(), MRI); + unsigned TrueReg2 = + TII->lookThruCopyLike(MI.getOperand(2).getReg(), MRI); if (TrueReg1 == TrueReg2 && TargetRegisterInfo::isVirtualRegister(TrueReg1)) { @@ -273,7 +313,8 @@ bool PPCMIPeephole::simplifyCode(void) { auto isConversionOfLoadAndSplat = [=]() -> bool { if (DefOpc != PPC::XVCVDPSXDS && DefOpc != PPC::XVCVDPUXDS) return false; - unsigned DefReg = lookThruCopyLike(DefMI->getOperand(1).getReg()); + unsigned DefReg = + TII->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI); if (TargetRegisterInfo::isVirtualRegister(DefReg)) { MachineInstr *LoadMI = MRI->getVRegDef(DefReg); if (LoadMI && LoadMI->getOpcode() == PPC::LXVDSX) @@ -299,10 +340,10 @@ bool PPCMIPeephole::simplifyCode(void) { // can replace it with a copy. if (DefOpc == PPC::XXPERMDI) { unsigned FeedImmed = DefMI->getOperand(3).getImm(); - unsigned FeedReg1 - = lookThruCopyLike(DefMI->getOperand(1).getReg()); - unsigned FeedReg2 - = lookThruCopyLike(DefMI->getOperand(2).getReg()); + unsigned FeedReg1 = + TII->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI); + unsigned FeedReg2 = + TII->lookThruCopyLike(DefMI->getOperand(2).getReg(), MRI); if ((FeedImmed == 0 || FeedImmed == 3) && FeedReg1 == FeedReg2) { DEBUG(dbgs() @@ -360,7 +401,8 @@ bool PPCMIPeephole::simplifyCode(void) { case PPC::XXSPLTW: { unsigned MyOpcode = MI.getOpcode(); unsigned OpNo = MyOpcode == PPC::XXSPLTW ? 1 : 2; - unsigned TrueReg = lookThruCopyLike(MI.getOperand(OpNo).getReg()); + unsigned TrueReg = + TII->lookThruCopyLike(MI.getOperand(OpNo).getReg(), MRI); if (!TargetRegisterInfo::isVirtualRegister(TrueReg)) break; MachineInstr *DefMI = MRI->getVRegDef(TrueReg); @@ -422,7 +464,8 @@ bool PPCMIPeephole::simplifyCode(void) { } case PPC::XVCVDPSP: { // If this is a DP->SP conversion fed by an FRSP, the FRSP is redundant. - unsigned TrueReg = lookThruCopyLike(MI.getOperand(1).getReg()); + unsigned TrueReg = + TII->lookThruCopyLike(MI.getOperand(1).getReg(), MRI); if (!TargetRegisterInfo::isVirtualRegister(TrueReg)) break; MachineInstr *DefMI = MRI->getVRegDef(TrueReg); @@ -430,8 +473,10 @@ bool PPCMIPeephole::simplifyCode(void) { // This can occur when building a vector of single precision or integer // values. if (DefMI && DefMI->getOpcode() == PPC::XXPERMDI) { - unsigned DefsReg1 = lookThruCopyLike(DefMI->getOperand(1).getReg()); - unsigned DefsReg2 = lookThruCopyLike(DefMI->getOperand(2).getReg()); + unsigned DefsReg1 = + TII->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI); + unsigned DefsReg2 = + TII->lookThruCopyLike(DefMI->getOperand(2).getReg(), MRI); if (!TargetRegisterInfo::isVirtualRegister(DefsReg1) || !TargetRegisterInfo::isVirtualRegister(DefsReg2)) break; @@ -585,9 +630,9 @@ bool PPCMIPeephole::simplifyCode(void) { // We can eliminate RLDICL (e.g. 
for zero-extension) // if all bits to clear are already zero in the input. // This code assume following code sequence for zero-extension. - // %vreg6 = COPY %vreg5:sub_32; (optional) - // %vreg8 = IMPLICIT_DEF; - // %vreg7 = INSERT_SUBREG %vreg8, %vreg6, sub_32; + // %6 = COPY %5:sub_32; (optional) + // %8 = IMPLICIT_DEF; + // %7 = INSERT_SUBREG %8, %6, sub_32; if (!EnableZExtElimination) break; if (MI.getOperand(2).getImm() != 0) @@ -685,8 +730,8 @@ bool PPCMIPeephole::simplifyCode(void) { DEBUG(dbgs() << "Optimizing LI to ADDI: "); DEBUG(LiMI->dump()); - // There could be repeated registers in the PHI, e.g: %vreg1 = - // PHI %vreg6, , %vreg8, , %vreg8, ; So if we've + // There could be repeated registers in the PHI, e.g: %1 = + // PHI %6, <%bb.2>, %8, <%bb.3>, %8, <%bb.6>; So if we've // already replaced the def instruction, skip. if (LiMI->getOpcode() == PPC::ADDI || LiMI->getOpcode() == PPC::ADDI8) continue; @@ -1039,10 +1084,21 @@ bool PPCMIPeephole::eliminateRedundantCompare(void) { // we replace it with a signed comparison if the comparison // to be merged is a signed comparison. // In other cases of opcode mismatch, we cannot optimize this. - if (isEqOrNe(BI2) && + + // We cannot change opcode when comparing against an immediate + // if the most significant bit of the immediate is one + // due to the difference in sign extension. + auto CmpAgainstImmWithSignBit = [](MachineInstr *I) { + if (!I->getOperand(2).isImm()) + return false; + int16_t Imm = (int16_t)I->getOperand(2).getImm(); + return Imm < 0; + }; + + if (isEqOrNe(BI2) && !CmpAgainstImmWithSignBit(CMPI2) && CMPI1->getOpcode() == getSignedCmpOpCode(CMPI2->getOpcode())) NewOpCode = CMPI1->getOpcode(); - else if (isEqOrNe(BI1) && + else if (isEqOrNe(BI1) && !CmpAgainstImmWithSignBit(CMPI1) && getSignedCmpOpCode(CMPI1->getOpcode()) == CMPI2->getOpcode()) NewOpCode = CMPI2->getOpcode(); else continue; @@ -1209,8 +1265,9 @@ bool PPCMIPeephole::eliminateRedundantCompare(void) { DEBUG(BI1->dump()); DEBUG(BI2->dump()); if (IsPartiallyRedundant) { - DEBUG(dbgs() << "The following compare is moved into BB#" << - MBBtoMoveCmp->getNumber() << " to handle partial redundancy.\n"); + DEBUG(dbgs() << "The following compare is moved into " + << printMBBReference(*MBBtoMoveCmp) + << " to handle partial redundancy.\n"); DEBUG(CMPI2->dump()); } @@ -1220,36 +1277,6 @@ bool PPCMIPeephole::eliminateRedundantCompare(void) { return Simplified; } -// This is used to find the "true" source register for an -// XXPERMDI instruction, since MachineCSE does not handle the -// "copy-like" operations (Copy and SubregToReg). Returns -// the original SrcReg unless it is the target of a copy-like -// operation, in which case we chain backwards through all -// such operations to the ultimate source register. If a -// physical register is encountered, we stop the search. 
-unsigned PPCMIPeephole::lookThruCopyLike(unsigned SrcReg) { - - while (true) { - - MachineInstr *MI = MRI->getVRegDef(SrcReg); - if (!MI->isCopyLike()) - return SrcReg; - - unsigned CopySrcReg; - if (MI->isCopy()) - CopySrcReg = MI->getOperand(1).getReg(); - else { - assert(MI->isSubregToReg() && "bad opcode for lookThruCopyLike"); - CopySrcReg = MI->getOperand(2).getReg(); - } - - if (!TargetRegisterInfo::isVirtualRegister(CopySrcReg)) - return CopySrcReg; - - SrcReg = CopySrcReg; - } -} - } // end default namespace INITIALIZE_PASS_BEGIN(PPCMIPeephole, DEBUG_TYPE, diff --git a/lib/Target/PowerPC/PPCMachineBasicBlockUtils.h b/lib/Target/PowerPC/PPCMachineBasicBlockUtils.h new file mode 100644 index 000000000000..628ea2ab9fe6 --- /dev/null +++ b/lib/Target/PowerPC/PPCMachineBasicBlockUtils.h @@ -0,0 +1,198 @@ +//==-- PPCMachineBasicBlockUtils.h - Functions for common MBB operations ---==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines utility functions for commonly used operations on +// MachineBasicBlock's. +// NOTE: Include this file after defining DEBUG_TYPE so that the debug messages +// can be emitted for the pass that is using this. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_PPC_MACHINE_BASIC_BLOCK_UTILS_H +#define LLVM_LIB_TARGET_PPC_MACHINE_BASIC_BLOCK_UTILS_H + +#include "PPCInstrInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" + +#ifndef DEBUG_TYPE +#define DEBUG_TYPE "ppc-generic-mbb-utilities" +#endif + +using namespace llvm; + +/// Given a basic block \p Successor that potentially contains PHIs, this +/// function will look for any incoming values in the PHIs that are supposed to +/// be coming from \p OrigMBB but whose definition is actually in \p NewMBB. +/// Any such PHIs will be updated to reflect reality. +static void updatePHIs(MachineBasicBlock *Successor, MachineBasicBlock *OrigMBB, + MachineBasicBlock *NewMBB, MachineRegisterInfo *MRI) { + for (auto &MI : Successor->instrs()) { + if (!MI.isPHI()) + continue; + // This is a really ugly-looking loop, but it was pillaged directly from + // MachineBasicBlock::transferSuccessorsAndUpdatePHIs(). + for (unsigned i = 2, e = MI.getNumOperands()+1; i != e; i += 2) { + MachineOperand &MO = MI.getOperand(i); + if (MO.getMBB() == OrigMBB) { + // Check if the instruction is actualy defined in NewMBB. + if (MI.getOperand(i-1).isReg()) { + MachineInstr *DefMI = MRI->getVRegDef(MI.getOperand(i-1).getReg()); + if (DefMI->getParent() == NewMBB || !OrigMBB->isSuccessor(Successor)) { + MO.setMBB(NewMBB); + break; + } + } + } + } + } +} + +/// Given a basic block \p Successor that potentially contains PHIs, this +/// function will look for PHIs that have an incoming value from \p OrigMBB +/// and will add the same incoming value from \p NewMBB. +/// NOTE: This should only be used if \p NewMBB is an immediate dominator of +/// \p OrigMBB. 
+static void addIncomingValuesToPHIs(MachineBasicBlock *Successor, + MachineBasicBlock *OrigMBB, + MachineBasicBlock *NewMBB, + MachineRegisterInfo *MRI) { + assert(OrigMBB->isSuccessor(NewMBB) && "NewMBB must be a sucessor of OrigMBB"); + for (auto &MI : Successor->instrs()) { + if (!MI.isPHI()) + continue; + // This is a really ugly-looking loop, but it was pillaged directly from + // MachineBasicBlock::transferSuccessorsAndUpdatePHIs(). + for (unsigned i = 2, e = MI.getNumOperands()+1; i != e; i += 2) { + MachineOperand &MO = MI.getOperand(i); + if (MO.getMBB() == OrigMBB) { + MachineInstrBuilder MIB(*MI.getParent()->getParent(), &MI); + MIB.addReg(MI.getOperand(i-1).getReg()).addMBB(NewMBB); + break; + } + } + } +} + +struct BlockSplitInfo { + MachineInstr *OrigBranch; + MachineInstr *SplitBefore; + MachineInstr *SplitCond; + bool InvertNewBranch; + bool InvertOrigBranch; + bool BranchToFallThrough; + const MachineBranchProbabilityInfo *MBPI; + MachineInstr *MIToDelete; + MachineInstr *NewCond; + bool allInstrsInSameMBB() { + if (!OrigBranch || !SplitBefore || !SplitCond) + return false; + MachineBasicBlock *MBB = OrigBranch->getParent(); + if (SplitBefore->getParent() != MBB || + SplitCond->getParent() != MBB) + return false; + if (MIToDelete && MIToDelete->getParent() != MBB) + return false; + if (NewCond && NewCond->getParent() != MBB) + return false; + return true; + } +}; + +/// Splits a MachineBasicBlock to branch before \p SplitBefore. The original +/// branch is \p OrigBranch. The target of the new branch can either be the same +/// as the target of the original branch or the fallthrough successor of the +/// original block as determined by \p BranchToFallThrough. The branch +/// conditions will be inverted according to \p InvertNewBranch and +/// \p InvertOrigBranch. If an instruction that previously fed the branch is to +/// be deleted, it is provided in \p MIToDelete and \p NewCond will be used as +/// the branch condition. The branch probabilities will be set if the +/// MachineBranchProbabilityInfo isn't null. +static bool splitMBB(BlockSplitInfo &BSI) { + assert(BSI.allInstrsInSameMBB() && + "All instructions must be in the same block."); + + MachineBasicBlock *ThisMBB = BSI.OrigBranch->getParent(); + MachineFunction *MF = ThisMBB->getParent(); + MachineRegisterInfo *MRI = &MF->getRegInfo(); + assert(MRI->isSSA() && "Can only do this while the function is in SSA form."); + if (ThisMBB->succ_size() != 2) { + DEBUG(dbgs() << "Don't know how to handle blocks that don't have exactly" + << " two succesors.\n"); + return false; + } + + const PPCInstrInfo *TII = MF->getSubtarget().getInstrInfo(); + unsigned OrigBROpcode = BSI.OrigBranch->getOpcode(); + unsigned InvertedOpcode = + OrigBROpcode == PPC::BC ? PPC::BCn : + OrigBROpcode == PPC::BCn ? PPC::BC : + OrigBROpcode == PPC::BCLR ? PPC::BCLRn : PPC::BCLR; + unsigned NewBROpcode = BSI.InvertNewBranch ? InvertedOpcode : OrigBROpcode; + MachineBasicBlock *OrigTarget = BSI.OrigBranch->getOperand(1).getMBB(); + MachineBasicBlock *OrigFallThrough = + OrigTarget == *ThisMBB->succ_begin() ? *ThisMBB->succ_rbegin() : + *ThisMBB->succ_begin(); + MachineBasicBlock *NewBRTarget = + BSI.BranchToFallThrough ? OrigFallThrough : OrigTarget; + BranchProbability ProbToNewTarget = + !BSI.MBPI ? BranchProbability::getUnknown() : + BSI.MBPI->getEdgeProbability(ThisMBB, NewBRTarget); + + // Create a new basic block. 
+ MachineBasicBlock::iterator InsertPoint = BSI.SplitBefore; + const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock(); + MachineFunction::iterator It = ThisMBB->getIterator(); + MachineBasicBlock *NewMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MF->insert(++It, NewMBB); + + // Move everything after SplitBefore into the new block. + NewMBB->splice(NewMBB->end(), ThisMBB, InsertPoint, ThisMBB->end()); + NewMBB->transferSuccessors(ThisMBB); + + // Add the two successors to ThisMBB. The probabilities come from the + // existing blocks if available. + ThisMBB->addSuccessor(NewBRTarget, ProbToNewTarget); + ThisMBB->addSuccessor(NewMBB, ProbToNewTarget.getCompl()); + + // Add the branches to ThisMBB. + BuildMI(*ThisMBB, ThisMBB->end(), BSI.SplitBefore->getDebugLoc(), + TII->get(NewBROpcode)).addReg(BSI.SplitCond->getOperand(0).getReg()) + .addMBB(NewBRTarget); + BuildMI(*ThisMBB, ThisMBB->end(), BSI.SplitBefore->getDebugLoc(), + TII->get(PPC::B)).addMBB(NewMBB); + if (BSI.MIToDelete) + BSI.MIToDelete->eraseFromParent(); + + // Change the condition on the original branch and invert it if requested. + auto FirstTerminator = NewMBB->getFirstTerminator(); + if (BSI.NewCond) { + assert(FirstTerminator->getOperand(0).isReg() && + "Can't update condition of unconditional branch."); + FirstTerminator->getOperand(0).setReg(BSI.NewCond->getOperand(0).getReg()); + } + if (BSI.InvertOrigBranch) + FirstTerminator->setDesc(TII->get(InvertedOpcode)); + + // If any of the PHIs in the successors of NewMBB reference values that + // now come from NewMBB, they need to be updated. + for (auto *Succ : NewMBB->successors()) { + updatePHIs(Succ, ThisMBB, NewMBB, MRI); + } + addIncomingValuesToPHIs(NewBRTarget, ThisMBB, NewMBB, MRI); + + DEBUG(dbgs() << "After splitting, ThisMBB:\n"; ThisMBB->dump()); + DEBUG(dbgs() << "NewMBB:\n"; NewMBB->dump()); + DEBUG(dbgs() << "New branch-to block:\n"; NewBRTarget->dump()); + return true; +} + + +#endif diff --git a/lib/Target/PowerPC/PPCPreEmitPeephole.cpp b/lib/Target/PowerPC/PPCPreEmitPeephole.cpp new file mode 100644 index 000000000000..d524c354ed35 --- /dev/null +++ b/lib/Target/PowerPC/PPCPreEmitPeephole.cpp @@ -0,0 +1,95 @@ +//===--------- PPCPreEmitPeephole.cpp - Late peephole optimizations -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// A pre-emit peephole for catching opportunities introduced by late passes such +// as MachineBlockPlacement. 
+// +//===----------------------------------------------------------------------===// + +#include "PPC.h" +#include "PPCInstrInfo.h" +#include "PPCSubtarget.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/LivePhysRegs.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +#define DEBUG_TYPE "ppc-pre-emit-peephole" + +STATISTIC(NumRRConvertedInPreEmit, + "Number of r+r instructions converted to r+i in pre-emit peephole"); +STATISTIC(NumRemovedInPreEmit, + "Number of instructions deleted in pre-emit peephole"); + +static cl::opt +RunPreEmitPeephole("ppc-late-peephole", cl::Hidden, cl::init(true), + cl::desc("Run pre-emit peephole optimizations.")); + +namespace { + class PPCPreEmitPeephole : public MachineFunctionPass { + public: + static char ID; + PPCPreEmitPeephole() : MachineFunctionPass(ID) { + initializePPCPreEmitPeepholePass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + MachineFunctionPass::getAnalysisUsage(AU); + } + + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoVRegs); + } + + bool runOnMachineFunction(MachineFunction &MF) override { + if (skipFunction(MF.getFunction()) || !RunPreEmitPeephole) + return false; + bool Changed = false; + const PPCInstrInfo *TII = MF.getSubtarget().getInstrInfo(); + SmallVector InstrsToErase; + for (MachineBasicBlock &MBB : MF) { + for (MachineInstr &MI : MBB) { + MachineInstr *DefMIToErase = nullptr; + if (TII->convertToImmediateForm(MI, &DefMIToErase)) { + Changed = true; + NumRRConvertedInPreEmit++; + DEBUG(dbgs() << "Converted instruction to imm form: "); + DEBUG(MI.dump()); + if (DefMIToErase) { + InstrsToErase.push_back(DefMIToErase); + } + } + } + } + for (MachineInstr *MI : InstrsToErase) { + DEBUG(dbgs() << "PPC pre-emit peephole: erasing instruction: "); + DEBUG(MI->dump()); + MI->eraseFromParent(); + NumRemovedInPreEmit++; + } + return Changed; + } + }; +} + +INITIALIZE_PASS(PPCPreEmitPeephole, DEBUG_TYPE, "PowerPC Pre-Emit Peephole", + false, false) +char PPCPreEmitPeephole::ID = 0; + +FunctionPass *llvm::createPPCPreEmitPeepholePass() { + return new PPCPreEmitPeephole(); +} diff --git a/lib/Target/PowerPC/PPCQPXLoadSplat.cpp b/lib/Target/PowerPC/PPCQPXLoadSplat.cpp index bc8652393f4b..25b2b54cbe98 100644 --- a/lib/Target/PowerPC/PPCQPXLoadSplat.cpp +++ b/lib/Target/PowerPC/PPCQPXLoadSplat.cpp @@ -60,7 +60,7 @@ FunctionPass *llvm::createPPCQPXLoadSplatPass() { } bool PPCQPXLoadSplat::runOnMachineFunction(MachineFunction &MF) { - if (skipFunction(*MF.getFunction())) + if (skipFunction(MF.getFunction())) return false; bool MadeChange = false; @@ -79,8 +79,8 @@ bool PPCQPXLoadSplat::runOnMachineFunction(MachineFunction &MF) { } // We're looking for a sequence like this: - // %F0 = LFD 0, %X3, %QF0; mem:LD8[%a](tbaa=!2) - // %QF1 = QVESPLATI %QF0, 0, %RM + // %f0 = LFD 0, killed %x3, implicit-def %qf0; mem:LD8[%a](tbaa=!2) + // %qf1 = QVESPLATI killed %qf0, 0, implicit %rm for (auto SI = Splats.begin(); SI != Splats.end();) { MachineInstr *SMI = *SI; diff --git a/lib/Target/PowerPC/PPCReduceCRLogicals.cpp b/lib/Target/PowerPC/PPCReduceCRLogicals.cpp new file mode 100644 index 000000000000..5b2d7191683c --- /dev/null +++ 
b/lib/Target/PowerPC/PPCReduceCRLogicals.cpp @@ -0,0 +1,535 @@ +//===---- PPCReduceCRLogicals.cpp - Reduce CR Bit Logical operations ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===---------------------------------------------------------------------===// +// +// This pass aims to reduce the number of logical operations on bits in the CR +// register. These instructions have a fairly high latency and only a single +// pipeline at their disposal in modern PPC cores. Furthermore, they have a +// tendency to occur in fairly small blocks where there's little opportunity +// to hide the latency between the CR logical operation and its user. +// +//===---------------------------------------------------------------------===// + +#include "PPCInstrInfo.h" +#include "PPC.h" +#include "PPCTargetMachine.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/Support/Debug.h" +#include "llvm/ADT/Statistic.h" + +using namespace llvm; + +#define DEBUG_TYPE "ppc-reduce-cr-ops" +#include "PPCMachineBasicBlockUtils.h" + +STATISTIC(NumContainedSingleUseBinOps, + "Number of single-use binary CR logical ops contained in a block"); +STATISTIC(NumToSplitBlocks, + "Number of binary CR logical ops that can be used to split blocks"); +STATISTIC(TotalCRLogicals, "Number of CR logical ops."); +STATISTIC(TotalNullaryCRLogicals, + "Number of nullary CR logical ops (CRSET/CRUNSET)."); +STATISTIC(TotalUnaryCRLogicals, "Number of unary CR logical ops."); +STATISTIC(TotalBinaryCRLogicals, "Number of CR logical ops."); +STATISTIC(NumBlocksSplitOnBinaryCROp, + "Number of blocks split on CR binary logical ops."); +STATISTIC(NumNotSplitIdenticalOperands, + "Number of blocks not split due to operands being identical."); +STATISTIC(NumNotSplitChainCopies, + "Number of blocks not split due to operands being chained copies."); +STATISTIC(NumNotSplitWrongOpcode, + "Number of blocks not split due to the wrong opcode."); + +namespace llvm { + void initializePPCReduceCRLogicalsPass(PassRegistry&); +} + +namespace { + +static bool isBinary(MachineInstr &MI) { + return MI.getNumOperands() == 3; +} + +static bool isNullary(MachineInstr &MI) { + return MI.getNumOperands() == 1; +} + +/// Given a CR logical operation \p CROp, branch opcode \p BROp as well as +/// a flag to indicate if the first operand of \p CROp is used as the +/// SplitBefore operand, determines whether either of the branches are to be +/// inverted as well as whether the new target should be the original +/// fall-through block. +static void +computeBranchTargetAndInversion(unsigned CROp, unsigned BROp, bool UsingDef1, + bool &InvertNewBranch, bool &InvertOrigBranch, + bool &TargetIsFallThrough) { + // The conditions under which each of the output operands should be [un]set + // can certainly be written much more concisely with just 3 if statements or + // ternary expressions. However, this provides a much clearer overview to the + // reader as to what is set for each combination. + if (BROp == PPC::BC || BROp == PPC::BCLR) { + // Regular branches. 
+ switch (CROp) { + default: + llvm_unreachable("Don't know how to handle this CR logical."); + case PPC::CROR: + InvertNewBranch = false; + InvertOrigBranch = false; + TargetIsFallThrough = false; + return; + case PPC::CRAND: + InvertNewBranch = true; + InvertOrigBranch = false; + TargetIsFallThrough = true; + return; + case PPC::CRNAND: + InvertNewBranch = true; + InvertOrigBranch = true; + TargetIsFallThrough = false; + return; + case PPC::CRNOR: + InvertNewBranch = false; + InvertOrigBranch = true; + TargetIsFallThrough = true; + return; + case PPC::CRORC: + InvertNewBranch = UsingDef1; + InvertOrigBranch = !UsingDef1; + TargetIsFallThrough = false; + return; + case PPC::CRANDC: + InvertNewBranch = !UsingDef1; + InvertOrigBranch = !UsingDef1; + TargetIsFallThrough = true; + return; + } + } else if (BROp == PPC::BCn || BROp == PPC::BCLRn) { + // Negated branches. + switch (CROp) { + default: + llvm_unreachable("Don't know how to handle this CR logical."); + case PPC::CROR: + InvertNewBranch = true; + InvertOrigBranch = false; + TargetIsFallThrough = true; + return; + case PPC::CRAND: + InvertNewBranch = false; + InvertOrigBranch = false; + TargetIsFallThrough = false; + return; + case PPC::CRNAND: + InvertNewBranch = false; + InvertOrigBranch = true; + TargetIsFallThrough = true; + return; + case PPC::CRNOR: + InvertNewBranch = true; + InvertOrigBranch = true; + TargetIsFallThrough = false; + return; + case PPC::CRORC: + InvertNewBranch = !UsingDef1; + InvertOrigBranch = !UsingDef1; + TargetIsFallThrough = true; + return; + case PPC::CRANDC: + InvertNewBranch = UsingDef1; + InvertOrigBranch = !UsingDef1; + TargetIsFallThrough = false; + return; + } + } else + llvm_unreachable("Don't know how to handle this branch."); +} + +class PPCReduceCRLogicals : public MachineFunctionPass { + +public: + static char ID; + struct CRLogicalOpInfo { + MachineInstr *MI; + // FIXME: If chains of copies are to be handled, this should be a vector. + std::pair CopyDefs; + std::pair TrueDefs; + unsigned IsBinary : 1; + unsigned IsNullary : 1; + unsigned ContainedInBlock : 1; + unsigned FeedsISEL : 1; + unsigned FeedsBR : 1; + unsigned FeedsLogical : 1; + unsigned SingleUse : 1; + unsigned DefsSingleUse : 1; + unsigned SubregDef1; + unsigned SubregDef2; + CRLogicalOpInfo() : MI(nullptr), IsBinary(0), IsNullary(0), + ContainedInBlock(0), FeedsISEL(0), FeedsBR(0), + FeedsLogical(0), SingleUse(0), DefsSingleUse(1), + SubregDef1(0), SubregDef2(0) { } + void dump(); + }; + +private: + const PPCInstrInfo *TII; + MachineFunction *MF; + MachineRegisterInfo *MRI; + const MachineBranchProbabilityInfo *MBPI; + + // A vector to contain all the CR logical operations + std::vector AllCRLogicalOps; + void initialize(MachineFunction &MFParm); + void collectCRLogicals(); + bool handleCROp(CRLogicalOpInfo &CRI); + bool splitBlockOnBinaryCROp(CRLogicalOpInfo &CRI); + static bool isCRLogical(MachineInstr &MI) { + unsigned Opc = MI.getOpcode(); + return Opc == PPC::CRAND || Opc == PPC::CRNAND || Opc == PPC::CROR || + Opc == PPC::CRXOR || Opc == PPC::CRNOR || Opc == PPC::CREQV || + Opc == PPC::CRANDC || Opc == PPC::CRORC || Opc == PPC::CRSET || + Opc == PPC::CRUNSET || Opc == PPC::CR6SET || Opc == PPC::CR6UNSET; + } + bool simplifyCode() { + bool Changed = false; + // Not using a range-based for loop here as the vector may grow while being + // operated on. 
+ for (unsigned i = 0; i < AllCRLogicalOps.size(); i++) + Changed |= handleCROp(AllCRLogicalOps[i]); + return Changed; + } + +public: + PPCReduceCRLogicals() : MachineFunctionPass(ID) { + initializePPCReduceCRLogicalsPass(*PassRegistry::getPassRegistry()); + } + + MachineInstr *lookThroughCRCopy(unsigned Reg, unsigned &Subreg, + MachineInstr *&CpDef); + bool runOnMachineFunction(MachineFunction &MF) override { + if (skipFunction(MF.getFunction())) + return false; + + // If the subtarget doesn't use CR bits, there's nothing to do. + const PPCSubtarget &STI = MF.getSubtarget(); + if (!STI.useCRBits()) + return false; + + initialize(MF); + collectCRLogicals(); + return simplifyCode(); + } + CRLogicalOpInfo createCRLogicalOpInfo(MachineInstr &MI); + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addRequired(); + MachineFunctionPass::getAnalysisUsage(AU); + } +}; + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void PPCReduceCRLogicals::CRLogicalOpInfo::dump() { + dbgs() << "CRLogicalOpMI: "; + MI->dump(); + dbgs() << "IsBinary: " << IsBinary << ", FeedsISEL: " << FeedsISEL; + dbgs() << ", FeedsBR: " << FeedsBR << ", FeedsLogical: "; + dbgs() << FeedsLogical << ", SingleUse: " << SingleUse; + dbgs() << ", DefsSingleUse: " << DefsSingleUse; + dbgs() << ", SubregDef1: " << SubregDef1 << ", SubregDef2: "; + dbgs() << SubregDef2 << ", ContainedInBlock: " << ContainedInBlock; + if (!IsNullary) { + dbgs() << "\nDefs:\n"; + TrueDefs.first->dump(); + } + if (IsBinary) + TrueDefs.second->dump(); + dbgs() << "\n"; + if (CopyDefs.first) { + dbgs() << "CopyDef1: "; + CopyDefs.first->dump(); + } + if (CopyDefs.second) { + dbgs() << "CopyDef2: "; + CopyDefs.second->dump(); + } +} +#endif + +PPCReduceCRLogicals::CRLogicalOpInfo +PPCReduceCRLogicals::createCRLogicalOpInfo(MachineInstr &MIParam) { + CRLogicalOpInfo Ret; + Ret.MI = &MIParam; + // Get the defs + if (isNullary(MIParam)) { + Ret.IsNullary = 1; + Ret.TrueDefs = std::make_pair(nullptr, nullptr); + Ret.CopyDefs = std::make_pair(nullptr, nullptr); + } else { + MachineInstr *Def1 = lookThroughCRCopy(MIParam.getOperand(1).getReg(), + Ret.SubregDef1, Ret.CopyDefs.first); + Ret.DefsSingleUse &= + MRI->hasOneNonDBGUse(Def1->getOperand(0).getReg()); + Ret.DefsSingleUse &= + MRI->hasOneNonDBGUse(Ret.CopyDefs.first->getOperand(0).getReg()); + assert(Def1 && "Must be able to find a definition of operand 1."); + if (isBinary(MIParam)) { + Ret.IsBinary = 1; + MachineInstr *Def2 = lookThroughCRCopy(MIParam.getOperand(2).getReg(), + Ret.SubregDef2, + Ret.CopyDefs.second); + Ret.DefsSingleUse &= + MRI->hasOneNonDBGUse(Def2->getOperand(0).getReg()); + Ret.DefsSingleUse &= + MRI->hasOneNonDBGUse(Ret.CopyDefs.second->getOperand(0).getReg()); + assert(Def2 && "Must be able to find a definition of operand 2."); + Ret.TrueDefs = std::make_pair(Def1, Def2); + } else { + Ret.TrueDefs = std::make_pair(Def1, nullptr); + Ret.CopyDefs.second = nullptr; + } + } + + Ret.ContainedInBlock = 1; + // Get the uses + for (MachineInstr &UseMI : + MRI->use_nodbg_instructions(MIParam.getOperand(0).getReg())) { + unsigned Opc = UseMI.getOpcode(); + if (Opc == PPC::ISEL || Opc == PPC::ISEL8) + Ret.FeedsISEL = 1; + if (Opc == PPC::BC || Opc == PPC::BCn || Opc == PPC::BCLR || + Opc == PPC::BCLRn) + Ret.FeedsBR = 1; + Ret.FeedsLogical = isCRLogical(UseMI); + if (UseMI.getParent() != MIParam.getParent()) + Ret.ContainedInBlock = 0; + } + Ret.SingleUse = MRI->hasOneNonDBGUse(MIParam.getOperand(0).getReg()) ? 
1 : 0; + + // We now know whether all the uses of the CR logical are in the same block. + if (!Ret.IsNullary) { + Ret.ContainedInBlock &= + (MIParam.getParent() == Ret.TrueDefs.first->getParent()); + if (Ret.IsBinary) + Ret.ContainedInBlock &= + (MIParam.getParent() == Ret.TrueDefs.second->getParent()); + } + DEBUG(Ret.dump()); + if (Ret.IsBinary && Ret.ContainedInBlock && Ret.SingleUse) { + NumContainedSingleUseBinOps++; + if (Ret.FeedsBR && Ret.DefsSingleUse) + NumToSplitBlocks++; + } + return Ret; +} + +/// Looks trhough a COPY instruction to the actual definition of the CR-bit +/// register and returns the instruction that defines it. +/// FIXME: This currently handles what is by-far the most common case: +/// an instruction that defines a CR field followed by a single copy of a bit +/// from that field into a virtual register. If chains of copies need to be +/// handled, this should have a loop until a non-copy instruction is found. +MachineInstr *PPCReduceCRLogicals::lookThroughCRCopy(unsigned Reg, + unsigned &Subreg, + MachineInstr *&CpDef) { + Subreg = -1; + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + return nullptr; + MachineInstr *Copy = MRI->getVRegDef(Reg); + CpDef = Copy; + if (!Copy->isCopy()) + return Copy; + unsigned CopySrc = Copy->getOperand(1).getReg(); + Subreg = Copy->getOperand(1).getSubReg(); + if (!TargetRegisterInfo::isVirtualRegister(CopySrc)) { + const TargetRegisterInfo *TRI = &TII->getRegisterInfo(); + // Set the Subreg + if (CopySrc == PPC::CR0EQ || CopySrc == PPC::CR6EQ) + Subreg = PPC::sub_eq; + if (CopySrc == PPC::CR0LT || CopySrc == PPC::CR6LT) + Subreg = PPC::sub_lt; + if (CopySrc == PPC::CR0GT || CopySrc == PPC::CR6GT) + Subreg = PPC::sub_gt; + if (CopySrc == PPC::CR0UN || CopySrc == PPC::CR6UN) + Subreg = PPC::sub_un; + // Loop backwards and return the first MI that modifies the physical CR Reg. + MachineBasicBlock::iterator Me = Copy, B = Copy->getParent()->begin(); + while (Me != B) + if ((--Me)->modifiesRegister(CopySrc, TRI)) + return &*Me; + return nullptr; + } + return MRI->getVRegDef(CopySrc); +} + +void PPCReduceCRLogicals::initialize(MachineFunction &MFParam) { + MF = &MFParam; + MRI = &MF->getRegInfo(); + TII = MF->getSubtarget().getInstrInfo(); + MBPI = &getAnalysis(); + + AllCRLogicalOps.clear(); +} + +/// Contains all the implemented transformations on CR logical operations. +/// For example, a binary CR logical can be used to split a block on its inputs, +/// a unary CR logical might be used to change the condition code on a +/// comparison feeding it. A nullary CR logical might simply be removable +/// if the user of the bit it [un]sets can be transformed. +bool PPCReduceCRLogicals::handleCROp(CRLogicalOpInfo &CRI) { + // We can definitely split a block on the inputs to a binary CR operation + // whose defs and (single) use are within the same block. + bool Changed = false; + if (CRI.IsBinary && CRI.ContainedInBlock && CRI.SingleUse && CRI.FeedsBR && + CRI.DefsSingleUse) { + Changed = splitBlockOnBinaryCROp(CRI); + if (Changed) + NumBlocksSplitOnBinaryCROp++; + } + return Changed; +} + +/// Splits a block that contains a CR-logical operation that feeds a branch +/// and whose operands are produced within the block. 
+/// Example: +/// %vr5 = CMPDI %vr2, 0; CRRC:%vr5 G8RC:%vr2 +/// %vr6 = COPY %vr5:sub_eq; CRBITRC:%vr6 CRRC:%vr5 +/// %vr7 = CMPDI %vr3, 0; CRRC:%vr7 G8RC:%vr3 +/// %vr8 = COPY %vr7:sub_eq; CRBITRC:%vr8 CRRC:%vr7 +/// %vr9 = CROR %vr6, %vr8; CRBITRC:%vr9,%vr6,%vr8 +/// BC %vr9, ; CRBITRC:%vr9 +/// Becomes: +/// %vr5 = CMPDI %vr2, 0; CRRC:%vr5 G8RC:%vr2 +/// %vr6 = COPY %vr5:sub_eq; CRBITRC:%vr6 CRRC:%vr5 +/// BC %vr6, ; CRBITRC:%vr6 +/// +/// %vr7 = CMPDI %vr3, 0; CRRC:%vr7 G8RC:%vr3 +/// %vr8 = COPY %vr7:sub_eq; CRBITRC:%vr8 CRRC:%vr7 +/// BC %vr9, ; CRBITRC:%vr9 +bool PPCReduceCRLogicals::splitBlockOnBinaryCROp(CRLogicalOpInfo &CRI) { + if (CRI.CopyDefs.first == CRI.CopyDefs.second) { + DEBUG(dbgs() << "Unable to split as the two operands are the same\n"); + NumNotSplitIdenticalOperands++; + return false; + } + if (CRI.TrueDefs.first->isCopy() || CRI.TrueDefs.second->isCopy() || + CRI.TrueDefs.first->isPHI() || CRI.TrueDefs.second->isPHI()) { + DEBUG(dbgs() << "Unable to split because one of the operands is a PHI or " + "chain of copies.\n"); + NumNotSplitChainCopies++; + return false; + } + // Note: keep in sync with computeBranchTargetAndInversion(). + if (CRI.MI->getOpcode() != PPC::CROR && + CRI.MI->getOpcode() != PPC::CRAND && + CRI.MI->getOpcode() != PPC::CRNOR && + CRI.MI->getOpcode() != PPC::CRNAND && + CRI.MI->getOpcode() != PPC::CRORC && + CRI.MI->getOpcode() != PPC::CRANDC) { + DEBUG(dbgs() << "Unable to split blocks on this opcode.\n"); + NumNotSplitWrongOpcode++; + return false; + } + DEBUG(dbgs() << "Splitting the following CR op:\n"; CRI.dump()); + MachineBasicBlock::iterator Def1It = CRI.TrueDefs.first; + MachineBasicBlock::iterator Def2It = CRI.TrueDefs.second; + + bool UsingDef1 = false; + MachineInstr *SplitBefore = &*Def2It; + for (auto E = CRI.MI->getParent()->end(); Def2It != E; ++Def2It) { + if (Def1It == Def2It) { // Def2 comes before Def1. + SplitBefore = &*Def1It; + UsingDef1 = true; + break; + } + } + + DEBUG(dbgs() << "We will split the following block:\n";); + DEBUG(CRI.MI->getParent()->dump()); + DEBUG(dbgs() << "Before instruction:\n"; SplitBefore->dump()); + + // Get the branch instruction. + MachineInstr *Branch = + MRI->use_nodbg_begin(CRI.MI->getOperand(0).getReg())->getParent(); + + // We want the new block to have no code in it other than the definition + // of the input to the CR logical and the CR logical itself. So we move + // those to the bottom of the block (just before the branch). Then we + // will split before the CR logical. + MachineBasicBlock *MBB = SplitBefore->getParent(); + auto FirstTerminator = MBB->getFirstTerminator(); + MachineBasicBlock::iterator FirstInstrToMove = + UsingDef1 ? CRI.TrueDefs.first : CRI.TrueDefs.second; + MachineBasicBlock::iterator SecondInstrToMove = + UsingDef1 ? CRI.CopyDefs.first : CRI.CopyDefs.second; + + // The instructions that need to be moved are not guaranteed to be + // contiguous. Move them individually. + // FIXME: If one of the operands is a chain of (single use) copies, they + // can all be moved and we can still split. + MBB->splice(FirstTerminator, MBB, FirstInstrToMove); + if (FirstInstrToMove != SecondInstrToMove) + MBB->splice(FirstTerminator, MBB, SecondInstrToMove); + MBB->splice(FirstTerminator, MBB, CRI.MI); + + unsigned Opc = CRI.MI->getOpcode(); + bool InvertOrigBranch, InvertNewBranch, TargetIsFallThrough; + computeBranchTargetAndInversion(Opc, Branch->getOpcode(), UsingDef1, + InvertNewBranch, InvertOrigBranch, + TargetIsFallThrough); + MachineInstr *SplitCond = + UsingDef1 ? 
CRI.CopyDefs.second : CRI.CopyDefs.first; + DEBUG(dbgs() << "We will " << (InvertNewBranch ? "invert" : "copy")); + DEBUG(dbgs() << " the original branch and the target is the " << + (TargetIsFallThrough ? "fallthrough block\n" : "orig. target block\n")); + DEBUG(dbgs() << "Original branch instruction: "; Branch->dump()); + BlockSplitInfo BSI { Branch, SplitBefore, SplitCond, InvertNewBranch, + InvertOrigBranch, TargetIsFallThrough, MBPI, CRI.MI, + UsingDef1 ? CRI.CopyDefs.first : CRI.CopyDefs.second }; + bool Changed = splitMBB(BSI); + // If we've split on a CR logical that is fed by a CR logical, + // recompute the source CR logical as it may be usable for splitting. + if (Changed) { + bool Input1CRlogical = + CRI.TrueDefs.first && isCRLogical(*CRI.TrueDefs.first); + bool Input2CRlogical = + CRI.TrueDefs.second && isCRLogical(*CRI.TrueDefs.second); + if (Input1CRlogical) + AllCRLogicalOps.push_back(createCRLogicalOpInfo(*CRI.TrueDefs.first)); + if (Input2CRlogical) + AllCRLogicalOps.push_back(createCRLogicalOpInfo(*CRI.TrueDefs.second)); + } + return Changed; +} + +void PPCReduceCRLogicals::collectCRLogicals() { + for (MachineBasicBlock &MBB : *MF) { + for (MachineInstr &MI : MBB) { + if (isCRLogical(MI)) { + AllCRLogicalOps.push_back(createCRLogicalOpInfo(MI)); + TotalCRLogicals++; + if (AllCRLogicalOps.back().IsNullary) + TotalNullaryCRLogicals++; + else if (AllCRLogicalOps.back().IsBinary) + TotalBinaryCRLogicals++; + else + TotalUnaryCRLogicals++; + } + } + } +} + +} // end annonymous namespace + +INITIALIZE_PASS_BEGIN(PPCReduceCRLogicals, DEBUG_TYPE, + "PowerPC Reduce CR logical Operation", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_END(PPCReduceCRLogicals, DEBUG_TYPE, + "PowerPC Reduce CR logical Operation", false, false) + +char PPCReduceCRLogicals::ID = 0; +FunctionPass* +llvm::createPPCReduceCRLogicalsPass() { return new PPCReduceCRLogicals(); } diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index 78467e81795c..6b62a82ef7bf 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -123,7 +123,7 @@ PPCRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind) const MCPhysReg* PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { const PPCSubtarget &Subtarget = MF->getSubtarget(); - if (MF->getFunction()->getCallingConv() == CallingConv::AnyReg) { + if (MF->getFunction().getCallingConv() == CallingConv::AnyReg) { if (Subtarget.hasVSX()) return CSR_64_AllRegs_VSX_SaveList; if (Subtarget.hasAltivec()) @@ -161,7 +161,7 @@ PPCRegisterInfo::getCalleeSavedRegsViaCopy(const MachineFunction *MF) const { return nullptr; if (!TM.isPPC64()) return nullptr; - if (MF->getFunction()->getCallingConv() != CallingConv::CXX_FAST_TLS) + if (MF->getFunction().getCallingConv() != CallingConv::CXX_FAST_TLS) return nullptr; if (!MF->getInfo()->isSplitCSR()) return nullptr; @@ -901,7 +901,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // Naked functions have stack size 0, although getStackSize may not reflect // that because we didn't call all the pieces that compute it for naked // functions. 
- if (!MF.getFunction()->hasFnAttribute(Attribute::Naked)) { + if (!MF.getFunction().hasFnAttribute(Attribute::Naked)) { if (!(hasBasePointer(MF) && FrameIndex < 0)) Offset += MFI.getStackSize(); } diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td index f7807907bd64..cd82faf3f589 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.td +++ b/lib/Target/PowerPC/PPCRegisterInfo.td @@ -208,10 +208,14 @@ def CTR8 : SPR<9, "ctr">, DwarfRegNum<[66, -2]>; // VRsave register def VRSAVE: SPR<256, "vrsave">, DwarfRegNum<[109]>; +def XER: SPR<1, "xer">, DwarfRegNum<[76]>; + // Carry bit. In the architecture this is really bit 0 of the XER register // (which really is SPR register 1); this is the only bit interesting to a // compiler. -def CARRY: SPR<1, "ca">, DwarfRegNum<[76]>; +def CARRY: SPR<1, "xer">, DwarfRegNum<[76]> { + let Aliases = [XER]; +} // FP rounding mode: bits 30 and 31 of the FP status and control register // This is not allocated as a normal register; it appears only in @@ -351,7 +355,7 @@ def CTRRC8 : RegisterClass<"PPC", [i64], 64, (add CTR8)> { } def VRSAVERC : RegisterClass<"PPC", [i32], 32, (add VRSAVE)>; -def CARRYRC : RegisterClass<"PPC", [i32], 32, (add CARRY)> { +def CARRYRC : RegisterClass<"PPC", [i32], 32, (add CARRY, XER)> { let CopyCost = -1; } diff --git a/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/lib/Target/PowerPC/PPCTLSDynamicCall.cpp index 5f8085f4626e..49f2699ab082 100644 --- a/lib/Target/PowerPC/PPCTLSDynamicCall.cpp +++ b/lib/Target/PowerPC/PPCTLSDynamicCall.cpp @@ -25,7 +25,7 @@ #include "PPCInstrBuilder.h" #include "PPCInstrInfo.h" #include "PPCTargetMachine.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/Support/Debug.h" diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index 2babc8f64539..20a83c973026 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -88,6 +88,10 @@ EnableMachineCombinerPass("ppc-machine-combiner", cl::desc("Enable the machine combiner pass"), cl::init(true), cl::Hidden); +static cl::opt + ReduceCRLogical("ppc-reduce-cr-logicals", + cl::desc("Expand eligible cr-logical binary ops to branches"), + cl::init(false), cl::Hidden); extern "C" void LLVMInitializePowerPCTarget() { // Register the targets RegisterTargetMachine A(getThePPC32Target()); @@ -97,7 +101,9 @@ extern "C" void LLVMInitializePowerPCTarget() { PassRegistry &PR = *PassRegistry::getPassRegistry(); initializePPCBoolRetToIntPass(PR); initializePPCExpandISELPass(PR); + initializePPCPreEmitPeepholePass(PR); initializePPCTLSDynamicCallPass(PR); + initializePPCMIPeepholePass(PR); } /// Return the datalayout string of a subtarget. @@ -392,6 +398,9 @@ void PPCPassConfig::addMachineSSAOptimization() { if (TM->getTargetTriple().getArch() == Triple::ppc64le && !DisableVSXSwapRemoval) addPass(createPPCVSXSwapRemovalPass()); + // Reduce the number of cr-logical ops. + if (ReduceCRLogical && getOptLevel() != CodeGenOpt::None) + addPass(createPPCReduceCRLogicalsPass()); // Target-specific peephole cleanups performed after instruction // selection. 
if (!DisableMIPeephole) { @@ -433,6 +442,7 @@ void PPCPassConfig::addPreSched2() { } void PPCPassConfig::addPreEmitPass() { + addPass(createPPCPreEmitPeepholePass()); addPass(createPPCExpandISELPass()); if (getOptLevel() != CodeGenOpt::None) @@ -441,8 +451,7 @@ void PPCPassConfig::addPreEmitPass() { addPass(createPPCBranchSelectionPass(), false); } -TargetIRAnalysis PPCTargetMachine::getTargetIRAnalysis() { - return TargetIRAnalysis([this](const Function &F) { - return TargetTransformInfo(PPCTTIImpl(this, F)); - }); +TargetTransformInfo +PPCTargetMachine::getTargetTransformInfo(const Function &F) { + return TargetTransformInfo(PPCTTIImpl(this, F)); } diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h index 102bf7ca59c2..75b98a815ab4 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.h +++ b/lib/Target/PowerPC/PPCTargetMachine.h @@ -49,7 +49,7 @@ class PPCTargetMachine final : public LLVMTargetMachine { // Pass Pipeline Configuration TargetPassConfig *createPassConfig(PassManagerBase &PM) override; - TargetIRAnalysis getTargetIRAnalysis() override; + TargetTransformInfo getTargetTransformInfo(const Function &F) override; TargetLoweringObjectFile *getObjFileLowering() const override { return TLOF.get(); diff --git a/lib/Target/PowerPC/PPCVSXFMAMutate.cpp b/lib/Target/PowerPC/PPCVSXFMAMutate.cpp index a57484e5abdf..f15af790de8f 100644 --- a/lib/Target/PowerPC/PPCVSXFMAMutate.cpp +++ b/lib/Target/PowerPC/PPCVSXFMAMutate.cpp @@ -20,7 +20,7 @@ #include "PPCTargetMachine.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -90,21 +90,21 @@ namespace { // This pass is run after register coalescing, and so we're looking for // a situation like this: // ... - // %vreg5 = COPY %vreg9; VSLRC:%vreg5,%vreg9 - // %vreg5 = XSMADDADP %vreg5, %vreg17, %vreg16, - // %RM; VSLRC:%vreg5,%vreg17,%vreg16 + // %5 = COPY %9; VSLRC:%5,%9 + // %5 = XSMADDADP %5, %17, %16, + // implicit %rm; VSLRC:%5,%17,%16 // ... - // %vreg9 = XSMADDADP %vreg9, %vreg17, %vreg19, - // %RM; VSLRC:%vreg9,%vreg17,%vreg19 + // %9 = XSMADDADP %9, %17, %19, + // implicit %rm; VSLRC:%9,%17,%19 // ... // Where we can eliminate the copy by changing from the A-type to the // M-type instruction. Specifically, for this example, this means: - // %vreg5 = XSMADDADP %vreg5, %vreg17, %vreg16, - // %RM; VSLRC:%vreg5,%vreg17,%vreg16 + // %5 = XSMADDADP %5, %17, %16, + // implicit %rm; VSLRC:%5,%17,%16 // is replaced by: - // %vreg16 = XSMADDMDP %vreg16, %vreg18, %vreg9, - // %RM; VSLRC:%vreg16,%vreg18,%vreg9 - // and we remove: %vreg5 = COPY %vreg9; VSLRC:%vreg5,%vreg9 + // %16 = XSMADDMDP %16, %18, %9, + // implicit %rm; VSLRC:%16,%18,%9 + // and we remove: %5 = COPY %9; VSLRC:%5,%9 SlotIndex FMAIdx = LIS->getInstructionIndex(MI); @@ -150,13 +150,13 @@ namespace { // walking the MIs we may as well test liveness here. // // FIXME: There is a case that occurs in practice, like this: - // %vreg9 = COPY %F1; VSSRC:%vreg9 + // %9 = COPY %f1; VSSRC:%9 // ... 
- // %vreg6 = COPY %vreg9; VSSRC:%vreg6,%vreg9 - // %vreg7 = COPY %vreg9; VSSRC:%vreg7,%vreg9 - // %vreg9 = XSMADDASP %vreg9, %vreg1, %vreg4; VSSRC: - // %vreg6 = XSMADDASP %vreg6, %vreg1, %vreg2; VSSRC: - // %vreg7 = XSMADDASP %vreg7, %vreg1, %vreg3; VSSRC: + // %6 = COPY %9; VSSRC:%6,%9 + // %7 = COPY %9; VSSRC:%7,%9 + // %9 = XSMADDASP %9, %1, %4; VSSRC: + // %6 = XSMADDASP %6, %1, %2; VSSRC: + // %7 = XSMADDASP %7, %1, %3; VSSRC: // which prevents an otherwise-profitable transformation. bool OtherUsers = false, KillsAddendSrc = false; for (auto J = std::prev(I), JE = MachineBasicBlock::iterator(AddendMI); @@ -177,11 +177,11 @@ namespace { // The transformation doesn't work well with things like: - // %vreg5 = A-form-op %vreg5, %vreg11, %vreg5; - // unless vreg11 is also a kill, so skip when it is not, + // %5 = A-form-op %5, %11, %5; + // unless %11 is also a kill, so skip when it is not, // and check operand 3 to see it is also a kill to handle the case: - // %vreg5 = A-form-op %vreg5, %vreg5, %vreg11; - // where vreg5 and vreg11 are both kills. This case would be skipped + // %5 = A-form-op %5, %5, %11; + // where %5 and %11 are both kills. This case would be skipped // otherwise. unsigned OldFMAReg = MI.getOperand(0).getReg(); @@ -343,7 +343,7 @@ namespace { public: bool runOnMachineFunction(MachineFunction &MF) override { - if (skipFunction(*MF.getFunction())) + if (skipFunction(MF.getFunction())) return false; // If we don't have VSX then go ahead and return without doing diff --git a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp index c51368d6d2af..8a5fb9fdaef1 100644 --- a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp +++ b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp @@ -191,7 +191,7 @@ struct PPCVSXSwapRemoval : public MachineFunctionPass { public: // Main entry point for this pass. bool runOnMachineFunction(MachineFunction &MF) override { - if (skipFunction(*MF.getFunction())) + if (skipFunction(MF.getFunction())) return false; // If we don't have VSX on the subtarget, don't do anything. 
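The skipFunction() guards updated in the hunks above (PPCQPXLoadSplat, PPCVSXFMAMutate, and PPCVSXSwapRemoval here, PPCRegisterInfo earlier) all reflect the same underlying API change: MachineFunction::getFunction() now returns a const Function reference rather than a pointer, so the explicit dereference disappears at every call site. What follows is a minimal sketch of the updated call-site pattern, not code from this patch; the pass name is a placeholder chosen only for illustration.

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/IR/Function.h"

using namespace llvm;

namespace {
// Placeholder pass used only to show the call-site pattern.
struct PPCExamplePeephole : public MachineFunctionPass {
  static char ID;
  PPCExamplePeephole() : MachineFunctionPass(ID) {}

  bool runOnMachineFunction(MachineFunction &MF) override {
    // Previously written as skipFunction(*MF.getFunction()); the dereference
    // goes away now that getFunction() returns a reference.
    if (skipFunction(MF.getFunction()))
      return false;
    // Attribute and calling-convention queries go through the same reference,
    // as in the PPCRegisterInfo changes above.
    bool IsNaked = MF.getFunction().hasFnAttribute(Attribute::Naked);
    (void)IsNaked;
    return false; // This sketch performs no transformation.
  }
};
} // end anonymous namespace

char PPCExamplePeephole::ID = 0;

Pass registration and INITIALIZE_PASS boilerplate are omitted; the only point of the sketch is the reference-returning getFunction().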
@@ -966,7 +966,7 @@ LLVM_DUMP_METHOD void PPCVSXSwapRemoval::dumpSwapVector() { dbgs() << format("%6d", ID); dbgs() << format("%6d", EC->getLeaderValue(ID)); - dbgs() << format(" BB#%3d", MI->getParent()->getNumber()); + dbgs() << format(" %bb.%3d", MI->getParent()->getNumber()); dbgs() << format(" %14s ", TII->getName(MI->getOpcode()).str().c_str()); if (SwapVector[EntryIdx].IsLoad) diff --git a/lib/Target/PowerPC/README.txt b/lib/Target/PowerPC/README.txt index bc09d5f8a7e8..b4bf635dc2c7 100644 --- a/lib/Target/PowerPC/README.txt +++ b/lib/Target/PowerPC/README.txt @@ -256,7 +256,7 @@ _clamp0g: cmpwi cr0, r3, 0 li r2, 0 blt cr0, LBB1_2 -; BB#1: ; %entry +; %bb.1: ; %entry mr r2, r3 LBB1_2: ; %entry mr r3, r2 diff --git a/lib/Target/PowerPC/README_ALTIVEC.txt b/lib/Target/PowerPC/README_ALTIVEC.txt index f70ebd82bd5c..c38e01923161 100644 --- a/lib/Target/PowerPC/README_ALTIVEC.txt +++ b/lib/Target/PowerPC/README_ALTIVEC.txt @@ -233,7 +233,7 @@ declare <16 x i8> @llvm.ppc.altivec.crypto.vpmsumb(<16 x i8>, <16 x i8>) #1 Produces the following code with -mtriple=powerpc64-unknown-linux-gnu: -# BB#0: # %entry +# %bb.0: # %entry addis 3, 2, .LCPI0_0@toc@ha addis 4, 2, .LCPI0_1@toc@ha addi 3, 3, .LCPI0_0@toc@l diff --git a/lib/Target/README.txt b/lib/Target/README.txt index f0fd323bb582..563aee9e1a78 100644 --- a/lib/Target/README.txt +++ b/lib/Target/README.txt @@ -1778,7 +1778,7 @@ We do get this at the codegen level, so something knows about it, but instcombine should catch it earlier: _foo: ## @foo -## BB#0: ## %entry +## %bb.0: ## %entry movl %edi, %eax sarl $4, %eax ret @@ -2234,13 +2234,13 @@ void foo(funcs f, int which) { which we compile to: foo: # @foo -# BB#0: # %entry +# %bb.0: # %entry pushq %rbp movq %rsp, %rbp testl %esi, %esi movq %rdi, %rax je .LBB0_2 -# BB#1: # %if.then +# %bb.1: # %if.then movl $5, %edi callq *%rax popq %rbp diff --git a/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp index 3f76ce3b24a3..3299a53ff5ba 100644 --- a/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp +++ b/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp @@ -31,6 +31,10 @@ struct RISCVOperand; class RISCVAsmParser : public MCTargetAsmParser { SMLoc getLoc() const { return getParser().getTok().getLoc(); } + bool isRV64() const { return getSTI().hasFeature(RISCV::Feature64Bit); } + + unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, + unsigned Kind) override; bool generateImmOutOfRangeError(OperandVector &Operands, uint64_t ErrorInfo, int Lower, int Upper, Twine Msg); @@ -88,6 +92,8 @@ struct RISCVOperand : public MCParsedAsmOperand { Immediate, } Kind; + bool IsRV64; + struct RegOp { unsigned RegNum; }; @@ -108,6 +114,7 @@ struct RISCVOperand : public MCParsedAsmOperand { public: RISCVOperand(const RISCVOperand &o) : MCParsedAsmOperand() { Kind = o.Kind; + IsRV64 = o.IsRV64; StartLoc = o.StartLoc; EndLoc = o.EndLoc; switch (Kind) { @@ -185,6 +192,42 @@ struct RISCVOperand : public MCParsedAsmOperand { return true; } + /// Return true if the operand is a valid floating point rounding mode. 
+ bool isFRMArg() const { + if (!isImm()) + return false; + const MCExpr *Val = getImm(); + auto *SVal = dyn_cast(Val); + if (!SVal || SVal->getKind() != MCSymbolRefExpr::VK_None) + return false; + + StringRef Str = SVal->getSymbol().getName(); + + return RISCVFPRndMode::stringToRoundingMode(Str) != RISCVFPRndMode::Invalid; + } + + bool isUImmLog2XLen() const { + int64_t Imm; + RISCVMCExpr::VariantKind VK; + if (!isImm()) + return false; + if (!evaluateConstantImm(Imm, VK) || VK != RISCVMCExpr::VK_RISCV_None) + return false; + return (isRV64() && isUInt<6>(Imm)) || isUInt<5>(Imm); + } + + bool isUImmLog2XLenNonZero() const { + int64_t Imm; + RISCVMCExpr::VariantKind VK; + if (!isImm()) + return false; + if (!evaluateConstantImm(Imm, VK) || VK != RISCVMCExpr::VK_RISCV_None) + return false; + if (Imm == 0) + return false; + return (isRV64() && isUInt<6>(Imm)) || isUInt<5>(Imm); + } + bool isUImm5() const { int64_t Imm; RISCVMCExpr::VariantKind VK; @@ -194,6 +237,79 @@ struct RISCVOperand : public MCParsedAsmOperand { return IsConstantImm && isUInt<5>(Imm) && VK == RISCVMCExpr::VK_RISCV_None; } + bool isUImm5NonZero() const { + int64_t Imm; + RISCVMCExpr::VariantKind VK; + if (!isImm()) + return false; + bool IsConstantImm = evaluateConstantImm(Imm, VK); + return IsConstantImm && isUInt<5>(Imm) && (Imm != 0) && + VK == RISCVMCExpr::VK_RISCV_None; + } + + bool isSImm6() const { + RISCVMCExpr::VariantKind VK; + int64_t Imm; + bool IsValid; + bool IsConstantImm = evaluateConstantImm(Imm, VK); + if (!IsConstantImm) + IsValid = RISCVAsmParser::classifySymbolRef(getImm(), VK, Imm); + else + IsValid = isInt<6>(Imm); + return IsValid && + (VK == RISCVMCExpr::VK_RISCV_None || VK == RISCVMCExpr::VK_RISCV_LO); + } + + bool isUImm6NonZero() const { + int64_t Imm; + RISCVMCExpr::VariantKind VK; + bool IsConstantImm = evaluateConstantImm(Imm, VK); + return IsConstantImm && isUInt<6>(Imm) && (Imm != 0) && + VK == RISCVMCExpr::VK_RISCV_None; + } + + bool isUImm7Lsb00() const { + int64_t Imm; + RISCVMCExpr::VariantKind VK; + bool IsConstantImm = evaluateConstantImm(Imm, VK); + return IsConstantImm && isShiftedUInt<5, 2>(Imm) && + VK == RISCVMCExpr::VK_RISCV_None; + } + + bool isUImm8Lsb00() const { + int64_t Imm; + RISCVMCExpr::VariantKind VK; + bool IsConstantImm = evaluateConstantImm(Imm, VK); + return IsConstantImm && isShiftedUInt<6, 2>(Imm) && + VK == RISCVMCExpr::VK_RISCV_None; + } + + bool isUImm8Lsb000() const { + int64_t Imm; + RISCVMCExpr::VariantKind VK; + bool IsConstantImm = evaluateConstantImm(Imm, VK); + return IsConstantImm && isShiftedUInt<5, 3>(Imm) && + VK == RISCVMCExpr::VK_RISCV_None; + } + + bool isSImm9Lsb0() const { return isBareSimmNLsb0<9>(); } + + bool isUImm9Lsb000() const { + int64_t Imm; + RISCVMCExpr::VariantKind VK; + bool IsConstantImm = evaluateConstantImm(Imm, VK); + return IsConstantImm && isShiftedUInt<6, 3>(Imm) && + VK == RISCVMCExpr::VK_RISCV_None; + } + + bool isUImm10Lsb00NonZero() const { + int64_t Imm; + RISCVMCExpr::VariantKind VK; + bool IsConstantImm = evaluateConstantImm(Imm, VK); + return IsConstantImm && isShiftedUInt<8, 2>(Imm) && (Imm != 0) && + VK == RISCVMCExpr::VK_RISCV_None; + } + bool isSImm12() const { RISCVMCExpr::VariantKind VK; int64_t Imm; @@ -209,6 +325,8 @@ struct RISCVOperand : public MCParsedAsmOperand { (VK == RISCVMCExpr::VK_RISCV_None || VK == RISCVMCExpr::VK_RISCV_LO); } + bool isSImm12Lsb0() const { return isBareSimmNLsb0<12>(); } + bool isUImm12() const { int64_t Imm; RISCVMCExpr::VariantKind VK; @@ -220,6 +338,14 @@ struct RISCVOperand : 
public MCParsedAsmOperand { bool isSImm13Lsb0() const { return isBareSimmNLsb0<13>(); } + bool isSImm10Lsb0000() const { + int64_t Imm; + RISCVMCExpr::VariantKind VK; + bool IsConstantImm = evaluateConstantImm(Imm, VK); + return IsConstantImm && isShiftedInt<6, 4>(Imm) && + VK == RISCVMCExpr::VK_RISCV_None; + } + bool isUImm20() const { RISCVMCExpr::VariantKind VK; int64_t Imm; @@ -242,6 +368,8 @@ struct RISCVOperand : public MCParsedAsmOperand { SMLoc getStartLoc() const override { return StartLoc; } /// getEndLoc - Gets location of the last token of this operand SMLoc getEndLoc() const override { return EndLoc; } + /// True if this operand is for an RV64 instruction + bool isRV64() const { return IsRV64; } unsigned getReg() const override { assert(Kind == Register && "Invalid type access!"); @@ -273,29 +401,33 @@ struct RISCVOperand : public MCParsedAsmOperand { } } - static std::unique_ptr createToken(StringRef Str, SMLoc S) { + static std::unique_ptr createToken(StringRef Str, SMLoc S, + bool IsRV64) { auto Op = make_unique(Token); Op->Tok = Str; Op->StartLoc = S; Op->EndLoc = S; + Op->IsRV64 = IsRV64; return Op; } static std::unique_ptr createReg(unsigned RegNo, SMLoc S, - SMLoc E) { + SMLoc E, bool IsRV64) { auto Op = make_unique(Register); Op->Reg.RegNum = RegNo; Op->StartLoc = S; Op->EndLoc = E; + Op->IsRV64 = IsRV64; return Op; } static std::unique_ptr createImm(const MCExpr *Val, SMLoc S, - SMLoc E) { + SMLoc E, bool IsRV64) { auto Op = make_unique(Immediate); Op->Imm.Val = Val; Op->StartLoc = S; Op->EndLoc = E; + Op->IsRV64 = IsRV64; return Op; } @@ -344,6 +476,22 @@ struct RISCVOperand : public MCParsedAsmOperand { } Inst.addOperand(MCOperand::createImm(Imm)); } + + // Returns the rounding mode represented by this RISCVOperand. Should only + // be called after checking isFRMArg. + RISCVFPRndMode::RoundingMode getRoundingMode() const { + // isFRMArg has validated the operand, meaning this cast is safe. + auto SE = cast(getImm()); + RISCVFPRndMode::RoundingMode FRM = + RISCVFPRndMode::stringToRoundingMode(SE->getSymbol().getName()); + assert(FRM != RISCVFPRndMode::Invalid && "Invalid rounding mode"); + return FRM; + } + + void addFRMArgOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createImm(getRoundingMode())); + } }; } // end anonymous namespace. @@ -351,6 +499,70 @@ struct RISCVOperand : public MCParsedAsmOperand { #define GET_MATCHER_IMPLEMENTATION #include "RISCVGenAsmMatcher.inc" +// Return the matching FPR64 register for the given FPR32. +// FIXME: Ideally this function could be removed in favour of using +// information from TableGen. 
+unsigned convertFPR32ToFPR64(unsigned Reg) { + switch (Reg) { + default: + llvm_unreachable("Not a recognised FPR32 register"); + case RISCV::F0_32: return RISCV::F0_64; + case RISCV::F1_32: return RISCV::F1_64; + case RISCV::F2_32: return RISCV::F2_64; + case RISCV::F3_32: return RISCV::F3_64; + case RISCV::F4_32: return RISCV::F4_64; + case RISCV::F5_32: return RISCV::F5_64; + case RISCV::F6_32: return RISCV::F6_64; + case RISCV::F7_32: return RISCV::F7_64; + case RISCV::F8_32: return RISCV::F8_64; + case RISCV::F9_32: return RISCV::F9_64; + case RISCV::F10_32: return RISCV::F10_64; + case RISCV::F11_32: return RISCV::F11_64; + case RISCV::F12_32: return RISCV::F12_64; + case RISCV::F13_32: return RISCV::F13_64; + case RISCV::F14_32: return RISCV::F14_64; + case RISCV::F15_32: return RISCV::F15_64; + case RISCV::F16_32: return RISCV::F16_64; + case RISCV::F17_32: return RISCV::F17_64; + case RISCV::F18_32: return RISCV::F18_64; + case RISCV::F19_32: return RISCV::F19_64; + case RISCV::F20_32: return RISCV::F20_64; + case RISCV::F21_32: return RISCV::F21_64; + case RISCV::F22_32: return RISCV::F22_64; + case RISCV::F23_32: return RISCV::F23_64; + case RISCV::F24_32: return RISCV::F24_64; + case RISCV::F25_32: return RISCV::F25_64; + case RISCV::F26_32: return RISCV::F26_64; + case RISCV::F27_32: return RISCV::F27_64; + case RISCV::F28_32: return RISCV::F28_64; + case RISCV::F29_32: return RISCV::F29_64; + case RISCV::F30_32: return RISCV::F30_64; + case RISCV::F31_32: return RISCV::F31_64; + } +} + +unsigned RISCVAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp, + unsigned Kind) { + RISCVOperand &Op = static_cast(AsmOp); + if (!Op.isReg()) + return Match_InvalidOperand; + + unsigned Reg = Op.getReg(); + bool IsRegFPR32 = + RISCVMCRegisterClasses[RISCV::FPR32RegClassID].contains(Reg); + bool IsRegFPR32C = + RISCVMCRegisterClasses[RISCV::FPR32CRegClassID].contains(Reg); + + // As the parser couldn't differentiate an FPR32 from an FPR64, coerce the + // register from FPR32 to FPR64 or FPR32C to FPR64C if necessary. 
+ if ((IsRegFPR32 && Kind == MCK_FPR64) || + (IsRegFPR32C && Kind == MCK_FPR64C)) { + Op.Reg.RegNum = convertFPR32ToFPR64(Reg); + return Match_Success; + } + return Match_InvalidOperand; +} + bool RISCVAsmParser::generateImmOutOfRangeError( OperandVector &Operands, uint64_t ErrorInfo, int Lower, int Upper, Twine Msg = "immediate must be an integer in the range") { @@ -388,11 +600,56 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, } return Error(ErrorLoc, "invalid operand for instruction"); } + case Match_InvalidUImmLog2XLen: + if (isRV64()) + return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 6) - 1); + return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 5) - 1); + case Match_InvalidUImmLog2XLenNonZero: + if (isRV64()) + return generateImmOutOfRangeError(Operands, ErrorInfo, 1, (1 << 6) - 1); + return generateImmOutOfRangeError(Operands, ErrorInfo, 1, (1 << 5) - 1); case Match_InvalidUImm5: return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 5) - 1); + case Match_InvalidSImm6: + return generateImmOutOfRangeError(Operands, ErrorInfo, -(1 << 5), + (1 << 5) - 1); + case Match_InvalidUImm6NonZero: + return generateImmOutOfRangeError(Operands, ErrorInfo, 1, (1 << 6) - 1); + case Match_InvalidUImm7Lsb00: + return generateImmOutOfRangeError( + Operands, ErrorInfo, 0, (1 << 7) - 4, + "immediate must be a multiple of 4 bytes in the range"); + case Match_InvalidUImm8Lsb00: + return generateImmOutOfRangeError( + Operands, ErrorInfo, 0, (1 << 8) - 4, + "immediate must be a multiple of 4 bytes in the range"); + case Match_InvalidUImm8Lsb000: + return generateImmOutOfRangeError( + Operands, ErrorInfo, 0, (1 << 8) - 8, + "immediate must be a multiple of 8 bytes in the range"); + case Match_InvalidSImm9Lsb0: + return generateImmOutOfRangeError( + Operands, ErrorInfo, -(1 << 8), (1 << 8) - 2, + "immediate must be a multiple of 2 bytes in the range"); + case Match_InvalidUImm9Lsb000: + return generateImmOutOfRangeError( + Operands, ErrorInfo, 0, (1 << 9) - 8, + "immediate must be a multiple of 8 bytes in the range"); + case Match_InvalidUImm10Lsb00NonZero: + return generateImmOutOfRangeError( + Operands, ErrorInfo, 4, (1 << 10) - 4, + "immediate must be a multiple of 4 bytes in the range"); + case Match_InvalidSImm10Lsb0000: + return generateImmOutOfRangeError( + Operands, ErrorInfo, -(1 << 9), (1 << 9) - 16, + "immediate must be a multiple of 16 bytes in the range"); case Match_InvalidSImm12: return generateImmOutOfRangeError(Operands, ErrorInfo, -(1 << 11), (1 << 11) - 1); + case Match_InvalidSImm12Lsb0: + return generateImmOutOfRangeError( + Operands, ErrorInfo, -(1 << 11), (1 << 11) - 2, + "immediate must be a multiple of 2 bytes in the range"); case Match_InvalidUImm12: return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 12) - 1); case Match_InvalidSImm13Lsb0: @@ -411,6 +668,12 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, ErrorLoc, "operand must be formed of letters selected in-order from 'iorw'"); } + case Match_InvalidFRMArg: { + SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc(); + return Error( + ErrorLoc, + "operand must be a valid floating point rounding mode mnemonic"); + } } llvm_unreachable("Unknown match type detected!"); @@ -462,16 +725,16 @@ OperandMatchResultTy RISCVAsmParser::parseRegister(OperandVector &Operands, } } if (HadParens) - Operands.push_back(RISCVOperand::createToken("(", FirstS)); + Operands.push_back(RISCVOperand::createToken("(", FirstS, 
isRV64())); SMLoc S = getLoc(); SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1); getLexer().Lex(); - Operands.push_back(RISCVOperand::createReg(RegNo, S, E)); + Operands.push_back(RISCVOperand::createReg(RegNo, S, E, isRV64())); } if (HadParens) { getParser().Lex(); // Eat ')' - Operands.push_back(RISCVOperand::createToken(")", getLoc())); + Operands.push_back(RISCVOperand::createToken(")", getLoc(), isRV64())); } return MatchOperand_Success; @@ -505,7 +768,7 @@ OperandMatchResultTy RISCVAsmParser::parseImmediate(OperandVector &Operands) { return parseOperandWithModifier(Operands); } - Operands.push_back(RISCVOperand::createImm(Res, S, E)); + Operands.push_back(RISCVOperand::createImm(Res, S, E, isRV64())); return MatchOperand_Success; } @@ -545,7 +808,7 @@ RISCVAsmParser::parseOperandWithModifier(OperandVector &Operands) { } const MCExpr *ModExpr = RISCVMCExpr::create(SubExpr, VK, getContext()); - Operands.push_back(RISCVOperand::createImm(ModExpr, S, E)); + Operands.push_back(RISCVOperand::createImm(ModExpr, S, E, isRV64())); return MatchOperand_Success; } @@ -557,7 +820,7 @@ RISCVAsmParser::parseMemOpBaseReg(OperandVector &Operands) { } getParser().Lex(); // Eat '(' - Operands.push_back(RISCVOperand::createToken("(", getLoc())); + Operands.push_back(RISCVOperand::createToken("(", getLoc(), isRV64())); if (parseRegister(Operands) != MatchOperand_Success) { Error(getLoc(), "expected register"); @@ -570,7 +833,7 @@ RISCVAsmParser::parseMemOpBaseReg(OperandVector &Operands) { } getParser().Lex(); // Eat ')' - Operands.push_back(RISCVOperand::createToken(")", getLoc())); + Operands.push_back(RISCVOperand::createToken(")", getLoc(), isRV64())); return MatchOperand_Success; } @@ -600,7 +863,7 @@ bool RISCVAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, OperandVector &Operands) { // First operand is token for instruction - Operands.push_back(RISCVOperand::createToken(Name, NameLoc)); + Operands.push_back(RISCVOperand::createToken(Name, NameLoc, isRV64())); // If there are no more operands, then finish if (getLexer().is(AsmToken::EndOfStatement)) diff --git a/lib/Target/RISCV/CMakeLists.txt b/lib/Target/RISCV/CMakeLists.txt index bac4d4c353d2..66b50f8728e1 100644 --- a/lib/Target/RISCV/CMakeLists.txt +++ b/lib/Target/RISCV/CMakeLists.txt @@ -6,7 +6,6 @@ tablegen(LLVM RISCVGenMCCodeEmitter.inc -gen-emitter) tablegen(LLVM RISCVGenMCPseudoLowering.inc -gen-pseudo-lowering) tablegen(LLVM RISCVGenAsmMatcher.inc -gen-asm-matcher) tablegen(LLVM RISCVGenAsmWriter.inc -gen-asm-writer) -tablegen(LLVM RISCVGenCallingConv.inc -gen-callingconv) tablegen(LLVM RISCVGenDAGISel.inc -gen-dag-isel) tablegen(LLVM RISCVGenSubtargetInfo.inc -gen-subtarget) tablegen(LLVM RISCVGenDisassemblerTables.inc -gen-disassembler) diff --git a/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp index 24c6c8db8a4c..563edc9e29d8 100644 --- a/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp +++ b/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp @@ -69,21 +69,143 @@ static const unsigned GPRDecoderTable[] = { static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, const void *Decoder) { - if (RegNo > sizeof(GPRDecoderTable)) - return MCDisassembler::Fail; - - // We must define our own mapping from RegNo to register identifier. - // Accessing index RegNo in the register class will work in the case that - // registers were added in ascending order, but not in general. 
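The upper bounds in the range diagnostics above look odd at first glance; they encode "unsigned N-bit value whose low S bits are zero", whose largest member is 2^N - 2^S (for example (1 << 8) - 8 for uimm8_lsb000). A small self-contained check, written here only to illustrate the arithmetic:

#include <cassert>
#include <cstdint>

// True if Imm is an unsigned N-bit value whose low S bits are zero, i.e. a
// multiple of 2^S in [0, 2^N - 2^S]. This is the constraint the diagnostics
// above describe as "a multiple of 4/8 bytes in the range ...".
bool isShiftedUImm(uint64_t Imm, unsigned N, unsigned S) {
  return Imm < (UINT64_C(1) << N) && (Imm & ((UINT64_C(1) << S) - 1)) == 0;
}

int main() {
  assert(isShiftedUImm(248, 8, 3));  // largest uimm8_lsb000, (1 << 8) - 8
  assert(!isShiftedUImm(252, 8, 3)); // in range but not a multiple of 8
  assert(!isShiftedUImm(256, 8, 3)); // multiple of 8 but out of range
}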
- unsigned Reg = GPRDecoderTable[RegNo]; - Inst.addOperand(MCOperand::createReg(Reg)); - return MCDisassembler::Success; + if (RegNo > sizeof(GPRDecoderTable)) + return MCDisassembler::Fail; + + // We must define our own mapping from RegNo to register identifier. + // Accessing index RegNo in the register class will work in the case that + // registers were added in ascending order, but not in general. + unsigned Reg = GPRDecoderTable[RegNo]; + Inst.addOperand(MCOperand::createReg(Reg)); + return MCDisassembler::Success; +} + +static const unsigned FPR32DecoderTable[] = { + RISCV::F0_32, RISCV::F1_32, RISCV::F2_32, RISCV::F3_32, + RISCV::F4_32, RISCV::F5_32, RISCV::F6_32, RISCV::F7_32, + RISCV::F8_32, RISCV::F9_32, RISCV::F10_32, RISCV::F11_32, + RISCV::F12_32, RISCV::F13_32, RISCV::F14_32, RISCV::F15_32, + RISCV::F16_32, RISCV::F17_32, RISCV::F18_32, RISCV::F19_32, + RISCV::F20_32, RISCV::F21_32, RISCV::F22_32, RISCV::F23_32, + RISCV::F24_32, RISCV::F25_32, RISCV::F26_32, RISCV::F27_32, + RISCV::F28_32, RISCV::F29_32, RISCV::F30_32, RISCV::F31_32 +}; + +static DecodeStatus DecodeFPR32RegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo > sizeof(FPR32DecoderTable)) + return MCDisassembler::Fail; + + // We must define our own mapping from RegNo to register identifier. + // Accessing index RegNo in the register class will work in the case that + // registers were added in ascending order, but not in general. + unsigned Reg = FPR32DecoderTable[RegNo]; + Inst.addOperand(MCOperand::createReg(Reg)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeFPR32CRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo > 8) { + return MCDisassembler::Fail; + } + unsigned Reg = FPR32DecoderTable[RegNo + 8]; + Inst.addOperand(MCOperand::createReg(Reg)); + return MCDisassembler::Success; +} + +static const unsigned FPR64DecoderTable[] = { + RISCV::F0_64, RISCV::F1_64, RISCV::F2_64, RISCV::F3_64, + RISCV::F4_64, RISCV::F5_64, RISCV::F6_64, RISCV::F7_64, + RISCV::F8_64, RISCV::F9_64, RISCV::F10_64, RISCV::F11_64, + RISCV::F12_64, RISCV::F13_64, RISCV::F14_64, RISCV::F15_64, + RISCV::F16_64, RISCV::F17_64, RISCV::F18_64, RISCV::F19_64, + RISCV::F20_64, RISCV::F21_64, RISCV::F22_64, RISCV::F23_64, + RISCV::F24_64, RISCV::F25_64, RISCV::F26_64, RISCV::F27_64, + RISCV::F28_64, RISCV::F29_64, RISCV::F30_64, RISCV::F31_64 +}; + +static DecodeStatus DecodeFPR64RegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo > sizeof(FPR64DecoderTable)) + return MCDisassembler::Fail; + + // We must define our own mapping from RegNo to register identifier. + // Accessing index RegNo in the register class will work in the case that + // registers were added in ascending order, but not in general. 
+ unsigned Reg = FPR64DecoderTable[RegNo]; + Inst.addOperand(MCOperand::createReg(Reg)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeFPR64CRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo > 8) { + return MCDisassembler::Fail; + } + unsigned Reg = FPR64DecoderTable[RegNo + 8]; + Inst.addOperand(MCOperand::createReg(Reg)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeGPRNoX0RegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo == 0) { + return MCDisassembler::Fail; + } + + return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder); +} + +static DecodeStatus DecodeGPRNoX0X2RegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo == 2) { + return MCDisassembler::Fail; + } + + return DecodeGPRNoX0RegisterClass(Inst, RegNo, Address, Decoder); +} + +static DecodeStatus DecodeGPRCRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo > 8) + return MCDisassembler::Fail; + + unsigned Reg = GPRDecoderTable[RegNo + 8]; + Inst.addOperand(MCOperand::createReg(Reg)); + return MCDisassembler::Success; +} + +// Add implied SP operand for instructions *SP compressed instructions. The SP +// operand isn't explicitly encoded in the instruction. +static void addImplySP(MCInst &Inst, int64_t Address, const void *Decoder) { + if (Inst.getOpcode() == RISCV::C_LWSP || Inst.getOpcode() == RISCV::C_SWSP || + Inst.getOpcode() == RISCV::C_LDSP || Inst.getOpcode() == RISCV::C_SDSP || + Inst.getOpcode() == RISCV::C_FLWSP || + Inst.getOpcode() == RISCV::C_FSWSP || + Inst.getOpcode() == RISCV::C_FLDSP || + Inst.getOpcode() == RISCV::C_FSDSP || + Inst.getOpcode() == RISCV::C_ADDI4SPN) { + DecodeGPRRegisterClass(Inst, 2, Address, Decoder); + } + if (Inst.getOpcode() == RISCV::C_ADDI16SP) { + DecodeGPRRegisterClass(Inst, 2, Address, Decoder); + DecodeGPRRegisterClass(Inst, 2, Address, Decoder); + } } template static DecodeStatus decodeUImmOperand(MCInst &Inst, uint64_t Imm, int64_t Address, const void *Decoder) { assert(isUInt(Imm) && "Invalid immediate"); + addImplySP(Inst, Address, Decoder); Inst.addOperand(MCOperand::createImm(Imm)); return MCDisassembler::Success; } @@ -92,6 +214,7 @@ template static DecodeStatus decodeSImmOperand(MCInst &Inst, uint64_t Imm, int64_t Address, const void *Decoder) { assert(isUInt(Imm) && "Invalid immediate"); + addImplySP(Inst, Address, Decoder); // Sign-extend the number in the bottom N bits of Imm Inst.addOperand(MCOperand::createImm(SignExtend64(Imm))); return MCDisassembler::Success; @@ -116,19 +239,36 @@ DecodeStatus RISCVDisassembler::getInstruction(MCInst &MI, uint64_t &Size, uint64_t Address, raw_ostream &OS, raw_ostream &CS) const { - // TODO: although assuming 4-byte instructions is sufficient for RV32 and - // RV64, this will need modification when supporting the compressed - // instruction set extension (RVC) which uses 16-bit instructions. Other - // instruction set extensions have the option of defining instructions up to - // 176 bits wide. - Size = 4; - if (Bytes.size() < 4) { - Size = 0; - return MCDisassembler::Fail; - } + // TODO: This will need modification when supporting instruction set + // extensions with instructions > 32-bits (up to 176 bits wide). + uint32_t Insn; + DecodeStatus Result; - // Get the four bytes of the instruction. 
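decodeSImmOperand above receives the raw N-bit field and widens it with SignExtend64 (in the upstream source these helpers are the templates isUInt<N> and SignExtend64<N>). For readers unfamiliar with that helper, the equivalent shift trick is shown below as a standalone sketch:

#include <cassert>
#include <cstdint>

// Sign-extend the low N bits of Imm, the operation SignExtend64<N> performs
// in decodeSImmOperand: move the field's sign bit up to bit 63, then
// arithmetic-shift it back down.
int64_t signExtendN(uint64_t Imm, unsigned N) {
  return static_cast<int64_t>(Imm << (64 - N)) >> (64 - N);
}

int main() {
  assert(signExtendN(0x3F, 6) == -1);  // an all-ones simm6 field is -1
  assert(signExtendN(0x20, 6) == -32); // only the sign bit set
  assert(signExtendN(0x1F, 6) == 31);  // sign bit clear, value unchanged
}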
- uint32_t Inst = support::endian::read32le(Bytes.data()); + // It's a 32 bit instruction if bit 0 and 1 are 1. + if ((Bytes[0] & 0x3) == 0x3) { + Insn = support::endian::read32le(Bytes.data()); + DEBUG(dbgs() << "Trying RISCV32 table :\n"); + Result = decodeInstruction(DecoderTable32, MI, Insn, Address, this, STI); + Size = 4; + } else { + Insn = support::endian::read16le(Bytes.data()); + + if (!STI.getFeatureBits()[RISCV::Feature64Bit]) { + DEBUG(dbgs() << "Trying RISCV32Only_16 table (16-bit Instruction):\n"); + // Calling the auto-generated decoder function. + Result = decodeInstruction(DecoderTableRISCV32Only_16, MI, Insn, Address, + this, STI); + if (Result != MCDisassembler::Fail) { + Size = 2; + return Result; + } + } + + DEBUG(dbgs() << "Trying RISCV_C table (16-bit Instruction):\n"); + // Calling the auto-generated decoder function. + Result = decodeInstruction(DecoderTable16, MI, Insn, Address, this, STI); + Size = 2; + } - return decodeInstruction(DecoderTable32, MI, Inst, Address, this, STI); + return Result; } diff --git a/lib/Target/RISCV/InstPrinter/RISCVInstPrinter.cpp b/lib/Target/RISCV/InstPrinter/RISCVInstPrinter.cpp index 6bc4ea2cd0d9..f1fa2ecbcb22 100644 --- a/lib/Target/RISCV/InstPrinter/RISCVInstPrinter.cpp +++ b/lib/Target/RISCV/InstPrinter/RISCVInstPrinter.cpp @@ -16,7 +16,10 @@ #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" using namespace llvm; @@ -24,11 +27,19 @@ using namespace llvm; #define DEBUG_TYPE "asm-printer" // Include the auto-generated portion of the assembly writer. 
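The length dispatch in getInstruction above follows the RISC-V encoding rule that the two least-significant bits of the first halfword distinguish compressed from standard instructions: 00, 01 and 10 are 16-bit encodings, 11 is a 32-bit encoding. A tiny standalone sketch of that test:

#include <cassert>
#include <cstdint>

// Instruction length in bytes implied by the first byte of a little-endian
// RISC-V encoding, as the disassembler above decides it. Longer formats
// defined by the ISA (48-bit and up) are not handled by this patch.
unsigned riscvInsnLength(uint8_t FirstByte) {
  return (FirstByte & 0x3) == 0x3 ? 4 : 2;
}

int main() {
  assert(riscvInsnLength(0x13) == 4); // addi x0, x0, 0, the canonical 32-bit nop
  assert(riscvInsnLength(0x01) == 2); // c.nop
}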
+#define PRINT_ALIAS_INSTR #include "RISCVGenAsmWriter.inc" +static cl::opt +NoAliases("riscv-no-aliases", + cl::desc("Disable the emission of assembler pseudo instructions"), + cl::init(false), + cl::Hidden); + void RISCVInstPrinter::printInst(const MCInst *MI, raw_ostream &O, StringRef Annot, const MCSubtargetInfo &STI) { - printInstruction(MI, O); + if (NoAliases || !printAliasInstr(MI, STI, O)) + printInstruction(MI, STI, O); printAnnotation(O, Annot); } @@ -37,6 +48,7 @@ void RISCVInstPrinter::printRegName(raw_ostream &O, unsigned RegNo) const { } void RISCVInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O, const char *Modifier) { assert((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported"); const MCOperand &MO = MI->getOperand(OpNo); @@ -56,6 +68,7 @@ void RISCVInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, } void RISCVInstPrinter::printFenceArg(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned FenceArg = MI->getOperand(OpNo).getImm(); if ((FenceArg & RISCVFenceField::I) != 0) @@ -67,3 +80,11 @@ void RISCVInstPrinter::printFenceArg(const MCInst *MI, unsigned OpNo, if ((FenceArg & RISCVFenceField::W) != 0) O << 'w'; } + +void RISCVInstPrinter::printFRMArg(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, + raw_ostream &O) { + auto FRMArg = + static_cast(MI->getOperand(OpNo).getImm()); + O << RISCVFPRndMode::roundingModeToString(FRMArg); +} diff --git a/lib/Target/RISCV/InstPrinter/RISCVInstPrinter.h b/lib/Target/RISCV/InstPrinter/RISCVInstPrinter.h index 3bb4fa37f15f..241be8daf113 100644 --- a/lib/Target/RISCV/InstPrinter/RISCVInstPrinter.h +++ b/lib/Target/RISCV/InstPrinter/RISCVInstPrinter.h @@ -30,12 +30,21 @@ class RISCVInstPrinter : public MCInstPrinter { const MCSubtargetInfo &STI) override; void printRegName(raw_ostream &O, unsigned RegNo) const override; - void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O, - const char *Modifier = nullptr); - void printFenceArg(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, + raw_ostream &O, const char *Modifier = nullptr); + void printFenceArg(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); + void printFRMArg(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, + raw_ostream &O); // Autogenerated by tblgen. 
- void printInstruction(const MCInst *MI, raw_ostream &O); + void printInstruction(const MCInst *MI, const MCSubtargetInfo &STI, + raw_ostream &O); + bool printAliasInstr(const MCInst *MI, const MCSubtargetInfo &STI, + raw_ostream &O); + void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx, + unsigned PrintMethodIdx, + const MCSubtargetInfo &STI, raw_ostream &O); static const char *getRegisterName(unsigned RegNo, unsigned AltIdx = RISCV::ABIRegAltName); }; diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp index e4e17bed5af4..3dcd36f1b71b 100644 --- a/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp +++ b/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp @@ -27,12 +27,13 @@ using namespace llvm; namespace { class RISCVAsmBackend : public MCAsmBackend { + const MCSubtargetInfo &STI; uint8_t OSABI; bool Is64Bit; public: - RISCVAsmBackend(uint8_t OSABI, bool Is64Bit) - : MCAsmBackend(), OSABI(OSABI), Is64Bit(Is64Bit) {} + RISCVAsmBackend(const MCSubtargetInfo &STI, uint8_t OSABI, bool Is64Bit) + : MCAsmBackend(), STI(STI), OSABI(OSABI), Is64Bit(Is64Bit) {} ~RISCVAsmBackend() override {} void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, @@ -63,7 +64,9 @@ class RISCVAsmBackend : public MCAsmBackend { { "fixup_riscv_lo12_s", 0, 32, 0 }, { "fixup_riscv_pcrel_hi20", 12, 20, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_riscv_jal", 12, 20, MCFixupKindInfo::FKF_IsPCRel }, - { "fixup_riscv_branch", 0, 32, MCFixupKindInfo::FKF_IsPCRel } + { "fixup_riscv_branch", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_riscv_rvc_jump", 2, 11, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_riscv_rvc_branch", 0, 16, MCFixupKindInfo::FKF_IsPCRel } }; if (Kind < FirstTargetFixupKind) @@ -86,15 +89,24 @@ class RISCVAsmBackend : public MCAsmBackend { }; bool RISCVAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { - // Once support for the compressed instruction set is added, we will be able - // to conditionally support 16-bit NOPs - if ((Count % 4) != 0) + bool HasStdExtC = STI.getFeatureBits()[RISCV::FeatureStdExtC]; + unsigned MinNopLen = HasStdExtC ? 2 : 4; + + if ((Count % MinNopLen) != 0) return false; - // The canonical nop on RISC-V is addi x0, x0, 0 - for (uint64_t i = 0; i < Count; i += 4) + // The canonical nop on RISC-V is addi x0, x0, 0. + uint64_t Nop32Count = Count / 4; + for (uint64_t i = Nop32Count; i != 0; --i) OW->write32(0x13); + // The canonical nop on RVC is c.nop. + if (HasStdExtC) { + uint64_t Nop16Count = (Count - Nop32Count * 4) / 2; + for (uint64_t i = Nop16Count; i != 0; --i) + OW->write16(0x01); + } + return true; } @@ -152,7 +164,42 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value, Value = (Sbit << 31) | (Mid6 << 25) | (Lo4 << 8) | (Hi1 << 7); return Value; } + case RISCV::fixup_riscv_rvc_jump: { + // Need to produce offset[11|4|9:8|10|6|7|3:1|5] from the 11-bit Value. 
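Before the bit manipulation that follows, it may help to see the c.j offset scrambling in isolation. The sketch below packs the eleven meaningful offset bits into the 11-bit fixup field exactly as the case below does (the fixup table above then places that field at instruction bits 12..2), and adds an inverse mapping of our own so the permutation can be sanity-checked at compile time:

#include <cstdint>

// Forward packing of offset bits [11|4|9:8|10|6|7|3:1|5] into the c.j fixup
// field, mirroring the code below; decodeCJ is our inverse for round-trip
// checking and is not part of the patch.
constexpr uint32_t encodeCJ(uint32_t Off) {
  return (((Off >> 11) & 0x1) << 10) | (((Off >> 4) & 0x1) << 9) |
         (((Off >> 8) & 0x3) << 7) | (((Off >> 10) & 0x1) << 6) |
         (((Off >> 6) & 0x1) << 5) | (((Off >> 7) & 0x1) << 4) |
         (((Off >> 1) & 0x7) << 1) | ((Off >> 5) & 0x1);
}

constexpr uint32_t decodeCJ(uint32_t Field) {
  return (((Field >> 10) & 0x1) << 11) | (((Field >> 9) & 0x1) << 4) |
         (((Field >> 7) & 0x3) << 8) | (((Field >> 6) & 0x1) << 10) |
         (((Field >> 5) & 0x1) << 6) | (((Field >> 4) & 0x1) << 7) |
         (((Field >> 1) & 0x7) << 1) | ((Field & 0x1) << 5);
}

// c.j offsets are even 12-bit signed values; spot-check that the permutation
// round-trips (offset bit 0 is implicitly zero and never encoded).
static_assert(decodeCJ(encodeCJ(0x7FE)) == 0x7FE, "round trip");
static_assert(decodeCJ(encodeCJ(0x0AA)) == 0x0AA, "round trip");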
+ unsigned Bit11 = (Value >> 11) & 0x1; + unsigned Bit4 = (Value >> 4) & 0x1; + unsigned Bit9_8 = (Value >> 8) & 0x3; + unsigned Bit10 = (Value >> 10) & 0x1; + unsigned Bit6 = (Value >> 6) & 0x1; + unsigned Bit7 = (Value >> 7) & 0x1; + unsigned Bit3_1 = (Value >> 1) & 0x7; + unsigned Bit5 = (Value >> 5) & 0x1; + Value = (Bit11 << 10) | (Bit4 << 9) | (Bit9_8 << 7) | (Bit10 << 6) | + (Bit6 << 5) | (Bit7 << 4) | (Bit3_1 << 1) | Bit5; + return Value; + } + case RISCV::fixup_riscv_rvc_branch: { + // Need to produce offset[8|4:3], [reg 3 bit], offset[7:6|2:1|5] + unsigned Bit8 = (Value >> 8) & 0x1; + unsigned Bit7_6 = (Value >> 6) & 0x3; + unsigned Bit5 = (Value >> 5) & 0x1; + unsigned Bit4_3 = (Value >> 3) & 0x3; + unsigned Bit2_1 = (Value >> 1) & 0x3; + Value = (Bit8 << 12) | (Bit4_3 << 10) | (Bit7_6 << 5) | (Bit2_1 << 3) | + (Bit5 << 2); + return Value; + } + + } +} +static unsigned getSize(unsigned Kind) { + switch (Kind) { + default: + return 4; + case RISCV::fixup_riscv_rvc_jump: + case RISCV::fixup_riscv_rvc_branch: + return 2; } } @@ -171,6 +218,7 @@ void RISCVAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, Value <<= Info.TargetOffset; unsigned Offset = Fixup.getOffset(); + unsigned FullSize = getSize(Fixup.getKind()); #ifndef NDEBUG unsigned NumBytes = (Info.TargetSize + 7) / 8; @@ -179,7 +227,7 @@ void RISCVAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, // For each byte of the fragment that the fixup touches, mask in the // bits from the fixup value. - for (unsigned i = 0; i != 4; ++i) { + for (unsigned i = 0; i != FullSize; ++i) { Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff); } } @@ -192,9 +240,10 @@ RISCVAsmBackend::createObjectWriter(raw_pwrite_stream &OS) const { } // end anonymous namespace MCAsmBackend *llvm::createRISCVAsmBackend(const Target &T, + const MCSubtargetInfo &STI, const MCRegisterInfo &MRI, - const Triple &TT, StringRef CPU, const MCTargetOptions &Options) { + const Triple &TT = STI.getTargetTriple(); uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TT.getOS()); - return new RISCVAsmBackend(OSABI, TT.isArch64Bit()); + return new RISCVAsmBackend(STI, OSABI, TT.isArch64Bit()); } diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h index 9fafbb0a95ac..b278a2ed3903 100644 --- a/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h +++ b/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h @@ -15,6 +15,8 @@ #define LLVM_LIB_TARGET_RISCV_MCTARGETDESC_RISCVBASEINFO_H #include "RISCVMCTargetDesc.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" namespace llvm { @@ -24,14 +26,23 @@ namespace RISCVII { enum { InstFormatPseudo = 0, InstFormatR = 1, - InstFormatI = 2, - InstFormatS = 3, - InstFormatB = 4, - InstFormatU = 5, - InstFormatJ = 6, - InstFormatOther = 7, - - InstFormatMask = 15 + InstFormatR4 = 2, + InstFormatI = 3, + InstFormatS = 4, + InstFormatB = 5, + InstFormatU = 6, + InstFormatJ = 7, + InstFormatCR = 8, + InstFormatCI = 9, + InstFormatCSS = 10, + InstFormatCIW = 11, + InstFormatCL = 12, + InstFormatCS = 13, + InstFormatCB = 14, + InstFormatCJ = 15, + InstFormatOther = 16, + + InstFormatMask = 31 }; enum { @@ -51,6 +62,49 @@ enum FenceField { W = 1 }; } + +// Describes the supported floating point rounding mode encodings. 
+namespace RISCVFPRndMode { +enum RoundingMode { + RNE = 0, + RTZ = 1, + RDN = 2, + RUP = 3, + RMM = 4, + DYN = 7, + Invalid +}; + +inline static StringRef roundingModeToString(RoundingMode RndMode) { + switch (RndMode) { + default: + llvm_unreachable("Unknown floating point rounding mode"); + case RISCVFPRndMode::RNE: + return "rne"; + case RISCVFPRndMode::RTZ: + return "rtz"; + case RISCVFPRndMode::RDN: + return "rdn"; + case RISCVFPRndMode::RUP: + return "rup"; + case RISCVFPRndMode::RMM: + return "rmm"; + case RISCVFPRndMode::DYN: + return "dyn"; + } +} + +inline static RoundingMode stringToRoundingMode(StringRef Str) { + return StringSwitch(Str) + .Case("rne", RISCVFPRndMode::RNE) + .Case("rtz", RISCVFPRndMode::RTZ) + .Case("rdn", RISCVFPRndMode::RDN) + .Case("rup", RISCVFPRndMode::RUP) + .Case("rmm", RISCVFPRndMode::RMM) + .Case("dyn", RISCVFPRndMode::DYN) + .Default(RISCVFPRndMode::Invalid); +} +} // namespace RISCVFPRndMode } // namespace llvm #endif diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp b/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp index e256156dc962..ad53228c104a 100644 --- a/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp +++ b/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp @@ -59,6 +59,10 @@ unsigned RISCVELFObjectWriter::getRelocType(MCContext &Ctx, return ELF::R_RISCV_JAL; case RISCV::fixup_riscv_branch: return ELF::R_RISCV_BRANCH; + case RISCV::fixup_riscv_rvc_jump: + return ELF::R_RISCV_RVC_JUMP; + case RISCV::fixup_riscv_rvc_branch: + return ELF::R_RISCV_RVC_BRANCH; } } diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h b/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h index 115229414d5e..cfb5d99e79f5 100644 --- a/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h +++ b/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h @@ -35,6 +35,12 @@ enum Fixups { // fixup_riscv_branch - 12-bit fixup for symbol references in the branch // instructions fixup_riscv_branch, + // fixup_riscv_rvc_jump - 11-bit fixup for symbol references in the + // compressed jump instruction + fixup_riscv_rvc_jump, + // fixup_riscv_rvc_branch - 8-bit fixup for symbol references in the + // compressed branch instruction + fixup_riscv_rvc_branch, // fixup_riscv_invalid - used as a sentinel and a marker, must be last fixup fixup_riscv_invalid, diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp b/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp index f94c37aae8f4..641997e67e06 100644 --- a/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp +++ b/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp @@ -83,9 +83,25 @@ MCCodeEmitter *llvm::createRISCVMCCodeEmitter(const MCInstrInfo &MCII, void RISCVMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const { - // For now, we only support RISC-V instructions with 32-bit length - uint32_t Bits = getBinaryCodeForInstr(MI, Fixups, STI); - support::endian::Writer(OS).write(Bits); + const MCInstrDesc &Desc = MCII.get(MI.getOpcode()); + // Get byte count of instruction. + unsigned Size = Desc.getSize(); + + switch (Size) { + default: + llvm_unreachable("Unhandled encodeInstruction length!"); + case 2: { + uint16_t Bits = getBinaryCodeForInstr(MI, Fixups, STI); + support::endian::Writer(OS).write(Bits); + break; + } + case 4: { + uint32_t Bits = getBinaryCodeForInstr(MI, Fixups, STI); + support::endian::Writer(OS).write(Bits); + break; + } + } + ++MCNumEmitted; // Keep track of the # of mi's emitted. 
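A short worked example of the nop padding policy introduced in writeNopData further up in this backend change, shown as standalone code (the byte values, not the MCObjectWriter calls, are the point): padding is emitted as whole 4-byte canonical nops plus at most one trailing 2-byte c.nop when the C extension is present.

#include <cassert>
#include <cstdint>
#include <vector>

// addi x0, x0, 0 encodes as 0x00000013 and c.nop as 0x0001; both are written
// little-endian. Without the C extension only multiples of 4 bytes can be
// padded, matching the early return in writeNopData.
bool emitNopPadding(uint64_t Count, bool HasStdExtC, std::vector<uint8_t> &Out) {
  unsigned MinNopLen = HasStdExtC ? 2 : 4;
  if (Count % MinNopLen != 0)
    return false;
  for (uint64_t I = 0; I < Count / 4; ++I)
    Out.insert(Out.end(), {0x13, 0x00, 0x00, 0x00});
  if (HasStdExtC && Count % 4 == 2)
    Out.insert(Out.end(), {0x01, 0x00});
  return true;
}

int main() {
  std::vector<uint8_t> Bytes;
  assert(emitNopPadding(10, /*HasStdExtC=*/true, Bytes));   // 2 nops + 1 c.nop
  assert(Bytes.size() == 10);
  assert(!emitNopPadding(10, /*HasStdExtC=*/false, Bytes)); // 10 % 4 != 0
}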
} @@ -161,6 +177,10 @@ unsigned RISCVMCCodeEmitter::getImmOpValue(const MCInst &MI, unsigned OpNo, FixupKind = RISCV::fixup_riscv_jal; } else if (MIFrm == RISCVII::InstFormatB) { FixupKind = RISCV::fixup_riscv_branch; + } else if (MIFrm == RISCVII::InstFormatCJ) { + FixupKind = RISCV::fixup_riscv_rvc_jump; + } else if (MIFrm == RISCVII::InstFormatCB) { + FixupKind = RISCV::fixup_riscv_rvc_branch; } } diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h b/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h index bea2f8800fa6..ef58a6b8cbca 100644 --- a/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h +++ b/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h @@ -40,8 +40,8 @@ MCCodeEmitter *createRISCVMCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, MCContext &Ctx); -MCAsmBackend *createRISCVAsmBackend(const Target &T, const MCRegisterInfo &MRI, - const Triple &TT, StringRef CPU, +MCAsmBackend *createRISCVAsmBackend(const Target &T, const MCSubtargetInfo &STI, + const MCRegisterInfo &MRI, const MCTargetOptions &Options); std::unique_ptr diff --git a/lib/Target/RISCV/RISCV.td b/lib/Target/RISCV/RISCV.td index 63d2b827014b..4caaaa43c10b 100644 --- a/lib/Target/RISCV/RISCV.td +++ b/lib/Target/RISCV/RISCV.td @@ -25,8 +25,32 @@ def FeatureStdExtA def HasStdExtA : Predicate<"Subtarget->hasStdExtA()">, AssemblerPredicate<"FeatureStdExtA">; +def FeatureStdExtF + : SubtargetFeature<"f", "HasStdExtF", "true", + "'F' (Single-Precision Floating-Point)">; +def HasStdExtF : Predicate<"Subtarget->hasStdExtF()">, + AssemblerPredicate<"FeatureStdExtF">; + +def FeatureStdExtD + : SubtargetFeature<"d", "HasStdExtD", "true", + "'D' (Double-Precision Floating-Point)", + [FeatureStdExtF]>; +def HasStdExtD : Predicate<"Subtarget->hasStdExtD()">, + AssemblerPredicate<"FeatureStdExtD">; + +def FeatureStdExtC + : SubtargetFeature<"c", "HasStdExtC", "true", + "'C' (Compressed Instructions)">; +def HasStdExtC : Predicate<"Subtarget->hasStdExtC()">, + AssemblerPredicate<"FeatureStdExtC">; + + def Feature64Bit : SubtargetFeature<"64bit", "HasRV64", "true", "Implements RV64">; +def IsRV64 : Predicate<"Subtarget->is64Bit()">, + AssemblerPredicate<"Feature64Bit">; +def IsRV32 : Predicate<"!Subtarget->is64Bit()">, + AssemblerPredicate<"!Feature64Bit">; def RV64 : HwMode<"+64bit">; def RV32 : HwMode<"-64bit">; @@ -57,9 +81,15 @@ def RISCVInstrInfo : InstrInfo { def RISCVAsmParser : AsmParser { let ShouldEmitMatchRegisterAltName = 1; + let AllowDuplicateRegisterNames = 1; +} + +def RISCVAsmWriter : AsmWriter { + int PassSubtarget = 1; } def RISCV : Target { let InstructionSet = RISCVInstrInfo; let AssemblyParsers = [RISCVAsmParser]; + let AssemblyWriters = [RISCVAsmWriter]; } diff --git a/lib/Target/RISCV/RISCVAsmPrinter.cpp b/lib/Target/RISCV/RISCVAsmPrinter.cpp index 4808e6c73c50..bbaa8ec454fa 100644 --- a/lib/Target/RISCV/RISCVAsmPrinter.cpp +++ b/lib/Target/RISCV/RISCVAsmPrinter.cpp @@ -41,6 +41,13 @@ class RISCVAsmPrinter : public AsmPrinter { void EmitInstruction(const MachineInstr *MI) override; + bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &OS) override; + bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &OS) override; + bool emitPseudoExpansionLowering(MCStreamer &OutStreamer, const MachineInstr *MI); @@ -65,6 +72,54 @@ void RISCVAsmPrinter::EmitInstruction(const MachineInstr *MI) { EmitToStreamer(*OutStreamer, TmpInst); } +bool 
RISCVAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, + const char *ExtraCode, raw_ostream &OS) { + if (AsmVariant != 0) + report_fatal_error("There are no defined alternate asm variants"); + + // First try the generic code, which knows about modifiers like 'c' and 'n'. + if (!AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, OS)) + return false; + + if (!ExtraCode) { + const MachineOperand &MO = MI->getOperand(OpNo); + switch (MO.getType()) { + case MachineOperand::MO_Immediate: + OS << MO.getImm(); + return false; + case MachineOperand::MO_Register: + OS << RISCVInstPrinter::getRegisterName(MO.getReg()); + return false; + default: + break; + } + } + + return true; +} + +bool RISCVAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, + unsigned OpNo, unsigned AsmVariant, + const char *ExtraCode, + raw_ostream &OS) { + if (AsmVariant != 0) + report_fatal_error("There are no defined alternate asm variants"); + + if (!ExtraCode) { + const MachineOperand &MO = MI->getOperand(OpNo); + // For now, we only support register memory operands in registers and + // assume there is no addend + if (!MO.isReg()) + return true; + + OS << "0(" << RISCVInstPrinter::getRegisterName(MO.getReg()) << ")"; + return false; + } + + return AsmPrinter::PrintAsmMemoryOperand(MI, OpNo, AsmVariant, ExtraCode, OS); +} + // Force static initialization. extern "C" void LLVMInitializeRISCVAsmPrinter() { RegisterAsmPrinter X(getTheRISCV32Target()); diff --git a/lib/Target/RISCV/RISCVCallingConv.td b/lib/Target/RISCV/RISCVCallingConv.td index 0b7a523424c5..d2b17c64c9c2 100644 --- a/lib/Target/RISCV/RISCVCallingConv.td +++ b/lib/Target/RISCV/RISCVCallingConv.td @@ -11,20 +11,8 @@ // //===----------------------------------------------------------------------===// -// RISCV 32-bit C return-value convention. -def RetCC_RISCV32 : CallingConv<[CCIfType<[i32], CCAssignToReg<[X10, X11]>>]>; - -// RISCV 32-bit C Calling convention. -def CC_RISCV32 : CallingConv<[ - // Promote i8/i16 args to i32 - CCIfType<[ i8, i16 ], CCPromoteToType>, - - // All arguments get passed in integer registers if there is space. - CCIfType<[i32], CCAssignToReg<[ X10, X11, X12, X13, X14, X15, X16, X17]>>, - - // Could be assigned to the stack in 8-byte aligned units, but unsupported - CCAssignToStack<8, 8> -]>; +// The RISC-V calling convention is handled with custom code in +// RISCVISelLowering.cpp (CC_RISCV). 
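A hedged usage illustration of the inline-assembly hooks above (this is our example, not part of the patch): an "m"-constrained operand is accepted by SelectInlineAsmMemoryOperand as a bare address and printed by PrintAsmMemoryOperand as a register base with a zero displacement, for example 0(a0).

// Compiled for RISC-V with these hooks, the memory operand %1 below would be
// printed in the form "0(<reg>)", since the patch supports only a register
// base and no addend for inline-assembly memory operands.
int loadWord(int *P) {
  int V;
  asm volatile("lw %0, %1" : "=r"(V) : "m"(*P));
  return V;
}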
def CSR : CalleeSavedRegs<(add X1, X3, X4, X8, X9, (sequence "X%u", 18, 27))>; diff --git a/lib/Target/RISCV/RISCVFrameLowering.cpp b/lib/Target/RISCV/RISCVFrameLowering.cpp index fd3b258e26cc..33703f5ec205 100644 --- a/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -12,18 +12,237 @@ //===----------------------------------------------------------------------===// #include "RISCVFrameLowering.h" +#include "RISCVMachineFunctionInfo.h" #include "RISCVSubtarget.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" using namespace llvm; -bool RISCVFrameLowering::hasFP(const MachineFunction &MF) const { return true; } +bool RISCVFrameLowering::hasFP(const MachineFunction &MF) const { + const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); + + const MachineFrameInfo &MFI = MF.getFrameInfo(); + return MF.getTarget().Options.DisableFramePointerElim(MF) || + RegInfo->needsStackRealignment(MF) || MFI.hasVarSizedObjects() || + MFI.isFrameAddressTaken(); +} + +// Determines the size of the frame and maximum call frame size. +void RISCVFrameLowering::determineFrameLayout(MachineFunction &MF) const { + MachineFrameInfo &MFI = MF.getFrameInfo(); + const RISCVRegisterInfo *RI = STI.getRegisterInfo(); + + // Get the number of bytes to allocate from the FrameInfo. + uint64_t FrameSize = MFI.getStackSize(); + + // Get the alignment. + uint64_t StackAlign = RI->needsStackRealignment(MF) ? MFI.getMaxAlignment() + : getStackAlignment(); + + // Get the maximum call frame size of all the calls. + uint64_t MaxCallFrameSize = MFI.getMaxCallFrameSize(); + + // If we have dynamic alloca then MaxCallFrameSize needs to be aligned so + // that allocations will be aligned. + if (MFI.hasVarSizedObjects()) + MaxCallFrameSize = alignTo(MaxCallFrameSize, StackAlign); + + // Update maximum call frame size. + MFI.setMaxCallFrameSize(MaxCallFrameSize); + + // Include call frame size in total. + if (!(hasReservedCallFrame(MF) && MFI.adjustsStack())) + FrameSize += MaxCallFrameSize; + + // Make sure the frame is aligned. + FrameSize = alignTo(FrameSize, StackAlign); + + // Update frame info. + MFI.setStackSize(FrameSize); +} + +void RISCVFrameLowering::adjustReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, unsigned DestReg, + unsigned SrcReg, int64_t Val, + MachineInstr::MIFlag Flag) const { + MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + const RISCVInstrInfo *TII = STI.getInstrInfo(); + + if (DestReg == SrcReg && Val == 0) + return; + + if (isInt<12>(Val)) { + BuildMI(MBB, MBBI, DL, TII->get(RISCV::ADDI), DestReg) + .addReg(SrcReg) + .addImm(Val) + .setMIFlag(Flag); + } else if (isInt<32>(Val)) { + unsigned Opc = RISCV::ADD; + bool isSub = Val < 0; + if (isSub) { + Val = -Val; + Opc = RISCV::SUB; + } + + unsigned ScratchReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); + TII->movImm32(MBB, MBBI, DL, ScratchReg, Val, Flag); + BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg) + .addReg(SrcReg) + .addReg(ScratchReg, RegState::Kill) + .setMIFlag(Flag); + } else { + report_fatal_error("adjustReg cannot yet handle adjustments >32 bits"); + } +} + +// Returns the register used to hold the frame pointer. +static unsigned getFPReg(const RISCVSubtarget &STI) { return RISCV::X8; } + +// Returns the register used to hold the stack pointer. 
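adjustReg above falls back to materialising the adjustment in a scratch register when it does not fit addi's 12-bit signed immediate. The standalone sketch below shows the usual lui/addi split that a movImm32-style helper performs; it is our illustration of the arithmetic, not the patch's code.

#include <cassert>
#include <cstdint>

// Split Val into a 20-bit lui immediate and a 12-bit signed addi immediate.
// Adding 0x800 before taking the upper bits compensates for the sign
// extension of the low 12 bits.
void splitImm32(int32_t Val, uint32_t &Hi20, int32_t &Lo12) {
  Hi20 = (static_cast<uint32_t>(Val) + 0x800) >> 12;
  Lo12 = Val - static_cast<int32_t>(Hi20 << 12);
}

int main() {
  uint32_t Hi;
  int32_t Lo;
  splitImm32(-4112, Hi, Lo); // e.g. the sp adjustment for a 4112-byte frame
  assert(Lo >= -2048 && Lo < 2048);
  assert(static_cast<int32_t>(Hi << 12) + Lo == -4112);
}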
+static unsigned getSPReg(const RISCVSubtarget &STI) { return RISCV::X2; } void RISCVFrameLowering::emitPrologue(MachineFunction &MF, - MachineBasicBlock &MBB) const {} + MachineBasicBlock &MBB) const { + assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported"); + + MachineFrameInfo &MFI = MF.getFrameInfo(); + auto *RVFI = MF.getInfo(); + MachineBasicBlock::iterator MBBI = MBB.begin(); + + unsigned FPReg = getFPReg(STI); + unsigned SPReg = getSPReg(STI); + + // Debug location must be unknown since the first debug location is used + // to determine the end of the prologue. + DebugLoc DL; + + // Determine the correct frame layout + determineFrameLayout(MF); + + // FIXME (note copied from Lanai): This appears to be overallocating. Needs + // investigation. Get the number of bytes to allocate from the FrameInfo. + uint64_t StackSize = MFI.getStackSize(); + + // Early exit if there is no need to allocate on the stack + if (StackSize == 0 && !MFI.adjustsStack()) + return; + + // Allocate space on the stack if necessary. + adjustReg(MBB, MBBI, DL, SPReg, SPReg, -StackSize, MachineInstr::FrameSetup); + + // The frame pointer is callee-saved, and code has been generated for us to + // save it to the stack. We need to skip over the storing of callee-saved + // registers as the frame pointer must be modified after it has been saved + // to the stack, not before. + // FIXME: assumes exactly one instruction is used to save each callee-saved + // register. + const std::vector &CSI = MFI.getCalleeSavedInfo(); + std::advance(MBBI, CSI.size()); + + // Generate new FP. + if (hasFP(MF)) + adjustReg(MBB, MBBI, DL, FPReg, SPReg, + StackSize - RVFI->getVarArgsSaveSize(), MachineInstr::FrameSetup); +} void RISCVFrameLowering::emitEpilogue(MachineFunction &MF, - MachineBasicBlock &MBB) const {} + MachineBasicBlock &MBB) const { + MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); + const RISCVRegisterInfo *RI = STI.getRegisterInfo(); + MachineFrameInfo &MFI = MF.getFrameInfo(); + auto *RVFI = MF.getInfo(); + DebugLoc DL = MBBI->getDebugLoc(); + unsigned FPReg = getFPReg(STI); + unsigned SPReg = getSPReg(STI); + + // Skip to before the restores of callee-saved registers + // FIXME: assumes exactly one instruction is used to restore each + // callee-saved register. + MachineBasicBlock::iterator LastFrameDestroy = MBBI; + std::advance(LastFrameDestroy, -MFI.getCalleeSavedInfo().size()); + + uint64_t StackSize = MFI.getStackSize(); + + // Restore the stack pointer using the value of the frame pointer. Only + // necessary if the stack pointer was modified, meaning the stack size is + // unknown. + if (RI->needsStackRealignment(MF) || MFI.hasVarSizedObjects()) { + assert(hasFP(MF) && "frame pointer should not have been eliminated"); + adjustReg(MBB, LastFrameDestroy, DL, SPReg, FPReg, + -StackSize + RVFI->getVarArgsSaveSize(), + MachineInstr::FrameDestroy); + } + + // Deallocate stack + adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackSize, MachineInstr::FrameDestroy); +} + +int RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, + int FI, + unsigned &FrameReg) const { + const MachineFrameInfo &MFI = MF.getFrameInfo(); + const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); + const auto *RVFI = MF.getInfo(); + + // Callee-saved registers should be referenced relative to the stack + // pointer (positive offset), otherwise use the frame pointer (negative + // offset). 
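The offset computation that follows can be summarised with a toy model (ours, with simplified bookkeeping): object offsets are recorded relative to the incoming stack pointer, callee-saved spills are rewritten to positive sp-relative offsets by adding the frame size, and everything else is fp-relative once the prologue has set fp just below the varargs save area.

#include <cassert>
#include <cstdint>
#include <string>

// Toy model of the base-register/offset choice made below. ObjOffset is the
// object's offset relative to the incoming sp (negative, into the frame).
int64_t frameOffset(int64_t ObjOffset, bool IsCalleeSaveSlot, bool HasFP,
                    uint64_t StackSize, uint64_t VarArgsSaveSize,
                    std::string &BaseReg) {
  if (IsCalleeSaveSlot || !HasFP) {
    BaseReg = "sp";
    return ObjOffset + static_cast<int64_t>(StackSize);
  }
  BaseReg = "fp"; // fp == incoming sp - VarArgsSaveSize after the prologue
  return ObjOffset + static_cast<int64_t>(VarArgsSaveSize);
}

int main() {
  std::string Base;
  // 64-byte frame, no varargs: a callee-saved slot recorded at -8 becomes 56(sp).
  assert(frameOffset(-8, true, true, 64, 0, Base) == 56 && Base == "sp");
  // A local recorded at -24 is addressed as -24(fp) when a frame pointer is kept.
  assert(frameOffset(-24, false, true, 64, 0, Base) == -24 && Base == "fp");
}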
+ const std::vector &CSI = MFI.getCalleeSavedInfo(); + int MinCSFI = 0; + int MaxCSFI = -1; + + int Offset = MFI.getObjectOffset(FI) - getOffsetOfLocalArea() + + MFI.getOffsetAdjustment(); + + if (CSI.size()) { + MinCSFI = CSI[0].getFrameIdx(); + MaxCSFI = CSI[CSI.size() - 1].getFrameIdx(); + } + + if (FI >= MinCSFI && FI <= MaxCSFI) { + FrameReg = RISCV::X2; + Offset += MF.getFrameInfo().getStackSize(); + } else { + FrameReg = RI->getFrameRegister(MF); + if (hasFP(MF)) + Offset += RVFI->getVarArgsSaveSize(); + else + Offset += MF.getFrameInfo().getStackSize(); + } + return Offset; +} + +void RISCVFrameLowering::determineCalleeSaves(MachineFunction &MF, + BitVector &SavedRegs, + RegScavenger *RS) const { + TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); + // Unconditionally spill RA and FP only if the function uses a frame + // pointer. + if (hasFP(MF)) { + SavedRegs.set(RISCV::X1); + SavedRegs.set(RISCV::X8); + } +} + +void RISCVFrameLowering::processFunctionBeforeFrameFinalized( + MachineFunction &MF, RegScavenger *RS) const { + const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); + MachineFrameInfo &MFI = MF.getFrameInfo(); + const TargetRegisterClass *RC = &RISCV::GPRRegClass; + // estimateStackSize has been observed to under-estimate the final stack + // size, so give ourselves wiggle-room by checking for stack size + // representable an 11-bit signed field rather than 12-bits. + // FIXME: It may be possible to craft a function with a small stack that + // still needs an emergency spill slot for branch relaxation. This case + // would currently be missed. + if (!isInt<11>(MFI.estimateStackSize(MF))) { + int RegScavFI = MFI.CreateStackObject( + RegInfo->getSpillSize(*RC), RegInfo->getSpillAlignment(*RC), false); + RS->addScavengingFrameIndex(RegScavFI); + } +} diff --git a/lib/Target/RISCV/RISCVFrameLowering.h b/lib/Target/RISCV/RISCVFrameLowering.h index 0b2c7a402982..ccf7e247b556 100644 --- a/lib/Target/RISCV/RISCVFrameLowering.h +++ b/lib/Target/RISCV/RISCVFrameLowering.h @@ -24,11 +24,21 @@ class RISCVFrameLowering : public TargetFrameLowering { explicit RISCVFrameLowering(const RISCVSubtarget &STI) : TargetFrameLowering(StackGrowsDown, /*StackAlignment=*/16, - /*LocalAreaOffset=*/0) {} + /*LocalAreaOffset=*/0), + STI(STI) {} void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; + int getFrameIndexReference(const MachineFunction &MF, int FI, + unsigned &FrameReg) const override; + + void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, + RegScavenger *RS) const override; + + void processFunctionBeforeFrameFinalized(MachineFunction &MF, + RegScavenger *RS) const override; + bool hasFP(const MachineFunction &MF) const override; MachineBasicBlock::iterator @@ -36,6 +46,15 @@ class RISCVFrameLowering : public TargetFrameLowering { MachineBasicBlock::iterator MI) const override { return MBB.erase(MI); } + +protected: + const RISCVSubtarget &STI; + +private: + void determineFrameLayout(MachineFunction &MF) const; + void adjustReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, + int64_t Val, MachineInstr::MIFlag Flag) const; }; } #endif diff --git a/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index 5b038df4ae31..23a0382d4427 100644 --- a/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -14,6 +14,7 
@@ #include "RISCV.h" #include "MCTargetDesc/RISCVMCTargetDesc.h" #include "RISCVTargetMachine.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" @@ -43,6 +44,11 @@ class RISCVDAGToDAGISel final : public SelectionDAGISel { void Select(SDNode *Node) override; + bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, + std::vector &OutOps) override; + + bool SelectAddrFI(SDValue Addr, SDValue &Base); + // Include the pieces autogenerated from the target description. #include "RISCVGenDAGISel.inc" }; @@ -76,11 +82,44 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { return; } } + if (Opcode == ISD::FrameIndex) { + SDLoc DL(Node); + SDValue Imm = CurDAG->getTargetConstant(0, DL, XLenVT); + int FI = dyn_cast(Node)->getIndex(); + EVT VT = Node->getValueType(0); + SDValue TFI = CurDAG->getTargetFrameIndex(FI, VT); + ReplaceNode(Node, CurDAG->getMachineNode(RISCV::ADDI, DL, VT, TFI, Imm)); + return; + } // Select the default instruction. SelectCode(Node); } +bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand( + const SDValue &Op, unsigned ConstraintID, std::vector &OutOps) { + switch (ConstraintID) { + case InlineAsm::Constraint_i: + case InlineAsm::Constraint_m: + // We just support simple memory operands that have a single address + // operand and need no special handling. + OutOps.push_back(Op); + return false; + default: + break; + } + + return true; +} + +bool RISCVDAGToDAGISel::SelectAddrFI(SDValue Addr, SDValue &Base) { + if (auto FIN = dyn_cast(Addr)) { + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT()); + return true; + } + return false; +} + // This pass converts a legalized DAG into a RISCV-specific DAG, ready // for instruction scheduling. FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM) { diff --git a/lib/Target/RISCV/RISCVISelLowering.cpp b/lib/Target/RISCV/RISCVISelLowering.cpp index 4801884e242b..40a9683e9633 100644 --- a/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/lib/Target/RISCV/RISCVISelLowering.cpp @@ -14,6 +14,7 @@ #include "RISCVISelLowering.h" #include "RISCV.h" +#include "RISCVMachineFunctionInfo.h" #include "RISCVRegisterInfo.h" #include "RISCVSubtarget.h" #include "RISCVTargetMachine.h" @@ -53,11 +54,21 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setLoadExtAction(N, XLenVT, MVT::i1, Promote); // TODO: add all necessary setOperationAction calls. 
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand); + setOperationAction(ISD::BR_JT, MVT::Other, Expand); setOperationAction(ISD::BR_CC, XLenVT, Expand); setOperationAction(ISD::SELECT, XLenVT, Custom); setOperationAction(ISD::SELECT_CC, XLenVT, Expand); + setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); + setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); + + setOperationAction(ISD::VASTART, MVT::Other, Custom); + setOperationAction(ISD::VAARG, MVT::Other, Expand); + setOperationAction(ISD::VACOPY, MVT::Other, Expand); + setOperationAction(ISD::VAEND, MVT::Other, Expand); + for (auto VT : {MVT::i1, MVT::i8, MVT::i16}) setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand); @@ -66,18 +77,20 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SUBC, XLenVT, Expand); setOperationAction(ISD::SUBE, XLenVT, Expand); - setOperationAction(ISD::SREM, XLenVT, Expand); + if (!Subtarget.hasStdExtM()) { + setOperationAction(ISD::MUL, XLenVT, Expand); + setOperationAction(ISD::MULHS, XLenVT, Expand); + setOperationAction(ISD::MULHU, XLenVT, Expand); + setOperationAction(ISD::SDIV, XLenVT, Expand); + setOperationAction(ISD::UDIV, XLenVT, Expand); + setOperationAction(ISD::SREM, XLenVT, Expand); + setOperationAction(ISD::UREM, XLenVT, Expand); + } + setOperationAction(ISD::SDIVREM, XLenVT, Expand); - setOperationAction(ISD::SDIV, XLenVT, Expand); - setOperationAction(ISD::UREM, XLenVT, Expand); setOperationAction(ISD::UDIVREM, XLenVT, Expand); - setOperationAction(ISD::UDIV, XLenVT, Expand); - - setOperationAction(ISD::MUL, XLenVT, Expand); setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand); setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand); - setOperationAction(ISD::MULHS, XLenVT, Expand); - setOperationAction(ISD::MULHU, XLenVT, Expand); setOperationAction(ISD::SHL_PARTS, XLenVT, Expand); setOperationAction(ISD::SRL_PARTS, XLenVT, Expand); @@ -153,6 +166,12 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, return lowerBlockAddress(Op, DAG); case ISD::SELECT: return lowerSELECT(Op, DAG); + case ISD::VASTART: + return lowerVASTART(Op, DAG); + case ISD::FRAMEADDR: + return LowerFRAMEADDR(Op, DAG); + case ISD::RETURNADDR: + return LowerRETURNADDR(Op, DAG); } } @@ -256,6 +275,74 @@ SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(RISCVISD::SELECT_CC, DL, VTs, Ops); } +SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + RISCVMachineFunctionInfo *FuncInfo = MF.getInfo(); + + SDLoc DL(Op); + SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), + getPointerTy(MF.getDataLayout())); + + // vastart just stores the address of the VarArgsFrameIndex slot into the + // memory location argument. 
+ const Value *SV = cast(Op.getOperand(2))->getValue(); + return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1), + MachinePointerInfo(SV)); +} + +SDValue RISCVTargetLowering::LowerFRAMEADDR(SDValue Op, + SelectionDAG &DAG) const { + const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo(); + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo &MFI = MF.getFrameInfo(); + MFI.setFrameAddressIsTaken(true); + unsigned FrameReg = RI.getFrameRegister(MF); + int XLenInBytes = Subtarget.getXLen() / 8; + + EVT VT = Op.getValueType(); + SDLoc DL(Op); + SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT); + unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); + while (Depth--) { + int Offset = -(XLenInBytes * 2); + SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr, + DAG.getIntPtrConstant(Offset, DL)); + FrameAddr = + DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo()); + } + return FrameAddr; +} + +SDValue RISCVTargetLowering::LowerRETURNADDR(SDValue Op, + SelectionDAG &DAG) const { + const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo(); + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo &MFI = MF.getFrameInfo(); + MFI.setReturnAddressIsTaken(true); + MVT XLenVT = Subtarget.getXLenVT(); + int XLenInBytes = Subtarget.getXLen() / 8; + + if (verifyReturnAddressArgumentIsConstant(Op, DAG)) + return SDValue(); + + EVT VT = Op.getValueType(); + SDLoc DL(Op); + unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); + if (Depth) { + int Off = -XLenInBytes; + SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); + SDValue Offset = DAG.getConstant(Off, DL, VT); + return DAG.getLoad(VT, DL, DAG.getEntryNode(), + DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset), + MachinePointerInfo()); + } + + // Return the value of the return address register, marking it an implicit + // live-in. + unsigned Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT)); + return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT); +} + MachineBasicBlock * RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const { @@ -323,7 +410,266 @@ RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, } // Calling Convention Implementation. -#include "RISCVGenCallingConv.inc" +// The expectations for frontend ABI lowering vary from target to target. +// Ideally, an LLVM frontend would be able to avoid worrying about many ABI +// details, but this is a longer term goal. For now, we simply try to keep the +// role of the frontend as simple and well-defined as possible. The rules can +// be summarised as: +// * Never split up large scalar arguments. We handle them here. +// * If a hardfloat calling convention is being used, and the struct may be +// passed in a pair of registers (fp+fp, int+fp), and both registers are +// available, then pass as two separate arguments. If either the GPRs or FPRs +// are exhausted, then pass according to the rule below. +// * If a struct could never be passed in registers or directly in a stack +// slot (as it is larger than 2*XLEN and the floating point rules don't +// apply), then pass it using a pointer with the byval attribute. +// * If a struct is less than 2*XLEN, then coerce to either a two-element +// word-sized array or a 2*XLEN scalar (depending on alignment). +// * The frontend can determine whether a struct is returned by reference or +// not based on its size and fields. 
If it will be returned by reference, the +// frontend must modify the prototype so a pointer with the sret annotation is +// passed as the first argument. This is not necessary for large scalar +// returns. +// * Struct return values and varargs should be coerced to structs containing +// register-size fields in the same situations they would be for fixed +// arguments. + +static const MCPhysReg ArgGPRs[] = { + RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, + RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17 +}; + +// Pass a 2*XLEN argument that has been split into two XLEN values through +// registers or the stack as necessary. +static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1, + ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, + MVT ValVT2, MVT LocVT2, + ISD::ArgFlagsTy ArgFlags2) { + unsigned XLenInBytes = XLen / 8; + if (unsigned Reg = State.AllocateReg(ArgGPRs)) { + // At least one half can be passed via register. + State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg, + VA1.getLocVT(), CCValAssign::Full)); + } else { + // Both halves must be passed on the stack, with proper alignment. + unsigned StackAlign = std::max(XLenInBytes, ArgFlags1.getOrigAlign()); + State.addLoc( + CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(), + State.AllocateStack(XLenInBytes, StackAlign), + VA1.getLocVT(), CCValAssign::Full)); + State.addLoc(CCValAssign::getMem( + ValNo2, ValVT2, State.AllocateStack(XLenInBytes, XLenInBytes), LocVT2, + CCValAssign::Full)); + return false; + } + + if (unsigned Reg = State.AllocateReg(ArgGPRs)) { + // The second half can also be passed via register. + State.addLoc( + CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full)); + } else { + // The second half is passed via the stack, without additional alignment. + State.addLoc(CCValAssign::getMem( + ValNo2, ValVT2, State.AllocateStack(XLenInBytes, XLenInBytes), LocVT2, + CCValAssign::Full)); + } + + return false; +} + +// Implements the RISC-V calling convention. Returns true upon failure. +static bool CC_RISCV(const DataLayout &DL, unsigned ValNo, MVT ValVT, MVT LocVT, + CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, + CCState &State, bool IsFixed, bool IsRet, Type *OrigTy) { + unsigned XLen = DL.getLargestLegalIntTypeSizeInBits(); + assert(XLen == 32 || XLen == 64); + MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64; + assert(ValVT == XLenVT && "Unexpected ValVT"); + assert(LocVT == XLenVT && "Unexpected LocVT"); + + // Any return value split in to more than two values can't be returned + // directly. + if (IsRet && ValNo > 1) + return true; + + // If this is a variadic argument, the RISC-V calling convention requires + // that it is assigned an 'even' or 'aligned' register if it has 8-byte + // alignment (RV32) or 16-byte alignment (RV64). An aligned register should + // be used regardless of whether the original argument was split during + // legalisation or not. The argument will not be passed by registers if the + // original type is larger than 2*XLEN, so the register alignment rule does + // not apply. + unsigned TwoXLenInBytes = (2 * XLen) / 8; + if (!IsFixed && ArgFlags.getOrigAlign() == TwoXLenInBytes && + DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) { + unsigned RegIdx = State.getFirstUnallocated(ArgGPRs); + // Skip 'odd' register if necessary. 
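A concrete instance of the rule implemented just below may help: on RV32 a variadic argument with 8-byte size and alignment must start in an even-numbered argument register, so for void f(int, ...) called as f(1, 2LL) the fixed int lands in a0, a1 is skipped, and the long long travels in the aligned pair a2/a3. The helper below models only that allocation decision (our simplification, not the patch's code):

#include <cassert>

// Index of the first register (a0..a7 are indices 0..7) usable for a
// 2*XLEN-aligned variadic argument, given the next free index.
unsigned firstRegForAlignedVararg(unsigned NextFreeIdx) {
  return (NextFreeIdx % 2) ? NextFreeIdx + 1 : NextFreeIdx;
}

int main() {
  assert(firstRegForAlignedVararg(1) == 2); // one fixed int used a0: pair a2/a3
  assert(firstRegForAlignedVararg(2) == 2); // already even: pair a2/a3
}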
+ if (RegIdx != array_lengthof(ArgGPRs) && RegIdx % 2 == 1) + State.AllocateReg(ArgGPRs); + } + + SmallVectorImpl &PendingLocs = State.getPendingLocs(); + SmallVectorImpl &PendingArgFlags = + State.getPendingArgFlags(); + + assert(PendingLocs.size() == PendingArgFlags.size() && + "PendingLocs and PendingArgFlags out of sync"); + + // Split arguments might be passed indirectly, so keep track of the pending + // values. + if (ArgFlags.isSplit() || !PendingLocs.empty()) { + LocVT = XLenVT; + LocInfo = CCValAssign::Indirect; + PendingLocs.push_back( + CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo)); + PendingArgFlags.push_back(ArgFlags); + if (!ArgFlags.isSplitEnd()) { + return false; + } + } + + // If the split argument only had two elements, it should be passed directly + // in registers or on the stack. + if (ArgFlags.isSplitEnd() && PendingLocs.size() <= 2) { + assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()"); + // Apply the normal calling convention rules to the first half of the + // split argument. + CCValAssign VA = PendingLocs[0]; + ISD::ArgFlagsTy AF = PendingArgFlags[0]; + PendingLocs.clear(); + PendingArgFlags.clear(); + return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT, + ArgFlags); + } + + // Allocate to a register if possible, or else a stack slot. + unsigned Reg = State.AllocateReg(ArgGPRs); + unsigned StackOffset = Reg ? 0 : State.AllocateStack(XLen / 8, XLen / 8); + + // If we reach this point and PendingLocs is non-empty, we must be at the + // end of a split argument that must be passed indirectly. + if (!PendingLocs.empty()) { + assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()"); + assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()"); + + for (auto &It : PendingLocs) { + if (Reg) + It.convertToReg(Reg); + else + It.convertToMem(StackOffset); + State.addLoc(It); + } + PendingLocs.clear(); + PendingArgFlags.clear(); + return false; + } + + assert(LocVT == XLenVT && "Expected an XLenVT at this stage"); + + if (Reg) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + } else { + State.addLoc( + CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); + } + return false; +} + +void RISCVTargetLowering::analyzeInputArgs( + MachineFunction &MF, CCState &CCInfo, + const SmallVectorImpl &Ins, bool IsRet) const { + unsigned NumArgs = Ins.size(); + FunctionType *FType = MF.getFunction().getFunctionType(); + + for (unsigned i = 0; i != NumArgs; ++i) { + MVT ArgVT = Ins[i].VT; + ISD::ArgFlagsTy ArgFlags = Ins[i].Flags; + + Type *ArgTy = nullptr; + if (IsRet) + ArgTy = FType->getReturnType(); + else if (Ins[i].isOrigArg()) + ArgTy = FType->getParamType(Ins[i].getOrigArgIndex()); + + if (CC_RISCV(MF.getDataLayout(), i, ArgVT, ArgVT, CCValAssign::Full, + ArgFlags, CCInfo, /*IsRet=*/true, IsRet, ArgTy)) { + DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " + << EVT(ArgVT).getEVTString() << '\n'); + llvm_unreachable(nullptr); + } + } +} + +void RISCVTargetLowering::analyzeOutputArgs( + MachineFunction &MF, CCState &CCInfo, + const SmallVectorImpl &Outs, bool IsRet, + CallLoweringInfo *CLI) const { + unsigned NumArgs = Outs.size(); + + for (unsigned i = 0; i != NumArgs; i++) { + MVT ArgVT = Outs[i].VT; + ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; + Type *OrigTy = CLI ? 
CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr; + + if (CC_RISCV(MF.getDataLayout(), i, ArgVT, ArgVT, CCValAssign::Full, + ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) { + DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " + << EVT(ArgVT).getEVTString() << "\n"); + llvm_unreachable(nullptr); + } + } +} + +// The caller is responsible for loading the full value if the argument is +// passed with CCValAssign::Indirect. +static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain, + const CCValAssign &VA, const SDLoc &DL) { + MachineFunction &MF = DAG.getMachineFunction(); + MachineRegisterInfo &RegInfo = MF.getRegInfo(); + EVT LocVT = VA.getLocVT(); + SDValue Val; + + unsigned VReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); + RegInfo.addLiveIn(VA.getLocReg(), VReg); + Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT); + + switch (VA.getLocInfo()) { + default: + llvm_unreachable("Unexpected CCValAssign::LocInfo"); + case CCValAssign::Full: + case CCValAssign::Indirect: + return Val; + } +} + +// The caller is responsible for loading the full value if the argument is +// passed with CCValAssign::Indirect. +static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, + const CCValAssign &VA, const SDLoc &DL) { + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo &MFI = MF.getFrameInfo(); + EVT LocVT = VA.getLocVT(); + EVT ValVT = VA.getValVT(); + EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0)); + int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8, + VA.getLocMemOffset(), /*Immutable=*/true); + SDValue FIN = DAG.getFrameIndex(FI, PtrVT); + SDValue Val; + + ISD::LoadExtType ExtType; + switch (VA.getLocInfo()) { + default: + llvm_unreachable("Unexpected CCValAssign::LocInfo"); + case CCValAssign::Full: + case CCValAssign::Indirect: + ExtType = ISD::NON_EXTLOAD; + break; + } + Val = DAG.getExtLoad( + ExtType, DL, LocVT, Chain, FIN, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT); + return Val; +} // Transform physical registers into virtual registers. SDValue RISCVTargetLowering::LowerFormalArguments( @@ -340,35 +686,111 @@ SDValue RISCVTargetLowering::LowerFormalArguments( } MachineFunction &MF = DAG.getMachineFunction(); - MachineRegisterInfo &RegInfo = MF.getRegInfo(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); MVT XLenVT = Subtarget.getXLenVT(); - - if (IsVarArg) - report_fatal_error("VarArg not supported"); + unsigned XLenInBytes = Subtarget.getXLen() / 8; + // Used with vargs to acumulate store chains. + std::vector OutChains; // Assign locations to all of the incoming arguments. SmallVector ArgLocs; CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); - CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV32); - - for (auto &VA : ArgLocs) { - if (!VA.isRegLoc()) - report_fatal_error("Defined with too many args"); - - // Arguments passed in registers. 
- EVT RegVT = VA.getLocVT(); - if (RegVT != XLenVT) { - DEBUG(dbgs() << "LowerFormalArguments Unhandled argument type: " - << RegVT.getEVTString() << "\n"); - report_fatal_error("unhandled argument type"); + analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false); + + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + assert(VA.getLocVT() == XLenVT && "Unhandled argument type"); + SDValue ArgValue; + if (VA.isRegLoc()) + ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL); + else + ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL); + + if (VA.getLocInfo() == CCValAssign::Indirect) { + // If the original argument was split and passed by reference (e.g. i128 + // on RV32), we need to load all parts of it here (using the same + // address). + InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue, + MachinePointerInfo())); + unsigned ArgIndex = Ins[i].OrigArgIndex; + assert(Ins[i].PartOffset == 0); + while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) { + CCValAssign &PartVA = ArgLocs[i + 1]; + unsigned PartOffset = Ins[i + 1].PartOffset; + SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, + DAG.getIntPtrConstant(PartOffset, DL)); + InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address, + MachinePointerInfo())); + ++i; + } + continue; + } + InVals.push_back(ArgValue); + } + + if (IsVarArg) { + ArrayRef ArgRegs = makeArrayRef(ArgGPRs); + unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs); + const TargetRegisterClass *RC = &RISCV::GPRRegClass; + MachineFrameInfo &MFI = MF.getFrameInfo(); + MachineRegisterInfo &RegInfo = MF.getRegInfo(); + RISCVMachineFunctionInfo *RVFI = MF.getInfo(); + + // Offset of the first variable argument from stack pointer, and size of + // the vararg save area. For now, the varargs save area is either zero or + // large enough to hold a0-a7. + int VaArgOffset, VarArgsSaveSize; + + // If all registers are allocated, then all varargs must be passed on the + // stack and we don't need to save any argregs. + if (ArgRegs.size() == Idx) { + VaArgOffset = CCInfo.getNextStackOffset(); + VarArgsSaveSize = 0; + } else { + VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx); + VaArgOffset = -VarArgsSaveSize; + } + + // Record the frame index of the first variable argument + // which is a value necessary to VASTART. + int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true); + RVFI->setVarArgsFrameIndex(FI); + + // If saving an odd number of registers then create an extra stack slot to + // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures + // offsets to even-numbered registered remain 2*XLEN-aligned. + if (Idx % 2) { + FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes, + true); + VarArgsSaveSize += XLenInBytes; } - const unsigned VReg = - RegInfo.createVirtualRegister(&RISCV::GPRRegClass); - RegInfo.addLiveIn(VA.getLocReg(), VReg); - SDValue ArgIn = DAG.getCopyFromReg(Chain, DL, VReg, RegVT); - InVals.push_back(ArgIn); + // Copy the integer registers that may have been used for passing varargs + // to the vararg save area. 
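
The vararg save-area arithmetic above can be checked in isolation. The sketch below is a standalone restatement of it for RV32 (XLenInBytes = 4, eight argument registers); the struct and function names are illustrative only.

// Standalone sketch of the vararg save-area bookkeeping; illustration only.
#include <cstdio>

// Given the index of the first argument register not used by fixed
// arguments (Idx, 0..8) and the byte size of the stack-passed portion of
// the fixed arguments, compute where the va_start frame object lives and
// how many bytes of a0-a7 the prologue must spill.
struct VarArgArea {
  int VaArgOffset;     // offset of the first variadic argument
  int VarArgsSaveSize; // bytes of argument registers saved in the prologue
};

VarArgArea computeVarArgArea(unsigned Idx, unsigned FixedStackBytes,
                             unsigned XLenInBytes = 4,
                             unsigned NumArgGPRs = 8) {
  VarArgArea A;
  if (Idx == NumArgGPRs) {
    // Every argument register holds a fixed argument: variadic arguments
    // all live above the incoming stack arguments.
    A.VaArgOffset = static_cast<int>(FixedStackBytes);
    A.VarArgsSaveSize = 0;
    return A;
  }
  A.VarArgsSaveSize = XLenInBytes * (NumArgGPRs - Idx);
  A.VaArgOffset = -A.VarArgsSaveSize;
  // When an odd number of registers is saved, an extra slot keeps the
  // save area 2*XLEN aligned, as in the Idx % 2 case above.
  if (Idx % 2)
    A.VarArgsSaveSize += XLenInBytes;
  return A;
}

int main() {
  for (unsigned Idx = 0; Idx <= 8; ++Idx) {
    VarArgArea A = computeVarArgArea(Idx, /*FixedStackBytes=*/0);
    std::printf("Idx=%u  VaArgOffset=%d  SaveSize=%d\n", Idx, A.VaArgOffset,
                A.VarArgsSaveSize);
  }
  return 0;
}
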
+ for (unsigned I = Idx; I < ArgRegs.size(); + ++I, VaArgOffset += XLenInBytes) { + const unsigned Reg = RegInfo.createVirtualRegister(RC); + RegInfo.addLiveIn(ArgRegs[I], Reg); + SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT); + FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true); + SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); + SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff, + MachinePointerInfo::getFixedStack(MF, FI)); + cast(Store.getNode()) + ->getMemOperand() + ->setValue((Value *)nullptr); + OutChains.push_back(Store); + } + RVFI->setVarArgsSaveSize(VarArgsSaveSize); } + + // All stores are grouped in one node to allow the matching between + // the size of Ins and InVals. This only happens for vararg functions. + if (!OutChains.empty()) { + OutChains.push_back(Chain); + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains); + } + return Chain; } @@ -387,54 +809,112 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, CallingConv::ID CallConv = CLI.CallConv; bool IsVarArg = CLI.IsVarArg; EVT PtrVT = getPointerTy(DAG.getDataLayout()); - - if (IsVarArg) { - report_fatal_error("LowerCall with varargs not implemented"); - } + MVT XLenVT = Subtarget.getXLenVT(); MachineFunction &MF = DAG.getMachineFunction(); // Analyze the operands of the call, assigning locations to each operand. SmallVector ArgLocs; CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); - ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV32); + analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI); // Get a count of how many bytes are to be pushed on the stack. unsigned NumBytes = ArgCCInfo.getNextStackOffset(); - for (auto &Arg : Outs) { - if (!Arg.Flags.isByVal()) + // Create local copies for byval args + SmallVector ByValArgs; + for (unsigned i = 0, e = Outs.size(); i != e; ++i) { + ISD::ArgFlagsTy Flags = Outs[i].Flags; + if (!Flags.isByVal()) continue; - report_fatal_error("Passing arguments byval not yet implemented"); + + SDValue Arg = OutVals[i]; + unsigned Size = Flags.getByValSize(); + unsigned Align = Flags.getByValAlign(); + + int FI = MF.getFrameInfo().CreateStackObject(Size, Align, /*isSS=*/false); + SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); + SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT); + + Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Align, + /*IsVolatile=*/false, + /*AlwaysInline=*/false, + /*isTailCall=*/false, MachinePointerInfo(), + MachinePointerInfo()); + ByValArgs.push_back(FIPtr); } Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL); // Copy argument values to their designated locations. SmallVector, 8> RegsToPass; + SmallVector MemOpChains; SDValue StackPtr; - for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) { - CCValAssign &VA = ArgLocs[I]; - SDValue ArgValue = OutVals[I]; + for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + SDValue ArgValue = OutVals[i]; + ISD::ArgFlagsTy Flags = Outs[i].Flags; // Promote the value if needed. - // For now, only handle fully promoted arguments. + // For now, only handle fully promoted and indirect arguments. switch (VA.getLocInfo()) { case CCValAssign::Full: break; + case CCValAssign::Indirect: { + // Store the argument in a stack slot and pass its address. 
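
For arguments assigned CCValAssign::Indirect, the caller writes every XLEN-sized part into one spill slot and passes only that slot's address; the callee then reloads the parts through the pointer. A minimal standalone picture of that contract (not LLVM code; values chosen arbitrarily):

// Standalone picture of indirect passing of a split argument
// (e.g. i128 on RV32); illustration only.
#include <cstdint>
#include <cstdio>
#include <cstring>

// The callee only receives a pointer and reassembles the value from the
// XLEN-sized parts stored at offsets 0, 4, 8, 12 (the PartOffset values).
void callee(const uint32_t *indirect) {
  uint64_t lo = indirect[0] | (uint64_t)indirect[1] << 32;
  uint64_t hi = indirect[2] | (uint64_t)indirect[3] << 32;
  std::printf("callee sees lo=0x%016llx hi=0x%016llx\n",
              (unsigned long long)lo, (unsigned long long)hi);
}

int main() {
  uint32_t parts[4] = {0xdddddddd, 0xcccccccc, 0xbbbbbbbb, 0xaaaaaaaa};
  uint32_t spillSlot[4];
  std::memcpy(spillSlot, parts, sizeof(parts)); // the caller's stores
  callee(spillSlot);                            // only the address is passed
  return 0;
}
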
+ SDValue SpillSlot = DAG.CreateStackTemporary(Outs[i].ArgVT); + int FI = cast(SpillSlot)->getIndex(); + MemOpChains.push_back( + DAG.getStore(Chain, DL, ArgValue, SpillSlot, + MachinePointerInfo::getFixedStack(MF, FI))); + // If the original argument was split (e.g. i128), we need + // to store all parts of it here (and pass just one address). + unsigned ArgIndex = Outs[i].OrigArgIndex; + assert(Outs[i].PartOffset == 0); + while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) { + SDValue PartValue = OutVals[i + 1]; + unsigned PartOffset = Outs[i + 1].PartOffset; + SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, + DAG.getIntPtrConstant(PartOffset, DL)); + MemOpChains.push_back( + DAG.getStore(Chain, DL, PartValue, Address, + MachinePointerInfo::getFixedStack(MF, FI))); + ++i; + } + ArgValue = SpillSlot; + break; + } default: llvm_unreachable("Unknown loc info!"); } + // Use local copy if it is a byval arg. + if (Flags.isByVal()) + ArgValue = ByValArgs[j++]; + if (VA.isRegLoc()) { // Queue up the argument copies and emit them at the end. RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue)); } else { assert(VA.isMemLoc() && "Argument not register or memory"); - report_fatal_error("Passing arguments via the stack not yet implemented"); + + // Work out the address of the stack slot. + if (!StackPtr.getNode()) + StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT); + SDValue Address = + DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, + DAG.getIntPtrConstant(VA.getLocMemOffset(), DL)); + + // Emit the store. + MemOpChains.push_back( + DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo())); } } + // Join the stores, which are independent of one another. + if (!MemOpChains.empty()) + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains); + SDValue Glue; // Build a sequence of copy-to-reg nodes, chained and glued together. @@ -484,7 +964,7 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, // Assign locations to each value returned by this call. SmallVector RVLocs; CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext()); - RetCCInfo.AnalyzeCallResult(Ins, RetCC_RISCV32); + analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true); // Copy all of the result registers out of their specified physreg. for (auto &VA : RVLocs) { @@ -494,22 +974,34 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, Chain = RetValue.getValue(1); Glue = RetValue.getValue(2); - InVals.push_back(Chain.getValue(0)); + assert(VA.getLocInfo() == CCValAssign::Full && "Unknown loc info!"); + InVals.push_back(RetValue); } return Chain; } +bool RISCVTargetLowering::CanLowerReturn( + CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, + const SmallVectorImpl &Outs, LLVMContext &Context) const { + SmallVector RVLocs; + CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context); + for (unsigned i = 0, e = Outs.size(); i != e; ++i) { + MVT VT = Outs[i].VT; + ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; + if (CC_RISCV(MF.getDataLayout(), i, VT, VT, CCValAssign::Full, ArgFlags, + CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr)) + return false; + } + return true; +} + SDValue RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, const SDLoc &DL, SelectionDAG &DAG) const { - if (IsVarArg) { - report_fatal_error("VarArg not supported"); - } - // Stores the assignment of the return value to a location. 
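
CanLowerReturn reports whether the return value can be assigned by the calling convention at all; when it cannot, SelectionDAG falls back to returning through a hidden sret pointer. The sketch below illustrates that decision under the assumption that scalar returns may use at most the two return registers a0/a1; the actual limit is whatever CC_RISCV accepts, so treat the constant here as illustrative.

// Standalone sketch of the direct-return vs. sret decision; the
// two-register limit is an assumption for illustration.
#include <cstdio>

bool canReturnDirectly(unsigned ReturnBytes, unsigned XLenInBytes = 4,
                       unsigned NumRetGPRs = 2) {
  unsigned PartsNeeded = (ReturnBytes + XLenInBytes - 1) / XLenInBytes;
  return PartsNeeded <= NumRetGPRs;
}

int main() {
  std::printf("i32  -> %s\n", canReturnDirectly(4) ? "a0" : "sret");
  std::printf("i64  -> %s\n", canReturnDirectly(8) ? "a0/a1" : "sret");
  std::printf("i128 -> %s\n", canReturnDirectly(16) ? "registers" : "sret");
  return 0;
}
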
SmallVector RVLocs; @@ -517,17 +1009,21 @@ RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs, *DAG.getContext()); - CCInfo.AnalyzeReturn(Outs, RetCC_RISCV32); + analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true, + nullptr); SDValue Flag; SmallVector RetOps(1, Chain); // Copy the result values into the output registers. for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) { + SDValue Val = OutVals[i]; CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); + assert(VA.getLocInfo() == CCValAssign::Full && + "Unexpected CCValAssign::LocInfo"); - Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVals[i], Flag); + Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Flag); // Guarantee that all emitted copies are stuck together. Flag = Chain.getValue(1); @@ -557,3 +1053,21 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const { } return nullptr; } + +std::pair +RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, + StringRef Constraint, + MVT VT) const { + // First, see if this is a constraint that directly corresponds to a + // RISCV register class. + if (Constraint.size() == 1) { + switch (Constraint[0]) { + case 'r': + return std::make_pair(0U, &RISCV::GPRRegClass); + default: + break; + } + } + + return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); +} diff --git a/lib/Target/RISCV/RISCVISelLowering.h b/lib/Target/RISCV/RISCVISelLowering.h index 933bc6218d58..8ee00cd69a1c 100644 --- a/lib/Target/RISCV/RISCVISelLowering.h +++ b/lib/Target/RISCV/RISCVISelLowering.h @@ -43,17 +43,31 @@ class RISCVTargetLowering : public TargetLowering { // This method returns the name of a target specific DAG node. 
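
The 'r' constraint mapping added here is what source-level inline assembly relies on to obtain general-purpose registers. A minimal usage example, assuming a RISC-V target and a GCC-compatible compiler (it will not build for other targets):

// Minimal use of the "r" inline-asm constraint; requires a RISC-V target.
#include <cstdint>
#include <cstdio>

uint32_t add_via_asm(uint32_t a, uint32_t b) {
  uint32_t result;
  // Each "r" operand is assigned some GPR by the register allocator,
  // which is exactly what mapping 'r' to GPRRegClass enables.
  asm("add %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
  return result;
}

int main() {
  std::printf("%u\n", add_via_asm(2, 3));
  return 0;
}
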
const char *getTargetNodeName(unsigned Opcode) const override; + std::pair + getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, + StringRef Constraint, MVT VT) const override; + MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override; private: + void analyzeInputArgs(MachineFunction &MF, CCState &CCInfo, + const SmallVectorImpl &Ins, + bool IsRet) const; + void analyzeOutputArgs(MachineFunction &MF, CCState &CCInfo, + const SmallVectorImpl &Outs, + bool IsRet, CallLoweringInfo *CLI) const; // Lower incoming arguments, copy physregs into vregs SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl &InVals) const override; + bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, + bool IsVarArg, + const SmallVectorImpl &Outs, + LLVMContext &Context) const override; SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, const SDLoc &DL, @@ -68,6 +82,9 @@ class RISCVTargetLowering : public TargetLowering { SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; SDValue lowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const; SDValue lowerSELECT(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; }; } diff --git a/lib/Target/RISCV/RISCVInstrFormats.td b/lib/Target/RISCV/RISCVInstrFormats.td index 3dca957e31fa..7479ffbc9532 100644 --- a/lib/Target/RISCV/RISCVInstrFormats.td +++ b/lib/Target/RISCV/RISCVInstrFormats.td @@ -28,17 +28,26 @@ // Format specifies the encoding used by the instruction. This is used by // RISCVMCCodeEmitter to determine which form of fixup to use. These // definitions must be kept in-sync with RISCVBaseInfo.h. -class InstFormat val> { - bits<4> Value = val; +class InstFormat val> { + bits<5> Value = val; } def InstFormatPseudo : InstFormat<0>; def InstFormatR : InstFormat<1>; -def InstFormatI : InstFormat<2>; -def InstFormatS : InstFormat<3>; -def InstFormatB : InstFormat<4>; -def InstFormatU : InstFormat<5>; -def InstFormatJ : InstFormat<6>; -def InstFormatOther : InstFormat<7>; +def InstFormatR4 : InstFormat<2>; +def InstFormatI : InstFormat<3>; +def InstFormatS : InstFormat<4>; +def InstFormatB : InstFormat<5>; +def InstFormatU : InstFormat<6>; +def InstFormatJ : InstFormat<7>; +def InstFormatCR : InstFormat<8>; +def InstFormatCI : InstFormat<9>; +def InstFormatCSS : InstFormat<10>; +def InstFormatCIW : InstFormat<11>; +def InstFormatCL : InstFormat<12>; +def InstFormatCS : InstFormat<13>; +def InstFormatCB : InstFormat<14>; +def InstFormatCJ : InstFormat<15>; +def InstFormatOther : InstFormat<16>; // The following opcode names and match those given in Table 19.1 in the // RISC-V User-level ISA specification ("RISC-V base opcode map"). 
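
The "let Inst{...}" assignments in these format classes are plain bit packing. As a standalone cross-check (hand-written C++, not TableGen output), the base R-type layout packs as follows; the expected word for "add a0, a1, a2" comes from the base ISA encoding.

// Standalone illustration of the R-type field layout described by the
// TableGen format classes; illustration only.
#include <cassert>
#include <cstdint>
#include <cstdio>

uint32_t encodeRType(unsigned funct7, unsigned rs2, unsigned rs1,
                     unsigned funct3, unsigned rd, unsigned opcode) {
  assert(funct7 < 128 && rs2 < 32 && rs1 < 32 && funct3 < 8 && rd < 32 &&
         opcode < 128);
  return (funct7 << 25) | (rs2 << 20) | (rs1 << 15) | (funct3 << 12) |
         (rd << 7) | opcode;
}

int main() {
  // add a0, a1, a2: funct7=0, rs2=x12, rs1=x11, funct3=0, rd=x10,
  // opcode=OP (0b0110011).
  uint32_t add = encodeRType(0b0000000, 12, 11, 0b000, 10, 0b0110011);
  std::printf("add a0, a1, a2 -> 0x%08x\n", add);
  assert(add == 0x00c58533);
  return 0;
}
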
@@ -89,7 +98,7 @@ class RVInst funct7, bits<3> funct3, RISCVOpcode opcode, dag outs, let Opcode = opcode.Value; } +class RVInstR4 funct2, RISCVOpcode opcode, dag outs, dag ins, + string opcodestr, string argstr> + : RVInst { + bits<5> rs3; + bits<5> rs2; + bits<5> rs1; + bits<3> funct3; + bits<5> rd; + + let Inst{31-27} = rs3; + let Inst{26-25} = funct2; + let Inst{24-20} = rs2; + let Inst{19-15} = rs1; + let Inst{14-12} = funct3; + let Inst{11-7} = rd; + let Opcode = opcode.Value; +} + class RVInstRAtomic funct5, bit aq, bit rl, bits<3> funct3, RISCVOpcode opcode, dag outs, dag ins, string opcodestr, string argstr> @@ -136,6 +163,22 @@ class RVInstRAtomic funct5, bit aq, bit rl, bits<3> funct3, let Opcode = opcode.Value; } +class RVInstRFrm funct7, RISCVOpcode opcode, dag outs, dag ins, + string opcodestr, string argstr> + : RVInst { + bits<5> rs2; + bits<5> rs1; + bits<3> funct3; + bits<5> rd; + + let Inst{31-25} = funct7; + let Inst{24-20} = rs2; + let Inst{19-15} = rs1; + let Inst{14-12} = funct3; + let Inst{11-7} = rd; + let Opcode = opcode.Value; +} + class RVInstI funct3, RISCVOpcode opcode, dag outs, dag ins, string opcodestr, string argstr> : RVInst { @@ -153,6 +196,23 @@ class RVInstI funct3, RISCVOpcode opcode, dag outs, dag ins, class RVInstIShift funct3, RISCVOpcode opcode, dag outs, dag ins, string opcodestr, string argstr> : RVInst { + bits<6> shamt; + bits<5> rs1; + bits<5> rd; + + let Inst{31} = 0; + let Inst{30} = arithshift; + let Inst{29-26} = 0; + let Inst{25-20} = shamt; + let Inst{19-15} = rs1; + let Inst{14-12} = funct3; + let Inst{11-7} = rd; + let Opcode = opcode.Value; +} + +class RVInstIShiftW funct3, RISCVOpcode opcode, + dag outs, dag ins, string opcodestr, string argstr> + : RVInst { bits<5> shamt; bits<5> rs1; bits<5> rd; diff --git a/lib/Target/RISCV/RISCVInstrFormatsC.td b/lib/Target/RISCV/RISCVInstrFormatsC.td new file mode 100644 index 000000000000..6abcbd7cc8a1 --- /dev/null +++ b/lib/Target/RISCV/RISCVInstrFormatsC.td @@ -0,0 +1,147 @@ +//===-- RISCVInstrFormatsC.td - RISCV C Instruction Formats --*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the RISC-V C extension instruction formats. +// +//===----------------------------------------------------------------------===// + +class RVInst16 pattern, InstFormat format> + : Instruction { + field bits<16> Inst; + // SoftFail is a field the disassembler can use to provide a way for + // instructions to not match without killing the whole decode process. It is + // mainly used for ARM, but Tablegen expects this field to exist or it fails + // to build the decode table. + field bits<16> SoftFail = 0; + let Size = 2; + + bits<2> Opcode = 0; + + let Namespace = "RISCV"; + + dag OutOperandList = outs; + dag InOperandList = ins; + let AsmString = opcodestr # "\t" # argstr; + let Pattern = pattern; + + let TSFlags{4-0} = format.Value; +} + +class RVInst16CR funct4, bits<2> opcode, dag outs, dag ins, + string opcodestr, string argstr> + : RVInst16 { + bits<5> rs1; + bits<5> rs2; + + let Inst{15-12} = funct4; + let Inst{11-7} = rs1; + let Inst{6-2} = rs2; + let Inst{1-0} = opcode; +} + +// The immediate value encoding differs for each instruction, so each subclass +// is responsible for setting the appropriate bits in the Inst field. 
+// The bits Inst{6-2} must be set for each instruction. +class RVInst16CI funct3, bits<2> opcode, dag outs, dag ins, + string opcodestr, string argstr> + : RVInst16 { + bits<10> imm; + bits<5> rd; + bits<5> rs1; + + let Inst{15-13} = funct3; + let Inst{12} = imm{5}; + let Inst{11-7} = rd; + let Inst{1-0} = opcode; +} + +// The immediate value encoding differs for each instruction, so each subclass +// is responsible for setting the appropriate bits in the Inst field. +// The bits Inst{12-7} must be set for each instruction. +class RVInst16CSS funct3, bits<2> opcode, dag outs, dag ins, + string opcodestr, string argstr> + : RVInst16 { + bits<10> imm; + bits<5> rs2; + bits<5> rs1; + + let Inst{15-13} = funct3; + let Inst{6-2} = rs2; + let Inst{1-0} = opcode; +} + +class RVInst16CIW funct3, bits<2> opcode, dag outs, dag ins, + string opcodestr, string argstr> + : RVInst16 { + bits<10> imm; + bits<3> rd; + + let Inst{15-13} = funct3; + let Inst{4-2} = rd; + let Inst{1-0} = opcode; +} + +// The immediate value encoding differs for each instruction, so each subclass +// is responsible for setting the appropriate bits in the Inst field. +// The bits Inst{12-10} and Inst{6-5} must be set for each instruction. +class RVInst16CL funct3, bits<2> opcode, dag outs, dag ins, + string opcodestr, string argstr> + : RVInst16 { + bits<3> rd; + bits<3> rs1; + + let Inst{15-13} = funct3; + let Inst{9-7} = rs1; + let Inst{4-2} = rd; + let Inst{1-0} = opcode; +} + +// The immediate value encoding differs for each instruction, so each subclass +// is responsible for setting the appropriate bits in the Inst field. +// The bits Inst{12-10} and Inst{6-5} must be set for each instruction. +class RVInst16CS funct3, bits<2> opcode, dag outs, dag ins, + string opcodestr, string argstr> + : RVInst16 { + bits<3> rs2; + bits<3> rs1; + + let Inst{15-13} = funct3; + let Inst{9-7} = rs1; + let Inst{4-2} = rs2; + let Inst{1-0} = opcode; +} + +class RVInst16CB funct3, bits<2> opcode, dag outs, dag ins, + string opcodestr, string argstr> + : RVInst16 { + bits<9> imm; + bits<3> rs1; + + let Inst{15-13} = funct3; + let Inst{9-7} = rs1; + let Inst{1-0} = opcode; +} + +class RVInst16CJ funct3, bits<2> opcode, dag outs, dag ins, + string opcodestr, string argstr> + : RVInst16 { + bits<11> offset; + + let Inst{15-13} = funct3; + let Inst{12} = offset{10}; + let Inst{11} = offset{3}; + let Inst{10-9} = offset{8-7}; + let Inst{8} = offset{9}; + let Inst{7} = offset{5}; + let Inst{6} = offset{6}; + let Inst{5-3} = offset{2-0}; + let Inst{2} = offset{4}; + let Inst{1-0} = opcode; +} diff --git a/lib/Target/RISCV/RISCVInstrInfo.cpp b/lib/Target/RISCV/RISCVInstrInfo.cpp index 5b4f4fcbb880..6a10329d4b8b 100644 --- a/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -20,6 +20,7 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" @@ -52,7 +53,7 @@ void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, if (I != MBB.end()) DL = I->getDebugLoc(); - if (RC == &RISCV::GPRRegClass) + if (RISCV::GPRRegClass.hasSubClassEq(RC)) BuildMI(MBB, I, DL, get(RISCV::SW)) .addReg(SrcReg, getKillRegState(IsKill)) .addFrameIndex(FI) @@ -70,8 +71,298 @@ void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, if (I != MBB.end()) DL = I->getDebugLoc(); - if (RC == &RISCV::GPRRegClass) + if 
(RISCV::GPRRegClass.hasSubClassEq(RC)) BuildMI(MBB, I, DL, get(RISCV::LW), DstReg).addFrameIndex(FI).addImm(0); else llvm_unreachable("Can't load this register from stack slot"); } + +void RISCVInstrInfo::movImm32(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, unsigned DstReg, uint64_t Val, + MachineInstr::MIFlag Flag) const { + assert(isInt<32>(Val) && "Can only materialize 32-bit constants"); + + // TODO: If the value can be materialized using only one instruction, only + // insert a single instruction. + + uint64_t Hi20 = ((Val + 0x800) >> 12) & 0xfffff; + uint64_t Lo12 = SignExtend64<12>(Val); + BuildMI(MBB, MBBI, DL, get(RISCV::LUI), DstReg) + .addImm(Hi20) + .setMIFlag(Flag); + BuildMI(MBB, MBBI, DL, get(RISCV::ADDI), DstReg) + .addReg(DstReg, RegState::Kill) + .addImm(Lo12) + .setMIFlag(Flag); +} + +// The contents of values added to Cond are not examined outside of +// RISCVInstrInfo, giving us flexibility in what to push to it. For RISCV, we +// push BranchOpcode, Reg1, Reg2. +static void parseCondBranch(MachineInstr &LastInst, MachineBasicBlock *&Target, + SmallVectorImpl &Cond) { + // Block ends with fall-through condbranch. + assert(LastInst.getDesc().isConditionalBranch() && + "Unknown conditional branch"); + Target = LastInst.getOperand(2).getMBB(); + Cond.push_back(MachineOperand::CreateImm(LastInst.getOpcode())); + Cond.push_back(LastInst.getOperand(0)); + Cond.push_back(LastInst.getOperand(1)); +} + +static unsigned getOppositeBranchOpcode(int Opc) { + switch (Opc) { + default: + llvm_unreachable("Unrecognized conditional branch"); + case RISCV::BEQ: + return RISCV::BNE; + case RISCV::BNE: + return RISCV::BEQ; + case RISCV::BLT: + return RISCV::BGE; + case RISCV::BGE: + return RISCV::BLT; + case RISCV::BLTU: + return RISCV::BGEU; + case RISCV::BGEU: + return RISCV::BLTU; + } +} + +bool RISCVInstrInfo::analyzeBranch(MachineBasicBlock &MBB, + MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl &Cond, + bool AllowModify) const { + TBB = FBB = nullptr; + Cond.clear(); + + // If the block has no terminators, it just falls into the block after it. + MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); + if (I == MBB.end() || !isUnpredicatedTerminator(*I)) + return false; + + // Count the number of terminators and find the first unconditional or + // indirect branch. + MachineBasicBlock::iterator FirstUncondOrIndirectBr = MBB.end(); + int NumTerminators = 0; + for (auto J = I.getReverse(); J != MBB.rend() && isUnpredicatedTerminator(*J); + J++) { + NumTerminators++; + if (J->getDesc().isUnconditionalBranch() || + J->getDesc().isIndirectBranch()) { + FirstUncondOrIndirectBr = J.getReverse(); + } + } + + // If AllowModify is true, we can erase any terminators after + // FirstUncondOrIndirectBR. + if (AllowModify && FirstUncondOrIndirectBr != MBB.end()) { + while (std::next(FirstUncondOrIndirectBr) != MBB.end()) { + std::next(FirstUncondOrIndirectBr)->eraseFromParent(); + NumTerminators--; + } + I = FirstUncondOrIndirectBr; + } + + // We can't handle blocks that end in an indirect branch. + if (I->getDesc().isIndirectBranch()) + return true; + + // We can't handle blocks with more than 2 terminators. + if (NumTerminators > 2) + return true; + + // Handle a single unconditional branch. + if (NumTerminators == 1 && I->getDesc().isUnconditionalBranch()) { + TBB = I->getOperand(0).getMBB(); + return false; + } + + // Handle a single conditional branch. 
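
The contract of analyzeBranch can be summarised independently of MachineInstr: the hook succeeds only for a fall-through block, a single unconditional branch, a single conditional branch, or a conditional branch followed by an unconditional one, and reports failure for indirect branches or longer terminator sequences. A standalone model of that classification (types and names are illustrative):

// Standalone model of the block shapes analyzeBranch can describe.
#include <cstdio>
#include <vector>

enum class TermKind { Conditional, Unconditional, Indirect };

struct Analysis {
  bool Analyzable = false;
  bool HasCondBranch = false;   // "TBB + Cond" in the real hook
  bool HasUncondBranch = false; // "FBB" (or TBB when there is no condition)
};

Analysis analyze(const std::vector<TermKind> &Terms) {
  Analysis A;
  if (Terms.empty()) { // fall-through block
    A.Analyzable = true;
    return A;
  }
  if (Terms.size() == 1 && Terms[0] == TermKind::Unconditional) {
    A.Analyzable = true;
    A.HasUncondBranch = true;
    return A;
  }
  if (Terms.size() == 1 && Terms[0] == TermKind::Conditional) {
    A.Analyzable = true;
    A.HasCondBranch = true;
    return A;
  }
  if (Terms.size() == 2 && Terms[0] == TermKind::Conditional &&
      Terms[1] == TermKind::Unconditional) {
    A.Analyzable = true;
    A.HasCondBranch = true;
    A.HasUncondBranch = true;
    return A;
  }
  return A; // indirect branches or >2 terminators: give up
}

int main() {
  Analysis A = analyze({TermKind::Conditional, TermKind::Unconditional});
  std::printf("analyzable=%d cond=%d uncond=%d\n", A.Analyzable,
              A.HasCondBranch, A.HasUncondBranch);
  return 0;
}
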
+ if (NumTerminators == 1 && I->getDesc().isConditionalBranch()) { + parseCondBranch(*I, TBB, Cond); + return false; + } + + // Handle a conditional branch followed by an unconditional branch. + if (NumTerminators == 2 && std::prev(I)->getDesc().isConditionalBranch() && + I->getDesc().isUnconditionalBranch()) { + parseCondBranch(*std::prev(I), TBB, Cond); + FBB = I->getOperand(0).getMBB(); + return false; + } + + // Otherwise, we can't handle this. + return true; +} + +unsigned RISCVInstrInfo::removeBranch(MachineBasicBlock &MBB, + int *BytesRemoved) const { + if (BytesRemoved) + *BytesRemoved = 0; + MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); + if (I == MBB.end()) + return 0; + + if (!I->getDesc().isUnconditionalBranch() && + !I->getDesc().isConditionalBranch()) + return 0; + + // Remove the branch. + I->eraseFromParent(); + if (BytesRemoved) + *BytesRemoved += getInstSizeInBytes(*I); + + I = MBB.end(); + + if (I == MBB.begin()) + return 1; + --I; + if (!I->getDesc().isConditionalBranch()) + return 1; + + // Remove the branch. + I->eraseFromParent(); + if (BytesRemoved) + *BytesRemoved += getInstSizeInBytes(*I); + return 2; +} + +// Inserts a branch into the end of the specific MachineBasicBlock, returning +// the number of instructions inserted. +unsigned RISCVInstrInfo::insertBranch( + MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, + ArrayRef Cond, const DebugLoc &DL, int *BytesAdded) const { + if (BytesAdded) + *BytesAdded = 0; + + // Shouldn't be a fall through. + assert(TBB && "InsertBranch must not be told to insert a fallthrough"); + assert((Cond.size() == 3 || Cond.size() == 0) && + "RISCV branch conditions have two components!"); + + // Unconditional branch. + if (Cond.empty()) { + MachineInstr &MI = *BuildMI(&MBB, DL, get(RISCV::PseudoBR)).addMBB(TBB); + if (BytesAdded) + *BytesAdded += getInstSizeInBytes(MI); + return 1; + } + + // Either a one or two-way conditional branch. + unsigned Opc = Cond[0].getImm(); + MachineInstr &CondMI = + *BuildMI(&MBB, DL, get(Opc)).add(Cond[1]).add(Cond[2]).addMBB(TBB); + if (BytesAdded) + *BytesAdded += getInstSizeInBytes(CondMI); + + // One-way conditional branch. + if (!FBB) + return 1; + + // Two-way conditional branch. + MachineInstr &MI = *BuildMI(&MBB, DL, get(RISCV::PseudoBR)).addMBB(FBB); + if (BytesAdded) + *BytesAdded += getInstSizeInBytes(MI); + return 2; +} + +unsigned RISCVInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB, + MachineBasicBlock &DestBB, + const DebugLoc &DL, + int64_t BrOffset, + RegScavenger *RS) const { + assert(RS && "RegScavenger required for long branching"); + assert(MBB.empty() && + "new block should be inserted for expanding unconditional branch"); + assert(MBB.pred_size() == 1); + + MachineFunction *MF = MBB.getParent(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + const auto &TM = static_cast(MF->getTarget()); + const auto &STI = MF->getSubtarget(); + + if (TM.isPositionIndependent() || STI.is64Bit()) + report_fatal_error("Unable to insert indirect branch"); + + if (!isInt<32>(BrOffset)) + report_fatal_error( + "Branch offsets outside of the signed 32-bit range not supported"); + + // FIXME: A virtual register must be used initially, as the register + // scavenger won't work with empty blocks (SIInstrInfo::insertIndirectBranch + // uses the same workaround). 
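
Both movImm32 above and the LUI+JALR sequence built here rely on the same %hi/%lo split: adding 0x800 before shifting rounds the upper 20 bits up whenever the low 12 bits become negative after sign extension, so (hi20 << 12) + signext(lo12) still reproduces the original value. A standalone round-trip check of that arithmetic (not LLVM code):

// Standalone round trip of the %hi/%lo split used for 32-bit constants.
#include <cassert>
#include <cstdint>
#include <cstdio>

// Split value so that (hi20 << 12) + signext(lo12) == value (mod 2^32).
void splitHiLo(int32_t value, uint32_t &hi20, int32_t &lo12) {
  hi20 = (((uint32_t)value + 0x800u) >> 12) & 0xfffffu;
  lo12 = value & 0xfff;
  if (lo12 >= 0x800)
    lo12 -= 0x1000; // sign-extend the low 12 bits
}

int main() {
  const int32_t tests[] = {0, 1, -1, 0x7ff, 0x800, 0x12345678,
                           -0x12345678, INT32_MIN, INT32_MAX};
  for (int32_t v : tests) {
    uint32_t hi20;
    int32_t lo12;
    splitHiLo(v, hi20, lo12);
    uint32_t rebuilt = (hi20 << 12) + (uint32_t)lo12;
    std::printf("v=%11d  hi20=0x%05x  lo12=%5d  rebuilt=0x%08x\n", (int)v,
                (unsigned)hi20, (int)lo12, rebuilt);
    assert(rebuilt == (uint32_t)v);
  }
  return 0;
}
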
+ unsigned ScratchReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); + auto II = MBB.end(); + + MachineInstr &LuiMI = *BuildMI(MBB, II, DL, get(RISCV::LUI), ScratchReg) + .addMBB(&DestBB, RISCVII::MO_HI); + BuildMI(MBB, II, DL, get(RISCV::PseudoBRIND)) + .addReg(ScratchReg, RegState::Kill) + .addMBB(&DestBB, RISCVII::MO_LO); + + RS->enterBasicBlockEnd(MBB); + unsigned Scav = RS->scavengeRegisterBackwards( + RISCV::GPRRegClass, MachineBasicBlock::iterator(LuiMI), false, 0); + MRI.replaceRegWith(ScratchReg, Scav); + MRI.clearVirtRegs(); + RS->setRegUsed(Scav); + return 8; +} + +bool RISCVInstrInfo::reverseBranchCondition( + SmallVectorImpl &Cond) const { + assert((Cond.size() == 3) && "Invalid branch condition!"); + Cond[0].setImm(getOppositeBranchOpcode(Cond[0].getImm())); + return false; +} + +MachineBasicBlock * +RISCVInstrInfo::getBranchDestBlock(const MachineInstr &MI) const { + assert(MI.getDesc().isBranch() && "Unexpected opcode!"); + // The branch target is always the last operand. + int NumOp = MI.getNumExplicitOperands(); + return MI.getOperand(NumOp - 1).getMBB(); +} + +bool RISCVInstrInfo::isBranchOffsetInRange(unsigned BranchOp, + int64_t BrOffset) const { + // Ideally we could determine the supported branch offset from the + // RISCVII::FormMask, but this can't be used for Pseudo instructions like + // PseudoBR. + switch (BranchOp) { + default: + llvm_unreachable("Unexpected opcode!"); + case RISCV::BEQ: + case RISCV::BNE: + case RISCV::BLT: + case RISCV::BGE: + case RISCV::BLTU: + case RISCV::BGEU: + return isIntN(13, BrOffset); + case RISCV::JAL: + case RISCV::PseudoBR: + return isIntN(21, BrOffset); + } +} + +unsigned RISCVInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { + unsigned Opcode = MI.getOpcode(); + + switch (Opcode) { + default: { return get(Opcode).getSize(); } + case TargetOpcode::EH_LABEL: + case TargetOpcode::IMPLICIT_DEF: + case TargetOpcode::KILL: + case TargetOpcode::DBG_VALUE: + return 0; + case TargetOpcode::INLINEASM: { + const MachineFunction &MF = *MI.getParent()->getParent(); + const auto &TM = static_cast(MF.getTarget()); + return getInlineAsmLength(MI.getOperand(0).getSymbolName(), + *TM.getMCAsmInfo()); + } + } +} diff --git a/lib/Target/RISCV/RISCVInstrInfo.h b/lib/Target/RISCV/RISCVInstrInfo.h index 05c8378445cf..5761d9bedd79 100644 --- a/lib/Target/RISCV/RISCVInstrInfo.h +++ b/lib/Target/RISCV/RISCVInstrInfo.h @@ -41,6 +41,39 @@ class RISCVInstrInfo : public RISCVGenInstrInfo { MachineBasicBlock::iterator MBBI, unsigned DstReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const override; + + // Materializes the given int32 Val into DstReg. 
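
isBranchOffsetInRange reduces to a signed bit-width check: conditional branches encode a 13-bit offset and JAL/PseudoBR a 21-bit offset (the low bit is implicitly zero in both cases). A standalone equivalent of that check, with a few sample offsets:

// Standalone version of the signed-range checks behind
// isBranchOffsetInRange; illustration only.
#include <cstdint>
#include <cstdio>

// True if Value fits in an N-bit signed immediate (same meaning as
// llvm::isIntN).
bool fitsSignedBits(unsigned N, int64_t Value) {
  int64_t Min = -(INT64_C(1) << (N - 1));
  int64_t Max = (INT64_C(1) << (N - 1)) - 1;
  return Value >= Min && Value <= Max;
}

int main() {
  const int64_t offsets[] = {4094, 4096, -4096, -4098, 1048574, 1048576};
  for (int64_t off : offsets)
    std::printf("%8lld: beq-reachable=%d jal-reachable=%d\n", (long long)off,
                fitsSignedBits(13, off), fitsSignedBits(21, off));
  return 0;
}
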
+ void movImm32(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, unsigned DstReg, uint64_t Val, + MachineInstr::MIFlag Flag = MachineInstr::NoFlags) const; + + unsigned getInstSizeInBytes(const MachineInstr &MI) const override; + + bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl &Cond, + bool AllowModify) const override; + + unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, ArrayRef Cond, + const DebugLoc &dl, + int *BytesAdded = nullptr) const override; + + unsigned insertIndirectBranch(MachineBasicBlock &MBB, + MachineBasicBlock &NewDestBB, + const DebugLoc &DL, int64_t BrOffset, + RegScavenger *RS = nullptr) const override; + + unsigned removeBranch(MachineBasicBlock &MBB, + int *BytesRemoved = nullptr) const override; + + bool + reverseBranchCondition(SmallVectorImpl &Cond) const override; + + MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override; + + bool isBranchOffsetInRange(unsigned BranchOpc, + int64_t BrOffset) const override; }; } #endif diff --git a/lib/Target/RISCV/RISCVInstrInfo.td b/lib/Target/RISCV/RISCVInstrInfo.td index f0015021c163..1aae2f39dbdd 100644 --- a/lib/Target/RISCV/RISCVInstrInfo.td +++ b/lib/Target/RISCV/RISCVInstrInfo.td @@ -69,6 +69,22 @@ def fencearg : Operand { let DecoderMethod = "decodeUImmOperand<4>"; } +def UImmLog2XLenAsmOperand : AsmOperandClass { + let Name = "UImmLog2XLen"; + let RenderMethod = "addImmOperands"; + let DiagnosticType = "InvalidUImmLog2XLen"; +} + +def uimmlog2xlen : Operand, ImmLeafis64Bit()) + return isUInt<6>(Imm); + return isUInt<5>(Imm); +}]> { + let ParserMatchClass = UImmLog2XLenAsmOperand; + // TODO: should ensure invalid shamt is rejected when decoding. + let DecoderMethod = "decodeUImmOperand<6>"; +} + def uimm5 : Operand, ImmLeaf(Imm);}]> { let ParserMatchClass = UImmAsmOperand<5>; let DecoderMethod = "decodeUImmOperand<5>"; @@ -111,6 +127,10 @@ def ixlenimm : Operand; // Standalone (codegen-only) immleaf patterns. def simm32 : ImmLeaf(Imm);}]>; +// Addressing modes. +// Necessary because a frameindex can't be matched directly in a pattern. +def AddrFI : ComplexPattern; + // Extract least significant 12 bits from an immediate value and sign extend // them. 
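
The uimmlog2xlen operand encodes the rule that shift amounts are 5 bits wide on RV32 and 6 bits wide on RV64. Stated as a standalone predicate (the function name is illustrative):

// Standalone version of the uimmlog2xlen validity predicate.
#include <cstdint>
#include <cstdio>

bool isLegalShiftAmount(bool IsRV64, uint64_t Imm) {
  return IsRV64 ? Imm < 64 : Imm < 32;
}

int main() {
  std::printf("slli x1, x1, 31 on RV32: %s\n",
              isLegalShiftAmount(false, 31) ? "ok" : "invalid");
  std::printf("slli x1, x1, 32 on RV32: %s\n",
              isLegalShiftAmount(false, 32) ? "ok" : "invalid");
  std::printf("slli x1, x1, 63 on RV64: %s\n",
              isLegalShiftAmount(true, 63) ? "ok" : "invalid");
  return 0;
}
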
def LO12Sext : SDNodeXForm funct3, string opcodestr> let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in class Shift_ri funct3, string opcodestr> : RVInstIShift; let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in @@ -170,16 +190,32 @@ class ALU_rr funct7, bits<3> funct3, string opcodestr> opcodestr, "$rd, $rs1, $rs2">; let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in -class CSR_ir funct3, string opcodestr> : - RVInstI funct3, string opcodestr> + : RVInstI; let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in -class CSR_ii funct3, string opcodestr> : - RVInstI funct3, string opcodestr> + : RVInstI; +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +class ShiftW_ri funct3, string opcodestr> + : RVInstIShiftW; + +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +class ALUW_rr funct7, bits<3> funct3, string opcodestr> + : RVInstR; + +let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in +class Priv funct7> + : RVInstR; + //===----------------------------------------------------------------------===// // Instructions //===----------------------------------------------------------------------===// @@ -279,6 +315,153 @@ def CSRRWI : CSR_ii<0b101, "csrrwi">; def CSRRSI : CSR_ii<0b110, "csrrsi">; def CSRRCI : CSR_ii<0b111, "csrrci">; +/// RV64I instructions + +let Predicates = [IsRV64] in { +def LWU : Load_ri<0b110, "lwu">; +def LD : Load_ri<0b011, "ld">; +def SD : Store_rri<0b011, "sd">; + +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +def ADDIW : RVInstI<0b000, OPC_OP_IMM_32, (outs GPR:$rd), + (ins GPR:$rs1, simm12:$imm12), + "addiw", "$rd, $rs1, $imm12">; + +def SLLIW : ShiftW_ri<0, 0b001, "slliw">; +def SRLIW : ShiftW_ri<0, 0b101, "srliw">; +def SRAIW : ShiftW_ri<1, 0b101, "sraiw">; + +def ADDW : ALUW_rr<0b0000000, 0b000, "addw">; +def SUBW : ALUW_rr<0b0100000, 0b000, "subw">; +def SLLW : ALUW_rr<0b0000000, 0b001, "sllw">; +def SRLW : ALUW_rr<0b0000000, 0b101, "srlw">; +def SRAW : ALUW_rr<0b0100000, 0b101, "sraw">; +} // Predicates = [IsRV64] + +//===----------------------------------------------------------------------===// +// Privileged instructions +//===----------------------------------------------------------------------===// + +let isBarrier = 1, isReturn = 1, isTerminator = 1 in { +def URET : Priv<"uret", 0b0000000> { + let rd = 0; + let rs1 = 0; + let rs2 = 0b00010; +} + +def SRET : Priv<"sret", 0b0001000> { + let rd = 0; + let rs1 = 0; + let rs2 = 0b00010; +} + +def MRET : Priv<"mret", 0b0011000> { + let rd = 0; + let rs1 = 0; + let rs2 = 0b00010; +} +} // isBarrier = 1, isReturn = 1, isTerminator = 1 + +def WFI : Priv<"wfi", 0b0001000> { + let rd = 0; + let rs1 = 0; + let rs2 = 0b00101; +} + +let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in +def SFENCE_VMA : RVInstR<0b0001001, 0b000, OPC_SYSTEM, (outs), + (ins GPR:$rs1, GPR:$rs2), + "sfence.vma", "$rs1, $rs2"> { + let rd = 0; +} + +//===----------------------------------------------------------------------===// +// Assembler Pseudo Instructions (User-Level ISA, Version 2.2, Chapter 20) +//===----------------------------------------------------------------------===// + +// TODO la +// TODO lb lh lw +// TODO RV64I: ld +// TODO sb sh sw +// TODO RV64I: sd + +def : InstAlias<"nop", (ADDI X0, X0, 0)>; +// TODO li +def : InstAlias<"mv $rd, $rs", (ADDI GPR:$rd, GPR:$rs, 0)>; +def : InstAlias<"not $rd, $rs", (XORI GPR:$rd, GPR:$rs, -1)>; +def : InstAlias<"neg $rd, $rs", (SUB GPR:$rd, X0, GPR:$rs)>; + +let Predicates = [IsRV64] in { +def : InstAlias<"negw $rd, $rs", (SUBW GPR:$rd, X0, GPR:$rs)>; +def : 
InstAlias<"sext.w $rd, $rs", (ADDIW GPR:$rd, GPR:$rs, 0)>; +} // Predicates = [IsRV64] + +def : InstAlias<"seqz $rd, $rs", (SLTIU GPR:$rd, GPR:$rs, 1)>; +def : InstAlias<"snez $rd, $rs", (SLTU GPR:$rd, X0, GPR:$rs)>; +def : InstAlias<"sltz $rd, $rs", (SLT GPR:$rd, GPR:$rs, X0)>; +def : InstAlias<"sgtz $rd, $rs", (SLT GPR:$rd, X0, GPR:$rs)>; + +def : InstAlias<"beqz $rs, $offset", + (BEQ GPR:$rs, X0, simm13_lsb0:$offset)>; +def : InstAlias<"bnez $rs, $offset", + (BNE GPR:$rs, X0, simm13_lsb0:$offset)>; +def : InstAlias<"blez $rs, $offset", + (BGE X0, GPR:$rs, simm13_lsb0:$offset)>; +def : InstAlias<"bgez $rs, $offset", + (BGE GPR:$rs, X0, simm13_lsb0:$offset)>; +def : InstAlias<"bltz $rs, $offset", + (BLT GPR:$rs, X0, simm13_lsb0:$offset)>; +def : InstAlias<"bgtz $rs, $offset", + (BLT X0, GPR:$rs, simm13_lsb0:$offset)>; + +// Always output the canonical mnemonic for the pseudo branch instructions. +// The GNU tools emit the canonical mnemonic for the branch pseudo instructions +// as well (e.g. "bgt" will be recognised by the assembler but never printed by +// objdump). Match this behaviour by setting a zero weight. +def : InstAlias<"bgt $rs, $rt, $offset", + (BLT GPR:$rt, GPR:$rs, simm13_lsb0:$offset), 0>; +def : InstAlias<"ble $rs, $rt, $offset", + (BGE GPR:$rt, GPR:$rs, simm13_lsb0:$offset), 0>; +def : InstAlias<"bgtu $rs, $rt, $offset", + (BLTU GPR:$rt, GPR:$rs, simm13_lsb0:$offset), 0>; +def : InstAlias<"bleu $rs, $rt, $offset", + (BGEU GPR:$rt, GPR:$rs, simm13_lsb0:$offset), 0>; + +// "ret" has more weight since "ret" and "jr" alias the same "jalr" instruction. +def : InstAlias<"j $offset", (JAL X0, simm21_lsb0:$offset)>; +def : InstAlias<"jal $offset", (JAL X1, simm21_lsb0:$offset)>; +def : InstAlias<"jr $rs", (JALR X0, GPR:$rs, 0)>; +def : InstAlias<"jalr $rs", (JALR X1, GPR:$rs, 0)>; +def : InstAlias<"ret", (JALR X0, X1, 0), 2>; +// TODO call +// TODO tail + +def : InstAlias<"fence", (FENCE 0xF, 0xF)>; // 0xF == iorw + +// CSR Addresses: 0xC00 == cycle, 0xC01 == time, 0xC02 == instret +// 0xC80 == cycleh, 0xC81 == timeh, 0xC82 == instreth +def : InstAlias<"rdinstret $rd", (CSRRS GPR:$rd, 0xC02, X0)>; +def : InstAlias<"rdcycle $rd", (CSRRS GPR:$rd, 0xC00, X0)>; +def : InstAlias<"rdtime $rd", (CSRRS GPR:$rd, 0xC01, X0)>; + +let Predicates = [IsRV32] in { +def : InstAlias<"rdinstreth $rd", (CSRRS GPR:$rd, 0xC82, X0)>; +def : InstAlias<"rdcycleh $rd", (CSRRS GPR:$rd, 0xC80, X0)>; +def : InstAlias<"rdtimeh $rd", (CSRRS GPR:$rd, 0xC81, X0)>; +} // Predicates = [IsRV32] + +def : InstAlias<"csrr $rd, $csr", (CSRRS GPR:$rd, uimm12:$csr, X0)>; +def : InstAlias<"csrw $csr, $rs", (CSRRW X0, uimm12:$csr, GPR:$rs)>; +def : InstAlias<"csrs $csr, $rs", (CSRRS X0, uimm12:$csr, GPR:$rs)>; +def : InstAlias<"csrc $csr, $rs", (CSRRC X0, uimm12:$csr, GPR:$rs)>; + +def : InstAlias<"csrwi $csr, $imm", (CSRRWI X0, uimm12:$csr, uimm5:$imm)>; +def : InstAlias<"csrsi $csr, $imm", (CSRRSI X0, uimm12:$csr, uimm5:$imm)>; +def : InstAlias<"csrci $csr, $imm", (CSRRCI X0, uimm12:$csr, uimm5:$imm)>; + +def : InstAlias<"sfence.vma", (SFENCE_VMA X0, X0)>; +def : InstAlias<"sfence.vma $rs", (SFENCE_VMA GPR:$rs, X0)>; + //===----------------------------------------------------------------------===// // Pseudo-instructions and codegen patterns // @@ -293,9 +476,15 @@ class PatGprGpr : Pat<(OpNode GPR:$rs1, GPR:$rs2), (Inst GPR:$rs1, GPR:$rs2)>; class PatGprSimm12 : Pat<(OpNode GPR:$rs1, simm12:$imm12), (Inst GPR:$rs1, simm12:$imm12)>; -class PatGprUimm5 - : Pat<(OpNode GPR:$rs1, uimm5:$shamt), - (Inst GPR:$rs1, 
uimm5:$shamt)>; +class PatGprUimmLog2XLen + : Pat<(OpNode GPR:$rs1, uimmlog2xlen:$shamt), + (Inst GPR:$rs1, uimmlog2xlen:$shamt)>; + +/// Predicates + +def IsOrAdd: PatFrag<(ops node:$A, node:$B), (or node:$A, node:$B), [{ + return isOrEquivalentToAdd(N); +}]>; /// Immediates @@ -315,11 +504,18 @@ def : PatGprSimm12; def : PatGprGpr; def : PatGprSimm12; def : PatGprGpr; -def : PatGprUimm5; +def : PatGprUimmLog2XLen; def : PatGprGpr; -def : PatGprUimm5; +def : PatGprUimmLog2XLen; def : PatGprGpr; -def : PatGprUimm5; +def : PatGprUimmLog2XLen; + +/// FrameIndex calculations + +def : Pat<(add (i32 AddrFI:$Rs), simm12:$imm12), + (ADDI (i32 AddrFI:$Rs), simm12:$imm12)>; +def : Pat<(IsOrAdd (i32 AddrFI:$Rs), simm12:$imm12), + (ADDI (i32 AddrFI:$Rs), simm12:$imm12)>; /// Setcc @@ -401,8 +597,13 @@ def PseudoRET : Pseudo<(outs), (ins), [(RetFlag)]>, multiclass LdPat { def : Pat<(LoadOp GPR:$rs1), (Inst GPR:$rs1, 0)>; + def : Pat<(LoadOp AddrFI:$rs1), (Inst AddrFI:$rs1, 0)>; def : Pat<(LoadOp (add GPR:$rs1, simm12:$imm12)), (Inst GPR:$rs1, simm12:$imm12)>; + def : Pat<(LoadOp (add AddrFI:$rs1, simm12:$imm12)), + (Inst AddrFI:$rs1, simm12:$imm12)>; + def : Pat<(LoadOp (IsOrAdd AddrFI:$rs1, simm12:$imm12)), + (Inst AddrFI:$rs1, simm12:$imm12)>; } defm : LdPat; @@ -417,8 +618,13 @@ defm : LdPat; multiclass StPat { def : Pat<(StoreOp GPR:$rs2, GPR:$rs1), (Inst GPR:$rs2, GPR:$rs1, 0)>; + def : Pat<(StoreOp GPR:$rs2, AddrFI:$rs1), (Inst GPR:$rs2, AddrFI:$rs1, 0)>; def : Pat<(StoreOp GPR:$rs2, (add GPR:$rs1, simm12:$imm12)), (Inst GPR:$rs2, GPR:$rs1, simm12:$imm12)>; + def : Pat<(StoreOp GPR:$rs2, (add AddrFI:$rs1, simm12:$imm12)), + (Inst GPR:$rs2, AddrFI:$rs1, simm12:$imm12)>; + def : Pat<(StoreOp GPR:$rs2, (IsOrAdd AddrFI:$rs1, simm12:$imm12)), + (Inst GPR:$rs2, AddrFI:$rs1, simm12:$imm12)>; } defm : StPat; @@ -441,3 +647,6 @@ def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), include "RISCVInstrInfoM.td" include "RISCVInstrInfoA.td" +include "RISCVInstrInfoF.td" +include "RISCVInstrInfoD.td" +include "RISCVInstrInfoC.td" diff --git a/lib/Target/RISCV/RISCVInstrInfoA.td b/lib/Target/RISCV/RISCVInstrInfoA.td index 54f35c3c0bab..33e863ba6a10 100644 --- a/lib/Target/RISCV/RISCVInstrInfoA.td +++ b/lib/Target/RISCV/RISCVInstrInfoA.td @@ -61,3 +61,17 @@ defm AMOMAX_W : AMO_rr_aq_rl<0b10100, 0b010, "amomax.w">; defm AMOMINU_W : AMO_rr_aq_rl<0b11000, 0b010, "amominu.w">; defm AMOMAXU_W : AMO_rr_aq_rl<0b11100, 0b010, "amomaxu.w">; } // Predicates = [HasStdExtA] + +let Predicates = [HasStdExtA, IsRV64] in { +defm LR_D : LR_r_aq_rl<0b011, "lr.d">; +defm SC_D : AMO_rr_aq_rl<0b00011, 0b011, "sc.d">; +defm AMOSWAP_D : AMO_rr_aq_rl<0b00001, 0b011, "amoswap.d">; +defm AMOADD_D : AMO_rr_aq_rl<0b00000, 0b011, "amoadd.d">; +defm AMOXOR_D : AMO_rr_aq_rl<0b00100, 0b011, "amoxor.d">; +defm AMOAND_D : AMO_rr_aq_rl<0b01100, 0b011, "amoand.d">; +defm AMOOR_D : AMO_rr_aq_rl<0b01000, 0b011, "amoor.d">; +defm AMOMIN_D : AMO_rr_aq_rl<0b10000, 0b011, "amomin.d">; +defm AMOMAX_D : AMO_rr_aq_rl<0b10100, 0b011, "amomax.d">; +defm AMOMINU_D : AMO_rr_aq_rl<0b11000, 0b011, "amominu.d">; +defm AMOMAXU_D : AMO_rr_aq_rl<0b11100, 0b011, "amomaxu.d">; +} // Predicates = [HasStedExtA, IsRV64] diff --git a/lib/Target/RISCV/RISCVInstrInfoC.td b/lib/Target/RISCV/RISCVInstrInfoC.td new file mode 100644 index 000000000000..f39b128099d6 --- /dev/null +++ b/lib/Target/RISCV/RISCVInstrInfoC.td @@ -0,0 +1,421 @@ +//===- RISCVInstrInfoC.td - Compressed RISCV instructions -*- tblgen-*-----===// +// +// The LLVM Compiler Infrastructure 
+// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +include "RISCVInstrFormatsC.td" + +//===----------------------------------------------------------------------===// +// Operand definitions. +//===----------------------------------------------------------------------===// + +def UImmLog2XLenNonZeroAsmOperand : AsmOperandClass { + let Name = "UImmLog2XLenNonZero"; + let RenderMethod = "addImmOperands"; + let DiagnosticType = "InvalidUImmLog2XLenNonZero"; +} + +def uimmlog2xlennonzero : Operand, ImmLeafis64Bit()) + return isUInt<6>(Imm) && (Imm != 0); + return isUInt<5>(Imm) && (Imm != 0); +}]> { + let ParserMatchClass = UImmLog2XLenNonZeroAsmOperand; + // TODO: should ensure invalid shamt is rejected when decoding. + let DecoderMethod = "decodeUImmOperand<6>"; +} + +def simm6 : Operand, ImmLeaf(Imm);}]> { + let ParserMatchClass = SImmAsmOperand<6>; + let EncoderMethod = "getImmOpValue"; + let DecoderMethod = "decodeSImmOperand<6>"; +} + +def uimm6nonzero : Operand, + ImmLeaf(Imm) && (Imm != 0);}]> { + let ParserMatchClass = UImmAsmOperand<6, "NonZero">; + let DecoderMethod = "decodeUImmOperand<6>"; +} + +// A 7-bit unsigned immediate where the least significant two bits are zero. +def uimm7_lsb00 : Operand, + ImmLeaf(Imm);}]> { + let ParserMatchClass = UImmAsmOperand<7, "Lsb00">; + let EncoderMethod = "getImmOpValue"; + let DecoderMethod = "decodeUImmOperand<7>"; +} + +// A 8-bit unsigned immediate where the least significant two bits are zero. +def uimm8_lsb00 : Operand, + ImmLeaf(Imm);}]> { + let ParserMatchClass = UImmAsmOperand<8, "Lsb00">; + let EncoderMethod = "getImmOpValue"; + let DecoderMethod = "decodeUImmOperand<8>"; +} + +// A 8-bit unsigned immediate where the least significant three bits are zero. +def uimm8_lsb000 : Operand, + ImmLeaf(Imm);}]> { + let ParserMatchClass = UImmAsmOperand<8, "Lsb000">; + let EncoderMethod = "getImmOpValue"; + let DecoderMethod = "decodeUImmOperand<8>"; +} + +// A 9-bit signed immediate where the least significant bit is zero. +def simm9_lsb0 : Operand { + let ParserMatchClass = SImmAsmOperand<9, "Lsb0">; + let EncoderMethod = "getImmOpValueAsr1"; + let DecoderMethod = "decodeSImmOperandAndLsl1<9>"; +} + +// A 9-bit unsigned immediate where the least significant three bits are zero. +def uimm9_lsb000 : Operand, + ImmLeaf(Imm);}]> { + let ParserMatchClass = UImmAsmOperand<9, "Lsb000">; + let EncoderMethod = "getImmOpValue"; + let DecoderMethod = "decodeUImmOperand<9>"; +} + +// A 10-bit unsigned immediate where the least significant two bits are zero +// and the immediate can't be zero. +def uimm10_lsb00nonzero : Operand, + ImmLeaf(Imm) && (Imm != 0);}]> { + let ParserMatchClass = UImmAsmOperand<10, "Lsb00NonZero">; + let EncoderMethod = "getImmOpValue"; + let DecoderMethod = "decodeUImmOperand<10>"; +} + +// A 10-bit signed immediate where the least significant four bits are zero. +def simm10_lsb0000 : Operand, + ImmLeaf(Imm);}]> { + let ParserMatchClass = SImmAsmOperand<10, "Lsb0000">; + let EncoderMethod = "getImmOpValue"; + let DecoderMethod = "decodeSImmOperand<10>"; +} + +// A 12-bit signed immediate where the least significant bit is zero. 
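
These compressed-load/store operands feed immediates whose bits are scattered across the 16-bit encoding; for example, the c.lw offset below is placed as inst[12:10]=offset[5:3], inst[6]=offset[2], inst[5]=offset[6]. A standalone encode/decode round trip of exactly that mapping (helper names are illustrative; the register fields are left at zero):

// Standalone round trip of the scattered c.lw offset encoding.
#include <cassert>
#include <cstdint>
#include <cstdio>

// Scatter a uimm7_lsb00 byte offset into a 16-bit c.lw word.
uint16_t encodeCLWOffset(uint16_t base, unsigned offset) {
  assert(offset < 128 && (offset & 0x3) == 0 && "uimm7_lsb00");
  uint16_t inst = base;
  inst |= ((offset >> 3) & 0x7) << 10; // inst[12:10] = offset[5:3]
  inst |= ((offset >> 2) & 0x1) << 6;  // inst[6]     = offset[2]
  inst |= ((offset >> 6) & 0x1) << 5;  // inst[5]     = offset[6]
  return inst;
}

unsigned decodeCLWOffset(uint16_t inst) {
  unsigned offset = 0;
  offset |= ((inst >> 10) & 0x7) << 3;
  offset |= ((inst >> 6) & 0x1) << 2;
  offset |= ((inst >> 5) & 0x1) << 6;
  return offset;
}

int main() {
  for (unsigned offset = 0; offset < 128; offset += 4) {
    // base 0x4000 sets funct3=0b010 (c.lw) and opcode=00; rs1'/rd' are zero.
    uint16_t inst = encodeCLWOffset(0x4000, offset);
    assert(decodeCLWOffset(inst) == offset);
  }
  std::printf("all uimm7_lsb00 offsets round-trip\n");
  return 0;
}
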
+def simm12_lsb0 : Operand { + let ParserMatchClass = SImmAsmOperand<12, "Lsb0">; + let EncoderMethod = "getImmOpValueAsr1"; + let DecoderMethod = "decodeSImmOperandAndLsl1<12>"; +} + +//===----------------------------------------------------------------------===// +// Instruction Class Templates +//===----------------------------------------------------------------------===// + +let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in +class CStackLoad funct3, string OpcodeStr, + RegisterClass cls, DAGOperand opnd> + : RVInst16CI; + +let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in +class CStackStore funct3, string OpcodeStr, + RegisterClass cls, DAGOperand opnd> + : RVInst16CSS; + +let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in +class CLoad_ri funct3, string OpcodeStr, + RegisterClass cls, DAGOperand opnd> + : RVInst16CL; + +let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in +class CStore_rri funct3, string OpcodeStr, + RegisterClass cls, DAGOperand opnd> + : RVInst16CS; + +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +class Bcz funct3, string OpcodeStr, PatFrag CondOp, + RegisterClass cls> + : RVInst16CB { + let isBranch = 1; + let isTerminator = 1; + let Inst{12} = imm{7}; + let Inst{11-10} = imm{3-2}; + let Inst{6-5} = imm{6-5}; + let Inst{4-3} = imm{1-0}; + let Inst{2} = imm{4}; +} + +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +class Shift_right funct2, string OpcodeStr, RegisterClass cls, + Operand ImmOpnd> + : RVInst16CB<0b100, 0b01, (outs cls:$rs1_wb), (ins cls:$rs1, ImmOpnd:$imm), + OpcodeStr, "$rs1, $imm"> { + let Constraints = "$rs1 = $rs1_wb"; + let Inst{12} = imm{5}; + let Inst{11-10} = funct2; + let Inst{6-2} = imm{4-0}; +} + +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +class CS_ALU funct2, string OpcodeStr, RegisterClass cls, + bit RV64only> + : RVInst16CS<0b100, 0b01, (outs cls:$rd_wb), (ins cls:$rd, cls:$rs2), + OpcodeStr, "$rd, $rs2"> { + bits<3> rd; + let Constraints = "$rd = $rd_wb"; + let Inst{12} = RV64only; + let Inst{11-10} = 0b11; + let Inst{9-7} = rd; + let Inst{6-5} = funct2; +} + +//===----------------------------------------------------------------------===// +// Instructions +//===----------------------------------------------------------------------===// + +let Predicates = [HasStdExtC] in { + +let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [X2] in +def C_ADDI4SPN : RVInst16CIW<0b000, 0b00, (outs GPRC:$rd), + (ins SP:$rs1, uimm10_lsb00nonzero:$imm), + "c.addi4spn", "$rd, $rs1, $imm"> { + bits<5> rs1; + let Inst{12-11} = imm{5-4}; + let Inst{10-7} = imm{9-6}; + let Inst{6} = imm{2}; + let Inst{5} = imm{3}; +} + +let Predicates = [HasStdExtC, HasStdExtD] in +def C_FLD : CLoad_ri<0b001, "c.fld", FPR64C, uimm8_lsb000> { + bits<8> imm; + let Inst{12-10} = imm{5-3}; + let Inst{6-5} = imm{7-6}; +} + +def C_LW : CLoad_ri<0b010, "c.lw", GPRC, uimm7_lsb00> { + bits<7> imm; + let Inst{12-10} = imm{5-3}; + let Inst{6} = imm{2}; + let Inst{5} = imm{6}; +} + +let DecoderNamespace = "RISCV32Only_", + Predicates = [HasStdExtC, HasStdExtF, IsRV32] in +def C_FLW : CLoad_ri<0b011, "c.flw", FPR32C, uimm7_lsb00> { + bits<7> imm; + let Inst{12-10} = imm{5-3}; + let Inst{6} = imm{2}; + let Inst{5} = imm{6}; +} + +let Predicates = [HasStdExtC, IsRV64] in +def C_LD : CLoad_ri<0b011, "c.ld", GPRC, uimm8_lsb000> { + bits<8> imm; + let Inst{12-10} = imm{5-3}; + let Inst{6-5} = imm{7-6}; +} + +let Predicates = [HasStdExtC, HasStdExtD] in +def C_FSD : CStore_rri<0b101, "c.fsd", FPR64C, uimm8_lsb000> { + bits<8> imm; + let Inst{12-10} = 
imm{5-3}; + let Inst{6-5} = imm{7-6}; +} + +def C_SW : CStore_rri<0b110, "c.sw", GPRC, uimm7_lsb00> { + bits<7> imm; + let Inst{12-10} = imm{5-3}; + let Inst{6} = imm{2}; + let Inst{5} = imm{6}; +} + +let DecoderNamespace = "RISCV32Only_", + Predicates = [HasStdExtC, HasStdExtF, IsRV32] in +def C_FSW : CStore_rri<0b111, "c.fsw", FPR32C, uimm7_lsb00> { + bits<7> imm; + let Inst{12-10} = imm{5-3}; + let Inst{6} = imm{2}; + let Inst{5} = imm{6}; +} + +let Predicates = [HasStdExtC, IsRV64] in +def C_SD : CStore_rri<0b111, "c.sd", GPRC, uimm8_lsb000> { + bits<8> imm; + let Inst{12-10} = imm{5-3}; + let Inst{6-5} = imm{7-6}; +} + +let rd = 0, imm = 0, hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +def C_NOP : RVInst16CI<0b000, 0b01, (outs), (ins), "c.nop", "">; + +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +def C_ADDI : RVInst16CI<0b000, 0b01, (outs GPRNoX0:$rd_wb), + (ins GPRNoX0:$rd, simm6:$imm), + "c.addi", "$rd, $imm"> { + let Constraints = "$rd = $rd_wb"; + let Inst{6-2} = imm{4-0}; +} + +let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCall = 1, + DecoderNamespace = "RISCV32Only_", Defs = [X1], + Predicates = [HasStdExtC, IsRV32] in +def C_JAL : RVInst16CJ<0b001, 0b01, (outs), (ins simm12_lsb0:$offset), + "c.jal", "$offset">; + +let hasSideEffects = 0, mayLoad = 0, mayStore = 0, + Predicates = [HasStdExtC, IsRV64] in +def C_ADDIW : RVInst16CI<0b001, 0b01, (outs GPRNoX0:$rd_wb), + (ins GPRNoX0:$rd, simm6:$imm), + "c.addiw", "$rd, $imm"> { + let Constraints = "$rd = $rd_wb"; + let Inst{6-2} = imm{4-0}; +} + +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +def C_LI : RVInst16CI<0b010, 0b01, (outs GPRNoX0:$rd), (ins simm6:$imm), + "c.li", "$rd, $imm"> { + let Inst{6-2} = imm{4-0}; +} + +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +def C_ADDI16SP : RVInst16CI<0b011, 0b01, (outs SP:$rd_wb), + (ins SP:$rd, simm10_lsb0000:$imm), + "c.addi16sp", "$rd, $imm"> { + let Constraints = "$rd = $rd_wb"; + let Inst{12} = imm{9}; + let Inst{11-7} = 2; + let Inst{6} = imm{4}; + let Inst{5} = imm{6}; + let Inst{4-3} = imm{8-7}; + let Inst{2} = imm{5}; +} + +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +def C_LUI : RVInst16CI<0b011, 0b01, (outs GPRNoX0X2:$rd), + (ins uimm6nonzero:$imm), + "c.lui", "$rd, $imm"> { + let Inst{6-2} = imm{4-0}; +} + +def C_SRLI : Shift_right<0b00, "c.srli", GPRC, uimmlog2xlennonzero>; +def C_SRAI : Shift_right<0b01, "c.srai", GPRC, uimmlog2xlennonzero>; + +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +def C_ANDI : RVInst16CB<0b100, 0b01, (outs GPRC:$rs1_wb), (ins GPRC:$rs1, simm6:$imm), + "c.andi", "$rs1, $imm"> { + let Constraints = "$rs1 = $rs1_wb"; + let Inst{12} = imm{5}; + let Inst{11-10} = 0b10; + let Inst{6-2} = imm{4-0}; +} + +def C_SUB : CS_ALU<0b00, "c.sub", GPRC, 0>; +def C_XOR : CS_ALU<0b01, "c.xor", GPRC, 0>; +def C_OR : CS_ALU<0b10, "c.or" , GPRC, 0>; +def C_AND : CS_ALU<0b11, "c.and", GPRC, 0>; + +let Predicates = [HasStdExtC, IsRV64] in { +def C_SUBW : CS_ALU<0b00, "c.subw", GPRC, 1>; +def C_ADDW : CS_ALU<0b01, "c.addw", GPRC, 1>; +} + +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +def C_J : RVInst16CJ<0b101, 0b01, (outs), (ins simm12_lsb0:$offset), + "c.j", "$offset"> { + let isBranch = 1; + let isTerminator=1; + let isBarrier=1; +} + +def C_BEQZ : Bcz<0b110, "c.beqz", seteq, GPRC>; +def C_BNEZ : Bcz<0b111, "c.bnez", setne, GPRC>; + +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +def C_SLLI : RVInst16CI<0b000, 0b10, (outs GPRNoX0:$rd_wb), + (ins GPRNoX0:$rd, uimmlog2xlennonzero:$imm), + "c.slli" ,"$rd, 
$imm"> { + let Constraints = "$rd = $rd_wb"; + let Inst{6-2} = imm{4-0}; +} + +let Predicates = [HasStdExtC, HasStdExtD] in +def C_FLDSP : CStackLoad<0b001, "c.fldsp", FPR64, uimm9_lsb000> { + let Inst{6-5} = imm{4-3}; + let Inst{4-2} = imm{8-6}; +} + +def C_LWSP : CStackLoad<0b010, "c.lwsp", GPRNoX0, uimm8_lsb00> { + let Inst{6-4} = imm{4-2}; + let Inst{3-2} = imm{7-6}; +} + +let DecoderNamespace = "RISCV32Only_", + Predicates = [HasStdExtC, HasStdExtF, IsRV32] in +def C_FLWSP : CStackLoad<0b011, "c.flwsp", FPR32, uimm8_lsb00> { + let Inst{6-4} = imm{4-2}; + let Inst{3-2} = imm{7-6}; +} + +let Predicates = [HasStdExtC, IsRV64] in +def C_LDSP : CStackLoad<0b011, "c.ldsp", GPRNoX0, uimm9_lsb000> { + let Inst{6-5} = imm{4-3}; + let Inst{4-2} = imm{8-6}; +} + +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +def C_JR : RVInst16CR<0b1000, 0b10, (outs), (ins GPRNoX0:$rs1), + "c.jr", "$rs1"> { + let isBranch = 1; + let isBarrier = 1; + let isTerminator = 1; + let isIndirectBranch = 1; + let rs2 = 0; +} + +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +def C_MV : RVInst16CR<0b1000, 0b10, (outs GPRNoX0:$rs1), (ins GPRNoX0:$rs2), + "c.mv", "$rs1, $rs2">; + +let rs1 = 0, rs2 = 0, hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +def C_EBREAK : RVInst16CR<0b1001, 0b10, (outs), (ins), "c.ebreak", "">; + +let hasSideEffects = 0, mayLoad = 0, mayStore = 0, + isCall=1, Defs=[X1], rs2 = 0 in +def C_JALR : RVInst16CR<0b1001, 0b10, (outs), (ins GPRNoX0:$rs1), + "c.jalr", "$rs1">; + +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +def C_ADD : RVInst16CR<0b1001, 0b10, (outs GPRNoX0:$rs1_wb), + (ins GPRNoX0:$rs1, GPRNoX0:$rs2), + "c.add", "$rs1, $rs2"> { + let Constraints = "$rs1 = $rs1_wb"; +} + +let Predicates = [HasStdExtC, HasStdExtD] in +def C_FSDSP : CStackStore<0b101, "c.fsdsp", FPR64, uimm9_lsb000> { + let Inst{12-10} = imm{5-3}; + let Inst{9-7} = imm{8-6}; +} + +def C_SWSP : CStackStore<0b110, "c.swsp", GPR, uimm8_lsb00> { + let Inst{12-9} = imm{5-2}; + let Inst{8-7} = imm{7-6}; +} + +let DecoderNamespace = "RISCV32Only_", + Predicates = [HasStdExtC, HasStdExtF, IsRV32] in +def C_FSWSP : CStackStore<0b111, "c.fswsp", FPR32, uimm8_lsb00> { + let Inst{12-9} = imm{5-2}; + let Inst{8-7} = imm{7-6}; +} + +let Predicates = [HasStdExtC, IsRV64] in +def C_SDSP : CStackStore<0b111, "c.sdsp", GPR, uimm9_lsb000> { + let Inst{12-10} = imm{5-3}; + let Inst{9-7} = imm{8-6}; +} + +} // Predicates = [HasStdExtC] diff --git a/lib/Target/RISCV/RISCVInstrInfoD.td b/lib/Target/RISCV/RISCVInstrInfoD.td new file mode 100644 index 000000000000..48d91c0054d3 --- /dev/null +++ b/lib/Target/RISCV/RISCVInstrInfoD.td @@ -0,0 +1,174 @@ +//===-- RISCVInstrInfoD.td - RISC-V 'D' instructions -------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the RISC-V instructions from the standard 'D', +// Double-Precision Floating-Point instruction set extension. 
+// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Instruction Class Templates +//===----------------------------------------------------------------------===// + +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +class FPFMAD_rrr_frm + : RVInstR4<0b01, opcode, (outs FPR64:$rd), + (ins FPR64:$rs1, FPR64:$rs2, FPR64:$rs3, frmarg:$funct3), + opcodestr, "$rd, $rs1, $rs2, $rs3, $funct3">; + +class FPFMADDynFrmAlias + : InstAlias; + +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +class FPALUD_rr funct7, bits<3> funct3, string opcodestr> + : RVInstR; + +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +class FPALUD_rr_frm funct7, string opcodestr> + : RVInstRFrm; + +class FPALUDDynFrmAlias + : InstAlias; + +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +class FPCmpD_rr funct3, string opcodestr> + : RVInstR<0b1010001, funct3, OPC_OP_FP, (outs GPR:$rd), + (ins FPR64:$rs1, FPR64:$rs2), opcodestr, "$rd, $rs1, $rs2">; + +//===----------------------------------------------------------------------===// +// Instructions +//===----------------------------------------------------------------------===// + +let Predicates = [HasStdExtD] in { + +let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in +def FLD : RVInstI<0b011, OPC_LOAD_FP, (outs FPR64:$rd), + (ins GPR:$rs1, simm12:$imm12), + "fld", "$rd, ${imm12}(${rs1})">; + +// Operands for stores are in the order srcreg, base, offset rather than +// reflecting the order these fields are specified in the instruction +// encoding. +let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in +def FSD : RVInstS<0b011, OPC_STORE_FP, (outs), + (ins FPR64:$rs2, GPR:$rs1, simm12:$imm12), + "fsd", "$rs2, ${imm12}(${rs1})">; + +def FMADD_D : FPFMAD_rrr_frm; +def : FPFMADDynFrmAlias; +def FMSUB_D : FPFMAD_rrr_frm; +def : FPFMADDynFrmAlias; +def FNMSUB_D : FPFMAD_rrr_frm; +def : FPFMADDynFrmAlias; +def FNMADD_D : FPFMAD_rrr_frm; +def : FPFMADDynFrmAlias; + +def FADD_D : FPALUD_rr_frm<0b0000001, "fadd.d">; +def : FPALUDDynFrmAlias; +def FSUB_D : FPALUD_rr_frm<0b0000101, "fsub.d">; +def : FPALUDDynFrmAlias; +def FMUL_D : FPALUD_rr_frm<0b0001001, "fmul.d">; +def : FPALUDDynFrmAlias; +def FDIV_D : FPALUD_rr_frm<0b0001101, "fdiv.d">; +def : FPALUDDynFrmAlias; + +def FSQRT_D : FPUnaryOp_r_frm<0b0101101, FPR64, FPR64, "fsqrt.d"> { + let rs2 = 0b00000; +} +def : FPUnaryOpDynFrmAlias; + +def FSGNJ_D : FPALUD_rr<0b0010001, 0b000, "fsgnj.d">; +def FSGNJN_D : FPALUD_rr<0b0010001, 0b001, "fsgnjn.d">; +def FSGNJX_D : FPALUD_rr<0b0010001, 0b010, "fsgnjx.d">; +def FMIN_D : FPALUD_rr<0b0010101, 0b000, "fmin.d">; +def FMAX_D : FPALUD_rr<0b0010101, 0b001, "fmax.d">; + +def FCVT_S_D : FPUnaryOp_r_frm<0b0100000, FPR32, FPR64, "fcvt.s.d"> { + let rs2 = 0b00001; +} +def : FPUnaryOpDynFrmAlias; + +def FCVT_D_S : FPUnaryOp_r<0b0100001, 0b000, FPR64, FPR32, "fcvt.d.s"> { + let rs2 = 0b00000; +} + +def FEQ_D : FPCmpD_rr<0b010, "feq.d">; +def FLT_D : FPCmpD_rr<0b001, "flt.d">; +def FLE_D : FPCmpD_rr<0b000, "fle.d">; + +def FCLASS_D : FPUnaryOp_r<0b1110001, 0b001, GPR, FPR64, "fclass.d"> { + let rs2 = 0b00000; +} + +def FCVT_W_D : FPUnaryOp_r_frm<0b1100001, GPR, FPR64, "fcvt.w.d"> { + let rs2 = 0b00000; +} +def : FPUnaryOpDynFrmAlias; + +def FCVT_WU_D : FPUnaryOp_r_frm<0b1100001, GPR, FPR64, "fcvt.wu.d"> { + let rs2 = 0b00001; +} +def : FPUnaryOpDynFrmAlias; + +def FCVT_D_W : FPUnaryOp_r<0b1101001, 0b000, FPR64, GPR, "fcvt.d.w"> { + let rs2 = 0b00000; +} + +def 
FCVT_D_WU : FPUnaryOp_r<0b1101001, 0b000, FPR64, GPR, "fcvt.d.wu"> { + let rs2 = 0b00001; +} +} // Predicates = [HasStdExtD] + +let Predicates = [HasStdExtD, IsRV64] in { +def FCVT_L_D : FPUnaryOp_r_frm<0b1100001, GPR, FPR64, "fcvt.l.d"> { + let rs2 = 0b00010; +} +def : FPUnaryOpDynFrmAlias; + +def FCVT_LU_D : FPUnaryOp_r_frm<0b1100001, GPR, FPR64, "fcvt.lu.d"> { + let rs2 = 0b00011; +} +def : FPUnaryOpDynFrmAlias; + +def FMV_X_D : FPUnaryOp_r<0b1110001, 0b000, GPR, FPR64, "fmv.x.d"> { + let rs2 = 0b00000; +} + +def FCVT_D_L : FPUnaryOp_r_frm<0b1101001, FPR64, GPR, "fcvt.d.l"> { + let rs2 = 0b00010; +} +def : FPUnaryOpDynFrmAlias; + +def FCVT_D_LU : FPUnaryOp_r_frm<0b1101001, FPR64, GPR, "fcvt.d.lu"> { + let rs2 = 0b00011; +} +def : FPUnaryOpDynFrmAlias; + +def FMV_D_X : FPUnaryOp_r<0b1111001, 0b000, FPR64, GPR, "fmv.d.x"> { + let rs2 = 0b00000; +} +} // Predicates = [HasStdExtD, IsRV64] + +//===----------------------------------------------------------------------===// +// Assembler Pseudo Instructions (User-Level ISA, Version 2.2, Chapter 20) +//===----------------------------------------------------------------------===// + +let Predicates = [HasStdExtD] in { +// TODO fld +// TODO fsd + +def : InstAlias<"fmv.d $rd, $rs", (FSGNJ_D FPR64:$rd, FPR64:$rs, FPR64:$rs)>; +def : InstAlias<"fabs.d $rd, $rs", (FSGNJX_D FPR64:$rd, FPR64:$rs, FPR64:$rs)>; +def : InstAlias<"fneg.d $rd, $rs", (FSGNJN_D FPR64:$rd, FPR64:$rs, FPR64:$rs)>; +} // Predicates = [HasStdExtD] diff --git a/lib/Target/RISCV/RISCVInstrInfoF.td b/lib/Target/RISCV/RISCVInstrInfoF.td new file mode 100644 index 000000000000..07722d2cbf34 --- /dev/null +++ b/lib/Target/RISCV/RISCVInstrInfoF.td @@ -0,0 +1,222 @@ +//===-- RISCVInstrInfoF.td - RISC-V 'F' instructions -------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the RISC-V instructions from the standard 'F', +// Single-Precision Floating-Point instruction set extension. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Operand and SDNode transformation definitions. 
+//===----------------------------------------------------------------------===// + +// Floating-point rounding mode + +def FRMArg : AsmOperandClass { + let Name = "FRMArg"; + let RenderMethod = "addFRMArgOperands"; + let DiagnosticType = "InvalidFRMArg"; +} + +def frmarg : Operand { + let ParserMatchClass = FRMArg; + let PrintMethod = "printFRMArg"; + let DecoderMethod = "decodeUImmOperand<3>"; +} + +//===----------------------------------------------------------------------===// +// Instruction class templates +//===----------------------------------------------------------------------===// + +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +class FPFMAS_rrr_frm + : RVInstR4<0b00, opcode, (outs FPR32:$rd), + (ins FPR32:$rs1, FPR32:$rs2, FPR32:$rs3, frmarg:$funct3), + opcodestr, "$rd, $rs1, $rs2, $rs3, $funct3">; + +class FPFMASDynFrmAlias + : InstAlias; + +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +class FPALUS_rr funct7, bits<3> funct3, string opcodestr> + : RVInstR; + +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +class FPALUS_rr_frm funct7, string opcodestr> + : RVInstRFrm; + +class FPALUSDynFrmAlias + : InstAlias; + +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +class FPUnaryOp_r funct7, bits<3> funct3, RegisterClass rdty, + RegisterClass rs1ty, string opcodestr> + : RVInstR; + +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +class FPUnaryOp_r_frm funct7, RegisterClass rdty, RegisterClass rs1ty, + string opcodestr> + : RVInstRFrm; + +class FPUnaryOpDynFrmAlias + : InstAlias; + +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +class FPCmpS_rr funct3, string opcodestr> + : RVInstR<0b1010000, funct3, OPC_OP_FP, (outs GPR:$rd), + (ins FPR32:$rs1, FPR32:$rs2), opcodestr, "$rd, $rs1, $rs2">; + +//===----------------------------------------------------------------------===// +// Instructions +//===----------------------------------------------------------------------===// + +let Predicates = [HasStdExtF] in { +let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in +def FLW : RVInstI<0b010, OPC_LOAD_FP, (outs FPR32:$rd), + (ins GPR:$rs1, simm12:$imm12), + "flw", "$rd, ${imm12}(${rs1})">; + +// Operands for stores are in the order srcreg, base, offset rather than +// reflecting the order these fields are specified in the instruction +// encoding. 
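An aside on the operand-order comment just above: the FSW definition that follows keeps the assembly-level order srcreg, base, offset, while the S-type encoding splits the immediate around the rs2/rs1 fields. Below is a minimal sketch, not part of the patch, that packs a standard RISC-V S-type word from those fields; the field layout (imm[11:5] | rs2 | rs1 | funct3 | imm[4:0] | opcode) is the base-ISA S-type format, and funct3 = 0b010 with the OPC_STORE_FP opcode 0b0100111 are taken from the surrounding definitions.

    #include <cstdint>
    #include <cstdio>

    // Illustrative only: packs the fields RVInstS splits out into an S-type word.
    static uint32_t encodeSType(uint32_t funct3, uint32_t opcode, uint32_t rs1,
                                uint32_t rs2, int32_t imm) {
      uint32_t uimm = static_cast<uint32_t>(imm) & 0xfff;
      return ((uimm >> 5) << 25) | (rs2 << 20) | (rs1 << 15) | (funct3 << 12) |
             ((uimm & 0x1f) << 7) | opcode;
    }

    int main() {
      // fsw fa0, 8(sp): operands appear as (rs2 = f10, rs1 = x2, imm = 8) even
      // though the immediate lands in two separate fields of the encoding.
      printf("0x%08x\n", encodeSType(/*funct3=*/0b010, /*opcode=*/0b0100111,
                                     /*rs1=*/2, /*rs2=*/10, /*imm=*/8));
    }

This should print 0x00a12427, with imm[11:5] = 0 and imm[4:0] = 8 ending up in different bit ranges even though the asm string shows a single offset.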
+let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in +def FSW : RVInstS<0b010, OPC_STORE_FP, (outs), + (ins FPR32:$rs2, GPR:$rs1, simm12:$imm12), + "fsw", "$rs2, ${imm12}(${rs1})">; + +def FMADD_S : FPFMAS_rrr_frm; +def : FPFMASDynFrmAlias; +def FMSUB_S : FPFMAS_rrr_frm; +def : FPFMASDynFrmAlias; +def FNMSUB_S : FPFMAS_rrr_frm; +def : FPFMASDynFrmAlias; +def FNMADD_S : FPFMAS_rrr_frm; +def : FPFMASDynFrmAlias; + +def FADD_S : FPALUS_rr_frm<0b0000000, "fadd.s">; +def : FPALUSDynFrmAlias; +def FSUB_S : FPALUS_rr_frm<0b0000100, "fsub.s">; +def : FPALUSDynFrmAlias; +def FMUL_S : FPALUS_rr_frm<0b0001000, "fmul.s">; +def : FPALUSDynFrmAlias; +def FDIV_S : FPALUS_rr_frm<0b0001100, "fdiv.s">; +def : FPALUSDynFrmAlias; + +def FSQRT_S : FPUnaryOp_r_frm<0b0101100, FPR32, FPR32, "fsqrt.s"> { + let rs2 = 0b00000; +} +def : FPUnaryOpDynFrmAlias; + +def FSGNJ_S : FPALUS_rr<0b0010000, 0b000, "fsgnj.s">; +def FSGNJN_S : FPALUS_rr<0b0010000, 0b001, "fsgnjn.s">; +def FSGNJX_S : FPALUS_rr<0b0010000, 0b010, "fsgnjx.s">; +def FMIN_S : FPALUS_rr<0b0010100, 0b000, "fmin.s">; +def FMAX_S : FPALUS_rr<0b0010100, 0b001, "fmax.s">; + +def FCVT_W_S : FPUnaryOp_r_frm<0b1100000, GPR, FPR32, "fcvt.w.s"> { + let rs2 = 0b00000; +} +def : FPUnaryOpDynFrmAlias; + +def FCVT_WU_S : FPUnaryOp_r_frm<0b1100000, GPR, FPR32, "fcvt.wu.s"> { + let rs2 = 0b00001; +} +def : FPUnaryOpDynFrmAlias; + +def FMV_X_W : FPUnaryOp_r<0b1110000, 0b000, GPR, FPR32, "fmv.x.w"> { + let rs2 = 0b00000; +} + +def FEQ_S : FPCmpS_rr<0b010, "feq.s">; +def FLT_S : FPCmpS_rr<0b001, "flt.s">; +def FLE_S : FPCmpS_rr<0b000, "fle.s">; + +def FCLASS_S : FPUnaryOp_r<0b1110000, 0b001, GPR, FPR32, "fclass.s"> { + let rs2 = 0b00000; +} + +def FCVT_S_W : FPUnaryOp_r_frm<0b1101000, FPR32, GPR, "fcvt.s.w"> { + let rs2 = 0b00000; +} +def : FPUnaryOpDynFrmAlias; + +def FCVT_S_WU : FPUnaryOp_r_frm<0b1101000, FPR32, GPR, "fcvt.s.wu"> { + let rs2 = 0b00001; +} +def : FPUnaryOpDynFrmAlias; + +def FMV_W_X : FPUnaryOp_r<0b1111000, 0b000, FPR32, GPR, "fmv.w.x"> { + let rs2 = 0b00000; +} +} // Predicates = [HasStdExtF] + +let Predicates = [HasStdExtF, IsRV64] in { +def FCVT_L_S : FPUnaryOp_r_frm<0b1100000, GPR, FPR32, "fcvt.l.s"> { + let rs2 = 0b00010; +} +def : FPUnaryOpDynFrmAlias; + +def FCVT_LU_S : FPUnaryOp_r_frm<0b1100000, GPR, FPR32, "fcvt.lu.s"> { + let rs2 = 0b00011; +} +def : FPUnaryOpDynFrmAlias; + +def FCVT_S_L : FPUnaryOp_r_frm<0b1101000, FPR32, GPR, "fcvt.s.l"> { + let rs2 = 0b00010; +} +def : FPUnaryOpDynFrmAlias; + +def FCVT_S_LU : FPUnaryOp_r_frm<0b1101000, FPR32, GPR, "fcvt.s.lu"> { + let rs2 = 0b00011; +} +def : FPUnaryOpDynFrmAlias; +} // Predicates = [HasStdExtF, IsRV64] + +//===----------------------------------------------------------------------===// +// Assembler Pseudo Instructions (User-Level ISA, Version 2.2, Chapter 20) +//===----------------------------------------------------------------------===// + +let Predicates = [HasStdExtF] in { +// TODO flw +// TODO fsw + +def : InstAlias<"fmv.s $rd, $rs", (FSGNJ_S FPR32:$rd, FPR32:$rs, FPR32:$rs)>; +def : InstAlias<"fabs.s $rd, $rs", (FSGNJX_S FPR32:$rd, FPR32:$rs, FPR32:$rs)>; +def : InstAlias<"fneg.s $rd, $rs", (FSGNJN_S FPR32:$rd, FPR32:$rs, FPR32:$rs)>; + +// The following csr instructions actually alias instructions from the base ISA. +// However, it only makes sense to support them when the F extension is enabled. +// CSR Addresses: 0x003 == fcsr, 0x002 == frm, 0x001 == fflags +// NOTE: "frcsr", "frrm", and "frflags" are more specialized version of "csrr". 
+def : InstAlias<"frcsr $rd", (CSRRS GPR:$rd, 0x003, X0), 2>; +def : InstAlias<"fscsr $rd, $rs", (CSRRW GPR:$rd, 0x003, GPR:$rs)>; +def : InstAlias<"fscsr $rs", (CSRRW X0, 0x003, GPR:$rs), 2>; + +def : InstAlias<"frrm $rd", (CSRRS GPR:$rd, 0x002, X0), 2>; +def : InstAlias<"fsrm $rd, $rs", (CSRRW GPR:$rd, 0x002, GPR:$rs)>; +def : InstAlias<"fsrm $rs", (CSRRW X0, 0x002, GPR:$rs), 2>; +def : InstAlias<"fsrmi $rd, $imm", (CSRRWI GPR:$rd, 0x002, uimm5:$imm)>; +def : InstAlias<"fsrmi $imm", (CSRRWI X0, 0x002, uimm5:$imm), 2>; + +def : InstAlias<"frflags $rd", (CSRRS GPR:$rd, 0x001, X0), 2>; +def : InstAlias<"fsflags $rd, $rs", (CSRRW GPR:$rd, 0x001, GPR:$rs)>; +def : InstAlias<"fsflags $rs", (CSRRW X0, 0x001, GPR:$rs), 2>; +def : InstAlias<"fsflagsi $rd, $imm", (CSRRWI GPR:$rd, 0x001, uimm5:$imm)>; +def : InstAlias<"fsflagsi $imm", (CSRRWI X0, 0x001, uimm5:$imm), 2>; +} // Predicates = [HasStdExtF] diff --git a/lib/Target/RISCV/RISCVInstrInfoM.td b/lib/Target/RISCV/RISCVInstrInfoM.td index a253c1eb8118..2dd10ada4003 100644 --- a/lib/Target/RISCV/RISCVInstrInfoM.td +++ b/lib/Target/RISCV/RISCVInstrInfoM.td @@ -26,3 +26,26 @@ def DIVU : ALU_rr<0b0000001, 0b101, "divu">; def REM : ALU_rr<0b0000001, 0b110, "rem">; def REMU : ALU_rr<0b0000001, 0b111, "remu">; } // Predicates = [HasStdExtM] + +let Predicates = [HasStdExtM, IsRV64] in { +def MULW : ALUW_rr<0b0000001, 0b000, "mulw">; +def DIVW : ALUW_rr<0b0000001, 0b100, "divw">; +def DIVUW : ALUW_rr<0b0000001, 0b101, "divuw">; +def REMW : ALUW_rr<0b0000001, 0b110, "remw">; +def REMUW : ALUW_rr<0b0000001, 0b111, "remuw">; +} // Predicates = [HasStdExtM, IsRV64] + +//===----------------------------------------------------------------------===// +// Pseudo-instructions and codegen patterns +//===----------------------------------------------------------------------===// + +let Predicates = [HasStdExtM] in { +def : PatGprGpr; +def : PatGprGpr; +def : PatGprGpr; +// No ISDOpcode for mulhsu +def : PatGprGpr; +def : PatGprGpr; +def : PatGprGpr; +def : PatGprGpr; +} // Predicates = [HasStdExtM] diff --git a/lib/Target/RISCV/RISCVMCInstLower.cpp b/lib/Target/RISCV/RISCVMCInstLower.cpp index d8ae11f2bd90..b72b45c3dcc0 100644 --- a/lib/Target/RISCV/RISCVMCInstLower.cpp +++ b/lib/Target/RISCV/RISCVMCInstLower.cpp @@ -48,11 +48,12 @@ static MCOperand lowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym, const MCExpr *ME = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, Ctx); - if (!MO.isJTI() && MO.getOffset()) + if (!MO.isJTI() && !MO.isMBB() && MO.getOffset()) ME = MCBinaryExpr::createAdd( ME, MCConstantExpr::create(MO.getOffset(), Ctx), Ctx); - ME = RISCVMCExpr::create(ME, Kind, Ctx); + if (Kind != RISCVMCExpr::VK_RISCV_None) + ME = RISCVMCExpr::create(ME, Kind, Ctx); return MCOperand::createExpr(ME); } @@ -75,8 +76,7 @@ bool llvm::LowerRISCVMachineOperandToMCOperand(const MachineOperand &MO, MCOp = MCOperand::createImm(MO.getImm()); break; case MachineOperand::MO_MachineBasicBlock: - MCOp = MCOperand::createExpr( - MCSymbolRefExpr::create(MO.getMBB()->getSymbol(), AP.OutContext)); + MCOp = lowerSymbolOperand(MO, MO.getMBB()->getSymbol(), AP); break; case MachineOperand::MO_GlobalAddress: MCOp = lowerSymbolOperand(MO, AP.getSymbol(MO.getGlobal()), AP); diff --git a/lib/Target/RISCV/RISCVMachineFunctionInfo.h b/lib/Target/RISCV/RISCVMachineFunctionInfo.h new file mode 100644 index 000000000000..433a3fb1543c --- /dev/null +++ b/lib/Target/RISCV/RISCVMachineFunctionInfo.h @@ -0,0 +1,44 @@ +//=- RISCVMachineFunctionInfo.h - RISCV machine function info 
-----*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares RISCV-specific per-machine-function information. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_RISCV_RISCVMACHINEFUNCTIONINFO_H +#define LLVM_LIB_TARGET_RISCV_RISCVMACHINEFUNCTIONINFO_H + +#include "llvm/CodeGen/MachineFunction.h" + +namespace llvm { + +/// RISCVMachineFunctionInfo - This class is derived from MachineFunctionInfo +/// and contains private RISCV-specific information for each MachineFunction. +class RISCVMachineFunctionInfo : public MachineFunctionInfo { + + /// FrameIndex for start of varargs area + int VarArgsFrameIndex = 0; + /// Size of the save area used for varargs + int VarArgsSaveSize = 0; + +public: + RISCVMachineFunctionInfo() = default; + + explicit RISCVMachineFunctionInfo(MachineFunction &MF) {} + + int getVarArgsFrameIndex() const { return VarArgsFrameIndex; } + void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; } + + unsigned getVarArgsSaveSize() const { return VarArgsSaveSize; } + void setVarArgsSaveSize(int Size) { VarArgsSaveSize = Size; } +}; + +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_RISCV_RISCVMACHINEFUNCTIONINFO_H diff --git a/lib/Target/RISCV/RISCVRegisterInfo.cpp b/lib/Target/RISCV/RISCVRegisterInfo.cpp index 75b277531ce9..6ad8bf7bca09 100644 --- a/lib/Target/RISCV/RISCVRegisterInfo.cpp +++ b/lib/Target/RISCV/RISCVRegisterInfo.cpp @@ -57,35 +57,50 @@ const uint32_t *RISCVRegisterInfo::getNoPreservedMask() const { void RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum, RegScavenger *RS) const { - // TODO: this implementation is a temporary placeholder which does just - // enough to allow other aspects of code generation to be tested - assert(SPAdj == 0 && "Unexpected non-zero SPAdj value"); MachineInstr &MI = *II; MachineFunction &MF = *MI.getParent()->getParent(); - const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + const RISCVInstrInfo *TII = MF.getSubtarget().getInstrInfo(); DebugLoc DL = MI.getDebugLoc(); - unsigned FrameReg = getFrameRegister(MF); int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); - int Offset = TFI->getFrameIndexReference(MF, FrameIndex, FrameReg); - Offset += MI.getOperand(FIOperandNum + 1).getImm(); + unsigned FrameReg; + int Offset = + getFrameLowering(MF)->getFrameIndexReference(MF, FrameIndex, FrameReg) + + MI.getOperand(FIOperandNum + 1).getImm(); - assert(TFI->hasFP(MF) && "eliminateFrameIndex currently requires hasFP"); + if (!isInt<32>(Offset)) { + report_fatal_error( + "Frame offsets outside of the signed 32-bit range not supported"); + } + + MachineBasicBlock &MBB = *MI.getParent(); + bool FrameRegIsKill = false; - // Offsets must be directly encoded in a 12-bit immediate field if (!isInt<12>(Offset)) { - report_fatal_error( - "Frame offsets outside of the signed 12-bit range not supported"); + assert(isInt<32>(Offset) && "Int32 expected"); + // The offset won't fit in an immediate, so use a scratch register instead + // Modify Offset and FrameReg appropriately + unsigned ScratchReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); + TII->movImm32(MBB, II, DL, ScratchReg, Offset); + BuildMI(MBB, II, DL, 
TII->get(RISCV::ADD), ScratchReg) + .addReg(FrameReg) + .addReg(ScratchReg, RegState::Kill); + Offset = 0; + FrameReg = ScratchReg; + FrameRegIsKill = true; } - MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false); + MI.getOperand(FIOperandNum) + .ChangeToRegister(FrameReg, false, false, FrameRegIsKill); MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset); } unsigned RISCVRegisterInfo::getFrameRegister(const MachineFunction &MF) const { - return RISCV::X8; + const TargetFrameLowering *TFI = getFrameLowering(MF); + return TFI->hasFP(MF) ? RISCV::X8 : RISCV::X2; } const uint32_t * diff --git a/lib/Target/RISCV/RISCVRegisterInfo.h b/lib/Target/RISCV/RISCVRegisterInfo.h index 0b2bc3776fc6..a81dea094019 100644 --- a/lib/Target/RISCV/RISCVRegisterInfo.h +++ b/lib/Target/RISCV/RISCVRegisterInfo.h @@ -39,6 +39,18 @@ struct RISCVRegisterInfo : public RISCVGenRegisterInfo { RegScavenger *RS = nullptr) const override; unsigned getFrameRegister(const MachineFunction &MF) const override; + + bool requiresRegisterScavenging(const MachineFunction &MF) const override { + return true; + } + + bool requiresFrameIndexScavenging(const MachineFunction &MF) const override { + return true; + } + + bool trackLivenessAfterRegAlloc(const MachineFunction &) const override { + return true; + } }; } diff --git a/lib/Target/RISCV/RISCVRegisterInfo.td b/lib/Target/RISCV/RISCVRegisterInfo.td index 78c036a37b90..21be2e332e59 100644 --- a/lib/Target/RISCV/RISCVRegisterInfo.td +++ b/lib/Target/RISCV/RISCVRegisterInfo.td @@ -16,6 +16,24 @@ class RISCVReg Enc, string n, list alt = []> : Register { let HWEncoding{4-0} = Enc; let AltNames = alt; } + +class RISCVReg32 Enc, string n, list alt = []> : Register { + let HWEncoding{4-0} = Enc; + let AltNames = alt; +} + +// Because RISCVReg64 register have AsmName and AltNames that alias with their +// 32-bit sub-register, RISCVAsmParser will need to coerce a register number +// from a RISCVReg32 to the equivalent RISCVReg64 when appropriate. +def sub_32 : SubRegIndex<32>; +class RISCVReg64 : Register<""> { + let HWEncoding{4-0} = subreg.HWEncoding{4-0}; + let SubRegs = [subreg]; + let SubRegIndices = [sub_32]; + let AsmName = subreg.AsmName; + let AltNames = subreg.AltNames; +} + def ABIRegAltName : RegAltNameIndex; } // Namespace = "RISCV" @@ -72,3 +90,117 @@ def GPR : RegisterClass<"RISCV", [XLenVT], 32, (add [RV32, RV64, DefaultMode], [RegInfo<32,32,32>, RegInfo<64,64,64>, RegInfo<32,32,32>]>; } + +// The order of registers represents the preferred allocation sequence. +// Registers are listed in the order caller-save, callee-save, specials. 
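Stepping back to the eliminateFrameIndex change earlier in this hunk: frame offsets are now handled in three tiers rather than asserting on anything outside simm12. A rough sketch of that decision follows, assuming the ranges of llvm::isInt<12> and llvm::isInt<32>; the helper name and sample values are illustrative, not from the patch.

    #include <cstdint>
    #include <cstdio>

    // Mirrors the new logic: simm12 offsets encode directly, wider 32-bit
    // offsets are materialized into a scratch register and added to the frame
    // register, and anything beyond 32 bits is a fatal error.
    static const char *classifyFrameOffset(int64_t Offset) {
      if (Offset < INT32_MIN || Offset > INT32_MAX)
        return "unsupported: report_fatal_error";
      if (Offset >= -2048 && Offset <= 2047)
        return "encode directly in the memory operand";
      return "movImm32 into scratch reg, ADD to frame reg, offset becomes 0";
    }

    int main() {
      for (int64_t Off : {0LL, 2047LL, 2048LL, -2049LL, 1LL << 33})
        printf("%lld -> %s\n", static_cast<long long>(Off),
               classifyFrameOffset(Off));
    }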
+def GPRNoX0 : RegisterClass<"RISCV", [XLenVT], 32, (add + (sequence "X%u", 10, 17), + (sequence "X%u", 5, 7), + (sequence "X%u", 28, 31), + (sequence "X%u", 8, 9), + (sequence "X%u", 18, 27), + (sequence "X%u", 1, 4) + )> { + let RegInfos = RegInfoByHwMode< + [RV32, RV64, DefaultMode], + [RegInfo<32,32,32>, RegInfo<64,64,64>, RegInfo<32,32,32>]>; +} + +def GPRNoX0X2 : RegisterClass<"RISCV", [XLenVT], 32, (add + (sequence "X%u", 10, 17), + (sequence "X%u", 5, 7), + (sequence "X%u", 28, 31), + (sequence "X%u", 8, 9), + (sequence "X%u", 18, 27), + X1, X3, X4 + )> { + let RegInfos = RegInfoByHwMode< + [RV32, RV64, DefaultMode], + [RegInfo<32,32,32>, RegInfo<64,64,64>, RegInfo<32,32,32>]>; +} + +def GPRC : RegisterClass<"RISCV", [XLenVT], 32, (add + (sequence "X%u", 10, 15), + (sequence "X%u", 8, 9) + )> { + let RegInfos = RegInfoByHwMode< + [RV32, RV64, DefaultMode], + [RegInfo<32,32,32>, RegInfo<64,64,64>, RegInfo<32,32,32>]>; +} + +def SP : RegisterClass<"RISCV", [XLenVT], 32, (add X2)> { + let RegInfos = RegInfoByHwMode< + [RV32, RV64, DefaultMode], + [RegInfo<32,32,32>, RegInfo<64,64,64>, RegInfo<32,32,32>]>; +} + +// Floating point registers +let RegAltNameIndices = [ABIRegAltName] in { + def F0_32 : RISCVReg32<0, "f0", ["ft0"]>, DwarfRegNum<[32]>; + def F1_32 : RISCVReg32<1, "f1", ["ft1"]>, DwarfRegNum<[33]>; + def F2_32 : RISCVReg32<2, "f2", ["ft2"]>, DwarfRegNum<[34]>; + def F3_32 : RISCVReg32<3, "f3", ["ft3"]>, DwarfRegNum<[35]>; + def F4_32 : RISCVReg32<4, "f4", ["ft4"]>, DwarfRegNum<[36]>; + def F5_32 : RISCVReg32<5, "f5", ["ft5"]>, DwarfRegNum<[37]>; + def F6_32 : RISCVReg32<6, "f6", ["ft6"]>, DwarfRegNum<[38]>; + def F7_32 : RISCVReg32<7, "f7", ["ft7"]>, DwarfRegNum<[39]>; + def F8_32 : RISCVReg32<8, "f8", ["fs0"]>, DwarfRegNum<[40]>; + def F9_32 : RISCVReg32<9, "f9", ["fs1"]>, DwarfRegNum<[41]>; + def F10_32 : RISCVReg32<10,"f10", ["fa0"]>, DwarfRegNum<[42]>; + def F11_32 : RISCVReg32<11,"f11", ["fa1"]>, DwarfRegNum<[43]>; + def F12_32 : RISCVReg32<12,"f12", ["fa2"]>, DwarfRegNum<[44]>; + def F13_32 : RISCVReg32<13,"f13", ["fa3"]>, DwarfRegNum<[45]>; + def F14_32 : RISCVReg32<14,"f14", ["fa4"]>, DwarfRegNum<[46]>; + def F15_32 : RISCVReg32<15,"f15", ["fa5"]>, DwarfRegNum<[47]>; + def F16_32 : RISCVReg32<16,"f16", ["fa6"]>, DwarfRegNum<[48]>; + def F17_32 : RISCVReg32<17,"f17", ["fa7"]>, DwarfRegNum<[49]>; + def F18_32 : RISCVReg32<18,"f18", ["fs2"]>, DwarfRegNum<[50]>; + def F19_32 : RISCVReg32<19,"f19", ["fs3"]>, DwarfRegNum<[51]>; + def F20_32 : RISCVReg32<20,"f20", ["fs4"]>, DwarfRegNum<[52]>; + def F21_32 : RISCVReg32<21,"f21", ["fs5"]>, DwarfRegNum<[53]>; + def F22_32 : RISCVReg32<22,"f22", ["fs6"]>, DwarfRegNum<[54]>; + def F23_32 : RISCVReg32<23,"f23", ["fs7"]>, DwarfRegNum<[55]>; + def F24_32 : RISCVReg32<24,"f24", ["fs8"]>, DwarfRegNum<[56]>; + def F25_32 : RISCVReg32<25,"f25", ["fs9"]>, DwarfRegNum<[57]>; + def F26_32 : RISCVReg32<26,"f26", ["fs10"]>, DwarfRegNum<[58]>; + def F27_32 : RISCVReg32<27,"f27", ["fs11"]>, DwarfRegNum<[59]>; + def F28_32 : RISCVReg32<28,"f28", ["ft8"]>, DwarfRegNum<[60]>; + def F29_32 : RISCVReg32<29,"f29", ["ft9"]>, DwarfRegNum<[61]>; + def F30_32 : RISCVReg32<30,"f30", ["ft10"]>, DwarfRegNum<[62]>; + def F31_32 : RISCVReg32<31,"f31", ["ft11"]>, DwarfRegNum<[63]>; + + foreach Index = 0-31 in { + def F#Index#_64 : RISCVReg64("F"#Index#"_32")>, + DwarfRegNum<[!add(Index, 32)]>; + } +} + +// The order of registers represents the preferred allocation sequence, +// meaning caller-save regs are listed before callee-save. 
+def FPR32 : RegisterClass<"RISCV", [f32], 32, (add + (sequence "F%u_32", 0, 7), + (sequence "F%u_32", 10, 17), + (sequence "F%u_32", 28, 31), + (sequence "F%u_32", 8, 9), + (sequence "F%u_32", 18, 27) +)>; + +def FPR32C : RegisterClass<"RISCV", [f32], 32, (add + (sequence "F%u_32", 10, 15), + (sequence "F%u_32", 8, 9) +)>; + +// The order of registers represents the preferred allocation sequence, +// meaning caller-save regs are listed before callee-save. +def FPR64 : RegisterClass<"RISCV", [f64], 64, (add + (sequence "F%u_64", 0, 7), + (sequence "F%u_64", 10, 17), + (sequence "F%u_64", 28, 31), + (sequence "F%u_64", 8, 9), + (sequence "F%u_64", 18, 27) +)>; + +def FPR64C : RegisterClass<"RISCV", [f64], 64, (add + (sequence "F%u_64", 10, 15), + (sequence "F%u_64", 8, 9) +)>; diff --git a/lib/Target/RISCV/RISCVSubtarget.h b/lib/Target/RISCV/RISCVSubtarget.h index 7080ce58efa1..928ba5815a22 100644 --- a/lib/Target/RISCV/RISCVSubtarget.h +++ b/lib/Target/RISCV/RISCVSubtarget.h @@ -32,6 +32,9 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo { virtual void anchor(); bool HasStdExtM = false; bool HasStdExtA = false; + bool HasStdExtF = false; + bool HasStdExtD = false; + bool HasStdExtC = false; bool HasRV64 = false; unsigned XLen = 32; MVT XLenVT = MVT::i32; @@ -70,6 +73,9 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo { } bool hasStdExtM() const { return HasStdExtM; } bool hasStdExtA() const { return HasStdExtA; } + bool hasStdExtF() const { return HasStdExtF; } + bool hasStdExtD() const { return HasStdExtD; } + bool hasStdExtC() const { return HasStdExtC; } bool is64Bit() const { return HasRV64; } MVT getXLenVT() const { return XLenVT; } unsigned getXLen() const { return XLen; } diff --git a/lib/Target/RISCV/RISCVTargetMachine.cpp b/lib/Target/RISCV/RISCVTargetMachine.cpp index e12168b73999..e75fb3b701cd 100644 --- a/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -75,6 +75,7 @@ class RISCVPassConfig : public TargetPassConfig { } bool addInstSelector() override; + void addPreEmitPass() override; }; } @@ -87,3 +88,5 @@ bool RISCVPassConfig::addInstSelector() { return false; } + +void RISCVPassConfig::addPreEmitPass() { addPass(&BranchRelaxationPassID); } diff --git a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp index a38545ecf430..f2438ee43075 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp @@ -14,6 +14,7 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixupKindInfo.h" #include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/TargetRegistry.h" @@ -301,8 +302,8 @@ namespace { } // end anonymous namespace MCAsmBackend *llvm::createSparcAsmBackend(const Target &T, + const MCSubtargetInfo &STI, const MCRegisterInfo &MRI, - const Triple &TT, StringRef CPU, const MCTargetOptions &Options) { - return new ELFSparcAsmBackend(T, TT.getOS()); + return new ELFSparcAsmBackend(T, STI.getTargetTriple().getOS()); } diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h index 563e6f4efbe6..8390198479ba 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h @@ -40,8 +40,8 @@ Target &getTheSparcelTarget(); MCCodeEmitter *createSparcMCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, MCContext &Ctx); -MCAsmBackend 
*createSparcAsmBackend(const Target &T, const MCRegisterInfo &MRI, - const Triple &TT, StringRef CPU, +MCAsmBackend *createSparcAsmBackend(const Target &T, const MCSubtargetInfo &STI, + const MCRegisterInfo &MRI, const MCTargetOptions &Options); std::unique_ptr createSparcELFObjectWriter(raw_pwrite_stream &OS, bool Is64Bit, diff --git a/lib/Target/Sparc/SparcFrameLowering.cpp b/lib/Target/Sparc/SparcFrameLowering.cpp index c07cc213c3ed..9864aa372354 100644 --- a/lib/Target/Sparc/SparcFrameLowering.cpp +++ b/lib/Target/Sparc/SparcFrameLowering.cpp @@ -306,8 +306,8 @@ bool SparcFrameLowering::isLeafProc(MachineFunction &MF) const return !(MFI.hasCalls() // has calls || MRI.isPhysRegUsed(SP::L0) // Too many registers needed - || MRI.isPhysRegUsed(SP::O6) // %SP is used - || hasFP(MF)); // need %FP + || MRI.isPhysRegUsed(SP::O6) // %sp is used + || hasFP(MF)); // need %fp } void SparcFrameLowering::remapRegsForLeafProc(MachineFunction &MF) const { diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp index b355b9c5a763..d9548ff90d7f 100644 --- a/lib/Target/Sparc/SparcISelLowering.cpp +++ b/lib/Target/Sparc/SparcISelLowering.cpp @@ -264,7 +264,7 @@ SparcTargetLowering::LowerReturn_32(SDValue Chain, CallingConv::ID CallConv, unsigned RetAddrOffset = 8; // Call Inst + Delay Slot // If the function returns a struct, copy the SRetReturnReg to I0 - if (MF.getFunction()->hasStructRetAttr()) { + if (MF.getFunction().hasStructRetAttr()) { SparcMachineFunctionInfo *SFI = MF.getInfo(); unsigned Reg = SFI->getSRetReturnReg(); if (!Reg) @@ -519,7 +519,7 @@ SDValue SparcTargetLowering::LowerFormalArguments_32( InVals.push_back(Load); } - if (MF.getFunction()->hasStructRetAttr()) { + if (MF.getFunction().hasStructRetAttr()) { // Copy the SRet Argument to SRetReturnReg. SparcMachineFunctionInfo *SFI = MF.getInfo(); unsigned Reg = SFI->getSRetReturnReg(); @@ -701,8 +701,8 @@ static bool hasReturnsTwiceAttr(SelectionDAG &DAG, SDValue Callee, CalleeFn = dyn_cast(G->getGlobal()); } else if (ExternalSymbolSDNode *E = dyn_cast(Callee)) { - const Function *Fn = DAG.getMachineFunction().getFunction(); - const Module *M = Fn->getParent(); + const Function &Fn = DAG.getMachineFunction().getFunction(); + const Module *M = Fn.getParent(); const char *CalleeName = E->getSymbol(); CalleeFn = M->getFunction(CalleeName); } @@ -1057,8 +1057,8 @@ SparcTargetLowering::getSRetArgSize(SelectionDAG &DAG, SDValue Callee) const CalleeFn = dyn_cast(G->getGlobal()); } else if (ExternalSymbolSDNode *E = dyn_cast(Callee)) { - const Function *Fn = DAG.getMachineFunction().getFunction(); - const Module *M = Fn->getParent(); + const Function &F = DAG.getMachineFunction().getFunction(); + const Module *M = F.getParent(); const char *CalleeName = E->getSymbol(); CalleeFn = M->getFunction(CalleeName); if (!CalleeFn && isFP128ABICall(CalleeName)) @@ -1559,6 +1559,9 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); + setOperationAction(ISD::BITCAST, MVT::f32, Expand); + setOperationAction(ISD::BITCAST, MVT::i32, Expand); + // Sparc has no select or setcc: expand to SELECT_CC. 
setOperationAction(ISD::SELECT, MVT::i32, Expand); setOperationAction(ISD::SELECT, MVT::f32, Expand); @@ -1587,14 +1590,13 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM, setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom); setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom); - setOperationAction(ISD::BITCAST, MVT::i32, Custom); - setOperationAction(ISD::BITCAST, MVT::f32, Custom); - if (Subtarget->is64Bit()) { setOperationAction(ISD::ADDC, MVT::i64, Custom); setOperationAction(ISD::ADDE, MVT::i64, Custom); setOperationAction(ISD::SUBC, MVT::i64, Custom); setOperationAction(ISD::SUBE, MVT::i64, Custom); + setOperationAction(ISD::BITCAST, MVT::f64, Expand); + setOperationAction(ISD::BITCAST, MVT::i64, Expand); setOperationAction(ISD::SELECT, MVT::i64, Expand); setOperationAction(ISD::SETCC, MVT::i64, Expand); setOperationAction(ISD::BR_CC, MVT::i64, Custom); @@ -1608,9 +1610,6 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM, setOperationAction(ISD::ROTL , MVT::i64, Expand); setOperationAction(ISD::ROTR , MVT::i64, Expand); setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom); - - setOperationAction(ISD::BITCAST, MVT::i64, Custom); - setOperationAction(ISD::BITCAST, MVT::f64, Custom); } // ATOMICs. @@ -2426,76 +2425,23 @@ static SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG, 1); } -SDValue SparcTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const { - SDLoc dl(Op); - EVT SrcVT = Op.getOperand(0).getValueType(); - - EVT DstVT = Op.getValueType(); - - if (Subtarget->isVIS3()) { - if (DstVT == MVT::f32 && SrcVT == MVT::i32) { - return Op; // Legal - } else if (DstVT == MVT::f64 && SrcVT == MVT::i64) { - return (Subtarget->is64Bit()) - ? Op - : SDValue(); // Legal on 64 bit, otherwise Expand - } else if (DstVT == MVT::i64 && SrcVT == MVT::f64) { - return (Subtarget->is64Bit()) - ? Op - : SDValue(); // Legal on 64 bit, otherwise Expand - } - } - - // Expand - return SDValue(); -} - -SDValue SparcTargetLowering::LowerUINT_TO_FP(SDValue Op, - SelectionDAG &DAG) const { +static SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG, + const SparcTargetLowering &TLI, + bool hasHardQuad) { SDLoc dl(Op); EVT OpVT = Op.getOperand(0).getValueType(); assert(OpVT == MVT::i32 || OpVT == MVT::i64); - // Expand f128 operations to fp128 ABI calls. - if (Op.getValueType() == MVT::f128 && - (!Subtarget->hasHardQuad() || !isTypeLegal(OpVT))) { - return LowerF128Op(Op, DAG, - getLibcallName(OpVT == MVT::i32 - ? RTLIB::UINTTOFP_I32_F128 - : RTLIB::UINTTOFP_I64_F128), - 1); - } - - // Since UINT_TO_FP is legal (it's marked custom), dag combiner won't - // optimize it to a SINT_TO_FP when the sign bit is known zero. Perform - // the optimization here. - if (DAG.SignBitIsZero(Op.getOperand(0))) { - - EVT floatVT = MVT::f32; - unsigned IntToFloatOpcode = SPISD::ITOF; - - if (OpVT == MVT::i64) { - floatVT = MVT::f64; - IntToFloatOpcode = SPISD::XTOF; - } - - // Convert the int value to FP in an FP register. 
- SDValue FloatTmp = DAG.getNode(ISD::BITCAST, dl, floatVT, Op.getOperand(0)); - - return DAG.getNode(IntToFloatOpcode, dl, Op.getValueType(), FloatTmp); - } - - if (OpVT == MVT::i32 && Subtarget->is64Bit()) { - - SDValue Int64Tmp = - DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Op.getOperand(0)); - - SDValue Float64Tmp = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Int64Tmp); - - return DAG.getNode(SPISD::XTOF, dl, Op.getValueType(), Float64Tmp); - } + // Expand if it does not involve f128 or the target has support for + // quad floating point instructions and the operand type is legal. + if (Op.getValueType() != MVT::f128 || (hasHardQuad && TLI.isTypeLegal(OpVT))) + return SDValue(); - return SDValue(); + return TLI.LowerF128Op(Op, DAG, + TLI.getLibcallName(OpVT == MVT::i32 + ? RTLIB::UINTTOFP_I32_F128 + : RTLIB::UINTTOFP_I64_F128), + 1); } static SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG, @@ -3113,7 +3059,8 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const { hasHardQuad); case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG, *this, hasHardQuad); - case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG); + case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG, *this, + hasHardQuad); case ISD::BR_CC: return LowerBR_CC(Op, DAG, *this, hasHardQuad); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG, *this, @@ -3150,7 +3097,6 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::ATOMIC_LOAD: case ISD::ATOMIC_STORE: return LowerATOMIC_LOAD_STORE(Op, DAG); case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); - case ISD::BITCAST: return LowerBITCAST(Op, DAG); } } diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h index 501e16dc2d96..bf700d6a99d8 100644 --- a/lib/Target/Sparc/SparcISelLowering.h +++ b/lib/Target/Sparc/SparcISelLowering.h @@ -192,10 +192,6 @@ namespace llvm { SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const; - - SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; - bool ShouldShrinkFPConstant(EVT VT) const override { // Do not shrink FP constpool if VT == MVT::f128. // (ldd, call _Q_fdtoq) is more expensive than two ldds. 
diff --git a/lib/Target/Sparc/SparcInstrVIS.td b/lib/Target/Sparc/SparcInstrVIS.td index dc3aa45bedc9..d9adf3e8b0f5 100644 --- a/lib/Target/Sparc/SparcInstrVIS.td +++ b/lib/Target/Sparc/SparcInstrVIS.td @@ -243,21 +243,16 @@ def LZCNT : VISInstFormat<0b000010111, (outs I64Regs:$rd), (ins I64Regs:$rs2), "lzcnt $rs2, $rd", []>; let rs1 = 0 in { -def MOVSTOSW : VISInstFormat<0b100010011, (outs I64Regs:$rd), (ins FPRegs:$rs2), - "movstosw $rs2, $rd", - [(set I64Regs:$rd, (sext (i32 (bitconvert FPRegs:$rs2))))]>; -def MOVSTOUW : VISInstFormat<0b100010001, (outs I64Regs:$rd), (ins FPRegs:$rs2), - "movstouw $rs2, $rd", - [(set I64Regs:$rd, (zext (i32 (bitconvert FPRegs:$rs2))))]>; -def MOVDTOX : VISInstFormat<0b100010000, (outs I64Regs:$rd), (ins DFPRegs:$rs2), - "movdtox $rs2, $rd", - [(set I64Regs:$rd, (bitconvert DFPRegs:$rs2))]>; -def MOVWTOS : VISInstFormat<0b100011001, (outs FPRegs:$rd), (ins IntRegs:$rs2), - "movwtos $rs2, $rd", - [(set FPRegs:$rd, (bitconvert i32:$rs2))]>; -def MOVXTOD : VISInstFormat<0b100011000, (outs DFPRegs:$rd), (ins I64Regs:$rs2), - "movxtod $rs2, $rd", - [(set DFPRegs:$rd, (bitconvert I64Regs:$rs2))]>; +def MOVSTOSW : VISInstFormat<0b100010011, (outs I64Regs:$rd), + (ins DFPRegs:$rs2), "movstosw $rs2, $rd", []>; +def MOVSTOUW : VISInstFormat<0b100010001, (outs I64Regs:$rd), + (ins DFPRegs:$rs2), "movstouw $rs2, $rd", []>; +def MOVDTOX : VISInstFormat<0b100010000, (outs I64Regs:$rd), + (ins DFPRegs:$rs2), "movdtox $rs2, $rd", []>; +def MOVWTOS : VISInstFormat<0b100011001, (outs DFPRegs:$rd), + (ins I64Regs:$rs2), "movdtox $rs2, $rd", []>; +def MOVXTOD : VISInstFormat<0b100011000, (outs DFPRegs:$rd), + (ins I64Regs:$rs2), "movdtox $rs2, $rd", []>; } def PDISTN : VISInst<0b000111111, "pdistn">; diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp index e035c3b87a40..5cd4a7daf0fa 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp @@ -14,6 +14,7 @@ #include "llvm/MC/MCFixupKindInfo.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCSubtargetInfo.h" using namespace llvm; @@ -122,9 +123,10 @@ bool SystemZMCAsmBackend::writeNopData(uint64_t Count, } MCAsmBackend *llvm::createSystemZMCAsmBackend(const Target &T, + const MCSubtargetInfo &STI, const MCRegisterInfo &MRI, - const Triple &TT, StringRef CPU, const MCTargetOptions &Options) { - uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TT.getOS()); + uint8_t OSABI = + MCELFObjectTargetWriter::getOSABI(STI.getTargetTriple().getOS()); return new SystemZMCAsmBackend(OSABI); } diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h index 99b157e37275..ed1b1b95b8f3 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h @@ -89,8 +89,8 @@ MCCodeEmitter *createSystemZMCCodeEmitter(const MCInstrInfo &MCII, MCContext &Ctx); MCAsmBackend *createSystemZMCAsmBackend(const Target &T, + const MCSubtargetInfo &STI, const MCRegisterInfo &MRI, - const Triple &TT, StringRef CPU, const MCTargetOptions &Options); std::unique_ptr createSystemZObjectWriter(raw_pwrite_stream &OS, diff --git a/lib/Target/SystemZ/SystemZ.td b/lib/Target/SystemZ/SystemZ.td index 41300a1b6295..06905fb41e44 100644 --- a/lib/Target/SystemZ/SystemZ.td +++ b/lib/Target/SystemZ/SystemZ.td @@ -58,7 +58,7 @@ include "SystemZInstrHFP.td" include 
"SystemZInstrDFP.td" include "SystemZInstrSystem.td" -def SystemZInstrInfo : InstrInfo {} +def SystemZInstrInfo : InstrInfo { let guessInstructionProperties = 0; } //===----------------------------------------------------------------------===// // Assembly parser diff --git a/lib/Target/SystemZ/SystemZElimCompare.cpp b/lib/Target/SystemZ/SystemZElimCompare.cpp index 449c2f8cb78f..2df8985f85b3 100644 --- a/lib/Target/SystemZ/SystemZElimCompare.cpp +++ b/lib/Target/SystemZ/SystemZElimCompare.cpp @@ -86,9 +86,11 @@ class SystemZElimCompare : public MachineFunctionPass { SmallVectorImpl &CCUsers); bool convertToLoadAndTrap(MachineInstr &MI, MachineInstr &Compare, SmallVectorImpl &CCUsers); - bool convertToLoadAndTest(MachineInstr &MI); + bool convertToLoadAndTest(MachineInstr &MI, MachineInstr &Compare, + SmallVectorImpl &CCUsers); bool adjustCCMasksForInstr(MachineInstr &MI, MachineInstr &Compare, - SmallVectorImpl &CCUsers); + SmallVectorImpl &CCUsers, + unsigned ConvOpc = 0); bool optimizeCompareZero(MachineInstr &Compare, SmallVectorImpl &CCUsers); bool fuseCompareOperations(MachineInstr &Compare, @@ -282,9 +284,13 @@ bool SystemZElimCompare::convertToLoadAndTrap( // If MI is a load instruction, try to convert it into a LOAD AND TEST. // Return true on success. -bool SystemZElimCompare::convertToLoadAndTest(MachineInstr &MI) { +bool SystemZElimCompare::convertToLoadAndTest( + MachineInstr &MI, MachineInstr &Compare, + SmallVectorImpl &CCUsers) { + + // Try to adjust CC masks for the LOAD AND TEST opcode that could replace MI. unsigned Opcode = TII->getLoadAndTest(MI.getOpcode()); - if (!Opcode) + if (!Opcode || !adjustCCMasksForInstr(MI, Compare, CCUsers, Opcode)) return false; MI.setDesc(TII->get(Opcode)); @@ -294,14 +300,16 @@ bool SystemZElimCompare::convertToLoadAndTest(MachineInstr &MI) { } // The CC users in CCUsers are testing the result of a comparison of some -// value X against zero and we know that any CC value produced by MI -// would also reflect the value of X. Try to adjust CCUsers so that -// they test the result of MI directly, returning true on success. -// Leave everything unchanged on failure. +// value X against zero and we know that any CC value produced by MI would +// also reflect the value of X. ConvOpc may be used to pass the transfomed +// opcode MI will have if this succeeds. Try to adjust CCUsers so that they +// test the result of MI directly, returning true on success. Leave +// everything unchanged on failure. bool SystemZElimCompare::adjustCCMasksForInstr( MachineInstr &MI, MachineInstr &Compare, - SmallVectorImpl &CCUsers) { - int Opcode = MI.getOpcode(); + SmallVectorImpl &CCUsers, + unsigned ConvOpc) { + int Opcode = (ConvOpc ? ConvOpc : MI.getOpcode()); const MCInstrDesc &Desc = TII->get(Opcode); unsigned MIFlags = Desc.TSFlags; @@ -319,53 +327,72 @@ bool SystemZElimCompare::adjustCCMasksForInstr( unsigned CCValues = SystemZII::getCCValues(MIFlags); assert((ReusableCCMask & ~CCValues) == 0 && "Invalid CCValues"); - // Now check whether these flags are enough for all users. - SmallVector AlterMasks; - for (unsigned int I = 0, E = CCUsers.size(); I != E; ++I) { - MachineInstr *MI = CCUsers[I]; - - // Fail if this isn't a use of CC that we understand. 
- unsigned Flags = MI->getDesc().TSFlags; - unsigned FirstOpNum; - if (Flags & SystemZII::CCMaskFirst) - FirstOpNum = 0; - else if (Flags & SystemZII::CCMaskLast) - FirstOpNum = MI->getNumExplicitOperands() - 2; - else - return false; - - // Check whether the instruction predicate treats all CC values - // outside of ReusableCCMask in the same way. In that case it - // doesn't matter what those CC values mean. - unsigned CCValid = MI->getOperand(FirstOpNum).getImm(); - unsigned CCMask = MI->getOperand(FirstOpNum + 1).getImm(); - unsigned OutValid = ~ReusableCCMask & CCValid; - unsigned OutMask = ~ReusableCCMask & CCMask; - if (OutMask != 0 && OutMask != OutValid) - return false; + bool MIEquivalentToCmp = + (ReusableCCMask == CCValues && + CCValues == SystemZII::getCCValues(CompareFlags)); + + if (!MIEquivalentToCmp) { + // Now check whether these flags are enough for all users. + SmallVector AlterMasks; + for (unsigned int I = 0, E = CCUsers.size(); I != E; ++I) { + MachineInstr *MI = CCUsers[I]; + + // Fail if this isn't a use of CC that we understand. + unsigned Flags = MI->getDesc().TSFlags; + unsigned FirstOpNum; + if (Flags & SystemZII::CCMaskFirst) + FirstOpNum = 0; + else if (Flags & SystemZII::CCMaskLast) + FirstOpNum = MI->getNumExplicitOperands() - 2; + else + return false; + + // Check whether the instruction predicate treats all CC values + // outside of ReusableCCMask in the same way. In that case it + // doesn't matter what those CC values mean. + unsigned CCValid = MI->getOperand(FirstOpNum).getImm(); + unsigned CCMask = MI->getOperand(FirstOpNum + 1).getImm(); + unsigned OutValid = ~ReusableCCMask & CCValid; + unsigned OutMask = ~ReusableCCMask & CCMask; + if (OutMask != 0 && OutMask != OutValid) + return false; + + AlterMasks.push_back(&MI->getOperand(FirstOpNum)); + AlterMasks.push_back(&MI->getOperand(FirstOpNum + 1)); + } - AlterMasks.push_back(&MI->getOperand(FirstOpNum)); - AlterMasks.push_back(&MI->getOperand(FirstOpNum + 1)); + // All users are OK. Adjust the masks for MI. + for (unsigned I = 0, E = AlterMasks.size(); I != E; I += 2) { + AlterMasks[I]->setImm(CCValues); + unsigned CCMask = AlterMasks[I + 1]->getImm(); + if (CCMask & ~ReusableCCMask) + AlterMasks[I + 1]->setImm((CCMask & ReusableCCMask) | + (CCValues & ~ReusableCCMask)); + } } - // All users are OK. Adjust the masks for MI. - for (unsigned I = 0, E = AlterMasks.size(); I != E; I += 2) { - AlterMasks[I]->setImm(CCValues); - unsigned CCMask = AlterMasks[I + 1]->getImm(); - if (CCMask & ~ReusableCCMask) - AlterMasks[I + 1]->setImm((CCMask & ReusableCCMask) | - (CCValues & ~ReusableCCMask)); + // CC is now live after MI. + if (!ConvOpc) { + int CCDef = MI.findRegisterDefOperandIdx(SystemZ::CC, false, true, TRI); + assert(CCDef >= 0 && "Couldn't find CC set"); + MI.getOperand(CCDef).setIsDead(false); } - // CC is now live after MI. - int CCDef = MI.findRegisterDefOperandIdx(SystemZ::CC, false, true, TRI); - assert(CCDef >= 0 && "Couldn't find CC set"); - MI.getOperand(CCDef).setIsDead(false); + // Check if MI lies before Compare. + bool BeforeCmp = false; + MachineBasicBlock::iterator MBBI = MI, MBBE = MI.getParent()->end(); + for (++MBBI; MBBI != MBBE; ++MBBI) + if (MBBI == Compare) { + BeforeCmp = true; + break; + } // Clear any intervening kills of CC. 
- MachineBasicBlock::iterator MBBI = MI, MBBE = Compare; - for (++MBBI; MBBI != MBBE; ++MBBI) - MBBI->clearRegisterKills(SystemZ::CC, TRI); + if (BeforeCmp) { + MachineBasicBlock::iterator MBBI = MI, MBBE = Compare; + for (++MBBI; MBBI != MBBE; ++MBBI) + MBBI->clearRegisterKills(SystemZ::CC, TRI); + } return true; } @@ -419,7 +446,7 @@ bool SystemZElimCompare::optimizeCompareZero( } } // Try to eliminate Compare by reusing a CC result from MI. - if ((!CCRefs && convertToLoadAndTest(MI)) || + if ((!CCRefs && convertToLoadAndTest(MI, Compare, CCUsers)) || (!CCRefs.Def && adjustCCMasksForInstr(MI, Compare, CCUsers))) { EliminatedComparisons += 1; return true; @@ -434,17 +461,14 @@ bool SystemZElimCompare::optimizeCompareZero( } // Also do a forward search to handle cases where an instruction after the - // compare can be converted like - // - // LTEBRCompare %F0S, %F0S, %CC LTEBRCompare %F0S, %F0S, %CC - // %F2S = LER %F0S - // + // compare can be converted, like + // LTEBRCompare %f0s, %f0s; %f2s = LER %f0s => LTEBRCompare %f2s, %f0s MBBI = Compare, MBBE = MBB.end(); while (++MBBI != MBBE) { MachineInstr &MI = *MBBI; if (preservesValueOf(MI, SrcReg)) { // Try to eliminate Compare by reusing a CC result from MI. - if (convertToLoadAndTest(MI)) { + if (convertToLoadAndTest(MI, Compare, CCUsers)) { EliminatedComparisons += 1; return true; } @@ -593,7 +617,7 @@ bool SystemZElimCompare::processBlock(MachineBasicBlock &MBB) { } bool SystemZElimCompare::runOnMachineFunction(MachineFunction &F) { - if (skipFunction(*F.getFunction())) + if (skipFunction(F.getFunction())) return false; TII = static_cast(F.getSubtarget().getInstrInfo()); diff --git a/lib/Target/SystemZ/SystemZFrameLowering.cpp b/lib/Target/SystemZ/SystemZFrameLowering.cpp index 3183c3acc69a..b600aa61cd0b 100644 --- a/lib/Target/SystemZ/SystemZFrameLowering.cpp +++ b/lib/Target/SystemZ/SystemZFrameLowering.cpp @@ -71,7 +71,7 @@ void SystemZFrameLowering::determineCalleeSaves(MachineFunction &MF, const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); bool HasFP = hasFP(MF); SystemZMachineFunctionInfo *MFI = MF.getInfo(); - bool IsVarArg = MF.getFunction()->isVarArg(); + bool IsVarArg = MF.getFunction().isVarArg(); // va_start stores incoming FPR varargs in the normal way, but delegates // the saving of incoming GPR varargs to spillCalleeSavedRegisters(). @@ -139,7 +139,7 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineFunction &MF = *MBB.getParent(); const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); SystemZMachineFunctionInfo *ZFI = MF.getInfo(); - bool IsVarArg = MF.getFunction()->isVarArg(); + bool IsVarArg = MF.getFunction().isVarArg(); DebugLoc DL; // Scan the call-saved GPRs and find the bounds of the register spill area. @@ -374,7 +374,7 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF, uint64_t StackSize = getAllocatedStackSize(MF); if (StackSize) { // Determine if we want to store a backchain. - bool StoreBackchain = MF.getFunction()->hasFnAttribute("backchain"); + bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain"); // If we need backchain, save current stack pointer. R1 is free at this // point. diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index 3073d2fcde1d..81175013ed2a 100644 --- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -330,6 +330,9 @@ class SystemZDAGToDAGISel : public SelectionDAGISel { // to X. 
bool storeLoadCanUseBlockBinary(SDNode *N, unsigned I) const; + // Try to expand a boolean SELECT_CCMASK using an IPM sequence. + SDValue expandSelectBoolean(SDNode *Node); + public: SystemZDAGToDAGISel(SystemZTargetMachine &TM, CodeGenOpt::Level OptLevel) : SelectionDAGISel(TM, OptLevel) {} @@ -348,6 +351,7 @@ class SystemZDAGToDAGISel : public SelectionDAGISel { void Select(SDNode *Node) override; bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, std::vector &OutOps) override; + void PreprocessISelDAG() override; // Include the pieces autogenerated from the target description. #include "SystemZGenDAGISel.inc" @@ -838,9 +842,16 @@ bool SystemZDAGToDAGISel::expandRxSBG(RxSBGOperands &RxSBG) const { case ISD::SIGN_EXTEND: { // Check that the extension bits are don't-care (i.e. are masked out // by the final mask). + unsigned BitSize = N.getValueSizeInBits(); unsigned InnerBitSize = N.getOperand(0).getValueSizeInBits(); - if (maskMatters(RxSBG, allOnes(RxSBG.BitSize) - allOnes(InnerBitSize))) - return false; + if (maskMatters(RxSBG, allOnes(BitSize) - allOnes(InnerBitSize))) { + // In the case where only the sign bit is active, increase Rotate with + // the extension width. + if (RxSBG.Mask == 1 && RxSBG.Rotate == 1) + RxSBG.Rotate += (BitSize - InnerBitSize); + else + return false; + } RxSBG.Input = N.getOperand(0); return true; @@ -1431,3 +1442,182 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, return true; } + +namespace { +// Represents a sequence for extracting a 0/1 value from an IPM result: +// (((X ^ XORValue) + AddValue) >> Bit) +struct IPMConversion { + IPMConversion(unsigned xorValue, int64_t addValue, unsigned bit) + : XORValue(xorValue), AddValue(addValue), Bit(bit) {} + + int64_t XORValue; + int64_t AddValue; + unsigned Bit; +}; +} // end anonymous namespace + +// Return a sequence for getting a 1 from an IPM result when CC has a +// value in CCMask and a 0 when CC has a value in CCValid & ~CCMask. +// The handling of CC values outside CCValid doesn't matter. +static IPMConversion getIPMConversion(unsigned CCValid, unsigned CCMask) { + // Deal with cases where the result can be taken directly from a bit + // of the IPM result. + if (CCMask == (CCValid & (SystemZ::CCMASK_1 | SystemZ::CCMASK_3))) + return IPMConversion(0, 0, SystemZ::IPM_CC); + if (CCMask == (CCValid & (SystemZ::CCMASK_2 | SystemZ::CCMASK_3))) + return IPMConversion(0, 0, SystemZ::IPM_CC + 1); + + // Deal with cases where we can add a value to force the sign bit + // to contain the right value. Putting the bit in 31 means we can + // use SRL rather than RISBG(L), and also makes it easier to get a + // 0/-1 value, so it has priority over the other tests below. + // + // These sequences rely on the fact that the upper two bits of the + // IPM result are zero. + uint64_t TopBit = uint64_t(1) << 31; + if (CCMask == (CCValid & SystemZ::CCMASK_0)) + return IPMConversion(0, -(1 << SystemZ::IPM_CC), 31); + if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_1))) + return IPMConversion(0, -(2 << SystemZ::IPM_CC), 31); + if (CCMask == (CCValid & (SystemZ::CCMASK_0 + | SystemZ::CCMASK_1 + | SystemZ::CCMASK_2))) + return IPMConversion(0, -(3 << SystemZ::IPM_CC), 31); + if (CCMask == (CCValid & SystemZ::CCMASK_3)) + return IPMConversion(0, TopBit - (3 << SystemZ::IPM_CC), 31); + if (CCMask == (CCValid & (SystemZ::CCMASK_1 + | SystemZ::CCMASK_2 + | SystemZ::CCMASK_3))) + return IPMConversion(0, TopBit - (1 << SystemZ::IPM_CC), 31); + + // Next try inverting the value and testing a bit. 
0/1 could be + // handled this way too, but we dealt with that case above. + if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_2))) + return IPMConversion(-1, 0, SystemZ::IPM_CC); + + // Handle cases where adding a value forces a non-sign bit to contain + // the right value. + if (CCMask == (CCValid & (SystemZ::CCMASK_1 | SystemZ::CCMASK_2))) + return IPMConversion(0, 1 << SystemZ::IPM_CC, SystemZ::IPM_CC + 1); + if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_3))) + return IPMConversion(0, -(1 << SystemZ::IPM_CC), SystemZ::IPM_CC + 1); + + // The remaining cases are 1, 2, 0/1/3 and 0/2/3. All these are + // can be done by inverting the low CC bit and applying one of the + // sign-based extractions above. + if (CCMask == (CCValid & SystemZ::CCMASK_1)) + return IPMConversion(1 << SystemZ::IPM_CC, -(1 << SystemZ::IPM_CC), 31); + if (CCMask == (CCValid & SystemZ::CCMASK_2)) + return IPMConversion(1 << SystemZ::IPM_CC, + TopBit - (3 << SystemZ::IPM_CC), 31); + if (CCMask == (CCValid & (SystemZ::CCMASK_0 + | SystemZ::CCMASK_1 + | SystemZ::CCMASK_3))) + return IPMConversion(1 << SystemZ::IPM_CC, -(3 << SystemZ::IPM_CC), 31); + if (CCMask == (CCValid & (SystemZ::CCMASK_0 + | SystemZ::CCMASK_2 + | SystemZ::CCMASK_3))) + return IPMConversion(1 << SystemZ::IPM_CC, + TopBit - (1 << SystemZ::IPM_CC), 31); + + llvm_unreachable("Unexpected CC combination"); +} + +SDValue SystemZDAGToDAGISel::expandSelectBoolean(SDNode *Node) { + auto *TrueOp = dyn_cast(Node->getOperand(0)); + auto *FalseOp = dyn_cast(Node->getOperand(1)); + if (!TrueOp || !FalseOp) + return SDValue(); + if (FalseOp->getZExtValue() != 0) + return SDValue(); + if (TrueOp->getSExtValue() != 1 && TrueOp->getSExtValue() != -1) + return SDValue(); + + auto *CCValidOp = dyn_cast(Node->getOperand(2)); + auto *CCMaskOp = dyn_cast(Node->getOperand(3)); + if (!CCValidOp || !CCMaskOp) + return SDValue(); + int CCValid = CCValidOp->getZExtValue(); + int CCMask = CCMaskOp->getZExtValue(); + + SDLoc DL(Node); + SDValue Glue = Node->getOperand(4); + IPMConversion IPM = getIPMConversion(CCValid, CCMask); + SDValue Result = CurDAG->getNode(SystemZISD::IPM, DL, MVT::i32, Glue); + + if (IPM.XORValue) + Result = CurDAG->getNode(ISD::XOR, DL, MVT::i32, Result, + CurDAG->getConstant(IPM.XORValue, DL, MVT::i32)); + + if (IPM.AddValue) + Result = CurDAG->getNode(ISD::ADD, DL, MVT::i32, Result, + CurDAG->getConstant(IPM.AddValue, DL, MVT::i32)); + + EVT VT = Node->getValueType(0); + if (VT == MVT::i32 && IPM.Bit == 31) { + unsigned ShiftOp = TrueOp->getSExtValue() == 1 ? ISD::SRL : ISD::SRA; + Result = CurDAG->getNode(ShiftOp, DL, MVT::i32, Result, + CurDAG->getConstant(IPM.Bit, DL, MVT::i32)); + } else { + if (VT != MVT::i32) + Result = CurDAG->getNode(ISD::ANY_EXTEND, DL, VT, Result); + + if (TrueOp->getSExtValue() == 1) { + // The SHR/AND sequence should get optimized to an RISBG. + Result = CurDAG->getNode(ISD::SRL, DL, VT, Result, + CurDAG->getConstant(IPM.Bit, DL, MVT::i32)); + Result = CurDAG->getNode(ISD::AND, DL, VT, Result, + CurDAG->getConstant(1, DL, VT)); + } else { + // Sign-extend from IPM.Bit using a pair of shifts. 
+ int ShlAmt = VT.getSizeInBits() - 1 - IPM.Bit; + int SraAmt = VT.getSizeInBits() - 1; + Result = CurDAG->getNode(ISD::SHL, DL, VT, Result, + CurDAG->getConstant(ShlAmt, DL, MVT::i32)); + Result = CurDAG->getNode(ISD::SRA, DL, VT, Result, + CurDAG->getConstant(SraAmt, DL, MVT::i32)); + } + } + + return Result; +} + +void SystemZDAGToDAGISel::PreprocessISelDAG() { + // If we have conditional immediate loads, we always prefer + // using those over an IPM sequence. + if (Subtarget->hasLoadStoreOnCond2()) + return; + + bool MadeChange = false; + + for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), + E = CurDAG->allnodes_end(); + I != E;) { + SDNode *N = &*I++; + if (N->use_empty()) + continue; + + SDValue Res; + switch (N->getOpcode()) { + default: break; + case SystemZISD::SELECT_CCMASK: + Res = expandSelectBoolean(N); + break; + } + + if (Res) { + DEBUG(dbgs() << "SystemZ DAG preprocessing replacing:\nOld: "); + DEBUG(N->dump(CurDAG)); + DEBUG(dbgs() << "\nNew: "); + DEBUG(Res.getNode()->dump(CurDAG)); + DEBUG(dbgs() << "\n"); + + CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res); + MadeChange = true; + } + } + + if (MadeChange) + CurDAG->RemoveDeadNodes(); +} + diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index d49d7316e682..0d29676f5007 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -31,17 +31,6 @@ using namespace llvm; #define DEBUG_TYPE "systemz-lower" namespace { -// Represents a sequence for extracting a 0/1 value from an IPM result: -// (((X ^ XORValue) + AddValue) >> Bit) -struct IPMConversion { - IPMConversion(unsigned xorValue, int64_t addValue, unsigned bit) - : XORValue(xorValue), AddValue(addValue), Bit(bit) {} - - int64_t XORValue; - int64_t AddValue; - unsigned Bit; -}; - // Represents information about a comparison. struct Comparison { Comparison(SDValue Op0In, SDValue Op1In) @@ -517,7 +506,9 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setOperationAction(ISD::VAEND, MVT::Other, Expand); // Codes for which we want to perform some z-specific combinations. + setTargetDAGCombine(ISD::ZERO_EXTEND); setTargetDAGCombine(ISD::SIGN_EXTEND); + setTargetDAGCombine(ISD::SIGN_EXTEND_INREG); setTargetDAGCombine(ISD::STORE); setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); setTargetDAGCombine(ISD::FP_ROUND); @@ -1699,73 +1690,6 @@ static unsigned CCMaskForCondCode(ISD::CondCode CC) { #undef CONV } -// Return a sequence for getting a 1 from an IPM result when CC has a -// value in CCMask and a 0 when CC has a value in CCValid & ~CCMask. -// The handling of CC values outside CCValid doesn't matter. -static IPMConversion getIPMConversion(unsigned CCValid, unsigned CCMask) { - // Deal with cases where the result can be taken directly from a bit - // of the IPM result. - if (CCMask == (CCValid & (SystemZ::CCMASK_1 | SystemZ::CCMASK_3))) - return IPMConversion(0, 0, SystemZ::IPM_CC); - if (CCMask == (CCValid & (SystemZ::CCMASK_2 | SystemZ::CCMASK_3))) - return IPMConversion(0, 0, SystemZ::IPM_CC + 1); - - // Deal with cases where we can add a value to force the sign bit - // to contain the right value. Putting the bit in 31 means we can - // use SRL rather than RISBG(L), and also makes it easier to get a - // 0/-1 value, so it has priority over the other tests below. - // - // These sequences rely on the fact that the upper two bits of the - // IPM result are zero. 
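// For example, the CC-0-only case below returns (0, -(1 << IPM_CC), 31):
// CC occupies bits 29:28 of the IPM result, so only CC 0 makes the addition
// borrow into bit 31, and the final shift right by 31 then gives 1 for CC 0
// and 0 for CC 1-3.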
- uint64_t TopBit = uint64_t(1) << 31; - if (CCMask == (CCValid & SystemZ::CCMASK_0)) - return IPMConversion(0, -(1 << SystemZ::IPM_CC), 31); - if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_1))) - return IPMConversion(0, -(2 << SystemZ::IPM_CC), 31); - if (CCMask == (CCValid & (SystemZ::CCMASK_0 - | SystemZ::CCMASK_1 - | SystemZ::CCMASK_2))) - return IPMConversion(0, -(3 << SystemZ::IPM_CC), 31); - if (CCMask == (CCValid & SystemZ::CCMASK_3)) - return IPMConversion(0, TopBit - (3 << SystemZ::IPM_CC), 31); - if (CCMask == (CCValid & (SystemZ::CCMASK_1 - | SystemZ::CCMASK_2 - | SystemZ::CCMASK_3))) - return IPMConversion(0, TopBit - (1 << SystemZ::IPM_CC), 31); - - // Next try inverting the value and testing a bit. 0/1 could be - // handled this way too, but we dealt with that case above. - if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_2))) - return IPMConversion(-1, 0, SystemZ::IPM_CC); - - // Handle cases where adding a value forces a non-sign bit to contain - // the right value. - if (CCMask == (CCValid & (SystemZ::CCMASK_1 | SystemZ::CCMASK_2))) - return IPMConversion(0, 1 << SystemZ::IPM_CC, SystemZ::IPM_CC + 1); - if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_3))) - return IPMConversion(0, -(1 << SystemZ::IPM_CC), SystemZ::IPM_CC + 1); - - // The remaining cases are 1, 2, 0/1/3 and 0/2/3. All these are - // can be done by inverting the low CC bit and applying one of the - // sign-based extractions above. - if (CCMask == (CCValid & SystemZ::CCMASK_1)) - return IPMConversion(1 << SystemZ::IPM_CC, -(1 << SystemZ::IPM_CC), 31); - if (CCMask == (CCValid & SystemZ::CCMASK_2)) - return IPMConversion(1 << SystemZ::IPM_CC, - TopBit - (3 << SystemZ::IPM_CC), 31); - if (CCMask == (CCValid & (SystemZ::CCMASK_0 - | SystemZ::CCMASK_1 - | SystemZ::CCMASK_3))) - return IPMConversion(1 << SystemZ::IPM_CC, -(3 << SystemZ::IPM_CC), 31); - if (CCMask == (CCValid & (SystemZ::CCMASK_0 - | SystemZ::CCMASK_2 - | SystemZ::CCMASK_3))) - return IPMConversion(1 << SystemZ::IPM_CC, - TopBit - (1 << SystemZ::IPM_CC), 31); - - llvm_unreachable("Unexpected CC combination"); -} - // If C can be converted to a comparison against zero, adjust the operands // as necessary. static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) { @@ -1844,11 +1768,14 @@ static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL, ISD::SEXTLOAD : ISD::ZEXTLOAD); if (C.Op0.getValueType() != MVT::i32 || - Load->getExtensionType() != ExtType) + Load->getExtensionType() != ExtType) { C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, Load->getChain(), Load->getBasePtr(), Load->getPointerInfo(), Load->getMemoryVT(), Load->getAlignment(), Load->getMemOperand()->getFlags()); + // Update the chain uses. + DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), C.Op0.getValue(1)); + } // Make sure that the second operand is an i32 with the right value. 
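// (Subword compares are ultimately emitted as 32-bit compare instructions,
// so the immediate has to be rewritten as an i32 constant whenever its type
// or value does not already match.)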
if (C.Op1.getValueType() != MVT::i32 || @@ -2198,6 +2125,7 @@ static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL, NewC.Op0.getOpcode() == ISD::SHL && isSimpleShift(NewC.Op0, ShiftVal) && (MaskVal >> ShiftVal != 0) && + ((CmpVal >> ShiftVal) << ShiftVal) == CmpVal && (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal >> ShiftVal, CmpVal >> ShiftVal, @@ -2208,6 +2136,7 @@ static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL, NewC.Op0.getOpcode() == ISD::SRL && isSimpleShift(NewC.Op0, ShiftVal) && (MaskVal << ShiftVal != 0) && + ((CmpVal << ShiftVal) >> ShiftVal) == CmpVal && (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal << ShiftVal, CmpVal << ShiftVal, @@ -2232,6 +2161,24 @@ static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL, C.CCMask = NewCCMask; } +// See whether the comparison argument contains a redundant AND +// and remove it if so. This sometimes happens due to the generic +// BRCOND expansion. +static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL, + Comparison &C) { + if (C.Op0.getOpcode() != ISD::AND) + return; + auto *Mask = dyn_cast(C.Op0.getOperand(1)); + if (!Mask) + return; + KnownBits Known; + DAG.computeKnownBits(C.Op0.getOperand(0), Known); + if ((~Known.Zero).getZExtValue() & ~Mask->getZExtValue()) + return; + + C.Op0 = C.Op0.getOperand(0); +} + // Return a Comparison that tests the condition-code result of intrinsic // node Call against constant integer CC using comparison code Cond. // Opcode is the opcode of the SystemZISD operation for the intrinsic @@ -2306,6 +2253,7 @@ static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, else C.ICmpType = SystemZICMP::SignedOnly; C.CCMask &= ~SystemZ::CCMASK_CMP_UO; + adjustForRedundantAnd(DAG, DL, C); adjustZeroCmp(DAG, DL, C); adjustSubwordCmp(DAG, DL, C); adjustForSubtraction(DAG, DL, C); @@ -2383,24 +2331,11 @@ static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT, // in CCValid, so other values can be ignored. static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue Glue, unsigned CCValid, unsigned CCMask) { - IPMConversion Conversion = getIPMConversion(CCValid, CCMask); - SDValue Result = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue); - - if (Conversion.XORValue) - Result = DAG.getNode(ISD::XOR, DL, MVT::i32, Result, - DAG.getConstant(Conversion.XORValue, DL, MVT::i32)); - - if (Conversion.AddValue) - Result = DAG.getNode(ISD::ADD, DL, MVT::i32, Result, - DAG.getConstant(Conversion.AddValue, DL, MVT::i32)); - - // The SHR/AND sequence should get optimized to an RISBG. - Result = DAG.getNode(ISD::SRL, DL, MVT::i32, Result, - DAG.getConstant(Conversion.Bit, DL, MVT::i32)); - if (Conversion.Bit != 31) - Result = DAG.getNode(ISD::AND, DL, MVT::i32, Result, - DAG.getConstant(1, DL, MVT::i32)); - return Result; + SDValue Ops[] = { DAG.getConstant(1, DL, MVT::i32), + DAG.getConstant(0, DL, MVT::i32), + DAG.getConstant(CCValid, DL, MVT::i32), + DAG.getConstant(CCMask, DL, MVT::i32), Glue }; + return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, MVT::i32, Ops); } // Return the SystemISD vector comparison operation for CC, or 0 if it cannot @@ -2615,35 +2550,10 @@ SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op, } SDValue Glue = emitCmp(DAG, DL, C); - - // Special case for handling -1/0 results. The shifts we use here - // should get optimized with the IPM conversion sequence. 
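// (A 0/-1 result is just the 0/1 result sign-extended from its low bit,
// which is exactly what the ANY_EXTEND plus SHL/SRA pair below produces.)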
- auto *TrueC = dyn_cast(TrueOp); - auto *FalseC = dyn_cast(FalseOp); - if (TrueC && FalseC) { - int64_t TrueVal = TrueC->getSExtValue(); - int64_t FalseVal = FalseC->getSExtValue(); - if ((TrueVal == -1 && FalseVal == 0) || (TrueVal == 0 && FalseVal == -1)) { - // Invert the condition if we want -1 on false. - if (TrueVal == 0) - C.CCMask ^= C.CCValid; - SDValue Result = emitSETCC(DAG, DL, Glue, C.CCValid, C.CCMask); - EVT VT = Op.getValueType(); - // Extend the result to VT. Upper bits are ignored. - if (!is32Bit(VT)) - Result = DAG.getNode(ISD::ANY_EXTEND, DL, VT, Result); - // Sign-extend from the low bit. - SDValue ShAmt = DAG.getConstant(VT.getSizeInBits() - 1, DL, MVT::i32); - SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, Result, ShAmt); - return DAG.getNode(ISD::SRA, DL, VT, Shl, ShAmt); - } - } - SDValue Ops[] = {TrueOp, FalseOp, DAG.getConstant(C.CCValid, DL, MVT::i32), DAG.getConstant(C.CCMask, DL, MVT::i32), Glue}; - SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue); - return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VTs, Ops); + return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, Op.getValueType(), Ops); } SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node, @@ -2940,9 +2850,13 @@ SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op, // but we need this case for bitcasts that are created during lowering // and which are then lowered themselves. if (auto *LoadN = dyn_cast(In)) - if (ISD::isNormalLoad(LoadN)) - return DAG.getLoad(ResVT, DL, LoadN->getChain(), LoadN->getBasePtr(), - LoadN->getMemOperand()); + if (ISD::isNormalLoad(LoadN)) { + SDValue NewLoad = DAG.getLoad(ResVT, DL, LoadN->getChain(), + LoadN->getBasePtr(), LoadN->getMemOperand()); + // Update the chain uses. + DAG.ReplaceAllUsesOfValueWith(SDValue(LoadN, 1), NewLoad.getValue(1)); + return NewLoad; + } if (InVT == MVT::i32 && ResVT == MVT::f32) { SDValue In64; @@ -3030,8 +2944,8 @@ SDValue SystemZTargetLowering:: lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const { const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); MachineFunction &MF = DAG.getMachineFunction(); - bool RealignOpt = !MF.getFunction()-> hasFnAttribute("no-realign-stack"); - bool StoreBackchain = MF.getFunction()->hasFnAttribute("backchain"); + bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack"); + bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain"); SDValue Chain = Op.getOperand(0); SDValue Size = Op.getOperand(1); @@ -3563,7 +3477,7 @@ SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); MF.getInfo()->setManipulatesSP(true); - bool StoreBackchain = MF.getFunction()->hasFnAttribute("backchain"); + bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain"); SDValue Chain = Op.getOperand(0); SDValue NewSP = Op.getOperand(1); @@ -5165,6 +5079,54 @@ SDValue SystemZTargetLowering::combineTruncateExtract( return SDValue(); } +SDValue SystemZTargetLowering::combineZERO_EXTEND( + SDNode *N, DAGCombinerInfo &DCI) const { + // Convert (zext (select_ccmask C1, C2)) into (select_ccmask C1', C2') + SelectionDAG &DAG = DCI.DAG; + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + if (N0.getOpcode() == SystemZISD::SELECT_CCMASK) { + auto *TrueOp = dyn_cast(N0.getOperand(0)); + auto *FalseOp = dyn_cast(N0.getOperand(1)); + if (TrueOp && FalseOp) { + SDLoc DL(N0); + SDValue Ops[] = { DAG.getConstant(TrueOp->getZExtValue(), DL, VT), + DAG.getConstant(FalseOp->getZExtValue(), 
DL, VT), + N0.getOperand(2), N0.getOperand(3), N0.getOperand(4) }; + SDValue NewSelect = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VT, Ops); + // If N0 has multiple uses, change other uses as well. + if (!N0.hasOneUse()) { + SDValue TruncSelect = + DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), NewSelect); + DCI.CombineTo(N0.getNode(), TruncSelect); + } + return NewSelect; + } + } + return SDValue(); +} + +SDValue SystemZTargetLowering::combineSIGN_EXTEND_INREG( + SDNode *N, DAGCombinerInfo &DCI) const { + // Convert (sext_in_reg (setcc LHS, RHS, COND), i1) + // and (sext_in_reg (any_extend (setcc LHS, RHS, COND)), i1) + // into (select_cc LHS, RHS, -1, 0, COND) + SelectionDAG &DAG = DCI.DAG; + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + EVT EVT = cast(N->getOperand(1))->getVT(); + if (N0.hasOneUse() && N0.getOpcode() == ISD::ANY_EXTEND) + N0 = N0.getOperand(0); + if (EVT == MVT::i1 && N0.hasOneUse() && N0.getOpcode() == ISD::SETCC) { + SDLoc DL(N0); + SDValue Ops[] = { N0.getOperand(0), N0.getOperand(1), + DAG.getConstant(-1, DL, VT), DAG.getConstant(0, DL, VT), + N0.getOperand(2) }; + return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops); + } + return SDValue(); +} + SDValue SystemZTargetLowering::combineSIGN_EXTEND( SDNode *N, DAGCombinerInfo &DCI) const { // Convert (sext (ashr (shl X, C1), C2)) to @@ -5466,11 +5428,135 @@ SDValue SystemZTargetLowering::combineSHIFTROT( return SDValue(); } +static bool combineCCMask(SDValue &Glue, int &CCValid, int &CCMask) { + // We have a SELECT_CCMASK or BR_CCMASK comparing the condition code + // set by the glued instruction using the CCValid / CCMask masks, + // If the glued instruction is itself a (ICMP (SELECT_CCMASK)) testing + // the condition code set by some other instruction, see whether we + // can directly use that condition code. + bool Invert = false; + + // Verify that we have an appropriate mask for a EQ or NE comparison. + if (CCValid != SystemZ::CCMASK_ICMP) + return false; + if (CCMask == SystemZ::CCMASK_CMP_NE) + Invert = !Invert; + else if (CCMask != SystemZ::CCMASK_CMP_EQ) + return false; + + // Verify that we have an ICMP that is the single user of a SELECT_CCMASK. + SDNode *ICmp = Glue.getNode(); + if (ICmp->getOpcode() != SystemZISD::ICMP) + return false; + SDNode *Select = ICmp->getOperand(0).getNode(); + if (Select->getOpcode() != SystemZISD::SELECT_CCMASK) + return false; + if (!Select->hasOneUse()) + return false; + + // Verify that the ICMP compares against one of select values. + auto *CompareVal = dyn_cast(ICmp->getOperand(1)); + if (!CompareVal) + return false; + auto *TrueVal = dyn_cast(Select->getOperand(0)); + if (!TrueVal) + return false; + auto *FalseVal = dyn_cast(Select->getOperand(1)); + if (!FalseVal) + return false; + if (CompareVal->getZExtValue() == FalseVal->getZExtValue()) + Invert = !Invert; + else if (CompareVal->getZExtValue() != TrueVal->getZExtValue()) + return false; + + // Compute the effective CC mask for the new branch or select. + auto *NewCCValid = dyn_cast(Select->getOperand(2)); + auto *NewCCMask = dyn_cast(Select->getOperand(3)); + if (!NewCCValid || !NewCCMask) + return false; + CCValid = NewCCValid->getZExtValue(); + CCMask = NewCCMask->getZExtValue(); + if (Invert) + CCMask ^= CCValid; + + // Return the updated Glue link. + Glue = Select->getOperand(4); + return true; +} + +static bool combineMergeChains(SDValue &Chain, SDValue Glue) { + // We are about to glue an instruction with input chain Chain to the + // instruction Glue. 
Verify that this would not create an invalid + // topological sort due to intervening chain nodes. + + SDNode *Node = Glue.getNode(); + for (int ResNo = Node->getNumValues() - 1; ResNo >= 0; --ResNo) + if (Node->getValueType(ResNo) == MVT::Other) { + SDValue OutChain = SDValue(Node, ResNo); + // FIXME: We should be able to at least handle an intervening + // TokenFactor node by swapping chains around a bit ... + return Chain == OutChain; + } + + return true; +} + +SDValue SystemZTargetLowering::combineBR_CCMASK( + SDNode *N, DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; + + // Combine BR_CCMASK (ICMP (SELECT_CCMASK)) into a single BR_CCMASK. + auto *CCValid = dyn_cast(N->getOperand(1)); + auto *CCMask = dyn_cast(N->getOperand(2)); + if (!CCValid || !CCMask) + return SDValue(); + + int CCValidVal = CCValid->getZExtValue(); + int CCMaskVal = CCMask->getZExtValue(); + SDValue Chain = N->getOperand(0); + SDValue Glue = N->getOperand(4); + + if (combineCCMask(Glue, CCValidVal, CCMaskVal) + && combineMergeChains(Chain, Glue)) + return DAG.getNode(SystemZISD::BR_CCMASK, SDLoc(N), N->getValueType(0), + Chain, + DAG.getConstant(CCValidVal, SDLoc(N), MVT::i32), + DAG.getConstant(CCMaskVal, SDLoc(N), MVT::i32), + N->getOperand(3), Glue); + return SDValue(); +} + +SDValue SystemZTargetLowering::combineSELECT_CCMASK( + SDNode *N, DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; + + // Combine SELECT_CCMASK (ICMP (SELECT_CCMASK)) into a single SELECT_CCMASK. + auto *CCValid = dyn_cast(N->getOperand(2)); + auto *CCMask = dyn_cast(N->getOperand(3)); + if (!CCValid || !CCMask) + return SDValue(); + + int CCValidVal = CCValid->getZExtValue(); + int CCMaskVal = CCMask->getZExtValue(); + SDValue Glue = N->getOperand(4); + + if (combineCCMask(Glue, CCValidVal, CCMaskVal)) + return DAG.getNode(SystemZISD::SELECT_CCMASK, SDLoc(N), N->getValueType(0), + N->getOperand(0), + N->getOperand(1), + DAG.getConstant(CCValidVal, SDLoc(N), MVT::i32), + DAG.getConstant(CCMaskVal, SDLoc(N), MVT::i32), + Glue); + return SDValue(); +} + SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { switch(N->getOpcode()) { default: break; + case ISD::ZERO_EXTEND: return combineZERO_EXTEND(N, DCI); case ISD::SIGN_EXTEND: return combineSIGN_EXTEND(N, DCI); + case ISD::SIGN_EXTEND_INREG: return combineSIGN_EXTEND_INREG(N, DCI); case SystemZISD::MERGE_HIGH: case SystemZISD::MERGE_LOW: return combineMERGE(N, DCI); case ISD::STORE: return combineSTORE(N, DCI); @@ -5482,11 +5568,37 @@ SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N, case ISD::SRA: case ISD::SRL: case ISD::ROTL: return combineSHIFTROT(N, DCI); + case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI); + case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI); } return SDValue(); } +void +SystemZTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, + KnownBits &Known, + const APInt &DemandedElts, + const SelectionDAG &DAG, + unsigned Depth) const { + unsigned BitWidth = Known.getBitWidth(); + + Known.resetAll(); + switch (Op.getOpcode()) { + case SystemZISD::SELECT_CCMASK: { + KnownBits TrueKnown(BitWidth), FalseKnown(BitWidth); + DAG.computeKnownBits(Op.getOperand(0), TrueKnown, Depth + 1); + DAG.computeKnownBits(Op.getOperand(1), FalseKnown, Depth + 1); + Known.Zero = TrueKnown.Zero & FalseKnown.Zero; + Known.One = TrueKnown.One & FalseKnown.One; + break; + } + + default: + break; + } +} + //===----------------------------------------------------------------------===// // 
Custom insertion //===----------------------------------------------------------------------===// diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h index 2cdc88db5a4d..be20cd619969 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.h +++ b/lib/Target/SystemZ/SystemZISelLowering.h @@ -490,6 +490,14 @@ class SystemZTargetLowering : public TargetLowering { SelectionDAG &DAG) const override; SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; + /// Determine which of the bits specified in Mask are known to be either + /// zero or one and return them in the KnownZero/KnownOne bitsets. + void computeKnownBitsForTargetNode(const SDValue Op, + KnownBits &Known, + const APInt &DemandedElts, + const SelectionDAG &DAG, + unsigned Depth = 0) const override; + ISD::NodeType getExtendForAtomicOps() const override { return ISD::ANY_EXTEND; } @@ -563,7 +571,9 @@ class SystemZTargetLowering : public TargetLowering { bool Force) const; SDValue combineTruncateExtract(const SDLoc &DL, EVT TruncVT, SDValue Op, DAGCombinerInfo &DCI) const; + SDValue combineZERO_EXTEND(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineSIGN_EXTEND(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineSIGN_EXTEND_INREG(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineMERGE(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineSTORE(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineEXTRACT_VECTOR_ELT(SDNode *N, DAGCombinerInfo &DCI) const; @@ -571,6 +581,8 @@ class SystemZTargetLowering : public TargetLowering { SDValue combineFP_ROUND(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineBSWAP(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineSHIFTROT(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineBR_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineSELECT_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const; // If the last instruction before MBBI in MBB was some form of COMPARE, // try to replace it with a COMPARE AND BRANCH just before MBBI. diff --git a/lib/Target/SystemZ/SystemZInstrFP.td b/lib/Target/SystemZ/SystemZInstrFP.td index 02aeaadad0d9..16edbea87cda 100644 --- a/lib/Target/SystemZ/SystemZInstrFP.td +++ b/lib/Target/SystemZ/SystemZInstrFP.td @@ -7,6 +7,9 @@ // //===----------------------------------------------------------------------===// +// TODO: Most floating-point instructions (except for simple moves and the +// like) can raise exceptions -- should they have hasSideEffects=1 ? + //===----------------------------------------------------------------------===// // Select instructions //===----------------------------------------------------------------------===// @@ -29,22 +32,20 @@ defm CondStoreF64 : CondStores; def LZDR : InherentRRE<"lzdr", 0xB375, FP64, fpimm0>; def LZXR : InherentRRE<"lzxr", 0xB376, FP128, fpimm0>; } // Moves between two floating-point registers. -let hasSideEffects = 0 in { - def LER : UnaryRR <"ler", 0x38, null_frag, FP32, FP32>; - def LDR : UnaryRR <"ldr", 0x28, null_frag, FP64, FP64>; - def LXR : UnaryRRE<"lxr", 0xB365, null_frag, FP128, FP128>; +def LER : UnaryRR <"ler", 0x38, null_frag, FP32, FP32>; +def LDR : UnaryRR <"ldr", 0x28, null_frag, FP64, FP64>; +def LXR : UnaryRRE<"lxr", 0xB365, null_frag, FP128, FP128>; - // For z13 we prefer LDR over LER to avoid partial register dependencies. - let isCodeGenOnly = 1 in - def LDR32 : UnaryRR<"ldr", 0x28, null_frag, FP32, FP32>; -} +// For z13 we prefer LDR over LER to avoid partial register dependencies. 
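// (LER only replaces the high word of the destination FPR and leaves the
// remaining bits unchanged, so it carries a false dependence on the
// register's previous contents; LDR rewrites all 64 bits.)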
+let isCodeGenOnly = 1 in + def LDR32 : UnaryRR<"ldr", 0x28, null_frag, FP32, FP32>; // Moves between two floating-point registers that also set the condition // codes. @@ -130,7 +131,7 @@ defm LoadStoreF128 : MVCLoadStore; // Load instructions //===----------------------------------------------------------------------===// -let canFoldAsLoad = 1, SimpleBDXLoad = 1 in { +let canFoldAsLoad = 1, SimpleBDXLoad = 1, mayLoad = 1 in { defm LE : UnaryRXPair<"le", 0x78, 0xED64, load, FP32, 4>; defm LD : UnaryRXPair<"ld", 0x68, 0xED65, load, FP64, 8>; @@ -150,7 +151,7 @@ let canFoldAsLoad = 1, SimpleBDXLoad = 1 in { // Store instructions //===----------------------------------------------------------------------===// -let SimpleBDXStore = 1 in { +let SimpleBDXStore = 1, mayStore = 1 in { defm STE : StoreRXPair<"ste", 0x70, 0xED66, store, FP32, 4>; defm STD : StoreRXPair<"std", 0x60, 0xED67, store, FP64, 8>; @@ -525,11 +526,14 @@ let Defs = [CC], CCValues = 0xC in { //===----------------------------------------------------------------------===// let hasSideEffects = 1 in { - def EFPC : InherentRRE<"efpc", 0xB38C, GR32, int_s390_efpc>; - def STFPC : StoreInherentS<"stfpc", 0xB29C, storei, 4>; + let mayLoad = 1, mayStore = 1 in { + // TODO: EFPC and SFPC do not touch memory at all + def EFPC : InherentRRE<"efpc", 0xB38C, GR32, int_s390_efpc>; + def STFPC : StoreInherentS<"stfpc", 0xB29C, storei, 4>; - def SFPC : SideEffectUnaryRRE<"sfpc", 0xB384, GR32, int_s390_sfpc>; - def LFPC : SideEffectUnaryS<"lfpc", 0xB29D, loadu, 4>; + def SFPC : SideEffectUnaryRRE<"sfpc", 0xB384, GR32, int_s390_sfpc>; + def LFPC : SideEffectUnaryS<"lfpc", 0xB29D, loadu, 4>; + } def SFASR : SideEffectUnaryRRE<"sfasr", 0xB385, GR32, null_frag>; def LFAS : SideEffectUnaryS<"lfas", 0xB2BD, null_frag, 4>; diff --git a/lib/Target/SystemZ/SystemZInstrFormats.td b/lib/Target/SystemZ/SystemZInstrFormats.td index 033a0a879d37..06da66ad8764 100644 --- a/lib/Target/SystemZ/SystemZInstrFormats.td +++ b/lib/Target/SystemZ/SystemZInstrFormats.td @@ -21,6 +21,10 @@ class InstSystemZ opcode> : InstRXYb { let CCMaskFirst = 1; + let mayLoad = 1; } class AsmCondBranchRXY opcode> : InstRXYb; + mnemonic#"\t$M1, $XBD2", []> { + let mayLoad = 1; +} class FixedCondBranchRXY opcode, SDPatternOperator operator = null_frag> @@ -2113,6 +2120,7 @@ class FixedCondBranchRXY opcode, [(operator (load bdxaddr20only:$XBD2))]> { let isAsmParserOnly = V.alternate; let M1 = V.ccmask; + let mayLoad = 1; } class CmpBranchRIEa opcode, @@ -2784,7 +2792,6 @@ multiclass CondUnaryRSYPair opcode, def Asm : AsmCondUnaryRSY; } - class UnaryRX opcode, SDPatternOperator operator, RegisterOperand cls, bits<5> bytes, AddressingMode mode = bdxaddr12only> @@ -4688,7 +4695,8 @@ class SelectWrapper // Stores $new to $addr if $cc is true ("" case) or false (Inv case). 
multiclass CondStores { - let Defs = [CC], Uses = [CC], usesCustomInserter = 1 in { + let Defs = [CC], Uses = [CC], usesCustomInserter = 1, + mayLoad = 1, mayStore = 1 in { def "" : Pseudo<(outs), (ins cls:$new, mode:$addr, imm32zx4:$valid, imm32zx4:$cc), [(store (z_select_ccmask cls:$new, (load mode:$addr), diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp index 62948817ce7e..572446c1aa12 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -18,7 +18,7 @@ #include "SystemZSubtarget.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LiveInterval.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index 55a796cddf43..5c874dea0874 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -11,24 +11,25 @@ // Stack allocation //===----------------------------------------------------------------------===// -let hasNoSchedulingInfo = 1 in { +// The callseq_start node requires the hasSideEffects flag, even though these +// instructions are noops on SystemZ. +let hasNoSchedulingInfo = 1, hasSideEffects = 1 in { def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i64imm:$amt1, i64imm:$amt2), [(callseq_start timm:$amt1, timm:$amt2)]>; def ADJCALLSTACKUP : Pseudo<(outs), (ins i64imm:$amt1, i64imm:$amt2), [(callseq_end timm:$amt1, timm:$amt2)]>; } -let hasSideEffects = 0 in { - // Takes as input the value of the stack pointer after a dynamic allocation - // has been made. Sets the output to the address of the dynamically- - // allocated area itself, skipping the outgoing arguments. - // - // This expands to an LA or LAY instruction. We restrict the offset - // to the range of LA and keep the LAY range in reserve for when - // the size of the outgoing arguments is added. - def ADJDYNALLOC : Pseudo<(outs GR64:$dst), (ins dynalloc12only:$src), - [(set GR64:$dst, dynalloc12only:$src)]>; -} +// Takes as input the value of the stack pointer after a dynamic allocation +// has been made. Sets the output to the address of the dynamically- +// allocated area itself, skipping the outgoing arguments. +// +// This expands to an LA or LAY instruction. We restrict the offset +// to the range of LA and keep the LAY range in reserve for when +// the size of the outgoing arguments is added. +def ADJDYNALLOC : Pseudo<(outs GR64:$dst), (ins dynalloc12only:$src), + [(set GR64:$dst, dynalloc12only:$src)]>; + //===----------------------------------------------------------------------===// // Branch instructions @@ -197,15 +198,15 @@ let isBranch = 1, isTerminator = 1 in { //===----------------------------------------------------------------------===// // Unconditional trap. -let hasCtrlDep = 1 in +let hasCtrlDep = 1, hasSideEffects = 1 in def Trap : Alias<4, (outs), (ins), [(trap)]>; // Conditional trap. -let hasCtrlDep = 1, Uses = [CC] in +let hasCtrlDep = 1, Uses = [CC], hasSideEffects = 1 in def CondTrap : Alias<4, (outs), (ins cond4:$valid, cond4:$R1), []>; // Fused compare-and-trap instructions. -let hasCtrlDep = 1 in { +let hasCtrlDep = 1, hasSideEffects = 1 in { // These patterns work the same way as for compare-and-branch. 
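// (Each of these compares its two operands and raises a trap when the
// condition selected by the mask holds, instead of branching.)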
defm CRT : CmpBranchRRFcPair<"crt", 0xB972, GR32>; defm CGRT : CmpBranchRRFcPair<"cgrt", 0xB960, GR64>; @@ -360,13 +361,12 @@ defm CondStore64 : CondStores, - Requires<[FeatureHighWord]>; - def LR : UnaryRR <"lr", 0x18, null_frag, GR32, GR32>; - def LGR : UnaryRRE<"lgr", 0xB904, null_frag, GR64, GR64>; -} +// Expands to LR, RISBHG or RISBLG, depending on the choice of registers. +def LRMux : UnaryRRPseudo<"lr", null_frag, GRX32, GRX32>, + Requires<[FeatureHighWord]>; +def LR : UnaryRR <"lr", 0x18, null_frag, GR32, GR32>; +def LGR : UnaryRRE<"lgr", 0xB904, null_frag, GR64, GR64>; + let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in { def LTR : UnaryRR <"ltr", 0x12, null_frag, GR32, GR32>; def LTGR : UnaryRRE<"ltgr", 0xB902, null_frag, GR64, GR64>; @@ -376,8 +376,7 @@ let usesCustomInserter = 1, hasNoSchedulingInfo = 1 in def PAIR128 : Pseudo<(outs GR128:$dst), (ins GR64:$hi, GR64:$lo), []>; // Immediate moves. -let hasSideEffects = 0, isAsCheapAsAMove = 1, isMoveImm = 1, - isReMaterializable = 1 in { +let isAsCheapAsAMove = 1, isMoveImm = 1, isReMaterializable = 1 in { // 16-bit sign-extended immediates. LHIMux expands to LHI or IIHF, // deopending on the choice of register. def LHIMux : UnaryRIPseudo, @@ -398,7 +397,7 @@ let hasSideEffects = 0, isAsCheapAsAMove = 1, isMoveImm = 1, } // Register loads. -let canFoldAsLoad = 1, SimpleBDXLoad = 1 in { +let canFoldAsLoad = 1, SimpleBDXLoad = 1, mayLoad = 1 in { // Expands to L, LY or LFH, depending on the choice of register. def LMux : UnaryRXYPseudo<"l", load, GRX32, 4>, Requires<[FeatureHighWord]>; @@ -435,14 +434,14 @@ let Predicates = [FeatureLoadAndZeroRightmostByte] in { } // Load and trap. -let Predicates = [FeatureLoadAndTrap] in { +let Predicates = [FeatureLoadAndTrap], hasSideEffects = 1 in { def LAT : UnaryRXY<"lat", 0xE39F, null_frag, GR32, 4>; def LFHAT : UnaryRXY<"lfhat", 0xE3C8, null_frag, GRH32, 4>; def LGAT : UnaryRXY<"lgat", 0xE385, null_frag, GR64, 8>; } // Register stores. -let SimpleBDXStore = 1 in { +let SimpleBDXStore = 1, mayStore = 1 in { // Expands to ST, STY or STFH, depending on the choice of register. def STMux : StoreRXYPseudo, Requires<[FeatureHighWord]>; @@ -489,17 +488,16 @@ let mayLoad = 1, mayStore = 1, Defs = [CC] in let Predicates = [FeatureLoadStoreOnCond2], Uses = [CC] in { // Load immediate on condition. Matched via DAG pattern and created // by the PeepholeOptimizer via FoldImmediate. - let hasSideEffects = 0 in { - // Expands to LOCHI or LOCHHI, depending on the choice of register. - def LOCHIMux : CondBinaryRIEPseudo; - defm LOCHHI : CondBinaryRIEPair<"lochhi", 0xEC4E, GRH32, imm32sx16>; - defm LOCHI : CondBinaryRIEPair<"lochi", 0xEC42, GR32, imm32sx16>; - defm LOCGHI : CondBinaryRIEPair<"locghi", 0xEC46, GR64, imm64sx16>; - } + + // Expands to LOCHI or LOCHHI, depending on the choice of register. + def LOCHIMux : CondBinaryRIEPseudo; + defm LOCHHI : CondBinaryRIEPair<"lochhi", 0xEC4E, GRH32, imm32sx16>; + defm LOCHI : CondBinaryRIEPair<"lochi", 0xEC42, GR32, imm32sx16>; + defm LOCGHI : CondBinaryRIEPair<"locghi", 0xEC46, GR64, imm64sx16>; // Move register on condition. Expanded from Select* pseudos and // created by early if-conversion. - let hasSideEffects = 0, isCommutable = 1 in { + let isCommutable = 1 in { // Expands to LOCR or LOCFHR or a branch-and-move sequence, // depending on the choice of registers. 
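// (When one operand ends up in a high 32-bit register half and the other in
// a low half, no single LOC-type move can connect them, so a conditional
// branch around a plain move is emitted instead.)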
def LOCRMux : CondBinaryRRFPseudo; @@ -534,7 +532,7 @@ let Predicates = [FeatureLoadStoreOnCond2], Uses = [CC] in { let Predicates = [FeatureLoadStoreOnCond], Uses = [CC] in { // Move register on condition. Expanded from Select* pseudos and // created by early if-conversion. - let hasSideEffects = 0, isCommutable = 1 in { + let isCommutable = 1 in { defm LOCR : CondBinaryRRFPair<"locr", 0xB9F2, GR32, GR32>; defm LOCGR : CondBinaryRRFPair<"locgr", 0xB9E2, GR64, GR64>; } @@ -570,17 +568,14 @@ let Predicates = [FeatureLoadStoreOnCond], Uses = [CC] in { //===----------------------------------------------------------------------===// // 32-bit extensions from registers. -let hasSideEffects = 0 in { - def LBR : UnaryRRE<"lbr", 0xB926, sext8, GR32, GR32>; - def LHR : UnaryRRE<"lhr", 0xB927, sext16, GR32, GR32>; -} +def LBR : UnaryRRE<"lbr", 0xB926, sext8, GR32, GR32>; +def LHR : UnaryRRE<"lhr", 0xB927, sext16, GR32, GR32>; // 64-bit extensions from registers. -let hasSideEffects = 0 in { - def LGBR : UnaryRRE<"lgbr", 0xB906, sext8, GR64, GR64>; - def LGHR : UnaryRRE<"lghr", 0xB907, sext16, GR64, GR64>; - def LGFR : UnaryRRE<"lgfr", 0xB914, sext32, GR64, GR32>; -} +def LGBR : UnaryRRE<"lgbr", 0xB906, sext8, GR64, GR64>; +def LGHR : UnaryRRE<"lghr", 0xB907, sext16, GR64, GR64>; +def LGFR : UnaryRRE<"lgfr", 0xB914, sext32, GR64, GR32>; + let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in def LTGFR : UnaryRRE<"ltgfr", 0xB912, null_frag, GR64, GR32>; @@ -620,23 +615,20 @@ let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in //===----------------------------------------------------------------------===// // 32-bit extensions from registers. -let hasSideEffects = 0 in { - // Expands to LLCR or RISB[LH]G, depending on the choice of registers. - def LLCRMux : UnaryRRPseudo<"llcr", zext8, GRX32, GRX32>, - Requires<[FeatureHighWord]>; - def LLCR : UnaryRRE<"llcr", 0xB994, zext8, GR32, GR32>; - // Expands to LLHR or RISB[LH]G, depending on the choice of registers. - def LLHRMux : UnaryRRPseudo<"llhr", zext16, GRX32, GRX32>, - Requires<[FeatureHighWord]>; - def LLHR : UnaryRRE<"llhr", 0xB995, zext16, GR32, GR32>; -} + +// Expands to LLCR or RISB[LH]G, depending on the choice of registers. +def LLCRMux : UnaryRRPseudo<"llcr", zext8, GRX32, GRX32>, + Requires<[FeatureHighWord]>; +def LLCR : UnaryRRE<"llcr", 0xB994, zext8, GR32, GR32>; +// Expands to LLHR or RISB[LH]G, depending on the choice of registers. +def LLHRMux : UnaryRRPseudo<"llhr", zext16, GRX32, GRX32>, + Requires<[FeatureHighWord]>; +def LLHR : UnaryRRE<"llhr", 0xB995, zext16, GR32, GR32>; // 64-bit extensions from registers. -let hasSideEffects = 0 in { - def LLGCR : UnaryRRE<"llgcr", 0xB984, zext8, GR64, GR64>; - def LLGHR : UnaryRRE<"llghr", 0xB985, zext16, GR64, GR64>; - def LLGFR : UnaryRRE<"llgfr", 0xB916, zext32, GR64, GR32>; -} +def LLGCR : UnaryRRE<"llgcr", 0xB984, zext8, GR64, GR64>; +def LLGHR : UnaryRRE<"llghr", 0xB985, zext16, GR64, GR64>; +def LLGFR : UnaryRRE<"llgfr", 0xB916, zext32, GR64, GR32>; // Match 32-to-64-bit zero extensions in which the source is already // in a 64-bit register. @@ -683,7 +675,7 @@ let Predicates = [FeatureLoadAndZeroRightmostByte] in { } // Load and trap. 
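// (These trap when the value just loaded turns out to be zero.)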
-let Predicates = [FeatureLoadAndTrap] in { +let Predicates = [FeatureLoadAndTrap], hasSideEffects = 1 in { def LLGFAT : UnaryRXY<"llgfat", 0xE39D, null_frag, GR64, 4>; def LLGTAT : UnaryRXY<"llgtat", 0xE39C, null_frag, GR64, 4>; } @@ -760,10 +752,8 @@ def STMH : StoreMultipleRSY<"stmh", 0xEB26, GRH32>; //===----------------------------------------------------------------------===// // Byte-swapping register moves. -let hasSideEffects = 0 in { - def LRVR : UnaryRRE<"lrvr", 0xB91F, bswap, GR32, GR32>; - def LRVGR : UnaryRRE<"lrvgr", 0xB90F, bswap, GR64, GR64>; -} +def LRVR : UnaryRRE<"lrvr", 0xB91F, bswap, GR32, GR32>; +def LRVGR : UnaryRRE<"lrvgr", 0xB90F, bswap, GR64, GR64>; // Byte-swapping loads. Unlike normal loads, these instructions are // allowed to access storage more than once. @@ -785,13 +775,12 @@ let mayLoad = 1, mayStore = 1 in //===----------------------------------------------------------------------===// // Load BDX-style addresses. -let hasSideEffects = 0, isAsCheapAsAMove = 1, isReMaterializable = 1 in +let isAsCheapAsAMove = 1, isReMaterializable = 1 in defm LA : LoadAddressRXPair<"la", 0x41, 0xE371, bitconvert>; // Load a PC-relative address. There's no version of this instruction // with a 16-bit offset, so there's no relaxation. -let hasSideEffects = 0, isAsCheapAsAMove = 1, isMoveImm = 1, - isReMaterializable = 1 in +let isAsCheapAsAMove = 1, isMoveImm = 1, isReMaterializable = 1 in def LARL : LoadAddressRIL<"larl", 0xC00, bitconvert>; // Load the Global Offset Table address. This will be lowered into a @@ -1267,6 +1256,7 @@ def MGRK : BinaryRRFa<"mgrk", 0xB9EC, null_frag, GR128, GR64, GR64>, Requires<[FeatureMiscellaneousExtensions2]>; def MLR : BinaryRRE<"mlr", 0xB996, null_frag, GR128, GR32>; def MLGR : BinaryRRE<"mlgr", 0xB986, null_frag, GR128, GR64>; + def : Pat<(z_smul_lohi GR64:$src1, GR64:$src2), (MGRK GR64:$src1, GR64:$src2)>; def : Pat<(z_umul_lohi GR64:$src1, GR64:$src2), @@ -1279,6 +1269,7 @@ def MG : BinaryRXY<"mg", 0xE384, null_frag, GR128, load, 8>, Requires<[FeatureMiscellaneousExtensions2]>; def ML : BinaryRXY<"ml", 0xE396, null_frag, GR128, load, 4>; def MLG : BinaryRXY<"mlg", 0xE386, null_frag, GR128, load, 8>; + def : Pat<(z_smul_lohi GR64:$src1, (i64 (load bdxaddr20only:$src2))), (MG (AEXT128 GR64:$src1), bdxaddr20only:$src2)>; def : Pat<(z_umul_lohi GR64:$src1, (i64 (load bdxaddr20only:$src2))), @@ -1328,11 +1319,9 @@ def : Pat<(z_udivrem GR64:$src1, (i64 (load bdxaddr20only:$src2))), //===----------------------------------------------------------------------===// // Logical shift left. -let hasSideEffects = 0 in { - defm SLL : BinaryRSAndK<"sll", 0x89, 0xEBDF, shl, GR32>; - def SLLG : BinaryRSY<"sllg", 0xEB0D, shl, GR64>; - def SLDL : BinaryRS<"sldl", 0x8D, null_frag, GR128>; -} +defm SLL : BinaryRSAndK<"sll", 0x89, 0xEBDF, shl, GR32>; +def SLLG : BinaryRSY<"sllg", 0xEB0D, shl, GR64>; +def SLDL : BinaryRS<"sldl", 0x8D, null_frag, GR128>; // Arithmetic shift left. let Defs = [CC] in { @@ -1342,11 +1331,9 @@ let Defs = [CC] in { } // Logical shift right. -let hasSideEffects = 0 in { - defm SRL : BinaryRSAndK<"srl", 0x88, 0xEBDE, srl, GR32>; - def SRLG : BinaryRSY<"srlg", 0xEB0C, srl, GR64>; - def SRDL : BinaryRS<"srdl", 0x8C, null_frag, GR128>; -} +defm SRL : BinaryRSAndK<"srl", 0x88, 0xEBDE, srl, GR32>; +def SRLG : BinaryRSY<"srlg", 0xEB0C, srl, GR64>; +def SRDL : BinaryRS<"srdl", 0x8C, null_frag, GR128>; // Arithmetic shift right. 
let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in { @@ -1356,10 +1343,8 @@ let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in { } // Rotate left. -let hasSideEffects = 0 in { - def RLL : BinaryRSY<"rll", 0xEB1D, rotl, GR32>; - def RLLG : BinaryRSY<"rllg", 0xEB1C, rotl, GR64>; -} +def RLL : BinaryRSY<"rll", 0xEB1D, rotl, GR32>; +def RLLG : BinaryRSY<"rllg", 0xEB1C, rotl, GR64>; // Rotate second operand left and inserted selected bits into first operand. // These can act like 32-bit operands provided that the constant start and @@ -1550,10 +1535,12 @@ let Defs = [CC] in { // Prefetch and execution hint //===----------------------------------------------------------------------===// -def PFD : PrefetchRXY<"pfd", 0xE336, z_prefetch>; -def PFDRL : PrefetchRILPC<"pfdrl", 0xC62, z_prefetch>; +let mayLoad = 1, mayStore = 1 in { + def PFD : PrefetchRXY<"pfd", 0xE336, z_prefetch>; + def PFDRL : PrefetchRILPC<"pfdrl", 0xC62, z_prefetch>; +} -let Predicates = [FeatureExecutionHint] in { +let Predicates = [FeatureExecutionHint], hasSideEffects = 1 in { // Branch Prediction Preload def BPP : BranchPreloadSMI<"bpp", 0xC7>; def BPRP : BranchPreloadMII<"bprp", 0xC5>; @@ -1820,7 +1807,10 @@ let mayLoad = 1, mayStore = 1, Uses = [R0L, R1D], Defs = [CC] in { // Guarded storage //===----------------------------------------------------------------------===// -let Predicates = [FeatureGuardedStorage] in { +// These instructions use and/or modify the guarded storage control +// registers, which we do not otherwise model, so they should have +// hasSideEffects. +let Predicates = [FeatureGuardedStorage], hasSideEffects = 1 in { def LGG : UnaryRXY<"lgg", 0xE34C, null_frag, GR64, 8>; def LLGFSG : UnaryRXY<"llgfsg", 0xE348, null_frag, GR64, 4>; @@ -1896,7 +1886,7 @@ defm LAE : LoadAddressRXPair<"lae", 0x51, 0xE375, null_frag>; // Load access multiple. defm LAM : LoadMultipleRSPair<"lam", 0x9A, 0xEB9A, AR32>; -// Load access multiple. +// Store access multiple. defm STAM : StoreMultipleRSPair<"stam", 0x9B, 0xEB9B, AR32>; //===----------------------------------------------------------------------===// @@ -1945,7 +1935,6 @@ let hasSideEffects = 1, Predicates = [FeatureTransactionalExecution] in { let mayStore = 1, usesCustomInserter = 1, Defs = [CC] in { def TBEGIN : SideEffectBinarySIL<"tbegin", 0xE560, z_tbegin, imm32zx16>; def TBEGIN_nofloat : SideEffectBinarySILPseudo; - def TBEGINC : SideEffectBinarySIL<"tbeginc", 0xE561, int_s390_tbeginc, imm32zx16>; } @@ -1955,7 +1944,8 @@ let hasSideEffects = 1, Predicates = [FeatureTransactionalExecution] in { def TEND : SideEffectInherentS<"tend", 0xB2F8, z_tend>; // Transaction Abort - let isTerminator = 1, isBarrier = 1 in + let isTerminator = 1, isBarrier = 1, mayStore = 1, + hasSideEffects = 1 in def TABORT : SideEffectAddressS<"tabort", 0xB2FC, int_s390_tabort>; // Nontransactional Store @@ -2031,7 +2021,7 @@ let hasSideEffects = 1 in { // .insn directive instructions //===----------------------------------------------------------------------===// -let isCodeGenOnly = 1 in { +let isCodeGenOnly = 1, hasSideEffects = 1 in { def InsnE : DirectiveInsnE<(outs), (ins imm64zx16:$enc), ".insn e,$enc", []>; def InsnRI : DirectiveInsnRI<(outs), (ins imm64zx32:$enc, AnyReg:$R1, imm32sx16:$I2), @@ -2141,18 +2131,6 @@ def : Pat<(add GR64:$src1, imm64zx32n:$src2), def : Pat<(sub GR64:$src1, (azextloadi32 bdxaddr20only:$addr)), (SLGF GR64:$src1, bdxaddr20only:$addr)>; -// Optimize sign-extended 1/0 selects to -1/0 selects. This is important -// for vector legalization. 
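// (The patterns below match a 1/0 select whose result is shifted up to the
// sign bit and then arithmetic-shifted back down, i.e. sign-extended from
// bit 0, and rewrite it as a -1/0 select directly.)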
-def : Pat<(sra (shl (i32 (z_select_ccmask 1, 0, imm32zx4:$valid, imm32zx4:$cc)), - (i32 31)), - (i32 31)), - (Select32 (LHI -1), (LHI 0), imm32zx4:$valid, imm32zx4:$cc)>; -def : Pat<(sra (shl (i64 (anyext (i32 (z_select_ccmask 1, 0, imm32zx4:$valid, - imm32zx4:$cc)))), - (i32 63)), - (i32 63)), - (Select64 (LGHI -1), (LGHI 0), imm32zx4:$valid, imm32zx4:$cc)>; - // Avoid generating 2 XOR instructions. (xor (and x, y), y) is // equivalent to (and (xor x, -1), y) def : Pat<(and (xor GR64:$x, (i64 -1)), GR64:$y), diff --git a/lib/Target/SystemZ/SystemZInstrSystem.td b/lib/Target/SystemZ/SystemZInstrSystem.td index 0112ebf1eb10..c351577fa5bd 100644 --- a/lib/Target/SystemZ/SystemZInstrSystem.td +++ b/lib/Target/SystemZ/SystemZInstrSystem.td @@ -23,7 +23,7 @@ let hasSideEffects = 1, Uses = [CC] in def EPSW : InherentDualRRE<"epsw", 0xB98D, GR32>; // Load PSW (extended). -let hasSideEffects = 1, Defs = [CC], mayLoad = 1 in { +let hasSideEffects = 1, Defs = [CC] in { def LPSW : SideEffectUnaryS<"lpsw", 0x8200, null_frag, 8>; def LPSWE : SideEffectUnaryS<"lpswe", 0xB2B2, null_frag, 16>; } @@ -37,7 +37,7 @@ let hasSideEffects = 1 in def SPKA : SideEffectAddressS<"spka", 0xB20A, null_frag>; // Set system mask. -let hasSideEffects = 1, mayLoad = 1 in +let hasSideEffects = 1 in def SSM : SideEffectUnaryS<"ssm", 0x8000, null_frag, 1>; // Store then AND/OR system mask. @@ -60,13 +60,15 @@ let hasSideEffects = 1 in { // Control Register Instructions. //===----------------------------------------------------------------------===// -// Load control. -def LCTL : LoadMultipleRS<"lctl", 0xB7, CR64>; -def LCTLG : LoadMultipleRSY<"lctlg", 0xEB2F, CR64>; +let hasSideEffects = 1 in { + // Load control. + def LCTL : LoadMultipleRS<"lctl", 0xB7, CR64>; + def LCTLG : LoadMultipleRSY<"lctlg", 0xEB2F, CR64>; -// Store control. -def STCTL : StoreMultipleRS<"stctl", 0xB6, CR64>; -def STCTG : StoreMultipleRSY<"stctg", 0xEB25, CR64>; + // Store control. + def STCTL : StoreMultipleRS<"stctl", 0xB6, CR64>; + def STCTG : StoreMultipleRSY<"stctg", 0xEB25, CR64>; +} // Extract primary ASN (and instance). let hasSideEffects = 1 in { diff --git a/lib/Target/SystemZ/SystemZInstrVector.td b/lib/Target/SystemZ/SystemZInstrVector.td index c9a02d9c8082..92b86575235a 100644 --- a/lib/Target/SystemZ/SystemZInstrVector.td +++ b/lib/Target/SystemZ/SystemZInstrVector.td @@ -56,8 +56,7 @@ def : VectorExtractSubreg; //===----------------------------------------------------------------------===// let Predicates = [FeatureVector] in { - let hasSideEffects = 0, isAsCheapAsAMove = 1, isMoveImm = 1, - isReMaterializable = 1 in { + let isAsCheapAsAMove = 1, isMoveImm = 1, isReMaterializable = 1 in { // Generate byte mask. def VZERO : InherentVRIa<"vzero", 0xE744, 0>; @@ -141,8 +140,10 @@ let Predicates = [FeatureVector] in { // LEY and LDY offer full 20-bit displacement fields. It's often better // to use those instructions rather than force a 20-bit displacement // into a GPR temporary. - def VL32 : UnaryAliasVRX; - def VL64 : UnaryAliasVRX; + let mayLoad = 1 in { + def VL32 : UnaryAliasVRX; + def VL64 : UnaryAliasVRX; + } // Load logical element and zero. def VLLEZ : UnaryVRXGeneric<"vllez", 0xE704>; @@ -231,8 +232,10 @@ let Predicates = [FeatureVector] in { // STEY and STDY offer full 20-bit displacement fields. It's often better // to use those instructions rather than force a 20-bit displacement // into a GPR temporary. 
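// (The vector element store forms these aliases expand to only have a
// 12-bit unsigned displacement, so larger offsets would otherwise have to
// be materialized in a GPR first.)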
- def VST32 : StoreAliasVRX; - def VST64 : StoreAliasVRX; + let mayStore = 1 in { + def VST32 : StoreAliasVRX; + def VST64 : StoreAliasVRX; + } // Scatter element. def VSCEF : StoreBinaryVRV<"vscef", 0xE71B, 4, imm32zx2>; diff --git a/lib/Target/SystemZ/SystemZLDCleanup.cpp b/lib/Target/SystemZ/SystemZLDCleanup.cpp index 0f7594338766..f532e9e23b1f 100644 --- a/lib/Target/SystemZ/SystemZLDCleanup.cpp +++ b/lib/Target/SystemZ/SystemZLDCleanup.cpp @@ -64,7 +64,7 @@ void SystemZLDCleanup::getAnalysisUsage(AnalysisUsage &AU) const { } bool SystemZLDCleanup::runOnMachineFunction(MachineFunction &F) { - if (skipFunction(*F.getFunction())) + if (skipFunction(F.getFunction())) return false; TII = static_cast(F.getSubtarget().getInstrInfo()); diff --git a/lib/Target/SystemZ/SystemZLongBranch.cpp b/lib/Target/SystemZ/SystemZLongBranch.cpp index 791f0334e0f1..ef8b9806f892 100644 --- a/lib/Target/SystemZ/SystemZLongBranch.cpp +++ b/lib/Target/SystemZ/SystemZLongBranch.cpp @@ -312,7 +312,7 @@ uint64_t SystemZLongBranch::initMBBInfo() { // relaxed if it were placed at address Address. bool SystemZLongBranch::mustRelaxBranch(const TerminatorInfo &Terminator, uint64_t Address) { - if (!Terminator.Branch) + if (!Terminator.Branch || Terminator.ExtraRelaxSize == 0) return false; const MBBInfo &Target = MBBs[Terminator.TargetBlock]; diff --git a/lib/Target/SystemZ/SystemZMachineScheduler.cpp b/lib/Target/SystemZ/SystemZMachineScheduler.cpp index 4b0f92567636..08eb73fc362e 100644 --- a/lib/Target/SystemZ/SystemZMachineScheduler.cpp +++ b/lib/Target/SystemZ/SystemZMachineScheduler.cpp @@ -74,7 +74,7 @@ advanceTo(MachineBasicBlock::iterator NextBegin) { void SystemZPostRASchedStrategy::enterMBB(MachineBasicBlock *NextMBB) { assert ((SchedStates.find(NextMBB) == SchedStates.end()) && "Entering MBB twice?"); - DEBUG (dbgs() << "+++ Entering MBB#" << NextMBB->getNumber()); + DEBUG(dbgs() << "+++ Entering " << printMBBReference(*NextMBB)); MBB = NextMBB; /// Create a HazardRec for MBB, save it in SchedStates and set HazardRec to @@ -93,8 +93,8 @@ void SystemZPostRASchedStrategy::enterMBB(MachineBasicBlock *NextMBB) { SchedStates.find(SinglePredMBB) == SchedStates.end()) return; - DEBUG (dbgs() << "+++ Continued scheduling from MBB#" - << SinglePredMBB->getNumber() << "\n";); + DEBUG(dbgs() << "+++ Continued scheduling from " + << printMBBReference(*SinglePredMBB) << "\n";); HazardRec->copyState(SchedStates[SinglePredMBB]); @@ -113,7 +113,7 @@ void SystemZPostRASchedStrategy::enterMBB(MachineBasicBlock *NextMBB) { } void SystemZPostRASchedStrategy::leaveMBB() { - DEBUG (dbgs() << "+++ Leaving MBB#" << MBB->getNumber() << "\n";); + DEBUG(dbgs() << "+++ Leaving " << printMBBReference(*MBB) << "\n";); // Advance to first terminator. The successor block will handle terminators // dependent on CFG layout (T/NT branch etc). 
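The SystemZLongBranch hunk above adds an early exit for terminators whose relaxed form needs no extra bytes. A rough standalone sketch of that check follows; the field types and the 64 KiB short-branch range are simplifications for illustration, not the pass's real data structures:

#include <cstdint>

struct TerminatorInfo {
  bool Branch = false;         // does this terminator end in a relaxable branch?
  uint64_t TargetAddress = 0;  // estimated address of the branch target
  unsigned ExtraRelaxSize = 0; // extra bytes the relaxed form would need
};

// A branch whose relaxed form is no larger than its current form
// (ExtraRelaxSize == 0) can never need relaxing, so the address-range
// check is skipped entirely.
static bool mustRelaxBranch(const TerminatorInfo &T, uint64_t Address,
                            uint64_t ShortRange = 1ULL << 16) {
  if (!T.Branch || T.ExtraRelaxSize == 0)
    return false;
  uint64_t Distance = T.TargetAddress > Address ? T.TargetAddress - Address
                                                : Address - T.TargetAddress;
  return Distance > ShortRange;
}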
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp index 3b6ffd230b31..856505e00a10 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp +++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp @@ -10,7 +10,7 @@ #include "SystemZRegisterInfo.h" #include "SystemZInstrInfo.h" #include "SystemZSubtarget.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/ADT/SmallSet.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -109,7 +109,7 @@ SystemZRegisterInfo::getRegAllocationHints(unsigned VirtReg, const MCPhysReg * SystemZRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { if (MF->getSubtarget().getTargetLowering()->supportSwiftError() && - MF->getFunction()->getAttributes().hasAttrSomewhere( + MF->getFunction().getAttributes().hasAttrSomewhere( Attribute::SwiftError)) return CSR_SystemZ_SwiftError_SaveList; return CSR_SystemZ_SaveList; @@ -119,7 +119,7 @@ const uint32_t * SystemZRegisterInfo::getCallPreservedMask(const MachineFunction &MF, CallingConv::ID CC) const { if (MF.getSubtarget().getTargetLowering()->supportSwiftError() && - MF.getFunction()->getAttributes().hasAttrSomewhere( + MF.getFunction().getAttributes().hasAttrSomewhere( Attribute::SwiftError)) return CSR_SystemZ_SwiftError_RegMask; return CSR_SystemZ_RegMask; diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.h b/lib/Target/SystemZ/SystemZRegisterInfo.h index 5f8f8ca9143d..8787a90b1e25 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.h +++ b/lib/Target/SystemZ/SystemZRegisterInfo.h @@ -51,6 +51,8 @@ struct SystemZRegisterInfo : public SystemZGenRegisterInfo { const VirtRegMap *VRM, const LiveRegMatrix *Matrix) const override; + bool enableMultipleCopyHints() const override { return true; } + // Override TargetRegisterInfo.h. 
bool requiresRegisterScavenging(const MachineFunction &MF) const override { return true; diff --git a/lib/Target/SystemZ/SystemZShortenInst.cpp b/lib/Target/SystemZ/SystemZShortenInst.cpp index d9c8fab56343..195fa20a2c90 100644 --- a/lib/Target/SystemZ/SystemZShortenInst.cpp +++ b/lib/Target/SystemZ/SystemZShortenInst.cpp @@ -309,7 +309,7 @@ bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) { } bool SystemZShortenInst::runOnMachineFunction(MachineFunction &F) { - if (skipFunction(*F.getFunction())) + if (skipFunction(F.getFunction())) return false; const SystemZSubtarget &ST = F.getSubtarget(); diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp index e74d68182949..3a167a6d452a 100644 --- a/lib/Target/SystemZ/SystemZTargetMachine.cpp +++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp @@ -257,8 +257,7 @@ TargetPassConfig *SystemZTargetMachine::createPassConfig(PassManagerBase &PM) { return new SystemZPassConfig(*this, PM); } -TargetIRAnalysis SystemZTargetMachine::getTargetIRAnalysis() { - return TargetIRAnalysis([this](const Function &F) { - return TargetTransformInfo(SystemZTTIImpl(this, F)); - }); +TargetTransformInfo +SystemZTargetMachine::getTargetTransformInfo(const Function &F) { + return TargetTransformInfo(SystemZTTIImpl(this, F)); } diff --git a/lib/Target/SystemZ/SystemZTargetMachine.h b/lib/Target/SystemZ/SystemZTargetMachine.h index 95ad5e339e0b..52bf8bba55de 100644 --- a/lib/Target/SystemZ/SystemZTargetMachine.h +++ b/lib/Target/SystemZ/SystemZTargetMachine.h @@ -44,7 +44,7 @@ class SystemZTargetMachine : public LLVMTargetMachine { // Override LLVMTargetMachine TargetPassConfig *createPassConfig(PassManagerBase &PM) override; - TargetIRAnalysis getTargetIRAnalysis() override; + TargetTransformInfo getTargetTransformInfo(const Function &F) override; TargetLoweringObjectFile *getObjFileLowering() const override { return TLOF.get(); diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp index 983923cbb6a1..72baf5985eac 100644 --- a/lib/Target/TargetLoweringObjectFile.cpp +++ b/lib/Target/TargetLoweringObjectFile.cpp @@ -15,14 +15,12 @@ #include "llvm/CodeGen/TargetLoweringObjectFile.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/TargetLowering.h" -#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Mangler.h" -#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCStreamer.h" diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp index 5d75223d979c..ee5b010ecf27 100644 --- a/lib/Target/TargetMachine.cpp +++ b/lib/Target/TargetMachine.cpp @@ -13,8 +13,6 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetLoweringObjectFile.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/Function.h" @@ -143,12 +141,10 @@ bool TargetMachine::shouldAssumeDSOLocal(const Module &M, // produce a 0 if it turns out the symbol is undefined. While this // is ABI and relocation depended, it seems worth it to handle it // here. - // FIXME: this is probably not ELF specific. 
- if (GV && isPositionIndependent() && TT.isOSBinFormatELF() && - GV->hasExternalWeakLinkage()) + if (GV && isPositionIndependent() && GV->hasExternalWeakLinkage()) return false; - if (GV && (GV->hasLocalLinkage() || !GV->hasDefaultVisibility())) + if (GV && !GV->hasDefaultVisibility()) return true; if (TT.isOSBinFormatMachO()) { @@ -221,10 +217,8 @@ CodeGenOpt::Level TargetMachine::getOptLevel() const { return OptLevel; } void TargetMachine::setOptLevel(CodeGenOpt::Level Level) { OptLevel = Level; } -TargetIRAnalysis TargetMachine::getTargetIRAnalysis() { - return TargetIRAnalysis([](const Function &F) { - return TargetTransformInfo(F.getParent()->getDataLayout()); - }); +TargetTransformInfo TargetMachine::getTargetTransformInfo(const Function &F) { + return TargetTransformInfo(F.getParent()->getDataLayout()); } void TargetMachine::getNameWithPrefix(SmallVectorImpl &Name, @@ -246,3 +240,10 @@ MCSymbol *TargetMachine::getSymbol(const GlobalValue *GV) const { getNameWithPrefix(NameStr, GV, TLOF->getMangler()); return TLOF->getContext().getOrCreateSymbol(NameStr); } + +TargetIRAnalysis TargetMachine::getTargetIRAnalysis() { + // Since Analysis can't depend on Target, use a std::function to invert the + // dependency. + return TargetIRAnalysis( + [this](const Function &F) { return this->getTargetTransformInfo(F); }); +} diff --git a/lib/Target/TargetMachineC.cpp b/lib/Target/TargetMachineC.cpp index 5ca19d020541..74fe7c5d3cde 100644 --- a/lib/Target/TargetMachineC.cpp +++ b/lib/Target/TargetMachineC.cpp @@ -15,7 +15,6 @@ #include "llvm-c/Target.h" #include "llvm-c/TargetMachine.h" #include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Module.h" diff --git a/lib/Target/WebAssembly/CMakeLists.txt b/lib/Target/WebAssembly/CMakeLists.txt index 78b2cdb61b76..68b68bd797b5 100644 --- a/lib/Target/WebAssembly/CMakeLists.txt +++ b/lib/Target/WebAssembly/CMakeLists.txt @@ -25,6 +25,7 @@ add_llvm_target(WebAssemblyCodeGen WebAssemblyInstrInfo.cpp WebAssemblyLowerBrUnless.cpp WebAssemblyLowerEmscriptenEHSjLj.cpp + WebAssemblyLowerGlobalDtors.cpp WebAssemblyMachineFunctionInfo.cpp WebAssemblyMCInstLower.cpp WebAssemblyOptimizeLiveIntervals.cpp diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp index 18de4273d1d0..e7c8809de70e 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp @@ -69,10 +69,10 @@ static MCCodeEmitter *createCodeEmitter(const MCInstrInfo &MCII, } static MCAsmBackend *createAsmBackend(const Target & /*T*/, + const MCSubtargetInfo &STI, const MCRegisterInfo & /*MRI*/, - const Triple &TT, StringRef /*CPU*/, const MCTargetOptions & /*Options*/) { - return createWebAssemblyAsmBackend(TT); + return createWebAssemblyAsmBackend(STI.getTargetTriple()); } static MCSubtargetInfo *createMCSubtargetInfo(const Triple &TT, StringRef CPU, diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp index c82a64d58246..0ca52ad651b5 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp @@ -108,10 +108,6 @@ void WebAssemblyTargetAsmStreamer::emitGlobal( } } -void WebAssemblyTargetAsmStreamer::emitStackPointer(uint32_t 
Index) { - OS << "\t.stack_pointer\t" << Index << '\n'; -} - void WebAssemblyTargetAsmStreamer::emitEndFunc() { OS << "\t.endfunc\n"; } void WebAssemblyTargetAsmStreamer::emitIndirectFunctionType( @@ -157,11 +153,6 @@ void WebAssemblyTargetELFStreamer::emitGlobal( llvm_unreachable(".globalvar encoding not yet implemented"); } -void WebAssemblyTargetELFStreamer::emitStackPointer( - uint32_t Index) { - llvm_unreachable(".stack_pointer encoding not yet implemented"); -} - void WebAssemblyTargetELFStreamer::emitEndFunc() { Streamer.EmitIntValue(WebAssembly::End, 1); } @@ -238,14 +229,6 @@ void WebAssemblyTargetWasmStreamer::emitGlobal( Streamer.PopSection(); } -void WebAssemblyTargetWasmStreamer::emitStackPointer(uint32_t Index) { - Streamer.PushSection(); - Streamer.SwitchSection(Streamer.getContext().getWasmSection( - ".stack_pointer", SectionKind::getMetadata())); - Streamer.EmitIntValue(Index, 4); - Streamer.PopSection(); -} - void WebAssemblyTargetWasmStreamer::emitEndFunc() { llvm_unreachable(".end_func is not needed for direct wasm output"); } @@ -277,4 +260,5 @@ void WebAssemblyTargetWasmStreamer::emitIndirectFunctionType( } void WebAssemblyTargetWasmStreamer::emitGlobalImport(StringRef name) { + llvm_unreachable(".global_import is not needed for direct wasm output"); } diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h index 102d7219a1e7..2cb21a20580b 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h @@ -39,8 +39,6 @@ class WebAssemblyTargetStreamer : public MCTargetStreamer { virtual void emitLocal(ArrayRef Types) = 0; /// .globalvar virtual void emitGlobal(ArrayRef Globals) = 0; - /// .stack_pointer - virtual void emitStackPointer(uint32_t Index) = 0; /// .endfunc virtual void emitEndFunc() = 0; /// .functype @@ -67,7 +65,6 @@ class WebAssemblyTargetAsmStreamer final : public WebAssemblyTargetStreamer { void emitResult(MCSymbol *Symbol, ArrayRef Types) override; void emitLocal(ArrayRef Types) override; void emitGlobal(ArrayRef Globals) override; - void emitStackPointer(uint32_t Index) override; void emitEndFunc() override; void emitIndirectFunctionType(MCSymbol *Symbol, SmallVectorImpl &Params, @@ -85,7 +82,6 @@ class WebAssemblyTargetELFStreamer final : public WebAssemblyTargetStreamer { void emitResult(MCSymbol *Symbol, ArrayRef Types) override; void emitLocal(ArrayRef Types) override; void emitGlobal(ArrayRef Globals) override; - void emitStackPointer(uint32_t Index) override; void emitEndFunc() override; void emitIndirectFunctionType(MCSymbol *Symbol, SmallVectorImpl &Params, @@ -103,7 +99,6 @@ class WebAssemblyTargetWasmStreamer final : public WebAssemblyTargetStreamer { void emitResult(MCSymbol *Symbol, ArrayRef Types) override; void emitLocal(ArrayRef Types) override; void emitGlobal(ArrayRef Globals) override; - void emitStackPointer(uint32_t Index) override; void emitEndFunc() override; void emitIndirectFunctionType(MCSymbol *Symbol, SmallVectorImpl &Params, diff --git a/lib/Target/WebAssembly/README.txt b/lib/Target/WebAssembly/README.txt index 3433b1553e8c..ef0099f07efb 100644 --- a/lib/Target/WebAssembly/README.txt +++ b/lib/Target/WebAssembly/README.txt @@ -2,15 +2,42 @@ This WebAssembly backend is presently under development. 
-Currently the easiest way to use it is through Emscripten, which provides a -compilation environment that includes standard libraries, tools, and packaging -for producing WebAssembly applications that can run in browsers and other -environments. For more information, see the Emscripten documentation in -general, and this page in particular: +The most notable feature which is not yet stable is the ".o" file format. +".o" file support is needed for many common ways of using LLVM, such as +using it through "clang -c", so this backend is not yet considered widely +usable. However, this backend is usable within some language toolchain +packages: + +Emscripten provides a C/C++ compilation environment that includes standard +libraries, tools, and packaging for producing WebAssembly applications that +can run in browsers and other environments. For more information, see the +Emscripten documentation in general, and this page in particular: + * https://github.com/kripken/emscripten/wiki/New-WebAssembly-Backend + +Rust provides WebAssembly support integrated into Cargo. There are two +main options: + - wasm32-unknown-unknown, which provides a relatively minimal environment + that has an emphasis on being "native" + - wasm32-unknown-emscripten, which uses Emscripten internally and + provides standard C/C++ libraries, filesystem emulation, GL and SDL + bindings +For more information, see: + * https://www.hellorust.com/ + + +This backend does not yet support debug info. Full DWARF support needs a +design for how DWARF should be represented in WebAssembly. Sourcemap support +has an existing design and some corresponding browser implementations, so it +just needs implementing in LLVM. -Other ways of using this backend, such as via a standalone "clang", are also -under development, though they are not generally usable yet. +Work-in-progress documentation for the ".o" file format is here: + + * https://github.com/WebAssembly/tool-conventions/blob/master/Linking.md + +A corresponding linker implementation is also under development: + + * https://lld.llvm.org/WebAssembly.html For more information on WebAssembly itself, see the home page: * https://webassembly.github.io/ @@ -30,6 +57,8 @@ turn red if not. Once most of these pass, further testing will use LLVM's own test suite. The tests can be run locally using: https://github.com/WebAssembly/waterfall/blob/master/src/compile_torture_tests.py +Some notes on ways that the generated code could be improved follow: + //===---------------------------------------------------------------------===// Br, br_if, and br_table instructions can support having a value on the value @@ -127,7 +156,7 @@ However, if moving the binary operator to its user moves it to a place where its operands can't be moved to, it would be better to leave it in place, or perhaps move it up, so that it can stackify its operands. A binary operator has two operands and one result, so in such cases there could be a net win by -prefering the operands. +preferring the operands. //===---------------------------------------------------------------------===// @@ -138,11 +167,10 @@ instructions advantageously for this purpose. //===---------------------------------------------------------------------===// -WebAssembly is now officially a stack machine, rather than an AST, and this -comes with additional opportunities for WebAssemblyRegStackify. Specifically, -the stack doesn't need to be empty after an instruction with no return values. 
-WebAssemblyRegStackify could be extended, or possibly rewritten, to take -advantage of the new opportunities. +WebAssemblyRegStackify currently assumes that the stack must be empty after +an instruction with no return values, however wasm doesn't actually require +this. WebAssemblyRegStackify could be extended, or possibly rewritten, to take +full advantage of what WebAssembly permits. //===---------------------------------------------------------------------===// diff --git a/lib/Target/WebAssembly/WebAssembly.h b/lib/Target/WebAssembly/WebAssembly.h index e04c4db19c8c..7ac6c3991531 100644 --- a/lib/Target/WebAssembly/WebAssembly.h +++ b/lib/Target/WebAssembly/WebAssembly.h @@ -28,6 +28,7 @@ class FunctionPass; // LLVM IR passes. ModulePass *createWebAssemblyLowerEmscriptenEHSjLj(bool DoEH, bool DoSjLj); void initializeWebAssemblyLowerEmscriptenEHSjLjPass(PassRegistry &); +ModulePass *createWebAssemblyLowerGlobalDtors(); ModulePass *createWebAssemblyFixFunctionBitcasts(); FunctionPass *createWebAssemblyOptimizeReturned(); diff --git a/lib/Target/WebAssembly/WebAssembly.td b/lib/Target/WebAssembly/WebAssembly.td index 99cf1f119a20..76b3ddbbfffa 100644 --- a/lib/Target/WebAssembly/WebAssembly.td +++ b/lib/Target/WebAssembly/WebAssembly.td @@ -32,6 +32,11 @@ def FeatureNontrappingFPToInt : "HasNontrappingFPToInt", "true", "Enable non-trapping float-to-int conversion operators">; +def FeatureSignExt : + SubtargetFeature<"sign-ext", + "HasSignExt", "true", + "Enable sign extension operators">; + //===----------------------------------------------------------------------===// // Architectures. //===----------------------------------------------------------------------===// diff --git a/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp index ee60c8f3a7a3..204d97cbdd44 100644 --- a/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp +++ b/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp @@ -90,10 +90,13 @@ void WebAssemblyAsmPrinter::EmitEndOfAsmFile(Module &M) { } for (const auto &G : M.globals()) { if (!G.hasInitializer() && G.hasExternalLinkage()) { - uint16_t Size = M.getDataLayout().getTypeAllocSize(G.getValueType()); - getTargetStreamer()->emitGlobalImport(G.getGlobalIdentifier()); - OutStreamer->emitELFSize(getSymbol(&G), - MCConstantExpr::create(Size, OutContext)); + if (G.getValueType()->isSized()) { + uint16_t Size = M.getDataLayout().getTypeAllocSize(G.getValueType()); + if (TM.getTargetTriple().isOSBinFormatELF()) + getTargetStreamer()->emitGlobalImport(G.getGlobalIdentifier()); + OutStreamer->emitELFSize(getSymbol(&G), + MCConstantExpr::create(Size, OutContext)); + } } } } @@ -111,7 +114,7 @@ void WebAssemblyAsmPrinter::EmitFunctionBodyStart() { getTargetStreamer()->emitParam(CurrentFnSym, MFI->getParams()); SmallVector ResultVTs; - const Function &F(*MF->getFunction()); + const Function &F = MF->getFunction(); // Emit the function index. 
if (MDNode *Idx = F.getMetadata("wasm.index")) { @@ -187,7 +190,7 @@ void WebAssemblyAsmPrinter::EmitInstruction(const MachineInstr *MI) { if (isVerbose()) { OutStreamer->AddComment("fallthrough-return: $pop" + - utostr(MFI->getWARegStackId( + Twine(MFI->getWARegStackId( MFI->getWAReg(MI->getOperand(0).getReg())))); OutStreamer->AddBlankLine(); } diff --git a/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp b/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp index b2330a232093..1af92f02d8e0 100644 --- a/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp +++ b/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp @@ -27,7 +27,7 @@ #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblySubtarget.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineInstrBuilder.h" diff --git a/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp b/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp index 19df75c7091b..666337acccce 100644 --- a/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp +++ b/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp @@ -36,6 +36,11 @@ using namespace llvm; #define DEBUG_TYPE "wasm-fix-function-bitcasts" +static cl::opt TemporaryWorkarounds( + "wasm-temporary-workarounds", + cl::desc("Apply certain temporary workarounds"), + cl::init(true), cl::Hidden); + namespace { class FixFunctionBitcasts final : public ModulePass { StringRef getPassName() const override { @@ -107,9 +112,10 @@ static Function *CreateWrapper(Function *F, FunctionType *Ty) { // Determine what arguments to pass. SmallVector Args; Function::arg_iterator AI = Wrapper->arg_begin(); + Function::arg_iterator AE = Wrapper->arg_end(); FunctionType::param_iterator PI = F->getFunctionType()->param_begin(); FunctionType::param_iterator PE = F->getFunctionType()->param_end(); - for (; AI != Wrapper->arg_end() && PI != PE; ++AI, ++PI) { + for (; AI != AE && PI != PE; ++AI, ++PI) { if (AI->getType() != *PI) { Wrapper->eraseFromParent(); return nullptr; @@ -118,6 +124,9 @@ static Function *CreateWrapper(Function *F, FunctionType *Ty) { } for (; PI != PE; ++PI) Args.push_back(UndefValue::get(*PI)); + if (F->isVarArg()) + for (; AI != AE; ++AI) + Args.push_back(&*AI); CallInst *Call = CallInst::Create(F, Args, "", BB); @@ -138,11 +147,41 @@ static Function *CreateWrapper(Function *F, FunctionType *Ty) { } bool FixFunctionBitcasts::runOnModule(Module &M) { + Function *Main = nullptr; + CallInst *CallMain = nullptr; SmallVector, 0> Uses; SmallPtrSet ConstantBCs; // Collect all the places that need wrappers. - for (Function &F : M) FindUses(&F, F, Uses, ConstantBCs); + for (Function &F : M) { + FindUses(&F, F, Uses, ConstantBCs); + + // If we have a "main" function, and its type isn't + // "int main(int argc, char *argv[])", create an artificial call with it + // bitcasted to that type so that we generate a wrapper for it, so that + // the C runtime can call it. 
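+    // (For example, a user-defined "void main(void)" gets a wrapper with the
+    // expected signature here; further down, the original is renamed to
+    // "__original_main" and the wrapper takes over the "main" name.)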
+ if (!TemporaryWorkarounds && !F.isDeclaration() && F.getName() == "main") { + Main = &F; + LLVMContext &C = M.getContext(); + Type *MainArgTys[] = { + PointerType::get(Type::getInt8PtrTy(C), 0), + Type::getInt32Ty(C) + }; + FunctionType *MainTy = FunctionType::get(Type::getInt32Ty(C), MainArgTys, + /*isVarArg=*/false); + if (F.getFunctionType() != MainTy) { + Value *Args[] = { + UndefValue::get(MainArgTys[0]), + UndefValue::get(MainArgTys[1]) + }; + Value *Casted = ConstantExpr::getBitCast(Main, + PointerType::get(MainTy, 0)); + CallMain = CallInst::Create(Casted, Args, "call_main"); + Use *UseMain = &CallMain->getOperandUse(2); + Uses.push_back(std::make_pair(UseMain, &F)); + } + } + } DenseMap, Function *> Wrappers; @@ -158,9 +197,9 @@ bool FixFunctionBitcasts::runOnModule(Module &M) { if (!Ty) continue; - // Wasm varargs are not ABI-compatible with non-varargs. Just ignore - // such casts for now. - if (Ty->isVarArg() || F->isVarArg()) + // Bitcasted vararg functions occur in Emscripten's implementation of + // EM_ASM, so suppress wrappers for them for now. + if (TemporaryWorkarounds && (Ty->isVarArg() || F->isVarArg())) continue; auto Pair = Wrappers.insert(std::make_pair(std::make_pair(F, Ty), nullptr)); @@ -177,5 +216,19 @@ bool FixFunctionBitcasts::runOnModule(Module &M) { U->set(Wrapper); } + // If we created a wrapper for main, rename the wrapper so that it's the + // one that gets called from startup. + if (CallMain) { + Main->setName("__original_main"); + Function *MainWrapper = + cast(CallMain->getCalledValue()->stripPointerCasts()); + MainWrapper->setName("main"); + MainWrapper->setLinkage(Main->getLinkage()); + MainWrapper->setVisibility(Main->getVisibility()); + Main->setLinkage(Function::PrivateLinkage); + Main->setVisibility(Function::DefaultVisibility); + delete CallMain; + } + return true; } diff --git a/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp b/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp index 41f315c2825b..88daea7e3681 100644 --- a/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp +++ b/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp @@ -205,8 +205,7 @@ bool WebAssemblyFixIrreducibleControlFlow::VisitLoop(MachineFunction &MF, continue; unsigned Index = MIB.getInstr()->getNumExplicitOperands() - 1; - DEBUG(dbgs() << "MBB#" << MBB->getNumber() << " has index " << Index - << "\n"); + DEBUG(dbgs() << printMBBReference(*MBB) << " has index " << Index << "\n"); Pair.first->second = Index; for (auto Pred : MBB->predecessors()) diff --git a/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp b/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp index a37d6136e44e..84246052f601 100644 --- a/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp +++ b/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp @@ -94,7 +94,7 @@ bool WebAssemblyFrameLowering::needsSPWriteback( const MachineFunction &MF, const MachineFrameInfo &MFI) const { assert(needsSP(MF, MFI)); return MFI.getStackSize() > RedZoneSize || MFI.hasCalls() || - MF.getFunction()->hasFnAttribute(Attribute::NoRedZone); + MF.getFunction().hasFnAttribute(Attribute::NoRedZone); } static void writeSPToMemory(unsigned SrcReg, MachineFunction &MF, diff --git a/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp b/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp index 4f3ae57733e5..9f40d35689a5 100644 --- a/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp +++ b/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp @@ -48,9 +48,8 @@ class 
WebAssemblyDAGToDAGISel final : public SelectionDAGISel { } bool runOnMachineFunction(MachineFunction &MF) override { - ForCodeSize = - MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize) || - MF.getFunction()->hasFnAttribute(Attribute::MinSize); + ForCodeSize = MF.getFunction().hasFnAttribute(Attribute::OptimizeForSize) || + MF.getFunction().hasFnAttribute(Attribute::MinSize); Subtarget = &MF.getSubtarget(); return SelectionDAGISel::runOnMachineFunction(MF); } diff --git a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index 79e5e14764e8..d0b3ad371191 100644 --- a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -117,8 +117,7 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( // As a special case, these operators use the type to mean the type to // sign-extend from. setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); - if (!Subtarget->hasAtomics()) { - // The Atomics feature includes signext intructions. + if (!Subtarget->hasSignExt()) { for (auto T : {MVT::i8, MVT::i16, MVT::i32}) setOperationAction(ISD::SIGN_EXTEND_INREG, T, Expand); } @@ -207,11 +206,14 @@ LowerFPToInt( unsigned Abs = Float64 ? WebAssembly::ABS_F64 : WebAssembly::ABS_F32; unsigned FConst = Float64 ? WebAssembly::CONST_F64 : WebAssembly::CONST_F32; unsigned LT = Float64 ? WebAssembly::LT_F64 : WebAssembly::LT_F32; + unsigned GE = Float64 ? WebAssembly::GE_F64 : WebAssembly::GE_F32; unsigned IConst = Int64 ? WebAssembly::CONST_I64 : WebAssembly::CONST_I32; + unsigned Eqz = WebAssembly::EQZ_I32; + unsigned And = WebAssembly::AND_I32; int64_t Limit = Int64 ? INT64_MIN : INT32_MIN; int64_t Substitute = IsUnsigned ? 0 : Limit; double CmpVal = IsUnsigned ? -(double)Limit * 2.0 : -(double)Limit; - auto &Context = BB->getParent()->getFunction()->getContext(); + auto &Context = BB->getParent()->getFunction().getContext(); Type *Ty = Float64 ? Type::getDoubleTy(Context) : Type::getFloatTy(Context); const BasicBlock *LLVM_BB = BB->getBasicBlock(); @@ -236,14 +238,17 @@ LowerFPToInt( TrueMBB->addSuccessor(DoneMBB); FalseMBB->addSuccessor(DoneMBB); - unsigned Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; + unsigned Tmp0, Tmp1, CmpReg, EqzReg, FalseReg, TrueReg; Tmp0 = MRI.createVirtualRegister(MRI.getRegClass(InReg)); Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg)); - Tmp2 = MRI.createVirtualRegister(&WebAssembly::I32RegClass); - Tmp3 = MRI.createVirtualRegister(MRI.getRegClass(OutReg)); - Tmp4 = MRI.createVirtualRegister(MRI.getRegClass(OutReg)); + CmpReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); + EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); + FalseReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg)); + TrueReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg)); MI.eraseFromParent(); + // For signed numbers, we can do a single comparison to determine whether + // fabs(x) is within range. if (IsUnsigned) { Tmp0 = InReg; } else { @@ -252,24 +257,44 @@ LowerFPToInt( } BuildMI(BB, DL, TII.get(FConst), Tmp1) .addFPImm(cast(ConstantFP::get(Ty, CmpVal))); - BuildMI(BB, DL, TII.get(LT), Tmp2) + BuildMI(BB, DL, TII.get(LT), CmpReg) .addReg(Tmp0) .addReg(Tmp1); + + // For unsigned numbers, we have to do a separate comparison with zero. 
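+  // (That is, the input is treated as in-range only when both x < CmpVal and
+  // x >= 0.0 hold, so the two comparison results are ANDed together.)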
+ if (IsUnsigned) { + Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg)); + unsigned SecondCmpReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); + unsigned AndReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); + BuildMI(BB, DL, TII.get(FConst), Tmp1) + .addFPImm(cast(ConstantFP::get(Ty, 0.0))); + BuildMI(BB, DL, TII.get(GE), SecondCmpReg) + .addReg(Tmp0) + .addReg(Tmp1); + BuildMI(BB, DL, TII.get(And), AndReg) + .addReg(CmpReg) + .addReg(SecondCmpReg); + CmpReg = AndReg; + } + + BuildMI(BB, DL, TII.get(Eqz), EqzReg) + .addReg(CmpReg); + + // Create the CFG diamond to select between doing the conversion or using + // the substitute value. BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)) .addMBB(TrueMBB) - .addReg(Tmp2); - - BuildMI(FalseMBB, DL, TII.get(IConst), Tmp3) - .addImm(Substitute); + .addReg(EqzReg); + BuildMI(FalseMBB, DL, TII.get(LoweredOpcode), FalseReg) + .addReg(InReg); BuildMI(FalseMBB, DL, TII.get(WebAssembly::BR)) .addMBB(DoneMBB); - BuildMI(TrueMBB, DL, TII.get(LoweredOpcode), Tmp4) - .addReg(InReg); - + BuildMI(TrueMBB, DL, TII.get(IConst), TrueReg) + .addImm(Substitute); BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(TargetOpcode::PHI), OutReg) - .addReg(Tmp3) + .addReg(FalseReg) .addMBB(FalseMBB) - .addReg(Tmp4) + .addReg(TrueReg) .addMBB(TrueMBB); return DoneMBB; @@ -412,7 +437,7 @@ bool WebAssemblyTargetLowering::isIntDivCheap(EVT VT, static void fail(const SDLoc &DL, SelectionDAG &DAG, const char *msg) { MachineFunction &MF = DAG.getMachineFunction(); DAG.getContext()->diagnose( - DiagnosticInfoUnsupported(*MF.getFunction(), msg, DL.getDebugLoc())); + DiagnosticInfoUnsupported(MF.getFunction(), msg, DL.getDebugLoc())); } // Test whether the given calling convention is supported. @@ -671,7 +696,7 @@ SDValue WebAssemblyTargetLowering::LowerFormalArguments( // Record the number and types of results. 
SmallVector Params; SmallVector Results; - ComputeSignatureVTs(*MF.getFunction(), DAG.getTarget(), Params, Results); + ComputeSignatureVTs(MF.getFunction(), DAG.getTarget(), Params, Results); for (MVT VT : Results) MFI->addResult(VT); diff --git a/lib/Target/WebAssembly/WebAssemblyInstrConv.td b/lib/Target/WebAssembly/WebAssemblyInstrConv.td index 426c2c802172..bf1282b5edfa 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrConv.td +++ b/lib/Target/WebAssembly/WebAssemblyInstrConv.td @@ -26,7 +26,7 @@ def I64_EXTEND_U_I32 : I<(outs I64:$dst), (ins I32:$src), [(set I64:$dst, (zext I32:$src))], "i64.extend_u/i32\t$dst, $src", 0xad>; -let Predicates = [HasAtomics] in { +let Predicates = [HasSignExt] in { def I32_EXTEND8_S_I32 : I<(outs I32:$dst), (ins I32:$src), [(set I32:$dst, (sext_inreg I32:$src, i8))], "i32.extend8_s\t$dst, $src", 0xc0>; @@ -42,7 +42,7 @@ def I64_EXTEND16_S_I64 : I<(outs I64:$dst), (ins I64:$src), def I64_EXTEND32_S_I64 : I<(outs I64:$dst), (ins I64:$src), [(set I64:$dst, (sext_inreg I64:$src, i32))], "i64.extend32_s\t$dst, $src", 0xc4>; -} // Predicates = [HasAtomics] +} // Predicates = [HasSignExt] } // defs = [ARGUMENTS] diff --git a/lib/Target/WebAssembly/WebAssemblyInstrInfo.td b/lib/Target/WebAssembly/WebAssemblyInstrInfo.td index f8d311ac3b00..245d5abbf263 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrInfo.td +++ b/lib/Target/WebAssembly/WebAssemblyInstrInfo.td @@ -30,6 +30,14 @@ def NotHasNontrappingFPToInt : Predicate<"!Subtarget->hasNontrappingFPToInt()">, AssemblerPredicate<"!FeatureNontrappingFPToInt", "nontrapping-fptoint">; +def HasSignExt : + Predicate<"Subtarget->hasSignExt()">, + AssemblerPredicate<"FeatureSignExt", + "sign-ext">; +def NotHasSignExt : + Predicate<"!Subtarget->hasSignExt()">, + AssemblerPredicate<"!FeatureSignExt", + "sign-ext">; //===----------------------------------------------------------------------===// // WebAssembly-specific DAG Node Types. diff --git a/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp b/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp index 576b71dd7966..5b867aa763a1 100644 --- a/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp +++ b/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp @@ -99,6 +99,13 @@ bool WebAssemblyLowerBrUnless::runOnMachineFunction(MachineFunction &MF) { case NE_F32: Def->setDesc(TII.get(EQ_F32)); Inverted = true; break; case EQ_F64: Def->setDesc(TII.get(NE_F64)); Inverted = true; break; case NE_F64: Def->setDesc(TII.get(EQ_F64)); Inverted = true; break; + case EQZ_I32: { + // Invert an eqz by replacing it with its operand. + Cond = Def->getOperand(1).getReg(); + Def->eraseFromParent(); + Inverted = true; + break; + } default: break; } } diff --git a/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp b/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp new file mode 100644 index 000000000000..0020817aee41 --- /dev/null +++ b/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp @@ -0,0 +1,191 @@ +//===-- WebAssemblyLowerGlobalDtors.cpp - Lower @llvm.global_dtors --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief Lower @llvm.global_dtors. +/// +/// WebAssembly doesn't have a builtin way to invoke static destructors. 
+/// Implement @llvm.global_dtors by creating wrapper functions that are
+/// registered in @llvm.global_ctors and which contain a call to
+/// `__cxa_atexit` to register their destructor functions.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssembly.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+#include "llvm/Pass.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-lower-global-dtors"
+
+namespace {
+class LowerGlobalDtors final : public ModulePass {
+  StringRef getPassName() const override {
+    return "WebAssembly Lower @llvm.global_dtors";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    ModulePass::getAnalysisUsage(AU);
+  }
+
+  bool runOnModule(Module &M) override;
+
+public:
+  static char ID;
+  LowerGlobalDtors() : ModulePass(ID) {}
+};
+} // End anonymous namespace
+
+char LowerGlobalDtors::ID = 0;
+ModulePass *llvm::createWebAssemblyLowerGlobalDtors() {
+  return new LowerGlobalDtors();
+}
+
+bool LowerGlobalDtors::runOnModule(Module &M) {
+  GlobalVariable *GV = M.getGlobalVariable("llvm.global_dtors");
+  if (!GV)
+    return false;
+
+  const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
+  if (!InitList)
+    return false;
+
+  // Sanity-check @llvm.global_dtor's type.
+  StructType *ETy = dyn_cast<StructType>(InitList->getType()->getElementType());
+  if (!ETy || ETy->getNumElements() != 3 ||
+      !ETy->getTypeAtIndex(0U)->isIntegerTy() ||
+      !ETy->getTypeAtIndex(1U)->isPointerTy() ||
+      !ETy->getTypeAtIndex(2U)->isPointerTy())
+    return false; // Not (int, ptr, ptr).
+
+  // Collect the contents of @llvm.global_dtors, collated by priority and
+  // associated symbol.
+  std::map<uint16_t, MapVector<Constant *, std::vector<Constant *> > > DtorFuncs;
+  for (Value *O : InitList->operands()) {
+    ConstantStruct *CS = dyn_cast<ConstantStruct>(O);
+    if (!CS) continue; // Malformed.
+
+    ConstantInt *Priority = dyn_cast<ConstantInt>(CS->getOperand(0));
+    if (!Priority) continue; // Malformed.
+    uint16_t PriorityValue = Priority->getLimitedValue(UINT16_MAX);
+
+    Constant *DtorFunc = CS->getOperand(1);
+    if (DtorFunc->isNullValue())
+      break; // Found a null terminator, skip the rest.
+
+    Constant *Associated = CS->getOperand(2);
+    Associated = cast<Constant>(Associated->stripPointerCastsNoFollowAliases());
+
+    DtorFuncs[PriorityValue][Associated].push_back(DtorFunc);
+  }
+  if (DtorFuncs.empty())
+    return false;
+
+  // extern "C" int __cxa_atexit(void (*f)(void *), void *p, void *d);
+  LLVMContext &C = M.getContext();
+  PointerType *VoidStar = Type::getInt8PtrTy(C);
+  Type *AtExitFuncArgs[] = { VoidStar };
+  FunctionType *AtExitFuncTy = FunctionType::get(
+      Type::getVoidTy(C),
+      AtExitFuncArgs,
+      /*isVarArg=*/false);
+
+  Type *AtExitArgs[] = {
+    PointerType::get(AtExitFuncTy, 0),
+    VoidStar,
+    VoidStar
+  };
+  FunctionType *AtExitTy = FunctionType::get(
+      Type::getInt32Ty(C),
+      AtExitArgs,
+      /*isVarArg=*/false);
+  Constant *AtExit = M.getOrInsertFunction("__cxa_atexit", AtExitTy);
+
+  // Declare __dso_handle.
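+  // (__cxa_atexit's third argument identifies the calling DSO; if the module
+  // doesn't already provide a __dso_handle, create a hidden extern_weak i8
+  // declaration to stand in for it.)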
+ Constant *DsoHandle = M.getNamedValue("__dso_handle"); + if (!DsoHandle) { + Type *DsoHandleTy = Type::getInt8Ty(C); + GlobalVariable *Handle = + new GlobalVariable(M, DsoHandleTy, /*isConstant=*/true, + GlobalVariable::ExternalWeakLinkage, + nullptr, "__dso_handle"); + Handle->setVisibility(GlobalVariable::HiddenVisibility); + DsoHandle = Handle; + } + + // For each unique priority level and associated symbol, generate a function + // to call all the destructors at that level, and a function to register the + // first function with __cxa_atexit. + for (auto &PriorityAndMore : DtorFuncs) { + uint16_t Priority = PriorityAndMore.first; + for (auto &AssociatedAndMore : PriorityAndMore.second) { + Constant *Associated = AssociatedAndMore.first; + + Function *CallDtors = Function::Create( + AtExitFuncTy, Function::PrivateLinkage, + "call_dtors" + + (Priority != UINT16_MAX ? + (Twine(".") + Twine(Priority)) : Twine()) + + (!Associated->isNullValue() ? + (Twine(".") + Associated->getName()) : Twine()), + &M); + BasicBlock *BB = BasicBlock::Create(C, "body", CallDtors); + + for (auto Dtor : AssociatedAndMore.second) + CallInst::Create(Dtor, "", BB); + ReturnInst::Create(C, BB); + + FunctionType *VoidVoid = FunctionType::get(Type::getVoidTy(C), + /*isVarArg=*/false); + Function *RegisterCallDtors = Function::Create( + VoidVoid, Function::PrivateLinkage, + "register_call_dtors" + + (Priority != UINT16_MAX ? + (Twine(".") + Twine(Priority)) : Twine()) + + (!Associated->isNullValue() ? + (Twine(".") + Associated->getName()) : Twine()), + &M); + BasicBlock *EntryBB = BasicBlock::Create(C, "entry", RegisterCallDtors); + BasicBlock *FailBB = BasicBlock::Create(C, "fail", RegisterCallDtors); + BasicBlock *RetBB = BasicBlock::Create(C, "return", RegisterCallDtors); + + Value *Null = ConstantPointerNull::get(VoidStar); + Value *Args[] = { CallDtors, Null, DsoHandle }; + Value *Res = CallInst::Create(AtExit, Args, "call", EntryBB); + Value *Cmp = new ICmpInst(*EntryBB, ICmpInst::ICMP_NE, Res, + Constant::getNullValue(Res->getType())); + BranchInst::Create(FailBB, RetBB, Cmp, EntryBB); + + // If `__cxa_atexit` hits out-of-memory, trap, so that we don't misbehave. + // This should be very rare, because if the process is running out of memory + // before main has even started, something is wrong. + CallInst::Create(Intrinsic::getDeclaration(&M, Intrinsic::trap), + "", FailBB); + new UnreachableInst(C, FailBB); + + ReturnInst::Create(C, RetBB); + + // Now register the registration function with @llvm.global_ctors. + appendToGlobalCtors(M, RegisterCallDtors, Priority, Associated); + } + } + + // Now that we've lowered everything, remove @llvm.global_dtors. 
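+  // (The destructors themselves remain reachable through the registration
+  // functions just appended to @llvm.global_ctors.)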
+ GV->eraseFromParent(); + + return true; +} diff --git a/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp b/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp index 8880539804ca..4a93d4810c7d 100644 --- a/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp +++ b/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp @@ -43,7 +43,7 @@ WebAssemblyMCInstLower::GetGlobalAddressSymbol(const MachineOperand &MO) const { if (const auto *FuncTy = dyn_cast(Global->getValueType())) { const MachineFunction &MF = *MO.getParent()->getParent()->getParent(); const TargetMachine &TM = MF.getTarget(); - const Function &CurrentFunc = *MF.getFunction(); + const Function &CurrentFunc = MF.getFunction(); SmallVector Returns; SmallVector Params; diff --git a/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp b/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp index 5a3a7411ed46..ebe97848d461 100644 --- a/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp +++ b/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp @@ -15,14 +15,14 @@ /// have multiple defs, and then they do, the defs are usually closely related. /// Later, after coalescing, tail duplication, and other optimizations, it's /// more common to see registers with multiple unrelated defs. This pass -/// updates LiveIntervalAnalysis to distribute the value numbers across separate +/// updates LiveIntervals to distribute the value numbers across separate /// LiveIntervals. /// //===----------------------------------------------------------------------===// #include "WebAssembly.h" #include "WebAssemblySubtarget.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" diff --git a/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp b/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp index 1462c49aa9fd..3a2876bfcde2 100644 --- a/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp +++ b/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp @@ -117,7 +117,7 @@ bool WebAssemblyPrepareForLiveIntervals::runOnMachineFunction(MachineFunction &M } } - // Ok, we're now ready to run LiveIntervalAnalysis again. + // Ok, we're now ready to run the LiveIntervals analysis again. 
MF.getProperties().set(MachineFunctionProperties::Property::TracksLiveness); return Changed; diff --git a/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp b/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp index ba39b6cdb568..2ac3a839c3c8 100644 --- a/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp +++ b/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp @@ -19,7 +19,7 @@ #include "WebAssembly.h" #include "WebAssemblyMachineFunctionInfo.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" diff --git a/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp index ea9e3fa862ce..a4bb967f36f6 100644 --- a/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp +++ b/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp @@ -26,7 +26,7 @@ #include "WebAssemblySubtarget.h" #include "WebAssemblyUtilities.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -107,12 +107,12 @@ static void ConvertImplicitDefToConstZero(MachineInstr *MI, } else if (RegClass == &WebAssembly::F32RegClass) { MI->setDesc(TII->get(WebAssembly::CONST_F32)); ConstantFP *Val = cast(Constant::getNullValue( - Type::getFloatTy(MF.getFunction()->getContext()))); + Type::getFloatTy(MF.getFunction().getContext()))); MI->addOperand(MachineOperand::CreateFPImm(Val)); } else if (RegClass == &WebAssembly::F64RegClass) { MI->setDesc(TII->get(WebAssembly::CONST_F64)); ConstantFP *Val = cast(Constant::getNullValue( - Type::getDoubleTy(MF.getFunction()->getContext()))); + Type::getDoubleTy(MF.getFunction().getContext()))); MI->addOperand(MachineOperand::CreateFPImm(Val)); } else { llvm_unreachable("Unexpected reg class"); @@ -746,6 +746,14 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) { MachineDominatorTree &MDT = getAnalysis(); LiveIntervals &LIS = getAnalysis(); + // Disable the TEE optimization if we aren't doing direct wasm object + // emission, because lowering TEE to TEE_LOCAL is done in the ExplicitLocals + // pass, which is also disabled. + bool UseTee = true; + if (MF.getSubtarget() + .getTargetTriple().isOSBinFormatELF()) + UseTee = false; + // Walk the instructions from the bottom up. Currently we don't look past // block boundaries, and the blocks aren't ordered so the block visitation // order isn't significant, but we may want to change this in the future. 
@@ -811,7 +819,7 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) { Insert = RematerializeCheapDef(Reg, Op, *Def, MBB, Insert->getIterator(), LIS, MFI, MRI, TII, TRI); - } else if (CanMove && + } else if (UseTee && CanMove && OneUseDominatesOtherUses(Reg, Op, MBB, MRI, MDT, LIS, MFI)) { Insert = MoveAndTeeForMultiUse(Reg, Op, Def, MBB, Insert, LIS, MFI, MRI, TII); diff --git a/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp b/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp index 2599064334ee..d4d415206660 100644 --- a/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp +++ b/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp @@ -22,6 +22,7 @@ #include "WebAssemblyRuntimeLibcallSignatures.h" #include "WebAssemblySubtarget.h" #include "llvm/CodeGen/RuntimeLibcalls.h" +#include "llvm/Support/ManagedStatic.h" using namespace llvm; @@ -84,912 +85,401 @@ enum RuntimeLibcallSignature { unsupported }; -} // end anonymous namespace - -static const RuntimeLibcallSignature -RuntimeLibcallSignatures[RTLIB::UNKNOWN_LIBCALL] = { -// Integer -/* SHL_I16 */ i16_func_i16_i16, -/* SHL_I32 */ i32_func_i32_i32, -/* SHL_I64 */ i64_func_i64_i64, -/* SHL_I128 */ i64_i64_func_i64_i64_i32, -/* SRL_I16 */ i16_func_i16_i16, -/* SRL_I32 */ i32_func_i32_i32, -/* SRL_I64 */ i64_func_i64_i64, -/* SRL_I128 */ i64_i64_func_i64_i64_i32, -/* SRA_I16 */ i16_func_i16_i16, -/* SRA_I32 */ i32_func_i32_i32, -/* SRA_I64 */ i64_func_i64_i64, -/* SRA_I128 */ i64_i64_func_i64_i64_i32, -/* MUL_I8 */ i8_func_i8_i8, -/* MUL_I16 */ i16_func_i16_i16, -/* MUL_I32 */ i32_func_i32_i32, -/* MUL_I64 */ i64_func_i64_i64, -/* MUL_I128 */ i64_i64_func_i64_i64_i64_i64, -/* MULO_I32 */ i32_func_i32_i32, -/* MULO_I64 */ i64_func_i64_i64, -/* MULO_I128 */ i64_i64_func_i64_i64_i64_i64, -/* SDIV_I8 */ i8_func_i8_i8, -/* SDIV_I16 */ i16_func_i16_i16, -/* SDIV_I32 */ i32_func_i32_i32, -/* SDIV_I64 */ i64_func_i64_i64, -/* SDIV_I128 */ i64_i64_func_i64_i64_i64_i64, -/* UDIV_I8 */ i8_func_i8_i8, -/* UDIV_I16 */ i16_func_i16_i16, -/* UDIV_I32 */ i32_func_i32_i32, -/* UDIV_I64 */ i64_func_i64_i64, -/* UDIV_I128 */ i64_i64_func_i64_i64_i64_i64, -/* SREM_I8 */ i8_func_i8_i8, -/* SREM_I16 */ i16_func_i16_i16, -/* SREM_I32 */ i32_func_i32_i32, -/* SREM_I64 */ i64_func_i64_i64, -/* SREM_I128 */ i64_i64_func_i64_i64_i64_i64, -/* UREM_I8 */ i8_func_i8_i8, -/* UREM_I16 */ i16_func_i16_i16, -/* UREM_I32 */ i32_func_i32_i32, -/* UREM_I64 */ i64_func_i64_i64, -/* UREM_I128 */ i64_i64_func_i64_i64_i64_i64, -/* SDIVREM_I8 */ i8_func_i8_i8, -/* SDIVREM_I16 */ i16_i16_func_i16_i16, -/* SDIVREM_I32 */ i32_i32_func_i32_i32, -/* SDIVREM_I64 */ i64_func_i64_i64, -/* SDIVREM_I128 */ i64_i64_i64_i64_func_i64_i64_i64_i64, -/* UDIVREM_I8 */ i8_func_i8_i8, -/* UDIVREM_I16 */ i16_i16_func_i16_i16, -/* UDIVREM_I32 */ i32_i32_func_i32_i32, -/* UDIVREM_I64 */ i64_i64_func_i64_i64, -/* UDIVREM_I128 */ i64_i64_i64_i64_func_i64_i64_i64_i64, -/* NEG_I32 */ i32_func_i32, -/* NEG_I64 */ i64_func_i64, - -// FLOATING POINT -/* ADD_F32 */ f32_func_f32_f32, -/* ADD_F64 */ f64_func_f64_f64, -/* ADD_F80 */ unsupported, -/* ADD_F128 */ func_iPTR_i64_i64_i64_i64, -/* ADD_PPCF128 */ unsupported, -/* SUB_F32 */ f32_func_f32_f32, -/* SUB_F64 */ f64_func_f64_f64, -/* SUB_F80 */ unsupported, -/* SUB_F128 */ func_iPTR_i64_i64_i64_i64, -/* SUB_PPCF128 */ unsupported, -/* MUL_F32 */ f32_func_f32_f32, -/* MUL_F64 */ f64_func_f64_f64, -/* MUL_F80 */ unsupported, -/* MUL_F128 */ func_iPTR_i64_i64_i64_i64, -/* MUL_PPCF128 */ 
unsupported, -/* DIV_F32 */ f32_func_f32_f32, -/* DIV_F64 */ f64_func_f64_f64, -/* DIV_F80 */ unsupported, -/* DIV_F128 */ func_iPTR_i64_i64_i64_i64, -/* DIV_PPCF128 */ unsupported, -/* REM_F32 */ f32_func_f32_f32, -/* REM_F64 */ f64_func_f64_f64, -/* REM_F80 */ unsupported, -/* REM_F128 */ func_iPTR_i64_i64_i64_i64, -/* REM_PPCF128 */ unsupported, -/* FMA_F32 */ f32_func_f32_f32_f32, -/* FMA_F64 */ f64_func_f64_f64_f64, -/* FMA_F80 */ unsupported, -/* FMA_F128 */ func_iPTR_i64_i64_i64_i64_i64_i64, -/* FMA_PPCF128 */ unsupported, -/* POWI_F32 */ f32_func_f32_i32, -/* POWI_F64 */ f64_func_f64_i32, -/* POWI_F80 */ unsupported, -/* POWI_F128 */ func_iPTR_i64_i64_i64_i64, -/* POWI_PPCF128 */ unsupported, -/* SQRT_F32 */ f32_func_f32, -/* SQRT_F64 */ f64_func_f64, -/* SQRT_F80 */ unsupported, -/* SQRT_F128 */ func_iPTR_i64_i64, -/* SQRT_PPCF128 */ unsupported, -/* LOG_F32 */ f32_func_f32, -/* LOG_F64 */ f64_func_f64, -/* LOG_F80 */ unsupported, -/* LOG_F128 */ func_iPTR_i64_i64, -/* LOG_PPCF128 */ unsupported, -/* LOG2_F32 */ f32_func_f32, -/* LOG2_F64 */ f64_func_f64, -/* LOG2_F80 */ unsupported, -/* LOG2_F128 */ func_iPTR_i64_i64, -/* LOG2_PPCF128 */ unsupported, -/* LOG10_F32 */ f32_func_f32, -/* LOG10_F64 */ f64_func_f64, -/* LOG10_F80 */ unsupported, -/* LOG10_F128 */ func_iPTR_i64_i64, -/* LOG10_PPCF128 */ unsupported, -/* EXP_F32 */ f32_func_f32, -/* EXP_F64 */ f64_func_f64, -/* EXP_F80 */ unsupported, -/* EXP_F128 */ func_iPTR_i64_i64, -/* EXP_PPCF128 */ unsupported, -/* EXP2_F32 */ f32_func_f32, -/* EXP2_F64 */ f64_func_f64, -/* EXP2_F80 */ unsupported, -/* EXP2_F128 */ func_iPTR_i64_i64, -/* EXP2_PPCF128 */ unsupported, -/* SIN_F32 */ f32_func_f32, -/* SIN_F64 */ f64_func_f64, -/* SIN_F80 */ unsupported, -/* SIN_F128 */ func_iPTR_i64_i64, -/* SIN_PPCF128 */ unsupported, -/* COS_F32 */ f32_func_f32, -/* COS_F64 */ f64_func_f64, -/* COS_F80 */ unsupported, -/* COS_F128 */ func_iPTR_i64_i64, -/* COS_PPCF128 */ unsupported, -/* SINCOS_F32 */ func_f32_iPTR_iPTR, -/* SINCOS_F64 */ func_f64_iPTR_iPTR, -/* SINCOS_F80 */ unsupported, -/* SINCOS_F128 */ func_i64_i64_iPTR_iPTR, -/* SINCOS_PPCF128 */ unsupported, -/* POW_F32 */ f32_func_f32_f32, -/* POW_F64 */ f64_func_f64_f64, -/* POW_F80 */ unsupported, -/* POW_F128 */ func_iPTR_i64_i64_i64_i64, -/* POW_PPCF128 */ unsupported, -/* CEIL_F32 */ f32_func_f32, -/* CEIL_F64 */ f64_func_f64, -/* CEIL_F80 */ unsupported, -/* CEIL_F128 */ func_iPTR_i64_i64, -/* CEIL_PPCF128 */ unsupported, -/* TRUNC_F32 */ f32_func_f32, -/* TRUNC_F64 */ f64_func_f64, -/* TRUNC_F80 */ unsupported, -/* TRUNC_F128 */ func_iPTR_i64_i64, -/* TRUNC_PPCF128 */ unsupported, -/* RINT_F32 */ f32_func_f32, -/* RINT_F64 */ f64_func_f64, -/* RINT_F80 */ unsupported, -/* RINT_F128 */ func_iPTR_i64_i64, -/* RINT_PPCF128 */ unsupported, -/* NEARBYINT_F32 */ f32_func_f32, -/* NEARBYINT_F64 */ f64_func_f64, -/* NEARBYINT_F80 */ unsupported, -/* NEARBYINT_F128 */ func_iPTR_i64_i64, -/* NEARBYINT_PPCF128 */ unsupported, -/* ROUND_F32 */ f32_func_f32, -/* ROUND_F64 */ f64_func_f64, -/* ROUND_F80 */ unsupported, -/* ROUND_F128 */ func_iPTR_i64_i64, -/* ROUND_PPCF128 */ unsupported, -/* FLOOR_F32 */ f32_func_f32, -/* FLOOR_F64 */ f64_func_f64, -/* FLOOR_F80 */ unsupported, -/* FLOOR_F128 */ func_iPTR_i64_i64, -/* FLOOR_PPCF128 */ unsupported, -/* COPYSIGN_F32 */ f32_func_f32_f32, -/* COPYSIGN_F64 */ f64_func_f64_f64, -/* COPYSIGN_F80 */ unsupported, -/* COPYSIGN_F128 */ func_iPTR_i64_i64_i64_i64, -/* COPYSIGN_PPCF128 */ unsupported, -/* FMIN_F32 */ f32_func_f32_f32, -/* FMIN_F64 */ 
f64_func_f64_f64, -/* FMIN_F80 */ unsupported, -/* FMIN_F128 */ func_iPTR_i64_i64_i64_i64, -/* FMIN_PPCF128 */ unsupported, -/* FMAX_F32 */ f32_func_f32_f32, -/* FMAX_F64 */ f64_func_f64_f64, -/* FMAX_F80 */ unsupported, -/* FMAX_F128 */ func_iPTR_i64_i64_i64_i64, -/* FMAX_PPCF128 */ unsupported, - -// CONVERSION -/* FPEXT_F32_PPCF128 */ unsupported, -/* FPEXT_F64_PPCF128 */ unsupported, -/* FPEXT_F64_F128 */ func_iPTR_f64, -/* FPEXT_F32_F128 */ func_iPTR_f32, -/* FPEXT_F32_F64 */ f64_func_f32, -/* FPEXT_F16_F32 */ f32_func_i16, -/* FPROUND_F32_F16 */ i16_func_f32, -/* FPROUND_F64_F16 */ unsupported, -/* FPROUND_F80_F16 */ unsupported, -/* FPROUND_F128_F16 */ unsupported, -/* FPROUND_PPCF128_F16 */ unsupported, -/* FPROUND_F64_F32 */ f32_func_f64, -/* FPROUND_F80_F32 */ unsupported, -/* FPROUND_F128_F32 */ f32_func_i64_i64, -/* FPROUND_PPCF128_F32 */ unsupported, -/* FPROUND_F80_F64 */ unsupported, -/* FPROUND_F128_F64 */ f64_func_i64_i64, -/* FPROUND_PPCF128_F64 */ unsupported, -/* FPTOSINT_F32_I32 */ i32_func_f32, -/* FPTOSINT_F32_I64 */ i64_func_f32, -/* FPTOSINT_F32_I128 */ i64_i64_func_f32, -/* FPTOSINT_F64_I32 */ i32_func_f64, -/* FPTOSINT_F64_I64 */ i64_func_f64, -/* FPTOSINT_F64_I128 */ i64_i64_func_f64, -/* FPTOSINT_F80_I32 */ unsupported, -/* FPTOSINT_F80_I64 */ unsupported, -/* FPTOSINT_F80_I128 */ unsupported, -/* FPTOSINT_F128_I32 */ i32_func_i64_i64, -/* FPTOSINT_F128_I64 */ i64_func_i64_i64, -/* FPTOSINT_F128_I128 */ i64_i64_func_i64_i64, -/* FPTOSINT_PPCF128_I32 */ unsupported, -/* FPTOSINT_PPCF128_I64 */ unsupported, -/* FPTOSINT_PPCF128_I128 */ unsupported, -/* FPTOUINT_F32_I32 */ i32_func_f32, -/* FPTOUINT_F32_I64 */ i64_func_f32, -/* FPTOUINT_F32_I128 */ i64_i64_func_f32, -/* FPTOUINT_F64_I32 */ i32_func_f64, -/* FPTOUINT_F64_I64 */ i64_func_f64, -/* FPTOUINT_F64_I128 */ i64_i64_func_f64, -/* FPTOUINT_F80_I32 */ unsupported, -/* FPTOUINT_F80_I64 */ unsupported, -/* FPTOUINT_F80_I128 */ unsupported, -/* FPTOUINT_F128_I32 */ i32_func_i64_i64, -/* FPTOUINT_F128_I64 */ i64_func_i64_i64, -/* FPTOUINT_F128_I128 */ i64_i64_func_i64_i64, -/* FPTOUINT_PPCF128_I32 */ unsupported, -/* FPTOUINT_PPCF128_I64 */ unsupported, -/* FPTOUINT_PPCF128_I128 */ unsupported, -/* SINTTOFP_I32_F32 */ f32_func_i32, -/* SINTTOFP_I32_F64 */ f64_func_i32, -/* SINTTOFP_I32_F80 */ unsupported, -/* SINTTOFP_I32_F128 */ func_iPTR_i32, -/* SINTTOFP_I32_PPCF128 */ unsupported, -/* SINTTOFP_I64_F32 */ f32_func_i64, -/* SINTTOFP_I64_F64 */ f64_func_i64, -/* SINTTOFP_I64_F80 */ unsupported, -/* SINTTOFP_I64_F128 */ func_iPTR_i64, -/* SINTTOFP_I64_PPCF128 */ unsupported, -/* SINTTOFP_I128_F32 */ f32_func_i64_i64, -/* SINTTOFP_I128_F64 */ f64_func_i64_i64, -/* SINTTOFP_I128_F80 */ unsupported, -/* SINTTOFP_I128_F128 */ func_iPTR_i64_i64, -/* SINTTOFP_I128_PPCF128 */ unsupported, -/* UINTTOFP_I32_F32 */ f32_func_i32, -/* UINTTOFP_I32_F64 */ f64_func_i64, -/* UINTTOFP_I32_F80 */ unsupported, -/* UINTTOFP_I32_F128 */ func_iPTR_i32, -/* UINTTOFP_I32_PPCF128 */ unsupported, -/* UINTTOFP_I64_F32 */ f32_func_i64, -/* UINTTOFP_I64_F64 */ f64_func_i64, -/* UINTTOFP_I64_F80 */ unsupported, -/* UINTTOFP_I64_F128 */ func_iPTR_i64, -/* UINTTOFP_I64_PPCF128 */ unsupported, -/* UINTTOFP_I128_F32 */ f32_func_i64_i64, -/* UINTTOFP_I128_F64 */ f64_func_i64_i64, -/* UINTTOFP_I128_F80 */ unsupported, -/* UINTTOFP_I128_F128 */ func_iPTR_i64_i64, -/* UINTTOFP_I128_PPCF128 */ unsupported, - -// COMPARISON -/* OEQ_F32 */ i32_func_f32_f32, -/* OEQ_F64 */ i32_func_f64_f64, -/* OEQ_F128 */ i32_func_i64_i64_i64_i64, -/* OEQ_PPCF128 */ 
unsupported, -/* UNE_F32 */ i32_func_f32_f32, -/* UNE_F64 */ i32_func_f64_f64, -/* UNE_F128 */ i32_func_i64_i64_i64_i64, -/* UNE_PPCF128 */ unsupported, -/* OGE_F32 */ i32_func_f32_f32, -/* OGE_F64 */ i32_func_f64_f64, -/* OGE_F128 */ i32_func_i64_i64_i64_i64, -/* OGE_PPCF128 */ unsupported, -/* OLT_F32 */ i32_func_f32_f32, -/* OLT_F64 */ i32_func_f64_f64, -/* OLT_F128 */ i32_func_i64_i64_i64_i64, -/* OLT_PPCF128 */ unsupported, -/* OLE_F32 */ i32_func_f32_f32, -/* OLE_F64 */ i32_func_f64_f64, -/* OLE_F128 */ i32_func_i64_i64_i64_i64, -/* OLE_PPCF128 */ unsupported, -/* OGT_F32 */ i32_func_f32_f32, -/* OGT_F64 */ i32_func_f64_f64, -/* OGT_F128 */ i32_func_i64_i64_i64_i64, -/* OGT_PPCF128 */ unsupported, -/* UO_F32 */ i32_func_f32_f32, -/* UO_F64 */ i32_func_f64_f64, -/* UO_F128 */ i32_func_i64_i64_i64_i64, -/* UO_PPCF128 */ unsupported, -/* O_F32 */ i32_func_f32_f32, -/* O_F64 */ i32_func_f64_f64, -/* O_F128 */ i32_func_i64_i64_i64_i64, -/* O_PPCF128 */ unsupported, - -// MEMORY -/* MEMCPY */ iPTR_func_iPTR_iPTR_iPTR, -/* MEMSET */ iPTR_func_iPTR_i32_iPTR, -/* MEMMOVE */ iPTR_func_iPTR_iPTR_iPTR, - -// ELEMENT-WISE ATOMIC MEMORY -/* MEMCPY_ELEMENT_UNORDERED_ATOMIC_1 */ unsupported, -/* MEMCPY_ELEMENT_UNORDERED_ATOMIC_2 */ unsupported, -/* MEMCPY_ELEMENT_UNORDERED_ATOMIC_4 */ unsupported, -/* MEMCPY_ELEMENT_UNORDERED_ATOMIC_8 */ unsupported, -/* MEMCPY_ELEMENT_UNORDERED_ATOMIC_16 */ unsupported, -/* MEMMOVE_ELEMENT_UNORDERED_ATOMIC_1 */ unsupported, -/* MEMMOVE_ELEMENT_UNORDERED_ATOMIC_2 */ unsupported, -/* MEMMOVE_ELEMENT_UNORDERED_ATOMIC_4 */ unsupported, -/* MEMMOVE_ELEMENT_UNORDERED_ATOMIC_8 */ unsupported, -/* MEMMOVE_ELEMENT_UNORDERED_ATOMIC_16 */ unsupported, - -/* MEMSET_ELEMENT_UNORDERED_ATOMIC_1 */ unsupported, -/* MEMSET_ELEMENT_UNORDERED_ATOMIC_2 */ unsupported, -/* MEMSET_ELEMENT_UNORDERED_ATOMIC_4 */ unsupported, -/* MEMSET_ELEMENT_UNORDERED_ATOMIC_8 */ unsupported, -/* MEMSET_ELEMENT_UNORDERED_ATOMIC_16 */ unsupported, - -// EXCEPTION HANDLING -/* UNWIND_RESUME */ unsupported, - -// Note: there's two sets of atomics libcalls; see -// for more info on the -// difference between them. - -// Atomic '__sync_*' libcalls. 
-/* SYNC_VAL_COMPARE_AND_SWAP_1 */ unsupported, -/* SYNC_VAL_COMPARE_AND_SWAP_2 */ unsupported, -/* SYNC_VAL_COMPARE_AND_SWAP_4 */ unsupported, -/* SYNC_VAL_COMPARE_AND_SWAP_8 */ unsupported, -/* SYNC_VAL_COMPARE_AND_SWAP_16 */ unsupported, -/* SYNC_LOCK_TEST_AND_SET_1 */ unsupported, -/* SYNC_LOCK_TEST_AND_SET_2 */ unsupported, -/* SYNC_LOCK_TEST_AND_SET_4 */ unsupported, -/* SYNC_LOCK_TEST_AND_SET_8 */ unsupported, -/* SYNC_LOCK_TEST_AND_SET_16 */ unsupported, -/* SYNC_FETCH_AND_ADD_1 */ unsupported, -/* SYNC_FETCH_AND_ADD_2 */ unsupported, -/* SYNC_FETCH_AND_ADD_4 */ unsupported, -/* SYNC_FETCH_AND_ADD_8 */ unsupported, -/* SYNC_FETCH_AND_ADD_16 */ unsupported, -/* SYNC_FETCH_AND_SUB_1 */ unsupported, -/* SYNC_FETCH_AND_SUB_2 */ unsupported, -/* SYNC_FETCH_AND_SUB_4 */ unsupported, -/* SYNC_FETCH_AND_SUB_8 */ unsupported, -/* SYNC_FETCH_AND_SUB_16 */ unsupported, -/* SYNC_FETCH_AND_AND_1 */ unsupported, -/* SYNC_FETCH_AND_AND_2 */ unsupported, -/* SYNC_FETCH_AND_AND_4 */ unsupported, -/* SYNC_FETCH_AND_AND_8 */ unsupported, -/* SYNC_FETCH_AND_AND_16 */ unsupported, -/* SYNC_FETCH_AND_OR_1 */ unsupported, -/* SYNC_FETCH_AND_OR_2 */ unsupported, -/* SYNC_FETCH_AND_OR_4 */ unsupported, -/* SYNC_FETCH_AND_OR_8 */ unsupported, -/* SYNC_FETCH_AND_OR_16 */ unsupported, -/* SYNC_FETCH_AND_XOR_1 */ unsupported, -/* SYNC_FETCH_AND_XOR_2 */ unsupported, -/* SYNC_FETCH_AND_XOR_4 */ unsupported, -/* SYNC_FETCH_AND_XOR_8 */ unsupported, -/* SYNC_FETCH_AND_XOR_16 */ unsupported, -/* SYNC_FETCH_AND_NAND_1 */ unsupported, -/* SYNC_FETCH_AND_NAND_2 */ unsupported, -/* SYNC_FETCH_AND_NAND_4 */ unsupported, -/* SYNC_FETCH_AND_NAND_8 */ unsupported, -/* SYNC_FETCH_AND_NAND_16 */ unsupported, -/* SYNC_FETCH_AND_MAX_1 */ unsupported, -/* SYNC_FETCH_AND_MAX_2 */ unsupported, -/* SYNC_FETCH_AND_MAX_4 */ unsupported, -/* SYNC_FETCH_AND_MAX_8 */ unsupported, -/* SYNC_FETCH_AND_MAX_16 */ unsupported, -/* SYNC_FETCH_AND_UMAX_1 */ unsupported, -/* SYNC_FETCH_AND_UMAX_2 */ unsupported, -/* SYNC_FETCH_AND_UMAX_4 */ unsupported, -/* SYNC_FETCH_AND_UMAX_8 */ unsupported, -/* SYNC_FETCH_AND_UMAX_16 */ unsupported, -/* SYNC_FETCH_AND_MIN_1 */ unsupported, -/* SYNC_FETCH_AND_MIN_2 */ unsupported, -/* SYNC_FETCH_AND_MIN_4 */ unsupported, -/* SYNC_FETCH_AND_MIN_8 */ unsupported, -/* SYNC_FETCH_AND_MIN_16 */ unsupported, -/* SYNC_FETCH_AND_UMIN_1 */ unsupported, -/* SYNC_FETCH_AND_UMIN_2 */ unsupported, -/* SYNC_FETCH_AND_UMIN_4 */ unsupported, -/* SYNC_FETCH_AND_UMIN_8 */ unsupported, -/* SYNC_FETCH_AND_UMIN_16 */ unsupported, - -// Atomic '__atomic_*' libcalls. 
-/* ATOMIC_LOAD */ unsupported, -/* ATOMIC_LOAD_1 */ unsupported, -/* ATOMIC_LOAD_2 */ unsupported, -/* ATOMIC_LOAD_4 */ unsupported, -/* ATOMIC_LOAD_8 */ unsupported, -/* ATOMIC_LOAD_16 */ unsupported, - -/* ATOMIC_STORE */ unsupported, -/* ATOMIC_STORE_1 */ unsupported, -/* ATOMIC_STORE_2 */ unsupported, -/* ATOMIC_STORE_4 */ unsupported, -/* ATOMIC_STORE_8 */ unsupported, -/* ATOMIC_STORE_16 */ unsupported, - -/* ATOMIC_EXCHANGE */ unsupported, -/* ATOMIC_EXCHANGE_1 */ unsupported, -/* ATOMIC_EXCHANGE_2 */ unsupported, -/* ATOMIC_EXCHANGE_4 */ unsupported, -/* ATOMIC_EXCHANGE_8 */ unsupported, -/* ATOMIC_EXCHANGE_16 */ unsupported, - -/* ATOMIC_COMPARE_EXCHANGE */ unsupported, -/* ATOMIC_COMPARE_EXCHANGE_1 */ unsupported, -/* ATOMIC_COMPARE_EXCHANGE_2 */ unsupported, -/* ATOMIC_COMPARE_EXCHANGE_4 */ unsupported, -/* ATOMIC_COMPARE_EXCHANGE_8 */ unsupported, -/* ATOMIC_COMPARE_EXCHANGE_16 */ unsupported, - -/* ATOMIC_FETCH_ADD_1 */ unsupported, -/* ATOMIC_FETCH_ADD_2 */ unsupported, -/* ATOMIC_FETCH_ADD_4 */ unsupported, -/* ATOMIC_FETCH_ADD_8 */ unsupported, -/* ATOMIC_FETCH_ADD_16 */ unsupported, - -/* ATOMIC_FETCH_SUB_1 */ unsupported, -/* ATOMIC_FETCH_SUB_2 */ unsupported, -/* ATOMIC_FETCH_SUB_4 */ unsupported, -/* ATOMIC_FETCH_SUB_8 */ unsupported, -/* ATOMIC_FETCH_SUB_16 */ unsupported, - -/* ATOMIC_FETCH_AND_1 */ unsupported, -/* ATOMIC_FETCH_AND_2 */ unsupported, -/* ATOMIC_FETCH_AND_4 */ unsupported, -/* ATOMIC_FETCH_AND_8 */ unsupported, -/* ATOMIC_FETCH_AND_16 */ unsupported, - -/* ATOMIC_FETCH_OR_1 */ unsupported, -/* ATOMIC_FETCH_OR_2 */ unsupported, -/* ATOMIC_FETCH_OR_4 */ unsupported, -/* ATOMIC_FETCH_OR_8 */ unsupported, -/* ATOMIC_FETCH_OR_16 */ unsupported, - -/* ATOMIC_FETCH_XOR_1 */ unsupported, -/* ATOMIC_FETCH_XOR_2 */ unsupported, -/* ATOMIC_FETCH_XOR_4 */ unsupported, -/* ATOMIC_FETCH_XOR_8 */ unsupported, -/* ATOMIC_FETCH_XOR_16 */ unsupported, - -/* ATOMIC_FETCH_NAND_1 */ unsupported, -/* ATOMIC_FETCH_NAND_2 */ unsupported, -/* ATOMIC_FETCH_NAND_4 */ unsupported, -/* ATOMIC_FETCH_NAND_8 */ unsupported, -/* ATOMIC_FETCH_NAND_16 */ unsupported, - -// Stack Protector Fail. -/* STACKPROTECTOR_CHECK_FAIL */ func, - -// Deoptimization. -/* DEOPTIMIZE */ unsupported, +struct RuntimeLibcallSignatureTable { + std::vector Table; + + // Any newly-added libcalls will be unsupported by default. 
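+  // (Table holds one RuntimeLibcallSignature entry per RTLIB libcall; the
+  // constructor below starts every entry out as 'unsupported' and then fills
+  // in the signatures that are known.)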
+ RuntimeLibcallSignatureTable() : Table(RTLIB::UNKNOWN_LIBCALL, unsupported) { + // Integer + Table[RTLIB::SHL_I16] = i16_func_i16_i16; + Table[RTLIB::SHL_I32] = i32_func_i32_i32; + Table[RTLIB::SHL_I64] = i64_func_i64_i64; + Table[RTLIB::SHL_I128] = i64_i64_func_i64_i64_i32; + Table[RTLIB::SRL_I16] = i16_func_i16_i16; + Table[RTLIB::SRL_I32] = i32_func_i32_i32; + Table[RTLIB::SRL_I64] = i64_func_i64_i64; + Table[RTLIB::SRL_I128] = i64_i64_func_i64_i64_i32; + Table[RTLIB::SRA_I16] = i16_func_i16_i16; + Table[RTLIB::SRA_I32] = i32_func_i32_i32; + Table[RTLIB::SRA_I64] = i64_func_i64_i64; + Table[RTLIB::SRA_I128] = i64_i64_func_i64_i64_i32; + Table[RTLIB::MUL_I8] = i8_func_i8_i8; + Table[RTLIB::MUL_I16] = i16_func_i16_i16; + Table[RTLIB::MUL_I32] = i32_func_i32_i32; + Table[RTLIB::MUL_I64] = i64_func_i64_i64; + Table[RTLIB::MUL_I128] = i64_i64_func_i64_i64_i64_i64; + Table[RTLIB::MULO_I32] = i32_func_i32_i32; + Table[RTLIB::MULO_I64] = i64_func_i64_i64; + Table[RTLIB::MULO_I128] = i64_i64_func_i64_i64_i64_i64; + Table[RTLIB::SDIV_I8] = i8_func_i8_i8; + Table[RTLIB::SDIV_I16] = i16_func_i16_i16; + Table[RTLIB::SDIV_I32] = i32_func_i32_i32; + Table[RTLIB::SDIV_I64] = i64_func_i64_i64; + Table[RTLIB::SDIV_I128] = i64_i64_func_i64_i64_i64_i64; + Table[RTLIB::UDIV_I8] = i8_func_i8_i8; + Table[RTLIB::UDIV_I16] = i16_func_i16_i16; + Table[RTLIB::UDIV_I32] = i32_func_i32_i32; + Table[RTLIB::UDIV_I64] = i64_func_i64_i64; + Table[RTLIB::UDIV_I128] = i64_i64_func_i64_i64_i64_i64; + Table[RTLIB::SREM_I8] = i8_func_i8_i8; + Table[RTLIB::SREM_I16] = i16_func_i16_i16; + Table[RTLIB::SREM_I32] = i32_func_i32_i32; + Table[RTLIB::SREM_I64] = i64_func_i64_i64; + Table[RTLIB::SREM_I128] = i64_i64_func_i64_i64_i64_i64; + Table[RTLIB::UREM_I8] = i8_func_i8_i8; + Table[RTLIB::UREM_I16] = i16_func_i16_i16; + Table[RTLIB::UREM_I32] = i32_func_i32_i32; + Table[RTLIB::UREM_I64] = i64_func_i64_i64; + Table[RTLIB::UREM_I128] = i64_i64_func_i64_i64_i64_i64; + Table[RTLIB::SDIVREM_I8] = i8_func_i8_i8; + Table[RTLIB::SDIVREM_I16] = i16_i16_func_i16_i16; + Table[RTLIB::SDIVREM_I32] = i32_i32_func_i32_i32; + Table[RTLIB::SDIVREM_I64] = i64_func_i64_i64; + Table[RTLIB::SDIVREM_I128] = i64_i64_i64_i64_func_i64_i64_i64_i64; + Table[RTLIB::UDIVREM_I8] = i8_func_i8_i8; + Table[RTLIB::UDIVREM_I16] = i16_i16_func_i16_i16; + Table[RTLIB::UDIVREM_I32] = i32_i32_func_i32_i32; + Table[RTLIB::UDIVREM_I64] = i64_i64_func_i64_i64; + Table[RTLIB::UDIVREM_I128] = i64_i64_i64_i64_func_i64_i64_i64_i64; + Table[RTLIB::NEG_I32] = i32_func_i32; + Table[RTLIB::NEG_I64] = i64_func_i64; + + // Floating-point. + // All F80 and PPCF128 routines are unsupported. 
+ Table[RTLIB::ADD_F32] = f32_func_f32_f32; + Table[RTLIB::ADD_F64] = f64_func_f64_f64; + Table[RTLIB::ADD_F128] = func_iPTR_i64_i64_i64_i64; + Table[RTLIB::SUB_F32] = f32_func_f32_f32; + Table[RTLIB::SUB_F64] = f64_func_f64_f64; + Table[RTLIB::SUB_F128] = func_iPTR_i64_i64_i64_i64; + Table[RTLIB::MUL_F32] = f32_func_f32_f32; + Table[RTLIB::MUL_F64] = f64_func_f64_f64; + Table[RTLIB::MUL_F128] = func_iPTR_i64_i64_i64_i64; + Table[RTLIB::DIV_F32] = f32_func_f32_f32; + Table[RTLIB::DIV_F64] = f64_func_f64_f64; + Table[RTLIB::DIV_F128] = func_iPTR_i64_i64_i64_i64; + Table[RTLIB::REM_F32] = f32_func_f32_f32; + Table[RTLIB::REM_F64] = f64_func_f64_f64; + Table[RTLIB::REM_F128] = func_iPTR_i64_i64_i64_i64; + Table[RTLIB::FMA_F32] = f32_func_f32_f32_f32; + Table[RTLIB::FMA_F64] = f64_func_f64_f64_f64; + Table[RTLIB::FMA_F128] = func_iPTR_i64_i64_i64_i64_i64_i64; + Table[RTLIB::POWI_F32] = f32_func_f32_i32; + Table[RTLIB::POWI_F64] = f64_func_f64_i32; + Table[RTLIB::POWI_F128] = func_iPTR_i64_i64_i64_i64; + Table[RTLIB::SQRT_F32] = f32_func_f32; + Table[RTLIB::SQRT_F64] = f64_func_f64; + Table[RTLIB::SQRT_F128] = func_iPTR_i64_i64; + Table[RTLIB::LOG_F32] = f32_func_f32; + Table[RTLIB::LOG_F64] = f64_func_f64; + Table[RTLIB::LOG_F128] = func_iPTR_i64_i64; + Table[RTLIB::LOG2_F32] = f32_func_f32; + Table[RTLIB::LOG2_F64] = f64_func_f64; + Table[RTLIB::LOG2_F128] = func_iPTR_i64_i64; + Table[RTLIB::LOG10_F32] = f32_func_f32; + Table[RTLIB::LOG10_F64] = f64_func_f64; + Table[RTLIB::LOG10_F128] = func_iPTR_i64_i64; + Table[RTLIB::EXP_F32] = f32_func_f32; + Table[RTLIB::EXP_F64] = f64_func_f64; + Table[RTLIB::EXP_F128] = func_iPTR_i64_i64; + Table[RTLIB::EXP2_F32] = f32_func_f32; + Table[RTLIB::EXP2_F64] = f64_func_f64; + Table[RTLIB::EXP2_F128] = func_iPTR_i64_i64; + Table[RTLIB::SIN_F32] = f32_func_f32; + Table[RTLIB::SIN_F64] = f64_func_f64; + Table[RTLIB::SIN_F128] = func_iPTR_i64_i64; + Table[RTLIB::COS_F32] = f32_func_f32; + Table[RTLIB::COS_F64] = f64_func_f64; + Table[RTLIB::COS_F128] = func_iPTR_i64_i64; + Table[RTLIB::SINCOS_F32] = func_f32_iPTR_iPTR; + Table[RTLIB::SINCOS_F64] = func_f64_iPTR_iPTR; + Table[RTLIB::SINCOS_F128] = func_i64_i64_iPTR_iPTR; + Table[RTLIB::POW_F32] = f32_func_f32_f32; + Table[RTLIB::POW_F64] = f64_func_f64_f64; + Table[RTLIB::POW_F128] = func_iPTR_i64_i64_i64_i64; + Table[RTLIB::CEIL_F32] = f32_func_f32; + Table[RTLIB::CEIL_F64] = f64_func_f64; + Table[RTLIB::CEIL_F128] = func_iPTR_i64_i64; + Table[RTLIB::TRUNC_F32] = f32_func_f32; + Table[RTLIB::TRUNC_F64] = f64_func_f64; + Table[RTLIB::TRUNC_F128] = func_iPTR_i64_i64; + Table[RTLIB::RINT_F32] = f32_func_f32; + Table[RTLIB::RINT_F64] = f64_func_f64; + Table[RTLIB::RINT_F128] = func_iPTR_i64_i64; + Table[RTLIB::NEARBYINT_F32] = f32_func_f32; + Table[RTLIB::NEARBYINT_F64] = f64_func_f64; + Table[RTLIB::NEARBYINT_F128] = func_iPTR_i64_i64; + Table[RTLIB::ROUND_F32] = f32_func_f32; + Table[RTLIB::ROUND_F64] = f64_func_f64; + Table[RTLIB::ROUND_F128] = func_iPTR_i64_i64; + Table[RTLIB::FLOOR_F32] = f32_func_f32; + Table[RTLIB::FLOOR_F64] = f64_func_f64; + Table[RTLIB::FLOOR_F128] = func_iPTR_i64_i64; + Table[RTLIB::COPYSIGN_F32] = f32_func_f32_f32; + Table[RTLIB::COPYSIGN_F64] = f64_func_f64_f64; + Table[RTLIB::COPYSIGN_F128] = func_iPTR_i64_i64_i64_i64; + Table[RTLIB::FMIN_F32] = f32_func_f32_f32; + Table[RTLIB::FMIN_F64] = f64_func_f64_f64; + Table[RTLIB::FMIN_F128] = func_iPTR_i64_i64_i64_i64; + Table[RTLIB::FMAX_F32] = f32_func_f32_f32; + Table[RTLIB::FMAX_F64] = f64_func_f64_f64; + Table[RTLIB::FMAX_F128] = 
func_iPTR_i64_i64_i64_i64; + + // Conversion + // All F80 and PPCF128 routines are unsupported. + Table[RTLIB::FPEXT_F64_F128] = func_iPTR_f64; + Table[RTLIB::FPEXT_F32_F128] = func_iPTR_f32; + Table[RTLIB::FPEXT_F32_F64] = f64_func_f32; + Table[RTLIB::FPEXT_F16_F32] = f32_func_i16; + Table[RTLIB::FPROUND_F32_F16] = i16_func_f32; + Table[RTLIB::FPROUND_F64_F32] = f32_func_f64; + Table[RTLIB::FPROUND_F128_F32] = f32_func_i64_i64; + Table[RTLIB::FPROUND_F128_F64] = f64_func_i64_i64; + Table[RTLIB::FPTOSINT_F32_I32] = i32_func_f32; + Table[RTLIB::FPTOSINT_F32_I64] = i64_func_f32; + Table[RTLIB::FPTOSINT_F32_I128] = i64_i64_func_f32; + Table[RTLIB::FPTOSINT_F64_I32] = i32_func_f64; + Table[RTLIB::FPTOSINT_F64_I64] = i64_func_f64; + Table[RTLIB::FPTOSINT_F64_I128] = i64_i64_func_f64; + Table[RTLIB::FPTOSINT_F128_I32] = i32_func_i64_i64; + Table[RTLIB::FPTOSINT_F128_I64] = i64_func_i64_i64; + Table[RTLIB::FPTOSINT_F128_I128] = i64_i64_func_i64_i64; + Table[RTLIB::FPTOUINT_F32_I32] = i32_func_f32; + Table[RTLIB::FPTOUINT_F32_I64] = i64_func_f32; + Table[RTLIB::FPTOUINT_F32_I128] = i64_i64_func_f32; + Table[RTLIB::FPTOUINT_F64_I32] = i32_func_f64; + Table[RTLIB::FPTOUINT_F64_I64] = i64_func_f64; + Table[RTLIB::FPTOUINT_F64_I128] = i64_i64_func_f64; + Table[RTLIB::FPTOUINT_F128_I32] = i32_func_i64_i64; + Table[RTLIB::FPTOUINT_F128_I64] = i64_func_i64_i64; + Table[RTLIB::FPTOUINT_F128_I128] = i64_i64_func_i64_i64; + Table[RTLIB::SINTTOFP_I32_F32] = f32_func_i32; + Table[RTLIB::SINTTOFP_I32_F64] = f64_func_i32; + Table[RTLIB::SINTTOFP_I32_F128] = func_iPTR_i32; + Table[RTLIB::SINTTOFP_I64_F32] = f32_func_i64; + Table[RTLIB::SINTTOFP_I64_F64] = f64_func_i64; + Table[RTLIB::SINTTOFP_I64_F128] = func_iPTR_i64; + Table[RTLIB::SINTTOFP_I128_F32] = f32_func_i64_i64; + Table[RTLIB::SINTTOFP_I128_F64] = f64_func_i64_i64; + Table[RTLIB::SINTTOFP_I128_F128] = func_iPTR_i64_i64; + Table[RTLIB::UINTTOFP_I32_F32] = f32_func_i32; + Table[RTLIB::UINTTOFP_I32_F64] = f64_func_i32; + Table[RTLIB::UINTTOFP_I32_F128] = func_iPTR_i32; + Table[RTLIB::UINTTOFP_I64_F32] = f32_func_i64; + Table[RTLIB::UINTTOFP_I64_F64] = f64_func_i64; + Table[RTLIB::UINTTOFP_I64_F128] = func_iPTR_i64; + Table[RTLIB::UINTTOFP_I128_F32] = f32_func_i64_i64; + Table[RTLIB::UINTTOFP_I128_F64] = f64_func_i64_i64; + Table[RTLIB::UINTTOFP_I128_F128] = func_iPTR_i64_i64; + + // Comparison + // All F80 and PPCF128 routines are unsupported. + Table[RTLIB::OEQ_F32] = i32_func_f32_f32; + Table[RTLIB::OEQ_F64] = i32_func_f64_f64; + Table[RTLIB::OEQ_F128] = i32_func_i64_i64_i64_i64; + Table[RTLIB::UNE_F32] = i32_func_f32_f32; + Table[RTLIB::UNE_F64] = i32_func_f64_f64; + Table[RTLIB::UNE_F128] = i32_func_i64_i64_i64_i64; + Table[RTLIB::OGE_F32] = i32_func_f32_f32; + Table[RTLIB::OGE_F64] = i32_func_f64_f64; + Table[RTLIB::OGE_F128] = i32_func_i64_i64_i64_i64; + Table[RTLIB::OLT_F32] = i32_func_f32_f32; + Table[RTLIB::OLT_F64] = i32_func_f64_f64; + Table[RTLIB::OLT_F128] = i32_func_i64_i64_i64_i64; + Table[RTLIB::OLE_F32] = i32_func_f32_f32; + Table[RTLIB::OLE_F64] = i32_func_f64_f64; + Table[RTLIB::OLE_F128] = i32_func_i64_i64_i64_i64; + Table[RTLIB::OGT_F32] = i32_func_f32_f32; + Table[RTLIB::OGT_F64] = i32_func_f64_f64; + Table[RTLIB::OGT_F128] = i32_func_i64_i64_i64_i64; + Table[RTLIB::UO_F32] = i32_func_f32_f32; + Table[RTLIB::UO_F64] = i32_func_f64_f64; + Table[RTLIB::UO_F128] = i32_func_i64_i64_i64_i64; + // O_FXX has the weird property that it uses the same libcall name as UO_FXX // This breaks our name-based lookup. 
Fortunately only the UO family of + // libcalls appears to be actually used. + Table[RTLIB::O_F32] = unsupported; + Table[RTLIB::O_F64] = unsupported; + Table[RTLIB::O_F128] = unsupported; + + // Memory + Table[RTLIB::MEMCPY] = iPTR_func_iPTR_iPTR_iPTR; + Table[RTLIB::MEMSET] = iPTR_func_iPTR_i32_iPTR; + Table[RTLIB::MEMMOVE] = iPTR_func_iPTR_iPTR_iPTR; + + // Element-wise Atomic memory + // TODO: Fix these when we implement atomic support + Table[RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_1] = unsupported; + Table[RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_2] = unsupported; + Table[RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_4] = unsupported; + Table[RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_8] = unsupported; + Table[RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_16] = unsupported; + Table[RTLIB::MEMMOVE_ELEMENT_UNORDERED_ATOMIC_1] = unsupported; + Table[RTLIB::MEMMOVE_ELEMENT_UNORDERED_ATOMIC_2] = unsupported; + Table[RTLIB::MEMMOVE_ELEMENT_UNORDERED_ATOMIC_4] = unsupported; + Table[RTLIB::MEMMOVE_ELEMENT_UNORDERED_ATOMIC_8] = unsupported; + Table[RTLIB::MEMMOVE_ELEMENT_UNORDERED_ATOMIC_16] = unsupported; + + Table[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_1] = unsupported; + Table[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_2] = unsupported; + Table[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_4] = unsupported; + Table[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_8] = unsupported; + Table[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_16] = unsupported; + + // Atomic '__sync_*' libcalls. + // TODO: Fix these when we implement atomic support + Table[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1] = unsupported; + Table[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2] = unsupported; + Table[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4] = unsupported; + Table[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8] = unsupported; + Table[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_16] = unsupported; + Table[RTLIB::SYNC_LOCK_TEST_AND_SET_1] = unsupported; + Table[RTLIB::SYNC_LOCK_TEST_AND_SET_2] = unsupported; + Table[RTLIB::SYNC_LOCK_TEST_AND_SET_4] = unsupported; + Table[RTLIB::SYNC_LOCK_TEST_AND_SET_8] = unsupported; + Table[RTLIB::SYNC_LOCK_TEST_AND_SET_16] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_ADD_1] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_ADD_2] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_ADD_4] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_ADD_8] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_ADD_16] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_SUB_1] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_SUB_2] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_SUB_4] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_SUB_8] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_SUB_16] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_AND_1] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_AND_2] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_AND_4] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_AND_8] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_AND_16] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_OR_1] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_OR_2] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_OR_4] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_OR_8] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_OR_16] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_XOR_1] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_XOR_2] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_XOR_4] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_XOR_8] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_XOR_16] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_NAND_1] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_NAND_2] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_NAND_4] = unsupported; 
+ Table[RTLIB::SYNC_FETCH_AND_NAND_8] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_NAND_16] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_MAX_1] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_MAX_2] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_MAX_4] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_MAX_8] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_MAX_16] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_UMAX_1] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_UMAX_2] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_UMAX_4] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_UMAX_8] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_UMAX_16] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_MIN_1] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_MIN_2] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_MIN_4] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_MIN_8] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_MIN_16] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_UMIN_1] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_UMIN_2] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_UMIN_4] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_UMIN_8] = unsupported; + Table[RTLIB::SYNC_FETCH_AND_UMIN_16] = unsupported; + + // Atomic '__atomic_*' libcalls. + // TODO: Fix these when we implement atomic support + Table[RTLIB::ATOMIC_LOAD] = unsupported; + Table[RTLIB::ATOMIC_LOAD_1] = unsupported; + Table[RTLIB::ATOMIC_LOAD_2] = unsupported; + Table[RTLIB::ATOMIC_LOAD_4] = unsupported; + Table[RTLIB::ATOMIC_LOAD_8] = unsupported; + Table[RTLIB::ATOMIC_LOAD_16] = unsupported; + + Table[RTLIB::ATOMIC_STORE] = unsupported; + Table[RTLIB::ATOMIC_STORE_1] = unsupported; + Table[RTLIB::ATOMIC_STORE_2] = unsupported; + Table[RTLIB::ATOMIC_STORE_4] = unsupported; + Table[RTLIB::ATOMIC_STORE_8] = unsupported; + Table[RTLIB::ATOMIC_STORE_16] = unsupported; + + Table[RTLIB::ATOMIC_EXCHANGE] = unsupported; + Table[RTLIB::ATOMIC_EXCHANGE_1] = unsupported; + Table[RTLIB::ATOMIC_EXCHANGE_2] = unsupported; + Table[RTLIB::ATOMIC_EXCHANGE_4] = unsupported; + Table[RTLIB::ATOMIC_EXCHANGE_8] = unsupported; + Table[RTLIB::ATOMIC_EXCHANGE_16] = unsupported; + + Table[RTLIB::ATOMIC_COMPARE_EXCHANGE] = unsupported; + Table[RTLIB::ATOMIC_COMPARE_EXCHANGE_1] = unsupported; + Table[RTLIB::ATOMIC_COMPARE_EXCHANGE_2] = unsupported; + Table[RTLIB::ATOMIC_COMPARE_EXCHANGE_4] = unsupported; + Table[RTLIB::ATOMIC_COMPARE_EXCHANGE_8] = unsupported; + Table[RTLIB::ATOMIC_COMPARE_EXCHANGE_16] = unsupported; + + Table[RTLIB::ATOMIC_FETCH_ADD_1] = unsupported; + Table[RTLIB::ATOMIC_FETCH_ADD_2] = unsupported; + Table[RTLIB::ATOMIC_FETCH_ADD_4] = unsupported; + Table[RTLIB::ATOMIC_FETCH_ADD_8] = unsupported; + Table[RTLIB::ATOMIC_FETCH_ADD_16] = unsupported; + + Table[RTLIB::ATOMIC_FETCH_SUB_1] = unsupported; + Table[RTLIB::ATOMIC_FETCH_SUB_2] = unsupported; + Table[RTLIB::ATOMIC_FETCH_SUB_4] = unsupported; + Table[RTLIB::ATOMIC_FETCH_SUB_8] = unsupported; + Table[RTLIB::ATOMIC_FETCH_SUB_16] = unsupported; + + Table[RTLIB::ATOMIC_FETCH_AND_1] = unsupported; + Table[RTLIB::ATOMIC_FETCH_AND_2] = unsupported; + Table[RTLIB::ATOMIC_FETCH_AND_4] = unsupported; + Table[RTLIB::ATOMIC_FETCH_AND_8] = unsupported; + Table[RTLIB::ATOMIC_FETCH_AND_16] = unsupported; + + Table[RTLIB::ATOMIC_FETCH_OR_1] = unsupported; + Table[RTLIB::ATOMIC_FETCH_OR_2] = unsupported; + Table[RTLIB::ATOMIC_FETCH_OR_4] = unsupported; + Table[RTLIB::ATOMIC_FETCH_OR_8] = unsupported; + Table[RTLIB::ATOMIC_FETCH_OR_16] = unsupported; + + Table[RTLIB::ATOMIC_FETCH_XOR_1] = unsupported; + Table[RTLIB::ATOMIC_FETCH_XOR_2] = unsupported; + 
Table[RTLIB::ATOMIC_FETCH_XOR_4] = unsupported; + Table[RTLIB::ATOMIC_FETCH_XOR_8] = unsupported; + Table[RTLIB::ATOMIC_FETCH_XOR_16] = unsupported; + + Table[RTLIB::ATOMIC_FETCH_NAND_1] = unsupported; + Table[RTLIB::ATOMIC_FETCH_NAND_2] = unsupported; + Table[RTLIB::ATOMIC_FETCH_NAND_4] = unsupported; + Table[RTLIB::ATOMIC_FETCH_NAND_8] = unsupported; + Table[RTLIB::ATOMIC_FETCH_NAND_16] = unsupported; + } }; -static const char * -RuntimeLibcallNames[RTLIB::UNKNOWN_LIBCALL] = { -/* SHL_I16 */ "__ashlhi3", -/* SHL_I32 */ "__ashlsi3", -/* SHL_I64 */ "__ashldi3", -/* SHL_I128 */ "__ashlti3", -/* SRL_I16 */ "__lshrhi3", -/* SRL_I32 */ "__lshrsi3", -/* SRL_I64 */ "__lshrdi3", -/* SRL_I128 */ "__lshrti3", -/* SRA_I16 */ "__ashrhi3", -/* SRA_I32 */ "__ashrsi3", -/* SRA_I64 */ "__ashrdi3", -/* SRA_I128 */ "__ashrti3", -/* MUL_I8 */ "__mulqi3", -/* MUL_I16 */ "__mulhi3", -/* MUL_I32 */ "__mulsi3", -/* MUL_I64 */ "__muldi3", -/* MUL_I128 */ "__multi3", -/* MULO_I32 */ "__mulosi4", -/* MULO_I64 */ "__mulodi4", -/* MULO_I128 */ "__muloti4", -/* SDIV_I8 */ "__divqi3", -/* SDIV_I16 */ "__divhi3", -/* SDIV_I32 */ "__divsi3", -/* SDIV_I64 */ "__divdi3", -/* SDIV_I128 */ "__divti3", -/* UDIV_I8 */ "__udivqi3", -/* UDIV_I16 */ "__udivhi3", -/* UDIV_I32 */ "__udivsi3", -/* UDIV_I64 */ "__udivdi3", -/* UDIV_I128 */ "__udivti3", -/* SREM_I8 */ "__modqi3", -/* SREM_I16 */ "__modhi3", -/* SREM_I32 */ "__modsi3", -/* SREM_I64 */ "__moddi3", -/* SREM_I128 */ "__modti3", -/* UREM_I8 */ "__umodqi3", -/* UREM_I16 */ "__umodhi3", -/* UREM_I32 */ "__umodsi3", -/* UREM_I64 */ "__umoddi3", -/* UREM_I128 */ "__umodti3", -/* SDIVREM_I8 */ nullptr, -/* SDIVREM_I16 */ nullptr, -/* SDIVREM_I32 */ nullptr, -/* SDIVREM_I64 */ nullptr, -/* SDIVREM_I128 */ nullptr, -/* UDIVREM_I8 */ nullptr, -/* UDIVREM_I16 */ nullptr, -/* UDIVREM_I32 */ nullptr, -/* UDIVREM_I64 */ nullptr, -/* UDIVREM_I128 */ nullptr, -/* NEG_I32 */ "__negsi2", -/* NEG_I64 */ "__negdi2", -/* ADD_F32 */ "__addsf3", -/* ADD_F64 */ "__adddf3", -/* ADD_F80 */ nullptr, -/* ADD_F128 */ "__addtf3", -/* ADD_PPCF128 */ nullptr, -/* SUB_F32 */ "__subsf3", -/* SUB_F64 */ "__subdf3", -/* SUB_F80 */ nullptr, -/* SUB_F128 */ "__subtf3", -/* SUB_PPCF128 */ nullptr, -/* MUL_F32 */ "__mulsf3", -/* MUL_F64 */ "__muldf3", -/* MUL_F80 */ nullptr, -/* MUL_F128 */ "__multf3", -/* MUL_PPCF128 */ nullptr, -/* DIV_F32 */ "__divsf3", -/* DIV_F64 */ "__divdf3", -/* DIV_F80 */ nullptr, -/* DIV_F128 */ "__divtf3", -/* DIV_PPCF128 */ nullptr, -/* REM_F32 */ "fmodf", -/* REM_F64 */ "fmod", -/* REM_F80 */ nullptr, -/* REM_F128 */ "fmodl", -/* REM_PPCF128 */ nullptr, -/* FMA_F32 */ "fmaf", -/* FMA_F64 */ "fma", -/* FMA_F80 */ nullptr, -/* FMA_F128 */ "fmal", -/* FMA_PPCF128 */ nullptr, -/* POWI_F32 */ "__powisf2", -/* POWI_F64 */ "__powidf2", -/* POWI_F80 */ nullptr, -/* POWI_F128 */ "__powitf2", -/* POWI_PPCF128 */ nullptr, -/* SQRT_F32 */ "sqrtf", -/* SQRT_F64 */ "sqrt", -/* SQRT_F80 */ nullptr, -/* SQRT_F128 */ "sqrtl", -/* SQRT_PPCF128 */ nullptr, -/* LOG_F32 */ "logf", -/* LOG_F64 */ "log", -/* LOG_F80 */ nullptr, -/* LOG_F128 */ "logl", -/* LOG_PPCF128 */ nullptr, -/* LOG2_F32 */ "log2f", -/* LOG2_F64 */ "log2", -/* LOG2_F80 */ nullptr, -/* LOG2_F128 */ "log2l", -/* LOG2_PPCF128 */ nullptr, -/* LOG10_F32 */ "log10f", -/* LOG10_F64 */ "log10", -/* LOG10_F80 */ nullptr, -/* LOG10_F128 */ "log10l", -/* LOG10_PPCF128 */ nullptr, -/* EXP_F32 */ "expf", -/* EXP_F64 */ "exp", -/* EXP_F80 */ nullptr, -/* EXP_F128 */ "expl", -/* EXP_PPCF128 */ nullptr, -/* EXP2_F32 */ "exp2f", -/* EXP2_F64 */ 
"exp2", -/* EXP2_F80 */ nullptr, -/* EXP2_F128 */ "exp2l", -/* EXP2_PPCF128 */ nullptr, -/* SIN_F32 */ "sinf", -/* SIN_F64 */ "sin", -/* SIN_F80 */ nullptr, -/* SIN_F128 */ "sinl", -/* SIN_PPCF128 */ nullptr, -/* COS_F32 */ "cosf", -/* COS_F64 */ "cos", -/* COS_F80 */ nullptr, -/* COS_F128 */ "cosl", -/* COS_PPCF128 */ nullptr, -/* SINCOS_F32 */ "sincosf", -/* SINCOS_F64 */ "sincos", -/* SINCOS_F80 */ nullptr, -/* SINCOS_F128 */ "sincosl", -/* SINCOS_PPCF128 */ nullptr, -/* POW_F32 */ "powf", -/* POW_F64 */ "pow", -/* POW_F80 */ nullptr, -/* POW_F128 */ "powl", -/* POW_PPCF128 */ nullptr, -/* CEIL_F32 */ "ceilf", -/* CEIL_F64 */ "ceil", -/* CEIL_F80 */ nullptr, -/* CEIL_F128 */ "ceill", -/* CEIL_PPCF128 */ nullptr, -/* TRUNC_F32 */ "truncf", -/* TRUNC_F64 */ "trunc", -/* TRUNC_F80 */ nullptr, -/* TRUNC_F128 */ "truncl", -/* TRUNC_PPCF128 */ nullptr, -/* RINT_F32 */ "rintf", -/* RINT_F64 */ "rint", -/* RINT_F80 */ nullptr, -/* RINT_F128 */ "rintl", -/* RINT_PPCF128 */ nullptr, -/* NEARBYINT_F32 */ "nearbyintf", -/* NEARBYINT_F64 */ "nearbyint", -/* NEARBYINT_F80 */ nullptr, -/* NEARBYINT_F128 */ "nearbyintl", -/* NEARBYINT_PPCF128 */ nullptr, -/* ROUND_F32 */ "roundf", -/* ROUND_F64 */ "round", -/* ROUND_F80 */ nullptr, -/* ROUND_F128 */ "roundl", -/* ROUND_PPCF128 */ nullptr, -/* FLOOR_F32 */ "floorf", -/* FLOOR_F64 */ "floor", -/* FLOOR_F80 */ nullptr, -/* FLOOR_F128 */ "floorl", -/* FLOOR_PPCF128 */ nullptr, -/* COPYSIGN_F32 */ "copysignf", -/* COPYSIGN_F64 */ "copysign", -/* COPYSIGN_F80 */ nullptr, -/* COPYSIGN_F128 */ "copysignl", -/* COPYSIGN_PPCF128 */ nullptr, -/* FMIN_F32 */ "fminf", -/* FMIN_F64 */ "fmin", -/* FMIN_F80 */ nullptr, -/* FMIN_F128 */ "fminl", -/* FMIN_PPCF128 */ nullptr, -/* FMAX_F32 */ "fmaxf", -/* FMAX_F64 */ "fmax", -/* FMAX_F80 */ nullptr, -/* FMAX_F128 */ "fmaxl", -/* FMAX_PPCF128 */ nullptr, -/* FPEXT_F32_PPCF128 */ nullptr, -/* FPEXT_F64_PPCF128 */ nullptr, -/* FPEXT_F64_F128 */ "__extenddftf2", -/* FPEXT_F32_F128 */ "__extendsftf2", -/* FPEXT_F32_F64 */ "__extendsfdf2", -/* FPEXT_F16_F32 */ "__gnu_h2f_ieee", -/* FPROUND_F32_F16 */ "__gnu_f2h_ieee", -/* FPROUND_F64_F16 */ nullptr, -/* FPROUND_F80_F16 */ nullptr, -/* FPROUND_F128_F16 */ nullptr, -/* FPROUND_PPCF128_F16 */ nullptr, -/* FPROUND_F64_F32 */ "__truncdfsf2", -/* FPROUND_F80_F32 */ "__truncxfsf2", -/* FPROUND_F128_F32 */ "__trunctfsf2", -/* FPROUND_PPCF128_F32 */ nullptr, -/* FPROUND_F80_F64 */ "__truncxfdf2", -/* FPROUND_F128_F64 */ "__trunctfdf2", -/* FPROUND_PPCF128_F64 */ nullptr, -/* FPTOSINT_F32_I32 */ "__fixsfsi", -/* FPTOSINT_F32_I64 */ "__fixsfdi", -/* FPTOSINT_F32_I128 */ "__fixsfti", -/* FPTOSINT_F64_I32 */ "__fixdfsi", -/* FPTOSINT_F64_I64 */ "__fixdfdi", -/* FPTOSINT_F64_I128 */ "__fixdfti", -/* FPTOSINT_F80_I32 */ "__fixxfsi", -/* FPTOSINT_F80_I64 */ "__fixxfdi", -/* FPTOSINT_F80_I128 */ "__fixxfti", -/* FPTOSINT_F128_I32 */ "__fixtfsi", -/* FPTOSINT_F128_I64 */ "__fixtfdi", -/* FPTOSINT_F128_I128 */ "__fixtfti", -/* FPTOSINT_PPCF128_I32 */ nullptr, -/* FPTOSINT_PPCF128_I64 */ nullptr, -/* FPTOSINT_PPCF128_I128 */ nullptr, -/* FPTOUINT_F32_I32 */ "__fixunssfsi", -/* FPTOUINT_F32_I64 */ "__fixunssfdi", -/* FPTOUINT_F32_I128 */ "__fixunssfti", -/* FPTOUINT_F64_I32 */ "__fixunsdfsi", -/* FPTOUINT_F64_I64 */ "__fixunsdfdi", -/* FPTOUINT_F64_I128 */ "__fixunsdfti", -/* FPTOUINT_F80_I32 */ "__fixunsxfsi", -/* FPTOUINT_F80_I64 */ "__fixunsxfdi", -/* FPTOUINT_F80_I128 */ "__fixunsxfti", -/* FPTOUINT_F128_I32 */ "__fixunstfsi", -/* FPTOUINT_F128_I64 */ "__fixunstfdi", -/* FPTOUINT_F128_I128 */ 
"__fixunstfti", -/* FPTOUINT_PPCF128_I32 */ nullptr, -/* FPTOUINT_PPCF128_I64 */ nullptr, -/* FPTOUINT_PPCF128_I128 */ nullptr, -/* SINTTOFP_I32_F32 */ "__floatsisf", -/* SINTTOFP_I32_F64 */ "__floatsidf", -/* SINTTOFP_I32_F80 */ nullptr, -/* SINTTOFP_I32_F128 */ "__floatsitf", -/* SINTTOFP_I32_PPCF128 */ nullptr, -/* SINTTOFP_I64_F32 */ "__floatdisf", -/* SINTTOFP_I64_F64 */ "__floatdidf", -/* SINTTOFP_I64_F80 */ nullptr, -/* SINTTOFP_I64_F128 */ "__floatditf", -/* SINTTOFP_I64_PPCF128 */ nullptr, -/* SINTTOFP_I128_F32 */ "__floattisf", -/* SINTTOFP_I128_F64 */ "__floattidf", -/* SINTTOFP_I128_F80 */ nullptr, -/* SINTTOFP_I128_F128 */ "__floattitf", -/* SINTTOFP_I128_PPCF128 */ nullptr, -/* UINTTOFP_I32_F32 */ "__floatunsisf", -/* UINTTOFP_I32_F64 */ "__floatunsidf", -/* UINTTOFP_I32_F80 */ nullptr, -/* UINTTOFP_I32_F128 */ "__floatunsitf", -/* UINTTOFP_I32_PPCF128 */ nullptr, -/* UINTTOFP_I64_F32 */ "__floatundisf", -/* UINTTOFP_I64_F64 */ "__floatundidf", -/* UINTTOFP_I64_F80 */ nullptr, -/* UINTTOFP_I64_F128 */ "__floatunditf", -/* UINTTOFP_I64_PPCF128 */ nullptr, -/* UINTTOFP_I128_F32 */ "__floatuntisf", -/* UINTTOFP_I128_F64 */ "__floatuntidf", -/* UINTTOFP_I128_F80 */ nullptr, -/* UINTTOFP_I128_F128 */ "__floatuntitf", -/* UINTTOFP_I128_PPCF128 */ nullptr, -/* OEQ_F32 */ "__eqsf2", -/* OEQ_F64 */ "__eqdf2", -/* OEQ_F128 */ "__eqtf2", -/* OEQ_PPCF128 */ nullptr, -/* UNE_F32 */ "__nesf2", -/* UNE_F64 */ "__nedf2", -/* UNE_F128 */ "__netf2", -/* UNE_PPCF128 */ nullptr, -/* OGE_F32 */ "__gesf2", -/* OGE_F64 */ "__gedf2", -/* OGE_F128 */ "__getf2", -/* OGE_PPCF128 */ nullptr, -/* OLT_F32 */ "__ltsf2", -/* OLT_F64 */ "__ltdf2", -/* OLT_F128 */ "__lttf2", -/* OLT_PPCF128 */ nullptr, -/* OLE_F32 */ "__lesf2", -/* OLE_F64 */ "__ledf2", -/* OLE_F128 */ "__letf2", -/* OLE_PPCF128 */ nullptr, -/* OGT_F32 */ "__gtsf2", -/* OGT_F64 */ "__gtdf2", -/* OGT_F128 */ "__gttf2", -/* OGT_PPCF128 */ nullptr, -/* UO_F32 */ "__unordsf2", -/* UO_F64 */ "__unorddf2", -/* UO_F128 */ "__unordtf2", -/* UO_PPCF128 */ nullptr, -/* O_F32 */ "__unordsf2", -/* O_F64 */ "__unorddf2", -/* O_F128 */ "__unordtf2", -/* O_PPCF128 */ nullptr, -/* MEMCPY */ "memcpy", -/* MEMMOVE */ "memset", -/* MEMSET */ "memmove", -/* MEMCPY_ELEMENT_UNORDERED_ATOMIC_1 */ nullptr, -/* MEMCPY_ELEMENT_UNORDERED_ATOMIC_2 */ nullptr, -/* MEMCPY_ELEMENT_UNORDERED_ATOMIC_4 */ nullptr, -/* MEMCPY_ELEMENT_UNORDERED_ATOMIC_8 */ nullptr, -/* MEMCPY_ELEMENT_UNORDERED_ATOMIC_16 */ nullptr, -/* MEMMOVE_ELEMENT_UNORDERED_ATOMIC_1 */ nullptr, -/* MEMMOVE_ELEMENT_UNORDERED_ATOMIC_2 */ nullptr, -/* MEMMOVE_ELEMENT_UNORDERED_ATOMIC_4 */ nullptr, -/* MEMMOVE_ELEMENT_UNORDERED_ATOMIC_8 */ nullptr, -/* MEMMOVE_ELEMENT_UNORDERED_ATOMIC_16 */ nullptr, -/* MEMSET_ELEMENT_UNORDERED_ATOMIC_1 */ nullptr, -/* MEMSET_ELEMENT_UNORDERED_ATOMIC_2 */ nullptr, -/* MEMSET_ELEMENT_UNORDERED_ATOMIC_4 */ nullptr, -/* MEMSET_ELEMENT_UNORDERED_ATOMIC_8 */ nullptr, -/* MEMSET_ELEMENT_UNORDERED_ATOMIC_16 */ nullptr, -/* UNWIND_RESUME */ "_Unwind_Resume", -/* SYNC_VAL_COMPARE_AND_SWAP_1 */ "__sync_val_compare_and_swap_1", -/* SYNC_VAL_COMPARE_AND_SWAP_2 */ "__sync_val_compare_and_swap_2", -/* SYNC_VAL_COMPARE_AND_SWAP_4 */ "__sync_val_compare_and_swap_4", -/* SYNC_VAL_COMPARE_AND_SWAP_8 */ "__sync_val_compare_and_swap_8", -/* SYNC_VAL_COMPARE_AND_SWAP_16 */ "__sync_val_compare_and_swap_16", -/* SYNC_LOCK_TEST_AND_SET_1 */ "__sync_lock_test_and_set_1", -/* SYNC_LOCK_TEST_AND_SET_2 */ "__sync_lock_test_and_set_2", -/* SYNC_LOCK_TEST_AND_SET_4 */ "__sync_lock_test_and_set_4", -/* 
SYNC_LOCK_TEST_AND_SET_8 */ "__sync_lock_test_and_set_8", -/* SYNC_LOCK_TEST_AND_SET_16 */ "__sync_lock_test_and_set_16", -/* SYNC_FETCH_AND_ADD_1 */ "__sync_fetch_and_add_1", -/* SYNC_FETCH_AND_ADD_2 */ "__sync_fetch_and_add_2", -/* SYNC_FETCH_AND_ADD_4 */ "__sync_fetch_and_add_4", -/* SYNC_FETCH_AND_ADD_8 */ "__sync_fetch_and_add_8", -/* SYNC_FETCH_AND_ADD_16 */ "__sync_fetch_and_add_16", -/* SYNC_FETCH_AND_SUB_1 */ "__sync_fetch_and_sub_1", -/* SYNC_FETCH_AND_SUB_2 */ "__sync_fetch_and_sub_2", -/* SYNC_FETCH_AND_SUB_4 */ "__sync_fetch_and_sub_4", -/* SYNC_FETCH_AND_SUB_8 */ "__sync_fetch_and_sub_8", -/* SYNC_FETCH_AND_SUB_16 */ "__sync_fetch_and_sub_16", -/* SYNC_FETCH_AND_AND_1 */ "__sync_fetch_and_and_1", -/* SYNC_FETCH_AND_AND_2 */ "__sync_fetch_and_and_2", -/* SYNC_FETCH_AND_AND_4 */ "__sync_fetch_and_and_4", -/* SYNC_FETCH_AND_AND_8 */ "__sync_fetch_and_and_8", -/* SYNC_FETCH_AND_AND_16 */ "__sync_fetch_and_and_16", -/* SYNC_FETCH_AND_OR_1 */ "__sync_fetch_and_or_1", -/* SYNC_FETCH_AND_OR_2 */ "__sync_fetch_and_or_2", -/* SYNC_FETCH_AND_OR_4 */ "__sync_fetch_and_or_4", -/* SYNC_FETCH_AND_OR_8 */ "__sync_fetch_and_or_8", -/* SYNC_FETCH_AND_OR_16 */ "__sync_fetch_and_or_16", -/* SYNC_FETCH_AND_XOR_1 */ "__sync_fetch_and_xor_1", -/* SYNC_FETCH_AND_XOR_2 */ "__sync_fetch_and_xor_2", -/* SYNC_FETCH_AND_XOR_4 */ "__sync_fetch_and_xor_4", -/* SYNC_FETCH_AND_XOR_8 */ "__sync_fetch_and_xor_8", -/* SYNC_FETCH_AND_XOR_16 */ "__sync_fetch_and_xor_16", -/* SYNC_FETCH_AND_NAND_1 */ "__sync_fetch_and_nand_1", -/* SYNC_FETCH_AND_NAND_2 */ "__sync_fetch_and_nand_2", -/* SYNC_FETCH_AND_NAND_4 */ "__sync_fetch_and_nand_4", -/* SYNC_FETCH_AND_NAND_8 */ "__sync_fetch_and_nand_8", -/* SYNC_FETCH_AND_NAND_16 */ "__sync_fetch_and_nand_16", -/* SYNC_FETCH_AND_MAX_1 */ "__sync_fetch_and_max_1", -/* SYNC_FETCH_AND_MAX_2 */ "__sync_fetch_and_max_2", -/* SYNC_FETCH_AND_MAX_4 */ "__sync_fetch_and_max_4", -/* SYNC_FETCH_AND_MAX_8 */ "__sync_fetch_and_max_8", -/* SYNC_FETCH_AND_MAX_16 */ "__sync_fetch_and_max_16", -/* SYNC_FETCH_AND_UMAX_1 */ "__sync_fetch_and_umax_1", -/* SYNC_FETCH_AND_UMAX_2 */ "__sync_fetch_and_umax_2", -/* SYNC_FETCH_AND_UMAX_4 */ "__sync_fetch_and_umax_4", -/* SYNC_FETCH_AND_UMAX_8 */ "__sync_fetch_and_umax_8", -/* SYNC_FETCH_AND_UMAX_16 */ "__sync_fetch_and_umax_16", -/* SYNC_FETCH_AND_MIN_1 */ "__sync_fetch_and_min_1", -/* SYNC_FETCH_AND_MIN_2 */ "__sync_fetch_and_min_2", -/* SYNC_FETCH_AND_MIN_4 */ "__sync_fetch_and_min_4", -/* SYNC_FETCH_AND_MIN_8 */ "__sync_fetch_and_min_8", -/* SYNC_FETCH_AND_MIN_16 */ "__sync_fetch_and_min_16", -/* SYNC_FETCH_AND_UMIN_1 */ "__sync_fetch_and_umin_1", -/* SYNC_FETCH_AND_UMIN_2 */ "__sync_fetch_and_umin_2", -/* SYNC_FETCH_AND_UMIN_4 */ "__sync_fetch_and_umin_4", -/* SYNC_FETCH_AND_UMIN_8 */ "__sync_fetch_and_umin_8", -/* SYNC_FETCH_AND_UMIN_16 */ "__sync_fetch_and_umin_16", - -/* ATOMIC_LOAD */ "__atomic_load", -/* ATOMIC_LOAD_1 */ "__atomic_load_1", -/* ATOMIC_LOAD_2 */ "__atomic_load_2", -/* ATOMIC_LOAD_4 */ "__atomic_load_4", -/* ATOMIC_LOAD_8 */ "__atomic_load_8", -/* ATOMIC_LOAD_16 */ "__atomic_load_16", - -/* ATOMIC_STORE */ "__atomic_store", -/* ATOMIC_STORE_1 */ "__atomic_store_1", -/* ATOMIC_STORE_2 */ "__atomic_store_2", -/* ATOMIC_STORE_4 */ "__atomic_store_4", -/* ATOMIC_STORE_8 */ "__atomic_store_8", -/* ATOMIC_STORE_16 */ "__atomic_store_16", - -/* ATOMIC_EXCHANGE */ "__atomic_exchange", -/* ATOMIC_EXCHANGE_1 */ "__atomic_exchange_1", -/* ATOMIC_EXCHANGE_2 */ "__atomic_exchange_2", -/* ATOMIC_EXCHANGE_4 */ "__atomic_exchange_4", -/* 
ATOMIC_EXCHANGE_8 */ "__atomic_exchange_8", -/* ATOMIC_EXCHANGE_16 */ "__atomic_exchange_16", - -/* ATOMIC_COMPARE_EXCHANGE */ "__atomic_compare_exchange", -/* ATOMIC_COMPARE_EXCHANGE_1 */ "__atomic_compare_exchange_1", -/* ATOMIC_COMPARE_EXCHANGE_2 */ "__atomic_compare_exchange_2", -/* ATOMIC_COMPARE_EXCHANGE_4 */ "__atomic_compare_exchange_4", -/* ATOMIC_COMPARE_EXCHANGE_8 */ "__atomic_compare_exchange_8", -/* ATOMIC_COMPARE_EXCHANGE_16 */ "__atomic_compare_exchange_16", +ManagedStatic RuntimeLibcallSignatures; + +// Maps libcall names to their RTLIB::Libcall number. Builds the map in a +// constructor for use with ManagedStatic +struct StaticLibcallNameMap { + StringMap Map; + StaticLibcallNameMap() { +#define HANDLE_LIBCALL(code, name) \ + if ((const char *)name && \ + RuntimeLibcallSignatures->Table[RTLIB::code] != unsupported) { \ + assert(Map.find(StringRef::withNullAsEmpty(name)) == Map.end() && \ + "duplicate libcall names in name map"); \ + Map[StringRef::withNullAsEmpty(name)] = RTLIB::code; \ + } +#include "llvm/CodeGen/RuntimeLibcalls.def" +#undef HANDLE_LIBCALL + } +}; -/* ATOMIC_FETCH_ADD_1 */ "__atomic_fetch_add_1", -/* ATOMIC_FETCH_ADD_2 */ "__atomic_fetch_add_2", -/* ATOMIC_FETCH_ADD_4 */ "__atomic_fetch_add_4", -/* ATOMIC_FETCH_ADD_8 */ "__atomic_fetch_add_8", -/* ATOMIC_FETCH_ADD_16 */ "__atomic_fetch_add_16", -/* ATOMIC_FETCH_SUB_1 */ "__atomic_fetch_sub_1", -/* ATOMIC_FETCH_SUB_2 */ "__atomic_fetch_sub_2", -/* ATOMIC_FETCH_SUB_4 */ "__atomic_fetch_sub_4", -/* ATOMIC_FETCH_SUB_8 */ "__atomic_fetch_sub_8", -/* ATOMIC_FETCH_SUB_16 */ "__atomic_fetch_sub_16", -/* ATOMIC_FETCH_AND_1 */ "__atomic_fetch_and_1", -/* ATOMIC_FETCH_AND_2 */ "__atomic_fetch_and_2", -/* ATOMIC_FETCH_AND_4 */ "__atomic_fetch_and_4", -/* ATOMIC_FETCH_AND_8 */ "__atomic_fetch_and_8", -/* ATOMIC_FETCH_AND_16 */ "__atomic_fetch_and_16", -/* ATOMIC_FETCH_OR_1 */ "__atomic_fetch_or_1", -/* ATOMIC_FETCH_OR_2 */ "__atomic_fetch_or_2", -/* ATOMIC_FETCH_OR_4 */ "__atomic_fetch_or_4", -/* ATOMIC_FETCH_OR_8 */ "__atomic_fetch_or_8", -/* ATOMIC_FETCH_OR_16 */ "__atomic_fetch_or_16", -/* ATOMIC_FETCH_XOR_1 */ "__atomic_fetch_xor_1", -/* ATOMIC_FETCH_XOR_2 */ "__atomic_fetch_xor_2", -/* ATOMIC_FETCH_XOR_4 */ "__atomic_fetch_xor_4", -/* ATOMIC_FETCH_XOR_8 */ "__atomic_fetch_xor_8", -/* ATOMIC_FETCH_XOR_16 */ "__atomic_fetch_xor_16", -/* ATOMIC_FETCH_NAND_1 */ "__atomic_fetch_nand_1", -/* ATOMIC_FETCH_NAND_2 */ "__atomic_fetch_nand_2", -/* ATOMIC_FETCH_NAND_4 */ "__atomic_fetch_nand_4", -/* ATOMIC_FETCH_NAND_8 */ "__atomic_fetch_nand_8", -/* ATOMIC_FETCH_NAND_16 */ "__atomic_fetch_nand_16", +} // end anonymous namespace -/* STACKPROTECTOR_CHECK_FAIL */ "__stack_chk_fail", -/* DEOPTIMIZE */ "__llvm_deoptimize", -}; void llvm::GetSignature(const WebAssemblySubtarget &Subtarget, RTLIB::Libcall LC, SmallVectorImpl &Rets, @@ -1001,7 +491,8 @@ void llvm::GetSignature(const WebAssemblySubtarget &Subtarget, WebAssembly::ExprType::I64 : WebAssembly::ExprType::I32; - switch (RuntimeLibcallSignatures[LC]) { + auto& Table = RuntimeLibcallSignatures->Table; + switch (Table[LC]) { case func: break; case f32_func_f32: @@ -1309,15 +800,14 @@ void llvm::GetSignature(const WebAssemblySubtarget &Subtarget, } } +static ManagedStatic LibcallNameMap; +// TODO: If the RTLIB::Libcall-taking flavor of GetSignature remains unsed +// other than here, just roll its logic into this version. 
void llvm::GetSignature(const WebAssemblySubtarget &Subtarget, const char *Name, SmallVectorImpl &Rets, SmallVectorImpl &Params) { - assert(strcmp(RuntimeLibcallNames[RTLIB::DEOPTIMIZE], "__llvm_deoptimize") == - 0); - - for (size_t i = 0, e = RTLIB::UNKNOWN_LIBCALL; i < e; ++i) - if (RuntimeLibcallNames[i] && strcmp(RuntimeLibcallNames[i], Name) == 0) - return GetSignature(Subtarget, RTLIB::Libcall(i), Rets, Params); - - llvm_unreachable("unexpected runtime library name"); + auto& Map = LibcallNameMap->Map; + auto val = Map.find(Name); + assert(val != Map.end() && "unexpected runtime library name"); + return GetSignature(Subtarget, val->second, Rets, Params); } diff --git a/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp b/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp index 8173364fa880..22a5a9099e72 100644 --- a/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp +++ b/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp @@ -29,7 +29,7 @@ #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblySubtarget.h" #include "llvm/Analysis/TargetLibraryInfo.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineRegisterInfo.h" diff --git a/lib/Target/WebAssembly/WebAssemblySubtarget.cpp b/lib/Target/WebAssembly/WebAssemblySubtarget.cpp index 9e122a5f1574..78602a35e649 100644 --- a/lib/Target/WebAssembly/WebAssemblySubtarget.cpp +++ b/lib/Target/WebAssembly/WebAssemblySubtarget.cpp @@ -41,8 +41,8 @@ WebAssemblySubtarget::WebAssemblySubtarget(const Triple &TT, const std::string &FS, const TargetMachine &TM) : WebAssemblyGenSubtargetInfo(TT, CPU, FS), HasSIMD128(false), - HasAtomics(false), HasNontrappingFPToInt(false), CPUString(CPU), - TargetTriple(TT), FrameLowering(), + HasAtomics(false), HasNontrappingFPToInt(false), HasSignExt(false), + CPUString(CPU), TargetTriple(TT), FrameLowering(), InstrInfo(initializeSubtargetDependencies(FS)), TSInfo(), TLInfo(TM, *this) {} diff --git a/lib/Target/WebAssembly/WebAssemblySubtarget.h b/lib/Target/WebAssembly/WebAssemblySubtarget.h index a6bf0b6d54f6..c999f501a9c9 100644 --- a/lib/Target/WebAssembly/WebAssemblySubtarget.h +++ b/lib/Target/WebAssembly/WebAssemblySubtarget.h @@ -32,6 +32,7 @@ class WebAssemblySubtarget final : public WebAssemblyGenSubtargetInfo { bool HasSIMD128; bool HasAtomics; bool HasNontrappingFPToInt; + bool HasSignExt; /// String name of used CPU. std::string CPUString; @@ -78,6 +79,7 @@ class WebAssemblySubtarget final : public WebAssemblyGenSubtargetInfo { bool hasSIMD128() const { return HasSIMD128; } bool hasAtomics() const { return HasAtomics; } bool hasNontrappingFPToInt() const { return HasNontrappingFPToInt; } + bool hasSignExt() const { return HasSignExt; } /// Parses features string setting specified subtarget options. Definition of /// function is auto generated by tblgen. 
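Taken together, the WebAssemblyRuntimeLibcallSignatures.cpp hunks above drop the hand-maintained RuntimeLibcallNames array, which the name-based GetSignature previously scanned linearly on every call, in favor of a ManagedStatic signature table plus a StringMap built once from RuntimeLibcalls.def. The standalone sketch below shows the same lookup pattern outside of LLVM; the type and helper names in it (Libcall, getNameMap, lookupLibcall) are illustrative stand-ins, not the real API.

// Sketch of the pattern the patch adopts: build a name-to-libcall map once,
// on first use, and answer every later query with a hash lookup instead of a
// linear scan over a parallel array of names.
#include <cassert>
#include <string>
#include <unordered_map>

enum class Libcall { MemCpy, MemMove, MemSet, Unknown };

static const std::unordered_map<std::string, Libcall> &getNameMap() {
  // Built lazily and cached. This is roughly what ManagedStatic buys the
  // patch, minus the controlled destruction order LLVM also needs.
  static const std::unordered_map<std::string, Libcall> Map = {
      {"memcpy", Libcall::MemCpy},
      {"memmove", Libcall::MemMove},
      {"memset", Libcall::MemSet},
  };
  return Map;
}

static Libcall lookupLibcall(const std::string &Name) {
  auto It = getNameMap().find(Name);
  assert(It != getNameMap().end() && "unexpected runtime library name");
  return It->second;
}

A caller resolves a symbol name once, for example lookupLibcall("memcpy"), and then dispatches on the resulting enum exactly as the big switch in GetSignature does.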
diff --git a/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp index 8ad74d9db7b0..d38cde74d2ec 100644 --- a/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp +++ b/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp @@ -146,10 +146,9 @@ class WebAssemblyPassConfig final : public TargetPassConfig { }; } // end anonymous namespace -TargetIRAnalysis WebAssemblyTargetMachine::getTargetIRAnalysis() { - return TargetIRAnalysis([this](const Function &F) { - return TargetTransformInfo(WebAssemblyTTIImpl(this, F)); - }); +TargetTransformInfo +WebAssemblyTargetMachine::getTargetTransformInfo(const Function &F) { + return TargetTransformInfo(WebAssemblyTTIImpl(this, F)); } TargetPassConfig * @@ -175,6 +174,9 @@ void WebAssemblyPassConfig::addIRPasses() { // control specifically what gets lowered. addPass(createAtomicExpandPass()); + // Lower .llvm.global_dtors into .llvm_global_ctors with __cxa_atexit calls. + addPass(createWebAssemblyLowerGlobalDtors()); + // Fix function bitcasts, as WebAssembly requires caller and callee signatures // to match. addPass(createWebAssemblyFixFunctionBitcasts()); diff --git a/lib/Target/WebAssembly/WebAssemblyTargetMachine.h b/lib/Target/WebAssembly/WebAssemblyTargetMachine.h index 224849526514..dd826befd117 100644 --- a/lib/Target/WebAssembly/WebAssemblyTargetMachine.h +++ b/lib/Target/WebAssembly/WebAssemblyTargetMachine.h @@ -43,8 +43,7 @@ class WebAssemblyTargetMachine final : public LLVMTargetMachine { return TLOF.get(); } - /// \brief Get the TargetIRAnalysis for this target. - TargetIRAnalysis getTargetIRAnalysis() override; + TargetTransformInfo getTargetTransformInfo(const Function &F) override; bool usesPhysRegsForPEI() const override { return false; } }; diff --git a/lib/Target/WebAssembly/known_gcc_test_failures.txt b/lib/Target/WebAssembly/known_gcc_test_failures.txt index 16694a7a863f..242f96fe2194 100644 --- a/lib/Target/WebAssembly/known_gcc_test_failures.txt +++ b/lib/Target/WebAssembly/known_gcc_test_failures.txt @@ -21,6 +21,10 @@ comp-goto-1.c 980526-1.c 990208-1.c +label13.C O0 +label13a.C O0 +label3.C +pr42462.C O0 # WebAssembly hasn't implemented (will never?) 
__builtin_return_address 20010122-1.c @@ -76,12 +80,52 @@ pr41935.c 920728-1.c pr28865.c widechar-2.c +attr-alias-1.C +attr-alias-2.C +attr-ifunc-1.C +attr-ifunc-2.C +attr-ifunc-3.C +attr-ifunc-4.C +complit12.C +va-arg-pack-1.C +va-arg-pack-len-1.C +builtin-line1.C +builtin-location.C +devirt-6.C # bad main signature +devirt-13.C # bad main signature +devirt-14.C # bad main signature +devirt-21.C # bad main signature +devirt-23.C # bad main signature # Untriaged: Assertion failure in WasmObjectWriter::applyRelocations 20071220-2.c wasm-o,O0 -# Untriaged: Assertion failure in WasmObjectWriter::getFunctionType -20051012-1.c wasm-o,O0 -920501-1.c wasm-o,O0 -921208-2.c wasm-o,O0 -call-trap-1.c wasm-o,O0 +# Untriaged C++ failures +spec5.C +addr1.C +ef_test.C +friend18.C +member2.C +new39.C +new40.C +nrv8.C +offsetof9.C +opaque-1.C +pr19650.C +pr37146-1.C +pr46149.C +pr59470.C +rtti2.C +self1.C +thread_local3.C +thread_local3g.C +thread_local4.C +thread_local4g.C +thread_local5.C +thread_local5g.C +type-generic-1.C +vbase8-10.C +vbase8-21.C +vbase8-22.C +vbase8-4.C +vector1.C diff --git a/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp b/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp index 1c38757b4b03..2c376fd062ca 100644 --- a/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp +++ b/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp @@ -610,7 +610,7 @@ class X86AddressSanitizer32 : public X86AddressSanitizer { EmitInstruction(Out, MCInstBuilder(X86::CLD)); EmitInstruction(Out, MCInstBuilder(X86::MMX_EMMS)); - EmitInstruction(Out, MCInstBuilder(X86::AND64ri8) + EmitInstruction(Out, MCInstBuilder(X86::AND32ri8) .addReg(X86::ESP) .addReg(X86::ESP) .addImm(-16)); diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 4dc0466344b7..96b464a2f13d 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -825,7 +825,7 @@ class X86AsmParser : public MCTargetAsmParser { bool ParseIntelDotOperator(IntelExprStateMachine &SM, SMLoc &End); unsigned IdentifyIntelInlineAsmOperator(StringRef Name); unsigned ParseIntelInlineAsmOperator(unsigned OpKind); - std::unique_ptr ParseRoundingModeOp(SMLoc Start, SMLoc End); + std::unique_ptr ParseRoundingModeOp(SMLoc Start); bool ParseIntelNamedOperator(StringRef Name, IntelExprStateMachine &SM); void RewriteIntelExpression(IntelExprStateMachine &SM, SMLoc Start, SMLoc End); @@ -1098,19 +1098,31 @@ bool X86AsmParser::ParseRegister(unsigned &RegNo, EndLoc = Parser.getTok().getEndLoc(); - // If this is "db[0-7]", match it as an alias - // for dr[0-7]. - if (RegNo == 0 && Tok.getString().size() == 3 && - Tok.getString().startswith("db")) { - switch (Tok.getString()[2]) { - case '0': RegNo = X86::DR0; break; - case '1': RegNo = X86::DR1; break; - case '2': RegNo = X86::DR2; break; - case '3': RegNo = X86::DR3; break; - case '4': RegNo = X86::DR4; break; - case '5': RegNo = X86::DR5; break; - case '6': RegNo = X86::DR6; break; - case '7': RegNo = X86::DR7; break; + // If this is "db[0-15]", match it as an alias + // for dr[0-15]. 
+ if (RegNo == 0 && Tok.getString().startswith("db")) { + if (Tok.getString().size() == 3) { + switch (Tok.getString()[2]) { + case '0': RegNo = X86::DR0; break; + case '1': RegNo = X86::DR1; break; + case '2': RegNo = X86::DR2; break; + case '3': RegNo = X86::DR3; break; + case '4': RegNo = X86::DR4; break; + case '5': RegNo = X86::DR5; break; + case '6': RegNo = X86::DR6; break; + case '7': RegNo = X86::DR7; break; + case '8': RegNo = X86::DR8; break; + case '9': RegNo = X86::DR9; break; + } + } else if (Tok.getString().size() == 4 && Tok.getString()[2] == '1') { + switch (Tok.getString()[3]) { + case '0': RegNo = X86::DR10; break; + case '1': RegNo = X86::DR11; break; + case '2': RegNo = X86::DR12; break; + case '3': RegNo = X86::DR13; break; + case '4': RegNo = X86::DR14; break; + case '5': RegNo = X86::DR15; break; + } } if (RegNo != 0) { @@ -1583,7 +1595,7 @@ bool X86AsmParser::ParseIntelInlineAsmIdentifier(const MCExpr *&Val, //ParseRoundingModeOp - Parse AVX-512 rounding mode operand std::unique_ptr -X86AsmParser::ParseRoundingModeOp(SMLoc Start, SMLoc End) { +X86AsmParser::ParseRoundingModeOp(SMLoc Start) { MCAsmParser &Parser = getParser(); const AsmToken &Tok = Parser.getTok(); // Eat "{" and mark the current place. @@ -1604,6 +1616,7 @@ X86AsmParser::ParseRoundingModeOp(SMLoc Start, SMLoc End) { Parser.Lex(); // Eat the sae if (!getLexer().is(AsmToken::RCurly)) return ErrorOperand(Tok.getLoc(), "Expected } at this point"); + SMLoc End = Tok.getEndLoc(); Parser.Lex(); // Eat "}" const MCExpr *RndModeOp = MCConstantExpr::create(rndMode, Parser.getContext()); @@ -1782,7 +1795,7 @@ std::unique_ptr X86AsmParser::ParseIntelOperand() { // Rounding mode operand. if (getSTI().getFeatureBits()[X86::FeatureAVX512] && getLexer().is(AsmToken::LCurly)) - return ParseRoundingModeOp(Start, End); + return ParseRoundingModeOp(Start); // Register operand. unsigned RegNo = 0; @@ -1883,9 +1896,9 @@ std::unique_ptr X86AsmParser::ParseATTOperand() { return X86Operand::CreateImm(Val, Start, End); } case AsmToken::LCurly:{ - SMLoc Start = Parser.getTok().getLoc(), End; + SMLoc Start = Parser.getTok().getLoc(); if (getSTI().getFeatureBits()[X86::FeatureAVX512]) - return ParseRoundingModeOp(Start, End); + return ParseRoundingModeOp(Start); return ErrorOperand(Start, "Unexpected '{' in expression"); } } @@ -2363,12 +2376,20 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, .Cases("repne", "repnz", X86::IP_HAS_REPEAT_NE) .Default(X86::IP_NO_PREFIX); // Invalid prefix (impossible) Flags |= Prefix; + if (getLexer().is(AsmToken::EndOfStatement)) { + // We don't have real instr with the given prefix + // let's use the prefix as the instr. 
+ // TODO: there could be several prefixes one after another + Flags = X86::IP_NO_PREFIX; + break; + } Name = Parser.getTok().getString(); Parser.Lex(); // eat the prefix - // Hack: we could have something like + // Hack: we could have something like "rep # some comment" or // "lock; cmpxchg16b $1" or "lock\0A\09incl" or "lock/incl" while (Name.startswith(";") || Name.startswith("\n") || - Name.startswith("\t") || Name.startswith("/")) { + Name.startswith("#") || Name.startswith("\t") || + Name.startswith("/")) { Name = Parser.getTok().getString(); Parser.Lex(); // go to next prefix or instr } diff --git a/lib/Target/X86/AsmParser/X86Operand.h b/lib/Target/X86/AsmParser/X86Operand.h index 43a0561e769b..b3bcf4034eda 100644 --- a/lib/Target/X86/AsmParser/X86Operand.h +++ b/lib/Target/X86/AsmParser/X86Operand.h @@ -10,6 +10,7 @@ #ifndef LLVM_LIB_TARGET_X86_ASMPARSER_X86OPERAND_H #define LLVM_LIB_TARGET_X86_ASMPARSER_X86OPERAND_H +#include "InstPrinter/X86IntelInstPrinter.h" #include "MCTargetDesc/X86MCTargetDesc.h" #include "X86AsmParserCommon.h" #include "llvm/ADT/STLExtras.h" @@ -77,7 +78,7 @@ struct X86Operand : public MCParsedAsmOperand { }; X86Operand(KindTy K, SMLoc Start, SMLoc End) - : Kind(K), StartLoc(Start), EndLoc(End) {} + : Kind(K), StartLoc(Start), EndLoc(End) {} StringRef getSymName() override { return SymName; } void *getOpDecl() override { return OpDecl; } @@ -95,7 +96,52 @@ struct X86Operand : public MCParsedAsmOperand { /// getOffsetOfLoc - Get the location of the offset operator. SMLoc getOffsetOfLoc() const override { return OffsetOfLoc; } - void print(raw_ostream &OS) const override {} + void print(raw_ostream &OS) const override { + + auto PrintImmValue = [&](const MCExpr *Val, const char *VName) { + if (Val->getKind() == MCExpr::Constant) { + if (auto Imm = cast(Val)->getValue()) + OS << VName << Imm; + } else if (Val->getKind() == MCExpr::SymbolRef) { + if (auto *SRE = dyn_cast(Val)) { + const MCSymbol &Sym = SRE->getSymbol(); + if (auto SymName = Sym.getName().data()) + OS << VName << SymName; + } + } + }; + + switch (Kind) { + case Token: + OS << Tok.Data; + break; + case Register: + OS << "Reg:" << X86IntelInstPrinter::getRegisterName(Reg.RegNo); + break; + case Immediate: + PrintImmValue(Imm.Val, "Imm:"); + break; + case Prefix: + OS << "Prefix:" << Pref.Prefixes; + break; + case Memory: + OS << "Memory: ModeSize=" << Mem.ModeSize; + if (Mem.Size) + OS << ",Size=" << Mem.Size; + if (Mem.BaseReg) + OS << ",BaseReg=" << X86IntelInstPrinter::getRegisterName(Mem.BaseReg); + if (Mem.IndexReg) + OS << ",IndexReg=" + << X86IntelInstPrinter::getRegisterName(Mem.IndexReg); + if (Mem.Scale) + OS << ",Scale=" << Mem.Scale; + if (Mem.Disp) + PrintImmValue(Mem.Disp, ",Disp="); + if (Mem.SegReg) + OS << ",SegReg=" << X86IntelInstPrinter::getRegisterName(Mem.SegReg); + break; + } + } StringRef getToken() const { assert(Kind == Token && "Invalid access!"); diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt index 7e0df2941467..ed79f4fec4e4 100644 --- a/lib/Target/X86/CMakeLists.txt +++ b/lib/Target/X86/CMakeLists.txt @@ -23,6 +23,7 @@ add_public_tablegen_target(X86CommonTableGen) set(sources X86AsmPrinter.cpp X86CallFrameOptimization.cpp + X86CallingConv.cpp X86CallLowering.cpp X86CmovConversion.cpp X86DomainReassignment.cpp @@ -36,6 +37,7 @@ set(sources X86InstructionSelector.cpp X86ISelDAGToDAG.cpp X86ISelLowering.cpp + X86IndirectBranchTracking.cpp X86InterleavedAccess.cpp X86InstrFMA3Info.cpp X86InstrInfo.cpp @@ -48,6 +50,7 @@ set(sources 
X86PadShortFunction.cpp X86RegisterBankInfo.cpp X86RegisterInfo.cpp + X86RetpolineThunks.cpp X86SelectionDAGInfo.cpp X86ShuffleDecodeConstantPool.cpp X86Subtarget.cpp @@ -57,7 +60,6 @@ set(sources X86VZeroUpper.cpp X86WinAllocaExpander.cpp X86WinEHState.cpp - X86CallingConv.cpp ) add_llvm_target(X86CodeGen ${sources}) diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp index 2890fd6156e1..a46f22ff40f5 100644 --- a/lib/Target/X86/InstPrinter/X86InstComments.cpp +++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp @@ -583,12 +583,12 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, case X86::VPSLLDQYri: case X86::VPSLLDQZ128rr: case X86::VPSLLDQZ256rr: - case X86::VPSLLDQZ512rr: + case X86::VPSLLDQZrr: Src1Name = getRegName(MI->getOperand(1).getReg()); LLVM_FALLTHROUGH; case X86::VPSLLDQZ128rm: case X86::VPSLLDQZ256rm: - case X86::VPSLLDQZ512rm: + case X86::VPSLLDQZrm: DestName = getRegName(MI->getOperand(0).getReg()); if (MI->getOperand(NumOperands - 1).isImm()) DecodePSLLDQMask(getRegOperandVectorVT(MI, MVT::i8, 0), @@ -601,12 +601,12 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, case X86::VPSRLDQYri: case X86::VPSRLDQZ128rr: case X86::VPSRLDQZ256rr: - case X86::VPSRLDQZ512rr: + case X86::VPSRLDQZrr: Src1Name = getRegName(MI->getOperand(1).getReg()); LLVM_FALLTHROUGH; case X86::VPSRLDQZ128rm: case X86::VPSRLDQZ256rm: - case X86::VPSRLDQZ512rm: + case X86::VPSRLDQZrm: DestName = getRegName(MI->getOperand(0).getReg()); if (MI->getOperand(NumOperands - 1).isImm()) DecodePSRLDQMask(getRegOperandVectorVT(MI, MVT::i8, 0), diff --git a/lib/Target/X86/InstPrinter/X86InstComments.h b/lib/Target/X86/InstPrinter/X86InstComments.h index c6d0d85a7d3d..629c02c95c7f 100644 --- a/lib/Target/X86/InstPrinter/X86InstComments.h +++ b/lib/Target/X86/InstPrinter/X86InstComments.h @@ -15,10 +15,13 @@ #ifndef LLVM_LIB_TARGET_X86_INSTPRINTER_X86INSTCOMMENTS_H #define LLVM_LIB_TARGET_X86_INSTPRINTER_X86INSTCOMMENTS_H +#include "llvm/CodeGen/MachineInstr.h" + namespace llvm { enum AsmComments { - AC_EVEX_2_VEX = 0x2 // For instr that was compressed from EVEX to VEX. + // For instr that was compressed from EVEX to VEX. + AC_EVEX_2_VEX = MachineInstr::TAsmComments }; class MCInst; diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index 580570ce29cb..3e68120041c0 100644 --- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -20,12 +20,9 @@ #include "llvm/MC/MCMachObjectWriter.h" #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCSectionCOFF.h" -#include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -70,19 +67,10 @@ class X86ELFObjectWriter : public MCELFObjectTargetWriter { }; class X86AsmBackend : public MCAsmBackend { - const StringRef CPU; - bool HasNopl; - const uint64_t MaxNopLength; + const MCSubtargetInfo &STI; public: - X86AsmBackend(const Target &T, StringRef CPU) - : MCAsmBackend(), CPU(CPU), - MaxNopLength((CPU == "slm" || CPU == "silvermont") ? 
7 : 15) { - HasNopl = CPU != "generic" && CPU != "i386" && CPU != "i486" && - CPU != "i586" && CPU != "pentium" && CPU != "pentium-mmx" && - CPU != "i686" && CPU != "k6" && CPU != "k6-2" && CPU != "k6-3" && - CPU != "geode" && CPU != "winchip-c6" && CPU != "winchip2" && - CPU != "c3" && CPU != "c3-2" && CPU != "lakemont"; - } + X86AsmBackend(const Target &T, const MCSubtargetInfo &STI) + : MCAsmBackend(), STI(STI) {} unsigned getNumFixupKinds() const override { return X86::NumTargetFixupKinds; @@ -349,14 +337,15 @@ bool X86AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { }; // This CPU doesn't support long nops. If needed add more. - // FIXME: Can we get this from the subtarget somehow? // FIXME: We could generated something better than plain 0x90. - if (!HasNopl) { + if (!STI.getFeatureBits()[X86::FeatureNOPL]) { for (uint64_t i = 0; i < Count; ++i) OW->write8(0x90); return true; } + uint64_t MaxNopLength = STI.getFeatureBits()[X86::ProcIntelSLM] ? 7 : 15; + // 15 is the longest single nop instruction. Emit as many 15-byte nops as // needed, then emit a nop of the remaining length. do { @@ -380,14 +369,15 @@ namespace { class ELFX86AsmBackend : public X86AsmBackend { public: uint8_t OSABI; - ELFX86AsmBackend(const Target &T, uint8_t OSABI, StringRef CPU) - : X86AsmBackend(T, CPU), OSABI(OSABI) {} + ELFX86AsmBackend(const Target &T, uint8_t OSABI, const MCSubtargetInfo &STI) + : X86AsmBackend(T, STI), OSABI(OSABI) {} }; class ELFX86_32AsmBackend : public ELFX86AsmBackend { public: - ELFX86_32AsmBackend(const Target &T, uint8_t OSABI, StringRef CPU) - : ELFX86AsmBackend(T, OSABI, CPU) {} + ELFX86_32AsmBackend(const Target &T, uint8_t OSABI, + const MCSubtargetInfo &STI) + : ELFX86AsmBackend(T, OSABI, STI) {} std::unique_ptr createObjectWriter(raw_pwrite_stream &OS) const override { @@ -397,8 +387,9 @@ class ELFX86_32AsmBackend : public ELFX86AsmBackend { class ELFX86_X32AsmBackend : public ELFX86AsmBackend { public: - ELFX86_X32AsmBackend(const Target &T, uint8_t OSABI, StringRef CPU) - : ELFX86AsmBackend(T, OSABI, CPU) {} + ELFX86_X32AsmBackend(const Target &T, uint8_t OSABI, + const MCSubtargetInfo &STI) + : ELFX86AsmBackend(T, OSABI, STI) {} std::unique_ptr createObjectWriter(raw_pwrite_stream &OS) const override { @@ -409,8 +400,9 @@ class ELFX86_X32AsmBackend : public ELFX86AsmBackend { class ELFX86_IAMCUAsmBackend : public ELFX86AsmBackend { public: - ELFX86_IAMCUAsmBackend(const Target &T, uint8_t OSABI, StringRef CPU) - : ELFX86AsmBackend(T, OSABI, CPU) {} + ELFX86_IAMCUAsmBackend(const Target &T, uint8_t OSABI, + const MCSubtargetInfo &STI) + : ELFX86AsmBackend(T, OSABI, STI) {} std::unique_ptr createObjectWriter(raw_pwrite_stream &OS) const override { @@ -421,8 +413,9 @@ class ELFX86_IAMCUAsmBackend : public ELFX86AsmBackend { class ELFX86_64AsmBackend : public ELFX86AsmBackend { public: - ELFX86_64AsmBackend(const Target &T, uint8_t OSABI, StringRef CPU) - : ELFX86AsmBackend(T, OSABI, CPU) {} + ELFX86_64AsmBackend(const Target &T, uint8_t OSABI, + const MCSubtargetInfo &STI) + : ELFX86AsmBackend(T, OSABI, STI) {} std::unique_ptr createObjectWriter(raw_pwrite_stream &OS) const override { @@ -434,8 +427,9 @@ class WindowsX86AsmBackend : public X86AsmBackend { bool Is64Bit; public: - WindowsX86AsmBackend(const Target &T, bool is64Bit, StringRef CPU) - : X86AsmBackend(T, CPU) + WindowsX86AsmBackend(const Target &T, bool is64Bit, + const MCSubtargetInfo &STI) + : X86AsmBackend(T, STI) , Is64Bit(is64Bit) { } @@ -793,9 +787,9 @@ class DarwinX86AsmBackend : public 
X86AsmBackend { } public: - DarwinX86AsmBackend(const Target &T, const MCRegisterInfo &MRI, StringRef CPU, - bool Is64Bit) - : X86AsmBackend(T, CPU), MRI(MRI), Is64Bit(Is64Bit) { + DarwinX86AsmBackend(const Target &T, const MCRegisterInfo &MRI, + const MCSubtargetInfo &STI, bool Is64Bit) + : X86AsmBackend(T, STI), MRI(MRI), Is64Bit(Is64Bit) { memset(SavedRegs, 0, sizeof(SavedRegs)); OffsetSize = Is64Bit ? 8 : 4; MoveInstrSize = Is64Bit ? 3 : 2; @@ -806,8 +800,8 @@ class DarwinX86AsmBackend : public X86AsmBackend { class DarwinX86_32AsmBackend : public DarwinX86AsmBackend { public: DarwinX86_32AsmBackend(const Target &T, const MCRegisterInfo &MRI, - StringRef CPU) - : DarwinX86AsmBackend(T, MRI, CPU, false) {} + const MCSubtargetInfo &STI) + : DarwinX86AsmBackend(T, MRI, STI, false) {} std::unique_ptr createObjectWriter(raw_pwrite_stream &OS) const override { @@ -827,8 +821,8 @@ class DarwinX86_64AsmBackend : public DarwinX86AsmBackend { const MachO::CPUSubTypeX86 Subtype; public: DarwinX86_64AsmBackend(const Target &T, const MCRegisterInfo &MRI, - StringRef CPU, MachO::CPUSubTypeX86 st) - : DarwinX86AsmBackend(T, MRI, CPU, true), Subtype(st) {} + const MCSubtargetInfo &STI, MachO::CPUSubTypeX86 st) + : DarwinX86AsmBackend(T, MRI, STI, true), Subtype(st) {} std::unique_ptr createObjectWriter(raw_pwrite_stream &OS) const override { @@ -846,43 +840,43 @@ class DarwinX86_64AsmBackend : public DarwinX86AsmBackend { } // end anonymous namespace MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T, + const MCSubtargetInfo &STI, const MCRegisterInfo &MRI, - const Triple &TheTriple, - StringRef CPU, const MCTargetOptions &Options) { + const Triple &TheTriple = STI.getTargetTriple(); if (TheTriple.isOSBinFormatMachO()) - return new DarwinX86_32AsmBackend(T, MRI, CPU); + return new DarwinX86_32AsmBackend(T, MRI, STI); if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF()) - return new WindowsX86AsmBackend(T, false, CPU); + return new WindowsX86AsmBackend(T, false, STI); uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS()); if (TheTriple.isOSIAMCU()) - return new ELFX86_IAMCUAsmBackend(T, OSABI, CPU); + return new ELFX86_IAMCUAsmBackend(T, OSABI, STI); - return new ELFX86_32AsmBackend(T, OSABI, CPU); + return new ELFX86_32AsmBackend(T, OSABI, STI); } MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T, + const MCSubtargetInfo &STI, const MCRegisterInfo &MRI, - const Triple &TheTriple, - StringRef CPU, const MCTargetOptions &Options) { + const Triple &TheTriple = STI.getTargetTriple(); if (TheTriple.isOSBinFormatMachO()) { MachO::CPUSubTypeX86 CS = StringSwitch(TheTriple.getArchName()) .Case("x86_64h", MachO::CPU_SUBTYPE_X86_64_H) .Default(MachO::CPU_SUBTYPE_X86_64_ALL); - return new DarwinX86_64AsmBackend(T, MRI, CPU, CS); + return new DarwinX86_64AsmBackend(T, MRI, STI, CS); } if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF()) - return new WindowsX86AsmBackend(T, true, CPU); + return new WindowsX86AsmBackend(T, true, STI); uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS()); if (TheTriple.getEnvironment() == Triple::GNUX32) - return new ELFX86_X32AsmBackend(T, OSABI, CPU); - return new ELFX86_64AsmBackend(T, OSABI, CPU); + return new ELFX86_X32AsmBackend(T, OSABI, STI); + return new ELFX86_64AsmBackend(T, OSABI, STI); } diff --git a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h index f65ba1b60052..07cc488d047e 100644 --- a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h +++ 
b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h @@ -59,7 +59,9 @@ namespace X86 { IP_HAS_AD_SIZE = 2, IP_HAS_REPEAT_NE = 4, IP_HAS_REPEAT = 8, - IP_HAS_LOCK = 16 + IP_HAS_LOCK = 16, + NO_SCHED_INFO = 32 // Don't add sched comment to the current instr because + // it was already added }; } // end namespace X86; diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp index 1538a515f419..fa7c352a1b63 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp @@ -13,10 +13,7 @@ #include "X86MCAsmInfo.h" #include "llvm/ADT/Triple.h" -#include "llvm/BinaryFormat/ELF.h" -#include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCStreamer.h" #include "llvm/Support/CommandLine.h" using namespace llvm; @@ -27,11 +24,11 @@ enum AsmWriterFlavorTy { ATT = 0, Intel = 1 }; -static cl::opt -AsmWriterFlavor("x86-asm-syntax", cl::init(ATT), - cl::desc("Choose style of code to emit from X86 backend:"), - cl::values(clEnumValN(ATT, "att", "Emit AT&T-style assembly"), - clEnumValN(Intel, "intel", "Emit Intel-style assembly"))); +static cl::opt AsmWriterFlavor( + "x86-asm-syntax", cl::init(ATT), cl::Hidden, + cl::desc("Choose style of code to emit from X86 backend:"), + cl::values(clEnumValN(ATT, "att", "Emit AT&T-style assembly"), + clEnumValN(Intel, "intel", "Emit Intel-style assembly"))); static cl::opt MarkedJTDataRegions("mark-data-regions", cl::init(true), diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index 272c6f230145..a7059c6914df 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -1130,6 +1130,8 @@ bool X86MCCodeEmitter::emitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, EmitByte(0x40 | REX, CurByte, OS); Ret = true; } + } else { + assert(!(TSFlags & X86II::REX_W) && "REX.W requires 64bit mode."); } // 0x0F escape code must be emitted just before the opcode. diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h index c5859b600ad2..d758c0588cb1 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h +++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h @@ -70,11 +70,13 @@ MCCodeEmitter *createX86MCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, MCContext &Ctx); -MCAsmBackend *createX86_32AsmBackend(const Target &T, const MCRegisterInfo &MRI, - const Triple &TT, StringRef CPU, +MCAsmBackend *createX86_32AsmBackend(const Target &T, + const MCSubtargetInfo &STI, + const MCRegisterInfo &MRI, const MCTargetOptions &Options); -MCAsmBackend *createX86_64AsmBackend(const Target &T, const MCRegisterInfo &MRI, - const Triple &TT, StringRef CPU, +MCAsmBackend *createX86_64AsmBackend(const Target &T, + const MCSubtargetInfo &STI, + const MCRegisterInfo &MRI, const MCTargetOptions &Options); /// Implements X86-only directives for assembly emission. 
diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt index e6896e805568..73cf27692447 100644 --- a/lib/Target/X86/README-SSE.txt +++ b/lib/Target/X86/README-SSE.txt @@ -145,15 +145,15 @@ This is the llvm code after instruction scheduling: cond_next140 (0xa910740, LLVM BB @0xa90beb0): %reg1078 = MOV32ri -3 - %reg1079 = ADD32rm %reg1078, %reg1068, 1, %NOREG, 0 - %reg1037 = MOV32rm %reg1024, 1, %NOREG, 40 + %reg1079 = ADD32rm %reg1078, %reg1068, 1, %noreg, 0 + %reg1037 = MOV32rm %reg1024, 1, %noreg, 40 %reg1080 = IMUL32rr %reg1079, %reg1037 - %reg1081 = MOV32rm %reg1058, 1, %NOREG, 0 + %reg1081 = MOV32rm %reg1058, 1, %noreg, 0 %reg1038 = LEA32r %reg1081, 1, %reg1080, -3 - %reg1036 = MOV32rm %reg1024, 1, %NOREG, 32 + %reg1036 = MOV32rm %reg1024, 1, %noreg, 32 %reg1082 = SHL32ri %reg1038, 4 %reg1039 = ADD32rr %reg1036, %reg1082 - %reg1083 = MOVAPSrm %reg1059, 1, %NOREG, 0 + %reg1083 = MOVAPSrm %reg1059, 1, %noreg, 0 %reg1034 = SHUFPSrr %reg1083, %reg1083, 170 %reg1032 = SHUFPSrr %reg1083, %reg1083, 0 %reg1035 = SHUFPSrr %reg1083, %reg1083, 255 @@ -166,32 +166,32 @@ cond_next140 (0xa910740, LLVM BB @0xa90beb0): Still ok. After register allocation: cond_next140 (0xa910740, LLVM BB @0xa90beb0): - %EAX = MOV32ri -3 - %EDX = MOV32rm , 1, %NOREG, 0 - ADD32rm %EAX, %EDX, 1, %NOREG, 0 - %EDX = MOV32rm , 1, %NOREG, 0 - %EDX = MOV32rm %EDX, 1, %NOREG, 40 - IMUL32rr %EAX, %EDX - %ESI = MOV32rm , 1, %NOREG, 0 - %ESI = MOV32rm %ESI, 1, %NOREG, 0 - MOV32mr , 1, %NOREG, 0, %ESI - %EAX = LEA32r %ESI, 1, %EAX, -3 - %ESI = MOV32rm , 1, %NOREG, 0 - %ESI = MOV32rm %ESI, 1, %NOREG, 32 - %EDI = MOV32rr %EAX - SHL32ri %EDI, 4 - ADD32rr %EDI, %ESI - %XMM0 = MOVAPSrm %ECX, 1, %NOREG, 0 - %XMM1 = MOVAPSrr %XMM0 - SHUFPSrr %XMM1, %XMM1, 170 - %XMM2 = MOVAPSrr %XMM0 - SHUFPSrr %XMM2, %XMM2, 0 - %XMM3 = MOVAPSrr %XMM0 - SHUFPSrr %XMM3, %XMM3, 255 - SHUFPSrr %XMM0, %XMM0, 85 - %EBX = MOV32rr %EDI - AND32ri8 %EBX, 15 - CMP32ri8 %EBX, 0 + %eax = MOV32ri -3 + %edx = MOV32rm %stack.3, 1, %noreg, 0 + ADD32rm %eax, %edx, 1, %noreg, 0 + %edx = MOV32rm %stack.7, 1, %noreg, 0 + %edx = MOV32rm %edx, 1, %noreg, 40 + IMUL32rr %eax, %edx + %esi = MOV32rm %stack.5, 1, %noreg, 0 + %esi = MOV32rm %esi, 1, %noreg, 0 + MOV32mr %stack.4, 1, %noreg, 0, %esi + %eax = LEA32r %esi, 1, %eax, -3 + %esi = MOV32rm %stack.7, 1, %noreg, 0 + %esi = MOV32rm %esi, 1, %noreg, 32 + %edi = MOV32rr %eax + SHL32ri %edi, 4 + ADD32rr %edi, %esi + %xmm0 = MOVAPSrm %ecx, 1, %noreg, 0 + %xmm1 = MOVAPSrr %xmm0 + SHUFPSrr %xmm1, %xmm1, 170 + %xmm2 = MOVAPSrr %xmm0 + SHUFPSrr %xmm2, %xmm2, 0 + %xmm3 = MOVAPSrr %xmm0 + SHUFPSrr %xmm3, %xmm3, 255 + SHUFPSrr %xmm0, %xmm0, 85 + %ebx = MOV32rr %edi + AND32ri8 %ebx, 15 + CMP32ri8 %ebx, 0 JE mbb This looks really bad. The problem is shufps is a destructive opcode. 
Since it diff --git a/lib/Target/X86/README-X86-64.txt b/lib/Target/X86/README-X86-64.txt index 09626e13849d..a3ea4595ac1e 100644 --- a/lib/Target/X86/README-X86-64.txt +++ b/lib/Target/X86/README-X86-64.txt @@ -103,20 +103,20 @@ LBB1_3: ## bb Before regalloc, we have: - %reg1025 = IMUL32rri8 %reg1024, 45, %EFLAGS + %reg1025 = IMUL32rri8 %reg1024, 45, implicit-def %eflags JMP mbb Successors according to CFG: 0x203afb0 (#3) bb1: 0x203af60, LLVM BB @0x1e02310, ID#2: Predecessors according to CFG: 0x203aec0 (#0) - %reg1026 = IMUL32rri8 %reg1024, 78, %EFLAGS + %reg1026 = IMUL32rri8 %reg1024, 78, implicit-def %eflags Successors according to CFG: 0x203afb0 (#3) bb2: 0x203afb0, LLVM BB @0x1e02340, ID#3: Predecessors according to CFG: 0x203af10 (#1) 0x203af60 (#2) - %reg1027 = PHI %reg1025, mbb, + %reg1027 = PHI %reg1025, mbb, %reg1026, mbb - %reg1029 = MOVZX64rr32 %reg1027 + %reg1029 = MOVZX64rr32 %reg1027 so we'd have to know that IMUL32rri8 leaves the high word zero extended and to be able to recognize the zero extend. This could also presumably be implemented diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt index 799157c926e6..11652af9f1fc 100644 --- a/lib/Target/X86/README.txt +++ b/lib/Target/X86/README.txt @@ -987,11 +987,11 @@ bb7: ; preds = %entry to: foo: # @foo -# BB#0: # %entry +# %bb.0: # %entry movl 4(%esp), %ecx cmpb $0, 16(%esp) je .LBB0_2 -# BB#1: # %bb +# %bb.1: # %bb movl 8(%esp), %eax addl %ecx, %eax ret @@ -1073,7 +1073,7 @@ declare void @exit(i32) noreturn nounwind This compiles into: _abort_gzip: ## @abort_gzip -## BB#0: ## %entry +## %bb.0: ## %entry subl $12, %esp movb _in_exit.4870.b, %al cmpb $1, %al @@ -1396,7 +1396,7 @@ define i32 @bar(%struct.B* nocapture %a) nounwind readonly optsize { } bar: # @bar -# BB#0: +# %bb.0: movb (%rdi), %al andb $1, %al movzbl %al, %eax @@ -1633,7 +1633,7 @@ In the real code, we get a lot more wrong than this. However, even in this code we generate: _foo: ## @foo -## BB#0: ## %entry +## %bb.0: ## %entry movb (%rsi), %al movb (%rdi), %cl cmpb %al, %cl @@ -1646,12 +1646,12 @@ LBB0_2: ## %if.end movb 1(%rdi), %cl cmpb %al, %cl jne LBB0_1 -## BB#3: ## %if.end38 +## %bb.3: ## %if.end38 movb 2(%rsi), %al movb 2(%rdi), %cl cmpb %al, %cl jne LBB0_1 -## BB#4: ## %if.end60 +## %bb.4: ## %if.end60 movb 3(%rdi), %al cmpb 3(%rsi), %al LBB0_5: ## %if.end60 diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h index 5631648d2dc8..ba3f74f7a7a3 100644 --- a/lib/Target/X86/X86.h +++ b/lib/Target/X86/X86.h @@ -22,6 +22,7 @@ namespace llvm { class FunctionPass; class ImmutablePass; class InstructionSelector; +class ModulePass; class PassRegistry; class X86RegisterBankInfo; class X86Subtarget; @@ -49,6 +50,10 @@ FunctionPass *createX86FloatingPointStackifierPass(); /// transition penalty between functions encoded with AVX and SSE. FunctionPass *createX86IssueVZeroUpperPass(); +/// This pass inserts ENDBR instructions before indirect jump/call +/// destinations as part of CET IBT mechanism. +FunctionPass *createX86IndirectBranchTrackingPass(); + /// Return a pass that pads short functions with NOOPs. /// This will prevent a stall when returning on the Atom. FunctionPass *createX86PadShortFunctions(); @@ -102,6 +107,9 @@ void initializeFixupBWInstPassPass(PassRegistry &); /// encoding when possible in order to reduce code size. FunctionPass *createX86EvexToVexInsts(); +/// This pass creates the thunks for the retpoline feature. 
+ModulePass *createX86RetpolineThunksPass(); + InstructionSelector *createX86InstructionSelector(const X86TargetMachine &TM, X86Subtarget &, X86RegisterBankInfo &); diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index f1e57091b0df..3304440325db 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -34,6 +34,9 @@ def Mode16Bit : SubtargetFeature<"16bit-mode", "In16BitMode", "true", def FeatureX87 : SubtargetFeature<"x87","HasX87", "true", "Enable X87 float instructions">; +def FeatureNOPL : SubtargetFeature<"nopl", "HasNOPL", "true", + "Enable NOPL instruction">; + def FeatureCMOV : SubtargetFeature<"cmov","HasCMov", "true", "Enable conditional move instructions">; @@ -137,7 +140,7 @@ def FeatureVPOPCNTDQ : SubtargetFeature<"avx512vpopcntdq", "HasVPOPCNTDQ", def FeaturePFI : SubtargetFeature<"avx512pf", "HasPFI", "true", "Enable AVX-512 PreFetch Instructions", [FeatureAVX512]>; -def FeaturePREFETCHWT1 : SubtargetFeature<"prefetchwt1", "HasPFPREFETCHWT1", +def FeaturePREFETCHWT1 : SubtargetFeature<"prefetchwt1", "HasPREFETCHWT1", "true", "Prefetch with Intent to Write and T1 Hint">; def FeatureDQI : SubtargetFeature<"avx512dq", "HasDQI", "true", @@ -246,6 +249,8 @@ def FeatureCLFLUSHOPT : SubtargetFeature<"clflushopt", "HasCLFLUSHOPT", "true", "Flush A Cache Line Optimized">; def FeatureCLWB : SubtargetFeature<"clwb", "HasCLWB", "true", "Cache Line Write Back">; +def FeatureRDPID : SubtargetFeature<"rdpid", "HasRDPID", "true", + "Support RDPID instructions">; // On some processors, instructions that implicitly take two memory operands are // slow. In practice, this means that CALL, PUSH, and POP with memory operands // should be avoided in favor of a MOV + register CALL/PUSH/POP. @@ -263,6 +268,18 @@ def FeatureSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true", def FeatureSoftFloat : SubtargetFeature<"soft-float", "UseSoftFloat", "true", "Use software floating point features.">; +def FeaturePOPCNTFalseDeps : SubtargetFeature<"false-deps-popcnt", + "HasPOPCNTFalseDeps", "true", + "POPCNT has a false dependency on dest register">; +def FeatureLZCNTFalseDeps : SubtargetFeature<"false-deps-lzcnt-tzcnt", + "HasLZCNTFalseDeps", "true", + "LZCNT/TZCNT have a false dependency on dest register">; +// On recent X86 (port bound) processors, its preferable to combine to a single shuffle +// using a variable mask over multiple fixed shuffles. +def FeatureFastVariableShuffle + : SubtargetFeature<"fast-variable-shuffle", + "HasFastVariableShuffle", + "true", "Shuffles with variable masks are fast">; // On some X86 processors, there is no performance hazard to writing only the // lower parts of a YMM or ZMM register without clearing the upper part. def FeatureFastPartialYMMorZMMWrite @@ -323,11 +340,60 @@ def FeatureHasFastGather : SubtargetFeature<"fast-gather", "HasFastGather", "true", "Indicates if gather is reasonably fast.">; +def FeaturePrefer256Bit + : SubtargetFeature<"prefer-256-bit", "Prefer256Bit", "true", + "Prefer 256-bit AVX instructions">; + +// Enable mitigation of some aspects of speculative execution related +// vulnerabilities by removing speculatable indirect branches. This disables +// jump-table formation, rewrites explicit `indirectbr` instructions into +// `switch` instructions, and uses a special construct called a "retpoline" to +// prevent speculation of the remaining indirect branches (indirect calls and +// tail calls). 
+def FeatureRetpoline + : SubtargetFeature<"retpoline", "UseRetpoline", "true", + "Remove speculation of indirect branches from the " + "generated code, either by avoiding them entirely or " + "lowering them with a speculation blocking construct.">; + +// Rely on external thunks for the emitted retpoline calls. This allows users +// to provide their own custom thunk definitions in highly specialized +// environments such as a kernel that does boot-time hot patching. +def FeatureRetpolineExternalThunk + : SubtargetFeature< + "retpoline-external-thunk", "UseRetpolineExternalThunk", "true", + "Enable retpoline, but with an externally provided thunk.", + [FeatureRetpoline]>; + //===----------------------------------------------------------------------===// -// X86 processors supported. +// Register File Description +//===----------------------------------------------------------------------===// + +include "X86RegisterInfo.td" +include "X86RegisterBanks.td" + +//===----------------------------------------------------------------------===// +// Instruction Descriptions //===----------------------------------------------------------------------===// include "X86Schedule.td" +include "X86InstrInfo.td" + +def X86InstrInfo : InstrInfo; + +//===----------------------------------------------------------------------===// +// X86 processors supported. +//===----------------------------------------------------------------------===// + +include "X86ScheduleAtom.td" +include "X86SchedSandyBridge.td" +include "X86SchedHaswell.td" +include "X86SchedBroadwell.td" +include "X86ScheduleSLM.td" +include "X86ScheduleZnver1.td" +include "X86ScheduleBtVer2.td" +include "X86SchedSkylakeClient.td" +include "X86SchedSkylakeServer.td" def ProcIntelAtom : SubtargetFeature<"atom", "X86ProcFamily", "IntelAtom", "Intel Atom processors">; @@ -360,16 +426,16 @@ def : Proc<"i586", [FeatureX87, FeatureSlowUAMem16]>; def : Proc<"pentium", [FeatureX87, FeatureSlowUAMem16]>; def : Proc<"pentium-mmx", [FeatureX87, FeatureSlowUAMem16, FeatureMMX]>; -foreach P = ["i686", "pentiumpro"] in { - def : Proc; -} +def : Proc<"i686", [FeatureX87, FeatureSlowUAMem16, FeatureCMOV]>; +def : Proc<"pentiumpro", [FeatureX87, FeatureSlowUAMem16, FeatureCMOV, + FeatureNOPL]>; def : Proc<"pentium2", [FeatureX87, FeatureSlowUAMem16, FeatureMMX, - FeatureCMOV, FeatureFXSR]>; + FeatureCMOV, FeatureFXSR, FeatureNOPL]>; foreach P = ["pentium3", "pentium3m"] in { def : Proc; + FeatureFXSR, FeatureNOPL]>; } // Enable the PostRAScheduler for SSE2 and SSE3 class cpus. @@ -384,12 +450,12 @@ foreach P = ["pentium3", "pentium3m"] in { def : ProcessorModel<"pentium-m", GenericPostRAModel, [FeatureX87, FeatureSlowUAMem16, FeatureMMX, - FeatureSSE2, FeatureFXSR]>; + FeatureSSE2, FeatureFXSR, FeatureNOPL]>; foreach P = ["pentium4", "pentium4m"] in { def : ProcessorModel; + FeatureSSE2, FeatureFXSR, FeatureNOPL]>; } // Intel Quark. @@ -398,18 +464,19 @@ def : Proc<"lakemont", []>; // Intel Core Duo. def : ProcessorModel<"yonah", SandyBridgeModel, [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE3, - FeatureFXSR]>; + FeatureFXSR, FeatureNOPL]>; // NetBurst. 
def : ProcessorModel<"prescott", GenericPostRAModel, [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE3, - FeatureFXSR]>; + FeatureFXSR, FeatureNOPL]>; def : ProcessorModel<"nocona", GenericPostRAModel, [ FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE3, FeatureFXSR, + FeatureNOPL, FeatureCMPXCHG16B ]>; @@ -420,6 +487,7 @@ def : ProcessorModel<"core2", SandyBridgeModel, [ FeatureMMX, FeatureSSSE3, FeatureFXSR, + FeatureNOPL, FeatureCMPXCHG16B, FeatureLAHFSAHF, FeatureMacroFusion @@ -430,6 +498,7 @@ def : ProcessorModel<"penryn", SandyBridgeModel, [ FeatureMMX, FeatureSSE41, FeatureFXSR, + FeatureNOPL, FeatureCMPXCHG16B, FeatureLAHFSAHF, FeatureMacroFusion @@ -443,6 +512,7 @@ class BonnellProc : ProcessorModel : ProcessorModel : ProcessorModel : ProcessorModel : ProcessorModel : ProcModel; def : SandyBridgeProc<"sandybridge">; def : SandyBridgeProc<"corei7-avx">; // Legacy alias. @@ -584,7 +660,8 @@ def IVBFeatures : ProcessorFeatures : ProcModel; def : IvyBridgeProc<"ivybridge">; def : IvyBridgeProc<"core-avx-i">; // Legacy alias. @@ -596,23 +673,29 @@ def HSWFeatures : ProcessorFeatures; class HaswellProc : ProcModel; def : HaswellProc<"haswell">; def : HaswellProc<"core-avx2">; // Legacy alias. def BDWFeatures : ProcessorFeatures; class BroadwellProc : ProcModel; def : BroadwellProc<"broadwell">; @@ -629,7 +712,8 @@ def SKLFeatures : ProcessorFeatures : ProcModel; def : SkylakeClientProc<"skylake">; @@ -645,7 +729,8 @@ def KNLFeatures : ProcessorFeatures; // FIXME: define KNL model @@ -706,7 +791,9 @@ def ICLFeatures : ProcessorFeatures; class IcelakeProc : ProcModel; def : Proc<"k6-3", [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>; foreach P = ["athlon", "athlon-tbird"] in { - def : Proc; + def : Proc; } foreach P = ["athlon-4", "athlon-xp", "athlon-mp"] in { def : Proc; + Feature3DNowA, FeatureFXSR, FeatureNOPL, FeatureSlowSHLD]>; } foreach P = ["k8", "opteron", "athlon64", "athlon-fx"] in { def : Proc; + FeatureFXSR, FeatureNOPL, Feature64Bit, FeatureSlowSHLD]>; } foreach P = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in { def : Proc; + FeatureFXSR, FeatureNOPL, FeatureCMPXCHG16B, FeatureSlowSHLD]>; } foreach P = ["amdfam10", "barcelona"] in { def : Proc; } @@ -754,6 +842,7 @@ def : Proc<"btver1", [ FeatureSSSE3, FeatureSSE4A, FeatureFXSR, + FeatureNOPL, FeatureCMPXCHG16B, FeaturePRFCHW, FeatureLZCNT, @@ -768,6 +857,7 @@ def : ProcessorModel<"btver2", BtVer2Model, [ FeatureMMX, FeatureAVX, FeatureFXSR, + FeatureNOPL, FeatureSSE4A, FeatureCMPXCHG16B, FeaturePRFCHW, @@ -798,6 +888,7 @@ def : Proc<"bdver1", [ FeatureMMX, FeatureAVX, FeatureFXSR, + FeatureNOPL, FeatureSSE4A, FeatureLZCNT, FeaturePOPCNT, @@ -819,6 +910,7 @@ def : Proc<"bdver2", [ FeatureMMX, FeatureAVX, FeatureFXSR, + FeatureNOPL, FeatureSSE4A, FeatureF16C, FeatureLZCNT, @@ -845,6 +937,7 @@ def : Proc<"bdver3", [ FeatureMMX, FeatureAVX, FeatureFXSR, + FeatureNOPL, FeatureSSE4A, FeatureF16C, FeatureLZCNT, @@ -867,6 +960,7 @@ def : Proc<"bdver4", [ FeatureMMX, FeatureAVX2, FeatureFXSR, + FeatureNOPL, FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B, @@ -904,6 +998,7 @@ def: ProcessorModel<"znver1", Znver1Model, [ FeatureFMA, FeatureFSGSBase, FeatureFXSR, + FeatureNOPL, FeatureFastLZCNT, FeatureLAHFSAHF, FeatureLZCNT, @@ -948,27 +1043,13 @@ def : ProcessorModel<"x86-64", SandyBridgeModel, [ FeatureMMX, FeatureSSE2, FeatureFXSR, + FeatureNOPL, Feature64Bit, FeatureSlow3OpsLEA, FeatureSlowIncDec, FeatureMacroFusion ]>; -//===----------------------------------------------------------------------===// -// Register File 
Description -//===----------------------------------------------------------------------===// - -include "X86RegisterInfo.td" -include "X86RegisterBanks.td" - -//===----------------------------------------------------------------------===// -// Instruction Descriptions -//===----------------------------------------------------------------------===// - -include "X86InstrInfo.td" - -def X86InstrInfo : InstrInfo; - //===----------------------------------------------------------------------===// // Calling Conventions //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp index 1c938d9c8423..4da7d59df465 100644 --- a/lib/Target/X86/X86AsmPrinter.cpp +++ b/lib/Target/X86/X86AsmPrinter.cpp @@ -23,12 +23,10 @@ #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" -#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Mangler.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" -#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" @@ -65,7 +63,7 @@ bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) { SetupMachineFunction(MF); if (Subtarget->isTargetCOFF()) { - bool Local = MF.getFunction()->hasLocalLinkage(); + bool Local = MF.getFunction().hasLocalLinkage(); OutStreamer->BeginCOFFSymbolDef(CurrentFnSym); OutStreamer->EmitCOFFSymbolStorageClass( Local ? COFF::IMAGE_SYM_CLASS_STATIC : COFF::IMAGE_SYM_CLASS_EXTERNAL); @@ -648,27 +646,6 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { } if (TT.isOSBinFormatCOFF()) { - const TargetLoweringObjectFileCOFF &TLOFCOFF = - static_cast(getObjFileLowering()); - - std::string Flags; - raw_string_ostream FlagsOS(Flags); - - for (const auto &Function : M) - TLOFCOFF.emitLinkerFlagsForGlobal(FlagsOS, &Function); - for (const auto &Global : M.globals()) - TLOFCOFF.emitLinkerFlagsForGlobal(FlagsOS, &Global); - for (const auto &Alias : M.aliases()) - TLOFCOFF.emitLinkerFlagsForGlobal(FlagsOS, &Alias); - - FlagsOS.flush(); - - // Output collected flags. - if (!Flags.empty()) { - OutStreamer->SwitchSection(TLOFCOFF.getDrectveSection()); - OutStreamer->EmitBytes(Flags); - } - SM.serializeToStackMapSection(); } diff --git a/lib/Target/X86/X86AsmPrinter.h b/lib/Target/X86/X86AsmPrinter.h index 08d773451793..31328e6aea95 100644 --- a/lib/Target/X86/X86AsmPrinter.h +++ b/lib/Target/X86/X86AsmPrinter.h @@ -32,6 +32,7 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter { FaultMaps FM; std::unique_ptr CodeEmitter; bool EmitFPOData = false; + bool NeedsRetpoline = false; // This utility class tracks the length of a stackmap instruction's 'shadow'. // It is used by the X86AsmPrinter to ensure that the stackmap shadow @@ -97,10 +98,6 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter { void LowerFENTRY_CALL(const MachineInstr &MI, X86MCInstLower &MCIL); - // Helper function that emits the XRay sleds we've collected for a particular - // function. - void EmitXRayTable(); - // Choose between emitting .seh_ directives and .cv_fpo_ directives. 
void EmitSEHInstruction(const MachineInstr *MI); diff --git a/lib/Target/X86/X86CallFrameOptimization.cpp b/lib/Target/X86/X86CallFrameOptimization.cpp index b4202799ae75..522dc7926b94 100644 --- a/lib/Target/X86/X86CallFrameOptimization.cpp +++ b/lib/Target/X86/X86CallFrameOptimization.cpp @@ -148,7 +148,7 @@ bool X86CallFrameOptimization::isLegal(MachineFunction &MF) { // is a danger of that being generated. if (STI->isTargetDarwin() && (!MF.getLandingPads().empty() || - (MF.getFunction()->needsUnwindTableEntry() && !TFL->hasFP(MF)))) + (MF.getFunction().needsUnwindTableEntry() && !TFL->hasFP(MF)))) return false; // It is not valid to change the stack pointer outside the prolog/epilog @@ -243,7 +243,7 @@ bool X86CallFrameOptimization::runOnMachineFunction(MachineFunction &MF) { assert(isPowerOf2_32(SlotSize) && "Expect power of 2 stack slot size"); Log2SlotSize = Log2_32(SlotSize); - if (skipFunction(*MF.getFunction()) || !isLegal(MF)) + if (skipFunction(MF.getFunction()) || !isLegal(MF)) return false; unsigned FrameSetupOpcode = TII->getCallFrameSetupOpcode(); diff --git a/lib/Target/X86/X86CallLowering.cpp b/lib/Target/X86/X86CallLowering.cpp index 3e1f3400b461..ccb982f9ac16 100644 --- a/lib/Target/X86/X86CallLowering.cpp +++ b/lib/Target/X86/X86CallLowering.cpp @@ -177,7 +177,7 @@ bool X86CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, MachineFunction &MF = MIRBuilder.getMF(); MachineRegisterInfo &MRI = MF.getRegInfo(); auto &DL = MF.getDataLayout(); - const Function &F = *MF.getFunction(); + const Function &F = MF.getFunction(); ArgInfo OrigArg{VReg, Val->getType()}; setArgFlags(OrigArg, AttributeList::ReturnIndex, DL, F); @@ -334,7 +334,7 @@ bool X86CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const ArgInfo &OrigRet, ArrayRef OrigArgs) const { MachineFunction &MF = MIRBuilder.getMF(); - const Function &F = *MF.getFunction(); + const Function &F = MF.getFunction(); MachineRegisterInfo &MRI = MF.getRegInfo(); auto &DL = F.getParent()->getDataLayout(); const X86Subtarget &STI = MF.getSubtarget(); diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td index 2de9a5fbfe92..5d806fe60b86 100644 --- a/lib/Target/X86/X86CallingConv.td +++ b/lib/Target/X86/X86CallingConv.td @@ -500,7 +500,7 @@ def CC_X86_64_C : CallingConv<[ // A SwiftError is passed in R12. CCIfSwiftError>>, - // For Swift Calling Convention, pass sret in %RAX. + // For Swift Calling Convention, pass sret in %rax. 
CCIfCC<"CallingConv::Swift", CCIfSRet>>>, diff --git a/lib/Target/X86/X86CmovConversion.cpp b/lib/Target/X86/X86CmovConversion.cpp index a4bb98956ead..489d9d86e254 100644 --- a/lib/Target/X86/X86CmovConversion.cpp +++ b/lib/Target/X86/X86CmovConversion.cpp @@ -164,7 +164,7 @@ void X86CmovConverterPass::getAnalysisUsage(AnalysisUsage &AU) const { } bool X86CmovConverterPass::runOnMachineFunction(MachineFunction &MF) { - if (skipFunction(*MF.getFunction())) + if (skipFunction(MF.getFunction())) return false; if (!EnableCmovConverter) return false; diff --git a/lib/Target/X86/X86DomainReassignment.cpp b/lib/Target/X86/X86DomainReassignment.cpp index f205d3ebfbf7..ba7280c29cc9 100644 --- a/lib/Target/X86/X86DomainReassignment.cpp +++ b/lib/Target/X86/X86DomainReassignment.cpp @@ -19,7 +19,6 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -27,6 +26,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/Support/Debug.h" +#include using namespace llvm; @@ -43,7 +43,7 @@ static cl::opt DisableX86DomainReassignment( cl::desc("X86: Disable Virtual Register Reassignment."), cl::init(false)); namespace { -enum RegDomain { NoDomain = -1, GPRDomain, MaskDomain, OtherDomain }; +enum RegDomain { NoDomain = -1, GPRDomain, MaskDomain, OtherDomain, NumDomains }; static bool isGPR(const TargetRegisterClass *RC) { return X86::GR64RegClass.hasSubClassEq(RC) || @@ -70,13 +70,13 @@ static RegDomain getDomain(const TargetRegisterClass *RC, static const TargetRegisterClass *getDstRC(const TargetRegisterClass *SrcRC, RegDomain Domain) { assert(Domain == MaskDomain && "add domain"); - if (SrcRC == &X86::GR8RegClass) + if (X86::GR8RegClass.hasSubClassEq(SrcRC)) return &X86::VK8RegClass; - if (SrcRC == &X86::GR16RegClass) + if (X86::GR16RegClass.hasSubClassEq(SrcRC)) return &X86::VK16RegClass; - if (SrcRC == &X86::GR32RegClass) + if (X86::GR32RegClass.hasSubClassEq(SrcRC)) return &X86::VK32RegClass; - if (SrcRC == &X86::GR64RegClass) + if (X86::GR64RegClass.hasSubClassEq(SrcRC)) return &X86::VK64RegClass; llvm_unreachable("add register class"); return nullptr; @@ -301,75 +301,65 @@ typedef DenseMap /// different closure that manipulates the loaded or stored value. class Closure { private: - const TargetInstrInfo *TII; - MachineRegisterInfo *MRI; - /// Virtual registers in the closure. DenseSet Edges; /// Instructions in the closure. SmallVector Instrs; - /// A map of available Instruction Converters. - const InstrConverterBaseMap &Converters; - - /// The register domain of this closure. - RegDomain Domain; - /// Domains which this closure can legally be reassigned to. - SmallVector LegalDstDomains; + std::bitset LegalDstDomains; - SmallVector getLegalDstDomains() const { - return LegalDstDomains; +public: + Closure(std::initializer_list LegalDstDomainList) { + for (RegDomain D : LegalDstDomainList) + LegalDstDomains.set(D); } - /// Enqueue \p Reg to be considered for addition to the closure. - void visitRegister(unsigned Reg, SmallVectorImpl &Worklist); + /// Mark this closure as illegal for reassignment to all domains. + void setAllIllegal() { LegalDstDomains.reset(); } - /// Add \p MI to this closure. - void encloseInstr(MachineInstr *MI); + /// \returns true if this closure has domains which are legal to reassign to. 
+ bool hasLegalDstDomain() const { return LegalDstDomains.any(); } - /// Calculate the total cost of reassigning the closure to \p Domain. - double calculateCost(RegDomain Domain) const; + /// \returns true if is legal to reassign this closure to domain \p RD. + bool isLegal(RegDomain RD) const { return LegalDstDomains[RD]; } - /// All edges that are included in some closure. - DenseSet &EnclosedEdges; + /// Mark this closure as illegal for reassignment to domain \p RD. + void setIllegal(RegDomain RD) { LegalDstDomains[RD] = false; } - /// All instructions that are included in some closure. - DenseMap &EnclosedInstrs; + bool empty() const { return Edges.empty(); } -public: - Closure(const TargetInstrInfo *TII, MachineRegisterInfo *MRI, - const InstrConverterBaseMap &Converters, - const SmallVector &LegalDstDomains, - DenseSet &EnclosedEdges, - DenseMap &EnclosedInstrs) - : TII(TII), MRI(MRI), Converters(Converters), Domain(NoDomain), - LegalDstDomains(LegalDstDomains), EnclosedEdges(EnclosedEdges), - EnclosedInstrs(EnclosedInstrs) {} + bool insertEdge(unsigned Reg) { + return Edges.insert(Reg).second; + } - /// Starting from \Reg, expand the closure as much as possible. - void buildClosure(unsigned E); + using const_edge_iterator = DenseSet::const_iterator; + iterator_range edges() const { + return iterator_range(Edges.begin(), Edges.end()); + } - /// /returns true if it is profitable to reassign the closure to \p Domain. - bool isReassignmentProfitable(RegDomain Domain) const; + void addInstruction(MachineInstr *I) { + Instrs.push_back(I); + } - /// Reassign the closure to \p Domain. - void Reassign(RegDomain Domain) const; + ArrayRef instructions() const { + return Instrs; + } - /// Mark this closure as illegal for reassignment to all domains. - void setAllIllegal() { LegalDstDomains.clear(); } +}; - /// \returns true if this closure has domains which are legal to reassign to. - bool hasLegalDstDomain() const { return !LegalDstDomains.empty(); } +class X86DomainReassignment : public MachineFunctionPass { + const X86Subtarget *STI; + MachineRegisterInfo *MRI; + const X86InstrInfo *TII; - /// \returns true if is legal to reassign this closure to domain \p RD. - bool isLegal(RegDomain RD) const { return is_contained(LegalDstDomains, RD); } + /// All edges that are included in some closure + DenseSet EnclosedEdges; - bool empty() const { return Edges.empty(); } -}; + /// All instructions that are included in some closure. + DenseMap EnclosedInstrs; -class X86DomainReassignment : public MachineFunctionPass { public: static char ID; @@ -389,22 +379,39 @@ class X86DomainReassignment : public MachineFunctionPass { } private: - const X86Subtarget *STI; - MachineRegisterInfo *MRI; - const X86InstrInfo *TII; - /// A map of available Instruction Converters. InstrConverterBaseMap Converters; /// Initialize Converters map. void initConverters(); + + /// Starting from \Reg, expand the closure as much as possible. + void buildClosure(Closure &, unsigned Reg); + + /// Enqueue \p Reg to be considered for addition to the closure. + void visitRegister(Closure &, unsigned Reg, RegDomain &Domain, + SmallVectorImpl &Worklist); + + /// Reassign the closure to \p Domain. + void reassign(const Closure &C, RegDomain Domain) const; + + /// Add \p MI to the closure. + void encloseInstr(Closure &C, MachineInstr *MI); + + /// /returns true if it is profitable to reassign the closure to \p Domain. 
+ bool isReassignmentProfitable(const Closure &C, RegDomain Domain) const; + + /// Calculate the total cost of reassigning the closure to \p Domain. + double calculateCost(const Closure &C, RegDomain Domain) const; }; char X86DomainReassignment::ID = 0; } // End anonymous namespace. -void Closure::visitRegister(unsigned Reg, SmallVectorImpl &Worklist) { +void X86DomainReassignment::visitRegister(Closure &C, unsigned Reg, + RegDomain &Domain, + SmallVectorImpl &Worklist) { if (EnclosedEdges.count(Reg)) return; @@ -425,56 +432,61 @@ void Closure::visitRegister(unsigned Reg, SmallVectorImpl &Worklist) { Worklist.push_back(Reg); } -void Closure::encloseInstr(MachineInstr *MI) { +void X86DomainReassignment::encloseInstr(Closure &C, MachineInstr *MI) { auto I = EnclosedInstrs.find(MI); if (I != EnclosedInstrs.end()) { - if (I->second != this) + if (I->second != &C) // Instruction already belongs to another closure, avoid conflicts between // closure and mark this closure as illegal. - setAllIllegal(); + C.setAllIllegal(); return; } - EnclosedInstrs[MI] = this; - Instrs.push_back(MI); + EnclosedInstrs[MI] = &C; + C.addInstruction(MI); // Mark closure as illegal for reassignment to domains, if there is no // converter for the instruction or if the converter cannot convert the // instruction. - erase_if(LegalDstDomains, [&](RegDomain D) { - InstrConverterBase *IC = Converters.lookup({D, MI->getOpcode()}); - return !IC || !IC->isLegal(MI, TII); - }); + for (int i = 0; i != NumDomains; ++i) { + if (C.isLegal((RegDomain)i)) { + InstrConverterBase *IC = Converters.lookup({i, MI->getOpcode()}); + if (!IC || !IC->isLegal(MI, TII)) + C.setIllegal((RegDomain)i); + } + } } -double Closure::calculateCost(RegDomain DstDomain) const { - assert(isLegal(DstDomain) && "Cannot calculate cost for illegal closure"); +double X86DomainReassignment::calculateCost(const Closure &C, + RegDomain DstDomain) const { + assert(C.isLegal(DstDomain) && "Cannot calculate cost for illegal closure"); double Cost = 0.0; - for (auto MI : Instrs) + for (auto *MI : C.instructions()) Cost += Converters.lookup({DstDomain, MI->getOpcode()})->getExtraCost(MI, MRI); return Cost; } -bool Closure::isReassignmentProfitable(RegDomain Domain) const { - return calculateCost(Domain) < 0.0; +bool X86DomainReassignment::isReassignmentProfitable(const Closure &C, + RegDomain Domain) const { + return calculateCost(C, Domain) < 0.0; } -void Closure::Reassign(RegDomain Domain) const { - assert(isLegal(Domain) && "Cannot convert illegal closure"); +void X86DomainReassignment::reassign(const Closure &C, RegDomain Domain) const { + assert(C.isLegal(Domain) && "Cannot convert illegal closure"); // Iterate all instructions in the closure, convert each one using the // appropriate converter. SmallVector ToErase; - for (auto MI : Instrs) + for (auto *MI : C.instructions()) if (Converters.lookup({Domain, MI->getOpcode()}) ->convertInstr(MI, TII, MRI)) ToErase.push_back(MI); // Iterate all registers in the closure, replace them with registers in the // destination domain. 
- for (unsigned Reg : Edges) { + for (unsigned Reg : C.edges()) { MRI->setRegClass(Reg, getDstRC(MRI->getRegClass(Reg), Domain)); for (auto &MO : MRI->use_operands(Reg)) { if (MO.isReg()) @@ -511,18 +523,19 @@ static bool usedAsAddr(const MachineInstr &MI, unsigned Reg, return false; } -void Closure::buildClosure(unsigned Reg) { +void X86DomainReassignment::buildClosure(Closure &C, unsigned Reg) { SmallVector Worklist; - visitRegister(Reg, Worklist); + RegDomain Domain = NoDomain; + visitRegister(C, Reg, Domain, Worklist); while (!Worklist.empty()) { unsigned CurReg = Worklist.pop_back_val(); // Register already in this closure. - if (!Edges.insert(CurReg).second) + if (!C.insertEdge(CurReg)) continue; MachineInstr *DefMI = MRI->getVRegDef(CurReg); - encloseInstr(DefMI); + encloseInstr(C, DefMI); // Add register used by the defining MI to the worklist. // Do not add registers which are used in address calculation, they will be @@ -541,7 +554,7 @@ void Closure::buildClosure(unsigned Reg) { auto &Op = DefMI->getOperand(OpIdx); if (!Op.isReg() || !Op.isUse()) continue; - visitRegister(Op.getReg(), Worklist); + visitRegister(C, Op.getReg(), Domain, Worklist); } // Expand closure through register uses. @@ -549,10 +562,10 @@ void Closure::buildClosure(unsigned Reg) { // We would like to avoid converting closures which calculare addresses, // as this should remain in GPRs. if (usedAsAddr(UseMI, CurReg, TII)) { - setAllIllegal(); + C.setAllIllegal(); continue; } - encloseInstr(&UseMI); + encloseInstr(C, &UseMI); for (auto &DefOp : UseMI.defs()) { if (!DefOp.isReg()) @@ -560,10 +573,10 @@ void Closure::buildClosure(unsigned Reg) { unsigned DefReg = DefOp.getReg(); if (!TargetRegisterInfo::isVirtualRegister(DefReg)) { - setAllIllegal(); + C.setAllIllegal(); continue; } - visitRegister(DefReg, Worklist); + visitRegister(C, DefReg, Domain, Worklist); } } } @@ -679,7 +692,7 @@ void X86DomainReassignment::initConverters() { } bool X86DomainReassignment::runOnMachineFunction(MachineFunction &MF) { - if (skipFunction(*MF.getFunction())) + if (skipFunction(MF.getFunction())) return false; if (DisableX86DomainReassignment) return false; @@ -700,8 +713,8 @@ bool X86DomainReassignment::runOnMachineFunction(MachineFunction &MF) { initConverters(); bool Changed = false; - DenseSet EnclosedEdges; - DenseMap EnclosedInstrs; + EnclosedEdges.clear(); + EnclosedInstrs.clear(); std::vector Closures; @@ -718,9 +731,8 @@ bool X86DomainReassignment::runOnMachineFunction(MachineFunction &MF) { continue; // Calculate closure starting with Reg. - Closure C(TII, MRI, Converters, {MaskDomain}, EnclosedEdges, - EnclosedInstrs); - C.buildClosure(Reg); + Closure C({MaskDomain}); + buildClosure(C, Reg); // Collect all closures that can potentially be converted. if (!C.empty() && C.isLegal(MaskDomain)) @@ -728,8 +740,8 @@ bool X86DomainReassignment::runOnMachineFunction(MachineFunction &MF) { } for (Closure &C : Closures) - if (C.isReassignmentProfitable(MaskDomain)) { - C.Reassign(MaskDomain); + if (isReassignmentProfitable(C, MaskDomain)) { + reassign(C, MaskDomain); ++NumClosuresConverted; Changed = true; } diff --git a/lib/Target/X86/X86ExpandPseudo.cpp b/lib/Target/X86/X86ExpandPseudo.cpp index 5dfd95f71301..ab2ef26d1cc9 100644 --- a/lib/Target/X86/X86ExpandPseudo.cpp +++ b/lib/Target/X86/X86ExpandPseudo.cpp @@ -222,7 +222,7 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB, case X86::EH_RESTORE: { // Restore ESP and EBP, and optionally ESI if required. 
bool IsSEH = isAsynchronousEHPersonality(classifyEHPersonality( - MBB.getParent()->getFunction()->getPersonalityFn())); + MBB.getParent()->getFunction().getPersonalityFn())); X86FL->restoreWin32EHStackPointers(MBB, MBBI, DL, /*RestoreSP=*/IsSEH); MBBI->eraseFromParent(); return true; diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 9ea7590ce3ab..faeda19f4b6f 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -1976,9 +1976,9 @@ bool X86FastISel::X86SelectDivRem(const Instruction *I) { // Generate the DIV/IDIV instruction. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(OpEntry.OpDivRem)).addReg(Op1Reg); - // For i8 remainder, we can't reference AH directly, as we'll end - // up with bogus copies like %R9B = COPY %AH. Reference AX - // instead to prevent AH references in a REX instruction. + // For i8 remainder, we can't reference ah directly, as we'll end + // up with bogus copies like %r9b = COPY %ah. Reference ax + // instead to prevent ah references in a rex instruction. // // The current assumption of the fast register allocator is that isel // won't generate explicit references to the GR8_NOREX registers. If @@ -2424,11 +2424,11 @@ bool X86FastISel::X86SelectSIToFP(const Instruction *I) { if (I->getType()->isDoubleTy()) { // sitofp int -> double - Opcode = InTy->isIntegerTy(64) ? X86::VCVTSI2SD64rr : X86::VCVTSI2SDrr; + Opcode = InTy->isIntegerTy(64) ? X86::VCVTSI642SDrr : X86::VCVTSI2SDrr; RC = &X86::FR64RegClass; } else if (I->getType()->isFloatTy()) { // sitofp int -> float - Opcode = InTy->isIntegerTy(64) ? X86::VCVTSI2SS64rr : X86::VCVTSI2SSrr; + Opcode = InTy->isIntegerTy(64) ? X86::VCVTSI642SSrr : X86::VCVTSI2SSrr; RC = &X86::FR32RegClass; } else return false; @@ -2726,7 +2726,7 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { if (MCI->getSourceAddressSpace() > 255 || MCI->getDestAddressSpace() > 255) return false; - return lowerCallTo(II, "memcpy", II->getNumArgOperands() - 2); + return lowerCallTo(II, "memcpy", II->getNumArgOperands() - 1); } case Intrinsic::memset: { const MemSetInst *MSI = cast(II); @@ -2741,7 +2741,7 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { if (MSI->getDestAddressSpace() > 255) return false; - return lowerCallTo(II, "memset", II->getNumArgOperands() - 2); + return lowerCallTo(II, "memset", II->getNumArgOperands() - 1); } case Intrinsic::stackprotector: { // Emit code to store the stack guard onto the stack. @@ -3172,6 +3172,10 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) { (CalledFn && CalledFn->hasFnAttribute("no_caller_saved_registers"))) return false; + // Functions using retpoline should use SDISel for calls. + if (Subtarget->useRetpoline()) + return false; + // Handle only C, fastcc, and webkit_js calling conventions for now. switch (CC) { default: return false; @@ -3458,13 +3462,11 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) { assert(GV && "Not a direct call"); // See if we need any target-specific flags on the GV operand. unsigned char OpFlags = Subtarget->classifyGlobalFunctionReference(GV); - // Ignore NonLazyBind attribute in FastISel - if (OpFlags == X86II::MO_GOTPCREL) - OpFlags = 0; // This will be a direct call, or an indirect call through memory for // NonLazyBind calls or dllimport calls. - bool NeedLoad = OpFlags == X86II::MO_DLLIMPORT; + bool NeedLoad = + OpFlags == X86II::MO_DLLIMPORT || OpFlags == X86II::MO_GOTPCREL; unsigned CallOpc = NeedLoad ? (Is64Bit ? 
X86::CALL64m : X86::CALL32m) : (Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32); diff --git a/lib/Target/X86/X86FixupBWInsts.cpp b/lib/Target/X86/X86FixupBWInsts.cpp index 9664c931c35e..855ea683a8af 100644 --- a/lib/Target/X86/X86FixupBWInsts.cpp +++ b/lib/Target/X86/X86FixupBWInsts.cpp @@ -146,12 +146,12 @@ INITIALIZE_PASS(FixupBWInstPass, FIXUPBW_NAME, FIXUPBW_DESC, false, false) FunctionPass *llvm::createX86FixupBWInsts() { return new FixupBWInstPass(); } bool FixupBWInstPass::runOnMachineFunction(MachineFunction &MF) { - if (!FixupBWInsts || skipFunction(*MF.getFunction())) + if (!FixupBWInsts || skipFunction(MF.getFunction())) return false; this->MF = &MF; TII = MF.getSubtarget().getInstrInfo(); - OptForSize = MF.getFunction()->optForSize(); + OptForSize = MF.getFunction().optForSize(); MLI = &getAnalysis(); LiveRegs.init(TII->getRegisterInfo()); @@ -166,48 +166,75 @@ bool FixupBWInstPass::runOnMachineFunction(MachineFunction &MF) { return true; } -/// Check if register \p Reg is live after the \p MI. -/// -/// \p LiveRegs should be in a state describing liveness information in -/// that exact place as this function tries to precise analysis made -/// by \p LiveRegs by exploiting the information about particular -/// instruction \p MI. \p MI is expected to be one of the MOVs handled -/// by the x86FixupBWInsts pass. -/// Note: similar to LivePhysRegs::contains this would state that -/// super-register is not used if only some part of it is used. -/// -/// X86 backend does not have subregister liveness tracking enabled, -/// so liveness information might be overly conservative. However, for -/// some specific instructions (this pass only cares about MOVs) we can -/// produce more precise results by analysing that MOV's operands. -/// -/// Indeed, if super-register is not live before the mov it means that it -/// was originally and so we are free to modify these -/// undef upper bits. That may happen in case where the use is in another MBB -/// and the vreg/physreg corresponding to the move has higher width than -/// necessary (e.g. due to register coalescing with a "truncate" copy). -/// So, it handles pattern like this: -/// -/// BB#2: derived from LLVM BB %if.then -/// Live Ins: %RDI -/// Predecessors according to CFG: BB#0 -/// %AX = MOV16rm %RDI, 1, %noreg, 0, %noreg, %EAX; mem:LD2[%p] -/// No %EAX -/// Successors according to CFG: BB#3(?%) +/// \brief Check if after \p OrigMI the only portion of super register +/// of the destination register of \p OrigMI that is alive is that +/// destination register. /// -/// BB#3: derived from LLVM BB %if.end -/// Live Ins: %EAX Only %AX is actually live -/// Predecessors according to CFG: BB#2 BB#1 -/// %AX = KILL %AX, %EAX -/// RET 0, %AX -static bool isLive(const MachineInstr &MI, - const LivePhysRegs &LiveRegs, - const TargetRegisterInfo *TRI, - unsigned Reg) { - if (!LiveRegs.contains(Reg)) +/// If so, return that super register in \p SuperDestReg. +bool FixupBWInstPass::getSuperRegDestIfDead(MachineInstr *OrigMI, + unsigned &SuperDestReg) const { + auto *TRI = &TII->getRegisterInfo(); + + unsigned OrigDestReg = OrigMI->getOperand(0).getReg(); + SuperDestReg = getX86SubSuperRegister(OrigDestReg, 32); + + const auto SubRegIdx = TRI->getSubRegIndex(SuperDestReg, OrigDestReg); + + // Make sure that the sub-register that this instruction has as its + // destination is the lowest order sub-register of the super-register. + // If it isn't, then the register isn't really dead even if the + // super-register is considered dead. 
+ if (SubRegIdx == X86::sub_8bit_hi) return false; - unsigned Opc = MI.getOpcode(); (void)Opc; + // If neither the destination-super register nor any applicable subregisters + // are live after this instruction, then the super register is safe to use. + if (!LiveRegs.contains(SuperDestReg)) { + // If the original destination register was not the low 8-bit subregister + // then the super register check is sufficient. + if (SubRegIdx != X86::sub_8bit) + return true; + // If the original destination register was the low 8-bit subregister and + // we also need to check the 16-bit subregister and the high 8-bit + // subregister. + if (!LiveRegs.contains(getX86SubSuperRegister(OrigDestReg, 16)) && + !LiveRegs.contains(getX86SubSuperRegister(SuperDestReg, 8, + /*High=*/true))) + return true; + // Otherwise, we have a little more checking to do. + } + + // If we get here, the super-register destination (or some part of it) is + // marked as live after the original instruction. + // + // The X86 backend does not have subregister liveness tracking enabled, + // so liveness information might be overly conservative. Specifically, the + // super register might be marked as live because it is implicitly defined + // by the instruction we are examining. + // + // However, for some specific instructions (this pass only cares about MOVs) + // we can produce more precise results by analysing that MOV's operands. + // + // Indeed, if super-register is not live before the mov it means that it + // was originally and so we are free to modify these + // undef upper bits. That may happen in case where the use is in another MBB + // and the vreg/physreg corresponding to the move has higher width than + // necessary (e.g. due to register coalescing with a "truncate" copy). + // So, we would like to handle patterns like this: + // + // %bb.2: derived from LLVM BB %if.then + // Live Ins: %rdi + // Predecessors according to CFG: %bb.0 + // %ax = MOV16rm killed %rdi, 1, %noreg, 0, %noreg, implicit-def %eax + // ; No implicit %eax + // Successors according to CFG: %bb.3(?%) + // + // %bb.3: derived from LLVM BB %if.end + // Live Ins: %eax Only %ax is actually live + // Predecessors according to CFG: %bb.2 %bb.1 + // %ax = KILL %ax, implicit killed %eax + // RET 0, %ax + unsigned Opc = OrigMI->getOpcode(); (void)Opc; // These are the opcodes currently handled by the pass, if something // else will be added we need to ensure that new opcode has the same // properties. @@ -216,65 +243,28 @@ static bool isLive(const MachineInstr &MI, "Unexpected opcode."); bool IsDefined = false; - for (auto &MO: MI.implicit_operands()) { + for (auto &MO: OrigMI->implicit_operands()) { if (!MO.isReg()) continue; assert((MO.isDef() || MO.isUse()) && "Expected Def or Use only!"); - for (MCSuperRegIterator Supers(Reg, TRI, true); Supers.isValid(); ++Supers) { + for (MCSuperRegIterator Supers(OrigDestReg, TRI, true); Supers.isValid(); + ++Supers) { if (*Supers == MO.getReg()) { if (MO.isDef()) IsDefined = true; else - return true; // SuperReg Imp-used' -> live before the MI + return false; // SuperReg Imp-used' -> live before the MI } } } // Reg is not Imp-def'ed -> it's live both before/after the instruction. if (!IsDefined) - return true; + return false; // Otherwise, the Reg is not live before the MI and the MOV can't // make it really live, so it's in fact dead even after the MI. 
- return false; -} - -/// \brief Check if after \p OrigMI the only portion of super register -/// of the destination register of \p OrigMI that is alive is that -/// destination register. -/// -/// If so, return that super register in \p SuperDestReg. -bool FixupBWInstPass::getSuperRegDestIfDead(MachineInstr *OrigMI, - unsigned &SuperDestReg) const { - auto *TRI = &TII->getRegisterInfo(); - - unsigned OrigDestReg = OrigMI->getOperand(0).getReg(); - SuperDestReg = getX86SubSuperRegister(OrigDestReg, 32); - - const auto SubRegIdx = TRI->getSubRegIndex(SuperDestReg, OrigDestReg); - - // Make sure that the sub-register that this instruction has as its - // destination is the lowest order sub-register of the super-register. - // If it isn't, then the register isn't really dead even if the - // super-register is considered dead. - if (SubRegIdx == X86::sub_8bit_hi) - return false; - - if (isLive(*OrigMI, LiveRegs, TRI, SuperDestReg)) - return false; - - if (SubRegIdx == X86::sub_8bit) { - // In the case of byte registers, we also have to check that the upper - // byte register is also dead. That is considered to be independent of - // whether the super-register is dead. - unsigned UpperByteReg = - getX86SubSuperRegister(SuperDestReg, 8, /*High=*/true); - - if (isLive(*OrigMI, LiveRegs, TRI, UpperByteReg)) - return false; - } - return true; } diff --git a/lib/Target/X86/X86FixupLEAs.cpp b/lib/Target/X86/X86FixupLEAs.cpp index bbc2bffdb703..b41bf99f19b2 100644 --- a/lib/Target/X86/X86FixupLEAs.cpp +++ b/lib/Target/X86/X86FixupLEAs.cpp @@ -17,10 +17,8 @@ #include "X86InstrInfo.h" #include "X86Subtarget.h" #include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/Support/Debug.h" @@ -193,12 +191,12 @@ FixupLEAPass::postRAConvertToLEA(MachineFunction::iterator &MFI, FunctionPass *llvm::createX86FixupLEAs() { return new FixupLEAPass(); } bool FixupLEAPass::runOnMachineFunction(MachineFunction &Func) { - if (skipFunction(*Func.getFunction())) + if (skipFunction(Func.getFunction())) return false; MF = &Func; const X86Subtarget &ST = Func.getSubtarget(); - OptIncDec = !ST.slowIncDec() || Func.getFunction()->optForMinSize(); + OptIncDec = !ST.slowIncDec() || Func.getFunction().optForMinSize(); OptLEA = ST.LEAusesAG() || ST.slowLEA() || ST.slow3OpsLEA(); if (!OptLEA && !OptIncDec) diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp index d43f7a154091..9a72e7114be0 100644 --- a/lib/Target/X86/X86FloatingPoint.cpp +++ b/lib/Target/X86/X86FloatingPoint.cpp @@ -349,7 +349,7 @@ bool FPS::runOnMachineFunction(MachineFunction &MF) { // In regcall convention, some FP registers may not be passed through // the stack, so they will need to be assigned to the stack first - if ((Entry->getParent()->getFunction()->getCallingConv() == + if ((Entry->getParent()->getFunction().getCallingConv() == CallingConv::X86_RegCall) && (Bundle.Mask && !Bundle.FixCount)) { // In the register calling convention, up to one FP argument could be // saved in the first FP register. @@ -499,7 +499,7 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) { /// setupBlockStack - Use the live bundles to set up our model of the stack /// to match predecessors' live out stack. 
void FPS::setupBlockStack() { - DEBUG(dbgs() << "\nSetting up live-ins for BB#" << MBB->getNumber() + DEBUG(dbgs() << "\nSetting up live-ins for " << printMBBReference(*MBB) << " derived from " << MBB->getName() << ".\n"); StackTop = 0; // Get the live-in bundle for MBB. @@ -516,7 +516,7 @@ void FPS::setupBlockStack() { // Push the fixed live-in registers. for (unsigned i = Bundle.FixCount; i > 0; --i) { - DEBUG(dbgs() << "Live-in st(" << (i-1) << "): %FP" + DEBUG(dbgs() << "Live-in st(" << (i-1) << "): %fp" << unsigned(Bundle.FixStack[i-1]) << '\n'); pushReg(Bundle.FixStack[i-1]); } @@ -538,7 +538,7 @@ void FPS::finishBlockStack() { if (MBB->succ_empty()) return; - DEBUG(dbgs() << "Setting up live-outs for BB#" << MBB->getNumber() + DEBUG(dbgs() << "Setting up live-outs for " << printMBBReference(*MBB) << " derived from " << MBB->getName() << ".\n"); // Get MBB's live-out bundle. @@ -893,7 +893,7 @@ void FPS::adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I) { while (Kills && Defs) { unsigned KReg = countTrailingZeros(Kills); unsigned DReg = countTrailingZeros(Defs); - DEBUG(dbgs() << "Renaming %FP" << KReg << " as imp %FP" << DReg << "\n"); + DEBUG(dbgs() << "Renaming %fp" << KReg << " as imp %fp" << DReg << "\n"); std::swap(Stack[getSlot(KReg)], Stack[getSlot(DReg)]); std::swap(RegMap[KReg], RegMap[DReg]); Kills &= ~(1 << KReg); @@ -907,7 +907,7 @@ void FPS::adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I) { unsigned KReg = getStackEntry(0); if (!(Kills & (1 << KReg))) break; - DEBUG(dbgs() << "Popping %FP" << KReg << "\n"); + DEBUG(dbgs() << "Popping %fp" << KReg << "\n"); popStackAfter(I2); Kills &= ~(1 << KReg); } @@ -916,7 +916,7 @@ void FPS::adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I) { // Manually kill the rest. while (Kills) { unsigned KReg = countTrailingZeros(Kills); - DEBUG(dbgs() << "Killing %FP" << KReg << "\n"); + DEBUG(dbgs() << "Killing %fp" << KReg << "\n"); freeStackSlotBefore(I, KReg); Kills &= ~(1 << KReg); } @@ -924,7 +924,7 @@ void FPS::adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I) { // Load zeros for all the imp-defs. while(Defs) { unsigned DReg = countTrailingZeros(Defs); - DEBUG(dbgs() << "Defining %FP" << DReg << " as 0\n"); + DEBUG(dbgs() << "Defining %fp" << DReg << " as 0\n"); BuildMI(*MBB, I, DebugLoc(), TII->get(X86::LD_F0)); pushReg(DReg); Defs &= ~(1 << DReg); @@ -973,7 +973,7 @@ void FPS::handleCall(MachineBasicBlock::iterator &I) { unsigned R = MO.getReg() - X86::FP0; if (R < 8) { - if (MF->getFunction()->getCallingConv() != CallingConv::X86_RegCall) { + if (MF->getFunction().getCallingConv() != CallingConv::X86_RegCall) { assert(MO.isDef() && MO.isImplicit()); } diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index ead877a399ff..729bd17a71d2 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -148,8 +148,7 @@ static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB, const X86RegisterInfo *TRI, bool Is64Bit) { const MachineFunction *MF = MBB.getParent(); - const Function *F = MF->getFunction(); - if (!F || MF->callsEHReturn()) + if (MF->callsEHReturn()) return 0; const TargetRegisterClass &AvailableRegs = *TRI->getGPRsForTailCall(*MF); @@ -742,6 +741,11 @@ void X86FrameLowering::emitStackProbeCall(MachineFunction &MF, bool InProlog) const { bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large; + // FIXME: Add retpoline support and remove this. 
+ if (Is64Bit && IsLargeCodeModel && STI.useRetpoline()) + report_fatal_error("Emitting stack probe calls on 64-bit with the large " + "code model and retpoline not yet implemented."); + unsigned CallOp; if (Is64Bit) CallOp = IsLargeCodeModel ? X86::CALL64r : X86::CALL64pcrel32; @@ -820,7 +824,7 @@ uint64_t X86FrameLowering::calculateMaxStackAlign(const MachineFunction &MF) con const MachineFrameInfo &MFI = MF.getFrameInfo(); uint64_t MaxAlign = MFI.getMaxAlignment(); // Desired stack alignment. unsigned StackAlign = getStackAlignment(); - if (MF.getFunction()->hasFnAttribute("stackrealign")) { + if (MF.getFunction().hasFnAttribute("stackrealign")) { if (MFI.hasCalls()) MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign; else if (MaxAlign < SlotSize) @@ -935,28 +939,28 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, "MF used frame lowering for wrong subtarget"); MachineBasicBlock::iterator MBBI = MBB.begin(); MachineFrameInfo &MFI = MF.getFrameInfo(); - const Function *Fn = MF.getFunction(); + const Function &Fn = MF.getFunction(); MachineModuleInfo &MMI = MF.getMMI(); X86MachineFunctionInfo *X86FI = MF.getInfo(); uint64_t MaxAlign = calculateMaxStackAlign(MF); // Desired stack alignment. uint64_t StackSize = MFI.getStackSize(); // Number of bytes to allocate. bool IsFunclet = MBB.isEHFuncletEntry(); EHPersonality Personality = EHPersonality::Unknown; - if (Fn->hasPersonalityFn()) - Personality = classifyEHPersonality(Fn->getPersonalityFn()); + if (Fn.hasPersonalityFn()) + Personality = classifyEHPersonality(Fn.getPersonalityFn()); bool FnHasClrFunclet = MF.hasEHFunclets() && Personality == EHPersonality::CoreCLR; bool IsClrFunclet = IsFunclet && FnHasClrFunclet; bool HasFP = hasFP(MF); - bool IsWin64CC = STI.isCallingConvWin64(Fn->getCallingConv()); + bool IsWin64CC = STI.isCallingConvWin64(Fn.getCallingConv()); bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); - bool NeedsWin64CFI = IsWin64Prologue && Fn->needsUnwindTableEntry(); + bool NeedsWin64CFI = IsWin64Prologue && Fn.needsUnwindTableEntry(); // FIXME: Emit FPO data for EH funclets. bool NeedsWinFPO = !IsFunclet && STI.isTargetWin32() && MMI.getModule()->getCodeViewFlag(); bool NeedsWinCFI = NeedsWin64CFI || NeedsWinFPO; bool NeedsDwarfCFI = - !IsWin64Prologue && (MMI.hasDebugInfo() || Fn->needsUnwindTableEntry()); + !IsWin64Prologue && (MMI.hasDebugInfo() || Fn.needsUnwindTableEntry()); unsigned FramePtr = TRI->getFrameRegister(MF); const unsigned MachineFramePtr = STI.isTarget64BitILP32() @@ -982,16 +986,16 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, // The default stack probe size is 4096 if the function has no stackprobesize // attribute. unsigned StackProbeSize = 4096; - if (Fn->hasFnAttribute("stack-probe-size")) - Fn->getFnAttribute("stack-probe-size") + if (Fn.hasFnAttribute("stack-probe-size")) + Fn.getFnAttribute("stack-probe-size") .getValueAsString() .getAsInteger(0, StackProbeSize); // Re-align the stack on 64-bit if the x86-interrupt calling convention is // used and an error code was pushed, since the x86-64 ABI requires a 16-byte // stack alignment. 
- if (Fn->getCallingConv() == CallingConv::X86_INTR && Is64Bit && - Fn->arg_size() == 2) { + if (Fn.getCallingConv() == CallingConv::X86_INTR && Is64Bit && + Fn.arg_size() == 2) { StackSize += 8; MFI.setStackSize(StackSize); emitSPUpdate(MBB, MBBI, -8, /*InEpilogue=*/false); @@ -1002,7 +1006,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, // pointer, calls, or dynamic alloca then we do not need to adjust the // stack pointer (we fit in the Red Zone). We also check that we don't // push and pop from the stack. - if (Is64Bit && !Fn->hasFnAttribute(Attribute::NoRedZone) && + if (Is64Bit && !Fn.hasFnAttribute(Attribute::NoRedZone) && !TRI->needsStackRealignment(MF) && !MFI.hasVarSizedObjects() && // No dynamic alloca. !MFI.adjustsStack() && // No calls. @@ -1447,7 +1451,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, // 1. The interrupt handling function uses any of the "rep" instructions. // 2. Interrupt handling function calls another function. // - if (Fn->getCallingConv() == CallingConv::X86_INTR) + if (Fn.getCallingConv() == CallingConv::X86_INTR) BuildMI(MBB, MBBI, DL, TII.get(X86::CLD)) .setMIFlag(MachineInstr::FrameSetup); @@ -1508,7 +1512,7 @@ X86FrameLowering::getWinEHFuncletFrameSize(const MachineFunction &MF) const { // This is the amount of stack a funclet needs to allocate. unsigned UsedSize; EHPersonality Personality = - classifyEHPersonality(MF.getFunction()->getPersonalityFn()); + classifyEHPersonality(MF.getFunction().getPersonalityFn()); if (Personality == EHPersonality::CoreCLR) { // CLR funclets need to hold enough space to include the PSPSym, at the // same offset from the stack pointer (immediately after the prolog) as it @@ -1551,7 +1555,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); bool NeedsWin64CFI = - IsWin64Prologue && MF.getFunction()->needsUnwindTableEntry(); + IsWin64Prologue && MF.getFunction().needsUnwindTableEntry(); bool IsFunclet = MBBI == MBB.end() ? false : isFuncletReturnInstr(*MBBI); // Get the number of bytes to allocate from the FrameInfo. @@ -1856,6 +1860,32 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots( unsigned CalleeSavedFrameSize = 0; int SpillSlotOffset = getOffsetOfLocalArea() + X86FI->getTCReturnAddrDelta(); + int64_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); + + if (TailCallReturnAddrDelta < 0) { + // create RETURNADDR area + // arg + // arg + // RETADDR + // { ... + // RETADDR area + // ... + // } + // [EBP] + MFI.CreateFixedObject(-TailCallReturnAddrDelta, + TailCallReturnAddrDelta - SlotSize, true); + } + + // Spill the BasePtr if it's used. + if (this->TRI->hasBasePointer(MF)) { + // Allocate a spill slot for EBP if we have a base pointer and EH funclets. + if (MF.hasEHFunclets()) { + int FI = MFI.CreateSpillStackObject(SlotSize, SlotSize); + X86FI->setHasSEHFramePtrSave(true); + X86FI->setSEHFramePtrSaveIndex(FI); + } + } + if (hasFP(MF)) { // emitPrologue always spills frame register the first thing. SpillSlotOffset -= SlotSize; @@ -1981,7 +2011,7 @@ void X86FrameLowering::emitCatchRetReturnValue(MachineBasicBlock &MBB, MachineInstr *CatchRet) const { // SEH shouldn't use catchret. 
assert(!isAsynchronousEHPersonality(classifyEHPersonality( - MBB.getParent()->getFunction()->getPersonalityFn())) && + MBB.getParent()->getFunction().getPersonalityFn())) && "SEH should not use CATCHRET"); DebugLoc DL = CatchRet->getDebugLoc(); MachineBasicBlock *CatchRetTarget = CatchRet->getOperand(0).getMBB(); @@ -2021,9 +2051,9 @@ bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, // Don't restore CSRs before an SEH catchret. SEH except blocks do not form // funclets. emitEpilogue transforms these to normal jumps. if (MI->getOpcode() == X86::CATCHRET) { - const Function *Func = MBB.getParent()->getFunction(); + const Function &F = MBB.getParent()->getFunction(); bool IsSEH = isAsynchronousEHPersonality( - classifyEHPersonality(Func->getPersonalityFn())); + classifyEHPersonality(F.getPersonalityFn())); if (IsSEH) return true; } @@ -2061,42 +2091,15 @@ void X86FrameLowering::determineCalleeSaves(MachineFunction &MF, RegScavenger *RS) const { TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); - MachineFrameInfo &MFI = MF.getFrameInfo(); - - X86MachineFunctionInfo *X86FI = MF.getInfo(); - int64_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); - - if (TailCallReturnAddrDelta < 0) { - // create RETURNADDR area - // arg - // arg - // RETADDR - // { ... - // RETADDR area - // ... - // } - // [EBP] - MFI.CreateFixedObject(-TailCallReturnAddrDelta, - TailCallReturnAddrDelta - SlotSize, true); - } - // Spill the BasePtr if it's used. - if (TRI->hasBasePointer(MF)) { + if (TRI->hasBasePointer(MF)) SavedRegs.set(TRI->getBaseRegister()); - - // Allocate a spill slot for EBP if we have a base pointer and EH funclets. - if (MF.hasEHFunclets()) { - int FI = MFI.CreateSpillStackObject(SlotSize, SlotSize); - X86FI->setHasSEHFramePtrSave(true); - X86FI->setSEHFramePtrSaveIndex(FI); - } - } } static bool HasNestArgument(const MachineFunction *MF) { - const Function *F = MF->getFunction(); - for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); + const Function &F = MF->getFunction(); + for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; I++) { if (I->hasNestAttr()) return true; @@ -2110,7 +2113,7 @@ HasNestArgument(const MachineFunction *MF) { /// needed. Set primary to true for the first register, false for the second. static unsigned GetScratchRegister(bool Is64Bit, bool IsLP64, const MachineFunction &MF, bool Primary) { - CallingConv::ID CallingConvention = MF.getFunction()->getCallingConv(); + CallingConv::ID CallingConvention = MF.getFunction().getCallingConv(); // Erlang stuff. if (CallingConvention == CallingConv::HiPE) { @@ -2160,7 +2163,7 @@ void X86FrameLowering::adjustForSegmentedStacks( assert(!MF.getRegInfo().isLiveIn(ScratchReg) && "Scratch register is live-in"); - if (MF.getFunction()->isVarArg()) + if (MF.getFunction().isVarArg()) report_fatal_error("Segmented stacks do not support vararg functions."); if (!STI.isTargetLinux() && !STI.isTargetDarwin() && !STI.isTargetWin32() && !STI.isTargetWin64() && !STI.isTargetFreeBSD() && @@ -2346,6 +2349,10 @@ void X86FrameLowering::adjustForSegmentedStacks( // This solution is not perfect, as it assumes that the .rodata section // is laid out within 2^31 bytes of each function body, but this seems // to be sufficient for JIT. + // FIXME: Add retpoline support and remove the error here.. 
+ if (STI.useRetpoline()) + report_fatal_error("Emitting morestack calls on 64-bit with the large " + "code model and retpoline not yet implemented."); BuildMI(allocMBB, DL, TII.get(X86::CALL64m)) .addReg(X86::RIP) .addImm(0) @@ -2434,8 +2441,8 @@ void X86FrameLowering::adjustForHiPEPrologue( Is64Bit ? "AMD64_LEAF_WORDS" : "X86_LEAF_WORDS"); const unsigned CCRegisteredArgs = Is64Bit ? 6 : 5; const unsigned Guaranteed = HipeLeafWords * SlotSize; - unsigned CallerStkArity = MF.getFunction()->arg_size() > CCRegisteredArgs ? - MF.getFunction()->arg_size() - CCRegisteredArgs : 0; + unsigned CallerStkArity = MF.getFunction().arg_size() > CCRegisteredArgs ? + MF.getFunction().arg_size() - CCRegisteredArgs : 0; unsigned MaxStack = MFI.getStackSize() + CallerStkArity*SlotSize + SlotSize; assert(STI.isTargetLinux() && @@ -2649,10 +2656,10 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, Amount = alignTo(Amount, StackAlign); MachineModuleInfo &MMI = MF.getMMI(); - const Function *Fn = MF.getFunction(); + const Function &F = MF.getFunction(); bool WindowsCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); - bool DwarfCFI = !WindowsCFI && - (MMI.hasDebugInfo() || Fn->needsUnwindTableEntry()); + bool DwarfCFI = !WindowsCFI && + (MMI.hasDebugInfo() || F.needsUnwindTableEntry()); // If we have any exception handlers in this function, and we adjust // the SP before calls, we may need to indicate this to the unwinder @@ -2694,7 +2701,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, StackAdjustment += mergeSPUpdates(MBB, InsertPos, false); if (StackAdjustment) { - if (!(Fn->optForMinSize() && + if (!(F.optForMinSize() && adjustStackWithPops(MBB, InsertPos, DL, StackAdjustment))) BuildStackAdjustment(MBB, InsertPos, DL, StackAdjustment, /*InEpilogue=*/false); @@ -2767,13 +2774,13 @@ bool X86FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const { bool X86FrameLowering::enableShrinkWrapping(const MachineFunction &MF) const { // If we may need to emit frameless compact unwind information, give // up as this is currently broken: PR25614. - return (MF.getFunction()->hasFnAttribute(Attribute::NoUnwind) || hasFP(MF)) && + return (MF.getFunction().hasFnAttribute(Attribute::NoUnwind) || hasFP(MF)) && // The lowering of segmented stack and HiPE only support entry blocks // as prologue blocks: PR26107. // This limitation may be lifted if we fix: // - adjustForSegmentedStacks // - adjustForHiPEPrologue - MF.getFunction()->getCallingConv() != CallingConv::HiPE && + MF.getFunction().getCallingConv() != CallingConv::HiPE && !MF.shouldSplitStack(); } @@ -3003,9 +3010,9 @@ void X86FrameLowering::processFunctionBeforeFrameFinalized( // If this function isn't doing Win64-style C++ EH, we don't need to do // anything. 
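The frames that continue past the early-out below are ordinary Win64 C++ EH functions: they carry the MSVC_CXX personality (__CxxFrameHandler3), and the runtime expects to find the UnwindHelp object at a fixed offset from the established frame so that any funclet can locate it. For illustration, a minimal function of that shape, assuming an x86_64-pc-windows-msvc target:

// Compiled for x86_64-pc-windows-msvc this function gets the MSVC_CXX
// personality and its catch handler is outlined as a funclet, which is
// exactly the case handled below.
void mayThrow();   // assumed external; any potentially-throwing call will do

int guarded() {
  try {
    mayThrow();
    return 1;
  } catch (...) {
    return 0;
  }
}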
- const Function *Fn = MF.getFunction(); + const Function &F = MF.getFunction(); if (!STI.is64Bit() || !MF.hasEHFunclets() || - classifyEHPersonality(Fn->getPersonalityFn()) != EHPersonality::MSVC_CXX) + classifyEHPersonality(F.getPersonalityFn()) != EHPersonality::MSVC_CXX) return; // Win64 C++ EH needs to allocate the UnwindHelp object at some fixed offset diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 93a3b9281d9c..b47ef91d42ab 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -13,7 +13,6 @@ //===----------------------------------------------------------------------===// #include "X86.h" -#include "X86InstrBuilder.h" #include "X86MachineFunctionInfo.h" #include "X86RegisterInfo.h" #include "X86Subtarget.h" @@ -21,8 +20,6 @@ #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Function.h" @@ -109,14 +106,15 @@ namespace { if (Base_Reg.getNode()) Base_Reg.getNode()->dump(); else - dbgs() << "nul"; - dbgs() << " Base.FrameIndex " << Base_FrameIndex << '\n' - << " Scale" << Scale << '\n' + dbgs() << "nul\n"; + if (BaseType == FrameIndexBase) + dbgs() << " Base.FrameIndex " << Base_FrameIndex << '\n'; + dbgs() << " Scale " << Scale << '\n' << "IndexReg "; if (IndexReg.getNode()) IndexReg.getNode()->dump(); else - dbgs() << "nul"; + dbgs() << "nul\n"; dbgs() << " Disp " << Disp << '\n' << "GV "; if (GV) @@ -442,9 +440,8 @@ namespace { } bool foldLoadStoreIntoMemOperand(SDNode *Node); - bool matchBEXTRFromAnd(SDNode *Node); - + bool shrinkAndImmediate(SDNode *N); bool isMaskZeroExtended(SDNode *N) const; }; } @@ -462,7 +459,7 @@ static bool isLegalMaskCompare(SDNode *N, const X86Subtarget *Subtarget) { // this happens we will use 512-bit operations and the mask will not be // zero extended. EVT OpVT = N->getOperand(0).getValueType(); - if (OpVT == MVT::v8i32 || OpVT == MVT::v8f32) + if (OpVT.is256BitVector() || OpVT.is128BitVector()) return Subtarget->hasVLX(); return true; @@ -622,8 +619,8 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) { void X86DAGToDAGISel::PreprocessISelDAG() { // OptFor[Min]Size are used in pattern predicates that isel is matching. - OptForSize = MF->getFunction()->optForSize(); - OptForMinSize = MF->getFunction()->optForMinSize(); + OptForSize = MF->getFunction().optForSize(); + OptForMinSize = MF->getFunction().optForMinSize(); assert((!OptForMinSize || OptForSize) && "OptForMinSize implies OptForSize"); for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), @@ -631,11 +628,11 @@ void X86DAGToDAGISel::PreprocessISelDAG() { SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues. if (OptLevel != CodeGenOpt::None && - // Only does this when target favors doesn't favor register indirect - // call. + // Only do this when the target can fold the load into the call or + // jmp. + !Subtarget->useRetpoline() && ((N->getOpcode() == X86ISD::CALL && !Subtarget->slowTwoMemOps()) || (N->getOpcode() == X86ISD::TC_RETURN && - // Only does this if load can be folded into TC_RETURN. 
(Subtarget->is64Bit() || !getTargetMachine().isPositionIndependent())))) { /// Also try moving call address load from outside callseq_start to just @@ -756,9 +753,9 @@ void X86DAGToDAGISel::emitSpecialCodeForMain() { void X86DAGToDAGISel::EmitFunctionEntryCode() { // If this is main, emit special code for main. - if (const Function *Fn = MF->getFunction()) - if (Fn->hasExternalLinkage() && Fn->getName() == "main") - emitSpecialCodeForMain(); + const Function &F = MF->getFunction(); + if (F.hasExternalLinkage() && F.getName() == "main") + emitSpecialCodeForMain(); } static bool isDispSafeForFrameIndex(int64_t Val) { @@ -1510,6 +1507,12 @@ bool X86DAGToDAGISel::matchAddressBase(SDValue N, X86ISelAddressMode &AM) { bool X86DAGToDAGISel::matchVectorAddress(SDValue N, X86ISelAddressMode &AM) { // TODO: Support other operations. switch (N.getOpcode()) { + case ISD::Constant: { + uint64_t Val = cast(N)->getSExtValue(); + if (!foldOffsetIntoAddress(Val, AM)) + return false; + break; + } case X86ISD::Wrapper: if (!matchWrapper(N, AM)) return false; @@ -1525,7 +1528,7 @@ bool X86DAGToDAGISel::selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base, X86ISelAddressMode AM; auto *Mgs = cast(Parent); AM.IndexReg = Mgs->getIndex(); - AM.Scale = Mgs->getValue().getScalarValueSizeInBits() / 8; + AM.Scale = cast(Mgs->getScale())->getZExtValue(); unsigned AddrSpace = cast(Parent)->getPointerInfo().getAddrSpace(); // AddrSpace 256 -> GS, 257 -> FS, 258 -> SS. @@ -1536,14 +1539,8 @@ bool X86DAGToDAGISel::selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base, if (AddrSpace == 258) AM.Segment = CurDAG->getRegister(X86::SS, MVT::i16); - // If Base is 0, the whole address is in index and the Scale is 1 - if (isa(N)) { - assert(cast(N)->isNullValue() && - "Unexpected base in gather/scatter"); - AM.Scale = 1; - } - // Otherwise, try to match into the base and displacement fields. - else if (matchVectorAddress(N, AM)) + // Try to match into the base and displacement fields. 
+ if (matchVectorAddress(N, AM)) return false; MVT VT = N.getSimpleValueType(); @@ -2179,7 +2176,9 @@ bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) { case X86ISD::INC: case X86ISD::DEC: case X86ISD::ADD: + case X86ISD::ADC: case X86ISD::SUB: + case X86ISD::SBB: case X86ISD::AND: case X86ISD::OR: case X86ISD::XOR: @@ -2227,7 +2226,9 @@ bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) { break; } case X86ISD::ADD: + case X86ISD::ADC: case X86ISD::SUB: + case X86ISD::SBB: case X86ISD::AND: case X86ISD::OR: case X86ISD::XOR: { @@ -2236,9 +2237,15 @@ bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) { case X86ISD::ADD: return SelectOpcode(X86::ADD64mr, X86::ADD32mr, X86::ADD16mr, X86::ADD8mr); + case X86ISD::ADC: + return SelectOpcode(X86::ADC64mr, X86::ADC32mr, X86::ADC16mr, + X86::ADC8mr); case X86ISD::SUB: return SelectOpcode(X86::SUB64mr, X86::SUB32mr, X86::SUB16mr, X86::SUB8mr); + case X86ISD::SBB: + return SelectOpcode(X86::SBB64mr, X86::SBB32mr, X86::SBB16mr, + X86::SBB8mr); case X86ISD::AND: return SelectOpcode(X86::AND64mr, X86::AND32mr, X86::AND16mr, X86::AND8mr); @@ -2255,8 +2262,12 @@ bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) { switch (Opc) { case X86ISD::ADD: return SelectOpcode(X86::ADD64mi8, X86::ADD32mi8, X86::ADD16mi8, 0); + case X86ISD::ADC: + return SelectOpcode(X86::ADC64mi8, X86::ADC32mi8, X86::ADC16mi8, 0); case X86ISD::SUB: return SelectOpcode(X86::SUB64mi8, X86::SUB32mi8, X86::SUB16mi8, 0); + case X86ISD::SBB: + return SelectOpcode(X86::SBB64mi8, X86::SBB32mi8, X86::SBB16mi8, 0); case X86ISD::AND: return SelectOpcode(X86::AND64mi8, X86::AND32mi8, X86::AND16mi8, 0); case X86ISD::OR: @@ -2272,9 +2283,15 @@ bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) { case X86ISD::ADD: return SelectOpcode(X86::ADD64mi32, X86::ADD32mi, X86::ADD16mi, X86::ADD8mi); + case X86ISD::ADC: + return SelectOpcode(X86::ADC64mi32, X86::ADC32mi, X86::ADC16mi, + X86::ADC8mi); case X86ISD::SUB: return SelectOpcode(X86::SUB64mi32, X86::SUB32mi, X86::SUB16mi, X86::SUB8mi); + case X86ISD::SBB: + return SelectOpcode(X86::SBB64mi32, X86::SBB32mi, X86::SBB16mi, + X86::SBB8mi); case X86ISD::AND: return SelectOpcode(X86::AND64mi32, X86::AND32mi, X86::AND16mi, X86::AND8mi); @@ -2322,10 +2339,21 @@ bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) { } } - const SDValue Ops[] = {Base, Scale, Index, Disp, - Segment, Operand, InputChain}; - Result = - CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other, Ops); + if (Opc == X86ISD::ADC || Opc == X86ISD::SBB) { + SDValue CopyTo = + CurDAG->getCopyToReg(InputChain, SDLoc(Node), X86::EFLAGS, + StoredVal.getOperand(2), SDValue()); + + const SDValue Ops[] = {Base, Scale, Index, Disp, + Segment, Operand, CopyTo, CopyTo.getValue(1)}; + Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other, + Ops); + } else { + const SDValue Ops[] = {Base, Scale, Index, Disp, + Segment, Operand, InputChain}; + Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other, + Ops); + } break; } default: @@ -2431,6 +2459,60 @@ bool X86DAGToDAGISel::matchBEXTRFromAnd(SDNode *Node) { return true; } +/// If the high bits of an 'and' operand are known zero, try setting the +/// high bits of an 'and' constant operand to produce a smaller encoding by +/// creating a small, sign-extended negative immediate rather than a large +/// positive one. This reverses a transform in SimplifyDemandedBits that +/// shrinks mask constants by clearing bits. 
There is also a possibility that +/// the 'and' mask can be made -1, so the 'and' itself is unnecessary. In that +/// case, just replace the 'and'. Return 'true' if the node is replaced. +bool X86DAGToDAGISel::shrinkAndImmediate(SDNode *And) { + // i8 is unshrinkable, i16 should be promoted to i32, and vector ops don't + // have immediate operands. + MVT VT = And->getSimpleValueType(0); + if (VT != MVT::i32 && VT != MVT::i64) + return false; + + auto *And1C = dyn_cast(And->getOperand(1)); + if (!And1C) + return false; + + // Bail out if the mask constant is already negative. It can't shrink more. + APInt MaskVal = And1C->getAPIntValue(); + unsigned MaskLZ = MaskVal.countLeadingZeros(); + if (!MaskLZ) + return false; + + SDValue And0 = And->getOperand(0); + APInt HighZeros = APInt::getHighBitsSet(VT.getSizeInBits(), MaskLZ); + APInt NegMaskVal = MaskVal | HighZeros; + + // If a negative constant would not allow a smaller encoding, there's no need + // to continue. Only change the constant when we know it's a win. + unsigned MinWidth = NegMaskVal.getMinSignedBits(); + if (MinWidth > 32 || (MinWidth > 8 && MaskVal.getMinSignedBits() <= 32)) + return false; + + // The variable operand must be all zeros in the top bits to allow using the + // new, negative constant as the mask. + if (!CurDAG->MaskedValueIsZero(And0, HighZeros)) + return false; + + // Check if the mask is -1. In that case, this is an unnecessary instruction + // that escaped earlier analysis. + if (NegMaskVal.isAllOnesValue()) { + ReplaceNode(And, And0.getNode()); + return true; + } + + // A negative mask allows a smaller encoding. Create a new 'and' node. + SDValue NewMask = CurDAG->getConstant(NegMaskVal, SDLoc(And), VT); + SDValue NewAnd = CurDAG->getNode(ISD::AND, SDLoc(And), VT, And0, NewMask); + ReplaceNode(And, NewAnd.getNode()); + SelectCode(NewAnd.getNode()); + return true; +} + void X86DAGToDAGISel::Select(SDNode *Node) { MVT NVT = Node->getSimpleValueType(0); unsigned Opc, MOpc; @@ -2485,9 +2567,10 @@ void X86DAGToDAGISel::Select(SDNode *Node) { } case ISD::AND: - // Try to match BEXTR/BEXTRI instruction. 
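The shrinkAndImmediate transform defined above is easiest to see on concrete values: when the upper bits of the other AND operand are already known to be zero, setting those same bits in the mask cannot change the result, but it can turn a large positive constant into a small negative one that fits the sign-extended 8-bit immediate form. A standalone sketch of the arithmetic in plain C++, independent of the SelectionDAG code:

#include <cassert>
#include <cstdint>

int main() {
  // A value whose upper 32 bits are known to be zero, e.g. a zero-extended
  // 32-bit load.
  uint64_t Val = 0x00000000deadbeefULL;

  // Original mask: upper 32 bits clear. 64-bit AND has no imm64 form, so
  // this constant would have to be materialized in a register first.
  uint64_t WideMask = 0x00000000fffffff0ULL;

  // Setting the known-zero high bits gives -16, which encodes as a
  // sign-extended 8-bit immediate: and $-16, %rax.
  uint64_t NegMask = ~UINT64_C(0xf); // 0xfffffffffffffff0

  // Because Val's high bits are zero, both masks produce the same result,
  // which is what the MaskedValueIsZero check above guarantees for the DAG.
  assert((Val & WideMask) == (Val & NegMask));
  return 0;
}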
if (matchBEXTRFromAnd(Node)) return; + if (shrinkAndImmediate(Node)) + return; LLVM_FALLTHROUGH; case ISD::OR: diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index a4fe0d4cc910..ad0eebf8b28a 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -94,7 +94,7 @@ static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl, const char *Msg) { MachineFunction &MF = DAG.getMachineFunction(); DAG.getContext()->diagnose( - DiagnosticInfoUnsupported(*MF.getFunction(), Msg, dl.getDebugLoc())); + DiagnosticInfoUnsupported(MF.getFunction(), Msg, dl.getDebugLoc())); } X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, @@ -399,7 +399,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setTruncStoreAction(MVT::f80, MVT::f16, Expand); if (Subtarget.hasPOPCNT()) { - setOperationAction(ISD::CTPOP , MVT::i8 , Promote); + setOperationPromotedToType(ISD::CTPOP, MVT::i8, MVT::i32); } else { setOperationAction(ISD::CTPOP , MVT::i8 , Expand); setOperationAction(ISD::CTPOP , MVT::i16 , Expand); @@ -461,7 +461,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SRL_PARTS, VT, Custom); } - if (Subtarget.hasSSE1()) + if (Subtarget.hasSSEPrefetch() || Subtarget.has3DNow()) setOperationAction(ISD::PREFETCH , MVT::Other, Legal); setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom); @@ -860,8 +860,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal); setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom); // Fast v2f32 UINT_TO_FP( v2i32 ) custom conversion. @@ -998,17 +996,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, // (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted // even though v8i16 is a legal type. - setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Promote); - setOperationAction(ISD::FP_TO_UINT, MVT::v8i16, Promote); + setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i16, MVT::v8i32); + setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i16, MVT::v8i32); setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal); - setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Promote); setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal); setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal); - setOperationAction(ISD::UINT_TO_FP, MVT::v8i8, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom); - for (MVT VT : MVT::fp_vector_valuetypes()) setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4f32, Legal); @@ -1133,6 +1127,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, } if (HasInt256) { + // Custom legalize 2x32 to get a little better code. 
+ setOperationAction(ISD::MGATHER, MVT::v2f32, Custom); + setOperationAction(ISD::MGATHER, MVT::v2i32, Custom); + for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64, MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) setOperationAction(ISD::MGATHER, VT, Custom); @@ -1146,9 +1144,66 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, addRegisterClass(MVT::v8f64, &X86::VR512RegClass); addRegisterClass(MVT::v1i1, &X86::VK1RegClass); + addRegisterClass(MVT::v2i1, &X86::VK2RegClass); + addRegisterClass(MVT::v4i1, &X86::VK4RegClass); addRegisterClass(MVT::v8i1, &X86::VK8RegClass); addRegisterClass(MVT::v16i1, &X86::VK16RegClass); + setOperationAction(ISD::SELECT, MVT::v1i1, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v1i1, Custom); + setOperationAction(ISD::BUILD_VECTOR, MVT::v1i1, Custom); + + setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v16i1, MVT::v16i32); + setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v16i1, MVT::v16i32); + setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i1, MVT::v8i32); + setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i1, MVT::v8i32); + setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i1, MVT::v4i32); + setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i1, MVT::v4i32); + setOperationAction(ISD::SINT_TO_FP, MVT::v2i1, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::v2i1, Custom); + + setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i1, MVT::v16i32); + setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v16i1, MVT::v16i32); + setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i1, MVT::v8i32); + setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i1, MVT::v8i32); + setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v4i1, MVT::v4i32); + setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v4i1, MVT::v4i32); + if (Subtarget.hasVLX()) { + setOperationAction(ISD::FP_TO_SINT, MVT::v2i1, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Custom); + } + + // Extends of v16i1/v8i1/v4i1/v2i1 to 128-bit vectors. 
+ for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) { + setOperationAction(ISD::SIGN_EXTEND, VT, Custom); + setOperationAction(ISD::ZERO_EXTEND, VT, Custom); + setOperationAction(ISD::ANY_EXTEND, VT, Custom); + } + + for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 }) { + setOperationAction(ISD::ADD, VT, Custom); + setOperationAction(ISD::SUB, VT, Custom); + setOperationAction(ISD::MUL, VT, Custom); + setOperationAction(ISD::SETCC, VT, Custom); + setOperationAction(ISD::SELECT, VT, Custom); + setOperationAction(ISD::TRUNCATE, VT, Custom); + + setOperationAction(ISD::BUILD_VECTOR, VT, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); + setOperationAction(ISD::VSELECT, VT, Expand); + } + + setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i1, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i1, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i1, Custom); + setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v4i1, Custom); + setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8i1, Custom); + setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v16i1, Custom); + for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1 }) + setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); + for (MVT VT : MVT::fp_vector_valuetypes()) setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8f32, Legal); @@ -1178,47 +1233,24 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, } setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal); + setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i16, MVT::v16i32); + setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i8, MVT::v16i32); setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal); - setOperationAction(ISD::FP_TO_UINT, MVT::v16i8, Legal); - setOperationAction(ISD::FP_TO_UINT, MVT::v16i16, Legal); - setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal); - setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal); - setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom); + setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v16i8, MVT::v16i32); + setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v16i16, MVT::v16i32); setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal); - setOperationAction(ISD::SINT_TO_FP, MVT::v16i8, Promote); - setOperationAction(ISD::SINT_TO_FP, MVT::v16i16, Promote); setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal); - setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal); - setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal); - setOperationAction(ISD::UINT_TO_FP, MVT::v16i8, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::v16i16, Custom); - setOperationAction(ISD::SINT_TO_FP, MVT::v16i1, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::v16i1, Custom); - setOperationAction(ISD::SINT_TO_FP, MVT::v8i1, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::v8i1, Custom); - setOperationAction(ISD::SINT_TO_FP, MVT::v4i1, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::v4i1, Custom); - setOperationAction(ISD::SINT_TO_FP, MVT::v2i1, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::v2i1, Custom); setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal); setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal); setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal); setTruncStoreAction(MVT::v16i32, MVT::v16i8, Legal); setTruncStoreAction(MVT::v16i32, MVT::v16i16, Legal); - if (Subtarget.hasVLX()){ - setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal); - 
setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal); - setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal); - setTruncStoreAction(MVT::v8i32, MVT::v8i8, Legal); - setTruncStoreAction(MVT::v8i32, MVT::v8i16, Legal); - - setTruncStoreAction(MVT::v2i64, MVT::v2i8, Legal); - setTruncStoreAction(MVT::v2i64, MVT::v2i16, Legal); - setTruncStoreAction(MVT::v2i64, MVT::v2i32, Legal); - setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal); - setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal); - } else { + + if (!Subtarget.hasVLX()) { + // With 512-bit vectors and no VLX, we prefer to widen MLOAD/MSTORE + // to 512-bit rather than use the AVX2 instructions so that we can use + // k-masks. for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64, MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) { setOperationAction(ISD::MLOAD, VT, Custom); @@ -1226,27 +1258,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, } } - if (Subtarget.hasDQI()) { - for (auto VT : { MVT::v2i64, MVT::v4i64, MVT::v8i64 }) { - setOperationAction(ISD::SINT_TO_FP, VT, Legal); - setOperationAction(ISD::UINT_TO_FP, VT, Legal); - setOperationAction(ISD::FP_TO_SINT, VT, Legal); - setOperationAction(ISD::FP_TO_UINT, VT, Legal); - } - if (Subtarget.hasVLX()) { - // Fast v2f32 SINT_TO_FP( v2i32 ) custom conversion. - setOperationAction(ISD::SINT_TO_FP, MVT::v2f32, Custom); - setOperationAction(ISD::FP_TO_SINT, MVT::v2f32, Custom); - setOperationAction(ISD::FP_TO_UINT, MVT::v2f32, Custom); - } - } - if (Subtarget.hasVLX()) { - setOperationAction(ISD::ZERO_EXTEND, MVT::v4i32, Custom); - setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Custom); - setOperationAction(ISD::SIGN_EXTEND, MVT::v4i32, Custom); - setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Custom); - } - setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom); setOperationAction(ISD::TRUNCATE, MVT::v16i16, Custom); setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom); @@ -1256,13 +1267,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom); setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom); - setOperationAction(ISD::SIGN_EXTEND, MVT::v16i8, Custom); - setOperationAction(ISD::ZERO_EXTEND, MVT::v16i8, Custom); - setOperationAction(ISD::SIGN_EXTEND, MVT::v8i16, Custom); - setOperationAction(ISD::ZERO_EXTEND, MVT::v8i16, Custom); - setOperationAction(ISD::SIGN_EXTEND, MVT::v16i16, Custom); - setOperationAction(ISD::ZERO_EXTEND, MVT::v16i16, Custom); - for (auto VT : { MVT::v16f32, MVT::v8f64 }) { setOperationAction(ISD::FFLOOR, VT, Legal); setOperationAction(ISD::FCEIL, VT, Legal); @@ -1282,7 +1286,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i64, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v16f32, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i32, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i1, Custom); setOperationAction(ISD::MUL, MVT::v8i64, Custom); setOperationAction(ISD::MUL, MVT::v16i32, Legal); @@ -1290,33 +1293,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::UMUL_LOHI, MVT::v16i32, Custom); setOperationAction(ISD::SMUL_LOHI, MVT::v16i32, Custom); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v1i1, Custom); - setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v16i1, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v1i1, Custom); setOperationAction(ISD::SELECT, MVT::v8f64, Custom); setOperationAction(ISD::SELECT, 
MVT::v8i64, Custom); setOperationAction(ISD::SELECT, MVT::v16f32, Custom); - - // NonVLX sub-targets extend 128/256 vectors to use the 512 version. - setOperationAction(ISD::ABS, MVT::v4i64, Legal); - setOperationAction(ISD::ABS, MVT::v2i64, Legal); - - for (auto VT : { MVT::v8i1, MVT::v16i1 }) { - setOperationAction(ISD::ADD, VT, Custom); - setOperationAction(ISD::SUB, VT, Custom); - setOperationAction(ISD::MUL, VT, Custom); - setOperationAction(ISD::SETCC, VT, Custom); - setOperationAction(ISD::SELECT, VT, Custom); - setOperationAction(ISD::TRUNCATE, VT, Custom); - - setOperationAction(ISD::BUILD_VECTOR, VT, Custom); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); - setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); - setOperationAction(ISD::VSELECT, VT, Expand); - } - for (auto VT : { MVT::v16i32, MVT::v8i64 }) { setOperationAction(ISD::SMAX, VT, Legal); setOperationAction(ISD::UMAX, VT, Legal); @@ -1328,11 +1308,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SRA, VT, Custom); setOperationAction(ISD::CTPOP, VT, Custom); setOperationAction(ISD::CTTZ, VT, Custom); - } - - // NonVLX sub-targets extend 128/256 vectors to use the 512 version. - for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v16i32, MVT::v2i64, MVT::v4i64, - MVT::v8i64}) { setOperationAction(ISD::ROTL, VT, Custom); setOperationAction(ISD::ROTR, VT, Custom); } @@ -1344,51 +1319,32 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationPromotedToType(ISD::OR, MVT::v16i32, MVT::v8i64); setOperationPromotedToType(ISD::XOR, MVT::v16i32, MVT::v8i64); + if (Subtarget.hasDQI()) { + setOperationAction(ISD::SINT_TO_FP, MVT::v8i64, Legal); + setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal); + setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal); + setOperationAction(ISD::FP_TO_UINT, MVT::v8i64, Legal); + } + if (Subtarget.hasCDI()) { // NonVLX sub-targets extend 128/256 vectors to use the 512 version. - for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v16i32, MVT::v2i64, - MVT::v4i64, MVT::v8i64}) { + for (auto VT : { MVT::v16i32, MVT::v8i64} ) { setOperationAction(ISD::CTLZ, VT, Legal); setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Custom); } } // Subtarget.hasCDI() - if (Subtarget.hasDQI()) { - // NonVLX sub-targets extend 128/256 vectors to use the 512 version. - setOperationAction(ISD::MUL, MVT::v2i64, Legal); - setOperationAction(ISD::MUL, MVT::v4i64, Legal); - setOperationAction(ISD::MUL, MVT::v8i64, Legal); - } - if (Subtarget.hasVPOPCNTDQ()) { - // VPOPCNTDQ sub-targets extend 128/256 vectors to use the avx512 - // version of popcntd/q. - for (auto VT : {MVT::v16i32, MVT::v8i64, MVT::v8i32, MVT::v4i64, - MVT::v4i32, MVT::v2i64}) + for (auto VT : { MVT::v16i32, MVT::v8i64 }) setOperationAction(ISD::CTPOP, VT, Legal); } - // Custom legalize 2x32 to get a little better code. - if (Subtarget.hasVLX()) { - setOperationAction(ISD::MGATHER, MVT::v2f32, Custom); - } - - // Custom lower several nodes. - for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64, - MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) - setOperationAction(ISD::MSCATTER, VT, Custom); - - setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v1i1, Legal); - // Extract subvector is special because the value type // (result) is 256-bit but the source is 512-bit wide. // 128-bit was made Legal under AVX1. 
for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64 }) setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal); - for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1, - MVT::v16i1, MVT::v32i1, MVT::v64i1 }) - setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal); for (auto VT : { MVT::v16i32, MVT::v8i64, MVT::v16f32, MVT::v8f64 }) { setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); @@ -1409,6 +1365,61 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, } }// has AVX-512 + if (!Subtarget.useSoftFloat() && + (Subtarget.hasAVX512() || Subtarget.hasVLX())) { + // These operations are handled on non-VLX by artificially widening in + // isel patterns. + // TODO: Custom widen in lowering on non-VLX and drop the isel patterns? + + setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal); + setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal); + setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal); + setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal); + + for (auto VT : { MVT::v2i64, MVT::v4i64 }) { + setOperationAction(ISD::SMAX, VT, Legal); + setOperationAction(ISD::UMAX, VT, Legal); + setOperationAction(ISD::SMIN, VT, Legal); + setOperationAction(ISD::UMIN, VT, Legal); + setOperationAction(ISD::ABS, VT, Legal); + } + + for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) { + setOperationAction(ISD::ROTL, VT, Custom); + setOperationAction(ISD::ROTR, VT, Custom); + } + + // Custom legalize 2x32 to get a little better code. + setOperationAction(ISD::MSCATTER, MVT::v2f32, Custom); + setOperationAction(ISD::MSCATTER, MVT::v2i32, Custom); + + for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64, + MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) + setOperationAction(ISD::MSCATTER, VT, Custom); + + if (Subtarget.hasDQI()) { + for (auto VT : { MVT::v2i64, MVT::v4i64 }) { + setOperationAction(ISD::SINT_TO_FP, VT, Legal); + setOperationAction(ISD::UINT_TO_FP, VT, Legal); + setOperationAction(ISD::FP_TO_SINT, VT, Legal); + setOperationAction(ISD::FP_TO_UINT, VT, Legal); + } + } + + if (Subtarget.hasCDI()) { + for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) { + setOperationAction(ISD::CTLZ, VT, Legal); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Custom); + } + } // Subtarget.hasCDI() + + if (Subtarget.hasVPOPCNTDQ()) { + for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) + setOperationAction(ISD::CTPOP, VT, Legal); + } + } + if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) { addRegisterClass(MVT::v32i16, &X86::VR512RegClass); addRegisterClass(MVT::v64i8, &X86::VR512RegClass); @@ -1416,74 +1427,64 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, addRegisterClass(MVT::v32i1, &X86::VK32RegClass); addRegisterClass(MVT::v64i1, &X86::VK64RegClass); - setOperationAction(ISD::ADD, MVT::v32i1, Custom); - setOperationAction(ISD::ADD, MVT::v64i1, Custom); - setOperationAction(ISD::SUB, MVT::v32i1, Custom); - setOperationAction(ISD::SUB, MVT::v64i1, Custom); - setOperationAction(ISD::MUL, MVT::v32i1, Custom); - setOperationAction(ISD::MUL, MVT::v64i1, Custom); + for (auto VT : { MVT::v32i1, MVT::v64i1 }) { + setOperationAction(ISD::ADD, VT, Custom); + setOperationAction(ISD::SUB, VT, Custom); + setOperationAction(ISD::MUL, VT, Custom); + setOperationAction(ISD::VSELECT, VT, Expand); + + setOperationAction(ISD::TRUNCATE, VT, Custom); + setOperationAction(ISD::SETCC, VT, Custom); + 
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); + setOperationAction(ISD::SELECT, VT, Custom); + setOperationAction(ISD::BUILD_VECTOR, VT, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); + } + + setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i1, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i1, Custom); + setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i1, Custom); + setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i1, Custom); + for (auto VT : { MVT::v16i1, MVT::v32i1 }) + setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); + + // Extends from v32i1 masks to 256-bit vectors. + setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom); + setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom); + setOperationAction(ISD::ANY_EXTEND, MVT::v32i8, Custom); + // Extends from v64i1 masks to 512-bit vectors. + setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom); + setOperationAction(ISD::ZERO_EXTEND, MVT::v64i8, Custom); + setOperationAction(ISD::ANY_EXTEND, MVT::v64i8, Custom); - setOperationAction(ISD::SETCC, MVT::v32i1, Custom); - setOperationAction(ISD::SETCC, MVT::v64i1, Custom); setOperationAction(ISD::MUL, MVT::v32i16, Legal); setOperationAction(ISD::MUL, MVT::v64i8, Custom); setOperationAction(ISD::MULHS, MVT::v32i16, Legal); setOperationAction(ISD::MULHU, MVT::v32i16, Legal); setOperationAction(ISD::MULHS, MVT::v64i8, Custom); setOperationAction(ISD::MULHU, MVT::v64i8, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i1, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i1, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i16, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i8, Custom); - setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i1, Custom); - setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i1, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i16, Legal); setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i8, Legal); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v32i16, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v64i8, Custom); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v32i1, Custom); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v64i1, Custom); setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v32i16, Custom); setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v64i8, Custom); - setOperationAction(ISD::SELECT, MVT::v32i1, Custom); - setOperationAction(ISD::SELECT, MVT::v64i1, Custom); - setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom); - setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom); setOperationAction(ISD::SIGN_EXTEND, MVT::v32i16, Custom); setOperationAction(ISD::ZERO_EXTEND, MVT::v32i16, Custom); setOperationAction(ISD::ANY_EXTEND, MVT::v32i16, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32i16, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v64i8, Custom); - setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom); - setOperationAction(ISD::ZERO_EXTEND, MVT::v64i8, Custom); - setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32i1, Custom); - setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v64i1, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32i16, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v64i8, Custom); - setOperationAction(ISD::TRUNCATE, MVT::v32i1, Custom); - setOperationAction(ISD::TRUNCATE, MVT::v64i1, Custom); setOperationAction(ISD::TRUNCATE, MVT::v32i8, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32i1, Custom); - 
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v64i1, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v32i1, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v64i1, Custom); - setOperationAction(ISD::VSELECT, MVT::v32i1, Expand); - setOperationAction(ISD::VSELECT, MVT::v64i1, Expand); setOperationAction(ISD::BITREVERSE, MVT::v64i8, Custom); setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v32i16, Custom); setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal); - if (Subtarget.hasVLX()) { - setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal); - setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal); - } - - LegalizeAction Action = Subtarget.hasVLX() ? Legal : Custom; - for (auto VT : { MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16 }) { - setOperationAction(ISD::MLOAD, VT, Action); - setOperationAction(ISD::MSTORE, VT, Action); - } for (auto VT : { MVT::v64i8, MVT::v32i16 }) { setOperationAction(ISD::BUILD_VECTOR, VT, Custom); @@ -1512,41 +1513,55 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, } if (Subtarget.hasBITALG()) { - for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v32i8, - MVT::v16i16, MVT::v16i8, MVT::v8i16 }) + for (auto VT : { MVT::v64i8, MVT::v32i16 }) setOperationAction(ISD::CTPOP, VT, Legal); } } - if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) { - addRegisterClass(MVT::v4i1, &X86::VK4RegClass); - addRegisterClass(MVT::v2i1, &X86::VK2RegClass); + if (!Subtarget.useSoftFloat() && Subtarget.hasBWI() && + (Subtarget.hasAVX512() || Subtarget.hasVLX())) { + for (auto VT : { MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16 }) { + setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom); + setOperationAction(ISD::MSTORE, VT, Subtarget.hasVLX() ? Legal : Custom); + } - for (auto VT : { MVT::v2i1, MVT::v4i1 }) { - setOperationAction(ISD::ADD, VT, Custom); - setOperationAction(ISD::SUB, VT, Custom); - setOperationAction(ISD::MUL, VT, Custom); - setOperationAction(ISD::VSELECT, VT, Expand); + // These operations are handled on non-VLX by artificially widening in + // isel patterns. + // TODO: Custom widen in lowering on non-VLX and drop the isel patterns? 
- setOperationAction(ISD::TRUNCATE, VT, Custom); - setOperationAction(ISD::SETCC, VT, Custom); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); - setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); - setOperationAction(ISD::SELECT, VT, Custom); - setOperationAction(ISD::BUILD_VECTOR, VT, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); + if (Subtarget.hasBITALG()) { + for (auto VT : { MVT::v16i8, MVT::v32i8, MVT::v8i16, MVT::v16i16 }) + setOperationAction(ISD::CTPOP, VT, Legal); } + } - setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i1, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i1, Custom); - setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8i1, Custom); - setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v4i1, Custom); + if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) { + setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal); + setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal); + setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal); + setTruncStoreAction(MVT::v8i32, MVT::v8i8, Legal); + setTruncStoreAction(MVT::v8i32, MVT::v8i16, Legal); + + setTruncStoreAction(MVT::v2i64, MVT::v2i8, Legal); + setTruncStoreAction(MVT::v2i64, MVT::v2i16, Legal); + setTruncStoreAction(MVT::v2i64, MVT::v2i32, Legal); + setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal); + setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal); - for (auto VT : { MVT::v2i64, MVT::v4i64 }) { - setOperationAction(ISD::SMAX, VT, Legal); - setOperationAction(ISD::UMAX, VT, Legal); - setOperationAction(ISD::SMIN, VT, Legal); - setOperationAction(ISD::UMIN, VT, Legal); + if (Subtarget.hasDQI()) { + // Fast v2f32 SINT_TO_FP( v2i64 ) custom conversion. + // v2f32 UINT_TO_FP is already custom under SSE2. + setOperationAction(ISD::SINT_TO_FP, MVT::v2f32, Custom); + assert(isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) && + "Unexpected operation action!"); + // v2i64 FP_TO_S/UINT(v2f32) custom conversion. + setOperationAction(ISD::FP_TO_SINT, MVT::v2f32, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::v2f32, Custom); + } + + if (Subtarget.hasBWI()) { + setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal); + setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal); } } @@ -1590,16 +1605,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setLibcallName(RTLIB::MUL_I128, nullptr); } - // Combine sin / cos into one node or libcall if possible. - if (Subtarget.hasSinCos()) { - setLibcallName(RTLIB::SINCOS_F32, "sincosf"); - setLibcallName(RTLIB::SINCOS_F64, "sincos"); - if (Subtarget.isTargetDarwin()) { - // For MacOSX, we don't want the normal expansion of a libcall to sincos. - // We want to issue a libcall to __sincos_stret to avoid memory traffic. - setOperationAction(ISD::FSINCOS, MVT::f64, Custom); - setOperationAction(ISD::FSINCOS, MVT::f32, Custom); - } + // Combine sin / cos into _sincos_stret if it is available. + if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr && + getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) { + setOperationAction(ISD::FSINCOS, MVT::f64, Custom); + setOperationAction(ISD::FSINCOS, MVT::f32, Custom); } if (Subtarget.isTargetWin64()) { @@ -1695,6 +1705,19 @@ bool X86TargetLowering::useLoadStackGuardNode() const { return Subtarget.isTargetMachO() && Subtarget.is64Bit(); } +bool X86TargetLowering::useStackGuardXorFP() const { + // Currently only MSVC CRTs XOR the frame pointer into the stack guard value. 
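Mixing the frame pointer into the guard ties the on-stack cookie to one particular frame, so a value leaked from one frame cannot simply be replayed at a different stack address. Roughly the scheme the MSVC CRT expects, as an illustrative C++ sketch (the CRT symbols are assumed declarations; the lowering itself only emits the XOR against the frame pointer via the pseudo-instructions below):

#include <cstdint>

extern "C" {
extern uint64_t __security_cookie;      // provided by the MSVC CRT
[[noreturn]] void __report_gsfailure();  // CRT stack-smash handler
}

void frame_with_guard() {
  char Buf[64];
  (void)Buf; // stand-in for locals that might be overflowed

  // Prologue: store cookie ^ frame pointer into the guard slot.
  uint64_t FP = reinterpret_cast<uint64_t>(__builtin_frame_address(0));
  volatile uint64_t GuardSlot = __security_cookie ^ FP;

  // ... function body ...

  // Epilogue: undo the xor and compare against the global cookie.
  if ((GuardSlot ^ FP) != __security_cookie)
    __report_gsfailure();
}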
+ return Subtarget.getTargetTriple().isOSMSVCRT(); +} + +SDValue X86TargetLowering::emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val, + const SDLoc &DL) const { + EVT PtrTy = getPointerTy(DAG.getDataLayout()); + unsigned XorOp = Subtarget.is64Bit() ? X86::XOR64_FP : X86::XOR32_FP; + MachineSDNode *Node = DAG.getMachineNode(XorOp, DL, PtrTy, Val); + return SDValue(Node, 0); +} + TargetLoweringBase::LegalizeTypeAction X86TargetLowering::getPreferredVectorAction(EVT VT) const { if (ExperimentalVectorWideningLegalization && @@ -1711,37 +1734,26 @@ EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL, if (!VT.isVector()) return MVT::i8; - if (VT.getSizeInBits() >= 512) { - EVT EltVT = VT.getVectorElementType(); + if (Subtarget.hasAVX512()) { const unsigned NumElts = VT.getVectorNumElements(); - if (Subtarget.hasAVX512()) - if (EltVT == MVT::i32 || EltVT == MVT::i64 || - EltVT == MVT::f32 || EltVT == MVT::f64) - return EVT::getVectorVT(Context, MVT::i1, NumElts); - if (Subtarget.hasBWI()) - if (EltVT == MVT::i8 || EltVT == MVT::i16) - return EVT::getVectorVT(Context, MVT::i1, NumElts); - } - - if (VT.isSimple()) { - MVT VVT = VT.getSimpleVT(); - const unsigned NumElts = VVT.getVectorNumElements(); - MVT EltVT = VVT.getVectorElementType(); - if (Subtarget.hasBWI() && Subtarget.hasVLX()) - return MVT::getVectorVT(MVT::i1, NumElts); - - if (!isTypeLegal(VT) && getTypeAction(Context, VT) == TypePromoteInteger) { - EVT LegalVT = getTypeToTransformTo(Context, VT); - EltVT = LegalVT.getVectorElementType().getSimpleVT(); + // Figure out what this type will be legalized to. + EVT LegalVT = VT; + while (getTypeAction(Context, LegalVT) != TypeLegal) + LegalVT = getTypeToTransformTo(Context, LegalVT); + + // If we got a 512-bit vector then we'll definitely have a vXi1 compare. + if (LegalVT.getSimpleVT().is512BitVector()) + return EVT::getVectorVT(Context, MVT::i1, NumElts); + + if (LegalVT.getSimpleVT().isVector() && Subtarget.hasVLX()) { + // If we legalized to less than a 512-bit vector, then we will use a vXi1 + // compare for vXi32/vXi64 for sure. If we have BWI we will also support + // vXi16/vXi8. 
+ MVT EltVT = LegalVT.getSimpleVT().getVectorElementType(); + if (Subtarget.hasBWI() || EltVT.getSizeInBits() >= 32) + return EVT::getVectorVT(Context, MVT::i1, NumElts); } - - if (Subtarget.hasVLX() && EltVT.getSizeInBits() >= 32) - switch(NumElts) { - case 2: return MVT::v2i1; - case 4: return MVT::v4i1; - case 8: return MVT::v8i1; - } } return VT.changeVectorElementTypeToInteger(); @@ -1809,8 +1821,8 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size, bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, MachineFunction &MF) const { - const Function *F = MF.getFunction(); - if (!F->hasFnAttribute(Attribute::NoImplicitFloat)) { + const Function &F = MF.getFunction(); + if (!F.hasFnAttribute(Attribute::NoImplicitFloat)) { if (Size >= 16 && (!Subtarget.isUnalignedMem16Slow() || ((DstAlign == 0 || DstAlign >= 16) && @@ -1906,7 +1918,7 @@ void X86TargetLowering::markLibCallAttributes(MachineFunction *MF, unsigned CC, if (CC != CallingConv::C && CC != CallingConv::X86_StdCall) return; unsigned ParamRegs = 0; - if (auto *M = MF->getFunction()->getParent()) + if (auto *M = MF->getFunction().getParent()) ParamRegs = M->getNumberRegisterParameters(); // Mark the first N int arguments as having reg @@ -2115,6 +2127,10 @@ static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc, const SDLoc &Dl, SelectionDAG &DAG) { EVT ValVT = ValArg.getValueType(); + if (ValVT == MVT::v1i1) + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, Dl, ValLoc, ValArg, + DAG.getIntPtrConstant(0, Dl)); + if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) || (ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) { // Two stage lowering might be required @@ -2125,13 +2141,16 @@ static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc, if (ValLoc == MVT::i32) ValToCopy = DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValToCopy); return ValToCopy; - } else if ((ValVT == MVT::v32i1 && ValLoc == MVT::i32) || - (ValVT == MVT::v64i1 && ValLoc == MVT::i64)) { + } + + if ((ValVT == MVT::v32i1 && ValLoc == MVT::i32) || + (ValVT == MVT::v64i1 && ValLoc == MVT::i64)) { // One stage lowering is required // bitcast: v32i1 -> i32 / v64i1 -> i64 return DAG.getBitcast(ValLoc, ValArg); - } else - return DAG.getNode(ISD::SIGN_EXTEND, Dl, ValLoc, ValArg); + } + + return DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValArg); } /// Breaks v64i1 value into two registers and adds the new node to the DAG @@ -2173,7 +2192,7 @@ X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, // For example, when they are used for argument passing. 
bool ShouldDisableCalleeSavedRegister = CallConv == CallingConv::X86_RegCall || - MF.getFunction()->hasFnAttribute("no_caller_saved_registers"); + MF.getFunction().hasFnAttribute("no_caller_saved_registers"); if (CallConv == CallingConv::X86_INTR && !Outs.empty()) report_fatal_error("X86 interrupts may not return any value"); @@ -2855,8 +2874,8 @@ static ArrayRef get64BitArgumentXMMs(MachineFunction &MF, return None; } - const Function *Fn = MF.getFunction(); - bool NoImplicitFloatOps = Fn->hasFnAttribute(Attribute::NoImplicitFloat); + const Function &F = MF.getFunction(); + bool NoImplicitFloatOps = F.hasFnAttribute(Attribute::NoImplicitFloat); bool isSoftFloat = Subtarget.useSoftFloat(); assert(!(isSoftFloat && NoImplicitFloatOps) && "SSE register cannot be used when SSE is disabled!"); @@ -2889,10 +2908,9 @@ SDValue X86TargetLowering::LowerFormalArguments( X86MachineFunctionInfo *FuncInfo = MF.getInfo(); const TargetFrameLowering &TFI = *Subtarget.getFrameLowering(); - const Function *Fn = MF.getFunction(); - if (Fn->hasExternalLinkage() && - Subtarget.isTargetCygMing() && - Fn->getName() == "main") + const Function &F = MF.getFunction(); + if (F.hasExternalLinkage() && Subtarget.isTargetCygMing() && + F.getName() == "main") FuncInfo->setForceFramePointer(true); MachineFrameInfo &MFI = MF.getFrameInfo(); @@ -3067,7 +3085,7 @@ SDValue X86TargetLowering::LowerFormalArguments( // Figure out if XMM registers are in use. assert(!(Subtarget.useSoftFloat() && - Fn->hasFnAttribute(Attribute::NoImplicitFloat)) && + F.hasFnAttribute(Attribute::NoImplicitFloat)) && "SSE register cannot be used when SSE is disabled!"); // 64-bit calling conventions support varargs and register parameters, so we @@ -3224,7 +3242,7 @@ SDValue X86TargetLowering::LowerFormalArguments( FuncInfo->setArgumentStackSize(StackSize); if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) { - EHPersonality Personality = classifyEHPersonality(Fn->getPersonalityFn()); + EHPersonality Personality = classifyEHPersonality(F.getPersonalityFn()); if (Personality == EHPersonality::CoreCLR) { assert(Is64Bit); // TODO: Add a mechanism to frame lowering that will allow us to indicate @@ -3241,7 +3259,7 @@ SDValue X86TargetLowering::LowerFormalArguments( } if (CallConv == CallingConv::X86_RegCall || - Fn->hasFnAttribute("no_caller_saved_registers")) { + F.hasFnAttribute("no_caller_saved_registers")) { MachineRegisterInfo &MRI = MF.getRegInfo(); for (std::pair Pair : MRI.liveins()) MRI.disableCalleeSavedRegister(Pair.first); @@ -3332,7 +3350,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, StructReturnType SR = callIsStructReturn(Outs, Subtarget.isTargetMCU()); bool IsSibcall = false; X86MachineFunctionInfo *X86Info = MF.getInfo(); - auto Attr = MF.getFunction()->getFnAttribute("disable-tail-calls"); + auto Attr = MF.getFunction().getFnAttribute("disable-tail-calls"); const auto *CI = dyn_cast_or_null(CLI.CS.getInstruction()); const Function *Fn = CI ? CI->getCalledFunction() : nullptr; bool HasNCSR = (CI && CI->hasFnAttr("no_caller_saved_registers")) || @@ -3367,7 +3385,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Check if it's really possible to do a tail call. 
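For reference, the common case that passes the eligibility check below is a plain sibling call: matching calling conventions, no struct return, and a result returned unchanged, so the call can normally be emitted as a direct jump when optimizing. A minimal example:

int callee(int);

// With optimization this typically lowers to
//   addl $1, %edi
//   jmp  callee        # tail call reuses the caller's frame
int caller(int X) { return callee(X + 1); }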
isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, SR != NotStructReturn, - MF.getFunction()->hasStructRetAttr(), CLI.RetTy, + MF.getFunction().hasStructRetAttr(), CLI.RetTy, Outs, OutVals, Ins, DAG); // Sibcalls are automatically detected tailcalls which do not require @@ -3713,7 +3731,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, } } } else if (ExternalSymbolSDNode *S = dyn_cast(Callee)) { - const Module *Mod = DAG.getMachineFunction().getFunction()->getParent(); + const Module *Mod = DAG.getMachineFunction().getFunction().getParent(); unsigned char OpFlags = Subtarget.classifyGlobalFunctionReference(nullptr, *Mod); @@ -3762,10 +3780,10 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // FIXME: Model this more precisely so that we can register allocate across // the normal edge and spill and fill across the exceptional edge. if (!Is64Bit && CLI.CS && CLI.CS.isInvoke()) { - const Function *CallerFn = MF.getFunction(); + const Function &CallerFn = MF.getFunction(); EHPersonality Pers = - CallerFn->hasPersonalityFn() - ? classifyEHPersonality(CallerFn->getPersonalityFn()) + CallerFn.hasPersonalityFn() + ? classifyEHPersonality(CallerFn.getPersonalityFn()) : EHPersonality::Unknown; if (isFuncletEHPersonality(Pers)) Mask = RegInfo->getNoPreservedMask(); @@ -4013,15 +4031,15 @@ bool X86TargetLowering::IsEligibleForTailCallOptimization( // If -tailcallopt is specified, make fastcc functions tail-callable. MachineFunction &MF = DAG.getMachineFunction(); - const Function *CallerF = MF.getFunction(); + const Function &CallerF = MF.getFunction(); // If the function return type is x86_fp80 and the callee return type is not, // then the FP_EXTEND of the call result is not a nop. It's not safe to // perform a tailcall optimization here. 
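Sketched as source, the case rejected below looks like this on the SysV x86-64 ABI, where long double is the 80-bit x86_fp80 type: the callee's double result still has to be extended after the call, so the call cannot be replaced by a jump.

double producer(double);

// Not tail-callable: the double result must be widened to long double
// (x86_fp80) in the caller after producer() returns.
long double consumer(double X) { return producer(X); }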
- if (CallerF->getReturnType()->isX86_FP80Ty() && !RetTy->isX86_FP80Ty()) + if (CallerF.getReturnType()->isX86_FP80Ty() && !RetTy->isX86_FP80Ty()) return false; - CallingConv::ID CallerCC = CallerF->getCallingConv(); + CallingConv::ID CallerCC = CallerF.getCallingConv(); bool CCMatch = CallerCC == CalleeCC; bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC); bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC); @@ -4482,6 +4500,7 @@ static bool hasFPCMov(unsigned X86CC) { bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, + MachineFunction &MF, unsigned Intrinsic) const { const IntrinsicData* IntrData = getIntrinsicWithChain(Intrinsic); @@ -4489,9 +4508,7 @@ bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, return false; Info.opc = ISD::INTRINSIC_W_CHAIN; - Info.readMem = false; - Info.writeMem = false; - Info.vol = false; + Info.flags = MachineMemOperand::MONone; Info.offset = 0; switch (IntrData->Type) { @@ -4499,14 +4516,14 @@ bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.ptrVal = I.getArgOperand(0); Info.memVT = MVT::getVT(I.getType()); Info.align = 1; - Info.readMem = true; + Info.flags |= MachineMemOperand::MOLoad; break; } case COMPRESS_TO_MEM: { Info.ptrVal = I.getArgOperand(0); Info.memVT = MVT::getVT(I.getArgOperand(1)->getType()); Info.align = 1; - Info.writeMem = true; + Info.flags |= MachineMemOperand::MOStore; break; } case TRUNCATE_TO_MEM_VI8: @@ -4524,7 +4541,7 @@ bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.memVT = MVT::getVectorVT(ScalarVT, VT.getVectorNumElements()); Info.align = 1; - Info.writeMem = true; + Info.flags |= MachineMemOperand::MOStore; break; } default: @@ -4602,11 +4619,19 @@ bool X86TargetLowering::isCheapToSpeculateCtlz() const { return Subtarget.hasLZCNT(); } +bool X86TargetLowering::isLoadBitCastBeneficial(EVT LoadVT, + EVT BitcastVT) const { + if (!Subtarget.hasDQI() && BitcastVT == MVT::v8i1) + return false; + + return TargetLowering::isLoadBitCastBeneficial(LoadVT, BitcastVT); +} + bool X86TargetLowering::canMergeStoresTo(unsigned AddressSpace, EVT MemVT, const SelectionDAG &DAG) const { // Do not merge to float value size (128 bytes) if no implicit // float attribute is set. - bool NoFloat = DAG.getMachineFunction().getFunction()->hasFnAttribute( + bool NoFloat = DAG.getMachineFunction().getFunction().hasFnAttribute( Attribute::NoImplicitFloat); if (NoFloat) { @@ -4893,8 +4918,6 @@ static SDValue getZeroVector(MVT VT, const X86Subtarget &Subtarget, } else if (VT.getVectorElementType() == MVT::i1) { assert((Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) && "Unexpected vector type"); - assert((Subtarget.hasVLX() || VT.getVectorNumElements() >= 8) && - "Unexpected vector type"); Vec = DAG.getConstant(0, dl, VT); } else { unsigned Num32BitElts = VT.getSizeInBits() / 32; @@ -5019,113 +5042,128 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG, if (!isa(Idx)) return SDValue(); + // Inserting undef is a nop. We can just return the original vector. + if (SubVec.isUndef()) + return Vec; + unsigned IdxVal = cast(Idx)->getZExtValue(); - if (IdxVal == 0 && Vec.isUndef()) // the operation is legal + if (IdxVal == 0 && Vec.isUndef()) // the operation is legal return Op; MVT OpVT = Op.getSimpleValueType(); - MVT SubVecVT = SubVec.getSimpleValueType(); unsigned NumElems = OpVT.getVectorNumElements(); + + SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl); + + // Extend to natively supported kshift. 
+ MVT WideOpVT = OpVT; + if ((!Subtarget.hasDQI() && NumElems == 8) || NumElems < 8) + WideOpVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1; + + // Inserting into the lsbs of a zero vector is legal. ISel will insert shifts + // if necessary. + if (IdxVal == 0 && ISD::isBuildVectorAllZeros(Vec.getNode())) { + // May need to promote to a legal type. + Op = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, + getZeroVector(WideOpVT, Subtarget, DAG, dl), + SubVec, Idx); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx); + } + + MVT SubVecVT = SubVec.getSimpleValueType(); unsigned SubVecNumElems = SubVecVT.getVectorNumElements(); assert(IdxVal + SubVecNumElems <= NumElems && IdxVal % SubVecVT.getSizeInBits() == 0 && "Unexpected index value in INSERT_SUBVECTOR"); - // There are 3 possible cases: - // 1. Subvector should be inserted in the lower part (IdxVal == 0) - // 2. Subvector should be inserted in the upper part - // (IdxVal + SubVecNumElems == NumElems) - // 3. Subvector should be inserted in the middle (for example v2i1 - // to v16i1, index 2) - - // If this node widens - by concatenating zeroes - the type of the result - // of a node with instruction that zeroes all upper (irrelevant) bits of the - // output register, mark this node as legal to enable replacing them with - // the v8i1 version of the previous instruction during instruction selection. - // For example, VPCMPEQDZ128rr instruction stores its v4i1 result in a k-reg, - // while zeroing all the upper remaining 60 bits of the register. if the - // result of such instruction is inserted into an allZeroVector, then we can - // safely remove insert_vector (in instruction selection) as the cmp instr - // already zeroed the rest of the register. - if (ISD::isBuildVectorAllZeros(Vec.getNode()) && IdxVal == 0 && - (isMaskedZeroUpperBitsvXi1(SubVec.getOpcode()) || - (SubVec.getOpcode() == ISD::AND && - (isMaskedZeroUpperBitsvXi1(SubVec.getOperand(0).getOpcode()) || - isMaskedZeroUpperBitsvXi1(SubVec.getOperand(1).getOpcode()))))) - return Op; - - // extend to natively supported kshift - MVT MinVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1; - MVT WideOpVT = OpVT; - if (OpVT.getSizeInBits() < MinVT.getStoreSizeInBits()) - WideOpVT = MinVT; - - SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl); SDValue Undef = DAG.getUNDEF(WideOpVT); - SDValue WideSubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, - Undef, SubVec, ZeroIdx); - // Extract sub-vector if require. - auto ExtractSubVec = [&](SDValue V) { - return (WideOpVT == OpVT) ? V : DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, - OpVT, V, ZeroIdx); - }; + if (IdxVal == 0) { + // Zero lower bits of the Vec + SDValue ShiftBits = DAG.getConstant(SubVecNumElems, dl, MVT::i8); + Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, + ZeroIdx); + Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits); + Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits); + // Merge them together, SubVec should be zero extended. 
+ SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, + getZeroVector(WideOpVT, Subtarget, DAG, dl), + SubVec, ZeroIdx); + Op = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, SubVec); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx); + } + + SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, + Undef, SubVec, ZeroIdx); if (Vec.isUndef()) { - if (IdxVal != 0) { - SDValue ShiftBits = DAG.getConstant(IdxVal, dl, MVT::i8); - WideSubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, WideSubVec, - ShiftBits); - } - return ExtractSubVec(WideSubVec); + assert(IdxVal != 0 && "Unexpected index"); + SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec, + DAG.getConstant(IdxVal, dl, MVT::i8)); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, SubVec, ZeroIdx); } if (ISD::isBuildVectorAllZeros(Vec.getNode())) { + assert(IdxVal != 0 && "Unexpected index"); NumElems = WideOpVT.getVectorNumElements(); unsigned ShiftLeft = NumElems - SubVecNumElems; unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal; - Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, WideSubVec, - DAG.getConstant(ShiftLeft, dl, MVT::i8)); - Vec = ShiftRight ? DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, - DAG.getConstant(ShiftRight, dl, MVT::i8)) : Vec; - return ExtractSubVec(Vec); - } - - if (IdxVal == 0) { - // Zero lower bits of the Vec - SDValue ShiftBits = DAG.getConstant(SubVecNumElems, dl, MVT::i8); - Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, ZeroIdx); - Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits); - Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits); - // Merge them together, SubVec should be zero extended. - WideSubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, - getZeroVector(WideOpVT, Subtarget, DAG, dl), - SubVec, ZeroIdx); - Vec = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, WideSubVec); - return ExtractSubVec(Vec); + SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec, + DAG.getConstant(ShiftLeft, dl, MVT::i8)); + if (ShiftRight != 0) + SubVec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, SubVec, + DAG.getConstant(ShiftRight, dl, MVT::i8)); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, SubVec, ZeroIdx); } // Simple case when we put subvector in the upper part if (IdxVal + SubVecNumElems == NumElems) { - // Zero upper bits of the Vec - WideSubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, WideSubVec, - DAG.getConstant(IdxVal, dl, MVT::i8)); - SDValue ShiftBits = DAG.getConstant(SubVecNumElems, dl, MVT::i8); - Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, ZeroIdx); - Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits); - Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits); - Vec = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, WideSubVec); - return ExtractSubVec(Vec); - } - // Subvector should be inserted in the middle - use shuffle - WideSubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, Undef, - SubVec, ZeroIdx); - SmallVector Mask; - for (unsigned i = 0; i < NumElems; ++i) - Mask.push_back(i >= IdxVal && i < IdxVal + SubVecNumElems ? - i : i + NumElems); - return DAG.getVectorShuffle(OpVT, dl, WideSubVec, Vec, Mask); + SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec, + DAG.getConstant(IdxVal, dl, MVT::i8)); + if (SubVecNumElems * 2 == NumElems) { + // Special case, use legal zero extending insert_subvector. This allows + // isel to opimitize when bits are known zero. 
+ Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVecVT, Vec, ZeroIdx); + Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, + getZeroVector(WideOpVT, Subtarget, DAG, dl), + Vec, ZeroIdx); + } else { + // Otherwise use explicit shifts to zero the bits. + Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, + Undef, Vec, ZeroIdx); + NumElems = WideOpVT.getVectorNumElements(); + SDValue ShiftBits = DAG.getConstant(NumElems - IdxVal, dl, MVT::i8); + Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits); + Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits); + } + Op = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, SubVec); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx); + } + + // Inserting into the middle is more complicated. + + NumElems = WideOpVT.getVectorNumElements(); + + // Widen the vector if needed. + Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, ZeroIdx); + // Move the current value of the bit to be replace to the lsbs. + Op = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, + DAG.getConstant(IdxVal, dl, MVT::i8)); + // Xor with the new bit. + Op = DAG.getNode(ISD::XOR, dl, WideOpVT, Op, SubVec); + // Shift to MSB, filling bottom bits with 0. + unsigned ShiftLeft = NumElems - SubVecNumElems; + Op = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Op, + DAG.getConstant(ShiftLeft, dl, MVT::i8)); + // Shift to the final position, filling upper bits with 0. + unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal; + Op = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Op, + DAG.getConstant(ShiftRight, dl, MVT::i8)); + // Xor with original vector leaving the new value. + Op = DAG.getNode(ISD::XOR, dl, WideOpVT, Vec, Op); + // Reduce to original width if needed. + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx); } /// Concat two 128-bit vectors into a 256 bit vector using VINSERTF128 @@ -5146,6 +5184,13 @@ static SDValue concat256BitVectors(SDValue V1, SDValue V2, EVT VT, return insert256BitVector(V, V2, NumElems / 2, DAG, dl); } +static SDValue concatSubVectors(SDValue V1, SDValue V2, EVT VT, + unsigned NumElems, SelectionDAG &DAG, + const SDLoc &dl, unsigned VectorWidth) { + SDValue V = insertSubVector(DAG.getUNDEF(VT), V1, 0, DAG, dl, VectorWidth); + return insertSubVector(V, V2, NumElems / 2, DAG, dl, VectorWidth); +} + /// Returns a vector of specified type with all bits set. /// Always build ones vectors as <4 x i32>, <8 x i32> or <16 x i32>. /// Then bitcast to their original type, ensuring they get CSE'd. @@ -5351,6 +5396,12 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits, SmallVector SrcEltBits(1, Cst->getAPIntValue()); return CastBitData(UndefSrcElts, SrcEltBits); } + if (auto *Cst = dyn_cast(Op)) { + APInt UndefSrcElts = APInt::getNullValue(1); + APInt RawBits = Cst->getValueAPF().bitcastToAPInt(); + SmallVector SrcEltBits(1, RawBits); + return CastBitData(UndefSrcElts, SrcEltBits); + } // Extract constant bits from build vector. if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) { @@ -5902,6 +5953,17 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl &Mask, unsigned Opcode = N.getOpcode(); switch (Opcode) { + case ISD::VECTOR_SHUFFLE: { + // Don't treat ISD::VECTOR_SHUFFLE as a target shuffle so decode it here. 
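Editor's aside: the middle-of-mask case above splices the subvector in with two XORs and a shift pair: move the old field down to the lsbs, XOR in the new bits, shift the mixed field back into position (discarding everything outside it), and XOR once more against the original vector so the untouched bits cancel. A scalar sketch of that sequence on a 16-bit mask (hypothetical helper, for illustration only):

    #include <cstdint>

    // spliceBits: mirrors the KSHIFTR / XOR / KSHIFTL / KSHIFTR / XOR chain above.
    static uint16_t spliceBits(uint16_t Mask, uint16_t Sub, unsigned SubBits,
                               unsigned Idx) {
      const unsigned NumBits = 16;
      uint16_t Op = static_cast<uint16_t>(Mask >> Idx);            // old field -> lsbs
      Op = static_cast<uint16_t>(Op ^ Sub);                        // lsbs = old ^ new
      Op = static_cast<uint16_t>(Op << (NumBits - SubBits));       // drop bits above the field
      Op = static_cast<uint16_t>(Op >> (NumBits - SubBits - Idx)); // field back at [Idx, Idx+SubBits)
      return static_cast<uint16_t>(Mask ^ Op);                     // old ^ (old ^ new) == new; rest unchanged
    }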
+ ArrayRef ShuffleMask = cast(N)->getMask(); + if (isUndefOrInRange(ShuffleMask, 0, 2 * NumElts)) { + Mask.append(ShuffleMask.begin(), ShuffleMask.end()); + Ops.push_back(N.getOperand(0)); + Ops.push_back(N.getOperand(1)); + return true; + } + return false; + } case ISD::AND: case X86ISD::ANDNP: { // Attempt to decode as a per-byte mask. @@ -5963,8 +6025,11 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl &Mask, case X86ISD::PINSRW: { SDValue InVec = N.getOperand(0); SDValue InScl = N.getOperand(1); + SDValue InIndex = N.getOperand(2); + if (!isa(InIndex) || + cast(InIndex)->getAPIntValue().uge(NumElts)) + return false; uint64_t InIdx = N.getConstantOperandVal(2); - assert(InIdx < NumElts && "Illegal insertion index"); // Attempt to recognise a PINSR*(VEC, 0, Idx) shuffle pattern. if (X86::isZeroNode(InScl)) { @@ -5982,8 +6047,12 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl &Mask, return false; SDValue ExVec = InScl.getOperand(0); + SDValue ExIndex = InScl.getOperand(1); + if (!isa(ExIndex) || + cast(ExIndex)->getAPIntValue().uge(NumElts)) + return false; uint64_t ExIdx = InScl.getConstantOperandVal(1); - assert(ExIdx < NumElts && "Illegal extraction index"); + Ops.push_back(InVec); Ops.push_back(ExVec); for (unsigned i = 0; i != NumElts; ++i) @@ -6644,8 +6713,7 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef Elts, DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys, Ops, VecSVT, LDBase->getPointerInfo(), LDBase->getAlignment(), - false/*isVolatile*/, true/*ReadMem*/, - false/*WriteMem*/); + MachineMemOperand::MOLoad); for (auto *LD : Loads) DAG.makeEquivalentMemoryOrdering(LD, ResNode); return DAG.getBitcast(VT, ResNode); @@ -6880,7 +6948,7 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp, // TODO: If multiple splats are generated to load the same constant, // it may be detrimental to overall size. There needs to be a way to detect // that condition to know if this is truly a size win. - bool OptForSize = DAG.getMachineFunction().getFunction()->optForSize(); + bool OptForSize = DAG.getMachineFunction().getFunction().optForSize(); // Handle broadcasting a single constant scalar from the constant pool // into a vector. @@ -6958,10 +7026,10 @@ static int getUnderlyingExtractedFromVec(SDValue &ExtractedFromVec, // For 256-bit vectors, LowerEXTRACT_VECTOR_ELT_SSE4 may have already // lowered this: - // (extract_vector_elt (v8f32 %vreg1), Constant<6>) + // (extract_vector_elt (v8f32 %1), Constant<6>) // to: // (extract_vector_elt (vector_shuffle<2,u,u,u> - // (extract_subvector (v8f32 %vreg0), Constant<4>), + // (extract_subvector (v8f32 %0), Constant<4>), // undef) // Constant<0>) // In this case the vector is the extract_subvector expression and the index @@ -7067,8 +7135,8 @@ static SDValue ConvertI1VectorToInteger(SDValue Op, SelectionDAG &DAG) { return DAG.getConstant(Immediate, dl, VT); } // Lower BUILD_VECTOR operation for v8i1 and v16i1 types. 
-SDValue -X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const { +static SDValue LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG, + const X86Subtarget &Subtarget) { MVT VT = Op.getSimpleValueType(); assert((VT.getVectorElementType() == MVT::i1) && @@ -7076,10 +7144,10 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); if (ISD::isBuildVectorAllZeros(Op.getNode())) - return DAG.getTargetConstant(0, dl, VT); + return Op; if (ISD::isBuildVectorAllOnes(Op.getNode())) - return DAG.getTargetConstant(1, dl, VT); + return Op; if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) { if (VT == MVT::v64i1 && !Subtarget.is64Bit()) { @@ -7090,8 +7158,8 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const { DAG.getBuildVector(MVT::v32i1, dl, Op.getNode()->ops().slice(32, 32)); // We have to manually lower both halves so getNode doesn't try to // reassemble the build_vector. - Lower = LowerBUILD_VECTORvXi1(Lower, DAG); - Upper = LowerBUILD_VECTORvXi1(Upper, DAG); + Lower = LowerBUILD_VECTORvXi1(Lower, DAG, Subtarget); + Upper = LowerBUILD_VECTORvXi1(Upper, DAG, Subtarget); return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, Lower, Upper); } SDValue Imm = ConvertI1VectorToInteger(Op, DAG); @@ -7328,7 +7396,8 @@ static SDValue ExpandHorizontalBinOp(const SDValue &V0, const SDValue &V1, /// are written to the parameters \p Opnd0 and \p Opnd1. static bool isAddSub(const BuildVectorSDNode *BV, const X86Subtarget &Subtarget, SelectionDAG &DAG, - SDValue &Opnd0, SDValue &Opnd1) { + SDValue &Opnd0, SDValue &Opnd1, + unsigned &NumExtracts) { MVT VT = BV->getSimpleValueType(0); if ((!Subtarget.hasSSE3() || (VT != MVT::v4f32 && VT != MVT::v2f64)) && @@ -7340,6 +7409,8 @@ static bool isAddSub(const BuildVectorSDNode *BV, SDValue InVec0 = DAG.getUNDEF(VT); SDValue InVec1 = DAG.getUNDEF(VT); + NumExtracts = 0; + // Odd-numbered elements in the input build vector are obtained from // adding two integer/float elements. // Even-numbered elements in the input build vector are obtained from @@ -7416,6 +7487,9 @@ static bool isAddSub(const BuildVectorSDNode *BV, // Update the pair of expected opcodes. std::swap(ExpectedOpcode, NextExpectedOpcode); + + // Increment the number of extractions done. + ++NumExtracts; } // Don't try to fold this build_vector into an ADDSUB if the inputs are undef. @@ -7428,9 +7502,9 @@ static bool isAddSub(const BuildVectorSDNode *BV, } /// Returns true if is possible to fold MUL and an idiom that has already been -/// recognized as ADDSUB(\p Opnd0, \p Opnd1) into FMADDSUB(x, y, \p Opnd1). -/// If (and only if) true is returned, the operands of FMADDSUB are written to -/// parameters \p Opnd0, \p Opnd1, \p Opnd2. +/// recognized as ADDSUB/SUBADD(\p Opnd0, \p Opnd1) into +/// FMADDSUB/FMSUBADD(x, y, \p Opnd1). If (and only if) true is returned, the +/// operands of FMADDSUB/FMSUBADD are written to parameters \p Opnd0, \p Opnd1, \p Opnd2. /// /// Prior to calling this function it should be known that there is some /// SDNode that potentially can be replaced with an X86ISD::ADDSUB operation @@ -7453,10 +7527,12 @@ static bool isAddSub(const BuildVectorSDNode *BV, /// recognized ADDSUB idiom with ADDSUB operation is that such replacement /// is illegal sometimes. E.g. 512-bit ADDSUB is not available, while 512-bit /// FMADDSUB is. 
-static bool isFMAddSub(const X86Subtarget &Subtarget, SelectionDAG &DAG, - SDValue &Opnd0, SDValue &Opnd1, SDValue &Opnd2) { - if (Opnd0.getOpcode() != ISD::FMUL || Opnd0->use_size() != 2 || - !Subtarget.hasAnyFMA()) +static bool isFMAddSubOrFMSubAdd(const X86Subtarget &Subtarget, + SelectionDAG &DAG, + SDValue &Opnd0, SDValue &Opnd1, SDValue &Opnd2, + unsigned ExpectedUses) { + if (Opnd0.getOpcode() != ISD::FMUL || + !Opnd0->hasNUsesOfValue(ExpectedUses, 0) || !Subtarget.hasAnyFMA()) return false; // FIXME: These checks must match the similar ones in @@ -7482,7 +7558,8 @@ static SDValue lowerToAddSubOrFMAddSub(const BuildVectorSDNode *BV, const X86Subtarget &Subtarget, SelectionDAG &DAG) { SDValue Opnd0, Opnd1; - if (!isAddSub(BV, Subtarget, DAG, Opnd0, Opnd1)) + unsigned NumExtracts; + if (!isAddSub(BV, Subtarget, DAG, Opnd0, Opnd1, NumExtracts)) return SDValue(); MVT VT = BV->getSimpleValueType(0); @@ -7490,7 +7567,9 @@ static SDValue lowerToAddSubOrFMAddSub(const BuildVectorSDNode *BV, // Try to generate X86ISD::FMADDSUB node here. SDValue Opnd2; - if (isFMAddSub(Subtarget, DAG, Opnd0, Opnd1, Opnd2)) + // TODO: According to coverage reports, the FMADDSUB transform is not + // triggered by any tests. + if (isFMAddSubOrFMSubAdd(Subtarget, DAG, Opnd0, Opnd1, Opnd2, NumExtracts)) return DAG.getNode(X86ISD::FMADDSUB, DL, VT, Opnd0, Opnd1, Opnd2); // Do not generate X86ISD::ADDSUB node for 512-bit types even though @@ -7660,6 +7739,10 @@ static SDValue lowerBuildVectorToBitOp(BuildVectorSDNode *Op, case ISD::AND: case ISD::XOR: case ISD::OR: + // Don't do this if the buildvector is a splat - we'd replace one + // constant with an entire vector. + if (Op->getSplatValue()) + return SDValue(); if (!TLI.isOperationLegalOrPromote(Opcode, VT)) return SDValue(); break; @@ -7815,6 +7898,11 @@ LowerBUILD_VECTORAsVariablePermute(SDValue V, SelectionDAG &DAG, IndicesVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()), VT.getVectorNumElements()); IndicesVec = DAG.getZExtOrTrunc(IndicesVec, SDLoc(IndicesVec), IndicesVT); + if (SrcVec.getValueSizeInBits() < IndicesVT.getSizeInBits()) { + SrcVec = + DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(SrcVec), VT, DAG.getUNDEF(VT), + SrcVec, DAG.getIntPtrConstant(0, SDLoc(SrcVec))); + } return DAG.getNode(VT == MVT::v16i8 ? X86ISD::PSHUFB : X86ISD::VPERMV, SDLoc(V), VT, IndicesVec, SrcVec); } @@ -7824,17 +7912,19 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); MVT VT = Op.getSimpleValueType(); - MVT ExtVT = VT.getVectorElementType(); + MVT EltVT = VT.getVectorElementType(); unsigned NumElems = Op.getNumOperands(); // Generate vectors for predicate vectors. if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) - return LowerBUILD_VECTORvXi1(Op, DAG); + return LowerBUILD_VECTORvXi1(Op, DAG, Subtarget); if (SDValue VectorConstant = materializeVectorConstant(Op, DAG, Subtarget)) return VectorConstant; BuildVectorSDNode *BV = cast(Op.getNode()); + // TODO: Support FMSUBADD here if we ever get tests for the FMADDSUB + // transform here. 
if (SDValue AddSub = lowerToAddSubOrFMAddSub(BV, Subtarget, DAG)) return AddSub; if (SDValue HorizontalOp = LowerToHorizontalOp(BV, Subtarget, DAG)) @@ -7844,7 +7934,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { if (SDValue BitOp = lowerBuildVectorToBitOp(BV, DAG)) return BitOp; - unsigned EVTBits = ExtVT.getSizeInBits(); + unsigned EVTBits = EltVT.getSizeInBits(); unsigned NumZero = 0; unsigned NumNonZero = 0; @@ -7880,13 +7970,13 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { // supported, we assume that we will fall back to a shuffle to get the scalar // blended with the constants. Insertion into a zero vector is handled as a // special-case somewhere below here. - LLVMContext &Context = *DAG.getContext(); if (NumConstants == NumElems - 1 && NumNonZero != 1 && (isOperationLegalOrCustom(ISD::INSERT_VECTOR_ELT, VT) || isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, VT))) { // Create an all-constant vector. The variable element in the old // build vector is replaced by undef in the constant vector. Save the // variable scalar element and its index for use in the insertelement. + LLVMContext &Context = *DAG.getContext(); Type *EltType = Op.getValueType().getScalarType().getTypeForEVT(Context); SmallVector ConstVecOps(NumElems, UndefValue::get(EltType)); SDValue VarElt; @@ -7930,7 +8020,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { // insertion that way. Only do this if the value is non-constant or if the // value is a constant being inserted into element 0. It is cheaper to do // a constant pool load than it is to do a movd + shuffle. - if (ExtVT == MVT::i64 && !Subtarget.is64Bit() && + if (EltVT == MVT::i64 && !Subtarget.is64Bit() && (!IsAllConstants || Idx == 0)) { if (DAG.MaskedValueIsZero(Item, APInt::getHighBitsSet(64, 32))) { // Handle SSE only. @@ -7954,8 +8044,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { if (NumZero == 0) return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item); - if (ExtVT == MVT::i32 || ExtVT == MVT::f32 || ExtVT == MVT::f64 || - (ExtVT == MVT::i64 && Subtarget.is64Bit())) { + if (EltVT == MVT::i32 || EltVT == MVT::f32 || EltVT == MVT::f64 || + (EltVT == MVT::i64 && Subtarget.is64Bit())) { assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) && "Expected an SSE value type!"); @@ -7966,7 +8056,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { // We can't directly insert an i8 or i16 into a vector, so zero extend // it to i32 first. - if (ExtVT == MVT::i16 || ExtVT == MVT::i8) { + if (EltVT == MVT::i16 || EltVT == MVT::i8) { Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item); if (VT.getSizeInBits() >= 256) { MVT ShufVT = MVT::getVectorVT(MVT::i32, VT.getSizeInBits()/32); @@ -8038,17 +8128,43 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { return V; // See if we can use a vector load to get all of the elements. - if (VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) { + { SmallVector Ops(Op->op_begin(), Op->op_begin() + NumElems); if (SDValue LD = EltsFromConsecutiveLoads(VT, Ops, dl, DAG, Subtarget, false)) return LD; } + // If this is a splat of pairs of 32-bit elements, we can use a narrower + // build_vector and broadcast it. + // TODO: We could probably generalize this more. 
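Editor's aside: the hunk that follows implements the pair-splat check described in the comment: if every even element equals element 0 and every odd element equals element 1, the build_vector can be rebuilt as a two-element vector and broadcast in 64-bit lanes. A standalone sketch of that test (simplified: undef elements are not handled, and the name is illustrative):

    #include <vector>

    // isPairSplat: true when Elts is <a, b, a, b, ...> with at least two pairs.
    static bool isPairSplat(const std::vector<int> &Elts) {
      if (Elts.size() < 4 || (Elts.size() % 2) != 0)
        return false;
      for (size_t i = 2; i < Elts.size(); ++i)
        if (Elts[i] != Elts[i % 2]) // even lanes match lane 0, odd lanes match lane 1
          return false;
      return true;
    }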
+ if (Subtarget.hasAVX2() && EVTBits == 32 && Values.size() == 2) { + SDValue Ops[4] = { Op.getOperand(0), Op.getOperand(1), + DAG.getUNDEF(EltVT), DAG.getUNDEF(EltVT) }; + auto CanSplat = [](SDValue Op, unsigned NumElems, ArrayRef Ops) { + // Make sure all the even/odd operands match. + for (unsigned i = 2; i != NumElems; ++i) + if (Ops[i % 2] != Op.getOperand(i)) + return false; + return true; + }; + if (CanSplat(Op, NumElems, Ops)) { + MVT WideEltVT = VT.isFloatingPoint() ? MVT::f64 : MVT::i64; + MVT NarrowVT = MVT::getVectorVT(EltVT, 4); + // Create a new build vector and cast to v2i64/v2f64. + SDValue NewBV = DAG.getBitcast(MVT::getVectorVT(WideEltVT, 2), + DAG.getBuildVector(NarrowVT, dl, Ops)); + // Broadcast from v2i64/v2f64 and cast to final VT. + MVT BcastVT = MVT::getVectorVT(WideEltVT, NumElems/2); + return DAG.getBitcast(VT, DAG.getNode(X86ISD::VBROADCAST, dl, BcastVT, + NewBV)); + } + } + // For AVX-length vectors, build the individual 128-bit pieces and use // shuffles to put them in place. - if (VT.is256BitVector() || VT.is512BitVector()) { - EVT HVT = EVT::getVectorVT(Context, ExtVT, NumElems/2); + if (VT.getSizeInBits() > 128) { + MVT HVT = MVT::getVectorVT(EltVT, NumElems/2); // Build both the lower and upper subvector. SDValue Lower = @@ -8057,9 +8173,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { HVT, dl, Op->ops().slice(NumElems / 2, NumElems /2)); // Recreate the wider vector with the lower and upper part. - if (VT.is256BitVector()) - return concat128BitVectors(Lower, Upper, VT, NumElems, DAG, dl); - return concat256BitVectors(Lower, Upper, VT, NumElems, DAG, dl); + return concatSubVectors(Lower, Upper, VT, NumElems, DAG, dl, + VT.getSizeInBits() / 2); } // Let legalizer expand 2-wide build_vectors. @@ -8269,9 +8384,9 @@ static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op, SelectionDAG & DAG) { SDLoc dl(Op); MVT ResVT = Op.getSimpleValueType(); - unsigned NumOfOperands = Op.getNumOperands(); + unsigned NumOperands = Op.getNumOperands(); - assert(isPowerOf2_32(NumOfOperands) && + assert(NumOperands > 1 && isPowerOf2_32(NumOperands) && "Unexpected number of operands in CONCAT_VECTORS"); // If this node promotes - by concatenating zeroes - the type of the result @@ -8285,71 +8400,58 @@ static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op, ZeroC); } - SDValue Undef = DAG.getUNDEF(ResVT); - if (NumOfOperands > 2) { - // Specialize the cases when all, or all but one, of the operands are undef. - unsigned NumOfDefinedOps = 0; - unsigned OpIdx = 0; - for (unsigned i = 0; i < NumOfOperands; i++) - if (!Op.getOperand(i).isUndef()) { - NumOfDefinedOps++; - OpIdx = i; - } - if (NumOfDefinedOps == 0) - return Undef; - if (NumOfDefinedOps == 1) { - unsigned SubVecNumElts = - Op.getOperand(OpIdx).getValueType().getVectorNumElements(); - SDValue IdxVal = DAG.getIntPtrConstant(SubVecNumElts * OpIdx, dl); - return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Undef, - Op.getOperand(OpIdx), IdxVal); + unsigned NumZero = 0; + unsigned NumNonZero = 0; + uint64_t NonZeros = 0; + for (unsigned i = 0; i != NumOperands; ++i) { + SDValue SubVec = Op.getOperand(i); + if (SubVec.isUndef()) + continue; + if (ISD::isBuildVectorAllZeros(SubVec.getNode())) + ++NumZero; + else { + assert(i < sizeof(NonZeros) * CHAR_BIT); // Ensure the shift is in range. + NonZeros |= (uint64_t)1 << i; + ++NumNonZero; } + } + + + // If there are zero or one non-zeros we can handle this very simply. + if (NumNonZero <= 1) { + SDValue Vec = NumZero ? 
getZeroVector(ResVT, Subtarget, DAG, dl) + : DAG.getUNDEF(ResVT); + if (!NumNonZero) + return Vec; + unsigned Idx = countTrailingZeros(NonZeros); + SDValue SubVec = Op.getOperand(Idx); + unsigned SubVecNumElts = SubVec.getSimpleValueType().getVectorNumElements(); + return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Vec, SubVec, + DAG.getIntPtrConstant(Idx * SubVecNumElts, dl)); + } + if (NumOperands > 2) { MVT HalfVT = MVT::getVectorVT(ResVT.getVectorElementType(), ResVT.getVectorNumElements()/2); - SmallVector Ops; - for (unsigned i = 0; i < NumOfOperands/2; i++) - Ops.push_back(Op.getOperand(i)); - SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT, Ops); - Ops.clear(); - for (unsigned i = NumOfOperands/2; i < NumOfOperands; i++) - Ops.push_back(Op.getOperand(i)); - SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT, Ops); + ArrayRef Ops = Op->ops(); + SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT, + Ops.slice(0, NumOperands/2)); + SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT, + Ops.slice(NumOperands/2)); return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi); } - // 2 operands - SDValue V1 = Op.getOperand(0); - SDValue V2 = Op.getOperand(1); - unsigned NumElems = ResVT.getVectorNumElements(); - assert(V1.getValueType() == V2.getValueType() && - V1.getValueType().getVectorNumElements() == NumElems/2 && - "Unexpected operands in CONCAT_VECTORS"); + assert(NumNonZero == 2 && "Simple cases not handled?"); - if (ResVT.getSizeInBits() >= 16) + if (ResVT.getVectorNumElements() >= 16) return Op; // The operation is legal with KUNPCK - bool IsZeroV1 = ISD::isBuildVectorAllZeros(V1.getNode()); - bool IsZeroV2 = ISD::isBuildVectorAllZeros(V2.getNode()); - SDValue ZeroVec = getZeroVector(ResVT, Subtarget, DAG, dl); - if (IsZeroV1 && IsZeroV2) - return ZeroVec; - - SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl); - if (V2.isUndef()) - return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Undef, V1, ZeroIdx); - if (IsZeroV2) - return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, ZeroVec, V1, ZeroIdx); - - SDValue IdxVal = DAG.getIntPtrConstant(NumElems/2, dl); - if (V1.isUndef()) - return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Undef, V2, IdxVal); - - if (IsZeroV1) - return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, ZeroVec, V2, IdxVal); - - V1 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Undef, V1, ZeroIdx); - return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, V1, V2, IdxVal); + SDValue Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, + DAG.getUNDEF(ResVT), Op.getOperand(0), + DAG.getIntPtrConstant(0, dl)); + unsigned NumElems = ResVT.getVectorNumElements(); + return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Vec, Op.getOperand(1), + DAG.getIntPtrConstant(NumElems/2, dl)); } static SDValue LowerCONCAT_VECTORS(SDValue Op, @@ -8822,8 +8924,8 @@ static SDValue lowerVectorShuffleToEXPAND(const SDLoc &DL, MVT VT, static bool matchVectorShuffleWithUNPCK(MVT VT, SDValue &V1, SDValue &V2, unsigned &UnpackOpcode, bool IsUnary, - ArrayRef TargetMask, SDLoc &DL, - SelectionDAG &DAG, + ArrayRef TargetMask, + const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget) { int NumElts = VT.getVectorNumElements(); @@ -10149,7 +10251,7 @@ static SDValue lowerVectorShuffleAsElementInsertion( return SDValue(); // Zero-extend directly to i32. 
- ExtVT = MVT::v4i32; + ExtVT = MVT::getVectorVT(MVT::i32, ExtVT.getSizeInBits() / 32); V2S = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, V2S); } V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ExtVT, V2S); @@ -10309,9 +10411,16 @@ static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT, for (;;) { switch (V.getOpcode()) { case ISD::BITCAST: { + // Peek through bitcasts as long as BroadcastIdx can be adjusted. SDValue VSrc = V.getOperand(0); - MVT SrcVT = VSrc.getSimpleValueType(); - if (VT.getScalarSizeInBits() != SrcVT.getScalarSizeInBits()) + unsigned NumEltBits = V.getScalarValueSizeInBits(); + unsigned NumSrcBits = VSrc.getScalarValueSizeInBits(); + if ((NumEltBits % NumSrcBits) == 0) + BroadcastIdx *= (NumEltBits / NumSrcBits); + else if ((NumSrcBits % NumEltBits) == 0 && + (BroadcastIdx % (NumSrcBits / NumEltBits)) == 0) + BroadcastIdx /= (NumSrcBits / NumEltBits); + else break; V = VSrc; continue; @@ -10343,6 +10452,23 @@ static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT, break; } + // Ensure the source vector and BroadcastIdx are for a suitable type. + if (VT.getScalarSizeInBits() != V.getScalarValueSizeInBits()) { + unsigned NumEltBits = VT.getScalarSizeInBits(); + unsigned NumSrcBits = V.getScalarValueSizeInBits(); + if ((NumSrcBits % NumEltBits) == 0) + BroadcastIdx *= (NumSrcBits / NumEltBits); + else if ((NumEltBits % NumSrcBits) == 0 && + (BroadcastIdx % (NumEltBits / NumSrcBits)) == 0) + BroadcastIdx /= (NumEltBits / NumSrcBits); + else + return SDValue(); + + unsigned NumSrcElts = V.getValueSizeInBits() / NumEltBits; + MVT SrcVT = MVT::getVectorVT(VT.getScalarType(), NumSrcElts); + V = DAG.getBitcast(SrcVT, V); + } + // Check if this is a broadcast of a scalar. We special case lowering // for scalars so that we can more effectively fold with loads. // First, look through bitcast: if the original value has a larger element @@ -10408,15 +10534,11 @@ static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT, // The shuffle input might have been a bitcast we looked through; look at // the original input vector. Emit an EXTRACT_SUBVECTOR of that type; we'll // later bitcast it to BroadcastVT. - MVT SrcVT = V.getSimpleValueType(); - assert(SrcVT.getScalarSizeInBits() == BroadcastVT.getScalarSizeInBits() && + assert(V.getScalarValueSizeInBits() == BroadcastVT.getScalarSizeInBits() && "Unexpected vector element size"); - assert((SrcVT.is256BitVector() || SrcVT.is512BitVector()) && + assert((V.getValueSizeInBits() == 256 || V.getValueSizeInBits() == 512) && "Unexpected vector size"); - - MVT ExtVT = MVT::getVectorVT(SrcVT.getScalarType(), 128 / EltSize); - V = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtVT, V, - DAG.getIntPtrConstant(BroadcastIdx, DL)); + V = extract128BitVector(V, BroadcastIdx, DAG, DL); } if (Opcode == X86ISD::MOVDDUP && !V.getValueType().isVector()) @@ -10446,9 +10568,13 @@ static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT, // We only support broadcasting from 128-bit vectors to minimize the // number of patterns we need to deal with in isel. So extract down to - // 128-bits. - if (SrcVT.getSizeInBits() > 128) - V = extract128BitVector(V, 0, DAG, DL); + // 128-bits, removing as many bitcasts as possible. 
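Editor's aside: the broadcast-index adjustments above rescale BroadcastIdx whenever a bitcast changes the lane width: multiply when the outer lanes are wider than the source lanes, divide (only if the index is aligned) when they are narrower, and bail out otherwise. A small scalar sketch of that rescaling rule (hypothetical helper, not part of the patch):

    #include <optional>

    // rescaleLaneIndex: convert an index over DstBits-wide lanes into an index
    // over SrcBits-wide lanes, when one lane width evenly divides the other.
    static std::optional<unsigned> rescaleLaneIndex(unsigned Idx, unsigned DstBits,
                                                    unsigned SrcBits) {
      if (DstBits % SrcBits == 0)
        return Idx * (DstBits / SrcBits);   // a wider dst lane covers several src lanes
      if (SrcBits % DstBits == 0 && Idx % (SrcBits / DstBits) == 0)
        return Idx / (SrcBits / DstBits);   // several dst lanes per src lane, and aligned
      return std::nullopt;                  // incompatible widths: give up
    }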
+ if (SrcVT.getSizeInBits() > 128) { + MVT ExtVT = MVT::getVectorVT(SrcVT.getScalarType(), + 128 / SrcVT.getScalarSizeInBits()); + V = extract128BitVector(peekThroughBitcasts(V), 0, DAG, DL); + V = DAG.getBitcast(ExtVT, V); + } return DAG.getBitcast(VT, DAG.getNode(Opcode, DL, BroadcastVT, V)); } @@ -11200,6 +11326,20 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle( MutableArrayRef LoMask = Mask.slice(0, 4); MutableArrayRef HiMask = Mask.slice(4, 4); + // Attempt to directly match PSHUFLW or PSHUFHW. + if (isUndefOrInRange(LoMask, 0, 4) && + isSequentialOrUndefInRange(HiMask, 0, 4, 4)) { + return DAG.getNode(X86ISD::PSHUFLW, DL, VT, V, + getV4X86ShuffleImm8ForMask(LoMask, DL, DAG)); + } + if (isUndefOrInRange(HiMask, 4, 8) && + isSequentialOrUndefInRange(LoMask, 0, 4, 0)) { + for (int i = 0; i != 4; ++i) + HiMask[i] = (HiMask[i] < 0 ? HiMask[i] : (HiMask[i] - 4)); + return DAG.getNode(X86ISD::PSHUFHW, DL, VT, V, + getV4X86ShuffleImm8ForMask(HiMask, DL, DAG)); + } + SmallVector LoInputs; copy_if(LoMask, std::back_inserter(LoInputs), [](int M) { return M >= 0; }); std::sort(LoInputs.begin(), LoInputs.end()); @@ -11219,13 +11359,11 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle( MutableArrayRef HToLInputs(LoInputs.data() + NumLToL, NumHToL); MutableArrayRef HToHInputs(HiInputs.data() + NumLToH, NumHToH); - // If we are splatting two values from one half - one to each half, then - // we can shuffle that half so each is splatted to a dword, then splat those - // to their respective halves. - auto SplatHalfs = [&](int LoInput, int HiInput, unsigned ShufWOp, - int DOffset) { - int PSHUFHalfMask[] = {LoInput % 4, LoInput % 4, HiInput % 4, HiInput % 4}; - int PSHUFDMask[] = {DOffset + 0, DOffset + 0, DOffset + 1, DOffset + 1}; + // If we are shuffling values from one half - check how many different DWORD + // pairs we need to create. If only 1 or 2 then we can perform this as a + // PSHUFLW/PSHUFHW + PSHUFD instead of the PSHUFD+PSHUFLW+PSHUFHW chain below. + auto ShuffleDWordPairs = [&](ArrayRef PSHUFHalfMask, + ArrayRef PSHUFDMask, unsigned ShufWOp) { V = DAG.getNode(ShufWOp, DL, VT, V, getV4X86ShuffleImm8ForMask(PSHUFHalfMask, DL, DAG)); V = DAG.getBitcast(PSHUFDVT, V); @@ -11234,10 +11372,48 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle( return DAG.getBitcast(VT, V); }; - if (NumLToL == 1 && NumLToH == 1 && (NumHToL + NumHToH) == 0) - return SplatHalfs(LToLInputs[0], LToHInputs[0], X86ISD::PSHUFLW, 0); - if (NumHToL == 1 && NumHToH == 1 && (NumLToL + NumLToH) == 0) - return SplatHalfs(HToLInputs[0], HToHInputs[0], X86ISD::PSHUFHW, 2); + if ((NumHToL + NumHToH) == 0 || (NumLToL + NumLToH) == 0) { + int PSHUFDMask[4] = { -1, -1, -1, -1 }; + SmallVector, 4> DWordPairs; + int DOffset = ((NumHToL + NumHToH) == 0 ? 0 : 2); + + // Collect the different DWORD pairs. + for (int DWord = 0; DWord != 4; ++DWord) { + int M0 = Mask[2 * DWord + 0]; + int M1 = Mask[2 * DWord + 1]; + M0 = (M0 >= 0 ? M0 % 4 : M0); + M1 = (M1 >= 0 ? M1 % 4 : M1); + if (M0 < 0 && M1 < 0) + continue; + + bool Match = false; + for (int j = 0, e = DWordPairs.size(); j < e; ++j) { + auto &DWordPair = DWordPairs[j]; + if ((M0 < 0 || isUndefOrEqual(DWordPair.first, M0)) && + (M1 < 0 || isUndefOrEqual(DWordPair.second, M1))) { + DWordPair.first = (M0 >= 0 ? M0 : DWordPair.first); + DWordPair.second = (M1 >= 0 ? 
M1 : DWordPair.second); + PSHUFDMask[DWord] = DOffset + j; + Match = true; + break; + } + } + if (!Match) { + PSHUFDMask[DWord] = DOffset + DWordPairs.size(); + DWordPairs.push_back(std::make_pair(M0, M1)); + } + } + + if (DWordPairs.size() <= 2) { + DWordPairs.resize(2, std::make_pair(-1, -1)); + int PSHUFHalfMask[4] = {DWordPairs[0].first, DWordPairs[0].second, + DWordPairs[1].first, DWordPairs[1].second}; + if ((NumHToL + NumHToH) == 0) + return ShuffleDWordPairs(PSHUFHalfMask, PSHUFDMask, X86ISD::PSHUFLW); + if ((NumLToL + NumLToH) == 0) + return ShuffleDWordPairs(PSHUFHalfMask, PSHUFDMask, X86ISD::PSHUFHW); + } + } // Simplify the 1-into-3 and 3-into-1 cases with a single pshufd. For all // such inputs we can swap two of the dwords across the half mark and end up @@ -11888,6 +12064,19 @@ static int canLowerByDroppingEvenElements(ArrayRef Mask, return 0; } +static SDValue lowerVectorShuffleWithPERMV(const SDLoc &DL, MVT VT, + ArrayRef Mask, SDValue V1, + SDValue V2, SelectionDAG &DAG) { + MVT MaskEltVT = MVT::getIntegerVT(VT.getScalarSizeInBits()); + MVT MaskVecVT = MVT::getVectorVT(MaskEltVT, VT.getVectorNumElements()); + + SDValue MaskNode = getConstVector(Mask, MaskVecVT, DAG, DL, true); + if (V2.isUndef()) + return DAG.getNode(X86ISD::VPERMV, DL, VT, MaskNode, V1); + + return DAG.getNode(X86ISD::VPERMV3, DL, VT, V1, MaskNode, V2); +} + /// \brief Generic lowering of v16i8 shuffles. /// /// This is a hybrid strategy to lower v16i8 vectors. It first attempts to @@ -12078,6 +12267,10 @@ static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef Mask, if (SDValue Unpack = lowerVectorShuffleAsPermuteAndUnpack( DL, MVT::v16i8, V1, V2, Mask, DAG)) return Unpack; + + // If we have VBMI we can use one VPERM instead of multiple PSHUFBs. + if (Subtarget.hasVBMI() && Subtarget.hasVLX()) + return lowerVectorShuffleWithPERMV(DL, MVT::v16i8, Mask, V1, V2, DAG); } return PSHUFB; @@ -12978,19 +13171,6 @@ static SDValue lowerVectorShuffleWithSHUFPD(const SDLoc &DL, MVT VT, DAG.getConstant(Immediate, DL, MVT::i8)); } -static SDValue lowerVectorShuffleWithPERMV(const SDLoc &DL, MVT VT, - ArrayRef Mask, SDValue V1, - SDValue V2, SelectionDAG &DAG) { - MVT MaskEltVT = MVT::getIntegerVT(VT.getScalarSizeInBits()); - MVT MaskVecVT = MVT::getVectorVT(MaskEltVT, VT.getVectorNumElements()); - - SDValue MaskNode = getConstVector(Mask, MaskVecVT, DAG, DL, true); - if (V2.isUndef()) - return DAG.getNode(X86ISD::VPERMV, DL, VT, MaskNode, V1); - - return DAG.getNode(X86ISD::VPERMV3, DL, VT, V1, MaskNode, V2); -} - /// \brief Handle lowering of 4-lane 64-bit floating point shuffles. /// /// Also ends up handling lowering of 4-lane 64-bit integer shuffles when AVX2 @@ -13161,6 +13341,12 @@ static SDValue lowerV4I64VectorShuffle(const SDLoc &DL, ArrayRef Mask, lowerVectorShuffleWithUNPCK(DL, MVT::v4i64, Mask, V1, V2, DAG)) return V; + // Try to create an in-lane repeating shuffle mask and then shuffle the + // the results into the target lanes. + if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute( + DL, MVT::v4i64, V1, V2, Mask, Subtarget, DAG)) + return V; + // Try to simplify this by merging 128-bit lanes to enable a lane-based // shuffle. However, if we have AVX2 and either inputs are already in place, // we will be able to shuffle even across lanes the other input in a single @@ -13545,6 +13731,10 @@ static SDValue lowerV32I8VectorShuffle(const SDLoc &DL, ArrayRef Mask, DL, MVT::v32i8, Mask, V1, V2, Zeroable, Subtarget, DAG)) return PSHUFB; + // AVX512VBMIVL can lower to VPERMB. 
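Editor's aside: the DWORD-pair scan above decides whether a single-input v8i16 half-shuffle fits in one PSHUFLW/PSHUFHW followed by a PSHUFD: that works only if the eight word lanes form at most two distinct (even, odd) pairs. A simplified standalone version of that test (undef lanes and the in-place pair merging are omitted; names are illustrative):

    #include <algorithm>
    #include <array>
    #include <utility>
    #include <vector>

    // fitsInTwoDWordPairs: true when the 8-lane word mask uses at most two
    // distinct (even, odd) pairs, so PSHUFLW/PSHUFHW + PSHUFD can realize it.
    static bool fitsInTwoDWordPairs(const std::array<int, 8> &Mask) {
      std::vector<std::pair<int, int>> Pairs;
      for (int DWord = 0; DWord != 4; ++DWord) {
        std::pair<int, int> P{Mask[2 * DWord + 0] % 4, Mask[2 * DWord + 1] % 4};
        if (std::find(Pairs.begin(), Pairs.end(), P) == Pairs.end())
          Pairs.push_back(P);
      }
      return Pairs.size() <= 2;
    }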
+ if (Subtarget.hasVBMI() && Subtarget.hasVLX()) + return lowerVectorShuffleWithPERMV(DL, MVT::v32i8, Mask, V1, V2, DAG); + // Try to simplify this by merging 128-bit lanes to enable a lane-based // shuffle. if (SDValue Result = lowerVectorShuffleByMerging128BitLanes( @@ -14007,6 +14197,10 @@ static SDValue lowerV32I16VectorShuffle(const SDLoc &DL, ArrayRef Mask, Zeroable, Subtarget, DAG)) return Blend; + if (SDValue PSHUFB = lowerVectorShuffleWithPSHUFB( + DL, MVT::v32i16, Mask, V1, V2, Zeroable, Subtarget, DAG)) + return PSHUFB; + return lowerVectorShuffleWithPERMV(DL, MVT::v32i16, Mask, V1, V2, DAG); } @@ -14142,41 +14336,36 @@ static SDValue lower1BitVectorShuffle(const SDLoc &DL, ArrayRef Mask, ExtVT = MVT::v4i32; break; case MVT::v8i1: - ExtVT = MVT::v8i64; // Take 512-bit type, more shuffles on KNL + // Take 512-bit type, more shuffles on KNL. If we have VLX use a 256-bit + // shuffle. + ExtVT = Subtarget.hasVLX() ? MVT::v8i32 : MVT::v8i64; break; case MVT::v16i1: - ExtVT = MVT::v16i32; + // Take 512-bit type, unless we are avoiding 512-bit types and have the + // 256-bit operation available. + ExtVT = Subtarget.canExtendTo512DQ() ? MVT::v16i32 : MVT::v16i16; break; case MVT::v32i1: - ExtVT = MVT::v32i16; + // Take 512-bit type, unless we are avoiding 512-bit types and have the + // 256-bit operation available. + assert(Subtarget.hasBWI() && "Expected AVX512BW support"); + ExtVT = Subtarget.canExtendTo512BW() ? MVT::v32i16 : MVT::v32i8; break; case MVT::v64i1: ExtVT = MVT::v64i8; break; } - if (ISD::isBuildVectorAllZeros(V1.getNode())) - V1 = getZeroVector(ExtVT, Subtarget, DAG, DL); - else if (ISD::isBuildVectorAllOnes(V1.getNode())) - V1 = getOnesVector(ExtVT, DAG, DL); - else - V1 = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, V1); - - if (V2.isUndef()) - V2 = DAG.getUNDEF(ExtVT); - else if (ISD::isBuildVectorAllZeros(V2.getNode())) - V2 = getZeroVector(ExtVT, Subtarget, DAG, DL); - else if (ISD::isBuildVectorAllOnes(V2.getNode())) - V2 = getOnesVector(ExtVT, DAG, DL); - else - V2 = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, V2); + V1 = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, V1); + V2 = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, V2); SDValue Shuffle = DAG.getVectorShuffle(ExtVT, DL, V1, V2, Mask); // i1 was sign extended we can use X86ISD::CVT2MASK. int NumElems = VT.getVectorNumElements(); if ((Subtarget.hasBWI() && (NumElems >= 32)) || (Subtarget.hasDQI() && (NumElems < 32))) - return DAG.getNode(X86ISD::CVT2MASK, DL, VT, Shuffle); + return DAG.getNode(X86ISD::PCMPGTM, DL, VT, DAG.getConstant(0, DL, ExtVT), + Shuffle); return DAG.getNode(ISD::TRUNCATE, DL, VT, Shuffle); } @@ -14482,8 +14671,8 @@ static SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) { /// Extract one bit from mask vector, like v16i1 or v8i1. /// AVX-512 feature. -SDValue -X86TargetLowering::ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG) const { +static SDValue ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG, + const X86Subtarget &Subtarget) { SDValue Vec = Op.getOperand(0); SDLoc dl(Vec); MVT VecVT = Vec.getSimpleValueType(); @@ -14499,31 +14688,42 @@ X86TargetLowering::ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG) const unsigned NumElts = VecVT.getVectorNumElements(); // Extending v8i1/v16i1 to 512-bit get better performance on KNL // than extending to 128/256bit. - unsigned VecSize = (NumElts <= 4 ? 
128 : 512); - MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(VecSize/NumElts), NumElts); - SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, dl, ExtVT, Vec); - SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - ExtVT.getVectorElementType(), Ext, Idx); + MVT ExtEltVT = (NumElts <= 8) ? MVT::getIntegerVT(128 / NumElts) : MVT::i8; + MVT ExtVecVT = MVT::getVectorVT(ExtEltVT, NumElts); + SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, dl, ExtVecVT, Vec); + SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ExtEltVT, Ext, Idx); return DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt); } + // Canonicalize result type to MVT::i32. + if (EltVT != MVT::i32) { + SDValue Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, + Vec, Idx); + return DAG.getAnyExtOrTrunc(Extract, dl, EltVT); + } + unsigned IdxVal = cast(Idx)->getZExtValue(); + + // Extracts from element 0 are always allowed. + if (IdxVal == 0) + return Op; + + // If the kshift instructions of the correct width aren't natively supported + // then we need to promote the vector to the native size to get the correct + // zeroing behavior. if ((!Subtarget.hasDQI() && (VecVT.getVectorNumElements() == 8)) || (VecVT.getVectorNumElements() < 8)) { - // Use kshiftlw/rw instruction. VecVT = MVT::v16i1; Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VecVT, DAG.getUNDEF(VecVT), Vec, DAG.getIntPtrConstant(0, dl)); } - unsigned MaxSift = VecVT.getVectorNumElements() - 1; - if (MaxSift - IdxVal) - Vec = DAG.getNode(X86ISD::KSHIFTL, dl, VecVT, Vec, - DAG.getConstant(MaxSift - IdxVal, dl, MVT::i8)); + + // Use kshiftr instruction to move to the lower element. Vec = DAG.getNode(X86ISD::KSHIFTR, dl, VecVT, Vec, - DAG.getConstant(MaxSift, dl, MVT::i8)); - return DAG.getNode(X86ISD::VEXTRACT, dl, Op.getSimpleValueType(), Vec, + DAG.getConstant(IdxVal, dl, MVT::i8)); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, Vec, DAG.getIntPtrConstant(0, dl)); } @@ -14536,7 +14736,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SDValue Idx = Op.getOperand(1); if (VecVT.getVectorElementType() == MVT::i1) - return ExtractBitFromMaskVector(Op, DAG); + return ExtractBitFromMaskVector(Op, DAG, Subtarget); if (!isa(Idx)) { // Its more profitable to go through memory (1 cycles throughput) @@ -14674,8 +14874,8 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, /// Insert one bit to mask vector, like v16i1 or v8i1. /// AVX-512 feature. -SDValue -X86TargetLowering::InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG) const { +static SDValue InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG, + const X86Subtarget &Subtarget) { SDLoc dl(Op); SDValue Vec = Op.getOperand(0); SDValue Elt = Op.getOperand(1); @@ -14685,19 +14885,34 @@ X86TargetLowering::InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG) const { if (!isa(Idx)) { // Non constant index. Extend source and destination, // insert element and then truncate the result. - MVT ExtVecVT = (VecVT == MVT::v8i1 ? MVT::v8i64 : MVT::v16i32); - MVT ExtEltVT = (VecVT == MVT::v8i1 ? MVT::i64 : MVT::i32); + unsigned NumElts = VecVT.getVectorNumElements(); + MVT ExtEltVT = (NumElts <= 8) ? 
MVT::getIntegerVT(128 / NumElts) : MVT::i8; + MVT ExtVecVT = MVT::getVectorVT(ExtEltVT, NumElts); SDValue ExtOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ExtVecVT, - DAG.getNode(ISD::ZERO_EXTEND, dl, ExtVecVT, Vec), - DAG.getNode(ISD::ZERO_EXTEND, dl, ExtEltVT, Elt), Idx); + DAG.getNode(ISD::SIGN_EXTEND, dl, ExtVecVT, Vec), + DAG.getNode(ISD::SIGN_EXTEND, dl, ExtEltVT, Elt), Idx); return DAG.getNode(ISD::TRUNCATE, dl, VecVT, ExtOp); } unsigned IdxVal = cast(Idx)->getZExtValue(); - SDValue EltInVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Elt); unsigned NumElems = VecVT.getVectorNumElements(); - if(Vec.isUndef()) { + // If the kshift instructions of the correct width aren't natively supported + // then we need to promote the vector to the native size to get the correct + // zeroing behavior. + if ((!Subtarget.hasDQI() && NumElems == 8) || (NumElems < 8)) { + // Need to promote to v16i1, do the insert, then extract back. + Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i1, + DAG.getUNDEF(MVT::v16i1), Vec, + DAG.getIntPtrConstant(0, dl)); + Op = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v16i1, Vec, Elt, Idx); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VecVT, Op, + DAG.getIntPtrConstant(0, dl)); + } + + SDValue EltInVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Elt); + + if (Vec.isUndef()) { if (IdxVal) EltInVec = DAG.getNode(X86ISD::KSHIFTL, dl, VecVT, EltInVec, DAG.getConstant(IdxVal, dl, MVT::i8)); @@ -14720,25 +14935,33 @@ X86TargetLowering::InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(ISD::OR, dl, VecVT, Vec, EltInVec); } // Insertion of one bit into last position - if (IdxVal == NumElems -1) { + if (IdxVal == NumElems - 1) { // Move the bit to the last position inside the vector. EltInVec = DAG.getNode(X86ISD::KSHIFTL, dl, VecVT, EltInVec, DAG.getConstant(IdxVal, dl, MVT::i8)); // Clean the last bit in the source vector. Vec = DAG.getNode(X86ISD::KSHIFTL, dl, VecVT, Vec, - DAG.getConstant(1, dl, MVT::i8)); + DAG.getConstant(1, dl, MVT::i8)); Vec = DAG.getNode(X86ISD::KSHIFTR, dl, VecVT, Vec, - DAG.getConstant(1 , dl, MVT::i8)); + DAG.getConstant(1 , dl, MVT::i8)); return DAG.getNode(ISD::OR, dl, VecVT, Vec, EltInVec); } - // Use shuffle to insert element. - SmallVector MaskVec(NumElems); - for (unsigned i = 0; i != NumElems; ++i) - MaskVec[i] = (i == IdxVal) ? NumElems : i; - - return DAG.getVectorShuffle(VecVT, dl, Vec, EltInVec, MaskVec); + // Move the current value of the bit to be replace to bit 0. + SDValue Merged = DAG.getNode(X86ISD::KSHIFTR, dl, VecVT, Vec, + DAG.getConstant(IdxVal, dl, MVT::i8)); + // Xor with the new bit. + Merged = DAG.getNode(ISD::XOR, dl, VecVT, Merged, EltInVec); + // Shift to MSB, filling bottom bits with 0. + Merged = DAG.getNode(X86ISD::KSHIFTL, dl, VecVT, Merged, + DAG.getConstant(NumElems - 1, dl, MVT::i8)); + // Shift to the final position, filling upper bits with 0. + Merged = DAG.getNode(X86ISD::KSHIFTR, dl, VecVT, Merged, + DAG.getConstant(NumElems - 1 - IdxVal, dl, MVT::i8)); + // Xor with original vector to cancel out the original bit value that's still + // present. 
+ return DAG.getNode(ISD::XOR, dl, VecVT, Merged, Vec); } SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, @@ -14748,7 +14971,7 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, unsigned NumElts = VT.getVectorNumElements(); if (EltVT == MVT::i1) - return InsertBitToMaskVector(Op, DAG); + return InsertBitToMaskVector(Op, DAG, Subtarget); SDLoc dl(Op); SDValue N0 = Op.getOperand(0); @@ -14840,7 +15063,7 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, // Bits [3:0] of the constant are the zero mask. The DAG Combiner may // combine either bitwise AND or insert of float 0.0 to set these bits. - bool MinSize = DAG.getMachineFunction().getFunction()->optForMinSize(); + bool MinSize = DAG.getMachineFunction().getFunction().optForMinSize(); if (IdxVal == 0 && (!MinSize || !MayFoldLoad(N1))) { // If this is an insertion of 32-bits into the low 32-bits of // a vector, we prefer to generate a blend with immediate rather @@ -14911,6 +15134,42 @@ static SDValue LowerINSERT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget, return insert1BitVector(Op, DAG, Subtarget); } +static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget, + SelectionDAG &DAG) { + assert(Op.getSimpleValueType().getVectorElementType() == MVT::i1 && + "Only vXi1 extract_subvectors need custom lowering"); + + SDLoc dl(Op); + SDValue Vec = Op.getOperand(0); + SDValue Idx = Op.getOperand(1); + + if (!isa(Idx)) + return SDValue(); + + unsigned IdxVal = cast(Idx)->getZExtValue(); + if (IdxVal == 0) // the operation is legal + return Op; + + MVT VecVT = Vec.getSimpleValueType(); + unsigned NumElems = VecVT.getVectorNumElements(); + + // Extend to natively supported kshift. + MVT WideVecVT = VecVT; + if ((!Subtarget.hasDQI() && NumElems == 8) || NumElems < 8) { + WideVecVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1; + Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVecVT, + DAG.getUNDEF(WideVecVT), Vec, + DAG.getIntPtrConstant(0, dl)); + } + + // Shift to the LSB. + Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideVecVT, Vec, + DAG.getConstant(IdxVal, dl, MVT::i8)); + + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, Op.getValueType(), Vec, + DAG.getIntPtrConstant(0, dl)); +} + // Returns the appropriate wrapper opcode for a global reference. unsigned X86TargetLowering::getGlobalWrapperKind(const GlobalValue *GV) const { // References to absolute symbols are never PC-relative. @@ -14981,7 +15240,7 @@ X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const { // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the // global base reg. 
- const Module *Mod = DAG.getMachineFunction().getFunction()->getParent(); + const Module *Mod = DAG.getMachineFunction().getFunction().getParent(); unsigned char OpFlag = Subtarget.classifyGlobalReference(nullptr, *Mod); auto PtrVT = getPointerTy(DAG.getDataLayout()); @@ -15430,20 +15689,19 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, MVT VT = Op.getSimpleValueType(); SDLoc dl(Op); - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (SrcVT.isVector()) { if (SrcVT == MVT::v2i32 && VT == MVT::v2f64) { return DAG.getNode(X86ISD::CVTSI2P, dl, VT, DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, Src, DAG.getUNDEF(SrcVT))); } - if (SrcVT.getVectorElementType() == MVT::i1) { - if (SrcVT == MVT::v2i1 && TLI.isTypeLegal(SrcVT)) - return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(), - DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v2i64, Src)); - MVT IntegerVT = MVT::getVectorVT(MVT::i32, SrcVT.getVectorNumElements()); - return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(), - DAG.getNode(ISD::SIGN_EXTEND, dl, IntegerVT, Src)); + if (SrcVT == MVT::v2i1) { + // For v2i1, we need to widen to v4i1 first. + assert(VT == MVT::v2f64 && "Unexpected type"); + Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i1, Src, + DAG.getUNDEF(MVT::v2i1)); + return DAG.getNode(X86ISD::CVTSI2P, dl, Op.getValueType(), + DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Src)); } return SDValue(); } @@ -15540,8 +15798,8 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, } /// 64-bit unsigned integer to double expansion. -SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, - SelectionDAG &DAG) const { +static SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG, + const X86Subtarget &Subtarget) { // This algorithm is not obvious. Here it is what we're trying to output: /* movq %rax, %xmm0 @@ -15561,7 +15819,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, // Build some magic constants. static const uint32_t CV0[] = { 0x43300000, 0x45300000, 0, 0 }; Constant *C0 = ConstantDataVector::get(*Context, CV0); - auto PtrVT = getPointerTy(DAG.getDataLayout()); + auto PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); SDValue CPIdx0 = DAG.getConstantPool(C0, PtrVT, 16); SmallVector CV1; @@ -15608,8 +15866,8 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, } /// 32-bit unsigned integer to float expansion. -SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op, - SelectionDAG &DAG) const { +static SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG, + const X86Subtarget &Subtarget) { SDLoc dl(Op); // FP constant to bias correct the final result. SDValue Bias = DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL), dl, @@ -15642,16 +15900,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op, SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Or, Bias); // Handle final rounding. - MVT DestVT = Op.getSimpleValueType(); - - if (DestVT.bitsLT(MVT::f64)) - return DAG.getNode(ISD::FP_ROUND, dl, DestVT, Sub, - DAG.getIntPtrConstant(0, dl)); - if (DestVT.bitsGT(MVT::f64)) - return DAG.getNode(ISD::FP_EXTEND, dl, DestVT, Sub); - - // Handle final rounding. 
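Editor's aside: the i64-to-f64 expansion above is the bias trick spelled out in its comment: pack each 32-bit half of the integer into the mantissa of a double whose exponent word is 0x43300000 (2^52) or 0x45300000 (2^84), subtract those same constants as doubles, and add the two exact partial results. A scalar model of the same arithmetic (a sketch, not the lowering itself):

    #include <cstdint>
    #include <cstring>

    // u64ToDouble: scalar equivalent of the movq/punpckldq/subpd/haddpd sequence.
    static double u64ToDouble(uint64_t X) {
      auto asDouble = [](uint64_t Bits) {
        double D;
        std::memcpy(&D, &Bits, sizeof(D));
        return D;
      };
      const uint64_t K52 = uint64_t{0x43300000} << 32;               // bit pattern of 2^52
      const uint64_t K84 = uint64_t{0x45300000} << 32;               // bit pattern of 2^84
      double Lo = asDouble(K52 | (X & 0xffffffffu)) - asDouble(K52); // exact: low 32 bits
      double Hi = asDouble(K84 | (X >> 32)) - asDouble(K84);         // exact: high 32 bits * 2^32
      return Hi + Lo;                                                // one final rounding
    }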
- return Sub; + return DAG.getFPExtendOrRound(Sub, dl, Op.getSimpleValueType()); } static SDValue lowerUINT_TO_FP_v2i32(SDValue Op, SelectionDAG &DAG, @@ -15783,42 +16032,30 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG, return DAG.getNode(ISD::FADD, DL, VecFloatVT, LowBitcast, FHigh); } -SDValue X86TargetLowering::lowerUINT_TO_FP_vec(SDValue Op, - SelectionDAG &DAG) const { +static SDValue lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG, + const X86Subtarget &Subtarget) { SDValue N0 = Op.getOperand(0); MVT SrcVT = N0.getSimpleValueType(); SDLoc dl(Op); - if (SrcVT.getVectorElementType() == MVT::i1) { - if (SrcVT == MVT::v2i1) - return DAG.getNode(ISD::UINT_TO_FP, dl, Op.getValueType(), - DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v2i64, N0)); - MVT IntegerVT = MVT::getVectorVT(MVT::i32, SrcVT.getVectorNumElements()); - return DAG.getNode(ISD::UINT_TO_FP, dl, Op.getValueType(), - DAG.getNode(ISD::ZERO_EXTEND, dl, IntegerVT, N0)); + if (SrcVT == MVT::v2i1) { + // For v2i1, we need to widen to v4i1 first. + assert(Op.getValueType() == MVT::v2f64 && "Unexpected type"); + N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i1, N0, + DAG.getUNDEF(MVT::v2i1)); + return DAG.getNode(X86ISD::CVTUI2P, dl, MVT::v2f64, + DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0)); } switch (SrcVT.SimpleTy) { default: llvm_unreachable("Custom UINT_TO_FP is not supported!"); - case MVT::v4i8: - case MVT::v4i16: - case MVT::v8i8: - case MVT::v8i16: { - MVT NVT = MVT::getVectorVT(MVT::i32, SrcVT.getVectorNumElements()); - return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(), - DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, N0)); - } case MVT::v2i32: return lowerUINT_TO_FP_v2i32(Op, DAG, Subtarget, dl); case MVT::v4i32: case MVT::v8i32: + assert(!Subtarget.hasAVX512()); return lowerUINT_TO_FP_vXi32(Op, DAG, Subtarget); - case MVT::v16i8: - case MVT::v16i16: - assert(Subtarget.hasAVX512()); - return DAG.getNode(ISD::UINT_TO_FP, dl, Op.getValueType(), - DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v16i32, N0)); } } @@ -15828,14 +16065,8 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, SDLoc dl(Op); auto PtrVT = getPointerTy(DAG.getDataLayout()); - // Since UINT_TO_FP is legal (it's marked custom), dag combiner won't - // optimize it to a SINT_TO_FP when the sign bit is known zero. Perform - // the optimization here. 
- if (DAG.SignBitIsZero(N0)) - return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(), N0); - if (Op.getSimpleValueType().isVector()) - return lowerUINT_TO_FP_vec(Op, DAG); + return lowerUINT_TO_FP_vec(Op, DAG, Subtarget); MVT SrcVT = N0.getSimpleValueType(); MVT DstVT = Op.getSimpleValueType(); @@ -15848,9 +16079,9 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, } if (SrcVT == MVT::i64 && DstVT == MVT::f64 && X86ScalarSSEf64) - return LowerUINT_TO_FP_i64(Op, DAG); + return LowerUINT_TO_FP_i64(Op, DAG, Subtarget); if (SrcVT == MVT::i32 && X86ScalarSSEf64) - return LowerUINT_TO_FP_i32(Op, DAG); + return LowerUINT_TO_FP_i32(Op, DAG, Subtarget); if (Subtarget.is64Bit() && SrcVT == MVT::i64 && DstVT == MVT::f32) return SDValue(); @@ -16112,8 +16343,18 @@ static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG, MVT InVT = In.getSimpleValueType(); SDLoc dl(Op); - if (VT.is512BitVector() || InVT.getVectorElementType() == MVT::i1) - return DAG.getNode(ISD::ZERO_EXTEND, dl, VT, In); + if ((VT != MVT::v4i64 || InVT != MVT::v4i32) && + (VT != MVT::v8i32 || InVT != MVT::v8i16) && + (VT != MVT::v16i16 || InVT != MVT::v16i8) && + (VT != MVT::v8i64 || InVT != MVT::v8i32) && + (VT != MVT::v8i64 || InVT != MVT::v8i16) && + (VT != MVT::v16i32 || InVT != MVT::v16i16) && + (VT != MVT::v16i32 || InVT != MVT::v16i8) && + (VT != MVT::v32i16 || InVT != MVT::v32i8)) + return SDValue(); + + if (Subtarget.hasInt256()) + return DAG.getNode(X86ISD::VZEXT, dl, VT, In); // Optimize vectors in AVX mode: // @@ -16128,14 +16369,6 @@ static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG, // Concat upper and lower parts. // - if (((VT != MVT::v16i16) || (InVT != MVT::v16i8)) && - ((VT != MVT::v8i32) || (InVT != MVT::v8i16)) && - ((VT != MVT::v4i64) || (InVT != MVT::v4i32))) - return SDValue(); - - if (Subtarget.hasInt256()) - return DAG.getNode(X86ISD::VZEXT, dl, VT, In); - SDValue ZeroVec = getZeroVector(InVT, Subtarget, DAG, dl); SDValue Undef = DAG.getUNDEF(InVT); bool NeedZero = Op.getOpcode() == ISD::ZERO_EXTEND; @@ -16151,62 +16384,86 @@ static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG, return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi); } -static SDValue LowerZERO_EXTEND_AVX512(SDValue Op, - const X86Subtarget &Subtarget, SelectionDAG &DAG) { +// Helper to split and extend a v16i1 mask to v16i8 or v16i16. 
+static SDValue SplitAndExtendv16i1(unsigned ExtOpc, MVT VT, SDValue In, + const SDLoc &dl, SelectionDAG &DAG) { + assert((VT == MVT::v16i8 || VT == MVT::v16i16) && "Unexpected VT."); + SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v8i1, In, + DAG.getIntPtrConstant(0, dl)); + SDValue Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v8i1, In, + DAG.getIntPtrConstant(8, dl)); + Lo = DAG.getNode(ExtOpc, dl, MVT::v8i16, Lo); + Hi = DAG.getNode(ExtOpc, dl, MVT::v8i16, Hi); + SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v16i16, Lo, Hi); + return DAG.getNode(ISD::TRUNCATE, dl, VT, Res); +} + +static SDValue LowerZERO_EXTEND_Mask(SDValue Op, + const X86Subtarget &Subtarget, + SelectionDAG &DAG) { MVT VT = Op->getSimpleValueType(0); SDValue In = Op->getOperand(0); MVT InVT = In.getSimpleValueType(); + assert(InVT.getVectorElementType() == MVT::i1 && "Unexpected input type!"); SDLoc DL(Op); unsigned NumElts = VT.getVectorNumElements(); - if (VT.is512BitVector() && InVT.getVectorElementType() != MVT::i1 && - (NumElts == 8 || NumElts == 16 || Subtarget.hasBWI())) - return DAG.getNode(X86ISD::VZEXT, DL, VT, In); + // Extend VT if the scalar type is v8/v16 and BWI is not supported. + MVT ExtVT = VT; + if (!Subtarget.hasBWI() && + (VT.getVectorElementType().getSizeInBits() <= 16)) { + // If v16i32 is to be avoided, we'll need to split and concatenate. + if (NumElts == 16 && !Subtarget.canExtendTo512DQ()) + return SplitAndExtendv16i1(ISD::ZERO_EXTEND, VT, In, DL, DAG); - if (InVT.getVectorElementType() != MVT::i1) - return SDValue(); + ExtVT = MVT::getVectorVT(MVT::i32, NumElts); + } - // Extend VT if the target is 256 or 128bit vector and VLX is not supported. - MVT ExtVT = VT; - if (!VT.is512BitVector() && !Subtarget.hasVLX()) - ExtVT = MVT::getVectorVT(MVT::getIntegerVT(512/NumElts), NumElts); + // Widen to 512-bits if VLX is not supported. + MVT WideVT = ExtVT; + if (!ExtVT.is512BitVector() && !Subtarget.hasVLX()) { + NumElts *= 512 / ExtVT.getSizeInBits(); + InVT = MVT::getVectorVT(MVT::i1, NumElts); + In = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT, DAG.getUNDEF(InVT), + In, DAG.getIntPtrConstant(0, DL)); + WideVT = MVT::getVectorVT(ExtVT.getVectorElementType(), + NumElts); + } - SDValue One = - DAG.getConstant(APInt(ExtVT.getScalarSizeInBits(), 1), DL, ExtVT); - SDValue Zero = - DAG.getConstant(APInt::getNullValue(ExtVT.getScalarSizeInBits()), DL, ExtVT); + SDValue One = DAG.getConstant(1, DL, WideVT); + SDValue Zero = getZeroVector(WideVT, Subtarget, DAG, DL); - SDValue SelectedVal = DAG.getSelect(DL, ExtVT, In, One, Zero); - if (VT == ExtVT) - return SelectedVal; - return DAG.getNode(X86ISD::VTRUNC, DL, VT, SelectedVal); -} + SDValue SelectedVal = DAG.getSelect(DL, WideVT, In, One, Zero); -static SDValue LowerANY_EXTEND(SDValue Op, const X86Subtarget &Subtarget, - SelectionDAG &DAG) { - if (Subtarget.hasFp256()) - if (SDValue Res = LowerAVXExtend(Op, DAG, Subtarget)) - return Res; + // Truncate if we had to extend i16/i8 above. + if (VT != ExtVT) { + WideVT = MVT::getVectorVT(VT.getVectorElementType(), NumElts); + SelectedVal = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SelectedVal); + } - return SDValue(); + // Extract back to 128/256-bit if we widened. 
+ if (WideVT != VT) + SelectedVal = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, SelectedVal, + DAG.getIntPtrConstant(0, DL)); + + return SelectedVal; } static SDValue LowerZERO_EXTEND(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { - SDLoc DL(Op); - MVT VT = Op.getSimpleValueType(); SDValue In = Op.getOperand(0); MVT SVT = In.getSimpleValueType(); - if (VT.is512BitVector() || SVT.getVectorElementType() == MVT::i1) - return LowerZERO_EXTEND_AVX512(Op, Subtarget, DAG); + if (SVT.getVectorElementType() == MVT::i1) + return LowerZERO_EXTEND_Mask(Op, Subtarget, DAG); if (Subtarget.hasFp256()) if (SDValue Res = LowerAVXExtend(Op, DAG, Subtarget)) return Res; - assert(!VT.is256BitVector() || !SVT.is128BitVector() || - VT.getVectorNumElements() != SVT.getVectorNumElements()); + assert(!Op.getSimpleValueType().is256BitVector() || !SVT.is128BitVector() || + Op.getSimpleValueType().getVectorNumElements() != + SVT.getVectorNumElements()); return SDValue(); } @@ -16318,27 +16575,62 @@ static SDValue LowerTruncateVecI1(SDValue Op, SelectionDAG &DAG, if (InVT.getScalarSizeInBits() <= 16) { if (Subtarget.hasBWI()) { // legal, will go to VPMOVB2M, VPMOVW2M - // Shift packed bytes not supported natively, bitcast to word - MVT ExtVT = MVT::getVectorVT(MVT::i16, InVT.getSizeInBits()/16); - SDValue ShiftNode = DAG.getNode(ISD::SHL, DL, ExtVT, - DAG.getBitcast(ExtVT, In), - DAG.getConstant(ShiftInx, DL, ExtVT)); - ShiftNode = DAG.getBitcast(InVT, ShiftNode); - return DAG.getNode(X86ISD::CVT2MASK, DL, VT, ShiftNode); + if (DAG.ComputeNumSignBits(In) < InVT.getScalarSizeInBits()) { + // We need to shift to get the lsb into sign position. + // Shift packed bytes not supported natively, bitcast to word + MVT ExtVT = MVT::getVectorVT(MVT::i16, InVT.getSizeInBits()/16); + In = DAG.getNode(ISD::SHL, DL, ExtVT, + DAG.getBitcast(ExtVT, In), + DAG.getConstant(ShiftInx, DL, ExtVT)); + In = DAG.getBitcast(InVT, In); + } + return DAG.getNode(X86ISD::PCMPGTM, DL, VT, DAG.getConstant(0, DL, InVT), + In); } // Use TESTD/Q, extended vector to packed dword/qword. assert((InVT.is256BitVector() || InVT.is128BitVector()) && "Unexpected vector type."); unsigned NumElts = InVT.getVectorNumElements(); - MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(512/NumElts), NumElts); + assert((NumElts == 8 || NumElts == 16) && "Unexpected number of elements"); + // We need to change to a wider element type that we have support for. + // For 8 element vectors this is easy, we either extend to v8i32 or v8i64. + // For 16 element vectors we extend to v16i32 unless we are explicitly + // trying to avoid 512-bit vectors. If we are avoiding 512-bit vectors + // we need to split into two 8 element vectors which we can extend to v8i32, + // truncate and concat the results. There's an additional complication if + // the original type is v16i8. In that case we can't split the v16i8 so + // first we pre-extend it to v16i16 which we can split to v8i16, then extend + // to v8i32, truncate that to v8i1 and concat the two halves. + if (NumElts == 16 && !Subtarget.canExtendTo512DQ()) { + if (InVT == MVT::v16i8) { + // First we need to sign extend up to 256-bits so we can split that. + InVT = MVT::v16i16; + In = DAG.getNode(ISD::SIGN_EXTEND, DL, InVT, In); + } + SDValue Lo = extract128BitVector(In, 0, DAG, DL); + SDValue Hi = extract128BitVector(In, 8, DAG, DL); + // We're split now, just emit two truncates and a concat. The two + // truncates will trigger legalization to come back to this function. 
+ Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::v8i1, Lo); + Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::v8i1, Hi); + return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi); + } + // We either have 8 elements or we're allowed to use 512-bit vectors. + // If we have VLX, we want to use the narrowest vector that can get the + // job done so we use vXi32. + MVT EltVT = Subtarget.hasVLX() ? MVT::i32 : MVT::getIntegerVT(512/NumElts); + MVT ExtVT = MVT::getVectorVT(EltVT, NumElts); In = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, In); InVT = ExtVT; ShiftInx = InVT.getScalarSizeInBits() - 1; } - SDValue ShiftNode = DAG.getNode(ISD::SHL, DL, InVT, In, - DAG.getConstant(ShiftInx, DL, InVT)); - return DAG.getNode(X86ISD::TESTM, DL, VT, ShiftNode, ShiftNode); + if (DAG.ComputeNumSignBits(In) < InVT.getScalarSizeInBits()) { + // We need to shift to get the lsb into sign position. + In = DAG.getNode(ISD::SHL, DL, InVT, In, + DAG.getConstant(ShiftInx, DL, InVT)); + } + return DAG.getNode(X86ISD::TESTM, DL, VT, In, In); } SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { @@ -16357,10 +16649,15 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { // vpmovqb/w/d, vpmovdb/w, vpmovwb if (Subtarget.hasAVX512()) { // word to byte only under BWI - if (InVT == MVT::v16i16 && !Subtarget.hasBWI()) // v16i16 -> v16i8 - return DAG.getNode(X86ISD::VTRUNC, DL, VT, - getExtendInVec(X86ISD::VSEXT, DL, MVT::v16i32, In, DAG)); - return DAG.getNode(X86ISD::VTRUNC, DL, VT, In); + if (InVT == MVT::v16i16 && !Subtarget.hasBWI()) { // v16i16 -> v16i8 + // Make sure we're allowed to promote 512-bits. + if (Subtarget.canExtendTo512DQ()) + return DAG.getNode(ISD::TRUNCATE, DL, VT, + getExtendInVec(X86ISD::VSEXT, DL, MVT::v16i32, In, + DAG)); + } else { + return DAG.getNode(ISD::TRUNCATE, DL, VT, In); + } } // Truncate with PACKSS if we are truncating a vector with sign-bits that @@ -16471,9 +16768,29 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const { MVT VT = Op.getSimpleValueType(); if (VT.isVector()) { - assert(Subtarget.hasDQI() && Subtarget.hasVLX() && "Requires AVX512DQVL!"); SDValue Src = Op.getOperand(0); SDLoc dl(Op); + + if (VT == MVT::v2i1 && Src.getSimpleValueType() == MVT::v2f64) { + MVT ResVT = MVT::v4i32; + MVT TruncVT = MVT::v4i1; + unsigned Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI; + if (!IsSigned && !Subtarget.hasVLX()) { + // Widen to 512-bits. + ResVT = MVT::v8i32; + TruncVT = MVT::v8i1; + Opc = ISD::FP_TO_UINT; + Src = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v8f64, + DAG.getUNDEF(MVT::v8f64), + Src, DAG.getIntPtrConstant(0, dl)); + } + SDValue Res = DAG.getNode(Opc, dl, ResVT, Src); + Res = DAG.getNode(ISD::TRUNCATE, dl, TruncVT, Res); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i1, Res, + DAG.getIntPtrConstant(0, dl)); + } + + assert(Subtarget.hasDQI() && Subtarget.hasVLX() && "Requires AVX512DQVL!"); if (VT == MVT::v2i64 && Src.getSimpleValueType() == MVT::v2f32) { return DAG.getNode(IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI, dl, VT, DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src, @@ -16901,7 +17218,7 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl, // An add of one will be selected as an INC. 
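// Illustrative aside (not part of this patch): the LowerTruncateVecI1 change
// above only shifts the lsb into the sign position when
// DAG.ComputeNumSignBits shows the lanes are not already sign-extended 0/-1
// values, and then tests the sign bit (the PCMPGTM "0 > x" condition).
// A standalone scalar sketch of that idea; the helper names are invented.
#include <cstddef>
#include <cstdint>

static bool lanesAreSignExtendedBooleans(const int16_t *Lanes, size_t N) {
  for (size_t i = 0; i != N; ++i)
    if (Lanes[i] != 0 && Lanes[i] != -1)
      return false;
  return true;
}

// Build an N-bit mask (N <= 16) from the low bit of each 16-bit lane.
static uint16_t truncToMask(const int16_t *Lanes, size_t N) {
  uint16_t Mask = 0;
  bool AlreadyBool = lanesAreSignExtendedBooleans(Lanes, N);
  for (size_t i = 0; i != N; ++i) {
    int16_t V = Lanes[i];
    if (!AlreadyBool) // shift only when the lsb is not already the sign bit
      V = static_cast<int16_t>(static_cast<uint16_t>(V) << 15);
    if (V < 0)        // sign bit set, i.e. the "0 > V" compare is true
      Mask |= uint16_t(1) << i;
  }
  return Mask;
}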
if (C->isOne() && (!Subtarget.slowIncDec() || - DAG.getMachineFunction().getFunction()->optForSize())) { + DAG.getMachineFunction().getFunction().optForSize())) { Opcode = X86ISD::INC; NumOperands = 1; break; @@ -16910,7 +17227,7 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl, // An add of negative one (subtract of one) will be selected as a DEC. if (C->isAllOnesValue() && (!Subtarget.slowIncDec() || - DAG.getMachineFunction().getFunction()->optForSize())) { + DAG.getMachineFunction().getFunction().optForSize())) { Opcode = X86ISD::DEC; NumOperands = 1; break; @@ -17105,7 +17422,7 @@ SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, // with an immediate. 16 bit immediates are to be avoided. if ((Op0.getValueType() == MVT::i16 && (isa(Op0) || isa(Op1))) && - !DAG.getMachineFunction().getFunction()->optForMinSize() && + !DAG.getMachineFunction().getFunction().optForMinSize() && !Subtarget.isAtom()) { unsigned ExtendOp = isX86CCUnsigned(X86CC) ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND; @@ -17539,6 +17856,19 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget, assert(EltVT == MVT::f32 || EltVT == MVT::f64); #endif + // Custom widen MVT::v2f32 to prevent the default widening + // from getting a result type of v4i32, extracting it to v2i32 and then + // trying to sign extend that to v2i1. + if (VT == MVT::v2i1 && Op1.getValueType() == MVT::v2f32) { + Op0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Op0, + DAG.getUNDEF(MVT::v2f32)); + Op1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Op1, + DAG.getUNDEF(MVT::v2f32)); + SDValue NewOp = DAG.getNode(ISD::SETCC, dl, MVT::v4i1, Op0, Op1, CC); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i1, NewOp, + DAG.getIntPtrConstant(0, dl)); + } + unsigned Opc; if (Subtarget.hasAVX512() && VT.getVectorElementType() == MVT::i1) { assert(VT.getVectorNumElements() <= 16); @@ -17633,12 +17963,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget, // In AVX-512 architecture setcc returns mask with i1 elements, // But there is no compare instruction for i8 and i16 elements in KNL. // In this case use SSE compare - bool UseAVX512Inst = - (OpVT.is512BitVector() || - OpVT.getScalarSizeInBits() >= 32 || - (Subtarget.hasBWI() && Subtarget.hasVLX())); - - if (UseAVX512Inst) + if (OpVT.getScalarSizeInBits() >= 32 || Subtarget.hasBWI()) return LowerIntVSETCC_AVX512(Op, DAG); return DAG.getNode(ISD::TRUNCATE, dl, VT, @@ -17710,7 +18035,8 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget, // Special case: Use min/max operations for SETULE/SETUGE MVT VET = VT.getVectorElementType(); bool HasMinMax = - (Subtarget.hasSSE41() && (VET >= MVT::i8 && VET <= MVT::i32)) || + (Subtarget.hasAVX512() && VET == MVT::i64) || + (Subtarget.hasSSE41() && (VET == MVT::i16 || VET == MVT::i32)) || (Subtarget.hasSSE2() && (VET == MVT::i8)); bool MinMax = false; if (HasMinMax) { @@ -18030,6 +18356,18 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(X86ISD::SELECTS, DL, VT, Cmp, Op1, Op2); } + // For v64i1 without 64-bit support we need to split and rejoin. 
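// Illustrative aside (not part of this patch): the v64i1 SELECT handling
// below legalizes the operation on 32-bit targets by splitting both operands
// into 32-element halves, selecting each half, and concatenating the results.
// A scalar analogue using a 64-bit mask split into 32-bit halves:
#include <cstdint>

static uint64_t select64ViaHalves(bool Cond, uint64_t A, uint64_t B) {
  uint32_t ALo = uint32_t(A), AHi = uint32_t(A >> 32);
  uint32_t BLo = uint32_t(B), BHi = uint32_t(B >> 32);
  uint32_t Lo = Cond ? ALo : BLo;   // select on the low 32 lanes
  uint32_t Hi = Cond ? AHi : BHi;   // select on the high 32 lanes
  return (uint64_t(Hi) << 32) | Lo; // concatenate the halves
}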
+ if (VT == MVT::v64i1 && !Subtarget.is64Bit()) { + assert(Subtarget.hasBWI() && "Expected BWI to be legal"); + SDValue Op1Lo = extractSubVector(Op1, 0, DAG, DL, 32); + SDValue Op2Lo = extractSubVector(Op2, 0, DAG, DL, 32); + SDValue Op1Hi = extractSubVector(Op1, 32, DAG, DL, 32); + SDValue Op2Hi = extractSubVector(Op2, 32, DAG, DL, 32); + SDValue Lo = DAG.getSelect(DL, MVT::v32i1, Cond, Op1Lo, Op2Lo); + SDValue Hi = DAG.getSelect(DL, MVT::v32i1, Cond, Op1Hi, Op2Hi); + return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi); + } + if (VT.isVector() && VT.getVectorElementType() == MVT::i1) { SDValue Op1Scalar; if (ISD::isBuildVectorOfConstantSDNodes(Op1.getNode())) @@ -18283,58 +18621,76 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(X86ISD::CMOV, DL, Op.getValueType(), Ops); } -static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op, - const X86Subtarget &Subtarget, - SelectionDAG &DAG) { +static SDValue LowerSIGN_EXTEND_Mask(SDValue Op, + const X86Subtarget &Subtarget, + SelectionDAG &DAG) { MVT VT = Op->getSimpleValueType(0); SDValue In = Op->getOperand(0); MVT InVT = In.getSimpleValueType(); + assert(InVT.getVectorElementType() == MVT::i1 && "Unexpected input type!"); MVT VTElt = VT.getVectorElementType(); - MVT InVTElt = InVT.getVectorElementType(); SDLoc dl(Op); - // SKX processor - if ((InVTElt == MVT::i1) && - (((Subtarget.hasBWI() && VTElt.getSizeInBits() <= 16)) || - - ((Subtarget.hasDQI() && VTElt.getSizeInBits() >= 32)))) - - return DAG.getNode(X86ISD::VSEXT, dl, VT, In); - unsigned NumElts = VT.getVectorNumElements(); - if (VT.is512BitVector() && InVTElt != MVT::i1 && - (NumElts == 8 || NumElts == 16 || Subtarget.hasBWI())) { - if (In.getOpcode() == X86ISD::VSEXT || In.getOpcode() == X86ISD::VZEXT) - return getExtendInVec(In.getOpcode(), dl, VT, In.getOperand(0), DAG); - return getExtendInVec(X86ISD::VSEXT, dl, VT, In, DAG); - } - - if (InVTElt != MVT::i1) - return SDValue(); - + // Extend VT if the scalar type is v8/v16 and BWI is not supported. MVT ExtVT = VT; - if (!VT.is512BitVector() && !Subtarget.hasVLX()) { - ExtVT = MVT::getVectorVT(MVT::getIntegerVT(512/NumElts), NumElts); - } else if (VTElt == MVT::i16 || VTElt == MVT::i8) { - // If we don't have BWI support we need to extend 8/16-bit to 32-bit. - // Otherwise we end up with vselects we can't handle. + if (!Subtarget.hasBWI() && VTElt.getSizeInBits() <= 16) { + // If v16i32 is to be avoided, we'll need to split and concatenate. + if (NumElts == 16 && !Subtarget.canExtendTo512DQ()) + return SplitAndExtendv16i1(ISD::SIGN_EXTEND, VT, In, dl, DAG); + ExtVT = MVT::getVectorVT(MVT::i32, NumElts); } + // Widen to 512-bits if VLX is not supported. 
+ MVT WideVT = ExtVT; + if (!ExtVT.is512BitVector() && !Subtarget.hasVLX()) { + NumElts *= 512 / ExtVT.getSizeInBits(); + InVT = MVT::getVectorVT(MVT::i1, NumElts); + In = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, InVT, DAG.getUNDEF(InVT), + In, DAG.getIntPtrConstant(0, dl)); + WideVT = MVT::getVectorVT(ExtVT.getVectorElementType(), NumElts); + } + SDValue V; - if (Subtarget.hasDQI()) { - V = getExtendInVec(X86ISD::VSEXT, dl, ExtVT, In, DAG); - assert(!VT.is512BitVector() && "Unexpected vector type"); + MVT WideEltVT = WideVT.getVectorElementType(); + if ((Subtarget.hasDQI() && WideEltVT.getSizeInBits() >= 32) || + (Subtarget.hasBWI() && WideEltVT.getSizeInBits() <= 16)) { + V = getExtendInVec(X86ISD::VSEXT, dl, WideVT, In, DAG); } else { - SDValue NegOne = getOnesVector(ExtVT, DAG, dl); - SDValue Zero = getZeroVector(ExtVT, Subtarget, DAG, dl); - V = DAG.getSelect(dl, ExtVT, In, NegOne, Zero); - if (ExtVT == VT) - return V; + SDValue NegOne = getOnesVector(WideVT, DAG, dl); + SDValue Zero = getZeroVector(WideVT, Subtarget, DAG, dl); + V = DAG.getSelect(dl, WideVT, In, NegOne, Zero); } - return DAG.getNode(X86ISD::VTRUNC, dl, VT, V); + // Truncate if we had to extend i16/i8 above. + if (VT != ExtVT) { + WideVT = MVT::getVectorVT(VTElt, NumElts); + V = DAG.getNode(ISD::TRUNCATE, dl, WideVT, V); + } + + // Extract back to 128/256-bit if we widened. + if (WideVT != VT) + V = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, V, + DAG.getIntPtrConstant(0, dl)); + + return V; +} + +static SDValue LowerANY_EXTEND(SDValue Op, const X86Subtarget &Subtarget, + SelectionDAG &DAG) { + SDValue In = Op->getOperand(0); + MVT InVT = In.getSimpleValueType(); + + if (InVT.getVectorElementType() == MVT::i1) + return LowerSIGN_EXTEND_Mask(Op, Subtarget, DAG); + + if (Subtarget.hasFp256()) + if (SDValue Res = LowerAVXExtend(Op, DAG, Subtarget)) + return Res; + + return SDValue(); } // Lowering for SIGN_EXTEND_VECTOR_INREG and ZERO_EXTEND_VECTOR_INREG. 
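// Illustrative aside (not part of this patch): LowerZERO_EXTEND_Mask and
// LowerSIGN_EXTEND_Mask above follow the same recipe -- per mask bit, select
// a wide 1 (or -1) vs. 0 value in a type that is legal for the subtarget
// (i32 lanes when BWI is missing, padded out to 512 bits when VLX is
// missing), then truncate and extract back down to the requested type.
// A scalar analogue producing i8 lanes from a 16-bit mask via an i32
// temporary; the helper name is invented for the sketch.
#include <cstdint>

static void extendMaskToBytes(uint16_t Mask, int8_t Out[16], bool Signed) {
  for (int i = 0; i != 16; ++i) {
    bool Bit = (Mask >> i) & 1;
    int32_t Wide = Bit ? (Signed ? -1 : 1) : 0; // select in the wide type
    Out[i] = static_cast<int8_t>(Wide);         // truncate to the final type
  }
}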
@@ -18431,12 +18787,17 @@ static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget &Subtarget, MVT InVT = In.getSimpleValueType(); SDLoc dl(Op); - if (VT.is512BitVector() || InVT.getVectorElementType() == MVT::i1) - return LowerSIGN_EXTEND_AVX512(Op, Subtarget, DAG); + if (InVT.getVectorElementType() == MVT::i1) + return LowerSIGN_EXTEND_Mask(Op, Subtarget, DAG); - if ((VT != MVT::v4i64 || InVT != MVT::v4i32) && - (VT != MVT::v8i32 || InVT != MVT::v8i16) && - (VT != MVT::v16i16 || InVT != MVT::v16i8)) + if ((VT != MVT::v4i64 || InVT != MVT::v4i32) && + (VT != MVT::v8i32 || InVT != MVT::v8i16) && + (VT != MVT::v16i16 || InVT != MVT::v16i8) && + (VT != MVT::v8i64 || InVT != MVT::v8i32) && + (VT != MVT::v8i64 || InVT != MVT::v8i16) && + (VT != MVT::v16i32 || InVT != MVT::v16i16) && + (VT != MVT::v16i32 || InVT != MVT::v16i8) && + (VT != MVT::v32i16 || InVT != MVT::v32i8)) return SDValue(); if (Subtarget.hasInt256()) @@ -18509,6 +18870,7 @@ static SDValue LowerTruncatingStore(SDValue StOp, const X86Subtarget &Subtarget, DAG.getUNDEF(ExtVT), Op, DAG.getIntPtrConstant(0, dl)); } Op = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i1, Op); + Op = DAG.getBitcast(MVT::i8, Op); return DAG.getStore(St->getChain(), dl, Op, St->getBasePtr(), St->getMemOperand()); } @@ -18525,12 +18887,12 @@ static SDValue LowerTruncatingStore(SDValue StOp, const X86Subtarget &Subtarget, DAG.getIntPtrConstant(16, dl)); Hi = DAG.getNode(ISD::TRUNCATE, dl, MVT::v16i1, Hi); - SDValue BasePtrHi = - DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, - DAG.getConstant(2, dl, BasePtr.getValueType())); + SDValue BasePtrHi = DAG.getMemBasePlusOffset(BasePtr, 2, dl); SDValue StHi = DAG.getStore(St->getChain(), dl, Hi, - BasePtrHi, St->getMemOperand()); + BasePtrHi, St->getPointerInfo().getWithOffset(2), + MinAlign(St->getAlignment(), 2U), + St->getMemOperand()->getFlags()); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, StLo, StHi); } @@ -18559,6 +18921,14 @@ static SDValue LowerExtended1BitVectorLoad(SDValue Op, // Replace chain users with the new chain. assert(Load->getNumValues() == 2 && "Loads must carry a chain!"); DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Load.getValue(1)); + if (Subtarget.hasVLX()) { + // Extract to v4i1/v2i1. + SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MemVT, Load, + DAG.getIntPtrConstant(0, dl)); + // Finally, do a normal sign-extend to the desired register. + return DAG.getNode(ExtOpcode, dl, Op.getValueType(), Extract); + } + MVT ExtVT = MVT::getVectorVT(VT.getScalarType(), 8); SDValue ExtVec = DAG.getNode(ExtOpcode, dl, ExtVT, Load); @@ -18578,22 +18948,25 @@ static SDValue LowerExtended1BitVectorLoad(SDValue Op, if (NumElts <= 8) { // A subset, assume that we have only AVX-512F - unsigned NumBitsToLoad = 8; - MVT TypeToLoad = MVT::getIntegerVT(NumBitsToLoad); - SDValue Load = DAG.getLoad(TypeToLoad, dl, Ld->getChain(), + SDValue Load = DAG.getLoad(MVT::i8, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getMemOperand()); // Replace chain users with the new chain. assert(Load->getNumValues() == 2 && "Loads must carry a chain!"); DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Load.getValue(1)); - MVT MaskVT = MVT::getVectorVT(MVT::i1, NumBitsToLoad); - SDValue BitVec = DAG.getBitcast(MaskVT, Load); + SDValue BitVec = DAG.getBitcast(MVT::v8i1, Load); if (NumElts == 8) return DAG.getNode(ExtOpcode, dl, VT, BitVec); - // we should take care to v4i1 and v2i1 + if (Subtarget.hasVLX()) { + // Extract to v4i1/v2i1. 
+ SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MemVT, BitVec, + DAG.getIntPtrConstant(0, dl)); + // Finally, do a normal sign-extend to the desired register. + return DAG.getNode(ExtOpcode, dl, Op.getValueType(), Extract); + } MVT ExtVT = MVT::getVectorVT(VT.getScalarType(), 8); SDValue ExtVec = DAG.getNode(ExtOpcode, dl, ExtVT, BitVec); @@ -18603,23 +18976,20 @@ static SDValue LowerExtended1BitVectorLoad(SDValue Op, assert(VT == MVT::v32i8 && "Unexpected extload type"); - SmallVector Chains; - SDValue BasePtr = Ld->getBasePtr(); SDValue LoadLo = DAG.getLoad(MVT::v16i1, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getMemOperand()); - Chains.push_back(LoadLo.getValue(1)); - SDValue BasePtrHi = - DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, - DAG.getConstant(2, dl, BasePtr.getValueType())); + SDValue BasePtrHi = DAG.getMemBasePlusOffset(BasePtr, 2, dl); - SDValue LoadHi = DAG.getLoad(MVT::v16i1, dl, Ld->getChain(), - BasePtrHi, - Ld->getMemOperand()); - Chains.push_back(LoadHi.getValue(1)); - SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains); + SDValue LoadHi = DAG.getLoad(MVT::v16i1, dl, Ld->getChain(), BasePtrHi, + Ld->getPointerInfo().getWithOffset(2), + MinAlign(Ld->getAlignment(), 2U), + Ld->getMemOperand()->getFlags()); + + SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + LoadLo.getValue(1), LoadHi.getValue(1)); DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), NewChain); SDValue Lo = DAG.getNode(ExtOpcode, dl, MVT::v16i8, LoadLo); @@ -19173,8 +19543,8 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, if (Is64Bit) { // The 64 bit implementation of segmented stacks needs to clobber both r10 // r11. This makes it impossible to use it along with nested parameters. - const Function *F = MF.getFunction(); - for (const auto &A : F->args()) { + const Function &F = MF.getFunction(); + for (const auto &A : F.args()) { if (A.hasNestAttr()) report_fatal_error("Cannot use segmented stacks with functions that " "have nested arguments."); @@ -19221,7 +19591,7 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); if (!Subtarget.is64Bit() || - Subtarget.isCallingConvWin64(MF.getFunction()->getCallingConv())) { + Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv())) { // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); @@ -19275,7 +19645,7 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { assert(Op.getNumOperands() == 4); MachineFunction &MF = DAG.getMachineFunction(); - if (Subtarget.isCallingConvWin64(MF.getFunction()->getCallingConv())) + if (Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv())) // The Win64 ABI uses char* instead of a structure. return DAG.expandVAArg(Op.getNode()); @@ -19306,7 +19676,7 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { if (ArgMode == 2) { // Sanity Check: Make sure using fp_offset makes sense. 
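// Illustrative aside (not part of this patch): when the 32-element mask
// store/load above is split into two 16-element halves, the second half
// lives at byte offset 2, so its alignment is MinAlign(OrigAlign, 2) -- the
// largest power of two dividing both the original alignment and the offset.
// A standalone sketch of that computation (function name invented):
#include <cstdint>

static uint64_t minAlignSketch(uint64_t Align, uint64_t Offset) {
  uint64_t Bits = Align | Offset;
  return Bits & (~Bits + 1); // lowest set bit == largest common power of two
}
// e.g. minAlignSketch(16, 2) == 2 and minAlignSketch(8, 4) == 4.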
assert(!Subtarget.useSoftFloat() && - !(MF.getFunction()->hasFnAttribute(Attribute::NoImplicitFloat)) && + !(MF.getFunction().hasFnAttribute(Attribute::NoImplicitFloat)) && Subtarget.hasSSE1()); } @@ -19316,13 +19686,12 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { DAG.getConstant(ArgMode, dl, MVT::i8), DAG.getConstant(Align, dl, MVT::i32)}; SDVTList VTs = DAG.getVTList(getPointerTy(DAG.getDataLayout()), MVT::Other); - SDValue VAARG = DAG.getMemIntrinsicNode(X86ISD::VAARG_64, dl, - VTs, InstOps, MVT::i64, - MachinePointerInfo(SV), - /*Align=*/0, - /*Volatile=*/false, - /*ReadMem=*/true, - /*WriteMem=*/true); + SDValue VAARG = DAG.getMemIntrinsicNode( + X86ISD::VAARG_64, dl, + VTs, InstOps, MVT::i64, + MachinePointerInfo(SV), + /*Align=*/0, + MachineMemOperand::MOLoad | MachineMemOperand::MOStore); Chain = VAARG.getValue(1); // Load the next argument and return it @@ -19335,7 +19704,7 @@ static SDValue LowerVACOPY(SDValue Op, const X86Subtarget &Subtarget, // where a va_list is still an i8*. assert(Subtarget.is64Bit() && "This code only handles 64-bit va_copy!"); if (Subtarget.isCallingConvWin64( - DAG.getMachineFunction().getFunction()->getCallingConv())) + DAG.getMachineFunction().getFunction().getCallingConv())) // Probably a Win64 va_copy. return DAG.expandVACopy(Op.getNode()); @@ -19499,9 +19868,9 @@ static SDValue getMaskNode(SDValue Mask, MVT MaskVT, const SDLoc &dl) { if (isAllOnesConstant(Mask)) - return DAG.getTargetConstant(1, dl, MaskVT); + return DAG.getConstant(1, dl, MaskVT); if (X86::isZeroNode(Mask)) - return DAG.getTargetConstant(0, dl, MaskVT); + return DAG.getConstant(0, dl, MaskVT); if (MaskVT.bitsGT(Mask.getSimpleValueType())) { // Mask should be extended @@ -19564,9 +19933,11 @@ static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask, case X86ISD::CMPM: case X86ISD::CMPM_RND: case X86ISD::CMPMU: + case X86ISD::VPSHUFBITQMB: return DAG.getNode(ISD::AND, dl, VT, Op, VMask); case X86ISD::VFPCLASS: return DAG.getNode(ISD::OR, dl, VT, Op, VMask); + case ISD::TRUNCATE: case X86ISD::VTRUNC: case X86ISD::VTRUNCS: case X86ISD::VTRUNCUS: @@ -20088,9 +20459,8 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, MVT BitcastVT = MVT::getVectorVT(MVT::i1, Mask.getSimpleValueType().getSizeInBits()); SDValue FPclass = DAG.getNode(IntrData->Opc0, dl, MaskVT, Src1, Imm); - SDValue FPclassMask = getVectorMaskingNode(FPclass, Mask, - DAG.getTargetConstant(0, dl, MaskVT), - Subtarget, DAG); + SDValue FPclassMask = getVectorMaskingNode(FPclass, Mask, SDValue(), + Subtarget, DAG); SDValue Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, BitcastVT, DAG.getUNDEF(BitcastVT), FPclassMask, DAG.getIntPtrConstant(0, dl)); @@ -20101,9 +20471,9 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SDValue Imm = Op.getOperand(2); SDValue Mask = Op.getOperand(3); SDValue FPclass = DAG.getNode(IntrData->Opc0, dl, MVT::v1i1, Src1, Imm); - SDValue FPclassMask = getScalarMaskingNode(FPclass, Mask, - DAG.getTargetConstant(0, dl, MVT::i1), Subtarget, DAG); - return DAG.getNode(X86ISD::VEXTRACT, dl, MVT::i8, FPclassMask, + SDValue FPclassMask = getScalarMaskingNode(FPclass, Mask, SDValue(), + Subtarget, DAG); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, FPclassMask, DAG.getIntPtrConstant(0, dl)); } case CMP_MASK: @@ -20145,9 +20515,7 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, Cmp = DAG.getNode(IntrData->Opc0, dl, MaskVT, Op.getOperand(1), Op.getOperand(2)); } - SDValue CmpMask = getVectorMaskingNode(Cmp, Mask, - 
DAG.getTargetConstant(0, dl, - MaskVT), + SDValue CmpMask = getVectorMaskingNode(Cmp, Mask, SDValue(), Subtarget, DAG); SDValue Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, BitcastVT, DAG.getUNDEF(BitcastVT), CmpMask, @@ -20170,11 +20538,9 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, if(!Cmp.getNode()) Cmp = DAG.getNode(IntrData->Opc0, dl, MVT::v1i1, Src1, Src2, CC); - SDValue CmpMask = getScalarMaskingNode(Cmp, Mask, - DAG.getTargetConstant(0, dl, - MVT::i1), + SDValue CmpMask = getScalarMaskingNode(Cmp, Mask, SDValue(), Subtarget, DAG); - return DAG.getNode(X86ISD::VEXTRACT, dl, MVT::i8, CmpMask, + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, CmpMask, DAG.getIntPtrConstant(0, dl)); } case COMI: { // Comparison intrinsics @@ -20228,7 +20594,7 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, else FCmp = DAG.getNode(X86ISD::FSETCCM_RND, dl, MVT::v1i1, LHS, RHS, DAG.getConstant(CondVal, dl, MVT::i8), Sae); - return DAG.getNode(X86ISD::VEXTRACT, dl, MVT::i32, FCmp, + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, FCmp, DAG.getIntPtrConstant(0, dl)); } case VSHIFT: @@ -20253,18 +20619,6 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, Mask = DAG.getBitcast(MaskVT, Mask); return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Mask); } - case KUNPCK: { - MVT VT = Op.getSimpleValueType(); - MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getSizeInBits()/2); - - SDValue Src1 = getMaskNode(Op.getOperand(1), MaskVT, Subtarget, DAG, dl); - SDValue Src2 = getMaskNode(Op.getOperand(2), MaskVT, Subtarget, DAG, dl); - // Arguments should be swapped. - SDValue Res = DAG.getNode(IntrData->Opc0, dl, - MVT::getVectorVT(MVT::i1, VT.getSizeInBits()), - Src2, Src1); - return DAG.getBitcast(VT, Res); - } case MASK_BINOP: { MVT VT = Op.getSimpleValueType(); MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getSizeInBits()); @@ -20303,18 +20657,6 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, Src1, Src2, Src3, Imm, Rnd), Mask, Passthru, Subtarget, DAG); } - case CONVERT_TO_MASK: { - MVT SrcVT = Op.getOperand(1).getSimpleValueType(); - MVT MaskVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorNumElements()); - MVT BitcastVT = MVT::getVectorVT(MVT::i1, VT.getSizeInBits()); - - SDValue CvtMask = DAG.getNode(IntrData->Opc0, dl, MaskVT, - Op.getOperand(1)); - SDValue Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, BitcastVT, - DAG.getUNDEF(BitcastVT), CvtMask, - DAG.getIntPtrConstant(0, dl)); - return DAG.getBitcast(Op.getValueType(), Res); - } case ROUNDP: { assert(IntrData->Opc0 == X86ISD::VRNDSCALE && "Unexpected opcode"); // Clear the upper bits of the rounding immediate so that the legacy @@ -20597,7 +20939,7 @@ static SDValue getAVX2GatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, SDValue Segment = DAG.getRegister(0, MVT::i32); // If source is undef or we know it won't be used, use a zero vector // to break register dependency. - // TODO: use undef instead and let ExecutionDepsFix deal with it? + // TODO: use undef instead and let BreakFalseDeps deal with it? if (Src.isUndef() || ISD::isBuildVectorAllOnes(Mask.getNode())) Src = getZeroVector(Op.getSimpleValueType(), Subtarget, DAG, dl); SDValue Ops[] = {Src, Base, Scale, Index, Disp, Segment, Mask, Chain}; @@ -20625,7 +20967,7 @@ static SDValue getGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, SDValue Segment = DAG.getRegister(0, MVT::i32); // If source is undef or we know it won't be used, use a zero vector // to break register dependency. 
- // TODO: use undef instead and let ExecutionDepsFix deal with it? + // TODO: use undef instead and let BreakFalseDeps deal with it? if (Src.isUndef() || ISD::isBuildVectorAllOnes(VMask.getNode())) Src = getZeroVector(Op.getSimpleValueType(), Subtarget, DAG, dl); SDValue Ops[] = {Src, VMask, Base, Scale, Index, Disp, Segment, Chain}; @@ -21033,7 +21375,7 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget, // ADC/ADCX/SBB case ADX: { SDVTList CFVTs = DAG.getVTList(Op->getValueType(0), MVT::i32); - SDVTList VTs = DAG.getVTList(Op.getOperand(3)->getValueType(0), MVT::i32); + SDVTList VTs = DAG.getVTList(Op.getOperand(3).getValueType(), MVT::i32); SDValue GenCF = DAG.getNode(X86ISD::ADD, dl, CFVTs, Op.getOperand(2), DAG.getConstant(-1, dl, MVT::i8)); SDValue Res = DAG.getNode(IntrData->Opc0, dl, VTs, Op.getOperand(3), @@ -21582,7 +21924,8 @@ static SDValue Lower512IntUnary(SDValue Op, SelectionDAG &DAG) { // ( sub(trunc(lzcnt(zext32(x)))) ). In case zext32(x) is illegal, // split the vector, perform operation on it's Lo a Hi part and // concatenate the results. -static SDValue LowerVectorCTLZ_AVX512CDI(SDValue Op, SelectionDAG &DAG) { +static SDValue LowerVectorCTLZ_AVX512CDI(SDValue Op, SelectionDAG &DAG, + const X86Subtarget &Subtarget) { assert(Op.getOpcode() == ISD::CTLZ); SDLoc dl(Op); MVT VT = Op.getSimpleValueType(); @@ -21593,7 +21936,8 @@ static SDValue LowerVectorCTLZ_AVX512CDI(SDValue Op, SelectionDAG &DAG) { "Unsupported element type"); // Split vector, it's Lo and Hi parts will be handled in next iteration. - if (16 < NumElems) + if (NumElems > 16 || + (NumElems == 16 && !Subtarget.canExtendTo512DQ())) return LowerVectorIntUnary(Op, DAG); MVT NewVT = MVT::getVectorVT(MVT::i32, NumElems); @@ -21698,8 +22042,10 @@ static SDValue LowerVectorCTLZ(SDValue Op, const SDLoc &DL, SelectionDAG &DAG) { MVT VT = Op.getSimpleValueType(); - if (Subtarget.hasCDI()) - return LowerVectorCTLZ_AVX512CDI(Op, DAG); + if (Subtarget.hasCDI() && + // vXi8 vectors need to be promoted to 512-bits for vXi32. + (Subtarget.canExtendTo512DQ() || VT.getVectorElementType() != MVT::i8)) + return LowerVectorCTLZ_AVX512CDI(Op, DAG, Subtarget); // Decompose 256-bit ops into smaller 128-bit ops. if (VT.is256BitVector() && !Subtarget.hasInt256()) @@ -21983,7 +22329,14 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget, // Lower v4i32 mul as 2x shuffle, 2x pmuludq, 2x shuffle. if (VT == MVT::v4i32) { assert(Subtarget.hasSSE2() && !Subtarget.hasSSE41() && - "Should not custom lower when pmuldq is available!"); + "Should not custom lower when pmulld is available!"); + + // If the upper 17 bits of each element are zero then we can use PMADD. + APInt Mask17 = APInt::getHighBitsSet(32, 17); + if (DAG.MaskedValueIsZero(A, Mask17) && DAG.MaskedValueIsZero(B, Mask17)) + return DAG.getNode(X86ISD::VPMADDWD, dl, VT, + DAG.getBitcast(MVT::v8i16, A), + DAG.getBitcast(MVT::v8i16, B)); // Extract the odd parts. static const int UnpackMask[] = { 1, -1, 3, -1 }; @@ -22035,6 +22388,11 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget, bool AHiIsZero = DAG.MaskedValueIsZero(A, UpperBitsMask); bool BHiIsZero = DAG.MaskedValueIsZero(B, UpperBitsMask); + // If DQI is supported we can use MULLQ, but MULUDQ is still better if the + // the high bits are known to be zero. + if (Subtarget.hasDQI() && (!AHiIsZero || !BHiIsZero)) + return Op; + // Bit cast to 32-bit vectors for MULUDQ. 
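// Illustrative aside (not part of this patch): the VPMADDWD shortcut above
// relies on the upper 17 bits of each 32-bit element being zero. Each value
// then fits in 15 bits, so viewed as two 16-bit lanes the high lane is zero
// and the low lane is non-negative; VPMADDWD's per-pair "lo*lo + hi*hi"
// reduces to the exact 32-bit product. A scalar sketch of the condition and
// the resulting arithmetic (names invented for the sketch):
#include <cassert>
#include <cstdint>

static bool upper17BitsZero(uint32_t V) { return (V & 0xFFFF8000u) == 0; }

static uint32_t mulViaMaddwd(uint32_t A, uint32_t B) {
  assert(upper17BitsZero(A) && upper17BitsZero(B));
  int16_t ALo = int16_t(A & 0xFFFF), AHi = int16_t(A >> 16); // AHi == 0
  int16_t BLo = int16_t(B & 0xFFFF), BHi = int16_t(B >> 16); // BHi == 0
  return uint32_t(int32_t(ALo) * BLo + int32_t(AHi) * BHi);
}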
SDValue Alo = DAG.getBitcast(MulVT, A); SDValue Blo = DAG.getBitcast(MulVT, B); @@ -22103,7 +22461,7 @@ static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget, SDValue Hi = DAG.getIntPtrConstant(NumElems / 2, dl); if (VT == MVT::v32i8) { - if (Subtarget.hasBWI()) { + if (Subtarget.canExtendTo512BW()) { SDValue ExA = DAG.getNode(ExAVX, dl, MVT::v32i16, A); SDValue ExB = DAG.getNode(ExAVX, dl, MVT::v32i16, B); SDValue Mul = DAG.getNode(ISD::MUL, dl, MVT::v32i16, ExA, ExB); @@ -22136,6 +22494,8 @@ static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget, DAG.getVectorShuffle(MVT::v16i16, dl, Lo, Hi, HiMask)); } + assert(VT == MVT::v16i8 && "Unexpected VT"); + SDValue ExA = DAG.getNode(ExAVX, dl, MVT::v16i16, A); SDValue ExB = DAG.getNode(ExAVX, dl, MVT::v16i16, B); SDValue Mul = DAG.getNode(ISD::MUL, dl, MVT::v16i16, ExA, ExB); @@ -22890,16 +23250,20 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget, // It's worth extending once and using the vXi16/vXi32 shifts for smaller // types, but without AVX512 the extra overheads to get from vXi8 to vXi32 // make the existing SSE solution better. + // NOTE: We honor prefered vector width before promoting to 512-bits. if ((Subtarget.hasInt256() && VT == MVT::v8i16) || - (Subtarget.hasAVX512() && VT == MVT::v16i16) || - (Subtarget.hasAVX512() && VT == MVT::v16i8) || - (Subtarget.hasBWI() && VT == MVT::v32i8)) { - MVT EvtSVT = (VT == MVT::v32i8 ? MVT::i16 : MVT::i32); + (Subtarget.canExtendTo512DQ() && VT == MVT::v16i16) || + (Subtarget.canExtendTo512DQ() && VT == MVT::v16i8) || + (Subtarget.canExtendTo512BW() && VT == MVT::v32i8) || + (Subtarget.hasBWI() && Subtarget.hasVLX() && VT == MVT::v16i8)) { + assert((!Subtarget.hasBWI() || VT == MVT::v32i8 || VT == MVT::v16i8) && + "Unexpected vector type"); + MVT EvtSVT = Subtarget.hasBWI() ? MVT::i16 : MVT::i32; MVT ExtVT = MVT::getVectorVT(EvtSVT, VT.getVectorNumElements()); unsigned ExtOpc = Op.getOpcode() == ISD::SRA ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; R = DAG.getNode(ExtOpc, dl, ExtVT, R); - Amt = DAG.getNode(ISD::ANY_EXTEND, dl, ExtVT, Amt); + Amt = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtVT, Amt); return DAG.getNode(ISD::TRUNCATE, dl, VT, DAG.getNode(Op.getOpcode(), dl, ExtVT, R, Amt)); } @@ -22919,7 +23283,8 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget, V0 = DAG.getBitcast(VT, V0); V1 = DAG.getBitcast(VT, V1); Sel = DAG.getBitcast(VT, Sel); - Sel = DAG.getNode(X86ISD::CVT2MASK, dl, MaskVT, Sel); + Sel = DAG.getNode(X86ISD::PCMPGTM, dl, MaskVT, + DAG.getConstant(0, dl, VT), Sel); return DAG.getBitcast(SelVT, DAG.getSelect(dl, VT, Sel, V0, V1)); } else if (Subtarget.hasSSE41()) { // On SSE41 targets we make use of the fact that VSELECT lowers @@ -23712,15 +24077,14 @@ static SDValue LowerVectorCTPOP(SDValue Op, const X86Subtarget &Subtarget, // TRUNC(CTPOP(ZEXT(X))) to make use of vXi32/vXi64 VPOPCNT instructions. 
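// Illustrative aside (not part of this patch): VPOPCNTDQ only provides
// vXi32/vXi64 popcounts, so the vXi8/vXi16 CTPOP lowering below uses
// trunc(ctpop(zext(x))) -- zero extension adds no set bits, so the count is
// unchanged and always fits back in the narrow element. Scalar analogue for
// an 8-bit value using a 32-bit popcount (compiler builtin assumed):
#include <cstdint>

static uint8_t popcount8ViaZext(uint8_t X) {
  uint32_t Wide = X;                                  // zero-extend: same bits
  uint32_t Count = uint32_t(__builtin_popcount(Wide));
  return static_cast<uint8_t>(Count);                 // truncate: count <= 8
}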
if (Subtarget.hasVPOPCNTDQ()) { - if (VT == MVT::v8i16) { - Op = DAG.getNode(X86ISD::VZEXT, DL, MVT::v8i64, Op0); - Op = DAG.getNode(ISD::CTPOP, DL, MVT::v8i64, Op); - return DAG.getNode(X86ISD::VTRUNC, DL, VT, Op); - } - if (VT == MVT::v16i8 || VT == MVT::v16i16) { - Op = DAG.getNode(X86ISD::VZEXT, DL, MVT::v16i32, Op0); - Op = DAG.getNode(ISD::CTPOP, DL, MVT::v16i32, Op); - return DAG.getNode(X86ISD::VTRUNC, DL, VT, Op); + unsigned NumElems = VT.getVectorNumElements(); + assert((VT.getVectorElementType() == MVT::i8 || + VT.getVectorElementType() == MVT::i16) && "Unexpected type"); + if (NumElems < 16 || (NumElems == 16 && Subtarget.canExtendTo512DQ())) { + MVT NewVT = MVT::getVectorVT(MVT::i32, NumElems); + Op = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, Op0); + Op = DAG.getNode(ISD::CTPOP, DL, NewVT, Op); + return DAG.getNode(ISD::TRUNCATE, DL, VT, Op); } } @@ -23795,12 +24159,13 @@ static SDValue LowerBITREVERSE_XOP(SDValue Op, SelectionDAG &DAG) { static SDValue LowerBITREVERSE(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { - if (Subtarget.hasXOP()) + MVT VT = Op.getSimpleValueType(); + + if (Subtarget.hasXOP() && !VT.is512BitVector()) return LowerBITREVERSE_XOP(Op, DAG); assert(Subtarget.hasSSSE3() && "SSSE3 required for BITREVERSE"); - MVT VT = Op.getSimpleValueType(); SDValue In = Op.getOperand(0); SDLoc DL(Op); @@ -23872,7 +24237,7 @@ static SDValue lowerAtomicArithWithLOCK(SDValue N, SelectionDAG &DAG, if (auto *C = dyn_cast(N->getOperand(2))) { // Convert to inc/dec if they aren't slow or we are optimizing for size. if (AllowIncDec && (!Subtarget.slowIncDec() || - DAG.getMachineFunction().getFunction()->optForSize())) { + DAG.getMachineFunction().getFunction().optForSize())) { if ((NewOpc == X86ISD::LADD && C->isOne()) || (NewOpc == X86ISD::LSUB && C->isAllOnesValue())) return DAG.getMemIntrinsicNode(X86ISD::LINC, SDLoc(N), @@ -24006,8 +24371,9 @@ static SDValue LowerFSINCOS(SDValue Op, const X86Subtarget &Subtarget, // Only optimize x86_64 for now. i386 is a bit messy. For f32, // the small struct {f32, f32} is returned in (eax, edx). For f64, // the results are returned via SRet in memory. - const char *LibcallName = isF64 ? "__sincos_stret" : "__sincosf_stret"; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + RTLIB::Libcall LC = isF64 ? RTLIB::SINCOS_STRET_F64 : RTLIB::SINCOS_STRET_F32; + const char *LibcallName = TLI.getLibcallName(LC); SDValue Callee = DAG.getExternalSymbol(LibcallName, TLI.getPointerTy(DAG.getDataLayout())); @@ -24096,76 +24462,81 @@ static SDValue LowerMSCATTER(SDValue Op, const X86Subtarget &Subtarget, assert(VT.getScalarSizeInBits() >= 32 && "Unsupported scatter op"); SDLoc dl(Op); + SDValue Scale = N->getScale(); SDValue Index = N->getIndex(); SDValue Mask = N->getMask(); SDValue Chain = N->getChain(); SDValue BasePtr = N->getBasePtr(); - MVT MemVT = N->getMemoryVT().getSimpleVT(); + + if (VT == MVT::v2f32) { + assert(Mask.getValueType() == MVT::v2i1 && "Unexpected mask type"); + // If the index is v2i64 and we have VLX we can use xmm for data and index. 
+ if (Index.getValueType() == MVT::v2i64 && Subtarget.hasVLX()) { + Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src, + DAG.getUNDEF(MVT::v2f32)); + SDVTList VTs = DAG.getVTList(MVT::v2i1, MVT::Other); + SDValue Ops[] = {Chain, Src, Mask, BasePtr, Index, Scale}; + SDValue NewScatter = DAG.getTargetMemSDNode( + VTs, Ops, dl, N->getMemoryVT(), N->getMemOperand()); + DAG.ReplaceAllUsesWith(Op, SDValue(NewScatter.getNode(), 1)); + return SDValue(NewScatter.getNode(), 1); + } + return SDValue(); + } + + if (VT == MVT::v2i32) { + assert(Mask.getValueType() == MVT::v2i1 && "Unexpected mask type"); + Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, Src, + DAG.getUNDEF(MVT::v2i32)); + // If the index is v2i64 and we have VLX we can use xmm for data and index. + if (Index.getValueType() == MVT::v2i64 && Subtarget.hasVLX()) { + SDVTList VTs = DAG.getVTList(MVT::v2i1, MVT::Other); + SDValue Ops[] = {Chain, Src, Mask, BasePtr, Index, Scale}; + SDValue NewScatter = DAG.getTargetMemSDNode( + VTs, Ops, dl, N->getMemoryVT(), N->getMemOperand()); + DAG.ReplaceAllUsesWith(Op, SDValue(NewScatter.getNode(), 1)); + return SDValue(NewScatter.getNode(), 1); + } + // Custom widen all the operands to avoid promotion. + EVT NewIndexVT = EVT::getVectorVT( + *DAG.getContext(), Index.getValueType().getVectorElementType(), 4); + Index = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewIndexVT, Index, + DAG.getUNDEF(Index.getValueType())); + Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i1, Mask, + DAG.getConstant(0, dl, MVT::v2i1)); + SDValue Ops[] = {Chain, Src, Mask, BasePtr, Index, Scale}; + return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), N->getMemoryVT(), dl, + Ops, N->getMemOperand()); + } + MVT IndexVT = Index.getSimpleValueType(); MVT MaskVT = Mask.getSimpleValueType(); - if (MemVT.getScalarSizeInBits() < VT.getScalarSizeInBits()) { - // The v2i32 value was promoted to v2i64. - // Now we "redo" the type legalizer's work and widen the original - // v2i32 value to v4i32. The original v2i32 is retrieved from v2i64 - // with a shuffle. - assert((MemVT == MVT::v2i32 && VT == MVT::v2i64) && - "Unexpected memory type"); - int ShuffleMask[] = {0, 2, -1, -1}; - Src = DAG.getVectorShuffle(MVT::v4i32, dl, DAG.getBitcast(MVT::v4i32, Src), - DAG.getUNDEF(MVT::v4i32), ShuffleMask); - // Now we have 4 elements instead of 2. - // Expand the index. - MVT NewIndexVT = MVT::getVectorVT(IndexVT.getScalarType(), 4); - Index = ExtendToType(Index, NewIndexVT, DAG); - - // Expand the mask with zeroes - // Mask may be <2 x i64> or <2 x i1> at this moment - assert((MaskVT == MVT::v2i1 || MaskVT == MVT::v2i64) && - "Unexpected mask type"); - MVT ExtMaskVT = MVT::getVectorVT(MaskVT.getScalarType(), 4); - Mask = ExtendToType(Mask, ExtMaskVT, DAG, true); - VT = MVT::v4i32; - } + // If the index is v2i32, we're being called by type legalization and we + // should just let the default handling take care of it. + if (IndexVT == MVT::v2i32) + return SDValue(); - unsigned NumElts = VT.getVectorNumElements(); + // If we don't have VLX and neither the passthru or index is 512-bits, we + // need to widen until one is. if (!Subtarget.hasVLX() && !VT.is512BitVector() && !Index.getSimpleValueType().is512BitVector()) { - // AVX512F supports only 512-bit vectors. Or data or index should - // be 512 bit wide. 
If now the both index and data are 256-bit, but - // the vector contains 8 elements, we just sign-extend the index - if (IndexVT == MVT::v8i32) - // Just extend index - Index = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i64, Index); - else { - // The minimal number of elts in scatter is 8 - NumElts = 8; - // Index - MVT NewIndexVT = MVT::getVectorVT(IndexVT.getScalarType(), NumElts); - // Use original index here, do not modify the index twice - Index = ExtendToType(N->getIndex(), NewIndexVT, DAG); - if (IndexVT.getScalarType() == MVT::i32) - Index = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i64, Index); - - // Mask - // At this point we have promoted mask operand - assert(MaskVT.getScalarSizeInBits() >= 32 && "unexpected mask type"); - MVT ExtMaskVT = MVT::getVectorVT(MaskVT.getScalarType(), NumElts); - // Use the original mask here, do not modify the mask twice - Mask = ExtendToType(N->getMask(), ExtMaskVT, DAG, true); - - // The value that should be stored - MVT NewVT = MVT::getVectorVT(VT.getScalarType(), NumElts); - Src = ExtendToType(Src, NewVT, DAG); - } - } - // If the mask is "wide" at this point - truncate it to i1 vector - MVT BitMaskVT = MVT::getVectorVT(MVT::i1, NumElts); - Mask = DAG.getNode(ISD::TRUNCATE, dl, BitMaskVT, Mask); - - // The mask is killed by scatter, add it to the values - SDVTList VTs = DAG.getVTList(BitMaskVT, MVT::Other); - SDValue Ops[] = {Chain, Src, Mask, BasePtr, Index}; + // Determine how much we need to widen by to get a 512-bit type. + unsigned Factor = std::min(512/VT.getSizeInBits(), + 512/IndexVT.getSizeInBits()); + unsigned NumElts = VT.getVectorNumElements() * Factor; + + VT = MVT::getVectorVT(VT.getVectorElementType(), NumElts); + IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(), NumElts); + MaskVT = MVT::getVectorVT(MVT::i1, NumElts); + + Src = ExtendToType(Src, VT, DAG); + Index = ExtendToType(Index, IndexVT, DAG); + Mask = ExtendToType(Mask, MaskVT, DAG, true); + } + + SDVTList VTs = DAG.getVTList(MaskVT, MVT::Other); + SDValue Ops[] = {Chain, Src, Mask, BasePtr, Index, Scale}; SDValue NewScatter = DAG.getTargetMemSDNode( VTs, Ops, dl, N->getMemoryVT(), N->getMemOperand()); DAG.ReplaceAllUsesWith(Op, SDValue(NewScatter.getNode(), 1)); @@ -24187,11 +24558,6 @@ static SDValue LowerMLOAD(SDValue Op, const X86Subtarget &Subtarget, assert((!N->isExpandingLoad() || ScalarVT.getSizeInBits() >= 32) && "Expanding masked load is supported for 32 and 64-bit types only!"); - // 4x32, 4x64 and 2x64 vectors of non-expanding loads are legal regardless of - // VLX. These types for exp-loads are handled here. - if (!N->isExpandingLoad() && VT.getVectorNumElements() <= 4) - return Op; - assert(Subtarget.hasAVX512() && !Subtarget.hasVLX() && !VT.is512BitVector() && "Cannot lower masked load op."); @@ -24208,16 +24574,12 @@ static SDValue LowerMLOAD(SDValue Op, const X86Subtarget &Subtarget, Src0 = ExtendToType(Src0, WideDataVT, DAG); // Mask element has to be i1. 
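// Illustrative aside (not part of this patch): without VLX the scatter above
// (and the matching gather change below) must use 512-bit operands, so the
// data, index and mask are all widened by the same factor -- the smaller of
// the two ratios, so whichever of the data or index vector is wider ends up
// exactly 512 bits. Sketch of that factor computation (name invented):
#include <algorithm>

static unsigned widenFactorTo512(unsigned DataBits, unsigned IndexBits) {
  return std::min(512u / DataBits, 512u / IndexBits);
}
// e.g. v4f64 data (256 bits) with a v4i32 index (128 bits): factor
// min(2, 4) == 2, giving v8f64 data (512 bits) and a v8i32 index.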
- MVT MaskEltTy = Mask.getSimpleValueType().getScalarType(); - assert((MaskEltTy == MVT::i1 || VT.getVectorNumElements() <= 4) && - "We handle 4x32, 4x64 and 2x64 vectors only in this case"); + assert(Mask.getSimpleValueType().getScalarType() == MVT::i1 && + "Unexpected mask type"); - MVT WideMaskVT = MVT::getVectorVT(MaskEltTy, NumEltsInWideVec); + MVT WideMaskVT = MVT::getVectorVT(MVT::i1, NumEltsInWideVec); Mask = ExtendToType(Mask, WideMaskVT, DAG, true); - if (MaskEltTy != MVT::i1) - Mask = DAG.getNode(ISD::TRUNCATE, dl, - MVT::getVectorVT(MVT::i1, NumEltsInWideVec), Mask); SDValue NewLoad = DAG.getMaskedLoad(WideDataVT, dl, N->getChain(), N->getBasePtr(), Mask, Src0, N->getMemoryVT(), N->getMemOperand(), @@ -24246,10 +24608,6 @@ static SDValue LowerMSTORE(SDValue Op, const X86Subtarget &Subtarget, assert((!N->isCompressingStore() || ScalarVT.getSizeInBits() >= 32) && "Expanding masked load is supported for 32 and 64-bit types only!"); - // 4x32 and 2x64 vectors of non-compressing stores are legal regardless to VLX. - if (!N->isCompressingStore() && VT.getVectorNumElements() <= 4) - return Op; - assert(Subtarget.hasAVX512() && !Subtarget.hasVLX() && !VT.is512BitVector() && "Cannot lower masked store op."); @@ -24264,17 +24622,13 @@ static SDValue LowerMSTORE(SDValue Op, const X86Subtarget &Subtarget, MVT WideDataVT = MVT::getVectorVT(ScalarVT, NumEltsInWideVec); // Mask element has to be i1. - MVT MaskEltTy = Mask.getSimpleValueType().getScalarType(); - assert((MaskEltTy == MVT::i1 || VT.getVectorNumElements() <= 4) && - "We handle 4x32, 4x64 and 2x64 vectors only in this case"); + assert(Mask.getSimpleValueType().getScalarType() == MVT::i1 && + "Unexpected mask type"); - MVT WideMaskVT = MVT::getVectorVT(MaskEltTy, NumEltsInWideVec); + MVT WideMaskVT = MVT::getVectorVT(MVT::i1, NumEltsInWideVec); DataToStore = ExtendToType(DataToStore, WideDataVT, DAG); Mask = ExtendToType(Mask, WideMaskVT, DAG, true); - if (MaskEltTy != MVT::i1) - Mask = DAG.getNode(ISD::TRUNCATE, dl, - MVT::getVectorVT(MVT::i1, NumEltsInWideVec), Mask); return DAG.getMaskedStore(N->getChain(), dl, DataToStore, N->getBasePtr(), Mask, N->getMemoryVT(), N->getMemOperand(), N->isTruncatingStore(), N->isCompressingStore()); @@ -24294,92 +24648,40 @@ static SDValue LowerMGATHER(SDValue Op, const X86Subtarget &Subtarget, MVT IndexVT = Index.getSimpleValueType(); MVT MaskVT = Mask.getSimpleValueType(); - unsigned NumElts = VT.getVectorNumElements(); assert(VT.getScalarSizeInBits() >= 32 && "Unsupported gather op"); + // If the index is v2i32, we're being called by type legalization. + if (IndexVT == MVT::v2i32) + return SDValue(); + + // If we don't have VLX and neither the passthru or index is 512-bits, we + // need to widen until one is. + MVT OrigVT = VT; if (Subtarget.hasAVX512() && !Subtarget.hasVLX() && !VT.is512BitVector() && - !Index.getSimpleValueType().is512BitVector()) { - // AVX512F supports only 512-bit vectors. Or data or index should - // be 512 bit wide. 
If now the both index and data are 256-bit, but - // the vector contains 8 elements, we just sign-extend the index - if (NumElts == 8) { - Index = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i64, Index); - SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index }; - SDValue NewGather = DAG.getTargetMemSDNode( - DAG.getVTList(VT, MaskVT, MVT::Other), Ops, dl, N->getMemoryVT(), - N->getMemOperand()); - return DAG.getMergeValues({NewGather, NewGather.getValue(2)}, dl); - } - - // Minimal number of elements in Gather - NumElts = 8; - // Index - MVT NewIndexVT = MVT::getVectorVT(IndexVT.getScalarType(), NumElts); - Index = ExtendToType(Index, NewIndexVT, DAG); - if (IndexVT.getScalarType() == MVT::i32) - Index = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i64, Index); - - // Mask - MVT MaskBitVT = MVT::getVectorVT(MVT::i1, NumElts); - // At this point we have promoted mask operand - assert(MaskVT.getScalarSizeInBits() >= 32 && "unexpected mask type"); - MVT ExtMaskVT = MVT::getVectorVT(MaskVT.getScalarType(), NumElts); - Mask = ExtendToType(Mask, ExtMaskVT, DAG, true); - Mask = DAG.getNode(ISD::TRUNCATE, dl, MaskBitVT, Mask); - - // The pass-through value - MVT NewVT = MVT::getVectorVT(VT.getScalarType(), NumElts); - Src0 = ExtendToType(Src0, NewVT, DAG); - - SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index }; - SDValue NewGather = DAG.getTargetMemSDNode( - DAG.getVTList(NewVT, MaskBitVT, MVT::Other), Ops, dl, N->getMemoryVT(), - N->getMemOperand()); - SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, - NewGather.getValue(0), - DAG.getIntPtrConstant(0, dl)); - SDValue RetOps[] = {Extract, NewGather.getValue(2)}; - return DAG.getMergeValues(RetOps, dl); - } - if (N->getMemoryVT() == MVT::v2i32) { - // There is a special case when the return type is v2i32 is illegal and - // the type legaizer extended it to v2i64. Without this conversion we end up - // with VPGATHERQQ (reading q-words from the memory) instead of VPGATHERQD. - // In order to avoid this situation, we'll build an X86 specific Gather node - // with index v2i64 and value type v4i32. - assert(VT == MVT::v2i64 && Src0.getValueType() == MVT::v2i64 && - "Unexpected type in masked gather"); - Src0 = - DAG.getVectorShuffle(MVT::v4i32, dl, DAG.getBitcast(MVT::v4i32, Src0), - DAG.getUNDEF(MVT::v4i32), { 0, 2, -1, -1 }); - // The mask should match the destination type. Extending mask with zeroes - // is not necessary since instruction itself reads only two values from - // memory. - SDVTList VTList; - if (Subtarget.hasVLX()) { - Mask = ExtendToType(Mask, MVT::v4i1, DAG, false); - VTList = DAG.getVTList(MVT::v4i32, MVT::v2i1, MVT::Other); - } else { - Mask = - DAG.getVectorShuffle(MVT::v4i32, dl, DAG.getBitcast(MVT::v4i32, Mask), - DAG.getUNDEF(MVT::v4i32), {0, 2, -1, -1}); - VTList = DAG.getVTList(MVT::v4i32, MVT::v4i32, MVT::Other); - } - SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index }; - SDValue NewGather = DAG.getTargetMemSDNode( - VTList, Ops, dl, N->getMemoryVT(), N->getMemOperand()); + !IndexVT.is512BitVector()) { + // Determine how much we need to widen by to get a 512-bit type. 
+ unsigned Factor = std::min(512/VT.getSizeInBits(), + 512/IndexVT.getSizeInBits()); - SDValue Sext = getExtendInVec(X86ISD::VSEXT, dl, MVT::v2i64, - NewGather.getValue(0), DAG); - SDValue RetOps[] = { Sext, NewGather.getValue(2) }; - return DAG.getMergeValues(RetOps, dl); + unsigned NumElts = VT.getVectorNumElements() * Factor; + + VT = MVT::getVectorVT(VT.getVectorElementType(), NumElts); + IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(), NumElts); + MaskVT = MVT::getVectorVT(MVT::i1, NumElts); + + Src0 = ExtendToType(Src0, VT, DAG); + Index = ExtendToType(Index, IndexVT, DAG); + Mask = ExtendToType(Mask, MaskVT, DAG, true); } - SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index }; + SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index, + N->getScale() }; SDValue NewGather = DAG.getTargetMemSDNode( DAG.getVTList(VT, MaskVT, MVT::Other), Ops, dl, N->getMemoryVT(), N->getMemOperand()); - return DAG.getMergeValues({NewGather, NewGather.getValue(2)}, dl); + SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OrigVT, + NewGather, DAG.getIntPtrConstant(0, dl)); + return DAG.getMergeValues({Extract, NewGather.getValue(2)}, dl); } SDValue X86TargetLowering::LowerGC_TRANSITION_START(SDValue Op, @@ -24446,6 +24748,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); case ISD::INSERT_SUBVECTOR: return LowerINSERT_SUBVECTOR(Op, Subtarget,DAG); + case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op,Subtarget,DAG); case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, Subtarget,DAG); case ISD::ConstantPool: return LowerConstantPool(Op, DAG); case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); @@ -24639,12 +24942,21 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, assert(Subtarget.hasSSE2() && "Requires at least SSE2!"); SDValue Src = N->getOperand(0); if (Src.getValueType() == MVT::v2f64) { - SDValue Idx = DAG.getIntPtrConstant(0, dl); - SDValue Res = DAG.getNode(IsSigned ? X86ISD::CVTTP2SI - : X86ISD::CVTTP2UI, - dl, MVT::v4i32, Src); - if (!ExperimentalVectorWideningLegalization) - Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res, Idx); + MVT ResVT = MVT::v4i32; + unsigned Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI; + if (!IsSigned && !Subtarget.hasVLX()) { + // Widen to 512-bits. + ResVT = MVT::v8i32; + Opc = ISD::FP_TO_UINT; + Src = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v8f64, + DAG.getUNDEF(MVT::v8f64), + Src, DAG.getIntPtrConstant(0, dl)); + } + SDValue Res = DAG.getNode(Opc, dl, ResVT, Src); + ResVT = ExperimentalVectorWideningLegalization ? 
MVT::v4i32 + : MVT::v2i32; + Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResVT, Res, + DAG.getIntPtrConstant(0, dl)); Results.push_back(Res); return; } @@ -24853,7 +25165,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, case ISD::BITCAST: { assert(Subtarget.hasSSE2() && "Requires at least SSE2!"); EVT DstVT = N->getValueType(0); - EVT SrcVT = N->getOperand(0)->getValueType(0); + EVT SrcVT = N->getOperand(0).getValueType(); if (SrcVT != MVT::f64 || (DstVT != MVT::v2i32 && DstVT != MVT::v4i16 && DstVT != MVT::v8i8)) @@ -24883,7 +25195,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, } case ISD::MGATHER: { EVT VT = N->getValueType(0); - if (VT == MVT::v2f32 && Subtarget.hasVLX()) { + if (VT == MVT::v2f32 && (Subtarget.hasVLX() || !Subtarget.hasAVX512())) { auto *Gather = cast(N); SDValue Index = Gather->getIndex(); if (Index.getValueType() != MVT::v2i64) @@ -24893,15 +25205,74 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, SDValue Src0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Gather->getValue(), DAG.getUNDEF(MVT::v2f32)); + if (!Subtarget.hasVLX()) { + // We need to widen the mask, but the instruction will only use 2 + // of its elements. So we can use undef. + Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i1, Mask, + DAG.getUNDEF(MVT::v2i1)); + Mask = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Mask); + } SDValue Ops[] = { Gather->getChain(), Src0, Mask, Gather->getBasePtr(), - Index }; + Index, Gather->getScale() }; SDValue Res = DAG.getTargetMemSDNode( - DAG.getVTList(MVT::v4f32, MVT::v2i1, MVT::Other), Ops, dl, + DAG.getVTList(MVT::v4f32, Mask.getValueType(), MVT::Other), Ops, dl, Gather->getMemoryVT(), Gather->getMemOperand()); Results.push_back(Res); Results.push_back(Res.getValue(2)); return; } + if (VT == MVT::v2i32) { + auto *Gather = cast(N); + SDValue Index = Gather->getIndex(); + SDValue Mask = Gather->getMask(); + assert(Mask.getValueType() == MVT::v2i1 && "Unexpected mask type"); + SDValue Src0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, + Gather->getValue(), + DAG.getUNDEF(MVT::v2i32)); + // If the index is v2i64 we can use it directly. + if (Index.getValueType() == MVT::v2i64 && + (Subtarget.hasVLX() || !Subtarget.hasAVX512())) { + if (!Subtarget.hasVLX()) { + // We need to widen the mask, but the instruction will only use 2 + // of its elements. So we can use undef. + Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i1, Mask, + DAG.getUNDEF(MVT::v2i1)); + Mask = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Mask); + } + SDValue Ops[] = { Gather->getChain(), Src0, Mask, Gather->getBasePtr(), + Index, Gather->getScale() }; + SDValue Res = DAG.getTargetMemSDNode( + DAG.getVTList(MVT::v4i32, Mask.getValueType(), MVT::Other), Ops, dl, + Gather->getMemoryVT(), Gather->getMemOperand()); + SDValue Chain = Res.getValue(2); + if (!ExperimentalVectorWideningLegalization) + Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res, + DAG.getIntPtrConstant(0, dl)); + Results.push_back(Res); + Results.push_back(Chain); + return; + } + EVT IndexVT = Index.getValueType(); + EVT NewIndexVT = EVT::getVectorVT(*DAG.getContext(), + IndexVT.getScalarType(), 4); + // Otherwise we need to custom widen everything to avoid promotion. 
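// Illustrative aside (not part of this patch): when the v2i32 gather below is
// custom widened to four lanes, the two extra mask lanes are padded with zero
// so they can never touch memory; padding with undef is only safe on the
// paths where the instruction itself reads just the original two lanes.
// Scalar analogue of a zero-padded masked gather (names invented):
#include <cstdint>

static void maskedGather4(const int32_t *Base, const int64_t Index[4],
                          const bool Mask[4], int32_t Out[4]) {
  for (int i = 0; i != 4; ++i)
    if (Mask[i])               // padded lanes have Mask[i] == false
      Out[i] = Base[Index[i]];
}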
+ Index = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewIndexVT, Index, + DAG.getUNDEF(IndexVT)); + Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i1, Mask, + DAG.getConstant(0, dl, MVT::v2i1)); + SDValue Ops[] = { Gather->getChain(), Src0, Mask, Gather->getBasePtr(), + Index, Gather->getScale() }; + SDValue Res = DAG.getMaskedGather(DAG.getVTList(MVT::v4i32, MVT::Other), + Gather->getMemoryVT(), dl, Ops, + Gather->getMemOperand()); + SDValue Chain = Res.getValue(1); + if (!ExperimentalVectorWideningLegalization) + Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res, + DAG.getIntPtrConstant(0, dl)); + Results.push_back(Res); + Results.push_back(Chain); + return; + } break; } } @@ -25025,7 +25396,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::VFPROUND: return "X86ISD::VFPROUND"; case X86ISD::VFPROUND_RND: return "X86ISD::VFPROUND_RND"; case X86ISD::VFPROUNDS_RND: return "X86ISD::VFPROUNDS_RND"; - case X86ISD::CVT2MASK: return "X86ISD::CVT2MASK"; case X86ISD::VSHLDQ: return "X86ISD::VSHLDQ"; case X86ISD::VSRLDQ: return "X86ISD::VSRLDQ"; case X86ISD::VSHL: return "X86ISD::VSHL"; @@ -25096,7 +25466,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::VBROADCAST: return "X86ISD::VBROADCAST"; case X86ISD::VBROADCASTM: return "X86ISD::VBROADCASTM"; case X86ISD::SUBV_BROADCAST: return "X86ISD::SUBV_BROADCAST"; - case X86ISD::VEXTRACT: return "X86ISD::VEXTRACT"; case X86ISD::VPERMILPV: return "X86ISD::VPERMILPV"; case X86ISD::VPERMILPI: return "X86ISD::VPERMILPI"; case X86ISD::VPERM2X128: return "X86ISD::VPERM2X128"; @@ -25312,9 +25681,9 @@ bool X86TargetLowering::isVectorShiftByScalarCheap(Type *Ty) const { if (Bits == 8) return false; - // On AVX2 there are new vpsllv[dq] instructions (and other shifts), that make - // variable shifts just as cheap as scalar ones. - if (Subtarget.hasInt256() && (Bits == 32 || Bits == 64)) + // AVX2 has vpsllv[dq] instructions (and other shifts) that make variable + // shifts just as cheap as scalar ones. + if (Subtarget.hasAVX2() && (Bits == 32 || Bits == 64)) return false; // Otherwise, it's significantly cheaper to shift by a scalar amount than by a @@ -25451,6 +25820,15 @@ X86TargetLowering::isVectorClearMaskLegal(const SmallVectorImpl &Mask, return isShuffleMaskLegal(Mask, VT); } +bool X86TargetLowering::areJTsAllowed(const Function *Fn) const { + // If the subtarget is using retpolines, we need to not generate jump tables. + if (Subtarget.useRetpoline()) + return false; + + // Otherwise, fallback on the generic logic. + return TargetLowering::areJTsAllowed(Fn); +} + //===----------------------------------------------------------------------===// // X86 Scheduler Hooks //===----------------------------------------------------------------------===// @@ -25979,7 +26357,7 @@ MachineBasicBlock *X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter( int64_t RegSaveFrameIndex = MI.getOperand(1).getImm(); int64_t VarArgsFPOffset = MI.getOperand(2).getImm(); - if (!Subtarget.isCallingConvWin64(F->getFunction()->getCallingConv())) { + if (!Subtarget.isCallingConvWin64(F->getFunction().getCallingConv())) { // If %al is 0, branch around the XMM save block. 
BuildMI(MBB, DL, TII->get(X86::TEST8rr)).addReg(CountReg).addReg(CountReg); BuildMI(MBB, DL, TII->get(X86::JE_1)).addMBB(EndMBB); @@ -26622,7 +27000,7 @@ X86TargetLowering::EmitLoweredCatchRet(MachineInstr &MI, DebugLoc DL = MI.getDebugLoc(); assert(!isAsynchronousEHPersonality( - classifyEHPersonality(MF->getFunction()->getPersonalityFn())) && + classifyEHPersonality(MF->getFunction().getPersonalityFn())) && "SEH does not use catchret!"); // Only 32-bit EH needs to worry about manually restoring stack pointers. @@ -26649,7 +27027,7 @@ MachineBasicBlock * X86TargetLowering::EmitLoweredCatchPad(MachineInstr &MI, MachineBasicBlock *BB) const { MachineFunction *MF = BB->getParent(); - const Constant *PerFn = MF->getFunction()->getPersonalityFn(); + const Constant *PerFn = MF->getFunction().getPersonalityFn(); bool IsSEH = isAsynchronousEHPersonality(classifyEHPersonality(PerFn)); // Only 32-bit SEH requires special handling for catchpad. if (IsSEH && Subtarget.is32Bit()) { @@ -26753,6 +27131,115 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr &MI, return BB; } +static unsigned getOpcodeForRetpoline(unsigned RPOpc) { + switch (RPOpc) { + case X86::RETPOLINE_CALL32: + return X86::CALLpcrel32; + case X86::RETPOLINE_CALL64: + return X86::CALL64pcrel32; + case X86::RETPOLINE_TCRETURN32: + return X86::TCRETURNdi; + case X86::RETPOLINE_TCRETURN64: + return X86::TCRETURNdi64; + } + llvm_unreachable("not retpoline opcode"); +} + +static const char *getRetpolineSymbol(const X86Subtarget &Subtarget, + unsigned Reg) { + switch (Reg) { + case 0: + assert(!Subtarget.is64Bit() && "R11 should always be available on x64"); + return Subtarget.useRetpolineExternalThunk() + ? "__llvm_external_retpoline_push" + : "__llvm_retpoline_push"; + case X86::EAX: + return Subtarget.useRetpolineExternalThunk() + ? "__llvm_external_retpoline_eax" + : "__llvm_retpoline_eax"; + case X86::ECX: + return Subtarget.useRetpolineExternalThunk() + ? "__llvm_external_retpoline_ecx" + : "__llvm_retpoline_ecx"; + case X86::EDX: + return Subtarget.useRetpolineExternalThunk() + ? "__llvm_external_retpoline_edx" + : "__llvm_retpoline_edx"; + case X86::R11: + return Subtarget.useRetpolineExternalThunk() + ? "__llvm_external_retpoline_r11" + : "__llvm_retpoline_r11"; + } + llvm_unreachable("unexpected reg for retpoline"); +} + +MachineBasicBlock * +X86TargetLowering::EmitLoweredRetpoline(MachineInstr &MI, + MachineBasicBlock *BB) const { + // Copy the virtual register into the R11 physical register and + // call the retpoline thunk. + DebugLoc DL = MI.getDebugLoc(); + const X86InstrInfo *TII = Subtarget.getInstrInfo(); + unsigned CalleeVReg = MI.getOperand(0).getReg(); + unsigned Opc = getOpcodeForRetpoline(MI.getOpcode()); + + // Find an available scratch register to hold the callee. On 64-bit, we can + // just use R11, but we scan for uses anyway to ensure we don't generate + // incorrect code. On 32-bit, we use one of EAX, ECX, or EDX that isn't + // already a register use operand to the call to hold the callee. If none + // are available, push the callee instead. This is less efficient, but is + // necessary for functions using 3 regparms. Such function calls are + // (currently) not eligible for tail call optimization, because there is no + // scratch register available to hold the address of the callee. + SmallVector AvailableRegs; + if (Subtarget.is64Bit()) + AvailableRegs.push_back(X86::R11); + else + AvailableRegs.append({X86::EAX, X86::ECX, X86::EDX}); + + // Zero out any registers that are already used. 
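// For illustration only: a standalone sketch of the 32-bit scratch-register
// scan described above, using hypothetical plain-string register names instead
// of MCPhysReg values; an empty result corresponds to the PUSH fallback.
#include <algorithm>
#include <string>
#include <vector>
std::string pickRetpolineScratch32(const std::vector<std::string> &CallUses) {
  for (const char *Candidate : {"EAX", "ECX", "EDX"})
    if (std::find(CallUses.begin(), CallUses.end(), Candidate) == CallUses.end())
      return Candidate;   // first register the call does not already use
  return std::string();   // all three are argument registers: push the callee instead
}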
+ for (const auto &MO : MI.operands()) { + if (MO.isReg() && MO.isUse()) + for (unsigned &Reg : AvailableRegs) + if (Reg == MO.getReg()) + Reg = 0; + } + + // Choose the first remaining non-zero available register. + unsigned AvailableReg = 0; + for (unsigned MaybeReg : AvailableRegs) { + if (MaybeReg) { + AvailableReg = MaybeReg; + break; + } + } + + const char *Symbol = getRetpolineSymbol(Subtarget, AvailableReg); + + if (AvailableReg == 0) { + // No register available. Use PUSH. This must not be a tailcall, and this + // must not be x64. + if (Subtarget.is64Bit()) + report_fatal_error( + "Cannot make an indirect call on x86-64 using both retpoline and a " + "calling convention that preserves r11"); + if (Opc != X86::CALLpcrel32) + report_fatal_error("Cannot make an indirect tail call on x86 using " + "retpoline without a preserved register"); + BuildMI(*BB, MI, DL, TII->get(X86::PUSH32r)).addReg(CalleeVReg); + MI.getOperand(0).ChangeToES(Symbol); + MI.setDesc(TII->get(Opc)); + } else { + BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), AvailableReg) + .addReg(CalleeVReg); + MI.getOperand(0).ChangeToES(Symbol); + MI.setDesc(TII->get(Opc)); + MachineInstrBuilder(*BB->getParent(), &MI) + .addReg(AvailableReg, RegState::Implicit | RegState::Kill); + } + return BB; +} + MachineBasicBlock * X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const { @@ -27253,21 +27740,16 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, switch (MI.getOpcode()) { default: llvm_unreachable("Unexpected instr type to insert"); - case X86::TAILJMPd64: - case X86::TAILJMPr64: - case X86::TAILJMPm64: - case X86::TAILJMPr64_REX: - case X86::TAILJMPm64_REX: - llvm_unreachable("TAILJMP64 would not be touched here."); - case X86::TCRETURNdi64: - case X86::TCRETURNri64: - case X86::TCRETURNmi64: - return BB; case X86::TLS_addr32: case X86::TLS_addr64: case X86::TLS_base_addr32: case X86::TLS_base_addr64: return EmitLoweredTLSAddr(MI, BB); + case X86::RETPOLINE_CALL32: + case X86::RETPOLINE_CALL64: + case X86::RETPOLINE_TCRETURN32: + case X86::RETPOLINE_TCRETURN64: + return EmitLoweredRetpoline(MI, BB); case X86::CATCHRET: return EmitLoweredCatchRet(MI, BB); case X86::CATCHPAD: @@ -27536,6 +28018,65 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, // X86 Optimization Hooks //===----------------------------------------------------------------------===// +bool +X86TargetLowering::targetShrinkDemandedConstant(SDValue Op, + const APInt &Demanded, + TargetLoweringOpt &TLO) const { + // Only optimize Ands to prevent shrinking a constant that could be + // matched by movzx. + if (Op.getOpcode() != ISD::AND) + return false; + + EVT VT = Op.getValueType(); + + // Ignore vectors. + if (VT.isVector()) + return false; + + unsigned Size = VT.getSizeInBits(); + + // Make sure the RHS really is a constant. + ConstantSDNode *C = dyn_cast(Op.getOperand(1)); + if (!C) + return false; + + const APInt &Mask = C->getAPIntValue(); + + // Clear all non-demanded bits initially. + APInt ShrunkMask = Mask & Demanded; + + // Find the width of the shrunk mask. + unsigned Width = ShrunkMask.getActiveBits(); + + // If the mask is all 0s there's nothing to do here. + if (Width == 0) + return false; + + // Find the next power of 2 width, rounding up to a byte. + Width = PowerOf2Ceil(std::max(Width, 8U)); + // Truncate the width to size to handle illegal types. + Width = std::min(Width, Size); + + // Calculate a possible zero extend mask for this constant.
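// For illustration, the intended effect of this hook restated on plain
// integers with made-up values: if only the low 6 bits of the AND are
// demanded, a constant such as 0x7FF may be replaced by 0xFF, which x86 can
// materialize with a movzx instead of a separate AND immediate.
#include <cassert>
#include <cstdint>
int main() {
  uint32_t Mask = 0x7FF, Demanded = 0x3F;
  uint32_t Shrunk = Mask & Demanded;      // 0x3F, i.e. 6 active bits
  uint32_t ZeroExtendMask = 0xFF;         // rounded up to a whole byte
  assert((ZeroExtendMask & ~Mask) == 0);  // only rewrite when it is a subset of Mask
  assert((0x12345u & ZeroExtendMask & Demanded) ==
         (0x12345u & Mask & Demanded));   // demanded result bits are unchanged
  (void)Shrunk;
}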
+ APInt ZeroExtendMask = APInt::getLowBitsSet(Size, Width); + + // If we aren't changing the mask, just return true to keep it and prevent + // the caller from optimizing. + if (ZeroExtendMask == Mask) + return true; + + // Make sure the bits in the ZeroExtendMask are also set in the original mask. + // TODO: We should be able to set bits that aren't demanded too. + if (!ZeroExtendMask.isSubsetOf(Mask)) + return false; + + // Replace the constant with the zero extend mask. + SDLoc DL(Op); + SDValue NewC = TLO.DAG.getConstant(ZeroExtendMask, DL, VT); + SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC); + return TLO.CombineTo(Op, NewOp); +} + void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, @@ -27751,7 +28292,8 @@ bool X86TargetLowering::isGAPlusOffset(SDNode *N, // TODO: Investigate sharing more of this with shuffle lowering. static bool matchUnaryVectorShuffle(MVT MaskVT, ArrayRef Mask, bool AllowFloatDomain, bool AllowIntDomain, - SDValue &V1, SDLoc &DL, SelectionDAG &DAG, + SDValue &V1, const SDLoc &DL, + SelectionDAG &DAG, const X86Subtarget &Subtarget, unsigned &Shuffle, MVT &SrcVT, MVT &DstVT) { unsigned NumMaskElts = Mask.size(); @@ -27999,7 +28541,7 @@ static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef Mask, // TODO: Investigate sharing more of this with shuffle lowering. static bool matchBinaryVectorShuffle(MVT MaskVT, ArrayRef Mask, bool AllowFloatDomain, bool AllowIntDomain, - SDValue &V1, SDValue &V2, SDLoc &DL, + SDValue &V1, SDValue &V2, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget, unsigned &Shuffle, MVT &SrcVT, MVT &DstVT, @@ -28009,6 +28551,7 @@ static bool matchBinaryVectorShuffle(MVT MaskVT, ArrayRef Mask, if (MaskVT.is128BitVector()) { if (isTargetShuffleEquivalent(Mask, {0, 0}) && AllowFloatDomain) { V2 = V1; + V1 = (SM_SentinelUndef == Mask[0] ? DAG.getUNDEF(MVT::v4f32) : V1); Shuffle = X86ISD::MOVLHPS; SrcVT = DstVT = MVT::v4f32; return true; @@ -28062,15 +28605,11 @@ static bool matchBinaryVectorShuffle(MVT MaskVT, ArrayRef Mask, return false; } -static bool matchBinaryPermuteVectorShuffle(MVT MaskVT, ArrayRef Mask, - const APInt &Zeroable, - bool AllowFloatDomain, - bool AllowIntDomain, - SDValue &V1, SDValue &V2, SDLoc &DL, - SelectionDAG &DAG, - const X86Subtarget &Subtarget, - unsigned &Shuffle, MVT &ShuffleVT, - unsigned &PermuteImm) { +static bool matchBinaryPermuteVectorShuffle( + MVT MaskVT, ArrayRef Mask, const APInt &Zeroable, + bool AllowFloatDomain, bool AllowIntDomain, SDValue &V1, SDValue &V2, + const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget, + unsigned &Shuffle, MVT &ShuffleVT, unsigned &PermuteImm) { unsigned NumMaskElts = Mask.size(); unsigned EltSizeInBits = MaskVT.getScalarSizeInBits(); @@ -28274,8 +28813,6 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, // TODO - attempt to narrow Mask back to writemask size. bool IsEVEXShuffle = RootSizeInBits == 512 || (Subtarget.hasVLX() && RootSizeInBits >= 128); - if (IsEVEXShuffle && (RootVT.getScalarSizeInBits() != BaseMaskEltSizeInBits)) - return SDValue(); // TODO - handle 128/256-bit lane shuffles of 512-bit vectors. @@ -28356,14 +28893,14 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, } } + SDValue NewV1 = V1; // Save operand in case early exit happens. 
if (matchUnaryVectorShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain, - V1, DL, DAG, Subtarget, Shuffle, ShuffleSrcVT, - ShuffleVT)) { + NewV1, DL, DAG, Subtarget, Shuffle, + ShuffleSrcVT, ShuffleVT) && + (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) { if (Depth == 1 && Root.getOpcode() == Shuffle) return SDValue(); // Nothing to do! - if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements())) - return SDValue(); // AVX512 Writemask clash. - Res = DAG.getBitcast(ShuffleSrcVT, V1); + Res = DAG.getBitcast(ShuffleSrcVT, NewV1); DCI.AddToWorklist(Res.getNode()); Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res); DCI.AddToWorklist(Res.getNode()); @@ -28372,11 +28909,10 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, if (matchUnaryPermuteVectorShuffle(MaskVT, Mask, Zeroable, AllowFloatDomain, AllowIntDomain, Subtarget, Shuffle, - ShuffleVT, PermuteImm)) { + ShuffleVT, PermuteImm) && + (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) { if (Depth == 1 && Root.getOpcode() == Shuffle) return SDValue(); // Nothing to do! - if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements())) - return SDValue(); // AVX512 Writemask clash. Res = DAG.getBitcast(ShuffleVT, V1); DCI.AddToWorklist(Res.getNode()); Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res, @@ -28386,35 +28922,36 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, } } + SDValue NewV1 = V1; // Save operands in case early exit happens. + SDValue NewV2 = V2; if (matchBinaryVectorShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain, - V1, V2, DL, DAG, Subtarget, Shuffle, ShuffleSrcVT, - ShuffleVT, UnaryShuffle)) { + NewV1, NewV2, DL, DAG, Subtarget, Shuffle, + ShuffleSrcVT, ShuffleVT, UnaryShuffle) && + (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) { if (Depth == 1 && Root.getOpcode() == Shuffle) return SDValue(); // Nothing to do! - if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements())) - return SDValue(); // AVX512 Writemask clash. - V1 = DAG.getBitcast(ShuffleSrcVT, V1); - DCI.AddToWorklist(V1.getNode()); - V2 = DAG.getBitcast(ShuffleSrcVT, V2); - DCI.AddToWorklist(V2.getNode()); - Res = DAG.getNode(Shuffle, DL, ShuffleVT, V1, V2); + NewV1 = DAG.getBitcast(ShuffleSrcVT, NewV1); + DCI.AddToWorklist(NewV1.getNode()); + NewV2 = DAG.getBitcast(ShuffleSrcVT, NewV2); + DCI.AddToWorklist(NewV2.getNode()); + Res = DAG.getNode(Shuffle, DL, ShuffleVT, NewV1, NewV2); DCI.AddToWorklist(Res.getNode()); return DAG.getBitcast(RootVT, Res); } - if (matchBinaryPermuteVectorShuffle(MaskVT, Mask, Zeroable, AllowFloatDomain, - AllowIntDomain, V1, V2, DL, DAG, - Subtarget, Shuffle, ShuffleVT, - PermuteImm)) { + NewV1 = V1; // Save operands in case early exit happens. + NewV2 = V2; + if (matchBinaryPermuteVectorShuffle( + MaskVT, Mask, Zeroable, AllowFloatDomain, AllowIntDomain, NewV1, + NewV2, DL, DAG, Subtarget, Shuffle, ShuffleVT, PermuteImm) && + (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) { if (Depth == 1 && Root.getOpcode() == Shuffle) return SDValue(); // Nothing to do! - if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements())) - return SDValue(); // AVX512 Writemask clash. 
- V1 = DAG.getBitcast(ShuffleVT, V1); - DCI.AddToWorklist(V1.getNode()); - V2 = DAG.getBitcast(ShuffleVT, V2); - DCI.AddToWorklist(V2.getNode()); - Res = DAG.getNode(Shuffle, DL, ShuffleVT, V1, V2, + NewV1 = DAG.getBitcast(ShuffleVT, NewV1); + DCI.AddToWorklist(NewV1.getNode()); + NewV2 = DAG.getBitcast(ShuffleVT, NewV2); + DCI.AddToWorklist(NewV2.getNode()); + Res = DAG.getNode(Shuffle, DL, ShuffleVT, NewV1, NewV2, DAG.getConstant(PermuteImm, DL, MVT::i8)); DCI.AddToWorklist(Res.getNode()); return DAG.getBitcast(RootVT, Res); @@ -28461,8 +28998,8 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, return SDValue(); // Depth threshold above which we can efficiently use variable mask shuffles. - // TODO This should probably be target specific. - bool AllowVariableMask = (Depth >= 3) || HasVariableMask; + int VariableShuffleDepth = Subtarget.hasFastVariableShuffle() ? 2 : 3; + bool AllowVariableMask = (Depth >= VariableShuffleDepth) || HasVariableMask; bool MaskContainsZeros = any_of(Mask, [](int M) { return M == SM_SentinelZero; }); @@ -29565,17 +30102,18 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG, return SDValue(); } -/// Returns true iff the shuffle node \p N can be replaced with ADDSUB -/// operation. If true is returned then the operands of ADDSUB operation +/// Returns true iff the shuffle node \p N can be replaced with ADDSUB(SUBADD) +/// operation. If true is returned then the operands of ADDSUB(SUBADD) operation /// are written to the parameters \p Opnd0 and \p Opnd1. /// -/// We combine shuffle to ADDSUB directly on the abstract vector shuffle nodes +/// We combine shuffle to ADDSUB(SUBADD) directly on the abstract vector shuffle nodes /// so it is easier to generically match. We also insert dummy vector shuffle /// nodes for the operands which explicitly discard the lanes which are unused /// by this operation to try to flow through the rest of the combiner /// the fact that they're unused. -static bool isAddSub(SDNode *N, const X86Subtarget &Subtarget, - SDValue &Opnd0, SDValue &Opnd1) { +static bool isAddSubOrSubAdd(SDNode *N, const X86Subtarget &Subtarget, + SDValue &Opnd0, SDValue &Opnd1, + bool matchSubAdd = false) { EVT VT = N->getValueType(0); if ((!Subtarget.hasSSE3() || (VT != MVT::v4f32 && VT != MVT::v2f64)) && @@ -29595,12 +30133,15 @@ static bool isAddSub(SDNode *N, const X86Subtarget &Subtarget, SDValue V1 = N->getOperand(0); SDValue V2 = N->getOperand(1); - // We require the first shuffle operand to be the FSUB node, and the second to - // be the FADD node. - if (V1.getOpcode() == ISD::FADD && V2.getOpcode() == ISD::FSUB) { + unsigned ExpectedOpcode = matchSubAdd ? ISD::FADD : ISD::FSUB; + unsigned NextExpectedOpcode = matchSubAdd ? ISD::FSUB : ISD::FADD; + + // We require the first shuffle operand to be the ExpectedOpcode node, + // and the second to be the NextExpectedOpcode node. + if (V1.getOpcode() == NextExpectedOpcode && V2.getOpcode() == ExpectedOpcode) { ShuffleVectorSDNode::commuteMask(Mask); std::swap(V1, V2); - } else if (V1.getOpcode() != ISD::FSUB || V2.getOpcode() != ISD::FADD) + } else if (V1.getOpcode() != ExpectedOpcode || V2.getOpcode() != NextExpectedOpcode) return false; // If there are other uses of these operations we can't fold them. 
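// For reference, a scalar model of what the ADDSUB node computes for v4f32
// (SUBADD swaps the roles of the even and odd lanes); the combine above
// recognizes a shuffle that interleaves an FSUB and an FADD of the same
// operands into this single node.
#include <array>
std::array<float, 4> addsub(const std::array<float, 4> &A,
                            const std::array<float, 4> &B) {
  std::array<float, 4> R;
  for (int I = 0; I != 4; ++I)
    R[I] = (I % 2 == 0) ? A[I] - B[I] : A[I] + B[I]; // even lanes subtract, odd lanes add
  return R;
}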
@@ -29634,7 +30175,7 @@ static SDValue combineShuffleToAddSubOrFMAddSub(SDNode *N, const X86Subtarget &Subtarget, SelectionDAG &DAG) { SDValue Opnd0, Opnd1; - if (!isAddSub(N, Subtarget, Opnd0, Opnd1)) + if (!isAddSubOrSubAdd(N, Subtarget, Opnd0, Opnd1)) return SDValue(); EVT VT = N->getValueType(0); @@ -29642,7 +30183,7 @@ static SDValue combineShuffleToAddSubOrFMAddSub(SDNode *N, // Try to generate X86ISD::FMADDSUB node here. SDValue Opnd2; - if (isFMAddSub(Subtarget, DAG, Opnd0, Opnd1, Opnd2)) + if (isFMAddSubOrFMSubAdd(Subtarget, DAG, Opnd0, Opnd1, Opnd2, 2)) return DAG.getNode(X86ISD::FMADDSUB, DL, VT, Opnd0, Opnd1, Opnd2); // Do not generate X86ISD::ADDSUB node for 512-bit types even though @@ -29654,6 +30195,26 @@ static SDValue combineShuffleToAddSubOrFMAddSub(SDNode *N, return DAG.getNode(X86ISD::ADDSUB, DL, VT, Opnd0, Opnd1); } +/// \brief Try to combine a shuffle into a target-specific +/// mul-sub-add node. +static SDValue combineShuffleToFMSubAdd(SDNode *N, + const X86Subtarget &Subtarget, + SelectionDAG &DAG) { + SDValue Opnd0, Opnd1; + if (!isAddSubOrSubAdd(N, Subtarget, Opnd0, Opnd1, true)) + return SDValue(); + + EVT VT = N->getValueType(0); + SDLoc DL(N); + + // Try to generate X86ISD::FMSUBADD node here. + SDValue Opnd2; + if (isFMAddSubOrFMSubAdd(Subtarget, DAG, Opnd0, Opnd1, Opnd2, 2)) + return DAG.getNode(X86ISD::FMSUBADD, DL, VT, Opnd0, Opnd1, Opnd2); + + return SDValue(); +} + // We are looking for a shuffle where both sources are concatenated with undef // and have a width that is half of the output's width. AVX2 has VPERMD/Q, so // if we can express this as a single-source shuffle, that's preferable. @@ -29740,11 +30301,14 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG, EVT VT = N->getValueType(0); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // If we have legalized the vector types, look for blends of FADD and FSUB - // nodes that we can fuse into an ADDSUB node. + // nodes that we can fuse into an ADDSUB, FMADDSUB, or FMSUBADD node. 
if (TLI.isTypeLegal(VT)) { if (SDValue AddSub = combineShuffleToAddSubOrFMAddSub(N, Subtarget, DAG)) return AddSub; + if (SDValue FMSubAdd = combineShuffleToFMSubAdd(N, Subtarget, DAG)) + return FMSubAdd; + if (SDValue HAddSub = foldShuffleOfHorizOp(N)) return HAddSub; } @@ -29968,6 +30532,53 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, SDValue BitCast, SDValue N0 = BitCast.getOperand(0); EVT VecVT = N0->getValueType(0); + if (VT.isVector() && VecVT.isScalarInteger() && Subtarget.hasAVX512() && + N0->getOpcode() == ISD::OR) { + SDValue Op0 = N0->getOperand(0); + SDValue Op1 = N0->getOperand(1); + MVT TrunckVT; + MVT BitcastVT; + switch (VT.getSimpleVT().SimpleTy) { + default: + return SDValue(); + case MVT::v16i1: + TrunckVT = MVT::i8; + BitcastVT = MVT::v8i1; + break; + case MVT::v32i1: + TrunckVT = MVT::i16; + BitcastVT = MVT::v16i1; + break; + case MVT::v64i1: + TrunckVT = MVT::i32; + BitcastVT = MVT::v32i1; + break; + } + bool isArg0UndefRight = Op0->getOpcode() == ISD::SHL; + bool isArg0UndefLeft = + Op0->getOpcode() == ISD::ZERO_EXTEND || Op0->getOpcode() == ISD::AND; + bool isArg1UndefRight = Op1->getOpcode() == ISD::SHL; + bool isArg1UndefLeft = + Op1->getOpcode() == ISD::ZERO_EXTEND || Op1->getOpcode() == ISD::AND; + SDValue OpLeft; + SDValue OpRight; + if (isArg0UndefRight && isArg1UndefLeft) { + OpLeft = Op0; + OpRight = Op1; + } else if (isArg1UndefRight && isArg0UndefLeft) { + OpLeft = Op1; + OpRight = Op0; + } else + return SDValue(); + SDLoc DL(BitCast); + SDValue Shr = OpLeft->getOperand(0); + SDValue Trunc1 = DAG.getNode(ISD::TRUNCATE, DL, TrunckVT, Shr); + SDValue Bitcast1 = DAG.getBitcast(BitcastVT, Trunc1); + SDValue Trunc2 = DAG.getNode(ISD::TRUNCATE, DL, TrunckVT, OpRight); + SDValue Bitcast2 = DAG.getBitcast(BitcastVT, Trunc2); + return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Bitcast1, Bitcast2); + } + if (!VT.isScalarInteger() || !VecVT.isSimple()) return SDValue(); @@ -30001,7 +30612,7 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, SDValue BitCast, // For cases such as (i4 bitcast (v4i1 setcc v4i64 v1, v2)) // sign-extend to a 256-bit operation to avoid truncation. if (N0->getOpcode() == ISD::SETCC && Subtarget.hasAVX() && - N0->getOperand(0)->getValueType(0).is256BitVector()) { + N0->getOperand(0).getValueType().is256BitVector()) { SExtVT = MVT::v4i64; FPCastVT = MVT::v4f64; } @@ -30014,8 +30625,8 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, SDValue BitCast, // 256-bit because the shuffle is cheaper than sign extending the result of // the compare. if (N0->getOpcode() == ISD::SETCC && Subtarget.hasAVX() && - (N0->getOperand(0)->getValueType(0).is256BitVector() || - N0->getOperand(0)->getValueType(0).is512BitVector())) { + (N0->getOperand(0).getValueType().is256BitVector() || + N0->getOperand(0).getValueType().is512BitVector())) { SExtVT = MVT::v8i32; FPCastVT = MVT::v8f32; } @@ -30075,40 +30686,76 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG, // (i16 movmsk (16i8 sext (v16i1 x))) // before the setcc result is scalarized on subtargets that don't have legal // vxi1 types. - if (DCI.isBeforeLegalize()) + if (DCI.isBeforeLegalize()) { if (SDValue V = combineBitcastvxi1(DAG, SDValue(N, 0), Subtarget)) return V; + + // If this is a bitcast between a MVT::v4i1/v2i1 and an illegal integer + // type, widen both sides to avoid a trip through memory. 
+ if ((VT == MVT::v4i1 || VT == MVT::v2i1) && SrcVT.isScalarInteger() && + Subtarget.hasAVX512()) { + SDLoc dl(N); + N0 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i8, N0); + N0 = DAG.getBitcast(MVT::v8i1, N0); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, N0, + DAG.getIntPtrConstant(0, dl)); + } + + // If this is a bitcast between a MVT::v4i1/v2i1 and an illegal integer + // type, widen both sides to avoid a trip through memory. + if ((SrcVT == MVT::v4i1 || SrcVT == MVT::v2i1) && VT.isScalarInteger() && + Subtarget.hasAVX512()) { + SDLoc dl(N); + unsigned NumConcats = 8 / SrcVT.getVectorNumElements(); + SmallVector Ops(NumConcats, DAG.getUNDEF(SrcVT)); + Ops[0] = N0; + N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i1, Ops); + N0 = DAG.getBitcast(MVT::i8, N0); + return DAG.getNode(ISD::TRUNCATE, dl, VT, N0); + } + } + // Since MMX types are special and don't usually play with other vector types, // it's better to handle them early to be sure we emit efficient code by // avoiding store-load conversions. + if (VT == MVT::x86mmx) { + // Detect zero-extended MMX constant vectors. + APInt UndefElts; + SmallVector EltBits; + if (getTargetConstantBitsFromNode(N0, 32, UndefElts, EltBits) && + EltBits[1] == 0) { + SDLoc DL(N0); + return DAG.getNode(X86ISD::MMX_MOVW2D, DL, VT, + DAG.getConstant(EltBits[0], DL, MVT::i32)); + } - // Detect bitcasts between i32 to x86mmx low word. - if (VT == MVT::x86mmx && N0.getOpcode() == ISD::BUILD_VECTOR && - SrcVT == MVT::v2i32 && isNullConstant(N0.getOperand(1))) { - SDValue N00 = N0->getOperand(0); - if (N00.getValueType() == MVT::i32) - return DAG.getNode(X86ISD::MMX_MOVW2D, SDLoc(N00), VT, N00); - } + // Detect bitcasts between i32 to x86mmx low word. + if (N0.getOpcode() == ISD::BUILD_VECTOR && SrcVT == MVT::v2i32) { + SDValue N00 = N0.getOperand(0); + SDValue N01 = N0.getOperand(1); + if (N00.getValueType() == MVT::i32 && + (N01.getOpcode() == ISD::UNDEF || isNullConstant(N01))) + return DAG.getNode(X86ISD::MMX_MOVW2D, SDLoc(N00), VT, N00); + } - // Detect bitcasts between element or subvector extraction to x86mmx. - if (VT == MVT::x86mmx && - (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT || - N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) && - isNullConstant(N0.getOperand(1))) { - SDValue N00 = N0->getOperand(0); - if (N00.getValueType().is128BitVector()) - return DAG.getNode(X86ISD::MOVDQ2Q, SDLoc(N00), VT, - DAG.getBitcast(MVT::v2i64, N00)); - } + // Detect bitcasts between element or subvector extraction to x86mmx. + if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT || + N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) && + isNullConstant(N0.getOperand(1))) { + SDValue N00 = N0.getOperand(0); + if (N00.getValueType().is128BitVector()) + return DAG.getNode(X86ISD::MOVDQ2Q, SDLoc(N00), VT, + DAG.getBitcast(MVT::v2i64, N00)); + } - // Detect bitcasts from FP_TO_SINT to x86mmx. - if (VT == MVT::x86mmx && SrcVT == MVT::v2i32 && - N0.getOpcode() == ISD::FP_TO_SINT) { - SDLoc DL(N0); - SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4i32, N0, - DAG.getUNDEF(MVT::v2i32)); - return DAG.getNode(X86ISD::MOVDQ2Q, DL, VT, - DAG.getBitcast(MVT::v2i64, Res)); + // Detect bitcasts from FP_TO_SINT to x86mmx. 
+ if (SrcVT == MVT::v2i32 && N0.getOpcode() == ISD::FP_TO_SINT) { + SDLoc DL(N0); + SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4i32, N0, + DAG.getUNDEF(MVT::v2i32)); + return DAG.getNode(X86ISD::MOVDQ2Q, DL, VT, + DAG.getBitcast(MVT::v2i64, Res)); + } } // Convert a bitcasted integer logic operation that has one bitcasted @@ -30166,7 +30813,7 @@ static SDValue matchBinOpReduction(SDNode *Extract, unsigned &BinOp, // Match against one of the candidate binary ops. if (llvm::none_of(CandidateBinOps, [Op](ISD::NodeType BinOp) { - return Op.getOpcode() == BinOp; + return Op.getOpcode() == unsigned(BinOp); })) return SDValue(); @@ -30304,7 +30951,8 @@ static SDValue createPSADBW(SelectionDAG &DAG, const SDValue &Zext0, return DAG.getNode(X86ISD::PSADBW, DL, SadVT, SadOp0, SadOp1); } -// Attempt to replace an min/max v8i16 horizontal reduction with PHMINPOSUW. +// Attempt to replace an min/max v8i16/v16i8 horizontal reduction with +// PHMINPOSUW. static SDValue combineHorizontalMinMaxResult(SDNode *Extract, SelectionDAG &DAG, const X86Subtarget &Subtarget) { // Bail without SSE41. @@ -30312,7 +30960,7 @@ static SDValue combineHorizontalMinMaxResult(SDNode *Extract, SelectionDAG &DAG, return SDValue(); EVT ExtractVT = Extract->getValueType(0); - if (ExtractVT != MVT::i16) + if (ExtractVT != MVT::i16 && ExtractVT != MVT::i8) return SDValue(); // Check for SMAX/SMIN/UMAX/UMIN horizontal reduction patterns. @@ -30324,7 +30972,7 @@ static SDValue combineHorizontalMinMaxResult(SDNode *Extract, SelectionDAG &DAG, EVT SrcVT = Src.getValueType(); EVT SrcSVT = SrcVT.getScalarType(); - if (SrcSVT != MVT::i16 || (SrcVT.getSizeInBits() % 128) != 0) + if (SrcSVT != ExtractVT || (SrcVT.getSizeInBits() % 128) != 0) return SDValue(); SDLoc DL(Extract); @@ -30340,22 +30988,39 @@ static SDValue combineHorizontalMinMaxResult(SDNode *Extract, SelectionDAG &DAG, SDValue Hi = extractSubVector(MinPos, NumSubElts, DAG, DL, SubSizeInBits); MinPos = DAG.getNode(BinOp, DL, SrcVT, Lo, Hi); } - assert(SrcVT == MVT::v8i16 && "Unexpected value type"); + assert(((SrcVT == MVT::v8i16 && ExtractVT == MVT::i16) || + (SrcVT == MVT::v16i8 && ExtractVT == MVT::i8)) && + "Unexpected value type"); // PHMINPOSUW applies to UMIN(v8i16), for SMIN/SMAX/UMAX we must apply a mask // to flip the value accordingly. SDValue Mask; + unsigned MaskEltsBits = ExtractVT.getSizeInBits(); if (BinOp == ISD::SMAX) - Mask = DAG.getConstant(APInt::getSignedMaxValue(16), DL, SrcVT); + Mask = DAG.getConstant(APInt::getSignedMaxValue(MaskEltsBits), DL, SrcVT); else if (BinOp == ISD::SMIN) - Mask = DAG.getConstant(APInt::getSignedMinValue(16), DL, SrcVT); + Mask = DAG.getConstant(APInt::getSignedMinValue(MaskEltsBits), DL, SrcVT); else if (BinOp == ISD::UMAX) - Mask = DAG.getConstant(APInt::getAllOnesValue(16), DL, SrcVT); + Mask = DAG.getConstant(APInt::getAllOnesValue(MaskEltsBits), DL, SrcVT); if (Mask) MinPos = DAG.getNode(ISD::XOR, DL, SrcVT, Mask, MinPos); - MinPos = DAG.getNode(X86ISD::PHMINPOS, DL, SrcVT, MinPos); + // For v16i8 cases we need to perform UMIN on pairs of byte elements, + // shuffling each upper element down and insert zeros. This means that the + // v16i8 UMIN will leave the upper element as zero, performing zero-extension + // ready for the PHMINPOS. 
+ if (ExtractVT == MVT::i8) { + SDValue Upper = DAG.getVectorShuffle( + SrcVT, DL, MinPos, getZeroVector(MVT::v16i8, Subtarget, DAG, DL), + {1, 16, 3, 16, 5, 16, 7, 16, 9, 16, 11, 16, 13, 16, 15, 16}); + MinPos = DAG.getNode(ISD::UMIN, DL, SrcVT, MinPos, Upper); + } + + // Perform the PHMINPOS on a v8i16 vector, + MinPos = DAG.getBitcast(MVT::v8i16, MinPos); + MinPos = DAG.getNode(X86ISD::PHMINPOS, DL, MVT::v8i16, MinPos); + MinPos = DAG.getBitcast(SrcVT, MinPos); if (Mask) MinPos = DAG.getNode(ISD::XOR, DL, SrcVT, Mask, MinPos); @@ -30539,6 +31204,11 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG, if (SrcSVT == MVT::i1 || !isa(Idx)) return SDValue(); + // Handle extract(broadcast(scalar_value)), it doesn't matter what index is. + if (X86ISD::VBROADCAST == Src.getOpcode() && + Src.getOperand(0).getValueType() == VT) + return Src.getOperand(0); + // Resolve the target shuffle inputs and mask. SmallVector Mask; SmallVector Ops; @@ -30671,7 +31341,7 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG, if (SDValue Cmp = combineHorizontalPredicateResult(N, DAG, Subtarget)) return Cmp; - // Attempt to replace min/max v8i16 reductions with PHMINPOSUW. + // Attempt to replace min/max v8i16/v16i8 reductions with PHMINPOSUW. if (SDValue MinMax = combineHorizontalMinMaxResult(N, DAG, Subtarget)) return MinMax; @@ -30769,8 +31439,8 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG, DAG.ReplaceAllUsesOfValueWith(SDValue(Extract, 0), Vals[IdxVal]); } - // The replacement was made in place; don't return anything. - return SDValue(); + // The replacement was made in place; return N so it won't be revisited. + return SDValue(N, 0); } /// If a vector select has an operand that is -1 or 0, try to simplify the @@ -31169,14 +31839,15 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, // v16i8 (select v16i1, v16i8, v16i8) does not have a proper // lowering on KNL. In this case we convert it to // v16i8 (select v16i8, v16i8, v16i8) and use AVX instruction. - // The same situation for all 128 and 256-bit vectors of i8 and i16. + // The same situation all vectors of i8 and i16 without BWI. + // Make sure we extend these even before type legalization gets a chance to + // split wide vectors. // Since SKX these selects have a proper lowering. - if (Subtarget.hasAVX512() && CondVT.isVector() && + if (Subtarget.hasAVX512() && !Subtarget.hasBWI() && CondVT.isVector() && CondVT.getVectorElementType() == MVT::i1 && - (VT.is128BitVector() || VT.is256BitVector()) && + VT.getVectorNumElements() > 4 && (VT.getVectorElementType() == MVT::i8 || - VT.getVectorElementType() == MVT::i16) && - !(Subtarget.hasBWI() && Subtarget.hasVLX())) { + VT.getVectorElementType() == MVT::i16)) { Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond); DCI.AddToWorklist(Cond.getNode()); return DAG.getNode(N->getOpcode(), DL, VT, Cond, LHS, RHS); @@ -32008,7 +32679,7 @@ static SDValue reduceVMULWidth(SDNode *N, SelectionDAG &DAG, // pmulld is supported since SSE41. It is better to use pmulld // instead of pmullw+pmulhw, except for subtargets where pmulld is slower than // the expansion. 
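// The narrowing below also gains a PMADDWD special case: when the top 17 bits
// of both 32-bit factors are known zero, vpmaddwd's 16-bit dot product reduces
// to the plain 32-bit product because the high halves contribute 0 * 0.
// A scalar check of that identity with hypothetical values:
#include <cassert>
#include <cstdint>
int main() {
  uint32_t A = 0x5ABC, B = 0x1234;                         // both below 2^15
  int16_t ALo = int16_t(A), AHi = int16_t(A >> 16);        // AHi == 0
  int16_t BLo = int16_t(B), BHi = int16_t(B >> 16);        // BHi == 0
  int32_t Madd = int32_t(ALo) * BLo + int32_t(AHi) * BHi;  // per-lane PMADDWD result
  assert(uint32_t(Madd) == A * B);
}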
- bool OptForMinSize = DAG.getMachineFunction().getFunction()->optForMinSize(); + bool OptForMinSize = DAG.getMachineFunction().getFunction().optForMinSize(); if (Subtarget.hasSSE41() && (OptForMinSize || !Subtarget.isPMULLDSlow())) return SDValue(); @@ -32024,6 +32695,13 @@ static SDValue reduceVMULWidth(SDNode *N, SelectionDAG &DAG, if ((NumElts % 2) != 0) return SDValue(); + // If the upper 17 bits of each element are zero then we can use PMADD. + APInt Mask17 = APInt::getHighBitsSet(32, 17); + if (VT == MVT::v4i32 && DAG.MaskedValueIsZero(N0, Mask17) && + DAG.MaskedValueIsZero(N1, Mask17)) + return DAG.getNode(X86ISD::VPMADDWD, DL, VT, DAG.getBitcast(MVT::v8i16, N0), + DAG.getBitcast(MVT::v8i16, N1)); + unsigned RegSize = 128; MVT OpsVT = MVT::getVectorVT(MVT::i16, RegSize / 16); EVT ReducedVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16, NumElts); @@ -32201,7 +32879,7 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG, if (!MulConstantOptimization) return SDValue(); // An imul is usually smaller than the alternative sequence. - if (DAG.getMachineFunction().getFunction()->optForMinSize()) + if (DAG.getMachineFunction().getFunction().optForMinSize()) return SDValue(); if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) @@ -32375,7 +33053,7 @@ static SDValue combineShiftRightArithmetic(SDNode *N, SelectionDAG &DAG) { // 1. MOVs can write to a register that differs from source // 2. MOVs accept memory operands - if (!VT.isInteger() || VT.isVector() || N1.getOpcode() != ISD::Constant || + if (VT.isVector() || N1.getOpcode() != ISD::Constant || N0.getOpcode() != ISD::SHL || !N0.hasOneUse() || N0.getOperand(1).getOpcode() != ISD::Constant) return SDValue(); @@ -32389,11 +33067,11 @@ static SDValue combineShiftRightArithmetic(SDNode *N, SelectionDAG &DAG) { if (SarConst.isNegative()) return SDValue(); - for (MVT SVT : MVT::integer_valuetypes()) { + for (MVT SVT : { MVT::i8, MVT::i16, MVT::i32 }) { unsigned ShiftSize = SVT.getSizeInBits(); // skipping types without corresponding sext/zext and // ShlConst that is not one of [56,48,32,24,16] - if (ShiftSize < 8 || ShiftSize > 64 || ShlConst != Size - ShiftSize) + if (ShiftSize >= Size || ShlConst != Size - ShiftSize) continue; SDLoc DL(N); SDValue NN = @@ -32446,37 +33124,6 @@ static SDValue combineShiftRightLogical(SDNode *N, SelectionDAG &DAG) { return SDValue(); } -/// \brief Returns a vector of 0s if the node in input is a vector logical -/// shift by a constant amount which is known to be bigger than or equal -/// to the vector element size in bits. -static SDValue performShiftToAllZeros(SDNode *N, SelectionDAG &DAG, - const X86Subtarget &Subtarget) { - EVT VT = N->getValueType(0); - - if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16 && - (!Subtarget.hasInt256() || - (VT != MVT::v4i64 && VT != MVT::v8i32 && VT != MVT::v16i16))) - return SDValue(); - - SDValue Amt = N->getOperand(1); - SDLoc DL(N); - if (auto *AmtBV = dyn_cast(Amt)) - if (auto *AmtSplat = AmtBV->getConstantSplatNode()) { - const APInt &ShiftAmt = AmtSplat->getAPIntValue(); - unsigned MaxAmount = - VT.getSimpleVT().getScalarSizeInBits(); - - // SSE2/AVX2 logical shifts always return a vector of 0s - // if the shift amount is bigger than or equal to - // the element size. The constant shift amount will be - // encoded as a 8-bit immediate. 
- if (ShiftAmt.trunc(8).uge(MaxAmount)) - return getZeroVector(VT.getSimpleVT(), Subtarget, DAG, DL); - } - - return SDValue(); -} - static SDValue combineShift(SDNode* N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { @@ -32492,11 +33139,6 @@ static SDValue combineShift(SDNode* N, SelectionDAG &DAG, if (SDValue V = combineShiftRightLogical(N, DAG)) return V; - // Try to fold this logical shift into a zero vector. - if (N->getOpcode() != ISD::SRA) - if (SDValue V = performShiftToAllZeros(N, DAG, Subtarget)) - return V; - return SDValue(); } @@ -32750,8 +33392,9 @@ static SDValue combineCompareEqual(SDNode *N, SelectionDAG &DAG, SDValue FSetCC = DAG.getNode(X86ISD::FSETCCM, DL, MVT::v1i1, CMP00, CMP01, DAG.getConstant(x86cc, DL, MVT::i8)); - return DAG.getNode(X86ISD::VEXTRACT, DL, N->getSimpleValueType(0), - FSetCC, DAG.getIntPtrConstant(0, DL)); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, + N->getSimpleValueType(0), FSetCC, + DAG.getIntPtrConstant(0, DL)); } SDValue OnesOrZeroesF = DAG.getNode(X86ISD::FSETCC, DL, CMP00.getValueType(), CMP00, CMP01, @@ -32815,21 +33458,20 @@ static SDValue combineANDXORWithAllOnesIntoANDNP(SDNode *N, SelectionDAG &DAG) { // register. In most cases we actually compare or select YMM-sized registers // and mixing the two types creates horrible code. This method optimizes // some of the transition sequences. +// Even with AVX-512 this is still useful for removing casts around logical +// operations on vXi1 mask types. static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { EVT VT = N->getValueType(0); - if (!VT.is256BitVector()) - return SDValue(); + assert(VT.isVector() && "Expected vector type"); assert((N->getOpcode() == ISD::ANY_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::SIGN_EXTEND) && "Invalid Node"); SDValue Narrow = N->getOperand(0); - EVT NarrowVT = Narrow->getValueType(0); - if (!NarrowVT.is128BitVector()) - return SDValue(); + EVT NarrowVT = Narrow.getValueType(); if (Narrow->getOpcode() != ISD::XOR && Narrow->getOpcode() != ISD::AND && @@ -32845,51 +33487,40 @@ static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG, return SDValue(); // The type of the truncated inputs. - EVT WideVT = N0->getOperand(0)->getValueType(0); - if (WideVT != VT) + if (N0->getOperand(0).getValueType() != VT) return SDValue(); // The right side has to be a 'trunc' or a constant vector. - bool RHSTrunc = N1.getOpcode() == ISD::TRUNCATE; - ConstantSDNode *RHSConstSplat = nullptr; - if (auto *RHSBV = dyn_cast(N1)) - RHSConstSplat = RHSBV->getConstantSplatNode(); - if (!RHSTrunc && !RHSConstSplat) + bool RHSTrunc = N1.getOpcode() == ISD::TRUNCATE && + N1.getOperand(0).getValueType() == VT; + if (!RHSTrunc && + !ISD::isBuildVectorOfConstantSDNodes(N1.getNode())) return SDValue(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (!TLI.isOperationLegalOrPromote(Narrow->getOpcode(), WideVT)) + if (!TLI.isOperationLegalOrPromote(Narrow->getOpcode(), VT)) return SDValue(); // Set N0 and N1 to hold the inputs to the new wide operation. N0 = N0->getOperand(0); - if (RHSConstSplat) { - N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT.getVectorElementType(), - SDValue(RHSConstSplat, 0)); - N1 = DAG.getSplatBuildVector(WideVT, DL, N1); - } else if (RHSTrunc) { + if (RHSTrunc) N1 = N1->getOperand(0); - } + else + N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N1); // Generate the wide operation. 
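// For illustration: bitwise operations commute with truncation, which is the
// identity that lets the narrow logic op be rebuilt on the wide inputs and the
// extend reapplied afterwards (a standalone check on scalar values).
#include <cassert>
#include <cstdint>
int main() {
  uint32_t X = 0xDEADBEEF, Y = 0x12345678;
  assert(uint16_t(uint16_t(X) & uint16_t(Y)) == uint16_t(X & Y));
  assert(uint16_t(uint16_t(X) | uint16_t(Y)) == uint16_t(X | Y));
  assert(uint16_t(uint16_t(X) ^ uint16_t(Y)) == uint16_t(X ^ Y));
}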
- SDValue Op = DAG.getNode(Narrow->getOpcode(), DL, WideVT, N0, N1); + SDValue Op = DAG.getNode(Narrow->getOpcode(), DL, VT, N0, N1); unsigned Opcode = N->getOpcode(); switch (Opcode) { + default: llvm_unreachable("Unexpected opcode"); case ISD::ANY_EXTEND: return Op; - case ISD::ZERO_EXTEND: { - unsigned InBits = NarrowVT.getScalarSizeInBits(); - APInt Mask = APInt::getAllOnesValue(InBits); - Mask = Mask.zext(VT.getScalarSizeInBits()); - return DAG.getNode(ISD::AND, DL, VT, - Op, DAG.getConstant(Mask, DL, VT)); - } + case ISD::ZERO_EXTEND: + return DAG.getZeroExtendInReg(Op, DL, NarrowVT.getScalarType()); case ISD::SIGN_EXTEND: return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op, DAG.getValueType(NarrowVT)); - default: - llvm_unreachable("Unexpected opcode"); } } @@ -32960,6 +33591,124 @@ static SDValue combineAndMaskToShift(SDNode *N, SelectionDAG &DAG, return DAG.getBitcast(N->getValueType(0), Shift); } +// Get the index node from the lowered DAG of a GEP IR instruction with one +// indexing dimension. +static SDValue getIndexFromUnindexedLoad(LoadSDNode *Ld) { + if (Ld->isIndexed()) + return SDValue(); + + SDValue Base = Ld->getBasePtr(); + + if (Base.getOpcode() != ISD::ADD) + return SDValue(); + + SDValue ShiftedIndex = Base.getOperand(0); + + if (ShiftedIndex.getOpcode() != ISD::SHL) + return SDValue(); + + return ShiftedIndex.getOperand(0); + +} + +static bool hasBZHI(const X86Subtarget &Subtarget, MVT VT) { + if (Subtarget.hasBMI2() && VT.isScalarInteger()) { + switch (VT.getSizeInBits()) { + default: return false; + case 64: return Subtarget.is64Bit() ? true : false; + case 32: return true; + } + } + return false; +} + +// This function recognizes cases where X86 bzhi instruction can replace and +// 'and-load' sequence. +// In case of loading integer value from an array of constants which is defined +// as follows: +// +// int array[SIZE] = {0x0, 0x1, 0x3, 0x7, 0xF ..., 2^(SIZE-1) - 1} +// +// then applying a bitwise and on the result with another input. +// It's equivalent to performing bzhi (zero high bits) on the input, with the +// same index of the load. +static SDValue combineAndLoadToBZHI(SDNode *Node, SelectionDAG &DAG, + const X86Subtarget &Subtarget) { + MVT VT = Node->getSimpleValueType(0); + SDLoc dl(Node); + + // Check if subtarget has BZHI instruction for the node's type + if (!hasBZHI(Subtarget, VT)) + return SDValue(); + + // Try matching the pattern for both operands. + for (unsigned i = 0; i < 2; i++) { + SDValue N = Node->getOperand(i); + LoadSDNode *Ld = dyn_cast(N.getNode()); + + // continue if the operand is not a load instruction + if (!Ld) + return SDValue(); + + const Value *MemOp = Ld->getMemOperand()->getValue(); + + if (!MemOp) + return SDValue(); + + if (const GetElementPtrInst *GEP = dyn_cast(MemOp)) { + if (GlobalVariable *GV = dyn_cast(GEP->getOperand(0))) { + if (GV->isConstant() && GV->hasDefinitiveInitializer()) { + + Constant *Init = GV->getInitializer(); + Type *Ty = Init->getType(); + if (!isa(Init) || + !Ty->getArrayElementType()->isIntegerTy() || + Ty->getArrayElementType()->getScalarSizeInBits() != + VT.getSizeInBits() || + Ty->getArrayNumElements() > + Ty->getArrayElementType()->getScalarSizeInBits()) + continue; + + // Check if the array's constant elements are suitable to our case. 
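// To make "suitable" concrete: the transform expects arr[i] == 2^i - 1, a mask
// of the low i bits, in which case (x & arr[idx]) is exactly what BZHI
// computes.  A scalar check of the rewrite emitted below, restricted to
// indices where the C shift is defined:
#include <cassert>
#include <cstdint>
int main() {
  uint32_t Arr[8] = {0x0, 0x1, 0x3, 0x7, 0xF, 0x1F, 0x3F, 0x7F};
  uint32_t X = 0xDEADBEEF;
  for (uint32_t Idx = 1; Idx != 8; ++Idx)
    assert((X & Arr[Idx]) == (X & (~0u >> (32 - Idx))));
}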
+ uint64_t ArrayElementCount = Init->getType()->getArrayNumElements(); + bool ConstantsMatch = true; + for (uint64_t j = 0; j < ArrayElementCount; j++) { + ConstantInt *Elem = + dyn_cast(Init->getAggregateElement(j)); + if (Elem->getZExtValue() != (((uint64_t)1 << j) - 1)) { + ConstantsMatch = false; + break; + } + } + if (!ConstantsMatch) + continue; + + // Do the transformation (For 32-bit type): + // -> (and (load arr[idx]), inp) + // <- (and (srl 0xFFFFFFFF, (sub 32, idx))) + // that will be replaced with one bzhi instruction. + SDValue Inp = (i == 0) ? Node->getOperand(1) : Node->getOperand(0); + SDValue SizeC = DAG.getConstant(VT.getSizeInBits(), dl, VT); + + // Get the Node which indexes into the array. + SDValue Index = getIndexFromUnindexedLoad(Ld); + if (!Index) + return SDValue(); + Index = DAG.getZExtOrTrunc(Index, dl, VT); + + SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SizeC, Index); + + SDValue AllOnes = DAG.getAllOnesConstant(dl, VT); + SDValue LShr = DAG.getNode(ISD::SRL, dl, VT, AllOnes, Sub); + + return DAG.getNode(ISD::AND, dl, VT, Inp, LShr); + } + } + } + } + return SDValue(); +} + static SDValue combineAnd(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { @@ -32988,6 +33737,9 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG, if (SDValue ShiftRight = combineAndMaskToShift(N, DAG, Subtarget)) return ShiftRight; + if (SDValue R = combineAndLoadToBZHI(N, DAG, Subtarget)) + return R; + // Attempt to recursively combine a bitmask AND with shuffles. if (VT.isVector() && (VT.getScalarSizeInBits() % 8) == 0) { SDValue Op(N, 0); @@ -33298,7 +34050,7 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG, return SDValue(); // fold (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c) - bool OptForSize = DAG.getMachineFunction().getFunction()->optForSize(); + bool OptForSize = DAG.getMachineFunction().getFunction().optForSize(); // SHLD/SHRD instructions have lower register pressure, but on some // platforms they have higher latency than the equivalent @@ -33556,6 +34308,53 @@ combineTruncateWithUSat(SDValue In, EVT VT, SDLoc &DL, SelectionDAG &DAG, return SDValue(); } +// Helper for splitting operands of a binary operation to legal target size and +// apply a function on each part. +// Useful for operations that are available on SSE2 in 128-bit, on AVX2 in +// 256-bit and on AVX512BW in 512-bit. +// The argument VT is the type used for deciding if/how to split the operands +// Op0 and Op1. Op0 and Op1 do *not* have to be of type VT. 
+// The argument Builder is a function that will be applied on each split psrt: +// SDValue Builder(SelectionDAG&G, SDLoc, SDValue, SDValue) +template +SDValue SplitBinaryOpsAndApply(SelectionDAG &DAG, const X86Subtarget &Subtarget, + SDLoc DL, EVT VT, SDValue Op0, SDValue Op1, + F Builder) { + assert(Subtarget.hasSSE2() && "Target assumed to support at least SSE2"); + unsigned NumSubs = 1; + if (Subtarget.hasBWI()) { + if (VT.getSizeInBits() > 512) { + NumSubs = VT.getSizeInBits() / 512; + assert((VT.getSizeInBits() % 512) == 0 && "Illegal vector size"); + } + } else if (Subtarget.hasAVX2()) { + if (VT.getSizeInBits() > 256) { + NumSubs = VT.getSizeInBits() / 256; + assert((VT.getSizeInBits() % 256) == 0 && "Illegal vector size"); + } + } else { + if (VT.getSizeInBits() > 128) { + NumSubs = VT.getSizeInBits() / 128; + assert((VT.getSizeInBits() % 128) == 0 && "Illegal vector size"); + } + } + + if (NumSubs == 1) + return Builder(DAG, DL, Op0, Op1); + + SmallVector Subs; + EVT InVT = Op0.getValueType(); + EVT SubVT = EVT::getVectorVT(*DAG.getContext(), InVT.getScalarType(), + InVT.getVectorNumElements() / NumSubs); + for (unsigned i = 0; i != NumSubs; ++i) { + unsigned Idx = i * SubVT.getVectorNumElements(); + SDValue LHS = extractSubVector(Op0, Idx, DAG, DL, SubVT.getSizeInBits()); + SDValue RHS = extractSubVector(Op1, Idx, DAG, DL, SubVT.getSizeInBits()); + Subs.push_back(Builder(DAG, DL, LHS, RHS)); + } + return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Subs); +} + /// This function detects the AVG pattern between vectors of unsigned i8/i16, /// which is c = (a + b + 1) / 2, and replace this operation with the efficient /// X86ISD::AVG instruction. @@ -33580,16 +34379,6 @@ static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG, if (!Subtarget.hasSSE2()) return SDValue(); - if (Subtarget.hasBWI()) { - if (VT.getSizeInBits() > 512) - return SDValue(); - } else if (Subtarget.hasAVX2()) { - if (VT.getSizeInBits() > 256) - return SDValue(); - } else { - if (VT.getSizeInBits() > 128) - return SDValue(); - } // Detect the following pattern: // @@ -33601,7 +34390,6 @@ static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG, // %6 = trunc %5 to // // In AVX512, the last instruction can also be a trunc store. - if (In.getOpcode() != ISD::SRL) return SDValue(); @@ -33635,6 +34423,10 @@ static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG, Operands[0] = LHS.getOperand(0); Operands[1] = LHS.getOperand(1); + auto AVGBuilder = [](SelectionDAG &DAG, SDLoc DL, SDValue Op0, SDValue Op1) { + return DAG.getNode(X86ISD::AVG, DL, Op0.getValueType(), Op0, Op1); + }; + // Take care of the case when one of the operands is a constant vector whose // element is in the range [1, 256]. if (IsConstVectorInRange(Operands[1], 1, ScalarVT == MVT::i8 ? 
256 : 65536) && @@ -33645,8 +34437,9 @@ static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG, SDValue VecOnes = DAG.getConstant(1, DL, InVT); Operands[1] = DAG.getNode(ISD::SUB, DL, InVT, Operands[1], VecOnes); Operands[1] = DAG.getNode(ISD::TRUNCATE, DL, VT, Operands[1]); - return DAG.getNode(X86ISD::AVG, DL, VT, Operands[0].getOperand(0), - Operands[1]); + return SplitBinaryOpsAndApply(DAG, Subtarget, DL, VT, + Operands[0].getOperand(0), Operands[1], + AVGBuilder); } if (Operands[0].getOpcode() == ISD::ADD) @@ -33669,9 +34462,10 @@ static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG, Operands[j].getOperand(0).getValueType() != VT) return SDValue(); - // The pattern is detected, emit X86ISD::AVG instruction. - return DAG.getNode(X86ISD::AVG, DL, VT, Operands[0].getOperand(0), - Operands[1].getOperand(0)); + // The pattern is detected, emit X86ISD::AVG instruction(s). + return SplitBinaryOpsAndApply(DAG, Subtarget, DL, VT, + Operands[0].getOperand(0), + Operands[1].getOperand(0), AVGBuilder); } return SDValue(); @@ -33712,15 +34506,14 @@ static SDValue combineLoad(SDNode *N, SelectionDAG &DAG, Ptr = DAG.getMemBasePlusOffset(Ptr, 16, dl); SDValue Load2 = - DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(), - std::min(16U, Alignment), Ld->getMemOperand()->getFlags()); + DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr, + Ld->getPointerInfo().getWithOffset(16), + MinAlign(Alignment, 16U), Ld->getMemOperand()->getFlags()); SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Load1.getValue(1), Load2.getValue(1)); - SDValue NewVec = DAG.getUNDEF(RegVT); - NewVec = insert128BitVector(NewVec, Load1, 0, DAG, dl); - NewVec = insert128BitVector(NewVec, Load2, NumElems / 2, DAG, dl); + SDValue NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, RegVT, Load1, Load2); return DCI.CombineTo(N, NewVec, TF, true); } @@ -34126,8 +34919,9 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG, DAG.getStore(St->getChain(), dl, Value0, Ptr0, St->getPointerInfo(), Alignment, St->getMemOperand()->getFlags()); SDValue Ch1 = - DAG.getStore(St->getChain(), dl, Value1, Ptr1, St->getPointerInfo(), - std::min(16U, Alignment), St->getMemOperand()->getFlags()); + DAG.getStore(St->getChain(), dl, Value1, Ptr1, + St->getPointerInfo().getWithOffset(16), + MinAlign(Alignment, 16U), St->getMemOperand()->getFlags()); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ch0, Ch1); } @@ -34238,8 +35032,8 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG, if (VT.getSizeInBits() != 64) return SDValue(); - const Function *F = DAG.getMachineFunction().getFunction(); - bool NoImplicitFloatOps = F->hasFnAttribute(Attribute::NoImplicitFloat); + const Function &F = DAG.getMachineFunction().getFunction(); + bool NoImplicitFloatOps = F.hasFnAttribute(Attribute::NoImplicitFloat); bool F64IsLegal = !Subtarget.useSoftFloat() && !NoImplicitFloatOps && Subtarget.hasSSE2(); if ((VT.isVector() || @@ -34247,28 +35041,10 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG, isa(St->getValue()) && !cast(St->getValue())->isVolatile() && St->getChain().hasOneUse() && !St->isVolatile()) { - SDNode* LdVal = St->getValue().getNode(); - LoadSDNode *Ld = nullptr; - int TokenFactorIndex = -1; + LoadSDNode *Ld = cast(St->getValue().getNode()); SmallVector Ops; - SDNode* ChainVal = St->getChain().getNode(); - // Must be a store of a load. We currently handle two cases: the load - // is a direct child, and it's under an intervening TokenFactor. 
It is - // possible to dig deeper under nested TokenFactors. - if (ChainVal == LdVal) - Ld = cast(St->getChain()); - else if (St->getValue().hasOneUse() && - ChainVal->getOpcode() == ISD::TokenFactor) { - for (unsigned i = 0, e = ChainVal->getNumOperands(); i != e; ++i) { - if (ChainVal->getOperand(i).getNode() == LdVal) { - TokenFactorIndex = i; - Ld = cast(St->getValue()); - } else - Ops.push_back(ChainVal->getOperand(i)); - } - } - if (!Ld || !ISD::isNormalLoad(Ld)) + if (!ISD::isNormalLoad(Ld)) return SDValue(); // If this is not the MMX case, i.e. we are just turning i64 load/store @@ -34285,17 +35061,12 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG, if (Subtarget.is64Bit() || F64IsLegal) { MVT LdVT = Subtarget.is64Bit() ? MVT::i64 : MVT::f64; SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(), Ld->getBasePtr(), - Ld->getPointerInfo(), Ld->getAlignment(), - Ld->getMemOperand()->getFlags()); + Ld->getMemOperand()); + // Make sure new load is placed in same chain order. - SDValue NewChain = DAG.makeEquivalentMemoryOrdering(Ld, NewLd); - if (TokenFactorIndex >= 0) { - Ops.push_back(NewChain); - NewChain = DAG.getNode(ISD::TokenFactor, LdDL, MVT::Other, Ops); - } - return DAG.getStore(NewChain, StDL, NewLd, St->getBasePtr(), - St->getPointerInfo(), St->getAlignment(), - St->getMemOperand()->getFlags()); + DAG.makeEquivalentMemoryOrdering(Ld, NewLd); + return DAG.getStore(St->getChain(), StDL, NewLd, St->getBasePtr(), + St->getMemOperand()); } // Otherwise, lower to two pairs of 32-bit loads / stores. @@ -34310,23 +35081,19 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG, MinAlign(Ld->getAlignment(), 4), Ld->getMemOperand()->getFlags()); // Make sure new loads are placed in same chain order. - SDValue NewChain = DAG.makeEquivalentMemoryOrdering(Ld, LoLd); - NewChain = DAG.makeEquivalentMemoryOrdering(Ld, HiLd); - - if (TokenFactorIndex >= 0) { - Ops.push_back(NewChain); - NewChain = DAG.getNode(ISD::TokenFactor, LdDL, MVT::Other, Ops); - } + DAG.makeEquivalentMemoryOrdering(Ld, LoLd); + DAG.makeEquivalentMemoryOrdering(Ld, HiLd); LoAddr = St->getBasePtr(); HiAddr = DAG.getMemBasePlusOffset(LoAddr, 4, StDL); SDValue LoSt = - DAG.getStore(NewChain, StDL, LoLd, LoAddr, St->getPointerInfo(), + DAG.getStore(St->getChain(), StDL, LoLd, LoAddr, St->getPointerInfo(), St->getAlignment(), St->getMemOperand()->getFlags()); - SDValue HiSt = DAG.getStore( - NewChain, StDL, HiLd, HiAddr, St->getPointerInfo().getWithOffset(4), - MinAlign(St->getAlignment(), 4), St->getMemOperand()->getFlags()); + SDValue HiSt = DAG.getStore(St->getChain(), StDL, HiLd, HiAddr, + St->getPointerInfo().getWithOffset(4), + MinAlign(St->getAlignment(), 4), + St->getMemOperand()->getFlags()); return DAG.getNode(ISD::TokenFactor, StDL, MVT::Other, LoSt, HiSt); } @@ -34576,7 +35343,7 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG, // X86 is rubbish at scalar and vector i64 multiplies (until AVX512DQ) - its // better to truncate if we have the chance. if (SrcVT.getScalarType() == MVT::i64 && TLI.isOperationLegal(Opcode, VT) && - !TLI.isOperationLegal(Opcode, SrcVT)) + !Subtarget.hasDQI()) return TruncateArithmetic(Src.getOperand(0), Src.getOperand(1)); LLVM_FALLTHROUGH; case ISD::ADD: { @@ -35141,7 +35908,7 @@ static SDValue combineFMinNumFMaxNum(SDNode *N, SelectionDAG &DAG, // This takes at least 3 instructions, so favor a library call when operating // on a scalar and minimizing code size. 
- if (!VT.isVector() && DAG.getMachineFunction().getFunction()->optForMinSize()) + if (!VT.isVector() && DAG.getMachineFunction().getFunction().optForMinSize()) return SDValue(); SDValue Op0 = N->getOperand(0); @@ -35583,6 +36350,45 @@ static SDValue combineToExtendVectorInReg(SDNode *N, SelectionDAG &DAG, return SDValue(); } +// Attempt to combine a (sext/zext (setcc)) to a setcc with an xmm/ymm/zmm +// result type. +static SDValue combineExtSetcc(SDNode *N, SelectionDAG &DAG, + const X86Subtarget &Subtarget) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + SDLoc dl(N); + + // Only do this combine with AVX512 for vector extends. + if (!Subtarget.hasAVX512() || !VT.isVector() || N0->getOpcode() != ISD::SETCC) + return SDValue(); + + // Only combine legal element types. + EVT SVT = VT.getVectorElementType(); + if (SVT != MVT::i8 && SVT != MVT::i16 && SVT != MVT::i32 && + SVT != MVT::i64 && SVT != MVT::f32 && SVT != MVT::f64) + return SDValue(); + + // We can only do this if the vector size is 256 bits or less. + unsigned Size = VT.getSizeInBits(); + if (Size > 256) + return SDValue(); + + // Don't fold if the condition code can't be handled by PCMPEQ/PCMPGT since + // those are the only integer compares we have. + ISD::CondCode CC = cast(N0->getOperand(2))->get(); + if (ISD::isUnsignedIntSetCC(CC) || CC == ISD::SETLE || CC == ISD::SETGE || + CC == ISD::SETNE) + return SDValue(); + + // Only do this combine if the extension will be fully consumed by the setcc. + EVT N00VT = N0.getOperand(0).getValueType(); + EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger(); + if (Size != MatchingVecType.getSizeInBits()) + return SDValue(); + + return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC); +} + static SDValue combineSext(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { @@ -35597,14 +36403,11 @@ static SDValue combineSext(SDNode *N, SelectionDAG &DAG, if (SDValue NewCMov = combineToExtendCMOV(N, DAG)) return NewCMov; - if (!DCI.isBeforeLegalizeOps()) { - if (InVT == MVT::i1) { - SDValue Zero = DAG.getConstant(0, DL, VT); - SDValue AllOnes = DAG.getAllOnesConstant(DL, VT); - return DAG.getSelect(DL, VT, N0, AllOnes, Zero); - } + if (!DCI.isBeforeLegalizeOps()) return SDValue(); - } + + if (SDValue V = combineExtSetcc(N, DAG, Subtarget)) + return V; if (InVT == MVT::i1 && N0.getOpcode() == ISD::XOR && isAllOnesConstant(N0.getOperand(1)) && N0.hasOneUse()) { @@ -35622,7 +36425,7 @@ static SDValue combineSext(SDNode *N, SelectionDAG &DAG, if (SDValue V = combineToExtendBoolVectorInReg(N, DAG, DCI, Subtarget)) return V; - if (Subtarget.hasAVX() && VT.is256BitVector()) + if (VT.isVector()) if (SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget)) return R; @@ -35814,7 +36617,7 @@ static SDValue combineZext(SDNode *N, SelectionDAG &DAG, if (SDValue V = combineToExtendBoolVectorInReg(N, DAG, DCI, Subtarget)) return V; - if (VT.is256BitVector()) + if (VT.isVector()) if (SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget)) return R; @@ -35837,13 +36640,23 @@ static SDValue combineVectorSizedSetCCEquality(SDNode *SetCC, SelectionDAG &DAG, ISD::CondCode CC = cast(SetCC->getOperand(2))->get(); assert((CC == ISD::SETNE || CC == ISD::SETEQ) && "Bad comparison predicate"); - // We're looking for an oversized integer equality comparison, but ignore a - // comparison with zero because that gets special treatment in EmitTest(). + // We're looking for an oversized integer equality comparison.
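// The special case matched a few lines below, (or (xor A, B), (xor C, D))
// compared against zero, relies on a simple identity, checked here on scalars
// with arbitrary values: the OR of the XORs is zero exactly when both pairs
// are equal, so the wide compare can become two vector compares ANDed together.
#include <cassert>
#include <cstdint>
int main() {
  uint64_t A = 42, B = 42, C = 7, D = 9;
  assert((((A ^ B) | (C ^ D)) == 0) == (A == B && C == D));
}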
SDValue X = SetCC->getOperand(0); SDValue Y = SetCC->getOperand(1); EVT OpVT = X.getValueType(); unsigned OpSize = OpVT.getSizeInBits(); - if (!OpVT.isScalarInteger() || OpSize < 128 || isNullConstant(Y)) + if (!OpVT.isScalarInteger() || OpSize < 128) + return SDValue(); + + // Ignore a comparison with zero because that gets special treatment in + // EmitTest(). But make an exception for the special case of a pair of + // logically-combined vector-sized operands compared to zero. This pattern may + // be generated by the memcmp expansion pass with oversized integer compares + // (see PR33325). + bool IsOrXorXorCCZero = isNullConstant(Y) && X.getOpcode() == ISD::OR && + X.getOperand(0).getOpcode() == ISD::XOR && + X.getOperand(1).getOpcode() == ISD::XOR; + if (isNullConstant(Y) && !IsOrXorXorCCZero) return SDValue(); // Bail out if we know that this is not really just an oversized integer. @@ -35858,15 +36671,29 @@ static SDValue combineVectorSizedSetCCEquality(SDNode *SetCC, SelectionDAG &DAG, if ((OpSize == 128 && Subtarget.hasSSE2()) || (OpSize == 256 && Subtarget.hasAVX2())) { EVT VecVT = OpSize == 128 ? MVT::v16i8 : MVT::v32i8; - SDValue VecX = DAG.getBitcast(VecVT, X); - SDValue VecY = DAG.getBitcast(VecVT, Y); - + SDValue Cmp; + if (IsOrXorXorCCZero) { + // This is a bitwise-combined equality comparison of 2 pairs of vectors: + // setcc i128 (or (xor A, B), (xor C, D)), 0, eq|ne + // Use 2 vector equality compares and 'and' the results before doing a + // MOVMSK. + SDValue A = DAG.getBitcast(VecVT, X.getOperand(0).getOperand(0)); + SDValue B = DAG.getBitcast(VecVT, X.getOperand(0).getOperand(1)); + SDValue C = DAG.getBitcast(VecVT, X.getOperand(1).getOperand(0)); + SDValue D = DAG.getBitcast(VecVT, X.getOperand(1).getOperand(1)); + SDValue Cmp1 = DAG.getNode(X86ISD::PCMPEQ, DL, VecVT, A, B); + SDValue Cmp2 = DAG.getNode(X86ISD::PCMPEQ, DL, VecVT, C, D); + Cmp = DAG.getNode(ISD::AND, DL, VecVT, Cmp1, Cmp2); + } else { + SDValue VecX = DAG.getBitcast(VecVT, X); + SDValue VecY = DAG.getBitcast(VecVT, Y); + Cmp = DAG.getNode(X86ISD::PCMPEQ, DL, VecVT, VecX, VecY); + } // If all bytes match (bitmask is 0x(FFFF)FFFF), that's equality. // setcc i128 X, Y, eq --> setcc (pmovmskb (pcmpeqb X, Y)), 0xFFFF, eq // setcc i128 X, Y, ne --> setcc (pmovmskb (pcmpeqb X, Y)), 0xFFFF, ne // setcc i256 X, Y, eq --> setcc (vpmovmskb (vpcmpeqb X, Y)), 0xFFFFFFFF, eq // setcc i256 X, Y, ne --> setcc (vpmovmskb (vpcmpeqb X, Y)), 0xFFFFFFFF, ne - SDValue Cmp = DAG.getNode(X86ISD::PCMPEQ, DL, VecVT, VecX, VecY); SDValue MovMsk = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Cmp); SDValue FFFFs = DAG.getConstant(OpSize == 128 ? 0xFFFF : 0xFFFFFFFF, DL, MVT::i32); @@ -35882,10 +36709,10 @@ static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG, SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); EVT VT = N->getValueType(0); + EVT OpVT = LHS.getValueType(); SDLoc DL(N); if (CC == ISD::SETNE || CC == ISD::SETEQ) { - EVT OpVT = LHS.getValueType(); // 0-x == y --> x+y == 0 // 0-x != y --> x+y != 0 if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) && @@ -35934,6 +36761,20 @@ static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG, } } + // If we have AVX512, but not BWI and this is a vXi16/vXi8 setcc, just + // pre-promote its result type since vXi1 vectors don't get promoted + // during type legalization. + // NOTE: The element count check is to ignore operand types that need to + // go through type promotion to a 128-bit vector. 
+ if (Subtarget.hasAVX512() && !Subtarget.hasBWI() && VT.isVector() && + VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() > 4 && + (OpVT.getVectorElementType() == MVT::i8 || + OpVT.getVectorElementType() == MVT::i16)) { + SDValue Setcc = DAG.getNode(ISD::SETCC, DL, OpVT, LHS, RHS, + N->getOperand(2)); + return DAG.getNode(ISD::TRUNCATE, DL, VT, Setcc); + } + // For an SSE1-only target, lower a comparison of v4f32 to X86ISD::CMPP early // to avoid scalarization via legalization because v4i32 is not a legal type. if (Subtarget.hasSSE1() && !Subtarget.hasSSE2() && VT == MVT::v4i32 && @@ -35943,55 +36784,98 @@ static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG, return SDValue(); } +static SDValue combineMOVMSK(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI) { + SDValue Src = N->getOperand(0); + MVT SrcVT = Src.getSimpleValueType(); + + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), + !DCI.isBeforeLegalizeOps()); + + // MOVMSK only uses the MSB from each vector element. + KnownBits Known; + APInt DemandedMask(APInt::getSignMask(SrcVT.getScalarSizeInBits())); + if (TLI.SimplifyDemandedBits(Src, DemandedMask, Known, TLO)) { + DCI.AddToWorklist(Src.getNode()); + DCI.CommitTargetLoweringOpt(TLO); + return SDValue(N, 0); + } + + return SDValue(); +} + static SDValue combineGatherScatter(SDNode *N, SelectionDAG &DAG, - TargetLowering::DAGCombinerInfo &DCI) { + TargetLowering::DAGCombinerInfo &DCI, + const X86Subtarget &Subtarget) { SDLoc DL(N); - // Pre-shrink oversized index elements to avoid triggering scalarization. - if (DCI.isBeforeLegalize()) { + if (DCI.isBeforeLegalizeOps()) { SDValue Index = N->getOperand(4); - if (Index.getScalarValueSizeInBits() > 64) { - EVT IndexVT = EVT::getVectorVT(*DAG.getContext(), MVT::i64, + // Remove any sign extends from 32 or smaller to larger than 32. + // Only do this before LegalizeOps in case we need the sign extend for + // legalization. + if (Index.getOpcode() == ISD::SIGN_EXTEND) { + if (Index.getScalarValueSizeInBits() > 32 && + Index.getOperand(0).getScalarValueSizeInBits() <= 32) { + SmallVector NewOps(N->op_begin(), N->op_end()); + NewOps[4] = Index.getOperand(0); + DAG.UpdateNodeOperands(N, NewOps); + // The original sign extend has less users, add back to worklist in case + // it needs to be removed + DCI.AddToWorklist(Index.getNode()); + DCI.AddToWorklist(N); + return SDValue(N, 0); + } + } + + // Make sure the index is either i32 or i64 + unsigned ScalarSize = Index.getScalarValueSizeInBits(); + if (ScalarSize != 32 && ScalarSize != 64) { + MVT EltVT = ScalarSize > 32 ? MVT::i64 : MVT::i32; + EVT IndexVT = EVT::getVectorVT(*DAG.getContext(), EltVT, Index.getValueType().getVectorNumElements()); - SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index); + Index = DAG.getSExtOrTrunc(Index, DL, IndexVT); SmallVector NewOps(N->op_begin(), N->op_end()); - NewOps[4] = Trunc; + NewOps[4] = Index; DAG.UpdateNodeOperands(N, NewOps); DCI.AddToWorklist(N); return SDValue(N, 0); } - } - // Try to remove sign extends from i32 to i64 on the index. - // Only do this before legalize in case we are relying on it for - // legalization. - // TODO: We should maybe remove any sign extend once we learn how to sign - // extend narrow index during lowering. 
- if (DCI.isBeforeLegalizeOps()) { - SDValue Index = N->getOperand(4); - if (Index.getScalarValueSizeInBits() == 64 && - Index.getOpcode() == ISD::SIGN_EXTEND && + // Try to remove zero extends from 32->64 if we know the sign bit of + // the input is zero. + if (Index.getOpcode() == ISD::ZERO_EXTEND && + Index.getScalarValueSizeInBits() == 64 && Index.getOperand(0).getScalarValueSizeInBits() == 32) { - SmallVector NewOps(N->op_begin(), N->op_end()); - NewOps[4] = Index.getOperand(0); - DAG.UpdateNodeOperands(N, NewOps); - // The original sign extend has less users, add back to worklist in case - // it needs to be removed. - DCI.AddToWorklist(Index.getNode()); - DCI.AddToWorklist(N); - return SDValue(N, 0); + if (DAG.SignBitIsZero(Index.getOperand(0))) { + SmallVector NewOps(N->op_begin(), N->op_end()); + NewOps[4] = Index.getOperand(0); + DAG.UpdateNodeOperands(N, NewOps); + // The original zero extend has less users, add back to worklist in case + // it needs to be removed + DCI.AddToWorklist(Index.getNode()); + DCI.AddToWorklist(N); + return SDValue(N, 0); + } } } - // Gather and Scatter instructions use k-registers for masks. The type of - // the masks is v*i1. So the mask will be truncated anyway. - // The SIGN_EXTEND_INREG my be dropped. - SDValue Mask = N->getOperand(2); - if (Mask.getOpcode() == ISD::SIGN_EXTEND_INREG) { - SmallVector NewOps(N->op_begin(), N->op_end()); - NewOps[2] = Mask.getOperand(0); - DAG.UpdateNodeOperands(N, NewOps); + // With AVX2 we only demand the upper bit of the mask. + if (!Subtarget.hasAVX512()) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), + !DCI.isBeforeLegalizeOps()); + SDValue Mask = N->getOperand(2); + KnownBits Known; + APInt DemandedMask(APInt::getSignMask(Mask.getScalarValueSizeInBits())); + if (TLI.SimplifyDemandedBits(Mask, DemandedMask, Known, TLO)) { + DCI.AddToWorklist(Mask.getNode()); + DCI.CommitTargetLoweringOpt(TLO); + return SDValue(N, 0); + } } + return SDValue(); } @@ -36044,7 +36928,7 @@ static SDValue combineVectorCompareAndMaskUnaryOp(SDNode *N, EVT VT = N->getValueType(0); if (!VT.isVector() || N->getOperand(0)->getOpcode() != ISD::AND || N->getOperand(0)->getOperand(0)->getOpcode() != ISD::SETCC || - VT.getSizeInBits() != N->getOperand(0)->getValueType(0).getSizeInBits()) + VT.getSizeInBits() != N->getOperand(0).getValueSizeInBits()) return SDValue(); // Now check that the other operand of the AND is a constant. We could @@ -36080,7 +36964,6 @@ static SDValue combineUIntToFP(SDNode *N, SelectionDAG &DAG, EVT VT = N->getValueType(0); EVT InVT = Op0.getValueType(); EVT InSVT = InVT.getScalarType(); - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // UINT_TO_FP(vXi8) -> SINT_TO_FP(ZEXT(vXi8 to vXi32)) // UINT_TO_FP(vXi16) -> SINT_TO_FP(ZEXT(vXi16 to vXi32)) @@ -36090,9 +36973,7 @@ static SDValue combineUIntToFP(SDNode *N, SelectionDAG &DAG, InVT.getVectorNumElements()); SDValue P = DAG.getNode(ISD::ZERO_EXTEND, dl, DstVT, Op0); - if (TLI.isOperationLegal(ISD::UINT_TO_FP, DstVT)) - return DAG.getNode(ISD::UINT_TO_FP, dl, VT, P); - + // UINT_TO_FP isn't legal without AVX512 so use SINT_TO_FP. 
return DAG.getNode(ISD::SINT_TO_FP, dl, VT, P); } @@ -36539,6 +37420,113 @@ static SDValue combineIncDecVector(SDNode *N, SelectionDAG &DAG) { return DAG.getNode(NewOpcode, SDLoc(N), VT, N->getOperand(0), AllOnesVec); } +static SDValue matchPMADDWD(SelectionDAG &DAG, SDValue Op0, SDValue Op1, + SDLoc DL, EVT VT, const X86Subtarget &Subtarget) { + // Example of pattern we try to detect: + // t := (v8i32 mul (sext (v8i16 x0), (sext (v8i16 x1)))) + //(add (build_vector (extract_elt t, 0), + // (extract_elt t, 2), + // (extract_elt t, 4), + // (extract_elt t, 6)), + // (build_vector (extract_elt t, 1), + // (extract_elt t, 3), + // (extract_elt t, 5), + // (extract_elt t, 7))) + + if (!Subtarget.hasSSE2()) + return SDValue(); + + if (Op0.getOpcode() != ISD::BUILD_VECTOR || + Op1.getOpcode() != ISD::BUILD_VECTOR) + return SDValue(); + + if (!VT.isVector() || VT.getVectorElementType() != MVT::i32 || + VT.getVectorNumElements() < 4 || + !isPowerOf2_32(VT.getVectorNumElements())) + return SDValue(); + + // Check if one of Op0,Op1 is of the form: + // (build_vector (extract_elt Mul, 0), + // (extract_elt Mul, 2), + // (extract_elt Mul, 4), + // ... + // the other is of the form: + // (build_vector (extract_elt Mul, 1), + // (extract_elt Mul, 3), + // (extract_elt Mul, 5), + // ... + // and identify Mul. + SDValue Mul; + for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; i += 2) { + SDValue Op0L = Op0->getOperand(i), Op1L = Op1->getOperand(i), + Op0H = Op0->getOperand(i + 1), Op1H = Op1->getOperand(i + 1); + // TODO: Be more tolerant to undefs. + if (Op0L.getOpcode() != ISD::EXTRACT_VECTOR_ELT || + Op1L.getOpcode() != ISD::EXTRACT_VECTOR_ELT || + Op0H.getOpcode() != ISD::EXTRACT_VECTOR_ELT || + Op1H.getOpcode() != ISD::EXTRACT_VECTOR_ELT) + return SDValue(); + auto *Const0L = dyn_cast(Op0L->getOperand(1)); + auto *Const1L = dyn_cast(Op1L->getOperand(1)); + auto *Const0H = dyn_cast(Op0H->getOperand(1)); + auto *Const1H = dyn_cast(Op1H->getOperand(1)); + if (!Const0L || !Const1L || !Const0H || !Const1H) + return SDValue(); + unsigned Idx0L = Const0L->getZExtValue(), Idx1L = Const1L->getZExtValue(), + Idx0H = Const0H->getZExtValue(), Idx1H = Const1H->getZExtValue(); + // Commutativity of mul allows factors of a product to reorder. + if (Idx0L > Idx1L) + std::swap(Idx0L, Idx1L); + if (Idx0H > Idx1H) + std::swap(Idx0H, Idx1H); + // Commutativity of add allows pairs of factors to reorder. + if (Idx0L > Idx0H) { + std::swap(Idx0L, Idx0H); + std::swap(Idx1L, Idx1H); + } + if (Idx0L != 2 * i || Idx1L != 2 * i + 1 || Idx0H != 2 * i + 2 || + Idx1H != 2 * i + 3) + return SDValue(); + if (!Mul) { + // First time an extract_elt's source vector is visited. Must be a MUL + // with 2X number of vector elements than the BUILD_VECTOR. + // Both extracts must be from same MUL. + Mul = Op0L->getOperand(0); + if (Mul->getOpcode() != ISD::MUL || + Mul.getValueType().getVectorNumElements() != 2 * e) + return SDValue(); + } + // Check that the extract is from the same MUL previously seen. + if (Mul != Op0L->getOperand(0) || Mul != Op1L->getOperand(0) || + Mul != Op0H->getOperand(0) || Mul != Op1H->getOperand(0)) + return SDValue(); + } + + // Check if the Mul source can be safely shrunk. + ShrinkMode Mode; + if (!canReduceVMulWidth(Mul.getNode(), DAG, Mode) || Mode == MULU16) + return SDValue(); + + auto PMADDBuilder = [](SelectionDAG &DAG, SDLoc DL, SDValue Op0, + SDValue Op1) { + // Shrink by adding truncate nodes and let DAGCombine fold with the + // sources. 
+ EVT InVT = Op0.getValueType(); + assert(InVT.getScalarType() == MVT::i32 && + "Unexpected scalar element type"); + assert(InVT == Op1.getValueType() && "Operands' types mismatch"); + EVT ResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, + InVT.getVectorNumElements() / 2); + EVT TruncVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16, + InVT.getVectorNumElements()); + return DAG.getNode(X86ISD::VPMADDWD, DL, ResVT, + DAG.getNode(ISD::TRUNCATE, DL, TruncVT, Op0), + DAG.getNode(ISD::TRUNCATE, DL, TruncVT, Op1)); + }; + return SplitBinaryOpsAndApply(DAG, Subtarget, DL, VT, Mul.getOperand(0), + Mul.getOperand(1), PMADDBuilder); +} + static SDValue combineAdd(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { const SDNodeFlags Flags = N->getFlags(); @@ -36552,6 +37540,9 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG, SDValue Op0 = N->getOperand(0); SDValue Op1 = N->getOperand(1); + if (SDValue MAdd = matchPMADDWD(DAG, Op0, Op1, SDLoc(N), VT, Subtarget)) + return MAdd; + // Try to synthesize horizontal adds from adds of shuffles. if (((Subtarget.hasSSSE3() && (VT == MVT::v8i16 || VT == MVT::v4i32)) || (Subtarget.hasInt256() && (VT == MVT::v16i16 || VT == MVT::v8i32))) && @@ -37101,8 +38092,11 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::FMSUBADD_RND: case X86ISD::FMADDSUB: case X86ISD::FMSUBADD: return combineFMADDSUB(N, DAG, Subtarget); + case X86ISD::MOVMSK: return combineMOVMSK(N, DAG, DCI); + case X86ISD::MGATHER: + case X86ISD::MSCATTER: case ISD::MGATHER: - case ISD::MSCATTER: return combineGatherScatter(N, DAG, DCI); + case ISD::MSCATTER: return combineGatherScatter(N, DAG, DCI, Subtarget); case X86ISD::TESTM: return combineTestM(N, DAG, Subtarget); case X86ISD::PCMPEQ: case X86ISD::PCMPGT: return combineVectorCompare(N, DAG, Subtarget); @@ -37118,6 +38112,11 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, bool X86TargetLowering::isTypeDesirableForOp(unsigned Opc, EVT VT) const { if (!isTypeLegal(VT)) return false; + + // There are no vXi8 shifts. + if (Opc == ISD::SHL && VT.isVector() && VT.getVectorElementType() == MVT::i8) + return false; + if (VT != MVT::i16) return true; @@ -38119,7 +39118,7 @@ void X86TargetLowering::insertCopiesSplitCSR( // fine for CXX_FAST_TLS since the C++-style TLS access functions should be // nounwind. If we want to generalize this later, we may need to emit // CFI pseudo-instructions. - assert(Entry->getParent()->getFunction()->hasFnAttribute( + assert(Entry->getParent()->getFunction().hasFnAttribute( Attribute::NoUnwind) && "Function should be nounwind in insertCopiesSplitCSR!"); Entry->addLiveIn(*I); @@ -38142,8 +39141,8 @@ bool X86TargetLowering::supportSwiftError() const { /// string if not applicable. StringRef X86TargetLowering::getStackProbeSymbolName(MachineFunction &MF) const { // If the function specifically requests stack probes, emit them. - if (MF.getFunction()->hasFnAttribute("probe-stack")) - return MF.getFunction()->getFnAttribute("probe-stack").getValueAsString(); + if (MF.getFunction().hasFnAttribute("probe-stack")) + return MF.getFunction().getFnAttribute("probe-stack").getValueAsString(); // Generally, if we aren't on Windows, the platform ABI does not include // support for stack probes, so don't emit them. diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 90830f4d5d11..56c33e5d1628 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -304,9 +304,6 @@ namespace llvm { // Vector FP round. 
VFPROUND, VFPROUND_RND, VFPROUNDS_RND, - // Convert a vector to mask, set bits base on MSB. - CVT2MASK, - // 128-bit vector logical left / right shift VSHLDQ, VSRLDQ, @@ -453,9 +450,6 @@ namespace llvm { // Broadcast subvector to vector. SUBV_BROADCAST, - // Extract vector element. - VEXTRACT, - /// SSE4A Extraction and Insertion. EXTRQI, INSERTQI, @@ -832,10 +826,18 @@ namespace llvm { /// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST. MVT hasFastEqualityCompare(unsigned NumBits) const override; + /// Allow multiple load pairs per block for smaller and faster code. + unsigned getMemcmpEqZeroLoadsPerBlock() const override { + return 2; + } + /// Return the value type to use for ISD::SETCC. EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override; + bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded, + TargetLoweringOpt &TLO) const override; + /// Determine which of the bits specified in Mask are known to be either /// zero or one and return them in the KnownZero/KnownOne bitsets. void computeKnownBitsForTargetNode(const SDValue Op, @@ -965,6 +967,7 @@ namespace llvm { /// true and stores the intrinsic information into the IntrinsicInfo that was /// passed to the function. bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, + MachineFunction &MF, unsigned Intrinsic) const override; /// Returns true if the target can instruction select the @@ -984,6 +987,9 @@ namespace llvm { bool isVectorClearMaskLegal(const SmallVectorImpl &Mask, EVT VT) const override; + /// Returns true if lowering to a jump table is allowed. + bool areJTsAllowed(const Function *Fn) const override; + /// If true, then instruction selection should /// seek to shrink the FP constant of the specified type to a smaller type /// in order to save space and / or reduce runtime. @@ -1025,6 +1031,8 @@ namespace llvm { return NumElem > 2; } + bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT) const override; + /// Intel processors have a unified instruction and data cache const char * getClearCacheBuiltinName() const override { return nullptr; // nothing to do, move along. 
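As context for the getMemcmpEqZeroLoadsPerBlock() override above (and for the OR-of-XORs setcc combine added earlier in this patch, see PR33325), the sketch below shows the kind of caller that is intended to benefit. It is only an illustration: the function name is invented, and the described lowering assumes an SSE2 target on which the memcmp expansion actually fires.

    #include <cstring>

    // With two load pairs allowed per block, a 32-byte equality test can be
    // expanded into two vector-sized integer loads per operand, combined as
    // (or (xor a0, b0), (xor a1, b1)) == 0, which the new combine lowers to
    // two PCMPEQB, an AND, and a single PMOVMSKB compared against 0xFFFF.
    bool blocks_equal(const char *a, const char *b) {
      return std::memcmp(a, b, 32) == 0;
    }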
@@ -1055,9 +1063,13 @@ namespace llvm { Value *getIRStackGuard(IRBuilder<> &IRB) const override; bool useLoadStackGuardNode() const override; + bool useStackGuardXorFP() const override; void insertSSPDeclarations(Module &M) const override; Value *getSDagStackGuard(const Module &M) const override; Value *getSSPStackGuardCheck(const Module &M) const override; + SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val, + const SDLoc &DL) const override; + /// Return true if the target stores SafeStack pointer at a fixed offset in /// some non-standard address space, and populates the address space and @@ -1165,11 +1177,8 @@ namespace llvm { bool isReplace) const; SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; - SDValue ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG) const; - SDValue InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; unsigned getGlobalWrapperKind(const GlobalValue *GV = nullptr) const; @@ -1183,9 +1192,6 @@ namespace llvm { SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG) const; SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; @@ -1225,8 +1231,8 @@ namespace llvm { const SDLoc &dl, SelectionDAG &DAG) const override; bool supportSplitCSR(MachineFunction *MF) const override { - return MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS && - MF->getFunction()->hasFnAttribute(Attribute::NoUnwind); + return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS && + MF->getFunction().hasFnAttribute(Attribute::NoUnwind); } void initializeSplitCSR(MachineBasicBlock *Entry) const override; void insertCopiesSplitCSR( @@ -1296,6 +1302,9 @@ namespace llvm { MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI, MachineBasicBlock *BB) const; + MachineBasicBlock *EmitLoweredRetpoline(MachineInstr &MI, + MachineBasicBlock *BB) const; + MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const; @@ -1442,6 +1451,7 @@ namespace llvm { const SDValue &getIndex() const { return getOperand(4); } const SDValue &getMask() const { return getOperand(2); } const SDValue &getValue() const { return getOperand(1); } + const SDValue &getScale() const { return getOperand(5); } static bool classof(const SDNode *N) { return N->getOpcode() == X86ISD::MGATHER || diff --git a/lib/Target/X86/X86IndirectBranchTracking.cpp b/lib/Target/X86/X86IndirectBranchTracking.cpp new file mode 100644 index 000000000000..1570e7a0b2df --- /dev/null +++ b/lib/Target/X86/X86IndirectBranchTracking.cpp @@ -0,0 +1,163 @@ +//===---- X86IndirectBranchTracking.cpp - Enables CET IBT mechanism -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a pass that enables Indirect Branch Tracking (IBT) as part
+// of Control-Flow Enforcement Technology (CET).
+// The pass adds ENDBR (End Branch) machine instructions at the beginning of
+// each basic block or function that is referenced by an indirect jump/call
+// instruction.
+// The ENDBR instructions have a NOP encoding and as such are ignored on
+// targets that do not support the CET IBT mechanism.
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86InstrInfo.h"
+#include "X86Subtarget.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "x86-indirect-branch-tracking"
+
+static cl::opt<bool> IndirectBranchTracking(
+    "x86-indirect-branch-tracking", cl::init(false), cl::Hidden,
+    cl::desc("Enable X86 indirect branch tracking pass."));
+
+STATISTIC(NumEndBranchAdded, "Number of ENDBR instructions added");
+
+namespace {
+class X86IndirectBranchTrackingPass : public MachineFunctionPass {
+public:
+  X86IndirectBranchTrackingPass() : MachineFunctionPass(ID) {}
+
+  StringRef getPassName() const override {
+    return "X86 Indirect Branch Tracking";
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+private:
+  static char ID;
+
+  /// Machine instruction info used throughout the class.
+  const X86InstrInfo *TII;
+
+  /// Endbr opcode for the current machine function.
+  unsigned int EndbrOpcode;
+
+  /// The function looks for an indirect jump terminator in MBB predecessors.
+  ///
+  /// Jump tables are generated when lowering switch-case statements or
+  /// setjmp/longjmp functions.
+  /// As a result, only indirect jumps use jump tables.
+  /// The function verifies this assumption.
+  ///
+  /// \return true if the input \p MBB has a predecessor MBB with an indirect
+  /// branch terminator, or false otherwise.
+  bool verifyIndirectJump(const MachineBasicBlock *MBB) const;
+
+  /// Adds a new ENDBR instruction to the beginning of the MBB.
+  /// The function will not add it if one already exists.
+  /// It will add the ENDBR32 or ENDBR64 opcode, depending on the target.
+  void addENDBR(MachineBasicBlock &MBB) const;
+};
+
+} // end anonymous namespace
+
+char X86IndirectBranchTrackingPass::ID = 0;
+
+FunctionPass *llvm::createX86IndirectBranchTrackingPass() {
+  return new X86IndirectBranchTrackingPass();
+}
+
+bool X86IndirectBranchTrackingPass::verifyIndirectJump(
+    const MachineBasicBlock *MBB) const {
+  for (auto &PredMBB : MBB->predecessors())
+    for (auto &TermI : PredMBB->terminators())
+      if (TermI.isIndirectBranch())
+        return true;
+
+  return false;
+}
+
+void X86IndirectBranchTrackingPass::addENDBR(MachineBasicBlock &MBB) const {
+  assert(TII && "Target instruction info was not initialized");
+  assert((X86::ENDBR64 == EndbrOpcode || X86::ENDBR32 == EndbrOpcode) &&
+         "Unexpected Endbr opcode");
+
+  auto MI = MBB.begin();
+  // If the MBB is empty or the first instruction is not ENDBR,
+  // add the ENDBR instruction to the beginning of the MBB.
+  if (MI == MBB.end() || EndbrOpcode != MI->getOpcode()) {
+    BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(EndbrOpcode));
+    NumEndBranchAdded++;
+  }
+}
+
+bool X86IndirectBranchTrackingPass::runOnMachineFunction(MachineFunction &MF) {
+  const X86Subtarget &SubTarget = MF.getSubtarget<X86Subtarget>();
+
+  // Make sure that the target supports the ENDBR instruction.
+  if (!SubTarget.hasIBT())
+    return false;
+
+  // Check that the cf-protection-branch module flag is enabled.
+  Metadata *isCFProtectionSupported =
+      MF.getMMI().getModule()->getModuleFlag("cf-protection-branch");
+  if (!isCFProtectionSupported && !IndirectBranchTracking)
+    return false;
+
+  // True if the current MF was changed and false otherwise.
+  bool Changed = false;
+
+  TII = SubTarget.getInstrInfo();
+  EndbrOpcode = SubTarget.is64Bit() ? X86::ENDBR64 : X86::ENDBR32;
+
+  // A non-internal function, or a function whose address was taken, can be
+  // invoked through indirect calls. Mark its first BB with an ENDBR
+  // instruction.
+  // TODO: Do not add an ENDBR instruction in case the notrack attribute is
+  // used.
+  if (MF.getFunction().hasAddressTaken() ||
+      !MF.getFunction().hasLocalLinkage()) {
+    auto MBB = MF.begin();
+    addENDBR(*MBB);
+    Changed = true;
+  }
+
+  for (auto &MBB : MF) {
+    // Find all basic blocks whose address was taken (for example
+    // by an indirect jump) and add an ENDBR instruction.
+    if (MBB.hasAddressTaken()) {
+      addENDBR(MBB);
+      Changed = true;
+    }
+  }
+
+  // Add ENDBR instructions to the MBB destinations of jump tables.
+  // TODO: In case of more than 50 destinations, do not add ENDBR and
+  // instead add DS_PREFIX.
+  if (MachineJumpTableInfo *JTI = MF.getJumpTableInfo()) {
+    for (const auto &JT : JTI->getJumpTables()) {
+      for (auto *MBB : JT.MBBs) {
+        // This assert verifies the assumption that this MBB has an indirect
+        // jump terminator in one of its predecessors.
+        assert(verifyIndirectJump(MBB) &&
+               "The MBB is not the destination of an indirect jump");
+
+        addENDBR(*MBB);
+        Changed = true;
+      }
+    }
+  }
+
+  return Changed;
+}
diff --git a/lib/Target/X86/X86Instr3DNow.td b/lib/Target/X86/X86Instr3DNow.td
index 2acd8d17beb2..0d30b7d47f3e 100644
--- a/lib/Target/X86/X86Instr3DNow.td
+++ b/lib/Target/X86/X86Instr3DNow.td
@@ -116,14 +116,30 @@ defm PMULHRW : I3DNow_binop_rm_int<0xB7, "pmulhrw", I3DNOW_MISC_FUNC_ITINS, 1>;
 
 def FEMMS : I3DNow<0x0E, RawFrm, (outs), (ins), "femms",
                    [(int_x86_mmx_femms)], IIC_MMX_EMMS>;
 
+// When PREFETCHWT1 is supported, we want to use it for everything but T0.
+def PrefetchWLevel : PatFrag<(ops), (i32 imm), [{
+  return N->getSExtValue() == 3 || !Subtarget->hasPREFETCHWT1();
+}]>;
+
+// Use PREFETCHWT1 for NTA, T2, T1.
+def PrefetchWT1Level : ImmLeaf; + let SchedRW = [WriteLoad] in { +let Predicates = [Has3DNow, NoSSEPrefetch] in def PREFETCH : I3DNow<0x0D, MRM0m, (outs), (ins i8mem:$addr), "prefetch\t$addr", - [(prefetch addr:$addr, (i32 0), imm, (i32 1))], + [(prefetch addr:$addr, imm, imm, (i32 1))], IIC_SSE_PREFETCH>; + def PREFETCHW : I<0x0D, MRM1m, (outs), (ins i8mem:$addr), "prefetchw\t$addr", - [(prefetch addr:$addr, (i32 1), (i32 3), (i32 1))], + [(prefetch addr:$addr, (i32 1), (i32 PrefetchWLevel), (i32 1))], IIC_SSE_PREFETCH>, TB, Requires<[HasPrefetchW]>; + +def PREFETCHWT1 : I<0x0D, MRM2m, (outs), (ins i8mem:$addr), "prefetchwt1\t$addr", + [(prefetch addr:$addr, (i32 1), (i32 PrefetchWT1Level), (i32 1))], + IIC_SSE_PREFETCH>, TB, Requires<[HasPREFETCHWT1]>; } // "3DNowA" instructions diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index c4e89bdac5ad..7f267e7f6871 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -212,8 +212,8 @@ multiclass AVX512_maskable_custom O, Format F, list Pattern, list MaskingPattern, list ZeroMaskingPattern, + InstrItinClass itin, string MaskingConstraint = "", - InstrItinClass itin = NoItinerary, bit IsCommutable = 0, bit IsKCommutable = 0> { let isCommutable = IsCommutable in @@ -252,9 +252,9 @@ multiclass AVX512_maskable_common O, Format F, X86VectorVTInfo _, string OpcodeStr, string AttSrcAsm, string IntelSrcAsm, dag RHS, dag MaskingRHS, + InstrItinClass itin, SDNode Select = vselect, string MaskingConstraint = "", - InstrItinClass itin = NoItinerary, bit IsCommutable = 0, bit IsKCommutable = 0> : AVX512_maskable_custom O, Format F, X86VectorVTInfo _, [(set _.RC:$dst, MaskingRHS)], [(set _.RC:$dst, (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))], - MaskingConstraint, itin, IsCommutable, + itin, MaskingConstraint, IsCommutable, IsKCommutable>; // This multiclass generates the unconditional/non-masking, the masking and @@ -274,7 +274,7 @@ multiclass AVX512_maskable_split O, Format F, X86VectorVTInfo _, dag Outs, dag Ins, string OpcodeStr, string AttSrcAsm, string IntelSrcAsm, dag RHS, dag MaskRHS, - InstrItinClass itin = NoItinerary, + InstrItinClass itin, bit IsCommutable = 0, bit IsKCommutable = 0, SDNode Select = vselect> : AVX512_maskable_custom O, Format F, X86VectorVTInfo _, (Select _.KRCWM:$mask, MaskRHS, _.RC:$src0))], [(set _.RC:$dst, (Select _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))], - "$src0 = $dst", itin, IsCommutable, IsKCommutable>; + itin, "$src0 = $dst", IsCommutable, IsKCommutable>; // This multiclass generates the unconditional/non-masking, the masking and // the zero-masking variant of the vector instruction. In the masking case, the @@ -295,15 +295,15 @@ multiclass AVX512_maskable O, Format F, X86VectorVTInfo _, dag Outs, dag Ins, string OpcodeStr, string AttSrcAsm, string IntelSrcAsm, dag RHS, - InstrItinClass itin = NoItinerary, + InstrItinClass itin, bit IsCommutable = 0, bit IsKCommutable = 0, SDNode Select = vselect> : AVX512_maskable_common; + (Select _.KRCWM:$mask, RHS, _.RC:$src0), itin, + Select, "$src0 = $dst", IsCommutable, IsKCommutable>; // This multiclass generates the unconditional/non-masking, the masking and // the zero-masking variant of the scalar instruction. 
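Returning to the PREFETCHW/PREFETCHWT1 patterns in the X86Instr3DNow.td hunk above: selection is keyed on the locality operand of the prefetch node, so PREFETCHW keeps handling T0 (locality 3) while lower locality hints can map to PREFETCHWT1 when the feature is available. A rough illustration in terms of __builtin_prefetch follows; the function name is made up and the instruction choice assumes a target with both PRFCHW and PREFETCHWT1 enabled.

    // The second argument of __builtin_prefetch requests a write prefetch and
    // the third is the locality hint (0 = NTA, 1 = T2, 2 = T1, 3 = T0).
    void warm_lines_for_write(char *p) {
      __builtin_prefetch(p, 1, 3);       // locality 3: still selects prefetchw
      __builtin_prefetch(p + 64, 1, 1);  // locality 1: may select prefetchwt1
    }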
@@ -311,7 +311,7 @@ multiclass AVX512_maskable_scalar O, Format F, X86VectorVTInfo _, dag Outs, dag Ins, string OpcodeStr, string AttSrcAsm, string IntelSrcAsm, dag RHS, - InstrItinClass itin = NoItinerary, + InstrItinClass itin, bit IsCommutable = 0> : AVX512_maskable; @@ -323,7 +323,8 @@ multiclass AVX512_maskable_scalar O, Format F, X86VectorVTInfo _, multiclass AVX512_maskable_3src O, Format F, X86VectorVTInfo _, dag Outs, dag NonTiedIns, string OpcodeStr, string AttSrcAsm, string IntelSrcAsm, - dag RHS, bit IsCommutable = 0, + dag RHS, InstrItinClass itin, + bit IsCommutable = 0, bit IsKCommutable = 0, SDNode Select = vselect, bit MaskOnly = 0> : @@ -333,29 +334,31 @@ multiclass AVX512_maskable_3src O, Format F, X86VectorVTInfo _, !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns), OpcodeStr, AttSrcAsm, IntelSrcAsm, !if(MaskOnly, (null_frag), RHS), - (Select _.KRCWM:$mask, RHS, _.RC:$src1), - Select, "", NoItinerary, IsCommutable, IsKCommutable>; + (Select _.KRCWM:$mask, RHS, _.RC:$src1), itin, + Select, "", IsCommutable, IsKCommutable>; multiclass AVX512_maskable_3src_scalar O, Format F, X86VectorVTInfo _, dag Outs, dag NonTiedIns, string OpcodeStr, string AttSrcAsm, string IntelSrcAsm, - dag RHS, bit IsCommutable = 0, + dag RHS, InstrItinClass itin, + bit IsCommutable = 0, bit IsKCommutable = 0, bit MaskOnly = 0> : AVX512_maskable_3src; multiclass AVX512_maskable_in_asm O, Format F, X86VectorVTInfo _, dag Outs, dag Ins, string OpcodeStr, string AttSrcAsm, string IntelSrcAsm, - list Pattern> : + list Pattern, + InstrItinClass itin> : AVX512_maskable_custom; + itin, "$src0 = $dst">; // Instruction with mask that puts result in mask register, @@ -367,17 +370,18 @@ multiclass AVX512_maskable_custom_cmp O, Format F, string AttSrcAsm, string IntelSrcAsm, list Pattern, list MaskingPattern, + InstrItinClass itin, bit IsCommutable = 0> { let isCommutable = IsCommutable in def NAME: AVX512; + Pattern, itin>; def NAME#k: AVX512, EVEX_K; + MaskingPattern, itin>, EVEX_K; } multiclass AVX512_maskable_common_cmp O, Format F, X86VectorVTInfo _, @@ -386,27 +390,30 @@ multiclass AVX512_maskable_common_cmp O, Format F, X86VectorVTInfo _, string OpcodeStr, string AttSrcAsm, string IntelSrcAsm, dag RHS, dag MaskingRHS, + InstrItinClass itin, bit IsCommutable = 0> : AVX512_maskable_custom_cmp; + [(set _.KRC:$dst, MaskingRHS)], itin, IsCommutable>; multiclass AVX512_maskable_cmp O, Format F, X86VectorVTInfo _, dag Outs, dag Ins, string OpcodeStr, string AttSrcAsm, string IntelSrcAsm, - dag RHS, bit IsCommutable = 0> : + dag RHS, InstrItinClass itin, + bit IsCommutable = 0> : AVX512_maskable_common_cmp; + (and _.KRCWM:$mask, RHS), itin, IsCommutable>; multiclass AVX512_maskable_cmp_alt O, Format F, X86VectorVTInfo _, dag Outs, dag Ins, string OpcodeStr, - string AttSrcAsm, string IntelSrcAsm> : + string AttSrcAsm, string IntelSrcAsm, + InstrItinClass itin> : AVX512_maskable_custom_cmp; + AttSrcAsm, IntelSrcAsm, [],[], itin>; // This multiclass generates the unconditional/non-masking, the masking and // the zero-masking variant of the vector instruction. 
In the masking case, the @@ -415,7 +422,7 @@ multiclass AVX512_maskable_logic O, Format F, X86VectorVTInfo _, dag Outs, dag Ins, string OpcodeStr, string AttSrcAsm, string IntelSrcAsm, dag RHS, dag MaskedRHS, - InstrItinClass itin = NoItinerary, + InstrItinClass itin, bit IsCommutable = 0, SDNode Select = vselect> : AVX512_maskable_custom O, Format F, X86VectorVTInfo _, [(set _.RC:$dst, (Select _.KRCWM:$mask, MaskedRHS, _.ImmAllZerosV))], - "$src0 = $dst", itin, IsCommutable>; + itin, "$src0 = $dst", IsCommutable>; // Alias instruction that maps zero vector to pxor / xorp* for AVX-512. // This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then -// swizzled by ExecutionDepsFix to pxor. +// swizzled by ExecutionDomainFix to pxor. // We set canFoldAsLoad because this can be converted to a constant-pool // load of an all-zeros value if folding it would be beneficial. let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, @@ -446,7 +453,7 @@ def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "", // Alias instructions that allow VPTERNLOG to be used with a mask to create // a mix of all ones and all zeros elements. This is done this way to force // the same register to be used as input for all three sources. -let isPseudo = 1, Predicates = [HasAVX512] in { +let isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteVecALU] in { def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst), (ins VK16WM:$mask), "", [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask), @@ -486,7 +493,8 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, multiclass vinsert_for_size_split { + SDPatternOperator vinsert_for_mask, + OpndItins itins> { let hasSideEffects = 0, ExeDomain = To.ExeDomain in { defm rr : AVX512_maskable_split, AVX512AIi8Base, EVEX_4V; - + (iPTR imm)), itins.rr>, + AVX512AIi8Base, EVEX_4V, Sched<[itins.Sched]>; let mayLoad = 1 in defm rm : AVX512_maskable_split, AVX512AIi8Base, EVEX_4V, - EVEX_CD8; + (iPTR imm)), itins.rm>, AVX512AIi8Base, EVEX_4V, + EVEX_CD8, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } // Passes the same pattern operator for masked and unmasked ops. multiclass vinsert_for_size : - vinsert_for_size_split; + SDPatternOperator vinsert_insert, + OpndItins itins> : + vinsert_for_size_split; multiclass vinsert_for_size_lowering { + ValueType EltVT64, int Opcode256, + OpndItins itins> { let Predicates = [HasVLX] in defm NAME # "32x4Z256" : vinsert_for_size, X86VectorVTInfo< 8, EltVT32, VR256X>, - vinsert128_insert>, EVEX_V256; + vinsert128_insert, itins>, EVEX_V256; defm NAME # "32x4Z" : vinsert_for_size, X86VectorVTInfo<16, EltVT32, VR512>, - vinsert128_insert>, EVEX_V512; + vinsert128_insert, itins>, EVEX_V512; defm NAME # "64x4Z" : vinsert_for_size, X86VectorVTInfo< 8, EltVT64, VR512>, - vinsert256_insert>, VEX_W, EVEX_V512; + vinsert256_insert, itins>, VEX_W, EVEX_V512; // Even with DQI we'd like to only use these instructions for masking. let Predicates = [HasVLX, HasDQI] in defm NAME # "64x2Z256" : vinsert_for_size_split, X86VectorVTInfo< 4, EltVT64, VR256X>, - null_frag, vinsert128_insert>, VEX_W, EVEX_V256; + null_frag, vinsert128_insert, itins>, + VEX_W, EVEX_V256; // Even with DQI we'd like to only use these instructions for masking. 
let Predicates = [HasDQI] in { defm NAME # "64x2Z" : vinsert_for_size_split, X86VectorVTInfo< 8, EltVT64, VR512>, - null_frag, vinsert128_insert>, VEX_W, EVEX_V512; + null_frag, vinsert128_insert, itins>, + VEX_W, EVEX_V512; defm NAME # "32x8Z" : vinsert_for_size_split, X86VectorVTInfo<16, EltVT32, VR512>, - null_frag, vinsert256_insert>, EVEX_V512; + null_frag, vinsert256_insert, itins>, + EVEX_V512; } } -defm VINSERTF : vinsert_for_type; -defm VINSERTI : vinsert_for_type; +// FIXME: Is there a better scheduler itinerary for VINSERTF/VINSERTI? +let Sched = WriteFShuffle256 in +def AVX512_VINSERTF : OpndItins< + IIC_SSE_SHUFP, IIC_SSE_SHUFP +>; +let Sched = WriteShuffle256 in +def AVX512_VINSERTI : OpndItins< + IIC_SSE_PSHUF_RI, IIC_SSE_PSHUF_MI +>; + +defm VINSERTF : vinsert_for_type; +defm VINSERTI : vinsert_for_type; // Codegen pattern with the alternative types, // Even with AVX512DQ we'll still use these for unmasked operations. @@ -754,14 +778,15 @@ let ExeDomain = SSEPackedSingle in { def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2, u8imm:$src3), "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, imm:$src3))]>, - EVEX_4V; + [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, imm:$src3))], + IIC_SSE_INSERTPS_RR>, EVEX_4V, Sched<[WriteFShuffle]>; def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst), (ins VR128X:$src1, f32mem:$src2, u8imm:$src3), "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", [(set VR128X:$dst, (X86insertps VR128X:$src1, (v4f32 (scalar_to_vector (loadf32 addr:$src2))), - imm:$src3))]>, EVEX_4V, EVEX_CD8<32, CD8VT1>; + imm:$src3))], IIC_SSE_INSERTPS_RM>, EVEX_4V, + EVEX_CD8<32, CD8VT1>, Sched<[WriteFShuffleLd, ReadAfterLd]>; } //===----------------------------------------------------------------------===// @@ -773,7 +798,8 @@ def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst), multiclass vextract_for_size_split { + SDPatternOperator vextract_for_mask, + OpndItins itins> { let hasSideEffects = 0, ExeDomain = To.ExeDomain in { defm rr : AVX512_maskable_split, - AVX512AIi8Base, EVEX; + (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm)), + itins.rr>, AVX512AIi8Base, EVEX, Sched<[itins.Sched]>; + def mr : AVX512AIi8, EVEX; + addr:$dst)], itins.rm>, EVEX, + Sched<[itins.Sched.Folded, ReadAfterLd]>; let mayStore = 1, hasSideEffects = 0 in def mrk : AVX512AIi8, EVEX_K, EVEX; + [], itins.rm>, EVEX_K, EVEX, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } // Passes the same pattern operator for masked and unmasked ops. 
multiclass vextract_for_size : - vextract_for_size_split; + SDPatternOperator vextract_extract, + OpndItins itins> : + vextract_for_size_split; // Codegen pattern for the alternative types multiclass vextract_for_size_lowering { + ValueType EltVT64, int Opcode256, + OpndItins itins> { let Predicates = [HasAVX512] in { defm NAME # "32x4Z" : vextract_for_size, X86VectorVTInfo< 4, EltVT32, VR128X>, - vextract128_extract>, + vextract128_extract, itins>, EVEX_V512, EVEX_CD8<32, CD8VT4>; defm NAME # "64x4Z" : vextract_for_size, X86VectorVTInfo< 4, EltVT64, VR256X>, - vextract256_extract>, + vextract256_extract, itins>, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>; } let Predicates = [HasVLX] in defm NAME # "32x4Z256" : vextract_for_size, X86VectorVTInfo< 4, EltVT32, VR128X>, - vextract128_extract>, + vextract128_extract, itins>, EVEX_V256, EVEX_CD8<32, CD8VT4>; // Even with DQI we'd like to only use these instructions for masking. @@ -850,7 +881,7 @@ multiclass vextract_for_type, X86VectorVTInfo< 2, EltVT64, VR128X>, - null_frag, vextract128_extract>, + null_frag, vextract128_extract, itins>, VEX_W, EVEX_V256, EVEX_CD8<64, CD8VT2>; // Even with DQI we'd like to only use these instructions for masking. @@ -858,18 +889,28 @@ multiclass vextract_for_type, X86VectorVTInfo< 2, EltVT64, VR128X>, - null_frag, vextract128_extract>, + null_frag, vextract128_extract, itins>, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>; defm NAME # "32x8Z" : vextract_for_size_split, X86VectorVTInfo< 8, EltVT32, VR256X>, - null_frag, vextract256_extract>, + null_frag, vextract256_extract, itins>, EVEX_V512, EVEX_CD8<32, CD8VT8>; } } -defm VEXTRACTF : vextract_for_type; -defm VEXTRACTI : vextract_for_type; +// FIXME: Is there a better scheduler itinerary for VEXTRACTF/VEXTRACTI? +let Sched = WriteFShuffle256 in +def AVX512_VEXTRACTF : OpndItins< + IIC_SSE_SHUFP, IIC_SSE_SHUFP +>; +let Sched = WriteShuffle256 in +def AVX512_VEXTRACTI : OpndItins< + IIC_SSE_PSHUF_RI, IIC_SSE_PSHUF_MI +>; + +defm VEXTRACTF : vextract_for_type; +defm VEXTRACTI : vextract_for_type; // extract_subvector codegen patterns with the alternative types. // Even with AVX512DQ we'll still use these for unmasked operations. @@ -1075,14 +1116,15 @@ defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info, def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src1, u8imm:$src2), "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>, - EVEX, VEX_WIG; + [(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))], + IIC_SSE_EXTRACTPS_RR>, EVEX, VEX_WIG, Sched<[WriteFShuffle]>; def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs), (ins f32mem:$dst, VR128X:$src1, u8imm:$src2), "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2), - addr:$dst)]>, EVEX, VEX_WIG, EVEX_CD8<32, CD8VT1>; + addr:$dst)], IIC_SSE_EXTRACTPS_RM>, + EVEX, VEX_WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteFShuffleLd]>; //===---------------------------------------------------------------------===// // AVX-512 BROADCAST @@ -1109,6 +1151,7 @@ multiclass avx512_broadcast_scalar opc, string OpcodeStr, // Split version to allow mask and broadcast node to be different types. This // helps support the 32x2 broadcasts. 
multiclass avx512_broadcast_rm_split opc, string OpcodeStr, + SchedWrite SchedRR, SchedWrite SchedRM, X86VectorVTInfo MaskInfo, X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo, @@ -1124,8 +1167,8 @@ multiclass avx512_broadcast_rm_split opc, string OpcodeStr, (MaskInfo.VT (bitconvert (DestInfo.VT - (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src)))))>, - T8PD, EVEX; + (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))), + NoItinerary>, T8PD, EVEX, Sched<[SchedRR]>; let mayLoad = 1 in defm m : AVX512_maskable_split opc, string OpcodeStr, (MaskInfo.VT (bitconvert (DestInfo.VT (X86VBroadcast - (SrcInfo.ScalarLdFrag addr:$src)))))>, - T8PD, EVEX, EVEX_CD8; + (SrcInfo.ScalarLdFrag addr:$src))))), + NoItinerary>, T8PD, EVEX, EVEX_CD8, + Sched<[SchedRM]>; } def : Pat<(MaskInfo.VT @@ -1169,36 +1213,45 @@ multiclass avx512_broadcast_rm_split opc, string OpcodeStr, // Helper class to force mask and broadcast result to same type. multiclass avx512_broadcast_rm opc, string OpcodeStr, + SchedWrite SchedRR, SchedWrite SchedRM, X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo> : - avx512_broadcast_rm_split; + avx512_broadcast_rm_split; multiclass avx512_fp_broadcast_sd opc, string OpcodeStr, AVX512VLVectorVTInfo _> { - let Predicates = [HasAVX512] in - defm Z : avx512_broadcast_rm, + let Predicates = [HasAVX512] in { + defm Z : avx512_broadcast_rm, avx512_broadcast_scalar, - EVEX_V512; + EVEX_V512; + } let Predicates = [HasVLX] in { - defm Z256 : avx512_broadcast_rm, + defm Z256 : avx512_broadcast_rm, avx512_broadcast_scalar, - EVEX_V256; + EVEX_V256; } } multiclass avx512_fp_broadcast_ss opc, string OpcodeStr, AVX512VLVectorVTInfo _> { - let Predicates = [HasAVX512] in - defm Z : avx512_broadcast_rm, + let Predicates = [HasAVX512] in { + defm Z : avx512_broadcast_rm, avx512_broadcast_scalar, EVEX_V512; + } let Predicates = [HasVLX] in { - defm Z256 : avx512_broadcast_rm, + defm Z256 : avx512_broadcast_rm, avx512_broadcast_scalar, EVEX_V256; - defm Z128 : avx512_broadcast_rm, + defm Z128 : avx512_broadcast_rm, avx512_broadcast_scalar, EVEX_V128; } @@ -1213,17 +1266,18 @@ def : Pat<(int_x86_avx512_vbroadcast_ss_512 addr:$src), def : Pat<(int_x86_avx512_vbroadcast_sd_512 addr:$src), (VBROADCASTSDZm addr:$src)>; -multiclass avx512_int_broadcast_reg opc, X86VectorVTInfo _, - SDPatternOperator OpNode, +multiclass avx512_int_broadcast_reg opc, SchedWrite SchedRR, + X86VectorVTInfo _, SDPatternOperator OpNode, RegisterClass SrcRC> { let ExeDomain = _.ExeDomain in defm r : AVX512_maskable, T8PD, EVEX; + (_.VT (OpNode SrcRC:$src)), NoItinerary>, T8PD, EVEX, + Sched<[SchedRR]>; } -multiclass avx512_int_broadcastbw_reg opc, string Name, +multiclass avx512_int_broadcastbw_reg opc, string Name, SchedWrite SchedRR, X86VectorVTInfo _, SDPatternOperator OpNode, RegisterClass SrcRC, SubRegIndex Subreg> { let hasSideEffects = 0, ExeDomain = _.ExeDomain in @@ -1232,7 +1286,7 @@ multiclass avx512_int_broadcastbw_reg opc, string Name, !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)), !con((ins _.KRCWM:$mask), (ins GR32:$src)), "vpbroadcast"##_.Suffix, "$src", "$src", [], [], [], - "$src0 = $dst">, T8PD, EVEX; + NoItinerary, "$src0 = $dst">, T8PD, EVEX, Sched<[SchedRR]>; def : Pat <(_.VT (OpNode SrcRC:$src)), (!cast(Name#r) @@ -1251,13 +1305,13 @@ multiclass avx512_int_broadcastbw_reg_vl opc, string Name, AVX512VLVectorVTInfo _, SDPatternOperator OpNode, RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> { let Predicates = [prd] in - defm Z : avx512_int_broadcastbw_reg, EVEX_V512; + defm Z : avx512_int_broadcastbw_reg, 
EVEX_V512; let Predicates = [prd, HasVLX] in { - defm Z256 : avx512_int_broadcastbw_reg, EVEX_V256; - defm Z128 : avx512_int_broadcastbw_reg, EVEX_V128; + defm Z256 : avx512_int_broadcastbw_reg, EVEX_V256; + defm Z128 : avx512_int_broadcastbw_reg, EVEX_V128; } } @@ -1265,10 +1319,13 @@ multiclass avx512_int_broadcast_reg_vl opc, AVX512VLVectorVTInfo _, SDPatternOperator OpNode, RegisterClass SrcRC, Predicate prd> { let Predicates = [prd] in - defm Z : avx512_int_broadcast_reg, EVEX_V512; + defm Z : avx512_int_broadcast_reg, EVEX_V512; let Predicates = [prd, HasVLX] in { - defm Z256 : avx512_int_broadcast_reg, EVEX_V256; - defm Z128 : avx512_int_broadcast_reg, EVEX_V128; + defm Z256 : avx512_int_broadcast_reg, EVEX_V256; + defm Z128 : avx512_int_broadcast_reg, EVEX_V128; } } @@ -1282,11 +1339,6 @@ defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info, defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info, X86VBroadcast, GR64, HasAVX512>, VEX_W; -def : Pat <(v16i32 (X86vzext VK16WM:$mask)), - (VPBROADCASTDrZrkz VK16WM:$mask, (i32 (MOV32ri 0x1)))>; -def : Pat <(v8i64 (X86vzext VK8WM:$mask)), - (VPBROADCASTQrZrkz VK8WM:$mask, (i64 (MOV64ri 0x1)))>; - // Provide aliases for broadcast from the same register class that // automatically does the extract. multiclass avx512_int_broadcast_rm_lowering opc, string OpcodeStr, AVX512VLVectorVTInfo _, Predicate prd> { let Predicates = [prd] in { - defm Z : avx512_broadcast_rm, + defm Z : avx512_broadcast_rm, avx512_int_broadcast_rm_lowering<_.info512, _.info256>, EVEX_V512; // Defined separately to avoid redefinition. defm Z_Alt : avx512_int_broadcast_rm_lowering<_.info512, _.info512>; } let Predicates = [prd, HasVLX] in { - defm Z256 : avx512_broadcast_rm, + defm Z256 : avx512_broadcast_rm, avx512_int_broadcast_rm_lowering<_.info256, _.info256>, EVEX_V256; - defm Z128 : avx512_broadcast_rm, + defm Z128 : avx512_broadcast_rm, EVEX_V128; } } @@ -1328,8 +1383,9 @@ multiclass avx512_subvec_broadcast_rm opc, string OpcodeStr, defm rm : AVX512_maskable, - AVX5128IBase, EVEX; + (_Src.VT (bitconvert (_Src.LdFrag addr:$src))))), + NoItinerary>, AVX5128IBase, EVEX, + Sched<[WriteShuffleLd]>; } // This should be used for the AVX512DQ broadcast instructions. 
It disables @@ -1342,8 +1398,9 @@ multiclass avx512_subvec_broadcast_rm_dq opc, string OpcodeStr, (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src", (null_frag), (_Dst.VT (X86SubVBroadcast - (_Src.VT (bitconvert (_Src.LdFrag addr:$src)))))>, - AVX5128IBase, EVEX; + (_Src.VT (bitconvert (_Src.LdFrag addr:$src))))), + NoItinerary>, AVX5128IBase, EVEX, + Sched<[WriteShuffleLd]>; } let Predicates = [HasAVX512] in { @@ -1498,11 +1555,13 @@ defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8", multiclass avx512_common_broadcast_32x2 opc, string OpcodeStr, AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> { let Predicates = [HasDQI] in - defm Z : avx512_broadcast_rm_split, EVEX_V512; let Predicates = [HasDQI, HasVLX] in - defm Z256 : avx512_broadcast_rm_split, EVEX_V256; } @@ -1512,7 +1571,8 @@ multiclass avx512_common_broadcast_i32x2 opc, string OpcodeStr, avx512_common_broadcast_32x2 { let Predicates = [HasDQI, HasVLX] in - defm Z128 : avx512_broadcast_rm_split, EVEX_V128; } @@ -1546,7 +1606,8 @@ multiclass avx512_mask_broadcastm opc, string OpcodeStr, X86VectorVTInfo _, RegisterClass KRC> { def rr : AVX512XS8I, EVEX; + [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))], + IIC_SSE_PSHUF_RI>, EVEX, Sched<[WriteShuffle]>; } multiclass avx512_mask_broadcast opc, string OpcodeStr, @@ -1566,7 +1627,19 @@ defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q", //===----------------------------------------------------------------------===// // -- VPERMI2 - 3 source operands form -- -multiclass avx512_perm_i opc, string OpcodeStr, X86VectorVTInfo _> { + +let Sched = WriteFShuffle256 in +def AVX512_PERM2_F : OpndItins< + IIC_SSE_SHUFP, IIC_SSE_SHUFP +>; + +let Sched = WriteShuffle256 in +def AVX512_PERM2_I : OpndItins< + IIC_SSE_PSHUF_RI, IIC_SSE_PSHUF_MI +>; + +multiclass avx512_perm_i opc, string OpcodeStr, OpndItins itins, + X86VectorVTInfo _> { let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in { // The index operand in the pattern should really be an integer type. 
However, // if we do that and it happens to come from a bitcast, then it becomes @@ -1576,18 +1649,19 @@ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in { defm rr: AVX512_maskable_3src, EVEX_4V, - AVX5128IBase; + (_.VT (X86VPermi2X _.RC:$src1, _.RC:$src2, _.RC:$src3)), + itins.rr, 1>, EVEX_4V, AVX5128IBase, Sched<[itins.Sched]>; defm rm: AVX512_maskable_3src, - EVEX_4V, AVX5128IBase; + (_.VT (bitconvert (_.LdFrag addr:$src3))))), itins.rm, 1>, + EVEX_4V, AVX5128IBase, Sched<[itins.Sched.Folded, ReadAfterLd]>; } } -multiclass avx512_perm_i_mb opc, string OpcodeStr, + +multiclass avx512_perm_i_mb opc, string OpcodeStr, OpndItins itins, X86VectorVTInfo _> { let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in defm rmb: AVX512_maskable_3src opc, string OpcodeStr, !strconcat("$src2, ${src3}", _.BroadcastStr ), (_.VT (X86VPermi2X _.RC:$src1, _.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))), - 1>, AVX5128IBase, EVEX_4V, EVEX_B; + itins.rm, 1>, AVX5128IBase, EVEX_4V, EVEX_B, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } -multiclass avx512_perm_i_sizes opc, string OpcodeStr, +multiclass avx512_perm_i_sizes opc, string OpcodeStr, OpndItins itins, AVX512VLVectorVTInfo VTInfo> { - defm NAME: avx512_perm_i, - avx512_perm_i_mb, EVEX_V512; + defm NAME: avx512_perm_i, + avx512_perm_i_mb, EVEX_V512; let Predicates = [HasVLX] in { - defm NAME#128: avx512_perm_i, - avx512_perm_i_mb, EVEX_V128; - defm NAME#256: avx512_perm_i, - avx512_perm_i_mb, EVEX_V256; + defm NAME#128: avx512_perm_i, + avx512_perm_i_mb, EVEX_V128; + defm NAME#256: avx512_perm_i, + avx512_perm_i_mb, EVEX_V256; } } multiclass avx512_perm_i_sizes_bw opc, string OpcodeStr, - AVX512VLVectorVTInfo VTInfo, - Predicate Prd> { + OpndItins itins, + AVX512VLVectorVTInfo VTInfo, + Predicate Prd> { let Predicates = [Prd] in - defm NAME: avx512_perm_i, EVEX_V512; + defm NAME: avx512_perm_i, EVEX_V512; let Predicates = [Prd, HasVLX] in { - defm NAME#128: avx512_perm_i, EVEX_V128; - defm NAME#256: avx512_perm_i, EVEX_V256; + defm NAME#128: avx512_perm_i, EVEX_V128; + defm NAME#256: avx512_perm_i, EVEX_V256; } } -defm VPERMI2D : avx512_perm_i_sizes<0x76, "vpermi2d", +defm VPERMI2D : avx512_perm_i_sizes<0x76, "vpermi2d", AVX512_PERM2_I, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; -defm VPERMI2Q : avx512_perm_i_sizes<0x76, "vpermi2q", +defm VPERMI2Q : avx512_perm_i_sizes<0x76, "vpermi2q", AVX512_PERM2_I, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>; -defm VPERMI2W : avx512_perm_i_sizes_bw<0x75, "vpermi2w", +defm VPERMI2W : avx512_perm_i_sizes_bw<0x75, "vpermi2w", AVX512_PERM2_I, avx512vl_i16_info, HasBWI>, VEX_W, EVEX_CD8<16, CD8VF>; -defm VPERMI2B : avx512_perm_i_sizes_bw<0x75, "vpermi2b", +defm VPERMI2B : avx512_perm_i_sizes_bw<0x75, "vpermi2b", AVX512_PERM2_I, avx512vl_i8_info, HasVBMI>, EVEX_CD8<8, CD8VF>; -defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", +defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", AVX512_PERM2_F, avx512vl_f32_info>, EVEX_CD8<32, CD8VF>; -defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", +defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", AVX512_PERM2_F, avx512vl_f64_info>, VEX_W, EVEX_CD8<64, CD8VF>; // VPERMT2 -multiclass avx512_perm_t opc, string OpcodeStr, +multiclass avx512_perm_t opc, string OpcodeStr, OpndItins itins, X86VectorVTInfo _, X86VectorVTInfo IdxVT> { let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in { defm rr: AVX512_maskable_3src, - EVEX_4V, AVX5128IBase; + (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)), + itins.rr, 1>, EVEX_4V, 
AVX5128IBase, Sched<[itins.Sched]>; defm rm: AVX512_maskable_3src, - EVEX_4V, AVX5128IBase; + (bitconvert (_.LdFrag addr:$src3)))), itins.rm, 1>, + EVEX_4V, AVX5128IBase, Sched<[itins.Sched.Folded, ReadAfterLd]>; } } -multiclass avx512_perm_t_mb opc, string OpcodeStr, +multiclass avx512_perm_t_mb opc, string OpcodeStr, OpndItins itins, X86VectorVTInfo _, X86VectorVTInfo IdxVT> { let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in defm rmb: AVX512_maskable_3src opc, string OpcodeStr, !strconcat("$src2, ${src3}", _.BroadcastStr ), (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))), - 1>, AVX5128IBase, EVEX_4V, EVEX_B; + itins.rm, 1>, AVX5128IBase, EVEX_4V, EVEX_B, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } -multiclass avx512_perm_t_sizes opc, string OpcodeStr, +multiclass avx512_perm_t_sizes opc, string OpcodeStr, OpndItins itins, AVX512VLVectorVTInfo VTInfo, AVX512VLVectorVTInfo ShuffleMask> { - defm NAME: avx512_perm_t, - avx512_perm_t_mb, EVEX_V512; let Predicates = [HasVLX] in { - defm NAME#128: avx512_perm_t, - avx512_perm_t_mb, EVEX_V128; - defm NAME#256: avx512_perm_t, - avx512_perm_t_mb, EVEX_V256; } } -multiclass avx512_perm_t_sizes_bw opc, string OpcodeStr, +multiclass avx512_perm_t_sizes_bw opc, string OpcodeStr, OpndItins itins, AVX512VLVectorVTInfo VTInfo, AVX512VLVectorVTInfo Idx, Predicate Prd> { let Predicates = [Prd] in - defm NAME: avx512_perm_t, EVEX_V512; let Predicates = [Prd, HasVLX] in { - defm NAME#128: avx512_perm_t, EVEX_V128; - defm NAME#256: avx512_perm_t, EVEX_V256; } } -defm VPERMT2D : avx512_perm_t_sizes<0x7E, "vpermt2d", +defm VPERMT2D : avx512_perm_t_sizes<0x7E, "vpermt2d", AVX512_PERM2_I, avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; -defm VPERMT2Q : avx512_perm_t_sizes<0x7E, "vpermt2q", +defm VPERMT2Q : avx512_perm_t_sizes<0x7E, "vpermt2q", AVX512_PERM2_I, avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>; -defm VPERMT2W : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", +defm VPERMT2W : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", AVX512_PERM2_I, avx512vl_i16_info, avx512vl_i16_info, HasBWI>, VEX_W, EVEX_CD8<16, CD8VF>; -defm VPERMT2B : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", +defm VPERMT2B : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", AVX512_PERM2_I, avx512vl_i8_info, avx512vl_i8_info, HasVBMI>, EVEX_CD8<8, CD8VF>; -defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", +defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", AVX512_PERM2_F, avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; -defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", +defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", AVX512_PERM2_F, avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>; //===----------------------------------------------------------------------===// // AVX-512 - BLEND using mask // -multiclass avx512_blendmask opc, string OpcodeStr, X86VectorVTInfo _> { + +let Sched = WriteFVarBlend in +def AVX512_BLENDM : OpndItins< + IIC_SSE_ALU_F32P_RR, IIC_SSE_ALU_F32P_RM +>; + +let Sched = WriteVarBlend in +def AVX512_PBLENDM : OpndItins< + IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM +>; + +multiclass avx512_blendmask opc, string OpcodeStr, OpndItins itins, + X86VectorVTInfo _> { let ExeDomain = _.ExeDomain, hasSideEffects = 0 in { def rr : AVX5128I, EVEX_4V; + [], itins.rr>, EVEX_4V, Sched<[itins.Sched]>; def rrk : AVX5128I, EVEX_4V, EVEX_K; + [], itins.rr>, EVEX_4V, EVEX_K, Sched<[itins.Sched]>; def rrkz : AVX5128I, EVEX_4V, EVEX_KZ; + [], itins.rr>, EVEX_4V, EVEX_KZ, 
Sched<[itins.Sched]>; let mayLoad = 1 in { def rm : AVX5128I, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>; + [], itins.rm>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; def rmk : AVX5128I, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>; + [], itins.rm>, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; def rmkz : AVX5128I, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>; + [], itins.rm>, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } } -multiclass avx512_blendmask_rmb opc, string OpcodeStr, X86VectorVTInfo _> { - +multiclass avx512_blendmask_rmb opc, string OpcodeStr, OpndItins itins, + X86VectorVTInfo _> { let mayLoad = 1, hasSideEffects = 0 in { def rmbk : AVX5128I, EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>; + [], itins.rm>, EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; def rmb : AVX5128I, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>; + [], itins.rm>, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } -multiclass blendmask_dq opc, string OpcodeStr, +multiclass blendmask_dq opc, string OpcodeStr, OpndItins itins, AVX512VLVectorVTInfo VTInfo> { - defm Z : avx512_blendmask , - avx512_blendmask_rmb , EVEX_V512; + defm Z : avx512_blendmask , + avx512_blendmask_rmb , EVEX_V512; let Predicates = [HasVLX] in { - defm Z256 : avx512_blendmask, - avx512_blendmask_rmb , EVEX_V256; - defm Z128 : avx512_blendmask, - avx512_blendmask_rmb , EVEX_V128; + defm Z256 : avx512_blendmask, + avx512_blendmask_rmb, EVEX_V256; + defm Z128 : avx512_blendmask, + avx512_blendmask_rmb, EVEX_V128; } } -multiclass blendmask_bw opc, string OpcodeStr, +multiclass blendmask_bw opc, string OpcodeStr, OpndItins itins, AVX512VLVectorVTInfo VTInfo> { let Predicates = [HasBWI] in - defm Z : avx512_blendmask , EVEX_V512; + defm Z : avx512_blendmask, EVEX_V512; let Predicates = [HasBWI, HasVLX] in { - defm Z256 : avx512_blendmask , EVEX_V256; - defm Z128 : avx512_blendmask , EVEX_V128; + defm Z256 : avx512_blendmask, EVEX_V256; + defm Z128 : avx512_blendmask, EVEX_V128; } } -defm VBLENDMPS : blendmask_dq <0x65, "vblendmps", avx512vl_f32_info>; -defm VBLENDMPD : blendmask_dq <0x65, "vblendmpd", avx512vl_f64_info>, VEX_W; -defm VPBLENDMD : blendmask_dq <0x64, "vpblendmd", avx512vl_i32_info>; -defm VPBLENDMQ : blendmask_dq <0x64, "vpblendmq", avx512vl_i64_info>, VEX_W; -defm VPBLENDMB : blendmask_bw <0x66, "vpblendmb", avx512vl_i8_info>; -defm VPBLENDMW : blendmask_bw <0x66, "vpblendmw", avx512vl_i16_info>, VEX_W; +defm VBLENDMPS : blendmask_dq <0x65, "vblendmps", AVX512_BLENDM, avx512vl_f32_info>; +defm VBLENDMPD : blendmask_dq <0x65, "vblendmpd", AVX512_BLENDM, avx512vl_f64_info>, VEX_W; +defm VPBLENDMD : blendmask_dq <0x64, "vpblendmd", AVX512_PBLENDM, avx512vl_i32_info>; +defm VPBLENDMQ : blendmask_dq <0x64, "vpblendmq", AVX512_PBLENDM, avx512vl_i64_info>, VEX_W; +defm VPBLENDMB : blendmask_bw <0x66, "vpblendmb", AVX512_PBLENDM, avx512vl_i8_info>; +defm VPBLENDMW : blendmask_bw <0x66, "vpblendmw", AVX512_PBLENDM, avx512vl_i16_info>, VEX_W; //===----------------------------------------------------------------------===// @@ -1813,8 +1907,8 @@ defm VPBLENDMW : blendmask_bw <0x66, "vpblendmw", avx512vl_i16_info>, VEX_W; // avx512_cmp_scalar - AVX512 CMPSS and CMPSD -multiclass avx512_cmp_scalar{ - +multiclass avx512_cmp_scalar { defm rr_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, (outs _.KRC:$dst), (ins _.RC:$src1, 
_.RC:$src2, AVXCC:$cc), @@ -1822,7 +1916,7 @@ multiclass avx512_cmp_scalar "$src2, $src1", "$src1, $src2", (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), - imm:$cc)>, EVEX_4V; + imm:$cc), itins.rr>, EVEX_4V, Sched<[itins.Sched]>; let mayLoad = 1 in defm rm_Int : AVX512_maskable_cmp<0xC2, MRMSrcMem, _, (outs _.KRC:$dst), @@ -1830,7 +1924,8 @@ multiclass avx512_cmp_scalar "vcmp${cc}"#_.Suffix, "$src2, $src1", "$src1, $src2", (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2, - imm:$cc)>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>; + imm:$cc), itins.rm>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; defm rrb_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, (outs _.KRC:$dst), @@ -1840,28 +1935,31 @@ multiclass avx512_cmp_scalar (OpNodeRnd (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc, - (i32 FROUND_NO_EXC))>, EVEX_4V, EVEX_B; + (i32 FROUND_NO_EXC)), itins.rr>, + EVEX_4V, EVEX_B, Sched<[itins.Sched]>; // Accept explicit immediate argument form instead of comparison code. let isAsmParserOnly = 1, hasSideEffects = 0 in { defm rri_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _, (outs VK1:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc), "vcmp"#_.Suffix, - "$cc, $src2, $src1", "$src1, $src2, $cc">, EVEX_4V; + "$cc, $src2, $src1", "$src1, $src2, $cc", itins.rr>, EVEX_4V, + Sched<[itins.Sched]>; let mayLoad = 1 in defm rmi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _, (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc), "vcmp"#_.Suffix, - "$cc, $src2, $src1", "$src1, $src2, $cc">, - EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>; + "$cc, $src2, $src1", "$src1, $src2, $cc", itins.rm>, + EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; defm rrb_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _, (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc), "vcmp"#_.Suffix, - "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc">, - EVEX_4V, EVEX_B; + "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc", itins.rr>, + EVEX_4V, EVEX_B, Sched<[itins.Sched]>; }// let isAsmParserOnly = 1, hasSideEffects = 0 let isCodeGenOnly = 1 in { @@ -1873,7 +1971,7 @@ multiclass avx512_cmp_scalar [(set _.KRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2, imm:$cc))], - IIC_SSE_ALU_F32S_RR>, EVEX_4V; + itins.rr>, EVEX_4V, Sched<[itins.Sched]>; def rm : AVX512Ii8<0xC2, MRMSrcMem, (outs _.KRC:$dst), (ins _.FRC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc), @@ -1882,33 +1980,34 @@ multiclass avx512_cmp_scalar [(set _.KRC:$dst, (OpNode _.FRC:$src1, (_.ScalarLdFrag addr:$src2), imm:$cc))], - IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>; + itins.rm>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } let Predicates = [HasAVX512] in { let ExeDomain = SSEPackedSingle in - defm VCMPSSZ : avx512_cmp_scalar, - AVX512XSIi8Base; + defm VCMPSSZ : avx512_cmp_scalar, AVX512XSIi8Base; let ExeDomain = SSEPackedDouble in - defm VCMPSDZ : avx512_cmp_scalar, - AVX512XDIi8Base, VEX_W; + defm VCMPSDZ : avx512_cmp_scalar, AVX512XDIi8Base, VEX_W; } multiclass avx512_icmp_packed opc, string OpcodeStr, SDNode OpNode, - X86VectorVTInfo _, bit IsCommutable> { + OpndItins itins, X86VectorVTInfo _, bit IsCommutable> { let isCommutable = IsCommutable in def rr : AVX512BI, EVEX_4V; + itins.rr>, EVEX_4V, Sched<[itins.Sched]>; def rm : AVX512BI, EVEX_4V; + itins.rm>, EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>; let isCommutable = IsCommutable in def rrk : AVX512BI opc, string OpcodeStr, SDNode OpNode, "$dst {${mask}}, $src1, $src2}"), [(set 
_.KRC:$dst, (and _.KRCWM:$mask, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))))], - IIC_SSE_ALU_F32P_RR>, EVEX_4V, EVEX_K; + itins.rr>, EVEX_4V, EVEX_K, Sched<[itins.Sched]>; def rmk : AVX512BI opc, string OpcodeStr, SDNode OpNode, (OpNode (_.VT _.RC:$src1), (_.VT (bitconvert (_.LdFrag addr:$src2))))))], - IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K; + itins.rm>, EVEX_4V, EVEX_K, Sched<[itins.Sched.Folded, ReadAfterLd]>; } multiclass avx512_icmp_packed_rmb opc, string OpcodeStr, SDNode OpNode, - X86VectorVTInfo _, bit IsCommutable> : - avx512_icmp_packed { + OpndItins itins, X86VectorVTInfo _, bit IsCommutable> : + avx512_icmp_packed { def rmb : AVX512BI, EVEX_4V, EVEX_B; + itins.rm>, EVEX_4V, EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>; def rmbk : AVX512BI opc, string OpcodeStr, SDNode OpNode, (OpNode (_.VT _.RC:$src1), (X86VBroadcast (_.ScalarLdFrag addr:$src2)))))], - IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K, EVEX_B; + itins.rm>, EVEX_4V, EVEX_K, EVEX_B, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } multiclass avx512_icmp_packed_vl opc, string OpcodeStr, SDNode OpNode, - AVX512VLVectorVTInfo VTInfo, Predicate prd, - bit IsCommutable = 0> { + OpndItins itins, AVX512VLVectorVTInfo VTInfo, + Predicate prd, bit IsCommutable = 0> { let Predicates = [prd] in - defm Z : avx512_icmp_packed, EVEX_V512; let Predicates = [prd, HasVLX] in { - defm Z256 : avx512_icmp_packed, EVEX_V256; - defm Z128 : avx512_icmp_packed, EVEX_V128; } } multiclass avx512_icmp_packed_rmb_vl opc, string OpcodeStr, - SDNode OpNode, AVX512VLVectorVTInfo VTInfo, - Predicate prd, bit IsCommutable = 0> { + SDNode OpNode, OpndItins itins, + AVX512VLVectorVTInfo VTInfo, + Predicate prd, bit IsCommutable = 0> { let Predicates = [prd] in - defm Z : avx512_icmp_packed_rmb, EVEX_V512; let Predicates = [prd, HasVLX] in { - defm Z256 : avx512_icmp_packed_rmb, EVEX_V256; - defm Z128 : avx512_icmp_packed_rmb, EVEX_V128; } } +// FIXME: Is there a better scheduler itinerary for VPCMP? 
defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", X86pcmpeqm, - avx512vl_i8_info, HasBWI, 1>, + SSE_ALU_F32P, avx512vl_i8_info, HasBWI, 1>, EVEX_CD8<8, CD8VF>, VEX_WIG; defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw", X86pcmpeqm, - avx512vl_i16_info, HasBWI, 1>, + SSE_ALU_F32P, avx512vl_i16_info, HasBWI, 1>, EVEX_CD8<16, CD8VF>, VEX_WIG; defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", X86pcmpeqm, - avx512vl_i32_info, HasAVX512, 1>, + SSE_ALU_F32P, avx512vl_i32_info, HasAVX512, 1>, EVEX_CD8<32, CD8VF>; defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", X86pcmpeqm, - avx512vl_i64_info, HasAVX512, 1>, + SSE_ALU_F32P, avx512vl_i64_info, HasAVX512, 1>, T8PD, VEX_W, EVEX_CD8<64, CD8VF>; defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb", X86pcmpgtm, - avx512vl_i8_info, HasBWI>, + SSE_ALU_F32P, avx512vl_i8_info, HasBWI>, EVEX_CD8<8, CD8VF>, VEX_WIG; defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw", X86pcmpgtm, - avx512vl_i16_info, HasBWI>, + SSE_ALU_F32P, avx512vl_i16_info, HasBWI>, EVEX_CD8<16, CD8VF>, VEX_WIG; defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd", X86pcmpgtm, - avx512vl_i32_info, HasAVX512>, + SSE_ALU_F32P, avx512vl_i32_info, HasAVX512>, EVEX_CD8<32, CD8VF>; defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", X86pcmpgtm, - avx512vl_i64_info, HasAVX512>, + SSE_ALU_F32P, avx512vl_i64_info, HasAVX512>, T8PD, VEX_W, EVEX_CD8<64, CD8VF>; // Transforms to swizzle an immediate to help matching memory operand in first @@ -2033,7 +2135,7 @@ def CommutePCMPCC : SDNodeXForm; multiclass avx512_icmp_cc opc, string Suffix, SDNode OpNode, - X86VectorVTInfo _> { + OpndItins itins, X86VectorVTInfo _> { let isCommutable = 1 in def rri : AVX512AIi8 opc, string Suffix, SDNode OpNode, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc))], - IIC_SSE_ALU_F32P_RR>, EVEX_4V; + itins.rr>, EVEX_4V, Sched<[itins.Sched]>; def rmi : AVX512AIi8 opc, string Suffix, SDNode OpNode, [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT (bitconvert (_.LdFrag addr:$src2))), imm:$cc))], - IIC_SSE_ALU_F32P_RM>, EVEX_4V; + itins.rm>, EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>; let isCommutable = 1 in def rrik : AVX512AIi8 opc, string Suffix, SDNode OpNode, [(set _.KRC:$dst, (and _.KRCWM:$mask, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc)))], - IIC_SSE_ALU_F32P_RR>, EVEX_4V, EVEX_K; + itins.rr>, EVEX_4V, EVEX_K, Sched<[itins.Sched]>; def rmik : AVX512AIi8 opc, string Suffix, SDNode OpNode, (OpNode (_.VT _.RC:$src1), (_.VT (bitconvert (_.LdFrag addr:$src2))), imm:$cc)))], - IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K; + itins.rm>, EVEX_4V, EVEX_K, + Sched<[itins.Sched.Folded, ReadAfterLd]>; // Accept explicit immediate argument form instead of comparison code. 
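Aside (illustrative, not part of the patch): the vcmp/vpcmp${cc} forms and the explicit-immediate _alt forms above are two spellings of the same comparison-predicate immediate, and CommutePCMPCC swaps that predicate when the two sources are commuted so a memory operand in the first position can still be matched. A minimal C++ sketch using only AVX-512F intrinsics; the function name is invented for this example:

#include <immintrin.h>

// All three masks below are identical: the named cmplt intrinsic is just the
// generic form with predicate _MM_CMPINT_LT (1), and swapping the sources
// requires flipping LT to NLE (greater-than), which is the swizzle
// CommutePCMPCC encodes for the instruction selector.
__mmask16 lt_mask(__m512i a, __m512i b) {
  __mmask16 named   = _mm512_cmplt_epi32_mask(a, b);                // vpcmpltd
  __mmask16 generic = _mm512_cmp_epi32_mask(a, b, _MM_CMPINT_LT);   // vpcmpd $1
  __mmask16 swapped = _mm512_cmp_epi32_mask(b, a, _MM_CMPINT_NLE);  // operands commuted
  (void)named; (void)swapped;
  return generic;
}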
let isAsmParserOnly = 1, hasSideEffects = 0 in { @@ -2079,20 +2182,20 @@ multiclass avx512_icmp_cc opc, string Suffix, SDNode OpNode, (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc), !strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|", "$dst, $src1, $src2, $cc}"), - [], IIC_SSE_ALU_F32P_RR>, EVEX_4V; + [], itins.rr>, EVEX_4V, Sched<[itins.Sched]>; let mayLoad = 1 in def rmi_alt : AVX512AIi8, EVEX_4V; + [], itins.rm>, EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>; def rrik_alt : AVX512AIi8, EVEX_4V, EVEX_K; + [], itins.rr>, EVEX_4V, EVEX_K, Sched<[itins.Sched]>; let mayLoad = 1 in def rmik_alt : AVX512AIi8 opc, string Suffix, SDNode OpNode, !strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst {${mask}}|", "$dst {${mask}}, $src1, $src2, $cc}"), - [], IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K; + [], itins.rm>, EVEX_4V, EVEX_K, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } def : Pat<(OpNode (bitconvert (_.LdFrag addr:$src2)), @@ -2116,8 +2220,8 @@ multiclass avx512_icmp_cc opc, string Suffix, SDNode OpNode, } multiclass avx512_icmp_cc_rmb opc, string Suffix, SDNode OpNode, - X86VectorVTInfo _> : - avx512_icmp_cc { + OpndItins itins, X86VectorVTInfo _> : + avx512_icmp_cc { def rmib : AVX512AIi8 opc, string Suffix, SDNode OpNode, [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (X86VBroadcast (_.ScalarLdFrag addr:$src2)), imm:$cc))], - IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_B; + itins.rm>, EVEX_4V, EVEX_B, + Sched<[itins.Sched.Folded, ReadAfterLd]>; def rmibk : AVX512AIi8 opc, string Suffix, SDNode OpNode, (OpNode (_.VT _.RC:$src1), (X86VBroadcast (_.ScalarLdFrag addr:$src2)), imm:$cc)))], - IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K, EVEX_B; + itins.rm>, EVEX_4V, EVEX_K, EVEX_B, + Sched<[itins.Sched.Folded, ReadAfterLd]>; // Accept explicit immediate argument form instead of comparison code. 
let isAsmParserOnly = 1, hasSideEffects = 0, mayLoad = 1 in { @@ -2148,14 +2254,16 @@ multiclass avx512_icmp_cc_rmb opc, string Suffix, SDNode OpNode, !strconcat("vpcmp", Suffix, "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|", "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"), - [], IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_B; + [], itins.rm>, EVEX_4V, EVEX_B, + Sched<[itins.Sched.Folded, ReadAfterLd]>; def rmibk_alt : AVX512AIi8, EVEX_4V, EVEX_K, EVEX_B; + [], itins.rm>, EVEX_4V, EVEX_K, EVEX_B, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } def : Pat<(OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src2)), @@ -2172,60 +2280,72 @@ multiclass avx512_icmp_cc_rmb opc, string Suffix, SDNode OpNode, } multiclass avx512_icmp_cc_vl opc, string Suffix, SDNode OpNode, - AVX512VLVectorVTInfo VTInfo, Predicate prd> { + OpndItins itins, AVX512VLVectorVTInfo VTInfo, + Predicate prd> { let Predicates = [prd] in - defm Z : avx512_icmp_cc, EVEX_V512; + defm Z : avx512_icmp_cc, + EVEX_V512; let Predicates = [prd, HasVLX] in { - defm Z256 : avx512_icmp_cc, EVEX_V256; - defm Z128 : avx512_icmp_cc, EVEX_V128; + defm Z256 : avx512_icmp_cc, + EVEX_V256; + defm Z128 : avx512_icmp_cc, + EVEX_V128; } } multiclass avx512_icmp_cc_rmb_vl opc, string Suffix, SDNode OpNode, - AVX512VLVectorVTInfo VTInfo, Predicate prd> { + OpndItins itins, AVX512VLVectorVTInfo VTInfo, + Predicate prd> { let Predicates = [prd] in - defm Z : avx512_icmp_cc_rmb, + defm Z : avx512_icmp_cc_rmb, EVEX_V512; let Predicates = [prd, HasVLX] in { - defm Z256 : avx512_icmp_cc_rmb, + defm Z256 : avx512_icmp_cc_rmb, EVEX_V256; - defm Z128 : avx512_icmp_cc_rmb, + defm Z128 : avx512_icmp_cc_rmb, EVEX_V128; } } -defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86cmpm, avx512vl_i8_info, - HasBWI>, EVEX_CD8<8, CD8VF>; -defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86cmpmu, avx512vl_i8_info, - HasBWI>, EVEX_CD8<8, CD8VF>; - -defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86cmpm, avx512vl_i16_info, - HasBWI>, VEX_W, EVEX_CD8<16, CD8VF>; -defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86cmpmu, avx512vl_i16_info, - HasBWI>, VEX_W, EVEX_CD8<16, CD8VF>; +// FIXME: Is there a better scheduler itinerary for VPCMP/VPCMPU? 
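Aside (illustrative, not part of the patch): the VPCMP/VPCMPU pairs instantiated next exist because AVX-512 has separate signed (X86cmpm) and unsigned (X86cmpmu) compares into mask registers. A small self-contained C++ example of the difference, assuming only AVX-512F and <immintrin.h>:

#include <immintrin.h>
#include <cstdio>

int main() {
  __m512i a = _mm512_set1_epi32(-1);  // 0xFFFFFFFF in every lane
  __m512i b = _mm512_set1_epi32(1);
  // Signed: -1 < 1 in every lane, so all 16 mask bits are set (vpcmpd).
  __mmask16 s = _mm512_cmp_epi32_mask(a, b, _MM_CMPINT_LT);
  // Unsigned: 0xFFFFFFFF < 1 is false, so the mask is zero (vpcmpud).
  __mmask16 u = _mm512_cmp_epu32_mask(a, b, _MM_CMPINT_LT);
  std::printf("signed=0x%04x unsigned=0x%04x\n", (unsigned)s, (unsigned)u);
  return 0;
}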
+defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86cmpm, SSE_ALU_F32P, + avx512vl_i8_info, HasBWI>, EVEX_CD8<8, CD8VF>; +defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86cmpmu, SSE_ALU_F32P, + avx512vl_i8_info, HasBWI>, EVEX_CD8<8, CD8VF>; -defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86cmpm, avx512vl_i32_info, - HasAVX512>, EVEX_CD8<32, CD8VF>; -defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86cmpmu, avx512vl_i32_info, - HasAVX512>, EVEX_CD8<32, CD8VF>; +defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86cmpm, SSE_ALU_F32P, + avx512vl_i16_info, HasBWI>, + VEX_W, EVEX_CD8<16, CD8VF>; +defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86cmpmu, SSE_ALU_F32P, + avx512vl_i16_info, HasBWI>, + VEX_W, EVEX_CD8<16, CD8VF>; -defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86cmpm, avx512vl_i64_info, - HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>; -defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86cmpmu, avx512vl_i64_info, - HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>; +defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86cmpm, SSE_ALU_F32P, + avx512vl_i32_info, HasAVX512>, + EVEX_CD8<32, CD8VF>; +defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86cmpmu, SSE_ALU_F32P, + avx512vl_i32_info, HasAVX512>, + EVEX_CD8<32, CD8VF>; +defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86cmpm, SSE_ALU_F32P, + avx512vl_i64_info, HasAVX512>, + VEX_W, EVEX_CD8<64, CD8VF>; +defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86cmpmu, SSE_ALU_F32P, + avx512vl_i64_info, HasAVX512>, + VEX_W, EVEX_CD8<64, CD8VF>; -multiclass avx512_vcmp_common { +multiclass avx512_vcmp_common { defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,AVXCC:$cc), "vcmp${cc}"#_.Suffix, "$src2, $src1", "$src1, $src2", (X86cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), - imm:$cc), 1>; + imm:$cc), itins.rr, 1>, + Sched<[itins.Sched]>; defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _, (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, AVXCC:$cc), @@ -2233,7 +2353,8 @@ multiclass avx512_vcmp_common { "$src2, $src1", "$src1, $src2", (X86cmpm (_.VT _.RC:$src1), (_.VT (bitconvert (_.LdFrag addr:$src2))), - imm:$cc)>; + imm:$cc), itins.rm>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _, (outs _.KRC:$dst), @@ -2243,28 +2364,32 @@ multiclass avx512_vcmp_common { "$src1, ${src2}"##_.BroadcastStr, (X86cmpm (_.VT _.RC:$src1), (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))), - imm:$cc)>,EVEX_B; + imm:$cc), itins.rm>, + EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>; // Accept explicit immediate argument form instead of comparison code. 
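Aside (illustrative, not part of the patch): the rmbi form above is the one that folds a broadcast scalar memory operand (the {1to16}-style BroadcastStr form) into the packed FP compare, and the _alt forms again accept the raw predicate immediate instead of a named vcmp${cc} mnemonic. A short C++ sketch, assuming AVX-512F and a compiler willing to fold the splat into the compare; the function name is invented:

#include <immintrin.h>

// Compare every lane against one scalar threshold. With AVX-512 the splat of
// *threshold may be folded into the compare as a {1to16} broadcast memory
// operand; _CMP_GT_OQ is the same predicate byte the vcmpps immediate takes.
__mmask16 above_threshold(__m512 v, const float *threshold) {
  __m512 t = _mm512_set1_ps(*threshold);
  return _mm512_cmp_ps_mask(v, t, _CMP_GT_OQ);
}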
let isAsmParserOnly = 1, hasSideEffects = 0 in { defm rri_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _, (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc), "vcmp"#_.Suffix, - "$cc, $src2, $src1", "$src1, $src2, $cc">; + "$cc, $src2, $src1", "$src1, $src2, $cc", itins.rr>, + Sched<[itins.Sched]>; let mayLoad = 1 in { defm rmi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _, (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc), "vcmp"#_.Suffix, - "$cc, $src2, $src1", "$src1, $src2, $cc">; + "$cc, $src2, $src1", "$src1, $src2, $cc", itins.rm>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; defm rmbi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _, (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc), "vcmp"#_.Suffix, "$cc, ${src2}"##_.BroadcastStr##", $src1", - "$src1, ${src2}"##_.BroadcastStr##", $cc">,EVEX_B; + "$src1, ${src2}"##_.BroadcastStr##", $cc", itins.rm>, + EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>; } } @@ -2295,7 +2420,7 @@ multiclass avx512_vcmp_common { imm:$cc)>; } -multiclass avx512_vcmp_sae { +multiclass avx512_vcmp_sae { // comparison code form (VCMP[EQ/LT/LE/...] defm rrib : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, (outs _.KRC:$dst),(ins _.RC:$src1, _.RC:$src2, AVXCC:$cc), @@ -2304,7 +2429,8 @@ multiclass avx512_vcmp_sae { (X86cmpmRnd (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc, - (i32 FROUND_NO_EXC))>, EVEX_B; + (i32 FROUND_NO_EXC)), itins.rr>, + EVEX_B, Sched<[itins.Sched]>; let isAsmParserOnly = 1, hasSideEffects = 0 in { defm rrib_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _, @@ -2312,25 +2438,26 @@ multiclass avx512_vcmp_sae { (ins _.RC:$src1, _.RC:$src2, u8imm:$cc), "vcmp"#_.Suffix, "$cc, {sae}, $src2, $src1", - "$src1, $src2, {sae}, $cc">, EVEX_B; + "$src1, $src2, {sae}, $cc", itins.rr>, + EVEX_B, Sched<[itins.Sched]>; } } -multiclass avx512_vcmp { +multiclass avx512_vcmp { let Predicates = [HasAVX512] in { - defm Z : avx512_vcmp_common<_.info512>, - avx512_vcmp_sae<_.info512>, EVEX_V512; + defm Z : avx512_vcmp_common, + avx512_vcmp_sae, EVEX_V512; } let Predicates = [HasAVX512,HasVLX] in { - defm Z128 : avx512_vcmp_common<_.info128>, EVEX_V128; - defm Z256 : avx512_vcmp_common<_.info256>, EVEX_V256; + defm Z128 : avx512_vcmp_common, EVEX_V128; + defm Z256 : avx512_vcmp_common, EVEX_V256; } } -defm VCMPPD : avx512_vcmp, +defm VCMPPD : avx512_vcmp, AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; -defm VCMPPS : avx512_vcmp, +defm VCMPPS : avx512_vcmp, AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; @@ -2350,34 +2477,39 @@ let Predicates = [HasAVX512] in { //handle fpclass instruction mask = op(reg_scalar,imm) // op(mem_scalar,imm) multiclass avx512_scalar_fpclass opc, string OpcodeStr, SDNode OpNode, - X86VectorVTInfo _, Predicate prd> { + OpndItins itins, X86VectorVTInfo _, + Predicate prd> { let Predicates = [prd], ExeDomain = _.ExeDomain in { def rr : AVX512; + (i32 imm:$src2)))], itins.rr>, + Sched<[itins.Sched]>; def rrk : AVX512, EVEX_K; + (i32 imm:$src2))))], itins.rr>, + EVEX_K, Sched<[itins.Sched]>; def rm : AVX512; + (i32 imm:$src2)))], itins.rm>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; def rmk : AVX512, EVEX_K; + (i32 imm:$src2))))], itins.rm>, + EVEX_K, Sched<[itins.Sched.Folded, ReadAfterLd]>; } } @@ -2385,34 +2517,39 @@ multiclass avx512_scalar_fpclass opc, string OpcodeStr, SDNode OpNode, // fpclass(reg_vec, mem_vec, imm) // fpclass(reg_vec, broadcast(eltVt), imm) multiclass avx512_vector_fpclass opc, string OpcodeStr, SDNode OpNode, - X86VectorVTInfo _, string mem, string broadcast>{ + OpndItins 
itins, X86VectorVTInfo _, + string mem, string broadcast>{ let ExeDomain = _.ExeDomain in { def rr : AVX512; + (i32 imm:$src2)))], itins.rr>, + Sched<[itins.Sched]>; def rrk : AVX512, EVEX_K; + (i32 imm:$src2))))], itins.rr>, + EVEX_K, Sched<[itins.Sched]>; def rm : AVX512; + (i32 imm:$src2)))], itins.rm>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; def rmk : AVX512, EVEX_K; + (i32 imm:$src2))))], itins.rm>, + EVEX_K, Sched<[itins.Sched.Folded, ReadAfterLd]>; def rmb : AVX512 opc, string OpcodeStr, SDNode OpNode, [(set _.KRC:$dst,(OpNode (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src1))), - (i32 imm:$src2)))], NoItinerary>,EVEX_B; + (i32 imm:$src2)))], itins.rm>, + EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>; def rmbk : AVX512 opc, string OpcodeStr, SDNode OpNode, [(set _.KRC:$dst,(or _.KRCWM:$mask, (OpNode (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src1))), - (i32 imm:$src2))))], NoItinerary>, - EVEX_B, EVEX_K; + (i32 imm:$src2))))], itins.rm>, + EVEX_B, EVEX_K, Sched<[itins.Sched.Folded, ReadAfterLd]>; } } -multiclass avx512_vector_fpclass_all opc, SDNode OpNode, Predicate prd, - string broadcast>{ +multiclass avx512_vector_fpclass_all opc, SDNode OpNode, + OpndItins itins, Predicate prd, + string broadcast>{ let Predicates = [prd] in { - defm Z : avx512_vector_fpclass, EVEX_V512; + defm Z : avx512_vector_fpclass, EVEX_V512; } let Predicates = [prd, HasVLX] in { - defm Z128 : avx512_vector_fpclass, EVEX_V128; - defm Z256 : avx512_vector_fpclass, EVEX_V256; + defm Z128 : avx512_vector_fpclass, EVEX_V128; + defm Z256 : avx512_vector_fpclass, EVEX_V256; } } +// FIXME: Is there a better scheduler itinerary for VFPCLASS? multiclass avx512_fp_fpclass_all opcVec, bits<8> opcScalar, SDNode VecOpNode, SDNode ScalarOpNode, Predicate prd>{ defm PS : avx512_vector_fpclass_all, EVEX_CD8<32, CD8VF>; + VecOpNode, SSE_ALU_F32P, prd, "{l}">, + EVEX_CD8<32, CD8VF>; defm PD : avx512_vector_fpclass_all,EVEX_CD8<64, CD8VF> , VEX_W; + VecOpNode, SSE_ALU_F64P, prd, "{q}">, + EVEX_CD8<64, CD8VF> , VEX_W; defm SS : avx512_scalar_fpclass, EVEX_CD8<32, CD8VT1>; + SSE_ALU_F32S, f32x_info, prd>, + EVEX_CD8<32, CD8VT1>; defm SD : avx512_scalar_fpclass, EVEX_CD8<64, CD8VT1>, VEX_W; + SSE_ALU_F64S, f64x_info, prd>, + EVEX_CD8<64, CD8VT1>, VEX_W; } defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, X86Vfpclass, @@ -2474,15 +2618,16 @@ defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, X86Vfpclass, multiclass avx512_mask_mov opc_kk, bits<8> opc_km, bits<8> opc_mk, string OpcodeStr, RegisterClass KRC, ValueType vvt, X86MemOperand x86memop> { - let hasSideEffects = 0 in + let hasSideEffects = 0, SchedRW = [WriteMove] in def kk : I; + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [], + IIC_SSE_MOVDQ>; def km : I; + [(set KRC:$dst, (vvt (load addr:$src)))], IIC_SSE_MOVDQ>; def mk : I; + [(store KRC:$src, addr:$dst)], IIC_SSE_MOVDQ>; } multiclass avx512_mask_mov_gpr opc_kr, bits<8> opc_rk, @@ -2490,9 +2635,11 @@ multiclass avx512_mask_mov_gpr opc_kr, bits<8> opc_rk, RegisterClass KRC, RegisterClass GRC> { let hasSideEffects = 0 in { def kr : I; + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [], + IIC_SSE_MOVD_ToGP>, Sched<[WriteMove]>; def rk : I; + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [], + IIC_SSE_MOVD_ToGP>, Sched<[WriteMove]>; } } @@ -2533,8 +2680,6 @@ def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))), def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))), (COPY_TO_REGCLASS VK16:$src, GR32)>; -def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))), 
- (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit))>, Requires<[NoDQI]>; def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))), (KMOVBrk VK8:$src)>, Requires<[HasDQI]>; def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))), @@ -2551,11 +2696,6 @@ def : Pat<(i64 (bitconvert (v64i1 VK64:$src))), // Load/store kreg let Predicates = [HasDQI] in { - def : Pat<(store (i8 (bitconvert (v8i1 VK8:$src))), addr:$dst), - (KMOVBmk addr:$dst, VK8:$src)>; - def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))), - (KMOVBkm addr:$src)>; - def : Pat<(store VK4:$src, addr:$dst), (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK4:$src, VK8))>; def : Pat<(store VK2:$src, addr:$dst), @@ -2595,22 +2735,10 @@ let Predicates = [HasAVX512, NoDQI] in { } let Predicates = [HasAVX512] in { - def : Pat<(store (i16 (bitconvert (v16i1 VK16:$src))), addr:$dst), - (KMOVWmk addr:$dst, VK16:$src)>; def : Pat<(v1i1 (load addr:$src)), - (COPY_TO_REGCLASS (AND32ri8 (MOVZX32rm8 addr:$src), (i32 1)), VK1)>; - def : Pat<(v16i1 (bitconvert (i16 (load addr:$src)))), - (KMOVWkm addr:$src)>; -} -let Predicates = [HasBWI] in { - def : Pat<(store (i32 (bitconvert (v32i1 VK32:$src))), addr:$dst), - (KMOVDmk addr:$dst, VK32:$src)>; - def : Pat<(v32i1 (bitconvert (i32 (load addr:$src)))), - (KMOVDkm addr:$src)>; - def : Pat<(store (i64 (bitconvert (v64i1 VK64:$src))), addr:$dst), - (KMOVQmk addr:$dst, VK64:$src)>; - def : Pat<(v64i1 (bitconvert (i64 (load addr:$src)))), - (KMOVQkm addr:$src)>; + (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK1)>; + def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))), + (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>; } let Predicates = [HasAVX512] in { @@ -2618,17 +2746,11 @@ let Predicates = [HasAVX512] in { def : Pat<(maskVT (scalar_to_vector GR32:$src)), (COPY_TO_REGCLASS GR32:$src, maskRC)>; - def : Pat<(i32 (X86Vextract maskRC:$src, (iPTR 0))), + def : Pat<(i32 (X86kextract maskRC:$src, (iPTR 0))), (COPY_TO_REGCLASS maskRC:$src, GR32)>; def : Pat<(maskVT (scalar_to_vector GR8:$src)), (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>; - - def : Pat<(i8 (X86Vextract maskRC:$src, (iPTR 0))), - (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS maskRC:$src, GR32)), sub_8bit)>; - - def : Pat<(i32 (anyext (i8 (X86Vextract maskRC:$src, (iPTR 0))))), - (COPY_TO_REGCLASS maskRC:$src, GR32)>; } defm : operation_gpr_mask_copy_lowering; @@ -2658,26 +2780,27 @@ let Predicates = [HasAVX512] in { // - KNOT multiclass avx512_mask_unop opc, string OpcodeStr, RegisterClass KRC, SDPatternOperator OpNode, - Predicate prd> { + OpndItins itins, Predicate prd> { let Predicates = [prd] in def rr : I; + [(set KRC:$dst, (OpNode KRC:$src))], itins.rr>, + Sched<[itins.Sched]>; } multiclass avx512_mask_unop_all opc, string OpcodeStr, - SDPatternOperator OpNode> { + SDPatternOperator OpNode, OpndItins itins> { defm B : avx512_mask_unop, VEX, PD; + itins, HasDQI>, VEX, PD; defm W : avx512_mask_unop, VEX, PS; + itins, HasAVX512>, VEX, PS; defm D : avx512_mask_unop, VEX, PD, VEX_W; + itins, HasBWI>, VEX, PD, VEX_W; defm Q : avx512_mask_unop, VEX, PS, VEX_W; + itins, HasBWI>, VEX, PS, VEX_W; } -defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot>; +defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SSE_BIT_ITINS_P>; // KNL does not support KMOVB, 8-bit mask is promoted to 16-bit let Predicates = [HasAVX512, NoDQI] in @@ -2693,25 +2816,26 @@ def : Pat<(vnot VK2:$src), // - KAND, KANDN, KOR, KXNOR, KXOR multiclass avx512_mask_binop opc, string OpcodeStr, RegisterClass KRC, 
SDPatternOperator OpNode, - Predicate prd, bit IsCommutable> { + OpndItins itins, Predicate prd, bit IsCommutable> { let Predicates = [prd], isCommutable = IsCommutable in def rr : I; + [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))], itins.rr>, + Sched<[itins.Sched]>; } multiclass avx512_mask_binop_all opc, string OpcodeStr, - SDPatternOperator OpNode, bit IsCommutable, - Predicate prdW = HasAVX512> { + SDPatternOperator OpNode, OpndItins itins, + bit IsCommutable, Predicate prdW = HasAVX512> { defm B : avx512_mask_binop, VEX_4V, VEX_L, PD; + itins, HasDQI, IsCommutable>, VEX_4V, VEX_L, PD; defm W : avx512_mask_binop, VEX_4V, VEX_L, PS; + itins, prdW, IsCommutable>, VEX_4V, VEX_L, PS; defm D : avx512_mask_binop, VEX_4V, VEX_L, VEX_W, PD; + itins, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PD; defm Q : avx512_mask_binop, VEX_4V, VEX_L, VEX_W, PS; + itins, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PS; } def andn : PatFrag<(ops node:$i0, node:$i1), (and (not node:$i0), node:$i1)>; @@ -2720,12 +2844,12 @@ def xnor : PatFrag<(ops node:$i0, node:$i1), (not (xor node:$i0, node:$i1))>; def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>; def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>; -defm KAND : avx512_mask_binop_all<0x41, "kand", and, 1>; -defm KOR : avx512_mask_binop_all<0x45, "kor", or, 1>; -defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor, 1>; -defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor, 1>; -defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn, 0>; -defm KADD : avx512_mask_binop_all<0x4A, "kadd", add, 1, HasDQI>; +defm KAND : avx512_mask_binop_all<0x41, "kand", and, SSE_BIT_ITINS_P, 1>; +defm KOR : avx512_mask_binop_all<0x45, "kor", or, SSE_BIT_ITINS_P, 1>; +defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor, SSE_BIT_ITINS_P, 1>; +defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor, SSE_BIT_ITINS_P, 1>; +defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn, SSE_BIT_ITINS_P, 0>; +defm KADD : avx512_mask_binop_all<0x4A, "kadd", add, SSE_BIT_ITINS_P, 1, HasDQI>; multiclass avx512_binop_pat { @@ -2760,13 +2884,13 @@ defm : avx512_binop_pat; // Mask unpacking multiclass avx512_mask_unpck { + RegisterClass KRCSrc, OpndItins itins, Predicate prd> { let Predicates = [prd] in { let hasSideEffects = 0 in def rr : I<0x4b, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2), - "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, - VEX_4V, VEX_L; + "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], + itins.rr>, VEX_4V, VEX_L, Sched<[itins.Sched]>; def : Pat<(VT (concat_vectors KRCSrc:$src1, KRCSrc:$src2)), (!cast(NAME##rr) @@ -2775,108 +2899,168 @@ multiclass avx512_mask_unpck, PD; -defm KUNPCKWD : avx512_mask_unpck<"wd", VK32, v32i1, VK16, HasBWI>, PS; -defm KUNPCKDQ : avx512_mask_unpck<"dq", VK64, v64i1, VK32, HasBWI>, PS, VEX_W; +defm KUNPCKBW : avx512_mask_unpck<"bw", VK16, v16i1, VK8, SSE_UNPCK, HasAVX512>, PD; +defm KUNPCKWD : avx512_mask_unpck<"wd", VK32, v32i1, VK16, SSE_UNPCK, HasBWI>, PS; +defm KUNPCKDQ : avx512_mask_unpck<"dq", VK64, v64i1, VK32, SSE_UNPCK, HasBWI>, PS, VEX_W; // Mask bit testing multiclass avx512_mask_testop opc, string OpcodeStr, RegisterClass KRC, - SDNode OpNode, Predicate prd> { + SDNode OpNode, OpndItins itins, Predicate prd> { let Predicates = [prd], Defs = [EFLAGS] in def rr : I; + [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))], itins.rr>, + Sched<[itins.Sched]>; } multiclass avx512_mask_testop_w opc, string OpcodeStr, SDNode OpNode, - Predicate 
prdW = HasAVX512> { - defm B : avx512_mask_testop, + OpndItins itins, Predicate prdW = HasAVX512> { + defm B : avx512_mask_testop, VEX, PD; - defm W : avx512_mask_testop, + defm W : avx512_mask_testop, VEX, PS; - defm Q : avx512_mask_testop, + defm Q : avx512_mask_testop, VEX, PS, VEX_W; - defm D : avx512_mask_testop, + defm D : avx512_mask_testop, VEX, PD, VEX_W; } -defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest>; -defm KTEST : avx512_mask_testop_w<0x99, "ktest", X86ktest, HasDQI>; +defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SSE_PTEST>; +defm KTEST : avx512_mask_testop_w<0x99, "ktest", X86ktest, SSE_PTEST, HasDQI>; // Mask shift multiclass avx512_mask_shiftop opc, string OpcodeStr, RegisterClass KRC, - SDNode OpNode> { + SDNode OpNode, OpndItins itins> { let Predicates = [HasAVX512] in def ri : Ii8; + [(set KRC:$dst, (OpNode KRC:$src, (i8 imm:$imm)))], + itins.rr>, Sched<[itins.Sched]>; } multiclass avx512_mask_shiftop_w opc1, bits<8> opc2, string OpcodeStr, - SDNode OpNode> { - defm W : avx512_mask_shiftop, - VEX, TAPD, VEX_W; + SDNode OpNode, OpndItins itins> { + defm W : avx512_mask_shiftop, VEX, TAPD, VEX_W; let Predicates = [HasDQI] in - defm B : avx512_mask_shiftop, - VEX, TAPD; + defm B : avx512_mask_shiftop, VEX, TAPD; let Predicates = [HasBWI] in { - defm Q : avx512_mask_shiftop, - VEX, TAPD, VEX_W; - defm D : avx512_mask_shiftop, - VEX, TAPD; + defm Q : avx512_mask_shiftop, VEX, TAPD, VEX_W; + defm D : avx512_mask_shiftop, VEX, TAPD; } } -defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl>; -defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr>; - -multiclass axv512_icmp_packed_no_vlx_lowering { -def : Pat<(v8i1 (OpNode (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))), - (COPY_TO_REGCLASS (!cast(InstStr##Zrr) - (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), - (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))), VK8)>; +defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, SSE_PSHUF>; +defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, SSE_PSHUF>; -def : Pat<(v8i1 (and VK8:$mask, - (OpNode (v8i32 VR256X:$src1), (v8i32 VR256X:$src2)))), +multiclass axv512_icmp_packed_no_vlx_lowering { +def : Pat<(Narrow.KVT (OpNode (Narrow.VT Narrow.RC:$src1), + (Narrow.VT Narrow.RC:$src2))), + (COPY_TO_REGCLASS + (!cast(InstStr##Zrr) + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx))), + Narrow.KRC)>; + +def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, + (OpNode (Narrow.VT Narrow.RC:$src1), + (Narrow.VT Narrow.RC:$src2)))), (COPY_TO_REGCLASS (!cast(InstStr##Zrrk) - (COPY_TO_REGCLASS VK8:$mask, VK16), - (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), - (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))), - VK8)>; + (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx))), + Narrow.KRC)>; } multiclass axv512_icmp_packed_cc_no_vlx_lowering { -def : Pat<(v8i1 (OpNode (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src2), imm:$cc)), - (COPY_TO_REGCLASS (!cast(InstStr##Zrri) - (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), - (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)), - imm:$cc), VK8)>; - -def : Pat<(v8i1 (and VK8:$mask, (OpNode 
(_.info256.VT VR256X:$src1), - (_.info256.VT VR256X:$src2), imm:$cc))), - (COPY_TO_REGCLASS (!cast(InstStr##Zrrik) - (COPY_TO_REGCLASS VK8:$mask, VK16), - (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), - (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)), - imm:$cc), VK8)>; + X86VectorVTInfo Narrow, + X86VectorVTInfo Wide> { +def : Pat<(Narrow.KVT (OpNode (Narrow.VT Narrow.RC:$src1), + (Narrow.VT Narrow.RC:$src2), imm:$cc)), + (COPY_TO_REGCLASS + (!cast(InstStr##Zrri) + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)), + imm:$cc), Narrow.KRC)>; + +def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, + (OpNode (Narrow.VT Narrow.RC:$src1), + (Narrow.VT Narrow.RC:$src2), imm:$cc))), + (COPY_TO_REGCLASS (!cast(InstStr##Zrrik) + (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)), + imm:$cc), Narrow.KRC)>; } let Predicates = [HasAVX512, NoVLX] in { - defm : axv512_icmp_packed_no_vlx_lowering; - defm : axv512_icmp_packed_no_vlx_lowering; + defm : axv512_icmp_packed_no_vlx_lowering; + defm : axv512_icmp_packed_no_vlx_lowering; + + defm : axv512_icmp_packed_no_vlx_lowering; + defm : axv512_icmp_packed_no_vlx_lowering; + + defm : axv512_icmp_packed_no_vlx_lowering; + defm : axv512_icmp_packed_no_vlx_lowering; + + defm : axv512_icmp_packed_no_vlx_lowering; + defm : axv512_icmp_packed_no_vlx_lowering; + + defm : axv512_icmp_packed_cc_no_vlx_lowering; + defm : axv512_icmp_packed_cc_no_vlx_lowering; + defm : axv512_icmp_packed_cc_no_vlx_lowering; + + defm : axv512_icmp_packed_cc_no_vlx_lowering; + defm : axv512_icmp_packed_cc_no_vlx_lowering; + defm : axv512_icmp_packed_cc_no_vlx_lowering; + + defm : axv512_icmp_packed_cc_no_vlx_lowering; + defm : axv512_icmp_packed_cc_no_vlx_lowering; + defm : axv512_icmp_packed_cc_no_vlx_lowering; + + defm : axv512_icmp_packed_cc_no_vlx_lowering; + defm : axv512_icmp_packed_cc_no_vlx_lowering; + defm : axv512_icmp_packed_cc_no_vlx_lowering; +} + +let Predicates = [HasBWI, NoVLX] in { + defm : axv512_icmp_packed_no_vlx_lowering; + defm : axv512_icmp_packed_no_vlx_lowering; + + defm : axv512_icmp_packed_no_vlx_lowering; + defm : axv512_icmp_packed_no_vlx_lowering; + + defm : axv512_icmp_packed_no_vlx_lowering; + defm : axv512_icmp_packed_no_vlx_lowering; + + defm : axv512_icmp_packed_no_vlx_lowering; + defm : axv512_icmp_packed_no_vlx_lowering; + + defm : axv512_icmp_packed_cc_no_vlx_lowering; + defm : axv512_icmp_packed_cc_no_vlx_lowering; - defm : axv512_icmp_packed_cc_no_vlx_lowering; - defm : axv512_icmp_packed_cc_no_vlx_lowering; - defm : axv512_icmp_packed_cc_no_vlx_lowering; + defm : axv512_icmp_packed_cc_no_vlx_lowering; + defm : axv512_icmp_packed_cc_no_vlx_lowering; + + defm : axv512_icmp_packed_cc_no_vlx_lowering; + defm : axv512_icmp_packed_cc_no_vlx_lowering; + + defm : axv512_icmp_packed_cc_no_vlx_lowering; + defm : axv512_icmp_packed_cc_no_vlx_lowering; } // Mask setting all 0s or 1s multiclass avx512_mask_setop { let Predicates = [HasAVX512] in - let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1 in + let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1, + SchedRW = [WriteZero] in def #NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "", [(set KRC:$dst, (VT Val))]>; } @@ -2938,107 +3122,45 @@ defm : operation_subvector_mask_lowering; defm : 
operation_subvector_mask_lowering; - -multiclass vextract_for_mask_to_mask { -let Predicates = [prd] in - def : - Pat<(To.KVT(extract_subvector(From.KVT From.KRC:$src), (iPTR imm:$imm8))), - (To.KVT(COPY_TO_REGCLASS - (!cast(InstrStr#"ri") From.KVT:$src, - (i8 imm:$imm8)), To.KRC))>; -} - -multiclass vextract_for_mask_to_mask_legal_w { -def : - Pat<(To.KVT(extract_subvector(From.KVT From.KRC:$src), (iPTR imm:$imm8))), - (To.KVT(COPY_TO_REGCLASS - (KSHIFTRWri(COPY_TO_REGCLASS From.KRC:$src, VK16), - (i8 imm:$imm8)), To.KRC))>; -} - -defm : vextract_for_mask_to_mask_legal_w; -defm : vextract_for_mask_to_mask_legal_w; -defm : vextract_for_mask_to_mask_legal_w; -defm : vextract_for_mask_to_mask_legal_w; -defm : vextract_for_mask_to_mask_legal_w; -defm : vextract_for_mask_to_mask_legal_w; - -defm : vextract_for_mask_to_mask<"KSHIFTRW", v16i1_info, v1i1_info, HasAVX512>; -defm : vextract_for_mask_to_mask<"KSHIFTRD", v32i1_info, v1i1_info, HasBWI>; -defm : vextract_for_mask_to_mask<"KSHIFTRQ", v64i1_info, v1i1_info, HasBWI>; -defm : vextract_for_mask_to_mask<"KSHIFTRW", v16i1_info, v2i1_info, HasAVX512>; -defm : vextract_for_mask_to_mask<"KSHIFTRD", v32i1_info, v2i1_info, HasBWI>; -defm : vextract_for_mask_to_mask<"KSHIFTRQ", v64i1_info, v2i1_info, HasBWI>; -defm : vextract_for_mask_to_mask<"KSHIFTRW", v16i1_info, v4i1_info, HasAVX512>; -defm : vextract_for_mask_to_mask<"KSHIFTRD", v32i1_info, v4i1_info, HasBWI>; -defm : vextract_for_mask_to_mask<"KSHIFTRQ", v64i1_info, v4i1_info, HasBWI>; -defm : vextract_for_mask_to_mask<"KSHIFTRW", v16i1_info, v8i1_info, HasAVX512>; -defm : vextract_for_mask_to_mask<"KSHIFTRD", v32i1_info, v8i1_info, HasBWI>; -defm : vextract_for_mask_to_mask<"KSHIFTRQ", v64i1_info, v8i1_info, HasBWI>; -defm : vextract_for_mask_to_mask<"KSHIFTRD", v32i1_info, v16i1_info, HasBWI>; -defm : vextract_for_mask_to_mask<"KSHIFTRQ", v64i1_info, v16i1_info, HasBWI>; -defm : vextract_for_mask_to_mask<"KSHIFTRQ", v64i1_info, v32i1_info, HasBWI>; - -// Patterns for kmask shift -multiclass mask_shift_lowering { - def : Pat<(VT (X86kshiftl RC:$src, (i8 imm:$imm))), - (VT (COPY_TO_REGCLASS - (KSHIFTLWri (COPY_TO_REGCLASS RC:$src, VK16), - (I8Imm $imm)), - RC))>; - def : Pat<(VT (X86kshiftr RC:$src, (i8 imm:$imm))), - (VT (COPY_TO_REGCLASS - (KSHIFTRWri (COPY_TO_REGCLASS RC:$src, VK16), - (I8Imm $imm)), - RC))>; -} - -defm : mask_shift_lowering, Requires<[HasAVX512, NoDQI]>; -defm : mask_shift_lowering, Requires<[HasAVX512]>; -defm : mask_shift_lowering, Requires<[HasAVX512]>; //===----------------------------------------------------------------------===// // AVX-512 - Aligned and unaligned load and store // -multiclass avx512_load opc, string OpcodeStr, X86VectorVTInfo _, - PatFrag ld_frag, PatFrag mload, - bit NoRMPattern = 0, - SDPatternOperator SelectOprr = vselect> { +multiclass avx512_load opc, string OpcodeStr, MoveLoadStoreItins itins, + X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload, + bit NoRMPattern = 0, + SDPatternOperator SelectOprr = vselect> { let hasSideEffects = 0 in { def rr : AVX512PI, EVEX; + _.ExeDomain, itins.rr>, EVEX, Sched<[WriteMove]>; def rrkz : AVX512PI, - EVEX, EVEX_KZ; + _.ImmAllZerosV)))], _.ExeDomain, + itins.rr>, EVEX, EVEX_KZ, Sched<[WriteMove]>; - let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1, - SchedRW = [WriteLoad] in + let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in def rm : AVX512PI, EVEX; + _.ExeDomain, itins.rm>, EVEX, Sched<[WriteLoad]>; let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in { - def rrk : 
AVX512PI, - EVEX, EVEX_K; - let SchedRW = [WriteLoad] in + def rrk : AVX512PI, EVEX, EVEX_K, Sched<[WriteMove]>; def rmk : AVX512PI opc, string OpcodeStr, X86VectorVTInfo _, [(set _.RC:$dst, (_.VT (vselect _.KRCWM:$mask, (_.VT (bitconvert (ld_frag addr:$src1))), - (_.VT _.RC:$src0))))], _.ExeDomain>, EVEX, EVEX_K; + (_.VT _.RC:$src0))))], _.ExeDomain, itins.rm>, + EVEX, EVEX_K, Sched<[WriteLoad]>; } - let SchedRW = [WriteLoad] in def rmkz : AVX512PI, EVEX, EVEX_KZ; + _.ExeDomain, itins.rm>, EVEX, EVEX_KZ, Sched<[WriteLoad]>; } def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)), (!cast(NAME#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>; @@ -3070,16 +3192,20 @@ multiclass avx512_load opc, string OpcodeStr, X86VectorVTInfo _, multiclass avx512_alignedload_vl opc, string OpcodeStr, AVX512VLVectorVTInfo _, - Predicate prd> { + Predicate prd, + bit NoRMPattern = 0> { let Predicates = [prd] in - defm Z : avx512_load, EVEX_V512; + defm Z : avx512_load, EVEX_V512; let Predicates = [prd, HasVLX] in { - defm Z256 : avx512_load, EVEX_V256; - defm Z128 : avx512_load, EVEX_V128; + defm Z256 : avx512_load, EVEX_V256; + defm Z128 : avx512_load, EVEX_V128; } } @@ -3089,38 +3215,40 @@ multiclass avx512_load_vl opc, string OpcodeStr, bit NoRMPattern = 0, SDPatternOperator SelectOprr = vselect> { let Predicates = [prd] in - defm Z : avx512_load, EVEX_V512; let Predicates = [prd, HasVLX] in { - defm Z256 : avx512_load, EVEX_V256; - defm Z128 : avx512_load, EVEX_V128; } } -multiclass avx512_store opc, string OpcodeStr, X86VectorVTInfo _, - PatFrag st_frag, PatFrag mstore, string Name, - bit NoMRPattern = 0> { - +multiclass avx512_store opc, string OpcodeStr, MoveLoadStoreItins itins, + X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore, + string Name, bit NoMRPattern = 0> { let hasSideEffects = 0 in { def rr_REV : AVX512PI, EVEX, FoldGenData; + [], _.ExeDomain, itins.rr>, EVEX, FoldGenData, + Sched<[WriteMove]>; def rrk_REV : AVX512PI, EVEX, EVEX_K, FoldGenData; + [], _.ExeDomain, itins.rr>, EVEX, EVEX_K, + FoldGenData, Sched<[WriteMove]>; def rrkz_REV : AVX512PI, EVEX, EVEX_KZ, FoldGenData; + [], _.ExeDomain, itins.rr>, EVEX, EVEX_KZ, + FoldGenData, Sched<[WriteMove]>; } let hasSideEffects = 0, mayStore = 1 in @@ -3128,11 +3256,11 @@ multiclass avx512_store opc, string OpcodeStr, X86VectorVTInfo _, !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), !if(NoMRPattern, [], [(st_frag (_.VT _.RC:$src), addr:$dst)]), - _.ExeDomain>, EVEX; + _.ExeDomain, itins.mr>, EVEX, Sched<[WriteStore]>; def mrk : AVX512PI, EVEX, EVEX_K; + [], _.ExeDomain, itins.mr>, EVEX, EVEX_K, Sched<[WriteStore]>; def: Pat<(mstore addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src)), (!cast(NAME#_.ZSuffix##mrk) addr:$ptr, @@ -3144,14 +3272,14 @@ multiclass avx512_store_vl< bits<8> opc, string OpcodeStr, AVX512VLVectorVTInfo _, Predicate prd, string Name, bit NoMRPattern = 0> { let Predicates = [prd] in - defm Z : avx512_store, EVEX_V512; let Predicates = [prd, HasVLX] in { - defm Z256 : avx512_store, EVEX_V256; - defm Z128 : avx512_store, EVEX_V128; } @@ -3159,15 +3287,15 @@ multiclass avx512_store_vl< bits<8> opc, string OpcodeStr, multiclass avx512_alignedstore_vl opc, string OpcodeStr, AVX512VLVectorVTInfo _, Predicate prd, - string Name> { + string Name, bit NoMRPattern = 0> { let Predicates = [prd] in - defm Z : avx512_store, EVEX_V512; let Predicates = [prd, HasVLX] in { - defm Z256 : avx512_store, EVEX_V256; - defm Z128 : avx512_store, EVEX_V128; } } @@ -3197,9 +3325,9 @@ defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, 
HasAVX512, PD, VEX_W, EVEX_CD8<64, CD8VF>; defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info, - HasAVX512>, + HasAVX512, 1>, avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info, - HasAVX512, "VMOVDQA32">, + HasAVX512, "VMOVDQA32", 1>, PD, EVEX_CD8<32, CD8VF>; defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info, @@ -3219,9 +3347,9 @@ defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI, 1> XD, VEX_W, EVEX_CD8<16, CD8VF>; defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512, - 0, null_frag>, + 1, null_frag>, avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, - HasAVX512, "VMOVDQU32">, + HasAVX512, "VMOVDQU32", 1>, XS, EVEX_CD8<32, CD8VF>; defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512, @@ -3236,24 +3364,24 @@ defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512, let isReMaterializable = 1, canFoldAsLoad = 1, isPseudo = 1, SchedRW = [WriteLoad], mayLoad = 1, hasSideEffects = 0 in { def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src), - "", []>; + "", [], IIC_SSE_MOVA_P_RM>; def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src), - "", []>; + "", [], IIC_SSE_MOVA_P_RM>; def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src), - "", []>; + "", [], IIC_SSE_MOVA_P_RM>; def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src), - "", []>; + "", [], IIC_SSE_MOVA_P_RM>; } -let isPseudo = 1, mayStore = 1, hasSideEffects = 0 in { +let isPseudo = 1, SchedRW = [WriteStore], mayStore = 1, hasSideEffects = 0 in { def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src), - "", []>; + "", [], IIC_SSE_MOVA_P_MR>; def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src), - "", []>; + "", [], IIC_SSE_MOVA_P_MR>; def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src), - "", []>; + "", [], IIC_SSE_MOVA_P_MR>; def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src), - "", []>; + "", [], IIC_SSE_MOVA_P_MR>; } def : Pat<(v8i64 (vselect VK8WM:$mask, (bc_v8i64 (v16i32 immAllZerosV)), @@ -3276,62 +3404,88 @@ def : Pat<(v16i32 (vselect (xor VK16:$mask, (v16i1 immAllOnesV)), (v16i32 VR512:$src))), (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>; +multiclass mask_move_lowering { + def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask), + Narrow.RC:$src1, Narrow.RC:$src0)), + (EXTRACT_SUBREG + (Wide.VT + (!cast(InstrStr#"rrk") + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src0, Narrow.SubRegIdx)), + (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM), + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))), + Narrow.SubRegIdx)>; + + def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask), + Narrow.RC:$src1, Narrow.ImmAllZerosV)), + (EXTRACT_SUBREG + (Wide.VT + (!cast(InstrStr#"rrkz") + (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM), + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))), + Narrow.SubRegIdx)>; +} + // Patterns for handling v8i1 selects of 256-bit vectors when VLX isn't // available. Use a 512-bit operation and extract. 
let Predicates = [HasAVX512, NoVLX] in { -def : Pat<(v8f32 (vselect (v8i1 VK8WM:$mask), (v8f32 VR256X:$src1), - (v8f32 VR256X:$src0))), - (EXTRACT_SUBREG - (v16f32 - (VMOVAPSZrrk - (v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src0, sub_ymm)), - (COPY_TO_REGCLASS VK8WM:$mask, VK16WM), - (v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))), - sub_ymm)>; - -def : Pat<(v8i32 (vselect (v8i1 VK8WM:$mask), (v8i32 VR256X:$src1), - (v8i32 VR256X:$src0))), - (EXTRACT_SUBREG - (v16i32 - (VMOVDQA32Zrrk - (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src0, sub_ymm)), - (COPY_TO_REGCLASS VK8WM:$mask, VK16WM), - (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))), - sub_ymm)>; + defm : mask_move_lowering<"VMOVAPSZ", v4f32x_info, v16f32_info>; + defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>; + defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>; + defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>; + + defm : mask_move_lowering<"VMOVAPDZ", v2f64x_info, v8f64_info>; + defm : mask_move_lowering<"VMOVDQA64Z", v2i64x_info, v8i64_info>; + defm : mask_move_lowering<"VMOVAPDZ", v4f64x_info, v8f64_info>; + defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>; +} + +let Predicates = [HasBWI, NoVLX] in { + defm : mask_move_lowering<"VMOVDQU8Z", v16i8x_info, v64i8_info>; + defm : mask_move_lowering<"VMOVDQU8Z", v32i8x_info, v64i8_info>; + + defm : mask_move_lowering<"VMOVDQU16Z", v8i16x_info, v32i16_info>; + defm : mask_move_lowering<"VMOVDQU16Z", v16i16x_info, v32i16_info>; } let Predicates = [HasAVX512] in { // 512-bit store. def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst), - (VMOVDQA32Zmr addr:$dst, VR512:$src)>; + (VMOVDQA64Zmr addr:$dst, VR512:$src)>; def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst), - (VMOVDQA32Zmr addr:$dst, VR512:$src)>; + (VMOVDQA64Zmr addr:$dst, VR512:$src)>; + def : Pat<(store (v16i32 VR512:$src), addr:$dst), + (VMOVDQU64Zmr addr:$dst, VR512:$src)>; def : Pat<(store (v32i16 VR512:$src), addr:$dst), - (VMOVDQU32Zmr addr:$dst, VR512:$src)>; + (VMOVDQU64Zmr addr:$dst, VR512:$src)>; def : Pat<(store (v64i8 VR512:$src), addr:$dst), - (VMOVDQU32Zmr addr:$dst, VR512:$src)>; + (VMOVDQU64Zmr addr:$dst, VR512:$src)>; } let Predicates = [HasVLX] in { // 128-bit store. def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst), - (VMOVDQA32Z128mr addr:$dst, VR128X:$src)>; + (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>; def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst), - (VMOVDQA32Z128mr addr:$dst, VR128X:$src)>; + (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>; + def : Pat<(store (v4i32 VR128X:$src), addr:$dst), + (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>; def : Pat<(store (v8i16 VR128X:$src), addr:$dst), - (VMOVDQU32Z128mr addr:$dst, VR128X:$src)>; + (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>; def : Pat<(store (v16i8 VR128X:$src), addr:$dst), - (VMOVDQU32Z128mr addr:$dst, VR128X:$src)>; + (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>; // 256-bit store. 
def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst), - (VMOVDQA32Z256mr addr:$dst, VR256X:$src)>; + (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>; def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst), - (VMOVDQA32Z256mr addr:$dst, VR256X:$src)>; + (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>; + def : Pat<(store (v8i32 VR256X:$src), addr:$dst), + (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>; def : Pat<(store (v16i16 VR256X:$src), addr:$dst), - (VMOVDQU32Z256mr addr:$dst, VR256X:$src)>; + (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>; def : Pat<(store (v32i8 VR256X:$src), addr:$dst), - (VMOVDQU32Z256mr addr:$dst, VR256X:$src)>; + (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>; } multiclass masked_move_for_extract, - EVEX; + EVEX, Sched<[WriteMove]>; def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src), "vmovd\t{$src, $dst|$dst, $src}", [(set VR128X:$dst, (v4i32 (scalar_to_vector (loadi32 addr:$src))))], - IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>; + IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteLoad]>; def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src), "vmovq\t{$src, $dst|$dst, $src}", [(set VR128X:$dst, (v2i64 (scalar_to_vector GR64:$src)))], - IIC_SSE_MOVDQ>, EVEX, VEX_W; + IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteMove]>; let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i64mem:$src), - "vmovq\t{$src, $dst|$dst, $src}", []>, - EVEX, VEX_W, EVEX_CD8<64, CD8VT1>; + "vmovq\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVDQ>, + EVEX, VEX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteLoad]>; let isCodeGenOnly = 1 in { def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src), "vmovq\t{$src, $dst|$dst, $src}", @@ -3434,7 +3588,7 @@ def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src) def VMOV64toSDZrm : AVX512XSI<0x7E, MRMSrcMem, (outs FR64X:$dst), (ins i64mem:$src), "vmovq\t{$src, $dst|$dst, $src}", [(set FR64X:$dst, (bitconvert (loadi64 addr:$src)))]>, - EVEX, VEX_W, EVEX_CD8<8, CD8VT8>; + EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteLoad]>; def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src), "vmovq\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (bitconvert FR64X:$src))], @@ -3453,12 +3607,12 @@ let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in { def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src), "vmovd\t{$src, $dst|$dst, $src}", [(set FR32X:$dst, (bitconvert GR32:$src))], - IIC_SSE_MOVDQ>, EVEX; + IIC_SSE_MOVDQ>, EVEX, Sched<[WriteMove]>; def VMOVDI2SSZrm : AVX512BI<0x6E, MRMSrcMem, (outs FR32X:$dst), (ins i32mem:$src), "vmovd\t{$src, $dst|$dst, $src}", [(set FR32X:$dst, (bitconvert (loadi32 addr:$src)))], - IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>; + IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteLoad]>; } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1 // Move doubleword from xmm register to r/m32 @@ -3468,13 +3622,13 @@ def VMOVPDI2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$s "vmovd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (extractelt (v4i32 VR128X:$src), (iPTR 0)))], IIC_SSE_MOVD_ToGP>, - EVEX; + EVEX, Sched<[WriteMove]>; def VMOVPDI2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128X:$src), "vmovd\t{$src, $dst|$dst, $src}", [(store (i32 (extractelt (v4i32 VR128X:$src), (iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>, - EVEX, EVEX_CD8<32, CD8VT1>; + EVEX, EVEX_CD8<32, CD8VT1>, 
Sched<[WriteStore]>; } // ExeDomain = SSEPackedInt // Move quadword from xmm1 register to r/m64 @@ -3484,13 +3638,13 @@ def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src), "vmovq\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (extractelt (v2i64 VR128X:$src), (iPTR 0)))], - IIC_SSE_MOVD_ToGP>, PD, EVEX, VEX_W, + IIC_SSE_MOVD_ToGP>, PD, EVEX, VEX_W, Sched<[WriteMove]>, Requires<[HasAVX512, In64BitMode]>; let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src), "vmovq\t{$src, $dst|$dst, $src}", - [], IIC_SSE_MOVD_ToGP>, PD, EVEX, VEX_W, + [], IIC_SSE_MOVD_ToGP>, PD, EVEX, VEX_W, Sched<[WriteStore]>, Requires<[HasAVX512, In64BitMode]>; def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs), @@ -3504,8 +3658,8 @@ def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs), let hasSideEffects = 0 in def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst), (ins VR128X:$src), - "vmovq.s\t{$src, $dst|$dst, $src}",[]>, - EVEX, VEX_W; + "vmovq.s\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVDQ>, + EVEX, VEX_W, Sched<[WriteMove]>; } // ExeDomain = SSEPackedInt // Move Scalar Single to Double Int @@ -3515,12 +3669,12 @@ def VMOVSS2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32X:$src), "vmovd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (bitconvert FR32X:$src))], - IIC_SSE_MOVD_ToGP>, EVEX; + IIC_SSE_MOVD_ToGP>, EVEX, Sched<[WriteMove]>; def VMOVSS2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32X:$src), "vmovd\t{$src, $dst|$dst, $src}", [(store (i32 (bitconvert FR32X:$src)), addr:$dst)], - IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>; + IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteStore]>; } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1 // Move Quadword Int to Packed Quadword Int @@ -3531,9 +3685,15 @@ def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst), "vmovq\t{$src, $dst|$dst, $src}", [(set VR128X:$dst, (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, - EVEX, VEX_W, EVEX_CD8<8, CD8VT8>; + EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteLoad]>; } // ExeDomain = SSEPackedInt +// Allow "vmovd" but print "vmovq". 
+def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}", + (VMOV64toPQIZrr VR128X:$dst, GR64:$src), 0>; +def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}", + (VMOVPQIto64Zrr GR64:$dst, VR128X:$src), 0>; + //===----------------------------------------------------------------------===// // AVX-512 MOVSS, MOVSD //===----------------------------------------------------------------------===// @@ -3544,7 +3704,7 @@ multiclass avx512_move_scalar, EVEX_4V; + _.ExeDomain,IIC_SSE_MOV_S_RR>, EVEX_4V, Sched<[WriteMove]>; def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2), !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|", @@ -3552,7 +3712,7 @@ multiclass avx512_move_scalar, EVEX_4V, EVEX_KZ; + _.ExeDomain,IIC_SSE_MOV_S_RR>, EVEX_4V, EVEX_KZ, Sched<[WriteMove]>; let Constraints = "$src0 = $dst" in def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2), @@ -3561,34 +3721,34 @@ multiclass avx512_move_scalar, EVEX_4V, EVEX_K; + _.ExeDomain,IIC_SSE_MOV_S_RR>, EVEX_4V, EVEX_K, Sched<[WriteMove]>; let canFoldAsLoad = 1, isReMaterializable = 1 in def rm : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src), !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))], - _.ExeDomain, IIC_SSE_MOV_S_RM>, EVEX; + _.ExeDomain, IIC_SSE_MOV_S_RM>, EVEX, Sched<[WriteLoad]>; let mayLoad = 1, hasSideEffects = 0 in { let Constraints = "$src0 = $dst" in def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst), (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src), !strconcat(asm, "\t{$src, $dst {${mask}}|", "$dst {${mask}}, $src}"), - [], _.ExeDomain, IIC_SSE_MOV_S_RM>, EVEX, EVEX_K; + [], _.ExeDomain, IIC_SSE_MOV_S_RM>, EVEX, EVEX_K, Sched<[WriteLoad]>; def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst), (ins _.KRCWM:$mask, _.ScalarMemOp:$src), !strconcat(asm, "\t{$src, $dst {${mask}} {z}|", "$dst {${mask}} {z}, $src}"), - [], _.ExeDomain, IIC_SSE_MOV_S_RM>, EVEX, EVEX_KZ; + [], _.ExeDomain, IIC_SSE_MOV_S_RM>, EVEX, EVEX_KZ, Sched<[WriteLoad]>; } def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src), !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [(store _.FRC:$src, addr:$dst)], _.ExeDomain, IIC_SSE_MOV_S_MR>, - EVEX; + EVEX, Sched<[WriteStore]>; let mayStore = 1, hasSideEffects = 0 in def mrk: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.FRC:$src), !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"), - [], _.ExeDomain, IIC_SSE_MOV_S_MR>, EVEX, EVEX_K; + [], _.ExeDomain, IIC_SSE_MOV_S_MR>, EVEX, EVEX_K, Sched<[WriteStore]>; } defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, f32x_info>, @@ -3762,8 +3922,8 @@ let hasSideEffects = 0 in { def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2), "vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [], NoItinerary>, XS, EVEX_4V, VEX_LIG, - FoldGenData<"VMOVSSZrr">; + [], IIC_SSE_MOV_S_RR>, XS, EVEX_4V, VEX_LIG, + FoldGenData<"VMOVSSZrr">, Sched<[WriteMove]>; let Constraints = "$src0 = $dst" in def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), @@ -3771,21 +3931,21 @@ let Constraints = "$src0 = $dst" in VR128X:$src1, VR128X:$src2), "vmovss.s\t{$src2, $src1, $dst {${mask}}|"# "$dst {${mask}}, $src1, $src2}", - [], NoItinerary>, EVEX_K, XS, EVEX_4V, VEX_LIG, - FoldGenData<"VMOVSSZrrk">; + [], IIC_SSE_MOV_S_RR>, EVEX_K, XS, EVEX_4V, VEX_LIG, + FoldGenData<"VMOVSSZrrk">, Sched<[WriteMove]>; def 
VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), (ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2), "vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"# "$dst {${mask}} {z}, $src1, $src2}", - [], NoItinerary>, EVEX_KZ, XS, EVEX_4V, VEX_LIG, - FoldGenData<"VMOVSSZrrkz">; + [], IIC_SSE_MOV_S_RR>, EVEX_KZ, XS, EVEX_4V, VEX_LIG, + FoldGenData<"VMOVSSZrrkz">, Sched<[WriteMove]>; def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2), "vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [], NoItinerary>, XD, EVEX_4V, VEX_LIG, VEX_W, - FoldGenData<"VMOVSDZrr">; + [], IIC_SSE_MOV_S_RR>, XD, EVEX_4V, VEX_LIG, VEX_W, + FoldGenData<"VMOVSDZrr">, Sched<[WriteMove]>; let Constraints = "$src0 = $dst" in def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), @@ -3793,16 +3953,16 @@ let Constraints = "$src0 = $dst" in VR128X:$src1, VR128X:$src2), "vmovsd.s\t{$src2, $src1, $dst {${mask}}|"# "$dst {${mask}}, $src1, $src2}", - [], NoItinerary>, EVEX_K, XD, EVEX_4V, VEX_LIG, - VEX_W, FoldGenData<"VMOVSDZrrk">; + [], IIC_SSE_MOV_S_RR>, EVEX_K, XD, EVEX_4V, VEX_LIG, + VEX_W, FoldGenData<"VMOVSDZrrk">, Sched<[WriteMove]>; def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), (ins f64x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2), "vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"# "$dst {${mask}} {z}, $src1, $src2}", - [], NoItinerary>, EVEX_KZ, XD, EVEX_4V, VEX_LIG, - VEX_W, FoldGenData<"VMOVSDZrrkz">; + [], IIC_SSE_MOV_S_RR>, EVEX_KZ, XD, EVEX_4V, VEX_LIG, + VEX_W, FoldGenData<"VMOVSDZrrkz">, Sched<[WriteMove]>; } let Predicates = [HasAVX512] in { @@ -4105,16 +4265,16 @@ multiclass avx512_binop_rm opc, string OpcodeStr, SDNode OpNode, (ins _.RC:$src1, _.RC:$src2), OpcodeStr, "$src2, $src1", "$src1, $src2", (_.VT (OpNode _.RC:$src1, _.RC:$src2)), - itins.rr, IsCommutable>, - AVX512BIBase, EVEX_4V; + itins.rr, IsCommutable>, AVX512BIBase, EVEX_4V, + Sched<[itins.Sched]>; defm rm : AVX512_maskable, - AVX512BIBase, EVEX_4V; + itins.rm>, AVX512BIBase, EVEX_4V, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } multiclass avx512_binop_rmb opc, string OpcodeStr, SDNode OpNode, @@ -4128,8 +4288,8 @@ multiclass avx512_binop_rmb opc, string OpcodeStr, SDNode OpNode, (_.VT (OpNode _.RC:$src1, (X86VBroadcast (_.ScalarLdFrag addr:$src2)))), - itins.rm>, - AVX512BIBase, EVEX_4V, EVEX_B; + itins.rm>, AVX512BIBase, EVEX_4V, EVEX_B, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } multiclass avx512_binop_rm_vl opc, string OpcodeStr, SDNode OpNode, @@ -4234,14 +4394,14 @@ multiclass avx512_binop_rm2 opc, string OpcodeStr, OpndItins itins, (_Src.VT _Src.RC:$src1), (_Src.VT _Src.RC:$src2))), itins.rr, IsCommutable>, - AVX512BIBase, EVEX_4V; + AVX512BIBase, EVEX_4V, Sched<[itins.Sched]>; defm rm : AVX512_maskable, - AVX512BIBase, EVEX_4V; + itins.rm>, AVX512BIBase, EVEX_4V, + Sched<[itins.Sched.Folded, ReadAfterLd]>; defm rmb : AVX512_maskable opc, string OpcodeStr, OpndItins itins, (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert (_Brdct.VT (X86VBroadcast (_Brdct.ScalarLdFrag addr:$src2)))))), - itins.rm>, - AVX512BIBase, EVEX_4V, EVEX_B; + itins.rm>, AVX512BIBase, EVEX_4V, EVEX_B, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add, @@ -4268,12 +4428,12 @@ defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", X86addus, defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", X86subus, SSE_INTALU_ITINS_P, HasBWI, 0>; defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul, - 
SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD; + SSE_INTMUL_ITINS_P, HasAVX512, 1>, T8PD; defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul, - SSE_INTALU_ITINS_P, HasBWI, 1>; + SSE_INTMUL_ITINS_P, HasBWI, 1>; defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul, - SSE_INTALU_ITINS_P, HasDQI, 1>, T8PD; -defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SSE_INTALU_ITINS_P, + SSE_INTMUL_ITINS_P, HasDQI, 1>, T8PD; +defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SSE_INTMUL_ITINS_P, HasBWI, 1>; defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SSE_INTMUL_ITINS_P, HasBWI, 1>; @@ -4302,7 +4462,7 @@ multiclass avx512_binop_all opc, string OpcodeStr, OpndItins itins, } } -defm VPMULDQ : avx512_binop_all<0x28, "vpmuldq", SSE_INTALU_ITINS_P, +defm VPMULDQ : avx512_binop_all<0x28, "vpmuldq", SSE_INTMUL_ITINS_P, avx512vl_i32_info, avx512vl_i64_info, X86pmuldq, HasAVX512, 1>,T8PD; defm VPMULUDQ : avx512_binop_all<0xF4, "vpmuludq", SSE_INTMUL_ITINS_P, @@ -4448,6 +4608,46 @@ let Predicates = [HasDQI, NoVLX] in { sub_xmm)>; } +// PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX. +let Predicates = [HasDQI, NoVLX] in { + def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))), + (EXTRACT_SUBREG + (VPMULLQZrr + (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm), + (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)), + sub_ymm)>; + + def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))), + (EXTRACT_SUBREG + (VPMULLQZrr + (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm), + (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)), + sub_xmm)>; +} + +multiclass avx512_min_max_lowering { + def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)), + (EXTRACT_SUBREG + (Instr + (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm), + (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)), + sub_ymm)>; + + def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)), + (EXTRACT_SUBREG + (Instr + (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm), + (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)), + sub_xmm)>; +} + +let Predicates = [HasAVX512, NoVLX] in { + defm : avx512_min_max_lowering; + defm : avx512_min_max_lowering; + defm : avx512_min_max_lowering; + defm : avx512_min_max_lowering; +} + //===----------------------------------------------------------------------===// // AVX-512 Logical Instructions //===----------------------------------------------------------------------===// @@ -4456,7 +4656,7 @@ let Predicates = [HasDQI, NoVLX] in { // be set to null_frag for 32-bit elements. 
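// Illustrative sketch (hypothetical field spellings, inferred only from how
// `itins` is used in the hunks below) of the OpndItins bundle that each
// multiclass now takes instead of hard-coded IIC_SSE_* itineraries:
//
//   class OpndItins<InstrItinClass rr_, InstrItinClass rm_> {
//     InstrItinClass rr = rr_;        // itinerary for the register form
//     InstrItinClass rm = rm_;        // itinerary for the folded-load form
//     X86FoldableSchedWrite Sched;    // feeds Sched<[itins.Sched]> and
//   }                                 // Sched<[itins.Sched.Folded, ReadAfterLd]>
//
// Threading one such bundle through avx512_logic_rm and friends lets each
// instantiation (e.g. SSE_BIT_ITINS_P) choose both itineraries and the
// scheduler class in a single place.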
multiclass avx512_logic_rm opc, string OpcodeStr, SDPatternOperator OpNode, - SDNode OpNodeMsk, X86VectorVTInfo _, + SDNode OpNodeMsk, OpndItins itins, X86VectorVTInfo _, bit IsCommutable = 0> { let hasSideEffects = 0 in defm rr : AVX512_maskable_logic opc, string OpcodeStr, (bitconvert (_.VT _.RC:$src2)))), (_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1, _.RC:$src2)))), - IIC_SSE_BIT_P_RR, IsCommutable>, - AVX512BIBase, EVEX_4V; + itins.rr, IsCommutable>, AVX512BIBase, EVEX_4V, + Sched<[itins.Sched]>; let hasSideEffects = 0, mayLoad = 1 in defm rm : AVX512_maskable_logic opc, string OpcodeStr, (bitconvert (_.LdFrag addr:$src2)))), (_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1, (bitconvert (_.LdFrag addr:$src2)))))), - IIC_SSE_BIT_P_RM>, - AVX512BIBase, EVEX_4V; + itins.rm>, AVX512BIBase, EVEX_4V, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } // OpNodeMsk is the OpNode to use where element size is important. So use // for all of the broadcast patterns. multiclass avx512_logic_rmb opc, string OpcodeStr, SDPatternOperator OpNode, - SDNode OpNodeMsk, X86VectorVTInfo _, + SDNode OpNodeMsk, OpndItins itins, X86VectorVTInfo _, bit IsCommutable = 0> : - avx512_logic_rm { + avx512_logic_rm { defm rmb : AVX512_maskable_logic opc, string OpcodeStr, (bitconvert (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src2)))))))), - IIC_SSE_BIT_P_RM>, - AVX512BIBase, EVEX_4V, EVEX_B; + itins.rm>, AVX512BIBase, EVEX_4V, EVEX_B, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } multiclass avx512_logic_rmb_vl opc, string OpcodeStr, SDPatternOperator OpNode, - SDNode OpNodeMsk, AVX512VLVectorVTInfo VTInfo, + SDNode OpNodeMsk, OpndItins itins, + AVX512VLVectorVTInfo VTInfo, bit IsCommutable = 0> { let Predicates = [HasAVX512] in - defm Z : avx512_logic_rmb, EVEX_V512; + defm Z : avx512_logic_rmb, EVEX_V512; let Predicates = [HasAVX512, HasVLX] in { - defm Z256 : avx512_logic_rmb, EVEX_V256; - defm Z128 : avx512_logic_rmb, EVEX_V128; } } multiclass avx512_logic_rm_vl_dq opc_d, bits<8> opc_q, string OpcodeStr, - SDNode OpNode, bit IsCommutable = 0> { - defm Q : avx512_logic_rmb_vl { + defm Q : avx512_logic_rmb_vl, VEX_W, EVEX_CD8<64, CD8VF>; - defm D : avx512_logic_rmb_vl, EVEX_CD8<32, CD8VF>; } -defm VPAND : avx512_logic_rm_vl_dq<0xDB, 0xDB, "vpand", and, 1>; -defm VPOR : avx512_logic_rm_vl_dq<0xEB, 0xEB, "vpor", or, 1>; -defm VPXOR : avx512_logic_rm_vl_dq<0xEF, 0xEF, "vpxor", xor, 1>; -defm VPANDN : avx512_logic_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp>; +defm VPAND : avx512_logic_rm_vl_dq<0xDB, 0xDB, "vpand", and, SSE_BIT_ITINS_P, 1>; +defm VPOR : avx512_logic_rm_vl_dq<0xEB, 0xEB, "vpor", or, SSE_BIT_ITINS_P, 1>; +defm VPXOR : avx512_logic_rm_vl_dq<0xEF, 0xEF, "vpxor", xor, SSE_BIT_ITINS_P, 1>; +defm VPANDN : avx512_logic_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp, SSE_BIT_ITINS_P>; //===----------------------------------------------------------------------===// // AVX-512 FP arithmetic @@ -4547,7 +4750,7 @@ multiclass avx512_fp_scalar opc, string OpcodeStr,X86VectorVTInfo _, "$src2, $src1", "$src1, $src2", (_.VT (VecNode _.RC:$src1, _.RC:$src2, (i32 FROUND_CURRENT))), - itins.rr>; + itins.rr>, Sched<[itins.Sched]>; defm rm_Int : AVX512_maskable_scalar opc, string OpcodeStr,X86VectorVTInfo _, (_.VT (VecNode _.RC:$src1, _.ScalarIntMemCPat:$src2, (i32 FROUND_CURRENT))), - itins.rm>; + itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; let isCodeGenOnly = 1, Predicates = [HasAVX512] in { def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst), (ins _.FRC:$src1, _.FRC:$src2), OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", 
[(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))], - itins.rr> { + itins.rr>, Sched<[itins.Sched]> { let isCommutable = IsCommutable; } def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst), (ins _.FRC:$src1, _.ScalarMemOp:$src2), OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.FRC:$dst, (OpNode _.FRC:$src1, - (_.ScalarLdFrag addr:$src2)))], itins.rm>; + (_.ScalarLdFrag addr:$src2)))], itins.rm>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } } @@ -4576,12 +4780,12 @@ multiclass avx512_fp_scalar opc, string OpcodeStr,X86VectorVTInfo _, multiclass avx512_fp_scalar_round opc, string OpcodeStr,X86VectorVTInfo _, SDNode VecNode, OpndItins itins, bit IsCommutable = 0> { let ExeDomain = _.ExeDomain in - defm rrb : AVX512_maskable_scalar, - EVEX_B, EVEX_RC; + EVEX_B, EVEX_RC, Sched<[itins.Sched]>; } multiclass avx512_fp_scalar_sae opc, string OpcodeStr,X86VectorVTInfo _, SDNode OpNode, SDNode VecNode, SDNode SaeNode, @@ -4591,35 +4795,37 @@ multiclass avx512_fp_scalar_sae opc, string OpcodeStr,X86VectorVTInfo _, (ins _.RC:$src1, _.RC:$src2), OpcodeStr, "$src2, $src1", "$src1, $src2", (_.VT (VecNode _.RC:$src1, _.RC:$src2)), - itins.rr>; + itins.rr>, Sched<[itins.Sched]>; defm rm_Int : AVX512_maskable_scalar; + itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; let isCodeGenOnly = 1, Predicates = [HasAVX512] in { def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst), (ins _.FRC:$src1, _.FRC:$src2), OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))], - itins.rr> { + itins.rr>, Sched<[itins.Sched]> { let isCommutable = IsCommutable; } def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst), (ins _.FRC:$src1, _.ScalarMemOp:$src2), OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.FRC:$dst, (OpNode _.FRC:$src1, - (_.ScalarLdFrag addr:$src2)))], itins.rm>; + (_.ScalarLdFrag addr:$src2)))], itins.rm>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } - defm rrb : AVX512_maskable_scalar, EVEX_B; + (i32 FROUND_NO_EXC)), itins.rr>, EVEX_B, + Sched<[itins.Sched]>; } } @@ -4666,14 +4872,15 @@ multiclass avx512_comutable_binop_s opc, string OpcodeStr, (ins _.FRC:$src1, _.FRC:$src2), OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))], - itins.rr> { + itins.rr>, Sched<[itins.Sched]> { let isCommutable = 1; } def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst), (ins _.FRC:$src1, _.ScalarMemOp:$src2), OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.FRC:$dst, (OpNode _.FRC:$src1, - (_.ScalarLdFrag addr:$src2)))], itins.rm>; + (_.ScalarLdFrag addr:$src2)))], itins.rm>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc, @@ -4700,43 +4907,43 @@ multiclass avx512_fp_packed opc, string OpcodeStr, SDPatternOperator OpN (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2", (_.VT (OpNode _.RC:$src1, _.RC:$src2)), itins.rr, - IsCommutable>, EVEX_4V; + IsCommutable>, EVEX_4V, Sched<[itins.Sched]>; let mayLoad = 1 in { defm rm: AVX512_maskable, - EVEX_4V; + EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>; defm rmb: AVX512_maskable, EVEX_4V, EVEX_B; + itins.rm>, EVEX_4V, EVEX_B, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } } multiclass avx512_fp_round_packed opc, string OpcodeStr, SDPatternOperator OpNodeRnd, - X86VectorVTInfo _> { + OpndItins itins, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in - defm rb: AVX512_maskable, - EVEX_4V, EVEX_B, EVEX_RC; + (_.VT (OpNodeRnd _.RC:$src1, 
_.RC:$src2, (i32 imm:$rc))), itins.rr>, + EVEX_4V, EVEX_B, EVEX_RC, Sched<[itins.Sched]>; } - multiclass avx512_fp_sae_packed opc, string OpcodeStr, SDPatternOperator OpNodeRnd, - X86VectorVTInfo _> { + OpndItins itins, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in - defm rb: AVX512_maskable, - EVEX_4V, EVEX_B; + (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 FROUND_NO_EXC))), itins.rr>, + EVEX_4V, EVEX_B, Sched<[itins.Sched]>; } multiclass avx512_fp_binop_p opc, string OpcodeStr, SDPatternOperator OpNode, @@ -4768,36 +4975,38 @@ multiclass avx512_fp_binop_p opc, string OpcodeStr, SDPatternOperator Op } } -multiclass avx512_fp_binop_p_round opc, string OpcodeStr, SDNode OpNodeRnd> { - defm PSZ : avx512_fp_round_packed, +multiclass avx512_fp_binop_p_round opc, string OpcodeStr, SDNode OpNodeRnd, + SizeItins itins> { + defm PSZ : avx512_fp_round_packed, EVEX_V512, PS, EVEX_CD8<32, CD8VF>; - defm PDZ : avx512_fp_round_packed, + defm PDZ : avx512_fp_round_packed, EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>; } -multiclass avx512_fp_binop_p_sae opc, string OpcodeStr, SDNode OpNodeRnd> { - defm PSZ : avx512_fp_sae_packed, +multiclass avx512_fp_binop_p_sae opc, string OpcodeStr, SDNode OpNodeRnd, + SizeItins itins> { + defm PSZ : avx512_fp_sae_packed, EVEX_V512, PS, EVEX_CD8<32, CD8VF>; - defm PDZ : avx512_fp_sae_packed, + defm PDZ : avx512_fp_sae_packed, EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>; } defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, HasAVX512, SSE_ALU_ITINS_P, 1>, - avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd>; + avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SSE_ALU_ITINS_P>; defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, HasAVX512, SSE_MUL_ITINS_P, 1>, - avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd>; + avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SSE_MUL_ITINS_P>; defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub, HasAVX512, SSE_ALU_ITINS_P>, - avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd>; + avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SSE_ALU_ITINS_P>; defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv, HasAVX512, SSE_DIV_ITINS_P>, - avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd>; + avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SSE_DIV_ITINS_P>; defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512, SSE_ALU_ITINS_P, 0>, - avx512_fp_binop_p_sae<0x5D, "vmin", X86fminRnd>; + avx512_fp_binop_p_sae<0x5D, "vmin", X86fminRnd, SSE_ALU_ITINS_P>; defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, HasAVX512, SSE_ALU_ITINS_P, 0>, - avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxRnd>; + avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxRnd, SSE_ALU_ITINS_P>; let isCodeGenOnly = 1 in { defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, HasAVX512, SSE_ALU_ITINS_P, 1>; @@ -4918,64 +5127,69 @@ let Predicates = [HasVLX,HasDQI] in { } multiclass avx512_fp_scalef_p opc, string OpcodeStr, SDNode OpNode, - X86VectorVTInfo _> { + OpndItins itins, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in { defm rr: AVX512_maskable, EVEX_4V; + (_.VT (OpNode _.RC:$src1, _.RC:$src2, (i32 FROUND_CURRENT))), + itins.rr>, EVEX_4V, Sched<[itins.Sched]>; defm rm: AVX512_maskable, EVEX_4V; + (OpNode _.RC:$src1, (_.LdFrag addr:$src2), (i32 FROUND_CURRENT)), + itins.rm>, EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>; defm rmb: AVX512_maskable, - EVEX_4V, EVEX_B; + (_.ScalarLdFrag addr:$src2))), + (i32 FROUND_CURRENT)), itins.rm>, + EVEX_4V, EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>; } } multiclass avx512_fp_scalef_scalar opc, string OpcodeStr, SDNode OpNode, - 
X86VectorVTInfo _> { + OpndItins itins, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in { defm rr: AVX512_maskable_scalar; + (_.VT (OpNode _.RC:$src1, _.RC:$src2, (i32 FROUND_CURRENT))), itins.rr>, + Sched<[itins.Sched]>; defm rm: AVX512_maskable_scalar; + (i32 FROUND_CURRENT)), itins.rm>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } multiclass avx512_fp_scalef_all opc, bits<8> opcScaler, string OpcodeStr, SDNode OpNode, SDNode OpNodeScal> { - defm PSZ : avx512_fp_scalef_p, - avx512_fp_round_packed, + defm PSZ : avx512_fp_scalef_p, + avx512_fp_round_packed, EVEX_V512, EVEX_CD8<32, CD8VF>; - defm PDZ : avx512_fp_scalef_p, - avx512_fp_round_packed, + defm PDZ : avx512_fp_scalef_p, + avx512_fp_round_packed, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; - defm SSZ128 : avx512_fp_scalef_scalar, + defm SSZ128 : avx512_fp_scalef_scalar, avx512_fp_scalar_round, EVEX_4V,EVEX_CD8<32, CD8VT1>; - defm SDZ128 : avx512_fp_scalef_scalar, + defm SDZ128 : avx512_fp_scalef_scalar, avx512_fp_scalar_round, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W; // Define only if AVX512VL feature is present. let Predicates = [HasVLX] in { - defm PSZ128 : avx512_fp_scalef_p, + defm PSZ128 : avx512_fp_scalef_p, EVEX_V128, EVEX_CD8<32, CD8VF>; - defm PSZ256 : avx512_fp_scalef_p, + defm PSZ256 : avx512_fp_scalef_p, EVEX_V256, EVEX_CD8<32, CD8VF>; - defm PDZ128 : avx512_fp_scalef_p, + defm PDZ128 : avx512_fp_scalef_p, EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>; - defm PDZ256 : avx512_fp_scalef_p, + defm PDZ256 : avx512_fp_scalef_p, EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>; } } @@ -4986,34 +5200,35 @@ defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", X86scalef, X86scalefs //===----------------------------------------------------------------------===// multiclass avx512_vptest opc, string OpcodeStr, SDNode OpNode, - X86VectorVTInfo _> { + OpndItins itins, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in { let isCommutable = 1 in defm rr : AVX512_maskable_cmp, - EVEX_4V; + (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)), itins.rr>, + EVEX_4V, Sched<[itins.Sched]>; defm rm : AVX512_maskable_cmp, - EVEX_4V, - EVEX_CD8<_.EltSize, CD8VF>; + (_.VT (bitconvert (_.LdFrag addr:$src2)))), itins.rm>, + EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } multiclass avx512_vptest_mb opc, string OpcodeStr, SDNode OpNode, - X86VectorVTInfo _> { + OpndItins itins, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in defm rmb : AVX512_maskable_cmp, - EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>; + (_.ScalarLdFrag addr:$src2)))), + itins.rm>, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } // Use 512bit version to implement 128/256 bit in case NoVLX. 
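// Illustrative note: "use the 512-bit version" refers to the same widening
// idiom as the VPMULLQ and 64-bit min/max lowerings above -- INSERT_SUBREG the
// narrow operands into an IMPLICIT_DEF 512-bit register, execute the EVEX_V512
// form, and recover the narrow result afterwards; the upper lanes are
// don't-care since these operations work lane-wise.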
@@ -5030,16 +5245,17 @@ multiclass avx512_vptest_lowering opc, string OpcodeStr, SDNode OpNode, - AVX512VLVectorVTInfo _, string Suffix> { + OpndItins itins, AVX512VLVectorVTInfo _, + string Suffix> { let Predicates = [HasAVX512] in - defm Z : avx512_vptest, - avx512_vptest_mb, EVEX_V512; + defm Z : avx512_vptest, + avx512_vptest_mb, EVEX_V512; let Predicates = [HasAVX512, HasVLX] in { - defm Z256 : avx512_vptest, - avx512_vptest_mb, EVEX_V256; - defm Z128 : avx512_vptest, - avx512_vptest_mb, EVEX_V128; + defm Z256 : avx512_vptest, + avx512_vptest_mb, EVEX_V256; + defm Z128 : avx512_vptest, + avx512_vptest_mb, EVEX_V128; } let Predicates = [HasAVX512, NoVLX] in { defm Z256_Alt : avx512_vptest_lowering< OpNode, _.info512, _.info256, Suffix>; @@ -5047,30 +5263,31 @@ multiclass avx512_vptest_dq_sizes opc, string OpcodeStr, SDNode OpNode, } } -multiclass avx512_vptest_dq opc, string OpcodeStr, SDNode OpNode> { - defm D : avx512_vptest_dq_sizes opc, string OpcodeStr, SDNode OpNode, + OpndItins itins> { + defm D : avx512_vptest_dq_sizes; - defm Q : avx512_vptest_dq_sizes, VEX_W; } multiclass avx512_vptest_wb opc, string OpcodeStr, - SDNode OpNode> { + SDNode OpNode, OpndItins itins> { let Predicates = [HasBWI] in { - defm WZ: avx512_vptest, + defm WZ: avx512_vptest, EVEX_V512, VEX_W; - defm BZ: avx512_vptest, + defm BZ: avx512_vptest, EVEX_V512; } let Predicates = [HasVLX, HasBWI] in { - defm WZ256: avx512_vptest, + defm WZ256: avx512_vptest, EVEX_V256, VEX_W; - defm WZ128: avx512_vptest, + defm WZ128: avx512_vptest, EVEX_V128, VEX_W; - defm BZ256: avx512_vptest, + defm BZ256: avx512_vptest, EVEX_V256; - defm BZ128: avx512_vptest, + defm BZ128: avx512_vptest, EVEX_V128; } @@ -5080,151 +5297,165 @@ multiclass avx512_vptest_wb opc, string OpcodeStr, defm WZ256_Alt : avx512_vptest_lowering< OpNode, v32i16_info, v16i16x_info, "W">; defm WZ128_Alt : avx512_vptest_lowering< OpNode, v32i16_info, v8i16x_info, "W">; } - } multiclass avx512_vptest_all_forms opc_wb, bits<8> opc_dq, string OpcodeStr, - SDNode OpNode> : - avx512_vptest_wb , - avx512_vptest_dq; + SDNode OpNode, OpndItins itins> : + avx512_vptest_wb , + avx512_vptest_dq; -defm VPTESTM : avx512_vptest_all_forms<0x26, 0x27, "vptestm", X86testm>, T8PD; -defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm", X86testnm>, T8XS; +defm VPTESTM : avx512_vptest_all_forms<0x26, 0x27, "vptestm", X86testm, + SSE_BIT_ITINS_P>, T8PD; +defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm", X86testnm, + SSE_BIT_ITINS_P>, T8XS; //===----------------------------------------------------------------------===// // AVX-512 Shift instructions //===----------------------------------------------------------------------===// multiclass avx512_shift_rmi opc, Format ImmFormR, Format ImmFormM, - string OpcodeStr, SDNode OpNode, X86VectorVTInfo _> { + string OpcodeStr, SDNode OpNode, OpndItins itins, + X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in { defm ri : AVX512_maskable; + itins.rr>, Sched<[itins.Sched]>; defm mi : AVX512_maskable; + itins.rm>, Sched<[itins.Sched.Folded]>; } } multiclass avx512_shift_rmbi opc, Format ImmFormM, - string OpcodeStr, SDNode OpNode, X86VectorVTInfo _> { + string OpcodeStr, SDNode OpNode, OpndItins itins, + X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in defm mbi : AVX512_maskable, EVEX_B; + itins.rm>, EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>; } multiclass avx512_shift_rrm opc, string OpcodeStr, SDNode OpNode, - ValueType SrcVT, PatFrag bc_frag, X86VectorVTInfo _> { + OpndItins itins, ValueType SrcVT, 
PatFrag bc_frag, + X86VectorVTInfo _> { // src2 is always 128-bit let ExeDomain = _.ExeDomain in { defm rr : AVX512_maskable, AVX512BIBase, EVEX_4V; + itins.rr>, AVX512BIBase, EVEX_4V, Sched<[itins.Sched]>; defm rm : AVX512_maskable, AVX512BIBase, - EVEX_4V; + itins.rm>, AVX512BIBase, + EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>; } } multiclass avx512_shift_sizes opc, string OpcodeStr, SDNode OpNode, - ValueType SrcVT, PatFrag bc_frag, - AVX512VLVectorVTInfo VTInfo, Predicate prd> { + OpndItins itins, ValueType SrcVT, PatFrag bc_frag, + AVX512VLVectorVTInfo VTInfo, Predicate prd> { let Predicates = [prd] in - defm Z : avx512_shift_rrm, EVEX_V512, EVEX_CD8 ; let Predicates = [prd, HasVLX] in { - defm Z256 : avx512_shift_rrm, EVEX_V256, EVEX_CD8; - defm Z128 : avx512_shift_rrm, EVEX_V128, EVEX_CD8; } } multiclass avx512_shift_types opcd, bits<8> opcq, bits<8> opcw, - string OpcodeStr, SDNode OpNode> { - defm D : avx512_shift_sizes; - defm Q : avx512_shift_sizes, VEX_W; - defm W : avx512_shift_sizes; + string OpcodeStr, SDNode OpNode, + OpndItins itins> { + defm D : avx512_shift_sizes; + defm Q : avx512_shift_sizes, VEX_W; + defm W : avx512_shift_sizes; } multiclass avx512_shift_rmi_sizes opc, Format ImmFormR, Format ImmFormM, - string OpcodeStr, SDNode OpNode, - AVX512VLVectorVTInfo VTInfo> { + string OpcodeStr, SDNode OpNode, + OpndItins itins, AVX512VLVectorVTInfo VTInfo> { let Predicates = [HasAVX512] in - defm Z: avx512_shift_rmi, - avx512_shift_rmbi, EVEX_V512; let Predicates = [HasAVX512, HasVLX] in { - defm Z256: avx512_shift_rmi, - avx512_shift_rmbi, EVEX_V256; defm Z128: avx512_shift_rmi, - avx512_shift_rmbi, + avx512_shift_rmbi, EVEX_V128; } } multiclass avx512_shift_rmi_w opcw, Format ImmFormR, Format ImmFormM, - string OpcodeStr, SDNode OpNode> { + string OpcodeStr, SDNode OpNode, + OpndItins itins> { let Predicates = [HasBWI] in defm WZ: avx512_shift_rmi, EVEX_V512, VEX_WIG; + itins, v32i16_info>, EVEX_V512, VEX_WIG; let Predicates = [HasVLX, HasBWI] in { defm WZ256: avx512_shift_rmi, EVEX_V256, VEX_WIG; + itins, v16i16x_info>, EVEX_V256, VEX_WIG; defm WZ128: avx512_shift_rmi, EVEX_V128, VEX_WIG; + itins, v8i16x_info>, EVEX_V128, VEX_WIG; } } multiclass avx512_shift_rmi_dq opcd, bits<8> opcq, Format ImmFormR, Format ImmFormM, - string OpcodeStr, SDNode OpNode> { + string OpcodeStr, SDNode OpNode, OpndItins itins> { defm D: avx512_shift_rmi_sizes, EVEX_CD8<32, CD8VF>; + itins, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; defm Q: avx512_shift_rmi_sizes, EVEX_CD8<64, CD8VF>, VEX_W; + itins, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W; } -defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli>, - avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli>, AVX512BIi8Base, EVEX_4V; +defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli, + SSE_INTSHIFT_P>, + avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli, + SSE_INTSHIFT_P>, AVX512BIi8Base, EVEX_4V; -defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli>, - avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli>, AVX512BIi8Base, EVEX_4V; +defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli, + SSE_INTSHIFT_P>, + avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli, + SSE_INTSHIFT_P>, AVX512BIi8Base, EVEX_4V; -defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai>, - avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai>, AVX512BIi8Base, EVEX_4V; +defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, 
MRM4r, MRM4m, "vpsra", X86vsrai, + SSE_INTSHIFT_P>, + avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai, + SSE_INTSHIFT_P>, AVX512BIi8Base, EVEX_4V; -defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri>, AVX512BIi8Base, EVEX_4V; -defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli>, AVX512BIi8Base, EVEX_4V; +defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri, + SSE_INTSHIFT_P>, AVX512BIi8Base, EVEX_4V; +defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli, + SSE_INTSHIFT_P>, AVX512BIi8Base, EVEX_4V; -defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl>; -defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra>; -defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl>; +defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl, SSE_INTSHIFT_P>; +defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra, SSE_INTSHIFT_P>; +defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl, SSE_INTSHIFT_P>; // Use 512bit VPSRA/VPSRAI version to implement v2i64/v4i64 in case NoVLX. let Predicates = [HasAVX512, NoVLX] in { @@ -5257,25 +5488,27 @@ let Predicates = [HasAVX512, NoVLX] in { // Variable Bit Shifts //===-------------------------------------------------------------------===// multiclass avx512_var_shift opc, string OpcodeStr, SDNode OpNode, - X86VectorVTInfo _> { + OpndItins itins, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in { defm rr : AVX512_maskable, AVX5128IBase, EVEX_4V; + itins.rr>, AVX5128IBase, EVEX_4V, + Sched<[itins.Sched]>; defm rm : AVX512_maskable, AVX5128IBase, EVEX_4V, - EVEX_CD8<_.EltSize, CD8VF>; + itins.rm>, AVX5128IBase, EVEX_4V, + EVEX_CD8<_.EltSize, CD8VF>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } multiclass avx512_var_shift_mb opc, string OpcodeStr, SDNode OpNode, - X86VectorVTInfo _> { + OpndItins itins, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in defm rmb : AVX512_maskable opc, string OpcodeStr, SDNode OpNode, "$src1, ${src2}"##_.BroadcastStr, (_.VT (OpNode _.RC:$src1, (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src2))))), - SSE_INTSHIFT_ITINS_P.rm>, AVX5128IBase, EVEX_B, - EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>; + itins.rm>, AVX5128IBase, EVEX_B, + EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } multiclass avx512_var_shift_sizes opc, string OpcodeStr, SDNode OpNode, - AVX512VLVectorVTInfo _> { + OpndItins itins, AVX512VLVectorVTInfo _> { let Predicates = [HasAVX512] in - defm Z : avx512_var_shift, - avx512_var_shift_mb, EVEX_V512; + defm Z : avx512_var_shift, + avx512_var_shift_mb, EVEX_V512; let Predicates = [HasAVX512, HasVLX] in { - defm Z256 : avx512_var_shift, - avx512_var_shift_mb, EVEX_V256; - defm Z128 : avx512_var_shift, - avx512_var_shift_mb, EVEX_V128; + defm Z256 : avx512_var_shift, + avx512_var_shift_mb, EVEX_V256; + defm Z128 : avx512_var_shift, + avx512_var_shift_mb, EVEX_V128; } } multiclass avx512_var_shift_types opc, string OpcodeStr, - SDNode OpNode> { - defm D : avx512_var_shift_sizes { + defm D : avx512_var_shift_sizes; - defm Q : avx512_var_shift_sizes, VEX_W; } @@ -5331,30 +5565,30 @@ multiclass avx512_var_shift_lowering opc, string OpcodeStr, - SDNode OpNode> { + SDNode OpNode, OpndItins itins> { let Predicates = [HasBWI] in - defm WZ: avx512_var_shift, + defm WZ: avx512_var_shift, EVEX_V512, VEX_W; let Predicates = [HasVLX, HasBWI] in { - defm WZ256: avx512_var_shift, + defm WZ256: avx512_var_shift, EVEX_V256, VEX_W; - 
defm WZ128: avx512_var_shift, + defm WZ128: avx512_var_shift, EVEX_V128, VEX_W; } } -defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", shl>, - avx512_var_shift_w<0x12, "vpsllvw", shl>; +defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", shl, SSE_INTSHIFT_P>, + avx512_var_shift_w<0x12, "vpsllvw", shl, SSE_INTSHIFT_P>; -defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", sra>, - avx512_var_shift_w<0x11, "vpsravw", sra>; +defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", sra, SSE_INTSHIFT_P>, + avx512_var_shift_w<0x11, "vpsravw", sra, SSE_INTSHIFT_P>; -defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", srl>, - avx512_var_shift_w<0x10, "vpsrlvw", srl>; +defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", srl, SSE_INTSHIFT_P>, + avx512_var_shift_w<0x10, "vpsrlvw", srl, SSE_INTSHIFT_P>; -defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr>; -defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl>; +defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SSE_INTSHIFT_P>; +defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SSE_INTSHIFT_P>; defm : avx512_var_shift_lowering; defm : avx512_var_shift_lowering; @@ -5532,84 +5766,86 @@ let Predicates = [HasAVX512, NoVLX] in { // 1-src variable permutation VPERMW/D/Q //===-------------------------------------------------------------------===// multiclass avx512_vperm_dq_sizes opc, string OpcodeStr, SDNode OpNode, - AVX512VLVectorVTInfo _> { + OpndItins itins, AVX512VLVectorVTInfo _> { let Predicates = [HasAVX512] in - defm Z : avx512_var_shift, - avx512_var_shift_mb, EVEX_V512; + defm Z : avx512_var_shift, + avx512_var_shift_mb, EVEX_V512; let Predicates = [HasAVX512, HasVLX] in - defm Z256 : avx512_var_shift, - avx512_var_shift_mb, EVEX_V256; + defm Z256 : avx512_var_shift, + avx512_var_shift_mb, EVEX_V256; } multiclass avx512_vpermi_dq_sizes opc, Format ImmFormR, Format ImmFormM, string OpcodeStr, SDNode OpNode, - AVX512VLVectorVTInfo VTInfo> { + OpndItins itins, AVX512VLVectorVTInfo VTInfo> { let Predicates = [HasAVX512] in defm Z: avx512_shift_rmi, + itins, VTInfo.info512>, avx512_shift_rmbi, EVEX_V512; + itins, VTInfo.info512>, EVEX_V512; let Predicates = [HasAVX512, HasVLX] in defm Z256: avx512_shift_rmi, + itins, VTInfo.info256>, avx512_shift_rmbi, EVEX_V256; + itins, VTInfo.info256>, EVEX_V256; } multiclass avx512_vperm_bw opc, string OpcodeStr, Predicate prd, SDNode OpNode, - AVX512VLVectorVTInfo _> { + OpndItins itins, AVX512VLVectorVTInfo _> { let Predicates = [prd] in - defm Z: avx512_var_shift, + defm Z: avx512_var_shift, EVEX_V512 ; let Predicates = [HasVLX, prd] in { - defm Z256: avx512_var_shift, + defm Z256: avx512_var_shift, EVEX_V256 ; - defm Z128: avx512_var_shift, + defm Z128: avx512_var_shift, EVEX_V128 ; } } defm VPERMW : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv, - avx512vl_i16_info>, VEX_W; + AVX2_PERMV_I, avx512vl_i16_info>, VEX_W; defm VPERMB : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv, - avx512vl_i8_info>; + AVX2_PERMV_I, avx512vl_i8_info>; defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv, - avx512vl_i32_info>; + AVX2_PERMV_I, avx512vl_i32_info>; defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv, - avx512vl_i64_info>, VEX_W; + AVX2_PERMV_I, avx512vl_i64_info>, VEX_W; defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv, - avx512vl_f32_info>; + AVX2_PERMV_F, avx512vl_f32_info>; defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv, - avx512vl_f64_info>, VEX_W; + AVX2_PERMV_F, avx512vl_f64_info>, VEX_W; defm VPERMQ : 
avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq", - X86VPermi, avx512vl_i64_info>, + X86VPermi, AVX2_PERMV_I, avx512vl_i64_info>, EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W; defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd", - X86VPermi, avx512vl_f64_info>, + X86VPermi, AVX2_PERMV_F, avx512vl_f64_info>, EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W; //===----------------------------------------------------------------------===// // AVX-512 - VPERMIL //===----------------------------------------------------------------------===// -multiclass avx512_permil_vec OpcVar, string OpcodeStr, SDNode OpNode, - X86VectorVTInfo _, X86VectorVTInfo Ctrl> { +multiclass avx512_permil_vec OpcVar, string OpcodeStr, SDNode OpNode, + OpndItins itins, X86VectorVTInfo _, + X86VectorVTInfo Ctrl> { defm rr: AVX512_maskable, - T8PD, EVEX_4V; + (Ctrl.VT Ctrl.RC:$src2))), itins.rr>, + T8PD, EVEX_4V, Sched<[itins.Sched]>; defm rm: AVX512_maskable, - T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>; + (Ctrl.VT (bitconvert(Ctrl.LdFrag addr:$src2))))), + itins.rm>, T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; defm rmb: AVX512_maskable OpcVar, string OpcodeStr, SDNode OpNode, (_.VT (OpNode _.RC:$src1, (Ctrl.VT (X86VBroadcast - (Ctrl.ScalarLdFrag addr:$src2)))))>, - T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>; + (Ctrl.ScalarLdFrag addr:$src2))))), + itins.rm>, T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } multiclass avx512_permil_vec_common OpcVar, - AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{ + OpndItins itins, AVX512VLVectorVTInfo _, + AVX512VLVectorVTInfo Ctrl> { let Predicates = [HasAVX512] in { - defm Z : avx512_permil_vec, EVEX_V512; + defm Z : avx512_permil_vec, EVEX_V512; } let Predicates = [HasAVX512, HasVLX] in { - defm Z128 : avx512_permil_vec, EVEX_V128; - defm Z256 : avx512_permil_vec, EVEX_V256; + defm Z128 : avx512_permil_vec, EVEX_V128; + defm Z256 : avx512_permil_vec, EVEX_V256; } } multiclass avx512_permil OpcImm, bits<8> OpcVar, AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{ - - defm NAME: avx512_permil_vec_common; + defm NAME: avx512_permil_vec_common; defm NAME: avx512_shift_rmi_sizes, + X86VPermilpi, AVX_VPERMILV, _>, EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>; } @@ -5650,29 +5887,31 @@ defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info, let ExeDomain = SSEPackedDouble in defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info, avx512vl_i64_info>, VEX_W; + //===----------------------------------------------------------------------===// // AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW //===----------------------------------------------------------------------===// defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd", - X86PShufd, avx512vl_i32_info>, + X86PShufd, SSE_PSHUF, avx512vl_i32_info>, EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>; defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw", - X86PShufhw>, EVEX, AVX512XSIi8Base; + X86PShufhw, SSE_PSHUF>, EVEX, AVX512XSIi8Base; defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw", - X86PShuflw>, EVEX, AVX512XDIi8Base; + X86PShuflw, SSE_PSHUF>, EVEX, AVX512XDIi8Base; -multiclass avx512_pshufb_sizes opc, string OpcodeStr, SDNode OpNode> { +multiclass avx512_pshufb_sizes opc, string OpcodeStr, SDNode OpNode, + OpndItins itins> { let Predicates = [HasBWI] in - defm Z: avx512_var_shift, EVEX_V512; + defm 
Z: avx512_var_shift, EVEX_V512; let Predicates = [HasVLX, HasBWI] in { - defm Z256: avx512_var_shift, EVEX_V256; - defm Z128: avx512_var_shift, EVEX_V128; + defm Z256: avx512_var_shift, EVEX_V256; + defm Z128: avx512_var_shift, EVEX_V128; } } -defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb>, VEX_WIG; +defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb, SSE_PSHUFB>, VEX_WIG; //===----------------------------------------------------------------------===// // Move Low to High and High to Low packed FP Instructions @@ -5793,22 +6032,23 @@ multiclass avx512_fma3p_213_rm opc, string OpcodeStr, SDNode OpNode, defm r: AVX512_maskable_3src, + (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), NoItinerary, 1, 1>, AVX512FMA3Base, Sched<[WriteFMA]>; defm m: AVX512_maskable_3src, - AVX512FMA3Base, Sched<[WriteFMA, ReadAfterLd]>; + (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), + NoItinerary, 1, 0>, AVX512FMA3Base, Sched<[WriteFMALd, ReadAfterLd]>; defm mb: AVX512_maskable_3src, - AVX512FMA3Base, EVEX_B, Sched<[WriteFMA, ReadAfterLd]>; + _.RC:$src1,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3)))), + NoItinerary, 1, 0>, AVX512FMA3Base, EVEX_B, + Sched<[WriteFMALd, ReadAfterLd]>; } } @@ -5818,8 +6058,8 @@ multiclass avx512_fma3_213_round opc, string OpcodeStr, SDNode OpNode, defm rb: AVX512_maskable_3src, - AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[WriteFMA]>; + (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 imm:$rc))), + NoItinerary, 1, 1>, AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[WriteFMA]>; } multiclass avx512_fma3p_213_common opc, string OpcodeStr, SDNode OpNode, @@ -5860,14 +6100,14 @@ multiclass avx512_fma3p_231_rm opc, string OpcodeStr, SDNode OpNode, defm r: AVX512_maskable_3src, - AVX512FMA3Base, Sched<[WriteFMA]>; + (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), NoItinerary, 1, 1, + vselect, 1>, AVX512FMA3Base, Sched<[WriteFMA]>; defm m: AVX512_maskable_3src, - AVX512FMA3Base, Sched<[WriteFMA, ReadAfterLd]>; + (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), + NoItinerary, 1, 0>, AVX512FMA3Base, Sched<[WriteFMALd, ReadAfterLd]>; defm mb: AVX512_maskable_3src opc, string OpcodeStr, SDNode OpNode, "$src2, ${src3}"##_.BroadcastStr, (_.VT (OpNode _.RC:$src2, (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))), - _.RC:$src1)), 1, 0>, AVX512FMA3Base, EVEX_B, - Sched<[WriteFMA, ReadAfterLd]>; + _.RC:$src1)), NoItinerary, 1, 0>, AVX512FMA3Base, EVEX_B, + Sched<[WriteFMALd, ReadAfterLd]>; } } @@ -5886,8 +6126,8 @@ multiclass avx512_fma3_231_round opc, string OpcodeStr, SDNode OpNode, defm rb: AVX512_maskable_3src, + (_.VT ( OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 imm:$rc))), + NoItinerary, 1, 1, vselect, 1>, AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[WriteFMA]>; } @@ -5928,16 +6168,16 @@ multiclass avx512_fma3p_132_rm opc, string OpcodeStr, SDNode OpNode, defm r: AVX512_maskable_3src, - AVX512FMA3Base, Sched<[WriteFMA]>; + (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), NoItinerary, + 1, 1, vselect, 1>, AVX512FMA3Base, Sched<[WriteFMA]>; // Pattern is 312 order so that the load is in a different place from the // 213 and 231 patterns this helps tablegen's duplicate pattern detection. 
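// Scheduling note for the memory forms in this hunk: register variants stay
// on Sched<[WriteFMA]>, while every load-folding variant moves to
// Sched<[WriteFMALd, ReadAfterLd]>, where ReadAfterLd marks register sources
// that are not needed until the folded load has completed.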
defm m: AVX512_maskable_3src, - AVX512FMA3Base, Sched<[WriteFMA, ReadAfterLd]>; + (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), + NoItinerary, 1, 0>, AVX512FMA3Base, Sched<[WriteFMALd, ReadAfterLd]>; // Pattern is 312 order so that the load is in a different place from the // 213 and 231 patterns this helps tablegen's duplicate pattern detection. @@ -5946,8 +6186,8 @@ multiclass avx512_fma3p_132_rm opc, string OpcodeStr, SDNode OpNode, OpcodeStr, "${src3}"##_.BroadcastStr##", $src2", "$src2, ${src3}"##_.BroadcastStr, (_.VT (OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))), - _.RC:$src1, _.RC:$src2)), 1, 0>, AVX512FMA3Base, EVEX_B, - Sched<[WriteFMA, ReadAfterLd]>; + _.RC:$src1, _.RC:$src2)), NoItinerary, 1, 0>, + AVX512FMA3Base, EVEX_B, Sched<[WriteFMALd, ReadAfterLd]>; } } @@ -5957,8 +6197,8 @@ multiclass avx512_fma3_132_round opc, string OpcodeStr, SDNode OpNode, defm rb: AVX512_maskable_3src, + (_.VT ( OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 imm:$rc))), + NoItinerary, 1, 1, vselect, 1>, AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[WriteFMA]>; } @@ -6000,18 +6240,19 @@ multiclass avx512_fma3s_common opc, string OpcodeStr, X86VectorVTInfo _, let Constraints = "$src1 = $dst", hasSideEffects = 0 in { defm r_Int: AVX512_maskable_3src_scalar, AVX512FMA3Base, - Sched<[WriteFMA]>; + "$src3, $src2", "$src2, $src3", RHS_VEC_r, NoItinerary, 1, 1>, + AVX512FMA3Base, Sched<[WriteFMA]>; defm m_Int: AVX512_maskable_3src_scalar, AVX512FMA3Base, - Sched<[WriteFMA, ReadAfterLd]>; + "$src3, $src2", "$src2, $src3", RHS_VEC_m, NoItinerary, 1, 1>, + AVX512FMA3Base, Sched<[WriteFMALd, ReadAfterLd]>; defm rb_Int: AVX512_maskable_3src_scalar, - AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[WriteFMA, ReadAfterLd]>; + OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", RHS_VEC_rb, + NoItinerary, 1, 1>, AVX512FMA3Base, EVEX_B, EVEX_RC, + Sched<[WriteFMA]>; let isCodeGenOnly = 1, isCommutable = 1 in { def r : AVX512FMA3S, Sched<[WriteFMA, ReadAfterLd]>; + [RHS_m]>, Sched<[WriteFMALd, ReadAfterLd]>; }// isCodeGenOnly = 1 }// Constraints = "$src1 = $dst" } @@ -6102,21 +6343,21 @@ defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86Fnmsub, X86Fnmsubs1, //===----------------------------------------------------------------------===// let Constraints = "$src1 = $dst" in { multiclass avx512_pmadd52_rm opc, string OpcodeStr, SDNode OpNode, - X86VectorVTInfo _> { + OpndItins itins, X86VectorVTInfo _> { // NOTE: The SDNode have the multiply operands first with the add last. // This enables commuted load patterns to be autogenerated by tablegen. 
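// (In the instruction itself $src1 is the tied accumulator; putting the two
// multiply operands first in the SDNode is what allows tablegen to commute
// them when one of them is a load, as noted above.)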
let ExeDomain = _.ExeDomain in { defm r: AVX512_maskable_3src, - AVX512FMA3Base; + (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), itins.rr, 1, 1>, + AVX512FMA3Base, Sched<[itins.Sched]>; defm m: AVX512_maskable_3src, - AVX512FMA3Base; + (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), + itins.rm>, AVX512FMA3Base, Sched<[itins.Sched.Folded, ReadAfterLd]>; defm mb: AVX512_maskable_3src opc, string OpcodeStr, SDNode OpNode, !strconcat("$src2, ${src3}", _.BroadcastStr ), (OpNode _.RC:$src2, (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))), - _.RC:$src1)>, - AVX512FMA3Base, EVEX_B; + _.RC:$src1), itins.rm>, + AVX512FMA3Base, EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>; } } } // Constraints = "$src1 = $dst" multiclass avx512_pmadd52_common opc, string OpcodeStr, SDNode OpNode, - AVX512VLVectorVTInfo _> { + OpndItins itins, AVX512VLVectorVTInfo _> { let Predicates = [HasIFMA] in { - defm Z : avx512_pmadd52_rm, + defm Z : avx512_pmadd52_rm, EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>; } let Predicates = [HasVLX, HasIFMA] in { - defm Z256 : avx512_pmadd52_rm, + defm Z256 : avx512_pmadd52_rm, EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>; - defm Z128 : avx512_pmadd52_rm, + defm Z128 : avx512_pmadd52_rm, EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>; } } defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l, - avx512vl_i64_info>, VEX_W; + SSE_PMADD, avx512vl_i64_info>, VEX_W; defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h, - avx512vl_i64_info>, VEX_W; + SSE_PMADD, avx512vl_i64_info>, VEX_W; //===----------------------------------------------------------------------===// // AVX-512 Scalar convert from sign integer to float/double //===----------------------------------------------------------------------===// -multiclass avx512_vcvtsi opc, SDNode OpNode, RegisterClass SrcRC, - X86VectorVTInfo DstVT, X86MemOperand x86memop, - PatFrag ld_frag, string asm> { +multiclass avx512_vcvtsi opc, SDNode OpNode, OpndItins itins, + RegisterClass SrcRC, X86VectorVTInfo DstVT, + X86MemOperand x86memop, PatFrag ld_frag, string asm> { let hasSideEffects = 0 in { def rr : SI, - EVEX_4V; + !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), [], + itins.rr>, EVEX_4V, Sched<[itins.Sched]>; let mayLoad = 1 in def rm : SI, - EVEX_4V; + !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), [], + itins.rm>, EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>; } // hasSideEffects = 0 let isCodeGenOnly = 1 in { def rr_Int : SI opc, SDNode OpNode, RegisterClass SrcRC, [(set DstVT.RC:$dst, (OpNode (DstVT.VT DstVT.RC:$src1), SrcRC:$src2, - (i32 FROUND_CURRENT)))]>, EVEX_4V; + (i32 FROUND_CURRENT)))], itins.rr>, + EVEX_4V, Sched<[itins.Sched]>; def rm_Int : SI opc, SDNode OpNode, RegisterClass SrcRC, [(set DstVT.RC:$dst, (OpNode (DstVT.VT DstVT.RC:$src1), (ld_frag addr:$src2), - (i32 FROUND_CURRENT)))]>, EVEX_4V; + (i32 FROUND_CURRENT)))], itins.rm>, + EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>; }//isCodeGenOnly = 1 } -multiclass avx512_vcvtsi_round opc, SDNode OpNode, RegisterClass SrcRC, - X86VectorVTInfo DstVT, string asm> { +multiclass avx512_vcvtsi_round opc, SDNode OpNode, OpndItins itins, + RegisterClass SrcRC, X86VectorVTInfo DstVT, string asm> { def rrb_Int : SI opc, SDNode OpNode, RegisterClass SrcRC, [(set DstVT.RC:$dst, (OpNode (DstVT.VT DstVT.RC:$src1), SrcRC:$src2, - (i32 imm:$rc)))]>, EVEX_4V, EVEX_B, EVEX_RC; + (i32 imm:$rc)))], itins.rr>, + EVEX_4V, EVEX_B, EVEX_RC, Sched<[itins.Sched]>; } -multiclass avx512_vcvtsi_common 
opc, SDNode OpNode, RegisterClass SrcRC, - X86VectorVTInfo DstVT, X86MemOperand x86memop, - PatFrag ld_frag, string asm> { - defm NAME : avx512_vcvtsi_round, - avx512_vcvtsi, - VEX_LIG; +multiclass avx512_vcvtsi_common opc, SDNode OpNode, OpndItins itins, + RegisterClass SrcRC, X86VectorVTInfo DstVT, + X86MemOperand x86memop, PatFrag ld_frag, string asm> { + defm NAME : avx512_vcvtsi_round, + avx512_vcvtsi, VEX_LIG; } let Predicates = [HasAVX512] in { -defm VCVTSI2SSZ : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, GR32, +defm VCVTSI2SSZ : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, SSE_CVT_SI2SS, GR32, v4f32x_info, i32mem, loadi32, "cvtsi2ss{l}">, XS, EVEX_CD8<32, CD8VT1>; -defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, GR64, +defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, SSE_CVT_SI2SS, GR64, v4f32x_info, i64mem, loadi64, "cvtsi2ss{q}">, XS, VEX_W, EVEX_CD8<64, CD8VT1>; -defm VCVTSI2SDZ : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, GR32, +defm VCVTSI2SDZ : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, SSE_CVT_SI2SD, GR32, v2f64x_info, i32mem, loadi32, "cvtsi2sd{l}">, XD, EVEX_CD8<32, CD8VT1>; -defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, GR64, +defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, SSE_CVT_SI2SD, GR64, v2f64x_info, i64mem, loadi64, "cvtsi2sd{q}">, XD, VEX_W, EVEX_CD8<64, CD8VT1>; @@ -6243,16 +6487,16 @@ def : Pat<(f64 (sint_to_fp GR32:$src)), def : Pat<(f64 (sint_to_fp GR64:$src)), (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>; -defm VCVTUSI2SSZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, GR32, +defm VCVTUSI2SSZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, SSE_CVT_SI2SS, GR32, v4f32x_info, i32mem, loadi32, "cvtusi2ss{l}">, XS, EVEX_CD8<32, CD8VT1>; -defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, GR64, +defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, SSE_CVT_SI2SS, GR64, v4f32x_info, i64mem, loadi64, "cvtusi2ss{q}">, XS, VEX_W, EVEX_CD8<64, CD8VT1>; -defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, X86UintToFpRnd, GR32, v2f64x_info, +defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, X86UintToFpRnd, SSE_CVT_SI2SD, GR32, v2f64x_info, i32mem, loadi32, "cvtusi2sd{l}">, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>; -defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, GR64, +defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, SSE_CVT_SI2SD, GR64, v2f64x_info, i64mem, loadi64, "cvtusi2sd{q}">, XD, VEX_W, EVEX_CD8<64, CD8VT1>; @@ -6283,71 +6527,94 @@ def : Pat<(f64 (uint_to_fp GR64:$src)), //===----------------------------------------------------------------------===// // AVX-512 Scalar convert from float/double to integer //===----------------------------------------------------------------------===// -multiclass avx512_cvt_s_int_round opc, X86VectorVTInfo SrcVT , - X86VectorVTInfo DstVT, SDNode OpNode, string asm> { + +multiclass avx512_cvt_s_int_round opc, X86VectorVTInfo SrcVT, + X86VectorVTInfo DstVT, SDNode OpNode, + OpndItins itins, string asm, + string aliasStr, + bit CodeGenOnly = 1> { let Predicates = [HasAVX512] in { - def rr : SI, - EVEX, VEX_LIG; - def rb : SI, - EVEX, VEX_LIG, EVEX_B, EVEX_RC; - def rm : SI, EVEX, VEX_LIG, Sched<[itins.Sched]>; + def rrb_Int : SI, EVEX, VEX_LIG, EVEX_B, EVEX_RC, + Sched<[itins.Sched]>; + let isCodeGenOnly = CodeGenOnly, ForceDisassemble = CodeGenOnly in + def rm_Int : SI, - EVEX, VEX_LIG; + (i32 FROUND_CURRENT)))], itins.rm>, + EVEX, VEX_LIG, Sched<[itins.Sched.Folded, ReadAfterLd]>; + + def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}", + 
(!cast(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0>; + def : InstAlias<"v" # asm # aliasStr # "\t{$rc, $src, $dst|$dst, $src, $rc}", + (!cast(NAME # "rrb_Int") DstVT.RC:$dst, SrcVT.RC:$src, AVX512RC:$rc), 0>; + } // Predicates = [HasAVX512] +} + +multiclass avx512_cvt_s_int_round_aliases opc, X86VectorVTInfo SrcVT, + X86VectorVTInfo DstVT, SDNode OpNode, + OpndItins itins, string asm, + string aliasStr> : + avx512_cvt_s_int_round { + let Predicates = [HasAVX512] in { + def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}", + (!cast(NAME # "rm_Int") DstVT.RC:$dst, + SrcVT.IntScalarMemOp:$src), 0>; } // Predicates = [HasAVX512] } // Convert float/double to signed/unsigned int 32/64 defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info, - X86cvts2si, "cvtss2si">, + X86cvts2si, SSE_CVT_SS2SI_32, "cvtss2si", "{l}">, XS, EVEX_CD8<32, CD8VT1>; defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info, - X86cvts2si, "cvtss2si">, + X86cvts2si, SSE_CVT_SS2SI_64, "cvtss2si", "{q}">, XS, VEX_W, EVEX_CD8<32, CD8VT1>; -defm VCVTSS2USIZ: avx512_cvt_s_int_round<0x79, f32x_info, i32x_info, - X86cvts2usi, "cvtss2usi">, +defm VCVTSS2USIZ: avx512_cvt_s_int_round_aliases<0x79, f32x_info, i32x_info, + X86cvts2usi, SSE_CVT_SS2SI_32, "cvtss2usi", "{l}">, XS, EVEX_CD8<32, CD8VT1>; -defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, f32x_info, i64x_info, - X86cvts2usi, "cvtss2usi">, XS, VEX_W, - EVEX_CD8<32, CD8VT1>; +defm VCVTSS2USI64Z: avx512_cvt_s_int_round_aliases<0x79, f32x_info, i64x_info, + X86cvts2usi, SSE_CVT_SS2SI_64, "cvtss2usi", "{q}">, + XS, VEX_W, EVEX_CD8<32, CD8VT1>; defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info, - X86cvts2si, "cvtsd2si">, + X86cvts2si, SSE_CVT_SD2SI, "cvtsd2si", "{l}">, XD, EVEX_CD8<64, CD8VT1>; defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info, - X86cvts2si, "cvtsd2si">, + X86cvts2si, SSE_CVT_SD2SI, "cvtsd2si", "{q}">, XD, VEX_W, EVEX_CD8<64, CD8VT1>; -defm VCVTSD2USIZ: avx512_cvt_s_int_round<0x79, f64x_info, i32x_info, - X86cvts2usi, "cvtsd2usi">, +defm VCVTSD2USIZ: avx512_cvt_s_int_round_aliases<0x79, f64x_info, i32x_info, + X86cvts2usi, SSE_CVT_SD2SI, "cvtsd2usi", "{l}">, XD, EVEX_CD8<64, CD8VT1>; -defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info, - X86cvts2usi, "cvtsd2usi">, XD, VEX_W, - EVEX_CD8<64, CD8VT1>; +defm VCVTSD2USI64Z: avx512_cvt_s_int_round_aliases<0x79, f64x_info, i64x_info, + X86cvts2usi, SSE_CVT_SD2SI, "cvtsd2usi", "{q}">, + XD, VEX_W, EVEX_CD8<64, CD8VT1>; // The SSE version of these instructions are disabled for AVX512. // Therefore, the SSE intrinsics are mapped to the AVX512 instructions. 
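// Note: this hunk renames the register/rounding/memory defs to the *_Int
// spellings (rr_Int/rrb_Int/rm_Int), so the intrinsic selection patterns below
// now pick the _Int forms, which operate on the full vector register the
// intrinsics expect.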
let Predicates = [HasAVX512] in { def : Pat<(i32 (int_x86_sse_cvtss2si (v4f32 VR128X:$src))), - (VCVTSS2SIZrr VR128X:$src)>; + (VCVTSS2SIZrr_Int VR128X:$src)>; def : Pat<(i32 (int_x86_sse_cvtss2si sse_load_f32:$src)), - (VCVTSS2SIZrm sse_load_f32:$src)>; + (VCVTSS2SIZrm_Int sse_load_f32:$src)>; def : Pat<(i64 (int_x86_sse_cvtss2si64 (v4f32 VR128X:$src))), - (VCVTSS2SI64Zrr VR128X:$src)>; + (VCVTSS2SI64Zrr_Int VR128X:$src)>; def : Pat<(i64 (int_x86_sse_cvtss2si64 sse_load_f32:$src)), - (VCVTSS2SI64Zrm sse_load_f32:$src)>; + (VCVTSS2SI64Zrm_Int sse_load_f32:$src)>; def : Pat<(i32 (int_x86_sse2_cvtsd2si (v2f64 VR128X:$src))), - (VCVTSD2SIZrr VR128X:$src)>; + (VCVTSD2SIZrr_Int VR128X:$src)>; def : Pat<(i32 (int_x86_sse2_cvtsd2si sse_load_f64:$src)), - (VCVTSD2SIZrm sse_load_f64:$src)>; + (VCVTSD2SIZrm_Int sse_load_f64:$src)>; def : Pat<(i64 (int_x86_sse2_cvtsd2si64 (v2f64 VR128X:$src))), - (VCVTSD2SI64Zrr VR128X:$src)>; + (VCVTSD2SI64Zrr_Int VR128X:$src)>; def : Pat<(i64 (int_x86_sse2_cvtsd2si64 sse_load_f64:$src)), - (VCVTSD2SI64Zrm sse_load_f64:$src)>; + (VCVTSD2SI64Zrm_Int sse_load_f64:$src)>; } // HasAVX512 let Predicates = [HasAVX512] in { @@ -6400,74 +6667,86 @@ def : Pat<(v2f64 (X86Movsd // Convert float/double to signed/unsigned int 32/64 with truncation multiclass avx512_cvt_s_all opc, string asm, X86VectorVTInfo _SrcRC, X86VectorVTInfo _DstRC, SDNode OpNode, - SDNode OpNodeRnd, string aliasStr>{ + SDNode OpNodeRnd, OpndItins itins, string aliasStr, + bit CodeGenOnly = 1>{ let Predicates = [HasAVX512] in { + let isCodeGenOnly = 1 in { def rr : AVX512, EVEX; - let hasSideEffects = 0 in - def rb : AVX512, EVEX, EVEX_B; + [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))], itins.rr>, + EVEX, Sched<[itins.Sched]>; def rm : AVX512, - EVEX; + [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))], + itins.rm>, EVEX, Sched<[itins.Sched.Folded, ReadAfterLd]>; + } + + def rr_Int : AVX512, + EVEX, VEX_LIG, Sched<[itins.Sched]>; + def rrb_Int : AVX512, + EVEX,VEX_LIG , EVEX_B, Sched<[itins.Sched]>; + let isCodeGenOnly = CodeGenOnly, ForceDisassemble = CodeGenOnly in + def rm_Int : AVX512, + EVEX, VEX_LIG, Sched<[itins.Sched.Folded, ReadAfterLd]>; def : InstAlias(NAME # "rr") _DstRC.RC:$dst, _SrcRC.FRC:$src), 0>; - def : InstAlias(NAME # "rb") _DstRC.RC:$dst, _SrcRC.FRC:$src), 0>; - def : InstAlias(NAME # "rm") _DstRC.RC:$dst, - _SrcRC.ScalarMemOp:$src), 0>; - - let isCodeGenOnly = 1 in { - def rr_Int : AVX512, EVEX, VEX_LIG; - def rb_Int : AVX512, - EVEX,VEX_LIG , EVEX_B; - let mayLoad = 1, hasSideEffects = 0 in - def rm_Int : AVX512, EVEX, VEX_LIG; - - } // isCodeGenOnly = 1 + (!cast(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0>; + def : InstAlias(NAME # "rrb_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0>; } //HasAVX512 } +multiclass avx512_cvt_s_all_unsigned opc, string asm, + X86VectorVTInfo _SrcRC, + X86VectorVTInfo _DstRC, SDNode OpNode, + SDNode OpNodeRnd, OpndItins itins, + string aliasStr> : + avx512_cvt_s_all { +let Predicates = [HasAVX512] in { + def : InstAlias(NAME # "rm_Int") _DstRC.RC:$dst, + _SrcRC.IntScalarMemOp:$src), 0>; +} +} defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info, - fp_to_sint, X86cvtts2IntRnd, "{l}">, + fp_to_sint, X86cvtts2IntRnd, SSE_CVT_SS2SI_32, "{l}">, XS, EVEX_CD8<32, CD8VT1>; defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info, - fp_to_sint, X86cvtts2IntRnd, "{q}">, + fp_to_sint, X86cvtts2IntRnd, SSE_CVT_SS2SI_64, "{q}">, VEX_W, XS, EVEX_CD8<32, CD8VT1>; defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, 
"vcvttsd2si", f64x_info, i32x_info, - fp_to_sint, X86cvtts2IntRnd, "{l}">, + fp_to_sint, X86cvtts2IntRnd, SSE_CVT_SD2SI, "{l}">, XD, EVEX_CD8<64, CD8VT1>; defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info, - fp_to_sint, X86cvtts2IntRnd, "{q}">, + fp_to_sint, X86cvtts2IntRnd, SSE_CVT_SD2SI, "{q}">, VEX_W, XD, EVEX_CD8<64, CD8VT1>; -defm VCVTTSS2USIZ: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i32x_info, - fp_to_uint, X86cvtts2UIntRnd, "{l}">, +defm VCVTTSS2USIZ: avx512_cvt_s_all_unsigned<0x78, "vcvttss2usi", f32x_info, i32x_info, + fp_to_uint, X86cvtts2UIntRnd, SSE_CVT_SS2SI_32, "{l}">, XS, EVEX_CD8<32, CD8VT1>; -defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i64x_info, - fp_to_uint, X86cvtts2UIntRnd, "{q}">, +defm VCVTTSS2USI64Z: avx512_cvt_s_all_unsigned<0x78, "vcvttss2usi", f32x_info, i64x_info, + fp_to_uint, X86cvtts2UIntRnd, SSE_CVT_SS2SI_64, "{q}">, XS,VEX_W, EVEX_CD8<32, CD8VT1>; -defm VCVTTSD2USIZ: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i32x_info, - fp_to_uint, X86cvtts2UIntRnd, "{l}">, +defm VCVTTSD2USIZ: avx512_cvt_s_all_unsigned<0x78, "vcvttsd2usi", f64x_info, i32x_info, + fp_to_uint, X86cvtts2UIntRnd, SSE_CVT_SD2SI, "{l}">, XD, EVEX_CD8<64, CD8VT1>; -defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i64x_info, - fp_to_uint, X86cvtts2UIntRnd, "{q}">, +defm VCVTTSD2USI64Z: avx512_cvt_s_all_unsigned<0x78, "vcvttsd2usi", f64x_info, i64x_info, + fp_to_uint, X86cvtts2UIntRnd, SSE_CVT_SD2SI, "{q}">, XD, VEX_W, EVEX_CD8<64, CD8VT1>; + let Predicates = [HasAVX512] in { def : Pat<(i32 (int_x86_sse_cvttss2si (v4f32 VR128X:$src))), (VCVTTSS2SIZrr_Int VR128X:$src)>; @@ -6486,87 +6765,92 @@ let Predicates = [HasAVX512] in { def : Pat<(i64 (int_x86_sse2_cvttsd2si64 sse_load_f64:$src)), (VCVTTSD2SI64Zrm_Int sdmem:$src)>; } // HasAVX512 + //===----------------------------------------------------------------------===// // AVX-512 Convert form float to double and back //===----------------------------------------------------------------------===// + multiclass avx512_cvt_fp_scalar opc, string OpcodeStr, X86VectorVTInfo _, - X86VectorVTInfo _Src, SDNode OpNode> { + X86VectorVTInfo _Src, SDNode OpNode, OpndItins itins> { defm rr_Int : AVX512_maskable_scalar, - EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>; + (i32 FROUND_CURRENT))), itins.rr>, + EVEX_4V, VEX_LIG, Sched<[itins.Sched]>; defm rm_Int : AVX512_maskable_scalar, - EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>; + (i32 FROUND_CURRENT))), itins.rm>, + EVEX_4V, VEX_LIG, + Sched<[itins.Sched.Folded, ReadAfterLd]>; let isCodeGenOnly = 1, hasSideEffects = 0 in { def rr : I, - EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>; + OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], + itins.rr>, EVEX_4V, VEX_LIG, Sched<[itins.Sched]>; let mayLoad = 1 in def rm : I, - EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>; + OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], + itins.rm>, EVEX_4V, VEX_LIG, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } // Scalar Coversion with SAE - suppress all exceptions multiclass avx512_cvt_fp_sae_scalar opc, string OpcodeStr, X86VectorVTInfo _, - X86VectorVTInfo _Src, SDNode OpNodeRnd> { + X86VectorVTInfo _Src, SDNode OpNodeRnd, OpndItins itins> { defm rrb_Int : AVX512_maskable_scalar, - EVEX_4V, VEX_LIG, EVEX_B; + (i32 FROUND_NO_EXC))), itins.rr>, + EVEX_4V, VEX_LIG, EVEX_B, Sched<[itins.Sched]>; } // Scalar Conversion with rounding control (RC) multiclass avx512_cvt_fp_rc_scalar opc, string OpcodeStr, X86VectorVTInfo _, - 
X86VectorVTInfo _Src, SDNode OpNodeRnd> { + X86VectorVTInfo _Src, SDNode OpNodeRnd, OpndItins itins> { defm rrb_Int : AVX512_maskable_scalar, - EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>, + (_Src.VT _Src.RC:$src2), (i32 imm:$rc))), + itins.rr>, + EVEX_4V, VEX_LIG, Sched<[itins.Sched]>, EVEX_B, EVEX_RC; } multiclass avx512_cvt_fp_scalar_sd2ss opc, string OpcodeStr, - SDNode OpNodeRnd, X86VectorVTInfo _src, - X86VectorVTInfo _dst> { + SDNode OpNodeRnd, OpndItins itins, + X86VectorVTInfo _src, X86VectorVTInfo _dst> { let Predicates = [HasAVX512] in { - defm Z : avx512_cvt_fp_scalar, + defm Z : avx512_cvt_fp_scalar, avx512_cvt_fp_rc_scalar, VEX_W, EVEX_CD8<64, CD8VT1>, XD; + OpNodeRnd, itins>, VEX_W, EVEX_CD8<64, CD8VT1>, XD; } } multiclass avx512_cvt_fp_scalar_ss2sd opc, string OpcodeStr, - SDNode OpNodeRnd, X86VectorVTInfo _src, - X86VectorVTInfo _dst> { + SDNode OpNodeRnd, OpndItins itins, + X86VectorVTInfo _src, X86VectorVTInfo _dst> { let Predicates = [HasAVX512] in { - defm Z : avx512_cvt_fp_scalar, - avx512_cvt_fp_sae_scalar, + defm Z : avx512_cvt_fp_scalar, + avx512_cvt_fp_sae_scalar, EVEX_CD8<32, CD8VT1>, XS; } } defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss", - X86froundRnd, f64x_info, f32x_info>, - NotMemoryFoldable; + X86froundRnd, SSE_CVT_SD2SS, f64x_info, + f32x_info>, NotMemoryFoldable; defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd", - X86fpextRnd,f32x_info, f64x_info >, - NotMemoryFoldable; + X86fpextRnd, SSE_CVT_SS2SD, f32x_info, + f64x_info>, NotMemoryFoldable; def : Pat<(f64 (fpextend FR32X:$src)), (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>, @@ -6607,74 +6891,81 @@ def : Pat<(v2f64 (X86Movsd //===----------------------------------------------------------------------===// multiclass avx512_vcvt_fp opc, string OpcodeStr, X86VectorVTInfo _, - X86VectorVTInfo _Src, SDNode OpNode, + X86VectorVTInfo _Src, SDNode OpNode, OpndItins itins, string Broadcast = _.BroadcastStr, string Alias = "", X86MemOperand MemOp = _Src.MemOp> { defm rr : AVX512_maskable, EVEX; + (_.VT (OpNode (_Src.VT _Src.RC:$src))), itins.rr>, + EVEX, Sched<[itins.Sched]>; defm rm : AVX512_maskable, EVEX; + (bitconvert (_Src.LdFrag addr:$src))))), itins.rm>, + EVEX, Sched<[itins.Sched.Folded]>; defm rmb : AVX512_maskable, EVEX, EVEX_B; + )), itins.rm>, EVEX, EVEX_B, + Sched<[itins.Sched.Folded]>; } // Coversion with SAE - suppress all exceptions multiclass avx512_vcvt_fp_sae opc, string OpcodeStr, X86VectorVTInfo _, - X86VectorVTInfo _Src, SDNode OpNodeRnd> { + X86VectorVTInfo _Src, SDNode OpNodeRnd, + OpndItins itins> { defm rrb : AVX512_maskable, - EVEX, EVEX_B; + (i32 FROUND_NO_EXC))), itins.rr>, + EVEX, EVEX_B, Sched<[itins.Sched]>; } // Conversion with rounding control (RC) multiclass avx512_vcvt_fp_rc opc, string OpcodeStr, X86VectorVTInfo _, - X86VectorVTInfo _Src, SDNode OpNodeRnd> { + X86VectorVTInfo _Src, SDNode OpNodeRnd, + OpndItins itins> { defm rrb : AVX512_maskable, - EVEX, EVEX_B, EVEX_RC; + (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 imm:$rc))), + itins.rr>, EVEX, EVEX_B, EVEX_RC, Sched<[itins.Sched]>; } // Extend Float to Double -multiclass avx512_cvtps2pd opc, string OpcodeStr> { +multiclass avx512_cvtps2pd opc, string OpcodeStr, + OpndItins itins> { let Predicates = [HasAVX512] in { - defm Z : avx512_vcvt_fp, + defm Z : avx512_vcvt_fp, avx512_vcvt_fp_sae, EVEX_V512; + X86vfpextRnd, itins>, EVEX_V512; } let Predicates = [HasVLX] in { defm Z128 : avx512_vcvt_fp, EVEX_V128; - defm Z256 : avx512_vcvt_fp, - EVEX_V256; + X86vfpext, itins, "{1to2}", "", 
f64mem>, EVEX_V128; + defm Z256 : avx512_vcvt_fp, EVEX_V256; } } // Truncate Double to Float -multiclass avx512_cvtpd2ps opc, string OpcodeStr> { +multiclass avx512_cvtpd2ps opc, string OpcodeStr, OpndItins itins> { let Predicates = [HasAVX512] in { - defm Z : avx512_vcvt_fp, + defm Z : avx512_vcvt_fp, avx512_vcvt_fp_rc, EVEX_V512; + X86vfproundRnd, itins>, EVEX_V512; } let Predicates = [HasVLX] in { defm Z128 : avx512_vcvt_fp, EVEX_V128; + X86vfpround, itins, "{1to2}", "{x}">, EVEX_V128; defm Z256 : avx512_vcvt_fp, EVEX_V256; + itins, "{1to4}", "{y}">, EVEX_V256; def : InstAlias(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>; @@ -6687,9 +6978,9 @@ multiclass avx512_cvtpd2ps opc, string OpcodeStr> { } } -defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps">, +defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps", SSE_CVT_PD2PS>, VEX_W, PD, EVEX_CD8<64, CD8VF>; -defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd">, +defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd", SSE_CVT_PS2PD>, PS, EVEX_CD8<32, CD8VH>; def : Pat<(v8f64 (extloadv8f32 addr:$src)), @@ -6712,75 +7003,80 @@ let Predicates = [HasVLX] in { // Convert Signed/Unsigned Doubleword to Double multiclass avx512_cvtdq2pd opc, string OpcodeStr, SDNode OpNode, - SDNode OpNode128> { + SDNode OpNode128, OpndItins itins> { // No rounding in this op let Predicates = [HasAVX512] in - defm Z : avx512_vcvt_fp, - EVEX_V512; + defm Z : avx512_vcvt_fp, EVEX_V512; let Predicates = [HasVLX] in { defm Z128 : avx512_vcvt_fp, EVEX_V128; - defm Z256 : avx512_vcvt_fp, - EVEX_V256; + OpNode128, itins, "{1to2}", "", i64mem>, EVEX_V128; + defm Z256 : avx512_vcvt_fp, EVEX_V256; } } // Convert Signed/Unsigned Doubleword to Float multiclass avx512_cvtdq2ps opc, string OpcodeStr, SDNode OpNode, - SDNode OpNodeRnd> { + SDNode OpNodeRnd, OpndItins itins> { let Predicates = [HasAVX512] in - defm Z : avx512_vcvt_fp, + defm Z : avx512_vcvt_fp, avx512_vcvt_fp_rc, EVEX_V512; + OpNodeRnd, itins>, EVEX_V512; let Predicates = [HasVLX] in { - defm Z128 : avx512_vcvt_fp, - EVEX_V128; - defm Z256 : avx512_vcvt_fp, - EVEX_V256; + defm Z128 : avx512_vcvt_fp, EVEX_V128; + defm Z256 : avx512_vcvt_fp, EVEX_V256; } } // Convert Float to Signed/Unsigned Doubleword with truncation -multiclass avx512_cvttps2dq opc, string OpcodeStr, - SDNode OpNode, SDNode OpNodeRnd> { +multiclass avx512_cvttps2dq opc, string OpcodeStr, SDNode OpNode, + SDNode OpNodeRnd, OpndItins itins> { let Predicates = [HasAVX512] in { - defm Z : avx512_vcvt_fp, + defm Z : avx512_vcvt_fp, avx512_vcvt_fp_sae, EVEX_V512; + OpNodeRnd, itins>, EVEX_V512; } let Predicates = [HasVLX] in { - defm Z128 : avx512_vcvt_fp, - EVEX_V128; - defm Z256 : avx512_vcvt_fp, - EVEX_V256; + defm Z128 : avx512_vcvt_fp, EVEX_V128; + defm Z256 : avx512_vcvt_fp, EVEX_V256; } } // Convert Float to Signed/Unsigned Doubleword -multiclass avx512_cvtps2dq opc, string OpcodeStr, - SDNode OpNode, SDNode OpNodeRnd> { +multiclass avx512_cvtps2dq opc, string OpcodeStr, SDNode OpNode, + SDNode OpNodeRnd, OpndItins itins> { let Predicates = [HasAVX512] in { - defm Z : avx512_vcvt_fp, + defm Z : avx512_vcvt_fp, avx512_vcvt_fp_rc, EVEX_V512; + OpNodeRnd, itins>, EVEX_V512; } let Predicates = [HasVLX] in { - defm Z128 : avx512_vcvt_fp, - EVEX_V128; - defm Z256 : avx512_vcvt_fp, - EVEX_V256; + defm Z128 : avx512_vcvt_fp, EVEX_V128; + defm Z256 : avx512_vcvt_fp, EVEX_V256; } } // Convert Double to Signed/Unsigned Doubleword with truncation multiclass avx512_cvttpd2dq opc, string OpcodeStr, SDNode OpNode, - SDNode OpNode128, SDNode OpNodeRnd> { + SDNode 
OpNode128, SDNode OpNodeRnd, + OpndItins itins> { let Predicates = [HasAVX512] in { - defm Z : avx512_vcvt_fp, + defm Z : avx512_vcvt_fp, avx512_vcvt_fp_sae, EVEX_V512; + OpNodeRnd, itins>, EVEX_V512; } let Predicates = [HasVLX] in { // we need "x"/"y" suffixes in order to distinguish between 128 and 256 @@ -6788,9 +7084,9 @@ multiclass avx512_cvttpd2dq opc, string OpcodeStr, SDNode OpNode, // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly // due to the same reason. defm Z128 : avx512_vcvt_fp, EVEX_V128; + OpNode128, itins, "{1to2}", "{x}">, EVEX_V128; defm Z256 : avx512_vcvt_fp, EVEX_V256; + itins, "{1to4}", "{y}">, EVEX_V256; def : InstAlias(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>; @@ -6804,12 +7100,13 @@ multiclass avx512_cvttpd2dq opc, string OpcodeStr, SDNode OpNode, } // Convert Double to Signed/Unsigned Doubleword -multiclass avx512_cvtpd2dq opc, string OpcodeStr, - SDNode OpNode, SDNode OpNodeRnd> { +multiclass avx512_cvtpd2dq opc, string OpcodeStr, SDNode OpNode, + SDNode OpNodeRnd, OpndItins itins> { let Predicates = [HasAVX512] in { - defm Z : avx512_vcvt_fp, + defm Z : avx512_vcvt_fp, avx512_vcvt_fp_rc, EVEX_V512; + OpNodeRnd, itins>, EVEX_V512; } let Predicates = [HasVLX] in { // we need "x"/"y" suffixes in order to distinguish between 128 and 256 @@ -6817,9 +7114,9 @@ multiclass avx512_cvtpd2dq opc, string OpcodeStr, // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly // due to the same reason. defm Z128 : avx512_vcvt_fp, EVEX_V128; + itins, "{1to2}", "{x}">, EVEX_V128; defm Z256 : avx512_vcvt_fp, EVEX_V256; + itins, "{1to4}", "{y}">, EVEX_V256; def : InstAlias(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>; @@ -6833,96 +7130,102 @@ multiclass avx512_cvtpd2dq opc, string OpcodeStr, } // Convert Double to Signed/Unsigned Quardword -multiclass avx512_cvtpd2qq opc, string OpcodeStr, - SDNode OpNode, SDNode OpNodeRnd> { +multiclass avx512_cvtpd2qq opc, string OpcodeStr, SDNode OpNode, + SDNode OpNodeRnd, OpndItins itins> { let Predicates = [HasDQI] in { - defm Z : avx512_vcvt_fp, + defm Z : avx512_vcvt_fp, avx512_vcvt_fp_rc, EVEX_V512; + OpNodeRnd,itins>, EVEX_V512; } let Predicates = [HasDQI, HasVLX] in { - defm Z128 : avx512_vcvt_fp, - EVEX_V128; - defm Z256 : avx512_vcvt_fp, - EVEX_V256; + defm Z128 : avx512_vcvt_fp, EVEX_V128; + defm Z256 : avx512_vcvt_fp, EVEX_V256; } } // Convert Double to Signed/Unsigned Quardword with truncation -multiclass avx512_cvttpd2qq opc, string OpcodeStr, - SDNode OpNode, SDNode OpNodeRnd> { +multiclass avx512_cvttpd2qq opc, string OpcodeStr, SDNode OpNode, + SDNode OpNodeRnd, OpndItins itins> { let Predicates = [HasDQI] in { - defm Z : avx512_vcvt_fp, + defm Z : avx512_vcvt_fp, avx512_vcvt_fp_sae, EVEX_V512; + OpNodeRnd, itins>, EVEX_V512; } let Predicates = [HasDQI, HasVLX] in { - defm Z128 : avx512_vcvt_fp, - EVEX_V128; - defm Z256 : avx512_vcvt_fp, - EVEX_V256; + defm Z128 : avx512_vcvt_fp, EVEX_V128; + defm Z256 : avx512_vcvt_fp, EVEX_V256; } } // Convert Signed/Unsigned Quardword to Double -multiclass avx512_cvtqq2pd opc, string OpcodeStr, - SDNode OpNode, SDNode OpNodeRnd> { +multiclass avx512_cvtqq2pd opc, string OpcodeStr, SDNode OpNode, + SDNode OpNodeRnd, OpndItins itins> { let Predicates = [HasDQI] in { - defm Z : avx512_vcvt_fp, + defm Z : avx512_vcvt_fp, avx512_vcvt_fp_rc, EVEX_V512; + OpNodeRnd, itins>, EVEX_V512; } let Predicates = [HasDQI, HasVLX] in { - defm Z128 : avx512_vcvt_fp, - EVEX_V128; - defm Z256 : avx512_vcvt_fp, - EVEX_V256; + defm Z128 : avx512_vcvt_fp, 
EVEX_V128; + defm Z256 : avx512_vcvt_fp, EVEX_V256; } } // Convert Float to Signed/Unsigned Quardword -multiclass avx512_cvtps2qq opc, string OpcodeStr, - SDNode OpNode, SDNode OpNodeRnd> { +multiclass avx512_cvtps2qq opc, string OpcodeStr, SDNode OpNode, + SDNode OpNodeRnd, OpndItins itins> { let Predicates = [HasDQI] in { - defm Z : avx512_vcvt_fp, + defm Z : avx512_vcvt_fp, avx512_vcvt_fp_rc, EVEX_V512; + OpNodeRnd, itins>, EVEX_V512; } let Predicates = [HasDQI, HasVLX] in { // Explicitly specified broadcast string, since we take only 2 elements // from v4f32x_info source defm Z128 : avx512_vcvt_fp, EVEX_V128; - defm Z256 : avx512_vcvt_fp, - EVEX_V256; + itins, "{1to2}", "", f64mem>, EVEX_V128; + defm Z256 : avx512_vcvt_fp, EVEX_V256; } } // Convert Float to Signed/Unsigned Quardword with truncation multiclass avx512_cvttps2qq opc, string OpcodeStr, SDNode OpNode, - SDNode OpNode128, SDNode OpNodeRnd> { + SDNode OpNode128, SDNode OpNodeRnd, OpndItins itins> { let Predicates = [HasDQI] in { - defm Z : avx512_vcvt_fp, + defm Z : avx512_vcvt_fp, avx512_vcvt_fp_sae, EVEX_V512; + OpNodeRnd, itins>, EVEX_V512; } let Predicates = [HasDQI, HasVLX] in { // Explicitly specified broadcast string, since we take only 2 elements // from v4f32x_info source defm Z128 : avx512_vcvt_fp, EVEX_V128; - defm Z256 : avx512_vcvt_fp, - EVEX_V256; + itins, "{1to2}", "", f64mem>, EVEX_V128; + defm Z256 : avx512_vcvt_fp, EVEX_V256; } } // Convert Signed/Unsigned Quardword to Float multiclass avx512_cvtqq2ps opc, string OpcodeStr, SDNode OpNode, - SDNode OpNode128, SDNode OpNodeRnd> { + SDNode OpNode128, SDNode OpNodeRnd, OpndItins itins> { let Predicates = [HasDQI] in { - defm Z : avx512_vcvt_fp, + defm Z : avx512_vcvt_fp, avx512_vcvt_fp_rc, EVEX_V512; + OpNodeRnd, itins>, EVEX_V512; } let Predicates = [HasDQI, HasVLX] in { // we need "x"/"y" suffixes in order to distinguish between 128 and 256 @@ -6930,9 +7233,9 @@ multiclass avx512_cvtqq2ps opc, string OpcodeStr, SDNode OpNode, // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly // due to the same reason. 
defm Z128 : avx512_vcvt_fp, EVEX_V128; + itins, "{1to2}", "{x}">, EVEX_V128; defm Z256 : avx512_vcvt_fp, EVEX_V256; + itins, "{1to4}", "{y}">, EVEX_V256; def : InstAlias(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>; @@ -6945,89 +7248,100 @@ multiclass avx512_cvtqq2ps opc, string OpcodeStr, SDNode OpNode, } } -defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", sint_to_fp, X86VSintToFP>, - XS, EVEX_CD8<32, CD8VH>; +defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", sint_to_fp, X86VSintToFP, + SSE_CVT_I2PD>, XS, EVEX_CD8<32, CD8VH>; defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", sint_to_fp, - X86VSintToFpRnd>, + X86VSintToFpRnd, SSE_CVT_I2PS>, PS, EVEX_CD8<32, CD8VF>; defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", fp_to_sint, - X86cvttp2siRnd>, + X86cvttp2siRnd, SSE_CVT_PS2I>, XS, EVEX_CD8<32, CD8VF>; defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", fp_to_sint, X86cvttp2si, - X86cvttp2siRnd>, + X86cvttp2siRnd, SSE_CVT_PD2I>, PD, VEX_W, EVEX_CD8<64, CD8VF>; defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", fp_to_uint, - X86cvttp2uiRnd>, PS, + X86cvttp2uiRnd, SSE_CVT_PS2I>, PS, EVEX_CD8<32, CD8VF>; defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", fp_to_uint, - X86cvttp2ui, X86cvttp2uiRnd>, PS, VEX_W, - EVEX_CD8<64, CD8VF>; + X86cvttp2ui, X86cvttp2uiRnd, SSE_CVT_PD2I>, + PS, VEX_W, EVEX_CD8<64, CD8VF>; -defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", uint_to_fp, X86VUintToFP>, - XS, EVEX_CD8<32, CD8VH>; +defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", uint_to_fp, + X86VUintToFP, SSE_CVT_I2PD>, XS, + EVEX_CD8<32, CD8VH>; defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", uint_to_fp, - X86VUintToFpRnd>, XD, + X86VUintToFpRnd, SSE_CVT_I2PS>, XD, EVEX_CD8<32, CD8VF>; defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int, - X86cvtp2IntRnd>, PD, EVEX_CD8<32, CD8VF>; + X86cvtp2IntRnd, SSE_CVT_PS2I>, PD, + EVEX_CD8<32, CD8VF>; defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int, - X86cvtp2IntRnd>, XD, VEX_W, - EVEX_CD8<64, CD8VF>; + X86cvtp2IntRnd, SSE_CVT_PD2I>, XD, + VEX_W, EVEX_CD8<64, CD8VF>; defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt, - X86cvtp2UIntRnd>, + X86cvtp2UIntRnd, SSE_CVT_PS2I>, PS, EVEX_CD8<32, CD8VF>; + defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt, - X86cvtp2UIntRnd>, VEX_W, + X86cvtp2UIntRnd, SSE_CVT_PD2I>, VEX_W, PS, EVEX_CD8<64, CD8VF>; defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int, - X86cvtp2IntRnd>, VEX_W, + X86cvtp2IntRnd, SSE_CVT_PD2I>, VEX_W, PD, EVEX_CD8<64, CD8VF>; defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int, - X86cvtp2IntRnd>, PD, EVEX_CD8<32, CD8VH>; + X86cvtp2IntRnd, SSE_CVT_PS2I>, PD, + EVEX_CD8<32, CD8VH>; defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt, - X86cvtp2UIntRnd>, VEX_W, + X86cvtp2UIntRnd, SSE_CVT_PD2I>, VEX_W, PD, EVEX_CD8<64, CD8VF>; defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt, - X86cvtp2UIntRnd>, PD, EVEX_CD8<32, CD8VH>; + X86cvtp2UIntRnd, SSE_CVT_PS2I>, PD, + EVEX_CD8<32, CD8VH>; defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", fp_to_sint, - X86cvttp2siRnd>, VEX_W, + X86cvttp2siRnd, SSE_CVT_PD2I>, VEX_W, PD, EVEX_CD8<64, CD8VF>; defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", fp_to_sint, X86cvttp2si, - X86cvttp2siRnd>, PD, EVEX_CD8<32, CD8VH>; + X86cvttp2siRnd, SSE_CVT_PS2I>, PD, + EVEX_CD8<32, CD8VH>; defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", fp_to_uint, - X86cvttp2uiRnd>, VEX_W, + X86cvttp2uiRnd, SSE_CVT_PD2I>, VEX_W, PD, 
EVEX_CD8<64, CD8VF>; defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", fp_to_uint, X86cvttp2ui, - X86cvttp2uiRnd>, PD, EVEX_CD8<32, CD8VH>; + X86cvttp2uiRnd, SSE_CVT_PS2I>, PD, + EVEX_CD8<32, CD8VH>; defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", sint_to_fp, - X86VSintToFpRnd>, VEX_W, XS, EVEX_CD8<64, CD8VF>; + X86VSintToFpRnd, SSE_CVT_I2PD>, VEX_W, XS, + EVEX_CD8<64, CD8VF>; defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", uint_to_fp, - X86VUintToFpRnd>, VEX_W, XS, EVEX_CD8<64, CD8VF>; + X86VUintToFpRnd, SSE_CVT_I2PD>, VEX_W, XS, + EVEX_CD8<64, CD8VF>; defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", sint_to_fp, X86VSintToFP, - X86VSintToFpRnd>, VEX_W, PS, EVEX_CD8<64, CD8VF>; + X86VSintToFpRnd, SSE_CVT_I2PS>, VEX_W, PS, + EVEX_CD8<64, CD8VF>; defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", uint_to_fp, X86VUintToFP, - X86VUintToFpRnd>, VEX_W, XD, EVEX_CD8<64, CD8VF>; + X86VUintToFpRnd, SSE_CVT_I2PS>, VEX_W, XD, + EVEX_CD8<64, CD8VF>; let Predicates = [HasAVX512, NoVLX] in { def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))), @@ -7045,11 +7359,6 @@ def : Pat<(v4i32 (fp_to_uint (v4f64 VR256X:$src1))), (v8f64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))), sub_xmm)>; -def : Pat<(v4i32 (X86cvttp2ui (v2f64 VR128X:$src))), - (EXTRACT_SUBREG (v8i32 (VCVTTPD2UDQZrr - (v8f64 (INSERT_SUBREG (IMPLICIT_DEF), - VR128X:$src, sub_xmm)))), sub_xmm)>; - def : Pat<(v8f32 (uint_to_fp (v8i32 VR256X:$src1))), (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), @@ -7187,37 +7496,45 @@ def : Pat<(v4f64 (uint_to_fp (v4i64 VR256X:$src1))), //===----------------------------------------------------------------------===// // Half precision conversion instructions //===----------------------------------------------------------------------===// + multiclass avx512_cvtph2ps { + X86MemOperand x86memop, PatFrag ld_frag, + OpndItins itins> { defm rr : AVX512_maskable<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst), (ins _src.RC:$src), "vcvtph2ps", "$src", "$src", - (X86cvtph2ps (_src.VT _src.RC:$src))>, T8PD; + (X86cvtph2ps (_src.VT _src.RC:$src)),itins.rr>, + T8PD, Sched<[itins.Sched]>; defm rm : AVX512_maskable<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst), (ins x86memop:$src), "vcvtph2ps", "$src", "$src", (X86cvtph2ps (_src.VT (bitconvert - (ld_frag addr:$src))))>, T8PD; + (ld_frag addr:$src)))), itins.rm>, + T8PD, Sched<[itins.Sched.Folded]>; } -multiclass avx512_cvtph2ps_sae { - defm rb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst), - (ins _src.RC:$src), "vcvtph2ps", - "{sae}, $src", "$src, {sae}", - (X86cvtph2psRnd (_src.VT _src.RC:$src), - (i32 FROUND_NO_EXC))>, T8PD, EVEX_B; - +multiclass avx512_cvtph2ps_sae { + defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst), + (ins _src.RC:$src), "vcvtph2ps", + "{sae}, $src", "$src, {sae}", + (X86cvtph2psRnd (_src.VT _src.RC:$src), + (i32 FROUND_NO_EXC)), itins.rr>, + T8PD, EVEX_B, Sched<[itins.Sched]>; } let Predicates = [HasAVX512] in - defm VCVTPH2PSZ : avx512_cvtph2ps, - avx512_cvtph2ps_sae, + defm VCVTPH2PSZ : avx512_cvtph2ps, + avx512_cvtph2ps_sae, EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>; let Predicates = [HasVLX] in { defm VCVTPH2PSZ256 : avx512_cvtph2ps,EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>; + loadv2i64, SSE_CVT_PH2PS>, EVEX, EVEX_V256, + EVEX_CD8<32, CD8VH>; defm VCVTPH2PSZ128 : avx512_cvtph2ps, EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>; + loadv2i64, SSE_CVT_PH2PS>, EVEX, EVEX_V128, + EVEX_CD8<32, CD8VH>; // Pattern match vcvtph2ps of a scalar i64 load. 
def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzmovl_v2i64 addr:$src)))), @@ -7230,41 +7547,48 @@ let Predicates = [HasVLX] in { } multiclass avx512_cvtps2ph { + X86MemOperand x86memop, OpndItins itins> { defm rr : AVX512_maskable<0x1D, MRMDestReg, _dest ,(outs _dest.RC:$dst), (ins _src.RC:$src1, i32u8imm:$src2), "vcvtps2ph", "$src2, $src1", "$src1, $src2", (X86cvtps2ph (_src.VT _src.RC:$src1), (i32 imm:$src2)), - NoItinerary, 0, 0>, AVX512AIi8Base; + itins.rr, 0, 0>, AVX512AIi8Base, Sched<[itins.Sched]>; let hasSideEffects = 0, mayStore = 1 in { def mr : AVX512AIi8<0x1D, MRMDestMem, (outs), (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2), "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", - []>; + [], itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs), (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2), "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", - []>, EVEX_K; + [], itins.rm>, EVEX_K, Sched<[itins.Sched.Folded, ReadAfterLd]>; } } -multiclass avx512_cvtps2ph_sae { + +multiclass avx512_cvtps2ph_sae { let hasSideEffects = 0 in - defm rb : AVX512_maskable_in_asm<0x1D, MRMDestReg, _dest, + defm rrb : AVX512_maskable_in_asm<0x1D, MRMDestReg, _dest, (outs _dest.RC:$dst), (ins _src.RC:$src1, i32u8imm:$src2), "vcvtps2ph", "$src2, {sae}, $src1", "$src1, {sae}, $src2", - []>, EVEX_B, AVX512AIi8Base; + [], itins.rr>, EVEX_B, AVX512AIi8Base, Sched<[itins.Sched]>; } + let Predicates = [HasAVX512] in { - defm VCVTPS2PHZ : avx512_cvtps2ph, - avx512_cvtps2ph_sae, - EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>; + defm VCVTPS2PHZ : avx512_cvtps2ph, + avx512_cvtps2ph_sae, EVEX, EVEX_V512, + EVEX_CD8<32, CD8VH>; let Predicates = [HasVLX] in { - defm VCVTPS2PHZ256 : avx512_cvtps2ph, - EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>; - defm VCVTPS2PHZ128 : avx512_cvtps2ph, - EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>; + defm VCVTPS2PHZ256 : avx512_cvtps2ph, EVEX, EVEX_V256, + EVEX_CD8<32, CD8VH>; + defm VCVTPS2PHZ128 : avx512_cvtps2ph, EVEX, EVEX_V128, + EVEX_CD8<32, CD8VH>; } def : Pat<(store (f64 (extractelt @@ -7303,228 +7627,246 @@ let Predicates = [HasVLX] in { // Unordered/Ordered scalar fp compare with Sea and set EFLAGS multiclass avx512_ord_cmp_sae opc, X86VectorVTInfo _, - string OpcodeStr> { + string OpcodeStr, OpndItins itins> { let hasSideEffects = 0 in - def rb: AVX512, EVEX, EVEX_B, VEX_LIG, EVEX_V128, - Sched<[WriteFAdd]>; + def rrb: AVX512, EVEX, EVEX_B, VEX_LIG, EVEX_V128, + Sched<[itins.Sched]>; } let Defs = [EFLAGS], Predicates = [HasAVX512] in { - defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss">, + defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", SSE_COMIS>, AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>; - defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd">, + defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", SSE_COMIS>, AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>; - defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss">, + defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", SSE_COMIS>, AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>; - defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd">, + defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", SSE_COMIS>, AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>; } let Defs = [EFLAGS], Predicates = [HasAVX512] in { defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32, - "ucomiss">, PS, EVEX, VEX_LIG, + "ucomiss", SSE_COMIS>, PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; 
defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86cmp, f64, f64mem, loadf64, - "ucomisd">, PD, EVEX, + "ucomisd", SSE_COMIS>, PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; let Pattern = [] in { defm VCOMISSZ : sse12_ord_cmp<0x2F, FR32X, undef, f32, f32mem, loadf32, - "comiss">, PS, EVEX, VEX_LIG, + "comiss", SSE_COMIS>, PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm VCOMISDZ : sse12_ord_cmp<0x2F, FR64X, undef, f64, f64mem, loadf64, - "comisd">, PD, EVEX, + "comisd", SSE_COMIS>, PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; } let isCodeGenOnly = 1 in { defm Int_VUCOMISSZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem, - sse_load_f32, "ucomiss">, PS, EVEX, VEX_LIG, + sse_load_f32, "ucomiss", SSE_COMIS>, PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm Int_VUCOMISDZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem, - sse_load_f64, "ucomisd">, PD, EVEX, + sse_load_f64, "ucomisd", SSE_COMIS>, PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; defm Int_VCOMISSZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem, - sse_load_f32, "comiss">, PS, EVEX, VEX_LIG, + sse_load_f32, "comiss", SSE_COMIS>, PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm Int_VCOMISDZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem, - sse_load_f64, "comisd">, PD, EVEX, + sse_load_f64, "comisd", SSE_COMIS>, PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; } } /// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd multiclass avx512_fp14_s opc, string OpcodeStr, SDNode OpNode, - X86VectorVTInfo _> { + OpndItins itins, X86VectorVTInfo _> { let Predicates = [HasAVX512], ExeDomain = _.ExeDomain in { defm rr : AVX512_maskable_scalar, EVEX_4V; + (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)), itins.rr>, + EVEX_4V, Sched<[itins.Sched]>; defm rm : AVX512_maskable_scalar, EVEX_4V; + _.ScalarIntMemCPat:$src2), itins.rm>, EVEX_4V, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } -defm VRCP14SS : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, f32x_info>, +defm VRCP14SS : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SSE_RCPS, f32x_info>, EVEX_CD8<32, CD8VT1>, T8PD, NotMemoryFoldable; -defm VRCP14SD : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, f64x_info>, +defm VRCP14SD : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SSE_RCPS, f64x_info>, VEX_W, EVEX_CD8<64, CD8VT1>, T8PD, NotMemoryFoldable; -defm VRSQRT14SS : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s, f32x_info>, +defm VRSQRT14SS : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s, SSE_RSQRTSS, f32x_info>, EVEX_CD8<32, CD8VT1>, T8PD, NotMemoryFoldable; -defm VRSQRT14SD : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s, f64x_info>, +defm VRSQRT14SD : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s, SSE_RSQRTSS, f64x_info>, VEX_W, EVEX_CD8<64, CD8VT1>, T8PD, NotMemoryFoldable; /// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd multiclass avx512_fp14_p opc, string OpcodeStr, SDNode OpNode, - X86VectorVTInfo _> { + OpndItins itins, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in { defm r: AVX512_maskable, EVEX, T8PD; + (_.FloatVT (OpNode _.RC:$src)), itins.rr>, EVEX, T8PD, + Sched<[itins.Sched]>; defm m: AVX512_maskable, EVEX, T8PD; + (bitconvert (_.LdFrag addr:$src)))), itins.rm>, EVEX, T8PD, + Sched<[itins.Sched.Folded, ReadAfterLd]>; defm mb: AVX512_maskable, - EVEX, T8PD, EVEX_B; + (X86VBroadcast (_.ScalarLdFrag addr:$src)))), itins.rm>, + EVEX, T8PD, EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>; } } -multiclass avx512_fp14_p_vl_all opc, string OpcodeStr, SDNode OpNode> { - defm PSZ : avx512_fp14_p, - EVEX_V512, EVEX_CD8<32, CD8VF>; - defm PDZ : avx512_fp14_p, - 
EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; +multiclass avx512_fp14_p_vl_all opc, string OpcodeStr, SDNode OpNode, + SizeItins itins> { + defm PSZ : avx512_fp14_p, EVEX_V512, EVEX_CD8<32, CD8VF>; + defm PDZ : avx512_fp14_p, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; // Define only if AVX512VL feature is present. let Predicates = [HasVLX] in { defm PSZ128 : avx512_fp14_p, + OpNode, itins.s, v4f32x_info>, EVEX_V128, EVEX_CD8<32, CD8VF>; defm PSZ256 : avx512_fp14_p, + OpNode, itins.s, v8f32x_info>, EVEX_V256, EVEX_CD8<32, CD8VF>; defm PDZ128 : avx512_fp14_p, + OpNode, itins.d, v2f64x_info>, EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>; defm PDZ256 : avx512_fp14_p, + OpNode, itins.d, v4f64x_info>, EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>; } } -defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86rsqrt14>; -defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86rcp14>; +defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86rsqrt14, SSE_RSQRT_P>; +defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86rcp14, SSE_RCP_P>; /// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd multiclass avx512_fp28_s opc, string OpcodeStr,X86VectorVTInfo _, - SDNode OpNode> { + SDNode OpNode, OpndItins itins> { let ExeDomain = _.ExeDomain in { defm r : AVX512_maskable_scalar; + (i32 FROUND_CURRENT)), itins.rr>, + Sched<[itins.Sched]>; defm rb : AVX512_maskable_scalar, EVEX_B; + (i32 FROUND_NO_EXC)), itins.rm>, EVEX_B, + Sched<[itins.Sched]>; defm m : AVX512_maskable_scalar; + (i32 FROUND_CURRENT)), itins.rm>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } -multiclass avx512_eri_s opc, string OpcodeStr, SDNode OpNode> { - defm SS : avx512_fp28_s, +multiclass avx512_eri_s opc, string OpcodeStr, SDNode OpNode, + SizeItins itins> { + defm SS : avx512_fp28_s, EVEX_CD8<32, CD8VT1>; - defm SD : avx512_fp28_s, + defm SD : avx512_fp28_s, EVEX_CD8<64, CD8VT1>, VEX_W; } let Predicates = [HasERI] in { - defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s>, T8PD, EVEX_4V; - defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s>, T8PD, EVEX_4V; + defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, SSE_RCP_S>, + T8PD, EVEX_4V; + defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, SSE_RSQRT_S>, + T8PD, EVEX_4V; } -defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexpRnds>, T8PD, EVEX_4V; +defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexpRnds, SSE_ALU_ITINS_S>, + T8PD, EVEX_4V; /// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd multiclass avx512_fp28_p opc, string OpcodeStr, X86VectorVTInfo _, - SDNode OpNode> { + SDNode OpNode, OpndItins itins> { let ExeDomain = _.ExeDomain in { defm r : AVX512_maskable; + (OpNode (_.VT _.RC:$src), (i32 FROUND_CURRENT)), + itins.rr>, Sched<[itins.Sched]>; defm m : AVX512_maskable; + (i32 FROUND_CURRENT)), itins.rm>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; defm mb : AVX512_maskable, EVEX_B; + (i32 FROUND_CURRENT)), itins.rm>, EVEX_B, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } multiclass avx512_fp28_p_round opc, string OpcodeStr, X86VectorVTInfo _, - SDNode OpNode> { + SDNode OpNode, OpndItins itins> { let ExeDomain = _.ExeDomain in defm rb : AVX512_maskable, EVEX_B; + (OpNode (_.VT _.RC:$src), (i32 FROUND_NO_EXC)), + itins.rr>, EVEX_B, Sched<[itins.Sched]>; } -multiclass avx512_eri opc, string OpcodeStr, SDNode OpNode> { - defm PS : avx512_fp28_p, - avx512_fp28_p_round, +multiclass avx512_eri opc, string OpcodeStr, SDNode OpNode, + SizeItins itins> { + defm PS : avx512_fp28_p, + avx512_fp28_p_round, T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>; - defm PD : avx512_fp28_p, - 
avx512_fp28_p_round, + defm PD : avx512_fp28_p, + avx512_fp28_p_round, T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; } multiclass avx512_fp_unaryop_packed opc, string OpcodeStr, - SDNode OpNode> { + SDNode OpNode, SizeItins itins> { // Define only if AVX512VL feature is present. let Predicates = [HasVLX] in { - defm PSZ128 : avx512_fp28_p, + defm PSZ128 : avx512_fp28_p, EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>; - defm PSZ256 : avx512_fp28_p, + defm PSZ256 : avx512_fp28_p, EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>; - defm PDZ128 : avx512_fp28_p, + defm PDZ128 : avx512_fp28_p, EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>; - defm PDZ256 : avx512_fp28_p, + defm PDZ256 : avx512_fp28_p, EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>; } } let Predicates = [HasERI] in { - defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28>, EVEX; - defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28>, EVEX; - defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2>, EVEX; + defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, SSE_RSQRT_P>, EVEX; + defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28, SSE_RCP_P>, EVEX; + defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2, SSE_ALU_ITINS_P>, EVEX; } -defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexpRnd>, - avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexpRnd> , EVEX; +defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexpRnd, SSE_ALU_ITINS_P>, + avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexpRnd, + SSE_ALU_ITINS_P>, EVEX; multiclass avx512_sqrt_packed_round opc, string OpcodeStr, OpndItins itins, X86VectorVTInfo _>{ @@ -7588,61 +7930,60 @@ multiclass avx512_sqrt_packed_all_round opc, string OpcodeStr> { multiclass avx512_sqrt_scalar opc, string OpcodeStr, OpndItins itins, X86VectorVTInfo _, string SUFF, Intrinsic Intr> { let ExeDomain = _.ExeDomain in { - defm r_Int : AVX512_maskable_scalar, Sched<[itins.Sched]>; - defm m_Int : AVX512_maskable_scalar, - Sched<[itins.Sched.Folded, ReadAfterLd]>; - defm rb_Int : AVX512_maskable_scalar, + Sched<[itins.Sched.Folded, ReadAfterLd]>; + defm rb_Int : AVX512_maskable_scalar, - EVEX_B, EVEX_RC, Sched<[itins.Sched.Folded, ReadAfterLd]>; - - let isCodeGenOnly = 1, hasSideEffects = 0 in { - def r : I, - Sched<[itins.Sched]>; - let mayLoad = 1 in - def m : I, - Sched<[itins.Sched.Folded, ReadAfterLd]>; - } + EVEX_B, EVEX_RC, Sched<[itins.Sched]>; + + let isCodeGenOnly = 1, hasSideEffects = 0, Predicates=[HasAVX512] in { + def r : I, Sched<[itins.Sched]>; + let mayLoad = 1 in + def m : I, Sched<[itins.Sched.Folded, ReadAfterLd]>; + } } -let Predicates = [HasAVX512] in { - def : Pat<(_.EltVT (fsqrt _.FRC:$src)), - (!cast(NAME#SUFF#Zr) - (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>; + let Predicates = [HasAVX512] in { + def : Pat<(_.EltVT (fsqrt _.FRC:$src)), + (!cast(NAME#SUFF#Zr) + (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>; - def : Pat<(Intr VR128X:$src), - (!cast(NAME#SUFF#Zr_Int) VR128X:$src, + def : Pat<(Intr VR128X:$src), + (!cast(NAME#SUFF#Zr_Int) VR128X:$src, VR128X:$src)>; -} - -let Predicates = [HasAVX512, OptForSize] in { - def : Pat<(_.EltVT (fsqrt (load addr:$src))), - (!cast(NAME#SUFF#Zm) - (_.EltVT (IMPLICIT_DEF)), addr:$src)>; + } - def : Pat<(Intr _.ScalarIntMemCPat:$src2), - (!cast(NAME#SUFF#Zm_Int) - (_.VT (IMPLICIT_DEF)), addr:$src2)>; -} + let Predicates = [HasAVX512, OptForSize] in { + def : Pat<(_.EltVT (fsqrt (load addr:$src))), + (!cast(NAME#SUFF#Zm) + (_.EltVT (IMPLICIT_DEF)), addr:$src)>; + def : Pat<(Intr _.ScalarIntMemCPat:$src2), + (!cast(NAME#SUFF#Zm_Int) + (_.VT (IMPLICIT_DEF)), addr:$src2)>; + } } multiclass 
avx512_sqrt_scalar_all opc, string OpcodeStr> { @@ -7660,40 +8001,42 @@ defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt">, defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt">, VEX_LIG; -multiclass -avx512_rndscale_scalar opc, string OpcodeStr, X86VectorVTInfo _> { - +multiclass avx512_rndscale_scalar opc, string OpcodeStr, + OpndItins itins, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in { defm r_Int : AVX512_maskable_scalar; + (i32 imm:$src3))), itins.rr>, + Sched<[itins.Sched]>; defm rb_Int : AVX512_maskable_scalar, EVEX_B; + (i32 imm:$src3), (i32 FROUND_NO_EXC))), itins.rr>, EVEX_B, + Sched<[itins.Sched]>; defm m_Int : AVX512_maskable_scalar; + _.ScalarIntMemCPat:$src2, (i32 imm:$src3))), itins.rm>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; - let isCodeGenOnly = 1, hasSideEffects = 0 in { + let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in { def r : I; + [], itins.rr>, Sched<[itins.Sched]>; let mayLoad = 1 in def m : I; + [], itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; } } @@ -7734,36 +8077,47 @@ avx512_rndscale_scalar opc, string OpcodeStr, X86VectorVTInfo _> { } } -defm VRNDSCALESS : avx512_rndscale_scalar<0x0A, "vrndscaless", f32x_info>, - AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VT1>; +defm VRNDSCALESS : avx512_rndscale_scalar<0x0A, "vrndscaless", SSE_ALU_F32S, + f32x_info>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VT1>; -defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd", f64x_info>, VEX_W, - AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VT1>; +defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd", SSE_ALU_F64S, + f64x_info>, VEX_W, AVX512AIi8Base, EVEX_4V, + EVEX_CD8<64, CD8VT1>; //------------------------------------------------- // Integer truncate and extend operations //------------------------------------------------- +let Sched = WriteShuffle256 in +def AVX512_EXTEND : OpndItins< + IIC_SSE_PSHUF_RI, IIC_SSE_PSHUF_MI +>; + +let Sched = WriteShuffle256 in +def AVX512_TRUNCATE : OpndItins< + IIC_SSE_PSHUF_RI, IIC_SSE_PSHUF_MI +>; + multiclass avx512_trunc_common opc, string OpcodeStr, SDNode OpNode, - X86VectorVTInfo SrcInfo, X86VectorVTInfo DestInfo, - X86MemOperand x86memop> { + OpndItins itins, X86VectorVTInfo SrcInfo, + X86VectorVTInfo DestInfo, X86MemOperand x86memop> { let ExeDomain = DestInfo.ExeDomain in defm rr : AVX512_maskable, - EVEX, T8XS; + (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1))), + itins.rr>, EVEX, T8XS, Sched<[itins.Sched]>; let mayStore = 1, mayLoad = 1, hasSideEffects = 0, ExeDomain = DestInfo.ExeDomain in { def mr : AVX512XS8I, EVEX; + [], itins.rm>, EVEX, Sched<[itins.Sched.Folded]>; def mrk : AVX512XS8I, EVEX, EVEX_K; + [], itins.rm>, EVEX, EVEX_K, Sched<[itins.Sched.Folded]>; }//mayStore = 1, mayLoad = 1, hasSideEffects = 0 } @@ -7781,281 +8135,290 @@ multiclass avx512_trunc_mr_lowering; } -multiclass avx512_trunc opc, string OpcodeStr, SDNode OpNode, - AVX512VLVectorVTInfo VTSrcInfo, X86VectorVTInfo DestInfoZ128, - X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ, - X86MemOperand x86memopZ128, X86MemOperand x86memopZ256, - X86MemOperand x86memopZ, PatFrag truncFrag, PatFrag mtruncFrag, - Predicate prd = HasAVX512>{ +multiclass avx512_trunc opc, string OpcodeStr, SDNode OpNode128, + SDNode OpNode256, SDNode OpNode512, OpndItins itins, + AVX512VLVectorVTInfo VTSrcInfo, + X86VectorVTInfo DestInfoZ128, + X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ, + X86MemOperand x86memopZ128, X86MemOperand x86memopZ256, + X86MemOperand x86memopZ, PatFrag truncFrag, + PatFrag mtruncFrag, 
Predicate prd = HasAVX512>{ let Predicates = [HasVLX, prd] in { - defm Z128: avx512_trunc_common, + defm Z128: avx512_trunc_common, avx512_trunc_mr_lowering, EVEX_V128; - defm Z256: avx512_trunc_common, + defm Z256: avx512_trunc_common, avx512_trunc_mr_lowering, EVEX_V256; } let Predicates = [prd] in - defm Z: avx512_trunc_common, + defm Z: avx512_trunc_common, avx512_trunc_mr_lowering, EVEX_V512; } multiclass avx512_trunc_qb opc, string OpcodeStr, SDNode OpNode, - PatFrag StoreNode, PatFrag MaskedStoreNode> { - defm NAME: avx512_trunc, EVEX_CD8<8, CD8VO>; + OpndItins itins, PatFrag StoreNode, + PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> { + defm NAME: avx512_trunc, EVEX_CD8<8, CD8VO>; } multiclass avx512_trunc_qw opc, string OpcodeStr, SDNode OpNode, - PatFrag StoreNode, PatFrag MaskedStoreNode> { - defm NAME: avx512_trunc, EVEX_CD8<16, CD8VQ>; + OpndItins itins, PatFrag StoreNode, + PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> { + defm NAME: avx512_trunc, EVEX_CD8<16, CD8VQ>; } multiclass avx512_trunc_qd opc, string OpcodeStr, SDNode OpNode, - PatFrag StoreNode, PatFrag MaskedStoreNode> { - defm NAME: avx512_trunc, EVEX_CD8<32, CD8VH>; + OpndItins itins, PatFrag StoreNode, + PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> { + defm NAME: avx512_trunc, EVEX_CD8<32, CD8VH>; } multiclass avx512_trunc_db opc, string OpcodeStr, SDNode OpNode, - PatFrag StoreNode, PatFrag MaskedStoreNode> { - defm NAME: avx512_trunc, EVEX_CD8<8, CD8VQ>; + OpndItins itins, PatFrag StoreNode, + PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> { + defm NAME: avx512_trunc, EVEX_CD8<8, CD8VQ>; } multiclass avx512_trunc_dw opc, string OpcodeStr, SDNode OpNode, - PatFrag StoreNode, PatFrag MaskedStoreNode> { - defm NAME: avx512_trunc, EVEX_CD8<16, CD8VH>; + OpndItins itins, PatFrag StoreNode, + PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> { + defm NAME: avx512_trunc, EVEX_CD8<16, CD8VH>; } multiclass avx512_trunc_wb opc, string OpcodeStr, SDNode OpNode, - PatFrag StoreNode, PatFrag MaskedStoreNode> { - defm NAME: avx512_trunc, EVEX_CD8<16, CD8VH>; + OpndItins itins, PatFrag StoreNode, + PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> { + defm NAME: avx512_trunc, EVEX_CD8<16, CD8VH>; } -defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb", X86vtrunc, - truncstorevi8, masked_truncstorevi8>; -defm VPMOVSQB : avx512_trunc_qb<0x22, "vpmovsqb", X86vtruncs, +defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb", trunc, AVX512_TRUNCATE, + truncstorevi8, masked_truncstorevi8, X86vtrunc>; +defm VPMOVSQB : avx512_trunc_qb<0x22, "vpmovsqb", X86vtruncs, AVX512_TRUNCATE, truncstore_s_vi8, masked_truncstore_s_vi8>; -defm VPMOVUSQB : avx512_trunc_qb<0x12, "vpmovusqb", X86vtruncus, +defm VPMOVUSQB : avx512_trunc_qb<0x12, "vpmovusqb", X86vtruncus, AVX512_TRUNCATE, truncstore_us_vi8, masked_truncstore_us_vi8>; -defm VPMOVQW : avx512_trunc_qw<0x34, "vpmovqw", X86vtrunc, - truncstorevi16, masked_truncstorevi16>; -defm VPMOVSQW : avx512_trunc_qw<0x24, "vpmovsqw", X86vtruncs, +defm VPMOVQW : avx512_trunc_qw<0x34, "vpmovqw", trunc, AVX512_TRUNCATE, + truncstorevi16, masked_truncstorevi16, X86vtrunc>; +defm VPMOVSQW : avx512_trunc_qw<0x24, "vpmovsqw", X86vtruncs, AVX512_TRUNCATE, truncstore_s_vi16, masked_truncstore_s_vi16>; -defm VPMOVUSQW : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus, +defm VPMOVUSQW : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus, AVX512_TRUNCATE, truncstore_us_vi16, masked_truncstore_us_vi16>; -defm VPMOVQD : avx512_trunc_qd<0x35, "vpmovqd", X86vtrunc, - truncstorevi32, masked_truncstorevi32>; 
-defm VPMOVSQD : avx512_trunc_qd<0x25, "vpmovsqd", X86vtruncs, +defm VPMOVQD : avx512_trunc_qd<0x35, "vpmovqd", trunc, AVX512_TRUNCATE, + truncstorevi32, masked_truncstorevi32, X86vtrunc>; +defm VPMOVSQD : avx512_trunc_qd<0x25, "vpmovsqd", X86vtruncs, AVX512_TRUNCATE, truncstore_s_vi32, masked_truncstore_s_vi32>; -defm VPMOVUSQD : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus, +defm VPMOVUSQD : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus, AVX512_TRUNCATE, truncstore_us_vi32, masked_truncstore_us_vi32>; -defm VPMOVDB : avx512_trunc_db<0x31, "vpmovdb", X86vtrunc, - truncstorevi8, masked_truncstorevi8>; -defm VPMOVSDB : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, +defm VPMOVDB : avx512_trunc_db<0x31, "vpmovdb", trunc, AVX512_TRUNCATE, + truncstorevi8, masked_truncstorevi8, X86vtrunc>; +defm VPMOVSDB : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, AVX512_TRUNCATE, truncstore_s_vi8, masked_truncstore_s_vi8>; -defm VPMOVUSDB : avx512_trunc_db<0x11, "vpmovusdb", X86vtruncus, +defm VPMOVUSDB : avx512_trunc_db<0x11, "vpmovusdb", X86vtruncus, AVX512_TRUNCATE, truncstore_us_vi8, masked_truncstore_us_vi8>; -defm VPMOVDW : avx512_trunc_dw<0x33, "vpmovdw", X86vtrunc, - truncstorevi16, masked_truncstorevi16>; -defm VPMOVSDW : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, +defm VPMOVDW : avx512_trunc_dw<0x33, "vpmovdw", trunc, AVX512_TRUNCATE, + truncstorevi16, masked_truncstorevi16, X86vtrunc>; +defm VPMOVSDW : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, AVX512_TRUNCATE, truncstore_s_vi16, masked_truncstore_s_vi16>; -defm VPMOVUSDW : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus, +defm VPMOVUSDW : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus, AVX512_TRUNCATE, truncstore_us_vi16, masked_truncstore_us_vi16>; -defm VPMOVWB : avx512_trunc_wb<0x30, "vpmovwb", X86vtrunc, - truncstorevi8, masked_truncstorevi8>; -defm VPMOVSWB : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, +defm VPMOVWB : avx512_trunc_wb<0x30, "vpmovwb", trunc, AVX512_TRUNCATE, + truncstorevi8, masked_truncstorevi8, X86vtrunc>; +defm VPMOVSWB : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, AVX512_TRUNCATE, truncstore_s_vi8, masked_truncstore_s_vi8>; -defm VPMOVUSWB : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus, +defm VPMOVUSWB : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus, AVX512_TRUNCATE, truncstore_us_vi8, masked_truncstore_us_vi8>; -def : Pat<(v16i16 (fp_to_uint (v16f32 VR512:$src1))), - (VPMOVDWZrr (v16i32 (VCVTTPS2UDQZrr VR512:$src1)))>, Requires<[HasAVX512]>; -def : Pat<(v16i8 (fp_to_uint (v16f32 VR512:$src1))), - (VPMOVDBZrr (v16i32 (VCVTTPS2UDQZrr VR512:$src1)))>, Requires<[HasAVX512]>; - let Predicates = [HasAVX512, NoVLX] in { -def: Pat<(v8i16 (X86vtrunc (v8i32 VR256X:$src))), +def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))), (v8i16 (EXTRACT_SUBREG (v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src, sub_ymm)))), sub_xmm))>; -def: Pat<(v4i32 (X86vtrunc (v4i64 VR256X:$src))), +def: Pat<(v4i32 (trunc (v4i64 VR256X:$src))), (v4i32 (EXTRACT_SUBREG (v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src, sub_ymm)))), sub_xmm))>; } let Predicates = [HasBWI, NoVLX] in { -def: Pat<(v16i8 (X86vtrunc (v16i16 VR256X:$src))), +def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))), (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src, sub_ymm))), sub_xmm))>; } -multiclass avx512_extend_common opc, string OpcodeStr, +multiclass avx512_extend_common opc, string OpcodeStr, OpndItins itins, X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo, X86MemOperand x86memop, PatFrag LdFrag, 
SDPatternOperator OpNode>{ let ExeDomain = DestInfo.ExeDomain in { defm rr : AVX512_maskable, - EVEX; + (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src))), itins.rr>, + EVEX, Sched<[itins.Sched]>; defm rm : AVX512_maskable, - EVEX; + (DestInfo.VT (LdFrag addr:$src)), itins.rm>, + EVEX, Sched<[itins.Sched.Folded]>; } } multiclass avx512_extend_BW opc, string OpcodeStr, - SDPatternOperator OpNode, SDPatternOperator InVecNode, - string ExtTy,PatFrag LdFrag = !cast(ExtTy#"extloadvi8")> { + SDPatternOperator OpNode, SDPatternOperator InVecNode, string ExtTy, + OpndItins itins, PatFrag LdFrag = !cast(ExtTy#"extloadvi8")> { let Predicates = [HasVLX, HasBWI] in { - defm Z128: avx512_extend_common, EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128, VEX_WIG; - defm Z256: avx512_extend_common, EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256, VEX_WIG; } let Predicates = [HasBWI] in { - defm Z : avx512_extend_common, EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512, VEX_WIG; } } multiclass avx512_extend_BD opc, string OpcodeStr, - SDPatternOperator OpNode, SDPatternOperator InVecNode, - string ExtTy,PatFrag LdFrag = !cast(ExtTy#"extloadvi8")> { + SDPatternOperator OpNode, SDPatternOperator InVecNode, string ExtTy, + OpndItins itins, PatFrag LdFrag = !cast(ExtTy#"extloadvi8")> { let Predicates = [HasVLX, HasAVX512] in { - defm Z128: avx512_extend_common, EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128, VEX_WIG; - defm Z256: avx512_extend_common, EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256, VEX_WIG; } let Predicates = [HasAVX512] in { - defm Z : avx512_extend_common, EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512, VEX_WIG; } } multiclass avx512_extend_BQ opc, string OpcodeStr, - SDPatternOperator OpNode, SDPatternOperator InVecNode, - string ExtTy,PatFrag LdFrag = !cast(ExtTy#"extloadvi8")> { + SDPatternOperator OpNode, SDPatternOperator InVecNode, string ExtTy, + OpndItins itins, PatFrag LdFrag = !cast(ExtTy#"extloadvi8")> { let Predicates = [HasVLX, HasAVX512] in { - defm Z128: avx512_extend_common, EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128, VEX_WIG; - defm Z256: avx512_extend_common, EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256, VEX_WIG; } let Predicates = [HasAVX512] in { - defm Z : avx512_extend_common, EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512, VEX_WIG; } } multiclass avx512_extend_WD opc, string OpcodeStr, - SDPatternOperator OpNode, SDPatternOperator InVecNode, - string ExtTy,PatFrag LdFrag = !cast(ExtTy#"extloadvi16")> { + SDPatternOperator OpNode, SDPatternOperator InVecNode, string ExtTy, + OpndItins itins, PatFrag LdFrag = !cast(ExtTy#"extloadvi16")> { let Predicates = [HasVLX, HasAVX512] in { - defm Z128: avx512_extend_common, EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128, VEX_WIG; - defm Z256: avx512_extend_common, EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256, VEX_WIG; } let Predicates = [HasAVX512] in { - defm Z : avx512_extend_common, EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512, VEX_WIG; } } multiclass avx512_extend_WQ opc, string OpcodeStr, - SDPatternOperator OpNode, SDPatternOperator InVecNode, - string ExtTy,PatFrag LdFrag = !cast(ExtTy#"extloadvi16")> { + SDPatternOperator OpNode, SDPatternOperator InVecNode, string ExtTy, + OpndItins itins, PatFrag LdFrag = !cast(ExtTy#"extloadvi16")> { let Predicates = [HasVLX, HasAVX512] in { - defm Z128: avx512_extend_common, EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128, VEX_WIG; - defm Z256: avx512_extend_common, EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256, VEX_WIG; } let Predicates = [HasAVX512] in { - defm Z : avx512_extend_common, EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512, VEX_WIG; } } multiclass avx512_extend_DQ opc, string OpcodeStr, - SDPatternOperator OpNode, 
SDPatternOperator InVecNode, - string ExtTy,PatFrag LdFrag = !cast(ExtTy#"extloadvi32")> { + SDPatternOperator OpNode, SDPatternOperator InVecNode, string ExtTy, + OpndItins itins, PatFrag LdFrag = !cast(ExtTy#"extloadvi32")> { let Predicates = [HasVLX, HasAVX512] in { - defm Z128: avx512_extend_common, EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128; - defm Z256: avx512_extend_common, EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256; } let Predicates = [HasAVX512] in { - defm Z : avx512_extend_common, EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512; } } -defm VPMOVZXBW : avx512_extend_BW<0x30, "vpmovzxbw", X86vzext, zext_invec, "z">; -defm VPMOVZXBD : avx512_extend_BD<0x31, "vpmovzxbd", X86vzext, zext_invec, "z">; -defm VPMOVZXBQ : avx512_extend_BQ<0x32, "vpmovzxbq", X86vzext, zext_invec, "z">; -defm VPMOVZXWD : avx512_extend_WD<0x33, "vpmovzxwd", X86vzext, zext_invec, "z">; -defm VPMOVZXWQ : avx512_extend_WQ<0x34, "vpmovzxwq", X86vzext, zext_invec, "z">; -defm VPMOVZXDQ : avx512_extend_DQ<0x35, "vpmovzxdq", X86vzext, zext_invec, "z">; +defm VPMOVZXBW : avx512_extend_BW<0x30, "vpmovzxbw", X86vzext, zext_invec, "z", AVX512_EXTEND>; +defm VPMOVZXBD : avx512_extend_BD<0x31, "vpmovzxbd", X86vzext, zext_invec, "z", AVX512_EXTEND>; +defm VPMOVZXBQ : avx512_extend_BQ<0x32, "vpmovzxbq", X86vzext, zext_invec, "z", AVX512_EXTEND>; +defm VPMOVZXWD : avx512_extend_WD<0x33, "vpmovzxwd", X86vzext, zext_invec, "z", AVX512_EXTEND>; +defm VPMOVZXWQ : avx512_extend_WQ<0x34, "vpmovzxwq", X86vzext, zext_invec, "z", AVX512_EXTEND>; +defm VPMOVZXDQ : avx512_extend_DQ<0x35, "vpmovzxdq", X86vzext, zext_invec, "z", AVX512_EXTEND>; -defm VPMOVSXBW: avx512_extend_BW<0x20, "vpmovsxbw", X86vsext, sext_invec, "s">; -defm VPMOVSXBD: avx512_extend_BD<0x21, "vpmovsxbd", X86vsext, sext_invec, "s">; -defm VPMOVSXBQ: avx512_extend_BQ<0x22, "vpmovsxbq", X86vsext, sext_invec, "s">; -defm VPMOVSXWD: avx512_extend_WD<0x23, "vpmovsxwd", X86vsext, sext_invec, "s">; -defm VPMOVSXWQ: avx512_extend_WQ<0x24, "vpmovsxwq", X86vsext, sext_invec, "s">; -defm VPMOVSXDQ: avx512_extend_DQ<0x25, "vpmovsxdq", X86vsext, sext_invec, "s">; +defm VPMOVSXBW: avx512_extend_BW<0x20, "vpmovsxbw", X86vsext, sext_invec, "s", AVX512_EXTEND>; +defm VPMOVSXBD: avx512_extend_BD<0x21, "vpmovsxbd", X86vsext, sext_invec, "s", AVX512_EXTEND>; +defm VPMOVSXBQ: avx512_extend_BQ<0x22, "vpmovsxbq", X86vsext, sext_invec, "s", AVX512_EXTEND>; +defm VPMOVSXWD: avx512_extend_WD<0x23, "vpmovsxwd", X86vsext, sext_invec, "s", AVX512_EXTEND>; +defm VPMOVSXWQ: avx512_extend_WQ<0x24, "vpmovsxwq", X86vsext, sext_invec, "s", AVX512_EXTEND>; +defm VPMOVSXDQ: avx512_extend_DQ<0x25, "vpmovsxdq", X86vsext, sext_invec, "s", AVX512_EXTEND>; multiclass AVX512_pmovx_patterns; //===----------------------------------------------------------------------===// // GATHER - SCATTER Operations +// FIXME: Improve scheduling of gather/scatter instructions. 
multiclass avx512_gather opc, string OpcodeStr, X86VectorVTInfo _, X86MemOperand memop, PatFrag GatherNode, RegisterClass MaskRC = _.KRCWM> { @@ -8217,7 +8581,7 @@ multiclass avx512_gather opc, string OpcodeStr, X86VectorVTInfo _, [(set _.RC:$dst, MaskRC:$mask_wb, (GatherNode (_.VT _.RC:$src1), MaskRC:$mask, vectoraddr:$src2))]>, EVEX, EVEX_K, - EVEX_CD8<_.EltSize, CD8VT1>; + EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteLoad]>; } multiclass avx512_gather_q_pd dopc, bits<8> qopc, @@ -8265,17 +8629,19 @@ defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">; multiclass avx512_scatter opc, string OpcodeStr, X86VectorVTInfo _, - X86MemOperand memop, PatFrag ScatterNode> { + X86MemOperand memop, PatFrag ScatterNode, + RegisterClass MaskRC = _.KRCWM> { let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain in - def mr : AVX5128I, - EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>; + [(set MaskRC:$mask_wb, (ScatterNode (_.VT _.RC:$src), + MaskRC:$mask, vectoraddr:$dst))]>, + EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>, + Sched<[WriteStore]>; } multiclass avx512_scatter_q_pd dopc, bits<8> qopc, @@ -8310,7 +8676,8 @@ let Predicates = [HasVLX] in { defm NAME##D##SUFF##Z128: avx512_scatter, EVEX_V128; defm NAME##Q##SUFF##Z128: avx512_scatter, EVEX_V128; + vx64xmem, mscatterv2i64, VK2WM>, + EVEX_V128; } } @@ -8326,7 +8693,7 @@ multiclass avx512_gather_scatter_prefetch opc, Format F, string OpcodeSt let Predicates = [HasPFI], hasSideEffects = 1 in def m : AVX5128I, EVEX, EVEX_K; + [], IIC_SSE_PREFETCH>, EVEX, EVEX_K, Sched<[WriteLoad]>; } defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps", @@ -8380,18 +8747,8 @@ defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd multiclass cvt_by_vec_width opc, X86VectorVTInfo Vec, string OpcodeStr > { def rr : AVX512XS8I, EVEX; -} - -// Use 512bit version to implement 128/256 bit in case NoVLX. -multiclass avx512_convert_mask_to_vector_lowering { - - def : Pat<(X86Info.VT (X86vsext (X86Info.KVT X86Info.KRC:$src))), - (X86Info.VT (EXTRACT_SUBREG - (_.VT (!cast(NAME#"Zrr") - (_.KVT (COPY_TO_REGCLASS X86Info.KRC:$src,_.KRC)))), - X86Info.SubRegIdx))>; + [(set Vec.RC:$dst, (Vec.VT (X86vsext Vec.KRC:$src)))], + IIC_SSE_MOV_S_RR>, EVEX, Sched<[WriteMove]>; } multiclass cvt_mask_by_elt_width opc, AVX512VLVectorVTInfo VTInfo, @@ -8403,11 +8760,6 @@ let Predicates = [prd] in defm Z256 : cvt_by_vec_width, EVEX_V256; defm Z128 : cvt_by_vec_width, EVEX_V128; } -let Predicates = [prd, NoVLX] in { - defm Z256_Alt : avx512_convert_mask_to_vector_lowering; - defm Z128_Alt : avx512_convert_mask_to_vector_lowering; - } - } defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2" , HasBWI>; @@ -8418,14 +8770,15 @@ defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI multiclass convert_vector_to_mask_common opc, X86VectorVTInfo _, string OpcodeStr > { def rr : AVX512XS8I, EVEX; + [(set _.KRC:$dst, (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src)))], + IIC_SSE_MOV_S_RR>, EVEX, Sched<[WriteMove]>; } // Use 512bit version to implement 128/256 bit in case NoVLX. 
multiclass convert_vector_to_mask_lowering { - def : Pat<(_.KVT (X86cvt2mask (_.VT _.RC:$src))), + def : Pat<(_.KVT (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src))), (_.KVT (COPY_TO_REGCLASS (!cast(NAME#"Zrr") (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)), @@ -8464,27 +8817,39 @@ defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m", // AVX-512 - COMPRESS and EXPAND // +// FIXME: Is there a better scheduler itinerary for VPCOMPRESS/VPEXPAND? +let Sched = WriteShuffle256 in { +def AVX512_COMPRESS : OpndItins< + IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM +>; +def AVX512_EXPAND : OpndItins< + IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM +>; +} + multiclass compress_by_vec_width_common opc, X86VectorVTInfo _, - string OpcodeStr> { + string OpcodeStr, OpndItins itins> { defm rr : AVX512_maskable, AVX5128IBase; + (_.VT (X86compress _.RC:$src1)), itins.rr>, AVX5128IBase, + Sched<[itins.Sched]>; let mayStore = 1, hasSideEffects = 0 in def mr : AVX5128I, EVEX_CD8<_.EltSize, CD8VT1>; + []>, EVEX_CD8<_.EltSize, CD8VT1>, + Sched<[itins.Sched.Folded]>; def mrk : AVX5128I, - EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>; + EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>, + Sched<[itins.Sched.Folded]>; } multiclass compress_by_vec_width_lowering { - def : Pat<(X86mCompressingStore addr:$dst, _.KRCWM:$mask, (_.VT _.RC:$src)), (!cast(NAME#_.ZSuffix##mrk) @@ -8492,41 +8857,44 @@ multiclass compress_by_vec_width_lowering { } multiclass compress_by_elt_width opc, string OpcodeStr, + OpndItins itins, AVX512VLVectorVTInfo VTInfo, Predicate Pred = HasAVX512> { let Predicates = [Pred] in - defm Z : compress_by_vec_width_common, + defm Z : compress_by_vec_width_common, compress_by_vec_width_lowering, EVEX_V512; let Predicates = [Pred, HasVLX] in { - defm Z256 : compress_by_vec_width_common, + defm Z256 : compress_by_vec_width_common, compress_by_vec_width_lowering, EVEX_V256; - defm Z128 : compress_by_vec_width_common, + defm Z128 : compress_by_vec_width_common, compress_by_vec_width_lowering, EVEX_V128; } } -defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", avx512vl_i32_info>, - EVEX; -defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", avx512vl_i64_info>, - EVEX, VEX_W; -defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", avx512vl_f32_info>, - EVEX; -defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", avx512vl_f64_info>, - EVEX, VEX_W; +defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", AVX512_COMPRESS, + avx512vl_i32_info>, EVEX; +defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", AVX512_COMPRESS, + avx512vl_i64_info>, EVEX, VEX_W; +defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", AVX512_COMPRESS, + avx512vl_f32_info>, EVEX; +defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", AVX512_COMPRESS, + avx512vl_f64_info>, EVEX, VEX_W; // expand multiclass expand_by_vec_width opc, X86VectorVTInfo _, - string OpcodeStr> { + string OpcodeStr, OpndItins itins> { defm rr : AVX512_maskable, AVX5128IBase; + (_.VT (X86expand _.RC:$src1)), itins.rr>, AVX5128IBase, + Sched<[itins.Sched]>; defm rm : AVX512_maskable, - AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>; + (_.LdFrag addr:$src1))))), itins.rm>, + AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } multiclass expand_by_vec_width_lowering { @@ -8542,58 +8910,62 @@ multiclass expand_by_vec_width_lowering { } multiclass expand_by_elt_width opc, string OpcodeStr, + OpndItins itins, AVX512VLVectorVTInfo VTInfo, Predicate Pred = HasAVX512> { let Predicates = [Pred] in - 
defm Z : expand_by_vec_width, + defm Z : expand_by_vec_width, expand_by_vec_width_lowering, EVEX_V512; let Predicates = [Pred, HasVLX] in { - defm Z256 : expand_by_vec_width, + defm Z256 : expand_by_vec_width, expand_by_vec_width_lowering, EVEX_V256; - defm Z128 : expand_by_vec_width, + defm Z128 : expand_by_vec_width, expand_by_vec_width_lowering, EVEX_V128; } } -defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", avx512vl_i32_info>, - EVEX; -defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", avx512vl_i64_info>, - EVEX, VEX_W; -defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", avx512vl_f32_info>, - EVEX; -defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", avx512vl_f64_info>, - EVEX, VEX_W; +defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", AVX512_EXPAND, + avx512vl_i32_info>, EVEX; +defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", AVX512_EXPAND, + avx512vl_i64_info>, EVEX, VEX_W; +defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", AVX512_EXPAND, + avx512vl_f32_info>, EVEX; +defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", AVX512_EXPAND, + avx512vl_f64_info>, EVEX, VEX_W; //handle instruction reg_vec1 = op(reg_vec,imm) // op(mem_vec,imm) // op(broadcast(eltVt),imm) //all instruction created with FROUND_CURRENT multiclass avx512_unary_fp_packed_imm opc, string OpcodeStr, SDNode OpNode, - X86VectorVTInfo _>{ + OpndItins itins, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in { defm rri : AVX512_maskable; + (i32 imm:$src2)), itins.rr>, Sched<[itins.Sched]>; defm rmi : AVX512_maskable; + (i32 imm:$src2)), itins.rm>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; defm rmbi : AVX512_maskable, EVEX_B; + (i32 imm:$src2)), itins.rm>, EVEX_B, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } //handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae} multiclass avx512_unary_fp_sae_packed_imm opc, string OpcodeStr, - SDNode OpNode, X86VectorVTInfo _>{ + SDNode OpNode, OpndItins itins, + X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in defm rrib : AVX512_maskable opc, string OpcodeStr, "$src1, {sae}, $src2", (OpNode (_.VT _.RC:$src1), (i32 imm:$src2), - (i32 FROUND_NO_EXC))>, EVEX_B; + (i32 FROUND_NO_EXC)), itins.rr>, + EVEX_B, Sched<[itins.Sched]>; } multiclass avx512_common_unary_fp_sae_packed_imm opc, SDNode OpNode, - SDNode OpNodeRnd, Predicate prd>{ + SDNode OpNodeRnd, OpndItins itins, Predicate prd>{ let Predicates = [prd] in { - defm Z : avx512_unary_fp_packed_imm, - avx512_unary_fp_sae_packed_imm, - EVEX_V512; + defm Z : avx512_unary_fp_packed_imm, + avx512_unary_fp_sae_packed_imm, EVEX_V512; } let Predicates = [prd, HasVLX] in { - defm Z128 : avx512_unary_fp_packed_imm, - EVEX_V128; - defm Z256 : avx512_unary_fp_packed_imm, - EVEX_V256; + defm Z128 : avx512_unary_fp_packed_imm, EVEX_V128; + defm Z256 : avx512_unary_fp_packed_imm, EVEX_V256; } } @@ -8625,48 +8999,54 @@ multiclass avx512_common_unary_fp_sae_packed_imm opc, string OpcodeStr, SDNode OpNode, - X86VectorVTInfo _>{ + OpndItins itins, X86VectorVTInfo _>{ let ExeDomain = _.ExeDomain in { defm rri : AVX512_maskable; + (i32 imm:$src3)), itins.rr>, + Sched<[itins.Sched]>; defm rmi : AVX512_maskable; + (i32 imm:$src3)), itins.rm>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; defm rmbi : AVX512_maskable, EVEX_B; + (i32 imm:$src3)), itins.rm>, EVEX_B, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } //handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm) // op(reg_vec2,mem_vec,imm) multiclass avx512_3Op_rm_imm8 opc, string OpcodeStr, SDNode OpNode, - X86VectorVTInfo DestInfo, 
X86VectorVTInfo SrcInfo>{ + OpndItins itins, X86VectorVTInfo DestInfo, + X86VectorVTInfo SrcInfo>{ let ExeDomain = DestInfo.ExeDomain in { defm rri : AVX512_maskable; + (i8 imm:$src3))), itins.rr>, + Sched<[itins.Sched]>; defm rmi : AVX512_maskable; + (i8 imm:$src3))), itins.rm>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } @@ -8674,8 +9054,8 @@ multiclass avx512_3Op_rm_imm8 opc, string OpcodeStr, SDNode OpNode, // op(reg_vec2,mem_vec,imm) // op(reg_vec2,broadcast(eltVt),imm) multiclass avx512_3Op_imm8 opc, string OpcodeStr, SDNode OpNode, - X86VectorVTInfo _>: - avx512_3Op_rm_imm8{ + OpndItins itins, X86VectorVTInfo _>: + avx512_3Op_rm_imm8{ let ExeDomain = _.ExeDomain in defm rmbi : AVX512_maskable opc, string OpcodeStr, SDNode OpNode, "$src1, ${src2}"##_.BroadcastStr##", $src3", (OpNode (_.VT _.RC:$src1), (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))), - (i8 imm:$src3))>, EVEX_B; + (i8 imm:$src3)), itins.rm>, EVEX_B, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } //handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm) // op(reg_vec2,mem_scalar,imm) multiclass avx512_fp_scalar_imm opc, string OpcodeStr, SDNode OpNode, - X86VectorVTInfo _> { + OpndItins itins, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in { defm rri : AVX512_maskable_scalar; + (i32 imm:$src3)), itins.rr>, + Sched<[itins.Sched]>; defm rmi : AVX512_maskable_scalar; + (i32 imm:$src3)), itins.rm>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } //handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae} multiclass avx512_fp_sae_packed_imm opc, string OpcodeStr, - SDNode OpNode, X86VectorVTInfo _>{ + SDNode OpNode, OpndItins itins, + X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in defm rrib : AVX512_maskable opc, string OpcodeStr, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), (i32 imm:$src3), - (i32 FROUND_NO_EXC))>, EVEX_B; + (i32 FROUND_NO_EXC)), itins.rr>, + EVEX_B, Sched<[itins.Sched]>; } + //handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae} -multiclass avx512_fp_sae_scalar_imm opc, string OpcodeStr, - SDNode OpNode, X86VectorVTInfo _> { +multiclass avx512_fp_sae_scalar_imm opc, string OpcodeStr, SDNode OpNode, + OpndItins itins, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in defm NAME#rrib : AVX512_maskable_scalar opc, string OpcodeStr, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), (i32 imm:$src3), - (i32 FROUND_NO_EXC))>, EVEX_B; + (i32 FROUND_NO_EXC)), itins.rr>, + EVEX_B, Sched<[itins.Sched]>; } multiclass avx512_common_fp_sae_packed_imm opc, SDNode OpNode, - SDNode OpNodeRnd, Predicate prd>{ + SDNode OpNodeRnd, OpndItins itins, Predicate prd>{ let Predicates = [prd] in { - defm Z : avx512_fp_packed_imm, - avx512_fp_sae_packed_imm, + defm Z : avx512_fp_packed_imm, + avx512_fp_sae_packed_imm, EVEX_V512; } let Predicates = [prd, HasVLX] in { - defm Z128 : avx512_fp_packed_imm, + defm Z128 : avx512_fp_packed_imm, EVEX_V128; - defm Z256 : avx512_fp_packed_imm, + defm Z256 : avx512_fp_packed_imm, EVEX_V256; } } multiclass avx512_common_3Op_rm_imm8 opc, SDNode OpNode, string OpStr, - AVX512VLVectorVTInfo DestInfo, AVX512VLVectorVTInfo SrcInfo, - Predicate Pred = HasBWI> { + OpndItins itins, AVX512VLVectorVTInfo DestInfo, + AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> { let Predicates = [Pred] in { - defm Z : avx512_3Op_rm_imm8, EVEX_V512, AVX512AIi8Base, EVEX_4V; } let Predicates = [Pred, HasVLX] in { - defm Z128 : avx512_3Op_rm_imm8, EVEX_V128, AVX512AIi8Base, EVEX_4V; - defm Z256 : avx512_3Op_rm_imm8, EVEX_V256, AVX512AIi8Base, EVEX_4V; } } multiclass 
avx512_common_3Op_imm8 opc, SDNode OpNode, + bits<8> opc, SDNode OpNode, OpndItins itins, Predicate Pred = HasAVX512> { let Predicates = [Pred] in { - defm Z : avx512_3Op_imm8, EVEX_V512; + defm Z : avx512_3Op_imm8, EVEX_V512; } let Predicates = [Pred, HasVLX] in { - defm Z128 : avx512_3Op_imm8, EVEX_V128; - defm Z256 : avx512_3Op_imm8, EVEX_V256; + defm Z128 : avx512_3Op_imm8, EVEX_V128; + defm Z256 : avx512_3Op_imm8, EVEX_V256; } } multiclass avx512_common_fp_sae_scalar_imm opc, SDNode OpNode, - SDNode OpNodeRnd, Predicate prd>{ + SDNode OpNodeRnd, OpndItins itins, Predicate prd>{ let Predicates = [prd] in { - defm Z128 : avx512_fp_scalar_imm, - avx512_fp_sae_scalar_imm; + defm Z128 : avx512_fp_scalar_imm, + avx512_fp_sae_scalar_imm; } } multiclass avx512_common_unary_fp_sae_packed_imm_all opcPs, bits<8> opcPd, SDNode OpNode, - SDNode OpNodeRnd, Predicate prd>{ + SDNode OpNodeRnd, SizeItins itins, Predicate prd>{ defm PS : avx512_common_unary_fp_sae_packed_imm, EVEX_CD8<32, CD8VF>; + opcPs, OpNode, OpNodeRnd, itins.s, prd>, + EVEX_CD8<32, CD8VF>; defm PD : avx512_common_unary_fp_sae_packed_imm, EVEX_CD8<64, CD8VF>, VEX_W; + opcPd, OpNode, OpNodeRnd, itins.d, prd>, + EVEX_CD8<64, CD8VF>, VEX_W; } - defm VREDUCE : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56, - X86VReduce, X86VReduceRnd, HasDQI>, + X86VReduce, X86VReduceRnd, SSE_ALU_ITINS_P, HasDQI>, AVX512AIi8Base, EVEX; defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09, - X86VRndScale, X86VRndScaleRnd, HasAVX512>, + X86VRndScale, X86VRndScaleRnd, SSE_ALU_ITINS_P, HasAVX512>, AVX512AIi8Base, EVEX; defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26, - X86VGetMant, X86VGetMantRnd, HasAVX512>, + X86VGetMant, X86VGetMantRnd, SSE_ALU_ITINS_P, HasAVX512>, AVX512AIi8Base, EVEX; - defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info, - 0x50, X86VRange, - X86VRangeRnd, HasDQI>, + 0x50, X86VRange, X86VRangeRnd, + SSE_ALU_F64P, HasDQI>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info, - 0x50, X86VRange, - X86VRangeRnd, HasDQI>, + 0x50, X86VRange, X86VRangeRnd, + SSE_ALU_F32P, HasDQI>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; -defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd", f64x_info, - 0x51, X86Ranges, X86RangesRnd, - HasDQI>, +defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd", + f64x_info, 0x51, X86Ranges, X86RangesRnd, SSE_ALU_F64S, HasDQI>, AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W; defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info, - 0x51, X86Ranges, X86RangesRnd, - HasDQI>, + 0x51, X86Ranges, X86RangesRnd, SSE_ALU_F32S, HasDQI>, AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>; defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info, - 0x57, X86Reduces, - X86ReducesRnd, HasDQI>, + 0x57, X86Reduces, X86ReducesRnd, SSE_ALU_F64S, HasDQI>, AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W; defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info, - 0x57, X86Reduces, - X86ReducesRnd, HasDQI>, + 0x57, X86Reduces, X86ReducesRnd, SSE_ALU_F32S, HasDQI>, AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>; defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info, - 0x27, X86GetMants, - X86GetMantsRnd, HasAVX512>, + 0x27, X86GetMants, X86GetMantsRnd, SSE_ALU_F64S, HasAVX512>, AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W; defm 
VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info, - 0x27, X86GetMants, - X86GetMantsRnd, HasAVX512>, + 0x27, X86GetMants, X86GetMantsRnd, SSE_ALU_F32S, HasAVX512>, AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>; let Predicates = [HasAVX512] in { @@ -8915,25 +9296,25 @@ def : Pat<(v4f64 (ftrunc VR256X:$src)), (VRNDSCALEPDZ256rri VR256X:$src, (i32 0xB))>; } -multiclass avx512_shuff_packed_128 opc>{ +multiclass avx512_shuff_packed_128 opc>{ let Predicates = [HasAVX512] in { - defm Z : avx512_3Op_imm8, EVEX_V512; + defm Z : avx512_3Op_imm8, EVEX_V512; } let Predicates = [HasAVX512, HasVLX] in { - defm Z256 : avx512_3Op_imm8, EVEX_V256; + defm Z256 : avx512_3Op_imm8, EVEX_V256; } } -defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4",avx512vl_f32_info, 0x23>, - AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; -defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2",avx512vl_f64_info, 0x23>, - AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; -defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4",avx512vl_i32_info, 0x43>, - AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; -defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2",avx512vl_i64_info, 0x43>, - AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; +defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", SSE_SHUFP, + avx512vl_f32_info, 0x23>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; +defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", SSE_SHUFP, + avx512vl_f64_info, 0x23>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; +defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", SSE_SHUFP, + avx512vl_i32_info, 0x43>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; +defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", SSE_SHUFP, + avx512vl_i64_info, 0x43>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; let Predicates = [HasAVX512] in { // Provide fallback in case the load node that is used in the broadcast @@ -8968,17 +9349,18 @@ def : Pat<(v64i8 (X86SubVBroadcast (v16i8 VR128X:$src))), 0)>; } -multiclass avx512_valign { - defm NAME: avx512_common_3Op_imm8, +multiclass avx512_valign { + defm NAME: avx512_common_3Op_imm8, AVX512AIi8Base, EVEX_4V; } -defm VALIGND: avx512_valign<"valignd", avx512vl_i32_info>, +defm VALIGND: avx512_valign<"valignd", SSE_PALIGN, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; -defm VALIGNQ: avx512_valign<"valignq", avx512vl_i64_info>, +defm VALIGNQ: avx512_valign<"valignq", SSE_PALIGN, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W; -defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr" , +defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr", SSE_PALIGN, avx512vl_i8_info, avx512vl_i8_info>, EVEX_CD8<8, CD8VF>; @@ -9099,88 +9481,98 @@ let Predicates = [HasVLX, HasBWI] in { v16i8x_info, ValigndImm8XForm>; } -defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw" , - avx512vl_i16_info, avx512vl_i8_info>, EVEX_CD8<8, CD8VF>; +defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw", + SSE_INTMUL_ITINS_P, avx512vl_i16_info, avx512vl_i8_info>, + EVEX_CD8<8, CD8VF>; multiclass avx512_unary_rm opc, string OpcodeStr, SDNode OpNode, - X86VectorVTInfo _> { + OpndItins itins, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in { defm rr : AVX512_maskable, EVEX, AVX5128IBase; + (_.VT (OpNode _.RC:$src1)), itins.rr>, EVEX, AVX5128IBase, + Sched<[itins.Sched]>; defm rm : AVX512_maskable, - EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>; + (_.VT (OpNode (bitconvert (_.LdFrag addr:$src1)))), itins.rm>, + EVEX, AVX5128IBase, 
EVEX_CD8<_.EltSize, CD8VF>, + Sched<[itins.Sched.Folded]>; } } multiclass avx512_unary_rmb opc, string OpcodeStr, SDNode OpNode, - X86VectorVTInfo _> : - avx512_unary_rm { + OpndItins itins, X86VectorVTInfo _> : + avx512_unary_rm { defm rmb : AVX512_maskable, - EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>; + (_.ScalarLdFrag addr:$src1)))), itins.rm>, + EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, + Sched<[itins.Sched.Folded]>; } multiclass avx512_unary_rm_vl opc, string OpcodeStr, SDNode OpNode, - AVX512VLVectorVTInfo VTInfo, Predicate prd> { + OpndItins itins, AVX512VLVectorVTInfo VTInfo, + Predicate prd> { let Predicates = [prd] in - defm Z : avx512_unary_rm, EVEX_V512; + defm Z : avx512_unary_rm, + EVEX_V512; let Predicates = [prd, HasVLX] in { - defm Z256 : avx512_unary_rm, + defm Z256 : avx512_unary_rm, EVEX_V256; - defm Z128 : avx512_unary_rm, + defm Z128 : avx512_unary_rm, EVEX_V128; } } multiclass avx512_unary_rmb_vl opc, string OpcodeStr, SDNode OpNode, - AVX512VLVectorVTInfo VTInfo, Predicate prd> { + OpndItins itins, AVX512VLVectorVTInfo VTInfo, + Predicate prd> { let Predicates = [prd] in - defm Z : avx512_unary_rmb, + defm Z : avx512_unary_rmb, EVEX_V512; let Predicates = [prd, HasVLX] in { - defm Z256 : avx512_unary_rmb, + defm Z256 : avx512_unary_rmb, EVEX_V256; - defm Z128 : avx512_unary_rmb, + defm Z128 : avx512_unary_rmb, EVEX_V128; } } multiclass avx512_unary_rm_vl_dq opc_d, bits<8> opc_q, string OpcodeStr, - SDNode OpNode, Predicate prd> { - defm Q : avx512_unary_rmb_vl, VEX_W; - defm D : avx512_unary_rmb_vl; + SDNode OpNode, OpndItins itins, Predicate prd> { + defm Q : avx512_unary_rmb_vl, VEX_W; + defm D : avx512_unary_rmb_vl; } multiclass avx512_unary_rm_vl_bw opc_b, bits<8> opc_w, string OpcodeStr, - SDNode OpNode, Predicate prd> { - defm W : avx512_unary_rm_vl, VEX_WIG; - defm B : avx512_unary_rm_vl, VEX_WIG; + SDNode OpNode, OpndItins itins, Predicate prd> { + defm W : avx512_unary_rm_vl, VEX_WIG; + defm B : avx512_unary_rm_vl, VEX_WIG; } multiclass avx512_unary_rm_vl_all opc_b, bits<8> opc_w, bits<8> opc_d, bits<8> opc_q, - string OpcodeStr, SDNode OpNode> { - defm NAME : avx512_unary_rm_vl_dq { + defm NAME : avx512_unary_rm_vl_dq, - avx512_unary_rm_vl_bw; } -defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs>; +defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs, SSE_PABS>; // VPABS: Use 512bit version to implement 128/256 bit in case NoVLX. let Predicates = [HasAVX512, NoVLX] in { @@ -9196,122 +9588,103 @@ let Predicates = [HasAVX512, NoVLX] in { sub_xmm)>; } -multiclass avx512_ctlz opc, string OpcodeStr, Predicate prd>{ +// Use 512bit version to implement 128/256 bit. +multiclass avx512_unary_lowering { + let Predicates = [prd, NoVLX] in { + def : Pat<(_.info256.VT(OpNode _.info256.RC:$src1)), + (EXTRACT_SUBREG + (!cast(InstrStr # "Zrr") + (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)), + _.info256.RC:$src1, + _.info256.SubRegIdx)), + _.info256.SubRegIdx)>; - defm NAME : avx512_unary_rm_vl_dq; + def : Pat<(_.info128.VT(OpNode _.info128.RC:$src1)), + (EXTRACT_SUBREG + (!cast(InstrStr # "Zrr") + (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)), + _.info128.RC:$src1, + _.info128.SubRegIdx)), + _.info128.SubRegIdx)>; + } } -defm VPLZCNT : avx512_ctlz<0x44, "vplzcnt", HasCDI>; -defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict, HasCDI>; +// FIXME: Is there a better scheduler itinerary for VPLZCNT? 
+defm VPLZCNT : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz, + SSE_INTALU_ITINS_P, HasCDI>; -// VPLZCNT: Use 512bit version to implement 128/256 bit in case NoVLX. -let Predicates = [HasCDI, NoVLX] in { - def : Pat<(v4i64 (ctlz VR256X:$src)), - (EXTRACT_SUBREG - (VPLZCNTQZrr - (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)), - sub_ymm)>; - def : Pat<(v2i64 (ctlz VR128X:$src)), - (EXTRACT_SUBREG - (VPLZCNTQZrr - (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)), - sub_xmm)>; +// FIXME: Is there a better scheduler itinerary for VPCONFLICT? +defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict, + SSE_INTALU_ITINS_P, HasCDI>; - def : Pat<(v8i32 (ctlz VR256X:$src)), - (EXTRACT_SUBREG - (VPLZCNTDZrr - (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)), - sub_ymm)>; - def : Pat<(v4i32 (ctlz VR128X:$src)), - (EXTRACT_SUBREG - (VPLZCNTDZrr - (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)), - sub_xmm)>; -} +// VPLZCNT: Use 512bit version to implement 128/256 bit in case NoVLX. +defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>; +defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>; //===---------------------------------------------------------------------===// // Counts number of ones - VPOPCNTD and VPOPCNTQ //===---------------------------------------------------------------------===// -multiclass avx512_unary_rmb_popcnt opc, string OpcodeStr, X86VectorVTInfo VTInfo> { - let Predicates = [HasVPOPCNTDQ] in - defm Z : avx512_unary_rmb, EVEX_V512; -} - -// Use 512bit version to implement 128/256 bit. -multiclass avx512_unary_lowering { - let Predicates = [prd] in { - def Z256_Alt : Pat<(_.info256.VT(OpNode _.info256.RC:$src1)), - (EXTRACT_SUBREG - (!cast(NAME # "Zrr") - (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)), - _.info256.RC:$src1, - _.info256.SubRegIdx)), - _.info256.SubRegIdx)>; - - def Z128_Alt : Pat<(_.info128.VT(OpNode _.info128.RC:$src1)), - (EXTRACT_SUBREG - (!cast(NAME # "Zrr") - (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)), - _.info128.RC:$src1, - _.info128.SubRegIdx)), - _.info128.SubRegIdx)>; - } -} +// FIXME: Is there a better scheduler itinerary for VPOPCNTD/VPOPCNTQ? 
+defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop, + SSE_INTALU_ITINS_P, HasVPOPCNTDQ>; -defm VPOPCNTD : avx512_unary_rmb_popcnt<0x55, "vpopcntd", v16i32_info>, - avx512_unary_lowering; -defm VPOPCNTQ : avx512_unary_rmb_popcnt<0x55, "vpopcntq", v8i64_info>, - avx512_unary_lowering, VEX_W; +defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info, HasVPOPCNTDQ>; +defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ>; //===---------------------------------------------------------------------===// // Replicate Single FP - MOVSHDUP and MOVSLDUP //===---------------------------------------------------------------------===// -multiclass avx512_replicate opc, string OpcodeStr, SDNode OpNode>{ - defm NAME: avx512_unary_rm_vl, XS; +multiclass avx512_replicate opc, string OpcodeStr, SDNode OpNode, + OpndItins itins> { + defm NAME: avx512_unary_rm_vl, XS; } -defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup>; -defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup>; +defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup, SSE_MOVDDUP>; +defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup, SSE_MOVDDUP>; //===----------------------------------------------------------------------===// // AVX-512 - MOVDDUP //===----------------------------------------------------------------------===// multiclass avx512_movddup_128 opc, string OpcodeStr, SDNode OpNode, - X86VectorVTInfo _> { + OpndItins itins, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in { defm rr : AVX512_maskable, EVEX; + (_.VT (OpNode (_.VT _.RC:$src))), itins.rr>, EVEX, + Sched<[itins.Sched]>; defm rm : AVX512_maskable, - EVEX, EVEX_CD8<_.EltSize, CD8VH>; + (_.ScalarLdFrag addr:$src))))), + itins.rm>, EVEX, EVEX_CD8<_.EltSize, CD8VH>, + Sched<[itins.Sched.Folded]>; } } multiclass avx512_movddup_common opc, string OpcodeStr, SDNode OpNode, - AVX512VLVectorVTInfo VTInfo> { + OpndItins itins, AVX512VLVectorVTInfo VTInfo> { - defm Z : avx512_unary_rm, EVEX_V512; + defm Z : avx512_unary_rm, EVEX_V512; let Predicates = [HasAVX512, HasVLX] in { - defm Z256 : avx512_unary_rm, + defm Z256 : avx512_unary_rm, EVEX_V256; - defm Z128 : avx512_movddup_128, + defm Z128 : avx512_movddup_128, EVEX_V128; } } -multiclass avx512_movddup opc, string OpcodeStr, SDNode OpNode>{ - defm NAME: avx512_movddup_common opc, string OpcodeStr, SDNode OpNode, + OpndItins itins> { + defm NAME: avx512_movddup_common, XD, VEX_W; } -defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup>; +defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup, SSE_MOVDDUP>; let Predicates = [HasVLX] in { def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))), @@ -9381,7 +9754,7 @@ multiclass avx512_extract_elt_bw_m opc, string OpcodeStr, SDNode OpNode, OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), imm:$src2))), addr:$dst)]>, - EVEX, EVEX_CD8<_.EltSize, CD8VT1>; + EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteShuffleLd]>; } multiclass avx512_extract_elt_b { @@ -9391,7 +9764,7 @@ multiclass avx512_extract_elt_b { OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR32orGR64:$dst, (X86pextrb (_.VT _.RC:$src1), imm:$src2))]>, - EVEX, TAPD; + EVEX, TAPD, Sched<[WriteShuffle]>; defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD; } @@ -9403,14 +9776,15 @@ multiclass avx512_extract_elt_w { (ins _.RC:$src1, u8imm:$src2), OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR32orGR64:$dst, - 
(X86pextrw (_.VT _.RC:$src1), imm:$src2))]>, - EVEX, PD; + (X86pextrw (_.VT _.RC:$src1), imm:$src2))], + IIC_SSE_PEXTRW>, EVEX, PD, Sched<[WriteShuffle]>; let hasSideEffects = 0 in def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst), (ins _.RC:$src1, u8imm:$src2), - OpcodeStr#".s\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, - EVEX, TAPD, FoldGenData; + OpcodeStr#".s\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], + IIC_SSE_PEXTRW>, EVEX, TAPD, FoldGenData, + Sched<[WriteShuffle]>; defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD; } @@ -9424,14 +9798,15 @@ multiclass avx512_extract_elt_dq, - EVEX, TAPD; + EVEX, TAPD, Sched<[WriteShuffle]>; def mr : AVX512Ii8<0x16, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2), OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(store (extractelt (_.VT _.RC:$src1), imm:$src2),addr:$dst)]>, - EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD; + EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD, + Sched<[WriteShuffleLd]>; } } @@ -9447,7 +9822,7 @@ multiclass avx512_insert_elt_m opc, string OpcodeStr, SDNode OpNode, OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), imm:$src3)))]>, - EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>; + EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteShuffleLd, ReadAfterLd]>; } multiclass avx512_insert_elt_bw opc, string OpcodeStr, SDNode OpNode, @@ -9457,7 +9832,8 @@ multiclass avx512_insert_elt_bw opc, string OpcodeStr, SDNode OpNode, (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3), OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", [(set _.RC:$dst, - (OpNode _.RC:$src1, GR32orGR64:$src2, imm:$src3))]>, EVEX_4V; + (OpNode _.RC:$src1, GR32orGR64:$src2, imm:$src3))]>, EVEX_4V, + Sched<[WriteShuffle]>; defm NAME : avx512_insert_elt_m; } @@ -9471,7 +9847,7 @@ multiclass avx512_insert_elt_dq opc, string OpcodeStr, OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", [(set _.RC:$dst, (_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>, - EVEX_4V, TAPD; + EVEX_4V, TAPD, Sched<[WriteShuffle]>; defm NAME : avx512_insert_elt_m, TAPD; @@ -9484,87 +9860,104 @@ defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info, extloadi16>, PD, VEX_WIG; defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>; defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, VEX_W; + //===----------------------------------------------------------------------===// // VSHUFPS - VSHUFPD Operations //===----------------------------------------------------------------------===// + multiclass avx512_shufp{ - defm NAME: avx512_common_3Op_imm8, - EVEX_CD8, - AVX512AIi8Base, EVEX_4V; + defm NAME: avx512_common_3Op_imm8, EVEX_CD8, + AVX512AIi8Base, EVEX_4V; } defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_i32_info, avx512vl_f32_info>, PS; defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_i64_info, avx512vl_f64_info>, PD, VEX_W; + //===----------------------------------------------------------------------===// // AVX-512 - Byte shift Left/Right //===----------------------------------------------------------------------===// +let Sched = WriteVecShift in +def AVX512_BYTESHIFT : OpndItins< + IIC_SSE_INTSHDQ_P_RI, IIC_SSE_INTSHDQ_P_RI +>; + multiclass avx512_shift_packed opc, SDNode OpNode, Format MRMr, - Format MRMm, string OpcodeStr, X86VectorVTInfo _>{ + Format MRMm, string OpcodeStr, + OpndItins itins, X86VectorVTInfo _>{ def rr : AVX512; + [(set _.RC:$dst,(_.VT 
(OpNode _.RC:$src1, (i8 imm:$src2))))], + itins.rr>, Sched<[itins.Sched]>; def rm : AVX512; + (i8 imm:$src2))))], itins.rm>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } multiclass avx512_shift_packed_all opc, SDNode OpNode, Format MRMr, - Format MRMm, string OpcodeStr, Predicate prd>{ + Format MRMm, string OpcodeStr, + OpndItins itins, Predicate prd>{ let Predicates = [prd] in - defm Z512 : avx512_shift_packed, EVEX_V512; + defm Z : avx512_shift_packed, EVEX_V512; let Predicates = [prd, HasVLX] in { defm Z256 : avx512_shift_packed, EVEX_V256; + OpcodeStr, itins, v32i8x_info>, EVEX_V256; defm Z128 : avx512_shift_packed, EVEX_V128; + OpcodeStr, itins, v16i8x_info>, EVEX_V128; } } defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq", - HasBWI>, AVX512PDIi8Base, EVEX_4V, VEX_WIG; + AVX512_BYTESHIFT, HasBWI>, AVX512PDIi8Base, + EVEX_4V, VEX_WIG; defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq", - HasBWI>, AVX512PDIi8Base, EVEX_4V, VEX_WIG; + AVX512_BYTESHIFT, HasBWI>, AVX512PDIi8Base, + EVEX_4V, VEX_WIG; multiclass avx512_psadbw_packed opc, SDNode OpNode, - string OpcodeStr, X86VectorVTInfo _dst, - X86VectorVTInfo _src>{ + string OpcodeStr, OpndItins itins, + X86VectorVTInfo _dst, X86VectorVTInfo _src> { def rr : AVX512BI; + (_src.VT _src.RC:$src2))))], itins.rr>, + Sched<[itins.Sched]>; def rm : AVX512BI; + (_src.LdFrag addr:$src2))))))], itins.rm>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } multiclass avx512_psadbw_packed_all opc, SDNode OpNode, - string OpcodeStr, Predicate prd> { + string OpcodeStr, OpndItins itins, + Predicate prd> { let Predicates = [prd] in - defm Z512 : avx512_psadbw_packed, EVEX_V512; + defm Z : avx512_psadbw_packed, EVEX_V512; let Predicates = [prd, HasVLX] in { - defm Z256 : avx512_psadbw_packed, EVEX_V256; - defm Z128 : avx512_psadbw_packed, EVEX_V128; } } defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw", - HasBWI>, EVEX_4V, VEX_WIG; + SSE_MPSADBW_ITINS, HasBWI>, EVEX_4V, VEX_WIG; // Transforms to swizzle an immediate to enable better matching when // memory operand isn't in the right place. @@ -9629,7 +10022,7 @@ def VPTERNLOG312_imm8 : SDNodeXForm; multiclass avx512_ternlog opc, string OpcodeStr, SDNode OpNode, - X86VectorVTInfo _>{ + OpndItins itins, X86VectorVTInfo _>{ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in { defm rri : AVX512_maskable_3src opc, string OpcodeStr, SDNode OpNode, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), (_.VT _.RC:$src3), - (i8 imm:$src4)), 1, 1>, AVX512AIi8Base, EVEX_4V; + (i8 imm:$src4)), itins.rr, 1, 1>, + AVX512AIi8Base, EVEX_4V, Sched<[itins.Sched]>; defm rmi : AVX512_maskable_3src, - AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>; + (i8 imm:$src4)), itins.rm, 1, 0>, + AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; defm rmbi : AVX512_maskable_3src opc, string OpcodeStr, SDNode OpNode, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))), - (i8 imm:$src4)), 1, 0>, EVEX_B, - AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>; + (i8 imm:$src4)), itins.rm, 1, 0>, EVEX_B, + AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; }// Constraints = "$src1 = $dst" // Additional patterns for matching passthru operand in other positions. 
@@ -9793,24 +10189,27 @@ multiclass avx512_ternlog opc, string OpcodeStr, SDNode OpNode, _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>; } -multiclass avx512_common_ternlog{ +multiclass avx512_common_ternlog { let Predicates = [HasAVX512] in - defm Z : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, _.info512>, EVEX_V512; + defm Z : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, itins, _.info512>, EVEX_V512; let Predicates = [HasAVX512, HasVLX] in { - defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, _.info128>, EVEX_V128; - defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, _.info256>, EVEX_V256; + defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, itins, _.info128>, EVEX_V128; + defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, itins, _.info256>, EVEX_V256; } } -defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", avx512vl_i32_info>; -defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", avx512vl_i64_info>, VEX_W; +defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SSE_INTALU_ITINS_P, + avx512vl_i32_info>; +defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SSE_INTALU_ITINS_P, + avx512vl_i64_info>, VEX_W; //===----------------------------------------------------------------------===// // AVX-512 - FixupImm //===----------------------------------------------------------------------===// multiclass avx512_fixupimm_packed opc, string OpcodeStr, SDNode OpNode, - X86VectorVTInfo _>{ + OpndItins itins, X86VectorVTInfo _>{ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in { defm rri : AVX512_maskable_3src opc, string OpcodeStr, SDNode OpNode, (_.VT _.RC:$src2), (_.IntVT _.RC:$src3), (i32 imm:$src4), - (i32 FROUND_CURRENT))>; + (i32 FROUND_CURRENT)), itins.rr>, Sched<[itins.Sched]>; defm rmi : AVX512_maskable_3src opc, string OpcodeStr, SDNode OpNode, (_.VT _.RC:$src2), (_.IntVT (bitconvert (_.LdFrag addr:$src3))), (i32 imm:$src4), - (i32 FROUND_CURRENT))>; + (i32 FROUND_CURRENT)), itins.rm>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; defm rmbi : AVX512_maskable_3src opc, string OpcodeStr, SDNode OpNode, (_.VT _.RC:$src2), (_.IntVT (X86VBroadcast(_.ScalarLdFrag addr:$src3))), (i32 imm:$src4), - (i32 FROUND_CURRENT))>, EVEX_B; + (i32 FROUND_CURRENT)), itins.rm>, + EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>; } // Constraints = "$src1 = $dst" } multiclass avx512_fixupimm_packed_sae opc, string OpcodeStr, - SDNode OpNode, X86VectorVTInfo _>{ + SDNode OpNode, OpndItins itins, + X86VectorVTInfo _>{ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in { defm rrib : AVX512_maskable_3src, EVEX_B; + (i32 FROUND_NO_EXC)), itins.rr>, + EVEX_B, Sched<[itins.Sched]>; } } multiclass avx512_fixupimm_scalar opc, string OpcodeStr, SDNode OpNode, - X86VectorVTInfo _, X86VectorVTInfo _src3VT> { + OpndItins itins, X86VectorVTInfo _, + X86VectorVTInfo _src3VT> { let Constraints = "$src1 = $dst" , Predicates = [HasAVX512], ExeDomain = _.ExeDomain in { defm rri : AVX512_maskable_3src_scalar opc, string OpcodeStr, SDNode OpNode, (_.VT _.RC:$src2), (_src3VT.VT _src3VT.RC:$src3), (i32 imm:$src4), - (i32 FROUND_CURRENT))>; - + (i32 FROUND_CURRENT)), itins.rr>, Sched<[itins.Sched]>; defm rrib : AVX512_maskable_3src_scalar opc, string OpcodeStr, SDNode OpNode, (_.VT _.RC:$src2), (_src3VT.VT _src3VT.RC:$src3), (i32 imm:$src4), - (i32 FROUND_NO_EXC))>, EVEX_B; + (i32 FROUND_NO_EXC)), itins.rm>, + EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>; defm rmi : AVX512_maskable_3src_scalar opc, string OpcodeStr, SDNode OpNode, (_src3VT.VT (scalar_to_vector 
(_src3VT.ScalarLdFrag addr:$src3))), (i32 imm:$src4), - (i32 FROUND_CURRENT))>; + (i32 FROUND_CURRENT)), itins.rm>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } -multiclass avx512_fixupimm_packed_all{ +multiclass avx512_fixupimm_packed_all { let Predicates = [HasAVX512] in - defm Z : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, _Vec.info512>, - avx512_fixupimm_packed_sae<0x54, "vfixupimm", X86VFixupimm, _Vec.info512>, - AVX512AIi8Base, EVEX_4V, EVEX_V512; + defm Z : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, itins, + _Vec.info512>, + avx512_fixupimm_packed_sae<0x54, "vfixupimm", X86VFixupimm, itins, + _Vec.info512>, AVX512AIi8Base, EVEX_4V, EVEX_V512; let Predicates = [HasAVX512, HasVLX] in { - defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, _Vec.info128>, - AVX512AIi8Base, EVEX_4V, EVEX_V128; - defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, _Vec.info256>, - AVX512AIi8Base, EVEX_4V, EVEX_V256; + defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, itins, + _Vec.info128>, AVX512AIi8Base, EVEX_4V, EVEX_V128; + defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, itins, + _Vec.info256>, AVX512AIi8Base, EVEX_4V, EVEX_V256; } } defm VFIXUPIMMSS : avx512_fixupimm_scalar<0x55, "vfixupimm", X86VFixupimmScalar, - f32x_info, v4i32x_info>, + SSE_ALU_F32S, f32x_info, v4i32x_info>, AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>; defm VFIXUPIMMSD : avx512_fixupimm_scalar<0x55, "vfixupimm", X86VFixupimmScalar, - f64x_info, v2i64x_info>, + SSE_ALU_F64S, f64x_info, v2i64x_info>, AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W; -defm VFIXUPIMMPS : avx512_fixupimm_packed_all, +defm VFIXUPIMMPS : avx512_fixupimm_packed_all, EVEX_CD8<32, CD8VF>; -defm VFIXUPIMMPD : avx512_fixupimm_packed_all, +defm VFIXUPIMMPD : avx512_fixupimm_packed_all, EVEX_CD8<64, CD8VF>, VEX_W; @@ -10075,26 +10481,27 @@ defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>; //===----------------------------------------------------------------------===// multiclass VBMI2_shift_var_rm Op, string OpStr, SDNode OpNode, - X86VectorVTInfo VTI> { + OpndItins itins, X86VectorVTInfo VTI> { let Constraints = "$src1 = $dst", ExeDomain = VTI.ExeDomain in { defm r: AVX512_maskable_3src, - AVX512FMA3Base; + (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3)), + itins.rr>, AVX512FMA3Base, Sched<[itins.Sched]>; defm m: AVX512_maskable_3src, - AVX512FMA3Base; + (VTI.VT (bitconvert (VTI.LdFrag addr:$src3))))), + itins.rm>, AVX512FMA3Base, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } multiclass VBMI2_shift_var_rmb Op, string OpStr, SDNode OpNode, - X86VectorVTInfo VTI> - : VBMI2_shift_var_rm { + OpndItins itins, X86VectorVTInfo VTI> + : VBMI2_shift_var_rm { let Constraints = "$src1 = $dst", ExeDomain = VTI.ExeDomain in defm mb: AVX512_maskable_3src Op, string OpStr, SDNode OpNode, "${src3}"##VTI.BroadcastStr##", $src2", "$src2, ${src3}"##VTI.BroadcastStr, (OpNode VTI.RC:$src1, VTI.RC:$src2, - (VTI.VT (X86VBroadcast (VTI.ScalarLdFrag addr:$src3))))>, - AVX512FMA3Base, EVEX_B; + (VTI.VT (X86VBroadcast (VTI.ScalarLdFrag addr:$src3)))), + itins.rm>, AVX512FMA3Base, EVEX_B, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } multiclass VBMI2_shift_var_rm_common Op, string OpStr, SDNode OpNode, - AVX512VLVectorVTInfo VTI> { + OpndItins itins, AVX512VLVectorVTInfo VTI> { let Predicates = [HasVBMI2] in - defm Z : VBMI2_shift_var_rm, EVEX_V512; + defm Z : VBMI2_shift_var_rm, EVEX_V512; let Predicates = [HasVBMI2, HasVLX] in { - defm 
Z256 : VBMI2_shift_var_rm, EVEX_V256; - defm Z128 : VBMI2_shift_var_rm, EVEX_V128; + defm Z256 : VBMI2_shift_var_rm, EVEX_V256; + defm Z128 : VBMI2_shift_var_rm, EVEX_V128; } } multiclass VBMI2_shift_var_rmb_common Op, string OpStr, SDNode OpNode, - AVX512VLVectorVTInfo VTI> { + OpndItins itins, AVX512VLVectorVTInfo VTI> { let Predicates = [HasVBMI2] in - defm Z : VBMI2_shift_var_rmb, EVEX_V512; + defm Z : VBMI2_shift_var_rmb, EVEX_V512; let Predicates = [HasVBMI2, HasVLX] in { - defm Z256 : VBMI2_shift_var_rmb, EVEX_V256; - defm Z128 : VBMI2_shift_var_rmb, EVEX_V128; + defm Z256 : VBMI2_shift_var_rmb, EVEX_V256; + defm Z128 : VBMI2_shift_var_rmb, EVEX_V128; } } multiclass VBMI2_shift_var wOp, bits<8> dqOp, string Prefix, - SDNode OpNode> { - defm W : VBMI2_shift_var_rm_common { + defm W : VBMI2_shift_var_rm_common, VEX_W, EVEX_CD8<16, CD8VF>; - defm D : VBMI2_shift_var_rmb_common, EVEX_CD8<32, CD8VF>; - defm Q : VBMI2_shift_var_rmb_common, VEX_W, EVEX_CD8<64, CD8VF>; } multiclass VBMI2_shift_imm wOp, bits<8> dqOp, string Prefix, - SDNode OpNode> { - defm W : avx512_common_3Op_rm_imm8, VEX_W, EVEX_CD8<16, CD8VF>; + SDNode OpNode, OpndItins itins> { + defm W : avx512_common_3Op_rm_imm8, + VEX_W, EVEX_CD8<16, CD8VF>; defm D : avx512_common_3Op_imm8, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; + OpNode, itins, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; defm Q : avx512_common_3Op_imm8, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; + itins, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; } // Concat & Shift -defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv>; -defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv>; -defm VPSHLD : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld>; -defm VPSHRD : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd>; +defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SSE_INTMUL_ITINS_P>; +defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SSE_INTMUL_ITINS_P>; +defm VPSHLD : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SSE_INTMUL_ITINS_P>; +defm VPSHRD : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SSE_INTMUL_ITINS_P>; + // Compress -defm VPCOMPRESSB : compress_by_elt_width <0x63, "vpcompressb", avx512vl_i8_info, - HasVBMI2>, EVEX; -defm VPCOMPRESSW : compress_by_elt_width <0x63, "vpcompressw", avx512vl_i16_info, - HasVBMI2>, EVEX, VEX_W; +defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", AVX512_COMPRESS, + avx512vl_i8_info, HasVBMI2>, EVEX; +defm VPCOMPRESSW : compress_by_elt_width <0x63, "vpcompressw", AVX512_COMPRESS, + avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W; // Expand -defm VPEXPANDB : expand_by_elt_width <0x62, "vpexpandb", avx512vl_i8_info, - HasVBMI2>, EVEX; -defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", avx512vl_i16_info, - HasVBMI2>, EVEX, VEX_W; +defm VPEXPANDB : expand_by_elt_width <0x62, "vpexpandb", AVX512_EXPAND, + avx512vl_i8_info, HasVBMI2>, EVEX; +defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", AVX512_EXPAND, + avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W; //===----------------------------------------------------------------------===// // VNNI @@ -10167,81 +10577,89 @@ defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", avx512vl_i16_info, let Constraints = "$src1 = $dst" in multiclass VNNI_rmb Op, string OpStr, SDNode OpNode, - X86VectorVTInfo VTI> { + OpndItins itins, X86VectorVTInfo VTI> { defm r : AVX512_maskable_3src, - EVEX_4V, T8PD; + VTI.RC:$src2, VTI.RC:$src3)), + itins.rr>, EVEX_4V, T8PD, Sched<[itins.Sched]>; 
defm m : AVX512_maskable_3src, - EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD; + (VTI.LdFrag addr:$src3))))), + itins.rm>, EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD, + Sched<[itins.Sched.Folded, ReadAfterLd]>; defm mb : AVX512_maskable_3src, - EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B, T8PD; + (VTI.ScalarLdFrag addr:$src3)))), + itins.rm>, EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B, + T8PD, Sched<[itins.Sched.Folded, ReadAfterLd]>; } -multiclass VNNI_common Op, string OpStr, SDNode OpNode> { +multiclass VNNI_common Op, string OpStr, SDNode OpNode, OpndItins itins> { let Predicates = [HasVNNI] in - defm Z : VNNI_rmb, EVEX_V512; + defm Z : VNNI_rmb, EVEX_V512; let Predicates = [HasVNNI, HasVLX] in { - defm Z256 : VNNI_rmb, EVEX_V256; - defm Z128 : VNNI_rmb, EVEX_V128; + defm Z256 : VNNI_rmb, EVEX_V256; + defm Z128 : VNNI_rmb, EVEX_V128; } } -defm VPDPBUSD : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd>; -defm VPDPBUSDS : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds>; -defm VPDPWSSD : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd>; -defm VPDPWSSDS : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds>; +// FIXME: Is there a better scheduler itinerary for VPDP? +defm VPDPBUSD : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SSE_PMADD>; +defm VPDPBUSDS : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SSE_PMADD>; +defm VPDPWSSD : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SSE_PMADD>; +defm VPDPWSSDS : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SSE_PMADD>; //===----------------------------------------------------------------------===// // Bit Algorithms //===----------------------------------------------------------------------===// -defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, - avx512vl_i8_info, HasBITALG>, - avx512_unary_lowering; -defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, - avx512vl_i16_info, HasBITALG>, - avx512_unary_lowering, VEX_W; +// FIXME: Is there a better scheduler itinerary for VPOPCNTB/VPOPCNTW? 
+defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SSE_INTALU_ITINS_P, + avx512vl_i8_info, HasBITALG>; +defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SSE_INTALU_ITINS_P, + avx512vl_i16_info, HasBITALG>, VEX_W; -multiclass VPSHUFBITQMB_rm { +defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>; +defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>; + +multiclass VPSHUFBITQMB_rm { defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst), (ins VTI.RC:$src1, VTI.RC:$src2), "vpshufbitqmb", "$src2, $src1", "$src1, $src2", (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1), - (VTI.VT VTI.RC:$src2))>, EVEX_4V, T8PD; + (VTI.VT VTI.RC:$src2)), itins.rr>, EVEX_4V, T8PD, + Sched<[itins.Sched]>; defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst), (ins VTI.RC:$src1, VTI.MemOp:$src2), "vpshufbitqmb", "$src2, $src1", "$src1, $src2", (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1), - (VTI.VT (bitconvert (VTI.LdFrag addr:$src2))))>, - EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD; + (VTI.VT (bitconvert (VTI.LdFrag addr:$src2)))), + itins.rm>, EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } -multiclass VPSHUFBITQMB_common { +multiclass VPSHUFBITQMB_common { let Predicates = [HasBITALG] in - defm Z : VPSHUFBITQMB_rm, EVEX_V512; + defm Z : VPSHUFBITQMB_rm, EVEX_V512; let Predicates = [HasBITALG, HasVLX] in { - defm Z256 : VPSHUFBITQMB_rm, EVEX_V256; - defm Z128 : VPSHUFBITQMB_rm, EVEX_V128; + defm Z256 : VPSHUFBITQMB_rm, EVEX_V256; + defm Z128 : VPSHUFBITQMB_rm, EVEX_V128; } } -defm VPSHUFBITQMB : VPSHUFBITQMB_common; +// FIXME: Is there a better scheduler itinerary for VPSHUFBITQMB? +defm VPSHUFBITQMB : VPSHUFBITQMB_common; //===----------------------------------------------------------------------===// // GFNI @@ -10259,13 +10677,13 @@ multiclass GF2P8MULB_avx512_common Op, string OpStr, SDNode OpNode> { } } -defm GF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb>, - EVEX_CD8<8, CD8VF>, T8PD; +defm VGF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb>, + EVEX_CD8<8, CD8VF>, T8PD; multiclass GF2P8AFFINE_avx512_rmb_imm Op, string OpStr, SDNode OpNode, - X86VectorVTInfo VTI, + OpndItins itins, X86VectorVTInfo VTI, X86VectorVTInfo BcstVTI> - : avx512_3Op_rm_imm8 { + : avx512_3Op_rm_imm8 { let ExeDomain = VTI.ExeDomain in defm rmbi : AVX512_maskable Op, string OpStr, SDNode OpNode, "$src1, ${src2}"##BcstVTI.BroadcastStr##", $src3", (OpNode (VTI.VT VTI.RC:$src1), (bitconvert (BcstVTI.VT (X86VBroadcast (loadi64 addr:$src2)))), - (i8 imm:$src3))>, EVEX_B; + (i8 imm:$src3)), itins.rm>, EVEX_B, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } -multiclass GF2P8AFFINE_avx512_common Op, string OpStr, SDNode OpNode> { +multiclass GF2P8AFFINE_avx512_common Op, string OpStr, SDNode OpNode, + OpndItins itins> { let Predicates = [HasGFNI, HasAVX512, HasBWI] in - defm Z : GF2P8AFFINE_avx512_rmb_imm, EVEX_V512; let Predicates = [HasGFNI, HasVLX, HasBWI] in { - defm Z256 : GF2P8AFFINE_avx512_rmb_imm, EVEX_V256; - defm Z128 : GF2P8AFFINE_avx512_rmb_imm, EVEX_V128; } } -defm GF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb", - X86GF2P8affineinvqb>, - EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base; -defm GF2P8AFFINEQB : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb", - X86GF2P8affineqb>, - EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base; +defm VGF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb", + X86GF2P8affineinvqb, SSE_INTMUL_ITINS_P>, + 
EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base; +defm VGF2P8AFFINEQB : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb", + X86GF2P8affineqb, SSE_INTMUL_ITINS_P>, + EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base; diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td index 95f9e84af819..d09deb5b7584 100644 --- a/lib/Target/X86/X86InstrArithmetic.td +++ b/lib/Target/X86/X86InstrArithmetic.td @@ -104,7 +104,8 @@ def MUL32m : I<0xF7, MRM4m, (outs), (ins i32mem:$src), // RAX,RDX = RAX*[mem64] let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in def MUL64m : RI<0xF7, MRM4m, (outs), (ins i64mem:$src), - "mul{q}\t$src", [], IIC_MUL64>, SchedLoadReg; + "mul{q}\t$src", [], IIC_MUL64>, SchedLoadReg, + Requires<[In64BitMode]>; } let hasSideEffects = 0 in { @@ -143,7 +144,8 @@ def IMUL32m : I<0xF7, MRM5m, (outs), (ins i32mem:$src), // RAX,RDX = RAX*[mem64] let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in def IMUL64m : RI<0xF7, MRM5m, (outs), (ins i64mem:$src), - "imul{q}\t$src", [], IIC_IMUL64>, SchedLoadReg; + "imul{q}\t$src", [], IIC_IMUL64>, SchedLoadReg, + Requires<[In64BitMode]>; } } // hasSideEffects @@ -326,7 +328,7 @@ def DIV32m : I<0xF7, MRM6m, (outs), (ins i32mem:$src), let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in def DIV64m : RI<0xF7, MRM6m, (outs), (ins i64mem:$src), "div{q}\t$src", [], IIC_DIV64>, - SchedLoadReg; + SchedLoadReg, Requires<[In64BitMode]>; } // Signed division/remainder. @@ -362,7 +364,7 @@ def IDIV32m: I<0xF7, MRM7m, (outs), (ins i32mem:$src), let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in // RDX:RAX/[mem64] = RAX,RDX def IDIV64m: RI<0xF7, MRM7m, (outs), (ins i64mem:$src), "idiv{q}\t$src", [], IIC_IDIV64>, - SchedLoadReg; + SchedLoadReg, Requires<[In64BitMode]>; } } // hasSideEffects = 0 @@ -407,7 +409,8 @@ def NEG32m : I<0xF7, MRM3m, (outs), (ins i32mem:$dst), (implicit EFLAGS)], IIC_UNARY_MEM>, OpSize32; def NEG64m : RI<0xF7, MRM3m, (outs), (ins i64mem:$dst), "neg{q}\t$dst", [(store (ineg (loadi64 addr:$dst)), addr:$dst), - (implicit EFLAGS)], IIC_UNARY_MEM>; + (implicit EFLAGS)], IIC_UNARY_MEM>, + Requires<[In64BitMode]>; } // SchedRW } // Defs = [EFLAGS] @@ -444,7 +447,8 @@ def NOT32m : I<0xF7, MRM2m, (outs), (ins i32mem:$dst), [(store (not (loadi32 addr:$dst)), addr:$dst)], IIC_UNARY_MEM>, OpSize32; def NOT64m : RI<0xF7, MRM2m, (outs), (ins i64mem:$dst), "not{q}\t$dst", - [(store (not (loadi64 addr:$dst)), addr:$dst)], IIC_UNARY_MEM>; + [(store (not (loadi64 addr:$dst)), addr:$dst)], IIC_UNARY_MEM>, + Requires<[In64BitMode]>; } // SchedRW } // CodeSize @@ -481,7 +485,8 @@ def INC32r_alt : I<0x40, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1), } // CodeSize = 1, hasSideEffects = 0 } // Constraints = "$src1 = $dst", SchedRW -let CodeSize = 2, SchedRW = [WriteALULd, WriteRMW], Predicates = [UseIncDec] in { +let CodeSize = 2, SchedRW = [WriteALULd, WriteRMW] in { +let Predicates = [UseIncDec] in { def INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst), "inc{b}\t$dst", [(store (add (loadi8 addr:$dst), 1), addr:$dst), (implicit EFLAGS)], IIC_UNARY_MEM>; @@ -491,9 +496,12 @@ let CodeSize = 2, SchedRW = [WriteALULd, WriteRMW], Predicates = [UseIncDec] in def INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l}\t$dst", [(store (add (loadi32 addr:$dst), 1), addr:$dst), (implicit EFLAGS)], IIC_UNARY_MEM>, OpSize32; +} // Predicates +let Predicates = [UseIncDec, In64BitMode] in { def INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst), "inc{q}\t$dst", [(store (add (loadi64 addr:$dst), 1), addr:$dst), (implicit EFLAGS)], IIC_UNARY_MEM>; +} // 
Predicates } // CodeSize = 2, SchedRW let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in { @@ -528,7 +536,8 @@ def DEC32r_alt : I<0x48, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1), } // Constraints = "$src1 = $dst", SchedRW -let CodeSize = 2, SchedRW = [WriteALULd, WriteRMW], Predicates = [UseIncDec] in { +let CodeSize = 2, SchedRW = [WriteALULd, WriteRMW] in { +let Predicates = [UseIncDec] in { def DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst), "dec{b}\t$dst", [(store (add (loadi8 addr:$dst), -1), addr:$dst), (implicit EFLAGS)], IIC_UNARY_MEM>; @@ -538,9 +547,12 @@ let CodeSize = 2, SchedRW = [WriteALULd, WriteRMW], Predicates = [UseIncDec] in def DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst", [(store (add (loadi32 addr:$dst), -1), addr:$dst), (implicit EFLAGS)], IIC_UNARY_MEM>, OpSize32; +} // Predicates +let Predicates = [UseIncDec, In64BitMode] in { def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst", [(store (add (loadi64 addr:$dst), -1), addr:$dst), (implicit EFLAGS)], IIC_UNARY_MEM>; +} // Predicates } // CodeSize = 2, SchedRW } // Defs = [EFLAGS] @@ -992,11 +1004,13 @@ multiclass ArithBinOp_RF BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, // first so that they are slightly preferred to the mi forms. def NAME#16mi8 : BinOpMI8_RMW; def NAME#32mi8 : BinOpMI8_RMW; + let Predicates = [In64BitMode] in def NAME#64mi8 : BinOpMI8_RMW; def NAME#8mi : BinOpMI_RMW<0x80, mnemonic, Xi8 , opnode, MemMRM>; def NAME#16mi : BinOpMI_RMW<0x80, mnemonic, Xi16, opnode, MemMRM>; def NAME#32mi : BinOpMI_RMW<0x80, mnemonic, Xi32, opnode, MemMRM>; + let Predicates = [In64BitMode] in def NAME#64mi32 : BinOpMI_RMW<0x80, mnemonic, Xi64, opnode, MemMRM>; // These are for the disassembler since 0x82 opcode behaves like 0x80, but @@ -1075,11 +1089,13 @@ multiclass ArithBinOp_RFF BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, // first so that they are slightly preferred to the mi forms. def NAME#16mi8 : BinOpMI8_RMW_FF; def NAME#32mi8 : BinOpMI8_RMW_FF; + let Predicates = [In64BitMode] in def NAME#64mi8 : BinOpMI8_RMW_FF; def NAME#8mi : BinOpMI_RMW_FF<0x80, mnemonic, Xi8 , opnode, MemMRM>; def NAME#16mi : BinOpMI_RMW_FF<0x80, mnemonic, Xi16, opnode, MemMRM>; def NAME#32mi : BinOpMI_RMW_FF<0x80, mnemonic, Xi32, opnode, MemMRM>; + let Predicates = [In64BitMode] in def NAME#64mi32 : BinOpMI_RMW_FF<0x80, mnemonic, Xi64, opnode, MemMRM>; // These are for the disassembler since 0x82 opcode behaves like 0x80, but @@ -1154,11 +1170,13 @@ multiclass ArithBinOp_F BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, // first so that they are slightly preferred to the mi forms. 
def NAME#16mi8 : BinOpMI8_F; def NAME#32mi8 : BinOpMI8_F; + let Predicates = [In64BitMode] in def NAME#64mi8 : BinOpMI8_F; def NAME#8mi : BinOpMI_F<0x80, mnemonic, Xi8 , opnode, MemMRM>; def NAME#16mi : BinOpMI_F<0x80, mnemonic, Xi16, opnode, MemMRM>; def NAME#32mi : BinOpMI_F<0x80, mnemonic, Xi32, opnode, MemMRM>; + let Predicates = [In64BitMode] in def NAME#64mi32 : BinOpMI_F<0x80, mnemonic, Xi64, opnode, MemMRM>; // These are for the disassembler since 0x82 opcode behaves like 0x80, but @@ -1231,11 +1249,13 @@ let isCompare = 1 in { def TEST8ri : BinOpRI_F<0xF6, "test", Xi8 , X86testpat, MRM0r>; def TEST16ri : BinOpRI_F<0xF6, "test", Xi16, X86testpat, MRM0r>; def TEST32ri : BinOpRI_F<0xF6, "test", Xi32, X86testpat, MRM0r>; + let Predicates = [In64BitMode] in def TEST64ri32 : BinOpRI_F<0xF6, "test", Xi64, X86testpat, MRM0r>; def TEST8mi : BinOpMI_F<0xF6, "test", Xi8 , X86testpat, MRM0m>; def TEST16mi : BinOpMI_F<0xF6, "test", Xi16, X86testpat, MRM0m>; def TEST32mi : BinOpMI_F<0xF6, "test", Xi32, X86testpat, MRM0m>; + let Predicates = [In64BitMode] in def TEST64mi32 : BinOpMI_F<0xF6, "test", Xi64, X86testpat, MRM0m>; // When testing the result of EXTRACT_SUBREG sub_8bit_hi, make sure the diff --git a/lib/Target/X86/X86InstrCMovSetCC.td b/lib/Target/X86/X86InstrCMovSetCC.td index b85abfb9ca7f..8dd5e1c0626b 100644 --- a/lib/Target/X86/X86InstrCMovSetCC.td +++ b/lib/Target/X86/X86InstrCMovSetCC.td @@ -113,6 +113,6 @@ defm SETG : SETCC<0x9F, "setg", X86_COND_G>; // signed greater than // SALC is an undocumented instruction. Information for this instruction can be found // here http://www.rcollins.org/secrets/opcodes/SALC.html // Set AL if carry. -let Uses = [EFLAGS], Defs = [AL] in { - def SALC : I<0xD6, RawFrm, (outs), (ins), "salc", []>, Requires<[Not64BitMode]>; +let Uses = [EFLAGS], Defs = [AL], SchedRW = [WriteALU] in { + def SALC : I<0xD6, RawFrm, (outs), (ins), "salc", [], IIC_AHF>, Requires<[Not64BitMode]>; } diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td index 82885687bb42..56c24322e6af 100644 --- a/lib/Target/X86/X86InstrCompiler.td +++ b/lib/Target/X86/X86InstrCompiler.td @@ -32,9 +32,10 @@ def GetLo8XForm : SDNodeXForm; + "", [], IIC_CALL_RI>; // ADJCALLSTACKDOWN/UP implicitly use/def ESP because they may be expanded into @@ -42,16 +43,15 @@ let hasSideEffects = 0, isNotDuplicable = 1, Uses = [ESP, SSP] in // pointer before prolog-epilog rewriting occurs. // Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become // sub / add which can clobber EFLAGS. -let Defs = [ESP, EFLAGS, SSP], Uses = [ESP, SSP] in { +let Defs = [ESP, EFLAGS, SSP], Uses = [ESP, SSP], SchedRW = [WriteALU] in { def ADJCALLSTACKDOWN32 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2, i32imm:$amt3), - "#ADJCALLSTACKDOWN", - []>, - Requires<[NotLP64]>; + "#ADJCALLSTACKDOWN", [], IIC_ALU_NONMEM>, + Requires<[NotLP64]>; def ADJCALLSTACKUP32 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2), "#ADJCALLSTACKUP", - [(X86callseq_end timm:$amt1, timm:$amt2)]>, - Requires<[NotLP64]>; + [(X86callseq_end timm:$amt1, timm:$amt2)], + IIC_ALU_NONMEM>, Requires<[NotLP64]>; } def : Pat<(X86callseq_start timm:$amt1, timm:$amt2), (ADJCALLSTACKDOWN32 i32imm:$amt1, i32imm:$amt2, 0)>, Requires<[NotLP64]>; @@ -62,20 +62,20 @@ def : Pat<(X86callseq_start timm:$amt1, timm:$amt2), // pointer before prolog-epilog rewriting occurs. // Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become // sub / add which can clobber EFLAGS. 
-let Defs = [RSP, EFLAGS, SSP], Uses = [RSP, SSP] in { +let Defs = [RSP, EFLAGS, SSP], Uses = [RSP, SSP], SchedRW = [WriteALU] in { def ADJCALLSTACKDOWN64 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2, i32imm:$amt3), "#ADJCALLSTACKDOWN", - []>, - Requires<[IsLP64]>; + [], IIC_ALU_NONMEM>, Requires<[IsLP64]>; def ADJCALLSTACKUP64 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2), "#ADJCALLSTACKUP", - [(X86callseq_end timm:$amt1, timm:$amt2)]>, - Requires<[IsLP64]>; + [(X86callseq_end timm:$amt1, timm:$amt2)], + IIC_ALU_NONMEM>, Requires<[IsLP64]>; } def : Pat<(X86callseq_start timm:$amt1, timm:$amt2), (ADJCALLSTACKDOWN64 i32imm:$amt1, i32imm:$amt2, 0)>, Requires<[IsLP64]>; +let SchedRW = [WriteSystem] in { // x86-64 va_start lowering magic. let usesCustomInserter = 1, Defs = [EFLAGS] in { @@ -141,7 +141,19 @@ def WIN_ALLOCA_64 : I<0, Pseudo, (outs), (ins GR64:$size), "# dynamic stack allocation", [(X86WinAlloca GR64:$size)]>, Requires<[In64BitMode]>; +} // SchedRW +// These instructions XOR the frame pointer into a GPR. They are used in some +// stack protection schemes. These are post-RA pseudos because we only know the +// frame register after register allocation. +let Constraints = "$src = $dst", isPseudo = 1, Defs = [EFLAGS] in { + def XOR32_FP : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$src), + "xorl\t$$FP, $src", [], IIC_BIN_NONMEM>, + Requires<[NotLP64]>, Sched<[WriteALU]>; + def XOR64_FP : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$src), + "xorq\t$$FP $src", [], IIC_BIN_NONMEM>, + Requires<[In64BitMode]>, Sched<[WriteALU]>; +} //===----------------------------------------------------------------------===// // EH Pseudo Instructions @@ -207,17 +219,17 @@ let hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1, Requires<[In64BitMode]>; } } -} // SchedRW let isBranch = 1, isTerminator = 1, isCodeGenOnly = 1 in { def EH_SjLj_Setup : I<0, Pseudo, (outs), (ins brtarget:$dst), "#EH_SjLj_Setup\t$dst", []>; } +} // SchedRW //===----------------------------------------------------------------------===// // Pseudo instructions used by unwind info. // -let isPseudo = 1 in { +let isPseudo = 1, SchedRW = [WriteSystem] in { def SEH_PushReg : I<0, Pseudo, (outs), (ins i32imm:$reg), "#SEH_PushReg $reg", []>; def SEH_SaveReg : I<0, Pseudo, (outs), (ins i32imm:$reg, i32imm:$dst), @@ -243,15 +255,15 @@ let isPseudo = 1 in { // This is lowered into a RET instruction by MCInstLower. We need // this so that we don't have to have a MachineBasicBlock which ends // with a RET and also has successors. -let isPseudo = 1 in { +let isPseudo = 1, SchedRW = [WriteJumpLd] in { def MORESTACK_RET: I<0, Pseudo, (outs), (ins), - "", []>; + "", [], IIC_RET>; // This instruction is lowered to a RET followed by a MOV. The two // instructions are not generated on a higher level since then the // verifier sees a MachineBasicBlock ending with a non-terminator. def MORESTACK_RET_RESTORE_R10 : I<0, Pseudo, (outs), (ins), - "", []>; + "", [], IIC_RET>; } //===----------------------------------------------------------------------===// @@ -275,37 +287,40 @@ def : Pat<(i64 0), (SUBREG_TO_REG (i64 0), (MOV32r0), sub_32bit)>; let Predicates = [OptForSize, Not64BitMode], AddedComplexity = 10 in { + let SchedRW = [WriteALU] in { // Pseudo instructions for materializing 1 and -1 using XOR+INC/DEC, // which only require 3 bytes compared to MOV32ri which requires 5. 
let Defs = [EFLAGS], isReMaterializable = 1, isPseudo = 1 in { def MOV32r1 : I<0, Pseudo, (outs GR32:$dst), (ins), "", - [(set GR32:$dst, 1)]>; + [(set GR32:$dst, 1)], IIC_ALU_NONMEM>; def MOV32r_1 : I<0, Pseudo, (outs GR32:$dst), (ins), "", - [(set GR32:$dst, -1)]>; + [(set GR32:$dst, -1)], IIC_ALU_NONMEM>; } + } // SchedRW // MOV16ri is 4 bytes, so the instructions above are smaller. def : Pat<(i16 1), (EXTRACT_SUBREG (MOV32r1), sub_16bit)>; def : Pat<(i16 -1), (EXTRACT_SUBREG (MOV32r_1), sub_16bit)>; } -let isReMaterializable = 1, isPseudo = 1, AddedComplexity = 5 in { +let isReMaterializable = 1, isPseudo = 1, AddedComplexity = 5, + SchedRW = [WriteALU] in { // AddedComplexity higher than MOV64ri but lower than MOV32r0 and MOV32r1. -// FIXME: Add itinerary class and Schedule. def MOV32ImmSExti8 : I<0, Pseudo, (outs GR32:$dst), (ins i32i8imm:$src), "", - [(set GR32:$dst, i32immSExt8:$src)]>, - Requires<[OptForMinSize, NotWin64WithoutFP]>; + [(set GR32:$dst, i32immSExt8:$src)], IIC_ALU_NONMEM>, + Requires<[OptForMinSize, NotWin64WithoutFP]>; def MOV64ImmSExti8 : I<0, Pseudo, (outs GR64:$dst), (ins i64i8imm:$src), "", - [(set GR64:$dst, i64immSExt8:$src)]>, - Requires<[OptForMinSize, NotWin64WithoutFP]>; + [(set GR64:$dst, i64immSExt8:$src)], IIC_ALU_NONMEM>, + Requires<[OptForMinSize, NotWin64WithoutFP]>; } // Materialize i64 constant where top 32-bits are zero. This could theoretically // use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however // that would make it more difficult to rematerialize. let isReMaterializable = 1, isAsCheapAsAMove = 1, - isPseudo = 1, hasSideEffects = 0 in -def MOV32ri64 : I<0, Pseudo, (outs GR32:$dst), (ins i64i32imm:$src), "", []>; + isPseudo = 1, hasSideEffects = 0, SchedRW = [WriteALU] in +def MOV32ri64 : I<0, Pseudo, (outs GR32:$dst), (ins i64i32imm:$src), "", [], + IIC_ALU_NONMEM>; // This 64-bit pseudo-move can be used for both a 64-bit constant that is // actually the zero-extension of a 32-bit constant and for labels in the @@ -448,6 +463,7 @@ let Defs = [RCX,RDI], isCodeGenOnly = 1 in { //===----------------------------------------------------------------------===// // Thread Local Storage Instructions // +let SchedRW = [WriteSystem] in { // ELF TLS Support // All calls clobber the non-callee saved registers. ESP is marked as @@ -513,7 +529,7 @@ def TLSCall_64 : I<0, Pseudo, (outs), (ins i64mem:$sym), "# TLSCall_64", [(X86TLSCall addr:$sym)]>, Requires<[In64BitMode]>; - +} // SchedRW //===----------------------------------------------------------------------===// // Conditional Move Pseudo Instructions @@ -528,7 +544,7 @@ multiclass CMOVrr_PSEUDO { EFLAGS)))]>; } -let usesCustomInserter = 1, Uses = [EFLAGS] in { +let usesCustomInserter = 1, hasNoSchedulingInfo = 1, Uses = [EFLAGS] in { // X86 doesn't have 8-bit conditional moves. Use a customInserter to // emit control flow. 
An alternative to this is to mark i8 SELECT as Promote, // however that requires promoting the operands, and can induce additional @@ -566,7 +582,7 @@ let usesCustomInserter = 1, Uses = [EFLAGS] in { defm _V16I1 : CMOVrr_PSEUDO; defm _V32I1 : CMOVrr_PSEUDO; defm _V64I1 : CMOVrr_PSEUDO; -} // usesCustomInserter = 1, Uses = [EFLAGS] +} // usesCustomInserter = 1, hasNoSchedulingInfo = 1, Uses = [EFLAGS] //===----------------------------------------------------------------------===// // Normal-Instructions-With-Lock-Prefix Pseudo Instructions @@ -789,7 +805,7 @@ defm LCMPXCHG8B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg8b", // register and the register allocator will ignore any use/def of // it. In other words, the register will not fix the clobbering of // RBX that will happen when setting the arguments for the instrucion. -// +// // Unlike the actual related instuction, we mark that this one // defines EBX (instead of using EBX). // The rationale is that we will define RBX during the expansion of @@ -917,7 +933,7 @@ multiclass RELEASE_BINOP_MI { [(atomic_store_64 addr:$dst, (op (atomic_load_64 addr:$dst), GR64:$src))]>; } -let Defs = [EFLAGS] in { +let Defs = [EFLAGS], SchedRW = [WriteMicrocoded] in { defm RELEASE_ADD : RELEASE_BINOP_MI; defm RELEASE_AND : RELEASE_BINOP_MI; defm RELEASE_OR : RELEASE_BINOP_MI; @@ -930,20 +946,20 @@ let Defs = [EFLAGS] in { // FIXME: imm version. // FIXME: Version that doesn't clobber $src, using AVX's VADDSS. // FIXME: This could also handle SIMD operations with *ps and *pd instructions. -let usesCustomInserter = 1 in { +let usesCustomInserter = 1, SchedRW = [WriteMicrocoded] in { multiclass RELEASE_FP_BINOP_MI { def NAME#32mr : I<0, Pseudo, (outs), (ins i32mem:$dst, FR32:$src), "#BINOP "#NAME#"32mr PSEUDO!", [(atomic_store_32 addr:$dst, - (i32 (bitconvert (op + (i32 (bitconvert (op (f32 (bitconvert (i32 (atomic_load_32 addr:$dst)))), - FR32:$src))))]>, Requires<[HasSSE1]>; + FR32:$src))))]>, Requires<[HasSSE1]>; def NAME#64mr : I<0, Pseudo, (outs), (ins i64mem:$dst, FR64:$src), "#BINOP "#NAME#"64mr PSEUDO!", [(atomic_store_64 addr:$dst, - (i64 (bitconvert (op + (i64 (bitconvert (op (f64 (bitconvert (i64 (atomic_load_64 addr:$dst)))), - FR64:$src))))]>, Requires<[HasSSE2]>; + FR64:$src))))]>, Requires<[HasSSE2]>; } defm RELEASE_FADD : RELEASE_FP_BINOP_MI; // FIXME: Add fsub, fmul, fdiv, ... 
@@ -964,7 +980,7 @@ multiclass RELEASE_UNOP { [(atomic_store_64 addr:$dst, dag64)]>; } -let Defs = [EFLAGS], Predicates = [UseIncDec] in { +let Defs = [EFLAGS], Predicates = [UseIncDec], SchedRW = [WriteMicrocoded] in { defm RELEASE_INC : RELEASE_UNOP< (add (atomic_load_8 addr:$dst), (i8 1)), (add (atomic_load_16 addr:$dst), (i16 1)), @@ -994,18 +1010,19 @@ defm RELEASE_NOT : RELEASE_UNOP< (not (atomic_load_64 addr:$dst))>; */ +let SchedRW = [WriteMicrocoded] in { def RELEASE_MOV8mi : I<0, Pseudo, (outs), (ins i8mem:$dst, i8imm:$src), - "#RELEASE_MOV8mi PSEUDO!", - [(atomic_store_8 addr:$dst, (i8 imm:$src))]>; + "#RELEASE_MOV8mi PSEUDO!", + [(atomic_store_8 addr:$dst, (i8 imm:$src))]>; def RELEASE_MOV16mi : I<0, Pseudo, (outs), (ins i16mem:$dst, i16imm:$src), - "#RELEASE_MOV16mi PSEUDO!", - [(atomic_store_16 addr:$dst, (i16 imm:$src))]>; + "#RELEASE_MOV16mi PSEUDO!", + [(atomic_store_16 addr:$dst, (i16 imm:$src))]>; def RELEASE_MOV32mi : I<0, Pseudo, (outs), (ins i32mem:$dst, i32imm:$src), - "#RELEASE_MOV32mi PSEUDO!", - [(atomic_store_32 addr:$dst, (i32 imm:$src))]>; + "#RELEASE_MOV32mi PSEUDO!", + [(atomic_store_32 addr:$dst, (i32 imm:$src))]>; def RELEASE_MOV64mi32 : I<0, Pseudo, (outs), (ins i64mem:$dst, i64i32imm:$src), - "#RELEASE_MOV64mi32 PSEUDO!", - [(atomic_store_64 addr:$dst, i64immSExt32:$src)]>; + "#RELEASE_MOV64mi32 PSEUDO!", + [(atomic_store_64 addr:$dst, i64immSExt32:$src)]>; def RELEASE_MOV8mr : I<0, Pseudo, (outs), (ins i8mem :$dst, GR8 :$src), "#RELEASE_MOV8mr PSEUDO!", @@ -1032,6 +1049,7 @@ def ACQUIRE_MOV32rm : I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$src), def ACQUIRE_MOV64rm : I<0, Pseudo, (outs GR64:$dst), (ins i64mem:$src), "#ACQUIRE_MOV64rm PSEUDO!", [(set GR64:$dst, (atomic_load_64 addr:$src))]>; +} // SchedRW //===----------------------------------------------------------------------===// // DAG Pattern Matching Rules @@ -1128,14 +1146,14 @@ def X86tcret_6regs : PatFrag<(ops node:$ptr, node:$off), def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off), (TCRETURNri ptr_rc_tailcall:$dst, imm:$off)>, - Requires<[Not64BitMode]>; + Requires<[Not64BitMode, NotUseRetpoline]>; // FIXME: This is disabled for 32-bit PIC mode because the global base // register which is part of the address mode may be assigned a // callee-saved register. def : Pat<(X86tcret (load addr:$dst), imm:$off), (TCRETURNmi addr:$dst, imm:$off)>, - Requires<[Not64BitMode, IsNotPIC]>; + Requires<[Not64BitMode, IsNotPIC, NotUseRetpoline]>; def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off), (TCRETURNdi tglobaladdr:$dst, imm:$off)>, @@ -1147,13 +1165,21 @@ def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off), def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off), (TCRETURNri64 ptr_rc_tailcall:$dst, imm:$off)>, - Requires<[In64BitMode]>; + Requires<[In64BitMode, NotUseRetpoline]>; // Don't fold loads into X86tcret requiring more than 6 regs. // There wouldn't be enough scratch registers for base+index. 
def : Pat<(X86tcret_6regs (load addr:$dst), imm:$off), (TCRETURNmi64 addr:$dst, imm:$off)>, - Requires<[In64BitMode]>; + Requires<[In64BitMode, NotUseRetpoline]>; + +def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off), + (RETPOLINE_TCRETURN64 ptr_rc_tailcall:$dst, imm:$off)>, + Requires<[In64BitMode, UseRetpoline]>; + +def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off), + (RETPOLINE_TCRETURN32 ptr_rc_tailcall:$dst, imm:$off)>, + Requires<[Not64BitMode, UseRetpoline]>; def : Pat<(X86tcret (i64 tglobaladdr:$dst), imm:$off), (TCRETURNdi64 tglobaladdr:$dst, imm:$off)>, @@ -1496,6 +1522,10 @@ def : Pat<(i8 (trunc GR16:$src)), (EXTRACT_SUBREG GR16:$src, sub_8bit)>, Requires<[In64BitMode]>; +def immff00_ffff : ImmLeaf= 0xff00 && Imm <= 0xffff; +}]>; + // h-register tricks def : Pat<(i8 (trunc (srl_su GR16:$src, (i8 8)))), (EXTRACT_SUBREG GR16:$src, sub_8bit_hi)>, @@ -1516,7 +1546,7 @@ def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))), (MOVZX32_NOREXrr8 (EXTRACT_SUBREG GR16:$src, sub_8bit_hi))>; def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)), (MOVZX32_NOREXrr8 (EXTRACT_SUBREG GR32:$src, sub_8bit_hi))>; -def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)), +def : Pat<(srl (and_su GR32:$src, immff00_ffff), (i8 8)), (MOVZX32_NOREXrr8 (EXTRACT_SUBREG GR32:$src, sub_8bit_hi))>; // h-register tricks. diff --git a/lib/Target/X86/X86InstrControl.td b/lib/Target/X86/X86InstrControl.td index 5581fd462a1d..de3b37091044 100644 --- a/lib/Target/X86/X86InstrControl.td +++ b/lib/Target/X86/X86InstrControl.td @@ -211,11 +211,12 @@ let isCall = 1 in Sched<[WriteJumpLd]>; def CALL32r : I<0xFF, MRM2r, (outs), (ins GR32:$dst), "call{l}\t{*}$dst", [(X86call GR32:$dst)], IIC_CALL_RI>, - OpSize32, Requires<[Not64BitMode]>, Sched<[WriteJump]>; + OpSize32, Requires<[Not64BitMode,NotUseRetpoline]>, + Sched<[WriteJump]>; def CALL32m : I<0xFF, MRM2m, (outs), (ins i32mem:$dst), "call{l}\t{*}$dst", [(X86call (loadi32 addr:$dst))], IIC_CALL_MEM>, OpSize32, - Requires<[Not64BitMode,FavorMemIndirectCall]>, + Requires<[Not64BitMode,FavorMemIndirectCall,NotUseRetpoline]>, Sched<[WriteJumpLd]>; let Predicates = [Not64BitMode] in { @@ -298,19 +299,19 @@ let isCall = 1, Uses = [RSP, SSP], SchedRW = [WriteJump] in { def CALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst), "call{q}\t{*}$dst", [(X86call GR64:$dst)], IIC_CALL_RI>, - Requires<[In64BitMode]>; + Requires<[In64BitMode,NotUseRetpoline]>; def CALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst), "call{q}\t{*}$dst", [(X86call (loadi64 addr:$dst))], IIC_CALL_MEM>, - Requires<[In64BitMode,FavorMemIndirectCall]>; + Requires<[In64BitMode,FavorMemIndirectCall, + NotUseRetpoline]>; def FARCALL64 : RI<0xFF, MRM3m, (outs), (ins opaque80mem:$dst), "lcall{q}\t{*}$dst", [], IIC_CALL_FAR_MEM>; } let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, - isCodeGenOnly = 1, Uses = [RSP, SSP], usesCustomInserter = 1, - SchedRW = [WriteJump] in { + isCodeGenOnly = 1, Uses = [RSP, SSP], SchedRW = [WriteJump] in { def TCRETURNdi64 : PseudoI<(outs), (ins i64i32imm_pcrel:$dst, i32imm:$offset), []>; @@ -341,6 +342,27 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, } } +let isPseudo = 1, isCall = 1, isCodeGenOnly = 1, + Uses = [RSP, SSP], + usesCustomInserter = 1, + SchedRW = [WriteJump] in { + def RETPOLINE_CALL32 : + PseudoI<(outs), (ins GR32:$dst), [(X86call GR32:$dst)]>, + Requires<[Not64BitMode,UseRetpoline]>; + + def RETPOLINE_CALL64 : + PseudoI<(outs), (ins GR64:$dst), [(X86call GR64:$dst)]>, + Requires<[In64BitMode,UseRetpoline]>; + + // Retpoline variant of 
indirect tail calls. + let isTerminator = 1, isReturn = 1, isBarrier = 1 in { + def RETPOLINE_TCRETURN64 : + PseudoI<(outs), (ins GR64:$dst, i32imm:$offset), []>; + def RETPOLINE_TCRETURN32 : + PseudoI<(outs), (ins GR32:$dst, i32imm:$offset), []>; + } +} + // Conditional tail calls are similar to the above, but they are branches // rather than barriers, and they use EFLAGS. let isCall = 1, isTerminator = 1, isReturn = 1, isBranch = 1, diff --git a/lib/Target/X86/X86InstrExtension.td b/lib/Target/X86/X86InstrExtension.td index bb391fd9c817..2a8ab0069b1e 100644 --- a/lib/Target/X86/X86InstrExtension.td +++ b/lib/Target/X86/X86InstrExtension.td @@ -9,36 +9,36 @@ // // This file describes the sign and zero extension operations. // -//===----------------------------------------------------------------------===// - -let hasSideEffects = 0 in { - let Defs = [AX], Uses = [AL] in // AX = signext(AL) - def CBW : I<0x98, RawFrm, (outs), (ins), - "{cbtw|cbw}", [], IIC_CBW>, OpSize16, Sched<[WriteALU]>; - let Defs = [EAX], Uses = [AX] in // EAX = signext(AX) - def CWDE : I<0x98, RawFrm, (outs), (ins), - "{cwtl|cwde}", [], IIC_CBW>, OpSize32, Sched<[WriteALU]>; - - let Defs = [AX,DX], Uses = [AX] in // DX:AX = signext(AX) - def CWD : I<0x99, RawFrm, (outs), (ins), - "{cwtd|cwd}", [], IIC_CBW>, OpSize16, Sched<[WriteALU]>; - let Defs = [EAX,EDX], Uses = [EAX] in // EDX:EAX = signext(EAX) - def CDQ : I<0x99, RawFrm, (outs), (ins), - "{cltd|cdq}", [], IIC_CBW>, OpSize32, Sched<[WriteALU]>; - - - let Defs = [RAX], Uses = [EAX] in // RAX = signext(EAX) - def CDQE : RI<0x98, RawFrm, (outs), (ins), - "{cltq|cdqe}", [], IIC_CBW>, Sched<[WriteALU]>; - - let Defs = [RAX,RDX], Uses = [RAX] in // RDX:RAX = signext(RAX) - def CQO : RI<0x99, RawFrm, (outs), (ins), - "{cqto|cqo}", [], IIC_CBW>, Sched<[WriteALU]>; -} - -// Sign/Zero extenders -let hasSideEffects = 0 in { -def MOVSX16rr8 : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src), +//===----------------------------------------------------------------------===// + +let hasSideEffects = 0 in { + let Defs = [AX], Uses = [AL] in // AX = signext(AL) + def CBW : I<0x98, RawFrm, (outs), (ins), + "{cbtw|cbw}", [], IIC_CBW>, OpSize16, Sched<[WriteALU]>; + let Defs = [EAX], Uses = [AX] in // EAX = signext(AX) + def CWDE : I<0x98, RawFrm, (outs), (ins), + "{cwtl|cwde}", [], IIC_CBW>, OpSize32, Sched<[WriteALU]>; + + let Defs = [AX,DX], Uses = [AX] in // DX:AX = signext(AX) + def CWD : I<0x99, RawFrm, (outs), (ins), + "{cwtd|cwd}", [], IIC_CBW>, OpSize16, Sched<[WriteALU]>; + let Defs = [EAX,EDX], Uses = [EAX] in // EDX:EAX = signext(EAX) + def CDQ : I<0x99, RawFrm, (outs), (ins), + "{cltd|cdq}", [], IIC_CBW>, OpSize32, Sched<[WriteALU]>; + + + let Defs = [RAX], Uses = [EAX] in // RAX = signext(EAX) + def CDQE : RI<0x98, RawFrm, (outs), (ins), + "{cltq|cdqe}", [], IIC_CBW>, Sched<[WriteALU]>; + + let Defs = [RAX,RDX], Uses = [RAX] in // RDX:RAX = signext(RAX) + def CQO : RI<0x99, RawFrm, (outs), (ins), + "{cqto|cqo}", [], IIC_CBW>, Sched<[WriteALU]>; +} + +// Sign/Zero extenders +let hasSideEffects = 0 in { +def MOVSX16rr8 : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src), "movs{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVSX_R16_R8>, TB, OpSize16, Sched<[WriteALU]>; let mayLoad = 1 in diff --git a/lib/Target/X86/X86InstrFMA.td b/lib/Target/X86/X86InstrFMA.td index 1b706674a4d0..35fa45590fc6 100644 --- a/lib/Target/X86/X86InstrFMA.td +++ b/lib/Target/X86/X86InstrFMA.td @@ -51,7 +51,7 @@ multiclass fma3p_rm_213 opc, string OpcodeStr, RegisterClass RC, "\t{$src3, $src2, 
$dst|$dst, $src2, $src3}"), [(set RC:$dst, (VT (Op RC:$src2, RC:$src1, (MemFrag addr:$src3))))]>, - Sched<[WriteFMA, ReadAfterLd]>; + Sched<[WriteFMALd, ReadAfterLd]>; } multiclass fma3p_rm_231 opc, string OpcodeStr, RegisterClass RC, @@ -70,7 +70,7 @@ multiclass fma3p_rm_231 opc, string OpcodeStr, RegisterClass RC, !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), [(set RC:$dst, (VT (Op RC:$src2, (MemFrag addr:$src3), - RC:$src1)))]>, Sched<[WriteFMA, ReadAfterLd]>; + RC:$src1)))]>, Sched<[WriteFMALd, ReadAfterLd]>; } multiclass fma3p_rm_132 opc, string OpcodeStr, RegisterClass RC, @@ -91,7 +91,7 @@ multiclass fma3p_rm_132 opc, string OpcodeStr, RegisterClass RC, !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), [(set RC:$dst, (VT (Op (MemFrag addr:$src3), RC:$src1, - RC:$src2)))]>, Sched<[WriteFMA, ReadAfterLd]>; + RC:$src2)))]>, Sched<[WriteFMALd, ReadAfterLd]>; } let Constraints = "$src1 = $dst", hasSideEffects = 0, isCommutable = 1 in @@ -184,7 +184,7 @@ multiclass fma3s_rm_213 opc, string OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), [(set RC:$dst, (OpNode RC:$src2, RC:$src1, (load addr:$src3)))]>, - Sched<[WriteFMA, ReadAfterLd]>; + Sched<[WriteFMALd, ReadAfterLd]>; } multiclass fma3s_rm_231 opc, string OpcodeStr, @@ -204,7 +204,7 @@ multiclass fma3s_rm_231 opc, string OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), [(set RC:$dst, (OpNode RC:$src2, (load addr:$src3), RC:$src1))]>, - Sched<[WriteFMA, ReadAfterLd]>; + Sched<[WriteFMALd, ReadAfterLd]>; } multiclass fma3s_rm_132 opc, string OpcodeStr, @@ -226,7 +226,7 @@ multiclass fma3s_rm_132 opc, string OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), [(set RC:$dst, (OpNode (load addr:$src3), RC:$src1, RC:$src2))]>, - Sched<[WriteFMA, ReadAfterLd]>; + Sched<[WriteFMALd, ReadAfterLd]>; } let Constraints = "$src1 = $dst", isCommutable = 1, hasSideEffects = 0 in @@ -270,7 +270,7 @@ multiclass fma3s_rm_int opc, string OpcodeStr, (ins RC:$src1, RC:$src2, memopr:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - []>, Sched<[WriteFMA, ReadAfterLd]>; + []>, Sched<[WriteFMALd, ReadAfterLd]>; } // The FMA 213 form is created for lowering of scalar FMA intrinscis @@ -374,14 +374,14 @@ multiclass fma4s opc, string OpcodeStr, RegisterClass RC, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set RC:$dst, (OpNode RC:$src1, RC:$src2, (mem_frag addr:$src3)))]>, VEX_W, VEX_LIG, - Sched<[WriteFMA, ReadAfterLd]>; + Sched<[WriteFMALd, ReadAfterLd]>; def mr : FMA4S, VEX_LIG, - Sched<[WriteFMA, ReadAfterLd]>; + Sched<[WriteFMALd, ReadAfterLd]>; // For disassembler let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in def rr_REV : FMA4S, VEX_W, VEX_LIG, - Sched<[WriteFMA, ReadAfterLd]>; + Sched<[WriteFMALd, ReadAfterLd]>; def mr_Int : FMA4S_Int, - VEX_LIG, Sched<[WriteFMA, ReadAfterLd]>; + VEX_LIG, Sched<[WriteFMALd, ReadAfterLd]>; let hasSideEffects = 0 in def rr_Int_REV : FMA4S_Int opc, string OpcodeStr, SDNode OpNode, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set VR128:$dst, (OpNode VR128:$src1, VR128:$src2, (ld_frag128 addr:$src3)))]>, VEX_W, - Sched<[WriteFMA, ReadAfterLd]>; + Sched<[WriteFMALd, ReadAfterLd]>; def mr : FMA4, - Sched<[WriteFMA, ReadAfterLd]>; + Sched<[WriteFMALd, ReadAfterLd]>; let isCommutable = 1 in def Yrr : FMA4 opc, string OpcodeStr, SDNode OpNode, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set VR256:$dst, (OpNode VR256:$src1, VR256:$src2, (ld_frag256 addr:$src3)))]>, VEX_W, 
VEX_L, - Sched<[WriteFMA, ReadAfterLd]>; + Sched<[WriteFMALd, ReadAfterLd]>; def Ymr : FMA4, VEX_L, - Sched<[WriteFMA, ReadAfterLd]>; + Sched<[WriteFMALd, ReadAfterLd]>; // For disassembler let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in { def rr_REV : FMA4; -// Some 'special' instructions -let usesCustomInserter = 1 in { // Expanded after instruction selection. +// Some 'special' instructions - expanded after instruction selection. +let usesCustomInserter = 1, hasNoSchedulingInfo = 1 in { def FP32_TO_INT16_IN_MEM : PseudoI<(outs), (ins i16mem:$dst, RFP32:$src), [(X86fp_to_i16mem RFP32:$src, addr:$dst)]>; def FP32_TO_INT32_IN_MEM : PseudoI<(outs), (ins i32mem:$dst, RFP32:$src), @@ -118,10 +118,12 @@ let usesCustomInserter = 1 in { // Expanded after instruction selection. // f32 instructions can use SSE1 and are predicated on FPStackf32 == !SSE1. // f64 instructions can use SSE2 and are predicated on FPStackf64 == !SSE2. // f80 instructions cannot use SSE and use neither of these. -class FpIf32 pattern> : - FpI_, Requires<[FPStackf32]>; -class FpIf64 pattern> : - FpI_, Requires<[FPStackf64]>; +class FpIf32 pattern, + InstrItinClass itin = NoItinerary> : + FpI_, Requires<[FPStackf32]>; +class FpIf64 pattern, + InstrItinClass itin = NoItinerary> : + FpI_, Requires<[FPStackf64]>; // Factoring for arithmetic. multiclass FPBinary_rr { @@ -139,6 +141,7 @@ def _Fp80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, RFP80:$src2), TwoArgFP, // These instructions cannot address 80-bit memory. multiclass FPBinary { +let mayLoad = 1, hasSideEffects = 1 in { // ST(0) = ST(0) + [mem] def _Fp32m : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, f32mem:$src2), OneArgFPRW, @@ -175,10 +178,8 @@ def _Fp80m64: FpI_<(outs RFP80:$dst), (OpNode RFP80:$src1, (f80 (extloadf64 addr:$src2)))), (set RFP80:$dst, (OpNode (f80 (extloadf64 addr:$src2)), RFP80:$src1)))]>; -let mayLoad = 1 in def _F32m : FPI<0xD8, fp, (outs), (ins f32mem:$src), !strconcat("f", asmstring, "{s}\t$src")>; -let mayLoad = 1 in def _F64m : FPI<0xDC, fp, (outs), (ins f64mem:$src), !strconcat("f", asmstring, "{l}\t$src")>; // ST(0) = ST(0) + [memint] @@ -224,30 +225,34 @@ def _FpI32m80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, i32mem:$src2), (OpNode RFP80:$src1, (X86fild addr:$src2, i32))), (set RFP80:$dst, (OpNode (X86fild addr:$src2, i32), RFP80:$src1)))]>; -let mayLoad = 1 in def _FI16m : FPI<0xDE, fp, (outs), (ins i16mem:$src), !strconcat("fi", asmstring, "{s}\t$src")>; -let mayLoad = 1 in def _FI32m : FPI<0xDA, fp, (outs), (ins i32mem:$src), !strconcat("fi", asmstring, "{l}\t$src")>; +} // mayLoad = 1, hasSideEffects = 1 } let Defs = [FPSW] in { // FPBinary_rr just defines pseudo-instructions, no need to set a scheduling // resources. +let hasNoSchedulingInfo = 1 in { defm ADD : FPBinary_rr; defm SUB : FPBinary_rr; defm MUL : FPBinary_rr; defm DIV : FPBinary_rr; +} + // Sets the scheduling resources for the actual NAME#_Fm defintions. 
let SchedRW = [WriteFAddLd] in { defm ADD : FPBinary; defm SUB : FPBinary; defm SUBR: FPBinary; } + let SchedRW = [WriteFMulLd] in { defm MUL : FPBinary; } + let SchedRW = [WriteFDivLd] in { defm DIV : FPBinary; defm DIVR: FPBinary; @@ -274,6 +279,8 @@ def SUB_FPrST0 : FPrST0PInst; def SUB_FST0r : FPST0rInst ; def SUBR_FrST0 : FPrST0Inst ; def SUBR_FPrST0 : FPrST0PInst; +def COM_FST0r : FPST0rInst ; +def COMP_FST0r : FPST0rInst ; } // SchedRW let SchedRW = [WriteFMul] in { def MUL_FST0r : FPST0rInst ; @@ -289,84 +296,98 @@ def DIVR_FrST0 : FPrST0Inst ; def DIVR_FPrST0 : FPrST0PInst; } // SchedRW -def COM_FST0r : FPST0rInst ; -def COMP_FST0r : FPST0rInst ; - // Unary operations. -multiclass FPUnary { +multiclass FPUnary { def _Fp32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src), OneArgFPRW, - [(set RFP32:$dst, (OpNode RFP32:$src))]>; + [(set RFP32:$dst, (OpNode RFP32:$src))], itin>; def _Fp64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src), OneArgFPRW, - [(set RFP64:$dst, (OpNode RFP64:$src))]>; + [(set RFP64:$dst, (OpNode RFP64:$src))], itin>; def _Fp80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src), OneArgFPRW, - [(set RFP80:$dst, (OpNode RFP80:$src))]>; -def _F : FPI<0xD9, fp, (outs), (ins), asmstring>; + [(set RFP80:$dst, (OpNode RFP80:$src))], itin>; +def _F : FPI<0xD9, fp, (outs), (ins), asmstring, itin>; } let Defs = [FPSW] in { -defm CHS : FPUnary; -defm ABS : FPUnary; -let SchedRW = [WriteFSqrt] in { -defm SQRT: FPUnary; + +let SchedRW = [WriteVecLogic] in { +defm CHS : FPUnary; +defm ABS : FPUnary; } -defm SIN : FPUnary; -defm COS : FPUnary; +let SchedRW = [WriteFSqrt] in +defm SQRT: FPUnary; + +let SchedRW = [WriteMicrocoded] in { +defm SIN : FPUnary; +defm COS : FPUnary; +} + +let SchedRW = [WriteFAdd] in { let hasSideEffects = 0 in { def TST_Fp32 : FpIf32<(outs), (ins RFP32:$src), OneArgFP, []>; def TST_Fp64 : FpIf64<(outs), (ins RFP64:$src), OneArgFP, []>; def TST_Fp80 : FpI_<(outs), (ins RFP80:$src), OneArgFP, []>; -} -def TST_F : FPI<0xD9, MRM_E4, (outs), (ins), "ftst">; +} // hasSideEffects + +def TST_F : FPI<0xD9, MRM_E4, (outs), (ins), "ftst", IIC_FCOMI>; +} // SchedRW } // Defs = [FPSW] // Versions of FP instructions that take a single memory operand. Added for the // disassembler; remove as they are included with patterns elsewhere. 
+let SchedRW = [WriteFAddLd] in { def FCOM32m : FPI<0xD8, MRM2m, (outs), (ins f32mem:$src), "fcom{s}\t$src">; def FCOMP32m : FPI<0xD8, MRM3m, (outs), (ins f32mem:$src), "fcomp{s}\t$src">; -def FLDENVm : FPI<0xD9, MRM4m, (outs), (ins f32mem:$src), "fldenv\t$src">; -def FSTENVm : FPI<0xD9, MRM6m, (outs), (ins f32mem:$dst), "fnstenv\t$dst">; +def FCOM64m : FPI<0xDC, MRM2m, (outs), (ins f64mem:$src), "fcom{l}\t$src">; +def FCOMP64m : FPI<0xDC, MRM3m, (outs), (ins f64mem:$src), "fcomp{l}\t$src">; + +def FICOM16m : FPI<0xDE, MRM2m, (outs), (ins i16mem:$src), "ficom{s}\t$src">; +def FICOMP16m: FPI<0xDE, MRM3m, (outs), (ins i16mem:$src), "ficomp{s}\t$src">; def FICOM32m : FPI<0xDA, MRM2m, (outs), (ins i32mem:$src), "ficom{l}\t$src">; def FICOMP32m: FPI<0xDA, MRM3m, (outs), (ins i32mem:$src), "ficomp{l}\t$src">; +} // SchedRW -def FCOM64m : FPI<0xDC, MRM2m, (outs), (ins f64mem:$src), "fcom{l}\t$src">; -def FCOMP64m : FPI<0xDC, MRM3m, (outs), (ins f64mem:$src), "fcomp{l}\t$src">; +let SchedRW = [WriteMicrocoded] in { +def FLDENVm : FPI<0xD9, MRM4m, (outs), (ins f32mem:$src), "fldenv\t$src">; +def FSTENVm : FPI<0xD9, MRM6m, (outs), (ins f32mem:$dst), "fnstenv\t$dst">; def FRSTORm : FPI<0xDD, MRM4m, (outs), (ins f32mem:$dst), "frstor\t$dst">; def FSAVEm : FPI<0xDD, MRM6m, (outs), (ins f32mem:$dst), "fnsave\t$dst">; def FNSTSWm : FPI<0xDD, MRM7m, (outs), (ins i16mem:$dst), "fnstsw\t$dst">; -def FICOM16m : FPI<0xDE, MRM2m, (outs), (ins i16mem:$src), "ficom{s}\t$src">; -def FICOMP16m: FPI<0xDE, MRM3m, (outs), (ins i16mem:$src), "ficomp{s}\t$src">; - def FBLDm : FPI<0xDF, MRM4m, (outs), (ins f80mem:$src), "fbld\t$src">; def FBSTPm : FPI<0xDF, MRM6m, (outs), (ins f80mem:$dst), "fbstp\t$dst">; +} // SchedRW // Floating point cmovs. -class FpIf32CMov pattern> : - FpI_, Requires<[FPStackf32, HasCMov]>; -class FpIf64CMov pattern> : - FpI_, Requires<[FPStackf64, HasCMov]>; +class FpIf32CMov pattern, + InstrItinClass itin> : + FpI_, Requires<[FPStackf32, HasCMov]>; +class FpIf64CMov pattern, + InstrItinClass itin> : + FpI_, Requires<[FPStackf64, HasCMov]>; multiclass FPCMov { def _Fp32 : FpIf32CMov<(outs RFP32:$dst), (ins RFP32:$src1, RFP32:$src2), CondMovFP, [(set RFP32:$dst, (X86cmov RFP32:$src1, RFP32:$src2, - cc, EFLAGS))]>; + cc, EFLAGS))], IIC_FCMOV>; def _Fp64 : FpIf64CMov<(outs RFP64:$dst), (ins RFP64:$src1, RFP64:$src2), CondMovFP, [(set RFP64:$dst, (X86cmov RFP64:$src1, RFP64:$src2, - cc, EFLAGS))]>; + cc, EFLAGS))], IIC_FCMOV>; def _Fp80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, RFP80:$src2), CondMovFP, [(set RFP80:$dst, (X86cmov RFP80:$src1, RFP80:$src2, - cc, EFLAGS))]>, + cc, EFLAGS))], IIC_FCMOV>, Requires<[HasCMov]>; } let Defs = [FPSW] in { +let SchedRW = [WriteFAdd] in { let Uses = [EFLAGS], Constraints = "$src1 = $dst" in { defm CMOVB : FPCMov; defm CMOVBE : FPCMov; @@ -381,24 +402,26 @@ defm CMOVNP : FPCMov; let Predicates = [HasCMov] in { // These are not factored because there's no clean way to pass DA/DB. 
def CMOVB_F : FPI<0xDA, MRM0r, (outs), (ins RST:$op), - "fcmovb\t{$op, %st(0)|st(0), $op}">; + "fcmovb\t{$op, %st(0)|st(0), $op}", IIC_FCMOV>; def CMOVBE_F : FPI<0xDA, MRM2r, (outs), (ins RST:$op), - "fcmovbe\t{$op, %st(0)|st(0), $op}">; + "fcmovbe\t{$op, %st(0)|st(0), $op}", IIC_FCMOV>; def CMOVE_F : FPI<0xDA, MRM1r, (outs), (ins RST:$op), - "fcmove\t{$op, %st(0)|st(0), $op}">; + "fcmove\t{$op, %st(0)|st(0), $op}", IIC_FCMOV>; def CMOVP_F : FPI<0xDA, MRM3r, (outs), (ins RST:$op), - "fcmovu\t{$op, %st(0)|st(0), $op}">; + "fcmovu\t{$op, %st(0)|st(0), $op}", IIC_FCMOV>; def CMOVNB_F : FPI<0xDB, MRM0r, (outs), (ins RST:$op), - "fcmovnb\t{$op, %st(0)|st(0), $op}">; + "fcmovnb\t{$op, %st(0)|st(0), $op}", IIC_FCMOV>; def CMOVNBE_F: FPI<0xDB, MRM2r, (outs), (ins RST:$op), - "fcmovnbe\t{$op, %st(0)|st(0), $op}">; + "fcmovnbe\t{$op, %st(0)|st(0), $op}", IIC_FCMOV>; def CMOVNE_F : FPI<0xDB, MRM1r, (outs), (ins RST:$op), - "fcmovne\t{$op, %st(0)|st(0), $op}">; + "fcmovne\t{$op, %st(0)|st(0), $op}", IIC_FCMOV>; def CMOVNP_F : FPI<0xDB, MRM3r, (outs), (ins RST:$op), - "fcmovnu\t{$op, %st(0)|st(0), $op}">; + "fcmovnu\t{$op, %st(0)|st(0), $op}", IIC_FCMOV>; } // Predicates = [HasCMov] +} // SchedRW // Floating point loads & stores. +let SchedRW = [WriteLoad] in { let canFoldAsLoad = 1 in { def LD_Fp32m : FpIf32<(outs RFP32:$dst), (ins f32mem:$src), ZeroArgFP, [(set RFP32:$dst, (loadf32 addr:$src))]>; @@ -407,7 +430,7 @@ let isReMaterializable = 1 in [(set RFP64:$dst, (loadf64 addr:$src))]>; def LD_Fp80m : FpI_<(outs RFP80:$dst), (ins f80mem:$src), ZeroArgFP, [(set RFP80:$dst, (loadf80 addr:$src))]>; -} +} // canFoldAsLoad def LD_Fp32m64 : FpIf64<(outs RFP64:$dst), (ins f32mem:$src), ZeroArgFP, [(set RFP64:$dst, (f64 (extloadf32 addr:$src)))]>; def LD_Fp64m80 : FpI_<(outs RFP80:$dst), (ins f64mem:$src), ZeroArgFP, @@ -432,7 +455,9 @@ def ILD_Fp32m80: FpI_<(outs RFP80:$dst), (ins i32mem:$src), ZeroArgFP, [(set RFP80:$dst, (X86fild addr:$src, i32))]>; def ILD_Fp64m80: FpI_<(outs RFP80:$dst), (ins i64mem:$src), ZeroArgFP, [(set RFP80:$dst, (X86fild addr:$src, i64))]>; +} // SchedRW +let SchedRW = [WriteStore] in { def ST_Fp32m : FpIf32<(outs), (ins f32mem:$op, RFP32:$src), OneArgFP, [(store RFP32:$src, addr:$op)]>; def ST_Fp64m32 : FpIf64<(outs), (ins f32mem:$op, RFP64:$src), OneArgFP, @@ -451,9 +476,11 @@ def ST_FpP64m32 : FpIf64<(outs), (ins f32mem:$op, RFP64:$src), OneArgFP, []>; def ST_FpP64m : FpIf64<(outs), (ins f64mem:$op, RFP64:$src), OneArgFP, []>; def ST_FpP80m32 : FpI_<(outs), (ins f32mem:$op, RFP80:$src), OneArgFP, []>; def ST_FpP80m64 : FpI_<(outs), (ins f64mem:$op, RFP80:$src), OneArgFP, []>; -} +} // mayStore + def ST_FpP80m : FpI_<(outs), (ins f80mem:$op, RFP80:$src), OneArgFP, [(store RFP80:$src, addr:$op)]>; + let mayStore = 1, hasSideEffects = 0 in { def IST_Fp16m32 : FpIf32<(outs), (ins i16mem:$op, RFP32:$src), OneArgFP, []>; def IST_Fp32m32 : FpIf32<(outs), (ins i32mem:$op, RFP32:$src), OneArgFP, []>; @@ -464,7 +491,8 @@ def IST_Fp64m64 : FpIf64<(outs), (ins i64mem:$op, RFP64:$src), OneArgFP, []>; def IST_Fp16m80 : FpI_<(outs), (ins i16mem:$op, RFP80:$src), OneArgFP, []>; def IST_Fp32m80 : FpI_<(outs), (ins i32mem:$op, RFP80:$src), OneArgFP, []>; def IST_Fp64m80 : FpI_<(outs), (ins i64mem:$op, RFP80:$src), OneArgFP, []>; -} +} // mayStore +} // SchedRW let mayLoad = 1, SchedRW = [WriteLoad] in { def LD_F32m : FPI<0xD9, MRM0m, (outs), (ins f32mem:$src), "fld{s}\t$src", @@ -504,7 +532,7 @@ def IST_FP64m : FPI<0xDF, MRM7m, (outs), (ins i64mem:$dst), "fistp{ll}\t$dst", } // FISTTP requires 
SSE3 even though it's a FPStack op. -let Predicates = [HasSSE3] in { +let Predicates = [HasSSE3], SchedRW = [WriteStore] in { def ISTT_Fp16m32 : FpI_<(outs), (ins i16mem:$op, RFP32:$src), OneArgFP, [(X86fp_to_i16mem RFP32:$src, addr:$op)]>; def ISTT_Fp32m32 : FpI_<(outs), (ins i32mem:$op, RFP32:$src), OneArgFP, @@ -543,7 +571,7 @@ def XCH_F : FPI<0xD9, MRM1r, (outs), (ins RST:$op), "fxch\t$op", IIC_FXCH>; } // Floating point constant loads. -let isReMaterializable = 1 in { +let isReMaterializable = 1, SchedRW = [WriteZero] in { def LD_Fp032 : FpIf32<(outs RFP32:$dst), (ins), ZeroArgFP, [(set RFP32:$dst, fpimm0)]>; def LD_Fp132 : FpIf32<(outs RFP32:$dst), (ins), ZeroArgFP, @@ -667,19 +695,18 @@ def FSCALE : I<0xD9, MRM_FD, (outs), (ins), "fscale", [], IIC_FSCALE>; def FCOMPP : I<0xDE, MRM_D9, (outs), (ins), "fcompp", [], IIC_FCOMPP>; } // Defs = [FPSW] -let Predicates = [HasFXSR] in { - def FXSAVE : I<0xAE, MRM0m, (outs), (ins opaque512mem:$dst), - "fxsave\t$dst", [(int_x86_fxsave addr:$dst)], IIC_FXSAVE>, TB; - def FXSAVE64 : RI<0xAE, MRM0m, (outs), (ins opaque512mem:$dst), - "fxsave64\t$dst", [(int_x86_fxsave64 addr:$dst)], - IIC_FXSAVE>, TB, Requires<[In64BitMode]>; - def FXRSTOR : I<0xAE, MRM1m, (outs), (ins opaque512mem:$src), - "fxrstor\t$src", [(int_x86_fxrstor addr:$src)], IIC_FXRSTOR>, - TB; - def FXRSTOR64 : RI<0xAE, MRM1m, (outs), (ins opaque512mem:$src), - "fxrstor64\t$src", [(int_x86_fxrstor64 addr:$src)], - IIC_FXRSTOR>, TB, Requires<[In64BitMode]>; -} // Predicates = [FeatureFXSR] +def FXSAVE : I<0xAE, MRM0m, (outs), (ins opaque512mem:$dst), + "fxsave\t$dst", [(int_x86_fxsave addr:$dst)], IIC_FXSAVE>, TB, + Requires<[HasFXSR]>; +def FXSAVE64 : RI<0xAE, MRM0m, (outs), (ins opaque512mem:$dst), + "fxsave64\t$dst", [(int_x86_fxsave64 addr:$dst)], + IIC_FXSAVE>, TB, Requires<[HasFXSR, In64BitMode]>; +def FXRSTOR : I<0xAE, MRM1m, (outs), (ins opaque512mem:$src), + "fxrstor\t$src", [(int_x86_fxrstor addr:$src)], IIC_FXRSTOR>, + TB, Requires<[HasFXSR]>; +def FXRSTOR64 : RI<0xAE, MRM1m, (outs), (ins opaque512mem:$src), + "fxrstor64\t$src", [(int_x86_fxrstor64 addr:$src)], + IIC_FXRSTOR>, TB, Requires<[HasFXSR, In64BitMode]>; } // SchedRW //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index 2a6ed02fadab..0b266e5591b4 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -349,8 +349,9 @@ class X86Inst opcod, Format f, ImmType i, dag outs, dag ins, let TSFlags{54} = hasEVEX_RC; } -class PseudoI pattern> - : X86Inst<0, Pseudo, NoImm, oops, iops, "", NoItinerary> { +class PseudoI pattern, + InstrItinClass itin = NoItinerary> + : X86Inst<0, Pseudo, NoImm, oops, iops, "", itin> { let Pattern = pattern; } @@ -423,9 +424,8 @@ class FPI o, Format F, dag outs, dag ins, string asm, // FpI_ - Floating Point Pseudo Instruction template. Not Predicated. 
class FpI_ pattern, InstrItinClass itin = NoItinerary> - : X86Inst<0, Pseudo, NoImm, outs, ins, "", itin> { + : PseudoI { let FPForm = fp; - let Pattern = pattern; } // Templates for instructions that use a 16- or 32-bit segmented address as diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index cb27fcce3493..63a62ed636af 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -465,9 +465,10 @@ def X86SubVBroadcast : SDNode<"X86ISD::SUBV_BROADCAST", def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>; def X86VBroadcastm : SDNode<"X86ISD::VBROADCASTM", SDTVBroadcastm>; -def X86Vextract : SDNode<"X86ISD::VEXTRACT", SDTypeProfile<1, 2, - [SDTCisVec<1>, - SDTCisPtrTy<2>]>, []>; +def X86kextract : SDNode<"ISD::EXTRACT_VECTOR_ELT", + SDTypeProfile<1, 2, [SDTCisVT<0, i32>, + SDTCVecEltisVT<1, i1>, + SDTCisPtrTy<2>]>>; def X86Blendi : SDNode<"X86ISD::BLENDI", SDTBlend>; @@ -670,8 +671,6 @@ def X86vfproundRnd: SDNode<"X86ISD::VFPROUND_RND", SDTCisOpSmallerThanOp<0, 1>, SDTCisVT<2, i32>]>>; -def X86cvt2mask : SDNode<"X86ISD::CVT2MASK", SDTIntTruncOp>; - // galois field arithmetic def X86GF2P8affineinvqb : SDNode<"X86ISD::GF2P8AFFINEINVQB", SDTBlend>; def X86GF2P8affineqb : SDNode<"X86ISD::GF2P8AFFINEQB", SDTBlend>; diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index bd8d447fb883..de1a3b479704 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -47,8 +47,9 @@ using namespace llvm; #include "X86GenInstrInfo.inc" static cl::opt -NoFusing("disable-spill-fusing", - cl::desc("Disable fusing of spill code into instructions")); + NoFusing("disable-spill-fusing", + cl::desc("Disable fusing of spill code into instructions"), + cl::Hidden); static cl::opt PrintFailedFusing("print-failed-fuse-candidates", cl::desc("Print instructions that the allocator wants to" @@ -349,6 +350,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::BT16ri8, X86::BT16mi8, TB_FOLDED_LOAD }, { X86::BT32ri8, X86::BT32mi8, TB_FOLDED_LOAD }, { X86::BT64ri8, X86::BT64mi8, TB_FOLDED_LOAD }, + { X86::CALL16r, X86::CALL16m, TB_FOLDED_LOAD }, { X86::CALL32r, X86::CALL32m, TB_FOLDED_LOAD }, { X86::CALL64r, X86::CALL64m, TB_FOLDED_LOAD }, { X86::CMP16ri, X86::CMP16mi, TB_FOLDED_LOAD }, @@ -361,6 +363,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::CMP64ri8, X86::CMP64mi8, TB_FOLDED_LOAD }, { X86::CMP64rr, X86::CMP64mr, TB_FOLDED_LOAD }, { X86::CMP8ri, X86::CMP8mi, TB_FOLDED_LOAD }, + { X86::CMP8ri8, X86::CMP8mi8, TB_FOLDED_LOAD }, { X86::CMP8rr, X86::CMP8mr, TB_FOLDED_LOAD }, { X86::DIV16r, X86::DIV16m, TB_FOLDED_LOAD }, { X86::DIV32r, X86::DIV32m, TB_FOLDED_LOAD }, @@ -375,6 +378,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::IMUL32r, X86::IMUL32m, TB_FOLDED_LOAD }, { X86::IMUL64r, X86::IMUL64m, TB_FOLDED_LOAD }, { X86::IMUL8r, X86::IMUL8m, TB_FOLDED_LOAD }, + { X86::JMP16r, X86::JMP16m, TB_FOLDED_LOAD }, { X86::JMP32r, X86::JMP32m, TB_FOLDED_LOAD }, { X86::JMP64r, X86::JMP64m, TB_FOLDED_LOAD }, { X86::MOV16ri, X86::MOV16mi, TB_FOLDED_STORE }, @@ -538,8 +542,9 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VMOVDQU64Z128rr, X86::VMOVDQU64Z128mr, TB_FOLDED_STORE }, // F16C foldable instructions - { X86::VCVTPS2PHrr, X86::VCVTPS2PHmr, TB_FOLDED_STORE }, - { X86::VCVTPS2PHYrr, X86::VCVTPS2PHYmr, TB_FOLDED_STORE } + { X86::VCVTPS2PHYrr, X86::VCVTPS2PHYmr, TB_FOLDED_STORE }, + { X86::VCVTPS2PHZ256rr, X86::VCVTPS2PHZ256mr, TB_FOLDED_STORE }, + { 
X86::VCVTPS2PHZrr, X86::VCVTPS2PHZmr, TB_FOLDED_STORE }, }; for (X86MemoryFoldTableEntry Entry : MemoryFoldTable0) { @@ -558,14 +563,30 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::CMP32rr, X86::CMP32rm, 0 }, { X86::CMP64rr, X86::CMP64rm, 0 }, { X86::CMP8rr, X86::CMP8rm, 0 }, + { X86::CVTDQ2PDrr, X86::CVTDQ2PDrm, TB_NO_REVERSE }, + { X86::CVTDQ2PSrr, X86::CVTDQ2PSrm, TB_ALIGN_16 }, + { X86::CVTPD2DQrr, X86::CVTPD2DQrm, TB_ALIGN_16 }, + { X86::CVTPD2PSrr, X86::CVTPD2PSrm, TB_ALIGN_16 }, + { X86::CVTPS2DQrr, X86::CVTPS2DQrm, TB_ALIGN_16 }, + { X86::CVTPS2PDrr, X86::CVTPS2PDrm, TB_NO_REVERSE }, + { X86::CVTSD2SI64rr_Int, X86::CVTSD2SI64rm_Int, TB_NO_REVERSE }, + { X86::CVTSD2SIrr_Int, X86::CVTSD2SIrm_Int, TB_NO_REVERSE }, { X86::CVTSD2SSrr, X86::CVTSD2SSrm, 0 }, - { X86::CVTSI2SD64rr, X86::CVTSI2SD64rm, 0 }, + { X86::CVTSI642SDrr, X86::CVTSI642SDrm, 0 }, { X86::CVTSI2SDrr, X86::CVTSI2SDrm, 0 }, - { X86::CVTSI2SS64rr, X86::CVTSI2SS64rm, 0 }, + { X86::CVTSI642SSrr, X86::CVTSI642SSrm, 0 }, { X86::CVTSI2SSrr, X86::CVTSI2SSrm, 0 }, { X86::CVTSS2SDrr, X86::CVTSS2SDrm, 0 }, + { X86::CVTSS2SI64rr_Int, X86::CVTSS2SI64rm_Int, TB_NO_REVERSE }, + { X86::CVTSS2SIrr_Int, X86::CVTSS2SIrm_Int, TB_NO_REVERSE }, + { X86::CVTTPD2DQrr, X86::CVTTPD2DQrm, TB_ALIGN_16 }, + { X86::CVTTPS2DQrr, X86::CVTTPS2DQrm, TB_ALIGN_16 }, { X86::CVTTSD2SI64rr, X86::CVTTSD2SI64rm, 0 }, + { X86::CVTTSD2SI64rr_Int,X86::CVTTSD2SI64rm_Int, TB_NO_REVERSE }, { X86::CVTTSD2SIrr, X86::CVTTSD2SIrm, 0 }, + { X86::CVTTSD2SIrr_Int, X86::CVTTSD2SIrm_Int, TB_NO_REVERSE }, + { X86::CVTTSS2SI64rr_Int,X86::CVTTSS2SI64rm_Int, TB_NO_REVERSE }, + { X86::CVTTSS2SIrr_Int, X86::CVTTSS2SIrm_Int, TB_NO_REVERSE }, { X86::CVTTSS2SI64rr, X86::CVTTSS2SI64rm, 0 }, { X86::CVTTSS2SIrr, X86::CVTTSS2SIrm, 0 }, { X86::IMUL16rri, X86::IMUL16rmi, 0 }, @@ -576,22 +597,6 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::IMUL64rri8, X86::IMUL64rmi8, 0 }, { X86::Int_COMISDrr, X86::Int_COMISDrm, TB_NO_REVERSE }, { X86::Int_COMISSrr, X86::Int_COMISSrm, TB_NO_REVERSE }, - { X86::CVTSD2SI64rr, X86::CVTSD2SI64rm, TB_NO_REVERSE }, - { X86::CVTSD2SIrr, X86::CVTSD2SIrm, TB_NO_REVERSE }, - { X86::CVTSS2SI64rr, X86::CVTSS2SI64rm, TB_NO_REVERSE }, - { X86::CVTSS2SIrr, X86::CVTSS2SIrm, TB_NO_REVERSE }, - { X86::CVTDQ2PDrr, X86::CVTDQ2PDrm, TB_NO_REVERSE }, - { X86::CVTDQ2PSrr, X86::CVTDQ2PSrm, TB_ALIGN_16 }, - { X86::CVTPD2DQrr, X86::CVTPD2DQrm, TB_ALIGN_16 }, - { X86::CVTPD2PSrr, X86::CVTPD2PSrm, TB_ALIGN_16 }, - { X86::CVTPS2DQrr, X86::CVTPS2DQrm, TB_ALIGN_16 }, - { X86::CVTPS2PDrr, X86::CVTPS2PDrm, TB_NO_REVERSE }, - { X86::CVTTPD2DQrr, X86::CVTTPD2DQrm, TB_ALIGN_16 }, - { X86::CVTTPS2DQrr, X86::CVTTPS2DQrm, TB_ALIGN_16 }, - { X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm, TB_NO_REVERSE }, - { X86::Int_CVTTSD2SIrr, X86::Int_CVTTSD2SIrm, TB_NO_REVERSE }, - { X86::Int_CVTTSS2SI64rr,X86::Int_CVTTSS2SI64rm, TB_NO_REVERSE }, - { X86::Int_CVTTSS2SIrr, X86::Int_CVTTSS2SIrm, TB_NO_REVERSE }, { X86::Int_UCOMISDrr, X86::Int_UCOMISDrm, TB_NO_REVERSE }, { X86::Int_UCOMISSrr, X86::Int_UCOMISSrm, TB_NO_REVERSE }, { X86::MOV16rr, X86::MOV16rm, 0 }, @@ -667,11 +672,11 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::UCOMISSrr, X86::UCOMISSrm, 0 }, // MMX version of foldable instructions - { X86::MMX_CVTPD2PIirr, X86::MMX_CVTPD2PIirm, 0 }, + { X86::MMX_CVTPD2PIirr, X86::MMX_CVTPD2PIirm, TB_ALIGN_16 }, { X86::MMX_CVTPI2PDirr, X86::MMX_CVTPI2PDirm, 0 }, - { X86::MMX_CVTPS2PIirr, X86::MMX_CVTPS2PIirm, 0 }, - { X86::MMX_CVTTPD2PIirr, X86::MMX_CVTTPD2PIirm, 0 }, - { 
X86::MMX_CVTTPS2PIirr, X86::MMX_CVTTPS2PIirm, 0 }, + { X86::MMX_CVTPS2PIirr, X86::MMX_CVTPS2PIirm, TB_NO_REVERSE }, + { X86::MMX_CVTTPD2PIirr, X86::MMX_CVTTPD2PIirm, TB_ALIGN_16 }, + { X86::MMX_CVTTPS2PIirr, X86::MMX_CVTTPS2PIirm, TB_NO_REVERSE }, { X86::MMX_MOVD64to64rr, X86::MMX_MOVQ64rm, 0 }, { X86::MMX_PABSBrr64, X86::MMX_PABSBrm64, 0 }, { X86::MMX_PABSDrr64, X86::MMX_PABSDrm64, 0 }, @@ -693,17 +698,17 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::Int_VUCOMISDrr, X86::Int_VUCOMISDrm, TB_NO_REVERSE }, { X86::Int_VUCOMISSrr, X86::Int_VUCOMISSrm, TB_NO_REVERSE }, { X86::VCVTTSD2SI64rr, X86::VCVTTSD2SI64rm, 0 }, - { X86::Int_VCVTTSD2SI64rr,X86::Int_VCVTTSD2SI64rm,TB_NO_REVERSE }, + { X86::VCVTTSD2SI64rr_Int,X86::VCVTTSD2SI64rm_Int,TB_NO_REVERSE }, { X86::VCVTTSD2SIrr, X86::VCVTTSD2SIrm, 0 }, - { X86::Int_VCVTTSD2SIrr,X86::Int_VCVTTSD2SIrm, TB_NO_REVERSE }, + { X86::VCVTTSD2SIrr_Int,X86::VCVTTSD2SIrm_Int, TB_NO_REVERSE }, { X86::VCVTTSS2SI64rr, X86::VCVTTSS2SI64rm, 0 }, - { X86::Int_VCVTTSS2SI64rr,X86::Int_VCVTTSS2SI64rm,TB_NO_REVERSE }, + { X86::VCVTTSS2SI64rr_Int,X86::VCVTTSS2SI64rm_Int,TB_NO_REVERSE }, { X86::VCVTTSS2SIrr, X86::VCVTTSS2SIrm, 0 }, - { X86::Int_VCVTTSS2SIrr,X86::Int_VCVTTSS2SIrm, TB_NO_REVERSE }, - { X86::VCVTSD2SI64rr, X86::VCVTSD2SI64rm, TB_NO_REVERSE }, - { X86::VCVTSD2SIrr, X86::VCVTSD2SIrm, TB_NO_REVERSE }, - { X86::VCVTSS2SI64rr, X86::VCVTSS2SI64rm, TB_NO_REVERSE }, - { X86::VCVTSS2SIrr, X86::VCVTSS2SIrm, TB_NO_REVERSE }, + { X86::VCVTTSS2SIrr_Int,X86::VCVTTSS2SIrm_Int, TB_NO_REVERSE }, + { X86::VCVTSD2SI64rr_Int, X86::VCVTSD2SI64rm_Int, TB_NO_REVERSE }, + { X86::VCVTSD2SIrr_Int, X86::VCVTSD2SIrm_Int, TB_NO_REVERSE }, + { X86::VCVTSS2SI64rr_Int, X86::VCVTSS2SI64rm_Int, TB_NO_REVERSE }, + { X86::VCVTSS2SIrr_Int, X86::VCVTSS2SIrm_Int, TB_NO_REVERSE }, { X86::VCVTDQ2PDrr, X86::VCVTDQ2PDrm, TB_NO_REVERSE }, { X86::VCVTDQ2PSrr, X86::VCVTDQ2PSrm, 0 }, { X86::VCVTPD2DQrr, X86::VCVTPD2DQrm, 0 }, @@ -971,19 +976,21 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPMOVZXDQZrr, X86::VPMOVZXDQZrm, 0 }, { X86::VPMOVZXWDZrr, X86::VPMOVZXWDZrm, 0 }, { X86::VPMOVZXWQZrr, X86::VPMOVZXWQZrm, 0 }, + { X86::VPOPCNTBZrr, X86::VPOPCNTBZrm, 0 }, { X86::VPOPCNTDZrr, X86::VPOPCNTDZrm, 0 }, { X86::VPOPCNTQZrr, X86::VPOPCNTQZrm, 0 }, + { X86::VPOPCNTWZrr, X86::VPOPCNTWZrm, 0 }, { X86::VPSHUFDZri, X86::VPSHUFDZmi, 0 }, { X86::VPSHUFHWZri, X86::VPSHUFHWZmi, 0 }, { X86::VPSHUFLWZri, X86::VPSHUFLWZmi, 0 }, - { X86::VPSLLDQZ512rr, X86::VPSLLDQZ512rm, 0 }, + { X86::VPSLLDQZrr, X86::VPSLLDQZrm, 0 }, { X86::VPSLLDZri, X86::VPSLLDZmi, 0 }, { X86::VPSLLQZri, X86::VPSLLQZmi, 0 }, { X86::VPSLLWZri, X86::VPSLLWZmi, 0 }, { X86::VPSRADZri, X86::VPSRADZmi, 0 }, { X86::VPSRAQZri, X86::VPSRAQZmi, 0 }, { X86::VPSRAWZri, X86::VPSRAWZmi, 0 }, - { X86::VPSRLDQZ512rr, X86::VPSRLDQZ512rm, 0 }, + { X86::VPSRLDQZrr, X86::VPSRLDQZrm, 0 }, { X86::VPSRLDZri, X86::VPSRLDZmi, 0 }, { X86::VPSRLQZri, X86::VPSRLQZmi, 0 }, { X86::VPSRLWZri, X86::VPSRLWZmi, 0 }, @@ -1028,6 +1035,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPMOVZXDQZ256rr, X86::VPMOVZXDQZ256rm, 0 }, { X86::VPMOVZXWDZ256rr, X86::VPMOVZXWDZ256rm, 0 }, { X86::VPMOVZXWQZ256rr, X86::VPMOVZXWQZ256rm, TB_NO_REVERSE }, + { X86::VPOPCNTBZ256rr, X86::VPOPCNTBZ256rm, 0 }, + { X86::VPOPCNTDZ256rr, X86::VPOPCNTDZ256rm, 0 }, + { X86::VPOPCNTQZ256rr, X86::VPOPCNTQZ256rm, 0 }, + { X86::VPOPCNTWZ256rr, X86::VPOPCNTWZ256rm, 0 }, { X86::VPSHUFDZ256ri, X86::VPSHUFDZ256mi, 0 }, { X86::VPSHUFHWZ256ri, X86::VPSHUFHWZ256mi, 0 }, { X86::VPSHUFLWZ256ri, 
X86::VPSHUFLWZ256mi, 0 }, @@ -1080,6 +1091,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPMOVZXDQZ128rr, X86::VPMOVZXDQZ128rm, TB_NO_REVERSE }, { X86::VPMOVZXWDZ128rr, X86::VPMOVZXWDZ128rm, TB_NO_REVERSE }, { X86::VPMOVZXWQZ128rr, X86::VPMOVZXWQZ128rm, TB_NO_REVERSE }, + { X86::VPOPCNTBZ128rr, X86::VPOPCNTBZ128rm, 0 }, + { X86::VPOPCNTDZ128rr, X86::VPOPCNTDZ128rm, 0 }, + { X86::VPOPCNTQZ128rr, X86::VPOPCNTQZ128rm, 0 }, + { X86::VPOPCNTWZ128rr, X86::VPOPCNTWZ128rm, 0 }, { X86::VPSHUFDZ128ri, X86::VPSHUFDZ128mi, 0 }, { X86::VPSHUFHWZ128ri, X86::VPSHUFHWZ128mi, 0 }, { X86::VPSHUFLWZ128ri, X86::VPSHUFLWZ128mi, 0 }, @@ -1096,8 +1111,11 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPSRLWZ128ri, X86::VPSRLWZ128mi, 0 }, // F16C foldable instructions - { X86::VCVTPH2PSrr, X86::VCVTPH2PSrm, 0 }, + { X86::VCVTPH2PSrr, X86::VCVTPH2PSrm, TB_NO_REVERSE }, { X86::VCVTPH2PSYrr, X86::VCVTPH2PSYrm, 0 }, + { X86::VCVTPH2PSZ128rr, X86::VCVTPH2PSZ128rm, TB_NO_REVERSE }, + { X86::VCVTPH2PSZ256rr, X86::VCVTPH2PSZ256rm, 0 }, + { X86::VCVTPH2PSZrr, X86::VCVTPH2PSZrm, 0 }, // AES foldable instructions { X86::AESIMCrr, X86::AESIMCrm, TB_ALIGN_16 }, @@ -1114,8 +1132,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) } static const X86MemoryFoldTableEntry MemoryFoldTable2[] = { + { X86::ADC16rr, X86::ADC16rm, 0 }, { X86::ADC32rr, X86::ADC32rm, 0 }, { X86::ADC64rr, X86::ADC64rm, 0 }, + { X86::ADC8rr, X86::ADC8rm, 0 }, { X86::ADD16rr, X86::ADD16rm, 0 }, { X86::ADD16rr_DB, X86::ADD16rm, TB_NO_REVERSE }, { X86::ADD32rr, X86::ADD32rm, 0 }, @@ -1194,9 +1214,16 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::CMPPDrri, X86::CMPPDrmi, TB_ALIGN_16 }, { X86::CMPPSrri, X86::CMPPSrmi, TB_ALIGN_16 }, { X86::CMPSDrr, X86::CMPSDrm, 0 }, + { X86::CMPSDrr_Int, X86::CMPSDrm_Int, TB_NO_REVERSE }, { X86::CMPSSrr, X86::CMPSSrm, 0 }, + { X86::CMPSSrr_Int, X86::CMPSSrm_Int, TB_NO_REVERSE }, + { X86::CRC32r32r16, X86::CRC32r32m16, 0 }, { X86::CRC32r32r32, X86::CRC32r32m32, 0 }, + { X86::CRC32r32r8, X86::CRC32r32m8, 0 }, { X86::CRC32r64r64, X86::CRC32r64m64, 0 }, + { X86::CRC32r64r8, X86::CRC32r64m8, 0 }, + { X86::CVTSD2SSrr_Int, X86::CVTSD2SSrm_Int, TB_NO_REVERSE }, + { X86::CVTSS2SDrr_Int, X86::CVTSS2SDrm_Int, TB_NO_REVERSE }, { X86::DIVPDrr, X86::DIVPDrm, TB_ALIGN_16 }, { X86::DIVPSrr, X86::DIVPSrm, TB_ALIGN_16 }, { X86::DIVSDrr, X86::DIVSDrm, 0 }, @@ -1212,14 +1239,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::IMUL16rr, X86::IMUL16rm, 0 }, { X86::IMUL32rr, X86::IMUL32rm, 0 }, { X86::IMUL64rr, X86::IMUL64rm, 0 }, - { X86::Int_CMPSDrr, X86::Int_CMPSDrm, TB_NO_REVERSE }, - { X86::Int_CMPSSrr, X86::Int_CMPSSrm, TB_NO_REVERSE }, - { X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm, TB_NO_REVERSE }, - { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm, 0 }, - { X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm, 0 }, - { X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm, 0 }, - { X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm, 0 }, - { X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm, TB_NO_REVERSE }, + { X86::CVTSI642SDrr_Int,X86::CVTSI642SDrm_Int, 0 }, + { X86::CVTSI2SDrr_Int, X86::CVTSI2SDrm_Int, 0 }, + { X86::CVTSI642SSrr_Int,X86::CVTSI642SSrm_Int, 0 }, + { X86::CVTSI2SSrr_Int, X86::CVTSI2SSrm_Int, 0 }, { X86::MAXPDrr, X86::MAXPDrm, TB_ALIGN_16 }, { X86::MAXCPDrr, X86::MAXCPDrm, TB_ALIGN_16 }, { X86::MAXPSrr, X86::MAXPSrm, TB_ALIGN_16 }, @@ -1346,8 +1369,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::PXORrr, X86::PXORrm, TB_ALIGN_16 }, { X86::ROUNDSDr_Int, X86::ROUNDSDm_Int, TB_NO_REVERSE }, { X86::ROUNDSSr_Int, 
X86::ROUNDSSm_Int, TB_NO_REVERSE }, + { X86::SBB16rr, X86::SBB16rm, 0 }, { X86::SBB32rr, X86::SBB32rm, 0 }, { X86::SBB64rr, X86::SBB64rm, 0 }, + { X86::SBB8rr, X86::SBB8rm, 0 }, { X86::SHUFPDrri, X86::SHUFPDrmi, TB_ALIGN_16 }, { X86::SHUFPSrri, X86::SHUFPSrmi, TB_ALIGN_16 }, { X86::SUB16rr, X86::SUB16rm, 0 }, @@ -1464,14 +1489,14 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::PMULHRWrr, X86::PMULHRWrm, 0 }, // AVX 128-bit versions of foldable instructions - { X86::VCVTSI2SD64rr, X86::VCVTSI2SD64rm, 0 }, - { X86::Int_VCVTSI2SD64rr, X86::Int_VCVTSI2SD64rm, 0 }, + { X86::VCVTSI642SDrr, X86::VCVTSI642SDrm, 0 }, + { X86::VCVTSI642SDrr_Int, X86::VCVTSI642SDrm_Int, 0 }, { X86::VCVTSI2SDrr, X86::VCVTSI2SDrm, 0 }, - { X86::Int_VCVTSI2SDrr, X86::Int_VCVTSI2SDrm, 0 }, - { X86::VCVTSI2SS64rr, X86::VCVTSI2SS64rm, 0 }, - { X86::Int_VCVTSI2SS64rr, X86::Int_VCVTSI2SS64rm, 0 }, + { X86::VCVTSI2SDrr_Int, X86::VCVTSI2SDrm_Int, 0 }, + { X86::VCVTSI642SSrr, X86::VCVTSI642SSrm, 0 }, + { X86::VCVTSI642SSrr_Int, X86::VCVTSI642SSrm_Int, 0 }, { X86::VCVTSI2SSrr, X86::VCVTSI2SSrm, 0 }, - { X86::Int_VCVTSI2SSrr, X86::Int_VCVTSI2SSrm, 0 }, + { X86::VCVTSI2SSrr_Int, X86::VCVTSI2SSrm_Int, 0 }, { X86::VADDPDrr, X86::VADDPDrm, 0 }, { X86::VADDPSrr, X86::VADDPSrm, 0 }, { X86::VADDSDrr, X86::VADDSDrm, 0 }, @@ -1491,7 +1516,9 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VCMPPDrri, X86::VCMPPDrmi, 0 }, { X86::VCMPPSrri, X86::VCMPPSrmi, 0 }, { X86::VCMPSDrr, X86::VCMPSDrm, 0 }, + { X86::VCMPSDrr_Int, X86::VCMPSDrm_Int, TB_NO_REVERSE }, { X86::VCMPSSrr, X86::VCMPSSrm, 0 }, + { X86::VCMPSSrr_Int, X86::VCMPSSrm_Int, TB_NO_REVERSE }, { X86::VDIVPDrr, X86::VDIVPDrm, 0 }, { X86::VDIVPSrr, X86::VDIVPSrm, 0 }, { X86::VDIVSDrr, X86::VDIVSDrm, 0 }, @@ -1504,8 +1531,6 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VHADDPSrr, X86::VHADDPSrm, 0 }, { X86::VHSUBPDrr, X86::VHSUBPDrm, 0 }, { X86::VHSUBPSrr, X86::VHSUBPSrm, 0 }, - { X86::Int_VCMPSDrr, X86::Int_VCMPSDrm, TB_NO_REVERSE }, - { X86::Int_VCMPSSrr, X86::Int_VCMPSSrm, TB_NO_REVERSE }, { X86::VMAXCPDrr, X86::VMAXCPDrm, 0 }, { X86::VMAXCPSrr, X86::VMAXCPSrm, 0 }, { X86::VMAXCSDrr, X86::VMAXCSDrm, 0 }, @@ -2041,7 +2066,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPMULUDQZrr, X86::VPMULUDQZrm, 0 }, { X86::VPORDZrr, X86::VPORDZrm, 0 }, { X86::VPORQZrr, X86::VPORQZrm, 0 }, - { X86::VPSADBWZ512rr, X86::VPSADBWZ512rm, 0 }, + { X86::VPSADBWZrr, X86::VPSADBWZrm, 0 }, { X86::VPSHUFBZrr, X86::VPSHUFBZrm, 0 }, { X86::VPSLLDZrr, X86::VPSLLDZrm, 0 }, { X86::VPSLLQZrr, X86::VPSLLQZrm, 0 }, @@ -2079,6 +2104,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPUNPCKLWDZrr, X86::VPUNPCKLWDZrm, 0 }, { X86::VPXORDZrr, X86::VPXORDZrm, 0 }, { X86::VPXORQZrr, X86::VPXORQZrm, 0 }, + { X86::VSHUFF32X4Zrri, X86::VSHUFF32X4Zrmi, 0 }, + { X86::VSHUFF64X2Zrri, X86::VSHUFF64X2Zrmi, 0 }, + { X86::VSHUFI64X2Zrri, X86::VSHUFI64X2Zrmi, 0 }, + { X86::VSHUFI32X4Zrri, X86::VSHUFI32X4Zrmi, 0 }, { X86::VSHUFPDZrri, X86::VSHUFPDZrmi, 0 }, { X86::VSHUFPSZrri, X86::VSHUFPSZrmi, 0 }, { X86::VSUBPDZrr, X86::VSUBPDZrm, 0 }, @@ -2355,6 +2384,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPXORDZ256rr, X86::VPXORDZ256rm, 0 }, { X86::VPXORQZ128rr, X86::VPXORQZ128rm, 0 }, { X86::VPXORQZ256rr, X86::VPXORQZ256rm, 0 }, + { X86::VSHUFF32X4Z256rri, X86::VSHUFF32X4Z256rmi, 0 }, + { X86::VSHUFF64X2Z256rri, X86::VSHUFF64X2Z256rmi, 0 }, + { X86::VSHUFI32X4Z256rri, X86::VSHUFI32X4Z256rmi, 0 }, + { X86::VSHUFI64X2Z256rri, X86::VSHUFI64X2Z256rmi, 0 }, { X86::VSHUFPDZ128rri, 
X86::VSHUFPDZ128rmi, 0 }, { X86::VSHUFPDZ256rri, X86::VSHUFPDZ256rmi, 0 }, { X86::VSHUFPSZ128rri, X86::VSHUFPSZ128rmi, 0 }, @@ -2403,8 +2436,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPMOVZXDQZrrkz, X86::VPMOVZXDQZrmkz, 0 }, { X86::VPMOVZXWDZrrkz, X86::VPMOVZXWDZrmkz, 0 }, { X86::VPMOVZXWQZrrkz, X86::VPMOVZXWQZrmkz, 0 }, + { X86::VPOPCNTBZrrkz, X86::VPOPCNTBZrmkz, 0 }, { X86::VPOPCNTDZrrkz, X86::VPOPCNTDZrmkz, 0 }, { X86::VPOPCNTQZrrkz, X86::VPOPCNTQZrmkz, 0 }, + { X86::VPOPCNTWZrrkz, X86::VPOPCNTWZrmkz, 0 }, { X86::VPSHUFDZrikz, X86::VPSHUFDZmikz, 0 }, { X86::VPSHUFHWZrikz, X86::VPSHUFHWZmikz, 0 }, { X86::VPSHUFLWZrikz, X86::VPSHUFLWZmikz, 0 }, @@ -2445,6 +2480,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPMOVZXDQZ256rrkz, X86::VPMOVZXDQZ256rmkz, 0 }, { X86::VPMOVZXWDZ256rrkz, X86::VPMOVZXWDZ256rmkz, 0 }, { X86::VPMOVZXWQZ256rrkz, X86::VPMOVZXWQZ256rmkz, TB_NO_REVERSE }, + { X86::VPOPCNTBZ256rrkz, X86::VPOPCNTBZ256rmkz, 0 }, + { X86::VPOPCNTDZ256rrkz, X86::VPOPCNTDZ256rmkz, 0 }, + { X86::VPOPCNTQZ256rrkz, X86::VPOPCNTQZ256rmkz, 0 }, + { X86::VPOPCNTWZ256rrkz, X86::VPOPCNTWZ256rmkz, 0 }, { X86::VPSHUFDZ256rikz, X86::VPSHUFDZ256mikz, 0 }, { X86::VPSHUFHWZ256rikz, X86::VPSHUFHWZ256mikz, 0 }, { X86::VPSHUFLWZ256rikz, X86::VPSHUFLWZ256mikz, 0 }, @@ -2482,6 +2521,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPMOVZXDQZ128rrkz, X86::VPMOVZXDQZ128rmkz, TB_NO_REVERSE }, { X86::VPMOVZXWDZ128rrkz, X86::VPMOVZXWDZ128rmkz, TB_NO_REVERSE }, { X86::VPMOVZXWQZ128rrkz, X86::VPMOVZXWQZ128rmkz, TB_NO_REVERSE }, + { X86::VPOPCNTBZ128rrkz, X86::VPOPCNTBZ128rmkz, 0 }, + { X86::VPOPCNTDZ128rrkz, X86::VPOPCNTDZ128rmkz, 0 }, + { X86::VPOPCNTQZ128rrkz, X86::VPOPCNTQZ128rmkz, 0 }, + { X86::VPOPCNTWZ128rrkz, X86::VPOPCNTWZ128rmkz, 0 }, { X86::VPSHUFDZ128rikz, X86::VPSHUFDZ128mikz, 0 }, { X86::VPSHUFHWZ128rikz, X86::VPSHUFHWZ128mikz, 0 }, { X86::VPSHUFLWZ128rikz, X86::VPSHUFLWZ128mikz, 0 }, @@ -2655,14 +2698,14 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VMAXCPSZrrkz, X86::VMAXCPSZrmkz, 0 }, { X86::VMAXPDZrrkz, X86::VMAXPDZrmkz, 0 }, { X86::VMAXPSZrrkz, X86::VMAXPSZrmkz, 0 }, - { X86::VMAXSDZrr_Intkz, X86::VMAXSDZrm_Intkz, 0 }, - { X86::VMAXSSZrr_Intkz, X86::VMAXSSZrm_Intkz, 0 }, + { X86::VMAXSDZrr_Intkz, X86::VMAXSDZrm_Intkz, TB_NO_REVERSE }, + { X86::VMAXSSZrr_Intkz, X86::VMAXSSZrm_Intkz, TB_NO_REVERSE }, { X86::VMINCPDZrrkz, X86::VMINCPDZrmkz, 0 }, { X86::VMINCPSZrrkz, X86::VMINCPSZrmkz, 0 }, { X86::VMINPDZrrkz, X86::VMINPDZrmkz, 0 }, { X86::VMINPSZrrkz, X86::VMINPSZrmkz, 0 }, - { X86::VMINSDZrr_Intkz, X86::VMINSDZrm_Intkz, 0 }, - { X86::VMINSSZrr_Intkz, X86::VMINSSZrm_Intkz, 0 }, + { X86::VMINSDZrr_Intkz, X86::VMINSDZrm_Intkz, TB_NO_REVERSE }, + { X86::VMINSSZrr_Intkz, X86::VMINSSZrm_Intkz, TB_NO_REVERSE }, { X86::VMULPDZrrkz, X86::VMULPDZrmkz, 0 }, { X86::VMULPSZrrkz, X86::VMULPSZrmkz, 0 }, { X86::VMULSDZrr_Intkz, X86::VMULSDZrm_Intkz, TB_NO_REVERSE }, @@ -2758,6 +2801,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPUNPCKLWDZrrkz, X86::VPUNPCKLWDZrmkz, 0 }, { X86::VPXORDZrrkz, X86::VPXORDZrmkz, 0 }, { X86::VPXORQZrrkz, X86::VPXORQZrmkz, 0 }, + { X86::VSHUFF32X4Zrrikz, X86::VSHUFF32X4Zrmikz, 0 }, + { X86::VSHUFF64X2Zrrikz, X86::VSHUFF64X2Zrmikz, 0 }, + { X86::VSHUFI32X4Zrrikz, X86::VSHUFI32X4Zrmikz, 0 }, + { X86::VSHUFI64X2Zrrikz, X86::VSHUFI64X2Zrmikz, 0 }, { X86::VSHUFPDZrrikz, X86::VSHUFPDZrmikz, 0 }, { X86::VSHUFPSZrrikz, X86::VSHUFPSZrmikz, 0 }, { X86::VSUBPDZrrkz, X86::VSUBPDZrmkz, 0 }, @@ -2887,6 +2934,10 @@ 
X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPUNPCKLWDZ256rrkz, X86::VPUNPCKLWDZ256rmkz, 0 }, { X86::VPXORDZ256rrkz, X86::VPXORDZ256rmkz, 0 }, { X86::VPXORQZ256rrkz, X86::VPXORQZ256rmkz, 0 }, + { X86::VSHUFF32X4Z256rrikz, X86::VSHUFF32X4Z256rmikz, 0 }, + { X86::VSHUFF64X2Z256rrikz, X86::VSHUFF64X2Z256rmikz, 0 }, + { X86::VSHUFI32X4Z256rrikz, X86::VSHUFI32X4Z256rmikz, 0 }, + { X86::VSHUFI64X2Z256rrikz, X86::VSHUFI64X2Z256rmikz, 0 }, { X86::VSHUFPDZ256rrikz, X86::VSHUFPDZ256rmikz, 0 }, { X86::VSHUFPSZ256rrikz, X86::VSHUFPSZ256rmikz, 0 }, { X86::VSUBPDZ256rrkz, X86::VSUBPDZ256rmkz, 0 }, @@ -3044,8 +3095,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPMOVZXDQZrrk, X86::VPMOVZXDQZrmk, 0 }, { X86::VPMOVZXWDZrrk, X86::VPMOVZXWDZrmk, 0 }, { X86::VPMOVZXWQZrrk, X86::VPMOVZXWQZrmk, 0 }, + { X86::VPOPCNTBZrrk, X86::VPOPCNTBZrmk, 0 }, { X86::VPOPCNTDZrrk, X86::VPOPCNTDZrmk, 0 }, { X86::VPOPCNTQZrrk, X86::VPOPCNTQZrmk, 0 }, + { X86::VPOPCNTWZrrk, X86::VPOPCNTWZrmk, 0 }, { X86::VPSHUFDZrik, X86::VPSHUFDZmik, 0 }, { X86::VPSHUFHWZrik, X86::VPSHUFHWZmik, 0 }, { X86::VPSHUFLWZrik, X86::VPSHUFLWZmik, 0 }, @@ -3086,6 +3139,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPMOVZXDQZ256rrk, X86::VPMOVZXDQZ256rmk, 0 }, { X86::VPMOVZXWDZ256rrk, X86::VPMOVZXWDZ256rmk, 0 }, { X86::VPMOVZXWQZ256rrk, X86::VPMOVZXWQZ256rmk, TB_NO_REVERSE }, + { X86::VPOPCNTBZ256rrk, X86::VPOPCNTBZ256rmk, 0 }, + { X86::VPOPCNTDZ256rrk, X86::VPOPCNTDZ256rmk, 0 }, + { X86::VPOPCNTQZ256rrk, X86::VPOPCNTQZ256rmk, 0 }, + { X86::VPOPCNTWZ256rrk, X86::VPOPCNTWZ256rmk, 0 }, { X86::VPSHUFDZ256rik, X86::VPSHUFDZ256mik, 0 }, { X86::VPSHUFHWZ256rik, X86::VPSHUFHWZ256mik, 0 }, { X86::VPSHUFLWZ256rik, X86::VPSHUFLWZ256mik, 0 }, @@ -3123,6 +3180,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPMOVZXDQZ128rrk, X86::VPMOVZXDQZ128rmk, TB_NO_REVERSE }, { X86::VPMOVZXWDZ128rrk, X86::VPMOVZXWDZ128rmk, TB_NO_REVERSE }, { X86::VPMOVZXWQZ128rrk, X86::VPMOVZXWQZ128rmk, TB_NO_REVERSE }, + { X86::VPOPCNTBZ128rrk, X86::VPOPCNTBZ128rmk, 0 }, + { X86::VPOPCNTDZ128rrk, X86::VPOPCNTDZ128rmk, 0 }, + { X86::VPOPCNTQZ128rrk, X86::VPOPCNTQZ128rmk, 0 }, + { X86::VPOPCNTWZ128rrk, X86::VPOPCNTWZ128rmk, 0 }, { X86::VPSHUFDZ128rik, X86::VPSHUFDZ128mik, 0 }, { X86::VPSHUFHWZ128rik, X86::VPSHUFHWZ128mik, 0 }, { X86::VPSHUFLWZ128rik, X86::VPSHUFLWZ128mik, 0 }, @@ -3352,6 +3413,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPSUBSWZrrk, X86::VPSUBSWZrmk, 0 }, { X86::VPSUBUSBZrrk, X86::VPSUBUSBZrmk, 0 }, { X86::VPSUBUSWZrrk, X86::VPSUBUSWZrmk, 0 }, + { X86::VPSUBWZrrk, X86::VPSUBWZrmk, 0 }, { X86::VPTERNLOGDZrrik, X86::VPTERNLOGDZrmik, 0 }, { X86::VPTERNLOGQZrrik, X86::VPTERNLOGQZrmik, 0 }, { X86::VPUNPCKHBWZrrk, X86::VPUNPCKHBWZrmk, 0 }, @@ -3364,6 +3426,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPUNPCKLWDZrrk, X86::VPUNPCKLWDZrmk, 0 }, { X86::VPXORDZrrk, X86::VPXORDZrmk, 0 }, { X86::VPXORQZrrk, X86::VPXORQZrmk, 0 }, + { X86::VSHUFF32X4Zrrik, X86::VSHUFF32X4Zrmik, 0 }, + { X86::VSHUFF64X2Zrrik, X86::VSHUFF64X2Zrmik, 0 }, + { X86::VSHUFI32X4Zrrik, X86::VSHUFI32X4Zrmik, 0 }, + { X86::VSHUFI64X2Zrrik, X86::VSHUFI64X2Zrmik, 0 }, { X86::VSHUFPDZrrik, X86::VSHUFPDZrmik, 0 }, { X86::VSHUFPSZrrik, X86::VSHUFPSZrmik, 0 }, { X86::VSUBPDZrrk, X86::VSUBPDZrmk, 0 }, @@ -3509,6 +3575,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPUNPCKLWDZ256rrk, X86::VPUNPCKLWDZ256rmk, 0 }, { X86::VPXORDZ256rrk, X86::VPXORDZ256rmk, 0 }, { X86::VPXORQZ256rrk, X86::VPXORQZ256rmk, 0 }, + { X86::VSHUFF32X4Z256rrik, 
X86::VSHUFF32X4Z256rmik, 0 }, + { X86::VSHUFF64X2Z256rrik, X86::VSHUFF64X2Z256rmik, 0 }, + { X86::VSHUFI32X4Z256rrik, X86::VSHUFI32X4Z256rmik, 0 }, + { X86::VSHUFI64X2Z256rrik, X86::VSHUFI64X2Z256rmik, 0 }, { X86::VSHUFPDZ256rrik, X86::VSHUFPDZ256rmik, 0 }, { X86::VSHUFPSZ256rrik, X86::VSHUFPSZ256rmik, 0 }, { X86::VSUBPDZ256rrk, X86::VSUBPDZ256rmk, 0 }, @@ -4468,7 +4538,7 @@ MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA( unsigned leaInReg2 = 0; MachineInstr *InsMI2 = nullptr; if (Src == Src2) { - // ADD16rr %reg1028, %reg1028 + // ADD16rr killed %reg1028, %reg1028 // just a single insert_subreg. addRegReg(MIB, leaInReg, true, leaInReg, false); } else { @@ -5196,7 +5266,6 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI, WorkingMI.setDesc(get(Opc)); return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, OpIdx1, OpIdx2); - break; } case X86::BLENDPDrri: case X86::BLENDPSrri: @@ -7632,7 +7701,7 @@ MachineInstr *X86InstrInfo::optimizeLoadInstr(MachineInstr &MI, /// This is used for mapping: /// %xmm4 = V_SET0 /// to: -/// %xmm4 = PXORrr %xmm4, %xmm4 +/// %xmm4 = PXORrr undef %xmm4, undef %xmm4 /// static bool Expand2AddrUndef(MachineInstrBuilder &MIB, const MCInstrDesc &Desc) { @@ -7725,7 +7794,7 @@ static bool ExpandMOVImmSExti8(MachineInstrBuilder &MIB, bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); bool NeedsDwarfCFI = !IsWin64Prologue && - (MF.getMMI().hasDebugInfo() || MF.getFunction()->needsUnwindTableEntry()); + (MF.getMMI().hasDebugInfo() || MF.getFunction().needsUnwindTableEntry()); bool EmitCFI = !TFL->hasFP(MF) && NeedsDwarfCFI; if (EmitCFI) { TFL->BuildCFI(MBB, I, DL, @@ -7761,6 +7830,18 @@ static void expandLoadStackGuard(MachineInstrBuilder &MIB, MIB.addReg(Reg, RegState::Kill).addImm(1).addReg(0).addImm(0).addReg(0); } +static bool expandXorFP(MachineInstrBuilder &MIB, const TargetInstrInfo &TII) { + MachineBasicBlock &MBB = *MIB->getParent(); + MachineFunction &MF = *MBB.getParent(); + const X86Subtarget &Subtarget = MF.getSubtarget(); + const X86RegisterInfo *TRI = Subtarget.getRegisterInfo(); + unsigned XorOp = + MIB->getOpcode() == X86::XOR64_FP ? X86::XOR64rr : X86::XOR32rr; + MIB->setDesc(TII.get(XorOp)); + MIB.addReg(TRI->getFrameRegister(MF), RegState::Undef); + return true; +} + // This is used to handle spills for 128/256-bit registers when we have AVX512, // but not VLX. If it uses an extended register we need to use an instruction // that loads the lower 128/256-bit, but is available with only AVX512F. @@ -7829,6 +7910,8 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const { return Expand2AddrUndef(MIB, get(X86::SBB32rr)); case X86::SETB_C64r: return Expand2AddrUndef(MIB, get(X86::SBB64rr)); + case X86::MMX_SET0: + return Expand2AddrUndef(MIB, get(X86::MMX_PXORirr)); case X86::V_SET0: case X86::FsFLD0SS: case X86::FsFLD0SD: @@ -7955,6 +8038,9 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const { case TargetOpcode::LOAD_STACK_GUARD: expandLoadStackGuard(MIB, *this); return true; + case X86::XOR64_FP: + case X86::XOR32_FP: + return expandXorFP(MIB, *this); } return false; } @@ -7975,16 +8061,17 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const { /// /// FIXME: This should be turned into a TSFlags. 
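/// For instance,
///   cvtsi2ss %eax, %xmm0
/// writes only the low 32 bits of %xmm0 and leaves the upper bits unmodified,
/// so the result carries a false dependence on whatever last wrote %xmm0.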
/// -static bool hasPartialRegUpdate(unsigned Opcode) { +static bool hasPartialRegUpdate(unsigned Opcode, + const X86Subtarget &Subtarget) { switch (Opcode) { case X86::CVTSI2SSrr: case X86::CVTSI2SSrm: - case X86::CVTSI2SS64rr: - case X86::CVTSI2SS64rm: + case X86::CVTSI642SSrr: + case X86::CVTSI642SSrm: case X86::CVTSI2SDrr: case X86::CVTSI2SDrm: - case X86::CVTSI2SD64rr: - case X86::CVTSI2SD64rm: + case X86::CVTSI642SDrr: + case X86::CVTSI642SDrm: case X86::CVTSD2SSrr: case X86::CVTSD2SSrm: case X86::CVTSS2SDrr: @@ -8014,17 +8101,32 @@ static bool hasPartialRegUpdate(unsigned Opcode) { case X86::SQRTSDr_Int: case X86::SQRTSDm_Int: return true; + // GPR + case X86::POPCNT32rm: + case X86::POPCNT32rr: + case X86::POPCNT64rm: + case X86::POPCNT64rr: + return Subtarget.hasPOPCNTFalseDeps(); + case X86::LZCNT32rm: + case X86::LZCNT32rr: + case X86::LZCNT64rm: + case X86::LZCNT64rr: + case X86::TZCNT32rm: + case X86::TZCNT32rr: + case X86::TZCNT64rm: + case X86::TZCNT64rr: + return Subtarget.hasLZCNTFalseDeps(); } return false; } -/// Inform the ExecutionDepsFix pass how many idle +/// Inform the BreakFalseDeps pass how many idle /// instructions we would like before a partial register update. unsigned X86InstrInfo::getPartialRegUpdateClearance( const MachineInstr &MI, unsigned OpNum, const TargetRegisterInfo *TRI) const { - if (OpNum != 0 || !hasPartialRegUpdate(MI.getOpcode())) + if (OpNum != 0 || !hasPartialRegUpdate(MI.getOpcode(), Subtarget)) return 0; // If MI is marked as reading Reg, the partial register update is wanted. @@ -8050,28 +8152,28 @@ static bool hasUndefRegUpdate(unsigned Opcode) { switch (Opcode) { case X86::VCVTSI2SSrr: case X86::VCVTSI2SSrm: - case X86::Int_VCVTSI2SSrr: - case X86::Int_VCVTSI2SSrm: - case X86::VCVTSI2SS64rr: - case X86::VCVTSI2SS64rm: - case X86::Int_VCVTSI2SS64rr: - case X86::Int_VCVTSI2SS64rm: + case X86::VCVTSI2SSrr_Int: + case X86::VCVTSI2SSrm_Int: + case X86::VCVTSI642SSrr: + case X86::VCVTSI642SSrm: + case X86::VCVTSI642SSrr_Int: + case X86::VCVTSI642SSrm_Int: case X86::VCVTSI2SDrr: case X86::VCVTSI2SDrm: - case X86::Int_VCVTSI2SDrr: - case X86::Int_VCVTSI2SDrm: - case X86::VCVTSI2SD64rr: - case X86::VCVTSI2SD64rm: - case X86::Int_VCVTSI2SD64rr: - case X86::Int_VCVTSI2SD64rm: + case X86::VCVTSI2SDrr_Int: + case X86::VCVTSI2SDrm_Int: + case X86::VCVTSI642SDrr: + case X86::VCVTSI642SDrm: + case X86::VCVTSI642SDrr_Int: + case X86::VCVTSI642SDrm_Int: case X86::VCVTSD2SSrr: case X86::VCVTSD2SSrm: - case X86::Int_VCVTSD2SSrr: - case X86::Int_VCVTSD2SSrm: + case X86::VCVTSD2SSrr_Int: + case X86::VCVTSD2SSrm_Int: case X86::VCVTSS2SDrr: case X86::VCVTSS2SDrm: - case X86::Int_VCVTSS2SDrr: - case X86::Int_VCVTSS2SDrm: + case X86::VCVTSS2SDrr_Int: + case X86::VCVTSS2SDrm_Int: case X86::VRCPSSr: case X86::VRCPSSr_Int: case X86::VRCPSSm: @@ -8176,17 +8278,17 @@ static bool hasUndefRegUpdate(unsigned Opcode) { return false; } -/// Inform the ExecutionDepsFix pass how many idle instructions we would like +/// Inform the BreakFalseDeps pass how many idle instructions we would like /// before certain undef register reads. 
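/// Here an "undef" read means the old contents of the register do not affect
/// the final result, so any dependence on the previous writer is a false one.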
/// /// This catches the VCVTSI2SD family of instructions: /// -/// vcvtsi2sdq %rax, %xmm0, %xmm14 +/// vcvtsi2sdq %rax, undef %xmm0, %xmm14 /// /// We should to be careful *not* to catch VXOR idioms which are presumably /// handled specially in the pipeline: /// -/// vxorps %xmm1, %xmm1, %xmm1 +/// vxorps undef %xmm1, undef %xmm1, %xmm1 /// /// Like getPartialRegUpdateClearance, this makes a strong assumption that the /// high bits that are passed-through are not live. @@ -8230,6 +8332,20 @@ void X86InstrInfo::breakPartialRegDependency( .addReg(XReg, RegState::Undef) .addReg(Reg, RegState::ImplicitDefine); MI.addRegisterKilled(Reg, TRI, true); + } else if (X86::GR64RegClass.contains(Reg)) { + // Using XOR32rr because it has shorter encoding and zeros up the upper bits + // as well. + unsigned XReg = TRI->getSubReg(Reg, X86::sub_32bit); + BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(X86::XOR32rr), XReg) + .addReg(XReg, RegState::Undef) + .addReg(XReg, RegState::Undef) + .addReg(Reg, RegState::ImplicitDefine); + MI.addRegisterKilled(Reg, TRI, true); + } else if (X86::GR32RegClass.contains(Reg)) { + BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(X86::XOR32rr), Reg) + .addReg(Reg, RegState::Undef) + .addReg(Reg, RegState::Undef); + MI.addRegisterKilled(Reg, TRI, true); } } @@ -8393,7 +8509,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl( // For CPUs that favor the register form of a call or push, // do not fold loads into calls or pushes, unless optimizing for size // aggressively. - if (isSlowTwoMemOps && !MF.getFunction()->optForMinSize() && + if (isSlowTwoMemOps && !MF.getFunction().optForMinSize() && (MI.getOpcode() == X86::CALL32r || MI.getOpcode() == X86::CALL64r || MI.getOpcode() == X86::PUSH16r || MI.getOpcode() == X86::PUSH32r || MI.getOpcode() == X86::PUSH64r)) @@ -8401,7 +8517,8 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl( // Avoid partial register update stalls unless optimizing for size. // TODO: we should block undef reg update as well. - if (!MF.getFunction()->optForSize() && hasPartialRegUpdate(MI.getOpcode())) + if (!MF.getFunction().optForSize() && + hasPartialRegUpdate(MI.getOpcode(), Subtarget)) return nullptr; unsigned NumOps = MI.getDesc().getNumOperands(); @@ -8570,7 +8687,8 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, // Unless optimizing for size, don't fold to avoid partial // register update stalls // TODO: we should block undef reg update as well. - if (!MF.getFunction()->optForSize() && hasPartialRegUpdate(MI.getOpcode())) + if (!MF.getFunction().optForSize() && + hasPartialRegUpdate(MI.getOpcode(), Subtarget)) return nullptr; // Don't fold subreg spills, or reloads that use a high subreg. @@ -8645,7 +8763,7 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI, // instruction isn't scalar (SS). switch (UserOpc) { case X86::ADDSSrr_Int: case X86::VADDSSrr_Int: case X86::VADDSSZrr_Int: - case X86::Int_CMPSSrr: case X86::Int_VCMPSSrr: case X86::VCMPSSZrr_Int: + case X86::CMPSSrr_Int: case X86::VCMPSSrr_Int: case X86::VCMPSSZrr_Int: case X86::DIVSSrr_Int: case X86::VDIVSSrr_Int: case X86::VDIVSSZrr_Int: case X86::MAXSSrr_Int: case X86::VMAXSSrr_Int: case X86::VMAXSSZrr_Int: case X86::MINSSrr_Int: case X86::VMINSSrr_Int: case X86::VMINSSZrr_Int: @@ -8696,7 +8814,7 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI, // instruction isn't scalar (SD). 
switch (UserOpc) { case X86::ADDSDrr_Int: case X86::VADDSDrr_Int: case X86::VADDSDZrr_Int: - case X86::Int_CMPSDrr: case X86::Int_VCMPSDrr: case X86::VCMPSDZrr_Int: + case X86::CMPSDrr_Int: case X86::VCMPSDrr_Int: case X86::VCMPSDZrr_Int: case X86::DIVSDrr_Int: case X86::VDIVSDrr_Int: case X86::VDIVSDZrr_Int: case X86::MAXSDrr_Int: case X86::VMAXSDrr_Int: case X86::VMAXSDZrr_Int: case X86::MINSDrr_Int: case X86::VMINSDrr_Int: case X86::VMINSDZrr_Int: @@ -8769,7 +8887,8 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl( // Avoid partial register update stalls unless optimizing for size. // TODO: we should block undef reg update as well. - if (!MF.getFunction()->optForSize() && hasPartialRegUpdate(MI.getOpcode())) + if (!MF.getFunction().optForSize() && + hasPartialRegUpdate(MI.getOpcode(), Subtarget)) return nullptr; // Determine the alignment of the load. @@ -8793,6 +8912,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl( case X86::AVX512_128_SET0: Alignment = 16; break; + case X86::MMX_SET0: case X86::FsFLD0SD: case X86::AVX512_FsFLD0SD: Alignment = 8; @@ -8826,6 +8946,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl( SmallVector MOs; switch (LoadMI.getOpcode()) { + case X86::MMX_SET0: case X86::V_SET0: case X86::V_SETALLONES: case X86::AVX2_SETALLONES: @@ -8865,16 +8986,18 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl( Type *Ty; unsigned Opc = LoadMI.getOpcode(); if (Opc == X86::FsFLD0SS || Opc == X86::AVX512_FsFLD0SS) - Ty = Type::getFloatTy(MF.getFunction()->getContext()); + Ty = Type::getFloatTy(MF.getFunction().getContext()); else if (Opc == X86::FsFLD0SD || Opc == X86::AVX512_FsFLD0SD) - Ty = Type::getDoubleTy(MF.getFunction()->getContext()); + Ty = Type::getDoubleTy(MF.getFunction().getContext()); else if (Opc == X86::AVX512_512_SET0 || Opc == X86::AVX512_512_SETALLONES) - Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()),16); + Ty = VectorType::get(Type::getInt32Ty(MF.getFunction().getContext()),16); else if (Opc == X86::AVX2_SETALLONES || Opc == X86::AVX_SET0 || Opc == X86::AVX512_256_SET0 || Opc == X86::AVX1_SETALLONES) - Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 8); + Ty = VectorType::get(Type::getInt32Ty(MF.getFunction().getContext()), 8); + else if (Opc == X86::MMX_SET0) + Ty = VectorType::get(Type::getInt32Ty(MF.getFunction().getContext()), 2); else - Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 4); + Ty = VectorType::get(Type::getInt32Ty(MF.getFunction().getContext()), 4); bool IsAllOnes = (Opc == X86::V_SETALLONES || Opc == X86::AVX2_SETALLONES || Opc == X86::AVX512_512_SETALLONES || @@ -9610,8 +9733,6 @@ static const uint16_t ReplaceableInstrsAVX2[][3] = { { X86::VBROADCASTSDYrr, X86::VBROADCASTSDYrr, X86::VPBROADCASTQYrr}, { X86::VBROADCASTSDYrm, X86::VBROADCASTSDYrm, X86::VPBROADCASTQYrm}, { X86::VBROADCASTF128, X86::VBROADCASTF128, X86::VBROADCASTI128 }, - { X86::VBLENDPSrri, X86::VBLENDPSrri, X86::VPBLENDDrri }, - { X86::VBLENDPSrmi, X86::VBLENDPSrmi, X86::VPBLENDDrmi }, { X86::VBLENDPSYrri, X86::VBLENDPSYrri, X86::VPBLENDDYrri }, { X86::VBLENDPSYrmi, X86::VBLENDPSYrmi, X86::VPBLENDDYrmi }, { X86::VPERMILPSYmi, X86::VPERMILPSYmi, X86::VPSHUFDYmi }, @@ -9865,6 +9986,24 @@ static const uint16_t ReplaceableInstrsAVX512DQMasked[][4] = { X86::VPXORQZrmbkz, X86::VPXORDZrmbkz }, }; +// NOTE: These should only be used by the custom domain methods. 
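+// Each row is indexed by (Domain - 1): column 0 is the PackedSingle form,
+// column 1 the PackedDouble form, and column 2 the PackedInt form.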
+static const uint16_t ReplaceableCustomInstrs[][3] = { + //PackedSingle PackedDouble PackedInt + { X86::BLENDPSrmi, X86::BLENDPDrmi, X86::PBLENDWrmi }, + { X86::BLENDPSrri, X86::BLENDPDrri, X86::PBLENDWrri }, + { X86::VBLENDPSrmi, X86::VBLENDPDrmi, X86::VPBLENDWrmi }, + { X86::VBLENDPSrri, X86::VBLENDPDrri, X86::VPBLENDWrri }, + { X86::VBLENDPSYrmi, X86::VBLENDPDYrmi, X86::VPBLENDWYrmi }, + { X86::VBLENDPSYrri, X86::VBLENDPDYrri, X86::VPBLENDWYrri }, +}; +static const uint16_t ReplaceableCustomAVX2Instrs[][3] = { + //PackedSingle PackedDouble PackedInt + { X86::VBLENDPSrmi, X86::VBLENDPDrmi, X86::VPBLENDDrmi }, + { X86::VBLENDPSrri, X86::VBLENDPDrri, X86::VPBLENDDrri }, + { X86::VBLENDPSYrmi, X86::VBLENDPDYrmi, X86::VPBLENDDYrmi }, + { X86::VBLENDPSYrri, X86::VBLENDPDYrri, X86::VPBLENDDYrri }, +}; + // FIXME: Some shuffle and unpack instructions have equivalents in different // domains, but they require a bit more work than just switching opcodes. @@ -9885,13 +10024,177 @@ static const uint16_t *lookupAVX512(unsigned opcode, unsigned domain, return nullptr; } +// Helper to attempt to widen/narrow blend masks. +static bool AdjustBlendMask(unsigned OldMask, unsigned OldWidth, + unsigned NewWidth, unsigned *pNewMask = nullptr) { + assert(((OldWidth % NewWidth) == 0 || (NewWidth % OldWidth) == 0) && + "Illegal blend mask scale"); + unsigned NewMask = 0; + + if ((OldWidth % NewWidth) == 0) { + unsigned Scale = OldWidth / NewWidth; + unsigned SubMask = (1u << Scale) - 1; + for (unsigned i = 0; i != NewWidth; ++i) { + unsigned Sub = (OldMask >> (i * Scale)) & SubMask; + if (Sub == SubMask) + NewMask |= (1u << i); + else if (Sub != 0x0) + return false; + } + } else { + unsigned Scale = NewWidth / OldWidth; + unsigned SubMask = (1u << Scale) - 1; + for (unsigned i = 0; i != OldWidth; ++i) { + if (OldMask & (1 << i)) { + NewMask |= (SubMask << (i * Scale)); + } + } + } + + if (pNewMask) + *pNewMask = NewMask; + return true; +} + +uint16_t X86InstrInfo::getExecutionDomainCustom(const MachineInstr &MI) const { + unsigned Opcode = MI.getOpcode(); + unsigned NumOperands = MI.getNumOperands(); + + auto GetBlendDomains = [&](unsigned ImmWidth, bool Is256) { + uint16_t validDomains = 0; + if (MI.getOperand(NumOperands - 1).isImm()) { + unsigned Imm = MI.getOperand(NumOperands - 1).getImm(); + if (AdjustBlendMask(Imm, ImmWidth, Is256 ? 8 : 4)) + validDomains |= 0x2; // PackedSingle + if (AdjustBlendMask(Imm, ImmWidth, Is256 ? 4 : 2)) + validDomains |= 0x4; // PackedDouble + if (!Is256 || Subtarget.hasAVX2()) + validDomains |= 0x8; // PackedInt + } + return validDomains; + }; + + switch (Opcode) { + case X86::BLENDPDrmi: + case X86::BLENDPDrri: + case X86::VBLENDPDrmi: + case X86::VBLENDPDrri: + return GetBlendDomains(2, false); + case X86::VBLENDPDYrmi: + case X86::VBLENDPDYrri: + return GetBlendDomains(4, true); + case X86::BLENDPSrmi: + case X86::BLENDPSrri: + case X86::VBLENDPSrmi: + case X86::VBLENDPSrri: + case X86::VPBLENDDrmi: + case X86::VPBLENDDrri: + return GetBlendDomains(4, false); + case X86::VBLENDPSYrmi: + case X86::VBLENDPSYrri: + case X86::VPBLENDDYrmi: + case X86::VPBLENDDYrri: + return GetBlendDomains(8, true); + case X86::PBLENDWrmi: + case X86::PBLENDWrri: + case X86::VPBLENDWrmi: + case X86::VPBLENDWrri: + // Treat VPBLENDWY as a 128-bit vector as it repeats the lo/hi masks. 
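+    // e.g. a word mask of 0xF0 covers whole dwords (0b1100) and whole qwords
+    // (0b10), so the FP domains are available; a mask such as 0x10 splits a
+    // dword and has to stay PackedInt.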
+ case X86::VPBLENDWYrmi: + case X86::VPBLENDWYrri: + return GetBlendDomains(8, false); + } + return 0; +} + +bool X86InstrInfo::setExecutionDomainCustom(MachineInstr &MI, + unsigned Domain) const { + assert(Domain > 0 && Domain < 4 && "Invalid execution domain"); + uint16_t dom = (MI.getDesc().TSFlags >> X86II::SSEDomainShift) & 3; + assert(dom && "Not an SSE instruction"); + + unsigned Opcode = MI.getOpcode(); + unsigned NumOperands = MI.getNumOperands(); + + auto SetBlendDomain = [&](unsigned ImmWidth, bool Is256) { + if (MI.getOperand(NumOperands - 1).isImm()) { + unsigned Imm = MI.getOperand(NumOperands - 1).getImm() & 255; + Imm = (ImmWidth == 16 ? ((Imm << 8) | Imm) : Imm); + unsigned NewImm = Imm; + + const uint16_t *table = lookup(Opcode, dom, ReplaceableCustomInstrs); + if (!table) + table = lookup(Opcode, dom, ReplaceableCustomAVX2Instrs); + + if (Domain == 1) { // PackedSingle + AdjustBlendMask(Imm, ImmWidth, Is256 ? 8 : 4, &NewImm); + } else if (Domain == 2) { // PackedDouble + AdjustBlendMask(Imm, ImmWidth, Is256 ? 4 : 2, &NewImm); + } else if (Domain == 3) { // PackedInt + if (Subtarget.hasAVX2()) { + // If we are already VPBLENDW use that, else use VPBLENDD. + if ((ImmWidth / (Is256 ? 2 : 1)) != 8) { + table = lookup(Opcode, dom, ReplaceableCustomAVX2Instrs); + AdjustBlendMask(Imm, ImmWidth, Is256 ? 8 : 4, &NewImm); + } + } else { + assert(!Is256 && "128-bit vector expected"); + AdjustBlendMask(Imm, ImmWidth, 8, &NewImm); + } + } + + assert(table && table[Domain - 1] && "Unknown domain op"); + MI.setDesc(get(table[Domain - 1])); + MI.getOperand(NumOperands - 1).setImm(NewImm & 255); + } + return true; + }; + + switch (Opcode) { + case X86::BLENDPDrmi: + case X86::BLENDPDrri: + case X86::VBLENDPDrmi: + case X86::VBLENDPDrri: + return SetBlendDomain(2, false); + case X86::VBLENDPDYrmi: + case X86::VBLENDPDYrri: + return SetBlendDomain(4, true); + case X86::BLENDPSrmi: + case X86::BLENDPSrri: + case X86::VBLENDPSrmi: + case X86::VBLENDPSrri: + case X86::VPBLENDDrmi: + case X86::VPBLENDDrri: + return SetBlendDomain(4, false); + case X86::VBLENDPSYrmi: + case X86::VBLENDPSYrri: + case X86::VPBLENDDYrmi: + case X86::VPBLENDDYrri: + return SetBlendDomain(8, true); + case X86::PBLENDWrmi: + case X86::PBLENDWrri: + case X86::VPBLENDWrmi: + case X86::VPBLENDWrri: + return SetBlendDomain(8, false); + case X86::VPBLENDWYrmi: + case X86::VPBLENDWYrri: + return SetBlendDomain(16, true); + } + return false; +} + std::pair X86InstrInfo::getExecutionDomain(const MachineInstr &MI) const { uint16_t domain = (MI.getDesc().TSFlags >> X86II::SSEDomainShift) & 3; unsigned opcode = MI.getOpcode(); uint16_t validDomains = 0; if (domain) { - if (lookup(MI.getOpcode(), domain, ReplaceableInstrs)) { + // Attempt to match for custom instructions. + validDomains = getExecutionDomainCustom(MI); + if (validDomains) + return std::make_pair(domain, validDomains); + + if (lookup(opcode, domain, ReplaceableInstrs)) { validDomains = 0xe; } else if (lookup(opcode, domain, ReplaceableInstrsAVX2)) { validDomains = Subtarget.hasAVX2() ? 0xe : 0x6; @@ -9923,6 +10226,11 @@ void X86InstrInfo::setExecutionDomain(MachineInstr &MI, unsigned Domain) const { assert(Domain>0 && Domain<4 && "Invalid execution domain"); uint16_t dom = (MI.getDesc().TSFlags >> X86II::SSEDomainShift) & 3; assert(dom && "Not an SSE instruction"); + + // Attempt to match for custom instructions. 
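+  // Blend immediates may need to be remapped when the domain changes, which
+  // the plain opcode-swap tables below cannot express.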
+ if (setExecutionDomainCustom(MI, Domain)) + return; + const uint16_t *table = lookup(MI.getOpcode(), dom, ReplaceableInstrs); if (!table) { // try the other table assert((Subtarget.hasAVX2() || Domain < 3) && @@ -10043,9 +10351,9 @@ bool X86InstrInfo::isHighLatencyDef(int opc) const { case X86::VDIVPDZ256rr: case X86::VDIVPDZ256rrk: case X86::VDIVPDZ256rrkz: - case X86::VDIVPDZrb: - case X86::VDIVPDZrbk: - case X86::VDIVPDZrbkz: + case X86::VDIVPDZrrb: + case X86::VDIVPDZrrbk: + case X86::VDIVPDZrrbkz: case X86::VDIVPDZrm: case X86::VDIVPDZrmb: case X86::VDIVPDZrmbk: @@ -10073,9 +10381,9 @@ bool X86InstrInfo::isHighLatencyDef(int opc) const { case X86::VDIVPSZ256rr: case X86::VDIVPSZ256rrk: case X86::VDIVPSZ256rrkz: - case X86::VDIVPSZrb: - case X86::VDIVPSZrbk: - case X86::VDIVPSZrbkz: + case X86::VDIVPSZrrb: + case X86::VDIVPSZrrbk: + case X86::VDIVPSZrrbkz: case X86::VDIVPSZrm: case X86::VDIVPSZrmb: case X86::VDIVPSZrmbk: @@ -10093,9 +10401,9 @@ bool X86InstrInfo::isHighLatencyDef(int opc) const { case X86::VDIVSDZrr_Int: case X86::VDIVSDZrr_Intk: case X86::VDIVSDZrr_Intkz: - case X86::VDIVSDZrrb: - case X86::VDIVSDZrrbk: - case X86::VDIVSDZrrbkz: + case X86::VDIVSDZrrb_Int: + case X86::VDIVSDZrrb_Intk: + case X86::VDIVSDZrrb_Intkz: case X86::VDIVSSZrm: case X86::VDIVSSZrr: case X86::VDIVSSZrm_Int: @@ -10104,9 +10412,9 @@ bool X86InstrInfo::isHighLatencyDef(int opc) const { case X86::VDIVSSZrr_Int: case X86::VDIVSSZrr_Intk: case X86::VDIVSSZrr_Intkz: - case X86::VDIVSSZrrb: - case X86::VDIVSSZrrbk: - case X86::VDIVSSZrrbkz: + case X86::VDIVSSZrrb_Int: + case X86::VDIVSSZrrb_Intk: + case X86::VDIVSSZrrb_Intkz: case X86::VSQRTPDZ128m: case X86::VSQRTPDZ128mb: case X86::VSQRTPDZ128mbk: @@ -10675,7 +10983,7 @@ namespace { LDTLSCleanup() : MachineFunctionPass(ID) {} bool runOnMachineFunction(MachineFunction &MF) override { - if (skipFunction(*MF.getFunction())) + if (skipFunction(MF.getFunction())) return false; X86MachineFunctionInfo *MFI = MF.getInfo(); @@ -10836,16 +11144,16 @@ X86InstrInfo::getOutlininingCandidateInfo( bool X86InstrInfo::isFunctionSafeToOutlineFrom(MachineFunction &MF, bool OutlineFromLinkOnceODRs) const { - const Function *F = MF.getFunction(); + const Function &F = MF.getFunction(); // Does the function use a red zone? If it does, then we can't risk messing // with the stack. - if (!F->hasFnAttribute(Attribute::NoRedZone)) + if (!F.hasFnAttribute(Attribute::NoRedZone)) return false; // If we *don't* want to outline from things that could potentially be deduped // then return false. - if (!OutlineFromLinkOnceODRs && F->hasLinkOnceODRLinkage()) + if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage()) return false; // This function is viable for outlining, so return true. @@ -10853,8 +11161,8 @@ bool X86InstrInfo::isFunctionSafeToOutlineFrom(MachineFunction &MF, } X86GenInstrInfo::MachineOutlinerInstrType -X86InstrInfo::getOutliningType(MachineInstr &MI) const { - +X86InstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT, unsigned Flags) const { + MachineInstr &MI = *MIT; // Don't allow debug values to impact outlining type. if (MI.isDebugValue() || MI.isIndirectDebugValue()) return MachineOutlinerInstrType::Invisible; @@ -10879,7 +11187,7 @@ X86InstrInfo::getOutliningType(MachineInstr &MI) const { // FIXME: There are instructions which are being manually built without // explicit uses/defs so we also have to check the MCInstrDesc. We should be // able to remove the extra checks once those are fixed up. 
For example, - // sometimes we might get something like %RAX = POP64r 1. This won't be + // sometimes we might get something like %rax = POP64r 1. This won't be // caught by modifiesRegister or readsRegister even though the instruction // really ought to be formed so that modifiesRegister/readsRegister would // catch it. diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index 02a09c340cef..b1b5a4a421d9 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -490,8 +490,12 @@ class X86InstrInfo final : public X86GenInstrInfo { std::pair getExecutionDomain(const MachineInstr &MI) const override; + uint16_t getExecutionDomainCustom(const MachineInstr &MI) const; + void setExecutionDomain(MachineInstr &MI, unsigned Domain) const override; + bool setExecutionDomainCustom(MachineInstr &MI, unsigned Domain) const; + unsigned getPartialRegUpdateClearance(const MachineInstr &MI, unsigned OpNum, const TargetRegisterInfo *TRI) const override; @@ -568,7 +572,7 @@ class X86InstrInfo final : public X86GenInstrInfo { bool OutlineFromLinkOnceODRs) const override; llvm::X86GenInstrInfo::MachineOutlinerInstrType - getOutliningType(MachineInstr &MI) const override; + getOutliningType(MachineBasicBlock::iterator &MIT, unsigned Flags) const override; void insertOutlinerEpilogue(MachineBasicBlock &MBB, MachineFunction &MF, const MachineOutlinerInfo &MInfo) const override; diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 0a6f93bbc23c..0129e11d5824 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -807,27 +807,19 @@ def NoAVX : Predicate<"!Subtarget->hasAVX()">; def HasAVX : Predicate<"Subtarget->hasAVX()">; def HasAVX2 : Predicate<"Subtarget->hasAVX2()">; def HasAVX1Only : Predicate<"Subtarget->hasAVX() && !Subtarget->hasAVX2()">; -def HasAVX512 : Predicate<"Subtarget->hasAVX512()">, - AssemblerPredicate<"FeatureAVX512", "AVX-512 ISA">; +def HasAVX512 : Predicate<"Subtarget->hasAVX512()">; def UseAVX : Predicate<"Subtarget->hasAVX() && !Subtarget->hasAVX512()">; def UseAVX2 : Predicate<"Subtarget->hasAVX2() && !Subtarget->hasAVX512()">; def NoAVX512 : Predicate<"!Subtarget->hasAVX512()">; -def HasCDI : Predicate<"Subtarget->hasCDI()">, - AssemblerPredicate<"FeatureCDI", "AVX-512 CD ISA">; -def HasVPOPCNTDQ : Predicate<"Subtarget->hasVPOPCNTDQ()">, - AssemblerPredicate<"FeatureVPOPCNTDQ", "AVX-512 VPOPCNTDQ ISA">; -def HasPFI : Predicate<"Subtarget->hasPFI()">, - AssemblerPredicate<"FeaturePFI", "AVX-512 PF ISA">; -def HasERI : Predicate<"Subtarget->hasERI()">, - AssemblerPredicate<"FeatureERI", "AVX-512 ER ISA">; -def HasDQI : Predicate<"Subtarget->hasDQI()">, - AssemblerPredicate<"FeatureDQI", "AVX-512 DQ ISA">; +def HasCDI : Predicate<"Subtarget->hasCDI()">; +def HasVPOPCNTDQ : Predicate<"Subtarget->hasVPOPCNTDQ()">; +def HasPFI : Predicate<"Subtarget->hasPFI()">; +def HasERI : Predicate<"Subtarget->hasERI()">; +def HasDQI : Predicate<"Subtarget->hasDQI()">; def NoDQI : Predicate<"!Subtarget->hasDQI()">; -def HasBWI : Predicate<"Subtarget->hasBWI()">, - AssemblerPredicate<"FeatureBWI", "AVX-512 BW ISA">; +def HasBWI : Predicate<"Subtarget->hasBWI()">; def NoBWI : Predicate<"!Subtarget->hasBWI()">; -def HasVLX : Predicate<"Subtarget->hasVLX()">, - AssemblerPredicate<"FeatureVLX", "AVX-512 VL ISA">; +def HasVLX : Predicate<"Subtarget->hasVLX()">; def NoVLX : Predicate<"!Subtarget->hasVLX()">; def NoVLX_Or_NoBWI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasBWI()">; def NoVLX_Or_NoDQI : 
Predicate<"!Subtarget->hasVLX() || !Subtarget->hasDQI()">; @@ -864,17 +856,18 @@ def HasLZCNT : Predicate<"Subtarget->hasLZCNT()">; def HasBMI : Predicate<"Subtarget->hasBMI()">; def HasBMI2 : Predicate<"Subtarget->hasBMI2()">; def NoBMI2 : Predicate<"!Subtarget->hasBMI2()">; -def HasVBMI : Predicate<"Subtarget->hasVBMI()">, - AssemblerPredicate<"FeatureVBMI", "AVX-512 VBMI ISA">; +def HasVBMI : Predicate<"Subtarget->hasVBMI()">; def HasVBMI2 : Predicate<"Subtarget->hasVBMI2()">; -def HasIFMA : Predicate<"Subtarget->hasIFMA()">, - AssemblerPredicate<"FeatureIFMA", "AVX-512 IFMA ISA">; +def HasIFMA : Predicate<"Subtarget->hasIFMA()">; def HasRTM : Predicate<"Subtarget->hasRTM()">; def HasADX : Predicate<"Subtarget->hasADX()">; def HasSHA : Predicate<"Subtarget->hasSHA()">; def HasPRFCHW : Predicate<"Subtarget->hasPRFCHW()">; def HasRDSEED : Predicate<"Subtarget->hasRDSEED()">; +def HasSSEPrefetch : Predicate<"Subtarget->hasSSEPrefetch()">; +def NoSSEPrefetch : Predicate<"!Subtarget->hasSSEPrefetch()">; def HasPrefetchW : Predicate<"Subtarget->hasPRFCHW()">; +def HasPREFETCHWT1 : Predicate<"Subtarget->hasPREFETCHWT1()">; def HasLAHFSAHF : Predicate<"Subtarget->hasLAHFSAHF()">; def HasMWAITX : Predicate<"Subtarget->hasMWAITX()">; def HasCLZERO : Predicate<"Subtarget->hasCLZERO()">; @@ -885,6 +878,7 @@ def HasSHSTK : Predicate<"Subtarget->hasSHSTK()">; def HasIBT : Predicate<"Subtarget->hasIBT()">; def HasCLFLUSHOPT : Predicate<"Subtarget->hasCLFLUSHOPT()">; def HasCLWB : Predicate<"Subtarget->hasCLWB()">; +def HasRDPID : Predicate<"Subtarget->hasRDPID()">; def HasCmpxchg16b: Predicate<"Subtarget->hasCmpxchg16b()">; def Not64BitMode : Predicate<"!Subtarget->is64Bit()">, AssemblerPredicate<"!Mode64Bit", "Not 64-bit mode">; @@ -918,11 +912,11 @@ def IsNotPIC : Predicate<"!TM.isPositionIndependent()">; // the Function object through the Subtarget and objections were raised // to that (see post-commit review comments for r301750). let RecomputePerFunction = 1 in { - def OptForSize : Predicate<"MF->getFunction()->optForSize()">; - def OptForMinSize : Predicate<"MF->getFunction()->optForMinSize()">; - def OptForSpeed : Predicate<"!MF->getFunction()->optForSize()">; + def OptForSize : Predicate<"MF->getFunction().optForSize()">; + def OptForMinSize : Predicate<"MF->getFunction().optForMinSize()">; + def OptForSpeed : Predicate<"!MF->getFunction().optForSize()">; def UseIncDec : Predicate<"!Subtarget->slowIncDec() || " - "MF->getFunction()->optForSize()">; + "MF->getFunction().optForSize()">; } def CallImmAddr : Predicate<"Subtarget->isLegalToCallImmediateAddr()">; @@ -932,6 +926,8 @@ def HasFastLZCNT : Predicate<"Subtarget->hasFastLZCNT()">; def HasFastSHLDRotate : Predicate<"Subtarget->hasFastSHLDRotate()">; def HasERMSB : Predicate<"Subtarget->hasERMSB()">; def HasMFence : Predicate<"Subtarget->hasMFence()">; +def UseRetpoline : Predicate<"Subtarget->useRetpoline()">; +def NotUseRetpoline : Predicate<"!Subtarget->useRetpoline()">; //===----------------------------------------------------------------------===// // X86 Instruction Format Definitions. 
@@ -1124,14 +1120,16 @@ let hasSideEffects = 0, SchedRW = [WriteZero] in { def NOOPL : I<0x1f, MRMXm, (outs), (ins i32mem:$zero), "nop{l}\t$zero", [], IIC_NOP>, TB, OpSize32; def NOOPQ : RI<0x1f, MRMXm, (outs), (ins i64mem:$zero), - "nop{q}\t$zero", [], IIC_NOP>, TB; + "nop{q}\t$zero", [], IIC_NOP>, TB, + Requires<[In64BitMode]>; // Also allow register so we can assemble/disassemble def NOOPWr : I<0x1f, MRMXr, (outs), (ins GR16:$zero), "nop{w}\t$zero", [], IIC_NOP>, TB, OpSize16; def NOOPLr : I<0x1f, MRMXr, (outs), (ins GR32:$zero), "nop{l}\t$zero", [], IIC_NOP>, TB, OpSize32; def NOOPQr : RI<0x1f, MRMXr, (outs), (ins GR64:$zero), - "nop{q}\t$zero", [], IIC_NOP>, TB; + "nop{q}\t$zero", [], IIC_NOP>, TB, + Requires<[In64BitMode]>; } @@ -1155,7 +1153,8 @@ def LEAVE64 : I<0xC9, RawFrm, // Miscellaneous Instructions. // -let isBarrier = 1, hasSideEffects = 1, usesCustomInserter = 1 in +let isBarrier = 1, hasSideEffects = 1, usesCustomInserter = 1, + SchedRW = [WriteSystem] in def Int_eh_sjlj_setup_dispatch : PseudoI<(outs), (ins), [(X86eh_sjlj_setup_dispatch)]>; @@ -1380,7 +1379,8 @@ def MOVSW : I<0xA5, RawFrmDstSrc, (outs), (ins dstidx16:$dst, srcidx16:$src), def MOVSL : I<0xA5, RawFrmDstSrc, (outs), (ins dstidx32:$dst, srcidx32:$src), "movs{l|d}\t{$src, $dst|$dst, $src}", [], IIC_MOVS>, OpSize32; def MOVSQ : RI<0xA5, RawFrmDstSrc, (outs), (ins dstidx64:$dst, srcidx64:$src), - "movsq\t{$src, $dst|$dst, $src}", [], IIC_MOVS>; + "movsq\t{$src, $dst|$dst, $src}", [], IIC_MOVS>, + Requires<[In64BitMode]>; } // These uses the DF flag in the EFLAGS register to inc or dec EDI and ESI @@ -1395,7 +1395,8 @@ def STOSL : I<0xAB, RawFrmDst, (outs), (ins dstidx32:$dst), "stos{l|d}\t{%eax, $dst|$dst, eax}", [], IIC_STOS>, OpSize32; let Defs = [RDI], Uses = [RAX,RDI,EFLAGS] in def STOSQ : RI<0xAB, RawFrmDst, (outs), (ins dstidx64:$dst), - "stosq\t{%rax, $dst|$dst, rax}", [], IIC_STOS>; + "stosq\t{%rax, $dst|$dst, rax}", [], IIC_STOS>, + Requires<[In64BitMode]>; // These uses the DF flag in the EFLAGS register to inc or dec EDI and ESI let Defs = [EDI,EFLAGS], Uses = [AL,EDI,EFLAGS] in @@ -1409,7 +1410,8 @@ def SCASL : I<0xAF, RawFrmDst, (outs), (ins dstidx32:$dst), "scas{l|d}\t{$dst, %eax|eax, $dst}", [], IIC_SCAS>, OpSize32; let Defs = [EDI,EFLAGS], Uses = [RAX,EDI,EFLAGS] in def SCASQ : RI<0xAF, RawFrmDst, (outs), (ins dstidx64:$dst), - "scasq\t{$dst, %rax|rax, $dst}", [], IIC_SCAS>; + "scasq\t{$dst, %rax|rax, $dst}", [], IIC_SCAS>, + Requires<[In64BitMode]>; // These uses the DF flag in the EFLAGS register to inc or dec EDI and ESI let Defs = [EDI,ESI,EFLAGS], Uses = [EDI,ESI,EFLAGS] in { @@ -1420,7 +1422,8 @@ def CMPSW : I<0xA7, RawFrmDstSrc, (outs), (ins dstidx16:$dst, srcidx16:$src), def CMPSL : I<0xA7, RawFrmDstSrc, (outs), (ins dstidx32:$dst, srcidx32:$src), "cmps{l|d}\t{$dst, $src|$src, $dst}", [], IIC_CMPS>, OpSize32; def CMPSQ : RI<0xA7, RawFrmDstSrc, (outs), (ins dstidx64:$dst, srcidx64:$src), - "cmpsq\t{$dst, $src|$src, $dst}", [], IIC_CMPS>; + "cmpsq\t{$dst, $src|$src, $dst}", [], IIC_CMPS>, + Requires<[In64BitMode]>; } } // SchedRW @@ -1485,7 +1488,8 @@ def MOV32mi : Ii32<0xC7, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src), [(store (i32 imm32_su:$src), addr:$dst)], IIC_MOV_MEM>, OpSize32; def MOV64mi32 : RIi32S<0xC7, MRM0m, (outs), (ins i64mem:$dst, i64i32imm:$src), "mov{q}\t{$src, $dst|$dst, $src}", - [(store i64immSExt32_su:$src, addr:$dst)], IIC_MOV_MEM>; + [(store i64immSExt32_su:$src, addr:$dst)], IIC_MOV_MEM>, + Requires<[In64BitMode]>; } // SchedRW let hasSideEffects = 0 in { @@ 
-1559,33 +1563,39 @@ def MOV32o16a : Ii16<0xA3, RawFrmMemOffs, (outs), (ins offset16_32:$dst), let mayLoad = 1 in { let Defs = [AL] in def MOV8ao64 : RIi64_NOREX<0xA0, RawFrmMemOffs, (outs), (ins offset64_8:$src), - "movabs{b}\t{$src, %al|al, $src}", []>, AdSize64; + "movabs{b}\t{$src, %al|al, $src}", [], IIC_MOV_MEM>, + AdSize64; let Defs = [AX] in def MOV16ao64 : RIi64_NOREX<0xA1, RawFrmMemOffs, (outs), (ins offset64_16:$src), - "movabs{w}\t{$src, %ax|ax, $src}", []>, OpSize16, AdSize64; + "movabs{w}\t{$src, %ax|ax, $src}", [], IIC_MOV_MEM>, + OpSize16, AdSize64; let Defs = [EAX] in def MOV32ao64 : RIi64_NOREX<0xA1, RawFrmMemOffs, (outs), (ins offset64_32:$src), - "movabs{l}\t{$src, %eax|eax, $src}", []>, OpSize32, - AdSize64; + "movabs{l}\t{$src, %eax|eax, $src}", [], IIC_MOV_MEM>, + OpSize32, AdSize64; let Defs = [RAX] in def MOV64ao64 : RIi64<0xA1, RawFrmMemOffs, (outs), (ins offset64_64:$src), - "movabs{q}\t{$src, %rax|rax, $src}", []>, AdSize64; + "movabs{q}\t{$src, %rax|rax, $src}", [], IIC_MOV_MEM>, + AdSize64; } let mayStore = 1 in { let Uses = [AL] in def MOV8o64a : RIi64_NOREX<0xA2, RawFrmMemOffs, (outs), (ins offset64_8:$dst), - "movabs{b}\t{%al, $dst|$dst, al}", []>, AdSize64; + "movabs{b}\t{%al, $dst|$dst, al}", [], IIC_MOV_MEM>, + AdSize64; let Uses = [AX] in def MOV16o64a : RIi64_NOREX<0xA3, RawFrmMemOffs, (outs), (ins offset64_16:$dst), - "movabs{w}\t{%ax, $dst|$dst, ax}", []>, OpSize16, AdSize64; + "movabs{w}\t{%ax, $dst|$dst, ax}", [], IIC_MOV_MEM>, + OpSize16, AdSize64; let Uses = [EAX] in def MOV32o64a : RIi64_NOREX<0xA3, RawFrmMemOffs, (outs), (ins offset64_32:$dst), - "movabs{l}\t{%eax, $dst|$dst, eax}", []>, OpSize32, - AdSize64; + "movabs{l}\t{%eax, $dst|$dst, eax}", [], IIC_MOV_MEM>, + OpSize32, AdSize64; let Uses = [RAX] in def MOV64o64a : RIi64<0xA3, RawFrmMemOffs, (outs), (ins offset64_64:$dst), - "movabs{q}\t{%rax, $dst|$dst, rax}", []>, AdSize64; + "movabs{q}\t{%rax, $dst|$dst, rax}", [], IIC_MOV_MEM>, + AdSize64; } } // hasSideEffects = 0 @@ -1739,7 +1749,8 @@ def BT32mi8 : Ii8<0xBA, MRM4m, (outs), (ins i32mem:$src1, i32i8imm:$src2), def BT64mi8 : RIi8<0xBA, MRM4m, (outs), (ins i64mem:$src1, i64i8imm:$src2), "bt{q}\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86bt (loadi64 addr:$src1), - i64immSExt8:$src2))], IIC_BT_MI>, TB; + i64immSExt8:$src2))], IIC_BT_MI>, TB, + Requires<[In64BitMode]>; } // SchedRW let hasSideEffects = 0 in { @@ -1786,7 +1797,8 @@ def BTC32mi8 : Ii8<0xBA, MRM7m, (outs), (ins i32mem:$src1, i32i8imm:$src2), "btc{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, OpSize32, TB; def BTC64mi8 : RIi8<0xBA, MRM7m, (outs), (ins i64mem:$src1, i64i8imm:$src2), - "btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB; + "btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB, + Requires<[In64BitMode]>; } let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in { @@ -1831,7 +1843,8 @@ def BTR32mi8 : Ii8<0xBA, MRM6m, (outs), (ins i32mem:$src1, i32i8imm:$src2), "btr{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, OpSize32, TB; def BTR64mi8 : RIi8<0xBA, MRM6m, (outs), (ins i64mem:$src1, i64i8imm:$src2), - "btr{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB; + "btr{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB, + Requires<[In64BitMode]>; } let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in { @@ -1877,7 +1890,8 @@ def BTS32mi8 : Ii8<0xBA, MRM5m, (outs), (ins i32mem:$src1, i32i8imm:$src2), "bts{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, OpSize32, TB; def BTS64mi8 : RIi8<0xBA, MRM5m, (outs), (ins i64mem:$src1, 
i64i8imm:$src2), - "bts{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB; + "bts{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB, + Requires<[In64BitMode]>; } } // hasSideEffects = 0 } // Defs = [EFLAGS] @@ -1947,13 +1961,7 @@ def XCHG16ar : I<0x90, AddRegFrm, (outs), (ins GR16:$src), let Uses = [EAX], Defs = [EAX] in def XCHG32ar : I<0x90, AddRegFrm, (outs), (ins GR32:$src), "xchg{l}\t{$src, %eax|eax, $src}", [], IIC_XCHG_REG>, - OpSize32, Requires<[Not64BitMode]>; -let Uses = [EAX], Defs = [EAX] in -// Uses GR32_NOAX in 64-bit mode to prevent encoding using the 0x90 NOP encoding. -// xchg %eax, %eax needs to clear upper 32-bits of RAX so is not a NOP. -def XCHG32ar64 : I<0x90, AddRegFrm, (outs), (ins GR32_NOAX:$src), - "xchg{l}\t{$src, %eax|eax, $src}", [], IIC_XCHG_REG>, - OpSize32, Requires<[In64BitMode]>; + OpSize32; let Uses = [RAX], Defs = [RAX] in def XCHG64ar : RI<0x90, AddRegFrm, (outs), (ins GR64:$src), "xchg{q}\t{$src, %rax|rax, $src}", [], IIC_XCHG_REG>; @@ -2024,35 +2032,38 @@ def CMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$dst), let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX] in def CMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$dst), "cmpxchg16b\t$dst", [], IIC_CMPXCHG_16B>, - TB, Requires<[HasCmpxchg16b]>; + TB, Requires<[HasCmpxchg16b, In64BitMode]>; } // SchedRW // Lock instruction prefix +let SchedRW = [WriteMicrocoded] in def LOCK_PREFIX : I<0xF0, RawFrm, (outs), (ins), "lock", []>; +let SchedRW = [WriteNop] in { + // Rex64 instruction prefix -def REX64_PREFIX : I<0x48, RawFrm, (outs), (ins), "rex64", []>, +def REX64_PREFIX : I<0x48, RawFrm, (outs), (ins), "rex64", [], IIC_NOP>, Requires<[In64BitMode]>; // Data16 instruction prefix -def DATA16_PREFIX : I<0x66, RawFrm, (outs), (ins), "data16", []>, +def DATA16_PREFIX : I<0x66, RawFrm, (outs), (ins), "data16", [], IIC_NOP>, Requires<[Not16BitMode]>; // Data instruction prefix -def DATA32_PREFIX : I<0x66, RawFrm, (outs), (ins), "data32", []>, +def DATA32_PREFIX : I<0x66, RawFrm, (outs), (ins), "data32", [], IIC_NOP>, Requires<[In16BitMode]>; +} // SchedRW // Repeat string operation instruction prefixes -// These uses the DF flag in the EFLAGS register to inc or dec ECX -let Defs = [ECX], Uses = [ECX,EFLAGS] in { +// These use the DF flag in the EFLAGS register to inc or dec ECX +let Defs = [ECX], Uses = [ECX,EFLAGS], SchedRW = [WriteMicrocoded] in { // Repeat (used with INS, OUTS, MOVS, LODS and STOS) def REP_PREFIX : I<0xF3, RawFrm, (outs), (ins), "rep", []>; // Repeat while not equal (used with CMPS and SCAS) def REPNE_PREFIX : I<0xF2, RawFrm, (outs), (ins), "repne", []>; } - // String manipulation instructions let SchedRW = [WriteMicrocoded] in { // These uses the DF flag in the EFLAGS register to inc or dec EDI and ESI @@ -2067,7 +2078,8 @@ def LODSL : I<0xAD, RawFrmSrc, (outs), (ins srcidx32:$src), "lods{l|d}\t{$src, %eax|eax, $src}", [], IIC_LODS>, OpSize32; let Defs = [RAX,ESI], Uses = [ESI,EFLAGS] in def LODSQ : RI<0xAD, RawFrmSrc, (outs), (ins srcidx64:$src), - "lodsq\t{$src, %rax|rax, $src}", [], IIC_LODS>; + "lodsq\t{$src, %rax|rax, $src}", [], IIC_LODS>, + Requires<[In64BitMode]>; } let SchedRW = [WriteSystem] in { @@ -2198,31 +2210,35 @@ let Predicates = [HasMOVBE] in { //===----------------------------------------------------------------------===// // RDRAND Instruction // -let Predicates = [HasRDRAND], Defs = [EFLAGS] in { +let Predicates = [HasRDRAND], Defs = [EFLAGS], SchedRW = [WriteSystem] in { def RDRAND16r : I<0xC7, MRM6r, (outs GR16:$dst), (ins), "rdrand{w}\t$dst", - 
[(set GR16:$dst, EFLAGS, (X86rdrand))]>, OpSize16, PS; + [(set GR16:$dst, EFLAGS, (X86rdrand))], IIC_RDRAND>, + OpSize16, PS; def RDRAND32r : I<0xC7, MRM6r, (outs GR32:$dst), (ins), "rdrand{l}\t$dst", - [(set GR32:$dst, EFLAGS, (X86rdrand))]>, OpSize32, PS; + [(set GR32:$dst, EFLAGS, (X86rdrand))], IIC_RDRAND>, + OpSize32, PS; def RDRAND64r : RI<0xC7, MRM6r, (outs GR64:$dst), (ins), "rdrand{q}\t$dst", - [(set GR64:$dst, EFLAGS, (X86rdrand))]>, PS; + [(set GR64:$dst, EFLAGS, (X86rdrand))], IIC_RDRAND>, PS; } //===----------------------------------------------------------------------===// // RDSEED Instruction // -let Predicates = [HasRDSEED], Defs = [EFLAGS] in { +let Predicates = [HasRDSEED], Defs = [EFLAGS], SchedRW = [WriteSystem] in { def RDSEED16r : I<0xC7, MRM7r, (outs GR16:$dst), (ins), "rdseed{w}\t$dst", - [(set GR16:$dst, EFLAGS, (X86rdseed))]>, OpSize16, PS; + [(set GR16:$dst, EFLAGS, (X86rdseed))], IIC_RDSEED>, + OpSize16, PS; def RDSEED32r : I<0xC7, MRM7r, (outs GR32:$dst), (ins), "rdseed{l}\t$dst", - [(set GR32:$dst, EFLAGS, (X86rdseed))]>, OpSize32, PS; + [(set GR32:$dst, EFLAGS, (X86rdseed))], IIC_RDSEED>, + OpSize32, PS; def RDSEED64r : RI<0xC7, MRM7r, (outs GR64:$dst), (ins), "rdseed{q}\t$dst", - [(set GR64:$dst, EFLAGS, (X86rdseed))]>, PS; + [(set GR64:$dst, EFLAGS, (X86rdseed))], IIC_RDSEED>, PS; } //===----------------------------------------------------------------------===// @@ -2231,30 +2247,33 @@ let Predicates = [HasRDSEED], Defs = [EFLAGS] in { let Predicates = [HasLZCNT], Defs = [EFLAGS] in { def LZCNT16rr : I<0xBD, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), "lzcnt{w}\t{$src, $dst|$dst, $src}", - [(set GR16:$dst, (ctlz GR16:$src)), (implicit EFLAGS)]>, XS, - OpSize16; + [(set GR16:$dst, (ctlz GR16:$src)), (implicit EFLAGS)], + IIC_LZCNT_RR>, XS, OpSize16, Sched<[WriteIMul]>; def LZCNT16rm : I<0xBD, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), "lzcnt{w}\t{$src, $dst|$dst, $src}", [(set GR16:$dst, (ctlz (loadi16 addr:$src))), - (implicit EFLAGS)]>, XS, OpSize16; + (implicit EFLAGS)], IIC_LZCNT_RM>, XS, OpSize16, + Sched<[WriteIMulLd]>; def LZCNT32rr : I<0xBD, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "lzcnt{l}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (ctlz GR32:$src)), (implicit EFLAGS)]>, XS, - OpSize32; + [(set GR32:$dst, (ctlz GR32:$src)), (implicit EFLAGS)], + IIC_LZCNT_RR>, XS, OpSize32, Sched<[WriteIMul]>; def LZCNT32rm : I<0xBD, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), "lzcnt{l}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (ctlz (loadi32 addr:$src))), - (implicit EFLAGS)]>, XS, OpSize32; + (implicit EFLAGS)], IIC_LZCNT_RM>, XS, OpSize32, + Sched<[WriteIMulLd]>; def LZCNT64rr : RI<0xBD, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), "lzcnt{q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (ctlz GR64:$src)), (implicit EFLAGS)]>, - XS; + [(set GR64:$dst, (ctlz GR64:$src)), (implicit EFLAGS)], + IIC_LZCNT_RR>, XS, Sched<[WriteIMul]>; def LZCNT64rm : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), "lzcnt{q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (ctlz (loadi64 addr:$src))), - (implicit EFLAGS)]>, XS; + (implicit EFLAGS)], IIC_LZCNT_RM>, XS, + Sched<[WriteIMulLd]>; } //===----------------------------------------------------------------------===// @@ -2263,30 +2282,33 @@ let Predicates = [HasLZCNT], Defs = [EFLAGS] in { let Predicates = [HasBMI], Defs = [EFLAGS] in { def TZCNT16rr : I<0xBC, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), "tzcnt{w}\t{$src, $dst|$dst, $src}", - [(set GR16:$dst, (cttz GR16:$src)), (implicit EFLAGS)]>, XS, 
- OpSize16; + [(set GR16:$dst, (cttz GR16:$src)), (implicit EFLAGS)], + IIC_TZCNT_RR>, XS, OpSize16, Sched<[WriteIMul]>; def TZCNT16rm : I<0xBC, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), "tzcnt{w}\t{$src, $dst|$dst, $src}", [(set GR16:$dst, (cttz (loadi16 addr:$src))), - (implicit EFLAGS)]>, XS, OpSize16; + (implicit EFLAGS)], IIC_TZCNT_RM>, XS, OpSize16, + Sched<[WriteIMulLd]>; def TZCNT32rr : I<0xBC, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "tzcnt{l}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (cttz GR32:$src)), (implicit EFLAGS)]>, XS, - OpSize32; + [(set GR32:$dst, (cttz GR32:$src)), (implicit EFLAGS)], + IIC_TZCNT_RR>, XS, OpSize32, Sched<[WriteIMul]>; def TZCNT32rm : I<0xBC, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), "tzcnt{l}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (cttz (loadi32 addr:$src))), - (implicit EFLAGS)]>, XS, OpSize32; + (implicit EFLAGS)], IIC_TZCNT_RM>, XS, OpSize32, + Sched<[WriteIMulLd]>; def TZCNT64rr : RI<0xBC, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), "tzcnt{q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (cttz GR64:$src)), (implicit EFLAGS)]>, - XS; + [(set GR64:$dst, (cttz GR64:$src)), (implicit EFLAGS)], + IIC_TZCNT_RR>, XS, Sched<[WriteIMul]>; def TZCNT64rm : RI<0xBC, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), "tzcnt{q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (cttz (loadi64 addr:$src))), - (implicit EFLAGS)]>, XS; + (implicit EFLAGS)], IIC_TZCNT_RM>, XS, + Sched<[WriteIMulLd]>; } multiclass bmi_bls, T8PS, VEX_4V; + [], IIC_UNARY_REG>, T8PS, VEX_4V, Sched<[WriteALU]>; let mayLoad = 1 in def rm : I<0xF3, MemMRM, (outs RC:$dst), (ins x86memop:$src), !strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"), - []>, T8PS, VEX_4V; + [], IIC_UNARY_MEM>, T8PS, VEX_4V, Sched<[WriteALULd, ReadAfterLd]>; } } @@ -2333,18 +2355,18 @@ let Predicates = [HasBMI] in { (BLSI64rr GR64:$src)>; } - multiclass bmi_bextr_bzhi opc, string mnemonic, RegisterClass RC, X86MemOperand x86memop, Intrinsic Int, PatFrag ld_frag> { def rr : I, - T8PS, VEX; + [(set RC:$dst, (Int RC:$src1, RC:$src2)), (implicit EFLAGS)], IIC_BIN_NONMEM>, + T8PS, VEX, Sched<[WriteALU]>; def rm : I, T8PS, VEX; + (implicit EFLAGS)], IIC_BIN_MEM>, T8PS, VEX, + Sched<[WriteALULd, ReadAfterLd]>; } let Predicates = [HasBMI], Defs = [EFLAGS] in { @@ -2361,7 +2383,6 @@ let Predicates = [HasBMI2], Defs = [EFLAGS] in { int_x86_bmi_bzhi_64, loadi64>, VEX_W; } - def CountTrailingOnes : SDNodeXFormgetZExtValue()), SDLoc(N)); @@ -2455,11 +2476,12 @@ multiclass bmi_pdep_pext { def rr : I<0xF5, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set RC:$dst, (Int RC:$src1, RC:$src2))]>, - VEX_4V; + [(set RC:$dst, (Int RC:$src1, RC:$src2))], IIC_BIN_NONMEM>, + VEX_4V, Sched<[WriteALU]>; def rm : I<0xF5, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set RC:$dst, (Int RC:$src1, (ld_frag addr:$src2)))]>, VEX_4V; + [(set RC:$dst, (Int RC:$src1, (ld_frag addr:$src2)))], + IIC_BIN_MEM>, VEX_4V, Sched<[WriteALULd, ReadAfterLd]>; } let Predicates = [HasBMI2] in { @@ -2485,20 +2507,20 @@ multiclass tbm_ternary_imm_intr opc, RegisterClass RC, string OpcodeStr, def ri : Ii32, - XOP, XOPA; + [(set RC:$dst, (Int RC:$src1, immoperator:$cntl))], + IIC_BIN_NONMEM>, XOP, XOPA, Sched<[WriteALU]>; def mi : Ii32, - XOP, XOPA; + [(set RC:$dst, (Int (ld_frag addr:$src1), immoperator:$cntl))], + IIC_BIN_MEM>, XOP, XOPA, Sched<[WriteALULd, ReadAfterLd]>; } -defm BEXTRI32 : 
tbm_ternary_imm_intr<0x10, GR32, "bextr", i32mem, loadi32, +defm BEXTRI32 : tbm_ternary_imm_intr<0x10, GR32, "bextr{l}", i32mem, loadi32, int_x86_tbm_bextri_u32, i32imm, imm>; let ImmT = Imm32S in -defm BEXTRI64 : tbm_ternary_imm_intr<0x10, GR64, "bextr", i64mem, loadi64, +defm BEXTRI64 : tbm_ternary_imm_intr<0x10, GR64, "bextr{q}", i64mem, loadi64, int_x86_tbm_bextri_u64, i64i32imm, i64immSExt32>, VEX_W; @@ -2508,20 +2530,20 @@ multiclass tbm_binary_rm opc, Format FormReg, Format FormMem, let hasSideEffects = 0 in { def rr : I, XOP_4V, XOP9; + [], IIC_BIN_NONMEM>, XOP_4V, XOP9, Sched<[WriteALU]>; let mayLoad = 1 in def rm : I, XOP_4V, XOP9; + [], IIC_BIN_MEM>, XOP_4V, XOP9, Sched<[WriteALULd, ReadAfterLd]>; } } multiclass tbm_binary_intr opc, string OpcodeStr, Format FormReg, Format FormMem> { - defm NAME#32 : tbm_binary_rm; - defm NAME#64 : tbm_binary_rm, VEX_W; + defm NAME#32 : tbm_binary_rm; + defm NAME#64 : tbm_binary_rm, VEX_W; } defm BLCFILL : tbm_binary_intr<0x01, "blcfill", MRM1r, MRM1m>; @@ -2547,21 +2569,21 @@ let Predicates = [HasTBM] in { //===----------------------------------------------------------------------===// // Lightweight Profiling Instructions -let Predicates = [HasLWP] in { +let Predicates = [HasLWP], SchedRW = [WriteSystem] in { def LLWPCB : I<0x12, MRM0r, (outs), (ins GR32:$src), "llwpcb\t$src", [(int_x86_llwpcb GR32:$src)], IIC_LWP>, - XOP, XOP9, Requires<[Not64BitMode]>; + XOP, XOP9; def SLWPCB : I<0x12, MRM1r, (outs GR32:$dst), (ins), "slwpcb\t$dst", [(set GR32:$dst, (int_x86_slwpcb))], IIC_LWP>, - XOP, XOP9, Requires<[Not64BitMode]>; + XOP, XOP9; def LLWPCB64 : I<0x12, MRM0r, (outs), (ins GR64:$src), "llwpcb\t$src", [(int_x86_llwpcb GR64:$src)], IIC_LWP>, - XOP, XOP9, VEX_W, Requires<[In64BitMode]>; + XOP, XOP9, VEX_W; def SLWPCB64 : I<0x12, MRM1r, (outs GR64:$dst), (ins), "slwpcb\t$dst", [(set GR64:$dst, (int_x86_slwpcb))], IIC_LWP>, - XOP, XOP9, VEX_W, Requires<[In64BitMode]>; + XOP, XOP9, VEX_W; multiclass lwpins_intr { def rri : Ii32<0x12, MRM0r, (outs), (ins RC:$src0, GR32:$src1, i32imm:$cntl), @@ -2595,7 +2617,7 @@ multiclass lwpval_intr { defm LWPVAL32 : lwpval_intr; defm LWPVAL64 : lwpval_intr, VEX_W; -} // HasLWP +} // HasLWP, SchedRW //===----------------------------------------------------------------------===// // MONITORX/MWAITX Instructions @@ -2708,14 +2730,14 @@ let Predicates = [HasTBM] in { // Memory Instructions // -let Predicates = [HasCLFLUSHOPT] in +let Predicates = [HasCLFLUSHOPT], SchedRW = [WriteLoad] in def CLFLUSHOPT : I<0xAE, MRM7m, (outs), (ins i8mem:$src), - "clflushopt\t$src", [(int_x86_clflushopt addr:$src)]>, PD; + "clflushopt\t$src", [(int_x86_clflushopt addr:$src)], + IIC_SSE_PREFETCH>, PD; -let Predicates = [HasCLWB] in +let Predicates = [HasCLWB], SchedRW = [WriteLoad] in def CLWB : I<0xAE, MRM6m, (outs), (ins i8mem:$src), "clwb\t$src", - [(int_x86_clwb addr:$src)]>, PD; - + [(int_x86_clwb addr:$src)], IIC_SSE_PREFETCH>, PD; //===----------------------------------------------------------------------===// // Subsystems. @@ -2955,10 +2977,10 @@ def : InstAlias<"bts\t{$imm, $mem|$mem, $imm}", (BTS32mi8 i32mem:$mem, i32i8imm:$imm), 0>; // clr aliases. 
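// These have no encoding of their own; each one expands to an xor of the
// register with itself.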
-def : InstAlias<"clrb\t$reg", (XOR8rr GR8 :$reg, GR8 :$reg), 0>; -def : InstAlias<"clrw\t$reg", (XOR16rr GR16:$reg, GR16:$reg), 0>; -def : InstAlias<"clrl\t$reg", (XOR32rr GR32:$reg, GR32:$reg), 0>; -def : InstAlias<"clrq\t$reg", (XOR64rr GR64:$reg, GR64:$reg), 0>; +def : InstAlias<"clr{b}\t$reg", (XOR8rr GR8 :$reg, GR8 :$reg), 0>; +def : InstAlias<"clr{w}\t$reg", (XOR16rr GR16:$reg, GR16:$reg), 0>; +def : InstAlias<"clr{l}\t$reg", (XOR32rr GR32:$reg, GR32:$reg), 0>; +def : InstAlias<"clr{q}\t$reg", (XOR64rr GR64:$reg, GR64:$reg), 0>; // lods aliases. Accept the destination being omitted because it's implicit // in the mnemonic, or the mnemonic suffix being omitted because it's implicit @@ -2971,10 +2993,10 @@ def : InstAlias<"lods\t{$src, %al|al, $src}", (LODSB srcidx8:$src), 0>; def : InstAlias<"lods\t{$src, %ax|ax, $src}", (LODSW srcidx16:$src), 0>; def : InstAlias<"lods\t{$src, %eax|eax, $src}", (LODSL srcidx32:$src), 0>; def : InstAlias<"lods\t{$src, %rax|rax, $src}", (LODSQ srcidx64:$src), 0>, Requires<[In64BitMode]>; -def : InstAlias<"lods\t$src", (LODSB srcidx8:$src), 0>; -def : InstAlias<"lods\t$src", (LODSW srcidx16:$src), 0>; -def : InstAlias<"lods\t$src", (LODSL srcidx32:$src), 0>; -def : InstAlias<"lods\t$src", (LODSQ srcidx64:$src), 0>, Requires<[In64BitMode]>; +def : InstAlias<"lods{b}\t$src", (LODSB srcidx8:$src), 0>; +def : InstAlias<"lods{w}\t$src", (LODSW srcidx16:$src), 0>; +def : InstAlias<"lods{l}\t$src", (LODSL srcidx32:$src), 0>; +def : InstAlias<"lods{q}\t$src", (LODSQ srcidx64:$src), 0>, Requires<[In64BitMode]>; // stos aliases. Accept the source being omitted because it's implicit in @@ -2988,10 +3010,10 @@ def : InstAlias<"stos\t{%al, $dst|$dst, al}", (STOSB dstidx8:$dst), 0>; def : InstAlias<"stos\t{%ax, $dst|$dst, ax}", (STOSW dstidx16:$dst), 0>; def : InstAlias<"stos\t{%eax, $dst|$dst, eax}", (STOSL dstidx32:$dst), 0>; def : InstAlias<"stos\t{%rax, $dst|$dst, rax}", (STOSQ dstidx64:$dst), 0>, Requires<[In64BitMode]>; -def : InstAlias<"stos\t$dst", (STOSB dstidx8:$dst), 0>; -def : InstAlias<"stos\t$dst", (STOSW dstidx16:$dst), 0>; -def : InstAlias<"stos\t$dst", (STOSL dstidx32:$dst), 0>; -def : InstAlias<"stos\t$dst", (STOSQ dstidx64:$dst), 0>, Requires<[In64BitMode]>; +def : InstAlias<"stos{b}\t$dst", (STOSB dstidx8:$dst), 0>; +def : InstAlias<"stos{w}\t$dst", (STOSW dstidx16:$dst), 0>; +def : InstAlias<"stos{l}\t$dst", (STOSL dstidx32:$dst), 0>; +def : InstAlias<"stos{q}\t$dst", (STOSQ dstidx64:$dst), 0>, Requires<[In64BitMode]>; // scas aliases. Accept the destination being omitted because it's implicit @@ -3005,24 +3027,24 @@ def : InstAlias<"scas\t{$dst, %al|al, $dst}", (SCASB dstidx8:$dst), 0>; def : InstAlias<"scas\t{$dst, %ax|ax, $dst}", (SCASW dstidx16:$dst), 0>; def : InstAlias<"scas\t{$dst, %eax|eax, $dst}", (SCASL dstidx32:$dst), 0>; def : InstAlias<"scas\t{$dst, %rax|rax, $dst}", (SCASQ dstidx64:$dst), 0>, Requires<[In64BitMode]>; -def : InstAlias<"scas\t$dst", (SCASB dstidx8:$dst), 0>; -def : InstAlias<"scas\t$dst", (SCASW dstidx16:$dst), 0>; -def : InstAlias<"scas\t$dst", (SCASL dstidx32:$dst), 0>; -def : InstAlias<"scas\t$dst", (SCASQ dstidx64:$dst), 0>, Requires<[In64BitMode]>; +def : InstAlias<"scas{b}\t$dst", (SCASB dstidx8:$dst), 0>; +def : InstAlias<"scas{w}\t$dst", (SCASW dstidx16:$dst), 0>; +def : InstAlias<"scas{l}\t$dst", (SCASL dstidx32:$dst), 0>; +def : InstAlias<"scas{q}\t$dst", (SCASQ dstidx64:$dst), 0>, Requires<[In64BitMode]>; // cmps aliases. Mnemonic suffix being omitted because it's implicit // in the destination. 
-def : InstAlias<"cmps\t{$dst, $src|$src, $dst}", (CMPSB dstidx8:$dst, srcidx8:$src), 0>; -def : InstAlias<"cmps\t{$dst, $src|$src, $dst}", (CMPSW dstidx16:$dst, srcidx16:$src), 0>; -def : InstAlias<"cmps\t{$dst, $src|$src, $dst}", (CMPSL dstidx32:$dst, srcidx32:$src), 0>; -def : InstAlias<"cmps\t{$dst, $src|$src, $dst}", (CMPSQ dstidx64:$dst, srcidx64:$src), 0>, Requires<[In64BitMode]>; +def : InstAlias<"cmps{b}\t{$dst, $src|$src, $dst}", (CMPSB dstidx8:$dst, srcidx8:$src), 0>; +def : InstAlias<"cmps{w}\t{$dst, $src|$src, $dst}", (CMPSW dstidx16:$dst, srcidx16:$src), 0>; +def : InstAlias<"cmps{l}\t{$dst, $src|$src, $dst}", (CMPSL dstidx32:$dst, srcidx32:$src), 0>; +def : InstAlias<"cmps{q}\t{$dst, $src|$src, $dst}", (CMPSQ dstidx64:$dst, srcidx64:$src), 0>, Requires<[In64BitMode]>; // movs aliases. Mnemonic suffix being omitted because it's implicit // in the destination. -def : InstAlias<"movs\t{$src, $dst|$dst, $src}", (MOVSB dstidx8:$dst, srcidx8:$src), 0>; -def : InstAlias<"movs\t{$src, $dst|$dst, $src}", (MOVSW dstidx16:$dst, srcidx16:$src), 0>; -def : InstAlias<"movs\t{$src, $dst|$dst, $src}", (MOVSL dstidx32:$dst, srcidx32:$src), 0>; -def : InstAlias<"movs\t{$src, $dst|$dst, $src}", (MOVSQ dstidx64:$dst, srcidx64:$src), 0>, Requires<[In64BitMode]>; +def : InstAlias<"movs{b}\t{$src, $dst|$dst, $src}", (MOVSB dstidx8:$dst, srcidx8:$src), 0>; +def : InstAlias<"movs{w}\t{$src, $dst|$dst, $src}", (MOVSW dstidx16:$dst, srcidx16:$src), 0>; +def : InstAlias<"movs{l}\t{$src, $dst|$dst, $src}", (MOVSL dstidx32:$dst, srcidx32:$src), 0>; +def : InstAlias<"movs{q}\t{$src, $dst|$dst, $src}", (MOVSQ dstidx64:$dst, srcidx64:$src), 0>, Requires<[In64BitMode]>; // div and idiv aliases for explicit A register. def : InstAlias<"div{b}\t{$src, %al|al, $src}", (DIV8r GR8 :$src)>; @@ -3175,10 +3197,12 @@ def : InstAlias<"mov\t{$seg, $mem|$mem, $seg}", (MOV16ms i16mem:$mem, SEGMENT_RE // Match 'movq , ' as an alias for movabsq. def : InstAlias<"mov{q}\t{$imm, $reg|$reg, $imm}", (MOV64ri GR64:$reg, i64imm:$imm), 0>; -// Match 'movq GR64, MMX' as an alias for movd. -def : InstAlias<"movq\t{$src, $dst|$dst, $src}", +// Match 'movd GR64, MMX' as an alias for movq to be compatible with gas, +// which supports this due to an old AMD documentation bug when 64-bit mode was +// created. +def : InstAlias<"movd\t{$src, $dst|$dst, $src}", (MMX_MOVD64to64rr VR64:$dst, GR64:$src), 0>; -def : InstAlias<"movq\t{$src, $dst|$dst, $src}", +def : InstAlias<"movd\t{$src, $dst|$dst, $src}", (MMX_MOVD64from64rr GR64:$dst, VR64:$src), 0>; // movsx aliases @@ -3277,12 +3301,19 @@ def : InstAlias<"xchg{q}\t{$mem, $val|$val, $mem}", // xchg: We accept "xchgX , %eax" and "xchgX %eax, " as synonyms. def : InstAlias<"xchg{w}\t{%ax, $src|$src, ax}", (XCHG16ar GR16:$src), 0>; -def : InstAlias<"xchg{l}\t{%eax, $src|$src, eax}", - (XCHG32ar GR32:$src), 0>, Requires<[Not64BitMode]>; -def : InstAlias<"xchg{l}\t{%eax, $src|$src, eax}", - (XCHG32ar64 GR32_NOAX:$src), 0>, Requires<[In64BitMode]>; +def : InstAlias<"xchg{l}\t{%eax, $src|$src, eax}", (XCHG32ar GR32:$src), 0>; def : InstAlias<"xchg{q}\t{%rax, $src|$src, rax}", (XCHG64ar GR64:$src), 0>; +// In 64-bit mode, xchg %eax, %eax can't be encoded with the 0x90 opcode we +// would get by default because it's defined as NOP. But xchg %eax, %eax implies +// implicit zeroing of the upper 32 bits. So alias to the longer encoding. 
+def : InstAlias<"xchg{l}\t{%eax, %eax|eax, eax}", + (XCHG32rr EAX, EAX), 0>, Requires<[In64BitMode]>; + +// xchg %rax, %rax is a nop in x86-64 and can be encoded as such. Without this +// we emit an unneeded REX.w prefix. +def : InstAlias<"xchg{q}\t{%rax, %rax|rax, rax}", (NOOP), 0>; + // These aliases exist to get the parser to prioritize matching 8-bit // immediate encodings over matching the implicit ax/eax/rax encodings. By // explicitly mentioning the A register here, these entries will be ordered diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td index 039b4a248544..1a1f64e3a0bd 100644 --- a/lib/Target/X86/X86InstrMMX.td +++ b/lib/Target/X86/X86InstrMMX.td @@ -90,11 +90,21 @@ def MMX_CVT_PS_ITINS : OpndItins< >; } +// Alias instruction that maps zero vector to pxor mmx. +// This is expanded by ExpandPostRAPseudos to an pxor. +// We set canFoldAsLoad because this can be converted to a constant-pool +// load of an all-zeros value if folding it would be beneficial. +let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, + isPseudo = 1, SchedRW = [WriteZero] in { +def MMX_SET0 : I<0, Pseudo, (outs VR64:$dst), (ins), "", []>; +} + let Constraints = "$src1 = $dst" in { // MMXI_binop_rm_int - Simple MMX binary operator based on intrinsic. // When this is cleaned up, remove the FIXME from X86RecognizableInstr.cpp. multiclass MMXI_binop_rm_int opc, string OpcodeStr, Intrinsic IntId, - OpndItins itins, bit Commutable = 0> { + OpndItins itins, bit Commutable = 0, + X86MemOperand OType = i64mem> { def irr : MMXI; - let AddedComplexity = 20 in + let AddedComplexity = 20 in { + def : Pat<(x86mmx (MMX_X86movw2d (i32 0))), + (MMX_SET0)>; def : Pat<(x86mmx (MMX_X86movw2d (loadi32 addr:$src))), (MMX_MOVD64rm addr:$src)>; + } } let mayStore = 1 in @@ -253,13 +266,13 @@ def MMX_MOVD64grr : MMXI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR64:$src), let isBitcast = 1 in def MMX_MOVD64to64rr : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src), - "movd\t{$src, $dst|$dst, $src}", + "movq\t{$src, $dst|$dst, $src}", [(set VR64:$dst, (bitconvert GR64:$src))], IIC_MMX_MOV_MM_RM>, Sched<[WriteMove]>; let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in def MMX_MOVD64to64rm : MMXRI<0x6E, MRMSrcMem, (outs VR64:$dst), - (ins i64mem:$src), "movd\t{$src, $dst|$dst, $src}", + (ins i64mem:$src), "movq\t{$src, $dst|$dst, $src}", [], IIC_MMX_MOVQ_RM>, Sched<[WriteLoad]>; // These are 64 bit moves, but since the OS X assembler doesn't @@ -268,7 +281,7 @@ def MMX_MOVD64to64rm : MMXRI<0x6E, MRMSrcMem, (outs VR64:$dst), let SchedRW = [WriteMove], isBitcast = 1 in { def MMX_MOVD64from64rr : MMXRI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR64:$src), - "movd\t{$src, $dst|$dst, $src}", + "movq\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (bitconvert VR64:$src))], IIC_MMX_MOV_REG_MM>; let hasSideEffects = 0 in @@ -285,7 +298,7 @@ def MMX_MOVQ64rr_REV : MMXI<0x7F, MRMDestReg, (outs VR64:$dst), (ins VR64:$src), let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in def MMX_MOVD64from64rm : MMXRI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src), - "movd\t{$src, $dst|$dst, $src}", + "movq\t{$src, $dst|$dst, $src}", [], IIC_MMX_MOV_REG_MM>, Sched<[WriteStore]>; let SchedRW = [WriteLoad] in { @@ -524,13 +537,16 @@ defm MMX_PUNPCKHDQ : MMXI_binop_rm_int<0x6A, "punpckhdq", MMX_UNPCK_H_ITINS>; defm MMX_PUNPCKLBW : MMXI_binop_rm_int<0x60, "punpcklbw", int_x86_mmx_punpcklbw, - MMX_UNPCK_L_ITINS>; + MMX_UNPCK_L_ITINS, + 0, i32mem>; defm 
MMX_PUNPCKLWD : MMXI_binop_rm_int<0x61, "punpcklwd", int_x86_mmx_punpcklwd, - MMX_UNPCK_L_ITINS>; + MMX_UNPCK_L_ITINS, + 0, i32mem>; defm MMX_PUNPCKLDQ : MMXI_binop_rm_int<0x62, "punpckldq", int_x86_mmx_punpckldq, - MMX_UNPCK_L_ITINS>; + MMX_UNPCK_L_ITINS, + 0, i32mem>; // -- Pack Instructions defm MMX_PACKSSWB : MMXI_binop_rm_int<0x63, "packsswb", int_x86_mmx_packsswb, diff --git a/lib/Target/X86/X86InstrMPX.td b/lib/Target/X86/X86InstrMPX.td index 104ba2a174db..cb2b47b4f0c9 100644 --- a/lib/Target/X86/X86InstrMPX.td +++ b/lib/Target/X86/X86InstrMPX.td @@ -13,13 +13,16 @@ // //===----------------------------------------------------------------------===// +// FIXME: Investigate a better scheduler itinerary once MPX is used inside LLVM. +let SchedRW = [WriteSystem] in { + multiclass mpx_bound_make opc, string OpcodeStr> { let mayLoad = 1 in { def 32rm: I, + OpcodeStr#"\t{$src, $dst|$dst, $src}", [], IIC_MPX>, Requires<[HasMPX, Not64BitMode]>; def 64rm: RI, + OpcodeStr#"\t{$src, $dst|$dst, $src}", [], IIC_MPX>, Requires<[HasMPX, In64BitMode]>; } } @@ -29,17 +32,17 @@ defm BNDMK : mpx_bound_make<0x1B, "bndmk">, XS; multiclass mpx_bound_check opc, string OpcodeStr> { let mayLoad = 1 in { def 32rm: I, + OpcodeStr#"\t{$src2, $src1|$src1, $src2}", [], IIC_MPX>, Requires<[HasMPX, Not64BitMode]>; def 64rm: RI, + OpcodeStr#"\t{$src2, $src1|$src1, $src2}", [], IIC_MPX>, Requires<[HasMPX, In64BitMode]>; } def 32rr: I, + OpcodeStr#"\t{$src2, $src1|$src1, $src2}", [], IIC_MPX>, Requires<[HasMPX, Not64BitMode]>; def 64rr: RI, + OpcodeStr#"\t{$src2, $src1|$src1, $src2}", [], IIC_MPX>, Requires<[HasMPX, In64BitMode]>; } defm BNDCL : mpx_bound_check<0x1A, "bndcl">, XS; @@ -47,32 +50,33 @@ defm BNDCU : mpx_bound_check<0x1A, "bndcu">, XD; defm BNDCN : mpx_bound_check<0x1B, "bndcn">, XD; def BNDMOVRMrr : I<0x1A, MRMSrcReg, (outs BNDR:$dst), (ins BNDR:$src), - "bndmov\t{$src, $dst|$dst, $src}", []>, PD, + "bndmov\t{$src, $dst|$dst, $src}", [], IIC_MPX>, PD, Requires<[HasMPX]>; let mayLoad = 1 in { def BNDMOVRM32rm : I<0x1A, MRMSrcMem, (outs BNDR:$dst), (ins i64mem:$src), - "bndmov\t{$src, $dst|$dst, $src}", []>, PD, + "bndmov\t{$src, $dst|$dst, $src}", [], IIC_MPX>, PD, Requires<[HasMPX, Not64BitMode]>; def BNDMOVRM64rm : RI<0x1A, MRMSrcMem, (outs BNDR:$dst), (ins i128mem:$src), - "bndmov\t{$src, $dst|$dst, $src}", []>, PD, + "bndmov\t{$src, $dst|$dst, $src}", [], IIC_MPX>, PD, Requires<[HasMPX, In64BitMode]>; } def BNDMOVMRrr : I<0x1B, MRMDestReg, (outs BNDR:$dst), (ins BNDR:$src), - "bndmov\t{$src, $dst|$dst, $src}", []>, PD, + "bndmov\t{$src, $dst|$dst, $src}", [], IIC_MPX>, PD, Requires<[HasMPX]>; let mayStore = 1 in { def BNDMOVMR32mr : I<0x1B, MRMDestMem, (outs), (ins i64mem:$dst, BNDR:$src), - "bndmov\t{$src, $dst|$dst, $src}", []>, PD, + "bndmov\t{$src, $dst|$dst, $src}", [], IIC_MPX>, PD, Requires<[HasMPX, Not64BitMode]>; def BNDMOVMR64mr : RI<0x1B, MRMDestMem, (outs), (ins i128mem:$dst, BNDR:$src), - "bndmov\t{$src, $dst|$dst, $src}", []>, PD, + "bndmov\t{$src, $dst|$dst, $src}", [], IIC_MPX>, PD, Requires<[HasMPX, In64BitMode]>; def BNDSTXmr: I<0x1B, MRMDestMem, (outs), (ins i64mem:$dst, BNDR:$src), - "bndstx\t{$src, $dst|$dst, $src}", []>, PS, + "bndstx\t{$src, $dst|$dst, $src}", [], IIC_MPX>, PS, Requires<[HasMPX]>; } let mayLoad = 1 in -def BNDLDXrm: I<0x1A, MRMSrcMem, (outs BNDR:$dst), (ins i64mem:$src), - "bndldx\t{$src, $dst|$dst, $src}", []>, PS, +def BNDLDXrm: I<0x1A, MRMSrcMem, (outs BNDR:$dst), (ins anymem:$src), + "bndldx\t{$src, $dst|$dst, $src}", [], IIC_MPX>, PS, Requires<[HasMPX]>; +} 
// SchedRW diff --git a/lib/Target/X86/X86InstrSGX.td b/lib/Target/X86/X86InstrSGX.td index 84119ad5eb35..f4331c5e2d93 100644 --- a/lib/Target/X86/X86InstrSGX.td +++ b/lib/Target/X86/X86InstrSGX.td @@ -15,6 +15,7 @@ //===----------------------------------------------------------------------===// // SGX instructions +let SchedRW = [WriteSystem] in { // ENCLS - Execute an Enclave System Function of Specified Leaf Number def ENCLS : I<0x01, MRM_CF, (outs), (ins), "encls", []>, TB; @@ -22,3 +23,4 @@ def ENCLS : I<0x01, MRM_CF, (outs), (ins), // ENCLU - Execute an Enclave User Function of Specified Leaf Number def ENCLU : I<0x01, MRM_D7, (outs), (ins), "enclu", []>, TB; +} // SchedRW diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index ac465e3963ef..124bcc9c44bc 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -25,9 +25,15 @@ class SizeItins { OpndItins d = arg_d; } +class MoveLoadStoreItins { + InstrItinClass rr = arg_rr; + InstrItinClass rm = arg_rm; + InstrItinClass mr = arg_mr; +} class ShiftOpndItins { + InstrItinClass arg_ri> { InstrItinClass rr = arg_rr; InstrItinClass rm = arg_rm; InstrItinClass ri = arg_ri; @@ -139,6 +145,11 @@ def SSE_INTMUL_ITINS_P : OpndItins< IIC_SSE_INTMUL_P_RR, IIC_SSE_INTMUL_P_RM >; +// FIXME: Merge SSE_INTSHIFT_P + SSE_INTSHIFT_ITINS_P. +def SSE_INTSHIFT_P : OpndItins< + IIC_SSE_INTSH_P_RR, IIC_SSE_INTSH_P_RM +>; + def SSE_INTSHIFT_ITINS_P : ShiftOpndItins< IIC_SSE_INTSH_P_RR, IIC_SSE_INTSH_P_RM, IIC_SSE_INTSH_P_RI >; @@ -147,10 +158,18 @@ def SSE_MOVA_ITINS : OpndItins< IIC_SSE_MOVA_P_RR, IIC_SSE_MOVA_P_RM >; +def SSE_MOVA : MoveLoadStoreItins< + IIC_SSE_MOVA_P_RR, IIC_SSE_MOVA_P_RM, IIC_SSE_MOVA_P_MR +>; + def SSE_MOVU_ITINS : OpndItins< IIC_SSE_MOVU_P_RR, IIC_SSE_MOVU_P_RM >; +def SSE_MOVU : MoveLoadStoreItins< + IIC_SSE_MOVU_P_RR, IIC_SSE_MOVU_P_RM, IIC_SSE_MOVU_P_MR +>; + def SSE_DPPD_ITINS : OpndItins< IIC_SSE_DPPD_RR, IIC_SSE_DPPD_RM >; @@ -341,7 +360,7 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, // Alias instruction that maps zero vector to pxor / xorp* for sse. // This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then -// swizzled by ExecutionDepsFix to pxor. +// swizzled by ExecutionDomainFix to pxor. // We set canFoldAsLoad because this can be converted to a constant-pool // load of an all-zeros value if folding it would be beneficial. 
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, @@ -1128,35 +1147,73 @@ let Constraints = "$src1 = $dst", AddedComplexity = 20 in { // SSE 1 & 2 - Conversion Instructions //===----------------------------------------------------------------------===// -def SSE_CVT_PD : OpndItins< +let Sched = WriteCvtF2I in { +def SSE_CVT_SS2SI_32 : OpndItins< + IIC_SSE_CVT_SS2SI32_RR, IIC_SSE_CVT_SS2SI32_RM +>; + +let Sched = WriteCvtF2I in +def SSE_CVT_SS2SI_64 : OpndItins< + IIC_SSE_CVT_SS2SI64_RR, IIC_SSE_CVT_SS2SI64_RM +>; + +def SSE_CVT_SD2SI : OpndItins< + IIC_SSE_CVT_SD2SI_RR, IIC_SSE_CVT_SD2SI_RM +>; + +def SSE_CVT_PS2I : OpndItins< + IIC_SSE_CVT_PS_RR, IIC_SSE_CVT_PS_RM +>; + +def SSE_CVT_PD2I : OpndItins< IIC_SSE_CVT_PD_RR, IIC_SSE_CVT_PD_RM >; +} + +let Sched = WriteCvtI2F in { +def SSE_CVT_SI2SS : OpndItins< + IIC_SSE_CVT_Scalar_RR, IIC_SSE_CVT_Scalar_RM +>; -let Sched = WriteCvtI2F in -def SSE_CVT_PS : OpndItins< +def SSE_CVT_SI2SD : OpndItins< + IIC_SSE_CVT_Scalar_RR, IIC_SSE_CVT_Scalar_RM +>; + +def SSE_CVT_I2PS : OpndItins< IIC_SSE_CVT_PS_RR, IIC_SSE_CVT_PS_RM >; -let Sched = WriteCvtI2F in -def SSE_CVT_Scalar : OpndItins< +def SSE_CVT_I2PD : OpndItins< + IIC_SSE_CVT_PD_RR, IIC_SSE_CVT_PD_RM +>; +} + +let Sched = WriteCvtF2F in { +def SSE_CVT_SD2SS : OpndItins< IIC_SSE_CVT_Scalar_RR, IIC_SSE_CVT_Scalar_RM >; -let Sched = WriteCvtF2I in -def SSE_CVT_SS2SI_32 : OpndItins< - IIC_SSE_CVT_SS2SI32_RR, IIC_SSE_CVT_SS2SI32_RM +def SSE_CVT_SS2SD : OpndItins< + IIC_SSE_CVT_Scalar_RR, IIC_SSE_CVT_Scalar_RM >; -let Sched = WriteCvtF2I in -def SSE_CVT_SS2SI_64 : OpndItins< - IIC_SSE_CVT_SS2SI64_RR, IIC_SSE_CVT_SS2SI64_RM +def SSE_CVT_PD2PS : OpndItins< + IIC_SSE_CVT_PD_RR, IIC_SSE_CVT_PD_RM >; -let Sched = WriteCvtF2I in -def SSE_CVT_SD2SI : OpndItins< - IIC_SSE_CVT_SD2SI_RR, IIC_SSE_CVT_SD2SI_RM +def SSE_CVT_PS2PD : OpndItins< + IIC_SSE_CVT_PD_RR, IIC_SSE_CVT_PD_RM +>; + +def SSE_CVT_PH2PS : OpndItins< + IIC_SSE_CVT_PS_RR, IIC_SSE_CVT_PS_RM >; +def SSE_CVT_PS2PH : OpndItins< + IIC_SSE_CVT_PS_RR, IIC_SSE_CVT_PS_RM +>; +} + // FIXME: We probably want to match the rm form only when optimizing for // size, to avoid false depenendecies (see sse_fp_unop_s for details) multiclass sse12_cvt_s opc, RegisterClass SrcRC, RegisterClass DstRC, @@ -1188,16 +1245,16 @@ let hasSideEffects = 0 in { // FIXME: We probably want to match the rm form only when optimizing for // size, to avoid false depenendecies (see sse_fp_unop_s for details) multiclass sse12_vcvt_avx opc, RegisterClass SrcRC, RegisterClass DstRC, - X86MemOperand x86memop, string asm> { + X86MemOperand x86memop, string asm, OpndItins itins> { let hasSideEffects = 0, Predicates = [UseAVX] in { def rr : SI, - Sched<[WriteCvtI2F]>; + !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), [], + itins.rr>, Sched<[itins.Sched]>; let mayLoad = 1 in def rm : SI, - Sched<[WriteCvtI2FLd, ReadAfterLd]>; + Sched<[itins.Sched.Folded, ReadAfterLd]>; } // hasSideEffects = 0 } @@ -1240,14 +1297,14 @@ def : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}", // register, but the same isn't true when only using memory operands, // provide other assembly "l" and "q" forms to address this explicitly // where appropriate to do so. 
-defm VCVTSI2SS : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss{l}">, - XS, VEX_4V, VEX_LIG; -defm VCVTSI2SS64 : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss{q}">, - XS, VEX_4V, VEX_W, VEX_LIG; -defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd{l}">, - XD, VEX_4V, VEX_LIG; -defm VCVTSI2SD64 : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}">, - XD, VEX_4V, VEX_W, VEX_LIG; +defm VCVTSI2SS : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss{l}", + SSE_CVT_SI2SS>, XS, VEX_4V, VEX_LIG; +defm VCVTSI642SS : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss{q}", + SSE_CVT_SI2SS>, XS, VEX_4V, VEX_W, VEX_LIG; +defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd{l}", + SSE_CVT_SI2SD>, XD, VEX_4V, VEX_LIG; +defm VCVTSI642SD : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}", + SSE_CVT_SI2SD>, XD, VEX_4V, VEX_W, VEX_LIG; let Predicates = [UseAVX] in { def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}", @@ -1258,20 +1315,20 @@ let Predicates = [UseAVX] in { def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))), (VCVTSI2SSrm (f32 (IMPLICIT_DEF)), addr:$src)>; def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))), - (VCVTSI2SS64rm (f32 (IMPLICIT_DEF)), addr:$src)>; + (VCVTSI642SSrm (f32 (IMPLICIT_DEF)), addr:$src)>; def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))), (VCVTSI2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>; def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))), - (VCVTSI2SD64rm (f64 (IMPLICIT_DEF)), addr:$src)>; + (VCVTSI642SDrm (f64 (IMPLICIT_DEF)), addr:$src)>; def : Pat<(f32 (sint_to_fp GR32:$src)), (VCVTSI2SSrr (f32 (IMPLICIT_DEF)), GR32:$src)>; def : Pat<(f32 (sint_to_fp GR64:$src)), - (VCVTSI2SS64rr (f32 (IMPLICIT_DEF)), GR64:$src)>; + (VCVTSI642SSrr (f32 (IMPLICIT_DEF)), GR64:$src)>; def : Pat<(f64 (sint_to_fp GR32:$src)), (VCVTSI2SDrr (f64 (IMPLICIT_DEF)), GR32:$src)>; def : Pat<(f64 (sint_to_fp GR64:$src)), - (VCVTSI2SD64rr (f64 (IMPLICIT_DEF)), GR64:$src)>; + (VCVTSI642SDrr (f64 (IMPLICIT_DEF)), GR64:$src)>; } defm CVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32, @@ -1288,16 +1345,16 @@ defm CVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64, SSE_CVT_SD2SI>, XD, REX_W; defm CVTSI2SS : sse12_cvt_s<0x2A, GR32, FR32, sint_to_fp, i32mem, loadi32, "cvtsi2ss{l}\t{$src, $dst|$dst, $src}", - SSE_CVT_Scalar>, XS; -defm CVTSI2SS64 : sse12_cvt_s<0x2A, GR64, FR32, sint_to_fp, i64mem, loadi64, + SSE_CVT_SI2SS>, XS; +defm CVTSI642SS : sse12_cvt_s<0x2A, GR64, FR32, sint_to_fp, i64mem, loadi64, "cvtsi2ss{q}\t{$src, $dst|$dst, $src}", - SSE_CVT_Scalar>, XS, REX_W; + SSE_CVT_SI2SS>, XS, REX_W; defm CVTSI2SD : sse12_cvt_s<0x2A, GR32, FR64, sint_to_fp, i32mem, loadi32, "cvtsi2sd{l}\t{$src, $dst|$dst, $src}", - SSE_CVT_Scalar>, XD; -defm CVTSI2SD64 : sse12_cvt_s<0x2A, GR64, FR64, sint_to_fp, i64mem, loadi64, + SSE_CVT_SI2SD>, XD; +defm CVTSI642SD : sse12_cvt_s<0x2A, GR64, FR64, sint_to_fp, i64mem, loadi64, "cvtsi2sd{q}\t{$src, $dst|$dst, $src}", - SSE_CVT_Scalar>, XD, REX_W; + SSE_CVT_SI2SD>, XD, REX_W; def : InstAlias<"cvttss2si{l}\t{$src, $dst|$dst, $src}", (CVTTSS2SIrr GR32:$dst, FR32:$src), 0>; @@ -1329,33 +1386,33 @@ def : InstAlias<"cvtsi2sd\t{$src, $dst|$dst, $src}", multiclass sse12_cvt_sint opc, RegisterClass SrcRC, RegisterClass DstRC, Intrinsic Int, Operand memop, ComplexPattern mem_cpat, string asm, OpndItins itins> { - def rr : SI, - Sched<[itins.Sched]>; - def rm : SI, - Sched<[itins.Sched.Folded]>; + def rr_Int : SI, + Sched<[itins.Sched]>; + def rm_Int : SI, + Sched<[itins.Sched.Folded]>; } 
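Aside: the conversion changes above show the refactoring pattern this patch applies throughout the file: the shared multiclasses stop hard-coding scheduling classes such as WriteCvtI2F/WriteCvtI2FLd and instead take an OpndItins bundle, reading both the itineraries (itins.rr, itins.rm) and the scheduling class (itins.Sched, itins.Sched.Folded) from it. The sketch below is only a loose C++ analogy of that design choice; the struct and function names are invented for illustration and nothing here is LLVM code.

// itins_analogy.cpp -- loose analogy, not LLVM code: one per-operation bundle
// (reg-reg itinerary, folded-load itinerary, scheduling class) drives every
// form emitted by a shared template, instead of hard-coding them per form.
#include <cstdio>
#include <string>

struct OpndItins {       // stand-in for the TableGen OpndItins class
  std::string rr;        // itinerary for the reg-reg form
  std::string rm;        // itinerary for the reg-mem (folded load) form
  std::string sched;     // scheduling class shared by both forms
};

static void defineInstPair(const std::string &name, const OpndItins &itins) {
  std::printf("%srr: itin=%s sched=%s\n", name.c_str(), itins.rr.c_str(),
              itins.sched.c_str());
  std::printf("%srm: itin=%s sched=%s (folded)\n", name.c_str(),
              itins.rm.c_str(), itins.sched.c_str());
}

int main() {
  OpndItins cvtSi2Ss{"IIC_SSE_CVT_Scalar_RR", "IIC_SSE_CVT_Scalar_RM",
                     "WriteCvtI2F"};
  defineInstPair("CVTSI2SS", cvtSi2Ss); // one bundle drives both forms
  return 0;
}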
multiclass sse12_cvt_sint_3addr opc, RegisterClass SrcRC, RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag, string asm, OpndItins itins, bit Is2Addr = 1> { - def rr : SI, Sched<[itins.Sched]>; - def rm : SI, Sched<[itins.Sched.Folded, ReadAfterLd]>; + def rr_Int : SI, Sched<[itins.Sched]>; + def rm_Int : SI, Sched<[itins.Sched.Folded, ReadAfterLd]>; } let Predicates = [UseAVX] in { @@ -1374,34 +1431,34 @@ defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse2_cvtsd2si64, let isCodeGenOnly = 1 in { let Predicates = [UseAVX] in { - defm Int_VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128, + defm VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128, int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss{l}", - SSE_CVT_Scalar, 0>, XS, VEX_4V; - defm Int_VCVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128, + SSE_CVT_SI2SS, 0>, XS, VEX_4V; + defm VCVTSI642SS : sse12_cvt_sint_3addr<0x2A, GR64, VR128, int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss{q}", - SSE_CVT_Scalar, 0>, XS, VEX_4V, + SSE_CVT_SI2SS, 0>, XS, VEX_4V, VEX_W; - defm Int_VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128, + defm VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128, int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd{l}", - SSE_CVT_Scalar, 0>, XD, VEX_4V; - defm Int_VCVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128, + SSE_CVT_SI2SD, 0>, XD, VEX_4V; + defm VCVTSI642SD : sse12_cvt_sint_3addr<0x2A, GR64, VR128, int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd{q}", - SSE_CVT_Scalar, 0>, XD, + SSE_CVT_SI2SD, 0>, XD, VEX_4V, VEX_W; } let Constraints = "$src1 = $dst" in { - defm Int_CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128, + defm CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128, int_x86_sse_cvtsi2ss, i32mem, loadi32, - "cvtsi2ss{l}", SSE_CVT_Scalar>, XS; - defm Int_CVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128, + "cvtsi2ss{l}", SSE_CVT_SI2SS>, XS; + defm CVTSI642SS : sse12_cvt_sint_3addr<0x2A, GR64, VR128, int_x86_sse_cvtsi642ss, i64mem, loadi64, - "cvtsi2ss{q}", SSE_CVT_Scalar>, XS, REX_W; - defm Int_CVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128, + "cvtsi2ss{q}", SSE_CVT_SI2SS>, XS, REX_W; + defm CVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128, int_x86_sse2_cvtsi2sd, i32mem, loadi32, - "cvtsi2sd{l}", SSE_CVT_Scalar>, XD; - defm Int_CVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128, + "cvtsi2sd{l}", SSE_CVT_SI2SD>, XD; + defm CVTSI642SD : sse12_cvt_sint_3addr<0x2A, GR64, VR128, int_x86_sse2_cvtsi642sd, i64mem, loadi64, - "cvtsi2sd{q}", SSE_CVT_Scalar>, XD, REX_W; + "cvtsi2sd{q}", SSE_CVT_SI2SD>, XD, REX_W; } } // isCodeGenOnly = 1 @@ -1410,31 +1467,31 @@ let isCodeGenOnly = 1 in { // Aliases for intrinsics let isCodeGenOnly = 1 in { let Predicates = [UseAVX] in { -defm Int_VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si, - ssmem, sse_load_f32, "cvttss2si", - SSE_CVT_SS2SI_32>, XS, VEX; -defm Int_VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, - int_x86_sse_cvttss2si64, ssmem, sse_load_f32, - "cvttss2si", SSE_CVT_SS2SI_64>, - XS, VEX, VEX_W; -defm Int_VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si, - sdmem, sse_load_f64, "cvttsd2si", - SSE_CVT_SD2SI>, XD, VEX; -defm Int_VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, - int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64, - "cvttsd2si", SSE_CVT_SD2SI>, - XD, VEX, VEX_W; -} -defm Int_CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si, +defm VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si, + ssmem, sse_load_f32, "cvttss2si", + 
SSE_CVT_SS2SI_32>, XS, VEX; +defm VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, + int_x86_sse_cvttss2si64, ssmem, sse_load_f32, + "cvttss2si", SSE_CVT_SS2SI_64>, + XS, VEX, VEX_W; +defm VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si, + sdmem, sse_load_f64, "cvttsd2si", + SSE_CVT_SD2SI>, XD, VEX; +defm VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, + int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64, + "cvttsd2si", SSE_CVT_SD2SI>, + XD, VEX, VEX_W; +} +defm CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si, ssmem, sse_load_f32, "cvttss2si", SSE_CVT_SS2SI_32>, XS; -defm Int_CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, +defm CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, int_x86_sse_cvttss2si64, ssmem, sse_load_f32, "cvttss2si", SSE_CVT_SS2SI_64>, XS, REX_W; -defm Int_CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si, +defm CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si, sdmem, sse_load_f64, "cvttsd2si", SSE_CVT_SD2SI>, XD; -defm Int_CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, +defm CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64, "cvttsd2si", SSE_CVT_SD2SI>, XD, REX_W; } // isCodeGenOnly = 1 @@ -1456,53 +1513,53 @@ defm CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64, defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, i128mem, v4f32, v4i32, loadv2i64, "vcvtdq2ps\t{$src, $dst|$dst, $src}", - SSEPackedSingle, SSE_CVT_PS>, + SSEPackedSingle, SSE_CVT_I2PS>, PS, VEX, Requires<[HasAVX, NoVLX]>, VEX_WIG; defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, i256mem, v8f32, v8i32, loadv4i64, "vcvtdq2ps\t{$src, $dst|$dst, $src}", - SSEPackedSingle, SSE_CVT_PS>, + SSEPackedSingle, SSE_CVT_I2PS>, PS, VEX, VEX_L, Requires<[HasAVX, NoVLX]>, VEX_WIG; defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, i128mem, v4f32, v4i32, memopv2i64, "cvtdq2ps\t{$src, $dst|$dst, $src}", - SSEPackedSingle, SSE_CVT_PS>, + SSEPackedSingle, SSE_CVT_I2PS>, PS, Requires<[UseSSE2]>; let Predicates = [UseAVX] in { def : InstAlias<"vcvtss2si{l}\t{$src, $dst|$dst, $src}", - (VCVTSS2SIrr GR32:$dst, VR128:$src), 0>; + (VCVTSS2SIrr_Int GR32:$dst, VR128:$src), 0>; def : InstAlias<"vcvtss2si{l}\t{$src, $dst|$dst, $src}", - (VCVTSS2SIrm GR32:$dst, ssmem:$src), 0>; + (VCVTSS2SIrm_Int GR32:$dst, ssmem:$src), 0>; def : InstAlias<"vcvtsd2si{l}\t{$src, $dst|$dst, $src}", - (VCVTSD2SIrr GR32:$dst, VR128:$src), 0>; + (VCVTSD2SIrr_Int GR32:$dst, VR128:$src), 0>; def : InstAlias<"vcvtsd2si{l}\t{$src, $dst|$dst, $src}", - (VCVTSD2SIrm GR32:$dst, sdmem:$src), 0>; + (VCVTSD2SIrm_Int GR32:$dst, sdmem:$src), 0>; def : InstAlias<"vcvtss2si{q}\t{$src, $dst|$dst, $src}", - (VCVTSS2SI64rr GR64:$dst, VR128:$src), 0>; + (VCVTSS2SI64rr_Int GR64:$dst, VR128:$src), 0>; def : InstAlias<"vcvtss2si{q}\t{$src, $dst|$dst, $src}", - (VCVTSS2SI64rm GR64:$dst, ssmem:$src), 0>; + (VCVTSS2SI64rm_Int GR64:$dst, ssmem:$src), 0>; def : InstAlias<"vcvtsd2si{q}\t{$src, $dst|$dst, $src}", - (VCVTSD2SI64rr GR64:$dst, VR128:$src), 0>; + (VCVTSD2SI64rr_Int GR64:$dst, VR128:$src), 0>; def : InstAlias<"vcvtsd2si{q}\t{$src, $dst|$dst, $src}", - (VCVTSD2SI64rm GR64:$dst, sdmem:$src), 0>; + (VCVTSD2SI64rm_Int GR64:$dst, sdmem:$src), 0>; } def : InstAlias<"cvtss2si{l}\t{$src, $dst|$dst, $src}", - (CVTSS2SIrr GR32:$dst, VR128:$src), 0>; + (CVTSS2SIrr_Int GR32:$dst, VR128:$src), 0>; def : InstAlias<"cvtss2si{l}\t{$src, $dst|$dst, $src}", - (CVTSS2SIrm GR32:$dst, ssmem:$src), 0>; + (CVTSS2SIrm_Int GR32:$dst, ssmem:$src), 0>; def : 
InstAlias<"cvtsd2si{l}\t{$src, $dst|$dst, $src}", - (CVTSD2SIrr GR32:$dst, VR128:$src), 0>; + (CVTSD2SIrr_Int GR32:$dst, VR128:$src), 0>; def : InstAlias<"cvtsd2si{l}\t{$src, $dst|$dst, $src}", - (CVTSD2SIrm GR32:$dst, sdmem:$src), 0>; + (CVTSD2SIrm_Int GR32:$dst, sdmem:$src), 0>; def : InstAlias<"cvtss2si{q}\t{$src, $dst|$dst, $src}", - (CVTSS2SI64rr GR64:$dst, VR128:$src), 0>; + (CVTSS2SI64rr_Int GR64:$dst, VR128:$src), 0>; def : InstAlias<"cvtss2si{q}\t{$src, $dst|$dst, $src}", - (CVTSS2SI64rm GR64:$dst, ssmem:$src), 0>; + (CVTSS2SI64rm_Int GR64:$dst, ssmem:$src), 0>; def : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}", - (CVTSD2SI64rr GR64:$dst, VR128:$src), 0>; + (CVTSD2SI64rr_Int GR64:$dst, VR128:$src), 0>; def : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}", - (CVTSD2SI64rm GR64:$dst, sdmem:$src), 0>; + (CVTSD2SI64rm_Int GR64:$dst, sdmem:$src), 0>; /// SSE 2 Only @@ -1537,14 +1594,14 @@ def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src), Requires<[UseSSE2, OptForSize]>, Sched<[WriteCvtF2FLd]>; let isCodeGenOnly = 1 in { -def Int_VCVTSD2SSrr: I<0x5A, MRMSrcReg, +def VCVTSD2SSrr_Int: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))], IIC_SSE_CVT_Scalar_RR>, XD, VEX_4V, VEX_WIG, Requires<[HasAVX]>, Sched<[WriteCvtF2F]>; -def Int_VCVTSD2SSrm: I<0x5A, MRMSrcMem, +def VCVTSD2SSrm_Int: I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2), "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtsd2ss @@ -1553,14 +1610,14 @@ def Int_VCVTSD2SSrm: I<0x5A, MRMSrcMem, Requires<[HasAVX]>, Sched<[WriteCvtF2FLd, ReadAfterLd]>; let Constraints = "$src1 = $dst" in { -def Int_CVTSD2SSrr: I<0x5A, MRMSrcReg, +def CVTSD2SSrr_Int: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "cvtsd2ss\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))], IIC_SSE_CVT_Scalar_RR>, XD, Requires<[UseSSE2]>, Sched<[WriteCvtF2F]>; -def Int_CVTSD2SSrm: I<0x5A, MRMSrcMem, +def CVTSD2SSrm_Int: I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2), "cvtsd2ss\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtsd2ss @@ -1620,14 +1677,14 @@ def : Pat<(extloadf32 addr:$src), (CVTSS2SDrr (MOVSSrm addr:$src))>, Requires<[UseSSE2, OptForSpeed]>; let isCodeGenOnly = 1 in { -def Int_VCVTSS2SDrr: I<0x5A, MRMSrcReg, +def VCVTSS2SDrr_Int: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))], IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V, VEX_WIG, Requires<[HasAVX]>, Sched<[WriteCvtF2F]>; -def Int_VCVTSS2SDrm: I<0x5A, MRMSrcMem, +def VCVTSS2SDrm_Int: I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, @@ -1635,14 +1692,14 @@ def Int_VCVTSS2SDrm: I<0x5A, MRMSrcMem, IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, VEX_WIG, Requires<[HasAVX]>, Sched<[WriteCvtF2FLd, ReadAfterLd]>; let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix -def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg, +def CVTSS2SDrr_Int: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "cvtss2sd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))], IIC_SSE_CVT_Scalar_RR>, XS, Requires<[UseSSE2]>, 
Sched<[WriteCvtF2F]>; -def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem, +def CVTSS2SDrm_Int: I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2), "cvtss2sd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, @@ -1660,33 +1717,33 @@ def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst), (v4f32 (scalar_to_vector (f32 (fpround (f64 (extractelt VR128:$src, (iPTR 0))))))))), - (Int_VCVTSD2SSrr VR128:$dst, VR128:$src)>; + (VCVTSD2SSrr_Int VR128:$dst, VR128:$src)>; def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (f64 (fpextend (f32 (extractelt VR128:$src, (iPTR 0))))))))), - (Int_VCVTSS2SDrr VR128:$dst, VR128:$src)>; + (VCVTSS2SDrr_Int VR128:$dst, VR128:$src)>; def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst), (v4f32 (scalar_to_vector (f32 (sint_to_fp GR64:$src)))))), - (Int_VCVTSI2SS64rr VR128:$dst, GR64:$src)>; + (VCVTSI642SSrr_Int VR128:$dst, GR64:$src)>; def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst), (v4f32 (scalar_to_vector (f32 (sint_to_fp GR32:$src)))))), - (Int_VCVTSI2SSrr VR128:$dst, GR32:$src)>; + (VCVTSI2SSrr_Int VR128:$dst, GR32:$src)>; def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (f64 (sint_to_fp GR64:$src)))))), - (Int_VCVTSI2SD64rr VR128:$dst, GR64:$src)>; + (VCVTSI642SDrr_Int VR128:$dst, GR64:$src)>; def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (f64 (sint_to_fp GR32:$src)))))), - (Int_VCVTSI2SDrr VR128:$dst, GR32:$src)>; + (VCVTSI2SDrr_Int VR128:$dst, GR32:$src)>; } // Predicates = [UseAVX] let Predicates = [UseSSE2] in { @@ -1694,35 +1751,35 @@ def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst), (v4f32 (scalar_to_vector (f32 (fpround (f64 (extractelt VR128:$src, (iPTR 0))))))))), - (Int_CVTSD2SSrr VR128:$dst, VR128:$src)>; + (CVTSD2SSrr_Int VR128:$dst, VR128:$src)>; def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (f64 (fpextend (f32 (extractelt VR128:$src, (iPTR 0))))))))), - (Int_CVTSS2SDrr VR128:$dst, VR128:$src)>; + (CVTSS2SDrr_Int VR128:$dst, VR128:$src)>; def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (f64 (sint_to_fp GR64:$src)))))), - (Int_CVTSI2SD64rr VR128:$dst, GR64:$src)>; + (CVTSI642SDrr_Int VR128:$dst, GR64:$src)>; def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), (v2f64 (scalar_to_vector (f64 (sint_to_fp GR32:$src)))))), - (Int_CVTSI2SDrr VR128:$dst, GR32:$src)>; + (CVTSI2SDrr_Int VR128:$dst, GR32:$src)>; } // Predicates = [UseSSE2] let Predicates = [UseSSE1] in { def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst), (v4f32 (scalar_to_vector (f32 (sint_to_fp GR64:$src)))))), - (Int_CVTSI2SS64rr VR128:$dst, GR64:$src)>; + (CVTSI642SSrr_Int VR128:$dst, GR64:$src)>; def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst), (v4f32 (scalar_to_vector (f32 (sint_to_fp GR32:$src)))))), - (Int_CVTSI2SSrr VR128:$dst, GR32:$src)>; + (CVTSI2SSrr_Int VR128:$dst, GR32:$src)>; } // Predicates = [UseSSE1] // Convert packed single/double fp to doubleword @@ -2092,6 +2149,11 @@ let Predicates = [UseSSE2] in { // SSE 1 & 2 - Compare Instructions //===----------------------------------------------------------------------===// +let Sched = WriteFAdd in +def SSE_COMIS : OpndItins< + IIC_SSE_COMIS_RR, IIC_SSE_COMIS_RM +>; + // sse12_cmp_scalar - sse 1 & 2 compare scalar instructions multiclass sse12_cmp_scalar { - def rr : SIi8<0xC2, MRMSrcReg, (outs VR128:$dst), + def rr_Int : SIi8<0xC2, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src, CC:$cc), asm, [(set VR128:$dst, (Int VR128:$src1, VR128:$src, imm:$cc))], itins.rr>, Sched<[itins.Sched]>; let mayLoad = 1 in - def rm : SIi8<0xC2, 
MRMSrcMem, (outs VR128:$dst), + def rm_Int : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, memop:$src, CC:$cc), asm, [(set VR128:$dst, (Int VR128:$src1, mem_cpat:$src, imm:$cc))], @@ -2168,21 +2230,21 @@ let mayLoad = 1 in let isCodeGenOnly = 1 in { // Aliases to match intrinsics which expect XMM operand(s). let ExeDomain = SSEPackedSingle in - defm Int_VCMPSS : sse12_cmp_scalar_int, XS, VEX_4V; let ExeDomain = SSEPackedDouble in - defm Int_VCMPSD : sse12_cmp_scalar_int, // same latency as f32 XD, VEX_4V; let Constraints = "$src1 = $dst" in { let ExeDomain = SSEPackedSingle in - defm Int_CMPSS : sse12_cmp_scalar_int, XS; let ExeDomain = SSEPackedDouble in - defm Int_CMPSD : sse12_cmp_scalar_int, XD; } @@ -2192,86 +2254,88 @@ let isCodeGenOnly = 1 in { // sse12_ord_cmp - Unordered/Ordered scalar fp compare and set EFLAGS multiclass sse12_ord_cmp opc, RegisterClass RC, SDNode OpNode, ValueType vt, X86MemOperand x86memop, - PatFrag ld_frag, string OpcodeStr> { + PatFrag ld_frag, string OpcodeStr, + OpndItins itins> { let hasSideEffects = 0 in { def rr: SI, - Sched<[WriteFAdd]>; + itins.rr>, + Sched<[itins.Sched]>; let mayLoad = 1 in def rm: SI, - Sched<[WriteFAddLd, ReadAfterLd]>; + itins.rm>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } // sse12_ord_cmp_int - Intrinsic version of sse12_ord_cmp multiclass sse12_ord_cmp_int opc, RegisterClass RC, SDNode OpNode, ValueType vt, Operand memop, - ComplexPattern mem_cpat, string OpcodeStr> { + ComplexPattern mem_cpat, string OpcodeStr, + OpndItins itins> { def rr: SI, - Sched<[WriteFAdd]>; + itins.rr>, + Sched<[itins.Sched]>; let mayLoad = 1 in def rm: SI, - Sched<[WriteFAddLd, ReadAfterLd]>; + itins.rm>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } let Defs = [EFLAGS] in { defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32, - "ucomiss">, PS, VEX, VEX_LIG, VEX_WIG; + "ucomiss", SSE_COMIS>, PS, VEX, VEX_LIG, VEX_WIG; defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64, - "ucomisd">, PD, VEX, VEX_LIG, VEX_WIG; + "ucomisd", SSE_COMIS>, PD, VEX, VEX_LIG, VEX_WIG; let Pattern = [] in { defm VCOMISS : sse12_ord_cmp<0x2F, FR32, undef, f32, f32mem, loadf32, - "comiss">, PS, VEX, VEX_LIG, VEX_WIG; + "comiss", SSE_COMIS>, PS, VEX, VEX_LIG, VEX_WIG; defm VCOMISD : sse12_ord_cmp<0x2F, FR64, undef, f64, f64mem, loadf64, - "comisd">, PD, VEX, VEX_LIG, VEX_WIG; + "comisd", SSE_COMIS>, PD, VEX, VEX_LIG, VEX_WIG; } let isCodeGenOnly = 1 in { defm Int_VUCOMISS : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v4f32, ssmem, - sse_load_f32, "ucomiss">, PS, VEX, VEX_WIG; + sse_load_f32, "ucomiss", SSE_COMIS>, PS, VEX, VEX_WIG; defm Int_VUCOMISD : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v2f64, sdmem, - sse_load_f64, "ucomisd">, PD, VEX, VEX_WIG; + sse_load_f64, "ucomisd", SSE_COMIS>, PD, VEX, VEX_WIG; defm Int_VCOMISS : sse12_ord_cmp_int<0x2F, VR128, X86comi, v4f32, ssmem, - sse_load_f32, "comiss">, PS, VEX, VEX_WIG; + sse_load_f32, "comiss", SSE_COMIS>, PS, VEX, VEX_WIG; defm Int_VCOMISD : sse12_ord_cmp_int<0x2F, VR128, X86comi, v2f64, sdmem, - sse_load_f64, "comisd">, PD, VEX, VEX_WIG; + sse_load_f64, "comisd", SSE_COMIS>, PD, VEX, VEX_WIG; } defm UCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32, - "ucomiss">, PS; + "ucomiss", SSE_COMIS>, PS; defm UCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64, - "ucomisd">, PD; + "ucomisd", SSE_COMIS>, PD; let Pattern = [] in { defm COMISS : sse12_ord_cmp<0x2F, FR32, undef, f32, f32mem, loadf32, - "comiss">, PS; + "comiss", SSE_COMIS>, PS; defm COMISD 
: sse12_ord_cmp<0x2F, FR64, undef, f64, f64mem, loadf64, - "comisd">, PD; + "comisd", SSE_COMIS>, PD; } let isCodeGenOnly = 1 in { defm Int_UCOMISS : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v4f32, ssmem, - sse_load_f32, "ucomiss">, PS; + sse_load_f32, "ucomiss", SSE_COMIS>, PS; defm Int_UCOMISD : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v2f64, sdmem, - sse_load_f64, "ucomisd">, PD; + sse_load_f64, "ucomisd", SSE_COMIS>, PD; defm Int_COMISS : sse12_ord_cmp_int<0x2F, VR128, X86comi, v4f32, ssmem, - sse_load_f32, "comiss">, PS; + sse_load_f32, "comiss", SSE_COMIS>, PS; defm Int_COMISD : sse12_ord_cmp_int<0x2F, VR128, X86comi, v2f64, sdmem, - sse_load_f64, "comisd">, PD; + sse_load_f64, "comisd", SSE_COMIS>, PD; } } // Defs = [EFLAGS] @@ -2334,8 +2398,8 @@ let Constraints = "$src1 = $dst" in { } def CommutableCMPCC : PatLeaf<(imm), [{ - return (N->getZExtValue() == 0x00 || N->getZExtValue() == 0x03 || - N->getZExtValue() == 0x04 || N->getZExtValue() == 0x07); + uint64_t Imm = N->getZExtValue() & 0x7; + return (Imm == 0x00 || Imm == 0x03 || Imm == 0x04 || Imm == 0x07); }]>; // Patterns to select compares with loads in first operand. @@ -2389,109 +2453,120 @@ let Predicates = [UseSSE1] in { // SSE 1 & 2 - Shuffle Instructions //===----------------------------------------------------------------------===// +let Sched = WriteFShuffle in +def SSE_SHUFP : OpndItins< + IIC_SSE_SHUFP, IIC_SSE_SHUFP +>; + /// sse12_shuffle - sse 1 & 2 fp shuffle instructions multiclass sse12_shuffle { + OpndItins itins, Domain d> { def rmi : PIi8<0xC6, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$src3), asm, [(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2), - (i8 imm:$src3))))], IIC_SSE_SHUFP, d>, - Sched<[WriteFShuffleLd, ReadAfterLd]>; + (i8 imm:$src3))))], itins.rm, d>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2, u8imm:$src3), asm, [(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2, - (i8 imm:$src3))))], IIC_SSE_SHUFP, d>, - Sched<[WriteFShuffle]>; + (i8 imm:$src3))))], itins.rr, d>, + Sched<[itins.Sched]>; } let Predicates = [HasAVX, NoVLX] in { defm VSHUFPS : sse12_shuffle, PS, VEX_4V, VEX_WIG; + loadv4f32, SSE_SHUFP, SSEPackedSingle>, PS, VEX_4V, VEX_WIG; defm VSHUFPSY : sse12_shuffle, PS, VEX_4V, VEX_L, VEX_WIG; + loadv8f32, SSE_SHUFP, SSEPackedSingle>, PS, VEX_4V, VEX_L, VEX_WIG; defm VSHUFPD : sse12_shuffle, PD, VEX_4V, VEX_WIG; + loadv2f64, SSE_SHUFP, SSEPackedDouble>, PD, VEX_4V, VEX_WIG; defm VSHUFPDY : sse12_shuffle, PD, VEX_4V, VEX_L, VEX_WIG; + loadv4f64, SSE_SHUFP, SSEPackedDouble>, PD, VEX_4V, VEX_L, VEX_WIG; } let Constraints = "$src1 = $dst" in { defm SHUFPS : sse12_shuffle, PS; + memopv4f32, SSE_SHUFP, SSEPackedSingle>, PS; defm SHUFPD : sse12_shuffle, PD; + memopv2f64, SSE_SHUFP, SSEPackedDouble>, PD; } //===----------------------------------------------------------------------===// // SSE 1 & 2 - Unpack FP Instructions //===----------------------------------------------------------------------===// +let Sched = WriteFShuffle in +def SSE_UNPCK : OpndItins< + IIC_SSE_UNPCK, IIC_SSE_UNPCK +>; + /// sse12_unpack_interleave - sse 1 & 2 fp unpack and interleave multiclass sse12_unpack_interleave opc, SDNode OpNode, ValueType vt, PatFrag mem_frag, RegisterClass RC, X86MemOperand x86memop, string asm, - Domain d, bit IsCommutable = 0> { + OpndItins itins, Domain d, bit IsCommutable = 0> { let isCommutable = IsCommutable in def rr : PI, Sched<[WriteFShuffle]>; + itins.rr, d>, Sched<[itins.Sched]>; def rm 
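Aside: the CommutableCMPCC change above now masks the compare immediate to its low three bits before testing it, so any immediate whose low bits select EQ (0x00), UNORD (0x03), NEQ (0x04) or ORD (0x07) is treated as commutable, which presumably also covers the extended AVX predicates that reduce to those four slots. A standalone C++ sketch of that check (illustrative only, not the PatLeaf body itself):

// commutable_cmp.cpp -- illustrative sketch that mirrors the CommutableCMPCC
// predicate logic outside of TableGen.
#include <cstdint>
#include <cstdio>

// True when the low three bits of an SSE/AVX compare immediate pick a
// comparison whose result does not depend on operand order.
static bool isCommutableCmpImm(uint64_t ImmVal) {
  uint64_t Imm = ImmVal & 0x7;
  return Imm == 0x00 ||   // EQ
         Imm == 0x03 ||   // UNORD
         Imm == 0x04 ||   // NEQ
         Imm == 0x07;     // ORD
}

int main() {
  // 0x00 (EQ) and 0x0C (an AVX predicate that reduces to NEQ) qualify;
  // 0x01 (LT) does not, since a < b is not the same test as b < a.
  std::printf("%d %d %d\n", isCommutableCmpImm(0x00),
              isCommutableCmpImm(0x0C), isCommutableCmpImm(0x01));
  return 0;
}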
: PI, - Sched<[WriteFShuffleLd, ReadAfterLd]>; + itins.rm, d>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } let Predicates = [HasAVX, NoVLX] in { defm VUNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, loadv4f32, VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedSingle>, PS, VEX_4V, VEX_WIG; + SSE_UNPCK, SSEPackedSingle>, PS, VEX_4V, VEX_WIG; defm VUNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, loadv2f64, VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedDouble>, PD, VEX_4V, VEX_WIG; + SSE_UNPCK, SSEPackedDouble>, PD, VEX_4V, VEX_WIG; defm VUNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, loadv4f32, VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedSingle>, PS, VEX_4V, VEX_WIG; + SSE_UNPCK, SSEPackedSingle>, PS, VEX_4V, VEX_WIG; defm VUNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, loadv2f64, VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedDouble>, PD, VEX_4V, VEX_WIG; + SSE_UNPCK, SSEPackedDouble>, PD, VEX_4V, VEX_WIG; defm VUNPCKHPSY: sse12_unpack_interleave<0x15, X86Unpckh, v8f32, loadv8f32, VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedSingle>, PS, VEX_4V, VEX_L, VEX_WIG; + SSE_UNPCK, SSEPackedSingle>, PS, VEX_4V, VEX_L, VEX_WIG; defm VUNPCKHPDY: sse12_unpack_interleave<0x15, X86Unpckh, v4f64, loadv4f64, VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedDouble>, PD, VEX_4V, VEX_L, VEX_WIG; + SSE_UNPCK, SSEPackedDouble>, PD, VEX_4V, VEX_L, VEX_WIG; defm VUNPCKLPSY: sse12_unpack_interleave<0x14, X86Unpckl, v8f32, loadv8f32, VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedSingle>, PS, VEX_4V, VEX_L, VEX_WIG; + SSE_UNPCK, SSEPackedSingle>, PS, VEX_4V, VEX_L, VEX_WIG; defm VUNPCKLPDY: sse12_unpack_interleave<0x14, X86Unpckl, v4f64, loadv4f64, VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedDouble>, PD, VEX_4V, VEX_L, VEX_WIG; + SSE_UNPCK, SSEPackedDouble>, PD, VEX_4V, VEX_L, VEX_WIG; }// Predicates = [HasAVX, NoVLX] + let Constraints = "$src1 = $dst" in { defm UNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, memopv4f32, VR128, f128mem, "unpckhps\t{$src2, $dst|$dst, $src2}", - SSEPackedSingle>, PS; + SSE_UNPCK, SSEPackedSingle>, PS; defm UNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, memopv2f64, VR128, f128mem, "unpckhpd\t{$src2, $dst|$dst, $src2}", - SSEPackedDouble, 1>, PD; + SSE_UNPCK, SSEPackedDouble, 1>, PD; defm UNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, memopv4f32, VR128, f128mem, "unpcklps\t{$src2, $dst|$dst, $src2}", - SSEPackedSingle>, PS; + SSE_UNPCK, SSEPackedSingle>, PS; defm UNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, memopv2f64, VR128, f128mem, "unpcklpd\t{$src2, $dst|$dst, $src2}", - SSEPackedDouble>, PD; + SSE_UNPCK, SSEPackedDouble>, PD; } // Constraints = "$src1 = $dst" let Predicates = [HasAVX1Only] in { @@ -3029,6 +3104,14 @@ def SSE_RSQRTSS : OpndItins< >; } +def SSE_RSQRT_P : SizeItins< + SSE_RSQRTPS, SSE_RSQRTPS +>; + +def SSE_RSQRT_S : SizeItins< + SSE_RSQRTSS, SSE_RSQRTSS +>; + let Sched = WriteFRcp in { def SSE_RCPP : OpndItins< IIC_SSE_RCPP_RR, IIC_SSE_RCPP_RM @@ -3039,6 +3122,14 @@ def SSE_RCPS : OpndItins< >; } +def SSE_RCP_P : SizeItins< + SSE_RCPP, SSE_RCPP +>; + +def SSE_RCP_S : SizeItins< + SSE_RCPS, SSE_RCPS +>; + /// sse_fp_unop_s - SSE1 unops in scalar form /// For the non-AVX defs, we need $src1 to be tied to $dst because 
/// the HW instructions are 2 operand / destructive. @@ -3127,7 +3218,7 @@ multiclass avx_fp_unop_s opc, string OpcodeStr, RegisterClass RC, // which has a clobber before the rcp, vs. // vrcpss mem, %xmm0, %xmm0 // TODO: In theory, we could fold the load, and avoid the stall caused by - // the partial register store, either in ExecutionDepsFix or with smarter RA. + // the partial register store, either in BreakFalseDeps or with smarter RA. let Predicates = [target] in { def : Pat<(OpNode RC:$src), (!cast("V"#NAME#Suffix##r) (ScalarVT (IMPLICIT_DEF)), RC:$src)>; @@ -3396,7 +3487,7 @@ let Predicates = [UseSSE2] in { //===----------------------------------------------------------------------===// // Prefetch intrinsic. -let Predicates = [HasSSE1], SchedRW = [WriteLoad] in { +let Predicates = [HasSSEPrefetch], SchedRW = [WriteLoad] in { def PREFETCHT0 : I<0x18, MRM1m, (outs), (ins i8mem:$src), "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3), (i32 1))], IIC_SSE_PREFETCH>, TB; @@ -3504,8 +3595,7 @@ def VMOVDQUYrr_REV : VSSI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src), } let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1, - hasSideEffects = 0, SchedRW = [WriteLoad] in { -let Predicates = [HasAVX,NoVLX] in + hasSideEffects = 0, SchedRW = [WriteLoad], Predicates = [HasAVX,NoVLX] in { def VMOVDQArm : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "movdqa\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (alignedloadv2i64 addr:$src))], @@ -3513,7 +3603,6 @@ def VMOVDQArm : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), def VMOVDQAYrm : VPDI<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src), "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RM>, VEX, VEX_L, VEX_WIG; -let Predicates = [HasAVX,NoVLX] in def VMOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "vmovdqu\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (loadv2i64 addr:$src))], @@ -3523,8 +3612,8 @@ def VMOVDQUYrm : I<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src), XS, VEX, VEX_L, VEX_WIG; } -let mayStore = 1, hasSideEffects = 0, SchedRW = [WriteStore] in { -let Predicates = [HasAVX,NoVLX] in +let mayStore = 1, hasSideEffects = 0, SchedRW = [WriteStore], + Predicates = [HasAVX,NoVLX] in { def VMOVDQAmr : VPDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), "movdqa\t{$src, $dst|$dst, $src}", @@ -3534,7 +3623,6 @@ def VMOVDQAYmr : VPDI<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src), "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_MR>, VEX, VEX_L, VEX_WIG; -let Predicates = [HasAVX,NoVLX] in def VMOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), "vmovdqu\t{$src, $dst|$dst, $src}", [(store (v2i64 VR128:$src), addr:$dst)], IIC_SSE_MOVU_P_MR>, @@ -3643,7 +3731,7 @@ multiclass PDI_binop_rm2 opc, string OpcodeStr, SDNode OpNode, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))]>, + [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))], itins.rr>, Sched<[itins.Sched]>; def rm : PDI opc, string OpcodeStr, SDNode OpNode, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), - (bitconvert (memop_frag addr:$src2)))))]>, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + (bitconvert (memop_frag addr:$src2)))))], + itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; } 
} // ExeDomain = SSEPackedInt @@ -3864,9 +3952,14 @@ defm PCMPGTD : PDI_binop_all<0x66, "pcmpgtd", X86pcmpgt, v4i32, v8i32, // SSE2 - Packed Integer Shuffle Instructions //===---------------------------------------------------------------------===// +let Sched = WriteShuffle in +def SSE_PSHUF : OpndItins< + IIC_SSE_PSHUF_RI, IIC_SSE_PSHUF_MI +>; + let ExeDomain = SSEPackedInt in { multiclass sse2_pshuffle { + SDNode OpNode, OpndItins itins, Predicate prd> { let Predicates = [HasAVX, prd] in { def V#NAME#ri : Ii8<0x70, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, u8imm:$src2), @@ -3874,15 +3967,15 @@ let Predicates = [HasAVX, prd] in { "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, (vt128 (OpNode VR128:$src1, (i8 imm:$src2))))], - IIC_SSE_PSHUF_RI>, VEX, Sched<[WriteShuffle]>, VEX_WIG; + itins.rr>, VEX, Sched<[itins.Sched]>, VEX_WIG; def V#NAME#mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src1, u8imm:$src2), !strconcat("v", OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, (vt128 (OpNode (bitconvert (loadv2i64 addr:$src1)), - (i8 imm:$src2))))], IIC_SSE_PSHUF_MI>, VEX, - Sched<[WriteShuffleLd]>, VEX_WIG; + (i8 imm:$src2))))], itins.rm>, VEX, + Sched<[itins.Sched.Folded]>, VEX_WIG; } let Predicates = [HasAVX2, prd] in { @@ -3892,15 +3985,15 @@ let Predicates = [HasAVX2, prd] in { "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, (vt256 (OpNode VR256:$src1, (i8 imm:$src2))))], - IIC_SSE_PSHUF_RI>, VEX, VEX_L, Sched<[WriteShuffle]>, VEX_WIG; + itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>, VEX_WIG; def V#NAME#Ymi : Ii8<0x70, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src1, u8imm:$src2), !strconcat("v", OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, (vt256 (OpNode (bitconvert (loadv4i64 addr:$src1)), - (i8 imm:$src2))))], IIC_SSE_PSHUF_MI>, VEX, VEX_L, - Sched<[WriteShuffleLd]>, VEX_WIG; + (i8 imm:$src2))))], itins.rm>, VEX, VEX_L, + Sched<[itins.Sched.Folded]>, VEX_WIG; } let Predicates = [UseSSE2] in { @@ -3910,23 +4003,24 @@ let Predicates = [UseSSE2] in { "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, (vt128 (OpNode VR128:$src1, (i8 imm:$src2))))], - IIC_SSE_PSHUF_RI>, Sched<[WriteShuffle]>; + itins.rr>, Sched<[itins.Sched]>; def mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src1, u8imm:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, (vt128 (OpNode (bitconvert (memopv2i64 addr:$src1)), - (i8 imm:$src2))))], IIC_SSE_PSHUF_MI>, - Sched<[WriteShuffleLd, ReadAfterLd]>; + (i8 imm:$src2))))], itins.rm>, + Sched<[itins.Sched.Folded]>; } } } // ExeDomain = SSEPackedInt -defm PSHUFD : sse2_pshuffle<"pshufd", v4i32, v8i32, X86PShufd, NoVLX>, PD; -defm PSHUFHW : sse2_pshuffle<"pshufhw", v8i16, v16i16, X86PShufhw, +defm PSHUFD : sse2_pshuffle<"pshufd", v4i32, v8i32, X86PShufd, SSE_PSHUF, + NoVLX>, PD; +defm PSHUFHW : sse2_pshuffle<"pshufhw", v8i16, v16i16, X86PShufhw, SSE_PSHUF, NoVLX_Or_NoBWI>, XS; -defm PSHUFLW : sse2_pshuffle<"pshuflw", v8i16, v16i16, X86PShuflw, +defm PSHUFLW : sse2_pshuffle<"pshuflw", v8i16, v16i16, X86PShuflw, SSE_PSHUF, NoVLX_Or_NoBWI>, XD; //===---------------------------------------------------------------------===// @@ -3935,126 +4029,94 @@ defm PSHUFLW : sse2_pshuffle<"pshuflw", v8i16, v16i16, X86PShuflw, let ExeDomain = SSEPackedInt in { multiclass sse2_pack opc, string OpcodeStr, ValueType OutVT, - ValueType ArgVT, SDNode OpNode, PatFrag ld_frag, + ValueType ArgVT, SDNode OpNode, RegisterClass RC, + 
X86MemOperand x86memop, OpndItins itins, PatFrag ld_frag, bit Is2Addr = 1> { def rr : PDI, Sched<[WriteShuffle]>; + [(set RC:$dst, + (OutVT (OpNode (ArgVT RC:$src1), RC:$src2)))], + itins.rr>, Sched<[itins.Sched]>; def rm : PDI, Sched<[WriteShuffleLd, ReadAfterLd]>; -} - -multiclass sse2_pack_y opc, string OpcodeStr, ValueType OutVT, - ValueType ArgVT, SDNode OpNode> { - def Yrr : PDI, - Sched<[WriteShuffle]>; - def Yrm : PDI, - Sched<[WriteShuffleLd, ReadAfterLd]>; + itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; } multiclass sse4_pack opc, string OpcodeStr, ValueType OutVT, - ValueType ArgVT, SDNode OpNode, PatFrag ld_frag, + ValueType ArgVT, SDNode OpNode, RegisterClass RC, + X86MemOperand x86memop, OpndItins itins, PatFrag ld_frag, bit Is2Addr = 1> { def rr : SS48I, Sched<[WriteShuffle]>; + [(set RC:$dst, + (OutVT (OpNode (ArgVT RC:$src1), RC:$src2)))], + itins.rr>, Sched<[itins.Sched]>; def rm : SS48I, Sched<[WriteShuffleLd, ReadAfterLd]>; -} - -multiclass sse4_pack_y opc, string OpcodeStr, ValueType OutVT, - ValueType ArgVT, SDNode OpNode> { - def Yrr : SS48I, - Sched<[WriteShuffle]>; - def Yrm : SS48I, - Sched<[WriteShuffleLd, ReadAfterLd]>; + itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; } let Predicates = [HasAVX, NoVLX_Or_NoBWI] in { - defm VPACKSSWB : sse2_pack<0x63, "vpacksswb", v16i8, v8i16, X86Packss, - loadv2i64, 0>, VEX_4V, VEX_WIG; - defm VPACKSSDW : sse2_pack<0x6B, "vpackssdw", v8i16, v4i32, X86Packss, - loadv2i64, 0>, VEX_4V, VEX_WIG; + defm VPACKSSWB : sse2_pack<0x63, "vpacksswb", v16i8, v8i16, X86Packss, VR128, + i128mem, SSE_PACK, loadv2i64, 0>, VEX_4V, VEX_WIG; + defm VPACKSSDW : sse2_pack<0x6B, "vpackssdw", v8i16, v4i32, X86Packss, VR128, + i128mem, SSE_PACK, loadv2i64, 0>, VEX_4V, VEX_WIG; - defm VPACKUSWB : sse2_pack<0x67, "vpackuswb", v16i8, v8i16, X86Packus, - loadv2i64, 0>, VEX_4V, VEX_WIG; - defm VPACKUSDW : sse4_pack<0x2B, "vpackusdw", v8i16, v4i32, X86Packus, - loadv2i64, 0>, VEX_4V; + defm VPACKUSWB : sse2_pack<0x67, "vpackuswb", v16i8, v8i16, X86Packus, VR128, + i128mem, SSE_PACK, loadv2i64, 0>, VEX_4V, VEX_WIG; + defm VPACKUSDW : sse4_pack<0x2B, "vpackusdw", v8i16, v4i32, X86Packus, VR128, + i128mem, SSE_PACK, loadv2i64, 0>, VEX_4V; } let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { - defm VPACKSSWB : sse2_pack_y<0x63, "vpacksswb", v32i8, v16i16, X86Packss>, - VEX_4V, VEX_L, VEX_WIG; - defm VPACKSSDW : sse2_pack_y<0x6B, "vpackssdw", v16i16, v8i32, X86Packss>, - VEX_4V, VEX_L, VEX_WIG; + defm VPACKSSWBY : sse2_pack<0x63, "vpacksswb", v32i8, v16i16, X86Packss, + VR256, i256mem, SSE_PACK, loadv4i64, 0>, + VEX_4V, VEX_L, VEX_WIG; + defm VPACKSSDWY : sse2_pack<0x6B, "vpackssdw", v16i16, v8i32, X86Packss, + VR256, i256mem, SSE_PACK, loadv4i64, 0>, + VEX_4V, VEX_L, VEX_WIG; - defm VPACKUSWB : sse2_pack_y<0x67, "vpackuswb", v32i8, v16i16, X86Packus>, - VEX_4V, VEX_L, VEX_WIG; - defm VPACKUSDW : sse4_pack_y<0x2B, "vpackusdw", v16i16, v8i32, X86Packus>, - VEX_4V, VEX_L; + defm VPACKUSWBY : sse2_pack<0x67, "vpackuswb", v32i8, v16i16, X86Packus, + VR256,i256mem, SSE_PACK, loadv4i64, 0>, + VEX_4V, VEX_L, VEX_WIG; + defm VPACKUSDWY : sse4_pack<0x2B, "vpackusdw", v16i16, v8i32, X86Packus, + VR256, i256mem, SSE_PACK, loadv4i64, 0>, + VEX_4V, VEX_L; } let Constraints = "$src1 = $dst" in { - defm PACKSSWB : sse2_pack<0x63, "packsswb", v16i8, v8i16, X86Packss, - memopv2i64>; - defm PACKSSDW : sse2_pack<0x6B, "packssdw", v8i16, v4i32, X86Packss, - memopv2i64>; + defm PACKSSWB : sse2_pack<0x63, "packsswb", v16i8, v8i16, X86Packss, VR128, + i128mem, SSE_PACK, 
memopv2i64>; + defm PACKSSDW : sse2_pack<0x6B, "packssdw", v8i16, v4i32, X86Packss, VR128, + i128mem, SSE_PACK, memopv2i64>; - defm PACKUSWB : sse2_pack<0x67, "packuswb", v16i8, v8i16, X86Packus, - memopv2i64>; + defm PACKUSWB : sse2_pack<0x67, "packuswb", v16i8, v8i16, X86Packus, VR128, + i128mem, SSE_PACK, memopv2i64>; - defm PACKUSDW : sse4_pack<0x2B, "packusdw", v8i16, v4i32, X86Packus, - memopv2i64>; + defm PACKUSDW : sse4_pack<0x2B, "packusdw", v8i16, v4i32, X86Packus, VR128, + i128mem, SSE_PACK, memopv2i64>; } } // ExeDomain = SSEPackedInt @@ -4062,103 +4124,107 @@ let Constraints = "$src1 = $dst" in { // SSE2 - Packed Integer Unpack Instructions //===---------------------------------------------------------------------===// +let Sched = WriteShuffle in +def SSE_PUNPCK : OpndItins< + IIC_SSE_UNPCK, IIC_SSE_UNPCK +>; + let ExeDomain = SSEPackedInt in { multiclass sse2_unpack opc, string OpcodeStr, ValueType vt, - SDNode OpNode, PatFrag ld_frag, bit Is2Addr = 1> { + SDNode OpNode, RegisterClass RC, X86MemOperand x86memop, + OpndItins itins, PatFrag ld_frag, bit Is2Addr = 1> { def rr : PDI, Sched<[WriteShuffle]>; + [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], + itins.rr>, Sched<[itins.Sched]>; def rm : PDI, - Sched<[WriteShuffleLd, ReadAfterLd]>; -} - -multiclass sse2_unpack_y opc, string OpcodeStr, ValueType vt, - SDNode OpNode> { - def Yrr : PDI, - Sched<[WriteShuffle]>; - def Yrm : PDI, - Sched<[WriteShuffleLd, ReadAfterLd]>; + itins.rm>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } - let Predicates = [HasAVX, NoVLX_Or_NoBWI] in { - defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Unpckl, - loadv2i64, 0>, VEX_4V, VEX_WIG; - defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Unpckl, - loadv2i64, 0>, VEX_4V, VEX_WIG; - defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Unpckh, - loadv2i64, 0>, VEX_4V, VEX_WIG; - defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Unpckh, - loadv2i64, 0>, VEX_4V, VEX_WIG; + defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Unpckl, VR128, + i128mem, SSE_PUNPCK, loadv2i64, 0>, VEX_4V, VEX_WIG; + defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Unpckl, VR128, + i128mem, SSE_PUNPCK, loadv2i64, 0>, VEX_4V, VEX_WIG; + defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Unpckh, VR128, + i128mem, SSE_PUNPCK, loadv2i64, 0>, VEX_4V, VEX_WIG; + defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Unpckh, VR128, + i128mem, SSE_PUNPCK, loadv2i64, 0>, VEX_4V, VEX_WIG; } + let Predicates = [HasAVX, NoVLX] in { - defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Unpckl, - loadv2i64, 0>, VEX_4V, VEX_WIG; - defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Unpckl, - loadv2i64, 0>, VEX_4V, VEX_WIG; - defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Unpckh, - loadv2i64, 0>, VEX_4V, VEX_WIG; - defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Unpckh, - loadv2i64, 0>, VEX_4V, VEX_WIG; + defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Unpckl, VR128, + i128mem, SSE_PUNPCK, loadv2i64, 0>, + VEX_4V, VEX_WIG; + defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Unpckl, VR128, + i128mem, SSE_PUNPCK, loadv2i64, 0>, + VEX_4V, VEX_WIG; + defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Unpckh, VR128, + i128mem, SSE_PUNPCK, loadv2i64, 0>, + VEX_4V, VEX_WIG; + defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Unpckh, VR128, + i128mem, SSE_PUNPCK, loadv2i64, 0>, + VEX_4V, VEX_WIG; } let Predicates = [HasAVX2, 
NoVLX_Or_NoBWI] in { - defm VPUNPCKLBW : sse2_unpack_y<0x60, "vpunpcklbw", v32i8, X86Unpckl>, - VEX_4V, VEX_L, VEX_WIG; - defm VPUNPCKLWD : sse2_unpack_y<0x61, "vpunpcklwd", v16i16, X86Unpckl>, - VEX_4V, VEX_L, VEX_WIG; - defm VPUNPCKHBW : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, X86Unpckh>, - VEX_4V, VEX_L, VEX_WIG; - defm VPUNPCKHWD : sse2_unpack_y<0x69, "vpunpckhwd", v16i16, X86Unpckh>, - VEX_4V, VEX_L, VEX_WIG; + defm VPUNPCKLBWY : sse2_unpack<0x60, "vpunpcklbw", v32i8, X86Unpckl, VR256, + i256mem, SSE_PUNPCK, loadv4i64, 0>, + VEX_4V, VEX_L, VEX_WIG; + defm VPUNPCKLWDY : sse2_unpack<0x61, "vpunpcklwd", v16i16, X86Unpckl, VR256, + i256mem, SSE_PUNPCK, loadv4i64, 0>, + VEX_4V, VEX_L, VEX_WIG; + defm VPUNPCKHBWY : sse2_unpack<0x68, "vpunpckhbw", v32i8, X86Unpckh, VR256, + i256mem, SSE_PUNPCK, loadv4i64, 0>, + VEX_4V, VEX_L, VEX_WIG; + defm VPUNPCKHWDY : sse2_unpack<0x69, "vpunpckhwd", v16i16, X86Unpckh, VR256, + i256mem, SSE_PUNPCK, loadv4i64, 0>, + VEX_4V, VEX_L, VEX_WIG; } + let Predicates = [HasAVX2, NoVLX] in { - defm VPUNPCKLDQ : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Unpckl>, - VEX_4V, VEX_L, VEX_WIG; - defm VPUNPCKLQDQ : sse2_unpack_y<0x6C, "vpunpcklqdq", v4i64, X86Unpckl>, - VEX_4V, VEX_L, VEX_WIG; - defm VPUNPCKHDQ : sse2_unpack_y<0x6A, "vpunpckhdq", v8i32, X86Unpckh>, - VEX_4V, VEX_L, VEX_WIG; - defm VPUNPCKHQDQ : sse2_unpack_y<0x6D, "vpunpckhqdq", v4i64, X86Unpckh>, - VEX_4V, VEX_L, VEX_WIG; + defm VPUNPCKLDQY : sse2_unpack<0x62, "vpunpckldq", v8i32, X86Unpckl, VR256, + i256mem, SSE_PUNPCK, loadv4i64, 0>, + VEX_4V, VEX_L, VEX_WIG; + defm VPUNPCKLQDQY : sse2_unpack<0x6C, "vpunpcklqdq", v4i64, X86Unpckl, VR256, + i256mem, SSE_PUNPCK, loadv4i64, 0>, + VEX_4V, VEX_L, VEX_WIG; + defm VPUNPCKHDQY : sse2_unpack<0x6A, "vpunpckhdq", v8i32, X86Unpckh, VR256, + i256mem, SSE_PUNPCK, loadv4i64, 0>, + VEX_4V, VEX_L, VEX_WIG; + defm VPUNPCKHQDQY : sse2_unpack<0x6D, "vpunpckhqdq", v4i64, X86Unpckh, VR256, + i256mem, SSE_PUNPCK, loadv4i64, 0>, + VEX_4V, VEX_L, VEX_WIG; } let Constraints = "$src1 = $dst" in { - defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, X86Unpckl, - memopv2i64>; - defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, X86Unpckl, - memopv2i64>; - defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, X86Unpckl, - memopv2i64>; - defm PUNPCKLQDQ : sse2_unpack<0x6C, "punpcklqdq", v2i64, X86Unpckl, - memopv2i64>; - - defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, X86Unpckh, - memopv2i64>; - defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, X86Unpckh, - memopv2i64>; - defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Unpckh, - memopv2i64>; - defm PUNPCKHQDQ : sse2_unpack<0x6D, "punpckhqdq", v2i64, X86Unpckh, - memopv2i64>; + defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, X86Unpckl, VR128, + i128mem, SSE_PUNPCK, memopv2i64>; + defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, X86Unpckl, VR128, + i128mem, SSE_PUNPCK, memopv2i64>; + defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, X86Unpckl, VR128, + i128mem, SSE_PUNPCK, memopv2i64>; + defm PUNPCKLQDQ : sse2_unpack<0x6C, "punpcklqdq", v2i64, X86Unpckl, VR128, + i128mem, SSE_PUNPCK, memopv2i64>; + + defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, X86Unpckh, VR128, + i128mem, SSE_PUNPCK, memopv2i64>; + defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, X86Unpckh, VR128, + i128mem, SSE_PUNPCK, memopv2i64>; + defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Unpckh, VR128, + i128mem, SSE_PUNPCK, memopv2i64>; + defm PUNPCKHQDQ : sse2_unpack<0x6D, "punpckhqdq", v2i64, 
X86Unpckh, VR128, + i128mem, SSE_PUNPCK, memopv2i64>; } } // ExeDomain = SSEPackedInt @@ -4712,6 +4778,12 @@ let Predicates = [UseSSE3] in { // SSE3 - Replicate Double FP - MOVDDUP //===---------------------------------------------------------------------===// +// FIXME: Improve MOVDDUP/BROADCAST reg/mem scheduling itineraries. +let Sched = WriteFShuffle in +def SSE_MOVDDUP : OpndItins< + IIC_SSE_MOV_LH, IIC_SSE_MOV_LH +>; + multiclass sse3_replicate_dfp { def rr : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), @@ -4831,77 +4903,82 @@ let Constraints = "$src1 = $dst", Predicates = [UseSSE3] in { // SSE3 Instructions //===---------------------------------------------------------------------===// +let Sched = WriteFHAdd in +def SSE_HADDSUB : OpndItins< + IIC_SSE_HADDSUB_RR, IIC_SSE_HADDSUB_RM +>; + // Horizontal ops multiclass S3D_Int o, string OpcodeStr, ValueType vt, RegisterClass RC, - X86MemOperand x86memop, SDNode OpNode, PatFrag ld_frag, - bit Is2Addr = 1> { + X86MemOperand x86memop, SDNode OpNode, OpndItins itins, + PatFrag ld_frag, bit Is2Addr = 1> { def rr : S3DI, - Sched<[WriteFHAdd]>; + [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], itins.rr>, + Sched<[itins.Sched]>; def rm : S3DI, Sched<[WriteFHAddLd, ReadAfterLd]>; + itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; } multiclass S3_Int o, string OpcodeStr, ValueType vt, RegisterClass RC, - X86MemOperand x86memop, SDNode OpNode, PatFrag ld_frag, - bit Is2Addr = 1> { + X86MemOperand x86memop, SDNode OpNode, OpndItins itins, + PatFrag ld_frag, bit Is2Addr = 1> { def rr : S3I, - Sched<[WriteFHAdd]>; + [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], itins.rr>, + Sched<[itins.Sched]>; def rm : S3I, Sched<[WriteFHAddLd, ReadAfterLd]>; + itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; } let Predicates = [HasAVX] in { let ExeDomain = SSEPackedSingle in { defm VHADDPS : S3D_Int<0x7C, "vhaddps", v4f32, VR128, f128mem, - X86fhadd, loadv4f32, 0>, VEX_4V, VEX_WIG; + X86fhadd, SSE_HADDSUB, loadv4f32, 0>, VEX_4V, VEX_WIG; defm VHSUBPS : S3D_Int<0x7D, "vhsubps", v4f32, VR128, f128mem, - X86fhsub, loadv4f32, 0>, VEX_4V, VEX_WIG; + X86fhsub, SSE_HADDSUB, loadv4f32, 0>, VEX_4V, VEX_WIG; defm VHADDPSY : S3D_Int<0x7C, "vhaddps", v8f32, VR256, f256mem, - X86fhadd, loadv8f32, 0>, VEX_4V, VEX_L, VEX_WIG; + X86fhadd, SSE_HADDSUB, loadv8f32, 0>, VEX_4V, VEX_L, VEX_WIG; defm VHSUBPSY : S3D_Int<0x7D, "vhsubps", v8f32, VR256, f256mem, - X86fhsub, loadv8f32, 0>, VEX_4V, VEX_L, VEX_WIG; + X86fhsub, SSE_HADDSUB, loadv8f32, 0>, VEX_4V, VEX_L, VEX_WIG; } let ExeDomain = SSEPackedDouble in { defm VHADDPD : S3_Int <0x7C, "vhaddpd", v2f64, VR128, f128mem, - X86fhadd, loadv2f64, 0>, VEX_4V, VEX_WIG; + X86fhadd, SSE_HADDSUB, loadv2f64, 0>, VEX_4V, VEX_WIG; defm VHSUBPD : S3_Int <0x7D, "vhsubpd", v2f64, VR128, f128mem, - X86fhsub, loadv2f64, 0>, VEX_4V, VEX_WIG; + X86fhsub, SSE_HADDSUB, loadv2f64, 0>, VEX_4V, VEX_WIG; defm VHADDPDY : S3_Int <0x7C, "vhaddpd", v4f64, VR256, f256mem, - X86fhadd, loadv4f64, 0>, VEX_4V, VEX_L, VEX_WIG; + X86fhadd, SSE_HADDSUB, loadv4f64, 0>, VEX_4V, VEX_L, VEX_WIG; defm VHSUBPDY : S3_Int <0x7D, "vhsubpd", v4f64, VR256, f256mem, - X86fhsub, loadv4f64, 0>, VEX_4V, VEX_L, VEX_WIG; + X86fhsub, SSE_HADDSUB, loadv4f64, 0>, VEX_4V, VEX_L, VEX_WIG; } } let Constraints = "$src1 = $dst" in { let ExeDomain = SSEPackedSingle in { defm HADDPS : S3D_Int<0x7C, "haddps", v4f32, VR128, f128mem, X86fhadd, - memopv4f32>; + SSE_HADDSUB, memopv4f32>; defm HSUBPS : S3D_Int<0x7D, 
"hsubps", v4f32, VR128, f128mem, X86fhsub, - memopv4f32>; + SSE_HADDSUB, memopv4f32>; } let ExeDomain = SSEPackedDouble in { defm HADDPD : S3_Int<0x7C, "haddpd", v2f64, VR128, f128mem, X86fhadd, - memopv2f64>; + SSE_HADDSUB, memopv2f64>; defm HSUBPD : S3_Int<0x7D, "hsubpd", v2f64, VR128, f128mem, X86fhsub, - memopv2f64>; + SSE_HADDSUB, memopv2f64>; } } @@ -4909,59 +4986,63 @@ let Constraints = "$src1 = $dst" in { // SSSE3 - Packed Absolute Instructions //===---------------------------------------------------------------------===// +let Sched = WriteVecALU in +def SSE_PABS : OpndItins< + IIC_SSE_PABS_RR, IIC_SSE_PABS_RM +>; /// SS3I_unop_rm_int - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}. multiclass SS3I_unop_rm opc, string OpcodeStr, ValueType vt, - SDNode OpNode, PatFrag ld_frag> { + SDNode OpNode, OpndItins itins, PatFrag ld_frag> { def rr : SS38I, Sched<[WriteVecALU]>; + itins.rr>, Sched<[itins.Sched]>; def rm : SS38I, Sched<[WriteVecALULd]>; + itins.rm>, Sched<[itins.Sched.Folded]>; } /// SS3I_unop_rm_int_y - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}. multiclass SS3I_unop_rm_y opc, string OpcodeStr, ValueType vt, - SDNode OpNode> { + SDNode OpNode, OpndItins itins> { def Yrr : SS38I, - Sched<[WriteVecALU]>; + [(set VR256:$dst, (vt (OpNode VR256:$src)))], itins.rr>, + Sched<[itins.Sched]>; def Yrm : SS38I, - Sched<[WriteVecALULd]>; + (vt (OpNode (bitconvert (loadv4i64 addr:$src)))))], itins.rm>, + Sched<[itins.Sched.Folded]>; } let Predicates = [HasAVX, NoVLX_Or_NoBWI] in { - defm VPABSB : SS3I_unop_rm<0x1C, "vpabsb", v16i8, abs, loadv2i64>, VEX, VEX_WIG; - defm VPABSW : SS3I_unop_rm<0x1D, "vpabsw", v8i16, abs, loadv2i64>, VEX, VEX_WIG; + defm VPABSB : SS3I_unop_rm<0x1C, "vpabsb", v16i8, abs, SSE_PABS, loadv2i64>, VEX, VEX_WIG; + defm VPABSW : SS3I_unop_rm<0x1D, "vpabsw", v8i16, abs, SSE_PABS, loadv2i64>, VEX, VEX_WIG; } let Predicates = [HasAVX, NoVLX] in { - defm VPABSD : SS3I_unop_rm<0x1E, "vpabsd", v4i32, abs, loadv2i64>, VEX, VEX_WIG; + defm VPABSD : SS3I_unop_rm<0x1E, "vpabsd", v4i32, abs, SSE_PABS, loadv2i64>, VEX, VEX_WIG; } let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { - defm VPABSB : SS3I_unop_rm_y<0x1C, "vpabsb", v32i8, abs>, VEX, VEX_L, VEX_WIG; - defm VPABSW : SS3I_unop_rm_y<0x1D, "vpabsw", v16i16, abs>, VEX, VEX_L, VEX_WIG; + defm VPABSB : SS3I_unop_rm_y<0x1C, "vpabsb", v32i8, abs, SSE_PABS>, VEX, VEX_L, VEX_WIG; + defm VPABSW : SS3I_unop_rm_y<0x1D, "vpabsw", v16i16, abs, SSE_PABS>, VEX, VEX_L, VEX_WIG; } let Predicates = [HasAVX2, NoVLX] in { - defm VPABSD : SS3I_unop_rm_y<0x1E, "vpabsd", v8i32, abs>, VEX, VEX_L, VEX_WIG; + defm VPABSD : SS3I_unop_rm_y<0x1E, "vpabsd", v8i32, abs, SSE_PABS>, VEX, VEX_L, VEX_WIG; } -defm PABSB : SS3I_unop_rm<0x1C, "pabsb", v16i8, abs, memopv2i64>; -defm PABSW : SS3I_unop_rm<0x1D, "pabsw", v8i16, abs, memopv2i64>; -defm PABSD : SS3I_unop_rm<0x1E, "pabsd", v4i32, abs, memopv2i64>; +defm PABSB : SS3I_unop_rm<0x1C, "pabsb", v16i8, abs, SSE_PABS, memopv2i64>; +defm PABSW : SS3I_unop_rm<0x1D, "pabsw", v8i16, abs, SSE_PABS, memopv2i64>; +defm PABSD : SS3I_unop_rm<0x1E, "pabsd", v4i32, abs, SSE_PABS, memopv2i64>; //===---------------------------------------------------------------------===// // SSSE3 - Packed Binary Operator Instructions @@ -5181,9 +5262,14 @@ defm PMULHRSW : SS3I_binop_rm<0x0B, "pmulhrsw", X86mulhrs, v8i16, v8i16, // SSSE3 - Packed Align Instruction Patterns //===---------------------------------------------------------------------===// +let Sched = WriteShuffle in +def SSE_PALIGN : OpndItins< + 
IIC_SSE_PALIGNRR, IIC_SSE_PALIGNRM +>; + multiclass ssse3_palignr { + OpndItins itins, bit Is2Addr = 1> { let hasSideEffects = 0 in { def rri : SS3AI<0x0F, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2, u8imm:$src3), @@ -5192,7 +5278,7 @@ multiclass ssse3_palignr, Sched<[WriteShuffle]>; + itins.rr>, Sched<[itins.Sched]>; let mayLoad = 1 in def rmi : SS3AI<0x0F, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$src3), @@ -5203,19 +5289,19 @@ multiclass ssse3_palignr, Sched<[WriteShuffleLd, ReadAfterLd]>; + itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; } } let Predicates = [HasAVX, NoVLX_Or_NoBWI] in defm VPALIGNR : ssse3_palignr<"vpalignr", v16i8, VR128, loadv2i64, - i128mem, 0>, VEX_4V, VEX_WIG; + i128mem, SSE_PALIGN, 0>, VEX_4V, VEX_WIG; let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in defm VPALIGNRY : ssse3_palignr<"vpalignr", v32i8, VR256, loadv4i64, - i256mem, 0>, VEX_4V, VEX_L, VEX_WIG; + i256mem, SSE_PALIGN, 0>, VEX_4V, VEX_L, VEX_WIG; let Constraints = "$src1 = $dst", Predicates = [UseSSSE3] in defm PALIGNR : ssse3_palignr<"palignr", v16i8, VR128, memopv2i64, - i128mem>; + i128mem, SSE_PALIGN>; //===---------------------------------------------------------------------===// // SSSE3 - Thread synchronization @@ -6083,6 +6169,11 @@ let Predicates = [UseSSE41] in { // SSE4.1 - Packed Bit Test //===----------------------------------------------------------------------===// +let Sched = WriteVecLogic in +def SSE_PTEST : OpndItins< + IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM +>; + // ptest instruction we'll lower to this in X86ISelLowering primarily from // the intel intrinsic that corresponds to this. let Defs = [EFLAGS], Predicates = [HasAVX] in { @@ -6219,7 +6310,7 @@ multiclass SS48I_binop_rm opc, string OpcodeStr, SDNode OpNode, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>, + [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>, Sched<[itins.Sched]>; def rm : SS48I opc, string OpcodeStr, SDNode OpNode, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, - (OpVT (OpNode RC:$src1, (bitconvert (memop_frag addr:$src2)))))]>, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + (OpVT (OpNode RC:$src1, (bitconvert (memop_frag addr:$src2)))))], + itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; } /// SS48I_binop_rm2 - Simple SSE41 binary operator with different src and dst @@ -6244,7 +6335,7 @@ multiclass SS48I_binop_rm2 opc, string OpcodeStr, SDNode OpNode, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))]>, + [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))], itins.rr>, Sched<[itins.Sched]>; def rm : SS48I opc, string OpcodeStr, SDNode OpNode, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), - (bitconvert (memop_frag addr:$src2)))))]>, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + (bitconvert (memop_frag addr:$src2)))))], + itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; } let Predicates = [HasAVX, NoVLX] in { @@ -6830,14 +6921,15 @@ multiclass SS42I_binop_rm opc, string OpcodeStr, SDNode OpNode, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, 
$dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>, Sched<[itins.Sched]>; + [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>, + Sched<[itins.Sched]>; def rm : SS428I, + (OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))], itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; } @@ -6871,7 +6963,7 @@ multiclass pseudo_pcmpistrm { (bc_v16i8 (ld_frag addr:$src2)), imm:$src3))]>; } -let Defs = [EFLAGS], usesCustomInserter = 1 in { +let Defs = [EFLAGS], usesCustomInserter = 1, hasNoSchedulingInfo = 1 in { defm VPCMPISTRM128 : pseudo_pcmpistrm<"#VPCMPISTRM128", loadv2i64>, Requires<[HasAVX]>, VEX_WIG; defm PCMPISTRM128 : pseudo_pcmpistrm<"#PCMPISTRM128", memopv2i64>, @@ -6908,7 +7000,7 @@ multiclass pseudo_pcmpestrm { (bc_v16i8 (ld_frag addr:$src3)), EDX, imm:$src5))]>; } -let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in { +let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1, hasNoSchedulingInfo = 1 in { defm VPCMPESTRM128 : pseudo_pcmpestrm<"#VPCMPESTRM128", loadv2i64>, Requires<[HasAVX]>; defm PCMPESTRM128 : pseudo_pcmpestrm<"#PCMPESTRM128", memopv2i64>, @@ -6945,7 +7037,7 @@ multiclass pseudo_pcmpistri { (bc_v16i8 (ld_frag addr:$src2)), imm:$src3))]>; } -let Defs = [EFLAGS], usesCustomInserter = 1 in { +let Defs = [EFLAGS], usesCustomInserter = 1, hasNoSchedulingInfo = 1 in { defm VPCMPISTRI : pseudo_pcmpistri<"#VPCMPISTRI", loadv2i64>, Requires<[HasAVX]>, VEX_WIG; defm PCMPISTRI : pseudo_pcmpistri<"#PCMPISTRI", memopv2i64>, @@ -6983,7 +7075,7 @@ multiclass pseudo_pcmpestri { imm:$src5))]>; } -let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in { +let Defs = [EFLAGS], Uses = [EAX, EDX], hasNoSchedulingInfo = 1, usesCustomInserter = 1 in { defm VPCMPESTRI : pseudo_pcmpestri<"#VPCMPESTRI", loadv2i64>, Requires<[HasAVX]>; defm PCMPESTRI : pseudo_pcmpestri<"#PCMPESTRI", memopv2i64>, @@ -7061,8 +7153,9 @@ let Constraints = "$src1 = $dst" in { // SHA-NI Instructions //===----------------------------------------------------------------------===// +// FIXME: Is there a better scheduler itinerary for SHA than WriteVecIMul? 
multiclass SHAI_binop Opc, string OpcodeStr, Intrinsic IntId, - bit UsesXMM0 = 0> { + OpndItins itins, bit UsesXMM0 = 0> { def rr : I Opc, string OpcodeStr, Intrinsic IntId, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}")), [!if(UsesXMM0, (set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0)), - (set VR128:$dst, (IntId VR128:$src1, VR128:$src2)))]>, T8; + (set VR128:$dst, (IntId VR128:$src1, VR128:$src2)))], itins.rr>, + T8, Sched<[itins.Sched]>; def rm : I Opc, string OpcodeStr, Intrinsic IntId, (set VR128:$dst, (IntId VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)), XMM0)), (set VR128:$dst, (IntId VR128:$src1, - (bc_v4i32 (memopv2i64 addr:$src2)))))]>, T8; + (bc_v4i32 (memopv2i64 addr:$src2)))))], itins.rm>, T8, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } let Constraints = "$src1 = $dst", Predicates = [HasSHA] in { @@ -7090,24 +7185,32 @@ let Constraints = "$src1 = $dst", Predicates = [HasSHA] in { "sha1rnds4\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set VR128:$dst, (int_x86_sha1rnds4 VR128:$src1, VR128:$src2, - (i8 imm:$src3)))]>, TA; + (i8 imm:$src3)))], IIC_SSE_INTMUL_P_RR>, TA, + Sched<[WriteVecIMul]>; def SHA1RNDS4rmi : Ii8<0xCC, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2, u8imm:$src3), "sha1rnds4\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set VR128:$dst, (int_x86_sha1rnds4 VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)), - (i8 imm:$src3)))]>, TA; + (i8 imm:$src3)))], IIC_SSE_INTMUL_P_RM>, TA, + Sched<[WriteVecIMulLd, ReadAfterLd]>; - defm SHA1NEXTE : SHAI_binop<0xC8, "sha1nexte", int_x86_sha1nexte>; - defm SHA1MSG1 : SHAI_binop<0xC9, "sha1msg1", int_x86_sha1msg1>; - defm SHA1MSG2 : SHAI_binop<0xCA, "sha1msg2", int_x86_sha1msg2>; + defm SHA1NEXTE : SHAI_binop<0xC8, "sha1nexte", int_x86_sha1nexte, + SSE_INTMUL_ITINS_P>; + defm SHA1MSG1 : SHAI_binop<0xC9, "sha1msg1", int_x86_sha1msg1, + SSE_INTMUL_ITINS_P>; + defm SHA1MSG2 : SHAI_binop<0xCA, "sha1msg2", int_x86_sha1msg2, + SSE_INTMUL_ITINS_P>; let Uses=[XMM0] in - defm SHA256RNDS2 : SHAI_binop<0xCB, "sha256rnds2", int_x86_sha256rnds2, 1>; + defm SHA256RNDS2 : SHAI_binop<0xCB, "sha256rnds2", int_x86_sha256rnds2, + SSE_INTMUL_ITINS_P, 1>; - defm SHA256MSG1 : SHAI_binop<0xCC, "sha256msg1", int_x86_sha256msg1>; - defm SHA256MSG2 : SHAI_binop<0xCD, "sha256msg2", int_x86_sha256msg2>; + defm SHA256MSG1 : SHAI_binop<0xCC, "sha256msg1", int_x86_sha256msg1, + SSE_INTMUL_ITINS_P>; + defm SHA256MSG2 : SHAI_binop<0xCD, "sha256msg2", int_x86_sha256msg2, + SSE_INTMUL_ITINS_P>; } // Aliases with explicit %xmm0 @@ -7343,23 +7446,27 @@ def EXTRQI : Ii8<0x78, MRMXr, (outs VR128:$dst), (ins VR128:$src, u8imm:$len, u8imm:$idx), "extrq\t{$idx, $len, $src|$src, $len, $idx}", [(set VR128:$dst, (X86extrqi VR128:$src, imm:$len, - imm:$idx))]>, PD; + imm:$idx))], IIC_SSE_INTALU_P_RR>, + PD, Sched<[WriteVecALU]>; def EXTRQ : I<0x79, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src, VR128:$mask), "extrq\t{$mask, $src|$src, $mask}", [(set VR128:$dst, (int_x86_sse4a_extrq VR128:$src, - VR128:$mask))]>, PD; + VR128:$mask))], IIC_SSE_INTALU_P_RR>, + PD, Sched<[WriteVecALU]>; def INSERTQI : Ii8<0x78, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src, VR128:$src2, u8imm:$len, u8imm:$idx), "insertq\t{$idx, $len, $src2, $src|$src, $src2, $len, $idx}", [(set VR128:$dst, (X86insertqi VR128:$src, VR128:$src2, - imm:$len, imm:$idx))]>, XD; + imm:$len, imm:$idx))], IIC_SSE_INTALU_P_RR>, + XD, Sched<[WriteVecALU]>; def INSERTQ : I<0x79, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src, VR128:$mask), "insertq\t{$mask, $src|$src, $mask}", [(set VR128:$dst, 
(int_x86_sse4a_insertq VR128:$src, - VR128:$mask))]>, XD; + VR128:$mask))], IIC_SSE_INTALU_P_RR>, + XD, Sched<[WriteVecALU]>; } } // ExeDomain = SSEPackedInt @@ -7446,7 +7553,8 @@ def VBROADCASTI128 : AVX8I<0x5A, MRMSrcMem, (outs VR256:$dst), "vbroadcasti128\t{$src, $dst|$dst, $src}", []>, Sched<[WriteLoad]>, VEX, VEX_L; -let mayLoad = 1, hasSideEffects = 0, Predicates = [HasAVX] in +let mayLoad = 1, hasSideEffects = 0, Predicates = [HasAVX], + ExeDomain = SSEPackedSingle in def VBROADCASTF128 : AVX8I<0x1A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src), "vbroadcastf128\t{$src, $dst|$dst, $src}", []>, @@ -7576,21 +7684,23 @@ multiclass avx_movmask_rm opc_rm, bits<8> opc_mr, string OpcodeStr, def rm : AVX8I, - VEX_4V; + [(set VR128:$dst, (IntLd addr:$src2, VR128:$src1))], + IIC_SSE_MASKMOV>, VEX_4V, Sched<[WriteLoad]>; def Yrm : AVX8I, - VEX_4V, VEX_L; + [(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))], + IIC_SSE_MASKMOV>, VEX_4V, VEX_L, Sched<[WriteLoad]>; def mr : AVX8I, VEX_4V; + [(IntSt addr:$dst, VR128:$src1, VR128:$src2)], IIC_SSE_MASKMOV>, + VEX_4V, Sched<[WriteStore]>; def Ymr : AVX8I, VEX_4V, VEX_L; + [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)], IIC_SSE_MASKMOV>, + VEX_4V, VEX_L, Sched<[WriteStore]>; } let ExeDomain = SSEPackedSingle in @@ -7609,6 +7719,17 @@ defm VMASKMOVPD : avx_movmask_rm<0x2D, 0x2F, "vmaskmovpd", //===----------------------------------------------------------------------===// // VPERMIL - Permute Single and Double Floating-Point Values // + +let Sched = WriteFShuffle in +def AVX_VPERMILV : OpndItins< + IIC_SSE_SHUFP, IIC_SSE_SHUFP +>; + +let Sched = WriteFShuffle in +def AVX_VPERMIL : OpndItins< + IIC_SSE_SHUFP, IIC_SSE_SHUFP +>; + multiclass avx_permil opc_rm, bits<8> opc_rmi, string OpcodeStr, RegisterClass RC, X86MemOperand x86memop_f, X86MemOperand x86memop_i, PatFrag i_frag, @@ -7700,16 +7821,20 @@ def : Pat<(v4i64 (X86VPerm2x128 (loadv4i64 addr:$src2), // VZERO - Zero YMM registers // // Note, these instruction do not affect the YMM16-YMM31. 
+let SchedRW = [WriteSystem] in { let Defs = [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7, YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15] in { // Zero All YMM registers def VZEROALL : I<0x77, RawFrm, (outs), (ins), "vzeroall", - [(int_x86_avx_vzeroall)]>, PS, VEX, VEX_L, Requires<[HasAVX]>, VEX_WIG; + [(int_x86_avx_vzeroall)], IIC_AVX_ZERO>, PS, VEX, VEX_L, + Requires<[HasAVX]>, VEX_WIG; // Zero Upper bits of YMM registers def VZEROUPPER : I<0x77, RawFrm, (outs), (ins), "vzeroupper", - [(int_x86_avx_vzeroupper)]>, PS, VEX, Requires<[HasAVX]>, VEX_WIG; -} + [(int_x86_avx_vzeroupper)], IIC_AVX_ZERO>, PS, VEX, + Requires<[HasAVX]>, VEX_WIG; +} // Defs +} // SchedRW //===----------------------------------------------------------------------===// // Half precision conversion instructions @@ -8056,6 +8181,16 @@ let Predicates = [HasAVX1Only] in { // VPERM - Permute instructions // +let Sched = WriteFShuffle256 in +def AVX2_PERMV_F : OpndItins< + IIC_SSE_SHUFP, IIC_SSE_SHUFP +>; + +let Sched = WriteShuffle256 in +def AVX2_PERMV_I : OpndItins< + IIC_SSE_PSHUF_RI, IIC_SSE_PSHUF_MI +>; + multiclass avx2_perm opc, string OpcodeStr, PatFrag mem_frag, ValueType OpVT, X86FoldableSchedWrite Sched, X86MemOperand memOp> { @@ -8186,20 +8321,23 @@ multiclass avx2_pmovmask, VEX_4V; + [(set VR128:$dst, (IntLd128 addr:$src2, VR128:$src1))], + IIC_SSE_MASKMOV>, VEX_4V, Sched<[WriteLoad]>; def Yrm : AVX28I<0x8c, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>, - VEX_4V, VEX_L; + [(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))], + IIC_SSE_MASKMOV>, VEX_4V, VEX_L, Sched<[WriteLoad]>; def mr : AVX28I<0x8e, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src1, VR128:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(IntSt128 addr:$dst, VR128:$src1, VR128:$src2)]>, VEX_4V; + [(IntSt128 addr:$dst, VR128:$src1, VR128:$src2)], IIC_SSE_MASKMOV>, + VEX_4V, Sched<[WriteStore]>; def Ymr : AVX28I<0x8e, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src1, VR256:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>, VEX_4V, VEX_L; + [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)], IIC_SSE_MASKMOV>, + VEX_4V, VEX_L, Sched<[WriteStore]>; } defm VPMASKMOVD : avx2_pmovmask<"vpmaskmovd", @@ -8346,12 +8484,12 @@ let Predicates = [HasAVX2, NoVLX] in { (VPSRAVDYrm VR256:$src1, addr:$src2)>; } - - //===----------------------------------------------------------------------===// // VGATHER - GATHER Operations + +// FIXME: Improve scheduling of gather instructions. 
multiclass avx2_gather opc, string OpcodeStr, ValueType VTx, - ValueType VTy, PatFrag GatherNode128, + ValueType VTy, PatFrag GatherNode128, PatFrag GatherNode256, RegisterClass RC256, X86MemOperand memop128, X86MemOperand memop256, ValueType MTx = VTx, ValueType MTy = VTy> { @@ -8361,14 +8499,16 @@ multiclass avx2_gather opc, string OpcodeStr, ValueType VTx, "\t{$mask, $src2, $dst|$dst, $src2, $mask}"), [(set (VTx VR128:$dst), (MTx VR128:$mask_wb), (GatherNode128 VR128:$src1, VR128:$mask, - vectoraddr:$src2))]>, VEX; + vectoraddr:$src2))]>, + VEX, Sched<[WriteLoad]>; def Yrm : AVX28I, VEX, VEX_L; + vectoraddr:$src2))]>, + VEX, VEX_L, Sched<[WriteLoad]>; } let Predicates = [UseAVX2] in { diff --git a/lib/Target/X86/X86InstrSVM.td b/lib/Target/X86/X86InstrSVM.td index 41867099a6c5..bdf478600279 100644 --- a/lib/Target/X86/X86InstrSVM.td +++ b/lib/Target/X86/X86InstrSVM.td @@ -15,44 +15,44 @@ //===----------------------------------------------------------------------===// // SVM instructions +let SchedRW = [WriteSystem] in { // 0F 01 D9 -def VMMCALL : I<0x01, MRM_D9, (outs), (ins), "vmmcall", []>, TB; +def VMMCALL : I<0x01, MRM_D9, (outs), (ins), "vmmcall", [], IIC_SVM>, TB; // 0F 01 DC -def STGI : I<0x01, MRM_DC, (outs), (ins), "stgi", []>, TB; +def STGI : I<0x01, MRM_DC, (outs), (ins), "stgi", [], IIC_STGI>, TB; // 0F 01 DD -def CLGI : I<0x01, MRM_DD, (outs), (ins), "clgi", []>, TB; +def CLGI : I<0x01, MRM_DD, (outs), (ins), "clgi", [], IIC_CLGI>, TB; // 0F 01 DE let Uses = [EAX] in -def SKINIT : I<0x01, MRM_DE, (outs), (ins), "skinit\t{%eax|eax}", []>, TB; +def SKINIT : I<0x01, MRM_DE, (outs), (ins), "skinit\t{%eax|eax}", [], IIC_SKINIT>, TB; // 0F 01 D8 let Uses = [EAX] in def VMRUN32 : I<0x01, MRM_D8, (outs), (ins), - "vmrun\t{%eax|eax}", []>, TB, Requires<[Not64BitMode]>; + "vmrun\t{%eax|eax}", [], IIC_SVM>, TB, Requires<[Not64BitMode]>; let Uses = [RAX] in def VMRUN64 : I<0x01, MRM_D8, (outs), (ins), - "vmrun\t{%rax|rax}", []>, TB, Requires<[In64BitMode]>; + "vmrun\t{%rax|rax}", [], IIC_SVM>, TB, Requires<[In64BitMode]>; // 0F 01 DA let Uses = [EAX] in def VMLOAD32 : I<0x01, MRM_DA, (outs), (ins), - "vmload\t{%eax|eax}", []>, TB, Requires<[Not64BitMode]>; + "vmload\t{%eax|eax}", [], IIC_SVM>, TB, Requires<[Not64BitMode]>; let Uses = [RAX] in def VMLOAD64 : I<0x01, MRM_DA, (outs), (ins), - "vmload\t{%rax|rax}", []>, TB, Requires<[In64BitMode]>; + "vmload\t{%rax|rax}", [], IIC_SVM>, TB, Requires<[In64BitMode]>; // 0F 01 DB let Uses = [EAX] in def VMSAVE32 : I<0x01, MRM_DB, (outs), (ins), - "vmsave\t{%eax|eax}", []>, TB, Requires<[Not64BitMode]>; + "vmsave\t{%eax|eax}", [], IIC_SVM>, TB, Requires<[Not64BitMode]>; let Uses = [RAX] in def VMSAVE64 : I<0x01, MRM_DB, (outs), (ins), - "vmsave\t{%rax|rax}", []>, TB, Requires<[In64BitMode]>; + "vmsave\t{%rax|rax}", [], IIC_SVM>, TB, Requires<[In64BitMode]>; -let SchedRW = [WriteSystem] in { // 0F 01 DF let Uses = [EAX, ECX] in def INVLPGA32 : I<0x01, MRM_DF, (outs), (ins), diff --git a/lib/Target/X86/X86InstrShiftRotate.td b/lib/Target/X86/X86InstrShiftRotate.td index 44bcef6d98b7..43e1752f2df2 100644 --- a/lib/Target/X86/X86InstrShiftRotate.td +++ b/lib/Target/X86/X86InstrShiftRotate.td @@ -83,7 +83,8 @@ def SHL32mCL : I<0xD3, MRM4m, (outs), (ins i32mem:$dst), OpSize32; def SHL64mCL : RI<0xD3, MRM4m, (outs), (ins i64mem:$dst), "shl{q}\t{%cl, $dst|$dst, cl}", - [(store (shl (loadi64 addr:$dst), CL), addr:$dst)], IIC_SR>; + [(store (shl (loadi64 addr:$dst), CL), addr:$dst)], IIC_SR>, + Requires<[In64BitMode]>; } def SHL8mi : Ii8<0xC0, MRM4m, 
                   (outs), (ins i8mem :$dst, u8imm:$src),
                   "shl{b}\t{$src, $dst|$dst, $src}",
@@ -100,7 +101,7 @@ def SHL32mi : Ii8<0xC1, MRM4m, (outs), (ins i32mem:$dst, u8imm:$src),
 def SHL64mi : RIi8<0xC1, MRM4m, (outs), (ins i64mem:$dst, u8imm:$src),
                    "shl{q}\t{$src, $dst|$dst, $src}",
                 [(store (shl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)],
-                   IIC_SR>;
+                   IIC_SR>, Requires<[In64BitMode]>;
 
 // Shift by 1
 def SHL8m1 : I<0xD0, MRM4m, (outs), (ins i8mem :$dst),
@@ -118,7 +119,7 @@ def SHL32m1 : I<0xD1, MRM4m, (outs), (ins i32mem:$dst),
 def SHL64m1 : RI<0xD1, MRM4m, (outs), (ins i64mem:$dst),
                  "shl{q}\t$dst",
                 [(store (shl (loadi64 addr:$dst), (i8 1)), addr:$dst)],
-                 IIC_SR>;
+                 IIC_SR>, Requires<[In64BitMode]>;
 } // SchedRW
 
 let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in {
@@ -183,7 +184,8 @@ def SHR32mCL : I<0xD3, MRM5m, (outs), (ins i32mem:$dst),
                  OpSize32;
 def SHR64mCL : RI<0xD3, MRM5m, (outs), (ins i64mem:$dst),
                   "shr{q}\t{%cl, $dst|$dst, cl}",
-                  [(store (srl (loadi64 addr:$dst), CL), addr:$dst)], IIC_SR>;
+                  [(store (srl (loadi64 addr:$dst), CL), addr:$dst)], IIC_SR>,
+                  Requires<[In64BitMode]>;
 }
 def SHR8mi : Ii8<0xC0, MRM5m, (outs), (ins i8mem :$dst, u8imm:$src),
                  "shr{b}\t{$src, $dst|$dst, $src}",
@@ -200,7 +202,7 @@ def SHR32mi : Ii8<0xC1, MRM5m, (outs), (ins i32mem:$dst, u8imm:$src),
 def SHR64mi : RIi8<0xC1, MRM5m, (outs), (ins i64mem:$dst, u8imm:$src),
                    "shr{q}\t{$src, $dst|$dst, $src}",
                 [(store (srl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)],
-                   IIC_SR>;
+                   IIC_SR>, Requires<[In64BitMode]>;
 
 // Shift by 1
 def SHR8m1 : I<0xD0, MRM5m, (outs), (ins i8mem :$dst),
@@ -218,7 +220,7 @@ def SHR32m1 : I<0xD1, MRM5m, (outs), (ins i32mem:$dst),
 def SHR64m1 : RI<0xD1, MRM5m, (outs), (ins i64mem:$dst),
                  "shr{q}\t$dst",
                 [(store (srl (loadi64 addr:$dst), (i8 1)), addr:$dst)],
-                 IIC_SR>;
+                 IIC_SR>, Requires<[In64BitMode]>;
 } // SchedRW
 
 let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in {
@@ -296,7 +298,7 @@ def SAR32mCL : I<0xD3, MRM7m, (outs), (ins i32mem:$dst),
 def SAR64mCL : RI<0xD3, MRM7m, (outs), (ins i64mem:$dst),
                   "sar{q}\t{%cl, $dst|$dst, cl}",
                   [(store (sra (loadi64 addr:$dst), CL), addr:$dst)],
-                  IIC_SR>;
+                  IIC_SR>, Requires<[In64BitMode]>;
 }
 def SAR8mi : Ii8<0xC0, MRM7m, (outs), (ins i8mem :$dst, u8imm:$src),
                  "sar{b}\t{$src, $dst|$dst, $src}",
@@ -313,7 +315,7 @@ def SAR32mi : Ii8<0xC1, MRM7m, (outs), (ins i32mem:$dst, u8imm:$src),
 def SAR64mi : RIi8<0xC1, MRM7m, (outs), (ins i64mem:$dst, u8imm:$src),
                    "sar{q}\t{$src, $dst|$dst, $src}",
                 [(store (sra (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)],
-                   IIC_SR>;
+                   IIC_SR>, Requires<[In64BitMode]>;
 
 // Shift by 1
 def SAR8m1 : I<0xD0, MRM7m, (outs), (ins i8mem :$dst),
@@ -331,7 +333,7 @@ def SAR32m1 : I<0xD1, MRM7m, (outs), (ins i32mem:$dst),
 def SAR64m1 : RI<0xD1, MRM7m, (outs), (ins i64mem:$dst),
                  "sar{q}\t$dst",
                 [(store (sra (loadi64 addr:$dst), (i8 1)), addr:$dst)],
-                 IIC_SR>;
+                 IIC_SR>, Requires<[In64BitMode]>;
 } // SchedRW
 
 //===----------------------------------------------------------------------===//
@@ -418,9 +420,10 @@ def RCL32m1 : I<0xD1, MRM2m, (outs), (ins i32mem:$dst),
 def RCL32mi : Ii8<0xC1, MRM2m, (outs), (ins i32mem:$dst, u8imm:$cnt),
                   "rcl{l}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>, OpSize32;
 def RCL64m1 : RI<0xD1, MRM2m, (outs), (ins i64mem:$dst),
-                 "rcl{q}\t$dst", [], IIC_SR>;
+                 "rcl{q}\t$dst", [], IIC_SR>, Requires<[In64BitMode]>;
 def RCL64mi : RIi8<0xC1, MRM2m, (outs), (ins i64mem:$dst, u8imm:$cnt),
-                   "rcl{q}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>;
+                   "rcl{q}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>,
+                   Requires<[In64BitMode]>;
 def RCR8m1 : I<0xD0, MRM3m, (outs), (ins i8mem:$dst),
                "rcr{b}\t$dst", [], IIC_SR>;
@@ -435,9 +438,10 @@ def RCR32m1 : I<0xD1, MRM3m, (outs), (ins i32mem:$dst),
 def RCR32mi : Ii8<0xC1, MRM3m, (outs), (ins i32mem:$dst, u8imm:$cnt),
                   "rcr{l}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>, OpSize32;
 def RCR64m1 : RI<0xD1, MRM3m, (outs), (ins i64mem:$dst),
-                 "rcr{q}\t$dst", [], IIC_SR>;
+                 "rcr{q}\t$dst", [], IIC_SR>, Requires<[In64BitMode]>;
 def RCR64mi : RIi8<0xC1, MRM3m, (outs), (ins i64mem:$dst, u8imm:$cnt),
-                   "rcr{q}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>;
+                   "rcr{q}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>,
+                   Requires<[In64BitMode]>;
 } // Uses = [EFLAGS]
 
 let Uses = [CL, EFLAGS] in {
@@ -448,7 +452,8 @@ def RCL16mCL : I<0xD3, MRM2m, (outs), (ins i16mem:$dst),
 def RCL32mCL : I<0xD3, MRM2m, (outs), (ins i32mem:$dst),
                  "rcl{l}\t{%cl, $dst|$dst, cl}", [], IIC_SR>, OpSize32;
 def RCL64mCL : RI<0xD3, MRM2m, (outs), (ins i64mem:$dst),
-                  "rcl{q}\t{%cl, $dst|$dst, cl}", [], IIC_SR>;
+                  "rcl{q}\t{%cl, $dst|$dst, cl}", [], IIC_SR>,
+                  Requires<[In64BitMode]>;
 
 def RCR8mCL : I<0xD2, MRM3m, (outs), (ins i8mem:$dst),
                 "rcr{b}\t{%cl, $dst|$dst, cl}", [], IIC_SR>;
@@ -457,7 +462,8 @@ def RCR16mCL : I<0xD3, MRM3m, (outs), (ins i16mem:$dst),
 def RCR32mCL : I<0xD3, MRM3m, (outs), (ins i32mem:$dst),
                  "rcr{l}\t{%cl, $dst|$dst, cl}", [], IIC_SR>, OpSize32;
 def RCR64mCL : RI<0xD3, MRM3m, (outs), (ins i64mem:$dst),
-                  "rcr{q}\t{%cl, $dst|$dst, cl}", [], IIC_SR>;
+                  "rcr{q}\t{%cl, $dst|$dst, cl}", [], IIC_SR>,
+                  Requires<[In64BitMode]>;
 } // Uses = [CL, EFLAGS]
 } // SchedRW
 } // hasSideEffects = 0
@@ -532,7 +538,7 @@ def ROL32mCL : I<0xD3, MRM0m, (outs), (ins i32mem:$dst),
 def ROL64mCL : RI<0xD3, MRM0m, (outs), (ins i64mem:$dst),
                   "rol{q}\t{%cl, $dst|$dst, cl}",
                   [(store (rotl (loadi64 addr:$dst), CL), addr:$dst)],
-                  IIC_SR>;
+                  IIC_SR>, Requires<[In64BitMode]>;
 }
 def ROL8mi : Ii8<0xC0, MRM0m, (outs), (ins i8mem :$dst, u8imm:$src1),
                  "rol{b}\t{$src1, $dst|$dst, $src1}",
@@ -549,7 +555,7 @@ def ROL32mi : Ii8<0xC1, MRM0m, (outs), (ins i32mem:$dst, u8imm:$src1),
 def ROL64mi : RIi8<0xC1, MRM0m, (outs), (ins i64mem:$dst, u8imm:$src1),
                    "rol{q}\t{$src1, $dst|$dst, $src1}",
                [(store (rotl (loadi64 addr:$dst), (i8 imm:$src1)), addr:$dst)],
-                   IIC_SR>;
+                   IIC_SR>, Requires<[In64BitMode]>;
 
 // Rotate by 1
 def ROL8m1 : I<0xD0, MRM0m, (outs), (ins i8mem :$dst),
@@ -567,7 +573,7 @@ def ROL32m1 : I<0xD1, MRM0m, (outs), (ins i32mem:$dst),
 def ROL64m1 : RI<0xD1, MRM0m, (outs), (ins i64mem:$dst),
                  "rol{q}\t$dst",
                 [(store (rotl (loadi64 addr:$dst), (i8 1)), addr:$dst)],
-                 IIC_SR>;
+                 IIC_SR>, Requires<[In64BitMode]>;
 } // SchedRW
 
 let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in {
@@ -640,7 +646,7 @@ def ROR32mCL : I<0xD3, MRM1m, (outs), (ins i32mem:$dst),
 def ROR64mCL : RI<0xD3, MRM1m, (outs), (ins i64mem:$dst),
                   "ror{q}\t{%cl, $dst|$dst, cl}",
                   [(store (rotr (loadi64 addr:$dst), CL), addr:$dst)],
-                  IIC_SR>;
+                  IIC_SR>, Requires<[In64BitMode]>;
 }
 def ROR8mi : Ii8<0xC0, MRM1m, (outs), (ins i8mem :$dst, u8imm:$src),
                  "ror{b}\t{$src, $dst|$dst, $src}",
@@ -657,7 +663,7 @@ def ROR32mi : Ii8<0xC1, MRM1m, (outs), (ins i32mem:$dst, u8imm:$src),
 def ROR64mi : RIi8<0xC1, MRM1m, (outs), (ins i64mem:$dst, u8imm:$src),
                    "ror{q}\t{$src, $dst|$dst, $src}",
                [(store (rotr (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)],
-                   IIC_SR>;
+                   IIC_SR>, Requires<[In64BitMode]>;
 
 // Rotate by 1
 def ROR8m1 : I<0xD0, MRM1m, (outs), (ins i8mem :$dst),
@@ -675,7 +681,7 @@ def ROR32m1 : I<0xD1, MRM1m, (outs), (ins i32mem:$dst),
 def ROR64m1 : RI<0xD1, MRM1m, (outs), (ins i64mem:$dst),
"ror{q}\t$dst", [(store (rotl (loadi64 addr:$dst), (i8 63)), addr:$dst)], - IIC_SR>; + IIC_SR>, Requires<[In64BitMode]>; } // SchedRW diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td index a399c6c462d4..1d1b9698daee 100644 --- a/lib/Target/X86/X86InstrSystem.td +++ b/lib/Target/X86/X86InstrSystem.td @@ -19,7 +19,8 @@ let Defs = [RAX, RDX] in TB; let Defs = [RAX, RCX, RDX] in - def RDTSCP : I<0x01, MRM_F9, (outs), (ins), "rdtscp", [(X86rdtscp)]>, TB; + def RDTSCP : I<0x01, MRM_F9, (outs), (ins), "rdtscp", [(X86rdtscp)], + IIC_RDTSCP>, TB; // CPU flow control instructions @@ -154,13 +155,14 @@ def MOV64cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR64:$src), //===----------------------------------------------------------------------===// // Segment override instruction prefixes -def CS_PREFIX : I<0x2E, RawFrm, (outs), (ins), "cs", []>; -def SS_PREFIX : I<0x36, RawFrm, (outs), (ins), "ss", []>; -def DS_PREFIX : I<0x3E, RawFrm, (outs), (ins), "ds", []>; -def ES_PREFIX : I<0x26, RawFrm, (outs), (ins), "es", []>; -def FS_PREFIX : I<0x64, RawFrm, (outs), (ins), "fs", []>; -def GS_PREFIX : I<0x65, RawFrm, (outs), (ins), "gs", []>; - +let SchedRW = [WriteNop] in { +def CS_PREFIX : I<0x2E, RawFrm, (outs), (ins), "cs", [], IIC_NOP>; +def SS_PREFIX : I<0x36, RawFrm, (outs), (ins), "ss", [], IIC_NOP>; +def DS_PREFIX : I<0x3E, RawFrm, (outs), (ins), "ds", [], IIC_NOP>; +def ES_PREFIX : I<0x26, RawFrm, (outs), (ins), "es", [], IIC_NOP>; +def FS_PREFIX : I<0x64, RawFrm, (outs), (ins), "fs", [], IIC_NOP>; +def GS_PREFIX : I<0x65, RawFrm, (outs), (ins), "gs", [], IIC_NOP>; +} // SchedRW //===----------------------------------------------------------------------===// // Moves to and from segment registers. @@ -415,10 +417,10 @@ def SLDT32r : I<0x00, MRM0r, (outs GR32:$dst), (ins), // LLDT is not interpreted specially in 64-bit mode because there is no sign // extension. 
def SLDT64r : RI<0x00, MRM0r, (outs GR64:$dst), (ins), - "sldt{q}\t$dst", [], IIC_SLDT>, TB; + "sldt{q}\t$dst", [], IIC_SLDT>, TB, Requires<[In64BitMode]>; let mayStore = 1 in def SLDT64m : RI<0x00, MRM0m, (outs), (ins i16mem:$dst), - "sldt{q}\t$dst", [], IIC_SLDT>, TB; + "sldt{q}\t$dst", [], IIC_SLDT>, TB, Requires<[In64BitMode]>; def LGDT16m : I<0x01, MRM2m, (outs), (ins opaque48mem:$src), "lgdt{w}\t$src", [], IIC_LGDT>, TB, OpSize16, Requires<[Not64BitMode]>; @@ -488,24 +490,22 @@ let SchedRW = [WriteSystem], Predicates = [HasSHSTK] in{ def INCSSPD : I<0xAE, MRM5r, (outs), (ins GR32:$src), "incsspd\t$src", [(int_x86_incsspd GR32:$src)]>, XS; def INCSSPQ : RI<0xAE, MRM5r, (outs), (ins GR64:$src), "incsspq\t$src", - [(int_x86_incsspq GR64:$src)]>, XS, - Requires<[In64BitMode]>; + [(int_x86_incsspq GR64:$src)]>, XS; } // Defs SSP let Constraints = "$src = $dst" in { - def RDSSPD : I<0x1E, MRM1r, (outs GR32:$dst), (ins GR32:$src), + def RDSSPD : I<0x1E, MRM1r, (outs GR32:$dst), (ins GR32:$src), "rdsspd\t$dst", [(set GR32:$dst, (int_x86_rdsspd GR32:$src))]>, XS; - def RDSSPQ : RI<0x1E, MRM1r, (outs GR64:$dst), (ins GR64:$src), - "rdsspq\t$dst", - [(set GR64:$dst, (int_x86_rdsspq GR64:$src))]>, XS, - Requires<[In64BitMode]>; + def RDSSPQ : RI<0x1E, MRM1r, (outs GR64:$dst), (ins GR64:$src), + "rdsspq\t$dst", + [(set GR64:$dst, (int_x86_rdsspq GR64:$src))]>, XS; } let Defs = [SSP] in { def SAVEPREVSSP : I<0x01, MRM_EA, (outs), (ins), "saveprevssp", [(int_x86_saveprevssp)]>, XS; - def RSTORSSP : I<0x01, MRM5m, (outs), (ins i32mem:$src), + def RSTORSSP : I<0x01, MRM5m, (outs), (ins i32mem:$src), "rstorssp\t$src", [(int_x86_rstorssp addr:$src)]>, XS; } // Defs SSP @@ -513,18 +513,16 @@ let SchedRW = [WriteSystem], Predicates = [HasSHSTK] in{ def WRSSD : I<0xF6, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), "wrssd\t{$src, $dst|$dst, $src}", - [(int_x86_wrssd GR32:$src, addr:$dst)]>, T8; + [(int_x86_wrssd GR32:$src, addr:$dst)]>, T8PS; def WRSSQ : RI<0xF6, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), "wrssq\t{$src, $dst|$dst, $src}", - [(int_x86_wrssq GR64:$src, addr:$dst)]>, T8, - Requires<[In64BitMode]>; + [(int_x86_wrssq GR64:$src, addr:$dst)]>, T8PS; def WRUSSD : I<0xF5, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), "wrussd\t{$src, $dst|$dst, $src}", [(int_x86_wrussd GR32:$src, addr:$dst)]>, T8PD; - def WRUSSQ : RI<0xF5, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), + def WRUSSQ : RI<0xF5, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), "wrussq\t{$src, $dst|$dst, $src}", - [(int_x86_wrussq GR64:$src, addr:$dst)]>, T8PD, - Requires<[In64BitMode]>; + [(int_x86_wrussq GR64:$src, addr:$dst)]>, T8PD; let Defs = [SSP] in { let Uses = [SSP] in { @@ -532,12 +530,17 @@ let SchedRW = [WriteSystem], Predicates = [HasSHSTK] in{ [(int_x86_setssbsy)]>, XS; } // Uses SSP - def CLRSSBSY : I<0xAE, MRM6m, (outs), (ins i32mem:$src), + def CLRSSBSY : I<0xAE, MRM6m, (outs), (ins i32mem:$src), "clrssbsy\t$src", [(int_x86_clrssbsy addr:$src)]>, XS; } // Defs SSP } // SchedRW && HasSHSTK +let Predicates = [HasIBT] in { + def ENDBR64 : I<0x1E, MRM_FA, (outs), (ins), "endbr64", []>, XS; + def ENDBR32 : I<0x1E, MRM_FB, (outs), (ins), "endbr32", []>, XS; +} // HasIBT + //===----------------------------------------------------------------------===// // XSAVE instructions let SchedRW = [WriteSystem] in { @@ -546,67 +549,60 @@ let Defs = [EDX, EAX], Uses = [ECX] in def XGETBV : I<0x01, MRM_D0, (outs), (ins), "xgetbv", []>, TB; let Uses = [EDX, EAX, ECX] in - def XSETBV : I<0x01, MRM_D1, (outs), (ins), - 
"xsetbv", + def XSETBV : I<0x01, MRM_D1, (outs), (ins), + "xsetbv", [(int_x86_xsetbv ECX, EDX, EAX)]>, TB; } // HasXSAVE let Uses = [EDX, EAX] in { -let Predicates = [HasXSAVE] in { - def XSAVE : I<0xAE, MRM4m, (outs), (ins opaque512mem:$dst), - "xsave\t$dst", - [(int_x86_xsave addr:$dst, EDX, EAX)]>, PS; - def XSAVE64 : RI<0xAE, MRM4m, (outs), (ins opaque512mem:$dst), - "xsave64\t$dst", - [(int_x86_xsave64 addr:$dst, EDX, EAX)]>, PS, Requires<[In64BitMode]>; - def XRSTOR : I<0xAE, MRM5m, (outs), (ins opaque512mem:$dst), - "xrstor\t$dst", - [(int_x86_xrstor addr:$dst, EDX, EAX)]>, PS; - def XRSTOR64 : RI<0xAE, MRM5m, (outs), (ins opaque512mem:$dst), - "xrstor64\t$dst", - [(int_x86_xrstor64 addr:$dst, EDX, EAX)]>, PS, Requires<[In64BitMode]>; -} -let Predicates = [HasXSAVEOPT] in { - def XSAVEOPT : I<0xAE, MRM6m, (outs), (ins opaque512mem:$dst), - "xsaveopt\t$dst", - [(int_x86_xsaveopt addr:$dst, EDX, EAX)]>, PS; - def XSAVEOPT64 : RI<0xAE, MRM6m, (outs), (ins opaque512mem:$dst), - "xsaveopt64\t$dst", - [(int_x86_xsaveopt64 addr:$dst, EDX, EAX)]>, PS, Requires<[In64BitMode]>; -} -let Predicates = [HasXSAVEC] in { - def XSAVEC : I<0xC7, MRM4m, (outs), (ins opaque512mem:$dst), - "xsavec\t$dst", - [(int_x86_xsavec addr:$dst, EDX, EAX)]>, TB; - def XSAVEC64 : RI<0xC7, MRM4m, (outs), (ins opaque512mem:$dst), - "xsavec64\t$dst", - [(int_x86_xsavec64 addr:$dst, EDX, EAX)]>, TB, Requires<[In64BitMode]>; -} -let Predicates = [HasXSAVES] in { - def XSAVES : I<0xC7, MRM5m, (outs), (ins opaque512mem:$dst), - "xsaves\t$dst", - [(int_x86_xsaves addr:$dst, EDX, EAX)]>, TB; - def XSAVES64 : RI<0xC7, MRM5m, (outs), (ins opaque512mem:$dst), - "xsaves64\t$dst", - [(int_x86_xsaves64 addr:$dst, EDX, EAX)]>, TB, Requires<[In64BitMode]>; - def XRSTORS : I<0xC7, MRM3m, (outs), (ins opaque512mem:$dst), - "xrstors\t$dst", - [(int_x86_xrstors addr:$dst, EDX, EAX)]>, TB; - def XRSTORS64 : RI<0xC7, MRM3m, (outs), (ins opaque512mem:$dst), - "xrstors64\t$dst", - [(int_x86_xrstors64 addr:$dst, EDX, EAX)]>, TB, Requires<[In64BitMode]>; -} +def XSAVE : I<0xAE, MRM4m, (outs), (ins opaque512mem:$dst), + "xsave\t$dst", + [(int_x86_xsave addr:$dst, EDX, EAX)]>, PS, Requires<[HasXSAVE]>; +def XSAVE64 : RI<0xAE, MRM4m, (outs), (ins opaque512mem:$dst), + "xsave64\t$dst", + [(int_x86_xsave64 addr:$dst, EDX, EAX)]>, PS, Requires<[HasXSAVE, In64BitMode]>; +def XRSTOR : I<0xAE, MRM5m, (outs), (ins opaque512mem:$dst), + "xrstor\t$dst", + [(int_x86_xrstor addr:$dst, EDX, EAX)]>, PS, Requires<[HasXSAVE]>; +def XRSTOR64 : RI<0xAE, MRM5m, (outs), (ins opaque512mem:$dst), + "xrstor64\t$dst", + [(int_x86_xrstor64 addr:$dst, EDX, EAX)]>, PS, Requires<[HasXSAVE, In64BitMode]>; +def XSAVEOPT : I<0xAE, MRM6m, (outs), (ins opaque512mem:$dst), + "xsaveopt\t$dst", + [(int_x86_xsaveopt addr:$dst, EDX, EAX)]>, PS, Requires<[HasXSAVEOPT]>; +def XSAVEOPT64 : RI<0xAE, MRM6m, (outs), (ins opaque512mem:$dst), + "xsaveopt64\t$dst", + [(int_x86_xsaveopt64 addr:$dst, EDX, EAX)]>, PS, Requires<[HasXSAVEOPT, In64BitMode]>; +def XSAVEC : I<0xC7, MRM4m, (outs), (ins opaque512mem:$dst), + "xsavec\t$dst", + [(int_x86_xsavec addr:$dst, EDX, EAX)]>, TB, Requires<[HasXSAVEC]>; +def XSAVEC64 : RI<0xC7, MRM4m, (outs), (ins opaque512mem:$dst), + "xsavec64\t$dst", + [(int_x86_xsavec64 addr:$dst, EDX, EAX)]>, TB, Requires<[HasXSAVEC, In64BitMode]>; +def XSAVES : I<0xC7, MRM5m, (outs), (ins opaque512mem:$dst), + "xsaves\t$dst", + [(int_x86_xsaves addr:$dst, EDX, EAX)]>, TB, Requires<[HasXSAVES]>; +def XSAVES64 : RI<0xC7, MRM5m, (outs), (ins opaque512mem:$dst), + 
"xsaves64\t$dst", + [(int_x86_xsaves64 addr:$dst, EDX, EAX)]>, TB, Requires<[HasXSAVE, In64BitMode]>; +def XRSTORS : I<0xC7, MRM3m, (outs), (ins opaque512mem:$dst), + "xrstors\t$dst", + [(int_x86_xrstors addr:$dst, EDX, EAX)]>, TB, Requires<[HasXSAVES]>; +def XRSTORS64 : RI<0xC7, MRM3m, (outs), (ins opaque512mem:$dst), + "xrstors64\t$dst", + [(int_x86_xrstors64 addr:$dst, EDX, EAX)]>, TB, Requires<[HasXSAVES, In64BitMode]>; } // Uses } // SchedRW //===----------------------------------------------------------------------===// // VIA PadLock crypto instructions -let Defs = [RAX, RDI], Uses = [RDX, RDI] in +let Defs = [RAX, RDI], Uses = [RDX, RDI], SchedRW = [WriteSystem] in def XSTORE : I<0xa7, MRM_C0, (outs), (ins), "xstore", []>, TB; def : InstAlias<"xstorerng", (XSTORE)>; +let SchedRW = [WriteSystem] in { let Defs = [RSI, RDI], Uses = [RBX, RDX, RSI, RDI] in { def XCRYPTECB : I<0xa7, MRM_C8, (outs), (ins), "xcryptecb", []>, TB; def XCRYPTCBC : I<0xa7, MRM_D0, (outs), (ins), "xcryptcbc", []>, TB; @@ -621,88 +617,118 @@ let Defs = [RAX, RSI, RDI], Uses = [RAX, RSI, RDI] in { } let Defs = [RAX, RDX, RSI], Uses = [RAX, RSI] in def MONTMUL : I<0xa6, MRM_C0, (outs), (ins), "montmul", []>, TB; +} // SchedRW + //==-----------------------------------------------------------------------===// // PKU - enable protection key -let usesCustomInserter = 1 in { +let usesCustomInserter = 1, hasNoSchedulingInfo = 1 in { def WRPKRU : PseudoI<(outs), (ins GR32:$src), [(int_x86_wrpkru GR32:$src)]>; def RDPKRU : PseudoI<(outs GR32:$dst), (ins), [(set GR32:$dst, (int_x86_rdpkru))]>; } +let SchedRW = [WriteSystem] in { let Defs = [EAX, EDX], Uses = [ECX] in - def RDPKRUr : I<0x01, MRM_EE, (outs), (ins), "rdpkru", []>, TB; + def RDPKRUr : I<0x01, MRM_EE, (outs), (ins), "rdpkru", [], IIC_PKU>, TB; let Uses = [EAX, ECX, EDX] in - def WRPKRUr : I<0x01, MRM_EF, (outs), (ins), "wrpkru", []>, TB; + def WRPKRUr : I<0x01, MRM_EF, (outs), (ins), "wrpkru", [], IIC_PKU>, TB; +} // SchedRW //===----------------------------------------------------------------------===// // FS/GS Base Instructions -let Predicates = [HasFSGSBase, In64BitMode] in { +let Predicates = [HasFSGSBase, In64BitMode], SchedRW = [WriteSystem] in { def RDFSBASE : I<0xAE, MRM0r, (outs GR32:$dst), (ins), "rdfsbase{l}\t$dst", - [(set GR32:$dst, (int_x86_rdfsbase_32))]>, XS; + [(set GR32:$dst, (int_x86_rdfsbase_32))], + IIC_SEGMENT_BASE_R>, XS; def RDFSBASE64 : RI<0xAE, MRM0r, (outs GR64:$dst), (ins), "rdfsbase{q}\t$dst", - [(set GR64:$dst, (int_x86_rdfsbase_64))]>, XS; + [(set GR64:$dst, (int_x86_rdfsbase_64))], + IIC_SEGMENT_BASE_R>, XS; def RDGSBASE : I<0xAE, MRM1r, (outs GR32:$dst), (ins), "rdgsbase{l}\t$dst", - [(set GR32:$dst, (int_x86_rdgsbase_32))]>, XS; + [(set GR32:$dst, (int_x86_rdgsbase_32))], + IIC_SEGMENT_BASE_R>, XS; def RDGSBASE64 : RI<0xAE, MRM1r, (outs GR64:$dst), (ins), "rdgsbase{q}\t$dst", - [(set GR64:$dst, (int_x86_rdgsbase_64))]>, XS; + [(set GR64:$dst, (int_x86_rdgsbase_64))], + IIC_SEGMENT_BASE_R>, XS; def WRFSBASE : I<0xAE, MRM2r, (outs), (ins GR32:$src), "wrfsbase{l}\t$src", - [(int_x86_wrfsbase_32 GR32:$src)]>, XS; + [(int_x86_wrfsbase_32 GR32:$src)], + IIC_SEGMENT_BASE_W>, XS; def WRFSBASE64 : RI<0xAE, MRM2r, (outs), (ins GR64:$src), "wrfsbase{q}\t$src", - [(int_x86_wrfsbase_64 GR64:$src)]>, XS; + [(int_x86_wrfsbase_64 GR64:$src)], + IIC_SEGMENT_BASE_W>, XS; def WRGSBASE : I<0xAE, MRM3r, (outs), (ins GR32:$src), "wrgsbase{l}\t$src", - [(int_x86_wrgsbase_32 GR32:$src)]>, XS; + [(int_x86_wrgsbase_32 GR32:$src)], IIC_SEGMENT_BASE_W>, 
XS; def WRGSBASE64 : RI<0xAE, MRM3r, (outs), (ins GR64:$src), "wrgsbase{q}\t$src", - [(int_x86_wrgsbase_64 GR64:$src)]>, XS; + [(int_x86_wrgsbase_64 GR64:$src)], + IIC_SEGMENT_BASE_W>, XS; } //===----------------------------------------------------------------------===// // INVPCID Instruction +let SchedRW = [WriteSystem] in { def INVPCID32 : I<0x82, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2), - "invpcid\t{$src2, $src1|$src1, $src2}", []>, T8PD, + "invpcid\t{$src2, $src1|$src1, $src2}", [], IIC_INVPCID>, T8PD, Requires<[Not64BitMode]>; def INVPCID64 : I<0x82, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2), - "invpcid\t{$src2, $src1|$src1, $src2}", []>, T8PD, + "invpcid\t{$src2, $src1|$src1, $src2}", [], IIC_INVPCID>, T8PD, Requires<[In64BitMode]>; +} // SchedRW //===----------------------------------------------------------------------===// // SMAP Instruction -let Defs = [EFLAGS] in { - def CLAC : I<0x01, MRM_CA, (outs), (ins), "clac", []>, TB; - def STAC : I<0x01, MRM_CB, (outs), (ins), "stac", []>, TB; +let Defs = [EFLAGS], SchedRW = [WriteSystem] in { + def CLAC : I<0x01, MRM_CA, (outs), (ins), "clac", [], IIC_SMAP>, TB; + def STAC : I<0x01, MRM_CB, (outs), (ins), "stac", [], IIC_SMAP>, TB; } //===----------------------------------------------------------------------===// // SMX Instruction +let SchedRW = [WriteSystem] in { let Uses = [RAX, RBX, RCX, RDX], Defs = [RAX, RBX, RCX] in { - def GETSEC : I<0x37, RawFrm, (outs), (ins), "getsec", []>, TB; -} + def GETSEC : I<0x37, RawFrm, (outs), (ins), "getsec", [], IIC_SMX>, TB; +} // Uses, Defs +} // SchedRW //===----------------------------------------------------------------------===// // RDPID Instruction -def RDPID32 : I<0xC7, MRM7r, (outs GR32:$src), (ins), - "rdpid\t$src", []>, XS, - Requires<[Not64BitMode]>; -def RDPID64 : I<0xC7, MRM7r, (outs GR64:$src), (ins), - "rdpid\t$src", []>, XS, - Requires<[In64BitMode]>; +let SchedRW = [WriteSystem] in { +def RDPID32 : I<0xC7, MRM7r, (outs GR32:$dst), (ins), + "rdpid\t$dst", [(set GR32:$dst, (int_x86_rdpid))], IIC_RDPID>, XS, + Requires<[Not64BitMode, HasRDPID]>; +def RDPID64 : I<0xC7, MRM7r, (outs GR64:$dst), (ins), + "rdpid\t$dst", [], IIC_RDPID>, XS, + Requires<[In64BitMode, HasRDPID]>; +} // SchedRW + +let Predicates = [In64BitMode, HasRDPID] in { + // Due to silly instruction definition, we have to compensate for the + // instruction outputing a 64-bit register. 
+ def : Pat<(int_x86_rdpid), + (EXTRACT_SUBREG (RDPID64), sub_32bit)>; +} + //===----------------------------------------------------------------------===// // PTWRITE Instruction +let SchedRW = [WriteSystem] in { + def PTWRITEm: I<0xAE, MRM4m, (outs), (ins i32mem:$dst), - "ptwrite{l}\t$dst", []>, XS; + "ptwrite{l}\t$dst", [], IIC_PTWRITE>, XS; def PTWRITE64m : RI<0xAE, MRM4m, (outs), (ins i64mem:$dst), - "ptwrite{q}\t$dst", []>, XS, Requires<[In64BitMode]>; + "ptwrite{q}\t$dst", [], IIC_PTWRITE>, XS, + Requires<[In64BitMode]>; def PTWRITEr : I<0xAE, MRM4r, (outs), (ins GR32:$dst), - "ptwrite{l}\t$dst", []>, XS; + "ptwrite{l}\t$dst", [], IIC_PTWRITE>, XS; def PTWRITE64r : RI<0xAE, MRM4r, (outs), (ins GR64:$dst), - "ptwrite{q}\t$dst", []>, XS, Requires<[In64BitMode]>; + "ptwrite{q}\t$dst", [], IIC_PTWRITE>, XS, + Requires<[In64BitMode]>; +} // SchedRW diff --git a/lib/Target/X86/X86InstrTSX.td b/lib/Target/X86/X86InstrTSX.td index 61aac58a491f..10c6eef78639 100644 --- a/lib/Target/X86/X86InstrTSX.td +++ b/lib/Target/X86/X86InstrTSX.td @@ -18,6 +18,8 @@ def X86xtest: SDNode<"X86ISD::XTEST", SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>, [SDNPHasChain, SDNPSideEffect]>; +let SchedRW = [WriteSystem] in { + let usesCustomInserter = 1 in def XBEGIN : I<0, Pseudo, (outs GR32:$dst), (ins), "# XBEGIN", [(set GR32:$dst, (int_x86_xbegin))]>, @@ -45,11 +47,14 @@ def XTEST : I<0x01, MRM_D6, (outs), (ins), def XABORT : Ii8<0xc6, MRM_F8, (outs), (ins i8imm:$imm), "xabort\t$imm", [(int_x86_xabort imm:$imm)]>, Requires<[HasRTM]>; +} // SchedRW // HLE prefixes +let SchedRW = [WriteSystem] in { let isAsmParserOnly = 1 in { def XACQUIRE_PREFIX : I<0xF2, RawFrm, (outs), (ins), "xacquire", []>; def XRELEASE_PREFIX : I<0xF3, RawFrm, (outs), (ins), "xrelease", []>; } +} // SchedRW diff --git a/lib/Target/X86/X86InstrVMX.td b/lib/Target/X86/X86InstrVMX.td index 273ad24e84ba..4bb2c204b368 100644 --- a/lib/Target/X86/X86InstrVMX.td +++ b/lib/Target/X86/X86InstrVMX.td @@ -15,56 +15,66 @@ //===----------------------------------------------------------------------===// // VMX instructions +let SchedRW = [WriteSystem] in { // 66 0F 38 80 def INVEPT32 : I<0x80, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2), - "invept\t{$src2, $src1|$src1, $src2}", []>, T8PD, + "invept\t{$src2, $src1|$src1, $src2}", [], IIC_VMX>, T8PD, Requires<[Not64BitMode]>; def INVEPT64 : I<0x80, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2), - "invept\t{$src2, $src1|$src1, $src2}", []>, T8PD, + "invept\t{$src2, $src1|$src1, $src2}", [], IIC_VMX>, T8PD, Requires<[In64BitMode]>; + // 66 0F 38 81 def INVVPID32 : I<0x81, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2), - "invvpid\t{$src2, $src1|$src1, $src2}", []>, T8PD, + "invvpid\t{$src2, $src1|$src1, $src2}", [], IIC_VMX>, T8PD, Requires<[Not64BitMode]>; def INVVPID64 : I<0x81, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2), - "invvpid\t{$src2, $src1|$src1, $src2}", []>, T8PD, + "invvpid\t{$src2, $src1|$src1, $src2}", [], IIC_VMX>, T8PD, Requires<[In64BitMode]>; + // 0F 01 C1 -def VMCALL : I<0x01, MRM_C1, (outs), (ins), "vmcall", []>, TB; +def VMCALL : I<0x01, MRM_C1, (outs), (ins), "vmcall", [], IIC_VMX>, TB; def VMCLEARm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs), "vmclear\t$vmcs", []>, PD; + // OF 01 D4 -def VMFUNC : I<0x01, MRM_D4, (outs), (ins), "vmfunc", []>, TB; +def VMFUNC : I<0x01, MRM_D4, (outs), (ins), "vmfunc", [], IIC_VMX>, TB; + // 0F 01 C2 -def VMLAUNCH : I<0x01, MRM_C2, (outs), (ins), "vmlaunch", []>, TB; +def VMLAUNCH : I<0x01, MRM_C2, (outs), (ins), "vmlaunch", [], 
IIC_VMX>, TB; + // 0F 01 C3 -def VMRESUME : I<0x01, MRM_C3, (outs), (ins), "vmresume", []>, TB; +def VMRESUME : I<0x01, MRM_C3, (outs), (ins), "vmresume", [], IIC_VMX>, TB; def VMPTRLDm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs), - "vmptrld\t$vmcs", []>, PS; + "vmptrld\t$vmcs", [], IIC_VMX>, PS; def VMPTRSTm : I<0xC7, MRM7m, (outs), (ins i64mem:$vmcs), - "vmptrst\t$vmcs", []>, PS; + "vmptrst\t$vmcs", [], IIC_VMX>, PS; def VMREAD64rr : I<0x78, MRMDestReg, (outs GR64:$dst), (ins GR64:$src), - "vmread{q}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[In64BitMode]>; + "vmread{q}\t{$src, $dst|$dst, $src}", [], IIC_VMX>, PS, Requires<[In64BitMode]>; def VMREAD32rr : I<0x78, MRMDestReg, (outs GR32:$dst), (ins GR32:$src), - "vmread{l}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[Not64BitMode]>; + "vmread{l}\t{$src, $dst|$dst, $src}", [], IIC_VMX>, PS, Requires<[Not64BitMode]>; + let mayStore = 1 in { def VMREAD64mr : I<0x78, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), - "vmread{q}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[In64BitMode]>; + "vmread{q}\t{$src, $dst|$dst, $src}", [], IIC_VMX>, PS, Requires<[In64BitMode]>; def VMREAD32mr : I<0x78, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), - "vmread{l}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[Not64BitMode]>; -} + "vmread{l}\t{$src, $dst|$dst, $src}", [], IIC_VMX>, PS, Requires<[Not64BitMode]>; +} // mayStore + def VMWRITE64rr : I<0x79, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), - "vmwrite{q}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[In64BitMode]>; + "vmwrite{q}\t{$src, $dst|$dst, $src}", [], IIC_VMX>, PS, Requires<[In64BitMode]>; def VMWRITE32rr : I<0x79, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), - "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[Not64BitMode]>; + "vmwrite{l}\t{$src, $dst|$dst, $src}", [], IIC_VMX>, PS, Requires<[Not64BitMode]>; + let mayLoad = 1 in { def VMWRITE64rm : I<0x79, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), - "vmwrite{q}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[In64BitMode]>; + "vmwrite{q}\t{$src, $dst|$dst, $src}", [], IIC_VMX>, PS, Requires<[In64BitMode]>; def VMWRITE32rm : I<0x79, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), - "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[Not64BitMode]>; -} + "vmwrite{l}\t{$src, $dst|$dst, $src}", [], IIC_VMX>, PS, Requires<[Not64BitMode]>; +} // mayLoad + // 0F 01 C4 def VMXOFF : I<0x01, MRM_C4, (outs), (ins), "vmxoff", []>, TB; def VMXON : I<0xC7, MRM6m, (outs), (ins i64mem:$vmxon), "vmxon\t$vmxon", []>, XS; - +} // SchedRW diff --git a/lib/Target/X86/X86InstrVecCompiler.td b/lib/Target/X86/X86InstrVecCompiler.td index 7e2195cf93aa..b2ddfa89debe 100644 --- a/lib/Target/X86/X86InstrVecCompiler.td +++ b/lib/Target/X86/X86InstrVecCompiler.td @@ -217,13 +217,13 @@ let Predicates = [HasVLX] in { sub_xmm>; defm : subvector_store_lowering<"APSZ128", "UPSZ128", VR256X, v4f32, v8f32, sub_xmm>; - defm : subvector_store_lowering<"DQA32Z128", "DQU32Z128", VR256X, v2i64, + defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR256X, v2i64, v4i64, sub_xmm>; - defm : subvector_store_lowering<"DQA32Z128", "DQU32Z128", VR256X, v4i32, + defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR256X, v4i32, v8i32, sub_xmm>; - defm : subvector_store_lowering<"DQA32Z128", "DQU32Z128", VR256X, v8i16, + defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR256X, v8i16, v16i16, sub_xmm>; - defm : subvector_store_lowering<"DQA32Z128", "DQU32Z128", VR256X, v16i8, + defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR256X, 
v16i8, v32i8, sub_xmm>; // Special patterns for storing subvector extracts of lower 128-bits of 512. @@ -232,13 +232,13 @@ let Predicates = [HasVLX] in { sub_xmm>; defm : subvector_store_lowering<"APSZ128", "UPSZ128", VR512, v4f32, v16f32, sub_xmm>; - defm : subvector_store_lowering<"DQA32Z128", "DQU32Z128", VR512, v2i64, + defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR512, v2i64, v8i64, sub_xmm>; - defm : subvector_store_lowering<"DQA32Z128", "DQU32Z128", VR512, v4i32, + defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR512, v4i32, v16i32, sub_xmm>; - defm : subvector_store_lowering<"DQA32Z128", "DQU32Z128", VR512, v8i16, + defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR512, v8i16, v32i16, sub_xmm>; - defm : subvector_store_lowering<"DQA32Z128", "DQU32Z128", VR512, v16i8, + defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR512, v16i8, v64i8, sub_xmm>; // Special patterns for storing subvector extracts of lower 256-bits of 512. @@ -247,13 +247,13 @@ let Predicates = [HasVLX] in { sub_ymm>; defm : subvector_store_lowering<"APSZ256", "UPSZ256", VR512, v8f32, v16f32, sub_ymm>; - defm : subvector_store_lowering<"DQA32Z256", "DQU32Z256", VR512, v4i64, + defm : subvector_store_lowering<"DQA64Z256", "DQU64Z256", VR512, v4i64, v8i64, sub_ymm>; - defm : subvector_store_lowering<"DQA32Z256", "DQU32Z256", VR512, v8i32, + defm : subvector_store_lowering<"DQA64Z256", "DQU64Z256", VR512, v8i32, v16i32, sub_ymm>; - defm : subvector_store_lowering<"DQA32Z256", "DQU32Z256", VR512, v16i16, + defm : subvector_store_lowering<"DQA64Z256", "DQU64Z256", VR512, v16i16, v32i16, sub_ymm>; - defm : subvector_store_lowering<"DQA32Z256", "DQU32Z256", VR512, v32i8, + defm : subvector_store_lowering<"DQA64Z256", "DQU64Z256", VR512, v32i8, v64i8, sub_ymm>; } @@ -460,16 +460,16 @@ let Predicates = [HasAVX512] in { (COPY_TO_REGCLASS VK8:$src, VK16)>; } -let Predicates = [HasVLX] in { - def : Pat<(v4i1 (insert_subvector (v4i1 immAllZerosV), - maskzeroupperv2i1:$src, (iPTR 0))), - (COPY_TO_REGCLASS VK2:$src, VK4)>; +let Predicates = [HasVLX, HasDQI] in { def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV), maskzeroupperv2i1:$src, (iPTR 0))), (COPY_TO_REGCLASS VK2:$src, VK8)>; def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV), maskzeroupperv4i1:$src, (iPTR 0))), (COPY_TO_REGCLASS VK4:$src, VK8)>; +} + +let Predicates = [HasVLX] in { def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV), maskzeroupperv2i1:$src, (iPTR 0))), (COPY_TO_REGCLASS VK2:$src, VK16)>; @@ -495,9 +495,91 @@ let Predicates = [HasBWI, HasVLX] in { // If the bits are not zero we have to fall back to explicitly zeroing by // using shifts. 
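+// For example (reading the shift counts in the patterns below), a v2i1 mask
+// inserted into a zeroed v16i1 is copied into VK16, shifted left by 14 and
+// then right by 14, which clears bits 15..2 while keeping the two live mask
+// bits; the v4i1 and v8i1 cases use shift counts of 12 and 8 in the same way.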
-let Predicates = [HasAVX512, NoVLX] in { +let Predicates = [HasAVX512] in { + def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV), + (v2i1 VK2:$mask), (iPTR 0))), + (KSHIFTRWri (KSHIFTLWri (COPY_TO_REGCLASS VK2:$mask, VK16), + (i8 14)), (i8 14))>; + + def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV), + (v4i1 VK4:$mask), (iPTR 0))), + (KSHIFTRWri (KSHIFTLWri (COPY_TO_REGCLASS VK4:$mask, VK16), + (i8 12)), (i8 12))>; +} + +let Predicates = [HasAVX512, NoDQI] in { def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV), (v8i1 VK8:$mask), (iPTR 0))), (KSHIFTRWri (KSHIFTLWri (COPY_TO_REGCLASS VK8:$mask, VK16), (i8 8)), (i8 8))>; } + +let Predicates = [HasDQI] in { + def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV), + (v8i1 VK8:$mask), (iPTR 0))), + (COPY_TO_REGCLASS (KMOVBkk VK8:$mask), VK16)>; + + def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV), + (v2i1 VK2:$mask), (iPTR 0))), + (KSHIFTRBri (KSHIFTLBri (COPY_TO_REGCLASS VK2:$mask, VK8), + (i8 6)), (i8 6))>; + def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV), + (v4i1 VK4:$mask), (iPTR 0))), + (KSHIFTRBri (KSHIFTLBri (COPY_TO_REGCLASS VK4:$mask, VK8), + (i8 4)), (i8 4))>; +} + +let Predicates = [HasBWI] in { + def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV), + (v16i1 VK16:$mask), (iPTR 0))), + (COPY_TO_REGCLASS (KMOVWkk VK16:$mask), VK32)>; + + def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV), + (v16i1 VK16:$mask), (iPTR 0))), + (COPY_TO_REGCLASS (KMOVWkk VK16:$mask), VK64)>; + def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV), + (v32i1 VK32:$mask), (iPTR 0))), + (COPY_TO_REGCLASS (KMOVDkk VK32:$mask), VK64)>; +} + +let Predicates = [HasBWI, NoDQI] in { + def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV), + (v8i1 VK8:$mask), (iPTR 0))), + (KSHIFTRDri (KSHIFTLDri (COPY_TO_REGCLASS VK8:$mask, VK32), + (i8 24)), (i8 24))>; + + def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV), + (v8i1 VK8:$mask), (iPTR 0))), + (KSHIFTRQri (KSHIFTLQri (COPY_TO_REGCLASS VK8:$mask, VK64), + (i8 56)), (i8 56))>; +} + +let Predicates = [HasBWI, HasDQI] in { + def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV), + (v8i1 VK8:$mask), (iPTR 0))), + (COPY_TO_REGCLASS (KMOVBkk VK8:$mask), VK32)>; + + def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV), + (v8i1 VK8:$mask), (iPTR 0))), + (COPY_TO_REGCLASS (KMOVBkk VK8:$mask), VK64)>; +} + +let Predicates = [HasBWI, HasVLX] in { + def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV), + (v2i1 VK2:$mask), (iPTR 0))), + (KSHIFTRDri (KSHIFTLDri (COPY_TO_REGCLASS VK2:$mask, VK32), + (i8 30)), (i8 30))>; + def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV), + (v4i1 VK4:$mask), (iPTR 0))), + (KSHIFTRDri (KSHIFTLDri (COPY_TO_REGCLASS VK4:$mask, VK32), + (i8 28)), (i8 28))>; + + def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV), + (v2i1 VK2:$mask), (iPTR 0))), + (KSHIFTRQri (KSHIFTLQri (COPY_TO_REGCLASS VK2:$mask, VK64), + (i8 62)), (i8 62))>; + def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV), + (v4i1 VK4:$mask), (iPTR 0))), + (KSHIFTRQri (KSHIFTLQri (COPY_TO_REGCLASS VK4:$mask, VK64), + (i8 60)), (i8 60))>; +} diff --git a/lib/Target/X86/X86InstrXOP.td b/lib/Target/X86/X86InstrXOP.td index 383ffbffb395..c4b8e3e90d29 100644 --- a/lib/Target/X86/X86InstrXOP.td +++ b/lib/Target/X86/X86InstrXOP.td @@ -18,7 +18,7 @@ multiclass xop2op opc, string OpcodeStr, Intrinsic Int, PatFrag memop> { def rm : IXOP, XOP, - Sched<[WritePHAdd, ReadAfterLd]>; + Sched<[WritePHAddLd, ReadAfterLd]>; } let ExeDomain = SSEPackedInt in { @@ -48,7 +48,7 @@ multiclass 
xop2opsld opc, string OpcodeStr, Intrinsic Int, def rm : IXOP, XOP, - Sched<[WriteFAdd, ReadAfterLd]>; + Sched<[WriteFAddLd, ReadAfterLd]>; } multiclass xop2op128 opc, string OpcodeStr, Intrinsic Int, @@ -59,7 +59,7 @@ multiclass xop2op128 opc, string OpcodeStr, Intrinsic Int, def rm : IXOP, XOP, - Sched<[WriteFAdd, ReadAfterLd]>; + Sched<[WriteFAddLd, ReadAfterLd]>; } multiclass xop2op256 opc, string OpcodeStr, Intrinsic Int, @@ -70,7 +70,7 @@ multiclass xop2op256 opc, string OpcodeStr, Intrinsic Int, def rmY : IXOP, XOP, VEX_L, - Sched<[WriteFAdd, ReadAfterLd]>; + Sched<[WriteFAddLd, ReadAfterLd]>; } let ExeDomain = SSEPackedSingle in { @@ -101,14 +101,14 @@ multiclass xop3op opc, string OpcodeStr, SDNode OpNode, [(set VR128:$dst, (vt128 (OpNode (vt128 VR128:$src1), (vt128 (bitconvert (loadv2i64 addr:$src2))))))]>, - XOP_4V, VEX_W, Sched<[WriteVarVecShift, ReadAfterLd]>; + XOP_4V, VEX_W, Sched<[WriteVarVecShiftLd, ReadAfterLd]>; def mr : IXOP, - XOP, Sched<[WriteVarVecShift, ReadAfterLd]>; + XOP, Sched<[WriteVarVecShiftLd, ReadAfterLd]>; // For disassembler let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in def rr_REV : IXOP opc, string OpcodeStr, SDNode OpNode, !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, (vt128 (OpNode (vt128 (bitconvert (loadv2i64 addr:$src1))), imm:$src2)))]>, - XOP, Sched<[WriteVecShift, ReadAfterLd]>; + XOP, Sched<[WriteVecShiftLd, ReadAfterLd]>; } let ExeDomain = SSEPackedInt in { @@ -172,7 +172,7 @@ multiclass xop4opm2 opc, string OpcodeStr, Intrinsic Int> { "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set VR128:$dst, (Int VR128:$src1, (bitconvert (loadv2i64 addr:$src2)), - VR128:$src3))]>, XOP_4V, Sched<[WriteVecIMul, ReadAfterLd]>; + VR128:$src3))]>, XOP_4V, Sched<[WriteVecIMulLd, ReadAfterLd]>; } let ExeDomain = SSEPackedInt in { @@ -221,7 +221,7 @@ multiclass xopvpcom opc, string Suffix, SDNode OpNode, ValueType vt128> [(set VR128:$dst, (vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2), imm:$cc)))]>, - XOP_4V, Sched<[WriteVecALU, ReadAfterLd]>; + XOP_4V, Sched<[WriteVecALULd, ReadAfterLd]>; def mi : IXOPi8 opc, string Suffix, SDNode OpNode, ValueType vt128> (vt128 (OpNode (vt128 VR128:$src1), (vt128 (bitconvert (loadv2i64 addr:$src2))), imm:$cc)))]>, - XOP_4V, Sched<[WriteVecALU, ReadAfterLd]>; + XOP_4V, Sched<[WriteVecALULd, ReadAfterLd]>; let isAsmParserOnly = 1, hasSideEffects = 0 in { def ri_alt : IXOPi8, XOP_4V, Sched<[WriteVecALU, ReadAfterLd]>; + []>, XOP_4V, Sched<[WriteVecALULd, ReadAfterLd]>; let mayLoad = 1 in def mi_alt : IXOPi8, XOP_4V, Sched<[WriteVecALU, ReadAfterLd]>; + []>, XOP_4V, Sched<[WriteVecALULd, ReadAfterLd]>; } } @@ -274,7 +274,7 @@ multiclass xop4op opc, string OpcodeStr, SDNode OpNode, [(set VR128:$dst, (vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2), (vt128 (bitconvert (loadv2i64 addr:$src3))))))]>, - XOP_4V, VEX_W, Sched<[WriteShuffle, ReadAfterLd]>; + XOP_4V, VEX_W, Sched<[WriteShuffleLd, ReadAfterLd]>; def rmr : IXOPi8Reg opc, string OpcodeStr, SDNode OpNode, [(set VR128:$dst, (v16i8 (OpNode (vt128 VR128:$src1), (vt128 (bitconvert (loadv2i64 addr:$src2))), (vt128 VR128:$src3))))]>, - XOP_4V, Sched<[WriteShuffle, ReadAfterLd]>; + XOP_4V, Sched<[WriteShuffleLd, ReadAfterLd]>; // For disassembler let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in def rrr_REV : IXOPi8Reg opc, string OpcodeStr, RegisterClass RC, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set RC:$dst, (VT (or (and (load addr:$src3), RC:$src1), 
(X86andnp (load addr:$src3), RC:$src2))))]>, - XOP_4V, VEX_W, Sched<[WriteShuffle, ReadAfterLd]>; + XOP_4V, VEX_W, Sched<[WriteShuffleLd, ReadAfterLd]>; def rmr : IXOPi8Reg, - XOP_4V, Sched<[WriteShuffle, ReadAfterLd]>; + XOP_4V, Sched<[WriteShuffleLd, ReadAfterLd]>; // For disassembler let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in def rrr_REV : IXOPi8Reg Opc, string OpcodeStr, RegisterClass RC, (VT (X86vpermil2 RC:$src1, RC:$src2, (bitconvert (IntLdFrag addr:$src3)), (i8 imm:$src4))))]>, VEX_W, - Sched<[WriteFShuffle, ReadAfterLd]>; + Sched<[WriteFShuffleLd, ReadAfterLd]>; def mr : IXOP5 Opc, string OpcodeStr, RegisterClass RC, [(set RC:$dst, (VT (X86vpermil2 RC:$src1, (FPLdFrag addr:$src2), RC:$src3, (i8 imm:$src4))))]>, - Sched<[WriteFShuffle, ReadAfterLd]>; + Sched<[WriteFShuffleLd, ReadAfterLd]>; // For disassembler let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in def rr_REV : IXOP5EmitInstruction(Inst, getSubtargetInfo(), EnablePrintSchedInfo); + OutStreamer->EmitInstruction(Inst, getSubtargetInfo(), + EnablePrintSchedInfo && + !(Inst.getFlags() & X86::NO_SCHED_INFO)); SMShadowTracker.count(Inst, getSubtargetInfo(), CodeEmitter.get()); } @@ -875,6 +874,10 @@ void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI, // address is to far away. (TODO: support non-relative addressing) break; case MachineOperand::MO_Register: + // FIXME: Add retpoline support and remove this. + if (Subtarget->useRetpoline()) + report_fatal_error("Lowering register statepoints with retpoline not " + "yet implemented."); CallTargetMCOp = MCOperand::createReg(CallTarget.getReg()); CallOpcode = X86::CALL64r; break; @@ -961,7 +964,7 @@ void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI, // This is an optimization that lets us get away without emitting a nop in // many cases. // - // NB! In some cases the encoding for PUSH64r (e.g. PUSH64r %R9) takes two + // NB! In some cases the encoding for PUSH64r (e.g. PUSH64r %r9) takes two // bytes too, so the check on MinSize is important. MCI.setOpcode(X86::PUSH64rmr); } else { @@ -1029,6 +1032,10 @@ void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI, EmitAndCountInstruction( MCInstBuilder(X86::MOV64ri).addReg(ScratchReg).addOperand(CalleeMCOp)); + // FIXME: Add retpoline support and remove this. + if (Subtarget->useRetpoline()) + report_fatal_error( + "Lowering patchpoint with retpoline not yet implemented."); EmitAndCountInstruction(MCInstBuilder(X86::CALL64r).addReg(ScratchReg)); } @@ -2003,6 +2010,8 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { MCInst TmpInst; MCInstLowering.Lower(MI, TmpInst); + if (MI->getAsmPrinterFlag(MachineInstr::NoSchedComment)) + TmpInst.setFlags(TmpInst.getFlags() | X86::NO_SCHED_INFO); // Stackmap shadows cannot include branch targets, so we can count the bytes // in a call towards the shadow, but must ensure that the no thread returns diff --git a/lib/Target/X86/X86OptimizeLEAs.cpp b/lib/Target/X86/X86OptimizeLEAs.cpp index cc136866c479..1fc6f07b79fa 100644 --- a/lib/Target/X86/X86OptimizeLEAs.cpp +++ b/lib/Target/X86/X86OptimizeLEAs.cpp @@ -568,6 +568,7 @@ MachineInstr *OptimizeLEAPass::replaceDebugValue(MachineInstr &MI, if (AddrDispShift != 0) Expr = DIExpression::prepend(Expr, DIExpression::NoDeref, AddrDispShift, + DIExpression::NoDeref, DIExpression::WithStackValue); // Replace DBG_VALUE instruction with modified version. 
@@ -671,7 +672,7 @@ bool OptimizeLEAPass::removeRedundantLEAs(MemOpMap &LEAs) { bool OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; - if (DisableX86LEAOpt || skipFunction(*MF.getFunction())) + if (DisableX86LEAOpt || skipFunction(MF.getFunction())) return false; MRI = &MF.getRegInfo(); @@ -695,7 +696,7 @@ bool OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) { // Remove redundant address calculations. Do it only for -Os/-Oz since only // a code size gain is expected from this part of the pass. - if (MF.getFunction()->optForSize()) + if (MF.getFunction().optForSize()) Changed |= removeRedundantAddrCalc(LEAs); } diff --git a/lib/Target/X86/X86PadShortFunction.cpp b/lib/Target/X86/X86PadShortFunction.cpp index 9b7732c1db88..1da0fad8b6cf 100644 --- a/lib/Target/X86/X86PadShortFunction.cpp +++ b/lib/Target/X86/X86PadShortFunction.cpp @@ -13,7 +13,6 @@ // //===----------------------------------------------------------------------===// -#include #include "X86.h" #include "X86InstrInfo.h" @@ -21,7 +20,6 @@ #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/Function.h" @@ -98,10 +96,10 @@ FunctionPass *llvm::createX86PadShortFunctions() { /// runOnMachineFunction - Loop over all of the basic blocks, inserting /// NOOP instructions before early exits. bool PadShortFunc::runOnMachineFunction(MachineFunction &MF) { - if (skipFunction(*MF.getFunction())) + if (skipFunction(MF.getFunction())) return false; - if (MF.getFunction()->optForSize()) { + if (MF.getFunction().optForSize()) { return false; } diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 5a2230d394f9..f979cc51da4f 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -15,24 +15,19 @@ #include "X86RegisterInfo.h" #include "X86FrameLowering.h" -#include "X86InstrBuilder.h" #include "X86MachineFunctionInfo.h" #include "X86Subtarget.h" -#include "X86TargetMachine.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/Type.h" -#include "llvm/MC/MCAsmInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetMachine.h" @@ -80,7 +75,7 @@ X86RegisterInfo::X86RegisterInfo(const Triple &TT) bool X86RegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const { - // ExecutionDepsFixer and PostRAScheduler require liveness. + // ExecutionDomainFix, BreakFalseDeps and PostRAScheduler require liveness. 
return true; } @@ -223,13 +218,13 @@ X86RegisterInfo::getPointerRegClass(const MachineFunction &MF, const TargetRegisterClass * X86RegisterInfo::getGPRsForTailCall(const MachineFunction &MF) const { - const Function *F = MF.getFunction(); - if (IsWin64 || (F && F->getCallingConv() == CallingConv::Win64)) + const Function &F = MF.getFunction(); + if (IsWin64 || (F.getCallingConv() == CallingConv::Win64)) return &X86::GR64_TCW64RegClass; else if (Is64Bit) return &X86::GR64_TCRegClass; - bool hasHipeCC = (F ? F->getCallingConv() == CallingConv::HiPE : false); + bool hasHipeCC = (F.getCallingConv() == CallingConv::HiPE); if (hasHipeCC) return &X86::GR32RegClass; return &X86::GR32_TCRegClass; @@ -271,17 +266,17 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { assert(MF && "MachineFunction required"); const X86Subtarget &Subtarget = MF->getSubtarget(); - const Function *F = MF->getFunction(); + const Function &F = MF->getFunction(); bool HasSSE = Subtarget.hasSSE1(); bool HasAVX = Subtarget.hasAVX(); bool HasAVX512 = Subtarget.hasAVX512(); bool CallsEHReturn = MF->callsEHReturn(); - CallingConv::ID CC = F->getCallingConv(); + CallingConv::ID CC = F.getCallingConv(); // If attribute NoCallerSavedRegisters exists then we set X86_INTR calling // convention because it has the CSR list. - if (MF->getFunction()->hasFnAttribute("no_caller_saved_registers")) + if (MF->getFunction().hasFnAttribute("no_caller_saved_registers")) CC = CallingConv::X86_INTR; switch (CC) { @@ -367,7 +362,7 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { if (Is64Bit) { bool IsSwiftCC = Subtarget.getTargetLowering()->supportSwiftError() && - F->getAttributes().hasAttrSomewhere(Attribute::SwiftError); + F.getAttributes().hasAttrSomewhere(Attribute::SwiftError); if (IsSwiftCC) return IsWin64 ? CSR_Win64_SwiftError_SaveList : CSR_64_SwiftError_SaveList; @@ -385,7 +380,7 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { const MCPhysReg *X86RegisterInfo::getCalleeSavedRegsViaCopy( const MachineFunction *MF) const { assert(MF && "Invalid MachineFunction pointer."); - if (MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS && + if (MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS && MF->getInfo()->isSplitCSR()) return CSR_64_CXX_TLS_Darwin_ViaCopy_SaveList; return nullptr; @@ -478,9 +473,9 @@ X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF, // Unlike getCalleeSavedRegs(), we don't have MMI so we can't check // callsEHReturn(). if (Is64Bit) { - const Function *F = MF.getFunction(); + const Function &F = MF.getFunction(); bool IsSwiftCC = Subtarget.getTargetLowering()->supportSwiftError() && - F->getAttributes().hasAttrSomewhere(Attribute::SwiftError); + F.getAttributes().hasAttrSomewhere(Attribute::SwiftError); if (IsSwiftCC) return IsWin64 ? CSR_Win64_SwiftError_RegMask : CSR_64_SwiftError_RegMask; return IsWin64 ? CSR_Win64_RegMask : CSR_64_RegMask; @@ -524,7 +519,7 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { // Set the base-pointer register and its aliases as reserved if needed. 
if (hasBasePointer(MF)) { - CallingConv::ID CC = MF.getFunction()->getCallingConv(); + CallingConv::ID CC = MF.getFunction().getCallingConv(); const uint32_t *RegMask = getCallPreservedMask(MF, CC); if (MachineOperand::clobbersPhysReg(RegMask, getBaseRegister())) report_fatal_error( diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td index b6eb37d5f0e5..ca508255c365 100644 --- a/lib/Target/X86/X86RegisterInfo.td +++ b/lib/Target/X86/X86RegisterInfo.td @@ -360,7 +360,7 @@ def GR64 : RegisterClass<"X86", [i64], 64, def SEGMENT_REG : RegisterClass<"X86", [i16], 16, (add CS, DS, SS, ES, FS, GS)>; // Debug registers. -def DEBUG_REG : RegisterClass<"X86", [i32], 32, (sequence "DR%u", 0, 7)>; +def DEBUG_REG : RegisterClass<"X86", [i32], 32, (sequence "DR%u", 0, 15)>; // Control registers. def CONTROL_REG : RegisterClass<"X86", [i64], 64, (sequence "CR%u", 0, 15)>; @@ -400,11 +400,6 @@ def GR32_NOREX : RegisterClass<"X86", [i32], 32, def GR64_NOREX : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, RSI, RDI, RBX, RBP, RSP, RIP)>; -// GR32_NOAX - GR32 registers except EAX. Used by AddRegFrm of XCHG32 in 64-bit -// mode to prevent encoding using the 0x90 NOP encoding. xchg %eax, %eax needs -// to clear upper 32-bits of RAX so is not a NOP. -def GR32_NOAX : RegisterClass<"X86", [i32], 32, (sub GR32, EAX)>; - // GR32_NOSP - GR32 registers except ESP. def GR32_NOSP : RegisterClass<"X86", [i32], 32, (sub GR32, ESP)>; diff --git a/lib/Target/X86/X86RetpolineThunks.cpp b/lib/Target/X86/X86RetpolineThunks.cpp new file mode 100644 index 000000000000..6b4bc8a4e1b3 --- /dev/null +++ b/lib/Target/X86/X86RetpolineThunks.cpp @@ -0,0 +1,276 @@ +//======- X86RetpolineThunks.cpp - Construct retpoline thunks for x86 --=====// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// Pass that injects an MI thunk implementing a "retpoline". This is +/// a RET-implemented trampoline that is used to lower indirect calls in a way +/// that prevents speculation on some x86 processors and can be used to mitigate +/// security vulnerabilities due to targeted speculative execution and side +/// channels such as CVE-2017-5715. +/// +/// TODO(chandlerc): All of this code could use better comments and +/// documentation. 
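+///
+/// Sketch of the intended use (the call-site lowering itself lives elsewhere
+/// in the backend, so this is a summary rather than code in this file): an
+/// indirect call has its target moved into a fixed scratch register (%r11 on
+/// 64-bit targets) and is emitted as a direct call to the matching
+/// __llvm_retpoline_* thunk; the thunk overwrites its own return address with
+/// that register and returns, so the real transfer of control happens through
+/// a RET whose speculative path is trapped in the pause/lfence capture loop
+/// shown in the block comments below.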
+/// +//===----------------------------------------------------------------------===// + +#include "X86.h" +#include "X86InstrBuilder.h" +#include "X86Subtarget.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +#define DEBUG_TYPE "x86-retpoline-thunks" + +namespace { +class X86RetpolineThunks : public ModulePass { +public: + static char ID; + + X86RetpolineThunks() : ModulePass(ID) {} + + StringRef getPassName() const override { return "X86 Retpoline Thunks"; } + + bool runOnModule(Module &M) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addPreserved(); + } + +private: + MachineModuleInfo *MMI; + const TargetMachine *TM; + bool Is64Bit; + const X86Subtarget *STI; + const X86InstrInfo *TII; + + Function *createThunkFunction(Module &M, StringRef Name); + void insertRegReturnAddrClobber(MachineBasicBlock &MBB, unsigned Reg); + void insert32BitPushReturnAddrClobber(MachineBasicBlock &MBB); + void createThunk(Module &M, StringRef NameSuffix, + Optional Reg = None); +}; + +} // end anonymous namespace + +ModulePass *llvm::createX86RetpolineThunksPass() { + return new X86RetpolineThunks(); +} + +char X86RetpolineThunks::ID = 0; + +bool X86RetpolineThunks::runOnModule(Module &M) { + DEBUG(dbgs() << getPassName() << '\n'); + + auto *TPC = getAnalysisIfAvailable(); + assert(TPC && "X86-specific target pass should not be run without a target " + "pass config!"); + + MMI = &getAnalysis(); + TM = &TPC->getTM(); + Is64Bit = TM->getTargetTriple().getArch() == Triple::x86_64; + + // Only add a thunk if we have at least one function that has the retpoline + // feature enabled in its subtarget. + // FIXME: Conditionalize on indirect calls so we don't emit a thunk when + // nothing will end up calling it. + // FIXME: It's a little silly to look at every function just to enumerate + // the subtargets, but eventually we'll want to look at them for indirect + // calls, so maybe this is OK. + if (!llvm::any_of(M, [&](const Function &F) { + // Save the subtarget we find for use in emitting the subsequent + // thunk. + STI = &TM->getSubtarget(F); + return STI->useRetpoline() && !STI->useRetpolineExternalThunk(); + })) + return false; + + // If we have a relevant subtarget, get the instr info as well. + TII = STI->getInstrInfo(); + + if (Is64Bit) { + // __llvm_retpoline_r11: + // callq .Lr11_call_target + // .Lr11_capture_spec: + // pause + // lfence + // jmp .Lr11_capture_spec + // .align 16 + // .Lr11_call_target: + // movq %r11, (%rsp) + // retq + + createThunk(M, "r11", X86::R11); + } else { + // For 32-bit targets we need to emit a collection of thunks for various + // possible scratch registers as well as a fallback that is used when + // there are no scratch registers and assumes the retpoline target has + // been pushed. + // __llvm_retpoline_eax: + // calll .Leax_call_target + // .Leax_capture_spec: + // pause + // jmp .Leax_capture_spec + // .align 16 + // .Leax_call_target: + // movl %eax, (%esp) # Clobber return addr + // retl + // + // __llvm_retpoline_ecx: + // ... # Same setup + // movl %ecx, (%esp) + // retl + // + // __llvm_retpoline_edx: + // ... 
# Same setup + // movl %edx, (%esp) + // retl + // + // This last one is a bit more special and so needs a little extra + // handling. + // __llvm_retpoline_push: + // calll .Lpush_call_target + // .Lpush_capture_spec: + // pause + // lfence + // jmp .Lpush_capture_spec + // .align 16 + // .Lpush_call_target: + // # Clear pause_loop return address. + // addl $4, %esp + // # Top of stack words are: Callee, RA. Exchange Callee and RA. + // pushl 4(%esp) # Push callee + // pushl 4(%esp) # Push RA + // popl 8(%esp) # Pop RA to final RA + // popl (%esp) # Pop callee to next top of stack + // retl # Ret to callee + createThunk(M, "eax", X86::EAX); + createThunk(M, "ecx", X86::ECX); + createThunk(M, "edx", X86::EDX); + createThunk(M, "push"); + } + + return true; +} + +Function *X86RetpolineThunks::createThunkFunction(Module &M, StringRef Name) { + LLVMContext &Ctx = M.getContext(); + auto Type = FunctionType::get(Type::getVoidTy(Ctx), false); + Function *F = + Function::Create(Type, GlobalValue::LinkOnceODRLinkage, Name, &M); + F->setVisibility(GlobalValue::HiddenVisibility); + F->setComdat(M.getOrInsertComdat(Name)); + + // Add Attributes so that we don't create a frame, unwind information, or + // inline. + AttrBuilder B; + B.addAttribute(llvm::Attribute::NoUnwind); + B.addAttribute(llvm::Attribute::Naked); + F->addAttributes(llvm::AttributeList::FunctionIndex, B); + + // Populate our function a bit so that we can verify. + BasicBlock *Entry = BasicBlock::Create(Ctx, "entry", F); + IRBuilder<> Builder(Entry); + + Builder.CreateRetVoid(); + return F; +} + +void X86RetpolineThunks::insertRegReturnAddrClobber(MachineBasicBlock &MBB, + unsigned Reg) { + const unsigned MovOpc = Is64Bit ? X86::MOV64mr : X86::MOV32mr; + const unsigned SPReg = Is64Bit ? X86::RSP : X86::ESP; + addRegOffset(BuildMI(&MBB, DebugLoc(), TII->get(MovOpc)), SPReg, false, 0) + .addReg(Reg); +} +void X86RetpolineThunks::insert32BitPushReturnAddrClobber( + MachineBasicBlock &MBB) { + // The instruction sequence we use to replace the return address without + // a scratch register is somewhat complicated: + // # Clear capture_spec from return address. + // addl $4, %esp + // # Top of stack words are: Callee, RA. Exchange Callee and RA. + // pushl 4(%esp) # Push callee + // pushl 4(%esp) # Push RA + // popl 8(%esp) # Pop RA to final RA + // popl (%esp) # Pop callee to next top of stack + // retl # Ret to callee + BuildMI(&MBB, DebugLoc(), TII->get(X86::ADD32ri), X86::ESP) + .addReg(X86::ESP) + .addImm(4); + addRegOffset(BuildMI(&MBB, DebugLoc(), TII->get(X86::PUSH32rmm)), X86::ESP, + false, 4); + addRegOffset(BuildMI(&MBB, DebugLoc(), TII->get(X86::PUSH32rmm)), X86::ESP, + false, 4); + addRegOffset(BuildMI(&MBB, DebugLoc(), TII->get(X86::POP32rmm)), X86::ESP, + false, 8); + addRegOffset(BuildMI(&MBB, DebugLoc(), TII->get(X86::POP32rmm)), X86::ESP, + false, 0); +} + +void X86RetpolineThunks::createThunk(Module &M, StringRef NameSuffix, + Optional Reg) { + Function &F = + *createThunkFunction(M, (Twine("__llvm_retpoline_") + NameSuffix).str()); + MachineFunction &MF = MMI->getOrCreateMachineFunction(F); + + // Set MF properties. We never use vregs... 
+ MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs); + + BasicBlock &OrigEntryBB = F.getEntryBlock(); + MachineBasicBlock *Entry = MF.CreateMachineBasicBlock(&OrigEntryBB); + MachineBasicBlock *CaptureSpec = MF.CreateMachineBasicBlock(&OrigEntryBB); + MachineBasicBlock *CallTarget = MF.CreateMachineBasicBlock(&OrigEntryBB); + + MF.push_back(Entry); + MF.push_back(CaptureSpec); + MF.push_back(CallTarget); + + const unsigned CallOpc = Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32; + const unsigned RetOpc = Is64Bit ? X86::RETQ : X86::RETL; + + BuildMI(Entry, DebugLoc(), TII->get(CallOpc)).addMBB(CallTarget); + Entry->addSuccessor(CallTarget); + Entry->addSuccessor(CaptureSpec); + CallTarget->setHasAddressTaken(); + + // In the capture loop for speculation, we want to stop the processor from + // speculating as fast as possible. On Intel processors, the PAUSE instruction + // will block speculation without consuming any execution resources. On AMD + // processors, the PAUSE instruction is (essentially) a nop, so we also use an + // LFENCE instruction which they have advised will stop speculation as well + // with minimal resource utilization. We still end the capture with a jump to + // form an infinite loop to fully guarantee that no matter what implementation + // of the x86 ISA, speculating this code path never escapes. + BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::PAUSE)); + BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::LFENCE)); + BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::JMP_1)).addMBB(CaptureSpec); + CaptureSpec->setHasAddressTaken(); + CaptureSpec->addSuccessor(CaptureSpec); + + CallTarget->setAlignment(4); + if (Reg) { + insertRegReturnAddrClobber(*CallTarget, *Reg); + } else { + assert(!Is64Bit && "We only support non-reg thunks on 32-bit x86!"); + insert32BitPushReturnAddrClobber(*CallTarget); + } + BuildMI(CallTarget, DebugLoc(), TII->get(RetOpc)); +} diff --git a/lib/Target/X86/X86SchedBroadwell.td b/lib/Target/X86/X86SchedBroadwell.td index 7fef01c72aaf..3f707822f761 100755 --- a/lib/Target/X86/X86SchedBroadwell.td +++ b/lib/Target/X86/X86SchedBroadwell.td @@ -120,6 +120,9 @@ def : WriteRes; // These can often bypass execution ports completely. def : WriteRes; +// Treat misc copies as a move. +def : InstRW<[WriteMove], (instrs COPY)>; + // Branches don't produce values, so they have no latency, but they still // consume resources. Indirect branches can fold loads. 
defm : BWWriteResPair; @@ -403,18 +406,18 @@ def: InstRW<[BWWriteResGroup3], (instregex "MMX_PUNPCKLBWirr")>; def: InstRW<[BWWriteResGroup3], (instregex "MMX_PUNPCKLDQirr")>; def: InstRW<[BWWriteResGroup3], (instregex "MMX_PUNPCKLWDirr")>; def: InstRW<[BWWriteResGroup3], (instregex "MOV64toPQIrr")>; -def: InstRW<[BWWriteResGroup3], (instregex "MOVAPDrr(_REV?)")>; -def: InstRW<[BWWriteResGroup3], (instregex "MOVAPSrr(_REV?)")>; +def: InstRW<[BWWriteResGroup3], (instregex "MOVAPDrr(_REV)?")>; +def: InstRW<[BWWriteResGroup3], (instregex "MOVAPSrr(_REV)?")>; def: InstRW<[BWWriteResGroup3], (instregex "MOVDDUPrr")>; def: InstRW<[BWWriteResGroup3], (instregex "MOVDI2PDIrr")>; def: InstRW<[BWWriteResGroup3], (instregex "MOVHLPSrr")>; def: InstRW<[BWWriteResGroup3], (instregex "MOVLHPSrr")>; -def: InstRW<[BWWriteResGroup3], (instregex "MOVSDrr(_REV?)")>; +def: InstRW<[BWWriteResGroup3], (instregex "MOVSDrr(_REV)?")>; def: InstRW<[BWWriteResGroup3], (instregex "MOVSHDUPrr")>; def: InstRW<[BWWriteResGroup3], (instregex "MOVSLDUPrr")>; -def: InstRW<[BWWriteResGroup3], (instregex "MOVSSrr(_REV?)")>; -def: InstRW<[BWWriteResGroup3], (instregex "MOVUPDrr(_REV?)")>; -def: InstRW<[BWWriteResGroup3], (instregex "MOVUPSrr(_REV?)")>; +def: InstRW<[BWWriteResGroup3], (instregex "MOVSSrr(_REV)?")>; +def: InstRW<[BWWriteResGroup3], (instregex "MOVUPDrr(_REV)?")>; +def: InstRW<[BWWriteResGroup3], (instregex "MOVUPSrr(_REV)?")>; def: InstRW<[BWWriteResGroup3], (instregex "ORPDrr")>; def: InstRW<[BWWriteResGroup3], (instregex "ORPSrr")>; def: InstRW<[BWWriteResGroup3], (instregex "PACKSSDWrr")>; @@ -466,25 +469,25 @@ def: InstRW<[BWWriteResGroup3], (instregex "VANDPSrr")>; def: InstRW<[BWWriteResGroup3], (instregex "VBROADCASTSSrr")>; def: InstRW<[BWWriteResGroup3], (instregex "VINSERTPSrr")>; def: InstRW<[BWWriteResGroup3], (instregex "VMOV64toPQIrr")>; -def: InstRW<[BWWriteResGroup3], (instregex "VMOVAPDYrr(_REV?)")>; -def: InstRW<[BWWriteResGroup3], (instregex "VMOVAPDrr(_REV?)")>; -def: InstRW<[BWWriteResGroup3], (instregex "VMOVAPSYrr(_REV?)")>; -def: InstRW<[BWWriteResGroup3], (instregex "VMOVAPSrr(_REV?)")>; +def: InstRW<[BWWriteResGroup3], (instregex "VMOVAPDYrr(_REV)?")>; +def: InstRW<[BWWriteResGroup3], (instregex "VMOVAPDrr(_REV)?")>; +def: InstRW<[BWWriteResGroup3], (instregex "VMOVAPSYrr(_REV)?")>; +def: InstRW<[BWWriteResGroup3], (instregex "VMOVAPSrr(_REV)?")>; def: InstRW<[BWWriteResGroup3], (instregex "VMOVDDUPYrr")>; def: InstRW<[BWWriteResGroup3], (instregex "VMOVDDUPrr")>; def: InstRW<[BWWriteResGroup3], (instregex "VMOVDI2PDIrr")>; def: InstRW<[BWWriteResGroup3], (instregex "VMOVHLPSrr")>; def: InstRW<[BWWriteResGroup3], (instregex "VMOVLHPSrr")>; -def: InstRW<[BWWriteResGroup3], (instregex "VMOVSDrr(_REV?)")>; +def: InstRW<[BWWriteResGroup3], (instregex "VMOVSDrr(_REV)?")>; def: InstRW<[BWWriteResGroup3], (instregex "VMOVSHDUPYrr")>; def: InstRW<[BWWriteResGroup3], (instregex "VMOVSHDUPrr")>; def: InstRW<[BWWriteResGroup3], (instregex "VMOVSLDUPYrr")>; def: InstRW<[BWWriteResGroup3], (instregex "VMOVSLDUPrr")>; -def: InstRW<[BWWriteResGroup3], (instregex "VMOVSSrr(_REV?)")>; -def: InstRW<[BWWriteResGroup3], (instregex "VMOVUPDYrr(_REV?)")>; -def: InstRW<[BWWriteResGroup3], (instregex "VMOVUPDrr(_REV?)")>; -def: InstRW<[BWWriteResGroup3], (instregex "VMOVUPSYrr(_REV?)")>; -def: InstRW<[BWWriteResGroup3], (instregex "VMOVUPSrr(_REV?)")>; +def: InstRW<[BWWriteResGroup3], (instregex "VMOVSSrr(_REV)?")>; +def: InstRW<[BWWriteResGroup3], (instregex "VMOVUPDYrr(_REV)?")>; +def: 
InstRW<[BWWriteResGroup3], (instregex "VMOVUPDrr(_REV)?")>; +def: InstRW<[BWWriteResGroup3], (instregex "VMOVUPSYrr(_REV)?")>; +def: InstRW<[BWWriteResGroup3], (instregex "VMOVUPSrr(_REV)?")>; def: InstRW<[BWWriteResGroup3], (instregex "VORPDYrr")>; def: InstRW<[BWWriteResGroup3], (instregex "VORPDrr")>; def: InstRW<[BWWriteResGroup3], (instregex "VORPSYrr")>; @@ -590,13 +593,11 @@ def BWWriteResGroup6 : SchedWriteRes<[BWPort06]> { let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[BWWriteResGroup6], (instregex "ADC(16|32|64)ri8")>; -def: InstRW<[BWWriteResGroup6], (instregex "ADC(16|32|64)rr(_REV?)")>; -def: InstRW<[BWWriteResGroup6], (instregex "ADC8rr(_REV?)")>; -def: InstRW<[BWWriteResGroup6], (instregex "ADCX32rr")>; -def: InstRW<[BWWriteResGroup6], (instregex "ADCX64rr")>; -def: InstRW<[BWWriteResGroup6], (instregex "ADOX32rr")>; -def: InstRW<[BWWriteResGroup6], (instregex "ADOX64rr")>; +def: InstRW<[BWWriteResGroup6], (instregex "ADC(16|32|64)ri")>; +def: InstRW<[BWWriteResGroup6], (instregex "ADC(16|32|64)rr(_REV)?")>; +def: InstRW<[BWWriteResGroup6], (instregex "ADC8rr(_REV)?")>; +def: InstRW<[BWWriteResGroup6], (instregex "ADCX(32|64)rr")>; +def: InstRW<[BWWriteResGroup6], (instregex "ADOX(32|64)rr")>; def: InstRW<[BWWriteResGroup6], (instregex "BT(16|32|64)ri8")>; def: InstRW<[BWWriteResGroup6], (instregex "BT(16|32|64)rr")>; def: InstRW<[BWWriteResGroup6], (instregex "BTC(16|32|64)ri8")>; @@ -606,109 +607,44 @@ def: InstRW<[BWWriteResGroup6], (instregex "BTR(16|32|64)rr")>; def: InstRW<[BWWriteResGroup6], (instregex "BTS(16|32|64)ri8")>; def: InstRW<[BWWriteResGroup6], (instregex "BTS(16|32|64)rr")>; def: InstRW<[BWWriteResGroup6], (instregex "CDQ")>; -def: InstRW<[BWWriteResGroup6], (instregex "CMOVAE(16|32|64)rr")>; -def: InstRW<[BWWriteResGroup6], (instregex "CMOVB(16|32|64)rr")>; -def: InstRW<[BWWriteResGroup6], (instregex "CMOVE(16|32|64)rr")>; -def: InstRW<[BWWriteResGroup6], (instregex "CMOVG(16|32|64)rr")>; -def: InstRW<[BWWriteResGroup6], (instregex "CMOVGE(16|32|64)rr")>; -def: InstRW<[BWWriteResGroup6], (instregex "CMOVL(16|32|64)rr")>; -def: InstRW<[BWWriteResGroup6], (instregex "CMOVLE(16|32|64)rr")>; -def: InstRW<[BWWriteResGroup6], (instregex "CMOVNE(16|32|64)rr")>; -def: InstRW<[BWWriteResGroup6], (instregex "CMOVNO(16|32|64)rr")>; -def: InstRW<[BWWriteResGroup6], (instregex "CMOVNP(16|32|64)rr")>; -def: InstRW<[BWWriteResGroup6], (instregex "CMOVNS(16|32|64)rr")>; -def: InstRW<[BWWriteResGroup6], (instregex "CMOVO(16|32|64)rr")>; -def: InstRW<[BWWriteResGroup6], (instregex "CMOVP(16|32|64)rr")>; -def: InstRW<[BWWriteResGroup6], (instregex "CMOVS(16|32|64)rr")>; +def: InstRW<[BWWriteResGroup6], (instregex "CMOV(AE|B|E|G|GE|L|LE|NE|NO|NP|NS|O|P|S)(16|32|64)rr")>; def: InstRW<[BWWriteResGroup6], (instregex "CQO")>; -def: InstRW<[BWWriteResGroup6], (instregex "JAE_1")>; -def: InstRW<[BWWriteResGroup6], (instregex "JAE_4")>; -def: InstRW<[BWWriteResGroup6], (instregex "JA_1")>; -def: InstRW<[BWWriteResGroup6], (instregex "JA_4")>; -def: InstRW<[BWWriteResGroup6], (instregex "JBE_1")>; -def: InstRW<[BWWriteResGroup6], (instregex "JBE_4")>; -def: InstRW<[BWWriteResGroup6], (instregex "JB_1")>; -def: InstRW<[BWWriteResGroup6], (instregex "JB_4")>; -def: InstRW<[BWWriteResGroup6], (instregex "JE_1")>; -def: InstRW<[BWWriteResGroup6], (instregex "JE_4")>; -def: InstRW<[BWWriteResGroup6], (instregex "JGE_1")>; -def: InstRW<[BWWriteResGroup6], (instregex "JGE_4")>; -def: InstRW<[BWWriteResGroup6], (instregex "JG_1")>; -def: InstRW<[BWWriteResGroup6], (instregex 
"JG_4")>; -def: InstRW<[BWWriteResGroup6], (instregex "JLE_1")>; -def: InstRW<[BWWriteResGroup6], (instregex "JLE_4")>; -def: InstRW<[BWWriteResGroup6], (instregex "JL_1")>; -def: InstRW<[BWWriteResGroup6], (instregex "JL_4")>; +def: InstRW<[BWWriteResGroup6], (instregex "J(A|AE|B|BE|E|G|GE|L|LE|NE|NO|NP|NS|O|P|S)_1")>; +def: InstRW<[BWWriteResGroup6], (instregex "J(A|AE|B|BE|E|G|GE|L|LE|NE|NO|NP|NS|O|P|S)_4")>; def: InstRW<[BWWriteResGroup6], (instregex "JMP_1")>; def: InstRW<[BWWriteResGroup6], (instregex "JMP_4")>; -def: InstRW<[BWWriteResGroup6], (instregex "JNE_1")>; -def: InstRW<[BWWriteResGroup6], (instregex "JNE_4")>; -def: InstRW<[BWWriteResGroup6], (instregex "JNO_1")>; -def: InstRW<[BWWriteResGroup6], (instregex "JNO_4")>; -def: InstRW<[BWWriteResGroup6], (instregex "JNP_1")>; -def: InstRW<[BWWriteResGroup6], (instregex "JNP_4")>; -def: InstRW<[BWWriteResGroup6], (instregex "JNS_1")>; -def: InstRW<[BWWriteResGroup6], (instregex "JNS_4")>; -def: InstRW<[BWWriteResGroup6], (instregex "JO_1")>; -def: InstRW<[BWWriteResGroup6], (instregex "JO_4")>; -def: InstRW<[BWWriteResGroup6], (instregex "JP_1")>; -def: InstRW<[BWWriteResGroup6], (instregex "JP_4")>; -def: InstRW<[BWWriteResGroup6], (instregex "JS_1")>; -def: InstRW<[BWWriteResGroup6], (instregex "JS_4")>; -def: InstRW<[BWWriteResGroup6], (instregex "RORX32ri")>; -def: InstRW<[BWWriteResGroup6], (instregex "RORX64ri")>; +def: InstRW<[BWWriteResGroup6], (instregex "RORX(32|64)ri")>; def: InstRW<[BWWriteResGroup6], (instregex "SAR(16|32|64)r1")>; def: InstRW<[BWWriteResGroup6], (instregex "SAR(16|32|64)ri")>; def: InstRW<[BWWriteResGroup6], (instregex "SAR8r1")>; def: InstRW<[BWWriteResGroup6], (instregex "SAR8ri")>; -def: InstRW<[BWWriteResGroup6], (instregex "SARX32rr")>; -def: InstRW<[BWWriteResGroup6], (instregex "SARX64rr")>; -def: InstRW<[BWWriteResGroup6], (instregex "SBB(16|32|64)ri8")>; -def: InstRW<[BWWriteResGroup6], (instregex "SBB(16|32|64)rr(_REV?)")>; -def: InstRW<[BWWriteResGroup6], (instregex "SBB8rr(_REV?)")>; -def: InstRW<[BWWriteResGroup6], (instregex "SETAEr")>; -def: InstRW<[BWWriteResGroup6], (instregex "SETBr")>; -def: InstRW<[BWWriteResGroup6], (instregex "SETEr")>; -def: InstRW<[BWWriteResGroup6], (instregex "SETGEr")>; -def: InstRW<[BWWriteResGroup6], (instregex "SETGr")>; -def: InstRW<[BWWriteResGroup6], (instregex "SETLEr")>; -def: InstRW<[BWWriteResGroup6], (instregex "SETLr")>; -def: InstRW<[BWWriteResGroup6], (instregex "SETNEr")>; -def: InstRW<[BWWriteResGroup6], (instregex "SETNOr")>; -def: InstRW<[BWWriteResGroup6], (instregex "SETNPr")>; -def: InstRW<[BWWriteResGroup6], (instregex "SETNSr")>; -def: InstRW<[BWWriteResGroup6], (instregex "SETOr")>; -def: InstRW<[BWWriteResGroup6], (instregex "SETPr")>; -def: InstRW<[BWWriteResGroup6], (instregex "SETSr")>; +def: InstRW<[BWWriteResGroup6], (instregex "SARX(32|64)rr")>; +def: InstRW<[BWWriteResGroup6], (instregex "SBB(16|32|64)ri")>; +def: InstRW<[BWWriteResGroup6], (instregex "SBB(16|32|64)rr(_REV)?")>; +def: InstRW<[BWWriteResGroup6], (instregex "SBB8rr(_REV)?")>; +def: InstRW<[BWWriteResGroup6], (instregex "SET(AE|B|E|G|GE|L|LE|NE|NO|NP|NS|O|P|S)r")>; def: InstRW<[BWWriteResGroup6], (instregex "SHL(16|32|64)r1")>; def: InstRW<[BWWriteResGroup6], (instregex "SHL(16|32|64)ri")>; def: InstRW<[BWWriteResGroup6], (instregex "SHL8r1")>; def: InstRW<[BWWriteResGroup6], (instregex "SHL8ri")>; -def: InstRW<[BWWriteResGroup6], (instregex "SHLX32rr")>; -def: InstRW<[BWWriteResGroup6], (instregex "SHLX64rr")>; +def: InstRW<[BWWriteResGroup6], (instregex 
"SHLX(32|64)rr")>; def: InstRW<[BWWriteResGroup6], (instregex "SHR(16|32|64)r1")>; def: InstRW<[BWWriteResGroup6], (instregex "SHR(16|32|64)ri")>; def: InstRW<[BWWriteResGroup6], (instregex "SHR8r1")>; def: InstRW<[BWWriteResGroup6], (instregex "SHR8ri")>; -def: InstRW<[BWWriteResGroup6], (instregex "SHRX32rr")>; -def: InstRW<[BWWriteResGroup6], (instregex "SHRX64rr")>; +def: InstRW<[BWWriteResGroup6], (instregex "SHRX(32|64)rr")>; def BWWriteResGroup7 : SchedWriteRes<[BWPort15]> { let Latency = 1; let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[BWWriteResGroup7], (instregex "ANDN32rr")>; -def: InstRW<[BWWriteResGroup7], (instregex "ANDN64rr")>; -def: InstRW<[BWWriteResGroup7], (instregex "BLSI32rr")>; -def: InstRW<[BWWriteResGroup7], (instregex "BLSI64rr")>; -def: InstRW<[BWWriteResGroup7], (instregex "BLSMSK32rr")>; -def: InstRW<[BWWriteResGroup7], (instregex "BLSMSK64rr")>; -def: InstRW<[BWWriteResGroup7], (instregex "BLSR32rr")>; -def: InstRW<[BWWriteResGroup7], (instregex "BLSR64rr")>; -def: InstRW<[BWWriteResGroup7], (instregex "BZHI32rr")>; -def: InstRW<[BWWriteResGroup7], (instregex "BZHI64rr")>; -def: InstRW<[BWWriteResGroup7], (instregex "LEA(16|32|64)r")>; +def: InstRW<[BWWriteResGroup7], (instregex "ANDN(32|64)rr")>; +def: InstRW<[BWWriteResGroup7], (instregex "BLSI(32|64)rr")>; +def: InstRW<[BWWriteResGroup7], (instregex "BLSMSK(32|64)rr")>; +def: InstRW<[BWWriteResGroup7], (instregex "BLSR(32|64)rr")>; +def: InstRW<[BWWriteResGroup7], (instregex "BZHI(32|64)rr")>; +def: InstRW<[BWWriteResGroup7], (instregex "LEA(16|32|64)(_32)?r")>; def: InstRW<[BWWriteResGroup7], (instregex "MMX_PABSBrr64")>; def: InstRW<[BWWriteResGroup7], (instregex "MMX_PABSDrr64")>; def: InstRW<[BWWriteResGroup7], (instregex "MMX_PABSWrr64")>; @@ -881,13 +817,13 @@ def BWWriteResGroup8 : SchedWriteRes<[BWPort015]> { def: InstRW<[BWWriteResGroup8], (instregex "BLENDPDrri")>; def: InstRW<[BWWriteResGroup8], (instregex "BLENDPSrri")>; def: InstRW<[BWWriteResGroup8], (instregex "MMX_MOVD64from64rr")>; -def: InstRW<[BWWriteResGroup8], (instregex "MMX_MOVQ64rr(_REV?)")>; +def: InstRW<[BWWriteResGroup8], (instregex "MMX_MOVQ64rr(_REV)?")>; def: InstRW<[BWWriteResGroup8], (instregex "MMX_PANDNirr")>; def: InstRW<[BWWriteResGroup8], (instregex "MMX_PANDirr")>; def: InstRW<[BWWriteResGroup8], (instregex "MMX_PORirr")>; def: InstRW<[BWWriteResGroup8], (instregex "MMX_PXORirr")>; -def: InstRW<[BWWriteResGroup8], (instregex "MOVDQArr(_REV?)")>; -def: InstRW<[BWWriteResGroup8], (instregex "MOVDQUrr(_REV?)")>; +def: InstRW<[BWWriteResGroup8], (instregex "MOVDQArr(_REV)?")>; +def: InstRW<[BWWriteResGroup8], (instregex "MOVDQUrr(_REV)?")>; def: InstRW<[BWWriteResGroup8], (instregex "MOVPQI2QIrr")>; def: InstRW<[BWWriteResGroup8], (instregex "PANDNrr")>; def: InstRW<[BWWriteResGroup8], (instregex "PANDrr")>; @@ -897,10 +833,10 @@ def: InstRW<[BWWriteResGroup8], (instregex "VBLENDPDYrri")>; def: InstRW<[BWWriteResGroup8], (instregex "VBLENDPDrri")>; def: InstRW<[BWWriteResGroup8], (instregex "VBLENDPSYrri")>; def: InstRW<[BWWriteResGroup8], (instregex "VBLENDPSrri")>; -def: InstRW<[BWWriteResGroup8], (instregex "VMOVDQAYrr(_REV?)")>; -def: InstRW<[BWWriteResGroup8], (instregex "VMOVDQArr(_REV?)")>; -def: InstRW<[BWWriteResGroup8], (instregex "VMOVDQUYrr(_REV?)")>; -def: InstRW<[BWWriteResGroup8], (instregex "VMOVDQUrr(_REV?)")>; +def: InstRW<[BWWriteResGroup8], (instregex "VMOVDQAYrr(_REV)?")>; +def: InstRW<[BWWriteResGroup8], (instregex "VMOVDQArr(_REV)?")>; +def: InstRW<[BWWriteResGroup8], (instregex 
"VMOVDQUYrr(_REV)?")>; +def: InstRW<[BWWriteResGroup8], (instregex "VMOVDQUrr(_REV)?")>; def: InstRW<[BWWriteResGroup8], (instregex "VMOVPQI2QIrr")>; def: InstRW<[BWWriteResGroup8], (instregex "VMOVZPQILo2PQIrr")>; def: InstRW<[BWWriteResGroup8], (instregex "VPANDNYrr")>; @@ -919,34 +855,33 @@ def BWWriteResGroup9 : SchedWriteRes<[BWPort0156]> { let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[BWWriteResGroup9], (instregex "ADD(16|32|64)ri8")>; -def: InstRW<[BWWriteResGroup9], (instregex "ADD(16|32|64)rr(_REV?)")>; +def: InstRW<[BWWriteResGroup9], (instregex "ADD(16|32|64)ri")>; +def: InstRW<[BWWriteResGroup9], (instregex "ADD(16|32|64)rr(_REV)?")>; def: InstRW<[BWWriteResGroup9], (instregex "ADD8i8")>; def: InstRW<[BWWriteResGroup9], (instregex "ADD8ri")>; -def: InstRW<[BWWriteResGroup9], (instregex "ADD8rr(_REV?)")>; -def: InstRW<[BWWriteResGroup9], (instregex "AND(16|32|64)ri8")>; -def: InstRW<[BWWriteResGroup9], (instregex "AND(16|32|64)rr(_REV?)")>; +def: InstRW<[BWWriteResGroup9], (instregex "ADD8rr(_REV)?")>; +def: InstRW<[BWWriteResGroup9], (instregex "AND(16|32|64)ri")>; +def: InstRW<[BWWriteResGroup9], (instregex "AND(16|32|64)rr(_REV)?")>; def: InstRW<[BWWriteResGroup9], (instregex "AND8i8")>; def: InstRW<[BWWriteResGroup9], (instregex "AND8ri")>; -def: InstRW<[BWWriteResGroup9], (instregex "AND8rr(_REV?)")>; +def: InstRW<[BWWriteResGroup9], (instregex "AND8rr(_REV)?")>; def: InstRW<[BWWriteResGroup9], (instregex "CBW")>; def: InstRW<[BWWriteResGroup9], (instregex "CLC")>; def: InstRW<[BWWriteResGroup9], (instregex "CMC")>; -def: InstRW<[BWWriteResGroup9], (instregex "CMP(16|32|64)ri8")>; -def: InstRW<[BWWriteResGroup9], (instregex "CMP(16|32|64)rr(_REV?)")>; +def: InstRW<[BWWriteResGroup9], (instregex "CMP(16|32|64)ri")>; +def: InstRW<[BWWriteResGroup9], (instregex "CMP(16|32|64)rr(_REV)?")>; def: InstRW<[BWWriteResGroup9], (instregex "CMP8i8")>; def: InstRW<[BWWriteResGroup9], (instregex "CMP8ri")>; -def: InstRW<[BWWriteResGroup9], (instregex "CMP8rr(_REV?)")>; +def: InstRW<[BWWriteResGroup9], (instregex "CMP8rr(_REV)?")>; def: InstRW<[BWWriteResGroup9], (instregex "CWDE")>; def: InstRW<[BWWriteResGroup9], (instregex "DEC(16|32|64)r")>; def: InstRW<[BWWriteResGroup9], (instregex "DEC8r")>; def: InstRW<[BWWriteResGroup9], (instregex "INC(16|32|64)r")>; def: InstRW<[BWWriteResGroup9], (instregex "INC8r")>; def: InstRW<[BWWriteResGroup9], (instregex "LAHF")>; -def: InstRW<[BWWriteResGroup9], (instregex "MOV(16|32|64)rr(_REV?)")>; -def: InstRW<[BWWriteResGroup9], (instregex "MOV8ri")>; -def: InstRW<[BWWriteResGroup9], (instregex "MOV8ri_alt")>; -def: InstRW<[BWWriteResGroup9], (instregex "MOV8rr(_REV?)")>; +def: InstRW<[BWWriteResGroup9], (instregex "MOV(16|32|64)rr(_REV)?")>; +def: InstRW<[BWWriteResGroup9], (instregex "MOV8ri(_alt)?")>; +def: InstRW<[BWWriteResGroup9], (instregex "MOV8rr(_REV)?")>; def: InstRW<[BWWriteResGroup9], (instregex "MOVSX(16|32|64)rr16")>; def: InstRW<[BWWriteResGroup9], (instregex "MOVSX(16|32|64)rr32")>; def: InstRW<[BWWriteResGroup9], (instregex "MOVSX(16|32|64)rr8")>; @@ -957,11 +892,11 @@ def: InstRW<[BWWriteResGroup9], (instregex "NEG8r")>; def: InstRW<[BWWriteResGroup9], (instregex "NOOP")>; def: InstRW<[BWWriteResGroup9], (instregex "NOT(16|32|64)r")>; def: InstRW<[BWWriteResGroup9], (instregex "NOT8r")>; -def: InstRW<[BWWriteResGroup9], (instregex "OR(16|32|64)ri8")>; -def: InstRW<[BWWriteResGroup9], (instregex "OR(16|32|64)rr(_REV?)")>; +def: InstRW<[BWWriteResGroup9], (instregex "OR(16|32|64)ri")>; +def: 
InstRW<[BWWriteResGroup9], (instregex "OR(16|32|64)rr(_REV)?")>; def: InstRW<[BWWriteResGroup9], (instregex "OR8i8")>; def: InstRW<[BWWriteResGroup9], (instregex "OR8ri")>; -def: InstRW<[BWWriteResGroup9], (instregex "OR8rr(_REV?)")>; +def: InstRW<[BWWriteResGroup9], (instregex "OR8rr(_REV)?")>; def: InstRW<[BWWriteResGroup9], (instregex "SAHF")>; def: InstRW<[BWWriteResGroup9], (instregex "SGDT64m")>; def: InstRW<[BWWriteResGroup9], (instregex "SIDT64m")>; @@ -969,22 +904,22 @@ def: InstRW<[BWWriteResGroup9], (instregex "SLDT64m")>; def: InstRW<[BWWriteResGroup9], (instregex "SMSW16m")>; def: InstRW<[BWWriteResGroup9], (instregex "STC")>; def: InstRW<[BWWriteResGroup9], (instregex "STRm")>; -def: InstRW<[BWWriteResGroup9], (instregex "SUB(16|32|64)ri8")>; -def: InstRW<[BWWriteResGroup9], (instregex "SUB(16|32|64)rr(_REV?)")>; +def: InstRW<[BWWriteResGroup9], (instregex "SUB(16|32|64)ri")>; +def: InstRW<[BWWriteResGroup9], (instregex "SUB(16|32|64)rr(_REV)?")>; def: InstRW<[BWWriteResGroup9], (instregex "SUB8i8")>; def: InstRW<[BWWriteResGroup9], (instregex "SUB8ri")>; -def: InstRW<[BWWriteResGroup9], (instregex "SUB8rr(_REV?)")>; +def: InstRW<[BWWriteResGroup9], (instregex "SUB8rr(_REV)?")>; def: InstRW<[BWWriteResGroup9], (instregex "SYSCALL")>; def: InstRW<[BWWriteResGroup9], (instregex "TEST(16|32|64)rr")>; def: InstRW<[BWWriteResGroup9], (instregex "TEST8i8")>; def: InstRW<[BWWriteResGroup9], (instregex "TEST8ri")>; def: InstRW<[BWWriteResGroup9], (instregex "TEST8rr")>; def: InstRW<[BWWriteResGroup9], (instregex "XCHG(16|32|64)rr")>; -def: InstRW<[BWWriteResGroup9], (instregex "XOR(16|32|64)ri8")>; -def: InstRW<[BWWriteResGroup9], (instregex "XOR(16|32|64)rr(_REV?)")>; +def: InstRW<[BWWriteResGroup9], (instregex "XOR(16|32|64)ri")>; +def: InstRW<[BWWriteResGroup9], (instregex "XOR(16|32|64)rr(_REV)?")>; def: InstRW<[BWWriteResGroup9], (instregex "XOR8i8")>; def: InstRW<[BWWriteResGroup9], (instregex "XOR8ri")>; -def: InstRW<[BWWriteResGroup9], (instregex "XOR8rr(_REV?)")>; +def: InstRW<[BWWriteResGroup9], (instregex "XOR8rr(_REV)?")>; def BWWriteResGroup10 : SchedWriteRes<[BWPort4,BWPort237]> { let Latency = 1; @@ -1015,6 +950,7 @@ def: InstRW<[BWWriteResGroup10], (instregex "MOVNTPSmr")>; def: InstRW<[BWWriteResGroup10], (instregex "MOVPDI2DImr")>; def: InstRW<[BWWriteResGroup10], (instregex "MOVPQI2QImr")>; def: InstRW<[BWWriteResGroup10], (instregex "MOVPQIto64mr")>; +def: InstRW<[BWWriteResGroup10], (instregex "MOVSDmr")>; def: InstRW<[BWWriteResGroup10], (instregex "MOVSSmr")>; def: InstRW<[BWWriteResGroup10], (instregex "MOVUPDmr")>; def: InstRW<[BWWriteResGroup10], (instregex "MOVUPSmr")>; @@ -1175,8 +1111,7 @@ def BWWriteResGroup19 : SchedWriteRes<[BWPort06,BWPort15]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[BWWriteResGroup19], (instregex "BEXTR32rr")>; -def: InstRW<[BWWriteResGroup19], (instregex "BEXTR64rr")>; +def: InstRW<[BWWriteResGroup19], (instregex "BEXTR(32|64)rr")>; def: InstRW<[BWWriteResGroup19], (instregex "BSWAP(16|32|64)r")>; def BWWriteResGroup20 : SchedWriteRes<[BWPort06,BWPort0156]> { @@ -1186,14 +1121,12 @@ def BWWriteResGroup20 : SchedWriteRes<[BWPort06,BWPort0156]> { } def: InstRW<[BWWriteResGroup20], (instregex "ADC8i8")>; def: InstRW<[BWWriteResGroup20], (instregex "ADC8ri")>; -def: InstRW<[BWWriteResGroup20], (instregex "CMOVA(16|32|64)rr")>; -def: InstRW<[BWWriteResGroup20], (instregex "CMOVBE(16|32|64)rr")>; +def: InstRW<[BWWriteResGroup20], (instregex "CMOV(A|BE)(16|32|64)rr")>; def: InstRW<[BWWriteResGroup20], (instregex 
"CWD")>; def: InstRW<[BWWriteResGroup20], (instregex "JRCXZ")>; def: InstRW<[BWWriteResGroup20], (instregex "SBB8i8")>; def: InstRW<[BWWriteResGroup20], (instregex "SBB8ri")>; -def: InstRW<[BWWriteResGroup20], (instregex "SETAr")>; -def: InstRW<[BWWriteResGroup20], (instregex "SETBEr")>; +def: InstRW<[BWWriteResGroup20], (instregex "SET(A|BE)r")>; def BWWriteResGroup21 : SchedWriteRes<[BWPort4,BWPort5,BWPort237]> { let Latency = 2; @@ -1225,20 +1158,7 @@ def BWWriteResGroup23 : SchedWriteRes<[BWPort4,BWPort237,BWPort06]> { let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } -def: InstRW<[BWWriteResGroup23], (instregex "SETAEm")>; -def: InstRW<[BWWriteResGroup23], (instregex "SETBm")>; -def: InstRW<[BWWriteResGroup23], (instregex "SETEm")>; -def: InstRW<[BWWriteResGroup23], (instregex "SETGEm")>; -def: InstRW<[BWWriteResGroup23], (instregex "SETGm")>; -def: InstRW<[BWWriteResGroup23], (instregex "SETLEm")>; -def: InstRW<[BWWriteResGroup23], (instregex "SETLm")>; -def: InstRW<[BWWriteResGroup23], (instregex "SETNEm")>; -def: InstRW<[BWWriteResGroup23], (instregex "SETNOm")>; -def: InstRW<[BWWriteResGroup23], (instregex "SETNPm")>; -def: InstRW<[BWWriteResGroup23], (instregex "SETNSm")>; -def: InstRW<[BWWriteResGroup23], (instregex "SETOm")>; -def: InstRW<[BWWriteResGroup23], (instregex "SETPm")>; -def: InstRW<[BWWriteResGroup23], (instregex "SETSm")>; +def: InstRW<[BWWriteResGroup23], (instregex "SET(AE|B|E|G|GE|L|LE|NE|NO|NP|NS|O|P|S)m")>; def BWWriteResGroup24 : SchedWriteRes<[BWPort4,BWPort237,BWPort15]> { let Latency = 2; @@ -1252,8 +1172,7 @@ def BWWriteResGroup25 : SchedWriteRes<[BWPort4,BWPort237,BWPort0156]> { let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } -def: InstRW<[BWWriteResGroup25], (instregex "PUSH(16|32|64)r")>; -def: InstRW<[BWWriteResGroup25], (instregex "PUSH(16|32|64)rmr")>; +def: InstRW<[BWWriteResGroup25], (instregex "PUSH(16|32|64)r(mr)?")>; def: InstRW<[BWWriteResGroup25], (instregex "PUSH64i8")>; def: InstRW<[BWWriteResGroup25], (instregex "STOSB")>; def: InstRW<[BWWriteResGroup25], (instregex "STOSL")>; @@ -1293,29 +1212,28 @@ def: InstRW<[BWWriteResGroup27], (instregex "BSF(16|32|64)rr")>; def: InstRW<[BWWriteResGroup27], (instregex "BSR(16|32|64)rr")>; def: InstRW<[BWWriteResGroup27], (instregex "CMPPDrri")>; def: InstRW<[BWWriteResGroup27], (instregex "CMPPSrri")>; +def: InstRW<[BWWriteResGroup27], (instregex "CMPSDrr")>; def: InstRW<[BWWriteResGroup27], (instregex "CMPSSrr")>; def: InstRW<[BWWriteResGroup27], (instregex "COMISDrr")>; def: InstRW<[BWWriteResGroup27], (instregex "COMISSrr")>; def: InstRW<[BWWriteResGroup27], (instregex "CVTDQ2PSrr")>; def: InstRW<[BWWriteResGroup27], (instregex "CVTPS2DQrr")>; def: InstRW<[BWWriteResGroup27], (instregex "CVTTPS2DQrr")>; -def: InstRW<[BWWriteResGroup27], (instregex "IMUL(32|64)rr(i8?)")>; +def: InstRW<[BWWriteResGroup27], (instregex "IMUL(32|64)rr(i8)?")>; def: InstRW<[BWWriteResGroup27], (instregex "IMUL8r")>; def: InstRW<[BWWriteResGroup27], (instregex "LZCNT(16|32|64)rr")>; -def: InstRW<[BWWriteResGroup27], (instregex "MAXPDrr")>; -def: InstRW<[BWWriteResGroup27], (instregex "MAXPSrr")>; -def: InstRW<[BWWriteResGroup27], (instregex "MAXSDrr")>; -def: InstRW<[BWWriteResGroup27], (instregex "MAXSSrr")>; -def: InstRW<[BWWriteResGroup27], (instregex "MINPDrr")>; -def: InstRW<[BWWriteResGroup27], (instregex "MINPSrr")>; -def: InstRW<[BWWriteResGroup27], (instregex "MINSDrr")>; -def: InstRW<[BWWriteResGroup27], (instregex "MINSSrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "MAX(C?)PDrr")>; +def: 
InstRW<[BWWriteResGroup27], (instregex "MAX(C?)PSrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "MAX(C?)SDrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "MAX(C?)SSrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "MIN(C?)PDrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "MIN(C?)PSrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "MIN(C?)SDrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "MIN(C?)SSrr")>; def: InstRW<[BWWriteResGroup27], (instregex "MMX_CVTPI2PSirr")>; def: InstRW<[BWWriteResGroup27], (instregex "MUL8r")>; -def: InstRW<[BWWriteResGroup27], (instregex "PDEP32rr")>; -def: InstRW<[BWWriteResGroup27], (instregex "PDEP64rr")>; -def: InstRW<[BWWriteResGroup27], (instregex "PEXT32rr")>; -def: InstRW<[BWWriteResGroup27], (instregex "PEXT64rr")>; +def: InstRW<[BWWriteResGroup27], (instregex "PDEP(32|64)rr")>; +def: InstRW<[BWWriteResGroup27], (instregex "PEXT(32|64)rr")>; def: InstRW<[BWWriteResGroup27], (instregex "POPCNT(16|32|64)rr")>; def: InstRW<[BWWriteResGroup27], (instregex "SHLD(16|32|64)rri8")>; def: InstRW<[BWWriteResGroup27], (instregex "SHRD(16|32|64)rri8")>; @@ -1356,18 +1274,18 @@ def: InstRW<[BWWriteResGroup27], (instregex "VCVTPS2DQYrr")>; def: InstRW<[BWWriteResGroup27], (instregex "VCVTPS2DQrr")>; def: InstRW<[BWWriteResGroup27], (instregex "VCVTTPS2DQYrr")>; def: InstRW<[BWWriteResGroup27], (instregex "VCVTTPS2DQrr")>; -def: InstRW<[BWWriteResGroup27], (instregex "VMAXPDYrr")>; -def: InstRW<[BWWriteResGroup27], (instregex "VMAXPDrr")>; -def: InstRW<[BWWriteResGroup27], (instregex "VMAXPSYrr")>; -def: InstRW<[BWWriteResGroup27], (instregex "VMAXPSrr")>; -def: InstRW<[BWWriteResGroup27], (instregex "VMAXSDrr")>; -def: InstRW<[BWWriteResGroup27], (instregex "VMAXSSrr")>; -def: InstRW<[BWWriteResGroup27], (instregex "VMINPDYrr")>; -def: InstRW<[BWWriteResGroup27], (instregex "VMINPDrr")>; -def: InstRW<[BWWriteResGroup27], (instregex "VMINPSYrr")>; -def: InstRW<[BWWriteResGroup27], (instregex "VMINPSrr")>; -def: InstRW<[BWWriteResGroup27], (instregex "VMINSDrr")>; -def: InstRW<[BWWriteResGroup27], (instregex "VMINSSrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "VMAX(C?)PDYrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "VMAX(C?)PDrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "VMAX(C?)PSYrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "VMAX(C?)PSrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "VMAX(C?)SDrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "VMAX(C?)SSrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "VMIN(C?)PDYrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "VMIN(C?)PDrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "VMIN(C?)PSYrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "VMIN(C?)PSrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "VMIN(C?)SDrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "VMIN(C?)SSrr")>; def: InstRW<[BWWriteResGroup27], (instregex "VSUBPDYrr")>; def: InstRW<[BWWriteResGroup27], (instregex "VSUBPDrr")>; def: InstRW<[BWWriteResGroup27], (instregex "VSUBPSYrr")>; @@ -1382,7 +1300,7 @@ def BWWriteResGroup27_16 : SchedWriteRes<[BWPort1, BWPort0156]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[BWWriteResGroup27_16], (instregex "IMUL16rr(i8?)")>; +def: InstRW<[BWWriteResGroup27_16], (instregex "IMUL16rr(i8)?")>; def BWWriteResGroup28 : SchedWriteRes<[BWPort5]> { let Latency = 3; @@ -1546,8 +1464,7 @@ def BWWriteResGroup38 : SchedWriteRes<[BWPort4,BWPort237,BWPort06,BWPort0156]> { let ResourceCycles = [1,1,1,1]; } def: InstRW<[BWWriteResGroup38], 
(instregex "CALL64pcrel32")>; -def: InstRW<[BWWriteResGroup38], (instregex "SETAm")>; -def: InstRW<[BWWriteResGroup38], (instregex "SETBEm")>; +def: InstRW<[BWWriteResGroup38], (instregex "SET(A|BE)m")>; def BWWriteResGroup39 : SchedWriteRes<[BWPort0,BWPort1]> { let Latency = 4; @@ -1603,7 +1520,7 @@ def: InstRW<[BWWriteResGroup42], (instregex "CVTDQ2PDrr")>; def: InstRW<[BWWriteResGroup42], (instregex "CVTPD2DQrr")>; def: InstRW<[BWWriteResGroup42], (instregex "CVTPD2PSrr")>; def: InstRW<[BWWriteResGroup42], (instregex "CVTSD2SSrr")>; -def: InstRW<[BWWriteResGroup42], (instregex "CVTSI2SD64rr")>; +def: InstRW<[BWWriteResGroup42], (instregex "CVTSI642SDrr")>; def: InstRW<[BWWriteResGroup42], (instregex "CVTSI2SDrr")>; def: InstRW<[BWWriteResGroup42], (instregex "CVTSI2SSrr")>; def: InstRW<[BWWriteResGroup42], (instregex "CVTTPD2DQrr")>; @@ -1620,7 +1537,7 @@ def: InstRW<[BWWriteResGroup42], (instregex "VCVTPD2DQrr")>; def: InstRW<[BWWriteResGroup42], (instregex "VCVTPD2PSrr")>; def: InstRW<[BWWriteResGroup42], (instregex "VCVTPS2PHrr")>; def: InstRW<[BWWriteResGroup42], (instregex "VCVTSD2SSrr")>; -def: InstRW<[BWWriteResGroup42], (instregex "VCVTSI2SD64rr")>; +def: InstRW<[BWWriteResGroup42], (instregex "VCVTSI642SDrr")>; def: InstRW<[BWWriteResGroup42], (instregex "VCVTSI2SDrr")>; def: InstRW<[BWWriteResGroup42], (instregex "VCVTSI2SSrr")>; def: InstRW<[BWWriteResGroup42], (instregex "VCVTTPD2DQrr")>; @@ -1733,102 +1650,9 @@ def BWWriteResGroup48 : SchedWriteRes<[BWPort01]> { let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[BWWriteResGroup48], (instregex "VFMADD132PDYr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFMADD132PDr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFMADD132PSYr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFMADD132PSr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFMADD132SDr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFMADD132SSr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFMADD213PDYr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFMADD213PDr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFMADD213PSYr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFMADD213PSr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFMADD213SDr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFMADD213SSr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFMADD231PDYr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFMADD231PDr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFMADD231PSYr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFMADD231PSr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFMADD231SDr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFMADD231SSr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFMADDSUB132PDYr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFMADDSUB132PDr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFMADDSUB132PSYr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFMADDSUB132PSr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFMADDSUB213PDYr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFMADDSUB213PDr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFMADDSUB213PSYr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFMADDSUB213PSr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFMADDSUB231PDYr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFMADDSUB231PDr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFMADDSUB231PSYr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFMADDSUB231PSr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFMSUB132PDYr")>; 
-def: InstRW<[BWWriteResGroup48], (instregex "VFMSUB132PDr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFMSUB132PSYr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFMSUB132PSr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFMSUB132SDr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFMSUB132SSr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFMSUB213PDYr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFMSUB213PDr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFMSUB213PSYr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFMSUB213PSr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFMSUB213SDr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFMSUB213SSr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFMSUB231PDYr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFMSUB231PDr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFMSUB231PSYr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFMSUB231PSr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFMSUB231SDr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFMSUB231SSr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFMSUBADD132PDYr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFMSUBADD132PDr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFMSUBADD132PSYr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFMSUBADD132PSr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFMSUBADD213PDYr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFMSUBADD213PDr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFMSUBADD213PSYr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFMSUBADD213PSr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFMSUBADD231PDYr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFMSUBADD231PDr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFMSUBADD231PSYr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFMSUBADD231PSr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFNMADD132PDYr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFNMADD132PDr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFNMADD132PSYr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFNMADD132PSr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFNMADD132SDr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFNMADD132SSr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFNMADD213PDYr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFNMADD213PDr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFNMADD213PSYr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFNMADD213PSr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFNMADD213SDr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFNMADD213SSr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFNMADD231PDYr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFNMADD231PDr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFNMADD231PSYr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFNMADD231PSr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFNMADD231SDr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFNMADD231SSr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFNMSUB132PDYr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFNMSUB132PDr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFNMSUB132PSYr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFNMSUB132PSr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFNMSUB132SDr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFNMSUB132SSr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFNMSUB213PDYr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFNMSUB213PDr")>; -def: 
InstRW<[BWWriteResGroup48], (instregex "VFNMSUB213PSYr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFNMSUB213PSr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFNMSUB213SDr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFNMSUB213SSr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFNMSUB231PDYr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFNMSUB231PDr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFNMSUB231PSYr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFNMSUB231PSr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFNMSUB231SDr")>; -def: InstRW<[BWWriteResGroup48], (instregex "VFNMSUB231SSr")>; +def: InstRW<[BWWriteResGroup48], + (instregex "VF(N)?M(ADD|SUB|ADDSUB|SUBADD)(132|213|231)P(D|S)(Y)?r", + "VF(N)?M(ADD|SUB)(132|213|231)S(D|S)r")>; def BWWriteResGroup49 : SchedWriteRes<[BWPort23]> { let Latency = 5; @@ -1850,6 +1674,8 @@ def: InstRW<[BWWriteResGroup49], (instregex "MOVDI2PDIrm")>; def: InstRW<[BWWriteResGroup49], (instregex "MOVDQArm")>; def: InstRW<[BWWriteResGroup49], (instregex "MOVDQUrm")>; def: InstRW<[BWWriteResGroup49], (instregex "MOVNTDQArm")>; +def: InstRW<[BWWriteResGroup49], (instregex "MOVQI2PQIrm")>; +def: InstRW<[BWWriteResGroup49], (instregex "MOVSDrm")>; def: InstRW<[BWWriteResGroup49], (instregex "MOVSHDUPrm")>; def: InstRW<[BWWriteResGroup49], (instregex "MOVSLDUPrm")>; def: InstRW<[BWWriteResGroup49], (instregex "MOVSSrm")>; @@ -1889,12 +1715,12 @@ def BWWriteResGroup50 : SchedWriteRes<[BWPort1,BWPort5]> { let NumMicroOps = 3; let ResourceCycles = [1,2]; } -def: InstRW<[BWWriteResGroup50], (instregex "CVTSI2SS64rr")>; +def: InstRW<[BWWriteResGroup50], (instregex "CVTSI642SSrr")>; def: InstRW<[BWWriteResGroup50], (instregex "HADDPDrr")>; def: InstRW<[BWWriteResGroup50], (instregex "HADDPSrr")>; def: InstRW<[BWWriteResGroup50], (instregex "HSUBPDrr")>; def: InstRW<[BWWriteResGroup50], (instregex "HSUBPSrr")>; -def: InstRW<[BWWriteResGroup50], (instregex "VCVTSI2SS64rr")>; +def: InstRW<[BWWriteResGroup50], (instregex "VCVTSI642SSrr")>; def: InstRW<[BWWriteResGroup50], (instregex "VHADDPDYrr")>; def: InstRW<[BWWriteResGroup50], (instregex "VHADDPDrr")>; def: InstRW<[BWWriteResGroup50], (instregex "VHADDPSYrr")>; @@ -2174,51 +2000,27 @@ def BWWriteResGroup63 : SchedWriteRes<[BWPort23,BWPort06]> { } def: InstRW<[BWWriteResGroup63], (instregex "ADC(16|32|64)rm")>; def: InstRW<[BWWriteResGroup63], (instregex "ADC8rm")>; -def: InstRW<[BWWriteResGroup63], (instregex "ADCX32rm")>; -def: InstRW<[BWWriteResGroup63], (instregex "ADCX64rm")>; -def: InstRW<[BWWriteResGroup63], (instregex "ADOX32rm")>; -def: InstRW<[BWWriteResGroup63], (instregex "ADOX64rm")>; +def: InstRW<[BWWriteResGroup63], (instregex "ADCX(32|64)rm")>; +def: InstRW<[BWWriteResGroup63], (instregex "ADOX(32|64)rm")>; def: InstRW<[BWWriteResGroup63], (instregex "BT(16|32|64)mi8")>; -def: InstRW<[BWWriteResGroup63], (instregex "CMOVAE(16|32|64)rm")>; -def: InstRW<[BWWriteResGroup63], (instregex "CMOVB(16|32|64)rm")>; -def: InstRW<[BWWriteResGroup63], (instregex "CMOVE(16|32|64)rm")>; -def: InstRW<[BWWriteResGroup63], (instregex "CMOVG(16|32|64)rm")>; -def: InstRW<[BWWriteResGroup63], (instregex "CMOVGE(16|32|64)rm")>; -def: InstRW<[BWWriteResGroup63], (instregex "CMOVL(16|32|64)rm")>; -def: InstRW<[BWWriteResGroup63], (instregex "CMOVLE(16|32|64)rm")>; -def: InstRW<[BWWriteResGroup63], (instregex "CMOVNE(16|32|64)rm")>; -def: InstRW<[BWWriteResGroup63], (instregex "CMOVNO(16|32|64)rm")>; -def: InstRW<[BWWriteResGroup63], (instregex "CMOVNP(16|32|64)rm")>; -def: 
InstRW<[BWWriteResGroup63], (instregex "CMOVNS(16|32|64)rm")>; -def: InstRW<[BWWriteResGroup63], (instregex "CMOVO(16|32|64)rm")>; -def: InstRW<[BWWriteResGroup63], (instregex "CMOVP(16|32|64)rm")>; -def: InstRW<[BWWriteResGroup63], (instregex "CMOVS(16|32|64)rm")>; -def: InstRW<[BWWriteResGroup63], (instregex "RORX32mi")>; -def: InstRW<[BWWriteResGroup63], (instregex "RORX64mi")>; -def: InstRW<[BWWriteResGroup63], (instregex "SARX32rm")>; -def: InstRW<[BWWriteResGroup63], (instregex "SARX64rm")>; +def: InstRW<[BWWriteResGroup63], (instregex "CMOV(AE|B|E|G|GE|L|LE|NE|NO|NP|NS|O|P|S)(16|32|64)rm")>; +def: InstRW<[BWWriteResGroup63], (instregex "RORX(32|64)mi")>; +def: InstRW<[BWWriteResGroup63], (instregex "SARX(32|64)rm")>; def: InstRW<[BWWriteResGroup63], (instregex "SBB(16|32|64)rm")>; def: InstRW<[BWWriteResGroup63], (instregex "SBB8rm")>; -def: InstRW<[BWWriteResGroup63], (instregex "SHLX32rm")>; -def: InstRW<[BWWriteResGroup63], (instregex "SHLX64rm")>; -def: InstRW<[BWWriteResGroup63], (instregex "SHRX32rm")>; -def: InstRW<[BWWriteResGroup63], (instregex "SHRX64rm")>; +def: InstRW<[BWWriteResGroup63], (instregex "SHLX(32|64)rm")>; +def: InstRW<[BWWriteResGroup63], (instregex "SHRX(32|64)rm")>; def BWWriteResGroup64 : SchedWriteRes<[BWPort23,BWPort15]> { let Latency = 6; let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[BWWriteResGroup64], (instregex "ANDN32rm")>; -def: InstRW<[BWWriteResGroup64], (instregex "ANDN64rm")>; -def: InstRW<[BWWriteResGroup64], (instregex "BLSI32rm")>; -def: InstRW<[BWWriteResGroup64], (instregex "BLSI64rm")>; -def: InstRW<[BWWriteResGroup64], (instregex "BLSMSK32rm")>; -def: InstRW<[BWWriteResGroup64], (instregex "BLSMSK64rm")>; -def: InstRW<[BWWriteResGroup64], (instregex "BLSR32rm")>; -def: InstRW<[BWWriteResGroup64], (instregex "BLSR64rm")>; -def: InstRW<[BWWriteResGroup64], (instregex "BZHI32rm")>; -def: InstRW<[BWWriteResGroup64], (instregex "BZHI64rm")>; +def: InstRW<[BWWriteResGroup64], (instregex "ANDN(32|64)rm")>; +def: InstRW<[BWWriteResGroup64], (instregex "BLSI(32|64)rm")>; +def: InstRW<[BWWriteResGroup64], (instregex "BLSMSK(32|64)rm")>; +def: InstRW<[BWWriteResGroup64], (instregex "BLSR(32|64)rm")>; +def: InstRW<[BWWriteResGroup64], (instregex "BZHI(32|64)rm")>; def: InstRW<[BWWriteResGroup64], (instregex "MMX_PABSBrm64")>; def: InstRW<[BWWriteResGroup64], (instregex "MMX_PABSDrm64")>; def: InstRW<[BWWriteResGroup64], (instregex "MMX_PABSWrm64")>; @@ -2375,7 +2177,7 @@ def: InstRW<[BWWriteResGroup66], (instregex "ADD(16|32|64)rm")>; def: InstRW<[BWWriteResGroup66], (instregex "ADD8rm")>; def: InstRW<[BWWriteResGroup66], (instregex "AND(16|32|64)rm")>; def: InstRW<[BWWriteResGroup66], (instregex "AND8rm")>; -def: InstRW<[BWWriteResGroup66], (instregex "CMP(16|32|64)mi8")>; +def: InstRW<[BWWriteResGroup66], (instregex "CMP(16|32|64)mi")>; def: InstRW<[BWWriteResGroup66], (instregex "CMP(16|32|64)mr")>; def: InstRW<[BWWriteResGroup66], (instregex "CMP(16|32|64)rm")>; def: InstRW<[BWWriteResGroup66], (instregex "CMP8mi")>; @@ -2383,8 +2185,7 @@ def: InstRW<[BWWriteResGroup66], (instregex "CMP8mr")>; def: InstRW<[BWWriteResGroup66], (instregex "CMP8rm")>; def: InstRW<[BWWriteResGroup66], (instregex "OR(16|32|64)rm")>; def: InstRW<[BWWriteResGroup66], (instregex "OR8rm")>; -def: InstRW<[BWWriteResGroup66], (instregex "POP(16|32|64)r")>; -def: InstRW<[BWWriteResGroup66], (instregex "POP(16|32|64)rmr")>; +def: InstRW<[BWWriteResGroup66], (instregex "POP(16|32|64)r(mr)?")>; def: InstRW<[BWWriteResGroup66], (instregex 
"SUB(16|32|64)rm")>; def: InstRW<[BWWriteResGroup66], (instregex "SUB8rm")>; def: InstRW<[BWWriteResGroup66], (instregex "TEST(16|32|64)mr")>; @@ -2434,11 +2235,11 @@ def BWWriteResGroup70 : SchedWriteRes<[BWPort4,BWPort23,BWPort237,BWPort0156]> { let NumMicroOps = 4; let ResourceCycles = [1,1,1,1]; } -def: InstRW<[BWWriteResGroup70], (instregex "ADD(16|32|64)mi8")>; +def: InstRW<[BWWriteResGroup70], (instregex "ADD(16|32|64)mi")>; def: InstRW<[BWWriteResGroup70], (instregex "ADD(16|32|64)mr")>; def: InstRW<[BWWriteResGroup70], (instregex "ADD8mi")>; def: InstRW<[BWWriteResGroup70], (instregex "ADD8mr")>; -def: InstRW<[BWWriteResGroup70], (instregex "AND(16|32|64)mi8")>; +def: InstRW<[BWWriteResGroup70], (instregex "AND(16|32|64)mi")>; def: InstRW<[BWWriteResGroup70], (instregex "AND(16|32|64)mr")>; def: InstRW<[BWWriteResGroup70], (instregex "AND8mi")>; def: InstRW<[BWWriteResGroup70], (instregex "AND8mr")>; @@ -2450,17 +2251,17 @@ def: InstRW<[BWWriteResGroup70], (instregex "NEG(16|32|64)m")>; def: InstRW<[BWWriteResGroup70], (instregex "NEG8m")>; def: InstRW<[BWWriteResGroup70], (instregex "NOT(16|32|64)m")>; def: InstRW<[BWWriteResGroup70], (instregex "NOT8m")>; -def: InstRW<[BWWriteResGroup70], (instregex "OR(16|32|64)mi8")>; +def: InstRW<[BWWriteResGroup70], (instregex "OR(16|32|64)mi")>; def: InstRW<[BWWriteResGroup70], (instregex "OR(16|32|64)mr")>; def: InstRW<[BWWriteResGroup70], (instregex "OR8mi")>; def: InstRW<[BWWriteResGroup70], (instregex "OR8mr")>; def: InstRW<[BWWriteResGroup70], (instregex "POP(16|32|64)rmm")>; def: InstRW<[BWWriteResGroup70], (instregex "PUSH(16|32|64)rmm")>; -def: InstRW<[BWWriteResGroup70], (instregex "SUB(16|32|64)mi8")>; +def: InstRW<[BWWriteResGroup70], (instregex "SUB(16|32|64)mi")>; def: InstRW<[BWWriteResGroup70], (instregex "SUB(16|32|64)mr")>; def: InstRW<[BWWriteResGroup70], (instregex "SUB8mi")>; def: InstRW<[BWWriteResGroup70], (instregex "SUB8mr")>; -def: InstRW<[BWWriteResGroup70], (instregex "XOR(16|32|64)mi8")>; +def: InstRW<[BWWriteResGroup70], (instregex "XOR(16|32|64)mi")>; def: InstRW<[BWWriteResGroup70], (instregex "XOR(16|32|64)mr")>; def: InstRW<[BWWriteResGroup70], (instregex "XOR8mi")>; def: InstRW<[BWWriteResGroup70], (instregex "XOR8mr")>; @@ -2709,16 +2510,14 @@ def BWWriteResGroup85 : SchedWriteRes<[BWPort23,BWPort06,BWPort15]> { let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } -def: InstRW<[BWWriteResGroup85], (instregex "BEXTR32rm")>; -def: InstRW<[BWWriteResGroup85], (instregex "BEXTR64rm")>; +def: InstRW<[BWWriteResGroup85], (instregex "BEXTR(32|64)rm")>; def BWWriteResGroup86 : SchedWriteRes<[BWPort23,BWPort06,BWPort0156]> { let Latency = 7; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } -def: InstRW<[BWWriteResGroup86], (instregex "CMOVA(16|32|64)rm")>; -def: InstRW<[BWWriteResGroup86], (instregex "CMOVBE(16|32|64)rm")>; +def: InstRW<[BWWriteResGroup86], (instregex "CMOV(A|BE)(16|32|64)rm")>; def BWWriteResGroup87 : SchedWriteRes<[BWPort4,BWPort23,BWPort237,BWPort06]> { let Latency = 7; @@ -2772,6 +2571,7 @@ def: InstRW<[BWWriteResGroup91], (instregex "BSF(16|32|64)rm")>; def: InstRW<[BWWriteResGroup91], (instregex "BSR(16|32|64)rm")>; def: InstRW<[BWWriteResGroup91], (instregex "CMPPDrmi")>; def: InstRW<[BWWriteResGroup91], (instregex "CMPPSrmi")>; +def: InstRW<[BWWriteResGroup91], (instregex "CMPSDrm")>; def: InstRW<[BWWriteResGroup91], (instregex "CMPSSrm")>; def: InstRW<[BWWriteResGroup91], (instregex "COMISDrm")>; def: InstRW<[BWWriteResGroup91], (instregex "COMISSrm")>; @@ -2779,26 +2579,24 @@ def: 
InstRW<[BWWriteResGroup91], (instregex "CVTDQ2PSrm")>; def: InstRW<[BWWriteResGroup91], (instregex "CVTPS2DQrm")>; def: InstRW<[BWWriteResGroup91], (instregex "CVTTPS2DQrm")>; def: InstRW<[BWWriteResGroup91], (instregex "IMUL64m")>; -def: InstRW<[BWWriteResGroup91], (instregex "IMUL(32|64)rm(i8?)")>; +def: InstRW<[BWWriteResGroup91], (instregex "IMUL(32|64)rm(i8)?")>; def: InstRW<[BWWriteResGroup91], (instregex "IMUL8m")>; def: InstRW<[BWWriteResGroup91], (instregex "LZCNT(16|32|64)rm")>; -def: InstRW<[BWWriteResGroup91], (instregex "MAXPDrm")>; -def: InstRW<[BWWriteResGroup91], (instregex "MAXPSrm")>; -def: InstRW<[BWWriteResGroup91], (instregex "MAXSDrm")>; -def: InstRW<[BWWriteResGroup91], (instregex "MAXSSrm")>; -def: InstRW<[BWWriteResGroup91], (instregex "MINPDrm")>; -def: InstRW<[BWWriteResGroup91], (instregex "MINPSrm")>; -def: InstRW<[BWWriteResGroup91], (instregex "MINSDrm")>; -def: InstRW<[BWWriteResGroup91], (instregex "MINSSrm")>; +def: InstRW<[BWWriteResGroup91], (instregex "MAX(C?)PDrm")>; +def: InstRW<[BWWriteResGroup91], (instregex "MAX(C?)PSrm")>; +def: InstRW<[BWWriteResGroup91], (instregex "MAX(C?)SDrm")>; +def: InstRW<[BWWriteResGroup91], (instregex "MAX(C?)SSrm")>; +def: InstRW<[BWWriteResGroup91], (instregex "MIN(C?)PDrm")>; +def: InstRW<[BWWriteResGroup91], (instregex "MIN(C?)PSrm")>; +def: InstRW<[BWWriteResGroup91], (instregex "MIN(C?)SDrm")>; +def: InstRW<[BWWriteResGroup91], (instregex "MIN(C?)SSrm")>; def: InstRW<[BWWriteResGroup91], (instregex "MMX_CVTPI2PSirm")>; def: InstRW<[BWWriteResGroup91], (instregex "MMX_CVTPS2PIirm")>; def: InstRW<[BWWriteResGroup91], (instregex "MMX_CVTTPS2PIirm")>; def: InstRW<[BWWriteResGroup91], (instregex "MUL64m")>; def: InstRW<[BWWriteResGroup91], (instregex "MUL8m")>; -def: InstRW<[BWWriteResGroup91], (instregex "PDEP32rm")>; -def: InstRW<[BWWriteResGroup91], (instregex "PDEP64rm")>; -def: InstRW<[BWWriteResGroup91], (instregex "PEXT32rm")>; -def: InstRW<[BWWriteResGroup91], (instregex "PEXT64rm")>; +def: InstRW<[BWWriteResGroup91], (instregex "PDEP(32|64)rm")>; +def: InstRW<[BWWriteResGroup91], (instregex "PEXT(32|64)rm")>; def: InstRW<[BWWriteResGroup91], (instregex "POPCNT(16|32|64)rm")>; def: InstRW<[BWWriteResGroup91], (instregex "SUBPDrm")>; def: InstRW<[BWWriteResGroup91], (instregex "SUBPSrm")>; @@ -2822,14 +2620,14 @@ def: InstRW<[BWWriteResGroup91], (instregex "VCOMISSrm")>; def: InstRW<[BWWriteResGroup91], (instregex "VCVTDQ2PSrm")>; def: InstRW<[BWWriteResGroup91], (instregex "VCVTPS2DQrm")>; def: InstRW<[BWWriteResGroup91], (instregex "VCVTTPS2DQrm")>; -def: InstRW<[BWWriteResGroup91], (instregex "VMAXPDrm")>; -def: InstRW<[BWWriteResGroup91], (instregex "VMAXPSrm")>; -def: InstRW<[BWWriteResGroup91], (instregex "VMAXSDrm")>; -def: InstRW<[BWWriteResGroup91], (instregex "VMAXSSrm")>; -def: InstRW<[BWWriteResGroup91], (instregex "VMINPDrm")>; -def: InstRW<[BWWriteResGroup91], (instregex "VMINPSrm")>; -def: InstRW<[BWWriteResGroup91], (instregex "VMINSDrm")>; -def: InstRW<[BWWriteResGroup91], (instregex "VMINSSrm")>; +def: InstRW<[BWWriteResGroup91], (instregex "VMAX(C?)PDrm")>; +def: InstRW<[BWWriteResGroup91], (instregex "VMAX(C?)PSrm")>; +def: InstRW<[BWWriteResGroup91], (instregex "VMAX(C?)SDrm")>; +def: InstRW<[BWWriteResGroup91], (instregex "VMAX(C?)SSrm")>; +def: InstRW<[BWWriteResGroup91], (instregex "VMIN(C?)PDrm")>; +def: InstRW<[BWWriteResGroup91], (instregex "VMIN(C?)PSrm")>; +def: InstRW<[BWWriteResGroup91], (instregex "VMIN(C?)SDrm")>; +def: InstRW<[BWWriteResGroup91], (instregex "VMIN(C?)SSrm")>; def: 
InstRW<[BWWriteResGroup91], (instregex "VSUBPDrm")>; def: InstRW<[BWWriteResGroup91], (instregex "VSUBPSrm")>; def: InstRW<[BWWriteResGroup91], (instregex "VSUBSDrm")>; @@ -2842,7 +2640,7 @@ def BWWriteResGroup91_16 : SchedWriteRes<[BWPort1, BWPort0156, BWPort23]> { let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } -def: InstRW<[BWWriteResGroup91_16], (instregex "IMUL16rm(i8?)")>; +def: InstRW<[BWWriteResGroup91_16], (instregex "IMUL16rm(i8)?")>; def BWWriteResGroup91_16_2 : SchedWriteRes<[BWPort1, BWPort0156, BWPort23]> { let Latency = 8; @@ -2959,7 +2757,7 @@ def BWWriteResGroup99 : SchedWriteRes<[BWPort4,BWPort23,BWPort237,BWPort0156]> { let NumMicroOps = 6; let ResourceCycles = [1,1,1,3]; } -def: InstRW<[BWWriteResGroup99], (instregex "ADC(16|32|64)mi8")>; +def: InstRW<[BWWriteResGroup99], (instregex "ADC(16|32|64)mi")>; def: InstRW<[BWWriteResGroup99], (instregex "ADC8mi")>; def: InstRW<[BWWriteResGroup99], (instregex "ADD8mi")>; def: InstRW<[BWWriteResGroup99], (instregex "AND8mi")>; @@ -2982,7 +2780,7 @@ def: InstRW<[BWWriteResGroup100], (instregex "ROL(16|32|64)mCL")>; def: InstRW<[BWWriteResGroup100], (instregex "ROL8mCL")>; def: InstRW<[BWWriteResGroup100], (instregex "SAR(16|32|64)mCL")>; def: InstRW<[BWWriteResGroup100], (instregex "SAR8mCL")>; -def: InstRW<[BWWriteResGroup100], (instregex "SBB(16|32|64)mi8")>; +def: InstRW<[BWWriteResGroup100], (instregex "SBB(16|32|64)mi")>; def: InstRW<[BWWriteResGroup100], (instregex "SBB(16|32|64)mr")>; def: InstRW<[BWWriteResGroup100], (instregex "SBB8mi")>; def: InstRW<[BWWriteResGroup100], (instregex "SBB8mr")>; @@ -3014,10 +2812,10 @@ def: InstRW<[BWWriteResGroup101], (instregex "VCMPPSYrmi")>; def: InstRW<[BWWriteResGroup101], (instregex "VCVTDQ2PSYrm")>; def: InstRW<[BWWriteResGroup101], (instregex "VCVTPS2DQYrm")>; def: InstRW<[BWWriteResGroup101], (instregex "VCVTTPS2DQYrm")>; -def: InstRW<[BWWriteResGroup101], (instregex "VMAXPDYrm")>; -def: InstRW<[BWWriteResGroup101], (instregex "VMAXPSYrm")>; -def: InstRW<[BWWriteResGroup101], (instregex "VMINPDYrm")>; -def: InstRW<[BWWriteResGroup101], (instregex "VMINPSYrm")>; +def: InstRW<[BWWriteResGroup101], (instregex "VMAX(C?)PDYrm")>; +def: InstRW<[BWWriteResGroup101], (instregex "VMAX(C?)PSYrm")>; +def: InstRW<[BWWriteResGroup101], (instregex "VMIN(C?)PDYrm")>; +def: InstRW<[BWWriteResGroup101], (instregex "VMIN(C?)PSYrm")>; def: InstRW<[BWWriteResGroup101], (instregex "VSUBPDYrm")>; def: InstRW<[BWWriteResGroup101], (instregex "VSUBPSYrm")>; @@ -3213,66 +3011,9 @@ def BWWriteResGroup116 : SchedWriteRes<[BWPort01,BWPort23]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[BWWriteResGroup116], (instregex "VFMADD132PDm")>; -def: InstRW<[BWWriteResGroup116], (instregex "VFMADD132PSm")>; -def: InstRW<[BWWriteResGroup116], (instregex "VFMADD132SDm")>; -def: InstRW<[BWWriteResGroup116], (instregex "VFMADD132SSm")>; -def: InstRW<[BWWriteResGroup116], (instregex "VFMADD213PDm")>; -def: InstRW<[BWWriteResGroup116], (instregex "VFMADD213PSm")>; -def: InstRW<[BWWriteResGroup116], (instregex "VFMADD213SDm")>; -def: InstRW<[BWWriteResGroup116], (instregex "VFMADD213SSm")>; -def: InstRW<[BWWriteResGroup116], (instregex "VFMADD231PDm")>; -def: InstRW<[BWWriteResGroup116], (instregex "VFMADD231PSm")>; -def: InstRW<[BWWriteResGroup116], (instregex "VFMADD231SDm")>; -def: InstRW<[BWWriteResGroup116], (instregex "VFMADD231SSm")>; -def: InstRW<[BWWriteResGroup116], (instregex "VFMADDSUB132PDm")>; -def: InstRW<[BWWriteResGroup116], (instregex "VFMADDSUB132PSm")>; -def: 
InstRW<[BWWriteResGroup116], (instregex "VFMADDSUB213PDm")>; -def: InstRW<[BWWriteResGroup116], (instregex "VFMADDSUB213PSm")>; -def: InstRW<[BWWriteResGroup116], (instregex "VFMADDSUB231PDm")>; -def: InstRW<[BWWriteResGroup116], (instregex "VFMADDSUB231PSm")>; -def: InstRW<[BWWriteResGroup116], (instregex "VFMSUB132PDm")>; -def: InstRW<[BWWriteResGroup116], (instregex "VFMSUB132PSm")>; -def: InstRW<[BWWriteResGroup116], (instregex "VFMSUB132SDm")>; -def: InstRW<[BWWriteResGroup116], (instregex "VFMSUB132SSm")>; -def: InstRW<[BWWriteResGroup116], (instregex "VFMSUB213PDm")>; -def: InstRW<[BWWriteResGroup116], (instregex "VFMSUB213PSm")>; -def: InstRW<[BWWriteResGroup116], (instregex "VFMSUB213SDm")>; -def: InstRW<[BWWriteResGroup116], (instregex "VFMSUB213SSm")>; -def: InstRW<[BWWriteResGroup116], (instregex "VFMSUB231PDm")>; -def: InstRW<[BWWriteResGroup116], (instregex "VFMSUB231PSm")>; -def: InstRW<[BWWriteResGroup116], (instregex "VFMSUB231SDm")>; -def: InstRW<[BWWriteResGroup116], (instregex "VFMSUB231SSm")>; -def: InstRW<[BWWriteResGroup116], (instregex "VFMSUBADD132PDm")>; -def: InstRW<[BWWriteResGroup116], (instregex "VFMSUBADD132PSm")>; -def: InstRW<[BWWriteResGroup116], (instregex "VFMSUBADD213PDm")>; -def: InstRW<[BWWriteResGroup116], (instregex "VFMSUBADD213PSm")>; -def: InstRW<[BWWriteResGroup116], (instregex "VFMSUBADD231PDm")>; -def: InstRW<[BWWriteResGroup116], (instregex "VFMSUBADD231PSm")>; -def: InstRW<[BWWriteResGroup116], (instregex "VFNMADD132PDm")>; -def: InstRW<[BWWriteResGroup116], (instregex "VFNMADD132PSm")>; -def: InstRW<[BWWriteResGroup116], (instregex "VFNMADD132SDm")>; -def: InstRW<[BWWriteResGroup116], (instregex "VFNMADD132SSm")>; -def: InstRW<[BWWriteResGroup116], (instregex "VFNMADD213PDm")>; -def: InstRW<[BWWriteResGroup116], (instregex "VFNMADD213PSm")>; -def: InstRW<[BWWriteResGroup116], (instregex "VFNMADD213SDm")>; -def: InstRW<[BWWriteResGroup116], (instregex "VFNMADD213SSm")>; -def: InstRW<[BWWriteResGroup116], (instregex "VFNMADD231PDm")>; -def: InstRW<[BWWriteResGroup116], (instregex "VFNMADD231PSm")>; -def: InstRW<[BWWriteResGroup116], (instregex "VFNMADD231SDm")>; -def: InstRW<[BWWriteResGroup116], (instregex "VFNMADD231SSm")>; -def: InstRW<[BWWriteResGroup116], (instregex "VFNMSUB132PDm")>; -def: InstRW<[BWWriteResGroup116], (instregex "VFNMSUB132PSm")>; -def: InstRW<[BWWriteResGroup116], (instregex "VFNMSUB132SDm")>; -def: InstRW<[BWWriteResGroup116], (instregex "VFNMSUB132SSm")>; -def: InstRW<[BWWriteResGroup116], (instregex "VFNMSUB213PDm")>; -def: InstRW<[BWWriteResGroup116], (instregex "VFNMSUB213PSm")>; -def: InstRW<[BWWriteResGroup116], (instregex "VFNMSUB213SDm")>; -def: InstRW<[BWWriteResGroup116], (instregex "VFNMSUB213SSm")>; -def: InstRW<[BWWriteResGroup116], (instregex "VFNMSUB231PDm")>; -def: InstRW<[BWWriteResGroup116], (instregex "VFNMSUB231PSm")>; -def: InstRW<[BWWriteResGroup116], (instregex "VFNMSUB231SDm")>; -def: InstRW<[BWWriteResGroup116], (instregex "VFNMSUB231SSm")>; +def: InstRW<[BWWriteResGroup116], + (instregex "VF(N)?M(ADD|SUB|ADDSUB|SUBADD)(132|213|231)P(D|S)m", + "VF(N)?M(ADD|SUB)(132|213|231)S(D|S)m")>; def BWWriteResGroup117 : SchedWriteRes<[BWPort1,BWPort23]> { let Latency = 10; @@ -3352,42 +3093,8 @@ def BWWriteResGroup124 : SchedWriteRes<[BWPort01,BWPort23]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[BWWriteResGroup124], (instregex "VFMADD132PDYm")>; -def: InstRW<[BWWriteResGroup124], (instregex "VFMADD132PSYm")>; -def: InstRW<[BWWriteResGroup124], (instregex "VFMADD213PDYm")>; -def: 
InstRW<[BWWriteResGroup124], (instregex "VFMADD213PSYm")>; -def: InstRW<[BWWriteResGroup124], (instregex "VFMADD231PDYm")>; -def: InstRW<[BWWriteResGroup124], (instregex "VFMADD231PSYm")>; -def: InstRW<[BWWriteResGroup124], (instregex "VFMADDSUB132PDYm")>; -def: InstRW<[BWWriteResGroup124], (instregex "VFMADDSUB132PSYm")>; -def: InstRW<[BWWriteResGroup124], (instregex "VFMADDSUB213PDYm")>; -def: InstRW<[BWWriteResGroup124], (instregex "VFMADDSUB213PSYm")>; -def: InstRW<[BWWriteResGroup124], (instregex "VFMADDSUB231PDYm")>; -def: InstRW<[BWWriteResGroup124], (instregex "VFMADDSUB231PSYm")>; -def: InstRW<[BWWriteResGroup124], (instregex "VFMSUB132PDYm")>; -def: InstRW<[BWWriteResGroup124], (instregex "VFMSUB132PSYm")>; -def: InstRW<[BWWriteResGroup124], (instregex "VFMSUB213PDYm")>; -def: InstRW<[BWWriteResGroup124], (instregex "VFMSUB213PSYm")>; -def: InstRW<[BWWriteResGroup124], (instregex "VFMSUB231PDYm")>; -def: InstRW<[BWWriteResGroup124], (instregex "VFMSUB231PSYm")>; -def: InstRW<[BWWriteResGroup124], (instregex "VFMSUBADD132PDYm")>; -def: InstRW<[BWWriteResGroup124], (instregex "VFMSUBADD132PSYm")>; -def: InstRW<[BWWriteResGroup124], (instregex "VFMSUBADD213PDYm")>; -def: InstRW<[BWWriteResGroup124], (instregex "VFMSUBADD213PSYm")>; -def: InstRW<[BWWriteResGroup124], (instregex "VFMSUBADD231PDYm")>; -def: InstRW<[BWWriteResGroup124], (instregex "VFMSUBADD231PSYm")>; -def: InstRW<[BWWriteResGroup124], (instregex "VFNMADD132PDYm")>; -def: InstRW<[BWWriteResGroup124], (instregex "VFNMADD132PSYm")>; -def: InstRW<[BWWriteResGroup124], (instregex "VFNMADD213PDYm")>; -def: InstRW<[BWWriteResGroup124], (instregex "VFNMADD213PSYm")>; -def: InstRW<[BWWriteResGroup124], (instregex "VFNMADD231PDYm")>; -def: InstRW<[BWWriteResGroup124], (instregex "VFNMADD231PSYm")>; -def: InstRW<[BWWriteResGroup124], (instregex "VFNMSUB132PDYm")>; -def: InstRW<[BWWriteResGroup124], (instregex "VFNMSUB132PSYm")>; -def: InstRW<[BWWriteResGroup124], (instregex "VFNMSUB213PDYm")>; -def: InstRW<[BWWriteResGroup124], (instregex "VFNMSUB213PSYm")>; -def: InstRW<[BWWriteResGroup124], (instregex "VFNMSUB231PDYm")>; -def: InstRW<[BWWriteResGroup124], (instregex "VFNMSUB231PSYm")>; +def: InstRW<[BWWriteResGroup124], + (instregex "VF(N)?M(ADD|SUB|ADDSUB|SUBADD)(132|213|231)P(D|S)Ym")>; def BWWriteResGroup125 : SchedWriteRes<[BWPort0]> { let Latency = 11; @@ -3826,7 +3533,7 @@ def BWWriteResGroup176 : SchedWriteRes<[BWPort6,BWPort23,BWPort0156]> { let NumMicroOps = 19; let ResourceCycles = [3,1,15]; } -def: InstRW<[BWWriteResGroup176], (instregex "XRSTOR(64?)")>; +def: InstRW<[BWWriteResGroup176], (instregex "XRSTOR(64)?")>; def BWWriteResGroup177 : SchedWriteRes<[BWPort0,BWPort1,BWPort23]> { let Latency = 24; @@ -3889,50 +3596,50 @@ def BWWriteResGroup183_1 : SchedWriteRes<[BWPort4, BWPort5, BWPort23, BWPort0156 let NumMicroOps = 7; let ResourceCycles = [1,3,2,1]; } -def: InstRW<[BWWriteResGroup183_1], (instregex "VGATHERQPDrm")>; +def: InstRW<[BWWriteResGroup183_1], (instrs VGATHERQPDrm)>; def BWWriteResGroup183_2 : SchedWriteRes<[BWPort4, BWPort5, BWPort23, BWPort0156]> { let Latency = 23; let NumMicroOps = 9; let ResourceCycles = [1,3,4,1]; } -def: InstRW<[BWWriteResGroup183_2], (instregex "VGATHERQPDYrm")>; +def: InstRW<[BWWriteResGroup183_2], (instrs VGATHERQPDYrm)>; def BWWriteResGroup183_3 : SchedWriteRes<[BWPort4, BWPort5, BWPort23, BWPort0156]> { let Latency = 24; let NumMicroOps = 9; let ResourceCycles = [1,5,2,1]; } -def: InstRW<[BWWriteResGroup183_3], (instregex "VGATHERQPSYrm")>; +def: 
InstRW<[BWWriteResGroup183_3], (instrs VGATHERQPSYrm)>; def BWWriteResGroup183_4 : SchedWriteRes<[BWPort4, BWPort5, BWPort23, BWPort0156]> { let Latency = 25; let NumMicroOps = 7; let ResourceCycles = [1,3,2,1]; } -def: InstRW<[BWWriteResGroup183_4], (instregex "VGATHERDPDrm")>; -def: InstRW<[BWWriteResGroup183_4], (instregex "VGATHERDPSrm")>; +def: InstRW<[BWWriteResGroup183_4], (instrs VGATHERDPDrm, + VGATHERDPSrm)>; def BWWriteResGroup183_5 : SchedWriteRes<[BWPort4, BWPort5, BWPort23, BWPort0156]> { let Latency = 26; let NumMicroOps = 9; let ResourceCycles = [1,5,2,1]; } -def: InstRW<[BWWriteResGroup183_5], (instregex "VGATHERDPDYrm")>; +def: InstRW<[BWWriteResGroup183_5], (instrs VGATHERDPDYrm)>; def BWWriteResGroup183_6 : SchedWriteRes<[BWPort4, BWPort5, BWPort23, BWPort0156]> { let Latency = 26; let NumMicroOps = 14; let ResourceCycles = [1,4,8,1]; } -def: InstRW<[BWWriteResGroup183_6], (instregex "VGATHERDPSYrm")>; +def: InstRW<[BWWriteResGroup183_6], (instrs VGATHERDPSYrm)>; def BWWriteResGroup183_7 : SchedWriteRes<[BWPort4, BWPort5, BWPort23, BWPort0156]> { let Latency = 27; let NumMicroOps = 9; let ResourceCycles = [1,5,2,1]; } -def: InstRW<[BWWriteResGroup183_7], (instregex "VGATHERQPSrm")>; +def: InstRW<[BWWriteResGroup183_7], (instrs VGATHERQPSrm)>; def BWWriteResGroup184 : SchedWriteRes<[BWPort0,BWPort5,BWPort015]> { let Latency = 29; @@ -3954,7 +3661,7 @@ def BWWriteResGroup186 : SchedWriteRes<[BWPort4,BWPort6,BWPort23,BWPort237,BWPor let NumMicroOps = 28; let ResourceCycles = [1,6,1,1,19]; } -def: InstRW<[BWWriteResGroup186], (instregex "XSAVE(OPT?)")>; +def: InstRW<[BWWriteResGroup186], (instregex "XSAVE(OPT)?")>; def BWWriteResGroup187 : SchedWriteRes<[BWPort01,BWPort15,BWPort015,BWPort0156]> { let Latency = 31; @@ -3991,8 +3698,8 @@ def BWWriteResGroup191 : SchedWriteRes<[BWPort5,BWPort6,BWPort23,BWPort06,BWPort let NumMicroOps = 23; let ResourceCycles = [1,5,3,4,10]; } -def: InstRW<[BWWriteResGroup191], (instregex "IN32ri")>; -def: InstRW<[BWWriteResGroup191], (instregex "IN32rr")>; +def: InstRW<[BWWriteResGroup191], (instregex "IN(16|32)ri")>; +def: InstRW<[BWWriteResGroup191], (instregex "IN(16|32)rr")>; def: InstRW<[BWWriteResGroup191], (instregex "IN8ri")>; def: InstRW<[BWWriteResGroup191], (instregex "IN8rr")>; @@ -4009,8 +3716,8 @@ def BWWriteResGroup194 : SchedWriteRes<[BWPort5,BWPort6,BWPort23,BWPort237,BWPor let NumMicroOps = 23; let ResourceCycles = [1,5,2,1,4,10]; } -def: InstRW<[BWWriteResGroup194], (instregex "OUT32ir")>; -def: InstRW<[BWWriteResGroup194], (instregex "OUT32rr")>; +def: InstRW<[BWWriteResGroup194], (instregex "OUT(16|32)ir")>; +def: InstRW<[BWWriteResGroup194], (instregex "OUT(16|32)rr")>; def: InstRW<[BWWriteResGroup194], (instregex "OUT8ir")>; def: InstRW<[BWWriteResGroup194], (instregex "OUT8rr")>; @@ -4034,7 +3741,6 @@ def BWWriteResGroup197 : SchedWriteRes<[BWPort0,BWPort01,BWPort23,BWPort05,BWPor let ResourceCycles = [2,2,8,1,10,2,39]; } def: InstRW<[BWWriteResGroup197], (instregex "FLDENVm")>; -def: InstRW<[BWWriteResGroup197], (instregex "FLDENVm")>; def BWWriteResGroup198 : SchedWriteRes<[BWPort0,BWPort6,BWPort23,BWPort05,BWPort06,BWPort15,BWPort0156]> { let Latency = 63; @@ -4070,7 +3776,6 @@ def BWWriteResGroup202 : SchedWriteRes<[BWPort0,BWPort1,BWPort4,BWPort5,BWPort6, let ResourceCycles = [9,9,11,8,1,11,21,30]; } def: InstRW<[BWWriteResGroup202], (instregex "FSTENVm")>; -def: InstRW<[BWWriteResGroup202], (instregex "FSTENVm")>; } // SchedModel diff --git a/lib/Target/X86/X86SchedHaswell.td b/lib/Target/X86/X86SchedHaswell.td 
index 5b9223432df2..35beb5a57304 100644 --- a/lib/Target/X86/X86SchedHaswell.td +++ b/lib/Target/X86/X86SchedHaswell.td @@ -17,7 +17,7 @@ def HaswellModel : SchedMachineModel { // instructions per cycle. let IssueWidth = 4; let MicroOpBufferSize = 192; // Based on the reorder buffer. - let LoadLatency = 4; + let LoadLatency = 5; let MispredictPenalty = 16; // Based on the LSD (loop-stream detector) queue size and benchmarking data. @@ -70,9 +70,9 @@ def HWPortAny : ProcResGroup<[HWPort0, HWPort1, HWPort2, HWPort3, HWPort4, // Integer division issued on port 0. def HWDivider : ProcResource<1>; -// Loads are 4 cycles, so ReadAfterLd registers needn't be available until 4 +// Loads are 5 cycles, so ReadAfterLd registers needn't be available until 5 // cycles after the memory operand. -def : ReadAdvance<ReadAfterLd, 4>; +def : ReadAdvance<ReadAfterLd, 5>; // Many SchedWrites are defined in pairs with and without a folded load. // Instructions with folded loads are usually micro-fused, so they only appear @@ -85,10 +85,10 @@ multiclass HWWriteResPair<X86FoldableSchedWrite SchedRW, ProcResourceKind ExePort, int Lat> { // Register variant is using a single cycle on ExePort. def : WriteRes<SchedRW, [ExePort]> { let Latency = Lat; } - // Memory variant also uses a cycle on port 2/3 and adds 4 cycles to the + // Memory variant also uses a cycle on port 2/3 and adds 5 cycles to the // latency. def : WriteRes<SchedRW.Folded, [HWPort23, ExePort]> { - let Latency = !add(Lat, 4); + let Latency = !add(Lat, 5); } } @@ -99,7 +99,7 @@ def : WriteRes<WriteRMW, [HWPort4]>; // Store_addr on 237. // Store_data on 4. def : WriteRes<WriteStore, [HWPort237, HWPort4]>; -def : WriteRes<WriteLoad, [HWPort23]> { let Latency = 4; } +def : WriteRes<WriteLoad, [HWPort23]> { let Latency = 5; } def : WriteRes<WriteMove, [HWPort0156]>; def : WriteRes<WriteZero, []>; @@ -435,7 +435,7 @@ def : InstRW<[WriteALULd], (instregex "MOV16rm")>; // MOVSX, MOVZX. // r,m. -def : InstRW<[WriteLoad], (instregex "MOV(S|Z)X32rm(8|16)")>; +def : InstRW<[WriteLoad], (instregex "MOV(S|Z)X32rm8")>; // XLAT. def WriteXLAT : SchedWriteRes<[]> { @@ -535,9 +535,6 @@ def WriteMOVS : SchedWriteRes<[HWPort23, HWPort4, HWPort0156]> { } def : InstRW<[WriteMOVS], (instregex "MOVS(B|L|Q|W)")>; -// SCAS. -def : InstRW<[Write2P0156_P23], (instregex "SCAS(B|W|L|Q)")>; - // CMPS. def WriteCMPS : SchedWriteRes<[HWPort23, HWPort0156]> { let Latency = 4; @@ -678,81 +675,6 @@ def WriteFNINIT : SchedWriteRes<[]> { } def : InstRW<[WriteFNINIT], (instregex "FNINIT")>; -//=== Integer MMX and XMM Instructions ===// - -// PBLENDW. -// x,x,i / v,v,v,i -def WritePBLENDWr : SchedWriteRes<[HWPort5]>; -def : InstRW<[WritePBLENDWr], (instregex "(V?)PBLENDW(Y?)rri")>; - -// x,m,i / v,v,m,i -def WritePBLENDWm : SchedWriteRes<[HWPort5, HWPort23]> { - let NumMicroOps = 2; - let Latency = 4; - let ResourceCycles = [1, 1]; -} -def : InstRW<[WritePBLENDWm, ReadAfterLd], (instregex "(V?)PBLENDW(Y?)rmi")>; - -// PMOVMSKB. -def WritePMOVMSKB : SchedWriteRes<[HWPort0]> { - let Latency = 3; -} -def : InstRW<[WritePMOVMSKB], (instregex "(V|MMX_)?PMOVMSKB(Y?)rr")>; - -// VPGATHERDD. -// x. -def WriteVPGATHERDD128 : SchedWriteRes<[]> { - let NumMicroOps = 20; -} -def : InstRW<[WriteVPGATHERDD128, ReadAfterLd], (instregex "VPGATHERDDrm")>; - -// y. -def WriteVPGATHERDD256 : SchedWriteRes<[]> { - let NumMicroOps = 34; -} -def : InstRW<[WriteVPGATHERDD256, ReadAfterLd], (instregex "VPGATHERDDYrm")>; - -// VPGATHERQD. -// x. -def WriteVPGATHERQD128 : SchedWriteRes<[]> { - let NumMicroOps = 15; -} -def : InstRW<[WriteVPGATHERQD128, ReadAfterLd], (instregex "VPGATHERQDrm")>; - -// y. -def WriteVPGATHERQD256 : SchedWriteRes<[]> { - let NumMicroOps = 22; -} -def : InstRW<[WriteVPGATHERQD256, ReadAfterLd], (instregex "VPGATHERQDYrm")>; - -// VPGATHERDQ. -// x. 
-def WriteVPGATHERDQ128 : SchedWriteRes<[]> { - let NumMicroOps = 12; -} -def : InstRW<[WriteVPGATHERDQ128, ReadAfterLd], (instregex "VPGATHERDQrm")>; - -// y. -def WriteVPGATHERDQ256 : SchedWriteRes<[]> { - let NumMicroOps = 20; -} -def : InstRW<[WriteVPGATHERDQ256, ReadAfterLd], (instregex "VPGATHERDQYrm")>; - -// VPGATHERQQ. -// x. -def WriteVPGATHERQQ128 : SchedWriteRes<[]> { - let NumMicroOps = 14; -} -def : InstRW<[WriteVPGATHERQQ128, ReadAfterLd], (instregex "VPGATHERQQrm")>; - -// y. -def WriteVPGATHERQQ256 : SchedWriteRes<[]> { - let NumMicroOps = 22; -} -def : InstRW<[WriteVPGATHERQQ256, ReadAfterLd], (instregex "VPGATHERQQYrm")>; - -//-- Arithmetic instructions --// - //////////////////////////////////////////////////////////////////////////////// // Horizontal add/sub instructions. //////////////////////////////////////////////////////////////////////////////// @@ -788,133 +710,105 @@ def : WriteRes { //=== Floating Point XMM and YMM Instructions ===// -// VGATHERDPS. -// x. -def WriteVGATHERDPS128 : SchedWriteRes<[]> { - let NumMicroOps = 20; -} -def : InstRW<[WriteVGATHERDPS128, ReadAfterLd], (instregex "VGATHERDPSrm")>; - -// y. -def WriteVGATHERDPS256 : SchedWriteRes<[]> { - let NumMicroOps = 34; -} -def : InstRW<[WriteVGATHERDPS256, ReadAfterLd], (instregex "VGATHERDPSYrm")>; - -// VGATHERQPS. -// x. -def WriteVGATHERQPS128 : SchedWriteRes<[]> { - let NumMicroOps = 15; -} -def : InstRW<[WriteVGATHERQPS128, ReadAfterLd], (instregex "VGATHERQPSrm")>; - -// y. -def WriteVGATHERQPS256 : SchedWriteRes<[]> { - let NumMicroOps = 22; -} -def : InstRW<[WriteVGATHERQPS256, ReadAfterLd], (instregex "VGATHERQPSYrm")>; - -// VGATHERDPD. -// x. -def WriteVGATHERDPD128 : SchedWriteRes<[]> { - let NumMicroOps = 12; -} -def : InstRW<[WriteVGATHERDPD128, ReadAfterLd], (instregex "VGATHERDPDrm")>; - -// y. -def WriteVGATHERDPD256 : SchedWriteRes<[]> { - let NumMicroOps = 20; -} -def : InstRW<[WriteVGATHERDPD256, ReadAfterLd], (instregex "VGATHERDPDYrm")>; - -// VGATHERQPD. -// x. -def WriteVGATHERQPD128 : SchedWriteRes<[]> { - let NumMicroOps = 14; -} -def : InstRW<[WriteVGATHERQPD128, ReadAfterLd], (instregex "VGATHERQPDrm")>; - -// y. -def WriteVGATHERQPD256 : SchedWriteRes<[]> { - let NumMicroOps = 22; -} -def : InstRW<[WriteVGATHERQPD256, ReadAfterLd], (instregex "VGATHERQPDYrm")>; - // Remaining instrs. 
def HWWriteResGroup0 : SchedWriteRes<[HWPort23]> { - let Latency = 1; + let Latency = 6; let NumMicroOps = 1; let ResourceCycles = [1]; } def: InstRW<[HWWriteResGroup0], (instregex "LDDQUrm")>; -def: InstRW<[HWWriteResGroup0], (instregex "LD_F32m")>; -def: InstRW<[HWWriteResGroup0], (instregex "LD_F64m")>; -def: InstRW<[HWWriteResGroup0], (instregex "LD_F80m")>; -def: InstRW<[HWWriteResGroup0], (instregex "MMX_MOVD64from64rm")>; -def: InstRW<[HWWriteResGroup0], (instregex "MMX_MOVD64rm")>; -def: InstRW<[HWWriteResGroup0], (instregex "MMX_MOVD64to64rm")>; -def: InstRW<[HWWriteResGroup0], (instregex "MMX_MOVQ64rm")>; -def: InstRW<[HWWriteResGroup0], (instregex "MOV(16|32|64)rm")>; -def: InstRW<[HWWriteResGroup0], (instregex "MOV64toPQIrm")>; -def: InstRW<[HWWriteResGroup0], (instregex "MOV8rm")>; def: InstRW<[HWWriteResGroup0], (instregex "MOVAPDrm")>; def: InstRW<[HWWriteResGroup0], (instregex "MOVAPSrm")>; -def: InstRW<[HWWriteResGroup0], (instregex "MOVDDUPrm")>; -def: InstRW<[HWWriteResGroup0], (instregex "MOVDI2PDIrm")>; def: InstRW<[HWWriteResGroup0], (instregex "MOVDQArm")>; def: InstRW<[HWWriteResGroup0], (instregex "MOVDQUrm")>; def: InstRW<[HWWriteResGroup0], (instregex "MOVNTDQArm")>; def: InstRW<[HWWriteResGroup0], (instregex "MOVSHDUPrm")>; def: InstRW<[HWWriteResGroup0], (instregex "MOVSLDUPrm")>; -def: InstRW<[HWWriteResGroup0], (instregex "MOVSSrm")>; -def: InstRW<[HWWriteResGroup0], (instregex "MOVSX(16|32|64)rm16")>; -def: InstRW<[HWWriteResGroup0], (instregex "MOVSX(16|32|64)rm32")>; -def: InstRW<[HWWriteResGroup0], (instregex "MOVSX(16|32|64)rm8")>; def: InstRW<[HWWriteResGroup0], (instregex "MOVUPDrm")>; def: InstRW<[HWWriteResGroup0], (instregex "MOVUPSrm")>; -def: InstRW<[HWWriteResGroup0], (instregex "MOVZX(16|32|64)rm16")>; -def: InstRW<[HWWriteResGroup0], (instregex "MOVZX(16|32|64)rm8")>; -def: InstRW<[HWWriteResGroup0], (instregex "PREFETCHNTA")>; -def: InstRW<[HWWriteResGroup0], (instregex "PREFETCHT0")>; -def: InstRW<[HWWriteResGroup0], (instregex "PREFETCHT1")>; -def: InstRW<[HWWriteResGroup0], (instregex "PREFETCHT2")>; -def: InstRW<[HWWriteResGroup0], (instregex "VBROADCASTF128")>; -def: InstRW<[HWWriteResGroup0], (instregex "VBROADCASTI128")>; -def: InstRW<[HWWriteResGroup0], (instregex "VBROADCASTSDYrm")>; -def: InstRW<[HWWriteResGroup0], (instregex "VBROADCASTSSYrm")>; def: InstRW<[HWWriteResGroup0], (instregex "VBROADCASTSSrm")>; -def: InstRW<[HWWriteResGroup0], (instregex "VLDDQUYrm")>; def: InstRW<[HWWriteResGroup0], (instregex "VLDDQUrm")>; -def: InstRW<[HWWriteResGroup0], (instregex "VMOV64toPQIrm")>; -def: InstRW<[HWWriteResGroup0], (instregex "VMOVAPDYrm")>; def: InstRW<[HWWriteResGroup0], (instregex "VMOVAPDrm")>; -def: InstRW<[HWWriteResGroup0], (instregex "VMOVAPSYrm")>; def: InstRW<[HWWriteResGroup0], (instregex "VMOVAPSrm")>; -def: InstRW<[HWWriteResGroup0], (instregex "VMOVDDUPYrm")>; -def: InstRW<[HWWriteResGroup0], (instregex "VMOVDDUPrm")>; -def: InstRW<[HWWriteResGroup0], (instregex "VMOVDI2PDIrm")>; -def: InstRW<[HWWriteResGroup0], (instregex "VMOVDQAYrm")>; def: InstRW<[HWWriteResGroup0], (instregex "VMOVDQArm")>; -def: InstRW<[HWWriteResGroup0], (instregex "VMOVDQUYrm")>; def: InstRW<[HWWriteResGroup0], (instregex "VMOVDQUrm")>; -def: InstRW<[HWWriteResGroup0], (instregex "VMOVNTDQAYrm")>; def: InstRW<[HWWriteResGroup0], (instregex "VMOVNTDQArm")>; -def: InstRW<[HWWriteResGroup0], (instregex "VMOVQI2PQIrm")>; -def: InstRW<[HWWriteResGroup0], (instregex "VMOVSDrm")>; -def: InstRW<[HWWriteResGroup0], (instregex "VMOVSHDUPYrm")>; def: 
InstRW<[HWWriteResGroup0], (instregex "VMOVSHDUPrm")>; -def: InstRW<[HWWriteResGroup0], (instregex "VMOVSLDUPYrm")>; def: InstRW<[HWWriteResGroup0], (instregex "VMOVSLDUPrm")>; -def: InstRW<[HWWriteResGroup0], (instregex "VMOVSSrm")>; -def: InstRW<[HWWriteResGroup0], (instregex "VMOVUPDYrm")>; def: InstRW<[HWWriteResGroup0], (instregex "VMOVUPDrm")>; -def: InstRW<[HWWriteResGroup0], (instregex "VMOVUPSYrm")>; def: InstRW<[HWWriteResGroup0], (instregex "VMOVUPSrm")>; -def: InstRW<[HWWriteResGroup0], (instregex "VPBROADCASTDYrm")>; def: InstRW<[HWWriteResGroup0], (instregex "VPBROADCASTDrm")>; -def: InstRW<[HWWriteResGroup0], (instregex "VPBROADCASTQYrm")>; def: InstRW<[HWWriteResGroup0], (instregex "VPBROADCASTQrm")>; +def: InstRW<[HWWriteResGroup0], (instregex "ROUNDPDr")>; +def: InstRW<[HWWriteResGroup0], (instregex "ROUNDPSr")>; +def: InstRW<[HWWriteResGroup0], (instregex "ROUNDSDr")>; +def: InstRW<[HWWriteResGroup0], (instregex "ROUNDSSr")>; +def: InstRW<[HWWriteResGroup0], (instregex "VROUNDPDr")>; +def: InstRW<[HWWriteResGroup0], (instregex "VROUNDPSr")>; +def: InstRW<[HWWriteResGroup0], (instregex "VROUNDSDr")>; +def: InstRW<[HWWriteResGroup0], (instregex "VROUNDSSr")>; +def: InstRW<[HWWriteResGroup0], (instregex "VROUNDYPDr")>; +def: InstRW<[HWWriteResGroup0], (instregex "VROUNDYPSr")>; + +def HWWriteResGroup0_1 : SchedWriteRes<[HWPort23]> { + let Latency = 7; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[HWWriteResGroup0_1], (instregex "LD_F32m")>; +def: InstRW<[HWWriteResGroup0_1], (instregex "LD_F64m")>; +def: InstRW<[HWWriteResGroup0_1], (instregex "LD_F80m")>; +def: InstRW<[HWWriteResGroup0_1], (instregex "VBROADCASTF128")>; +def: InstRW<[HWWriteResGroup0_1], (instregex "VBROADCASTI128")>; +def: InstRW<[HWWriteResGroup0_1], (instregex "VBROADCASTSDYrm")>; +def: InstRW<[HWWriteResGroup0_1], (instregex "VBROADCASTSSYrm")>; +def: InstRW<[HWWriteResGroup0_1], (instregex "VLDDQUYrm")>; +def: InstRW<[HWWriteResGroup0_1], (instregex "VMOVAPDYrm")>; +def: InstRW<[HWWriteResGroup0_1], (instregex "VMOVAPSYrm")>; +def: InstRW<[HWWriteResGroup0_1], (instregex "VMOVDDUPYrm")>; +def: InstRW<[HWWriteResGroup0_1], (instregex "VMOVDQAYrm")>; +def: InstRW<[HWWriteResGroup0_1], (instregex "VMOVDQUYrm")>; +def: InstRW<[HWWriteResGroup0_1], (instregex "VMOVNTDQAYrm")>; +def: InstRW<[HWWriteResGroup0_1], (instregex "VMOVSHDUPYrm")>; +def: InstRW<[HWWriteResGroup0_1], (instregex "VMOVSLDUPYrm")>; +def: InstRW<[HWWriteResGroup0_1], (instregex "VMOVUPDYrm")>; +def: InstRW<[HWWriteResGroup0_1], (instregex "VMOVUPSYrm")>; +def: InstRW<[HWWriteResGroup0_1], (instregex "VPBROADCASTDYrm")>; +def: InstRW<[HWWriteResGroup0_1], (instregex "VPBROADCASTQYrm")>; + +def HWWriteResGroup0_2 : SchedWriteRes<[HWPort23]> { + let Latency = 5; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[HWWriteResGroup0_2], (instregex "MMX_MOVD64from64rm")>; +def: InstRW<[HWWriteResGroup0_2], (instregex "MMX_MOVD64rm")>; +def: InstRW<[HWWriteResGroup0_2], (instregex "MMX_MOVD64to64rm")>; +def: InstRW<[HWWriteResGroup0_2], (instregex "MMX_MOVQ64rm")>; +def: InstRW<[HWWriteResGroup0_2], (instregex "MOV(16|32|64)rm")>; +def: InstRW<[HWWriteResGroup0_2], (instregex "MOV64toPQIrm")>; +def: InstRW<[HWWriteResGroup0_2], (instregex "MOV8rm")>; +def: InstRW<[HWWriteResGroup0_2], (instregex "MOVDDUPrm")>; +def: InstRW<[HWWriteResGroup0_2], (instregex "MOVDI2PDIrm")>; +def: InstRW<[HWWriteResGroup0_2], (instregex "MOVQI2PQIrm")>; +def: InstRW<[HWWriteResGroup0_2], (instregex "MOVSDrm")>; +def: 
InstRW<[HWWriteResGroup0_2], (instregex "MOVSSrm")>; +def: InstRW<[HWWriteResGroup0_2], (instregex "MOVSX(16|32|64)rm16")>; +def: InstRW<[HWWriteResGroup0_2], (instregex "MOVSX(16|32|64)rm32")>; +def: InstRW<[HWWriteResGroup0_2], (instregex "MOVSX(16|32|64)rm8")>; +def: InstRW<[HWWriteResGroup0_2], (instregex "MOVZX(16|32|64)rm16")>; +def: InstRW<[HWWriteResGroup0_2], (instregex "MOVZX(16|32|64)rm8")>; +def: InstRW<[HWWriteResGroup0_2], (instregex "PREFETCHNTA")>; +def: InstRW<[HWWriteResGroup0_2], (instregex "PREFETCHT0")>; +def: InstRW<[HWWriteResGroup0_2], (instregex "PREFETCHT1")>; +def: InstRW<[HWWriteResGroup0_2], (instregex "PREFETCHT2")>; +def: InstRW<[HWWriteResGroup0_2], (instregex "VMOV64toPQIrm")>; +def: InstRW<[HWWriteResGroup0_2], (instregex "VMOVDDUPrm")>; +def: InstRW<[HWWriteResGroup0_2], (instregex "VMOVDI2PDIrm")>; +def: InstRW<[HWWriteResGroup0_2], (instregex "VMOVQI2PQIrm")>; +def: InstRW<[HWWriteResGroup0_2], (instregex "VMOVSDrm")>; +def: InstRW<[HWWriteResGroup0_2], (instregex "VMOVSSrm")>; def HWWriteResGroup1 : SchedWriteRes<[HWPort4,HWPort237]> { let Latency = 1; @@ -945,6 +839,7 @@ def: InstRW<[HWWriteResGroup1], (instregex "MOVNTPSmr")>; def: InstRW<[HWWriteResGroup1], (instregex "MOVPDI2DImr")>; def: InstRW<[HWWriteResGroup1], (instregex "MOVPQI2QImr")>; def: InstRW<[HWWriteResGroup1], (instregex "MOVPQIto64mr")>; +def: InstRW<[HWWriteResGroup1], (instregex "MOVSDmr")>; def: InstRW<[HWWriteResGroup1], (instregex "MOVSSmr")>; def: InstRW<[HWWriteResGroup1], (instregex "MOVUPDmr")>; def: InstRW<[HWWriteResGroup1], (instregex "MOVUPSmr")>; @@ -1085,12 +980,12 @@ def: InstRW<[HWWriteResGroup4], (instregex "MOVDDUPrr")>; def: InstRW<[HWWriteResGroup4], (instregex "MOVDI2PDIrr")>; def: InstRW<[HWWriteResGroup4], (instregex "MOVHLPSrr")>; def: InstRW<[HWWriteResGroup4], (instregex "MOVLHPSrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "MOVSDrr(_REV?)")>; +def: InstRW<[HWWriteResGroup4], (instregex "MOVSDrr(_REV)?")>; def: InstRW<[HWWriteResGroup4], (instregex "MOVSHDUPrr")>; def: InstRW<[HWWriteResGroup4], (instregex "MOVSLDUPrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "MOVSSrr(_REV?)")>; -def: InstRW<[HWWriteResGroup4], (instregex "MOVUPDrr(_REV?)")>; -def: InstRW<[HWWriteResGroup4], (instregex "MOVUPSrr(_REV?)")>; +def: InstRW<[HWWriteResGroup4], (instregex "MOVSSrr(_REV)?")>; +def: InstRW<[HWWriteResGroup4], (instregex "MOVUPDrr(_REV)?")>; +def: InstRW<[HWWriteResGroup4], (instregex "MOVUPSrr(_REV)?")>; def: InstRW<[HWWriteResGroup4], (instregex "ORPDrr")>; def: InstRW<[HWWriteResGroup4], (instregex "ORPSrr")>; def: InstRW<[HWWriteResGroup4], (instregex "PACKSSDWrr")>; @@ -1142,25 +1037,25 @@ def: InstRW<[HWWriteResGroup4], (instregex "VANDPSrr")>; def: InstRW<[HWWriteResGroup4], (instregex "VBROADCASTSSrr")>; def: InstRW<[HWWriteResGroup4], (instregex "VINSERTPSrr")>; def: InstRW<[HWWriteResGroup4], (instregex "VMOV64toPQIrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VMOVAPDYrr(_REV?)")>; -def: InstRW<[HWWriteResGroup4], (instregex "VMOVAPDrr(_REV?)")>; -def: InstRW<[HWWriteResGroup4], (instregex "VMOVAPSYrr(_REV?)")>; -def: InstRW<[HWWriteResGroup4], (instregex "VMOVAPSrr(_REV?)")>; +def: InstRW<[HWWriteResGroup4], (instregex "VMOVAPDYrr(_REV)?")>; +def: InstRW<[HWWriteResGroup4], (instregex "VMOVAPDrr(_REV)?")>; +def: InstRW<[HWWriteResGroup4], (instregex "VMOVAPSYrr(_REV)?")>; +def: InstRW<[HWWriteResGroup4], (instregex "VMOVAPSrr(_REV)?")>; def: InstRW<[HWWriteResGroup4], (instregex "VMOVDDUPYrr")>; def: InstRW<[HWWriteResGroup4], (instregex 
"VMOVDDUPrr")>; def: InstRW<[HWWriteResGroup4], (instregex "VMOVDI2PDIrr")>; def: InstRW<[HWWriteResGroup4], (instregex "VMOVHLPSrr")>; def: InstRW<[HWWriteResGroup4], (instregex "VMOVLHPSrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VMOVSDrr(_REV?)")>; +def: InstRW<[HWWriteResGroup4], (instregex "VMOVSDrr(_REV)?")>; def: InstRW<[HWWriteResGroup4], (instregex "VMOVSHDUPYrr")>; def: InstRW<[HWWriteResGroup4], (instregex "VMOVSHDUPrr")>; def: InstRW<[HWWriteResGroup4], (instregex "VMOVSLDUPYrr")>; def: InstRW<[HWWriteResGroup4], (instregex "VMOVSLDUPrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VMOVSSrr(_REV?)")>; -def: InstRW<[HWWriteResGroup4], (instregex "VMOVUPDYrr(_REV?)")>; -def: InstRW<[HWWriteResGroup4], (instregex "VMOVUPDrr(_REV?)")>; -def: InstRW<[HWWriteResGroup4], (instregex "VMOVUPSYrr(_REV?)")>; -def: InstRW<[HWWriteResGroup4], (instregex "VMOVUPSrr(_REV?)")>; +def: InstRW<[HWWriteResGroup4], (instregex "VMOVSSrr(_REV)?")>; +def: InstRW<[HWWriteResGroup4], (instregex "VMOVUPDYrr(_REV)?")>; +def: InstRW<[HWWriteResGroup4], (instregex "VMOVUPDrr(_REV)?")>; +def: InstRW<[HWWriteResGroup4], (instregex "VMOVUPSYrr(_REV)?")>; +def: InstRW<[HWWriteResGroup4], (instregex "VMOVUPSrr(_REV)?")>; def: InstRW<[HWWriteResGroup4], (instregex "VORPDYrr")>; def: InstRW<[HWWriteResGroup4], (instregex "VORPDrr")>; def: InstRW<[HWWriteResGroup4], (instregex "VORPSYrr")>; @@ -1276,91 +1171,39 @@ def: InstRW<[HWWriteResGroup7], (instregex "BTS(16|32|64)ri8")>; def: InstRW<[HWWriteResGroup7], (instregex "BTS(16|32|64)rr")>; def: InstRW<[HWWriteResGroup7], (instregex "CDQ")>; def: InstRW<[HWWriteResGroup7], (instregex "CQO")>; -def: InstRW<[HWWriteResGroup7], (instregex "JAE_1")>; -def: InstRW<[HWWriteResGroup7], (instregex "JAE_4")>; -def: InstRW<[HWWriteResGroup7], (instregex "JA_1")>; -def: InstRW<[HWWriteResGroup7], (instregex "JA_4")>; -def: InstRW<[HWWriteResGroup7], (instregex "JBE_1")>; -def: InstRW<[HWWriteResGroup7], (instregex "JBE_4")>; -def: InstRW<[HWWriteResGroup7], (instregex "JB_1")>; -def: InstRW<[HWWriteResGroup7], (instregex "JB_4")>; -def: InstRW<[HWWriteResGroup7], (instregex "JE_1")>; -def: InstRW<[HWWriteResGroup7], (instregex "JE_4")>; -def: InstRW<[HWWriteResGroup7], (instregex "JGE_1")>; -def: InstRW<[HWWriteResGroup7], (instregex "JGE_4")>; -def: InstRW<[HWWriteResGroup7], (instregex "JG_1")>; -def: InstRW<[HWWriteResGroup7], (instregex "JG_4")>; -def: InstRW<[HWWriteResGroup7], (instregex "JLE_1")>; -def: InstRW<[HWWriteResGroup7], (instregex "JLE_4")>; -def: InstRW<[HWWriteResGroup7], (instregex "JL_1")>; -def: InstRW<[HWWriteResGroup7], (instregex "JL_4")>; +def: InstRW<[HWWriteResGroup7], (instregex "J(A|AE|B|BE|E|G|GE|L|LE|NE|NO|NP|NS|O|P|S)_1")>; +def: InstRW<[HWWriteResGroup7], (instregex "J(A|AE|B|BE|E|G|GE|L|LE|NE|NO|NP|NS|O|P|S)_4")>; def: InstRW<[HWWriteResGroup7], (instregex "JMP_1")>; def: InstRW<[HWWriteResGroup7], (instregex "JMP_4")>; -def: InstRW<[HWWriteResGroup7], (instregex "JNE_1")>; -def: InstRW<[HWWriteResGroup7], (instregex "JNE_4")>; -def: InstRW<[HWWriteResGroup7], (instregex "JNO_1")>; -def: InstRW<[HWWriteResGroup7], (instregex "JNO_4")>; -def: InstRW<[HWWriteResGroup7], (instregex "JNP_1")>; -def: InstRW<[HWWriteResGroup7], (instregex "JNP_4")>; -def: InstRW<[HWWriteResGroup7], (instregex "JNS_1")>; -def: InstRW<[HWWriteResGroup7], (instregex "JNS_4")>; -def: InstRW<[HWWriteResGroup7], (instregex "JO_1")>; -def: InstRW<[HWWriteResGroup7], (instregex "JO_4")>; -def: InstRW<[HWWriteResGroup7], (instregex "JP_1")>; -def: 
InstRW<[HWWriteResGroup7], (instregex "JP_4")>; -def: InstRW<[HWWriteResGroup7], (instregex "JS_1")>; -def: InstRW<[HWWriteResGroup7], (instregex "JS_4")>; -def: InstRW<[HWWriteResGroup7], (instregex "RORX32ri")>; -def: InstRW<[HWWriteResGroup7], (instregex "RORX64ri")>; +def: InstRW<[HWWriteResGroup7], (instregex "RORX(32|64)ri")>; def: InstRW<[HWWriteResGroup7], (instregex "SAR(16|32|64)r1")>; def: InstRW<[HWWriteResGroup7], (instregex "SAR(16|32|64)ri")>; def: InstRW<[HWWriteResGroup7], (instregex "SAR8r1")>; def: InstRW<[HWWriteResGroup7], (instregex "SAR8ri")>; -def: InstRW<[HWWriteResGroup7], (instregex "SARX32rr")>; -def: InstRW<[HWWriteResGroup7], (instregex "SARX64rr")>; -def: InstRW<[HWWriteResGroup7], (instregex "SETAEr")>; -def: InstRW<[HWWriteResGroup7], (instregex "SETBr")>; -def: InstRW<[HWWriteResGroup7], (instregex "SETEr")>; -def: InstRW<[HWWriteResGroup7], (instregex "SETGEr")>; -def: InstRW<[HWWriteResGroup7], (instregex "SETGr")>; -def: InstRW<[HWWriteResGroup7], (instregex "SETLEr")>; -def: InstRW<[HWWriteResGroup7], (instregex "SETLr")>; -def: InstRW<[HWWriteResGroup7], (instregex "SETNEr")>; -def: InstRW<[HWWriteResGroup7], (instregex "SETNOr")>; -def: InstRW<[HWWriteResGroup7], (instregex "SETNPr")>; -def: InstRW<[HWWriteResGroup7], (instregex "SETNSr")>; -def: InstRW<[HWWriteResGroup7], (instregex "SETOr")>; -def: InstRW<[HWWriteResGroup7], (instregex "SETPr")>; -def: InstRW<[HWWriteResGroup7], (instregex "SETSr")>; +def: InstRW<[HWWriteResGroup7], (instregex "SARX(32|64)rr")>; +def: InstRW<[HWWriteResGroup7], (instregex "SET(AE|B|E|G|GE|L|LE|NE|NO|NP|NS|O|P|S)r")>; def: InstRW<[HWWriteResGroup7], (instregex "SHL(16|32|64)r1")>; def: InstRW<[HWWriteResGroup7], (instregex "SHL(16|32|64)ri")>; def: InstRW<[HWWriteResGroup7], (instregex "SHL8r1")>; def: InstRW<[HWWriteResGroup7], (instregex "SHL8ri")>; -def: InstRW<[HWWriteResGroup7], (instregex "SHLX32rr")>; -def: InstRW<[HWWriteResGroup7], (instregex "SHLX64rr")>; +def: InstRW<[HWWriteResGroup7], (instregex "SHLX(32|64)rr")>; def: InstRW<[HWWriteResGroup7], (instregex "SHR(16|32|64)r1")>; def: InstRW<[HWWriteResGroup7], (instregex "SHR(16|32|64)ri")>; def: InstRW<[HWWriteResGroup7], (instregex "SHR8r1")>; def: InstRW<[HWWriteResGroup7], (instregex "SHR8ri")>; -def: InstRW<[HWWriteResGroup7], (instregex "SHRX32rr")>; -def: InstRW<[HWWriteResGroup7], (instregex "SHRX64rr")>; +def: InstRW<[HWWriteResGroup7], (instregex "SHRX(32|64)rr")>; def HWWriteResGroup8 : SchedWriteRes<[HWPort15]> { let Latency = 1; let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[HWWriteResGroup8], (instregex "ANDN32rr")>; -def: InstRW<[HWWriteResGroup8], (instregex "ANDN64rr")>; -def: InstRW<[HWWriteResGroup8], (instregex "BLSI32rr")>; -def: InstRW<[HWWriteResGroup8], (instregex "BLSI64rr")>; -def: InstRW<[HWWriteResGroup8], (instregex "BLSMSK32rr")>; -def: InstRW<[HWWriteResGroup8], (instregex "BLSMSK64rr")>; -def: InstRW<[HWWriteResGroup8], (instregex "BLSR32rr")>; -def: InstRW<[HWWriteResGroup8], (instregex "BLSR64rr")>; -def: InstRW<[HWWriteResGroup8], (instregex "BZHI32rr")>; -def: InstRW<[HWWriteResGroup8], (instregex "BZHI64rr")>; -def: InstRW<[HWWriteResGroup8], (instregex "LEA(16|32|64)r")>; +def: InstRW<[HWWriteResGroup8], (instregex "ANDN(32|64)rr")>; +def: InstRW<[HWWriteResGroup8], (instregex "BLSI(32|64)rr")>; +def: InstRW<[HWWriteResGroup8], (instregex "BLSMSK(32|64)rr")>; +def: InstRW<[HWWriteResGroup8], (instregex "BLSR(32|64)rr")>; +def: InstRW<[HWWriteResGroup8], (instregex "BZHI(32|64)rr")>; +def: 
InstRW<[HWWriteResGroup8], (instregex "LEA(16|32|64)(_32)?r")>; def: InstRW<[HWWriteResGroup8], (instregex "MMX_PABSBrr64")>; def: InstRW<[HWWriteResGroup8], (instregex "MMX_PABSDrr64")>; def: InstRW<[HWWriteResGroup8], (instregex "MMX_PABSWrr64")>; @@ -1533,13 +1376,13 @@ def HWWriteResGroup9 : SchedWriteRes<[HWPort015]> { def: InstRW<[HWWriteResGroup9], (instregex "BLENDPDrri")>; def: InstRW<[HWWriteResGroup9], (instregex "BLENDPSrri")>; def: InstRW<[HWWriteResGroup9], (instregex "MMX_MOVD64from64rr")>; -def: InstRW<[HWWriteResGroup9], (instregex "MMX_MOVQ64rr(_REV?)")>; +def: InstRW<[HWWriteResGroup9], (instregex "MMX_MOVQ64rr(_REV)?")>; def: InstRW<[HWWriteResGroup9], (instregex "MMX_PANDNirr")>; def: InstRW<[HWWriteResGroup9], (instregex "MMX_PANDirr")>; def: InstRW<[HWWriteResGroup9], (instregex "MMX_PORirr")>; def: InstRW<[HWWriteResGroup9], (instregex "MMX_PXORirr")>; -def: InstRW<[HWWriteResGroup9], (instregex "MOVDQArr(_REV?)")>; -def: InstRW<[HWWriteResGroup9], (instregex "MOVDQUrr(_REV?)")>; +def: InstRW<[HWWriteResGroup9], (instregex "MOVDQArr(_REV)?")>; +def: InstRW<[HWWriteResGroup9], (instregex "MOVDQUrr(_REV)?")>; def: InstRW<[HWWriteResGroup9], (instregex "MOVPQI2QIrr")>; def: InstRW<[HWWriteResGroup9], (instregex "PANDNrr")>; def: InstRW<[HWWriteResGroup9], (instregex "PANDrr")>; @@ -1549,10 +1392,10 @@ def: InstRW<[HWWriteResGroup9], (instregex "VBLENDPDYrri")>; def: InstRW<[HWWriteResGroup9], (instregex "VBLENDPDrri")>; def: InstRW<[HWWriteResGroup9], (instregex "VBLENDPSYrri")>; def: InstRW<[HWWriteResGroup9], (instregex "VBLENDPSrri")>; -def: InstRW<[HWWriteResGroup9], (instregex "VMOVDQAYrr(_REV?)")>; -def: InstRW<[HWWriteResGroup9], (instregex "VMOVDQArr(_REV?)")>; -def: InstRW<[HWWriteResGroup9], (instregex "VMOVDQUYrr(_REV?)")>; -def: InstRW<[HWWriteResGroup9], (instregex "VMOVDQUrr(_REV?)")>; +def: InstRW<[HWWriteResGroup9], (instregex "VMOVDQAYrr(_REV)?")>; +def: InstRW<[HWWriteResGroup9], (instregex "VMOVDQArr(_REV)?")>; +def: InstRW<[HWWriteResGroup9], (instregex "VMOVDQUYrr(_REV)?")>; +def: InstRW<[HWWriteResGroup9], (instregex "VMOVDQUrr(_REV)?")>; def: InstRW<[HWWriteResGroup9], (instregex "VMOVPQI2QIrr")>; def: InstRW<[HWWriteResGroup9], (instregex "VMOVZPQILo2PQIrr")>; def: InstRW<[HWWriteResGroup9], (instregex "VPANDNYrr")>; @@ -1571,33 +1414,33 @@ def HWWriteResGroup10 : SchedWriteRes<[HWPort0156]> { let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[HWWriteResGroup10], (instregex "ADD(16|32|64)ri8")>; -def: InstRW<[HWWriteResGroup10], (instregex "ADD(16|32|64)rr(_REV?)")>; +def: InstRW<[HWWriteResGroup10], (instregex "ADD(16|32|64)ri")>; +def: InstRW<[HWWriteResGroup10], (instregex "ADD(16|32|64)rr(_REV)?")>; def: InstRW<[HWWriteResGroup10], (instregex "ADD8i8")>; def: InstRW<[HWWriteResGroup10], (instregex "ADD8ri")>; -def: InstRW<[HWWriteResGroup10], (instregex "ADD8rr(_REV?)")>; -def: InstRW<[HWWriteResGroup10], (instregex "AND(16|32|64)ri8")>; -def: InstRW<[HWWriteResGroup10], (instregex "AND(16|32|64)rr(_REV?)")>; +def: InstRW<[HWWriteResGroup10], (instregex "ADD8rr(_REV)?")>; +def: InstRW<[HWWriteResGroup10], (instregex "AND(16|32|64)ri")>; +def: InstRW<[HWWriteResGroup10], (instregex "AND(16|32|64)rr(_REV)?")>; def: InstRW<[HWWriteResGroup10], (instregex "AND8i8")>; def: InstRW<[HWWriteResGroup10], (instregex "AND8ri")>; -def: InstRW<[HWWriteResGroup10], (instregex "AND8rr(_REV?)")>; +def: InstRW<[HWWriteResGroup10], (instregex "AND8rr(_REV)?")>; def: InstRW<[HWWriteResGroup10], (instregex "CBW")>; def: InstRW<[HWWriteResGroup10], 
(instregex "CLC")>; def: InstRW<[HWWriteResGroup10], (instregex "CMC")>; -def: InstRW<[HWWriteResGroup10], (instregex "CMP(16|32|64)ri8")>; -def: InstRW<[HWWriteResGroup10], (instregex "CMP(16|32|64)rr(_REV?)")>; +def: InstRW<[HWWriteResGroup10], (instregex "CMP(16|32|64)ri")>; +def: InstRW<[HWWriteResGroup10], (instregex "CMP(16|32|64)rr(_REV)?")>; def: InstRW<[HWWriteResGroup10], (instregex "CMP8i8")>; def: InstRW<[HWWriteResGroup10], (instregex "CMP8ri")>; -def: InstRW<[HWWriteResGroup10], (instregex "CMP8rr(_REV?)")>; +def: InstRW<[HWWriteResGroup10], (instregex "CMP8rr(_REV)?")>; def: InstRW<[HWWriteResGroup10], (instregex "CWDE")>; def: InstRW<[HWWriteResGroup10], (instregex "DEC(16|32|64)r")>; def: InstRW<[HWWriteResGroup10], (instregex "DEC8r")>; def: InstRW<[HWWriteResGroup10], (instregex "INC(16|32|64)r")>; def: InstRW<[HWWriteResGroup10], (instregex "INC8r")>; def: InstRW<[HWWriteResGroup10], (instregex "LAHF")>; -def: InstRW<[HWWriteResGroup10], (instregex "MOV(16|32|64)rr(_REV?)")>; -def: InstRW<[HWWriteResGroup10], (instregex "MOV8ri(_alt?)")>; -def: InstRW<[HWWriteResGroup10], (instregex "MOV8rr(_REV?)")>; +def: InstRW<[HWWriteResGroup10], (instregex "MOV(16|32|64)rr(_REV)?")>; +def: InstRW<[HWWriteResGroup10], (instregex "MOV8ri(_alt)?")>; +def: InstRW<[HWWriteResGroup10], (instregex "MOV8rr(_REV)?")>; def: InstRW<[HWWriteResGroup10], (instregex "MOVSX(16|32|64)rr16")>; def: InstRW<[HWWriteResGroup10], (instregex "MOVSX(16|32|64)rr32")>; def: InstRW<[HWWriteResGroup10], (instregex "MOVSX(16|32|64)rr8")>; @@ -1608,11 +1451,11 @@ def: InstRW<[HWWriteResGroup10], (instregex "NEG8r")>; def: InstRW<[HWWriteResGroup10], (instregex "NOOP")>; def: InstRW<[HWWriteResGroup10], (instregex "NOT(16|32|64)r")>; def: InstRW<[HWWriteResGroup10], (instregex "NOT8r")>; -def: InstRW<[HWWriteResGroup10], (instregex "OR(16|32|64)ri8")>; -def: InstRW<[HWWriteResGroup10], (instregex "OR(16|32|64)rr(_REV?)")>; +def: InstRW<[HWWriteResGroup10], (instregex "OR(16|32|64)ri")>; +def: InstRW<[HWWriteResGroup10], (instregex "OR(16|32|64)rr(_REV)?")>; def: InstRW<[HWWriteResGroup10], (instregex "OR8i8")>; def: InstRW<[HWWriteResGroup10], (instregex "OR8ri")>; -def: InstRW<[HWWriteResGroup10], (instregex "OR8rr(_REV?)")>; +def: InstRW<[HWWriteResGroup10], (instregex "OR8rr(_REV)?")>; def: InstRW<[HWWriteResGroup10], (instregex "SAHF")>; def: InstRW<[HWWriteResGroup10], (instregex "SGDT64m")>; def: InstRW<[HWWriteResGroup10], (instregex "SIDT64m")>; @@ -1620,30 +1463,29 @@ def: InstRW<[HWWriteResGroup10], (instregex "SLDT64m")>; def: InstRW<[HWWriteResGroup10], (instregex "SMSW16m")>; def: InstRW<[HWWriteResGroup10], (instregex "STC")>; def: InstRW<[HWWriteResGroup10], (instregex "STRm")>; -def: InstRW<[HWWriteResGroup10], (instregex "SUB(16|32|64)ri8")>; -def: InstRW<[HWWriteResGroup10], (instregex "SUB(16|32|64)rr(_REV?)")>; +def: InstRW<[HWWriteResGroup10], (instregex "SUB(16|32|64)ri")>; +def: InstRW<[HWWriteResGroup10], (instregex "SUB(16|32|64)rr(_REV)?")>; def: InstRW<[HWWriteResGroup10], (instregex "SUB8i8")>; def: InstRW<[HWWriteResGroup10], (instregex "SUB8ri")>; -def: InstRW<[HWWriteResGroup10], (instregex "SUB8rr(_REV?)")>; +def: InstRW<[HWWriteResGroup10], (instregex "SUB8rr(_REV)?")>; def: InstRW<[HWWriteResGroup10], (instregex "SYSCALL")>; def: InstRW<[HWWriteResGroup10], (instregex "TEST(16|32|64)rr")>; def: InstRW<[HWWriteResGroup10], (instregex "TEST8i8")>; def: InstRW<[HWWriteResGroup10], (instregex "TEST8ri")>; def: InstRW<[HWWriteResGroup10], (instregex "TEST8rr")>; def: 
InstRW<[HWWriteResGroup10], (instregex "XCHG(16|32|64)rr")>; -def: InstRW<[HWWriteResGroup10], (instregex "XOR(16|32|64)ri8")>; +def: InstRW<[HWWriteResGroup10], (instregex "XOR(16|32|64)ri")>; def: InstRW<[HWWriteResGroup10], (instregex "XOR(16|32|64)rr")>; def: InstRW<[HWWriteResGroup10], (instregex "XOR8i8")>; def: InstRW<[HWWriteResGroup10], (instregex "XOR8ri")>; def: InstRW<[HWWriteResGroup10], (instregex "XOR8rr")>; def HWWriteResGroup11 : SchedWriteRes<[HWPort0,HWPort23]> { - let Latency = 1; + let Latency = 6; let NumMicroOps = 2; let ResourceCycles = [1,1]; } def: InstRW<[HWWriteResGroup11], (instregex "CVTPS2PDrm")>; -def: InstRW<[HWWriteResGroup11], (instregex "CVTSS2SDrm")>; def: InstRW<[HWWriteResGroup11], (instregex "MMX_PSLLDrm")>; def: InstRW<[HWWriteResGroup11], (instregex "MMX_PSLLQrm")>; def: InstRW<[HWWriteResGroup11], (instregex "MMX_PSLLWrm")>; @@ -1652,39 +1494,95 @@ def: InstRW<[HWWriteResGroup11], (instregex "MMX_PSRAWrm")>; def: InstRW<[HWWriteResGroup11], (instregex "MMX_PSRLDrm")>; def: InstRW<[HWWriteResGroup11], (instregex "MMX_PSRLQrm")>; def: InstRW<[HWWriteResGroup11], (instregex "MMX_PSRLWrm")>; -def: InstRW<[HWWriteResGroup11], (instregex "VCVTPH2PSYrm")>; def: InstRW<[HWWriteResGroup11], (instregex "VCVTPH2PSrm")>; def: InstRW<[HWWriteResGroup11], (instregex "VCVTPS2PDrm")>; -def: InstRW<[HWWriteResGroup11], (instregex "VCVTSS2SDrm")>; -def: InstRW<[HWWriteResGroup11], (instregex "VPSLLDYrm")>; -def: InstRW<[HWWriteResGroup11], (instregex "VPSLLQYrm")>; -def: InstRW<[HWWriteResGroup11], (instregex "VPSLLVQYrm")>; -def: InstRW<[HWWriteResGroup11], (instregex "VPSLLVQrm")>; -def: InstRW<[HWWriteResGroup11], (instregex "VPSLLWYrm")>; -def: InstRW<[HWWriteResGroup11], (instregex "VPSRADYrm")>; -def: InstRW<[HWWriteResGroup11], (instregex "VPSRAWYrm")>; -def: InstRW<[HWWriteResGroup11], (instregex "VPSRLDYrm")>; -def: InstRW<[HWWriteResGroup11], (instregex "VPSRLQYrm")>; -def: InstRW<[HWWriteResGroup11], (instregex "VPSRLVQYrm")>; -def: InstRW<[HWWriteResGroup11], (instregex "VPSRLVQrm")>; -def: InstRW<[HWWriteResGroup11], (instregex "VPSRLWYrm")>; -def: InstRW<[HWWriteResGroup11], (instregex "VTESTPDYrm")>; -def: InstRW<[HWWriteResGroup11], (instregex "VTESTPDrm")>; -def: InstRW<[HWWriteResGroup11], (instregex "VTESTPSYrm")>; -def: InstRW<[HWWriteResGroup11], (instregex "VTESTPSrm")>; + +def HWWriteResGroup11_1 : SchedWriteRes<[HWPort0,HWPort23]> { + let Latency = 7; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[HWWriteResGroup11_1], (instregex "CVTSS2SDrm")>; +def: InstRW<[HWWriteResGroup11_1], (instregex "VCVTPH2PSYrm")>; +def: InstRW<[HWWriteResGroup11_1], (instregex "VCVTSS2SDrm")>; +def: InstRW<[HWWriteResGroup11_1], (instregex "VPSLLVQrm")>; +def: InstRW<[HWWriteResGroup11_1], (instregex "VPSRLVQrm")>; +def: InstRW<[HWWriteResGroup11_1], (instregex "VTESTPDrm")>; +def: InstRW<[HWWriteResGroup11_1], (instregex "VTESTPSrm")>; + +def HWWriteResGroup11_2 : SchedWriteRes<[HWPort0,HWPort23]> { + let Latency = 8; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[HWWriteResGroup11_2], (instregex "VPSLLDYrm")>; +def: InstRW<[HWWriteResGroup11_2], (instregex "VPSLLQYrm")>; +def: InstRW<[HWWriteResGroup11_2], (instregex "VPSLLVQYrm")>; +def: InstRW<[HWWriteResGroup11_2], (instregex "VPSLLWYrm")>; +def: InstRW<[HWWriteResGroup11_2], (instregex "VPSRADYrm")>; +def: InstRW<[HWWriteResGroup11_2], (instregex "VPSRAWYrm")>; +def: InstRW<[HWWriteResGroup11_2], (instregex "VPSRLDYrm")>; +def: InstRW<[HWWriteResGroup11_2], 
(instregex "VPSRLQYrm")>; +def: InstRW<[HWWriteResGroup11_2], (instregex "VPSRLVQYrm")>; +def: InstRW<[HWWriteResGroup11_2], (instregex "VPSRLWYrm")>; +def: InstRW<[HWWriteResGroup11_2], (instregex "VTESTPDYrm")>; +def: InstRW<[HWWriteResGroup11_2], (instregex "VTESTPSYrm")>; def HWWriteResGroup12 : SchedWriteRes<[HWPort1,HWPort23]> { - let Latency = 1; + let Latency = 8; let NumMicroOps = 2; let ResourceCycles = [1,1]; } +def: InstRW<[HWWriteResGroup12], (instregex "ADDSDrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "ADDSSrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "BSF(16|32|64)rm")>; +def: InstRW<[HWWriteResGroup12], (instregex "BSR(16|32|64)rm")>; +def: InstRW<[HWWriteResGroup12], (instregex "CMPSDrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "CMPSSrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "COMISDrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "COMISSrm")>; def: InstRW<[HWWriteResGroup12], (instregex "FCOM32m")>; def: InstRW<[HWWriteResGroup12], (instregex "FCOM64m")>; def: InstRW<[HWWriteResGroup12], (instregex "FCOMP32m")>; def: InstRW<[HWWriteResGroup12], (instregex "FCOMP64m")>; +def: InstRW<[HWWriteResGroup12], (instregex "IMUL(16|32|64)m")>; +def: InstRW<[HWWriteResGroup12], (instregex "IMUL(16|32|64)rm(i8)?")>; +def: InstRW<[HWWriteResGroup12], (instregex "IMUL8m")>; +def: InstRW<[HWWriteResGroup12], (instregex "LZCNT(16|32|64)rm")>; +def: InstRW<[HWWriteResGroup12], (instregex "MAX(C?)SDrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "MAX(C?)SSrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "MIN(C?)SDrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "MIN(C?)SSrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "MMX_CVTPI2PSirm")>; +def: InstRW<[HWWriteResGroup12], (instregex "MMX_CVTPS2PIirm")>; +def: InstRW<[HWWriteResGroup12], (instregex "MMX_CVTTPS2PIirm")>; +def: InstRW<[HWWriteResGroup12], (instregex "MUL(16|32|64)m")>; +def: InstRW<[HWWriteResGroup12], (instregex "MUL8m")>; +def: InstRW<[HWWriteResGroup12], (instregex "PDEP(32|64)rm")>; +def: InstRW<[HWWriteResGroup12], (instregex "PEXT(32|64)rm")>; +def: InstRW<[HWWriteResGroup12], (instregex "POPCNT(16|32|64)rm")>; +def: InstRW<[HWWriteResGroup12], (instregex "SUBSDrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "SUBSSrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "TZCNT(16|32|64)rm")>; +def: InstRW<[HWWriteResGroup12], (instregex "UCOMISDrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "UCOMISSrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "VADDSDrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "VADDSSrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "VCMPSDrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "VCMPSSrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "VCOMISDrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "VCOMISSrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "VMAX(C?)SDrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "VMAX(C?)SSrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "VMIN(C?)SDrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "VMIN(C?)SSrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "VSUBSDrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "VSUBSSrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "VUCOMISDrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "VUCOMISSrm")>; def HWWriteResGroup13 : SchedWriteRes<[HWPort5,HWPort23]> { - let Latency = 1; + let Latency = 7; let NumMicroOps = 2; let ResourceCycles = [1,1]; } @@ -1693,20 +1591,6 @@ def: InstRW<[HWWriteResGroup13], (instregex "ANDNPSrm")>; def: 
InstRW<[HWWriteResGroup13], (instregex "ANDPDrm")>; def: InstRW<[HWWriteResGroup13], (instregex "ANDPSrm")>; def: InstRW<[HWWriteResGroup13], (instregex "INSERTPSrm")>; -def: InstRW<[HWWriteResGroup13], (instregex "MMX_PALIGNR64irm")>; -def: InstRW<[HWWriteResGroup13], (instregex "MMX_PINSRWirmi")>; -def: InstRW<[HWWriteResGroup13], (instregex "MMX_PSHUFBrm64")>; -def: InstRW<[HWWriteResGroup13], (instregex "MMX_PSHUFWmi")>; -def: InstRW<[HWWriteResGroup13], (instregex "MMX_PUNPCKHBWirm")>; -def: InstRW<[HWWriteResGroup13], (instregex "MMX_PUNPCKHDQirm")>; -def: InstRW<[HWWriteResGroup13], (instregex "MMX_PUNPCKHWDirm")>; -def: InstRW<[HWWriteResGroup13], (instregex "MMX_PUNPCKLBWirm")>; -def: InstRW<[HWWriteResGroup13], (instregex "MMX_PUNPCKLDQirm")>; -def: InstRW<[HWWriteResGroup13], (instregex "MMX_PUNPCKLWDirm")>; -def: InstRW<[HWWriteResGroup13], (instregex "MOVHPDrm")>; -def: InstRW<[HWWriteResGroup13], (instregex "MOVHPSrm")>; -def: InstRW<[HWWriteResGroup13], (instregex "MOVLPDrm")>; -def: InstRW<[HWWriteResGroup13], (instregex "MOVLPSrm")>; def: InstRW<[HWWriteResGroup13], (instregex "ORPDrm")>; def: InstRW<[HWWriteResGroup13], (instregex "ORPSrm")>; def: InstRW<[HWWriteResGroup13], (instregex "PACKSSDWrm")>; @@ -1715,22 +1599,6 @@ def: InstRW<[HWWriteResGroup13], (instregex "PACKUSDWrm")>; def: InstRW<[HWWriteResGroup13], (instregex "PACKUSWBrm")>; def: InstRW<[HWWriteResGroup13], (instregex "PALIGNRrmi")>; def: InstRW<[HWWriteResGroup13], (instregex "PBLENDWrmi")>; -def: InstRW<[HWWriteResGroup13], (instregex "PINSRBrm")>; -def: InstRW<[HWWriteResGroup13], (instregex "PINSRDrm")>; -def: InstRW<[HWWriteResGroup13], (instregex "PINSRQrm")>; -def: InstRW<[HWWriteResGroup13], (instregex "PINSRWrmi")>; -def: InstRW<[HWWriteResGroup13], (instregex "PMOVSXBDrm")>; -def: InstRW<[HWWriteResGroup13], (instregex "PMOVSXBQrm")>; -def: InstRW<[HWWriteResGroup13], (instregex "PMOVSXBWrm")>; -def: InstRW<[HWWriteResGroup13], (instregex "PMOVSXDQrm")>; -def: InstRW<[HWWriteResGroup13], (instregex "PMOVSXWDrm")>; -def: InstRW<[HWWriteResGroup13], (instregex "PMOVSXWQrm")>; -def: InstRW<[HWWriteResGroup13], (instregex "PMOVZXBDrm")>; -def: InstRW<[HWWriteResGroup13], (instregex "PMOVZXBQrm")>; -def: InstRW<[HWWriteResGroup13], (instregex "PMOVZXBWrm")>; -def: InstRW<[HWWriteResGroup13], (instregex "PMOVZXDQrm")>; -def: InstRW<[HWWriteResGroup13], (instregex "PMOVZXWDrm")>; -def: InstRW<[HWWriteResGroup13], (instregex "PMOVZXWQrm")>; def: InstRW<[HWWriteResGroup13], (instregex "PSHUFBrm")>; def: InstRW<[HWWriteResGroup13], (instregex "PSHUFDmi")>; def: InstRW<[HWWriteResGroup13], (instregex "PSHUFHWmi")>; @@ -1749,104 +1617,149 @@ def: InstRW<[HWWriteResGroup13], (instregex "UNPCKHPDrm")>; def: InstRW<[HWWriteResGroup13], (instregex "UNPCKHPSrm")>; def: InstRW<[HWWriteResGroup13], (instregex "UNPCKLPDrm")>; def: InstRW<[HWWriteResGroup13], (instregex "UNPCKLPSrm")>; -def: InstRW<[HWWriteResGroup13], (instregex "VANDNPDYrm")>; def: InstRW<[HWWriteResGroup13], (instregex "VANDNPDrm")>; -def: InstRW<[HWWriteResGroup13], (instregex "VANDNPSYrm")>; def: InstRW<[HWWriteResGroup13], (instregex "VANDNPSrm")>; -def: InstRW<[HWWriteResGroup13], (instregex "VANDPDYrm")>; def: InstRW<[HWWriteResGroup13], (instregex "VANDPDrm")>; -def: InstRW<[HWWriteResGroup13], (instregex "VANDPSYrm")>; def: InstRW<[HWWriteResGroup13], (instregex "VANDPSrm")>; def: InstRW<[HWWriteResGroup13], (instregex "VINSERTPSrm")>; -def: InstRW<[HWWriteResGroup13], (instregex "VMOVHPDrm")>; -def: InstRW<[HWWriteResGroup13], (instregex 
"VMOVHPSrm")>; -def: InstRW<[HWWriteResGroup13], (instregex "VMOVLPDrm")>; -def: InstRW<[HWWriteResGroup13], (instregex "VMOVLPSrm")>; -def: InstRW<[HWWriteResGroup13], (instregex "VORPDYrm")>; def: InstRW<[HWWriteResGroup13], (instregex "VORPDrm")>; -def: InstRW<[HWWriteResGroup13], (instregex "VORPSYrm")>; def: InstRW<[HWWriteResGroup13], (instregex "VORPSrm")>; -def: InstRW<[HWWriteResGroup13], (instregex "VPACKSSDWYrm")>; def: InstRW<[HWWriteResGroup13], (instregex "VPACKSSDWrm")>; -def: InstRW<[HWWriteResGroup13], (instregex "VPACKSSWBYrm")>; def: InstRW<[HWWriteResGroup13], (instregex "VPACKSSWBrm")>; -def: InstRW<[HWWriteResGroup13], (instregex "VPACKUSDWYrm")>; def: InstRW<[HWWriteResGroup13], (instregex "VPACKUSDWrm")>; -def: InstRW<[HWWriteResGroup13], (instregex "VPACKUSWBYrm")>; def: InstRW<[HWWriteResGroup13], (instregex "VPACKUSWBrm")>; -def: InstRW<[HWWriteResGroup13], (instregex "VPALIGNRYrmi")>; def: InstRW<[HWWriteResGroup13], (instregex "VPALIGNRrmi")>; -def: InstRW<[HWWriteResGroup13], (instregex "VPBLENDWYrmi")>; def: InstRW<[HWWriteResGroup13], (instregex "VPBLENDWrmi")>; -def: InstRW<[HWWriteResGroup13], (instregex "VPERMILPDYmi")>; -def: InstRW<[HWWriteResGroup13], (instregex "VPERMILPDYrm")>; def: InstRW<[HWWriteResGroup13], (instregex "VPERMILPDmi")>; def: InstRW<[HWWriteResGroup13], (instregex "VPERMILPDrm")>; -def: InstRW<[HWWriteResGroup13], (instregex "VPERMILPSYmi")>; -def: InstRW<[HWWriteResGroup13], (instregex "VPERMILPSYrm")>; def: InstRW<[HWWriteResGroup13], (instregex "VPERMILPSmi")>; def: InstRW<[HWWriteResGroup13], (instregex "VPERMILPSrm")>; -def: InstRW<[HWWriteResGroup13], (instregex "VPINSRBrm")>; -def: InstRW<[HWWriteResGroup13], (instregex "VPINSRDrm")>; -def: InstRW<[HWWriteResGroup13], (instregex "VPINSRQrm")>; -def: InstRW<[HWWriteResGroup13], (instregex "VPINSRWrmi")>; -def: InstRW<[HWWriteResGroup13], (instregex "VPMOVSXBDrm")>; -def: InstRW<[HWWriteResGroup13], (instregex "VPMOVSXBQrm")>; -def: InstRW<[HWWriteResGroup13], (instregex "VPMOVSXBWrm")>; -def: InstRW<[HWWriteResGroup13], (instregex "VPMOVSXDQrm")>; -def: InstRW<[HWWriteResGroup13], (instregex "VPMOVSXWDrm")>; -def: InstRW<[HWWriteResGroup13], (instregex "VPMOVSXWQrm")>; -def: InstRW<[HWWriteResGroup13], (instregex "VPMOVZXBDrm")>; -def: InstRW<[HWWriteResGroup13], (instregex "VPMOVZXBQrm")>; -def: InstRW<[HWWriteResGroup13], (instregex "VPMOVZXBWrm")>; -def: InstRW<[HWWriteResGroup13], (instregex "VPMOVZXDQrm")>; -def: InstRW<[HWWriteResGroup13], (instregex "VPMOVZXWDrm")>; -def: InstRW<[HWWriteResGroup13], (instregex "VPMOVZXWQrm")>; -def: InstRW<[HWWriteResGroup13], (instregex "VPSHUFBYrm")>; def: InstRW<[HWWriteResGroup13], (instregex "VPSHUFBrm")>; -def: InstRW<[HWWriteResGroup13], (instregex "VPSHUFDYmi")>; def: InstRW<[HWWriteResGroup13], (instregex "VPSHUFDmi")>; -def: InstRW<[HWWriteResGroup13], (instregex "VPSHUFHWYmi")>; def: InstRW<[HWWriteResGroup13], (instregex "VPSHUFHWmi")>; -def: InstRW<[HWWriteResGroup13], (instregex "VPSHUFLWYmi")>; def: InstRW<[HWWriteResGroup13], (instregex "VPSHUFLWmi")>; -def: InstRW<[HWWriteResGroup13], (instregex "VPUNPCKHBWYrm")>; def: InstRW<[HWWriteResGroup13], (instregex "VPUNPCKHBWrm")>; -def: InstRW<[HWWriteResGroup13], (instregex "VPUNPCKHDQYrm")>; def: InstRW<[HWWriteResGroup13], (instregex "VPUNPCKHDQrm")>; -def: InstRW<[HWWriteResGroup13], (instregex "VPUNPCKHQDQYrm")>; def: InstRW<[HWWriteResGroup13], (instregex "VPUNPCKHQDQrm")>; -def: InstRW<[HWWriteResGroup13], (instregex "VPUNPCKHWDYrm")>; def: InstRW<[HWWriteResGroup13], 
(instregex "VPUNPCKHWDrm")>; -def: InstRW<[HWWriteResGroup13], (instregex "VPUNPCKLBWYrm")>; def: InstRW<[HWWriteResGroup13], (instregex "VPUNPCKLBWrm")>; -def: InstRW<[HWWriteResGroup13], (instregex "VPUNPCKLDQYrm")>; def: InstRW<[HWWriteResGroup13], (instregex "VPUNPCKLDQrm")>; -def: InstRW<[HWWriteResGroup13], (instregex "VPUNPCKLQDQYrm")>; def: InstRW<[HWWriteResGroup13], (instregex "VPUNPCKLQDQrm")>; -def: InstRW<[HWWriteResGroup13], (instregex "VPUNPCKLWDYrm")>; def: InstRW<[HWWriteResGroup13], (instregex "VPUNPCKLWDrm")>; -def: InstRW<[HWWriteResGroup13], (instregex "VSHUFPDYrmi")>; def: InstRW<[HWWriteResGroup13], (instregex "VSHUFPDrmi")>; -def: InstRW<[HWWriteResGroup13], (instregex "VSHUFPSYrmi")>; def: InstRW<[HWWriteResGroup13], (instregex "VSHUFPSrmi")>; -def: InstRW<[HWWriteResGroup13], (instregex "VUNPCKHPDYrm")>; def: InstRW<[HWWriteResGroup13], (instregex "VUNPCKHPDrm")>; -def: InstRW<[HWWriteResGroup13], (instregex "VUNPCKHPSYrm")>; def: InstRW<[HWWriteResGroup13], (instregex "VUNPCKHPSrm")>; -def: InstRW<[HWWriteResGroup13], (instregex "VUNPCKLPDYrm")>; def: InstRW<[HWWriteResGroup13], (instregex "VUNPCKLPDrm")>; -def: InstRW<[HWWriteResGroup13], (instregex "VUNPCKLPSYrm")>; def: InstRW<[HWWriteResGroup13], (instregex "VUNPCKLPSrm")>; -def: InstRW<[HWWriteResGroup13], (instregex "VXORPDYrm")>; def: InstRW<[HWWriteResGroup13], (instregex "VXORPDrm")>; -def: InstRW<[HWWriteResGroup13], (instregex "VXORPSYrm")>; def: InstRW<[HWWriteResGroup13], (instregex "VXORPSrm")>; def: InstRW<[HWWriteResGroup13], (instregex "XORPDrm")>; def: InstRW<[HWWriteResGroup13], (instregex "XORPSrm")>; +def HWWriteResGroup13_1 : SchedWriteRes<[HWPort5,HWPort23]> { + let Latency = 8; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[HWWriteResGroup13_1], (instregex "VANDNPDYrm")>; +def: InstRW<[HWWriteResGroup13_1], (instregex "VANDNPSYrm")>; +def: InstRW<[HWWriteResGroup13_1], (instregex "VANDPDYrm")>; +def: InstRW<[HWWriteResGroup13_1], (instregex "VANDPSYrm")>; +def: InstRW<[HWWriteResGroup13_1], (instregex "VORPDYrm")>; +def: InstRW<[HWWriteResGroup13_1], (instregex "VORPSYrm")>; +def: InstRW<[HWWriteResGroup13_1], (instregex "VPACKSSDWYrm")>; +def: InstRW<[HWWriteResGroup13_1], (instregex "VPACKSSWBYrm")>; +def: InstRW<[HWWriteResGroup13_1], (instregex "VPACKUSDWYrm")>; +def: InstRW<[HWWriteResGroup13_1], (instregex "VPACKUSWBYrm")>; +def: InstRW<[HWWriteResGroup13_1], (instregex "VPALIGNRYrmi")>; +def: InstRW<[HWWriteResGroup13_1], (instregex "VPBLENDWYrmi")>; +def: InstRW<[HWWriteResGroup13_1], (instregex "VPERMILPDYmi")>; +def: InstRW<[HWWriteResGroup13_1], (instregex "VPERMILPDYrm")>; +def: InstRW<[HWWriteResGroup13_1], (instregex "VPERMILPSYmi")>; +def: InstRW<[HWWriteResGroup13_1], (instregex "VPERMILPSYrm")>; +def: InstRW<[HWWriteResGroup13_1], (instregex "VPMOVSXBDYrm")>; +def: InstRW<[HWWriteResGroup13_1], (instregex "VPMOVSXBQYrm")>; +def: InstRW<[HWWriteResGroup13_1], (instregex "VPMOVSXWQYrm")>; +def: InstRW<[HWWriteResGroup13_1], (instregex "VPSHUFBYrm")>; +def: InstRW<[HWWriteResGroup13_1], (instregex "VPSHUFDYmi")>; +def: InstRW<[HWWriteResGroup13_1], (instregex "VPSHUFHWYmi")>; +def: InstRW<[HWWriteResGroup13_1], (instregex "VPSHUFLWYmi")>; +def: InstRW<[HWWriteResGroup13_1], (instregex "VPUNPCKHBWYrm")>; +def: InstRW<[HWWriteResGroup13_1], (instregex "VPUNPCKHDQYrm")>; +def: InstRW<[HWWriteResGroup13_1], (instregex "VPUNPCKHQDQYrm")>; +def: InstRW<[HWWriteResGroup13_1], (instregex "VPUNPCKHWDYrm")>; +def: InstRW<[HWWriteResGroup13_1], (instregex 
"VPUNPCKLBWYrm")>; +def: InstRW<[HWWriteResGroup13_1], (instregex "VPUNPCKLDQYrm")>; +def: InstRW<[HWWriteResGroup13_1], (instregex "VPUNPCKLQDQYrm")>; +def: InstRW<[HWWriteResGroup13_1], (instregex "VPUNPCKLWDYrm")>; +def: InstRW<[HWWriteResGroup13_1], (instregex "VSHUFPDYrmi")>; +def: InstRW<[HWWriteResGroup13_1], (instregex "VSHUFPSYrmi")>; +def: InstRW<[HWWriteResGroup13_1], (instregex "VUNPCKHPDYrm")>; +def: InstRW<[HWWriteResGroup13_1], (instregex "VUNPCKHPSYrm")>; +def: InstRW<[HWWriteResGroup13_1], (instregex "VUNPCKLPDYrm")>; +def: InstRW<[HWWriteResGroup13_1], (instregex "VUNPCKLPSYrm")>; +def: InstRW<[HWWriteResGroup13_1], (instregex "VXORPDYrm")>; +def: InstRW<[HWWriteResGroup13_1], (instregex "VXORPSYrm")>; + +def HWWriteResGroup13_2 : SchedWriteRes<[HWPort5,HWPort23]> { + let Latency = 6; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[HWWriteResGroup13_2], (instregex "MMX_PALIGNR64irm")>; +def: InstRW<[HWWriteResGroup13_2], (instregex "MMX_PINSRWirmi")>; +def: InstRW<[HWWriteResGroup13_2], (instregex "MMX_PSHUFBrm64")>; +def: InstRW<[HWWriteResGroup13_2], (instregex "MMX_PSHUFWmi")>; +def: InstRW<[HWWriteResGroup13_2], (instregex "MMX_PUNPCKHBWirm")>; +def: InstRW<[HWWriteResGroup13_2], (instregex "MMX_PUNPCKHDQirm")>; +def: InstRW<[HWWriteResGroup13_2], (instregex "MMX_PUNPCKHWDirm")>; +def: InstRW<[HWWriteResGroup13_2], (instregex "MMX_PUNPCKLBWirm")>; +def: InstRW<[HWWriteResGroup13_2], (instregex "MMX_PUNPCKLDQirm")>; +def: InstRW<[HWWriteResGroup13_2], (instregex "MMX_PUNPCKLWDirm")>; +def: InstRW<[HWWriteResGroup13_2], (instregex "MOVHPDrm")>; +def: InstRW<[HWWriteResGroup13_2], (instregex "MOVHPSrm")>; +def: InstRW<[HWWriteResGroup13_2], (instregex "MOVLPDrm")>; +def: InstRW<[HWWriteResGroup13_2], (instregex "MOVLPSrm")>; +def: InstRW<[HWWriteResGroup13_2], (instregex "PINSRBrm")>; +def: InstRW<[HWWriteResGroup13_2], (instregex "PINSRDrm")>; +def: InstRW<[HWWriteResGroup13_2], (instregex "PINSRQrm")>; +def: InstRW<[HWWriteResGroup13_2], (instregex "PINSRWrmi")>; +def: InstRW<[HWWriteResGroup13_2], (instregex "PMOVSXBDrm")>; +def: InstRW<[HWWriteResGroup13_2], (instregex "PMOVSXBQrm")>; +def: InstRW<[HWWriteResGroup13_2], (instregex "PMOVSXBWrm")>; +def: InstRW<[HWWriteResGroup13_2], (instregex "PMOVSXDQrm")>; +def: InstRW<[HWWriteResGroup13_2], (instregex "PMOVSXWDrm")>; +def: InstRW<[HWWriteResGroup13_2], (instregex "PMOVSXWQrm")>; +def: InstRW<[HWWriteResGroup13_2], (instregex "PMOVZXBDrm")>; +def: InstRW<[HWWriteResGroup13_2], (instregex "PMOVZXBQrm")>; +def: InstRW<[HWWriteResGroup13_2], (instregex "PMOVZXBWrm")>; +def: InstRW<[HWWriteResGroup13_2], (instregex "PMOVZXDQrm")>; +def: InstRW<[HWWriteResGroup13_2], (instregex "PMOVZXWDrm")>; +def: InstRW<[HWWriteResGroup13_2], (instregex "PMOVZXWQrm")>; +def: InstRW<[HWWriteResGroup13_2], (instregex "VMOVHPDrm")>; +def: InstRW<[HWWriteResGroup13_2], (instregex "VMOVHPSrm")>; +def: InstRW<[HWWriteResGroup13_2], (instregex "VMOVLPDrm")>; +def: InstRW<[HWWriteResGroup13_2], (instregex "VMOVLPSrm")>; +def: InstRW<[HWWriteResGroup13_2], (instregex "VPINSRBrm")>; +def: InstRW<[HWWriteResGroup13_2], (instregex "VPINSRDrm")>; +def: InstRW<[HWWriteResGroup13_2], (instregex "VPINSRQrm")>; +def: InstRW<[HWWriteResGroup13_2], (instregex "VPINSRWrmi")>; +def: InstRW<[HWWriteResGroup13_2], (instregex "VPMOVSXBDrm")>; +def: InstRW<[HWWriteResGroup13_2], (instregex "VPMOVSXBQrm")>; +def: InstRW<[HWWriteResGroup13_2], (instregex "VPMOVSXBWrm")>; +def: InstRW<[HWWriteResGroup13_2], (instregex "VPMOVSXDQrm")>; 
+def: InstRW<[HWWriteResGroup13_2], (instregex "VPMOVSXWDrm")>; +def: InstRW<[HWWriteResGroup13_2], (instregex "VPMOVSXWQrm")>; +def: InstRW<[HWWriteResGroup13_2], (instregex "VPMOVZXBDrm")>; +def: InstRW<[HWWriteResGroup13_2], (instregex "VPMOVZXBQrm")>; +def: InstRW<[HWWriteResGroup13_2], (instregex "VPMOVZXBWrm")>; +def: InstRW<[HWWriteResGroup13_2], (instregex "VPMOVZXDQrm")>; +def: InstRW<[HWWriteResGroup13_2], (instregex "VPMOVZXWDrm")>; +def: InstRW<[HWWriteResGroup13_2], (instregex "VPMOVZXWQrm")>; + def HWWriteResGroup14 : SchedWriteRes<[HWPort6,HWPort23]> { - let Latency = 1; + let Latency = 6; let NumMicroOps = 2; let ResourceCycles = [1,1]; } @@ -1854,7 +1767,7 @@ def: InstRW<[HWWriteResGroup14], (instregex "FARJMP64")>; def: InstRW<[HWWriteResGroup14], (instregex "JMP(16|32|64)m")>; def HWWriteResGroup15 : SchedWriteRes<[HWPort23,HWPort06]> { - let Latency = 1; + let Latency = 6; let NumMicroOps = 2; let ResourceCycles = [1,1]; } @@ -1869,20 +1782,15 @@ def: InstRW<[HWWriteResGroup15], (instregex "SHRX32rm")>; def: InstRW<[HWWriteResGroup15], (instregex "SHRX64rm")>; def HWWriteResGroup16 : SchedWriteRes<[HWPort23,HWPort15]> { - let Latency = 1; + let Latency = 6; let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[HWWriteResGroup16], (instregex "ANDN32rm")>; -def: InstRW<[HWWriteResGroup16], (instregex "ANDN64rm")>; -def: InstRW<[HWWriteResGroup16], (instregex "BLSI32rm")>; -def: InstRW<[HWWriteResGroup16], (instregex "BLSI64rm")>; -def: InstRW<[HWWriteResGroup16], (instregex "BLSMSK32rm")>; -def: InstRW<[HWWriteResGroup16], (instregex "BLSMSK64rm")>; -def: InstRW<[HWWriteResGroup16], (instregex "BLSR32rm")>; -def: InstRW<[HWWriteResGroup16], (instregex "BLSR64rm")>; -def: InstRW<[HWWriteResGroup16], (instregex "BZHI32rm")>; -def: InstRW<[HWWriteResGroup16], (instregex "BZHI64rm")>; +def: InstRW<[HWWriteResGroup16], (instregex "ANDN(32|64)rm")>; +def: InstRW<[HWWriteResGroup16], (instregex "BLSI(32|64)rm")>; +def: InstRW<[HWWriteResGroup16], (instregex "BLSMSK(32|64)rm")>; +def: InstRW<[HWWriteResGroup16], (instregex "BLSR(32|64)rm")>; +def: InstRW<[HWWriteResGroup16], (instregex "BZHI(32|64)rm")>; def: InstRW<[HWWriteResGroup16], (instregex "MMX_PABSBrm64")>; def: InstRW<[HWWriteResGroup16], (instregex "MMX_PABSDrm64")>; def: InstRW<[HWWriteResGroup16], (instregex "MMX_PABSWrm64")>; @@ -1918,170 +1826,194 @@ def: InstRW<[HWWriteResGroup16], (instregex "MMX_PSUBUSBirm")>; def: InstRW<[HWWriteResGroup16], (instregex "MMX_PSUBUSWirm")>; def: InstRW<[HWWriteResGroup16], (instregex "MMX_PSUBWirm")>; def: InstRW<[HWWriteResGroup16], (instregex "MOVBE(16|32|64)rm")>; -def: InstRW<[HWWriteResGroup16], (instregex "PABSBrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "PABSDrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "PABSWrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "PADDBrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "PADDDrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "PADDQrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "PADDSBrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "PADDSWrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "PADDUSBrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "PADDUSWrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "PADDWrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "PAVGBrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "PAVGWrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "PCMPEQBrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "PCMPEQDrm")>; -def: InstRW<[HWWriteResGroup16], 
(instregex "PCMPEQQrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "PCMPEQWrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "PCMPGTBrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "PCMPGTDrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "PCMPGTWrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "PMAXSBrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "PMAXSDrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "PMAXSWrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "PMAXUBrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "PMAXUDrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "PMAXUWrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "PMINSBrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "PMINSDrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "PMINSWrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "PMINUBrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "PMINUDrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "PMINUWrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "PSIGNBrm128")>; -def: InstRW<[HWWriteResGroup16], (instregex "PSIGNDrm128")>; -def: InstRW<[HWWriteResGroup16], (instregex "PSIGNWrm128")>; -def: InstRW<[HWWriteResGroup16], (instregex "PSUBBrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "PSUBDrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "PSUBQrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "PSUBSBrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "PSUBSWrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "PSUBUSBrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "PSUBUSWrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "PSUBWrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPABSBYrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPABSBrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPABSDYrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPABSDrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPABSWYrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPABSWrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPADDBYrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPADDBrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPADDDYrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPADDDrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPADDQYrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPADDQrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPADDSBYrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPADDSBrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPADDSWYrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPADDSWrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPADDUSBYrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPADDUSBrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPADDUSWYrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPADDUSWrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPADDWYrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPADDWrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPAVGBYrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPAVGBrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPAVGWYrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPAVGWrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPCMPEQBYrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPCMPEQBrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPCMPEQDYrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPCMPEQDrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPCMPEQQYrm")>; -def: InstRW<[HWWriteResGroup16], (instregex 
"VPCMPEQQrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPCMPEQWYrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPCMPEQWrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPCMPGTBYrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPCMPGTBrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPCMPGTDYrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPCMPGTDrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPCMPGTWYrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPCMPGTWrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPMAXSBYrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPMAXSBrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPMAXSDYrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPMAXSDrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPMAXSWYrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPMAXSWrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPMAXUBYrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPMAXUBrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPMAXUDYrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPMAXUDrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPMAXUWYrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPMAXUWrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPMINSBYrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPMINSBrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPMINSDYrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPMINSDrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPMINSWYrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPMINSWrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPMINUBYrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPMINUBrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPMINUDYrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPMINUDrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPMINUWYrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPMINUWrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPSIGNBYrm256")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPSIGNBrm128")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPSIGNDYrm256")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPSIGNDrm128")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPSIGNWYrm256")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPSIGNWrm128")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPSUBBYrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPSUBBrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPSUBDYrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPSUBDrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPSUBQYrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPSUBQrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPSUBSBYrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPSUBSBrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPSUBSWYrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPSUBSWrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPSUBUSBYrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPSUBUSBrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPSUBUSWYrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPSUBUSWrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPSUBWYrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPSUBWrm")>; + +def HWWriteResGroup16_1 : SchedWriteRes<[HWPort23,HWPort15]> { + let Latency = 7; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[HWWriteResGroup16_1], (instregex "PABSBrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex 
"PABSDrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "PABSWrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "PADDBrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "PADDDrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "PADDQrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "PADDSBrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "PADDSWrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "PADDUSBrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "PADDUSWrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "PADDWrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "PAVGBrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "PAVGWrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "PCMPEQBrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "PCMPEQDrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "PCMPEQQrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "PCMPEQWrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "PCMPGTBrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "PCMPGTDrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "PCMPGTWrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "PMAXSBrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "PMAXSDrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "PMAXSWrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "PMAXUBrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "PMAXUDrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "PMAXUWrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "PMINSBrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "PMINSDrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "PMINSWrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "PMINUBrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "PMINUDrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "PMINUWrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "PSIGNBrm128")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "PSIGNDrm128")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "PSIGNWrm128")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "PSUBBrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "PSUBDrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "PSUBQrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "PSUBSBrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "PSUBSWrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "PSUBUSBrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "PSUBUSWrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "PSUBWrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "VPABSBrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "VPABSDrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "VPABSWrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "VPADDBrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "VPADDDrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "VPADDQrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "VPADDSBrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "VPADDSWrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "VPADDUSBrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "VPADDUSWrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "VPADDWrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "VPAVGBrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "VPAVGWrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "VPCMPEQBrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "VPCMPEQDrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "VPCMPEQQrm")>; +def: InstRW<[HWWriteResGroup16_1], 
(instregex "VPCMPEQWrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "VPCMPGTBrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "VPCMPGTDrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "VPCMPGTWrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "VPMAXSBrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "VPMAXSDrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "VPMAXSWrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "VPMAXUBrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "VPMAXUDrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "VPMAXUWrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "VPMINSBrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "VPMINSDrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "VPMINSWrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "VPMINUBrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "VPMINUDrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "VPMINUWrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "VPSIGNBrm128")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "VPSIGNDrm128")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "VPSIGNWrm128")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "VPSUBBrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "VPSUBDrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "VPSUBQrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "VPSUBSBrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "VPSUBSWrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "VPSUBUSBrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "VPSUBUSWrm")>; +def: InstRW<[HWWriteResGroup16_1], (instregex "VPSUBWrm")>; + +def HWWriteResGroup16_2 : SchedWriteRes<[HWPort23,HWPort15]> { + let Latency = 8; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[HWWriteResGroup16_2], (instregex "VPABSBYrm")>; +def: InstRW<[HWWriteResGroup16_2], (instregex "VPABSDYrm")>; +def: InstRW<[HWWriteResGroup16_2], (instregex "VPABSWYrm")>; +def: InstRW<[HWWriteResGroup16_2], (instregex "VPADDBYrm")>; +def: InstRW<[HWWriteResGroup16_2], (instregex "VPADDDYrm")>; +def: InstRW<[HWWriteResGroup16_2], (instregex "VPADDQYrm")>; +def: InstRW<[HWWriteResGroup16_2], (instregex "VPADDSBYrm")>; +def: InstRW<[HWWriteResGroup16_2], (instregex "VPADDSWYrm")>; +def: InstRW<[HWWriteResGroup16_2], (instregex "VPADDUSBYrm")>; +def: InstRW<[HWWriteResGroup16_2], (instregex "VPADDUSWYrm")>; +def: InstRW<[HWWriteResGroup16_2], (instregex "VPADDWYrm")>; +def: InstRW<[HWWriteResGroup16_2], (instregex "VPAVGBYrm")>; +def: InstRW<[HWWriteResGroup16_2], (instregex "VPAVGWYrm")>; +def: InstRW<[HWWriteResGroup16_2], (instregex "VPCMPEQBYrm")>; +def: InstRW<[HWWriteResGroup16_2], (instregex "VPCMPEQDYrm")>; +def: InstRW<[HWWriteResGroup16_2], (instregex "VPCMPEQQYrm")>; +def: InstRW<[HWWriteResGroup16_2], (instregex "VPCMPEQWYrm")>; +def: InstRW<[HWWriteResGroup16_2], (instregex "VPCMPGTBYrm")>; +def: InstRW<[HWWriteResGroup16_2], (instregex "VPCMPGTDYrm")>; +def: InstRW<[HWWriteResGroup16_2], (instregex "VPCMPGTWYrm")>; +def: InstRW<[HWWriteResGroup16_2], (instregex "VPMAXSBYrm")>; +def: InstRW<[HWWriteResGroup16_2], (instregex "VPMAXSDYrm")>; +def: InstRW<[HWWriteResGroup16_2], (instregex "VPMAXSWYrm")>; +def: InstRW<[HWWriteResGroup16_2], (instregex "VPMAXUBYrm")>; +def: InstRW<[HWWriteResGroup16_2], (instregex "VPMAXUDYrm")>; +def: InstRW<[HWWriteResGroup16_2], (instregex "VPMAXUWYrm")>; +def: InstRW<[HWWriteResGroup16_2], (instregex "VPMINSBYrm")>; +def: InstRW<[HWWriteResGroup16_2], (instregex "VPMINSDYrm")>; 
+def: InstRW<[HWWriteResGroup16_2], (instregex "VPMINSWYrm")>; +def: InstRW<[HWWriteResGroup16_2], (instregex "VPMINUBYrm")>; +def: InstRW<[HWWriteResGroup16_2], (instregex "VPMINUDYrm")>; +def: InstRW<[HWWriteResGroup16_2], (instregex "VPMINUWYrm")>; +def: InstRW<[HWWriteResGroup16_2], (instregex "VPSIGNBYrm256")>; +def: InstRW<[HWWriteResGroup16_2], (instregex "VPSIGNDYrm256")>; +def: InstRW<[HWWriteResGroup16_2], (instregex "VPSIGNWYrm256")>; +def: InstRW<[HWWriteResGroup16_2], (instregex "VPSUBBYrm")>; +def: InstRW<[HWWriteResGroup16_2], (instregex "VPSUBDYrm")>; +def: InstRW<[HWWriteResGroup16_2], (instregex "VPSUBQYrm")>; +def: InstRW<[HWWriteResGroup16_2], (instregex "VPSUBSBYrm")>; +def: InstRW<[HWWriteResGroup16_2], (instregex "VPSUBSWYrm")>; +def: InstRW<[HWWriteResGroup16_2], (instregex "VPSUBUSBYrm")>; +def: InstRW<[HWWriteResGroup16_2], (instregex "VPSUBUSWYrm")>; +def: InstRW<[HWWriteResGroup16_2], (instregex "VPSUBWYrm")>; def HWWriteResGroup17 : SchedWriteRes<[HWPort23,HWPort015]> { - let Latency = 1; + let Latency = 7; let NumMicroOps = 2; let ResourceCycles = [1,1]; } def: InstRW<[HWWriteResGroup17], (instregex "BLENDPDrmi")>; def: InstRW<[HWWriteResGroup17], (instregex "BLENDPSrmi")>; -def: InstRW<[HWWriteResGroup17], (instregex "MMX_PANDNirm")>; -def: InstRW<[HWWriteResGroup17], (instregex "MMX_PANDirm")>; -def: InstRW<[HWWriteResGroup17], (instregex "MMX_PORirm")>; -def: InstRW<[HWWriteResGroup17], (instregex "MMX_PXORirm")>; def: InstRW<[HWWriteResGroup17], (instregex "PANDNrm")>; def: InstRW<[HWWriteResGroup17], (instregex "PANDrm")>; def: InstRW<[HWWriteResGroup17], (instregex "PORrm")>; def: InstRW<[HWWriteResGroup17], (instregex "PXORrm")>; -def: InstRW<[HWWriteResGroup17], (instregex "VBLENDPDYrmi")>; def: InstRW<[HWWriteResGroup17], (instregex "VBLENDPDrmi")>; -def: InstRW<[HWWriteResGroup17], (instregex "VBLENDPSYrmi")>; def: InstRW<[HWWriteResGroup17], (instregex "VBLENDPSrmi")>; def: InstRW<[HWWriteResGroup17], (instregex "VINSERTF128rm")>; def: InstRW<[HWWriteResGroup17], (instregex "VINSERTI128rm")>; -def: InstRW<[HWWriteResGroup17], (instregex "VPANDNYrm")>; def: InstRW<[HWWriteResGroup17], (instregex "VPANDNrm")>; -def: InstRW<[HWWriteResGroup17], (instregex "VPANDYrm")>; def: InstRW<[HWWriteResGroup17], (instregex "VPANDrm")>; -def: InstRW<[HWWriteResGroup17], (instregex "VPBLENDDYrmi")>; def: InstRW<[HWWriteResGroup17], (instregex "VPBLENDDrmi")>; -def: InstRW<[HWWriteResGroup17], (instregex "VPORYrm")>; def: InstRW<[HWWriteResGroup17], (instregex "VPORrm")>; -def: InstRW<[HWWriteResGroup17], (instregex "VPXORYrm")>; def: InstRW<[HWWriteResGroup17], (instregex "VPXORrm")>; +def HWWriteResGroup17_1 : SchedWriteRes<[HWPort23,HWPort015]> { + let Latency = 6; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[HWWriteResGroup17_1], (instregex "MMX_PANDNirm")>; +def: InstRW<[HWWriteResGroup17_1], (instregex "MMX_PANDirm")>; +def: InstRW<[HWWriteResGroup17_1], (instregex "MMX_PORirm")>; +def: InstRW<[HWWriteResGroup17_1], (instregex "MMX_PXORirm")>; + +def HWWriteResGroup17_2 : SchedWriteRes<[HWPort23,HWPort015]> { + let Latency = 8; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[HWWriteResGroup17_2], (instregex "VBLENDPDYrmi")>; +def: InstRW<[HWWriteResGroup17_2], (instregex "VBLENDPSYrmi")>; +def: InstRW<[HWWriteResGroup17_2], (instregex "VPANDNYrm")>; +def: InstRW<[HWWriteResGroup17_2], (instregex "VPANDYrm")>; +def: InstRW<[HWWriteResGroup17_2], (instregex "VPBLENDDYrmi")>; +def: InstRW<[HWWriteResGroup17_2], 
(instregex "VPORYrm")>; +def: InstRW<[HWWriteResGroup17_2], (instregex "VPXORYrm")>; + def HWWriteResGroup18 : SchedWriteRes<[HWPort23,HWPort0156]> { - let Latency = 1; + let Latency = 6; let NumMicroOps = 2; let ResourceCycles = [1,1]; } @@ -2089,7 +2021,7 @@ def: InstRW<[HWWriteResGroup18], (instregex "ADD(16|32|64)rm")>; def: InstRW<[HWWriteResGroup18], (instregex "ADD8rm")>; def: InstRW<[HWWriteResGroup18], (instregex "AND(16|32|64)rm")>; def: InstRW<[HWWriteResGroup18], (instregex "AND8rm")>; -def: InstRW<[HWWriteResGroup18], (instregex "CMP(16|32|64)mi8")>; +def: InstRW<[HWWriteResGroup18], (instregex "CMP(16|32|64)mi")>; def: InstRW<[HWWriteResGroup18], (instregex "CMP(16|32|64)mr")>; def: InstRW<[HWWriteResGroup18], (instregex "CMP(16|32|64)rm")>; def: InstRW<[HWWriteResGroup18], (instregex "CMP8mi")>; @@ -2097,7 +2029,7 @@ def: InstRW<[HWWriteResGroup18], (instregex "CMP8mr")>; def: InstRW<[HWWriteResGroup18], (instregex "CMP8rm")>; def: InstRW<[HWWriteResGroup18], (instregex "OR(16|32|64)rm")>; def: InstRW<[HWWriteResGroup18], (instregex "OR8rm")>; -def: InstRW<[HWWriteResGroup18], (instregex "POP(16|32|64)r(mr?)")>; +def: InstRW<[HWWriteResGroup18], (instregex "POP(16|32|64)r(mr)?")>; def: InstRW<[HWWriteResGroup18], (instregex "SUB(16|32|64)rm")>; def: InstRW<[HWWriteResGroup18], (instregex "SUB8rm")>; def: InstRW<[HWWriteResGroup18], (instregex "TEST(16|32|64)mr")>; @@ -2107,14 +2039,14 @@ def: InstRW<[HWWriteResGroup18], (instregex "XOR(16|32|64)rm")>; def: InstRW<[HWWriteResGroup18], (instregex "XOR8rm")>; def HWWriteResGroup19 : SchedWriteRes<[HWPort237,HWPort0156]> { - let Latency = 1; + let Latency = 2; let NumMicroOps = 2; let ResourceCycles = [1,1]; } def: InstRW<[HWWriteResGroup19], (instregex "SFENCE")>; def HWWriteResGroup20 : SchedWriteRes<[HWPort4,HWPort5,HWPort237]> { - let Latency = 1; + let Latency = 2; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } @@ -2132,52 +2064,39 @@ def: InstRW<[HWWriteResGroup20], (instregex "VPEXTRWmr")>; def: InstRW<[HWWriteResGroup20], (instregex "VSTMXCSR")>; def HWWriteResGroup21 : SchedWriteRes<[HWPort4,HWPort6,HWPort237]> { - let Latency = 1; + let Latency = 2; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } def: InstRW<[HWWriteResGroup21], (instregex "FNSTCW16m")>; def HWWriteResGroup22 : SchedWriteRes<[HWPort4,HWPort237,HWPort06]> { - let Latency = 1; + let Latency = 2; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } -def: InstRW<[HWWriteResGroup22], (instregex "SETAEm")>; -def: InstRW<[HWWriteResGroup22], (instregex "SETBm")>; -def: InstRW<[HWWriteResGroup22], (instregex "SETEm")>; -def: InstRW<[HWWriteResGroup22], (instregex "SETGEm")>; -def: InstRW<[HWWriteResGroup22], (instregex "SETGm")>; -def: InstRW<[HWWriteResGroup22], (instregex "SETLEm")>; -def: InstRW<[HWWriteResGroup22], (instregex "SETLm")>; -def: InstRW<[HWWriteResGroup22], (instregex "SETNEm")>; -def: InstRW<[HWWriteResGroup22], (instregex "SETNOm")>; -def: InstRW<[HWWriteResGroup22], (instregex "SETNPm")>; -def: InstRW<[HWWriteResGroup22], (instregex "SETNSm")>; -def: InstRW<[HWWriteResGroup22], (instregex "SETOm")>; -def: InstRW<[HWWriteResGroup22], (instregex "SETPm")>; -def: InstRW<[HWWriteResGroup22], (instregex "SETSm")>; +def: InstRW<[HWWriteResGroup22], (instregex "SET(AE|B|E|G|GE|L|LE|NE|NO|NP|NS|O|P|S)m")>; def HWWriteResGroup23 : SchedWriteRes<[HWPort4,HWPort237,HWPort15]> { - let Latency = 1; + let Latency = 2; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } def: InstRW<[HWWriteResGroup23], (instregex "MOVBE(32|64)mr")>; def 
HWWriteResGroup23_16 : SchedWriteRes<[HWPort06, HWPort237, HWPort4]> { - let Latency = 1; + let Latency = 2; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } def: InstRW<[HWWriteResGroup23_16], (instregex "MOVBE16mr")>; def HWWriteResGroup24 : SchedWriteRes<[HWPort4,HWPort237,HWPort0156]> { - let Latency = 1; + let Latency = 2; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } -def: InstRW<[HWWriteResGroup24], (instregex "PUSH(16|32|64)r(mr?)")>; +def: InstRW<[HWWriteResGroup24], (instregex "PUSH(16|32|64)r(mr)?")>; def: InstRW<[HWWriteResGroup24], (instregex "PUSH64i8")>; def: InstRW<[HWWriteResGroup24], (instregex "STOSB")>; def: InstRW<[HWWriteResGroup24], (instregex "STOSL")>; @@ -2185,7 +2104,7 @@ def: InstRW<[HWWriteResGroup24], (instregex "STOSQ")>; def: InstRW<[HWWriteResGroup24], (instregex "STOSW")>; def HWWriteResGroup25 : SchedWriteRes<[HWPort4,HWPort23,HWPort237,HWPort06]> { - let Latency = 1; + let Latency = 7; let NumMicroOps = 4; let ResourceCycles = [1,1,1,1]; } @@ -2206,15 +2125,15 @@ def: InstRW<[HWWriteResGroup25], (instregex "SHR8m1")>; def: InstRW<[HWWriteResGroup25], (instregex "SHR8mi")>; def HWWriteResGroup26 : SchedWriteRes<[HWPort4,HWPort23,HWPort237,HWPort0156]> { - let Latency = 1; + let Latency = 7; let NumMicroOps = 4; let ResourceCycles = [1,1,1,1]; } -def: InstRW<[HWWriteResGroup26], (instregex "ADD(16|32|64)mi8")>; +def: InstRW<[HWWriteResGroup26], (instregex "ADD(16|32|64)mi")>; def: InstRW<[HWWriteResGroup26], (instregex "ADD(16|32|64)mr")>; def: InstRW<[HWWriteResGroup26], (instregex "ADD8mi")>; def: InstRW<[HWWriteResGroup26], (instregex "ADD8mr")>; -def: InstRW<[HWWriteResGroup26], (instregex "AND(16|32|64)mi8")>; +def: InstRW<[HWWriteResGroup26], (instregex "AND(16|32|64)mi")>; def: InstRW<[HWWriteResGroup26], (instregex "AND(16|32|64)mr")>; def: InstRW<[HWWriteResGroup26], (instregex "AND8mi")>; def: InstRW<[HWWriteResGroup26], (instregex "AND8mr")>; @@ -2226,15 +2145,17 @@ def: InstRW<[HWWriteResGroup26], (instregex "NEG(16|32|64)m")>; def: InstRW<[HWWriteResGroup26], (instregex "NEG8m")>; def: InstRW<[HWWriteResGroup26], (instregex "NOT(16|32|64)m")>; def: InstRW<[HWWriteResGroup26], (instregex "NOT8m")>; -def: InstRW<[HWWriteResGroup26], (instregex "OR(16|32|64)mi8")>; +def: InstRW<[HWWriteResGroup26], (instregex "OR(16|32|64)mi")>; def: InstRW<[HWWriteResGroup26], (instregex "OR(16|32|64)mr")>; def: InstRW<[HWWriteResGroup26], (instregex "OR8mi")>; def: InstRW<[HWWriteResGroup26], (instregex "OR8mr")>; -def: InstRW<[HWWriteResGroup26], (instregex "SUB(16|32|64)mi8")>; +def: InstRW<[HWWriteResGroup26], (instregex "POP(16|32|64)rmm")>; +def: InstRW<[HWWriteResGroup26], (instregex "PUSH(16|32|64)rmm")>; +def: InstRW<[HWWriteResGroup26], (instregex "SUB(16|32|64)mi")>; def: InstRW<[HWWriteResGroup26], (instregex "SUB(16|32|64)mr")>; def: InstRW<[HWWriteResGroup26], (instregex "SUB8mi")>; def: InstRW<[HWWriteResGroup26], (instregex "SUB8mr")>; -def: InstRW<[HWWriteResGroup26], (instregex "XOR(16|32|64)mi8")>; +def: InstRW<[HWWriteResGroup26], (instregex "XOR(16|32|64)mi")>; def: InstRW<[HWWriteResGroup26], (instregex "XOR(16|32|64)mr")>; def: InstRW<[HWWriteResGroup26], (instregex "XOR8mi")>; def: InstRW<[HWWriteResGroup26], (instregex "XOR8mr")>; @@ -2356,8 +2277,7 @@ def HWWriteResGroup34 : SchedWriteRes<[HWPort06,HWPort15]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[HWWriteResGroup34], (instregex "BEXTR32rr")>; -def: InstRW<[HWWriteResGroup34], (instregex "BEXTR64rr")>; +def: InstRW<[HWWriteResGroup34], (instregex 
"BEXTR(32|64)rr")>; def: InstRW<[HWWriteResGroup34], (instregex "BSWAP(16|32|64)r")>; def HWWriteResGroup35 : SchedWriteRes<[HWPort06,HWPort0156]> { @@ -2365,63 +2285,61 @@ def HWWriteResGroup35 : SchedWriteRes<[HWPort06,HWPort0156]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[HWWriteResGroup35], (instregex "ADC(16|32|64)ri8")>; -def: InstRW<[HWWriteResGroup35], (instregex "ADC(16|32|64)rr(_REV?)")>; +def: InstRW<[HWWriteResGroup35], (instregex "ADC(16|32|64)ri")>; +def: InstRW<[HWWriteResGroup35], (instregex "ADC(16|32|64)rr(_REV)?")>; def: InstRW<[HWWriteResGroup35], (instregex "ADC8i8")>; def: InstRW<[HWWriteResGroup35], (instregex "ADC8ri")>; -def: InstRW<[HWWriteResGroup35], (instregex "ADC8rr(_REV?)")>; -def: InstRW<[HWWriteResGroup35], (instregex "CMOVAE(16|32|64)rr")>; -def: InstRW<[HWWriteResGroup35], (instregex "CMOVB(16|32|64)rr")>; -def: InstRW<[HWWriteResGroup35], (instregex "CMOVE(16|32|64)rr")>; -def: InstRW<[HWWriteResGroup35], (instregex "CMOVG(16|32|64)rr")>; -def: InstRW<[HWWriteResGroup35], (instregex "CMOVGE(16|32|64)rr")>; -def: InstRW<[HWWriteResGroup35], (instregex "CMOVL(16|32|64)rr")>; -def: InstRW<[HWWriteResGroup35], (instregex "CMOVLE(16|32|64)rr")>; -def: InstRW<[HWWriteResGroup35], (instregex "CMOVNE(16|32|64)rr")>; -def: InstRW<[HWWriteResGroup35], (instregex "CMOVNO(16|32|64)rr")>; -def: InstRW<[HWWriteResGroup35], (instregex "CMOVNP(16|32|64)rr")>; -def: InstRW<[HWWriteResGroup35], (instregex "CMOVNS(16|32|64)rr")>; -def: InstRW<[HWWriteResGroup35], (instregex "CMOVO(16|32|64)rr")>; -def: InstRW<[HWWriteResGroup35], (instregex "CMOVP(16|32|64)rr")>; -def: InstRW<[HWWriteResGroup35], (instregex "CMOVS(16|32|64)rr")>; +def: InstRW<[HWWriteResGroup35], (instregex "ADC8rr(_REV)?")>; +def: InstRW<[HWWriteResGroup35], (instregex "CMOV(AE|B|E|G|GE|L|LE|NE|NO|NP|NS|O|P|S)(16|32|64)rr")>; def: InstRW<[HWWriteResGroup35], (instregex "CWD")>; def: InstRW<[HWWriteResGroup35], (instregex "JRCXZ")>; -def: InstRW<[HWWriteResGroup35], (instregex "SBB(16|32|64)ri8")>; -def: InstRW<[HWWriteResGroup35], (instregex "SBB(16|32|64)rr(_REV?)")>; +def: InstRW<[HWWriteResGroup35], (instregex "SBB(16|32|64)ri")>; +def: InstRW<[HWWriteResGroup35], (instregex "SBB(16|32|64)rr(_REV)?")>; def: InstRW<[HWWriteResGroup35], (instregex "SBB8i8")>; def: InstRW<[HWWriteResGroup35], (instregex "SBB8ri")>; -def: InstRW<[HWWriteResGroup35], (instregex "SBB8rr(_REV?)")>; -def: InstRW<[HWWriteResGroup35], (instregex "SETAr")>; -def: InstRW<[HWWriteResGroup35], (instregex "SETBEr")>; +def: InstRW<[HWWriteResGroup35], (instregex "SBB8rr(_REV)?")>; +def: InstRW<[HWWriteResGroup35], (instregex "SET(A|BE)r")>; def HWWriteResGroup36 : SchedWriteRes<[HWPort5,HWPort23]> { - let Latency = 2; + let Latency = 8; let NumMicroOps = 3; let ResourceCycles = [2,1]; } def: InstRW<[HWWriteResGroup36], (instregex "BLENDVPDrm0")>; def: InstRW<[HWWriteResGroup36], (instregex "BLENDVPSrm0")>; -def: InstRW<[HWWriteResGroup36], (instregex "MMX_PACKSSDWirm")>; -def: InstRW<[HWWriteResGroup36], (instregex "MMX_PACKSSWBirm")>; -def: InstRW<[HWWriteResGroup36], (instregex "MMX_PACKUSWBirm")>; def: InstRW<[HWWriteResGroup36], (instregex "PBLENDVBrm0")>; -def: InstRW<[HWWriteResGroup36], (instregex "VBLENDVPDYrm")>; def: InstRW<[HWWriteResGroup36], (instregex "VBLENDVPDrm")>; -def: InstRW<[HWWriteResGroup36], (instregex "VBLENDVPSYrm")>; def: InstRW<[HWWriteResGroup36], (instregex "VBLENDVPSrm")>; -def: InstRW<[HWWriteResGroup36], (instregex "VMASKMOVPDYrm")>; def: InstRW<[HWWriteResGroup36], (instregex 
"VMASKMOVPDrm")>; -def: InstRW<[HWWriteResGroup36], (instregex "VMASKMOVPSYrm")>; def: InstRW<[HWWriteResGroup36], (instregex "VMASKMOVPSrm")>; -def: InstRW<[HWWriteResGroup36], (instregex "VPBLENDVBYrm")>; def: InstRW<[HWWriteResGroup36], (instregex "VPBLENDVBrm")>; -def: InstRW<[HWWriteResGroup36], (instregex "VPMASKMOVDYrm")>; def: InstRW<[HWWriteResGroup36], (instregex "VPMASKMOVDrm")>; -def: InstRW<[HWWriteResGroup36], (instregex "VPMASKMOVQYrm")>; def: InstRW<[HWWriteResGroup36], (instregex "VPMASKMOVQrm")>; +def HWWriteResGroup36_1 : SchedWriteRes<[HWPort5,HWPort23]> { + let Latency = 9; + let NumMicroOps = 3; + let ResourceCycles = [2,1]; +} +def: InstRW<[HWWriteResGroup36_1], (instregex "VBLENDVPDYrm")>; +def: InstRW<[HWWriteResGroup36_1], (instregex "VBLENDVPSYrm")>; +def: InstRW<[HWWriteResGroup36_1], (instregex "VMASKMOVPDYrm")>; +def: InstRW<[HWWriteResGroup36_1], (instregex "VMASKMOVPSYrm")>; +def: InstRW<[HWWriteResGroup36_1], (instregex "VPBLENDVBYrm")>; +def: InstRW<[HWWriteResGroup36_1], (instregex "VPMASKMOVDYrm")>; +def: InstRW<[HWWriteResGroup36_1], (instregex "VPMASKMOVQYrm")>; + +def HWWriteResGroup36_2 : SchedWriteRes<[HWPort5,HWPort23]> { + let Latency = 7; + let NumMicroOps = 3; + let ResourceCycles = [2,1]; +} +def: InstRW<[HWWriteResGroup36_2], (instregex "MMX_PACKSSDWirm")>; +def: InstRW<[HWWriteResGroup36_2], (instregex "MMX_PACKSSWBirm")>; +def: InstRW<[HWWriteResGroup36_2], (instregex "MMX_PACKUSWBirm")>; + def HWWriteResGroup37 : SchedWriteRes<[HWPort23,HWPort0156]> { - let Latency = 2; + let Latency = 7; let NumMicroOps = 3; let ResourceCycles = [1,2]; } @@ -2432,7 +2350,7 @@ def: InstRW<[HWWriteResGroup37], (instregex "SCASQ")>; def: InstRW<[HWWriteResGroup37], (instregex "SCASW")>; def HWWriteResGroup38 : SchedWriteRes<[HWPort0,HWPort5,HWPort23]> { - let Latency = 2; + let Latency = 8; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } @@ -2456,14 +2374,14 @@ def: InstRW<[HWWriteResGroup38], (instregex "VPSRLWrm")>; def: InstRW<[HWWriteResGroup38], (instregex "VPTESTrm")>; def HWWriteResGroup39 : SchedWriteRes<[HWPort0,HWPort01,HWPort23]> { - let Latency = 2; + let Latency = 7; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } def: InstRW<[HWWriteResGroup39], (instregex "FLDCW16m")>; def HWWriteResGroup40 : SchedWriteRes<[HWPort0,HWPort23,HWPort0156]> { - let Latency = 2; + let Latency = 7; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } @@ -2471,63 +2389,49 @@ def: InstRW<[HWWriteResGroup40], (instregex "LDMXCSR")>; def: InstRW<[HWWriteResGroup40], (instregex "VLDMXCSR")>; def HWWriteResGroup41 : SchedWriteRes<[HWPort6,HWPort23,HWPort0156]> { - let Latency = 2; + let Latency = 7; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } def: InstRW<[HWWriteResGroup41], (instregex "LRETQ")>; +def: InstRW<[HWWriteResGroup41], (instregex "RETL")>; def: InstRW<[HWWriteResGroup41], (instregex "RETQ")>; def HWWriteResGroup42 : SchedWriteRes<[HWPort23,HWPort06,HWPort15]> { - let Latency = 2; + let Latency = 7; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } -def: InstRW<[HWWriteResGroup42], (instregex "BEXTR32rm")>; -def: InstRW<[HWWriteResGroup42], (instregex "BEXTR64rm")>; +def: InstRW<[HWWriteResGroup42], (instregex "BEXTR(32|64)rm")>; def HWWriteResGroup43 : SchedWriteRes<[HWPort23,HWPort06,HWPort0156]> { - let Latency = 2; + let Latency = 7; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } def: InstRW<[HWWriteResGroup43], (instregex "ADC(16|32|64)rm")>; def: InstRW<[HWWriteResGroup43], (instregex "ADC8rm")>; -def: InstRW<[HWWriteResGroup43], 
(instregex "CMOVAE(16|32|64)rm")>;
-def: InstRW<[HWWriteResGroup43], (instregex "CMOVB(16|32|64)rm")>;
-def: InstRW<[HWWriteResGroup43], (instregex "CMOVE(16|32|64)rm")>;
-def: InstRW<[HWWriteResGroup43], (instregex "CMOVG(16|32|64)rm")>;
-def: InstRW<[HWWriteResGroup43], (instregex "CMOVGE(16|32|64)rm")>;
-def: InstRW<[HWWriteResGroup43], (instregex "CMOVL(16|32|64)rm")>;
-def: InstRW<[HWWriteResGroup43], (instregex "CMOVLE(16|32|64)rm")>;
-def: InstRW<[HWWriteResGroup43], (instregex "CMOVNE(16|32|64)rm")>;
-def: InstRW<[HWWriteResGroup43], (instregex "CMOVNO(16|32|64)rm")>;
-def: InstRW<[HWWriteResGroup43], (instregex "CMOVNP(16|32|64)rm")>;
-def: InstRW<[HWWriteResGroup43], (instregex "CMOVNS(16|32|64)rm")>;
-def: InstRW<[HWWriteResGroup43], (instregex "CMOVO(16|32|64)rm")>;
-def: InstRW<[HWWriteResGroup43], (instregex "CMOVP(16|32|64)rm")>;
-def: InstRW<[HWWriteResGroup43], (instregex "CMOVS(16|32|64)rm")>;
+def: InstRW<[HWWriteResGroup43], (instregex "CMOV(AE|B|E|G|GE|L|LE|NE|NO|NP|NS|O|P|S)(16|32|64)rm")>;
def: InstRW<[HWWriteResGroup43], (instregex "SBB(16|32|64)rm")>;
def: InstRW<[HWWriteResGroup43], (instregex "SBB8rm")>;
def HWWriteResGroup44 : SchedWriteRes<[HWPort4,HWPort6,HWPort237,HWPort0156]> {
- let Latency = 2;
+ let Latency = 3;
let NumMicroOps = 4;
let ResourceCycles = [1,1,1,1];
}
def: InstRW<[HWWriteResGroup44], (instregex "CALL(16|32|64)r")>;
def HWWriteResGroup45 : SchedWriteRes<[HWPort4,HWPort237,HWPort06,HWPort0156]> {
- let Latency = 2;
+ let Latency = 3;
let NumMicroOps = 4;
let ResourceCycles = [1,1,1,1];
}
def: InstRW<[HWWriteResGroup45], (instregex "CALL64pcrel32")>;
-def: InstRW<[HWWriteResGroup45], (instregex "SETAm")>;
-def: InstRW<[HWWriteResGroup45], (instregex "SETBEm")>;
+def: InstRW<[HWWriteResGroup45], (instregex "SET(A|BE)m")>;
def HWWriteResGroup46 : SchedWriteRes<[HWPort4,HWPort23,HWPort237,HWPort06]> {
- let Latency = 2;
+ let Latency = 8;
let NumMicroOps = 5;
let ResourceCycles = [1,1,1,2];
}
@@ -2541,7 +2445,7 @@ def: InstRW<[HWWriteResGroup46], (instregex "ROR8m1")>;
def: InstRW<[HWWriteResGroup46], (instregex "ROR8mi")>;
def HWWriteResGroup47 : SchedWriteRes<[HWPort4,HWPort23,HWPort237,HWPort0156]> {
- let Latency = 2;
+ let Latency = 8;
let NumMicroOps = 5;
let ResourceCycles = [1,1,1,2];
}
@@ -2549,7 +2453,7 @@ def: InstRW<[HWWriteResGroup47], (instregex "XADD(16|32|64)rm")>;
def: InstRW<[HWWriteResGroup47], (instregex "XADD8rm")>;
def HWWriteResGroup48 : SchedWriteRes<[HWPort4,HWPort6,HWPort23,HWPort237,HWPort0156]> {
- let Latency = 2;
+ let Latency = 8;
let NumMicroOps = 5;
let ResourceCycles = [1,1,1,1,1];
}
@@ -2589,29 +2493,28 @@ def: InstRW<[HWWriteResGroup50], (instregex "BSF(16|32|64)rr")>;
def: InstRW<[HWWriteResGroup50], (instregex "BSR(16|32|64)rr")>;
def: InstRW<[HWWriteResGroup50], (instregex "CMPPDrri")>;
def: InstRW<[HWWriteResGroup50], (instregex "CMPPSrri")>;
+def: InstRW<[HWWriteResGroup50], (instregex "CMPSDrr")>;
def: InstRW<[HWWriteResGroup50], (instregex "CMPSSrr")>;
def: InstRW<[HWWriteResGroup50], (instregex "COMISDrr")>;
def: InstRW<[HWWriteResGroup50], (instregex "COMISSrr")>;
def: InstRW<[HWWriteResGroup50], (instregex "CVTDQ2PSrr")>;
def: InstRW<[HWWriteResGroup50], (instregex "CVTPS2DQrr")>;
def: InstRW<[HWWriteResGroup50], (instregex "CVTTPS2DQrr")>;
-def: InstRW<[HWWriteResGroup50], (instregex "IMUL64rr(i8?)")>;
+def: InstRW<[HWWriteResGroup50], (instregex "IMUL64rr(i8)?")>;
def: InstRW<[HWWriteResGroup50], (instregex "IMUL8r")>;
def: InstRW<[HWWriteResGroup50], (instregex "LZCNT(16|32|64)rr")>;
-def: InstRW<[HWWriteResGroup50], (instregex "MAXPDrr")>;
-def: InstRW<[HWWriteResGroup50], (instregex "MAXPSrr")>;
-def: InstRW<[HWWriteResGroup50], (instregex "MAXSDrr")>;
-def: InstRW<[HWWriteResGroup50], (instregex "MAXSSrr")>;
-def: InstRW<[HWWriteResGroup50], (instregex "MINPDrr")>;
-def: InstRW<[HWWriteResGroup50], (instregex "MINPSrr")>;
-def: InstRW<[HWWriteResGroup50], (instregex "MINSDrr")>;
-def: InstRW<[HWWriteResGroup50], (instregex "MINSSrr")>;
+def: InstRW<[HWWriteResGroup50], (instregex "MAX(C?)PDrr")>;
+def: InstRW<[HWWriteResGroup50], (instregex "MAX(C?)PSrr")>;
+def: InstRW<[HWWriteResGroup50], (instregex "MAX(C?)SDrr")>;
+def: InstRW<[HWWriteResGroup50], (instregex "MAX(C?)SSrr")>;
+def: InstRW<[HWWriteResGroup50], (instregex "MIN(C?)PDrr")>;
+def: InstRW<[HWWriteResGroup50], (instregex "MIN(C?)PSrr")>;
+def: InstRW<[HWWriteResGroup50], (instregex "MIN(C?)SDrr")>;
+def: InstRW<[HWWriteResGroup50], (instregex "MIN(C?)SSrr")>;
def: InstRW<[HWWriteResGroup50], (instregex "MMX_CVTPI2PSirr")>;
def: InstRW<[HWWriteResGroup50], (instregex "MUL8r")>;
-def: InstRW<[HWWriteResGroup50], (instregex "PDEP32rr")>;
-def: InstRW<[HWWriteResGroup50], (instregex "PDEP64rr")>;
-def: InstRW<[HWWriteResGroup50], (instregex "PEXT32rr")>;
-def: InstRW<[HWWriteResGroup50], (instregex "PEXT64rr")>;
+def: InstRW<[HWWriteResGroup50], (instregex "PDEP(32|64)rr")>;
+def: InstRW<[HWWriteResGroup50], (instregex "PEXT(32|64)rr")>;
def: InstRW<[HWWriteResGroup50], (instregex "POPCNT(16|32|64)rr")>;
def: InstRW<[HWWriteResGroup50], (instregex "SHLD(16|32|64)rri8")>;
def: InstRW<[HWWriteResGroup50], (instregex "SHRD(16|32|64)rri8")>;
@@ -2652,18 +2555,18 @@ def: InstRW<[HWWriteResGroup50], (instregex "VCVTPS2DQYrr")>;
def: InstRW<[HWWriteResGroup50], (instregex "VCVTPS2DQrr")>;
def: InstRW<[HWWriteResGroup50], (instregex "VCVTTPS2DQYrr")>;
def: InstRW<[HWWriteResGroup50], (instregex "VCVTTPS2DQrr")>;
-def: InstRW<[HWWriteResGroup50], (instregex "VMAXPDYrr")>;
-def: InstRW<[HWWriteResGroup50], (instregex "VMAXPDrr")>;
-def: InstRW<[HWWriteResGroup50], (instregex "VMAXPSYrr")>;
-def: InstRW<[HWWriteResGroup50], (instregex "VMAXPSrr")>;
-def: InstRW<[HWWriteResGroup50], (instregex "VMAXSDrr")>;
-def: InstRW<[HWWriteResGroup50], (instregex "VMAXSSrr")>;
-def: InstRW<[HWWriteResGroup50], (instregex "VMINPDYrr")>;
-def: InstRW<[HWWriteResGroup50], (instregex "VMINPDrr")>;
-def: InstRW<[HWWriteResGroup50], (instregex "VMINPSYrr")>;
-def: InstRW<[HWWriteResGroup50], (instregex "VMINPSrr")>;
-def: InstRW<[HWWriteResGroup50], (instregex "VMINSDrr")>;
-def: InstRW<[HWWriteResGroup50], (instregex "VMINSSrr")>;
+def: InstRW<[HWWriteResGroup50], (instregex "VMAX(C?)PDYrr")>;
+def: InstRW<[HWWriteResGroup50], (instregex "VMAX(C?)PDrr")>;
+def: InstRW<[HWWriteResGroup50], (instregex "VMAX(C?)PSYrr")>;
+def: InstRW<[HWWriteResGroup50], (instregex "VMAX(C?)PSrr")>;
+def: InstRW<[HWWriteResGroup50], (instregex "VMAX(C?)SDrr")>;
+def: InstRW<[HWWriteResGroup50], (instregex "VMAX(C?)SSrr")>;
+def: InstRW<[HWWriteResGroup50], (instregex "VMIN(C?)PDYrr")>;
+def: InstRW<[HWWriteResGroup50], (instregex "VMIN(C?)PDrr")>;
+def: InstRW<[HWWriteResGroup50], (instregex "VMIN(C?)PSYrr")>;
+def: InstRW<[HWWriteResGroup50], (instregex "VMIN(C?)PSrr")>;
+def: InstRW<[HWWriteResGroup50], (instregex "VMIN(C?)SDrr")>;
+def: InstRW<[HWWriteResGroup50], (instregex "VMIN(C?)SSrr")>;
def: InstRW<[HWWriteResGroup50], (instregex "VSUBPDYrr")>;
def: InstRW<[HWWriteResGroup50], (instregex "VSUBPDrr")>;
def: InstRW<[HWWriteResGroup50], (instregex "VSUBPSYrr")>;
@@ -2677,13 +2580,13 @@ def HWWriteResGroup50_16 : SchedWriteRes<[HWPort1, HWPort0156]> {
let Latency = 3;
let NumMicroOps = 4;
}
-def: InstRW<[HWWriteResGroup50_16], (instregex "IMUL16rr(i8?)")>;
+def: InstRW<[HWWriteResGroup50_16], (instregex "IMUL16rr(i8)?")>;
def HWWriteResGroup50_32 : SchedWriteRes<[HWPort1, HWPort0156]> {
let Latency = 3;
let NumMicroOps = 3;
}
-def: InstRW<[HWWriteResGroup50_32], (instregex "IMUL32rr(i8?)")>;
+def: InstRW<[HWWriteResGroup50_32], (instregex "IMUL32rr(i8)?")>;
def HWWriteResGroup51 : SchedWriteRes<[HWPort5]> {
let Latency = 3;
@@ -2722,127 +2625,73 @@ def: InstRW<[HWWriteResGroup51], (instregex "VPMOVZXWDYrr")>;
def: InstRW<[HWWriteResGroup51], (instregex "VPMOVZXWQYrr")>;
def HWWriteResGroup52 : SchedWriteRes<[HWPort1,HWPort23]> {
- let Latency = 3;
+ let Latency = 9;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
def: InstRW<[HWWriteResGroup52], (instregex "ADDPDrm")>;
def: InstRW<[HWWriteResGroup52], (instregex "ADDPSrm")>;
-def: InstRW<[HWWriteResGroup52], (instregex "ADDSDrm")>;
-def: InstRW<[HWWriteResGroup52], (instregex "ADDSSrm")>;
def: InstRW<[HWWriteResGroup52], (instregex "ADDSUBPDrm")>;
def: InstRW<[HWWriteResGroup52], (instregex "ADDSUBPSrm")>;
-def: InstRW<[HWWriteResGroup52], (instregex "ADD_F32m")>;
-def: InstRW<[HWWriteResGroup52], (instregex "ADD_F64m")>;
-def: InstRW<[HWWriteResGroup52], (instregex "BSF(16|32|64)rm")>;
-def: InstRW<[HWWriteResGroup52], (instregex "BSR(16|32|64)rm")>;
def: InstRW<[HWWriteResGroup52], (instregex "CMPPDrmi")>;
def: InstRW<[HWWriteResGroup52], (instregex "CMPPSrmi")>;
-def: InstRW<[HWWriteResGroup52], (instregex "CMPSSrm")>;
-def: InstRW<[HWWriteResGroup52], (instregex "COMISDrm")>;
-def: InstRW<[HWWriteResGroup52], (instregex "COMISSrm")>;
def: InstRW<[HWWriteResGroup52], (instregex "CVTDQ2PSrm")>;
def: InstRW<[HWWriteResGroup52], (instregex "CVTPS2DQrm")>;
def: InstRW<[HWWriteResGroup52], (instregex "CVTTPS2DQrm")>;
-def: InstRW<[HWWriteResGroup52], (instregex "ILD_F16m")>;
-def: InstRW<[HWWriteResGroup52], (instregex "ILD_F32m")>;
-def: InstRW<[HWWriteResGroup52], (instregex "ILD_F64m")>;
-def: InstRW<[HWWriteResGroup52], (instregex "IMUL64m")>;
-def: InstRW<[HWWriteResGroup52], (instregex "IMUL64rm(i8?)")>;
-def: InstRW<[HWWriteResGroup52], (instregex "IMUL8m")>;
-def: InstRW<[HWWriteResGroup52], (instregex "LZCNT(16|32|64)rm")>;
-def: InstRW<[HWWriteResGroup52], (instregex "MAXPDrm")>;
-def: InstRW<[HWWriteResGroup52], (instregex "MAXPSrm")>;
-def: InstRW<[HWWriteResGroup52], (instregex "MAXSDrm")>;
-def: InstRW<[HWWriteResGroup52], (instregex "MAXSSrm")>;
-def: InstRW<[HWWriteResGroup52], (instregex "MINPDrm")>;
-def: InstRW<[HWWriteResGroup52], (instregex "MINPSrm")>;
-def: InstRW<[HWWriteResGroup52], (instregex "MINSDrm")>;
-def: InstRW<[HWWriteResGroup52], (instregex "MINSSrm")>;
-def: InstRW<[HWWriteResGroup52], (instregex "MMX_CVTPI2PSirm")>;
-def: InstRW<[HWWriteResGroup52], (instregex "MMX_CVTPS2PIirm")>;
-def: InstRW<[HWWriteResGroup52], (instregex "MMX_CVTTPS2PIirm")>;
-def: InstRW<[HWWriteResGroup52], (instregex "MUL64m")>;
-def: InstRW<[HWWriteResGroup52], (instregex "MUL8m")>;
-def: InstRW<[HWWriteResGroup52], (instregex "PDEP32rm")>;
-def: InstRW<[HWWriteResGroup52], (instregex "PDEP64rm")>;
-def: InstRW<[HWWriteResGroup52], (instregex "PEXT32rm")>;
-def: InstRW<[HWWriteResGroup52], (instregex "PEXT64rm")>;
-def: InstRW<[HWWriteResGroup52], (instregex "POPCNT(16|32|64)rm")>;
+def: InstRW<[HWWriteResGroup52], (instregex "MAX(C?)PDrm")>;
+def: InstRW<[HWWriteResGroup52], (instregex "MAX(C?)PSrm")>;
+def: InstRW<[HWWriteResGroup52], (instregex "MIN(C?)PDrm")>;
+def: InstRW<[HWWriteResGroup52], (instregex "MIN(C?)PSrm")>;
def: InstRW<[HWWriteResGroup52], (instregex "SUBPDrm")>;
def: InstRW<[HWWriteResGroup52], (instregex "SUBPSrm")>;
-def: InstRW<[HWWriteResGroup52], (instregex "SUBR_F32m")>;
-def: InstRW<[HWWriteResGroup52], (instregex "SUBR_F64m")>;
-def: InstRW<[HWWriteResGroup52], (instregex "SUBSDrm")>;
-def: InstRW<[HWWriteResGroup52], (instregex "SUBSSrm")>;
-def: InstRW<[HWWriteResGroup52], (instregex "SUB_F32m")>;
-def: InstRW<[HWWriteResGroup52], (instregex "SUB_F64m")>;
-def: InstRW<[HWWriteResGroup52], (instregex "TZCNT(16|32|64)rm")>;
-def: InstRW<[HWWriteResGroup52], (instregex "UCOMISDrm")>;
-def: InstRW<[HWWriteResGroup52], (instregex "UCOMISSrm")>;
-def: InstRW<[HWWriteResGroup52], (instregex "VADDPDYrm")>;
def: InstRW<[HWWriteResGroup52], (instregex "VADDPDrm")>;
-def: InstRW<[HWWriteResGroup52], (instregex "VADDPSYrm")>;
def: InstRW<[HWWriteResGroup52], (instregex "VADDPSrm")>;
-def: InstRW<[HWWriteResGroup52], (instregex "VADDSDrm")>;
-def: InstRW<[HWWriteResGroup52], (instregex "VADDSSrm")>;
-def: InstRW<[HWWriteResGroup52], (instregex "VADDSUBPDYrm")>;
def: InstRW<[HWWriteResGroup52], (instregex "VADDSUBPDrm")>;
-def: InstRW<[HWWriteResGroup52], (instregex "VADDSUBPSYrm")>;
def: InstRW<[HWWriteResGroup52], (instregex "VADDSUBPSrm")>;
-def: InstRW<[HWWriteResGroup52], (instregex "VCMPPDYrmi")>;
def: InstRW<[HWWriteResGroup52], (instregex "VCMPPDrmi")>;
-def: InstRW<[HWWriteResGroup52], (instregex "VCMPPSYrmi")>;
def: InstRW<[HWWriteResGroup52], (instregex "VCMPPSrmi")>;
-def: InstRW<[HWWriteResGroup52], (instregex "VCMPSDrm")>;
-def: InstRW<[HWWriteResGroup52], (instregex "VCMPSSrm")>;
-def: InstRW<[HWWriteResGroup52], (instregex "VCOMISDrm")>;
-def: InstRW<[HWWriteResGroup52], (instregex "VCOMISSrm")>;
-def: InstRW<[HWWriteResGroup52], (instregex "VCVTDQ2PSYrm")>;
def: InstRW<[HWWriteResGroup52], (instregex "VCVTDQ2PSrm")>;
-def: InstRW<[HWWriteResGroup52], (instregex "VCVTPS2DQYrm")>;
def: InstRW<[HWWriteResGroup52], (instregex "VCVTPS2DQrm")>;
-def: InstRW<[HWWriteResGroup52], (instregex "VCVTTPS2DQYrm")>;
def: InstRW<[HWWriteResGroup52], (instregex "VCVTTPS2DQrm")>;
-def: InstRW<[HWWriteResGroup52], (instregex "VMAXPDYrm")>;
-def: InstRW<[HWWriteResGroup52], (instregex "VMAXPDrm")>;
-def: InstRW<[HWWriteResGroup52], (instregex "VMAXPSYrm")>;
-def: InstRW<[HWWriteResGroup52], (instregex "VMAXPSrm")>;
-def: InstRW<[HWWriteResGroup52], (instregex "VMAXSDrm")>;
-def: InstRW<[HWWriteResGroup52], (instregex "VMAXSSrm")>;
-def: InstRW<[HWWriteResGroup52], (instregex "VMINPDYrm")>;
-def: InstRW<[HWWriteResGroup52], (instregex "VMINPDrm")>;
-def: InstRW<[HWWriteResGroup52], (instregex "VMINPSYrm")>;
-def: InstRW<[HWWriteResGroup52], (instregex "VMINPSrm")>;
-def: InstRW<[HWWriteResGroup52], (instregex "VMINSDrm")>;
-def: InstRW<[HWWriteResGroup52], (instregex "VMINSSrm")>;
-def: InstRW<[HWWriteResGroup52], (instregex "VSUBPDYrm")>;
+def: InstRW<[HWWriteResGroup52], (instregex "VMAX(C?)PDrm")>;
+def: InstRW<[HWWriteResGroup52], (instregex "VMAX(C?)PSrm")>;
+def: InstRW<[HWWriteResGroup52], (instregex "VMIN(C?)PDrm")>;
+def: InstRW<[HWWriteResGroup52], (instregex "VMIN(C?)PSrm")>;
def: InstRW<[HWWriteResGroup52], (instregex "VSUBPDrm")>;
-def: InstRW<[HWWriteResGroup52], (instregex "VSUBPSYrm")>;
def: InstRW<[HWWriteResGroup52], (instregex "VSUBPSrm")>;
-def: InstRW<[HWWriteResGroup52], (instregex "VSUBSDrm")>;
-def: InstRW<[HWWriteResGroup52], (instregex "VSUBSSrm")>;
-def: InstRW<[HWWriteResGroup52], (instregex "VUCOMISDrm")>;
-def: InstRW<[HWWriteResGroup52], (instregex "VUCOMISSrm")>;
-
-def HWWriteResGroup52_16 : SchedWriteRes<[HWPort1, HWPort0156, HWPort23]> {
- let Latency = 3;
- let NumMicroOps = 4;
-}
-def: InstRW<[HWWriteResGroup52_16], (instregex "IMUL16m")>;
-def: InstRW<[HWWriteResGroup52_16], (instregex "IMUL16rm(i8?)")>;
-def: InstRW<[HWWriteResGroup52_16], (instregex "MUL16m")>;
-def HWWriteResGroup52_32 : SchedWriteRes<[HWPort1, HWPort0156, HWPort23]> {
- let Latency = 3;
- let NumMicroOps = 3;
+def HWWriteResGroup52_1 : SchedWriteRes<[HWPort1,HWPort23]> {
+ let Latency = 10;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
}
-def: InstRW<[HWWriteResGroup52_32], (instregex "IMUL32m")>;
-def: InstRW<[HWWriteResGroup52_32], (instregex "IMUL32rm(i8?)")>;
-def: InstRW<[HWWriteResGroup52_32], (instregex "MUL32m")>;
+def: InstRW<[HWWriteResGroup52_1], (instregex "ADD_F32m")>;
+def: InstRW<[HWWriteResGroup52_1], (instregex "ADD_F64m")>;
+def: InstRW<[HWWriteResGroup52_1], (instregex "ILD_F16m")>;
+def: InstRW<[HWWriteResGroup52_1], (instregex "ILD_F32m")>;
+def: InstRW<[HWWriteResGroup52_1], (instregex "ILD_F64m")>;
+def: InstRW<[HWWriteResGroup52_1], (instregex "SUBR_F32m")>;
+def: InstRW<[HWWriteResGroup52_1], (instregex "SUBR_F64m")>;
+def: InstRW<[HWWriteResGroup52_1], (instregex "SUB_F32m")>;
+def: InstRW<[HWWriteResGroup52_1], (instregex "SUB_F64m")>;
+def: InstRW<[HWWriteResGroup52_1], (instregex "VADDPDYrm")>;
+def: InstRW<[HWWriteResGroup52_1], (instregex "VADDPSYrm")>;
+def: InstRW<[HWWriteResGroup52_1], (instregex "VADDSUBPDYrm")>;
+def: InstRW<[HWWriteResGroup52_1], (instregex "VADDSUBPSYrm")>;
+def: InstRW<[HWWriteResGroup52_1], (instregex "VCMPPDYrmi")>;
+def: InstRW<[HWWriteResGroup52_1], (instregex "VCMPPSYrmi")>;
+def: InstRW<[HWWriteResGroup52_1], (instregex "VCVTDQ2PSYrm")>;
+def: InstRW<[HWWriteResGroup52_1], (instregex "VCVTPS2DQYrm")>;
+def: InstRW<[HWWriteResGroup52_1], (instregex "VCVTTPS2DQYrm")>;
+def: InstRW<[HWWriteResGroup52_1], (instregex "VMAX(C?)PDYrm")>;
+def: InstRW<[HWWriteResGroup52_1], (instregex "VMAX(C?)PSYrm")>;
+def: InstRW<[HWWriteResGroup52_1], (instregex "VMIN(C?)PDYrm")>;
+def: InstRW<[HWWriteResGroup52_1], (instregex "VMIN(C?)PSYrm")>;
+def: InstRW<[HWWriteResGroup52_1], (instregex "VSUBPDYrm")>;
+def: InstRW<[HWWriteResGroup52_1], (instregex "VSUBPSYrm")>;
def HWWriteResGroup53 : SchedWriteRes<[HWPort5,HWPort23]> {
- let Latency = 3;
+ let Latency = 10;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
@@ -2852,19 +2701,22 @@ def: InstRW<[HWWriteResGroup53], (instregex "VPERMDYrm")>;
def: InstRW<[HWWriteResGroup53], (instregex "VPERMPDYmi")>;
def: InstRW<[HWWriteResGroup53], (instregex "VPERMPSYrm")>;
def: InstRW<[HWWriteResGroup53], (instregex "VPERMQYmi")>;
-def: InstRW<[HWWriteResGroup53], (instregex "VPMOVSXBDYrm")>;
-def: InstRW<[HWWriteResGroup53], (instregex "VPMOVSXBQYrm")>;
-def: InstRW<[HWWriteResGroup53], (instregex "VPMOVSXBWYrm")>;
-def: InstRW<[HWWriteResGroup53], (instregex "VPMOVSXDQYrm")>;
-def: InstRW<[HWWriteResGroup53], (instregex "VPMOVSXWDYrm")>;
-def: InstRW<[HWWriteResGroup53], (instregex "VPMOVSXWQYrm")>;
def: InstRW<[HWWriteResGroup53], (instregex "VPMOVZXBDYrm")>;
def: InstRW<[HWWriteResGroup53], (instregex "VPMOVZXBQYrm")>;
def: InstRW<[HWWriteResGroup53], (instregex "VPMOVZXBWYrm")>;
def: InstRW<[HWWriteResGroup53], (instregex "VPMOVZXDQYrm")>;
-def: InstRW<[HWWriteResGroup53], (instregex "VPMOVZXWDYrm")>;
def: InstRW<[HWWriteResGroup53], (instregex "VPMOVZXWQYrm")>;
+def HWWriteResGroup53_1 : SchedWriteRes<[HWPort5,HWPort23]> {
+ let Latency = 9;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[HWWriteResGroup53_1], (instregex "VPMOVSXBWYrm")>;
+def: InstRW<[HWWriteResGroup53_1], (instregex "VPMOVSXDQYrm")>;
+def: InstRW<[HWWriteResGroup53_1], (instregex "VPMOVSXWDYrm")>;
+def: InstRW<[HWWriteResGroup53_1], (instregex "VPMOVZXWDYrm")>;
+
def HWWriteResGroup54 : SchedWriteRes<[HWPort0156]> {
let Latency = 3;
let NumMicroOps = 3;
@@ -2937,8 +2789,7 @@ def HWWriteResGroup59 : SchedWriteRes<[HWPort06,HWPort0156]> {
let NumMicroOps = 3;
let ResourceCycles = [1,2];
}
-def: InstRW<[HWWriteResGroup59], (instregex "CMOVA(16|32|64)rr")>;
-def: InstRW<[HWWriteResGroup59], (instregex "CMOVBE(16|32|64)rr")>;
+def: InstRW<[HWWriteResGroup59], (instregex "CMOV(A|BE)(16|32|64)rr")>;
def: InstRW<[HWWriteResGroup59], (instregex "RCL(16|32|64)r1")>;
def: InstRW<[HWWriteResGroup59], (instregex "RCL(16|32|64)ri")>;
def: InstRW<[HWWriteResGroup59], (instregex "RCL8r1")>;
@@ -2965,14 +2816,14 @@ def: InstRW<[HWWriteResGroup60], (instregex "SHR(16|32|64)rCL")>;
def: InstRW<[HWWriteResGroup60], (instregex "SHR8rCL")>;
def HWWriteResGroup61 : SchedWriteRes<[HWPort0,HWPort4,HWPort237]> {
- let Latency = 3;
+ let Latency = 4;
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
def: InstRW<[HWWriteResGroup61], (instregex "FNSTSWm")>;
def HWWriteResGroup62 : SchedWriteRes<[HWPort1,HWPort4,HWPort237]> {
- let Latency = 3;
+ let Latency = 4;
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
@@ -2986,19 +2837,25 @@ def: InstRW<[HWWriteResGroup62], (instregex "IST_FP32m")>;
def: InstRW<[HWWriteResGroup62], (instregex "IST_FP64m")>;
def HWWriteResGroup63 : SchedWriteRes<[HWPort0,HWPort5,HWPort23]> {
- let Latency = 3;
+ let Latency = 10;
let NumMicroOps = 4;
let ResourceCycles = [2,1,1];
}
def: InstRW<[HWWriteResGroup63], (instregex "VPSLLVDYrm")>;
-def: InstRW<[HWWriteResGroup63], (instregex "VPSLLVDrm")>;
def: InstRW<[HWWriteResGroup63], (instregex "VPSRAVDYrm")>;
-def: InstRW<[HWWriteResGroup63], (instregex "VPSRAVDrm")>;
def: InstRW<[HWWriteResGroup63], (instregex "VPSRLVDYrm")>;
-def: InstRW<[HWWriteResGroup63], (instregex "VPSRLVDrm")>;
+
+def HWWriteResGroup63_1 : SchedWriteRes<[HWPort0,HWPort5,HWPort23]> {
+ let Latency = 9;
+ let NumMicroOps = 4;
+ let ResourceCycles = [2,1,1];
+}
+def: InstRW<[HWWriteResGroup63_1], (instregex "VPSLLVDrm")>;
+def: InstRW<[HWWriteResGroup63_1], (instregex "VPSRAVDrm")>;
+def: InstRW<[HWWriteResGroup63_1], (instregex "VPSRLVDrm")>;
def HWWriteResGroup64 : SchedWriteRes<[HWPort5,HWPort23,HWPort15]> {
- let Latency = 3;
+ let Latency = 8;
let NumMicroOps = 4;
let ResourceCycles = [2,1,1];
}
@@ -3008,35 +2865,46 @@ def: InstRW<[HWWriteResGroup64], (instregex "MMX_PHADDrm64")>;
def: InstRW<[HWWriteResGroup64], (instregex "MMX_PHSUBDrm64")>;
def: InstRW<[HWWriteResGroup64], (instregex "MMX_PHSUBSWrm64")>;
def: InstRW<[HWWriteResGroup64], (instregex "MMX_PHSUBWrm64")>;
-def: InstRW<[HWWriteResGroup64], (instregex "PHADDDrm")>;
-def: InstRW<[HWWriteResGroup64], (instregex "PHADDSWrm128")>;
-def: InstRW<[HWWriteResGroup64], (instregex "PHADDWrm")>;
-def: InstRW<[HWWriteResGroup64], (instregex "PHSUBDrm")>;
-def: InstRW<[HWWriteResGroup64], (instregex "PHSUBSWrm128")>;
-def: InstRW<[HWWriteResGroup64], (instregex "PHSUBWrm")>;
-def: InstRW<[HWWriteResGroup64], (instregex "VPHADDDYrm")>;
-def: InstRW<[HWWriteResGroup64], (instregex "VPHADDDrm")>;
-def: InstRW<[HWWriteResGroup64], (instregex "VPHADDSWrm128")>;
-def: InstRW<[HWWriteResGroup64], (instregex "VPHADDSWrm256")>;
-def: InstRW<[HWWriteResGroup64], (instregex "VPHADDWYrm")>;
-def: InstRW<[HWWriteResGroup64], (instregex "VPHADDWrm")>;
-def: InstRW<[HWWriteResGroup64], (instregex "VPHSUBDYrm")>;
-def: InstRW<[HWWriteResGroup64], (instregex "VPHSUBDrm")>;
-def: InstRW<[HWWriteResGroup64], (instregex "VPHSUBSWrm128")>;
-def: InstRW<[HWWriteResGroup64], (instregex "VPHSUBSWrm256")>;
-def: InstRW<[HWWriteResGroup64], (instregex "VPHSUBWYrm")>;
-def: InstRW<[HWWriteResGroup64], (instregex "VPHSUBWrm")>;
+
+def HWWriteResGroup64_1 : SchedWriteRes<[HWPort5,HWPort23,HWPort15]> {
+ let Latency = 10;
+ let NumMicroOps = 4;
+ let ResourceCycles = [2,1,1];
+}
+def: InstRW<[HWWriteResGroup64_1], (instregex "VPHADDDYrm")>;
+def: InstRW<[HWWriteResGroup64_1], (instregex "VPHADDSWrm256")>;
+def: InstRW<[HWWriteResGroup64_1], (instregex "VPHADDWYrm")>;
+def: InstRW<[HWWriteResGroup64_1], (instregex "VPHSUBDYrm")>;
+def: InstRW<[HWWriteResGroup64_1], (instregex "VPHSUBSWrm256")>;
+def: InstRW<[HWWriteResGroup64_1], (instregex "VPHSUBWYrm")>;
+
+def HWWriteResGroup64_2 : SchedWriteRes<[HWPort5,HWPort23,HWPort15]> {
+ let Latency = 9;
+ let NumMicroOps = 4;
+ let ResourceCycles = [2,1,1];
+}
+def: InstRW<[HWWriteResGroup64_2], (instregex "PHADDDrm")>;
+def: InstRW<[HWWriteResGroup64_2], (instregex "PHADDSWrm128")>;
+def: InstRW<[HWWriteResGroup64_2], (instregex "PHADDWrm")>;
+def: InstRW<[HWWriteResGroup64_2], (instregex "PHSUBDrm")>;
+def: InstRW<[HWWriteResGroup64_2], (instregex "PHSUBSWrm128")>;
+def: InstRW<[HWWriteResGroup64_2], (instregex "PHSUBWrm")>;
+def: InstRW<[HWWriteResGroup64_2], (instregex "VPHADDDrm")>;
+def: InstRW<[HWWriteResGroup64_2], (instregex "VPHADDSWrm128")>;
+def: InstRW<[HWWriteResGroup64_2], (instregex "VPHADDWrm")>;
+def: InstRW<[HWWriteResGroup64_2], (instregex "VPHSUBDrm")>;
+def: InstRW<[HWWriteResGroup64_2], (instregex "VPHSUBSWrm128")>;
+def: InstRW<[HWWriteResGroup64_2], (instregex "VPHSUBWrm")>;
def HWWriteResGroup65 : SchedWriteRes<[HWPort23,HWPort06,HWPort0156]> {
- let Latency = 3;
+ let Latency = 8;
let NumMicroOps = 4;
let ResourceCycles = [1,1,2];
}
-def: InstRW<[HWWriteResGroup65], (instregex "CMOVA(16|32|64)rm")>;
-def: InstRW<[HWWriteResGroup65], (instregex "CMOVBE(16|32|64)rm")>;
+def: InstRW<[HWWriteResGroup65], (instregex "CMOV(A|BE)(16|32|64)rm")>;
def HWWriteResGroup66 : SchedWriteRes<[HWPort23,HWPort237,HWPort06,HWPort0156]> {
- let Latency = 3;
+ let Latency = 9;
let NumMicroOps = 5;
let ResourceCycles = [1,1,1,2];
}
@@ -3050,7 +2918,7 @@ def: InstRW<[HWWriteResGroup66], (instregex "RCR8m1")>;
def: InstRW<[HWWriteResGroup66], (instregex "RCR8mi")>;
def HWWriteResGroup67 : SchedWriteRes<[HWPort23,HWPort237,HWPort06,HWPort0156]> {
- let Latency = 3;
+ let Latency = 9;
let NumMicroOps = 5;
let ResourceCycles = [1,1,2,1];
}
@@ -3058,11 +2926,11 @@ def: InstRW<[HWWriteResGroup67], (instregex "ROR(16|32|64)mCL")>;
def: InstRW<[HWWriteResGroup67], (instregex "ROR8mCL")>;
def HWWriteResGroup68 : SchedWriteRes<[HWPort4,HWPort23,HWPort237,HWPort0156]> {
- let Latency = 3;
+ let Latency = 9;
let NumMicroOps = 6;
let ResourceCycles = [1,1,1,3];
}
-def: InstRW<[HWWriteResGroup68], (instregex "ADC(16|32|64)mi8")>;
+def: InstRW<[HWWriteResGroup68], (instregex "ADC(16|32|64)mi")>;
def: InstRW<[HWWriteResGroup68], (instregex "ADC8mi")>;
def: InstRW<[HWWriteResGroup68], (instregex "ADD8mi")>;
def: InstRW<[HWWriteResGroup68], (instregex "AND8mi")>;
@@ -3073,7 +2941,7 @@ def: InstRW<[HWWriteResGroup68], (instregex "XCHG8rm")>;
def: InstRW<[HWWriteResGroup68], (instregex "XOR8mi")>;
def HWWriteResGroup69 : SchedWriteRes<[HWPort4,HWPort23,HWPort237,HWPort06,HWPort0156]> {
- let Latency = 3;
+ let Latency = 9;
let NumMicroOps = 6;
let ResourceCycles = [1,1,1,2,1];
}
@@ -3085,7 +2953,7 @@ def: InstRW<[HWWriteResGroup69], (instregex "ROL(16|32|64)mCL")>;
def: InstRW<[HWWriteResGroup69], (instregex "ROL8mCL")>;
def: InstRW<[HWWriteResGroup69], (instregex "SAR(16|32|64)mCL")>;
def: InstRW<[HWWriteResGroup69], (instregex "SAR8mCL")>;
-def: InstRW<[HWWriteResGroup69], (instregex "SBB(16|32|64)mi8")>;
+def: InstRW<[HWWriteResGroup69], (instregex "SBB(16|32|64)mi")>;
def: InstRW<[HWWriteResGroup69], (instregex "SBB(16|32|64)mr")>;
def: InstRW<[HWWriteResGroup69], (instregex "SBB8mi")>;
def: InstRW<[HWWriteResGroup69], (instregex "SBB8mr")>;
@@ -3148,7 +3016,7 @@ def: InstRW<[HWWriteResGroup73], (instregex "CVTDQ2PDrr")>;
def: InstRW<[HWWriteResGroup73], (instregex "CVTPD2DQrr")>;
def: InstRW<[HWWriteResGroup73], (instregex "CVTPD2PSrr")>;
def: InstRW<[HWWriteResGroup73], (instregex "CVTSD2SSrr")>;
-def: InstRW<[HWWriteResGroup73], (instregex "CVTSI2SD64rr")>;
+def: InstRW<[HWWriteResGroup73], (instregex "CVTSI642SDrr")>;
def: InstRW<[HWWriteResGroup73], (instregex "CVTSI2SDrr")>;
def: InstRW<[HWWriteResGroup73], (instregex "CVTSI2SSrr")>;
def: InstRW<[HWWriteResGroup73], (instregex "CVTTPD2DQrr")>;
@@ -3162,7 +3030,7 @@ def: InstRW<[HWWriteResGroup73], (instregex "VCVTPD2DQrr")>;
def: InstRW<[HWWriteResGroup73], (instregex "VCVTPD2PSrr")>;
def: InstRW<[HWWriteResGroup73], (instregex "VCVTPS2PHrr")>;
def: InstRW<[HWWriteResGroup73], (instregex "VCVTSD2SSrr")>;
-def: InstRW<[HWWriteResGroup73], (instregex "VCVTSI2SD64rr")>;
+def: InstRW<[HWWriteResGroup73], (instregex "VCVTSI642SDrr")>;
def: InstRW<[HWWriteResGroup73], (instregex "VCVTSI2SDrr")>;
def: InstRW<[HWWriteResGroup73], (instregex "VCVTSI2SSrr")>;
def: InstRW<[HWWriteResGroup73], (instregex "VCVTTPD2DQrr")>;
@@ -3191,7 +3059,7 @@ def: InstRW<[HWWriteResGroup74_32], (instregex "IMUL32r")>;
def: InstRW<[HWWriteResGroup74_32], (instregex "MUL32r")>;
def HWWriteResGroup75 : SchedWriteRes<[HWPort1,HWPort23]> {
- let Latency = 4;
+ let Latency = 11;
let NumMicroOps = 3;
let ResourceCycles = [2,1];
}
@@ -3201,7 +3069,7 @@ def: InstRW<[HWWriteResGroup75], (instregex "FICOMP16m")>;
def: InstRW<[HWWriteResGroup75], (instregex "FICOMP32m")>;
def HWWriteResGroup76 : SchedWriteRes<[HWPort0,HWPort1,HWPort23]> {
- let Latency = 4;
+ let Latency = 9;
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
@@ -3222,38 +3090,50 @@ def: InstRW<[HWWriteResGroup76], (instregex "VCVTTSS2SI64rm")>;
def: InstRW<[HWWriteResGroup76], (instregex "VCVTTSS2SIrm")>;
def HWWriteResGroup77 : SchedWriteRes<[HWPort0,HWPort5,HWPort23]> {
- let Latency = 4;
+ let Latency = 10;
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
def: InstRW<[HWWriteResGroup77], (instregex "VCVTPS2PDYrm")>;
-def: InstRW<[HWWriteResGroup77], (instregex "VPTESTYrm")>;
+
+def HWWriteResGroup77_1 : SchedWriteRes<[HWPort0,HWPort5,HWPort23]> {
+ let Latency = 11;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,1,1];
+}
+def: InstRW<[HWWriteResGroup77_1], (instregex "VPTESTYrm")>;
def HWWriteResGroup78 : SchedWriteRes<[HWPort1,HWPort5,HWPort23]> {
- let Latency = 4;
+ let Latency = 10;
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
def: InstRW<[HWWriteResGroup78], (instregex "CVTDQ2PDrm")>;
def: InstRW<[HWWriteResGroup78], (instregex "CVTPD2DQrm")>;
def: InstRW<[HWWriteResGroup78], (instregex "CVTPD2PSrm")>;
-def: InstRW<[HWWriteResGroup78], (instregex "CVTSD2SSrm")>;
def: InstRW<[HWWriteResGroup78], (instregex "CVTTPD2DQrm")>;
def: InstRW<[HWWriteResGroup78], (instregex "MMX_CVTPD2PIirm")>;
-def: InstRW<[HWWriteResGroup78], (instregex "MMX_CVTPI2PDirm")>;
def: InstRW<[HWWriteResGroup78], (instregex "MMX_CVTTPD2PIirm")>;
def: InstRW<[HWWriteResGroup78], (instregex "VCVTDQ2PDrm")>;
-def: InstRW<[HWWriteResGroup78], (instregex "VCVTSD2SSrm")>;
+
+def HWWriteResGroup78_1 : SchedWriteRes<[HWPort1,HWPort5,HWPort23]> {
+ let Latency = 9;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,1,1];
+}
+def: InstRW<[HWWriteResGroup78_1], (instregex "CVTSD2SSrm")>;
+def: InstRW<[HWWriteResGroup78_1], (instregex "MMX_CVTPI2PDirm")>;
+def: InstRW<[HWWriteResGroup78_1], (instregex "VCVTSD2SSrm")>;
def HWWriteResGroup79 : SchedWriteRes<[HWPort1,HWPort6,HWPort23]> {
- let Latency = 4;
+ let Latency = 9;
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
def: InstRW<[HWWriteResGroup79], (instregex "MULX64rm")>;
def HWWriteResGroup80 : SchedWriteRes<[HWPort5,HWPort23,HWPort015]> {
- let Latency = 4;
+ let Latency = 9;
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
@@ -3284,7 +3164,7 @@ def HWWriteResGroup83 : SchedWriteRes<[HWPort1,HWPort6,HWPort0156]> {
def: InstRW<[HWWriteResGroup83], (instregex "LAR(16|32|64)rr")>;
def HWWriteResGroup84 : SchedWriteRes<[HWPort0,HWPort4,HWPort237,HWPort15]> {
- let Latency = 4;
+ let Latency = 5;
let NumMicroOps = 4;
let ResourceCycles = [1,1,1,1];
}
@@ -3298,14 +3178,14 @@ def: InstRW<[HWWriteResGroup84], (instregex "VPMASKMOVQYmr")>;
def: InstRW<[HWWriteResGroup84], (instregex "VPMASKMOVQmr")>;
def HWWriteResGroup85 : SchedWriteRes<[HWPort1,HWPort4,HWPort5,HWPort237]> {
- let Latency = 4;
+ let Latency = 5;
let NumMicroOps = 4;
let ResourceCycles = [1,1,1,1];
}
def: InstRW<[HWWriteResGroup85], (instregex "VCVTPS2PHmr")>;
def HWWriteResGroup86 : SchedWriteRes<[HWPort1,HWPort23,HWPort237,HWPort0156]> {
- let Latency = 4;
+ let Latency = 10;
let NumMicroOps = 4;
let ResourceCycles = [1,1,1,1];
}
@@ -3313,7 +3193,7 @@ def: InstRW<[HWWriteResGroup86], (instregex "SHLD(16|32|64)mri8")>;
def: InstRW<[HWWriteResGroup86], (instregex "SHRD(16|32|64)mri8")>;
def HWWriteResGroup87 : SchedWriteRes<[HWPort1,HWPort6,HWPort23,HWPort0156]> {
- let Latency = 4;
+ let Latency = 9;
let NumMicroOps = 5;
let ResourceCycles = [1,2,1,1];
}
@@ -3321,7 +3201,7 @@ def: InstRW<[HWWriteResGroup87], (instregex "LAR(16|32|64)rm")>;
def: InstRW<[HWWriteResGroup87], (instregex "LSL(16|32|64)rm")>;
def HWWriteResGroup88 : SchedWriteRes<[HWPort4,HWPort237,HWPort0156]> {
- let Latency = 4;
+ let Latency = 5;
let NumMicroOps = 6;
let ResourceCycles = [1,1,4];
}
@@ -3394,111 +3274,18 @@ def: InstRW<[HWWriteResGroup90], (instregex "MULPDrr")>;
def: InstRW<[HWWriteResGroup90], (instregex "MULPSrr")>;
def: InstRW<[HWWriteResGroup90], (instregex "MULSDrr")>;
def: InstRW<[HWWriteResGroup90], (instregex "MULSSrr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFMADD132PDYr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFMADD132PDr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFMADD132PSYr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFMADD132PSr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFMADD132SDr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFMADD132SSr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFMADD213PDYr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFMADD213PDr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFMADD213PSYr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFMADD213PSr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFMADD213SDr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFMADD213SSr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFMADD231PDYr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFMADD231PDr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFMADD231PSYr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFMADD231PSr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFMADD231SDr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFMADD231SSr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFMADDSUB132PDYr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFMADDSUB132PDr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFMADDSUB132PSYr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFMADDSUB132PSr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFMADDSUB213PDYr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFMADDSUB213PDr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFMADDSUB213PSYr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFMADDSUB213PSr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFMADDSUB231PDYr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFMADDSUB231PDr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFMADDSUB231PSYr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFMADDSUB231PSr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFMSUB132PDYr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFMSUB132PDr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFMSUB132PSYr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFMSUB132PSr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFMSUB132SDr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFMSUB132SSr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFMSUB213PDYr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFMSUB213PDr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFMSUB213PSYr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFMSUB213PSr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFMSUB213SDr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFMSUB213SSr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFMSUB231PDYr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFMSUB231PDr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFMSUB231PSYr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFMSUB231PSr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFMSUB231SDr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFMSUB231SSr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFMSUBADD132PDYr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFMSUBADD132PDr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFMSUBADD132PSYr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFMSUBADD132PSr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFMSUBADD213PDYr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFMSUBADD213PDr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFMSUBADD213PSYr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFMSUBADD213PSr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFMSUBADD231PDYr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFMSUBADD231PDr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFMSUBADD231PSYr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFMSUBADD231PSr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFNMADD132PDYr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFNMADD132PDr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFNMADD132PSYr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFNMADD132PSr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFNMADD132SDr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFNMADD132SSr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFNMADD213PDYr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFNMADD213PDr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFNMADD213PSYr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFNMADD213PSr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFNMADD213SDr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFNMADD213SSr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFNMADD231PDYr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFNMADD231PDr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFNMADD231PSYr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFNMADD231PSr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFNMADD231SDr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFNMADD231SSr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFNMSUB132PDYr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFNMSUB132PDr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFNMSUB132PSYr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFNMSUB132PSr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFNMSUB132SDr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFNMSUB132SSr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFNMSUB213PDYr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFNMSUB213PDr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFNMSUB213PSYr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFNMSUB213PSr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFNMSUB213SDr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFNMSUB213SSr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFNMSUB231PDYr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFNMSUB231PDr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFNMSUB231PSYr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFNMSUB231PSr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFNMSUB231SDr")>;
-def: InstRW<[HWWriteResGroup90], (instregex "VFNMSUB231SSr")>;
def: InstRW<[HWWriteResGroup90], (instregex "VMULPDYrr")>;
def: InstRW<[HWWriteResGroup90], (instregex "VMULPDrr")>;
def: InstRW<[HWWriteResGroup90], (instregex "VMULPSYrr")>;
def: InstRW<[HWWriteResGroup90], (instregex "VMULPSrr")>;
def: InstRW<[HWWriteResGroup90], (instregex "VMULSDrr")>;
def: InstRW<[HWWriteResGroup90], (instregex "VMULSSrr")>;
+def: InstRW<[HWWriteResGroup90],
+ (instregex "VF(N)?M(ADD|SUB|ADDSUB|SUBADD)(132|213|231)P(D|S)(Y)?r", "VF(N)?M(ADD|SUB)(132|213|231)S(D|S)r")>;
def HWWriteResGroup91 : SchedWriteRes<[HWPort0,HWPort23]> {
- let Latency = 5;
+ let Latency = 10;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
@@ -3510,172 +3297,114 @@ def: InstRW<[HWWriteResGroup91], (instregex "MMX_PMULHWirm")>;
def: InstRW<[HWWriteResGroup91], (instregex "MMX_PMULLWirm")>;
def: InstRW<[HWWriteResGroup91], (instregex "MMX_PMULUDQirm")>;
def: InstRW<[HWWriteResGroup91], (instregex "MMX_PSADBWirm")>;
-def: InstRW<[HWWriteResGroup91], (instregex "MUL_F32m")>;
-def: InstRW<[HWWriteResGroup91], (instregex "MUL_F64m")>;
-def: InstRW<[HWWriteResGroup91], (instregex "PCMPGTQrm")>;
-def: InstRW<[HWWriteResGroup91], (instregex "PHMINPOSUWrm128")>;
-def: InstRW<[HWWriteResGroup91], (instregex "PMADDUBSWrm")>;
-def: InstRW<[HWWriteResGroup91], (instregex "PMADDWDrm")>;
-def: InstRW<[HWWriteResGroup91], (instregex "PMULDQrm")>;
-def: InstRW<[HWWriteResGroup91], (instregex "PMULHRSWrm")>;
-def: InstRW<[HWWriteResGroup91], (instregex "PMULHUWrm")>;
-def: InstRW<[HWWriteResGroup91], (instregex "PMULHWrm")>;
-def: InstRW<[HWWriteResGroup91], (instregex "PMULLWrm")>;
-def: InstRW<[HWWriteResGroup91], (instregex "PMULUDQrm")>;
-def: InstRW<[HWWriteResGroup91], (instregex "PSADBWrm")>;
-def: InstRW<[HWWriteResGroup91], (instregex "RCPPSm")>;
def: InstRW<[HWWriteResGroup91], (instregex "RCPSSm")>;
-def: InstRW<[HWWriteResGroup91], (instregex "RSQRTPSm")>;
def: InstRW<[HWWriteResGroup91], (instregex "RSQRTSSm")>;
-def: InstRW<[HWWriteResGroup91], (instregex "VPCMPGTQYrm")>;
-def: InstRW<[HWWriteResGroup91], (instregex "VPCMPGTQrm")>;
-def: InstRW<[HWWriteResGroup91], (instregex "VPHMINPOSUWrm128")>;
-def: InstRW<[HWWriteResGroup91], (instregex "VPMADDUBSWYrm")>;
-def: InstRW<[HWWriteResGroup91], (instregex "VPMADDUBSWrm")>;
-def: InstRW<[HWWriteResGroup91], (instregex "VPMADDWDYrm")>;
-def: InstRW<[HWWriteResGroup91], (instregex "VPMADDWDrm")>;
-def: InstRW<[HWWriteResGroup91], (instregex "VPMULDQYrm")>;
-def: InstRW<[HWWriteResGroup91], (instregex "VPMULDQrm")>;
-def: InstRW<[HWWriteResGroup91], (instregex "VPMULHRSWYrm")>;
-def: InstRW<[HWWriteResGroup91], (instregex "VPMULHRSWrm")>;
-def: InstRW<[HWWriteResGroup91], (instregex "VPMULHUWYrm")>;
-def: InstRW<[HWWriteResGroup91], (instregex "VPMULHUWrm")>;
-def: InstRW<[HWWriteResGroup91], (instregex "VPMULHWYrm")>;
-def: InstRW<[HWWriteResGroup91], (instregex "VPMULHWrm")>;
-def: InstRW<[HWWriteResGroup91], (instregex "VPMULLWYrm")>;
-def: InstRW<[HWWriteResGroup91], (instregex "VPMULLWrm")>;
-def: InstRW<[HWWriteResGroup91], (instregex "VPMULUDQYrm")>;
-def: InstRW<[HWWriteResGroup91], (instregex "VPMULUDQrm")>;
-def: InstRW<[HWWriteResGroup91], (instregex "VPSADBWYrm")>;
-def: InstRW<[HWWriteResGroup91], (instregex "VPSADBWrm")>;
-def: InstRW<[HWWriteResGroup91], (instregex "VRCPPSm")>;
def: InstRW<[HWWriteResGroup91], (instregex "VRCPSSm")>;
-def: InstRW<[HWWriteResGroup91], (instregex "VRSQRTPSm")>;
def: InstRW<[HWWriteResGroup91], (instregex "VRSQRTSSm")>;
+def HWWriteResGroup91_1 : SchedWriteRes<[HWPort0,HWPort23]> {
+ let Latency = 18;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[HWWriteResGroup91_1], (instregex "SQRTSSm")>;
+def: InstRW<[HWWriteResGroup91_1], (instregex "VDIVSSrm")>;
+
+def HWWriteResGroup91_2 : SchedWriteRes<[HWPort0,HWPort23]> {
+ let Latency = 11;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[HWWriteResGroup91_2], (instregex "PCMPGTQrm")>;
+def: InstRW<[HWWriteResGroup91_2], (instregex "PHMINPOSUWrm128")>;
+def: InstRW<[HWWriteResGroup91_2], (instregex "PMADDUBSWrm")>;
+def: InstRW<[HWWriteResGroup91_2], (instregex "PMADDWDrm")>;
+def: InstRW<[HWWriteResGroup91_2], (instregex "PMULDQrm")>;
+def: InstRW<[HWWriteResGroup91_2], (instregex "PMULHRSWrm")>;
+def: InstRW<[HWWriteResGroup91_2], (instregex "PMULHUWrm")>;
+def: InstRW<[HWWriteResGroup91_2], (instregex "PMULHWrm")>;
+def: InstRW<[HWWriteResGroup91_2], (instregex "PMULLWrm")>;
+def: InstRW<[HWWriteResGroup91_2], (instregex "PMULUDQrm")>;
+def: InstRW<[HWWriteResGroup91_2], (instregex "PSADBWrm")>;
+def: InstRW<[HWWriteResGroup91_2], (instregex "RCPPSm")>;
+def: InstRW<[HWWriteResGroup91_2], (instregex "RSQRTPSm")>;
+def: InstRW<[HWWriteResGroup91_2], (instregex "VPCMPGTQrm")>;
+def: InstRW<[HWWriteResGroup91_2], (instregex "VPHMINPOSUWrm128")>;
+def: InstRW<[HWWriteResGroup91_2], (instregex "VPMADDUBSWrm")>;
+def: InstRW<[HWWriteResGroup91_2], (instregex "VPMADDWDrm")>;
+def: InstRW<[HWWriteResGroup91_2], (instregex "VPMULDQrm")>;
+def: InstRW<[HWWriteResGroup91_2], (instregex "VPMULHRSWrm")>;
+def: InstRW<[HWWriteResGroup91_2], (instregex "VPMULHUWrm")>;
+def: InstRW<[HWWriteResGroup91_2], (instregex "VPMULHWrm")>;
+def: InstRW<[HWWriteResGroup91_2], (instregex "VPMULLWrm")>;
+def: InstRW<[HWWriteResGroup91_2], (instregex "VPMULUDQrm")>;
+def: InstRW<[HWWriteResGroup91_2], (instregex "VPSADBWrm")>;
+def: InstRW<[HWWriteResGroup91_2], (instregex "VRCPPSm")>;
+def: InstRW<[HWWriteResGroup91_2], (instregex "VRSQRTPSm")>;
+
+def HWWriteResGroup91_3 : SchedWriteRes<[HWPort0,HWPort23]> {
+ let Latency = 12;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[HWWriteResGroup91_3], (instregex "MUL_F32m")>;
+def: InstRW<[HWWriteResGroup91_3], (instregex "MUL_F64m")>;
+def: InstRW<[HWWriteResGroup91_3], (instregex "VPCMPGTQYrm")>;
+def: InstRW<[HWWriteResGroup91_3], (instregex "VPMADDUBSWYrm")>;
+def: InstRW<[HWWriteResGroup91_3], (instregex "VPMADDWDYrm")>;
+def: InstRW<[HWWriteResGroup91_3], (instregex "VPMULDQYrm")>;
+def: InstRW<[HWWriteResGroup91_3], (instregex "VPMULHRSWYrm")>;
+def: InstRW<[HWWriteResGroup91_3], (instregex "VPMULHUWYrm")>;
+def: InstRW<[HWWriteResGroup91_3], (instregex "VPMULHWYrm")>;
+def: InstRW<[HWWriteResGroup91_3], (instregex "VPMULLWYrm")>;
+def: InstRW<[HWWriteResGroup91_3], (instregex "VPMULUDQYrm")>;
+def: InstRW<[HWWriteResGroup91_3], (instregex "VPSADBWYrm")>;
+
def HWWriteResGroup92 : SchedWriteRes<[HWPort01,HWPort23]> {
- let Latency = 5;
+ let Latency = 11;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
def: InstRW<[HWWriteResGroup92], (instregex "MULPDrm")>;
def: InstRW<[HWWriteResGroup92], (instregex "MULPSrm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "MULSDrm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "MULSSrm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFMADD132PDYm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFMADD132PDm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFMADD132PSYm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFMADD132PSm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFMADD132SDm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFMADD132SSm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFMADD213PDYm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFMADD213PDm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFMADD213PSYm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFMADD213PSm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFMADD213SDm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFMADD213SSm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFMADD231PDYm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFMADD231PDm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFMADD231PSYm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFMADD231PSm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFMADD231SDm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFMADD231SSm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFMADDSUB132PDYm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFMADDSUB132PDm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFMADDSUB132PSYm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFMADDSUB132PSm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFMADDSUB213PDYm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFMADDSUB213PDm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFMADDSUB213PSYm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFMADDSUB213PSm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFMADDSUB231PDYm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFMADDSUB231PDm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFMADDSUB231PSYm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFMADDSUB231PSm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFMSUB132PDYm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFMSUB132PDm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFMSUB132PSYm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFMSUB132PSm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFMSUB132SDm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFMSUB132SSm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFMSUB213PDYm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFMSUB213PDm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFMSUB213PSYm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFMSUB213PSm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFMSUB213SDm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFMSUB213SSm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFMSUB231PDYm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFMSUB231PDm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFMSUB231PSYm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFMSUB231PSm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFMSUB231SDm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFMSUB231SSm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFMSUBADD132PDYm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFMSUBADD132PDm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFMSUBADD132PSYm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFMSUBADD132PSm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFMSUBADD213PDYm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFMSUBADD213PDm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFMSUBADD213PSYm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFMSUBADD213PSm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFMSUBADD231PDYm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFMSUBADD231PDm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFMSUBADD231PSYm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFMSUBADD231PSm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFNMADD132PDYm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFNMADD132PDm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFNMADD132PSYm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFNMADD132PSm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFNMADD132SDm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFNMADD132SSm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFNMADD213PDYm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFNMADD213PDm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFNMADD213PSYm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFNMADD213PSm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFNMADD213SDm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFNMADD213SSm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFNMADD231PDYm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFNMADD231PDm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFNMADD231PSYm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFNMADD231PSm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFNMADD231SDm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFNMADD231SSm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFNMSUB132PDYm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFNMSUB132PDm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFNMSUB132PSYm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFNMSUB132PSm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFNMSUB132SDm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFNMSUB132SSm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFNMSUB213PDYm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFNMSUB213PDm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFNMSUB213PSYm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFNMSUB213PSm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFNMSUB213SDm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFNMSUB213SSm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFNMSUB231PDYm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFNMSUB231PDm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFNMSUB231PSYm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFNMSUB231PSm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFNMSUB231SDm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VFNMSUB231SSm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VMULPDYrm")>;
def: InstRW<[HWWriteResGroup92], (instregex "VMULPDrm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VMULPSYrm")>;
def: InstRW<[HWWriteResGroup92], (instregex "VMULPSrm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VMULSDrm")>;
-def: InstRW<[HWWriteResGroup92], (instregex "VMULSSrm")>;
+def: InstRW<[HWWriteResGroup92],
+ (instregex "VF(N)?M(ADD|SUB|ADDSUB|SUBADD)(132|213|231)P(D|S)m")>;
+
+def HWWriteResGroup92_1 : SchedWriteRes<[HWPort01,HWPort23]> {
+ let Latency = 12;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[HWWriteResGroup92_1], (instregex "VMULPDYrm")>;
+def: InstRW<[HWWriteResGroup92_1], (instregex "VMULPSYrm")>;
+def: InstRW<[HWWriteResGroup92_1],
+ (instregex "VF(N)?M(ADD|SUB|ADDSUB|SUBADD)(132|213|231)P(D|S)Ym")>;
+
+def HWWriteResGroup92_2 : SchedWriteRes<[HWPort01,HWPort23]> {
+ let Latency = 10;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[HWWriteResGroup92_2], (instregex "MULSDrm")>;
+def: InstRW<[HWWriteResGroup92_2], (instregex "MULSSrm")>;
+def: InstRW<[HWWriteResGroup92_2], (instregex "VMULSDrm")>;
+def: InstRW<[HWWriteResGroup92_2], (instregex "VMULSSrm")>;
+def: InstRW<[HWWriteResGroup92_2],
+ (instregex "VF(N)?M(ADD|SUB)(132|213|231)S(D|S)m")>;
def HWWriteResGroup93 : SchedWriteRes<[HWPort1,HWPort5]> {
let Latency = 5;
let NumMicroOps = 3;
let ResourceCycles = [1,2];
}
-def: InstRW<[HWWriteResGroup93], (instregex "CVTSI2SS64rr")>;
+def: InstRW<[HWWriteResGroup93], (instregex "CVTSI642SSrr")>;
def: InstRW<[HWWriteResGroup93], (instregex "HADDPDrr")>;
def: InstRW<[HWWriteResGroup93], (instregex "HADDPSrr")>;
def: InstRW<[HWWriteResGroup93], (instregex "HSUBPDrr")>;
def: InstRW<[HWWriteResGroup93], (instregex "HSUBPSrr")>;
-def: InstRW<[HWWriteResGroup93], (instregex "VCVTSI2SS64rr")>;
+def: InstRW<[HWWriteResGroup93], (instregex "VCVTSI642SSrr")>;
def: InstRW<[HWWriteResGroup93], (instregex "VHADDPDYrr")>;
def: InstRW<[HWWriteResGroup93], (instregex "VHADDPDrr")>;
def: InstRW<[HWWriteResGroup93], (instregex "VHADDPSYrr")>;
@@ -3700,7 +3429,7 @@ def HWWriteResGroup95 : SchedWriteRes<[HWPort1,HWPort06,HWPort0156]> {
def: InstRW<[HWWriteResGroup95], (instregex "MULX32rr")>;
def HWWriteResGroup96 : SchedWriteRes<[HWPort1,HWPort5,HWPort23]> {
- let Latency = 5;
+ let Latency = 11;
let NumMicroOps = 4;
let ResourceCycles = [1,2,1];
}
@@ -3708,24 +3437,30 @@ def: InstRW<[HWWriteResGroup96], (instregex "HADDPDrm")>;
def: InstRW<[HWWriteResGroup96], (instregex "HADDPSrm")>;
def: InstRW<[HWWriteResGroup96], (instregex "HSUBPDrm")>;
def: InstRW<[HWWriteResGroup96], (instregex "HSUBPSrm")>;
-def: InstRW<[HWWriteResGroup96], (instregex "VHADDPDYrm")>;
def: InstRW<[HWWriteResGroup96], (instregex "VHADDPDrm")>;
-def: InstRW<[HWWriteResGroup96], (instregex "VHADDPSYrm")>;
def: InstRW<[HWWriteResGroup96], (instregex "VHADDPSrm")>;
-def: InstRW<[HWWriteResGroup96], (instregex "VHSUBPDYrm")>;
def: InstRW<[HWWriteResGroup96], (instregex "VHSUBPDrm")>;
-def: InstRW<[HWWriteResGroup96], (instregex "VHSUBPSYrm")>;
def: InstRW<[HWWriteResGroup96], (instregex "VHSUBPSrm")>;
+def HWWriteResGroup96_1 : SchedWriteRes<[HWPort1,HWPort5,HWPort23]> {
+ let Latency = 12;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1,2,1];
+}
+def: InstRW<[HWWriteResGroup96_1], (instregex "VHADDPDYrm")>;
+def: InstRW<[HWWriteResGroup96_1], (instregex "VHADDPSYrm")>;
+def: InstRW<[HWWriteResGroup96_1], (instregex "VHSUBPDYrm")>;
+def: InstRW<[HWWriteResGroup96_1], (instregex "VHSUBPSYrm")>;
+
def HWWriteResGroup97 : SchedWriteRes<[HWPort0,HWPort1,HWPort5,HWPort23]> {
- let Latency = 5;
+ let Latency = 10;
let NumMicroOps = 4;
let ResourceCycles = [1,1,1,1];
}
def: InstRW<[HWWriteResGroup97], (instregex "CVTTSS2SI64rm")>;
def HWWriteResGroup98 : SchedWriteRes<[HWPort1,HWPort23,HWPort06,HWPort0156]> {
- let Latency = 5;
+ let Latency = 10;
let NumMicroOps = 4;
let ResourceCycles = [1,1,1,1];
}
@@ -3752,16 +3487,6 @@ def HWWriteResGroup101 : SchedWriteRes<[HWPort06,HWPort0156]> {
}
def: InstRW<[HWWriteResGroup101], (instregex "CMPXCHG(16|32|64)rr")>;
def: InstRW<[HWWriteResGroup101], (instregex "CMPXCHG8rr")>;
-def: InstRW<[HWWriteResGroup101], (instregex "ROUNDPDr")>;
-def: InstRW<[HWWriteResGroup101], (instregex "ROUNDPSr")>;
-def: InstRW<[HWWriteResGroup101], (instregex "ROUNDSDr")>;
-def: InstRW<[HWWriteResGroup101], (instregex "ROUNDSSr")>;
-def: InstRW<[HWWriteResGroup101], (instregex "VROUNDPDr")>;
-def: InstRW<[HWWriteResGroup101], (instregex "VROUNDPSr")>;
-def: InstRW<[HWWriteResGroup101], (instregex "VROUNDSDr")>;
-def: InstRW<[HWWriteResGroup101], (instregex "VROUNDSSr")>;
-def: InstRW<[HWWriteResGroup101], (instregex "VROUNDYPDr")>;
-def: InstRW<[HWWriteResGroup101], (instregex "VROUNDYPSr")>;
def HWWriteResGroup102 : SchedWriteRes<[HWPort1,HWPort5]> {
let Latency = 6;
@@ -3775,29 +3500,35 @@ def: InstRW<[HWWriteResGroup102], (instregex "VCVTPS2PHYrr")>;
def: InstRW<[HWWriteResGroup102], (instregex "VCVTTPD2DQYrr")>;
def HWWriteResGroup103 : SchedWriteRes<[HWPort1,HWPort23]> {
- let Latency = 6;
+ let Latency = 13;
let NumMicroOps = 3;
let ResourceCycles = [2,1];
}
def: InstRW<[HWWriteResGroup103], (instregex "ADD_FI16m")>;
def: InstRW<[HWWriteResGroup103], (instregex "ADD_FI32m")>;
-def: InstRW<[HWWriteResGroup103], (instregex "ROUNDPDm")>;
-def: InstRW<[HWWriteResGroup103], (instregex "ROUNDPSm")>;
-def: InstRW<[HWWriteResGroup103], (instregex "ROUNDSDm")>;
-def: InstRW<[HWWriteResGroup103], (instregex "ROUNDSSm")>;
def: InstRW<[HWWriteResGroup103], (instregex "SUBR_FI16m")>;
def: InstRW<[HWWriteResGroup103], (instregex "SUBR_FI32m")>;
def: InstRW<[HWWriteResGroup103], (instregex "SUB_FI16m")>;
def: InstRW<[HWWriteResGroup103], (instregex "SUB_FI32m")>;
-def: InstRW<[HWWriteResGroup103], (instregex "VROUNDPDm")>;
-def: InstRW<[HWWriteResGroup103], (instregex "VROUNDPSm")>;
-def: InstRW<[HWWriteResGroup103], (instregex "VROUNDSDm")>;
-def: InstRW<[HWWriteResGroup103], (instregex "VROUNDSSm")>;
def: InstRW<[HWWriteResGroup103], (instregex "VROUNDYPDm")>;
def: InstRW<[HWWriteResGroup103], (instregex "VROUNDYPSm")>;
+def HWWriteResGroup103_1 : SchedWriteRes<[HWPort1,HWPort23]> {
+ let Latency = 12;
+ let NumMicroOps = 3;
+ let ResourceCycles = [2,1];
+}
+def: InstRW<[HWWriteResGroup103_1], (instregex "ROUNDPDm")>;
+def: InstRW<[HWWriteResGroup103_1], (instregex "ROUNDPSm")>;
+def: InstRW<[HWWriteResGroup103_1], (instregex "ROUNDSDm")>;
+def: InstRW<[HWWriteResGroup103_1], (instregex "ROUNDSSm")>;
+def: InstRW<[HWWriteResGroup103_1], (instregex "VROUNDPDm")>;
+def: InstRW<[HWWriteResGroup103_1], (instregex "VROUNDPSm")>;
+def: InstRW<[HWWriteResGroup103_1], (instregex "VROUNDSDm")>;
+def: InstRW<[HWWriteResGroup103_1], (instregex "VROUNDSSm")>;
+
def HWWriteResGroup104 : SchedWriteRes<[HWPort1,HWPort5,HWPort23]> {
- let Latency = 6;
+ let Latency = 12;
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
@@ -3812,7 +3543,7 @@ def: InstRW<[HWWriteResGroup105], (instregex "SHLD(16|32|64)rrCL")>;
def: InstRW<[HWWriteResGroup105], (instregex "SHRD(16|32|64)rrCL")>;
def HWWriteResGroup106 : SchedWriteRes<[HWPort1,HWPort4,HWPort5,HWPort237]> {
- let Latency = 6;
+ let Latency = 7;
let NumMicroOps = 4;
let ResourceCycles = [1,1,1,1];
}
@@ -3833,7 +3564,7 @@ def HWWriteResGroup108 : SchedWriteRes<[HWPort6,HWPort0156]> {
def: InstRW<[HWWriteResGroup108], (instregex "STD")>;
def HWWriteResGroup109 : SchedWriteRes<[HWPort1,HWPort23,HWPort237,HWPort06,HWPort0156]> {
- let Latency = 6;
+ let Latency = 12;
let NumMicroOps = 6;
let ResourceCycles = [1,1,1,1,2];
}
@@ -3855,7 +3586,7 @@ def: InstRW<[HWWriteResGroup110], (instregex "VAESENCLASTrr")>;
def: InstRW<[HWWriteResGroup110], (instregex "VAESENCrr")>;
def HWWriteResGroup111 : SchedWriteRes<[HWPort5,HWPort23]> {
- let Latency = 7;
+ let Latency = 13;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
@@ -3878,14 +3609,20 @@ def: InstRW<[HWWriteResGroup112], (instregex "VMPSADBWYrri")>;
def: InstRW<[HWWriteResGroup112], (instregex "VMPSADBWrri")>;
def HWWriteResGroup113 : SchedWriteRes<[HWPort0,HWPort5,HWPort23]> {
- let Latency = 7;
+ let Latency = 13;
let NumMicroOps = 4;
let ResourceCycles = [1,2,1];
}
def: InstRW<[HWWriteResGroup113], (instregex "MPSADBWrmi")>;
-def: InstRW<[HWWriteResGroup113], (instregex "VMPSADBWYrmi")>;
def: InstRW<[HWWriteResGroup113], (instregex "VMPSADBWrmi")>;
+def HWWriteResGroup113_1 : SchedWriteRes<[HWPort0,HWPort5,HWPort23]> {
+ let Latency = 14;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1,2,1];
+}
+def: InstRW<[HWWriteResGroup113_1], (instregex "VMPSADBWYrmi")>;
+
def HWWriteResGroup114 : SchedWriteRes<[HWPort6,HWPort06,HWPort15,HWPort0156]> {
let Latency = 7;
let NumMicroOps = 7;
@@ -3894,7 +3631,7 @@ def HWWriteResGroup114 : SchedWriteRes<[HWPort6,HWPort06,HWPort15,HWPort0156]> {
def: InstRW<[HWWriteResGroup114], (instregex "LOOP")>;
def HWWriteResGroup115 : SchedWriteRes<[HWPort0,HWPort1,HWPort23]> {
- let Latency = 8;
+ let Latency = 15;
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
@@ -3910,7 +3647,7 @@ def: InstRW<[HWWriteResGroup116], (instregex "DPPDrri")>;
def: InstRW<[HWWriteResGroup116], (instregex "VDPPDrri")>;
def HWWriteResGroup117 : SchedWriteRes<[HWPort0,HWPort1,HWPort5,HWPort23]> {
- let Latency = 9;
+ let Latency = 15;
let NumMicroOps = 4;
let ResourceCycles = [1,1,1,1];
}
@@ -3927,16 +3664,22 @@ def: InstRW<[HWWriteResGroup118], (instregex "VPMULLDYrr")>;
def: InstRW<[HWWriteResGroup118], (instregex "VPMULLDrr")>;
def HWWriteResGroup119 : SchedWriteRes<[HWPort0,HWPort23]> {
- let Latency = 10;
+ let Latency = 16;
let NumMicroOps = 3;
let ResourceCycles = [2,1];
}
def: InstRW<[HWWriteResGroup119], (instregex "PMULLDrm")>;
-def: InstRW<[HWWriteResGroup119], (instregex "VPMULLDYrm")>;
def: InstRW<[HWWriteResGroup119], (instregex "VPMULLDrm")>;
+def HWWriteResGroup119_1 : SchedWriteRes<[HWPort0,HWPort23]> {
+ let Latency = 17;
+ let NumMicroOps = 3;
+ let ResourceCycles = [2,1];
+}
+def: InstRW<[HWWriteResGroup119_1], (instregex "VPMULLDYrm")>;
+
def HWWriteResGroup120 : SchedWriteRes<[HWPort1,HWPort23,HWPort237,HWPort06,HWPort15,HWPort0156]> {
- let Latency = 10;
+ let Latency = 16;
let NumMicroOps = 10;
let ResourceCycles = [1,1,1,4,1,2];
}
@@ -3952,12 +3695,18 @@ def: InstRW<[HWWriteResGroup121], (instregex "DIVPSrr")>;
def: InstRW<[HWWriteResGroup121], (instregex "DIVSSrr")>;
def HWWriteResGroup122 : SchedWriteRes<[HWPort0,HWPort23]> {
- let Latency = 11;
+ let Latency = 17;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
def: InstRW<[HWWriteResGroup122], (instregex "DIVPSrm")>;
-def: InstRW<[HWWriteResGroup122], (instregex "DIVSSrm")>;
+
+def HWWriteResGroup122_1 : SchedWriteRes<[HWPort0,HWPort23]> {
+ let Latency = 16;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[HWWriteResGroup122_1], (instregex "DIVSSrm")>;
def HWWriteResGroup123 : SchedWriteRes<[HWPort0]> {
let Latency = 11;
@@ -3986,7 +3735,7 @@ def: InstRW<[HWWriteResGroup125], (instregex "VRCPPSYr")>;
def: InstRW<[HWWriteResGroup125], (instregex "VRSQRTPSYr")>;
def HWWriteResGroup126 : SchedWriteRes<[HWPort0,HWPort23]> {
- let Latency = 11;
+ let Latency = 17;
let NumMicroOps = 4;
let ResourceCycles = [3,1];
}
@@ -3996,7 +3745,7 @@ def: InstRW<[HWWriteResGroup126], (instregex "VPCMPISTRIrm")>;
def: InstRW<[HWWriteResGroup126], (instregex "VPCMPISTRM128rm")>;
def HWWriteResGroup127 : SchedWriteRes<[HWPort0,HWPort5,HWPort23]> {
- let Latency = 11;
+ let Latency = 17;
let NumMicroOps = 4;
let ResourceCycles = [2,1,1];
}
@@ -4004,7 +3753,7 @@ def: InstRW<[HWWriteResGroup127], (instregex "PCLMULQDQrm")>;
def: InstRW<[HWWriteResGroup127], (instregex "VPCLMULQDQrm")>;
def HWWriteResGroup128 : SchedWriteRes<[HWPort0,HWPort23,HWPort015]> {
- let Latency = 11;
+ let Latency = 18;
let NumMicroOps = 4;
let ResourceCycles = [2,1,1];
}
@@ -4035,7 +3784,7 @@ def: InstRW<[HWWriteResGroup131], (instregex "LOOPE")>;
def: InstRW<[HWWriteResGroup131], (instregex "LOOPNE")>;
def HWWriteResGroup132 : SchedWriteRes<[HWPort4,HWPort23,HWPort237,HWPort06,HWPort15,HWPort0156]> {
- let Latency = 11;
+ let Latency = 17;
let NumMicroOps = 14;
let ResourceCycles = [1,1,1,4,2,5];
}
@@ -4052,17 +3801,17 @@ def: InstRW<[HWWriteResGroup133], (instregex "VDIVPSrr")>;
def: InstRW<[HWWriteResGroup133], (instregex "VDIVSSrr")>;
def HWWriteResGroup134 : SchedWriteRes<[HWPort0,HWPort23]> {
- let Latency = 13;
+ let Latency = 19;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
+def: InstRW<[HWWriteResGroup134], (instregex "DIVSDrm")>;
def: InstRW<[HWWriteResGroup134], (instregex "SQRTPSm")>;
-def: InstRW<[HWWriteResGroup134], (instregex "SQRTSSm")>;
def: InstRW<[HWWriteResGroup134], (instregex "VDIVPSrm")>;
-def: InstRW<[HWWriteResGroup134], (instregex "VDIVSSrm")>;
+def: InstRW<[HWWriteResGroup134], (instregex "VSQRTSSm")>;
def HWWriteResGroup135 : SchedWriteRes<[HWPort1,HWPort23,HWPort237,HWPort06,HWPort15,HWPort0156]> {
- let Latency = 13;
+ let Latency = 19;
let NumMicroOps = 11;
let ResourceCycles = [2,1,1,3,1,3];
}
@@ -4088,17 +3837,15 @@ def: InstRW<[HWWriteResGroup137], (instregex "AESIMCrr")>;
def: InstRW<[HWWriteResGroup137], (instregex "VAESIMCrr")>;
def HWWriteResGroup138 : SchedWriteRes<[HWPort0,HWPort23]> {
- let Latency = 14;
+ let Latency = 20;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
def: 
InstRW<[HWWriteResGroup138], (instregex "DIVPDrm")>; -def: InstRW<[HWWriteResGroup138], (instregex "DIVSDrm")>; def: InstRW<[HWWriteResGroup138], (instregex "VSQRTPSm")>; -def: InstRW<[HWWriteResGroup138], (instregex "VSQRTSSm")>; def HWWriteResGroup139 : SchedWriteRes<[HWPort5,HWPort23]> { - let Latency = 14; + let Latency = 20; let NumMicroOps = 3; let ResourceCycles = [2,1]; } @@ -4115,14 +3862,20 @@ def: InstRW<[HWWriteResGroup140], (instregex "VDPPSYrri")>; def: InstRW<[HWWriteResGroup140], (instregex "VDPPSrri")>; def HWWriteResGroup141 : SchedWriteRes<[HWPort0,HWPort1,HWPort5,HWPort23]> { - let Latency = 14; + let Latency = 20; let NumMicroOps = 5; let ResourceCycles = [2,1,1,1]; } def: InstRW<[HWWriteResGroup141], (instregex "DPPSrmi")>; -def: InstRW<[HWWriteResGroup141], (instregex "VDPPSYrmi")>; def: InstRW<[HWWriteResGroup141], (instregex "VDPPSrmi")>; +def HWWriteResGroup141_1 : SchedWriteRes<[HWPort0,HWPort1,HWPort5,HWPort23]> { + let Latency = 21; + let NumMicroOps = 5; + let ResourceCycles = [2,1,1,1]; +} +def: InstRW<[HWWriteResGroup141_1], (instregex "VDPPSYrmi")>; + def HWWriteResGroup142 : SchedWriteRes<[HWPort1,HWPort06,HWPort15,HWPort0156]> { let Latency = 14; let NumMicroOps = 10; @@ -4131,14 +3884,14 @@ def HWWriteResGroup142 : SchedWriteRes<[HWPort1,HWPort06,HWPort15,HWPort0156]> { def: InstRW<[HWWriteResGroup142], (instregex "RCR8rCL")>; def HWWriteResGroup143 : SchedWriteRes<[HWPort23,HWPort0156]> { - let Latency = 14; + let Latency = 19; let NumMicroOps = 15; let ResourceCycles = [1,14]; } def: InstRW<[HWWriteResGroup143], (instregex "POPF16")>; def HWWriteResGroup144 : SchedWriteRes<[HWPort4,HWPort5,HWPort6,HWPort23,HWPort237,HWPort06,HWPort0156]> { - let Latency = 15; + let Latency = 21; let NumMicroOps = 8; let ResourceCycles = [1,1,1,1,1,1,2]; } @@ -4154,7 +3907,7 @@ def HWWriteResGroup145 : SchedWriteRes<[HWPort5]> { def: InstRW<[HWWriteResGroup145], (instregex "VZEROALL")>; def HWWriteResGroup146 : SchedWriteRes<[HWPort0,HWPort4,HWPort5,HWPort23,HWPort237,HWPort06,HWPort0156]> { - let Latency = 16; + let Latency = 22; let NumMicroOps = 19; let ResourceCycles = [2,1,4,1,1,4,6]; } @@ -4184,7 +3937,7 @@ def: InstRW<[HWWriteResGroup149], (instregex "CPUID")>; def: InstRW<[HWWriteResGroup149], (instregex "RDTSC")>; def HWWriteResGroup150 : SchedWriteRes<[HWPort0,HWPort5,HWPort23,HWPort0156]> { - let Latency = 18; + let Latency = 24; let NumMicroOps = 9; let ResourceCycles = [4,3,1,1]; } @@ -4192,11 +3945,11 @@ def: InstRW<[HWWriteResGroup150], (instregex "PCMPESTRIrm")>; def: InstRW<[HWWriteResGroup150], (instregex "VPCMPESTRIrm")>; def HWWriteResGroup151 : SchedWriteRes<[HWPort6,HWPort23,HWPort0156]> { - let Latency = 18; + let Latency = 23; let NumMicroOps = 19; let ResourceCycles = [3,1,15]; } -def: InstRW<[HWWriteResGroup151], (instregex "XRSTOR(64?)")>; +def: InstRW<[HWWriteResGroup151], (instregex "XRSTOR(64)?")>; def HWWriteResGroup152 : SchedWriteRes<[HWPort0,HWPort5,HWPort015,HWPort0156]> { let Latency = 19; @@ -4207,7 +3960,7 @@ def: InstRW<[HWWriteResGroup152], (instregex "PCMPESTRM128rr")>; def: InstRW<[HWWriteResGroup152], (instregex "VPCMPESTRM128rr")>; def HWWriteResGroup153 : SchedWriteRes<[HWPort0,HWPort5,HWPort23,HWPort015,HWPort0156]> { - let Latency = 19; + let Latency = 25; let NumMicroOps = 10; let ResourceCycles = [4,3,1,1,1]; } @@ -4228,16 +3981,30 @@ def: InstRW<[HWWriteResGroup154], (instregex "VDIVPDrr")>; def: InstRW<[HWWriteResGroup154], (instregex "VDIVSDrr")>; def HWWriteResGroup155 : SchedWriteRes<[HWPort0,HWPort23]> { - let 
Latency = 20; + let Latency = 27; let NumMicroOps = 2; let ResourceCycles = [1,1]; } def: InstRW<[HWWriteResGroup155], (instregex "DIVR_F32m")>; def: InstRW<[HWWriteResGroup155], (instregex "DIVR_F64m")>; -def: InstRW<[HWWriteResGroup155], (instregex "SQRTPDm")>; -def: InstRW<[HWWriteResGroup155], (instregex "SQRTSDm")>; -def: InstRW<[HWWriteResGroup155], (instregex "VDIVPDrm")>; -def: InstRW<[HWWriteResGroup155], (instregex "VDIVSDrm")>; +def: InstRW<[HWWriteResGroup155], (instregex "VSQRTPDm")>; + +def HWWriteResGroup155_1 : SchedWriteRes<[HWPort0,HWPort23]> { + let Latency = 26; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[HWWriteResGroup155_1], (instregex "SQRTPDm")>; +def: InstRW<[HWWriteResGroup155_1], (instregex "VDIVPDrm")>; +def: InstRW<[HWWriteResGroup155_1], (instregex "VSQRTSDm")>; + +def HWWriteResGroup155_2 : SchedWriteRes<[HWPort0,HWPort23]> { + let Latency = 25; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[HWWriteResGroup155_2], (instregex "SQRTSDm")>; +def: InstRW<[HWWriteResGroup155_2], (instregex "VDIVSDrm")>; def HWWriteResGroup156 : SchedWriteRes<[HWPort5,HWPort6,HWPort0156]> { let Latency = 20; @@ -4254,14 +4021,6 @@ def HWWriteResGroup157 : SchedWriteRes<[HWPort0]> { def: InstRW<[HWWriteResGroup157], (instregex "VSQRTPDr")>; def: InstRW<[HWWriteResGroup157], (instregex "VSQRTSDr")>; -def HWWriteResGroup158 : SchedWriteRes<[HWPort0,HWPort23]> { - let Latency = 21; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[HWWriteResGroup158], (instregex "VSQRTPDm")>; -def: InstRW<[HWWriteResGroup158], (instregex "VSQRTSDm")>; - def HWWriteResGroup159 : SchedWriteRes<[HWPort0,HWPort015]> { let Latency = 21; let NumMicroOps = 3; @@ -4271,7 +4030,7 @@ def: InstRW<[HWWriteResGroup159], (instregex "VDIVPSYrr")>; def: InstRW<[HWWriteResGroup159], (instregex "VSQRTPSYr")>; def HWWriteResGroup160 : SchedWriteRes<[HWPort0,HWPort23,HWPort015]> { - let Latency = 21; + let Latency = 28; let NumMicroOps = 4; let ResourceCycles = [2,1,1]; } @@ -4279,7 +4038,7 @@ def: InstRW<[HWWriteResGroup160], (instregex "VDIVPSYrm")>; def: InstRW<[HWWriteResGroup160], (instregex "VSQRTPSYm")>; def HWWriteResGroup161 : SchedWriteRes<[HWPort0,HWPort1,HWPort23]> { - let Latency = 23; + let Latency = 30; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } @@ -4296,7 +4055,7 @@ def: InstRW<[HWWriteResGroup162], (instregex "DIVR_FST0r")>; def: InstRW<[HWWriteResGroup162], (instregex "DIVR_FrST0")>; def HWWriteResGroup163 : SchedWriteRes<[HWPort0,HWPort23]> { - let Latency = 24; + let Latency = 31; let NumMicroOps = 2; let ResourceCycles = [1,1]; } @@ -4304,21 +4063,21 @@ def: InstRW<[HWWriteResGroup163], (instregex "DIV_F32m")>; def: InstRW<[HWWriteResGroup163], (instregex "DIV_F64m")>; def HWWriteResGroup164 : SchedWriteRes<[HWPort4,HWPort6,HWPort23,HWPort237,HWPort0156]> { - let Latency = 24; + let Latency = 30; let NumMicroOps = 27; let ResourceCycles = [1,5,1,1,19]; } def: InstRW<[HWWriteResGroup164], (instregex "XSAVE64")>; def HWWriteResGroup165 : SchedWriteRes<[HWPort4,HWPort6,HWPort23,HWPort237,HWPort0156]> { - let Latency = 25; + let Latency = 31; let NumMicroOps = 28; let ResourceCycles = [1,6,1,1,19]; } -def: InstRW<[HWWriteResGroup165], (instregex "XSAVE(OPT?)")>; +def: InstRW<[HWWriteResGroup165], (instregex "XSAVE(OPT)?")>; def HWWriteResGroup166 : SchedWriteRes<[HWPort0,HWPort1,HWPort23]> { - let Latency = 27; + let Latency = 34; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } @@ -4326,7 +4085,7 @@ def: 
InstRW<[HWWriteResGroup166], (instregex "DIV_FI16m")>; def: InstRW<[HWWriteResGroup166], (instregex "DIV_FI32m")>; def HWWriteResGroup167 : SchedWriteRes<[HWPort0,HWPort5,HWPort23,HWPort015]> { - let Latency = 28; + let Latency = 34; let NumMicroOps = 11; let ResourceCycles = [2,7,1,1]; } @@ -4342,22 +4101,22 @@ def: InstRW<[HWWriteResGroup168], (instregex "AESKEYGENASSIST128rr")>; def: InstRW<[HWWriteResGroup168], (instregex "VAESKEYGENASSIST128rr")>; def HWWriteResGroup170 : SchedWriteRes<[HWPort5,HWPort6,HWPort23,HWPort06,HWPort0156]> { - let Latency = 30; + let Latency = 35; let NumMicroOps = 23; let ResourceCycles = [1,5,3,4,10]; } -def: InstRW<[HWWriteResGroup170], (instregex "IN32ri")>; -def: InstRW<[HWWriteResGroup170], (instregex "IN32rr")>; +def: InstRW<[HWWriteResGroup170], (instregex "IN(16|32)ri")>; +def: InstRW<[HWWriteResGroup170], (instregex "IN(16|32)rr")>; def: InstRW<[HWWriteResGroup170], (instregex "IN8ri")>; def: InstRW<[HWWriteResGroup170], (instregex "IN8rr")>; def HWWriteResGroup171 : SchedWriteRes<[HWPort5,HWPort6,HWPort23,HWPort237,HWPort06,HWPort0156]> { - let Latency = 30; + let Latency = 36; let NumMicroOps = 23; let ResourceCycles = [1,5,2,1,4,10]; } -def: InstRW<[HWWriteResGroup171], (instregex "OUT32ir")>; -def: InstRW<[HWWriteResGroup171], (instregex "OUT32rr")>; +def: InstRW<[HWWriteResGroup171], (instregex "OUT(16|32)ir")>; +def: InstRW<[HWWriteResGroup171], (instregex "OUT(16|32)rr")>; def: InstRW<[HWWriteResGroup171], (instregex "OUT8ir")>; def: InstRW<[HWWriteResGroup171], (instregex "OUT8rr")>; @@ -4377,7 +4136,7 @@ def: InstRW<[HWWriteResGroup173], (instregex "VDIVPDYrr")>; def: InstRW<[HWWriteResGroup173], (instregex "VSQRTPDYr")>; def HWWriteResGroup174 : SchedWriteRes<[HWPort0,HWPort23,HWPort015]> { - let Latency = 35; + let Latency = 42; let NumMicroOps = 4; let ResourceCycles = [2,1,1]; } @@ -4385,7 +4144,7 @@ def: InstRW<[HWWriteResGroup174], (instregex "VDIVPDYrm")>; def: InstRW<[HWWriteResGroup174], (instregex "VSQRTPDYm")>; def HWWriteResGroup175 : SchedWriteRes<[HWPort1,HWPort4,HWPort5,HWPort6,HWPort23,HWPort237,HWPort15,HWPort0156]> { - let Latency = 35; + let Latency = 41; let NumMicroOps = 18; let ResourceCycles = [1,1,2,3,1,1,1,8]; } @@ -4399,22 +4158,21 @@ def HWWriteResGroup176 : SchedWriteRes<[HWPort5,HWPort0156]> { def: InstRW<[HWWriteResGroup176], (instregex "RDTSCP")>; def HWWriteResGroup177 : SchedWriteRes<[HWPort0,HWPort01,HWPort23,HWPort05,HWPort06,HWPort015,HWPort0156]> { - let Latency = 56; + let Latency = 61; let NumMicroOps = 64; let ResourceCycles = [2,2,8,1,10,2,39]; } def: InstRW<[HWWriteResGroup177], (instregex "FLDENVm")>; -def: InstRW<[HWWriteResGroup177], (instregex "FLDENVm")>; def HWWriteResGroup178 : SchedWriteRes<[HWPort0,HWPort6,HWPort23,HWPort05,HWPort06,HWPort15,HWPort0156]> { - let Latency = 59; + let Latency = 64; let NumMicroOps = 88; let ResourceCycles = [4,4,31,1,2,1,45]; } def: InstRW<[HWWriteResGroup178], (instregex "FXRSTOR64")>; def HWWriteResGroup179 : SchedWriteRes<[HWPort0,HWPort6,HWPort23,HWPort05,HWPort06,HWPort15,HWPort0156]> { - let Latency = 59; + let Latency = 64; let NumMicroOps = 90; let ResourceCycles = [4,2,33,1,2,1,47]; } @@ -4442,11 +4200,80 @@ def HWWriteResGroup182 : SchedWriteRes<[HWPort0,HWPort1,HWPort5,HWPort6,HWPort06 def: InstRW<[HWWriteResGroup182], (instregex "IDIV(16|32|64)r")>; def HWWriteResGroup183 : SchedWriteRes<[HWPort0,HWPort1,HWPort4,HWPort5,HWPort6,HWPort237,HWPort06,HWPort0156]> { - let Latency = 114; + let Latency = 115; let NumMicroOps = 100; let ResourceCycles = 
[9,9,11,8,1,11,21,30]; } def: InstRW<[HWWriteResGroup183], (instregex "FSTENVm")>; -def: InstRW<[HWWriteResGroup183], (instregex "FSTENVm")>; + +def HWWriteResGroup184 : SchedWriteRes<[HWPort0, HWPort5, HWPort15, HWPort015, HWPort06, HWPort23]> { + let Latency = 26; + let NumMicroOps = 12; + let ResourceCycles = [2,2,1,3,2,2]; +} +def: InstRW<[HWWriteResGroup184], (instrs VGATHERDPDrm, + VPGATHERDQrm, + VPGATHERDDrm)>; + +def HWWriteResGroup185 : SchedWriteRes<[HWPort0, HWPort5, HWPort06, HWPort15, HWPort015, HWPort23]> { + let Latency = 24; + let NumMicroOps = 22; + let ResourceCycles = [5,3,4,1,5,4]; +} +def: InstRW<[HWWriteResGroup185], (instrs VGATHERQPDYrm, + VPGATHERQQYrm)>; + +def HWWriteResGroup186 : SchedWriteRes<[HWPort0, HWPort5, HWPort06, HWPort15, HWPort015, HWPort23]> { + let Latency = 28; + let NumMicroOps = 22; + let ResourceCycles = [5,3,4,1,5,4]; +} +def: InstRW<[HWWriteResGroup186], (instrs VPGATHERQDYrm)>; + +def HWWriteResGroup187 : SchedWriteRes<[HWPort0, HWPort5, HWPort06, HWPort15, HWPort015, HWPort23]> { + let Latency = 25; + let NumMicroOps = 22; + let ResourceCycles = [5,3,4,1,5,4]; +} +def: InstRW<[HWWriteResGroup187], (instrs VPGATHERQDrm)>; + +def HWWriteResGroup188 : SchedWriteRes<[HWPort0, HWPort5, HWPort06, HWPort15, HWPort015, HWPort23]> { + let Latency = 27; + let NumMicroOps = 20; + let ResourceCycles = [3,3,4,1,5,4]; +} +def: InstRW<[HWWriteResGroup188], (instrs VGATHERDPDYrm, + VPGATHERDQYrm)>; + +def HWWriteResGroup189 : SchedWriteRes<[HWPort0, HWPort5, HWPort06, HWPort15, HWPort015, HWPort23]> { + let Latency = 27; + let NumMicroOps = 34; + let ResourceCycles = [5,3,8,1,9,8]; +} +def: InstRW<[HWWriteResGroup189], (instrs VGATHERDPSYrm, + VPGATHERDDYrm)>; + +def HWWriteResGroup190 : SchedWriteRes<[HWPort0, HWPort5, HWPort06, HWPort15, HWPort015, HWPort23]> { + let Latency = 23; + let NumMicroOps = 14; + let ResourceCycles = [3,3,2,1,3,2]; +} +def: InstRW<[HWWriteResGroup190], (instrs VGATHERQPDrm, + VPGATHERQQrm)>; + +def HWWriteResGroup191 : SchedWriteRes<[HWPort0, HWPort5, HWPort06, HWPort15, HWPort015, HWPort23]> { + let Latency = 28; + let NumMicroOps = 15; + let ResourceCycles = [3,3,2,1,4,2]; +} +def: InstRW<[HWWriteResGroup191], (instrs VGATHERQPSYrm)>; + +def HWWriteResGroup192 : SchedWriteRes<[HWPort0, HWPort5, HWPort06, HWPort15, HWPort015, HWPort23]> { + let Latency = 25; + let NumMicroOps = 15; + let ResourceCycles = [3,3,2,1,4,2]; +} +def: InstRW<[HWWriteResGroup192], (instrs VGATHERQPSrm, + VGATHERDPSrm)>; } // SchedModel diff --git a/lib/Target/X86/X86SchedSandyBridge.td b/lib/Target/X86/X86SchedSandyBridge.td index c6c60bf03b2e..a459bca3a4d7 100644 --- a/lib/Target/X86/X86SchedSandyBridge.td +++ b/lib/Target/X86/X86SchedSandyBridge.td @@ -338,41 +338,11 @@ def: InstRW<[SBWriteResGroup2], (instregex "FFREE")>; def: InstRW<[SBWriteResGroup2], (instregex "FINCSTP")>; def: InstRW<[SBWriteResGroup2], (instregex "FNOP")>; def: InstRW<[SBWriteResGroup2], (instregex "INSERTPSrr")>; -def: InstRW<[SBWriteResGroup2], (instregex "JAE_1")>; -def: InstRW<[SBWriteResGroup2], (instregex "JAE_4")>; -def: InstRW<[SBWriteResGroup2], (instregex "JA_1")>; -def: InstRW<[SBWriteResGroup2], (instregex "JA_4")>; -def: InstRW<[SBWriteResGroup2], (instregex "JBE_1")>; -def: InstRW<[SBWriteResGroup2], (instregex "JBE_4")>; -def: InstRW<[SBWriteResGroup2], (instregex "JB_1")>; -def: InstRW<[SBWriteResGroup2], (instregex "JB_4")>; -def: InstRW<[SBWriteResGroup2], (instregex "JE_1")>; -def: InstRW<[SBWriteResGroup2], (instregex "JE_4")>; -def: 
InstRW<[SBWriteResGroup2], (instregex "JGE_1")>; -def: InstRW<[SBWriteResGroup2], (instregex "JGE_4")>; -def: InstRW<[SBWriteResGroup2], (instregex "JG_1")>; -def: InstRW<[SBWriteResGroup2], (instregex "JG_4")>; -def: InstRW<[SBWriteResGroup2], (instregex "JLE_1")>; -def: InstRW<[SBWriteResGroup2], (instregex "JLE_4")>; -def: InstRW<[SBWriteResGroup2], (instregex "JL_1")>; -def: InstRW<[SBWriteResGroup2], (instregex "JL_4")>; +def: InstRW<[SBWriteResGroup2], (instregex "J(A|AE|B|BE|E|G|GE|L|LE|NE|NO|NP|NS|O|P|S)_1")>; +def: InstRW<[SBWriteResGroup2], (instregex "J(A|AE|B|BE|E|G|GE|L|LE|NE|NO|NP|NS|O|P|S)_4")>; def: InstRW<[SBWriteResGroup2], (instregex "JMP64r")>; def: InstRW<[SBWriteResGroup2], (instregex "JMP_1")>; def: InstRW<[SBWriteResGroup2], (instregex "JMP_4")>; -def: InstRW<[SBWriteResGroup2], (instregex "JNE_1")>; -def: InstRW<[SBWriteResGroup2], (instregex "JNE_4")>; -def: InstRW<[SBWriteResGroup2], (instregex "JNO_1")>; -def: InstRW<[SBWriteResGroup2], (instregex "JNO_4")>; -def: InstRW<[SBWriteResGroup2], (instregex "JNP_1")>; -def: InstRW<[SBWriteResGroup2], (instregex "JNP_4")>; -def: InstRW<[SBWriteResGroup2], (instregex "JNS_1")>; -def: InstRW<[SBWriteResGroup2], (instregex "JNS_4")>; -def: InstRW<[SBWriteResGroup2], (instregex "JO_1")>; -def: InstRW<[SBWriteResGroup2], (instregex "JO_4")>; -def: InstRW<[SBWriteResGroup2], (instregex "JP_1")>; -def: InstRW<[SBWriteResGroup2], (instregex "JP_4")>; -def: InstRW<[SBWriteResGroup2], (instregex "JS_1")>; -def: InstRW<[SBWriteResGroup2], (instregex "JS_4")>; def: InstRW<[SBWriteResGroup2], (instregex "LD_Frr")>; def: InstRW<[SBWriteResGroup2], (instregex "LOOP")>; def: InstRW<[SBWriteResGroup2], (instregex "LOOPE")>; @@ -469,7 +439,7 @@ def SBWriteResGroup3 : SchedWriteRes<[SBPort01]> { let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[SBWriteResGroup3], (instregex "LEA(16|32|64)r")>; +def: InstRW<[SBWriteResGroup3], (instregex "LEA(16|32|64)(_32)?r")>; def SBWriteResGroup4 : SchedWriteRes<[SBPort05]> { let Latency = 1; @@ -492,20 +462,7 @@ def: InstRW<[SBWriteResGroup4], (instregex "LAHF")>; def: InstRW<[SBWriteResGroup4], (instregex "SAHF")>; def: InstRW<[SBWriteResGroup4], (instregex "SAR(16|32|64)ri")>; def: InstRW<[SBWriteResGroup4], (instregex "SAR8ri")>; -def: InstRW<[SBWriteResGroup4], (instregex "SETAEr")>; -def: InstRW<[SBWriteResGroup4], (instregex "SETBr")>; -def: InstRW<[SBWriteResGroup4], (instregex "SETEr")>; -def: InstRW<[SBWriteResGroup4], (instregex "SETGEr")>; -def: InstRW<[SBWriteResGroup4], (instregex "SETGr")>; -def: InstRW<[SBWriteResGroup4], (instregex "SETLEr")>; -def: InstRW<[SBWriteResGroup4], (instregex "SETLr")>; -def: InstRW<[SBWriteResGroup4], (instregex "SETNEr")>; -def: InstRW<[SBWriteResGroup4], (instregex "SETNOr")>; -def: InstRW<[SBWriteResGroup4], (instregex "SETNPr")>; -def: InstRW<[SBWriteResGroup4], (instregex "SETNSr")>; -def: InstRW<[SBWriteResGroup4], (instregex "SETOr")>; -def: InstRW<[SBWriteResGroup4], (instregex "SETPr")>; -def: InstRW<[SBWriteResGroup4], (instregex "SETSr")>; +def: InstRW<[SBWriteResGroup4], (instregex "SET(AE|B|E|G|GE|L|LE|NE|NO|NP|NS|O|P|S)r")>; def: InstRW<[SBWriteResGroup4], (instregex "SHL(16|32|64)ri")>; def: InstRW<[SBWriteResGroup4], (instregex "SHL(16|32|64)r1")>; def: InstRW<[SBWriteResGroup4], (instregex "SHL8r1")>; @@ -691,19 +648,19 @@ def SBWriteResGroup6 : SchedWriteRes<[SBPort015]> { let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[SBWriteResGroup6], (instregex "ADD(16|32|64)ri8")>; +def: InstRW<[SBWriteResGroup6], 
(instregex "ADD(16|32|64)ri")>; def: InstRW<[SBWriteResGroup6], (instregex "ADD(16|32|64)rr")>; def: InstRW<[SBWriteResGroup6], (instregex "ADD8i8")>; def: InstRW<[SBWriteResGroup6], (instregex "ADD8ri")>; def: InstRW<[SBWriteResGroup6], (instregex "ADD8rr")>; -def: InstRW<[SBWriteResGroup6], (instregex "AND(16|32|64)ri8")>; +def: InstRW<[SBWriteResGroup6], (instregex "AND(16|32|64)ri")>; def: InstRW<[SBWriteResGroup6], (instregex "AND(16|32|64)rr")>; def: InstRW<[SBWriteResGroup6], (instregex "AND8i8")>; def: InstRW<[SBWriteResGroup6], (instregex "AND8ri")>; def: InstRW<[SBWriteResGroup6], (instregex "AND8rr")>; def: InstRW<[SBWriteResGroup6], (instregex "CBW")>; def: InstRW<[SBWriteResGroup6], (instregex "CMC")>; -def: InstRW<[SBWriteResGroup6], (instregex "CMP(16|32|64)ri8")>; +def: InstRW<[SBWriteResGroup6], (instregex "CMP(16|32|64)ri")>; def: InstRW<[SBWriteResGroup6], (instregex "CMP(16|32|64)rr")>; def: InstRW<[SBWriteResGroup6], (instregex "CMP8i8")>; def: InstRW<[SBWriteResGroup6], (instregex "CMP8ri")>; @@ -730,7 +687,7 @@ def: InstRW<[SBWriteResGroup6], (instregex "NEG(16|32|64)r")>; def: InstRW<[SBWriteResGroup6], (instregex "NEG8r")>; def: InstRW<[SBWriteResGroup6], (instregex "NOT(16|32|64)r")>; def: InstRW<[SBWriteResGroup6], (instregex "NOT8r")>; -def: InstRW<[SBWriteResGroup6], (instregex "OR(16|32|64)ri8")>; +def: InstRW<[SBWriteResGroup6], (instregex "OR(16|32|64)ri")>; def: InstRW<[SBWriteResGroup6], (instregex "OR(16|32|64)rr")>; def: InstRW<[SBWriteResGroup6], (instregex "OR8i8")>; def: InstRW<[SBWriteResGroup6], (instregex "OR8ri")>; @@ -740,7 +697,7 @@ def: InstRW<[SBWriteResGroup6], (instregex "PANDrr")>; def: InstRW<[SBWriteResGroup6], (instregex "PORrr")>; def: InstRW<[SBWriteResGroup6], (instregex "PXORrr")>; def: InstRW<[SBWriteResGroup6], (instregex "STC")>; -def: InstRW<[SBWriteResGroup6], (instregex "SUB(16|32|64)ri8")>; +def: InstRW<[SBWriteResGroup6], (instregex "SUB(16|32|64)ri")>; def: InstRW<[SBWriteResGroup6], (instregex "SUB(16|32|64)rr")>; def: InstRW<[SBWriteResGroup6], (instregex "SUB8i8")>; def: InstRW<[SBWriteResGroup6], (instregex "SUB8ri")>; @@ -755,7 +712,7 @@ def: InstRW<[SBWriteResGroup6], (instregex "VPANDNrr")>; def: InstRW<[SBWriteResGroup6], (instregex "VPANDrr")>; def: InstRW<[SBWriteResGroup6], (instregex "VPORrr")>; def: InstRW<[SBWriteResGroup6], (instregex "VPXORrr")>; -def: InstRW<[SBWriteResGroup6], (instregex "XOR(16|32|64)ri8")>; +def: InstRW<[SBWriteResGroup6], (instregex "XOR(16|32|64)ri")>; def: InstRW<[SBWriteResGroup6], (instregex "XOR(16|32|64)rr")>; def: InstRW<[SBWriteResGroup6], (instregex "XOR8i8")>; def: InstRW<[SBWriteResGroup6], (instregex "XOR8ri")>; @@ -789,8 +746,7 @@ def: InstRW<[SBWriteResGroup9], (instregex "ROL(16|32|64)ri")>; def: InstRW<[SBWriteResGroup9], (instregex "ROL8ri")>; def: InstRW<[SBWriteResGroup9], (instregex "ROR(16|32|64)ri")>; def: InstRW<[SBWriteResGroup9], (instregex "ROR8ri")>; -def: InstRW<[SBWriteResGroup9], (instregex "SETAr")>; -def: InstRW<[SBWriteResGroup9], (instregex "SETBEr")>; +def: InstRW<[SBWriteResGroup9], (instregex "SET(A|BE)r")>; def: InstRW<[SBWriteResGroup9], (instregex "VBLENDVPDYrr")>; def: InstRW<[SBWriteResGroup9], (instregex "VBLENDVPDrr")>; def: InstRW<[SBWriteResGroup9], (instregex "VBLENDVPSYrr")>; @@ -903,25 +859,12 @@ def SBWriteResGroup19 : SchedWriteRes<[SBPort05,SBPort015]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SBWriteResGroup19], (instregex "ADC(16|32|64)ri8")>; +def: InstRW<[SBWriteResGroup19], (instregex "ADC(16|32|64)ri")>; 
def: InstRW<[SBWriteResGroup19], (instregex "ADC(16|32|64)rr")>; def: InstRW<[SBWriteResGroup19], (instregex "ADC8ri")>; def: InstRW<[SBWriteResGroup19], (instregex "ADC8rr")>; -def: InstRW<[SBWriteResGroup19], (instregex "CMOVAE(16|32|64)rr")>; -def: InstRW<[SBWriteResGroup19], (instregex "CMOVB(16|32|64)rr")>; -def: InstRW<[SBWriteResGroup19], (instregex "CMOVE(16|32|64)rr")>; -def: InstRW<[SBWriteResGroup19], (instregex "CMOVG(16|32|64)rr")>; -def: InstRW<[SBWriteResGroup19], (instregex "CMOVGE(16|32|64)rr")>; -def: InstRW<[SBWriteResGroup19], (instregex "CMOVL(16|32|64)rr")>; -def: InstRW<[SBWriteResGroup19], (instregex "CMOVLE(16|32|64)rr")>; -def: InstRW<[SBWriteResGroup19], (instregex "CMOVNE(16|32|64)rr")>; -def: InstRW<[SBWriteResGroup19], (instregex "CMOVNO(16|32|64)rr")>; -def: InstRW<[SBWriteResGroup19], (instregex "CMOVNP(16|32|64)rr")>; -def: InstRW<[SBWriteResGroup19], (instregex "CMOVNS(16|32|64)rr")>; -def: InstRW<[SBWriteResGroup19], (instregex "CMOVO(16|32|64)rr")>; -def: InstRW<[SBWriteResGroup19], (instregex "CMOVP(16|32|64)rr")>; -def: InstRW<[SBWriteResGroup19], (instregex "CMOVS(16|32|64)rr")>; -def: InstRW<[SBWriteResGroup19], (instregex "SBB(16|32|64)ri8")>; +def: InstRW<[SBWriteResGroup19], (instregex "CMOV(AE|B|E|G|GE|L|LE|NE|NO|NP|NS|O|P|S)(16|32|64)rr")>; +def: InstRW<[SBWriteResGroup19], (instregex "SBB(16|32|64)ri")>; def: InstRW<[SBWriteResGroup19], (instregex "SBB(16|32|64)rr")>; def: InstRW<[SBWriteResGroup19], (instregex "SBB8ri")>; def: InstRW<[SBWriteResGroup19], (instregex "SBB8rr")>; @@ -975,20 +918,21 @@ def: InstRW<[SBWriteResGroup21], (instregex "BSF(16|32|64)rr")>; def: InstRW<[SBWriteResGroup21], (instregex "BSR(16|32|64)rr")>; def: InstRW<[SBWriteResGroup21], (instregex "CMPPDrri")>; def: InstRW<[SBWriteResGroup21], (instregex "CMPPSrri")>; +def: InstRW<[SBWriteResGroup21], (instregex "CMPSDrr")>; def: InstRW<[SBWriteResGroup21], (instregex "CMPSSrr")>; def: InstRW<[SBWriteResGroup21], (instregex "CRC32r(16|32|64)r8")>; def: InstRW<[SBWriteResGroup21], (instregex "CRC32r(16|32|64)r64")>; def: InstRW<[SBWriteResGroup21], (instregex "CVTDQ2PSrr")>; def: InstRW<[SBWriteResGroup21], (instregex "CVTPS2DQrr")>; def: InstRW<[SBWriteResGroup21], (instregex "CVTTPS2DQrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "MAXPDrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "MAXPSrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "MAXSDrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "MAXSSrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "MINPDrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "MINPSrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "MINSDrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "MINSSrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "MAX(C?)PDrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "MAX(C?)PSrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "MAX(C?)SDrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "MAX(C?)SSrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "MIN(C?)PDrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "MIN(C?)PSrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "MIN(C?)SDrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "MIN(C?)SSrr")>; def: InstRW<[SBWriteResGroup21], (instregex "MMX_CVTPI2PSirr")>; def: InstRW<[SBWriteResGroup21], (instregex "MMX_CVTPS2PIirr")>; def: InstRW<[SBWriteResGroup21], (instregex "MMX_CVTTPS2PIirr")>; @@ -1031,18 +975,18 @@ def: InstRW<[SBWriteResGroup21], (instregex "VCVTPS2DQYrr")>; def: InstRW<[SBWriteResGroup21], (instregex "VCVTPS2DQrr")>; 
def: InstRW<[SBWriteResGroup21], (instregex "VCVTTPS2DQYrr")>; def: InstRW<[SBWriteResGroup21], (instregex "VCVTTPS2DQrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "VMAXPDYrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "VMAXPDrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "VMAXPSYrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "VMAXPSrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "VMAXSDrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "VMAXSSrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "VMINPDYrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "VMINPDrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "VMINPSYrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "VMINPSrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "VMINSDrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "VMINSSrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "VMAX(C?)PDYrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "VMAX(C?)PDrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "VMAX(C?)PSYrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "VMAX(C?)PSrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "VMAX(C?)SDrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "VMAX(C?)SSrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "VMIN(C?)PDYrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "VMIN(C?)PDrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "VMIN(C?)PSYrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "VMIN(C?)PSrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "VMIN(C?)SDrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "VMIN(C?)SSrr")>; def: InstRW<[SBWriteResGroup21], (instregex "VROUNDPDr")>; def: InstRW<[SBWriteResGroup21], (instregex "VROUNDPSr")>; def: InstRW<[SBWriteResGroup21], (instregex "VROUNDSDr")>; @@ -1150,8 +1094,7 @@ def SBWriteResGroup26 : SchedWriteRes<[SBPort05,SBPort015]> { let NumMicroOps = 3; let ResourceCycles = [2,1]; } -def: InstRW<[SBWriteResGroup26], (instregex "CMOVA(16|32|64)rr")>; -def: InstRW<[SBWriteResGroup26], (instregex "CMOVBE(16|32|64)rr")>; +def: InstRW<[SBWriteResGroup26], (instregex "CMOV(A|BE)(16|32|64)rr")>; def SBWriteResGroup26_2 : SchedWriteRes<[SBPort0,SBPort1,SBPort5]> { let Latency = 3; @@ -1179,7 +1122,7 @@ def: InstRW<[SBWriteResGroup28], (instregex "CVTDQ2PDrr")>; def: InstRW<[SBWriteResGroup28], (instregex "CVTPD2DQrr")>; def: InstRW<[SBWriteResGroup28], (instregex "CVTPD2PSrr")>; def: InstRW<[SBWriteResGroup28], (instregex "CVTSD2SSrr")>; -def: InstRW<[SBWriteResGroup28], (instregex "CVTSI2SD64rr")>; +def: InstRW<[SBWriteResGroup28], (instregex "CVTSI642SDrr")>; def: InstRW<[SBWriteResGroup28], (instregex "CVTSI2SDrr")>; def: InstRW<[SBWriteResGroup28], (instregex "CVTTPD2DQrr")>; def: InstRW<[SBWriteResGroup28], (instregex "MMX_CVTPD2PIirr")>; @@ -1192,7 +1135,7 @@ def: InstRW<[SBWriteResGroup28], (instregex "VCVTPD2DQrr")>; def: InstRW<[SBWriteResGroup28], (instregex "VCVTPD2PSYrr")>; def: InstRW<[SBWriteResGroup28], (instregex "VCVTPD2PSrr")>; def: InstRW<[SBWriteResGroup28], (instregex "VCVTSD2SSrr")>; -def: InstRW<[SBWriteResGroup28], (instregex "VCVTSI2SD64rr")>; +def: InstRW<[SBWriteResGroup28], (instregex "VCVTSI642SDrr")>; def: InstRW<[SBWriteResGroup28], (instregex "VCVTSI2SDrr")>; def: InstRW<[SBWriteResGroup28], (instregex "VCVTTPD2DQYrr")>; def: InstRW<[SBWriteResGroup28], (instregex "VCVTTPD2DQrr")>; @@ -1311,6 +1254,7 @@ def: InstRW<[SBWriteResGroup33], (instregex "MOVNTPSmr")>; def: InstRW<[SBWriteResGroup33], (instregex "MOVPDI2DImr")>; def: InstRW<[SBWriteResGroup33], (instregex 
"MOVPQI2QImr")>; def: InstRW<[SBWriteResGroup33], (instregex "MOVPQIto64mr")>; +def: InstRW<[SBWriteResGroup33], (instregex "MOVSDmr")>; def: InstRW<[SBWriteResGroup33], (instregex "MOVSSmr")>; def: InstRW<[SBWriteResGroup33], (instregex "MOVUPDmr")>; def: InstRW<[SBWriteResGroup33], (instregex "MOVUPSmr")>; @@ -1359,13 +1303,13 @@ def SBWriteResGroup35 : SchedWriteRes<[SBPort1,SBPort5]> { let ResourceCycles = [1,2]; } def: InstRW<[SBWriteResGroup35], (instregex "CLI")>; -def: InstRW<[SBWriteResGroup35], (instregex "CVTSI2SS64rr")>; +def: InstRW<[SBWriteResGroup35], (instregex "CVTSI642SSrr")>; def: InstRW<[SBWriteResGroup35], (instregex "CVTSI2SSrr")>; def: InstRW<[SBWriteResGroup35], (instregex "HADDPDrr")>; def: InstRW<[SBWriteResGroup35], (instregex "HADDPSrr")>; def: InstRW<[SBWriteResGroup35], (instregex "HSUBPDrr")>; def: InstRW<[SBWriteResGroup35], (instregex "HSUBPSrr")>; -def: InstRW<[SBWriteResGroup35], (instregex "VCVTSI2SS64rr")>; +def: InstRW<[SBWriteResGroup35], (instregex "VCVTSI642SSrr")>; def: InstRW<[SBWriteResGroup35], (instregex "VCVTSI2SSrr")>; def: InstRW<[SBWriteResGroup35], (instregex "VHADDPDYrr")>; def: InstRW<[SBWriteResGroup35], (instregex "VHADDPDrr")>; @@ -1411,20 +1355,7 @@ def SBWriteResGroup38 : SchedWriteRes<[SBPort4,SBPort23,SBPort05]> { let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } -def: InstRW<[SBWriteResGroup38], (instregex "SETAEm")>; -def: InstRW<[SBWriteResGroup38], (instregex "SETBm")>; -def: InstRW<[SBWriteResGroup38], (instregex "SETEm")>; -def: InstRW<[SBWriteResGroup38], (instregex "SETGEm")>; -def: InstRW<[SBWriteResGroup38], (instregex "SETGm")>; -def: InstRW<[SBWriteResGroup38], (instregex "SETLEm")>; -def: InstRW<[SBWriteResGroup38], (instregex "SETLm")>; -def: InstRW<[SBWriteResGroup38], (instregex "SETNEm")>; -def: InstRW<[SBWriteResGroup38], (instregex "SETNOm")>; -def: InstRW<[SBWriteResGroup38], (instregex "SETNPm")>; -def: InstRW<[SBWriteResGroup38], (instregex "SETNSm")>; -def: InstRW<[SBWriteResGroup38], (instregex "SETOm")>; -def: InstRW<[SBWriteResGroup38], (instregex "SETPm")>; -def: InstRW<[SBWriteResGroup38], (instregex "SETSm")>; +def: InstRW<[SBWriteResGroup38], (instregex "SET(AE|B|E|G|GE|L|LE|NE|NO|NP|NS|O|P|S)m")>; def SBWriteResGroup39 : SchedWriteRes<[SBPort4,SBPort23,SBPort15]> { let Latency = 5; @@ -1467,8 +1398,7 @@ def SBWriteResGroup43 : SchedWriteRes<[SBPort4,SBPort23,SBPort05]> { let NumMicroOps = 4; let ResourceCycles = [1,1,2]; } -def: InstRW<[SBWriteResGroup43], (instregex "SETAm")>; -def: InstRW<[SBWriteResGroup43], (instregex "SETBEm")>; +def: InstRW<[SBWriteResGroup43], (instregex "SET(A|BE)m")>; def SBWriteResGroup44 : SchedWriteRes<[SBPort0,SBPort4,SBPort5,SBPort23]> { let Latency = 5; @@ -1520,6 +1450,8 @@ def: InstRW<[SBWriteResGroup48], (instregex "MOVDI2PDIrm")>; def: InstRW<[SBWriteResGroup48], (instregex "MOVDQArm")>; def: InstRW<[SBWriteResGroup48], (instregex "MOVDQUrm")>; def: InstRW<[SBWriteResGroup48], (instregex "MOVNTDQArm")>; +def: InstRW<[SBWriteResGroup48], (instregex "MOVQI2PQIrm")>; +def: InstRW<[SBWriteResGroup48], (instregex "MOVSDrm")>; def: InstRW<[SBWriteResGroup48], (instregex "MOVSHDUPrm")>; def: InstRW<[SBWriteResGroup48], (instregex "MOVSLDUPrm")>; def: InstRW<[SBWriteResGroup48], (instregex "MOVSSrm")>; @@ -1583,7 +1515,7 @@ def: InstRW<[SBWriteResGroup52], (instregex "ADD(16|32|64)rm")>; def: InstRW<[SBWriteResGroup52], (instregex "ADD8rm")>; def: InstRW<[SBWriteResGroup52], (instregex "AND(16|32|64)rm")>; def: InstRW<[SBWriteResGroup52], (instregex "AND8rm")>; -def: 
InstRW<[SBWriteResGroup52], (instregex "CMP(16|32|64)mi8")>; +def: InstRW<[SBWriteResGroup52], (instregex "CMP(16|32|64)mi")>; def: InstRW<[SBWriteResGroup52], (instregex "CMP(16|32|64)mr")>; def: InstRW<[SBWriteResGroup52], (instregex "CMP(16|32|64)rm")>; def: InstRW<[SBWriteResGroup52], (instregex "CMP8mi")>; @@ -1926,20 +1858,7 @@ def SBWriteResGroup65 : SchedWriteRes<[SBPort23,SBPort05,SBPort015]> { } def: InstRW<[SBWriteResGroup65], (instregex "ADC(16|32|64)rm")>; def: InstRW<[SBWriteResGroup65], (instregex "ADC8rm")>; -def: InstRW<[SBWriteResGroup65], (instregex "CMOVAE(16|32|64)rm")>; -def: InstRW<[SBWriteResGroup65], (instregex "CMOVB(16|32|64)rm")>; -def: InstRW<[SBWriteResGroup65], (instregex "CMOVE(16|32|64)rm")>; -def: InstRW<[SBWriteResGroup65], (instregex "CMOVG(16|32|64)rm")>; -def: InstRW<[SBWriteResGroup65], (instregex "CMOVGE(16|32|64)rm")>; -def: InstRW<[SBWriteResGroup65], (instregex "CMOVL(16|32|64)rm")>; -def: InstRW<[SBWriteResGroup65], (instregex "CMOVLE(16|32|64)rm")>; -def: InstRW<[SBWriteResGroup65], (instregex "CMOVNE(16|32|64)rm")>; -def: InstRW<[SBWriteResGroup65], (instregex "CMOVNO(16|32|64)rm")>; -def: InstRW<[SBWriteResGroup65], (instregex "CMOVNP(16|32|64)rm")>; -def: InstRW<[SBWriteResGroup65], (instregex "CMOVNS(16|32|64)rm")>; -def: InstRW<[SBWriteResGroup65], (instregex "CMOVO(16|32|64)rm")>; -def: InstRW<[SBWriteResGroup65], (instregex "CMOVP(16|32|64)rm")>; -def: InstRW<[SBWriteResGroup65], (instregex "CMOVS(16|32|64)rm")>; +def: InstRW<[SBWriteResGroup65], (instregex "CMOV(AE|B|E|G|GE|L|LE|NE|NO|NP|NS|O|P|S)(16|32|64)rm")>; def: InstRW<[SBWriteResGroup65], (instregex "SBB(16|32|64)rm")>; def: InstRW<[SBWriteResGroup65], (instregex "SBB8rm")>; @@ -1988,11 +1907,11 @@ def SBWriteResGroup70 : SchedWriteRes<[SBPort4,SBPort23,SBPort015]> { let NumMicroOps = 4; let ResourceCycles = [1,2,1]; } -def: InstRW<[SBWriteResGroup70], (instregex "ADD(16|32|64)mi8")>; +def: InstRW<[SBWriteResGroup70], (instregex "ADD(16|32|64)mi")>; def: InstRW<[SBWriteResGroup70], (instregex "ADD(16|32|64)mr")>; def: InstRW<[SBWriteResGroup70], (instregex "ADD8mi")>; def: InstRW<[SBWriteResGroup70], (instregex "ADD8mr")>; -def: InstRW<[SBWriteResGroup70], (instregex "AND(16|32|64)mi8")>; +def: InstRW<[SBWriteResGroup70], (instregex "AND(16|32|64)mi")>; def: InstRW<[SBWriteResGroup70], (instregex "AND(16|32|64)mr")>; def: InstRW<[SBWriteResGroup70], (instregex "AND8mi")>; def: InstRW<[SBWriteResGroup70], (instregex "AND8mr")>; @@ -2004,18 +1923,18 @@ def: InstRW<[SBWriteResGroup70], (instregex "NEG(16|32|64)m")>; def: InstRW<[SBWriteResGroup70], (instregex "NEG8m")>; def: InstRW<[SBWriteResGroup70], (instregex "NOT(16|32|64)m")>; def: InstRW<[SBWriteResGroup70], (instregex "NOT8m")>; -def: InstRW<[SBWriteResGroup70], (instregex "OR(16|32|64)mi8")>; +def: InstRW<[SBWriteResGroup70], (instregex "OR(16|32|64)mi")>; def: InstRW<[SBWriteResGroup70], (instregex "OR(16|32|64)mr")>; def: InstRW<[SBWriteResGroup70], (instregex "OR8mi")>; def: InstRW<[SBWriteResGroup70], (instregex "OR8mr")>; -def: InstRW<[SBWriteResGroup70], (instregex "SUB(16|32|64)mi8")>; +def: InstRW<[SBWriteResGroup70], (instregex "SUB(16|32|64)mi")>; def: InstRW<[SBWriteResGroup70], (instregex "SUB(16|32|64)mr")>; def: InstRW<[SBWriteResGroup70], (instregex "SUB8mi")>; def: InstRW<[SBWriteResGroup70], (instregex "SUB8mr")>; def: InstRW<[SBWriteResGroup70], (instregex "TEST(16|32|64)mr")>; def: InstRW<[SBWriteResGroup70], (instregex "TEST8mi")>; def: InstRW<[SBWriteResGroup70], (instregex "TEST8mr")>; -def: 
InstRW<[SBWriteResGroup70], (instregex "XOR(16|32|64)mi8")>; +def: InstRW<[SBWriteResGroup70], (instregex "XOR(16|32|64)mi")>; def: InstRW<[SBWriteResGroup70], (instregex "XOR(16|32|64)mr")>; def: InstRW<[SBWriteResGroup70], (instregex "XOR8mi")>; def: InstRW<[SBWriteResGroup70], (instregex "XOR8mr")>; @@ -2167,8 +2086,7 @@ def SBWriteResGroup82 : SchedWriteRes<[SBPort23,SBPort05,SBPort015]> { let NumMicroOps = 4; let ResourceCycles = [1,2,1]; } -def: InstRW<[SBWriteResGroup82], (instregex "CMOVA(16|32|64)rm")>; -def: InstRW<[SBWriteResGroup82], (instregex "CMOVBE(16|32|64)rm")>; +def: InstRW<[SBWriteResGroup82], (instregex "CMOV(A|BE)(16|32|64)rm")>; def SBWriteResGroup83 : SchedWriteRes<[SBPort23,SBPort015]> { let Latency = 8; @@ -2264,20 +2182,21 @@ def: InstRW<[SBWriteResGroup90], (instregex "ADDSUBPDrm")>; def: InstRW<[SBWriteResGroup90], (instregex "ADDSUBPSrm")>; def: InstRW<[SBWriteResGroup90], (instregex "CMPPDrmi")>; def: InstRW<[SBWriteResGroup90], (instregex "CMPPSrmi")>; +def: InstRW<[SBWriteResGroup90], (instregex "CMPSDrm")>; def: InstRW<[SBWriteResGroup90], (instregex "CMPSSrm")>; def: InstRW<[SBWriteResGroup90], (instregex "CVTDQ2PSrm")>; def: InstRW<[SBWriteResGroup90], (instregex "CVTPS2DQrm")>; -def: InstRW<[SBWriteResGroup90], (instregex "CVTSI2SD64rm")>; +def: InstRW<[SBWriteResGroup90], (instregex "CVTSI642SDrm")>; def: InstRW<[SBWriteResGroup90], (instregex "CVTSI2SDrm")>; def: InstRW<[SBWriteResGroup90], (instregex "CVTTPS2DQrm")>; -def: InstRW<[SBWriteResGroup90], (instregex "MAXPDrm")>; -def: InstRW<[SBWriteResGroup90], (instregex "MAXPSrm")>; -def: InstRW<[SBWriteResGroup90], (instregex "MAXSDrm")>; -def: InstRW<[SBWriteResGroup90], (instregex "MAXSSrm")>; -def: InstRW<[SBWriteResGroup90], (instregex "MINPDrm")>; -def: InstRW<[SBWriteResGroup90], (instregex "MINPSrm")>; -def: InstRW<[SBWriteResGroup90], (instregex "MINSDrm")>; -def: InstRW<[SBWriteResGroup90], (instregex "MINSSrm")>; +def: InstRW<[SBWriteResGroup90], (instregex "MAX(C?)PDrm")>; +def: InstRW<[SBWriteResGroup90], (instregex "MAX(C?)PSrm")>; +def: InstRW<[SBWriteResGroup90], (instregex "MAX(C?)SDrm")>; +def: InstRW<[SBWriteResGroup90], (instregex "MAX(C?)SSrm")>; +def: InstRW<[SBWriteResGroup90], (instregex "MIN(C?)PDrm")>; +def: InstRW<[SBWriteResGroup90], (instregex "MIN(C?)PSrm")>; +def: InstRW<[SBWriteResGroup90], (instregex "MIN(C?)SDrm")>; +def: InstRW<[SBWriteResGroup90], (instregex "MIN(C?)SSrm")>; def: InstRW<[SBWriteResGroup90], (instregex "MMX_CVTPI2PSirm")>; def: InstRW<[SBWriteResGroup90], (instregex "MMX_CVTPS2PIirm")>; def: InstRW<[SBWriteResGroup90], (instregex "MMX_CVTTPS2PIirm")>; @@ -2302,17 +2221,17 @@ def: InstRW<[SBWriteResGroup90], (instregex "VCMPSDrm")>; def: InstRW<[SBWriteResGroup90], (instregex "VCMPSSrm")>; def: InstRW<[SBWriteResGroup90], (instregex "VCVTDQ2PSrm")>; def: InstRW<[SBWriteResGroup90], (instregex "VCVTPS2DQrm")>; -def: InstRW<[SBWriteResGroup90], (instregex "VCVTSI2SD64rm")>; +def: InstRW<[SBWriteResGroup90], (instregex "VCVTSI642SDrm")>; def: InstRW<[SBWriteResGroup90], (instregex "VCVTSI2SDrm")>; def: InstRW<[SBWriteResGroup90], (instregex "VCVTTPS2DQrm")>; -def: InstRW<[SBWriteResGroup90], (instregex "VMAXPDrm")>; -def: InstRW<[SBWriteResGroup90], (instregex "VMAXPSrm")>; -def: InstRW<[SBWriteResGroup90], (instregex "VMAXSDrm")>; -def: InstRW<[SBWriteResGroup90], (instregex "VMAXSSrm")>; -def: InstRW<[SBWriteResGroup90], (instregex "VMINPDrm")>; -def: InstRW<[SBWriteResGroup90], (instregex "VMINPSrm")>; -def: InstRW<[SBWriteResGroup90], (instregex 
"VMINSDrm")>; -def: InstRW<[SBWriteResGroup90], (instregex "VMINSSrm")>; +def: InstRW<[SBWriteResGroup90], (instregex "VMAX(C?)PDrm")>; +def: InstRW<[SBWriteResGroup90], (instregex "VMAX(C?)PSrm")>; +def: InstRW<[SBWriteResGroup90], (instregex "VMAX(C?)SDrm")>; +def: InstRW<[SBWriteResGroup90], (instregex "VMAX(C?)SSrm")>; +def: InstRW<[SBWriteResGroup90], (instregex "VMIN(C?)PDrm")>; +def: InstRW<[SBWriteResGroup90], (instregex "VMIN(C?)PSrm")>; +def: InstRW<[SBWriteResGroup90], (instregex "VMIN(C?)SDrm")>; +def: InstRW<[SBWriteResGroup90], (instregex "VMIN(C?)SSrm")>; def: InstRW<[SBWriteResGroup90], (instregex "VROUNDPDm")>; def: InstRW<[SBWriteResGroup90], (instregex "VROUNDPSm")>; def: InstRW<[SBWriteResGroup90], (instregex "VROUNDSDm")>; @@ -2421,9 +2340,9 @@ def SBWriteResGroup98 : SchedWriteRes<[SBPort4,SBPort23,SBPort015]> { let NumMicroOps = 6; let ResourceCycles = [1,2,3]; } -def: InstRW<[SBWriteResGroup98], (instregex "ADC(16|32|64)mi8")>; +def: InstRW<[SBWriteResGroup98], (instregex "ADC(16|32|64)mi")>; def: InstRW<[SBWriteResGroup98], (instregex "ADC8mi")>; -def: InstRW<[SBWriteResGroup98], (instregex "SBB(16|32|64)mi8")>; +def: InstRW<[SBWriteResGroup98], (instregex "SBB(16|32|64)mi")>; def: InstRW<[SBWriteResGroup98], (instregex "SBB8mi")>; def SBWriteResGroup99 : SchedWriteRes<[SBPort4,SBPort23,SBPort05,SBPort015]> { @@ -2469,10 +2388,10 @@ def: InstRW<[SBWriteResGroup101], (instregex "VCMPPSYrmi")>; def: InstRW<[SBWriteResGroup101], (instregex "VCVTDQ2PSYrm")>; def: InstRW<[SBWriteResGroup101], (instregex "VCVTPS2DQYrm")>; def: InstRW<[SBWriteResGroup101], (instregex "VCVTTPS2DQYrm")>; -def: InstRW<[SBWriteResGroup101], (instregex "VMAXPDYrm")>; -def: InstRW<[SBWriteResGroup101], (instregex "VMAXPSYrm")>; -def: InstRW<[SBWriteResGroup101], (instregex "VMINPDYrm")>; -def: InstRW<[SBWriteResGroup101], (instregex "VMINPSYrm")>; +def: InstRW<[SBWriteResGroup101], (instregex "VMAX(C?)PDYrm")>; +def: InstRW<[SBWriteResGroup101], (instregex "VMAX(C?)PSYrm")>; +def: InstRW<[SBWriteResGroup101], (instregex "VMIN(C?)PDYrm")>; +def: InstRW<[SBWriteResGroup101], (instregex "VMIN(C?)PSYrm")>; def: InstRW<[SBWriteResGroup101], (instregex "VROUNDYPDm")>; def: InstRW<[SBWriteResGroup101], (instregex "VROUNDYPSm")>; def: InstRW<[SBWriteResGroup101], (instregex "VSUBPDYrm")>; @@ -2501,7 +2420,7 @@ def: InstRW<[SBWriteResGroup103], (instregex "CVTDQ2PDrm")>; def: InstRW<[SBWriteResGroup103], (instregex "CVTPD2DQrm")>; def: InstRW<[SBWriteResGroup103], (instregex "CVTPD2PSrm")>; def: InstRW<[SBWriteResGroup103], (instregex "CVTSD2SSrm")>; -def: InstRW<[SBWriteResGroup103], (instregex "CVTSI2SS64rm")>; +def: InstRW<[SBWriteResGroup103], (instregex "CVTSI642SSrm")>; def: InstRW<[SBWriteResGroup103], (instregex "CVTSI2SSrm")>; def: InstRW<[SBWriteResGroup103], (instregex "CVTTPD2DQrm")>; def: InstRW<[SBWriteResGroup103], (instregex "MMX_CVTPD2PIirm")>; @@ -2512,7 +2431,7 @@ def: InstRW<[SBWriteResGroup103], (instregex "VCVTDQ2PDrm")>; def: InstRW<[SBWriteResGroup103], (instregex "VCVTPD2DQrm")>; def: InstRW<[SBWriteResGroup103], (instregex "VCVTPD2PSrm")>; def: InstRW<[SBWriteResGroup103], (instregex "VCVTSD2SSrm")>; -def: InstRW<[SBWriteResGroup103], (instregex "VCVTSI2SS64rm")>; +def: InstRW<[SBWriteResGroup103], (instregex "VCVTSI642SSrm")>; def: InstRW<[SBWriteResGroup103], (instregex "VCVTSI2SSrm")>; def: InstRW<[SBWriteResGroup103], (instregex "VCVTTPD2DQrm")>; diff --git a/lib/Target/X86/X86SchedSkylakeClient.td b/lib/Target/X86/X86SchedSkylakeClient.td index eeeffdf70083..1b86431969bf 
100644 --- a/lib/Target/X86/X86SchedSkylakeClient.td +++ b/lib/Target/X86/X86SchedSkylakeClient.td @@ -380,11 +380,11 @@ def: InstRW<[SKLWriteResGroup3], (instregex "MOVDDUPrr")>; def: InstRW<[SKLWriteResGroup3], (instregex "MOVDI2PDIrr")>; def: InstRW<[SKLWriteResGroup3], (instregex "MOVHLPSrr")>; def: InstRW<[SKLWriteResGroup3], (instregex "MOVLHPSrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "MOVSDrr(_REV?)")>; +def: InstRW<[SKLWriteResGroup3], (instregex "MOVSDrr(_REV)?")>; def: InstRW<[SKLWriteResGroup3], (instregex "MOVSHDUPrr")>; def: InstRW<[SKLWriteResGroup3], (instregex "MOVSLDUPrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "MOVUPDrr(_REV?)")>; -def: InstRW<[SKLWriteResGroup3], (instregex "MOVUPSrr(_REV?)")>; +def: InstRW<[SKLWriteResGroup3], (instregex "MOVUPDrr(_REV)?")>; +def: InstRW<[SKLWriteResGroup3], (instregex "MOVUPSrr(_REV)?")>; def: InstRW<[SKLWriteResGroup3], (instregex "PACKSSDWrr")>; def: InstRW<[SKLWriteResGroup3], (instregex "PACKSSWBrr")>; def: InstRW<[SKLWriteResGroup3], (instregex "PACKUSDWrr")>; @@ -433,15 +433,15 @@ def: InstRW<[SKLWriteResGroup3], (instregex "VMOVDDUPrr")>; def: InstRW<[SKLWriteResGroup3], (instregex "VMOVDI2PDIrr")>; def: InstRW<[SKLWriteResGroup3], (instregex "VMOVHLPSrr")>; def: InstRW<[SKLWriteResGroup3], (instregex "VMOVLHPSrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VMOVSDrr(_REV?)")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VMOVSDrr(_REV)?")>; def: InstRW<[SKLWriteResGroup3], (instregex "VMOVSHDUPYrr")>; def: InstRW<[SKLWriteResGroup3], (instregex "VMOVSHDUPrr")>; def: InstRW<[SKLWriteResGroup3], (instregex "VMOVSLDUPYrr")>; def: InstRW<[SKLWriteResGroup3], (instregex "VMOVSLDUPrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VMOVUPDYrr(_REV?)")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VMOVUPDrr(_REV?)")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VMOVUPSYrr(_REV?)")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VMOVUPSrr(_REV?)")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VMOVUPDYrr(_REV)?")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VMOVUPDrr(_REV)?")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VMOVUPSYrr(_REV)?")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VMOVUPSrr(_REV)?")>; def: InstRW<[SKLWriteResGroup3], (instregex "VPACKSSDWYrr")>; def: InstRW<[SKLWriteResGroup3], (instregex "VPACKSSDWrr")>; def: InstRW<[SKLWriteResGroup3], (instregex "VPACKSSWBYrr")>; @@ -676,7 +676,7 @@ def SKLWriteResGroup6 : SchedWriteRes<[SKLPort05]> { } def: InstRW<[SKLWriteResGroup6], (instregex "FINCSTP")>; def: InstRW<[SKLWriteResGroup6], (instregex "FNOP")>; -def: InstRW<[SKLWriteResGroup6], (instregex "MMX_MOVQ64rr(_REV?)")>; +def: InstRW<[SKLWriteResGroup6], (instregex "MMX_MOVQ64rr(_REV)?")>; def: InstRW<[SKLWriteResGroup6], (instregex "MMX_PABSBrr64")>; def: InstRW<[SKLWriteResGroup6], (instregex "MMX_PABSDrr64")>; def: InstRW<[SKLWriteResGroup6], (instregex "MMX_PABSWrr64")>; @@ -701,13 +701,11 @@ def SKLWriteResGroup7 : SchedWriteRes<[SKLPort06]> { let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[SKLWriteResGroup7], (instregex "ADC(16|32|64)ri8")>; -def: InstRW<[SKLWriteResGroup7], (instregex "ADC(16|32|64)rr(_REV?)")>; -def: InstRW<[SKLWriteResGroup7], (instregex "ADC8rr(_REV?)")>; -def: InstRW<[SKLWriteResGroup7], (instregex "ADCX32rr")>; -def: InstRW<[SKLWriteResGroup7], (instregex "ADCX64rr")>; -def: InstRW<[SKLWriteResGroup7], (instregex "ADOX32rr")>; -def: InstRW<[SKLWriteResGroup7], (instregex "ADOX64rr")>; +def: InstRW<[SKLWriteResGroup7], (instregex 
"ADC(16|32|64)ri")>; +def: InstRW<[SKLWriteResGroup7], (instregex "ADC(16|32|64)rr(_REV)?")>; +def: InstRW<[SKLWriteResGroup7], (instregex "ADC8rr(_REV)?")>; +def: InstRW<[SKLWriteResGroup7], (instregex "ADCX(32|64)rr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "ADOX(32|64)rr")>; def: InstRW<[SKLWriteResGroup7], (instregex "BT(16|32|64)ri8")>; def: InstRW<[SKLWriteResGroup7], (instregex "BT(16|32|64)rr")>; def: InstRW<[SKLWriteResGroup7], (instregex "BTC(16|32|64)ri8")>; @@ -718,92 +716,32 @@ def: InstRW<[SKLWriteResGroup7], (instregex "BTS(16|32|64)ri8")>; def: InstRW<[SKLWriteResGroup7], (instregex "BTS(16|32|64)rr")>; def: InstRW<[SKLWriteResGroup7], (instregex "CDQ")>; def: InstRW<[SKLWriteResGroup7], (instregex "CLAC")>; -def: InstRW<[SKLWriteResGroup7], (instregex "CMOVAE(16|32|64)rr")>; -def: InstRW<[SKLWriteResGroup7], (instregex "CMOVB(16|32|64)rr")>; -def: InstRW<[SKLWriteResGroup7], (instregex "CMOVE(16|32|64)rr")>; -def: InstRW<[SKLWriteResGroup7], (instregex "CMOVG(16|32|64)rr")>; -def: InstRW<[SKLWriteResGroup7], (instregex "CMOVGE(16|32|64)rr")>; -def: InstRW<[SKLWriteResGroup7], (instregex "CMOVL(16|32|64)rr")>; -def: InstRW<[SKLWriteResGroup7], (instregex "CMOVLE(16|32|64)rr")>; -def: InstRW<[SKLWriteResGroup7], (instregex "CMOVNE(16|32|64)rr")>; -def: InstRW<[SKLWriteResGroup7], (instregex "CMOVNO(16|32|64)rr")>; -def: InstRW<[SKLWriteResGroup7], (instregex "CMOVNP(16|32|64)rr")>; -def: InstRW<[SKLWriteResGroup7], (instregex "CMOVNS(16|32|64)rr")>; -def: InstRW<[SKLWriteResGroup7], (instregex "CMOVO(16|32|64)rr")>; -def: InstRW<[SKLWriteResGroup7], (instregex "CMOVP(16|32|64)rr")>; -def: InstRW<[SKLWriteResGroup7], (instregex "CMOVS(16|32|64)rr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "CMOV(AE|B|E|G|GE|L|LE|NE|NO|NP|NS|O|P|S)(16|32|64)rr")>; def: InstRW<[SKLWriteResGroup7], (instregex "CQO")>; -def: InstRW<[SKLWriteResGroup7], (instregex "JAE_1")>; -def: InstRW<[SKLWriteResGroup7], (instregex "JAE_4")>; -def: InstRW<[SKLWriteResGroup7], (instregex "JA_1")>; -def: InstRW<[SKLWriteResGroup7], (instregex "JA_4")>; -def: InstRW<[SKLWriteResGroup7], (instregex "JBE_1")>; -def: InstRW<[SKLWriteResGroup7], (instregex "JBE_4")>; -def: InstRW<[SKLWriteResGroup7], (instregex "JB_1")>; -def: InstRW<[SKLWriteResGroup7], (instregex "JB_4")>; -def: InstRW<[SKLWriteResGroup7], (instregex "JE_1")>; -def: InstRW<[SKLWriteResGroup7], (instregex "JE_4")>; -def: InstRW<[SKLWriteResGroup7], (instregex "JGE_1")>; -def: InstRW<[SKLWriteResGroup7], (instregex "JGE_4")>; -def: InstRW<[SKLWriteResGroup7], (instregex "JG_1")>; -def: InstRW<[SKLWriteResGroup7], (instregex "JG_4")>; -def: InstRW<[SKLWriteResGroup7], (instregex "JLE_1")>; -def: InstRW<[SKLWriteResGroup7], (instregex "JLE_4")>; -def: InstRW<[SKLWriteResGroup7], (instregex "JL_1")>; -def: InstRW<[SKLWriteResGroup7], (instregex "JL_4")>; +def: InstRW<[SKLWriteResGroup7], (instregex "J(A|AE|B|BE|E|G|GE|L|LE|NE|NO|NP|NS|O|P|S)_1")>; +def: InstRW<[SKLWriteResGroup7], (instregex "J(A|AE|B|BE|E|G|GE|L|LE|NE|NO|NP|NS|O|P|S)_4")>; def: InstRW<[SKLWriteResGroup7], (instregex "JMP_1")>; def: InstRW<[SKLWriteResGroup7], (instregex "JMP_4")>; -def: InstRW<[SKLWriteResGroup7], (instregex "JNE_1")>; -def: InstRW<[SKLWriteResGroup7], (instregex "JNE_4")>; -def: InstRW<[SKLWriteResGroup7], (instregex "JNO_1")>; -def: InstRW<[SKLWriteResGroup7], (instregex "JNO_4")>; -def: InstRW<[SKLWriteResGroup7], (instregex "JNP_1")>; -def: InstRW<[SKLWriteResGroup7], (instregex "JNP_4")>; -def: InstRW<[SKLWriteResGroup7], (instregex "JNS_1")>; -def: 
InstRW<[SKLWriteResGroup7], (instregex "JNS_4")>; -def: InstRW<[SKLWriteResGroup7], (instregex "JO_1")>; -def: InstRW<[SKLWriteResGroup7], (instregex "JO_4")>; -def: InstRW<[SKLWriteResGroup7], (instregex "JP_1")>; -def: InstRW<[SKLWriteResGroup7], (instregex "JP_4")>; -def: InstRW<[SKLWriteResGroup7], (instregex "JS_1")>; -def: InstRW<[SKLWriteResGroup7], (instregex "JS_4")>; -def: InstRW<[SKLWriteResGroup7], (instregex "RORX32ri")>; -def: InstRW<[SKLWriteResGroup7], (instregex "RORX64ri")>; +def: InstRW<[SKLWriteResGroup7], (instregex "RORX(32|64)ri")>; def: InstRW<[SKLWriteResGroup7], (instregex "SAR(16|32|64)r1")>; def: InstRW<[SKLWriteResGroup7], (instregex "SAR(16|32|64)ri")>; def: InstRW<[SKLWriteResGroup7], (instregex "SAR8r1")>; def: InstRW<[SKLWriteResGroup7], (instregex "SAR8ri")>; -def: InstRW<[SKLWriteResGroup7], (instregex "SARX32rr")>; -def: InstRW<[SKLWriteResGroup7], (instregex "SARX64rr")>; -def: InstRW<[SKLWriteResGroup7], (instregex "SBB(16|32|64)ri8")>; -def: InstRW<[SKLWriteResGroup7], (instregex "SBB(16|32|64)rr(_REV?)")>; -def: InstRW<[SKLWriteResGroup7], (instregex "SBB8rr(_REV?)")>; -def: InstRW<[SKLWriteResGroup7], (instregex "SETAEr")>; -def: InstRW<[SKLWriteResGroup7], (instregex "SETBr")>; -def: InstRW<[SKLWriteResGroup7], (instregex "SETEr")>; -def: InstRW<[SKLWriteResGroup7], (instregex "SETGEr")>; -def: InstRW<[SKLWriteResGroup7], (instregex "SETGr")>; -def: InstRW<[SKLWriteResGroup7], (instregex "SETLEr")>; -def: InstRW<[SKLWriteResGroup7], (instregex "SETLr")>; -def: InstRW<[SKLWriteResGroup7], (instregex "SETNEr")>; -def: InstRW<[SKLWriteResGroup7], (instregex "SETNOr")>; -def: InstRW<[SKLWriteResGroup7], (instregex "SETNPr")>; -def: InstRW<[SKLWriteResGroup7], (instregex "SETNSr")>; -def: InstRW<[SKLWriteResGroup7], (instregex "SETOr")>; -def: InstRW<[SKLWriteResGroup7], (instregex "SETPr")>; -def: InstRW<[SKLWriteResGroup7], (instregex "SETSr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SARX(32|64)rr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SBB(16|32|64)ri")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SBB(16|32|64)rr(_REV)?")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SBB8rr(_REV)?")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SET(AE|B|E|G|GE|L|LE|NE|NO|NP|NS|O|P|S)r")>; def: InstRW<[SKLWriteResGroup7], (instregex "SHL(16|32|64)r1")>; def: InstRW<[SKLWriteResGroup7], (instregex "SHL(16|32|64)ri")>; def: InstRW<[SKLWriteResGroup7], (instregex "SHL8r1")>; def: InstRW<[SKLWriteResGroup7], (instregex "SHL8ri")>; -def: InstRW<[SKLWriteResGroup7], (instregex "SHLX32rr")>; -def: InstRW<[SKLWriteResGroup7], (instregex "SHLX64rr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SHLX(32|64)rr")>; def: InstRW<[SKLWriteResGroup7], (instregex "SHR(16|32|64)r1")>; def: InstRW<[SKLWriteResGroup7], (instregex "SHR(16|32|64)ri")>; def: InstRW<[SKLWriteResGroup7], (instregex "SHR8r1")>; def: InstRW<[SKLWriteResGroup7], (instregex "SHR8ri")>; -def: InstRW<[SKLWriteResGroup7], (instregex "SHRX32rr")>; -def: InstRW<[SKLWriteResGroup7], (instregex "SHRX64rr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SHRX(32|64)rr")>; def: InstRW<[SKLWriteResGroup7], (instregex "STAC")>; def SKLWriteResGroup8 : SchedWriteRes<[SKLPort15]> { @@ -811,17 +749,12 @@ def SKLWriteResGroup8 : SchedWriteRes<[SKLPort15]> { let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[SKLWriteResGroup8], (instregex "ANDN32rr")>; -def: InstRW<[SKLWriteResGroup8], (instregex "ANDN64rr")>; -def: InstRW<[SKLWriteResGroup8], (instregex "BLSI32rr")>; -def: 
InstRW<[SKLWriteResGroup8], (instregex "BLSI64rr")>; -def: InstRW<[SKLWriteResGroup8], (instregex "BLSMSK32rr")>; -def: InstRW<[SKLWriteResGroup8], (instregex "BLSMSK64rr")>; -def: InstRW<[SKLWriteResGroup8], (instregex "BLSR32rr")>; -def: InstRW<[SKLWriteResGroup8], (instregex "BLSR64rr")>; -def: InstRW<[SKLWriteResGroup8], (instregex "BZHI32rr")>; -def: InstRW<[SKLWriteResGroup8], (instregex "BZHI64rr")>; -def: InstRW<[SKLWriteResGroup8], (instregex "LEA(16|32|64)r")>; +def: InstRW<[SKLWriteResGroup8], (instregex "ANDN(32|64)rr")>; +def: InstRW<[SKLWriteResGroup8], (instregex "BLSI(32|64)rr")>; +def: InstRW<[SKLWriteResGroup8], (instregex "BLSMSK(32|64)rr")>; +def: InstRW<[SKLWriteResGroup8], (instregex "BLSR(32|64)rr")>; +def: InstRW<[SKLWriteResGroup8], (instregex "BZHI(32|64)rr")>; +def: InstRW<[SKLWriteResGroup8], (instregex "LEA(16|32|64)(_32)?r")>; def SKLWriteResGroup9 : SchedWriteRes<[SKLPort015]> { let Latency = 1; @@ -835,12 +768,12 @@ def: InstRW<[SKLWriteResGroup9], (instregex "ANDPSrr")>; def: InstRW<[SKLWriteResGroup9], (instregex "BLENDPDrri")>; def: InstRW<[SKLWriteResGroup9], (instregex "BLENDPSrri")>; def: InstRW<[SKLWriteResGroup9], (instregex "MMX_MOVD64from64rr")>; -def: InstRW<[SKLWriteResGroup9], (instregex "MOVAPDrr(_REV?)")>; -def: InstRW<[SKLWriteResGroup9], (instregex "MOVAPSrr(_REV?)")>; -def: InstRW<[SKLWriteResGroup9], (instregex "MOVDQArr(_REV?)")>; -def: InstRW<[SKLWriteResGroup9], (instregex "MOVDQUrr(_REV?)")>; +def: InstRW<[SKLWriteResGroup9], (instregex "MOVAPDrr(_REV)?")>; +def: InstRW<[SKLWriteResGroup9], (instregex "MOVAPSrr(_REV)?")>; +def: InstRW<[SKLWriteResGroup9], (instregex "MOVDQArr(_REV)?")>; +def: InstRW<[SKLWriteResGroup9], (instregex "MOVDQUrr(_REV)?")>; def: InstRW<[SKLWriteResGroup9], (instregex "MOVPQI2QIrr")>; -def: InstRW<[SKLWriteResGroup9], (instregex "MOVSSrr(_REV?)")>; +def: InstRW<[SKLWriteResGroup9], (instregex "MOVSSrr(_REV)?")>; def: InstRW<[SKLWriteResGroup9], (instregex "ORPDrr")>; def: InstRW<[SKLWriteResGroup9], (instregex "ORPSrr")>; def: InstRW<[SKLWriteResGroup9], (instregex "PADDBrr")>; @@ -867,16 +800,16 @@ def: InstRW<[SKLWriteResGroup9], (instregex "VBLENDPDYrri")>; def: InstRW<[SKLWriteResGroup9], (instregex "VBLENDPDrri")>; def: InstRW<[SKLWriteResGroup9], (instregex "VBLENDPSYrri")>; def: InstRW<[SKLWriteResGroup9], (instregex "VBLENDPSrri")>; -def: InstRW<[SKLWriteResGroup9], (instregex "VMOVAPDYrr(_REV?)")>; -def: InstRW<[SKLWriteResGroup9], (instregex "VMOVAPDrr(_REV?)")>; -def: InstRW<[SKLWriteResGroup9], (instregex "VMOVAPSYrr(_REV?)")>; -def: InstRW<[SKLWriteResGroup9], (instregex "VMOVAPSrr(_REV?)")>; -def: InstRW<[SKLWriteResGroup9], (instregex "VMOVDQAYrr(_REV?)")>; -def: InstRW<[SKLWriteResGroup9], (instregex "VMOVDQArr(_REV?)")>; -def: InstRW<[SKLWriteResGroup9], (instregex "VMOVDQUYrr(_REV?)")>; -def: InstRW<[SKLWriteResGroup9], (instregex "VMOVDQUrr(_REV?)")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VMOVAPDYrr(_REV)?")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VMOVAPDrr(_REV)?")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VMOVAPSYrr(_REV)?")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VMOVAPSrr(_REV)?")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VMOVDQAYrr(_REV)?")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VMOVDQArr(_REV)?")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VMOVDQUYrr(_REV)?")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VMOVDQUrr(_REV)?")>; def: InstRW<[SKLWriteResGroup9], (instregex "VMOVPQI2QIrr")>; -def: InstRW<[SKLWriteResGroup9], (instregex 
"VMOVSSrr(_REV?)")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VMOVSSrr(_REV)?")>; def: InstRW<[SKLWriteResGroup9], (instregex "VMOVZPQILo2PQIrr")>; def: InstRW<[SKLWriteResGroup9], (instregex "VORPDYrr")>; def: InstRW<[SKLWriteResGroup9], (instregex "VORPDrr")>; @@ -920,33 +853,33 @@ def SKLWriteResGroup10 : SchedWriteRes<[SKLPort0156]> { let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[SKLWriteResGroup10], (instregex "ADD(16|32|64)ri8")>; -def: InstRW<[SKLWriteResGroup10], (instregex "ADD(16|32|64)rr(_REV?)")>; +def: InstRW<[SKLWriteResGroup10], (instregex "ADD(16|32|64)ri")>; +def: InstRW<[SKLWriteResGroup10], (instregex "ADD(16|32|64)rr(_REV)?")>; def: InstRW<[SKLWriteResGroup10], (instregex "ADD8i8")>; def: InstRW<[SKLWriteResGroup10], (instregex "ADD8ri")>; -def: InstRW<[SKLWriteResGroup10], (instregex "ADD8rr(_REV?)")>; -def: InstRW<[SKLWriteResGroup10], (instregex "AND(16|32|64)ri8")>; -def: InstRW<[SKLWriteResGroup10], (instregex "AND(16|32|64)rr(_REV?)")>; +def: InstRW<[SKLWriteResGroup10], (instregex "ADD8rr(_REV)?")>; +def: InstRW<[SKLWriteResGroup10], (instregex "AND(16|32|64)ri")>; +def: InstRW<[SKLWriteResGroup10], (instregex "AND(16|32|64)rr(_REV)?")>; def: InstRW<[SKLWriteResGroup10], (instregex "AND8i8")>; def: InstRW<[SKLWriteResGroup10], (instregex "AND8ri")>; -def: InstRW<[SKLWriteResGroup10], (instregex "AND8rr(_REV?)")>; +def: InstRW<[SKLWriteResGroup10], (instregex "AND8rr(_REV)?")>; def: InstRW<[SKLWriteResGroup10], (instregex "CBW")>; def: InstRW<[SKLWriteResGroup10], (instregex "CLC")>; def: InstRW<[SKLWriteResGroup10], (instregex "CMC")>; -def: InstRW<[SKLWriteResGroup10], (instregex "CMP(16|32|64)ri8")>; -def: InstRW<[SKLWriteResGroup10], (instregex "CMP(16|32|64)rr(_REV?)")>; +def: InstRW<[SKLWriteResGroup10], (instregex "CMP(16|32|64)ri")>; +def: InstRW<[SKLWriteResGroup10], (instregex "CMP(16|32|64)rr(_REV)?")>; def: InstRW<[SKLWriteResGroup10], (instregex "CMP8i8")>; def: InstRW<[SKLWriteResGroup10], (instregex "CMP8ri")>; -def: InstRW<[SKLWriteResGroup10], (instregex "CMP8rr(_REV?)")>; +def: InstRW<[SKLWriteResGroup10], (instregex "CMP8rr(_REV)?")>; def: InstRW<[SKLWriteResGroup10], (instregex "CWDE")>; def: InstRW<[SKLWriteResGroup10], (instregex "DEC(16|32|64)r")>; def: InstRW<[SKLWriteResGroup10], (instregex "DEC8r")>; def: InstRW<[SKLWriteResGroup10], (instregex "INC(16|32|64)r")>; def: InstRW<[SKLWriteResGroup10], (instregex "INC8r")>; def: InstRW<[SKLWriteResGroup10], (instregex "LAHF")>; -def: InstRW<[SKLWriteResGroup10], (instregex "MOV(16|32|64)rr(_REV?)")>; -def: InstRW<[SKLWriteResGroup10], (instregex "MOV8ri(_alt?)")>; -def: InstRW<[SKLWriteResGroup10], (instregex "MOV8rr(_REV?)")>; +def: InstRW<[SKLWriteResGroup10], (instregex "MOV(16|32|64)rr(_REV)?")>; +def: InstRW<[SKLWriteResGroup10], (instregex "MOV8ri(_alt)?")>; +def: InstRW<[SKLWriteResGroup10], (instregex "MOV8rr(_REV)?")>; def: InstRW<[SKLWriteResGroup10], (instregex "MOVSX(16|32|64)rr16")>; def: InstRW<[SKLWriteResGroup10], (instregex "MOVSX(16|32|64)rr32")>; def: InstRW<[SKLWriteResGroup10], (instregex "MOVSX(16|32|64)rr8")>; @@ -957,11 +890,11 @@ def: InstRW<[SKLWriteResGroup10], (instregex "NEG8r")>; def: InstRW<[SKLWriteResGroup10], (instregex "NOOP")>; def: InstRW<[SKLWriteResGroup10], (instregex "NOT(16|32|64)r")>; def: InstRW<[SKLWriteResGroup10], (instregex "NOT8r")>; -def: InstRW<[SKLWriteResGroup10], (instregex "OR(16|32|64)ri8")>; -def: InstRW<[SKLWriteResGroup10], (instregex "OR(16|32|64)rr(_REV?)")>; +def: InstRW<[SKLWriteResGroup10], (instregex 
"OR(16|32|64)ri")>; +def: InstRW<[SKLWriteResGroup10], (instregex "OR(16|32|64)rr(_REV)?")>; def: InstRW<[SKLWriteResGroup10], (instregex "OR8i8")>; def: InstRW<[SKLWriteResGroup10], (instregex "OR8ri")>; -def: InstRW<[SKLWriteResGroup10], (instregex "OR8rr(_REV?)")>; +def: InstRW<[SKLWriteResGroup10], (instregex "OR8rr(_REV)?")>; def: InstRW<[SKLWriteResGroup10], (instregex "SAHF")>; def: InstRW<[SKLWriteResGroup10], (instregex "SGDT64m")>; def: InstRW<[SKLWriteResGroup10], (instregex "SIDT64m")>; @@ -969,22 +902,22 @@ def: InstRW<[SKLWriteResGroup10], (instregex "SLDT64m")>; def: InstRW<[SKLWriteResGroup10], (instregex "SMSW16m")>; def: InstRW<[SKLWriteResGroup10], (instregex "STC")>; def: InstRW<[SKLWriteResGroup10], (instregex "STRm")>; -def: InstRW<[SKLWriteResGroup10], (instregex "SUB(16|32|64)ri8")>; -def: InstRW<[SKLWriteResGroup10], (instregex "SUB(16|32|64)rr(_REV?)")>; +def: InstRW<[SKLWriteResGroup10], (instregex "SUB(16|32|64)ri")>; +def: InstRW<[SKLWriteResGroup10], (instregex "SUB(16|32|64)rr(_REV)?")>; def: InstRW<[SKLWriteResGroup10], (instregex "SUB8i8")>; def: InstRW<[SKLWriteResGroup10], (instregex "SUB8ri")>; -def: InstRW<[SKLWriteResGroup10], (instregex "SUB8rr(_REV?)")>; +def: InstRW<[SKLWriteResGroup10], (instregex "SUB8rr(_REV)?")>; def: InstRW<[SKLWriteResGroup10], (instregex "SYSCALL")>; def: InstRW<[SKLWriteResGroup10], (instregex "TEST(16|32|64)rr")>; def: InstRW<[SKLWriteResGroup10], (instregex "TEST8i8")>; def: InstRW<[SKLWriteResGroup10], (instregex "TEST8ri")>; def: InstRW<[SKLWriteResGroup10], (instregex "TEST8rr")>; def: InstRW<[SKLWriteResGroup10], (instregex "XCHG(16|32|64)rr")>; -def: InstRW<[SKLWriteResGroup10], (instregex "XOR(16|32|64)ri8")>; -def: InstRW<[SKLWriteResGroup10], (instregex "XOR(16|32|64)rr(_REV?)")>; +def: InstRW<[SKLWriteResGroup10], (instregex "XOR(16|32|64)ri")>; +def: InstRW<[SKLWriteResGroup10], (instregex "XOR(16|32|64)rr(_REV)?")>; def: InstRW<[SKLWriteResGroup10], (instregex "XOR8i8")>; def: InstRW<[SKLWriteResGroup10], (instregex "XOR8ri")>; -def: InstRW<[SKLWriteResGroup10], (instregex "XOR8rr(_REV?)")>; +def: InstRW<[SKLWriteResGroup10], (instregex "XOR8rr(_REV)?")>; def SKLWriteResGroup11 : SchedWriteRes<[SKLPort4,SKLPort237]> { let Latency = 1; @@ -1015,6 +948,7 @@ def: InstRW<[SKLWriteResGroup11], (instregex "MOVNTPSmr")>; def: InstRW<[SKLWriteResGroup11], (instregex "MOVPDI2DImr")>; def: InstRW<[SKLWriteResGroup11], (instregex "MOVPQI2QImr")>; def: InstRW<[SKLWriteResGroup11], (instregex "MOVPQIto64mr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "MOVSDmr")>; def: InstRW<[SKLWriteResGroup11], (instregex "MOVSSmr")>; def: InstRW<[SKLWriteResGroup11], (instregex "MOVUPDmr")>; def: InstRW<[SKLWriteResGroup11], (instregex "MOVUPSmr")>; @@ -1115,8 +1049,7 @@ def SKLWriteResGroup15 : SchedWriteRes<[SKLPort06]> { let NumMicroOps = 2; let ResourceCycles = [2]; } -def: InstRW<[SKLWriteResGroup15], (instregex "CMOVA(16|32|64)rr")>; -def: InstRW<[SKLWriteResGroup15], (instregex "CMOVBE(16|32|64)rr")>; +def: InstRW<[SKLWriteResGroup15], (instregex "CMOV(A|BE)(16|32|64)rr")>; def: InstRW<[SKLWriteResGroup15], (instregex "ROL(16|32|64)r1")>; def: InstRW<[SKLWriteResGroup15], (instregex "ROL(16|32|64)ri")>; def: InstRW<[SKLWriteResGroup15], (instregex "ROL8r1")>; @@ -1125,8 +1058,7 @@ def: InstRW<[SKLWriteResGroup15], (instregex "ROR(16|32|64)r1")>; def: InstRW<[SKLWriteResGroup15], (instregex "ROR(16|32|64)ri")>; def: InstRW<[SKLWriteResGroup15], (instregex "ROR8r1")>; def: InstRW<[SKLWriteResGroup15], (instregex "ROR8ri")>; 
-def: InstRW<[SKLWriteResGroup15], (instregex "SETAr")>; -def: InstRW<[SKLWriteResGroup15], (instregex "SETBEr")>; +def: InstRW<[SKLWriteResGroup15], (instregex "SET(A|BE)r")>; def SKLWriteResGroup16 : SchedWriteRes<[SKLPort015]> { let Latency = 2; @@ -1209,8 +1141,7 @@ def SKLWriteResGroup22 : SchedWriteRes<[SKLPort06,SKLPort15]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKLWriteResGroup22], (instregex "BEXTR32rr")>; -def: InstRW<[SKLWriteResGroup22], (instregex "BEXTR64rr")>; +def: InstRW<[SKLWriteResGroup22], (instregex "BEXTR(32|64)rr")>; def: InstRW<[SKLWriteResGroup22], (instregex "BSWAP(16|32|64)r")>; def SKLWriteResGroup23 : SchedWriteRes<[SKLPort06,SKLPort0156]> { @@ -1255,20 +1186,7 @@ def SKLWriteResGroup26 : SchedWriteRes<[SKLPort4,SKLPort237,SKLPort06]> { let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } -def: InstRW<[SKLWriteResGroup26], (instregex "SETAEm")>; -def: InstRW<[SKLWriteResGroup26], (instregex "SETBm")>; -def: InstRW<[SKLWriteResGroup26], (instregex "SETEm")>; -def: InstRW<[SKLWriteResGroup26], (instregex "SETGEm")>; -def: InstRW<[SKLWriteResGroup26], (instregex "SETGm")>; -def: InstRW<[SKLWriteResGroup26], (instregex "SETLEm")>; -def: InstRW<[SKLWriteResGroup26], (instregex "SETLm")>; -def: InstRW<[SKLWriteResGroup26], (instregex "SETNEm")>; -def: InstRW<[SKLWriteResGroup26], (instregex "SETNOm")>; -def: InstRW<[SKLWriteResGroup26], (instregex "SETNPm")>; -def: InstRW<[SKLWriteResGroup26], (instregex "SETNSm")>; -def: InstRW<[SKLWriteResGroup26], (instregex "SETOm")>; -def: InstRW<[SKLWriteResGroup26], (instregex "SETPm")>; -def: InstRW<[SKLWriteResGroup26], (instregex "SETSm")>; +def: InstRW<[SKLWriteResGroup26], (instregex "SET(AE|B|E|G|GE|L|LE|NE|NO|NP|NS|O|P|S)m")>; def SKLWriteResGroup27 : SchedWriteRes<[SKLPort4,SKLPort237,SKLPort15]> { let Latency = 2; @@ -1282,8 +1200,7 @@ def SKLWriteResGroup28 : SchedWriteRes<[SKLPort4,SKLPort237,SKLPort0156]> { let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } -def: InstRW<[SKLWriteResGroup28], (instregex "PUSH(16|32|64)r")>; -def: InstRW<[SKLWriteResGroup28], (instregex "PUSH(16|32|64)rmr")>; +def: InstRW<[SKLWriteResGroup28], (instregex "PUSH(16|32|64)r(mr)?")>; def: InstRW<[SKLWriteResGroup28], (instregex "PUSH64i8")>; def: InstRW<[SKLWriteResGroup28], (instregex "STOSB")>; def: InstRW<[SKLWriteResGroup28], (instregex "STOSL")>; @@ -1297,14 +1214,12 @@ def SKLWriteResGroup29 : SchedWriteRes<[SKLPort1]> { } def: InstRW<[SKLWriteResGroup29], (instregex "BSF(16|32|64)rr")>; def: InstRW<[SKLWriteResGroup29], (instregex "BSR(16|32|64)rr")>; -def: InstRW<[SKLWriteResGroup29], (instregex "IMUL64rr(i8?)")>; +def: InstRW<[SKLWriteResGroup29], (instregex "IMUL64rr(i8)?")>; def: InstRW<[SKLWriteResGroup29], (instregex "IMUL8r")>; def: InstRW<[SKLWriteResGroup29], (instregex "LZCNT(16|32|64)rr")>; def: InstRW<[SKLWriteResGroup29], (instregex "MUL8r")>; -def: InstRW<[SKLWriteResGroup29], (instregex "PDEP32rr")>; -def: InstRW<[SKLWriteResGroup29], (instregex "PDEP64rr")>; -def: InstRW<[SKLWriteResGroup29], (instregex "PEXT32rr")>; -def: InstRW<[SKLWriteResGroup29], (instregex "PEXT64rr")>; +def: InstRW<[SKLWriteResGroup29], (instregex "PDEP(32|64)rr")>; +def: InstRW<[SKLWriteResGroup29], (instregex "PEXT(32|64)rr")>; def: InstRW<[SKLWriteResGroup29], (instregex "POPCNT(16|32|64)rr")>; def: InstRW<[SKLWriteResGroup29], (instregex "SHLD(16|32|64)rri8")>; def: InstRW<[SKLWriteResGroup29], (instregex "SHRD(16|32|64)rri8")>; @@ -1315,13 +1230,13 @@ def SKLWriteResGroup29_16 : SchedWriteRes<[SKLPort1, 
SKLPort0156]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKLWriteResGroup29_16], (instregex "IMUL16rr(i8?)")>; +def: InstRW<[SKLWriteResGroup29_16], (instregex "IMUL16rr(i8)?")>; def SKLWriteResGroup29_32 : SchedWriteRes<[SKLPort1]> { let Latency = 3; let NumMicroOps = 1; } -def: InstRW<[SKLWriteResGroup29_32], (instregex "IMUL32rr(i8?)")>; +def: InstRW<[SKLWriteResGroup29_32], (instregex "IMUL32rr(i8)?")>; def SKLWriteResGroup30 : SchedWriteRes<[SKLPort5]> { let Latency = 3; @@ -1526,8 +1441,7 @@ def SKLWriteResGroup44 : SchedWriteRes<[SKLPort4,SKLPort237,SKLPort06]> { let NumMicroOps = 4; let ResourceCycles = [1,1,2]; } -def: InstRW<[SKLWriteResGroup44], (instregex "SETAm")>; -def: InstRW<[SKLWriteResGroup44], (instregex "SETBEm")>; +def: InstRW<[SKLWriteResGroup44], (instregex "SET(A|BE)m")>; def SKLWriteResGroup45 : SchedWriteRes<[SKLPort4,SKLPort6,SKLPort237,SKLPort0156]> { let Latency = 3; @@ -1606,102 +1520,6 @@ def: InstRW<[SKLWriteResGroup48], (instregex "VADDSUBPDYrr")>; def: InstRW<[SKLWriteResGroup48], (instregex "VADDSUBPDrr")>; def: InstRW<[SKLWriteResGroup48], (instregex "VADDSUBPSYrr")>; def: InstRW<[SKLWriteResGroup48], (instregex "VADDSUBPSrr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFMADD132PDYr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFMADD132PDr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFMADD132PSYr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFMADD132PSr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFMADD132SDr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFMADD132SSr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFMADD213PDYr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFMADD213PDr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFMADD213PSYr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFMADD213PSr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFMADD213SDr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFMADD213SSr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFMADD231PDYr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFMADD231PDr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFMADD231PSYr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFMADD231PSr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFMADD231SDr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFMADD231SSr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFMADDSUB132PDYr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFMADDSUB132PDr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFMADDSUB132PSYr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFMADDSUB132PSr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFMADDSUB213PDYr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFMADDSUB213PDr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFMADDSUB213PSYr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFMADDSUB213PSr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFMADDSUB231PDYr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFMADDSUB231PDr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFMADDSUB231PSYr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFMADDSUB231PSr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUB132PDYr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUB132PDr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUB132PSYr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUB132PSr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUB132SDr")>; -def: InstRW<[SKLWriteResGroup48], (instregex 
"VFMSUB132SSr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUB213PDYr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUB213PDr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUB213PSYr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUB213PSr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUB213SDr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUB213SSr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUB231PDYr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUB231PDr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUB231PSYr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUB231PSr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUB231SDr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUB231SSr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUBADD132PDYr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUBADD132PDr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUBADD132PSYr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUBADD132PSr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUBADD213PDYr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUBADD213PDr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUBADD213PSYr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUBADD213PSr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUBADD231PDYr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUBADD231PDr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUBADD231PSYr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUBADD231PSr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFNMADD132PDYr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFNMADD132PDr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFNMADD132PSYr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFNMADD132PSr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFNMADD132SDr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFNMADD132SSr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFNMADD213PDYr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFNMADD213PDr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFNMADD213PSYr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFNMADD213PSr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFNMADD213SDr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFNMADD213SSr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFNMADD231PDYr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFNMADD231PDr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFNMADD231PSYr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFNMADD231PSr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFNMADD231SDr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFNMADD231SSr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFNMSUB132PDYr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFNMSUB132PDr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFNMSUB132PSYr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFNMSUB132PSr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFNMSUB132SDr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFNMSUB132SSr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFNMSUB213PDYr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFNMSUB213PDr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFNMSUB213PSYr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFNMSUB213PSr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFNMSUB213SDr")>; -def: InstRW<[SKLWriteResGroup48], (instregex 
"VFNMSUB213SSr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFNMSUB231PDYr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFNMSUB231PDr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFNMSUB231PSYr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFNMSUB231PSr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFNMSUB231SDr")>; -def: InstRW<[SKLWriteResGroup48], (instregex "VFNMSUB231SSr")>; def: InstRW<[SKLWriteResGroup48], (instregex "VMULPDYrr")>; def: InstRW<[SKLWriteResGroup48], (instregex "VMULPDrr")>; def: InstRW<[SKLWriteResGroup48], (instregex "VMULPSYrr")>; @@ -1714,6 +1532,10 @@ def: InstRW<[SKLWriteResGroup48], (instregex "VSUBPSYrr")>; def: InstRW<[SKLWriteResGroup48], (instregex "VSUBPSrr")>; def: InstRW<[SKLWriteResGroup48], (instregex "VSUBSDrr")>; def: InstRW<[SKLWriteResGroup48], (instregex "VSUBSSrr")>; +def: InstRW<[SKLWriteResGroup48], + (instregex + "VF(N)?M(ADD|SUB|ADDSUB|SUBADD)(132|213|231)P(D|S)(Y)?r", + "VF(N)?M(ADD|SUB)(132|213|231)S(D|S)r")>; def SKLWriteResGroup49 : SchedWriteRes<[SKLPort015]> { let Latency = 4; @@ -1722,18 +1544,19 @@ def SKLWriteResGroup49 : SchedWriteRes<[SKLPort015]> { } def: InstRW<[SKLWriteResGroup49], (instregex "CMPPDrri")>; def: InstRW<[SKLWriteResGroup49], (instregex "CMPPSrri")>; +def: InstRW<[SKLWriteResGroup49], (instregex "CMPSDrr")>; def: InstRW<[SKLWriteResGroup49], (instregex "CMPSSrr")>; def: InstRW<[SKLWriteResGroup49], (instregex "CVTDQ2PSrr")>; def: InstRW<[SKLWriteResGroup49], (instregex "CVTPS2DQrr")>; def: InstRW<[SKLWriteResGroup49], (instregex "CVTTPS2DQrr")>; -def: InstRW<[SKLWriteResGroup49], (instregex "MAXPDrr")>; -def: InstRW<[SKLWriteResGroup49], (instregex "MAXPSrr")>; -def: InstRW<[SKLWriteResGroup49], (instregex "MAXSDrr")>; -def: InstRW<[SKLWriteResGroup49], (instregex "MAXSSrr")>; -def: InstRW<[SKLWriteResGroup49], (instregex "MINPDrr")>; -def: InstRW<[SKLWriteResGroup49], (instregex "MINPSrr")>; -def: InstRW<[SKLWriteResGroup49], (instregex "MINSDrr")>; -def: InstRW<[SKLWriteResGroup49], (instregex "MINSSrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "MAX(C?)PDrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "MAX(C?)PSrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "MAX(C?)SDrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "MAX(C?)SSrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "MIN(C?)PDrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "MIN(C?)PSrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "MIN(C?)SDrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "MIN(C?)SSrr")>; def: InstRW<[SKLWriteResGroup49], (instregex "PHMINPOSUWrr128")>; def: InstRW<[SKLWriteResGroup49], (instregex "PMADDUBSWrr")>; def: InstRW<[SKLWriteResGroup49], (instregex "PMADDWDrr")>; @@ -1755,18 +1578,18 @@ def: InstRW<[SKLWriteResGroup49], (instregex "VCVTPS2DQYrr")>; def: InstRW<[SKLWriteResGroup49], (instregex "VCVTPS2DQrr")>; def: InstRW<[SKLWriteResGroup49], (instregex "VCVTTPS2DQYrr")>; def: InstRW<[SKLWriteResGroup49], (instregex "VCVTTPS2DQrr")>; -def: InstRW<[SKLWriteResGroup49], (instregex "VMAXPDYrr")>; -def: InstRW<[SKLWriteResGroup49], (instregex "VMAXPDrr")>; -def: InstRW<[SKLWriteResGroup49], (instregex "VMAXPSYrr")>; -def: InstRW<[SKLWriteResGroup49], (instregex "VMAXPSrr")>; -def: InstRW<[SKLWriteResGroup49], (instregex "VMAXSDrr")>; -def: InstRW<[SKLWriteResGroup49], (instregex "VMAXSSrr")>; -def: InstRW<[SKLWriteResGroup49], (instregex "VMINPDYrr")>; -def: InstRW<[SKLWriteResGroup49], (instregex "VMINPDrr")>; -def: InstRW<[SKLWriteResGroup49], (instregex 
"VMINPSYrr")>; -def: InstRW<[SKLWriteResGroup49], (instregex "VMINPSrr")>; -def: InstRW<[SKLWriteResGroup49], (instregex "VMINSDrr")>; -def: InstRW<[SKLWriteResGroup49], (instregex "VMINSSrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VMAX(C?)PDYrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VMAX(C?)PDrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VMAX(C?)PSYrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VMAX(C?)PSrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VMAX(C?)SDrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VMAX(C?)SSrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VMIN(C?)PDYrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VMIN(C?)PDrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VMIN(C?)PSYrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VMIN(C?)PSrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VMIN(C?)SDrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VMIN(C?)SSrr")>; def: InstRW<[SKLWriteResGroup49], (instregex "VPHMINPOSUWrr128")>; def: InstRW<[SKLWriteResGroup49], (instregex "VPMADDUBSWYrr")>; def: InstRW<[SKLWriteResGroup49], (instregex "VPMADDUBSWrr")>; @@ -1880,6 +1703,8 @@ def: InstRW<[SKLWriteResGroup58], (instregex "MOV64toPQIrm")>; def: InstRW<[SKLWriteResGroup58], (instregex "MOV8rm")>; def: InstRW<[SKLWriteResGroup58], (instregex "MOVDDUPrm")>; def: InstRW<[SKLWriteResGroup58], (instregex "MOVDI2PDIrm")>; +def: InstRW<[SKLWriteResGroup58], (instregex "MOVQI2PQIrm")>; +def: InstRW<[SKLWriteResGroup58], (instregex "MOVSDrm")>; def: InstRW<[SKLWriteResGroup58], (instregex "MOVSSrm")>; def: InstRW<[SKLWriteResGroup58], (instregex "MOVSX(16|32|64)rm16")>; def: InstRW<[SKLWriteResGroup58], (instregex "MOVSX(16|32|64)rm32")>; @@ -1915,7 +1740,7 @@ def: InstRW<[SKLWriteResGroup60], (instregex "CVTPD2DQrr")>; def: InstRW<[SKLWriteResGroup60], (instregex "CVTPD2PSrr")>; def: InstRW<[SKLWriteResGroup60], (instregex "CVTPS2PDrr")>; def: InstRW<[SKLWriteResGroup60], (instregex "CVTSD2SSrr")>; -def: InstRW<[SKLWriteResGroup60], (instregex "CVTSI2SD64rr")>; +def: InstRW<[SKLWriteResGroup60], (instregex "CVTSI642SDrr")>; def: InstRW<[SKLWriteResGroup60], (instregex "CVTSI2SDrr")>; def: InstRW<[SKLWriteResGroup60], (instregex "CVTSI2SSrr")>; def: InstRW<[SKLWriteResGroup60], (instregex "CVTSS2SDrr")>; @@ -1930,7 +1755,7 @@ def: InstRW<[SKLWriteResGroup60], (instregex "VCVTPH2PSrr")>; def: InstRW<[SKLWriteResGroup60], (instregex "VCVTPS2PDrr")>; def: InstRW<[SKLWriteResGroup60], (instregex "VCVTPS2PHrr")>; def: InstRW<[SKLWriteResGroup60], (instregex "VCVTSD2SSrr")>; -def: InstRW<[SKLWriteResGroup60], (instregex "VCVTSI2SD64rr")>; +def: InstRW<[SKLWriteResGroup60], (instregex "VCVTSI642SDrr")>; def: InstRW<[SKLWriteResGroup60], (instregex "VCVTSI2SDrr")>; def: InstRW<[SKLWriteResGroup60], (instregex "VCVTSI2SSrr")>; def: InstRW<[SKLWriteResGroup60], (instregex "VCVTSS2SDrr")>; @@ -2166,25 +1991,10 @@ def SKLWriteResGroup74 : SchedWriteRes<[SKLPort23,SKLPort06]> { } def: InstRW<[SKLWriteResGroup74], (instregex "ADC(16|32|64)rm")>; def: InstRW<[SKLWriteResGroup74], (instregex "ADC8rm")>; -def: InstRW<[SKLWriteResGroup74], (instregex "ADCX32rm")>; -def: InstRW<[SKLWriteResGroup74], (instregex "ADCX64rm")>; -def: InstRW<[SKLWriteResGroup74], (instregex "ADOX32rm")>; -def: InstRW<[SKLWriteResGroup74], (instregex "ADOX64rm")>; +def: InstRW<[SKLWriteResGroup74], (instregex "ADCX(32|64)rm")>; +def: InstRW<[SKLWriteResGroup74], (instregex "ADOX(32|64)rm")>; def: InstRW<[SKLWriteResGroup74], (instregex 
"BT(16|32|64)mi8")>; -def: InstRW<[SKLWriteResGroup74], (instregex "CMOVAE(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup74], (instregex "CMOVB(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup74], (instregex "CMOVE(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup74], (instregex "CMOVG(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup74], (instregex "CMOVGE(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup74], (instregex "CMOVL(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup74], (instregex "CMOVLE(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup74], (instregex "CMOVNE(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup74], (instregex "CMOVNO(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup74], (instregex "CMOVNP(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup74], (instregex "CMOVNS(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup74], (instregex "CMOVO(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup74], (instregex "CMOVP(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup74], (instregex "CMOVS(16|32|64)rm")>; +def: InstRW<[SKLWriteResGroup74], (instregex "CMOV(AE|B|E|G|GE|L|LE|NE|NO|NP|NS|O|P|S)(16|32|64)rm")>; def: InstRW<[SKLWriteResGroup74], (instregex "RORX32mi")>; def: InstRW<[SKLWriteResGroup74], (instregex "RORX64mi")>; def: InstRW<[SKLWriteResGroup74], (instregex "SARX32rm")>; @@ -2201,16 +2011,11 @@ def SKLWriteResGroup75 : SchedWriteRes<[SKLPort23,SKLPort15]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKLWriteResGroup75], (instregex "ANDN32rm")>; -def: InstRW<[SKLWriteResGroup75], (instregex "ANDN64rm")>; -def: InstRW<[SKLWriteResGroup75], (instregex "BLSI32rm")>; -def: InstRW<[SKLWriteResGroup75], (instregex "BLSI64rm")>; -def: InstRW<[SKLWriteResGroup75], (instregex "BLSMSK32rm")>; -def: InstRW<[SKLWriteResGroup75], (instregex "BLSMSK64rm")>; -def: InstRW<[SKLWriteResGroup75], (instregex "BLSR32rm")>; -def: InstRW<[SKLWriteResGroup75], (instregex "BLSR64rm")>; -def: InstRW<[SKLWriteResGroup75], (instregex "BZHI32rm")>; -def: InstRW<[SKLWriteResGroup75], (instregex "BZHI64rm")>; +def: InstRW<[SKLWriteResGroup75], (instregex "ANDN(32|64)rm")>; +def: InstRW<[SKLWriteResGroup75], (instregex "BLSI(32|64)rm")>; +def: InstRW<[SKLWriteResGroup75], (instregex "BLSMSK(32|64)rm")>; +def: InstRW<[SKLWriteResGroup75], (instregex "BLSR(32|64)rm")>; +def: InstRW<[SKLWriteResGroup75], (instregex "BZHI(32|64)rm")>; def: InstRW<[SKLWriteResGroup75], (instregex "MOVBE(16|32|64)rm")>; def SKLWriteResGroup76 : SchedWriteRes<[SKLPort23,SKLPort0156]> { @@ -2222,7 +2027,7 @@ def: InstRW<[SKLWriteResGroup76], (instregex "ADD(16|32|64)rm")>; def: InstRW<[SKLWriteResGroup76], (instregex "ADD8rm")>; def: InstRW<[SKLWriteResGroup76], (instregex "AND(16|32|64)rm")>; def: InstRW<[SKLWriteResGroup76], (instregex "AND8rm")>; -def: InstRW<[SKLWriteResGroup76], (instregex "CMP(16|32|64)mi8")>; +def: InstRW<[SKLWriteResGroup76], (instregex "CMP(16|32|64)mi")>; def: InstRW<[SKLWriteResGroup76], (instregex "CMP(16|32|64)mr")>; def: InstRW<[SKLWriteResGroup76], (instregex "CMP(16|32|64)rm")>; def: InstRW<[SKLWriteResGroup76], (instregex "CMP8mi")>; @@ -2230,8 +2035,7 @@ def: InstRW<[SKLWriteResGroup76], (instregex "CMP8mr")>; def: InstRW<[SKLWriteResGroup76], (instregex "CMP8rm")>; def: InstRW<[SKLWriteResGroup76], (instregex "OR(16|32|64)rm")>; def: InstRW<[SKLWriteResGroup76], (instregex "OR8rm")>; -def: InstRW<[SKLWriteResGroup76], (instregex "POP(16|32|64)r")>; -def: InstRW<[SKLWriteResGroup76], (instregex "POP(16|32|64)rmr")>; +def: InstRW<[SKLWriteResGroup76], (instregex "POP(16|32|64)r(mr)?")>; def: 
InstRW<[SKLWriteResGroup76], (instregex "SUB(16|32|64)rm")>; def: InstRW<[SKLWriteResGroup76], (instregex "SUB8rm")>; def: InstRW<[SKLWriteResGroup76], (instregex "TEST(16|32|64)mr")>; @@ -2263,8 +2067,8 @@ def SKLWriteResGroup78 : SchedWriteRes<[SKLPort5,SKLPort015]> { let NumMicroOps = 3; let ResourceCycles = [2,1]; } -def: InstRW<[SKLWriteResGroup78], (instregex "CVTSI2SS64rr")>; -def: InstRW<[SKLWriteResGroup78], (instregex "VCVTSI2SS64rr")>; +def: InstRW<[SKLWriteResGroup78], (instregex "CVTSI642SSrr")>; +def: InstRW<[SKLWriteResGroup78], (instregex "VCVTSI642SSrr")>; def SKLWriteResGroup79 : SchedWriteRes<[SKLPort1,SKLPort06,SKLPort0156]> { let Latency = 6; @@ -2314,11 +2118,11 @@ def SKLWriteResGroup83 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort015 let NumMicroOps = 4; let ResourceCycles = [1,1,1,1]; } -def: InstRW<[SKLWriteResGroup83], (instregex "ADD(16|32|64)mi8")>; +def: InstRW<[SKLWriteResGroup83], (instregex "ADD(16|32|64)mi")>; def: InstRW<[SKLWriteResGroup83], (instregex "ADD(16|32|64)mr")>; def: InstRW<[SKLWriteResGroup83], (instregex "ADD8mi")>; def: InstRW<[SKLWriteResGroup83], (instregex "ADD8mr")>; -def: InstRW<[SKLWriteResGroup83], (instregex "AND(16|32|64)mi8")>; +def: InstRW<[SKLWriteResGroup83], (instregex "AND(16|32|64)mi")>; def: InstRW<[SKLWriteResGroup83], (instregex "AND(16|32|64)mr")>; def: InstRW<[SKLWriteResGroup83], (instregex "AND8mi")>; def: InstRW<[SKLWriteResGroup83], (instregex "AND8mr")>; @@ -2330,17 +2134,17 @@ def: InstRW<[SKLWriteResGroup83], (instregex "NEG(16|32|64)m")>; def: InstRW<[SKLWriteResGroup83], (instregex "NEG8m")>; def: InstRW<[SKLWriteResGroup83], (instregex "NOT(16|32|64)m")>; def: InstRW<[SKLWriteResGroup83], (instregex "NOT8m")>; -def: InstRW<[SKLWriteResGroup83], (instregex "OR(16|32|64)mi8")>; +def: InstRW<[SKLWriteResGroup83], (instregex "OR(16|32|64)mi")>; def: InstRW<[SKLWriteResGroup83], (instregex "OR(16|32|64)mr")>; def: InstRW<[SKLWriteResGroup83], (instregex "OR8mi")>; def: InstRW<[SKLWriteResGroup83], (instregex "OR8mr")>; def: InstRW<[SKLWriteResGroup83], (instregex "POP(16|32|64)rmm")>; def: InstRW<[SKLWriteResGroup83], (instregex "PUSH(16|32|64)rmm")>; -def: InstRW<[SKLWriteResGroup83], (instregex "SUB(16|32|64)mi8")>; +def: InstRW<[SKLWriteResGroup83], (instregex "SUB(16|32|64)mi")>; def: InstRW<[SKLWriteResGroup83], (instregex "SUB(16|32|64)mr")>; def: InstRW<[SKLWriteResGroup83], (instregex "SUB8mi")>; def: InstRW<[SKLWriteResGroup83], (instregex "SUB8mr")>; -def: InstRW<[SKLWriteResGroup83], (instregex "XOR(16|32|64)mi8")>; +def: InstRW<[SKLWriteResGroup83], (instregex "XOR(16|32|64)mi")>; def: InstRW<[SKLWriteResGroup83], (instregex "XOR(16|32|64)mr")>; def: InstRW<[SKLWriteResGroup83], (instregex "XOR8mi")>; def: InstRW<[SKLWriteResGroup83], (instregex "XOR8mr")>; @@ -2641,8 +2445,7 @@ def SKLWriteResGroup93 : SchedWriteRes<[SKLPort23,SKLPort06]> { let NumMicroOps = 3; let ResourceCycles = [1,2]; } -def: InstRW<[SKLWriteResGroup93], (instregex "CMOVA(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup93], (instregex "CMOVBE(16|32|64)rm")>; +def: InstRW<[SKLWriteResGroup93], (instregex "CMOV(A|BE)(16|32|64)rm")>; def SKLWriteResGroup94 : SchedWriteRes<[SKLPort23,SKLPort0156]> { let Latency = 7; @@ -2693,8 +2496,7 @@ def SKLWriteResGroup99 : SchedWriteRes<[SKLPort23,SKLPort06,SKLPort15]> { let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } -def: InstRW<[SKLWriteResGroup99], (instregex "BEXTR32rm")>; -def: InstRW<[SKLWriteResGroup99], (instregex "BEXTR64rm")>; +def: InstRW<[SKLWriteResGroup99], (instregex 
"BEXTR(32|64)rm")>; def SKLWriteResGroup100 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort06]> { let Latency = 7; @@ -2776,15 +2578,13 @@ def SKLWriteResGroup107 : SchedWriteRes<[SKLPort1,SKLPort23]> { def: InstRW<[SKLWriteResGroup107], (instregex "BSF(16|32|64)rm")>; def: InstRW<[SKLWriteResGroup107], (instregex "BSR(16|32|64)rm")>; def: InstRW<[SKLWriteResGroup107], (instregex "IMUL64m")>; -def: InstRW<[SKLWriteResGroup107], (instregex "IMUL(32|64)rm(i8?)")>; +def: InstRW<[SKLWriteResGroup107], (instregex "IMUL(32|64)rm(i8)?")>; def: InstRW<[SKLWriteResGroup107], (instregex "IMUL8m")>; def: InstRW<[SKLWriteResGroup107], (instregex "LZCNT(16|32|64)rm")>; def: InstRW<[SKLWriteResGroup107], (instregex "MUL(16|32|64)m")>; def: InstRW<[SKLWriteResGroup107], (instregex "MUL8m")>; -def: InstRW<[SKLWriteResGroup107], (instregex "PDEP32rm")>; -def: InstRW<[SKLWriteResGroup107], (instregex "PDEP64rm")>; -def: InstRW<[SKLWriteResGroup107], (instregex "PEXT32rm")>; -def: InstRW<[SKLWriteResGroup107], (instregex "PEXT64rm")>; +def: InstRW<[SKLWriteResGroup107], (instregex "PDEP(32|64)rm")>; +def: InstRW<[SKLWriteResGroup107], (instregex "PEXT(32|64)rm")>; def: InstRW<[SKLWriteResGroup107], (instregex "POPCNT(16|32|64)rm")>; def: InstRW<[SKLWriteResGroup107], (instregex "TZCNT(16|32|64)rm")>; @@ -2793,7 +2593,7 @@ def SKLWriteResGroup107_16 : SchedWriteRes<[SKLPort1, SKLPort0156, SKLPort23]> { let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } -def: InstRW<[SKLWriteResGroup107_16], (instregex "IMUL16rm(i8?)")>; +def: InstRW<[SKLWriteResGroup107_16], (instregex "IMUL16rm(i8)?")>; def SKLWriteResGroup107_16_2 : SchedWriteRes<[SKLPort1, SKLPort0156, SKLPort23]> { let Latency = 3; @@ -3020,7 +2820,7 @@ def SKLWriteResGroup118 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort01 let NumMicroOps = 6; let ResourceCycles = [1,1,1,3]; } -def: InstRW<[SKLWriteResGroup118], (instregex "ADC(16|32|64)mi8")>; +def: InstRW<[SKLWriteResGroup118], (instregex "ADC(16|32|64)mi")>; def: InstRW<[SKLWriteResGroup118], (instregex "ADC8mi")>; def SKLWriteResGroup119 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort06,SKLPort0156]> { @@ -3032,7 +2832,7 @@ def: InstRW<[SKLWriteResGroup119], (instregex "ADC(16|32|64)mr")>; def: InstRW<[SKLWriteResGroup119], (instregex "ADC8mr")>; def: InstRW<[SKLWriteResGroup119], (instregex "CMPXCHG(16|32|64)rm")>; def: InstRW<[SKLWriteResGroup119], (instregex "CMPXCHG8rm")>; -def: InstRW<[SKLWriteResGroup119], (instregex "SBB(16|32|64)mi8")>; +def: InstRW<[SKLWriteResGroup119], (instregex "SBB(16|32|64)mi")>; def: InstRW<[SKLWriteResGroup119], (instregex "SBB(16|32|64)mr")>; def: InstRW<[SKLWriteResGroup119], (instregex "SBB8mi")>; def: InstRW<[SKLWriteResGroup119], (instregex "SBB8mr")>; @@ -3084,30 +2884,8 @@ def: InstRW<[SKLWriteResGroup122], (instregex "SUBSDrm")>; def: InstRW<[SKLWriteResGroup122], (instregex "SUBSSrm")>; def: InstRW<[SKLWriteResGroup122], (instregex "VADDSDrm")>; def: InstRW<[SKLWriteResGroup122], (instregex "VADDSSrm")>; -def: InstRW<[SKLWriteResGroup122], (instregex "VFMADD132SDm")>; -def: InstRW<[SKLWriteResGroup122], (instregex "VFMADD132SSm")>; -def: InstRW<[SKLWriteResGroup122], (instregex "VFMADD213SDm")>; -def: InstRW<[SKLWriteResGroup122], (instregex "VFMADD213SSm")>; -def: InstRW<[SKLWriteResGroup122], (instregex "VFMADD231SDm")>; -def: InstRW<[SKLWriteResGroup122], (instregex "VFMADD231SSm")>; -def: InstRW<[SKLWriteResGroup122], (instregex "VFMSUB132SDm")>; -def: InstRW<[SKLWriteResGroup122], (instregex "VFMSUB132SSm")>; -def: 
InstRW<[SKLWriteResGroup122], (instregex "VFMSUB213SDm")>; -def: InstRW<[SKLWriteResGroup122], (instregex "VFMSUB213SSm")>; -def: InstRW<[SKLWriteResGroup122], (instregex "VFMSUB231SDm")>; -def: InstRW<[SKLWriteResGroup122], (instregex "VFMSUB231SSm")>; -def: InstRW<[SKLWriteResGroup122], (instregex "VFNMADD132SDm")>; -def: InstRW<[SKLWriteResGroup122], (instregex "VFNMADD132SSm")>; -def: InstRW<[SKLWriteResGroup122], (instregex "VFNMADD213SDm")>; -def: InstRW<[SKLWriteResGroup122], (instregex "VFNMADD213SSm")>; -def: InstRW<[SKLWriteResGroup122], (instregex "VFNMADD231SDm")>; -def: InstRW<[SKLWriteResGroup122], (instregex "VFNMADD231SSm")>; -def: InstRW<[SKLWriteResGroup122], (instregex "VFNMSUB132SDm")>; -def: InstRW<[SKLWriteResGroup122], (instregex "VFNMSUB132SSm")>; -def: InstRW<[SKLWriteResGroup122], (instregex "VFNMSUB213SDm")>; -def: InstRW<[SKLWriteResGroup122], (instregex "VFNMSUB213SSm")>; -def: InstRW<[SKLWriteResGroup122], (instregex "VFNMSUB231SDm")>; -def: InstRW<[SKLWriteResGroup122], (instregex "VFNMSUB231SSm")>; +def: InstRW<[SKLWriteResGroup122], + (instregex "VF(N)?M(ADD|SUB)(132|213|231)S(D|S)m")>; def: InstRW<[SKLWriteResGroup122], (instregex "VMULSDrm")>; def: InstRW<[SKLWriteResGroup122], (instregex "VMULSSrm")>; def: InstRW<[SKLWriteResGroup122], (instregex "VSUBSDrm")>; @@ -3118,22 +2896,23 @@ def SKLWriteResGroup123 : SchedWriteRes<[SKLPort23,SKLPort015]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } +def: InstRW<[SKLWriteResGroup123], (instregex "CMPSDrm")>; def: InstRW<[SKLWriteResGroup123], (instregex "CMPSSrm")>; def: InstRW<[SKLWriteResGroup123], (instregex "CVTPS2PDrm")>; -def: InstRW<[SKLWriteResGroup123], (instregex "MAXSDrm")>; -def: InstRW<[SKLWriteResGroup123], (instregex "MAXSSrm")>; -def: InstRW<[SKLWriteResGroup123], (instregex "MINSDrm")>; -def: InstRW<[SKLWriteResGroup123], (instregex "MINSSrm")>; +def: InstRW<[SKLWriteResGroup123], (instregex "MAX(C?)SDrm")>; +def: InstRW<[SKLWriteResGroup123], (instregex "MAX(C?)SSrm")>; +def: InstRW<[SKLWriteResGroup123], (instregex "MIN(C?)SDrm")>; +def: InstRW<[SKLWriteResGroup123], (instregex "MIN(C?)SSrm")>; def: InstRW<[SKLWriteResGroup123], (instregex "MMX_CVTPS2PIirm")>; def: InstRW<[SKLWriteResGroup123], (instregex "MMX_CVTTPS2PIirm")>; def: InstRW<[SKLWriteResGroup123], (instregex "VCMPSDrm")>; def: InstRW<[SKLWriteResGroup123], (instregex "VCMPSSrm")>; def: InstRW<[SKLWriteResGroup123], (instregex "VCVTPH2PSrm")>; def: InstRW<[SKLWriteResGroup123], (instregex "VCVTPS2PDrm")>; -def: InstRW<[SKLWriteResGroup123], (instregex "VMAXSDrm")>; -def: InstRW<[SKLWriteResGroup123], (instregex "VMAXSSrm")>; -def: InstRW<[SKLWriteResGroup123], (instregex "VMINSDrm")>; -def: InstRW<[SKLWriteResGroup123], (instregex "VMINSSrm")>; +def: InstRW<[SKLWriteResGroup123], (instregex "VMAX(C?)SDrm")>; +def: InstRW<[SKLWriteResGroup123], (instregex "VMAX(C?)SSrm")>; +def: InstRW<[SKLWriteResGroup123], (instregex "VMIN(C?)SDrm")>; +def: InstRW<[SKLWriteResGroup123], (instregex "VMIN(C?)SSrm")>; def SKLWriteResGroup124 : SchedWriteRes<[SKLPort5,SKLPort015]> { let Latency = 9; @@ -3269,42 +3048,8 @@ def: InstRW<[SKLWriteResGroup134], (instregex "VADDPDrm")>; def: InstRW<[SKLWriteResGroup134], (instregex "VADDPSrm")>; def: InstRW<[SKLWriteResGroup134], (instregex "VADDSUBPDrm")>; def: InstRW<[SKLWriteResGroup134], (instregex "VADDSUBPSrm")>; -def: InstRW<[SKLWriteResGroup134], (instregex "VFMADD132PDm")>; -def: InstRW<[SKLWriteResGroup134], (instregex "VFMADD132PSm")>; -def: InstRW<[SKLWriteResGroup134], (instregex 
"VFMADD213PDm")>; -def: InstRW<[SKLWriteResGroup134], (instregex "VFMADD213PSm")>; -def: InstRW<[SKLWriteResGroup134], (instregex "VFMADD231PDm")>; -def: InstRW<[SKLWriteResGroup134], (instregex "VFMADD231PSm")>; -def: InstRW<[SKLWriteResGroup134], (instregex "VFMADDSUB132PDm")>; -def: InstRW<[SKLWriteResGroup134], (instregex "VFMADDSUB132PSm")>; -def: InstRW<[SKLWriteResGroup134], (instregex "VFMADDSUB213PDm")>; -def: InstRW<[SKLWriteResGroup134], (instregex "VFMADDSUB213PSm")>; -def: InstRW<[SKLWriteResGroup134], (instregex "VFMADDSUB231PDm")>; -def: InstRW<[SKLWriteResGroup134], (instregex "VFMADDSUB231PSm")>; -def: InstRW<[SKLWriteResGroup134], (instregex "VFMSUB132PDm")>; -def: InstRW<[SKLWriteResGroup134], (instregex "VFMSUB132PSm")>; -def: InstRW<[SKLWriteResGroup134], (instregex "VFMSUB213PDm")>; -def: InstRW<[SKLWriteResGroup134], (instregex "VFMSUB213PSm")>; -def: InstRW<[SKLWriteResGroup134], (instregex "VFMSUB231PDm")>; -def: InstRW<[SKLWriteResGroup134], (instregex "VFMSUB231PSm")>; -def: InstRW<[SKLWriteResGroup134], (instregex "VFMSUBADD132PDm")>; -def: InstRW<[SKLWriteResGroup134], (instregex "VFMSUBADD132PSm")>; -def: InstRW<[SKLWriteResGroup134], (instregex "VFMSUBADD213PDm")>; -def: InstRW<[SKLWriteResGroup134], (instregex "VFMSUBADD213PSm")>; -def: InstRW<[SKLWriteResGroup134], (instregex "VFMSUBADD231PDm")>; -def: InstRW<[SKLWriteResGroup134], (instregex "VFMSUBADD231PSm")>; -def: InstRW<[SKLWriteResGroup134], (instregex "VFNMADD132PDm")>; -def: InstRW<[SKLWriteResGroup134], (instregex "VFNMADD132PSm")>; -def: InstRW<[SKLWriteResGroup134], (instregex "VFNMADD213PDm")>; -def: InstRW<[SKLWriteResGroup134], (instregex "VFNMADD213PSm")>; -def: InstRW<[SKLWriteResGroup134], (instregex "VFNMADD231PDm")>; -def: InstRW<[SKLWriteResGroup134], (instregex "VFNMADD231PSm")>; -def: InstRW<[SKLWriteResGroup134], (instregex "VFNMSUB132PDm")>; -def: InstRW<[SKLWriteResGroup134], (instregex "VFNMSUB132PSm")>; -def: InstRW<[SKLWriteResGroup134], (instregex "VFNMSUB213PDm")>; -def: InstRW<[SKLWriteResGroup134], (instregex "VFNMSUB213PSm")>; -def: InstRW<[SKLWriteResGroup134], (instregex "VFNMSUB231PDm")>; -def: InstRW<[SKLWriteResGroup134], (instregex "VFNMSUB231PSm")>; +def: InstRW<[SKLWriteResGroup134], + (instregex "VF(N)?M(ADD|SUB|ADDSUB|SUBADD)(132|213|231)P(D|S)m")>; def: InstRW<[SKLWriteResGroup134], (instregex "VMULPDrm")>; def: InstRW<[SKLWriteResGroup134], (instregex "VMULPSrm")>; def: InstRW<[SKLWriteResGroup134], (instregex "VSUBPDrm")>; @@ -3321,10 +3066,10 @@ def: InstRW<[SKLWriteResGroup135], (instregex "CVTDQ2PSrm")>; def: InstRW<[SKLWriteResGroup135], (instregex "CVTPS2DQrm")>; def: InstRW<[SKLWriteResGroup135], (instregex "CVTSS2SDrm")>; def: InstRW<[SKLWriteResGroup135], (instregex "CVTTPS2DQrm")>; -def: InstRW<[SKLWriteResGroup135], (instregex "MAXPDrm")>; -def: InstRW<[SKLWriteResGroup135], (instregex "MAXPSrm")>; -def: InstRW<[SKLWriteResGroup135], (instregex "MINPDrm")>; -def: InstRW<[SKLWriteResGroup135], (instregex "MINPSrm")>; +def: InstRW<[SKLWriteResGroup135], (instregex "MAX(C?)PDrm")>; +def: InstRW<[SKLWriteResGroup135], (instregex "MAX(C?)PSrm")>; +def: InstRW<[SKLWriteResGroup135], (instregex "MIN(C?)PDrm")>; +def: InstRW<[SKLWriteResGroup135], (instregex "MIN(C?)PSrm")>; def: InstRW<[SKLWriteResGroup135], (instregex "PHMINPOSUWrm128")>; def: InstRW<[SKLWriteResGroup135], (instregex "PMADDUBSWrm")>; def: InstRW<[SKLWriteResGroup135], (instregex "PMADDWDrm")>; @@ -3341,10 +3086,10 @@ def: InstRW<[SKLWriteResGroup135], (instregex "VCVTPH2PSYrm")>; def: 
InstRW<[SKLWriteResGroup135], (instregex "VCVTPS2DQrm")>; def: InstRW<[SKLWriteResGroup135], (instregex "VCVTSS2SDrm")>; def: InstRW<[SKLWriteResGroup135], (instregex "VCVTTPS2DQrm")>; -def: InstRW<[SKLWriteResGroup135], (instregex "VMAXPDrm")>; -def: InstRW<[SKLWriteResGroup135], (instregex "VMAXPSrm")>; -def: InstRW<[SKLWriteResGroup135], (instregex "VMINPDrm")>; -def: InstRW<[SKLWriteResGroup135], (instregex "VMINPSrm")>; +def: InstRW<[SKLWriteResGroup135], (instregex "VMAX(C?)PDrm")>; +def: InstRW<[SKLWriteResGroup135], (instregex "VMAX(C?)PSrm")>; +def: InstRW<[SKLWriteResGroup135], (instregex "VMIN(C?)PDrm")>; +def: InstRW<[SKLWriteResGroup135], (instregex "VMIN(C?)PSrm")>; def: InstRW<[SKLWriteResGroup135], (instregex "VPHMINPOSUWrm128")>; def: InstRW<[SKLWriteResGroup135], (instregex "VPMADDUBSWrm")>; def: InstRW<[SKLWriteResGroup135], (instregex "VPMADDWDrm")>; @@ -3464,42 +3209,8 @@ def: InstRW<[SKLWriteResGroup147], (instregex "VADDPDYrm")>; def: InstRW<[SKLWriteResGroup147], (instregex "VADDPSYrm")>; def: InstRW<[SKLWriteResGroup147], (instregex "VADDSUBPDYrm")>; def: InstRW<[SKLWriteResGroup147], (instregex "VADDSUBPSYrm")>; -def: InstRW<[SKLWriteResGroup147], (instregex "VFMADD132PDYm")>; -def: InstRW<[SKLWriteResGroup147], (instregex "VFMADD132PSYm")>; -def: InstRW<[SKLWriteResGroup147], (instregex "VFMADD213PDYm")>; -def: InstRW<[SKLWriteResGroup147], (instregex "VFMADD213PSYm")>; -def: InstRW<[SKLWriteResGroup147], (instregex "VFMADD231PDYm")>; -def: InstRW<[SKLWriteResGroup147], (instregex "VFMADD231PSYm")>; -def: InstRW<[SKLWriteResGroup147], (instregex "VFMADDSUB132PDYm")>; -def: InstRW<[SKLWriteResGroup147], (instregex "VFMADDSUB132PSYm")>; -def: InstRW<[SKLWriteResGroup147], (instregex "VFMADDSUB213PDYm")>; -def: InstRW<[SKLWriteResGroup147], (instregex "VFMADDSUB213PSYm")>; -def: InstRW<[SKLWriteResGroup147], (instregex "VFMADDSUB231PDYm")>; -def: InstRW<[SKLWriteResGroup147], (instregex "VFMADDSUB231PSYm")>; -def: InstRW<[SKLWriteResGroup147], (instregex "VFMSUB132PDYm")>; -def: InstRW<[SKLWriteResGroup147], (instregex "VFMSUB132PSYm")>; -def: InstRW<[SKLWriteResGroup147], (instregex "VFMSUB213PDYm")>; -def: InstRW<[SKLWriteResGroup147], (instregex "VFMSUB213PSYm")>; -def: InstRW<[SKLWriteResGroup147], (instregex "VFMSUB231PDYm")>; -def: InstRW<[SKLWriteResGroup147], (instregex "VFMSUB231PSYm")>; -def: InstRW<[SKLWriteResGroup147], (instregex "VFMSUBADD132PDYm")>; -def: InstRW<[SKLWriteResGroup147], (instregex "VFMSUBADD132PSYm")>; -def: InstRW<[SKLWriteResGroup147], (instregex "VFMSUBADD213PDYm")>; -def: InstRW<[SKLWriteResGroup147], (instregex "VFMSUBADD213PSYm")>; -def: InstRW<[SKLWriteResGroup147], (instregex "VFMSUBADD231PDYm")>; -def: InstRW<[SKLWriteResGroup147], (instregex "VFMSUBADD231PSYm")>; -def: InstRW<[SKLWriteResGroup147], (instregex "VFNMADD132PDYm")>; -def: InstRW<[SKLWriteResGroup147], (instregex "VFNMADD132PSYm")>; -def: InstRW<[SKLWriteResGroup147], (instregex "VFNMADD213PDYm")>; -def: InstRW<[SKLWriteResGroup147], (instregex "VFNMADD213PSYm")>; -def: InstRW<[SKLWriteResGroup147], (instregex "VFNMADD231PDYm")>; -def: InstRW<[SKLWriteResGroup147], (instregex "VFNMADD231PSYm")>; -def: InstRW<[SKLWriteResGroup147], (instregex "VFNMSUB132PDYm")>; -def: InstRW<[SKLWriteResGroup147], (instregex "VFNMSUB132PSYm")>; -def: InstRW<[SKLWriteResGroup147], (instregex "VFNMSUB213PDYm")>; -def: InstRW<[SKLWriteResGroup147], (instregex "VFNMSUB213PSYm")>; -def: InstRW<[SKLWriteResGroup147], (instregex "VFNMSUB231PDYm")>; -def: InstRW<[SKLWriteResGroup147], 
(instregex "VFNMSUB231PSYm")>; +def: InstRW<[SKLWriteResGroup147], + (instregex "VF(N)?M(ADD|SUB|ADDSUB|SUBADD)(132|213|231)P(D|S)Ym")>; def: InstRW<[SKLWriteResGroup147], (instregex "VMULPDYrm")>; def: InstRW<[SKLWriteResGroup147], (instregex "VMULPSYrm")>; def: InstRW<[SKLWriteResGroup147], (instregex "VSUBPDYrm")>; @@ -3516,10 +3227,10 @@ def: InstRW<[SKLWriteResGroup148], (instregex "VCVTDQ2PSYrm")>; def: InstRW<[SKLWriteResGroup148], (instregex "VCVTPS2DQYrm")>; def: InstRW<[SKLWriteResGroup148], (instregex "VCVTPS2PDYrm")>; def: InstRW<[SKLWriteResGroup148], (instregex "VCVTTPS2DQYrm")>; -def: InstRW<[SKLWriteResGroup148], (instregex "VMAXPDYrm")>; -def: InstRW<[SKLWriteResGroup148], (instregex "VMAXPSYrm")>; -def: InstRW<[SKLWriteResGroup148], (instregex "VMINPDYrm")>; -def: InstRW<[SKLWriteResGroup148], (instregex "VMINPSYrm")>; +def: InstRW<[SKLWriteResGroup148], (instregex "VMAX(C?)PDYrm")>; +def: InstRW<[SKLWriteResGroup148], (instregex "VMAX(C?)PSYrm")>; +def: InstRW<[SKLWriteResGroup148], (instregex "VMIN(C?)PDYrm")>; +def: InstRW<[SKLWriteResGroup148], (instregex "VMIN(C?)PSYrm")>; def: InstRW<[SKLWriteResGroup148], (instregex "VPMADDUBSWYrm")>; def: InstRW<[SKLWriteResGroup148], (instregex "VPMADDWDYrm")>; def: InstRW<[SKLWriteResGroup148], (instregex "VPMULDQYrm")>; @@ -3965,42 +3676,28 @@ def SKLWriteResGroup196_1 : SchedWriteRes<[SKLPort0, SKLPort23, SKLPort5, SKLPor let NumMicroOps = 5; let ResourceCycles = [1,2,1,1]; } -def: InstRW<[SKLWriteResGroup196_1], (instregex "VGATHERDPSrm")>; -def: InstRW<[SKLWriteResGroup196_1], (instregex "VGATHERDPDrm")>; -def: InstRW<[SKLWriteResGroup196_1], (instregex "VGATHERQPDrm")>; -def: InstRW<[SKLWriteResGroup196_1], (instregex "VGATHERQPSrm")>; -def: InstRW<[SKLWriteResGroup196_1], (instregex "VPGATHERDDrm")>; -def: InstRW<[SKLWriteResGroup196_1], (instregex "VPGATHERDQrm")>; -def: InstRW<[SKLWriteResGroup196_1], (instregex "VPGATHERQDrm")>; -def: InstRW<[SKLWriteResGroup196_1], (instregex "VPGATHERQQrm")>; -def: InstRW<[SKLWriteResGroup196_1], (instregex "VPGATHERDDrm")>; -def: InstRW<[SKLWriteResGroup196_1], (instregex "VPGATHERQDrm")>; -def: InstRW<[SKLWriteResGroup196_1], (instregex "VPGATHERDQrm")>; -def: InstRW<[SKLWriteResGroup196_1], (instregex "VPGATHERQQrm")>; -def: InstRW<[SKLWriteResGroup196_1], (instregex "VGATHERDPSrm")>; -def: InstRW<[SKLWriteResGroup196_1], (instregex "VGATHERQPSrm")>; -def: InstRW<[SKLWriteResGroup196_1], (instregex "VGATHERDPDrm")>; -def: InstRW<[SKLWriteResGroup196_1], (instregex "VGATHERQPDrm")>; +def: InstRW<[SKLWriteResGroup196_1], (instrs VGATHERDPSrm, + VGATHERDPDrm, + VGATHERQPDrm, + VGATHERQPSrm, + VPGATHERDDrm, + VPGATHERDQrm, + VPGATHERQDrm, + VPGATHERQQrm)>; def SKLWriteResGroup196_2 : SchedWriteRes<[SKLPort0, SKLPort23, SKLPort5, SKLPort015]> { let Latency = 25; let NumMicroOps = 5; let ResourceCycles = [1,2,1,1]; } -def: InstRW<[SKLWriteResGroup196_2], (instregex "VGATHERDPSYrm")>; -def: InstRW<[SKLWriteResGroup196_2], (instregex "VGATHERQPDYrm")>; -def: InstRW<[SKLWriteResGroup196_2], (instregex "VGATHERQPSYrm")>; -def: InstRW<[SKLWriteResGroup196_2], (instregex "VPGATHERDDYrm")>; -def: InstRW<[SKLWriteResGroup196_2], (instregex "VPGATHERDQYrm")>; -def: InstRW<[SKLWriteResGroup196_2], (instregex "VPGATHERQDYrm")>; -def: InstRW<[SKLWriteResGroup196_2], (instregex "VPGATHERQQYrm")>; -def: InstRW<[SKLWriteResGroup196_2], (instregex "VPGATHERDDYrm")>; -def: InstRW<[SKLWriteResGroup196_2], (instregex "VPGATHERQDYrm")>; -def: InstRW<[SKLWriteResGroup196_2], (instregex "VPGATHERDQYrm")>; 
-def: InstRW<[SKLWriteResGroup196_2], (instregex "VPGATHERQQYrm")>; -def: InstRW<[SKLWriteResGroup196_2], (instregex "VGATHERDPSYrm")>; -def: InstRW<[SKLWriteResGroup196_2], (instregex "VGATHERQPSYrm")>; -def: InstRW<[SKLWriteResGroup196_2], (instregex "VGATHERDPDYrm")>; +def: InstRW<[SKLWriteResGroup196_2], (instrs VGATHERDPSYrm, + VGATHERQPDYrm, + VGATHERQPSYrm, + VPGATHERDDYrm, + VPGATHERDQYrm, + VPGATHERQDYrm, + VPGATHERQQYrm, + VGATHERDPDYrm)>; def SKLWriteResGroup197 : SchedWriteRes<[SKLPort0,SKLPort23]> { let Latency = 23; @@ -4099,8 +3796,8 @@ def SKLWriteResGroup209 : SchedWriteRes<[SKLPort5,SKLPort6,SKLPort23,SKLPort06,S let NumMicroOps = 23; let ResourceCycles = [1,5,3,4,10]; } -def: InstRW<[SKLWriteResGroup209], (instregex "IN32ri")>; -def: InstRW<[SKLWriteResGroup209], (instregex "IN32rr")>; +def: InstRW<[SKLWriteResGroup209], (instregex "IN(16|32)ri")>; +def: InstRW<[SKLWriteResGroup209], (instregex "IN(16|32)rr")>; def: InstRW<[SKLWriteResGroup209], (instregex "IN8ri")>; def: InstRW<[SKLWriteResGroup209], (instregex "IN8rr")>; @@ -4109,8 +3806,8 @@ def SKLWriteResGroup210 : SchedWriteRes<[SKLPort5,SKLPort6,SKLPort23,SKLPort237, let NumMicroOps = 23; let ResourceCycles = [1,5,2,1,4,10]; } -def: InstRW<[SKLWriteResGroup210], (instregex "OUT32ir")>; -def: InstRW<[SKLWriteResGroup210], (instregex "OUT32rr")>; +def: InstRW<[SKLWriteResGroup210], (instregex "OUT(16|32)ir")>; +def: InstRW<[SKLWriteResGroup210], (instregex "OUT(16|32)rr")>; def: InstRW<[SKLWriteResGroup210], (instregex "OUT8ir")>; def: InstRW<[SKLWriteResGroup210], (instregex "OUT8rr")>; @@ -4119,7 +3816,7 @@ def SKLWriteResGroup211 : SchedWriteRes<[SKLPort1,SKLPort6,SKLPort23,SKLPort0156 let NumMicroOps = 31; let ResourceCycles = [1,8,1,21]; } -def: InstRW<[SKLWriteResGroup211], (instregex "XRSTOR(64?)")>; +def: InstRW<[SKLWriteResGroup211], (instregex "XRSTOR(64)?")>; def SKLWriteResGroup212 : SchedWriteRes<[SKLPort1,SKLPort4,SKLPort5,SKLPort6,SKLPort23,SKLPort237,SKLPort15,SKLPort0156]> { let Latency = 40; @@ -4147,7 +3844,7 @@ def SKLWriteResGroup215 : SchedWriteRes<[SKLPort4,SKLPort6,SKLPort23,SKLPort237, let NumMicroOps = 40; let ResourceCycles = [1,11,1,1,26]; } -def: InstRW<[SKLWriteResGroup215], (instregex "XSAVE")>; +def: InstRW<[SKLWriteResGroup215], (instregex "^XSAVE$", "XSAVEC", "XSAVES")>; def SKLWriteResGroup216 : SchedWriteRes<[SKLPort4,SKLPort6,SKLPort23,SKLPort237,SKLPort0156]> { let Latency = 46; @@ -4162,7 +3859,6 @@ def SKLWriteResGroup217 : SchedWriteRes<[SKLPort0,SKLPort23,SKLPort05,SKLPort06, let ResourceCycles = [2,8,5,10,39]; } def: InstRW<[SKLWriteResGroup217], (instregex "FLDENVm")>; -def: InstRW<[SKLWriteResGroup217], (instregex "FLDENVm")>; def SKLWriteResGroup218 : SchedWriteRes<[SKLPort0,SKLPort6,SKLPort23,SKLPort05,SKLPort06,SKLPort15,SKLPort0156]> { let Latency = 63; @@ -4205,6 +3901,5 @@ def SKLWriteResGroup223 : SchedWriteRes<[SKLPort0,SKLPort1,SKLPort4,SKLPort5,SKL let ResourceCycles = [9,1,11,16,1,11,21,30]; } def: InstRW<[SKLWriteResGroup223], (instregex "FSTENVm")>; -def: InstRW<[SKLWriteResGroup223], (instregex "FSTENVm")>; } // SchedModel diff --git a/lib/Target/X86/X86SchedSkylakeServer.td b/lib/Target/X86/X86SchedSkylakeServer.td index 8ba1ac027ce2..de2ee18d4175 100755 --- a/lib/Target/X86/X86SchedSkylakeServer.td +++ b/lib/Target/X86/X86SchedSkylakeServer.td @@ -424,11 +424,11 @@ def: InstRW<[SKXWriteResGroup3], (instregex "MOVDDUPrr")>; def: InstRW<[SKXWriteResGroup3], (instregex "MOVDI2PDIrr")>; def: InstRW<[SKXWriteResGroup3], (instregex "MOVHLPSrr")>; def: 
InstRW<[SKXWriteResGroup3], (instregex "MOVLHPSrr")>; -def: InstRW<[SKXWriteResGroup3], (instregex "MOVSDrr(_REV?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "MOVSDrr(_REV)?")>; def: InstRW<[SKXWriteResGroup3], (instregex "MOVSHDUPrr")>; def: InstRW<[SKXWriteResGroup3], (instregex "MOVSLDUPrr")>; -def: InstRW<[SKXWriteResGroup3], (instregex "MOVUPDrr(_REV?)")>; -def: InstRW<[SKXWriteResGroup3], (instregex "MOVUPSrr(_REV?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "MOVUPDrr(_REV)?")>; +def: InstRW<[SKXWriteResGroup3], (instregex "MOVUPSrr(_REV)?")>; def: InstRW<[SKXWriteResGroup3], (instregex "PACKSSDWrr")>; def: InstRW<[SKXWriteResGroup3], (instregex "PACKSSWBrr")>; def: InstRW<[SKXWriteResGroup3], (instregex "PACKUSDWrr")>; @@ -487,7 +487,7 @@ def: InstRW<[SKXWriteResGroup3], (instregex "VMOVHLPSrr")>; def: InstRW<[SKXWriteResGroup3], (instregex "VMOVLHPSZrr(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup3], (instregex "VMOVLHPSrr")>; def: InstRW<[SKXWriteResGroup3], (instregex "VMOVSDZrr(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup3], (instregex "VMOVSDrr(_REV?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VMOVSDrr(_REV)?")>; def: InstRW<[SKXWriteResGroup3], (instregex "VMOVSHDUPYrr")>; def: InstRW<[SKXWriteResGroup3], (instregex "VMOVSHDUPZ128rr(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup3], (instregex "VMOVSHDUPZ256rr(b?)(k?)(z?)")>; @@ -498,11 +498,11 @@ def: InstRW<[SKXWriteResGroup3], (instregex "VMOVSLDUPZ128rr(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup3], (instregex "VMOVSLDUPZ256rr(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup3], (instregex "VMOVSLDUPZrr(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup3], (instregex "VMOVSLDUPrr")>; -def: InstRW<[SKXWriteResGroup3], (instregex "VMOVSSZrr(b?)(k?)(z?)(_REV?)")>; -def: InstRW<[SKXWriteResGroup3], (instregex "VMOVUPDYrr(_REV?)")>; -def: InstRW<[SKXWriteResGroup3], (instregex "VMOVUPDrr(_REV?)")>; -def: InstRW<[SKXWriteResGroup3], (instregex "VMOVUPSYrr(_REV?)")>; -def: InstRW<[SKXWriteResGroup3], (instregex "VMOVUPSrr(_REV?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VMOVSSZrr(b?)(k?)(z?)(_REV)?")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VMOVUPDYrr(_REV)?")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VMOVUPDrr(_REV)?")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VMOVUPSYrr(_REV)?")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VMOVUPSrr(_REV)?")>; def: InstRW<[SKXWriteResGroup3], (instregex "VPACKSSDWYrr")>; def: InstRW<[SKXWriteResGroup3], (instregex "VPACKSSDWZ128rr(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup3], (instregex "VPACKSSDWZ256rr(b?)(k?)(z?)")>; @@ -576,23 +576,23 @@ def: InstRW<[SKXWriteResGroup3], (instregex "VPSHUFDZri(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup3], (instregex "VPSHUFDri")>; def: InstRW<[SKXWriteResGroup3], (instregex "VPSHUFHWYri")>; def: InstRW<[SKXWriteResGroup3], (instregex "VPSHUFHWZ128r(b?)i(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup3], (instregex "VPSHUFHWZ128r(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPSHUFHWZ256r(b?)i(k?)(z?)")>; def: InstRW<[SKXWriteResGroup3], (instregex "VPSHUFHWZri(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup3], (instregex "VPSHUFHWri")>; def: InstRW<[SKXWriteResGroup3], (instregex "VPSHUFLWYri")>; def: InstRW<[SKXWriteResGroup3], (instregex "VPSHUFLWZ128r(b?)i(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup3], (instregex "VPSHUFLWZ128r(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPSHUFLWZ256r(b?)i(k?)(z?)")>; def: InstRW<[SKXWriteResGroup3], (instregex "VPSHUFLWZri(b?)(k?)(z?)")>; def: 
InstRW<[SKXWriteResGroup3], (instregex "VPSHUFLWri")>; def: InstRW<[SKXWriteResGroup3], (instregex "VPSLLDQYri")>; def: InstRW<[SKXWriteResGroup3], (instregex "VPSLLDQZ128rr(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup3], (instregex "VPSLLDQZ128rr(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup3], (instregex "VPSLLDQZ512rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPSLLDQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPSLLDQZrr(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup3], (instregex "VPSLLDQri")>; def: InstRW<[SKXWriteResGroup3], (instregex "VPSRLDQYri")>; def: InstRW<[SKXWriteResGroup3], (instregex "VPSRLDQZ128rr(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup3], (instregex "VPSRLDQZ128rr(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup3], (instregex "VPSRLDQZ512rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPSRLDQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPSRLDQZrr(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup3], (instregex "VPSRLDQri")>; def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKHBWYrr")>; def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKHBWZ128rr(b?)(k?)(z?)")>; @@ -632,6 +632,7 @@ def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKLQDQrr")>; def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKLWDYrr")>; def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKLWDZ128rr(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKLWDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKLWDZrr(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKLWDrr")>; def: InstRW<[SKXWriteResGroup3], (instregex "VSHUFPDYrri")>; def: InstRW<[SKXWriteResGroup3], (instregex "VSHUFPDZ128rri(b?)(k?)(z?)")>; @@ -979,7 +980,7 @@ def SKXWriteResGroup6 : SchedWriteRes<[SKXPort05]> { } def: InstRW<[SKXWriteResGroup6], (instregex "FINCSTP")>; def: InstRW<[SKXWriteResGroup6], (instregex "FNOP")>; -def: InstRW<[SKXWriteResGroup6], (instregex "MMX_MOVQ64rr(_REV?)")>; +def: InstRW<[SKXWriteResGroup6], (instregex "MMX_MOVQ64rr(_REV)?")>; def: InstRW<[SKXWriteResGroup6], (instregex "MMX_PABSBrr64")>; def: InstRW<[SKXWriteResGroup6], (instregex "MMX_PABSDrr64")>; def: InstRW<[SKXWriteResGroup6], (instregex "MMX_PABSWrr64")>; @@ -1004,13 +1005,11 @@ def SKXWriteResGroup7 : SchedWriteRes<[SKXPort06]> { let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[SKXWriteResGroup7], (instregex "ADC(16|32|64)ri8")>; -def: InstRW<[SKXWriteResGroup7], (instregex "ADC(16|32|64)rr(_REV?)")>; -def: InstRW<[SKXWriteResGroup7], (instregex "ADC8rr(_REV?)")>; -def: InstRW<[SKXWriteResGroup7], (instregex "ADCX32rr")>; -def: InstRW<[SKXWriteResGroup7], (instregex "ADCX64rr")>; -def: InstRW<[SKXWriteResGroup7], (instregex "ADOX32rr")>; -def: InstRW<[SKXWriteResGroup7], (instregex "ADOX64rr")>; +def: InstRW<[SKXWriteResGroup7], (instregex "ADC(16|32|64)ri")>; +def: InstRW<[SKXWriteResGroup7], (instregex "ADC(16|32|64)rr(_REV)?")>; +def: InstRW<[SKXWriteResGroup7], (instregex "ADC8rr(_REV)?")>; +def: InstRW<[SKXWriteResGroup7], (instregex "ADCX(32|64)rr")>; +def: InstRW<[SKXWriteResGroup7], (instregex "ADOX(32|64)rr")>; def: InstRW<[SKXWriteResGroup7], (instregex "BT(16|32|64)ri8")>; def: InstRW<[SKXWriteResGroup7], (instregex "BT(16|32|64)rr")>; def: InstRW<[SKXWriteResGroup7], (instregex "BTC(16|32|64)ri8")>; @@ -1021,92 +1020,32 @@ def: InstRW<[SKXWriteResGroup7], (instregex "BTS(16|32|64)ri8")>; def: InstRW<[SKXWriteResGroup7], (instregex "BTS(16|32|64)rr")>; def: 
InstRW<[SKXWriteResGroup7], (instregex "CDQ")>; def: InstRW<[SKXWriteResGroup7], (instregex "CLAC")>; -def: InstRW<[SKXWriteResGroup7], (instregex "CMOVAE(16|32|64)rr")>; -def: InstRW<[SKXWriteResGroup7], (instregex "CMOVB(16|32|64)rr")>; -def: InstRW<[SKXWriteResGroup7], (instregex "CMOVE(16|32|64)rr")>; -def: InstRW<[SKXWriteResGroup7], (instregex "CMOVG(16|32|64)rr")>; -def: InstRW<[SKXWriteResGroup7], (instregex "CMOVGE(16|32|64)rr")>; -def: InstRW<[SKXWriteResGroup7], (instregex "CMOVL(16|32|64)rr")>; -def: InstRW<[SKXWriteResGroup7], (instregex "CMOVLE(16|32|64)rr")>; -def: InstRW<[SKXWriteResGroup7], (instregex "CMOVNE(16|32|64)rr")>; -def: InstRW<[SKXWriteResGroup7], (instregex "CMOVNO(16|32|64)rr")>; -def: InstRW<[SKXWriteResGroup7], (instregex "CMOVNP(16|32|64)rr")>; -def: InstRW<[SKXWriteResGroup7], (instregex "CMOVNS(16|32|64)rr")>; -def: InstRW<[SKXWriteResGroup7], (instregex "CMOVO(16|32|64)rr")>; -def: InstRW<[SKXWriteResGroup7], (instregex "CMOVP(16|32|64)rr")>; -def: InstRW<[SKXWriteResGroup7], (instregex "CMOVS(16|32|64)rr")>; +def: InstRW<[SKXWriteResGroup7], (instregex "CMOV(AE|B|E|G|GE|L|LE|NE|NO|NP|NS|O|P|S)(16|32|64)rr")>; def: InstRW<[SKXWriteResGroup7], (instregex "CQO")>; -def: InstRW<[SKXWriteResGroup7], (instregex "JAE_1")>; -def: InstRW<[SKXWriteResGroup7], (instregex "JAE_4")>; -def: InstRW<[SKXWriteResGroup7], (instregex "JA_1")>; -def: InstRW<[SKXWriteResGroup7], (instregex "JA_4")>; -def: InstRW<[SKXWriteResGroup7], (instregex "JBE_1")>; -def: InstRW<[SKXWriteResGroup7], (instregex "JBE_4")>; -def: InstRW<[SKXWriteResGroup7], (instregex "JB_1")>; -def: InstRW<[SKXWriteResGroup7], (instregex "JB_4")>; -def: InstRW<[SKXWriteResGroup7], (instregex "JE_1")>; -def: InstRW<[SKXWriteResGroup7], (instregex "JE_4")>; -def: InstRW<[SKXWriteResGroup7], (instregex "JGE_1")>; -def: InstRW<[SKXWriteResGroup7], (instregex "JGE_4")>; -def: InstRW<[SKXWriteResGroup7], (instregex "JG_1")>; -def: InstRW<[SKXWriteResGroup7], (instregex "JG_4")>; -def: InstRW<[SKXWriteResGroup7], (instregex "JLE_1")>; -def: InstRW<[SKXWriteResGroup7], (instregex "JLE_4")>; -def: InstRW<[SKXWriteResGroup7], (instregex "JL_1")>; -def: InstRW<[SKXWriteResGroup7], (instregex "JL_4")>; +def: InstRW<[SKXWriteResGroup7], (instregex "J(A|AE|B|BE|E|G|GE|L|LE|NE|NO|NP|NS|O|P|S)_1")>; +def: InstRW<[SKXWriteResGroup7], (instregex "J(A|AE|B|BE|E|G|GE|L|LE|NE|NO|NP|NS|O|P|S)_4")>; def: InstRW<[SKXWriteResGroup7], (instregex "JMP_1")>; def: InstRW<[SKXWriteResGroup7], (instregex "JMP_4")>; -def: InstRW<[SKXWriteResGroup7], (instregex "JNE_1")>; -def: InstRW<[SKXWriteResGroup7], (instregex "JNE_4")>; -def: InstRW<[SKXWriteResGroup7], (instregex "JNO_1")>; -def: InstRW<[SKXWriteResGroup7], (instregex "JNO_4")>; -def: InstRW<[SKXWriteResGroup7], (instregex "JNP_1")>; -def: InstRW<[SKXWriteResGroup7], (instregex "JNP_4")>; -def: InstRW<[SKXWriteResGroup7], (instregex "JNS_1")>; -def: InstRW<[SKXWriteResGroup7], (instregex "JNS_4")>; -def: InstRW<[SKXWriteResGroup7], (instregex "JO_1")>; -def: InstRW<[SKXWriteResGroup7], (instregex "JO_4")>; -def: InstRW<[SKXWriteResGroup7], (instregex "JP_1")>; -def: InstRW<[SKXWriteResGroup7], (instregex "JP_4")>; -def: InstRW<[SKXWriteResGroup7], (instregex "JS_1")>; -def: InstRW<[SKXWriteResGroup7], (instregex "JS_4")>; -def: InstRW<[SKXWriteResGroup7], (instregex "RORX32ri")>; -def: InstRW<[SKXWriteResGroup7], (instregex "RORX64ri")>; +def: InstRW<[SKXWriteResGroup7], (instregex "RORX(32|64)ri")>; def: InstRW<[SKXWriteResGroup7], (instregex "SAR(16|32|64)r1")>; def: 
InstRW<[SKXWriteResGroup7], (instregex "SAR(16|32|64)ri")>; def: InstRW<[SKXWriteResGroup7], (instregex "SAR8r1")>; def: InstRW<[SKXWriteResGroup7], (instregex "SAR8ri")>; -def: InstRW<[SKXWriteResGroup7], (instregex "SARX32rr")>; -def: InstRW<[SKXWriteResGroup7], (instregex "SARX64rr")>; -def: InstRW<[SKXWriteResGroup7], (instregex "SBB(16|32|64)ri8")>; -def: InstRW<[SKXWriteResGroup7], (instregex "SBB(16|32|64)rr(_REV?)")>; -def: InstRW<[SKXWriteResGroup7], (instregex "SBB8rr(_REV?)")>; -def: InstRW<[SKXWriteResGroup7], (instregex "SETAEr")>; -def: InstRW<[SKXWriteResGroup7], (instregex "SETBr")>; -def: InstRW<[SKXWriteResGroup7], (instregex "SETEr")>; -def: InstRW<[SKXWriteResGroup7], (instregex "SETGEr")>; -def: InstRW<[SKXWriteResGroup7], (instregex "SETGr")>; -def: InstRW<[SKXWriteResGroup7], (instregex "SETLEr")>; -def: InstRW<[SKXWriteResGroup7], (instregex "SETLr")>; -def: InstRW<[SKXWriteResGroup7], (instregex "SETNEr")>; -def: InstRW<[SKXWriteResGroup7], (instregex "SETNOr")>; -def: InstRW<[SKXWriteResGroup7], (instregex "SETNPr")>; -def: InstRW<[SKXWriteResGroup7], (instregex "SETNSr")>; -def: InstRW<[SKXWriteResGroup7], (instregex "SETOr")>; -def: InstRW<[SKXWriteResGroup7], (instregex "SETPr")>; -def: InstRW<[SKXWriteResGroup7], (instregex "SETSr")>; +def: InstRW<[SKXWriteResGroup7], (instregex "SARX(32|64)rr")>; +def: InstRW<[SKXWriteResGroup7], (instregex "SBB(16|32|64)ri")>; +def: InstRW<[SKXWriteResGroup7], (instregex "SBB(16|32|64)rr(_REV)?")>; +def: InstRW<[SKXWriteResGroup7], (instregex "SBB8rr(_REV)?")>; +def: InstRW<[SKXWriteResGroup7], (instregex "SET(AE|B|E|G|GE|L|LE|NE|NO|NP|NS|O|P|S)r")>; def: InstRW<[SKXWriteResGroup7], (instregex "SHL(16|32|64)r1")>; def: InstRW<[SKXWriteResGroup7], (instregex "SHL(16|32|64)ri")>; def: InstRW<[SKXWriteResGroup7], (instregex "SHL8r1")>; def: InstRW<[SKXWriteResGroup7], (instregex "SHL8ri")>; -def: InstRW<[SKXWriteResGroup7], (instregex "SHLX32rr")>; -def: InstRW<[SKXWriteResGroup7], (instregex "SHLX64rr")>; +def: InstRW<[SKXWriteResGroup7], (instregex "SHLX(32|64)rr")>; def: InstRW<[SKXWriteResGroup7], (instregex "SHR(16|32|64)r1")>; def: InstRW<[SKXWriteResGroup7], (instregex "SHR(16|32|64)ri")>; def: InstRW<[SKXWriteResGroup7], (instregex "SHR8r1")>; def: InstRW<[SKXWriteResGroup7], (instregex "SHR8ri")>; -def: InstRW<[SKXWriteResGroup7], (instregex "SHRX32rr")>; -def: InstRW<[SKXWriteResGroup7], (instregex "SHRX64rr")>; +def: InstRW<[SKXWriteResGroup7], (instregex "SHRX(32|64)rr")>; def: InstRW<[SKXWriteResGroup7], (instregex "STAC")>; def SKXWriteResGroup8 : SchedWriteRes<[SKXPort15]> { @@ -1114,17 +1053,12 @@ def SKXWriteResGroup8 : SchedWriteRes<[SKXPort15]> { let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[SKXWriteResGroup8], (instregex "ANDN32rr")>; -def: InstRW<[SKXWriteResGroup8], (instregex "ANDN64rr")>; -def: InstRW<[SKXWriteResGroup8], (instregex "BLSI32rr")>; -def: InstRW<[SKXWriteResGroup8], (instregex "BLSI64rr")>; -def: InstRW<[SKXWriteResGroup8], (instregex "BLSMSK32rr")>; -def: InstRW<[SKXWriteResGroup8], (instregex "BLSMSK64rr")>; -def: InstRW<[SKXWriteResGroup8], (instregex "BLSR32rr")>; -def: InstRW<[SKXWriteResGroup8], (instregex "BLSR64rr")>; -def: InstRW<[SKXWriteResGroup8], (instregex "BZHI32rr")>; -def: InstRW<[SKXWriteResGroup8], (instregex "BZHI64rr")>; -def: InstRW<[SKXWriteResGroup8], (instregex "LEA(16|32|64)r")>; +def: InstRW<[SKXWriteResGroup8], (instregex "ANDN(32|64)rr")>; +def: InstRW<[SKXWriteResGroup8], (instregex "BLSI(32|64)rr")>; +def: InstRW<[SKXWriteResGroup8], 
(instregex "BLSMSK(32|64)rr")>; +def: InstRW<[SKXWriteResGroup8], (instregex "BLSR(32|64)rr")>; +def: InstRW<[SKXWriteResGroup8], (instregex "BZHI(32|64)rr")>; +def: InstRW<[SKXWriteResGroup8], (instregex "LEA(16|32|64)(_32)?r")>; def SKXWriteResGroup9 : SchedWriteRes<[SKXPort015]> { let Latency = 1; @@ -1138,12 +1072,12 @@ def: InstRW<[SKXWriteResGroup9], (instregex "ANDPSrr")>; def: InstRW<[SKXWriteResGroup9], (instregex "BLENDPDrri")>; def: InstRW<[SKXWriteResGroup9], (instregex "BLENDPSrri")>; def: InstRW<[SKXWriteResGroup9], (instregex "MMX_MOVD64from64rr")>; -def: InstRW<[SKXWriteResGroup9], (instregex "MOVAPDrr(_REV?)")>; -def: InstRW<[SKXWriteResGroup9], (instregex "MOVAPSrr(_REV?)")>; -def: InstRW<[SKXWriteResGroup9], (instregex "MOVDQArr(_REV?)")>; -def: InstRW<[SKXWriteResGroup9], (instregex "MOVDQUrr(_REV?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "MOVAPDrr(_REV)?")>; +def: InstRW<[SKXWriteResGroup9], (instregex "MOVAPSrr(_REV)?")>; +def: InstRW<[SKXWriteResGroup9], (instregex "MOVDQArr(_REV)?")>; +def: InstRW<[SKXWriteResGroup9], (instregex "MOVDQUrr(_REV)?")>; def: InstRW<[SKXWriteResGroup9], (instregex "MOVPQI2QIrr")>; -def: InstRW<[SKXWriteResGroup9], (instregex "MOVSSrr(_REV?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "MOVSSrr(_REV)?")>; def: InstRW<[SKXWriteResGroup9], (instregex "ORPDrr")>; def: InstRW<[SKXWriteResGroup9], (instregex "ORPSrr")>; def: InstRW<[SKXWriteResGroup9], (instregex "PADDBrr")>; @@ -1188,47 +1122,47 @@ def: InstRW<[SKXWriteResGroup9], (instregex "VBLENDPDYrri")>; def: InstRW<[SKXWriteResGroup9], (instregex "VBLENDPDrri")>; def: InstRW<[SKXWriteResGroup9], (instregex "VBLENDPSYrri")>; def: InstRW<[SKXWriteResGroup9], (instregex "VBLENDPSrri")>; -def: InstRW<[SKXWriteResGroup9], (instregex "VMOVAPDYrr(_REV?)")>; -def: InstRW<[SKXWriteResGroup9], (instregex "VMOVAPDZ128rr(b?)(k?)(z?)(_REV?)")>; -def: InstRW<[SKXWriteResGroup9], (instregex "VMOVAPDZ256rr(b?)(k?)(z?)(_REV?)")>; -def: InstRW<[SKXWriteResGroup9], (instregex "VMOVAPDZrr(b?)(k?)(z?)(_REV?)")>; -def: InstRW<[SKXWriteResGroup9], (instregex "VMOVAPDrr(_REV?)")>; -def: InstRW<[SKXWriteResGroup9], (instregex "VMOVAPSYrr(_REV?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVAPDYrr(_REV)?")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVAPDZ128rr(b?)(k?)(z?)(_REV)?")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVAPDZ256rr(b?)(k?)(z?)(_REV)?")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVAPDZrr(b?)(k?)(z?)(_REV)?")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVAPDrr(_REV)?")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVAPSYrr(_REV)?")>; def: InstRW<[SKXWriteResGroup9], (instregex "VMOVAPSZ128rr(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup9], (instregex "VMOVAPSZ256rr(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup9], (instregex "VMOVAPSZrr(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup9], (instregex "VMOVAPSrr(_REV?)")>; -def: InstRW<[SKXWriteResGroup9], (instregex "VMOVDQA32Z128rr(b?)(k?)(z?)(_REV?)")>; -def: InstRW<[SKXWriteResGroup9], (instregex "VMOVDQA32Z256rr(b?)(k?)(z?)(_REV?)")>; -def: InstRW<[SKXWriteResGroup9], (instregex "VMOVDQA32Zrr(b?)(k?)(z?)(_REV?)")>; -def: InstRW<[SKXWriteResGroup9], (instregex "VMOVDQA64Z128rr(b?)(k?)(z?)(_REV?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVAPSrr(_REV)?")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVDQA32Z128rr(b?)(k?)(z?)(_REV)?")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVDQA32Z256rr(b?)(k?)(z?)(_REV)?")>; +def: InstRW<[SKXWriteResGroup9], (instregex 
"VMOVDQA32Zrr(b?)(k?)(z?)(_REV)?")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVDQA64Z128rr(b?)(k?)(z?)(_REV)?")>; def: InstRW<[SKXWriteResGroup9], (instregex "VMOVDQA64Z256rr(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup9], (instregex "VMOVDQA64Zrr(b?)(k?)(z?)(_REV?)")>; -def: InstRW<[SKXWriteResGroup9], (instregex "VMOVDQAYrr(_REV?)")>; -def: InstRW<[SKXWriteResGroup9], (instregex "VMOVDQArr(_REV?)")>; -def: InstRW<[SKXWriteResGroup9], (instregex "VMOVDQU16Z128rr(b?)(k?)(z?)(_REV?)")>; -def: InstRW<[SKXWriteResGroup9], (instregex "VMOVDQU16Z256rr(b?)(k?)(z?)(_REV?)")>; -def: InstRW<[SKXWriteResGroup9], (instregex "VMOVDQU16Zrr(b?)(k?)(z?)(_REV?)")>; -def: InstRW<[SKXWriteResGroup9], (instregex "VMOVDQU32Z128rr(b?)(k?)(z?)(_REV?)")>; -def: InstRW<[SKXWriteResGroup9], (instregex "VMOVDQU32Z256rr(b?)(k?)(z?)(_REV?)")>; -def: InstRW<[SKXWriteResGroup9], (instregex "VMOVDQU32Zrr(b?)(k?)(z?)(_REV?)")>; -def: InstRW<[SKXWriteResGroup9], (instregex "VMOVDQU64Z128rr(b?)(k?)(z?)(_REV?)")>; -def: InstRW<[SKXWriteResGroup9], (instregex "VMOVDQU64Z256rr(b?)(k?)(z?)(_REV?)")>; -def: InstRW<[SKXWriteResGroup9], (instregex "VMOVDQU64Zrr(b?)(k?)(z?)(_REV?)")>; -def: InstRW<[SKXWriteResGroup9], (instregex "VMOVDQU8Z128rr(b?)(k?)(z?)(_REV?)")>; -def: InstRW<[SKXWriteResGroup9], (instregex "VMOVDQU8Z256rr(b?)(k?)(z?)(_REV?)")>; -def: InstRW<[SKXWriteResGroup9], (instregex "VMOVDQU8Zrr(b?)(k?)(z?)(_REV?)")>; -def: InstRW<[SKXWriteResGroup9], (instregex "VMOVDQUYrr(_REV?)")>; -def: InstRW<[SKXWriteResGroup9], (instregex "VMOVDQUrr(_REV?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVDQA64Zrr(b?)(k?)(z?)(_REV)?")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVDQAYrr(_REV)?")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVDQArr(_REV)?")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVDQU16Z128rr(b?)(k?)(z?)(_REV)?")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVDQU16Z256rr(b?)(k?)(z?)(_REV)?")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVDQU16Zrr(b?)(k?)(z?)(_REV)?")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVDQU32Z128rr(b?)(k?)(z?)(_REV)?")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVDQU32Z256rr(b?)(k?)(z?)(_REV)?")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVDQU32Zrr(b?)(k?)(z?)(_REV)?")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVDQU64Z128rr(b?)(k?)(z?)(_REV)?")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVDQU64Z256rr(b?)(k?)(z?)(_REV)?")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVDQU64Zrr(b?)(k?)(z?)(_REV)?")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVDQU8Z128rr(b?)(k?)(z?)(_REV)?")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVDQU8Z256rr(b?)(k?)(z?)(_REV)?")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVDQU8Zrr(b?)(k?)(z?)(_REV)?")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVDQUYrr(_REV)?")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVDQUrr(_REV)?")>; def: InstRW<[SKXWriteResGroup9], (instregex "VMOVPQI(2Q|Lo2PQ)IZrr(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup9], (instregex "VMOVPQI2QIrr")>; -def: InstRW<[SKXWriteResGroup9], (instregex "VMOVSSrr(_REV?)")>; -def: InstRW<[SKXWriteResGroup9], (instregex "VMOVUPDZ128rr(b?)(k?)(z?)(_REV?)")>; -def: InstRW<[SKXWriteResGroup9], (instregex "VMOVUPDZ256rr(b?)(k?)(z?)(_REV?)")>; -def: InstRW<[SKXWriteResGroup9], (instregex "VMOVUPDZrr(b?)(k?)(z?)(_REV?)")>; -def: InstRW<[SKXWriteResGroup9], (instregex "VMOVUPSZ128rr(b?)(k?)(z?)(_REV?)")>; -def: InstRW<[SKXWriteResGroup9], (instregex "VMOVUPSZ256rr(b?)(k?)(z?)(_REV?)")>; -def: 
InstRW<[SKXWriteResGroup9], (instregex "VMOVUPSZrr(b?)(k?)(z?)(_REV?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVSSrr(_REV)?")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVUPDZ128rr(b?)(k?)(z?)(_REV)?")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVUPDZ256rr(b?)(k?)(z?)(_REV)?")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVUPDZrr(b?)(k?)(z?)(_REV)?")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVUPSZ128rr(b?)(k?)(z?)(_REV)?")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVUPSZ256rr(b?)(k?)(z?)(_REV)?")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVUPSZrr(b?)(k?)(z?)(_REV)?")>; def: InstRW<[SKXWriteResGroup9], (instregex "VMOVZPQILo2PQIrr")>; def: InstRW<[SKXWriteResGroup9], (instregex "VORPDYrr")>; def: InstRW<[SKXWriteResGroup9], (instregex "VORPDZ128rr(b?)(k?)(z?)")>; @@ -1349,34 +1283,33 @@ def SKXWriteResGroup10 : SchedWriteRes<[SKXPort0156]> { let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[SKXWriteResGroup10], (instregex "ADD(16|32|64)ri8")>; -def: InstRW<[SKXWriteResGroup10], (instregex "ADD(16|32|64)rr(_REV?)")>; +def: InstRW<[SKXWriteResGroup10], (instregex "ADD(16|32|64)ri")>; +def: InstRW<[SKXWriteResGroup10], (instregex "ADD(16|32|64)rr(_REV)?")>; def: InstRW<[SKXWriteResGroup10], (instregex "ADD8i8")>; def: InstRW<[SKXWriteResGroup10], (instregex "ADD8ri")>; -def: InstRW<[SKXWriteResGroup10], (instregex "ADD8rr(_REV?)")>; -def: InstRW<[SKXWriteResGroup10], (instregex "AND(16|32|64)ri8")>; -def: InstRW<[SKXWriteResGroup10], (instregex "AND(16|32|64)rr(_REV?)")>; +def: InstRW<[SKXWriteResGroup10], (instregex "ADD8rr(_REV)?")>; +def: InstRW<[SKXWriteResGroup10], (instregex "AND(16|32|64)ri")>; +def: InstRW<[SKXWriteResGroup10], (instregex "AND(16|32|64)rr(_REV)?")>; def: InstRW<[SKXWriteResGroup10], (instregex "AND8i8")>; def: InstRW<[SKXWriteResGroup10], (instregex "AND8ri")>; -def: InstRW<[SKXWriteResGroup10], (instregex "AND8rr(_REV?)")>; +def: InstRW<[SKXWriteResGroup10], (instregex "AND8rr(_REV)?")>; def: InstRW<[SKXWriteResGroup10], (instregex "CBW")>; def: InstRW<[SKXWriteResGroup10], (instregex "CLC")>; def: InstRW<[SKXWriteResGroup10], (instregex "CMC")>; -def: InstRW<[SKXWriteResGroup10], (instregex "CMP(16|32|64)ri8")>; -def: InstRW<[SKXWriteResGroup10], (instregex "CMP(16|32|64)rr(_REV?)")>; +def: InstRW<[SKXWriteResGroup10], (instregex "CMP(16|32|64)ri")>; +def: InstRW<[SKXWriteResGroup10], (instregex "CMP(16|32|64)rr(_REV)?")>; def: InstRW<[SKXWriteResGroup10], (instregex "CMP8i8")>; def: InstRW<[SKXWriteResGroup10], (instregex "CMP8ri")>; -def: InstRW<[SKXWriteResGroup10], (instregex "CMP8rr(_REV?)")>; +def: InstRW<[SKXWriteResGroup10], (instregex "CMP8rr(_REV)?")>; def: InstRW<[SKXWriteResGroup10], (instregex "CWDE")>; def: InstRW<[SKXWriteResGroup10], (instregex "DEC(16|32|64)r")>; def: InstRW<[SKXWriteResGroup10], (instregex "DEC8r")>; def: InstRW<[SKXWriteResGroup10], (instregex "INC(16|32|64)r")>; def: InstRW<[SKXWriteResGroup10], (instregex "INC8r")>; def: InstRW<[SKXWriteResGroup10], (instregex "LAHF")>; -def: InstRW<[SKXWriteResGroup10], (instregex "MOV(16|32|64)rr(_REV?)")>; -def: InstRW<[SKXWriteResGroup10], (instregex "MOV8ri")>; -def: InstRW<[SKXWriteResGroup10], (instregex "MOV8ri_alt")>; -def: InstRW<[SKXWriteResGroup10], (instregex "MOV8rr(_REV?)")>; +def: InstRW<[SKXWriteResGroup10], (instregex "MOV(16|32|64)rr(_REV)?")>; +def: InstRW<[SKXWriteResGroup10], (instregex "MOV8ri(_alt)?")>; +def: InstRW<[SKXWriteResGroup10], (instregex "MOV8rr(_REV)?")>; def: InstRW<[SKXWriteResGroup10], 
(instregex "MOVSX(16|32|64)rr16")>; def: InstRW<[SKXWriteResGroup10], (instregex "MOVSX(16|32|64)rr32")>; def: InstRW<[SKXWriteResGroup10], (instregex "MOVSX(16|32|64)rr8")>; @@ -1387,11 +1320,11 @@ def: InstRW<[SKXWriteResGroup10], (instregex "NEG8r")>; def: InstRW<[SKXWriteResGroup10], (instregex "NOOP")>; def: InstRW<[SKXWriteResGroup10], (instregex "NOT(16|32|64)r")>; def: InstRW<[SKXWriteResGroup10], (instregex "NOT8r")>; -def: InstRW<[SKXWriteResGroup10], (instregex "OR(16|32|64)ri8")>; -def: InstRW<[SKXWriteResGroup10], (instregex "OR(16|32|64)rr(_REV?)")>; +def: InstRW<[SKXWriteResGroup10], (instregex "OR(16|32|64)ri")>; +def: InstRW<[SKXWriteResGroup10], (instregex "OR(16|32|64)rr(_REV)?")>; def: InstRW<[SKXWriteResGroup10], (instregex "OR8i8")>; def: InstRW<[SKXWriteResGroup10], (instregex "OR8ri")>; -def: InstRW<[SKXWriteResGroup10], (instregex "OR8rr(_REV?)")>; +def: InstRW<[SKXWriteResGroup10], (instregex "OR8rr(_REV)?")>; def: InstRW<[SKXWriteResGroup10], (instregex "SAHF")>; def: InstRW<[SKXWriteResGroup10], (instregex "SGDT64m")>; def: InstRW<[SKXWriteResGroup10], (instregex "SIDT64m")>; @@ -1399,22 +1332,22 @@ def: InstRW<[SKXWriteResGroup10], (instregex "SLDT64m")>; def: InstRW<[SKXWriteResGroup10], (instregex "SMSW16m")>; def: InstRW<[SKXWriteResGroup10], (instregex "STC")>; def: InstRW<[SKXWriteResGroup10], (instregex "STRm")>; -def: InstRW<[SKXWriteResGroup10], (instregex "SUB(16|32|64)ri8")>; -def: InstRW<[SKXWriteResGroup10], (instregex "SUB(16|32|64)rr(_REV?)")>; +def: InstRW<[SKXWriteResGroup10], (instregex "SUB(16|32|64)ri")>; +def: InstRW<[SKXWriteResGroup10], (instregex "SUB(16|32|64)rr(_REV)?")>; def: InstRW<[SKXWriteResGroup10], (instregex "SUB8i8")>; def: InstRW<[SKXWriteResGroup10], (instregex "SUB8ri")>; -def: InstRW<[SKXWriteResGroup10], (instregex "SUB8rr(_REV?)")>; +def: InstRW<[SKXWriteResGroup10], (instregex "SUB8rr(_REV)?")>; def: InstRW<[SKXWriteResGroup10], (instregex "SYSCALL")>; def: InstRW<[SKXWriteResGroup10], (instregex "TEST(16|32|64)rr")>; def: InstRW<[SKXWriteResGroup10], (instregex "TEST8i8")>; def: InstRW<[SKXWriteResGroup10], (instregex "TEST8ri")>; def: InstRW<[SKXWriteResGroup10], (instregex "TEST8rr")>; def: InstRW<[SKXWriteResGroup10], (instregex "XCHG(16|32|64)rr")>; -def: InstRW<[SKXWriteResGroup10], (instregex "XOR(16|32|64)ri8")>; -def: InstRW<[SKXWriteResGroup10], (instregex "XOR(16|32|64)rr(_REV?)")>; +def: InstRW<[SKXWriteResGroup10], (instregex "XOR(16|32|64)ri")>; +def: InstRW<[SKXWriteResGroup10], (instregex "XOR(16|32|64)rr(_REV)?")>; def: InstRW<[SKXWriteResGroup10], (instregex "XOR8i8")>; def: InstRW<[SKXWriteResGroup10], (instregex "XOR8ri")>; -def: InstRW<[SKXWriteResGroup10], (instregex "XOR8rr(_REV?)")>; +def: InstRW<[SKXWriteResGroup10], (instregex "XOR8rr(_REV)?")>; def SKXWriteResGroup11 : SchedWriteRes<[SKXPort4,SKXPort237]> { let Latency = 1; @@ -1449,6 +1382,7 @@ def: InstRW<[SKXWriteResGroup11], (instregex "MOVNTPSmr")>; def: InstRW<[SKXWriteResGroup11], (instregex "MOVPDI2DImr")>; def: InstRW<[SKXWriteResGroup11], (instregex "MOVPQI2QImr")>; def: InstRW<[SKXWriteResGroup11], (instregex "MOVPQIto64mr")>; +def: InstRW<[SKXWriteResGroup11], (instregex "MOVSDmr")>; def: InstRW<[SKXWriteResGroup11], (instregex "MOVSSmr")>; def: InstRW<[SKXWriteResGroup11], (instregex "MOVUPDmr")>; def: InstRW<[SKXWriteResGroup11], (instregex "MOVUPSmr")>; @@ -1561,9 +1495,9 @@ def: InstRW<[SKXWriteResGroup12], (instregex "MOVPQIto64rr")>; def: InstRW<[SKXWriteResGroup12], (instregex "PMOVMSKBrr")>; def: InstRW<[SKXWriteResGroup12], 
(instregex "UCOMISDrr")>; def: InstRW<[SKXWriteResGroup12], (instregex "UCOMISSrr")>; -def: InstRW<[SKXWriteResGroup12], (instregex "VCOMISDZrb")>; +def: InstRW<[SKXWriteResGroup12], (instregex "VCOMISDZrr(b?)")>; def: InstRW<[SKXWriteResGroup12], (instregex "VCOMISDrr")>; -def: InstRW<[SKXWriteResGroup12], (instregex "VCOMISSZrb")>; +def: InstRW<[SKXWriteResGroup12], (instregex "VCOMISSZrr(b?)")>; def: InstRW<[SKXWriteResGroup12], (instregex "VCOMISSrr")>; def: InstRW<[SKXWriteResGroup12], (instregex "VMOVMSKPDYrr")>; def: InstRW<[SKXWriteResGroup12], (instregex "VMOVMSKPDrr")>; @@ -1579,9 +1513,9 @@ def: InstRW<[SKXWriteResGroup12], (instregex "VTESTPDYrr")>; def: InstRW<[SKXWriteResGroup12], (instregex "VTESTPDrr")>; def: InstRW<[SKXWriteResGroup12], (instregex "VTESTPSYrr")>; def: InstRW<[SKXWriteResGroup12], (instregex "VTESTPSrr")>; -def: InstRW<[SKXWriteResGroup12], (instregex "VUCOMISDZrb")>; +def: InstRW<[SKXWriteResGroup12], (instregex "VUCOMISDZrr(b?)")>; def: InstRW<[SKXWriteResGroup12], (instregex "VUCOMISDrr")>; -def: InstRW<[SKXWriteResGroup12], (instregex "VUCOMISSZrb")>; +def: InstRW<[SKXWriteResGroup12], (instregex "VUCOMISSZrr(b?)")>; def: InstRW<[SKXWriteResGroup12], (instregex "VUCOMISSrr")>; def SKXWriteResGroup13 : SchedWriteRes<[SKXPort5]> { @@ -1617,8 +1551,7 @@ def SKXWriteResGroup15 : SchedWriteRes<[SKXPort06]> { let NumMicroOps = 2; let ResourceCycles = [2]; } -def: InstRW<[SKXWriteResGroup15], (instregex "CMOVA(16|32|64)rr")>; -def: InstRW<[SKXWriteResGroup15], (instregex "CMOVBE(16|32|64)rr")>; +def: InstRW<[SKXWriteResGroup15], (instregex "CMOV(A|BE)(16|32|64)rr")>; def: InstRW<[SKXWriteResGroup15], (instregex "ROL(16|32|64)r1")>; def: InstRW<[SKXWriteResGroup15], (instregex "ROL(16|32|64)ri")>; def: InstRW<[SKXWriteResGroup15], (instregex "ROL8r1")>; @@ -1627,8 +1560,7 @@ def: InstRW<[SKXWriteResGroup15], (instregex "ROR(16|32|64)r1")>; def: InstRW<[SKXWriteResGroup15], (instregex "ROR(16|32|64)ri")>; def: InstRW<[SKXWriteResGroup15], (instregex "ROR8r1")>; def: InstRW<[SKXWriteResGroup15], (instregex "ROR8ri")>; -def: InstRW<[SKXWriteResGroup15], (instregex "SETAr")>; -def: InstRW<[SKXWriteResGroup15], (instregex "SETBEr")>; +def: InstRW<[SKXWriteResGroup15], (instregex "SET(A|BE)r")>; def SKXWriteResGroup16 : SchedWriteRes<[SKXPort015]> { let Latency = 2; @@ -1719,8 +1651,7 @@ def SKXWriteResGroup22 : SchedWriteRes<[SKXPort06,SKXPort15]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKXWriteResGroup22], (instregex "BEXTR32rr")>; -def: InstRW<[SKXWriteResGroup22], (instregex "BEXTR64rr")>; +def: InstRW<[SKXWriteResGroup22], (instregex "BEXTR(32|64)rr")>; def: InstRW<[SKXWriteResGroup22], (instregex "BSWAP(16|32|64)r")>; def SKXWriteResGroup23 : SchedWriteRes<[SKXPort06,SKXPort0156]> { @@ -1770,20 +1701,7 @@ def SKXWriteResGroup26 : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort06]> { let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } -def: InstRW<[SKXWriteResGroup26], (instregex "SETAEm")>; -def: InstRW<[SKXWriteResGroup26], (instregex "SETBm")>; -def: InstRW<[SKXWriteResGroup26], (instregex "SETEm")>; -def: InstRW<[SKXWriteResGroup26], (instregex "SETGEm")>; -def: InstRW<[SKXWriteResGroup26], (instregex "SETGm")>; -def: InstRW<[SKXWriteResGroup26], (instregex "SETLEm")>; -def: InstRW<[SKXWriteResGroup26], (instregex "SETLm")>; -def: InstRW<[SKXWriteResGroup26], (instregex "SETNEm")>; -def: InstRW<[SKXWriteResGroup26], (instregex "SETNOm")>; -def: InstRW<[SKXWriteResGroup26], (instregex "SETNPm")>; -def: InstRW<[SKXWriteResGroup26], 
(instregex "SETNSm")>; -def: InstRW<[SKXWriteResGroup26], (instregex "SETOm")>; -def: InstRW<[SKXWriteResGroup26], (instregex "SETPm")>; -def: InstRW<[SKXWriteResGroup26], (instregex "SETSm")>; +def: InstRW<[SKXWriteResGroup26], (instregex "SET(AE|B|E|G|GE|L|LE|NE|NO|NP|NS|O|P|S)m")>; def SKXWriteResGroup27 : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort15]> { let Latency = 2; @@ -1797,8 +1715,7 @@ def SKXWriteResGroup28 : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort0156]> { let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } -def: InstRW<[SKXWriteResGroup28], (instregex "PUSH(16|32|64)r")>; -def: InstRW<[SKXWriteResGroup28], (instregex "PUSH(16|32|64)rmr")>; +def: InstRW<[SKXWriteResGroup28], (instregex "PUSH(16|32|64)r(mr)?")>; def: InstRW<[SKXWriteResGroup28], (instregex "PUSH64i8")>; def: InstRW<[SKXWriteResGroup28], (instregex "STOSB")>; def: InstRW<[SKXWriteResGroup28], (instregex "STOSL")>; @@ -1841,14 +1758,12 @@ def SKXWriteResGroup31 : SchedWriteRes<[SKXPort1]> { } def: InstRW<[SKXWriteResGroup31], (instregex "BSF(16|32|64)rr")>; def: InstRW<[SKXWriteResGroup31], (instregex "BSR(16|32|64)rr")>; -def: InstRW<[SKXWriteResGroup31], (instregex "IMUL64rr(i8?)")>; +def: InstRW<[SKXWriteResGroup31], (instregex "IMUL64rr(i8)?")>; def: InstRW<[SKXWriteResGroup31], (instregex "IMUL8r")>; def: InstRW<[SKXWriteResGroup31], (instregex "LZCNT(16|32|64)rr")>; def: InstRW<[SKXWriteResGroup31], (instregex "MUL8r")>; -def: InstRW<[SKXWriteResGroup31], (instregex "PDEP32rr")>; -def: InstRW<[SKXWriteResGroup31], (instregex "PDEP64rr")>; -def: InstRW<[SKXWriteResGroup31], (instregex "PEXT32rr")>; -def: InstRW<[SKXWriteResGroup31], (instregex "PEXT64rr")>; +def: InstRW<[SKXWriteResGroup31], (instregex "PDEP(32|64)rr")>; +def: InstRW<[SKXWriteResGroup31], (instregex "PEXT(32|64)rr")>; def: InstRW<[SKXWriteResGroup31], (instregex "POPCNT(16|32|64)rr")>; def: InstRW<[SKXWriteResGroup31], (instregex "SHLD(16|32|64)rri8")>; def: InstRW<[SKXWriteResGroup31], (instregex "SHRD(16|32|64)rri8")>; @@ -1859,13 +1774,13 @@ def SKXWriteResGroup31_16 : SchedWriteRes<[SKXPort1, SKXPort0156]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKXWriteResGroup31_16], (instregex "IMUL16rr(i8?)")>; +def: InstRW<[SKXWriteResGroup31_16], (instregex "IMUL16rr(i8)?")>; def SKXWriteResGroup31_32 : SchedWriteRes<[SKXPort1]> { let Latency = 3; let NumMicroOps = 1; } -def: InstRW<[SKXWriteResGroup31_32], (instregex "IMUL32rr(i8?)")>; +def: InstRW<[SKXWriteResGroup31_32], (instregex "IMUL32rr(i8)?")>; def SKXWriteResGroup32 : SchedWriteRes<[SKXPort5]> { let Latency = 3; @@ -1918,8 +1833,8 @@ def: InstRW<[SKXWriteResGroup32], (instregex "VCMPPDZrri(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup32], (instregex "VCMPPSZ128rri(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup32], (instregex "VCMPPSZ256rri(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup32], (instregex "VCMPPSZrri(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup32], (instregex "VCMPSDZrr(_Int?)(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup32], (instregex "VCMPSSZrr(_Int?)(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VCMPSDZrr(b?)(_Int)?(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VCMPSSZrr(b?)(_Int)?(k?)(z?)")>; def: InstRW<[SKXWriteResGroup32], (instregex "VDBPSADBWZ128rri(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup32], (instregex "VDBPSADBWZ256rri(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup32], (instregex "VDBPSADBWZrri(b?)(k?)(z?)")>; @@ -2140,7 +2055,8 @@ def: InstRW<[SKXWriteResGroup32], (instregex 
"VPMOVZXWQZ256rr(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup32], (instregex "VPMOVZXWQZrr(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup32], (instregex "VPSADBWYrr")>; def: InstRW<[SKXWriteResGroup32], (instregex "VPSADBWZ128rr(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup32], (instregex "VPSADBWZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPSADBWZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPSADBWZrr(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup32], (instregex "VPSADBWrr")>; def: InstRW<[SKXWriteResGroup32], (instregex "VPTESTMBZ128rr(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup32], (instregex "VPTESTMBZ256rr(b?)(k?)(z?)")>; @@ -2196,7 +2112,7 @@ def: InstRW<[SKXWriteResGroup33], (instregex "VPEXTRDZrr(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup33], (instregex "VPEXTRDrr")>; def: InstRW<[SKXWriteResGroup33], (instregex "VPEXTRQZrr(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup33], (instregex "VPEXTRQrr")>; -def: InstRW<[SKXWriteResGroup33], (instregex "VPEXTRWZrr(_REV?)")>; +def: InstRW<[SKXWriteResGroup33], (instregex "VPEXTRWZrr(_REV)?")>; def: InstRW<[SKXWriteResGroup33], (instregex "VPEXTRWri")>; def: InstRW<[SKXWriteResGroup33], (instregex "VPEXTRWrr_REV")>; def: InstRW<[SKXWriteResGroup33], (instregex "VPTESTYrr")>; @@ -2331,8 +2247,7 @@ def SKXWriteResGroup46 : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort06]> { let NumMicroOps = 4; let ResourceCycles = [1,1,2]; } -def: InstRW<[SKXWriteResGroup46], (instregex "SETAm")>; -def: InstRW<[SKXWriteResGroup46], (instregex "SETBEm")>; +def: InstRW<[SKXWriteResGroup46], (instregex "SET(A|BE)m")>; def SKXWriteResGroup47 : SchedWriteRes<[SKXPort4,SKXPort6,SKXPort237,SKXPort0156]> { let Latency = 3; @@ -2407,6 +2322,7 @@ def: InstRW<[SKXWriteResGroup50], (instregex "ADDSUBPDrr")>; def: InstRW<[SKXWriteResGroup50], (instregex "ADDSUBPSrr")>; def: InstRW<[SKXWriteResGroup50], (instregex "CMPPDrri")>; def: InstRW<[SKXWriteResGroup50], (instregex "CMPPSrri")>; +def: InstRW<[SKXWriteResGroup50], (instregex "CMPSDrr")>; def: InstRW<[SKXWriteResGroup50], (instregex "CMPSSrr")>; def: InstRW<[SKXWriteResGroup50], (instregex "CVTDQ2PSrr")>; def: InstRW<[SKXWriteResGroup50], (instregex "CVTPS2DQrr")>; @@ -2446,9 +2362,9 @@ def: InstRW<[SKXWriteResGroup50], (instregex "VADDPSZ128rr(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup50], (instregex "VADDPSZ256rr(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup50], (instregex "VADDPSZrr(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup50], (instregex "VADDPSrr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VADDSDZrr(_Int?)(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VADDSDZrr(b?)(_Int)?(k?)(z?)")>; def: InstRW<[SKXWriteResGroup50], (instregex "VADDSDrr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VADDSSZrr(_Int?)(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VADDSSZrr(b?)(_Int)?(k?)(z?)")>; def: InstRW<[SKXWriteResGroup50], (instregex "VADDSSrr")>; def: InstRW<[SKXWriteResGroup50], (instregex "VADDSUBPDYrr")>; def: InstRW<[SKXWriteResGroup50], (instregex "VADDSUBPDrr")>; @@ -2510,234 +2426,15 @@ def: InstRW<[SKXWriteResGroup50], (instregex "VFIXUPIMMPSZ256rri(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup50], (instregex "VFIXUPIMMPSZrri(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup50], (instregex "VFIXUPIMMSDrri(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup50], (instregex "VFIXUPIMMSSrri(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD132PDYr")>; -def: InstRW<[SKXWriteResGroup50], (instregex 
"VFMADD132PDZ128r(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD132PDZ256r(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD132PDZr(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD132PDr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD132PSYr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD132PSZ128r(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD132PSZ256r(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD132PSZr(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD132PSr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD132SDZr(_Int?)(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD132SDr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD132SSZr(_Int?)(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD132SSr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD213PDYr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD213PDZ128r(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD213PDZ256r(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD213PDZr(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD213PDr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD213PSYr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD213PSZ128r(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD213PSZ256r(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD213PSZr(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD213PSr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD213SDZr_Int(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD213SDr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD213SSZr_Int(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD213SSr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD231PDYr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD231PDZ128r(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD231PDZ256r(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD231PDZr(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD231PDr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD231PSYr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD231PSZ128r(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD231PSZ256r(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD231PSZr(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD231PSr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD231SDZr_Int(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD231SDr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD231SSZr_Int(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD231SSr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB132PDYr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB132PDZ128r(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB132PDZ256r(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB132PDZr(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB132PDr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB132PSYr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB132PSZ128r(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex 
"VFMADDSUB132PSZ256r(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB132PSZr(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB132PSr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB213PDYr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB213PDZ128r(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB213PDZ256r(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB213PDZr(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB213PDr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB213PSYr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB213PSZ128r(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB213PSZ256r(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB213PSZr(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB213PSr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB231PDYr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB231PDZ128r(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB231PDZ256r(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB231PDZr(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB231PDr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB231PSYr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB231PSZ128r(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB231PSZ256r(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB231PSZr(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB231PSr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB132PDYr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB132PDZ128r(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB132PDZ256r(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB132PDZr(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB132PDr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB132PSYr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB132PSZ128r(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB132PSZ256r(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB132PSZr(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB132PSr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB132SDZr_Int(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB132SDr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB132SSZr_Int(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB132SSr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB213PDYr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB213PDZ128r(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB213PDZ256r(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB213PDZr(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB213PDr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB213PSYr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB213PSZ128r(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB213PSZ256r(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB213PSZr(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB213PSr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB213SDZr_Int(b?)(k?)(z?)")>; -def: 
InstRW<[SKXWriteResGroup50], (instregex "VFMSUB213SDr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB213SSZr_Int(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB213SSr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB231PDYr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB231PDZ128r(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB231PDZ256r(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB231PDZr(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB231PDr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB231PSYr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB231PSZ128r(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB231PSZ256r(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB231PSZr(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB231PSr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB231SDZr_Int(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB231SDr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB231SSZr_Int(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB231SSr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD132PDYr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD132PDZ128r(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD132PDZ256r(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD132PDZr(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD132PDr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD132PSYr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD132PSZ128r(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD132PSZ256r(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD132PSZr(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD132PSr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD213PDYr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD213PDZ128r(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD213PDZ256r(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD213PDZr(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD213PDr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD213PSYr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD213PSZ128r(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD213PSZ256r(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD213PSZr(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD213PSr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD231PDYr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD231PDZ128r(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD231PDZ256r(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD231PDZr(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD231PDr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD231PSYr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD231PSZ128r(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD231PSZ256r(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD231PSZr(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD231PSr")>; -def: InstRW<[SKXWriteResGroup50], 
(instregex "VFNMADD132PDYr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD132PDZ128r(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD132PDZ256r(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD132PDZr(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD132PDr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD132PSYr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD132PSZ128r(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD132PSZ256r(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD132PSZr(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD132PSr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD132SDZr_Int(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD132SDr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD132SSZr_Int(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD132SSr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD213PDYr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD213PDZ128r(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD213PDZ256r(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD213PDZr(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD213PDr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD213PSYr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD213PSZ128r(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD213PSZ256r(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD213PSZr(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD213PSr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD213SDZr_Int(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD213SDr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD213SSZr_Int(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD213SSr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD231PDYr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD231PDZ128r(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD231PDZ256r(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD231PDZr(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD231PDr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD231PSYr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD231PSZ128r(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD231PSZ256r(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD231PSZr(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD231PSr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD231SDZr_Int(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD231SDr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD231SSZr_Int(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD231SSr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB132PDYr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB132PDZ128r(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB132PDZ256r(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB132PDZr(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB132PDr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB132PSYr")>; -def: InstRW<[SKXWriteResGroup50], 
(instregex "VFNMSUB132PSZ128r(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB132PSZ256r(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB132PSZr(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB132PSr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB132SDZr_Int(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB132SDr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB132SSZr_Int(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB132SSr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB213PDYr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB213PDZ128r(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB213PDZ256r(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB213PDZr(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB213PDr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB213PSYr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB213PSZ128r(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB213PSZ256r(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB213PSZr(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB213PSr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB213SDZr_Int(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB213SDr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB213SSZr_Int(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB213SSr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB231PDYr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB231PDZ128r(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB231PDZ256r(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB231PDZr(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB231PDr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB231PSYr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB231PSZ128r(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB231PSZ256r(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB231PSZr(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB231PSr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB231SDZr_Int(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB231SDr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB231SSZr_Int(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB231SSr")>; +def: InstRW<[SKXWriteResGroup50], + (instregex + "VF(N)?M(ADD|SUB|ADDSUB|SUBADD)(132|213|231)P(D|S)Yr", + "VF(N)?M(ADD|SUB|ADDSUB|SUBADD)(132|213|231)P(D|S)Z128r(b?)(k?)(z?)", + "VF(N)?M(ADD|SUB|ADDSUB|SUBADD)(132|213|231)P(D|S)Z256r(b?)(k?)(z?)", + "VF(N)?M(ADD|SUB|ADDSUB|SUBADD)(132|213|231)P(D|S)Zr(b?)(k?)(z?)", + "VF(N)?M(ADD|SUB|ADDSUB|SUBADD)(132|213|231)P(D|S)r", + "VF(N)?M(ADD|SUB)(132|213|231)S(D|S)Zr(b?)(_Int)?(k?)(z?)", + "VF(N)?M(ADD|SUB)(132|213|231)S(D|S)r")>; def: InstRW<[SKXWriteResGroup50], (instregex "VGETEXPPDZ128r(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup50], (instregex "VGETEXPPDZ256r(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup50], (instregex "VGETEXPPDr(b?)(k?)(z?)")>; @@ -2764,9 +2461,9 @@ def: InstRW<[SKXWriteResGroup50], (instregex "VMAX(C?)PSZ128rr(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup50], (instregex "VMAX(C?)PSZ256rr(b?)(k?)(z?)")>; def: 
InstRW<[SKXWriteResGroup50], (instregex "VMAX(C?)PSZrr(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup50], (instregex "VMAX(C?)PSrr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VMAX(C?)SDZrr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMAX(C?)SDZrr(b?)(_Int)?(k?)(z?)")>; def: InstRW<[SKXWriteResGroup50], (instregex "VMAX(C?)SDrr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VMAX(C?)SSZrr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMAX(C?)SSZrr(b?)(_Int)?(k?)(z?)")>; def: InstRW<[SKXWriteResGroup50], (instregex "VMAX(C?)SSrr")>; def: InstRW<[SKXWriteResGroup50], (instregex "VMIN(C?)PDYrr")>; def: InstRW<[SKXWriteResGroup50], (instregex "VMIN(C?)PDZ128rr(b?)(k?)(z?)")>; @@ -2778,9 +2475,9 @@ def: InstRW<[SKXWriteResGroup50], (instregex "VMIN(C?)PSZ128rr(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup50], (instregex "VMIN(C?)PSZ256rr(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup50], (instregex "VMIN(C?)PSZrr(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup50], (instregex "VMIN(C?)PSrr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VMIN(C?)SDZrr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMIN(C?)SDZrr(b?)(_Int)?(k?)(z?)")>; def: InstRW<[SKXWriteResGroup50], (instregex "VMIN(C?)SDrr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VMIN(C?)SSZrr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMIN(C?)SSZrr(b?)(_Int)?(k?)(z?)")>; def: InstRW<[SKXWriteResGroup50], (instregex "VMIN(C?)SSrr")>; def: InstRW<[SKXWriteResGroup50], (instregex "VMULPDYrr")>; def: InstRW<[SKXWriteResGroup50], (instregex "VMULPDZ128rr(b?)(k?)(z?)")>; @@ -2792,9 +2489,9 @@ def: InstRW<[SKXWriteResGroup50], (instregex "VMULPSZ128rr(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup50], (instregex "VMULPSZ256rr(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup50], (instregex "VMULPSZrr(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup50], (instregex "VMULPSrr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VMULSDZrr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMULSDZrr(b?)(_Int)?(k?)(z?)")>; def: InstRW<[SKXWriteResGroup50], (instregex "VMULSDrr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VMULSSZrr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMULSSZrr(b?)(_Int)?(k?)(z?)")>; def: InstRW<[SKXWriteResGroup50], (instregex "VMULSSrr")>; def: InstRW<[SKXWriteResGroup50], (instregex "VPHMINPOSUWrr128")>; def: InstRW<[SKXWriteResGroup50], (instregex "VPLZCNTDZ128rr(b?)(k?)(z?)")>; @@ -2835,6 +2532,7 @@ def: InstRW<[SKXWriteResGroup50], (instregex "VPMULHWZrr(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup50], (instregex "VPMULHWrr")>; def: InstRW<[SKXWriteResGroup50], (instregex "VPMULLWYrr")>; def: InstRW<[SKXWriteResGroup50], (instregex "VPMULLWZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMULLWZ256rr(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup50], (instregex "VPMULLWZrr(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup50], (instregex "VPMULLWrr")>; def: InstRW<[SKXWriteResGroup50], (instregex "VPMULUDQYrr")>; @@ -2876,9 +2574,9 @@ def: InstRW<[SKXWriteResGroup50], (instregex "VSUBPSZ128rr(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup50], (instregex "VSUBPSZ256rr(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup50], (instregex "VSUBPSZrr(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup50], (instregex "VSUBPSrr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VSUBSDZrr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VSUBSDZrr(b?)(_Int)?(k?)(z?)")>; def: 
InstRW<[SKXWriteResGroup50], (instregex "VSUBSDrr")>; -def: InstRW<[SKXWriteResGroup50], (instregex "VSUBSSZrr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VSUBSSZrr(b?)(_Int)?(k?)(z?)")>; def: InstRW<[SKXWriteResGroup50], (instregex "VSUBSSrr")>; def SKXWriteResGroup51 : SchedWriteRes<[SKXPort5]> { @@ -2999,6 +2697,7 @@ def: InstRW<[SKXWriteResGroup53], (instregex "VPSRLQZ256rr(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup53], (instregex "VPSRLQZrr(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup53], (instregex "VPSRLWYrr")>; def: InstRW<[SKXWriteResGroup53], (instregex "VPSRLWZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup53], (instregex "VPSRLWZrr(b?)(k?)(z?)")>; def SKXWriteResGroup54 : SchedWriteRes<[SKXPort4,SKXPort5,SKXPort237]> { let Latency = 4; @@ -3052,6 +2751,8 @@ def: InstRW<[SKXWriteResGroup58], (instregex "MOV64toPQIrm")>; def: InstRW<[SKXWriteResGroup58], (instregex "MOV8rm")>; def: InstRW<[SKXWriteResGroup58], (instregex "MOVDDUPrm")>; def: InstRW<[SKXWriteResGroup58], (instregex "MOVDI2PDIrm")>; +def: InstRW<[SKXWriteResGroup58], (instregex "MOVQI2PQIrm")>; +def: InstRW<[SKXWriteResGroup58], (instregex "MOVSDrm")>; def: InstRW<[SKXWriteResGroup58], (instregex "MOVSSrm")>; def: InstRW<[SKXWriteResGroup58], (instregex "MOVSX(16|32|64)rm16")>; def: InstRW<[SKXWriteResGroup58], (instregex "MOVSX(16|32|64)rm32")>; @@ -3074,7 +2775,7 @@ def SKXWriteResGroup59 : SchedWriteRes<[SKXPort015]> { let NumMicroOps = 2; let ResourceCycles = [2]; } -def: InstRW<[SKXWriteResGroup59], (instregex "VCVTSD2SSZrr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup59], (instregex "VCVTSD2SSZrr(b?)(_Int)?(k?)(z?)")>; def SKXWriteResGroup60 : SchedWriteRes<[SKXPort0,SKXPort5]> { let Latency = 5; @@ -3094,7 +2795,7 @@ def: InstRW<[SKXWriteResGroup61], (instregex "CVTPD2DQrr")>; def: InstRW<[SKXWriteResGroup61], (instregex "CVTPD2PSrr")>; def: InstRW<[SKXWriteResGroup61], (instregex "CVTPS2PDrr")>; def: InstRW<[SKXWriteResGroup61], (instregex "CVTSD2SSrr")>; -def: InstRW<[SKXWriteResGroup61], (instregex "CVTSI2SD64rr")>; +def: InstRW<[SKXWriteResGroup61], (instregex "CVTSI642SDrr")>; def: InstRW<[SKXWriteResGroup61], (instregex "CVTSI2SDrr")>; def: InstRW<[SKXWriteResGroup61], (instregex "CVTSI2SSrr")>; def: InstRW<[SKXWriteResGroup61], (instregex "CVTSS2SDrr")>; @@ -3119,13 +2820,13 @@ def: InstRW<[SKXWriteResGroup61], (instregex "VCVTPS2QQZ128rr(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup61], (instregex "VCVTPS2UQQZ128rr(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup61], (instregex "VCVTQQ2PSZ128rr(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup61], (instregex "VCVTSD2SSrr")>; -def: InstRW<[SKXWriteResGroup61], (instregex "VCVTSI2SD64rr")>; +def: InstRW<[SKXWriteResGroup61], (instregex "VCVTSI642SDrr")>; def: InstRW<[SKXWriteResGroup61], (instregex "VCVTSI2SDZrr(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup61], (instregex "VCVTSI2SDrr")>; def: InstRW<[SKXWriteResGroup61], (instregex "VCVTSI2SSZrr(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup61], (instregex "VCVTSI2SSrr")>; def: InstRW<[SKXWriteResGroup61], (instregex "VCVTSI642SDZrr(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup61], (instregex "VCVTSS2SDZrr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup61], (instregex "VCVTSS2SDZrr(b?)(_Int)?(k?)(z?)")>; def: InstRW<[SKXWriteResGroup61], (instregex "VCVTSS2SDrr")>; def: InstRW<[SKXWriteResGroup61], (instregex "VCVTTPD2DQZ128rr(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup61], (instregex "VCVTTPD2DQrr")>; @@ -3363,13 +3064,13 @@ def: InstRW<[SKXWriteResGroup74], 
(instregex "VCVTSS2SI64rr")>; def: InstRW<[SKXWriteResGroup74], (instregex "VCVTSS2SIZrr(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup74], (instregex "VCVTSS2SIrr")>; def: InstRW<[SKXWriteResGroup74], (instregex "VCVTSS2USIZrr(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup74], (instregex "VCVTTSD2SI64Zrb")>; +def: InstRW<[SKXWriteResGroup74], (instregex "VCVTTSD2SI64Zrr(b?)")>; def: InstRW<[SKXWriteResGroup74], (instregex "VCVTTSD2SI64rr")>; -def: InstRW<[SKXWriteResGroup74], (instregex "VCVTTSD2SIZrb")>; +def: InstRW<[SKXWriteResGroup74], (instregex "VCVTTSD2SIZrr(b?)")>; def: InstRW<[SKXWriteResGroup74], (instregex "VCVTTSD2SIrr")>; -def: InstRW<[SKXWriteResGroup74], (instregex "VCVTTSD2USI64Zrb")>; -def: InstRW<[SKXWriteResGroup74], (instregex "VCVTTSD2USIZrb")>; -def: InstRW<[SKXWriteResGroup74], (instregex "VCVTTSS2USIZrb")>; +def: InstRW<[SKXWriteResGroup74], (instregex "VCVTTSD2USI64Zrr(b?)")>; +def: InstRW<[SKXWriteResGroup74], (instregex "VCVTTSD2USIZrr(b?)")>; +def: InstRW<[SKXWriteResGroup74], (instregex "VCVTTSS2USIZrr(b?)")>; def SKXWriteResGroup75 : SchedWriteRes<[SKXPort5,SKXPort23]> { let Latency = 6; @@ -3474,51 +3175,27 @@ def SKXWriteResGroup78 : SchedWriteRes<[SKXPort23,SKXPort06]> { } def: InstRW<[SKXWriteResGroup78], (instregex "ADC(16|32|64)rm")>; def: InstRW<[SKXWriteResGroup78], (instregex "ADC8rm")>; -def: InstRW<[SKXWriteResGroup78], (instregex "ADCX32rm")>; -def: InstRW<[SKXWriteResGroup78], (instregex "ADCX64rm")>; -def: InstRW<[SKXWriteResGroup78], (instregex "ADOX32rm")>; -def: InstRW<[SKXWriteResGroup78], (instregex "ADOX64rm")>; +def: InstRW<[SKXWriteResGroup78], (instregex "ADCX(32|64)rm")>; +def: InstRW<[SKXWriteResGroup78], (instregex "ADOX(32|64)rm")>; def: InstRW<[SKXWriteResGroup78], (instregex "BT(16|32|64)mi8")>; -def: InstRW<[SKXWriteResGroup78], (instregex "CMOVAE(16|32|64)rm")>; -def: InstRW<[SKXWriteResGroup78], (instregex "CMOVB(16|32|64)rm")>; -def: InstRW<[SKXWriteResGroup78], (instregex "CMOVE(16|32|64)rm")>; -def: InstRW<[SKXWriteResGroup78], (instregex "CMOVG(16|32|64)rm")>; -def: InstRW<[SKXWriteResGroup78], (instregex "CMOVGE(16|32|64)rm")>; -def: InstRW<[SKXWriteResGroup78], (instregex "CMOVL(16|32|64)rm")>; -def: InstRW<[SKXWriteResGroup78], (instregex "CMOVLE(16|32|64)rm")>; -def: InstRW<[SKXWriteResGroup78], (instregex "CMOVNE(16|32|64)rm")>; -def: InstRW<[SKXWriteResGroup78], (instregex "CMOVNO(16|32|64)rm")>; -def: InstRW<[SKXWriteResGroup78], (instregex "CMOVNP(16|32|64)rm")>; -def: InstRW<[SKXWriteResGroup78], (instregex "CMOVNS(16|32|64)rm")>; -def: InstRW<[SKXWriteResGroup78], (instregex "CMOVO(16|32|64)rm")>; -def: InstRW<[SKXWriteResGroup78], (instregex "CMOVP(16|32|64)rm")>; -def: InstRW<[SKXWriteResGroup78], (instregex "CMOVS(16|32|64)rm")>; -def: InstRW<[SKXWriteResGroup78], (instregex "RORX32mi")>; -def: InstRW<[SKXWriteResGroup78], (instregex "RORX64mi")>; -def: InstRW<[SKXWriteResGroup78], (instregex "SARX32rm")>; -def: InstRW<[SKXWriteResGroup78], (instregex "SARX64rm")>; +def: InstRW<[SKXWriteResGroup78], (instregex "CMOV(AE|B|E|G|GE|L|LE|NE|NO|NP|NS|O|P|S)(16|32|64)rm")>; +def: InstRW<[SKXWriteResGroup78], (instregex "RORX(32|64)mi")>; +def: InstRW<[SKXWriteResGroup78], (instregex "SARX(32|64)rm")>; def: InstRW<[SKXWriteResGroup78], (instregex "SBB(16|32|64)rm")>; def: InstRW<[SKXWriteResGroup78], (instregex "SBB8rm")>; -def: InstRW<[SKXWriteResGroup78], (instregex "SHLX32rm")>; -def: InstRW<[SKXWriteResGroup78], (instregex "SHLX64rm")>; -def: InstRW<[SKXWriteResGroup78], (instregex "SHRX32rm")>; -def: 
InstRW<[SKXWriteResGroup78], (instregex "SHRX64rm")>; +def: InstRW<[SKXWriteResGroup78], (instregex "SHLX(32|64)rm")>; +def: InstRW<[SKXWriteResGroup78], (instregex "SHRX(32|64)rm")>; def SKXWriteResGroup79 : SchedWriteRes<[SKXPort23,SKXPort15]> { let Latency = 6; let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKXWriteResGroup79], (instregex "ANDN32rm")>; -def: InstRW<[SKXWriteResGroup79], (instregex "ANDN64rm")>; -def: InstRW<[SKXWriteResGroup79], (instregex "BLSI32rm")>; -def: InstRW<[SKXWriteResGroup79], (instregex "BLSI64rm")>; -def: InstRW<[SKXWriteResGroup79], (instregex "BLSMSK32rm")>; -def: InstRW<[SKXWriteResGroup79], (instregex "BLSMSK64rm")>; -def: InstRW<[SKXWriteResGroup79], (instregex "BLSR32rm")>; -def: InstRW<[SKXWriteResGroup79], (instregex "BLSR64rm")>; -def: InstRW<[SKXWriteResGroup79], (instregex "BZHI32rm")>; -def: InstRW<[SKXWriteResGroup79], (instregex "BZHI64rm")>; +def: InstRW<[SKXWriteResGroup79], (instregex "ANDN(32|64)rm")>; +def: InstRW<[SKXWriteResGroup79], (instregex "BLSI(32|64)rm")>; +def: InstRW<[SKXWriteResGroup79], (instregex "BLSMSK(32|64)rm")>; +def: InstRW<[SKXWriteResGroup79], (instregex "BLSR(32|64)rm")>; +def: InstRW<[SKXWriteResGroup79], (instregex "BZHI(32|64)rm")>; def: InstRW<[SKXWriteResGroup79], (instregex "MOVBE(16|32|64)rm")>; def SKXWriteResGroup80 : SchedWriteRes<[SKXPort23,SKXPort015]> { @@ -3538,7 +3215,7 @@ def: InstRW<[SKXWriteResGroup81], (instregex "ADD(16|32|64)rm")>; def: InstRW<[SKXWriteResGroup81], (instregex "ADD8rm")>; def: InstRW<[SKXWriteResGroup81], (instregex "AND(16|32|64)rm")>; def: InstRW<[SKXWriteResGroup81], (instregex "AND8rm")>; -def: InstRW<[SKXWriteResGroup81], (instregex "CMP(16|32|64)mi8")>; +def: InstRW<[SKXWriteResGroup81], (instregex "CMP(16|32|64)mi")>; def: InstRW<[SKXWriteResGroup81], (instregex "CMP(16|32|64)mr")>; def: InstRW<[SKXWriteResGroup81], (instregex "CMP(16|32|64)rm")>; def: InstRW<[SKXWriteResGroup81], (instregex "CMP8mi")>; @@ -3546,8 +3223,7 @@ def: InstRW<[SKXWriteResGroup81], (instregex "CMP8mr")>; def: InstRW<[SKXWriteResGroup81], (instregex "CMP8rm")>; def: InstRW<[SKXWriteResGroup81], (instregex "OR(16|32|64)rm")>; def: InstRW<[SKXWriteResGroup81], (instregex "OR8rm")>; -def: InstRW<[SKXWriteResGroup81], (instregex "POP(16|32|64)r")>; -def: InstRW<[SKXWriteResGroup81], (instregex "POP(16|32|64)rmr")>; +def: InstRW<[SKXWriteResGroup81], (instregex "POP(16|32|64)r(mr)?")>; def: InstRW<[SKXWriteResGroup81], (instregex "SUB(16|32|64)rm")>; def: InstRW<[SKXWriteResGroup81], (instregex "SUB8rm")>; def: InstRW<[SKXWriteResGroup81], (instregex "TEST(16|32|64)mr")>; @@ -3561,12 +3237,12 @@ def SKXWriteResGroup82 : SchedWriteRes<[SKXPort5,SKXPort015]> { let NumMicroOps = 3; let ResourceCycles = [2,1]; } -def: InstRW<[SKXWriteResGroup82], (instregex "CVTSI2SS64rr")>; +def: InstRW<[SKXWriteResGroup82], (instregex "CVTSI642SSrr")>; def: InstRW<[SKXWriteResGroup82], (instregex "HADDPDrr")>; def: InstRW<[SKXWriteResGroup82], (instregex "HADDPSrr")>; def: InstRW<[SKXWriteResGroup82], (instregex "HSUBPDrr")>; def: InstRW<[SKXWriteResGroup82], (instregex "HSUBPSrr")>; -def: InstRW<[SKXWriteResGroup82], (instregex "VCVTSI2SS64rr")>; +def: InstRW<[SKXWriteResGroup82], (instregex "VCVTSI642SSrr")>; def: InstRW<[SKXWriteResGroup82], (instregex "VCVTSI642SSZrr(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup82], (instregex "VCVTUSI642SSZrr(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup82], (instregex "VHADDPDYrr")>; @@ -3626,11 +3302,11 @@ def SKXWriteResGroup87 : 
SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort015 let NumMicroOps = 4; let ResourceCycles = [1,1,1,1]; } -def: InstRW<[SKXWriteResGroup87], (instregex "ADD(16|32|64)mi8")>; +def: InstRW<[SKXWriteResGroup87], (instregex "ADD(16|32|64)mi")>; def: InstRW<[SKXWriteResGroup87], (instregex "ADD(16|32|64)mr")>; def: InstRW<[SKXWriteResGroup87], (instregex "ADD8mi")>; def: InstRW<[SKXWriteResGroup87], (instregex "ADD8mr")>; -def: InstRW<[SKXWriteResGroup87], (instregex "AND(16|32|64)mi8")>; +def: InstRW<[SKXWriteResGroup87], (instregex "AND(16|32|64)mi")>; def: InstRW<[SKXWriteResGroup87], (instregex "AND(16|32|64)mr")>; def: InstRW<[SKXWriteResGroup87], (instregex "AND8mi")>; def: InstRW<[SKXWriteResGroup87], (instregex "AND8mr")>; @@ -3642,17 +3318,17 @@ def: InstRW<[SKXWriteResGroup87], (instregex "NEG(16|32|64)m")>; def: InstRW<[SKXWriteResGroup87], (instregex "NEG8m")>; def: InstRW<[SKXWriteResGroup87], (instregex "NOT(16|32|64)m")>; def: InstRW<[SKXWriteResGroup87], (instregex "NOT8m")>; -def: InstRW<[SKXWriteResGroup87], (instregex "OR(16|32|64)mi8")>; +def: InstRW<[SKXWriteResGroup87], (instregex "OR(16|32|64)mi")>; def: InstRW<[SKXWriteResGroup87], (instregex "OR(16|32|64)mr")>; def: InstRW<[SKXWriteResGroup87], (instregex "OR8mi")>; def: InstRW<[SKXWriteResGroup87], (instregex "OR8mr")>; def: InstRW<[SKXWriteResGroup87], (instregex "POP(16|32|64)rmm")>; def: InstRW<[SKXWriteResGroup87], (instregex "PUSH(16|32|64)rmm")>; -def: InstRW<[SKXWriteResGroup87], (instregex "SUB(16|32|64)mi8")>; +def: InstRW<[SKXWriteResGroup87], (instregex "SUB(16|32|64)mi")>; def: InstRW<[SKXWriteResGroup87], (instregex "SUB(16|32|64)mr")>; def: InstRW<[SKXWriteResGroup87], (instregex "SUB8mi")>; def: InstRW<[SKXWriteResGroup87], (instregex "SUB8mr")>; -def: InstRW<[SKXWriteResGroup87], (instregex "XOR(16|32|64)mi8")>; +def: InstRW<[SKXWriteResGroup87], (instregex "XOR(16|32|64)mi")>; def: InstRW<[SKXWriteResGroup87], (instregex "XOR(16|32|64)mr")>; def: InstRW<[SKXWriteResGroup87], (instregex "XOR8mi")>; def: InstRW<[SKXWriteResGroup87], (instregex "XOR8mr")>; @@ -4147,8 +3823,7 @@ def SKXWriteResGroup98 : SchedWriteRes<[SKXPort23,SKXPort06]> { let NumMicroOps = 3; let ResourceCycles = [1,2]; } -def: InstRW<[SKXWriteResGroup98], (instregex "CMOVA(16|32|64)rm")>; -def: InstRW<[SKXWriteResGroup98], (instregex "CMOVBE(16|32|64)rm")>; +def: InstRW<[SKXWriteResGroup98], (instregex "CMOV(A|BE)(16|32|64)rm")>; def SKXWriteResGroup99 : SchedWriteRes<[SKXPort23,SKXPort0156]> { let Latency = 7; @@ -4169,11 +3844,11 @@ def SKXWriteResGroup100 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort015]> { def: InstRW<[SKXWriteResGroup100], (instregex "CVTTSS2SI64rr")>; def: InstRW<[SKXWriteResGroup100], (instregex "CVTTSS2SIrr")>; def: InstRW<[SKXWriteResGroup100], (instregex "VCVTSS2USI64Zrr(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup100], (instregex "VCVTTSS2SI64Zrb")>; +def: InstRW<[SKXWriteResGroup100], (instregex "VCVTTSS2SI64Zrr(b?)")>; def: InstRW<[SKXWriteResGroup100], (instregex "VCVTTSS2SI64rr")>; -def: InstRW<[SKXWriteResGroup100], (instregex "VCVTTSS2SIZrb")>; +def: InstRW<[SKXWriteResGroup100], (instregex "VCVTTSS2SIZrr(b?)")>; def: InstRW<[SKXWriteResGroup100], (instregex "VCVTTSS2SIrr")>; -def: InstRW<[SKXWriteResGroup100], (instregex "VCVTTSS2USI64Zrb")>; +def: InstRW<[SKXWriteResGroup100], (instregex "VCVTTSS2USI64Zrr(b?)")>; def SKXWriteResGroup101 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort05]> { let Latency = 7; @@ -4213,8 +3888,7 @@ def SKXWriteResGroup105 : SchedWriteRes<[SKXPort23,SKXPort06,SKXPort15]> 
{ let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } -def: InstRW<[SKXWriteResGroup105], (instregex "BEXTR32rm")>; -def: InstRW<[SKXWriteResGroup105], (instregex "BEXTR64rm")>; +def: InstRW<[SKXWriteResGroup105], (instregex "BEXTR(32|64)rm")>; def SKXWriteResGroup106 : SchedWriteRes<[SKXPort4,SKXPort5,SKXPort237]> { let Latency = 7; @@ -4269,10 +3943,10 @@ def SKXWriteResGroup110 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort237,SKXPort015 let NumMicroOps = 7; let ResourceCycles = [1,2,2,2]; } -def: InstRW<[SKXWriteResGroup110], (instregex "VPSCATTERDQZ128mr(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup110], (instregex "VPSCATTERQQZ128mr(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup110], (instregex "VSCATTERDPDZ128mr(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup110], (instregex "VSCATTERQPDZ128mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup110], (instrs VPSCATTERDQZ128mr, + VPSCATTERQQZ128mr, + VSCATTERDPDZ128mr, + VSCATTERQPDZ128mr)>; def SKXWriteResGroup111 : SchedWriteRes<[SKXPort6,SKXPort06,SKXPort15,SKXPort0156]> { let Latency = 7; @@ -4286,27 +3960,27 @@ def SKXWriteResGroup112 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort237,SKXPort015 let NumMicroOps = 11; let ResourceCycles = [1,4,4,2]; } -def: InstRW<[SKXWriteResGroup112], (instregex "VPSCATTERDQZ256mr(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup112], (instregex "VPSCATTERQQZ256mr(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup112], (instregex "VSCATTERDPDZ256mr(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup112], (instregex "VSCATTERQPDZ256mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup112], (instrs VPSCATTERDQZ256mr, + VPSCATTERQQZ256mr, + VSCATTERDPDZ256mr, + VSCATTERQPDZ256mr)>; def SKXWriteResGroup113 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort237,SKXPort0156]> { let Latency = 7; let NumMicroOps = 19; let ResourceCycles = [1,8,8,2]; } -def: InstRW<[SKXWriteResGroup113], (instregex "VPSCATTERDQZmr(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup113], (instregex "VPSCATTERQQZmr(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup113], (instregex "VSCATTERDPDZmr(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup113], (instregex "VSCATTERQPDZmr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup113], (instrs VPSCATTERDQZmr, + VPSCATTERQQZmr, + VSCATTERDPDZmr, + VSCATTERQPDZmr)>; def SKXWriteResGroup114 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort5,SKXPort237,SKXPort0156]> { let Latency = 7; let NumMicroOps = 36; let ResourceCycles = [1,16,1,16,2]; } -def: InstRW<[SKXWriteResGroup114], (instregex "VSCATTERDPSZmr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup114], (instrs VSCATTERDPSZmr)>; def SKXWriteResGroup115 : SchedWriteRes<[SKXPort0]> { let Latency = 8; @@ -4362,15 +4036,13 @@ def SKXWriteResGroup118 : SchedWriteRes<[SKXPort1,SKXPort23]> { def: InstRW<[SKXWriteResGroup118], (instregex "BSF(16|32|64)rm")>; def: InstRW<[SKXWriteResGroup118], (instregex "BSR(16|32|64)rm")>; def: InstRW<[SKXWriteResGroup118], (instregex "IMUL64m")>; -def: InstRW<[SKXWriteResGroup118], (instregex "IMUL(32|64)rm(i8?)")>; +def: InstRW<[SKXWriteResGroup118], (instregex "IMUL(32|64)rm(i8)?")>; def: InstRW<[SKXWriteResGroup118], (instregex "IMUL8m")>; def: InstRW<[SKXWriteResGroup118], (instregex "LZCNT(16|32|64)rm")>; def: InstRW<[SKXWriteResGroup118], (instregex "MUL(16|32|64)m")>; def: InstRW<[SKXWriteResGroup118], (instregex "MUL8m")>; -def: InstRW<[SKXWriteResGroup118], (instregex "PDEP32rm")>; -def: InstRW<[SKXWriteResGroup118], (instregex "PDEP64rm")>; -def: InstRW<[SKXWriteResGroup118], (instregex "PEXT32rm")>; -def: InstRW<[SKXWriteResGroup118], (instregex 
"PEXT64rm")>; +def: InstRW<[SKXWriteResGroup118], (instregex "PDEP(32|64)rm")>; +def: InstRW<[SKXWriteResGroup118], (instregex "PEXT(32|64)rm")>; def: InstRW<[SKXWriteResGroup118], (instregex "POPCNT(16|32|64)rm")>; def: InstRW<[SKXWriteResGroup118], (instregex "TZCNT(16|32|64)rm")>; @@ -4379,7 +4051,7 @@ def SKXWriteResGroup118_16_1 : SchedWriteRes<[SKXPort1, SKXPort0156, SKXPort23]> let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } -def: InstRW<[SKXWriteResGroup118_16_1], (instregex "IMUL16rm(i8?)")>; +def: InstRW<[SKXWriteResGroup118_16_1], (instregex "IMUL16rm(i8)?")>; def SKXWriteResGroup118_16_2 : SchedWriteRes<[SKXPort1, SKXPort0156, SKXPort23]> { let Latency = 8; @@ -4451,15 +4123,15 @@ def: InstRW<[SKXWriteResGroup119], (instregex "VPSHUFDYmi")>; def: InstRW<[SKXWriteResGroup119], (instregex "VPSHUFDZ256m(b?)i(k?)(z?)")>; def: InstRW<[SKXWriteResGroup119], (instregex "VPSHUFDZm(b?)i(k?)(z?)")>; def: InstRW<[SKXWriteResGroup119], (instregex "VPSHUFHWYmi")>; -def: InstRW<[SKXWriteResGroup119], (instregex "VPSHUFHWZ128mi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPSHUFHWZ256mi(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup119], (instregex "VPSHUFHWZmi(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup119], (instregex "VPSHUFLWYmi")>; -def: InstRW<[SKXWriteResGroup119], (instregex "VPSHUFLWZ128mi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPSHUFLWZ256mi(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup119], (instregex "VPSHUFLWZmi(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup119], (instregex "VPSLLDQZ128rm(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup119], (instregex "VPSLLDQZ512rm(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup119], (instregex "VPSRLDQZ128rm(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup119], (instregex "VPSRLDQZ512rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPSLLDQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPSLLDQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPSRLDQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPSRLDQZrm(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup119], (instregex "VPUNPCKHBWYrm")>; def: InstRW<[SKXWriteResGroup119], (instregex "VPUNPCKHBWZ256rm(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup119], (instregex "VPUNPCKHBWZrm(b?)(k?)(z?)")>; @@ -4917,7 +4589,7 @@ def SKXWriteResGroup129 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort01 let NumMicroOps = 6; let ResourceCycles = [1,1,1,3]; } -def: InstRW<[SKXWriteResGroup129], (instregex "ADC(16|32|64)mi8")>; +def: InstRW<[SKXWriteResGroup129], (instregex "ADC(16|32|64)mi")>; def: InstRW<[SKXWriteResGroup129], (instregex "ADC8mi")>; def SKXWriteResGroup130 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> { @@ -4929,7 +4601,7 @@ def: InstRW<[SKXWriteResGroup130], (instregex "ADC(16|32|64)mr")>; def: InstRW<[SKXWriteResGroup130], (instregex "ADC8mr")>; def: InstRW<[SKXWriteResGroup130], (instregex "CMPXCHG(16|32|64)rm")>; def: InstRW<[SKXWriteResGroup130], (instregex "CMPXCHG8rm")>; -def: InstRW<[SKXWriteResGroup130], (instregex "SBB(16|32|64)mi8")>; +def: InstRW<[SKXWriteResGroup130], (instregex "SBB(16|32|64)mi")>; def: InstRW<[SKXWriteResGroup130], (instregex "SBB(16|32|64)mr")>; def: InstRW<[SKXWriteResGroup130], (instregex "SBB8mi")>; def: InstRW<[SKXWriteResGroup130], (instregex "SBB8mr")>; @@ -4939,33 +4611,33 @@ def SKXWriteResGroup131 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort5,SKXPort237,S let NumMicroOps = 8; let ResourceCycles = 
[1,2,1,2,2]; } -def: InstRW<[SKXWriteResGroup131], (instregex "VPSCATTERQDZ128mr(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup131], (instregex "VPSCATTERQDZ256mr(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup131], (instregex "VSCATTERQPSZ128mr(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup131], (instregex "VSCATTERQPSZ256mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup131], (instrs VPSCATTERQDZ128mr, + VPSCATTERQDZ256mr, + VSCATTERQPSZ128mr, + VSCATTERQPSZ256mr)>; def SKXWriteResGroup132 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort5,SKXPort237,SKXPort0156]> { let Latency = 8; let NumMicroOps = 12; let ResourceCycles = [1,4,1,4,2]; } -def: InstRW<[SKXWriteResGroup132], (instregex "VPSCATTERDDZ128mr(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup132], (instregex "VSCATTERDPSZ128mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup132], (instrs VPSCATTERDDZ128mr, + VSCATTERDPSZ128mr)>; def SKXWriteResGroup133 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort5,SKXPort237,SKXPort0156]> { let Latency = 8; let NumMicroOps = 20; let ResourceCycles = [1,8,1,8,2]; } -def: InstRW<[SKXWriteResGroup133], (instregex "VPSCATTERDDZ256mr(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup133], (instregex "VSCATTERDPSZ256mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup133], (instrs VPSCATTERDDZ256mr, + VSCATTERDPSZ256mr)>; def SKXWriteResGroup134 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort5,SKXPort237,SKXPort0156]> { let Latency = 8; let NumMicroOps = 36; let ResourceCycles = [1,16,1,16,2]; } -def: InstRW<[SKXWriteResGroup134], (instregex "VPSCATTERDDZmr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup134], (instrs VPSCATTERDDZmr)>; def SKXWriteResGroup135 : SchedWriteRes<[SKXPort0,SKXPort23]> { let Latency = 9; @@ -4998,8 +4670,8 @@ def: InstRW<[SKXWriteResGroup136], (instregex "VALIGNDZ128rm(b?)i(k?)(z?)")>; def: InstRW<[SKXWriteResGroup136], (instregex "VALIGNQZ128rm(b?)i(k?)(z?)")>; def: InstRW<[SKXWriteResGroup136], (instregex "VCMPPDZ128rm(b?)i(k?)(z?)")>; def: InstRW<[SKXWriteResGroup136], (instregex "VCMPPSZ128rm(b?)i(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup136], (instregex "VCMPSDZrm_Int(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup136], (instregex "VCMPSSZrm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup136], (instregex "VCMPSDZrm(_Int)?(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup136], (instregex "VCMPSSZrm(_Int)?(k?)(z?)")>; def: InstRW<[SKXWriteResGroup136], (instregex "VDBPSADBWZ128rmi(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup136], (instregex "VFPCLASSSSrm(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup136], (instregex "VPCMPBZ128rmi(b?)(k?)(z?)")>; @@ -5065,6 +4737,7 @@ def SKXWriteResGroup137 : SchedWriteRes<[SKXPort23,SKXPort015]> { } def: InstRW<[SKXWriteResGroup137], (instregex "ADDSDrm")>; def: InstRW<[SKXWriteResGroup137], (instregex "ADDSSrm")>; +def: InstRW<[SKXWriteResGroup137], (instregex "CMPSDrm")>; def: InstRW<[SKXWriteResGroup137], (instregex "CMPSSrm")>; def: InstRW<[SKXWriteResGroup137], (instregex "CVTPS2PDrm")>; def: InstRW<[SKXWriteResGroup137], (instregex "MAX(C?)SDrm")>; @@ -5083,30 +4756,8 @@ def: InstRW<[SKXWriteResGroup137], (instregex "VCMPSDrm")>; def: InstRW<[SKXWriteResGroup137], (instregex "VCMPSSrm")>; def: InstRW<[SKXWriteResGroup137], (instregex "VCVTPH2PSrm")>; def: InstRW<[SKXWriteResGroup137], (instregex "VCVTPS2PDrm")>; -def: InstRW<[SKXWriteResGroup137], (instregex "VFMADD132SDm")>; -def: InstRW<[SKXWriteResGroup137], (instregex "VFMADD132SSm")>; -def: InstRW<[SKXWriteResGroup137], (instregex "VFMADD213SDm")>; -def: InstRW<[SKXWriteResGroup137], (instregex 
"VFMADD213SSm")>; -def: InstRW<[SKXWriteResGroup137], (instregex "VFMADD231SDm")>; -def: InstRW<[SKXWriteResGroup137], (instregex "VFMADD231SSm")>; -def: InstRW<[SKXWriteResGroup137], (instregex "VFMSUB132SDm")>; -def: InstRW<[SKXWriteResGroup137], (instregex "VFMSUB132SSm")>; -def: InstRW<[SKXWriteResGroup137], (instregex "VFMSUB213SDm")>; -def: InstRW<[SKXWriteResGroup137], (instregex "VFMSUB213SSm")>; -def: InstRW<[SKXWriteResGroup137], (instregex "VFMSUB231SDm")>; -def: InstRW<[SKXWriteResGroup137], (instregex "VFMSUB231SSm")>; -def: InstRW<[SKXWriteResGroup137], (instregex "VFNMADD132SDm")>; -def: InstRW<[SKXWriteResGroup137], (instregex "VFNMADD132SSm")>; -def: InstRW<[SKXWriteResGroup137], (instregex "VFNMADD213SDm")>; -def: InstRW<[SKXWriteResGroup137], (instregex "VFNMADD213SSm")>; -def: InstRW<[SKXWriteResGroup137], (instregex "VFNMADD231SDm")>; -def: InstRW<[SKXWriteResGroup137], (instregex "VFNMADD231SSm")>; -def: InstRW<[SKXWriteResGroup137], (instregex "VFNMSUB132SDm")>; -def: InstRW<[SKXWriteResGroup137], (instregex "VFNMSUB132SSm")>; -def: InstRW<[SKXWriteResGroup137], (instregex "VFNMSUB213SDm")>; -def: InstRW<[SKXWriteResGroup137], (instregex "VFNMSUB213SSm")>; -def: InstRW<[SKXWriteResGroup137], (instregex "VFNMSUB231SDm")>; -def: InstRW<[SKXWriteResGroup137], (instregex "VFNMSUB231SSm")>; +def: InstRW<[SKXWriteResGroup137], + (instregex "VF(N)?M(ADD|SUB)(132|213|231)S(D|S)m")>; def: InstRW<[SKXWriteResGroup137], (instregex "VMAX(C?)SDrm")>; def: InstRW<[SKXWriteResGroup137], (instregex "VMAX(C?)SSrm")>; def: InstRW<[SKXWriteResGroup137], (instregex "VMIN(C?)SDrm")>; @@ -5352,7 +5003,8 @@ def: InstRW<[SKXWriteResGroup148], (instregex "VPMOVZXWQYrm")>; def: InstRW<[SKXWriteResGroup148], (instregex "VPMOVZXWQZ256rm(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup148], (instregex "VPMOVZXWQZrm(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup148], (instregex "VPSADBWYrm")>; -def: InstRW<[SKXWriteResGroup148], (instregex "VPSADBWZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPSADBWZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPSADBWZrm(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup148], (instregex "VPTESTMBZ256rm(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup148], (instregex "VPTESTMBZrm(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup148], (instregex "VPTESTMDZ256rm(b?)(k?)(z?)")>; @@ -5414,8 +5066,8 @@ def: InstRW<[SKXWriteResGroup149], (instregex "VADDPDZ128rm(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup149], (instregex "VADDPDrm")>; def: InstRW<[SKXWriteResGroup149], (instregex "VADDPSZ128rm(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup149], (instregex "VADDPSrm")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VADDSDZrm_Int(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VADDSSZrm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VADDSDZrm(_Int)?(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VADDSSZrm(_Int)?(k?)(z?)")>; def: InstRW<[SKXWriteResGroup149], (instregex "VADDSUBPDrm")>; def: InstRW<[SKXWriteResGroup149], (instregex "VADDSUBPSrm")>; def: InstRW<[SKXWriteResGroup149], (instregex "VCMPPDrmi")>; @@ -5434,7 +5086,8 @@ def: InstRW<[SKXWriteResGroup149], (instregex "VCVTPS2QQZ128rm(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup149], (instregex "VCVTPS2UDQZ128rm(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup149], (instregex "VCVTPS2UQQZ128rm(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup149], (instregex "VCVTQQ2PDZ128rm(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup149], 
(instregex "VCVTSS2SDZrm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VCVTQQ2PSZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VCVTSS2SDZrm(_Int)?(k?)(z?)")>; def: InstRW<[SKXWriteResGroup149], (instregex "VCVTSS2SDrm")>; def: InstRW<[SKXWriteResGroup149], (instregex "VCVTTPD2QQZ128rm(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup149], (instregex "VCVTTPD2UQQZ128rm(b?)(k?)(z?)")>; @@ -5446,106 +5099,16 @@ def: InstRW<[SKXWriteResGroup149], (instregex "VCVTTPS2UQQZ128rm(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup149], (instregex "VCVTUDQ2PDZ128rm(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup149], (instregex "VCVTUDQ2PSZ128rm(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup149], (instregex "VCVTUQQ2PDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VCVTUQQ2PSZ128rm(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup149], (instregex "VFIXUPIMMPDZ128rm(b?)i(k?)(z?)")>; def: InstRW<[SKXWriteResGroup149], (instregex "VFIXUPIMMPSZ128rm(b?)i(k?)(z?)")>; def: InstRW<[SKXWriteResGroup149], (instregex "VFIXUPIMMSDrmi(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup149], (instregex "VFIXUPIMMSSrmi(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFMADD132PDZ128m(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFMADD132PDm")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFMADD132PSZ128m(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFMADD132PSm")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFMADD132SDZm_Int(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFMADD132SSZm_Int(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFMADD213PDZ128m(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFMADD213PDm")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFMADD213PSZ128m(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFMADD213PSm")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFMADD213SDZm_Int(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFMADD213SSZm_Int(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFMADD231PDZ128m(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFMADD231PDm")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFMADD231PSZ128m(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFMADD231PSm")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFMADD231SDZm_Int(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFMADD231SSZm_Int(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFMADDSUB132PDZ128m(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFMADDSUB132PDm")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFMADDSUB132PSZ128m(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFMADDSUB132PSm")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFMADDSUB213PDZ128m(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFMADDSUB213PDm")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFMADDSUB213PSZ128m(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFMADDSUB213PSm")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFMADDSUB231PDZ128m(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFMADDSUB231PDm")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFMADDSUB231PSZ128m(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFMADDSUB231PSm")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUB132PDZ128m(b?)(k?)(z?)")>; -def: 
InstRW<[SKXWriteResGroup149], (instregex "VFMSUB132PDm")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUB132PSZ128m(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUB132PSm")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUB132SDZm_Int(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUB132SSZm_Int(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUB213PDZ128m(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUB213PDm")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUB213PSZ128m(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUB213PSm")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUB213SDZm_Int(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUB213SSZm_Int(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUB231PDZ128m(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUB231PDm")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUB231PSZ128m(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUB231PSm")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUB231SDZm_Int(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUB231SSZm_Int(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUBADD132PDZ128m(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUBADD132PDm")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUBADD132PSZ128m(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUBADD132PSm")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUBADD213PDZ128m(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUBADD213PDm")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUBADD213PSZ128m(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUBADD213PSm")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUBADD231PDZ128m(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUBADD231PDm")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUBADD231PSZ128m(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUBADD231PSm")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFNMADD132PDZ128m(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFNMADD132PDm")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFNMADD132PSZ128m(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFNMADD132PSm")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFNMADD132SDZm_Int(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFNMADD132SSZm_Int(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFNMADD213PDZ128m(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFNMADD213PDm")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFNMADD213PSZ128m(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFNMADD213PSm")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFNMADD213SDZm_Int(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFNMADD213SSZm_Int(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFNMADD231PDZ128m(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFNMADD231PDm")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFNMADD231PSZ128m(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFNMADD231PSm")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFNMADD231SDZm_Int(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup149], (instregex 
"VFNMADD231SSZm_Int(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFNMSUB132PDZ128m(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFNMSUB132PDm")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFNMSUB132PSZ128m(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFNMSUB132PSm")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFNMSUB132SDZm_Int(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFNMSUB132SSZm_Int(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFNMSUB213PDZ128m(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFNMSUB213PDm")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFNMSUB213PSZ128m(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFNMSUB213PSm")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFNMSUB213SDZm_Int(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFNMSUB213SSZm_Int(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFNMSUB231PDZ128m(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFNMSUB231PDm")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFNMSUB231PSZ128m(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFNMSUB231PSm")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFNMSUB231SDZm_Int(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VFNMSUB231SSZm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], + (instregex + "VF(N)?M(ADD|SUB|ADDSUB|SUBADD)(132|213|231)P(D|S)Z128m(b?)(k?)(z?)", + "VF(N)?M(ADD|SUB|ADDSUB|SUBADD)(132|213|231)P(D|S)m", + "VF(N)?M(ADD|SUB)(132|213|231)S(D|S)Zm(_Int)?(k?)(z?)")>; def: InstRW<[SKXWriteResGroup149], (instregex "VGETEXPPDZ128m(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup149], (instregex "VGETEXPPSZ128m(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup149], (instregex "VGETEXPSDm(b?)(k?)(z?)")>; @@ -5558,20 +5121,20 @@ def: InstRW<[SKXWriteResGroup149], (instregex "VMAX(C?)PDZ128rm(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup149], (instregex "VMAX(C?)PDrm")>; def: InstRW<[SKXWriteResGroup149], (instregex "VMAX(C?)PSZ128rm(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup149], (instregex "VMAX(C?)PSrm")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VMAX(C?)SDZrm_Int(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VMAX(C?)SSZrm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VMAX(C?)SDZrm(_Int)?(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VMAX(C?)SSZrm(_Int)?(k?)(z?)")>; def: InstRW<[SKXWriteResGroup149], (instregex "VMIN(C?)PDZ128rm(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup149], (instregex "VMIN(C?)PDrm")>; def: InstRW<[SKXWriteResGroup149], (instregex "VMIN(C?)PSZ128rm(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup149], (instregex "VMIN(C?)PSrm")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VMIN(C?)SDZrm_Int(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VMIN(C?)SSZrm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VMIN(C?)SDZrm(_Int)?(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VMIN(C?)SSZrm(_Int)?(k?)(z?)")>; def: InstRW<[SKXWriteResGroup149], (instregex "VMULPDZ128rm(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup149], (instregex "VMULPDrm")>; def: InstRW<[SKXWriteResGroup149], (instregex "VMULPSZ128rm(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup149], (instregex "VMULPSrm")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VMULSDZrm_Int(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup149], (instregex 
"VMULSSZrm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VMULSDZrm(_Int)?(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VMULSSZrm(_Int)?(k?)(z?)")>; def: InstRW<[SKXWriteResGroup149], (instregex "VPHMINPOSUWrm128")>; def: InstRW<[SKXWriteResGroup149], (instregex "VPLZCNTDZ128rm(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup149], (instregex "VPLZCNTQZ128rm(b?)(k?)(z?)")>; @@ -5607,8 +5170,8 @@ def: InstRW<[SKXWriteResGroup149], (instregex "VSUBPDZ128rm(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup149], (instregex "VSUBPDrm")>; def: InstRW<[SKXWriteResGroup149], (instregex "VSUBPSZ128rm(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup149], (instregex "VSUBPSrm")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VSUBSDZrm_Int(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup149], (instregex "VSUBSSZrm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VSUBSDZrm(_Int)?(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VSUBSSZrm(_Int)?(k?)(z?)")>; def SKXWriteResGroup150 : SchedWriteRes<[SKXPort0]> { let Latency = 10; @@ -5704,7 +5267,7 @@ def: InstRW<[SKXWriteResGroup159], (instregex "VDIVPSYrr")>; def: InstRW<[SKXWriteResGroup159], (instregex "VDIVPSZ128rr(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup159], (instregex "VDIVPSZ256rr(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup159], (instregex "VDIVPSrr")>; -def: InstRW<[SKXWriteResGroup159], (instregex "VDIVSSZrr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup159], (instregex "VDIVSSZrr(b?)(_Int)?(k?)(z?)")>; def: InstRW<[SKXWriteResGroup159], (instregex "VDIVSSrr")>; def SKXWriteResGroup160 : SchedWriteRes<[SKXPort0,SKXPort23]> { @@ -5759,6 +5322,8 @@ def: InstRW<[SKXWriteResGroup161], (instregex "VCVTPS2UDQZrm(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup161], (instregex "VCVTPS2UQQZ256rm(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup161], (instregex "VCVTQQ2PDZ256rm(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup161], (instregex "VCVTQQ2PDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTQQ2PSZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTQQ2PSZrm(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup161], (instregex "VCVTTPD2QQZ256rm(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup161], (instregex "VCVTTPD2QQZrm(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup161], (instregex "VCVTTPD2UQQZ256rm(b?)(k?)(z?)")>; @@ -5776,118 +5341,17 @@ def: InstRW<[SKXWriteResGroup161], (instregex "VCVTUDQ2PSZ256rm(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup161], (instregex "VCVTUDQ2PSZrm(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup161], (instregex "VCVTUQQ2PDZ256rm(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup161], (instregex "VCVTUQQ2PDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTUQQ2PSZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTUQQ2PSZrm(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup161], (instregex "VFIXUPIMMPDZ256rm(b?)i(k?)(z?)")>; def: InstRW<[SKXWriteResGroup161], (instregex "VFIXUPIMMPDZrm(b?)i(k?)(z?)")>; def: InstRW<[SKXWriteResGroup161], (instregex "VFIXUPIMMPSZ256rm(b?)i(k?)(z?)")>; def: InstRW<[SKXWriteResGroup161], (instregex "VFIXUPIMMPSZrm(b?)i(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFMADD132PDYm")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFMADD132PDZ256m(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFMADD132PDZm(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFMADD132PSYm")>; -def: InstRW<[SKXWriteResGroup161], (instregex 
"VFMADD132PSZ256m(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFMADD132PSZm(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFMADD213PDYm")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFMADD213PDZ256m(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFMADD213PDZm(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFMADD213PSYm")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFMADD213PSZ256m(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFMADD213PSZm(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFMADD231PDYm")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFMADD231PDZ256m(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFMADD231PDZm(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFMADD231PSYm")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFMADD231PSZ256m(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFMADD231PSZm(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFMADDSUB132PDYm")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFMADDSUB132PDZ256m(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFMADDSUB132PDZm(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFMADDSUB132PSYm")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFMADDSUB132PSZ256m(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFMADDSUB132PSZm(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFMADDSUB213PDYm")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFMADDSUB213PDZ256m(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFMADDSUB213PDZm(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFMADDSUB213PSYm")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFMADDSUB213PSZ256m(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFMADDSUB213PSZm(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFMADDSUB231PDYm")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFMADDSUB231PDZ256m(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFMADDSUB231PDZm(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFMADDSUB231PSYm")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFMADDSUB231PSZ256m(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFMADDSUB231PSZm(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUB132PDYm")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUB132PDZ256m(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUB132PDZm(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUB132PSYm")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUB132PSZ256m(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUB132PSZm(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUB213PDYm")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUB213PDZ256m(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUB213PDZm(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUB213PSYm")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUB213PSZ256m(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUB213PSZm(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUB231PDYm")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUB231PDZ256m(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUB231PDZm(b?)(k?)(z?)")>; -def: 
InstRW<[SKXWriteResGroup161], (instregex "VFMSUB231PSYm")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUB231PSZ256m(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUB231PSZm(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUBADD132PDYm")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUBADD132PDZ256m(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUBADD132PDZm(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUBADD132PSYm")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUBADD132PSZ256m(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUBADD132PSZm(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUBADD213PDYm")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUBADD213PDZ256m(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUBADD213PDZm(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUBADD213PSYm")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUBADD213PSZ256m(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUBADD213PSZm(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUBADD231PDYm")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUBADD231PDZ256m(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUBADD231PDZm(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUBADD231PSYm")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUBADD231PSZ256m(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUBADD231PSZm(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFNMADD132PDYm")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFNMADD132PDZ256m(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFNMADD132PDZm(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFNMADD132PSYm")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFNMADD132PSZ256m(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFNMADD132PSZm(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFNMADD213PDYm")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFNMADD213PDZ256m(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFNMADD213PDZm(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFNMADD213PSYm")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFNMADD213PSZ256m(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFNMADD213PSZm(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFNMADD231PDYm")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFNMADD231PDZ256m(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFNMADD231PDZm(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFNMADD231PSYm")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFNMADD231PSZ256m(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFNMADD231PSZm(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFNMSUB132PDYm")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFNMSUB132PDZ256m(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFNMSUB132PDZm(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFNMSUB132PSYm")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFNMSUB132PSZ256m(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFNMSUB132PSZm(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFNMSUB213PDYm")>; -def: InstRW<[SKXWriteResGroup161], 
(instregex "VFNMSUB213PDZ256m(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFNMSUB213PDZm(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFNMSUB213PSYm")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFNMSUB213PSZ256m(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFNMSUB213PSZm(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFNMSUB231PDYm")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFNMSUB231PDZ256m(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFNMSUB231PDZm(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFNMSUB231PSYm")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFNMSUB231PSZ256m(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup161], (instregex "VFNMSUB231PSZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], + (instregex + "VF(N)?M(ADD|SUB|ADDSUB|SUBADD)(132|213|231)P(D|S)Ym", + "VF(N)?M(ADD|SUB|ADDSUB|SUBADD)(132|213|231)P(D|S)Z256m(b?)(k?)(z?)", + "VF(N)?M(ADD|SUB|ADDSUB|SUBADD)(132|213|231)P(D|S)Zm(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup161], (instregex "VGETEXPPDZ256m(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup161], (instregex "VGETEXPPDm(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup161], (instregex "VGETEXPPSZ256m(b?)(k?)(z?)")>; @@ -5937,6 +5401,7 @@ def: InstRW<[SKXWriteResGroup161], (instregex "VPMULHWYrm")>; def: InstRW<[SKXWriteResGroup161], (instregex "VPMULHWZ256rm(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup161], (instregex "VPMULHWZrm(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup161], (instregex "VPMULLWYrm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VPMULLWZ256rm(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup161], (instregex "VPMULLWZrm(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup161], (instregex "VPMULUDQYrm")>; def: InstRW<[SKXWriteResGroup161], (instregex "VPMULUDQZ256rm(b?)(k?)(z?)")>; @@ -5984,7 +5449,7 @@ def SKXWriteResGroup163 : SchedWriteRes<[SKXPort23,SKXPort015]> { let NumMicroOps = 3; let ResourceCycles = [1,2]; } -def: InstRW<[SKXWriteResGroup163], (instregex "VCVTSD2SSZrm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup163], (instregex "VCVTSD2SSZrm(_Int)?(k?)(z?)")>; def SKXWriteResGroup164 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> { let Latency = 11; @@ -6087,7 +5552,7 @@ def: InstRW<[SKXWriteResGroup172], (instregex "VSQRTPSYr")>; def: InstRW<[SKXWriteResGroup172], (instregex "VSQRTPSZ128r(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup172], (instregex "VSQRTPSZ256r(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup172], (instregex "VSQRTPSr")>; -def: InstRW<[SKXWriteResGroup172], (instregex "VSQRTSSZr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup172], (instregex "VSQRTSSZr(b?)(_Int)?(k?)(z?)")>; def: InstRW<[SKXWriteResGroup172], (instregex "VSQRTSSr")>; def SKXWriteResGroup173 : SchedWriteRes<[SKXPort5,SKXPort23]> { @@ -6208,7 +5673,7 @@ def: InstRW<[SKXWriteResGroup184], (instregex "VDIVPDYrr")>; def: InstRW<[SKXWriteResGroup184], (instregex "VDIVPDZ128rr(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup184], (instregex "VDIVPDZ256rr(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup184], (instregex "VDIVPDrr")>; -def: InstRW<[SKXWriteResGroup184], (instregex "VDIVSDZrr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup184], (instregex "VDIVSDZrr(b?)(_Int)?(k?)(z?)")>; def: InstRW<[SKXWriteResGroup184], (instregex "VDIVSDrr")>; def SKXWriteResGroup185 : SchedWriteRes<[SKXPort0,SKXPort23]> { @@ -6376,7 +5841,7 @@ def: InstRW<[SKXWriteResGroup201], (instregex "DIVPSrm")>; def: InstRW<[SKXWriteResGroup201], (instregex 
"SQRTSSm")>; def: InstRW<[SKXWriteResGroup201], (instregex "VDIVPSZ128rm(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup201], (instregex "VDIVPSrm")>; -def: InstRW<[SKXWriteResGroup201], (instregex "VDIVSSZrm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup201], (instregex "VDIVSSZrm(_Int)?(k?)(z?)")>; def: InstRW<[SKXWriteResGroup201], (instregex "VSQRTSSm")>; def SKXWriteResGroup202 : SchedWriteRes<[SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKXPort05,SKXPort0156]> { @@ -6397,7 +5862,7 @@ def: InstRW<[SKXWriteResGroup203], (instregex "VSQRTPDYr")>; def: InstRW<[SKXWriteResGroup203], (instregex "VSQRTPDZ128r(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup203], (instregex "VSQRTPDZ256r(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup203], (instregex "VSQRTPDr")>; -def: InstRW<[SKXWriteResGroup203], (instregex "VSQRTSDZr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup203], (instregex "VSQRTSDZr(b?)(_Int)?(k?)(z?)")>; def: InstRW<[SKXWriteResGroup203], (instregex "VSQRTSDr")>; def SKXWriteResGroup204 : SchedWriteRes<[SKXPort0,SKXPort23]> { @@ -6410,7 +5875,7 @@ def: InstRW<[SKXWriteResGroup204], (instregex "VDIVPSYrm")>; def: InstRW<[SKXWriteResGroup204], (instregex "VDIVPSZ256rm(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup204], (instregex "VSQRTPSZ128m(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup204], (instregex "VSQRTPSm")>; -def: InstRW<[SKXWriteResGroup204], (instregex "VSQRTSSZm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup204], (instregex "VSQRTSSZm(_Int)?(k?)(z?)")>; def SKXWriteResGroup205 : SchedWriteRes<[SKXPort23,SKXPort015]> { let Latency = 18; @@ -6488,9 +5953,9 @@ def SKXWriteResGroup214 : SchedWriteRes<[]> { let Latency = 20; let NumMicroOps = 0; } -def: InstRW<[SKXWriteResGroup214], (instregex "VGATHERDPSZ128rm(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup214], (instregex "VGATHERQPSZrm(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup214], (instregex "VPGATHERDDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup214], (instrs VGATHERDPSZ128rm, + VGATHERQPSZrm, + VPGATHERDDZ128rm)>; def SKXWriteResGroup215 : SchedWriteRes<[SKXPort0]> { let Latency = 20; @@ -6509,7 +5974,7 @@ def SKXWriteResGroup216 : SchedWriteRes<[SKXPort0,SKXPort23]> { def: InstRW<[SKXWriteResGroup216], (instregex "DIVPDrm")>; def: InstRW<[SKXWriteResGroup216], (instregex "VDIVPDZ128rm(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup216], (instregex "VDIVPDrm")>; -def: InstRW<[SKXWriteResGroup216], (instregex "VDIVSDZrm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup216], (instregex "VDIVSDZrm(_Int)?(k?)(z?)")>; def SKXWriteResGroup217 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> { let Latency = 20; @@ -6523,10 +5988,10 @@ def SKXWriteResGroup218 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort01 let NumMicroOps = 5; let ResourceCycles = [1,2,1,1]; } -def: InstRW<[SKXWriteResGroup218], (instregex "VGATHERQPSZ128rm(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup218], (instregex "VGATHERQPSZ256rm(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup218], (instregex "VPGATHERQDZ128rm(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup218], (instregex "VPGATHERQDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup218], (instrs VGATHERQPSZ128rm, + VGATHERQPSZ256rm, + VPGATHERQDZ128rm, + VPGATHERQDZ256rm)>; def SKXWriteResGroup219 : SchedWriteRes<[SKXPort4,SKXPort5,SKXPort6,SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> { let Latency = 20; @@ -6573,52 +6038,52 @@ def SKXWriteResGroup224 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort01 let NumMicroOps = 5; let ResourceCycles = [1,2,1,1]; } -def: 
InstRW<[SKXWriteResGroup224], (instregex "VGATHERDPDZ128rm(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup224], (instregex "VGATHERQPDZ128rm(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup224], (instregex "VPGATHERDQZ128rm(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup224], (instregex "VPGATHERQQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup224], (instrs VGATHERDPDZ128rm, + VGATHERQPDZ128rm, + VPGATHERDQZ128rm, + VPGATHERQQZ128rm)>; def SKXWriteResGroup224_2 : SchedWriteRes<[SKXPort0, SKXPort23, SKXPort5, SKXPort015]> { let Latency = 22; let NumMicroOps = 5; let ResourceCycles = [1,2,1,1]; } -def: InstRW<[SKXWriteResGroup224_2], (instregex "VGATHERDPSrm")>; -def: InstRW<[SKXWriteResGroup224_2], (instregex "VGATHERDPDrm")>; -def: InstRW<[SKXWriteResGroup224_2], (instregex "VGATHERQPDrm")>; -def: InstRW<[SKXWriteResGroup224_2], (instregex "VGATHERQPSrm")>; -def: InstRW<[SKXWriteResGroup224_2], (instregex "VPGATHERDDrm")>; -def: InstRW<[SKXWriteResGroup224_2], (instregex "VPGATHERDQrm")>; -def: InstRW<[SKXWriteResGroup224_2], (instregex "VPGATHERQDrm")>; -def: InstRW<[SKXWriteResGroup224_2], (instregex "VPGATHERQQrm")>; -def: InstRW<[SKXWriteResGroup224_2], (instregex "VPGATHERDDrm")>; -def: InstRW<[SKXWriteResGroup224_2], (instregex "VPGATHERQDrm")>; -def: InstRW<[SKXWriteResGroup224_2], (instregex "VPGATHERDQrm")>; -def: InstRW<[SKXWriteResGroup224_2], (instregex "VPGATHERQQrm")>; -def: InstRW<[SKXWriteResGroup224_2], (instregex "VGATHERDPSrm")>; -def: InstRW<[SKXWriteResGroup224_2], (instregex "VGATHERQPSrm")>; -def: InstRW<[SKXWriteResGroup224_2], (instregex "VGATHERDPDrm")>; -def: InstRW<[SKXWriteResGroup224_2], (instregex "VGATHERQPDrm")>; +def: InstRW<[SKXWriteResGroup224_2], (instrs VGATHERDPSrm, + VGATHERDPDrm, + VGATHERQPDrm, + VGATHERQPSrm, + VPGATHERDDrm, + VPGATHERDQrm, + VPGATHERQDrm, + VPGATHERQQrm, + VPGATHERDDrm, + VPGATHERQDrm, + VPGATHERDQrm, + VPGATHERQQrm, + VGATHERDPSrm, + VGATHERQPSrm, + VGATHERDPDrm, + VGATHERQPDrm)>; def SKXWriteResGroup224_3 : SchedWriteRes<[SKXPort0, SKXPort23, SKXPort5, SKXPort015]> { let Latency = 25; let NumMicroOps = 5; let ResourceCycles = [1,2,1,1]; } -def: InstRW<[SKXWriteResGroup224_3], (instregex "VGATHERDPSYrm")>; -def: InstRW<[SKXWriteResGroup224_3], (instregex "VGATHERQPDYrm")>; -def: InstRW<[SKXWriteResGroup224_3], (instregex "VGATHERQPSYrm")>; -def: InstRW<[SKXWriteResGroup224_3], (instregex "VPGATHERDDYrm")>; -def: InstRW<[SKXWriteResGroup224_3], (instregex "VPGATHERDQYrm")>; -def: InstRW<[SKXWriteResGroup224_3], (instregex "VPGATHERQDYrm")>; -def: InstRW<[SKXWriteResGroup224_3], (instregex "VPGATHERQQYrm")>; -def: InstRW<[SKXWriteResGroup224_3], (instregex "VPGATHERDDYrm")>; -def: InstRW<[SKXWriteResGroup224_3], (instregex "VPGATHERQDYrm")>; -def: InstRW<[SKXWriteResGroup224_3], (instregex "VPGATHERDQYrm")>; -def: InstRW<[SKXWriteResGroup224_3], (instregex "VPGATHERQQYrm")>; -def: InstRW<[SKXWriteResGroup224_3], (instregex "VGATHERDPSYrm")>; -def: InstRW<[SKXWriteResGroup224_3], (instregex "VGATHERQPSYrm")>; -def: InstRW<[SKXWriteResGroup224_3], (instregex "VGATHERDPDYrm")>; +def: InstRW<[SKXWriteResGroup224_3], (instrs VGATHERDPSYrm, + VGATHERQPDYrm, + VGATHERQPSYrm, + VPGATHERDDYrm, + VPGATHERDQYrm, + VPGATHERQDYrm, + VPGATHERQQYrm, + VPGATHERDDYrm, + VPGATHERQDYrm, + VPGATHERDQYrm, + VPGATHERQQYrm, + VGATHERDPSYrm, + VGATHERQPSYrm, + VGATHERDPDYrm)>; def SKXWriteResGroup225 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort015]> { let Latency = 22; @@ -6659,7 +6124,7 @@ def SKXWriteResGroup229 : SchedWriteRes<[SKXPort0,SKXPort23]> { 
def: InstRW<[SKXWriteResGroup229], (instregex "SQRTPDm")>; def: InstRW<[SKXWriteResGroup229], (instregex "VSQRTPDZ128m(b?)(k?)(z?)")>; def: InstRW<[SKXWriteResGroup229], (instregex "VSQRTPDm")>; -def: InstRW<[SKXWriteResGroup229], (instregex "VSQRTSDZm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup229], (instregex "VSQRTSDZm(_Int)?(k?)(z?)")>; def SKXWriteResGroup230 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015]> { let Latency = 24; @@ -6697,11 +6162,11 @@ def SKXWriteResGroup234 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort01 let NumMicroOps = 5; let ResourceCycles = [1,2,1,1]; } -def: InstRW<[SKXWriteResGroup234], (instregex "VGATHERDPDZ256rm(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup234], (instregex "VGATHERQPDZ256rm(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup234], (instregex "VPGATHERDQZ256rm(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup234], (instregex "VPGATHERQDZrm(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup234], (instregex "VPGATHERQQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup234], (instrs VGATHERDPDZ256rm, + VGATHERQPDZ256rm, + VPGATHERDQZ256rm, + VPGATHERQDZrm, + VPGATHERQQZ256rm)>; def SKXWriteResGroup235 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23,SKXPort015,SKXPort0156]> { let Latency = 25; @@ -6731,10 +6196,10 @@ def SKXWriteResGroup238 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort01 let NumMicroOps = 5; let ResourceCycles = [1,2,1,1]; } -def: InstRW<[SKXWriteResGroup238], (instregex "VGATHERDPDZrm(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup238], (instregex "VGATHERQPDZrm(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup238], (instregex "VPGATHERDQZrm(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup238], (instregex "VPGATHERQQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup238], (instrs VGATHERDPDZrm, + VGATHERQPDZrm, + VPGATHERDQZrm, + VPGATHERQQZrm)>; def SKXWriteResGroup239 : SchedWriteRes<[SKXPort0,SKXPort23]> { let Latency = 27; @@ -6749,8 +6214,8 @@ def SKXWriteResGroup240 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort01 let NumMicroOps = 5; let ResourceCycles = [1,2,1,1]; } -def: InstRW<[SKXWriteResGroup240], (instregex "VGATHERDPSZ256rm(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup240], (instregex "VPGATHERDDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup240], (instrs VGATHERDPSZ256rm, + VPGATHERDDZ256rm)>; def SKXWriteResGroup241 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23,SKXPort0156]> { let Latency = 28; @@ -6787,8 +6252,8 @@ def SKXWriteResGroup245 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort01 let NumMicroOps = 5; let ResourceCycles = [1,2,1,1]; } -def: InstRW<[SKXWriteResGroup245], (instregex "VGATHERDPSZrm(b?)(k?)(z?)")>; -def: InstRW<[SKXWriteResGroup245], (instregex "VPGATHERDDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup245], (instrs VGATHERDPSZrm, + VPGATHERDDZrm)>; def SKXWriteResGroup246 : SchedWriteRes<[SKXPort0,SKXPort015]> { let Latency = 31; @@ -6802,8 +6267,8 @@ def SKXWriteResGroup247 : SchedWriteRes<[SKXPort5,SKXPort6,SKXPort23,SKXPort06,S let NumMicroOps = 23; let ResourceCycles = [1,5,3,4,10]; } -def: InstRW<[SKXWriteResGroup247], (instregex "IN32ri")>; -def: InstRW<[SKXWriteResGroup247], (instregex "IN32rr")>; +def: InstRW<[SKXWriteResGroup247], (instregex "IN(16|32)ri")>; +def: InstRW<[SKXWriteResGroup247], (instregex "IN(16|32)rr")>; def: InstRW<[SKXWriteResGroup247], (instregex "IN8ri")>; def: InstRW<[SKXWriteResGroup247], (instregex "IN8rr")>; @@ -6812,8 +6277,8 @@ def SKXWriteResGroup248 : SchedWriteRes<[SKXPort5,SKXPort6,SKXPort23,SKXPort237, let NumMicroOps = 23; 
let ResourceCycles = [1,5,2,1,4,10]; } -def: InstRW<[SKXWriteResGroup248], (instregex "OUT32ir")>; -def: InstRW<[SKXWriteResGroup248], (instregex "OUT32rr")>; +def: InstRW<[SKXWriteResGroup248], (instregex "OUT(16|32)ir")>; +def: InstRW<[SKXWriteResGroup248], (instregex "OUT(16|32)rr")>; def: InstRW<[SKXWriteResGroup248], (instregex "OUT8ir")>; def: InstRW<[SKXWriteResGroup248], (instregex "OUT8rr")>; @@ -6830,7 +6295,7 @@ def SKXWriteResGroup250 : SchedWriteRes<[SKXPort1,SKXPort6,SKXPort23,SKXPort0156 let NumMicroOps = 31; let ResourceCycles = [1,8,1,21]; } -def: InstRW<[SKXWriteResGroup250], (instregex "XRSTOR(64?)")>; +def: InstRW<[SKXWriteResGroup250], (instregex "XRSTOR(64)?")>; def SKXWriteResGroup251 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015]> { let Latency = 38; @@ -6881,7 +6346,6 @@ def SKXWriteResGroup258 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort05,SKXPort06, let ResourceCycles = [2,8,5,10,39]; } def: InstRW<[SKXWriteResGroup258], (instregex "FLDENVm")>; -def: InstRW<[SKXWriteResGroup258], (instregex "FLDENVm")>; def SKXWriteResGroup259 : SchedWriteRes<[SKXPort0,SKXPort6,SKXPort23,SKXPort05,SKXPort06,SKXPort15,SKXPort0156]> { let Latency = 63; @@ -6938,7 +6402,6 @@ def SKXWriteResGroup266 : SchedWriteRes<[SKXPort0,SKXPort1,SKXPort4,SKXPort5,SKX let ResourceCycles = [9,1,11,16,1,11,21,30]; } def: InstRW<[SKXWriteResGroup266], (instregex "FSTENVm")>; -def: InstRW<[SKXWriteResGroup266], (instregex "FSTENVm")>; def SKXWriteResGroup267 : SchedWriteRes<[SKXPort6,SKXPort0156]> { let Latency = 140; diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td index 0346046e7580..2e21a97541b2 100644 --- a/lib/Target/X86/X86Schedule.td +++ b/lib/Target/X86/X86Schedule.td @@ -385,8 +385,6 @@ def IIC_SSE_CVT_PD_RR : InstrItinClass; def IIC_SSE_CVT_PD_RM : InstrItinClass; def IIC_SSE_CVT_PS_RR : InstrItinClass; def IIC_SSE_CVT_PS_RM : InstrItinClass; -def IIC_SSE_CVT_PI2PS_RR : InstrItinClass; -def IIC_SSE_CVT_PI2PS_RM : InstrItinClass; def IIC_SSE_CVT_Scalar_RR : InstrItinClass; def IIC_SSE_CVT_Scalar_RM : InstrItinClass; def IIC_SSE_CVT_SS2SI32_RM : InstrItinClass; @@ -396,6 +394,8 @@ def IIC_SSE_CVT_SS2SI64_RR : InstrItinClass; def IIC_SSE_CVT_SD2SI_RM : InstrItinClass; def IIC_SSE_CVT_SD2SI_RR : InstrItinClass; +def IIC_AVX_ZERO : InstrItinClass; + // MMX def IIC_MMX_MOV_MM_RM : InstrItinClass; def IIC_MMX_MOV_REG_MM : InstrItinClass; @@ -449,6 +449,7 @@ def IIC_CMPX_LOCK_16B : InstrItinClass; def IIC_XADD_LOCK_MEM : InstrItinClass; def IIC_XADD_LOCK_MEM8 : InstrItinClass; +def IIC_FCMOV : InstrItinClass; def IIC_FILD : InstrItinClass; def IIC_FLD : InstrItinClass; def IIC_FLD80 : InstrItinClass; @@ -477,6 +478,8 @@ def IIC_FXTRACT : InstrItinClass; def IIC_FPREM1 : InstrItinClass; def IIC_FPSTP : InstrItinClass; def IIC_FPREM : InstrItinClass; +def IIC_FSIGN : InstrItinClass; +def IIC_FSQRT : InstrItinClass; def IIC_FYL2XP1 : InstrItinClass; def IIC_FSINCOS : InstrItinClass; def IIC_FRNDINT : InstrItinClass; @@ -493,16 +496,31 @@ def IIC_INT : InstrItinClass; def IIC_INT3 : InstrItinClass; def IIC_INVD : InstrItinClass; def IIC_INVLPG : InstrItinClass; +def IIC_INVPCID : InstrItinClass; def IIC_IRET : InstrItinClass; def IIC_HLT : InstrItinClass; def IIC_LXS : InstrItinClass; def IIC_LTR : InstrItinClass; +def IIC_MPX : InstrItinClass; +def IIC_PKU : InstrItinClass; +def IIC_PTWRITE : InstrItinClass; +def IIC_RDPID : InstrItinClass; +def IIC_RDRAND : InstrItinClass; +def IIC_RDSEED : InstrItinClass; def IIC_RDTSC : InstrItinClass; +def IIC_RDTSCP : InstrItinClass; def 
IIC_RSM : InstrItinClass; def IIC_SIDT : InstrItinClass; def IIC_SGDT : InstrItinClass; def IIC_SLDT : InstrItinClass; +def IIC_SMAP : InstrItinClass; +def IIC_SMX : InstrItinClass; def IIC_STR : InstrItinClass; +def IIC_SKINIT : InstrItinClass; +def IIC_SVM : InstrItinClass; +def IIC_VMX : InstrItinClass; +def IIC_CLGI : InstrItinClass; +def IIC_STGI : InstrItinClass; def IIC_SWAPGS : InstrItinClass; def IIC_SYSCALL : InstrItinClass; def IIC_SYS_ENTER_EXIT : InstrItinClass; @@ -532,6 +550,8 @@ def IIC_PUSH_CS : InstrItinClass; def IIC_PUSH_SR : InstrItinClass; def IIC_POP_SR : InstrItinClass; def IIC_POP_SR_SS : InstrItinClass; +def IIC_SEGMENT_BASE_R : InstrItinClass; +def IIC_SEGMENT_BASE_W : InstrItinClass; def IIC_VERR : InstrItinClass; def IIC_VERW_REG : InstrItinClass; def IIC_VERW_MEM : InstrItinClass; @@ -557,6 +577,10 @@ def IIC_PUSH_A : InstrItinClass; def IIC_BSWAP : InstrItinClass; def IIC_BIT_SCAN_MEM : InstrItinClass; def IIC_BIT_SCAN_REG : InstrItinClass; +def IIC_LZCNT_RR : InstrItinClass; +def IIC_LZCNT_RM : InstrItinClass; +def IIC_TZCNT_RR : InstrItinClass; +def IIC_TZCNT_RM : InstrItinClass; def IIC_MOVS : InstrItinClass; def IIC_STOS : InstrItinClass; def IIC_SCAS : InstrItinClass; @@ -669,13 +693,3 @@ def GenericPostRAModel : GenericX86Model { let PostRAScheduler = 1; } -include "X86ScheduleAtom.td" -include "X86SchedSandyBridge.td" -include "X86SchedHaswell.td" -include "X86SchedBroadwell.td" -include "X86ScheduleSLM.td" -include "X86ScheduleZnver1.td" -include "X86ScheduleBtVer2.td" -include "X86SchedSkylakeClient.td" -include "X86SchedSkylakeServer.td" - diff --git a/lib/Target/X86/X86ScheduleAtom.td b/lib/Target/X86/X86ScheduleAtom.td index 1a070f79de69..e052ad98104c 100644 --- a/lib/Target/X86/X86ScheduleAtom.td +++ b/lib/Target/X86/X86ScheduleAtom.td @@ -364,6 +364,7 @@ def AtomItineraries : ProcessorItineraries< InstrItinData] >, InstrItinData] >, + InstrItinData] >, InstrItinData] >, InstrItinData] >, InstrItinData] >, @@ -394,6 +395,8 @@ def AtomItineraries : ProcessorItineraries< InstrItinData] >, InstrItinData] >, InstrItinData, InstrStage<1, [Port1]>] >, + InstrItinData] >, + InstrItinData] >, // System instructions InstrItinData] >, @@ -406,6 +409,7 @@ def AtomItineraries : ProcessorItineraries< InstrItinData] >, InstrItinData] >, InstrItinData] >, + InstrItinData] >, InstrItinData] >, InstrItinData] >, InstrItinData] >, diff --git a/lib/Target/X86/X86ScheduleBtVer2.td b/lib/Target/X86/X86ScheduleBtVer2.td index a2f02962444c..beb0fcd883cc 100644 --- a/lib/Target/X86/X86ScheduleBtVer2.td +++ b/lib/Target/X86/X86ScheduleBtVer2.td @@ -140,24 +140,26 @@ def WriteSHLDrri : SchedWriteRes<[JALU01]> { let ResourceCycles = [6]; let NumMicroOps = 6; } -def: InstRW<[WriteSHLDrri], (instregex "SHLD(16|32|64)rri8")>; -def: InstRW<[WriteSHLDrri], (instregex "SHRD(16|32|64)rri8")>; +def: InstRW<[WriteSHLDrri], (instrs SHLD16rri8, SHLD32rri8, SHLD64rri8, + SHRD16rri8, SHRD32rri8, SHRD64rri8)>; def WriteSHLDrrCL : SchedWriteRes<[JALU01]> { let Latency = 4; let ResourceCycles = [8]; let NumMicroOps = 7; } -def: InstRW<[WriteSHLDrrCL], (instregex "SHLD(16|32|64)rrCL")>; -def: InstRW<[WriteSHLDrrCL], (instregex "SHRD(16|32|64)rrCL")>; +def: InstRW<[WriteSHLDrrCL], (instrs SHLD16rrCL, SHLD32rrCL, SHLD64rrCL, + SHRD16rrCL, SHRD32rrCL, SHRD64rrCL)>; def WriteSHLDm : SchedWriteRes<[JLAGU, JALU01]> { let Latency = 9; let ResourceCycles = [1, 22]; let NumMicroOps = 8; } -def: InstRW<[WriteSHLDm], (instregex "SHLD(16|32|64)mr(i8|CL)")>; -def: InstRW<[WriteSHLDm], (instregex 
"SHRD(16|32|64)mr(i8|CL)")>; +def: InstRW<[WriteSHLDm],(instrs SHLD16mri8, SHLD32mri8, SHLD64mri8, + SHLD16mrCL, SHLD32mrCL, SHLD64mrCL, + SHRD16mri8, SHRD32mri8, SHRD64mri8, + SHRD16mrCL, SHRD32mrCL, SHRD64mrCL)>; //////////////////////////////////////////////////////////////////////////////// // Loads, stores, and moves, not folded with other operations. @@ -166,7 +168,10 @@ def: InstRW<[WriteSHLDm], (instregex "SHRD(16|32|64)mr(i8|CL)")>; def : WriteRes { let Latency = 5; } def : WriteRes; -def : WriteRes; +def : WriteRes; + +// Treat misc copies as a move. +def : InstRW<[WriteMove], (instrs COPY)>; //////////////////////////////////////////////////////////////////////////////// // Idioms that clear a register, like xorps %xmm0, %xmm0. @@ -375,13 +380,13 @@ def WriteFHAddY: SchedWriteRes<[JFPU0]> { let Latency = 3; let ResourceCycles = [2]; } -def : InstRW<[WriteFHAddY], (instregex "VH(ADD|SUB)P(S|D)Yrr")>; +def : InstRW<[WriteFHAddY], (instrs VHADDPDYrr, VHADDPSYrr, VHSUBPDYrr, VHSUBPSYrr)>; def WriteFHAddYLd: SchedWriteRes<[JLAGU, JFPU0]> { let Latency = 8; let ResourceCycles = [1, 2]; } -def : InstRW<[WriteFHAddYLd], (instregex "VH(ADD|SUB)P(S|D)Yrm")>; +def : InstRW<[WriteFHAddYLd], (instrs VHADDPDYrm, VHADDPSYrm, VHSUBPDYrm, VHSUBPSYrm)>; //////////////////////////////////////////////////////////////////////////////// // Carry-less multiplication instructions. @@ -411,28 +416,28 @@ def WriteDPPS: SchedWriteRes<[JFPU0, JFPU1]> { let ResourceCycles = [3,3]; let NumMicroOps = 5; } -def : InstRW<[WriteDPPS], (instregex "(V)?DPPSrri")>; +def : InstRW<[WriteDPPS], (instrs DPPSrri, VDPPSrri)>; def WriteDPPSLd: SchedWriteRes<[JLAGU, JFPU0, JFPU1]> { let Latency = 16; let ResourceCycles = [1,3,3]; let NumMicroOps = 6; } -def : InstRW<[WriteDPPSLd], (instregex "(V)?DPPSrmi")>; +def : InstRW<[WriteDPPSLd], (instrs DPPSrmi, VDPPSrmi)>; def WriteDPPD: SchedWriteRes<[JFPU0, JFPU1]> { let Latency = 9; let ResourceCycles = [3,3]; let NumMicroOps = 3; } -def : InstRW<[WriteDPPD], (instregex "(V)?DPPDrri")>; +def : InstRW<[WriteDPPD], (instrs DPPDrri, VDPPDrri)>; def WriteDPPDLd: SchedWriteRes<[JLAGU, JFPU0, JFPU1]> { let Latency = 14; let ResourceCycles = [1,3,3]; let NumMicroOps = 3; } -def : InstRW<[WriteDPPDLd], (instregex "(V)?DPPDrmi")>; +def : InstRW<[WriteDPPDLd], (instrs DPPDrmi, VDPPDrmi)>; //////////////////////////////////////////////////////////////////////////////// // SSE4A instructions. @@ -442,13 +447,13 @@ def WriteEXTRQ: SchedWriteRes<[JFPU01]> { let Latency = 1; let ResourceCycles = [1]; } -def : InstRW<[WriteEXTRQ], (instregex "EXTRQ")>; +def : InstRW<[WriteEXTRQ], (instrs EXTRQ, EXTRQI)>; def WriteINSERTQ: SchedWriteRes<[JFPU01]> { let Latency = 2; let ResourceCycles = [4]; } -def : InstRW<[WriteINSERTQ], (instregex "INSERTQ")>; +def : InstRW<[WriteINSERTQ], (instrs INSERTQ, INSERTQI)>; //////////////////////////////////////////////////////////////////////////////// // F16C instructions. 
@@ -457,48 +462,47 @@ def : InstRW<[WriteINSERTQ], (instregex "INSERTQ")>; def WriteCVT3: SchedWriteRes<[JFPU1]> { let Latency = 3; } -def : InstRW<[WriteCVT3], (instregex "VCVTPS2PHrr")>; -def : InstRW<[WriteCVT3], (instregex "VCVTPH2PSrr")>; +def : InstRW<[WriteCVT3], (instrs VCVTPS2PHrr, VCVTPH2PSrr)>; def WriteCVT3St: SchedWriteRes<[JFPU1, JSAGU]> { let Latency = 3; let ResourceCycles = [1, 1]; } -def : InstRW<[WriteCVT3St], (instregex "VCVTPS2PHmr")>; +def : InstRW<[WriteCVT3St], (instrs VCVTPS2PHmr)>; def WriteCVT3Ld: SchedWriteRes<[JLAGU, JFPU1]> { let Latency = 8; let ResourceCycles = [1, 1]; } -def : InstRW<[WriteCVT3Ld], (instregex "VCVTPH2PSrm")>; +def : InstRW<[WriteCVT3Ld], (instrs VCVTPH2PSrm)>; def WriteCVTPS2PHY: SchedWriteRes<[JFPU1, JFPU01]> { let Latency = 6; let ResourceCycles = [2,2]; let NumMicroOps = 3; } -def : InstRW<[WriteCVTPS2PHY], (instregex "VCVTPS2PHYrr")>; +def : InstRW<[WriteCVTPS2PHY], (instrs VCVTPS2PHYrr)>; def WriteCVTPS2PHYSt: SchedWriteRes<[JFPU1, JFPU01, JSAGU]> { let Latency = 11; let ResourceCycles = [2,2,1]; let NumMicroOps = 3; } -def : InstRW<[WriteCVTPS2PHYSt], (instregex "VCVTPS2PHYmr")>; +def : InstRW<[WriteCVTPS2PHYSt], (instrs VCVTPS2PHYmr)>; def WriteCVTPH2PSY: SchedWriteRes<[JFPU1]> { let Latency = 3; let ResourceCycles = [2]; let NumMicroOps = 2; } -def : InstRW<[WriteCVTPH2PSY], (instregex "VCVTPH2PSYrr")>; +def : InstRW<[WriteCVTPH2PSY], (instrs VCVTPH2PSYrr)>; def WriteCVTPH2PSYLd: SchedWriteRes<[JLAGU, JFPU1]> { let Latency = 8; let ResourceCycles = [1,2]; let NumMicroOps = 2; } -def : InstRW<[WriteCVTPH2PSYLd], (instregex "VCVTPH2PSYrm")>; +def : InstRW<[WriteCVTPH2PSYLd], (instrs VCVTPH2PSYrm)>; //////////////////////////////////////////////////////////////////////////////// // AVX instructions. 
@@ -509,119 +513,154 @@ def WriteVDPPSY: SchedWriteRes<[JFPU1, JFPU0]> { let ResourceCycles = [6, 6]; let NumMicroOps = 10; } -def : InstRW<[WriteVDPPSY], (instregex "VDPPSYrr")>; +def : InstRW<[WriteVDPPSY], (instrs VDPPSYrri)>; def WriteVDPPSYLd: SchedWriteRes<[JLAGU, JFPU1, JFPU0]> { let Latency = 17; let ResourceCycles = [1, 6, 6]; let NumMicroOps = 11; } -def : InstRW<[WriteVDPPSYLd, ReadAfterLd], (instregex "VDPPSYrm")>; +def : InstRW<[WriteVDPPSYLd, ReadAfterLd], (instrs VDPPSYrmi)>; def WriteFAddY: SchedWriteRes<[JFPU0]> { let Latency = 3; let ResourceCycles = [2]; } -def : InstRW<[WriteFAddY], (instregex "VADD(SUB)?P(S|D)Yrr", "VSUBP(S|D)Yrr")>; +def : InstRW<[WriteFAddY], (instrs VADDPDYrr, VADDPSYrr, + VSUBPDYrr, VSUBPSYrr, + VADDSUBPDYrr, VADDSUBPSYrr)>; def WriteFAddYLd: SchedWriteRes<[JLAGU, JFPU0]> { let Latency = 8; let ResourceCycles = [1, 2]; } -def : InstRW<[WriteFAddYLd, ReadAfterLd], (instregex "VADD(SUB)?P(S|D)Yrm", "VSUBP(S|D)Yrm")>; +def : InstRW<[WriteFAddYLd, ReadAfterLd], (instrs VADDPDYrm, VADDPSYrm, + VSUBPDYrm, VSUBPSYrm, + VADDSUBPDYrm, VADDSUBPSYrm)>; def WriteFDivY: SchedWriteRes<[JFPU1]> { let Latency = 38; let ResourceCycles = [38]; } -def : InstRW<[WriteFDivY], (instregex "VDIVP(D|S)Yrr")>; +def : InstRW<[WriteFDivY], (instrs VDIVPDYrr, VDIVPSYrr)>; def WriteFDivYLd: SchedWriteRes<[JLAGU, JFPU1]> { let Latency = 43; let ResourceCycles = [1, 38]; } -def : InstRW<[WriteFDivYLd, ReadAfterLd], (instregex "VDIVP(S|D)Yrm")>; +def : InstRW<[WriteFDivYLd, ReadAfterLd], (instrs VDIVPDYrm, VDIVPSYrm)>; def WriteVMULYPD: SchedWriteRes<[JFPU1]> { let Latency = 4; let ResourceCycles = [4]; } -def : InstRW<[WriteVMULYPD], (instregex "VMULPDYrr")>; +def : InstRW<[WriteVMULYPD], (instrs VMULPDYrr)>; def WriteVMULYPDLd: SchedWriteRes<[JLAGU, JFPU1]> { let Latency = 9; let ResourceCycles = [1, 4]; } -def : InstRW<[WriteVMULYPDLd, ReadAfterLd], (instregex "VMULPDYrm")>; +def : InstRW<[WriteVMULYPDLd, ReadAfterLd], (instrs VMULPDYrm)>; def WriteVMULYPS: SchedWriteRes<[JFPU1]> { let Latency = 2; let ResourceCycles = [2]; } -def : InstRW<[WriteVMULYPS], (instregex "VMULPSYrr", "VRCPPSYr", "VRSQRTPSYr")>; +def : InstRW<[WriteVMULYPS], (instrs VMULPSYrr, VRCPPSYr, VRSQRTPSYr)>; def WriteVMULYPSLd: SchedWriteRes<[JLAGU, JFPU1]> { let Latency = 7; let ResourceCycles = [1, 2]; } -def : InstRW<[WriteVMULYPSLd, ReadAfterLd], (instregex "VMULPSYrm", "VRCPPSYm", "VRSQRTPSYm")>; +def : InstRW<[WriteVMULYPSLd, ReadAfterLd], (instrs VMULPSYrm, VRCPPSYm, VRSQRTPSYm)>; + +def WriteVMULPD: SchedWriteRes<[JFPU1]> { + let Latency = 4; + let ResourceCycles = [2]; +} +def : InstRW<[WriteVMULPD], (instrs MULPDrr, MULSDrr, VMULPDrr, VMULSDrr)>; + +def WriteVMULPDLd: SchedWriteRes<[JLAGU, JFPU1]> { + let Latency = 9; + let ResourceCycles = [1, 2]; +} +def : InstRW<[WriteVMULPDLd], (instrs MULPDrm, MULSDrm, VMULPDrm, VMULSDrm)>; def WriteVCVTY: SchedWriteRes<[JSTC]> { let Latency = 3; let ResourceCycles = [2]; } -def : InstRW<[WriteVCVTY], (instregex "VCVTDQ2P(S|D)Yrr")>; -def : InstRW<[WriteVCVTY], (instregex "VROUNDYP(S|D)r")>; -def : InstRW<[WriteVCVTY], (instregex "VCVTPS2DQYrr")>; -def : InstRW<[WriteVCVTY], (instregex "VCVTTPS2DQYrr")>; +def : InstRW<[WriteVCVTY], (instrs VCVTDQ2PDYrr, VCVTDQ2PSYrr, + VCVTPS2DQYrr, VCVTTPS2DQYrr, + VROUNDYPDr, VROUNDYPSr)>; def WriteVCVTYLd: SchedWriteRes<[JLAGU, JSTC]> { let Latency = 8; let ResourceCycles = [1, 2]; } -def : InstRW<[WriteVCVTYLd, ReadAfterLd], (instregex "VCVTDQ2P(S|D)Yrm")>; -def : InstRW<[WriteVCVTYLd, ReadAfterLd], (instregex 
"VROUNDYP(S|D)m")>; -def : InstRW<[WriteVCVTYLd, ReadAfterLd], (instregex "VCVTPS2DQYrm")>; -def : InstRW<[WriteVCVTYLd, ReadAfterLd], (instregex "VCVTTPS2DQYrm")>; +def : InstRW<[WriteVCVTYLd, ReadAfterLd], (instrs VCVTDQ2PDYrm, VCVTDQ2PSYrm, + VCVTPS2DQYrm, VCVTTPS2DQYrm, + VROUNDYPDm, VROUNDYPSm)>; + +def WriteVMOVNTDQSt: SchedWriteRes<[JSTC, JSAGU]> { + let Latency = 2; +} +def : InstRW<[WriteVMOVNTDQSt], (instrs MOVNTDQmr, VMOVNTDQmr)>; -def WriteVMONTPSt: SchedWriteRes<[JSTC, JLAGU]> { +def WriteMOVNTSt: SchedWriteRes<[JSTC, JSAGU]> { + let Latency = 3; +} +def : InstRW<[WriteMOVNTSt], (instrs MOVNTPDmr, MOVNTPSmr, MOVNTSD, MOVNTSS, VMOVNTPDmr, VMOVNTPSmr)>; + +def WriteVMOVNTPYSt: SchedWriteRes<[JSTC, JSAGU]> { let Latency = 3; let ResourceCycles = [2,1]; } -def : InstRW<[WriteVMONTPSt], (instregex "VMOVNTP(S|D)Ymr")>; -def : InstRW<[WriteVMONTPSt], (instregex "VMOVNTDQYmr")>; +def : InstRW<[WriteVMOVNTPYSt], (instrs VMOVNTDQYmr, VMOVNTPDYmr, VMOVNTPSYmr)>; + +def WriteFCmp: SchedWriteRes<[JFPU0]> { + let Latency = 2; +} + +def : InstRW<[WriteFCmp], (instregex "(V)?M(AX|IN)(P|S)(D|S)rr", + "(V)?CMPP(S|D)rri", "(V)?CMPS(S|D)rr")>; + +def WriteFCmpLd: SchedWriteRes<[JLAGU, JFPU0]> { + let Latency = 7; +} + +def : InstRW<[WriteFCmpLd], (instregex "(V)?M(AX|IN)(P|S)(D|S)rm", + "(V)?CMPP(S|D)rmi", "(V)?CMPS(S|D)rm")>; def WriteVCVTPDY: SchedWriteRes<[JSTC, JFPU01]> { let Latency = 6; let ResourceCycles = [2, 4]; } -def : InstRW<[WriteVCVTPDY], (instregex "VCVTPD2(DQ|PS)Yrr")>; -def : InstRW<[WriteVCVTPDY], (instregex "VCVTTPD2DQYrr")>; +def : InstRW<[WriteVCVTPDY], (instrs VCVTPD2DQYrr, VCVTTPD2DQYrr, VCVTPD2PSYrr)>; def WriteVCVTPDYLd: SchedWriteRes<[JLAGU, JSTC, JFPU01]> { let Latency = 11; let ResourceCycles = [1, 2, 4]; } -def : InstRW<[WriteVCVTPDYLd, ReadAfterLd], (instregex "VCVTPD2(DQ|PS)Yrm")>; -def : InstRW<[WriteVCVTPDYLd, ReadAfterLd], (instregex "VCVTTPD2DQYrm")>; +def : InstRW<[WriteVCVTPDYLd, ReadAfterLd], (instrs VCVTPD2DQYrm, VCVTTPD2DQYrm, VCVTPD2PSYrm)>; def WriteVBlendVPY: SchedWriteRes<[JFPU01]> { let Latency = 3; let ResourceCycles = [6]; } -def : InstRW<[WriteVBlendVPY], (instregex "VBLENDVP(S|D)Yrr", "VPERMILP(D|S)Yrr")>; +def : InstRW<[WriteVBlendVPY], (instrs VBLENDVPDYrr, VBLENDVPSYrr, VPERMILPDYrr, VPERMILPSYrr)>; def WriteVBlendVPYLd: SchedWriteRes<[JLAGU, JFPU01]> { let Latency = 8; let ResourceCycles = [1, 6]; } -def : InstRW<[WriteVBlendVPYLd, ReadAfterLd], (instregex "VBLENDVP(S|D)Yrm")>; +def : InstRW<[WriteVBlendVPYLd, ReadAfterLd], (instrs VBLENDVPDYrm, VBLENDVPSYrm)>; def WriteVBROADCASTYLd: SchedWriteRes<[JLAGU, JFPU01]> { let Latency = 6; let ResourceCycles = [1, 4]; } -def : InstRW<[WriteVBROADCASTYLd, ReadAfterLd], (instregex "VBROADCASTS(S|D)Yrm")>; +def : InstRW<[WriteVBROADCASTYLd, ReadAfterLd], (instrs VBROADCASTSDYrm, VBROADCASTSSYrm)>; def WriteFPAY22: SchedWriteRes<[JFPU0]> { let Latency = 2; @@ -639,37 +678,37 @@ def WriteVHAddSubY: SchedWriteRes<[JFPU0]> { let Latency = 3; let ResourceCycles = [2]; } -def : InstRW<[WriteVHAddSubY], (instregex "VH(ADD|SUB)P(D|S)Yrr")>; +def : InstRW<[WriteVHAddSubY], (instrs VHADDPDYrr, VHADDPSYrr, VHSUBPDYrr, VHSUBPSYrr)>; def WriteVHAddSubYLd: SchedWriteRes<[JLAGU, JFPU0]> { let Latency = 8; let ResourceCycles = [1, 2]; } -def : InstRW<[WriteVHAddSubYLd], (instregex "VH(ADD|SUB)P(D|S)Yrm")>; +def : InstRW<[WriteVHAddSubYLd], (instrs VHADDPDYrm, VHADDPSYrm, VHSUBPDYrm, VHSUBPSYrm)>; def WriteVMaskMovLd: SchedWriteRes<[JLAGU,JFPU01]> { let Latency = 6; let ResourceCycles = [1, 2]; } -def : 
InstRW<[WriteVMaskMovLd], (instregex "VMASKMOVP(D|S)rm")>; +def : InstRW<[WriteVMaskMovLd], (instrs VMASKMOVPDrm, VMASKMOVPSrm)>; def WriteVMaskMovYLd: SchedWriteRes<[JLAGU,JFPU01]> { let Latency = 6; let ResourceCycles = [1, 4]; } -def : InstRW<[WriteVMaskMovYLd], (instregex "VMASKMOVP(D|S)Yrm")>; +def : InstRW<[WriteVMaskMovYLd], (instrs VMASKMOVPDYrm, VMASKMOVPSYrm)>; def WriteVMaskMovSt: SchedWriteRes<[JFPU01,JSAGU]> { let Latency = 6; let ResourceCycles = [4, 1]; } -def : InstRW<[WriteVMaskMovSt], (instregex "VMASKMOVP(D|S)mr")>; +def : InstRW<[WriteVMaskMovSt], (instrs VMASKMOVPDmr, VMASKMOVPSmr)>; def WriteVMaskMovYSt: SchedWriteRes<[JFPU01,JSAGU]> { let Latency = 6; let ResourceCycles = [4, 1]; } -def : InstRW<[WriteVMaskMovYSt], (instregex "VMASKMOVP(D|S)Ymr")>; +def : InstRW<[WriteVMaskMovYSt], (instrs VMASKMOVPDYmr, VMASKMOVPSYmr)>; // TODO: In fact we have latency '2+i'. The +i represents an additional 1 cycle transfer // operation which moves the floating point result to the integer unit. During this @@ -678,7 +717,7 @@ def : InstRW<[WriteVMaskMovYSt], (instregex "VMASKMOVP(D|S)Ymr")>; def WriteVMOVMSK: SchedWriteRes<[JFPU0]> { let Latency = 3; } -def : InstRW<[WriteVMOVMSK], (instregex "VMOVMSKP(D|S)(Y)?rr")>; +def : InstRW<[WriteVMOVMSK], (instrs VMOVMSKPDrr, VMOVMSKPDYrr, VMOVMSKPSrr, VMOVMSKPSYrr)>; // TODO: In fact we have latency '3+i'. The +i represents an additional 1 cycle transfer // operation which moves the floating point result to the integer unit. During this @@ -689,63 +728,59 @@ def WriteVTESTY: SchedWriteRes<[JFPU01, JFPU0]> { let ResourceCycles = [2, 2]; let NumMicroOps = 3; } -def : InstRW<[WriteVTESTY], (instregex "VTESTP(S|D)Yrr")>; -def : InstRW<[WriteVTESTY], (instregex "VPTESTYrr")>; +def : InstRW<[WriteVTESTY], (instrs VPTESTYrr, VTESTPDYrr, VTESTPSYrr)>; def WriteVTESTYLd: SchedWriteRes<[JLAGU, JFPU01, JFPU0]> { let Latency = 9; let ResourceCycles = [1, 2, 2]; let NumMicroOps = 3; } -def : InstRW<[WriteVTESTYLd], (instregex "VTESTP(S|D)Yrm")>; -def : InstRW<[WriteVTESTYLd], (instregex "VPTESTYrm")>; +def : InstRW<[WriteVTESTYLd], (instrs VPTESTYrm, VTESTPDYrm, VTESTPSYrm)>; def WriteVTEST: SchedWriteRes<[JFPU0]> { let Latency = 3; } -def : InstRW<[WriteVTEST], (instregex "VTESTP(S|D)rr")>; -def : InstRW<[WriteVTEST], (instregex "VPTESTrr")>; +def : InstRW<[WriteVTEST], (instrs PTESTrr, VPTESTrr, VTESTPDrr, VTESTPSrr)>; def WriteVTESTLd: SchedWriteRes<[JLAGU, JFPU0]> { let Latency = 8; } -def : InstRW<[WriteVTESTLd], (instregex "VTESTP(S|D)rm")>; -def : InstRW<[WriteVTESTLd], (instregex "VPTESTrm")>; +def : InstRW<[WriteVTESTLd], (instrs PTESTrm, VPTESTrm, VTESTPDrm, VTESTPSrm)>; def WriteVSQRTYPD: SchedWriteRes<[JFPU1]> { let Latency = 54; let ResourceCycles = [54]; } -def : InstRW<[WriteVSQRTYPD], (instregex "VSQRTPDYr")>; +def : InstRW<[WriteVSQRTYPD], (instrs VSQRTPDYr)>; def WriteVSQRTYPDLd: SchedWriteRes<[JLAGU, JFPU1]> { let Latency = 59; let ResourceCycles = [1, 54]; } -def : InstRW<[WriteVSQRTYPDLd], (instregex "VSQRTPDYm")>; +def : InstRW<[WriteVSQRTYPDLd], (instrs VSQRTPDYm)>; def WriteVSQRTYPS: SchedWriteRes<[JFPU1]> { let Latency = 42; let ResourceCycles = [42]; } -def : InstRW<[WriteVSQRTYPS], (instregex "VSQRTPSYr")>; +def : InstRW<[WriteVSQRTYPS], (instrs VSQRTPSYr)>; def WriteVSQRTYPSLd: SchedWriteRes<[JLAGU, JFPU1]> { let Latency = 47; let ResourceCycles = [1, 42]; } -def : InstRW<[WriteVSQRTYPSLd], (instregex "VSQRTPSYm")>; +def : InstRW<[WriteVSQRTYPSLd], (instrs VSQRTPSYm)>; def WriteJVZEROALL: SchedWriteRes<[]> { let Latency = 90; 
let NumMicroOps = 73; } -def : InstRW<[WriteJVZEROALL], (instregex "VZEROALL")>; +def : InstRW<[WriteJVZEROALL], (instrs VZEROALL)>; def WriteJVZEROUPPER: SchedWriteRes<[]> { let Latency = 46; let NumMicroOps = 37; } -def : InstRW<[WriteJVZEROUPPER], (instregex "VZEROUPPER")>; +def : InstRW<[WriteJVZEROUPPER], (instrs VZEROUPPER)>; } // SchedModel diff --git a/lib/Target/X86/X86ScheduleSLM.td b/lib/Target/X86/X86ScheduleSLM.td index 6a2a998b5ff3..35ec7488db72 100644 --- a/lib/Target/X86/X86ScheduleSLM.td +++ b/lib/Target/X86/X86ScheduleSLM.td @@ -32,7 +32,6 @@ def SLMModel : SchedMachineModel { let SchedModel = SLMModel in { // Silvermont has 5 reservation stations for micro-ops - def IEC_RSV0 : ProcResource<1>; def IEC_RSV1 : ProcResource<1>; def FPC_RSV0 : ProcResource<1> { let BufferSize = 1; } @@ -78,6 +77,9 @@ def : WriteRes { let Latency = 3; } def : WriteRes; def : WriteRes; +// Treat misc copies as a move. +def : InstRW<[WriteMove], (instrs COPY)>; + defm : SMWriteResPair; defm : SMWriteResPair; defm : SMWriteResPair; diff --git a/lib/Target/X86/X86ScheduleZnver1.td b/lib/Target/X86/X86ScheduleZnver1.td index 5ebe8a28422e..a4e5327213c2 100644 --- a/lib/Target/X86/X86ScheduleZnver1.td +++ b/lib/Target/X86/X86ScheduleZnver1.td @@ -140,6 +140,9 @@ defm : ZnWriteResPair; defm : ZnWriteResPair; defm : ZnWriteResPair; +// Treat misc copies as a move. +def : InstRW<[WriteMove], (instrs COPY)>; + // IDIV def : WriteRes { let Latency = 41; @@ -742,7 +745,7 @@ def : InstRW<[ZnWriteFILD], (instregex "ILD_F(16|32|64)m")>; def ZnWriteFIST : SchedWriteRes<[ZnAGU, ZnFPU23]> { let Latency = 12; } -def : InstRW<[ZnWriteFIST], (instregex "IST_(F|FP)(16|32)m")>; +def : InstRW<[ZnWriteFIST], (instregex "IS(T|TT)_(F|FP)(16|32|64)m")>; def ZnWriteFPU13 : SchedWriteRes<[ZnAGU, ZnFPU13]> { let Latency = 8; @@ -761,7 +764,7 @@ def : InstRW<[ZnWriteFPU3], (instregex "LD_F1")>; // FLDPI FLDL2E etc. def : InstRW<[ZnWriteFPU3], (instregex "FLDPI", "FLDL2(T|E)" "FLDL(G|N)2")>; -def : InstRW<[WriteMicrocoded], (instregex "CMOV(B|BE|P|NB|NBE|NE|NP)_F")>; +def : InstRW<[WriteMicrocoded], (instregex "CMOV(B|BE|E|P|NB|NBE|NE|NP)_F")>; // FNSTSW. // AX. @@ -1629,8 +1632,8 @@ def ZnWriteFMADDr : SchedWriteRes<[ZnFPU03]> { } def : InstRW<[ZnWriteFMADDr], (instregex - "VF(N?)M(ADD|SUB|ADDSUB|SUBADD)P(S|D)(r213|r132|r231)r(Y)?", - "VF(N?)M(ADD|SUB)S(S|D)(r132|r231|r213)r", + "VF(N?)M(ADD|SUB|ADDSUB|SUBADD)P(S|D)(213|132|231)(Y)?r", + "VF(N?)M(ADD|SUB)(132|231|213)S(S|D)r", "VF(N?)M(ADD|SUB)S(S|D)4rr(_REV|_Int)?", "VF(N?)M(ADD|SUB)P(S|D)4rr(Y)?(_REV)?")>; @@ -1641,8 +1644,8 @@ def ZnWriteFMADDm : SchedWriteRes<[ZnAGU, ZnFPU03]> { } def : InstRW<[ZnWriteFMADDm], (instregex - "VF(N?)M(ADD|SUB|ADDSUB|SUBADD)P(S|D)(r213|r132|r231)m(Y)?", - "VF(N?)M(ADD|SUB)S(S|D)(r132|r231|r213)m", + "VF(N?)M(ADD|SUB|ADDSUB|SUBADD)(213|132|231)P(S|D)(Y)?m", + "VF(N?)M(ADD|SUB)(132|231|213)S(S|D)m", "VF(N?)M(ADD|SUB)S(S|D)4(rm|mr)(_Int)?", "VF(N?)M(ADD|SUB)P(S|D)4(rm|mr)(Y)?")>; diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp index d00655635965..e131f1a1e4bd 100644 --- a/lib/Target/X86/X86SelectionDAGInfo.cpp +++ b/lib/Target/X86/X86SelectionDAGInfo.cpp @@ -89,8 +89,9 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset( // Check to see if there is a specialized entry-point for memory zeroing. ConstantSDNode *ValC = dyn_cast(Val); - if (const char *bzeroEntry = ValC && - ValC->isNullValue() ? Subtarget.getBZeroEntry() : nullptr) { + if (const char *bzeroName = (ValC && ValC->isNullValue()) + ? 
DAG.getTargetLoweringInfo().getLibcallName(RTLIB::BZERO) + : nullptr) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout()); Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); @@ -106,7 +107,7 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset( CLI.setDebugLoc(dl) .setChain(Chain) .setLibCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), - DAG.getExternalSymbol(bzeroEntry, IntPtr), + DAG.getExternalSymbol(bzeroName, IntPtr), std::move(Args)) .setDiscardResult(); @@ -247,7 +248,7 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy( Repeats.AVT = Subtarget.is64Bit() ? MVT::i64 : MVT::i32; if (Repeats.BytesLeft() > 0 && - DAG.getMachineFunction().getFunction()->optForMinSize()) { + DAG.getMachineFunction().getFunction().optForMinSize()) { // When agressively optimizing for size, avoid generating the code to // handle BytesLeft. Repeats.AVT = MVT::i8; diff --git a/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp b/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp index 2cebb76022ef..c7ddf93f8e85 100644 --- a/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp +++ b/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp @@ -12,10 +12,8 @@ // //===----------------------------------------------------------------------===// -#include "X86ShuffleDecodeConstantPool.h" #include "Utils/X86ShuffleDecode.h" #include "llvm/ADT/APInt.h" -#include "llvm/CodeGen/MachineValueType.h" #include "llvm/IR/Constants.h" //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 963a9c30de0d..217b22c2bf70 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -22,8 +22,6 @@ #include "llvm/ADT/Triple.h" #include "llvm/CodeGen/GlobalISel/CallLowering.h" #include "llvm/CodeGen/GlobalISel/InstructionSelect.h" -#include "llvm/CodeGen/GlobalISel/Legalizer.h" -#include "llvm/CodeGen/GlobalISel/RegBankSelect.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Function.h" @@ -35,8 +33,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" -#include -#include #if defined(_MSC_VER) #include @@ -178,28 +174,6 @@ X86Subtarget::classifyGlobalFunctionReference(const GlobalValue *GV, return X86II::MO_NO_FLAG; } -/// This function returns the name of a function which has an interface like -/// the non-standard bzero function, if such a function exists on the -/// current subtarget and it is considered preferable over memset with zero -/// passed as the second argument. Otherwise it returns null. -const char *X86Subtarget::getBZeroEntry() const { - // Darwin 10 has a __bzero entry point for this purpose. - if (getTargetTriple().isMacOSX() && - !getTargetTriple().isMacOSXVersionLT(10, 6)) - return "__bzero"; - - return nullptr; -} - -bool X86Subtarget::hasSinCos() const { - if (getTargetTriple().isMacOSX()) { - return !getTargetTriple().isMacOSXVersionLT(10, 9) && is64Bit(); - } else if (getTargetTriple().isOSFuchsia()) { - return true; - } - return false; -} - /// Return true if the subtarget allows calls to immediate address. 
bool X86Subtarget::isLegalToCallImmediateAddr() const { // FIXME: I386 PE/COFF supports PC relative calls using IMAGE_REL_I386_REL32 @@ -280,12 +254,19 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { GatherOverhead = 2; if (hasAVX512()) ScatterOverhead = 2; + + // Consume the vector width attribute or apply any target specific limit. + if (PreferVectorWidthOverride) + PreferVectorWidth = PreferVectorWidthOverride; + else if (Prefer256Bit) + PreferVectorWidth = 256; } void X86Subtarget::initializeEnvironment() { X86SSELevel = NoSSE; X863DNowLevel = NoThreeDNow; HasX87 = false; + HasNOPL = false; HasCMov = false; HasX86_64 = false; HasPOPCNT = false; @@ -328,6 +309,7 @@ void X86Subtarget::initializeEnvironment() { HasVNNI = false; HasBITALG = false; HasSHA = false; + HasPREFETCHWT1 = false; HasPRFCHW = false; HasRDSEED = false; HasLAHFSAHF = false; @@ -339,6 +321,9 @@ void X86Subtarget::initializeEnvironment() { HasSGX = false; HasCLFLUSHOPT = false; HasCLWB = false; + HasRDPID = false; + UseRetpoline = false; + UseRetpolineExternalThunk = false; IsPMULLDSlow = false; IsSHLDSlow = false; IsUAMem16Slow = false; @@ -346,6 +331,9 @@ void X86Subtarget::initializeEnvironment() { HasSSEUnalignedMem = false; HasCmpxchg16b = false; UseLeaForSP = false; + HasPOPCNTFalseDeps = false; + HasLZCNTFalseDeps = false; + HasFastVariableShuffle = false; HasFastPartialYMMorZMMWrite = false; HasFastGather = false; HasFastScalarFSQRT = false; @@ -369,6 +357,8 @@ void X86Subtarget::initializeEnvironment() { X86ProcFamily = Others; GatherOverhead = 1024; ScatterOverhead = 1024; + PreferVectorWidth = UINT32_MAX; + Prefer256Bit = false; } X86Subtarget &X86Subtarget::initializeSubtargetDependencies(StringRef CPU, @@ -380,10 +370,12 @@ X86Subtarget &X86Subtarget::initializeSubtargetDependencies(StringRef CPU, X86Subtarget::X86Subtarget(const Triple &TT, StringRef CPU, StringRef FS, const X86TargetMachine &TM, - unsigned StackAlignOverride) + unsigned StackAlignOverride, + unsigned PreferVectorWidthOverride) : X86GenSubtargetInfo(TT, CPU, FS), X86ProcFamily(Others), PICStyle(PICStyles::None), TM(TM), TargetTriple(TT), StackAlignOverride(StackAlignOverride), + PreferVectorWidthOverride(PreferVectorWidthOverride), In64BitMode(TargetTriple.getArch() == Triple::x86_64), In32BitMode(TargetTriple.getArch() == Triple::x86 && TargetTriple.getEnvironment() != Triple::CODE16), diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index be4d46c470de..e34735bffa55 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -92,6 +92,10 @@ class X86Subtarget final : public X86GenSubtargetInfo { /// True if the processor supports X87 instructions. bool HasX87; + /// True if this processor has NOPL instruction + /// (generally pentium pro+). + bool HasNOPL; + /// True if this processor has conditional move instructions /// (generally pentium pro+). bool HasCMov; @@ -201,7 +205,7 @@ class X86Subtarget final : public X86GenSubtargetInfo { bool HasCLZERO; /// Processor has Prefetch with intent to Write instruction - bool HasPFPREFETCHWT1; + bool HasPREFETCHWT1; /// True if SHLD instructions are slow. bool IsSHLDSlow; @@ -228,6 +232,16 @@ class X86Subtarget final : public X86GenSubtargetInfo { /// the stack pointer. This is an optimization for Intel Atom processors. bool UseLeaForSP; + /// True if POPCNT instruction has a false dependency on the destination register. 
+ bool HasPOPCNTFalseDeps; + + /// True if LZCNT/TZCNT instructions have a false dependency on the destination register. + bool HasLZCNTFalseDeps; + + /// True if its preferable to combine to a single shuffle using a variable + /// mask over multiple fixed shuffles. + bool HasFastVariableShuffle; + /// True if there is no performance penalty to writing only the lower parts /// of a YMM or ZMM register without clearing the upper part. bool HasFastPartialYMMorZMMWrite; @@ -337,6 +351,17 @@ class X86Subtarget final : public X86GenSubtargetInfo { /// Processor supports Cache Line Write Back instruction bool HasCLWB; + /// Processor support RDPID instruction + bool HasRDPID; + + /// Use a retpoline thunk rather than indirect calls to block speculative + /// execution. + bool UseRetpoline; + + /// When using a retpoline thunk, call an externally provided thunk rather + /// than emitting one inside the compiler. + bool UseRetpolineExternalThunk; + /// Use software floating point for code generation. bool UseSoftFloat; @@ -348,6 +373,9 @@ class X86Subtarget final : public X86GenSubtargetInfo { /// unsigned MaxInlineSizeThreshold; + /// Indicates target prefers 256 bit instructions. + bool Prefer256Bit; + /// What processor and OS we're targeting. Triple TargetTriple; @@ -364,6 +392,13 @@ class X86Subtarget final : public X86GenSubtargetInfo { /// Override the stack alignment. unsigned StackAlignOverride; + /// Preferred vector width from function attribute. + unsigned PreferVectorWidthOverride; + + /// Resolved preferred vector width from function attribute and subtarget + /// features. + unsigned PreferVectorWidth; + /// True if compiling for 64-bit, false for 16-bit or 32-bit. bool In64BitMode; @@ -389,7 +424,8 @@ class X86Subtarget final : public X86GenSubtargetInfo { /// of the specified triple. /// X86Subtarget(const Triple &TT, StringRef CPU, StringRef FS, - const X86TargetMachine &TM, unsigned StackAlignOverride); + const X86TargetMachine &TM, unsigned StackAlignOverride, + unsigned PreferVectorWidthOverride); const X86TargetLowering *getTargetLowering() const override { return &TLInfo; @@ -465,6 +501,7 @@ class X86Subtarget final : public X86GenSubtargetInfo { void setPICStyle(PICStyles::Style Style) { PICStyle = Style; } bool hasX87() const { return HasX87; } + bool hasNOPL() const { return HasNOPL; } bool hasCMov() const { return HasCMov; } bool hasSSE1() const { return X86SSELevel >= SSE1; } bool hasSSE2() const { return X86SSELevel >= SSE2; } @@ -513,7 +550,14 @@ class X86Subtarget final : public X86GenSubtargetInfo { bool hasRTM() const { return HasRTM; } bool hasADX() const { return HasADX; } bool hasSHA() const { return HasSHA; } - bool hasPRFCHW() const { return HasPRFCHW; } + bool hasPRFCHW() const { return HasPRFCHW || HasPREFETCHWT1; } + bool hasPREFETCHWT1() const { return HasPREFETCHWT1; } + bool hasSSEPrefetch() const { + // We implicitly enable these when we have a write prefix supporting cache + // level OR if we have prfchw, but don't already have a read prefetch from + // 3dnow. 
+ return hasSSE1() || (hasPRFCHW() && !has3DNow()) || hasPREFETCHWT1(); + } bool hasRDSEED() const { return HasRDSEED; } bool hasLAHFSAHF() const { return HasLAHFSAHF; } bool hasMWAITX() const { return HasMWAITX; } @@ -527,6 +571,11 @@ class X86Subtarget final : public X86GenSubtargetInfo { bool hasSSEUnalignedMem() const { return HasSSEUnalignedMem; } bool hasCmpxchg16b() const { return HasCmpxchg16b; } bool useLeaForSP() const { return UseLeaForSP; } + bool hasPOPCNTFalseDeps() const { return HasPOPCNTFalseDeps; } + bool hasLZCNTFalseDeps() const { return HasLZCNTFalseDeps; } + bool hasFastVariableShuffle() const { + return HasFastVariableShuffle; + } bool hasFastPartialYMMorZMMWrite() const { return HasFastPartialYMMorZMMWrite; } @@ -560,6 +609,22 @@ class X86Subtarget final : public X86GenSubtargetInfo { bool hasIBT() const { return HasIBT; } bool hasCLFLUSHOPT() const { return HasCLFLUSHOPT; } bool hasCLWB() const { return HasCLWB; } + bool hasRDPID() const { return HasRDPID; } + bool useRetpoline() const { return UseRetpoline; } + bool useRetpolineExternalThunk() const { return UseRetpolineExternalThunk; } + + unsigned getPreferVectorWidth() const { return PreferVectorWidth; } + + // Helper functions to determine when we should allow widening to 512-bit + // during codegen. + // TODO: Currently we're always allowing widening on CPUs without VLX, + // because for many cases we don't have a better option. + bool canExtendTo512DQ() const { + return hasAVX512() && (!hasVLX() || getPreferVectorWidth() >= 512); + } + bool canExtendTo512BW() const { + return hasBWI() && canExtendTo512DQ(); + } bool isXRaySupported() const override { return is64Bit(); } @@ -682,16 +747,9 @@ class X86Subtarget final : public X86GenSubtargetInfo { /// Return true if the subtarget allows calls to immediate address. bool isLegalToCallImmediateAddr() const; - /// This function returns the name of a function which has an interface - /// like the non-standard bzero function, if such a function exists on - /// the current subtarget and it is considered prefereable over - /// memset with zero passed as the second argument. Otherwise it - /// returns null. - const char *getBZeroEntry() const; - - /// This function returns true if the target has sincos() routine in its - /// compiler runtime or math libraries. - bool hasSinCos() const; + /// If we are using retpolines, we need to expand indirectbr to avoid it + /// lowering to an actual indirect jump. + bool enableIndirectBrExpand() const override { return useRetpoline(); } /// Enable the MachineScheduler pass for all X86 subtargets. 
bool enableMachineScheduler() const override { return true; } diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index ea8c9862230e..5f67949f8ef2 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -26,7 +26,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/CodeGen/ExecutionDepsFix.h" +#include "llvm/CodeGen/ExecutionDomainFix.h" #include "llvm/CodeGen/GlobalISel/CallLowering.h" #include "llvm/CodeGen/GlobalISel/IRTranslator.h" #include "llvm/CodeGen/GlobalISel/InstructionSelect.h" @@ -60,7 +60,7 @@ void initializeWinEHStatePassPass(PassRegistry &); void initializeFixupLEAPassPass(PassRegistry &); void initializeX86CallFrameOptimizationPass(PassRegistry &); void initializeX86CmovConverterPassPass(PassRegistry &); -void initializeX86ExecutionDepsFixPass(PassRegistry &); +void initializeX86ExecutionDomainFixPass(PassRegistry &); void initializeX86DomainReassignmentPass(PassRegistry &); } // end namespace llvm @@ -78,7 +78,7 @@ extern "C" void LLVMInitializeX86Target() { initializeFixupLEAPassPass(PR); initializeX86CallFrameOptimizationPass(PR); initializeX86CmovConverterPassPass(PR); - initializeX86ExecutionDepsFixPass(PR); + initializeX86ExecutionDomainFixPass(PR); initializeX86DomainReassignmentPass(PR); } @@ -255,7 +255,24 @@ X86TargetMachine::getSubtargetImpl(const Function &F) const { if (SoftFloat) Key += FS.empty() ? "+soft-float" : ",+soft-float"; - FS = Key.substr(CPU.size()); + // Keep track of the key width after all features are added so we can extract + // the feature string out later. + unsigned CPUFSWidth = Key.size(); + + // Translate vector width function attribute into subtarget features. This + // overrides any CPU specific turning parameter + unsigned PreferVectorWidthOverride = 0; + if (F.hasFnAttribute("prefer-vector-width")) { + StringRef Val = F.getFnAttribute("prefer-vector-width").getValueAsString(); + unsigned Width; + if (!Val.getAsInteger(0, Width)) { + Key += ",prefer-vector-width="; + Key += Val; + PreferVectorWidthOverride = Width; + } + } + + FS = Key.slice(CPU.size(), CPUFSWidth); auto &I = SubtargetMap[Key]; if (!I) { @@ -264,7 +281,8 @@ X86TargetMachine::getSubtargetImpl(const Function &F) const { // function that reside in TargetOptions. resetTargetOptions(F); I = llvm::make_unique(TargetTriple, CPU, FS, *this, - Options.StackAlignmentOverride); + Options.StackAlignmentOverride, + PreferVectorWidthOverride); } return I.get(); } @@ -281,10 +299,9 @@ UseVZeroUpper("x86-use-vzeroupper", cl::Hidden, // X86 TTI query. 
//===----------------------------------------------------------------------===// -TargetIRAnalysis X86TargetMachine::getTargetIRAnalysis() { - return TargetIRAnalysis([this](const Function &F) { - return TargetTransformInfo(X86TTIImpl(this, F)); - }); +TargetTransformInfo +X86TargetMachine::getTargetTransformInfo(const Function &F) { + return TargetTransformInfo(X86TTIImpl(this, F)); } //===----------------------------------------------------------------------===// @@ -322,23 +339,27 @@ class X86PassConfig : public TargetPassConfig { void addPreRegAlloc() override; void addPostRegAlloc() override; void addPreEmitPass() override; + void addPreEmitPass2() override; void addPreSched2() override; }; -class X86ExecutionDepsFix : public ExecutionDepsFix { +class X86ExecutionDomainFix : public ExecutionDomainFix { public: static char ID; - X86ExecutionDepsFix() : ExecutionDepsFix(ID, X86::VR128XRegClass) {} + X86ExecutionDomainFix() : ExecutionDomainFix(ID, X86::VR128XRegClass) {} StringRef getPassName() const override { return "X86 Execution Dependency Fix"; } }; -char X86ExecutionDepsFix::ID; +char X86ExecutionDomainFix::ID; } // end anonymous namespace -INITIALIZE_PASS(X86ExecutionDepsFix, "x86-execution-deps-fix", - "X86 Execution Dependency Fix", false, false) +INITIALIZE_PASS_BEGIN(X86ExecutionDomainFix, "x86-execution-domain-fix", + "X86 Execution Domain Fix", false, false) +INITIALIZE_PASS_DEPENDENCY(ReachingDefAnalysis) +INITIALIZE_PASS_END(X86ExecutionDomainFix, "x86-execution-domain-fix", + "X86 Execution Domain Fix", false, false) TargetPassConfig *X86TargetMachine::createPassConfig(PassManagerBase &PM) { return new X86PassConfig(*this, PM); @@ -351,6 +372,11 @@ void X86PassConfig::addIRPasses() { if (TM->getOptLevel() != CodeGenOpt::None) addPass(createInterleavedAccessPass()); + + // Add passes that handle indirect branch removal and insertion of a retpoline + // thunk. These will be a no-op unless a function subtarget has the retpoline + // feature enabled. + addPass(createIndirectBrExpandPass()); } bool X86PassConfig::addInstSelector() { @@ -424,8 +450,12 @@ void X86PassConfig::addPostRegAlloc() { void X86PassConfig::addPreSched2() { addPass(createX86ExpandPseudoPass()); } void X86PassConfig::addPreEmitPass() { - if (getOptLevel() != CodeGenOpt::None) - addPass(new X86ExecutionDepsFix()); + if (getOptLevel() != CodeGenOpt::None) { + addPass(new X86ExecutionDomainFix()); + addPass(createBreakFalseDeps()); + } + + addPass(createX86IndirectBranchTrackingPass()); if (UseVZeroUpper) addPass(createX86IssueVZeroUpperPass()); @@ -437,3 +467,7 @@ void X86PassConfig::addPreEmitPass() { addPass(createX86EvexToVexInsts()); } } + +void X86PassConfig::addPreEmitPass2() { + addPass(createX86RetpolineThunksPass()); +} diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h index 952bd1321ff9..5b21cd82b5b1 100644 --- a/lib/Target/X86/X86TargetMachine.h +++ b/lib/Target/X86/X86TargetMachine.h @@ -45,7 +45,7 @@ class X86TargetMachine final : public LLVMTargetMachine { // attributes of each function. const X86Subtarget *getSubtargetImpl() const = delete; - TargetIRAnalysis getTargetIRAnalysis() override; + TargetTransformInfo getTargetTransformInfo(const Function &F) override; // Set up the pass pipeline. 
TargetPassConfig *createPassConfig(PassManagerBase &PM) override; diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index 9b07491c75c3..e24c8dfcd54f 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -130,12 +130,13 @@ unsigned X86TTIImpl::getNumberOfRegisters(bool Vector) { } unsigned X86TTIImpl::getRegisterBitWidth(bool Vector) const { + unsigned PreferVectorWidth = ST->getPreferVectorWidth(); if (Vector) { - if (ST->hasAVX512()) + if (ST->hasAVX512() && PreferVectorWidth >= 512) return 512; - if (ST->hasAVX()) + if (ST->hasAVX() && PreferVectorWidth >= 256) return 256; - if (ST->hasSSE1()) + if (ST->hasSSE1() && PreferVectorWidth >= 128) return 128; return 0; } @@ -754,7 +755,8 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, // type remains the same. if (Kind == TTI::SK_PermuteSingleSrc && LT.first != 1) { MVT LegalVT = LT.second; - if (LegalVT.getVectorElementType().getSizeInBits() == + if (LegalVT.isVector() && + LegalVT.getVectorElementType().getSizeInBits() == Tp->getVectorElementType()->getPrimitiveSizeInBits() && LegalVT.getVectorNumElements() < Tp->getVectorNumElements()) { @@ -2522,7 +2524,7 @@ bool X86TTIImpl::isLegalMaskedGather(Type *DataTy) { // TODO: Remove the explicit ST->hasAVX512()?, That would mean we would only // enable gather with a -march. return (DataWidth == 32 || DataWidth == 64) && - (ST->hasAVX512() || (ST->hasFastGather() && ST->hasAVX2())); + (ST->hasAVX512() || (ST->hasFastGather() && ST->hasAVX2())); } bool X86TTIImpl::isLegalMaskedScatter(Type *DataType) { @@ -2839,21 +2841,16 @@ int X86TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, ArrayRef Indices, unsigned Alignment, unsigned AddressSpace) { - auto isSupportedOnAVX512 = [](Type *VecTy, bool &RequiresBW) { - RequiresBW = false; + auto isSupportedOnAVX512 = [](Type *VecTy, bool HasBW) { Type *EltTy = VecTy->getVectorElementType(); if (EltTy->isFloatTy() || EltTy->isDoubleTy() || EltTy->isIntegerTy(64) || EltTy->isIntegerTy(32) || EltTy->isPointerTy()) return true; - if (EltTy->isIntegerTy(16) || EltTy->isIntegerTy(8)) { - RequiresBW = true; - return true; - } + if (EltTy->isIntegerTy(16) || EltTy->isIntegerTy(8)) + return HasBW; return false; }; - bool RequiresBW; - bool HasAVX512Solution = isSupportedOnAVX512(VecTy, RequiresBW); - if (ST->hasAVX512() && HasAVX512Solution && (!RequiresBW || ST->hasBWI())) + if (ST->hasAVX512() && isSupportedOnAVX512(VecTy, ST->hasBWI())) return getInterleavedMemoryOpCostAVX512(Opcode, VecTy, Factor, Indices, Alignment, AddressSpace); if (ST->hasAVX2()) diff --git a/lib/Target/X86/X86VZeroUpper.cpp b/lib/Target/X86/X86VZeroUpper.cpp index 5999591d1814..224262830b12 100644 --- a/lib/Target/X86/X86VZeroUpper.cpp +++ b/lib/Target/X86/X86VZeroUpper.cpp @@ -235,7 +235,7 @@ void VZeroUpperInserter::processBasicBlock(MachineBasicBlock &MBB) { // If the call has no RegMask, skip it as well. It usually happens on // helper function calls (such as '_chkstk', '_ftol2') where standard // calling convention is not used (RegMask is not used to mark register - // clobbered and register usage (def/imp-def/use) is well-defined and + // clobbered and register usage (def/implicit-def/use) is well-defined and // explicitly specified. 
if (IsCall && !callHasRegMask(MI)) continue; @@ -285,7 +285,7 @@ bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) { TII = ST.getInstrInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); EverMadeChange = false; - IsX86INTR = MF.getFunction()->getCallingConv() == CallingConv::X86_INTR; + IsX86INTR = MF.getFunction().getCallingConv() == CallingConv::X86_INTR; bool FnHasLiveInYmmOrZmm = checkFnHasLiveInYmmOrZmm(MRI); diff --git a/lib/Target/X86/X86WinAllocaExpander.cpp b/lib/Target/X86/X86WinAllocaExpander.cpp index 8a186e94d9cf..1046696587d9 100644 --- a/lib/Target/X86/X86WinAllocaExpander.cpp +++ b/lib/Target/X86/X86WinAllocaExpander.cpp @@ -279,9 +279,9 @@ bool X86WinAllocaExpander::runOnMachineFunction(MachineFunction &MF) { SlotSize = TRI->getSlotSize(); StackProbeSize = 4096; - if (MF.getFunction()->hasFnAttribute("stack-probe-size")) { + if (MF.getFunction().hasFnAttribute("stack-probe-size")) { MF.getFunction() - ->getFnAttribute("stack-probe-size") + .getFnAttribute("stack-probe-size") .getValueAsString() .getAsInteger(0, StackProbeSize); } diff --git a/lib/Target/X86/X86WinEHState.cpp b/lib/Target/X86/X86WinEHState.cpp index 0472a85f50da..6d6dedc60736 100644 --- a/lib/Target/X86/X86WinEHState.cpp +++ b/lib/Target/X86/X86WinEHState.cpp @@ -149,6 +149,12 @@ void WinEHStatePass::getAnalysisUsage(AnalysisUsage &AU) const { } bool WinEHStatePass::runOnFunction(Function &F) { + // Don't insert state stores or exception handler thunks for + // available_externally functions. The handler needs to reference the LSDA, + // which will not be emitted in this case. + if (F.hasAvailableExternallyLinkage()) + return false; + // Check the personality. Do nothing if this personality doesn't use funclets. if (!F.hasPersonalityFn()) return false; diff --git a/lib/Target/XCore/XCoreFrameLowering.cpp b/lib/Target/XCore/XCoreFrameLowering.cpp index 3d8712dd03ec..62b2c8eee152 100644 --- a/lib/Target/XCore/XCoreFrameLowering.cpp +++ b/lib/Target/XCore/XCoreFrameLowering.cpp @@ -238,7 +238,7 @@ void XCoreFrameLowering::emitPrologue(MachineFunction &MF, report_fatal_error("emitPrologue unsupported alignment: " + Twine(MFI.getMaxAlignment())); - const AttributeList &PAL = MF.getFunction()->getAttributes(); + const AttributeList &PAL = MF.getFunction().getAttributes(); if (PAL.hasAttrSomewhere(Attribute::Nest)) BuildMI(MBB, MBBI, dl, TII.get(XCore::LDWSP_ru6), XCore::R11).addImm(0); // FIX: Needs addMemOperand() but can't use getFixedStack() or getStack(). @@ -324,7 +324,7 @@ void XCoreFrameLowering::emitPrologue(MachineFunction &MF, if (XFI->hasEHSpillSlot()) { // The unwinder requires stack slot & CFI offsets for the exception info. // We do not save/spill these registers. - const Function *Fn = MF.getFunction(); + const Function *Fn = &MF.getFunction(); const Constant *PersonalityFn = Fn->hasPersonalityFn() ? Fn->getPersonalityFn() : nullptr; SmallVector SpillList; @@ -359,7 +359,7 @@ void XCoreFrameLowering::emitEpilogue(MachineFunction &MF, if (RetOpcode == XCore::EH_RETURN) { // 'Restore' the exception info the unwinder has placed into the stack // slots. - const Function *Fn = MF.getFunction(); + const Function *Fn = &MF.getFunction(); const Constant *PersonalityFn = Fn->hasPersonalityFn() ? 
Fn->getPersonalityFn() : nullptr; SmallVector SpillList; @@ -542,7 +542,7 @@ void XCoreFrameLowering::determineCalleeSaves(MachineFunction &MF, const MachineRegisterInfo &MRI = MF.getRegInfo(); bool LRUsed = MRI.isPhysRegModified(XCore::LR); - if (!LRUsed && !MF.getFunction()->isVarArg() && + if (!LRUsed && !MF.getFunction().isVarArg() && MF.getFrameInfo().estimateStackSize(MF)) // If we need to extend the stack it is more efficient to use entsp / retsp. // We force the LR to be saved so these instructions are used. diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp b/lib/Target/XCore/XCoreInstrInfo.cpp index 7a9c6fc93f8a..c885332b07ad 100644 --- a/lib/Target/XCore/XCoreInstrInfo.cpp +++ b/lib/Target/XCore/XCoreInstrInfo.cpp @@ -443,7 +443,7 @@ MachineBasicBlock::iterator XCoreInstrInfo::loadImmediate( } MachineConstantPool *ConstantPool = MBB.getParent()->getConstantPool(); const Constant *C = ConstantInt::get( - Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Value); + Type::getInt32Ty(MBB.getParent()->getFunction().getContext()), Value); unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4); return BuildMI(MBB, MI, dl, get(XCore::LDWCP_lru6), Reg) .addConstantPoolIndex(Idx) diff --git a/lib/Target/XCore/XCoreMachineFunctionInfo.cpp b/lib/Target/XCore/XCoreMachineFunctionInfo.cpp index 35089fabd5ae..b7b0daab9806 100644 --- a/lib/Target/XCore/XCoreMachineFunctionInfo.cpp +++ b/lib/Target/XCore/XCoreMachineFunctionInfo.cpp @@ -39,7 +39,7 @@ int XCoreFunctionInfo::createLRSpillSlot(MachineFunction &MF) { const TargetRegisterClass &RC = XCore::GRRegsRegClass; const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); MachineFrameInfo &MFI = MF.getFrameInfo(); - if (! MF.getFunction()->isVarArg()) { + if (! MF.getFunction().isVarArg()) { // A fixed offset of 0 allows us to save / restore LR using entsp / retsp. 
LRSpillSlot = MFI.CreateFixedObject(TRI.getSpillSize(RC), 0, true); } else { diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp index a6cf68370093..70376d40a37f 100644 --- a/lib/Target/XCore/XCoreRegisterInfo.cpp +++ b/lib/Target/XCore/XCoreRegisterInfo.cpp @@ -204,8 +204,7 @@ static void InsertSPConstInst(MachineBasicBlock::iterator II, } bool XCoreRegisterInfo::needsFrameMoves(const MachineFunction &MF) { - return MF.getMMI().hasDebugInfo() || - MF.getFunction()->needsUnwindTableEntry(); + return MF.getMMI().hasDebugInfo() || MF.getFunction().needsUnwindTableEntry(); } const MCPhysReg * diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp index 3aa7187e0cd1..38925bfd51b0 100644 --- a/lib/Target/XCore/XCoreTargetMachine.cpp +++ b/lib/Target/XCore/XCoreTargetMachine.cpp @@ -108,8 +108,7 @@ extern "C" void LLVMInitializeXCoreTarget() { RegisterTargetMachine X(getTheXCoreTarget()); } -TargetIRAnalysis XCoreTargetMachine::getTargetIRAnalysis() { - return TargetIRAnalysis([this](const Function &F) { - return TargetTransformInfo(XCoreTTIImpl(this, F)); - }); +TargetTransformInfo +XCoreTargetMachine::getTargetTransformInfo(const Function &F) { + return TargetTransformInfo(XCoreTTIImpl(this, F)); } diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h index 5baa3524d2a6..965b9b2c4d65 100644 --- a/lib/Target/XCore/XCoreTargetMachine.h +++ b/lib/Target/XCore/XCoreTargetMachine.h @@ -43,7 +43,7 @@ class XCoreTargetMachine : public LLVMTargetMachine { // Pass Pipeline Configuration TargetPassConfig *createPassConfig(PassManagerBase &PM) override; - TargetIRAnalysis getTargetIRAnalysis() override; + TargetTransformInfo getTargetTransformInfo(const Function &F) override; TargetLoweringObjectFile *getObjFileLowering() const override { return TLOF.get(); diff --git a/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp b/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp index 3891efae57bb..684617e79454 100644 --- a/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp +++ b/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "llvm/ToolDrivers/llvm-dlltool/DlltoolDriver.h" -#include "llvm/Object/ArchiveWriter.h" #include "llvm/Object/COFF.h" #include "llvm/Object/COFFImportFile.h" #include "llvm/Object/COFFModuleDefinition.h" @@ -21,7 +20,6 @@ #include "llvm/Option/Option.h" #include "llvm/Support/Path.h" -#include #include using namespace llvm; @@ -175,7 +173,7 @@ int llvm::dlltoolDriverMain(llvm::ArrayRef ArgsArr) { } } - if (writeImportLibrary(Def->OutputFile, Path, Def->Exports, Machine, true)) + if (writeImportLibrary(Def->OutputFile, Path, Def->Exports, Machine, true, true)) return 1; return 0; } diff --git a/lib/Transforms/Coroutines/CoroSplit.cpp b/lib/Transforms/Coroutines/CoroSplit.cpp index 8712ca4823c6..4a69fbfe4354 100644 --- a/lib/Transforms/Coroutines/CoroSplit.cpp +++ b/lib/Transforms/Coroutines/CoroSplit.cpp @@ -265,6 +265,7 @@ static Function *createClone(Function &F, Twine Suffix, coro::Shape &Shape, SmallVector Returns; CloneFunctionInto(NewF, &F, VMap, /*ModuleLevelChanges=*/true, Returns); + NewF->setDSOLocal(true); // Remove old returns. 
for (ReturnInst *Return : Returns) @@ -440,16 +441,14 @@ static void scanPHIsAndUpdateValueMap(Instruction *Prev, BasicBlock *NewBlock, DenseMap &ResolvedValues) { auto *PrevBB = Prev->getParent(); - auto *I = &*NewBlock->begin(); - while (auto PN = dyn_cast(I)) { - auto V = PN->getIncomingValueForBlock(PrevBB); + for (PHINode &PN : NewBlock->phis()) { + auto V = PN.getIncomingValueForBlock(PrevBB); // See if we already resolved it. auto VI = ResolvedValues.find(V); if (VI != ResolvedValues.end()) V = VI->second; // Remember the value. - ResolvedValues[PN] = V; - I = I->getNextNode(); + ResolvedValues[&PN] = V; } } diff --git a/lib/Transforms/IPO/AlwaysInliner.cpp b/lib/Transforms/IPO/AlwaysInliner.cpp index b7d96007c24a..5be728b3855a 100644 --- a/lib/Transforms/IPO/AlwaysInliner.cpp +++ b/lib/Transforms/IPO/AlwaysInliner.cpp @@ -15,15 +15,12 @@ #include "llvm/Transforms/IPO/AlwaysInliner.h" #include "llvm/ADT/SetVector.h" #include "llvm/Analysis/AssumptionCache.h" -#include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/InlineCost.h" -#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/Transforms/IPO.h" @@ -53,7 +50,8 @@ PreservedAnalyses AlwaysInlinerPass::run(Module &M, ModuleAnalysisManager &) { for (CallSite CS : Calls) // FIXME: We really shouldn't be able to fail to inline at this point! // We should do something to log or check the inline failures here. - Changed |= InlineFunction(CS, IFI); + Changed |= + InlineFunction(CS, IFI, /*CalleeAAR=*/nullptr, InsertLifetime); // Remember to try and delete this function afterward. This both avoids // re-walking the rest of the module and avoids dealing with any iterator diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index 3eff421d53e5..d3a7b0e76fef 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -719,7 +719,7 @@ static bool isSafeToPromoteArgument(Argument *Arg, bool isByValOrInAlloca, BasicBlock *BB = Load->getParent(); MemoryLocation Loc = MemoryLocation::get(Load); - if (AAR.canInstructionRangeModRef(BB->front(), *Load, Loc, MRI_Mod)) + if (AAR.canInstructionRangeModRef(BB->front(), *Load, Loc, ModRefInfo::Mod)) return false; // Pointer is invalidated! // Now check every path from the entry block to the load for transparency. 
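Note on the alias-analysis query style used in the ArgumentPromotion hunk above and the FunctionAttrs hunks below: the patch moves from bitwise tests against the old MRI_* flags (for example, MRB & MRI_Mod) to the enum-class ModRefInfo helpers (createModRefInfo, isNoModRef, isModSet, isRefSet). The standalone C++ sketch below mirrors that helper pattern only; the enumerator layout is simplified relative to the real AliasAnalysis.h definition and is meant purely as an illustration of the calls that appear in these hunks.

// Simplified stand-in for llvm::ModRefInfo; the real enum carries more states.
#include <cstdio>

enum class ModRefInfo { NoModRef = 0, Ref = 1, Mod = 2, ModRef = Ref | Mod };

static bool isNoModRef(ModRefInfo MRI) { return MRI == ModRefInfo::NoModRef; }
static bool isModSet(ModRefInfo MRI) {
  return (static_cast<int>(MRI) & static_cast<int>(ModRefInfo::Mod)) != 0;
}
static bool isRefSet(ModRefInfo MRI) {
  return (static_cast<int>(MRI) & static_cast<int>(ModRefInfo::Ref)) != 0;
}

int main() {
  // A call that both reads and writes memory, as checkFunctionMemoryAccess
  // would see it after converting the mod/ref behavior with createModRefInfo.
  ModRefInfo MRI = ModRefInfo::ModRef;
  if (isNoModRef(MRI))
    std::puts("call does not access memory");
  else if (isModSet(MRI))
    std::puts("call may write memory");   // FunctionAttrs gives up (MAK_MayWrite)
  else if (isRefSet(MRI))
    std::puts("call only reads memory");  // still a candidate for readonly
  return 0;
}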
@@ -963,7 +963,7 @@ PreservedAnalyses ArgumentPromotionPass::run(LazyCallGraph::SCC &C, return FAM.getResult(F); }; - Function *NewF = promoteArguments(&OldF, AARGetter, 3u, None); + Function *NewF = promoteArguments(&OldF, AARGetter, MaxElements, None); if (!NewF) continue; LocalChange = true; diff --git a/lib/Transforms/IPO/CMakeLists.txt b/lib/Transforms/IPO/CMakeLists.txt index 397561746f86..28d38471069c 100644 --- a/lib/Transforms/IPO/CMakeLists.txt +++ b/lib/Transforms/IPO/CMakeLists.txt @@ -29,6 +29,7 @@ add_llvm_library(LLVMipo SampleProfile.cpp StripDeadPrototypes.cpp StripSymbols.cpp + SyntheticCountsPropagation.cpp ThinLTOBitcodeWriter.cpp WholeProgramDevirt.cpp diff --git a/lib/Transforms/IPO/CrossDSOCFI.cpp b/lib/Transforms/IPO/CrossDSOCFI.cpp index 7ad5c8c0216f..886029ea58d5 100644 --- a/lib/Transforms/IPO/CrossDSOCFI.cpp +++ b/lib/Transforms/IPO/CrossDSOCFI.cpp @@ -13,7 +13,6 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/IPO/CrossDSOCFI.h" -#include "llvm/ADT/EquivalenceClasses.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/Triple.h" @@ -32,7 +31,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/IPO.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" using namespace llvm; diff --git a/lib/Transforms/IPO/ExtractGV.cpp b/lib/Transforms/IPO/ExtractGV.cpp index d1147f7d844b..042cacb70ad0 100644 --- a/lib/Transforms/IPO/ExtractGV.cpp +++ b/lib/Transforms/IPO/ExtractGV.cpp @@ -12,8 +12,6 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/SetVector.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" diff --git a/lib/Transforms/IPO/ForceFunctionAttrs.cpp b/lib/Transforms/IPO/ForceFunctionAttrs.cpp index e48c3d732378..325a5d77aadb 100644 --- a/lib/Transforms/IPO/ForceFunctionAttrs.cpp +++ b/lib/Transforms/IPO/ForceFunctionAttrs.cpp @@ -52,6 +52,7 @@ static Attribute::AttrKind parseAttrKind(StringRef Kind) { .Case("returns_twice", Attribute::ReturnsTwice) .Case("safestack", Attribute::SafeStack) .Case("sanitize_address", Attribute::SanitizeAddress) + .Case("sanitize_hwaddress", Attribute::SanitizeHWAddress) .Case("sanitize_memory", Attribute::SanitizeMemory) .Case("sanitize_thread", Attribute::SanitizeThread) .Case("ssp", Attribute::StackProtect) diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp index f9850619f963..5352e32479bb 100644 --- a/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/lib/Transforms/IPO/FunctionAttrs.cpp @@ -130,17 +130,18 @@ static MemoryAccessKind checkFunctionMemoryAccess(Function &F, bool ThisBody, SCCNodes.count(CS.getCalledFunction())) continue; FunctionModRefBehavior MRB = AAR.getModRefBehavior(CS); + ModRefInfo MRI = createModRefInfo(MRB); // If the call doesn't access memory, we're done. - if (!(MRB & MRI_ModRef)) + if (isNoModRef(MRI)) continue; if (!AliasAnalysis::onlyAccessesArgPointees(MRB)) { // The call could access any memory. If that includes writes, give up. - if (MRB & MRI_Mod) + if (isModSet(MRI)) return MAK_MayWrite; // If it reads, note it. 
- if (MRB & MRI_Ref) + if (isRefSet(MRI)) ReadsMemory = true; continue; } @@ -162,10 +163,10 @@ static MemoryAccessKind checkFunctionMemoryAccess(Function &F, bool ThisBody, if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true)) continue; - if (MRB & MRI_Mod) + if (isModSet(MRI)) // Writes non-local memory. Give up. return MAK_MayWrite; - if (MRB & MRI_Ref) + if (isRefSet(MRI)) // Ok, it reads non-local memory. ReadsMemory = true; } diff --git a/lib/Transforms/IPO/FunctionImport.cpp b/lib/Transforms/IPO/FunctionImport.cpp index 3a1d6de342fe..b1eefb964546 100644 --- a/lib/Transforms/IPO/FunctionImport.cpp +++ b/lib/Transforms/IPO/FunctionImport.cpp @@ -22,6 +22,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Bitcode/BitcodeReader.h" #include "llvm/IR/AutoUpgrade.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalObject.h" @@ -44,7 +45,9 @@ #include "llvm/Support/SourceMgr.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/IPO/Internalize.h" +#include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/FunctionImportUtils.h" +#include "llvm/Transforms/Utils/ValueMapper.h" #include #include #include @@ -118,6 +121,12 @@ static cl::opt SummaryFile("summary-file", cl::desc("The summary file to use for function importing.")); +/// Used when testing importing from distributed indexes via opt +// -function-import. +static cl::opt + ImportAllIndex("import-all-index", + cl::desc("Import all external functions in index.")); + // Load lazily a module from \p FileName in \p Context. static std::unique_ptr loadFile(const std::string &FileName, LLVMContext &Context) { @@ -172,13 +181,8 @@ selectCallee(const ModuleSummaryIndex &Index, if (GlobalValue::isInterposableLinkage(GVSummary->linkage())) // There is no point in importing these, we can't inline them return false; - if (isa(GVSummary)) - // Aliases can't point to "available_externally". - // FIXME: we should import alias as available_externally *function*, - // the destination module does not need to know it is an alias. - return false; - auto *Summary = cast(GVSummary); + auto *Summary = cast(GVSummary->getBaseObject()); // If this is a local function, make sure we import the copy // in the caller's module. The only time a local function can @@ -227,7 +231,7 @@ updateValueInfoForIndirectCalls(const ModuleSummaryIndex &Index, ValueInfo VI) { // it, rather than needing to perform this mapping on each walk. 
auto GUID = Index.getGUIDFromOriginalID(VI.getGUID()); if (GUID == 0) - return nullptr; + return ValueInfo(); return Index.getValueInfo(GUID); } @@ -275,9 +279,7 @@ static void computeImportForFunction( } // "Resolve" the summary - assert(!isa(CalleeSummary) && - "Unexpected alias in import list"); - const auto *ResolvedCalleeSummary = cast(CalleeSummary); + const auto *ResolvedCalleeSummary = cast(CalleeSummary->getBaseObject()); assert(ResolvedCalleeSummary->instCount() <= NewThreshold && "selectCallee() didn't honor the threshold"); @@ -432,6 +434,19 @@ void llvm::ComputeCrossModuleImport( #endif } +#ifndef NDEBUG +static void dumpImportListForModule(StringRef ModulePath, + FunctionImporter::ImportMapTy &ImportList) { + DEBUG(dbgs() << "* Module " << ModulePath << " imports from " + << ImportList.size() << " modules.\n"); + for (auto &Src : ImportList) { + auto SrcModName = Src.first(); + DEBUG(dbgs() << " - " << Src.second.size() << " functions imported from " + << SrcModName << "\n"); + } +} +#endif + /// Compute all the imports for the given module in the Index. void llvm::ComputeCrossModuleImportForModule( StringRef ModulePath, const ModuleSummaryIndex &Index, @@ -446,13 +461,34 @@ void llvm::ComputeCrossModuleImportForModule( ComputeImportForModule(FunctionSummaryMap, Index, ImportList); #ifndef NDEBUG - DEBUG(dbgs() << "* Module " << ModulePath << " imports from " - << ImportList.size() << " modules.\n"); - for (auto &Src : ImportList) { - auto SrcModName = Src.first(); - DEBUG(dbgs() << " - " << Src.second.size() << " functions imported from " - << SrcModName << "\n"); + dumpImportListForModule(ModulePath, ImportList); +#endif +} + +// Mark all external summaries in Index for import into the given module. +// Used for distributed builds using a distributed index. +void llvm::ComputeCrossModuleImportForModuleFromIndex( + StringRef ModulePath, const ModuleSummaryIndex &Index, + FunctionImporter::ImportMapTy &ImportList) { + for (auto &GlobalList : Index) { + // Ignore entries for undefined references. + if (GlobalList.second.SummaryList.empty()) + continue; + + auto GUID = GlobalList.first; + assert(GlobalList.second.SummaryList.size() == 1 && + "Expected individual combined index to have one summary per GUID"); + auto &Summary = GlobalList.second.SummaryList[0]; + // Skip the summaries for the importing module. These are included to + // e.g. record required linkage changes. + if (Summary->modulePath() == ModulePath) + continue; + // Doesn't matter what value we plug in to the map, just needs an entry + // to provoke importing by thinBackend. + ImportList[Summary->modulePath()][GUID] = 1; } +#ifndef NDEBUG + dumpImportListForModule(ModulePath, ImportList); #endif } @@ -481,7 +517,7 @@ void llvm::computeDeadSymbols( for (auto &S : Entry.second.SummaryList) if (S->isLive()) { DEBUG(dbgs() << "Live root: " << Entry.first << "\n"); - Worklist.push_back(ValueInfo(&Entry)); + Worklist.push_back(ValueInfo(/*IsAnalysis=*/false, &Entry)); ++LiveSymbols; break; } @@ -642,23 +678,9 @@ void llvm::thinLTOResolveWeakForLinkerModule( /// Run internalization on \p TheModule based on symmary analysis. void llvm::thinLTOInternalizeModule(Module &TheModule, const GVSummaryMapTy &DefinedGlobals) { - // Parse inline ASM and collect the list of symbols that are not defined in - // the current module. 
- StringSet<> AsmUndefinedRefs; - ModuleSymbolTable::CollectAsmSymbols( - TheModule, - [&AsmUndefinedRefs](StringRef Name, object::BasicSymbolRef::Flags Flags) { - if (Flags & object::BasicSymbolRef::SF_Undefined) - AsmUndefinedRefs.insert(Name); - }); - // Declare a callback for the internalize pass that will ask for every // candidate GlobalValue if it can be internalized or not. auto MustPreserveGV = [&](const GlobalValue &GV) -> bool { - // Can't be internalized if referenced in inline asm. - if (AsmUndefinedRefs.count(GV.getName())) - return true; - // Lookup the linkage recorded in the summaries during global analysis. auto GS = DefinedGlobals.find(GV.getGUID()); if (GS == DefinedGlobals.end()) { @@ -692,6 +714,20 @@ void llvm::thinLTOInternalizeModule(Module &TheModule, internalizeModule(TheModule, MustPreserveGV); } +/// Make alias a clone of its aliasee. +static Function *replaceAliasWithAliasee(Module *SrcModule, GlobalAlias *GA) { + Function *Fn = cast(GA->getBaseObject()); + + ValueToValueMapTy VMap; + Function *NewFn = CloneFunction(Fn, VMap); + // Clone should use the original alias's linkage and name, and we ensure + // all uses of alias instead use the new clone (casted if necessary). + NewFn->setLinkage(GA->getLinkage()); + GA->replaceAllUsesWith(ConstantExpr::getBitCast(NewFn, GA->getType())); + NewFn->takeName(GA); + return NewFn; +} + // Automatically import functions in Module \p DestModule based on the summaries // index. Expected FunctionImporter::importFunctions( @@ -761,17 +797,36 @@ Expected FunctionImporter::importFunctions( GlobalsToImport.insert(&GV); } } -#ifndef NDEBUG for (GlobalAlias &GA : SrcModule->aliases()) { if (!GA.hasName()) continue; auto GUID = GA.getGUID(); - assert(!ImportGUIDs.count(GUID) && "Unexpected alias in import list"); - DEBUG(dbgs() << "Not importing alias " << GUID + auto Import = ImportGUIDs.count(GUID); + DEBUG(dbgs() << (Import ? "Is" : "Not") << " importing alias " << GUID << " " << GA.getName() << " from " << SrcModule->getSourceFileName() << "\n"); + if (Import) { + if (Error Err = GA.materialize()) + return std::move(Err); + // Import alias as a copy of its aliasee. + GlobalObject *Base = GA.getBaseObject(); + if (Error Err = Base->materialize()) + return std::move(Err); + auto *Fn = replaceAliasWithAliasee(SrcModule.get(), &GA); + DEBUG(dbgs() << "Is importing aliasee fn " << Base->getGUID() + << " " << Base->getName() << " from " + << SrcModule->getSourceFileName() << "\n"); + if (EnableImportMetadata) { + // Add 'thinlto_src_module' metadata for statistics and debugging. + Fn->setMetadata( + "thinlto_src_module", + MDNode::get(DestModule.getContext(), + {MDString::get(DestModule.getContext(), + SrcModule->getSourceFileName())})); + } + GlobalsToImport.insert(Fn); + } } -#endif // Upgrade debug info after we're done materializing all the globals and we // have loaded all the required metadata! @@ -817,8 +872,15 @@ static bool doImportingForModule(Module &M) { // First step is collecting the import list. FunctionImporter::ImportMapTy ImportList; - ComputeCrossModuleImportForModule(M.getModuleIdentifier(), *Index, - ImportList); + // If requested, simply import all functions in the index. This is used + // when testing distributed backend handling via the opt tool, when + // we have distributed indexes containing exactly the summaries to import. 
+ if (ImportAllIndex) + ComputeCrossModuleImportForModuleFromIndex(M.getModuleIdentifier(), *Index, + ImportList); + else + ComputeCrossModuleImportForModule(M.getModuleIdentifier(), *Index, + ImportList); // Conservatively mark all internal values as promoted. This interface is // only used when doing importing via the function importing pass. The pass diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp index 1f354e8e3aa7..ada9eb80e680 100644 --- a/lib/Transforms/IPO/GlobalDCE.cpp +++ b/lib/Transforms/IPO/GlobalDCE.cpp @@ -18,7 +18,6 @@ #include "llvm/Transforms/IPO/GlobalDCE.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" -#include "llvm/IR/Constants.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index 4bb2984e3b47..65dcd281009f 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -2486,6 +2486,7 @@ OptimizeGlobalAliases(Module &M, // Give the aliasee the name, linkage and other attributes of the alias. Target->takeName(&*J); Target->setLinkage(J->getLinkage()); + Target->setDSOLocal(J->isDSOLocal()); Target->setVisibility(J->getVisibility()); Target->setDLLStorageClass(J->getDLLStorageClass()); diff --git a/lib/Transforms/IPO/InferFunctionAttrs.cpp b/lib/Transforms/IPO/InferFunctionAttrs.cpp index 15d7515cc842..470f97b8ba61 100644 --- a/lib/Transforms/IPO/InferFunctionAttrs.cpp +++ b/lib/Transforms/IPO/InferFunctionAttrs.cpp @@ -8,7 +8,6 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/IPO/InferFunctionAttrs.h" -#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/LLVMContext.h" diff --git a/lib/Transforms/IPO/InlineSimple.cpp b/lib/Transforms/IPO/InlineSimple.cpp index b7a7979bb562..b259a0abd63c 100644 --- a/lib/Transforms/IPO/InlineSimple.cpp +++ b/lib/Transforms/IPO/InlineSimple.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/AssumptionCache.h" -#include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" @@ -21,7 +20,6 @@ #include "llvm/IR/CallingConv.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/Transforms/IPO.h" diff --git a/lib/Transforms/IPO/LowerTypeTests.cpp b/lib/Transforms/IPO/LowerTypeTests.cpp index 6cef866b7b84..5dd9fd35e305 100644 --- a/lib/Transforms/IPO/LowerTypeTests.cpp +++ b/lib/Transforms/IPO/LowerTypeTests.cpp @@ -956,6 +956,21 @@ void LowerTypeTestsModule::importFunction(Function *F, bool isDefinition) { FDecl = Function::Create(F->getFunctionType(), GlobalValue::ExternalLinkage, Name, &M); FDecl->setVisibility(Visibility); + + // Delete aliases pointing to this function, they'll be re-created in the + // merged output + SmallVector ToErase; + for (auto &U : F->uses()) { + if (auto *A = dyn_cast(U.getUser())) { + Function *AliasDecl = Function::Create( + F->getFunctionType(), GlobalValue::ExternalLinkage, "", &M); + AliasDecl->takeName(A); + A->replaceAllUsesWith(AliasDecl); + ToErase.push_back(A); + } + } + for (auto *A : ToErase) + A->eraseFromParent(); } else { // Function definition without type metadata, 
where some other translation // unit contained a declaration with type metadata. This normally happens @@ -1480,38 +1495,25 @@ void LowerTypeTestsModule::buildBitSetsFromDisjointSet( for (auto &&MemSet : TypeMembers) GLB.addFragment(MemSet); - // Build the bitsets from this disjoint set. - if (Globals.empty() || isa(Globals[0]->getGlobal())) { - // Build a vector of global variables with the computed layout. - std::vector OrderedGVs(Globals.size()); - auto OGI = OrderedGVs.begin(); - for (auto &&F : GLB.Fragments) { - for (auto &&Offset : F) { - auto GV = dyn_cast(Globals[Offset]->getGlobal()); - if (!GV) - report_fatal_error("Type identifier may not contain both global " - "variables and functions"); - *OGI++ = Globals[Offset]; - } + // Build a vector of globals with the computed layout. + bool IsGlobalSet = + Globals.empty() || isa(Globals[0]->getGlobal()); + std::vector OrderedGTMs(Globals.size()); + auto OGTMI = OrderedGTMs.begin(); + for (auto &&F : GLB.Fragments) { + for (auto &&Offset : F) { + if (IsGlobalSet != isa(Globals[Offset]->getGlobal())) + report_fatal_error("Type identifier may not contain both global " + "variables and functions"); + *OGTMI++ = Globals[Offset]; } - - buildBitSetsFromGlobalVariables(TypeIds, OrderedGVs); - } else { - // Build a vector of functions with the computed layout. - std::vector OrderedFns(Globals.size()); - auto OFI = OrderedFns.begin(); - for (auto &&F : GLB.Fragments) { - for (auto &&Offset : F) { - auto Fn = dyn_cast(Globals[Offset]->getGlobal()); - if (!Fn) - report_fatal_error("Type identifier may not contain both global " - "variables and functions"); - *OFI++ = Globals[Offset]; - } - } - - buildBitSetsFromFunctions(TypeIds, OrderedFns); } + + // Build the bitsets from this disjoint set. + if (IsGlobalSet) + buildBitSetsFromGlobalVariables(TypeIds, OrderedGTMs); + else + buildBitSetsFromFunctions(TypeIds, OrderedGTMs); } /// Lower all type tests in this module. @@ -1527,7 +1529,7 @@ LowerTypeTestsModule::LowerTypeTestsModule( } bool LowerTypeTestsModule::runForTesting(Module &M) { - ModuleSummaryIndex Summary; + ModuleSummaryIndex Summary(/*IsPerformingAnalysis=*/false); // Handle the command-line summary arguments. This code is for testing // purposes only, so we handle errors directly. @@ -1706,7 +1708,7 @@ bool LowerTypeTestsModule::lower() { GlobalTypeMember::create(Alloc, &GO, IsDefinition, IsExported, Types); for (MDNode *Type : Types) { verifyTypeMDNode(&GO, Type); - auto &Info = TypeIdInfo[cast(Type)->getOperand(1)]; + auto &Info = TypeIdInfo[Type->getOperand(1)]; Info.Index = ++I; Info.RefGlobals.push_back(GTM); } @@ -1817,6 +1819,49 @@ bool LowerTypeTestsModule::lower() { allocateByteArrays(); + // Parse alias data to replace stand-in function declarations for aliases + // with an alias to the intended target. 
+ if (ExportSummary) { + if (NamedMDNode *AliasesMD = M.getNamedMetadata("aliases")) { + for (auto AliasMD : AliasesMD->operands()) { + assert(AliasMD->getNumOperands() >= 4); + StringRef AliasName = + cast(AliasMD->getOperand(0))->getString(); + StringRef Aliasee = cast(AliasMD->getOperand(1))->getString(); + + if (!ExportedFunctions.count(Aliasee) || + ExportedFunctions[Aliasee].Linkage != CFL_Definition || + !M.getNamedAlias(Aliasee)) + continue; + + GlobalValue::VisibilityTypes Visibility = + static_cast( + cast(AliasMD->getOperand(2)) + ->getValue() + ->getUniqueInteger() + .getZExtValue()); + bool Weak = + static_cast(cast(AliasMD->getOperand(3)) + ->getValue() + ->getUniqueInteger() + .getZExtValue()); + + auto *Alias = GlobalAlias::create("", M.getNamedAlias(Aliasee)); + Alias->setVisibility(Visibility); + if (Weak) + Alias->setLinkage(GlobalValue::WeakAnyLinkage); + + if (auto *F = M.getFunction(AliasName)) { + Alias->takeName(F); + F->replaceAllUsesWith(Alias); + F->eraseFromParent(); + } else { + Alias->setName(AliasName); + } + } + } + } + return true; } diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp index c00e13c4ae21..fc1f2874f853 100644 --- a/lib/Transforms/IPO/PartialInlining.cpp +++ b/lib/Transforms/IPO/PartialInlining.cpp @@ -26,6 +26,7 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" @@ -67,17 +68,67 @@ using namespace llvm; STATISTIC(NumPartialInlined, "Number of callsites functions partially inlined into."); +STATISTIC(NumColdOutlinePartialInlined, "Number of times functions with " + "cold outlined regions were partially " + "inlined into its caller(s)."); +STATISTIC(NumColdRegionsFound, + "Number of cold single entry/exit regions found."); +STATISTIC(NumColdRegionsOutlined, + "Number of cold single entry/exit regions outlined."); // Command line option to disable partial-inlining. The default is false: static cl::opt DisablePartialInlining("disable-partial-inlining", cl::init(false), - cl::Hidden, cl::desc("Disable partial ininling")); + cl::Hidden, cl::desc("Disable partial inlining")); +// Command line option to disable multi-region partial-inlining. The default is +// false: +static cl::opt DisableMultiRegionPartialInline( + "disable-mr-partial-inlining", cl::init(false), cl::Hidden, + cl::desc("Disable multi-region partial inlining")); + +// Command line option to force outlining in regions with live exit variables. +// The default is false: +static cl::opt + ForceLiveExit("pi-force-live-exit-outline", cl::init(false), cl::Hidden, + cl::desc("Force outline regions with live exits")); + +// Command line option to enable marking outline functions with Cold Calling +// Convention. The default is false: +static cl::opt + MarkOutlinedColdCC("pi-mark-coldcc", cl::init(false), cl::Hidden, + cl::desc("Mark outline function calls with ColdCC")); + +#ifndef NDEBUG +// Command line option to debug partial-inlining. 
The default is none: +static cl::opt TracePartialInlining("trace-partial-inlining", + cl::init(false), cl::Hidden, + cl::desc("Trace partial inlining.")); +#endif // This is an option used by testing: static cl::opt SkipCostAnalysis("skip-partial-inlining-cost-analysis", cl::init(false), cl::ZeroOrMore, cl::ReallyHidden, cl::desc("Skip Cost Analysis")); +// Used to determine if a cold region is worth outlining based on +// its inlining cost compared to the original function. Default is set at 10%. +// ie. if the cold region reduces the inlining cost of the original function by +// at least 10%. +static cl::opt MinRegionSizeRatio( + "min-region-size-ratio", cl::init(0.1), cl::Hidden, + cl::desc("Minimum ratio comparing relative sizes of each " + "outline candidate and original function")); +// Used to tune the minimum number of execution counts needed in the predecessor +// block to the cold edge. ie. confidence interval. +static cl::opt + MinBlockCounterExecution("min-block-execution", cl::init(100), cl::Hidden, + cl::desc("Minimum block executions to consider " + "its BranchProbabilityInfo valid")); +// Used to determine when an edge is considered cold. Default is set to 10%. ie. +// if the branch probability is 10% or less, then it is deemed as 'cold'. +static cl::opt ColdBranchRatio( + "cold-branch-ratio", cl::init(0.1), cl::Hidden, + cl::desc("Minimum BranchProbability to consider a region cold.")); static cl::opt MaxNumInlineBlocks( "max-num-inline-blocks", cl::init(5), cl::Hidden, @@ -125,23 +176,58 @@ struct FunctionOutliningInfo { SmallVector ReturnBlockPreds; }; +struct FunctionOutliningMultiRegionInfo { + FunctionOutliningMultiRegionInfo() + : ORI() {} + + // Container for outline regions + struct OutlineRegionInfo { + OutlineRegionInfo(SmallVector Region, + BasicBlock *EntryBlock, BasicBlock *ExitBlock, + BasicBlock *ReturnBlock) + : Region(Region), EntryBlock(EntryBlock), ExitBlock(ExitBlock), + ReturnBlock(ReturnBlock) {} + SmallVector Region; + BasicBlock *EntryBlock; + BasicBlock *ExitBlock; + BasicBlock *ReturnBlock; + }; + + SmallVector ORI; +}; + struct PartialInlinerImpl { + PartialInlinerImpl( std::function *GetAC, std::function *GTTI, Optional> GBFI, - ProfileSummaryInfo *ProfSI) - : GetAssumptionCache(GetAC), GetTTI(GTTI), GetBFI(GBFI), PSI(ProfSI) {} + ProfileSummaryInfo *ProfSI, + std::function *GORE) + : GetAssumptionCache(GetAC), GetTTI(GTTI), GetBFI(GBFI), PSI(ProfSI), + GetORE(GORE) {} bool run(Module &M); - Function *unswitchFunction(Function *F); - - // This class speculatively clones the the function to be partial inlined. + // Main part of the transformation that calls helper functions to find + // outlining candidates, clone & outline the function, and attempt to + // partially inline the resulting function. Returns true if + // inlining was successful, false otherwise. Also returns the outline + // function (only if we partially inlined early returns) as there is a + // possibility to further "peel" early return statements that were left in the + // outline function due to code size. + std::pair unswitchFunction(Function *F); + + // This class speculatively clones the function to be partial inlined. // At the end of partial inlining, the remaining callsites to the cloned // function that are not partially inlined will be fixed up to reference // the original function, and the cloned function will be erased. 
struct FunctionCloner { - FunctionCloner(Function *F, FunctionOutliningInfo *OI); + // Two constructors, one for single region outlining, the other for + // multi-region outlining. + FunctionCloner(Function *F, FunctionOutliningInfo *OI, + OptimizationRemarkEmitter &ORE); + FunctionCloner(Function *F, FunctionOutliningMultiRegionInfo *OMRI, + OptimizationRemarkEmitter &ORE); ~FunctionCloner(); // Prepare for function outlining: making sure there is only @@ -149,25 +235,34 @@ struct PartialInlinerImpl { // the return block. void NormalizeReturnBlock(); - // Do function outlining. + // Do function outlining for cold regions. + bool doMultiRegionFunctionOutlining(); + // Do function outlining for region after early return block(s). // NOTE: For vararg functions that do the vararg handling in the outlined // function, we temporarily generate IR that does not properly // forward varargs to the outlined function. Calling InlineFunction // will update calls to the outlined functions to properly forward // the varargs. - Function *doFunctionOutlining(); + Function *doSingleRegionFunctionOutlining(); Function *OrigFunc = nullptr; Function *ClonedFunc = nullptr; - Function *OutlinedFunc = nullptr; - BasicBlock *OutliningCallBB = nullptr; + + typedef std::pair FuncBodyCallerPair; + // Keep track of Outlined Functions and the basic block they're called from. + SmallVector OutlinedFunctions; + // ClonedFunc is inlined in one of its callers after function // outlining. bool IsFunctionInlined = false; // The cost of the region to be outlined. int OutlinedRegionCost = 0; + // ClonedOI is specific to outlining non-early return blocks. std::unique_ptr ClonedOI = nullptr; + // ClonedOMRI is specific to outlining cold regions. + std::unique_ptr ClonedOMRI = nullptr; std::unique_ptr ClonedFuncBFI = nullptr; + OptimizationRemarkEmitter &ORE; }; private: @@ -176,6 +271,7 @@ struct PartialInlinerImpl { std::function *GetTTI; Optional> GetBFI; ProfileSummaryInfo *PSI; + std::function *GetORE; // Return the frequency of the OutlininingBB relative to F's entry point. // The result is no larger than 1 and is represented using BP. @@ -186,8 +282,7 @@ struct PartialInlinerImpl { // Return true if the callee of CS should be partially inlined with // profit. bool shouldPartialInline(CallSite CS, FunctionCloner &Cloner, - BlockFrequency WeightedOutliningRcost, - OptimizationRemarkEmitter &ORE); + BlockFrequency WeightedOutliningRcost); // Try to inline DuplicateFunction (cloned from F with call to // the OutlinedFunction into its callers. 
Return true @@ -241,6 +336,8 @@ struct PartialInlinerImpl { static int computeBBInlineCost(BasicBlock *BB); std::unique_ptr computeOutliningInfo(Function *F); + std::unique_ptr + computeOutliningColdRegionsInfo(Function *F); }; struct PartialInlinerLegacyPass : public ModulePass { @@ -265,6 +362,7 @@ struct PartialInlinerLegacyPass : public ModulePass { &getAnalysis(); ProfileSummaryInfo *PSI = getAnalysis().getPSI(); + std::unique_ptr UPORE; std::function GetAssumptionCache = [&ACT](Function &F) -> AssumptionCache & { @@ -276,12 +374,187 @@ struct PartialInlinerLegacyPass : public ModulePass { return TTIWP->getTTI(F); }; - return PartialInlinerImpl(&GetAssumptionCache, &GetTTI, None, PSI).run(M); + std::function GetORE = + [&UPORE](Function &F) -> OptimizationRemarkEmitter & { + UPORE.reset(new OptimizationRemarkEmitter(&F)); + return *UPORE.get(); + }; + + return PartialInlinerImpl(&GetAssumptionCache, &GetTTI, NoneType::None, PSI, + &GetORE) + .run(M); } }; } // end anonymous namespace +std::unique_ptr +PartialInlinerImpl::computeOutliningColdRegionsInfo(Function *F) { + BasicBlock *EntryBlock = &F->front(); + + DominatorTree DT(*F); + LoopInfo LI(DT); + BranchProbabilityInfo BPI(*F, LI); + std::unique_ptr ScopedBFI; + BlockFrequencyInfo *BFI; + if (!GetBFI) { + ScopedBFI.reset(new BlockFrequencyInfo(*F, BPI, LI)); + BFI = ScopedBFI.get(); + } else + BFI = &(*GetBFI)(*F); + + auto &ORE = (*GetORE)(*F); + + // Return if we don't have profiling information. + if (!PSI->hasInstrumentationProfile()) + return std::unique_ptr(); + + std::unique_ptr OutliningInfo = + llvm::make_unique(); + + auto IsSingleEntry = [](SmallVectorImpl &BlockList) { + BasicBlock *Dom = BlockList.front(); + return BlockList.size() > 1 && + std::distance(pred_begin(Dom), pred_end(Dom)) == 1; + }; + + auto IsSingleExit = + [&ORE](SmallVectorImpl &BlockList) -> BasicBlock * { + BasicBlock *ExitBlock = nullptr; + for (auto *Block : BlockList) { + for (auto SI = succ_begin(Block); SI != succ_end(Block); ++SI) { + if (!is_contained(BlockList, *SI)) { + if (ExitBlock) { + ORE.emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "MultiExitRegion", + &SI->front()) + << "Region dominated by " + << ore::NV("Block", BlockList.front()->getName()) + << " has more than one region exit edge."; + }); + return nullptr; + } else + ExitBlock = Block; + } + } + } + return ExitBlock; + }; + + auto BBProfileCount = [BFI](BasicBlock *BB) { + return BFI->getBlockProfileCount(BB) + ? BFI->getBlockProfileCount(BB).getValue() + : 0; + }; + + // Use the same computeBBInlineCost function to compute the cost savings of + // the outlining the candidate region. + int OverallFunctionCost = 0; + for (auto &BB : *F) + OverallFunctionCost += computeBBInlineCost(&BB); + +#ifndef NDEBUG + if (TracePartialInlining) + dbgs() << "OverallFunctionCost = " << OverallFunctionCost << "\n"; +#endif + int MinOutlineRegionCost = + static_cast(OverallFunctionCost * MinRegionSizeRatio); + BranchProbability MinBranchProbability( + static_cast(ColdBranchRatio * MinBlockCounterExecution), + MinBlockCounterExecution); + bool ColdCandidateFound = false; + BasicBlock *CurrEntry = EntryBlock; + std::vector DFS; + DenseMap VisitedMap; + DFS.push_back(CurrEntry); + VisitedMap[CurrEntry] = true; + // Use Depth First Search on the basic blocks to find CFG edges that are + // considered cold. + // Cold regions considered must also have its inline cost compared to the + // overall inline cost of the original function. 
The region is outlined only + // if it reduced the inline cost of the function by 'MinOutlineRegionCost' or + // more. + while (!DFS.empty()) { + auto *thisBB = DFS.back(); + DFS.pop_back(); + // Only consider regions with predecessor blocks that are considered + // not-cold (default: part of the top 99.99% of all block counters) + // AND greater than our minimum block execution count (default: 100). + if (PSI->isColdBB(thisBB, BFI) || + BBProfileCount(thisBB) < MinBlockCounterExecution) + continue; + for (auto SI = succ_begin(thisBB); SI != succ_end(thisBB); ++SI) { + if (VisitedMap[*SI]) + continue; + VisitedMap[*SI] = true; + DFS.push_back(*SI); + // If branch isn't cold, we skip to the next one. + BranchProbability SuccProb = BPI.getEdgeProbability(thisBB, *SI); + if (SuccProb > MinBranchProbability) + continue; +#ifndef NDEBUG + if (TracePartialInlining) { + dbgs() << "Found cold edge: " << thisBB->getName() << "->" + << (*SI)->getName() << "\nBranch Probability = " << SuccProb + << "\n"; + } +#endif + SmallVector DominateVector; + DT.getDescendants(*SI, DominateVector); + // We can only outline single entry regions (for now). + if (!IsSingleEntry(DominateVector)) + continue; + BasicBlock *ExitBlock = nullptr; + // We can only outline single exit regions (for now). + if (!(ExitBlock = IsSingleExit(DominateVector))) + continue; + int OutlineRegionCost = 0; + for (auto *BB : DominateVector) + OutlineRegionCost += computeBBInlineCost(BB); + +#ifndef NDEBUG + if (TracePartialInlining) + dbgs() << "OutlineRegionCost = " << OutlineRegionCost << "\n"; +#endif + + if (OutlineRegionCost < MinOutlineRegionCost) { + ORE.emit([&]() { + return OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly", + &SI->front()) + << ore::NV("Callee", F) << " inline cost-savings smaller than " + << ore::NV("Cost", MinOutlineRegionCost); + }); + continue; + } + // For now, ignore blocks that belong to a SISE region that is a + // candidate for outlining. In the future, we may want to look + // at inner regions because the outer region may have live-exit + // variables. 
+ for (auto *BB : DominateVector) + VisitedMap[BB] = true; + // ReturnBlock here means the block after the outline call + BasicBlock *ReturnBlock = ExitBlock->getSingleSuccessor(); + // assert(ReturnBlock && "ReturnBlock is NULL somehow!"); + FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegInfo( + DominateVector, DominateVector.front(), ExitBlock, ReturnBlock); + RegInfo.Region = DominateVector; + OutliningInfo->ORI.push_back(RegInfo); +#ifndef NDEBUG + if (TracePartialInlining) { + dbgs() << "Found Cold Candidate starting at block: " + << DominateVector.front()->getName() << "\n"; + } +#endif + ColdCandidateFound = true; + NumColdRegionsFound++; + } + } + if (ColdCandidateFound) + return OutliningInfo; + else + return std::unique_ptr(); +} + std::unique_ptr PartialInlinerImpl::computeOutliningInfo(Function *F) { BasicBlock *EntryBlock = &F->front(); @@ -437,7 +710,7 @@ PartialInlinerImpl::computeOutliningInfo(Function *F) { // Check if there is PGO data or user annoated branch data: static bool hasProfileData(Function *F, FunctionOutliningInfo *OI) { - if (F->getEntryCount()) + if (F->hasProfileData()) return true; // Now check if any of the entry block has MD_prof data: for (auto *E : OI->Entries) { @@ -453,14 +726,19 @@ static bool hasProfileData(Function *F, FunctionOutliningInfo *OI) { BranchProbability PartialInlinerImpl::getOutliningCallBBRelativeFreq(FunctionCloner &Cloner) { + BasicBlock *OutliningCallBB = Cloner.OutlinedFunctions.back().second; auto EntryFreq = Cloner.ClonedFuncBFI->getBlockFreq(&Cloner.ClonedFunc->getEntryBlock()); auto OutliningCallFreq = - Cloner.ClonedFuncBFI->getBlockFreq(Cloner.OutliningCallBB); - - auto OutlineRegionRelFreq = - BranchProbability::getBranchProbability(OutliningCallFreq.getFrequency(), - EntryFreq.getFrequency()); + Cloner.ClonedFuncBFI->getBlockFreq(OutliningCallBB); + // FIXME Hackery needed because ClonedFuncBFI is based on the function BEFORE + // we outlined any regions, so we may encounter situations where the + // OutliningCallFreq is *slightly* bigger than the EntryFreq. 
+ if (OutliningCallFreq.getFrequency() > EntryFreq.getFrequency()) { + OutliningCallFreq = EntryFreq; + } + auto OutlineRegionRelFreq = BranchProbability::getBranchProbability( + OutliningCallFreq.getFrequency(), EntryFreq.getFrequency()); if (hasProfileData(Cloner.OrigFunc, Cloner.ClonedOI.get())) return OutlineRegionRelFreq; @@ -487,8 +765,8 @@ PartialInlinerImpl::getOutliningCallBBRelativeFreq(FunctionCloner &Cloner) { } bool PartialInlinerImpl::shouldPartialInline( - CallSite CS, FunctionCloner &Cloner, BlockFrequency WeightedOutliningRcost, - OptimizationRemarkEmitter &ORE) { + CallSite CS, FunctionCloner &Cloner, + BlockFrequency WeightedOutliningRcost) { using namespace ore; if (SkipCostAnalysis) @@ -500,6 +778,7 @@ bool PartialInlinerImpl::shouldPartialInline( Function *Caller = CS.getCaller(); auto &CalleeTTI = (*GetTTI)(*Callee); + auto &ORE = (*GetORE)(*Caller); InlineCost IC = getInlineCost(CS, getInlineParams(), CalleeTTI, *GetAssumptionCache, GetBFI, PSI, &ORE); @@ -584,6 +863,7 @@ int PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB) { case Instruction::GetElementPtr: if (cast(I)->hasAllZeroIndices()) continue; + break; default: break; } @@ -616,22 +896,26 @@ int PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB) { std::tuple PartialInlinerImpl::computeOutliningCosts(FunctionCloner &Cloner) { - // Now compute the cost of the call sequence to the outlined function - // 'OutlinedFunction' in BB 'OutliningCallBB': - int OutliningFuncCallCost = computeBBInlineCost(Cloner.OutliningCallBB); - - // Now compute the cost of the extracted/outlined function itself: - int OutlinedFunctionCost = 0; - for (BasicBlock &BB : *Cloner.OutlinedFunc) { - OutlinedFunctionCost += computeBBInlineCost(&BB); + int OutliningFuncCallCost = 0, OutlinedFunctionCost = 0; + for (auto FuncBBPair : Cloner.OutlinedFunctions) { + Function *OutlinedFunc = FuncBBPair.first; + BasicBlock* OutliningCallBB = FuncBBPair.second; + // Now compute the cost of the call sequence to the outlined function + // 'OutlinedFunction' in BB 'OutliningCallBB': + OutliningFuncCallCost += computeBBInlineCost(OutliningCallBB); + + // Now compute the cost of the extracted/outlined function itself: + for (BasicBlock &BB : *OutlinedFunc) + OutlinedFunctionCost += computeBBInlineCost(&BB); } - assert(OutlinedFunctionCost >= Cloner.OutlinedRegionCost && "Outlined function cost should be no less than the outlined region"); + // The code extractor introduces a new root and exit stub blocks with // additional unconditional branches. Those branches will be eliminated // later with bb layout. The cost should be adjusted accordingly: - OutlinedFunctionCost -= 2 * InlineConstants::InstrCost; + OutlinedFunctionCost -= + 2 * InlineConstants::InstrCost * Cloner.OutlinedFunctions.size(); int OutliningRuntimeOverhead = OutliningFuncCallCost + @@ -685,9 +969,9 @@ void PartialInlinerImpl::computeCallsiteToProfCountMap( } } -PartialInlinerImpl::FunctionCloner::FunctionCloner(Function *F, - FunctionOutliningInfo *OI) - : OrigFunc(F) { +PartialInlinerImpl::FunctionCloner::FunctionCloner( + Function *F, FunctionOutliningInfo *OI, OptimizationRemarkEmitter &ORE) + : OrigFunc(F), ORE(ORE) { ClonedOI = llvm::make_unique(); // Clone the function, so that we can hack away on it. 
@@ -708,6 +992,38 @@ PartialInlinerImpl::FunctionCloner::FunctionCloner(Function *F, F->replaceAllUsesWith(ClonedFunc); } +PartialInlinerImpl::FunctionCloner::FunctionCloner( + Function *F, FunctionOutliningMultiRegionInfo *OI, + OptimizationRemarkEmitter &ORE) + : OrigFunc(F), ORE(ORE) { + ClonedOMRI = llvm::make_unique(); + + // Clone the function, so that we can hack away on it. + ValueToValueMapTy VMap; + ClonedFunc = CloneFunction(F, VMap); + + // Go through all Outline Candidate Regions and update all BasicBlock + // information. + for (FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegionInfo : + OI->ORI) { + SmallVector Region; + for (BasicBlock *BB : RegionInfo.Region) { + Region.push_back(cast(VMap[BB])); + } + BasicBlock *NewEntryBlock = cast(VMap[RegionInfo.EntryBlock]); + BasicBlock *NewExitBlock = cast(VMap[RegionInfo.ExitBlock]); + BasicBlock *NewReturnBlock = nullptr; + if (RegionInfo.ReturnBlock) + NewReturnBlock = cast(VMap[RegionInfo.ReturnBlock]); + FunctionOutliningMultiRegionInfo::OutlineRegionInfo MappedRegionInfo( + Region, NewEntryBlock, NewExitBlock, NewReturnBlock); + ClonedOMRI->ORI.push_back(MappedRegionInfo); + } + // Go ahead and update all uses to the duplicate, so that we can just + // use the inliner functionality when we're done hacking. + F->replaceAllUsesWith(ClonedFunc); +} + void PartialInlinerImpl::FunctionCloner::NormalizeReturnBlock() { auto getFirstPHI = [](BasicBlock *BB) { BasicBlock::iterator I = BB->begin(); @@ -724,6 +1040,11 @@ void PartialInlinerImpl::FunctionCloner::NormalizeReturnBlock() { return FirstPhi; }; + // Shouldn't need to normalize PHIs if we're not outlining non-early return + // blocks. + if (!ClonedOI) + return; + // Special hackery is needed with PHI nodes that have inputs from more than // one extracted block. For simplicity, just split the PHIs into a two-level // sequence of PHIs, some of which will go in the extracted region, and some @@ -774,16 +1095,90 @@ void PartialInlinerImpl::FunctionCloner::NormalizeReturnBlock() { DeadPhis.push_back(OldPhi); } ++I; - } - for (auto *DP : DeadPhis) - DP->eraseFromParent(); + } + for (auto *DP : DeadPhis) + DP->eraseFromParent(); + + for (auto E : ClonedOI->ReturnBlockPreds) { + E->getTerminator()->replaceUsesOfWith(PreReturn, ClonedOI->ReturnBlock); + } +} + +bool PartialInlinerImpl::FunctionCloner::doMultiRegionFunctionOutlining() { + + auto ComputeRegionCost = [](SmallVectorImpl &Region) { + int Cost = 0; + for (BasicBlock* BB : Region) + Cost += computeBBInlineCost(BB); + return Cost; + }; + + assert(ClonedOMRI && "Expecting OutlineInfo for multi region outline"); + + if (ClonedOMRI->ORI.empty()) + return false; - for (auto E : ClonedOI->ReturnBlockPreds) { - E->getTerminator()->replaceUsesOfWith(PreReturn, ClonedOI->ReturnBlock); + // The CodeExtractor needs a dominator tree. + DominatorTree DT; + DT.recalculate(*ClonedFunc); + + // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo. 
+ LoopInfo LI(DT); + BranchProbabilityInfo BPI(*ClonedFunc, LI); + ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI)); + + SetVector Inputs, Outputs, Sinks; + for (FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegionInfo : + ClonedOMRI->ORI) { + int CurrentOutlinedRegionCost = ComputeRegionCost(RegionInfo.Region); + + CodeExtractor CE(RegionInfo.Region, &DT, /*AggregateArgs*/ false, + ClonedFuncBFI.get(), &BPI, /* AllowVarargs */ false); + + CE.findInputsOutputs(Inputs, Outputs, Sinks); + +#ifndef NDEBUG + if (TracePartialInlining) { + dbgs() << "inputs: " << Inputs.size() << "\n"; + dbgs() << "outputs: " << Outputs.size() << "\n"; + for (Value *value : Inputs) + dbgs() << "value used in func: " << *value << "\n"; + for (Value *output : Outputs) + dbgs() << "instr used in func: " << *output << "\n"; } +#endif + // Do not extract regions that have live exit variables. + if (Outputs.size() > 0 && !ForceLiveExit) + continue; + + Function *OutlinedFunc = CE.extractCodeRegion(); + + if (OutlinedFunc) { + CallSite OCS = PartialInlinerImpl::getOneCallSiteTo(OutlinedFunc); + BasicBlock *OutliningCallBB = OCS.getInstruction()->getParent(); + assert(OutliningCallBB->getParent() == ClonedFunc); + OutlinedFunctions.push_back(std::make_pair(OutlinedFunc,OutliningCallBB)); + NumColdRegionsOutlined++; + OutlinedRegionCost += CurrentOutlinedRegionCost; + + if (MarkOutlinedColdCC) { + OutlinedFunc->setCallingConv(CallingConv::Cold); + OCS.setCallingConv(CallingConv::Cold); + } + } else + ORE.emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed", + &RegionInfo.Region.front()->front()) + << "Failed to extract region at block " + << ore::NV("Block", RegionInfo.Region.front()); + }); + } + + return !OutlinedFunctions.empty(); } -Function *PartialInlinerImpl::FunctionCloner::doFunctionOutlining() { +Function * +PartialInlinerImpl::FunctionCloner::doSingleRegionFunctionOutlining() { // Returns true if the block is to be partial inlined into the caller // (i.e. not to be extracted to the out of line function) auto ToBeInlined = [&, this](BasicBlock *BB) { @@ -792,6 +1187,16 @@ Function *PartialInlinerImpl::FunctionCloner::doFunctionOutlining() { ClonedOI->Entries.end()); }; + assert(ClonedOI && "Expecting OutlineInfo for single region outline"); + // The CodeExtractor needs a dominator tree. + DominatorTree DT; + DT.recalculate(*ClonedFunc); + + // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo. + LoopInfo LI(DT); + BranchProbabilityInfo BPI(*ClonedFunc, LI); + ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI)); + // Gather up the blocks that we're going to extract. std::vector ToExtract; ToExtract.push_back(ClonedOI->NonReturnBlock); @@ -807,27 +1212,27 @@ Function *PartialInlinerImpl::FunctionCloner::doFunctionOutlining() { OutlinedRegionCost += computeBBInlineCost(&BB); } - // The CodeExtractor needs a dominator tree. - DominatorTree DT; - DT.recalculate(*ClonedFunc); - - // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo. - LoopInfo LI(DT); - BranchProbabilityInfo BPI(*ClonedFunc, LI); - ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI)); - // Extract the body of the if. 
- OutlinedFunc = CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false, - ClonedFuncBFI.get(), &BPI, - /* AllowVarargs */ true) - .extractCodeRegion(); + Function *OutlinedFunc = + CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false, + ClonedFuncBFI.get(), &BPI, + /* AllowVarargs */ true) + .extractCodeRegion(); if (OutlinedFunc) { - OutliningCallBB = PartialInlinerImpl::getOneCallSiteTo(OutlinedFunc) - .getInstruction() - ->getParent(); + BasicBlock *OutliningCallBB = + PartialInlinerImpl::getOneCallSiteTo(OutlinedFunc) + .getInstruction() + ->getParent(); assert(OutliningCallBB->getParent() == ClonedFunc); - } + OutlinedFunctions.push_back(std::make_pair(OutlinedFunc, OutliningCallBB)); + } else + ORE.emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed", + &ToExtract.front()->front()) + << "Failed to extract region at block " + << ore::NV("Block", ToExtract.front()); + }); return OutlinedFunc; } @@ -838,65 +1243,121 @@ PartialInlinerImpl::FunctionCloner::~FunctionCloner() { ClonedFunc->replaceAllUsesWith(OrigFunc); ClonedFunc->eraseFromParent(); if (!IsFunctionInlined) { - // Remove the function that is speculatively created if there is no + // Remove each function that was speculatively created if there is no // reference. - if (OutlinedFunc) - OutlinedFunc->eraseFromParent(); + for (auto FuncBBPair : OutlinedFunctions) { + Function *Func = FuncBBPair.first; + Func->eraseFromParent(); + } } } -Function *PartialInlinerImpl::unswitchFunction(Function *F) { +std::pair PartialInlinerImpl::unswitchFunction(Function *F) { + if (F->hasAddressTaken()) - return nullptr; + return {false, nullptr}; // Let inliner handle it if (F->hasFnAttribute(Attribute::AlwaysInline)) - return nullptr; + return {false, nullptr}; if (F->hasFnAttribute(Attribute::NoInline)) - return nullptr; + return {false, nullptr}; if (PSI->isFunctionEntryCold(F)) - return nullptr; + return {false, nullptr}; if (F->user_begin() == F->user_end()) - return nullptr; + return {false, nullptr}; + + auto &ORE = (*GetORE)(*F); + + // Only try to outline cold regions if we have a profile summary, which + // implies we have profiling information. + if (PSI->hasProfileSummary() && F->hasProfileData() && + !DisableMultiRegionPartialInline) { + std::unique_ptr OMRI = + computeOutliningColdRegionsInfo(F); + if (OMRI) { + FunctionCloner Cloner(F, OMRI.get(), ORE); + +#ifndef NDEBUG + if (TracePartialInlining) { + dbgs() << "HotCountThreshold = " << PSI->getHotCountThreshold() << "\n"; + dbgs() << "ColdCountThreshold = " << PSI->getColdCountThreshold() + << "\n"; + } +#endif + bool DidOutline = Cloner.doMultiRegionFunctionOutlining(); + + if (DidOutline) { +#ifndef NDEBUG + if (TracePartialInlining) { + dbgs() << ">>>>>> Outlined (Cloned) Function >>>>>>\n"; + Cloner.ClonedFunc->print(dbgs()); + dbgs() << "<<<<<< Outlined (Cloned) Function <<<<<<\n"; + } +#endif - std::unique_ptr OI = computeOutliningInfo(F); + if (tryPartialInline(Cloner)) + return {true, nullptr}; + } + } + } + // Fall-thru to regular partial inlining if we: + // i) can't find any cold regions to outline, or + // ii) can't inline the outlined function anywhere. 
+ std::unique_ptr OI = computeOutliningInfo(F); if (!OI) - return nullptr; + return {false, nullptr}; - FunctionCloner Cloner(F, OI.get()); + FunctionCloner Cloner(F, OI.get(), ORE); Cloner.NormalizeReturnBlock(); - Function *OutlinedFunction = Cloner.doFunctionOutlining(); + + Function *OutlinedFunction = Cloner.doSingleRegionFunctionOutlining(); + + if (!OutlinedFunction) + return {false, nullptr}; bool AnyInline = tryPartialInline(Cloner); if (AnyInline) - return OutlinedFunction; + return {true, OutlinedFunction}; - return nullptr; + return {false, nullptr}; } bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) { - int NonWeightedRcost; - int SizeCost; - - if (Cloner.OutlinedFunc == nullptr) + if (Cloner.OutlinedFunctions.empty()) return false; + int SizeCost = 0; + BlockFrequency WeightedRcost; + int NonWeightedRcost; std::tie(SizeCost, NonWeightedRcost) = computeOutliningCosts(Cloner); - auto RelativeToEntryFreq = getOutliningCallBBRelativeFreq(Cloner); - auto WeightedRcost = BlockFrequency(NonWeightedRcost) * RelativeToEntryFreq; - - // The call sequence to the outlined function is larger than the original - // outlined region size, it does not increase the chances of inlining - // the function with outlining (The inliner uses the size increase to + // Only calculate RelativeToEntryFreq when we are doing single region + // outlining. + BranchProbability RelativeToEntryFreq; + if (Cloner.ClonedOI) { + RelativeToEntryFreq = getOutliningCallBBRelativeFreq(Cloner); + } else + // RelativeToEntryFreq doesn't make sense when we have more than one + // outlined call because each call will have a different relative frequency + // to the entry block. We can consider using the average, but the + // usefulness of that information is questionable. For now, assume we never + // execute the calls to outlined functions. + RelativeToEntryFreq = BranchProbability(0, 1); + + WeightedRcost = BlockFrequency(NonWeightedRcost) * RelativeToEntryFreq; + + // The call sequence(s) to the outlined function(s) are larger than the sum of + // the original outlined region size(s), it does not increase the chances of + // inlining the function with outlining (The inliner uses the size increase to // model the cost of inlining a callee). if (!SkipCostAnalysis && Cloner.OutlinedRegionCost < SizeCost) { - OptimizationRemarkEmitter ORE(Cloner.OrigFunc); + auto &ORE = (*GetORE)(*Cloner.OrigFunc); DebugLoc DLoc; BasicBlock *Block; std::tie(DLoc, Block) = getOneDebugLoc(Cloner.ClonedFunc); @@ -919,11 +1380,12 @@ bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) { Cloner.ClonedFunc->user_end()); DenseMap CallSiteToProfCountMap; - if (Cloner.OrigFunc->getEntryCount()) + auto CalleeEntryCount = Cloner.OrigFunc->getEntryCount(); + if (CalleeEntryCount) computeCallsiteToProfCountMap(Cloner.ClonedFunc, CallSiteToProfCountMap); - auto CalleeEntryCount = Cloner.OrigFunc->getEntryCount(); - uint64_t CalleeEntryCountV = (CalleeEntryCount ? *CalleeEntryCount : 0); + uint64_t CalleeEntryCountV = + (CalleeEntryCount ? 
CalleeEntryCount.getCount() : 0); bool AnyInline = false; for (User *User : Users) { @@ -932,11 +1394,11 @@ bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) { if (IsLimitReached()) continue; - OptimizationRemarkEmitter ORE(CS.getCaller()); - if (!shouldPartialInline(CS, Cloner, WeightedRcost, ORE)) + if (!shouldPartialInline(CS, Cloner, WeightedRcost)) continue; + auto &ORE = (*GetORE)(*CS.getCaller()); // Construct remark before doing the inlining, as after successful inlining // the callsite is removed. OptimizationRemark OR(DEBUG_TYPE, "PartiallyInlined", CS.getInstruction()); @@ -944,7 +1406,11 @@ bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) { << ore::NV("Caller", CS.getCaller()); InlineFunctionInfo IFI(nullptr, GetAssumptionCache, PSI); - if (!InlineFunction(CS, IFI, nullptr, true, Cloner.OutlinedFunc)) + // We can only forward varargs when we outlined a single region, else we + // bail on vararg functions. + if (!InlineFunction(CS, IFI, nullptr, true, + (Cloner.ClonedOI ? Cloner.OutlinedFunctions.back().first + : nullptr))) continue; ORE.emit(OR); @@ -958,13 +1424,24 @@ bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) { AnyInline = true; NumPartialInlining++; // Update the stats - NumPartialInlined++; + if (Cloner.ClonedOI) + NumPartialInlined++; + else + NumColdOutlinePartialInlined++; + } if (AnyInline) { Cloner.IsFunctionInlined = true; if (CalleeEntryCount) - Cloner.OrigFunc->setEntryCount(CalleeEntryCountV); + Cloner.OrigFunc->setEntryCount( + CalleeEntryCount.setCount(CalleeEntryCountV)); + auto &ORE = (*GetORE)(*Cloner.OrigFunc); + ORE.emit([&]() { + return OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", Cloner.OrigFunc) + << "Partially inlined into at least one caller"; + }); + } return AnyInline; @@ -998,8 +1475,10 @@ bool PartialInlinerImpl::run(Module &M) { if (Recursive) continue; - if (Function *NewFunc = unswitchFunction(CurrFunc)) { - Worklist.push_back(NewFunc); + std::pair Result = unswitchFunction(CurrFunc); + if (Result.second) + Worklist.push_back(Result.second); + if (Result.first) { Changed = true; } } @@ -1040,9 +1519,15 @@ PreservedAnalyses PartialInlinerPass::run(Module &M, return FAM.getResult(F); }; + std::function GetORE = + [&FAM](Function &F) -> OptimizationRemarkEmitter & { + return FAM.getResult(F); + }; + ProfileSummaryInfo *PSI = &AM.getResult(M); - if (PartialInlinerImpl(&GetAssumptionCache, &GetTTI, {GetBFI}, PSI).run(M)) + if (PartialInlinerImpl(&GetAssumptionCache, &GetTTI, {GetBFI}, PSI, &GetORE) + .run(M)) return PreservedAnalyses::none(); return PreservedAnalyses::all(); } diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index abab7e194ada..3855e6245d8e 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -26,11 +26,9 @@ #include "llvm/Analysis/TypeBasedAliasAnalysis.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/LegacyPassManager.h" -#include "llvm/IR/ModuleSummaryIndex.h" #include "llvm/IR/Verifier.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ManagedStatic.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/ForceFunctionAttrs.h" #include "llvm/Transforms/IPO/FunctionAttrs.h" @@ -632,6 +630,13 @@ void PassManagerBuilder::populateModulePassManager( addInstructionCombiningPass(MPM); } + // Cleanup after loop vectorization, etc. 
Simplification passes like CVP and + // GVN, loop transforms, and others have already run, so it's now better to + // convert to more optimized IR using more aggressive simplify CFG options. + // The extra sinking transform can create larger basic blocks, so do this + // before SLP vectorization. + MPM.add(createCFGSimplificationPass(1, true, true, false, true)); + if (RunSLPAfterLoopVectorization && SLPVectorize) { MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains. if (OptLevel > 1 && ExtraVectorizerPasses) { @@ -640,9 +645,6 @@ void PassManagerBuilder::populateModulePassManager( } addExtensionsToPM(EP_Peephole, MPM); - // Switches to lookup tables and other transforms that may not be considered - // canonical by other IR passes. - MPM.add(createCFGSimplificationPass(1, true, true, false)); addInstructionCombiningPass(MPM); if (!DisableUnrollLoops) { diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp index 3fd59847a005..46b088189040 100644 --- a/lib/Transforms/IPO/PruneEH.cpp +++ b/lib/Transforms/IPO/PruneEH.cpp @@ -24,7 +24,6 @@ #include "llvm/IR/Function.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/IPO.h" diff --git a/lib/Transforms/IPO/SampleProfile.cpp b/lib/Transforms/IPO/SampleProfile.cpp index 8930e9b2b957..a8d1be7a583e 100644 --- a/lib/Transforms/IPO/SampleProfile.cpp +++ b/lib/Transforms/IPO/SampleProfile.cpp @@ -69,6 +69,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/Instrumentation.h" +#include "llvm/Transforms/Utils/CallPromotionUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include #include @@ -84,7 +85,7 @@ using namespace llvm; using namespace sampleprof; - +using ProfileCount = Function::ProfileCount; #define DEBUG_TYPE "sample-profile" // Command line option to specify the file to read samples from. This is @@ -180,8 +181,9 @@ class SampleProfileLoader { StringRef Name, bool IsThinLTOPreLink, std::function GetAssumptionCache, std::function GetTargetTransformInfo) - : GetAC(GetAssumptionCache), GetTTI(GetTargetTransformInfo), - Filename(Name), IsThinLTOPreLink(IsThinLTOPreLink) {} + : GetAC(std::move(GetAssumptionCache)), + GetTTI(std::move(GetTargetTransformInfo)), Filename(Name), + IsThinLTOPreLink(IsThinLTOPreLink) {} bool doInitialization(Module &M); bool runOnModule(Module &M, ModuleAnalysisManager *AM); @@ -823,10 +825,10 @@ bool SampleProfileLoader::inlineHotFunctions( if (R != SymbolMap.end() && R->getValue() && !R->getValue()->isDeclaration() && R->getValue()->getSubprogram() && - isLegalToPromote(I, R->getValue(), &Reason)) { + isLegalToPromote(CallSite(I), R->getValue(), &Reason)) { uint64_t C = FS->getEntrySamples(); - Instruction *DI = promoteIndirectCall( - I, R->getValue(), C, Sum, false, ORE); + Instruction *DI = + pgo::promoteIndirectCall(I, R->getValue(), C, Sum, false, ORE); Sum -= C; PromotedInsns.insert(I); // If profile mismatches, we should not attempt to inline DI. @@ -1465,7 +1467,9 @@ bool SampleProfileLoader::emitAnnotations(Function &F) { // Sets the GUIDs that are inlined in the profiled binary. This is used // for ThinLink to make correct liveness analysis, and also make the IR // match the profiled binary before annotation. 
- F.setEntryCount(Samples->getHeadSamples() + 1, &InlinedGUIDs); + F.setEntryCount( + ProfileCount(Samples->getHeadSamples() + 1, Function::PCT_Real), + &InlinedGUIDs); // Compute dominance and loop info needed for propagation. computeDominanceAndLoopInfo(F); @@ -1546,14 +1550,14 @@ bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM) { // Populate the symbol map. for (const auto &N_F : M.getValueSymbolTable()) { - std::string OrigName = N_F.getKey(); + StringRef OrigName = N_F.getKey(); Function *F = dyn_cast(N_F.getValue()); if (F == nullptr) continue; SymbolMap[OrigName] = F; auto pos = OrigName.find('.'); - if (pos != std::string::npos) { - std::string NewName = OrigName.substr(0, pos); + if (pos != StringRef::npos) { + StringRef NewName = OrigName.substr(0, pos); auto r = SymbolMap.insert(std::make_pair(NewName, F)); // Failiing to insert means there is already an entry in SymbolMap, // thus there are multiple functions that are mapped to the same @@ -1582,7 +1586,10 @@ bool SampleProfileLoaderLegacyPass::runOnModule(Module &M) { } bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM) { - F.setEntryCount(0); + // Initialize the entry count to -1, which will be treated conservatively + // by getEntryCount as the same as unknown (None). If we have samples this + // will be overwritten in emitAnnotations. + F.setEntryCount(ProfileCount(-1, Function::PCT_Real)); std::unique_ptr OwnedORE; if (AM) { auto &FAM = diff --git a/lib/Transforms/IPO/SyntheticCountsPropagation.cpp b/lib/Transforms/IPO/SyntheticCountsPropagation.cpp new file mode 100644 index 000000000000..f599adfe779e --- /dev/null +++ b/lib/Transforms/IPO/SyntheticCountsPropagation.cpp @@ -0,0 +1,129 @@ +//=- SyntheticCountsPropagation.cpp - Propagate function counts --*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a transformation that synthesizes entry counts for +// functions and attaches !prof metadata to functions with the synthesized +// counts. The presence of !prof metadata with counter name set to +// 'synthesized_function_entry_count' indicate that the value of the counter is +// an estimation of the likely execution count of the function. This transform +// is applied only in non PGO mode as functions get 'real' profile-based +// function entry counts in the PGO mode. +// +// The transformation works by first assigning some initial values to the entry +// counts of all functions and then doing a top-down traversal of the +// callgraph-scc to propagate the counts. For each function the set of callsites +// and their relative block frequency is gathered. The relative block frequency +// multiplied by the entry count of the caller and added to the callee's entry +// count. For non-trivial SCCs, the new counts are computed from the previous +// counts and updated in one shot. 
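// Illustration (not part of the patch): a minimal sketch of the propagation
// step described in the file comment above, with functions reduced to integer
// ids and SCC handling ignored. Each call site contributes
// RelBlockFreq * CallerCount to its callee, and one pass's additions are
// applied together ("in one shot").
#include <map>
#include <vector>

struct Callsite {
  int Callee;
  double RelBlockFreq;   // call-site block frequency / caller entry frequency
};
using SimpleCallGraph = std::map<int, std::vector<Callsite>>;

void propagateOnce(const SimpleCallGraph &CG, std::map<int, double> &Counts) {
  std::map<int, double> Pending;
  for (const auto &CallerAndSites : CG)
    for (const Callsite &CS : CallerAndSites.second)
      Pending[CS.Callee] += CS.RelBlockFreq * Counts[CallerAndSites.first];
  for (const auto &Update : Pending)
    Counts[Update.first] += Update.second;   // apply the whole pass at once
}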
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO/SyntheticCountsPropagation.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/SyntheticCountsUtils.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; +using Scaled64 = ScaledNumber; +using ProfileCount = Function::ProfileCount; + +#define DEBUG_TYPE "synthetic-counts-propagation" + +/// Initial synthetic count assigned to functions. +static cl::opt + InitialSyntheticCount("initial-synthetic-count", cl::Hidden, cl::init(10), + cl::ZeroOrMore, + cl::desc("Initial value of synthetic entry count.")); + +/// Initial synthetic count assigned to inline functions. +static cl::opt InlineSyntheticCount( + "inline-synthetic-count", cl::Hidden, cl::init(15), cl::ZeroOrMore, + cl::desc("Initial synthetic entry count for inline functions.")); + +/// Initial synthetic count assigned to cold functions. +static cl::opt ColdSyntheticCount( + "cold-synthetic-count", cl::Hidden, cl::init(5), cl::ZeroOrMore, + cl::desc("Initial synthetic entry count for cold functions.")); + +// Assign initial synthetic entry counts to functions. +static void +initializeCounts(Module &M, function_ref SetCount) { + auto MayHaveIndirectCalls = [](Function &F) { + for (auto *U : F.users()) { + if (!isa(U) && !isa(U)) + return true; + } + return false; + }; + + for (Function &F : M) { + uint64_t InitialCount = InitialSyntheticCount; + if (F.isDeclaration()) + continue; + if (F.hasFnAttribute(Attribute::AlwaysInline) || + F.hasFnAttribute(Attribute::InlineHint)) { + // Use a higher value for inline functions to account for the fact that + // these are usually beneficial to inline. + InitialCount = InlineSyntheticCount; + } else if (F.hasLocalLinkage() && !MayHaveIndirectCalls(F)) { + // Local functions without inline hints get counts only through + // propagation. + InitialCount = 0; + } else if (F.hasFnAttribute(Attribute::Cold) || + F.hasFnAttribute(Attribute::NoInline)) { + // Use a lower value for noinline and cold functions. + InitialCount = ColdSyntheticCount; + } + SetCount(&F, InitialCount); + } +} + +PreservedAnalyses SyntheticCountsPropagation::run(Module &M, + ModuleAnalysisManager &MAM) { + FunctionAnalysisManager &FAM = + MAM.getResult(M).getManager(); + DenseMap Counts; + // Set initial entry counts. + initializeCounts(M, [&](Function *F, uint64_t Count) { Counts[F] = Count; }); + + // Compute the relative block frequency for a callsite. Use scaled numbers + // and not integers since the relative block frequency could be less than 1. + auto GetCallSiteRelFreq = [&](CallSite CS) { + Function *Caller = CS.getCaller(); + auto &BFI = FAM.getResult(*Caller); + BasicBlock *CSBB = CS.getInstruction()->getParent(); + Scaled64 EntryFreq(BFI.getEntryFreq(), 0); + Scaled64 BBFreq(BFI.getBlockFreq(CSBB).getFrequency(), 0); + BBFreq /= EntryFreq; + return BBFreq; + }; + + CallGraph CG(M); + // Propgate the entry counts on the callgraph. + propagateSyntheticCounts( + CG, GetCallSiteRelFreq, [&](Function *F) { return Counts[F]; }, + [&](Function *F, uint64_t New) { Counts[F] += New; }); + + // Set the counts as metadata. 
+ for (auto Entry : Counts) + Entry.first->setEntryCount( + ProfileCount(Entry.second, Function::PCT_Synthetic)); + + return PreservedAnalyses::all(); +} diff --git a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp index 7d6d538bc116..f5a3d4452c77 100644 --- a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp +++ b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp @@ -19,7 +19,6 @@ #include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" #include "llvm/Pass.h" -#include "llvm/Support/FileSystem.h" #include "llvm/Support/ScopedPrinter.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/IPO.h" @@ -40,9 +39,17 @@ void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId, continue; auto Name = ExportGV.getName(); - GlobalValue *ImportGV = ImportM.getNamedValue(Name); - if ((!ImportGV || ImportGV->use_empty()) && !PromoteExtra.count(&ExportGV)) - continue; + GlobalValue *ImportGV = nullptr; + if (!PromoteExtra.count(&ExportGV)) { + ImportGV = ImportM.getNamedValue(Name); + if (!ImportGV) + continue; + ImportGV->removeDeadConstantUsers(); + if (ImportGV->use_empty()) { + ImportGV->eraseFromParent(); + continue; + } + } std::string NewName = (Name + ModuleId).str(); @@ -83,8 +90,7 @@ void promoteTypeIds(Module &M, StringRef ModuleId) { if (isa(MD) && cast(MD)->isDistinct()) { Metadata *&GlobalMD = LocalToGlobal[MD]; if (!GlobalMD) { - std::string NewName = - (to_string(LocalToGlobal.size()) + ModuleId).str(); + std::string NewName = (Twine(LocalToGlobal.size()) + ModuleId).str(); GlobalMD = MDString::get(M.getContext(), NewName); } @@ -351,6 +357,31 @@ void splitAndWriteThinLTOBitcode( NMD->addOperand(MD); } + SmallVector FunctionAliases; + for (auto &A : M.aliases()) { + if (!isa(A.getAliasee())) + continue; + + auto *F = cast(A.getAliasee()); + auto &Ctx = MergedM->getContext(); + SmallVector Elts; + + Elts.push_back(MDString::get(Ctx, A.getName())); + Elts.push_back(MDString::get(Ctx, F->getName())); + Elts.push_back(ConstantAsMetadata::get( + llvm::ConstantInt::get(Type::getInt8Ty(Ctx), A.getVisibility()))); + Elts.push_back(ConstantAsMetadata::get( + llvm::ConstantInt::get(Type::getInt8Ty(Ctx), A.isWeakForLinker()))); + + FunctionAliases.push_back(MDTuple::get(Ctx, Elts)); + } + + if (!FunctionAliases.empty()) { + NamedMDNode *NMD = MergedM->getOrInsertNamedMetadata("aliases"); + for (auto MD : FunctionAliases) + NMD->addOperand(MD); + } + simplifyExternals(*MergedM); // FIXME: Try to re-use BSI and PFI from the original module here. diff --git a/lib/Transforms/IPO/WholeProgramDevirt.cpp b/lib/Transforms/IPO/WholeProgramDevirt.cpp index ec34deb9a08d..aa1755bb0972 100644 --- a/lib/Transforms/IPO/WholeProgramDevirt.cpp +++ b/lib/Transforms/IPO/WholeProgramDevirt.cpp @@ -56,7 +56,6 @@ #include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" @@ -282,24 +281,11 @@ struct VirtualCallSite { DebugLoc DLoc = CS->getDebugLoc(); BasicBlock *Block = CS.getParent(); - // In the new pass manager, we can request the optimization - // remark emitter pass on a per-function-basis, which the - // OREGetter will do for us. - // In the old pass manager, this is harder, so we just build - // a optimization remark emitter on the fly, when we need it. 
- std::unique_ptr OwnedORE; - OptimizationRemarkEmitter *ORE; - if (OREGetter) - ORE = &OREGetter(F); - else { - OwnedORE = make_unique(F); - ORE = OwnedORE.get(); - } - using namespace ore; - ORE->emit(OptimizationRemark(DEBUG_TYPE, OptName, DLoc, Block) - << NV("Optimization", OptName) << ": devirtualized a call to " - << NV("FunctionName", TargetName)); + OREGetter(F).emit(OptimizationRemark(DEBUG_TYPE, OptName, DLoc, Block) + << NV("Optimization", OptName) + << ": devirtualized a call to " + << NV("FunctionName", TargetName)); } void replaceAndErase( @@ -540,7 +526,16 @@ struct WholeProgramDevirt : public ModulePass { if (skipModule(M)) return false; - auto OREGetter = function_ref(); + // In the new pass manager, we can request the optimization + // remark emitter pass on a per-function-basis, which the + // OREGetter will do for us. + // In the old pass manager, this is harder, so we just build + // an optimization remark emitter on the fly, when we need it. + std::unique_ptr ORE; + auto OREGetter = [&](Function *F) -> OptimizationRemarkEmitter & { + ORE = make_unique(F); + return *ORE; + }; if (UseCommandLine) return DevirtModule::runForTesting(M, LegacyAARGetter(*this), OREGetter); @@ -589,7 +584,7 @@ PreservedAnalyses WholeProgramDevirtPass::run(Module &M, bool DevirtModule::runForTesting( Module &M, function_ref AARGetter, function_ref OREGetter) { - ModuleSummaryIndex Summary; + ModuleSummaryIndex Summary(/*IsPerformingAnalysis=*/false); // Handle the command-line summary arguments. This code is for testing // purposes only, so we handle errors directly. @@ -1347,6 +1342,7 @@ void DevirtModule::importResolution(VTableSlot Slot, VTableSlotInfo &SlotInfo) { Constant *Bit = importConstant(Slot, CSByConstantArg.first, "bit", Int8Ty, ResByArg.Bit); applyVirtualConstProp(CSByConstantArg.second, "", Byte, Bit); + break; } default: break; @@ -1499,23 +1495,10 @@ bool DevirtModule::run() { for (const auto &DT : DevirtTargets) { Function *F = DT.second; - // In the new pass manager, we can request the optimization - // remark emitter pass on a per-function-basis, which the - // OREGetter will do for us. - // In the old pass manager, this is harder, so we just build - // a optimization remark emitter on the fly, when we need it. - std::unique_ptr OwnedORE; - OptimizationRemarkEmitter *ORE; - if (OREGetter) - ORE = &OREGetter(F); - else { - OwnedORE = make_unique(F); - ORE = OwnedORE.get(); - } - using namespace ore; - ORE->emit(OptimizationRemark(DEBUG_TYPE, "Devirtualized", F) - << "devirtualized " << NV("FunctionName", F->getName())); + OREGetter(F).emit(OptimizationRemark(DEBUG_TYPE, "Devirtualized", F) + << "devirtualized " + << NV("FunctionName", F->getName())); } } diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp index d28d615f47ea..688897644848 100644 --- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -1520,8 +1520,13 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { return BinaryOperator::CreateNot(Op1); if (Constant *C = dyn_cast(Op0)) { + Value *X; + // C - zext(bool) -> bool ? 
C - 1 : C + if (match(Op1, m_ZExt(m_Value(X))) && + X->getType()->getScalarSizeInBits() == 1) + return SelectInst::Create(X, SubOne(C), C); + // C - ~X == X + (1+C) - Value *X = nullptr; if (match(Op1, m_Not(m_Value(X)))) return BinaryOperator::CreateAdd(X, AddOne(C)); diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index a81f295b91d4..2364202e5b69 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -2397,5 +2397,25 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { if (Instruction *CastedXor = foldCastedBitwiseLogic(I)) return CastedXor; + // Canonicalize the shifty way to code absolute value to the common pattern. + // There are 4 potential commuted variants. Move the 'ashr' candidate to Op1. + // We're relying on the fact that we only do this transform when the shift has + // exactly 2 uses and the add has exactly 1 use (otherwise, we might increase + // instructions). + if (Op0->getNumUses() == 2) + std::swap(Op0, Op1); + + const APInt *ShAmt; + Type *Ty = I.getType(); + if (match(Op1, m_AShr(m_Value(A), m_APInt(ShAmt))) && + Op1->getNumUses() == 2 && *ShAmt == Ty->getScalarSizeInBits() - 1 && + match(Op0, m_OneUse(m_c_Add(m_Specific(A), m_Specific(Op1))))) { + // B = ashr i32 A, 31 ; smear the sign bit + // xor (add A, B), B ; add -1 and flip bits if negative + // --> (A < 0) ? -A : A + Value *Cmp = Builder.CreateICmpSLT(A, ConstantInt::getNullValue(Ty)); + return SelectInst::Create(Cmp, Builder.CreateNeg(A), A); + } + return Changed ? &I : nullptr; } diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index a00e6f73ab8c..32821e6d9dee 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -189,8 +189,9 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { unsigned MinAlign = std::min(DstAlign, SrcAlign); unsigned CopyAlign = MI->getAlignment(); + // FIXME: Check & simplify source & dest alignments separately if (CopyAlign < MinAlign) { - MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), MinAlign, false)); + MI->setAlignment(MinAlign); return MI; } @@ -265,8 +266,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { unsigned Alignment = getKnownAlignment(MI->getDest(), DL, MI, &AC, &DT); if (MI->getAlignment() < Alignment) { - MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), - Alignment, false)); + MI->setAlignment(Alignment); return MI; } @@ -1802,9 +1802,7 @@ Instruction *InstCombiner::visitVACopyInst(VACopyInst &I) { /// instructions. For normal calls, it allows visitCallSite to do the heavy /// lifting. Instruction *InstCombiner::visitCallInst(CallInst &CI) { - auto Args = CI.arg_operands(); - if (Value *V = SimplifyCall(&CI, CI.getCalledValue(), Args.begin(), - Args.end(), SQ.getWithInstruction(&CI))) + if (Value *V = SimplifyCall(&CI, SQ.getWithInstruction(&CI))) return replaceInstUsesWith(CI, V); if (isFreeCall(&CI, &TLI)) @@ -1903,16 +1901,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { lowerObjectSizeCall(II, DL, &TLI, /*MustSucceed=*/false)) return replaceInstUsesWith(CI, N); return nullptr; - case Intrinsic::bswap: { Value *IIOperand = II->getArgOperand(0); Value *X = nullptr; - // TODO should this be in InstSimplify? 
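// Illustration (not from the patch): the two absolute-value forms involved in
// the xor/ashr canonicalization added to visitXor above, written as C++ for
// readability. LLVM matches the IR pattern; for INT_MIN both idioms wrap to
// the same value at the IR level, so the rewrite is value-preserving.
int absShifty(int a) {
  int b = a >> 31;        // smear the sign bit (arithmetic shift assumed)
  return (a + b) ^ b;     // adds -1 and flips the bits when a is negative
}

int absSelect(int a) {
  return a < 0 ? -a : a;  // the canonical compare + select form
}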
- // bswap(bswap(x)) -> x - if (match(IIOperand, m_BSwap(m_Value(X)))) - return replaceInstUsesWith(CI, X); - // bswap(trunc(bswap(x))) -> trunc(lshr(x, c)) if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) { unsigned C = X->getType()->getPrimitiveSizeInBits() - @@ -1923,18 +1915,6 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { } break; } - - case Intrinsic::bitreverse: { - Value *IIOperand = II->getArgOperand(0); - Value *X = nullptr; - - // TODO should this be in InstSimplify? - // bitreverse(bitreverse(x)) -> x - if (match(IIOperand, m_BitReverse(m_Value(X)))) - return replaceInstUsesWith(CI, X); - break; - } - case Intrinsic::masked_load: if (Value *SimplifiedMaskedOp = simplifyMaskedLoad(*II, Builder)) return replaceInstUsesWith(CI, SimplifiedMaskedOp); @@ -1948,16 +1928,16 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::powi: if (ConstantInt *Power = dyn_cast(II->getArgOperand(1))) { - // powi(x, 0) -> 1.0 - if (Power->isZero()) - return replaceInstUsesWith(CI, ConstantFP::get(CI.getType(), 1.0)); - // powi(x, 1) -> x - if (Power->isOne()) - return replaceInstUsesWith(CI, II->getArgOperand(0)); + // 0 and 1 are handled in instsimplify + // powi(x, -1) -> 1/x if (Power->isMinusOne()) return BinaryOperator::CreateFDiv(ConstantFP::get(CI.getType(), 1.0), II->getArgOperand(0)); + // powi(x, 2) -> x*x + if (Power->equalsInt(2)) + return BinaryOperator::CreateFMul(II->getArgOperand(0), + II->getArgOperand(0)); } break; @@ -2396,7 +2376,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // The compare intrinsic uses the above assumptions and therefore // doesn't require additional flags. if ((match(Arg0, m_OneUse(m_FSub(m_Value(A), m_Value(B)))) && - match(Arg1, m_Zero()) && + match(Arg1, m_Zero()) && isa(Arg0) && cast(Arg0)->getFastMathFlags().noInfs())) { if (Arg0IsZero) std::swap(A, B); @@ -3607,7 +3587,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::lifetime_start: // Asan needs to poison memory to detect invalid access which is possible // even for empty lifetime range. 
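// Illustration (not from the patch): the powi cases that remain in visitCallInst
// above after powi(x, 0) and powi(x, 1) moved to InstSimplify, shown as the
// scalar code the intrinsic is strength-reduced to.
double powiTwoExpanded(double x)      { return x * x; }    // powi(x, 2)  -> x * x
double powiMinusOneExpanded(double x) { return 1.0 / x; }  // powi(x, -1) -> 1.0 / x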
- if (II->getFunction()->hasFnAttribute(Attribute::SanitizeAddress)) + if (II->getFunction()->hasFnAttribute(Attribute::SanitizeAddress) || + II->getFunction()->hasFnAttribute(Attribute::SanitizeHWAddress)) break; if (removeTriviallyEmptyRange(*II, Intrinsic::lifetime_start, @@ -4393,6 +4374,7 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS, cast(Caller)->getCallingConv()); cast(NewCaller)->setAttributes(NewPAL); } + NewCaller->setDebugLoc(Caller->getDebugLoc()); return NewCaller; } diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index 7ec2ff7689c9..6df09dfb3a4d 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -17,9 +17,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/TargetLibraryInfo.h" -#include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/GetElementPtrTypeIterator.h" @@ -1895,11 +1893,8 @@ Instruction *InstCombiner::foldICmpShlConstant(ICmpInst &Cmp, APInt ShiftedC = C.ashr(*ShiftAmt); return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC)); } - if (Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) { - // This is the same code as the SGT case, but assert the pre-condition - // that is needed for this to work with equality predicates. - assert(C.ashr(*ShiftAmt).shl(*ShiftAmt) == C && - "Compare known true or false was not folded"); + if ((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) && + C.ashr(*ShiftAmt).shl(*ShiftAmt) == C) { APInt ShiftedC = C.ashr(*ShiftAmt); return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC)); } @@ -1928,11 +1923,8 @@ Instruction *InstCombiner::foldICmpShlConstant(ICmpInst &Cmp, APInt ShiftedC = C.lshr(*ShiftAmt); return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC)); } - if (Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) { - // This is the same code as the UGT case, but assert the pre-condition - // that is needed for this to work with equality predicates. - assert(C.lshr(*ShiftAmt).shl(*ShiftAmt) == C && - "Compare known true or false was not folded"); + if ((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) && + C.lshr(*ShiftAmt).shl(*ShiftAmt) == C) { APInt ShiftedC = C.lshr(*ShiftAmt); return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC)); } @@ -4084,13 +4076,13 @@ Instruction *InstCombiner::foldICmpUsingKnownBits(ICmpInst &I) { computeUnsignedMinMaxValuesFromKnownBits(Op1Known, Op1Min, Op1Max); } - // If Min and Max are known to be the same, then SimplifyDemandedBits - // figured out that the LHS is a constant. Constant fold this now, so that + // If Min and Max are known to be the same, then SimplifyDemandedBits figured + // out that the LHS or RHS is a constant. Constant fold this now, so that // code below can assume that Min != Max. if (!isa(Op0) && Op0Min == Op0Max) - return new ICmpInst(Pred, ConstantInt::get(Op0->getType(), Op0Min), Op1); + return new ICmpInst(Pred, ConstantExpr::getIntegerValue(Ty, Op0Min), Op1); if (!isa(Op1) && Op1Min == Op1Max) - return new ICmpInst(Pred, Op0, ConstantInt::get(Op1->getType(), Op1Min)); + return new ICmpInst(Pred, Op0, ConstantExpr::getIntegerValue(Ty, Op1Min)); // Based on the range information we know about the LHS, see if we can // simplify this comparison. 
For example, (x&4) < 8 is always true. diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 5d2402361ad3..d4f06e18b957 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -18,13 +18,14 @@ #include "llvm/Analysis/Loads.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/DebugInfo.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; +using namespace PatternMatch; #define DEBUG_TYPE "instcombine" @@ -561,6 +562,28 @@ static StoreInst *combineStoreToNewValue(InstCombiner &IC, StoreInst &SI, Value return NewStore; } +/// Returns true if instruction represent minmax pattern like: +/// select ((cmp load V1, load V2), V1, V2). +static bool isMinMaxWithLoads(Value *V) { + assert(V->getType()->isPointerTy() && "Expected pointer type."); + // Ignore possible ty* to ixx* bitcast. + V = peekThroughBitcast(V); + // Check that select is select ((cmp load V1, load V2), V1, V2) - minmax + // pattern. + CmpInst::Predicate Pred; + Instruction *L1; + Instruction *L2; + Value *LHS; + Value *RHS; + if (!match(V, m_Select(m_Cmp(Pred, m_Instruction(L1), m_Instruction(L2)), + m_Value(LHS), m_Value(RHS)))) + return false; + return (match(L1, m_Load(m_Specific(LHS))) && + match(L2, m_Load(m_Specific(RHS)))) || + (match(L1, m_Load(m_Specific(RHS))) && + match(L2, m_Load(m_Specific(LHS)))); +} + /// \brief Combine loads to match the type of their uses' value after looking /// through intervening bitcasts. /// @@ -598,10 +621,14 @@ static Instruction *combineLoadToOperationType(InstCombiner &IC, LoadInst &LI) { // integers instead of any other type. We only do this when the loaded type // is sized and has a size exactly the same as its store size and the store // size is a legal integer type. + // Do not perform canonicalization if minmax pattern is found (to avoid + // infinite loop). if (!Ty->isIntegerTy() && Ty->isSized() && DL.isLegalInteger(DL.getTypeStoreSizeInBits(Ty)) && DL.getTypeStoreSizeInBits(Ty) == DL.getTypeSizeInBits(Ty) && - !DL.isNonIntegralPointerType(Ty)) { + !DL.isNonIntegralPointerType(Ty) && + !isMinMaxWithLoads( + peekThroughBitcast(LI.getPointerOperand(), /*OneUseOnly=*/true))) { if (all_of(LI.users(), [&LI](User *U) { auto *SI = dyn_cast(U); return SI && SI->getPointerOperand() != &LI && @@ -931,6 +958,16 @@ static Instruction *replaceGEPIdxWithZero(InstCombiner &IC, Value *Ptr, return nullptr; } +static bool canSimplifyNullStoreOrGEP(StoreInst &SI) { + if (SI.getPointerAddressSpace() != 0) + return false; + + auto *Ptr = SI.getPointerOperand(); + if (GetElementPtrInst *GEPI = dyn_cast(Ptr)) + Ptr = GEPI->getOperand(0); + return isa(Ptr); +} + static bool canSimplifyNullLoadOrGEP(LoadInst &LI, Value *Op) { if (GetElementPtrInst *GEPI = dyn_cast(Op)) { const Value *GEPI0 = GEPI->getOperand(0); @@ -1298,6 +1335,46 @@ static bool equivalentAddressValues(Value *A, Value *B) { return false; } +/// Converts store (bitcast (load (bitcast (select ...)))) to +/// store (load (select ...)), where select is minmax: +/// select ((cmp load V1, load V2), V1, V2). +static bool removeBitcastsFromLoadStoreOnMinMax(InstCombiner &IC, + StoreInst &SI) { + // bitcast? 
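// Illustration (not from the patch): rough source-level shape of the pattern
// the new isMinMaxWithLoads helper above recognises, namely a select of two
// pointers keyed on a comparison of their loaded values
// (select ((cmp load V1, load V2), V1, V2)). The helper itself matches IR and
// also peeks through pointer bitcasts.
int *minPointer(int *V1, int *V2) {
  return *V1 < *V2 ? V1 : V2;
}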
+ if (!match(SI.getPointerOperand(), m_BitCast(m_Value()))) + return false; + // load? integer? + Value *LoadAddr; + if (!match(SI.getValueOperand(), m_Load(m_BitCast(m_Value(LoadAddr))))) + return false; + auto *LI = cast(SI.getValueOperand()); + if (!LI->getType()->isIntegerTy()) + return false; + if (!isMinMaxWithLoads(LoadAddr)) + return false; + + if (!all_of(LI->users(), [LI, LoadAddr](User *U) { + auto *SI = dyn_cast(U); + return SI && SI->getPointerOperand() != LI && + peekThroughBitcast(SI->getPointerOperand()) != LoadAddr && + !SI->getPointerOperand()->isSwiftError(); + })) + return false; + + IC.Builder.SetInsertPoint(LI); + LoadInst *NewLI = combineLoadToNewType( + IC, *LI, LoadAddr->getType()->getPointerElementType()); + // Replace all the stores with stores of the newly loaded value. + for (auto *UI : LI->users()) { + auto *USI = cast(UI); + IC.Builder.SetInsertPoint(USI); + combineStoreToNewValue(IC, *USI, NewLI); + } + IC.replaceInstUsesWith(*LI, UndefValue::get(LI->getType())); + IC.eraseInstFromFunction(*LI); + return true; +} + Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { Value *Val = SI.getOperand(0); Value *Ptr = SI.getOperand(1); @@ -1322,6 +1399,9 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { if (unpackStoreToAggregate(*this, SI)) return eraseInstFromFunction(SI); + if (removeBitcastsFromLoadStoreOnMinMax(*this, SI)) + return eraseInstFromFunction(SI); + // Replace GEP indices if possible. if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Ptr, SI)) { Worklist.Add(NewGEPI); @@ -1392,7 +1472,8 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { } // store X, null -> turns into 'unreachable' in SimplifyCFG - if (isa(Ptr) && SI.getPointerAddressSpace() == 0) { + // store X, GEP(null, Y) -> turns into 'unreachable' in SimplifyCFG + if (canSimplifyNullStoreOrGEP(SI)) { if (!isa(Val)) { SI.setOperand(0, UndefValue::get(Val->getType())); if (Instruction *U = dyn_cast(Val)) diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 87666360c1a0..6e7e11a15aea 100644 --- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -33,6 +33,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/KnownBits.h" #include "llvm/Transforms/InstCombine/InstCombineWorklist.h" +#include "llvm/Transforms/Utils/BuildLibCalls.h" #include #include #include @@ -728,6 +729,23 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { } } + // sqrt(a) * sqrt(b) -> sqrt(a * b) + if (AllowReassociate && + Op0->hasOneUse() && Op1->hasOneUse()) { + Value *Opnd0 = nullptr; + Value *Opnd1 = nullptr; + if (match(Op0, m_Intrinsic(m_Value(Opnd0))) && + match(Op1, m_Intrinsic(m_Value(Opnd1)))) { + BuilderTy::FastMathFlagGuard Guard(Builder); + Builder.setFastMathFlags(I.getFastMathFlags()); + Value *FMulVal = Builder.CreateFMul(Opnd0, Opnd1); + Value *Sqrt = Intrinsic::getDeclaration(I.getModule(), + Intrinsic::sqrt, I.getType()); + Value *SqrtCall = Builder.CreateCall(Sqrt, FMulVal); + return replaceInstUsesWith(I, SqrtCall); + } + } + // Handle symmetric situation in a 2-iteration loop Value *Opnd0 = Op0; Value *Opnd1 = Op1; @@ -873,6 +891,7 @@ bool InstCombiner::simplifyDivRemOfSelectWithZeroOp(BinaryOperator &I) { /// @brief Common integer divide transforms Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + bool IsSigned = I.getOpcode() == 
Instruction::SDiv; // The RHS is known non-zero. if (Value *V = simplifyValueKnownNonZero(I.getOperand(1), *this, I)) { @@ -890,7 +909,6 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) { if (match(Op1, m_APInt(C2))) { Value *X; const APInt *C1; - bool IsSigned = I.getOpcode() == Instruction::SDiv; // (X / C1) / C2 -> X / (C1*C2) if ((IsSigned && match(LHS, m_SDiv(m_Value(X), m_APInt(C1)))) || @@ -981,13 +999,18 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) { return &I; // (X - (X rem Y)) / Y -> X / Y; usually originates as ((X / Y) * Y) / Y - Value *X = nullptr, *Z = nullptr; - if (match(Op0, m_Sub(m_Value(X), m_Value(Z)))) { // (X - Z) / Y; Y = Op1 - bool isSigned = I.getOpcode() == Instruction::SDiv; - if ((isSigned && match(Z, m_SRem(m_Specific(X), m_Specific(Op1)))) || - (!isSigned && match(Z, m_URem(m_Specific(X), m_Specific(Op1))))) + Value *X, *Z; + if (match(Op0, m_Sub(m_Value(X), m_Value(Z)))) // (X - Z) / Y; Y = Op1 + if ((IsSigned && match(Z, m_SRem(m_Specific(X), m_Specific(Op1)))) || + (!IsSigned && match(Z, m_URem(m_Specific(X), m_Specific(Op1))))) return BinaryOperator::Create(I.getOpcode(), X, Op1); - } + + // (X << Y) / X -> 1 << Y + Value *Y; + if (IsSigned && match(Op0, m_NSWShl(m_Specific(Op1), m_Value(Y)))) + return BinaryOperator::CreateNSWShl(ConstantInt::get(I.getType(), 1), Y); + if (!IsSigned && match(Op0, m_NUWShl(m_Specific(Op1), m_Value(Y)))) + return BinaryOperator::CreateNUWShl(ConstantInt::get(I.getType(), 1), Y); return nullptr; } @@ -1451,6 +1474,42 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) { } } + if (AllowReassociate && + Op0->hasOneUse() && Op1->hasOneUse()) { + Value *A; + // sin(a) / cos(a) -> tan(a) + if (match(Op0, m_Intrinsic(m_Value(A))) && + match(Op1, m_Intrinsic(m_Specific(A)))) { + if (hasUnaryFloatFn(&TLI, I.getType(), LibFunc_tan, + LibFunc_tanf, LibFunc_tanl)) { + IRBuilder<> B(&I); + IRBuilder<>::FastMathFlagGuard Guard(B); + B.setFastMathFlags(I.getFastMathFlags()); + Value *Tan = emitUnaryFloatFnCall( + A, TLI.getName(LibFunc_tan), B, + CallSite(Op0).getCalledFunction()->getAttributes()); + return replaceInstUsesWith(I, Tan); + } + } + + // cos(a) / sin(a) -> 1/tan(a) + if (match(Op0, m_Intrinsic(m_Value(A))) && + match(Op1, m_Intrinsic(m_Specific(A)))) { + if (hasUnaryFloatFn(&TLI, I.getType(), LibFunc_tan, + LibFunc_tanf, LibFunc_tanl)) { + IRBuilder<> B(&I); + IRBuilder<>::FastMathFlagGuard Guard(B); + B.setFastMathFlags(I.getFastMathFlags()); + Value *Tan = emitUnaryFloatFnCall( + A, TLI.getName(LibFunc_tan), B, + CallSite(Op0).getCalledFunction()->getAttributes()); + Value *One = ConstantFP::get(Tan->getType(), 1.0); + Value *Div = B.CreateFDiv(One, Tan); + return replaceInstUsesWith(I, Div); + } + } + } + Value *LHS; Value *RHS; @@ -1631,9 +1690,5 @@ Instruction *InstCombiner::visitFRem(BinaryOperator &I) { SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); - // Handle cases involving: rem X, (select Cond, Y, Z) - if (simplifyDivRemOfSelectWithZeroOp(I)) - return &I; - return nullptr; } diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp index 45d448075d68..7ee018dbc49b 100644 --- a/lib/Transforms/InstCombine/InstCombinePHI.cpp +++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -16,7 +16,6 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/IR/DebugInfo.h" #include "llvm/IR/PatternMatch.h" #include 
"llvm/Transforms/Utils/Local.h" using namespace llvm; diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp index 6f26f7f5cd19..62ff22acc751 100644 --- a/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -300,12 +300,13 @@ Instruction *InstCombiner::foldSelectOpOp(SelectInst &SI, Instruction *TI, TI->getType()); } - // Only handle binary operators with one-use here. As with the cast case - // above, it may be possible to relax the one-use constraint, but that needs - // be examined carefully since it may not reduce the total number of - // instructions. - BinaryOperator *BO = dyn_cast(TI); - if (!BO || !TI->hasOneUse() || !FI->hasOneUse()) + // Only handle binary operators (including two-operand getelementptr) with + // one-use here. As with the cast case above, it may be possible to relax the + // one-use constraint, but that needs be examined carefully since it may not + // reduce the total number of instructions. + if (TI->getNumOperands() != 2 || FI->getNumOperands() != 2 || + (!isa(TI) && !isa(TI)) || + !TI->hasOneUse() || !FI->hasOneUse()) return nullptr; // Figure out if the operations have any operands in common. @@ -342,7 +343,18 @@ Instruction *InstCombiner::foldSelectOpOp(SelectInst &SI, Instruction *TI, SI.getName() + ".v", &SI); Value *Op0 = MatchIsOpZero ? MatchOp : NewSI; Value *Op1 = MatchIsOpZero ? NewSI : MatchOp; - return BinaryOperator::Create(BO->getOpcode(), Op0, Op1); + if (auto *BO = dyn_cast(TI)) { + return BinaryOperator::Create(BO->getOpcode(), Op0, Op1); + } + if (auto *TGEP = dyn_cast(TI)) { + auto *FGEP = cast(FI); + Type *ElementType = TGEP->getResultElementType(); + return TGEP->isInBounds() && FGEP->isInBounds() + ? GetElementPtrInst::CreateInBounds(ElementType, Op0, {Op1}) + : GetElementPtrInst::Create(ElementType, Op0, {Op1}); + } + llvm_unreachable("Expected BinaryOperator or GEP"); + return nullptr; } static bool isSelect01(const APInt &C1I, const APInt &C2I) { @@ -1289,6 +1301,63 @@ static Instruction *foldSelectCmpXchg(SelectInst &SI) { return nullptr; } +/// Reduce a sequence of min/max with a common operand. +static Instruction *factorizeMinMaxTree(SelectPatternFlavor SPF, Value *LHS, + Value *RHS, + InstCombiner::BuilderTy &Builder) { + assert(SelectPatternResult::isMinOrMax(SPF) && "Expected a min/max"); + // TODO: Allow FP min/max with nnan/nsz. + if (!LHS->getType()->isIntOrIntVectorTy()) + return nullptr; + + // Match 3 of the same min/max ops. Example: umin(umin(), umin()). + Value *A, *B, *C, *D; + SelectPatternResult L = matchSelectPattern(LHS, A, B); + SelectPatternResult R = matchSelectPattern(RHS, C, D); + if (SPF != L.Flavor || L.Flavor != R.Flavor) + return nullptr; + + // Look for a common operand. The use checks are different than usual because + // a min/max pattern typically has 2 uses of each op: 1 by the cmp and 1 by + // the select. + Value *MinMaxOp = nullptr; + Value *ThirdOp = nullptr; + if (LHS->getNumUses() <= 2 && RHS->getNumUses() > 2) { + // If the LHS is only used in this chain and the RHS is used outside of it, + // reuse the RHS min/max because that will eliminate the LHS. 
+ if (D == A || C == A) { + // min(min(a, b), min(c, a)) --> min(min(c, a), b) + // min(min(a, b), min(a, d)) --> min(min(a, d), b) + MinMaxOp = RHS; + ThirdOp = B; + } else if (D == B || C == B) { + // min(min(a, b), min(c, b)) --> min(min(c, b), a) + // min(min(a, b), min(b, d)) --> min(min(b, d), a) + MinMaxOp = RHS; + ThirdOp = A; + } + } else if (RHS->getNumUses() <= 2) { + // Reuse the LHS. This will eliminate the RHS. + if (D == A || D == B) { + // min(min(a, b), min(c, a)) --> min(min(a, b), c) + // min(min(a, b), min(c, b)) --> min(min(a, b), c) + MinMaxOp = LHS; + ThirdOp = C; + } else if (C == A || C == B) { + // min(min(a, b), min(b, d)) --> min(min(a, b), d) + // min(min(a, b), min(c, b)) --> min(min(a, b), d) + MinMaxOp = LHS; + ThirdOp = D; + } + } + if (!MinMaxOp || !ThirdOp) + return nullptr; + + CmpInst::Predicate P = getCmpPredicateForMinMax(SPF); + Value *CmpABC = Builder.CreateICmp(P, MinMaxOp, ThirdOp); + return SelectInst::Create(CmpABC, MinMaxOp, ThirdOp); +} + Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { Value *CondVal = SI.getCondition(); Value *TrueVal = SI.getTrueValue(); @@ -1551,6 +1620,21 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { Value *NewCast = Builder.CreateCast(CastOp, NewSI, SelType); return replaceInstUsesWith(SI, NewCast); } + + // MAX(~a, ~b) -> ~MIN(a, b) + // MIN(~a, ~b) -> ~MAX(a, b) + Value *A, *B; + if (match(LHS, m_Not(m_Value(A))) && match(RHS, m_Not(m_Value(B))) && + (LHS->getNumUses() <= 2 || RHS->getNumUses() <= 2)) { + CmpInst::Predicate InvertedPred = + getCmpPredicateForMinMax(getInverseMinMaxSelectPattern(SPF)); + Value *InvertedCmp = Builder.CreateICmp(InvertedPred, A, B); + Value *NewSel = Builder.CreateSelect(InvertedCmp, A, B); + return BinaryOperator::CreateNot(NewSel); + } + + if (Instruction *I = factorizeMinMaxTree(SPF, LHS, RHS, Builder)) + return I; } if (SPF) { @@ -1570,28 +1654,6 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { return R; } - // MAX(~a, ~b) -> ~MIN(a, b) - if ((SPF == SPF_SMAX || SPF == SPF_UMAX) && - IsFreeToInvert(LHS, LHS->hasNUses(2)) && - IsFreeToInvert(RHS, RHS->hasNUses(2))) { - // For this transform to be profitable, we need to eliminate at least two - // 'not' instructions if we're going to add one 'not' instruction. - int NumberOfNots = - (LHS->hasNUses(2) && match(LHS, m_Not(m_Value()))) + - (RHS->hasNUses(2) && match(RHS, m_Not(m_Value()))) + - (SI.hasOneUse() && match(*SI.user_begin(), m_Not(m_Value()))); - - if (NumberOfNots >= 2) { - Value *NewLHS = Builder.CreateNot(LHS); - Value *NewRHS = Builder.CreateNot(RHS); - Value *NewCmp = SPF == SPF_SMAX ? Builder.CreateICmpSLT(NewLHS, NewRHS) - : Builder.CreateICmpULT(NewLHS, NewRHS); - Value *NewSI = - Builder.CreateNot(Builder.CreateSelect(NewCmp, NewLHS, NewRHS)); - return replaceInstUsesWith(SI, NewSI); - } - } - // TODO. 
// ABS(-X) -> ABS(X) } diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp index 44bbb84686ab..a04a3cec09e6 100644 --- a/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -87,8 +87,7 @@ static bool canEvaluateShiftedShift(unsigned OuterShAmt, bool IsOuterShl, // Equal shift amounts in opposite directions become bitwise 'and': // lshr (shl X, C), C --> and X, C' // shl (lshr X, C), C --> and X, C' - unsigned InnerShAmt = InnerShiftConst->getZExtValue(); - if (InnerShAmt == OuterShAmt) + if (*InnerShiftConst == OuterShAmt) return true; // If the 2nd shift is bigger than the 1st, we can fold: @@ -98,7 +97,8 @@ static bool canEvaluateShiftedShift(unsigned OuterShAmt, bool IsOuterShl, // Also, check that the inner shift is valid (less than the type width) or // we'll crash trying to produce the bit mask for the 'and'. unsigned TypeWidth = InnerShift->getType()->getScalarSizeInBits(); - if (InnerShAmt > OuterShAmt && InnerShAmt < TypeWidth) { + if (InnerShiftConst->ugt(OuterShAmt) && InnerShiftConst->ult(TypeWidth)) { + unsigned InnerShAmt = InnerShiftConst->getZExtValue(); unsigned MaskShift = IsInnerShl ? TypeWidth - InnerShAmt : InnerShAmt - OuterShAmt; APInt Mask = APInt::getLowBitsSet(TypeWidth, OuterShAmt) << MaskShift; @@ -135,7 +135,7 @@ static bool canEvaluateShifted(Value *V, unsigned NumBits, bool IsLeftShift, ConstantInt *CI = nullptr; if ((IsLeftShift && match(I, m_LShr(m_Value(), m_ConstantInt(CI)))) || (!IsLeftShift && match(I, m_Shl(m_Value(), m_ConstantInt(CI))))) { - if (CI->getZExtValue() == NumBits) { + if (CI->getValue() == NumBits) { // TODO: Check that the input bits are already zero with MaskedValueIsZero #if 0 // If this is a truncate of a logical shr, we can truncate it to a smaller @@ -818,7 +818,7 @@ Instruction *InstCombiner::visitAShr(BinaryOperator &I) { Type *Ty = I.getType(); unsigned BitWidth = Ty->getScalarSizeInBits(); const APInt *ShAmtAPInt; - if (match(Op1, m_APInt(ShAmtAPInt))) { + if (match(Op1, m_APInt(ShAmtAPInt)) && ShAmtAPInt->ult(BitWidth)) { unsigned ShAmt = ShAmtAPInt->getZExtValue(); // If the shift amount equals the difference in width of the destination @@ -832,7 +832,8 @@ Instruction *InstCombiner::visitAShr(BinaryOperator &I) { // We can't handle (X << C1) >>s C2. It shifts arbitrary bits in. However, // we can handle (X <>s C2 since it only shifts in sign bits. const APInt *ShOp1; - if (match(Op0, m_NSWShl(m_Value(X), m_APInt(ShOp1)))) { + if (match(Op0, m_NSWShl(m_Value(X), m_APInt(ShOp1))) && + ShOp1->ult(BitWidth)) { unsigned ShlAmt = ShOp1->getZExtValue(); if (ShlAmt < ShAmt) { // (X <>s C2 --> X >>s (C2 - C1) @@ -850,7 +851,8 @@ Instruction *InstCombiner::visitAShr(BinaryOperator &I) { } } - if (match(Op0, m_AShr(m_Value(X), m_APInt(ShOp1)))) { + if (match(Op0, m_AShr(m_Value(X), m_APInt(ShOp1))) && + ShOp1->ult(BitWidth)) { unsigned AmtSum = ShAmt + ShOp1->getZExtValue(); // Oversized arithmetic shifts replicate the sign bit. 
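// Illustration (not from the patch): the order-reversal identity behind the
// MAX(~a, ~b) -> ~MIN(a, b) rewrite added to visitSelectInst above. Mapping
// x -> ~x reverses the unsigned order, so it swaps min and max; for every a
// and b the two functions below return the same value.
#include <algorithm>

unsigned maxOfNots(unsigned a, unsigned b) { return std::max(~a, ~b); }
unsigned notOfMin(unsigned a, unsigned b)  { return ~std::min(a, b); }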
AmtSum = std::min(AmtSum, BitWidth - 1); diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 7d5d28f6fc48..73746bfda449 100644 --- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -333,7 +333,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, KnownBits InputKnown(SrcBitWidth); if (SimplifyDemandedBits(I, 0, InputDemandedMask, InputKnown, Depth + 1)) return I; - Known = Known.zextOrTrunc(BitWidth); + Known = InputKnown.zextOrTrunc(BitWidth); // Any top bits are known to be zero. if (BitWidth > SrcBitWidth) Known.Zero.setBitsFrom(SrcBitWidth); @@ -435,12 +435,11 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, const APInt *SA; if (match(I->getOperand(1), m_APInt(SA))) { const APInt *ShrAmt; - if (match(I->getOperand(0), m_Shr(m_Value(), m_APInt(ShrAmt)))) { - Instruction *Shr = cast(I->getOperand(0)); - if (Value *R = simplifyShrShlDemandedBits( - Shr, *ShrAmt, I, *SA, DemandedMask, Known)) - return R; - } + if (match(I->getOperand(0), m_Shr(m_Value(), m_APInt(ShrAmt)))) + if (Instruction *Shr = dyn_cast(I->getOperand(0))) + if (Value *R = simplifyShrShlDemandedBits(Shr, *ShrAmt, I, *SA, + DemandedMask, Known)) + return R; uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1); APInt DemandedMaskIn(DemandedMask.lshr(ShiftAmt)); diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 6c99007475c1..aeac8910af6b 100644 --- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -181,11 +181,13 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { // If extracting a specified index from the vector, see if we can recursively // find a previously computed scalar that was inserted into the vector. if (ConstantInt *IdxC = dyn_cast(EI.getOperand(1))) { - unsigned IndexVal = IdxC->getZExtValue(); unsigned VectorWidth = EI.getVectorOperandType()->getNumElements(); - // InstSimplify handles cases where the index is invalid. - assert(IndexVal < VectorWidth); + // InstSimplify should handle cases where the index is invalid. + if (!IdxC->getValue().ule(VectorWidth)) + return nullptr; + + unsigned IndexVal = IdxC->getZExtValue(); // This instruction only demands the single element from the input vector. // If the input vector has a single use, simplify it based on this use @@ -781,6 +783,10 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) { Value *ScalarOp = IE.getOperand(1); Value *IdxOp = IE.getOperand(2); + if (auto *V = SimplifyInsertElementInst( + VecOp, ScalarOp, IdxOp, SQ.getWithInstruction(&IE))) + return replaceInstUsesWith(IE, V); + // Inserting an undef or into an undefined place, remove this. 
if (isa(ScalarOp) || isa(IdxOp)) replaceInstUsesWith(IE, VecOp); diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index f272f8273d14..b332e75c7feb 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -3276,8 +3276,8 @@ PreservedAnalyses InstCombinePass::run(Function &F, auto *LI = AM.getCachedResult(F); - // FIXME: The AliasAnalysis is not yet supported in the new pass manager - if (!combineInstructionsOverFunction(F, Worklist, nullptr, AC, TLI, DT, ORE, + auto *AA = &AM.getResult(F); + if (!combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, DT, ORE, ExpensiveCombines, LI)) // No changes, all analyses are preserved. return PreservedAnalyses::all(); @@ -3286,6 +3286,7 @@ PreservedAnalyses InstCombinePass::run(Function &F, PreservedAnalyses PA; PA.preserveSet(); PA.preserve(); + PA.preserve(); PA.preserve(); return PA; } diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index c707dfc0b50a..68b4146e2542 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -100,7 +100,7 @@ static const uint64_t kIOSSimShadowOffset64 = kDefaultShadowOffset64; static const uint64_t kSmallX86_64ShadowOffsetBase = 0x7FFFFFFF; // < 2G. static const uint64_t kSmallX86_64ShadowOffsetAlignMask = ~0xFFFULL; static const uint64_t kLinuxKasan_ShadowOffset64 = 0xdffffc0000000000; -static const uint64_t kPPC64_ShadowOffset64 = 1ULL << 41; +static const uint64_t kPPC64_ShadowOffset64 = 1ULL << 44; static const uint64_t kSystemZ_ShadowOffset64 = 1ULL << 52; static const uint64_t kMIPS32_ShadowOffset32 = 0x0aaa0000; static const uint64_t kMIPS64_ShadowOffset64 = 1ULL << 37; @@ -2494,7 +2494,6 @@ bool AddressSanitizer::runOnFunction(Function &F) { } bool UseCalls = - CompileKernel || (ClInstrumentationWithCallsThreshold >= 0 && ToInstrument.size() > (unsigned)ClInstrumentationWithCallsThreshold); const DataLayout &DL = F.getParent()->getDataLayout(); @@ -2702,9 +2701,10 @@ void FunctionStackPoisoner::copyArgsPassedByValToAllocas() { unsigned Align = Arg.getParamAlignment(); if (Align == 0) Align = DL.getABITypeAlignment(Ty); - const std::string &Name = Arg.hasName() ? Arg.getName().str() : - "Arg" + llvm::to_string(Arg.getArgNo()); - AllocaInst *AI = IRB.CreateAlloca(Ty, nullptr, Twine(Name) + ".byval"); + AllocaInst *AI = IRB.CreateAlloca( + Ty, nullptr, + (Arg.hasName() ? Arg.getName() : "Arg" + Twine(Arg.getArgNo())) + + ".byval"); AI->setAlignment(Align); Arg.replaceAllUsesWith(AI); @@ -2869,8 +2869,12 @@ void FunctionStackPoisoner::processStaticAllocas() { Value *FakeStack; Value *LocalStackBase; + Value *LocalStackBaseAlloca; + bool Deref; if (DoStackMalloc) { + LocalStackBaseAlloca = + IRB.CreateAlloca(IntptrTy, nullptr, "asan_local_stack_base"); // void *FakeStack = __asan_option_detect_stack_use_after_return // ? __asan_stack_malloc_N(LocalStackSize) // : nullptr; @@ -2901,24 +2905,31 @@ void FunctionStackPoisoner::processStaticAllocas() { IRBIf.SetCurrentDebugLocation(EntryDebugLocation); Value *AllocaValue = DoDynamicAlloca ? 
createAllocaForLayout(IRBIf, L, true) : StaticAlloca; + IRB.SetInsertPoint(InsBefore); IRB.SetCurrentDebugLocation(EntryDebugLocation); LocalStackBase = createPHI(IRB, NoFakeStack, AllocaValue, Term, FakeStack); + IRB.SetCurrentDebugLocation(EntryDebugLocation); + IRB.CreateStore(LocalStackBase, LocalStackBaseAlloca); + Deref = true; } else { // void *FakeStack = nullptr; // void *LocalStackBase = alloca(LocalStackSize); FakeStack = ConstantInt::get(IntptrTy, 0); LocalStackBase = DoDynamicAlloca ? createAllocaForLayout(IRB, L, true) : StaticAlloca; + LocalStackBaseAlloca = LocalStackBase; + Deref = false; } // Replace Alloca instructions with base+offset. for (const auto &Desc : SVD) { AllocaInst *AI = Desc.AI; + replaceDbgDeclareForAlloca(AI, LocalStackBaseAlloca, DIB, Deref, + Desc.Offset, DIExpression::NoDeref); Value *NewAllocaPtr = IRB.CreateIntToPtr( IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy, Desc.Offset)), AI->getType()); - replaceDbgDeclareForAlloca(AI, NewAllocaPtr, DIB, DIExpression::NoDeref); AI->replaceAllUsesWith(NewAllocaPtr); } diff --git a/lib/Transforms/Instrumentation/CFGMST.h b/lib/Transforms/Instrumentation/CFGMST.h index 16e2e6b4e730..075e5672cff8 100644 --- a/lib/Transforms/Instrumentation/CFGMST.h +++ b/lib/Transforms/Instrumentation/CFGMST.h @@ -46,6 +46,10 @@ template class CFGMST { // This map records the auxiliary information for each BB. DenseMap> BBInfos; + // Whehter the function has an exit block with no successors. + // (For function with an infinite loop, this block may be absent) + bool ExitBlockFound = false; + // Find the root group of the G and compress the path from G to the root. BBInfo *findAndCompressGroup(BBInfo *G) { if (G->Group != G) @@ -95,14 +99,20 @@ template class CFGMST { void buildEdges() { DEBUG(dbgs() << "Build Edge on " << F.getName() << "\n"); - const BasicBlock *BB = &(F.getEntryBlock()); + const BasicBlock *Entry = &(F.getEntryBlock()); uint64_t EntryWeight = (BFI != nullptr ? BFI->getEntryFreq() : 2); + Edge *EntryIncoming = nullptr, *EntryOutgoing = nullptr, + *ExitOutgoing = nullptr, *ExitIncoming = nullptr; + uint64_t MaxEntryOutWeight = 0, MaxExitOutWeight = 0, MaxExitInWeight = 0; + // Add a fake edge to the entry. - addEdge(nullptr, BB, EntryWeight); + EntryIncoming = &addEdge(nullptr, Entry, EntryWeight); + DEBUG(dbgs() << " Edge: from fake node to " << Entry->getName() + << " w = " << EntryWeight << "\n"); // Special handling for single BB functions. 
- if (succ_empty(BB)) { - addEdge(BB, nullptr, EntryWeight); + if (succ_empty(Entry)) { + addEdge(Entry, nullptr, EntryWeight); return; } @@ -126,16 +136,62 @@ template class CFGMST { } if (BPI != nullptr) Weight = BPI->getEdgeProbability(&*BB, TargetBB).scale(scaleFactor); - addEdge(&*BB, TargetBB, Weight).IsCritical = Critical; + auto *E = &addEdge(&*BB, TargetBB, Weight); + E->IsCritical = Critical; DEBUG(dbgs() << " Edge: from " << BB->getName() << " to " << TargetBB->getName() << " w=" << Weight << "\n"); + + // Keep track of entry/exit edges: + if (&*BB == Entry) { + if (Weight > MaxEntryOutWeight) { + MaxEntryOutWeight = Weight; + EntryOutgoing = E; + } + } + + auto *TargetTI = TargetBB->getTerminator(); + if (TargetTI && !TargetTI->getNumSuccessors()) { + if (Weight > MaxExitInWeight) { + MaxExitInWeight = Weight; + ExitIncoming = E; + } + } } } else { - addEdge(&*BB, nullptr, BBWeight); - DEBUG(dbgs() << " Edge: from " << BB->getName() << " to exit" + ExitBlockFound = true; + Edge *ExitO = &addEdge(&*BB, nullptr, BBWeight); + if (BBWeight > MaxExitOutWeight) { + MaxExitOutWeight = BBWeight; + ExitOutgoing = ExitO; + } + DEBUG(dbgs() << " Edge: from " << BB->getName() << " to fake exit" << " w = " << BBWeight << "\n"); } } + + // Entry/exit edge adjustment heurisitic: + // prefer instrumenting entry edge over exit edge + // if possible. Those exit edges may never have a chance to be + // executed (for instance the program is an event handling loop) + // before the profile is asynchronously dumped. + // + // If EntryIncoming and ExitOutgoing has similar weight, make sure + // ExitOutging is selected as the min-edge. Similarly, if EntryOutgoing + // and ExitIncoming has similar weight, make sure ExitIncoming becomes + // the min-edge. + uint64_t EntryInWeight = EntryWeight; + + if (EntryInWeight >= MaxExitOutWeight && + EntryInWeight * 2 < MaxExitOutWeight * 3) { + EntryIncoming->Weight = MaxExitOutWeight; + ExitOutgoing->Weight = EntryInWeight + 1; + } + + if (MaxEntryOutWeight >= MaxExitInWeight && + MaxEntryOutWeight * 2 < MaxExitInWeight * 3) { + EntryOutgoing->Weight = MaxExitInWeight; + ExitIncoming->Weight = MaxEntryOutWeight + 1; + } } // Sort CFG edges based on its weight. 
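// Illustration (not from the patch): why CFGMST only needs counters on edges
// that are *not* on the (maximum-weight) spanning tree, which is what makes
// the entry/exit weight adjustments above matter. Once the off-tree edges are
// counted, every on-tree edge count follows from flow conservation at each
// node. Minimal diamond-CFG example: entry -> {then, else} -> exit, spanning
// tree {entry->then, then->exit, else->exit}, single counter on entry->else.
#include <cassert>

void inferDiamondCounts(long entryCount, long entryToElseCounter) {
  long entryToThen = entryCount - entryToElseCounter; // conservation at 'entry'
  long thenToExit  = entryToThen;                     // 'then': one in, one out
  long elseToExit  = entryToElseCounter;              // 'else': one in, one out
  assert(thenToExit + elseToExit == entryCount && "conservation at 'exit'");
}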
@@ -167,6 +223,10 @@ template class CFGMST { for (auto &Ei : AllEdges) { if (Ei->Removed) continue; + // If we detect infinite loops, force + // instrumenting the entry edge: + if (!ExitBlockFound && Ei->SrcBB == nullptr) + continue; if (unionGroups(Ei->SrcBB, Ei->DestBB)) Ei->InMST = true; } diff --git a/lib/Transforms/Instrumentation/CMakeLists.txt b/lib/Transforms/Instrumentation/CMakeLists.txt index f2806e278e6e..66fdcb3ccc49 100644 --- a/lib/Transforms/Instrumentation/CMakeLists.txt +++ b/lib/Transforms/Instrumentation/CMakeLists.txt @@ -12,6 +12,7 @@ add_llvm_library(LLVMInstrumentation SanitizerCoverage.cpp ThreadSanitizer.cpp EfficiencySanitizer.cpp + HWAddressSanitizer.cpp ADDITIONAL_HEADER_DIRS ${LLVM_MAIN_INCLUDE_DIR}/llvm/Transforms diff --git a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp index 09bcbb282653..9c90d27d6d52 100644 --- a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp +++ b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp @@ -1382,20 +1382,19 @@ void DFSanVisitor::visitMemTransferInst(MemTransferInst &I) { Value *LenShadow = IRB.CreateMul( I.getLength(), ConstantInt::get(I.getLength()->getType(), DFSF.DFS.ShadowWidth / 8)); - Value *AlignShadow; - if (ClPreserveAlignment) { - AlignShadow = IRB.CreateMul(I.getAlignmentCst(), - ConstantInt::get(I.getAlignmentCst()->getType(), - DFSF.DFS.ShadowWidth / 8)); - } else { - AlignShadow = ConstantInt::get(I.getAlignmentCst()->getType(), - DFSF.DFS.ShadowWidth / 8); - } Type *Int8Ptr = Type::getInt8PtrTy(*DFSF.DFS.Ctx); DestShadow = IRB.CreateBitCast(DestShadow, Int8Ptr); SrcShadow = IRB.CreateBitCast(SrcShadow, Int8Ptr); - IRB.CreateCall(I.getCalledValue(), {DestShadow, SrcShadow, LenShadow, - AlignShadow, I.getVolatileCst()}); + auto *MTI = cast( + IRB.CreateCall(I.getCalledValue(), + {DestShadow, SrcShadow, LenShadow, I.getVolatileCst()})); + // FIXME: Set the source & dest alignments of MTI based on the separate + // source & dest alignments of I + if (ClPreserveAlignment) { + MTI->setAlignment(I.getAlignment() * (DFSF.DFS.ShadowWidth / 8)); + } else { + MTI->setAlignment(DFSF.DFS.ShadowWidth / 8); + } } void DFSanVisitor::visitReturnInst(ReturnInst &RI) { diff --git a/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp new file mode 100644 index 000000000000..df2fe37a6d43 --- /dev/null +++ b/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp @@ -0,0 +1,558 @@ +//===- HWAddressSanitizer.cpp - detector of uninitialized reads -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This file is a part of HWAddressSanitizer, an address sanity checker +/// based on tagged addressing. 
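// Illustration (not from the patch): a rough sketch of the tag check the new
// pass emits for each instrumented access, using the roles of the
// kPointerTagShift (56) and kShadowScale (4) constants defined just below.
// The real pass inlines this against its own shadow mapping or calls the
// __hwasan_* callbacks; the names here are only for exposition.
#include <cstdint>

constexpr unsigned kTagShiftSketch = 56;     // tag lives in the pointer's top byte
constexpr unsigned kGranuleShiftSketch = 4;  // one shadow byte per 16-byte granule

bool tagsMatch(uintptr_t TaggedPtr, const uint8_t *Shadow) {
  uint8_t PtrTag = uint8_t(TaggedPtr >> kTagShiftSketch);
  uintptr_t Addr = TaggedPtr & ((uintptr_t(1) << kTagShiftSketch) - 1);
  return Shadow[Addr >> kGranuleShiftSketch] == PtrTag;  // mismatch => report/trap
}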
+//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Triple.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/InstVisitor.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Instrumentation.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" +#include "llvm/Transforms/Utils/PromoteMemToReg.h" + +using namespace llvm; + +#define DEBUG_TYPE "hwasan" + +static const char *const kHwasanModuleCtorName = "hwasan.module_ctor"; +static const char *const kHwasanInitName = "__hwasan_init"; + +// Accesses sizes are powers of two: 1, 2, 4, 8, 16. +static const size_t kNumberOfAccessSizes = 5; + +static const size_t kShadowScale = 4; +static const unsigned kAllocaAlignment = 1U << kShadowScale; +static const unsigned kPointerTagShift = 56; + +static cl::opt ClMemoryAccessCallbackPrefix( + "hwasan-memory-access-callback-prefix", + cl::desc("Prefix for memory access callbacks"), cl::Hidden, + cl::init("__hwasan_")); + +static cl::opt + ClInstrumentWithCalls("hwasan-instrument-with-calls", + cl::desc("instrument reads and writes with callbacks"), + cl::Hidden, cl::init(false)); + +static cl::opt ClInstrumentReads("hwasan-instrument-reads", + cl::desc("instrument read instructions"), + cl::Hidden, cl::init(true)); + +static cl::opt ClInstrumentWrites( + "hwasan-instrument-writes", cl::desc("instrument write instructions"), + cl::Hidden, cl::init(true)); + +static cl::opt ClInstrumentAtomics( + "hwasan-instrument-atomics", + cl::desc("instrument atomic instructions (rmw, cmpxchg)"), cl::Hidden, + cl::init(true)); + +static cl::opt ClRecover( + "hwasan-recover", + cl::desc("Enable recovery mode (continue-after-error)."), + cl::Hidden, cl::init(false)); + +static cl::opt ClInstrumentStack("hwasan-instrument-stack", + cl::desc("instrument stack (allocas)"), + cl::Hidden, cl::init(true)); + +static cl::opt ClGenerateTagsWithCalls( + "hwasan-generate-tags-with-calls", + cl::desc("generate new tags with runtime library calls"), cl::Hidden, + cl::init(false)); + +static cl::opt ClMappingOffset( + "hwasan-mapping-offset", + cl::desc("offset of hwasan shadow mapping [EXPERIMENTAL]"), cl::Hidden, + cl::init(0)); + +static cl::opt ClEnableKhwasan( + "hwasan-kernel", cl::desc("Enable KernelHWAddressSanitizer instrumentation"), + cl::Hidden, cl::init(false)); + +namespace { + +/// \brief An instrumentation pass implementing detection of addressability bugs +/// using tagged pointers. +class HWAddressSanitizer : public FunctionPass { +public: + // Pass identification, replacement for typeid. 
+ static char ID; + + HWAddressSanitizer(bool Recover = false) + : FunctionPass(ID), Recover(Recover || ClRecover) {} + + StringRef getPassName() const override { return "HWAddressSanitizer"; } + + bool runOnFunction(Function &F) override; + bool doInitialization(Module &M) override; + + void initializeCallbacks(Module &M); + void instrumentMemAccessInline(Value *PtrLong, bool IsWrite, + unsigned AccessSizeIndex, + Instruction *InsertBefore); + bool instrumentMemAccess(Instruction *I); + Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite, + uint64_t *TypeSize, unsigned *Alignment, + Value **MaybeMask); + + bool isInterestingAlloca(const AllocaInst &AI); + bool tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, Value *Tag); + bool instrumentStack(SmallVectorImpl &Allocas, + SmallVectorImpl &RetVec); + Value *getNextTagWithCall(IRBuilder<> &IRB); + Value *getStackBaseTag(IRBuilder<> &IRB); + Value *getAllocaTag(IRBuilder<> &IRB, Value *StackTag, AllocaInst *AI, + unsigned AllocaNo); + Value *getUARTag(IRBuilder<> &IRB, Value *StackTag); + +private: + LLVMContext *C; + Type *IntptrTy; + Type *Int8Ty; + + bool Recover; + + Function *HwasanCtorFunction; + + Function *HwasanMemoryAccessCallback[2][kNumberOfAccessSizes]; + Function *HwasanMemoryAccessCallbackSized[2]; + + Function *HwasanTagMemoryFunc; + Function *HwasanGenerateTagFunc; +}; + +} // end anonymous namespace + +char HWAddressSanitizer::ID = 0; + +INITIALIZE_PASS_BEGIN( + HWAddressSanitizer, "hwasan", + "HWAddressSanitizer: detect memory bugs using tagged addressing.", false, false) +INITIALIZE_PASS_END( + HWAddressSanitizer, "hwasan", + "HWAddressSanitizer: detect memory bugs using tagged addressing.", false, false) + +FunctionPass *llvm::createHWAddressSanitizerPass(bool Recover) { + return new HWAddressSanitizer(Recover); +} + +/// \brief Module-level initialization. +/// +/// inserts a call to __hwasan_init to the module's constructor list. +bool HWAddressSanitizer::doInitialization(Module &M) { + DEBUG(dbgs() << "Init " << M.getName() << "\n"); + auto &DL = M.getDataLayout(); + + Triple TargetTriple(M.getTargetTriple()); + + C = &(M.getContext()); + IRBuilder<> IRB(*C); + IntptrTy = IRB.getIntPtrTy(DL); + Int8Ty = IRB.getInt8Ty(); + + HwasanCtorFunction = nullptr; + if (!ClEnableKhwasan) { + std::tie(HwasanCtorFunction, std::ignore) = + createSanitizerCtorAndInitFunctions(M, kHwasanModuleCtorName, + kHwasanInitName, + /*InitArgTypes=*/{}, + /*InitArgs=*/{}); + appendToGlobalCtors(M, HwasanCtorFunction, 0); + } + return true; +} + +void HWAddressSanitizer::initializeCallbacks(Module &M) { + IRBuilder<> IRB(*C); + for (size_t AccessIsWrite = 0; AccessIsWrite <= 1; AccessIsWrite++) { + const std::string TypeStr = AccessIsWrite ? "store" : "load"; + const std::string EndingStr = Recover ? 
"_noabort" : ""; + + HwasanMemoryAccessCallbackSized[AccessIsWrite] = + checkSanitizerInterfaceFunction(M.getOrInsertFunction( + ClMemoryAccessCallbackPrefix + TypeStr + "N" + EndingStr, + FunctionType::get(IRB.getVoidTy(), {IntptrTy, IntptrTy}, false))); + + for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes; + AccessSizeIndex++) { + HwasanMemoryAccessCallback[AccessIsWrite][AccessSizeIndex] = + checkSanitizerInterfaceFunction(M.getOrInsertFunction( + ClMemoryAccessCallbackPrefix + TypeStr + + itostr(1ULL << AccessSizeIndex) + EndingStr, + FunctionType::get(IRB.getVoidTy(), {IntptrTy}, false))); + } + } + + HwasanTagMemoryFunc = checkSanitizerInterfaceFunction(M.getOrInsertFunction( + "__hwasan_tag_memory", IRB.getVoidTy(), IntptrTy, Int8Ty, IntptrTy)); + HwasanGenerateTagFunc = checkSanitizerInterfaceFunction( + M.getOrInsertFunction("__hwasan_generate_tag", Int8Ty)); +} + +Value *HWAddressSanitizer::isInterestingMemoryAccess(Instruction *I, + bool *IsWrite, + uint64_t *TypeSize, + unsigned *Alignment, + Value **MaybeMask) { + // Skip memory accesses inserted by another instrumentation. + if (I->getMetadata("nosanitize")) return nullptr; + + Value *PtrOperand = nullptr; + const DataLayout &DL = I->getModule()->getDataLayout(); + if (LoadInst *LI = dyn_cast(I)) { + if (!ClInstrumentReads) return nullptr; + *IsWrite = false; + *TypeSize = DL.getTypeStoreSizeInBits(LI->getType()); + *Alignment = LI->getAlignment(); + PtrOperand = LI->getPointerOperand(); + } else if (StoreInst *SI = dyn_cast(I)) { + if (!ClInstrumentWrites) return nullptr; + *IsWrite = true; + *TypeSize = DL.getTypeStoreSizeInBits(SI->getValueOperand()->getType()); + *Alignment = SI->getAlignment(); + PtrOperand = SI->getPointerOperand(); + } else if (AtomicRMWInst *RMW = dyn_cast(I)) { + if (!ClInstrumentAtomics) return nullptr; + *IsWrite = true; + *TypeSize = DL.getTypeStoreSizeInBits(RMW->getValOperand()->getType()); + *Alignment = 0; + PtrOperand = RMW->getPointerOperand(); + } else if (AtomicCmpXchgInst *XCHG = dyn_cast(I)) { + if (!ClInstrumentAtomics) return nullptr; + *IsWrite = true; + *TypeSize = DL.getTypeStoreSizeInBits(XCHG->getCompareOperand()->getType()); + *Alignment = 0; + PtrOperand = XCHG->getPointerOperand(); + } + + if (PtrOperand) { + // Do not instrument acesses from different address spaces; we cannot deal + // with them. + Type *PtrTy = cast(PtrOperand->getType()->getScalarType()); + if (PtrTy->getPointerAddressSpace() != 0) + return nullptr; + + // Ignore swifterror addresses. + // swifterror memory addresses are mem2reg promoted by instruction + // selection. As such they cannot have regular uses like an instrumentation + // function and it makes no sense to track them as memory. 
+ if (PtrOperand->isSwiftError()) + return nullptr; + } + + return PtrOperand; +} + +static size_t TypeSizeToSizeIndex(uint32_t TypeSize) { + size_t Res = countTrailingZeros(TypeSize / 8); + assert(Res < kNumberOfAccessSizes); + return Res; +} + +void HWAddressSanitizer::instrumentMemAccessInline(Value *PtrLong, bool IsWrite, + unsigned AccessSizeIndex, + Instruction *InsertBefore) { + IRBuilder<> IRB(InsertBefore); + Value *PtrTag = IRB.CreateTrunc(IRB.CreateLShr(PtrLong, kPointerTagShift), IRB.getInt8Ty()); + Value *AddrLong = + IRB.CreateAnd(PtrLong, ConstantInt::get(PtrLong->getType(), + ~(0xFFULL << kPointerTagShift))); + Value *ShadowLong = IRB.CreateLShr(AddrLong, kShadowScale); + if (ClMappingOffset) + ShadowLong = IRB.CreateAdd( + ShadowLong, ConstantInt::get(PtrLong->getType(), ClMappingOffset, + /*isSigned=*/false)); + Value *MemTag = + IRB.CreateLoad(IRB.CreateIntToPtr(ShadowLong, IRB.getInt8PtrTy())); + Value *TagMismatch = IRB.CreateICmpNE(PtrTag, MemTag); + + TerminatorInst *CheckTerm = + SplitBlockAndInsertIfThen(TagMismatch, InsertBefore, !Recover, + MDBuilder(*C).createBranchWeights(1, 100000)); + + IRB.SetInsertPoint(CheckTerm); + // The signal handler will find the data address in x0. + InlineAsm *Asm = InlineAsm::get( + FunctionType::get(IRB.getVoidTy(), {PtrLong->getType()}, false), + "hlt #" + + itostr(0x100 + Recover * 0x20 + IsWrite * 0x10 + AccessSizeIndex), + "{x0}", + /*hasSideEffects=*/true); + IRB.CreateCall(Asm, PtrLong); +} + +bool HWAddressSanitizer::instrumentMemAccess(Instruction *I) { + DEBUG(dbgs() << "Instrumenting: " << *I << "\n"); + bool IsWrite = false; + unsigned Alignment = 0; + uint64_t TypeSize = 0; + Value *MaybeMask = nullptr; + Value *Addr = + isInterestingMemoryAccess(I, &IsWrite, &TypeSize, &Alignment, &MaybeMask); + + if (!Addr) + return false; + + if (MaybeMask) + return false; //FIXME + + IRBuilder<> IRB(I); + Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy); + if (isPowerOf2_64(TypeSize) && + (TypeSize / 8 <= (1UL << (kNumberOfAccessSizes - 1))) && + (Alignment >= (1UL << kShadowScale) || Alignment == 0 || + Alignment >= TypeSize / 8)) { + size_t AccessSizeIndex = TypeSizeToSizeIndex(TypeSize); + if (ClInstrumentWithCalls) { + IRB.CreateCall(HwasanMemoryAccessCallback[IsWrite][AccessSizeIndex], + AddrLong); + } else { + instrumentMemAccessInline(AddrLong, IsWrite, AccessSizeIndex, I); + } + } else { + IRB.CreateCall(HwasanMemoryAccessCallbackSized[IsWrite], + {AddrLong, ConstantInt::get(IntptrTy, TypeSize / 8)}); + } + + return true; +} + +static uint64_t getAllocaSizeInBytes(const AllocaInst &AI) { + uint64_t ArraySize = 1; + if (AI.isArrayAllocation()) { + const ConstantInt *CI = dyn_cast(AI.getArraySize()); + assert(CI && "non-constant array size"); + ArraySize = CI->getZExtValue(); + } + Type *Ty = AI.getAllocatedType(); + uint64_t SizeInBytes = AI.getModule()->getDataLayout().getTypeAllocSize(Ty); + return SizeInBytes * ArraySize; +} + +bool HWAddressSanitizer::tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, + Value *Tag) { + size_t Size = (getAllocaSizeInBytes(*AI) + kAllocaAlignment - 1) & + ~(kAllocaAlignment - 1); + + Value *JustTag = IRB.CreateTrunc(Tag, IRB.getInt8Ty()); + if (ClInstrumentWithCalls) { + IRB.CreateCall(HwasanTagMemoryFunc, + {IRB.CreatePointerCast(AI, IntptrTy), JustTag, + ConstantInt::get(IntptrTy, Size)}); + } else { + size_t ShadowSize = Size >> kShadowScale; + Value *ShadowPtr = IRB.CreateIntToPtr( + IRB.CreateLShr(IRB.CreatePointerCast(AI, IntptrTy), kShadowScale), + IRB.getInt8PtrTy()); + // If this 
memset is not inlined, it will be intercepted in the hwasan + // runtime library. That's OK, because the interceptor skips the checks if + // the address is in the shadow region. + // FIXME: the interceptor is not as fast as real memset. Consider lowering + // llvm.memset right here into either a sequence of stores, or a call to + // hwasan_tag_memory. + IRB.CreateMemSet(ShadowPtr, JustTag, ShadowSize, /*Align=*/1); + } + return true; +} + +static unsigned RetagMask(unsigned AllocaNo) { + // A list of 8-bit numbers that have at most one run of non-zero bits. + // x = x ^ (mask << 56) can be encoded as a single armv8 instruction for these + // masks. + // The list does not include the value 255, which is used for UAR. + static unsigned FastMasks[] = { + 0, 1, 2, 3, 4, 6, 7, 8, 12, 14, 15, 16, 24, + 28, 30, 31, 32, 48, 56, 60, 62, 63, 64, 96, 112, 120, + 124, 126, 127, 128, 192, 224, 240, 248, 252, 254}; + return FastMasks[AllocaNo % (sizeof(FastMasks) / sizeof(FastMasks[0]))]; +} + +Value *HWAddressSanitizer::getNextTagWithCall(IRBuilder<> &IRB) { + return IRB.CreateZExt(IRB.CreateCall(HwasanGenerateTagFunc), IntptrTy); +} + +Value *HWAddressSanitizer::getStackBaseTag(IRBuilder<> &IRB) { + if (ClGenerateTagsWithCalls) + return nullptr; + // FIXME: use addressofreturnaddress (but implement it in aarch64 backend + // first). + Module *M = IRB.GetInsertBlock()->getParent()->getParent(); + auto GetStackPointerFn = + Intrinsic::getDeclaration(M, Intrinsic::frameaddress); + Value *StackPointer = IRB.CreateCall( + GetStackPointerFn, {Constant::getNullValue(IRB.getInt32Ty())}); + + // Extract some entropy from the stack pointer for the tags. + // Take bits 20..28 (ASLR entropy) and xor with bits 0..8 (these differ + // between functions). + Value *StackPointerLong = IRB.CreatePointerCast(StackPointer, IntptrTy); + Value *StackTag = + IRB.CreateXor(StackPointerLong, IRB.CreateLShr(StackPointerLong, 20), + "hwasan.stack.base.tag"); + return StackTag; +} + +Value *HWAddressSanitizer::getAllocaTag(IRBuilder<> &IRB, Value *StackTag, + AllocaInst *AI, unsigned AllocaNo) { + if (ClGenerateTagsWithCalls) + return getNextTagWithCall(IRB); + return IRB.CreateXor(StackTag, + ConstantInt::get(IntptrTy, RetagMask(AllocaNo))); +} + +Value *HWAddressSanitizer::getUARTag(IRBuilder<> &IRB, Value *StackTag) { + if (ClGenerateTagsWithCalls) + return getNextTagWithCall(IRB); + return IRB.CreateXor(StackTag, ConstantInt::get(IntptrTy, 0xFFU)); +} + +bool HWAddressSanitizer::instrumentStack( + SmallVectorImpl &Allocas, + SmallVectorImpl &RetVec) { + Function *F = Allocas[0]->getParent()->getParent(); + Instruction *InsertPt = &*F->getEntryBlock().begin(); + IRBuilder<> IRB(InsertPt); + + Value *StackTag = getStackBaseTag(IRB); + + // Ideally, we want to calculate tagged stack base pointer, and rewrite all + // alloca addresses using that. Unfortunately, offsets are not known yet + // (unless we use ASan-style mega-alloca). Instead we keep the base tag in a + // temp, shift-OR it into each alloca address and xor with the retag mask. + // This generates one extra instruction per alloca use. + for (unsigned N = 0; N < Allocas.size(); ++N) { + auto *AI = Allocas[N]; + IRB.SetInsertPoint(AI->getNextNode()); + + // Replace uses of the alloca with tagged address. + Value *Tag = getAllocaTag(IRB, StackTag, AI, N); + Value *AILong = IRB.CreatePointerCast(AI, IntptrTy); + std::string Name = + AI->hasName() ? AI->getName().str() : "alloca." 
+ itostr(N); + Value *Replacement = IRB.CreateIntToPtr( + IRB.CreateOr(AILong, IRB.CreateShl(Tag, kPointerTagShift)), + AI->getType(), Name + ".hwasan"); + + for (auto UI = AI->use_begin(), UE = AI->use_end(); UI != UE;) { + Use &U = *UI++; + if (U.getUser() != AILong) + U.set(Replacement); + } + + tagAlloca(IRB, AI, Tag); + + for (auto RI : RetVec) { + IRB.SetInsertPoint(RI); + + // Re-tag alloca memory with the special UAR tag. + Value *Tag = getUARTag(IRB, StackTag); + tagAlloca(IRB, AI, Tag); + } + } + + return true; +} + +bool HWAddressSanitizer::isInterestingAlloca(const AllocaInst &AI) { + return (AI.getAllocatedType()->isSized() && + // FIXME: instrument dynamic allocas, too + AI.isStaticAlloca() && + // alloca() may be called with 0 size, ignore it. + getAllocaSizeInBytes(AI) > 0 && + // We are only interested in allocas not promotable to registers. + // Promotable allocas are common under -O0. + !isAllocaPromotable(&AI) && + // inalloca allocas are not treated as static, and we don't want + // dynamic alloca instrumentation for them as well. + !AI.isUsedWithInAlloca() && + // swifterror allocas are register promoted by ISel + !AI.isSwiftError()); +} + +bool HWAddressSanitizer::runOnFunction(Function &F) { + if (&F == HwasanCtorFunction) + return false; + + if (!F.hasFnAttribute(Attribute::SanitizeHWAddress)) + return false; + + DEBUG(dbgs() << "Function: " << F.getName() << "\n"); + + initializeCallbacks(*F.getParent()); + + bool Changed = false; + SmallVector ToInstrument; + SmallVector AllocasToInstrument; + SmallVector RetVec; + for (auto &BB : F) { + for (auto &Inst : BB) { + if (ClInstrumentStack) + if (AllocaInst *AI = dyn_cast(&Inst)) { + // Realign all allocas. We don't want small uninteresting allocas to + // hide in instrumented alloca's padding. + if (AI->getAlignment() < kAllocaAlignment) + AI->setAlignment(kAllocaAlignment); + // Instrument some of them. + if (isInterestingAlloca(*AI)) + AllocasToInstrument.push_back(AI); + continue; + } + + if (isa(Inst) || isa(Inst) || isa(Inst)) + RetVec.push_back(&Inst); + + Value *MaybeMask = nullptr; + bool IsWrite; + unsigned Alignment; + uint64_t TypeSize; + Value *Addr = isInterestingMemoryAccess(&Inst, &IsWrite, &TypeSize, + &Alignment, &MaybeMask); + if (Addr || isa(Inst)) + ToInstrument.push_back(&Inst); + } + } + + if (!AllocasToInstrument.empty()) + Changed |= instrumentStack(AllocasToInstrument, RetVec); + + for (auto Inst : ToInstrument) + Changed |= instrumentMemAccess(Inst); + + return Changed; +} diff --git a/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp index 8b9bbb499558..49b8a67a6c14 100644 --- a/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp +++ b/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp @@ -47,6 +47,7 @@ #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/PGOInstrumentation.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/CallPromotionUtils.h" #include #include #include @@ -214,49 +215,6 @@ class ICallPromotionFunc { } // end anonymous namespace -bool llvm::isLegalToPromote(Instruction *Inst, Function *F, - const char **Reason) { - // Check the return type. 
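// A plain-C++ sketch of the compatibility rules the removed isLegalToPromote
// below enforced (the real logic now lives in CallPromotionUtils); TypeKind,
// Signature and legalToPromote are illustrative names, not LLVM API.
#include <algorithm>
#include <cstddef>
#include <vector>

namespace icp_sketch {

enum class TypeKind { Void, Int, Float, Ptr };

// In IR the "compatible" case is a bitcast; pointer-to-pointer models that.
inline bool bitcastCompatible(TypeKind A, TypeKind B) {
  return A == B || (A == TypeKind::Ptr && B == TypeKind::Ptr);
}

struct Signature {
  TypeKind Ret;
  std::vector<TypeKind> Params;
  bool IsVarArg = false;
};

// True when an indirect call with signature Call may be rewritten into a
// direct call to a callee with signature Callee.
inline bool legalToPromote(const Signature &Call, const Signature &Callee) {
  if (Call.Ret != TypeKind::Void && !bitcastCompatible(Callee.Ret, Call.Ret))
    return false; // return type mismatch
  if (Call.Params.size() != Callee.Params.size() && !Callee.IsVarArg)
    return false; // argument count mismatch
  size_t N = std::min(Call.Params.size(), Callee.Params.size());
  for (size_t I = 0; I < N; ++I)
    if (!bitcastCompatible(Call.Params[I], Callee.Params[I]))
      return false; // argument type mismatch
  return true;
}

} // namespace icp_sketch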
- Type *CallRetType = Inst->getType(); - if (!CallRetType->isVoidTy()) { - Type *FuncRetType = F->getReturnType(); - if (FuncRetType != CallRetType && - !CastInst::isBitCastable(FuncRetType, CallRetType)) { - if (Reason) - *Reason = "Return type mismatch"; - return false; - } - } - - // Check if the arguments are compatible with the parameters - FunctionType *DirectCalleeType = F->getFunctionType(); - unsigned ParamNum = DirectCalleeType->getFunctionNumParams(); - CallSite CS(Inst); - unsigned ArgNum = CS.arg_size(); - - if (ParamNum != ArgNum && !DirectCalleeType->isVarArg()) { - if (Reason) - *Reason = "The number of arguments mismatch"; - return false; - } - - for (unsigned I = 0; I < ParamNum; ++I) { - Type *PTy = DirectCalleeType->getFunctionParamType(I); - Type *ATy = CS.getArgument(I)->getType(); - if (PTy == ATy) - continue; - if (!CastInst::castIsValid(Instruction::BitCast, CS.getArgument(I), PTy)) { - if (Reason) - *Reason = "Argument type mismatch"; - return false; - } - } - - DEBUG(dbgs() << " #" << NumOfPGOICallPromotion << " Promote the icall to " - << F->getName() << "\n"); - return true; -} - // Indirect-call promotion heuristic. The direct targets are sorted based on // the count. Stop at the first target that is not promoted. std::vector @@ -317,7 +275,7 @@ ICallPromotionFunc::getPromotionCandidatesForCallSite( } const char *Reason = nullptr; - if (!isLegalToPromote(Inst, TargetFunction, &Reason)) { + if (!isLegalToPromote(CallSite(Inst), TargetFunction, &Reason)) { using namespace ore; ORE.emit([&]() { @@ -335,23 +293,11 @@ ICallPromotionFunc::getPromotionCandidatesForCallSite( return Ret; } -// Create a diamond structure for If_Then_Else. Also update the profile -// count. Do the fix-up for the invoke instruction. -static void createIfThenElse(Instruction *Inst, Function *DirectCallee, - uint64_t Count, uint64_t TotalCount, - BasicBlock **DirectCallBB, - BasicBlock **IndirectCallBB, - BasicBlock **MergeBB) { - CallSite CS(Inst); - Value *OrigCallee = CS.getCalledValue(); - - IRBuilder<> BBBuilder(Inst); - LLVMContext &Ctx = Inst->getContext(); - Value *BCI1 = - BBBuilder.CreateBitCast(OrigCallee, Type::getInt8PtrTy(Ctx), ""); - Value *BCI2 = - BBBuilder.CreateBitCast(DirectCallee, Type::getInt8PtrTy(Ctx), ""); - Value *PtrCmp = BBBuilder.CreateICmpEQ(BCI1, BCI2, ""); +Instruction *llvm::pgo::promoteIndirectCall(Instruction *Inst, + Function *DirectCallee, + uint64_t Count, uint64_t TotalCount, + bool AttachProfToDirectCall, + OptimizationRemarkEmitter *ORE) { uint64_t ElseCount = TotalCount - Count; uint64_t MaxCount = (Count >= ElseCount ? Count : ElseCount); @@ -359,231 +305,9 @@ static void createIfThenElse(Instruction *Inst, Function *DirectCallee, MDBuilder MDB(Inst->getContext()); MDNode *BranchWeights = MDB.createBranchWeights( scaleBranchCount(Count, Scale), scaleBranchCount(ElseCount, Scale)); - TerminatorInst *ThenTerm, *ElseTerm; - SplitBlockAndInsertIfThenElse(PtrCmp, Inst, &ThenTerm, &ElseTerm, - BranchWeights); - *DirectCallBB = ThenTerm->getParent(); - (*DirectCallBB)->setName("if.true.direct_targ"); - *IndirectCallBB = ElseTerm->getParent(); - (*IndirectCallBB)->setName("if.false.orig_indirect"); - *MergeBB = Inst->getParent(); - (*MergeBB)->setName("if.end.icp"); - - // Special handing of Invoke instructions. - InvokeInst *II = dyn_cast(Inst); - if (!II) - return; - - // We don't need branch instructions for invoke. - ThenTerm->eraseFromParent(); - ElseTerm->eraseFromParent(); - - // Add jump from Merge BB to the NormalDest. 
This is needed for the newly - // created direct invoke stmt -- as its NormalDst will be fixed up to MergeBB. - BranchInst::Create(II->getNormalDest(), *MergeBB); -} - -// Find the PHI in BB that have the CallResult as the operand. -static bool getCallRetPHINode(BasicBlock *BB, Instruction *Inst) { - BasicBlock *From = Inst->getParent(); - for (auto &I : *BB) { - PHINode *PHI = dyn_cast(&I); - if (!PHI) - continue; - int IX = PHI->getBasicBlockIndex(From); - if (IX == -1) - continue; - Value *V = PHI->getIncomingValue(IX); - if (dyn_cast(V) == Inst) - return true; - } - return false; -} - -// This method fixes up PHI nodes in BB where BB is the UnwindDest of an -// invoke instruction. In BB, there may be PHIs with incoming block being -// OrigBB (the MergeBB after if-then-else splitting). After moving the invoke -// instructions to its own BB, OrigBB is no longer the predecessor block of BB. -// Instead two new predecessors are added: IndirectCallBB and DirectCallBB, -// so the PHI node's incoming BBs need to be fixed up accordingly. -static void fixupPHINodeForUnwind(Instruction *Inst, BasicBlock *BB, - BasicBlock *OrigBB, - BasicBlock *IndirectCallBB, - BasicBlock *DirectCallBB) { - for (auto &I : *BB) { - PHINode *PHI = dyn_cast(&I); - if (!PHI) - continue; - int IX = PHI->getBasicBlockIndex(OrigBB); - if (IX == -1) - continue; - Value *V = PHI->getIncomingValue(IX); - PHI->addIncoming(V, IndirectCallBB); - PHI->setIncomingBlock(IX, DirectCallBB); - } -} - -// This method fixes up PHI nodes in BB where BB is the NormalDest of an -// invoke instruction. In BB, there may be PHIs with incoming block being -// OrigBB (the MergeBB after if-then-else splitting). After moving the invoke -// instructions to its own BB, a new incoming edge will be added to the original -// NormalDstBB from the IndirectCallBB. -static void fixupPHINodeForNormalDest(Instruction *Inst, BasicBlock *BB, - BasicBlock *OrigBB, - BasicBlock *IndirectCallBB, - Instruction *NewInst) { - for (auto &I : *BB) { - PHINode *PHI = dyn_cast(&I); - if (!PHI) - continue; - int IX = PHI->getBasicBlockIndex(OrigBB); - if (IX == -1) - continue; - Value *V = PHI->getIncomingValue(IX); - if (dyn_cast(V) == Inst) { - PHI->setIncomingBlock(IX, IndirectCallBB); - PHI->addIncoming(NewInst, OrigBB); - continue; - } - PHI->addIncoming(V, IndirectCallBB); - } -} - -// Add a bitcast instruction to the direct-call return value if needed. -static Instruction *insertCallRetCast(const Instruction *Inst, - Instruction *DirectCallInst, - Function *DirectCallee) { - if (Inst->getType()->isVoidTy()) - return DirectCallInst; - - Type *CallRetType = Inst->getType(); - Type *FuncRetType = DirectCallee->getReturnType(); - if (FuncRetType == CallRetType) - return DirectCallInst; - - BasicBlock *InsertionBB; - if (CallInst *CI = dyn_cast(DirectCallInst)) - InsertionBB = CI->getParent(); - else - InsertionBB = (dyn_cast(DirectCallInst))->getNormalDest(); - - return (new BitCastInst(DirectCallInst, CallRetType, "", - InsertionBB->getTerminator())); -} - -// Create a DirectCall instruction in the DirectCallBB. -// Parameter Inst is the indirect-call (invoke) instruction. -// DirectCallee is the decl of the direct-call (invoke) target. -// DirecallBB is the BB that the direct-call (invoke) instruction is inserted. -// MergeBB is the bottom BB of the if-then-else-diamond after the -// transformation. For invoke instruction, the edges from DirectCallBB and -// IndirectCallBB to MergeBB are removed before this call (during -// createIfThenElse). 
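// For reference, a plain-C++ rendering of the if-then-else "diamond" that
// promotion produces (now built by promoteCallWithIfThenElse); Foo, Target
// and the argument are placeholders. The block names in the comments match
// the ones used by the removed helper above.
inline int promotedCallSite(int (*Foo)(int), int (*Target)(int), int Arg) {
  int Ret;
  if (Foo == Target)   // if.true.direct_targ
    Ret = Target(Arg); // direct call: inlinable, gets profile metadata
  else                 // if.false.orig_indirect
    Ret = Foo(Arg);    // the original indirect call
  return Ret;          // if.end.icp: the merge/phi of both results
}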
Stores the pointer to the Instruction that cast -// the direct call in \p CastInst. -static Instruction *createDirectCallInst(const Instruction *Inst, - Function *DirectCallee, - BasicBlock *DirectCallBB, - BasicBlock *MergeBB, - Instruction *&CastInst) { - Instruction *NewInst = Inst->clone(); - if (CallInst *CI = dyn_cast(NewInst)) { - CI->setCalledFunction(DirectCallee); - CI->mutateFunctionType(DirectCallee->getFunctionType()); - } else { - // Must be an invoke instruction. Direct invoke's normal destination is - // fixed up to MergeBB. MergeBB is the place where return cast is inserted. - // Also since IndirectCallBB does not have an edge to MergeBB, there is no - // need to insert new PHIs into MergeBB. - InvokeInst *II = dyn_cast(NewInst); - assert(II); - II->setCalledFunction(DirectCallee); - II->mutateFunctionType(DirectCallee->getFunctionType()); - II->setNormalDest(MergeBB); - } - - DirectCallBB->getInstList().insert(DirectCallBB->getFirstInsertionPt(), - NewInst); - - // Clear the value profile data. - NewInst->setMetadata(LLVMContext::MD_prof, nullptr); - CallSite NewCS(NewInst); - FunctionType *DirectCalleeType = DirectCallee->getFunctionType(); - unsigned ParamNum = DirectCalleeType->getFunctionNumParams(); - for (unsigned I = 0; I < ParamNum; ++I) { - Type *ATy = NewCS.getArgument(I)->getType(); - Type *PTy = DirectCalleeType->getParamType(I); - if (ATy != PTy) { - BitCastInst *BI = new BitCastInst(NewCS.getArgument(I), PTy, "", NewInst); - NewCS.setArgument(I, BI); - } - } - - CastInst = insertCallRetCast(Inst, NewInst, DirectCallee); - return NewInst; -} - -// Create a PHI to unify the return values of calls. -static void insertCallRetPHI(Instruction *Inst, Instruction *CallResult, - Function *DirectCallee) { - if (Inst->getType()->isVoidTy()) - return; - - if (Inst->use_empty()) - return; - - BasicBlock *RetValBB = CallResult->getParent(); - - BasicBlock *PHIBB; - if (InvokeInst *II = dyn_cast(CallResult)) - RetValBB = II->getNormalDest(); - - PHIBB = RetValBB->getSingleSuccessor(); - if (getCallRetPHINode(PHIBB, Inst)) - return; - PHINode *CallRetPHI = PHINode::Create(Inst->getType(), 0); - PHIBB->getInstList().push_front(CallRetPHI); - Inst->replaceAllUsesWith(CallRetPHI); - CallRetPHI->addIncoming(Inst, Inst->getParent()); - CallRetPHI->addIncoming(CallResult, RetValBB); -} - -// This function does the actual indirect-call promotion transformation: -// For an indirect-call like: -// Ret = (*Foo)(Args); -// It transforms to: -// if (Foo == DirectCallee) -// Ret1 = DirectCallee(Args); -// else -// Ret2 = (*Foo)(Args); -// Ret = phi(Ret1, Ret2); -// It adds type casts for the args do not match the parameters and the return -// value. Branch weights metadata also updated. -// If \p AttachProfToDirectCall is true, a prof metadata is attached to the -// new direct call to contain \p Count. This is used by SamplePGO inliner to -// check callsite hotness. -// Returns the promoted direct call instruction. -Instruction *llvm::promoteIndirectCall(Instruction *Inst, - Function *DirectCallee, uint64_t Count, - uint64_t TotalCount, - bool AttachProfToDirectCall, - OptimizationRemarkEmitter *ORE) { - assert(DirectCallee != nullptr); - BasicBlock *BB = Inst->getParent(); - // Just to suppress the non-debug build warning. 
- (void)BB; - DEBUG(dbgs() << "\n\n== Basic Block Before ==\n"); - DEBUG(dbgs() << *BB << "\n"); - - BasicBlock *DirectCallBB, *IndirectCallBB, *MergeBB; - createIfThenElse(Inst, DirectCallee, Count, TotalCount, &DirectCallBB, - &IndirectCallBB, &MergeBB); - - // If the return type of the NewInst is not the same as the Inst, a CastInst - // is needed for type casting. Otherwise CastInst is the same as NewInst. - Instruction *CastInst = nullptr; Instruction *NewInst = - createDirectCallInst(Inst, DirectCallee, DirectCallBB, MergeBB, CastInst); + promoteCallWithIfThenElse(CallSite(Inst), DirectCallee, BranchWeights); if (AttachProfToDirectCall) { SmallVector Weights; @@ -592,33 +316,6 @@ Instruction *llvm::promoteIndirectCall(Instruction *Inst, NewInst->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); } - // Move Inst from MergeBB to IndirectCallBB. - Inst->removeFromParent(); - IndirectCallBB->getInstList().insert(IndirectCallBB->getFirstInsertionPt(), - Inst); - - if (InvokeInst *II = dyn_cast(Inst)) { - // At this point, the original indirect invoke instruction has the original - // UnwindDest and NormalDest. For the direct invoke instruction, the - // NormalDest points to MergeBB, and MergeBB jumps to the original - // NormalDest. MergeBB might have a new bitcast instruction for the return - // value. The PHIs are with the original NormalDest. Since we now have two - // incoming edges to NormalDest and UnwindDest, we have to do some fixups. - // - // UnwindDest will not use the return value. So pass nullptr here. - fixupPHINodeForUnwind(Inst, II->getUnwindDest(), MergeBB, IndirectCallBB, - DirectCallBB); - // We don't need to update the operand from NormalDest for DirectCallBB. - // Pass nullptr here. - fixupPHINodeForNormalDest(Inst, II->getNormalDest(), MergeBB, - IndirectCallBB, CastInst); - } - - insertCallRetPHI(Inst, CastInst, DirectCallee); - - DEBUG(dbgs() << "\n== Basic Blocks After ==\n"); - DEBUG(dbgs() << *BB << *DirectCallBB << *IndirectCallBB << *MergeBB << "\n"); - using namespace ore; if (ORE) @@ -639,8 +336,8 @@ uint32_t ICallPromotionFunc::tryToPromote( for (auto &C : Candidates) { uint64_t Count = C.Count; - promoteIndirectCall(Inst, C.TargetFunction, Count, TotalCount, SamplePGO, - &ORE); + pgo::promoteIndirectCall(Inst, C.TargetFunction, Count, TotalCount, + SamplePGO, &ORE); assert(TotalCount >= Count); TotalCount -= Count; NumOfPGOICallPromotion++; diff --git a/lib/Transforms/Instrumentation/InstrProfiling.cpp b/lib/Transforms/Instrumentation/InstrProfiling.cpp index db8fa8977947..9b70f95480e4 100644 --- a/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -43,7 +43,6 @@ #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/LoopSimplify.h" #include "llvm/Transforms/Utils/ModuleUtils.h" #include "llvm/Transforms/Utils/SSAUpdater.h" #include @@ -245,6 +244,9 @@ class PGOCounterPromoter { } bool run(int64_t *NumPromoted) { + // Skip 'infinite' loops: + if (ExitBlocks.size() == 0) + return false; unsigned MaxProm = getMaxNumOfPromotionsInLoop(&L); if (MaxProm == 0) return false; diff --git a/lib/Transforms/Instrumentation/Instrumentation.cpp b/lib/Transforms/Instrumentation/Instrumentation.cpp index ed5e9dba3966..8e9eea96ced7 100644 --- a/lib/Transforms/Instrumentation/Instrumentation.cpp +++ b/lib/Transforms/Instrumentation/Instrumentation.cpp @@ -66,6 +66,7 @@ void 
llvm::initializeInstrumentation(PassRegistry &Registry) { initializePGOMemOPSizeOptLegacyPassPass(Registry); initializeInstrProfilingLegacyPassPass(Registry); initializeMemorySanitizerPass(Registry); + initializeHWAddressSanitizerPass(Registry); initializeThreadSanitizerPass(Registry); initializeSanitizerCoverageModulePass(Registry); initializeDataFlowSanitizerPass(Registry); diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 44190a2c312d..b3c39b5b1665 100644 --- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -320,6 +320,14 @@ static const MemoryMapParams FreeBSD_X86_64_MemoryMapParams = { 0x380000000000, // OriginBase }; +// x86_64 NetBSD +static const MemoryMapParams NetBSD_X86_64_MemoryMapParams = { + 0, // AndMask + 0x500000000000, // XorMask + 0, // ShadowBase + 0x100000000000, // OriginBase +}; + static const PlatformMemoryMapParams Linux_X86_MemoryMapParams = { &Linux_I386_MemoryMapParams, &Linux_X86_64_MemoryMapParams, @@ -345,6 +353,11 @@ static const PlatformMemoryMapParams FreeBSD_X86_MemoryMapParams = { &FreeBSD_X86_64_MemoryMapParams, }; +static const PlatformMemoryMapParams NetBSD_X86_MemoryMapParams = { + nullptr, + &NetBSD_X86_64_MemoryMapParams, +}; + namespace { /// \brief An instrumentation pass implementing detection of uninitialized @@ -577,6 +590,15 @@ bool MemorySanitizer::doInitialization(Module &M) { report_fatal_error("unsupported architecture"); } break; + case Triple::NetBSD: + switch (TargetTriple.getArch()) { + case Triple::x86_64: + MapParams = NetBSD_X86_MemoryMapParams.bits64; + break; + default: + report_fatal_error("unsupported architecture"); + } + break; case Triple::Linux: switch (TargetTriple.getArch()) { case Triple::x86_64: @@ -777,21 +799,19 @@ struct MemorySanitizerVisitor : public InstVisitor { } void storeOrigin(IRBuilder<> &IRB, Value *Addr, Value *Shadow, Value *Origin, - unsigned Alignment, bool AsCall) { + Value *OriginPtr, unsigned Alignment, bool AsCall) { const DataLayout &DL = F.getParent()->getDataLayout(); unsigned OriginAlignment = std::max(kMinOriginAlignment, Alignment); unsigned StoreSize = DL.getTypeStoreSize(Shadow->getType()); if (Shadow->getType()->isAggregateType()) { - paintOrigin(IRB, updateOrigin(Origin, IRB), - getOriginPtr(Addr, IRB, Alignment), StoreSize, + paintOrigin(IRB, updateOrigin(Origin, IRB), OriginPtr, StoreSize, OriginAlignment); } else { Value *ConvertedShadow = convertToShadowTyNoVec(Shadow, IRB); Constant *ConstantShadow = dyn_cast_or_null(ConvertedShadow); if (ConstantShadow) { if (ClCheckConstantShadow && !ConstantShadow->isZeroValue()) - paintOrigin(IRB, updateOrigin(Origin, IRB), - getOriginPtr(Addr, IRB, Alignment), StoreSize, + paintOrigin(IRB, updateOrigin(Origin, IRB), OriginPtr, StoreSize, OriginAlignment); return; } @@ -812,8 +832,7 @@ struct MemorySanitizerVisitor : public InstVisitor { Instruction *CheckTerm = SplitBlockAndInsertIfThen( Cmp, &*IRB.GetInsertPoint(), false, MS.OriginStoreWeights); IRBuilder<> IRBNew(CheckTerm); - paintOrigin(IRBNew, updateOrigin(Origin, IRBNew), - getOriginPtr(Addr, IRBNew, Alignment), StoreSize, + paintOrigin(IRBNew, updateOrigin(Origin, IRBNew), OriginPtr, StoreSize, OriginAlignment); } } @@ -825,10 +844,14 @@ struct MemorySanitizerVisitor : public InstVisitor { Value *Val = SI->getValueOperand(); Value *Addr = SI->getPointerOperand(); Value *Shadow = SI->isAtomic() ? 
getCleanShadow(Val) : getShadow(Val); - Value *ShadowPtr = getShadowPtr(Addr, Shadow->getType(), IRB); - - StoreInst *NewSI = - IRB.CreateAlignedStore(Shadow, ShadowPtr, SI->getAlignment()); + Value *ShadowPtr, *OriginPtr; + Type *ShadowTy = Shadow->getType(); + unsigned Alignment = SI->getAlignment(); + unsigned OriginAlignment = std::max(kMinOriginAlignment, Alignment); + std::tie(ShadowPtr, OriginPtr) = + getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment); + + StoreInst *NewSI = IRB.CreateAlignedStore(Shadow, ShadowPtr, Alignment); DEBUG(dbgs() << " STORE: " << *NewSI << "\n"); if (ClCheckAccessAddress) @@ -838,8 +861,8 @@ struct MemorySanitizerVisitor : public InstVisitor { SI->setOrdering(addReleaseOrdering(SI->getOrdering())); if (MS.TrackOrigins && !SI->isAtomic()) - storeOrigin(IRB, Addr, Shadow, getOrigin(Val), SI->getAlignment(), - InstrumentWithCalls); + storeOrigin(IRB, Addr, Shadow, getOrigin(Val), OriginPtr, + OriginAlignment, InstrumentWithCalls); } } @@ -1018,39 +1041,50 @@ struct MemorySanitizerVisitor : public InstVisitor { return OffsetLong; } - /// \brief Compute the shadow address that corresponds to a given application - /// address. + /// \brief Compute the shadow and origin addresses corresponding to a given + /// application address. /// /// Shadow = ShadowBase + Offset - Value *getShadowPtr(Value *Addr, Type *ShadowTy, - IRBuilder<> &IRB) { - Value *ShadowLong = getShadowPtrOffset(Addr, IRB); + /// Origin = (OriginBase + Offset) & ~3ULL + std::pair getShadowOriginPtrUserspace( + Value *Addr, IRBuilder<> &IRB, Type *ShadowTy, unsigned Alignment, + Instruction **FirstInsn) { + Value *ShadowOffset = getShadowPtrOffset(Addr, IRB); + Value *ShadowLong = ShadowOffset; uint64_t ShadowBase = MS.MapParams->ShadowBase; - if (ShadowBase != 0) + *FirstInsn = dyn_cast(ShadowLong); + if (ShadowBase != 0) { ShadowLong = IRB.CreateAdd(ShadowLong, ConstantInt::get(MS.IntptrTy, ShadowBase)); - return IRB.CreateIntToPtr(ShadowLong, PointerType::get(ShadowTy, 0)); + } + Value *ShadowPtr = + IRB.CreateIntToPtr(ShadowLong, PointerType::get(ShadowTy, 0)); + Value *OriginPtr = nullptr; + if (MS.TrackOrigins) { + Value *OriginLong = ShadowOffset; + uint64_t OriginBase = MS.MapParams->OriginBase; + if (OriginBase != 0) + OriginLong = IRB.CreateAdd(OriginLong, + ConstantInt::get(MS.IntptrTy, OriginBase)); + if (Alignment < kMinOriginAlignment) { + uint64_t Mask = kMinOriginAlignment - 1; + OriginLong = + IRB.CreateAnd(OriginLong, ConstantInt::get(MS.IntptrTy, ~Mask)); + } + OriginPtr = + IRB.CreateIntToPtr(OriginLong, PointerType::get(IRB.getInt32Ty(), 0)); + } + return std::make_pair(ShadowPtr, OriginPtr); } - /// \brief Compute the origin address that corresponds to a given application - /// address. 
- /// - /// OriginAddr = (OriginBase + Offset) & ~3ULL - Value *getOriginPtr(Value *Addr, IRBuilder<> &IRB, unsigned Alignment) { - Value *OriginLong = getShadowPtrOffset(Addr, IRB); - uint64_t OriginBase = MS.MapParams->OriginBase; - if (OriginBase != 0) - OriginLong = - IRB.CreateAdd(OriginLong, - ConstantInt::get(MS.IntptrTy, OriginBase)); - if (Alignment < kMinOriginAlignment) { - uint64_t Mask = kMinOriginAlignment - 1; - OriginLong = IRB.CreateAnd(OriginLong, - ConstantInt::get(MS.IntptrTy, ~Mask)); - } - return IRB.CreateIntToPtr(OriginLong, - PointerType::get(IRB.getInt32Ty(), 0)); + std::pair getShadowOriginPtr(Value *Addr, IRBuilder<> &IRB, + Type *ShadowTy, + unsigned Alignment) { + Instruction *FirstInsn = nullptr; + std::pair ret = + getShadowOriginPtrUserspace(Addr, IRB, ShadowTy, Alignment, &FirstInsn); + return ret; } /// \brief Compute the shadow address for a given function argument. @@ -1202,16 +1236,18 @@ struct MemorySanitizerVisitor : public InstVisitor { Type *EltType = A->getType()->getPointerElementType(); ArgAlign = DL.getABITypeAlignment(EltType); } + Value *CpShadowPtr = + getShadowOriginPtr(V, EntryIRB, EntryIRB.getInt8Ty(), ArgAlign) + .first; if (Overflow) { // ParamTLS overflow. EntryIRB.CreateMemSet( - getShadowPtr(V, EntryIRB.getInt8Ty(), EntryIRB), - Constant::getNullValue(EntryIRB.getInt8Ty()), Size, ArgAlign); + CpShadowPtr, Constant::getNullValue(EntryIRB.getInt8Ty()), + Size, ArgAlign); } else { unsigned CopyAlign = std::min(ArgAlign, kShadowTLSAlignment); - Value *Cpy = EntryIRB.CreateMemCpy( - getShadowPtr(V, EntryIRB.getInt8Ty(), EntryIRB), Base, Size, - CopyAlign); + Value *Cpy = + EntryIRB.CreateMemCpy(CpShadowPtr, Base, Size, CopyAlign); DEBUG(dbgs() << " ByValCpy: " << *Cpy << "\n"); (void)Cpy; } @@ -1356,10 +1392,12 @@ struct MemorySanitizerVisitor : public InstVisitor { IRBuilder<> IRB(I.getNextNode()); Type *ShadowTy = getShadowTy(&I); Value *Addr = I.getPointerOperand(); + Value *ShadowPtr, *OriginPtr; + unsigned Alignment = I.getAlignment(); if (PropagateShadow) { - Value *ShadowPtr = getShadowPtr(Addr, ShadowTy, IRB); - setShadow(&I, - IRB.CreateAlignedLoad(ShadowPtr, I.getAlignment(), "_msld")); + std::tie(ShadowPtr, OriginPtr) = + getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment); + setShadow(&I, IRB.CreateAlignedLoad(ShadowPtr, Alignment, "_msld")); } else { setShadow(&I, getCleanShadow(&I)); } @@ -1372,10 +1410,8 @@ struct MemorySanitizerVisitor : public InstVisitor { if (MS.TrackOrigins) { if (PropagateShadow) { - unsigned Alignment = I.getAlignment(); unsigned OriginAlignment = std::max(kMinOriginAlignment, Alignment); - setOrigin(&I, IRB.CreateAlignedLoad(getOriginPtr(Addr, IRB, Alignment), - OriginAlignment)); + setOrigin(&I, IRB.CreateAlignedLoad(OriginPtr, OriginAlignment)); } else { setOrigin(&I, getCleanOrigin()); } @@ -1395,7 +1431,8 @@ struct MemorySanitizerVisitor : public InstVisitor { IRBuilder<> IRB(&I); Value *Addr = I.getOperand(0); - Value *ShadowPtr = getShadowPtr(Addr, I.getType(), IRB); + Value *ShadowPtr = + getShadowOriginPtr(Addr, IRB, I.getType(), /*Alignment*/ 1).first; if (ClCheckAccessAddress) insertShadowCheck(Addr, &I); @@ -2016,18 +2053,19 @@ struct MemorySanitizerVisitor : public InstVisitor { IRBuilder<> IRB(&I); Value* Addr = I.getArgOperand(0); Value *Shadow = getShadow(&I, 1); - Value *ShadowPtr = getShadowPtr(Addr, Shadow->getType(), IRB); + Value *ShadowPtr, *OriginPtr; // We don't know the pointer alignment (could be unaligned SSE store!). // Have to assume to worst case. 
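// A standalone sketch of the address arithmetic getShadowOriginPtrUserspace
// emits as IR, shown with the NetBSD x86_64 parameters added above. The
// AndMask/XorMask step models getShadowPtrOffset (not shown in this hunk),
// and the & ~3 rounding corresponds to kMinOriginAlignment == 4.
#include <cstdint>

namespace msan_sketch {

struct MemoryMapParams {
  uint64_t AndMask, XorMask, ShadowBase, OriginBase;
};

constexpr MemoryMapParams NetBSD_X86_64 = {0, 0x500000000000, 0,
                                           0x100000000000};

inline uint64_t shadowOffset(uint64_t Addr, const MemoryMapParams &P) {
  uint64_t Off = Addr;
  if (P.AndMask)
    Off &= ~P.AndMask;
  if (P.XorMask)
    Off ^= P.XorMask;
  return Off;
}

// Shadow = ShadowBase + Offset
inline uint64_t shadowAddr(uint64_t Addr, const MemoryMapParams &P) {
  return P.ShadowBase + shadowOffset(Addr, P);
}

// Origin = (OriginBase + Offset) & ~3ULL
inline uint64_t originAddr(uint64_t Addr, const MemoryMapParams &P) {
  return (P.OriginBase + shadowOffset(Addr, P)) & ~3ULL;
}

} // namespace msan_sketch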
+ std::tie(ShadowPtr, OriginPtr) = + getShadowOriginPtr(Addr, IRB, Shadow->getType(), /*Alignment*/ 1); IRB.CreateAlignedStore(Shadow, ShadowPtr, 1); if (ClCheckAccessAddress) insertShadowCheck(Addr, &I); // FIXME: factor out common code from materializeStores - if (MS.TrackOrigins) - IRB.CreateStore(getOrigin(&I, 1), getOriginPtr(Addr, IRB, 1)); + if (MS.TrackOrigins) IRB.CreateStore(getOrigin(&I, 1), OriginPtr); return true; } @@ -2040,11 +2078,14 @@ struct MemorySanitizerVisitor : public InstVisitor { Value *Addr = I.getArgOperand(0); Type *ShadowTy = getShadowTy(&I); + Value *ShadowPtr, *OriginPtr; if (PropagateShadow) { - Value *ShadowPtr = getShadowPtr(Addr, ShadowTy, IRB); // We don't know the pointer alignment (could be unaligned SSE load!). // Have to assume to worst case. - setShadow(&I, IRB.CreateAlignedLoad(ShadowPtr, 1, "_msld")); + unsigned Alignment = 1; + std::tie(ShadowPtr, OriginPtr) = + getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment); + setShadow(&I, IRB.CreateAlignedLoad(ShadowPtr, Alignment, "_msld")); } else { setShadow(&I, getCleanShadow(&I)); } @@ -2054,7 +2095,7 @@ struct MemorySanitizerVisitor : public InstVisitor { if (MS.TrackOrigins) { if (PropagateShadow) - setOrigin(&I, IRB.CreateLoad(getOriginPtr(Addr, IRB, 1))); + setOrigin(&I, IRB.CreateLoad(OriginPtr)); else setOrigin(&I, getCleanOrigin()); } @@ -2412,7 +2453,7 @@ struct MemorySanitizerVisitor : public InstVisitor { IRBuilder<> IRB(&I); Value* Addr = I.getArgOperand(0); Type *Ty = IRB.getInt32Ty(); - Value *ShadowPtr = getShadowPtr(Addr, Ty, IRB); + Value *ShadowPtr = getShadowOriginPtr(Addr, IRB, Ty, /*Alignment*/ 1).first; IRB.CreateStore(getCleanShadow(Ty), IRB.CreatePointerCast(ShadowPtr, Ty->getPointerTo())); @@ -2428,15 +2469,16 @@ struct MemorySanitizerVisitor : public InstVisitor { Value *Addr = I.getArgOperand(0); Type *Ty = IRB.getInt32Ty(); unsigned Alignment = 1; + Value *ShadowPtr, *OriginPtr; + std::tie(ShadowPtr, OriginPtr) = + getShadowOriginPtr(Addr, IRB, Ty, Alignment); if (ClCheckAccessAddress) insertShadowCheck(Addr, &I); - Value *Shadow = IRB.CreateAlignedLoad(getShadowPtr(Addr, Ty, IRB), - Alignment, "_ldmxcsr"); - Value *Origin = MS.TrackOrigins - ? IRB.CreateLoad(getOriginPtr(Addr, IRB, Alignment)) - : getCleanOrigin(); + Value *Shadow = IRB.CreateAlignedLoad(ShadowPtr, Alignment, "_ldmxcsr"); + Value *Origin = + MS.TrackOrigins ? IRB.CreateLoad(OriginPtr) : getCleanOrigin(); insertShadowCheck(Shadow, Origin, &I); } @@ -2723,9 +2765,10 @@ struct MemorySanitizerVisitor : public InstVisitor { if (ArgOffset + Size > kParamTLSSize) break; unsigned ParamAlignment = CS.getParamAlignment(i); unsigned Alignment = std::min(ParamAlignment, kShadowTLSAlignment); - Store = IRB.CreateMemCpy(ArgShadowBase, - getShadowPtr(A, Type::getInt8Ty(*MS.C), IRB), - Size, Alignment); + Value *AShadowPtr = + getShadowOriginPtr(A, IRB, IRB.getInt8Ty(), Alignment).first; + + Store = IRB.CreateMemCpy(ArgShadowBase, AShadowPtr, Size, Alignment); } else { Size = DL.getTypeAllocSize(A->getType()); if (ArgOffset + Size > kParamTLSSize) break; @@ -2772,6 +2815,8 @@ struct MemorySanitizerVisitor : public InstVisitor { setOrigin(&I, getCleanOrigin()); return; } + // FIXME: NextInsn is likely in a basic block that has not been visited yet. + // Anything inserted there will be instrumented by MSan later! 
NextInsn = NormalDest->getFirstInsertionPt(); assert(NextInsn != NormalDest->end() && "Could not find insertion point for retval shadow load"); @@ -2843,7 +2888,9 @@ struct MemorySanitizerVisitor : public InstVisitor { IRB.CreateCall(MS.MsanPoisonStackFn, {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len}); } else { - Value *ShadowBase = getShadowPtr(&I, Type::getInt8PtrTy(*MS.C), IRB); + Value *ShadowBase = + getShadowOriginPtr(&I, IRB, IRB.getInt8Ty(), I.getAlignment()).first; + Value *PoisonValue = IRB.getInt8(PoisonStack ? ClPoisonStackPattern : 0); IRB.CreateMemSet(ShadowBase, PoisonValue, Len, I.getAlignment()); } @@ -3065,38 +3112,44 @@ struct VarArgAMD64Helper : public VarArgHelper { assert(A->getType()->isPointerTy()); Type *RealTy = A->getType()->getPointerElementType(); uint64_t ArgSize = DL.getTypeAllocSize(RealTy); - Value *Base = getShadowPtrForVAArgument(RealTy, IRB, OverflowOffset); + Value *ShadowBase = + getShadowPtrForVAArgument(RealTy, IRB, OverflowOffset); OverflowOffset += alignTo(ArgSize, 8); - IRB.CreateMemCpy(Base, MSV.getShadowPtr(A, IRB.getInt8Ty(), IRB), - ArgSize, kShadowTLSAlignment); + Value *ShadowPtr, *OriginPtr; + std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr( + A, IRB, IRB.getInt8Ty(), kShadowTLSAlignment); + + IRB.CreateMemCpy(ShadowBase, ShadowPtr, ArgSize, kShadowTLSAlignment); } else { ArgKind AK = classifyArgument(A); if (AK == AK_GeneralPurpose && GpOffset >= AMD64GpEndOffset) AK = AK_Memory; if (AK == AK_FloatingPoint && FpOffset >= AMD64FpEndOffset) AK = AK_Memory; - Value *Base; + Value *ShadowBase; switch (AK) { case AK_GeneralPurpose: - Base = getShadowPtrForVAArgument(A->getType(), IRB, GpOffset); + ShadowBase = getShadowPtrForVAArgument(A->getType(), IRB, GpOffset); GpOffset += 8; break; case AK_FloatingPoint: - Base = getShadowPtrForVAArgument(A->getType(), IRB, FpOffset); + ShadowBase = getShadowPtrForVAArgument(A->getType(), IRB, FpOffset); FpOffset += 16; break; case AK_Memory: if (IsFixed) continue; uint64_t ArgSize = DL.getTypeAllocSize(A->getType()); - Base = getShadowPtrForVAArgument(A->getType(), IRB, OverflowOffset); + ShadowBase = + getShadowPtrForVAArgument(A->getType(), IRB, OverflowOffset); OverflowOffset += alignTo(ArgSize, 8); } // Take fixed arguments into account for GpOffset and FpOffset, // but don't actually store shadows for them. if (IsFixed) continue; - IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment); + IRB.CreateAlignedStore(MSV.getShadow(A), ShadowBase, + kShadowTLSAlignment); } } Constant *OverflowSize = @@ -3113,31 +3166,32 @@ struct VarArgAMD64Helper : public VarArgHelper { "_msarg"); } - void visitVAStartInst(VAStartInst &I) override { - if (F.getCallingConv() == CallingConv::Win64) - return; + void unpoisonVAListTagForInst(IntrinsicInst &I) { IRBuilder<> IRB(&I); - VAStartInstrumentationList.push_back(&I); Value *VAListTag = I.getArgOperand(0); - Value *ShadowPtr = MSV.getShadowPtr(VAListTag, IRB.getInt8Ty(), IRB); + Value *ShadowPtr, *OriginPtr; + unsigned Alignment = 8; + std::tie(ShadowPtr, OriginPtr) = + MSV.getShadowOriginPtr(VAListTag, IRB, IRB.getInt8Ty(), Alignment); // Unpoison the whole __va_list_tag. // FIXME: magic ABI constants. IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()), - /* size */24, /* alignment */8, false); + /* size */ 24, Alignment, false); + // We shouldn't need to zero out the origins, as they're only checked for + // nonzero shadow. 
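// For reference, the System V x86_64 va_list layout behind the "magic ABI
// constants" above: the 24-byte memset covers exactly this struct, and the
// offsets 8 and 16 read back in finalizeInstrumentation() below are the
// overflow_arg_area and reg_save_area fields. The struct name is a
// placeholder used for illustration only.
#include <cstdint>

struct VAListTagX86_64 {
  uint32_t gp_offset;      // offset 0
  uint32_t fp_offset;      // offset 4
  void *overflow_arg_area; // offset 8
  void *reg_save_area;     // offset 16
};
static_assert(sizeof(VAListTagX86_64) == 24,
              "matches the size unpoisoned above");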
} - void visitVACopyInst(VACopyInst &I) override { + void visitVAStartInst(VAStartInst &I) override { if (F.getCallingConv() == CallingConv::Win64) return; - IRBuilder<> IRB(&I); - Value *VAListTag = I.getArgOperand(0); - Value *ShadowPtr = MSV.getShadowPtr(VAListTag, IRB.getInt8Ty(), IRB); + VAStartInstrumentationList.push_back(&I); + unpoisonVAListTagForInst(I); + } - // Unpoison the whole __va_list_tag. - // FIXME: magic ABI constants. - IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()), - /* size */24, /* alignment */8, false); + void visitVACopyInst(VACopyInst &I) override { + if (F.getCallingConv() == CallingConv::Win64) return; + unpoisonVAListTagForInst(I); } void finalizeInstrumentation() override { @@ -3162,28 +3216,31 @@ struct VarArgAMD64Helper : public VarArgHelper { IRBuilder<> IRB(OrigInst->getNextNode()); Value *VAListTag = OrigInst->getArgOperand(0); - Value *RegSaveAreaPtrPtr = - IRB.CreateIntToPtr( + Value *RegSaveAreaPtrPtr = IRB.CreateIntToPtr( IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy), ConstantInt::get(MS.IntptrTy, 16)), Type::getInt64PtrTy(*MS.C)); Value *RegSaveAreaPtr = IRB.CreateLoad(RegSaveAreaPtrPtr); - Value *RegSaveAreaShadowPtr = - MSV.getShadowPtr(RegSaveAreaPtr, IRB.getInt8Ty(), IRB); - IRB.CreateMemCpy(RegSaveAreaShadowPtr, VAArgTLSCopy, - AMD64FpEndOffset, 16); - - Value *OverflowArgAreaPtrPtr = - IRB.CreateIntToPtr( + Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr; + unsigned Alignment = 16; + std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) = + MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(), + Alignment); + IRB.CreateMemCpy(RegSaveAreaShadowPtr, VAArgTLSCopy, AMD64FpEndOffset, + Alignment); + Value *OverflowArgAreaPtrPtr = IRB.CreateIntToPtr( IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy), ConstantInt::get(MS.IntptrTy, 8)), Type::getInt64PtrTy(*MS.C)); Value *OverflowArgAreaPtr = IRB.CreateLoad(OverflowArgAreaPtrPtr); - Value *OverflowArgAreaShadowPtr = - MSV.getShadowPtr(OverflowArgAreaPtr, IRB.getInt8Ty(), IRB); + Value *OverflowArgAreaShadowPtr, *OverflowArgAreaOriginPtr; + std::tie(OverflowArgAreaShadowPtr, OverflowArgAreaOriginPtr) = + MSV.getShadowOriginPtr(OverflowArgAreaPtr, IRB, IRB.getInt8Ty(), + Alignment); Value *SrcPtr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), VAArgTLSCopy, AMD64FpEndOffset); - IRB.CreateMemCpy(OverflowArgAreaShadowPtr, SrcPtr, VAArgOverflowSize, 16); + IRB.CreateMemCpy(OverflowArgAreaShadowPtr, SrcPtr, VAArgOverflowSize, + Alignment); } } }; @@ -3242,19 +3299,24 @@ struct VarArgMIPS64Helper : public VarArgHelper { IRBuilder<> IRB(&I); VAStartInstrumentationList.push_back(&I); Value *VAListTag = I.getArgOperand(0); - Value *ShadowPtr = MSV.getShadowPtr(VAListTag, IRB.getInt8Ty(), IRB); + Value *ShadowPtr, *OriginPtr; + unsigned Alignment = 8; + std::tie(ShadowPtr, OriginPtr) = + MSV.getShadowOriginPtr(VAListTag, IRB, IRB.getInt8Ty(), Alignment); IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()), - /* size */8, /* alignment */8, false); + /* size */ 8, Alignment, false); } void visitVACopyInst(VACopyInst &I) override { IRBuilder<> IRB(&I); + VAStartInstrumentationList.push_back(&I); Value *VAListTag = I.getArgOperand(0); - Value *ShadowPtr = MSV.getShadowPtr(VAListTag, IRB.getInt8Ty(), IRB); - // Unpoison the whole __va_list_tag. - // FIXME: magic ABI constants. 
+ Value *ShadowPtr, *OriginPtr; + unsigned Alignment = 8; + std::tie(ShadowPtr, OriginPtr) = + MSV.getShadowOriginPtr(VAListTag, IRB, IRB.getInt8Ty(), Alignment); IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()), - /* size */8, /* alignment */8, false); + /* size */ 8, Alignment, false); } void finalizeInstrumentation() override { @@ -3282,9 +3344,12 @@ struct VarArgMIPS64Helper : public VarArgHelper { IRB.CreateIntToPtr(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy), Type::getInt64PtrTy(*MS.C)); Value *RegSaveAreaPtr = IRB.CreateLoad(RegSaveAreaPtrPtr); - Value *RegSaveAreaShadowPtr = - MSV.getShadowPtr(RegSaveAreaPtr, IRB.getInt8Ty(), IRB); - IRB.CreateMemCpy(RegSaveAreaShadowPtr, VAArgTLSCopy, CopySize, 8); + Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr; + unsigned Alignment = 8; + std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) = + MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(), + Alignment); + IRB.CreateMemCpy(RegSaveAreaShadowPtr, VAArgTLSCopy, CopySize, Alignment); } } }; @@ -3394,21 +3459,24 @@ struct VarArgAArch64Helper : public VarArgHelper { IRBuilder<> IRB(&I); VAStartInstrumentationList.push_back(&I); Value *VAListTag = I.getArgOperand(0); - Value *ShadowPtr = MSV.getShadowPtr(VAListTag, IRB.getInt8Ty(), IRB); - // Unpoison the whole __va_list_tag. - // FIXME: magic ABI constants (size of va_list). + Value *ShadowPtr, *OriginPtr; + unsigned Alignment = 8; + std::tie(ShadowPtr, OriginPtr) = + MSV.getShadowOriginPtr(VAListTag, IRB, IRB.getInt8Ty(), Alignment); IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()), - /* size */32, /* alignment */8, false); + /* size */ 32, Alignment, false); } void visitVACopyInst(VACopyInst &I) override { IRBuilder<> IRB(&I); + VAStartInstrumentationList.push_back(&I); Value *VAListTag = I.getArgOperand(0); - Value *ShadowPtr = MSV.getShadowPtr(VAListTag, IRB.getInt8Ty(), IRB); - // Unpoison the whole __va_list_tag. - // FIXME: magic ABI constants (size of va_list). + Value *ShadowPtr, *OriginPtr; + unsigned Alignment = 8; + std::tie(ShadowPtr, OriginPtr) = + MSV.getShadowOriginPtr(VAListTag, IRB, IRB.getInt8Ty(), Alignment); IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()), - /* size */32, /* alignment */8, false); + /* size */ 32, Alignment, false); } // Retrieve a va_list field of 'void*' size. @@ -3494,7 +3562,9 @@ struct VarArgAArch64Helper : public VarArgHelper { IRB.CreateAdd(GrArgSize, GrOffSaveArea); Value *GrRegSaveAreaShadowPtr = - MSV.getShadowPtr(GrRegSaveAreaPtr, IRB.getInt8Ty(), IRB); + MSV.getShadowOriginPtr(GrRegSaveAreaPtr, IRB, IRB.getInt8Ty(), + /*Alignment*/ 8) + .first; Value *GrSrcPtr = IRB.CreateInBoundsGEP(IRB.getInt8Ty(), VAArgTLSCopy, GrRegSaveAreaShadowPtrOff); @@ -3507,7 +3577,9 @@ struct VarArgAArch64Helper : public VarArgHelper { IRB.CreateAdd(VrArgSize, VrOffSaveArea); Value *VrRegSaveAreaShadowPtr = - MSV.getShadowPtr(VrRegSaveAreaPtr, IRB.getInt8Ty(), IRB); + MSV.getShadowOriginPtr(VrRegSaveAreaPtr, IRB, IRB.getInt8Ty(), + /*Alignment*/ 8) + .first; Value *VrSrcPtr = IRB.CreateInBoundsGEP( IRB.getInt8Ty(), @@ -3520,7 +3592,9 @@ struct VarArgAArch64Helper : public VarArgHelper { // And finally for remaining arguments. 
Value *StackSaveAreaShadowPtr = - MSV.getShadowPtr(StackSaveAreaPtr, IRB.getInt8Ty(), IRB); + MSV.getShadowOriginPtr(StackSaveAreaPtr, IRB, IRB.getInt8Ty(), + /*Alignment*/ 16) + .first; Value *StackSrcPtr = IRB.CreateInBoundsGEP(IRB.getInt8Ty(), VAArgTLSCopy, @@ -3581,8 +3655,11 @@ struct VarArgPowerPC64Helper : public VarArgHelper { if (!IsFixed) { Value *Base = getShadowPtrForVAArgument(RealTy, IRB, VAArgOffset - VAArgBase); - IRB.CreateMemCpy(Base, MSV.getShadowPtr(A, IRB.getInt8Ty(), IRB), - ArgSize, kShadowTLSAlignment); + Value *AShadowPtr, *AOriginPtr; + std::tie(AShadowPtr, AOriginPtr) = MSV.getShadowOriginPtr( + A, IRB, IRB.getInt8Ty(), kShadowTLSAlignment); + + IRB.CreateMemCpy(Base, AShadowPtr, ArgSize, kShadowTLSAlignment); } VAArgOffset += alignTo(ArgSize, 8); } else { @@ -3640,19 +3717,25 @@ struct VarArgPowerPC64Helper : public VarArgHelper { IRBuilder<> IRB(&I); VAStartInstrumentationList.push_back(&I); Value *VAListTag = I.getArgOperand(0); - Value *ShadowPtr = MSV.getShadowPtr(VAListTag, IRB.getInt8Ty(), IRB); + Value *ShadowPtr, *OriginPtr; + unsigned Alignment = 8; + std::tie(ShadowPtr, OriginPtr) = + MSV.getShadowOriginPtr(VAListTag, IRB, IRB.getInt8Ty(), Alignment); IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()), - /* size */8, /* alignment */8, false); + /* size */ 8, Alignment, false); } void visitVACopyInst(VACopyInst &I) override { IRBuilder<> IRB(&I); Value *VAListTag = I.getArgOperand(0); - Value *ShadowPtr = MSV.getShadowPtr(VAListTag, IRB.getInt8Ty(), IRB); + Value *ShadowPtr, *OriginPtr; + unsigned Alignment = 8; + std::tie(ShadowPtr, OriginPtr) = + MSV.getShadowOriginPtr(VAListTag, IRB, IRB.getInt8Ty(), Alignment); // Unpoison the whole __va_list_tag. // FIXME: magic ABI constants. IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()), - /* size */8, /* alignment */8, false); + /* size */ 8, Alignment, false); } void finalizeInstrumentation() override { @@ -3680,9 +3763,12 @@ struct VarArgPowerPC64Helper : public VarArgHelper { IRB.CreateIntToPtr(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy), Type::getInt64PtrTy(*MS.C)); Value *RegSaveAreaPtr = IRB.CreateLoad(RegSaveAreaPtrPtr); - Value *RegSaveAreaShadowPtr = - MSV.getShadowPtr(RegSaveAreaPtr, IRB.getInt8Ty(), IRB); - IRB.CreateMemCpy(RegSaveAreaShadowPtr, VAArgTLSCopy, CopySize, 8); + Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr; + unsigned Alignment = 8; + std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) = + MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(), + Alignment); + IRB.CreateMemCpy(RegSaveAreaShadowPtr, VAArgTLSCopy, CopySize, Alignment); } } }; diff --git a/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index 47278e192834..ab3619ecef0e 100644 --- a/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -119,6 +119,7 @@ #include using namespace llvm; +using ProfileCount = Function::ProfileCount; #define DEBUG_TYPE "pgo-instrumentation" @@ -462,7 +463,7 @@ struct PGOEdge { bool Removed = false; bool IsCritical = false; - PGOEdge(const BasicBlock *Src, const BasicBlock *Dest, unsigned W = 1) + PGOEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1) : SrcBB(Src), DestBB(Dest), Weight(W) {} // Return the information string of an edge. 
@@ -716,6 +717,9 @@ BasicBlock *FuncPGOInstrumentation::getInstrBB(Edge *E) { static void instrumentOneFunc( Function &F, Module *M, BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFI, std::unordered_multimap &ComdatMembers) { + // Split indirectbr critical edges here before computing the MST rather than + // later in getInstrBB() to avoid invalidating it. + SplitIndirectBrCriticalEdges(F, BPI, BFI); FuncPGOInstrumentation FuncInfo(F, ComdatMembers, true, BPI, BFI); unsigned NumCounters = FuncInfo.getNumCounters(); @@ -776,7 +780,7 @@ struct PGOUseEdge : public PGOEdge { bool CountValid = false; uint64_t CountValue = 0; - PGOUseEdge(const BasicBlock *Src, const BasicBlock *Dest, unsigned W = 1) + PGOUseEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1) : PGOEdge(Src, Dest, W) {} // Set edge count value @@ -1136,7 +1140,7 @@ void PGOUseFunc::populateCounters() { } #endif uint64_t FuncEntryCount = getBBInfo(&*F.begin()).CountValue; - F.setEntryCount(FuncEntryCount); + F.setEntryCount(ProfileCount(FuncEntryCount, Function::PCT_Real)); uint64_t FuncMaxCount = FuncEntryCount; for (auto &BB : F) { auto BI = findBBInfo(&BB); @@ -1463,6 +1467,9 @@ static bool annotateAllFunctions( continue; auto *BPI = LookupBPI(F); auto *BFI = LookupBFI(F); + // Split indirectbr critical edges here before computing the MST rather than + // later in getInstrBB() to avoid invalidating it. + SplitIndirectBrCriticalEdges(F, BPI, BFI); PGOUseFunc Func(F, &M, ComdatMembers, BPI, BFI); if (!Func.readCounters(PGOReader.get())) continue; diff --git a/lib/Transforms/ObjCARC/ObjCARC.cpp b/lib/Transforms/ObjCARC/ObjCARC.cpp index 688dd12c408a..c30aaebd0f4d 100644 --- a/lib/Transforms/ObjCARC/ObjCARC.cpp +++ b/lib/Transforms/ObjCARC/ObjCARC.cpp @@ -14,7 +14,6 @@ //===----------------------------------------------------------------------===// #include "ObjCARC.h" -#include "llvm-c/Core.h" #include "llvm-c/Initialization.h" #include "llvm/InitializePasses.h" diff --git a/lib/Transforms/ObjCARC/ObjCARC.h b/lib/Transforms/ObjCARC/ObjCARC.h index cd9b3d96a14f..745dac886190 100644 --- a/lib/Transforms/ObjCARC/ObjCARC.h +++ b/lib/Transforms/ObjCARC/ObjCARC.h @@ -82,6 +82,26 @@ static inline const Instruction *getreturnRVOperand(const Instruction &Inst, return dyn_cast(Opnd); } +/// Return the list of PHI nodes that are equivalent to PN. +template +void getEquivalentPHIs(PHINodeTy &PN, VectorTy &PHIList) { + auto *BB = PN.getParent(); + for (auto &P : BB->phis()) { + if (&P == &PN) // Do not add PN to the list. + continue; + unsigned I = 0, E = PN.getNumIncomingValues(); + for (; I < E; ++I) { + auto *BB = PN.getIncomingBlock(I); + auto *PNOpnd = PN.getIncomingValue(I)->stripPointerCasts(); + auto *POpnd = P.getIncomingValueForBlock(BB)->stripPointerCasts(); + if (PNOpnd != POpnd) + break; + } + if (I == E) + PHIList.push_back(&P); + } +} + } // end namespace objcarc } // end namespace llvm diff --git a/lib/Transforms/ObjCARC/ObjCARCContract.cpp b/lib/Transforms/ObjCARC/ObjCARCContract.cpp index e70e7591f6a7..5deb39449e92 100644 --- a/lib/Transforms/ObjCARC/ObjCARCContract.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCContract.cpp @@ -248,7 +248,7 @@ static StoreInst *findSafeStoreForStoreStrongContraction(LoadInst *Load, // Ok, now we know we have not seen a store yet. See if Inst can write to // our load location, if it can not, just ignore the instruction. 
- if (!(AA->getModRefInfo(Inst, Loc) & MRI_Mod)) + if (!isModSet(AA->getModRefInfo(Inst, Loc))) continue; Store = dyn_cast(Inst); @@ -618,8 +618,17 @@ bool ObjCARCContract::runOnFunction(Function &F) { else if (isa(Arg) && !cast(Arg)->isInterposable()) Arg = cast(Arg)->getAliasee(); - else + else { + // If Arg is a PHI node, get PHIs that are equivalent to it and replace + // their uses. + if (PHINode *PN = dyn_cast(Arg)) { + SmallVector PHIList; + getEquivalentPHIs(*PN, PHIList); + for (Value *PHI : PHIList) + ReplaceArgUses(PHI); + } break; + } } // Replace bitcast users of Arg that are dominated by Inst. diff --git a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp index 99ed6863c22e..ecec85444b12 100644 --- a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp @@ -652,6 +652,11 @@ void ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F, SmallVector Users; Users.push_back(Ptr); + + // Add PHIs that are equivalent to Ptr to Users. + if (const PHINode *PN = dyn_cast(Ptr)) + getEquivalentPHIs(*PN, Users); + do { Ptr = Users.pop_back_val(); for (const User *U : Ptr->users()) { diff --git a/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp index 99480f12da9e..6c871bb9e7eb 100644 --- a/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp +++ b/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp @@ -374,8 +374,7 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall) { NewAlignment = std::max(NewAlignment, AltSrcAlignment); if (NewAlignment > MI->getAlignment()) { - MI->setAlignment(ConstantInt::get(Type::getInt32Ty( - MI->getParent()->getContext()), NewAlignment)); + MI->setAlignment(NewAlignment); ++NumMemIntAlignChanged; } @@ -385,8 +384,7 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall) { assert((!isa(MI) || isa(MI)) && "Unknown memory intrinsic"); - MI->setAlignment(ConstantInt::get(Type::getInt32Ty( - MI->getParent()->getContext()), NewDestAlignment)); + MI->setAlignment(NewDestAlignment); ++NumMemIntAlignChanged; } } diff --git a/lib/Transforms/Scalar/BDCE.cpp b/lib/Transforms/Scalar/BDCE.cpp index 9d7997be1eb5..851efa000f65 100644 --- a/lib/Transforms/Scalar/BDCE.cpp +++ b/lib/Transforms/Scalar/BDCE.cpp @@ -20,11 +20,8 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/DemandedBits.h" #include "llvm/Analysis/GlobalsModRef.h" -#include "llvm/IR/CFG.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Operator.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/Transforms/Scalar/CallSiteSplitting.cpp b/lib/Transforms/Scalar/CallSiteSplitting.cpp index d53968be6120..3243731f07db 100644 --- a/lib/Transforms/Scalar/CallSiteSplitting.cpp +++ b/lib/Transforms/Scalar/CallSiteSplitting.cpp @@ -13,10 +13,11 @@ // threading, or IPA-CP based function cloning, etc.). // As of now we support two cases : // -// 1) If a call site is dominated by an OR condition and if any of its arguments -// are predicated on this OR condition, try to split the condition with more -// constrained arguments. For example, in the code below, we try to split the -// call site since we can predicate the argument(ptr) based on the OR condition. +// 1) Try to a split call-site with constrained arguments, if any constraints +// on any argument can be found by following the single predecessors of the +// all site's predecessors. 
Currently this pass only handles call-sites with 2 +// predecessors. For example, in the code below, we try to split the call-site +// since we can predicate the argument(ptr) based on the OR condition. // // Split from : // if (!ptr || c) @@ -72,11 +73,7 @@ using namespace PatternMatch; STATISTIC(NumCallSiteSplit, "Number of call-site split"); -static void addNonNullAttribute(Instruction *CallI, Instruction *&NewCallI, - Value *Op) { - if (!NewCallI) - NewCallI = CallI->clone(); - CallSite CS(NewCallI); +static void addNonNullAttribute(CallSite CS, Value *Op) { unsigned ArgNo = 0; for (auto &I : CS.args()) { if (&*I == Op) @@ -85,11 +82,8 @@ static void addNonNullAttribute(Instruction *CallI, Instruction *&NewCallI, } } -static void setConstantInArgument(Instruction *CallI, Instruction *&NewCallI, - Value *Op, Constant *ConstValue) { - if (!NewCallI) - NewCallI = CallI->clone(); - CallSite CS(NewCallI); +static void setConstantInArgument(CallSite CS, Value *Op, + Constant *ConstValue) { unsigned ArgNo = 0; for (auto &I : CS.args()) { if (&*I == Op) @@ -114,99 +108,63 @@ static bool isCondRelevantToAnyCallArgument(ICmpInst *Cmp, CallSite CS) { return false; } -static SmallVector -findOrCondRelevantToCallArgument(CallSite CS) { - SmallVector BranchInsts; - for (auto PredBB : predecessors(CS.getInstruction()->getParent())) { - auto *PBI = dyn_cast(PredBB->getTerminator()); - if (!PBI || !PBI->isConditional()) - continue; +typedef std::pair ConditionTy; +typedef SmallVector ConditionsTy; + +/// If From has a conditional jump to To, add the condition to Conditions, +/// if it is relevant to any argument at CS. +static void recordCondition(CallSite CS, BasicBlock *From, BasicBlock *To, + ConditionsTy &Conditions) { + auto *BI = dyn_cast(From->getTerminator()); + if (!BI || !BI->isConditional()) + return; + + CmpInst::Predicate Pred; + Value *Cond = BI->getCondition(); + if (!match(Cond, m_ICmp(Pred, m_Value(), m_Constant()))) + return; + + ICmpInst *Cmp = cast(Cond); + if (Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) + if (isCondRelevantToAnyCallArgument(Cmp, CS)) + Conditions.push_back({Cmp, From->getTerminator()->getSuccessor(0) == To + ? Pred + : Cmp->getInversePredicate()}); +} - CmpInst::Predicate Pred; - Value *Cond = PBI->getCondition(); - if (!match(Cond, m_ICmp(Pred, m_Value(), m_Constant()))) - continue; - ICmpInst *Cmp = cast(Cond); - if (Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) - if (isCondRelevantToAnyCallArgument(Cmp, CS)) - BranchInsts.push_back(PBI); +/// Record ICmp conditions relevant to any argument in CS following Pred's +/// single successors. If there are conflicting conditions along a path, like +/// x == 1 and x == 0, the first condition will be used. 
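recordCondition above keys the stored predicate off which successor of the branch reaches the block of interest. A small sketch of just that normalization, using only the IR APIs the hunk already relies on; predicateOnEdge is a hypothetical helper for illustration.

    #include <cassert>
    #include "llvm/IR/Instructions.h"

    using namespace llvm;

    // For an edge From->To guarded by `br i1 (icmp pred X, C), S0, S1`, the
    // condition that holds on the edge is `pred` when To is S0, and the
    // inverse predicate when To is only reached on the false edge (S1).
    static CmpInst::Predicate predicateOnEdge(BranchInst *BI, BasicBlock *To) {
      assert(BI->isConditional() && "expected a conditional branch");
      auto *Cmp = cast<ICmpInst>(BI->getCondition());
      return BI->getSuccessor(0) == To ? Cmp->getPredicate()
                                       : Cmp->getInversePredicate();
    }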
+static void recordConditions(CallSite CS, BasicBlock *Pred, + ConditionsTy &Conditions) { + recordCondition(CS, Pred, CS.getInstruction()->getParent(), Conditions); + BasicBlock *From = Pred; + BasicBlock *To = Pred; + SmallPtrSet Visited = {From}; + while (!Visited.count(From->getSinglePredecessor()) && + (From = From->getSinglePredecessor())) { + recordCondition(CS, From, To, Conditions); + To = From; } - return BranchInsts; } -static bool tryCreateCallSitesOnOrPredicatedArgument( - CallSite CS, Instruction *&NewCSTakenFromHeader, - Instruction *&NewCSTakenFromNextCond, BasicBlock *HeaderBB) { - auto BranchInsts = findOrCondRelevantToCallArgument(CS); - assert(BranchInsts.size() <= 2 && - "Unexpected number of blocks in the OR predicated condition"); - Instruction *Instr = CS.getInstruction(); - BasicBlock *CallSiteBB = Instr->getParent(); - TerminatorInst *HeaderTI = HeaderBB->getTerminator(); - bool IsCSInTakenPath = CallSiteBB == HeaderTI->getSuccessor(0); - - for (auto *PBI : BranchInsts) { - assert(isa(PBI->getCondition()) && - "Unexpected condition in a conditional branch."); - ICmpInst *Cmp = cast(PBI->getCondition()); - Value *Arg = Cmp->getOperand(0); - assert(isa(Cmp->getOperand(1)) && - "Expected op1 to be a constant."); - Constant *ConstVal = cast(Cmp->getOperand(1)); - CmpInst::Predicate Pred = Cmp->getPredicate(); - - if (PBI->getParent() == HeaderBB) { - Instruction *&CallTakenFromHeader = - IsCSInTakenPath ? NewCSTakenFromHeader : NewCSTakenFromNextCond; - Instruction *&CallUntakenFromHeader = - IsCSInTakenPath ? NewCSTakenFromNextCond : NewCSTakenFromHeader; - - assert((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) && - "Unexpected predicate in an OR condition"); - - // Set the constant value for agruments in the call predicated based on - // the OR condition. - Instruction *&CallToSetConst = Pred == ICmpInst::ICMP_EQ - ? CallTakenFromHeader - : CallUntakenFromHeader; - setConstantInArgument(Instr, CallToSetConst, Arg, ConstVal); - - // Add the NonNull attribute if compared with the null pointer. - if (ConstVal->getType()->isPointerTy() && ConstVal->isNullValue()) { - Instruction *&CallToSetAttr = Pred == ICmpInst::ICMP_EQ - ? CallUntakenFromHeader - : CallTakenFromHeader; - addNonNullAttribute(Instr, CallToSetAttr, Arg); - } - continue; - } - - if (Pred == ICmpInst::ICMP_EQ) { - if (PBI->getSuccessor(0) == Instr->getParent()) { - // Set the constant value for the call taken from the second block in - // the OR condition. - setConstantInArgument(Instr, NewCSTakenFromNextCond, Arg, ConstVal); - } else { - // Add the NonNull attribute if compared with the null pointer for the - // call taken from the second block in the OR condition. - if (ConstVal->getType()->isPointerTy() && ConstVal->isNullValue()) - addNonNullAttribute(Instr, NewCSTakenFromNextCond, Arg); - } - } else { - if (PBI->getSuccessor(0) == Instr->getParent()) { - // Add the NonNull attribute if compared with the null pointer for the - // call taken from the second block in the OR condition. - if (ConstVal->getType()->isPointerTy() && ConstVal->isNullValue()) - addNonNullAttribute(Instr, NewCSTakenFromNextCond, Arg); - } else if (Pred == ICmpInst::ICMP_NE) { - // Set the constant value for the call in the untaken path from the - // header block. 
- setConstantInArgument(Instr, NewCSTakenFromNextCond, Arg, ConstVal); - } else - llvm_unreachable("Unexpected condition"); +static void addConditions(CallSite CS, const ConditionsTy &Conditions) { + for (auto &Cond : Conditions) { + Value *Arg = Cond.first->getOperand(0); + Constant *ConstVal = cast(Cond.first->getOperand(1)); + if (Cond.second == ICmpInst::ICMP_EQ) + setConstantInArgument(CS, Arg, ConstVal); + else if (ConstVal->getType()->isPointerTy() && ConstVal->isNullValue()) { + assert(Cond.second == ICmpInst::ICMP_NE); + addNonNullAttribute(CS, Arg); } } - return NewCSTakenFromHeader || NewCSTakenFromNextCond; +} + +static SmallVector getTwoPredecessors(BasicBlock *BB) { + SmallVector Preds(predecessors((BB))); + assert(Preds.size() == 2 && "Expected exactly 2 predecessors!"); + return Preds; } static bool canSplitCallSite(CallSite CS) { @@ -221,7 +179,7 @@ static bool canSplitCallSite(CallSite CS) { // call instruction, and we do not move a call-site across any other // instruction. BasicBlock *CallSiteBB = Instr->getParent(); - if (Instr != CallSiteBB->getFirstNonPHI()) + if (Instr != CallSiteBB->getFirstNonPHIOrDbg()) return false; // Need 2 predecessors and cannot split an edge from an IndirectBrInst. @@ -233,17 +191,19 @@ static bool canSplitCallSite(CallSite CS) { return CallSiteBB->canSplitPredecessors(); } -/// Return true if the CS is split into its new predecessors which are directly -/// hooked to each of its orignial predecessors pointed by PredBB1 and PredBB2. -/// In OR predicated case, PredBB1 will point the header, and PredBB2 will point -/// to the second compare block. CallInst1 and CallInst2 will be the new -/// call-sites placed in the new predecessors split for PredBB1 and PredBB2, -/// repectively. Therefore, CallInst1 will be the call-site placed -/// between Header and Tail, and CallInst2 will be the call-site between TBB and -/// Tail. For example, in the IR below with an OR condition, the call-site can -/// be split +/// Return true if the CS is split into its new predecessors. +/// +/// For each (predecessor, conditions from predecessors) pair, it will split the +/// basic block containing the call site, hook it up to the predecessor and +/// replace the call instruction with new call instructions, which contain +/// constraints based on the conditions from their predecessors. +/// For example, in the IR below with an OR condition, the call-site can +/// be split. In this case, Preds for Tail is [(Header, a == null), +/// (TBB, a != null, b == null)]. Tail is replaced by 2 split blocks, containing +/// CallInst1, which has constraints based on the conditions from Head and +/// CallInst2, which has constraints based on the conditions coming from TBB. /// -/// from : +/// From : /// /// Header: /// %c = icmp eq i32* %a, null @@ -271,60 +231,53 @@ static bool canSplitCallSite(CallSite CS) { /// Tail: /// %p = phi i1 [%ca1, %Tail-split1],[%ca2, %Tail-split2] /// -/// Note that for an OR predicated case, CallInst1 and CallInst2 should be -/// created with more constrained arguments in -/// createCallSitesOnOrPredicatedArgument(). -static void splitCallSite(CallSite CS, BasicBlock *PredBB1, BasicBlock *PredBB2, - Instruction *CallInst1, Instruction *CallInst2) { +/// Note that in case any arguments at the call-site are constrained by its +/// predecessors, new call-sites with more constrained arguments will be +/// created in createCallSitesOnPredicatedArgument(). 
+static void splitCallSite( + CallSite CS, + const SmallVectorImpl> &Preds) { Instruction *Instr = CS.getInstruction(); BasicBlock *TailBB = Instr->getParent(); - assert(Instr == (TailBB->getFirstNonPHI()) && "Unexpected call-site"); - - BasicBlock *SplitBlock1 = - SplitBlockPredecessors(TailBB, PredBB1, ".predBB1.split"); - BasicBlock *SplitBlock2 = - SplitBlockPredecessors(TailBB, PredBB2, ".predBB2.split"); - - assert((SplitBlock1 && SplitBlock2) && "Unexpected new basic block split."); - - if (!CallInst1) - CallInst1 = Instr->clone(); - if (!CallInst2) - CallInst2 = Instr->clone(); - - CallInst1->insertBefore(&*SplitBlock1->getFirstInsertionPt()); - CallInst2->insertBefore(&*SplitBlock2->getFirstInsertionPt()); - - CallSite CS1(CallInst1); - CallSite CS2(CallInst2); - - // Handle PHIs used as arguments in the call-site. - for (auto &PI : *TailBB) { - PHINode *PN = dyn_cast(&PI); - if (!PN) - break; - unsigned ArgNo = 0; - for (auto &CI : CS.args()) { - if (&*CI == PN) { - CS1.setArgument(ArgNo, PN->getIncomingValueForBlock(SplitBlock1)); - CS2.setArgument(ArgNo, PN->getIncomingValueForBlock(SplitBlock2)); + + PHINode *CallPN = nullptr; + if (Instr->getNumUses()) + CallPN = PHINode::Create(Instr->getType(), Preds.size(), "phi.call"); + + DEBUG(dbgs() << "split call-site : " << *Instr << " into \n"); + for (const auto &P : Preds) { + BasicBlock *PredBB = P.first; + BasicBlock *SplitBlock = + SplitBlockPredecessors(TailBB, PredBB, ".predBB.split"); + assert(SplitBlock && "Unexpected new basic block split."); + + Instruction *NewCI = Instr->clone(); + CallSite NewCS(NewCI); + addConditions(NewCS, P.second); + NewCI->insertBefore(&*SplitBlock->getFirstInsertionPt()); + + // Handle PHIs used as arguments in the call-site. + for (PHINode &PN : TailBB->phis()) { + unsigned ArgNo = 0; + for (auto &CI : CS.args()) { + if (&*CI == &PN) { + NewCS.setArgument(ArgNo, PN.getIncomingValueForBlock(SplitBlock)); + } + ++ArgNo; } - ++ArgNo; } + DEBUG(dbgs() << " " << *NewCI << " in " << SplitBlock->getName() + << "\n"); + if (CallPN) + CallPN->addIncoming(NewCI, SplitBlock); } // Replace users of the original call with a PHI mering call-sites split. 
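The splitCallSite rewrite above clones the call once per predecessor and, in the hunk that follows, merges the clones' results with a single PHI instead of the old hard-coded two-way one. A self-contained sketch of that merge step, assuming Instr is the original call and NewCalls pairs each clone with its split block; mergeClonedCalls is illustrative, not part of the patch.

    #include <utility>
    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Instructions.h"

    using namespace llvm;

    static void mergeClonedCalls(
        Instruction *Instr,
        ArrayRef<std::pair<Instruction *, BasicBlock *>> NewCalls) {
      if (Instr->use_empty())
        return; // nothing reads the call's result, so no PHI is needed
      PHINode *CallPN =
          PHINode::Create(Instr->getType(), NewCalls.size(), "phi.call");
      for (const auto &NC : NewCalls)
        CallPN->addIncoming(NC.first, NC.second); // one entry per split block
      // The PHI lives at the top of the original (tail) block and takes over
      // all remaining uses of the original call before it is erased.
      CallPN->insertBefore(Instr->getParent()->getFirstNonPHI());
      Instr->replaceAllUsesWith(CallPN);
    }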
- if (Instr->getNumUses()) { - PHINode *PN = PHINode::Create(Instr->getType(), 2, "phi.call", Instr); - PN->addIncoming(CallInst1, SplitBlock1); - PN->addIncoming(CallInst2, SplitBlock2); - Instr->replaceAllUsesWith(PN); + if (CallPN) { + CallPN->insertBefore(TailBB->getFirstNonPHI()); + Instr->replaceAllUsesWith(CallPN); } - DEBUG(dbgs() << "split call-site : " << *Instr << " into \n"); - DEBUG(dbgs() << " " << *CallInst1 << " in " << SplitBlock1->getName() - << "\n"); - DEBUG(dbgs() << " " << *CallInst2 << " in " << SplitBlock2->getName() - << "\n"); + Instr->eraseFromParent(); NumCallSiteSplit++; } @@ -334,7 +287,7 @@ static void splitCallSite(CallSite CS, BasicBlock *PredBB1, BasicBlock *PredBB2, static bool isPredicatedOnPHI(CallSite CS) { Instruction *Instr = CS.getInstruction(); BasicBlock *Parent = Instr->getParent(); - if (Instr != Parent->getFirstNonPHI()) + if (Instr != Parent->getFirstNonPHIOrDbg()) return false; for (auto &BI : *Parent) { @@ -357,58 +310,43 @@ static bool isPredicatedOnPHI(CallSite CS) { return false; } -static SmallVector getTwoPredecessors(BasicBlock *BB) { - SmallVector Preds(predecessors((BB))); - assert(Preds.size() == 2 && "Expected exactly 2 predecessors!"); - return Preds; -} - static bool tryToSplitOnPHIPredicatedArgument(CallSite CS) { if (!isPredicatedOnPHI(CS)) return false; auto Preds = getTwoPredecessors(CS.getInstruction()->getParent()); - splitCallSite(CS, Preds[0], Preds[1], nullptr, nullptr); + SmallVector, 2> PredsCS = { + {Preds[0], {}}, {Preds[1], {}}}; + splitCallSite(CS, PredsCS); return true; } -// Check if one of the predecessors is a single predecessors of the other. -// This is a requirement for control flow modeling an OR. HeaderBB points to -// the single predecessor and OrBB points to other node. HeaderBB potentially -// contains the first compare of the OR and OrBB the second. 
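Both checks above that previously used getFirstNonPHI() now use getFirstNonPHIOrDbg(). The point is debug-info invariance: a llvm.dbg.value between the PHIs and the call must not make the call look like it is not first in its block, otherwise building with -g would change whether the split fires. A one-line sketch of the test, with a hypothetical helper name.

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Instruction.h"

    using namespace llvm;

    // True when only PHIs and debug intrinsics precede I in its block, so the
    // decision does not differ between -g and non -g builds.
    static bool isEffectivelyFirstInBlock(const Instruction *I) {
      return I == I->getParent()->getFirstNonPHIOrDbg();
    }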
-static bool isOrHeader(BasicBlock *HeaderBB, BasicBlock *OrBB) { - return OrBB->getSinglePredecessor() == HeaderBB && - HeaderBB->getTerminator()->getNumSuccessors() == 2; -} -static bool tryToSplitOnOrPredicatedArgument(CallSite CS) { +static bool tryToSplitOnPredicatedArgument(CallSite CS) { auto Preds = getTwoPredecessors(CS.getInstruction()->getParent()); - BasicBlock *HeaderBB = nullptr; - BasicBlock *OrBB = nullptr; - if (isOrHeader(Preds[0], Preds[1])) { - HeaderBB = Preds[0]; - OrBB = Preds[1]; - } else if (isOrHeader(Preds[1], Preds[0])) { - HeaderBB = Preds[1]; - OrBB = Preds[0]; - } else + if (Preds[0] == Preds[1]) return false; - Instruction *CallInst1 = nullptr; - Instruction *CallInst2 = nullptr; - if (!tryCreateCallSitesOnOrPredicatedArgument(CS, CallInst1, CallInst2, - HeaderBB)) { - assert(!CallInst1 && !CallInst2 && "Unexpected new call-sites cloned."); - return false; + SmallVector, 2> PredsCS; + for (auto *Pred : make_range(Preds.rbegin(), Preds.rend())) { + ConditionsTy Conditions; + recordConditions(CS, Pred, Conditions); + PredsCS.push_back({Pred, Conditions}); } - splitCallSite(CS, HeaderBB, OrBB, CallInst1, CallInst2); + if (std::all_of(PredsCS.begin(), PredsCS.end(), + [](const std::pair &P) { + return P.second.empty(); + })) + return false; + + splitCallSite(CS, PredsCS); return true; } static bool tryToSplitCallSite(CallSite CS) { if (!CS.arg_size() || !canSplitCallSite(CS)) return false; - return tryToSplitOnOrPredicatedArgument(CS) || + return tryToSplitOnPredicatedArgument(CS) || tryToSplitOnPHIPredicatedArgument(CS); } diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index 040e0f59c61a..07803f6e0c73 100644 --- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -14,6 +14,7 @@ #include "llvm/Transforms/Scalar/CorrelatedValuePropagation.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/Optional.h" +#include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/GlobalsModRef.h" @@ -77,6 +78,7 @@ namespace { bool runOnFunction(Function &F) override; void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); AU.addRequired(); AU.addPreserved(); } @@ -88,6 +90,7 @@ char CorrelatedValuePropagation::ID = 0; INITIALIZE_PASS_BEGIN(CorrelatedValuePropagation, "correlated-propagation", "Value Propagation", false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(LazyValueInfoWrapperPass) INITIALIZE_PASS_END(CorrelatedValuePropagation, "correlated-propagation", "Value Propagation", false, false) @@ -120,8 +123,8 @@ static bool processSelect(SelectInst *S, LazyValueInfo *LVI) { return true; } -static bool processPHI(PHINode *P, LazyValueInfo *LVI, - const SimplifyQuery &SQ) { +static bool processPHI(PHINode *P, LazyValueInfo *LVI, const SimplifyQuery &SQ, + DenseSet &ReachableBlocks) { bool Changed = false; BasicBlock *BB = P->getParent(); @@ -129,7 +132,18 @@ static bool processPHI(PHINode *P, LazyValueInfo *LVI, Value *Incoming = P->getIncomingValue(i); if (isa(Incoming)) continue; - Value *V = LVI->getConstantOnEdge(Incoming, P->getIncomingBlock(i), BB, P); + // If the incoming value is coming from an unreachable block, replace + // it with undef and go on. 
This is good for two reasons: + // 1) We skip an LVI query for an unreachable block + // 2) We transform the incoming value so that the code below doesn't + // mess around with IR in unreachable blocks. + BasicBlock *IncomingBB = P->getIncomingBlock(i); + if (!ReachableBlocks.count(IncomingBB)) { + P->setIncomingValue(i, UndefValue::get(P->getType())); + continue; + } + + Value *V = LVI->getConstantOnEdge(Incoming, IncomingBB, BB, P); // Look if the incoming value is a select with a scalar condition for which // LVI can tells us the value. In that case replace the incoming value with @@ -329,13 +343,15 @@ static bool processSwitch(SwitchInst *SI, LazyValueInfo *LVI) { // See if we can prove that the given overflow intrinsic will not overflow. static bool willNotOverflow(IntrinsicInst *II, LazyValueInfo *LVI) { using OBO = OverflowingBinaryOperator; - auto NoWrapOnAddition = [&] (Value *LHS, Value *RHS, unsigned NoWrapKind) { + auto NoWrap = [&] (Instruction::BinaryOps BinOp, unsigned NoWrapKind) { + Value *RHS = II->getOperand(1); ConstantRange RRange = LVI->getConstantRange(RHS, II->getParent(), II); ConstantRange NWRegion = ConstantRange::makeGuaranteedNoWrapRegion( - BinaryOperator::Add, RRange, NoWrapKind); + BinOp, RRange, NoWrapKind); // As an optimization, do not compute LRange if we do not need it. if (NWRegion.isEmptySet()) return false; + Value *LHS = II->getOperand(0); ConstantRange LRange = LVI->getConstantRange(LHS, II->getParent(), II); return NWRegion.contains(LRange); }; @@ -343,11 +359,13 @@ static bool willNotOverflow(IntrinsicInst *II, LazyValueInfo *LVI) { default: break; case Intrinsic::uadd_with_overflow: - return NoWrapOnAddition(II->getOperand(0), II->getOperand(1), - OBO::NoUnsignedWrap); + return NoWrap(Instruction::Add, OBO::NoUnsignedWrap); case Intrinsic::sadd_with_overflow: - return NoWrapOnAddition(II->getOperand(0), II->getOperand(1), - OBO::NoSignedWrap); + return NoWrap(Instruction::Add, OBO::NoSignedWrap); + case Intrinsic::usub_with_overflow: + return NoWrap(Instruction::Sub, OBO::NoUnsignedWrap); + case Intrinsic::ssub_with_overflow: + return NoWrap(Instruction::Sub, OBO::NoSignedWrap); } return false; } @@ -356,12 +374,17 @@ static void processOverflowIntrinsic(IntrinsicInst *II) { Value *NewOp = nullptr; switch (II->getIntrinsicID()) { default: - llvm_unreachable("Illegal instruction."); + llvm_unreachable("Unexpected instruction."); case Intrinsic::uadd_with_overflow: case Intrinsic::sadd_with_overflow: NewOp = BinaryOperator::CreateAdd(II->getOperand(0), II->getOperand(1), II->getName(), II); break; + case Intrinsic::usub_with_overflow: + case Intrinsic::ssub_with_overflow: + NewOp = BinaryOperator::CreateSub(II->getOperand(0), II->getOperand(1), + II->getName(), II); + break; } ++NumOverflows; IRBuilder<> B(II); @@ -376,7 +399,7 @@ static bool processCallSite(CallSite CS, LazyValueInfo *LVI) { SmallVector ArgNos; unsigned ArgNo = 0; - if (IntrinsicInst *II = dyn_cast(CS.getInstruction())) { + if (auto *II = dyn_cast(CS.getInstruction())) { if (willNotOverflow(II, LVI)) { processOverflowIntrinsic(II); return true; @@ -552,11 +575,19 @@ static Constant *getConstantAt(Value *V, Instruction *At, LazyValueInfo *LVI) { static bool runImpl(Function &F, LazyValueInfo *LVI, const SimplifyQuery &SQ) { bool FnChanged = false; + + // Compute reachability from the entry block of this function via an RPO + // walk. We use this information when processing PHIs. 
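The NoWrap lambda above decides overflow-freedom purely with constant ranges: it builds the guaranteed-no-wrap region from the RHS range and asks whether the LHS range is contained in it. Below is a standalone worked example with made-up i8 ranges standing in for LVI results, kept to the Add case that both the old and new code handle.

    #include "llvm/ADT/APInt.h"
    #include "llvm/IR/ConstantRange.h"
    #include "llvm/IR/Instruction.h"
    #include "llvm/IR/Operator.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    int main() {
      // Pretend LVI proved RHS in [3, 10) and LHS in [20, 40) for an i8
      // uadd.with.overflow.
      ConstantRange RRange(APInt(8, 3), APInt(8, 10));
      ConstantRange LRange(APInt(8, 20), APInt(8, 40));
      // All X such that X + Y does not wrap (unsigned) for every Y in RRange;
      // here that is [0, 247), since the largest Y is 9.
      ConstantRange NWRegion = ConstantRange::makeGuaranteedNoWrapRegion(
          Instruction::Add, RRange, OverflowingBinaryOperator::NoUnsignedWrap);
      // LRange is contained in the region, so the add provably cannot
      // overflow and the intrinsic could be rewritten to a plain add.
      outs() << "no unsigned wrap: "
             << (NWRegion.contains(LRange) ? "yes" : "no") << "\n";
      return 0;
    }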
+ DenseSet ReachableBlocks; + ReversePostOrderTraversal RPOT(&F); + for (BasicBlock *BB : RPOT) + ReachableBlocks.insert(BB); + // Visiting in a pre-order depth-first traversal causes us to simplify early // blocks before querying later blocks (which require us to analyze early // blocks). Eagerly simplifying shallow blocks means there is strictly less // work to do for deep blocks. This also means we don't visit unreachable - // blocks. + // blocks. for (BasicBlock *BB : depth_first(&F.getEntryBlock())) { bool BBChanged = false; for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) { @@ -566,7 +597,7 @@ static bool runImpl(Function &F, LazyValueInfo *LVI, const SimplifyQuery &SQ) { BBChanged |= processSelect(cast(II), LVI); break; case Instruction::PHI: - BBChanged |= processPHI(cast(II), LVI, SQ); + BBChanged |= processPHI(cast(II), LVI, SQ, ReachableBlocks); break; case Instruction::ICmp: case Instruction::FCmp: diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index 877050ec1771..18cf3592556b 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -146,7 +146,8 @@ deleteDeadInstruction(Instruction *I, BasicBlock::iterator *BBI, /// Does this instruction write some memory? This only returns true for things /// that we can analyze with other helpers below. -static bool hasMemoryWrite(Instruction *I, const TargetLibraryInfo &TLI) { +static bool hasAnalyzableMemoryWrite(Instruction *I, + const TargetLibraryInfo &TLI) { if (isa(I)) return true; if (IntrinsicInst *II = dyn_cast(I)) { @@ -180,7 +181,8 @@ static bool hasMemoryWrite(Instruction *I, const TargetLibraryInfo &TLI) { /// Return a Location stored to by the specified instruction. If isRemovable /// returns true, this function and getLocForRead completely describe the memory /// operations for this instruction. -static MemoryLocation getLocForWrite(Instruction *Inst, AliasAnalysis &AA) { +static MemoryLocation getLocForWrite(Instruction *Inst) { + if (StoreInst *SI = dyn_cast(Inst)) return MemoryLocation::get(SI); @@ -190,29 +192,30 @@ static MemoryLocation getLocForWrite(Instruction *Inst, AliasAnalysis &AA) { return Loc; } - IntrinsicInst *II = dyn_cast(Inst); - if (!II) - return MemoryLocation(); - - switch (II->getIntrinsicID()) { - default: - return MemoryLocation(); // Unhandled intrinsic. - case Intrinsic::init_trampoline: - // FIXME: We don't know the size of the trampoline, so we can't really - // handle it here. - return MemoryLocation(II->getArgOperand(0)); - case Intrinsic::lifetime_end: { - uint64_t Len = cast(II->getArgOperand(0))->getZExtValue(); - return MemoryLocation(II->getArgOperand(1), Len); - } + if (IntrinsicInst *II = dyn_cast(Inst)) { + switch (II->getIntrinsicID()) { + default: + return MemoryLocation(); // Unhandled intrinsic. + case Intrinsic::init_trampoline: + return MemoryLocation(II->getArgOperand(0)); + case Intrinsic::lifetime_end: { + uint64_t Len = cast(II->getArgOperand(0))->getZExtValue(); + return MemoryLocation(II->getArgOperand(1), Len); + } + } } + if (auto CS = CallSite(Inst)) + // All the supported TLI functions so far happen to have dest as their + // first argument. + return MemoryLocation(CS.getArgument(0)); + return MemoryLocation(); } -/// Return the location read by the specified "hasMemoryWrite" instruction if -/// any. +/// Return the location read by the specified "hasAnalyzableMemoryWrite" +/// instruction if any. 
static MemoryLocation getLocForRead(Instruction *Inst, const TargetLibraryInfo &TLI) { - assert(hasMemoryWrite(Inst, TLI) && "Unknown instruction case"); + assert(hasAnalyzableMemoryWrite(Inst, TLI) && "Unknown instruction case"); // The only instructions that both read and write are the mem transfer // instructions (memcpy/memmove). @@ -230,7 +233,7 @@ static bool isRemovable(Instruction *I) { if (IntrinsicInst *II = dyn_cast(I)) { switch (II->getIntrinsicID()) { - default: llvm_unreachable("doesn't pass 'hasMemoryWrite' predicate"); + default: llvm_unreachable("doesn't pass 'hasAnalyzableMemoryWrite' predicate"); case Intrinsic::lifetime_end: // Never remove dead lifetime_end's, e.g. because it is followed by a // free. @@ -246,6 +249,7 @@ static bool isRemovable(Instruction *I) { } } + // note: only get here for calls with analyzable writes - i.e. libcalls if (auto CS = CallSite(I)) return CS.getInstruction()->use_empty(); @@ -286,23 +290,12 @@ static bool isShortenableAtTheBeginning(Instruction *I) { /// Return the pointer that is being written to. static Value *getStoredPointerOperand(Instruction *I) { - if (StoreInst *SI = dyn_cast(I)) - return SI->getPointerOperand(); - if (MemIntrinsic *MI = dyn_cast(I)) - return MI->getDest(); - - if (IntrinsicInst *II = dyn_cast(I)) { - switch (II->getIntrinsicID()) { - default: llvm_unreachable("Unexpected intrinsic!"); - case Intrinsic::init_trampoline: - return II->getArgOperand(0); - } - } - - CallSite CS(I); - // All the supported functions so far happen to have dest as their first - // argument. - return CS.getArgument(0); + //TODO: factor this to reuse getLocForWrite + MemoryLocation Loc = getLocForWrite(I); + assert(Loc.Ptr && + "unable to find pointer writen for analyzable instruction?"); + // TODO: most APIs don't expect const Value * + return const_cast(Loc.Ptr); } static uint64_t getPointerSize(const Value *V, const DataLayout &DL, @@ -594,11 +587,9 @@ static bool memoryIsNotModifiedBetween(Instruction *FirstI, } for (; BI != EI; ++BI) { Instruction *I = &*BI; - if (I->mayWriteToMemory() && I != SecondI) { - auto Res = AA->getModRefInfo(I, MemLoc); - if (Res & MRI_Mod) + if (I->mayWriteToMemory() && I != SecondI) + if (isModSet(AA->getModRefInfo(I, MemLoc))) return false; - } } if (B != FirstBB) { assert(B != &FirstBB->getParent()->getEntryBlock() && @@ -652,7 +643,8 @@ static bool handleFree(CallInst *F, AliasAnalysis *AA, MD->getPointerDependencyFrom(Loc, false, InstPt->getIterator(), BB); while (Dep.isDef() || Dep.isClobber()) { Instruction *Dependency = Dep.getInst(); - if (!hasMemoryWrite(Dependency, *TLI) || !isRemovable(Dependency)) + if (!hasAnalyzableMemoryWrite(Dependency, *TLI) || + !isRemovable(Dependency)) break; Value *DepPointer = @@ -756,7 +748,7 @@ static bool handleEndBlock(BasicBlock &BB, AliasAnalysis *AA, --BBI; // If we find a store, check to see if it points into a dead stack value. - if (hasMemoryWrite(&*BBI, *TLI) && isRemovable(&*BBI)) { + if (hasAnalyzableMemoryWrite(&*BBI, *TLI) && isRemovable(&*BBI)) { // See through pointer-to-pointer bitcasts SmallVector Pointers; GetUnderlyingObjects(getStoredPointerOperand(&*BBI), Pointers, DL); @@ -822,9 +814,7 @@ static bool handleEndBlock(BasicBlock &BB, AliasAnalysis *AA, // the call is live. DeadStackObjects.remove_if([&](Value *I) { // See if the call site touches the value. 
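The DSE hunks above and below replace bit-tests on ModRefInfo against MRI_* values with the named predicates. A sketch of the idiom, assuming only AliasAnalysis; the helper names are illustrative.

    #include "llvm/Analysis/AliasAnalysis.h"
    #include "llvm/Analysis/MemoryLocation.h"
    #include "llvm/IR/Instruction.h"

    using namespace llvm;

    // was: (AA.getModRefInfo(I, Loc) & MRI_Mod) != 0
    static bool mayWriteTo(AliasAnalysis &AA, Instruction *I,
                           const MemoryLocation &Loc) {
      return isModSet(AA.getModRefInfo(I, Loc));
    }

    // was: A == MRI_ModRef || A == MRI_Ref
    static bool mayReadFrom(AliasAnalysis &AA, Instruction *I,
                            const MemoryLocation &Loc) {
      return isRefSet(AA.getModRefInfo(I, Loc));
    }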
- ModRefInfo A = AA->getModRefInfo(CS, I, getPointerSize(I, DL, *TLI)); - - return A == MRI_ModRef || A == MRI_Ref; + return isRefSet(AA->getModRefInfo(CS, I, getPointerSize(I, DL, *TLI))); }); // If all of the allocas were clobbered by the call then we're not going @@ -970,7 +960,7 @@ static bool removePartiallyOverlappedStores(AliasAnalysis *AA, bool Changed = false; for (auto OI : IOL) { Instruction *EarlierWrite = OI.first; - MemoryLocation Loc = getLocForWrite(EarlierWrite, *AA); + MemoryLocation Loc = getLocForWrite(EarlierWrite); assert(isRemovable(EarlierWrite) && "Expect only removable instruction"); assert(Loc.Size != MemoryLocation::UnknownSize && "Unexpected mem loc"); @@ -1071,7 +1061,7 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA, } // Check to see if Inst writes to memory. If not, continue. - if (!hasMemoryWrite(Inst, *TLI)) + if (!hasAnalyzableMemoryWrite(Inst, *TLI)) continue; // eliminateNoopStore will update in iterator, if necessary. @@ -1089,7 +1079,7 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA, continue; // Figure out what location is being stored to. - MemoryLocation Loc = getLocForWrite(Inst, *AA); + MemoryLocation Loc = getLocForWrite(Inst); // If we didn't get a useful location, fail. if (!Loc.Ptr) @@ -1111,7 +1101,9 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA, // // Find out what memory location the dependent instruction stores. Instruction *DepWrite = InstDep.getInst(); - MemoryLocation DepLoc = getLocForWrite(DepWrite, *AA); + if (!hasAnalyzableMemoryWrite(DepWrite, *TLI)) + break; + MemoryLocation DepLoc = getLocForWrite(DepWrite); // If we didn't get a useful location, or if it isn't a size, bail out. if (!DepLoc.Ptr) break; @@ -1255,7 +1247,7 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA, if (DepWrite == &BB.front()) break; // Can't look past this instruction if it might read 'Loc'. - if (AA->getModRefInfo(DepWrite, Loc) & MRI_Ref) + if (isRefSet(AA->getModRefInfo(DepWrite, Loc))) break; InstDep = MD->getPointerDependencyFrom(Loc, /*isLoad=*/ false, diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp index 1066dc33007b..342a6d08fa5b 100644 --- a/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/lib/Transforms/Scalar/EarlyCSE.cpp @@ -27,6 +27,7 @@ #include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -142,6 +143,21 @@ unsigned DenseMapInfo::getHashValue(SimpleValue Val) { return hash_combine(Inst->getOpcode(), Pred, LHS, RHS); } + // Hash min/max/abs (cmp + select) to allow for commuted operands. + // Min/max may also have non-canonical compare predicate (eg, the compare for + // smin may use 'sgt' rather than 'slt'), and non-canonical operands in the + // compare. + Value *A, *B; + SelectPatternFlavor SPF = matchSelectPattern(Inst, A, B).Flavor; + // TODO: We should also detect FP min/max. 
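The hashing being added here (and the matching isEqual change further on) exists so that two selects computing the same min/max land in the same bucket even when the compare predicate and operands are written in commuted form. A sketch of the equivalence being exploited, using matchSelectPattern as the hunks do; sameMinMax is an illustrative helper.

    #include "llvm/Analysis/ValueTracking.h"
    #include "llvm/IR/Instructions.h"

    using namespace llvm;

    // Both of these compute smax(%a, %b):
    //   %c1 = icmp sgt i32 %a, %b            %c2 = icmp slt i32 %a, %b
    //   %m1 = select i1 %c1, i32 %a, i32 %b  %m2 = select i1 %c2, i32 %b, i32 %a
    // matchSelectPattern reports SPF_SMAX for both, with the same two operands
    // possibly in swapped order; hence the hash orders the pair and isEqual
    // accepts either order.
    static bool sameMinMax(Instruction *I1, Instruction *I2) {
      Value *A1, *B1, *A2, *B2;
      SelectPatternFlavor F1 = matchSelectPattern(I1, A1, B1).Flavor;
      SelectPatternFlavor F2 = matchSelectPattern(I2, A2, B2).Flavor;
      return F1 != SPF_UNKNOWN && F1 == F2 &&
             ((A1 == A2 && B1 == B2) || (A1 == B2 && B1 == A2));
    }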
+ if (SPF == SPF_SMIN || SPF == SPF_SMAX || + SPF == SPF_UMIN || SPF == SPF_UMAX || + SPF == SPF_ABS || SPF == SPF_NABS) { + if (A > B) + std::swap(A, B); + return hash_combine(Inst->getOpcode(), SPF, A, B); + } + if (CastInst *CI = dyn_cast(Inst)) return hash_combine(CI->getOpcode(), CI->getType(), CI->getOperand(0)); @@ -200,6 +216,20 @@ bool DenseMapInfo::isEqual(SimpleValue LHS, SimpleValue RHS) { LHSCmp->getSwappedPredicate() == RHSCmp->getPredicate(); } + // Min/max/abs can occur with commuted operands, non-canonical predicates, + // and/or non-canonical operands. + Value *LHSA, *LHSB; + SelectPatternFlavor LSPF = matchSelectPattern(LHSI, LHSA, LHSB).Flavor; + // TODO: We should also detect FP min/max. + if (LSPF == SPF_SMIN || LSPF == SPF_SMAX || + LSPF == SPF_UMIN || LSPF == SPF_UMAX || + LSPF == SPF_ABS || LSPF == SPF_NABS) { + Value *RHSA, *RHSB; + SelectPatternFlavor RSPF = matchSelectPattern(RHSI, RHSA, RHSB).Flavor; + return (LSPF == RSPF && ((LHSA == RHSA && LHSB == RHSB) || + (LHSA == RHSB && LHSB == RHSA))); + } + return false; } @@ -687,6 +717,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { // Dead instructions should just be removed. if (isInstructionTriviallyDead(Inst, &TLI)) { DEBUG(dbgs() << "EarlyCSE DCE: " << *Inst << '\n'); + salvageDebugInfo(*Inst); removeMSSA(Inst); Inst->eraseFromParent(); Changed = true; diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index 76e295c1ad2e..e2c1eaf58e43 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -1299,7 +1299,10 @@ static void reportLoadElim(LoadInst *LI, Value *AvailableValue, /// non-local by performing PHI construction. bool GVN::processNonLocalLoad(LoadInst *LI) { // non-local speculations are not allowed under asan. - if (LI->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeAddress)) + if (LI->getParent()->getParent()->hasFnAttribute( + Attribute::SanitizeAddress) || + LI->getParent()->getParent()->hasFnAttribute( + Attribute::SanitizeHWAddress)) return false; // Step 1: Find the non-local dependencies of the load. diff --git a/lib/Transforms/Scalar/GVNHoist.cpp b/lib/Transforms/Scalar/GVNHoist.cpp index 3b551844dc23..026fab5dbd3b 100644 --- a/lib/Transforms/Scalar/GVNHoist.cpp +++ b/lib/Transforms/Scalar/GVNHoist.cpp @@ -648,7 +648,7 @@ class GVNHoist { // track in a CHI. In the PDom walk, there can be values in the // stack which are not control dependent e.g., nested loop. if (si != RenameStack.end() && si->second.size() && - DT->dominates(Pred, si->second.back()->getParent())) { + DT->properlyDominates(Pred, si->second.back()->getParent())) { C.Dest = BB; // Assign the edge C.I = si->second.pop_back_val(); // Assign the argument DEBUG(dbgs() << "\nCHI Inserted in BB: " << C.Dest->getName() @@ -795,8 +795,8 @@ class GVNHoist { for (auto IDFB : IDFBlocks) { // TODO: Prune out useless CHI insertions. for (unsigned i = 0; i < V.size(); ++i) { CHIArg C = {VN, nullptr, nullptr}; - if (DT->dominates(IDFB, V[i]->getParent())) { // Ignore spurious PDFs. - // InValue[V[i]->getParent()].push_back(std::make_pair(VN, V[i])); + // Ignore spurious PDFs. 
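Both GVNHoist hunks (the one above and the one that follows) tighten dominates() to properlyDominates(). The difference only matters when a block is compared with itself, which is exactly the spurious case being excluded; the CHI placement logic wants strictly earlier blocks. A minimal sketch of the relation, stated for a DominatorTree but holding equally for the post-dominator walk here.

    #include "llvm/IR/Dominators.h"

    using namespace llvm;

    // dominates(A, A) is true for every block; properlyDominates(A, A) never is.
    static bool strictlyDominates(const DominatorTree &DT, const BasicBlock *A,
                                  const BasicBlock *B) {
      return A != B && DT.dominates(A, B); // same as DT.properlyDominates(A, B)
    }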
+ if (DT->properlyDominates(IDFB, V[i]->getParent())) { OutValue[IDFB].push_back(C); DEBUG(dbgs() << "\nInsertion a CHI for BB: " << IDFB->getName() << ", for Insn: " << *V[i]); diff --git a/lib/Transforms/Scalar/GVNSink.cpp b/lib/Transforms/Scalar/GVNSink.cpp index 814a62cd7d65..5594c29bbd9f 100644 --- a/lib/Transforms/Scalar/GVNSink.cpp +++ b/lib/Transforms/Scalar/GVNSink.cpp @@ -592,12 +592,8 @@ class GVNSink { /// Create a ModelledPHI for each PHI in BB, adding to PHIs. void analyzeInitialPHIs(BasicBlock *BB, ModelledPHISet &PHIs, SmallPtrSetImpl &PHIContents) { - for (auto &I : *BB) { - auto *PN = dyn_cast(&I); - if (!PN) - return; - - auto MPHI = ModelledPHI(PN); + for (PHINode &PN : BB->phis()) { + auto MPHI = ModelledPHI(&PN); PHIs.insert(MPHI); for (auto *V : MPHI.getValues()) PHIContents.insert(V); @@ -641,7 +637,7 @@ Optional GVNSink::analyzeInstructionForSinking( DenseMap VNums; for (auto *I : Insts) { uint32_t N = VN.lookupOrAdd(I); - DEBUG(dbgs() << " VN=" << utohexstr(N) << " for" << *I << "\n"); + DEBUG(dbgs() << " VN=" << Twine::utohexstr(N) << " for" << *I << "\n"); if (N == ~0U) return None; VNums[N]++; diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index abb50f27f1cc..221fe57581ca 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -48,7 +48,6 @@ #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" @@ -486,9 +485,8 @@ void IndVarSimplify::rewriteNonIntegerIVs(Loop *L) { BasicBlock *Header = L->getHeader(); SmallVector PHIs; - for (BasicBlock::iterator I = Header->begin(); - PHINode *PN = dyn_cast(I); ++I) - PHIs.push_back(PN); + for (PHINode &PN : Header->phis()) + PHIs.push_back(&PN); for (unsigned i = 0, e = PHIs.size(); i != e; ++i) if (PHINode *PN = dyn_cast_or_null(&*PHIs[i])) @@ -725,13 +723,12 @@ void IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) { assert(LoopHeader && "Invalid loop"); for (auto *ExitBB : ExitBlocks) { - BasicBlock::iterator BBI = ExitBB->begin(); // If there are no more PHI nodes in this exit block, then no more // values defined inside the loop are used on this path. - while (auto *PN = dyn_cast(BBI++)) { - for (unsigned IncomingValIdx = 0, E = PN->getNumIncomingValues(); - IncomingValIdx != E; ++IncomingValIdx) { - auto *IncomingBB = PN->getIncomingBlock(IncomingValIdx); + for (PHINode &PN : ExitBB->phis()) { + for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues(); + IncomingValIdx != E; ++IncomingValIdx) { + auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx); // We currently only support loop exits from loop header. If the // incoming block is not loop header, we need to recursively check @@ -756,8 +753,7 @@ void IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) { if (!L->isLoopInvariant(Cond)) continue; - auto *ExitVal = - dyn_cast(PN->getIncomingValue(IncomingValIdx)); + auto *ExitVal = dyn_cast(PN.getIncomingValue(IncomingValIdx)); // Only deal with PHIs. 
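Several files in this patch (GVNSink above, IndVarSimplify here, IRCE and JumpThreading below) replace the manual "iterate and dyn_cast until the first non-PHI" loops with BasicBlock::phis(). A small sketch of the idiom; countIncomingFrom is illustrative.

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Instructions.h"

    using namespace llvm;

    // phis() yields exactly the leading PHI nodes of BB as PHINode&, so there
    // is no cast and no sentinel break when the first non-PHI is reached.
    static unsigned countIncomingFrom(BasicBlock *BB, BasicBlock *Pred) {
      unsigned N = 0;
      for (PHINode &PN : BB->phis())
        if (PN.getBasicBlockIndex(Pred) >= 0)
          ++N;
      return N;
    }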
if (!ExitVal) @@ -772,8 +768,8 @@ void IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) { if (PreheaderIdx != -1) { assert(ExitVal->getParent() == LoopHeader && "ExitVal must be in loop header"); - PN->setIncomingValue(IncomingValIdx, - ExitVal->getIncomingValue(PreheaderIdx)); + PN.setIncomingValue(IncomingValIdx, + ExitVal->getIncomingValue(PreheaderIdx)); } } } diff --git a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp index 5c4d55bfbb2b..c8e58a1e93a7 100644 --- a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp +++ b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp @@ -179,10 +179,7 @@ class InductiveRangeCheck { OS << " Step: "; Step->print(OS); OS << " End: "; - if (End) - End->print(OS); - else - OS << "(null)"; + End->print(OS); OS << "\n CheckUse: "; getCheckUse()->getUser()->print(OS); OS << " Operand: " << getCheckUse()->getOperandNo() << "\n"; @@ -196,7 +193,7 @@ class InductiveRangeCheck { Use *getCheckUse() const { return CheckUse; } /// Represents an signed integer range [Range.getBegin(), Range.getEnd()). If - /// R.getEnd() sle R.getBegin(), then R denotes the empty range. + /// R.getEnd() le R.getBegin(), then R denotes the empty range. class Range { const SCEV *Begin; @@ -394,8 +391,23 @@ void InductiveRangeCheck::extractRangeChecksFromCond( if (!IsAffineIndex) return; + const SCEV *End = nullptr; + // We strengthen "0 <= I" to "0 <= I < INT_SMAX" and "I < L" to "0 <= I < L". + // We can potentially do much better here. + if (Length) + End = SE.getSCEV(Length); + else { + assert(RCKind == InductiveRangeCheck::RANGE_CHECK_LOWER && "invariant!"); + // So far we can only reach this point for Signed range check. This may + // change in future. In this case we will need to pick Unsigned max for the + // unsigned range check. + unsigned BitWidth = cast(IndexAddRec->getType())->getBitWidth(); + const SCEV *SIntMax = SE.getConstant(APInt::getSignedMaxValue(BitWidth)); + End = SIntMax; + } + InductiveRangeCheck IRC; - IRC.End = Length ? SE.getSCEV(Length) : nullptr; + IRC.End = End; IRC.Begin = IndexAddRec->getStart(); IRC.Step = IndexAddRec->getStepRecurrence(SE); IRC.CheckUse = &ConditionUse; @@ -1174,13 +1186,9 @@ void LoopConstrainer::cloneLoop(LoopConstrainer::ClonedLoop &Result, if (OriginalLoop.contains(SBB)) continue; // not an exit block - for (Instruction &I : *SBB) { - auto *PN = dyn_cast(&I); - if (!PN) - break; - - Value *OldIncoming = PN->getIncomingValueForBlock(OriginalBB); - PN->addIncoming(GetClonedValue(OldIncoming), ClonedBB); + for (PHINode &PN : SBB->phis()) { + Value *OldIncoming = PN.getIncomingValueForBlock(OriginalBB); + PN.addIncoming(GetClonedValue(OldIncoming), ClonedBB); } } } @@ -1327,16 +1335,12 @@ LoopConstrainer::RewrittenRangeInfo LoopConstrainer::changeIterationSpaceEnd( // We emit PHI nodes into `RRI.PseudoExit' that compute the "latest" value of // each of the PHI nodes in the loop header. This feeds into the initial // value of the same PHI nodes if/when we continue execution. 
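The extractRangeChecksFromCond hunk above materializes a concrete End for every range check instead of leaving it null: when the check only proves 0 <= I, SINT_MAX of the index type stands in for the missing length, which is what lets computeSafeIterationSpace (rewritten further below) drop its null-End special case. A sketch of that fallback, assuming ScalarEvolution and a signed range check as the comment states; rangeCheckEnd is an illustrative name.

    #include "llvm/ADT/APInt.h"
    #include "llvm/Analysis/ScalarEvolution.h"
    #include "llvm/IR/DerivedTypes.h"

    using namespace llvm;

    // Returns SE.getSCEV(Length) when a length is known, otherwise the signed
    // maximum of the index type, i.e. "0 <= I" is read as "0 <= I < SINT_MAX".
    static const SCEV *rangeCheckEnd(ScalarEvolution &SE, Value *Length,
                                     Type *IndexTy) {
      if (Length)
        return SE.getSCEV(Length);
      unsigned BitWidth = cast<IntegerType>(IndexTy)->getBitWidth();
      return SE.getConstant(APInt::getSignedMaxValue(BitWidth));
    }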
- for (Instruction &I : *LS.Header) { - auto *PN = dyn_cast(&I); - if (!PN) - break; - - PHINode *NewPHI = PHINode::Create(PN->getType(), 2, PN->getName() + ".copy", + for (PHINode &PN : LS.Header->phis()) { + PHINode *NewPHI = PHINode::Create(PN.getType(), 2, PN.getName() + ".copy", BranchToContinuation); - NewPHI->addIncoming(PN->getIncomingValueForBlock(Preheader), Preheader); - NewPHI->addIncoming(PN->getIncomingValueForBlock(LS.Latch), + NewPHI->addIncoming(PN.getIncomingValueForBlock(Preheader), Preheader); + NewPHI->addIncoming(PN.getIncomingValueForBlock(LS.Latch), RRI.ExitSelector); RRI.PHIValuesAtPseudoExit.push_back(NewPHI); } @@ -1348,12 +1352,8 @@ LoopConstrainer::RewrittenRangeInfo LoopConstrainer::changeIterationSpaceEnd( // The latch exit now has a branch from `RRI.ExitSelector' instead of // `LS.Latch'. The PHI nodes need to be updated to reflect that. - for (Instruction &I : *LS.LatchExit) { - if (PHINode *PN = dyn_cast(&I)) - replacePHIBlock(PN, LS.Latch, RRI.ExitSelector); - else - break; - } + for (PHINode &PN : LS.LatchExit->phis()) + replacePHIBlock(&PN, LS.Latch, RRI.ExitSelector); return RRI; } @@ -1362,15 +1362,10 @@ void LoopConstrainer::rewriteIncomingValuesForPHIs( LoopStructure &LS, BasicBlock *ContinuationBlock, const LoopConstrainer::RewrittenRangeInfo &RRI) const { unsigned PHIIndex = 0; - for (Instruction &I : *LS.Header) { - auto *PN = dyn_cast(&I); - if (!PN) - break; - - for (unsigned i = 0, e = PN->getNumIncomingValues(); i < e; ++i) - if (PN->getIncomingBlock(i) == ContinuationBlock) - PN->setIncomingValue(i, RRI.PHIValuesAtPseudoExit[PHIIndex++]); - } + for (PHINode &PN : LS.Header->phis()) + for (unsigned i = 0, e = PN.getNumIncomingValues(); i < e; ++i) + if (PN.getIncomingBlock(i) == ContinuationBlock) + PN.setIncomingValue(i, RRI.PHIValuesAtPseudoExit[PHIIndex++]); LS.IndVarStart = RRI.IndVarEnd; } @@ -1381,14 +1376,9 @@ BasicBlock *LoopConstrainer::createPreheader(const LoopStructure &LS, BasicBlock *Preheader = BasicBlock::Create(Ctx, Tag, &F, LS.Header); BranchInst::Create(LS.Header, Preheader); - for (Instruction &I : *LS.Header) { - auto *PN = dyn_cast(&I); - if (!PN) - break; - - for (unsigned i = 0, e = PN->getNumIncomingValues(); i < e; ++i) - replacePHIBlock(PN, OldPreheader, Preheader); - } + for (PHINode &PN : LS.Header->phis()) + for (unsigned i = 0, e = PN.getNumIncomingValues(); i < e; ++i) + replacePHIBlock(&PN, OldPreheader, Preheader); return Preheader; } @@ -1685,17 +1675,7 @@ InductiveRangeCheck::computeSafeIterationSpace( const SCEV *M = SE.getMinusSCEV(C, A); const SCEV *Zero = SE.getZero(M->getType()); const SCEV *Begin = ClampedSubstract(Zero, M); - const SCEV *L = nullptr; - - // We strengthen "0 <= I" to "0 <= I < INT_SMAX" and "I < L" to "0 <= I < L". - // We can potentially do much better here. 
- if (const SCEV *EndLimit = getEnd()) - L = EndLimit; - else { - assert(Kind == InductiveRangeCheck::RANGE_CHECK_LOWER && "invariant!"); - L = SIntMax; - } - const SCEV *End = ClampedSubstract(L, M); + const SCEV *End = ClampedSubstract(getEnd(), M); return InductiveRangeCheck::Range(Begin, End); } diff --git a/lib/Transforms/Scalar/InferAddressSpaces.cpp b/lib/Transforms/Scalar/InferAddressSpaces.cpp index 7d66c0f73821..e4591649038e 100644 --- a/lib/Transforms/Scalar/InferAddressSpaces.cpp +++ b/lib/Transforms/Scalar/InferAddressSpaces.cpp @@ -260,7 +260,10 @@ bool InferAddressSpaces::rewriteIntrinsicOperands(IntrinsicInst *II, switch (II->getIntrinsicID()) { case Intrinsic::amdgcn_atomic_inc: - case Intrinsic::amdgcn_atomic_dec:{ + case Intrinsic::amdgcn_atomic_dec: + case Intrinsic::amdgcn_atomic_fadd: + case Intrinsic::amdgcn_atomic_fmin: + case Intrinsic::amdgcn_atomic_fmax: { const ConstantInt *IsVolatile = dyn_cast(II->getArgOperand(4)); if (!IsVolatile || !IsVolatile->isZero()) return false; @@ -289,6 +292,9 @@ void InferAddressSpaces::collectRewritableIntrinsicOperands( case Intrinsic::objectsize: case Intrinsic::amdgcn_atomic_inc: case Intrinsic::amdgcn_atomic_dec: + case Intrinsic::amdgcn_atomic_fadd: + case Intrinsic::amdgcn_atomic_fmin: + case Intrinsic::amdgcn_atomic_fmax: appendsFlatAddressExpressionToPostorderStack(II->getArgOperand(0), PostorderStack, Visited); break; diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index e6cab3f34cf0..4d366e8e3924 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -131,10 +131,11 @@ namespace { bool runOnFunction(Function &F) override; void getAnalysisUsage(AnalysisUsage &AU) const override { - if (PrintLVIAfterJumpThreading) - AU.addRequired(); + AU.addRequired(); + AU.addPreserved(); AU.addRequired(); AU.addRequired(); + AU.addPreserved(); AU.addPreserved(); AU.addRequired(); } @@ -148,6 +149,7 @@ char JumpThreading::ID = 0; INITIALIZE_PASS_BEGIN(JumpThreading, "jump-threading", "Jump Threading", false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(LazyValueInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) @@ -188,14 +190,14 @@ JumpThreadingPass::JumpThreadingPass(int T) { // // Given that P(cond == true) = P(cond == true | A) * P(A) + // P(cond == true | B) * P(B) -// we get +// we get: // P(cond == true ) = P(A) + P(cond == true | B) * P(B) // // which gives us: // P(A) is less than P(cond == true), i.e. // P(t == true) <= P(cond == true) // -// In other words, if we know P(cond == true) is unlikely, we know +// In other words, if we know P(cond == true) is unlikely, we know // that P(t == true) is also unlikely. // static void updatePredecessorProfileMetadata(PHINode *PN, BasicBlock *BB) { @@ -278,23 +280,26 @@ bool JumpThreading::runOnFunction(Function &F) { if (skipFunction(F)) return false; auto TLI = &getAnalysis().getTLI(); + // Get DT analysis before LVI. When LVI is initialized it conditionally adds + // DT if it's available. 
+ auto DT = &getAnalysis().getDomTree(); auto LVI = &getAnalysis().getLVI(); auto AA = &getAnalysis().getAAResults(); + DeferredDominance DDT(*DT); std::unique_ptr BFI; std::unique_ptr BPI; - bool HasProfileData = F.getEntryCount().hasValue(); + bool HasProfileData = F.hasProfileData(); if (HasProfileData) { LoopInfo LI{DominatorTree(F)}; BPI.reset(new BranchProbabilityInfo(F, LI, TLI)); BFI.reset(new BlockFrequencyInfo(F, *BPI, LI)); } - bool Changed = Impl.runImpl(F, TLI, LVI, AA, HasProfileData, std::move(BFI), - std::move(BPI)); + bool Changed = Impl.runImpl(F, TLI, LVI, AA, &DDT, HasProfileData, + std::move(BFI), std::move(BPI)); if (PrintLVIAfterJumpThreading) { dbgs() << "LVI for function '" << F.getName() << "':\n"; - LVI->printLVI(F, getAnalysis().getDomTree(), - dbgs()); + LVI->printLVI(F, *DT, dbgs()); } return Changed; } @@ -302,37 +307,43 @@ bool JumpThreading::runOnFunction(Function &F) { PreservedAnalyses JumpThreadingPass::run(Function &F, FunctionAnalysisManager &AM) { auto &TLI = AM.getResult(F); + // Get DT analysis before LVI. When LVI is initialized it conditionally adds + // DT if it's available. + auto &DT = AM.getResult(F); auto &LVI = AM.getResult(F); auto &AA = AM.getResult(F); + DeferredDominance DDT(DT); std::unique_ptr BFI; std::unique_ptr BPI; - bool HasProfileData = F.getEntryCount().hasValue(); - if (HasProfileData) { + if (F.hasProfileData()) { LoopInfo LI{DominatorTree(F)}; BPI.reset(new BranchProbabilityInfo(F, LI, &TLI)); BFI.reset(new BlockFrequencyInfo(F, *BPI, LI)); } - bool Changed = runImpl(F, &TLI, &LVI, &AA, HasProfileData, std::move(BFI), - std::move(BPI)); + bool Changed = runImpl(F, &TLI, &LVI, &AA, &DDT, HasProfileData, + std::move(BFI), std::move(BPI)); if (!Changed) return PreservedAnalyses::all(); PreservedAnalyses PA; PA.preserve(); + PA.preserve(); + PA.preserve(); return PA; } bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_, LazyValueInfo *LVI_, AliasAnalysis *AA_, - bool HasProfileData_, + DeferredDominance *DDT_, bool HasProfileData_, std::unique_ptr BFI_, std::unique_ptr BPI_) { DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n"); TLI = TLI_; LVI = LVI_; AA = AA_; + DDT = DDT_; BFI.reset(); BPI.reset(); // When profile data is available, we need to update edge weights after @@ -354,7 +365,7 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_, // back edges. This works for normal cases but not for unreachable blocks as // they may have cycle with no back edge. bool EverChanged = false; - EverChanged |= removeUnreachableBlocks(F, LVI); + EverChanged |= removeUnreachableBlocks(F, LVI, DDT); FindLoopHeaders(F); @@ -369,6 +380,10 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_, ++I; + // Don't thread branches over a block that's slated for deletion. + if (DDT->pendingDeletedBB(BB)) + continue; + // If the block is trivially dead, zap it. This eliminates the successor // edges which simplifies the CFG. if (pred_empty(BB) && @@ -377,7 +392,7 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_, << "' with terminator: " << *BB->getTerminator() << '\n'); LoopHeaders.erase(BB); LVI->eraseBlock(BB); - DeleteDeadBlock(BB); + DeleteDeadBlock(BB, DDT); Changed = true; continue; } @@ -401,7 +416,7 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_, // awesome, but it allows us to use AssertingVH to prevent nasty // dangling pointer issues within LazyValueInfo. 
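The jump threading changes route every CFG edit through the DeferredDominance wrapper built from the DominatorTree fetched above: edits queue edge insertions and deletions (applyUpdates / deleteEdge), deleted blocks are tracked via pendingDeletedBB, and the tree is only brought up to date by the single flush() at the end of runImpl. A sketch of that discipline with an illustrative helper; the header location of DeferredDominance is an assumption, and PHI maintenance in the successors is deliberately ignored.

    #include "llvm/IR/Dominators.h"
    #include "llvm/Transforms/Utils/Local.h" // assumed home of DeferredDominance here

    using namespace llvm;

    // Redirect Pred's edge OldSucc -> NewSucc and record both half-updates;
    // the DominatorTree itself is only rebuilt when DDT.flush() runs later.
    static void redirectAndRecord(BasicBlock *Pred, BasicBlock *OldSucc,
                                  BasicBlock *NewSucc, DeferredDominance &DDT) {
      Pred->getTerminator()->replaceUsesOfWith(OldSucc, NewSucc);
      DDT.applyUpdates({{DominatorTree::Insert, Pred, NewSucc},
                        {DominatorTree::Delete, Pred, OldSucc}});
    }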
LVI->eraseBlock(BB); - if (TryToSimplifyUncondBranchFromEmptyBlock(BB)) + if (TryToSimplifyUncondBranchFromEmptyBlock(BB, DDT)) Changed = true; } } @@ -409,6 +424,7 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_, } while (Changed); LoopHeaders.clear(); + DDT->flush(); return EverChanged; } @@ -932,8 +948,8 @@ static bool hasAddressTakenAndUsed(BasicBlock *BB) { bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) { // If the block is trivially dead, just return and let the caller nuke it. // This simplifies other transformations. - if (pred_empty(BB) && - BB != &BB->getParent()->getEntryBlock()) + if (DDT->pendingDeletedBB(BB) || + (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock())) return false; // If this block has a single predecessor, and if that pred has a single @@ -949,7 +965,7 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) { LoopHeaders.insert(BB); LVI->eraseBlock(SinglePred); - MergeBasicBlockIntoOnlyPred(BB); + MergeBasicBlockIntoOnlyPred(BB, nullptr, DDT); // Now that BB is merged into SinglePred (i.e. SinglePred Code followed by // BB code within one basic block `BB`), we need to invalidate the LVI @@ -1032,18 +1048,23 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) { // successors to branch to. Let GetBestDestForJumpOnUndef decide. if (isa(Condition)) { unsigned BestSucc = GetBestDestForJumpOnUndef(BB); + std::vector Updates; // Fold the branch/switch. TerminatorInst *BBTerm = BB->getTerminator(); + Updates.reserve(BBTerm->getNumSuccessors()); for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i) { if (i == BestSucc) continue; - BBTerm->getSuccessor(i)->removePredecessor(BB, true); + BasicBlock *Succ = BBTerm->getSuccessor(i); + Succ->removePredecessor(BB, true); + Updates.push_back({DominatorTree::Delete, BB, Succ}); } DEBUG(dbgs() << " In block '" << BB->getName() << "' folding undef terminator: " << *BBTerm << '\n'); BranchInst::Create(BBTerm->getSuccessor(BestSucc), BBTerm); BBTerm->eraseFromParent(); + DDT->applyUpdates(Updates); return true; } @@ -1054,7 +1075,7 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) { DEBUG(dbgs() << " In block '" << BB->getName() << "' folding terminator: " << *BB->getTerminator() << '\n'); ++NumFolds; - ConstantFoldTerminator(BB, true); + ConstantFoldTerminator(BB, true, nullptr, DDT); return true; } @@ -1087,7 +1108,8 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) { if (Ret != LazyValueInfo::Unknown) { unsigned ToRemove = Ret == LazyValueInfo::True ? 1 : 0; unsigned ToKeep = Ret == LazyValueInfo::True ? 0 : 1; - CondBr->getSuccessor(ToRemove)->removePredecessor(BB, true); + BasicBlock *ToRemoveSucc = CondBr->getSuccessor(ToRemove); + ToRemoveSucc->removePredecessor(BB, true); BranchInst::Create(CondBr->getSuccessor(ToKeep), CondBr); CondBr->eraseFromParent(); if (CondCmp->use_empty()) @@ -1105,6 +1127,7 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) { ConstantInt::getFalse(CondCmp->getType()); ReplaceFoldableUses(CondCmp, CI); } + DDT->deleteEdge(BB, ToRemoveSucc); return true; } @@ -1183,9 +1206,12 @@ bool JumpThreadingPass::ProcessImpliedCondition(BasicBlock *BB) { Optional Implication = isImpliedCondition(PBI->getCondition(), Cond, DL, CondIsTrue); if (Implication) { - BI->getSuccessor(*Implication ? 1 : 0)->removePredecessor(BB); - BranchInst::Create(BI->getSuccessor(*Implication ? 0 : 1), BI); + BasicBlock *KeepSucc = BI->getSuccessor(*Implication ? 0 : 1); + BasicBlock *RemoveSucc = BI->getSuccessor(*Implication ? 
1 : 0); + RemoveSucc->removePredecessor(BB); + BranchInst::Create(KeepSucc, BI); BI->eraseFromParent(); + DDT->deleteEdge(BB, RemoveSucc); return true; } CurrentBB = CurrentPred; @@ -1333,6 +1359,20 @@ bool JumpThreadingPass::SimplifyPartiallyRedundantLoad(LoadInst *LI) { // code size. BasicBlock *UnavailablePred = nullptr; + // If the value is unavailable in one of predecessors, we will end up + // inserting a new instruction into them. It is only valid if all the + // instructions before LI are guaranteed to pass execution to its successor, + // or if LI is safe to speculate. + // TODO: If this logic becomes more complex, and we will perform PRE insertion + // farther than to a predecessor, we need to reuse the code from GVN's PRE. + // It requires domination tree analysis, so for this simple case it is an + // overkill. + if (PredsScanned.size() != AvailablePreds.size() && + !isSafeToSpeculativelyExecute(LI)) + for (auto I = LoadBB->begin(); &*I != LI; ++I) + if (!isGuaranteedToTransferExecutionToSuccessor(&*I)) + return false; + // If there is exactly one predecessor where the value is unavailable, the // already computed 'OneUnavailablePred' block is it. If it ends in an // unconditional branch, we know that it isn't a critical edge. @@ -1578,17 +1618,22 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB, if (PredWithKnownDest == (size_t)std::distance(pred_begin(BB), pred_end(BB))) { bool SeenFirstBranchToOnlyDest = false; + std::vector Updates; + Updates.reserve(BB->getTerminator()->getNumSuccessors() - 1); for (BasicBlock *SuccBB : successors(BB)) { - if (SuccBB == OnlyDest && !SeenFirstBranchToOnlyDest) + if (SuccBB == OnlyDest && !SeenFirstBranchToOnlyDest) { SeenFirstBranchToOnlyDest = true; // Don't modify the first branch. - else + } else { SuccBB->removePredecessor(BB, true); // This is unreachable successor. + Updates.push_back({DominatorTree::Delete, BB, SuccBB}); + } } // Finally update the terminator. TerminatorInst *Term = BB->getTerminator(); BranchInst::Create(OnlyDest, Term); Term->eraseFromParent(); + DDT->applyUpdates(Updates); // If the condition is now dead due to the removal of the old terminator, // erase it. @@ -1787,11 +1832,10 @@ static void AddPHINodeEntriesForMappedBlock(BasicBlock *PHIBB, BasicBlock *OldPred, BasicBlock *NewPred, DenseMap &ValueMap) { - for (BasicBlock::iterator PNI = PHIBB->begin(); - PHINode *PN = dyn_cast(PNI); ++PNI) { + for (PHINode &PN : PHIBB->phis()) { // Ok, we have a PHI node. Figure out what the incoming value was for the // DestBlock. - Value *IV = PN->getIncomingValueForBlock(OldPred); + Value *IV = PN.getIncomingValueForBlock(OldPred); // Remap the value if necessary. if (Instruction *Inst = dyn_cast(IV)) { @@ -1800,7 +1844,7 @@ static void AddPHINodeEntriesForMappedBlock(BasicBlock *PHIBB, IV = I->second; } - PN->addIncoming(IV, NewPred); + PN.addIncoming(IV, NewPred); } } @@ -1952,6 +1996,10 @@ bool JumpThreadingPass::ThreadEdge(BasicBlock *BB, PredTerm->setSuccessor(i, NewBB); } + DDT->applyUpdates({{DominatorTree::Insert, NewBB, SuccBB}, + {DominatorTree::Insert, PredBB, NewBB}, + {DominatorTree::Delete, PredBB, BB}}); + // At this point, the IR is fully up to date and consistent. Do a quick scan // over the new instructions and zap any that are constants or dead. This // frequently happens because of phi translation. 
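The guard added to SimplifyPartiallyRedundantLoad above is a partial-redundancy-elimination safety condition: a copy of the load may only be pushed into a predecessor if the load is speculatable, or if every instruction between the block entry and the load is guaranteed to transfer execution to its successor, so the inserted copy cannot run on a path where the original never would. A self-contained sketch of the same check; canPREAcrossBlockPrefix is an illustrative name.

    #include "llvm/Analysis/ValueTracking.h"
    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Instructions.h"

    using namespace llvm;

    static bool canPREAcrossBlockPrefix(LoadInst *LI) {
      if (isSafeToSpeculativelyExecute(LI))
        return true;
      for (Instruction &I : *LI->getParent()) {
        if (&I == LI)
          break; // reached the load; the whole prefix is safe
        if (!isGuaranteedToTransferExecutionToSuccessor(&I))
          return false; // e.g. a call that may throw or never return
      }
      return true;
    }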
@@ -1971,20 +2019,42 @@ bool JumpThreadingPass::ThreadEdge(BasicBlock *BB, BasicBlock *JumpThreadingPass::SplitBlockPreds(BasicBlock *BB, ArrayRef Preds, const char *Suffix) { + SmallVector NewBBs; + // Collect the frequencies of all predecessors of BB, which will be used to - // update the edge weight on BB->SuccBB. - BlockFrequency PredBBFreq(0); + // update the edge weight of the result of splitting predecessors. + DenseMap FreqMap; if (HasProfileData) for (auto Pred : Preds) - PredBBFreq += BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, BB); + FreqMap.insert(std::make_pair( + Pred, BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, BB))); + + // In the case when BB is a LandingPad block we create 2 new predecessors + // instead of just one. + if (BB->isLandingPad()) { + std::string NewName = std::string(Suffix) + ".split-lp"; + SplitLandingPadPredecessors(BB, Preds, Suffix, NewName.c_str(), NewBBs); + } else { + NewBBs.push_back(SplitBlockPredecessors(BB, Preds, Suffix)); + } - BasicBlock *PredBB = SplitBlockPredecessors(BB, Preds, Suffix); + std::vector Updates; + Updates.reserve((2 * Preds.size()) + NewBBs.size()); + for (auto NewBB : NewBBs) { + BlockFrequency NewBBFreq(0); + Updates.push_back({DominatorTree::Insert, NewBB, BB}); + for (auto Pred : predecessors(NewBB)) { + Updates.push_back({DominatorTree::Delete, Pred, BB}); + Updates.push_back({DominatorTree::Insert, Pred, NewBB}); + if (HasProfileData) // Update frequencies between Pred -> NewBB. + NewBBFreq += FreqMap.lookup(Pred); + } + if (HasProfileData) // Apply the summed frequency to NewBB. + BFI->setBlockFreq(NewBB, NewBBFreq.getFrequency()); + } - // Set the block frequency of the newly created PredBB, which is the sum of - // frequencies of Preds. - if (HasProfileData) - BFI->setBlockFreq(PredBB, PredBBFreq.getFrequency()); - return PredBB; + DDT->applyUpdates(Updates); + return NewBBs[0]; } bool JumpThreadingPass::doesBlockHaveProfileData(BasicBlock *BB) { @@ -2128,6 +2198,7 @@ bool JumpThreadingPass::DuplicateCondBranchOnPHIIntoPred( } // And finally, do it! Start by factoring the predecessors if needed. + std::vector Updates; BasicBlock *PredBB; if (PredBBs.size() == 1) PredBB = PredBBs[0]; @@ -2136,6 +2207,7 @@ bool JumpThreadingPass::DuplicateCondBranchOnPHIIntoPred( << " common predecessors.\n"); PredBB = SplitBlockPreds(BB, PredBBs, ".thr_comm"); } + Updates.push_back({DominatorTree::Delete, PredBB, BB}); // Okay, we decided to do this! Clone all the instructions in BB onto the end // of PredBB. @@ -2148,7 +2220,11 @@ bool JumpThreadingPass::DuplicateCondBranchOnPHIIntoPred( BranchInst *OldPredBranch = dyn_cast(PredBB->getTerminator()); if (!OldPredBranch || !OldPredBranch->isUnconditional()) { - PredBB = SplitEdge(PredBB, BB); + BasicBlock *OldPredBB = PredBB; + PredBB = SplitEdge(OldPredBB, BB); + Updates.push_back({DominatorTree::Insert, OldPredBB, PredBB}); + Updates.push_back({DominatorTree::Insert, PredBB, BB}); + Updates.push_back({DominatorTree::Delete, OldPredBB, BB}); OldPredBranch = cast(PredBB->getTerminator()); } @@ -2190,6 +2266,10 @@ bool JumpThreadingPass::DuplicateCondBranchOnPHIIntoPred( // Otherwise, insert the new instruction into the block. New->setName(BI->getName()); PredBB->getInstList().insert(OldPredBranch->getIterator(), New); + // Update Dominance from simplified New instruction operands. 
+ for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i) + if (BasicBlock *SuccBB = dyn_cast(New->getOperand(i))) + Updates.push_back({DominatorTree::Insert, PredBB, SuccBB}); } } @@ -2245,6 +2325,7 @@ bool JumpThreadingPass::DuplicateCondBranchOnPHIIntoPred( // Remove the unconditional branch at the end of the PredBB block. OldPredBranch->eraseFromParent(); + DDT->applyUpdates(Updates); ++NumDupes; return true; @@ -2317,6 +2398,8 @@ bool JumpThreadingPass::TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB) { // The select is now dead. SI->eraseFromParent(); + DDT->applyUpdates({{DominatorTree::Insert, NewBB, BB}, + {DominatorTree::Insert, Pred, NewBB}}); // Update any other PHI nodes in BB. for (BasicBlock::iterator BI = BB->begin(); PHINode *Phi = dyn_cast(BI); ++BI) @@ -2395,11 +2478,25 @@ bool JumpThreadingPass::TryToUnfoldSelectInCurrBB(BasicBlock *BB) { // Expand the select. TerminatorInst *Term = SplitBlockAndInsertIfThen(SI->getCondition(), SI, false); + BasicBlock *SplitBB = SI->getParent(); + BasicBlock *NewBB = Term->getParent(); PHINode *NewPN = PHINode::Create(SI->getType(), 2, "", SI); NewPN->addIncoming(SI->getTrueValue(), Term->getParent()); NewPN->addIncoming(SI->getFalseValue(), BB); SI->replaceAllUsesWith(NewPN); SI->eraseFromParent(); + // NewBB and SplitBB are newly created blocks which require insertion. + std::vector Updates; + Updates.reserve((2 * SplitBB->getTerminator()->getNumSuccessors()) + 3); + Updates.push_back({DominatorTree::Insert, BB, SplitBB}); + Updates.push_back({DominatorTree::Insert, BB, NewBB}); + Updates.push_back({DominatorTree::Insert, NewBB, SplitBB}); + // BB's successors were moved to SplitBB, update DDT accordingly. + for (auto *Succ : successors(SplitBB)) { + Updates.push_back({DominatorTree::Delete, BB, Succ}); + Updates.push_back({DominatorTree::Insert, SplitBB, Succ}); + } + DDT->applyUpdates(Updates); return true; } return false; @@ -2486,8 +2583,8 @@ bool JumpThreadingPass::ThreadGuard(BasicBlock *BB, IntrinsicInst *Guard, if (!TrueDestIsSafe && !FalseDestIsSafe) return false; - BasicBlock *UnguardedBlock = TrueDestIsSafe ? TrueDest : FalseDest; - BasicBlock *GuardedBlock = FalseDestIsSafe ? TrueDest : FalseDest; + BasicBlock *PredUnguardedBlock = TrueDestIsSafe ? TrueDest : FalseDest; + BasicBlock *PredGuardedBlock = FalseDestIsSafe ? TrueDest : FalseDest; ValueToValueMapTy UnguardedMapping, GuardedMapping; Instruction *AfterGuard = Guard->getNextNode(); @@ -2496,18 +2593,29 @@ bool JumpThreadingPass::ThreadGuard(BasicBlock *BB, IntrinsicInst *Guard, return false; // Duplicate all instructions before the guard and the guard itself to the // branch where implication is not proved. - GuardedBlock = DuplicateInstructionsInSplitBetween( - BB, GuardedBlock, AfterGuard, GuardedMapping); + BasicBlock *GuardedBlock = DuplicateInstructionsInSplitBetween( + BB, PredGuardedBlock, AfterGuard, GuardedMapping); assert(GuardedBlock && "Could not create the guarded block?"); // Duplicate all instructions before the guard in the unguarded branch. // Since we have successfully duplicated the guarded block and this block // has fewer instructions, we expect it to succeed. 
- UnguardedBlock = DuplicateInstructionsInSplitBetween(BB, UnguardedBlock, - Guard, UnguardedMapping); + BasicBlock *UnguardedBlock = DuplicateInstructionsInSplitBetween( + BB, PredUnguardedBlock, Guard, UnguardedMapping); assert(UnguardedBlock && "Could not create the unguarded block?"); DEBUG(dbgs() << "Moved guard " << *Guard << " to block " << GuardedBlock->getName() << "\n"); - + // DuplicateInstructionsInSplitBetween inserts a new block "BB.split" between + // PredBB and BB. We need to perform two inserts and one delete for each of + // the above calls to update Dominators. + DDT->applyUpdates( + {// Guarded block split. + {DominatorTree::Delete, PredGuardedBlock, BB}, + {DominatorTree::Insert, PredGuardedBlock, GuardedBlock}, + {DominatorTree::Insert, GuardedBlock, BB}, + // Unguarded block split. + {DominatorTree::Delete, PredUnguardedBlock, BB}, + {DominatorTree::Insert, PredUnguardedBlock, UnguardedBlock}, + {DominatorTree::Insert, UnguardedBlock, BB}}); // Some instructions before the guard may still have uses. For them, we need // to create Phi nodes merging their copies in both guarded and unguarded // branches. Those instructions that have no uses can be just removed. diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index f610aae2403b..4ea935793b80 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -90,14 +90,15 @@ static cl::opt MaxNumUsesTraversed( "invariance in loop using invariant start (default = 8)")); static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI); -static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop, - const LoopSafetyInfo *SafetyInfo); +static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop, + const LoopSafetyInfo *SafetyInfo, + TargetTransformInfo *TTI, bool &FreeInLoop); static bool hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE); static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, - OptimizationRemarkEmitter *ORE); + OptimizationRemarkEmitter *ORE, bool FreeInLoop); static bool isSafeToExecuteUnconditionally(Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop, @@ -115,7 +116,8 @@ CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN, namespace { struct LoopInvariantCodeMotion { bool runOnLoop(Loop *L, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT, - TargetLibraryInfo *TLI, ScalarEvolution *SE, MemorySSA *MSSA, + TargetLibraryInfo *TLI, TargetTransformInfo *TTI, + ScalarEvolution *SE, MemorySSA *MSSA, OptimizationRemarkEmitter *ORE, bool DeleteAST); DenseMap &getLoopToAliasSetMap() { @@ -159,6 +161,8 @@ struct LegacyLICMPass : public LoopPass { &getAnalysis().getLoopInfo(), &getAnalysis().getDomTree(), &getAnalysis().getTLI(), + &getAnalysis().getTTI( + *L->getHeader()->getParent()), SE ? 
&SE->getSE() : nullptr, MSSA, &ORE, false); } @@ -170,6 +174,7 @@ struct LegacyLICMPass : public LoopPass { AU.addRequired(); if (EnableMSSALoopDependency) AU.addRequired(); + AU.addRequired(); getLoopAnalysisUsage(AU); } @@ -210,8 +215,8 @@ PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM, "cached at a higher level"); LoopInvariantCodeMotion LICM; - if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, &AR.TLI, &AR.SE, AR.MSSA, ORE, - true)) + if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, &AR.TLI, &AR.TTI, &AR.SE, + AR.MSSA, ORE, true)) return PreservedAnalyses::all(); auto PA = getLoopPassPreservedAnalyses(); @@ -224,6 +229,7 @@ INITIALIZE_PASS_BEGIN(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false, false) INITIALIZE_PASS_DEPENDENCY(LoopPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) INITIALIZE_PASS_END(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false, false) @@ -236,12 +242,10 @@ Pass *llvm::createLICMPass() { return new LegacyLICMPass(); } /// We should delete AST for inner loops in the new pass manager to avoid /// memory leak. /// -bool LoopInvariantCodeMotion::runOnLoop(Loop *L, AliasAnalysis *AA, - LoopInfo *LI, DominatorTree *DT, - TargetLibraryInfo *TLI, - ScalarEvolution *SE, MemorySSA *MSSA, - OptimizationRemarkEmitter *ORE, - bool DeleteAST) { +bool LoopInvariantCodeMotion::runOnLoop( + Loop *L, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT, + TargetLibraryInfo *TLI, TargetTransformInfo *TTI, ScalarEvolution *SE, + MemorySSA *MSSA, OptimizationRemarkEmitter *ORE, bool DeleteAST) { bool Changed = false; assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form."); @@ -266,7 +270,7 @@ bool LoopInvariantCodeMotion::runOnLoop(Loop *L, AliasAnalysis *AA, // instructions, we perform another pass to hoist them out of the loop. // if (L->hasDedicatedExits()) - Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L, + Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L, CurAST, &SafetyInfo, ORE); if (Preheader) Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L, @@ -359,7 +363,8 @@ bool LoopInvariantCodeMotion::runOnLoop(Loop *L, AliasAnalysis *AA, /// definitions, allowing us to sink a loop body in one pass without iteration. /// bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, - DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop, + DominatorTree *DT, TargetLibraryInfo *TLI, + TargetTransformInfo *TTI, Loop *CurLoop, AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE) { @@ -400,12 +405,15 @@ bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, // outside of the loop. In this case, it doesn't even matter if the // operands of the instruction are loop invariant. 
// - if (isNotUsedInLoop(I, CurLoop, SafetyInfo) && + bool FreeInLoop = false; + if (isNotUsedOrFreeInLoop(I, CurLoop, SafetyInfo, TTI, FreeInLoop) && canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, SafetyInfo, ORE)) { - if (sink(I, LI, DT, CurLoop, SafetyInfo, ORE)) { - ++II; - CurAST->deleteValue(&I); - I.eraseFromParent(); + if (sink(I, LI, DT, CurLoop, SafetyInfo, ORE, FreeInLoop)) { + if (!FreeInLoop) { + ++II; + CurAST->deleteValue(&I); + I.eraseFromParent(); + } Changed = true; } } @@ -708,13 +716,40 @@ static bool isTriviallyReplacablePHI(const PHINode &PN, const Instruction &I) { return true; } +/// Return true if the instruction is free in the loop. +static bool isFreeInLoop(const Instruction &I, const Loop *CurLoop, + const TargetTransformInfo *TTI) { + + if (const GetElementPtrInst *GEP = dyn_cast(&I)) { + if (TTI->getUserCost(GEP) != TargetTransformInfo::TCC_Free) + return false; + // For a GEP, we cannot simply use getUserCost because currently it + // optimistically assume that a GEP will fold into addressing mode + // regardless of its users. + const BasicBlock *BB = GEP->getParent(); + for (const User *U : GEP->users()) { + const Instruction *UI = cast(U); + if (CurLoop->contains(UI) && + (BB != UI->getParent() || + (!isa(UI) && !isa(UI)))) + return false; + } + return true; + } else + return TTI->getUserCost(&I) == TargetTransformInfo::TCC_Free; +} + /// Return true if the only users of this instruction are outside of /// the loop. If this is true, we can sink the instruction to the exit /// blocks of the loop. /// -static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop, - const LoopSafetyInfo *SafetyInfo) { +/// We also return true if the instruction could be folded away in lowering. +/// (e.g., a GEP can be folded into a load as an addressing mode in the loop). +static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop, + const LoopSafetyInfo *SafetyInfo, + TargetTransformInfo *TTI, bool &FreeInLoop) { const auto &BlockColors = SafetyInfo->BlockColors; + bool IsFree = isFreeInLoop(I, CurLoop, TTI); for (const User *U : I.users()) { const Instruction *UI = cast(U); if (const PHINode *PN = dyn_cast(UI)) { @@ -731,8 +766,13 @@ static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop, return false; } - if (CurLoop->contains(UI)) + if (CurLoop->contains(UI)) { + if (IsFree) { + FreeInLoop = true; + continue; + } return false; + } } return true; } @@ -888,7 +928,7 @@ static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT, /// static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, - OptimizationRemarkEmitter *ORE) { + OptimizationRemarkEmitter *ORE, bool FreeInLoop) { DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n"); ORE->emit([&]() { return OptimizationRemark(DEBUG_TYPE, "InstSunk", &I) @@ -900,7 +940,6 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, else if (isa(I)) ++NumMovedCalls; ++NumSunk; - Changed = true; // Iterate over users to be ready for actual sinking. Replace users via // unrechable blocks with undef and make all user PHIs trivially replcable. 
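// ---------------------------------------------------------------------------
// Illustrative example, not part of this patch: the situation the new
// FreeInLoop handling above targets. The %gep below is "free" inside the loop
// (it folds into the load's addressing mode and its only in-loop user sits in
// the same block), so sink() clones it for the out-of-loop users while the
// original is left in place. Block and value names are hypothetical:
//
//   loop:
//     %gep = getelementptr inbounds i32, i32* %base, i64 %iv   ; free in-loop
//     %v   = load i32, i32* %gep                                ; in-loop user
//     br i1 %cond, label %loop, label %exit
//   exit:                                                       ; LCSSA users
//     %gep.sunk = getelementptr inbounds i32, i32* %base, i64 %iv.lcssa
//
// See isFreeInLoop() and the FreeInLoop flag threaded through
// sinkRegion()/sink() in the hunks above.
// ---------------------------------------------------------------------------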
@@ -910,11 +949,12 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, Use &U = UI.getUse(); ++UI; - if (VisitedUsers.count(User)) + if (VisitedUsers.count(User) || CurLoop->contains(User)) continue; if (!DT->isReachableFromEntry(User->getParent())) { U = UndefValue::get(I.getType()); + Changed = true; continue; } @@ -927,6 +967,7 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, BasicBlock *BB = PN->getIncomingBlock(U); if (!DT->isReachableFromEntry(BB)) { U = UndefValue::get(I.getType()); + Changed = true; continue; } @@ -935,7 +976,7 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, continue; if (!canSplitPredecessors(PN)) - return false; + return Changed; // Split predecessors of the PHI so that we can make users trivially // replacable. @@ -947,6 +988,9 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, UE = I.user_end(); } + if (VisitedUsers.empty()) + return Changed; + #ifndef NDEBUG SmallVector ExitBlocks; CurLoop->getUniqueExitBlocks(ExitBlocks); @@ -960,9 +1004,14 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, // If this instruction is only used outside of the loop, then all users are // PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of // the instruction. - while (!I.use_empty()) { - Value::user_iterator UI = I.user_begin(); - PHINode *PN = cast(*UI); + SmallSetVector Users(I.user_begin(), I.user_end()); + for (auto *UI : Users) { + auto *User = cast(UI); + + if (CurLoop->contains(User)) + continue; + + PHINode *PN = cast(User); assert(ExitBlockSet.count(PN->getParent()) && "The LCSSA PHI is not in an exit block!"); // The PHI must be trivially replacable. @@ -970,6 +1019,7 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, SafetyInfo, CurLoop); PN->replaceAllUsesWith(New); PN->eraseFromParent(); + Changed = true; } return Changed; } diff --git a/lib/Transforms/Scalar/LoopDataPrefetch.cpp b/lib/Transforms/Scalar/LoopDataPrefetch.cpp index 3b5b9c99a3c0..24150b1e4711 100644 --- a/lib/Transforms/Scalar/LoopDataPrefetch.cpp +++ b/lib/Transforms/Scalar/LoopDataPrefetch.cpp @@ -18,25 +18,20 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CodeMetrics.h" -#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" -#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/ValueMapper.h" using namespace llvm; @@ -76,7 +71,7 @@ class LoopDataPrefetch { private: bool runOnLoop(Loop *L); - /// \brief Check if the the stride of the accesses is large enough to + /// \brief Check if the stride of the accesses is large enough to /// warrant a prefetch. 
bool isStrideLargeEnough(const SCEVAddRecExpr *AR); @@ -280,7 +275,7 @@ bool LoopDataPrefetch::runOnLoop(Loop *L) { if (!LSCEVAddRec) continue; - // Check if the the stride of the accesses is large enough to warrant a + // Check if the stride of the accesses is large enough to warrant a // prefetch. if (!isStrideLargeEnough(LSCEVAddRec)) continue; diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp index 82604a8842bf..15cd1086f209 100644 --- a/lib/Transforms/Scalar/LoopDeletion.cpp +++ b/lib/Transforms/Scalar/LoopDeletion.cpp @@ -49,11 +49,10 @@ static bool isLoopDead(Loop *L, ScalarEvolution &SE, // must pass through a PHI in the exit block, meaning that this check is // sufficient to guarantee that no loop-variant values are used outside // of the loop. - BasicBlock::iterator BI = ExitBlock->begin(); bool AllEntriesInvariant = true; bool AllOutgoingValuesSame = true; - while (PHINode *P = dyn_cast(BI)) { - Value *incoming = P->getIncomingValueForBlock(ExitingBlocks[0]); + for (PHINode &P : ExitBlock->phis()) { + Value *incoming = P.getIncomingValueForBlock(ExitingBlocks[0]); // Make sure all exiting blocks produce the same incoming value for the exit // block. If there are different incoming values for different exiting @@ -61,7 +60,7 @@ static bool isLoopDead(Loop *L, ScalarEvolution &SE, // be used. AllOutgoingValuesSame = all_of(makeArrayRef(ExitingBlocks).slice(1), [&](BasicBlock *BB) { - return incoming == P->getIncomingValueForBlock(BB); + return incoming == P.getIncomingValueForBlock(BB); }); if (!AllOutgoingValuesSame) @@ -72,8 +71,6 @@ static bool isLoopDead(Loop *L, ScalarEvolution &SE, AllEntriesInvariant = false; break; } - - ++BI; } if (Changed) @@ -162,11 +159,9 @@ static LoopDeletionResult deleteLoopIfDead(Loop *L, DominatorTree &DT, if (ExitBlock && isLoopNeverExecuted(L)) { DEBUG(dbgs() << "Loop is proven to never execute, delete it!"); // Set incoming value to undef for phi nodes in the exit block. - BasicBlock::iterator BI = ExitBlock->begin(); - while (PHINode *P = dyn_cast(BI)) { - for (unsigned i = 0; i < P->getNumIncomingValues(); i++) - P->setIncomingValue(i, UndefValue::get(P->getType())); - BI++; + for (PHINode &P : ExitBlock->phis()) { + std::fill(P.incoming_values().begin(), P.incoming_values().end(), + UndefValue::get(P.getType())); } deleteDeadLoop(L, &DT, &SE, &LI); ++NumDeleted; diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 7234b97f64d4..21551f0a0825 100644 --- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -334,13 +334,6 @@ bool LoopIdiomRecognize::runOnCountableLoop() { return MadeChange; } -static unsigned getStoreSizeInBytes(StoreInst *SI, const DataLayout *DL) { - uint64_t SizeInBits = DL->getTypeSizeInBits(SI->getValueOperand()->getType()); - assert(((SizeInBits & 7) || (SizeInBits >> 32) == 0) && - "Don't overflow unsigned."); - return (unsigned)SizeInBits >> 3; -} - static APInt getStoreStride(const SCEVAddRecExpr *StoreEv) { const SCEVConstant *ConstStride = cast(StoreEv->getOperand(1)); return ConstStride->getAPInt(); @@ -458,7 +451,7 @@ LoopIdiomRecognize::isLegalStore(StoreInst *SI) { // Check to see if the stride matches the size of the store. If so, then we // know that every byte is touched in the loop. 
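// ---------------------------------------------------------------------------
// Illustrative note, not part of this patch: the removed getStoreSizeInBytes()
// helper is replaced by querying the DataLayout directly; for a store of an
// i32 value both spell the same byte count:
//   unsigned StoreSize =
//       DL->getTypeStoreSize(SI->getValueOperand()->getType()); // == 4
// ---------------------------------------------------------------------------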
APInt Stride = getStoreStride(StoreEv); - unsigned StoreSize = getStoreSizeInBytes(SI, DL); + unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType()); if (StoreSize != Stride && StoreSize != -Stride) return LegalStoreKind::None; @@ -597,7 +590,7 @@ bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl &SL, const SCEVAddRecExpr *FirstStoreEv = cast(SE->getSCEV(FirstStorePtr)); APInt FirstStride = getStoreStride(FirstStoreEv); - unsigned FirstStoreSize = getStoreSizeInBytes(SL[i], DL); + unsigned FirstStoreSize = DL->getTypeStoreSize(SL[i]->getValueOperand()->getType()); // See if we can optimize just this store in isolation. if (FirstStride == FirstStoreSize || -FirstStride == FirstStoreSize) { @@ -690,7 +683,7 @@ bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl &SL, break; AdjacentStores.insert(I); - StoreSize += getStoreSizeInBytes(I, DL); + StoreSize += DL->getTypeStoreSize(I->getValueOperand()->getType()); // Move to the next value in the chain. I = ConsecutiveChain[I]; } @@ -795,7 +788,8 @@ mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L, ++BI) for (Instruction &I : **BI) if (IgnoredStores.count(&I) == 0 && - (AA.getModRefInfo(&I, StoreLoc) & Access)) + isModOrRefSet( + intersectModRef(AA.getModRefInfo(&I, StoreLoc), Access))) return true; return false; @@ -893,8 +887,8 @@ bool LoopIdiomRecognize::processLoopStridedStore( // base pointer and checking the region. Value *BasePtr = Expander.expandCodeFor(Start, DestInt8PtrTy, Preheader->getTerminator()); - if (mayLoopAccessLocation(BasePtr, MRI_ModRef, CurLoop, BECount, StoreSize, - *AA, Stores)) { + if (mayLoopAccessLocation(BasePtr, ModRefInfo::ModRef, CurLoop, BECount, + StoreSize, *AA, Stores)) { Expander.clear(); // If we generated new code for the base pointer, clean up. RecursivelyDeleteTriviallyDeadInstructions(BasePtr, TLI); @@ -964,7 +958,7 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI, Value *StorePtr = SI->getPointerOperand(); const SCEVAddRecExpr *StoreEv = cast(SE->getSCEV(StorePtr)); APInt Stride = getStoreStride(StoreEv); - unsigned StoreSize = getStoreSizeInBytes(SI, DL); + unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType()); bool NegStride = StoreSize == -Stride; // The store must be feeding a non-volatile load. @@ -1003,7 +997,7 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI, SmallPtrSet Stores; Stores.insert(SI); - if (mayLoopAccessLocation(StoreBasePtr, MRI_ModRef, CurLoop, BECount, + if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop, BECount, StoreSize, *AA, Stores)) { Expander.clear(); // If we generated new code for the base pointer, clean up. @@ -1023,8 +1017,8 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI, Value *LoadBasePtr = Expander.expandCodeFor( LdStart, Builder.getInt8PtrTy(LdAS), Preheader->getTerminator()); - if (mayLoopAccessLocation(LoadBasePtr, MRI_Mod, CurLoop, BECount, StoreSize, - *AA, Stores)) { + if (mayLoopAccessLocation(LoadBasePtr, ModRefInfo::Mod, CurLoop, BECount, + StoreSize, *AA, Stores)) { Expander.clear(); // If we generated new code for the base pointer, clean up. RecursivelyDeleteTriviallyDeadInstructions(LoadBasePtr, TLI); diff --git a/lib/Transforms/Scalar/LoopPredication.cpp b/lib/Transforms/Scalar/LoopPredication.cpp index 52dea3254e79..2e4c7b19e476 100644 --- a/lib/Transforms/Scalar/LoopPredication.cpp +++ b/lib/Transforms/Scalar/LoopPredication.cpp @@ -98,60 +98,79 @@ // Note that we can use anything stronger than M, i.e. 
any condition which // implies M. // -// For now the transformation is limited to the following case: +// When S = 1 (i.e. forward iterating loop), the transformation is supported +// when: // * The loop has a single latch with the condition of the form: // B(X) = latchStart + X latchLimit, // where is u<, u<=, s<, or s<=. -// * The step of the IV used in the latch condition is 1. // * The guard condition is of the form // G(X) = guardStart + X u< guardLimit // -// For the ult latch comparison case M is: -// forall X . guardStart + X u< guardLimit && latchStart + X -// guardStart + X + 1 u< guardLimit -// -// The only way the antecedent can be true and the consequent can be false is -// if -// X == guardLimit - 1 - guardStart -// (and guardLimit is non-zero, but we won't use this latter fact). -// If X == guardLimit - 1 - guardStart then the second half of the antecedent is -// latchStart + guardLimit - 1 - guardStart u< latchLimit -// and its negation is -// latchStart + guardLimit - 1 - guardStart u>= latchLimit -// -// In other words, if -// latchLimit u<= latchStart + guardLimit - 1 - guardStart -// then: -// (the ranges below are written in ConstantRange notation, where [A, B) is the -// set for (I = A; I != B; I++ /*maywrap*/) yield(I);) -// -// forall X . guardStart + X u< guardLimit && -// latchStart + X u< latchLimit => -// guardStart + X + 1 u< guardLimit -// == forall X . guardStart + X u< guardLimit && -// latchStart + X u< latchStart + guardLimit - 1 - guardStart => -// guardStart + X + 1 u< guardLimit -// == forall X . (guardStart + X) in [0, guardLimit) && -// (latchStart + X) in [0, latchStart + guardLimit - 1 - guardStart) => -// (guardStart + X + 1) in [0, guardLimit) -// == forall X . X in [-guardStart, guardLimit - guardStart) && -// X in [-latchStart, guardLimit - 1 - guardStart) => -// X in [-guardStart - 1, guardLimit - guardStart - 1) -// == true -// -// So the widened condition is: -// guardStart u< guardLimit && -// latchStart + guardLimit - 1 - guardStart u>= latchLimit -// Similarly for ule condition the widened condition is: -// guardStart u< guardLimit && -// latchStart + guardLimit - 1 - guardStart u> latchLimit -// For slt condition the widened condition is: -// guardStart u< guardLimit && -// latchStart + guardLimit - 1 - guardStart s>= latchLimit -// For sle condition the widened condition is: -// guardStart u< guardLimit && -// latchStart + guardLimit - 1 - guardStart s> latchLimit +// For the ult latch comparison case M is: +// forall X . guardStart + X u< guardLimit && latchStart + X +// guardStart + X + 1 u< guardLimit // +// The only way the antecedent can be true and the consequent can be false is +// if +// X == guardLimit - 1 - guardStart +// (and guardLimit is non-zero, but we won't use this latter fact). +// If X == guardLimit - 1 - guardStart then the second half of the antecedent is +// latchStart + guardLimit - 1 - guardStart u< latchLimit +// and its negation is +// latchStart + guardLimit - 1 - guardStart u>= latchLimit +// +// In other words, if +// latchLimit u<= latchStart + guardLimit - 1 - guardStart +// then: +// (the ranges below are written in ConstantRange notation, where [A, B) is the +// set for (I = A; I != B; I++ /*maywrap*/) yield(I);) +// +// forall X . guardStart + X u< guardLimit && +// latchStart + X u< latchLimit => +// guardStart + X + 1 u< guardLimit +// == forall X . guardStart + X u< guardLimit && +// latchStart + X u< latchStart + guardLimit - 1 - guardStart => +// guardStart + X + 1 u< guardLimit +// == forall X . 
(guardStart + X) in [0, guardLimit) && +// (latchStart + X) in [0, latchStart + guardLimit - 1 - guardStart) => +// (guardStart + X + 1) in [0, guardLimit) +// == forall X . X in [-guardStart, guardLimit - guardStart) && +// X in [-latchStart, guardLimit - 1 - guardStart) => +// X in [-guardStart - 1, guardLimit - guardStart - 1) +// == true +// +// So the widened condition is: +// guardStart u< guardLimit && +// latchStart + guardLimit - 1 - guardStart u>= latchLimit +// Similarly for ule condition the widened condition is: +// guardStart u< guardLimit && +// latchStart + guardLimit - 1 - guardStart u> latchLimit +// For slt condition the widened condition is: +// guardStart u< guardLimit && +// latchStart + guardLimit - 1 - guardStart s>= latchLimit +// For sle condition the widened condition is: +// guardStart u< guardLimit && +// latchStart + guardLimit - 1 - guardStart s> latchLimit +// +// When S = -1 (i.e. reverse iterating loop), the transformation is supported +// when: +// * The loop has a single latch with the condition of the form: +// B(X) = X latchLimit, where is u> or s>. +// * The guard condition is of the form +// G(X) = X - 1 u< guardLimit +// +// For the ugt latch comparison case M is: +// forall X. X-1 u< guardLimit and X u> latchLimit => X-2 u< guardLimit +// +// The only way the antecedent can be true and the consequent can be false is if +// X == 1. +// If X == 1 then the second half of the antecedent is +// 1 u> latchLimit, and its negation is latchLimit u>= 1. +// +// So the widened condition is: +// guardStart u< guardLimit && latchLimit u>= 1. +// Similarly for sgt condition the widened condition is: +// guardStart u< guardLimit && latchLimit s>= 1. //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar/LoopPredication.h" @@ -177,6 +196,8 @@ using namespace llvm; static cl::opt EnableIVTruncation("loop-predication-enable-iv-truncation", cl::Hidden, cl::init(true)); +static cl::opt EnableCountDownLoop("loop-predication-enable-count-down-loop", + cl::Hidden, cl::init(true)); namespace { class LoopPredication { /// Represents an induction variable check: @@ -223,7 +244,10 @@ class LoopPredication { LoopICmp RangeCheck, SCEVExpander &Expander, IRBuilder<> &Builder); - + Optional widenICmpRangeCheckDecrementingLoop(LoopICmp LatchCheck, + LoopICmp RangeCheck, + SCEVExpander &Expander, + IRBuilder<> &Builder); bool widenGuardConditions(IntrinsicInst *II, SCEVExpander &Expander); // When the IV type is wider than the range operand type, we can still do loop @@ -360,7 +384,7 @@ LoopPredication::generateLoopLatchCheck(Type *RangeCheckType) { } bool LoopPredication::isSupportedStep(const SCEV* Step) { - return Step->isOne(); + return Step->isOne() || (Step->isAllOnesValue() && EnableCountDownLoop); } bool LoopPredication::CanExpand(const SCEV* S) { @@ -420,6 +444,44 @@ Optional LoopPredication::widenICmpRangeCheckIncrementingLoop( GuardStart, GuardLimit, InsertAt); return Builder.CreateAnd(FirstIterationCheck, LimitCheck); } + +Optional LoopPredication::widenICmpRangeCheckDecrementingLoop( + LoopPredication::LoopICmp LatchCheck, LoopPredication::LoopICmp RangeCheck, + SCEVExpander &Expander, IRBuilder<> &Builder) { + auto *Ty = RangeCheck.IV->getType(); + const SCEV *GuardStart = RangeCheck.IV->getStart(); + const SCEV *GuardLimit = RangeCheck.Limit; + const SCEV *LatchLimit = LatchCheck.Limit; + if (!CanExpand(GuardStart) || !CanExpand(GuardLimit) || + !CanExpand(LatchLimit)) { + DEBUG(dbgs() << "Can't expand limit 
check!\n"); + return None; + } + // The decrement of the latch check IV should be the same as the + // rangeCheckIV. + auto *PostDecLatchCheckIV = LatchCheck.IV->getPostIncExpr(*SE); + if (RangeCheck.IV != PostDecLatchCheckIV) { + DEBUG(dbgs() << "Not the same. PostDecLatchCheckIV: " + << *PostDecLatchCheckIV + << " and RangeCheckIV: " << *RangeCheck.IV << "\n"); + return None; + } + + // Generate the widened condition for CountDownLoop: + // guardStart u< guardLimit && + // latchLimit 1. + // See the header comment for reasoning of the checks. + Instruction *InsertAt = Preheader->getTerminator(); + auto LimitCheckPred = ICmpInst::isSigned(LatchCheck.Pred) + ? ICmpInst::ICMP_SGE + : ICmpInst::ICMP_UGE; + auto *FirstIterationCheck = expandCheck(Expander, Builder, ICmpInst::ICMP_ULT, + GuardStart, GuardLimit, InsertAt); + auto *LimitCheck = expandCheck(Expander, Builder, LimitCheckPred, LatchLimit, + SE->getOne(Ty), InsertAt); + return Builder.CreateAnd(FirstIterationCheck, LimitCheck); +} + /// If ICI can be widened to a loop invariant condition emits the loop /// invariant condition in the loop preheader and return it, otherwise /// returns None. @@ -467,13 +529,24 @@ Optional LoopPredication::widenICmpRangeCheck(ICmpInst *ICI, } LoopICmp CurrLatchCheck = *CurrLatchCheckOpt; - // At this point the range check step and latch step should have the same - // value and type. - assert(Step == CurrLatchCheck.IV->getStepRecurrence(*SE) && - "Range and latch should have same step recurrence!"); + // At this point, the range and latch step should have the same type, but need + // not have the same value (we support both 1 and -1 steps). + assert(Step->getType() == + CurrLatchCheck.IV->getStepRecurrence(*SE)->getType() && + "Range and latch steps should be of same type!"); + if (Step != CurrLatchCheck.IV->getStepRecurrence(*SE)) { + DEBUG(dbgs() << "Range and latch have different step values!\n"); + return None; + } - return widenICmpRangeCheckIncrementingLoop(CurrLatchCheck, *RangeCheck, - Expander, Builder); + if (Step->isOne()) + return widenICmpRangeCheckIncrementingLoop(CurrLatchCheck, *RangeCheck, + Expander, Builder); + else { + assert(Step->isAllOnesValue() && "Step should be -1!"); + return widenICmpRangeCheckDecrementingLoop(CurrLatchCheck, *RangeCheck, + Expander, Builder); + } } bool LoopPredication::widenGuardConditions(IntrinsicInst *Guard, @@ -580,9 +653,13 @@ Optional LoopPredication::parseLoopLatchICmp() { } auto IsUnsupportedPredicate = [](const SCEV *Step, ICmpInst::Predicate Pred) { - assert(Step->isOne() && "expected Step to be one!"); - return Pred != ICmpInst::ICMP_ULT && Pred != ICmpInst::ICMP_SLT && - Pred != ICmpInst::ICMP_ULE && Pred != ICmpInst::ICMP_SLE; + if (Step->isOne()) { + return Pred != ICmpInst::ICMP_ULT && Pred != ICmpInst::ICMP_SLT && + Pred != ICmpInst::ICMP_ULE && Pred != ICmpInst::ICMP_SLE; + } else { + assert(Step->isAllOnesValue() && "Step should be -1!"); + return Pred != ICmpInst::ICMP_UGT && Pred != ICmpInst::ICMP_SGT; + } }; if (IsUnsupportedPredicate(Step, Result->Pred)) { diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp index a91f53ba663f..0f35fccbe663 100644 --- a/lib/Transforms/Scalar/LoopRotation.cpp +++ b/lib/Transforms/Scalar/LoopRotation.cpp @@ -268,7 +268,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { // If the loop could not be converted to canonical form, it must have an // indirectbr in it, just give up. 
- if (!OrigPreheader) + if (!OrigPreheader || !L->hasDedicatedExits()) return false; // Anything ScalarEvolution may know about this loop or the PHI nodes diff --git a/lib/Transforms/Scalar/LoopSink.cpp b/lib/Transforms/Scalar/LoopSink.cpp index c9d55b4594fe..430a7085d93f 100644 --- a/lib/Transforms/Scalar/LoopSink.cpp +++ b/lib/Transforms/Scalar/LoopSink.cpp @@ -247,7 +247,7 @@ static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI, // Enable LoopSink only when runtime profile is available. // With static profile, the sinking decision may be sub-optimal. - if (!Preheader->getParent()->getEntryCount()) + if (!Preheader->getParent()->hasProfileData()) return false; const BlockFrequency PreheaderFreq = BFI.getBlockFreq(Preheader); diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index a161c839b8d8..332c074a1dfd 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -442,7 +442,7 @@ void Formula::initialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) { canonicalize(*L); } -/// \brief Check whether or not this formula statisfies the canonical +/// \brief Check whether or not this formula satisfies the canonical /// representation. /// \see Formula::BaseRegs. bool Formula::isCanonical(const Loop &L) const { @@ -777,7 +777,8 @@ static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) { /// Returns true if the specified instruction is using the specified value as an /// address. -static bool isAddressUse(Instruction *Inst, Value *OperandVal) { +static bool isAddressUse(const TargetTransformInfo &TTI, + Instruction *Inst, Value *OperandVal) { bool isAddress = isa(Inst); if (StoreInst *SI = dyn_cast(Inst)) { if (SI->getPointerOperand() == OperandVal) @@ -786,18 +787,24 @@ static bool isAddressUse(Instruction *Inst, Value *OperandVal) { // Addressing modes can also be folded into prefetches and a variety // of intrinsics. switch (II->getIntrinsicID()) { - default: break; - case Intrinsic::memset: - case Intrinsic::prefetch: - if (II->getArgOperand(0) == OperandVal) - isAddress = true; - break; - case Intrinsic::memmove: - case Intrinsic::memcpy: - if (II->getArgOperand(0) == OperandVal || - II->getArgOperand(1) == OperandVal) + case Intrinsic::memset: + case Intrinsic::prefetch: + if (II->getArgOperand(0) == OperandVal) + isAddress = true; + break; + case Intrinsic::memmove: + case Intrinsic::memcpy: + if (II->getArgOperand(0) == OperandVal || + II->getArgOperand(1) == OperandVal) + isAddress = true; + break; + default: { + MemIntrinsicInfo IntrInfo; + if (TTI.getTgtMemIntrinsic(II, IntrInfo)) { + if (IntrInfo.PtrVal == OperandVal) isAddress = true; - break; + } + } } } else if (AtomicRMWInst *RMW = dyn_cast(Inst)) { if (RMW->getPointerOperand() == OperandVal) @@ -810,7 +817,8 @@ static bool isAddressUse(Instruction *Inst, Value *OperandVal) { } /// Return the type of the memory being accessed. 
-static MemAccessTy getAccessType(const Instruction *Inst) { +static MemAccessTy getAccessType(const TargetTransformInfo &TTI, + Instruction *Inst) { MemAccessTy AccessTy(Inst->getType(), MemAccessTy::UnknownAddressSpace); if (const StoreInst *SI = dyn_cast(Inst)) { AccessTy.MemTy = SI->getOperand(0)->getType(); @@ -821,6 +829,21 @@ static MemAccessTy getAccessType(const Instruction *Inst) { AccessTy.AddrSpace = RMW->getPointerAddressSpace(); } else if (const AtomicCmpXchgInst *CmpX = dyn_cast(Inst)) { AccessTy.AddrSpace = CmpX->getPointerAddressSpace(); + } else if (IntrinsicInst *II = dyn_cast(Inst)) { + switch (II->getIntrinsicID()) { + case Intrinsic::prefetch: + AccessTy.AddrSpace = II->getArgOperand(0)->getType()->getPointerAddressSpace(); + break; + default: { + MemIntrinsicInfo IntrInfo; + if (TTI.getTgtMemIntrinsic(II, IntrInfo) && IntrInfo.PtrVal) { + AccessTy.AddrSpace + = IntrInfo.PtrVal->getType()->getPointerAddressSpace(); + } + + break; + } + } } // All pointers have the same requirements, so canonicalize them to an @@ -834,12 +857,11 @@ static MemAccessTy getAccessType(const Instruction *Inst) { /// Return true if this AddRec is already a phi in its loop. static bool isExistingPhi(const SCEVAddRecExpr *AR, ScalarEvolution &SE) { - for (BasicBlock::iterator I = AR->getLoop()->getHeader()->begin(); - PHINode *PN = dyn_cast(I); ++I) { - if (SE.isSCEVable(PN->getType()) && - (SE.getEffectiveSCEVType(PN->getType()) == + for (PHINode &PN : AR->getLoop()->getHeader()->phis()) { + if (SE.isSCEVable(PN.getType()) && + (SE.getEffectiveSCEVType(PN.getType()) == SE.getEffectiveSCEVType(AR->getType())) && - SE.getSCEV(PN) == AR) + SE.getSCEV(&PN) == AR) return true; } return false; @@ -915,7 +937,7 @@ static bool isHighCostExpansion(const SCEV *S, return true; } -/// If any of the instructions is the specified set are trivially dead, delete +/// If any of the instructions in the specified set are trivially dead, delete /// them and see if this makes any of their operands subsequently dead. static bool DeleteTriviallyDeadInstructions(SmallVectorImpl &DeadInsts) { @@ -1025,7 +1047,7 @@ class Cost { ScalarEvolution &SE, DominatorTree &DT, SmallPtrSetImpl *LoserRegs); }; - + /// An operand value in an instruction which is to be replaced with some /// equivalent, possibly strength-reduced, replacement. struct LSRFixup { @@ -1149,7 +1171,7 @@ class LSRUse { if (f.Offset < MinOffset) MinOffset = f.Offset; } - + bool HasFormulaWithSameRegs(const Formula &F) const; float getNotSelectedProbability(const SCEV *Reg) const; bool InsertFormula(const Formula &F, const Loop &L); @@ -2362,7 +2384,7 @@ LSRInstance::OptimizeLoopTermCond() { C->getValue().isMinSignedValue()) goto decline_post_inc; // Check for possible scaled-address reuse. - MemAccessTy AccessTy = getAccessType(UI->getUser()); + MemAccessTy AccessTy = getAccessType(TTI, UI->getUser()); int64_t Scale = C->getSExtValue(); if (TTI.isLegalAddressingMode(AccessTy.MemTy, /*BaseGV=*/nullptr, /*BaseOffset=*/0, @@ -2990,15 +3012,14 @@ void LSRInstance::CollectChains() { } // Continue walking down the instructions. } // Continue walking down the domtree. // Visit phi backedges to determine if the chain can generate the IV postinc. 
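// ---------------------------------------------------------------------------
// Illustrative note, not part of this patch: the repeated conversions in this
// commit (here and in LoopDeletion/LoopUnswitch above) replace the manual
// "iterator plus dyn_cast<PHINode>" walk with the BasicBlock::phis() range,
// which yields exactly the leading PHI nodes of a block, e.g.:
//   for (PHINode &PN : L->getHeader()->phis())
//     if (SE.isSCEVable(PN.getType()))
//       ...; // loop body unchanged, PN now used by reference
// ---------------------------------------------------------------------------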
- for (BasicBlock::iterator I = L->getHeader()->begin(); - PHINode *PN = dyn_cast(I); ++I) { - if (!SE.isSCEVable(PN->getType())) + for (PHINode &PN : L->getHeader()->phis()) { + if (!SE.isSCEVable(PN.getType())) continue; Instruction *IncV = - dyn_cast(PN->getIncomingValueForBlock(L->getLoopLatch())); + dyn_cast(PN.getIncomingValueForBlock(L->getLoopLatch())); if (IncV) - ChainInstruction(PN, IncV, ChainUsersVec); + ChainInstruction(&PN, IncV, ChainUsersVec); } // Remove any unprofitable chains. unsigned ChainIdx = 0; @@ -3032,13 +3053,13 @@ void LSRInstance::FinalizeChain(IVChain &Chain) { static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst, Value *Operand, const TargetTransformInfo &TTI) { const SCEVConstant *IncConst = dyn_cast(IncExpr); - if (!IncConst || !isAddressUse(UserInst, Operand)) + if (!IncConst || !isAddressUse(TTI, UserInst, Operand)) return false; if (IncConst->getAPInt().getMinSignedBits() > 64) return false; - MemAccessTy AccessTy = getAccessType(UserInst); + MemAccessTy AccessTy = getAccessType(TTI, UserInst); int64_t IncOffset = IncConst->getValue()->getSExtValue(); if (!isAlwaysFoldable(TTI, LSRUse::Address, AccessTy, /*BaseGV=*/nullptr, IncOffset, /*HaseBaseReg=*/false)) @@ -3129,12 +3150,11 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter, // If LSR created a new, wider phi, we may also replace its postinc. We only // do this if we also found a wide value for the head of the chain. if (isa(Chain.tailUserInst())) { - for (BasicBlock::iterator I = L->getHeader()->begin(); - PHINode *Phi = dyn_cast(I); ++I) { - if (!isCompatibleIVType(Phi, IVSrc)) + for (PHINode &Phi : L->getHeader()->phis()) { + if (!isCompatibleIVType(&Phi, IVSrc)) continue; Instruction *PostIncV = dyn_cast( - Phi->getIncomingValueForBlock(L->getLoopLatch())); + Phi.getIncomingValueForBlock(L->getLoopLatch())); if (!PostIncV || (SE.getSCEV(PostIncV) != SE.getSCEV(IVSrc))) continue; Value *IVOper = IVSrc; @@ -3145,7 +3165,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter, Builder.SetCurrentDebugLocation(PostIncV->getDebugLoc()); IVOper = Builder.CreatePointerCast(IVSrc, PostIncTy, "lsr.chain"); } - Phi->replaceUsesOfWith(PostIncV, IVOper); + Phi.replaceUsesOfWith(PostIncV, IVOper); DeadInsts.emplace_back(PostIncV); } } @@ -3165,14 +3185,14 @@ void LSRInstance::CollectFixupsAndInitialFormulae() { LSRUse::KindType Kind = LSRUse::Basic; MemAccessTy AccessTy; - if (isAddressUse(UserInst, U.getOperandValToReplace())) { + if (isAddressUse(TTI, UserInst, U.getOperandValToReplace())) { Kind = LSRUse::Address; - AccessTy = getAccessType(UserInst); + AccessTy = getAccessType(TTI, UserInst); } const SCEV *S = IU.getExpr(U); PostIncLoopSet TmpPostIncLoops = U.getPostIncLoops(); - + // Equality (== and !=) ICmps are special. We can rewrite (i == N) as // (N - i == 0), and this allows (N - i) to be the expression that we work // with rather than just N or i, so we can consider the register @@ -4304,7 +4324,7 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() { LUThatHas->pushFixup(Fixup); DEBUG(dbgs() << "New fixup has offset " << Fixup.Offset << '\n'); } - + // Delete formulae from the new use which are no longer legal. 
bool Any = false; for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) { diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index 7b1d6446a24a..15e7da5e1a7a 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -882,7 +882,7 @@ static bool computeUnrollCount( } // Check if the runtime trip count is too small when profile is available. - if (L->getHeader()->getParent()->getEntryCount()) { + if (L->getHeader()->getParent()->hasProfileData()) { if (auto ProfileTripCount = getLoopEstimatedTripCount(L)) { if (*ProfileTripCount < FlatLoopTripCountThreshold) return false; diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index bd468338a1d0..f2405d9b0c03 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -1274,12 +1274,11 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, // If the successor of the exit block had PHI nodes, add an entry for // NewExit. - for (BasicBlock::iterator I = ExitSucc->begin(); - PHINode *PN = dyn_cast(I); ++I) { - Value *V = PN->getIncomingValueForBlock(ExitBlocks[i]); + for (PHINode &PN : ExitSucc->phis()) { + Value *V = PN.getIncomingValueForBlock(ExitBlocks[i]); ValueToValueMapTy::iterator It = VMap.find(V); if (It != VMap.end()) V = It->second; - PN->addIncoming(V, NewExit); + PN.addIncoming(V, NewExit); } if (LandingPadInst *LPad = NewExit->getLandingPadInst()) { @@ -1496,10 +1495,9 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, BranchInst::Create(Abort, OldSISucc, ConstantInt::getTrue(Context), NewSISucc); // Release the PHI operands for this edge. - for (BasicBlock::iterator II = NewSISucc->begin(); - PHINode *PN = dyn_cast(II); ++II) - PN->setIncomingValue(PN->getBasicBlockIndex(Switch), - UndefValue::get(PN->getType())); + for (PHINode &PN : NewSISucc->phis()) + PN.setIncomingValue(PN.getBasicBlockIndex(Switch), + UndefValue::get(PN.getType())); // Tell the domtree about the new block. We don't fully update the // domtree here -- instead we force it to do a full recomputation // after the pass is complete -- but we do need to inform it of diff --git a/lib/Transforms/Scalar/LowerAtomic.cpp b/lib/Transforms/Scalar/LowerAtomic.cpp index 6f77c5bd0d07..c165c5ece95c 100644 --- a/lib/Transforms/Scalar/LowerAtomic.cpp +++ b/lib/Transforms/Scalar/LowerAtomic.cpp @@ -15,7 +15,6 @@ #include "llvm/Transforms/Scalar/LowerAtomic.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" -#include "llvm/IR/IntrinsicInst.h" #include "llvm/Pass.h" #include "llvm/Transforms/Scalar.h" using namespace llvm; diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index a4b4330bfedb..9c870b42a747 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -518,7 +518,7 @@ static bool moveUp(AliasAnalysis &AA, StoreInst *SI, Instruction *P, const LoadInst *LI) { // If the store alias this position, early bail out. 
MemoryLocation StoreLoc = MemoryLocation::get(SI); - if (AA.getModRefInfo(P, StoreLoc) != MRI_NoModRef) + if (isModOrRefSet(AA.getModRefInfo(P, StoreLoc))) return false; // Keep track of the arguments of all instruction we plan to lift @@ -542,20 +542,20 @@ static bool moveUp(AliasAnalysis &AA, StoreInst *SI, Instruction *P, for (auto I = --SI->getIterator(), E = P->getIterator(); I != E; --I) { auto *C = &*I; - bool MayAlias = AA.getModRefInfo(C, None) != MRI_NoModRef; + bool MayAlias = isModOrRefSet(AA.getModRefInfo(C, None)); bool NeedLift = false; if (Args.erase(C)) NeedLift = true; else if (MayAlias) { NeedLift = llvm::any_of(MemLocs, [C, &AA](const MemoryLocation &ML) { - return AA.getModRefInfo(C, ML); + return isModOrRefSet(AA.getModRefInfo(C, ML)); }); if (!NeedLift) NeedLift = llvm::any_of(CallSites, [C, &AA](const ImmutableCallSite &CS) { - return AA.getModRefInfo(C, CS); + return isModOrRefSet(AA.getModRefInfo(C, CS)); }); } @@ -565,18 +565,18 @@ static bool moveUp(AliasAnalysis &AA, StoreInst *SI, Instruction *P, if (MayAlias) { // Since LI is implicitly moved downwards past the lifted instructions, // none of them may modify its source. - if (AA.getModRefInfo(C, LoadLoc) & MRI_Mod) + if (isModSet(AA.getModRefInfo(C, LoadLoc))) return false; else if (auto CS = ImmutableCallSite(C)) { // If we can't lift this before P, it's game over. - if (AA.getModRefInfo(P, CS) != MRI_NoModRef) + if (isModOrRefSet(AA.getModRefInfo(P, CS))) return false; CallSites.push_back(CS); } else if (isa(C) || isa(C) || isa(C)) { // If we can't lift this before P, it's game over. auto ML = MemoryLocation::get(C); - if (AA.getModRefInfo(P, ML) != MRI_NoModRef) + if (isModOrRefSet(AA.getModRefInfo(P, ML))) return false; MemLocs.push_back(ML); @@ -631,7 +631,7 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { // of at the store position. Instruction *P = SI; for (auto &I : make_range(++LI->getIterator(), SI->getIterator())) { - if (AA.getModRefInfo(&I, LoadLoc) & MRI_Mod) { + if (isModSet(AA.getModRefInfo(&I, LoadLoc))) { P = &I; break; } @@ -702,7 +702,7 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { MemoryLocation StoreLoc = MemoryLocation::get(SI); for (BasicBlock::iterator I = --SI->getIterator(), E = C->getIterator(); I != E; --I) { - if (AA.getModRefInfo(&*I, StoreLoc) != MRI_NoModRef) { + if (isModOrRefSet(AA.getModRefInfo(&*I, StoreLoc))) { C = nullptr; break; } @@ -934,9 +934,9 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpy, Value *cpyDest, AliasAnalysis &AA = LookupAliasAnalysis(); ModRefInfo MR = AA.getModRefInfo(C, cpyDest, srcSize); // If necessary, perform additional analysis. 
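// ---------------------------------------------------------------------------
// Illustrative mapping, not part of this patch: the enum-flag tests rewritten
// throughout this file and their ModRefInfo helper equivalents as used by
// this commit (isModSet / isModOrRefSet / intersectModRef):
//   MR != MRI_NoModRef        -->  isModOrRefSet(MR)
//   MR used as a bool         -->  isModOrRefSet(MR)
//   (MR & MRI_Mod)            -->  isModSet(MR)
//   (MR & Access)             -->  isModOrRefSet(intersectModRef(MR, Access))
// ---------------------------------------------------------------------------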
- if (MR != MRI_NoModRef) + if (isModOrRefSet(MR)) MR = AA.callCapturesBefore(C, cpyDest, srcSize, &DT); - if (MR != MRI_NoModRef) + if (isModOrRefSet(MR)) return false; // We can't create address space casts here because we don't know if they're diff --git a/lib/Transforms/Scalar/MergeICmps.cpp b/lib/Transforms/Scalar/MergeICmps.cpp index f4de036059ec..6856d5855368 100644 --- a/lib/Transforms/Scalar/MergeICmps.cpp +++ b/lib/Transforms/Scalar/MergeICmps.cpp @@ -26,13 +26,11 @@ #include #include #include -#include "llvm/ADT/APSInt.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" -#include "llvm/IR/IntrinsicInst.h" #include "llvm/Pass.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" @@ -129,7 +127,7 @@ class BCECmpBlock { return Lhs_.Base() != nullptr && Rhs_.Base() != nullptr; } - // Assert the the block is consistent: If valid, it should also have + // Assert the block is consistent: If valid, it should also have // non-null members besides Lhs_ and Rhs_. void AssertConsistent() const { if (IsValid()) { @@ -554,7 +552,7 @@ bool processPhi(PHINode &Phi, const TargetLibraryInfo *const TLI) { // - The last basic block (bb4 here) must branch unconditionally to bb_phi. // It's the only block that contributes a non-constant value to the Phi. // - All other blocks (b1, b2, b3) must have exactly two successors, one of - // them being the the phi block. + // them being the phi block. // - All intermediate blocks (bb2, bb3) must have only one predecessor. // - Blocks cannot do other work besides the comparison, see doesOtherWork() diff --git a/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp b/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp index 6727cf0179c1..f2f615cb9b0f 100644 --- a/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp +++ b/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp @@ -80,11 +80,9 @@ #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/Loads.h" -#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Metadata.h" -#include "llvm/IR/PatternMatch.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" @@ -195,7 +193,7 @@ bool MergedLoadStoreMotion::isStoreSinkBarrierInRange(const Instruction &Start, make_range(Start.getIterator(), End.getIterator())) if (Inst.mayThrow()) return true; - return AA->canInstructionRangeModRef(Start, End, Loc, MRI_ModRef); + return AA->canInstructionRangeModRef(Start, End, Loc, ModRefInfo::ModRef); } /// diff --git a/lib/Transforms/Scalar/NewGVN.cpp b/lib/Transforms/Scalar/NewGVN.cpp index 9ebf2d769356..5e6b58055ec4 100644 --- a/lib/Transforms/Scalar/NewGVN.cpp +++ b/lib/Transforms/Scalar/NewGVN.cpp @@ -4058,7 +4058,8 @@ bool NewGVN::eliminateInstructions(Function &F) { Value *DominatingLeader = EliminationStack.back(); auto *II = dyn_cast(DominatingLeader); - if (II && II->getIntrinsicID() == Intrinsic::ssa_copy) + bool isSSACopy = II && II->getIntrinsicID() == Intrinsic::ssa_copy; + if (isSSACopy) DominatingLeader = II->getOperand(0); // Don't replace our existing users with ourselves. @@ -4081,7 +4082,9 @@ bool NewGVN::eliminateInstructions(Function &F) { // It's about to be alive again. 
if (LeaderUseCount == 0 && isa(DominatingLeader)) ProbablyDead.erase(cast(DominatingLeader)); - if (LeaderUseCount == 0 && II) + // Copy instructions, however, are still dead beacuse we use their + // operand as the leader. + if (LeaderUseCount == 0 && isSSACopy) ProbablyDead.insert(II); ++LeaderUseCount; AnythingReplaced = true; diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp index dcaa40340813..88dcaf0f8a36 100644 --- a/lib/Transforms/Scalar/Reassociate.cpp +++ b/lib/Transforms/Scalar/Reassociate.cpp @@ -27,6 +27,7 @@ #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/GlobalsModRef.h" @@ -2184,11 +2185,104 @@ void ReassociatePass::ReassociateExpression(BinaryOperator *I) { return; } + if (Ops.size() > 2 && Ops.size() <= GlobalReassociateLimit) { + // Find the pair with the highest count in the pairmap and move it to the + // back of the list so that it can later be CSE'd. + // example: + // a*b*c*d*e + // if c*e is the most "popular" pair, we can express this as + // (((c*e)*d)*b)*a + unsigned Max = 1; + unsigned BestRank = 0; + std::pair BestPair; + unsigned Idx = I->getOpcode() - Instruction::BinaryOpsBegin; + for (unsigned i = 0; i < Ops.size() - 1; ++i) + for (unsigned j = i + 1; j < Ops.size(); ++j) { + unsigned Score = 0; + Value *Op0 = Ops[i].Op; + Value *Op1 = Ops[j].Op; + if (std::less()(Op1, Op0)) + std::swap(Op0, Op1); + auto it = PairMap[Idx].find({Op0, Op1}); + if (it != PairMap[Idx].end()) + Score += it->second; + + unsigned MaxRank = std::max(Ops[i].Rank, Ops[j].Rank); + if (Score > Max || (Score == Max && MaxRank < BestRank)) { + BestPair = {i, j}; + Max = Score; + BestRank = MaxRank; + } + } + if (Max > 1) { + auto Op0 = Ops[BestPair.first]; + auto Op1 = Ops[BestPair.second]; + Ops.erase(&Ops[BestPair.second]); + Ops.erase(&Ops[BestPair.first]); + Ops.push_back(Op0); + Ops.push_back(Op1); + } + } // Now that we ordered and optimized the expressions, splat them back into // the expression tree, removing any unneeded nodes. RewriteExprTree(I, Ops); } +void +ReassociatePass::BuildPairMap(ReversePostOrderTraversal &RPOT) { + // Make a "pairmap" of how often each operand pair occurs. + for (BasicBlock *BI : RPOT) { + for (Instruction &I : *BI) { + if (!I.isAssociative()) + continue; + + // Ignore nodes that aren't at the root of trees. + if (I.hasOneUse() && I.user_back()->getOpcode() == I.getOpcode()) + continue; + + // Collect all operands in a single reassociable expression. + // Since Reassociate has already been run once, we can assume things + // are already canonical according to Reassociation's regime. + SmallVector Worklist = { I.getOperand(0), I.getOperand(1) }; + SmallVector Ops; + while (!Worklist.empty() && Ops.size() <= GlobalReassociateLimit) { + Value *Op = Worklist.pop_back_val(); + Instruction *OpI = dyn_cast(Op); + if (!OpI || OpI->getOpcode() != I.getOpcode() || !OpI->hasOneUse()) { + Ops.push_back(Op); + continue; + } + // Be paranoid about self-referencing expressions in unreachable code. + if (OpI->getOperand(0) != OpI) + Worklist.push_back(OpI->getOperand(0)); + if (OpI->getOperand(1) != OpI) + Worklist.push_back(OpI->getOperand(1)); + } + // Skip extremely long expressions. + if (Ops.size() > GlobalReassociateLimit) + continue; + + // Add all pairwise combinations of operands to the pair map. 
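// ---------------------------------------------------------------------------
// Illustrative walk-through, not part of this patch: for a reassociable
// expression a*b*c*d the collected operand list is {a, b, c, d}, and the loop
// below records every unordered pair for the 'mul' opcode:
//   {a,b} {a,c} {a,d} {b,c} {b,d} {c,d}   -> count 1 each
// If a later expression c*d*e is visited, {c,d} reaches count 2, and
// ReassociateExpression() (see the hunk above) moves c and d to the back of
// its operand list so the common subexpression c*d can later be CSE'd.
// ---------------------------------------------------------------------------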
+ unsigned BinaryIdx = I.getOpcode() - Instruction::BinaryOpsBegin; + SmallSet, 32> Visited; + for (unsigned i = 0; i < Ops.size() - 1; ++i) { + for (unsigned j = i + 1; j < Ops.size(); ++j) { + // Canonicalize operand orderings. + Value *Op0 = Ops[i]; + Value *Op1 = Ops[j]; + if (std::less()(Op1, Op0)) + std::swap(Op0, Op1); + if (!Visited.insert({Op0, Op1}).second) + continue; + auto res = PairMap[BinaryIdx].insert({{Op0, Op1}, 1}); + if (!res.second) + ++res.first->second; + } + } + } + } +} + PreservedAnalyses ReassociatePass::run(Function &F, FunctionAnalysisManager &) { // Get the functions basic blocks in Reverse Post Order. This order is used by // BuildRankMap to pre calculate ranks correctly. It also excludes dead basic @@ -2199,8 +2293,20 @@ PreservedAnalyses ReassociatePass::run(Function &F, FunctionAnalysisManager &) { // Calculate the rank map for F. BuildRankMap(F, RPOT); + // Build the pair map before running reassociate. + // Technically this would be more accurate if we did it after one round + // of reassociation, but in practice it doesn't seem to help much on + // real-world code, so don't waste the compile time running reassociate + // twice. + // If a user wants, they could expicitly run reassociate twice in their + // pass pipeline for further potential gains. + // It might also be possible to update the pair map during runtime, but the + // overhead of that may be large if there's many reassociable chains. + BuildPairMap(RPOT); + MadeChange = false; - // Traverse the same blocks that was analysed by BuildRankMap. + + // Traverse the same blocks that were analysed by BuildRankMap. for (BasicBlock *BI : RPOT) { assert(RankMap.count(&*BI) && "BB should be ranked."); // Optimize every instruction in the basic block. @@ -2239,9 +2345,11 @@ PreservedAnalyses ReassociatePass::run(Function &F, FunctionAnalysisManager &) { } } - // We are done with the rank map. + // We are done with the rank map and pair map. RankMap.clear(); ValueRankMap.clear(); + for (auto &Entry : PairMap) + Entry.clear(); if (MadeChange) { PreservedAnalyses PA; diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp index 44acfc885797..c7acdef27136 100644 --- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -12,6 +12,8 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Transforms/Scalar/RewriteStatepointsForGC.h" + #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" @@ -108,30 +110,96 @@ static cl::opt AllowStatepointWithNoDeoptInfo("rs4gc-allow-statepoint-with-no-deopt-info", cl::Hidden, cl::init(true)); +/// The IR fed into RewriteStatepointsForGC may have had attributes and +/// metadata implying dereferenceability that are no longer valid/correct after +/// RewriteStatepointsForGC has run. This is because semantically, after +/// RewriteStatepointsForGC runs, all calls to gc.statepoint "free" the entire +/// heap. stripNonValidData (conservatively) restores +/// correctness by erasing all attributes in the module that externally imply +/// dereferenceability. Similar reasoning also applies to the noalias +/// attributes and metadata. gc.statepoint can touch the entire heap including +/// noalias objects. +/// Apart from attributes and metadata, we also remove instructions that imply +/// constant physical memory: llvm.invariant.start. 
+static void stripNonValidData(Module &M); + +static bool shouldRewriteStatepointsIn(Function &F); + +PreservedAnalyses RewriteStatepointsForGC::run(Module &M, + ModuleAnalysisManager &AM) { + bool Changed = false; + auto &FAM = AM.getResult(M).getManager(); + for (Function &F : M) { + // Nothing to do for declarations. + if (F.isDeclaration() || F.empty()) + continue; + + // Policy choice says not to rewrite - the most common reason is that we're + // compiling code without a GCStrategy. + if (!shouldRewriteStatepointsIn(F)) + continue; + + auto &DT = FAM.getResult(F); + auto &TTI = FAM.getResult(F); + auto &TLI = FAM.getResult(F); + Changed |= runOnFunction(F, DT, TTI, TLI); + } + if (!Changed) + return PreservedAnalyses::all(); + + // stripNonValidData asserts that shouldRewriteStatepointsIn + // returns true for at least one function in the module. Since at least + // one function changed, we know that the precondition is satisfied. + stripNonValidData(M); + + PreservedAnalyses PA; + PA.preserve(); + PA.preserve(); + return PA; +} + namespace { -struct RewriteStatepointsForGC : public ModulePass { +class RewriteStatepointsForGCLegacyPass : public ModulePass { + RewriteStatepointsForGC Impl; + +public: static char ID; // Pass identification, replacement for typeid - RewriteStatepointsForGC() : ModulePass(ID) { - initializeRewriteStatepointsForGCPass(*PassRegistry::getPassRegistry()); + RewriteStatepointsForGCLegacyPass() : ModulePass(ID), Impl() { + initializeRewriteStatepointsForGCLegacyPassPass( + *PassRegistry::getPassRegistry()); } - bool runOnFunction(Function &F); - bool runOnModule(Module &M) override { bool Changed = false; - for (Function &F : M) - Changed |= runOnFunction(F); - - if (Changed) { - // stripNonValidData asserts that shouldRewriteStatepointsIn - // returns true for at least one function in the module. Since at least - // one function changed, we know that the precondition is satisfied. - stripNonValidData(M); + const TargetLibraryInfo &TLI = + getAnalysis().getTLI(); + for (Function &F : M) { + // Nothing to do for declarations. + if (F.isDeclaration() || F.empty()) + continue; + + // Policy choice says not to rewrite - the most common reason is that + // we're compiling code without a GCStrategy. + if (!shouldRewriteStatepointsIn(F)) + continue; + + TargetTransformInfo &TTI = + getAnalysis().getTTI(F); + auto &DT = getAnalysis(F).getDomTree(); + + Changed |= Impl.runOnFunction(F, DT, TTI, TLI); } - return Changed; + if (!Changed) + return false; + + // stripNonValidData asserts that shouldRewriteStatepointsIn + // returns true for at least one function in the module. Since at least + // one function changed, we know that the precondition is satisfied. + stripNonValidData(M); + return true; } void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -141,43 +209,23 @@ struct RewriteStatepointsForGC : public ModulePass { AU.addRequired(); AU.addRequired(); } - - /// The IR fed into RewriteStatepointsForGC may have had attributes and - /// metadata implying dereferenceability that are no longer valid/correct after - /// RewriteStatepointsForGC has run. This is because semantically, after - /// RewriteStatepointsForGC runs, all calls to gc.statepoint "free" the entire - /// heap. stripNonValidData (conservatively) restores - /// correctness by erasing all attributes in the module that externally imply - /// dereferenceability. Similar reasoning also applies to the noalias - /// attributes and metadata. 
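The new pass-manager entry point added above reaches per-function analyses from a module pass through FunctionAnalysisManagerModuleProxy. A hedged sketch of that pattern for a hypothetical pass (the pass name and its body are invented for illustration; the proxy and analysis classes are the ones used in the hunk):

  #include "llvm/IR/Dominators.h"
  #include "llvm/IR/Module.h"
  #include "llvm/IR/PassManager.h"

  namespace {
  // Hypothetical module pass, used only to illustrate the proxy pattern.
  struct StripExamplePass : llvm::PassInfoMixin<StripExamplePass> {
    llvm::PreservedAnalyses run(llvm::Module &M,
                                llvm::ModuleAnalysisManager &AM) {
      // The proxy exposes the function-level analysis manager to a module
      // pass, so cached per-function results (dominator trees, TTI, ...) can
      // be fetched instead of recomputed.
      auto &FAM =
          AM.getResult<llvm::FunctionAnalysisManagerModuleProxy>(M).getManager();
      bool Changed = false;
      for (llvm::Function &F : M) {
        if (F.isDeclaration() || F.empty())
          continue;
        auto &DT = FAM.getResult<llvm::DominatorTreeAnalysis>(F);
        (void)DT; // a real pass would transform F here and set Changed
      }
      return Changed ? llvm::PreservedAnalyses::none()
                     : llvm::PreservedAnalyses::all();
    }
  };
  } // namespace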
gc.statepoint can touch the entire heap including - /// noalias objects. - /// Apart from attributes and metadata, we also remove instructions that imply - /// constant physical memory: llvm.invariant.start. - void stripNonValidData(Module &M); - - // Helpers for stripNonValidData - void stripNonValidDataFromBody(Function &F); - void stripNonValidAttributesFromPrototype(Function &F); - - // Certain metadata on instructions are invalid after running RS4GC. - // Optimizations that run after RS4GC can incorrectly use this metadata to - // optimize functions. We drop such metadata on the instruction. - void stripInvalidMetadataFromInstruction(Instruction &I); }; } // end anonymous namespace -char RewriteStatepointsForGC::ID = 0; +char RewriteStatepointsForGCLegacyPass::ID = 0; -ModulePass *llvm::createRewriteStatepointsForGCPass() { - return new RewriteStatepointsForGC(); +ModulePass *llvm::createRewriteStatepointsForGCLegacyPass() { + return new RewriteStatepointsForGCLegacyPass(); } -INITIALIZE_PASS_BEGIN(RewriteStatepointsForGC, "rewrite-statepoints-for-gc", +INITIALIZE_PASS_BEGIN(RewriteStatepointsForGCLegacyPass, + "rewrite-statepoints-for-gc", "Make relocations explicit at statepoints", false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) -INITIALIZE_PASS_END(RewriteStatepointsForGC, "rewrite-statepoints-for-gc", +INITIALIZE_PASS_END(RewriteStatepointsForGCLegacyPass, + "rewrite-statepoints-for-gc", "Make relocations explicit at statepoints", false, false) namespace { @@ -2346,8 +2394,7 @@ static void RemoveNonValidAttrAtIndex(LLVMContext &Ctx, AttrHolder &AH, AH.setAttributes(AH.getAttributes().removeAttributes(Ctx, Index, R)); } -void -RewriteStatepointsForGC::stripNonValidAttributesFromPrototype(Function &F) { +static void stripNonValidAttributesFromPrototype(Function &F) { LLVMContext &Ctx = F.getContext(); for (Argument &A : F.args()) @@ -2359,7 +2406,10 @@ RewriteStatepointsForGC::stripNonValidAttributesFromPrototype(Function &F) { RemoveNonValidAttrAtIndex(Ctx, F, AttributeList::ReturnIndex); } -void RewriteStatepointsForGC::stripInvalidMetadataFromInstruction(Instruction &I) { +/// Certain metadata on instructions are invalid after running RS4GC. +/// Optimizations that run after RS4GC can incorrectly use this metadata to +/// optimize functions. We drop such metadata on the instruction. 
+static void stripInvalidMetadataFromInstruction(Instruction &I) { if (!isa(I) && !isa(I)) return; // These are the attributes that are still valid on loads and stores after @@ -2387,7 +2437,7 @@ void RewriteStatepointsForGC::stripInvalidMetadataFromInstruction(Instruction &I I.dropUnknownNonDebugMetadata(ValidMetadataAfterRS4GC); } -void RewriteStatepointsForGC::stripNonValidDataFromBody(Function &F) { +static void stripNonValidDataFromBody(Function &F) { if (F.empty()) return; @@ -2411,22 +2461,8 @@ void RewriteStatepointsForGC::stripNonValidDataFromBody(Function &F) { continue; } - if (const MDNode *MD = I.getMetadata(LLVMContext::MD_tbaa)) { - assert(MD->getNumOperands() < 5 && "unrecognized metadata shape!"); - bool IsImmutableTBAA = - MD->getNumOperands() == 4 && - mdconst::extract(MD->getOperand(3))->getValue() == 1; - - if (!IsImmutableTBAA) - continue; // no work to do, MD_tbaa is already marked mutable - - MDNode *Base = cast(MD->getOperand(0)); - MDNode *Access = cast(MD->getOperand(1)); - uint64_t Offset = - mdconst::extract(MD->getOperand(2))->getZExtValue(); - - MDNode *MutableTBAA = - Builder.createTBAAStructTagNode(Base, Access, Offset); + if (MDNode *Tag = I.getMetadata(LLVMContext::MD_tbaa)) { + MDNode *MutableTBAA = Builder.createMutableTBAAAccessTag(Tag); I.setMetadata(LLVMContext::MD_tbaa, MutableTBAA); } @@ -2462,7 +2498,7 @@ static bool shouldRewriteStatepointsIn(Function &F) { return false; } -void RewriteStatepointsForGC::stripNonValidData(Module &M) { +static void stripNonValidData(Module &M) { #ifndef NDEBUG assert(llvm::any_of(M, shouldRewriteStatepointsIn) && "precondition!"); #endif @@ -2474,21 +2510,12 @@ void RewriteStatepointsForGC::stripNonValidData(Module &M) { stripNonValidDataFromBody(F); } -bool RewriteStatepointsForGC::runOnFunction(Function &F) { - // Nothing to do for declarations. - if (F.isDeclaration() || F.empty()) - return false; - - // Policy choice says not to rewrite - the most common reason is that we're - // compiling code without a GCStrategy. - if (!shouldRewriteStatepointsIn(F)) - return false; - - DominatorTree &DT = getAnalysis(F).getDomTree(); - TargetTransformInfo &TTI = - getAnalysis().getTTI(F); - const TargetLibraryInfo &TLI = - getAnalysis().getTLI(); +bool RewriteStatepointsForGC::runOnFunction(Function &F, DominatorTree &DT, + TargetTransformInfo &TTI, + const TargetLibraryInfo &TLI) { + assert(!F.isDeclaration() && !F.empty() && + "need function body to rewrite statepoints in"); + assert(shouldRewriteStatepointsIn(F) && "mismatch in rewrite decision"); auto NeedsRewrite = [&TLI](Instruction &I) { if (ImmutableCallSite CS = ImmutableCallSite(&I)) @@ -2755,17 +2782,12 @@ static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData, StatepointLiveSetTy Updated; findLiveSetAtInst(Inst, RevisedLivenessData, Updated); -#ifndef NDEBUG - DenseSet Bases; - for (auto KVPair : Info.PointerToBase) - Bases.insert(KVPair.second); -#endif - // We may have base pointers which are now live that weren't before. We need // to update the PointerToBase structure to reflect this. 
for (auto V : Updated) if (Info.PointerToBase.insert({V, V}).second) { - assert(Bases.count(V) && "Can't find base for unexpected live value!"); + assert(isKnownBaseResult(V) && + "Can't find base for unexpected live value!"); continue; } diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp index e5866b4718da..b6d034e9fb9f 100644 --- a/lib/Transforms/Scalar/SCCP.cpp +++ b/lib/Transforms/Scalar/SCCP.cpp @@ -523,10 +523,8 @@ class SCCPSolver : public InstVisitor { DEBUG(dbgs() << "Marking Edge Executable: " << Source->getName() << " -> " << Dest->getName() << '\n'); - PHINode *PN; - for (BasicBlock::iterator I = Dest->begin(); - (PN = dyn_cast(I)); ++I) - visitPHINode(*PN); + for (PHINode &PN : Dest->phis()) + visitPHINode(PN); } } @@ -1902,7 +1900,7 @@ static bool runIPSCCP(Module &M, const DataLayout &DL, if (Inst->getType()->isVoidTy()) continue; if (tryToReplaceWithConstant(Solver, Inst)) { - if (!isa(Inst) && !isa(Inst)) + if (Inst->isSafeToRemove()) Inst->eraseFromParent(); // Hey, we just changed something! MadeChanges = true; diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index b430d07406c0..00b7346d24e7 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -30,6 +30,7 @@ #include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" @@ -2678,8 +2679,7 @@ class llvm::sroa::AllocaSliceRewriter assert(!IsSplit); assert(NewBeginOffset == BeginOffset); II.setDest(getNewAllocaSlicePtr(IRB, OldPtr->getType())); - Type *CstTy = II.getAlignmentCst()->getType(); - II.setAlignment(ConstantInt::get(CstTy, getSliceAlign())); + II.setAlignment(getSliceAlign()); deleteIfTriviallyDead(OldPtr); return false; @@ -2801,9 +2801,7 @@ class llvm::sroa::AllocaSliceRewriter II.setSource(AdjustedPtr); if (II.getAlignment() > SliceAlign) { - Type *CstTy = II.getAlignmentCst()->getType(); - II.setAlignment( - ConstantInt::get(CstTy, MinAlign(II.getAlignment(), SliceAlign))); + II.setAlignment(MinAlign(II.getAlignment(), SliceAlign)); } DEBUG(dbgs() << " to: " << II << "\n"); @@ -3928,10 +3926,10 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS, // exact same type as the original, and with the same access offsets. In that // case, re-use the existing alloca, but still run through the rewriter to // perform phi and select speculation. + // P.beginOffset() can be non-zero even with the same type in a case with + // out-of-bounds access (e.g. @PR35657 function in SROA/basictest.ll). AllocaInst *NewAI; - if (SliceTy == AI.getAllocatedType()) { - assert(P.beginOffset() == 0 && - "Non-zero begin offset but same alloca type"); + if (SliceTy == AI.getAllocatedType() && P.beginOffset() == 0) { NewAI = &AI; // FIXME: We should be able to bail at this point with "nothing changed". // FIXME: We might want to defer PHI speculation until after here. @@ -4047,27 +4045,58 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) { // First try to pre-split loads and stores. Changed |= presplitLoadsAndStores(AI, AS); - // Now that we have identified any pre-splitting opportunities, mark any - // splittable (non-whole-alloca) loads and stores as unsplittable. If we fail - // to split these during pre-splitting, we want to force them to be - // rewritten into a partition. 
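The SCCP hunk above, and several hunks below (SimpleLoopUnswitch, StructurizeCFG, BasicBlockUtils, BreakCriticalEdges, CallPromotionUtils), switch from the old "iterate until dyn_cast<PHINode> fails" loop to the BasicBlock::phis() range. A small sketch of the idiom, assuming an LLVM build environment; the helper itself is illustrative:

  #include "llvm/IR/BasicBlock.h"
  #include "llvm/IR/Instructions.h"

  // Count how many of BB's PHI nodes have an incoming entry for Pred.
  static unsigned countIncomingFrom(llvm::BasicBlock &BB,
                                    llvm::BasicBlock &Pred) {
    unsigned N = 0;
    // Old style:
    //   for (BasicBlock::iterator I = BB.begin();
    //        PHINode *PN = dyn_cast<PHINode>(I); ++I) ...
    // New style: phis() yields exactly the leading PHI nodes and nothing else.
    for (llvm::PHINode &PN : BB.phis())
      if (PN.getBasicBlockIndex(&Pred) != -1)
        ++N;
    return N;
  }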
+ // Now that we have identified any pre-splitting opportunities, + // mark loads and stores unsplittable except for the following case. + // We leave a slice splittable if all other slices are disjoint or fully + // included in the slice, such as whole-alloca loads and stores. + // If we fail to split these during pre-splitting, we want to force them + // to be rewritten into a partition. bool IsSorted = true; - for (Slice &S : AS) { - if (!S.isSplittable()) - continue; - // FIXME: We currently leave whole-alloca splittable loads and stores. This - // used to be the only splittable loads and stores and we need to be - // confident that the above handling of splittable loads and stores is - // completely sufficient before we forcibly disable the remaining handling. - if (S.beginOffset() == 0 && - S.endOffset() >= DL.getTypeAllocSize(AI.getAllocatedType())) - continue; - if (isa(S.getUse()->getUser()) || - isa(S.getUse()->getUser())) { - S.makeUnsplittable(); - IsSorted = false; + + uint64_t AllocaSize = DL.getTypeAllocSize(AI.getAllocatedType()); + const uint64_t MaxBitVectorSize = 1024; + if (AllocaSize <= MaxBitVectorSize) { + // If a byte boundary is included in any load or store, a slice starting or + // ending at the boundary is not splittable. + SmallBitVector SplittableOffset(AllocaSize + 1, true); + for (Slice &S : AS) + for (unsigned O = S.beginOffset() + 1; + O < S.endOffset() && O < AllocaSize; O++) + SplittableOffset.reset(O); + + for (Slice &S : AS) { + if (!S.isSplittable()) + continue; + + if ((S.beginOffset() > AllocaSize || SplittableOffset[S.beginOffset()]) && + (S.endOffset() > AllocaSize || SplittableOffset[S.endOffset()])) + continue; + + if (isa(S.getUse()->getUser()) || + isa(S.getUse()->getUser())) { + S.makeUnsplittable(); + IsSorted = false; + } } } + else { + // We only allow whole-alloca splittable loads and stores + // for a large alloca to avoid creating too large BitVector. + for (Slice &S : AS) { + if (!S.isSplittable()) + continue; + + if (S.beginOffset() == 0 && S.endOffset() >= AllocaSize) + continue; + + if (isa(S.getUse()->getUser()) || + isa(S.getUse()->getUser())) { + S.makeUnsplittable(); + IsSorted = false; + } + } + } + if (!IsSorted) std::sort(AS.begin(), AS.end()); @@ -4134,6 +4163,15 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) { "new fragment is outside of original fragment"); Start -= OrigFragment->OffsetInBits; } + + // The alloca may be larger than the variable. + if (VarSize) { + if (Size > *VarSize) + Size = *VarSize; + if (Size == 0 || Start + Size > *VarSize) + continue; + } + // Avoid creating a fragment expression that covers the entire variable. 
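The SROA change above precomputes, for allocas of at most 1024 bytes, which byte offsets remain legal split points: an offset is cleared as soon as any load or store covers it strictly internally, and a slice stays splittable only if both of its end points survive. A standalone model of that computation, with std::vector<bool> and a plain struct standing in for SmallBitVector and the alloca slices:

  #include <cassert>
  #include <cstdint>
  #include <vector>

  struct Slice {
    uint64_t Begin, End; // half-open byte range [Begin, End)
  };

  static std::vector<bool> computeSplittable(const std::vector<Slice> &Slices,
                                             uint64_t AllocaSize) {
    std::vector<bool> Splittable(AllocaSize + 1, true);
    for (const Slice &S : Slices)
      for (uint64_t O = S.Begin + 1; O < S.End && O < AllocaSize; ++O)
        Splittable[O] = false; // O lies strictly inside some access
    return Splittable;
  }

  // A slice may stay splittable only if no other access straddles either of
  // its end points (mirrors the SplittableOffset test above).
  static bool keepSplittable(const Slice &S, const std::vector<bool> &Splittable,
                             uint64_t AllocaSize) {
    return (S.Begin > AllocaSize || Splittable[S.Begin]) &&
           (S.End > AllocaSize || Splittable[S.End]);
  }

  int main() {
    const uint64_t AllocaSize = 16;

    // Two disjoint 8-byte accesses: byte 8 is a boundary of both, so both may
    // remain splittable even though neither covers the whole alloca (roughly
    // what the previous whole-alloca-only rule would have rejected).
    std::vector<Slice> A = {{0, 8}, {8, 16}};
    auto SA = computeSplittable(A, AllocaSize);
    assert(keepSplittable(A[0], SA, AllocaSize) &&
           keepSplittable(A[1], SA, AllocaSize));

    // An access straddling byte 8 clears that offset, so the two halves must
    // now be rewritten as unsplittable slices.
    std::vector<Slice> B = {{0, 8}, {8, 16}, {6, 10}};
    auto SB = computeSplittable(B, AllocaSize);
    assert(!keepSplittable(B[0], SB, AllocaSize) &&
           !keepSplittable(B[1], SB, AllocaSize));
    return 0;
  }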
if (!VarSize || *VarSize != Size) { if (auto E = diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp index 01d557f8113f..3b99ddff2e06 100644 --- a/lib/Transforms/Scalar/Scalar.cpp +++ b/lib/Transforms/Scalar/Scalar.cpp @@ -81,7 +81,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializePartiallyInlineLibCallsLegacyPassPass(Registry); initializeReassociateLegacyPassPass(Registry); initializeRegToMemPass(Registry); - initializeRewriteStatepointsForGCPass(Registry); + initializeRewriteStatepointsForGCLegacyPassPass(Registry); initializeSCCPLegacyPassPass(Registry); initializeIPSCCPLegacyPassPass(Registry); initializeSROALegacyPassPass(Registry); diff --git a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp index 209821ff21d7..8fa9ffb6d014 100644 --- a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp +++ b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp @@ -97,7 +97,7 @@ // load %p2 // ... // -// We can not do CSE for to the common part related to index "i64 %i". Lowering +// We can not do CSE to the common part related to index "i64 %i". Lowering // GEPs can achieve such goals. // If the target does not use alias analysis in codegen, this pass will // lower a GEP with multiple indices into arithmetic operations: diff --git a/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp index 3d0fca0bc3a5..aba732bc413f 100644 --- a/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ b/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -271,19 +271,14 @@ static bool areLoopExitPHIsLoopInvariant(Loop &L, BasicBlock &ExitingBB, static void rewritePHINodesForUnswitchedExitBlock(BasicBlock &UnswitchedBB, BasicBlock &OldExitingBB, BasicBlock &OldPH) { - for (Instruction &I : UnswitchedBB) { - auto *PN = dyn_cast(&I); - if (!PN) - // No more PHIs to check. - break; - + for (PHINode &PN : UnswitchedBB.phis()) { // When the loop exit is directly unswitched we just need to update the // incoming basic block. We loop to handle weird cases with repeated // incoming blocks, but expect to typically only have one operand here. - for (auto i : seq(0, PN->getNumOperands())) { - assert(PN->getIncomingBlock(i) == &OldExitingBB && + for (auto i : seq(0, PN.getNumOperands())) { + assert(PN.getIncomingBlock(i) == &OldExitingBB && "Found incoming block different from unique predecessor!"); - PN->setIncomingBlock(i, &OldPH); + PN.setIncomingBlock(i, &OldPH); } } } @@ -302,14 +297,9 @@ static void rewritePHINodesForExitAndUnswitchedBlocks(BasicBlock &ExitBB, assert(&ExitBB != &UnswitchedBB && "Must have different loop exit and unswitched blocks!"); Instruction *InsertPt = &*UnswitchedBB.begin(); - for (Instruction &I : ExitBB) { - auto *PN = dyn_cast(&I); - if (!PN) - // No more PHIs to check. - break; - - auto *NewPN = PHINode::Create(PN->getType(), /*NumReservedValues*/ 2, - PN->getName() + ".split", InsertPt); + for (PHINode &PN : ExitBB.phis()) { + auto *NewPN = PHINode::Create(PN.getType(), /*NumReservedValues*/ 2, + PN.getName() + ".split", InsertPt); // Walk backwards over the old PHI node's inputs to minimize the cost of // removing each one. We have to do this weird loop manually so that we @@ -320,18 +310,18 @@ static void rewritePHINodesForExitAndUnswitchedBlocks(BasicBlock &ExitBB, // allowed us to create a single entry for a predecessor block without // having separate entries for each "edge" even though these edges are // required to produce identical results. 
- for (int i = PN->getNumIncomingValues() - 1; i >= 0; --i) { - if (PN->getIncomingBlock(i) != &OldExitingBB) + for (int i = PN.getNumIncomingValues() - 1; i >= 0; --i) { + if (PN.getIncomingBlock(i) != &OldExitingBB) continue; - Value *Incoming = PN->removeIncomingValue(i); + Value *Incoming = PN.removeIncomingValue(i); NewPN->addIncoming(Incoming, &OldPH); } // Now replace the old PHI with the new one and wire the old one in as an // input to the new one. - PN->replaceAllUsesWith(NewPN); - NewPN->addIncoming(PN, &ExitBB); + PN.replaceAllUsesWith(NewPN); + NewPN->addIncoming(&PN, &ExitBB); } } diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp index 789e0a477932..1522170dc3b9 100644 --- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -61,6 +61,11 @@ static cl::opt UserForwardSwitchCond( "forward-switch-cond", cl::Hidden, cl::init(false), cl::desc("Forward switch condition to phi ops (default = false)")); +static cl::opt UserSinkCommonInsts( + "sink-common-insts", cl::Hidden, cl::init(false), + cl::desc("Sink common instructions (default = false)")); + + STATISTIC(NumSimpl, "Number of blocks simplified"); /// If we have more than one empty (other than phi node) return blocks, @@ -205,6 +210,9 @@ SimplifyCFGPass::SimplifyCFGPass(const SimplifyCFGOptions &Opts) { Options.NeedCanonicalLoop = UserKeepLoops.getNumOccurrences() ? UserKeepLoops : Opts.NeedCanonicalLoop; + Options.SinkCommonInsts = UserSinkCommonInsts.getNumOccurrences() + ? UserSinkCommonInsts + : Opts.SinkCommonInsts; } PreservedAnalyses SimplifyCFGPass::run(Function &F, @@ -226,6 +234,7 @@ struct CFGSimplifyPass : public FunctionPass { CFGSimplifyPass(unsigned Threshold = 1, bool ForwardSwitchCond = false, bool ConvertSwitch = false, bool KeepLoops = true, + bool SinkCommon = false, std::function Ftor = nullptr) : FunctionPass(ID), PredicateFtor(std::move(Ftor)) { @@ -246,6 +255,10 @@ struct CFGSimplifyPass : public FunctionPass { Options.NeedCanonicalLoop = UserKeepLoops.getNumOccurrences() ? UserKeepLoops : KeepLoops; + + Options.SinkCommonInsts = UserSinkCommonInsts.getNumOccurrences() + ? 
UserSinkCommonInsts + : SinkCommon; } bool runOnFunction(Function &F) override { @@ -276,7 +289,8 @@ INITIALIZE_PASS_END(CFGSimplifyPass, "simplifycfg", "Simplify the CFG", false, FunctionPass * llvm::createCFGSimplificationPass(unsigned Threshold, bool ForwardSwitchCond, bool ConvertSwitch, bool KeepLoops, + bool SinkCommon, std::function Ftor) { return new CFGSimplifyPass(Threshold, ForwardSwitchCond, ConvertSwitch, - KeepLoops, std::move(Ftor)); + KeepLoops, SinkCommon, std::move(Ftor)); } diff --git a/lib/Transforms/Scalar/Sink.cpp b/lib/Transforms/Scalar/Sink.cpp index 5210f165b874..811762880493 100644 --- a/lib/Transforms/Scalar/Sink.cpp +++ b/lib/Transforms/Scalar/Sink.cpp @@ -68,7 +68,7 @@ static bool isSafeToMove(Instruction *Inst, AliasAnalysis &AA, if (LoadInst *L = dyn_cast(Inst)) { MemoryLocation Loc = MemoryLocation::get(L); for (Instruction *S : Stores) - if (AA.getModRefInfo(S, Loc) & MRI_Mod) + if (isModSet(AA.getModRefInfo(S, Loc))) return false; } @@ -83,7 +83,7 @@ static bool isSafeToMove(Instruction *Inst, AliasAnalysis &AA, return false; for (Instruction *S : Stores) - if (AA.getModRefInfo(S, CS) & MRI_Mod) + if (isModSet(AA.getModRefInfo(S, CS))) return false; } @@ -114,7 +114,7 @@ static bool IsAcceptableTarget(Instruction *Inst, BasicBlock *SuccToSinkTo, if (SuccToSinkTo->getUniquePredecessor() != Inst->getParent()) { // We cannot sink a load across a critical edge - there may be stores in // other code paths. - if (isa(Inst)) + if (Inst->mayReadFromMemory()) return false; // We don't want to sink across a critical edge if we don't dominate the diff --git a/lib/Transforms/Scalar/StructurizeCFG.cpp b/lib/Transforms/Scalar/StructurizeCFG.cpp index 2972e1cff9a4..525425bd0f0c 100644 --- a/lib/Transforms/Scalar/StructurizeCFG.cpp +++ b/lib/Transforms/Scalar/StructurizeCFG.cpp @@ -14,7 +14,6 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/DivergenceAnalysis.h" -#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/RegionInfo.h" #include "llvm/Analysis/RegionIterator.h" #include "llvm/Analysis/RegionPass.h" @@ -177,9 +176,8 @@ class StructurizeCFG : public RegionPass { Region *ParentRegion; DominatorTree *DT; - LoopInfo *LI; - SmallVector Order; + std::deque Order; BBSet Visited; BBPhiMap DeletedPhis; @@ -204,7 +202,7 @@ class StructurizeCFG : public RegionPass { void gatherPredicates(RegionNode *N); - void collectInfos(); + void analyzeNode(RegionNode *N); void insertConditions(bool Loops); @@ -258,7 +256,6 @@ class StructurizeCFG : public RegionPass { AU.addRequired(); AU.addRequiredID(LowerSwitchID); AU.addRequired(); - AU.addRequired(); AU.addPreserved(); RegionPass::getAnalysisUsage(AU); @@ -292,55 +289,17 @@ bool StructurizeCFG::doInitialization(Region *R, RGPassManager &RGM) { /// \brief Build up the general order of nodes void StructurizeCFG::orderNodes() { - ReversePostOrderTraversal RPOT(ParentRegion); - SmallDenseMap LoopBlocks; - - // The reverse post-order traversal of the list gives us an ordering close - // to what we want. The only problem with it is that sometimes backedges - // for outer loops will be visited before backedges for inner loops. 
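The StructurizeCFG rework in this and the following hunks stores the reverse post order in a std::deque that is consumed from the front, instead of building a SmallVector, reordering it with LoopInfo, and popping from the back. The sketch below only illustrates the container/consumption swap, not the dropped loop-aware reordering (which is a functional change of the patch); ints stand in for RegionNode pointers:

  #include <cassert>
  #include <deque>
  #include <vector>

  int main() {
    // Suppose this is the reverse post order of the region's nodes.
    std::vector<int> RPO = {1, 2, 3, 4};

    // Old scheme: keep a vector, reverse it, and pop from the back to visit
    // the nodes in RPO.
    std::vector<int> OldOrder(RPO.rbegin(), RPO.rend());
    std::vector<int> VisitedOld;
    while (!OldOrder.empty()) {
      VisitedOld.push_back(OldOrder.back());
      OldOrder.pop_back();
    }

    // New scheme: push nodes in RPO as they are analyzed and consume from the
    // front; no reversal step, and Order.front() is always the next node.
    std::deque<int> NewOrder(RPO.begin(), RPO.end());
    std::vector<int> VisitedNew;
    while (!NewOrder.empty()) {
      VisitedNew.push_back(NewOrder.front());
      NewOrder.pop_front();
    }

    assert(VisitedOld == VisitedNew); // same visitation order, less bookkeeping
    return 0;
  }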
- for (RegionNode *RN : RPOT) { - BasicBlock *BB = RN->getEntry(); - Loop *Loop = LI->getLoopFor(BB); - ++LoopBlocks[Loop]; + assert(Visited.empty()); + assert(Predicates.empty()); + assert(Loops.empty()); + assert(LoopPreds.empty()); + + // This must be RPO order for the back edge detection to work + for (RegionNode *RN : ReversePostOrderTraversal(ParentRegion)) { + // FIXME: Is there a better order to use for structurization? + Order.push_back(RN); + analyzeNode(RN); } - - unsigned CurrentLoopDepth = 0; - Loop *CurrentLoop = nullptr; - for (auto I = RPOT.begin(), E = RPOT.end(); I != E; ++I) { - BasicBlock *BB = (*I)->getEntry(); - unsigned LoopDepth = LI->getLoopDepth(BB); - - if (is_contained(Order, *I)) - continue; - - if (LoopDepth < CurrentLoopDepth) { - // Make sure we have visited all blocks in this loop before moving back to - // the outer loop. - - auto LoopI = I; - while (unsigned &BlockCount = LoopBlocks[CurrentLoop]) { - LoopI++; - BasicBlock *LoopBB = (*LoopI)->getEntry(); - if (LI->getLoopFor(LoopBB) == CurrentLoop) { - --BlockCount; - Order.push_back(*LoopI); - } - } - } - - CurrentLoop = LI->getLoopFor(BB); - if (CurrentLoop) - LoopBlocks[CurrentLoop]--; - - CurrentLoopDepth = LoopDepth; - Order.push_back(*I); - } - - // This pass originally used a post-order traversal and then operated on - // the list in reverse. Now that we are using a reverse post-order traversal - // rather than re-working the whole pass to operate on the list in order, - // we just reverse the list and continue to operate on it in reverse. - std::reverse(Order.begin(), Order.end()); } /// \brief Determine the end of the loops @@ -466,32 +425,19 @@ void StructurizeCFG::gatherPredicates(RegionNode *N) { } /// \brief Collect various loop and predicate infos -void StructurizeCFG::collectInfos() { - // Reset predicate - Predicates.clear(); - - // and loop infos - Loops.clear(); - LoopPreds.clear(); +void StructurizeCFG::analyzeNode(RegionNode *RN) { + DEBUG(dbgs() << "Visiting: " + << (RN->isSubRegion() ? "SubRegion with entry: " : "") + << RN->getEntry()->getName() << '\n'); - // Reset the visited nodes - Visited.clear(); - - for (RegionNode *RN : reverse(Order)) { - DEBUG(dbgs() << "Visiting: " - << (RN->isSubRegion() ? 
"SubRegion with entry: " : "") - << RN->getEntry()->getName() << " Loop Depth: " - << LI->getLoopDepth(RN->getEntry()) << "\n"); - - // Analyze all the conditions leading to a node - gatherPredicates(RN); + // Analyze all the conditions leading to a node + gatherPredicates(RN); - // Remember that we've seen this node - Visited.insert(RN->getEntry()); + // Remember that we've seen this node + Visited.insert(RN->getEntry()); - // Find the last back edges - analyzeLoops(RN); - } + // Find the last back edges + analyzeLoops(RN); } /// \brief Insert the missing branch conditions @@ -544,10 +490,7 @@ void StructurizeCFG::insertConditions(bool Loops) { /// them in DeletedPhis void StructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) { PhiMap &Map = DeletedPhis[To]; - for (Instruction &I : *To) { - if (!isa(I)) - break; - PHINode &Phi = cast(I); + for (PHINode &Phi : To->phis()) { while (Phi.getBasicBlockIndex(From) != -1) { Value *Deleted = Phi.removeIncomingValue(From, false); Map[&Phi].push_back(std::make_pair(From, Deleted)); @@ -557,10 +500,7 @@ void StructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) { /// \brief Add a dummy PHI value as soon as we knew the new predecessor void StructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) { - for (Instruction &I : *To) { - if (!isa(I)) - break; - PHINode &Phi = cast(I); + for (PHINode &Phi : To->phis()) { Value *Undef = UndefValue::get(Phi.getType()); Phi.addIncoming(Undef, From); } @@ -670,7 +610,7 @@ void StructurizeCFG::changeExit(RegionNode *Node, BasicBlock *NewExit, BasicBlock *StructurizeCFG::getNextFlow(BasicBlock *Dominator) { LLVMContext &Context = Func->getContext(); BasicBlock *Insert = Order.empty() ? ParentRegion->getExit() : - Order.back()->getEntry(); + Order.front()->getEntry(); BasicBlock *Flow = BasicBlock::Create(Context, FlowBlockName, Func, Insert); DT->addNewBlock(Flow, Dominator); @@ -750,7 +690,8 @@ bool StructurizeCFG::isPredictableTrue(RegionNode *Node) { /// Take one node from the order vector and wire it up void StructurizeCFG::wireFlow(bool ExitUseAllowed, BasicBlock *LoopEnd) { - RegionNode *Node = Order.pop_back_val(); + RegionNode *Node = Order.front(); + Order.pop_front(); Visited.insert(Node->getEntry()); if (isPredictableTrue(Node)) { @@ -774,7 +715,7 @@ void StructurizeCFG::wireFlow(bool ExitUseAllowed, PrevNode = Node; while (!Order.empty() && !Visited.count(LoopEnd) && - dominatesPredicates(Entry, Order.back())) { + dominatesPredicates(Entry, Order.front())) { handleLoops(false, LoopEnd); } @@ -785,7 +726,7 @@ void StructurizeCFG::wireFlow(bool ExitUseAllowed, void StructurizeCFG::handleLoops(bool ExitUseAllowed, BasicBlock *LoopEnd) { - RegionNode *Node = Order.back(); + RegionNode *Node = Order.front(); BasicBlock *LoopStart = Node->getEntry(); if (!Loops.count(LoopStart)) { @@ -930,10 +871,9 @@ bool StructurizeCFG::runOnRegion(Region *R, RGPassManager &RGM) { ParentRegion = R; DT = &getAnalysis().getDomTree(); - LI = &getAnalysis().getLoopInfo(); orderNodes(); - collectInfos(); + createFlow(); insertConditions(false); insertConditions(true); diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp index 9d6702b0fd0e..2a1106b41de2 100644 --- a/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -79,7 +79,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include 
"llvm/Transforms/Utils/Local.h" using namespace llvm; #define DEBUG_TYPE "tailcallelim" @@ -303,10 +302,7 @@ static bool markTails(Function &F, bool &AllCallsAreTailCalls, if (Visited[CI->getParent()] != ESCAPED) { // If the escape point was part way through the block, calls after the // escape point wouldn't have been put into DeferredTails. - ORE->emit([&]() { - return OptimizationRemark(DEBUG_TYPE, "tailcall", CI) - << "marked as tail call candidate"; - }); + DEBUG(dbgs() << "Marked as tail call candidate: " << *CI << "\n"); CI->setTailCall(); Modified = true; } else { @@ -335,7 +331,7 @@ static bool canMoveAboveCall(Instruction *I, CallInst *CI, AliasAnalysis *AA) { // Writes to memory only matter if they may alias the pointer // being loaded from. const DataLayout &DL = L->getModule()->getDataLayout(); - if ((AA->getModRefInfo(CI, MemoryLocation::get(L)) & MRI_Mod) || + if (isModSet(AA->getModRefInfo(CI, MemoryLocation::get(L))) || !isSafeToLoadUnconditionally(L->getPointerOperand(), L->getAlignment(), DL, L)) return false; diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp index 606bd8baccaa..9d3593913fae 100644 --- a/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -45,16 +45,22 @@ using namespace llvm; -void llvm::DeleteDeadBlock(BasicBlock *BB) { +void llvm::DeleteDeadBlock(BasicBlock *BB, DeferredDominance *DDT) { assert((pred_begin(BB) == pred_end(BB) || // Can delete self loop. BB->getSinglePredecessor() == BB) && "Block is not dead!"); TerminatorInst *BBTerm = BB->getTerminator(); + std::vector Updates; // Loop through all of our successors and make sure they know that one // of their predecessors is going away. - for (BasicBlock *Succ : BBTerm->successors()) + if (DDT) + Updates.reserve(BBTerm->getNumSuccessors()); + for (BasicBlock *Succ : BBTerm->successors()) { Succ->removePredecessor(BB); + if (DDT) + Updates.push_back({DominatorTree::Delete, BB, Succ}); + } // Zap all the instructions in the block. while (!BB->empty()) { @@ -69,8 +75,12 @@ void llvm::DeleteDeadBlock(BasicBlock *BB) { BB->getInstList().pop_back(); } - // Zap the block! - BB->eraseFromParent(); + if (DDT) { + DDT->applyUpdates(Updates); + DDT->deleteBB(BB); // Deferred deletion of BB. + } else { + BB->eraseFromParent(); // Zap the block! + } } void llvm::FoldSingleEntryPHINodes(BasicBlock *BB, @@ -94,9 +104,8 @@ bool llvm::DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI) { // Recursively deleting a PHI may cause multiple PHIs to be deleted // or RAUW'd undef, so use an array of WeakTrackingVH for the PHIs to delete. SmallVector PHIs; - for (BasicBlock::iterator I = BB->begin(); - PHINode *PN = dyn_cast(I); ++I) - PHIs.push_back(PN); + for (PHINode &PN : BB->phis()) + PHIs.push_back(&PN); bool Changed = false; for (unsigned i = 0, e = PHIs.size(); i != e; ++i) @@ -134,24 +143,17 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT, if (!OnlySucc) return false; // Can't merge if there is PHI loop. - for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE; ++BI) { - if (PHINode *PN = dyn_cast(BI)) { - for (Value *IncValue : PN->incoming_values()) - if (IncValue == PN) - return false; - } else - break; - } + for (PHINode &PN : BB->phis()) + for (Value *IncValue : PN.incoming_values()) + if (IncValue == &PN) + return false; // Begin by getting rid of unneeded PHIs. 
SmallVector IncomingValues; if (isa(BB->front())) { - for (auto &I : *BB) - if (PHINode *PN = dyn_cast(&I)) { - if (PN->getIncomingValue(0) != PN) - IncomingValues.push_back(PN->getIncomingValue(0)); - } else - break; + for (PHINode &PN : BB->phis()) + if (PN.getIncomingValue(0) != &PN) + IncomingValues.push_back(PN.getIncomingValue(0)); FoldSingleEntryPHINodes(BB, MemDep); } @@ -324,6 +326,7 @@ static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB, if (!LI) return; + assert(DT && "DT should be available to update LoopInfo!"); Loop *L = LI->getLoopFor(OldBB); // If we need to preserve loop analyses, collect some information about how @@ -331,6 +334,12 @@ static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB, bool IsLoopEntry = !!L; bool SplitMakesNewLoopHeader = false; for (BasicBlock *Pred : Preds) { + // Preds that are not reachable from entry should not be used to identify if + // OldBB is a loop entry or if SplitMakesNewLoopHeader. Unreachable blocks + // are not within any loops, so we incorrectly mark SplitMakesNewLoopHeader + // as true and make the NewBB the header of some loop. This breaks LI. + if (!DT->isReachableFromEntry(Pred)) + continue; // If we need to preserve LCSSA, determine if any of the preds is a loop // exit. if (PreserveLCSSA) diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp index 417a771cf952..464d1a34f518 100644 --- a/lib/Transforms/Utils/BreakCriticalEdges.cpp +++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp @@ -16,9 +16,11 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/BreakCriticalEdges.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/IR/CFG.h" @@ -28,6 +30,8 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/ValueMapper.h" using namespace llvm; #define DEBUG_TYPE "break-crit-edges" @@ -102,10 +106,9 @@ static void createPHIsForSplitLoopExit(ArrayRef Preds, SplitBB->isLandingPad()) && "SplitBB has non-PHI nodes!"); // For each PHI in the destination block. - for (BasicBlock::iterator I = DestBB->begin(); - PHINode *PN = dyn_cast(I); ++I) { - unsigned Idx = PN->getBasicBlockIndex(SplitBB); - Value *V = PN->getIncomingValue(Idx); + for (PHINode &PN : DestBB->phis()) { + unsigned Idx = PN.getBasicBlockIndex(SplitBB); + Value *V = PN.getIncomingValue(Idx); // If the input is a PHI which already satisfies LCSSA, don't create // a new one. @@ -115,13 +118,13 @@ static void createPHIsForSplitLoopExit(ArrayRef Preds, // Otherwise a new PHI is needed. Create one and populate it. PHINode *NewPN = PHINode::Create( - PN->getType(), Preds.size(), "split", + PN.getType(), Preds.size(), "split", SplitBB->isLandingPad() ? &SplitBB->front() : SplitBB->getTerminator()); for (unsigned i = 0, e = Preds.size(); i != e; ++i) NewPN->addIncoming(V, Preds[i]); // Update the original PHI. 
- PN->setIncomingValue(Idx, NewPN); + PN.setIncomingValue(Idx, NewPN); } } @@ -290,3 +293,159 @@ llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, return NewBB; } + +// Return the unique indirectbr predecessor of a block. This may return null +// even if such a predecessor exists, if it's not useful for splitting. +// If a predecessor is found, OtherPreds will contain all other (non-indirectbr) +// predecessors of BB. +static BasicBlock * +findIBRPredecessor(BasicBlock *BB, SmallVectorImpl &OtherPreds) { + // If the block doesn't have any PHIs, we don't care about it, since there's + // no point in splitting it. + PHINode *PN = dyn_cast(BB->begin()); + if (!PN) + return nullptr; + + // Verify we have exactly one IBR predecessor. + // Conservatively bail out if one of the other predecessors is not a "regular" + // terminator (that is, not a switch or a br). + BasicBlock *IBB = nullptr; + for (unsigned Pred = 0, E = PN->getNumIncomingValues(); Pred != E; ++Pred) { + BasicBlock *PredBB = PN->getIncomingBlock(Pred); + TerminatorInst *PredTerm = PredBB->getTerminator(); + switch (PredTerm->getOpcode()) { + case Instruction::IndirectBr: + if (IBB) + return nullptr; + IBB = PredBB; + break; + case Instruction::Br: + case Instruction::Switch: + OtherPreds.push_back(PredBB); + continue; + default: + return nullptr; + } + } + + return IBB; +} + +bool llvm::SplitIndirectBrCriticalEdges(Function &F, + BranchProbabilityInfo *BPI, + BlockFrequencyInfo *BFI) { + // Check whether the function has any indirectbrs, and collect which blocks + // they may jump to. Since most functions don't have indirect branches, + // this lowers the common case's overhead to O(Blocks) instead of O(Edges). + SmallSetVector Targets; + for (auto &BB : F) { + auto *IBI = dyn_cast(BB.getTerminator()); + if (!IBI) + continue; + + for (unsigned Succ = 0, E = IBI->getNumSuccessors(); Succ != E; ++Succ) + Targets.insert(IBI->getSuccessor(Succ)); + } + + if (Targets.empty()) + return false; + + bool ShouldUpdateAnalysis = BPI && BFI; + bool Changed = false; + for (BasicBlock *Target : Targets) { + SmallVector OtherPreds; + BasicBlock *IBRPred = findIBRPredecessor(Target, OtherPreds); + // If we did not found an indirectbr, or the indirectbr is the only + // incoming edge, this isn't the kind of edge we're looking for. + if (!IBRPred || OtherPreds.empty()) + continue; + + // Don't even think about ehpads/landingpads. + Instruction *FirstNonPHI = Target->getFirstNonPHI(); + if (FirstNonPHI->isEHPad() || Target->isLandingPad()) + continue; + + BasicBlock *BodyBlock = Target->splitBasicBlock(FirstNonPHI, ".split"); + if (ShouldUpdateAnalysis) { + // Copy the BFI/BPI from Target to BodyBlock. + for (unsigned I = 0, E = BodyBlock->getTerminator()->getNumSuccessors(); + I < E; ++I) + BPI->setEdgeProbability(BodyBlock, I, + BPI->getEdgeProbability(Target, I)); + BFI->setBlockFreq(BodyBlock, BFI->getBlockFreq(Target).getFrequency()); + } + // It's possible Target was its own successor through an indirectbr. + // In this case, the indirectbr now comes from BodyBlock. + if (IBRPred == Target) + IBRPred = BodyBlock; + + // At this point Target only has PHIs, and BodyBlock has the rest of the + // block's body. Create a copy of Target that will be used by the "direct" + // preds. 
+ ValueToValueMapTy VMap; + BasicBlock *DirectSucc = CloneBasicBlock(Target, VMap, ".clone", &F); + + BlockFrequency BlockFreqForDirectSucc; + for (BasicBlock *Pred : OtherPreds) { + // If the target is a loop to itself, then the terminator of the split + // block (BodyBlock) needs to be updated. + BasicBlock *Src = Pred != Target ? Pred : BodyBlock; + Src->getTerminator()->replaceUsesOfWith(Target, DirectSucc); + if (ShouldUpdateAnalysis) + BlockFreqForDirectSucc += BFI->getBlockFreq(Src) * + BPI->getEdgeProbability(Src, DirectSucc); + } + if (ShouldUpdateAnalysis) { + BFI->setBlockFreq(DirectSucc, BlockFreqForDirectSucc.getFrequency()); + BlockFrequency NewBlockFreqForTarget = + BFI->getBlockFreq(Target) - BlockFreqForDirectSucc; + BFI->setBlockFreq(Target, NewBlockFreqForTarget.getFrequency()); + BPI->eraseBlock(Target); + } + + // Ok, now fix up the PHIs. We know the two blocks only have PHIs, and that + // they are clones, so the number of PHIs are the same. + // (a) Remove the edge coming from IBRPred from the "Direct" PHI + // (b) Leave that as the only edge in the "Indirect" PHI. + // (c) Merge the two in the body block. + BasicBlock::iterator Indirect = Target->begin(), + End = Target->getFirstNonPHI()->getIterator(); + BasicBlock::iterator Direct = DirectSucc->begin(); + BasicBlock::iterator MergeInsert = BodyBlock->getFirstInsertionPt(); + + assert(&*End == Target->getTerminator() && + "Block was expected to only contain PHIs"); + + while (Indirect != End) { + PHINode *DirPHI = cast(Direct); + PHINode *IndPHI = cast(Indirect); + + // Now, clean up - the direct block shouldn't get the indirect value, + // and vice versa. + DirPHI->removeIncomingValue(IBRPred); + Direct++; + + // Advance the pointer here, to avoid invalidation issues when the old + // PHI is erased. + Indirect++; + + PHINode *NewIndPHI = PHINode::Create(IndPHI->getType(), 1, "ind", IndPHI); + NewIndPHI->addIncoming(IndPHI->getIncomingValueForBlock(IBRPred), + IBRPred); + + // Create a PHI in the body block, to merge the direct and indirect + // predecessors. 
+ PHINode *MergePHI = + PHINode::Create(IndPHI->getType(), 2, "merge", &*MergeInsert); + MergePHI->addIncoming(NewIndPHI, Target); + MergePHI->addIncoming(DirPHI, DirectSucc); + + IndPHI->replaceAllUsesWith(MergePHI); + IndPHI->eraseFromParent(); + } + + Changed = true; + } + + return Changed; +} diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp index b60dfb4f3541..d4cf03c326d4 100644 --- a/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/lib/Transforms/Utils/BuildLibCalls.cpp @@ -709,6 +709,19 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { } } +bool llvm::hasUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty, + LibFunc DoubleFn, LibFunc FloatFn, + LibFunc LongDoubleFn) { + switch (Ty->getTypeID()) { + case Type::FloatTyID: + return TLI->has(FloatFn); + case Type::DoubleTyID: + return TLI->has(DoubleFn); + default: + return TLI->has(LongDoubleFn); + } +} + //- Emit LibCalls ------------------------------------------------------------// Value *llvm::castToCStr(Value *V, IRBuilder<> &B) { diff --git a/lib/Transforms/Utils/BypassSlowDivision.cpp b/lib/Transforms/Utils/BypassSlowDivision.cpp index e9c14c93a9ad..f711b192f604 100644 --- a/lib/Transforms/Utils/BypassSlowDivision.cpp +++ b/lib/Transforms/Utils/BypassSlowDivision.cpp @@ -352,11 +352,6 @@ Optional FastDivInsertionTask::insertFastDivAndRem() { Value *Dividend = SlowDivOrRem->getOperand(0); Value *Divisor = SlowDivOrRem->getOperand(1); - if (isa(Divisor)) { - // Keep division by a constant for DAGCombiner. - return None; - } - VisitedSetTy SetL; ValueRange DividendRange = getValueRange(Dividend, SetL); if (DividendRange == VALRNG_LIKELY_LONG) @@ -372,7 +367,9 @@ Optional FastDivInsertionTask::insertFastDivAndRem() { if (DividendShort && DivisorShort) { // If both operands are known to be short then just replace the long - // division with a short one in-place. + // division with a short one in-place. Since we're not introducing control + // flow in this case, narrowing the division is always a win, even if the + // divisor is a constant (and will later get replaced by a multiplication). IRBuilder<> Builder(SlowDivOrRem); Value *TruncDividend = Builder.CreateTrunc(Dividend, BypassType); @@ -382,7 +379,16 @@ Optional FastDivInsertionTask::insertFastDivAndRem() { Value *ExtDiv = Builder.CreateZExt(TruncDiv, getSlowType()); Value *ExtRem = Builder.CreateZExt(TruncRem, getSlowType()); return QuotRemPair(ExtDiv, ExtRem); - } else if (DividendShort && !isSignedOp()) { + } + + if (isa(Divisor)) { + // If the divisor is not a constant, DAGCombiner will convert it to a + // multiplication by a magic constant. It isn't clear if it is worth + // introducing control flow to get a narrower multiply. 
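For context on the BypassSlowDivision hunk above: the pass guards a wide division with a runtime check and uses a narrow division when both operands happen to fit in 32 bits, which is why a constant divisor (already turned into a multiply by DAGCombiner) is not worth the extra control flow. A plain C++ sketch of the shape of the emitted code:

  #include <cassert>
  #include <cstdint>

  // Roughly what the rewritten IR corresponds to: a narrow divide when both
  // operands fit in 32 bits, otherwise the original full-width divide.
  static uint64_t bypassDiv(uint64_t Dividend, uint64_t Divisor) {
    if (((Dividend | Divisor) >> 32) == 0) {
      uint32_t Quot = static_cast<uint32_t>(Dividend) /
                      static_cast<uint32_t>(Divisor); // fast short division
      return Quot;                                    // zero-extended result
    }
    return Dividend / Divisor;                        // slow long division
  }

  int main() {
    assert(bypassDiv(100, 7) == 100 / 7);
    assert(bypassDiv((1ULL << 40) + 123, 7) == ((1ULL << 40) + 123) / 7);
    return 0;
  }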
+ return None; + } + + if (DividendShort && !isSignedOp()) { // If the division is unsigned and Dividend is known to be short, then // either // 1) Divisor is less or equal to Dividend, and the result can be computed diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt index f3bf0d8c248b..972e47f9270a 100644 --- a/lib/Transforms/Utils/CMakeLists.txt +++ b/lib/Transforms/Utils/CMakeLists.txt @@ -5,6 +5,7 @@ add_llvm_library(LLVMTransformUtils BreakCriticalEdges.cpp BuildLibCalls.cpp BypassSlowDivision.cpp + CallPromotionUtils.cpp CloneFunction.cpp CloneModule.cpp CodeExtractor.cpp diff --git a/lib/Transforms/Utils/CallPromotionUtils.cpp b/lib/Transforms/Utils/CallPromotionUtils.cpp new file mode 100644 index 000000000000..5dc6068d4a0b --- /dev/null +++ b/lib/Transforms/Utils/CallPromotionUtils.cpp @@ -0,0 +1,423 @@ +//===- CallPromotionUtils.cpp - Utilities for call promotion ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements utilities useful for promoting indirect call sites to +// direct call sites. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/CallPromotionUtils.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" + +using namespace llvm; + +#define DEBUG_TYPE "call-promotion-utils" + +/// Fix-up phi nodes in an invoke instruction's normal destination. +/// +/// After versioning an invoke instruction, values coming from the original +/// block will now be coming from the "merge" block. For example, in the code +/// below: +/// +/// then_bb: +/// %t0 = invoke i32 %ptr() to label %merge_bb unwind label %unwind_dst +/// +/// else_bb: +/// %t1 = invoke i32 %ptr() to label %merge_bb unwind label %unwind_dst +/// +/// merge_bb: +/// %t2 = phi i32 [ %t0, %then_bb ], [ %t1, %else_bb ] +/// br %normal_dst +/// +/// normal_dst: +/// %t3 = phi i32 [ %x, %orig_bb ], ... +/// +/// "orig_bb" is no longer a predecessor of "normal_dst", so the phi nodes in +/// "normal_dst" must be fixed to refer to "merge_bb": +/// +/// normal_dst: +/// %t3 = phi i32 [ %x, %merge_bb ], ... +/// +static void fixupPHINodeForNormalDest(InvokeInst *Invoke, BasicBlock *OrigBlock, + BasicBlock *MergeBlock) { + for (PHINode &Phi : Invoke->getNormalDest()->phis()) { + int Idx = Phi.getBasicBlockIndex(OrigBlock); + if (Idx == -1) + continue; + Phi.setIncomingBlock(Idx, MergeBlock); + } +} + +/// Fix-up phi nodes in an invoke instruction's unwind destination. +/// +/// After versioning an invoke instruction, values coming from the original +/// block will now be coming from either the "then" block or the "else" block. +/// For example, in the code below: +/// +/// then_bb: +/// %t0 = invoke i32 %ptr() to label %merge_bb unwind label %unwind_dst +/// +/// else_bb: +/// %t1 = invoke i32 %ptr() to label %merge_bb unwind label %unwind_dst +/// +/// unwind_dst: +/// %t3 = phi i32 [ %x, %orig_bb ], ... +/// +/// "orig_bb" is no longer a predecessor of "unwind_dst", so the phi nodes in +/// "unwind_dst" must be fixed to refer to "then_bb" and "else_bb": +/// +/// unwind_dst: +/// %t3 = phi i32 [ %x, %then_bb ], [ %x, %else_bb ], ... 
+/// +static void fixupPHINodeForUnwindDest(InvokeInst *Invoke, BasicBlock *OrigBlock, + BasicBlock *ThenBlock, + BasicBlock *ElseBlock) { + for (PHINode &Phi : Invoke->getUnwindDest()->phis()) { + int Idx = Phi.getBasicBlockIndex(OrigBlock); + if (Idx == -1) + continue; + auto *V = Phi.getIncomingValue(Idx); + Phi.setIncomingBlock(Idx, ThenBlock); + Phi.addIncoming(V, ElseBlock); + } +} + +/// Create a phi node for the returned value of a call or invoke instruction. +/// +/// After versioning a call or invoke instruction that returns a value, we have +/// to merge the value of the original and new instructions. We do this by +/// creating a phi node and replacing uses of the original instruction with this +/// phi node. +/// +/// For example, if \p OrigInst is defined in "else_bb" and \p NewInst is +/// defined in "then_bb", we create the following phi node: +/// +/// ; Uses of the original instruction are replaced by uses of the phi node. +/// %t0 = phi i32 [ %orig_inst, %else_bb ], [ %new_inst, %then_bb ], +/// +static void createRetPHINode(Instruction *OrigInst, Instruction *NewInst, + BasicBlock *MergeBlock, IRBuilder<> &Builder) { + + if (OrigInst->getType()->isVoidTy() || OrigInst->use_empty()) + return; + + Builder.SetInsertPoint(&MergeBlock->front()); + PHINode *Phi = Builder.CreatePHI(OrigInst->getType(), 0); + SmallVector UsersToUpdate; + for (User *U : OrigInst->users()) + UsersToUpdate.push_back(U); + for (User *U : UsersToUpdate) + U->replaceUsesOfWith(OrigInst, Phi); + Phi->addIncoming(OrigInst, OrigInst->getParent()); + Phi->addIncoming(NewInst, NewInst->getParent()); +} + +/// Cast a call or invoke instruction to the given type. +/// +/// When promoting a call site, the return type of the call site might not match +/// that of the callee. If this is the case, we have to cast the returned value +/// to the correct type. The location of the cast depends on if we have a call +/// or invoke instruction. +/// +/// For example, if the call instruction below requires a bitcast after +/// promotion: +/// +/// orig_bb: +/// %t0 = call i32 @func() +/// ... +/// +/// The bitcast is placed after the call instruction: +/// +/// orig_bb: +/// ; Uses of the original return value are replaced by uses of the bitcast. +/// %t0 = call i32 @func() +/// %t1 = bitcast i32 %t0 to ... +/// ... +/// +/// A similar transformation is performed for invoke instructions. However, +/// since invokes are terminating, a new block is created for the bitcast. For +/// example, if the invoke instruction below requires a bitcast after promotion: +/// +/// orig_bb: +/// %t0 = invoke i32 @func() to label %normal_dst unwind label %unwind_dst +/// +/// The edge between the original block and the invoke's normal destination is +/// split, and the bitcast is placed there: +/// +/// orig_bb: +/// %t0 = invoke i32 @func() to label %split_bb unwind label %unwind_dst +/// +/// split_bb: +/// ; Uses of the original return value are replaced by uses of the bitcast. +/// %t1 = bitcast i32 %t0 to ... +/// br label %normal_dst +/// +static void createRetBitCast(CallSite CS, Type *RetTy, CastInst **RetBitCast) { + + // Save the users of the calling instruction. These uses will be changed to + // use the bitcast after we create it. + SmallVector UsersToUpdate; + for (User *U : CS.getInstruction()->users()) + UsersToUpdate.push_back(U); + + // Determine an appropriate location to create the bitcast for the return + // value. The location depends on if we have a call or invoke instruction. 
+ Instruction *InsertBefore = nullptr; + if (auto *Invoke = dyn_cast(CS.getInstruction())) + InsertBefore = + &SplitEdge(Invoke->getParent(), Invoke->getNormalDest())->front(); + else + InsertBefore = &*std::next(CS.getInstruction()->getIterator()); + + // Bitcast the return value to the correct type. + auto *Cast = CastInst::Create(Instruction::BitCast, CS.getInstruction(), + RetTy, "", InsertBefore); + if (RetBitCast) + *RetBitCast = Cast; + + // Replace all the original uses of the calling instruction with the bitcast. + for (User *U : UsersToUpdate) + U->replaceUsesOfWith(CS.getInstruction(), Cast); +} + +/// Predicate and clone the given call site. +/// +/// This function creates an if-then-else structure at the location of the call +/// site. The "if" condition compares the call site's called value to the given +/// callee. The original call site is moved into the "else" block, and a clone +/// of the call site is placed in the "then" block. The cloned instruction is +/// returned. +/// +/// For example, the call instruction below: +/// +/// orig_bb: +/// %t0 = call i32 %ptr() +/// ... +/// +/// Is replace by the following: +/// +/// orig_bb: +/// %cond = icmp eq i32 ()* %ptr, @func +/// br i1 %cond, %then_bb, %else_bb +/// +/// then_bb: +/// ; The clone of the original call instruction is placed in the "then" +/// ; block. It is not yet promoted. +/// %t1 = call i32 %ptr() +/// br merge_bb +/// +/// else_bb: +/// ; The original call instruction is moved to the "else" block. +/// %t0 = call i32 %ptr() +/// br merge_bb +/// +/// merge_bb: +/// ; Uses of the original call instruction are replaced by uses of the phi +/// ; node. +/// %t2 = phi i32 [ %t0, %else_bb ], [ %t1, %then_bb ] +/// ... +/// +/// A similar transformation is performed for invoke instructions. However, +/// since invokes are terminating, more work is required. For example, the +/// invoke instruction below: +/// +/// orig_bb: +/// %t0 = invoke %ptr() to label %normal_dst unwind label %unwind_dst +/// +/// Is replace by the following: +/// +/// orig_bb: +/// %cond = icmp eq i32 ()* %ptr, @func +/// br i1 %cond, %then_bb, %else_bb +/// +/// then_bb: +/// ; The clone of the original invoke instruction is placed in the "then" +/// ; block, and its normal destination is set to the "merge" block. It is +/// ; not yet promoted. +/// %t1 = invoke i32 %ptr() to label %merge_bb unwind label %unwind_dst +/// +/// else_bb: +/// ; The original invoke instruction is moved into the "else" block, and +/// ; its normal destination is set to the "merge" block. +/// %t0 = invoke i32 %ptr() to label %merge_bb unwind label %unwind_dst +/// +/// merge_bb: +/// ; Uses of the original invoke instruction are replaced by uses of the +/// ; phi node, and the merge block branches to the normal destination. +/// %t2 = phi i32 [ %t0, %else_bb ], [ %t1, %then_bb ] +/// br %normal_dst +/// +static Instruction *versionCallSite(CallSite CS, Value *Callee, + MDNode *BranchWeights) { + + IRBuilder<> Builder(CS.getInstruction()); + Instruction *OrigInst = CS.getInstruction(); + BasicBlock *OrigBlock = OrigInst->getParent(); + + // Create the compare. The called value and callee must have the same type to + // be compared. + if (CS.getCalledValue()->getType() != Callee->getType()) + Callee = Builder.CreateBitCast(Callee, CS.getCalledValue()->getType()); + auto *Cond = Builder.CreateICmpEQ(CS.getCalledValue(), Callee); + + // Create an if-then-else structure. 
The original instruction is moved into + // the "else" block, and a clone of the original instruction is placed in the + // "then" block. + TerminatorInst *ThenTerm = nullptr; + TerminatorInst *ElseTerm = nullptr; + SplitBlockAndInsertIfThenElse(Cond, CS.getInstruction(), &ThenTerm, &ElseTerm, + BranchWeights); + BasicBlock *ThenBlock = ThenTerm->getParent(); + BasicBlock *ElseBlock = ElseTerm->getParent(); + BasicBlock *MergeBlock = OrigInst->getParent(); + + ThenBlock->setName("if.true.direct_targ"); + ElseBlock->setName("if.false.orig_indirect"); + MergeBlock->setName("if.end.icp"); + + Instruction *NewInst = OrigInst->clone(); + OrigInst->moveBefore(ElseTerm); + NewInst->insertBefore(ThenTerm); + + // If the original call site is an invoke instruction, we have extra work to + // do since invoke instructions are terminating. We have to fix-up phi nodes + // in the invoke's normal and unwind destinations. + if (auto *OrigInvoke = dyn_cast(OrigInst)) { + auto *NewInvoke = cast(NewInst); + + // Invoke instructions are terminating, so we don't need the terminator + // instructions that were just created. + ThenTerm->eraseFromParent(); + ElseTerm->eraseFromParent(); + + // Branch from the "merge" block to the original normal destination. + Builder.SetInsertPoint(MergeBlock); + Builder.CreateBr(OrigInvoke->getNormalDest()); + + // Fix-up phi nodes in the original invoke's normal and unwind destinations. + fixupPHINodeForNormalDest(OrigInvoke, OrigBlock, MergeBlock); + fixupPHINodeForUnwindDest(OrigInvoke, MergeBlock, ThenBlock, ElseBlock); + + // Now set the normal destinations of the invoke instructions to be the + // "merge" block. + OrigInvoke->setNormalDest(MergeBlock); + NewInvoke->setNormalDest(MergeBlock); + } + + // Create a phi node for the returned value of the call site. + createRetPHINode(OrigInst, NewInst, MergeBlock, Builder); + + return NewInst; +} + +bool llvm::isLegalToPromote(CallSite CS, Function *Callee, + const char **FailureReason) { + assert(!CS.getCalledFunction() && "Only indirect call sites can be promoted"); + + // Check the return type. The callee's return value type must be bitcast + // compatible with the call site's type. + Type *CallRetTy = CS.getInstruction()->getType(); + Type *FuncRetTy = Callee->getReturnType(); + if (CallRetTy != FuncRetTy) + if (!CastInst::isBitCastable(FuncRetTy, CallRetTy)) { + if (FailureReason) + *FailureReason = "Return type mismatch"; + return false; + } + + // The number of formal arguments of the callee. + unsigned NumParams = Callee->getFunctionType()->getNumParams(); + + // Check the number of arguments. The callee and call site must agree on the + // number of arguments. + if (CS.arg_size() != NumParams && !Callee->isVarArg()) { + if (FailureReason) + *FailureReason = "The number of arguments mismatch"; + return false; + } + + // Check the argument types. The callee's formal argument types must be + // bitcast compatible with the corresponding actual argument types of the call + // site. 
+ for (unsigned I = 0; I < NumParams; ++I) { + Type *FormalTy = Callee->getFunctionType()->getFunctionParamType(I); + Type *ActualTy = CS.getArgument(I)->getType(); + if (FormalTy == ActualTy) + continue; + if (!CastInst::isBitCastable(ActualTy, FormalTy)) { + if (FailureReason) + *FailureReason = "Argument type mismatch"; + return false; + } + } + + return true; +} + +Instruction *llvm::promoteCall(CallSite CS, Function *Callee, + CastInst **RetBitCast) { + assert(!CS.getCalledFunction() && "Only indirect call sites can be promoted"); + + // Set the called function of the call site to be the given callee. + CS.setCalledFunction(Callee); + + // Since the call site will no longer be direct, we must clear metadata that + // is only appropriate for indirect calls. This includes !prof and !callees + // metadata. + CS.getInstruction()->setMetadata(LLVMContext::MD_prof, nullptr); + CS.getInstruction()->setMetadata(LLVMContext::MD_callees, nullptr); + + // If the function type of the call site matches that of the callee, no + // additional work is required. + if (CS.getFunctionType() == Callee->getFunctionType()) + return CS.getInstruction(); + + // Save the return types of the call site and callee. + Type *CallSiteRetTy = CS.getInstruction()->getType(); + Type *CalleeRetTy = Callee->getReturnType(); + + // Change the function type of the call site to match that of the callee. + CS.mutateFunctionType(Callee->getFunctionType()); + + // Inspect the arguments of the call site. If an argument's type doesn't + // match the corresponding formal argument's type in the callee, bitcast it + // to the correct type. + for (Use &U : CS.args()) { + unsigned ArgNo = CS.getArgumentNo(&U); + Type *FormalTy = Callee->getFunctionType()->getParamType(ArgNo); + Type *ActualTy = U.get()->getType(); + if (FormalTy != ActualTy) { + auto *Cast = CastInst::Create(Instruction::BitCast, U.get(), FormalTy, "", + CS.getInstruction()); + CS.setArgument(ArgNo, Cast); + } + } + + // If the return type of the call site doesn't match that of the callee, cast + // the returned value to the appropriate type. + if (!CallSiteRetTy->isVoidTy() && CallSiteRetTy != CalleeRetTy) + createRetBitCast(CS, CallSiteRetTy, RetBitCast); + + return CS.getInstruction(); +} + +Instruction *llvm::promoteCallWithIfThenElse(CallSite CS, Function *Callee, + MDNode *BranchWeights) { + + // Version the indirect call site. If the called value is equal to the given + // callee, 'NewInst' will be executed, otherwise the original call site will + // be executed. + Instruction *NewInst = versionCallSite(CS, Callee, BranchWeights); + + // Promote 'NewInst' so that it directly calls the desired function. + return promoteCall(CallSite(NewInst), Callee); +} + +#undef DEBUG_TYPE diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index 3b19ba1b50f2..16af2c7b808b 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -493,17 +493,13 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, // Handle PHI nodes specially, as we have to remove references to dead // blocks. - for (BasicBlock::const_iterator I = BI.begin(), E = BI.end(); I != E; ++I) { + for (const PHINode &PN : BI.phis()) { // PHI nodes may have been remapped to non-PHI nodes by the caller or // during the cloning process.
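The entry points defined above (isLegalToPromote, promoteCall, promoteCallWithIfThenElse) are intended to be driven by an indirect-call-promotion style transform. The sketch below shows one plausible caller; the helper name, the LikelyCallee and profile-count inputs, and the header path are illustrative assumptions, while the CallPromotionUtils signatures follow the ones introduced in this patch.

    // Hypothetical driver for the new CallPromotionUtils API (sketch only).
    #include "llvm/IR/CallSite.h"
    #include "llvm/IR/MDBuilder.h"
    #include "llvm/Transforms/Utils/CallPromotionUtils.h" // assumed header path
    using namespace llvm;

    static bool tryPromoteIndirectCall(CallSite CS, Function *LikelyCallee,
                                       uint32_t HotCount, uint32_t TotalCount) {
      const char *Reason = nullptr;
      if (!isLegalToPromote(CS, LikelyCallee, &Reason))
        return false; // e.g. "Return type mismatch" or "Argument type mismatch"

      // Annotate the compare that versionCallSite will create with branch
      // weights (assumes HotCount <= TotalCount).
      MDBuilder MDB(CS.getInstruction()->getContext());
      MDNode *Weights = MDB.createBranchWeights(HotCount, TotalCount - HotCount);

      // Emits the if-then-else described above and promotes the cloned call.
      promoteCallWithIfThenElse(CS, LikelyCallee, Weights);
      return true;
    }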
- if (const PHINode *PN = dyn_cast(I)) { - if (isa(VMap[PN])) - PHIToResolve.push_back(PN); - else - break; - } else { + if (isa(VMap[&PN])) + PHIToResolve.push_back(&PN); + else break; - } } // Finally, remap the terminator instructions, as those can't be remapped diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp index e5392b53050d..8fee10854229 100644 --- a/lib/Transforms/Utils/CloneModule.cpp +++ b/lib/Transforms/Utils/CloneModule.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm-c/Core.h" #include "llvm/IR/Constant.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Module.h" diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp index c65cf2eb82ac..2fc987d860d2 100644 --- a/lib/Transforms/Utils/CodeExtractor.cpp +++ b/lib/Transforms/Utils/CodeExtractor.cpp @@ -66,6 +66,7 @@ #include using namespace llvm; +using ProfileCount = Function::ProfileCount; #define DEBUG_TYPE "code-extractor" @@ -620,16 +621,86 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, if (oldFunction->hasUWTable()) newFunction->setHasUWTable(); - // Inherit all of the target dependent attributes. + // Inherit all of the target dependent attributes and white-listed + // target independent attributes. // (e.g. If the extracted region contains a call to an x86.sse // instruction we need to make sure that the extracted region has the // "target-features" attribute allowing it to be lowered. // FIXME: This should be changed to check to see if a specific // attribute can not be inherited. - AttrBuilder AB(oldFunction->getAttributes().getFnAttributes()); - for (const auto &Attr : AB.td_attrs()) - newFunction->addFnAttr(Attr.first, Attr.second); + for (const auto &Attr : oldFunction->getAttributes().getFnAttributes()) { + if (Attr.isStringAttribute()) { + if (Attr.getKindAsString() == "thunk") + continue; + } else + switch (Attr.getKindAsEnum()) { + // Those attributes cannot be propagated safely. Explicitly list them + // here so we get a warning if new attributes are added. This list also + // includes non-function attributes. + case Attribute::Alignment: + case Attribute::AllocSize: + case Attribute::ArgMemOnly: + case Attribute::Builtin: + case Attribute::ByVal: + case Attribute::Convergent: + case Attribute::Dereferenceable: + case Attribute::DereferenceableOrNull: + case Attribute::InAlloca: + case Attribute::InReg: + case Attribute::InaccessibleMemOnly: + case Attribute::InaccessibleMemOrArgMemOnly: + case Attribute::JumpTable: + case Attribute::Naked: + case Attribute::Nest: + case Attribute::NoAlias: + case Attribute::NoBuiltin: + case Attribute::NoCapture: + case Attribute::NoReturn: + case Attribute::None: + case Attribute::NonNull: + case Attribute::ReadNone: + case Attribute::ReadOnly: + case Attribute::Returned: + case Attribute::ReturnsTwice: + case Attribute::SExt: + case Attribute::Speculatable: + case Attribute::StackAlignment: + case Attribute::StructRet: + case Attribute::SwiftError: + case Attribute::SwiftSelf: + case Attribute::WriteOnly: + case Attribute::ZExt: + case Attribute::EndAttrKinds: + continue; + // Those attributes should be safe to propagate to the extracted function. 
+ case Attribute::AlwaysInline: + case Attribute::Cold: + case Attribute::NoRecurse: + case Attribute::InlineHint: + case Attribute::MinSize: + case Attribute::NoDuplicate: + case Attribute::NoImplicitFloat: + case Attribute::NoInline: + case Attribute::NonLazyBind: + case Attribute::NoRedZone: + case Attribute::NoUnwind: + case Attribute::OptimizeNone: + case Attribute::OptimizeForSize: + case Attribute::SafeStack: + case Attribute::SanitizeAddress: + case Attribute::SanitizeMemory: + case Attribute::SanitizeThread: + case Attribute::SanitizeHWAddress: + case Attribute::StackProtect: + case Attribute::StackProtectReq: + case Attribute::StackProtectStrong: + case Attribute::StrictFP: + case Attribute::UWTable: + break; + } + newFunction->addFnAttr(Attr); + } newFunction->getBasicBlockList().push_back(newRootNode); // Create an iterator to name all of the arguments we inserted. @@ -746,6 +817,14 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, // Emit the call to the function CallInst *call = CallInst::Create(newFunction, params, NumExitBlocks > 1 ? "targetBlock" : ""); + // Add debug location to the new call, if the original function has debug + // info. In that case, the terminator of the entry block of the extracted + // function contains the first debug location of the extracted function, + // set in extractCodeRegion. + if (codeReplacer->getParent()->getSubprogram()) { + if (auto DL = newFunction->getEntryBlock().getTerminator()->getDebugLoc()) + call->setDebugLoc(DL); + } codeReplacer->getInstList().push_back(call); Function::arg_iterator OutputArgBegin = newFunction->arg_begin(); @@ -1023,7 +1102,22 @@ Function *CodeExtractor::extractCodeRegion() { // head of the region, but the entry node of a function cannot have preds. BasicBlock *newFuncRoot = BasicBlock::Create(header->getContext(), "newFuncRoot"); - newFuncRoot->getInstList().push_back(BranchInst::Create(header)); + auto *BranchI = BranchInst::Create(header); + // If the original function has debug info, we have to add a debug location + // to the new branch instruction from the artificial entry block. + // We use the debug location of the first instruction in the extracted + // blocks, as there is no other equivalent line in the source code. + if (oldFunction->getSubprogram()) { + any_of(Blocks, [&BranchI](const BasicBlock *BB) { + return any_of(*BB, [&BranchI](const Instruction &I) { + if (!I.getDebugLoc()) + return false; + BranchI->setDebugLoc(I.getDebugLoc()); + return true; + }); + }); + } + newFuncRoot->getInstList().push_back(BranchI); findAllocas(SinkingCands, HoistingCands, CommonExit); assert(HoistingCands.empty() || CommonExit); @@ -1070,10 +1164,10 @@ Function *CodeExtractor::extractCodeRegion() { // Update the entry count of the function. 
if (BFI) { - Optional EntryCount = - BFI->getProfileCountFromFreq(EntryFreq.getFrequency()); - if (EntryCount.hasValue()) - newFunction->setEntryCount(EntryCount.getValue()); + auto Count = BFI->getProfileCountFromFreq(EntryFreq.getFrequency()); + if (Count.hasValue()) + newFunction->setEntryCount( + ProfileCount(Count.getValue(), Function::PCT_Real)); // FIXME BFI->setBlockFreq(codeReplacer, EntryFreq.getFrequency()); } diff --git a/lib/Transforms/Utils/CtorUtils.cpp b/lib/Transforms/Utils/CtorUtils.cpp index 6642a97a29c2..82b67c293102 100644 --- a/lib/Transforms/Utils/CtorUtils.cpp +++ b/lib/Transforms/Utils/CtorUtils.cpp @@ -16,7 +16,6 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/Transforms/Utils/EntryExitInstrumenter.cpp b/lib/Transforms/Utils/EntryExitInstrumenter.cpp index 064d7d003a92..421663f82565 100644 --- a/lib/Transforms/Utils/EntryExitInstrumenter.cpp +++ b/lib/Transforms/Utils/EntryExitInstrumenter.cpp @@ -10,6 +10,7 @@ #include "llvm/Transforms/Utils/EntryExitInstrumenter.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" @@ -19,7 +20,7 @@ using namespace llvm; static void insertCall(Function &CurFn, StringRef Func, - Instruction *InsertionPt) { + Instruction *InsertionPt, DebugLoc DL) { Module &M = *InsertionPt->getParent()->getParent()->getParent(); LLVMContext &C = InsertionPt->getParent()->getContext(); @@ -32,7 +33,8 @@ static void insertCall(Function &CurFn, StringRef Func, Func == "_mcount" || Func == "__cyg_profile_func_enter_bare") { Constant *Fn = M.getOrInsertFunction(Func, Type::getVoidTy(C)); - CallInst::Create(Fn, "", InsertionPt); + CallInst *Call = CallInst::Create(Fn, "", InsertionPt); + Call->setDebugLoc(DL); return; } @@ -46,11 +48,14 @@ static void insertCall(Function &CurFn, StringRef Func, Intrinsic::getDeclaration(&M, Intrinsic::returnaddress), ArrayRef(ConstantInt::get(Type::getInt32Ty(C), 0)), "", InsertionPt); + RetAddr->setDebugLoc(DL); Value *Args[] = {ConstantExpr::getBitCast(&CurFn, Type::getInt8PtrTy(C)), RetAddr}; - CallInst::Create(Fn, ArrayRef(Args), "", InsertionPt); + CallInst *Call = + CallInst::Create(Fn, ArrayRef(Args), "", InsertionPt); + Call->setDebugLoc(DL); return; } @@ -76,7 +81,11 @@ static bool runOnFunction(Function &F, bool PostInlining) { // run later for some reason. 
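The insertCall changes above thread a DebugLoc into each inserted profiling call so that instrumented functions with debug info keep locations on their calls. When the insertion point has no location of its own, one can be synthesized from the subprogram's scope line, which is the pattern the runOnFunction hunk below uses. A minimal sketch, with an illustrative helper name:

    // Sketch: derive a usable DebugLoc for an instrumentation call when the
    // insertion point itself has no location.
    #include "llvm/IR/DebugInfoMetadata.h"
    #include "llvm/IR/DebugLoc.h"
    #include "llvm/IR/Function.h"
    using namespace llvm;

    static DebugLoc entryDebugLoc(Function &F) {
      if (DISubprogram *SP = F.getSubprogram())
        return DebugLoc::get(SP->getScopeLine(), /*Col=*/0, SP);
      return DebugLoc(); // no debug info: leave the call without a location
    }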
if (!EntryFunc.empty()) { - insertCall(F, EntryFunc, &*F.begin()->getFirstInsertionPt()); + DebugLoc DL; + if (auto SP = F.getSubprogram()) + DL = DebugLoc::get(SP->getScopeLine(), 0, SP); + + insertCall(F, EntryFunc, &*F.begin()->getFirstInsertionPt(), DL); Changed = true; F.removeAttribute(AttributeList::FunctionIndex, EntryAttr); } @@ -84,8 +93,14 @@ static bool runOnFunction(Function &F, bool PostInlining) { if (!ExitFunc.empty()) { for (BasicBlock &BB : F) { TerminatorInst *T = BB.getTerminator(); + DebugLoc DL; + if (DebugLoc TerminatorDL = T->getDebugLoc()) + DL = TerminatorDL; + else if (auto SP = F.getSubprogram()) + DL = DebugLoc::get(0, 0, SP); + if (isa(T)) { - insertCall(F, ExitFunc, T); + insertCall(F, ExitFunc, T, DL); Changed = true; } } diff --git a/lib/Transforms/Utils/FunctionImportUtils.cpp b/lib/Transforms/Utils/FunctionImportUtils.cpp index 2e6fc4e8482e..6b5f593073b4 100644 --- a/lib/Transforms/Utils/FunctionImportUtils.cpp +++ b/lib/Transforms/Utils/FunctionImportUtils.cpp @@ -13,9 +13,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/FunctionImportUtils.h" -#include "llvm/Analysis/ModuleSummaryAnalysis.h" #include "llvm/IR/InstIterator.h" -#include "llvm/IR/Instructions.h" using namespace llvm; /// Checks if we should import SGV as a definition, otherwise import as a diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index 15a8bf229224..5b4b45a69b4b 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -72,6 +72,7 @@ #include using namespace llvm; +using ProfileCount = Function::ProfileCount; static cl::opt EnableNoAliasConversion("enable-noalias-to-md-conversion", cl::init(true), @@ -1431,29 +1432,29 @@ static void updateCallerBFI(BasicBlock *CallSiteBlock, /// Update the branch metadata for cloned call instructions. static void updateCallProfile(Function *Callee, const ValueToValueMapTy &VMap, - const Optional &CalleeEntryCount, + const ProfileCount &CalleeEntryCount, const Instruction *TheCall, ProfileSummaryInfo *PSI, BlockFrequencyInfo *CallerBFI) { - if (!CalleeEntryCount.hasValue() || CalleeEntryCount.getValue() < 1) + if (!CalleeEntryCount.hasValue() || CalleeEntryCount.isSynthetic() || + CalleeEntryCount.getCount() < 1) return; - Optional CallSiteCount = - PSI ? PSI->getProfileCount(TheCall, CallerBFI) : None; + auto CallSiteCount = PSI ? PSI->getProfileCount(TheCall, CallerBFI) : None; uint64_t CallCount = std::min(CallSiteCount.hasValue() ? CallSiteCount.getValue() : 0, - CalleeEntryCount.getValue()); + CalleeEntryCount.getCount()); for (auto const &Entry : VMap) if (isa(Entry.first)) if (auto *CI = dyn_cast_or_null(Entry.second)) - CI->updateProfWeight(CallCount, CalleeEntryCount.getValue()); + CI->updateProfWeight(CallCount, CalleeEntryCount.getCount()); for (BasicBlock &BB : *Callee) // No need to update the callsite if it is pruned during inlining. if (VMap.count(&BB)) for (Instruction &I : BB) if (CallInst *CI = dyn_cast(&I)) - CI->updateProfWeight(CalleeEntryCount.getValue() - CallCount, - CalleeEntryCount.getValue()); + CI->updateProfWeight(CalleeEntryCount.getCount() - CallCount, + CalleeEntryCount.getCount()); } /// Update the entry count of callee after inlining. @@ -1467,18 +1468,19 @@ static void updateCalleeCount(BlockFrequencyInfo *CallerBFI, BasicBlock *CallBB, // callsite is M, the new callee count is set to N - M. 
M is estimated from // the caller's entry count, its entry block frequency and the block frequency // of the callsite. - Optional CalleeCount = Callee->getEntryCount(); + auto CalleeCount = Callee->getEntryCount(); if (!CalleeCount.hasValue() || !PSI) return; - Optional CallCount = PSI->getProfileCount(CallInst, CallerBFI); + auto CallCount = PSI->getProfileCount(CallInst, CallerBFI); if (!CallCount.hasValue()) return; // Since CallSiteCount is an estimate, it could exceed the original callee // count and has to be set to 0. - if (CallCount.getValue() > CalleeCount.getValue()) - Callee->setEntryCount(0); + if (CallCount.getValue() > CalleeCount.getCount()) + CalleeCount.setCount(0); else - Callee->setEntryCount(CalleeCount.getValue() - CallCount.getValue()); + CalleeCount.setCount(CalleeCount.getCount() - CallCount.getValue()); + Callee->setEntryCount(CalleeCount); } /// This function inlines the called function into the basic block of the @@ -1500,10 +1502,9 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, IFI.reset(); Function *CalledFunc = CS.getCalledFunction(); - if (!CalledFunc || // Can't inline external function or indirect - CalledFunc->isDeclaration() || - (!ForwardVarArgsTo && CalledFunc->isVarArg())) // call, or call to a vararg function! - return false; + if (!CalledFunc || // Can't inline external function or indirect + CalledFunc->isDeclaration()) // call! + return false; // The inliner does not know how to inline through calls with operand bundles // in general ... @@ -1630,9 +1631,6 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, auto &DL = Caller->getParent()->getDataLayout(); - assert((CalledFunc->arg_size() == CS.arg_size() || ForwardVarArgsTo) && - "Varargs calls can only be inlined if the Varargs are forwarded!"); - // Calculate the vector of arguments to pass into the function cloner, which // matches up the formal to the actual argument values. CallSite::arg_iterator AI = CS.arg_begin(); @@ -1810,13 +1808,17 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // Move any dbg.declares describing the allocas into the entry basic block. DIBuilder DIB(*Caller->getParent()); for (auto &AI : IFI.StaticAllocas) - replaceDbgDeclareForAlloca(AI, AI, DIB, /*Deref=*/false); + replaceDbgDeclareForAlloca(AI, AI, DIB, DIExpression::NoDeref, 0, + DIExpression::NoDeref); } SmallVector VarArgsToForward; + SmallVector VarArgsAttrs; for (unsigned i = CalledFunc->getFunctionType()->getNumParams(); - i < CS.getNumArgOperands(); i++) + i < CS.getNumArgOperands(); i++) { VarArgsToForward.push_back(CS.getArgOperand(i)); + VarArgsAttrs.push_back(CS.getAttributes().getParamAttributes(i)); + } bool InlinedMustTailCalls = false, InlinedDeoptimizeCalls = false; if (InlinedFunctionInfo.ContainsCalls) { @@ -1832,6 +1834,40 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, if (!CI) continue; + // Forward varargs from inlined call site to calls to the + // ForwardVarArgsTo function, if requested, and to musttail calls. + if (!VarArgsToForward.empty() && + ((ForwardVarArgsTo && + CI->getCalledFunction() == ForwardVarArgsTo) || + CI->isMustTailCall())) { + // Collect attributes for non-vararg parameters. + AttributeList Attrs = CI->getAttributes(); + SmallVector ArgAttrs; + if (!Attrs.isEmpty()) { + for (unsigned ArgNo = 0; + ArgNo < CI->getFunctionType()->getNumParams(); ++ArgNo) + ArgAttrs.push_back(Attrs.getParamAttributes(ArgNo)); + } + + // Add VarArg attributes. 
+ ArgAttrs.append(VarArgsAttrs.begin(), VarArgsAttrs.end()); + Attrs = AttributeList::get(CI->getContext(), Attrs.getFnAttributes(), + Attrs.getRetAttributes(), ArgAttrs); + // Add VarArgs to existing parameters. + SmallVector Params(CI->arg_operands()); + Params.append(VarArgsToForward.begin(), VarArgsToForward.end()); + CallInst *NewCI = + CallInst::Create(CI->getCalledFunction() ? CI->getCalledFunction() + : CI->getCalledValue(), + Params, "", CI); + NewCI->setDebugLoc(CI->getDebugLoc()); + NewCI->setAttributes(Attrs); + NewCI->setCallingConv(CI->getCallingConv()); + CI->replaceAllUsesWith(NewCI); + CI->eraseFromParent(); + CI = NewCI; + } + if (Function *F = CI->getCalledFunction()) InlinedDeoptimizeCalls |= F->getIntrinsicID() == Intrinsic::experimental_deoptimize; @@ -1859,14 +1895,6 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // 'nounwind'. if (MarkNoUnwind) CI->setDoesNotThrow(); - - if (ForwardVarArgsTo && CI->getCalledFunction() == ForwardVarArgsTo) { - SmallVector Params(CI->arg_operands()); - Params.append(VarArgsToForward.begin(), VarArgsToForward.end()); - CallInst *Call = CallInst::Create(CI->getCalledFunction(), Params, "", CI); - CI->replaceAllUsesWith(Call); - CI->eraseFromParent(); - } } } } diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp index 089f2b5f3b18..ae0e2bb6c280 100644 --- a/lib/Transforms/Utils/LCSSA.cpp +++ b/lib/Transforms/Utils/LCSSA.cpp @@ -56,9 +56,10 @@ static bool VerifyLoopLCSSA = true; #else static bool VerifyLoopLCSSA = false; #endif -static cl::opt -VerifyLoopLCSSAFlag("verify-loop-lcssa", cl::location(VerifyLoopLCSSA), - cl::desc("Verify loop lcssa form (time consuming)")); +static cl::opt + VerifyLoopLCSSAFlag("verify-loop-lcssa", cl::location(VerifyLoopLCSSA), + cl::Hidden, + cl::desc("Verify loop lcssa form (time consuming)")); /// Return true if the specified block is in the list. static bool isExitBlock(BasicBlock *BB, diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index 3f7629540be5..4459d3c68782 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -100,26 +100,23 @@ STATISTIC(NumRemoved, "Number of unreachable basic blocks removed"); /// conditions and indirectbr addresses this might make dead if /// DeleteDeadConditions is true. bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, - const TargetLibraryInfo *TLI) { + const TargetLibraryInfo *TLI, + DeferredDominance *DDT) { TerminatorInst *T = BB->getTerminator(); IRBuilder<> Builder(T); // Branch - See if we are conditional jumping on constant - if (BranchInst *BI = dyn_cast(T)) { + if (auto *BI = dyn_cast(T)) { if (BI->isUnconditional()) return false; // Can't optimize uncond branch BasicBlock *Dest1 = BI->getSuccessor(0); BasicBlock *Dest2 = BI->getSuccessor(1); - if (ConstantInt *Cond = dyn_cast(BI->getCondition())) { + if (auto *Cond = dyn_cast(BI->getCondition())) { // Are we branching on constant? // YES. Change to unconditional branch... BasicBlock *Destination = Cond->getZExtValue() ? Dest1 : Dest2; BasicBlock *OldDest = Cond->getZExtValue() ? Dest2 : Dest1; - //cerr << "Function: " << T->getParent()->getParent() - // << "\nRemoving branch from " << T->getParent() - // << "\n\nTo: " << OldDest << endl; - // Let the basic block know that we are letting go of it. Based on this, // it will adjust it's PHI nodes. 
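The forwarding block above rebuilds matching calls inside the inlined body with the caller's trailing vararg operands and their parameter attributes appended; callers opt in through InlineFunction's ForwardVarArgsTo argument (musttail calls are handled unconditionally). A hedged sketch of such a caller follows; the exact position and defaults of that parameter, the VariadicSink function, and the helper name are assumptions.

    // Sketch: inline a call and forward its trailing varargs to calls of
    // 'VariadicSink' inside the callee (InlineFunction parameter order assumed).
    #include "llvm/IR/CallSite.h"
    #include "llvm/Transforms/Utils/Cloning.h"
    using namespace llvm;

    static bool inlineAndForwardVarArgs(CallSite CS, InlineFunctionInfo &IFI,
                                        Function *VariadicSink) {
      return InlineFunction(CS, IFI, /*CalleeAAR=*/nullptr,
                            /*InsertLifetime=*/true,
                            /*ForwardVarArgsTo=*/VariadicSink);
    }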
OldDest->removePredecessor(BB); @@ -127,6 +124,8 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, // Replace the conditional branch with an unconditional one. Builder.CreateBr(Destination); BI->eraseFromParent(); + if (DDT) + DDT->deleteEdge(BB, OldDest); return true; } @@ -150,10 +149,10 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, return false; } - if (SwitchInst *SI = dyn_cast(T)) { + if (auto *SI = dyn_cast(T)) { // If we are switching on a constant, we can convert the switch to an // unconditional branch. - ConstantInt *CI = dyn_cast(SI->getCondition()); + auto *CI = dyn_cast(SI->getCondition()); BasicBlock *DefaultDest = SI->getDefaultDest(); BasicBlock *TheOnlyDest = DefaultDest; @@ -197,9 +196,12 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, createBranchWeights(Weights)); } // Remove this entry. - DefaultDest->removePredecessor(SI->getParent()); + BasicBlock *ParentBB = SI->getParent(); + DefaultDest->removePredecessor(ParentBB); i = SI->removeCase(i); e = SI->case_end(); + if (DDT) + DDT->deleteEdge(ParentBB, DefaultDest); continue; } @@ -225,14 +227,20 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, // Insert the new branch. Builder.CreateBr(TheOnlyDest); BasicBlock *BB = SI->getParent(); + std::vector Updates; + if (DDT) + Updates.reserve(SI->getNumSuccessors() - 1); // Remove entries from PHI nodes which we no longer branch to... for (BasicBlock *Succ : SI->successors()) { // Found case matching a constant operand? - if (Succ == TheOnlyDest) + if (Succ == TheOnlyDest) { TheOnlyDest = nullptr; // Don't modify the first branch to TheOnlyDest - else + } else { Succ->removePredecessor(BB); + if (DDT) + Updates.push_back({DominatorTree::Delete, BB, Succ}); + } } // Delete the old switch. @@ -240,6 +248,8 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, SI->eraseFromParent(); if (DeleteDeadConditions) RecursivelyDeleteTriviallyDeadInstructions(Cond, TLI); + if (DDT) + DDT->applyUpdates(Updates); return true; } @@ -280,19 +290,28 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, return false; } - if (IndirectBrInst *IBI = dyn_cast(T)) { + if (auto *IBI = dyn_cast(T)) { // indirectbr blockaddress(@F, @BB) -> br label @BB - if (BlockAddress *BA = + if (auto *BA = dyn_cast(IBI->getAddress()->stripPointerCasts())) { BasicBlock *TheOnlyDest = BA->getBasicBlock(); + std::vector Updates; + if (DDT) + Updates.reserve(IBI->getNumDestinations() - 1); + // Insert the new branch. Builder.CreateBr(TheOnlyDest); for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) { - if (IBI->getDestination(i) == TheOnlyDest) + if (IBI->getDestination(i) == TheOnlyDest) { TheOnlyDest = nullptr; - else - IBI->getDestination(i)->removePredecessor(IBI->getParent()); + } else { + BasicBlock *ParentBB = IBI->getParent(); + BasicBlock *DestBB = IBI->getDestination(i); + DestBB->removePredecessor(ParentBB); + if (DDT) + Updates.push_back({DominatorTree::Delete, ParentBB, DestBB}); + } } Value *Address = IBI->getAddress(); IBI->eraseFromParent(); @@ -307,6 +326,8 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, new UnreachableInst(BB->getContext(), BB); } + if (DDT) + DDT->applyUpdates(Updates); return true; } } @@ -583,7 +604,8 @@ bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, /// /// .. 
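ConstantFoldTerminator and the other Local.cpp helpers in this patch accept an optional DeferredDominance so that CFG edge deletions and block removals can be queued and applied as a batch rather than updating the DominatorTree eagerly. A sketch of a caller is below; the DeferredDominance class and its flush() method are assumed to be available as introduced alongside this work, and the header location may differ.

    // Sketch: batch dominator-tree maintenance while folding terminators.
    #include "llvm/IR/Dominators.h"   // DominatorTree; DeferredDominance assumed here
    #include "llvm/IR/Function.h"
    #include "llvm/Transforms/Utils/Local.h"
    using namespace llvm;

    static bool foldTerminators(Function &F, DominatorTree &DT) {
      DeferredDominance DDT(DT); // queues updates instead of mutating DT eagerly
      bool Changed = false;
      for (BasicBlock &BB : F)
        Changed |= ConstantFoldTerminator(&BB, /*DeleteDeadConditions=*/true,
                                          /*TLI=*/nullptr, &DDT);
      Changed |= removeUnreachableBlocks(F, /*LVI=*/nullptr, &DDT);
      DDT.flush(); // apply queued edge updates and deferred block deletions
      return Changed;
    }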
and delete the predecessor corresponding to the '1', this will attempt to /// recursively fold the and to 0. -void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred) { +void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred, + DeferredDominance *DDT) { // This only adjusts blocks with PHI nodes. if (!isa(BB->begin())) return; @@ -606,13 +628,18 @@ void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred) { // of the block. if (PhiIt != OldPhiIt) PhiIt = &BB->front(); } + if (DDT) + DDT->deleteEdge(Pred, BB); } /// MergeBasicBlockIntoOnlyPred - DestBB is a block with one predecessor and its /// predecessor is known to have one successor (DestBB!). Eliminate the edge /// between them, moving the instructions in the predecessor into DestBB and /// deleting the predecessor block. -void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, DominatorTree *DT) { +void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, DominatorTree *DT, + DeferredDominance *DDT) { + assert(!(DT && DDT) && "Cannot call with both DT and DDT."); + // If BB has single-entry PHI nodes, fold them. while (PHINode *PN = dyn_cast(DestBB->begin())) { Value *NewVal = PN->getIncomingValue(0); @@ -625,6 +652,25 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, DominatorTree *DT) { BasicBlock *PredBB = DestBB->getSinglePredecessor(); assert(PredBB && "Block doesn't have a single predecessor!"); + bool ReplaceEntryBB = false; + if (PredBB == &DestBB->getParent()->getEntryBlock()) + ReplaceEntryBB = true; + + // Deferred DT update: Collect all the edges that enter PredBB. These + // dominator edges will be redirected to DestBB. + std::vector Updates; + if (DDT && !ReplaceEntryBB) { + Updates.reserve(1 + + (2 * std::distance(pred_begin(PredBB), pred_end(PredBB)))); + Updates.push_back({DominatorTree::Delete, PredBB, DestBB}); + for (auto I = pred_begin(PredBB), E = pred_end(PredBB); I != E; ++I) { + Updates.push_back({DominatorTree::Delete, *I, PredBB}); + // This predecessor of PredBB may already have DestBB as a successor. + if (llvm::find(successors(*I), DestBB) == succ_end(*I)) + Updates.push_back({DominatorTree::Insert, *I, DestBB}); + } + } + // Zap anything that took the address of DestBB. Not doing this will give the // address an invalid value. if (DestBB->hasAddressTaken()) { @@ -645,7 +691,7 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, DominatorTree *DT) { // If the PredBB is the entry block of the function, move DestBB up to // become the entry block after we erase PredBB. - if (PredBB == &DestBB->getParent()->getEntryBlock()) + if (ReplaceEntryBB) DestBB->moveAfter(PredBB); if (DT) { @@ -657,8 +703,19 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, DominatorTree *DT) { DT->eraseNode(PredBB); } } - // Nuke BB. - PredBB->eraseFromParent(); + + if (DDT) { + DDT->deleteBB(PredBB); // Deferred deletion of BB. + if (ReplaceEntryBB) + // The entry block was removed and there is no external interface for the + // dominator tree to be notified of this change. In this corner-case we + // recalculate the entire tree. + DDT->recalculate(*(DestBB->getParent())); + else + DDT->applyUpdates(Updates); + } else { + PredBB->eraseFromParent(); // Nuke BB. + } } /// CanMergeValues - Return true if we can choose one of these values to use @@ -865,7 +922,8 @@ static void redirectValuesFromPredecessorsToPhi(BasicBlock *BB, /// potential side-effect free intrinsics and the branch. 
If possible, /// eliminate BB by rewriting all the predecessors to branch to the successor /// block and return true. If we can't transform, return false. -bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) { +bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, + DeferredDominance *DDT) { assert(BB != &BB->getParent()->getEntryBlock() && "TryToSimplifyUncondBranchFromEmptyBlock called on entry block!"); @@ -906,6 +964,19 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) { DEBUG(dbgs() << "Killing Trivial BB: \n" << *BB); + std::vector Updates; + if (DDT) { + Updates.reserve(1 + (2 * std::distance(pred_begin(BB), pred_end(BB)))); + Updates.push_back({DominatorTree::Delete, BB, Succ}); + // All predecessors of BB will be moved to Succ. + for (auto I = pred_begin(BB), E = pred_end(BB); I != E; ++I) { + Updates.push_back({DominatorTree::Delete, *I, BB}); + // This predecessor of BB may already have Succ as a successor. + if (llvm::find(successors(*I), Succ) == succ_end(*I)) + Updates.push_back({DominatorTree::Insert, *I, Succ}); + } + } + if (isa(Succ->begin())) { // If there is more than one pred of succ, and there are PHI nodes in // the successor, then we need to add incoming edges for the PHI nodes @@ -950,7 +1021,13 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) { // Everything that jumped to BB now goes to Succ. BB->replaceAllUsesWith(Succ); if (!Succ->hasName()) Succ->takeName(BB); - BB->eraseFromParent(); // Delete the old basic block. + + if (DDT) { + DDT->deleteBB(BB); // Deferred deletion of the old basic block. + DDT->applyUpdates(Updates); + } else { + BB->eraseFromParent(); // Delete the old basic block. + } return true; } @@ -1293,8 +1370,8 @@ void llvm::findDbgValues(SmallVectorImpl &DbgValues, Value *V) { DbgValues.push_back(DVI); } -static void findDbgUsers(SmallVectorImpl &DbgUsers, - Value *V) { +void llvm::findDbgUsers(SmallVectorImpl &DbgUsers, + Value *V) { if (auto *L = LocalAsMetadata::getIfExists(V)) if (auto *MDV = MetadataAsValue::getIfExists(V->getContext(), L)) for (User *U : MDV->users()) @@ -1304,14 +1381,14 @@ static void findDbgUsers(SmallVectorImpl &DbgUsers, bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress, Instruction *InsertBefore, DIBuilder &Builder, - bool Deref, int Offset) { + bool DerefBefore, int Offset, bool DerefAfter) { auto DbgAddrs = FindDbgAddrUses(Address); for (DbgInfoIntrinsic *DII : DbgAddrs) { DebugLoc Loc = DII->getDebugLoc(); auto *DIVar = DII->getVariable(); auto *DIExpr = DII->getExpression(); assert(DIVar && "Missing variable"); - DIExpr = DIExpression::prepend(DIExpr, Deref, Offset); + DIExpr = DIExpression::prepend(DIExpr, DerefBefore, Offset, DerefAfter); // Insert llvm.dbg.declare immediately after InsertBefore, and remove old // llvm.dbg.declare. 
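Two helper changes meet here: findDbgUsers is now exported, and replaceDbgDeclare/replaceDbgDeclareForAlloca take separate dereference flags before and after the offset instead of a single Deref bool. The sketch below shows how a transform that replaces one alloca with another might use them; OldAI, NewAI, and the helper name are illustrative.

    // Sketch: keep debug intrinsics pointing at the right storage after
    // replacing OldAI with NewAI.
    #include "llvm/ADT/SmallVector.h"
    #include "llvm/IR/DIBuilder.h"
    #include "llvm/IR/DebugInfoMetadata.h"
    #include "llvm/IR/Instructions.h"
    #include "llvm/Transforms/Utils/Local.h"
    using namespace llvm;

    static void updateDebugInfoForReplacedAlloca(AllocaInst *OldAI,
                                                 AllocaInst *NewAI) {
      DIBuilder DIB(*OldAI->getModule());
      replaceDbgDeclareForAlloca(OldAI, NewAI, DIB,
                                 /*DerefBefore=*/DIExpression::NoDeref,
                                 /*Offset=*/0,
                                 /*DerefAfter=*/DIExpression::NoDeref);

      // Enumerate every dbg.value/dbg.addr/dbg.declare user of the old alloca
      // in case the caller wants to rewrite or drop them explicitly.
      SmallVector<DbgInfoIntrinsic *, 4> DbgUsers;
      findDbgUsers(DbgUsers, OldAI);
      for (DbgInfoIntrinsic *DII : DbgUsers)
        (void)DII; // inspect or rewrite DII here
    }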
Builder.insertDeclare(NewAddress, DIVar, DIExpr, Loc, InsertBefore); @@ -1323,9 +1400,10 @@ bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress, } bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress, - DIBuilder &Builder, bool Deref, int Offset) { + DIBuilder &Builder, bool DerefBefore, + int Offset, bool DerefAfter) { return replaceDbgDeclare(AI, NewAllocaAddress, AI->getNextNode(), Builder, - Deref, Offset); + DerefBefore, Offset, DerefAfter); } static void replaceOneDbgValueForAlloca(DbgValueInst *DVI, Value *NewAddress, @@ -1368,62 +1446,59 @@ void llvm::replaceDbgValueForAlloca(AllocaInst *AI, Value *NewAllocaAddress, } void llvm::salvageDebugInfo(Instruction &I) { - SmallVector DbgValues; + SmallVector DbgUsers; + findDbgUsers(DbgUsers, &I); + if (DbgUsers.empty()) + return; + auto &M = *I.getModule(); auto wrapMD = [&](Value *V) { return MetadataAsValue::get(I.getContext(), ValueAsMetadata::get(V)); }; - auto applyOffset = [&](DbgValueInst *DVI, uint64_t Offset) { - auto *DIExpr = DVI->getExpression(); + auto applyOffset = [&](DbgInfoIntrinsic *DII, uint64_t Offset) { + auto *DIExpr = DII->getExpression(); DIExpr = DIExpression::prepend(DIExpr, DIExpression::NoDeref, Offset, + DIExpression::NoDeref, DIExpression::WithStackValue); - DVI->setOperand(0, wrapMD(I.getOperand(0))); - DVI->setOperand(2, MetadataAsValue::get(I.getContext(), DIExpr)); - DEBUG(dbgs() << "SALVAGE: " << *DVI << '\n'); + DII->setOperand(0, wrapMD(I.getOperand(0))); + DII->setOperand(2, MetadataAsValue::get(I.getContext(), DIExpr)); + DEBUG(dbgs() << "SALVAGE: " << *DII << '\n'); }; if (isa(&I) || isa(&I)) { // Bitcasts are entirely irrelevant for debug info. Rewrite dbg.value, // dbg.addr, and dbg.declare to use the cast's source. - SmallVector DbgUsers; - findDbgUsers(DbgUsers, &I); for (auto *DII : DbgUsers) { DII->setOperand(0, wrapMD(I.getOperand(0))); DEBUG(dbgs() << "SALVAGE: " << *DII << '\n'); } } else if (auto *GEP = dyn_cast(&I)) { - findDbgValues(DbgValues, &I); - for (auto *DVI : DbgValues) { - unsigned BitWidth = - M.getDataLayout().getPointerSizeInBits(GEP->getPointerAddressSpace()); - APInt Offset(BitWidth, 0); - // Rewrite a constant GEP into a DIExpression. Since we are performing - // arithmetic to compute the variable's *value* in the DIExpression, we - // need to mark the expression with a DW_OP_stack_value. - if (GEP->accumulateConstantOffset(M.getDataLayout(), Offset)) - // GEP offsets are i32 and thus always fit into an int64_t. - applyOffset(DVI, Offset.getSExtValue()); - } + unsigned BitWidth = + M.getDataLayout().getPointerSizeInBits(GEP->getPointerAddressSpace()); + // Rewrite a constant GEP into a DIExpression. Since we are performing + // arithmetic to compute the variable's *value* in the DIExpression, we + // need to mark the expression with a DW_OP_stack_value. 
+ APInt Offset(BitWidth, 0); + if (GEP->accumulateConstantOffset(M.getDataLayout(), Offset)) + for (auto *DII : DbgUsers) + applyOffset(DII, Offset.getSExtValue()); } else if (auto *BI = dyn_cast(&I)) { if (BI->getOpcode() == Instruction::Add) if (auto *ConstInt = dyn_cast(I.getOperand(1))) - if (ConstInt->getBitWidth() <= 64) { - APInt Offset = ConstInt->getValue(); - findDbgValues(DbgValues, &I); - for (auto *DVI : DbgValues) - applyOffset(DVI, Offset.getSExtValue()); - } + if (ConstInt->getBitWidth() <= 64) + for (auto *DII : DbgUsers) + applyOffset(DII, ConstInt->getSExtValue()); } else if (isa(&I)) { - findDbgValues(DbgValues, &I); - for (auto *DVI : DbgValues) { + MetadataAsValue *AddrMD = wrapMD(I.getOperand(0)); + for (auto *DII : DbgUsers) { // Rewrite the load into DW_OP_deref. - auto *DIExpr = DVI->getExpression(); + auto *DIExpr = DII->getExpression(); DIExpr = DIExpression::prepend(DIExpr, DIExpression::WithDeref); - DVI->setOperand(0, wrapMD(I.getOperand(0))); - DVI->setOperand(2, MetadataAsValue::get(I.getContext(), DIExpr)); - DEBUG(dbgs() << "SALVAGE: " << *DVI << '\n'); + DII->setOperand(0, AddrMD); + DII->setOperand(2, MetadataAsValue::get(I.getContext(), DIExpr)); + DEBUG(dbgs() << "SALVAGE: " << *DII << '\n'); } } } @@ -1450,13 +1525,19 @@ unsigned llvm::removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB) { } unsigned llvm::changeToUnreachable(Instruction *I, bool UseLLVMTrap, - bool PreserveLCSSA) { + bool PreserveLCSSA, DeferredDominance *DDT) { BasicBlock *BB = I->getParent(); + std::vector Updates; + // Loop over all of the successors, removing BB's entry from any PHI // nodes. - for (BasicBlock *Successor : successors(BB)) + if (DDT) + Updates.reserve(BB->getTerminator()->getNumSuccessors()); + for (BasicBlock *Successor : successors(BB)) { Successor->removePredecessor(BB, PreserveLCSSA); - + if (DDT) + Updates.push_back({DominatorTree::Delete, BB, Successor}); + } // Insert a call to llvm.trap right before this. This turns the undefined // behavior into a hard fail instead of falling through into random code. if (UseLLVMTrap) { @@ -1476,11 +1557,13 @@ unsigned llvm::changeToUnreachable(Instruction *I, bool UseLLVMTrap, BB->getInstList().erase(BBI++); ++NumInstrsRemoved; } + if (DDT) + DDT->applyUpdates(Updates); return NumInstrsRemoved; } /// changeToCall - Convert the specified invoke into a normal call. -static void changeToCall(InvokeInst *II) { +static void changeToCall(InvokeInst *II, DeferredDominance *DDT = nullptr) { SmallVector Args(II->arg_begin(), II->arg_end()); SmallVector OpBundles; II->getOperandBundlesAsDefs(OpBundles); @@ -1493,11 +1576,16 @@ static void changeToCall(InvokeInst *II) { II->replaceAllUsesWith(NewCall); // Follow the call by a branch to the normal destination. 
- BranchInst::Create(II->getNormalDest(), II); + BasicBlock *NormalDestBB = II->getNormalDest(); + BranchInst::Create(NormalDestBB, II); // Update PHI nodes in the unwind destination - II->getUnwindDest()->removePredecessor(II->getParent()); + BasicBlock *BB = II->getParent(); + BasicBlock *UnwindDestBB = II->getUnwindDest(); + UnwindDestBB->removePredecessor(BB); II->eraseFromParent(); + if (DDT) + DDT->deleteEdge(BB, UnwindDestBB); } BasicBlock *llvm::changeToInvokeAndSplitBasicBlock(CallInst *CI, @@ -1538,7 +1626,8 @@ BasicBlock *llvm::changeToInvokeAndSplitBasicBlock(CallInst *CI, } static bool markAliveBlocks(Function &F, - SmallPtrSetImpl &Reachable) { + SmallPtrSetImpl &Reachable, + DeferredDominance *DDT = nullptr) { SmallVector Worklist; BasicBlock *BB = &F.front(); Worklist.push_back(BB); @@ -1558,7 +1647,7 @@ static bool markAliveBlocks(Function &F, if (II->getIntrinsicID() == Intrinsic::assume) { if (match(II->getArgOperand(0), m_CombineOr(m_Zero(), m_Undef()))) { // Don't insert a call to llvm.trap right before the unreachable. - changeToUnreachable(II, false); + changeToUnreachable(II, false, false, DDT); Changed = true; break; } @@ -1575,7 +1664,8 @@ static bool markAliveBlocks(Function &F, // still be useful for widening. if (match(II->getArgOperand(0), m_Zero())) if (!isa(II->getNextNode())) { - changeToUnreachable(II->getNextNode(), /*UseLLVMTrap=*/ false); + changeToUnreachable(II->getNextNode(), /*UseLLVMTrap=*/false, + false, DDT); Changed = true; break; } @@ -1585,7 +1675,7 @@ static bool markAliveBlocks(Function &F, if (auto *CI = dyn_cast(&I)) { Value *Callee = CI->getCalledValue(); if (isa(Callee) || isa(Callee)) { - changeToUnreachable(CI, /*UseLLVMTrap=*/false); + changeToUnreachable(CI, /*UseLLVMTrap=*/false, false, DDT); Changed = true; break; } @@ -1595,7 +1685,7 @@ static bool markAliveBlocks(Function &F, // though. if (!isa(CI->getNextNode())) { // Don't insert a call to llvm.trap right before the unreachable. - changeToUnreachable(CI->getNextNode(), false); + changeToUnreachable(CI->getNextNode(), false, false, DDT); Changed = true; } break; @@ -1614,7 +1704,7 @@ static bool markAliveBlocks(Function &F, if (isa(Ptr) || (isa(Ptr) && SI->getPointerAddressSpace() == 0)) { - changeToUnreachable(SI, true); + changeToUnreachable(SI, true, false, DDT); Changed = true; break; } @@ -1626,16 +1716,20 @@ static bool markAliveBlocks(Function &F, // Turn invokes that call 'nounwind' functions into ordinary calls. Value *Callee = II->getCalledValue(); if (isa(Callee) || isa(Callee)) { - changeToUnreachable(II, true); + changeToUnreachable(II, true, false, DDT); Changed = true; } else if (II->doesNotThrow() && canSimplifyInvokeNoUnwind(&F)) { if (II->use_empty() && II->onlyReadsMemory()) { // jump to the normal destination branch. 
- BranchInst::Create(II->getNormalDest(), II); - II->getUnwindDest()->removePredecessor(II->getParent()); + BasicBlock *NormalDestBB = II->getNormalDest(); + BasicBlock *UnwindDestBB = II->getUnwindDest(); + BranchInst::Create(NormalDestBB, II); + UnwindDestBB->removePredecessor(II->getParent()); II->eraseFromParent(); + if (DDT) + DDT->deleteEdge(BB, UnwindDestBB); } else - changeToCall(II); + changeToCall(II, DDT); Changed = true; } } else if (auto *CatchSwitch = dyn_cast(Terminator)) { @@ -1681,7 +1775,7 @@ static bool markAliveBlocks(Function &F, } } - Changed |= ConstantFoldTerminator(BB, true); + Changed |= ConstantFoldTerminator(BB, true, nullptr, DDT); for (BasicBlock *Successor : successors(BB)) if (Reachable.insert(Successor).second) Worklist.push_back(Successor); @@ -1689,11 +1783,11 @@ static bool markAliveBlocks(Function &F, return Changed; } -void llvm::removeUnwindEdge(BasicBlock *BB) { +void llvm::removeUnwindEdge(BasicBlock *BB, DeferredDominance *DDT) { TerminatorInst *TI = BB->getTerminator(); if (auto *II = dyn_cast(TI)) { - changeToCall(II); + changeToCall(II, DDT); return; } @@ -1721,15 +1815,18 @@ void llvm::removeUnwindEdge(BasicBlock *BB) { UnwindDest->removePredecessor(BB); TI->replaceAllUsesWith(NewTI); TI->eraseFromParent(); + if (DDT) + DDT->deleteEdge(BB, UnwindDest); } /// removeUnreachableBlocks - Remove blocks that are not reachable, even /// if they are in a dead cycle. Return true if a change was made, false /// otherwise. If `LVI` is passed, this function preserves LazyValueInfo /// after modifying the CFG. -bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI) { +bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI, + DeferredDominance *DDT) { SmallPtrSet Reachable; - bool Changed = markAliveBlocks(F, Reachable); + bool Changed = markAliveBlocks(F, Reachable, DDT); // If there are unreachable blocks in the CFG... if (Reachable.size() == F.size()) @@ -1739,25 +1836,39 @@ bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI) { NumRemoved += F.size()-Reachable.size(); // Loop over all of the basic blocks that are not reachable, dropping all of - // their internal references... - for (Function::iterator BB = ++F.begin(), E = F.end(); BB != E; ++BB) { - if (Reachable.count(&*BB)) + // their internal references. Update DDT and LVI if available. + std::vector Updates; + for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ++I) { + auto *BB = &*I; + if (Reachable.count(BB)) continue; - - for (BasicBlock *Successor : successors(&*BB)) + for (BasicBlock *Successor : successors(BB)) { if (Reachable.count(Successor)) - Successor->removePredecessor(&*BB); + Successor->removePredecessor(BB); + if (DDT) + Updates.push_back({DominatorTree::Delete, BB, Successor}); + } if (LVI) - LVI->eraseBlock(&*BB); + LVI->eraseBlock(BB); BB->dropAllReferences(); } - for (Function::iterator I = ++F.begin(); I != F.end();) - if (!Reachable.count(&*I)) - I = F.getBasicBlockList().erase(I); - else + for (Function::iterator I = ++F.begin(); I != F.end();) { + auto *BB = &*I; + if (Reachable.count(BB)) { ++I; + continue; + } + if (DDT) { + DDT->deleteBB(BB); // deferred deletion of BB. + ++I; + } else { + I = F.getBasicBlockList().erase(I); + } + } + if (DDT) + DDT->applyUpdates(Updates); return true; } @@ -2141,8 +2252,6 @@ static bool bitTransformIsCorrectForBitReverse(unsigned From, unsigned To, return From == BitWidth - To - 1; } -/// Given an OR instruction, check to see if this is a bitreverse -/// idiom. 
If so, insert the new intrinsic and return true. bool llvm::recognizeBSwapOrBitReverseIdiom( Instruction *I, bool MatchBSwaps, bool MatchBitReversals, SmallVectorImpl &InsertedInsts) { diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp index 2ed059b91788..92dfb1c7204d 100644 --- a/lib/Transforms/Utils/LoopUnroll.cpp +++ b/lib/Transforms/Utils/LoopUnroll.cpp @@ -21,7 +21,6 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopIterator.h" -#include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/IR/BasicBlock.h" @@ -259,11 +258,8 @@ static bool isEpilogProfitable(Loop *L) { BasicBlock *PreHeader = L->getLoopPreheader(); BasicBlock *Header = L->getHeader(); assert(PreHeader && Header); - for (Instruction &BBI : *Header) { - PHINode *PN = dyn_cast(&BBI); - if (!PN) - break; - if (isa(PN->getIncomingValueForBlock(PreHeader))) + for (const PHINode &PN : Header->phis()) { + if (isa(PN.getIncomingValueForBlock(PreHeader))) return true; } return false; @@ -612,13 +608,12 @@ LoopUnrollResult llvm::UnrollLoop( for (BasicBlock *Succ : successors(*BB)) { if (L->contains(Succ)) continue; - for (BasicBlock::iterator BBI = Succ->begin(); - PHINode *phi = dyn_cast(BBI); ++BBI) { - Value *Incoming = phi->getIncomingValueForBlock(*BB); + for (PHINode &PHI : Succ->phis()) { + Value *Incoming = PHI.getIncomingValueForBlock(*BB); ValueToValueMapTy::iterator It = LastValueMap.find(Incoming); if (It != LastValueMap.end()) Incoming = It->second; - phi->addIncoming(Incoming, New); + PHI.addIncoming(Incoming, New); } } // Keep track of new headers and latches as we create them, so that @@ -722,10 +717,8 @@ LoopUnrollResult llvm::UnrollLoop( for (BasicBlock *Succ: successors(BB)) { if (Succ == Headers[i]) continue; - for (BasicBlock::iterator BBI = Succ->begin(); - PHINode *Phi = dyn_cast(BBI); ++BBI) { - Phi->removeIncomingValue(BB, false); - } + for (PHINode &Phi : Succ->phis()) + Phi.removeIncomingValue(BB, false); } } // Replace the conditional branch with an unconditional one. diff --git a/lib/Transforms/Utils/LoopUnrollPeel.cpp b/lib/Transforms/Utils/LoopUnrollPeel.cpp index 4273ce0b6200..4642a50ba6d5 100644 --- a/lib/Transforms/Utils/LoopUnrollPeel.cpp +++ b/lib/Transforms/Utils/LoopUnrollPeel.cpp @@ -203,7 +203,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, // hit the peeled section. // We only do this in the presence of profile information, since otherwise // our estimates of the trip count are not reliable enough. - if (UP.AllowPeeling && L->getHeader()->getParent()->getEntryCount()) { + if (UP.AllowPeeling && L->getHeader()->getParent()->hasProfileData()) { Optional PeelCount = getLoopEstimatedTripCount(L); if (!PeelCount) return; @@ -272,7 +272,7 @@ static void updateBranchWeights(BasicBlock *Header, BranchInst *LatchBR, /// \param IterNumber The serial number of the iteration currently being /// peeled off. /// \param Exit The exit block of the original loop. -/// \param[out] NewBlocks A list of the the blocks in the newly created clone +/// \param[out] NewBlocks A list of the blocks in the newly created clone /// \param[out] VMap The value map between the loop and the new clone. /// \param LoopBlocks A helper for DFS-traversal of the loop. 
/// \param LVMap A value-map that maps instructions from the original loop to diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp index 351163bfa02b..f79f423ce019 100644 --- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -25,7 +25,6 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/LoopIterator.h" -#include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/IR/BasicBlock.h" @@ -81,25 +80,21 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count, // The new PHI node value is added as an operand of a PHI node in either // the loop header or the loop exit block. for (BasicBlock *Succ : successors(Latch)) { - for (Instruction &BBI : *Succ) { - PHINode *PN = dyn_cast(&BBI); - // Exit when we passed all PHI nodes. - if (!PN) - break; + for (PHINode &PN : Succ->phis()) { // Add a new PHI node to the prolog end block and add the // appropriate incoming values. - PHINode *NewPN = PHINode::Create(PN->getType(), 2, PN->getName() + ".unr", + PHINode *NewPN = PHINode::Create(PN.getType(), 2, PN.getName() + ".unr", PrologExit->getFirstNonPHI()); // Adding a value to the new PHI node from the original loop preheader. // This is the value that skips all the prolog code. - if (L->contains(PN)) { - NewPN->addIncoming(PN->getIncomingValueForBlock(NewPreHeader), + if (L->contains(&PN)) { + NewPN->addIncoming(PN.getIncomingValueForBlock(NewPreHeader), PreHeader); } else { - NewPN->addIncoming(UndefValue::get(PN->getType()), PreHeader); + NewPN->addIncoming(UndefValue::get(PN.getType()), PreHeader); } - Value *V = PN->getIncomingValueForBlock(Latch); + Value *V = PN.getIncomingValueForBlock(Latch); if (Instruction *I = dyn_cast(V)) { if (L->contains(I)) { V = VMap.lookup(I); @@ -112,10 +107,10 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count, // Update the existing PHI node operand with the value from the // new PHI node. How this is done depends on if the existing // PHI node is in the original loop block, or the exit block. - if (L->contains(PN)) { - PN->setIncomingValue(PN->getBasicBlockIndex(NewPreHeader), NewPN); + if (L->contains(&PN)) { + PN.setIncomingValue(PN.getBasicBlockIndex(NewPreHeader), NewPN); } else { - PN->addIncoming(NewPN, PrologExit); + PN.addIncoming(NewPN, PrologExit); } } } @@ -192,11 +187,7 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit, // Exit (EpilogPN) // Update PHI nodes at NewExit and Exit. - for (Instruction &BBI : *NewExit) { - PHINode *PN = dyn_cast(&BBI); - // Exit when we passed all PHI nodes. - if (!PN) - break; + for (PHINode &PN : NewExit->phis()) { // PN should be used in another PHI located in Exit block as // Exit was split by SplitBlockPredecessors into Exit and NewExit // Basicaly it should look like: @@ -208,14 +199,14 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit, // // There is EpilogPreHeader incoming block instead of NewExit as // NewExit was spilt 1 more time to get EpilogPreHeader. 
- assert(PN->hasOneUse() && "The phi should have 1 use"); - PHINode *EpilogPN = cast (PN->use_begin()->getUser()); + assert(PN.hasOneUse() && "The phi should have 1 use"); + PHINode *EpilogPN = cast(PN.use_begin()->getUser()); assert(EpilogPN->getParent() == Exit && "EpilogPN should be in Exit block"); // Add incoming PreHeader from branch around the Loop - PN->addIncoming(UndefValue::get(PN->getType()), PreHeader); + PN.addIncoming(UndefValue::get(PN.getType()), PreHeader); - Value *V = PN->getIncomingValueForBlock(Latch); + Value *V = PN.getIncomingValueForBlock(Latch); Instruction *I = dyn_cast(V); if (I && L->contains(I)) // If value comes from an instruction in the loop add VMap value. @@ -243,23 +234,19 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit, // Skip this as we already updated phis in exit blocks. if (!L->contains(Succ)) continue; - for (Instruction &BBI : *Succ) { - PHINode *PN = dyn_cast(&BBI); - // Exit when we passed all PHI nodes. - if (!PN) - break; + for (PHINode &PN : Succ->phis()) { // Add new PHI nodes to the loop exit block and update epilog // PHIs with the new PHI values. - PHINode *NewPN = PHINode::Create(PN->getType(), 2, PN->getName() + ".unr", + PHINode *NewPN = PHINode::Create(PN.getType(), 2, PN.getName() + ".unr", NewExit->getFirstNonPHI()); // Adding a value to the new PHI node from the unrolling loop preheader. - NewPN->addIncoming(PN->getIncomingValueForBlock(NewPreHeader), PreHeader); + NewPN->addIncoming(PN.getIncomingValueForBlock(NewPreHeader), PreHeader); // Adding a value to the new PHI node from the unrolling loop latch. - NewPN->addIncoming(PN->getIncomingValueForBlock(Latch), Latch); + NewPN->addIncoming(PN.getIncomingValueForBlock(Latch), Latch); // Update the existing PHI node operand with the value from the new PHI // node. Corresponding instruction in epilog loop should be PHI. - PHINode *VPN = cast(VMap[&BBI]); + PHINode *VPN = cast(VMap[&PN]); VPN->setIncomingValue(VPN->getBasicBlockIndex(EpilogPreHeader), NewPN); } } @@ -649,8 +636,13 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, SmallVector Preds(predecessors(LatchExit)); NewExit = SplitBlockPredecessors(LatchExit, Preds, ".unr-lcssa", DT, LI, PreserveLCSSA); + // NewExit gets its DebugLoc from LatchExit, which is not part of the + // original Loop. + // Fix this by setting Loop's DebugLoc to NewExit. + auto *NewExitTerminator = NewExit->getTerminator(); + NewExitTerminator->setDebugLoc(Header->getTerminator()->getDebugLoc()); // Split NewExit to insert epilog remainder loop. 
- EpilogPreHeader = SplitBlock(NewExit, NewExit->getTerminator(), DT, LI); + EpilogPreHeader = SplitBlock(NewExit, NewExitTerminator, DT, LI); EpilogPreHeader->setName(Header->getName() + ".epil.preheader"); } else { // If prolog remainder diff --git a/lib/Transforms/Utils/LoopUtils.cpp b/lib/Transforms/Utils/LoopUtils.cpp index 0de6924e6354..3ea210d4fe7c 100644 --- a/lib/Transforms/Utils/LoopUtils.cpp +++ b/lib/Transforms/Utils/LoopUtils.cpp @@ -678,7 +678,8 @@ Value *RecurrenceDescriptor::createMinMaxOp(IRBuilder<> &Builder, } InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K, - const SCEV *Step, BinaryOperator *BOp) + const SCEV *Step, BinaryOperator *BOp, + SmallVectorImpl&lt;Instruction *&gt; *Casts) : StartValue(Start), IK(K), Step(Step), InductionBinOp(BOp) { assert(IK != IK_NoInduction && "Not an induction"); @@ -705,6 +706,12 @@ InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K, (InductionBinOp->getOpcode() == Instruction::FAdd || InductionBinOp->getOpcode() == Instruction::FSub))) && "Binary opcode should be specified for FP induction"); + + if (Casts) { + for (auto &Inst : *Casts) { + RedundantCasts.push_back(Inst); + } + } } int InductionDescriptor::getConsecutiveDirection() const { @@ -808,7 +815,7 @@ bool InductionDescriptor::isFPInductionPHI(PHINode *Phi, const Loop *TheLoop, StartValue = Phi->getIncomingValue(1); } else { assert(TheLoop->contains(Phi->getIncomingBlock(1)) && - "Unexpected Phi node in the loop"); + "Unexpected Phi node in the loop"); BEValue = Phi->getIncomingValue(1); StartValue = Phi->getIncomingValue(0); } @@ -841,6 +848,111 @@ bool InductionDescriptor::isFPInductionPHI(PHINode *Phi, const Loop *TheLoop, return true; } +/// This function is called when we suspect that the update-chain of a phi node +/// (whose symbolic SCEV expression is in \p PhiScev) contains redundant casts +/// that can be ignored. (This can happen when the PSCEV rewriter adds a runtime +/// predicate P under which the SCEV expression for the phi can be the +/// AddRecurrence \p AR; see createAddRecFromPHIWithCast). We want to find the +/// cast instructions that are involved in the update-chain of this induction. +/// A caller that adds the required runtime predicate can be free to drop these +/// cast instructions, and compute the phi using \p AR (instead of some scev +/// expression with casts). +/// +/// For example, without a predicate the scev expression can take the following +/// form: +/// (Ext ix (Trunc iy ( Start + i*Step ) to ix) to iy) +/// +/// It corresponds to the following IR sequence: +/// %for.body: +/// %x = phi i64 [ 0, %ph ], [ %add, %for.body ] +/// %casted_phi = "ExtTrunc i64 %x" +/// %add = add i64 %casted_phi, %step +/// +/// where %x is given in \p PN, +/// PSE.getSCEV(%x) is equal to PSE.getSCEV(%casted_phi) under a predicate, +/// and the IR sequence that "ExtTrunc i64 %x" represents can take one of +/// several forms, for example, such as: +/// ExtTrunc1: %casted_phi = and %x, 2^n-1 +/// or: +/// ExtTrunc2: %t = shl %x, m +/// %casted_phi = ashr %t, m +/// +/// If we are able to find such a sequence, we return the instructions +/// we found, namely %casted_phi and the instructions on its use-def chain up +/// to the phi (not including the phi).
+static bool getCastsForInductionPHI(PredicatedScalarEvolution &PSE,
+                                    const SCEVUnknown *PhiScev,
+                                    const SCEVAddRecExpr *AR,
+                                    SmallVectorImpl<Instruction *> &CastInsts) {
+
+  assert(CastInsts.empty() && "CastInsts is expected to be empty.");
+  auto *PN = cast<PHINode>(PhiScev->getValue());
+  assert(PSE.getSCEV(PN) == AR && "Unexpected phi node SCEV expression");
+  const Loop *L = AR->getLoop();
+
+  // Find any cast instructions that participate in the def-use chain of
+  // PhiScev in the loop.
+  // FORNOW/TODO: We currently expect the def-use chain to include only
+  // two-operand instructions, where one of the operands is an invariant.
+  // createAddRecFromPHIWithCasts() currently does not support anything more
+  // involved than that, so we keep the search simple. This can be
+  // extended/generalized as needed.
+
+  auto getDef = [&](const Value *Val) -> Value * {
+    const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Val);
+    if (!BinOp)
+      return nullptr;
+    Value *Op0 = BinOp->getOperand(0);
+    Value *Op1 = BinOp->getOperand(1);
+    Value *Def = nullptr;
+    if (L->isLoopInvariant(Op0))
+      Def = Op1;
+    else if (L->isLoopInvariant(Op1))
+      Def = Op0;
+    return Def;
+  };
+
+  // Look for the instruction that defines the induction via the
+  // loop backedge.
+  BasicBlock *Latch = L->getLoopLatch();
+  if (!Latch)
+    return false;
+  Value *Val = PN->getIncomingValueForBlock(Latch);
+  if (!Val)
+    return false;
+
+  // Follow the def-use chain until the induction phi is reached.
+  // If on the way we encounter a Value that has the same SCEV Expr as the
+  // phi node, we can consider the instructions we visit from that point
+  // as part of the cast-sequence that can be ignored.
+  bool InCastSequence = false;
+  auto *Inst = dyn_cast<Instruction>(Val);
+  while (Val != PN) {
+    // If we encountered a phi node other than PN, or if we left the loop,
+    // we bail out.
+    if (!Inst || !L->contains(Inst)) {
+      return false;
+    }
+    auto *AddRec = dyn_cast<SCEVAddRecExpr>(PSE.getSCEV(Val));
+    if (AddRec && PSE.areAddRecsEqualWithPreds(AddRec, AR))
+      InCastSequence = true;
+    if (InCastSequence) {
+      // Only the last instruction in the cast sequence is expected to have
+      // uses outside the induction def-use chain.
+      if (!CastInsts.empty())
+        if (!Inst->hasOneUse())
+          return false;
+      CastInsts.push_back(Inst);
+    }
+    Val = getDef(Val);
+    if (!Val)
+      return false;
+    Inst = dyn_cast<Instruction>(Val);
+  }
+
+  return InCastSequence;
+}
+
 bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop,
                                          PredicatedScalarEvolution &PSE,
                                          InductionDescriptor &D,
@@ -870,13 +982,26 @@ bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop,
     return false;
   }

+  // Record any Cast instructions that participate in the induction update.
+  const auto *SymbolicPhi = dyn_cast<SCEVUnknown>(PhiScev);
+  // If we started from an UnknownSCEV, and managed to build an addRecurrence
+  // only after enabling Assume with PSCEV, this means we may have encountered
+  // cast instructions that required adding a runtime check in order to
+  // guarantee the correctness of the AddRecurrence representation of the
+  // induction.
+ if (PhiScev != AR && SymbolicPhi) { + SmallVector Casts; + if (getCastsForInductionPHI(PSE, SymbolicPhi, AR, Casts)) + return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR, &Casts); + } + return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR); } -bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop, - ScalarEvolution *SE, - InductionDescriptor &D, - const SCEV *Expr) { +bool InductionDescriptor::isInductionPHI( + PHINode *Phi, const Loop *TheLoop, ScalarEvolution *SE, + InductionDescriptor &D, const SCEV *Expr, + SmallVectorImpl *CastsToIgnore) { Type *PhiTy = Phi->getType(); // We only handle integer and pointer inductions variables. if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy()) @@ -895,7 +1020,7 @@ bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop, // FIXME: We should treat this as a uniform. Unfortunately, we // don't currently know how to handled uniform PHIs. DEBUG(dbgs() << "LV: PHI is a recurrence with respect to an outer loop.\n"); - return false; + return false; } Value *StartValue = @@ -908,7 +1033,8 @@ bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop, return false; if (PhiTy->isIntegerTy()) { - D = InductionDescriptor(StartValue, IK_IntInduction, Step); + D = InductionDescriptor(StartValue, IK_IntInduction, Step, /*BOp=*/ nullptr, + CastsToIgnore); return true; } @@ -1195,13 +1321,12 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT = nullptr, // Rewrite phis in the exit block to get their inputs from the Preheader // instead of the exiting block. - BasicBlock::iterator BI = ExitBlock->begin(); - while (PHINode *P = dyn_cast(BI)) { + for (PHINode &P : ExitBlock->phis()) { // Set the zero'th element of Phi to be from the preheader and remove all // other incoming values. Given the loop has dedicated exits, all other // incoming values must be from the exiting blocks. int PredIndex = 0; - P->setIncomingBlock(PredIndex, Preheader); + P.setIncomingBlock(PredIndex, Preheader); // Removes all incoming values from all other exiting blocks (including // duplicate values from an exiting block). // Nuke all entries except the zero'th entry which is the preheader entry. @@ -1209,13 +1334,12 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT = nullptr, // below, to keep the indices valid for deletion (removeIncomingValues // updates getNumIncomingValues and shifts all values down into the operand // being deleted). - for (unsigned i = 0, e = P->getNumIncomingValues() - 1; i != e; ++i) - P->removeIncomingValue(e - i, false); + for (unsigned i = 0, e = P.getNumIncomingValues() - 1; i != e; ++i) + P.removeIncomingValue(e - i, false); - assert((P->getNumIncomingValues() == 1 && - P->getIncomingBlock(PredIndex) == Preheader) && + assert((P.getNumIncomingValues() == 1 && + P.getIncomingBlock(PredIndex) == Preheader) && "Should have exactly one value and that's from the preheader!"); - ++BI; } // Disconnect the loop body by branching directly to its exit. @@ -1232,6 +1356,32 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT = nullptr, DT->deleteEdge(Preheader, L->getHeader()); } + // Given LCSSA form is satisfied, we should not have users of instructions + // within the dead loop outside of the loop. However, LCSSA doesn't take + // unreachable uses into account. We handle them here. 
+ // We could do it after drop all references (in this case all users in the + // loop will be already eliminated and we have less work to do but according + // to API doc of User::dropAllReferences only valid operation after dropping + // references, is deletion. So let's substitute all usages of + // instruction from the loop with undef value of corresponding type first. + for (auto *Block : L->blocks()) + for (Instruction &I : *Block) { + auto *Undef = UndefValue::get(I.getType()); + for (Value::use_iterator UI = I.use_begin(), E = I.use_end(); UI != E;) { + Use &U = *UI; + ++UI; + if (auto *Usr = dyn_cast(U.getUser())) + if (L->contains(Usr->getParent())) + continue; + // If we have a DT then we can check that uses outside a loop only in + // unreachable block. + if (DT) + assert(!DT->isReachableFromEntry(U) && + "Unexpected user in reachable block"); + U.set(Undef); + } + } + // Remove the block from the reference counting scheme, so that we can // delete it freely later. for (auto *Block : L->blocks()) @@ -1306,7 +1456,7 @@ Optional llvm::getLoopEstimatedTripCount(Loop *L) { if (!L->getExitingBlock()) return None; - // Get the branch weights for the the loop's backedge. + // Get the branch weights for the loop's backedge. BranchInst *LatchBR = dyn_cast(L->getLoopLatch()->getTerminator()); if (!LatchBR || LatchBR->getNumSuccessors() != 2) @@ -1400,8 +1550,8 @@ Value *llvm::createSimpleTargetReduction( using RD = RecurrenceDescriptor; RD::MinMaxRecurrenceKind MinMaxKind = RD::MRK_Invalid; // TODO: Support creating ordered reductions. - FastMathFlags FMFUnsafe; - FMFUnsafe.setFast(); + FastMathFlags FMFFast; + FMFFast.setFast(); switch (Opcode) { case Instruction::Add: @@ -1422,14 +1572,14 @@ Value *llvm::createSimpleTargetReduction( case Instruction::FAdd: BuildFunc = [&]() { auto Rdx = Builder.CreateFAddReduce(ScalarUdf, Src); - cast(Rdx)->setFastMathFlags(FMFUnsafe); + cast(Rdx)->setFastMathFlags(FMFFast); return Rdx; }; break; case Instruction::FMul: BuildFunc = [&]() { auto Rdx = Builder.CreateFMulReduce(ScalarUdf, Src); - cast(Rdx)->setFastMathFlags(FMFUnsafe); + cast(Rdx)->setFastMathFlags(FMFFast); return Rdx; }; break; @@ -1465,55 +1615,39 @@ Value *llvm::createSimpleTargetReduction( } /// Create a vector reduction using a given recurrence descriptor. -Value *llvm::createTargetReduction(IRBuilder<> &Builder, +Value *llvm::createTargetReduction(IRBuilder<> &B, const TargetTransformInfo *TTI, RecurrenceDescriptor &Desc, Value *Src, bool NoNaN) { // TODO: Support in-order reductions based on the recurrence descriptor. 
- RecurrenceDescriptor::RecurrenceKind RecKind = Desc.getRecurrenceKind(); + using RD = RecurrenceDescriptor; + RD::RecurrenceKind RecKind = Desc.getRecurrenceKind(); TargetTransformInfo::ReductionFlags Flags; Flags.NoNaN = NoNaN; - auto getSimpleRdx = [&](unsigned Opc) { - return createSimpleTargetReduction(Builder, TTI, Opc, Src, Flags); - }; switch (RecKind) { - case RecurrenceDescriptor::RK_FloatAdd: - return getSimpleRdx(Instruction::FAdd); - case RecurrenceDescriptor::RK_FloatMult: - return getSimpleRdx(Instruction::FMul); - case RecurrenceDescriptor::RK_IntegerAdd: - return getSimpleRdx(Instruction::Add); - case RecurrenceDescriptor::RK_IntegerMult: - return getSimpleRdx(Instruction::Mul); - case RecurrenceDescriptor::RK_IntegerAnd: - return getSimpleRdx(Instruction::And); - case RecurrenceDescriptor::RK_IntegerOr: - return getSimpleRdx(Instruction::Or); - case RecurrenceDescriptor::RK_IntegerXor: - return getSimpleRdx(Instruction::Xor); - case RecurrenceDescriptor::RK_IntegerMinMax: { - switch (Desc.getMinMaxRecurrenceKind()) { - case RecurrenceDescriptor::MRK_SIntMax: - Flags.IsSigned = true; - Flags.IsMaxOp = true; - break; - case RecurrenceDescriptor::MRK_UIntMax: - Flags.IsMaxOp = true; - break; - case RecurrenceDescriptor::MRK_SIntMin: - Flags.IsSigned = true; - break; - case RecurrenceDescriptor::MRK_UIntMin: - break; - default: - llvm_unreachable("Unhandled MRK"); - } - return getSimpleRdx(Instruction::ICmp); + case RD::RK_FloatAdd: + return createSimpleTargetReduction(B, TTI, Instruction::FAdd, Src, Flags); + case RD::RK_FloatMult: + return createSimpleTargetReduction(B, TTI, Instruction::FMul, Src, Flags); + case RD::RK_IntegerAdd: + return createSimpleTargetReduction(B, TTI, Instruction::Add, Src, Flags); + case RD::RK_IntegerMult: + return createSimpleTargetReduction(B, TTI, Instruction::Mul, Src, Flags); + case RD::RK_IntegerAnd: + return createSimpleTargetReduction(B, TTI, Instruction::And, Src, Flags); + case RD::RK_IntegerOr: + return createSimpleTargetReduction(B, TTI, Instruction::Or, Src, Flags); + case RD::RK_IntegerXor: + return createSimpleTargetReduction(B, TTI, Instruction::Xor, Src, Flags); + case RD::RK_IntegerMinMax: { + RD::MinMaxRecurrenceKind MMKind = Desc.getMinMaxRecurrenceKind(); + Flags.IsMaxOp = (MMKind == RD::MRK_SIntMax || MMKind == RD::MRK_UIntMax); + Flags.IsSigned = (MMKind == RD::MRK_SIntMax || MMKind == RD::MRK_SIntMin); + return createSimpleTargetReduction(B, TTI, Instruction::ICmp, Src, Flags); } - case RecurrenceDescriptor::RK_FloatMinMax: { - Flags.IsMaxOp = - Desc.getMinMaxRecurrenceKind() == RecurrenceDescriptor::MRK_FloatMax; - return getSimpleRdx(Instruction::FCmp); + case RD::RK_FloatMinMax: { + Flags.IsMaxOp = Desc.getMinMaxRecurrenceKind() == RD::MRK_FloatMax; + return createSimpleTargetReduction(B, TTI, Instruction::FCmp, Src, Flags); } default: llvm_unreachable("Unhandled RecKind"); diff --git a/lib/Transforms/Utils/LowerMemIntrinsics.cpp b/lib/Transforms/Utils/LowerMemIntrinsics.cpp index 900450b40061..57dc225e9dab 100644 --- a/lib/Transforms/Utils/LowerMemIntrinsics.cpp +++ b/lib/Transforms/Utils/LowerMemIntrinsics.cpp @@ -168,13 +168,14 @@ void llvm::createMemCpyLoopUnknownSize(Instruction *InsertBefore, IntegerType *ILengthType = dyn_cast(CopyLenType); assert(ILengthType && "expected size argument to memcpy to be an integer type!"); + Type *Int8Type = Type::getInt8Ty(Ctx); + bool LoopOpIsInt8 = LoopOpType == Int8Type; ConstantInt *CILoopOpSize = ConstantInt::get(ILengthType, LoopOpSize); - Value *RuntimeLoopCount = 
PLBuilder.CreateUDiv(CopyLen, CILoopOpSize); - Value *RuntimeResidual = PLBuilder.CreateURem(CopyLen, CILoopOpSize); - Value *RuntimeBytesCopied = PLBuilder.CreateSub(CopyLen, RuntimeResidual); - + Value *RuntimeLoopCount = LoopOpIsInt8 ? + CopyLen : + PLBuilder.CreateUDiv(CopyLen, CILoopOpSize); BasicBlock *LoopBB = - BasicBlock::Create(Ctx, "loop-memcpy-expansion", ParentFunc, nullptr); + BasicBlock::Create(Ctx, "loop-memcpy-expansion", ParentFunc, PostLoopBB); IRBuilder<> LoopBuilder(LoopBB); PHINode *LoopIndex = LoopBuilder.CreatePHI(CopyLenType, 2, "loop-index"); @@ -189,11 +190,15 @@ void llvm::createMemCpyLoopUnknownSize(Instruction *InsertBefore, LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(CopyLenType, 1U)); LoopIndex->addIncoming(NewIndex, LoopBB); - Type *Int8Type = Type::getInt8Ty(Ctx); - if (LoopOpType != Int8Type) { + if (!LoopOpIsInt8) { + // Add in the + Value *RuntimeResidual = PLBuilder.CreateURem(CopyLen, CILoopOpSize); + Value *RuntimeBytesCopied = PLBuilder.CreateSub(CopyLen, RuntimeResidual); + // Loop body for the residual copy. BasicBlock *ResLoopBB = BasicBlock::Create(Ctx, "loop-memcpy-residual", - PreLoopBB->getParent(), nullptr); + PreLoopBB->getParent(), + PostLoopBB); // Residual loop header. BasicBlock *ResHeaderBB = BasicBlock::Create( Ctx, "loop-memcpy-residual-header", PreLoopBB->getParent(), nullptr); @@ -258,61 +263,6 @@ void llvm::createMemCpyLoopUnknownSize(Instruction *InsertBefore, } } -void llvm::createMemCpyLoop(Instruction *InsertBefore, - Value *SrcAddr, Value *DstAddr, Value *CopyLen, - unsigned SrcAlign, unsigned DestAlign, - bool SrcIsVolatile, bool DstIsVolatile) { - Type *TypeOfCopyLen = CopyLen->getType(); - - BasicBlock *OrigBB = InsertBefore->getParent(); - Function *F = OrigBB->getParent(); - BasicBlock *NewBB = - InsertBefore->getParent()->splitBasicBlock(InsertBefore, "split"); - BasicBlock *LoopBB = BasicBlock::Create(F->getContext(), "loadstoreloop", - F, NewBB); - - IRBuilder<> Builder(OrigBB->getTerminator()); - - // SrcAddr and DstAddr are expected to be pointer types, - // so no check is made here. - unsigned SrcAS = cast(SrcAddr->getType())->getAddressSpace(); - unsigned DstAS = cast(DstAddr->getType())->getAddressSpace(); - - // Cast pointers to (char *) - SrcAddr = Builder.CreateBitCast(SrcAddr, Builder.getInt8PtrTy(SrcAS)); - DstAddr = Builder.CreateBitCast(DstAddr, Builder.getInt8PtrTy(DstAS)); - - Builder.CreateCondBr( - Builder.CreateICmpEQ(ConstantInt::get(TypeOfCopyLen, 0), CopyLen), NewBB, - LoopBB); - OrigBB->getTerminator()->eraseFromParent(); - - IRBuilder<> LoopBuilder(LoopBB); - PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 0); - LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), OrigBB); - - // load from SrcAddr+LoopIndex - // TODO: we can leverage the align parameter of llvm.memcpy for more efficient - // word-sized loads and stores. - Value *Element = - LoopBuilder.CreateLoad(LoopBuilder.CreateInBoundsGEP( - LoopBuilder.getInt8Ty(), SrcAddr, LoopIndex), - SrcIsVolatile); - // store at DstAddr+LoopIndex - LoopBuilder.CreateStore(Element, - LoopBuilder.CreateInBoundsGEP(LoopBuilder.getInt8Ty(), - DstAddr, LoopIndex), - DstIsVolatile); - - // The value for LoopIndex coming from backedge is (LoopIndex + 1) - Value *NewIndex = - LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1)); - LoopIndex->addIncoming(NewIndex, LoopBB); - - LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, CopyLen), LoopBB, - NewBB); -} - // Lower memmove to IR. 
memmove is required to correctly copy overlapping memory // regions; therefore, it has to check the relative positions of the source and // destination pointers and choose the copy direction accordingly. @@ -454,38 +404,26 @@ static void createMemSetLoop(Instruction *InsertBefore, void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy, const TargetTransformInfo &TTI) { - // Original implementation - if (!TTI.useWideIRMemcpyLoopLowering()) { - createMemCpyLoop(/* InsertBefore */ Memcpy, - /* SrcAddr */ Memcpy->getRawSource(), - /* DstAddr */ Memcpy->getRawDest(), - /* CopyLen */ Memcpy->getLength(), - /* SrcAlign */ Memcpy->getAlignment(), - /* DestAlign */ Memcpy->getAlignment(), - /* SrcIsVolatile */ Memcpy->isVolatile(), - /* DstIsVolatile */ Memcpy->isVolatile()); + if (ConstantInt *CI = dyn_cast(Memcpy->getLength())) { + createMemCpyLoopKnownSize(/* InsertBefore */ Memcpy, + /* SrcAddr */ Memcpy->getRawSource(), + /* DstAddr */ Memcpy->getRawDest(), + /* CopyLen */ CI, + /* SrcAlign */ Memcpy->getAlignment(), + /* DestAlign */ Memcpy->getAlignment(), + /* SrcIsVolatile */ Memcpy->isVolatile(), + /* DstIsVolatile */ Memcpy->isVolatile(), + /* TargetTransformInfo */ TTI); } else { - if (ConstantInt *CI = dyn_cast(Memcpy->getLength())) { - createMemCpyLoopKnownSize(/* InsertBefore */ Memcpy, + createMemCpyLoopUnknownSize(/* InsertBefore */ Memcpy, /* SrcAddr */ Memcpy->getRawSource(), /* DstAddr */ Memcpy->getRawDest(), - /* CopyLen */ CI, + /* CopyLen */ Memcpy->getLength(), /* SrcAlign */ Memcpy->getAlignment(), /* DestAlign */ Memcpy->getAlignment(), /* SrcIsVolatile */ Memcpy->isVolatile(), /* DstIsVolatile */ Memcpy->isVolatile(), - /* TargetTransformInfo */ TTI); - } else { - createMemCpyLoopUnknownSize(/* InsertBefore */ Memcpy, - /* SrcAddr */ Memcpy->getRawSource(), - /* DstAddr */ Memcpy->getRawDest(), - /* CopyLen */ Memcpy->getLength(), - /* SrcAlign */ Memcpy->getAlignment(), - /* DestAlign */ Memcpy->getAlignment(), - /* SrcIsVolatile */ Memcpy->isVolatile(), - /* DstIsVolatile */ Memcpy->isVolatile(), - /* TargetTransfomrInfo */ TTI); - } + /* TargetTransfomrInfo */ TTI); } } diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp index e4b20b0faa15..b2231d68a301 100644 --- a/lib/Transforms/Utils/SSAUpdater.cpp +++ b/lib/Transforms/Utils/SSAUpdater.cpp @@ -147,11 +147,9 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { if (isa(BB->begin())) { SmallDenseMap ValueMapping(PredValues.begin(), PredValues.end()); - PHINode *SomePHI; - for (BasicBlock::iterator It = BB->begin(); - (SomePHI = dyn_cast(It)); ++It) { - if (IsEquivalentPHI(SomePHI, ValueMapping)) - return SomePHI; + for (PHINode &SomePHI : BB->phis()) { + if (IsEquivalentPHI(&SomePHI, ValueMapping)) + return &SomePHI; } } diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 8f1626a149a5..c3343ed8ecc9 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -283,12 +283,8 @@ isProfitableToFoldUnconditional(BranchInst *SI1, BranchInst *SI2, /// of Succ. 
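Returning to the LowerMemIntrinsics change above: a rough C-level picture of the control flow that createMemCpyLoopUnknownSize now emits. This is only a sketch; the 4-byte loop-operand width and all names are illustrative assumptions, and when the loop operand is i8 the residual loop is skipped entirely because the division and remainder would be by one:

#include <cstddef>
#include <cstdint>
#include <cstring>

void memcpyExpandedSketch(uint8_t *Dst, const uint8_t *Src, size_t CopyLen) {
  const size_t LoopOpSize = 4;              // illustrative wide-type width
  size_t LoopCount   = CopyLen / LoopOpSize; // RuntimeLoopCount
  size_t Residual    = CopyLen % LoopOpSize; // RuntimeResidual
  size_t BytesCopied = CopyLen - Residual;   // RuntimeBytesCopied

  // loop-memcpy-expansion: one wide load/store per iteration.
  for (size_t I = 0; I != LoopCount; ++I) {
    uint32_t Tmp; // stands in for the wide loop operand
    std::memcpy(&Tmp, Src + I * LoopOpSize, LoopOpSize);
    std::memcpy(Dst + I * LoopOpSize, &Tmp, LoopOpSize);
  }

  // loop-memcpy-residual: copy the remaining bytes one at a time.
  for (size_t I = 0; I != Residual; ++I)
    Dst[BytesCopied + I] = Src[BytesCopied + I];
}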
static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, BasicBlock *ExistPred) { - if (!isa(Succ->begin())) - return; // Quick exit if nothing to do - - PHINode *PN; - for (BasicBlock::iterator I = Succ->begin(); (PN = dyn_cast(I)); ++I) - PN->addIncoming(PN->getIncomingValueForBlock(ExistPred), NewPred); + for (PHINode &PN : Succ->phis()) + PN.addIncoming(PN.getIncomingValueForBlock(ExistPred), NewPred); } /// Compute an abstract "cost" of speculating the given instruction, @@ -1228,11 +1224,9 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2, Instruction *I1, Instruction *I2) { for (BasicBlock *Succ : successors(BB1)) { - PHINode *PN; - for (BasicBlock::iterator BBI = Succ->begin(); - (PN = dyn_cast(BBI)); ++BBI) { - Value *BB1V = PN->getIncomingValueForBlock(BB1); - Value *BB2V = PN->getIncomingValueForBlock(BB2); + for (const PHINode &PN : Succ->phis()) { + Value *BB1V = PN.getIncomingValueForBlock(BB1); + Value *BB2V = PN.getIncomingValueForBlock(BB2); if (BB1V != BB2V && (BB1V == I1 || BB2V == I2)) { return false; } @@ -1282,6 +1276,17 @@ static bool HoistThenElseCodeToIf(BranchInst *BI, if (isa(I1)) goto HoistTerminator; + // If we're going to hoist a call, make sure that the two instructions we're + // commoning/hoisting are both marked with musttail, or neither of them is + // marked as such. Otherwise, we might end up in a situation where we hoist + // from a block where the terminator is a `ret` to a block where the terminator + // is a `br`, and `musttail` calls expect to be followed by a return. + auto *C1 = dyn_cast(I1); + auto *C2 = dyn_cast(I2); + if (C1 && C2) + if (C1->isMustTailCall() != C2->isMustTailCall()) + return Changed; + if (!TTI.isProfitableToHoist(I1) || !TTI.isProfitableToHoist(I2)) return Changed; @@ -1332,18 +1337,16 @@ static bool HoistThenElseCodeToIf(BranchInst *BI, return Changed; for (BasicBlock *Succ : successors(BB1)) { - PHINode *PN; - for (BasicBlock::iterator BBI = Succ->begin(); - (PN = dyn_cast(BBI)); ++BBI) { - Value *BB1V = PN->getIncomingValueForBlock(BB1); - Value *BB2V = PN->getIncomingValueForBlock(BB2); + for (PHINode &PN : Succ->phis()) { + Value *BB1V = PN.getIncomingValueForBlock(BB1); + Value *BB2V = PN.getIncomingValueForBlock(BB2); if (BB1V == BB2V) continue; // Check for passingValueIsAlwaysUndefined here because we would rather // eliminate undefined control flow then converting it to a select. - if (passingValueIsAlwaysUndefined(BB1V, PN) || - passingValueIsAlwaysUndefined(BB2V, PN)) + if (passingValueIsAlwaysUndefined(BB1V, &PN) || + passingValueIsAlwaysUndefined(BB2V, &PN)) return Changed; if (isa(BB1V) && !isSafeToSpeculativelyExecute(BB1V)) @@ -1369,11 +1372,9 @@ static bool HoistThenElseCodeToIf(BranchInst *BI, // nodes, so we insert select instruction to compute the final result. std::map, SelectInst *> InsertedSelects; for (BasicBlock *Succ : successors(BB1)) { - PHINode *PN; - for (BasicBlock::iterator BBI = Succ->begin(); - (PN = dyn_cast(BBI)); ++BBI) { - Value *BB1V = PN->getIncomingValueForBlock(BB1); - Value *BB2V = PN->getIncomingValueForBlock(BB2); + for (PHINode &PN : Succ->phis()) { + Value *BB1V = PN.getIncomingValueForBlock(BB1); + Value *BB2V = PN.getIncomingValueForBlock(BB2); if (BB1V == BB2V) continue; @@ -1386,9 +1387,9 @@ static bool HoistThenElseCodeToIf(BranchInst *BI, BB1V->getName() + "." 
+ BB2V->getName(), BI)); // Make the PHI node use the select for all incoming values for BB1/BB2 - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) - if (PN->getIncomingBlock(i) == BB1 || PN->getIncomingBlock(i) == BB2) - PN->setIncomingValue(i, SI); + for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) + if (PN.getIncomingBlock(i) == BB1 || PN.getIncomingBlock(i) == BB2) + PN.setIncomingValue(i, SI); } } @@ -1654,14 +1655,11 @@ namespace { } // end anonymous namespace -/// Given an unconditional branch that goes to BBEnd, -/// check whether BBEnd has only two predecessors and the other predecessor -/// ends with an unconditional branch. If it is true, sink any common code -/// in the two predecessors to BBEnd. -static bool SinkThenElseCodeToEnd(BranchInst *BI1) { - assert(BI1->isUnconditional()); - BasicBlock *BBEnd = BI1->getSuccessor(0); - +/// Check whether BB's predecessors end with unconditional branches. If it is +/// true, sink any common code from the predecessors to BB. +/// We also allow one predecessor to end with conditional branch (but no more +/// than one). +static bool SinkCommonCodeFromPredecessors(BasicBlock *BB) { // We support two situations: // (1) all incoming arcs are unconditional // (2) one incoming arc is conditional @@ -1705,7 +1703,7 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) { // SmallVector UnconditionalPreds; Instruction *Cond = nullptr; - for (auto *B : predecessors(BBEnd)) { + for (auto *B : predecessors(BB)) { auto *T = B->getTerminator(); if (isa(T) && cast(T)->isUnconditional()) UnconditionalPreds.push_back(B); @@ -1773,8 +1771,7 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) { DEBUG(dbgs() << "SINK: Splitting edge\n"); // We have a conditional edge and we're going to sink some instructions. // Insert a new block postdominating all blocks we're going to sink from. - if (!SplitBlockPredecessors(BI1->getSuccessor(0), UnconditionalPreds, - ".sink.split")) + if (!SplitBlockPredecessors(BB, UnconditionalPreds, ".sink.split")) // Edges couldn't be split. return false; Changed = true; @@ -2003,10 +2000,9 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, // Check that the PHI nodes can be converted to selects. bool HaveRewritablePHIs = false; - for (BasicBlock::iterator I = EndBB->begin(); - PHINode *PN = dyn_cast(I); ++I) { - Value *OrigV = PN->getIncomingValueForBlock(BB); - Value *ThenV = PN->getIncomingValueForBlock(ThenBB); + for (PHINode &PN : EndBB->phis()) { + Value *OrigV = PN.getIncomingValueForBlock(BB); + Value *ThenV = PN.getIncomingValueForBlock(ThenBB); // FIXME: Try to remove some of the duplication with HoistThenElseCodeToIf. // Skip PHIs which are trivial. @@ -2014,8 +2010,8 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, continue; // Don't convert to selects if we could remove undefined behavior instead. - if (passingValueIsAlwaysUndefined(OrigV, PN) || - passingValueIsAlwaysUndefined(ThenV, PN)) + if (passingValueIsAlwaysUndefined(OrigV, &PN) || + passingValueIsAlwaysUndefined(ThenV, &PN)) return false; HaveRewritablePHIs = true; @@ -2076,12 +2072,11 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, // Insert selects and rewrite the PHI operands. 
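As a source-level illustration of the select rewrite performed at this point in SpeculativelyExecuteBB (a conceptual sketch only; the names are made up and the real transform rewrites the phi's incoming values):

// Before speculation: the value is computed on one path only and a phi
// merges it with the original value.
int beforeSpeculation(bool BrCond, int OrigV, int A, int B) {
  int X = OrigV;
  if (BrCond)        // ThenBB
    X = A + B;       // the speculated instruction
  return X;          // phi(OrigV, A + B)
}

// After speculation: the instruction runs unconditionally and a select,
// keyed on the branch condition, replaces the phi.
int afterSpeculation(bool BrCond, int OrigV, int A, int B) {
  int ThenV = A + B;             // hoisted into the predecessor
  return BrCond ? ThenV : OrigV; // "spec.select"
}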
IRBuilder Builder(BI); - for (BasicBlock::iterator I = EndBB->begin(); - PHINode *PN = dyn_cast(I); ++I) { - unsigned OrigI = PN->getBasicBlockIndex(BB); - unsigned ThenI = PN->getBasicBlockIndex(ThenBB); - Value *OrigV = PN->getIncomingValue(OrigI); - Value *ThenV = PN->getIncomingValue(ThenI); + for (PHINode &PN : EndBB->phis()) { + unsigned OrigI = PN.getBasicBlockIndex(BB); + unsigned ThenI = PN.getBasicBlockIndex(ThenBB); + Value *OrigV = PN.getIncomingValue(OrigI); + Value *ThenV = PN.getIncomingValue(ThenI); // Skip PHIs which are trivial. if (OrigV == ThenV) @@ -2095,8 +2090,8 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, std::swap(TrueV, FalseV); Value *V = Builder.CreateSelect( BrCond, TrueV, FalseV, "spec.select", BI); - PN->setIncomingValue(OrigI, V); - PN->setIncomingValue(ThenI, V); + PN.setIncomingValue(OrigI, V); + PN.setIncomingValue(ThenI, V); } // Remove speculated dbg intrinsics. @@ -3339,17 +3334,15 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, // it. If it has PHIs though, the PHIs may have different // entries for BB and PBI's BB. If so, insert a select to make // them agree. - PHINode *PN; - for (BasicBlock::iterator II = CommonDest->begin(); - (PN = dyn_cast(II)); ++II) { - Value *BIV = PN->getIncomingValueForBlock(BB); - unsigned PBBIdx = PN->getBasicBlockIndex(PBI->getParent()); - Value *PBIV = PN->getIncomingValue(PBBIdx); + for (PHINode &PN : CommonDest->phis()) { + Value *BIV = PN.getIncomingValueForBlock(BB); + unsigned PBBIdx = PN.getBasicBlockIndex(PBI->getParent()); + Value *PBIV = PN.getIncomingValue(PBBIdx); if (BIV != PBIV) { // Insert a select in PBI to pick the right value. SelectInst *NV = cast( Builder.CreateSelect(PBICond, PBIV, BIV, PBIV->getName() + ".mux")); - PN->setIncomingValue(PBBIdx, NV); + PN.setIncomingValue(PBBIdx, NV); // Although the select has the same condition as PBI, the original branch // weights for PBI do not apply to the new select because the select's // 'logical' edges are incoming edges of the phi that is eliminated, not @@ -4455,17 +4448,16 @@ static PHINode *FindPHIForConditionForwarding(ConstantInt *CaseValue, BasicBlock *Succ = Branch->getSuccessor(0); - BasicBlock::iterator I = Succ->begin(); - while (PHINode *PHI = dyn_cast(I++)) { - int Idx = PHI->getBasicBlockIndex(BB); + for (PHINode &PHI : Succ->phis()) { + int Idx = PHI.getBasicBlockIndex(BB); assert(Idx >= 0 && "PHI has no entry for predecessor?"); - Value *InValue = PHI->getIncomingValue(Idx); + Value *InValue = PHI.getIncomingValue(Idx); if (InValue != CaseValue) continue; *PhiIndex = Idx; - return PHI; + return &PHI; } return nullptr; @@ -4495,19 +4487,16 @@ static bool ForwardSwitchConditionToPHI(SwitchInst *SI) { // --> // %r = phi i32 ... [ %x, %switchbb ] ... - for (Instruction &InstInCaseDest : *CaseDest) { - auto *Phi = dyn_cast(&InstInCaseDest); - if (!Phi) break; - + for (PHINode &Phi : CaseDest->phis()) { // This only works if there is exactly 1 incoming edge from the switch to // a phi. If there is >1, that means multiple cases of the switch map to 1 // value in the phi, and that phi value is not the switch condition. Thus, // this transform would not make sense (the phi would be invalid because // a phi can't have different incoming values from the same block). 
- int SwitchBBIdx = Phi->getBasicBlockIndex(SwitchBlock); - if (Phi->getIncomingValue(SwitchBBIdx) == CaseValue && - count(Phi->blocks(), SwitchBlock) == 1) { - Phi->setIncomingValue(SwitchBBIdx, SI->getCondition()); + int SwitchBBIdx = Phi.getBasicBlockIndex(SwitchBlock); + if (Phi.getIncomingValue(SwitchBBIdx) == CaseValue && + count(Phi.blocks(), SwitchBlock) == 1) { + Phi.setIncomingValue(SwitchBBIdx, SI->getCondition()); Changed = true; } } @@ -4660,14 +4649,13 @@ GetCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest, return false; // Get the values for this case from phi nodes in the destination block. - BasicBlock::iterator I = (*CommonDest)->begin(); - while (PHINode *PHI = dyn_cast(I++)) { - int Idx = PHI->getBasicBlockIndex(Pred); + for (PHINode &PHI : (*CommonDest)->phis()) { + int Idx = PHI.getBasicBlockIndex(Pred); if (Idx == -1) continue; Constant *ConstVal = - LookupConstant(PHI->getIncomingValue(Idx), ConstantPool); + LookupConstant(PHI.getIncomingValue(Idx), ConstantPool); if (!ConstVal) return false; @@ -4675,37 +4663,38 @@ GetCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest, if (!ValidLookupTableConstant(ConstVal, TTI)) return false; - Res.push_back(std::make_pair(PHI, ConstVal)); + Res.push_back(std::make_pair(&PHI, ConstVal)); } return Res.size() > 0; } // Helper function used to add CaseVal to the list of cases that generate -// Result. -static void MapCaseToResult(ConstantInt *CaseVal, - SwitchCaseResultVectorTy &UniqueResults, - Constant *Result) { +// Result. Returns the updated number of cases that generate this result. +static uintptr_t MapCaseToResult(ConstantInt *CaseVal, + SwitchCaseResultVectorTy &UniqueResults, + Constant *Result) { for (auto &I : UniqueResults) { if (I.first == Result) { I.second.push_back(CaseVal); - return; + return I.second.size(); } } UniqueResults.push_back( std::make_pair(Result, SmallVector(1, CaseVal))); + return 1; } // Helper function that initializes a map containing // results for the PHI node of the common destination block for a switch // instruction. Returns false if multiple PHI nodes have been found or if // there is not a common destination block for the switch. -static bool InitializeUniqueCases(SwitchInst *SI, PHINode *&PHI, - BasicBlock *&CommonDest, - SwitchCaseResultVectorTy &UniqueResults, - Constant *&DefaultResult, - const DataLayout &DL, - const TargetTransformInfo &TTI) { +static bool +InitializeUniqueCases(SwitchInst *SI, PHINode *&PHI, BasicBlock *&CommonDest, + SwitchCaseResultVectorTy &UniqueResults, + Constant *&DefaultResult, const DataLayout &DL, + const TargetTransformInfo &TTI, + uintptr_t MaxUniqueResults, uintptr_t MaxCasesPerResult) { for (auto &I : SI->cases()) { ConstantInt *CaseVal = I.getCaseValue(); @@ -4715,10 +4704,21 @@ static bool InitializeUniqueCases(SwitchInst *SI, PHINode *&PHI, DL, TTI)) return false; - // Only one value per case is permitted + // Only one value per case is permitted. if (Results.size() > 1) return false; - MapCaseToResult(CaseVal, UniqueResults, Results.begin()->second); + + // Add the case->result mapping to UniqueResults. + const uintptr_t NumCasesForResult = + MapCaseToResult(CaseVal, UniqueResults, Results.begin()->second); + + // Early out if there are too many cases for this result. + if (NumCasesForResult > MaxCasesPerResult) + return false; + + // Early out if there are too many unique results. + if (UniqueResults.size() > MaxUniqueResults) + return false; // Check the PHI consistency. 
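For context, the limits introduced above (a maximum number of unique results and a maximum number of cases per result) gate the switch-to-select rewrite whose caller appears further down. At the source level it looks roughly like the following sketch, with purely illustrative constants; the hunk resumes below with the PHI-consistency check:

// A value-producing switch with two unique results plus a default ...
int pick(unsigned K) {
  switch (K) {
  case 3:  return 11;
  case 7:  return 22;
  default: return 0;
  }
}

// ... can be expressed as a pair of selects instead of control flow.
int pickAsSelects(unsigned K) {
  int R = (K == 7) ? 22 : 0; // second result vs. the default result
  return (K == 3) ? 11 : R;  // first result
}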
if (!PHI) @@ -4818,7 +4818,7 @@ static bool switchToSelect(SwitchInst *SI, IRBuilder<> &Builder, SwitchCaseResultVectorTy UniqueResults; // Collect all the cases that will deliver the same value from the switch. if (!InitializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult, - DL, TTI)) + DL, TTI, 2, 1)) return false; // Selects choose between maximum two values. if (UniqueResults.size() != 2) @@ -5174,7 +5174,7 @@ static void reuseTableCompare( for (auto ValuePair : Values) { Constant *CaseConst = ConstantExpr::getICmp(CmpInst->getPredicate(), ValuePair.second, CmpOp1, true); - if (!CaseConst || CaseConst == DefaultConst) + if (!CaseConst || CaseConst == DefaultConst || isa(CaseConst)) return; assert((CaseConst == TrueConst || CaseConst == FalseConst) && "Expect true or false as compare result."); @@ -5728,9 +5728,6 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, BasicBlock *BB = BI->getParent(); BasicBlock *Succ = BI->getSuccessor(0); - if (SinkCommon && SinkThenElseCodeToEnd(BI)) - return true; - // If the Terminator is the only non-phi instruction, simplify the block. // If LoopHeader is provided, check if the block or its successor is a loop // header. (This is for early invocations before loop simplify and @@ -5953,14 +5950,13 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I) { /// If BB has an incoming value that will always trigger undefined behavior /// (eg. null pointer dereference), remove the branch leading here. static bool removeUndefIntroducingPredecessor(BasicBlock *BB) { - for (BasicBlock::iterator i = BB->begin(); - PHINode *PHI = dyn_cast(i); ++i) - for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) - if (passingValueIsAlwaysUndefined(PHI->getIncomingValue(i), PHI)) { - TerminatorInst *T = PHI->getIncomingBlock(i)->getTerminator(); + for (PHINode &PHI : BB->phis()) + for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i) + if (passingValueIsAlwaysUndefined(PHI.getIncomingValue(i), &PHI)) { + TerminatorInst *T = PHI.getIncomingBlock(i)->getTerminator(); IRBuilder<> Builder(T); if (BranchInst *BI = dyn_cast(T)) { - BB->removePredecessor(PHI->getIncomingBlock(i)); + BB->removePredecessor(PHI.getIncomingBlock(i)); // Turn uncoditional branches into unreachables and remove the dead // destination from conditional branches. if (BI->isUnconditional()) @@ -6008,6 +6004,9 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { if (MergeBlockIntoPredecessor(BB)) return true; + if (SinkCommon && Options.SinkCommonInsts) + Changed |= SinkCommonCodeFromPredecessors(BB); + IRBuilder<> Builder(BB); // If there is a trivial two-entry PHI node in this basic block, and we can diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp index fce7f8b81bac..ad1faea0a7ae 100644 --- a/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -18,13 +18,11 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/PatternMatch.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -200,13 +198,23 @@ bool SimplifyIndvar::makeIVComparisonInvariant(ICmpInst *ICmp, // TODO: Support multiple entry loops? 
(We currently bail out of these in // the IndVarSimplify pass) if (auto *BB = L->getLoopPredecessor()) { - Value *Incoming = PN->getIncomingValueForBlock(BB); - const SCEV *IncomingS = SE->getSCEV(Incoming); - CheapExpansions[IncomingS] = Incoming; + const int Idx = PN->getBasicBlockIndex(BB); + if (Idx >= 0) { + Value *Incoming = PN->getIncomingValue(Idx); + const SCEV *IncomingS = SE->getSCEV(Incoming); + CheapExpansions[IncomingS] = Incoming; + } } Value *NewLHS = CheapExpansions[InvariantLHS]; Value *NewRHS = CheapExpansions[InvariantRHS]; + if (!NewLHS) + if (auto *ConstLHS = dyn_cast(InvariantLHS)) + NewLHS = ConstLHS->getValue(); + if (!NewRHS) + if (auto *ConstRHS = dyn_cast(InvariantRHS)) + NewRHS = ConstRHS->getValue(); + if (!NewLHS || !NewRHS) // We could not find an existing value to replace either LHS or RHS. // Generating new instructions has subtler tradeoffs, so avoid doing that diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index c392492e331f..dcdff3e96b30 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -104,21 +104,6 @@ static bool callHasFloatingPointArgument(const CallInst *CI) { }); } -/// \brief Check whether the overloaded unary floating point function -/// corresponding to \a Ty is available. -static bool hasUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty, - LibFunc DoubleFn, LibFunc FloatFn, - LibFunc LongDoubleFn) { - switch (Ty->getTypeID()) { - case Type::FloatTyID: - return TLI->has(FloatFn); - case Type::DoubleTyID: - return TLI->has(DoubleFn); - default: - return TLI->has(LongDoubleFn); - } -} - //===----------------------------------------------------------------------===// // String and Memory Library Call Optimizations //===----------------------------------------------------------------------===// @@ -1033,6 +1018,35 @@ static Value *optimizeBinaryDoubleFP(CallInst *CI, IRBuilder<> &B) { return B.CreateFPExt(V, B.getDoubleTy()); } +// cabs(z) -> sqrt((creal(z)*creal(z)) + (cimag(z)*cimag(z))) +Value *LibCallSimplifier::optimizeCAbs(CallInst *CI, IRBuilder<> &B) { + if (!CI->isFast()) + return nullptr; + + // Propagate fast-math flags from the existing call to new instructions. + IRBuilder<>::FastMathFlagGuard Guard(B); + B.setFastMathFlags(CI->getFastMathFlags()); + + Value *Real, *Imag; + if (CI->getNumArgOperands() == 1) { + Value *Op = CI->getArgOperand(0); + assert(Op->getType()->isArrayTy() && "Unexpected signature for cabs!"); + Real = B.CreateExtractValue(Op, 0, "real"); + Imag = B.CreateExtractValue(Op, 1, "imag"); + } else { + assert(CI->getNumArgOperands() == 2 && "Unexpected signature for cabs!"); + Real = CI->getArgOperand(0); + Imag = CI->getArgOperand(1); + } + + Value *RealReal = B.CreateFMul(Real, Real); + Value *ImagImag = B.CreateFMul(Imag, Imag); + + Function *FSqrt = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::sqrt, + CI->getType()); + return B.CreateCall(FSqrt, B.CreateFAdd(RealReal, ImagImag), "cabs"); +} + Value *LibCallSimplifier::optimizeCos(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); Value *Ret = nullptr; @@ -1204,11 +1218,17 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { return Sel; } - if (Op2C->isExactlyValue(1.0)) // pow(x, 1.0) -> x + // Propagate fast-math-flags from the call to any created instructions. 
+ IRBuilder<>::FastMathFlagGuard Guard(B); + B.setFastMathFlags(CI->getFastMathFlags()); + // pow(x, 1.0) --> x + if (Op2C->isExactlyValue(1.0)) return Op1; - if (Op2C->isExactlyValue(2.0)) // pow(x, 2.0) -> x*x + // pow(x, 2.0) --> x * x + if (Op2C->isExactlyValue(2.0)) return B.CreateFMul(Op1, Op1, "pow2"); - if (Op2C->isExactlyValue(-1.0)) // pow(x, -1.0) -> 1.0/x + // pow(x, -1.0) --> 1.0 / x + if (Op2C->isExactlyValue(-1.0)) return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), Op1, "powrecip"); // In -ffast-math, generate repeated fmul instead of generating pow(x, n). @@ -1220,10 +1240,6 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { !V.isInteger()) return nullptr; - // Propagate fast math flags. - IRBuilder<>::FastMathFlagGuard Guard(B); - B.setFastMathFlags(CI->getFastMathFlags()); - // We will memoize intermediate products of the Addition Chain. Value *InnerChain[33] = {nullptr}; InnerChain[1] = Op1; @@ -1231,8 +1247,8 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { // We cannot readily convert a non-double type (like float) to a double. // So we first convert V to something which could be converted to double. - bool ignored; - V.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &ignored); + bool Ignored; + V.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &Ignored); Value *FMul = getPow(InnerChain, V.convertToDouble(), B); // For negative exponents simply compute the reciprocal. @@ -2160,6 +2176,10 @@ Value *LibCallSimplifier::optimizeFloatingPointLibCall(CallInst *CI, case LibFunc_fmax: case LibFunc_fmaxl: return optimizeFMinFMax(CI, Builder); + case LibFunc_cabs: + case LibFunc_cabsf: + case LibFunc_cabsl: + return optimizeCAbs(CI, Builder); default: return nullptr; } diff --git a/lib/Transforms/Utils/SplitModule.cpp b/lib/Transforms/Utils/SplitModule.cpp index 934a1bd73c24..968eb0208f43 100644 --- a/lib/Transforms/Utils/SplitModule.cpp +++ b/lib/Transforms/Utils/SplitModule.cpp @@ -141,15 +141,15 @@ static void findPartitions(Module *M, ClusterIDMapType &ClusterIDMap, } if (GV.hasLocalLinkage()) - addAllGlobalValueUsers(GVtoClusterMap, &GV, &GV); - }; - - llvm::for_each(M->functions(), recordGVSet); - llvm::for_each(M->globals(), recordGVSet); - llvm::for_each(M->aliases(), recordGVSet); - - // Assigned all GVs to merged clusters while balancing number of objects in - // each. + addAllGlobalValueUsers(GVtoClusterMap, &GV, &GV); + }; + + llvm::for_each(M->functions(), recordGVSet); + llvm::for_each(M->globals(), recordGVSet); + llvm::for_each(M->aliases(), recordGVSet); + + // Assigned all GVs to merged clusters while balancing number of objects in + // each. 
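The libcall simplifications above have straightforward scalar counterparts. A sketch, valid only under the fast-math conditions the code checks for and ignoring the overflow/underflow care a real cabs implementation takes:

#include <cmath>

// cabs(z) -> sqrt(creal(z)*creal(z) + cimag(z)*cimag(z))
double cabsLowered(double Real, double Imag) {
  return std::sqrt(Real * Real + Imag * Imag);
}

// pow(x, 1.0) -> x,  pow(x, 2.0) -> x * x,  pow(x, -1.0) -> 1.0 / x
double pow1Lowered(double X) { return X; }
double pow2Lowered(double X) { return X * X; }
double powRecipLowered(double X) { return 1.0 / X; }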
auto CompareClusters = [](const std::pair &a, const std::pair &b) { if (a.second || b.second) diff --git a/lib/Transforms/Utils/SymbolRewriter.cpp b/lib/Transforms/Utils/SymbolRewriter.cpp index 9da862db6a78..3640541e63cc 100644 --- a/lib/Transforms/Utils/SymbolRewriter.cpp +++ b/lib/Transforms/Utils/SymbolRewriter.cpp @@ -90,7 +90,8 @@ using namespace SymbolRewriter; static cl::list RewriteMapFiles("rewrite-map-file", cl::desc("Symbol Rewrite Map"), - cl::value_desc("filename")); + cl::value_desc("filename"), + cl::Hidden); static void rewriteComdat(Module &M, GlobalObject *GO, const std::string &Source, diff --git a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp index 9385f825523c..ed444e4cf43c 100644 --- a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp +++ b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp @@ -15,7 +15,6 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" -#include "llvm/ADT/StringExtras.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" diff --git a/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp index dc83b6d4d292..2fd39766bd89 100644 --- a/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp +++ b/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp @@ -6,6 +6,38 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// +// +// This pass merges loads/stores to/from sequential memory addresses into vector +// loads/stores. Although there's nothing GPU-specific in here, this pass is +// motivated by the microarchitectural quirks of nVidia and AMD GPUs. +// +// (For simplicity below we talk about loads only, but everything also applies +// to stores.) +// +// This pass is intended to be run late in the pipeline, after other +// vectorization opportunities have been exploited. So the assumption here is +// that immediately following our new vector load we'll need to extract out the +// individual elements of the load, so we can operate on them individually. +// +// On CPUs this transformation is usually not beneficial, because extracting the +// elements of a vector register is expensive on most architectures. It's +// usually better just to load each element individually into its own scalar +// register. +// +// However, nVidia and AMD GPUs don't have proper vector registers. Instead, a +// "vector load" loads directly into a series of scalar registers. In effect, +// extracting the elements of the vector is free. It's therefore always +// beneficial to vectorize a sequence of loads on these architectures. +// +// Vectorizing (perhaps a better name might be "coalescing") loads can have +// large performance impacts on GPU kernels, and opportunities for vectorizing +// are common in GPU code. This pass tries very hard to find such +// opportunities; its runtime is quadratic in the number of loads in a BB. +// +// Some CPU architectures, such as ARM, have instructions that load into +// multiple scalar registers, similar to a GPU vectorized load. In theory ARM +// could use this pass (with some modifications), but currently it implements +// its own pass to do something similar to what we do here. 
#include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" diff --git a/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/lib/Transforms/Vectorize/LoopVectorizationPlanner.h new file mode 100644 index 000000000000..e9e82681264b --- /dev/null +++ b/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -0,0 +1,256 @@ +//===- LoopVectorizationPlanner.h - Planner for LoopVectorization ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file provides a LoopVectorizationPlanner class. +/// InnerLoopVectorizer vectorizes loops which contain only one basic +/// LoopVectorizationPlanner - drives the vectorization process after having +/// passed Legality checks. +/// The planner builds and optimizes the Vectorization Plans which record the +/// decisions how to vectorize the given loop. In particular, represent the +/// control-flow of the vectorized version, the replication of instructions that +/// are to be scalarized, and interleave access groups. +/// +/// Also provides a VPlan-based builder utility analogous to IRBuilder. +/// It provides an instruction-level API for generating VPInstructions while +/// abstracting away the Recipe manipulation details. +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H +#define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H + +#include "VPlan.h" + +namespace llvm { + +/// VPlan-based builder utility analogous to IRBuilder. +class VPBuilder { +private: + VPBasicBlock *BB = nullptr; + VPBasicBlock::iterator InsertPt = VPBasicBlock::iterator(); + + VPInstruction *createInstruction(unsigned Opcode, + std::initializer_list Operands) { + VPInstruction *Instr = new VPInstruction(Opcode, Operands); + BB->insert(Instr, InsertPt); + return Instr; + } + +public: + VPBuilder() {} + + /// \brief This specifies that created VPInstructions should be appended to + /// the end of the specified block. + void setInsertPoint(VPBasicBlock *TheBB) { + assert(TheBB && "Attempting to set a null insert point"); + BB = TheBB; + InsertPt = BB->end(); + } + + VPValue *createNot(VPValue *Operand) { + return createInstruction(VPInstruction::Not, {Operand}); + } + + VPValue *createAnd(VPValue *LHS, VPValue *RHS) { + return createInstruction(Instruction::BinaryOps::And, {LHS, RHS}); + } + + VPValue *createOr(VPValue *LHS, VPValue *RHS) { + return createInstruction(Instruction::BinaryOps::Or, {LHS, RHS}); + } +}; + + +/// TODO: The following VectorizationFactor was pulled out of +/// LoopVectorizationCostModel class. LV also deals with +/// VectorizerParams::VectorizationFactor and VectorizationCostTy. +/// We need to streamline them. + +/// Information about vectorization costs +struct VectorizationFactor { + // Vector width with best cost + unsigned Width; + // Cost of the loop with that width + unsigned Cost; +}; + +/// Planner drives the vectorization process after having passed +/// Legality checks. +class LoopVectorizationPlanner { + /// The loop that we evaluate. + Loop *OrigLoop; + + /// Loop Info analysis. + LoopInfo *LI; + + /// Target Library Info. + const TargetLibraryInfo *TLI; + + /// Target Transform Info. + const TargetTransformInfo *TTI; + + /// The legality analysis. + LoopVectorizationLegality *Legal; + + /// The profitablity analysis. 
+ LoopVectorizationCostModel &CM; + + using VPlanPtr = std::unique_ptr; + + SmallVector VPlans; + + /// This class is used to enable the VPlan to invoke a method of ILV. This is + /// needed until the method is refactored out of ILV and becomes reusable. + struct VPCallbackILV : public VPCallback { + InnerLoopVectorizer &ILV; + + VPCallbackILV(InnerLoopVectorizer &ILV) : ILV(ILV) {} + + Value *getOrCreateVectorValues(Value *V, unsigned Part) override; + }; + + /// A builder used to construct the current plan. + VPBuilder Builder; + + /// When we if-convert we need to create edge masks. We have to cache values + /// so that we don't end up with exponential recursion/IR. Note that + /// if-conversion currently takes place during VPlan-construction, so these + /// caches are only used at that stage. + using EdgeMaskCacheTy = + DenseMap, VPValue *>; + using BlockMaskCacheTy = DenseMap; + EdgeMaskCacheTy EdgeMaskCache; + BlockMaskCacheTy BlockMaskCache; + + unsigned BestVF = 0; + unsigned BestUF = 0; + +public: + LoopVectorizationPlanner(Loop *L, LoopInfo *LI, const TargetLibraryInfo *TLI, + const TargetTransformInfo *TTI, + LoopVectorizationLegality *Legal, + LoopVectorizationCostModel &CM) + : OrigLoop(L), LI(LI), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM) {} + + /// Plan how to best vectorize, return the best VF and its cost. + VectorizationFactor plan(bool OptForSize, unsigned UserVF); + + /// Finalize the best decision and dispose of all other VPlans. + void setBestPlan(unsigned VF, unsigned UF); + + /// Generate the IR code for the body of the vectorized loop according to the + /// best selected VPlan. + void executePlan(InnerLoopVectorizer &LB, DominatorTree *DT); + + void printPlans(raw_ostream &O) { + for (const auto &Plan : VPlans) + O << *Plan; + } + +protected: + /// Collect the instructions from the original loop that would be trivially + /// dead in the vectorized loop if generated. + void collectTriviallyDeadInstructions( + SmallPtrSetImpl &DeadInstructions); + + /// A range of powers-of-2 vectorization factors with fixed start and + /// adjustable end. The range includes start and excludes end, e.g.,: + /// [1, 9) = {1, 2, 4, 8} + struct VFRange { + // A power of 2. + const unsigned Start; + + // Need not be a power of 2. If End <= Start range is empty. + unsigned End; + }; + + /// Test a \p Predicate on a \p Range of VF's. Return the value of applying + /// \p Predicate on Range.Start, possibly decreasing Range.End such that the + /// returned value holds for the entire \p Range. + bool getDecisionAndClampRange(const std::function &Predicate, + VFRange &Range); + + /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive, + /// according to the information gathered by Legal when it checked if it is + /// legal to vectorize the loop. + void buildVPlans(unsigned MinVF, unsigned MaxVF); + +private: + /// A helper function that computes the predicate of the block BB, assuming + /// that the header block of the loop is set to True. It returns the *entry* + /// mask for the block BB. + VPValue *createBlockInMask(BasicBlock *BB, VPlanPtr &Plan); + + /// A helper function that computes the predicate of the edge between SRC + /// and DST. + VPValue *createEdgeMask(BasicBlock *Src, BasicBlock *Dst, VPlanPtr &Plan); + + /// Check if \I belongs to an Interleave Group within the given VF \p Range, + /// \return true in the first returned value if so and false otherwise. 
+ /// Build a new VPInterleaveGroup Recipe if \I is the primary member of an IG + /// for \p Range.Start, and provide it as the second returned value. + /// Note that if \I is an adjunct member of an IG for \p Range.Start, the + /// \return value is , as it is handled by another recipe. + /// \p Range.End may be decreased to ensure same decision from \p Range.Start + /// to \p Range.End. + VPInterleaveRecipe *tryToInterleaveMemory(Instruction *I, VFRange &Range); + + // Check if \I is a memory instruction to be widened for \p Range.Start and + // potentially masked. Such instructions are handled by a recipe that takes an + // additional VPInstruction for the mask. + VPWidenMemoryInstructionRecipe *tryToWidenMemory(Instruction *I, + VFRange &Range, + VPlanPtr &Plan); + + /// Check if an induction recipe should be constructed for \I within the given + /// VF \p Range. If so build and return it. If not, return null. \p Range.End + /// may be decreased to ensure same decision from \p Range.Start to + /// \p Range.End. + VPWidenIntOrFpInductionRecipe *tryToOptimizeInduction(Instruction *I, + VFRange &Range); + + /// Handle non-loop phi nodes. Currently all such phi nodes are turned into + /// a sequence of select instructions as the vectorizer currently performs + /// full if-conversion. + VPBlendRecipe *tryToBlend(Instruction *I, VPlanPtr &Plan); + + /// Check if \p I can be widened within the given VF \p Range. If \p I can be + /// widened for \p Range.Start, check if the last recipe of \p VPBB can be + /// extended to include \p I or else build a new VPWidenRecipe for it and + /// append it to \p VPBB. Return true if \p I can be widened for Range.Start, + /// false otherwise. Range.End may be decreased to ensure same decision from + /// \p Range.Start to \p Range.End. + bool tryToWiden(Instruction *I, VPBasicBlock *VPBB, VFRange &Range); + + /// Build a VPReplicationRecipe for \p I and enclose it within a Region if it + /// is predicated. \return \p VPBB augmented with this new recipe if \p I is + /// not predicated, otherwise \return a new VPBasicBlock that succeeds the new + /// Region. Update the packing decision of predicated instructions if they + /// feed \p I. Range.End may be decreased to ensure same recipe behavior from + /// \p Range.Start to \p Range.End. + VPBasicBlock *handleReplication( + Instruction *I, VFRange &Range, VPBasicBlock *VPBB, + DenseMap &PredInst2Recipe, + VPlanPtr &Plan); + + /// Create a replicating region for instruction \p I that requires + /// predication. \p PredRecipe is a VPReplicateRecipe holding \p I. + VPRegionBlock *createReplicateRegion(Instruction *I, VPRecipeBase *PredRecipe, + VPlanPtr &Plan); + + /// Build a VPlan according to the information gathered by Legal. \return a + /// VPlan for vectorization factors \p Range.Start and up to \p Range.End + /// exclusive, possibly decreasing \p Range.End. 
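As an aside before the remaining declarations continue below: the VPBuilder helpers defined earlier in this header are the building blocks for the mask computations declared above (createBlockInMask and createEdgeMask). A minimal sketch of how an edge mask could be assembled with them during if-conversion; this is not the planner's actual code, and the helper name and parameters are hypothetical:

// Form the mask of an edge Src->Dst: the branch condition (negated for the
// false successor), AND'ed with the mask of the source block when present.
static VPValue *buildEdgeMaskSketch(VPBuilder &Builder, VPValue *SrcMask,
                                    VPValue *Cond, bool IsFalseEdge) {
  VPValue *EdgeMask = Cond;
  if (IsFalseEdge)
    EdgeMask = Builder.createNot(EdgeMask);
  if (SrcMask) // A null source mask means "all lanes active".
    EdgeMask = Builder.createAnd(EdgeMask, SrcMask);
  return EdgeMask;
}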
+ VPlanPtr buildVPlan(VFRange &Range, + const SmallPtrSetImpl &NeedDef); +}; + +} // namespace llvm + +#endif // LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 3faf9f436bef..719ae4c9c52a 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -47,8 +47,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Vectorize/LoopVectorize.h" -#include "VPlan.h" -#include "VPlanBuilder.h" +#include "LoopVectorizationPlanner.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" @@ -280,15 +279,7 @@ createMissedAnalysis(const char *PassName, StringRef RemarkName, Loop *TheLoop, namespace { -class LoopVectorizationLegality; -class LoopVectorizationCostModel; class LoopVectorizationRequirements; -class VPBlendRecipe; -class VPInterleaveRecipe; -class VPReplicateRecipe; -class VPWidenIntOrFpInductionRecipe; -class VPWidenRecipe; -class VPWidenMemoryInstructionRecipe; } // end anonymous namespace @@ -599,6 +590,20 @@ class InnerLoopVectorizer { /// Returns true if we should generate a scalar version of \p IV. bool needsScalarInduction(Instruction *IV) const; + /// If there is a cast involved in the induction variable \p ID, which should + /// be ignored in the vectorized loop body, this function records the + /// VectorLoopValue of the respective Phi also as the VectorLoopValue of the + /// cast. We had already proved that the casted Phi is equal to the uncasted + /// Phi in the vectorized loop (under a runtime guard), and therefore + /// there is no need to vectorize the cast - the same value can be used in the + /// vector loop for both the Phi and the cast. + /// If \p VectorLoopValue is a scalarized value, \p Lane is also specified, + /// Otherwise, \p VectorLoopValue is a widened/vectorized value. + void recordVectorLoopValueForInductionCast (const InductionDescriptor &ID, + Value *VectorLoopValue, + unsigned Part, + unsigned Lane = UINT_MAX); + /// Generate a shuffle sequence that will reverse the vector Vec. virtual Value *reverseVector(Value *Vec); @@ -839,7 +844,7 @@ void InnerLoopVectorizer::addMetadata(ArrayRef To, } } -namespace { +namespace llvm { /// \brief The group of interleaved loads/stores sharing the same stride and /// close to each other. @@ -943,6 +948,19 @@ class InterleaveGroup { Instruction *getInsertPos() const { return InsertPos; } void setInsertPos(Instruction *Inst) { InsertPos = Inst; } + /// Add metadata (e.g. alias info) from the instructions in this group to \p + /// NewInst. + /// + /// FIXME: this function currently does not add noalias metadata a'la + /// addNewMedata. To do that we need to compute the intersection of the + /// noalias info from all members. + void addMetadata(Instruction *NewInst) const { + SmallVector VL; + std::transform(Members.begin(), Members.end(), std::back_inserter(VL), + [](std::pair p) { return p.second; }); + propagateMetadata(NewInst, VL); + } + private: unsigned Factor; // Interleave Factor. bool Reverse; @@ -964,6 +982,9 @@ class InterleaveGroup { // store i32 %odd // Insert Position Instruction *InsertPos; }; +} // end namespace llvm + +namespace { /// \brief Drive the analysis of interleaved memory accesses in the loop. 
///
@@ -1495,7 +1516,7 @@ static void emitMissedWarning(Function *F, Loop *L,
 }
 }

-namespace {
+namespace llvm {

 /// LoopVectorizationLegality checks if it is legal to vectorize a loop, and
 /// to what vectorization factor.
@@ -1557,7 +1578,17 @@ class LoopVectorizationLegality {
 /// Returns the widest induction type.
 Type *getWidestInductionType() { return WidestIndTy; }

- /// Returns True if V is an induction variable in this loop.
+ /// Returns True if V is a Phi node of an induction variable in this loop.
+ bool isInductionPhi(const Value *V);
+
+ /// Returns True if V is a cast that is part of an induction def-use chain,
+ /// and had been proven to be redundant under a runtime guard (in other
+ /// words, the cast has the same SCEV expression as the induction phi).
+ bool isCastedInductionVariable(const Value *V);
+
+ /// Returns True if V can be considered as an induction variable in this
+ /// loop. V can be the induction phi, or some redundant cast in the def-use
+ /// chain of the induction phi.
 bool isInductionVariable(const Value *V);

 /// Returns True if PN is a reduction variable in this loop.
@@ -1578,6 +1609,8 @@ class LoopVectorizationLegality {
 /// 0 - Stride is unknown or non-consecutive.
 /// 1 - Address is consecutive.
 /// -1 - Address is consecutive, and decreasing.
+ /// NOTE: This method must only be used before modifying the original scalar
+ /// loop. Do not use after invoking 'createVectorizedLoopSkeleton' (PR34965).
 int isConsecutivePtr(Value *Ptr);

 /// Returns true if the value V is uniform within the loop.
@@ -1768,6 +1801,12 @@ class LoopVectorizationLegality {
 /// variables can be pointers.
 InductionList Inductions;

+ /// Holds all the casts that participate in the update chain of the induction
+ /// variables, and that have been proven to be redundant (possibly under a
+ /// runtime guard). These casts can be ignored when creating the vectorized
+ /// loop body.
+ SmallPtrSet InductionCastsToIgnore;
+
 /// Holds the phi nodes that are first-order recurrences.
 RecurrenceSet FirstOrderRecurrences;

@@ -1819,15 +1858,6 @@ class LoopVectorizationCostModel {
 /// vectorization should be avoided up front.
 Optional computeMaxVF(bool OptForSize);

- /// Information about vectorization costs
- struct VectorizationFactor {
- // Vector width with best cost
- unsigned Width;
-
- // Cost of the loop with that width
- unsigned Cost;
- };
-
 /// \return The most profitable vectorization factor and the cost of that VF.
 /// This method checks every power of two up to MaxVF. If UserVF is not ZERO
 /// then this vectorization factor will be selected if vectorization is
@@ -1926,7 +1956,8 @@ class LoopVectorizationCostModel {
 /// Decision that was taken during cost calculation for memory instruction.
 enum InstWidening {
 CM_Unknown,
- CM_Widen,
+ CM_Widen, // For consecutive accesses with stride +1.
+ CM_Widen_Reverse, // For consecutive accesses with stride -1.
 CM_Interleave,
 CM_GatherScatter,
 CM_Scalarize
@@ -2001,7 +2032,7 @@ class LoopVectorizationCostModel {
 return false;

 // If the truncated value is not an induction variable, return false.
- return Legal->isInductionVariable(Op); + return Legal->isInductionPhi(Op); } /// Collects the instructions to scalarize for each predicated instruction in @@ -2186,189 +2217,6 @@ class LoopVectorizationCostModel { SmallPtrSet VecValuesToIgnore; }; -} // end anonymous namespace - -namespace llvm { - -/// InnerLoopVectorizer vectorizes loops which contain only one basic -/// LoopVectorizationPlanner - drives the vectorization process after having -/// passed Legality checks. -/// The planner builds and optimizes the Vectorization Plans which record the -/// decisions how to vectorize the given loop. In particular, represent the -/// control-flow of the vectorized version, the replication of instructions that -/// are to be scalarized, and interleave access groups. -class LoopVectorizationPlanner { - /// The loop that we evaluate. - Loop *OrigLoop; - - /// Loop Info analysis. - LoopInfo *LI; - - /// Target Library Info. - const TargetLibraryInfo *TLI; - - /// Target Transform Info. - const TargetTransformInfo *TTI; - - /// The legality analysis. - LoopVectorizationLegality *Legal; - - /// The profitablity analysis. - LoopVectorizationCostModel &CM; - - using VPlanPtr = std::unique_ptr; - - SmallVector VPlans; - - /// This class is used to enable the VPlan to invoke a method of ILV. This is - /// needed until the method is refactored out of ILV and becomes reusable. - struct VPCallbackILV : public VPCallback { - InnerLoopVectorizer &ILV; - - VPCallbackILV(InnerLoopVectorizer &ILV) : ILV(ILV) {} - - Value *getOrCreateVectorValues(Value *V, unsigned Part) override { - return ILV.getOrCreateVectorValue(V, Part); - } - }; - - /// A builder used to construct the current plan. - VPBuilder Builder; - - /// When we if-convert we need to create edge masks. We have to cache values - /// so that we don't end up with exponential recursion/IR. Note that - /// if-conversion currently takes place during VPlan-construction, so these - /// caches are only used at that stage. - using EdgeMaskCacheTy = - DenseMap, VPValue *>; - using BlockMaskCacheTy = DenseMap; - EdgeMaskCacheTy EdgeMaskCache; - BlockMaskCacheTy BlockMaskCache; - - unsigned BestVF = 0; - unsigned BestUF = 0; - -public: - LoopVectorizationPlanner(Loop *L, LoopInfo *LI, const TargetLibraryInfo *TLI, - const TargetTransformInfo *TTI, - LoopVectorizationLegality *Legal, - LoopVectorizationCostModel &CM) - : OrigLoop(L), LI(LI), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM) {} - - /// Plan how to best vectorize, return the best VF and its cost. - LoopVectorizationCostModel::VectorizationFactor plan(bool OptForSize, - unsigned UserVF); - - /// Finalize the best decision and dispose of all other VPlans. - void setBestPlan(unsigned VF, unsigned UF); - - /// Generate the IR code for the body of the vectorized loop according to the - /// best selected VPlan. - void executePlan(InnerLoopVectorizer &LB, DominatorTree *DT); - - void printPlans(raw_ostream &O) { - for (const auto &Plan : VPlans) - O << *Plan; - } - -protected: - /// Collect the instructions from the original loop that would be trivially - /// dead in the vectorized loop if generated. - void collectTriviallyDeadInstructions( - SmallPtrSetImpl &DeadInstructions); - - /// A range of powers-of-2 vectorization factors with fixed start and - /// adjustable end. The range includes start and excludes end, e.g.,: - /// [1, 9) = {1, 2, 4, 8} - struct VFRange { - // A power of 2. - const unsigned Start; - - // Need not be a power of 2. If End <= Start range is empty. 
- unsigned End; - }; - - /// Test a \p Predicate on a \p Range of VF's. Return the value of applying - /// \p Predicate on Range.Start, possibly decreasing Range.End such that the - /// returned value holds for the entire \p Range. - bool getDecisionAndClampRange(const std::function &Predicate, - VFRange &Range); - - /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive, - /// according to the information gathered by Legal when it checked if it is - /// legal to vectorize the loop. - void buildVPlans(unsigned MinVF, unsigned MaxVF); - -private: - /// A helper function that computes the predicate of the block BB, assuming - /// that the header block of the loop is set to True. It returns the *entry* - /// mask for the block BB. - VPValue *createBlockInMask(BasicBlock *BB, VPlanPtr &Plan); - - /// A helper function that computes the predicate of the edge between SRC - /// and DST. - VPValue *createEdgeMask(BasicBlock *Src, BasicBlock *Dst, VPlanPtr &Plan); - - /// Check if \I belongs to an Interleave Group within the given VF \p Range, - /// \return true in the first returned value if so and false otherwise. - /// Build a new VPInterleaveGroup Recipe if \I is the primary member of an IG - /// for \p Range.Start, and provide it as the second returned value. - /// Note that if \I is an adjunct member of an IG for \p Range.Start, the - /// \return value is , as it is handled by another recipe. - /// \p Range.End may be decreased to ensure same decision from \p Range.Start - /// to \p Range.End. - VPInterleaveRecipe *tryToInterleaveMemory(Instruction *I, VFRange &Range); - - // Check if \I is a memory instruction to be widened for \p Range.Start and - // potentially masked. Such instructions are handled by a recipe that takes an - // additional VPInstruction for the mask. - VPWidenMemoryInstructionRecipe *tryToWidenMemory(Instruction *I, - VFRange &Range, - VPlanPtr &Plan); - - /// Check if an induction recipe should be constructed for \I within the given - /// VF \p Range. If so build and return it. If not, return null. \p Range.End - /// may be decreased to ensure same decision from \p Range.Start to - /// \p Range.End. - VPWidenIntOrFpInductionRecipe *tryToOptimizeInduction(Instruction *I, - VFRange &Range); - - /// Handle non-loop phi nodes. Currently all such phi nodes are turned into - /// a sequence of select instructions as the vectorizer currently performs - /// full if-conversion. - VPBlendRecipe *tryToBlend(Instruction *I, VPlanPtr &Plan); - - /// Check if \p I can be widened within the given VF \p Range. If \p I can be - /// widened for \p Range.Start, check if the last recipe of \p VPBB can be - /// extended to include \p I or else build a new VPWidenRecipe for it and - /// append it to \p VPBB. Return true if \p I can be widened for Range.Start, - /// false otherwise. Range.End may be decreased to ensure same decision from - /// \p Range.Start to \p Range.End. - bool tryToWiden(Instruction *I, VPBasicBlock *VPBB, VFRange &Range); - - /// Build a VPReplicationRecipe for \p I and enclose it within a Region if it - /// is predicated. \return \p VPBB augmented with this new recipe if \p I is - /// not predicated, otherwise \return a new VPBasicBlock that succeeds the new - /// Region. Update the packing decision of predicated instructions if they - /// feed \p I. Range.End may be decreased to ensure same recipe behavior from - /// \p Range.Start to \p Range.End. 
- VPBasicBlock *handleReplication( - Instruction *I, VFRange &Range, VPBasicBlock *VPBB, - DenseMap &PredInst2Recipe, - VPlanPtr &Plan); - - /// Create a replicating region for instruction \p I that requires - /// predication. \p PredRecipe is a VPReplicateRecipe holding \p I. - VPRegionBlock *createReplicateRegion(Instruction *I, VPRecipeBase *PredRecipe, - VPlanPtr &Plan); - - /// Build a VPlan according to the information gathered by Legal. \return a - /// VPlan for vectorization factors \p Range.Start and up to \p Range.End - /// exclusive, possibly decreasing \p Range.End. - VPlanPtr buildVPlan(VFRange &Range, - const SmallPtrSetImpl &NeedDef); -}; - } // end namespace llvm namespace { @@ -2587,8 +2435,12 @@ void InnerLoopVectorizer::createVectorIntOrFpInductionPHI( Instruction *LastInduction = VecInd; for (unsigned Part = 0; Part < UF; ++Part) { VectorLoopValueMap.setVectorValue(EntryVal, Part, LastInduction); + if (isa(EntryVal)) addMetadata(LastInduction, EntryVal); + else + recordVectorLoopValueForInductionCast(II, LastInduction, Part); + LastInduction = cast(addFastMathFlag( Builder.CreateBinOp(AddOp, LastInduction, SplatVF, "step.add"))); } @@ -2620,6 +2472,22 @@ bool InnerLoopVectorizer::needsScalarInduction(Instruction *IV) const { return llvm::any_of(IV->users(), isScalarInst); } +void InnerLoopVectorizer::recordVectorLoopValueForInductionCast( + const InductionDescriptor &ID, Value *VectorLoopVal, unsigned Part, + unsigned Lane) { + const SmallVectorImpl &Casts = ID.getCastInsts(); + if (Casts.empty()) + return; + // Only the first Cast instruction in the Casts vector is of interest. + // The rest of the Casts (if exist) have no uses outside the + // induction update chain itself. + Instruction *CastInst = *Casts.begin(); + if (Lane < UINT_MAX) + VectorLoopValueMap.setScalarValue(CastInst, {Part, Lane}, VectorLoopVal); + else + VectorLoopValueMap.setVectorValue(CastInst, Part, VectorLoopVal); +} + void InnerLoopVectorizer::widenIntOrFpInduction(PHINode *IV, TruncInst *Trunc) { assert((IV->getType()->isIntegerTy() || IV != OldInduction) && "Primary induction variable must have an integer type"); @@ -2694,6 +2562,7 @@ void InnerLoopVectorizer::widenIntOrFpInduction(PHINode *IV, TruncInst *Trunc) { // If we haven't yet vectorized the induction variable, splat the scalar // induction variable, and build the necessary step vectors. + // TODO: Don't do it unless the vectorized IV is really required. 
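The recordVectorLoopValueForInductionCast hunk above uses Lane == UINT_MAX as a sentinel: the default argument means "record the widened per-part value", while an explicit lane means "record the scalar copy for that part and lane". A minimal standalone sketch of that pattern follows, with plain STL maps standing in for LLVM's VectorLoopValueMap; the ValueTable and recordForCast names are illustrative only, not LLVM API.

#include <climits>
#include <iostream>
#include <map>
#include <string>
#include <tuple>
#include <utility>

// Hypothetical stand-in for the per-part / per-lane value map.
struct ValueTable {
  // Widened value, keyed by (original value, unroll part).
  std::map<std::pair<std::string, unsigned>, std::string> VectorParts;
  // Scalarized value, keyed by (original value, part, lane).
  std::map<std::tuple<std::string, unsigned, unsigned>, std::string> ScalarLanes;
};

// Record the value generated for an induction phi also for its redundant cast.
// Lane == UINT_MAX means "this is a widened per-part value"; any other lane
// means "this is the scalar copy for that lane".
void recordForCast(ValueTable &VT, const std::string &CastName,
                   const std::string &GeneratedValue, unsigned Part,
                   unsigned Lane = UINT_MAX) {
  if (Lane < UINT_MAX)
    VT.ScalarLanes[{CastName, Part, Lane}] = GeneratedValue;
  else
    VT.VectorParts[{CastName, Part}] = GeneratedValue;
}

int main() {
  ValueTable VT;
  recordForCast(VT, "%cast", "%vec.ind.part0", /*Part=*/0);              // widened
  recordForCast(VT, "%cast", "%scalar.iv.0.2", /*Part=*/0, /*Lane=*/2);  // per lane
  std::cout << VT.VectorParts[{"%cast", 0u}] << "\n"
            << VT.ScalarLanes[{std::string("%cast"), 0u, 2u}] << "\n";
}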
if (!VectorizedIV) { Value *Broadcasted = getBroadcastInstrs(ScalarIV); for (unsigned Part = 0; Part < UF; ++Part) { @@ -2702,6 +2571,8 @@ void InnerLoopVectorizer::widenIntOrFpInduction(PHINode *IV, TruncInst *Trunc) { VectorLoopValueMap.setVectorValue(EntryVal, Part, EntryPart); if (Trunc) addMetadata(EntryPart, Trunc); + else + recordVectorLoopValueForInductionCast(ID, EntryPart, Part); } } @@ -2807,6 +2678,7 @@ void InnerLoopVectorizer::buildScalarSteps(Value *ScalarIV, Value *Step, auto *Mul = addFastMathFlag(Builder.CreateBinOp(MulOp, StartIdx, Step)); auto *Add = addFastMathFlag(Builder.CreateBinOp(AddOp, ScalarIV, Mul)); VectorLoopValueMap.setScalarValue(EntryVal, {Part, Lane}, Add); + recordVectorLoopValueForInductionCast(ID, Add, Part, Lane); } } } @@ -3044,7 +2916,7 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr) { for (unsigned Part = 0; Part < UF; Part++) { auto *NewLoad = Builder.CreateAlignedLoad( NewPtrs[Part], Group->getAlignment(), "wide.vec"); - addMetadata(NewLoad, Instr); + Group->addMetadata(NewLoad); NewLoads.push_back(NewLoad); } @@ -3112,7 +2984,8 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr) { Instruction *NewStoreInstr = Builder.CreateAlignedStore(IVec, NewPtrs[Part], Group->getAlignment()); - addMetadata(NewStoreInstr, Instr); + + Group->addMetadata(NewStoreInstr); } } @@ -3144,8 +3017,9 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr, // Determine if the pointer operand of the access is either consecutive or // reverse consecutive. - int ConsecutiveStride = Legal->isConsecutivePtr(Ptr); - bool Reverse = ConsecutiveStride < 0; + bool Reverse = (Decision == LoopVectorizationCostModel::CM_Widen_Reverse); + bool ConsecutiveStride = + Reverse || (Decision == LoopVectorizationCostModel::CM_Widen); bool CreateGatherScatter = (Decision == LoopVectorizationCostModel::CM_GatherScatter); @@ -4100,15 +3974,12 @@ void InnerLoopVectorizer::fixCrossIterationPHIs() { // the currently empty PHI nodes. At this point every instruction in the // original loop is widened to a vector form so we can use them to construct // the incoming edges. - for (Instruction &I : *OrigLoop->getHeader()) { - PHINode *Phi = dyn_cast(&I); - if (!Phi) - break; + for (PHINode &Phi : OrigLoop->getHeader()->phis()) { // Handle first-order recurrences and reductions that need to be fixed. - if (Legal->isFirstOrderRecurrence(Phi)) - fixFirstOrderRecurrence(Phi); - else if (Legal->isReductionVariable(Phi)) - fixReduction(Phi); + if (Legal->isFirstOrderRecurrence(&Phi)) + fixFirstOrderRecurrence(&Phi); + else if (Legal->isReductionVariable(&Phi)) + fixReduction(&Phi); } } @@ -4273,12 +4144,9 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi) { // vector recurrence we extracted in the middle block. Since the loop is in // LCSSA form, we just need to find the phi node for the original scalar // recurrence in the exit block, and then add an edge for the middle block. - for (auto &I : *LoopExitBlock) { - auto *LCSSAPhi = dyn_cast(&I); - if (!LCSSAPhi) - break; - if (LCSSAPhi->getIncomingValue(0) == Phi) { - LCSSAPhi->addIncoming(ExtractForPhiUsedOutsideLoop, LoopMiddleBlock); + for (PHINode &LCSSAPhi : LoopExitBlock->phis()) { + if (LCSSAPhi.getIncomingValue(0) == Phi) { + LCSSAPhi.addIncoming(ExtractForPhiUsedOutsideLoop, LoopMiddleBlock); break; } } @@ -4435,21 +4303,15 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) { // inside and outside of the scalar remainder loop. 
 // We know that the loop is in LCSSA form. We need to update the
 // PHI nodes in the exit blocks.
- for (BasicBlock::iterator LEI = LoopExitBlock->begin(),
- LEE = LoopExitBlock->end();
- LEI != LEE; ++LEI) {
- PHINode *LCSSAPhi = dyn_cast(LEI);
- if (!LCSSAPhi)
- break;
-
+ for (PHINode &LCSSAPhi : LoopExitBlock->phis()) {
 // All PHINodes need to have a single entry edge, or two if
 // we already fixed them.
- assert(LCSSAPhi->getNumIncomingValues() < 3 && "Invalid LCSSA PHI");
+ assert(LCSSAPhi.getNumIncomingValues() < 3 && "Invalid LCSSA PHI");

 // We found a reduction value exit-PHI. Update it with the
 // incoming bypass edge.
- if (LCSSAPhi->getIncomingValue(0) == LoopExitInst)
- LCSSAPhi->addIncoming(ReducedPartRdx, LoopMiddleBlock);
+ if (LCSSAPhi.getIncomingValue(0) == LoopExitInst)
+ LCSSAPhi.addIncoming(ReducedPartRdx, LoopMiddleBlock);
 } // end of the LCSSA phi scan.

 // Fix the scalar loop reduction variable with the incoming reduction sum
@@ -4464,14 +4326,11 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) {
 }

 void InnerLoopVectorizer::fixLCSSAPHIs() {
- for (Instruction &LEI : *LoopExitBlock) {
- auto *LCSSAPhi = dyn_cast(&LEI);
- if (!LCSSAPhi)
- break;
- if (LCSSAPhi->getNumIncomingValues() == 1) {
- assert(OrigLoop->isLoopInvariant(LCSSAPhi->getIncomingValue(0)) &&
+ for (PHINode &LCSSAPhi : LoopExitBlock->phis()) {
+ if (LCSSAPhi.getNumIncomingValues() == 1) {
+ assert(OrigLoop->isLoopInvariant(LCSSAPhi.getIncomingValue(0)) &&
 "Incoming value isn't loop invariant");
- LCSSAPhi->addIncoming(LCSSAPhi->getIncomingValue(0), LoopMiddleBlock);
+ LCSSAPhi.addIncoming(LCSSAPhi.getIncomingValue(0), LoopMiddleBlock);
 }
 }
 }
@@ -4917,11 +4776,8 @@ void InnerLoopVectorizer::updateAnalysis() {
 /// Phi nodes with constant expressions that can trap are not safe to if
 /// convert.
 static bool canIfConvertPHINodes(BasicBlock *BB) {
- for (Instruction &I : *BB) {
- auto *Phi = dyn_cast(&I);
- if (!Phi)
- return true;
- for (Value *V : Phi->incoming_values())
+ for (PHINode &Phi : BB->phis()) {
+ for (Value *V : Phi.incoming_values())
 if (auto *C = dyn_cast(V))
 if (C->canTrap())
 return false;
@@ -4985,13 +4841,13 @@ bool LoopVectorizationLegality::canVectorize() {
 bool Result = true;

 bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
- if (DoExtraAnalysis)
 // We must have a loop in canonical form. Loops with indirectbr in them cannot
 // be canonicalized.
 if (!TheLoop->getLoopPreheader()) {
+ DEBUG(dbgs() << "LV: Loop doesn't have a legal pre-header.\n");
 ORE->emit(createMissedAnalysis("CFGNotUnderstood")
 << "loop control flow is not understood by vectorizer");
- if (DoExtraAnalysis)
+ if (DoExtraAnalysis)
 Result = false;
 else
 return false;
@@ -5155,6 +5011,15 @@ void LoopVectorizationLegality::addInductionPhi(
 PHINode *Phi, const InductionDescriptor &ID,
 SmallPtrSetImpl &AllowedExit) {
 Inductions[Phi] = ID;
+
+ // In case this induction also comes with casts that we know we can ignore
+ // in the vectorized loop body, record them here. All casts could be recorded
+ // here for ignoring, but it suffices to record only the first (as it is the
+ // only one that may be used outside the cast sequence).
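The addInductionPhi change above records only the first cast of an induction's redundant cast chain, and the related hunks split the legality query into isInductionPhi, isCastedInductionVariable, and their union isInductionVariable. A minimal sketch of that bookkeeping under assumed names, with std::set and string identifiers standing in for LLVM's SmallPtrSet and Value pointers:

#include <cassert>
#include <set>
#include <string>
#include <vector>

// Induction phis are kept in one container; only the *first* cast of each
// induction's redundant cast chain is remembered, since it is the only cast
// that may have uses outside the chain itself.
struct InductionInfo {
  std::set<std::string> InductionPhis;
  std::set<std::string> InductionCastsToIgnore;

  void addInductionPhi(const std::string &Phi,
                       const std::vector<std::string> &CastChain) {
    InductionPhis.insert(Phi);
    if (!CastChain.empty())
      InductionCastsToIgnore.insert(CastChain.front());
  }

  bool isInductionPhi(const std::string &V) const {
    return InductionPhis.count(V) != 0;
  }
  bool isCastedInductionVariable(const std::string &V) const {
    return InductionCastsToIgnore.count(V) != 0;
  }
  // Either the phi itself or a redundant cast in its def-use chain.
  bool isInductionVariable(const std::string &V) const {
    return isInductionPhi(V) || isCastedInductionVariable(V);
  }
};

int main() {
  InductionInfo Info;
  Info.addInductionPhi("%iv", {"%iv.sext", "%iv.sext.trunc"});
  assert(Info.isInductionPhi("%iv"));
  assert(Info.isCastedInductionVariable("%iv.sext"));
  assert(!Info.isCastedInductionVariable("%iv.sext.trunc")); // not recorded
  assert(Info.isInductionVariable("%iv.sext"));
}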
+ const SmallVectorImpl &Casts = ID.getCastInsts(); + if (!Casts.empty()) + InductionCastsToIgnore.insert(*Casts.begin()); + Type *PhiTy = Phi->getType(); const DataLayout &DL = Phi->getModule()->getDataLayout(); @@ -5642,6 +5507,7 @@ void LoopVectorizationCostModel::collectLoopUniforms(unsigned VF) { "Widening decision should be ready at this moment"); return (WideningDecision == CM_Widen || + WideningDecision == CM_Widen_Reverse || WideningDecision == CM_Interleave); }; // Iterate over the instructions in the loop, and collect all @@ -5784,7 +5650,7 @@ bool LoopVectorizationLegality::canVectorizeMemory() { return true; } -bool LoopVectorizationLegality::isInductionVariable(const Value *V) { +bool LoopVectorizationLegality::isInductionPhi(const Value *V) { Value *In0 = const_cast(V); PHINode *PN = dyn_cast_or_null(In0); if (!PN) @@ -5793,6 +5659,15 @@ bool LoopVectorizationLegality::isInductionVariable(const Value *V) { return Inductions.count(PN); } +bool LoopVectorizationLegality::isCastedInductionVariable(const Value *V) { + auto *Inst = dyn_cast(V); + return (Inst && InductionCastsToIgnore.count(Inst)); +} + +bool LoopVectorizationLegality::isInductionVariable(const Value *V) { + return isInductionPhi(V) || isCastedInductionVariable(V); +} + bool LoopVectorizationLegality::isFirstOrderRecurrence(const PHINode *Phi) { return FirstOrderRecurrences.count(Phi); } @@ -6290,7 +6165,7 @@ LoopVectorizationCostModel::computeFeasibleMaxVF(bool OptForSize, return MaxVF; } -LoopVectorizationCostModel::VectorizationFactor +VectorizationFactor LoopVectorizationCostModel::selectVectorizationFactor(unsigned MaxVF) { float Cost = expectedCost(1).first; #ifndef NDEBUG @@ -6863,7 +6738,8 @@ LoopVectorizationCostModel::expectedCost(unsigned VF) { continue; // Skip ignored values. - if (ValuesToIgnore.count(&I)) + if (ValuesToIgnore.count(&I) || + (VF > 1 && VecValuesToIgnore.count(&I))) continue; VectorizationCostTy C = getInstructionCost(&I, VF); @@ -6902,14 +6778,16 @@ LoopVectorizationCostModel::expectedCost(unsigned VF) { static const SCEV *getAddressAccessSCEV( Value *Ptr, LoopVectorizationLegality *Legal, - ScalarEvolution *SE, + PredicatedScalarEvolution &PSE, const Loop *TheLoop) { + auto *Gep = dyn_cast(Ptr); if (!Gep) return nullptr; // We are looking for a gep with all loop invariant indices except for one // which should be an induction variable. + auto SE = PSE.getSE(); unsigned NumOperands = Gep->getNumOperands(); for (unsigned i = 1; i < NumOperands; ++i) { Value *Opd = Gep->getOperand(i); @@ -6919,7 +6797,7 @@ static const SCEV *getAddressAccessSCEV( } // Now we know we have a GEP ptr, %inv, %ind, %inv. return the Ptr SCEV. - return SE->getSCEV(Ptr); + return PSE.getSCEV(Ptr); } static bool isStrideMul(Instruction *I, LoopVectorizationLegality *Legal) { @@ -6939,7 +6817,7 @@ unsigned LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I, // Figure out whether the access is strided and get the stride value // if it's known in compile time - const SCEV *PtrSCEV = getAddressAccessSCEV(Ptr, Legal, SE, TheLoop); + const SCEV *PtrSCEV = getAddressAccessSCEV(Ptr, Legal, PSE, TheLoop); // Get the cost of the scalar memory instruction and address computation. unsigned Cost = VF * TTI.getAddressComputationCost(PtrTy, SE, PtrSCEV); @@ -7099,7 +6977,12 @@ void LoopVectorizationCostModel::setCostBasedWideningDecision(unsigned VF) { // We assume that widening is the best solution when possible. 
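The expectedCost hunk above skips VecValuesToIgnore entries only when a vector VF is being costed, so the scalar (VF == 1) cost still charges for the redundant induction casts that the vector loop can drop. A small self-contained sketch of that guard; the instruction names and costs are made up for illustration:

#include <iostream>
#include <set>
#include <string>
#include <vector>

// Illustrative instruction record: a name and an assumed cost.
struct Instr {
  std::string Name;
  unsigned Cost;
};

// Sum the cost of a block, skipping ValuesToIgnore always and
// VecValuesToIgnore only when a vector VF is being costed (VF > 1),
// mirroring the guard added in expectedCost above.
unsigned blockCost(const std::vector<Instr> &Block, unsigned VF,
                   const std::set<std::string> &ValuesToIgnore,
                   const std::set<std::string> &VecValuesToIgnore) {
  unsigned Total = 0;
  for (const Instr &I : Block) {
    if (ValuesToIgnore.count(I.Name) ||
        (VF > 1 && VecValuesToIgnore.count(I.Name)))
      continue;
    Total += I.Cost;
  }
  return Total;
}

int main() {
  std::vector<Instr> Block = {{"%iv", 1}, {"%iv.cast", 1}, {"%load", 4}};
  std::set<std::string> Ignore;                    // e.g. debug intrinsics
  std::set<std::string> VecIgnore = {"%iv.cast"};  // redundant induction cast
  std::cout << "VF=1 cost: " << blockCost(Block, 1, Ignore, VecIgnore) << "\n"; // 6
  std::cout << "VF=4 cost: " << blockCost(Block, 4, Ignore, VecIgnore) << "\n"; // 5
}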
if (Legal->memoryInstructionCanBeWidened(&I, VF)) { unsigned Cost = getConsecutiveMemOpCost(&I, VF); - setWideningDecision(&I, VF, CM_Widen, Cost); + int ConsecutiveStride = Legal->isConsecutivePtr(getPointerOperand(&I)); + assert((ConsecutiveStride == 1 || ConsecutiveStride == -1) && + "Expected consecutive stride."); + InstWidening Decision = + ConsecutiveStride == 1 ? CM_Widen : CM_Widen_Reverse; + setWideningDecision(&I, VF, Decision, Cost); continue; } @@ -7189,7 +7072,8 @@ void LoopVectorizationCostModel::setCostBasedWideningDecision(unsigned VF) { // by cost functions, but since this involves the task of finding out // if the loaded register is involved in an address computation, it is // instead changed here when we know this is the case. - if (getWideningDecision(I, VF) == CM_Widen) + InstWidening Decision = getWideningDecision(I, VF); + if (Decision == CM_Widen || Decision == CM_Widen_Reverse) // Scalarize a widened load of address. setWideningDecision(I, VF, CM_Scalarize, (VF * getMemoryInstructionCost(I, 1))); @@ -7493,13 +7377,19 @@ void LoopVectorizationCostModel::collectValuesToIgnore() { SmallPtrSetImpl &Casts = RedDes.getCastInsts(); VecValuesToIgnore.insert(Casts.begin(), Casts.end()); } + // Ignore type-casting instructions we identified during induction + // detection. + for (auto &Induction : *Legal->getInductionVars()) { + InductionDescriptor &IndDes = Induction.second; + const SmallVectorImpl &Casts = IndDes.getCastInsts(); + VecValuesToIgnore.insert(Casts.begin(), Casts.end()); + } } -LoopVectorizationCostModel::VectorizationFactor +VectorizationFactor LoopVectorizationPlanner::plan(bool OptForSize, unsigned UserVF) { // Width 1 means no vectorize, cost 0 means uncomputed cost. - const LoopVectorizationCostModel::VectorizationFactor NoVectorization = {1U, - 0U}; + const VectorizationFactor NoVectorization = {1U, 0U}; Optional MaybeMaxVF = CM.computeMaxVF(OptForSize); if (!MaybeMaxVF.hasValue()) // Cases considered too costly to vectorize. return NoVectorization; @@ -7598,6 +7488,18 @@ void LoopVectorizationPlanner::collectTriviallyDeadInstructions( return U == Ind || DeadInstructions.count(cast(U)); })) DeadInstructions.insert(IndUpdate); + + // We record as "Dead" also the type-casting instructions we had identified + // during induction analysis. We don't need any handling for them in the + // vectorized loop because we have proven that, under a proper runtime + // test guarding the vectorized loop, the value of the phi, and the casted + // value of the phi, are the same. The last instruction in this casting chain + // will get its scalar/vector/widened def from the scalar/vector/widened def + // of the respective phi node. Any other casts in the induction def-use chain + // have no other uses outside the phi update chain, and will be ignored. + InductionDescriptor &IndDes = Induction.second; + const SmallVectorImpl &Casts = IndDes.getCastInsts(); + DeadInstructions.insert(Casts.begin(), Casts.end()); } } @@ -7656,391 +7558,6 @@ static void AddRuntimeUnrollDisableMetaData(Loop *L) { } } -namespace { - -/// VPWidenRecipe is a recipe for producing a copy of vector type for each -/// Instruction in its ingredients independently, in order. This recipe covers -/// most of the traditional vectorization cases where each ingredient transforms -/// into a vectorized version of itself. -class VPWidenRecipe : public VPRecipeBase { -private: - /// Hold the ingredients by pointing to their original BasicBlock location. 
- BasicBlock::iterator Begin; - BasicBlock::iterator End; - -public: - VPWidenRecipe(Instruction *I) : VPRecipeBase(VPWidenSC) { - End = I->getIterator(); - Begin = End++; - } - - ~VPWidenRecipe() override = default; - - /// Method to support type inquiry through isa, cast, and dyn_cast. - static inline bool classof(const VPRecipeBase *V) { - return V->getVPRecipeID() == VPRecipeBase::VPWidenSC; - } - - /// Produce widened copies of all Ingredients. - void execute(VPTransformState &State) override { - for (auto &Instr : make_range(Begin, End)) - State.ILV->widenInstruction(Instr); - } - - /// Augment the recipe to include Instr, if it lies at its End. - bool appendInstruction(Instruction *Instr) { - if (End != Instr->getIterator()) - return false; - End++; - return true; - } - - /// Print the recipe. - void print(raw_ostream &O, const Twine &Indent) const override { - O << " +\n" << Indent << "\"WIDEN\\l\""; - for (auto &Instr : make_range(Begin, End)) - O << " +\n" << Indent << "\" " << VPlanIngredient(&Instr) << "\\l\""; - } -}; - -/// A recipe for handling phi nodes of integer and floating-point inductions, -/// producing their vector and scalar values. -class VPWidenIntOrFpInductionRecipe : public VPRecipeBase { -private: - PHINode *IV; - TruncInst *Trunc; - -public: - VPWidenIntOrFpInductionRecipe(PHINode *IV, TruncInst *Trunc = nullptr) - : VPRecipeBase(VPWidenIntOrFpInductionSC), IV(IV), Trunc(Trunc) {} - ~VPWidenIntOrFpInductionRecipe() override = default; - - /// Method to support type inquiry through isa, cast, and dyn_cast. - static inline bool classof(const VPRecipeBase *V) { - return V->getVPRecipeID() == VPRecipeBase::VPWidenIntOrFpInductionSC; - } - - /// Generate the vectorized and scalarized versions of the phi node as - /// needed by their users. - void execute(VPTransformState &State) override { - assert(!State.Instance && "Int or FP induction being replicated."); - State.ILV->widenIntOrFpInduction(IV, Trunc); - } - - /// Print the recipe. - void print(raw_ostream &O, const Twine &Indent) const override { - O << " +\n" << Indent << "\"WIDEN-INDUCTION"; - if (Trunc) { - O << "\\l\""; - O << " +\n" << Indent << "\" " << VPlanIngredient(IV) << "\\l\""; - O << " +\n" << Indent << "\" " << VPlanIngredient(Trunc) << "\\l\""; - } else - O << " " << VPlanIngredient(IV) << "\\l\""; - } -}; - -/// A recipe for handling all phi nodes except for integer and FP inductions. -class VPWidenPHIRecipe : public VPRecipeBase { -private: - PHINode *Phi; - -public: - VPWidenPHIRecipe(PHINode *Phi) : VPRecipeBase(VPWidenPHISC), Phi(Phi) {} - ~VPWidenPHIRecipe() override = default; - - /// Method to support type inquiry through isa, cast, and dyn_cast. - static inline bool classof(const VPRecipeBase *V) { - return V->getVPRecipeID() == VPRecipeBase::VPWidenPHISC; - } - - /// Generate the phi/select nodes. - void execute(VPTransformState &State) override { - State.ILV->widenPHIInstruction(Phi, State.UF, State.VF); - } - - /// Print the recipe. - void print(raw_ostream &O, const Twine &Indent) const override { - O << " +\n" << Indent << "\"WIDEN-PHI " << VPlanIngredient(Phi) << "\\l\""; - } -}; - -/// A recipe for vectorizing a phi-node as a sequence of mask-based select -/// instructions. -class VPBlendRecipe : public VPRecipeBase { -private: - PHINode *Phi; - - /// The blend operation is a User of a mask, if not null. 
- std::unique_ptr User; - -public: - VPBlendRecipe(PHINode *Phi, ArrayRef Masks) - : VPRecipeBase(VPBlendSC), Phi(Phi) { - assert((Phi->getNumIncomingValues() == 1 || - Phi->getNumIncomingValues() == Masks.size()) && - "Expected the same number of incoming values and masks"); - if (!Masks.empty()) - User.reset(new VPUser(Masks)); - } - - /// Method to support type inquiry through isa, cast, and dyn_cast. - static inline bool classof(const VPRecipeBase *V) { - return V->getVPRecipeID() == VPRecipeBase::VPBlendSC; - } - - /// Generate the phi/select nodes. - void execute(VPTransformState &State) override { - State.ILV->setDebugLocFromInst(State.Builder, Phi); - // We know that all PHIs in non-header blocks are converted into - // selects, so we don't have to worry about the insertion order and we - // can just use the builder. - // At this point we generate the predication tree. There may be - // duplications since this is a simple recursive scan, but future - // optimizations will clean it up. - - unsigned NumIncoming = Phi->getNumIncomingValues(); - - assert((User || NumIncoming == 1) && - "Multiple predecessors with predecessors having a full mask"); - // Generate a sequence of selects of the form: - // SELECT(Mask3, In3, - // SELECT(Mask2, In2, - // ( ...))) - InnerLoopVectorizer::VectorParts Entry(State.UF); - for (unsigned In = 0; In < NumIncoming; ++In) { - for (unsigned Part = 0; Part < State.UF; ++Part) { - // We might have single edge PHIs (blocks) - use an identity - // 'select' for the first PHI operand. - Value *In0 = - State.ILV->getOrCreateVectorValue(Phi->getIncomingValue(In), Part); - if (In == 0) - Entry[Part] = In0; // Initialize with the first incoming value. - else { - // Select between the current value and the previous incoming edge - // based on the incoming mask. - Value *Cond = State.get(User->getOperand(In), Part); - Entry[Part] = - State.Builder.CreateSelect(Cond, In0, Entry[Part], "predphi"); - } - } - } - for (unsigned Part = 0; Part < State.UF; ++Part) - State.ValueMap.setVectorValue(Phi, Part, Entry[Part]); - } - - /// Print the recipe. - void print(raw_ostream &O, const Twine &Indent) const override { - O << " +\n" << Indent << "\"BLEND "; - Phi->printAsOperand(O, false); - O << " ="; - if (!User) { - // Not a User of any mask: not really blending, this is a - // single-predecessor phi. - O << " "; - Phi->getIncomingValue(0)->printAsOperand(O, false); - } else { - for (unsigned I = 0, E = User->getNumOperands(); I < E; ++I) { - O << " "; - Phi->getIncomingValue(I)->printAsOperand(O, false); - O << "/"; - User->getOperand(I)->printAsOperand(O); - } - } - O << "\\l\""; - } -}; - -/// VPInterleaveRecipe is a recipe for transforming an interleave group of load -/// or stores into one wide load/store and shuffles. -class VPInterleaveRecipe : public VPRecipeBase { -private: - const InterleaveGroup *IG; - -public: - VPInterleaveRecipe(const InterleaveGroup *IG) - : VPRecipeBase(VPInterleaveSC), IG(IG) {} - ~VPInterleaveRecipe() override = default; - - /// Method to support type inquiry through isa, cast, and dyn_cast. - static inline bool classof(const VPRecipeBase *V) { - return V->getVPRecipeID() == VPRecipeBase::VPInterleaveSC; - } - - /// Generate the wide load or store, and shuffles. - void execute(VPTransformState &State) override { - assert(!State.Instance && "Interleave group being replicated."); - State.ILV->vectorizeInterleaveGroup(IG->getInsertPos()); - } - - /// Print the recipe. 
- void print(raw_ostream &O, const Twine &Indent) const override; - - const InterleaveGroup *getInterleaveGroup() { return IG; } -}; - -/// VPReplicateRecipe replicates a given instruction producing multiple scalar -/// copies of the original scalar type, one per lane, instead of producing a -/// single copy of widened type for all lanes. If the instruction is known to be -/// uniform only one copy, per lane zero, will be generated. -class VPReplicateRecipe : public VPRecipeBase { -private: - /// The instruction being replicated. - Instruction *Ingredient; - - /// Indicator if only a single replica per lane is needed. - bool IsUniform; - - /// Indicator if the replicas are also predicated. - bool IsPredicated; - - /// Indicator if the scalar values should also be packed into a vector. - bool AlsoPack; - -public: - VPReplicateRecipe(Instruction *I, bool IsUniform, bool IsPredicated = false) - : VPRecipeBase(VPReplicateSC), Ingredient(I), IsUniform(IsUniform), - IsPredicated(IsPredicated) { - // Retain the previous behavior of predicateInstructions(), where an - // insert-element of a predicated instruction got hoisted into the - // predicated basic block iff it was its only user. This is achieved by - // having predicated instructions also pack their values into a vector by - // default unless they have a replicated user which uses their scalar value. - AlsoPack = IsPredicated && !I->use_empty(); - } - - ~VPReplicateRecipe() override = default; - - /// Method to support type inquiry through isa, cast, and dyn_cast. - static inline bool classof(const VPRecipeBase *V) { - return V->getVPRecipeID() == VPRecipeBase::VPReplicateSC; - } - - /// Generate replicas of the desired Ingredient. Replicas will be generated - /// for all parts and lanes unless a specific part and lane are specified in - /// the \p State. - void execute(VPTransformState &State) override; - - void setAlsoPack(bool Pack) { AlsoPack = Pack; } - - /// Print the recipe. - void print(raw_ostream &O, const Twine &Indent) const override { - O << " +\n" - << Indent << "\"" << (IsUniform ? "CLONE " : "REPLICATE ") - << VPlanIngredient(Ingredient); - if (AlsoPack) - O << " (S->V)"; - O << "\\l\""; - } -}; - -/// A recipe for generating conditional branches on the bits of a mask. -class VPBranchOnMaskRecipe : public VPRecipeBase { -private: - std::unique_ptr User; - -public: - VPBranchOnMaskRecipe(VPValue *BlockInMask) : VPRecipeBase(VPBranchOnMaskSC) { - if (BlockInMask) // nullptr means all-one mask. - User.reset(new VPUser({BlockInMask})); - } - - /// Method to support type inquiry through isa, cast, and dyn_cast. - static inline bool classof(const VPRecipeBase *V) { - return V->getVPRecipeID() == VPRecipeBase::VPBranchOnMaskSC; - } - - /// Generate the extraction of the appropriate bit from the block mask and the - /// conditional branch. - void execute(VPTransformState &State) override; - - /// Print the recipe. - void print(raw_ostream &O, const Twine &Indent) const override { - O << " +\n" << Indent << "\"BRANCH-ON-MASK "; - if (User) - O << *User->getOperand(0); - else - O << " All-One"; - O << "\\l\""; - } -}; - -/// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when -/// control converges back from a Branch-on-Mask. The phi nodes are needed in -/// order to merge values that are set under such a branch and feed their uses. -/// The phi nodes can be scalar or vector depending on the users of the value. -/// This recipe works in concert with VPBranchOnMaskRecipe. 
-class VPPredInstPHIRecipe : public VPRecipeBase { -private: - Instruction *PredInst; - -public: - /// Construct a VPPredInstPHIRecipe given \p PredInst whose value needs a phi - /// nodes after merging back from a Branch-on-Mask. - VPPredInstPHIRecipe(Instruction *PredInst) - : VPRecipeBase(VPPredInstPHISC), PredInst(PredInst) {} - ~VPPredInstPHIRecipe() override = default; - - /// Method to support type inquiry through isa, cast, and dyn_cast. - static inline bool classof(const VPRecipeBase *V) { - return V->getVPRecipeID() == VPRecipeBase::VPPredInstPHISC; - } - - /// Generates phi nodes for live-outs as needed to retain SSA form. - void execute(VPTransformState &State) override; - - /// Print the recipe. - void print(raw_ostream &O, const Twine &Indent) const override { - O << " +\n" - << Indent << "\"PHI-PREDICATED-INSTRUCTION " << VPlanIngredient(PredInst) - << "\\l\""; - } -}; - -/// A Recipe for widening load/store operations. -/// TODO: We currently execute only per-part unless a specific instance is -/// provided. -class VPWidenMemoryInstructionRecipe : public VPRecipeBase { -private: - Instruction &Instr; - std::unique_ptr User; - -public: - VPWidenMemoryInstructionRecipe(Instruction &Instr, VPValue *Mask) - : VPRecipeBase(VPWidenMemoryInstructionSC), Instr(Instr) { - if (Mask) // Create a VPInstruction to register as a user of the mask. - User.reset(new VPUser({Mask})); - } - - /// Method to support type inquiry through isa, cast, and dyn_cast. - static inline bool classof(const VPRecipeBase *V) { - return V->getVPRecipeID() == VPRecipeBase::VPWidenMemoryInstructionSC; - } - - /// Generate the wide load/store. - void execute(VPTransformState &State) override { - if (!User) - return State.ILV->vectorizeMemoryInstruction(&Instr); - - // Last (and currently only) operand is a mask. - InnerLoopVectorizer::VectorParts MaskValues(State.UF); - VPValue *Mask = User->getOperand(User->getNumOperands() - 1); - for (unsigned Part = 0; Part < State.UF; ++Part) - MaskValues[Part] = State.get(Mask, Part); - State.ILV->vectorizeMemoryInstruction(&Instr, &MaskValues); - } - - /// Print the recipe. 
- void print(raw_ostream &O, const Twine &Indent) const override { - O << " +\n" << Indent << "\"WIDEN " << VPlanIngredient(&Instr); - if (User) { - O << ", "; - User->getOperand(0)->printAsOperand(O); - } - O << "\\l\""; - } -}; -} // end anonymous namespace - bool LoopVectorizationPlanner::getDecisionAndClampRange( const std::function &Predicate, VFRange &Range) { assert(Range.End > Range.Start && "Trying to test an empty VF range."); @@ -8593,6 +8110,11 @@ LoopVectorizationPlanner::buildVPlan(VFRange &Range, return Plan; } +Value* LoopVectorizationPlanner::VPCallbackILV:: +getOrCreateVectorValues(Value *V, unsigned Part) { + return ILV.getOrCreateVectorValue(V, Part); +} + void VPInterleaveRecipe::print(raw_ostream &O, const Twine &Indent) const { O << " +\n" << Indent << "\"INTERLEAVE-GROUP with factor " << IG->getFactor() << " at "; @@ -8604,6 +8126,64 @@ void VPInterleaveRecipe::print(raw_ostream &O, const Twine &Indent) const { << Indent << "\" " << VPlanIngredient(I) << " " << i << "\\l\""; } +void VPWidenRecipe::execute(VPTransformState &State) { + for (auto &Instr : make_range(Begin, End)) + State.ILV->widenInstruction(Instr); +} + +void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) { + assert(!State.Instance && "Int or FP induction being replicated."); + State.ILV->widenIntOrFpInduction(IV, Trunc); +} + +void VPWidenPHIRecipe::execute(VPTransformState &State) { + State.ILV->widenPHIInstruction(Phi, State.UF, State.VF); +} + +void VPBlendRecipe::execute(VPTransformState &State) { + State.ILV->setDebugLocFromInst(State.Builder, Phi); + // We know that all PHIs in non-header blocks are converted into + // selects, so we don't have to worry about the insertion order and we + // can just use the builder. + // At this point we generate the predication tree. There may be + // duplications since this is a simple recursive scan, but future + // optimizations will clean it up. + + unsigned NumIncoming = Phi->getNumIncomingValues(); + + assert((User || NumIncoming == 1) && + "Multiple predecessors with predecessors having a full mask"); + // Generate a sequence of selects of the form: + // SELECT(Mask3, In3, + // SELECT(Mask2, In2, + // ( ...))) + InnerLoopVectorizer::VectorParts Entry(State.UF); + for (unsigned In = 0; In < NumIncoming; ++In) { + for (unsigned Part = 0; Part < State.UF; ++Part) { + // We might have single edge PHIs (blocks) - use an identity + // 'select' for the first PHI operand. + Value *In0 = + State.ILV->getOrCreateVectorValue(Phi->getIncomingValue(In), Part); + if (In == 0) + Entry[Part] = In0; // Initialize with the first incoming value. + else { + // Select between the current value and the previous incoming edge + // based on the incoming mask. + Value *Cond = State.get(User->getOperand(In), Part); + Entry[Part] = + State.Builder.CreateSelect(Cond, In0, Entry[Part], "predphi"); + } + } + } + for (unsigned Part = 0; Part < State.UF; ++Part) + State.ValueMap.setVectorValue(Phi, Part, Entry[Part]); +} + +void VPInterleaveRecipe::execute(VPTransformState &State) { + assert(!State.Instance && "Interleave group being replicated."); + State.ILV->vectorizeInterleaveGroup(IG->getInsertPos()); +} + void VPReplicateRecipe::execute(VPTransformState &State) { if (State.Instance) { // Generate a single instance. 
State.ILV->scalarizeInstruction(Ingredient, *State.Instance, IsPredicated); @@ -8687,6 +8267,18 @@ void VPPredInstPHIRecipe::execute(VPTransformState &State) { } } +void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) { + if (!User) + return State.ILV->vectorizeMemoryInstruction(&Instr); + + // Last (and currently only) operand is a mask. + InnerLoopVectorizer::VectorParts MaskValues(State.UF); + VPValue *Mask = User->getOperand(User->getNumOperands() - 1); + for (unsigned Part = 0; Part < State.UF; ++Part) + MaskValues[Part] = State.get(Mask, Part); + State.ILV->vectorizeMemoryInstruction(&Instr, &MaskValues); +} + bool LoopVectorizePass::processLoop(Loop *L) { assert(L->empty() && "Only process inner loops."); @@ -8811,8 +8403,7 @@ bool LoopVectorizePass::processLoop(Loop *L) { unsigned UserVF = Hints.getWidth(); // Plan how to best vectorize, return the best VF and its cost. - LoopVectorizationCostModel::VectorizationFactor VF = - LVP.plan(OptForSize, UserVF); + VectorizationFactor VF = LVP.plan(OptForSize, UserVF); // Select the interleave count. unsigned IC = CM.selectInterleaveCount(OptForSize, VF.Width, VF.Cost); diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index d30c1063c0d3..f748ba4b31b4 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -585,8 +585,7 @@ class BoUpSLP { ScalarToTreeEntry.clear(); MustGather.clear(); ExternalUses.clear(); - NumLoadsWantToKeepOrder = 0; - NumLoadsWantToChangeOrder = 0; + NumOpsWantToKeepOrder.clear(); for (auto &Iter : BlocksSchedules) { BlockScheduling *BS = Iter.second.get(); BS->clear(); @@ -597,11 +596,16 @@ class BoUpSLP { unsigned getTreeSize() const { return VectorizableTree.size(); } /// \brief Perform LICM and CSE on the newly generated gather sequences. - void optimizeGatherSequence(Function &F); + void optimizeGatherSequence(); /// \returns true if it is beneficial to reverse the vector order. bool shouldReorder() const { - return NumLoadsWantToChangeOrder > NumLoadsWantToKeepOrder; + return std::accumulate( + NumOpsWantToKeepOrder.begin(), NumOpsWantToKeepOrder.end(), 0, + [](int Val1, + const decltype(NumOpsWantToKeepOrder)::value_type &Val2) { + return Val1 + (Val2.second < 0 ? 1 : -1); + }) > 0; } /// \return The vector element size in bits to use when vectorizing the @@ -1201,11 +1205,10 @@ class BoUpSLP { /// List of users to ignore during scheduling and that don't need extracting. ArrayRef UserIgnoreList; - // Number of load bundles that contain consecutive loads. - int NumLoadsWantToKeepOrder = 0; - - // Number of load bundles that contain consecutive loads in reversed order. - int NumLoadsWantToChangeOrder = 0; + /// Number of operation bundles that contain consecutive operations - number + /// of operation bundles that contain consecutive operations in reversed + /// order. + DenseMap NumOpsWantToKeepOrder; // Analysis and block reference. 
Function *F; @@ -1347,7 +1350,6 @@ void BoUpSLP::buildTree(ArrayRef Roots, DEBUG(dbgs() << "SLP: Need to extract: Extra arg from lane " << Lane << " from " << *Scalar << ".\n"); ExternalUses.emplace_back(Scalar, nullptr, Lane); - continue; } for (User *U : Scalar->users()) { DEBUG(dbgs() << "SLP: Checking user:" << *U << ".\n"); @@ -1544,7 +1546,11 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, bool Reuse = canReuseExtract(VL, VL0); if (Reuse) { DEBUG(dbgs() << "SLP: Reusing extract sequence.\n"); + ++NumOpsWantToKeepOrder[S.Opcode]; } else { + SmallVector ReverseVL(VL.rbegin(), VL.rend()); + if (canReuseExtract(ReverseVL, VL0)) + --NumOpsWantToKeepOrder[S.Opcode]; BS.cancelScheduling(VL, VL0); } newTreeEntry(VL, Reuse, UserTreeIdx); @@ -1594,7 +1600,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, } if (Consecutive) { - ++NumLoadsWantToKeepOrder; + ++NumOpsWantToKeepOrder[S.Opcode]; newTreeEntry(VL, true, UserTreeIdx); DEBUG(dbgs() << "SLP: added a vector of loads.\n"); return; @@ -1613,7 +1619,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, newTreeEntry(VL, false, UserTreeIdx); if (ReverseConsecutive) { - ++NumLoadsWantToChangeOrder; + --NumOpsWantToKeepOrder[S.Opcode]; DEBUG(dbgs() << "SLP: Gathering reversed loads.\n"); } else { DEBUG(dbgs() << "SLP: Gathering non-consecutive loads.\n"); @@ -2059,7 +2065,10 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { VL0->getType(), SrcTy, VL0); VectorType *SrcVecTy = VectorType::get(SrcTy, VL.size()); - int VecCost = TTI->getCastInstrCost(VL0->getOpcode(), VecTy, SrcVecTy, VL0); + int VecCost = 0; + // Check if the values are candidates to demote. + if (!MinBWs.count(VL0) || VecTy != SrcVecTy) + VecCost = TTI->getCastInstrCost(VL0->getOpcode(), VecTy, SrcVecTy, VL0); return VecCost - ScalarCost; } case Instruction::FCmp: @@ -3310,7 +3319,7 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) { return VectorizableTree[0].VectorizedValue; } -void BoUpSLP::optimizeGatherSequence(Function &F) { +void BoUpSLP::optimizeGatherSequence() { DEBUG(dbgs() << "SLP: Optimizing " << GatherSeq.size() << " gather sequences instructions.\n"); // LICM InsertElementInst sequences. @@ -3344,16 +3353,30 @@ void BoUpSLP::optimizeGatherSequence(Function &F) { Insert->moveBefore(PreHeader->getTerminator()); } + // Make a list of all reachable blocks in our CSE queue. + SmallVector CSEWorkList; + CSEWorkList.reserve(CSEBlocks.size()); + for (BasicBlock *BB : CSEBlocks) + if (DomTreeNode *N = DT->getNode(BB)) { + assert(DT->isReachableFromEntry(N)); + CSEWorkList.push_back(N); + } + + // Sort blocks by domination. This ensures we visit a block after all blocks + // dominating it are visited. + std::stable_sort(CSEWorkList.begin(), CSEWorkList.end(), + [this](const DomTreeNode *A, const DomTreeNode *B) { + return DT->properlyDominates(A, B); + }); + // Perform O(N^2) search over the gather sequences and merge identical // instructions. TODO: We can further optimize this scan if we split the // instructions into different buckets based on the insert lane. SmallVector Visited; - ReversePostOrderTraversal RPOT(&F); - for (auto BB : RPOT) { - // Traverse CSEBlocks by RPOT order. 
- if (!CSEBlocks.count(BB)) - continue; - + for (auto I = CSEWorkList.begin(), E = CSEWorkList.end(); I != E; ++I) { + assert((I == CSEWorkList.begin() || !DT->dominates(*I, *std::prev(I))) && + "Worklist not sorted properly!"); + BasicBlock *BB = (*I)->getBlock(); // For all instructions in blocks containing gather sequences: for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e;) { Instruction *In = &*it++; @@ -3905,6 +3928,7 @@ static bool collectValuesToDemote(Value *V, SmallPtrSetImpl &Expr, // seed additional demotion, we save the truncated value. case Instruction::Trunc: Roots.push_back(I->getOperand(0)); + break; case Instruction::ZExt: case Instruction::SExt: break; @@ -3993,9 +4017,24 @@ void BoUpSLP::computeMinimumValueSizes() { // additional roots that require investigating in Roots. SmallVector ToDemote; SmallVector Roots; - for (auto *Root : TreeRoot) + for (auto *Root : TreeRoot) { + // Do not include top zext/sext/trunc operations to those to be demoted, it + // produces noise cast, trunc , exctract , cast + // sequence. + if (isa(Root)) + continue; + auto *I = dyn_cast(Root); + if (!I || !I->hasOneUse() || !Expr.count(I)) + return; + if (isa(I) || isa(I)) + continue; + if (auto *TI = dyn_cast(I)) { + Roots.push_back(TI->getOperand(0)); + continue; + } if (!collectValuesToDemote(Root, Expr, ToDemote, Roots)) return; + } // The maximum bit width required to represent all the values that can be // demoted without loss of precision. It would be safe to truncate the roots @@ -4221,7 +4260,7 @@ bool SLPVectorizerPass::runImpl(Function &F, ScalarEvolution *SE_, } if (Changed) { - R.optimizeGatherSequence(F); + R.optimizeGatherSequence(); DEBUG(dbgs() << "SLP: vectorized \"" << F.getName() << "\"\n"); DEBUG(verifyFunction(F)); } @@ -4416,11 +4455,10 @@ bool SLPVectorizerPass::tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) { if (!A || !B) return false; Value *VL[] = { A, B }; - return tryToVectorizeList(VL, R, None, true); + return tryToVectorizeList(VL, R, true); } bool SLPVectorizerPass::tryToVectorizeList(ArrayRef VL, BoUpSLP &R, - ArrayRef BuildVector, bool AllowReorder) { if (VL.size() < 2) return false; @@ -4515,11 +4553,7 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef VL, BoUpSLP &R, << "\n"); ArrayRef Ops = VL.slice(I, OpsWidth); - ArrayRef BuildVectorSlice; - if (!BuildVector.empty()) - BuildVectorSlice = BuildVector.slice(I, OpsWidth); - - R.buildTree(Ops, BuildVectorSlice); + R.buildTree(Ops); // TODO: check if we can allow reordering for more cases. if (AllowReorder && R.shouldReorder()) { // Conceptually, there is nothing actually preventing us from trying to @@ -4527,7 +4561,6 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef VL, BoUpSLP &R, // reductions. However, at this point, we only expect to get here when // there are exactly two operations. assert(Ops.size() == 2); - assert(BuildVectorSlice.empty()); Value *ReorderedOps[] = {Ops[1], Ops[0]}; R.buildTree(ReorderedOps, None); } @@ -4547,31 +4580,7 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef VL, BoUpSLP &R, << " and with tree size " << ore::NV("TreeSize", R.getTreeSize())); - Value *VectorizedRoot = R.vectorizeTree(); - - // Reconstruct the build vector by extracting the vectorized root. This - // way we handle the case where some elements of the vector are - // undefined. - // (return (inserelt <4 xi32> (insertelt undef (opd0) 0) (opd1) 2)) - if (!BuildVectorSlice.empty()) { - // The insert point is the last build vector instruction. 
The - // vectorized root will precede it. This guarantees that we get an - // instruction. The vectorized tree could have been constant folded. - Instruction *InsertAfter = cast(BuildVectorSlice.back()); - unsigned VecIdx = 0; - for (auto &V : BuildVectorSlice) { - IRBuilder Builder(InsertAfter->getParent(), - ++BasicBlock::iterator(InsertAfter)); - Instruction *I = cast(V); - assert(isa(I) || isa(I)); - Instruction *Extract = - cast(Builder.CreateExtractElement( - VectorizedRoot, Builder.getInt32(VecIdx++))); - I->setOperand(1, Extract); - I->moveAfter(Extract); - InsertAfter = I; - } - } + R.vectorizeTree(); // Move to the next bundle. I += VF - 1; NextInst = I + 1; @@ -5492,11 +5501,9 @@ class HorizontalReduction { /// /// Returns true if it matches static bool findBuildVector(InsertElementInst *LastInsertElem, - SmallVectorImpl &BuildVector, SmallVectorImpl &BuildVectorOpds) { Value *V = nullptr; do { - BuildVector.push_back(LastInsertElem); BuildVectorOpds.push_back(LastInsertElem->getOperand(1)); V = LastInsertElem->getOperand(0); if (isa(V)) @@ -5505,7 +5512,6 @@ static bool findBuildVector(InsertElementInst *LastInsertElem, if (!LastInsertElem || !LastInsertElem->hasOneUse()) return false; } while (true); - std::reverse(BuildVector.begin(), BuildVector.end()); std::reverse(BuildVectorOpds.begin(), BuildVectorOpds.end()); return true; } @@ -5514,11 +5520,9 @@ static bool findBuildVector(InsertElementInst *LastInsertElem, /// /// \return true if it matches. static bool findBuildAggregate(InsertValueInst *IV, - SmallVectorImpl &BuildVector, SmallVectorImpl &BuildVectorOpds) { Value *V; do { - BuildVector.push_back(IV); BuildVectorOpds.push_back(IV->getInsertedValueOperand()); V = IV->getAggregateOperand(); if (isa(V)) @@ -5527,7 +5531,6 @@ static bool findBuildAggregate(InsertValueInst *IV, if (!IV || !IV->hasOneUse()) return false; } while (true); - std::reverse(BuildVector.begin(), BuildVector.end()); std::reverse(BuildVectorOpds.begin(), BuildVectorOpds.end()); return true; } @@ -5703,25 +5706,25 @@ bool SLPVectorizerPass::vectorizeInsertValueInst(InsertValueInst *IVI, if (!R.canMapToVector(IVI->getType(), DL)) return false; - SmallVector BuildVector; SmallVector BuildVectorOpds; - if (!findBuildAggregate(IVI, BuildVector, BuildVectorOpds)) + if (!findBuildAggregate(IVI, BuildVectorOpds)) return false; DEBUG(dbgs() << "SLP: array mappable to vector: " << *IVI << "\n"); - return tryToVectorizeList(BuildVectorOpds, R, BuildVector, false); + // Aggregate value is unlikely to be processed in vector register, we need to + // extract scalars into scalar registers, so NeedExtraction is set true. + return tryToVectorizeList(BuildVectorOpds, R); } bool SLPVectorizerPass::vectorizeInsertElementInst(InsertElementInst *IEI, BasicBlock *BB, BoUpSLP &R) { - SmallVector BuildVector; SmallVector BuildVectorOpds; - if (!findBuildVector(IEI, BuildVector, BuildVectorOpds)) + if (!findBuildVector(IEI, BuildVectorOpds)) return false; // Vectorize starting with the build vector operands ignoring the BuildVector // instructions for the purpose of scheduling and user extraction. - return tryToVectorizeList(BuildVectorOpds, R, BuildVector); + return tryToVectorizeList(BuildVectorOpds, R); } bool SLPVectorizerPass::vectorizeCmpInst(CmpInst *CI, BasicBlock *BB, @@ -5799,8 +5802,8 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { // is done when there are exactly two elements since tryToVectorizeList // asserts that there are only two values when AllowReorder is true. 
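With the BuildVector parameter gone, findBuildVector and findBuildAggregate above only collect the inserted operands: they walk the insertelement/insertvalue chain from the last element back to its undef base, require each intermediate insert to have a single use, and reverse the collected operands at the end. A toy sketch of that walk, with a hypothetical InsertNode type standing in for the IR instructions:

#include <algorithm>
#include <cassert>
#include <string>
#include <vector>

// Toy stand-in for an insertelement chain: each node inserts one operand into
// the value produced by its predecessor (Prev == nullptr models 'undef').
struct InsertNode {
  std::string Operand;
  InsertNode *Prev;
  unsigned NumUses;
};

// Walk from the last insert back to the base, collecting the inserted
// operands; every intermediate insert must have a single use for the chain to
// be a plain build vector. Mirrors the simplified findBuildVector above.
bool findBuildVectorOpds(InsertNode *Last, std::vector<std::string> &Opds) {
  InsertNode *Cur = Last;
  while (true) {
    Opds.push_back(Cur->Operand);
    InsertNode *Prev = Cur->Prev;
    if (!Prev)          // reached the initial 'undef' base value
      break;
    if (Prev->NumUses != 1)
      return false;     // chain is shared; not a plain build vector
    Cur = Prev;
  }
  std::reverse(Opds.begin(), Opds.end()); // restore source order
  return true;
}

int main() {
  InsertNode I0{"%a", nullptr, 1};
  InsertNode I1{"%b", &I0, 1};
  InsertNode I2{"%c", &I1, 1};
  std::vector<std::string> Opds;
  bool IsBuildVector = findBuildVectorOpds(&I2, Opds);
  assert(IsBuildVector);
  assert((Opds == std::vector<std::string>{"%a", "%b", "%c"}));
  (void)IsBuildVector;
}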
bool AllowReorder = NumElts == 2; - if (NumElts > 1 && tryToVectorizeList(makeArrayRef(IncIt, NumElts), R, - None, AllowReorder)) { + if (NumElts > 1 && + tryToVectorizeList(makeArrayRef(IncIt, NumElts), R, AllowReorder)) { // Success start over because instructions might have been changed. HaveVectorizedPhiNodes = true; Changed = true; diff --git a/lib/Transforms/Vectorize/VPlan.cpp b/lib/Transforms/Vectorize/VPlan.cpp index 5bbe5edb3862..4e54fc6db2a5 100644 --- a/lib/Transforms/Vectorize/VPlan.cpp +++ b/lib/Transforms/Vectorize/VPlan.cpp @@ -489,3 +489,69 @@ void VPlanPrinter::printAsIngredient(raw_ostream &O, Value *V) { RSO.flush(); O << DOT::EscapeString(IngredientString); } + +void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent) const { + O << " +\n" << Indent << "\"WIDEN\\l\""; + for (auto &Instr : make_range(Begin, End)) + O << " +\n" << Indent << "\" " << VPlanIngredient(&Instr) << "\\l\""; +} + +void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, + const Twine &Indent) const { + O << " +\n" << Indent << "\"WIDEN-INDUCTION"; + if (Trunc) { + O << "\\l\""; + O << " +\n" << Indent << "\" " << VPlanIngredient(IV) << "\\l\""; + O << " +\n" << Indent << "\" " << VPlanIngredient(Trunc) << "\\l\""; + } else + O << " " << VPlanIngredient(IV) << "\\l\""; +} + +void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent) const { + O << " +\n" << Indent << "\"WIDEN-PHI " << VPlanIngredient(Phi) << "\\l\""; +} + +void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent) const { + O << " +\n" << Indent << "\"BLEND "; + Phi->printAsOperand(O, false); + O << " ="; + if (!User) { + // Not a User of any mask: not really blending, this is a + // single-predecessor phi. + O << " "; + Phi->getIncomingValue(0)->printAsOperand(O, false); + } else { + for (unsigned I = 0, E = User->getNumOperands(); I < E; ++I) { + O << " "; + Phi->getIncomingValue(I)->printAsOperand(O, false); + O << "/"; + User->getOperand(I)->printAsOperand(O); + } + } + O << "\\l\""; +} + +void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent) const { + O << " +\n" + << Indent << "\"" << (IsUniform ? "CLONE " : "REPLICATE ") + << VPlanIngredient(Ingredient); + if (AlsoPack) + O << " (S->V)"; + O << "\\l\""; +} + +void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent) const { + O << " +\n" + << Indent << "\"PHI-PREDICATED-INSTRUCTION " << VPlanIngredient(PredInst) + << "\\l\""; +} + +void VPWidenMemoryInstructionRecipe::print(raw_ostream &O, + const Twine &Indent) const { + O << " +\n" << Indent << "\"WIDEN " << VPlanIngredient(&Instr); + if (User) { + O << ", "; + User->getOperand(0)->printAsOperand(O); + } + O << "\\l\""; +} diff --git a/lib/Transforms/Vectorize/VPlan.h b/lib/Transforms/Vectorize/VPlan.h index a39eda0c08c9..555a31fbb862 100644 --- a/lib/Transforms/Vectorize/VPlan.h +++ b/lib/Transforms/Vectorize/VPlan.h @@ -42,18 +42,14 @@ #include #include -// The (re)use of existing LoopVectorize classes is subject to future VPlan -// refactoring. 
-namespace { -class LoopVectorizationLegality; -class LoopVectorizationCostModel; -} // namespace - namespace llvm { +class LoopVectorizationLegality; +class LoopVectorizationCostModel; class BasicBlock; class DominatorTree; class InnerLoopVectorizer; +class InterleaveGroup; class LoopInfo; class raw_ostream; class Value; @@ -586,6 +582,280 @@ class VPInstruction : public VPUser, public VPRecipeBase { void print(raw_ostream &O) const; }; +/// VPWidenRecipe is a recipe for producing a copy of vector type for each +/// Instruction in its ingredients independently, in order. This recipe covers +/// most of the traditional vectorization cases where each ingredient transforms +/// into a vectorized version of itself. +class VPWidenRecipe : public VPRecipeBase { +private: + /// Hold the ingredients by pointing to their original BasicBlock location. + BasicBlock::iterator Begin; + BasicBlock::iterator End; + +public: + VPWidenRecipe(Instruction *I) : VPRecipeBase(VPWidenSC) { + End = I->getIterator(); + Begin = End++; + } + + ~VPWidenRecipe() override = default; + + /// Method to support type inquiry through isa, cast, and dyn_cast. + static inline bool classof(const VPRecipeBase *V) { + return V->getVPRecipeID() == VPRecipeBase::VPWidenSC; + } + + /// Produce widened copies of all Ingredients. + void execute(VPTransformState &State) override; + + /// Augment the recipe to include Instr, if it lies at its End. + bool appendInstruction(Instruction *Instr) { + if (End != Instr->getIterator()) + return false; + End++; + return true; + } + + /// Print the recipe. + void print(raw_ostream &O, const Twine &Indent) const override; +}; + +/// A recipe for handling phi nodes of integer and floating-point inductions, +/// producing their vector and scalar values. +class VPWidenIntOrFpInductionRecipe : public VPRecipeBase { +private: + PHINode *IV; + TruncInst *Trunc; + +public: + VPWidenIntOrFpInductionRecipe(PHINode *IV, TruncInst *Trunc = nullptr) + : VPRecipeBase(VPWidenIntOrFpInductionSC), IV(IV), Trunc(Trunc) {} + ~VPWidenIntOrFpInductionRecipe() override = default; + + /// Method to support type inquiry through isa, cast, and dyn_cast. + static inline bool classof(const VPRecipeBase *V) { + return V->getVPRecipeID() == VPRecipeBase::VPWidenIntOrFpInductionSC; + } + + /// Generate the vectorized and scalarized versions of the phi node as + /// needed by their users. + void execute(VPTransformState &State) override; + + /// Print the recipe. + void print(raw_ostream &O, const Twine &Indent) const override; +}; + +/// A recipe for handling all phi nodes except for integer and FP inductions. +class VPWidenPHIRecipe : public VPRecipeBase { +private: + PHINode *Phi; + +public: + VPWidenPHIRecipe(PHINode *Phi) : VPRecipeBase(VPWidenPHISC), Phi(Phi) {} + ~VPWidenPHIRecipe() override = default; + + /// Method to support type inquiry through isa, cast, and dyn_cast. + static inline bool classof(const VPRecipeBase *V) { + return V->getVPRecipeID() == VPRecipeBase::VPWidenPHISC; + } + + /// Generate the phi/select nodes. + void execute(VPTransformState &State) override; + + /// Print the recipe. + void print(raw_ostream &O, const Twine &Indent) const override; +}; + +/// A recipe for vectorizing a phi-node as a sequence of mask-based select +/// instructions. +class VPBlendRecipe : public VPRecipeBase { +private: + PHINode *Phi; + + /// The blend operation is a User of a mask, if not null. 
+ std::unique_ptr<VPUser> User; + +public: + VPBlendRecipe(PHINode *Phi, ArrayRef<VPValue *> Masks) + : VPRecipeBase(VPBlendSC), Phi(Phi) { + assert((Phi->getNumIncomingValues() == 1 || + Phi->getNumIncomingValues() == Masks.size()) && + "Expected the same number of incoming values and masks"); + if (!Masks.empty()) + User.reset(new VPUser(Masks)); + } + + /// Method to support type inquiry through isa, cast, and dyn_cast. + static inline bool classof(const VPRecipeBase *V) { + return V->getVPRecipeID() == VPRecipeBase::VPBlendSC; + } + + /// Generate the phi/select nodes. + void execute(VPTransformState &State) override; + + /// Print the recipe. + void print(raw_ostream &O, const Twine &Indent) const override; +}; + +/// VPInterleaveRecipe is a recipe for transforming an interleave group of load +/// or stores into one wide load/store and shuffles. +class VPInterleaveRecipe : public VPRecipeBase { +private: + const InterleaveGroup *IG; + +public: + VPInterleaveRecipe(const InterleaveGroup *IG) + : VPRecipeBase(VPInterleaveSC), IG(IG) {} + ~VPInterleaveRecipe() override = default; + + /// Method to support type inquiry through isa, cast, and dyn_cast. + static inline bool classof(const VPRecipeBase *V) { + return V->getVPRecipeID() == VPRecipeBase::VPInterleaveSC; + } + + /// Generate the wide load or store, and shuffles. + void execute(VPTransformState &State) override; + + /// Print the recipe. + void print(raw_ostream &O, const Twine &Indent) const override; + + const InterleaveGroup *getInterleaveGroup() { return IG; } +}; + +/// VPReplicateRecipe replicates a given instruction producing multiple scalar +/// copies of the original scalar type, one per lane, instead of producing a +/// single copy of widened type for all lanes. If the instruction is known to be +/// uniform only one copy, per lane zero, will be generated. +class VPReplicateRecipe : public VPRecipeBase { +private: + /// The instruction being replicated. + Instruction *Ingredient; + + /// Indicator if only a single replica per lane is needed. + bool IsUniform; + + /// Indicator if the replicas are also predicated. + bool IsPredicated; + + /// Indicator if the scalar values should also be packed into a vector. + bool AlsoPack; + +public: + VPReplicateRecipe(Instruction *I, bool IsUniform, bool IsPredicated = false) + : VPRecipeBase(VPReplicateSC), Ingredient(I), IsUniform(IsUniform), + IsPredicated(IsPredicated) { + // Retain the previous behavior of predicateInstructions(), where an + // insert-element of a predicated instruction got hoisted into the + // predicated basic block iff it was its only user. This is achieved by + // having predicated instructions also pack their values into a vector by + // default unless they have a replicated user which uses their scalar value. + AlsoPack = IsPredicated && !I->use_empty(); + } + + ~VPReplicateRecipe() override = default; + + /// Method to support type inquiry through isa, cast, and dyn_cast. + static inline bool classof(const VPRecipeBase *V) { + return V->getVPRecipeID() == VPRecipeBase::VPReplicateSC; + } + + /// Generate replicas of the desired Ingredient. Replicas will be generated + /// for all parts and lanes unless a specific part and lane are specified in + /// the \p State. + void execute(VPTransformState &State) override; + + void setAlsoPack(bool Pack) { AlsoPack = Pack; } + + /// Print the recipe. + void print(raw_ostream &O, const Twine &Indent) const override; +}; + +/// A recipe for generating conditional branches on the bits of a mask.
+class VPBranchOnMaskRecipe : public VPRecipeBase { +private: + std::unique_ptr<VPUser> User; + +public: + VPBranchOnMaskRecipe(VPValue *BlockInMask) : VPRecipeBase(VPBranchOnMaskSC) { + if (BlockInMask) // nullptr means all-one mask. + User.reset(new VPUser({BlockInMask})); + } + + /// Method to support type inquiry through isa, cast, and dyn_cast. + static inline bool classof(const VPRecipeBase *V) { + return V->getVPRecipeID() == VPRecipeBase::VPBranchOnMaskSC; + } + + /// Generate the extraction of the appropriate bit from the block mask and the + /// conditional branch. + void execute(VPTransformState &State) override; + + /// Print the recipe. + void print(raw_ostream &O, const Twine &Indent) const override { + O << " +\n" << Indent << "\"BRANCH-ON-MASK "; + if (User) + O << *User->getOperand(0); + else + O << " All-One"; + O << "\\l\""; + } +}; + +/// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when +/// control converges back from a Branch-on-Mask. The phi nodes are needed in +/// order to merge values that are set under such a branch and feed their uses. +/// The phi nodes can be scalar or vector depending on the users of the value. +/// This recipe works in concert with VPBranchOnMaskRecipe. +class VPPredInstPHIRecipe : public VPRecipeBase { +private: + Instruction *PredInst; + +public: + /// Construct a VPPredInstPHIRecipe given \p PredInst whose value needs a phi + /// nodes after merging back from a Branch-on-Mask. + VPPredInstPHIRecipe(Instruction *PredInst) + : VPRecipeBase(VPPredInstPHISC), PredInst(PredInst) {} + ~VPPredInstPHIRecipe() override = default; + + /// Method to support type inquiry through isa, cast, and dyn_cast. + static inline bool classof(const VPRecipeBase *V) { + return V->getVPRecipeID() == VPRecipeBase::VPPredInstPHISC; + } + + /// Generates phi nodes for live-outs as needed to retain SSA form. + void execute(VPTransformState &State) override; + + /// Print the recipe. + void print(raw_ostream &O, const Twine &Indent) const override; +}; + +/// A Recipe for widening load/store operations. +/// TODO: We currently execute only per-part unless a specific instance is +/// provided. +class VPWidenMemoryInstructionRecipe : public VPRecipeBase { +private: + Instruction &Instr; + std::unique_ptr<VPUser> User; + +public: + VPWidenMemoryInstructionRecipe(Instruction &Instr, VPValue *Mask) + : VPRecipeBase(VPWidenMemoryInstructionSC), Instr(Instr) { + if (Mask) // Create a VPInstruction to register as a user of the mask. + User.reset(new VPUser({Mask})); + } + + /// Method to support type inquiry through isa, cast, and dyn_cast. + static inline bool classof(const VPRecipeBase *V) { + return V->getVPRecipeID() == VPRecipeBase::VPWidenMemoryInstructionSC; + } + + /// Generate the wide load/store. + void execute(VPTransformState &State) override; + + /// Print the recipe. + void print(raw_ostream &O, const Twine &Indent) const override; +}; + /// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It /// holds a sequence of zero or more VPRecipe's each representing a sequence of /// output IR instructions.
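The recipe classes declared in this VPlan.h hunk are thin wrappers around scalar IR: each records which ingredients it covers and knows how to execute() into vector IR and print() itself as a DOT fragment. The sketch below is not part of the patch; it only illustrates the appendInstruction contract of VPWidenRecipe (an instruction can be appended only when it sits exactly at the recipe's current End iterator, so a recipe always spans a contiguous range of the original block). It assumes it is compiled inside lib/Transforms/Vectorize next to VPlan.h, and the helper name widenWholeBlock is hypothetical.

#include "VPlan.h"
#include "llvm/ADT/Twine.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Support/raw_ostream.h"
#include <iterator>

using namespace llvm;

// Hypothetical helper: wrap every instruction of a (non-empty) basic block in
// a single VPWidenRecipe. Each appendInstruction call succeeds because the
// instructions are visited in block order, so each one is exactly at the
// recipe's End iterator when it is appended.
static VPWidenRecipe *widenWholeBlock(BasicBlock *BB) {
  VPWidenRecipe *Recipe = new VPWidenRecipe(&*BB->begin());
  for (auto It = std::next(BB->begin()), E = BB->end(); It != E; ++It)
    Recipe->appendInstruction(&*It);
  // Dump the "WIDEN" DOT fragment; in the real pipeline the recipe would be
  // handed to a VPBasicBlock rather than printed and returned here.
  Recipe->print(errs(), "  ");
  return Recipe;
}

Holding only the Begin/End iterator pair keeps a widen recipe cheap: it stores no copies of its ingredients, just a range over the original basic block, which is also why appendInstruction refuses anything that would make that range non-contiguous.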
diff --git a/lib/Transforms/Vectorize/VPlanBuilder.h b/lib/Transforms/Vectorize/VPlanBuilder.h deleted file mode 100644 index d6eb3397d044..000000000000 --- a/lib/Transforms/Vectorize/VPlanBuilder.h +++ /dev/null @@ -1,61 +0,0 @@ -//===- VPlanBuilder.h - A VPlan utility for constructing VPInstructions ---===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This file provides a VPlan-based builder utility analogous to IRBuilder. -/// It provides an instruction-level API for generating VPInstructions while -/// abstracting away the Recipe manipulation details. -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_BUILDER_H -#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_BUILDER_H - -#include "VPlan.h" - -namespace llvm { - -class VPBuilder { -private: - VPBasicBlock *BB = nullptr; - VPBasicBlock::iterator InsertPt = VPBasicBlock::iterator(); - - VPInstruction *createInstruction(unsigned Opcode, - std::initializer_list<VPValue *> Operands) { - VPInstruction *Instr = new VPInstruction(Opcode, Operands); - BB->insert(Instr, InsertPt); - return Instr; - } - -public: - VPBuilder() {} - - /// \brief This specifies that created VPInstructions should be appended to - /// the end of the specified block. - void setInsertPoint(VPBasicBlock *TheBB) { - assert(TheBB && "Attempting to set a null insert point"); - BB = TheBB; - InsertPt = BB->end(); - } - - VPValue *createNot(VPValue *Operand) { - return createInstruction(VPInstruction::Not, {Operand}); - } - - VPValue *createAnd(VPValue *LHS, VPValue *RHS) { - return createInstruction(Instruction::BinaryOps::And, {LHS, RHS}); - } - - VPValue *createOr(VPValue *LHS, VPValue *RHS) { - return createInstruction(Instruction::BinaryOps::Or, {LHS, RHS}); - } -}; - -} // namespace llvm - -#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_BUILDER_H diff --git a/lib/Transforms/Vectorize/Vectorize.cpp b/lib/Transforms/Vectorize/Vectorize.cpp index fb2f509dcbaa..b04905bfc6fa 100644 --- a/lib/Transforms/Vectorize/Vectorize.cpp +++ b/lib/Transforms/Vectorize/Vectorize.cpp @@ -18,7 +18,6 @@ #include "llvm-c/Transforms/Vectorize.h" #include "llvm/Analysis/Passes.h" #include "llvm/IR/LegacyPassManager.h" -#include "llvm/IR/Verifier.h" #include "llvm/InitializePasses.h" using namespace llvm; diff --git a/projects/CMakeLists.txt b/projects/CMakeLists.txt index 9102efbdcb46..32617fd4ba62 100644 --- a/projects/CMakeLists.txt +++ b/projects/CMakeLists.txt @@ -11,7 +11,8 @@ foreach(entry ${entries}) (NOT ${entry} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}/libunwind) AND (NOT ${entry} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}/test-suite) AND (NOT ${entry} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}/parallel-libs) AND - (NOT ${entry} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}/openmp)) + (NOT ${entry} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}/openmp) AND + (NOT ${entry} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}/debuginfo-tests)) add_subdirectory(${entry}) endif() endif() @@ -39,3 +40,7 @@ endif() add_llvm_external_project(dragonegg) add_llvm_external_project(parallel-libs) add_llvm_external_project(openmp) + +if(LLVM_INCLUDE_TESTS) + add_llvm_external_project(debuginfo-tests) +endif() diff --git a/runtimes/CMakeLists.txt b/runtimes/CMakeLists.txt index b02c486322b0..c020b851bb97 100644 --- a/runtimes/CMakeLists.txt +++ b/runtimes/CMakeLists.txt @@
-209,6 +209,9 @@ if(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}) if(TARGET install-${component}) list(APPEND SUB_INSTALL_TARGETS install-${component}) endif() + if(TARGET install-${component}-stripped) + list(APPEND SUB_INSTALL_TARGETS install-${component}-stripped) + endif() endforeach() if(LLVM_RUNTIMES_TARGET) @@ -289,6 +292,7 @@ else() # if this is included from LLVM's CMake else() add_custom_target(builtins) add_custom_target(install-builtins) + add_custom_target(install-builtins-stripped) endif() foreach(target ${LLVM_BUILTIN_TARGETS}) @@ -296,6 +300,7 @@ else() # if this is included from LLVM's CMake add_dependencies(builtins builtins-${target}) add_dependencies(install-builtins install-builtins-${target}) + add_dependencies(install-builtins-stripped install-builtins-${target}-stripped) endforeach() endif() set(deps builtins) @@ -331,7 +336,8 @@ else() # if this is included from LLVM's CMake foreach(runtime_name ${runtime_names}) list(APPEND extra_targets ${runtime_name} - install-${runtime_name}) + install-${runtime_name} + install-${runtime_name}-stripped) if(LLVM_INCLUDE_TESTS) list(APPEND test_targets check-${runtime_name}) endif() @@ -348,6 +354,9 @@ else() # if this is included from LLVM's CMake CMAKE_ARGS -DCOMPILER_RT_BUILD_BUILTINS=Off -DLLVM_INCLUDE_TESTS=${LLVM_INCLUDE_TESTS} -DLLVM_LIBRARY_DIR=${LLVM_LIBRARY_DIR} + -DCMAKE_C_COMPILER_TARGET=${TARGET_TRIPLE} + -DCMAKE_CXX_COMPILER_TARGET=${TARGET_TRIPLE} + -DCMAKE_ASM_COMPILER_TARGET=${TARGET_TRIPLE} -DCMAKE_C_COMPILER_WORKS=ON -DCMAKE_CXX_COMPILER_WORKS=ON -DCMAKE_ASM_COMPILER_WORKS=ON @@ -377,7 +386,8 @@ else() # if this is included from LLVM's CMake foreach(runtime_name ${runtime_names}) list(APPEND ${name}_extra_targets "${runtime_name}:${runtime_name}-${name}" - "install-${runtime_name}:install-${runtime_name}-${name}") + "install-${runtime_name}:install-${runtime_name}-${name}" + "install-${runtime_name}-stripped:install-${runtime_name}-${name}-stripped") if(LLVM_INCLUDE_TESTS) list(APPEND ${name}_test_targets "check-${runtime_name}:check-${runtime_name}-${name}") endif() @@ -452,6 +462,7 @@ else() # if this is included from LLVM's CMake add_custom_target(runtimes) add_custom_target(runtimes-configure) add_custom_target(install-runtimes) + add_custom_target(install-runtimes-stripped) if(LLVM_INCLUDE_TESTS) add_custom_target(check-runtimes) add_custom_target(runtimes-test-depends) @@ -475,6 +486,7 @@ else() # if this is included from LLVM's CMake add_dependencies(runtimes runtimes-${name}) add_dependencies(runtimes-configure runtimes-${name}-configure) add_dependencies(install-runtimes install-runtimes-${name}) + add_dependencies(install-runtimes-stripped install-runtimes-${name}-stripped) if(LLVM_INCLUDE_TESTS) add_dependencies(check-runtimes check-runtimes-${name}) add_dependencies(runtimes-test-depends runtimes-test-depends-${name}) diff --git a/test/Analysis/AliasSet/memtransfer.ll b/test/Analysis/AliasSet/memtransfer.ll index 9f1ed63edf22..c1940fcd8cd7 100644 --- a/test/Analysis/AliasSet/memtransfer.ll +++ b/test/Analysis/AliasSet/memtransfer.ll @@ -14,7 +14,7 @@ entry: %a = alloca i8, align 1 %b = alloca i8, align 1 store i8 1, i8* %a, align 1 - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 1, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 1, i1 false) store i8 1, i8* %b, align 1 ret void } @@ -30,7 +30,7 @@ entry: %a = alloca i8, align 1 %b = alloca i8, align 1 store i8 1, i8* %a, align 1 - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 1, i32 1, 
i1 true) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 1, i1 true) store i8 1, i8* %b, align 1 ret void } @@ -46,7 +46,7 @@ entry: %a = alloca i8, align 1 %b = alloca i8, align 1 store i8 1, i8* %a, align 1 - call void @llvm.memmove.p0i8.p0i8.i64(i8* %d, i8* %s, i64 1, i32 1, i1 false) + call void @llvm.memmove.p0i8.p0i8.i64(i8* %d, i8* %s, i64 1, i1 false) store i8 1, i8* %b, align 1 ret void } @@ -62,7 +62,7 @@ entry: %a = alloca i8, align 1 %b = alloca i8, align 1 store i8 1, i8* %a, align 1 - call void @llvm.memmove.p0i8.p0i8.i64(i8* %d, i8* %s, i64 1, i32 1, i1 true) + call void @llvm.memmove.p0i8.p0i8.i64(i8* %d, i8* %s, i64 1, i1 true) store i8 1, i8* %b, align 1 ret void } @@ -76,7 +76,7 @@ entry: %a = alloca i8, align 1 %b = alloca i8, align 1 store i8 1, i8* %a, align 1 - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %a, i64 1, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %a, i64 1, i1 false) store i8 1, i8* %b, align 1 ret void } @@ -90,7 +90,7 @@ entry: %a = alloca i8, align 1 %b = alloca i8, align 1 store i8 1, i8* %a, align 1 - call void @llvm.memmove.p0i8.p0i8.i64(i8* %b, i8* %a, i64 1, i32 1, i1 false) + call void @llvm.memmove.p0i8.p0i8.i64(i8* %b, i8* %a, i64 1, i1 false) store i8 1, i8* %b, align 1 ret void } @@ -104,11 +104,11 @@ entry: %a = alloca i8, align 1 %b = alloca i8, align 1 store i8 1, i8* %a, align 1 - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %a, i64 1, i32 1, i1 false) - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 1, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %a, i64 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 1, i1 false) store i8 1, i8* %b, align 1 ret void } -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) -declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) +declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1) diff --git a/test/Analysis/BasicAA/args-rets-allocas-loads.ll b/test/Analysis/BasicAA/args-rets-allocas-loads.ll index 05b56a07e44b..b31fb26f1c9b 100644 --- a/test/Analysis/BasicAA/args-rets-allocas-loads.ll +++ b/test/Analysis/BasicAA/args-rets-allocas-loads.ll @@ -308,4 +308,9 @@ define void @caller_a(double* %arg_a0, ; CHECK-NEXT: 0 mod responses (0.0%) ; CHECK-NEXT: 0 ref responses (0.0%) ; CHECK-NEXT: 140 mod & ref responses (76.0%) -; CHECK-NEXT: Alias Analysis Evaluator Mod/Ref Summary: 23%/0%/0%/76% +; CHECK-NEXT: 0 must responses (0.0%) +; CHECK-NEXT: 0 must mod responses (0.0%) +; CHECK-NEXT: 0 must ref responses (0.0%) +; CHECK-NEXT: 0 must mod & ref responses (0.0%) +; CHECK-NEXT: Alias Analysis Evaluator Mod/Ref Summary: 23%/0%/0%/76%/0%/0%/0%/0% + diff --git a/test/Analysis/BasicAA/assume.ll b/test/Analysis/BasicAA/assume.ll index f9f5353a4528..49189dee0350 100644 --- a/test/Analysis/BasicAA/assume.ll +++ b/test/Analysis/BasicAA/assume.ll @@ -1,12 +1,12 @@ ; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32" -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) #0 +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) #0 declare void @llvm.assume(i1) #0 define 
void @test1(i8* %P, i8* %Q) nounwind ssp { tail call void @llvm.assume(i1 true) - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) ret void ; CHECK-LABEL: Function: test1: @@ -14,10 +14,10 @@ define void @test1(i8* %P, i8* %Q) nounwind ssp { ; CHECK: MayAlias: i8* %P, i8* %Q ; CHECK: NoModRef: Ptr: i8* %P <-> tail call void @llvm.assume(i1 true) ; CHECK: NoModRef: Ptr: i8* %Q <-> tail call void @llvm.assume(i1 true) -; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: NoModRef: tail call void @llvm.assume(i1 true) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: NoModRef: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.assume(i1 true) +; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: NoModRef: tail call void @llvm.assume(i1 true) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: NoModRef: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.assume(i1 true) } attributes #0 = { nounwind } diff --git a/test/Analysis/BasicAA/call-attrs.ll b/test/Analysis/BasicAA/call-attrs.ll index 9cd17e486799..8538e8b4771d 100644 --- a/test/Analysis/BasicAA/call-attrs.ll +++ b/test/Analysis/BasicAA/call-attrs.ll @@ -31,12 +31,12 @@ entry: ret void } -; CHECK: Just Ref: Ptr: i8* %p <-> call void @readonly_attr(i8* %p) +; CHECK: Just Ref (MustAlias): Ptr: i8* %p <-> call void @readonly_attr(i8* %p) ; CHECK: Just Ref: Ptr: i8* %p <-> call void @readonly_func(i8* %p) -; CHECK: Just Mod: Ptr: i8* %p <-> call void @writeonly_attr(i8* %p) +; CHECK: Just Mod (MustAlias): Ptr: i8* %p <-> call void @writeonly_attr(i8* %p) ; CHECK: Just Mod: Ptr: i8* %p <-> call void @writeonly_func(i8* %p) ; CHECK: NoModRef: Ptr: i8* %p <-> call void @readnone_attr(i8* %p) ; CHECK: NoModRef: Ptr: i8* %p <-> call void @readnone_func(i8* %p) ; CHECK: Both ModRef: Ptr: i8* %p <-> call void @read_write(i8* %p, i8* %p, i8* %p) -; CHECK: Just Ref: Ptr: i8* %p <-> call void @func() [ "deopt"(i8* %p) ] +; CHECK: Just Ref (MustAlias): Ptr: i8* %p <-> call void @func() [ "deopt"(i8* %p) ] ; CHECK: Both ModRef: Ptr: i8* %p <-> call void @writeonly_attr(i8* %p) [ "deopt"(i8* %p) ] diff --git a/test/Analysis/BasicAA/cs-cs-arm.ll b/test/Analysis/BasicAA/cs-cs-arm.ll index 1580af9ea826..e4367bb6d61b 100644 --- a/test/Analysis/BasicAA/cs-cs-arm.ll +++ b/test/Analysis/BasicAA/cs-cs-arm.ll @@ -19,11 +19,11 @@ entry: ; CHECK-LABEL: Function: test1: ; CHECK: NoAlias: i8* %p, i8* %q -; CHECK: Just Ref: Ptr: i8* %p <-> %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) +; CHECK: Just Ref (MustAlias): Ptr: i8* %p <-> %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) ; CHECK: NoModRef: Ptr: i8* %q <-> %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) ; CHECK: NoModRef: Ptr: i8* %p <-> call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16) -; CHECK: Both ModRef: Ptr: i8* %q <-> call void 
@llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16) -; CHECK: Just Ref: Ptr: i8* %p <-> %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) +; CHECK: Both ModRef (MustAlias): Ptr: i8* %q <-> call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16) +; CHECK: Just Ref (MustAlias): Ptr: i8* %p <-> %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) ; CHECK: NoModRef: Ptr: i8* %q <-> %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) ; CHECK: NoModRef: %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) #{{[0-9]+}} <-> call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16) ; CHECK: NoModRef: %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) #{{[0-9]+}} <-> %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) diff --git a/test/Analysis/BasicAA/cs-cs.ll b/test/Analysis/BasicAA/cs-cs.ll index 3695275649b2..314aff849f93 100644 --- a/test/Analysis/BasicAA/cs-cs.ll +++ b/test/Analysis/BasicAA/cs-cs.ll @@ -2,48 +2,48 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32" target triple = "arm-apple-ios" -declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1) #0 -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) #0 +declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) #0 +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) #0 declare void @a_readonly_func(i8*) #1 declare void @a_writeonly_func(i8*) #2 define void @test2(i8* %P, i8* %Q) #3 { - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) ret void ; CHECK-LABEL: Function: test2: ; CHECK: MayAlias: i8* %P, i8* %Q -; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) +; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void 
@llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) } define void @test2a(i8* noalias %P, i8* noalias %Q) #3 { - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) ret void ; CHECK-LABEL: Function: test2a: ; CHECK: NoAlias: i8* %P, i8* %Q -; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) +; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) } define void @test2b(i8* noalias %P, i8* noalias %Q) #3 { - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) %R = getelementptr i8, i8* %P, i64 12 - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) ret void ; CHECK-LABEL: Function: test2b: @@ -51,20 +51,20 @@ define void @test2b(i8* noalias %P, i8* noalias %Q) #3 { ; CHECK: NoAlias: i8* %P, i8* %Q ; CHECK: NoAlias: i8* %P, i8* %R ; CHECK: NoAlias: i8* %Q, i8* %R -; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: NoModRef: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: NoModRef: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* 
%R, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Mod: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: NoModRef: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: NoModRef: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) +; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: NoModRef: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: NoModRef: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) +; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) +; CHECK: Just Mod: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) +; CHECK: NoModRef: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) +; CHECK: NoModRef: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) } define void @test2c(i8* noalias %P, i8* noalias %Q) #3 { - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) %R = getelementptr i8, i8* %P, i64 11 - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) ret void ; CHECK-LABEL: Function: test2c: @@ -72,20 +72,20 @@ define void @test2c(i8* noalias %P, i8* noalias %Q) #3 { ; CHECK: NoAlias: i8* %P, i8* %Q ; CHECK: NoAlias: i8* %P, i8* %R ; CHECK: NoAlias: i8* %Q, i8* %R -; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Mod: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: NoModRef: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Mod: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) +; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Mod: Ptr: i8* %R <-> 
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: NoModRef: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) +; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) +; CHECK: Just Mod: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) +; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) +; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) } define void @test2d(i8* noalias %P, i8* noalias %Q) #3 { - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) %R = getelementptr i8, i8* %P, i64 -12 - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) ret void ; CHECK-LABEL: Function: test2d: @@ -93,20 +93,20 @@ define void @test2d(i8* noalias %P, i8* noalias %Q) #3 { ; CHECK: NoAlias: i8* %P, i8* %Q ; CHECK: NoAlias: i8* %P, i8* %R ; CHECK: NoAlias: i8* %Q, i8* %R -; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: NoModRef: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: NoModRef: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Mod: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: NoModRef: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: NoModRef: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) +; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: NoModRef: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: NoModRef: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) +; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) +; CHECK: Just Mod: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) +; CHECK: NoModRef: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) +; CHECK: NoModRef: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) } define void @test2e(i8* 
noalias %P, i8* noalias %Q) #3 { - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) %R = getelementptr i8, i8* %P, i64 -11 - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) ret void ; CHECK-LABEL: Function: test2e: @@ -114,67 +114,67 @@ define void @test2e(i8* noalias %P, i8* noalias %Q) #3 { ; CHECK: NoAlias: i8* %P, i8* %Q ; CHECK: NoAlias: i8* %P, i8* %R ; CHECK: NoAlias: i8* %Q, i8* %R -; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: NoModRef: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Mod: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) +; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: NoModRef: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) +; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) +; CHECK: Just Mod: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) +; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) +; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) } define void @test3(i8* %P, i8* %Q) #3 { - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i32 1, i1 false) - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) ret void ; CHECK-LABEL: Function: test3: ; CHECK: MayAlias: i8* %P, i8* %Q -; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i32 1, i1 false) -; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i32 1, i1 false) -; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Ref: 
Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i32 1, i1 false) +; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i1 false) +; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i1 false) +; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i1 false) } define void @test3a(i8* noalias %P, i8* noalias %Q) #3 { - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i32 1, i1 false) - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) ret void ; CHECK-LABEL: Function: test3a: ; CHECK: NoAlias: i8* %P, i8* %Q -; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i32 1, i1 false) -; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i32 1, i1 false) -; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i32 1, i1 false) +; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i1 false) +; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i1 false) +; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i1 false) } define void @test4(i8* %P, i8* noalias %Q) #3 { - tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i32 1, i1 false) - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* 
%P, i8* %Q, i64 12, i32 1, i1 false) + tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) ret void ; CHECK-LABEL: Function: test4: ; CHECK: NoAlias: i8* %P, i8* %Q -; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i32 1, i1 false) -; CHECK: NoModRef: Ptr: i8* %Q <-> tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i32 1, i1 false) -; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Mod: tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i32 1, i1 false) +; CHECK: Just Mod (MustAlias): Ptr: i8* %P <-> tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i1 false) +; CHECK: NoModRef: Ptr: i8* %Q <-> tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i1 false) +; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Mod: tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i1 false) } define void @test5(i8* %P, i8* %Q, i8* %R) #3 { - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false) ret void ; CHECK-LABEL: Function: test5: @@ -182,27 +182,47 @@ define void @test5(i8* %P, i8* %Q, i8* %R) #3 { ; CHECK: MayAlias: i8* %P, i8* %Q ; CHECK: MayAlias: i8* %P, i8* %R ; CHECK: MayAlias: i8* %Q, i8* %R -; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Both ModRef: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i32 1, i1 false) -; CHECK: Both ModRef: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i32 1, i1 false) -; CHECK: Just Ref: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i32 1, i1 false) -; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i32 1, i1 false) -; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 
false) +; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Both ModRef: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false) +; CHECK: Both ModRef: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false) +; CHECK: Just Ref: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false) +; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false) +; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +} + +define void @test5a(i8* noalias %P, i8* noalias %Q, i8* noalias %R) nounwind ssp { + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false) + ret void + +; CHECK-LABEL: Function: test5a: + +; CHECK: NoAlias: i8* %P, i8* %Q +; CHECK: NoAlias: i8* %P, i8* %R +; CHECK: NoAlias: i8* %Q, i8* %R +; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: NoModRef: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false) +; CHECK: NoModRef: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false) +; CHECK: Just Ref: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false) +; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false) +; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) } define void @test6(i8* %P) #3 { - call void @llvm.memset.p0i8.i64(i8* %P, i8 -51, i64 32, i32 8, i1 false) + call void @llvm.memset.p0i8.i64(i8* align 8 %P, i8 -51, i64 32, i1 false) call void @a_readonly_func(i8* %P) ret void ; CHECK-LABEL: Function: test6: -; CHECK: Just Mod: Ptr: i8* %P <-> call void @llvm.memset.p0i8.i64(i8* %P, i8 -51, i64 32, i32 8, i1 false) +; CHECK: Just Mod (MustAlias): Ptr: i8* %P <-> call void @llvm.memset.p0i8.i64(i8* align 8 %P, i8 -51, i64 32, i1 false) ; CHECK: Just Ref: Ptr: i8* %P <-> call void @a_readonly_func(i8* %P) -; CHECK: Just Mod: call void @llvm.memset.p0i8.i64(i8* %P, i8 -51, i64 32, i32 8, i1 false) <-> call void @a_readonly_func(i8* %P) -; CHECK: Just Ref: call void @a_readonly_func(i8* %P) <-> call void @llvm.memset.p0i8.i64(i8* %P, i8 -51, i64 32, i32 8, i1 false) +; CHECK: Just Mod: call void @llvm.memset.p0i8.i64(i8* align 8 %P, i8 -51, i64 32, i1 false) <-> call void @a_readonly_func(i8* %P) +; CHECK: Just Ref: call void @a_readonly_func(i8* %P) <-> call void @llvm.memset.p0i8.i64(i8* align 8 %P, i8 -51, i64 32, i1 false) } 
define void @test7(i8* %P) #3 { @@ -237,9 +257,9 @@ entry: ; CHECK: NoModRef: Ptr: i8* %p <-> call void @an_inaccessiblememonly_func() ; CHECK: NoModRef: Ptr: i8* %q <-> call void @an_inaccessiblememonly_func() ; CHECK: NoModRef: Ptr: i8* %p <-> call void @an_inaccessibleorargmemonly_func(i8* %q) -; CHECK: Both ModRef: Ptr: i8* %q <-> call void @an_inaccessibleorargmemonly_func(i8* %q) +; CHECK: Both ModRef (MustAlias): Ptr: i8* %q <-> call void @an_inaccessibleorargmemonly_func(i8* %q) ; CHECK: NoModRef: Ptr: i8* %p <-> call void @an_argmemonly_func(i8* %q) -; CHECK: Both ModRef: Ptr: i8* %q <-> call void @an_argmemonly_func(i8* %q) +; CHECK: Both ModRef (MustAlias): Ptr: i8* %q <-> call void @an_argmemonly_func(i8* %q) ; CHECK: Just Ref: call void @a_readonly_func(i8* %p) <-> call void @an_inaccessiblememonly_func() ; CHECK: Just Ref: call void @a_readonly_func(i8* %p) <-> call void @an_inaccessibleorargmemonly_func(i8* %q) ; CHECK: Just Ref: call void @a_readonly_func(i8* %p) <-> call void @an_argmemonly_func(i8* %q) @@ -254,12 +274,34 @@ entry: ; CHECK: Both ModRef: call void @an_inaccessibleorargmemonly_func(i8* %q) <-> call void @a_readonly_func(i8* %p) ; CHECK: Both ModRef: call void @an_inaccessibleorargmemonly_func(i8* %q) <-> call void @a_writeonly_func(i8* %q) ; CHECK: Both ModRef: call void @an_inaccessibleorargmemonly_func(i8* %q) <-> call void @an_inaccessiblememonly_func() -; CHECK: Both ModRef: call void @an_inaccessibleorargmemonly_func(i8* %q) <-> call void @an_argmemonly_func(i8* %q) +; CHECK: Both ModRef (MustAlias): call void @an_inaccessibleorargmemonly_func(i8* %q) <-> call void @an_argmemonly_func(i8* %q) ; CHECK: Both ModRef: call void @an_argmemonly_func(i8* %q) <-> call void @a_readonly_func(i8* %p) ; CHECK: Both ModRef: call void @an_argmemonly_func(i8* %q) <-> call void @a_writeonly_func(i8* %q) ; CHECK: NoModRef: call void @an_argmemonly_func(i8* %q) <-> call void @an_inaccessiblememonly_func() -; CHECK: Both ModRef: call void @an_argmemonly_func(i8* %q) <-> call void @an_inaccessibleorargmemonly_func(i8* %q) +; CHECK: Both ModRef (MustAlias): call void @an_argmemonly_func(i8* %q) <-> call void @an_inaccessibleorargmemonly_func(i8* %q) +} + +;; test that MustAlias is set for calls when no MayAlias is found. 
+declare void @another_argmemonly_func(i8*, i8*) #0 +define void @test8a(i8* noalias %p, i8* noalias %q) { +entry: + call void @another_argmemonly_func(i8* %p, i8* %q) + ret void + +; CHECK-LABEL: Function: test8a +; CHECK: Both ModRef: Ptr: i8* %p <-> call void @another_argmemonly_func(i8* %p, i8* %q) +; CHECK: Both ModRef: Ptr: i8* %q <-> call void @another_argmemonly_func(i8* %p, i8* %q) } +define void @test8b(i8* %p, i8* %q) { +entry: + call void @another_argmemonly_func(i8* %p, i8* %q) + ret void + +; CHECK-LABEL: Function: test8b +; CHECK: Both ModRef: Ptr: i8* %p <-> call void @another_argmemonly_func(i8* %p, i8* %q) +; CHECK: Both ModRef: Ptr: i8* %q <-> call void @another_argmemonly_func(i8* %p, i8* %q) +} + ;; test that unknown operand bundle has unknown effect to the heap define void @test9(i8* %p) { @@ -310,9 +352,9 @@ entry: ; CHECK: NoModRef: Ptr: i8* %p <-> call void @an_inaccessiblememonly_func() #7 [ "unknown"() ] ; CHECK: NoModRef: Ptr: i8* %q <-> call void @an_inaccessiblememonly_func() #7 [ "unknown"() ] ; CHECK: NoModRef: Ptr: i8* %p <-> call void @an_inaccessibleorargmemonly_func(i8* %q) #8 [ "unknown"() ] -; CHECK: Both ModRef: Ptr: i8* %q <-> call void @an_inaccessibleorargmemonly_func(i8* %q) #8 [ "unknown"() ] +; CHECK: Both ModRef (MustAlias): Ptr: i8* %q <-> call void @an_inaccessibleorargmemonly_func(i8* %q) #8 [ "unknown"() ] ; CHECK: NoModRef: Ptr: i8* %p <-> call void @an_argmemonly_func(i8* %q) #9 [ "unknown"() ] -; CHECK: Both ModRef: Ptr: i8* %q <-> call void @an_argmemonly_func(i8* %q) #9 [ "unknown"() ] +; CHECK: Both ModRef (MustAlias): Ptr: i8* %q <-> call void @an_argmemonly_func(i8* %q) #9 [ "unknown"() ] ; CHECK: Just Ref: call void @a_readonly_func(i8* %p) #6 [ "unknown"() ] <-> call void @an_inaccessiblememonly_func() #7 [ "unknown"() ] ; CHECK: Just Ref: call void @a_readonly_func(i8* %p) #6 [ "unknown"() ] <-> call void @an_inaccessibleorargmemonly_func(i8* %q) #8 [ "unknown"() ] ; CHECK: Just Ref: call void @a_readonly_func(i8* %p) #6 [ "unknown"() ] <-> call void @an_argmemonly_func(i8* %q) #9 [ "unknown"() ] @@ -321,10 +363,10 @@ entry: ; CHECK: NoModRef: call void @an_inaccessiblememonly_func() #7 [ "unknown"() ] <-> call void @an_argmemonly_func(i8* %q) #9 [ "unknown"() ] ; CHECK: Both ModRef: call void @an_inaccessibleorargmemonly_func(i8* %q) #8 [ "unknown"() ] <-> call void @a_readonly_func(i8* %p) #6 [ "unknown"() ] ; CHECK: Both ModRef: call void @an_inaccessibleorargmemonly_func(i8* %q) #8 [ "unknown"() ] <-> call void @an_inaccessiblememonly_func() #7 [ "unknown"() ] -; CHECK: Both ModRef: call void @an_inaccessibleorargmemonly_func(i8* %q) #8 [ "unknown"() ] <-> call void @an_argmemonly_func(i8* %q) #9 [ "unknown"() ] +; CHECK: Both ModRef (MustAlias): call void @an_inaccessibleorargmemonly_func(i8* %q) #8 [ "unknown"() ] <-> call void @an_argmemonly_func(i8* %q) #9 [ "unknown"() ] ; CHECK: Both ModRef: call void @an_argmemonly_func(i8* %q) #9 [ "unknown"() ] <-> call void @a_readonly_func(i8* %p) #6 [ "unknown"() ] ; CHECK: NoModRef: call void @an_argmemonly_func(i8* %q) #9 [ "unknown"() ] <-> call void @an_inaccessiblememonly_func() #7 [ "unknown"() ] -; CHECK: Both ModRef: call void @an_argmemonly_func(i8* %q) #9 [ "unknown"() ] <-> call void @an_inaccessibleorargmemonly_func(i8* %q) #8 [ "unknown"() ] +; CHECK: Both ModRef (MustAlias): call void @an_argmemonly_func(i8* %q) #9 [ "unknown"() ] <-> call void @an_inaccessibleorargmemonly_func(i8* %q) #8 [ "unknown"() ] } attributes #0 = { argmemonly nounwind } diff --git 
a/test/Analysis/BasicAA/gep-and-alias.ll b/test/Analysis/BasicAA/gep-and-alias.ll index 4ec64305900d..e2e5811d2639 100644 --- a/test/Analysis/BasicAA/gep-and-alias.ll +++ b/test/Analysis/BasicAA/gep-and-alias.ll @@ -6,13 +6,13 @@ target triple = "i386-apple-macosx10.6.0" ; The load and store address in the loop body could alias so the load ; can't be hoisted above the store and out of the loop. -declare void @llvm.memset.p0i8.i32(i8* nocapture writeonly, i8, i32, i32, i1) +declare void @llvm.memset.p0i8.i32(i8* nocapture writeonly, i8, i32, i1) define i32 @foo(i32 %x, i32 %z, i32 %n) { entry: %pool = alloca [59 x i32], align 4 %tmp = bitcast [59 x i32]* %pool to i8* - call void @llvm.memset.p0i8.i32(i8* nonnull %tmp, i8 0, i32 236, i32 4, i1 false) + call void @llvm.memset.p0i8.i32(i8* align 4 nonnull %tmp, i8 0, i32 236, i1 false) %cmp3 = icmp eq i32 %n, 0 br i1 %cmp3, label %for.end, label %for.body.lr.ph diff --git a/test/Analysis/BasicAA/getmodrefinfo-cs-cs.ll b/test/Analysis/BasicAA/getmodrefinfo-cs-cs.ll index f0f1a631d08d..755a9ccb23cc 100644 --- a/test/Analysis/BasicAA/getmodrefinfo-cs-cs.ll +++ b/test/Analysis/BasicAA/getmodrefinfo-cs-cs.ll @@ -12,15 +12,15 @@ define void @test0() { ret void } -; CHECK: NoModRef: call void @llvm.memset.p0i8.i64(i8* @A, i8 0, i64 1, i32 1, i1 false) <-> call void @llvm.memset.p0i8.i64(i8* @B, i8 0, i64 1, i32 1, i1 false) -; CHECK: NoModRef: call void @llvm.memset.p0i8.i64(i8* @B, i8 0, i64 1, i32 1, i1 false) <-> call void @llvm.memset.p0i8.i64(i8* @A, i8 0, i64 1, i32 1, i1 false) +; CHECK: NoModRef: call void @llvm.memset.p0i8.i64(i8* @A, i8 0, i64 1, i1 false) <-> call void @llvm.memset.p0i8.i64(i8* @B, i8 0, i64 1, i1 false) +; CHECK: NoModRef: call void @llvm.memset.p0i8.i64(i8* @B, i8 0, i64 1, i1 false) <-> call void @llvm.memset.p0i8.i64(i8* @A, i8 0, i64 1, i1 false) -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind @A = external global i8 @B = external global i8 define void @test1() { - call void @llvm.memset.p0i8.i64(i8* @A, i8 0, i64 1, i32 1, i1 false) - call void @llvm.memset.p0i8.i64(i8* @B, i8 0, i64 1, i32 1, i1 false) + call void @llvm.memset.p0i8.i64(i8* @A, i8 0, i64 1, i1 false) + call void @llvm.memset.p0i8.i64(i8* @B, i8 0, i64 1, i1 false) ret void } diff --git a/test/Analysis/BasicAA/guards.ll b/test/Analysis/BasicAA/guards.ll index e90328255252..c5d078346a22 100644 --- a/test/Analysis/BasicAA/guards.ll +++ b/test/Analysis/BasicAA/guards.ll @@ -1,23 +1,23 @@ ; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32" -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) #0 +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) #0 declare void @llvm.experimental.guard(i1, ...) declare void @unknown_but_readonly() readonly define void @test1(i8* %P, i8* %Q) { tail call void(i1,...) @llvm.experimental.guard(i1 true) [ "deopt"() ] - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) ret void ; CHECK-LABEL: Function: test1: ; CHECK: Just Ref: Ptr: i8* %P <-> tail call void (i1, ...) @llvm.experimental.guard(i1 true) [ "deopt"() ] ; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void (i1, ...) 
@llvm.experimental.guard(i1 true) [ "deopt"() ] -; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Ref: tail call void (i1, ...) @llvm.experimental.guard(i1 true) [ "deopt"() ] <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void (i1, ...) @llvm.experimental.guard(i1 true) [ "deopt"() ] +; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Ref: tail call void (i1, ...) @llvm.experimental.guard(i1 true) [ "deopt"() ] <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void (i1, ...) @llvm.experimental.guard(i1 true) [ "deopt"() ] } define void @test2() { diff --git a/test/Analysis/BasicAA/modref.ll b/test/Analysis/BasicAA/modref.ll index 71a3eac3a74e..a364a81a4e05 100644 --- a/test/Analysis/BasicAA/modref.ll +++ b/test/Analysis/BasicAA/modref.ll @@ -11,7 +11,7 @@ define i32 @test0(i8* %P) { store i32 0, i32* %A - call void @llvm.memset.p0i8.i32(i8* %P, i8 0, i32 42, i32 1, i1 false) + call void @llvm.memset.p0i8.i32(i8* %P, i8 0, i32 42, i1 false) %B = load i32, i32* %A ret i32 %B @@ -27,7 +27,7 @@ define i8 @test1() { store i8 2, i8* %B ;; Not written to by memcpy - call void @llvm.memcpy.p0i8.p0i8.i8(i8* %A, i8* %B, i8 -1, i32 0, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i8(i8* %A, i8* %B, i8 -1, i1 false) %C = load i8, i8* %B ret i8 %C @@ -38,7 +38,7 @@ define i8 @test2(i8* %P) { ; CHECK-LABEL: @test2 %P2 = getelementptr i8, i8* %P, i32 127 store i8 1, i8* %P2 ;; Not dead across memset - call void @llvm.memset.p0i8.i8(i8* %P, i8 2, i8 127, i32 0, i1 false) + call void @llvm.memset.p0i8.i8(i8* %P, i8 2, i8 127, i1 false) %A = load i8, i8* %P2 ret i8 %A ; CHECK: ret i8 1 @@ -51,7 +51,7 @@ define i8 @test2a(i8* %P) { ;; FIXME: DSE isn't zapping this dead store. store i8 1, i8* %P2 ;; Dead, clobbered by memset. - call void @llvm.memset.p0i8.i8(i8* %P, i8 2, i8 127, i32 0, i1 false) + call void @llvm.memset.p0i8.i8(i8* %P, i8 2, i8 127, i1 false) %A = load i8, i8* %P2 ret i8 %A ; CHECK-NOT: load @@ -91,7 +91,7 @@ define void @test3a(i8* %P, i8 %X) { define i32 @test4(i8* %P) { %tmp = load i32, i32* @G1 - call void @llvm.memset.p0i8.i32(i8* bitcast ([4000 x i32]* @G2 to i8*), i8 0, i32 4000, i32 1, i1 false) + call void @llvm.memset.p0i8.i32(i8* bitcast ([4000 x i32]* @G2 to i8*), i8 0, i32 4000, i1 false) %tmp2 = load i32, i32* @G1 %sub = sub i32 %tmp2, %tmp ret i32 %sub @@ -106,7 +106,7 @@ define i32 @test4(i8* %P) { ; write to G1. 
define i32 @test5(i8* %P, i32 %Len) { %tmp = load i32, i32* @G1 - call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([4000 x i32]* @G2 to i8*), i8* bitcast (i32* @G1 to i8*), i32 %Len, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([4000 x i32]* @G2 to i8*), i8* bitcast (i32* @G1 to i8*), i32 %Len, i1 false) %tmp2 = load i32, i32* @G1 %sub = sub i32 %tmp2, %tmp ret i32 %sub @@ -227,7 +227,7 @@ define i32 @test13(i32* %P, i32* %P2) { ; CHECK: ret i32 0 } -declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind -declare void @llvm.memset.p0i8.i8(i8* nocapture, i8, i8, i32, i1) nounwind -declare void @llvm.memcpy.p0i8.p0i8.i8(i8* nocapture, i8* nocapture, i8, i32, i1) nounwind -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind +declare void @llvm.memset.p0i8.i8(i8* nocapture, i8, i8, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i8(i8* nocapture, i8* nocapture, i8, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind diff --git a/test/Analysis/BasicAA/pr35821.ll b/test/Analysis/BasicAA/pr35821.ll new file mode 100644 index 000000000000..ca840da679b6 --- /dev/null +++ b/test/Analysis/BasicAA/pr35821.ll @@ -0,0 +1,11 @@ +; RUN: opt %s -aa-eval -disable-output 2>&1 | FileCheck %s + +; CHECK: 6 Total Alias Queries Performed +; CHECK-NEXT: 6 no alias responses + +define void @patatino() { + %G26 = getelementptr i1, i1* undef, i1 undef + %B20 = shl i8 -128, 16 + %G47 = getelementptr i1*, i1** undef, i8 %B20 + ret void +} diff --git a/test/Analysis/BasicAA/pr35843.ll b/test/Analysis/BasicAA/pr35843.ll new file mode 100644 index 000000000000..2830e973dee6 --- /dev/null +++ b/test/Analysis/BasicAA/pr35843.ll @@ -0,0 +1,12 @@ +; RUN: opt %s -aa-eval -disable-output 2>&1 | FileCheck %s + +; CHECK: 6 Total Alias Queries Performed +; CHECK-NEXT: 6 no alias responses + +define void @patatino() { +BB: + %G22 = getelementptr i1*, i1** undef, i8 -1 + %B1 = mul i66 undef, 9223372036854775808 + %G45 = getelementptr i1**, i1*** undef, i66 %B1 + ret void +} diff --git a/test/Analysis/BlockFrequencyInfo/redundant_edges.ll b/test/Analysis/BlockFrequencyInfo/redundant_edges.ll new file mode 100644 index 000000000000..20ed1406c5af --- /dev/null +++ b/test/Analysis/BlockFrequencyInfo/redundant_edges.ll @@ -0,0 +1,22 @@ +; RUN: opt < %s -analyze -block-freq | FileCheck %s +; RUN: opt < %s -analyze -lazy-block-freq | FileCheck %s +; RUN: opt < %s -passes='print<block-freq>' -disable-output 2>&1 | FileCheck %s + +define void @test1() { ; CHECK-LABEL: Printing analysis {{.*}} for function 'test1': +; CHECK-NEXT: block-frequency-info: test1 +; CHECK-NEXT: entry: float = 1.0, int = [[ENTRY:[0-9]+]] +entry: + br label %loop + +; CHECK-NEXT: loop: float = 32.0 +loop: + switch i32 undef, label %loop [ + i32 0, label %return + i32 1, label %return + ] + +; CHECK-NEXT: return: float = 1.0 +return: + ret void +} diff --git a/test/Analysis/CallGraph/no-intrinsics.ll b/test/Analysis/CallGraph/no-intrinsics.ll index 69bfce779185..3d941039edb7 100644 --- a/test/Analysis/CallGraph/no-intrinsics.ll +++ b/test/Analysis/CallGraph/no-intrinsics.ll @@ -3,10 +3,10 @@ ; Check that intrinsics aren't added to the call graph -declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1) +declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i1) define void @f(i8* %out, i8* %in) { - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %out, i8* %in, i32 100,
i32 4, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %out, i8* align 4 %in, i32 100, i1 false) ret void } diff --git a/test/Analysis/ConstantFolding/gep-constanfolding-error.ll b/test/Analysis/ConstantFolding/gep-constanfolding-error.ll index 16bc8a983e48..a09fd550715d 100644 --- a/test/Analysis/ConstantFolding/gep-constanfolding-error.ll +++ b/test/Analysis/ConstantFolding/gep-constanfolding-error.ll @@ -43,10 +43,10 @@ entry: %scevgep = getelementptr [6 x [6 x [7 x i8]]], [6 x [6 x [7 x i8]]]* @j, i32 0, i32 0, i32 %5, i32 %8 %9 = add i32 %f.promoted, %smax %10 = add i32 %9, 2 - call void @llvm.memset.p0i8.i32(i8* %scevgep, i8 %conv6, i32 %10, i32 1, i1 false) -; CHECK: call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([6 x [6 x [7 x i8]]], [6 x [6 x [7 x i8]]]* @j, i32 0, i{{32|64}} 5, i{{32|64}} 4, i32 1), i8 %conv6, i32 1, i32 1, i1 false) + call void @llvm.memset.p0i8.i32(i8* %scevgep, i8 %conv6, i32 %10, i1 false) +; CHECK: call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([6 x [6 x [7 x i8]]], [6 x [6 x [7 x i8]]]* @j, i32 0, i{{32|64}} 5, i{{32|64}} 4, i32 1), i8 %conv6, i32 1, i1 false) ; CHECK-NOT: call void @llvm.memset.p0i8.i32(i8* getelementptr ([6 x [6 x [7 x i8]]], [6 x [6 x [7 x i8]]]* @j, i64 1, i64 4, i64 4, i32 1) ret i32 0 } ; Function Attrs: argmemonly nounwind -declare void @llvm.memset.p0i8.i32(i8* nocapture writeonly, i8, i32, i32, i1) +declare void @llvm.memset.p0i8.i32(i8* nocapture writeonly, i8, i32, i1) diff --git a/test/Analysis/CostModel/X86/bitreverse.ll b/test/Analysis/CostModel/X86/bitreverse.ll index 9321b7323b57..fc395d7e095a 100644 --- a/test/Analysis/CostModel/X86/bitreverse.ll +++ b/test/Analysis/CostModel/X86/bitreverse.ll @@ -1,17 +1,17 @@ -; RUN: opt < %s -mtriple=i686-unknown-linux-gnu -mcpu=pentium4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=X86 -check-prefix=SSE2 -; RUN: opt < %s -mtriple=i686-unknown-linux-gnu -mcpu=corei7 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=X86 -check-prefix=SSE42 -; RUN: opt < %s -mtriple=i686-unknown-linux-gnu -mcpu=corei7-avx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=X86 -check-prefix=AVX -; RUN: opt < %s -mtriple=i686-unknown-linux-gnu -mcpu=core-avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=X86 -check-prefix=AVX2 -; RUN: opt < %s -mtriple=i686-unknown-linux-gnu -mcpu=knl -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=X86 -check-prefix=AVX512 -check-prefix=AVX512F -; RUN: opt < %s -mtriple=i686-unknown-linux-gnu -mcpu=skx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=X86 -check-prefix=AVX512 -check-prefix=AVX512BW -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=pentium4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=X64 -check-prefix=SSE2 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=X64 -check-prefix=SSE42 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=X64 -check-prefix=AVX -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=X64 -check-prefix=AVX2 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=knl -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=X64 -check-prefix=AVX512 -check-prefix=AVX512F -; RUN: opt 
< %s -mtriple=x86_64-unknown-linux-gnu -mcpu=skx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=X64 -check-prefix=AVX512 -check-prefix=AVX512BW -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=XOP -check-prefix=XOPAVX -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=XOP -check-prefix=XOPAVX2 +; RUN: opt < %s -mtriple=i686-unknown-linux-gnu -mattr=+sse2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=X86 -check-prefix=SSE2 +; RUN: opt < %s -mtriple=i686-unknown-linux-gnu -mattr=+sse4.2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=X86 -check-prefix=SSE42 +; RUN: opt < %s -mtriple=i686-unknown-linux-gnu -mattr=+avx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=X86 -check-prefix=AVX +; RUN: opt < %s -mtriple=i686-unknown-linux-gnu -mattr=+avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=X86 -check-prefix=AVX2 +; RUN: opt < %s -mtriple=i686-unknown-linux-gnu -mattr=+avx512f -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=X86 -check-prefix=AVX512 -check-prefix=AVX512F +; RUN: opt < %s -mtriple=i686-unknown-linux-gnu -mattr=+avx512vl,avx512bw,avx512dq -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=X86 -check-prefix=AVX512 -check-prefix=AVX512BW +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=X64 -check-prefix=SSE2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse4.2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=X64 -check-prefix=SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=X64 -check-prefix=AVX +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=X64 -check-prefix=AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=X64 -check-prefix=AVX512 -check-prefix=AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512vl,+avx512bw,+avx512dq -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=X64 -check-prefix=AVX512 -check-prefix=AVX512BW +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+xop -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=XOP -check-prefix=XOPAVX +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+xop,+avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=XOP -check-prefix=XOPAVX2 ; Verify the cost of scalar bitreverse instructions. 
diff --git a/test/Analysis/CostModel/X86/cast.ll b/test/Analysis/CostModel/X86/cast.ll index 39e03c6c3b2f..1b33f5048f6a 100644 --- a/test/Analysis/CostModel/X86/cast.ll +++ b/test/Analysis/CostModel/X86/cast.ll @@ -8,11 +8,17 @@ target triple = "x86_64-apple-macosx10.8.0" define i32 @add(i32 %arg) { ; CHECK-LABEL: for function 'add' ; -- Same size registeres -- - ;CHECK: cost of 1 {{.*}} zext + ;CHECK-AVX512: cost of 12 {{.*}} zext + ;CHECK-AVX2: cost of 1 {{.*}} zext + ;CHECK-AVX: cost of 1 {{.*}} zext %A = zext <4 x i1> undef to <4 x i32> - ;CHECK: cost of 2 {{.*}} sext + ;CHECK-AVX512: cost of 12 {{.*}} sext + ;CHECK-AVX2: cost of 2 {{.*}} sext + ;CHECK-AVX: cost of 2 {{.*}} sext %B = sext <4 x i1> undef to <4 x i32> - ;CHECK: cost of 0 {{.*}} trunc + ;CHECK-AVX512: cost of 0 {{.*}} trunc + ;CHECK-AVX2: cost of 0 {{.*}} trunc + ;CHECK-AVX: cost of 0 {{.*}} trunc %C = trunc <4 x i32> undef to <4 x i1> ; -- Different size registers -- diff --git a/test/Analysis/CostModel/X86/ctlz.ll b/test/Analysis/CostModel/X86/ctlz.ll index 769d73915e36..9e2d8a53697a 100644 --- a/test/Analysis/CostModel/X86/ctlz.ll +++ b/test/Analysis/CostModel/X86/ctlz.ll @@ -1,12 +1,10 @@ -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=pentium4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE2 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE42 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX1 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX2 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX1 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX2 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=knl -mattr=-avx512cd -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX512 -check-prefix=AVX512F -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=skx -mattr=-avx512cd -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX512 -check-prefix=AVX512BW -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=skx -mattr=+avx512cd -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX512CD +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse4.2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f -mattr=-avx512cd -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX512 -check-prefix=AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512vl,+avx512bw,+avx512dq 
-cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX512 -check-prefix=AVX512BW +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512vl,+avx512bw,+avx512dq,+avx512cd -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX512CD ; Verify the cost of scalar leading zero count instructions. diff --git a/test/Analysis/CostModel/X86/ctpop.ll b/test/Analysis/CostModel/X86/ctpop.ll index e6a14e98e37a..691a231d5619 100644 --- a/test/Analysis/CostModel/X86/ctpop.ll +++ b/test/Analysis/CostModel/X86/ctpop.ll @@ -1,11 +1,9 @@ -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=pentium4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE2 -check-prefix=NOPOPCNT -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE42 -check-prefix=POPCNT -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX1 -check-prefix=POPCNT -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX2 -check-prefix=POPCNT -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX1 -check-prefix=POPCNT -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX2 -check-prefix=POPCNT -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=knl -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX512 -check-prefix=AVX512F -check-prefix=POPCNT -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=skx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX512 -check-prefix=AVX512BW -check-prefix=POPCNT +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE2 -check-prefix=NOPOPCNT +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+popcnt,+sse4.2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE42 -check-prefix=POPCNT +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+popcnt,+avx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX1 -check-prefix=POPCNT +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+popcnt,+avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX2 -check-prefix=POPCNT +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+popcnt,+avx512f -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX512 -check-prefix=AVX512F -check-prefix=POPCNT +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+popcnt,+avx512vl,+avx512bw,+avx512dq -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX512 -check-prefix=AVX512BW -check-prefix=POPCNT ; Verify the cost of scalar population count instructions. 
diff --git a/test/Analysis/CostModel/X86/cttz.ll b/test/Analysis/CostModel/X86/cttz.ll index e7a39781385e..66b8bac4ff2e 100644 --- a/test/Analysis/CostModel/X86/cttz.ll +++ b/test/Analysis/CostModel/X86/cttz.ll @@ -1,11 +1,9 @@ -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=pentium4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE2 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE42 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX1 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX2 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX1 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX2 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=knl -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX512 -check-prefix=AVX512F -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=skx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX512 -check-prefix=AVX512BW +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse4.2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX512 -check-prefix=AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512vl,+avx512bw,+avx512dq -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX512 -check-prefix=AVX512BW ; Verify the cost of scalar trailing zero count instructions. 
diff --git a/test/Analysis/CostModel/X86/fptosi.ll b/test/Analysis/CostModel/X86/fptosi.ll index d5e21f8685a7..dc0a041f94c6 100644 --- a/test/Analysis/CostModel/X86/fptosi.ll +++ b/test/Analysis/CostModel/X86/fptosi.ll @@ -224,7 +224,7 @@ define i32 @fptosi_float_i16(i32 %arg) { ; SSE42: cost of 7 {{.*}} %V16I16 = fptosi ; AVX1: cost of 3 {{.*}} %V16I16 = fptosi ; AVX2: cost of 3 {{.*}} %V16I16 = fptosi - ; AVX512: cost of 48 {{.*}} %V16I16 = fptosi + ; AVX512: cost of 1 {{.*}} %V16I16 = fptosi %V16I16 = fptosi <16 x float> undef to <16 x i16> ret i32 undef @@ -254,7 +254,7 @@ define i32 @fptosi_float_i8(i32 %arg) { ; SSE42: cost of 7 {{.*}} %V16I8 = fptosi ; AVX1: cost of 15 {{.*}} %V16I8 = fptosi ; AVX2: cost of 15 {{.*}} %V16I8 = fptosi - ; AVX512: cost of 48 {{.*}} %V16I8 = fptosi + ; AVX512: cost of 1 {{.*}} %V16I8 = fptosi %V16I8 = fptosi <16 x float> undef to <16 x i8> ret i32 undef diff --git a/test/Analysis/DemandedBits/basic.ll b/test/Analysis/DemandedBits/basic.ll index 5b8652396b3a..6f44465315e6 100644 --- a/test/Analysis/DemandedBits/basic.ll +++ b/test/Analysis/DemandedBits/basic.ll @@ -1,9 +1,9 @@ ; RUN: opt -S -demanded-bits -analyze < %s | FileCheck %s ; RUN: opt -S -disable-output -passes="print<demanded-bits>" < %s 2>&1 | FileCheck %s -; CHECK-DAG: DemandedBits: 0xFF for %1 = add nsw i32 %a, 5 -; CHECK-DAG: DemandedBits: 0xFF for %3 = trunc i32 %2 to i8 -; CHECK-DAG: DemandedBits: 0xFF for %2 = mul nsw i32 %1, %b +; CHECK-DAG: DemandedBits: 0xff for %1 = add nsw i32 %a, 5 +; CHECK-DAG: DemandedBits: 0xff for %3 = trunc i32 %2 to i8 +; CHECK-DAG: DemandedBits: 0xff for %2 = mul nsw i32 %1, %b define i8 @test_mul(i32 %a, i32 %b) { %1 = add nsw i32 %a, 5 %2 = mul nsw i32 %1, %b diff --git a/test/Analysis/DemandedBits/intrinsics.ll b/test/Analysis/DemandedBits/intrinsics.ll index 5a6d17284a72..48f6d4624422 100644 --- a/test/Analysis/DemandedBits/intrinsics.ll +++ b/test/Analysis/DemandedBits/intrinsics.ll @@ -1,9 +1,9 @@ ; RUN: opt -S -demanded-bits -analyze < %s | FileCheck %s ; RUN: opt -S -disable-output -passes="print<demanded-bits>" < %s 2>&1 | FileCheck %s -; CHECK-DAG: DemandedBits: 0xFF000000 for %1 = or i32 %x, 1 -; CHECK-DAG: DemandedBits: 0xFF for %2 = call i32 @llvm.bitreverse.i32(i32 %1) -; CHECK-DAG: DemandedBits: 0xFF for %3 = trunc i32 %2 to i8 +; CHECK-DAG: DemandedBits: 0xff000000 for %1 = or i32 %x, 1 +; CHECK-DAG: DemandedBits: 0xff for %2 = call i32 @llvm.bitreverse.i32(i32 %1) +; CHECK-DAG: DemandedBits: 0xff for %3 = trunc i32 %2 to i8 define i8 @test_bswap(i32 %x) { %1 = or i32 %x, 1 %2 = call i32 @llvm.bswap.i32(i32 %1) @@ -12,9 +12,9 @@ define i8 @test_bswap(i32 %x) { } declare i32 @llvm.bswap.i32(i32) -; CHECK-DAG: DemandedBits: 0xFF000000 for %1 = or i32 %x, 1 -; CHECK-DAG: DemandedBits: 0xFF for %2 = call i32 @llvm.bswap.i32(i32 %1) -; CHECK-DAG: DemandedBits: 0xFF for %3 = trunc i32 %2 to i8 +; CHECK-DAG: DemandedBits: 0xff000000 for %1 = or i32 %x, 1 +; CHECK-DAG: DemandedBits: 0xff for %2 = call i32 @llvm.bswap.i32(i32 %1) +; CHECK-DAG: DemandedBits: 0xff for %3 = trunc i32 %2 to i8 define i8 @test_bitreverse(i32 %x) { %1 = or i32 %x, 1 %2 = call i32 @llvm.bitreverse.i32(i32 %1) diff --git a/test/Analysis/DependenceAnalysis/Preliminary.ll b/test/Analysis/DependenceAnalysis/Preliminary.ll index d6500cc03367..31bd5712af80 100644 --- a/test/Analysis/DependenceAnalysis/Preliminary.ll +++ b/test/Analysis/DependenceAnalysis/Preliminary.ll @@ -696,4 +696,4 @@ while.end: ; preds = %while.end.loopexit, ret void } -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8*
nocapture, i64, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind diff --git a/test/Analysis/GlobalsModRef/memset-escape.ll b/test/Analysis/GlobalsModRef/memset-escape.ll index b26f31389058..b3b902b3dfb3 100644 --- a/test/Analysis/GlobalsModRef/memset-escape.ll +++ b/test/Analysis/GlobalsModRef/memset-escape.ll @@ -22,7 +22,7 @@ entry: %c = alloca [1 x i32], align 4 store i32 0, i32* %retval, align 4 %0 = bitcast [1 x i32]* %c to i8* - call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 4, i32 4, i1 false) + call void @llvm.memset.p0i8.i64(i8* align 4 %0, i8 0, i64 4, i1 false) store i32 1, i32* getelementptr inbounds ([3 x i32], [3 x i32]* @a, i64 0, i64 2), align 4 store i32 0, i32* @b, align 4 br label %for.cond @@ -59,7 +59,7 @@ if.end: ; preds = %for.end } ; Function Attrs: nounwind argmemonly -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind argmemonly +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind argmemonly ; Function Attrs: noreturn nounwind declare void @abort() noreturn nounwind diff --git a/test/Analysis/GlobalsModRef/no-escape.ll b/test/Analysis/GlobalsModRef/no-escape.ll index 752763c43478..a47ffeb537c5 100644 --- a/test/Analysis/GlobalsModRef/no-escape.ll +++ b/test/Analysis/GlobalsModRef/no-escape.ll @@ -59,7 +59,7 @@ for.end: ; preds = %for.cond } ; Function Attrs: nounwind argmemonly -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind argmemonly +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind argmemonly ; Function Attrs: noreturn nounwind declare void @abort() noreturn nounwind diff --git a/test/Analysis/GlobalsModRef/pr12351.ll b/test/Analysis/GlobalsModRef/pr12351.ll index 5cabd6f1f120..2aa270a6b6b6 100644 --- a/test/Analysis/GlobalsModRef/pr12351.ll +++ b/test/Analysis/GlobalsModRef/pr12351.ll @@ -1,8 +1,8 @@ ; RUN: opt < %s -basicaa -globals-aa -gvn -S -disable-verify | FileCheck %s -declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1) +declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i1) define void @foo(i8* %x, i8* %y) { - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %x, i8* %y, i32 1, i32 1, i1 false); + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %x, i8* %y, i32 1, i1 false); ret void } diff --git a/test/Analysis/GlobalsModRef/pr35899-dbg-value.ll b/test/Analysis/GlobalsModRef/pr35899-dbg-value.ll new file mode 100644 index 000000000000..c0600d3fcdce --- /dev/null +++ b/test/Analysis/GlobalsModRef/pr35899-dbg-value.ll @@ -0,0 +1,57 @@ +; RUN: opt -S -strip-debug -globals-aa -instcombine < %s | FileCheck %s +; RUN: opt -S -globals-aa -instcombine < %s | FileCheck %s + +; Having debug info around shouldn't affect what globals-aa and instcombine do. 
+ +@g = global i8 0 + +define void @bar(i8 %p) { + call void @llvm.dbg.value(metadata i64 0, metadata !14, metadata !DIExpression()), !dbg !15 + ret void +} + +declare void @gaz(i8 %p) + +define void @foo() { + store i8 42, i8* @g, align 1 + call void @bar(i8 1) + %_tmp = load i8, i8* @g, align 1 + call void @gaz(i8 %_tmp) + ret void +} + +; Function Attrs: nounwind readnone speculatable +declare void @llvm.dbg.value(metadata, metadata, metadata) #0 + +attributes #0 = { nounwind readnone speculatable } + +!llvm.dbg.cu = !{!5} +!llvm.module.flags = !{!8, !9} +!llvm.ident = !{!10} + +!0 = !DIFile(filename: "foo.c", directory: "/tmp") +!1 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint64_t", file: !2, line: 77, baseType: !3) +!2 = !DIFile(filename: "foo.h", directory: "/tmp") +!3 = !DIDerivedType(tag: DW_TAG_typedef, name: "__u64_t", file: !0, baseType: !4) +!4 = !DIBasicType(name: "unsigned long long", size: 64, encoding: DW_ATE_unsigned) +!5 = distinct !DICompileUnit(language: DW_LANG_C, file: !0, producer: "My Compiler", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !6, retainedTypes: !6, globals: !7) +!6 = !{} +!7 = !{} +!8 = !{i32 2, !"Dwarf Version", i32 4} +!9 = !{i32 2, !"Debug Info Version", i32 3} +!10 = !{!"My Compiler"} +!11 = distinct !DISubprogram(name: "func_5", scope: !0, file: !0, line: 117, type: !12, isLocal: true, isDefinition: true, scopeLine: 118, isOptimized: false, unit: !5, variables: !6) +!12 = !DISubroutineType(types: !13) +!13 = !{} +!14 = !DILocalVariable(name: "p_6", arg: 1, scope: !11, line: 117, type: !1) +!15 = !DILocation(line: 117, column: 34, scope: !11) + +; instcombine should realize that the load will read 42 from g and pass 42 to +; gaz regardless of the dbg.value in bar. + +; CHECK: define void @foo() { +; CHECK-NEXT: store i8 42, i8* @g, align 1 +; CHECK-NEXT: call void @bar(i8 1) +; CHECK-NEXT: call void @gaz(i8 42) +; CHECK-NEXT: ret void + diff --git a/test/Analysis/GlobalsModRef/volatile-instrs.ll b/test/Analysis/GlobalsModRef/volatile-instrs.ll index 5dd47bca3a08..85d2e887e1cb 100644 --- a/test/Analysis/GlobalsModRef/volatile-instrs.ll +++ b/test/Analysis/GlobalsModRef/volatile-instrs.ll @@ -10,7 +10,7 @@ target triple = "x86_64-apple-macosx10.8.0" @.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 declare i32 @printf(i8* nocapture, ...) 
nounwind -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind ; Make sure that the initial memcpy call does not go away @@ -21,10 +21,10 @@ declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, define i32 @main() nounwind uwtable ssp { main_entry: - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* bitcast (%struct.anon* @b to i8*), i8* bitcast (%struct.anon* @a to i8*), i64 12, i32 4, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 bitcast (%struct.anon* @b to i8*), i8* align 4 bitcast (%struct.anon* @a to i8*), i64 12, i1 false) %0 = load volatile i32, i32* getelementptr inbounds (%struct.anon, %struct.anon* @b, i64 0, i32 0), align 4 store i32 %0, i32* @c, align 4 - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* bitcast (%struct.anon* @b to i8*), i8* bitcast (%struct.anon* @a to i8*), i64 12, i32 4, i1 false) nounwind + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 bitcast (%struct.anon* @b to i8*), i8* align 4 bitcast (%struct.anon* @a to i8*), i64 12, i1 false) nounwind %call = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32 %0) nounwind ret i32 0 } diff --git a/test/Analysis/LazyValueAnalysis/lvi-after-jumpthreading.ll b/test/Analysis/LazyValueAnalysis/lvi-after-jumpthreading.ll index 41bb8c9c8201..27cd2263beaa 100644 --- a/test/Analysis/LazyValueAnalysis/lvi-after-jumpthreading.ll +++ b/test/Analysis/LazyValueAnalysis/lvi-after-jumpthreading.ll @@ -19,10 +19,13 @@ entry: ; CHECK-NEXT: ; LatticeVal for: 'i32 %a' is: overdefined ; CHECK-NEXT: ; LatticeVal for: 'i32 %length' is: overdefined ; CHECK-NEXT: ; LatticeVal for: ' %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ]' in BB: '%backedge' is: constantrange<0, 400> +; CHECK-NEXT: ; LatticeVal for: ' %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ]' in BB: '%exit' is: constantrange<399, 400> ; CHECK-NEXT: %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ] ; CHECK-NEXT: ; LatticeVal for: ' %iv.next = add nsw i32 %iv, 1' in BB: '%backedge' is: constantrange<1, 401> +; CHECK-NEXT: ; LatticeVal for: ' %iv.next = add nsw i32 %iv, 1' in BB: '%exit' is: constantrange<400, 401> ; CHECK-NEXT: %iv.next = add nsw i32 %iv, 1 ; CHECK-NEXT: ; LatticeVal for: ' %cont = icmp slt i32 %iv.next, 400' in BB: '%backedge' is: overdefined +; CHECK-NEXT: ; LatticeVal for: ' %cont = icmp slt i32 %iv.next, 400' in BB: '%exit' is: constantrange<0, -1> ; CHECK-NEXT: %cont = icmp slt i32 %iv.next, 400 ; CHECK-NOT: loop loop: diff --git a/test/Analysis/LazyValueAnalysis/lvi-for-ashr.ll b/test/Analysis/LazyValueAnalysis/lvi-for-ashr.ll new file mode 100644 index 000000000000..cdc27e4d0b3c --- /dev/null +++ b/test/Analysis/LazyValueAnalysis/lvi-for-ashr.ll @@ -0,0 +1,27 @@ +; RUN: opt -correlated-propagation -S %s | FileCheck %s +; CHECK-LABEL: @test-ashr +; CHECK: bb_then +; CHECK: %. = select i1 true, i32 3, i32 2 +define i32 @test-ashr(i32 %c) { +chk65: + %cmp = icmp sgt i32 %c, 65 + br i1 %cmp, label %return, label %chk0 + +chk0: + %cmp1 = icmp slt i32 %c, 0 + br i1 %cmp, label %return, label %bb_if + +bb_if: + %ashr.val = ashr exact i32 %c, 2 + %cmp2 = icmp sgt i32 %ashr.val, 15 + br i1 %cmp2, label %bb_then, label %return + +bb_then: + %cmp3 = icmp eq i32 %ashr.val, 16 + %. 
= select i1 %cmp3, i32 3, i32 2 + br label %return + +return: + %retval = phi i32 [0, %chk65], [1, %chk0], [%., %bb_then], [4, %bb_if] + ret i32 %retval +} diff --git a/test/Analysis/Lint/noalias-byval.ll b/test/Analysis/Lint/noalias-byval.ll new file mode 100644 index 000000000000..76e2d03d29fa --- /dev/null +++ b/test/Analysis/Lint/noalias-byval.ll @@ -0,0 +1,48 @@ +; RUN: opt < %s -lint -disable-output 2>&1 | FileCheck %s + +%s = type { i8 } + +; Function Attrs: argmemonly nounwind +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i1) #0 + +; Function Attrs: argmemonly nounwind +declare void @llvm.memset.p0i8.i32(i8* nocapture writeonly, i8, i32, i1) #0 + +declare void @f1(%s* noalias nocapture sret, %s* nocapture readnone) + +define void @f2() { +entry: + %c = alloca %s + %tmp = alloca %s + %0 = bitcast %s* %c to i8* + %1 = bitcast %s* %tmp to i8* + call void @llvm.memset.p0i8.i32(i8* %0, i8 0, i32 1, i1 false) + call void @f1(%s* sret %c, %s* %c) + ret void +} + +; Lint should complain about us passing %c to both arguments since one of them +; is noalias. +; CHECK: Unusual: noalias argument aliases another argument +; CHECK-NEXT: call void @f1(%s* sret %c, %s* %c) + +declare void @f3(%s* noalias nocapture sret, %s* byval nocapture readnone) + +define void @f4() { +entry: + %c = alloca %s + %tmp = alloca %s + %0 = bitcast %s* %c to i8* + %1 = bitcast %s* %tmp to i8* + call void @llvm.memset.p0i8.i32(i8* %0, i8 0, i32 1, i1 false) + call void @f3(%s* sret %c, %s* byval %c) + ret void +} + +; Lint should not complain about passing %c to both arguments even if one is +; noalias, since the other one is byval, effectively copying the data to the +; stack instead of passing the pointer itself. +; CHECK-NOT: Unusual: noalias argument aliases another argument +; CHECK-NOT: call void @f3(%s* sret %c, %s* %c) + +attributes #0 = { argmemonly nounwind } diff --git a/test/Analysis/MemorySSA/basicaa-memcpy.ll b/test/Analysis/MemorySSA/basicaa-memcpy.ll index bfd7c899b59a..28af1e5238c5 100644 --- a/test/Analysis/MemorySSA/basicaa-memcpy.ll +++ b/test/Analysis/MemorySSA/basicaa-memcpy.ll @@ -1,16 +1,16 @@ ; RUN: opt -disable-output -basicaa -print-memoryssa %s 2>&1 | FileCheck %s -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind define void @source_clobber(i8* %a, i8* %b) { ; CHECK-LABEL: @source_clobber( ; CHECK-NEXT: ; 1 = MemoryDef(liveOnEntry) -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 128, i32 1, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 128, i1 false) ; CHECK-NEXT: ; MemoryUse(liveOnEntry) ; CHECK-NEXT: [[X:%.*]] = load i8, i8* %b ; CHECK-NEXT: ret void ; - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 128, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 128, i1 false) %x = load i8, i8* %b ret void } diff --git a/test/Analysis/MemorySSA/volatile-clobber.ll b/test/Analysis/MemorySSA/volatile-clobber.ll index d6f960f3e382..53df7de499bd 100644 --- a/test/Analysis/MemorySSA/volatile-clobber.ll +++ b/test/Analysis/MemorySSA/volatile-clobber.ll @@ -22,8 +22,7 @@ define i32 @foo() { ret i32 %4 } -; Ensuring that we don't automatically hoist nonvolatile loads around volatile -; loads +; Ensuring we allow hoisting nonvolatile loads around volatile loads. 
; CHECK-LABEL define void @volatile_only define void @volatile_only(i32* %arg1, i32* %arg2) { ; Trivially NoAlias/MustAlias @@ -36,7 +35,7 @@ define void @volatile_only(i32* %arg1, i32* %arg2) { ; CHECK: MemoryUse(liveOnEntry) ; CHECK-NEXT: load i32, i32* %b load i32, i32* %b -; CHECK: MemoryUse(1) +; CHECK: MemoryUse(liveOnEntry) ; CHECK-NEXT: load i32, i32* %a load i32, i32* %a @@ -44,7 +43,7 @@ define void @volatile_only(i32* %arg1, i32* %arg2) { ; CHECK: 2 = MemoryDef(1) ; CHECK-NEXT: load volatile i32, i32* %arg1 load volatile i32, i32* %arg1 -; CHECK: MemoryUse(2) +; CHECK: MemoryUse(liveOnEntry) ; CHECK-NEXT: load i32, i32* %arg2 load i32, i32* %arg2 @@ -75,10 +74,10 @@ define void @volatile_atomics(i32* %arg1, i32* %arg2) { ; CHECK: MemoryUse(1) ; CHECK-NEXT: load atomic i32, i32* %b unordered, align 4 load atomic i32, i32* %b unordered, align 4 -; CHECK: MemoryUse(2) +; CHECK: MemoryUse(1) ; CHECK-NEXT: load atomic i32, i32* %a unordered, align 4 load atomic i32, i32* %a unordered, align 4 -; CHECK: MemoryUse(2) +; CHECK: MemoryUse(1) ; CHECK-NEXT: load i32, i32* %a load i32, i32* %a @@ -86,7 +85,7 @@ define void @volatile_atomics(i32* %arg1, i32* %arg2) { ; CHECK: 3 = MemoryDef(2) ; CHECK-NEXT: load atomic volatile i32, i32* %arg1 monotonic, align 4 load atomic volatile i32, i32* %arg1 monotonic, align 4 -; CHECK: MemoryUse(3) +; CHECK: MemoryUse(1) ; CHECK-NEXT: load i32, i32* %arg2 load i32, i32* %arg2 diff --git a/test/Analysis/ScalarEvolution/avoid-smax-1.ll b/test/Analysis/ScalarEvolution/avoid-smax-1.ll index e6c62ee6b475..50c30431af58 100644 --- a/test/Analysis/ScalarEvolution/avoid-smax-1.ll +++ b/test/Analysis/ScalarEvolution/avoid-smax-1.ll @@ -172,7 +172,7 @@ bb23: ; preds = %bb24, %bb.nph %55 = mul i32 %y.21, %w ; [#uses=1] %.sum5 = add i32 %55, %.sum3 ; [#uses=1] %56 = getelementptr i8, i8* %j, i32 %.sum5 ; [#uses=1] - tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %56, i8* %54, i32 %w, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %56, i8* %54, i32 %w, i1 false) %57 = add i32 %y.21, 1 ; [#uses=2] br label %bb24 @@ -189,7 +189,7 @@ bb26: ; preds = %bb24.bb26_crit_edge, %bb22 %60 = getelementptr i8, i8* %j, i32 %.sum4 ; [#uses=1] %61 = mul i32 %x, %w ; [#uses=1] %62 = sdiv i32 %61, 2 ; [#uses=1] - tail call void @llvm.memset.p0i8.i32(i8* %60, i8 -128, i32 %62, i32 1, i1 false) + tail call void @llvm.memset.p0i8.i32(i8* %60, i8 -128, i32 %62, i1 false) ret void bb29: ; preds = %bb20, %entry @@ -207,7 +207,7 @@ bb30: ; preds = %bb31, %bb.nph11 %67 = getelementptr i8, i8* %r, i32 %66 ; [#uses=1] %68 = mul i32 %y.310, %w ; [#uses=1] %69 = getelementptr i8, i8* %j, i32 %68 ; [#uses=1] - tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %69, i8* %67, i32 %w, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %69, i8* %67, i32 %w, i1 false) %70 = add i32 %y.310, 1 ; [#uses=2] br label %bb31 @@ -223,12 +223,12 @@ bb33: ; preds = %bb31.bb33_crit_edge, %bb29 %73 = getelementptr i8, i8* %j, i32 %72 ; [#uses=1] %74 = mul i32 %x, %w ; [#uses=1] %75 = sdiv i32 %74, 2 ; [#uses=1] - tail call void @llvm.memset.p0i8.i32(i8* %73, i8 -128, i32 %75, i32 1, i1 false) + tail call void @llvm.memset.p0i8.i32(i8* %73, i8 -128, i32 %75, i1 false) ret void return: ; preds = %bb20 ret void } -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind -declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind +declare void 
@llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind diff --git a/test/Analysis/ScalarEvolution/shift-op.ll b/test/Analysis/ScalarEvolution/shift-op.ll index fe832d567687..ae13b2879df8 100644 --- a/test/Analysis/ScalarEvolution/shift-op.ll +++ b/test/Analysis/ScalarEvolution/shift-op.ll @@ -160,5 +160,24 @@ define void @test8(i32 %init) { ret void } +define void @test9() { +; CHECK-LABEL: Determining loop execution counts for: @test9 +; CHECK: Loop %loop: Unpredictable max backedge-taken count. + +; This is an infinite loop, make sure that it is recognized as such. + +entry: + br label %loop + +leave: + ret void + +loop: + %iv = phi i32 [ -20, %entry ], [ %iv.shift, %loop ] + %iv.shift = ashr i32 %iv, 1 + %exit.cond = icmp sgt i32 %iv, -1 + br i1 %exit.cond, label %leave, label %loop +} + !0 = !{i32 0, i32 50000} !1 = !{i32 -5000, i32 -1} diff --git a/test/Analysis/ScalarEvolution/trip-count.ll b/test/Analysis/ScalarEvolution/trip-count.ll index d21ace9f2501..b5ff1c3d8a37 100644 --- a/test/Analysis/ScalarEvolution/trip-count.ll +++ b/test/Analysis/ScalarEvolution/trip-count.ll @@ -41,7 +41,7 @@ define i32 @test2() { entry: %bins = alloca [16 x i64], align 16 %0 = bitcast [16 x i64]* %bins to i8* - call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 128, i32 16, i1 false) + call void @llvm.memset.p0i8.i64(i8* align 16 %0, i8 0, i64 128, i1 false) br label %preheader preheader: ; preds = %for.inc.1, %entry @@ -88,7 +88,7 @@ for.inc.1: ; preds = %for.body.1, %for.in } ; Function Attrs: nounwind -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) #0 +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) #0 declare void @may_exit() nounwind diff --git a/test/Analysis/ScalarEvolution/trip-count3.ll b/test/Analysis/ScalarEvolution/trip-count3.ll index cce0182d6493..df6637a4ced3 100644 --- a/test/Analysis/ScalarEvolution/trip-count3.ll +++ b/test/Analysis/ScalarEvolution/trip-count3.ll @@ -50,7 +50,7 @@ sha_update.exit.exitStub: ; preds = %bb3.i bb2.i: ; preds = %bb3.i %1 = getelementptr %struct.SHA_INFO, %struct.SHA_INFO* %sha_info, i64 0, i32 3 %2 = bitcast [16 x i32]* %1 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %buffer_addr.0.i, i64 64, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %buffer_addr.0.i, i64 64, i1 false) %3 = getelementptr %struct.SHA_INFO, %struct.SHA_INFO* %sha_info, i64 0, i32 3, i64 0 %4 = bitcast i32* %3 to i8* br label %codeRepl @@ -74,7 +74,7 @@ bb3.i: ; preds = %byte_reverse.exit.i declare void @sha_stream_bb3_2E_i_bb1_2E_i_2E_i(i8*) nounwind -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind diff --git a/test/Analysis/ScalarEvolution/truncate.ll b/test/Analysis/ScalarEvolution/truncate.ll new file mode 100644 index 000000000000..e9bd39d7a268 --- /dev/null +++ b/test/Analysis/ScalarEvolution/truncate.ll @@ -0,0 +1,72 @@ +; RUN: opt < %s -analyze -scalar-evolution +; RUN: opt < %s -passes='print<scalar-evolution>' +; Regression test for assert in ScalarEvolution::getTruncateExpr.
+ +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1" +target triple = "x86_64-unknown-linux-gnu" + +define void @snork(i8* %arg, i8 %arg1, i64 %arg2) { +bb: + br label %bb12 + +bb3: ; preds = %bb34 + br i1 true, label %bb4, label %bb12 + +bb4: ; preds = %bb3 + br label %bb6 + +bb5: ; preds = %bb6 + ret void + +bb6: ; preds = %bb6, %bb4 + %tmp = phi i64 [ %tmp28, %bb4 ], [ %tmp10, %bb6 ] + %tmp7 = phi i32 [ 3, %bb4 ], [ %tmp11, %bb6 ] + %tmp8 = trunc i64 %tmp to i32 + %tmp9 = sdiv i32 %tmp8, %tmp7 + %tmp10 = add i64 %tmp, -1 + %tmp11 = add i32 %tmp9, %tmp7 + br i1 true, label %bb5, label %bb6 + +bb12: ; preds = %bb3, %bb + br label %bb13 + +bb13: ; preds = %bb34, %bb12 + %tmp14 = phi i64 [ %arg2, %bb12 ], [ %tmp28, %bb34 ] + %tmp15 = phi i8 [ %arg1, %bb12 ], [ %tmp26, %bb34 ] + %tmp16 = phi i32 [ 1, %bb12 ], [ %tmp35, %bb34 ] + %tmp17 = add i8 %tmp15, -1 + %tmp18 = sext i8 %tmp17 to i64 + %tmp19 = sub i64 1, %tmp14 + %tmp20 = add i64 %tmp19, %tmp18 + %tmp21 = trunc i64 %tmp20 to i32 + %tmp22 = icmp eq i32 %tmp21, 0 + br i1 %tmp22, label %bb32, label %bb23 + +bb23: ; preds = %bb13 + br i1 true, label %bb25, label %bb24 + +bb24: ; preds = %bb23 + br label %bb25 + +bb25: ; preds = %bb24, %bb23 + %tmp26 = add i8 %tmp15, -2 + %tmp27 = sext i8 %tmp26 to i64 + %tmp28 = sub i64 %tmp27, %tmp20 + %tmp29 = trunc i64 %tmp28 to i32 + %tmp30 = icmp eq i32 %tmp29, 0 + br i1 %tmp30, label %bb31, label %bb34 + +bb31: ; preds = %bb25 + br label %bb33 + +bb32: ; preds = %bb13 + br label %bb33 + +bb33: ; preds = %bb32, %bb31 + unreachable + +bb34: ; preds = %bb25 + %tmp35 = add nuw nsw i32 %tmp16, 2 + %tmp36 = icmp ugt i32 %tmp16, 52 + br i1 %tmp36, label %bb3, label %bb13 +} diff --git a/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll b/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll index ed091466165e..71f606c37f65 100644 --- a/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll +++ b/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll @@ -45,13 +45,13 @@ define void @test1_no(i32* %p) nounwind { ; CHECK: define void @test2_yes(i8* nocapture %p, i8* nocapture %q, i64 %n) #4 { define void @test2_yes(i8* %p, i8* %q, i64 %n) nounwind { - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p, i8* %q, i64 %n, i32 1, i1 false), !tbaa !1 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p, i8* %q, i64 %n, i1 false), !tbaa !1 ret void } ; CHECK: define void @test2_no(i8* nocapture %p, i8* nocapture readonly %q, i64 %n) #3 { define void @test2_no(i8* %p, i8* %q, i64 %n) nounwind { - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p, i8* %q, i64 %n, i32 1, i1 false), !tbaa !2 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p, i8* %q, i64 %n, i1 false), !tbaa !2 ret void } @@ -70,7 +70,7 @@ define i32 @test3_no(i8* %p) nounwind { } declare void @callee(i32* %p) nounwind -declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1) nounwind ; CHECK: attributes #0 = { norecurse nounwind readnone } ; CHECK: attributes #1 = { norecurse nounwind } diff --git a/test/Analysis/TypeBasedAliasAnalysis/memcpyopt.ll b/test/Analysis/TypeBasedAliasAnalysis/memcpyopt.ll index 64e35788429b..ecc737c77e4a 100644 --- a/test/Analysis/TypeBasedAliasAnalysis/memcpyopt.ll +++ b/test/Analysis/TypeBasedAliasAnalysis/memcpyopt.ll @@ -6,17 +6,17 @@ target datalayout = "e-p:64:64:64" ; it has a TBAA tag which declares that it is unrelated. 
; CHECK: @foo -; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p, i8* %q, i64 16, i32 1, i1 false), !tbaa !0 +; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %p, i8* align 1 %q, i64 16, i1 false), !tbaa !0 ; CHECK-NEXT: store i8 2, i8* %s, align 1, !tbaa [[TAGA:!.*]] ; CHECK-NEXT: ret void define void @foo(i8* nocapture %p, i8* nocapture %q, i8* nocapture %s) nounwind { - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p, i8* %q, i64 16, i32 1, i1 false), !tbaa !2 + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p, i8* %q, i64 16, i1 false), !tbaa !2 store i8 2, i8* %s, align 1, !tbaa !1 - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %q, i8* %p, i64 16, i32 1, i1 false), !tbaa !2 + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %q, i8* %p, i64 16, i1 false), !tbaa !2 ret void } -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind ; CHECK: [[TAGA]] = !{[[TYPEA:!.*]], [[TYPEA]], i64 0} ; CHECK: [[TYPEA]] = !{!"A", !{{.*}}} diff --git a/test/Analysis/ValueTracking/memory-dereferenceable.ll b/test/Analysis/ValueTracking/memory-dereferenceable.ll index 29c31b95b188..2e9453f670ce 100644 --- a/test/Analysis/ValueTracking/memory-dereferenceable.ll +++ b/test/Analysis/ValueTracking/memory-dereferenceable.ll @@ -20,9 +20,12 @@ declare i32* @foo() @globalptr.align16 = external global i8, align 16 ; CHECK-LABEL: 'test' -define void @test(i32 addrspace(1)* dereferenceable(8) %dparam, +define void @test(%struct.A* sret %result, + i32 addrspace(1)* dereferenceable(8) %dparam, i8 addrspace(1)* dereferenceable(32) align 1 %dparam.align1, - i8 addrspace(1)* dereferenceable(32) align 16 %dparam.align16) + i8 addrspace(1)* dereferenceable(32) align 16 %dparam.align16, + i8* byval %i8_byval, + %struct.A* byval %A_byval) gc "statepoint-example" { ; CHECK: The following are dereferenceable: entry: @@ -34,6 +37,20 @@ entry: %alloca = alloca i1 %load2 = load i1, i1* %alloca + ; Load from empty array alloca +; CHECK-NOT: %empty_alloca + %empty_alloca = alloca i8, i64 0 + %empty_load = load i8, i8* %empty_alloca + + ; Loads from sret arguments +; CHECK: %sret_gep{{.*}}(aligned) + %sret_gep = getelementptr inbounds %struct.A, %struct.A* %result, i64 0, i32 1, i64 2 + load i8, i8* %sret_gep + +; CHECK-NOT: %sret_gep_outside + %sret_gep_outside = getelementptr %struct.A, %struct.A* %result, i64 0, i32 1, i64 7 + load i8, i8* %sret_gep_outside + ; CHECK: %dparam{{.*}}(aligned) %load3 = load i32, i32 addrspace(1)* %dparam @@ -94,6 +111,18 @@ entry: %load15 = load i8, i8 addrspace(1)* %dparam.align1, align 16 %load16 = load i8, i8 addrspace(1)* %dparam.align16, align 16 + ; Loads from byval arguments +; CHECK: %i8_byval{{.*}}(aligned) + %i8_byval_load = load i8, i8* %i8_byval + +; CHECK-NOT: %byval_cast + %byval_cast = bitcast i8* %i8_byval to i32* + %bad_byval_load = load i32, i32* %byval_cast + +; CHECK: %byval_gep{{.*}}(aligned) + %byval_gep = getelementptr inbounds %struct.A, %struct.A* %A_byval, i64 0, i32 1, i64 2 + load i8, i8* %byval_gep + ; Loads from aligned allocas ; CHECK: %alloca.align1{{.*}}(unaligned) ; CHECK: %alloca.align16{{.*}}(aligned) diff --git a/test/Analysis/ValueTracking/non-negative-phi-bits.ll b/test/Analysis/ValueTracking/non-negative-phi-bits.ll new file mode 100755 index 000000000000..059bbaa3c4e7 --- /dev/null +++ b/test/Analysis/ValueTracking/non-negative-phi-bits.ll @@ -0,0 +1,27 @@ +; NOTE: Assertions have been autogenerated by 
utils/update_test_checks.py +; RUN: opt -instcombine < %s -S | FileCheck %s + +define void @test() #0 { +; CHECK-LABEL: @test( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ult i64 [[INDVARS_IV_NEXT]], 40 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %indvars.iv.next = add nsw i64 %indvars.iv, 1 + %exitcond = icmp slt i64 %indvars.iv.next, 40 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} diff --git a/test/Assembler/getelementptr_vec_ce.ll b/test/Assembler/getelementptr_vec_ce.ll index 4cf2964a57f7..67029698bfc5 100644 --- a/test/Assembler/getelementptr_vec_ce.ll +++ b/test/Assembler/getelementptr_vec_ce.ll @@ -3,7 +3,7 @@ @G = global [4 x i32] zeroinitializer ; CHECK-LABEL: @foo -; CHECK: ret <4 x i32*> getelementptr ([4 x i32], [4 x i32]* @G, <4 x i32> zeroinitializer, <4 x i32> ) +; CHECK: ret <4 x i32*> getelementptr inbounds ([4 x i32], [4 x i32]* @G, <4 x i32> zeroinitializer, <4 x i32> ) define <4 x i32*> @foo() { ret <4 x i32*> getelementptr ([4 x i32], [4 x i32]* @G, i32 0, <4 x i32> ) } diff --git a/test/Assembler/ifunc-dsolocal-daig.ll b/test/Assembler/ifunc-dsolocal.ll similarity index 50% rename from test/Assembler/ifunc-dsolocal-daig.ll rename to test/Assembler/ifunc-dsolocal.ll index 86e941d6cac1..63242cb3f24f 100644 --- a/test/Assembler/ifunc-dsolocal-daig.ll +++ b/test/Assembler/ifunc-dsolocal.ll @@ -1,7 +1,7 @@ -; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s +; RUN: llvm-as < %s | llvm-dis | FileCheck %s @foo = dso_local ifunc i32 (i32), i64 ()* @foo_ifunc -; CHECK: error: dso_local is invalid on ifunc +; CHECK: @foo = dso_local ifunc i32 (i32), i64 ()* @foo_ifunc define internal i64 @foo_ifunc() { entry: diff --git a/test/Bitcode/attributes.ll b/test/Bitcode/attributes.ll index 18aa12c7af97..8c74b3fb36b4 100644 --- a/test/Bitcode/attributes.ll +++ b/test/Bitcode/attributes.ll @@ -204,7 +204,7 @@ define void @f34() ; CHECK: define void @f34() { call void @nobuiltin() nobuiltin -; CHECK: call void @nobuiltin() #34 +; CHECK: call void @nobuiltin() #35 ret void; } @@ -339,6 +339,12 @@ define void @f57() speculatable { ret void } +; CHECK: define void @f58() #34 +define void @f58() sanitize_hwaddress +{ + ret void; +} + ; CHECK: attributes #0 = { noreturn } ; CHECK: attributes #1 = { nounwind } ; CHECK: attributes #2 = { readnone } @@ -373,4 +379,5 @@ define void @f57() speculatable { ; CHECK: attributes #31 = { allocsize(0,1) } ; CHECK: attributes #32 = { writeonly } ; CHECK: attributes #33 = { speculatable } -; CHECK: attributes #34 = { nobuiltin } +; CHECK: attributes #34 = { sanitize_hwaddress } +; CHECK: attributes #35 = { nobuiltin } diff --git a/test/Bitcode/dso_location.ll b/test/Bitcode/dso_location.ll index 4dc9fe24c198..3ad511bad430 100644 --- a/test/Bitcode/dso_location.ll +++ b/test/Bitcode/dso_location.ll @@ -15,11 +15,11 @@ @default_local_global = dso_local default global i32 0 ; CHECK: @default_local_global = dso_local global i32 0 -@hidden_local_global = dso_local hidden global i32 0 -; CHECK: @hidden_local_global = dso_local hidden 
global i32 0 +@hidden_local_global = hidden global i32 0 +; CHECK: @hidden_local_global = hidden global i32 0 -@protected_local_global = dso_local protected global i32 0 -; CHECK: @protected_local_global = dso_local protected global i32 0 +@protected_local_global = protected global i32 0 +; CHECK: @protected_local_global = protected global i32 0 @local_alias = dso_local alias i32, i32* @local_global ; CHECK-DAG: @local_alias = dso_local alias i32, i32* @local_global @@ -32,11 +32,11 @@ declare dso_local default void @default_local() ; CHECK: declare dso_local void @default_local() -declare dso_local hidden void @hidden_local() -; CHECK: declare dso_local hidden void @hidden_local() +declare hidden void @hidden_local() +; CHECK: declare hidden void @hidden_local() -define dso_local protected void @protected_local() { -; CHECK: define dso_local protected void @protected_local() +define protected void @protected_local() { +; CHECK: define protected void @protected_local() entry: ret void } diff --git a/test/Bitcode/standardCIntrinsic.3.2.ll b/test/Bitcode/standardCIntrinsic.3.2.ll index 09f2378a2217..d556682786d7 100644 --- a/test/Bitcode/standardCIntrinsic.3.2.ll +++ b/test/Bitcode/standardCIntrinsic.3.2.ll @@ -7,10 +7,10 @@ define void @memcpyintrinsic(i8* %dest, i8* %src, i32 %len) { entry: -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 %len, i32 1, i1 true) - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 %len, i32 1, i1 true) +; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 %len, i1 true) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 %len, i1 true) ret void } -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 %len, i32 %align, i1 %isvolatile) \ No newline at end of file +declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i1) diff --git a/test/Bitcode/thinlto-function-summary-refgraph.ll b/test/Bitcode/thinlto-function-summary-refgraph.ll index 08dae47bbf75..83039ad89b93 100644 --- a/test/Bitcode/thinlto-function-summary-refgraph.ll +++ b/test/Bitcode/thinlto-function-summary-refgraph.ll @@ -50,7 +50,7 @@ ; a reference to it when reached while earlier analyzing the phi using its ; return value: ; op0=Y op4=func2 -; CHECK-DAG: +; CHECK-DAG: ; Function Z contains call to func2, and ensures we don't incorrectly add ; a reference to it when reached while analyzing subsequent use of its return ; value: diff --git a/test/Bitcode/thinlto-summary-linkage-types.ll b/test/Bitcode/thinlto-summary-linkage-types.ll index e8fea12e40ba..e3e45181095f 100644 --- a/test/Bitcode/thinlto-summary-linkage-types.ll +++ b/test/Bitcode/thinlto-summary-linkage-types.ll @@ -5,8 +5,8 @@ ; RUN: llvm-bcanalyzer -dump %t2.thinlto.bc | FileCheck %s --check-prefix=COMBINED define private void @private() -; CHECK: &1 | FileCheck %s --check-prefix=ERROR +; RUN: not llc -O0 -global-isel -global-isel-abort=1 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefix=ERROR ; RUN: llc -O0 -global-isel -global-isel-abort=0 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefix=FALLBACK ; RUN: llc -O0 -global-isel -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o %t.out 2> %t.err ; RUN: FileCheck %s --check-prefix=FALLBACK-WITH-REPORT-OUT < %t.out ; RUN: FileCheck %s --check-prefix=FALLBACK-WITH-REPORT-ERR < %t.err +; RUN: not llc -global-isel -mtriple aarch64_be %s -o - 2>&1 | FileCheck %s --check-prefix=BIG-ENDIAN ; This file checks that the fallback path to selection 
dag works. ; The test is fragile in the sense that it must be updated to expose ; something that fails with global-isel. @@ -12,6 +13,8 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "aarch64--" +; BIG-ENDIAN: unable to translate in big endian mode + ; We use __fixunstfti as the common denominator for __fixunstfti on Linux and ; ___fixunstfti on iOS ; ERROR: unable to lower arguments: i128 (i128)* (in function: ABIi128) @@ -43,7 +46,7 @@ define [1 x double] @constant() { ; The key problem here is that we may fail to create an MBB referenced by a ; PHI. If so, we cannot complete the G_PHI and mustn't try or bad things ; happen. -; FALLBACK-WITH-REPORT-ERR: remark: :0:0: cannot select: G_STORE %vreg6, %vreg2; mem:ST4[%addr] GPR:%vreg6,%vreg2 (in function: pending_phis) +; FALLBACK-WITH-REPORT-ERR: remark: :0:0: cannot select: G_STORE %6:gpr(s32), %2:gpr(p0); mem:ST4[%addr] (in function: pending_phis) ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for pending_phis ; FALLBACK-WITH-REPORT-OUT-LABEL: pending_phis: define i32 @pending_phis(i1 %tst, i32 %val, i32* %addr) { @@ -63,7 +66,7 @@ false: } ; General legalizer inability to handle types whose size wasn't a power of 2. -; FALLBACK-WITH-REPORT-ERR: remark: :0:0: unable to legalize instruction: G_STORE %vreg1, %vreg0; mem:ST6[%addr](align=8) (in function: odd_type) +; FALLBACK-WITH-REPORT-ERR: remark: :0:0: unable to legalize instruction: G_STORE %1:_(s42), %0:_(p0); mem:ST6[%addr](align=8) (in function: odd_type) ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for odd_type ; FALLBACK-WITH-REPORT-OUT-LABEL: odd_type: define void @odd_type(i42* %addr) { @@ -72,7 +75,7 @@ define void @odd_type(i42* %addr) { ret void } -; FALLBACK-WITH-REPORT-ERR: remark: :0:0: unable to legalize instruction: G_STORE %vreg1, %vreg0; mem:ST28[%addr](align=32) (in function: odd_vector) +; FALLBACK-WITH-REPORT-ERR: remark: :0:0: unable to legalize instruction: G_STORE %1:_(<7 x s32>), %0:_(p0); mem:ST28[%addr](align=32) (in function: odd_vector) ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for odd_vector ; FALLBACK-WITH-REPORT-OUT-LABEL: odd_vector: define void @odd_vector(<7 x i32>* %addr) { @@ -91,7 +94,7 @@ define i128 @sequence_sizes([8 x i8] %in) { } ; Just to make sure we don't accidentally emit a normal load/store. -; FALLBACK-WITH-REPORT-ERR: remark: :0:0: cannot select: %vreg2(s64) = G_LOAD %vreg0; mem:LD8[%addr] GPR:%vreg2,%vreg0 (in function: atomic_ops) +; FALLBACK-WITH-REPORT-ERR: remark: :0:0: cannot select: %2:gpr(s64) = G_LOAD %0:gpr(p0); mem:LD8[%addr] (in function: atomic_ops) ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for atomic_ops ; FALLBACK-WITH-REPORT-LABEL: atomic_ops: define i64 @atomic_ops(i64* %addr) { @@ -132,14 +135,14 @@ continue: } ; Check that we fallback on invoke translation failures. 
-; FALLBACK-WITH-REPORT-ERR: remark: :0:0: unable to legalize instruction: %vreg0(s128) = G_FCONSTANT quad 2 +; FALLBACK-WITH-REPORT-ERR: remark: :0:0: unable to legalize instruction: %0:_(s128) = G_FCONSTANT fp128 0xL00000000000000004000000000000000 ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for test_quad_dump ; FALLBACK-WITH-REPORT-OUT-LABEL: test_quad_dump: define fp128 @test_quad_dump() { ret fp128 0xL00000000000000004000000000000000 } -; FALLBACK-WITH-REPORT-ERR: remark: :0:0: unable to legalize instruction: %vreg0(p0) = G_EXTRACT_VECTOR_ELT %vreg1, %vreg2; (in function: vector_of_pointers_extractelement) +; FALLBACK-WITH-REPORT-ERR: remark: :0:0: unable to legalize instruction: %0:_(p0) = G_EXTRACT_VECTOR_ELT %1:_(<2 x p0>), %2:_(s32) (in function: vector_of_pointers_extractelement) ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for vector_of_pointers_extractelement ; FALLBACK-WITH-REPORT-OUT-LABEL: vector_of_pointers_extractelement: @var = global <2 x i16*> zeroinitializer @@ -156,7 +159,7 @@ end: br label %block } -; FALLBACK-WITH-REPORT-ERR: remark: :0:0: unable to legalize instruction: G_STORE %vreg0, %vreg4; mem:ST16[undef] (in function: vector_of_pointers_insertelement) +; FALLBACK-WITH-REPORT-ERR: remark: :0:0: unable to legalize instruction: G_STORE %0:_(<2 x p0>), %4:_(p0); mem:ST16[undef] (in function: vector_of_pointers_insertelement) ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for vector_of_pointers_insertelement ; FALLBACK-WITH-REPORT-OUT-LABEL: vector_of_pointers_insertelement: define void @vector_of_pointers_insertelement() { @@ -172,7 +175,7 @@ end: br label %block } -; FALLBACK-WITH-REPORT-ERR: remark: :0:0: unable to legalize instruction: G_STORE %vreg1, %vreg3; mem:ST12[undef](align=4) (in function: nonpow2_insertvalue_narrowing) +; FALLBACK-WITH-REPORT-ERR: remark: :0:0: unable to legalize instruction: G_STORE %1:_(s96), %3:_(p0); mem:ST12[undef](align=4) (in function: nonpow2_insertvalue_narrowing) ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for nonpow2_insertvalue_narrowing ; FALLBACK-WITH-REPORT-OUT-LABEL: nonpow2_insertvalue_narrowing: %struct96 = type { float, float, float } @@ -182,7 +185,7 @@ define void @nonpow2_insertvalue_narrowing(float %a) { ret void } -; FALLBACK-WITH-REPORT-ERR remark: :0:0: unable to legalize instruction: G_STORE %vreg3, %vreg4; mem:ST12[undef](align=16) (in function: nonpow2_add_narrowing) +; FALLBACK-WITH-REPORT-ERR remark: :0:0: unable to legalize instruction: G_STORE %3, %4; mem:ST12[undef](align=16) (in function: nonpow2_add_narrowing) ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for nonpow2_add_narrowing ; FALLBACK-WITH-REPORT-OUT-LABEL: nonpow2_add_narrowing: define void @nonpow2_add_narrowing() { @@ -193,7 +196,7 @@ define void @nonpow2_add_narrowing() { ret void } -; FALLBACK-WITH-REPORT-ERR remark: :0:0: unable to legalize instruction: G_STORE %vreg3, %vreg4; mem:ST12[undef](align=16) (in function: nonpow2_add_narrowing) +; FALLBACK-WITH-REPORT-ERR remark: :0:0: unable to legalize instruction: G_STORE %3, %4; mem:ST12[undef](align=16) (in function: nonpow2_add_narrowing) ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for nonpow2_or_narrowing ; FALLBACK-WITH-REPORT-OUT-LABEL: nonpow2_or_narrowing: define void @nonpow2_or_narrowing() { @@ -204,7 +207,7 @@ define void @nonpow2_or_narrowing() { ret void } -; FALLBACK-WITH-REPORT-ERR 
remark: :0:0: unable to legalize instruction: G_STORE %vreg0, %vreg1; mem:ST12[undef](align=16) (in function: nonpow2_load_narrowing) +; FALLBACK-WITH-REPORT-ERR remark: :0:0: unable to legalize instruction: G_STORE %0, %1; mem:ST12[undef](align=16) (in function: nonpow2_load_narrowing) ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for nonpow2_load_narrowing ; FALLBACK-WITH-REPORT-OUT-LABEL: nonpow2_load_narrowing: define void @nonpow2_load_narrowing() { @@ -213,7 +216,7 @@ define void @nonpow2_load_narrowing() { ret void } -; FALLBACK-WITH-REPORT-ERR: remark: :0:0: unable to legalize instruction: G_STORE %vreg3, %vreg0; mem:ST12[%c](align=16) (in function: nonpow2_store_narrowing +; FALLBACK-WITH-REPORT-ERR: remark: :0:0: unable to legalize instruction: G_STORE %3:_(s96), %0:_(p0); mem:ST12[%c](align=16) (in function: nonpow2_store_narrowing ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for nonpow2_store_narrowing ; FALLBACK-WITH-REPORT-OUT-LABEL: nonpow2_store_narrowing: define void @nonpow2_store_narrowing(i96* %c) { @@ -223,7 +226,7 @@ define void @nonpow2_store_narrowing(i96* %c) { ret void } -; FALLBACK-WITH-REPORT-ERR: remark: :0:0: unable to legalize instruction: G_STORE %vreg0, %vreg1; mem:ST12[undef](align=16) (in function: nonpow2_constant_narrowing) +; FALLBACK-WITH-REPORT-ERR: remark: :0:0: unable to legalize instruction: G_STORE %0:_(s96), %1:_(p0); mem:ST12[undef](align=16) (in function: nonpow2_constant_narrowing) ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for nonpow2_constant_narrowing ; FALLBACK-WITH-REPORT-OUT-LABEL: nonpow2_constant_narrowing: define void @nonpow2_constant_narrowing() { @@ -233,8 +236,8 @@ define void @nonpow2_constant_narrowing() { ; Currently can't handle vector lengths that aren't an exact multiple of ; natively supported vector lengths. Test that the fall-back works for those. -; FALLBACK-WITH-REPORT-ERR-G_IMPLICIT_DEF-LEGALIZABLE: (FIXME: this is what is expected once we can legalize non-pow-of-2 G_IMPLICIT_DEF) remark: :0:0: unable to legalize instruction: %vreg1(<7 x s64>) = G_ADD %vreg0, %vreg0; (in function: nonpow2_vector_add_fewerelements -; FALLBACK-WITH-REPORT-ERR: remark: :0:0: unable to legalize instruction: %vreg2(s64) = G_EXTRACT_VECTOR_ELT %vreg1, %vreg3; (in function: nonpow2_vector_add_fewerelements) +; FALLBACK-WITH-REPORT-ERR-G_IMPLICIT_DEF-LEGALIZABLE: (FIXME: this is what is expected once we can legalize non-pow-of-2 G_IMPLICIT_DEF) remark: :0:0: unable to legalize instruction: %1:_(<7 x s64>) = G_ADD %0, %0; (in function: nonpow2_vector_add_fewerelements +; FALLBACK-WITH-REPORT-ERR: remark: :0:0: unable to legalize instruction: %2:_(s64) = G_EXTRACT_VECTOR_ELT %1:_(<7 x s64>), %3:_(s64) (in function: nonpow2_vector_add_fewerelements) ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for nonpow2_vector_add_fewerelements ; FALLBACK-WITH-REPORT-OUT-LABEL: nonpow2_vector_add_fewerelements: define void @nonpow2_vector_add_fewerelements() { diff --git a/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll b/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll index 7c67a22e23c8..077c21c0557d 100644 --- a/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll +++ b/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll @@ -58,19 +58,19 @@ define void @allocai64() { ; CHECK: body: ; ; ABI/constant lowering and IR-level entry basic block. 
-; CHECK: {{bb.[0-9]+}}.entry: +; CHECK: bb.{{[0-9]+}}.{{[a-zA-Z0-9.]+}}: ; ; Make sure we have one successor and only one. -; CHECK-NEXT: successors: %[[BB2:bb.[0-9]+.bb2]](0x80000000) +; CHECK-NEXT: successors: %[[BB2:bb.[0-9]+]](0x80000000) ; ; Check that we emit the correct branch. ; CHECK: G_BR %[[BB2]] ; ; Check that end contains the return instruction. -; CHECK: [[END:bb.[0-9]+.end]]: +; CHECK: [[END:bb.[0-9]+]].{{[a-zA-Z0-9.]+}}: ; CHECK-NEXT: RET_ReallyLR ; -; CHECK: {{bb.[0-9]+}}.bb2: +; CHECK: bb.{{[0-9]+}}.{{[a-zA-Z0-9.]+}}: ; CHECK-NEXT: successors: %[[END]](0x80000000) ; CHECK: G_BR %[[END]] define void @uncondbr() { @@ -84,11 +84,11 @@ bb2: ; CHECK-LABEL: name: uncondbr_fallthrough ; CHECK: body: -; CHECK: {{bb.[0-9]+}}.entry: -; CHECK-NEXT: successors: %[[END:bb.[0-9]+.end]](0x80000000) +; CHECK: bb.{{[0-9]+}}.{{[a-zA-Z0-9.]+}}: +; CHECK-NEXT: successors: %[[END:bb.[0-9]+]](0x80000000) ; We don't emit a branch here, as we can fallthrough to the successor. ; CHECK-NOT: G_BR -; CHECK: [[END]]: +; CHECK: [[END]].{{[a-zA-Z0-9.]+}}: ; CHECK-NEXT: RET_ReallyLR define void @uncondbr_fallthrough() { entry: @@ -102,10 +102,10 @@ end: ; CHECK: body: ; ; ABI/constant lowering and IR-level entry basic block. -; CHECK: {{bb.[0-9]+}} (%ir-block.{{[0-9]+}}): +; CHECK: bb.{{[0-9]+}} (%ir-block.{{[0-9]+}}): ; Make sure we have two successors -; CHECK-NEXT: successors: %[[TRUE:bb.[0-9]+.true]](0x40000000), -; CHECK: %[[FALSE:bb.[0-9]+.false]](0x40000000) +; CHECK-NEXT: successors: %[[TRUE:bb.[0-9]+]](0x40000000), +; CHECK: %[[FALSE:bb.[0-9]+]](0x40000000) ; ; CHECK: [[ADDR:%.*]]:_(p0) = COPY %x0 ; @@ -115,9 +115,9 @@ end: ; CHECK: G_BR %[[FALSE]] ; ; Check that each successor contains the return instruction. -; CHECK: [[TRUE]]: +; CHECK: [[TRUE]].{{[a-zA-Z0-9.]+}}: ; CHECK-NEXT: RET_ReallyLR -; CHECK: [[FALSE]]: +; CHECK: [[FALSE]].{{[a-zA-Z0-9.]+}}: ; CHECK-NEXT: RET_ReallyLR define void @condbr(i1* %tstaddr) { %tst = load i1, i1* %tstaddr @@ -133,8 +133,8 @@ false: ; CHECK-LABEL: name: switch ; CHECK: body: ; -; CHECK: {{bb.[0-9]+.entry}}: -; CHECK-NEXT: successors: %[[BB_CASE100:bb.[0-9]+.case100]](0x40000000), %[[BB_NOTCASE100_CHECKNEXT:bb.[0-9]+.entry]](0x40000000) +; CHECK: bb.{{[a-zA-Z0-9.]+}}: +; CHECK-NEXT: successors: %[[BB_CASE100:bb.[0-9]+]](0x40000000), %[[BB_NOTCASE100_CHECKNEXT:bb.[0-9]+]](0x40000000) ; CHECK: %0:_(s32) = COPY %w0 ; CHECK: %[[reg100:[0-9]+]]:_(s32) = G_CONSTANT i32 100 ; CHECK: %[[reg200:[0-9]+]]:_(s32) = G_CONSTANT i32 200 @@ -145,31 +145,31 @@ false: ; CHECK: G_BRCOND %[[regicmp100]](s1), %[[BB_CASE100]] ; CHECK: G_BR %[[BB_NOTCASE100_CHECKNEXT]] ; -; CHECK: [[BB_NOTCASE100_CHECKNEXT]]: -; CHECK-NEXT: successors: %[[BB_CASE200:bb.[0-9]+.case200]](0x40000000), %[[BB_NOTCASE200_CHECKNEXT:bb.[0-9]+.entry]](0x40000000) +; CHECK: [[BB_NOTCASE100_CHECKNEXT]].{{[a-zA-Z0-9.]+}}: +; CHECK-NEXT: successors: %[[BB_CASE200:bb.[0-9]+]](0x40000000), %[[BB_NOTCASE200_CHECKNEXT:bb.[0-9]+]](0x40000000) ; CHECK: %[[regicmp200:[0-9]+]]:_(s1) = G_ICMP intpred(eq), %[[reg200]](s32), %0 ; CHECK: G_BRCOND %[[regicmp200]](s1), %[[BB_CASE200]] ; CHECK: G_BR %[[BB_NOTCASE200_CHECKNEXT]] ; -; CHECK: [[BB_NOTCASE200_CHECKNEXT]]: -; CHECK-NEXT: successors: %[[BB_DEFAULT:bb.[0-9]+.default]](0x80000000) +; CHECK: [[BB_NOTCASE200_CHECKNEXT]].{{[a-zA-Z0-9.]+}}: +; CHECK-NEXT: successors: %[[BB_DEFAULT:bb.[0-9]+]](0x80000000) ; CHECK: G_BR %[[BB_DEFAULT]] ; -; CHECK: [[BB_DEFAULT]]: -; CHECK-NEXT: successors: %[[BB_RET:bb.[0-9]+.return]](0x80000000) +; CHECK: 
[[BB_DEFAULT]].{{[a-zA-Z0-9.]+}}: +; CHECK-NEXT: successors: %[[BB_RET:bb.[0-9]+]](0x80000000) ; CHECK: %[[regretdefault:[0-9]+]]:_(s32) = G_ADD %0, %[[reg0]] ; CHECK: G_BR %[[BB_RET]] ; -; CHECK: [[BB_CASE100]]: -; CHECK-NEXT: successors: %[[BB_RET:bb.[0-9]+.return]](0x80000000) +; CHECK: [[BB_CASE100]].{{[a-zA-Z0-9.]+}}: +; CHECK-NEXT: successors: %[[BB_RET:bb.[0-9]+]](0x80000000) ; CHECK: %[[regretc100:[0-9]+]]:_(s32) = G_ADD %0, %[[reg1]] ; CHECK: G_BR %[[BB_RET]] ; -; CHECK: [[BB_CASE200]]: +; CHECK: [[BB_CASE200]].{{[a-zA-Z0-9.]+}}: ; CHECK-NEXT: successors: %[[BB_RET]](0x80000000) ; CHECK: %[[regretc200:[0-9]+]]:_(s32) = G_ADD %0, %[[reg2]] ; -; CHECK: [[BB_RET]]: +; CHECK: [[BB_RET]].{{[a-zA-Z0-9.]+}}: ; CHECK-NEXT: %[[regret:[0-9]+]]:_(s32) = G_PHI %[[regretdefault]](s32), %[[BB_DEFAULT]], %[[regretc100]](s32), %[[BB_CASE100]] ; CHECK: %w0 = COPY %[[regret]](s32) ; CHECK: RET_ReallyLR implicit %w0 @@ -202,16 +202,16 @@ return: ; %entry block is no longer a predecessor for the phi instruction. We need to ; use the correct lowered MachineBasicBlock instead. ; CHECK-LABEL: name: test_cfg_remap -; CHECK: {{bb.[0-9]+.entry}}: -; CHECK-NEXT: successors: %{{bb.[0-9]+.next}}(0x40000000), %[[NOTCASE1_BLOCK:bb.[0-9]+.entry]](0x40000000) -; CHECK: [[NOTCASE1_BLOCK]]: -; CHECK-NEXT: successors: %{{bb.[0-9]+.other}}(0x40000000), %[[NOTCASE57_BLOCK:bb.[0-9]+.entry]](0x40000000) -; CHECK: [[NOTCASE57_BLOCK]]: -; CHECK-NEXT: successors: %[[PHI_BLOCK:bb.[0-9]+.phi.block]](0x80000000) +; CHECK: bb.{{[0-9]+.[a-zA-Z0-9.]+}}: +; CHECK-NEXT: successors: %{{bb.[0-9]+}}(0x40000000), %[[NOTCASE1_BLOCK:bb.[0-9]+]](0x40000000) +; CHECK: [[NOTCASE1_BLOCK]].{{[a-zA-Z0-9.]+}}: +; CHECK-NEXT: successors: %{{bb.[0-9]+}}(0x40000000), %[[NOTCASE57_BLOCK:bb.[0-9]+]](0x40000000) +; CHECK: [[NOTCASE57_BLOCK]].{{[a-zA-Z0-9.]+}}: +; CHECK-NEXT: successors: %[[PHI_BLOCK:bb.[0-9]+]](0x80000000) ; CHECK: G_BR %[[PHI_BLOCK]] ; -; CHECK: [[PHI_BLOCK]]: -; CHECK-NEXT: G_PHI %{{.*}}(s32), %[[NOTCASE57_BLOCK:bb.[0-9]+.entry]], %{{.*}}(s32), +; CHECK: [[PHI_BLOCK]].{{[a-zA-Z0-9.]+}}: +; CHECK-NEXT: G_PHI %{{.*}}(s32), %[[NOTCASE57_BLOCK:bb.[0-9]+]], %{{.*}}(s32), ; define i32 @test_cfg_remap(i32 %in) { entry: @@ -230,7 +230,7 @@ phi.block: } ; CHECK-LABEL: name: test_cfg_remap_multiple_preds -; CHECK: G_PHI [[ENTRY:%.*]](s32), %bb.{{[0-9]+}}.entry, [[ENTRY]](s32), %bb.{{[0-9]+}}.entry +; CHECK: G_PHI [[ENTRY:%.*]](s32), %bb.{{[0-9]+}}, [[ENTRY]](s32), %bb.{{[0-9]+}} define i32 @test_cfg_remap_multiple_preds(i32 %in) { entry: switch i32 %in, label %odd [i32 1, label %next @@ -256,19 +256,19 @@ phi.block: ; CHECK: body: ; ; ABI/constant lowering and IR-level entry basic block. 
-; CHECK: {{bb.[0-9]+.entry}}: +; CHECK: bb.{{[0-9]+.[a-zA-Z0-9.]+}}: ; Make sure we have one successor -; CHECK-NEXT: successors: %[[BB_L1:bb.[0-9]+.L1]](0x80000000) +; CHECK-NEXT: successors: %[[BB_L1:bb.[0-9]+]](0x80000000) ; CHECK-NOT: G_BR ; ; Check basic block L1 has 2 successors: BBL1 and BBL2 -; CHECK: [[BB_L1]] (address-taken): +; CHECK: [[BB_L1]].{{[a-zA-Z0-9.]+}} (address-taken): ; CHECK-NEXT: successors: %[[BB_L1]](0x40000000), -; CHECK: %[[BB_L2:bb.[0-9]+.L2]](0x40000000) +; CHECK: %[[BB_L2:bb.[0-9]+]](0x40000000) ; CHECK: G_BRINDIRECT %{{[0-9]+}}(p0) ; ; Check basic block L2 is the return basic block -; CHECK: [[BB_L2]] (address-taken): +; CHECK: [[BB_L2]].{{[a-zA-Z0-9.]+}} (address-taken): ; CHECK-NEXT: RET_ReallyLR @indirectbr.L = internal unnamed_addr constant [3 x i8*] [i8* blockaddress(@indirectbr, %L1), i8* blockaddress(@indirectbr, %L2), i8* null], align 8 @@ -410,11 +410,11 @@ define i64* @trivial_bitcast(i8* %a) { ; CHECK-LABEL: name: trivial_bitcast_with_copy ; CHECK: [[A:%[0-9]+]]:_(p0) = COPY %x0 -; CHECK: G_BR %[[CAST:bb\.[0-9]+.cast]] +; CHECK: G_BR %[[CAST:bb\.[0-9]+]] -; CHECK: [[END:bb\.[0-9]+.end]]: +; CHECK: [[END:bb\.[0-9]+]].{{[a-zA-Z0-9.]+}}: -; CHECK: [[CAST]]: +; CHECK: [[CAST]].{{[a-zA-Z0-9.]+}}: ; CHECK: {{%[0-9]+}}:_(p0) = COPY [[A]] ; CHECK: G_BR %[[END]] define i64* @trivial_bitcast_with_copy(i8* %a) { @@ -512,13 +512,13 @@ define void @intrinsics(i32 %cur, i32 %bits) { } ; CHECK-LABEL: name: test_phi -; CHECK: G_BRCOND {{%.*}}, %[[TRUE:bb\.[0-9]+.true]] -; CHECK: G_BR %[[FALSE:bb\.[0-9]+.false]] +; CHECK: G_BRCOND {{%.*}}, %[[TRUE:bb\.[0-9]+]] +; CHECK: G_BR %[[FALSE:bb\.[0-9]+]] -; CHECK: [[TRUE]]: +; CHECK: [[TRUE]].{{[a-zA-Z0-9.]+}}: ; CHECK: [[RES1:%[0-9]+]]:_(s32) = G_LOAD -; CHECK: [[FALSE]]: +; CHECK: [[FALSE]].{{[a-zA-Z0-9.]+}}: ; CHECK: [[RES2:%[0-9]+]]:_(s32) = G_LOAD ; CHECK: [[RES:%[0-9]+]]:_(s32) = G_PHI [[RES1]](s32), %[[TRUE]], [[RES2]](s32), %[[FALSE]] @@ -554,7 +554,7 @@ define void @unreachable(i32 %a) { ; CHECK: [[IN:%[0-9]+]]:_(s32) = COPY %w0 ; CHECK: [[ONE:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 -; CHECK: {{bb.[0-9]+}}.next: +; CHECK: bb.{{[0-9]+}}.{{[a-zA-Z0-9.]+}}: ; CHECK: [[SUM1:%[0-9]+]]:_(s32) = G_ADD [[IN]], [[ONE]] ; CHECK: [[SUM2:%[0-9]+]]:_(s32) = G_ADD [[IN]], [[ONE]] ; CHECK: [[RES:%[0-9]+]]:_(s32) = G_ADD [[SUM1]], [[SUM2]] @@ -1147,7 +1147,7 @@ define void()* @test_global_func() { ret void()* @allocai64 } -declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32 %align, i1 %volatile) +declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1) define void @test_memcpy(i8* %dst, i8* %src, i64 %size) { ; CHECK-LABEL: name: test_memcpy ; CHECK: [[DST:%[0-9]+]]:_(p0) = COPY %x0 @@ -1156,12 +1156,12 @@ define void @test_memcpy(i8* %dst, i8* %src, i64 %size) { ; CHECK: %x0 = COPY [[DST]] ; CHECK: %x1 = COPY [[SRC]] ; CHECK: %x2 = COPY [[SIZE]] -; CHECK: BL $memcpy, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit %x0, implicit %x1, implicit %x2 - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %size, i32 1, i1 0) +; CHECK: BL &memcpy, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit %x0, implicit %x1, implicit %x2 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %size, i1 0) ret void } -declare void @llvm.memmove.p0i8.p0i8.i64(i8*, i8*, i64, i32 %align, i1 %volatile) +declare void @llvm.memmove.p0i8.p0i8.i64(i8*, i8*, i64, i1) define void @test_memmove(i8* %dst, i8* %src, i64 %size) { ; CHECK-LABEL: name: test_memmove ; CHECK: [[DST:%[0-9]+]]:_(p0) = COPY %x0 @@ -1170,12 
+1170,12 @@ define void @test_memmove(i8* %dst, i8* %src, i64 %size) { ; CHECK: %x0 = COPY [[DST]] ; CHECK: %x1 = COPY [[SRC]] ; CHECK: %x2 = COPY [[SIZE]] -; CHECK: BL $memmove, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit %x0, implicit %x1, implicit %x2 - call void @llvm.memmove.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %size, i32 1, i1 0) +; CHECK: BL &memmove, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit %x0, implicit %x1, implicit %x2 + call void @llvm.memmove.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %size, i1 0) ret void } -declare void @llvm.memset.p0i8.i64(i8*, i8, i64, i32 %align, i1 %volatile) +declare void @llvm.memset.p0i8.i64(i8*, i8, i64, i1) define void @test_memset(i8* %dst, i8 %val, i64 %size) { ; CHECK-LABEL: name: test_memset ; CHECK: [[DST:%[0-9]+]]:_(p0) = COPY %x0 @@ -1186,8 +1186,8 @@ define void @test_memset(i8* %dst, i8 %val, i64 %size) { ; CHECK: [[SRC_TMP:%[0-9]+]]:_(s32) = G_ANYEXT [[SRC]] ; CHECK: %w1 = COPY [[SRC_TMP]] ; CHECK: %x2 = COPY [[SIZE]] -; CHECK: BL $memset, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit %x0, implicit %w1, implicit %x2 - call void @llvm.memset.p0i8.i64(i8* %dst, i8 %val, i64 %size, i32 1, i1 0) +; CHECK: BL &memset, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit %x0, implicit %w1, implicit %x2 + call void @llvm.memset.p0i8.i64(i8* %dst, i8 %val, i64 %size, i1 0) ret void } @@ -1226,7 +1226,7 @@ define i8* @test_const_placement() { ; CHECK: bb.{{[0-9]+}} (%ir-block.{{[0-9]+}}): ; CHECK: [[VAL_INT:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; CHECK: [[VAL:%[0-9]+]]:_(p0) = G_INTTOPTR [[VAL_INT]](s32) -; CHECK: {{bb.[0-9]+}}.next: +; CHECK: bb.{{[0-9]+}}.{{[a-zA-Z0-9.]+}}: br label %next next: @@ -1370,8 +1370,8 @@ define double @test_fneg_f64(double %x) { define void @test_trivial_inlineasm() { ; CHECK-LABEL: name: test_trivial_inlineasm -; CHECK: INLINEASM $wibble, 1 -; CHECK: INLINEASM $wibble, 0 +; CHECK: INLINEASM &wibble, 1 +; CHECK: INLINEASM &wibble, 0 call void asm sideeffect "wibble", ""() call void asm "wibble", ""() ret void @@ -1636,3 +1636,16 @@ define i32 @test_target_mem_intrinsic(i32* %addr) { } declare i64 @llvm.aarch64.ldxr.p0i32(i32*) nounwind + +%zerosize_type = type {} + +define %zerosize_type @test_empty_load_store(%zerosize_type *%ptr, %zerosize_type %in) noinline optnone { +; CHECK-LABEL: name: test_empty_load_store +; CHECK-NOT: G_STORE +; CHECK-NOT: G_LOAD +; CHECK: RET_ReallyLR +entry: + store %zerosize_type undef, %zerosize_type* undef, align 4 + %val = load %zerosize_type, %zerosize_type* %ptr, align 4 + ret %zerosize_type %in +} diff --git a/test/CodeGen/AArch64/GlobalISel/debug-insts.ll b/test/CodeGen/AArch64/GlobalISel/debug-insts.ll index eb2d2ec4307c..be510b5f7e3b 100644 --- a/test/CodeGen/AArch64/GlobalISel/debug-insts.ll +++ b/test/CodeGen/AArch64/GlobalISel/debug-insts.ll @@ -6,7 +6,7 @@ ; CHECK: - { id: {{.*}}, name: in.addr, type: default, offset: 0, size: {{.*}}, alignment: {{.*}}, ; CHECK-NEXT: callee-saved-register: '', callee-saved-restored: true, ; CHECK-NEXT: di-variable: '!11', di-expression: '!DIExpression()', -; CHECK: DBG_VALUE debug-use %0(s32), debug-use _, !11, !DIExpression(), debug-location !12 +; CHECK: DBG_VALUE debug-use %0(s32), debug-use %noreg, !11, !DIExpression(), debug-location !12 define void @debug_declare(i32 %in) #0 !dbg !7 { entry: %in.addr = alloca i32, align 4 @@ -17,7 +17,7 @@ entry: } ; CHECK-LABEL: name: debug_declare_vla -; CHECK: DBG_VALUE debug-use %{{[0-9]+}}(p0), debug-use _, !14, !DIExpression(), debug-location !15 
+; CHECK: DBG_VALUE debug-use %{{[0-9]+}}(p0), debug-use %noreg, !14, !DIExpression(), debug-location !15 define void @debug_declare_vla(i32 %in) #0 !dbg !13 { entry: %vla.addr = alloca i32, i32 %in @@ -29,16 +29,16 @@ entry: ; CHECK: [[IN:%[0-9]+]]:_(s32) = COPY %w0 define void @debug_value(i32 %in) #0 !dbg !16 { %addr = alloca i32 -; CHECK: DBG_VALUE debug-use [[IN]](s32), debug-use _, !17, !DIExpression(), debug-location !18 +; CHECK: DBG_VALUE debug-use [[IN]](s32), debug-use %noreg, !17, !DIExpression(), debug-location !18 call void @llvm.dbg.value(metadata i32 %in, i64 0, metadata !17, metadata !DIExpression()), !dbg !18 store i32 %in, i32* %addr -; CHECK: DBG_VALUE debug-use %1(p0), debug-use _, !17, !DIExpression(DW_OP_deref), debug-location !18 +; CHECK: DBG_VALUE debug-use %1(p0), debug-use %noreg, !17, !DIExpression(DW_OP_deref), debug-location !18 call void @llvm.dbg.value(metadata i32* %addr, i64 0, metadata !17, metadata !DIExpression(DW_OP_deref)), !dbg !18 ; CHECK: DBG_VALUE 123, 0, !17, !DIExpression(), debug-location !18 call void @llvm.dbg.value(metadata i32 123, i64 0, metadata !17, metadata !DIExpression()), !dbg !18 ; CHECK: DBG_VALUE float 1.000000e+00, 0, !17, !DIExpression(), debug-location !18 call void @llvm.dbg.value(metadata float 1.000000e+00, i64 0, metadata !17, metadata !DIExpression()), !dbg !18 -; CHECK: DBG_VALUE _, 0, !17, !DIExpression(), debug-location !18 +; CHECK: DBG_VALUE %noreg, 0, !17, !DIExpression(), debug-location !18 call void @llvm.dbg.value(metadata i32* null, i64 0, metadata !17, metadata !DIExpression()), !dbg !18 ret void } diff --git a/test/CodeGen/AArch64/GlobalISel/fp128-legalize-crash-pr35690.mir b/test/CodeGen/AArch64/GlobalISel/fp128-legalize-crash-pr35690.mir new file mode 100644 index 000000000000..47fda8f998d7 --- /dev/null +++ b/test/CodeGen/AArch64/GlobalISel/fp128-legalize-crash-pr35690.mir @@ -0,0 +1,44 @@ +# RUN: llc -O0 -run-pass=legalizer -global-isel -global-isel-abort=0 %s -o - | FileCheck %s +--- | + target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" + target triple = "aarch64" + + define fp128 @x(fp128 %a) { + entry: + %a.addr = alloca fp128, align 16 + store fp128 %a, fp128* %a.addr, align 16 + %0 = load fp128, fp128* %a.addr, align 16 + %sub = fsub fp128 0xL00000000000000008000000000000000, %0 + ret fp128 %sub + } + +... +--- +name: x +alignment: 2 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +fixedStack: +stack: + - { id: 0, name: a.addr, type: default, offset: 0, size: 16, alignment: 16, + stack-id: 0, callee-saved-register: '', callee-saved-restored: true, + di-variable: '', di-expression: '', di-location: '' } +body: | + bb.1.entry: + liveins: %q0 + + ; This test just checks we don't crash on G_FNEG of FP128 types. Expect to fall + ; back until support is added for fp128. + ; CHECK: ret + %0:_(s128) = COPY %q0 + %1:_(p0) = G_FRAME_INDEX %stack.0.a.addr + G_STORE %0(s128), %1(p0) :: (store 16 into %ir.a.addr) + %2:_(s128) = G_LOAD %1(p0) :: (load 16 from %ir.a.addr) + %3:_(s128) = G_FNEG %2 + %q0 = COPY %3(s128) + RET_ReallyLR implicit %q0 + +... 
diff --git a/test/CodeGen/AArch64/GlobalISel/gisel-commandline-option.ll b/test/CodeGen/AArch64/GlobalISel/gisel-commandline-option.ll index 0972840de47b..3920e1d99c28 100644 --- a/test/CodeGen/AArch64/GlobalISel/gisel-commandline-option.ll +++ b/test/CodeGen/AArch64/GlobalISel/gisel-commandline-option.ll @@ -1,5 +1,8 @@ ; RUN: llc -mtriple=aarch64-- -debug-pass=Structure %s -o /dev/null 2>&1 \ -; RUN: -O0 -aarch64-enable-global-isel-at-O=0 \ +; RUN: -O0 | FileCheck %s --check-prefix ENABLED --check-prefix ENABLED-O0 --check-prefix FALLBACK + +; RUN: llc -mtriple=aarch64-- -debug-pass=Structure %s -o /dev/null 2>&1 \ +; RUN: -O0 -aarch64-enable-global-isel-at-O=0 -global-isel-abort=1 \ ; RUN: | FileCheck %s --check-prefix ENABLED --check-prefix ENABLED-O0 --check-prefix NOFALLBACK ; RUN: llc -mtriple=aarch64-- -debug-pass=Structure %s -o /dev/null 2>&1 \ @@ -29,6 +32,9 @@ ; RUN: llc -mtriple=aarch64-- -debug-pass=Structure %s -o /dev/null 2>&1 \ ; RUN: | FileCheck %s --check-prefix DISABLED +; RUN: llc -mtriple=aarch64-- -fast-isel=0 -global-isel=false \ +; RUN: -debug-pass=Structure %s -o /dev/null 2>&1 | FileCheck %s --check-prefix DISABLED + ; ENABLED: IRTranslator ; ENABLED-NEXT: Legalizer ; ENABLED-NEXT: RegBankSelect diff --git a/test/CodeGen/AArch64/GlobalISel/irtranslator-exceptions.ll b/test/CodeGen/AArch64/GlobalISel/irtranslator-exceptions.ll index 0e7fbd32c6fa..827fdd261082 100644 --- a/test/CodeGen/AArch64/GlobalISel/irtranslator-exceptions.ll +++ b/test/CodeGen/AArch64/GlobalISel/irtranslator-exceptions.ll @@ -9,7 +9,7 @@ declare i32 @llvm.eh.typeid.for(i8*) ; CHECK-LABEL: name: bar ; CHECK: body: ; CHECK-NEXT: bb.1 (%ir-block.0): -; CHECK: successors: %[[GOOD:bb.[0-9]+.continue]]{{.*}}%[[BAD:bb.[0-9]+.broken]] +; CHECK: successors: %[[GOOD:bb.[0-9]+]]{{.*}}%[[BAD:bb.[0-9]+]] ; CHECK: EH_LABEL ; CHECK: %w0 = COPY ; CHECK: BL @foo, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit %w0, implicit-def %w0 @@ -17,7 +17,7 @@ declare i32 @llvm.eh.typeid.for(i8*) ; CHECK: EH_LABEL ; CHECK: G_BR %[[GOOD]] -; CHECK: [[BAD]] (landing-pad): +; CHECK: [[BAD]].{{[a-z]+}} (landing-pad): ; CHECK: EH_LABEL ; CHECK: [[UNDEF:%[0-9]+]]:_(s128) = G_IMPLICIT_DEF ; CHECK: [[PTR:%[0-9]+]]:_(p0) = COPY %x0 @@ -30,7 +30,7 @@ declare i32 @llvm.eh.typeid.for(i8*) ; CHECK: %x0 = COPY [[PTR_RET]] ; CHECK: %w1 = COPY [[SEL_RET]] -; CHECK: [[GOOD]]: +; CHECK: [[GOOD]].{{[a-z]+}}: ; CHECK: [[SEL:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK: {{%[0-9]+}}:_(s128) = G_INSERT {{%[0-9]+}}, [[SEL]](s32), 64 diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-atomicrmw.mir b/test/CodeGen/AArch64/GlobalISel/legalize-atomicrmw.mir new file mode 100644 index 000000000000..b77d5e9a1d6d --- /dev/null +++ b/test/CodeGen/AArch64/GlobalISel/legalize-atomicrmw.mir @@ -0,0 +1,85 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=aarch64-- -mattr=+lse -run-pass=legalizer -verify-machineinstrs -global-isel %s -o - | FileCheck %s + +--- | + target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" + + define void @cmpxchg_i8(i8* %addr) { ret void } + define void @cmpxchg_i16(i16* %addr) { ret void } + define void @cmpxchg_i32(i32* %addr) { ret void } + define void @cmpxchg_i64(i64* %addr) { ret void } +... 
+ +--- +name: cmpxchg_i8 +body: | + bb.0: + liveins: %x0 + + ; CHECK-LABEL: name: cmpxchg_i8 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY %x0 + ; CHECK: [[CST:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK: [[CST2:%[0-9]+]]:_(s8) = G_TRUNC [[CST]] + ; CHECK: [[RES:%[0-9]+]]:_(s8) = G_ATOMICRMW_ADD [[COPY]](p0), [[CST2]] :: (load store monotonic 1 on %ir.addr) + ; CHECK: [[RES2:%[0-9]+]]:_(s32) = G_ANYEXT [[RES]] + ; CHECK: %w0 = COPY [[RES2]] + %0:_(p0) = COPY %x0 + %1:_(s8) = G_CONSTANT i8 1 + %2:_(s8) = G_ATOMICRMW_ADD %0, %1 :: (load store monotonic 1 on %ir.addr) + %3:_(s32) = G_ANYEXT %2 + %w0 = COPY %3(s32) +... + +--- +name: cmpxchg_i16 +body: | + bb.0: + liveins: %x0 + + ; CHECK-LABEL: name: cmpxchg_i16 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY %x0 + ; CHECK: [[CST:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK: [[CST2:%[0-9]+]]:_(s16) = G_TRUNC [[CST]] + ; CHECK: [[RES:%[0-9]+]]:_(s16) = G_ATOMICRMW_ADD [[COPY]](p0), [[CST2]] :: (load store monotonic 2 on %ir.addr) + ; CHECK: [[RES2:%[0-9]+]]:_(s32) = G_ANYEXT [[RES]] + ; CHECK: %w0 = COPY [[RES2]] + %0:_(p0) = COPY %x0 + %1:_(s16) = G_CONSTANT i16 1 + %2:_(s16) = G_ATOMICRMW_ADD %0, %1 :: (load store monotonic 2 on %ir.addr) + %3:_(s32) = G_ANYEXT %2 + %w0 = COPY %3(s32) +... + +--- +name: cmpxchg_i32 +body: | + bb.0: + liveins: %x0 + + ; CHECK-LABEL: name: cmpxchg_i32 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY %x0 + ; CHECK: [[CST:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK: [[RES:%[0-9]+]]:_(s32) = G_ATOMICRMW_ADD [[COPY]](p0), [[CST]] :: (load store monotonic 4 on %ir.addr) + ; CHECK: %w0 = COPY [[RES]] + %0:_(p0) = COPY %x0 + %1:_(s32) = G_CONSTANT i32 1 + %2:_(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store monotonic 4 on %ir.addr) + %w0 = COPY %2(s32) +... + +--- +name: cmpxchg_i64 +body: | + bb.0: + liveins: %x0 + + ; CHECK-LABEL: name: cmpxchg_i64 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY %x0 + ; CHECK: [[CST:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK: [[RES:%[0-9]+]]:_(s64) = G_ATOMICRMW_ADD [[COPY]](p0), [[CST]] :: (load store monotonic 8 on %ir.addr) + ; CHECK: %x0 = COPY [[RES]] + %0:_(p0) = COPY %x0 + %1:_(s64) = G_CONSTANT i64 1 + %2:_(s64) = G_ATOMICRMW_ADD %0, %1 :: (load store monotonic 8 on %ir.addr) + %x0 = COPY %2(s64) +... diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-cmpxchg-with-success.mir b/test/CodeGen/AArch64/GlobalISel/legalize-cmpxchg-with-success.mir new file mode 100644 index 000000000000..633033670cc9 --- /dev/null +++ b/test/CodeGen/AArch64/GlobalISel/legalize-cmpxchg-with-success.mir @@ -0,0 +1,59 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=aarch64-- -mattr=+lse -run-pass=legalizer -verify-machineinstrs -global-isel %s -o - | FileCheck %s + +--- | + target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" + + define void @cmpxchg_i32(i64* %addr) { ret void } + define void @cmpxchg_i64(i64* %addr) { ret void } +... 
+ +--- +name: cmpxchg_i32 + +body: | + bb.0: + liveins: %x0 + + ; CHECK-LABEL: name: cmpxchg_i32 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY %x0 + ; CHECK: [[CMP:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[CST:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK: [[RES:%[0-9]+]]:_(s32) = G_ATOMIC_CMPXCHG [[COPY]](p0), [[CMP]], [[CST]] :: (load store monotonic 8 on %ir.addr) + ; CHECK: [[SRES:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[RES]](s32), [[CMP]] + ; CHECK: [[SRES32:%[0-9]+]]:_(s32) = COPY [[SRES]] + ; CHECK: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[RES]], [[SRES32]] + ; CHECK: %w0 = COPY [[MUL]] + %0:_(p0) = COPY %x0 + %1:_(s32) = G_CONSTANT i32 0 + %2:_(s32) = G_CONSTANT i32 1 + %3:_(s32), %4:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS %0, %1, %2 :: (load store monotonic 8 on %ir.addr) + %5:_(s32) = G_ANYEXT %4 + %6:_(s32) = G_MUL %3, %5 + %w0 = COPY %6(s32) +... + +--- +name: cmpxchg_i64 + +body: | + bb.0: + liveins: %x0 + + ; CHECK-LABEL: name: cmpxchg_i64 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY %x0 + ; CHECK: [[CMP:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK: [[CST:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK: [[RES:%[0-9]+]]:_(s64) = G_ATOMIC_CMPXCHG [[COPY]](p0), [[CMP]], [[CST]] :: (load store monotonic 8 on %ir.addr) + ; CHECK: [[SRES:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[RES]](s64), [[CMP]] + ; CHECK: [[SRES64:%[0-9]+]]:_(s64) = G_ANYEXT [[SRES]] + ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[RES]], [[SRES64]] + ; CHECK: %x0 = COPY [[MUL]] + %0:_(p0) = COPY %x0 + %1:_(s64) = G_CONSTANT i64 0 + %2:_(s64) = G_CONSTANT i64 1 + %3:_(s64), %4:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS %0, %1, %2 :: (load store monotonic 8 on %ir.addr) + %5:_(s64) = G_ANYEXT %4 + %6:_(s64) = G_MUL %3, %5 + %x0 = COPY %6(s64) +... diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-cmpxchg.mir b/test/CodeGen/AArch64/GlobalISel/legalize-cmpxchg.mir new file mode 100644 index 000000000000..898cd12d1180 --- /dev/null +++ b/test/CodeGen/AArch64/GlobalISel/legalize-cmpxchg.mir @@ -0,0 +1,95 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=aarch64-- -mattr=+lse -run-pass=legalizer -verify-machineinstrs -global-isel %s -o - | FileCheck %s + +--- | + target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" + + define void @cmpxchg_i8(i8* %addr) { ret void } + define void @cmpxchg_i16(i16* %addr) { ret void } + define void @cmpxchg_i32(i32* %addr) { ret void } + define void @cmpxchg_i64(i64* %addr) { ret void } +... + +--- +name: cmpxchg_i8 +body: | + bb.0: + liveins: %x0 + + ; CHECK-LABEL: name: cmpxchg_i8 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY %x0 + ; CHECK: [[CMP:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[CMPT:%[0-9]+]]:_(s8) = G_TRUNC [[CMP]] + ; CHECK: [[CST:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK: [[CSTT:%[0-9]+]]:_(s8) = G_TRUNC [[CST]] + ; CHECK: [[RES:%[0-9]+]]:_(s8) = G_ATOMIC_CMPXCHG [[COPY]](p0), [[CMPT]], [[CSTT]] :: (load store monotonic 1 on %ir.addr) + ; CHECK: [[RES2:%[0-9]+]]:_(s32) = G_ANYEXT [[RES]](s8) + ; CHECK: %w0 = COPY [[RES2]] + %0:_(p0) = COPY %x0 + %1:_(s8) = G_CONSTANT i8 0 + %2:_(s8) = G_CONSTANT i8 1 + %3:_(s8) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store monotonic 1 on %ir.addr) + %4:_(s32) = G_ANYEXT %3 + %w0 = COPY %4(s32) +... 
+ +--- +name: cmpxchg_i16 +body: | + bb.0: + liveins: %x0 + + ; CHECK-LABEL: name: cmpxchg_i16 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY %x0 + ; CHECK: [[CMP:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[CMPT:%[0-9]+]]:_(s16) = G_TRUNC [[CMP]] + ; CHECK: [[CST:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK: [[CSTT:%[0-9]+]]:_(s16) = G_TRUNC [[CST]] + ; CHECK: [[RES:%[0-9]+]]:_(s16) = G_ATOMIC_CMPXCHG [[COPY]](p0), [[CMPT]], [[CSTT]] :: (load store monotonic 2 on %ir.addr) + ; CHECK: [[RES2:%[0-9]+]]:_(s32) = G_ANYEXT [[RES]](s16) + ; CHECK: %w0 = COPY [[RES2]] + %0:_(p0) = COPY %x0 + %1:_(s16) = G_CONSTANT i16 0 + %2:_(s16) = G_CONSTANT i16 1 + %3:_(s16) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store monotonic 2 on %ir.addr) + %4:_(s32) = G_ANYEXT %3 + %w0 = COPY %4(s32) +... + +--- +name: cmpxchg_i32 +body: | + bb.0: + liveins: %x0 + + ; CHECK-LABEL: name: cmpxchg_i32 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY %x0 + ; CHECK: [[CMP:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[CST:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK: [[RES:%[0-9]+]]:_(s32) = G_ATOMIC_CMPXCHG [[COPY]](p0), [[CMP]], [[CST]] :: (load store monotonic 4 on %ir.addr) + ; CHECK: %w0 = COPY [[RES]] + %0:_(p0) = COPY %x0 + %1:_(s32) = G_CONSTANT i32 0 + %2:_(s32) = G_CONSTANT i32 1 + %3:_(s32) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store monotonic 4 on %ir.addr) + %w0 = COPY %3(s32) +... + +--- +name: cmpxchg_i64 +body: | + bb.0: + liveins: %x0 + + ; CHECK-LABEL: name: cmpxchg_i64 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY %x0 + ; CHECK: [[CMP:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK: [[CST:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK: [[RES:%[0-9]+]]:_(s64) = G_ATOMIC_CMPXCHG [[COPY]](p0), [[CMP]], [[CST]] :: (load store monotonic 8 on %ir.addr) + ; CHECK: %x0 = COPY [[RES]] + %0:_(p0) = COPY %x0 + %1:_(s64) = G_CONSTANT i64 0 + %2:_(s64) = G_CONSTANT i64 1 + %3:_(s64) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store monotonic 8 on %ir.addr) + %x0 = COPY %3(s64) +... diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-combines.mir b/test/CodeGen/AArch64/GlobalISel/legalize-combines.mir index 82594b8c476a..9cf0f8fd0e71 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-combines.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-combines.mir @@ -8,7 +8,6 @@ define void @test_combines_3() { ret void } define void @test_combines_4() { ret void } define void @test_combines_5() { ret void } - define void @test_combines_6() { ret void } ... --- @@ -90,23 +89,3 @@ body: | %5:_(s32) = G_ADD %3, %4 %w0 = COPY %5 ... - ---- -name: test_combines_6 -body: | - bb.0: - liveins: %w0 - - ; Check that we replace all the uses of a G_EXTRACT. - ; CHECK-LABEL: name: test_combines_6 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY %w0 - ; CHECK: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY]], [[COPY]] - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[MUL]] - %0:_(s32) = COPY %w0 - - %1:_(s32) = G_MERGE_VALUES %0 - %2:_(s32) = G_UNMERGE_VALUES %1 - %3:_(s32) = G_MUL %2, %2 - %4:_(s32) = G_ADD %2, %3 - %w0 = COPY %4 -... 
diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-exceptions.ll b/test/CodeGen/AArch64/GlobalISel/legalize-exceptions.ll index da40b274aa62..01f955bc1d10 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-exceptions.ll +++ b/test/CodeGen/AArch64/GlobalISel/legalize-exceptions.ll @@ -10,9 +10,9 @@ declare void @_Unwind_Resume(i8*) ; CHECK: name: bar ; CHECK: body: ; CHECK-NEXT: bb.1 (%ir-block.0): -; CHECK: successors: %{{bb.[0-9]+.continue.*}}%[[LP:bb.[0-9]+.cleanup]] +; CHECK: successors: %{{bb.[0-9]+.*}}%[[LP:bb.[0-9]+]] -; CHECK: [[LP]] (landing-pad): +; CHECK: [[LP]].{{[a-z]+}} (landing-pad): ; CHECK: EH_LABEL ; CHECK: [[PTR:%[0-9]+]]:_(p0) = COPY %x0 diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-merge-values.mir b/test/CodeGen/AArch64/GlobalISel/legalize-merge-values.mir new file mode 100644 index 000000000000..e6171380344e --- /dev/null +++ b/test/CodeGen/AArch64/GlobalISel/legalize-merge-values.mir @@ -0,0 +1,30 @@ +# RUN: llc -O0 -run-pass=legalizer -global-isel -global-isel-abort=0 -pass-remarks-missed='gisel*' %s -o - 2>&1 | FileCheck %s + +--- | + target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" + target triple = "aarch64--" + define void @test_merge_s4() { + ret void + } +... + +--- +name: test_merge_s4 +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } + - { id: 3, class: _ } + - { id: 4, class: _ } +body: | + bb.0: + %0(s64) = G_CONSTANT i64 0 + %1(s4) = G_TRUNC %0(s64) + ; Previously, LegalizerInfo was assuming all G_MERGE_VALUES and G_UNMERGE_VALUES + ; instructions are legal. Make sure that is no longer happening. + ; CHECK: unable to legalize instruction: {{.*}} G_MERGE_VALUES + %2(s8) = G_MERGE_VALUES %1(s4), %1(s4) + %3(s8) = COPY %2(s8) + %4(s64) = G_ANYEXT %3(s8) + %x0 = COPY %4(s64) +... diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-mul.mir b/test/CodeGen/AArch64/GlobalISel/legalize-mul.mir index c94d73920ca3..efe9105b90c7 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-mul.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-mul.mir @@ -8,7 +8,12 @@ entry: ret void } - define void @test_mul_overflow() { ret void } + define void @test_smul_overflow() { + ret void + } + define void @test_umul_overflow() { + ret void + } ... --- @@ -43,18 +48,19 @@ body: | --- -name: test_mul_overflow +name: test_smul_overflow body: | bb.0: liveins: %x0, %x1, %w2, %w3 - ; CHECK-LABEL: name: test_mul_overflow + ; CHECK-LABEL: name: test_smul_overflow ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %x0 ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY %x1 ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[COPY]], [[COPY1]] ; CHECK: [[SMULH:%[0-9]+]]:_(s64) = G_SMULH [[COPY]], [[COPY1]] - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[SMULH]](s64), [[C]] + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 + ; CHECK: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[MUL]], [[C]] + ; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[SMULH]](s64), [[ASHR]] ; CHECK: %x0 = COPY [[MUL]](s64) ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32) ; CHECK: %w0 = COPY [[COPY2]](s32) @@ -66,3 +72,29 @@ body: | %w0 = COPY %4 ... 
+ + +--- +name: test_umul_overflow +body: | + bb.0: + liveins: %x0, %x1, %w2, %w3 + + ; CHECK-LABEL: name: test_umul_overflow + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY %x1 + ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[COPY]], [[COPY1]] + ; CHECK: [[UMULH:%[0-9]+]]:_(s64) = G_UMULH [[COPY]], [[COPY1]] + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[UMULH]](s64), [[C]] + ; CHECK: %x0 = COPY [[MUL]](s64) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32) + ; CHECK: %w0 = COPY [[COPY2]](s32) + %0:_(s64) = COPY %x0 + %1:_(s64) = COPY %x1 + %2:_(s64), %3:_(s1) = G_UMULO %0, %1 + %x0 = COPY %2 + %4:_(s32) = G_ANYEXT %3 + %w0 = COPY %4 + +... diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-nonpowerof2eltsvec.mir b/test/CodeGen/AArch64/GlobalISel/legalize-nonpowerof2eltsvec.mir index a586e69c855c..168e1df02775 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-nonpowerof2eltsvec.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-nonpowerof2eltsvec.mir @@ -4,33 +4,31 @@ --- | target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "aarch64--" - define void @test_legalize_merge_v3s32() { + define void @test_legalize_merge_v3s64() { ret void } ... --- -name: test_legalize_merge_v3s32 +name: test_legalize_merge_v3s64 registers: - { id: 0, class: _ } - { id: 1, class: _ } - { id: 2, class: _ } - { id: 3, class: _ } + - { id: 4, class: _ } + - { id: 5, class: _ } body: | bb.0: - liveins: %w0, %w1, %w2 - ; CHECK-LABEL: name: test_legalize_merge_v3s32 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY %w0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY %w1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY %w2 - ; CHECK: %w0 = COPY [[COPY]](s32) - ; CHECK: %w1 = COPY [[COPY1]](s32) - ; CHECK: %w2 = COPY [[COPY2]](s32) - %0(s32) = COPY %w0 - %1(s32) = COPY %w1 - %2(s32) = COPY %w2 - %3(<3 x s32>) = G_MERGE_VALUES %0(s32), %1(s32), %2(s32) - %4:_(s32), %5:_(s32), %6:_(s32) = G_UNMERGE_VALUES %3 - %w0 = COPY %4 - %w1 = COPY %5 - %w2 = COPY %6 + liveins: %w0 + ; CHECK-LABEL: name: test_legalize_merge_v3s64 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %x0 + ; CHECK: [[MV:%[0-9]+]]:_(<3 x s64>) = G_MERGE_VALUES [[COPY]](s64), [[COPY]](s64), [[COPY]](s64) + ; CHECK: [[COPY1:%[0-9]+]]:_(<3 x s64>) = COPY [[MV]](<3 x s64>) + ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<3 x s64>) + ; CHECK: %x0 = COPY [[UV]](s64) + %0(s64) = COPY %x0 + %1(<3 x s64>) = G_MERGE_VALUES %0(s64), %0(s64), %0(s64) + %2(<3 x s64>) = COPY %1(<3 x s64>) + %3(s64), %4(s64), %5(s64) = G_UNMERGE_VALUES %2(<3 x s64>) + %x0 = COPY %3(s64) ... 
diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-pow.mir b/test/CodeGen/AArch64/GlobalISel/legalize-pow.mir index be3485919973..8b08331a69c0 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-pow.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-pow.mir @@ -25,14 +25,14 @@ body: | ; CHECK: %d0 = COPY %0 ; CHECK: %d1 = COPY %1 - ; CHECK: BL $pow, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit %d0, implicit %d1, implicit-def %d0 + ; CHECK: BL &pow, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit %d0, implicit %d1, implicit-def %d0 ; CHECK: %4:_(s64) = COPY %d0 %4:_(s64) = G_FPOW %0, %1 %x0 = COPY %4 ; CHECK: %s0 = COPY %2 ; CHECK: %s1 = COPY %3 - ; CHECK: BL $powf, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit %s0, implicit %s1, implicit-def %s0 + ; CHECK: BL &powf, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit %s0, implicit %s1, implicit-def %s0 ; CHECK: %5:_(s32) = COPY %s0 %5:_(s32) = G_FPOW %2, %3 %w0 = COPY %5 diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-rem.mir b/test/CodeGen/AArch64/GlobalISel/legalize-rem.mir index 7303a9c26fc9..00d0f883b54e 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-rem.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-rem.mir @@ -135,7 +135,7 @@ body: | ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def %sp, implicit %sp ; CHECK: %d0 = COPY [[COPY]](s64) ; CHECK: %d1 = COPY [[COPY1]](s64) - ; CHECK: BL $fmod, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit %d0, implicit %d1, implicit-def %d0 + ; CHECK: BL &fmod, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit %d0, implicit %d1, implicit-def %d0 ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY %d0 ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def %sp, implicit %sp ; CHECK: %x0 = COPY [[COPY2]](s64) @@ -144,7 +144,7 @@ body: | ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def %sp, implicit %sp ; CHECK: %s0 = COPY [[TRUNC]](s32) ; CHECK: %s1 = COPY [[TRUNC1]](s32) - ; CHECK: BL $fmodf, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit %s0, implicit %s1, implicit-def %s0 + ; CHECK: BL &fmodf, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit %s0, implicit %s1, implicit-def %s0 ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY %s0 ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def %sp, implicit %sp ; CHECK: %w0 = COPY [[COPY3]](s32) diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-simple.mir b/test/CodeGen/AArch64/GlobalISel/legalize-simple.mir index 9c028eb9d95b..a7329916ea83 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-simple.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-simple.mir @@ -43,16 +43,16 @@ registers: - { id: 16, class: _ } body: | ; CHECK-LABEL: name: test_simple - ; CHECK: bb.0.entry: - ; CHECK: successors: %bb.1.next(0x80000000) + ; CHECK: bb.0.{{[a-zA-Z0-9]+}}: + ; CHECK: successors: %bb.1(0x80000000) ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %x0 ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s64) ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) ; CHECK: [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[COPY]](s64) ; CHECK: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[INTTOPTR]](p0) ; CHECK: %x0 = COPY [[PTRTOINT]](s64) - ; CHECK: G_BRCOND [[TRUNC]](s1), %bb.1.next - ; CHECK: bb.1.next: + ; CHECK: G_BRCOND [[TRUNC]](s1), %bb.1 + ; CHECK: bb.1.{{[a-zA-Z0-9]+}}: ; CHECK: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) ; CHECK: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[TRUNC]](s1), [[TRUNC2]], [[TRUNC3]] @@ -95,7 +95,7 @@ body: | %6(s64) = 
G_PTRTOINT %5 %x0 = COPY %6 - G_BRCOND %1, %bb.1.next + G_BRCOND %1, %bb.1 bb.1.next: diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-unmerge-values.mir b/test/CodeGen/AArch64/GlobalISel/legalize-unmerge-values.mir new file mode 100644 index 000000000000..85b65e945486 --- /dev/null +++ b/test/CodeGen/AArch64/GlobalISel/legalize-unmerge-values.mir @@ -0,0 +1,28 @@ +# RUN: llc -O0 -run-pass=legalizer -global-isel -global-isel-abort=0 -pass-remarks-missed='gisel*' %s -o - 2>&1 | FileCheck %s + +--- | + target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" + target triple = "aarch64--" + define void @test_unmerge_s4() { + ret void + } +... + +--- +name: test_unmerge_s4 +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } + - { id: 3, class: _ } +body: | + bb.0: + %0(s8) = G_CONSTANT i8 0 + ; Previously, LegalizerInfo was assuming all G_MERGE_VALUES and G_UNMERGE_VALUES + ; instructions are legal. Make sure that is no longer happening. + ; CHECK: unable to legalize instruction: {{.*}} G_UNMERGE_VALUES + %1(s4), %2(s4)= G_UNMERGE_VALUES %0(s8) + %3(s64) = G_ANYEXT %1(s4) + %x0 = COPY %3(s64) + +... diff --git a/test/CodeGen/AArch64/GlobalISel/localizer-in-O0-pipeline.mir b/test/CodeGen/AArch64/GlobalISel/localizer-in-O0-pipeline.mir index 997205bc0ef6..d4ed70fa5316 100644 --- a/test/CodeGen/AArch64/GlobalISel/localizer-in-O0-pipeline.mir +++ b/test/CodeGen/AArch64/GlobalISel/localizer-in-O0-pipeline.mir @@ -59,19 +59,19 @@ registers: # CHECK: %5:fpr(s32) = G_FCONSTANT float 2.000000e+00 # Second block will get the constant 1.0 when the localizer is enabled. -# CHECK: bb.1.true: +# CHECK: bb.1.{{[a-zA-Z0-9]+}}: # OPT-NOT: G_FCONSTANT # OPTNONE: [[FONE:%[0-9]+]]:fpr(s32) = G_FCONSTANT float 1.000000e+00 -# CHECK: G_BR %bb.3.end +# CHECK: G_BR %bb.3 # Thrid block will get the constant 2.0 when the localizer is enabled. 
-# CHECK: bb.2.false: +# CHECK: bb.2.{{[a-zA-Z0-9]+}}: # OPT-NOT: G_FCONSTANT # OPTNONE: [[FTWO:%[0-9]+]]:fpr(s32) = G_FCONSTANT float 2.000000e+00 # CHECK: bb.3.end -# OPTNONE: %2:fpr(s32) = PHI [[FONE]](s32), %bb.1.true, [[FTWO]](s32), %bb.2.false -# OPT: %2:fpr(s32) = PHI %4(s32), %bb.1.true, %5(s32), %bb.2.false +# OPTNONE: %2:fpr(s32) = PHI [[FONE]](s32), %bb.1, [[FTWO]](s32), %bb.2 +# OPT: %2:fpr(s32) = PHI %4(s32), %bb.1, %5(s32), %bb.2 # CHECK-NEXT: G_FADD %0, %2 body: | bb.0 (%ir-block.0): @@ -82,16 +82,16 @@ body: | %1(s1) = G_TRUNC %6 %4(s32) = G_FCONSTANT float 1.000000e+00 %5(s32) = G_FCONSTANT float 2.000000e+00 - G_BRCOND %1(s1), %bb.1.true - G_BR %bb.2.false + G_BRCOND %1(s1), %bb.1 + G_BR %bb.2 bb.1.true: - G_BR %bb.3.end + G_BR %bb.3 bb.2.false: bb.3.end: - %2(s32) = PHI %4(s32), %bb.1.true, %5(s32), %bb.2.false + %2(s32) = PHI %4(s32), %bb.1, %5(s32), %bb.2 %3(s32) = G_FADD %0, %2 %s0 = COPY %3(s32) RET_ReallyLR implicit %s0 diff --git a/test/CodeGen/AArch64/GlobalISel/machine-cse-mid-pipeline.mir b/test/CodeGen/AArch64/GlobalISel/machine-cse-mid-pipeline.mir new file mode 100644 index 000000000000..a14c93cf2c20 --- /dev/null +++ b/test/CodeGen/AArch64/GlobalISel/machine-cse-mid-pipeline.mir @@ -0,0 +1,181 @@ +# RUN: llc -run-pass machine-cse -global-isel -verify-machineinstrs -mtriple aarch64-apple-ios %s -o - | FileCheck %s +--- +name: irtranslated +legalized: false +regBankSelected: false +selected: false +body: | + ; CHECK-LABEL: name: irtranslated + ; CHECK: %[[ONE:[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: %[[TWO:[0-9]+]]:_(s32) = G_ADD %[[ONE]], %[[ONE]] + ; CHECK-NEXT: %[[SUM:[0-9]+]]:_(s32) = G_ADD %[[TWO]], %[[TWO]] + ; CHECK-NEXT: %[[RET:[wx][0-9]+]] = COPY %[[SUM]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit %[[RET]] + bb.0: + %0:_(s32) = G_CONSTANT i32 1 + %1:_(s32) = G_ADD %0, %0 + %2:_(s32) = G_ADD %0, %0 + %3:_(s32) = G_ADD %1, %2 + %w0 = COPY %3(s32) + RET_ReallyLR implicit %w0 +... +--- +name: regbankselected +legalized: true +regBankSelected: true +selected: false +body: | + ; CHECK-LABEL: name: regbankselected + ; CHECK: %[[ONE:[0-9]+]]:gpr(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: %[[TWO:[0-9]+]]:gpr(s32) = G_ADD %[[ONE]], %[[ONE]] + ; CHECK-NEXT: %[[SUM:[0-9]+]]:gpr(s32) = G_ADD %[[TWO]], %[[TWO]] + ; CHECK-NEXT: %[[RET:[wx][0-9]+]] = COPY %[[SUM]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit %[[RET]] + bb.0: + %0:gpr(s32) = G_CONSTANT i32 1 + %1:gpr(s32) = G_ADD %0, %0 + %2:gpr(s32) = G_ADD %0, %0 + %3:gpr(s32) = G_ADD %1, %2 + %w0 = COPY %3(s32) + RET_ReallyLR implicit %w0 +... +--- +name: legalized +legalized: true +regBankSelected: false +selected: false +body: | + ; CHECK-LABEL: name: legalized + ; CHECK: %[[ONE:[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: %[[TWO:[0-9]+]]:gpr(s32) = G_ADD %[[ONE]], %[[ONE]] + ; CHECK-NEXT: %[[SUM:[0-9]+]]:_(s32) = G_ADD %[[TWO]], %[[TWO]] + ; CHECK-NEXT: %[[RET:[wx][0-9]+]] = COPY %[[SUM]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit %[[RET]] + bb.0: + %0:_(s32) = G_CONSTANT i32 1 + %1:_(s32) = G_ADD %0, %0 + %2:gpr(s32) = G_ADD %0, %0 + %3:_(s32) = G_ADD %1, %2 + %w0 = COPY %3(s32) + RET_ReallyLR implicit %w0 +... 
+--- +name: legalized_sym +legalized: true +regBankSelected: false +selected: false +body: | + ; CHECK-LABEL: name: legalized_sym + ; CHECK: %[[ONE:[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: %[[TWO:[0-9]+]]:gpr(s32) = G_ADD %[[ONE]], %[[ONE]] + ; CHECK-NEXT: %[[SUM:[0-9]+]]:_(s32) = G_ADD %[[TWO]], %[[TWO]] + ; CHECK-NEXT: %[[RET:[wx][0-9]+]] = COPY %[[SUM]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit %[[RET]] + bb.0: + %0:_(s32) = G_CONSTANT i32 1 + %1:gpr(s32) = G_ADD %0, %0 + %2:_(s32) = G_ADD %0, %0 + %3:_(s32) = G_ADD %1, %2 + %w0 = COPY %3(s32) + RET_ReallyLR implicit %w0 +... +--- +name: int_extensions +alignment: 2 +legalized: false +regBankSelected: false +selected: false +body: | + ; CHECK-LABEL: name: int_extensions + ; CHECK: %[[ONE:[0-9]+]]:_(s8) = G_CONSTANT i8 1 + ; CHECK-NEXT: %[[S16:[0-9]+]]:_(s16) = G_SEXT %[[ONE]](s8) + ; CHECK-NEXT: %[[S32:[0-9]+]]:_(s32) = G_SEXT %[[ONE]](s8) + ; CHECK-NEXT: %[[S16_Z64:[0-9]+]]:_(s64) = G_ZEXT %[[S16]](s16) + ; CHECK-NEXT: %[[S32_Z64:[0-9]+]]:_(s64) = G_ZEXT %[[S32]](s32) + ; CHECK-NEXT: %[[SUM:[0-9]+]]:_(s64) = G_ADD %[[S16_Z64]], %[[S32_Z64]] + ; CHECK-NEXT: %[[RET:[wx][0-9]+]] = COPY %[[SUM]](s64) + ; CHECK-NEXT: RET_ReallyLR implicit %[[RET]] + bb.0.entry: + %0:_(s8) = G_CONSTANT i8 1 + %1:_(s16) = G_SEXT %0(s8) + %2:_(s32) = G_SEXT %0(s8) + %3:_(s64) = G_ZEXT %1(s16) + %4:_(s64) = G_ZEXT %2(s32) + %5:_(s64) = G_ADD %3, %4 + %x0 = COPY %5(s64) + RET_ReallyLR implicit %x0 +... +--- +name: generic +legalized: true +regBankSelected: false +selected: false +body: | + ; CHECK-LABEL: name: generic + ; CHECK: %[[SG:[0-9]+]]:_(s32) = G_ADD %{{[0-9]+}}, %{{[0-9]+}} + ; CHECK-NEXT: %{{[0-9]+}}:_(s32) = G_ADD %[[SG]], %[[SG]] + bb.0: + %0:_(s32) = COPY %w0 + %1:_(s32) = COPY %w1 + %2:_(s32) = G_ADD %0, %1 + %3:_(s32) = COPY %2(s32) + %4:_(s32) = G_ADD %3, %3 + %w0 = COPY %4(s32) + RET_ReallyLR implicit %w0 +... +--- +name: generic_to_concrete_copy +legalized: true +regBankSelected: false +selected: false +body: | + ; CHECK-LABEL: name: generic_to_concrete_copy + ; CHECK: %[[S1:[0-9]+]]:_(s32) = G_ADD %{{[0-9]+}}, %{{[0-9]+}} + ; CHECK-NEXT: %[[S2:[0-9]+]]:gpr32 = COPY %[[S1]](s32) + ; CHECK-NEXT: %{{[0-9]+}}:gpr32 = ADDWrr %[[S2]], %[[S2]] + bb.0: + %0:_(s32) = COPY %w0 + %1:_(s32) = COPY %w1 + %2:_(s32) = G_ADD %0, %1 + %3:gpr32 = COPY %2(s32) + %4:gpr32 = ADDWrr %3, %3 + %w0 = COPY %4 + RET_ReallyLR implicit %w0 +... +--- +name: concrete_to_generic_copy +legalized: true +regBankSelected: false +selected: false +body: | + ; CHECK-LABEL: name: concrete_to_generic_copy + ; CHECK: %[[S1:[0-9]+]]:gpr32 = ADDWrr %{{[0-9]+}}, %{{[0-9]+}} + ; CHECK-NEXT: %[[S2:[0-9]+]]:_(s32) = COPY %[[S1]] + ; CHECK-NEXT: %{{[0-9]+}}:_(s32) = G_ADD %[[S2]], %[[S2]] + bb.0: + %0:gpr32 = COPY %w0 + %1:gpr32 = COPY %w1 + %2:gpr32 = ADDWrr %0, %1 + %3:_(s32) = COPY %2 + %4:_(s32) = G_ADD %3, %3 + %w0 = COPY %4(s32) + RET_ReallyLR implicit %w0 +... +--- +name: concrete +legalized: true +regBankSelected: false +selected: false +body: | + ; CHECK-LABEL: name: concrete + ; CHECK: %[[SC:[0-9]+]]:gpr32 = ADDWrr %{{[0-9]+}}, %{{[0-9]+}} + ; CHECK-NEXT: %{{[0-9]+}}:gpr32 = ADDWrr %[[SC]], %[[SC]] + bb.0: + %0:gpr32 = COPY %w0 + %1:gpr32 = COPY %w1 + %2:gpr32 = ADDWrr %0, %1 + %3:gpr32 = COPY %2 + %4:gpr32 = ADDWrr %3, %3 + %w0 = COPY %4 + RET_ReallyLR implicit %w0 +... 
diff --git a/test/CodeGen/AArch64/GlobalISel/no-regclass.mir b/test/CodeGen/AArch64/GlobalISel/no-regclass.mir index d4d23142ab9c..8732274fe034 100644 --- a/test/CodeGen/AArch64/GlobalISel/no-regclass.mir +++ b/test/CodeGen/AArch64/GlobalISel/no-regclass.mir @@ -25,7 +25,7 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:gpr32all = COPY %w0 ; CHECK: %w0 = COPY [[COPY]] %0:gpr(s32) = COPY %w0 - %1:gpr(s32) = G_MERGE_VALUES %0(s32) - %2:gpr(s32) = G_UNMERGE_VALUES %1(s32) + %1:gpr(s64) = G_MERGE_VALUES %0(s32), %0(s32) + %2:gpr(s32), %3:gpr(s32) = G_UNMERGE_VALUES %1(s64) %w0 = COPY %2(s32) ... diff --git a/test/CodeGen/AArch64/GlobalISel/regbankselect-dbg-value.mir b/test/CodeGen/AArch64/GlobalISel/regbankselect-dbg-value.mir index 4282bffdab12..201565c675af 100644 --- a/test/CodeGen/AArch64/GlobalISel/regbankselect-dbg-value.mir +++ b/test/CodeGen/AArch64/GlobalISel/regbankselect-dbg-value.mir @@ -36,9 +36,9 @@ body: | bb.0: liveins: %w0 %0:_(s32) = COPY %w0 - ; CHECK: DBG_VALUE debug-use %0(s32), debug-use _, !7, !DIExpression(), debug-location !9 - DBG_VALUE debug-use %0(s32), debug-use _, !7, !DIExpression(), debug-location !9 + ; CHECK: DBG_VALUE debug-use %0(s32), debug-use %noreg, !7, !DIExpression(), debug-location !9 + DBG_VALUE debug-use %0(s32), debug-use %noreg, !7, !DIExpression(), debug-location !9 - ; CHECK: DBG_VALUE _, 0, !7, !DIExpression(), debug-location !9 - DBG_VALUE _, 0, !7, !DIExpression(), debug-location !9 + ; CHECK: DBG_VALUE %noreg, 0, !7, !DIExpression(), debug-location !9 + DBG_VALUE %noreg, 0, !7, !DIExpression(), debug-location !9 ... diff --git a/test/CodeGen/AArch64/GlobalISel/select-atomicrmw.mir b/test/CodeGen/AArch64/GlobalISel/select-atomicrmw.mir new file mode 100644 index 000000000000..cab5489ab6f4 --- /dev/null +++ b/test/CodeGen/AArch64/GlobalISel/select-atomicrmw.mir @@ -0,0 +1,238 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=aarch64-- -mattr=+lse -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s + +--- | + target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" + + define void @atomicrmw_xchg_i64(i64* %addr) { ret void } + define void @atomicrmw_add_i64(i64* %addr) { ret void } + define void @atomicrmw_add_i32(i64* %addr) { ret void } + define void @atomicrmw_sub_i32(i64* %addr) { ret void } + define void @atomicrmw_and_i32(i64* %addr) { ret void } + ; nand isn't legal + define void @atomicrmw_or_i32(i64* %addr) { ret void } + define void @atomicrmw_xor_i32(i64* %addr) { ret void } + define void @atomicrmw_min_i32(i64* %addr) { ret void } + define void @atomicrmw_max_i32(i64* %addr) { ret void } + define void @atomicrmw_umin_i32(i64* %addr) { ret void } + define void @atomicrmw_umax_i32(i64* %addr) { ret void } +... + +--- +name: atomicrmw_xchg_i64 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: %x0 + + ; CHECK-LABEL: name: atomicrmw_xchg_i64 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[CST:%[0-9]+]]:gpr64 = MOVi64imm 1 + ; CHECK: [[RES:%[0-9]+]]:gpr64 = SWPX [[CST]], [[COPY]] :: (load store monotonic 8 on %ir.addr) + ; CHECK: %x0 = COPY [[RES]] + %0:gpr(p0) = COPY %x0 + %1:gpr(s64) = G_CONSTANT i64 1 + %2:gpr(s64) = G_ATOMICRMW_XCHG %0, %1 :: (load store monotonic 8 on %ir.addr) + %x0 = COPY %2(s64) +... 
+--- +name: atomicrmw_add_i64 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: %x0 + + ; CHECK-LABEL: name: atomicrmw_add_i64 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[CST:%[0-9]+]]:gpr64 = MOVi64imm 1 + ; CHECK: [[RES:%[0-9]+]]:gpr64 = LDADDX [[CST]], [[COPY]] :: (load store monotonic 8 on %ir.addr) + ; CHECK: %x0 = COPY [[RES]] + %0:gpr(p0) = COPY %x0 + %1:gpr(s64) = G_CONSTANT i64 1 + %2:gpr(s64) = G_ATOMICRMW_ADD %0, %1 :: (load store monotonic 8 on %ir.addr) + %x0 = COPY %2(s64) +... +--- +name: atomicrmw_add_i32 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: %x0 + + ; CHECK-LABEL: name: atomicrmw_add_i32 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[CST:%[0-9]+]]:gpr32 = MOVi32imm 1 + ; CHECK: [[RES:%[0-9]+]]:gpr32 = LDADDALW [[CST]], [[COPY]] :: (load store seq_cst 8 on %ir.addr) + ; CHECK: %w0 = COPY [[RES]] + %0:gpr(p0) = COPY %x0 + %1:gpr(s32) = G_CONSTANT i32 1 + %2:gpr(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst 8 on %ir.addr) + %w0 = COPY %2(s32) +... + +--- +name: atomicrmw_sub_i32 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: %x0 + + ; CHECK-LABEL: name: atomicrmw_sub_i32 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[CST:%[0-9]+]]:gpr32 = MOVi32imm 1 + ; CHECK: [[RES:%[0-9]+]]:gpr32 = LDADDALW [[CST]], [[COPY]] :: (load store seq_cst 8 on %ir.addr) + ; CHECK: %w0 = COPY [[RES]] + %0:gpr(p0) = COPY %x0 + %1:gpr(s32) = G_CONSTANT i32 1 + %2:gpr(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst 8 on %ir.addr) + %w0 = COPY %2(s32) +... + +--- +name: atomicrmw_and_i32 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: %x0 + + ; CHECK-LABEL: name: atomicrmw_and_i32 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[CST:%[0-9]+]]:gpr32 = MOVi32imm 1 + ; CHECK: [[CST2:%[0-9]+]]:gpr32 = ORNWrr %wzr, [[CST]] + ; CHECK: [[RES:%[0-9]+]]:gpr32 = LDCLRAW [[CST2]], [[COPY]] :: (load store acquire 8 on %ir.addr) + ; CHECK: %w0 = COPY [[RES]] + %0:gpr(p0) = COPY %x0 + %1:gpr(s32) = G_CONSTANT i32 1 + %2:gpr(s32) = G_ATOMICRMW_AND %0, %1 :: (load store acquire 8 on %ir.addr) + %w0 = COPY %2(s32) +... + +--- +name: atomicrmw_or_i32 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: %x0 + + ; CHECK-LABEL: name: atomicrmw_or_i32 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[CST:%[0-9]+]]:gpr32 = MOVi32imm 1 + ; CHECK: [[RES:%[0-9]+]]:gpr32 = LDSETLW [[CST]], [[COPY]] :: (load store release 8 on %ir.addr) + ; CHECK: %w0 = COPY [[RES]] + %0:gpr(p0) = COPY %x0 + %1:gpr(s32) = G_CONSTANT i32 1 + %2:gpr(s32) = G_ATOMICRMW_OR %0, %1 :: (load store release 8 on %ir.addr) + %w0 = COPY %2(s32) +... + +--- +name: atomicrmw_xor_i32 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: %x0 + + ; CHECK-LABEL: name: atomicrmw_xor_i32 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[CST:%[0-9]+]]:gpr32 = MOVi32imm 1 + ; CHECK: [[RES:%[0-9]+]]:gpr32 = LDEORALW [[CST]], [[COPY]] :: (load store acq_rel 8 on %ir.addr) + ; CHECK: %w0 = COPY [[RES]] + %0:gpr(p0) = COPY %x0 + %1:gpr(s32) = G_CONSTANT i32 1 + %2:gpr(s32) = G_ATOMICRMW_XOR %0, %1 :: (load store acq_rel 8 on %ir.addr) + %w0 = COPY %2(s32) +... 
+ +--- +name: atomicrmw_min_i32 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: %x0 + + ; CHECK-LABEL: name: atomicrmw_min_i32 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[CST:%[0-9]+]]:gpr32 = MOVi32imm 1 + ; CHECK: [[RES:%[0-9]+]]:gpr32 = LDSMINALW [[CST]], [[COPY]] :: (load store acq_rel 8 on %ir.addr) + ; CHECK: %w0 = COPY [[RES]] + %0:gpr(p0) = COPY %x0 + %1:gpr(s32) = G_CONSTANT i32 1 + %2:gpr(s32) = G_ATOMICRMW_MIN %0, %1 :: (load store acq_rel 8 on %ir.addr) + %w0 = COPY %2(s32) +... + +--- +name: atomicrmw_max_i32 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: %x0 + + ; CHECK-LABEL: name: atomicrmw_max_i32 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[CST:%[0-9]+]]:gpr32 = MOVi32imm 1 + ; CHECK: [[RES:%[0-9]+]]:gpr32 = LDSMAXALW [[CST]], [[COPY]] :: (load store acq_rel 8 on %ir.addr) + ; CHECK: %w0 = COPY [[RES]] + %0:gpr(p0) = COPY %x0 + %1:gpr(s32) = G_CONSTANT i32 1 + %2:gpr(s32) = G_ATOMICRMW_MAX %0, %1 :: (load store acq_rel 8 on %ir.addr) + %w0 = COPY %2(s32) +... + +--- +name: atomicrmw_umin_i32 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: %x0 + + ; CHECK-LABEL: name: atomicrmw_umin_i32 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[CST:%[0-9]+]]:gpr32 = MOVi32imm 1 + ; CHECK: [[RES:%[0-9]+]]:gpr32 = LDUMINALW [[CST]], [[COPY]] :: (load store acq_rel 8 on %ir.addr) + ; CHECK: %w0 = COPY [[RES]] + %0:gpr(p0) = COPY %x0 + %1:gpr(s32) = G_CONSTANT i32 1 + %2:gpr(s32) = G_ATOMICRMW_UMIN %0, %1 :: (load store acq_rel 8 on %ir.addr) + %w0 = COPY %2(s32) +... + +--- +name: atomicrmw_umax_i32 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: %x0 + + ; CHECK-LABEL: name: atomicrmw_umax_i32 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[CST:%[0-9]+]]:gpr32 = MOVi32imm 1 + ; CHECK: [[RES:%[0-9]+]]:gpr32 = LDUMAXALW [[CST]], [[COPY]] :: (load store acq_rel 8 on %ir.addr) + ; CHECK: %w0 = COPY [[RES]] + %0:gpr(p0) = COPY %x0 + %1:gpr(s32) = G_CONSTANT i32 1 + %2:gpr(s32) = G_ATOMICRMW_UMAX %0, %1 :: (load store acq_rel 8 on %ir.addr) + %w0 = COPY %2(s32) +... diff --git a/test/CodeGen/AArch64/GlobalISel/select-cmpxchg.mir b/test/CodeGen/AArch64/GlobalISel/select-cmpxchg.mir new file mode 100644 index 000000000000..67ce28ba8590 --- /dev/null +++ b/test/CodeGen/AArch64/GlobalISel/select-cmpxchg.mir @@ -0,0 +1,53 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=aarch64-- -mattr=+lse -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s + +--- | + target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" + + define void @cmpxchg_i32(i64* %addr) { ret void } + define void @cmpxchg_i64(i64* %addr) { ret void } +... + +--- +name: cmpxchg_i32 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: %x0 + + ; CHECK-LABEL: name: cmpxchg_i32 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[CMP:%[0-9]+]]:gpr32 = MOVi32imm 0 + ; CHECK: [[CST:%[0-9]+]]:gpr32 = MOVi32imm 1 + ; CHECK: [[RES:%[0-9]+]]:gpr32 = CASW [[CMP]], [[CST]], [[COPY]] :: (load store monotonic 8 on %ir.addr) + ; CHECK: %w0 = COPY [[RES]] + %0:gpr(p0) = COPY %x0 + %1:gpr(s32) = G_CONSTANT i32 0 + %2:gpr(s32) = G_CONSTANT i32 1 + %3:gpr(s32) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store monotonic 8 on %ir.addr) + %w0 = COPY %3(s32) +... 
+ +--- +name: cmpxchg_i64 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: %x0 + + ; CHECK-LABEL: name: cmpxchg_i64 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[CMP:%[0-9]+]]:gpr64 = MOVi64imm 0 + ; CHECK: [[CST:%[0-9]+]]:gpr64 = MOVi64imm 1 + ; CHECK: [[RES:%[0-9]+]]:gpr64 = CASX [[CMP]], [[CST]], [[COPY]] :: (load store monotonic 8 on %ir.addr) + ; CHECK: %x0 = COPY [[RES]] + %0:gpr(p0) = COPY %x0 + %1:gpr(s64) = G_CONSTANT i64 0 + %2:gpr(s64) = G_CONSTANT i64 1 + %3:gpr(s64) = G_ATOMIC_CMPXCHG %0, %1, %2 :: (load store monotonic 8 on %ir.addr) + %x0 = COPY %3(s64) +... diff --git a/test/CodeGen/AArch64/GlobalISel/select-dbg-value.mir b/test/CodeGen/AArch64/GlobalISel/select-dbg-value.mir index af83be5c075e..7396ae57f8fd 100644 --- a/test/CodeGen/AArch64/GlobalISel/select-dbg-value.mir +++ b/test/CodeGen/AArch64/GlobalISel/select-dbg-value.mir @@ -46,11 +46,11 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0 ; CHECK: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[COPY]], [[COPY]] ; CHECK: %w0 = COPY [[ADDWrr]] - ; CHECK: DBG_VALUE debug-use [[ADDWrr]], debug-use _, !7, !DIExpression(), debug-location !9 + ; CHECK: DBG_VALUE debug-use [[ADDWrr]], debug-use %noreg, !7, !DIExpression(), debug-location !9 %0:gpr(s32) = COPY %w0 %1:gpr(s32) = G_ADD %0, %0 %w0 = COPY %1(s32) - DBG_VALUE debug-use %1(s32), debug-use _, !7, !DIExpression(), debug-location !9 + DBG_VALUE debug-use %1(s32), debug-use %noreg, !7, !DIExpression(), debug-location !9 ... --- @@ -62,7 +62,7 @@ body: | liveins: %w0 ; CHECK-LABEL: name: test_dbg_value_dead ; CHECK-NOT: COPY - ; CHECK: DBG_VALUE debug-use _, debug-use _, !7, !DIExpression(), debug-location !9 + ; CHECK: DBG_VALUE debug-use %noreg, debug-use %noreg, !7, !DIExpression(), debug-location !9 %0:gpr(s32) = COPY %w0 - DBG_VALUE debug-use %0(s32), debug-use _, !7, !DIExpression(), debug-location !9 + DBG_VALUE debug-use %0(s32), debug-use %noreg, !7, !DIExpression(), debug-location !9 ... diff --git a/test/CodeGen/AArch64/GlobalISel/select-gv-cmodel-large.mir b/test/CodeGen/AArch64/GlobalISel/select-gv-cmodel-large.mir new file mode 100644 index 000000000000..12cd832665b3 --- /dev/null +++ b/test/CodeGen/AArch64/GlobalISel/select-gv-cmodel-large.mir @@ -0,0 +1,61 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=aarch64-linux-gnu -code-model=large -run-pass=instruction-select -verify-machineinstrs -O0 %s -o - | FileCheck %s +--- | + target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" + + @foo1 = common global [1073741824 x i32] zeroinitializer, align 4 + @foo2 = common global [1073741824 x i32] zeroinitializer, align 4 + + define i32 @gv_large() { + entry: + %retval = alloca i32, align 4 + store i32 0, i32* %retval, align 4 + %0 = load i32, i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo1, i64 0, i64 0), align 4 + %1 = load i32, i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo2, i64 0, i64 0), align 4 + %add = add nsw i32 %0, %1 + ret i32 %add + } + +... 
+--- +name: gv_large +legalized: true +regBankSelected: true +stack: + - { id: 0, name: retval, type: default, offset: 0, size: 4, alignment: 4, + stack-id: 0, callee-saved-register: '', callee-saved-restored: true, + di-variable: '', di-expression: '', di-location: '' } +constants: +body: | + bb.1: + ; CHECK-LABEL: name: gv_large + ; CHECK: [[MOVZXi:%[0-9]+]]:gpr64 = MOVZXi target-flags(aarch64-g0, aarch64-nc) @foo1, 0 + ; CHECK: [[MOVKXi:%[0-9]+]]:gpr64 = MOVKXi [[MOVZXi]], target-flags(aarch64-g1, aarch64-nc) @foo1, 16 + ; CHECK: [[MOVKXi1:%[0-9]+]]:gpr64 = MOVKXi [[MOVKXi]], target-flags(aarch64-g2, aarch64-nc) @foo1, 32 + ; CHECK: [[MOVKXi2:%[0-9]+]]:gpr64 = MOVKXi [[MOVKXi1]], target-flags(aarch64-g3) @foo1, 48 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY [[MOVKXi2]] + ; CHECK: [[MOVZXi1:%[0-9]+]]:gpr64 = MOVZXi target-flags(aarch64-g0, aarch64-nc) @foo2, 0 + ; CHECK: [[MOVKXi3:%[0-9]+]]:gpr64 = MOVKXi [[MOVZXi1]], target-flags(aarch64-g1, aarch64-nc) @foo2, 16 + ; CHECK: [[MOVKXi4:%[0-9]+]]:gpr64 = MOVKXi [[MOVKXi3]], target-flags(aarch64-g2, aarch64-nc) @foo2, 32 + ; CHECK: [[MOVKXi5:%[0-9]+]]:gpr64 = MOVKXi [[MOVKXi4]], target-flags(aarch64-g3) @foo2, 48 + ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY [[MOVKXi5]] + ; CHECK: STRWui %wzr, %stack.0.retval, 0 :: (store 4 into %ir.retval) + ; CHECK: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui [[COPY]], 0 :: (load 4 from `i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo1, i64 0, i64 0)`) + ; CHECK: [[LDRWui1:%[0-9]+]]:gpr32 = LDRWui [[COPY1]], 0 :: (load 4 from `i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo2, i64 0, i64 0)`) + ; CHECK: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[LDRWui]], [[LDRWui1]] + ; CHECK: %w0 = COPY [[ADDWrr]] + ; CHECK: RET_ReallyLR implicit %w0 + %1:gpr(s32) = G_CONSTANT i32 0 + %4:gpr(p0) = G_GLOBAL_VALUE @foo1 + %3:gpr(p0) = COPY %4(p0) + %7:gpr(p0) = G_GLOBAL_VALUE @foo2 + %6:gpr(p0) = COPY %7(p0) + %0:gpr(p0) = G_FRAME_INDEX %stack.0.retval + G_STORE %1(s32), %0(p0) :: (store 4 into %ir.retval) + %2:gpr(s32) = G_LOAD %3(p0) :: (load 4 from `i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo1, i64 0, i64 0)`) + %5:gpr(s32) = G_LOAD %6(p0) :: (load 4 from `i32* getelementptr inbounds ([1073741824 x i32], [1073741824 x i32]* @foo2, i64 0, i64 0)`) + %8:gpr(s32) = G_ADD %2, %5 + %w0 = COPY %8(s32) + RET_ReallyLR implicit %w0 + +... diff --git a/test/CodeGen/AArch64/GlobalISel/select-mul.mir b/test/CodeGen/AArch64/GlobalISel/select-mul.mir new file mode 100644 index 000000000000..5b4971d41d8c --- /dev/null +++ b/test/CodeGen/AArch64/GlobalISel/select-mul.mir @@ -0,0 +1,34 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -O0 -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s +--- +name: mul_i64_sext_imm32 +legalized: true +regBankSelected: true + +registers: + - { id: 0, class: gpr } + - { id: 1, class: gpr } + - { id: 2, class: gpr } + - { id: 3, class: gpr } + +body: | + bb.0: + liveins: %w0 + + ; Make sure InstructionSelector is able to match a pattern + ; with an SDNodeXForm, trunc_imm. 
+ ; def : Pat<(i64 (mul (sext GPR32:$Rn), (s64imm_32bit:$C))), + ; (SMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>; + ; CHECK-LABEL: name: mul_i64_sext_imm32 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0 + ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 3 + ; CHECK: [[SMADDLrrr:%[0-9]+]]:gpr64 = SMADDLrrr [[COPY]], [[MOVi32imm]], %xzr + ; CHECK: %x0 = COPY [[SMADDLrrr]] + %0:gpr(s32) = COPY %w0 + %1:gpr(s64) = G_SEXT %0(s32) + %2:gpr(s64) = G_CONSTANT i64 3 + %3:gpr(s64) = G_MUL %1, %2 + %x0 = COPY %3(s64) +... + + diff --git a/test/CodeGen/AArch64/GlobalISel/translate-gep.ll b/test/CodeGen/AArch64/GlobalISel/translate-gep.ll index 865315bbe0a3..8318e9e2ef01 100644 --- a/test/CodeGen/AArch64/GlobalISel/translate-gep.ll +++ b/test/CodeGen/AArch64/GlobalISel/translate-gep.ll @@ -1,85 +1,113 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py ; RUN: llc -mtriple=aarch64-linux-gnu -O0 -global-isel -stop-after=irtranslator -o - %s | FileCheck %s %type = type [4 x {i8, i32}] +define i8* @translate_element_size1(i64 %arg) { +; CHECK-LABEL: name: translate_element_size1 +; CHECK: [[OFFSET:%[0-9]+]]:_(s64) = COPY %x0 +; CHECK: [[BASE:%[0-9]+]]:_(p0) = G_CONSTANT i64 0 +; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[BASE]], [[OFFSET]] + %tmp = getelementptr i8, i8* null, i64 %arg + ret i8* %tmp +} + define %type* @first_offset_const(%type* %addr) { -; CHECK-LABEL: name: first_offset_const -; CHECK: [[BASE:%[0-9]+]]:_(p0) = COPY %x0 -; CHECK: [[OFFSET:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 -; CHECK: [[RES:%[0-9]+]]:_(p0) = G_GEP [[BASE]], [[OFFSET]](s64) -; CHECK: %x0 = COPY [[RES]](p0) + ; CHECK-LABEL: name: first_offset_const + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: %x0 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY %x0 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 + ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C]](s64) + ; CHECK: %x0 = COPY [[GEP]](p0) + ; CHECK: RET_ReallyLR implicit %x0 %res = getelementptr %type, %type* %addr, i32 1 ret %type* %res } define %type* @first_offset_trivial(%type* %addr) { -; CHECK-LABEL: name: first_offset_trivial -; CHECK: [[BASE:%[0-9]+]]:_(p0) = COPY %x0 -; CHECK: [[TRIVIAL:%[0-9]+]]:_(p0) = COPY [[BASE]](p0) -; CHECK: %x0 = COPY [[TRIVIAL]](p0) + ; CHECK-LABEL: name: first_offset_trivial + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: %x0 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY %x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY [[COPY]](p0) + ; CHECK: %x0 = COPY [[COPY1]](p0) + ; CHECK: RET_ReallyLR implicit %x0 %res = getelementptr %type, %type* %addr, i32 0 ret %type* %res } define %type* @first_offset_variable(%type* %addr, i64 %idx) { -; CHECK-LABEL: name: first_offset_variable -; CHECK: [[BASE:%[0-9]+]]:_(p0) = COPY %x0 -; CHECK: [[IDX:%[0-9]+]]:_(s64) = COPY %x1 -; CHECK: [[SIZE:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 -; CHECK: [[OFFSET:%[0-9]+]]:_(s64) = G_MUL [[SIZE]], [[IDX]] -; CHECK: [[STEP0:%[0-9]+]]:_(p0) = G_GEP [[BASE]], [[OFFSET]](s64) -; CHECK: [[RES:%[0-9]+]]:_(p0) = COPY [[STEP0]](p0) -; CHECK: %x0 = COPY [[RES]](p0) + ; CHECK-LABEL: name: first_offset_variable + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: %x0, %x1 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY %x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY %x1 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 + ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[C]], [[COPY1]] + ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[MUL]](s64) + ; CHECK: [[COPY2:%[0-9]+]]:_(p0) = COPY [[GEP]](p0) + ; CHECK: %x0 = COPY [[COPY2]](p0) + ; CHECK: RET_ReallyLR 
implicit %x0 %res = getelementptr %type, %type* %addr, i64 %idx ret %type* %res } define %type* @first_offset_ext(%type* %addr, i32 %idx) { -; CHECK-LABEL: name: first_offset_ext -; CHECK: [[BASE:%[0-9]+]]:_(p0) = COPY %x0 -; CHECK: [[IDX32:%[0-9]+]]:_(s32) = COPY %w1 -; CHECK: [[SIZE:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 -; CHECK: [[IDX64:%[0-9]+]]:_(s64) = G_SEXT [[IDX32]](s32) -; CHECK: [[OFFSET:%[0-9]+]]:_(s64) = G_MUL [[SIZE]], [[IDX64]] -; CHECK: [[STEP0:%[0-9]+]]:_(p0) = G_GEP [[BASE]], [[OFFSET]](s64) -; CHECK: [[RES:%[0-9]+]]:_(p0) = COPY [[STEP0]](p0) -; CHECK: %x0 = COPY [[RES]](p0) + ; CHECK-LABEL: name: first_offset_ext + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: %w1, %x0 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY %x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY %w1 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 + ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY1]](s32) + ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[C]], [[SEXT]] + ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[MUL]](s64) + ; CHECK: [[COPY2:%[0-9]+]]:_(p0) = COPY [[GEP]](p0) + ; CHECK: %x0 = COPY [[COPY2]](p0) + ; CHECK: RET_ReallyLR implicit %x0 %res = getelementptr %type, %type* %addr, i32 %idx ret %type* %res } %type1 = type [4 x [4 x i32]] define i32* @const_then_var(%type1* %addr, i64 %idx) { -; CHECK-LABEL: name: const_then_var -; CHECK: [[BASE:%[0-9]+]]:_(p0) = COPY %x0 -; CHECK: [[IDX:%[0-9]+]]:_(s64) = COPY %x1 -; CHECK: [[OFFSET1:%[0-9]+]]:_(s64) = G_CONSTANT i64 272 -; CHECK: [[SIZE:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 -; CHECK: [[BASE1:%[0-9]+]]:_(p0) = G_GEP [[BASE]], [[OFFSET1]](s64) -; CHECK: [[OFFSET2:%[0-9]+]]:_(s64) = G_MUL [[SIZE]], [[IDX]] -; CHECK: [[BASE2:%[0-9]+]]:_(p0) = G_GEP [[BASE1]], [[OFFSET2]](s64) -; CHECK: [[RES:%[0-9]+]]:_(p0) = COPY [[BASE2]](p0) -; CHECK: %x0 = COPY [[RES]](p0) + ; CHECK-LABEL: name: const_then_var + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: %x0, %x1 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY %x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY %x1 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 272 + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C]](s64) + ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[C1]], [[COPY1]] + ; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_GEP [[GEP]], [[MUL]](s64) + ; CHECK: [[COPY2:%[0-9]+]]:_(p0) = COPY [[GEP1]](p0) + ; CHECK: %x0 = COPY [[COPY2]](p0) + ; CHECK: RET_ReallyLR implicit %x0 %res = getelementptr %type1, %type1* %addr, i32 4, i32 1, i64 %idx ret i32* %res } define i32* @var_then_const(%type1* %addr, i64 %idx) { -; CHECK-LABEL: name: var_then_const -; CHECK: [[BASE:%[0-9]+]]:_(p0) = COPY %x0 -; CHECK: [[IDX:%[0-9]+]]:_(s64) = COPY %x1 -; CHECK: [[SIZE:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 -; CHECK: [[OFFSET2:%[0-9]+]]:_(s64) = G_CONSTANT i64 40 -; CHECK: [[OFFSET1:%[0-9]+]]:_(s64) = G_MUL [[SIZE]], [[IDX]] -; CHECK: [[BASE1:%[0-9]+]]:_(p0) = G_GEP [[BASE]], [[OFFSET1]](s64) -; CHECK: [[BASE2:%[0-9]+]]:_(p0) = G_GEP [[BASE1]], [[OFFSET2]](s64) -; CHECK: %x0 = COPY [[BASE2]](p0) + ; CHECK-LABEL: name: var_then_const + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: %x0, %x1 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY %x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY %x1 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 40 + ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[C]], [[COPY1]] + ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[MUL]](s64) + ; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_GEP [[GEP]], [[C1]](s64) + ; CHECK: %x0 = COPY 
[[GEP1]](p0) + ; CHECK: RET_ReallyLR implicit %x0 %res = getelementptr %type1, %type1* %addr, i64 %idx, i32 2, i32 2 ret i32* %res } diff --git a/test/CodeGen/AArch64/GlobalISel/unknown-intrinsic.ll b/test/CodeGen/AArch64/GlobalISel/unknown-intrinsic.ll new file mode 100644 index 000000000000..179dd518d3f0 --- /dev/null +++ b/test/CodeGen/AArch64/GlobalISel/unknown-intrinsic.ll @@ -0,0 +1,10 @@ +; RUN: llc -O0 -mtriple=arm64 < %s + +declare i8* @llvm.invariant.group.barrier(i8*) + +define i8* @barrier(i8* %p) { +; CHECK: bl llvm.invariant.group.barrier + %q = call i8* @llvm.invariant.group.barrier(i8* %p) + ret i8* %q +} + diff --git a/test/CodeGen/AArch64/GlobalISel/verify-regbankselected.mir b/test/CodeGen/AArch64/GlobalISel/verify-regbankselected.mir index 9a2f7f7e54f8..94a9134072a3 100644 --- a/test/CodeGen/AArch64/GlobalISel/verify-regbankselected.mir +++ b/test/CodeGen/AArch64/GlobalISel/verify-regbankselected.mir @@ -9,8 +9,8 @@ ... --- # CHECK: *** Bad machine code: Generic virtual register must have a bank in a RegBankSelected function *** -# CHECK: instruction: %vreg0(s64) = COPY -# CHECK: operand 0: %vreg0 +# CHECK: instruction: %0:_(s64) = COPY +# CHECK: operand 0: %0 name: test regBankSelected: true registers: diff --git a/test/CodeGen/AArch64/GlobalISel/verify-selected.mir b/test/CodeGen/AArch64/GlobalISel/verify-selected.mir index 2149903d08a7..772233ec1038 100644 --- a/test/CodeGen/AArch64/GlobalISel/verify-selected.mir +++ b/test/CodeGen/AArch64/GlobalISel/verify-selected.mir @@ -22,11 +22,11 @@ body: | %0 = COPY %x0 ; CHECK: *** Bad machine code: Unexpected generic instruction in a Selected function *** - ; CHECK: instruction: %vreg1 = G_ADD + ; CHECK: instruction: %1:gpr64 = G_ADD %1 = G_ADD %0, %0 ; CHECK: *** Bad machine code: Generic virtual register invalid in a Selected function *** - ; CHECK: instruction: %vreg2(s64) = COPY - ; CHECK: operand 0: %vreg2 + ; CHECK: instruction: %2:gpr(s64) = COPY + ; CHECK: operand 0: %2 %2(s64) = COPY %x0 ... 
diff --git a/test/CodeGen/AArch64/PBQP-csr.ll b/test/CodeGen/AArch64/PBQP-csr.ll index 16d7f8cb7a5a..e071eda17e35 100644 --- a/test/CodeGen/AArch64/PBQP-csr.ll +++ b/test/CodeGen/AArch64/PBQP-csr.ll @@ -22,7 +22,7 @@ entry: %z.i60 = getelementptr inbounds %rs, %rs* %r, i64 0, i32 9, i32 2 %na = getelementptr inbounds %rs, %rs* %r, i64 0, i32 0 %0 = bitcast double* %x.i to i8* - call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 72, i32 8, i1 false) + call void @llvm.memset.p0i8.i64(i8* align 8 %0, i8 0, i64 72, i1 false) %1 = load i32, i32* %na, align 4 %cmp70 = icmp sgt i32 %1, 0 br i1 %cmp70, label %for.body.lr.ph, label %for.end @@ -87,5 +87,5 @@ for.end: ; preds = %for.end.loopexit, % } ; Function Attrs: nounwind -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) diff --git a/test/CodeGen/AArch64/aarch64-DAGCombine-findBetterNeighborChains-crash.ll b/test/CodeGen/AArch64/aarch64-DAGCombine-findBetterNeighborChains-crash.ll index fb4df34df298..043ce0933a9b 100644 --- a/test/CodeGen/AArch64/aarch64-DAGCombine-findBetterNeighborChains-crash.ll +++ b/test/CodeGen/AArch64/aarch64-DAGCombine-findBetterNeighborChains-crash.ll @@ -6,13 +6,13 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" declare void @extern(i8*) ; Function Attrs: argmemonly nounwind -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) #0 +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) #0 ; Function Attrs: nounwind define void @func(float* noalias %arg, i32* noalias %arg1, i8* noalias %arg2, i8* noalias %arg3) #1 { bb: %tmp = getelementptr inbounds i8, i8* %arg2, i64 88 - tail call void @llvm.memset.p0i8.i64(i8* noalias %arg2, i8 0, i64 40, i32 8, i1 false) + tail call void @llvm.memset.p0i8.i64(i8* align 8 noalias %arg2, i8 0, i64 40, i1 false) store i8 0, i8* %arg3 store i8 2, i8* %arg2 store float 0.000000e+00, float* %arg @@ -27,7 +27,7 @@ bb: define void @func2(float* noalias %arg, i32* noalias %arg1, i8* noalias %arg2, i8* noalias %arg3) #1 { bb: %tmp = getelementptr inbounds i8, i8* %arg2, i64 88 - tail call void @llvm.memset.p0i8.i64(i8* noalias %arg2, i8 0, i64 40, i32 8, i1 false) + tail call void @llvm.memset.p0i8.i64(i8* align 8 noalias %arg2, i8 0, i64 40, i1 false) store i8 0, i8* %arg3 store i8 2, i8* %arg2 store float 0.000000e+00, float* %arg diff --git a/test/CodeGen/AArch64/aarch64-a57-fp-load-balancing.ll b/test/CodeGen/AArch64/aarch64-a57-fp-load-balancing.ll index 29b71e042611..55f6c01cbd9f 100644 --- a/test/CodeGen/AArch64/aarch64-a57-fp-load-balancing.ll +++ b/test/CodeGen/AArch64/aarch64-a57-fp-load-balancing.ll @@ -296,7 +296,7 @@ declare double @hh(double) #1 ; Check that we correctly deal with repeated operands. ; The following testcase creates: -; %D1 = FADDDrr %D0, %D0 +; %d1 = FADDDrr killed %d0, %d0 ; We'll get a crash if we naively look at the first operand, remove it ; from the substitution list then look at the second operand. 
diff --git a/test/CodeGen/AArch64/aarch64-combine-fmul-fsub.mir b/test/CodeGen/AArch64/aarch64-combine-fmul-fsub.mir new file mode 100644 index 000000000000..19bdc4baac52 --- /dev/null +++ b/test/CodeGen/AArch64/aarch64-combine-fmul-fsub.mir @@ -0,0 +1,161 @@ +# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=cortex-a57 -enable-unsafe-fp-math %s | FileCheck --check-prefixes=UNPROFITABLE,ALL %s +# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=falkor -enable-unsafe-fp-math %s | FileCheck --check-prefixes=PROFITABLE,ALL %s +# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=exynos-m1 -enable-unsafe-fp-math %s | FileCheck --check-prefixes=PROFITABLE,ALL %s +# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=thunderx2t99 -enable-unsafe-fp-math %s | FileCheck --check-prefixes=PROFITABLE,ALL %s +# +name: f1_2s +registers: + - { id: 0, class: fpr64 } + - { id: 1, class: fpr64 } + - { id: 2, class: fpr64 } + - { id: 3, class: fpr64 } + - { id: 4, class: fpr64 } +body: | + bb.0.entry: + %2:fpr64 = COPY %d2 + %1:fpr64 = COPY %d1 + %0:fpr64 = COPY %d0 + %3:fpr64 = FMULv2f32 %0, %1 + %4:fpr64 = FSUBv2f32 killed %3, %2 + %d0 = COPY %4 + RET_ReallyLR implicit %d0 + +... +# UNPROFITABLE-LABEL: name: f1_2s +# UNPROFITABLE: %3:fpr64 = FMULv2f32 %0, %1 +# UNPROFITABLE-NEXT: FSUBv2f32 killed %3, %2 +# +# PROFITABLE-LABEL: name: f1_2s +# PROFITABLE: %5:fpr64 = FNEGv2f32 %2 +# PROFITABLE-NEXT: FMLAv2f32 killed %5, %0, %1 +--- +name: f1_4s +registers: + - { id: 0, class: fpr128 } + - { id: 1, class: fpr128 } + - { id: 2, class: fpr128 } + - { id: 3, class: fpr128 } + - { id: 4, class: fpr128 } +body: | + bb.0.entry: + %2:fpr128 = COPY %q2 + %1:fpr128 = COPY %q1 + %0:fpr128 = COPY %q0 + %3:fpr128 = FMULv4f32 %0, %1 + %4:fpr128 = FSUBv4f32 killed %3, %2 + %q0 = COPY %4 + RET_ReallyLR implicit %q0 + +... +# UNPROFITABLE-LABEL: name: f1_4s +# UNPROFITABLE: %3:fpr128 = FMULv4f32 %0, %1 +# UNPROFITABLE-NEXT: FSUBv4f32 killed %3, %2 +# +# PROFITABLE-LABEL: name: f1_4s +# PROFITABLE: %5:fpr128 = FNEGv4f32 %2 +# PROFITABLE-NEXT: FMLAv4f32 killed %5, %0, %1 +--- +name: f1_2d +registers: + - { id: 0, class: fpr128 } + - { id: 1, class: fpr128 } + - { id: 2, class: fpr128 } + - { id: 3, class: fpr128 } + - { id: 4, class: fpr128 } +body: | + bb.0.entry: + %2:fpr128 = COPY %q2 + %1:fpr128 = COPY %q1 + %0:fpr128 = COPY %q0 + %3:fpr128 = FMULv2f64 %0, %1 + %4:fpr128 = FSUBv2f64 killed %3, %2 + %q0 = COPY %4 + RET_ReallyLR implicit %q0 + +... +# UNPROFITABLE-LABEL: name: f1_2d +# UNPROFITABLE: %3:fpr128 = FMULv2f64 %0, %1 +# UNPROFITABLE-NEXT: FSUBv2f64 killed %3, %2 +# +# PROFITABLE-LABEL: name: f1_2d +# PROFITABLE: %5:fpr128 = FNEGv2f64 %2 +# PROFITABLE-NEXT: FMLAv2f64 killed %5, %0, %1 +--- +name: f1_both_fmul_2s +registers: + - { id: 0, class: fpr64 } + - { id: 1, class: fpr64 } + - { id: 2, class: fpr64 } + - { id: 3, class: fpr64 } + - { id: 4, class: fpr64 } + - { id: 5, class: fpr64 } + - { id: 6, class: fpr64 } +body: | + bb.0.entry: + %3:fpr64 = COPY %q3 + %2:fpr64 = COPY %q2 + %1:fpr64 = COPY %q1 + %0:fpr64 = COPY %q0 + %4:fpr64 = FMULv2f32 %0, %1 + %5:fpr64 = FMULv2f32 %2, %3 + %6:fpr64 = FSUBv2f32 killed %4, %5 + %q0 = COPY %6 + RET_ReallyLR implicit %q0 + +... 
+# ALL-LABEL: name: f1_both_fmul_2s +# ALL: %4:fpr64 = FMULv2f32 %0, %1 +# ALL-NEXT: FMLSv2f32 killed %4, %2, %3 +--- +name: f1_both_fmul_4s +registers: + - { id: 0, class: fpr128 } + - { id: 1, class: fpr128 } + - { id: 2, class: fpr128 } + - { id: 3, class: fpr128 } + - { id: 4, class: fpr128 } + - { id: 5, class: fpr128 } + - { id: 6, class: fpr128 } +body: | + bb.0.entry: + %3:fpr128 = COPY %q3 + %2:fpr128 = COPY %q2 + %1:fpr128 = COPY %q1 + %0:fpr128 = COPY %q0 + %4:fpr128 = FMULv4f32 %0, %1 + %5:fpr128 = FMULv4f32 %2, %3 + %6:fpr128 = FSUBv4f32 killed %4, %5 + %q0 = COPY %6 + RET_ReallyLR implicit %q0 + +... +# ALL-LABEL: name: f1_both_fmul_4s +# ALL: %4:fpr128 = FMULv4f32 %0, %1 +# ALL-NEXT: FMLSv4f32 killed %4, %2, %3 +--- +name: f1_both_fmul_2d +registers: + - { id: 0, class: fpr128 } + - { id: 1, class: fpr128 } + - { id: 2, class: fpr128 } + - { id: 3, class: fpr128 } + - { id: 4, class: fpr128 } + - { id: 5, class: fpr128 } + - { id: 6, class: fpr128 } +body: | + bb.0.entry: + %3:fpr128 = COPY %q3 + %2:fpr128 = COPY %q2 + %1:fpr128 = COPY %q1 + %0:fpr128 = COPY %q0 + %4:fpr128 = FMULv2f64 %0, %1 + %5:fpr128 = FMULv2f64 %2, %3 + %6:fpr128 = FSUBv2f64 killed %4, %5 + %q0 = COPY %6 + RET_ReallyLR implicit %q0 + +... +# ALL-LABEL: name: f1_both_fmul_2d +# ALL: %4:fpr128 = FMULv2f64 %0, %1 +# ALL-NEXT: FMLSv2f64 killed %4, %2, %3 + diff --git a/test/CodeGen/AArch64/aarch64-fix-cortex-a53-835769.ll b/test/CodeGen/AArch64/aarch64-fix-cortex-a53-835769.ll index 51c32b409db5..eafb4126807f 100644 --- a/test/CodeGen/AArch64/aarch64-fix-cortex-a53-835769.ll +++ b/test/CodeGen/AArch64/aarch64-fix-cortex-a53-835769.ll @@ -508,12 +508,12 @@ block1: ; CHECK: ldr ; CHECK-NEXT: nop ; CHECK-NEXT: .Ltmp -; CHECK-NEXT: BB +; CHECK-NEXT: %bb. ; CHECK-NEXT: madd ; CHECK-NOWORKAROUND-LABEL: fall_through ; CHECK-NOWORKAROUND: ldr ; CHECK-NOWORKAROUND-NEXT: .Ltmp -; CHECK-NOWORKAROUND-NEXT: BB +; CHECK-NOWORKAROUND-NEXT: %bb. 
; CHECK-NOWORKAROUND-NEXT: madd ; No checks for this, just check it doesn't crash diff --git a/test/CodeGen/AArch64/aarch64-stp-cluster.ll b/test/CodeGen/AArch64/aarch64-stp-cluster.ll index 25cf313b81e7..5d6c5a7b2cad 100644 --- a/test/CodeGen/AArch64/aarch64-stp-cluster.ll +++ b/test/CodeGen/AArch64/aarch64-stp-cluster.ll @@ -2,13 +2,13 @@ ; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=cortex-a57 -verify-misched -debug-only=machine-scheduler -aarch64-enable-stp-suppress=false -o - 2>&1 > /dev/null | FileCheck %s ; CHECK: ********** MI Scheduling ********** -; CHECK-LABEL: stp_i64_scale:BB#0 +; CHECK-LABEL: stp_i64_scale:%bb.0 ; CHECK:Cluster ld/st SU(4) - SU(3) ; CHECK:Cluster ld/st SU(2) - SU(5) -; CHECK:SU(4): STRXui %vreg1, %vreg0, 1 -; CHECK:SU(3): STRXui %vreg1, %vreg0, 2 -; CHECK:SU(2): STRXui %vreg1, %vreg0, 3 -; CHECK:SU(5): STRXui %vreg1, %vreg0, 4 +; CHECK:SU(4): STRXui %1:gpr64, %0:gpr64common, 1 +; CHECK:SU(3): STRXui %1:gpr64, %0:gpr64common, 2 +; CHECK:SU(2): STRXui %1:gpr64, %0:gpr64common, 3 +; CHECK:SU(5): STRXui %1:gpr64, %0:gpr64common, 4 define i64 @stp_i64_scale(i64* nocapture %P, i64 %v) { entry: %arrayidx = getelementptr inbounds i64, i64* %P, i64 3 @@ -23,13 +23,13 @@ entry: } ; CHECK: ********** MI Scheduling ********** -; CHECK-LABEL: stp_i32_scale:BB#0 +; CHECK-LABEL: stp_i32_scale:%bb.0 ; CHECK:Cluster ld/st SU(4) - SU(3) ; CHECK:Cluster ld/st SU(2) - SU(5) -; CHECK:SU(4): STRWui %vreg1, %vreg0, 1 -; CHECK:SU(3): STRWui %vreg1, %vreg0, 2 -; CHECK:SU(2): STRWui %vreg1, %vreg0, 3 -; CHECK:SU(5): STRWui %vreg1, %vreg0, 4 +; CHECK:SU(4): STRWui %1:gpr32, %0:gpr64common, 1 +; CHECK:SU(3): STRWui %1:gpr32, %0:gpr64common, 2 +; CHECK:SU(2): STRWui %1:gpr32, %0:gpr64common, 3 +; CHECK:SU(5): STRWui %1:gpr32, %0:gpr64common, 4 define i32 @stp_i32_scale(i32* nocapture %P, i32 %v) { entry: %arrayidx = getelementptr inbounds i32, i32* %P, i32 3 @@ -44,13 +44,13 @@ entry: } ; CHECK:********** MI Scheduling ********** -; CHECK-LABEL:stp_i64_unscale:BB#0 entry +; CHECK-LABEL:stp_i64_unscale:%bb.0 entry ; CHECK:Cluster ld/st SU(5) - SU(2) ; CHECK:Cluster ld/st SU(4) - SU(3) -; CHECK:SU(5): STURXi %vreg1, %vreg0, -32 -; CHECK:SU(2): STURXi %vreg1, %vreg0, -24 -; CHECK:SU(4): STURXi %vreg1, %vreg0, -16 -; CHECK:SU(3): STURXi %vreg1, %vreg0, -8 +; CHECK:SU(5): STURXi %1:gpr64, %0:gpr64common, -32 +; CHECK:SU(2): STURXi %1:gpr64, %0:gpr64common, -24 +; CHECK:SU(4): STURXi %1:gpr64, %0:gpr64common, -16 +; CHECK:SU(3): STURXi %1:gpr64, %0:gpr64common, -8 define void @stp_i64_unscale(i64* nocapture %P, i64 %v) #0 { entry: %arrayidx = getelementptr inbounds i64, i64* %P, i64 -3 @@ -65,13 +65,13 @@ entry: } ; CHECK:********** MI Scheduling ********** -; CHECK-LABEL:stp_i32_unscale:BB#0 entry +; CHECK-LABEL:stp_i32_unscale:%bb.0 entry ; CHECK:Cluster ld/st SU(5) - SU(2) ; CHECK:Cluster ld/st SU(4) - SU(3) -; CHECK:SU(5): STURWi %vreg1, %vreg0, -16 -; CHECK:SU(2): STURWi %vreg1, %vreg0, -12 -; CHECK:SU(4): STURWi %vreg1, %vreg0, -8 -; CHECK:SU(3): STURWi %vreg1, %vreg0, -4 +; CHECK:SU(5): STURWi %1:gpr32, %0:gpr64common, -16 +; CHECK:SU(2): STURWi %1:gpr32, %0:gpr64common, -12 +; CHECK:SU(4): STURWi %1:gpr32, %0:gpr64common, -8 +; CHECK:SU(3): STURWi %1:gpr32, %0:gpr64common, -4 define void @stp_i32_unscale(i32* nocapture %P, i32 %v) #0 { entry: %arrayidx = getelementptr inbounds i32, i32* %P, i32 -3 @@ -86,13 +86,13 @@ entry: } ; CHECK:********** MI Scheduling ********** -; CHECK-LABEL:stp_double:BB#0 +; CHECK-LABEL:stp_double:%bb.0 ; CHECK:Cluster ld/st SU(3) - SU(4) ; CHECK:Cluster 
ld/st SU(2) - SU(5) -; CHECK:SU(3): STRDui %vreg1, %vreg0, 1 -; CHECK:SU(4): STRDui %vreg1, %vreg0, 2 -; CHECK:SU(2): STRDui %vreg1, %vreg0, 3 -; CHECK:SU(5): STRDui %vreg1, %vreg0, 4 +; CHECK:SU(3): STRDui %1:fpr64, %0:gpr64common, 1 +; CHECK:SU(4): STRDui %1:fpr64, %0:gpr64common, 2 +; CHECK:SU(2): STRDui %1:fpr64, %0:gpr64common, 3 +; CHECK:SU(5): STRDui %1:fpr64, %0:gpr64common, 4 define void @stp_double(double* nocapture %P, double %v) { entry: %arrayidx = getelementptr inbounds double, double* %P, i64 3 @@ -107,13 +107,13 @@ entry: } ; CHECK:********** MI Scheduling ********** -; CHECK-LABEL:stp_float:BB#0 +; CHECK-LABEL:stp_float:%bb.0 ; CHECK:Cluster ld/st SU(3) - SU(4) ; CHECK:Cluster ld/st SU(2) - SU(5) -; CHECK:SU(3): STRSui %vreg1, %vreg0, 1 -; CHECK:SU(4): STRSui %vreg1, %vreg0, 2 -; CHECK:SU(2): STRSui %vreg1, %vreg0, 3 -; CHECK:SU(5): STRSui %vreg1, %vreg0, 4 +; CHECK:SU(3): STRSui %1:fpr32, %0:gpr64common, 1 +; CHECK:SU(4): STRSui %1:fpr32, %0:gpr64common, 2 +; CHECK:SU(2): STRSui %1:fpr32, %0:gpr64common, 3 +; CHECK:SU(5): STRSui %1:fpr32, %0:gpr64common, 4 define void @stp_float(float* nocapture %P, float %v) { entry: %arrayidx = getelementptr inbounds float, float* %P, i64 3 @@ -128,12 +128,12 @@ entry: } ; CHECK: ********** MI Scheduling ********** -; CHECK-LABEL: stp_volatile:BB#0 +; CHECK-LABEL: stp_volatile:%bb.0 ; CHECK-NOT: Cluster ld/st -; CHECK:SU(2): STRXui %vreg1, %vreg0, 3; mem:Volatile -; CHECK:SU(3): STRXui %vreg1, %vreg0, 2; mem:Volatile -; CHECK:SU(4): STRXui %vreg1, %vreg0, 1; mem:Volatile -; CHECK:SU(5): STRXui %vreg1, %vreg0, 4; mem:Volatile +; CHECK:SU(2): STRXui %1:gpr64, %0:gpr64common, 3; mem:Volatile +; CHECK:SU(3): STRXui %1:gpr64, %0:gpr64common, 2; mem:Volatile +; CHECK:SU(4): STRXui %1:gpr64, %0:gpr64common, 1; mem:Volatile +; CHECK:SU(5): STRXui %1:gpr64, %0:gpr64common, 4; mem:Volatile define i64 @stp_volatile(i64* nocapture %P, i64 %v) { entry: %arrayidx = getelementptr inbounds i64, i64* %P, i64 3 diff --git a/test/CodeGen/AArch64/aarch64_f16_be.ll b/test/CodeGen/AArch64/aarch64_f16_be.ll index 7504439bab80..b51798be1697 100644 --- a/test/CodeGen/AArch64/aarch64_f16_be.ll +++ b/test/CodeGen/AArch64/aarch64_f16_be.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=aarch64-linux-gnuabi -O0 < %s | FileCheck %s -; RUN: llc -mtriple=aarch64_be-linux-gnuabi -O0 < %s | FileCheck %s --check-prefix=CHECK-BE +; RUN: llc -mtriple=aarch64-linux-gnuabi -O0 -fast-isel < %s | FileCheck %s +; RUN: llc -mtriple=aarch64_be-linux-gnuabi -O0 -fast-isel < %s | FileCheck %s --check-prefix=CHECK-BE define void @test_bitcast_v8f16_to_v4f32(<8 x half> %a) { ; CHECK-LABEL: test_bitcast_v8f16_to_v4f32: diff --git a/test/CodeGen/AArch64/analyze-branch.ll b/test/CodeGen/AArch64/analyze-branch.ll index 932cd75052c1..4f902ef4fc83 100644 --- a/test/CodeGen/AArch64/analyze-branch.ll +++ b/test/CodeGen/AArch64/analyze-branch.ll @@ -18,7 +18,7 @@ define void @test_Bcc_fallthrough_taken(i32 %in) nounwind { ; CHECK: cmp {{w[0-9]+}}, #42 ; CHECK: b.ne [[FALSE:.LBB[0-9]+_[0-9]+]] -; CHECK-NEXT: // BB# +; CHECK-NEXT: // %bb. ; CHECK-NEXT: bl test_true ; CHECK: [[FALSE]]: @@ -41,7 +41,7 @@ define void @test_Bcc_fallthrough_nottaken(i32 %in) nounwind { ; CHECK: cmp {{w[0-9]+}}, #42 ; CHECK: b.eq [[TRUE:.LBB[0-9]+_[0-9]+]] -; CHECK-NEXT: // BB# +; CHECK-NEXT: // %bb. 
; CHECK-NEXT: bl test_false ; CHECK: [[TRUE]]: @@ -62,7 +62,7 @@ define void @test_CBZ_fallthrough_taken(i32 %in) nounwind { br i1 %tst, label %true, label %false, !prof !0 ; CHECK: cbnz {{w[0-9]+}}, [[FALSE:.LBB[0-9]+_[0-9]+]] -; CHECK-NEXT: // BB# +; CHECK-NEXT: // %bb. ; CHECK-NEXT: bl test_true ; CHECK: [[FALSE]]: @@ -83,7 +83,7 @@ define void @test_CBZ_fallthrough_nottaken(i64 %in) nounwind { br i1 %tst, label %true, label %false, !prof !1 ; CHECK: cbz {{x[0-9]+}}, [[TRUE:.LBB[0-9]+_[0-9]+]] -; CHECK-NEXT: // BB# +; CHECK-NEXT: // %bb. ; CHECK-NEXT: bl test_false ; CHECK: [[TRUE]]: @@ -104,7 +104,7 @@ define void @test_CBNZ_fallthrough_taken(i32 %in) nounwind { br i1 %tst, label %true, label %false, !prof !0 ; CHECK: cbz {{w[0-9]+}}, [[FALSE:.LBB[0-9]+_[0-9]+]] -; CHECK-NEXT: // BB# +; CHECK-NEXT: // %bb. ; CHECK-NEXT: bl test_true ; CHECK: [[FALSE]]: @@ -125,7 +125,7 @@ define void @test_CBNZ_fallthrough_nottaken(i64 %in) nounwind { br i1 %tst, label %true, label %false, !prof !1 ; CHECK: cbnz {{x[0-9]+}}, [[TRUE:.LBB[0-9]+_[0-9]+]] -; CHECK-NEXT: // BB# +; CHECK-NEXT: // %bb. ; CHECK-NEXT: bl test_false ; CHECK: [[TRUE]]: @@ -147,7 +147,7 @@ define void @test_TBZ_fallthrough_taken(i32 %in) nounwind { br i1 %tst, label %true, label %false, !prof !0 ; CHECK: tbnz {{w[0-9]+}}, #15, [[FALSE:.LBB[0-9]+_[0-9]+]] -; CHECK-NEXT: // BB# +; CHECK-NEXT: // %bb. ; CHECK-NEXT: bl test_true ; CHECK: [[FALSE]]: @@ -169,7 +169,7 @@ define void @test_TBZ_fallthrough_nottaken(i64 %in) nounwind { br i1 %tst, label %true, label %false, !prof !1 ; CHECK: tbz {{[wx][0-9]+}}, #15, [[TRUE:.LBB[0-9]+_[0-9]+]] -; CHECK-NEXT: // BB# +; CHECK-NEXT: // %bb. ; CHECK-NEXT: bl test_false ; CHECK: [[TRUE]]: @@ -192,7 +192,7 @@ define void @test_TBNZ_fallthrough_taken(i32 %in) nounwind { br i1 %tst, label %true, label %false, !prof !0 ; CHECK: tbz {{w[0-9]+}}, #15, [[FALSE:.LBB[0-9]+_[0-9]+]] -; CHECK-NEXT: // BB# +; CHECK-NEXT: // %bb. ; CHECK-NEXT: bl test_true ; CHECK: [[FALSE]]: @@ -214,7 +214,7 @@ define void @test_TBNZ_fallthrough_nottaken(i64 %in) nounwind { br i1 %tst, label %true, label %false, !prof !1 ; CHECK: tbnz {{[wx][0-9]+}}, #15, [[TRUE:.LBB[0-9]+_[0-9]+]] -; CHECK-NEXT: // BB# +; CHECK-NEXT: // %bb. 
; CHECK-NEXT: bl test_false ; CHECK: [[TRUE]]: diff --git a/test/CodeGen/AArch64/and-mask-removal.ll b/test/CodeGen/AArch64/and-mask-removal.ll index 8291516d81ea..c02bc881cd33 100644 --- a/test/CodeGen/AArch64/and-mask-removal.ll +++ b/test/CodeGen/AArch64/and-mask-removal.ll @@ -1,4 +1,4 @@ -; RUN: llc -O0 -fast-isel=false -mtriple=arm64-apple-darwin < %s | FileCheck %s +; RUN: llc -mtriple=arm64-apple-darwin < %s | FileCheck %s @board = common global [400 x i8] zeroinitializer, align 1 @next_string = common global i32 0, align 4 diff --git a/test/CodeGen/AArch64/arm64-2012-05-07-MemcpyAlignBug.ll b/test/CodeGen/AArch64/arm64-2012-05-07-MemcpyAlignBug.ll index b38b4f2a2b22..2b6cd7c2d285 100644 --- a/test/CodeGen/AArch64/arm64-2012-05-07-MemcpyAlignBug.ll +++ b/test/CodeGen/AArch64/arm64-2012-05-07-MemcpyAlignBug.ll @@ -14,8 +14,8 @@ ; CHECK-NEXT: str [[VAL2]], [x0] define void @foo(i8* %a) { - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast ([3 x i32]* @b to i8*), i64 12, i32 4, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %a, i8* align 4 bitcast ([3 x i32]* @b to i8*), i64 12, i1 false) ret void } -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind diff --git a/test/CodeGen/AArch64/arm64-2012-05-22-LdStOptBug.ll b/test/CodeGen/AArch64/arm64-2012-05-22-LdStOptBug.ll index ef8d6f3b4ef9..bd0028c74528 100644 --- a/test/CodeGen/AArch64/arm64-2012-05-22-LdStOptBug.ll +++ b/test/CodeGen/AArch64/arm64-2012-05-22-LdStOptBug.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -mtriple=arm64-apple-ios -verify-machineinstrs | FileCheck %s ; LdStOpt bug created illegal instruction: -; %D1, %D2 = LDPSi %X0, 1 +; %d1, %d2 = LDPSi %x0, 1 ; rdar://11512047 %0 = type opaque diff --git a/test/CodeGen/AArch64/arm64-EXT-undef-mask.ll b/test/CodeGen/AArch64/arm64-EXT-undef-mask.ll index 5a1eabc2ee6c..a1002989165c 100644 --- a/test/CodeGen/AArch64/arm64-EXT-undef-mask.ll +++ b/test/CodeGen/AArch64/arm64-EXT-undef-mask.ll @@ -1,4 +1,4 @@ -; RUN: llc -O0 -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs < %s | FileCheck %s ; The following 2 test cases test shufflevector with beginning UNDEF mask. 
define <8 x i16> @test_vext_undef_traverse(<8 x i16> %in) { diff --git a/test/CodeGen/AArch64/arm64-abi-varargs.ll b/test/CodeGen/AArch64/arm64-abi-varargs.ll index d6a1686d5663..e0fa5dbbaf98 100644 --- a/test/CodeGen/AArch64/arm64-abi-varargs.ll +++ b/test/CodeGen/AArch64/arm64-abi-varargs.ll @@ -164,10 +164,10 @@ entry: %4 = bitcast i8* %ap.align to %struct.s41* %5 = bitcast %struct.s41* %vs to i8* %6 = bitcast %struct.s41* %4 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %5, i8* %6, i64 16, i32 16, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %5, i8* align 16 %6, i64 16, i1 false) ret void } -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind define void @bar2(i32 %x, i128 %s41.coerce) nounwind { entry: diff --git a/test/CodeGen/AArch64/arm64-abi.ll b/test/CodeGen/AArch64/arm64-abi.ll index 5be84b7d493b..bfc03c6b9757 100644 --- a/test/CodeGen/AArch64/arm64-abi.ll +++ b/test/CodeGen/AArch64/arm64-abi.ll @@ -1,5 +1,5 @@ ; RUN: llc -mtriple=arm64-apple-darwin -mcpu=cyclone -enable-misched=false < %s | FileCheck %s -; RUN: llc -O0 -mtriple=arm64-apple-darwin < %s | FileCheck --check-prefix=FAST %s +; RUN: llc -O0 -fast-isel -mtriple=arm64-apple-darwin < %s | FileCheck --check-prefix=FAST %s ; rdar://9932559 define i64 @i8i16callee(i64 %a1, i64 %a2, i64 %a3, i8 signext %a4, i16 signext %a5, i64 %a6, i64 %a7, i64 %a8, i8 signext %b1, i16 signext %b2, i8 signext %b3, i8 signext %b4) nounwind readnone noinline { diff --git a/test/CodeGen/AArch64/arm64-abi_align.ll b/test/CodeGen/AArch64/arm64-abi_align.ll index b844aab5628c..bfb74b598fff 100644 --- a/test/CodeGen/AArch64/arm64-abi_align.ll +++ b/test/CodeGen/AArch64/arm64-abi_align.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -mtriple=arm64-apple-darwin -mcpu=cyclone -enable-misched=false -disable-fp-elim | FileCheck %s -; RUN: llc < %s -mtriple=arm64-apple-darwin -O0 -disable-fp-elim | FileCheck -check-prefix=FAST %s +; RUN: llc < %s -mtriple=arm64-apple-darwin -O0 -disable-fp-elim -fast-isel | FileCheck -check-prefix=FAST %s ; rdar://12648441 ; Generated from arm64-arguments.c with -O2. 
@@ -300,14 +300,14 @@ entry: %tmp = alloca %struct.s42, align 4 %tmp1 = alloca %struct.s42, align 4 %0 = bitcast %struct.s42* %tmp to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.s42* @g42 to i8*), i64 24, i32 4, i1 false), !tbaa.struct !4 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 bitcast (%struct.s42* @g42 to i8*), i64 24, i1 false), !tbaa.struct !4 %1 = bitcast %struct.s42* %tmp1 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast (%struct.s42* @g42_2 to i8*), i64 24, i32 4, i1 false), !tbaa.struct !4 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %1, i8* align 4 bitcast (%struct.s42* @g42_2 to i8*), i64 24, i1 false), !tbaa.struct !4 %call = call i32 @f42(i32 3, %struct.s42* %tmp, %struct.s42* %tmp1) #5 ret i32 %call } -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) #4 +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) #4 declare i32 @f42_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, %struct.s42* nocapture %s1, @@ -346,9 +346,9 @@ entry: %tmp = alloca %struct.s42, align 4 %tmp1 = alloca %struct.s42, align 4 %0 = bitcast %struct.s42* %tmp to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.s42* @g42 to i8*), i64 24, i32 4, i1 false), !tbaa.struct !4 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 bitcast (%struct.s42* @g42 to i8*), i64 24, i1 false), !tbaa.struct !4 %1 = bitcast %struct.s42* %tmp1 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast (%struct.s42* @g42_2 to i8*), i64 24, i32 4, i1 false), !tbaa.struct !4 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %1, i8* align 4 bitcast (%struct.s42* @g42_2 to i8*), i64 24, i1 false), !tbaa.struct !4 %call = call i32 @f42_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, %struct.s42* %tmp, %struct.s42* %tmp1) #5 ret i32 %call @@ -414,9 +414,9 @@ entry: %tmp = alloca %struct.s43, align 16 %tmp1 = alloca %struct.s43, align 16 %0 = bitcast %struct.s43* %tmp to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.s43* @g43 to i8*), i64 32, i32 16, i1 false), !tbaa.struct !4 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %0, i8* align 16 bitcast (%struct.s43* @g43 to i8*), i64 32, i1 false), !tbaa.struct !4 %1 = bitcast %struct.s43* %tmp1 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast (%struct.s43* @g43_2 to i8*), i64 32, i32 16, i1 false), !tbaa.struct !4 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %1, i8* align 16 bitcast (%struct.s43* @g43_2 to i8*), i64 32, i1 false), !tbaa.struct !4 %call = call i32 @f43(i32 3, %struct.s43* %tmp, %struct.s43* %tmp1) #5 ret i32 %call } @@ -465,9 +465,9 @@ entry: %tmp = alloca %struct.s43, align 16 %tmp1 = alloca %struct.s43, align 16 %0 = bitcast %struct.s43* %tmp to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.s43* @g43 to i8*), i64 32, i32 16, i1 false), !tbaa.struct !4 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %0, i8* align 16 bitcast (%struct.s43* @g43 to i8*), i64 32, i1 false), !tbaa.struct !4 %1 = bitcast %struct.s43* %tmp1 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast (%struct.s43* @g43_2 to i8*), i64 32, i32 16, i1 false), !tbaa.struct !4 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %1, i8* align 16 bitcast (%struct.s43* @g43_2 to i8*), i64 32, i1 false), !tbaa.struct !4 %call = call i32 @f43_stack(i32 1, i32 2, i32 3, i32 4, i32 
5, i32 6, i32 7, i32 8, i32 9, %struct.s43* %tmp, %struct.s43* %tmp1) #5 ret i32 %call diff --git a/test/CodeGen/AArch64/arm64-big-endian-bitconverts.ll b/test/CodeGen/AArch64/arm64-big-endian-bitconverts.ll index 6f88212cd39d..80e9b12089ce 100644 --- a/test/CodeGen/AArch64/arm64-big-endian-bitconverts.ll +++ b/test/CodeGen/AArch64/arm64-big-endian-bitconverts.ll @@ -51,6 +51,20 @@ define void @test_i64_v2i32(<2 x i32>* %p, i64* %q) { ret void } +; CHECK-LABEL: test_i64_v4f16: +define void @test_i64_v4f16(<4 x half>* %p, i64* %q) { +; CHECK: ld1 { v{{[0-9]+}}.2s } +; CHECK: rev32 v{{[0-9]+}}.4h +; CHECK: rev64 v{{[0-9]+}}.4h +; CHECK: str + %1 = load <4 x half>, <4 x half>* %p + %2 = fadd <4 x half> %1, %1 + %3 = bitcast <4 x half> %2 to i64 + %4 = add i64 %3, %3 + store i64 %4, i64* %q + ret void +} + ; CHECK-LABEL: test_i64_v4i16: define void @test_i64_v4i16(<4 x i16>* %p, i64* %q) { ; CHECK: ld1 { v{{[0-9]+}}.4h } @@ -140,6 +154,20 @@ define void @test_f64_v4i16(<4 x i16>* %p, double* %q) { ret void } +; CHECK-LABEL: test_f64_v4f16: +define void @test_f64_v4f16(<4 x half>* %p, double* %q) { +; CHECK: ld1 { v{{[0-9]+}}.2s } +; CHECK: rev32 v{{[0-9]+}}.4h +; CHECK: rev64 v{{[0-9]+}}.4h +; CHECK: str + %1 = load <4 x half>, <4 x half>* %p + %2 = fadd <4 x half> %1, %1 + %3 = bitcast <4 x half> %2 to double + %4 = fadd double %3, %3 + store double %4, double* %q + ret void +} + ; CHECK-LABEL: test_f64_v8i8: define void @test_f64_v8i8(<8 x i8>* %p, double* %q) { ; CHECK: ld1 { v{{[0-9]+}}.8b } @@ -203,6 +231,20 @@ define void @test_v1i64_v2i32(<2 x i32>* %p, <1 x i64>* %q) { ret void } +; CHECK-LABEL: test_v1i64_v4f16: +define void @test_v1i64_v4f16(<4 x half>* %p, <1 x i64>* %q) { +; CHECK: ld1 { v{{[0-9]+}}.2s } +; CHECK: rev32 v{{[0-9]+}}.4h +; CHECK: rev64 v{{[0-9]+}}.4h +; CHECK: str + %1 = load <4 x half>, <4 x half>* %p + %2 = fadd <4 x half> %1, %1 + %3 = bitcast <4 x half> %2 to <1 x i64> + %4 = add <1 x i64> %3, %3 + store <1 x i64> %4, <1 x i64>* %q + ret void +} + ; CHECK-LABEL: test_v1i64_v4i16: define void @test_v1i64_v4i16(<4 x i16>* %p, <1 x i64>* %q) { ; CHECK: ld1 { v{{[0-9]+}}.4h } @@ -293,6 +335,20 @@ define void @test_v2f32_v4i16(<4 x i16>* %p, <2 x float>* %q) { ret void } +; CHECK-LABEL: test_v2f32_v4f16: +define void @test_v2f32_v4f16(<4 x half>* %p, <2 x float>* %q) { +; CHECK: ld1 { v{{[0-9]+}}.2s } +; CHECK: rev32 v{{[0-9]+}}.4h +; CHECK: rev32 v{{[0-9]+}}.4h +; CHECK: st1 { v{{[0-9]+}}.2s } + %1 = load <4 x half>, <4 x half>* %p + %2 = fadd <4 x half> %1, %1 + %3 = bitcast <4 x half> %2 to <2 x float> + %4 = fadd <2 x float> %3, %3 + store <2 x float> %4, <2 x float>* %q + ret void +} + ; CHECK-LABEL: test_v2f32_v8i8: define void @test_v2f32_v8i8(<8 x i8>* %p, <2 x float>* %q) { ; CHECK: ld1 { v{{[0-9]+}}.8b } @@ -448,6 +504,19 @@ define void @test_v4i16_v2i32(<2 x i32>* %p, <4 x i16>* %q) { ret void } +; CHECK-LABEL: test_v4i16_v4f16: +define void @test_v4i16_v4f16(<4 x half>* %p, <4 x i16>* %q) { +; CHECK: ld1 { v{{[0-9]+}}.2s } +; CHECK: rev32 v{{[0-9]+}}.4h +; CHECK: st1 { v{{[0-9]+}}.4h } + %1 = load <4 x half>, <4 x half>* %p + %2 = fadd <4 x half> %1, %1 + %3 = bitcast <4 x half> %2 to <4 x i16> + %4 = add <4 x i16> %3, %3 + store <4 x i16> %4, <4 x i16>* %q + ret void +} + ; CHECK-LABEL: test_v4i16_v8i8: define void @test_v4i16_v8i8(<8 x i8>* %p, <4 x i16>* %q) { ; CHECK: ld1 { v{{[0-9]+}}.8b } @@ -461,6 +530,103 @@ define void @test_v4i16_v8i8(<8 x i8>* %p, <4 x i16>* %q) { ret void } +; CHECK-LABEL: test_v4f16_i64: +define void @test_v4f16_i64(i64* %p, 
<4 x half>* %q) { +; CHECK: ldr +; CHECK: rev64 v{{[0-9]+}}.4h +; CHECK: rev32 v{{[0-9]+}}.4h +; CHECK: st1 { v{{[0-9]+}}.2s } + %1 = load i64, i64* %p + %2 = add i64 %1, %1 + %3 = bitcast i64 %2 to <4 x half> + %4 = fadd <4 x half> %3, %3 + store <4 x half> %4, <4 x half>* %q + ret void +} + +; CHECK-LABEL: test_v4f16_f64: +define void @test_v4f16_f64(double* %p, <4 x half>* %q) { +; CHECK: ldr +; CHECK: rev64 v{{[0-9]+}}.4h +; CHECK: rev32 v{{[0-9]+}}.4h +; CHECK: st1 { v{{[0-9]+}}.2s } + %1 = load double, double* %p + %2 = fadd double %1, %1 + %3 = bitcast double %2 to <4 x half> + %4 = fadd <4 x half> %3, %3 + store <4 x half> %4, <4 x half>* %q + ret void +} + +; CHECK-LABEL: test_v4f16_v1i64: +define void @test_v4f16_v1i64(<1 x i64>* %p, <4 x half>* %q) { +; CHECK: ldr +; CHECK: rev64 v{{[0-9]+}}.4h +; CHECK: rev32 v{{[0-9]+}}.4h +; CHECK: st1 { v{{[0-9]+}}.2s } + %1 = load <1 x i64>, <1 x i64>* %p + %2 = add <1 x i64> %1, %1 + %3 = bitcast <1 x i64> %2 to <4 x half> + %4 = fadd <4 x half> %3, %3 + store <4 x half> %4, <4 x half>* %q + ret void +} + +; CHECK-LABEL: test_v4f16_v2f32: +define void @test_v4f16_v2f32(<2 x float>* %p, <4 x half>* %q) { +; CHECK: ld1 { v{{[0-9]+}}.2s } +; CHECK: rev32 v{{[0-9]+}}.4h +; CHECK: rev32 v{{[0-9]+}}.4h +; CHECK: st1 { v{{[0-9]+}}.2s } + %1 = load <2 x float>, <2 x float>* %p + %2 = fadd <2 x float> %1, %1 + %3 = bitcast <2 x float> %2 to <4 x half> + %4 = fadd <4 x half> %3, %3 + store <4 x half> %4, <4 x half>* %q + ret void +} + +; CHECK-LABEL: test_v4f16_v2i32: +define void @test_v4f16_v2i32(<2 x i32>* %p, <4 x half>* %q) { +; CHECK: ld1 { v{{[0-9]+}}.2s } +; CHECK: rev32 v{{[0-9]+}}.4h +; CHECK: rev32 v{{[0-9]+}}.4h +; CHECK: st1 { v{{[0-9]+}}.2s } + %1 = load <2 x i32>, <2 x i32>* %p + %2 = add <2 x i32> %1, %1 + %3 = bitcast <2 x i32> %2 to <4 x half> + %4 = fadd <4 x half> %3, %3 + store <4 x half> %4, <4 x half>* %q + ret void +} + +; CHECK-LABEL: test_v4f16_v4i16: +define void @test_v4f16_v4i16(<4 x i16>* %p, <4 x half>* %q) { +; CHECK: ld1 { v{{[0-9]+}}.4h } +; CHECK: rev32 v{{[0-9]+}}.4h +; CHECK: st1 { v{{[0-9]+}}.2s } + %1 = load <4 x i16>, <4 x i16>* %p + %2 = add <4 x i16> %1, %1 + %3 = bitcast <4 x i16> %2 to <4 x half> + %4 = fadd <4 x half> %3, %3 + store <4 x half> %4, <4 x half>* %q + ret void +} + +; CHECK-LABEL: test_v4f16_v8i8: +define void @test_v4f16_v8i8(<8 x i8>* %p, <4 x half>* %q) { +; CHECK: ld1 { v{{[0-9]+}}.8b } +; CHECK: rev16 v{{[0-9]+}}.8b +; CHECK: rev32 v{{[0-9]+}}.4h +; CHECK: st1 { v{{[0-9]+}}.2s } + %1 = load <8 x i8>, <8 x i8>* %p + %2 = add <8 x i8> %1, %1 + %3 = bitcast <8 x i8> %2 to <4 x half> + %4 = fadd <4 x half> %3, %3 + store <4 x half> %4, <4 x half>* %q + ret void +} + ; CHECK-LABEL: test_v8i8_i64: define void @test_v8i8_i64(i64* %p, <8 x i8>* %q) { ; CHECK: ldr @@ -1007,6 +1173,19 @@ define void @test_v8i16_v4i32(<4 x i32>* %p, <8 x i16>* %q) { ret void } +; CHECK-LABEL: test_v8i16_v8f16: +define void @test_v8i16_v8f16(<8 x half>* %p, <8 x i16>* %q) { +; CHECK: ld1 { v{{[0-9]+}}.2d } +; CHECK: rev64 v{{[0-9]+}}.8h +; CHECK: st1 { v{{[0-9]+}}.8h } + %1 = load <8 x half>, <8 x half>* %p + %2 = fadd <8 x half> %1, %1 + %3 = bitcast <8 x half> %2 to <8 x i16> + %4 = add <8 x i16> %3, %3 + store <8 x i16> %4, <8 x i16>* %q + ret void +} + ; CHECK-LABEL: test_v8i16_v16i8: define void @test_v8i16_v16i8(<16 x i8>* %p, <8 x i16>* %q) { ; CHECK: ld1 { v{{[0-9]+}}.16b } @@ -1087,6 +1266,20 @@ define void @test_v16i8_v4i32(<4 x i32>* %p, <16 x i8>* %q) { ret void } +; CHECK-LABEL: test_v16i8_v8f16: 
+define void @test_v16i8_v8f16(<8 x half>* %p, <16 x i8>* %q) { +; CHECK: ld1 { v{{[0-9]+}}.2d } +; CHECK: rev64 v{{[0-9]+}}.8h +; CHECK: rev16 v{{[0-9]+}}.16b +; CHECK: st1 { v{{[0-9]+}}.16b } + %1 = load <8 x half>, <8 x half>* %p + %2 = fadd <8 x half> %1, %1 + %3 = bitcast <8 x half> %2 to <16 x i8> + %4 = add <16 x i8> %3, %3 + store <16 x i8> %4, <16 x i8>* %q + ret void +} + ; CHECK-LABEL: test_v16i8_v8i16: define void @test_v16i8_v8i16(<8 x i16>* %p, <16 x i8>* %q) { ; CHECK: ld1 { v{{[0-9]+}}.8h } @@ -1099,3 +1292,17 @@ define void @test_v16i8_v8i16(<8 x i16>* %p, <16 x i8>* %q) { store <16 x i8> %4, <16 x i8>* %q ret void } + +; CHECK-LABEL: test_v4f16_struct: +%struct.struct1 = type { half, half, half, half } +define %struct.struct1 @test_v4f16_struct(%struct.struct1* %ret) { +entry: +; CHECK: ld1 { {{v[0-9]+}}.2s } +; CHECK: rev32 +; CHECK-NOT; rev64 + %0 = bitcast %struct.struct1* %ret to <4 x half>* + %1 = load <4 x half>, <4 x half>* %0, align 2 + %2 = extractelement <4 x half> %1, i32 0 + %.fca.0.insert = insertvalue %struct.struct1 undef, half %2, 0 + ret %struct.struct1 %.fca.0.insert +} diff --git a/test/CodeGen/AArch64/arm64-build-vector.ll b/test/CodeGen/AArch64/arm64-build-vector.ll index 9d3247350499..68dea215c8c4 100644 --- a/test/CodeGen/AArch64/arm64-build-vector.ll +++ b/test/CodeGen/AArch64/arm64-build-vector.ll @@ -1,23 +1,5 @@ ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s -; Check that building up a vector w/ only one non-zero lane initializes -; intelligently. -define void @one_lane(i32* nocapture %out_int, i32 %skip0) nounwind { -; CHECK-LABEL: one_lane: -; CHECK: dup.16b v[[REG:[0-9]+]], wzr -; CHECK-NEXT: mov.b v[[REG]][0], w1 -; v and q are aliases, and str is preferred against st.16b when possible -; rdar://11246289 -; CHECK: str q[[REG]], [x0] -; CHECK: ret - %conv = trunc i32 %skip0 to i8 - %vset_lane = insertelement <16 x i8> , i8 %conv, i32 0 - %tmp = bitcast i32* %out_int to <4 x i32>* - %tmp1 = bitcast <16 x i8> %vset_lane to <4 x i32> - store <4 x i32> %tmp1, <4 x i32>* %tmp, align 16 - ret void -} - ; Check that building a vector from floats doesn't insert an unnecessary ; copy for lane zero. define <4 x float> @foo(float %a, float %b, float %c, float %d) nounwind { diff --git a/test/CodeGen/AArch64/arm64-ccmp.ll b/test/CodeGen/AArch64/arm64-ccmp.ll index a910585e7f5d..b18e638a3a94 100644 --- a/test/CodeGen/AArch64/arm64-ccmp.ll +++ b/test/CodeGen/AArch64/arm64-ccmp.ll @@ -132,6 +132,7 @@ if.end: ; Floating point compare. ; CHECK: single_fcmp +; CHECK: ; %bb. ; CHECK: cmp ; CHECK-NOT: b. ; CHECK: fccmp {{.*}}, #8, ge @@ -448,7 +449,7 @@ define i32 @select_noccmp3(i32 %v0, i32 %v1, i32 %v2) { ; Test the IR CCs that expand to two cond codes. 
; CHECK-LABEL: select_and_olt_one: -; CHECK-LABEL: ; BB#0: +; CHECK-LABEL: ; %bb.0: ; CHECK-NEXT: fcmp d0, d1 ; CHECK-NEXT: fccmp d2, d3, #4, mi ; CHECK-NEXT: fccmp d2, d3, #1, ne @@ -463,7 +464,7 @@ define i32 @select_and_olt_one(double %v0, double %v1, double %v2, double %v3, i } ; CHECK-LABEL: select_and_one_olt: -; CHECK-LABEL: ; BB#0: +; CHECK-LABEL: ; %bb.0: ; CHECK-NEXT: fcmp d0, d1 ; CHECK-NEXT: fccmp d0, d1, #1, ne ; CHECK-NEXT: fccmp d2, d3, #0, vc @@ -478,7 +479,7 @@ define i32 @select_and_one_olt(double %v0, double %v1, double %v2, double %v3, i } ; CHECK-LABEL: select_and_olt_ueq: -; CHECK-LABEL: ; BB#0: +; CHECK-LABEL: ; %bb.0: ; CHECK-NEXT: fcmp d0, d1 ; CHECK-NEXT: fccmp d2, d3, #0, mi ; CHECK-NEXT: fccmp d2, d3, #8, le @@ -493,7 +494,7 @@ define i32 @select_and_olt_ueq(double %v0, double %v1, double %v2, double %v3, i } ; CHECK-LABEL: select_and_ueq_olt: -; CHECK-LABEL: ; BB#0: +; CHECK-LABEL: ; %bb.0: ; CHECK-NEXT: fcmp d0, d1 ; CHECK-NEXT: fccmp d0, d1, #8, le ; CHECK-NEXT: fccmp d2, d3, #0, pl @@ -508,7 +509,7 @@ define i32 @select_and_ueq_olt(double %v0, double %v1, double %v2, double %v3, i } ; CHECK-LABEL: select_or_olt_one: -; CHECK-LABEL: ; BB#0: +; CHECK-LABEL: ; %bb.0: ; CHECK-NEXT: fcmp d0, d1 ; CHECK-NEXT: fccmp d2, d3, #0, pl ; CHECK-NEXT: fccmp d2, d3, #8, le @@ -523,7 +524,7 @@ define i32 @select_or_olt_one(double %v0, double %v1, double %v2, double %v3, i3 } ; CHECK-LABEL: select_or_one_olt: -; CHECK-LABEL: ; BB#0: +; CHECK-LABEL: ; %bb.0: ; CHECK-NEXT: fcmp d0, d1 ; CHECK-NEXT: fccmp d0, d1, #1, ne ; CHECK-NEXT: fccmp d2, d3, #8, vs @@ -538,7 +539,7 @@ define i32 @select_or_one_olt(double %v0, double %v1, double %v2, double %v3, i3 } ; CHECK-LABEL: select_or_olt_ueq: -; CHECK-LABEL: ; BB#0: +; CHECK-LABEL: ; %bb.0: ; CHECK-NEXT: fcmp d0, d1 ; CHECK-NEXT: fccmp d2, d3, #4, pl ; CHECK-NEXT: fccmp d2, d3, #1, ne @@ -553,7 +554,7 @@ define i32 @select_or_olt_ueq(double %v0, double %v1, double %v2, double %v3, i3 } ; CHECK-LABEL: select_or_ueq_olt: -; CHECK-LABEL: ; BB#0: +; CHECK-LABEL: ; %bb.0: ; CHECK-NEXT: fcmp d0, d1 ; CHECK-NEXT: fccmp d0, d1, #8, le ; CHECK-NEXT: fccmp d2, d3, #8, mi @@ -568,7 +569,7 @@ define i32 @select_or_ueq_olt(double %v0, double %v1, double %v2, double %v3, i3 } ; CHECK-LABEL: select_or_olt_ogt_ueq: -; CHECK-LABEL: ; BB#0: +; CHECK-LABEL: ; %bb.0: ; CHECK-NEXT: fcmp d0, d1 ; CHECK-NEXT: fccmp d2, d3, #0, pl ; CHECK-NEXT: fccmp d4, d5, #4, le @@ -586,7 +587,7 @@ define i32 @select_or_olt_ogt_ueq(double %v0, double %v1, double %v2, double %v3 } ; CHECK-LABEL: select_or_olt_ueq_ogt: -; CHECK-LABEL: ; BB#0: +; CHECK-LABEL: ; %bb.0: ; CHECK-NEXT: fcmp d0, d1 ; CHECK-NEXT: fccmp d2, d3, #4, pl ; CHECK-NEXT: fccmp d2, d3, #1, ne @@ -606,7 +607,7 @@ define i32 @select_or_olt_ueq_ogt(double %v0, double %v1, double %v2, double %v3 ; Verify that we correctly promote f16. 
; CHECK-LABEL: half_select_and_olt_oge: -; CHECK-LABEL: ; BB#0: +; CHECK-LABEL: ; %bb.0: ; CHECK-DAG: fcvt [[S0:s[0-9]+]], h0 ; CHECK-DAG: fcvt [[S1:s[0-9]+]], h1 ; CHECK-NEXT: fcmp [[S0]], [[S1]] @@ -624,7 +625,7 @@ define i32 @half_select_and_olt_oge(half %v0, half %v1, half %v2, half %v3, i32 } ; CHECK-LABEL: half_select_and_olt_one: -; CHECK-LABEL: ; BB#0: +; CHECK-LABEL: ; %bb.0: ; CHECK-DAG: fcvt [[S0:s[0-9]+]], h0 ; CHECK-DAG: fcvt [[S1:s[0-9]+]], h1 ; CHECK-NEXT: fcmp [[S0]], [[S1]] diff --git a/test/CodeGen/AArch64/arm64-csldst-mmo.ll b/test/CodeGen/AArch64/arm64-csldst-mmo.ll index 37cc5411aa31..dfb04fb07402 100644 --- a/test/CodeGen/AArch64/arm64-csldst-mmo.ll +++ b/test/CodeGen/AArch64/arm64-csldst-mmo.ll @@ -10,8 +10,8 @@ ; ; CHECK: Before post-MI-sched: ; CHECK-LABEL: # Machine code for function test1: -; CHECK: SU(2): STRWui %WZR -; CHECK: SU(3): %X21, %X20 = LDPXi %SP +; CHECK: SU(2): STRWui %wzr +; CHECK: SU(3): %x21, %x20 = frame-destroy LDPXi %sp, 2 ; CHECK: Predecessors: ; CHECK-NEXT: SU(0): Out ; CHECK-NEXT: SU(0): Out diff --git a/test/CodeGen/AArch64/arm64-dead-register-def-bug.ll b/test/CodeGen/AArch64/arm64-dead-register-def-bug.ll index 1bbcf50ba73c..d43efa7ee794 100644 --- a/test/CodeGen/AArch64/arm64-dead-register-def-bug.ll +++ b/test/CodeGen/AArch64/arm64-dead-register-def-bug.ll @@ -3,7 +3,7 @@ ; Check that the dead register definition pass is considering implicit defs. ; When rematerializing through truncates, the coalescer may produce instructions ; with dead defs, but live implicit-defs of subregs: -; E.g. %X1 = MOVi64imm 2, %W1; %X1:GPR64, %W1:GPR32 +; E.g. dead %x1 = MOVi64imm 2, implicit-def %w1; %x1:GPR64, %w1:GPR32 ; These instructions are live, and their definitions should not be rewritten. ; ; diff --git a/test/CodeGen/AArch64/arm64-elf-constpool.ll b/test/CodeGen/AArch64/arm64-elf-constpool.ll index 95d334376b76..9f7a885f0087 100644 --- a/test/CodeGen/AArch64/arm64-elf-constpool.ll +++ b/test/CodeGen/AArch64/arm64-elf-constpool.ll @@ -1,5 +1,5 @@ ; RUN: llc -mtriple=arm64-linux-gnu -o - %s | FileCheck %s -; RUN: llc -mtriple=arm64-linux-gnu -O0 -o - %s | FileCheck %s +; RUN: llc -mtriple=arm64-linux-gnu -O0 -fast-isel -o - %s | FileCheck %s ; O0 checked for fastisel purposes. It has a separate path which ; creates a constpool entry for floating values. 
diff --git a/test/CodeGen/AArch64/arm64-elf-globals.ll b/test/CodeGen/AArch64/arm64-elf-globals.ll index 92dc8179f8ea..6cb72e2e3f4e 100644 --- a/test/CodeGen/AArch64/arm64-elf-globals.ll +++ b/test/CodeGen/AArch64/arm64-elf-globals.ll @@ -1,11 +1,11 @@ ; RUN: llc -mtriple=arm64-linux-gnu -o - %s -mcpu=cyclone | FileCheck %s -; RUN: llc -mtriple=arm64-linux-gnu -o - %s -O0 -mcpu=cyclone | FileCheck %s --check-prefix=CHECK-FAST +; RUN: llc -mtriple=arm64-linux-gnu -o - %s -O0 -fast-isel -mcpu=cyclone | FileCheck %s --check-prefix=CHECK-FAST ; RUN: llc -mtriple=arm64-linux-gnu -relocation-model=pic -o - %s -mcpu=cyclone | FileCheck %s --check-prefix=CHECK-PIC -; RUN: llc -mtriple=arm64-linux-gnu -O0 -relocation-model=pic -o - %s -mcpu=cyclone | FileCheck %s --check-prefix=CHECK-FAST-PIC +; RUN: llc -mtriple=arm64-linux-gnu -O0 -fast-isel -relocation-model=pic -o - %s -mcpu=cyclone | FileCheck %s --check-prefix=CHECK-FAST-PIC ; RUN: llc -mtriple=aarch64-fuchsia -code-model=kernel -o - %s -mcpu=cyclone | FileCheck %s -; RUN: llc -mtriple=aarch64-fuchsia -code-model=kernel -o - %s -O0 -mcpu=cyclone | FileCheck %s --check-prefix=CHECK-FAST +; RUN: llc -mtriple=aarch64-fuchsia -code-model=kernel -o - %s -O0 -fast-isel -mcpu=cyclone | FileCheck %s --check-prefix=CHECK-FAST ; RUN: llc -mtriple=aarch64-fuchsia -code-model=kernel -relocation-model=pic -o - %s -mcpu=cyclone | FileCheck %s --check-prefix=CHECK-PIC -; RUN: llc -mtriple=aarch64-fuchsia -code-model=kernel -O0 -relocation-model=pic -o - %s -mcpu=cyclone | FileCheck %s --check-prefix=CHECK-FAST-PIC +; RUN: llc -mtriple=aarch64-fuchsia -code-model=kernel -O0 -fast-isel -relocation-model=pic -o - %s -mcpu=cyclone | FileCheck %s --check-prefix=CHECK-FAST-PIC @var8 = external global i8, align 1 @var16 = external global i16, align 2 diff --git a/test/CodeGen/AArch64/arm64-fast-isel-alloca.ll b/test/CodeGen/AArch64/arm64-fast-isel-alloca.ll index bdc24aea2144..256db180d911 100644 --- a/test/CodeGen/AArch64/arm64-fast-isel-alloca.ll +++ b/test/CodeGen/AArch64/arm64-fast-isel-alloca.ll @@ -1,5 +1,5 @@ ; This test should cause the TargetMaterializeAlloca to be invoked -; RUN: llc -O0 -fast-isel-abort=1 -verify-machineinstrs -mtriple=arm64-apple-darwin -disable-fp-elim < %s | FileCheck %s +; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -verify-machineinstrs -mtriple=arm64-apple-darwin -disable-fp-elim < %s | FileCheck %s %struct.S1Ty = type { i64 } %struct.S2Ty = type { %struct.S1Ty, %struct.S1Ty } diff --git a/test/CodeGen/AArch64/arm64-fast-isel-br.ll b/test/CodeGen/AArch64/arm64-fast-isel-br.ll index 55c9c6036ed5..87d6811f239e 100644 --- a/test/CodeGen/AArch64/arm64-fast-isel-br.ll +++ b/test/CodeGen/AArch64/arm64-fast-isel-br.ll @@ -1,4 +1,4 @@ -; RUN: llc -O0 -fast-isel-abort=1 -mtriple=arm64-apple-darwin -mcpu=cyclone -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -mtriple=arm64-apple-darwin -mcpu=cyclone -verify-machineinstrs < %s | FileCheck %s define void @branch1() nounwind uwtable ssp { %x = alloca i32, align 4 diff --git a/test/CodeGen/AArch64/arm64-fast-isel-call.ll b/test/CodeGen/AArch64/arm64-fast-isel-call.ll index 59c4e38e5467..4cf23545aabc 100644 --- a/test/CodeGen/AArch64/arm64-fast-isel-call.ll +++ b/test/CodeGen/AArch64/arm64-fast-isel-call.ll @@ -1,6 +1,6 @@ -; RUN: llc -O0 -fast-isel-abort=2 -code-model=small -verify-machineinstrs -disable-fp-elim -mtriple=arm64-apple-darwin < %s | FileCheck %s -; RUN: llc -O0 -fast-isel-abort=2 -code-model=large -verify-machineinstrs -disable-fp-elim 
-mtriple=arm64-apple-darwin < %s | FileCheck %s --check-prefix=LARGE -; RUN: llc -O0 -fast-isel-abort=2 -code-model=small -verify-machineinstrs -disable-fp-elim -mtriple=aarch64_be-linux-gnu < %s | FileCheck %s --check-prefix=CHECK-BE +; RUN: llc -O0 -fast-isel -fast-isel-abort=2 -code-model=small -verify-machineinstrs -disable-fp-elim -mtriple=arm64-apple-darwin < %s | FileCheck %s +; RUN: llc -O0 -fast-isel -fast-isel-abort=2 -code-model=large -verify-machineinstrs -disable-fp-elim -mtriple=arm64-apple-darwin < %s | FileCheck %s --check-prefix=LARGE +; RUN: llc -O0 -fast-isel -fast-isel-abort=2 -code-model=small -verify-machineinstrs -disable-fp-elim -mtriple=aarch64_be-linux-gnu < %s | FileCheck %s --check-prefix=CHECK-BE define void @call0() nounwind { entry: diff --git a/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll b/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll index 16a02de79a91..b3e649c3fc33 100644 --- a/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll +++ b/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll @@ -1,4 +1,4 @@ -; RUN: llc -O0 -verify-machineinstrs -mtriple=arm64-eabi < %s | FileCheck --enable-var-scope %s +; RUN: llc -O0 -fast-isel -verify-machineinstrs -mtriple=arm64-eabi < %s | FileCheck --enable-var-scope %s ; Test fptosi define i32 @fptosi_wh(half %a) nounwind ssp { diff --git a/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll b/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll index 1b6886523311..7b208cceb5b2 100644 --- a/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll +++ b/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll @@ -1,4 +1,4 @@ -; RUN: llc -O0 -fast-isel-abort=1 -verify-machineinstrs -mtriple=arm64-apple-darwin -mcpu=cyclone < %s | FileCheck %s +; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -verify-machineinstrs -mtriple=arm64-apple-darwin -mcpu=cyclone < %s | FileCheck %s ;; Test various conversions. define zeroext i32 @trunc_(i8 zeroext %a, i16 zeroext %b, i32 %c, i64 %d) nounwind ssp { diff --git a/test/CodeGen/AArch64/arm64-fast-isel-fcmp.ll b/test/CodeGen/AArch64/arm64-fast-isel-fcmp.ll index c77949f996c3..51ec377ccaf4 100644 --- a/test/CodeGen/AArch64/arm64-fast-isel-fcmp.ll +++ b/test/CodeGen/AArch64/arm64-fast-isel-fcmp.ll @@ -1,4 +1,4 @@ -; RUN: llc -O0 -fast-isel-abort=1 -verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s +; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s define zeroext i1 @fcmp_float1(float %a) { ; CHECK-LABEL: fcmp_float1 diff --git a/test/CodeGen/AArch64/arm64-fast-isel-gv.ll b/test/CodeGen/AArch64/arm64-fast-isel-gv.ll index 85d000b8606b..00e2fab81f98 100644 --- a/test/CodeGen/AArch64/arm64-fast-isel-gv.ll +++ b/test/CodeGen/AArch64/arm64-fast-isel-gv.ll @@ -1,4 +1,4 @@ -; RUN: llc -O0 -fast-isel-abort=1 -verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s +; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s ; Test load/store of global value from global offset table. 
@seed = common global i64 0, align 8 diff --git a/test/CodeGen/AArch64/arm64-fast-isel-icmp.ll b/test/CodeGen/AArch64/arm64-fast-isel-icmp.ll index 4bc02ebdd3e1..4288aa1df444 100644 --- a/test/CodeGen/AArch64/arm64-fast-isel-icmp.ll +++ b/test/CodeGen/AArch64/arm64-fast-isel-icmp.ll @@ -1,4 +1,4 @@ -; RUN: llc -O0 -fast-isel-abort=1 -verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s +; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s define i32 @icmp_eq_imm(i32 %a) nounwind ssp { entry: diff --git a/test/CodeGen/AArch64/arm64-fast-isel-intrinsic.ll b/test/CodeGen/AArch64/arm64-fast-isel-intrinsic.ll index a8f30ad4777d..e43160ab340c 100644 --- a/test/CodeGen/AArch64/arm64-fast-isel-intrinsic.ll +++ b/test/CodeGen/AArch64/arm64-fast-isel-intrinsic.ll @@ -1,4 +1,4 @@ -; RUN: llc -O0 -fast-isel-abort=1 -verify-machineinstrs -relocation-model=dynamic-no-pic -mtriple=arm64-apple-ios < %s | FileCheck %s --check-prefix=ARM64 +; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -verify-machineinstrs -relocation-model=dynamic-no-pic -mtriple=arm64-apple-ios < %s | FileCheck %s --check-prefix=ARM64 @message = global [80 x i8] c"The LLVM Compiler Infrastructure\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00", align 16 @temp = common global [80 x i8] zeroinitializer, align 16 @@ -11,11 +11,11 @@ define void @t1() { ; ARM64: mov x2, #80 ; ARM64: uxtb w1, w9 ; ARM64: bl _memset - call void @llvm.memset.p0i8.i64(i8* getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i8 0, i64 80, i32 16, i1 false) + call void @llvm.memset.p0i8.i64(i8* align 16 getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i8 0, i64 80, i1 false) ret void } -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) define void @t2() { ; ARM64-LABEL: t2 @@ -25,11 +25,11 @@ define void @t2() { ; ARM64: add x1, x8, _message@PAGEOFF ; ARM64: mov x2, #80 ; ARM64: bl _memcpy - call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 80, i32 16, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* align 16 getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 80, i1 false) ret void } -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) define void @t3() { ; ARM64-LABEL: t3 @@ -39,11 +39,11 @@ define void @t3() { ; ARM64: add x1, x8, _message@PAGEOFF ; ARM64: mov x2, #20 ; ARM64: bl _memmove - call void @llvm.memmove.p0i8.p0i8.i64(i8* getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 20, i32 16, i1 false) + call void @llvm.memmove.p0i8.p0i8.i64(i8* align 16 getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* align 16 getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 20, i1 false) ret void } -declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) +declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) define void @t4() { ; 
ARM64-LABEL: t4 @@ -58,7 +58,7 @@ define void @t4() { ; ARM64: ldrb w11, [x9, #16] ; ARM64: strb w11, [x8, #16] ; ARM64: ret - call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 17, i32 16, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* align 16 getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 17, i1 false) ret void } @@ -75,7 +75,7 @@ define void @t5() { ; ARM64: ldrb w11, [x9, #16] ; ARM64: strb w11, [x8, #16] ; ARM64: ret - call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 17, i32 8, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* align 8 getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 17, i1 false) ret void } @@ -92,7 +92,7 @@ define void @t6() { ; ARM64: ldrb w10, [x9, #8] ; ARM64: strb w10, [x8, #8] ; ARM64: ret - call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 9, i32 4, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* align 4 getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 9, i1 false) ret void } @@ -111,7 +111,7 @@ define void @t7() { ; ARM64: ldrb w10, [x9, #6] ; ARM64: strb w10, [x8, #6] ; ARM64: ret - call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 7, i32 2, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 2 getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* align 2 getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 7, i1 false) ret void } @@ -130,7 +130,7 @@ define void @t8() { ; ARM64: ldrb w10, [x9, #3] ; ARM64: strb w10, [x8, #3] ; ARM64: ret - call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 4, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* align 1 getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 4, i1 false) ret void } @@ -143,6 +143,6 @@ define void @test_distant_memcpy(i8* %dst) { ; ARM64: strb [[BYTE]], [x0] %array = alloca i8, i32 8192 %elem = getelementptr i8, i8* %array, i32 8000 - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %elem, i64 1, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %elem, i64 1, i1 false) ret void } diff --git a/test/CodeGen/AArch64/arm64-fast-isel-materialize.ll b/test/CodeGen/AArch64/arm64-fast-isel-materialize.ll index b5a08c148930..234731cfa242 100644 --- a/test/CodeGen/AArch64/arm64-fast-isel-materialize.ll +++ b/test/CodeGen/AArch64/arm64-fast-isel-materialize.ll @@ -1,4 +1,4 @@ -; RUN: llc -O0 -fast-isel-abort=1 -verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s +; RUN: llc -O0 -fast-isel -fast-isel-abort=1 
-verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s ; Materialize using fmov define float @fmov_float1() { diff --git a/test/CodeGen/AArch64/arm64-fast-isel-noconvert.ll b/test/CodeGen/AArch64/arm64-fast-isel-noconvert.ll index 81daa7c1d5ac..d9997f916955 100644 --- a/test/CodeGen/AArch64/arm64-fast-isel-noconvert.ll +++ b/test/CodeGen/AArch64/arm64-fast-isel-noconvert.ll @@ -1,4 +1,4 @@ -; RUN: llc -O0 -verify-machineinstrs -mtriple=aarch64-apple-ios < %s | FileCheck %s +; RUN: llc -O0 -fast-isel -verify-machineinstrs -mtriple=aarch64-apple-ios < %s | FileCheck %s ; Fast-isel can't do vector conversions yet, but it was emitting some highly ; suspect UCVTFUWDri MachineInstrs. diff --git a/test/CodeGen/AArch64/arm64-fast-isel-rem.ll b/test/CodeGen/AArch64/arm64-fast-isel-rem.ll index 05aa96997b57..635e6b92542a 100644 --- a/test/CodeGen/AArch64/arm64-fast-isel-rem.ll +++ b/test/CodeGen/AArch64/arm64-fast-isel-rem.ll @@ -1,12 +1,12 @@ -; RUN: llc -O0 -fast-isel-abort=1 -verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s -; RUN: llc %s -O0 -fast-isel-abort=1 -mtriple=arm64-apple-darwin -print-machineinstrs=expand-isel-pseudos -o /dev/null 2> %t +; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s +; RUN: llc %s -O0 -fast-isel -fast-isel-abort=1 -mtriple=arm64-apple-darwin -print-machineinstrs=expand-isel-pseudos -o /dev/null 2> %t ; RUN: FileCheck %s < %t --check-prefix=CHECK-SSA ; CHECK-SSA-LABEL: Machine code for function t1 -; CHECK-SSA: [[QUOTREG:%vreg[0-9]+]] = SDIVWr -; CHECK-SSA-NOT: [[QUOTREG]] = -; CHECK-SSA: {{%vreg[0-9]+}} = MSUBWrrr [[QUOTREG]] +; CHECK-SSA: [[QUOTREG:%[0-9]+]]:gpr32 = SDIVWr +; CHECK-SSA-NOT: [[QUOTREG]] = +; CHECK-SSA: {{%[0-9]+}}:gpr32 = MSUBWrrr killed [[QUOTREG]] ; CHECK-SSA-LABEL: Machine code for function t2 diff --git a/test/CodeGen/AArch64/arm64-fast-isel-ret.ll b/test/CodeGen/AArch64/arm64-fast-isel-ret.ll index 1f6a60e77cc3..9a67fff00ac3 100644 --- a/test/CodeGen/AArch64/arm64-fast-isel-ret.ll +++ b/test/CodeGen/AArch64/arm64-fast-isel-ret.ll @@ -1,4 +1,4 @@ -; RUN: llc -O0 -fast-isel-abort=1 -verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s +; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s ;; Test returns. 
define void @t0() nounwind ssp { diff --git a/test/CodeGen/AArch64/arm64-fast-isel.ll b/test/CodeGen/AArch64/arm64-fast-isel.ll index 9f83a9c359a2..39934c4399b4 100644 --- a/test/CodeGen/AArch64/arm64-fast-isel.ll +++ b/test/CodeGen/AArch64/arm64-fast-isel.ll @@ -1,4 +1,4 @@ -; RUN: llc -O0 -fast-isel-abort=1 -verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s +; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s define void @t0(i32 %a) nounwind { entry: diff --git a/test/CodeGen/AArch64/arm64-fcmp-opt.ll b/test/CodeGen/AArch64/arm64-fcmp-opt.ll index e8b1557bac66..5155d49cc3fa 100644 --- a/test/CodeGen/AArch64/arm64-fcmp-opt.ll +++ b/test/CodeGen/AArch64/arm64-fcmp-opt.ll @@ -41,7 +41,7 @@ entry: define float @fcmp_oeq(float %a, float %b) nounwind ssp { ; CHECK-LABEL: @fcmp_oeq ; CHECK: fcmp s0, s1 -; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0 +; CHECK-DAG: fmov s[[ZERO:[0-9]+]], wzr ; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0 ; CHECK: fcsel s0, s[[ONE]], s[[ZERO]], eq @@ -53,7 +53,7 @@ define float @fcmp_oeq(float %a, float %b) nounwind ssp { define float @fcmp_ogt(float %a, float %b) nounwind ssp { ; CHECK-LABEL: @fcmp_ogt ; CHECK: fcmp s0, s1 -; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0 +; CHECK-DAG: fmov s[[ZERO:[0-9]+]], wzr ; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0 ; CHECK: fcsel s0, s[[ONE]], s[[ZERO]], gt @@ -65,7 +65,7 @@ define float @fcmp_ogt(float %a, float %b) nounwind ssp { define float @fcmp_oge(float %a, float %b) nounwind ssp { ; CHECK-LABEL: @fcmp_oge ; CHECK: fcmp s0, s1 -; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0 +; CHECK-DAG: fmov s[[ZERO:[0-9]+]], wzr ; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0 ; CHECK: fcsel s0, s[[ONE]], s[[ZERO]], ge @@ -77,7 +77,7 @@ define float @fcmp_oge(float %a, float %b) nounwind ssp { define float @fcmp_olt(float %a, float %b) nounwind ssp { ; CHECK-LABEL: @fcmp_olt ; CHECK: fcmp s0, s1 -; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0 +; CHECK-DAG: fmov s[[ZERO:[0-9]+]], wzr ; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0 ; CHECK: fcsel s0, s[[ONE]], s[[ZERO]], mi @@ -89,7 +89,7 @@ define float @fcmp_olt(float %a, float %b) nounwind ssp { define float @fcmp_ole(float %a, float %b) nounwind ssp { ; CHECK-LABEL: @fcmp_ole ; CHECK: fcmp s0, s1 -; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0 +; CHECK-DAG: fmov s[[ZERO:[0-9]+]], wzr ; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0 ; CHECK: fcsel s0, s[[ONE]], s[[ZERO]], ls @@ -101,7 +101,7 @@ define float @fcmp_ole(float %a, float %b) nounwind ssp { define float @fcmp_ord(float %a, float %b) nounwind ssp { ; CHECK-LABEL: @fcmp_ord ; CHECK: fcmp s0, s1 -; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0 +; CHECK-DAG: fmov s[[ZERO:[0-9]+]], wzr ; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0 ; CHECK: fcsel s0, s[[ONE]], s[[ZERO]], vc %cmp = fcmp ord float %a, %b @@ -112,7 +112,7 @@ define float @fcmp_ord(float %a, float %b) nounwind ssp { define float @fcmp_uno(float %a, float %b) nounwind ssp { ; CHECK-LABEL: @fcmp_uno ; CHECK: fcmp s0, s1 -; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0 +; CHECK-DAG: fmov s[[ZERO:[0-9]+]], wzr ; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0 ; CHECK: fcsel s0, s[[ONE]], s[[ZERO]], vs %cmp = fcmp uno float %a, %b @@ -123,7 +123,7 @@ define float @fcmp_uno(float %a, float %b) nounwind ssp { define float @fcmp_ugt(float %a, float %b) nounwind ssp { ; CHECK-LABEL: @fcmp_ugt ; CHECK: fcmp s0, s1 -; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0 +; CHECK-DAG: fmov s[[ZERO:[0-9]+]], wzr ; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0 ; CHECK: fcsel s0, s[[ONE]], s[[ZERO]], hi 
%cmp = fcmp ugt float %a, %b @@ -134,7 +134,7 @@ define float @fcmp_ugt(float %a, float %b) nounwind ssp { define float @fcmp_uge(float %a, float %b) nounwind ssp { ; CHECK-LABEL: @fcmp_uge ; CHECK: fcmp s0, s1 -; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0 +; CHECK-DAG: fmov s[[ZERO:[0-9]+]], wzr ; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0 ; CHECK: fcsel s0, s[[ONE]], s[[ZERO]], pl %cmp = fcmp uge float %a, %b @@ -145,7 +145,7 @@ define float @fcmp_uge(float %a, float %b) nounwind ssp { define float @fcmp_ult(float %a, float %b) nounwind ssp { ; CHECK-LABEL: @fcmp_ult ; CHECK: fcmp s0, s1 -; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0 +; CHECK-DAG: fmov s[[ZERO:[0-9]+]], wzr ; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0 ; CHECK: fcsel s0, s[[ONE]], s[[ZERO]], lt %cmp = fcmp ult float %a, %b @@ -156,7 +156,7 @@ define float @fcmp_ult(float %a, float %b) nounwind ssp { define float @fcmp_ule(float %a, float %b) nounwind ssp { ; CHECK-LABEL: @fcmp_ule ; CHECK: fcmp s0, s1 -; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0 +; CHECK-DAG: fmov s[[ZERO:[0-9]+]], wzr ; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0 ; CHECK: fcsel s0, s[[ONE]], s[[ZERO]], le %cmp = fcmp ule float %a, %b @@ -167,7 +167,7 @@ define float @fcmp_ule(float %a, float %b) nounwind ssp { define float @fcmp_une(float %a, float %b) nounwind ssp { ; CHECK-LABEL: @fcmp_une ; CHECK: fcmp s0, s1 -; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0 +; CHECK-DAG: fmov s[[ZERO:[0-9]+]], wzr ; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0 ; CHECK: fcsel s0, s[[ONE]], s[[ZERO]], ne %cmp = fcmp une float %a, %b @@ -180,7 +180,7 @@ define float @fcmp_une(float %a, float %b) nounwind ssp { define float @fcmp_one(float %a, float %b) nounwind ssp { ; CHECK-LABEL: @fcmp_one ; fcmp s0, s1 -; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0 +; CHECK-DAG: fmov s[[ZERO:[0-9]+]], wzr ; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0 ; CHECK: fcsel [[TMP:s[0-9]+]], s[[ONE]], s[[ZERO]], mi ; CHECK: fcsel s0, s[[ONE]], [[TMP]], gt @@ -194,7 +194,7 @@ define float @fcmp_one(float %a, float %b) nounwind ssp { define float @fcmp_ueq(float %a, float %b) nounwind ssp { ; CHECK-LABEL: @fcmp_ueq ; CHECK: fcmp s0, s1 -; CHECK-DAG: movi.2d v[[ZERO:[0-9]+]], #0 +; CHECK-DAG: fmov s[[ZERO:[0-9]+]], wzr ; CHECK-DAG: fmov s[[ONE:[0-9]+]], #1.0 ; CHECK: fcsel [[TMP:s[0-9]+]], s[[ONE]], s[[ZERO]], eq ; CHECK: fcsel s0, s[[ONE]], [[TMP]], vs diff --git a/test/CodeGen/AArch64/arm64-fp128.ll b/test/CodeGen/AArch64/arm64-fp128.ll index 2ae0da2d89d1..3561d8fcdff9 100644 --- a/test/CodeGen/AArch64/arm64-fp128.ll +++ b/test/CodeGen/AArch64/arm64-fp128.ll @@ -195,7 +195,7 @@ define i32 @test_br_cc() { iftrue: ret i32 42 -; CHECK-NEXT: BB# +; CHECK-NEXT: %bb. ; CHECK-NEXT: mov w0, #42 ; CHECK: ret iffalse: @@ -211,7 +211,7 @@ define void @test_select(i1 %cond, fp128 %lhs, fp128 %rhs) { store fp128 %val, fp128* @lhs, align 16 ; CHECK: tst w0, #0x1 ; CHECK-NEXT: b.eq [[IFFALSE:.LBB[0-9]+_[0-9]+]] -; CHECK-NEXT: BB# +; CHECK-NEXT: %bb. 
; CHECK-NEXT: mov v[[VAL:[0-9]+]].16b, v0.16b ; CHECK-NEXT: [[IFFALSE]]: ; CHECK: str q[[VAL]], [{{x[0-9]+}}, :lo12:lhs] diff --git a/test/CodeGen/AArch64/arm64-hello.ll b/test/CodeGen/AArch64/arm64-hello.ll index a8d1c2482520..7b6146840fed 100644 --- a/test/CodeGen/AArch64/arm64-hello.ll +++ b/test/CodeGen/AArch64/arm64-hello.ll @@ -5,9 +5,9 @@ ; CHECK: sub sp, sp, #32 ; CHECK-NEXT: stp x29, x30, [sp, #16] ; CHECK-NEXT: add x29, sp, #16 -; CHECK-NEXT: stur wzr, [x29, #-4] ; CHECK: adrp x0, l_.str@PAGE ; CHECK: add x0, x0, l_.str@PAGEOFF +; CHECK-NEXT: stur wzr, [x29, #-4] ; CHECK-NEXT: bl _puts ; CHECK-NEXT: ldp x29, x30, [sp, #16] ; CHECK-NEXT: add sp, sp, #32 @@ -15,9 +15,9 @@ ; CHECK-LINUX-LABEL: main: ; CHECK-LINUX: str x30, [sp, #-16]! -; CHECK-LINUX-NEXT: str wzr, [sp, #12] ; CHECK-LINUX: adrp x0, .L.str ; CHECK-LINUX: add x0, x0, :lo12:.L.str +; CHECK-LINUX-NEXT: str wzr, [sp, #12] ; CHECK-LINUX-NEXT: bl puts ; CHECK-LINUX-NEXT: ldr x30, [sp], #16 ; CHECK-LINUX-NEXT: ret diff --git a/test/CodeGen/AArch64/arm64-icmp-opt.ll b/test/CodeGen/AArch64/arm64-icmp-opt.ll index 12eae0e88fbe..1ed5c5ee135c 100644 --- a/test/CodeGen/AArch64/arm64-icmp-opt.ll +++ b/test/CodeGen/AArch64/arm64-icmp-opt.ll @@ -7,7 +7,7 @@ define i32 @t1(i64 %a) { ; CHECK-LABEL: t1: -; CHECK: // BB#0: +; CHECK: // %bb.0: ; CHECK-NEXT: lsr x8, x0, #63 ; CHECK-NEXT: eor w0, w8, #0x1 ; CHECK-NEXT: ret diff --git a/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll b/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll index cdbadfe51f0c..b63e739f577d 100644 --- a/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll +++ b/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll @@ -6176,7 +6176,7 @@ define <2 x double> @test_v2f64_post_reg_ld1lane(double* %bar, double** %ptr, i6 ; Check for dependencies between the vector and the scalar load. define <4 x float> @test_v4f32_post_reg_ld1lane_dep_vec_on_load(float* %bar, float** %ptr, i64 %inc, <4 x float>* %dep_ptr_1, <4 x float>* %dep_ptr_2, <4 x float> %vec) { ; CHECK-LABEL: test_v4f32_post_reg_ld1lane_dep_vec_on_load: -; CHECK: BB#0: +; CHECK: %bb.0: ; CHECK-NEXT: ldr s[[LD:[0-9]+]], [x0] ; CHECK-NEXT: str q0, [x3] ; CHECK-NEXT: ldr q0, [x4] diff --git a/test/CodeGen/AArch64/arm64-jumptable.ll b/test/CodeGen/AArch64/arm64-jumptable.ll index f5c2ee6da0bf..fac3e5704d15 100644 --- a/test/CodeGen/AArch64/arm64-jumptable.ll +++ b/test/CodeGen/AArch64/arm64-jumptable.ll @@ -6,22 +6,20 @@ define void @sum(i32 %a, i32* %to, i32 %c) { entry: switch i32 %a, label %exit [ i32 1, label %bb1 - i32 2, label %bb2 + i32 2, label %exit.sink.split i32 3, label %bb3 i32 4, label %bb4 ] bb1: %b = add i32 %c, 1 - store i32 %b, i32* %to - br label %exit -bb2: - store i32 2, i32* %to - br label %exit + br label %exit.sink.split bb3: - store i32 3, i32* %to - br label %exit + br label %exit.sink.split bb4: - store i32 5, i32* %to + br label %exit.sink.split +exit.sink.split: + %.sink = phi i32 [ 5, %bb4 ], [ %b, %bb1 ], [ 3, %bb3 ], [ %a, %entry ] + store i32 %.sink, i32* %to br label %exit exit: ret void diff --git a/test/CodeGen/AArch64/arm64-ldp-cluster.ll b/test/CodeGen/AArch64/arm64-ldp-cluster.ll index 64e535ca7499..75b02b9d9134 100644 --- a/test/CodeGen/AArch64/arm64-ldp-cluster.ll +++ b/test/CodeGen/AArch64/arm64-ldp-cluster.ll @@ -4,15 +4,15 @@ ; Test ldr clustering. 
; CHECK: ********** MI Scheduling ********** -; CHECK-LABEL: ldr_int:BB#0 +; CHECK-LABEL: ldr_int:%bb.0 ; CHECK: Cluster ld/st SU(1) - SU(2) -; CHECK: SU(1): %vreg{{[0-9]+}} = LDRWui -; CHECK: SU(2): %vreg{{[0-9]+}} = LDRWui +; CHECK: SU(1): %{{[0-9]+}}:gpr32 = LDRWui +; CHECK: SU(2): %{{[0-9]+}}:gpr32 = LDRWui ; EXYNOS: ********** MI Scheduling ********** -; EXYNOS-LABEL: ldr_int:BB#0 +; EXYNOS-LABEL: ldr_int:%bb.0 ; EXYNOS: Cluster ld/st SU(1) - SU(2) -; EXYNOS: SU(1): %vreg{{[0-9]+}} = LDRWui -; EXYNOS: SU(2): %vreg{{[0-9]+}} = LDRWui +; EXYNOS: SU(1): %{{[0-9]+}}:gpr32 = LDRWui +; EXYNOS: SU(2): %{{[0-9]+}}:gpr32 = LDRWui define i32 @ldr_int(i32* %a) nounwind { %p1 = getelementptr inbounds i32, i32* %a, i32 1 %tmp1 = load i32, i32* %p1, align 2 @@ -24,15 +24,15 @@ define i32 @ldr_int(i32* %a) nounwind { ; Test ldpsw clustering ; CHECK: ********** MI Scheduling ********** -; CHECK-LABEL: ldp_sext_int:BB#0 +; CHECK-LABEL: ldp_sext_int:%bb.0 ; CHECK: Cluster ld/st SU(1) - SU(2) -; CHECK: SU(1): %vreg{{[0-9]+}} = LDRSWui -; CHECK: SU(2): %vreg{{[0-9]+}} = LDRSWui +; CHECK: SU(1): %{{[0-9]+}}:gpr64 = LDRSWui +; CHECK: SU(2): %{{[0-9]+}}:gpr64 = LDRSWui ; EXYNOS: ********** MI Scheduling ********** -; EXYNOS-LABEL: ldp_sext_int:BB#0 +; EXYNOS-LABEL: ldp_sext_int:%bb.0 ; EXYNOS: Cluster ld/st SU(1) - SU(2) -; EXYNOS: SU(1): %vreg{{[0-9]+}} = LDRSWui -; EXYNOS: SU(2): %vreg{{[0-9]+}} = LDRSWui +; EXYNOS: SU(1): %{{[0-9]+}}:gpr64 = LDRSWui +; EXYNOS: SU(2): %{{[0-9]+}}:gpr64 = LDRSWui define i64 @ldp_sext_int(i32* %p) nounwind { %tmp = load i32, i32* %p, align 4 %add.ptr = getelementptr inbounds i32, i32* %p, i64 1 @@ -45,15 +45,15 @@ define i64 @ldp_sext_int(i32* %p) nounwind { ; Test ldur clustering. ; CHECK: ********** MI Scheduling ********** -; CHECK-LABEL: ldur_int:BB#0 +; CHECK-LABEL: ldur_int:%bb.0 ; CHECK: Cluster ld/st SU(2) - SU(1) -; CHECK: SU(1): %vreg{{[0-9]+}} = LDURWi -; CHECK: SU(2): %vreg{{[0-9]+}} = LDURWi +; CHECK: SU(1): %{{[0-9]+}}:gpr32 = LDURWi +; CHECK: SU(2): %{{[0-9]+}}:gpr32 = LDURWi ; EXYNOS: ********** MI Scheduling ********** -; EXYNOS-LABEL: ldur_int:BB#0 +; EXYNOS-LABEL: ldur_int:%bb.0 ; EXYNOS: Cluster ld/st SU(2) - SU(1) -; EXYNOS: SU(1): %vreg{{[0-9]+}} = LDURWi -; EXYNOS: SU(2): %vreg{{[0-9]+}} = LDURWi +; EXYNOS: SU(1): %{{[0-9]+}}:gpr32 = LDURWi +; EXYNOS: SU(2): %{{[0-9]+}}:gpr32 = LDURWi define i32 @ldur_int(i32* %a) nounwind { %p1 = getelementptr inbounds i32, i32* %a, i32 -1 %tmp1 = load i32, i32* %p1, align 2 @@ -65,15 +65,15 @@ define i32 @ldur_int(i32* %a) nounwind { ; Test sext + zext clustering. 
; CHECK: ********** MI Scheduling ********** -; CHECK-LABEL: ldp_half_sext_zext_int:BB#0 +; CHECK-LABEL: ldp_half_sext_zext_int:%bb.0 ; CHECK: Cluster ld/st SU(3) - SU(4) -; CHECK: SU(3): %vreg{{[0-9]+}} = LDRSWui -; CHECK: SU(4): %vreg{{[0-9]+}}:sub_32 = LDRWui +; CHECK: SU(3): %{{[0-9]+}}:gpr64 = LDRSWui +; CHECK: SU(4): undef %{{[0-9]+}}.sub_32:gpr64 = LDRWui ; EXYNOS: ********** MI Scheduling ********** -; EXYNOS-LABEL: ldp_half_sext_zext_int:BB#0 +; EXYNOS-LABEL: ldp_half_sext_zext_int:%bb.0 ; EXYNOS: Cluster ld/st SU(3) - SU(4) -; EXYNOS: SU(3): %vreg{{[0-9]+}} = LDRSWui -; EXYNOS: SU(4): %vreg{{[0-9]+}}:sub_32 = LDRWui +; EXYNOS: SU(3): %{{[0-9]+}}:gpr64 = LDRSWui +; EXYNOS: SU(4): undef %{{[0-9]+}}.sub_32:gpr64 = LDRWui define i64 @ldp_half_sext_zext_int(i64* %q, i32* %p) nounwind { %tmp0 = load i64, i64* %q, align 4 %tmp = load i32, i32* %p, align 4 @@ -88,15 +88,15 @@ define i64 @ldp_half_sext_zext_int(i64* %q, i32* %p) nounwind { ; Test zext + sext clustering. ; CHECK: ********** MI Scheduling ********** -; CHECK-LABEL: ldp_half_zext_sext_int:BB#0 +; CHECK-LABEL: ldp_half_zext_sext_int:%bb.0 ; CHECK: Cluster ld/st SU(3) - SU(4) -; CHECK: SU(3): %vreg{{[0-9]+}}:sub_32 = LDRWui -; CHECK: SU(4): %vreg{{[0-9]+}} = LDRSWui +; CHECK: SU(3): undef %{{[0-9]+}}.sub_32:gpr64 = LDRWui +; CHECK: SU(4): %{{[0-9]+}}:gpr64 = LDRSWui ; EXYNOS: ********** MI Scheduling ********** -; EXYNOS-LABEL: ldp_half_zext_sext_int:BB#0 +; EXYNOS-LABEL: ldp_half_zext_sext_int:%bb.0 ; EXYNOS: Cluster ld/st SU(3) - SU(4) -; EXYNOS: SU(3): %vreg{{[0-9]+}}:sub_32 = LDRWui -; EXYNOS: SU(4): %vreg{{[0-9]+}} = LDRSWui +; EXYNOS: SU(3): undef %{{[0-9]+}}.sub_32:gpr64 = LDRWui +; EXYNOS: SU(4): %{{[0-9]+}}:gpr64 = LDRSWui define i64 @ldp_half_zext_sext_int(i64* %q, i32* %p) nounwind { %tmp0 = load i64, i64* %q, align 4 %tmp = load i32, i32* %p, align 4 @@ -111,15 +111,15 @@ define i64 @ldp_half_zext_sext_int(i64* %q, i32* %p) nounwind { ; Verify we don't cluster volatile loads. ; CHECK: ********** MI Scheduling ********** -; CHECK-LABEL: ldr_int_volatile:BB#0 +; CHECK-LABEL: ldr_int_volatile:%bb.0 ; CHECK-NOT: Cluster ld/st -; CHECK: SU(1): %vreg{{[0-9]+}} = LDRWui -; CHECK: SU(2): %vreg{{[0-9]+}} = LDRWui +; CHECK: SU(1): %{{[0-9]+}}:gpr32 = LDRWui +; CHECK: SU(2): %{{[0-9]+}}:gpr32 = LDRWui ; EXYNOS: ********** MI Scheduling ********** -; EXYNOS-LABEL: ldr_int_volatile:BB#0 +; EXYNOS-LABEL: ldr_int_volatile:%bb.0 ; EXYNOS-NOT: Cluster ld/st -; EXYNOS: SU(1): %vreg{{[0-9]+}} = LDRWui -; EXYNOS: SU(2): %vreg{{[0-9]+}} = LDRWui +; EXYNOS: SU(1): %{{[0-9]+}}:gpr32 = LDRWui +; EXYNOS: SU(2): %{{[0-9]+}}:gpr32 = LDRWui define i32 @ldr_int_volatile(i32* %a) nounwind { %p1 = getelementptr inbounds i32, i32* %a, i32 1 %tmp1 = load volatile i32, i32* %p1, align 2 @@ -131,12 +131,12 @@ define i32 @ldr_int_volatile(i32* %a) nounwind { ; Test ldq clustering (no clustering for Exynos). 
; CHECK: ********** MI Scheduling ********** -; CHECK-LABEL: ldq_cluster:BB#0 +; CHECK-LABEL: ldq_cluster:%bb.0 ; CHECK: Cluster ld/st SU(1) - SU(3) -; CHECK: SU(1): %vreg{{[0-9]+}} = LDRQui -; CHECK: SU(3): %vreg{{[0-9]+}} = LDRQui +; CHECK: SU(1): %{{[0-9]+}}:fpr128 = LDRQui +; CHECK: SU(3): %{{[0-9]+}}:fpr128 = LDRQui ; EXYNOS: ********** MI Scheduling ********** -; EXYNOS-LABEL: ldq_cluster:BB#0 +; EXYNOS-LABEL: ldq_cluster:%bb.0 ; EXYNOS-NOT: Cluster ld/st define <2 x i64> @ldq_cluster(i64* %p) { %a1 = bitcast i64* %p to <2 x i64>* diff --git a/test/CodeGen/AArch64/arm64-memcpy-inline.ll b/test/CodeGen/AArch64/arm64-memcpy-inline.ll index 0590031fbcdc..4f8f3a227bb8 100644 --- a/test/CodeGen/AArch64/arm64-memcpy-inline.ll +++ b/test/CodeGen/AArch64/arm64-memcpy-inline.ll @@ -22,7 +22,7 @@ entry: ; CHECK: strh [[REG1]], [x[[BASEREG2]], #8] ; CHECK: ldr [[REG2:x[0-9]+]], ; CHECK: str [[REG2]], - call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds (%struct.x, %struct.x* @dst, i32 0, i32 0), i8* getelementptr inbounds (%struct.x, %struct.x* @src, i32 0, i32 0), i32 11, i32 8, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 getelementptr inbounds (%struct.x, %struct.x* @dst, i32 0, i32 0), i8* align 8 getelementptr inbounds (%struct.x, %struct.x* @src, i32 0, i32 0), i32 11, i1 false) ret i32 0 } @@ -33,7 +33,7 @@ entry: ; CHECK: stur [[DEST]], [x0, #15] ; CHECK: ldr [[DEST:q[0-9]+]], [x[[BASEREG]]] ; CHECK: str [[DEST]], [x0] - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([31 x i8], [31 x i8]* @.str1, i64 0, i64 0), i64 31, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([31 x i8], [31 x i8]* @.str1, i64 0, i64 0), i64 31, i1 false) ret void } @@ -45,7 +45,7 @@ entry: ; CHECK: str [[REG3]], [x0, #32] ; CHECK: ldp [[DEST1:q[0-9]+]], [[DEST2:q[0-9]+]], [x{{[0-9]+}}] ; CHECK: stp [[DEST1]], [[DEST2]], [x0] - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([36 x i8], [36 x i8]* @.str2, i64 0, i64 0), i64 36, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([36 x i8], [36 x i8]* @.str2, i64 0, i64 0), i64 36, i1 false) ret void } @@ -56,7 +56,7 @@ entry: ; CHECK: str [[REG4]], [x0, #16] ; CHECK: ldr [[DEST:q[0-9]+]], [x[[BASEREG]]] ; CHECK: str [[DEST]], [x0] - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str3, i64 0, i64 0), i64 24, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str3, i64 0, i64 0), i64 24, i1 false) ret void } @@ -67,7 +67,7 @@ entry: ; CHECK: strh [[REG5]], [x0, #16] ; CHECK: ldr [[REG6:q[0-9]+]], [x{{[0-9]+}}] ; CHECK: str [[REG6]], [x0] - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([18 x i8], [18 x i8]* @.str4, i64 0, i64 0), i64 18, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([18 x i8], [18 x i8]* @.str4, i64 0, i64 0), i64 18, i1 false) ret void } @@ -80,7 +80,7 @@ entry: ; CHECK: mov [[REG8:w[0-9]+]], ; CHECK: movk [[REG8]], ; CHECK: str [[REG8]], [x0] - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str5, i64 0, i64 0), i64 7, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str5, i64 0, i64 0), i64 7, i1 false) ret void } @@ -91,7 +91,7 @@ entry: ; 
CHECK: stur [[REG9]], [x{{[0-9]+}}, #6] ; CHECK: ldr ; CHECK: str - call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([512 x i8], [512 x i8]* @spool.splbuf, i64 0, i64 0), i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str6, i64 0, i64 0), i64 14, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([512 x i8], [512 x i8]* @spool.splbuf, i64 0, i64 0), i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str6, i64 0, i64 0), i64 14, i1 false) ret void } @@ -104,9 +104,9 @@ entry: ; CHECK: str [[REG10]], [x0] %0 = bitcast %struct.Foo* %a to i8* %1 = bitcast %struct.Foo* %b to i8* - tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* %1, i32 16, i32 4, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %0, i8* align 4 %1, i32 16, i1 false) ret void } -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind diff --git a/test/CodeGen/AArch64/arm64-memset-inline.ll b/test/CodeGen/AArch64/arm64-memset-inline.ll index 8c872cc61500..ecdfcc6673aa 100644 --- a/test/CodeGen/AArch64/arm64-memset-inline.ll +++ b/test/CodeGen/AArch64/arm64-memset-inline.ll @@ -5,7 +5,7 @@ entry: ; CHECK-LABEL: t1: ; CHECK: str wzr, [x0, #8] ; CHECK: str xzr, [x0] - call void @llvm.memset.p0i8.i64(i8* %c, i8 0, i64 12, i32 8, i1 false) + call void @llvm.memset.p0i8.i64(i8* align 8 %c, i8 0, i64 12, i1 false) ret void } @@ -17,11 +17,11 @@ entry: ; CHECK: str xzr, [sp, #8] %buf = alloca [26 x i8], align 1 %0 = getelementptr inbounds [26 x i8], [26 x i8]* %buf, i32 0, i32 0 - call void @llvm.memset.p0i8.i32(i8* %0, i8 0, i32 26, i32 1, i1 false) + call void @llvm.memset.p0i8.i32(i8* %0, i8 0, i32 26, i1 false) call void @something(i8* %0) nounwind ret void } declare void @something(i8*) nounwind -declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind diff --git a/test/CodeGen/AArch64/arm64-memset-to-bzero.ll b/test/CodeGen/AArch64/arm64-memset-to-bzero.ll index 29036caabf3a..87a0232c734a 100644 --- a/test/CodeGen/AArch64/arm64-memset-to-bzero.ll +++ b/test/CodeGen/AArch64/arm64-memset-to-bzero.ll @@ -4,41 +4,43 @@ ; RUN: FileCheck --check-prefix=CHECK-LINUX --check-prefix=CHECK %s ; ARM64: Calls to bzero() replaced with calls to memset() -; CHECK: @fct1 +; CHECK-LABEL: fct1: ; For small size (<= 256), we do not change memset to bzero. -; CHECK: memset +; CHECK-DARWIN: {{b|bl}} _memset +; CHECK-LINUX: {{b|bl}} memset define void @fct1(i8* nocapture %ptr) { entry: - tail call void @llvm.memset.p0i8.i64(i8* %ptr, i8 0, i64 256, i32 1, i1 false) + tail call void @llvm.memset.p0i8.i64(i8* %ptr, i8 0, i64 256, i1 false) ret void } -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) -; CHECK: @fct2 +; CHECK-LABEL: fct2: ; When the size is bigger than 256, change into bzero. 
-; CHECK-DARWIN: bzero -; CHECK-LINUX: memset +; CHECK-DARWIN: {{b|bl}} _bzero +; CHECK-LINUX: {{b|bl}} memset define void @fct2(i8* nocapture %ptr) { entry: - tail call void @llvm.memset.p0i8.i64(i8* %ptr, i8 0, i64 257, i32 1, i1 false) + tail call void @llvm.memset.p0i8.i64(i8* %ptr, i8 0, i64 257, i1 false) ret void } -; CHECK: @fct3 +; CHECK-LABEL: fct3: ; For unknown size, change to bzero. -; CHECK-DARWIN: bzero -; CHECK-LINUX: memset +; CHECK-DARWIN: {{b|bl}} _bzero +; CHECK-LINUX: {{b|bl}} memset define void @fct3(i8* nocapture %ptr, i32 %unknown) { entry: %conv = sext i32 %unknown to i64 - tail call void @llvm.memset.p0i8.i64(i8* %ptr, i8 0, i64 %conv, i32 1, i1 false) + tail call void @llvm.memset.p0i8.i64(i8* %ptr, i8 0, i64 %conv, i1 false) ret void } -; CHECK: @fct4 +; CHECK-LABEL: fct4: ; Size <= 256, no change. -; CHECK: memset +; CHECK-DARWIN: {{b|bl}} _memset +; CHECK-LINUX: {{b|bl}} memset define void @fct4(i8* %ptr) { entry: %tmp = tail call i64 @llvm.objectsize.i64(i8* %ptr, i1 false) @@ -50,10 +52,10 @@ declare i8* @__memset_chk(i8*, i32, i64, i64) declare i64 @llvm.objectsize.i64(i8*, i1) -; CHECK: @fct5 +; CHECK-LABEL: fct5: ; Size > 256, change. -; CHECK-DARWIN: bzero -; CHECK-LINUX: memset +; CHECK-DARWIN: {{b|bl}} _bzero +; CHECK-LINUX: {{b|bl}} memset define void @fct5(i8* %ptr) { entry: %tmp = tail call i64 @llvm.objectsize.i64(i8* %ptr, i1 false) @@ -61,10 +63,10 @@ entry: ret void } -; CHECK: @fct6 +; CHECK-LABEL: fct6: ; Size = unknown, change. -; CHECK-DARWIN: bzero -; CHECK-LINUX: memset +; CHECK-DARWIN: {{b|bl}} _bzero +; CHECK-LINUX: {{b|bl}} memset define void @fct6(i8* %ptr, i32 %unknown) { entry: %conv = sext i32 %unknown to i64 @@ -76,9 +78,10 @@ entry: ; Next functions check that memset is not turned into bzero ; when the set constant is non-zero, whatever the given size. -; CHECK: @fct7 +; CHECK-LABEL: fct7: ; memset with something that is not a zero, no change. -; CHECK: memset +; CHECK-DARWIN: {{b|bl}} _memset +; CHECK-LINUX: {{b|bl}} memset define void @fct7(i8* %ptr) { entry: %tmp = tail call i64 @llvm.objectsize.i64(i8* %ptr, i1 false) @@ -86,9 +89,10 @@ entry: ret void } -; CHECK: @fct8 +; CHECK-LABEL: fct8: ; memset with something that is not a zero, no change. -; CHECK: memset +; CHECK-DARWIN: {{b|bl}} _memset +; CHECK-LINUX: {{b|bl}} memset define void @fct8(i8* %ptr) { entry: %tmp = tail call i64 @llvm.objectsize.i64(i8* %ptr, i1 false) @@ -96,9 +100,10 @@ entry: ret void } -; CHECK: @fct9 +; CHECK-LABEL: fct9: ; memset with something that is not a zero, no change. 
-; CHECK: memset +; CHECK-DARWIN: {{b|bl}} _memset +; CHECK-LINUX: {{b|bl}} memset define void @fct9(i8* %ptr, i32 %unknown) { entry: %conv = sext i32 %unknown to i64 diff --git a/test/CodeGen/AArch64/arm64-misaligned-memcpy-inline.ll b/test/CodeGen/AArch64/arm64-misaligned-memcpy-inline.ll index 85572f2cf0f8..7ecf214b4bed 100644 --- a/test/CodeGen/AArch64/arm64-misaligned-memcpy-inline.ll +++ b/test/CodeGen/AArch64/arm64-misaligned-memcpy-inline.ll @@ -7,8 +7,8 @@ define void @t0(i8* %out, i8* %in) { ; CHECK: orr w2, wzr, #0x10 ; CHECK-NEXT: bl _memcpy entry: - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %out, i8* %in, i64 16, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %out, i8* %in, i64 16, i1 false) ret void } -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1) diff --git a/test/CodeGen/AArch64/arm64-misched-basic-A53.ll b/test/CodeGen/AArch64/arm64-misched-basic-A53.ll index 307d1ec1aa8c..f0b9ccc8b5d1 100644 --- a/test/CodeGen/AArch64/arm64-misched-basic-A53.ll +++ b/test/CodeGen/AArch64/arm64-misched-basic-A53.ll @@ -8,7 +8,7 @@ ; ; CHECK: ********** MI Scheduling ********** ; CHECK: main -; CHECK: *** Final schedule for BB#2 *** +; CHECK: *** Final schedule for %bb.2 *** ; CHECK: MADDWrrr ; CHECK: ADDWri ; CHECK: ********** INTERVALS ********** @@ -26,9 +26,9 @@ entry: %yy = alloca i32, align 4 store i32 0, i32* %retval %0 = bitcast [8 x i32]* %x to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast ([8 x i32]* @main.x to i8*), i64 32, i32 4, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 bitcast ([8 x i32]* @main.x to i8*), i64 32, i1 false) %1 = bitcast [8 x i32]* %y to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast ([8 x i32]* @main.y to i8*), i64 32, i32 4, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %1, i8* align 4 bitcast ([8 x i32]* @main.y to i8*), i64 32, i1 false) store i32 0, i32* %xx, align 4 store i32 0, i32* %yy, align 4 store i32 0, i32* %i, align 4 @@ -83,8 +83,8 @@ for.end: ; preds = %for.cond ; after it, this test checks to make sure there are more than one. ; ; CHECK: ********** MI Scheduling ********** -; CHECK: neon4xfloat:BB#0 -; CHECK: *** Final schedule for BB#0 *** +; CHECK: neon4xfloat:%bb.0 +; CHECK: *** Final schedule for %bb.0 *** ; CHECK: FDIVv4f32 ; CHECK: FADDv4f32 ; CHECK: FADDv4f32 @@ -105,7 +105,7 @@ define <4 x float> @neon4xfloat(<4 x float> %A, <4 x float> %B) { } ; Function Attrs: nounwind -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #1 +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1) #1 attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { nounwind } @@ -130,7 +130,7 @@ declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0i8(i8*) ; are otherwise ready are jammed in the pending queue. 
; CHECK: ********** MI Scheduling ********** ; CHECK: testResourceConflict -; CHECK: *** Final schedule for BB#0 *** +; CHECK: *** Final schedule for %bb.0 *** ; CHECK: BRK ; CHECK: ********** INTERVALS ********** define void @testResourceConflict(float* %ptr) { @@ -178,7 +178,7 @@ declare void @llvm.trap() ; Resource contention on LDST. ; CHECK: ********** MI Scheduling ********** ; CHECK: testLdStConflict -; CHECK: *** Final schedule for BB#1 *** +; CHECK: *** Final schedule for %bb.1 *** ; CHECK: LD4Fourv2d ; CHECK: STRQui ; CHECK: ********** INTERVALS ********** diff --git a/test/CodeGen/AArch64/arm64-misched-basic-A57.ll b/test/CodeGen/AArch64/arm64-misched-basic-A57.ll index 82ba18ce72ca..c2f53e88a95a 100644 --- a/test/CodeGen/AArch64/arm64-misched-basic-A57.ll +++ b/test/CodeGen/AArch64/arm64-misched-basic-A57.ll @@ -8,10 +8,10 @@ ; ; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=cortex-a57 -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s ; CHECK: ********** MI Scheduling ********** -; CHECK: main:BB#2 +; CHECK: main:%bb.2 ; CHECK: LDR ; CHECK: Latency : 4 -; CHECK: *** Final schedule for BB#2 *** +; CHECK: *** Final schedule for %bb.2 *** ; CHECK: LDR ; CHECK: LDR ; CHECK-NOT: LDR @@ -32,9 +32,9 @@ entry: %yy = alloca i32, align 4 store i32 0, i32* %retval %0 = bitcast [8 x i32]* %x to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast ([8 x i32]* @main.x to i8*), i64 32, i32 4, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 bitcast ([8 x i32]* @main.x to i8*), i64 32, i1 false) %1 = bitcast [8 x i32]* %y to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast ([8 x i32]* @main.y to i8*), i64 32, i32 4, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %1, i8* align 4 bitcast ([8 x i32]* @main.y to i8*), i64 32, i1 false) store i32 0, i32* %xx, align 4 store i32 0, i32* %yy, align 4 store i32 0, i32* %i, align 4 @@ -106,7 +106,7 @@ for.end: ; preds = %for.cond ; Function Attrs: nounwind -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #1 +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1) #1 attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { nounwind } diff --git a/test/CodeGen/AArch64/arm64-misched-forwarding-A53.ll b/test/CodeGen/AArch64/arm64-misched-forwarding-A53.ll index ad4feef7280f..8af6b8220470 100644 --- a/test/CodeGen/AArch64/arm64-misched-forwarding-A53.ll +++ b/test/CodeGen/AArch64/arm64-misched-forwarding-A53.ll @@ -6,10 +6,10 @@ ; ; CHECK: ********** MI Scheduling ********** ; CHECK: shiftable -; CHECK: SU(2): %vreg2 = SUBXri %vreg1, 20, 0 +; CHECK: SU(2): %2:gpr64common = SUBXri %1:gpr64common, 20, 0 ; CHECK: Successors: -; CHECK-NEXT: SU(4): Data Latency=1 Reg=%vreg2 -; CHECK-NEXT: SU(3): Data Latency=2 Reg=%vreg2 +; CHECK-NEXT: SU(4): Data Latency=1 Reg=%2 +; CHECK-NEXT: SU(3): Data Latency=2 Reg=%2 ; CHECK: ********** INTERVALS ********** define i64 @shiftable(i64 %A, i64 %B) { %tmp0 = sub i64 %B, 20 diff --git a/test/CodeGen/AArch64/arm64-misched-memdep-bug.ll b/test/CodeGen/AArch64/arm64-misched-memdep-bug.ll index 9cbf0cb3803a..88d6a68ee014 100644 --- a/test/CodeGen/AArch64/arm64-misched-memdep-bug.ll +++ 
b/test/CodeGen/AArch64/arm64-misched-memdep-bug.ll @@ -4,16 +4,16 @@ ; Test for bug in misched memory dependency calculation. ; ; CHECK: ********** MI Scheduling ********** -; CHECK: misched_bug:BB#0 entry -; CHECK: SU(2): %vreg2 = LDRWui %vreg0, 1; mem:LD4[%ptr1_plus1] GPR32:%vreg2 GPR64common:%vreg0 +; CHECK: misched_bug:%bb.0 entry +; CHECK: SU(2): %2:gpr32 = LDRWui %0:gpr64common, 1; mem:LD4[%ptr1_plus1] ; CHECK: Successors: -; CHECK-NEXT: SU(5): Data Latency=4 Reg=%vreg2 +; CHECK-NEXT: SU(5): Data Latency=4 Reg=%2 ; CHECK-NEXT: SU(4): Ord Latency=0 -; CHECK: SU(3): STRWui %WZR, %vreg0, 0; mem:ST4[%ptr1] GPR64common:%vreg0 +; CHECK: SU(3): STRWui %wzr, %0:gpr64common, 0; mem:ST4[%ptr1] ; CHECK: Successors: ; CHECK: SU(4): Ord Latency=0 -; CHECK: SU(4): STRWui %WZR, %vreg1, 0; mem:ST4[%ptr2] GPR64common:%vreg1 -; CHECK: SU(5): %W0 = COPY %vreg2; GPR32:%vreg2 +; CHECK: SU(4): STRWui %wzr, %1:gpr64common, 0; mem:ST4[%ptr2] +; CHECK: SU(5): %w0 = COPY %2 ; CHECK: ** ScheduleDAGMI::schedule picking next node define i32 @misched_bug(i32* %ptr1, i32* %ptr2) { entry: diff --git a/test/CodeGen/AArch64/arm64-misched-multimmo.ll b/test/CodeGen/AArch64/arm64-misched-multimmo.ll index 75f45da0e48f..47f2ec790c7a 100644 --- a/test/CodeGen/AArch64/arm64-misched-multimmo.ll +++ b/test/CodeGen/AArch64/arm64-misched-multimmo.ll @@ -8,11 +8,11 @@ ; Check that no scheduling dependencies are created between the paired loads and the store during post-RA MI scheduling. ; ; CHECK-LABEL: # Machine code for function foo: -; CHECK: SU(2): %W{{[0-9]+}}, %W{{[0-9]+}} = LDPWi +; CHECK: SU(2): renamable %w{{[0-9]+}}, renamable %w{{[0-9]+}} = LDPWi ; CHECK: Successors: ; CHECK-NOT: ch SU(4) ; CHECK: SU(3) -; CHECK: SU(4): STRWui %WZR, %X{{[0-9]+}} +; CHECK: SU(4): STRWui %wzr, renamable %x{{[0-9]+}} define i32 @foo() { entry: %0 = load i32, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @G2, i64 0, i64 0), align 4 diff --git a/test/CodeGen/AArch64/arm64-neon-2velem.ll b/test/CodeGen/AArch64/arm64-neon-2velem.ll index 7b2433099031..b3a2bcd5d669 100644 --- a/test/CodeGen/AArch64/arm64-neon-2velem.ll +++ b/test/CodeGen/AArch64/arm64-neon-2velem.ll @@ -1,6 +1,6 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s -; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast -mcpu=exynos-m1 | FileCheck --check-prefix=EXYNOS %s +; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s --check-prefixes=CHECK,GENERIC ; The instruction latencies of Exynos-M1 trigger the transform we see under the Exynos check. 
+; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast -mcpu=exynos-m1 | FileCheck %s --check-prefixes=CHECK,EXYNOSM1 declare <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double>, <2 x double>) @@ -47,7 +47,6 @@ declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>) define <4 x i16> @test_vmla_lane_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) { ; CHECK-LABEL: test_vmla_lane_s16: ; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> %mul = mul <4 x i16> %shuffle, %b @@ -58,7 +57,6 @@ entry: define <8 x i16> @test_vmlaq_lane_s16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) { ; CHECK-LABEL: test_vmlaq_lane_s16: ; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> %mul = mul <8 x i16> %shuffle, %b @@ -69,7 +67,6 @@ entry: define <2 x i32> @test_vmla_lane_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) { ; CHECK-LABEL: test_vmla_lane_s32: ; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> %mul = mul <2 x i32> %shuffle, %b @@ -80,7 +77,6 @@ entry: define <4 x i32> @test_vmlaq_lane_s32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) { ; CHECK-LABEL: test_vmlaq_lane_s32: ; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> %mul = mul <4 x i32> %shuffle, %b @@ -91,7 +87,6 @@ entry: define <4 x i16> @test_vmla_laneq_s16(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) { ; CHECK-LABEL: test_vmla_laneq_s16: ; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> %mul = mul <4 x i16> %shuffle, %b @@ -102,7 +97,6 @@ entry: define <8 x i16> @test_vmlaq_laneq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) { ; CHECK-LABEL: test_vmlaq_laneq_s16: ; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> %mul = mul <8 x i16> %shuffle, %b @@ -113,7 +107,6 @@ entry: define <2 x i32> @test_vmla_laneq_s32(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) { ; CHECK-LABEL: test_vmla_laneq_s32: ; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> %mul = mul <2 x i32> %shuffle, %b @@ -124,7 +117,6 @@ entry: define <4 x i32> @test_vmlaq_laneq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) { ; CHECK-LABEL: test_vmlaq_laneq_s32: ; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> %mul = mul <4 x i32> %shuffle, %b @@ -135,7 +127,6 @@ entry: define <4 x i16> @test_vmls_lane_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) { ; CHECK-LABEL: test_vmls_lane_s16: ; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> %mul = mul <4 x i16> %shuffle, %b @@ -146,7 +137,6 @@ entry: define <8 x i16> @test_vmlsq_lane_s16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) { ; CHECK-LABEL: test_vmlsq_lane_s16: ; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x 
i16> %v, <4 x i16> undef, <8 x i32> %mul = mul <8 x i16> %shuffle, %b @@ -157,7 +147,6 @@ entry: define <2 x i32> @test_vmls_lane_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) { ; CHECK-LABEL: test_vmls_lane_s32: ; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> %mul = mul <2 x i32> %shuffle, %b @@ -168,7 +157,6 @@ entry: define <4 x i32> @test_vmlsq_lane_s32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) { ; CHECK-LABEL: test_vmlsq_lane_s32: ; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> %mul = mul <4 x i32> %shuffle, %b @@ -179,7 +167,6 @@ entry: define <4 x i16> @test_vmls_laneq_s16(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) { ; CHECK-LABEL: test_vmls_laneq_s16: ; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> %mul = mul <4 x i16> %shuffle, %b @@ -190,7 +177,6 @@ entry: define <8 x i16> @test_vmlsq_laneq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) { ; CHECK-LABEL: test_vmlsq_laneq_s16: ; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> %mul = mul <8 x i16> %shuffle, %b @@ -201,7 +187,6 @@ entry: define <2 x i32> @test_vmls_laneq_s32(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) { ; CHECK-LABEL: test_vmls_laneq_s32: ; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> %mul = mul <2 x i32> %shuffle, %b @@ -212,7 +197,6 @@ entry: define <4 x i32> @test_vmlsq_laneq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) { ; CHECK-LABEL: test_vmlsq_laneq_s32: ; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> %mul = mul <4 x i32> %shuffle, %b @@ -223,7 +207,6 @@ entry: define <4 x i16> @test_vmul_lane_s16(<4 x i16> %a, <4 x i16> %v) { ; CHECK-LABEL: test_vmul_lane_s16: ; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> %mul = mul <4 x i16> %shuffle, %a @@ -233,7 +216,6 @@ entry: define <8 x i16> @test_vmulq_lane_s16(<8 x i16> %a, <4 x i16> %v) { ; CHECK-LABEL: test_vmulq_lane_s16: ; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> %mul = mul <8 x i16> %shuffle, %a @@ -243,7 +225,6 @@ entry: define <2 x i32> @test_vmul_lane_s32(<2 x i32> %a, <2 x i32> %v) { ; CHECK-LABEL: test_vmul_lane_s32: ; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> %mul = mul <2 x i32> %shuffle, %a @@ -253,7 +234,6 @@ entry: define <4 x i32> @test_vmulq_lane_s32(<4 x i32> %a, <2 x i32> %v) { ; CHECK-LABEL: test_vmulq_lane_s32: ; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> %mul = mul <4 x i32> %shuffle, %a @@ -263,7 +243,6 @@ entry: define <4 x i16> @test_vmul_lane_u16(<4 x i16> %a, <4 x i16> %v) { ; CHECK-LABEL: test_vmul_lane_u16: ; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret 
entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> %mul = mul <4 x i16> %shuffle, %a @@ -273,7 +252,6 @@ entry: define <8 x i16> @test_vmulq_lane_u16(<8 x i16> %a, <4 x i16> %v) { ; CHECK-LABEL: test_vmulq_lane_u16: ; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> %mul = mul <8 x i16> %shuffle, %a @@ -283,7 +261,6 @@ entry: define <2 x i32> @test_vmul_lane_u32(<2 x i32> %a, <2 x i32> %v) { ; CHECK-LABEL: test_vmul_lane_u32: ; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> %mul = mul <2 x i32> %shuffle, %a @@ -293,7 +270,6 @@ entry: define <4 x i32> @test_vmulq_lane_u32(<4 x i32> %a, <2 x i32> %v) { ; CHECK-LABEL: test_vmulq_lane_u32: ; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> %mul = mul <4 x i32> %shuffle, %a @@ -303,7 +279,6 @@ entry: define <4 x i16> @test_vmul_laneq_s16(<4 x i16> %a, <8 x i16> %v) { ; CHECK-LABEL: test_vmul_laneq_s16: ; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> %mul = mul <4 x i16> %shuffle, %a @@ -313,7 +288,6 @@ entry: define <8 x i16> @test_vmulq_laneq_s16(<8 x i16> %a, <8 x i16> %v) { ; CHECK-LABEL: test_vmulq_laneq_s16: ; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> %mul = mul <8 x i16> %shuffle, %a @@ -323,7 +297,6 @@ entry: define <2 x i32> @test_vmul_laneq_s32(<2 x i32> %a, <4 x i32> %v) { ; CHECK-LABEL: test_vmul_laneq_s32: ; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> %mul = mul <2 x i32> %shuffle, %a @@ -333,7 +306,6 @@ entry: define <4 x i32> @test_vmulq_laneq_s32(<4 x i32> %a, <4 x i32> %v) { ; CHECK-LABEL: test_vmulq_laneq_s32: ; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> %mul = mul <4 x i32> %shuffle, %a @@ -343,7 +315,6 @@ entry: define <4 x i16> @test_vmul_laneq_u16(<4 x i16> %a, <8 x i16> %v) { ; CHECK-LABEL: test_vmul_laneq_u16: ; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> %mul = mul <4 x i16> %shuffle, %a @@ -353,7 +324,6 @@ entry: define <8 x i16> @test_vmulq_laneq_u16(<8 x i16> %a, <8 x i16> %v) { ; CHECK-LABEL: test_vmulq_laneq_u16: ; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> %mul = mul <8 x i16> %shuffle, %a @@ -363,7 +333,6 @@ entry: define <2 x i32> @test_vmul_laneq_u32(<2 x i32> %a, <4 x i32> %v) { ; CHECK-LABEL: test_vmul_laneq_u32: ; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> %mul = mul <2 x i32> %shuffle, %a @@ -373,7 +342,6 @@ entry: define <4 x i32> @test_vmulq_laneq_u32(<4 x i32> %a, <4 x i32> %v) { ; CHECK-LABEL: test_vmulq_laneq_u32: ; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x 
i32> %v, <4 x i32> undef, <4 x i32> %mul = mul <4 x i32> %shuffle, %a @@ -382,12 +350,9 @@ entry: define <2 x float> @test_vfma_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) { ; CHECK-LABEL: test_vfma_lane_f32: -; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -; EXYNOS-LABEL: test_vfma_lane_f32: -; EXYNOS: dup [[x:v[0-9]+]].2s, {{v[0-9]+}}.s[1] -; EXYNOS: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[x]].2s -; EXYNOS-NEXT: ret +; GENERIC: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +; EXYNOSM1: dup [[V:v[0-9]+]].2s, {{v[0-9]+}}.s[1] +; EXYNOSM1: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[V]].2s entry: %lane = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) @@ -398,12 +363,9 @@ declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) define <4 x float> @test_vfmaq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) { ; CHECK-LABEL: test_vfmaq_lane_f32: -; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -; EXYNOS-LABEL: test_vfmaq_lane_f32: -; EXYNOS: dup [[x:v[0-9]+]].4s, {{v[0-9]+}}.s[1] -; EXYNOS: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[x]].4s -; EXYNOS-NEXT: ret +; GENERIC: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +; EXYNOSM1: dup [[V:v[0-9]+]].4s, {{v[0-9]+}}.s[1] +; EXYNOSM1: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s entry: %lane = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a) @@ -414,12 +376,9 @@ declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) define <2 x float> @test_vfma_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) { ; CHECK-LABEL: test_vfma_laneq_f32: -; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -; EXYNOS-LABEL: test_vfma_laneq_f32: -; EXYNOS: dup [[x:v[0-9]+]].2s, {{v[0-9]+}}.s[3] -; EXYNOS: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[x]].2s -; EXYNOS-NEXT: ret +; GENERIC: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +; EXYNOSM1: dup [[V:v[0-9]+]].2s, {{v[0-9]+}}.s[3] +; EXYNOSM1: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[V]].2s entry: %lane = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) @@ -428,12 +387,9 @@ entry: define <4 x float> @test_vfmaq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) { ; CHECK-LABEL: test_vfmaq_laneq_f32: -; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -; EXYNOS-LABEL: test_vfmaq_laneq_f32: -; EXYNOS: dup [[x:v[0-9]+]].4s, {{v[0-9]+}}.s[3] -; EXYNOS: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[x]].4s -; EXYNOS-NEXT: ret +; GENERIC: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +; EXYNOSM1: dup [[V:v[0-9]+]].4s, {{v[0-9]+}}.s[3] +; EXYNOSM1: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s entry: %lane = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a) @@ -442,12 +398,9 @@ entry: define <2 x float> @test_vfms_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) { ; CHECK-LABEL: test_vfms_lane_f32: -; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -; EXYNOS-LABEL: test_vfms_lane_f32: -; EXYNOS: dup [[x:v[0-9]+]].2s, {{v[0-9]+}}.s[1] -; EXYNOS: fmls {{v[0-9]+}}.2s, 
{{v[0-9]+}}.2s, [[x]].2s -; EXYNOS-NEXT: ret +; GENERIC: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +; EXYNOSM1: dup [[V:v[0-9]+]].2s, {{v[0-9]+}}.s[1] +; EXYNOSM1: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[V]].2s entry: %sub = fsub <2 x float> , %v %lane = shufflevector <2 x float> %sub, <2 x float> undef, <2 x i32> @@ -457,12 +410,9 @@ entry: define <4 x float> @test_vfmsq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) { ; CHECK-LABEL: test_vfmsq_lane_f32: -; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -; EXYNOS-LABEL: test_vfmsq_lane_f32: -; EXYNOS: dup [[x:v[0-9]+]].4s, {{v[0-9]+}}.s[1] -; EXYNOS: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[x]].4s -; EXYNOS-NEXT: ret +; GENERIC: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +; EXYNOSM1: dup [[V:v[0-9]+]].4s, {{v[0-9]+}}.s[1] +; EXYNOSM1: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s entry: %sub = fsub <2 x float> , %v %lane = shufflevector <2 x float> %sub, <2 x float> undef, <4 x i32> @@ -472,12 +422,9 @@ entry: define <2 x float> @test_vfms_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) { ; CHECK-LABEL: test_vfms_laneq_f32: -; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -; EXYNOS-LABEL: test_vfms_laneq_f32: -; EXYNOS: dup [[x:v[0-9]+]].2s, {{v[0-9]+}}.s[3] -; EXYNOS: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[x]].2s -; EXYNOS-NEXT: ret +; GENERIC: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +; EXYNOSM1: dup [[V:v[0-9]+]].2s, {{v[0-9]+}}.s[3] +; EXYNOSM1: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[V]].2s entry: %sub = fsub <4 x float> , %v %lane = shufflevector <4 x float> %sub, <4 x float> undef, <2 x i32> @@ -487,12 +434,9 @@ entry: define <4 x float> @test_vfmsq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) { ; CHECK-LABEL: test_vfmsq_laneq_f32: -; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -; EXYNOS-LABEL: test_vfmsq_laneq_f32: -; EXYNOS: dup [[x:v[0-9]+]].4s, {{v[0-9]+}}.s[3] -; EXYNOS: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[x]].4s -; EXYNOS-NEXT: ret +; GENERIC: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +; EXYNOSM1: dup [[V:v[0-9]+]].4s, {{v[0-9]+}}.s[3] +; EXYNOSM1: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s entry: %sub = fsub <4 x float> , %v %lane = shufflevector <4 x float> %sub, <4 x float> undef, <4 x i32> @@ -502,12 +446,9 @@ entry: define <2 x double> @test_vfmaq_lane_f64(<2 x double> %a, <2 x double> %b, <1 x double> %v) { ; CHECK-LABEL: test_vfmaq_lane_f64: -; CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] -; CHECK-NEXT: ret -; EXYNOS-LABEL: test_vfmaq_lane_f64: -; EXYNOS: dup [[x:v[0-9]+]].2d, {{v[0-9]+}}.d[0] -; EXYNOS: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[x]].2d -; EXYNOS-NEXT: ret +; GENERIC: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] +; EXYNOSM1: dup [[V:v[0-9]+]].2d, {{v[0-9]+}}.d[0] +; EXYNOSM1: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[V]].2d entry: %lane = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a) @@ -518,12 +459,9 @@ declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) define <2 x double> @test_vfmaq_laneq_f64(<2 x double> %a, <2 x double> %b, <2 x double> %v) { ; CHECK-LABEL: test_vfmaq_laneq_f64: -; CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1] -; CHECK-NEXT: ret -; EXYNOS-LABEL: test_vfmaq_laneq_f64: -; EXYNOS: dup [[x:v[0-9]+]].2d, 
{{v[0-9]+}}.d[1] -; EXYNOS: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[x]].2d -; EXYNOS-NEXT: ret +; GENERIC: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1] +; EXYNOSM1: dup [[V:v[0-9]+]].2d, {{v[0-9]+}}.d[1] +; EXYNOSM1: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[V]].2d entry: %lane = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a) @@ -532,12 +470,9 @@ entry: define <2 x double> @test_vfmsq_lane_f64(<2 x double> %a, <2 x double> %b, <1 x double> %v) { ; CHECK-LABEL: test_vfmsq_lane_f64: -; CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] -; CHECK-NEXT: ret -; EXYNOS-LABEL: test_vfmsq_lane_f64: -; EXYNOS: dup [[x:v[0-9]+]].2d, {{v[0-9]+}}.d[0] -; EXYNOS: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[x]].2d -; EXYNOS-NEXT: ret +; GENERIC: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] +; EXYNOSM1: dup [[V:v[0-9]+]].2d, {{v[0-9]+}}.d[0] +; EXYNOSM1: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[V]].2d entry: %sub = fsub <1 x double> , %v %lane = shufflevector <1 x double> %sub, <1 x double> undef, <2 x i32> zeroinitializer @@ -547,12 +482,9 @@ entry: define <2 x double> @test_vfmsq_laneq_f64(<2 x double> %a, <2 x double> %b, <2 x double> %v) { ; CHECK-LABEL: test_vfmsq_laneq_f64: -; CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1] -; CHECK-NEXT: ret -; EXYNOS-LABEL: test_vfmsq_laneq_f64: -; EXYNOS: dup [[x:v[0-9]+]].2d, {{v[0-9]+}}.d[1] -; EXYNOS: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[x]].2d -; EXYNOS-NEXT: ret +; GENERIC: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1] +; EXYNOSM1: dup [[V:v[0-9]+]].2d, {{v[0-9]+}}.d[1] +; EXYNOSM1: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[V]].2d entry: %sub = fsub <2 x double> , %v %lane = shufflevector <2 x double> %sub, <2 x double> undef, <2 x i32> @@ -563,10 +495,6 @@ entry: define float @test_vfmas_laneq_f32(float %a, float %b, <4 x float> %v) { ; CHECK-LABEL: test_vfmas_laneq_f32 ; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -; EXNOS-LABEL: test_vfmas_laneq_f32 -; EXNOS: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3] -; EXNOS-NEXT: ret entry: %extract = extractelement <4 x float> %v, i32 3 %0 = tail call float @llvm.fma.f32(float %b, float %extract, float %a) @@ -578,7 +506,6 @@ declare float @llvm.fma.f32(float, float, float) define double @test_vfmsd_lane_f64(double %a, double %b, <1 x double> %v) { ; CHECK-LABEL: test_vfmsd_lane_f64 ; CHECK: fmsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} -; CHECK-NEXT: ret entry: %extract.rhs = extractelement <1 x double> %v, i32 0 %extract = fsub double -0.000000e+00, %extract.rhs @@ -591,10 +518,6 @@ declare double @llvm.fma.f64(double, double, double) define float @test_vfmss_lane_f32(float %a, float %b, <2 x float> %v) { ; CHECK-LABEL: test_vfmss_lane_f32 ; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -; EXYNOS-LABEL: test_vfmss_lane_f32 -; EXYNOS: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1] -; EXYNOS-NEXT: ret entry: %extract.rhs = extractelement <2 x float> %v, i32 1 %extract = fsub float -0.000000e+00, %extract.rhs @@ -605,7 +528,6 @@ entry: define float @test_vfmss_laneq_f32(float %a, float %b, <4 x float> %v) { ; CHECK-LABEL: test_vfmss_laneq_f32 ; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret entry: %extract.rhs = extractelement <4 x float> %v, i32 3 %extract = fsub float -0.000000e+00, %extract.rhs @@ -616,10 +538,6 @@ entry: define double @test_vfmsd_laneq_f64(double %a, 
double %b, <2 x double> %v) { ; CHECK-LABEL: test_vfmsd_laneq_f64 ; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1] -; CHECK-NEXT: ret -; EXYNOS-LABEL: test_vfmsd_laneq_f64 -; EXYNOS: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1] -; EXYNOS-NEXT: ret entry: %extract.rhs = extractelement <2 x double> %v, i32 1 %extract = fsub double -0.000000e+00, %extract.rhs @@ -641,10 +559,6 @@ entry: define float @test_vfmss_lane_f32_0(float %a, float %b, <2 x float> %v) { ; CHECK-LABEL: test_vfmss_lane_f32_0 ; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -; EXYNOS-LABEL: test_vfmss_lane_f32_0 -; EXYNOS: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1] -; EXYNOS-NEXT: ret entry: %tmp0 = fsub <2 x float> , %v %tmp1 = extractelement <2 x float> %tmp0, i32 1 @@ -655,7 +569,6 @@ entry: define float @test_vfmss_laneq_f32_0(float %a, float %b, <4 x float> %v) { ; CHECK-LABEL: test_vfmss_laneq_f32_0 ; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret entry: %tmp0 = fsub <4 x float>, %v %tmp1 = extractelement <4 x float> %tmp0, i32 3 @@ -666,7 +579,6 @@ entry: define double @test_vfmsd_laneq_f64_0(double %a, double %b, <2 x double> %v) { ; CHECK-LABEL: test_vfmsd_laneq_f64_0 ; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1] -; CHECK-NEXT: ret entry: %tmp0 = fsub <2 x double>, %v %tmp1 = extractelement <2 x double> %tmp0, i32 1 @@ -677,7 +589,6 @@ entry: define <4 x i32> @test_vmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { ; CHECK-LABEL: test_vmlal_lane_s16: ; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) @@ -688,7 +599,6 @@ entry: define <2 x i64> @test_vmlal_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { ; CHECK-LABEL: test_vmlal_lane_s32: ; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) @@ -699,7 +609,6 @@ entry: define <4 x i32> @test_vmlal_laneq_s16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) { ; CHECK-LABEL: test_vmlal_laneq_s16: ; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) @@ -710,7 +619,6 @@ entry: define <2 x i64> @test_vmlal_laneq_s32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) { ; CHECK-LABEL: test_vmlal_laneq_s32: ; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) @@ -721,7 +629,6 @@ entry: define <4 x i32> @test_vmlal_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { ; CHECK-LABEL: test_vmlal_high_lane_s16: ; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> @@ -733,7 +640,6 @@ entry: define <2 x i64> @test_vmlal_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { ; CHECK-LABEL: test_vmlal_high_lane_s32: ; CHECK: mlal2 {{v[0-9]+}}.2d, 
{{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> @@ -745,7 +651,6 @@ entry: define <4 x i32> @test_vmlal_high_laneq_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) { ; CHECK-LABEL: test_vmlal_high_laneq_s16: ; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] -; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> @@ -757,7 +662,6 @@ entry: define <2 x i64> @test_vmlal_high_laneq_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) { ; CHECK-LABEL: test_vmlal_high_laneq_s32: ; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> @@ -769,7 +673,6 @@ entry: define <4 x i32> @test_vmlsl_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { ; CHECK-LABEL: test_vmlsl_lane_s16: ; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) @@ -780,7 +683,6 @@ entry: define <2 x i64> @test_vmlsl_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { ; CHECK-LABEL: test_vmlsl_lane_s32: ; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) @@ -791,7 +693,6 @@ entry: define <4 x i32> @test_vmlsl_laneq_s16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) { ; CHECK-LABEL: test_vmlsl_laneq_s16: ; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) @@ -802,7 +703,6 @@ entry: define <2 x i64> @test_vmlsl_laneq_s32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) { ; CHECK-LABEL: test_vmlsl_laneq_s32: ; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) @@ -813,7 +713,6 @@ entry: define <4 x i32> @test_vmlsl_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { ; CHECK-LABEL: test_vmlsl_high_lane_s16: ; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> @@ -825,7 +724,6 @@ entry: define <2 x i64> @test_vmlsl_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { ; CHECK-LABEL: test_vmlsl_high_lane_s32: ; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> @@ -837,7 +735,6 @@ entry: define <4 x i32> @test_vmlsl_high_laneq_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) { ; CHECK-LABEL: test_vmlsl_high_laneq_s16: ; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] -; CHECK-NEXT: ret entry: 
%shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> @@ -849,7 +746,6 @@ entry: define <2 x i64> @test_vmlsl_high_laneq_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) { ; CHECK-LABEL: test_vmlsl_high_laneq_s32: ; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> @@ -861,7 +757,6 @@ entry: define <4 x i32> @test_vmlal_lane_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { ; CHECK-LABEL: test_vmlal_lane_u16: ; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) @@ -872,7 +767,6 @@ entry: define <2 x i64> @test_vmlal_lane_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { ; CHECK-LABEL: test_vmlal_lane_u32: ; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) @@ -883,7 +777,6 @@ entry: define <4 x i32> @test_vmlal_laneq_u16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) { ; CHECK-LABEL: test_vmlal_laneq_u16: ; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) @@ -894,7 +787,6 @@ entry: define <2 x i64> @test_vmlal_laneq_u32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) { ; CHECK-LABEL: test_vmlal_laneq_u32: ; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) @@ -905,7 +797,6 @@ entry: define <4 x i32> @test_vmlal_high_lane_u16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { ; CHECK-LABEL: test_vmlal_high_lane_u16: ; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> @@ -917,7 +808,6 @@ entry: define <2 x i64> @test_vmlal_high_lane_u32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { ; CHECK-LABEL: test_vmlal_high_lane_u32: ; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> @@ -929,7 +819,6 @@ entry: define <4 x i32> @test_vmlal_high_laneq_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) { ; CHECK-LABEL: test_vmlal_high_laneq_u16: ; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] -; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> @@ -941,7 +830,6 @@ entry: define <2 x i64> @test_vmlal_high_laneq_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) { ; CHECK-LABEL: test_vmlal_high_laneq_u32: ; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 
x i32> %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> @@ -953,7 +841,6 @@ entry: define <4 x i32> @test_vmlsl_lane_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { ; CHECK-LABEL: test_vmlsl_lane_u16: ; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) @@ -964,7 +851,6 @@ entry: define <2 x i64> @test_vmlsl_lane_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { ; CHECK-LABEL: test_vmlsl_lane_u32: ; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) @@ -975,7 +861,6 @@ entry: define <4 x i32> @test_vmlsl_laneq_u16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) { ; CHECK-LABEL: test_vmlsl_laneq_u16: ; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) @@ -986,7 +871,6 @@ entry: define <2 x i64> @test_vmlsl_laneq_u32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) { ; CHECK-LABEL: test_vmlsl_laneq_u32: ; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) @@ -997,7 +881,6 @@ entry: define <4 x i32> @test_vmlsl_high_lane_u16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { ; CHECK-LABEL: test_vmlsl_high_lane_u16: ; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> @@ -1009,7 +892,6 @@ entry: define <2 x i64> @test_vmlsl_high_lane_u32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { ; CHECK-LABEL: test_vmlsl_high_lane_u32: ; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> @@ -1021,7 +903,6 @@ entry: define <4 x i32> @test_vmlsl_high_laneq_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) { ; CHECK-LABEL: test_vmlsl_high_laneq_u16: ; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] -; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> @@ -1033,7 +914,6 @@ entry: define <2 x i64> @test_vmlsl_high_laneq_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) { ; CHECK-LABEL: test_vmlsl_high_laneq_u32: ; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> @@ -1045,7 +925,6 @@ entry: define <4 x i32> @test_vmull_lane_s16(<4 x i16> %a, <4 x i16> %v) { ; CHECK-LABEL: test_vmull_lane_s16: ; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 
x i16> %shuffle) @@ -1055,7 +934,6 @@ entry: define <2 x i64> @test_vmull_lane_s32(<2 x i32> %a, <2 x i32> %v) { ; CHECK-LABEL: test_vmull_lane_s32: ; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) @@ -1065,7 +943,6 @@ entry: define <4 x i32> @test_vmull_lane_u16(<4 x i16> %a, <4 x i16> %v) { ; CHECK-LABEL: test_vmull_lane_u16: ; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) @@ -1075,7 +952,6 @@ entry: define <2 x i64> @test_vmull_lane_u32(<2 x i32> %a, <2 x i32> %v) { ; CHECK-LABEL: test_vmull_lane_u32: ; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) @@ -1085,7 +961,6 @@ entry: define <4 x i32> @test_vmull_high_lane_s16(<8 x i16> %a, <4 x i16> %v) { ; CHECK-LABEL: test_vmull_high_lane_s16: ; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> @@ -1096,7 +971,6 @@ entry: define <2 x i64> @test_vmull_high_lane_s32(<4 x i32> %a, <2 x i32> %v) { ; CHECK-LABEL: test_vmull_high_lane_s32: ; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> @@ -1107,7 +981,6 @@ entry: define <4 x i32> @test_vmull_high_lane_u16(<8 x i16> %a, <4 x i16> %v) { ; CHECK-LABEL: test_vmull_high_lane_u16: ; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> @@ -1118,7 +991,6 @@ entry: define <2 x i64> @test_vmull_high_lane_u32(<4 x i32> %a, <2 x i32> %v) { ; CHECK-LABEL: test_vmull_high_lane_u32: ; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> @@ -1129,7 +1001,6 @@ entry: define <4 x i32> @test_vmull_laneq_s16(<4 x i16> %a, <8 x i16> %v) { ; CHECK-LABEL: test_vmull_laneq_s16: ; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) @@ -1139,7 +1010,6 @@ entry: define <2 x i64> @test_vmull_laneq_s32(<2 x i32> %a, <4 x i32> %v) { ; CHECK-LABEL: test_vmull_laneq_s32: ; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) @@ -1149,7 +1019,6 @@ entry: define <4 x i32> @test_vmull_laneq_u16(<4 x i16> %a, <8 x i16> %v) { ; CHECK-LABEL: test_vmull_laneq_u16: ; CHECK: 
mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) @@ -1159,7 +1028,6 @@ entry: define <2 x i64> @test_vmull_laneq_u32(<2 x i32> %a, <4 x i32> %v) { ; CHECK-LABEL: test_vmull_laneq_u32: ; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) @@ -1169,7 +1037,6 @@ entry: define <4 x i32> @test_vmull_high_laneq_s16(<8 x i16> %a, <8 x i16> %v) { ; CHECK-LABEL: test_vmull_high_laneq_s16: ; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] -; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> @@ -1180,7 +1047,6 @@ entry: define <2 x i64> @test_vmull_high_laneq_s32(<4 x i32> %a, <4 x i32> %v) { ; CHECK-LABEL: test_vmull_high_laneq_s32: ; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> @@ -1191,7 +1057,6 @@ entry: define <4 x i32> @test_vmull_high_laneq_u16(<8 x i16> %a, <8 x i16> %v) { ; CHECK-LABEL: test_vmull_high_laneq_u16: ; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] -; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> @@ -1202,7 +1067,6 @@ entry: define <2 x i64> @test_vmull_high_laneq_u32(<4 x i32> %a, <4 x i32> %v) { ; CHECK-LABEL: test_vmull_high_laneq_u32: ; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> @@ -1213,7 +1077,6 @@ entry: define <4 x i32> @test_vqdmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { ; CHECK-LABEL: test_vqdmlal_lane_s16: ; CHECK: qdmlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> %vqdmlal2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) @@ -1224,7 +1087,6 @@ entry: define <2 x i64> @test_vqdmlal_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { ; CHECK-LABEL: test_vqdmlal_lane_s32: ; CHECK: qdmlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> %vqdmlal2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) @@ -1235,7 +1097,6 @@ entry: define <4 x i32> @test_vqdmlal_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { ; CHECK-LABEL: test_vqdmlal_high_lane_s16: ; CHECK: qdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> @@ -1247,7 +1108,6 @@ entry: define <2 x i64> @test_vqdmlal_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { ; CHECK-LABEL: test_vqdmlal_high_lane_s32: ; CHECK: qdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: 
ret entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> @@ -1259,7 +1119,6 @@ entry: define <4 x i32> @test_vqdmlsl_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { ; CHECK-LABEL: test_vqdmlsl_lane_s16: ; CHECK: qdmlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> %vqdmlsl2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) @@ -1270,7 +1129,6 @@ entry: define <2 x i64> @test_vqdmlsl_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { ; CHECK-LABEL: test_vqdmlsl_lane_s32: ; CHECK: qdmlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> %vqdmlsl2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) @@ -1281,7 +1139,6 @@ entry: define <4 x i32> @test_vqdmlsl_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { ; CHECK-LABEL: test_vqdmlsl_high_lane_s16: ; CHECK: qdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> @@ -1293,7 +1150,6 @@ entry: define <2 x i64> @test_vqdmlsl_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { ; CHECK-LABEL: test_vqdmlsl_high_lane_s32: ; CHECK: qdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> @@ -1305,7 +1161,6 @@ entry: define <4 x i32> @test_vqdmull_lane_s16(<4 x i16> %a, <4 x i16> %v) { ; CHECK-LABEL: test_vqdmull_lane_s16: ; CHECK: qdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) @@ -1315,7 +1170,6 @@ entry: define <2 x i64> @test_vqdmull_lane_s32(<2 x i32> %a, <2 x i32> %v) { ; CHECK-LABEL: test_vqdmull_lane_s32: ; CHECK: qdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) @@ -1325,7 +1179,6 @@ entry: define <4 x i32> @test_vqdmull_laneq_s16(<4 x i16> %a, <8 x i16> %v) { ; CHECK-LABEL: test_vqdmull_laneq_s16: ; CHECK: qdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) @@ -1335,7 +1188,6 @@ entry: define <2 x i64> @test_vqdmull_laneq_s32(<2 x i32> %a, <4 x i32> %v) { ; CHECK-LABEL: test_vqdmull_laneq_s32: ; CHECK: qdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) @@ -1345,7 +1197,6 @@ entry: define <4 x i32> @test_vqdmull_high_lane_s16(<8 x i16> %a, <4 x i16> %v) { ; CHECK-LABEL: test_vqdmull_high_lane_s16: ; CHECK: qdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret 
entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> @@ -1356,7 +1207,6 @@ entry: define <2 x i64> @test_vqdmull_high_lane_s32(<4 x i32> %a, <2 x i32> %v) { ; CHECK-LABEL: test_vqdmull_high_lane_s32: ; CHECK: qdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> @@ -1367,7 +1217,6 @@ entry: define <4 x i32> @test_vqdmull_high_laneq_s16(<8 x i16> %a, <8 x i16> %v) { ; CHECK-LABEL: test_vqdmull_high_laneq_s16: ; CHECK: qdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] -; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> @@ -1378,7 +1227,6 @@ entry: define <2 x i64> @test_vqdmull_high_laneq_s32(<4 x i32> %a, <4 x i32> %v) { ; CHECK-LABEL: test_vqdmull_high_laneq_s32: ; CHECK: qdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> @@ -1389,7 +1237,6 @@ entry: define <4 x i16> @test_vqdmulh_lane_s16(<4 x i16> %a, <4 x i16> %v) { ; CHECK-LABEL: test_vqdmulh_lane_s16: ; CHECK: qdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> %vqdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle) @@ -1399,7 +1246,6 @@ entry: define <8 x i16> @test_vqdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %v) { ; CHECK-LABEL: test_vqdmulhq_lane_s16: ; CHECK: qdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> %vqdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle) @@ -1409,7 +1255,6 @@ entry: define <2 x i32> @test_vqdmulh_lane_s32(<2 x i32> %a, <2 x i32> %v) { ; CHECK-LABEL: test_vqdmulh_lane_s32: ; CHECK: qdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> %vqdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle) @@ -1419,7 +1264,6 @@ entry: define <4 x i32> @test_vqdmulhq_lane_s32(<4 x i32> %a, <2 x i32> %v) { ; CHECK-LABEL: test_vqdmulhq_lane_s32: ; CHECK: qdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> %vqdmulh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle) @@ -1429,7 +1273,6 @@ entry: define <4 x i16> @test_vqrdmulh_lane_s16(<4 x i16> %a, <4 x i16> %v) { ; CHECK-LABEL: test_vqrdmulh_lane_s16: ; CHECK: qrdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> %vqrdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle) @@ -1439,7 +1282,6 @@ entry: define <8 x i16> @test_vqrdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %v) { ; CHECK-LABEL: test_vqrdmulhq_lane_s16: ; CHECK: qrdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> 
%vqrdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle) @@ -1449,7 +1291,6 @@ entry: define <2 x i32> @test_vqrdmulh_lane_s32(<2 x i32> %a, <2 x i32> %v) { ; CHECK-LABEL: test_vqrdmulh_lane_s32: ; CHECK: qrdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> %vqrdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle) @@ -1459,7 +1300,6 @@ entry: define <4 x i32> @test_vqrdmulhq_lane_s32(<4 x i32> %a, <2 x i32> %v) { ; CHECK-LABEL: test_vqrdmulhq_lane_s32: ; CHECK: qrdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> %vqrdmulh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle) @@ -1468,12 +1308,9 @@ entry: define <2 x float> @test_vmul_lane_f32(<2 x float> %a, <2 x float> %v) { ; CHECK-LABEL: test_vmul_lane_f32: -; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -; EXYNOS-LABEL: test_vmul_lane_f32: -; EXYNOS: dup [[x:v[0-9]+]].2s, {{v[0-9]+}}.s[1] -; EXYNOS: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[x]].2s -; EXYNOS-NEXT: ret +; GENERIC: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +; EXYNOSM1: dup [[V:v[0-9]+]].2s, {{v[0-9]+}}.s[1] +; EXYNOSM1: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[V]].2s entry: %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> %mul = fmul <2 x float> %shuffle, %a @@ -1483,10 +1320,6 @@ entry: define <1 x double> @test_vmul_lane_f64(<1 x double> %a, <1 x double> %v) { ; CHECK-LABEL: test_vmul_lane_f64: ; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} -; CHECK-NEXT: ret -; EXYNOS-LABEL: test_vmul_lane_f64: -; EXYNOS: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} -; EXYNOS-NEXT: ret entry: %0 = bitcast <1 x double> %a to <8 x i8> %1 = bitcast <8 x i8> %0 to double @@ -1498,12 +1331,9 @@ entry: define <4 x float> @test_vmulq_lane_f32(<4 x float> %a, <2 x float> %v) { ; CHECK-LABEL: test_vmulq_lane_f32: -; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -; EXYNOS-LABEL: test_vmulq_lane_f32: -; EXYNOS: dup [[x:v[0-9]+]].4s, {{v[0-9]+}}.s[1] -; EXYNOS: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[x]].4s -; EXYNOS-NEXT: ret +; GENERIC: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +; EXYNOSM1: dup [[V:v[0-9]+]].4s, {{v[0-9]+}}.s[1] +; EXYNOSM1: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s entry: %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> %mul = fmul <4 x float> %shuffle, %a @@ -1512,12 +1342,9 @@ entry: define <2 x double> @test_vmulq_lane_f64(<2 x double> %a, <1 x double> %v) { ; CHECK-LABEL: test_vmulq_lane_f64: -; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] -; CHECK-NEXT: ret -; EXYNOS-LABEL: test_vmulq_lane_f64: -; EXYNOS: dup [[x:v[0-9]+]].2d, {{v[0-9]+}}.d[0] -; EXYNOS: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d -; EXYNOS-NEXT: ret +; GENERIC: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] +; EXYNOSM1: dup [[V:v[0-9]+]].2d, {{v[0-9]+}}.d[0] +; EXYNOSM1: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d entry: %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer %mul = fmul <2 x double> %shuffle, %a @@ -1526,12 +1353,9 @@ entry: define <2 x float> @test_vmul_laneq_f32(<2 x float> %a, <4 x float> %v) { ; CHECK-LABEL: test_vmul_laneq_f32: -; CHECK: fmul {{v[0-9]+}}.2s, 
{{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -; EXYNOS-LABEL: test_vmul_laneq_f32: -; EXYNOS: dup [[x:v[0-9]+]].2s, {{v[0-9]+}}.s[3] -; EXYNOS: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[x]].2s -; EXYNOS-NEXT: ret +; GENERIC: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +; EXYNOSM1: dup [[V:v[0-9]+]].2s, {{v[0-9]+}}.s[3] +; EXYNOSM1: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[V]].2s entry: %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> %mul = fmul <2 x float> %shuffle, %a @@ -1541,10 +1365,6 @@ entry: define <1 x double> @test_vmul_laneq_f64(<1 x double> %a, <2 x double> %v) { ; CHECK-LABEL: test_vmul_laneq_f64: ; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1] -; CHECK-NEXT: ret -; EXYNOS-LABEL: test_vmul_laneq_f64: -; EXYNOS: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1] -; EXYNOS-NEXT: ret entry: %0 = bitcast <1 x double> %a to <8 x i8> %1 = bitcast <8 x i8> %0 to double @@ -1556,12 +1376,9 @@ entry: define <4 x float> @test_vmulq_laneq_f32(<4 x float> %a, <4 x float> %v) { ; CHECK-LABEL: test_vmulq_laneq_f32: -; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -; EXYNOS-LABEL: test_vmulq_laneq_f32: -; EXYNOS: dup [[x:v[0-9]+]].4s, {{v[0-9]+}}.s[3] -; EXYNOS: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[x]].4s -; EXYNOS-NEXT: ret +; GENERIC: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +; EXYNOSM1: dup [[V:v[0-9]+]].4s, {{v[0-9]+}}.s[3] +; EXYNOSM1: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s entry: %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> %mul = fmul <4 x float> %shuffle, %a @@ -1570,12 +1387,9 @@ entry: define <2 x double> @test_vmulq_laneq_f64(<2 x double> %a, <2 x double> %v) { ; CHECK-LABEL: test_vmulq_laneq_f64: -; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1] -; CHECK-NEXT: ret -; EXYNOS-LABEL: test_vmulq_laneq_f64: -; EXYNOS: dup [[x:v[0-9]+]].2d, {{v[0-9]+}}.d[1] -; EXYNOS: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[x]].2d -; EXYNOS-NEXT: ret +; GENERIC: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1] +; EXYNOSM1: dup [[V:v[0-9]+]].2d, {{v[0-9]+}}.d[1] +; EXYNOSM1: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[V]].2d entry: %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> %mul = fmul <2 x double> %shuffle, %a @@ -1584,12 +1398,9 @@ entry: define <2 x float> @test_vmulx_lane_f32(<2 x float> %a, <2 x float> %v) { ; CHECK-LABEL: test_vmulx_lane_f32: -; CHECK: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -; EXYNOS-LABEL: test_vmulx_lane_f32: -; EXYNOS: dup [[x:v[0-9]+]].2s, {{v[0-9]+}}.s[1] -; EXYNOS: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[x]].2s -; EXYNOS-NEXT: ret +; GENERIC: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +; EXYNOSM1: dup [[V:v[0-9]+]].2s, {{v[0-9]+}}.s[1] +; EXYNOSM1: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[V]].2s entry: %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle) @@ -1598,12 +1409,9 @@ entry: define <4 x float> @test_vmulxq_lane_f32(<4 x float> %a, <2 x float> %v) { ; CHECK-LABEL: test_vmulxq_lane_f32: -; CHECK: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -; EXYNOS-LABEL: test_vmulxq_lane_f32: -; EXYNOS: dup [[x:v[0-9]+]].4s, {{v[0-9]+}}.s[1] -; EXYNOS: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[x]].4s -; Exynos-NEXT: ret +; GENERIC: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +; EXYNOSM1: dup [[V:v[0-9]+]].4s, {{v[0-9]+}}.s[1] +; 
EXYNOSM1: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s entry: %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle) @@ -1612,12 +1420,9 @@ entry: define <2 x double> @test_vmulxq_lane_f64(<2 x double> %a, <1 x double> %v) { ; CHECK-LABEL: test_vmulxq_lane_f64: -; CHECK: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] -; CHECK-NEXT: ret -; EXYNOS-LABEL: test_vmulxq_lane_f64: -; EXYNOS: dup [[x:v[0-9]+]].2d, {{v[0-9]+}}.d[0] -; EXYNOS: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[x]].2d -; EXYNOS-NEXT: ret +; GENERIC: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] +; EXYNOSM1: dup [[V:v[0-9]+]].2d, {{v[0-9]+}}.d[0] +; EXYNOSM1: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[V]].2d entry: %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle) @@ -1626,12 +1431,9 @@ entry: define <2 x float> @test_vmulx_laneq_f32(<2 x float> %a, <4 x float> %v) { ; CHECK-LABEL: test_vmulx_laneq_f32: -; CHECK: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -; EXYNOS-LABEL: test_vmulx_laneq_f32: -; EXYNOS: dup [[x:v[0-9]+]].2s, {{v[0-9]+}}.s[3] -; EXYNOS: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[x]].2s -; EXYNOS-NEXT: ret +; GENERIC: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +; EXYNOSM1: dup [[V:v[0-9]+]].2s, {{v[0-9]+}}.s[3] +; EXYNOSM1: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[V]].2s entry: %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle) @@ -1640,12 +1442,9 @@ entry: define <4 x float> @test_vmulxq_laneq_f32(<4 x float> %a, <4 x float> %v) { ; CHECK-LABEL: test_vmulxq_laneq_f32: -; CHECK: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -; EXYNOS-LABEL: test_vmulxq_laneq_f32: -; EXYNOS: dup [[x:v[0-9]+]].4s, {{v[0-9]+}}.s[3] -; EXYNOS: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[x]].4s -; EXYNOS-NEXT: ret +; GENERIC: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +; EXYNOSM1: dup [[V:v[0-9]+]].4s, {{v[0-9]+}}.s[3] +; EXYNOSM1: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s entry: %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle) @@ -1654,12 +1453,9 @@ entry: define <2 x double> @test_vmulxq_laneq_f64(<2 x double> %a, <2 x double> %v) { ; CHECK-LABEL: test_vmulxq_laneq_f64: -; CHECK: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1] -; CHECK-NEXT: ret -; EXYNOS-LABEL: test_vmulxq_laneq_f64: -; EXYNOS: dup [[x:v[0-9]+]].2d, {{v[0-9]+}}.d[1] -; EXYNOS: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[x]].2d -; EXYNOS-NEXT: ret +; GENERIC: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1] +; EXYNOSM1: dup [[V:v[0-9]+]].2d, {{v[0-9]+}}.d[1] +; EXYNOSM1: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[V]].2d entry: %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle) @@ -1669,7 +1465,6 @@ entry: define <4 x i16> @test_vmla_lane_s16_0(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) { ; CHECK-LABEL: test_vmla_lane_s16_0: ; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> 
undef, <4 x i32> zeroinitializer %mul = mul <4 x i16> %shuffle, %b @@ -1680,7 +1475,6 @@ entry: define <8 x i16> @test_vmlaq_lane_s16_0(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) { ; CHECK-LABEL: test_vmlaq_lane_s16_0: ; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer %mul = mul <8 x i16> %shuffle, %b @@ -1691,7 +1485,6 @@ entry: define <2 x i32> @test_vmla_lane_s32_0(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) { ; CHECK-LABEL: test_vmla_lane_s32_0: ; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer %mul = mul <2 x i32> %shuffle, %b @@ -1702,7 +1495,6 @@ entry: define <4 x i32> @test_vmlaq_lane_s32_0(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) { ; CHECK-LABEL: test_vmlaq_lane_s32_0: ; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer %mul = mul <4 x i32> %shuffle, %b @@ -1713,7 +1505,6 @@ entry: define <4 x i16> @test_vmla_laneq_s16_0(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) { ; CHECK-LABEL: test_vmla_laneq_s16_0: ; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer %mul = mul <4 x i16> %shuffle, %b @@ -1724,7 +1515,6 @@ entry: define <8 x i16> @test_vmlaq_laneq_s16_0(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) { ; CHECK-LABEL: test_vmlaq_laneq_s16_0: ; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer %mul = mul <8 x i16> %shuffle, %b @@ -1735,7 +1525,6 @@ entry: define <2 x i32> @test_vmla_laneq_s32_0(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) { ; CHECK-LABEL: test_vmla_laneq_s32_0: ; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer %mul = mul <2 x i32> %shuffle, %b @@ -1746,7 +1535,6 @@ entry: define <4 x i32> @test_vmlaq_laneq_s32_0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) { ; CHECK-LABEL: test_vmlaq_laneq_s32_0: ; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer %mul = mul <4 x i32> %shuffle, %b @@ -1757,7 +1545,6 @@ entry: define <4 x i16> @test_vmls_lane_s16_0(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) { ; CHECK-LABEL: test_vmls_lane_s16_0: ; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer %mul = mul <4 x i16> %shuffle, %b @@ -1768,7 +1555,6 @@ entry: define <8 x i16> @test_vmlsq_lane_s16_0(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) { ; CHECK-LABEL: test_vmlsq_lane_s16_0: ; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer %mul = mul <8 x i16> %shuffle, %b @@ -1779,7 +1565,6 @@ entry: define <2 x i32> @test_vmls_lane_s32_0(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) { ; CHECK-LABEL: test_vmls_lane_s32_0: ; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, 
<2 x i32> zeroinitializer %mul = mul <2 x i32> %shuffle, %b @@ -1790,7 +1575,6 @@ entry: define <4 x i32> @test_vmlsq_lane_s32_0(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) { ; CHECK-LABEL: test_vmlsq_lane_s32_0: ; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer %mul = mul <4 x i32> %shuffle, %b @@ -1801,7 +1585,6 @@ entry: define <4 x i16> @test_vmls_laneq_s16_0(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) { ; CHECK-LABEL: test_vmls_laneq_s16_0: ; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer %mul = mul <4 x i16> %shuffle, %b @@ -1812,7 +1595,6 @@ entry: define <8 x i16> @test_vmlsq_laneq_s16_0(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) { ; CHECK-LABEL: test_vmlsq_laneq_s16_0: ; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer %mul = mul <8 x i16> %shuffle, %b @@ -1823,7 +1605,6 @@ entry: define <2 x i32> @test_vmls_laneq_s32_0(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) { ; CHECK-LABEL: test_vmls_laneq_s32_0: ; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer %mul = mul <2 x i32> %shuffle, %b @@ -1834,7 +1615,6 @@ entry: define <4 x i32> @test_vmlsq_laneq_s32_0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) { ; CHECK-LABEL: test_vmlsq_laneq_s32_0: ; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer %mul = mul <4 x i32> %shuffle, %b @@ -1845,7 +1625,6 @@ entry: define <4 x i16> @test_vmul_lane_s16_0(<4 x i16> %a, <4 x i16> %v) { ; CHECK-LABEL: test_vmul_lane_s16_0: ; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer %mul = mul <4 x i16> %shuffle, %a @@ -1855,7 +1634,6 @@ entry: define <8 x i16> @test_vmulq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) { ; CHECK-LABEL: test_vmulq_lane_s16_0: ; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer %mul = mul <8 x i16> %shuffle, %a @@ -1865,7 +1643,6 @@ entry: define <2 x i32> @test_vmul_lane_s32_0(<2 x i32> %a, <2 x i32> %v) { ; CHECK-LABEL: test_vmul_lane_s32_0: ; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer %mul = mul <2 x i32> %shuffle, %a @@ -1875,7 +1652,6 @@ entry: define <4 x i32> @test_vmulq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) { ; CHECK-LABEL: test_vmulq_lane_s32_0: ; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer %mul = mul <4 x i32> %shuffle, %a @@ -1885,7 +1661,6 @@ entry: define <4 x i16> @test_vmul_lane_u16_0(<4 x i16> %a, <4 x i16> %v) { ; CHECK-LABEL: test_vmul_lane_u16_0: ; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer %mul = mul <4 x i16> %shuffle, %a @@ -1895,7 
+1670,6 @@ entry: define <8 x i16> @test_vmulq_lane_u16_0(<8 x i16> %a, <4 x i16> %v) { ; CHECK-LABEL: test_vmulq_lane_u16_0: ; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer %mul = mul <8 x i16> %shuffle, %a @@ -1905,7 +1679,6 @@ entry: define <2 x i32> @test_vmul_lane_u32_0(<2 x i32> %a, <2 x i32> %v) { ; CHECK-LABEL: test_vmul_lane_u32_0: ; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer %mul = mul <2 x i32> %shuffle, %a @@ -1915,7 +1688,6 @@ entry: define <4 x i32> @test_vmulq_lane_u32_0(<4 x i32> %a, <2 x i32> %v) { ; CHECK-LABEL: test_vmulq_lane_u32_0: ; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer %mul = mul <4 x i32> %shuffle, %a @@ -1925,7 +1697,6 @@ entry: define <4 x i16> @test_vmul_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) { ; CHECK-LABEL: test_vmul_laneq_s16_0: ; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer %mul = mul <4 x i16> %shuffle, %a @@ -1935,7 +1706,6 @@ entry: define <8 x i16> @test_vmulq_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) { ; CHECK-LABEL: test_vmulq_laneq_s16_0: ; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer %mul = mul <8 x i16> %shuffle, %a @@ -1945,7 +1715,6 @@ entry: define <2 x i32> @test_vmul_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) { ; CHECK-LABEL: test_vmul_laneq_s32_0: ; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer %mul = mul <2 x i32> %shuffle, %a @@ -1955,7 +1724,6 @@ entry: define <4 x i32> @test_vmulq_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) { ; CHECK-LABEL: test_vmulq_laneq_s32_0: ; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer %mul = mul <4 x i32> %shuffle, %a @@ -1965,7 +1733,6 @@ entry: define <4 x i16> @test_vmul_laneq_u16_0(<4 x i16> %a, <8 x i16> %v) { ; CHECK-LABEL: test_vmul_laneq_u16_0: ; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer %mul = mul <4 x i16> %shuffle, %a @@ -1975,7 +1742,6 @@ entry: define <8 x i16> @test_vmulq_laneq_u16_0(<8 x i16> %a, <8 x i16> %v) { ; CHECK-LABEL: test_vmulq_laneq_u16_0: ; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer %mul = mul <8 x i16> %shuffle, %a @@ -1985,7 +1751,6 @@ entry: define <2 x i32> @test_vmul_laneq_u32_0(<2 x i32> %a, <4 x i32> %v) { ; CHECK-LABEL: test_vmul_laneq_u32_0: ; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer %mul = mul <2 x i32> %shuffle, %a @@ -1995,7 +1760,6 @@ entry: define <4 x i32> @test_vmulq_laneq_u32_0(<4 x i32> %a, <4 x i32> %v) { ; CHECK-LABEL: test_vmulq_laneq_u32_0: ; CHECK: mul 
{{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer %mul = mul <4 x i32> %shuffle, %a @@ -2004,12 +1768,9 @@ entry: define <2 x float> @test_vfma_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) { ; CHECK-LABEL: test_vfma_lane_f32_0: -; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -; EXYNOS-LABEL: test_vfma_lane_f32_0: -; EXYNOS: dup [[x:v[0-9]+]].2s, {{v[0-9]+}}.s[0] -; EXYNOS: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[x]].2s -; EXYNOS-NEXT: ret +; GENERIC: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; EXYNOSM1: dup [[V:v[0-9]+]].2s, {{v[0-9]+}}.s[0] +; EXYNOSM1: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[V]].2s entry: %lane = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) @@ -2018,12 +1779,9 @@ entry: define <4 x float> @test_vfmaq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) { ; CHECK-LABEL: test_vfmaq_lane_f32_0: -; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -; EXYNOS-LABEL: test_vfmaq_lane_f32_0: -; EXYNOS: dup [[x:v[0-9]+]].4s, {{v[0-9]+}}.s[0] -; EXYNOS: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[x]].4s -; EXYNOS-NEXT: ret +; GENERIC: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; EXYNOSM1: dup [[V:v[0-9]+]].4s, {{v[0-9]+}}.s[0] +; EXYNOSM1: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s entry: %lane = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a) @@ -2032,12 +1790,9 @@ entry: define <2 x float> @test_vfma_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) { ; CHECK-LABEL: test_vfma_laneq_f32_0: -; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -; EXYNOS-LABEL: test_vfma_laneq_f32_0: -; EXYNOS: dup [[x:v[0-9]+]].2s, {{v[0-9]+}}.s[0] -; EXYNOS: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[x]].2s -; EXYNOS-NEXT: ret +; GENERIC: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; EXYNOSM1: dup [[V:v[0-9]+]].2s, {{v[0-9]+}}.s[0] +; EXYNOSM1: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[V]].2s entry: %lane = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) @@ -2046,12 +1801,9 @@ entry: define <4 x float> @test_vfmaq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) { ; CHECK-LABEL: test_vfmaq_laneq_f32_0: -; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -; EXYNOS-LABEL: test_vfmaq_laneq_f32_0: -; EXYNOS: dup [[x:v[0-9]+]].4s, {{v[0-9]+}}.s[0] -; EXYNOS: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[x]].4s -; EXYNOS-NEXT: ret +; GENERIC: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; EXYNOSM1: dup [[V:v[0-9]+]].4s, {{v[0-9]+}}.s[0] +; EXYNOSM1: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s entry: %lane = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a) @@ -2060,12 +1812,9 @@ entry: define <2 x float> @test_vfms_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) { ; CHECK-LABEL: test_vfms_lane_f32_0: -; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -; EXYNOS-LABEL: test_vfms_lane_f32_0: 
-; EXYNOS: dup [[x:v[0-9]+]].2s, {{v[0-9]+}}.s[0] -; EXYNOS: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[x]].2s -; EXYNOS-NEXT: ret +; GENERIC: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; EXYNOSM1: dup [[V:v[0-9]+]].2s, {{v[0-9]+}}.s[0] +; EXYNOSM1: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[V]].2s entry: %sub = fsub <2 x float> , %v %lane = shufflevector <2 x float> %sub, <2 x float> undef, <2 x i32> zeroinitializer @@ -2075,12 +1824,9 @@ entry: define <4 x float> @test_vfmsq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) { ; CHECK-LABEL: test_vfmsq_lane_f32_0: -; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -; EXYNOS-LABEL: test_vfmsq_lane_f32_0: -; EXYNOS: dup [[x:v[0-9]+]].4s, {{v[0-9]+}}.s[0] -; EXYNOS: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[x]].4s -; EXYNOS-NEXT: ret +; GENERIC: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; EXYNOSM1: dup [[V:v[0-9]+]].4s, {{v[0-9]+}}.s[0] +; EXYNOSM1: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s entry: %sub = fsub <2 x float> , %v %lane = shufflevector <2 x float> %sub, <2 x float> undef, <4 x i32> zeroinitializer @@ -2090,12 +1836,9 @@ entry: define <2 x float> @test_vfms_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) { ; CHECK-LABEL: test_vfms_laneq_f32_0: -; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -; EXYNOS-LABEL: test_vfms_laneq_f32_0: -; EXYNOS: dup [[x:v[0-9]+]].2s, {{v[0-9]+}}.s[0] -; EXYNOS: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[x]].2s -; EXYNOS-NEXT: ret +; GENERIC: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; EXYNOSM1: dup [[V:v[0-9]+]].2s, {{v[0-9]+}}.s[0] +; EXYNOSM1: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[V]].2s entry: %sub = fsub <4 x float> , %v %lane = shufflevector <4 x float> %sub, <4 x float> undef, <2 x i32> zeroinitializer @@ -2105,12 +1848,9 @@ entry: define <4 x float> @test_vfmsq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) { ; CHECK-LABEL: test_vfmsq_laneq_f32_0: -; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -; EXYNOS-LABEL: test_vfmsq_laneq_f32_0: -; EXYNOS: dup [[x:v[0-9]+]].4s, {{v[0-9]+}}.s[0] -; EXYNOS: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[x]].4s -; EXYNOS-NEXT: ret +; GENERIC: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; EXYNOSM1: dup [[V:v[0-9]+]].4s, {{v[0-9]+}}.s[0] +; EXYNOSM1: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s entry: %sub = fsub <4 x float> , %v %lane = shufflevector <4 x float> %sub, <4 x float> undef, <4 x i32> zeroinitializer @@ -2120,12 +1860,9 @@ entry: define <2 x double> @test_vfmaq_laneq_f64_0(<2 x double> %a, <2 x double> %b, <2 x double> %v) { ; CHECK-LABEL: test_vfmaq_laneq_f64_0: -; CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] -; CHECK-NEXT: ret -; EXYNOS-LABEL: test_vfmaq_laneq_f64_0: -; EXYNOS: dup [[x:v[0-9]+]].2d, {{v[0-9]+}}.d[0] -; EXYNOS: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[x]].2d -; EXYNOS-NEXT: ret +; GENERIC: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] +; EXYNOSM1: dup [[V:v[0-9]+]].2d, {{v[0-9]+}}.d[0] +; EXYNOSM1: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[V]].2d entry: %lane = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a) @@ -2134,12 +1871,9 @@ entry: define <2 x double> @test_vfmsq_laneq_f64_0(<2 x double> %a, <2 x double> %b, <2 x double> %v) { ; CHECK-LABEL: test_vfmsq_laneq_f64_0: -; CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, 
{{v[0-9]+}}.d[0] -; CHECK-NEXT: ret -; EXYNOS-LABEL: test_vfmsq_laneq_f64_0: -; EXYNOS: dup [[x:v[0-9]+]].2d, {{v[0-9]+}}.d[0] -; EXYNOS: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[x]].2d -; EXYNOS-NEXT: ret +; GENERIC: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] +; EXYNOSM1: dup [[V:v[0-9]+]].2d, {{v[0-9]+}}.d[0] +; EXYNOSM1: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[V]].2d entry: %sub = fsub <2 x double> , %v %lane = shufflevector <2 x double> %sub, <2 x double> undef, <2 x i32> zeroinitializer @@ -2150,7 +1884,6 @@ entry: define <4 x i32> @test_vmlal_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { ; CHECK-LABEL: test_vmlal_lane_s16_0: ; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) @@ -2161,7 +1894,6 @@ entry: define <2 x i64> @test_vmlal_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { ; CHECK-LABEL: test_vmlal_lane_s32_0: ; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) @@ -2172,7 +1904,6 @@ entry: define <4 x i32> @test_vmlal_laneq_s16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) { ; CHECK-LABEL: test_vmlal_laneq_s16_0: ; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) @@ -2183,7 +1914,6 @@ entry: define <2 x i64> @test_vmlal_laneq_s32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) { ; CHECK-LABEL: test_vmlal_laneq_s32_0: ; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) @@ -2194,7 +1924,6 @@ entry: define <4 x i32> @test_vmlal_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { ; CHECK-LABEL: test_vmlal_high_lane_s16_0: ; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer @@ -2206,7 +1935,6 @@ entry: define <2 x i64> @test_vmlal_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { ; CHECK-LABEL: test_vmlal_high_lane_s32_0: ; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer @@ -2218,7 +1946,6 @@ entry: define <4 x i32> @test_vmlal_high_laneq_s16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) { ; CHECK-LABEL: test_vmlal_high_laneq_s16_0: ; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer @@ -2230,7 +1957,6 @@ entry: define <2 x i64> @test_vmlal_high_laneq_s32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) { ; CHECK-LABEL: test_vmlal_high_laneq_s32_0: ; 
CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer @@ -2242,7 +1968,6 @@ entry: define <4 x i32> @test_vmlsl_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { ; CHECK-LABEL: test_vmlsl_lane_s16_0: ; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) @@ -2253,7 +1978,6 @@ entry: define <2 x i64> @test_vmlsl_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { ; CHECK-LABEL: test_vmlsl_lane_s32_0: ; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) @@ -2264,7 +1988,6 @@ entry: define <4 x i32> @test_vmlsl_laneq_s16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) { ; CHECK-LABEL: test_vmlsl_laneq_s16_0: ; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) @@ -2275,7 +1998,6 @@ entry: define <2 x i64> @test_vmlsl_laneq_s32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) { ; CHECK-LABEL: test_vmlsl_laneq_s32_0: ; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) @@ -2286,7 +2008,6 @@ entry: define <4 x i32> @test_vmlsl_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { ; CHECK-LABEL: test_vmlsl_high_lane_s16_0: ; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer @@ -2298,7 +2019,6 @@ entry: define <2 x i64> @test_vmlsl_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { ; CHECK-LABEL: test_vmlsl_high_lane_s32_0: ; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer @@ -2310,7 +2030,6 @@ entry: define <4 x i32> @test_vmlsl_high_laneq_s16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) { ; CHECK-LABEL: test_vmlsl_high_laneq_s16_0: ; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer @@ -2322,7 +2041,6 @@ entry: define <2 x i64> @test_vmlsl_high_laneq_s32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) { ; CHECK-LABEL: test_vmlsl_high_laneq_s32_0: ; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer @@ -2334,7 +2052,6 @@ entry: 
define <4 x i32> @test_vmlal_lane_u16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { ; CHECK-LABEL: test_vmlal_lane_u16_0: ; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) @@ -2345,7 +2062,6 @@ entry: define <2 x i64> @test_vmlal_lane_u32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { ; CHECK-LABEL: test_vmlal_lane_u32_0: ; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) @@ -2356,7 +2072,6 @@ entry: define <4 x i32> @test_vmlal_laneq_u16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) { ; CHECK-LABEL: test_vmlal_laneq_u16_0: ; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) @@ -2367,7 +2082,6 @@ entry: define <2 x i64> @test_vmlal_laneq_u32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) { ; CHECK-LABEL: test_vmlal_laneq_u32_0: ; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) @@ -2378,7 +2092,6 @@ entry: define <4 x i32> @test_vmlal_high_lane_u16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { ; CHECK-LABEL: test_vmlal_high_lane_u16_0: ; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer @@ -2390,7 +2103,6 @@ entry: define <2 x i64> @test_vmlal_high_lane_u32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { ; CHECK-LABEL: test_vmlal_high_lane_u32_0: ; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer @@ -2402,7 +2114,6 @@ entry: define <4 x i32> @test_vmlal_high_laneq_u16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) { ; CHECK-LABEL: test_vmlal_high_laneq_u16_0: ; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer @@ -2414,7 +2125,6 @@ entry: define <2 x i64> @test_vmlal_high_laneq_u32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) { ; CHECK-LABEL: test_vmlal_high_laneq_u32_0: ; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer @@ -2426,7 +2136,6 @@ entry: define <4 x i32> @test_vmlsl_lane_u16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { ; CHECK-LABEL: test_vmlsl_lane_u16_0: ; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, 
<4 x i32> zeroinitializer %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) @@ -2437,7 +2146,6 @@ entry: define <2 x i64> @test_vmlsl_lane_u32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { ; CHECK-LABEL: test_vmlsl_lane_u32_0: ; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) @@ -2448,7 +2156,6 @@ entry: define <4 x i32> @test_vmlsl_laneq_u16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) { ; CHECK-LABEL: test_vmlsl_laneq_u16_0: ; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) @@ -2459,7 +2166,6 @@ entry: define <2 x i64> @test_vmlsl_laneq_u32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) { ; CHECK-LABEL: test_vmlsl_laneq_u32_0: ; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) @@ -2470,7 +2176,6 @@ entry: define <4 x i32> @test_vmlsl_high_lane_u16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { ; CHECK-LABEL: test_vmlsl_high_lane_u16_0: ; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer @@ -2482,7 +2187,6 @@ entry: define <2 x i64> @test_vmlsl_high_lane_u32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { ; CHECK-LABEL: test_vmlsl_high_lane_u32_0: ; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer @@ -2494,7 +2198,6 @@ entry: define <4 x i32> @test_vmlsl_high_laneq_u16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) { ; CHECK-LABEL: test_vmlsl_high_laneq_u16_0: ; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer @@ -2506,7 +2209,6 @@ entry: define <2 x i64> @test_vmlsl_high_laneq_u32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) { ; CHECK-LABEL: test_vmlsl_high_laneq_u32_0: ; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer @@ -2518,7 +2220,6 @@ entry: define <4 x i32> @test_vmull_lane_s16_0(<4 x i16> %a, <4 x i16> %v) { ; CHECK-LABEL: test_vmull_lane_s16_0: ; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) @@ -2528,7 +2229,6 @@ entry: define <2 x i64> @test_vmull_lane_s32_0(<2 x i32> %a, <2 x i32> %v) { ; CHECK-LABEL: test_vmull_lane_s32_0: ; CHECK: mull 
{{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) @@ -2538,7 +2238,6 @@ entry: define <4 x i32> @test_vmull_lane_u16_0(<4 x i16> %a, <4 x i16> %v) { ; CHECK-LABEL: test_vmull_lane_u16_0: ; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) @@ -2548,7 +2247,6 @@ entry: define <2 x i64> @test_vmull_lane_u32_0(<2 x i32> %a, <2 x i32> %v) { ; CHECK-LABEL: test_vmull_lane_u32_0: ; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) @@ -2558,7 +2256,6 @@ entry: define <4 x i32> @test_vmull_high_lane_s16_0(<8 x i16> %a, <4 x i16> %v) { ; CHECK-LABEL: test_vmull_high_lane_s16_0: ; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer @@ -2569,7 +2266,6 @@ entry: define <2 x i64> @test_vmull_high_lane_s32_0(<4 x i32> %a, <2 x i32> %v) { ; CHECK-LABEL: test_vmull_high_lane_s32_0: ; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer @@ -2580,7 +2276,6 @@ entry: define <4 x i32> @test_vmull_high_lane_u16_0(<8 x i16> %a, <4 x i16> %v) { ; CHECK-LABEL: test_vmull_high_lane_u16_0: ; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer @@ -2591,7 +2286,6 @@ entry: define <2 x i64> @test_vmull_high_lane_u32_0(<4 x i32> %a, <2 x i32> %v) { ; CHECK-LABEL: test_vmull_high_lane_u32_0: ; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer @@ -2602,7 +2296,6 @@ entry: define <4 x i32> @test_vmull_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) { ; CHECK-LABEL: test_vmull_laneq_s16_0: ; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) @@ -2612,7 +2305,6 @@ entry: define <2 x i64> @test_vmull_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) { ; CHECK-LABEL: test_vmull_laneq_s32_0: ; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) @@ -2622,7 +2314,6 @@ entry: define <4 x i32> @test_vmull_laneq_u16_0(<4 x i16> %a, <8 x i16> %v) { ; CHECK-LABEL: 
test_vmull_laneq_u16_0: ; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) @@ -2632,7 +2323,6 @@ entry: define <2 x i64> @test_vmull_laneq_u32_0(<2 x i32> %a, <4 x i32> %v) { ; CHECK-LABEL: test_vmull_laneq_u32_0: ; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) @@ -2642,7 +2332,6 @@ entry: define <4 x i32> @test_vmull_high_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) { ; CHECK-LABEL: test_vmull_high_laneq_s16_0: ; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer @@ -2653,7 +2342,6 @@ entry: define <2 x i64> @test_vmull_high_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) { ; CHECK-LABEL: test_vmull_high_laneq_s32_0: ; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer @@ -2664,7 +2352,6 @@ entry: define <4 x i32> @test_vmull_high_laneq_u16_0(<8 x i16> %a, <8 x i16> %v) { ; CHECK-LABEL: test_vmull_high_laneq_u16_0: ; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer @@ -2675,7 +2362,6 @@ entry: define <2 x i64> @test_vmull_high_laneq_u32_0(<4 x i32> %a, <4 x i32> %v) { ; CHECK-LABEL: test_vmull_high_laneq_u32_0: ; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer @@ -2686,7 +2372,6 @@ entry: define <4 x i32> @test_vqdmlal_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { ; CHECK-LABEL: test_vqdmlal_lane_s16_0: ; CHECK: qdmlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer %vqdmlal2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) @@ -2697,7 +2382,6 @@ entry: define <2 x i64> @test_vqdmlal_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { ; CHECK-LABEL: test_vqdmlal_lane_s32_0: ; CHECK: qdmlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer %vqdmlal2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) @@ -2708,7 +2392,6 @@ entry: define <4 x i32> @test_vqdmlal_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { ; CHECK-LABEL: test_vqdmlal_high_lane_s16_0: ; CHECK: qdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer @@ -2720,7 +2403,6 @@ entry: 
define <2 x i64> @test_vqdmlal_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { ; CHECK-LABEL: test_vqdmlal_high_lane_s32_0: ; CHECK: qdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer @@ -2732,7 +2414,6 @@ entry: define <4 x i32> @test_vqdmlsl_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { ; CHECK-LABEL: test_vqdmlsl_lane_s16_0: ; CHECK: qdmlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer %vqdmlsl2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) @@ -2743,7 +2424,6 @@ entry: define <2 x i64> @test_vqdmlsl_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { ; CHECK-LABEL: test_vqdmlsl_lane_s32_0: ; CHECK: qdmlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer %vqdmlsl2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) @@ -2754,7 +2434,6 @@ entry: define <4 x i32> @test_vqdmlsl_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { ; CHECK-LABEL: test_vqdmlsl_high_lane_s16_0: ; CHECK: qdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer @@ -2766,7 +2445,6 @@ entry: define <2 x i64> @test_vqdmlsl_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { ; CHECK-LABEL: test_vqdmlsl_high_lane_s32_0: ; CHECK: qdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer @@ -2778,7 +2456,6 @@ entry: define <4 x i32> @test_vqdmull_lane_s16_0(<4 x i16> %a, <4 x i16> %v) { ; CHECK-LABEL: test_vqdmull_lane_s16_0: ; CHECK: qdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) @@ -2788,7 +2465,6 @@ entry: define <2 x i64> @test_vqdmull_lane_s32_0(<2 x i32> %a, <2 x i32> %v) { ; CHECK-LABEL: test_vqdmull_lane_s32_0: ; CHECK: qdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) @@ -2798,7 +2474,6 @@ entry: define <4 x i32> @test_vqdmull_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) { ; CHECK-LABEL: test_vqdmull_laneq_s16_0: ; CHECK: qdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) @@ -2808,7 +2483,6 @@ entry: define <2 x i64> @test_vqdmull_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) { ; CHECK-LABEL: test_vqdmull_laneq_s32_0: ; CHECK: qdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret entry: %shuffle = 
shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) @@ -2818,7 +2492,6 @@ entry: define <4 x i32> @test_vqdmull_high_lane_s16_0(<8 x i16> %a, <4 x i16> %v) { ; CHECK-LABEL: test_vqdmull_high_lane_s16_0: ; CHECK: qdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer @@ -2829,7 +2502,6 @@ entry: define <2 x i64> @test_vqdmull_high_lane_s32_0(<4 x i32> %a, <2 x i32> %v) { ; CHECK-LABEL: test_vqdmull_high_lane_s32_0: ; CHECK: qdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer @@ -2840,7 +2512,6 @@ entry: define <4 x i32> @test_vqdmull_high_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) { ; CHECK-LABEL: test_vqdmull_high_laneq_s16_0: ; CHECK: qdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer @@ -2851,7 +2522,6 @@ entry: define <2 x i64> @test_vqdmull_high_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) { ; CHECK-LABEL: test_vqdmull_high_laneq_s32_0: ; CHECK: qdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer @@ -2862,7 +2532,6 @@ entry: define <4 x i16> @test_vqdmulh_lane_s16_0(<4 x i16> %a, <4 x i16> %v) { ; CHECK-LABEL: test_vqdmulh_lane_s16_0: ; CHECK: qdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer %vqdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle) @@ -2872,7 +2541,6 @@ entry: define <8 x i16> @test_vqdmulhq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) { ; CHECK-LABEL: test_vqdmulhq_lane_s16_0: ; CHECK: qdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer %vqdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle) @@ -2882,7 +2550,6 @@ entry: define <2 x i32> @test_vqdmulh_lane_s32_0(<2 x i32> %a, <2 x i32> %v) { ; CHECK-LABEL: test_vqdmulh_lane_s32_0: ; CHECK: qdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer %vqdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle) @@ -2892,7 +2559,6 @@ entry: define <4 x i32> @test_vqdmulhq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) { ; CHECK-LABEL: test_vqdmulhq_lane_s32_0: ; CHECK: qdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer %vqdmulh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle) @@ -2902,7 +2568,6 @@ entry: define <4 x i16> @test_vqrdmulh_lane_s16_0(<4 x i16> %a, <4 x i16> %v) { ; CHECK-LABEL: test_vqrdmulh_lane_s16_0: 
; CHECK: qrdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer %vqrdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle) @@ -2912,7 +2577,6 @@ entry: define <8 x i16> @test_vqrdmulhq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) { ; CHECK-LABEL: test_vqrdmulhq_lane_s16_0: ; CHECK: qrdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer %vqrdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle) @@ -2922,7 +2586,6 @@ entry: define <2 x i32> @test_vqrdmulh_lane_s32_0(<2 x i32> %a, <2 x i32> %v) { ; CHECK-LABEL: test_vqrdmulh_lane_s32_0: ; CHECK: qrdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer %vqrdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle) @@ -2932,7 +2595,6 @@ entry: define <4 x i32> @test_vqrdmulhq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) { ; CHECK-LABEL: test_vqrdmulhq_lane_s32_0: ; CHECK: qrdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer %vqrdmulh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle) @@ -2941,12 +2603,9 @@ entry: define <2 x float> @test_vmul_lane_f32_0(<2 x float> %a, <2 x float> %v) { ; CHECK-LABEL: test_vmul_lane_f32_0: -; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -; EXYNOS-LABEL: test_vmul_lane_f32_0: -; EXYNOS: dup [[x:v[0-9]+]].2s, {{v[0-9]+}}.s[0] -; EXYNOS: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[x]].2s -; EXYNOS-NEXT: ret +; GENERIC: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; EXYNOSM1: dup [[V:v[0-9]+]].2s, {{v[0-9]+}}.s[0] +; EXYNOSM1: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[V]].2s entry: %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer %mul = fmul <2 x float> %shuffle, %a @@ -2955,12 +2614,9 @@ entry: define <4 x float> @test_vmulq_lane_f32_0(<4 x float> %a, <2 x float> %v) { ; CHECK-LABEL: test_vmulq_lane_f32_0: -; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -; EXYNOS-LABEL: test_vmulq_lane_f32_0: -; EXYNOS: dup [[x:v[0-9]+]].4s, {{v[0-9]+}}.s[0] -; EXYNOS: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[x]].4s -; EXYNOS-NEXT: ret +; GENERIC: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; EXYNOSM1: dup [[V:v[0-9]+]].4s, {{v[0-9]+}}.s[0] +; EXYNOSM1: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s entry: %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer %mul = fmul <4 x float> %shuffle, %a @@ -2969,12 +2625,9 @@ entry: define <2 x float> @test_vmul_laneq_f32_0(<2 x float> %a, <4 x float> %v) { ; CHECK-LABEL: test_vmul_laneq_f32_0: -; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -; EXYNOS-LABEL: test_vmul_laneq_f32_0: -; EXYNOS: dup [[x:v[0-9]+]].2s, {{v[0-9]+}}.s[0] -; EXYNOS: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[x]].2s -; EXYNOS-NEXT: ret +; GENERIC: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; EXYNOSM1: dup [[V:v[0-9]+]].2s, {{v[0-9]+}}.s[0] +; EXYNOSM1: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[V]].2s entry: %shuffle = shufflevector <4 x float> 
%v, <4 x float> undef, <2 x i32> zeroinitializer %mul = fmul <2 x float> %shuffle, %a @@ -2984,10 +2637,6 @@ entry: define <1 x double> @test_vmul_laneq_f64_0(<1 x double> %a, <2 x double> %v) { ; CHECK-LABEL: test_vmul_laneq_f64_0: ; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0] -; CHECK-NEXT: ret -; EXYNOS-LABEL: test_vmul_laneq_f64_0: -; EXYNOS: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0] -; EXYNOS-NEXT: ret entry: %0 = bitcast <1 x double> %a to <8 x i8> %1 = bitcast <8 x i8> %0 to double @@ -2999,12 +2648,9 @@ entry: define <4 x float> @test_vmulq_laneq_f32_0(<4 x float> %a, <4 x float> %v) { ; CHECK-LABEL: test_vmulq_laneq_f32_0: -; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -; EXYNOS-LABEL: test_vmulq_laneq_f32_0: -; EXYNOS: dup [[x:v[0-9]+]].4s, {{v[0-9]+}}.s[0] -; EXYNOS: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[x]].4s -; EXYNOS-NEXT: ret +; GENERIC: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; EXYNOSM1: dup [[V:v[0-9]+]].4s, {{v[0-9]+}}.s[0] +; EXYNOSM1: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s entry: %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer %mul = fmul <4 x float> %shuffle, %a @@ -3013,12 +2659,9 @@ entry: define <2 x double> @test_vmulq_laneq_f64_0(<2 x double> %a, <2 x double> %v) { ; CHECK-LABEL: test_vmulq_laneq_f64_0: -; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] -; CHECK-NEXT: ret -; EXYNOS-LABEL: test_vmulq_laneq_f64_0: -; EXYNOS: dup [[x:v[0-9]+]].2d, {{v[0-9]+}}.d[0] -; EXYNOS: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[x]].2d -; EXYNOS-NEXT: ret +; GENERIC: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] +; EXYNOSM1: dup [[V:v[0-9]+]].2d, {{v[0-9]+}}.d[0] +; EXYNOSM1: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[V]].2d entry: %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer %mul = fmul <2 x double> %shuffle, %a @@ -3027,12 +2670,9 @@ entry: define <2 x float> @test_vmulx_lane_f32_0(<2 x float> %a, <2 x float> %v) { ; CHECK-LABEL: test_vmulx_lane_f32_0: -; CHECK: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -; EXYNOS-LABEL: test_vmulx_lane_f32_0: -; EXYNOS: dup [[x:v[0-9]+]].2s, {{v[0-9]+}}.s[0] -; EXYNOS: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[x]].2s -; EXYNOS-NEXT: ret +; GENERIC: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; EXYNOSM1: dup [[V:v[0-9]+]].2s, {{v[0-9]+}}.s[0] +; EXYNOSM1: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[V]].2s entry: %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle) @@ -3041,12 +2681,9 @@ entry: define <4 x float> @test_vmulxq_lane_f32_0(<4 x float> %a, <2 x float> %v) { ; CHECK-LABEL: test_vmulxq_lane_f32_0: -; CHECK: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -; EXYNOS-LABEL: test_vmulxq_lane_f32_0: -; EXYNOS: dup [[x:v[0-9]+]].4s, {{v[0-9]+}}.s[0] -; EXYNOS: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[x]].4s -; EXYNOS-NEXT: ret +; GENERIC: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; EXYNOSM1: dup [[V:v[0-9]+]].4s, {{v[0-9]+}}.s[0] +; EXYNOSM1: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s entry: %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle) @@ -3055,12 +2692,9 @@ entry: define <2 x double> @test_vmulxq_lane_f64_0(<2 x double> %a, 
<1 x double> %v) { ; CHECK-LABEL: test_vmulxq_lane_f64_0: -; CHECK: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] -; CHECK-NEXT: ret -; EXYNOS-LABEL: test_vmulxq_lane_f64_0: -; EXYNOS: dup [[x:v[0-9]+]].2d, {{v[0-9]+}}.d[0] -; EXYNOS: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[x]].2d -; EXYNOS-NEXT: ret +; GENERIC: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] +; EXYNOSM1: dup [[V:v[0-9]+]].2d, {{v[0-9]+}}.d[0] +; EXYNOSM1: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[V]].2d entry: %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle) @@ -3069,12 +2703,9 @@ entry: define <2 x float> @test_vmulx_laneq_f32_0(<2 x float> %a, <4 x float> %v) { ; CHECK-LABEL: test_vmulx_laneq_f32_0: -; CHECK: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -; EXYNOS-LABEL: test_vmulx_laneq_f32_0: -; EXYNOS: dup [[x:v[0-9]+]].2s, {{v[0-9]+}}.s[0] -; EXYNOS: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[x]].2s -; EXYNOS-NEXT: ret +; GENERIC: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; EXYNOSM1: dup [[V:v[0-9]+]].2s, {{v[0-9]+}}.s[0] +; EXYNOSM1: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[V]].2s entry: %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle) @@ -3083,12 +2714,9 @@ entry: define <4 x float> @test_vmulxq_laneq_f32_0(<4 x float> %a, <4 x float> %v) { ; CHECK-LABEL: test_vmulxq_laneq_f32_0: -; CHECK: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; CHECK-NEXT: ret -; EXYNOS-LABEL: test_vmulxq_laneq_f32_0: -; EXYNOS: dup [[x:v[0-9]+]].4s, {{v[0-9]+}}.s[0] -; EXYNOS: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[x]].4s -; EXYNOS-NEXT: ret +; GENERIC: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; EXYNOSM1: dup [[V:v[0-9]+]].4s, {{v[0-9]+}}.s[0] +; EXYNOSM1: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s entry: %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle) @@ -3097,12 +2725,9 @@ entry: define <2 x double> @test_vmulxq_laneq_f64_0(<2 x double> %a, <2 x double> %v) { ; CHECK-LABEL: test_vmulxq_laneq_f64_0: -; CHECK: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] -; CHECK-NEXT: ret -; EXYNOS-LABEL: test_vmulxq_laneq_f64_0: -; EXYNOS: dup [[x:v[0-9]+]].2d, {{v[0-9]+}}.d[0] -; EXYNOS: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[x]].2d -; EXYNOS-NEXT: ret +; GENERIC: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] +; EXYNOSM1: dup [[V:v[0-9]+]].2d, {{v[0-9]+}}.d[0] +; EXYNOSM1: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[V]].2d entry: %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle) @@ -3111,14 +2736,11 @@ entry: define <4 x float> @optimize_dup(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %v) { ; CHECK-LABEL: optimize_dup: -; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] -; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] -; CHECK-NEXT: ret -; EXYNOS-LABEL: optimize_dup: -; EXYNOS: dup [[x:v[0-9]+]].4s, {{v[0-9]+}}.s[3] -; EXYNOS: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[x]].4s -; EXYNOS: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[x]].4s -; EXYNOS-NEXT: ret +; GENERIC: fmla 
{{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +; GENERIC: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +; EXYNOSM1: dup [[V:v[0-9]+]].4s, {{v[0-9]+}}.s[3] +; EXYNOSM1: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s +; EXYNOSM1: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s entry: %lane1 = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane1, <4 x float> %b, <4 x float> %a) @@ -3130,15 +2752,12 @@ entry: define <4 x float> @no_optimize_dup(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %v) { ; CHECK-LABEL: no_optimize_dup: -; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] -; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] -; CHECK-NEXT: ret -; EXYNOS-LABEL: no_optimize_dup: -; EXYNOS: dup [[x:v[0-9]+]].4s, {{v[0-9]+}}.s[3] -; EXYNOS: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[x]].4s -; EXYNOS: dup [[y:v[0-9]+]].4s, {{v[0-9]+}}.s[1] -; EXYNOS: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[y]].4s -; EXYNOS-NEXT: ret +; GENERIC: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +; GENERIC: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +; EXYNOSM1: dup [[V:v[0-9]+]].4s, {{v[0-9]+}}.s[3] +; EXYNOSM1: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s +; EXYNOSM1: dup [[W:v[0-9]+]].4s, {{v[0-9]+}}.s[1] +; EXYNOSM1: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[W]].4s entry: %lane1 = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane1, <4 x float> %b, <4 x float> %a) @@ -3147,3 +2766,22 @@ entry: %s = fsub <4 x float> %0, %1 ret <4 x float> %s } + +define <2 x float> @test_vfma_lane_simdinstr_opt_pass_caching_a57(<2 x float> %a, <2 x float> %b, <2 x float> %v) "target-cpu"="cortex-a57" { +; CHECK-LABEL: test_vfma_lane_simdinstr_opt_pass_caching_a57: +; GENERIC: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +entry: + %lane = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> + %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) + ret <2 x float> %0 +} + +define <2 x float> @test_vfma_lane_simdinstr_opt_pass_caching_m1(<2 x float> %a, <2 x float> %b, <2 x float> %v) "target-cpu"="exynos-m1" { +; CHECK-LABEL: test_vfma_lane_simdinstr_opt_pass_caching_m1: +; GENERIC: dup [[V:v[0-9]+]].2s, {{v[0-9]+}}.s[1] +; GENERIC: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[V]].2s +entry: + %lane = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> + %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) + ret <2 x float> %0 +} diff --git a/test/CodeGen/AArch64/arm64-shrink-wrapping.ll b/test/CodeGen/AArch64/arm64-shrink-wrapping.ll index 0253229c0d7f..ee1c6ab42744 100644 --- a/test/CodeGen/AArch64/arm64-shrink-wrapping.ll +++ b/test/CodeGen/AArch64/arm64-shrink-wrapping.ll @@ -22,10 +22,10 @@ target triple = "arm64-apple-ios" ; DISABLE: cmp w0, w1 ; DISABLE-NEXT: b.ge [[EXIT_LABEL:LBB[0-9_]+]] ; -; Store %a in the alloca. -; CHECK: stur w0, {{\[}}[[SAVE_SP]], #-4] ; Set the alloca address in the second argument. -; CHECK-NEXT: sub x1, [[SAVE_SP]], #4 +; CHECK: sub x1, [[SAVE_SP]], #4 +; Store %a in the alloca. +; CHECK-NEXT: stur w0, {{\[}}[[SAVE_SP]], #-4] ; Set the first argument to zero. 
; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: bl _doSomething diff --git a/test/CodeGen/AArch64/arm64-simd-scalar-to-vector.ll b/test/CodeGen/AArch64/arm64-simd-scalar-to-vector.ll index e72c2b7989d2..98851917999b 100644 --- a/test/CodeGen/AArch64/arm64-simd-scalar-to-vector.ll +++ b/test/CodeGen/AArch64/arm64-simd-scalar-to-vector.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -mcpu=cyclone | FileCheck %s -; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -O0 -mcpu=cyclone | FileCheck %s --check-prefix=CHECK-FAST +; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -O0 -fast-isel -mcpu=cyclone | FileCheck %s --check-prefix=CHECK-FAST define <16 x i8> @foo(<16 x i8> %a) nounwind optsize readnone ssp { ; CHECK: uaddlv.16b h0, v0 diff --git a/test/CodeGen/AArch64/arm64-spill-remarks-treshold-hotness.ll b/test/CodeGen/AArch64/arm64-spill-remarks-treshold-hotness.ll new file mode 100644 index 000000000000..fe22296320fc --- /dev/null +++ b/test/CodeGen/AArch64/arm64-spill-remarks-treshold-hotness.ll @@ -0,0 +1,60 @@ +; RUN: llc < %s -mtriple=arm64-apple-ios7.0 -aarch64-neon-syntax=apple -pass-remarks-missed=regalloc \ +; RUN: -pass-remarks-with-hotness 2>&1 | FileCheck %s + +; RUN: llc < %s -mtriple=arm64-apple-ios7.0 -aarch64-neon-syntax=apple -pass-remarks-missed=regalloc \ +; RUN: -pass-remarks-with-hotness -pass-remarks-hotness-threshold=1 \ +; RUN: 2>&1 | FileCheck -check-prefix=THRESHOLD %s + +; CHECK: remark: /tmp/kk.c:3:20: 1 spills 1 reloads generated in loop{{$}} +; THRESHOLD-NOT: remark + +define void @fpr128(<4 x float>* %p) nounwind ssp { +entry: + br label %loop, !dbg !8 + +loop: + %i = phi i32 [ 0, %entry], [ %i.2, %end2 ] + br label %loop2, !dbg !9 + +loop2: + %j = phi i32 [ 0, %loop], [ %j.2, %loop2 ] + call void asm sideeffect "; inlineasm", "~{q0},~{q1},~{q2},~{q3},~{q4},~{q5},~{q6},~{q7},~{q8},~{q9},~{q10},~{q11},~{q12},~{q13},~{q14},~{q15},~{q16},~{q17},~{q18},~{q19},~{q20},~{q21},~{q22},~{q23},~{q24},~{q25},~{q26},~{q27},~{q28},~{q29},~{q30},~{q31},~{x0},~{x1},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{fp},~{lr},~{sp},~{memory}"() nounwind + %j.2 = add i32 %j, 1 + %c2 = icmp slt i32 %j.2, 100 + br i1 %c2, label %loop2, label %end2 + +end2: + call void asm sideeffect "; inlineasm", "~{q0},~{q1},~{q2},~{q3},~{q4},~{q5},~{q6},~{q7},~{q8},~{q9},~{q10},~{q11},~{q12},~{q13},~{q14},~{q15},~{q16},~{q17},~{q18},~{q19},~{q20},~{q21},~{q22},~{q23},~{q24},~{q25},~{q26},~{q27},~{q28},~{q29},~{q30},~{q31},~{x0},~{x1},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{fp},~{lr},~{sp},~{memory}"() nounwind + %i.2 = add i32 %i, 1 + %c = icmp slt i32 %i.2, 100 + br i1 %c, label %loop, label %end + +end: + br label %loop3 + +loop3: + %k = phi i32 [ 0, %end], [ %k.2, %loop3 ] + call void asm sideeffect "; inlineasm", "~{q0},~{q1},~{q2},~{q3},~{q4},~{q5},~{q6},~{q7},~{q8},~{q9},~{q10},~{q11},~{q12},~{q13},~{q14},~{q15},~{q16},~{q17},~{q18},~{q19},~{q20},~{q21},~{q22},~{q23},~{q24},~{q25},~{q26},~{q27},~{q28},~{q29},~{q30},~{q31},~{x0},~{x1},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{fp},~{lr},~{sp},~{memory}"() nounwind + %k.2 = add i32 
%k, 1 + %c3 = icmp slt i32 %k.2, 100 + br i1 %c3, label %loop3, label %end3, !dbg !10 + +end3: + ret void +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4} +!llvm.ident = !{!5} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.9.0 ", isOptimized: true, runtimeVersion: 0, emissionKind: NoDebug, enums: !2) +!1 = !DIFile(filename: "/tmp/kk.c", directory: "/tmp") +!2 = !{} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{i32 1, !"PIC Level", i32 2} +!5 = !{!"clang version 3.9.0 "} +!6 = distinct !DISubprogram(name: "success", scope: !1, file: !1, line: 1, type: !7, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0, variables: !2) +!7 = !DISubroutineType(types: !2) +!8 = !DILocation(line: 1, column: 20, scope: !6) +!9 = !DILocation(line: 2, column: 20, scope: !6) +!10 = !DILocation(line: 3, column: 20, scope: !6) diff --git a/test/CodeGen/AArch64/arm64-st1.ll b/test/CodeGen/AArch64/arm64-st1.ll index 28ee8fcf46fc..cce5be8ff223 100644 --- a/test/CodeGen/AArch64/arm64-st1.ll +++ b/test/CodeGen/AArch64/arm64-st1.ll @@ -1,4 +1,6 @@ ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs -mcpu=exynos-m1 | FileCheck --check-prefix=EXYNOS %s +; The instruction latencies of Exynos-M1 trigger the transform we see under the Exynos check. define void @st1lane_16b(<16 x i8> %A, i8* %D) { ; CHECK-LABEL: st1lane_16b @@ -375,6 +377,10 @@ declare void @llvm.aarch64.neon.st4lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i define void @st2_8b(<8 x i8> %A, <8 x i8> %B, i8* %P) nounwind { ; CHECK-LABEL: st2_8b ; CHECK: st2.8b +; EXYNOS-LABEL: st2_8b +; EXYNOS: zip1.8b +; EXYNOS: zip2.8b +; EXYNOS: stp call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, i8* %P) ret void } @@ -389,6 +395,17 @@ define void @st3_8b(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, i8* %P) nounwind { define void @st4_8b(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %P) nounwind { ; CHECK-LABEL: st4_8b ; CHECK: st4.8b +; EXYNOS-LABEL: st4_8b +; EXYNOS: zip1.8b +; EXYNOS: zip2.8b +; EXYNOS: zip1.8b +; EXYNOS: zip2.8b +; EXYNOS: zip1.8b +; EXYNOS: zip2.8b +; EXYNOS: stp +; EXYNOS: zip1.8b +; EXYNOS: zip2.8b +; EXYNOS: stp call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %P) ret void } @@ -400,6 +417,10 @@ declare void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x define void @st2_16b(<16 x i8> %A, <16 x i8> %B, i8* %P) nounwind { ; CHECK-LABEL: st2_16b ; CHECK: st2.16b +; EXYNOS-LABEL: st2_16b +; EXYNOS: zip1.16b +; EXYNOS: zip2.16b +; EXYNOS: stp call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, i8* %P) ret void } @@ -414,6 +435,17 @@ define void @st3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %P) nounwind define void @st4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %P) nounwind { ; CHECK-LABEL: st4_16b ; CHECK: st4.16b +; EXYNOS-LABEL: st4_16b +; EXYNOS: zip1.16b +; EXYNOS: zip2.16b +; EXYNOS: zip1.16b +; EXYNOS: zip2.16b +; EXYNOS: zip1.16b +; EXYNOS: zip2.16b +; EXYNOS: stp +; EXYNOS: zip1.16b +; EXYNOS: zip2.16b +; EXYNOS: stp call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %P) ret void } @@ -425,6 +457,10 @@ declare void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, define void @st2_4h(<4 x 
i16> %A, <4 x i16> %B, i16* %P) nounwind { ; CHECK-LABEL: st2_4h ; CHECK: st2.4h +; EXYNOS-LABEL: st2_4h +; EXYNOS: zip1.4h +; EXYNOS: zip2.4h +; EXYNOS: stp call void @llvm.aarch64.neon.st2.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, i16* %P) ret void } @@ -439,6 +475,17 @@ define void @st3_4h(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, i16* %P) nounwind define void @st4_4h(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %P) nounwind { ; CHECK-LABEL: st4_4h ; CHECK: st4.4h +; EXYNOS-LABEL: st4_4h +; EXYNOS: zip1.4h +; EXYNOS: zip2.4h +; EXYNOS: zip1.4h +; EXYNOS: zip2.4h +; EXYNOS: zip1.4h +; EXYNOS: zip2.4h +; EXYNOS: stp +; EXYNOS: zip1.4h +; EXYNOS: zip2.4h +; EXYNOS: stp call void @llvm.aarch64.neon.st4.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %P) ret void } @@ -450,6 +497,10 @@ declare void @llvm.aarch64.neon.st4.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, define void @st2_8h(<8 x i16> %A, <8 x i16> %B, i16* %P) nounwind { ; CHECK-LABEL: st2_8h ; CHECK: st2.8h +; EXYNOS-LABEL: st2_8h +; EXYNOS: zip1.8h +; EXYNOS: zip2.8h +; EXYNOS: stp call void @llvm.aarch64.neon.st2.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, i16* %P) ret void } @@ -464,6 +515,17 @@ define void @st3_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %P) nounwind define void @st4_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %P) nounwind { ; CHECK-LABEL: st4_8h ; CHECK: st4.8h +; EXYNOS-LABEL: st4_8h +; EXYNOS: zip1.8h +; EXYNOS: zip2.8h +; EXYNOS: zip1.8h +; EXYNOS: zip2.8h +; EXYNOS: zip1.8h +; EXYNOS: zip2.8h +; EXYNOS: stp +; EXYNOS: zip1.8h +; EXYNOS: zip2.8h +; EXYNOS: stp call void @llvm.aarch64.neon.st4.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %P) ret void } @@ -475,6 +537,10 @@ declare void @llvm.aarch64.neon.st4.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, define void @st2_2s(<2 x i32> %A, <2 x i32> %B, i32* %P) nounwind { ; CHECK-LABEL: st2_2s ; CHECK: st2.2s +; EXYNOS-LABEL: st2_2s +; EXYNOS: zip1.2s +; EXYNOS: zip2.2s +; EXYNOS: stp call void @llvm.aarch64.neon.st2.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, i32* %P) ret void } @@ -489,6 +555,17 @@ define void @st3_2s(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, i32* %P) nounwind define void @st4_2s(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %P) nounwind { ; CHECK-LABEL: st4_2s ; CHECK: st4.2s +; EXYNOS-LABEL: st4_2s +; EXYNOS: zip1.2s +; EXYNOS: zip2.2s +; EXYNOS: zip1.2s +; EXYNOS: zip2.2s +; EXYNOS: zip1.2s +; EXYNOS: zip2.2s +; EXYNOS: stp +; EXYNOS: zip1.2s +; EXYNOS: zip2.2s +; EXYNOS: stp call void @llvm.aarch64.neon.st4.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %P) ret void } @@ -500,6 +577,10 @@ declare void @llvm.aarch64.neon.st4.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, define void @st2_4s(<4 x i32> %A, <4 x i32> %B, i32* %P) nounwind { ; CHECK-LABEL: st2_4s ; CHECK: st2.4s +; EXYNOS-LABEL: st2_4s +; EXYNOS: zip1.4s +; EXYNOS: zip2.4s +; EXYNOS: stp call void @llvm.aarch64.neon.st2.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, i32* %P) ret void } @@ -514,6 +595,17 @@ define void @st3_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %P) nounwind define void @st4_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %P) nounwind { ; CHECK-LABEL: st4_4s ; CHECK: st4.4s +; EXYNOS-LABEL: st4_4s +; EXYNOS: zip1.4s +; EXYNOS: zip2.4s +; EXYNOS: zip1.4s +; EXYNOS: zip2.4s +; EXYNOS: zip1.4s +; EXYNOS: zip2.4s +; EXYNOS: stp +; EXYNOS: zip1.4s +; EXYNOS: zip2.4s +; EXYNOS: stp call void 
@llvm.aarch64.neon.st4.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %P) ret void } @@ -551,6 +643,10 @@ declare void @llvm.aarch64.neon.st4.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, define void @st2_2d(<2 x i64> %A, <2 x i64> %B, i64* %P) nounwind { ; CHECK-LABEL: st2_2d ; CHECK: st2.2d +; EXYNOS-LABEL: st2_2d +; EXYNOS: zip1.2d +; EXYNOS: zip2.2d +; EXYNOS: stp call void @llvm.aarch64.neon.st2.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, i64* %P) ret void } @@ -565,6 +661,17 @@ define void @st3_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %P) nounwind define void @st4_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %P) nounwind { ; CHECK-LABEL: st4_2d ; CHECK: st4.2d +; EXYNOS-LABEL: st4_2d +; EXYNOS: zip1.2d +; EXYNOS: zip2.2d +; EXYNOS: zip1.2d +; EXYNOS: zip2.2d +; EXYNOS: zip1.2d +; EXYNOS: zip2.2d +; EXYNOS: stp +; EXYNOS: zip1.2d +; EXYNOS: zip2.2d +; EXYNOS: stp call void @llvm.aarch64.neon.st4.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %P) ret void } diff --git a/test/CodeGen/AArch64/arm64-storebytesmerge.ll b/test/CodeGen/AArch64/arm64-storebytesmerge.ll new file mode 100644 index 000000000000..fb06131242d3 --- /dev/null +++ b/test/CodeGen/AArch64/arm64-storebytesmerge.ll @@ -0,0 +1,46 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -enable-misched=false < %s | FileCheck %s + +;target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +;target triple = "aarch64--linux-gnu" + + +; CHECK-LABEL: test +; CHECK: str x30, [sp, #-16]! +; CHECK: adrp x8, q +; CHECK: ldr x8, [x8, :lo12:q] +; CHECK: stp xzr, xzr, [x8] +; CHECK: bl f + +@q = external unnamed_addr global i16*, align 8 + +; Function Attrs: nounwind +define void @test() local_unnamed_addr #0 { +entry: + br label %for.body453.i + +for.body453.i: ; preds = %for.body453.i, %entry + br i1 undef, label %for.body453.i, label %for.end705.i + +for.end705.i: ; preds = %for.body453.i + %0 = load i16*, i16** @q, align 8 + %1 = getelementptr inbounds i16, i16* %0, i64 0 + %2 = bitcast i16* %1 to <2 x i16>* + store <2 x i16> zeroinitializer, <2 x i16>* %2, align 2 + %3 = getelementptr i16, i16* %1, i64 2 + %4 = bitcast i16* %3 to <2 x i16>* + store <2 x i16> zeroinitializer, <2 x i16>* %4, align 2 + %5 = getelementptr i16, i16* %1, i64 4 + %6 = bitcast i16* %5 to <2 x i16>* + store <2 x i16> zeroinitializer, <2 x i16>* %6, align 2 + %7 = getelementptr i16, i16* %1, i64 6 + %8 = bitcast i16* %7 to <2 x i16>* + store <2 x i16> zeroinitializer, <2 x i16>* %8, align 2 + call void @f() #2 + unreachable +} + +declare void @f() local_unnamed_addr #1 + +attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a57" "target-features"="+crc,+crypto,+fp-armv8,+neon" "unsafe-fp-math"="true" "use-soft-float"="false" } +attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a57" "target-features"="+crc,+crypto,+fp-armv8,+neon" "unsafe-fp-math"="true" "use-soft-float"="false" } +attributes #2 = { nounwind } diff --git 
a/test/CodeGen/AArch64/arm64-stur.ll b/test/CodeGen/AArch64/arm64-stur.ll index 4a3229a39b50..8e0736c4fba2 100644 --- a/test/CodeGen/AArch64/arm64-stur.ll +++ b/test/CodeGen/AArch64/arm64-stur.ll @@ -55,11 +55,11 @@ define void @foo(%struct.X* nocapture %p) nounwind optsize ssp { ; CHECK-NEXT: ret %B = getelementptr inbounds %struct.X, %struct.X* %p, i64 0, i32 1 %val = bitcast i64* %B to i8* - call void @llvm.memset.p0i8.i64(i8* %val, i8 0, i64 16, i32 1, i1 false) + call void @llvm.memset.p0i8.i64(i8* %val, i8 0, i64 16, i1 false) ret void } -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind ; Unaligned 16b stores are split into 8b stores for performance. ; radar://15424193 diff --git a/test/CodeGen/AArch64/arm64-tls-dynamic-together.ll b/test/CodeGen/AArch64/arm64-tls-dynamic-together.ll index bb9ad46ba63d..9f77d3527d4b 100644 --- a/test/CodeGen/AArch64/arm64-tls-dynamic-together.ll +++ b/test/CodeGen/AArch64/arm64-tls-dynamic-together.ll @@ -1,6 +1,6 @@ -; RUN: llc -O0 -mtriple=arm64-none-linux-gnu -relocation-model=pic \ +; RUN: llc -O0 -fast-isel -mtriple=arm64-none-linux-gnu -relocation-model=pic \ ; RUN: -verify-machineinstrs < %s | FileCheck -check-prefix=CHECK -check-prefix=NOEMU %s -; RUN: llc -emulated-tls -O0 -mtriple=arm64-none-linux-gnu -relocation-model=pic \ +; RUN: llc -emulated-tls -O0 -fast-isel -mtriple=arm64-none-linux-gnu -relocation-model=pic \ ; RUN: -verify-machineinstrs < %s | FileCheck -check-prefix=CHECK -check-prefix=EMU %s ; If the .tlsdesccall and blr parts are emitted completely separately (even with diff --git a/test/CodeGen/AArch64/arm64-variadic-aapcs.ll b/test/CodeGen/AArch64/arm64-variadic-aapcs.ll index a09853a0b406..0f8f4c5d4a44 100644 --- a/test/CodeGen/AArch64/arm64-variadic-aapcs.ll +++ b/test/CodeGen/AArch64/arm64-variadic-aapcs.ll @@ -109,7 +109,7 @@ declare void @llvm.va_end(i8*) define void @test_va_end() nounwind { ; CHECK-LABEL: test_va_end: -; CHECK-NEXT: BB#0 +; CHECK-NEXT: %bb.0 %addr = bitcast %va_list* @var to i8* call void @llvm.va_end(i8* %addr) diff --git a/test/CodeGen/AArch64/arm64-vcvt_f.ll b/test/CodeGen/AArch64/arm64-vcvt_f.ll index 254671a3c3c5..90cc2d37882c 100644 --- a/test/CodeGen/AArch64/arm64-vcvt_f.ll +++ b/test/CodeGen/AArch64/arm64-vcvt_f.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s -; RUN: llc < %s -O0 -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -O0 -fast-isel -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s define <2 x double> @test_vcvt_f64_f32(<2 x float> %x) nounwind readnone ssp { ; CHECK-LABEL: test_vcvt_f64_f32: diff --git a/test/CodeGen/AArch64/arm64-vector-insertion.ll b/test/CodeGen/AArch64/arm64-vector-insertion.ll index 7d72b489c3be..c96e735dc813 100644 --- a/test/CodeGen/AArch64/arm64-vector-insertion.ll +++ b/test/CodeGen/AArch64/arm64-vector-insertion.ll @@ -8,7 +8,7 @@ entry: ret void ; CHECK-LABEL: test0f - ; CHECK: movi.2d v[[TEMP:[0-9]+]], #0000000000000000 + ; CHECK: movi.4s v[[TEMP:[0-9]+]], #0 ; CHECK: mov.s v[[TEMP]][0], v{{[0-9]+}}[0] ; CHECK: str q[[TEMP]], [x0] ; CHECK: ret @@ -16,7 +16,6 @@ entry: } - define void @test1f(float* nocapture %x, float %a) #0 { entry: %0 = insertelement <4 x float> , float %a, i32 0 diff --git a/test/CodeGen/AArch64/arm64-virtual_base.ll b/test/CodeGen/AArch64/arm64-virtual_base.ll index 4ecfde4f83e2..4ce0d2f00075 100644 --- 
a/test/CodeGen/AArch64/arm64-virtual_base.ll +++ b/test/CodeGen/AArch64/arm64-virtual_base.ll @@ -43,9 +43,9 @@ entry: %tmp14 = bitcast double* %arraydecay5.3.1 to i8* %arraydecay11.3.1 = getelementptr inbounds %struct.Bicubic_Patch_Struct, %struct.Bicubic_Patch_Struct* %Shape, i64 0, i32 12, i64 1, i64 3, i64 0 %tmp15 = bitcast double* %arraydecay11.3.1 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp14, i8* %tmp15, i64 24, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp14, i8* %tmp15, i64 24, i1 false) ret void } ; Function Attrs: nounwind -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) diff --git a/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll b/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll index 412651c55678..2fb9d3b2d030 100644 --- a/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll +++ b/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll @@ -9,10 +9,10 @@ define void @t1() nounwind ssp { entry: ; ALL-LABEL: t1: ; ALL-NOT: fmov -; CYCLONE: movi.2d v0, #0000000000000000 -; CYCLONE: movi.2d v1, #0000000000000000 -; CYCLONE: movi.2d v2, #0000000000000000 -; CYCLONE: movi.2d v3, #0000000000000000 +; CYCLONE: fmov d0, xzr +; CYCLONE: fmov d1, xzr +; CYCLONE: fmov d2, xzr +; CYCLONE: fmov d3, xzr ; KRYO: movi v0.2d, #0000000000000000 ; KRYO: movi v1.2d, #0000000000000000 ; KRYO: movi v2.2d, #0000000000000000 @@ -48,8 +48,8 @@ entry: define void @t4() nounwind ssp { ; ALL-LABEL: t4: ; ALL-NOT: fmov -; CYCLONE: movi.2d v0, #0000000000000000 -; CYCLONE: movi.2d v1, #0000000000000000 +; CYCLONE: fmov s0, wzr +; CYCLONE: fmov s1, wzr ; KRYO: movi v0.2d, #0000000000000000 ; KRYO: movi v1.2d, #0000000000000000 ; FALKOR: movi v0.2d, #0000000000000000 @@ -87,4 +87,13 @@ for.end: ret double %v0 } +define <2 x i64> @t6() { +; ALL-LABEL: t6: +; CYCLONE: movi.16b v0, #0 +; KRYO: movi v0.2d, #0000000000000000 +; FALKOR: movi v0.2d, #0000000000000000 + ret <2 x i64> zeroinitializer +} + + declare double @sin(double) diff --git a/test/CodeGen/AArch64/atomic-ops-lse.ll b/test/CodeGen/AArch64/atomic-ops-lse.ll index 49f716547b12..1a5cd2dc4233 100644 --- a/test/CodeGen/AArch64/atomic-ops-lse.ll +++ b/test/CodeGen/AArch64/atomic-ops-lse.ll @@ -629,12 +629,27 @@ define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind { ; CHECK-NOT: dmb ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 +; CHECK-NEXT: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 +; CHECK-NEXT: casab w0, w1, [x[[ADDR]]] +; CHECK-NEXT: ret + + ret i8 %old +} + +define i1 @test_atomic_cmpxchg_i8_1(i8 %wanted, i8 %new) nounwind { +; CHECK-LABEL: test_atomic_cmpxchg_i8_1: + %pair = cmpxchg i8* @var8, i8 %wanted, i8 %new acquire acquire + %success = extractvalue { i8, i1 } %pair, 1 -; CHECK: casab w[[NEW:[0-9]+]], w[[OLD:[0-9]+]], [x[[ADDR]]] ; CHECK-NOT: dmb +; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 - ret i8 %old +; CHECK: casab w[[NEW:[0-9]+]], w1, [x[[ADDR]]] +; CHECK-NEXT: cmp w[[NEW]], w0, uxtb +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret + ret i1 %success } define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind { @@ -644,12 +659,28 @@ define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind { ; CHECK-NOT: dmb ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 +; CHECK-NEXT: add x[[ADDR:[0-9]+]], 
[[TMPADDR]], {{#?}}:lo12:var16 +; CHECK-NEXT: casah w0, w1, [x[[ADDR]]] +; CHECK-NEXT: ret + + ret i16 %old +} + +define i1 @test_atomic_cmpxchg_i16_1(i16 %wanted, i16 %new) nounwind { +; CHECK-LABEL: test_atomic_cmpxchg_i16_1: + %pair = cmpxchg i16* @var16, i16 %wanted, i16 %new acquire acquire + %success = extractvalue { i16, i1 } %pair, 1 -; CHECK: casah w0, w1, [x[[ADDR]]] ; CHECK-NOT: dmb +; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 +; CHECK-NEXT: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 - ret i16 %old +; CHECK: casah w[[NEW:[0-9]+]], w1, [x[[ADDR]]] +; CHECK-NEXT: cmp w[[NEW]], w0, uxth +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret + + ret i1 %success } define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind { diff --git a/test/CodeGen/AArch64/bics.ll b/test/CodeGen/AArch64/bics.ll index 53aa28ad913f..244aacbc0df3 100644 --- a/test/CodeGen/AArch64/bics.ll +++ b/test/CodeGen/AArch64/bics.ll @@ -2,7 +2,7 @@ define i1 @andn_cmp(i32 %x, i32 %y) { ; CHECK-LABEL: andn_cmp: -; CHECK: // BB#0: +; CHECK: // %bb.0: ; CHECK-NEXT: bics wzr, w1, w0 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret @@ -15,7 +15,7 @@ define i1 @andn_cmp(i32 %x, i32 %y) { define i1 @and_cmp(i32 %x, i32 %y) { ; CHECK-LABEL: and_cmp: -; CHECK: // BB#0: +; CHECK: // %bb.0: ; CHECK-NEXT: bics wzr, w1, w0 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret @@ -27,7 +27,7 @@ define i1 @and_cmp(i32 %x, i32 %y) { define i1 @and_cmp_const(i32 %x) { ; CHECK-LABEL: and_cmp_const: -; CHECK: // BB#0: +; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #43 ; CHECK-NEXT: bics wzr, w8, w0 ; CHECK-NEXT: cset w0, eq diff --git a/test/CodeGen/AArch64/big-byval.ll b/test/CodeGen/AArch64/big-byval.ll new file mode 100644 index 000000000000..cf03074767fb --- /dev/null +++ b/test/CodeGen/AArch64/big-byval.ll @@ -0,0 +1,13 @@ +; RUN: llc -o - %s -verify-machineinstrs | FileCheck %s +target triple = "aarch64--" + +; Make sure we don't fail machine verification because the memcpy callframe +; setup is nested inside the extfunc callframe setup. +; CHECK-LABEL: func: +; CHECK: bl memcpy +; CHECK: bl extfunc +declare void @extfunc([4096 x i64]* byval %p) +define void @func([4096 x i64]* %z) { + call void @extfunc([4096 x i64]* byval %z) + ret void +} diff --git a/test/CodeGen/AArch64/big-callframe.ll b/test/CodeGen/AArch64/big-callframe.ll new file mode 100644 index 000000000000..6e15795b2723 --- /dev/null +++ b/test/CodeGen/AArch64/big-callframe.ll @@ -0,0 +1,15 @@ +; RUN: llc -o - %s -verify-machineinstrs | FileCheck %s +; Make sure we use a frame pointer and fp relative addressing for the emergency +; spillslot when we have gigantic callframes. 
+; CHECK-LABEL: func: +; CHECK: stur {{.*}}, [x29, #{{.*}}] // 8-byte Folded Spill +; CHECK: ldur {{.*}}, [x29, #{{.*}}] // 8-byte Folded Reload +target triple = "aarch64--" +declare void @extfunc([4096 x i64]* byval %p) +define void @func([4096 x i64]* %z) { + %lvar = alloca [31 x i8] + %v = load volatile [31 x i8], [31 x i8]* %lvar + store volatile [31 x i8] %v, [31 x i8]* %lvar + call void @extfunc([4096 x i64]* byval %z) + ret void +} diff --git a/test/CodeGen/AArch64/br-cond-not-merge.ll b/test/CodeGen/AArch64/br-cond-not-merge.ll index bf21ef307905..46532386783f 100644 --- a/test/CodeGen/AArch64/br-cond-not-merge.ll +++ b/test/CodeGen/AArch64/br-cond-not-merge.ll @@ -1,5 +1,5 @@ ; RUN: llc -mtriple=aarch64 -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK --check-prefix=OPT %s -; RUN: llc -mtriple=aarch64 -verify-machineinstrs -O0 -fast-isel=0 < %s | FileCheck --check-prefix=CHECK --check-prefix=NOOPT %s +; RUN: llc -mtriple=aarch64 -verify-machineinstrs -O0 -fast-isel=0 -global-isel=false < %s | FileCheck --check-prefix=CHECK --check-prefix=NOOPT %s declare void @foo() diff --git a/test/CodeGen/AArch64/branch-relax-cbz.ll b/test/CodeGen/AArch64/branch-relax-cbz.ll index d13c0f677bcb..cddecbd9babb 100644 --- a/test/CodeGen/AArch64/branch-relax-cbz.ll +++ b/test/CodeGen/AArch64/branch-relax-cbz.ll @@ -4,7 +4,7 @@ ; CHECK: cmn x{{[0-9]+}}, #5 ; CHECK-NEXT: b.le [[B2:LBB[0-9]+_[0-9]+]] -; CHECK-NEXT: ; BB#1: ; %b3 +; CHECK-NEXT: ; %bb.1: ; %b3 ; CHECK: ldr [[LOAD:w[0-9]+]] ; CHECK: cbnz [[LOAD]], [[B8:LBB[0-9]+_[0-9]+]] ; CHECK-NEXT: b [[B7:LBB[0-9]+_[0-9]+]] diff --git a/test/CodeGen/AArch64/build-one-lane.ll b/test/CodeGen/AArch64/build-one-lane.ll new file mode 100644 index 000000000000..722d62437a3b --- /dev/null +++ b/test/CodeGen/AArch64/build-one-lane.ll @@ -0,0 +1,84 @@ +; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s + +; Check that building up a vector w/ only one non-zero lane initializes +; intelligently. 
+ +define <8 x i8> @v8i8(i8 %t, i8 %s) nounwind { + %v = insertelement <8 x i8> , i8 %s, i32 7 + ret <8 x i8> %v + +; CHECK: movi v[[R:[0-9]+]].8b, #0 +; CHECK: mov v[[R]].b[7], w{{[0-9]+}} +} + +define <16 x i8> @v16i8(i8 %t, i8 %s) nounwind { + %v = insertelement <16 x i8> , i8 %s, i32 15 + ret <16 x i8> %v + +; CHECK: movi v[[R:[0-9]+]].16b, #0 +; CHECK: mov v[[R]].b[15], w{{[0-9]+}} +} + +define <4 x i16> @v4i16(i16 %t, i16 %s) nounwind { + %v = insertelement <4 x i16> , i16 %s, i32 3 + ret <4 x i16> %v + +; CHECK: movi v[[R:[0-9]+]].4h, #0 +; CHECK: mov v[[R]].h[3], w{{[0-9]+}} +} + +define <8 x i16> @v8i16(i16 %t, i16 %s) nounwind { + %v = insertelement <8 x i16> , i16 %s, i32 7 + ret <8 x i16> %v + +; CHECK: movi v[[R:[0-9]+]].8h, #0 +; CHECK: mov v[[R]].h[7], w{{[0-9]+}} +} + +define <2 x i32> @v2i32(i32 %t, i32 %s) nounwind { + %v = insertelement <2 x i32> , i32 %s, i32 1 + ret <2 x i32> %v + +; CHECK: movi v[[R:[0-9]+]].2s, #0 +; CHECK: mov v[[R]].s[1], w{{[0-9]+}} +} + +define <4 x i32> @v4i32(i32 %t, i32 %s) nounwind { + %v = insertelement <4 x i32> , i32 %s, i32 3 + ret <4 x i32> %v + +; CHECK: movi v[[R:[0-9]+]].4s, #0 +; CHECK: mov v[[R]].s[3], w{{[0-9]+}} +} + +define <2 x i64> @v2i64(i64 %t, i64 %s) nounwind { + %v = insertelement <2 x i64> , i64 %s, i32 1 + ret <2 x i64> %v + +; CHECK: movi v[[R:[0-9]+]].2d, #0 +; CHECK: mov v[[R]].d[1], x{{[0-9]+}} +} + +define <2 x float> @v2f32(float %t, float %s) nounwind { + %v = insertelement <2 x float> , float %s, i32 1 + ret <2 x float> %v + +; CHECK: movi v[[R:[0-9]+]].2s, #0 +; CHECK: mov v[[R]].s[1], v{{[0-9]+}}.s[0] +} + +define <4 x float> @v4f32(float %t, float %s) nounwind { + %v = insertelement <4 x float> , float %s, i32 3 + ret <4 x float> %v + +; CHECK: movi v[[R:[0-9]+]].4s, #0 +; CHECK: mov v[[R]].s[3], v{{[0-9]+}}.s[0] +} + +define <2 x double> @v2f64(double %t, double %s) nounwind { + %v = insertelement <2 x double> , double %s, i32 1 + ret <2 x double> %v + +; CHECK: movi v[[R:[0-9]+]].2d, #0 +; CHECK: mov v[[R]].d[1], v{{[0-9]+}}.d[0] +} diff --git a/test/CodeGen/AArch64/chkstk.ll b/test/CodeGen/AArch64/chkstk.ll new file mode 100644 index 000000000000..1c2e5528f10c --- /dev/null +++ b/test/CodeGen/AArch64/chkstk.ll @@ -0,0 +1,25 @@ +; RUN: llc -mtriple=aarch64-windows -verify-machineinstrs %s -o - \ +; RUN: | FileCheck -check-prefix CHECK-DEFAULT-CODE-MODEL %s + +; RUN: llc -mtriple=aarch64-windows -verify-machineinstrs -code-model=large %s -o - \ +; RUN: | FileCheck -check-prefix CHECK-LARGE-CODE-MODEL %s + +define void @check_watermark() { +entry: + %buffer = alloca [4096 x i8], align 1 + ret void +} + +; CHECK-DEFAULT-CODE-MODEL: check_watermark: +; CHECK-DEFAULT-CODE-MODEL-DAG: stp x29, x30, [sp +; CHECK-DEFAULT-CODE-MODEL-DAG: orr x15, xzr, #0x100 +; CHECK-DEFAULT-CODE-MODEL: bl __chkstk +; CHECK-DEFAULT-CODE-MODEL: sub sp, sp, x15, lsl #4 + +; CHECK-LARGE-CODE-MODEL: check_watermark: +; CHECK-LARGE-CODE-MODEL-DAG: stp x29, x30, [sp +; CHECK-LARGE-CODE-MODEL-DAG: orr x15, xzr, #0x100 +; CHECK-LARGE-CODE-MODEL-DAG: adrp x16, __chkstk +; CHECK-LARGE-CODE-MODEL-DAG: add x16, x16, __chkstk +; CHECK-LARGE-CODE-MODEL: blr x16 +; CHECK-LARGE-CODE-MODEL: sub sp, sp, x15, lsl #4 diff --git a/test/CodeGen/AArch64/cmpxchg-O0.ll b/test/CodeGen/AArch64/cmpxchg-O0.ll index 1bfbcf851c0e..bd3d328ec119 100644 --- a/test/CodeGen/AArch64/cmpxchg-O0.ll +++ b/test/CodeGen/AArch64/cmpxchg-O0.ll @@ -1,4 +1,4 @@ -; RUN: llc -verify-machineinstrs -mtriple=aarch64-linux-gnu -O0 -fast-isel=0 %s -o - | FileCheck %s +; RUN: llc 
-verify-machineinstrs -mtriple=aarch64-linux-gnu -O0 -fast-isel=0 -global-isel=false %s -o - | FileCheck %s define { i8, i1 } @test_cmpxchg_8(i8* %addr, i8 %desired, i8 %new) nounwind { ; CHECK-LABEL: test_cmpxchg_8: diff --git a/test/CodeGen/AArch64/combine-and-like.ll b/test/CodeGen/AArch64/combine-and-like.ll new file mode 100644 index 000000000000..15770c2e02ff --- /dev/null +++ b/test/CodeGen/AArch64/combine-and-like.ll @@ -0,0 +1,13 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=aarch64-unknown-unknown | FileCheck %s + +define i32 @f(i32 %a0) { +; CHECK-LABEL: f: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret + %1 = lshr i32 %a0, 2147483647 + %2 = add i32 %1, 2147483647 + %3 = and i32 %2, %1 + ret i32 %3 +} diff --git a/test/CodeGen/AArch64/cxx-tlscc.ll b/test/CodeGen/AArch64/cxx-tlscc.ll index a36aad51ca82..d179eab7e8fa 100644 --- a/test/CodeGen/AArch64/cxx-tlscc.ll +++ b/test/CodeGen/AArch64/cxx-tlscc.ll @@ -3,7 +3,7 @@ ; Shrink wrapping currently does not kick in because we have a TLS CALL ; in the entry block and it will clobber the link register. -; RUN: llc < %s -mtriple=aarch64-apple-ios -O0 | FileCheck --check-prefix=CHECK-O0 %s +; RUN: llc < %s -mtriple=aarch64-apple-ios -O0 -fast-isel | FileCheck --check-prefix=CHECK-O0 %s %struct.S = type { i8 } diff --git a/test/CodeGen/AArch64/dllexport.ll b/test/CodeGen/AArch64/dllexport.ll index 287c545610c0..f408620e26d0 100644 --- a/test/CodeGen/AArch64/dllexport.ll +++ b/test/CodeGen/AArch64/dllexport.ll @@ -40,35 +40,34 @@ define weak_odr dllexport void @l() { ; CHECK: .section .drectve ; CHECK-GNU-NOT: -export:f -; CHECK-GNU: -export:g -; CHECK-GNU-SAME: -export:h +; CHECK-GNU: .ascii " -export:g" +; CHECK-GNU: .ascii " -export:h" ; CHECK-GNU-NOT: -export:i -; CHECK-GNU-SAME: -export:j -; CHECK-GNU-SAME: -export:k -; CHECK-GNU-SAME: -export:l -; CHECK-GNU-SAME: -export:m,data -; CHECK-GNU-SAME: -export:n,data -; CHECK-GNU-SAME: -export:o,data -; CHECK-GNU-SAME: -export:p,data -; CHECK-GNU-SAME: -export:q,data -; CHECK-GNU-SAME: -export:r -; CHECK-GNU-SAME: -export:s -; CHECK-GNU-SAME: -export:t -; CHECK-GNU-SAME: -export:u +; CHECK-GNU: .ascii " -export:j" +; CHECK-GNU: .ascii " -export:k" +; CHECK-GNU: .ascii " -export:l" +; CHECK-GNU: .ascii " -export:m,data" +; CHECK-GNU: .ascii " -export:n,data" +; CHECK-GNU: .ascii " -export:o,data" +; CHECK-GNU: .ascii " -export:p,data" +; CHECK-GNU: .ascii " -export:q,data" +; CHECK-GNU: .ascii " -export:r" +; CHECK-GNU: .ascii " -export:s" +; CHECK-GNU: .ascii " -export:t" +; CHECK-GNU: .ascii " -export:u" ; CHECK-MSVC-NOT: /EXPORT:f -; CHECK-MSVC: /EXPORT:g -; CHECK-MSVC-SAME: /EXPORT:h +; CHECK-MSVC: .ascii " /EXPORT:g" +; CHECK-MSVC: .ascii " /EXPORT:h" ; CHECK-MSVC-NOT: /EXPORT:i -; CHECK-MSVC-SAME: /EXPORT:j -; CHECK-MSVC-SAME: /EXPORT:k -; CHECK-MSVC-SAME: /EXPORT:l -; CHECK-MSVC-SAME: /EXPORT:m,DATA -; CHECK-MSVC-SAME: /EXPORT:n,DATA -; CHECK-MSVC-SAME: /EXPORT:o,DATA -; CHECK-MSVC-SAME: /EXPORT:p,DATA -; CHECK-MSVC-SAME: /EXPORT:q,DATA -; CHECK-MSVC-SAME: /EXPORT:r -; CHECK-MSVC-SAME: /EXPORT:s -; CHECK-MSVC-SAME: /EXPORT:t -; CHECK-MSVC-SAME: /EXPORT:u - +; CHECK-MSVC: .ascii " /EXPORT:j" +; CHECK-MSVC: .ascii " /EXPORT:k" +; CHECK-MSVC: .ascii " /EXPORT:l" +; CHECK-MSVC: .ascii " /EXPORT:m,DATA" +; CHECK-MSVC: .ascii " /EXPORT:n,DATA" +; CHECK-MSVC: .ascii " /EXPORT:o,DATA" +; CHECK-MSVC: .ascii " /EXPORT:p,DATA" +; CHECK-MSVC: .ascii " /EXPORT:q,DATA" +; CHECK-MSVC: .ascii " /EXPORT:r" +; CHECK-MSVC: 
.ascii " /EXPORT:s" +; CHECK-MSVC: .ascii " /EXPORT:t" +; CHECK-MSVC: .ascii " /EXPORT:u" diff --git a/test/CodeGen/AArch64/emutls_generic.ll b/test/CodeGen/AArch64/emutls_generic.ll index 03473cf80ee4..f205078ed411 100644 --- a/test/CodeGen/AArch64/emutls_generic.ll +++ b/test/CodeGen/AArch64/emutls_generic.ll @@ -4,6 +4,10 @@ ; RUN: | FileCheck -check-prefix=ARM_64 %s ; RUN: llc < %s -emulated-tls -mtriple=aarch64-linux-android -O3 \ ; RUN: | FileCheck -check-prefix=ARM_64 %s +; RUN: llc < %s -emulated-tls -mtriple=aarch64-windows-gnu -O3 \ +; RUN: | FileCheck -check-prefix=ARM_64 %s +; RUN: llc < %s -emulated-tls -mtriple=aarch64-apple-darwin -O3 \ +; RUN: | FileCheck -check-prefix=DARWIN %s ; Make sure that TLS symbols are emitted in expected order. @@ -46,7 +50,7 @@ entry: ; ARM_64-NEXT: .xword 0 ; ARM_64-NEXT: .xword __emutls_t.external_y ; ARM_64-NOT: __emutls_v.external_x: -; ARM_64: .section .rodata, +; ARM_64: .section .r{{o?}}data, ; ARM_64-LABEL: __emutls_t.external_y: ; ARM_64-NEXT: .byte 7 ; ARM_64: .data{{$}} @@ -57,6 +61,41 @@ entry: ; ARM_64-NEXT: .xword 16 ; ARM_64-NEXT: .xword 0 ; ARM_64-NEXT: .xword __emutls_t.internal_y -; ARM_64: .section .rodata, +; ARM_64: .section .r{{o?}}data, ; ARM_64-LABEL: __emutls_t.internal_y: ; ARM_64-NEXT: .xword 9 + +; DARWIN-LABEL: _get_external_x: +; DARWIN: ___emutls_v.external_x +; DARWIN: ___emutls_get_address +; DARWIN-LABEL: _get_external_y: +; DARWIN: ___emutls_v.external_y +; DARWIN: ___emutls_get_address +; DARWIN-LABEL: _get_internal_y: +; DARWIN: ___emutls_v.internal_y +; DARWIN: ___emutls_get_address +; DARWIN-NOT: ___emutls_t.external_x +; DARWIN-NOT: ___emutls_v.external_x: +; DARWIN: .section __DATA,__data +; DARWIN: .globl ___emutls_v.external_y +; DARWIN: .p2align 3 +; DARWIN-LABEL: ___emutls_v.external_y: +; DARWIN-NEXT: .quad 1 +; DARWIN-NEXT: .quad 2 +; DARWIN-NEXT: .quad 0 +; DARWIN-NEXT: .quad ___emutls_t.external_y +; DARWIN-NOT: ___emutls_v.external_x: +; DARWIN: .section __TEXT,__const +; DARWIN-LABEL: ___emutls_t.external_y: +; DARWIN-NEXT: .byte 7 +; DARWIN: .section __DATA,__data +; DARWIN-NOT: .globl ___emutls_v +; DARWIN: .p2align 3 +; DARWIN-LABEL: ___emutls_v.internal_y: +; DARWIN-NEXT: .quad 8 +; DARWIN-NEXT: .quad 16 +; DARWIN-NEXT: .quad 0 +; DARWIN-NEXT: .quad ___emutls_t.internal_y +; DARWIN: .section __TEXT,__const +; DARWIN-LABEL: ___emutls_t.internal_y: +; DARWIN-NEXT: .quad 9 diff --git a/test/CodeGen/AArch64/fast-isel-assume.ll b/test/CodeGen/AArch64/fast-isel-assume.ll index d39a907407db..50f510a09b63 100644 --- a/test/CodeGen/AArch64/fast-isel-assume.ll +++ b/test/CodeGen/AArch64/fast-isel-assume.ll @@ -3,7 +3,7 @@ ; Check that we ignore the assume intrinsic. ; CHECK-LABEL: test: -; CHECK: // BB#0: +; CHECK: // %bb.0: ; CHECK-NEXT: ret define void @test(i32 %a) { %tmp0 = icmp slt i32 %a, 0 diff --git a/test/CodeGen/AArch64/fast-isel-atomic.ll b/test/CodeGen/AArch64/fast-isel-atomic.ll index 195b8befc8e1..452129e49515 100644 --- a/test/CodeGen/AArch64/fast-isel-atomic.ll +++ b/test/CodeGen/AArch64/fast-isel-atomic.ll @@ -1,11 +1,11 @@ ; RUN: llc -mtriple=aarch64-- -O0 -fast-isel -fast-isel-abort=4 -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=aarch64-- -O0 -fast-isel=0 -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-- -O0 -fast-isel=0 -global-isel=false -verify-machineinstrs < %s | FileCheck %s ; Note that checking SelectionDAG output isn't strictly necessary, but they ; currently match, so we might as well check both! Feel free to remove SDAG. 
; CHECK-LABEL: atomic_store_monotonic_8: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: strb w1, [x0] ; CHECK-NEXT: ret define void @atomic_store_monotonic_8(i8* %p, i8 %val) #0 { @@ -14,7 +14,7 @@ define void @atomic_store_monotonic_8(i8* %p, i8 %val) #0 { } ; CHECK-LABEL: atomic_store_monotonic_8_off: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: strb w1, [x0, #1] ; CHECK-NEXT: ret define void @atomic_store_monotonic_8_off(i8* %p, i8 %val) #0 { @@ -24,7 +24,7 @@ define void @atomic_store_monotonic_8_off(i8* %p, i8 %val) #0 { } ; CHECK-LABEL: atomic_store_monotonic_16: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: strh w1, [x0] ; CHECK-NEXT: ret define void @atomic_store_monotonic_16(i16* %p, i16 %val) #0 { @@ -33,7 +33,7 @@ define void @atomic_store_monotonic_16(i16* %p, i16 %val) #0 { } ; CHECK-LABEL: atomic_store_monotonic_16_off: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: strh w1, [x0, #2] ; CHECK-NEXT: ret define void @atomic_store_monotonic_16_off(i16* %p, i16 %val) #0 { @@ -43,7 +43,7 @@ define void @atomic_store_monotonic_16_off(i16* %p, i16 %val) #0 { } ; CHECK-LABEL: atomic_store_monotonic_32: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: str w1, [x0] ; CHECK-NEXT: ret define void @atomic_store_monotonic_32(i32* %p, i32 %val) #0 { @@ -52,7 +52,7 @@ define void @atomic_store_monotonic_32(i32* %p, i32 %val) #0 { } ; CHECK-LABEL: atomic_store_monotonic_32_off: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: str w1, [x0, #4] ; CHECK-NEXT: ret define void @atomic_store_monotonic_32_off(i32* %p, i32 %val) #0 { @@ -62,7 +62,7 @@ define void @atomic_store_monotonic_32_off(i32* %p, i32 %val) #0 { } ; CHECK-LABEL: atomic_store_monotonic_64: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: str x1, [x0] ; CHECK-NEXT: ret define void @atomic_store_monotonic_64(i64* %p, i64 %val) #0 { @@ -71,7 +71,7 @@ define void @atomic_store_monotonic_64(i64* %p, i64 %val) #0 { } ; CHECK-LABEL: atomic_store_monotonic_64_off: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: str x1, [x0, #8] ; CHECK-NEXT: ret define void @atomic_store_monotonic_64_off(i64* %p, i64 %val) #0 { @@ -81,7 +81,7 @@ define void @atomic_store_monotonic_64_off(i64* %p, i64 %val) #0 { } ; CHECK-LABEL: atomic_store_release_8: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: stlrb w1, [x0] ; CHECK-NEXT: ret define void @atomic_store_release_8(i8* %p, i8 %val) #0 { @@ -90,7 +90,7 @@ define void @atomic_store_release_8(i8* %p, i8 %val) #0 { } ; CHECK-LABEL: atomic_store_release_8_off: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: add x0, x0, #1 ; CHECK-NEXT: stlrb w1, [x0] ; CHECK-NEXT: ret @@ -101,7 +101,7 @@ define void @atomic_store_release_8_off(i8* %p, i8 %val) #0 { } ; CHECK-LABEL: atomic_store_release_16: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: stlrh w1, [x0] ; CHECK-NEXT: ret define void @atomic_store_release_16(i16* %p, i16 %val) #0 { @@ -110,7 +110,7 @@ define void @atomic_store_release_16(i16* %p, i16 %val) #0 { } ; CHECK-LABEL: atomic_store_release_16_off: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: add x0, x0, #2 ; CHECK-NEXT: stlrh w1, [x0] ; CHECK-NEXT: ret @@ -121,7 +121,7 @@ define void @atomic_store_release_16_off(i16* %p, i16 %val) #0 { } ; CHECK-LABEL: atomic_store_release_32: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: stlr w1, [x0] ; CHECK-NEXT: ret define void @atomic_store_release_32(i32* %p, i32 
%val) #0 { @@ -130,7 +130,7 @@ define void @atomic_store_release_32(i32* %p, i32 %val) #0 { } ; CHECK-LABEL: atomic_store_release_32_off: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: add x0, x0, #4 ; CHECK-NEXT: stlr w1, [x0] ; CHECK-NEXT: ret @@ -141,7 +141,7 @@ define void @atomic_store_release_32_off(i32* %p, i32 %val) #0 { } ; CHECK-LABEL: atomic_store_release_64: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: stlr x1, [x0] ; CHECK-NEXT: ret define void @atomic_store_release_64(i64* %p, i64 %val) #0 { @@ -150,7 +150,7 @@ define void @atomic_store_release_64(i64* %p, i64 %val) #0 { } ; CHECK-LABEL: atomic_store_release_64_off: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: add x0, x0, #8 ; CHECK-NEXT: stlr x1, [x0] ; CHECK-NEXT: ret @@ -162,7 +162,7 @@ define void @atomic_store_release_64_off(i64* %p, i64 %val) #0 { ; CHECK-LABEL: atomic_store_seq_cst_8: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: stlrb w1, [x0] ; CHECK-NEXT: ret define void @atomic_store_seq_cst_8(i8* %p, i8 %val) #0 { @@ -171,7 +171,7 @@ define void @atomic_store_seq_cst_8(i8* %p, i8 %val) #0 { } ; CHECK-LABEL: atomic_store_seq_cst_8_off: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: add x0, x0, #1 ; CHECK-NEXT: stlrb w1, [x0] ; CHECK-NEXT: ret @@ -182,7 +182,7 @@ define void @atomic_store_seq_cst_8_off(i8* %p, i8 %val) #0 { } ; CHECK-LABEL: atomic_store_seq_cst_16: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: stlrh w1, [x0] ; CHECK-NEXT: ret define void @atomic_store_seq_cst_16(i16* %p, i16 %val) #0 { @@ -191,7 +191,7 @@ define void @atomic_store_seq_cst_16(i16* %p, i16 %val) #0 { } ; CHECK-LABEL: atomic_store_seq_cst_16_off: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: add x0, x0, #2 ; CHECK-NEXT: stlrh w1, [x0] ; CHECK-NEXT: ret @@ -202,7 +202,7 @@ define void @atomic_store_seq_cst_16_off(i16* %p, i16 %val) #0 { } ; CHECK-LABEL: atomic_store_seq_cst_32: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: stlr w1, [x0] ; CHECK-NEXT: ret define void @atomic_store_seq_cst_32(i32* %p, i32 %val) #0 { @@ -211,7 +211,7 @@ define void @atomic_store_seq_cst_32(i32* %p, i32 %val) #0 { } ; CHECK-LABEL: atomic_store_seq_cst_32_off: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: add x0, x0, #4 ; CHECK-NEXT: stlr w1, [x0] ; CHECK-NEXT: ret @@ -222,7 +222,7 @@ define void @atomic_store_seq_cst_32_off(i32* %p, i32 %val) #0 { } ; CHECK-LABEL: atomic_store_seq_cst_64: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: stlr x1, [x0] ; CHECK-NEXT: ret define void @atomic_store_seq_cst_64(i64* %p, i64 %val) #0 { @@ -231,7 +231,7 @@ define void @atomic_store_seq_cst_64(i64* %p, i64 %val) #0 { } ; CHECK-LABEL: atomic_store_seq_cst_64_off: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: add x0, x0, #8 ; CHECK-NEXT: stlr x1, [x0] ; CHECK-NEXT: ret diff --git a/test/CodeGen/AArch64/fast-isel-cmp-vec.ll b/test/CodeGen/AArch64/fast-isel-cmp-vec.ll index 89b368fa19bb..d5b64c5363e1 100644 --- a/test/CodeGen/AArch64/fast-isel-cmp-vec.ll +++ b/test/CodeGen/AArch64/fast-isel-cmp-vec.ll @@ -8,9 +8,9 @@ define <2 x i32> @icmp_v2i32(<2 x i32> %a) { ; CHECK-LABEL: icmp_v2i32: -; CHECK: ; BB#0: +; CHECK: ; %bb.0: ; CHECK-NEXT: cmeq.2s [[CMP:v[0-9]+]], v0, #0 -; CHECK-NEXT: ; BB#1: +; CHECK-NEXT: ; %bb.1: ; CHECK-NEXT: movi.2s [[MASK:v[0-9]+]], #1 ; CHECK-NEXT: and.8b v0, [[CMP]], [[MASK]] ; CHECK-NEXT: ret @@ -23,9 +23,9 @@ bb2: define <2 x i32> @icmp_constfold_v2i32(<2 x i32> 
%a) { ; CHECK-LABEL: icmp_constfold_v2i32: -; CHECK: ; BB#0: +; CHECK: ; %bb.0: ; CHECK-NEXT: movi d[[CMP:[0-9]+]], #0xffffffffffffffff -; CHECK-NEXT: ; BB#1: +; CHECK-NEXT: ; %bb.1: ; CHECK-NEXT: movi.2s [[MASK:v[0-9]+]], #1 ; CHECK-NEXT: and.8b v0, v[[CMP]], [[MASK]] ; CHECK-NEXT: ret @@ -38,10 +38,10 @@ bb2: define <4 x i32> @icmp_v4i32(<4 x i32> %a) { ; CHECK-LABEL: icmp_v4i32: -; CHECK: ; BB#0: +; CHECK: ; %bb.0: ; CHECK-NEXT: cmeq.4s [[CMP:v[0-9]+]], v0, #0 ; CHECK-NEXT: xtn.4h [[CMPV4I16:v[0-9]+]], [[CMP]] -; CHECK-NEXT: ; BB#1: +; CHECK-NEXT: ; %bb.1: ; CHECK-NEXT: movi.4h [[MASK:v[0-9]+]], #1 ; CHECK-NEXT: and.8b [[ZEXT:v[0-9]+]], [[CMPV4I16]], [[MASK]] ; CHECK-NEXT: ushll.4s v0, [[ZEXT]], #0 @@ -55,9 +55,9 @@ bb2: define <4 x i32> @icmp_constfold_v4i32(<4 x i32> %a) { ; CHECK-LABEL: icmp_constfold_v4i32: -; CHECK: ; BB#0: +; CHECK: ; %bb.0: ; CHECK-NEXT: movi d[[CMP:[0-9]+]], #0xffffffffffffffff -; CHECK-NEXT: ; BB#1: +; CHECK-NEXT: ; %bb.1: ; CHECK-NEXT: movi.4h [[MASK:v[0-9]+]], #1 ; CHECK-NEXT: and.8b [[ZEXT:v[0-9]+]], v[[CMP]], [[MASK]] ; CHECK-NEXT: ushll.4s v0, [[ZEXT]], #0 @@ -71,9 +71,9 @@ bb2: define <16 x i8> @icmp_v16i8(<16 x i8> %a) { ; CHECK-LABEL: icmp_v16i8: -; CHECK: ; BB#0: +; CHECK: ; %bb.0: ; CHECK-NEXT: cmeq.16b [[CMP:v[0-9]+]], v0, #0 -; CHECK-NEXT: ; BB#1: +; CHECK-NEXT: ; %bb.1: ; CHECK-NEXT: movi.16b [[MASK:v[0-9]+]], #1 ; CHECK-NEXT: and.16b v0, [[CMP]], [[MASK]] ; CHECK-NEXT: ret @@ -86,9 +86,9 @@ bb2: define <16 x i8> @icmp_constfold_v16i8(<16 x i8> %a) { ; CHECK-LABEL: icmp_constfold_v16i8: -; CHECK: ; BB#0: +; CHECK: ; %bb.0: ; CHECK-NEXT: movi.2d [[CMP:v[0-9]+]], #0xffffffffffffffff -; CHECK-NEXT: ; BB#1: +; CHECK-NEXT: ; %bb.1: ; CHECK-NEXT: movi.16b [[MASK:v[0-9]+]], #1 ; CHECK-NEXT: and.16b v0, [[CMP]], [[MASK]] ; CHECK-NEXT: ret diff --git a/test/CodeGen/AArch64/fast-isel-cmpxchg.ll b/test/CodeGen/AArch64/fast-isel-cmpxchg.ll index 7ef625abab20..f03955c4dcd3 100644 --- a/test/CodeGen/AArch64/fast-isel-cmpxchg.ll +++ b/test/CodeGen/AArch64/fast-isel-cmpxchg.ll @@ -6,7 +6,7 @@ ; CHECK-NEXT: ldaxr [[OLD:w[0-9]+]], [x0] ; CHECK-NEXT: cmp [[OLD]], w1 ; CHECK-NEXT: b.ne [[DONE:.LBB[0-9_]+]] -; CHECK-NEXT: // BB#2: +; CHECK-NEXT: // %bb.2: ; CHECK-NEXT: stlxr [[STATUS]], w2, [x0] ; CHECK-NEXT: cbnz [[STATUS]], [[RETRY]] ; CHECK-NEXT: [[DONE]]: @@ -25,14 +25,14 @@ define i32 @cmpxchg_monotonic_32(i32* %p, i32 %cmp, i32 %new, i32* %ps) #0 { } ; CHECK-LABEL: cmpxchg_acq_rel_32_load: -; CHECK: // BB#0: +; CHECK: // %bb.0: ; CHECK: ldr [[NEW:w[0-9]+]], [x2] ; CHECK-NEXT: [[RETRY:.LBB[0-9_]+]]: ; CHECK-NEXT: mov [[STATUS:w[0-9]+]], #0 ; CHECK-NEXT: ldaxr [[OLD:w[0-9]+]], [x0] ; CHECK-NEXT: cmp [[OLD]], w1 ; CHECK-NEXT: b.ne [[DONE:.LBB[0-9_]+]] -; CHECK-NEXT: // BB#2: +; CHECK-NEXT: // %bb.2: ; CHECK-NEXT: stlxr [[STATUS]], [[NEW]], [x0] ; CHECK-NEXT: cbnz [[STATUS]], [[RETRY]] ; CHECK-NEXT: [[DONE]]: @@ -57,7 +57,7 @@ define i32 @cmpxchg_acq_rel_32_load(i32* %p, i32 %cmp, i32* %pnew, i32* %ps) #0 ; CHECK-NEXT: ldaxr [[OLD:x[0-9]+]], [x0] ; CHECK-NEXT: cmp [[OLD]], x1 ; CHECK-NEXT: b.ne [[DONE:.LBB[0-9_]+]] -; CHECK-NEXT: // BB#2: +; CHECK-NEXT: // %bb.2: ; CHECK-NEXT: stlxr [[STATUS]], x2, [x0] ; CHECK-NEXT: cbnz [[STATUS]], [[RETRY]] ; CHECK-NEXT: [[DONE]]: diff --git a/test/CodeGen/AArch64/fast-isel-memcpy.ll b/test/CodeGen/AArch64/fast-isel-memcpy.ll index 07595a954db0..290e0c918ade 100644 --- a/test/CodeGen/AArch64/fast-isel-memcpy.ll +++ b/test/CodeGen/AArch64/fast-isel-memcpy.ll @@ -8,8 +8,8 @@ define void @test(i64 %a, i8* %b) { %1 = and i64 %a, 
9223372036854775807 %2 = inttoptr i64 %1 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %b, i64 8, i32 8, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %2, i8* align 8 %b, i64 8, i1 false) ret void } -declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1) +declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1) diff --git a/test/CodeGen/AArch64/fast-isel-sp-adjust.ll b/test/CodeGen/AArch64/fast-isel-sp-adjust.ll index 9201d1be6a9c..a17a2564b4fe 100644 --- a/test/CodeGen/AArch64/fast-isel-sp-adjust.ll +++ b/test/CodeGen/AArch64/fast-isel-sp-adjust.ll @@ -1,5 +1,5 @@ -; RUN: llc -O0 -mtriple=aarch64-apple-ios -o - %s | FileCheck %s -; RUN: not llc -O0 -mtriple=aarch64-apple-ios -o /dev/null -fast-isel-abort=3 %s 2> %t +; RUN: llc -O0 -fast-isel -mtriple=aarch64-apple-ios -o - %s | FileCheck %s +; RUN: not llc -O0 -mtriple=aarch64-apple-ios -o /dev/null -fast-isel -fast-isel-abort=3 %s 2> %t ; RUN: FileCheck %s --check-prefix=CHECK-ERRORS < %t ; The issue here is that FastISel cannot emit an ADDrr where one of the inputs diff --git a/test/CodeGen/AArch64/fcvt-int.ll b/test/CodeGen/AArch64/fcvt-int.ll index e52b601b1454..aeafc127494b 100644 --- a/test/CodeGen/AArch64/fcvt-int.ll +++ b/test/CodeGen/AArch64/fcvt-int.ll @@ -152,7 +152,7 @@ define double @test_bitcasti64todouble(i64 %in) { define double @bitcast_fabs(double %x) { ; CHECK-LABEL: bitcast_fabs: -; CHECK: ; BB#0: +; CHECK: ; %bb.0: ; CHECK-NEXT: fabs d0, d0 ; CHECK-NEXT: ret ; @@ -164,7 +164,7 @@ define double @bitcast_fabs(double %x) { define float @bitcast_fneg(float %x) { ; CHECK-LABEL: bitcast_fneg: -; CHECK: ; BB#0: +; CHECK: ; %bb.0: ; CHECK-NEXT: fneg s0, s0 ; CHECK-NEXT: ret ; diff --git a/test/CodeGen/AArch64/fp-cond-sel.ll b/test/CodeGen/AArch64/fp-cond-sel.ll index 4d9cb21ddc3d..f74e9c350942 100644 --- a/test/CodeGen/AArch64/fp-cond-sel.ll +++ b/test/CodeGen/AArch64/fp-cond-sel.ll @@ -12,7 +12,7 @@ define void @test_csel(i32 %lhs32, i32 %rhs32, i64 %lhs64) { %tst1 = icmp ugt i32 %lhs32, %rhs32 %val1 = select i1 %tst1, float 0.0, float 1.0 store float %val1, float* @varfloat -; CHECK-DAG: movi v[[FLT0:[0-9]+]].2d, #0 +; CHECK-DAG: fmov s[[FLT0:[0-9]+]], wzr ; CHECK-DAG: fmov s[[FLT1:[0-9]+]], #1.0 ; CHECK: fcsel {{s[0-9]+}}, s[[FLT0]], s[[FLT1]], hi diff --git a/test/CodeGen/AArch64/fp16-v4-instructions.ll b/test/CodeGen/AArch64/fp16-v4-instructions.ll index fbdd8f984e8c..32881e6522be 100644 --- a/test/CodeGen/AArch64/fp16-v4-instructions.ll +++ b/test/CodeGen/AArch64/fp16-v4-instructions.ll @@ -295,18 +295,12 @@ define <4 x i16> @fptoui_i16(<4 x half> %a) #0 { define <4 x i1> @test_fcmp_une(<4 x half> %a, <4 x half> %b) #0 { ; CHECK-CVT-LABEL: test_fcmp_une: -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: csetm {{.*}}, ne -; CHECK-CVT-DAG: csetm {{.*}}, ne -; CHECK-CVT-DAG: csetm {{.*}}, ne -; CHECK-CVT-DAG: csetm {{.*}}, ne +; CHECK-CVT: fcvtl +; CHECK-CVT: fcvtl +; CHECK-CVT: fcmeq +; CHECK-CVT: mvn +; CHECK-CVT: xtn +; CHECK-CVT: ret ; CHECK-FP16-LABEL: test_fcmp_une: ; CHECK-FP16-NOT: fcvt @@ -325,22 +319,14 @@ define <4 x i1> @test_fcmp_une(<4 x half> %a, <4 x half> %b) #0 { define <4 x i1> @test_fcmp_ueq(<4 x half> %a, <4 x half> %b) #0 { ; CHECK-CVT-LABEL: test_fcmp_ueq: -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt 
-; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: csetm [[REG1:w[0-9]+]], eq -; CHECK-CVT-DAG: csetm [[REG2:w[0-9]+]], eq -; CHECK-CVT-DAG: csetm [[REG3:w[0-9]+]], eq -; CHECK-CVT-DAG: csetm [[REG4:w[0-9]+]], eq -; CHECK-CVT-DAG: csinv {{.*}}, [[REG1]], wzr, vc -; CHECK-CVT-DAG: csinv {{.*}}, [[REG2]], wzr, vc -; CHECK-CVT-DAG: csinv {{.*}}, [[REG3]], wzr, vc -; CHECK-CVT-DAG: csinv {{.*}}, [[REG4]], wzr, vc +; CHECK-CVT: fcvtl +; CHECK-CVT: fcvtl +; CHECK-CVT: fcmgt +; CHECK-CVT: fcmgt +; CHECK-CVT: orr +; CHECK-CVT: xtn +; CHECK-CVT: mvn +; CHECK-CVT: ret ; CHECK-FP16-LABEL: test_fcmp_ueq: ; CHECK-FP16-NOT: fcvt @@ -359,18 +345,12 @@ define <4 x i1> @test_fcmp_ueq(<4 x half> %a, <4 x half> %b) #0 { define <4 x i1> @test_fcmp_ugt(<4 x half> %a, <4 x half> %b) #0 { ; CHECK-CVT-LABEL: test_fcmp_ugt: -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: csetm {{.*}}, hi -; CHECK-CVT-DAG: csetm {{.*}}, hi -; CHECK-CVT-DAG: csetm {{.*}}, hi -; CHECK-CVT-DAG: csetm {{.*}}, hi +; CHECK-CVT: fcvtl +; CHECK-CVT: fcvtl +; CHECK-CVT: fcmge +; CHECK-CVT: xtn +; CHECK-CVT: mvn +; CHECK-CVT: ret ; CHECK-FP16-LABEL: test_fcmp_ugt: ; CHECK-FP16-NOT: fcvt @@ -389,18 +369,12 @@ define <4 x i1> @test_fcmp_ugt(<4 x half> %a, <4 x half> %b) #0 { define <4 x i1> @test_fcmp_uge(<4 x half> %a, <4 x half> %b) #0 { ; CHECK-CVT-LABEL: test_fcmp_uge: -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: csetm {{.*}}, pl -; CHECK-CVT-DAG: csetm {{.*}}, pl -; CHECK-CVT-DAG: csetm {{.*}}, pl -; CHECK-CVT-DAG: csetm {{.*}}, pl +; CHECK-CVT: fcvtl +; CHECK-CVT: fcvtl +; CHECK-CVT: fcmgt +; CHECK-CVT: xtn +; CHECK-CVT: mvn +; CHECK-CVT: ret ; CHECK-FP16-LABEL: test_fcmp_uge: ; CHECK-FP16-NOT: fcvt @@ -419,18 +393,12 @@ define <4 x i1> @test_fcmp_uge(<4 x half> %a, <4 x half> %b) #0 { define <4 x i1> @test_fcmp_ult(<4 x half> %a, <4 x half> %b) #0 { ; CHECK-CVT-LABEL: test_fcmp_ult: -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: csetm {{.*}}, lt -; CHECK-CVT-DAG: csetm {{.*}}, lt -; CHECK-CVT-DAG: csetm {{.*}}, lt -; CHECK-CVT-DAG: csetm {{.*}}, lt +; CHECK-CVT: fcvtl +; CHECK-CVT: fcvtl +; CHECK-CVT: fcmge +; CHECK-CVT: xtn +; CHECK-CVT: mvn +; CHECK-CVT: ret ; CHECK-FP16-LABEL: test_fcmp_ult: ; CHECK-FP16-NOT: fcvt @@ -449,18 +417,12 @@ define <4 x i1> @test_fcmp_ult(<4 x half> %a, <4 x half> %b) #0 { define <4 x i1> @test_fcmp_ule(<4 x half> %a, <4 x half> %b) #0 { ; CHECK-CVT-LABEL: test_fcmp_ule: -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: csetm {{.*}}, le -; CHECK-CVT-DAG: csetm {{.*}}, le -; CHECK-CVT-DAG: csetm {{.*}}, le -; CHECK-CVT-DAG: csetm {{.*}}, le +; CHECK-CVT: fcvtl +; CHECK-CVT: fcvtl +; CHECK-CVT: fcmgt +; CHECK-CVT: xtn +; CHECK-CVT: mvn +; CHECK-CVT: ret ; CHECK-FP16-LABEL: test_fcmp_ule: ; CHECK-FP16-NOT: fcvt @@ -479,18 +441,14 @@ define <4 x i1> @test_fcmp_ule(<4 x half> %a, <4 x half> %b) #0 { define <4 x i1> @test_fcmp_uno(<4 x half> %a, <4 x half> %b) #0 { ; CHECK-CVT-LABEL: test_fcmp_uno: -; CHECK-CVT-DAG: 
fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: csetm {{.*}}, vs -; CHECK-CVT-DAG: csetm {{.*}}, vs -; CHECK-CVT-DAG: csetm {{.*}}, vs -; CHECK-CVT-DAG: csetm {{.*}}, vs +; CHECK-CVT: fcvtl +; CHECK-CVT: fcvtl +; CHECK-CVT: fcmge +; CHECK-CVT: fcmgt +; CHECK-CVT: orr +; CHECK-CVT: xtn +; CHECK-CVT: mvn +; CHECK-CVT: ret ; CHECK-FP16-LABEL: test_fcmp_uno: ; CHECK-FP16-NOT: fcvt @@ -509,22 +467,13 @@ define <4 x i1> @test_fcmp_uno(<4 x half> %a, <4 x half> %b) #0 { define <4 x i1> @test_fcmp_one(<4 x half> %a, <4 x half> %b) #0 { ; CHECK-CVT-LABEL: test_fcmp_one: -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: csetm [[REG1:w[0-9]+]], mi -; CHECK-CVT-DAG: csetm [[REG2:w[0-9]+]], mi -; CHECK-CVT-DAG: csetm [[REG3:w[0-9]+]], mi -; CHECK-CVT-DAG: csetm [[REG4:w[0-9]+]], mi -; CHECK-CVT-DAG: csinv {{.*}}, [[REG1]], wzr, le -; CHECK-CVT-DAG: csinv {{.*}}, [[REG2]], wzr, le -; CHECK-CVT-DAG: csinv {{.*}}, [[REG3]], wzr, le -; CHECK-CVT-DAG: csinv {{.*}}, [[REG4]], wzr, le +; CHECK-CVT: fcvtl +; CHECK-CVT: fcvtl +; CHECK-CVT: fcmgt +; CHECK-CVT: fcmgt +; CHECK-CVT: orr +; CHECK-CVT: xtn +; CHECK-CVT: ret ; CHECK-FP16-LABEL: test_fcmp_one: ; CHECK-FP16-NOT: fcvt @@ -543,18 +492,11 @@ define <4 x i1> @test_fcmp_one(<4 x half> %a, <4 x half> %b) #0 { define <4 x i1> @test_fcmp_oeq(<4 x half> %a, <4 x half> %b) #0 { ; CHECK-CVT-LABEL: test_fcmp_oeq: -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: csetm {{.*}}, eq -; CHECK-CVT-DAG: csetm {{.*}}, eq -; CHECK-CVT-DAG: csetm {{.*}}, eq -; CHECK-CVT-DAG: csetm {{.*}}, eq +; CHECK-CVT: fcvtl +; CHECK-CVT: fcvtl +; CHECK-CVT: fcmeq +; CHECK-CVT: xtn +; CHECK-CVT: ret ; CHECK-FP16-LABEL: test_fcmp_oeq: ; CHECK-FP16-NOT: fcvt @@ -573,18 +515,11 @@ define <4 x i1> @test_fcmp_oeq(<4 x half> %a, <4 x half> %b) #0 { define <4 x i1> @test_fcmp_ogt(<4 x half> %a, <4 x half> %b) #0 { ; CHECK-CVT-LABEL: test_fcmp_ogt: -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: csetm {{.*}}, gt -; CHECK-CVT-DAG: csetm {{.*}}, gt -; CHECK-CVT-DAG: csetm {{.*}}, gt -; CHECK-CVT-DAG: csetm {{.*}}, gt +; CHECK-CVT: fcvtl +; CHECK-CVT: fcvtl +; CHECK-CVT: fcmgt +; CHECK-CVT: xtn +; CHECK-CVT: ret ; CHECK-FP16-LABEL: test_fcmp_ogt: ; CHECK-FP16-NOT: fcvt @@ -603,18 +538,11 @@ define <4 x i1> @test_fcmp_ogt(<4 x half> %a, <4 x half> %b) #0 { define <4 x i1> @test_fcmp_oge(<4 x half> %a, <4 x half> %b) #0 { ; CHECK-CVT-LABEL: test_fcmp_oge: -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: csetm {{.*}}, ge -; CHECK-CVT-DAG: csetm {{.*}}, ge -; CHECK-CVT-DAG: csetm {{.*}}, ge -; CHECK-CVT-DAG: csetm {{.*}}, ge +; CHECK-CVT: fcvtl +; CHECK-CVT: fcvtl +; CHECK-CVT: fcmge +; CHECK-CVT: xtn +; CHECK-CVT: ret ; CHECK-FP16-LABEL: test_fcmp_oge: ; CHECK-FP16-NOT: fcvt @@ -633,18 +561,11 @@ define <4 x i1> @test_fcmp_oge(<4 x half> %a, <4 x half> %b) #0 { define 
<4 x i1> @test_fcmp_olt(<4 x half> %a, <4 x half> %b) #0 { ; CHECK-CVT-LABEL: test_fcmp_olt: -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: csetm {{.*}}, mi -; CHECK-CVT-DAG: csetm {{.*}}, mi -; CHECK-CVT-DAG: csetm {{.*}}, mi -; CHECK-CVT-DAG: csetm {{.*}}, mi +; CHECK-CVT: fcvtl +; CHECK-CVT: fcvtl +; CHECK-CVT: fcmgt +; CHECK-CVT: xtn +; CHECK-CVT: ret ; CHECK-FP16-LABEL: test_fcmp_olt: ; CHECK-FP16-NOT: fcvt @@ -663,18 +584,11 @@ define <4 x i1> @test_fcmp_olt(<4 x half> %a, <4 x half> %b) #0 { define <4 x i1> @test_fcmp_ole(<4 x half> %a, <4 x half> %b) #0 { ; CHECK-CVT-LABEL: test_fcmp_ole: -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: csetm {{.*}}, ls -; CHECK-CVT-DAG: csetm {{.*}}, ls -; CHECK-CVT-DAG: csetm {{.*}}, ls -; CHECK-CVT-DAG: csetm {{.*}}, ls +; CHECK-CVT: fcvtl +; CHECK-CVT: fcvtl +; CHECK-CVT: fcmge +; CHECK-CVT: xtn +; CHECK-CVT: ret ; CHECK-FP16-LABEL: test_fcmp_ole: ; CHECK-FP16-NOT: fcvt @@ -693,18 +607,13 @@ define <4 x i1> @test_fcmp_ole(<4 x half> %a, <4 x half> %b) #0 { define <4 x i1> @test_fcmp_ord(<4 x half> %a, <4 x half> %b) #0 { ; CHECK-CVT-LABEL: test_fcmp_ord: -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: fcvt -; CHECK-CVT-DAG: csetm {{.*}}, vc -; CHECK-CVT-DAG: csetm {{.*}}, vc -; CHECK-CVT-DAG: csetm {{.*}}, vc -; CHECK-CVT-DAG: csetm {{.*}}, vc +; CHECK-CVT: fcvtl +; CHECK-CVT: fcvtl +; CHECK-CVT: fcmge +; CHECK-CVT: fcmgt +; CHECK-CVT: orr +; CHECK-CVT: xtn +; CHECK-CVT: ret ; CHECK-FP16-LABEL: test_fcmp_ord: ; CHECK-FP16-NOT: fcvt diff --git a/test/CodeGen/AArch64/func-argpassing.ll b/test/CodeGen/AArch64/func-argpassing.ll index cf6545dab385..824a18939402 100644 --- a/test/CodeGen/AArch64/func-argpassing.ll +++ b/test/CodeGen/AArch64/func-argpassing.ll @@ -186,11 +186,11 @@ define void @check_i128_stackalign(i32 %val0, i32 %val1, i32 %val2, i32 %val3, ret void } -declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1) +declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i1) define i32 @test_extern() { ; CHECK-LABEL: test_extern: - call void @llvm.memcpy.p0i8.p0i8.i32(i8* undef, i8* undef, i32 undef, i32 4, i1 0) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 undef, i8* align 4 undef, i32 undef, i1 0) ; CHECK: bl memcpy ret i32 0 } diff --git a/test/CodeGen/AArch64/func-calls.ll b/test/CodeGen/AArch64/func-calls.ll index 54d38a91c387..42bcade98354 100644 --- a/test/CodeGen/AArch64/func-calls.ll +++ b/test/CodeGen/AArch64/func-calls.ll @@ -63,10 +63,10 @@ define void @simple_rets() { store [2 x i64] %arr, [2 x i64]* @varsmallstruct ; CHECK: bl return_smallstruct ; CHECK: add x[[VARSMALLSTRUCT:[0-9]+]], {{x[0-9]+}}, :lo12:varsmallstruct +; CHECK: add x8, {{x[0-9]+}}, {{#?}}:lo12:varstruct ; CHECK: stp x0, x1, [x[[VARSMALLSTRUCT]]] call void @return_large_struct(%myStruct* sret @varstruct) -; CHECK: add x8, {{x[0-9]+}}, {{#?}}:lo12:varstruct ; CHECK: bl return_large_struct ret void diff --git a/test/CodeGen/AArch64/i128-fast-isel-fallback.ll b/test/CodeGen/AArch64/i128-fast-isel-fallback.ll index 1cffbf3de052..80c83bd4823e 100644 --- a/test/CodeGen/AArch64/i128-fast-isel-fallback.ll +++ 
b/test/CodeGen/AArch64/i128-fast-isel-fallback.ll @@ -1,4 +1,4 @@ -; RUN: llc -O0 -mtriple=arm64-apple-ios7.0 -mcpu=generic < %s | FileCheck %s +; RUN: llc -O0 -fast-isel -mtriple=arm64-apple-ios7.0 -mcpu=generic < %s | FileCheck %s ; Function Attrs: nounwind ssp define void @test1() { diff --git a/test/CodeGen/AArch64/ldp-stp-scaled-unscaled-pairs.ll b/test/CodeGen/AArch64/ldp-stp-scaled-unscaled-pairs.ll index 35117a147eeb..951bd4ada3c9 100644 --- a/test/CodeGen/AArch64/ldp-stp-scaled-unscaled-pairs.ll +++ b/test/CodeGen/AArch64/ldp-stp-scaled-unscaled-pairs.ll @@ -115,11 +115,11 @@ entry: %C = getelementptr inbounds [12 x i8], [12 x i8]* %a2, i64 0, i64 4 %1 = bitcast i8* %C to i64* store i64 0, i64* %1, align 4 - call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 8, i32 8, i1 false) + call void @llvm.memset.p0i8.i64(i8* align 8 %0, i8 0, i64 8, i1 false) ret void } -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) attributes #0 = { nounwind } diff --git a/test/CodeGen/AArch64/ldst-opt.mir b/test/CodeGen/AArch64/ldst-opt.mir index 9cb9528cc62e..57ac50f6ee52 100644 --- a/test/CodeGen/AArch64/ldst-opt.mir +++ b/test/CodeGen/AArch64/ldst-opt.mir @@ -172,7 +172,7 @@ body: | STRXui %x0, %sp, 0 :: (store 8) STRXui killed %x0, %sp, 2 :: (store 8) %x0 = LDRXui %sp, 0 :: (load 8) - BL $bar, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit %x0, implicit-def %sp + BL &bar, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit %x0, implicit-def %sp RET %lr ... # CHECK-LABEL: name: promote-load-from-store-trivial-kills @@ -180,4 +180,4 @@ body: | # CHECK: STRXui %x0, %sp, 2 # CHECK-NOT: LDRXui # CHECK-NOT: ORR -# CHECK: BL $bar, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit %x0, implicit-def %sp +# CHECK: BL &bar, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit %x0, implicit-def %sp diff --git a/test/CodeGen/AArch64/ldst-paired-aliasing.ll b/test/CodeGen/AArch64/ldst-paired-aliasing.ll index 9c698b5fdcc6..0f8ffb50c8d9 100644 --- a/test/CodeGen/AArch64/ldst-paired-aliasing.ll +++ b/test/CodeGen/AArch64/ldst-paired-aliasing.ll @@ -5,7 +5,7 @@ target triple = "aarch64--linux-gnu" declare void @f(i8*, i8*) declare void @f2(i8*, i8*) declare void @_Z5setupv() -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) #3 +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) #3 define i32 @main() local_unnamed_addr #1 { ; Make sure the stores happen in the correct order (the exact instructions could change). 
@@ -24,7 +24,7 @@ for.body.lr.ph.i.i.i.i.i.i63: tail call void @_Z5setupv() %x2 = getelementptr inbounds [10 x i32], [10 x i32]* %b1, i64 0, i64 6 %x3 = bitcast i32* %x2 to i8* - call void @llvm.memset.p0i8.i64(i8* %x3, i8 0, i64 16, i32 8, i1 false) + call void @llvm.memset.p0i8.i64(i8* align 8 %x3, i8 0, i64 16, i1 false) %arraydecay2 = getelementptr inbounds [10 x i32], [10 x i32]* %b1, i64 0, i64 0 %x4 = bitcast [10 x i32]* %b1 to <4 x i32>* store <4 x i32> , <4 x i32>* %x4, align 16 diff --git a/test/CodeGen/AArch64/ldst-zero.ll b/test/CodeGen/AArch64/ldst-zero.ll index 7d443a631f91..0ada6fd84cbf 100644 --- a/test/CodeGen/AArch64/ldst-zero.ll +++ b/test/CodeGen/AArch64/ldst-zero.ll @@ -3,7 +3,7 @@ ; Tests to check that zero stores which are generated as STP xzr, xzr aren't ; scheduled incorrectly due to incorrect alias information -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) %struct.tree_common = type { i8*, i8*, i32 } ; Original test case which exhibited the bug @@ -14,7 +14,7 @@ define void @test1(%struct.tree_common* %t, i32 %code, i8* %type) { ; CHECK-DAG: str xzr, [x0] entry: %0 = bitcast %struct.tree_common* %t to i8* - tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 24, i32 8, i1 false) + tail call void @llvm.memset.p0i8.i64(i8* align 8 %0, i8 0, i64 24, i1 false) %code1 = getelementptr inbounds %struct.tree_common, %struct.tree_common* %t, i64 0, i32 2 store i32 %code, i32* %code1, align 8 %type2 = getelementptr inbounds %struct.tree_common, %struct.tree_common* %t, i64 0, i32 1 diff --git a/test/CodeGen/AArch64/local_vars.ll b/test/CodeGen/AArch64/local_vars.ll index 6e33ab2d0beb..a479572d2a31 100644 --- a/test/CodeGen/AArch64/local_vars.ll +++ b/test/CodeGen/AArch64/local_vars.ll @@ -17,7 +17,7 @@ declare void @foo() define void @trivial_func() nounwind { ; CHECK-LABEL: trivial_func: // @trivial_func -; CHECK-NEXT: // BB#0 +; CHECK-NEXT: // %bb.0 ; CHECK-NEXT: ret ret void diff --git a/test/CodeGen/AArch64/loh.mir b/test/CodeGen/AArch64/loh.mir index 6e4bb5cfaee6..ee62c339cf08 100644 --- a/test/CodeGen/AArch64/loh.mir +++ b/test/CodeGen/AArch64/loh.mir @@ -22,14 +22,14 @@ tracksRegLiveness: true body: | bb.0: ; CHECK: Adding MCLOH_AdrpAdrp: - ; CHECK-NEXT: %X1 = ADRP - ; CHECK-NEXT: %X1 = ADRP + ; CHECK-NEXT: %x1 = ADRP target-flags(aarch64-page) @g3 + ; CHECK-NEXT: %x1 = ADRP target-flags(aarch64-page) @g4 ; CHECK-NEXT: Adding MCLOH_AdrpAdrp: - ; CHECK-NEXT: %X1 = ADRP - ; CHECK-NEXT: %X1 = ADRP + ; CHECK-NEXT: %x1 = ADRP target-flags(aarch64-page) @g2 + ; CHECK-NEXT: %x1 = ADRP target-flags(aarch64-page) @g3 ; CHECK-NEXT: Adding MCLOH_AdrpAdrp: - ; CHECK-NEXT: %X0 = ADRP - ; CHECK-NEXT: %X0 = ADRP + ; CHECK-NEXT: %x0 = ADRP target-flags(aarch64-page) @g0 + ; CHECK-NEXT: %x0 = ADRP target-flags(aarch64-page) @g1 %x0 = ADRP target-flags(aarch64-page) @g0 %x0 = ADRP target-flags(aarch64-page) @g1 %x1 = ADRP target-flags(aarch64-page) @g2 @@ -38,11 +38,11 @@ body: | bb.1: ; CHECK-NEXT: Adding MCLOH_AdrpAdd: - ; CHECK-NEXT: %X20 = ADRP - ; CHECK-NEXT: %X3 = ADDXri %X20, + ; CHECK-NEXT: %x20 = ADRP target-flags(aarch64-page) @g0 + ; CHECK-NEXT: %x3 = ADDXri %x20, target-flags(aarch64-pageoff) @g0 ; CHECK-NEXT: Adding MCLOH_AdrpAdd: - ; CHECK-NEXT: %X1 = ADRP - ; CHECK-NEXT: %X1 = ADDXri %X1, + ; CHECK-NEXT: %x1 = ADRP target-flags(aarch64-page) @g0 + ; CHECK-NEXT: %x1 = ADDXri %x1, target-flags(aarch64-pageoff) @g0 %x1 = ADRP target-flags(aarch64-page) @g0 %x9 = SUBXri undef %x11, 5, 0 
; should not affect MCLOH formation %x1 = ADDXri %x1, target-flags(aarch64-pageoff) @g0, 0 @@ -73,11 +73,11 @@ body: | bb.5: ; CHECK-NEXT: Adding MCLOH_AdrpLdr: - ; CHECK-NEXT: %X5 = ADRP - ; CHECK-NEXT: %S6 = LDRSui %X5, + ; CHECK-NEXT: %x5 = ADRP target-flags(aarch64-page) @g2 + ; CHECK-NEXT: %s6 = LDRSui %x5, target-flags(aarch64-pageoff) @g2 ; CHECK-NEXT: Adding MCLOH_AdrpLdr: - ; CHECK-NEXT: %X4 = ADRP - ; CHECK-NEXT: %X4 = LDRXui %X4, + ; CHECK-NEXT: %x4 = ADRP target-flags(aarch64-page) @g2 + ; CHECK-NEXT: %x4 = LDRXui %x4, target-flags(aarch64-pageoff) @g2 %x4 = ADRP target-flags(aarch64-page) @g2 %x4 = LDRXui %x4, target-flags(aarch64-pageoff) @g2 %x5 = ADRP target-flags(aarch64-page) @g2 @@ -85,11 +85,11 @@ body: | bb.6: ; CHECK-NEXT: Adding MCLOH_AdrpLdrGot: - ; CHECK-NEXT: %X5 = ADRP - ; CHECK-NEXT: %X6 = LDRXui %X5, + ; CHECK-NEXT: %x5 = ADRP target-flags(aarch64-page, aarch64-got) @g2 + ; CHECK-NEXT: %x6 = LDRXui %x5, target-flags(aarch64-pageoff, aarch64-got) @g2 ; CHECK-NEXT: Adding MCLOH_AdrpLdrGot: - ; CHECK-NEXT: %X4 = ADRP - ; CHECK-NEXT: %X4 = LDRXui %X4, + ; CHECK-NEXT: %x4 = ADRP target-flags(aarch64-page, aarch64-got) @g2 + ; CHECK-NEXT: %x4 = LDRXui %x4, target-flags(aarch64-pageoff, aarch64-got) @g2 %x4 = ADRP target-flags(aarch64-page, aarch64-got) @g2 %x4 = LDRXui %x4, target-flags(aarch64-pageoff, aarch64-got) @g2 %x5 = ADRP target-flags(aarch64-page, aarch64-got) @g2 @@ -104,24 +104,24 @@ body: | bb.8: ; CHECK-NEXT: Adding MCLOH_AdrpAddLdr: - ; CHECK-NEXT: %X7 = ADRP [TF=1] - ; CHECK-NEXT: %X8 = ADDXri %X7, - ; CHECK-NEXT: %D1 = LDRDui %X8, 8 + ; CHECK-NEXT: %x7 = ADRP target-flags(aarch64-page) @g3 + ; CHECK-NEXT: %x8 = ADDXri %x7, target-flags(aarch64-pageoff) @g3 + ; CHECK-NEXT: %d1 = LDRDui %x8, 8 %x7 = ADRP target-flags(aarch64-page) @g3 %x8 = ADDXri %x7, target-flags(aarch64-pageoff) @g3, 0 %d1 = LDRDui %x8, 8 bb.9: ; CHECK-NEXT: Adding MCLOH_AdrpAdd: - ; CHECK-NEXT: %X3 = ADRP - ; CHECK-NEXT: %X3 = ADDXri %X3, + ; CHECK-NEXT: %x3 = ADRP target-flags(aarch64-page) @g3 + ; CHECK-NEXT: %x3 = ADDXri %x3, target-flags(aarch64-pageoff) @g3 ; CHECK-NEXT: Adding MCLOH_AdrpAdd: - ; CHECK-NEXT: %X5 = ADRP - ; CHECK-NEXT: %X2 = ADDXri %X5, + ; CHECK-NEXT: %x5 = ADRP target-flags(aarch64-page) @g3 + ; CHECK-NEXT: %x2 = ADDXri %x5, target-flags(aarch64-pageoff) @g3 ; CHECK-NEXT: Adding MCLOH_AdrpAddStr: - ; CHECK-NEXT: %X1 = ADRP - ; CHECK-NEXT: %X1 = ADDXri %X1, - ; CHECK-NEXT: STRXui %XZR, %X1, 16 + ; CHECK-NEXT: %x1 = ADRP target-flags(aarch64-page) @g3 + ; CHECK-NEXT: %x1 = ADDXri %x1, target-flags(aarch64-pageoff) @g3 + ; CHECK-NEXT: STRXui %xzr, %x1, 16 %x1 = ADRP target-flags(aarch64-page) @g3 %x1 = ADDXri %x1, target-flags(aarch64-pageoff) @g3, 0 STRXui %xzr, %x1, 16 @@ -138,12 +138,12 @@ body: | bb.10: ; CHECK-NEXT: Adding MCLOH_AdrpLdr: - ; CHECK-NEXT: %X2 = ADRP - ; CHECK-NEXT: %X2 = LDRXui %X2, + ; CHECK-NEXT: %x2 = ADRP target-flags(aarch64-page) @g3 + ; CHECK-NEXT: %x2 = LDRXui %x2, target-flags(aarch64-pageoff) @g3 ; CHECK-NEXT: Adding MCLOH_AdrpLdrGotLdr: - ; CHECK-NEXT: %X1 = ADRP - ; CHECK-NEXT: %X1 = LDRXui %X1, - ; CHECK-NEXT: %X1 = LDRXui %X1, 24 + ; CHECK-NEXT: %x1 = ADRP target-flags(aarch64-page, aarch64-got) @g4 + ; CHECK-NEXT: %x1 = LDRXui %x1, target-flags(aarch64-pageoff, aarch64-got) @g4 + ; CHECK-NEXT: %x1 = LDRXui %x1, 24 %x1 = ADRP target-flags(aarch64-page, aarch64-got) @g4 %x1 = LDRXui %x1, target-flags(aarch64-pageoff, aarch64-got) @g4 %x1 = LDRXui %x1, 24 @@ -154,12 +154,12 @@ body: | bb.11: ; CHECK-NEXT: Adding MCLOH_AdrpLdr - ; 
CHECK-NEXT: %X5 = ADRP - ; CHECK-NEXT: %X5 = LDRXui %X5, + ; CHECK-NEXT: %x5 = ADRP target-flags(aarch64-page) @g1 + ; CHECK-NEXT: %x5 = LDRXui %x5, target-flags(aarch64-pageoff) @g1 ; CHECK-NEXT: Adding MCLOH_AdrpLdrGotStr: - ; CHECK-NEXT: %X1 = ADRP - ; CHECK-NEXT: %X1 = LDRXui %X1, - ; CHECK-NEXT: STRXui %XZR, %X1, 32 + ; CHECK-NEXT: %x1 = ADRP target-flags(aarch64-page, aarch64-got) @g4 + ; CHECK-NEXT: %x1 = LDRXui %x1, target-flags(aarch64-pageoff, aarch64-got) @g4 + ; CHECK-NEXT: STRXui %xzr, %x1, 32 %x1 = ADRP target-flags(aarch64-page, aarch64-got) @g4 %x1 = LDRXui %x1, target-flags(aarch64-pageoff, aarch64-got) @g4 STRXui %xzr, %x1, 32 @@ -171,9 +171,9 @@ body: | bb.12: ; CHECK-NOT: MCLOH_AdrpAdrp ; CHECK: Adding MCLOH_AdrpAddLdr - ; %X9 = ADRP - ; %X9 = ADDXri %X9, - ; %X5 = LDRXui %X9, 0 + ; %x9 = ADRP @g4 + ; %x9 = ADDXri %x9, @g4 + ; %x5 = LDRXui %x9, 0 %x9 = ADRP target-flags(aarch64-page, aarch64-got) @g4 %x9 = ADDXri %x9, target-flags(aarch64-pageoff, aarch64-got) @g4, 0 %x5 = LDRXui %x9, 0 diff --git a/test/CodeGen/AArch64/loop-micro-op-buffer-size-t99.ll b/test/CodeGen/AArch64/loop-micro-op-buffer-size-t99.ll new file mode 100644 index 000000000000..d64b51509e16 --- /dev/null +++ b/test/CodeGen/AArch64/loop-micro-op-buffer-size-t99.ll @@ -0,0 +1,124 @@ +; REQUIRES: asserts +; RUN: opt -mcpu=thunderx2t99 -loop-unroll --debug-only=loop-unroll -S -unroll-allow-partial < %s 2>&1 | FileCheck %s + +target triple = "aarch64-unknown-linux-gnu" + +; CHECK: Loop Unroll: F[foo] Loop %loop.2.header +; CHECK: Loop Size = 19 +; CHECK: Trip Count = 512 +; CHECK: Trip Multiple = 512 +; CHECK: UNROLLING loop %loop.2.header by 4 with a breakout at trip 0 +; CHECK: Merging: +; CHECK: Loop Unroll: F[foo] Loop %loop.header +; CHECK: Loop Size = 18 +; CHECK: Trip Count = 512 +; CHECK: Trip Multiple = 512 +; CHECK: UNROLLING loop %loop.header by 4 with a breakout at trip 0 +; CHECK: Merging: +; CHECK: %counter = phi i32 [ 0, %entry ], [ %inc.3, %loop.inc.3 ] +; CHECK: %val = add nuw nsw i32 %counter, 5 +; CHECK: %val1 = add nuw nsw i32 %counter, 6 +; CHECK: %val2 = add nuw nsw i32 %counter, 7 +; CHECK: %val3 = add nuw nsw i32 %counter, 8 +; CHECK: %val4 = add nuw nsw i32 %counter, 9 +; CHECK: %val5 = add nuw nsw i32 %counter, 10 +; CHECK-NOT: %val = add i32 %counter, 5 +; CHECK-NOT: %val = add i32 %counter, 6 +; CHECK-NOT: %val = add i32 %counter, 7 +; CHECK-NOT: %val = add i32 %counter, 8 +; CHECK-NOT: %val = add i32 %counter, 9 +; CHECK-NOT: %val = add i32 %counter, 10 +; CHECK: %counter.2 = phi i32 [ 0, %exit.0 ], [ %inc.2.3, %loop.2.inc.3 ] + +define void @foo(i32 * %out) { +entry: + %0 = alloca [1024 x i32] + %x0 = alloca [1024 x i32] + %x01 = alloca [1024 x i32] + %x02 = alloca [1024 x i32] + %x03 = alloca [1024 x i32] + %x04 = alloca [1024 x i32] + %x05 = alloca [1024 x i32] + %x06 = alloca [1024 x i32] + br label %loop.header + +loop.header: + %counter = phi i32 [0, %entry], [%inc, %loop.inc] + br label %loop.body + +loop.body: + %ptr = getelementptr [1024 x i32], [1024 x i32]* %0, i32 0, i32 %counter + store i32 %counter, i32* %ptr + %val = add i32 %counter, 5 + %xptr = getelementptr [1024 x i32], [1024 x i32]* %x0, i32 0, i32 %counter + store i32 %val, i32* %xptr + %val1 = add i32 %counter, 6 + %xptr1 = getelementptr [1024 x i32], [1024 x i32]* %x01, i32 0, i32 %counter + store i32 %val1, i32* %xptr1 + %val2 = add i32 %counter, 7 + %xptr2 = getelementptr [1024 x i32], [1024 x i32]* %x02, i32 0, i32 %counter + store i32 %val2, i32* %xptr2 + %val3 = add i32 %counter, 8 + %xptr3 = 
getelementptr [1024 x i32], [1024 x i32]* %x03, i32 0, i32 %counter + store i32 %val3, i32* %xptr3 + %val4 = add i32 %counter, 9 + %xptr4 = getelementptr [1024 x i32], [1024 x i32]* %x04, i32 0, i32 %counter + store i32 %val4, i32* %xptr4 + %val5 = add i32 %counter, 10 + %xptr5 = getelementptr [1024 x i32], [1024 x i32]* %x05, i32 0, i32 %counter + store i32 %val5, i32* %xptr5 + br label %loop.inc + +loop.inc: + %inc = add i32 %counter, 2 + %1 = icmp sge i32 %inc, 1023 + br i1 %1, label %exit.0, label %loop.header + +exit.0: + %2 = getelementptr [1024 x i32], [1024 x i32]* %0, i32 0, i32 5 + %3 = load i32, i32* %2 + store i32 %3, i32 * %out + br label %loop.2.header + + +loop.2.header: + %counter.2 = phi i32 [0, %exit.0], [%inc.2, %loop.2.inc] + br label %loop.2.body + +loop.2.body: + %ptr.2 = getelementptr [1024 x i32], [1024 x i32]* %0, i32 0, i32 %counter.2 + store i32 %counter.2, i32* %ptr.2 + %val.2 = add i32 %counter.2, 5 + %xptr.2 = getelementptr [1024 x i32], [1024 x i32]* %x0, i32 0, i32 %counter.2 + store i32 %val.2, i32* %xptr.2 + %val1.2 = add i32 %counter.2, 6 + %xptr1.2 = getelementptr [1024 x i32], [1024 x i32]* %x01, i32 0, i32 %counter.2 + store i32 %val1, i32* %xptr1.2 + %val2.2 = add i32 %counter.2, 7 + %xptr2.2 = getelementptr [1024 x i32], [1024 x i32]* %x02, i32 0, i32 %counter.2 + store i32 %val2, i32* %xptr2.2 + %val3.2 = add i32 %counter.2, 8 + %xptr3.2 = getelementptr [1024 x i32], [1024 x i32]* %x03, i32 0, i32 %counter.2 + store i32 %val3.2, i32* %xptr3.2 + %val4.2 = add i32 %counter.2, 9 + %xptr4.2 = getelementptr [1024 x i32], [1024 x i32]* %x04, i32 0, i32 %counter.2 + store i32 %val4.2, i32* %xptr4.2 + %val5.2 = add i32 %counter.2, 10 + %xptr5.2 = getelementptr [1024 x i32], [1024 x i32]* %x05, i32 0, i32 %counter.2 + store i32 %val5.2, i32* %xptr5.2 + %xptr6.2 = getelementptr [1024 x i32], [1024 x i32]* %x06, i32 0, i32 %counter.2 + store i32 %val5.2, i32* %xptr6.2 + br label %loop.2.inc + +loop.2.inc: + %inc.2 = add i32 %counter.2, 2 + %4 = icmp sge i32 %inc.2, 1023 + br i1 %4, label %exit.2, label %loop.2.header + +exit.2: + %x2 = getelementptr [1024 x i32], [1024 x i32]* %0, i32 0, i32 6 + %x3 = load i32, i32* %x2 + %out2 = getelementptr i32, i32 * %out, i32 1 + store i32 %3, i32 * %out2 + ret void +} diff --git a/test/CodeGen/AArch64/machine-combiner-madd.ll b/test/CodeGen/AArch64/machine-combiner-madd.ll index 4efe4e9cfb01..5ace6e631361 100644 --- a/test/CodeGen/AArch64/machine-combiner-madd.ll +++ b/test/CodeGen/AArch64/machine-combiner-madd.ll @@ -19,7 +19,7 @@ %class.D = type { %class.basic_string.base, [4 x i8] } %class.basic_string.base = type <{ i64, i64, i32 }> @a = global %class.D* zeroinitializer, align 8 -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) define internal void @fun() section ".text.startup" { entry: %tmp.i.i = alloca %class.D, align 8 @@ -31,7 +31,7 @@ loop: %x = load %class.D*, %class.D** getelementptr inbounds (%class.D*, %class.D** @a, i64 0), align 8 %arrayidx.i.i.i = getelementptr inbounds %class.D, %class.D* %x, i64 %conv11.i.i %d = bitcast %class.D* %arrayidx.i.i.i to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull %y, i8* %d, i64 24, i32 8, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 nonnull %y, i8* align 8 %d, i64 24, i1 false) %inc.i.i = add i64 %i, 1 %cmp.i.i = icmp slt i64 %inc.i.i, 0 br i1 %cmp.i.i, label %loop, label %exit diff --git 
a/test/CodeGen/AArch64/machine-copy-prop.ll b/test/CodeGen/AArch64/machine-copy-prop.ll index 6bacf852907e..2ac87f000484 100644 --- a/test/CodeGen/AArch64/machine-copy-prop.ll +++ b/test/CodeGen/AArch64/machine-copy-prop.ll @@ -2,18 +2,18 @@ ; This file check a bug in MachineCopyPropagation pass. The last COPY will be ; incorrectly removed if the machine instructions are as follows: -; %Q5_Q6 = COPY %Q2_Q3 -; %D5 = -; %D3 = -; %D3 = COPY %D6 +; %q5_q6 = COPY %q2_q3 +; %d5 = +; %d3 = +; %d3 = COPY %d6 ; This is caused by a bug in function SourceNoLongerAvailable(), which fails to -; remove the relationship of D6 and "%Q5_Q6 = COPY %Q2_Q3". +; remove the relationship of D6 and "%q5_q6 = COPY %q2_q3". @failed = internal unnamed_addr global i1 false ; CHECK-LABEL: foo: ; CHECK: ld2 -; CHECK-NOT: // kill: D{{[0-9]+}} D{{[0-9]+}} +; CHECK-NOT: // kill: def D{{[0-9]+}} killed D{{[0-9]+}} define void @foo(<2 x i32> %shuffle251, <8 x i8> %vtbl1.i, i8* %t2, <2 x i32> %vrsubhn_v2.i1364) { entry: %val0 = alloca [2 x i64], align 8 diff --git a/test/CodeGen/AArch64/machine-outliner-remarks.ll b/test/CodeGen/AArch64/machine-outliner-remarks.ll index 1a237a2403ea..a5f131b5a0ca 100644 --- a/test/CodeGen/AArch64/machine-outliner-remarks.ll +++ b/test/CodeGen/AArch64/machine-outliner-remarks.ll @@ -120,4 +120,4 @@ attributes #0 = { noredzone nounwind ssp uwtable "no-frame-pointer-elim"="false" !26 = !DILocation(line: 29, column: 9, scope: !18) !27 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 35, type: !9, isLocal: false, isDefinition: true, scopeLine: 35, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2) !33 = !DILocation(line: 36, column: 1, scope: !27) -!35 = !DILocation(line: 38, column: 1, scope: !27) \ No newline at end of file +!35 = !DILocation(line: 38, column: 1, scope: !27) diff --git a/test/CodeGen/AArch64/machine-outliner.mir b/test/CodeGen/AArch64/machine-outliner.mir index 3ff2d2a3a36d..2c8a13b516ad 100644 --- a/test/CodeGen/AArch64/machine-outliner.mir +++ b/test/CodeGen/AArch64/machine-outliner.mir @@ -1,6 +1,12 @@ # RUN: llc -mtriple=aarch64--- -run-pass=machine-outliner %s -o - | FileCheck %s --- | + @x = common global i32 0, align 4 + + define void @baz() #0 { + ret void + } + define i32 @main() #0 { ret i32 0 } @@ -16,57 +22,75 @@ # - Create outlined functions # - Don't outline anything to do with LR or W30 # - Save LR when it's not available +# - Don't outline stack instructions when we might need to save + restore +# - Functions whose addresses are taken can still be outlined # # CHECK-LABEL: name: main + # CHECK: BL @OUTLINED_FUNCTION_[[F0:[0-9]+]] # CHECK-NEXT: early-clobber %sp, %lr = LDRXpost %sp, 16 +# CHECK-NEXT: %x16 = ADDXri %sp, 48, 0 # CHECK-NEXT: STRHHroW %w16, %x9, %w30, 1, 1 # CHECK-NEXT: %lr = ORRXri %xzr, 1 + # CHECK: BL @OUTLINED_FUNCTION_[[F0]] # CHECK-NEXT: early-clobber %sp, %lr = LDRXpost %sp, 16 +# CHECK-NEXT: %x16 = ADDXri %sp, 48, 0 # CHECK-NEXT: STRHHroW %w16, %x9, %w30, 1, 1 # CHECK-NEXT: %lr = ORRXri %xzr, 1 + # CHECK: BL @OUTLINED_FUNCTION_[[F0]] # CHECK-NEXT: early-clobber %sp, %lr = LDRXpost %sp, 16 +# CHECK-NEXT: %x16 = ADDXri %sp, 48, 0 # CHECK-NEXT: STRHHroW %w16, %x9, %w30, 1, 1 # CHECK-NEXT: %lr = ORRXri %xzr, 1 name: main +tracksRegLiveness: true body: | bb.0: %sp = frame-setup SUBXri %sp, 16, 0 + renamable %x9 = ADRP target-flags(aarch64-page) @bar %x9 = ORRXri %xzr, 1 %w16 = ORRWri %wzr, 1 %w30 = ORRWri %wzr, 1 %lr = ORRXri %xzr, 1 + %x20, %x19 = LDPXi %sp, 10 %w16 = ORRWri %wzr, 1 %w16 = ORRWri %wzr, 1 %w16 = 
ORRWri %wzr, 1 %w16 = ORRWri %wzr, 1 %w16 = ORRWri %wzr, 1 %w16 = ORRWri %wzr, 1 + renamable %x9 = ADRP target-flags(aarch64-page) @x + %x16 = ADDXri %sp, 48, 0; STRHHroW %w16, %x9, %w30, 1, 1 %lr = ORRXri %xzr, 1 - %w3 = ORRWri %wzr, 1993 + %x20, %x19 = LDPXi %sp, 10 %w16 = ORRWri %wzr, 1 %w16 = ORRWri %wzr, 1 %w16 = ORRWri %wzr, 1 %w16 = ORRWri %wzr, 1 %w16 = ORRWri %wzr, 1 %w16 = ORRWri %wzr, 1 + renamable %x9 = ADRP target-flags(aarch64-page) @x + %x16 = ADDXri %sp, 48, 0; STRHHroW %w16, %x9, %w30, 1, 1 - %lr = ORRXri %xzr, 1 + %lr = ORRXri %xzr, 1 %w4 = ORRWri %wzr, 1994 + %x20, %x19 = LDPXi %sp, 10 %w16 = ORRWri %wzr, 1 %w16 = ORRWri %wzr, 1 %w16 = ORRWri %wzr, 1 %w16 = ORRWri %wzr, 1 %w16 = ORRWri %wzr, 1 %w16 = ORRWri %wzr, 1 + renamable %x9 = ADRP target-flags(aarch64-page) @x + %x16 = ADDXri %sp, 48, 0; STRHHroW %w16, %x9, %w30, 1, 1 %lr = ORRXri %xzr, 1 @@ -77,6 +101,7 @@ body: | --- # This test ensures that we can avoid saving LR when it's available. # CHECK-LABEL: bb.1: +# CHECK-NOT: BL @baz, implicit-def dead %lr, implicit %sp # CHECK: BL @OUTLINED_FUNCTION_[[F1:[0-9]+]], implicit-def %lr, implicit %sp # CHECK-NEXT: %w17 = ORRWri %wzr, 2 # CHECK-NEXT: BL @OUTLINED_FUNCTION_[[F1]], implicit-def %lr, implicit %sp @@ -90,23 +115,48 @@ body: | %fp = frame-setup ADDXri %sp, 16, 0 bb.1: + BL @baz, implicit-def dead %lr, implicit %sp %w17 = ORRWri %wzr, 1 %w17 = ORRWri %wzr, 1 %w17 = ORRWri %wzr, 1 %w17 = ORRWri %wzr, 1 + BL @baz, implicit-def dead %lr, implicit %sp %w17 = ORRWri %wzr, 2 + BL @baz, implicit-def dead %lr, implicit %sp %w17 = ORRWri %wzr, 1 %w17 = ORRWri %wzr, 1 %w17 = ORRWri %wzr, 1 %w17 = ORRWri %wzr, 1 + BL @baz, implicit-def dead %lr, implicit %sp %w8 = ORRWri %wzr, 0 - + bb.2: + %w15 = ORRWri %wzr, 1 + %w15 = ORRWri %wzr, 1 + %w15 = ORRWri %wzr, 1 + %w15 = ORRWri %wzr, 1 + %x15 = ADDXri %sp, 48, 0; + %w9 = ORRWri %wzr, 0 + %w15 = ORRWri %wzr, 1 + %w15 = ORRWri %wzr, 1 + %w15 = ORRWri %wzr, 1 + %w15 = ORRWri %wzr, 1 + %x15 = ADDXri %sp, 48, 0; + %w8 = ORRWri %wzr, 0 + + bb.3: %fp, %lr = LDPXi %sp, 2 %sp = ADDXri %sp, 32, 0 RET undef %lr ... 
+--- +name: baz +tracksRegLiveness: true +body: | + bb.0: + liveins: %w0, %lr, %w8 + RET undef %lr # CHECK-LABEL: name: OUTLINED_FUNCTION_{{[0-9]}} # CHECK=LABEL: name: OUTLINED_FUNCTION_{{[1-9]}} diff --git a/test/CodeGen/AArch64/max-jump-table.ll b/test/CodeGen/AArch64/max-jump-table.ll index 070502052fff..9a0179ecc1b8 100644 --- a/test/CodeGen/AArch64/max-jump-table.ll +++ b/test/CodeGen/AArch64/max-jump-table.ll @@ -28,19 +28,19 @@ entry: ] ; CHECK-LABEL: function jt1: ; CHECK-NEXT: Jump Tables: -; CHECK0-NEXT: jt#0: -; CHECK0-NOT: jt#1: -; CHECK4-NEXT: jt#0: -; CHECK4-SAME: jt#1: -; CHECK4-SAME: jt#2: -; CHECK4-SAME: jt#3: -; CHECK4-NOT: jt#4: -; CHECK8-NEXT: jt#0: -; CHECK8-SAME: jt#1: -; CHECK8-NOT: jt#2: -; CHECKM1-NEXT: jt#0: -; CHECKM1-SAME: jt#1 -; CHECKM1-NOT: jt#2: +; CHECK0-NEXT: %jump-table.0: +; CHECK0-NOT: %jump-table.1: +; CHECK4-NEXT: %jump-table.0: +; CHECK4-SAME: %jump-table.1: +; CHECK4-SAME: %jump-table.2: +; CHECK4-SAME: %jump-table.3: +; CHECK4-NOT: %jump-table.4: +; CHECK8-NEXT: %jump-table.0: +; CHECK8-SAME: %jump-table.1: +; CHECK8-NOT: %jump-table.2: +; CHECKM1-NEXT: %jump-table.0: +; CHECKM1-SAME: %jump-table.1 +; CHECKM1-NOT: %jump-table.2: ; CHEC-NEXT: Function Live Ins: bb1: tail call void @ext(i32 0) br label %return @@ -77,10 +77,10 @@ entry: ] ; CHECK-LABEL: function jt2: ; CHECK-NEXT: Jump Tables: -; CHECK0-NEXT: jt#0: BB#1 BB#2 BB#3 BB#4 BB#7 BB#7 BB#7 BB#7 BB#7 BB#7 BB#7 BB#7 BB#7 BB#5 BB#6{{$}} -; CHECK4-NEXT: jt#0: BB#1 BB#2 BB#3 BB#4{{$}} -; CHECK8-NEXT: jt#0: BB#1 BB#2 BB#3 BB#4{{$}} -; CHECKM1-NEXT: jt#0: BB#1 BB#2 BB#3 BB#4{{$}} +; CHECK0-NEXT: %jump-table.0: %bb.1 %bb.2 %bb.3 %bb.4 %bb.7 %bb.7 %bb.7 %bb.7 %bb.7 %bb.7 %bb.7 %bb.7 %bb.7 %bb.5 %bb.6{{$}} +; CHECK4-NEXT: %jump-table.0: %bb.1 %bb.2 %bb.3 %bb.4{{$}} +; CHECK8-NEXT: %jump-table.0: %bb.1 %bb.2 %bb.3 %bb.4{{$}} +; CHECKM1-NEXT: %jump-table.0: %bb.1 %bb.2 %bb.3 %bb.4{{$}} ; CHEC-NEXT: Function Live Ins: bb1: tail call void @ext(i32 1) br label %return diff --git a/test/CodeGen/AArch64/memcpy-f128.ll b/test/CodeGen/AArch64/memcpy-f128.ll index 7e6ec36104ab..8b91b8431087 100644 --- a/test/CodeGen/AArch64/memcpy-f128.ll +++ b/test/CodeGen/AArch64/memcpy-f128.ll @@ -12,8 +12,8 @@ define void @test1() { ; CHECK: str q0 ; CHECK: ret entry: - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* undef, i8* bitcast (%structA* @stubA to i8*), i64 48, i32 8, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 undef, i8* align 8 bitcast (%structA* @stubA to i8*), i64 48, i1 false) ret void } -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1) diff --git a/test/CodeGen/AArch64/merge-store-dependency.ll b/test/CodeGen/AArch64/merge-store-dependency.ll index 4f2af9ed7e65..5bed63ef895f 100644 --- a/test/CodeGen/AArch64/merge-store-dependency.ll +++ b/test/CodeGen/AArch64/merge-store-dependency.ll @@ -14,7 +14,7 @@ entry: ; A53: str [[DATA]], {{.*}} %0 = bitcast %struct1* %fde to i8* - tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 40, i32 8, i1 false) + tail call void @llvm.memset.p0i8.i64(i8* align 8 %0, i8 0, i64 40, i1 false) %state = getelementptr inbounds %struct1, %struct1* %fde, i64 0, i32 4 store i16 256, i16* %state, align 8 %fd1 = getelementptr inbounds %struct1, %struct1* %fde, i64 0, i32 2 @@ -58,6 +58,6 @@ exit: ret void } -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, 
i64, i1) declare i32 @fcntl(i32, i32, ...) declare noalias i8* @foo() diff --git a/test/CodeGen/AArch64/mergestores_noimplicitfloat.ll b/test/CodeGen/AArch64/mergestores_noimplicitfloat.ll index 74aeaf75d037..cd64ae11550c 100644 --- a/test/CodeGen/AArch64/mergestores_noimplicitfloat.ll +++ b/test/CodeGen/AArch64/mergestores_noimplicitfloat.ll @@ -16,8 +16,8 @@ target triple = "arm64-apple-ios10.0.0" ; CHECK-DAG: str [[R3]], [x0, #24] define void @pr33475(i8* %p0, i8* %p1) noimplicitfloat { - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p0, i8* %p1, i64 32, i32 4, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %p0, i8* align 4 %p1, i64 32, i1 false) ret void } -declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1) +declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1) diff --git a/test/CodeGen/AArch64/min-jump-table.ll b/test/CodeGen/AArch64/min-jump-table.ll index 80974debc48a..b22e683ebfed 100644 --- a/test/CodeGen/AArch64/min-jump-table.ll +++ b/test/CodeGen/AArch64/min-jump-table.ll @@ -12,8 +12,8 @@ entry: ] ; CHECK-LABEL: function jt2: ; CHECK0-NEXT: Jump Tables: -; CHECK0-NEXT: jt#0: -; CHECK0-NOT: jt#1: +; CHECK0-NEXT: %jump-table.0: +; CHECK0-NOT: %jump-table.1: ; CHECK4-NOT: Jump Tables: ; CHECK8-NOT: Jump Tables: @@ -33,11 +33,11 @@ entry: ] ; CHECK-LABEL: function jt4: ; CHECK0-NEXT: Jump Tables: -; CHECK0-NEXT: jt#0: -; CHECK0-NOT: jt#1: +; CHECK0-NEXT: %jump-table.0: +; CHECK0-NOT: %jump-table.1: ; CHECK4-NEXT: Jump Tables: -; CHECK4-NEXT: jt#0: -; CHECK4-NOT: jt#1: +; CHECK4-NEXT: %jump-table.0: +; CHECK4-NOT: %jump-table.1: ; CHECK8-NOT: Jump Tables: bb1: tail call void @ext(i32 0) br label %return @@ -62,8 +62,8 @@ entry: ] ; CHECK-LABEL: function jt8: ; CHECK-NEXT: Jump Tables: -; CHECK-NEXT: jt#0: -; CHECK-NOT: jt#1: +; CHECK-NEXT: %jump-table.0: +; CHECK-NOT: %jump-table.1: bb1: tail call void @ext(i32 0) br label %return bb2: tail call void @ext(i32 2) br label %return diff --git a/test/CodeGen/AArch64/minmax-of-minmax.ll b/test/CodeGen/AArch64/minmax-of-minmax.ll new file mode 100644 index 000000000000..9257832d4c4b --- /dev/null +++ b/test/CodeGen/AArch64/minmax-of-minmax.ll @@ -0,0 +1,2441 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=aarch64-unknown-unknown | FileCheck %s + +; There are 4 commuted variants (abbc/abcb/bcab/bcba) * +; 4 predicate variants ([*][lg][te]) * +; 4 min/max flavors (smin/smax/umin/umax) * +; 2 notted variants +; = 128 tests + +define <4 x i32> @smin_ab_bc(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: smin_ab_bc: +; CHECK: // %bb.0: +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smin v1.4s, v1.4s, v2.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %cmp_ab = icmp slt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_bc = icmp slt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ac = icmp slt <4 x i32> %a, %c + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc + ret <4 x i32> %r +} + +define <4 x i32> @smin_ab_cb(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: smin_ab_cb: +; CHECK: // %bb.0: +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smin v1.4s, v2.4s, v1.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %cmp_ab = icmp slt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_cb = icmp slt <4 x i32> %c, %b + %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, 
<4 x i32> %b + %cmp_ac = icmp slt <4 x i32> %a, %c + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb + ret <4 x i32> %r +} + +define <4 x i32> @smin_bc_ab(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: smin_bc_ab: +; CHECK: // %bb.0: +; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %cmp_bc = icmp slt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ab = icmp slt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_ca = icmp slt <4 x i32> %c, %a + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab + ret <4 x i32> %r +} + +define <4 x i32> @smin_bc_ba(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: smin_bc_ba: +; CHECK: // %bb.0: +; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smin v0.4s, v1.4s, v0.4s +; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %cmp_bc = icmp slt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ba = icmp slt <4 x i32> %b, %a + %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a + %cmp_ca = icmp slt <4 x i32> %c, %a + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba + ret <4 x i32> %r +} + +define <4 x i32> @smin_ab_bc_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: smin_ab_bc_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smin v1.4s, v1.4s, v2.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %cmp_ab = icmp slt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_bc = icmp slt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ac = icmp sgt <4 x i32> %c, %a + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc + ret <4 x i32> %r +} + +define <4 x i32> @smin_ab_cb_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: smin_ab_cb_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smin v1.4s, v2.4s, v1.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %cmp_ab = icmp slt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_cb = icmp slt <4 x i32> %c, %b + %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b + %cmp_ac = icmp sgt <4 x i32> %c, %a + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb + ret <4 x i32> %r +} + +define <4 x i32> @smin_bc_ab_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: smin_bc_ab_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %cmp_bc = icmp slt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ab = icmp slt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_ca = icmp sgt <4 x i32> %a, %c + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab + ret <4 x i32> %r +} + +define <4 x i32> @smin_bc_ba_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: smin_bc_ba_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smin v0.4s, v1.4s, v0.4s +; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %cmp_bc = icmp slt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, 
<4 x i32> %c + %cmp_ba = icmp slt <4 x i32> %b, %a + %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a + %cmp_ca = icmp sgt <4 x i32> %a, %c + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba + ret <4 x i32> %r +} + +define <4 x i32> @smin_ab_bc_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: smin_ab_bc_eq_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smin v1.4s, v1.4s, v2.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %cmp_ab = icmp slt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_bc = icmp slt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ac = icmp sle <4 x i32> %a, %c + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc + ret <4 x i32> %r +} + +define <4 x i32> @smin_ab_cb_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: smin_ab_cb_eq_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smin v1.4s, v2.4s, v1.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %cmp_ab = icmp slt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_cb = icmp slt <4 x i32> %c, %b + %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b + %cmp_ac = icmp sle <4 x i32> %a, %c + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb + ret <4 x i32> %r +} + +define <4 x i32> @smin_bc_ab_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: smin_bc_ab_eq_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %cmp_bc = icmp slt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ab = icmp slt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_ca = icmp sle <4 x i32> %c, %a + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab + ret <4 x i32> %r +} + +define <4 x i32> @smin_bc_ba_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: smin_bc_ba_eq_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smin v0.4s, v1.4s, v0.4s +; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %cmp_bc = icmp slt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ba = icmp slt <4 x i32> %b, %a + %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a + %cmp_ca = icmp sle <4 x i32> %c, %a + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba + ret <4 x i32> %r +} + +define <4 x i32> @smin_ab_bc_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: smin_ab_bc_eq_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smin v1.4s, v1.4s, v2.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %cmp_ab = icmp slt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_bc = icmp slt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ac = icmp sge <4 x i32> %c, %a + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc + ret <4 x i32> %r +} + +define <4 x i32> @smin_ab_cb_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: smin_ab_cb_eq_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smin v1.4s, v2.4s, v1.4s +; 
CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %cmp_ab = icmp slt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_cb = icmp slt <4 x i32> %c, %b + %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b + %cmp_ac = icmp sge <4 x i32> %c, %a + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb + ret <4 x i32> %r +} + +define <4 x i32> @smin_bc_ab_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: smin_bc_ab_eq_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %cmp_bc = icmp slt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ab = icmp slt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_ca = icmp sge <4 x i32> %a, %c + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab + ret <4 x i32> %r +} + +define <4 x i32> @smin_bc_ba_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: smin_bc_ba_eq_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smin v0.4s, v1.4s, v0.4s +; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %cmp_bc = icmp slt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ba = icmp slt <4 x i32> %b, %a + %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a + %cmp_ca = icmp sge <4 x i32> %a, %c + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba + ret <4 x i32> %r +} + +define <4 x i32> @smax_ab_bc(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: smax_ab_bc: +; CHECK: // %bb.0: +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smax v1.4s, v1.4s, v2.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %cmp_ab = icmp sgt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_bc = icmp sgt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ac = icmp sgt <4 x i32> %a, %c + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc + ret <4 x i32> %r +} + +define <4 x i32> @smax_ab_cb(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: smax_ab_cb: +; CHECK: // %bb.0: +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smax v1.4s, v2.4s, v1.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %cmp_ab = icmp sgt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_cb = icmp sgt <4 x i32> %c, %b + %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b + %cmp_ac = icmp sgt <4 x i32> %a, %c + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb + ret <4 x i32> %r +} + +define <4 x i32> @smax_bc_ab(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: smax_bc_ab: +; CHECK: // %bb.0: +; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %cmp_bc = icmp sgt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ab = icmp sgt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_ca = icmp sgt <4 x i32> %c, %a + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab + ret <4 x i32> %r +} + +define <4 x i32> @smax_bc_ba(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: smax_bc_ba: +; CHECK: // %bb.0: +; 
CHECK-NEXT: smax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smax v0.4s, v1.4s, v0.4s +; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %cmp_bc = icmp sgt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ba = icmp sgt <4 x i32> %b, %a + %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a + %cmp_ca = icmp sgt <4 x i32> %c, %a + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba + ret <4 x i32> %r +} + +define <4 x i32> @smax_ab_bc_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: smax_ab_bc_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smax v1.4s, v1.4s, v2.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %cmp_ab = icmp sgt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_bc = icmp sgt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ac = icmp slt <4 x i32> %c, %a + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc + ret <4 x i32> %r +} + +define <4 x i32> @smax_ab_cb_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: smax_ab_cb_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smax v1.4s, v2.4s, v1.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %cmp_ab = icmp sgt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_cb = icmp sgt <4 x i32> %c, %b + %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b + %cmp_ac = icmp slt <4 x i32> %c, %a + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb + ret <4 x i32> %r +} + +define <4 x i32> @smax_bc_ab_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: smax_bc_ab_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %cmp_bc = icmp sgt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ab = icmp sgt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_ca = icmp slt <4 x i32> %a, %c + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab + ret <4 x i32> %r +} + +define <4 x i32> @smax_bc_ba_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: smax_bc_ba_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smax v0.4s, v1.4s, v0.4s +; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %cmp_bc = icmp sgt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ba = icmp sgt <4 x i32> %b, %a + %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a + %cmp_ca = icmp slt <4 x i32> %a, %c + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba + ret <4 x i32> %r +} + +define <4 x i32> @smax_ab_bc_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: smax_ab_bc_eq_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smax v1.4s, v1.4s, v2.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %cmp_ab = icmp sgt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_bc = icmp sgt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ac = icmp sge <4 x i32> %a, %c + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc + ret <4 x i32> %r +} + +define 
<4 x i32> @smax_ab_cb_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: smax_ab_cb_eq_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smax v1.4s, v2.4s, v1.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %cmp_ab = icmp sgt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_cb = icmp sgt <4 x i32> %c, %b + %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b + %cmp_ac = icmp sge <4 x i32> %a, %c + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb + ret <4 x i32> %r +} + +define <4 x i32> @smax_bc_ab_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: smax_bc_ab_eq_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %cmp_bc = icmp sgt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ab = icmp sgt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_ca = icmp sge <4 x i32> %c, %a + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab + ret <4 x i32> %r +} + +define <4 x i32> @smax_bc_ba_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: smax_bc_ba_eq_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smax v0.4s, v1.4s, v0.4s +; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %cmp_bc = icmp sgt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ba = icmp sgt <4 x i32> %b, %a + %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a + %cmp_ca = icmp sge <4 x i32> %c, %a + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba + ret <4 x i32> %r +} + +define <4 x i32> @smax_ab_bc_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: smax_ab_bc_eq_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smax v1.4s, v1.4s, v2.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %cmp_ab = icmp sgt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_bc = icmp sgt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ac = icmp sle <4 x i32> %c, %a + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc + ret <4 x i32> %r +} + +define <4 x i32> @smax_ab_cb_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: smax_ab_cb_eq_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smax v1.4s, v2.4s, v1.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %cmp_ab = icmp sgt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_cb = icmp sgt <4 x i32> %c, %b + %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b + %cmp_ac = icmp sle <4 x i32> %c, %a + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb + ret <4 x i32> %r +} + +define <4 x i32> @smax_bc_ab_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: smax_bc_ab_eq_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %cmp_bc = icmp sgt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ab = icmp sgt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x 
i32> %b + %cmp_ca = icmp sle <4 x i32> %a, %c + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab + ret <4 x i32> %r +} + +define <4 x i32> @smax_bc_ba_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: smax_bc_ba_eq_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smax v0.4s, v1.4s, v0.4s +; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %cmp_bc = icmp sgt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ba = icmp sgt <4 x i32> %b, %a + %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a + %cmp_ca = icmp sle <4 x i32> %a, %c + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba + ret <4 x i32> %r +} + +define <4 x i32> @umin_ab_bc(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: umin_ab_bc: +; CHECK: // %bb.0: +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umin v1.4s, v1.4s, v2.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %cmp_ab = icmp ult <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_bc = icmp ult <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ac = icmp ult <4 x i32> %a, %c + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc + ret <4 x i32> %r +} + +define <4 x i32> @umin_ab_cb(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: umin_ab_cb: +; CHECK: // %bb.0: +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umin v1.4s, v2.4s, v1.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %cmp_ab = icmp ult <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_cb = icmp ult <4 x i32> %c, %b + %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b + %cmp_ac = icmp ult <4 x i32> %a, %c + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb + ret <4 x i32> %r +} + +define <4 x i32> @umin_bc_ab(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: umin_bc_ab: +; CHECK: // %bb.0: +; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %cmp_bc = icmp ult <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ab = icmp ult <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_ca = icmp ult <4 x i32> %c, %a + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab + ret <4 x i32> %r +} + +define <4 x i32> @umin_bc_ba(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: umin_bc_ba: +; CHECK: // %bb.0: +; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umin v0.4s, v1.4s, v0.4s +; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %cmp_bc = icmp ult <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ba = icmp ult <4 x i32> %b, %a + %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a + %cmp_ca = icmp ult <4 x i32> %c, %a + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba + ret <4 x i32> %r +} + +define <4 x i32> @umin_ab_bc_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: umin_ab_bc_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umin v1.4s, v1.4s, v2.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %cmp_ab = icmp ult <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_bc = icmp ult <4 x 
i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ac = icmp ugt <4 x i32> %c, %a + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc + ret <4 x i32> %r +} + +define <4 x i32> @umin_ab_cb_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: umin_ab_cb_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umin v1.4s, v2.4s, v1.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %cmp_ab = icmp ult <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_cb = icmp ult <4 x i32> %c, %b + %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b + %cmp_ac = icmp ugt <4 x i32> %c, %a + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb + ret <4 x i32> %r +} + +define <4 x i32> @umin_bc_ab_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: umin_bc_ab_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %cmp_bc = icmp ult <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ab = icmp ult <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_ca = icmp ugt <4 x i32> %a, %c + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab + ret <4 x i32> %r +} + +define <4 x i32> @umin_bc_ba_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: umin_bc_ba_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umin v0.4s, v1.4s, v0.4s +; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %cmp_bc = icmp ult <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ba = icmp ult <4 x i32> %b, %a + %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a + %cmp_ca = icmp ugt <4 x i32> %a, %c + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba + ret <4 x i32> %r +} + +define <4 x i32> @umin_ab_bc_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: umin_ab_bc_eq_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umin v1.4s, v1.4s, v2.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %cmp_ab = icmp ult <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_bc = icmp ult <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ac = icmp ule <4 x i32> %a, %c + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc + ret <4 x i32> %r +} + +define <4 x i32> @umin_ab_cb_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: umin_ab_cb_eq_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umin v1.4s, v2.4s, v1.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %cmp_ab = icmp ult <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_cb = icmp ult <4 x i32> %c, %b + %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b + %cmp_ac = icmp ule <4 x i32> %a, %c + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb + ret <4 x i32> %r +} + +define <4 x i32> @umin_bc_ab_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: umin_bc_ab_eq_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s +; CHECK-NEXT: 
ret + %cmp_bc = icmp ult <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ab = icmp ult <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_ca = icmp ule <4 x i32> %c, %a + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab + ret <4 x i32> %r +} + +define <4 x i32> @umin_bc_ba_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: umin_bc_ba_eq_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umin v0.4s, v1.4s, v0.4s +; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %cmp_bc = icmp ult <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ba = icmp ult <4 x i32> %b, %a + %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a + %cmp_ca = icmp ule <4 x i32> %c, %a + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba + ret <4 x i32> %r +} + +define <4 x i32> @umin_ab_bc_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: umin_ab_bc_eq_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umin v1.4s, v1.4s, v2.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %cmp_ab = icmp ult <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_bc = icmp ult <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ac = icmp uge <4 x i32> %c, %a + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc + ret <4 x i32> %r +} + +define <4 x i32> @umin_ab_cb_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: umin_ab_cb_eq_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umin v1.4s, v2.4s, v1.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %cmp_ab = icmp ult <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_cb = icmp ult <4 x i32> %c, %b + %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b + %cmp_ac = icmp uge <4 x i32> %c, %a + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb + ret <4 x i32> %r +} + +define <4 x i32> @umin_bc_ab_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: umin_bc_ab_eq_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %cmp_bc = icmp ult <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ab = icmp ult <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_ca = icmp uge <4 x i32> %a, %c + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab + ret <4 x i32> %r +} + +define <4 x i32> @umin_bc_ba_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: umin_bc_ba_eq_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umin v0.4s, v1.4s, v0.4s +; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %cmp_bc = icmp ult <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ba = icmp ult <4 x i32> %b, %a + %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a + %cmp_ca = icmp uge <4 x i32> %a, %c + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba + ret <4 x i32> %r +} + +define <4 x i32> @umax_ab_bc(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: umax_ab_bc: +; CHECK: // 
%bb.0: +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umax v1.4s, v1.4s, v2.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %cmp_ab = icmp ugt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_bc = icmp ugt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ac = icmp ugt <4 x i32> %a, %c + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc + ret <4 x i32> %r +} + +define <4 x i32> @umax_ab_cb(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: umax_ab_cb: +; CHECK: // %bb.0: +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umax v1.4s, v2.4s, v1.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %cmp_ab = icmp ugt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_cb = icmp ugt <4 x i32> %c, %b + %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b + %cmp_ac = icmp ugt <4 x i32> %a, %c + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb + ret <4 x i32> %r +} + +define <4 x i32> @umax_bc_ab(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: umax_bc_ab: +; CHECK: // %bb.0: +; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %cmp_bc = icmp ugt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ab = icmp ugt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_ca = icmp ugt <4 x i32> %c, %a + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab + ret <4 x i32> %r +} + +define <4 x i32> @umax_bc_ba(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: umax_bc_ba: +; CHECK: // %bb.0: +; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umax v0.4s, v1.4s, v0.4s +; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %cmp_bc = icmp ugt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ba = icmp ugt <4 x i32> %b, %a + %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a + %cmp_ca = icmp ugt <4 x i32> %c, %a + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba + ret <4 x i32> %r +} + +define <4 x i32> @umax_ab_bc_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: umax_ab_bc_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umax v1.4s, v1.4s, v2.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %cmp_ab = icmp ugt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_bc = icmp ugt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ac = icmp ult <4 x i32> %c, %a + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc + ret <4 x i32> %r +} + +define <4 x i32> @umax_ab_cb_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: umax_ab_cb_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umax v1.4s, v2.4s, v1.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %cmp_ab = icmp ugt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_cb = icmp ugt <4 x i32> %c, %b + %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b + %cmp_ac = icmp ult <4 x i32> %c, %a + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb + ret <4 x i32> %r +} + +define <4 x i32> @umax_bc_ab_swap_pred(<4 x i32> %a, 
<4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: umax_bc_ab_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %cmp_bc = icmp ugt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ab = icmp ugt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_ca = icmp ult <4 x i32> %a, %c + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab + ret <4 x i32> %r +} + +define <4 x i32> @umax_bc_ba_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: umax_bc_ba_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umax v0.4s, v1.4s, v0.4s +; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %cmp_bc = icmp ugt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ba = icmp ugt <4 x i32> %b, %a + %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a + %cmp_ca = icmp ult <4 x i32> %a, %c + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba + ret <4 x i32> %r +} + +define <4 x i32> @umax_ab_bc_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: umax_ab_bc_eq_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umax v1.4s, v1.4s, v2.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %cmp_ab = icmp ugt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_bc = icmp ugt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ac = icmp uge <4 x i32> %a, %c + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc + ret <4 x i32> %r +} + +define <4 x i32> @umax_ab_cb_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: umax_ab_cb_eq_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umax v1.4s, v2.4s, v1.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %cmp_ab = icmp ugt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_cb = icmp ugt <4 x i32> %c, %b + %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b + %cmp_ac = icmp uge <4 x i32> %a, %c + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb + ret <4 x i32> %r +} + +define <4 x i32> @umax_bc_ab_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: umax_bc_ab_eq_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %cmp_bc = icmp ugt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ab = icmp ugt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_ca = icmp uge <4 x i32> %c, %a + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab + ret <4 x i32> %r +} + +define <4 x i32> @umax_bc_ba_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: umax_bc_ba_eq_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umax v0.4s, v1.4s, v0.4s +; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %cmp_bc = icmp ugt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ba = icmp ugt <4 x i32> %b, %a + %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a + %cmp_ca = icmp uge <4 x i32> %c, %a + %r = select <4 x 
i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba + ret <4 x i32> %r +} + +define <4 x i32> @umax_ab_bc_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: umax_ab_bc_eq_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umax v1.4s, v1.4s, v2.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %cmp_ab = icmp ugt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_bc = icmp ugt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ac = icmp ule <4 x i32> %c, %a + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc + ret <4 x i32> %r +} + +define <4 x i32> @umax_ab_cb_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: umax_ab_cb_eq_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umax v1.4s, v2.4s, v1.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %cmp_ab = icmp ugt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_cb = icmp ugt <4 x i32> %c, %b + %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b + %cmp_ac = icmp ule <4 x i32> %c, %a + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb + ret <4 x i32> %r +} + +define <4 x i32> @umax_bc_ab_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: umax_bc_ab_eq_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %cmp_bc = icmp ugt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ab = icmp ugt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_ca = icmp ule <4 x i32> %a, %c + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab + ret <4 x i32> %r +} + +define <4 x i32> @umax_bc_ba_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CHECK-LABEL: umax_bc_ba_eq_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umax v0.4s, v1.4s, v0.4s +; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %cmp_bc = icmp ugt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ba = icmp ugt <4 x i32> %b, %a + %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a + %cmp_ca = icmp ule <4 x i32> %a, %c + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba + ret <4 x i32> %r +} + +define <4 x i32> @notted_smin_ab_bc(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_smin_ab_bc: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smin v1.4s, v1.4s, v2.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_ab = icmp slt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_bc = icmp slt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ac = icmp slt <4 x i32> %z, %x + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc + ret <4 x i32> %r +} + +define <4 x i32> @notted_smin_ab_cb(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_smin_ab_cb: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b
+; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smin v1.4s, v2.4s, v1.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_ab = icmp slt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_cb = icmp slt <4 x i32> %c, %b + %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b + %cmp_ac = icmp slt <4 x i32> %z, %x + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb + ret <4 x i32> %r +} + +define <4 x i32> @notted_smin_bc_ab(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_smin_bc_ab: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_bc = icmp slt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ab = icmp slt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_ca = icmp slt <4 x i32> %x, %z + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab + ret <4 x i32> %r +} + +define <4 x i32> @notted_smin_bc_ba(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_smin_bc_ba: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smin v0.4s, v1.4s, v0.4s +; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_bc = icmp slt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ba = icmp slt <4 x i32> %b, %a + %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a + %cmp_ca = icmp slt <4 x i32> %x, %z + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba + ret <4 x i32> %r +} + +define <4 x i32> @notted_smin_ab_bc_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_smin_ab_bc_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smin v1.4s, v1.4s, v2.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_ab = icmp slt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_bc = icmp slt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ac = icmp sgt <4 x i32> %x, %z + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc + ret <4 x i32> %r +} + +define <4 x i32> @notted_smin_ab_cb_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_smin_ab_cb_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smin v1.4s, v2.4s, v1.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_ab = icmp slt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_cb = icmp 
slt <4 x i32> %c, %b + %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b + %cmp_ac = icmp sgt <4 x i32> %x, %z + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb + ret <4 x i32> %r +} + +define <4 x i32> @notted_smin_bc_ab_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_smin_bc_ab_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, + %b = xor <4 x i32> %y, + %c = xor <4 x i32> %z, + %cmp_bc = icmp slt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ab = icmp slt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_ca = icmp sgt <4 x i32> %z, %x + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab + ret <4 x i32> %r +} + +define <4 x i32> @notted_smin_bc_ba_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_smin_bc_ba_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smin v0.4s, v1.4s, v0.4s +; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, + %b = xor <4 x i32> %y, + %c = xor <4 x i32> %z, + %cmp_bc = icmp slt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ba = icmp slt <4 x i32> %b, %a + %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a + %cmp_ca = icmp sgt <4 x i32> %z, %x + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba + ret <4 x i32> %r +} + +define <4 x i32> @notted_smin_ab_bc_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_smin_ab_bc_eq_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smin v1.4s, v1.4s, v2.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, + %b = xor <4 x i32> %y, + %c = xor <4 x i32> %z, + %cmp_ab = icmp slt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_bc = icmp slt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ac = icmp sle <4 x i32> %z, %x + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc + ret <4 x i32> %r +} + +define <4 x i32> @notted_smin_ab_cb_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_smin_ab_cb_eq_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smin v1.4s, v2.4s, v1.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, + %b = xor <4 x i32> %y, + %c = xor <4 x i32> %z, + %cmp_ab = icmp slt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_cb = icmp slt <4 x i32> %c, %b + %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b + %cmp_ac = icmp sle <4 x i32> %z, %x + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb + ret <4 x i32> %r +} + +define <4 x i32> @notted_smin_bc_ab_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: 
notted_smin_bc_ab_eq_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, + %b = xor <4 x i32> %y, + %c = xor <4 x i32> %z, + %cmp_bc = icmp slt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ab = icmp slt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_ca = icmp sle <4 x i32> %x, %z + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab + ret <4 x i32> %r +} + +define <4 x i32> @notted_smin_bc_ba_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_smin_bc_ba_eq_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smin v0.4s, v1.4s, v0.4s +; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, + %b = xor <4 x i32> %y, + %c = xor <4 x i32> %z, + %cmp_bc = icmp slt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ba = icmp slt <4 x i32> %b, %a + %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a + %cmp_ca = icmp sle <4 x i32> %x, %z + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba + ret <4 x i32> %r +} + +define <4 x i32> @notted_smin_ab_bc_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_smin_ab_bc_eq_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smin v1.4s, v1.4s, v2.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, + %b = xor <4 x i32> %y, + %c = xor <4 x i32> %z, + %cmp_ab = icmp slt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_bc = icmp slt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ac = icmp sge <4 x i32> %x, %z + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc + ret <4 x i32> %r +} + +define <4 x i32> @notted_smin_ab_cb_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_smin_ab_cb_eq_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smin v1.4s, v2.4s, v1.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, + %b = xor <4 x i32> %y, + %c = xor <4 x i32> %z, + %cmp_ab = icmp slt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_cb = icmp slt <4 x i32> %c, %b + %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b + %cmp_ac = icmp sge <4 x i32> %x, %z + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb + ret <4 x i32> %r +} + +define <4 x i32> @notted_smin_bc_ab_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_smin_bc_ab_eq_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, 
+ %b = xor <4 x i32> %y, + %c = xor <4 x i32> %z, + %cmp_bc = icmp slt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ab = icmp slt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_ca = icmp sge <4 x i32> %z, %x + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab + ret <4 x i32> %r +} + +define <4 x i32> @notted_smin_bc_ba_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_smin_bc_ba_eq_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smin v0.4s, v1.4s, v0.4s +; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, + %b = xor <4 x i32> %y, + %c = xor <4 x i32> %z, + %cmp_bc = icmp slt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ba = icmp slt <4 x i32> %b, %a + %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a + %cmp_ca = icmp sge <4 x i32> %z, %x + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba + ret <4 x i32> %r +} + +define <4 x i32> @notted_smax_ab_bc(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_smax_ab_bc: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smax v1.4s, v1.4s, v2.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, + %b = xor <4 x i32> %y, + %c = xor <4 x i32> %z, + %cmp_ab = icmp sgt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_bc = icmp sgt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ac = icmp sgt <4 x i32> %z, %x + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc + ret <4 x i32> %r +} + +define <4 x i32> @notted_smax_ab_cb(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_smax_ab_cb: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smax v1.4s, v2.4s, v1.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, + %b = xor <4 x i32> %y, + %c = xor <4 x i32> %z, + %cmp_ab = icmp sgt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_cb = icmp sgt <4 x i32> %c, %b + %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b + %cmp_ac = icmp sgt <4 x i32> %z, %x + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb + ret <4 x i32> %r +} + +define <4 x i32> @notted_smax_bc_ab(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_smax_bc_ab: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, + %b = xor <4 x i32> %y, + %c = xor <4 x i32> %z, + %cmp_bc = icmp sgt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ab = icmp sgt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_ca = icmp sgt <4 x i32> %x, %z + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab + ret <4 x i32> %r 
+} + +define <4 x i32> @notted_smax_bc_ba(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_smax_bc_ba: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smax v0.4s, v1.4s, v0.4s +; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, + %b = xor <4 x i32> %y, + %c = xor <4 x i32> %z, + %cmp_bc = icmp sgt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ba = icmp sgt <4 x i32> %b, %a + %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a + %cmp_ca = icmp sgt <4 x i32> %x, %z + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba + ret <4 x i32> %r +} + +define <4 x i32> @notted_smax_ab_bc_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_smax_ab_bc_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smax v1.4s, v1.4s, v2.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, + %b = xor <4 x i32> %y, + %c = xor <4 x i32> %z, + %cmp_ab = icmp sgt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_bc = icmp sgt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ac = icmp slt <4 x i32> %x, %z + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc + ret <4 x i32> %r +} + +define <4 x i32> @notted_smax_ab_cb_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_smax_ab_cb_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smax v1.4s, v2.4s, v1.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, + %b = xor <4 x i32> %y, + %c = xor <4 x i32> %z, + %cmp_ab = icmp sgt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_cb = icmp sgt <4 x i32> %c, %b + %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b + %cmp_ac = icmp slt <4 x i32> %x, %z + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb + ret <4 x i32> %r +} + +define <4 x i32> @notted_smax_bc_ab_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_smax_bc_ab_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, + %b = xor <4 x i32> %y, + %c = xor <4 x i32> %z, + %cmp_bc = icmp sgt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ab = icmp sgt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_ca = icmp slt <4 x i32> %z, %x + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab + ret <4 x i32> %r +} + +define <4 x i32> @notted_smax_bc_ba_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_smax_bc_ba_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smax v0.4s, v1.4s, v0.4s +; 
CHECK-NEXT: smax v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, + %b = xor <4 x i32> %y, + %c = xor <4 x i32> %z, + %cmp_bc = icmp sgt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ba = icmp sgt <4 x i32> %b, %a + %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a + %cmp_ca = icmp slt <4 x i32> %z, %x + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba + ret <4 x i32> %r +} + +define <4 x i32> @notted_smax_ab_bc_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_smax_ab_bc_eq_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smax v1.4s, v1.4s, v2.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, + %b = xor <4 x i32> %y, + %c = xor <4 x i32> %z, + %cmp_ab = icmp sgt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_bc = icmp sgt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ac = icmp sge <4 x i32> %z, %x + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc + ret <4 x i32> %r +} + +define <4 x i32> @notted_smax_ab_cb_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_smax_ab_cb_eq_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smax v1.4s, v2.4s, v1.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, + %b = xor <4 x i32> %y, + %c = xor <4 x i32> %z, + %cmp_ab = icmp sgt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_cb = icmp sgt <4 x i32> %c, %b + %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b + %cmp_ac = icmp sge <4 x i32> %z, %x + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb + ret <4 x i32> %r +} + +define <4 x i32> @notted_smax_bc_ab_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_smax_bc_ab_eq_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, + %b = xor <4 x i32> %y, + %c = xor <4 x i32> %z, + %cmp_bc = icmp sgt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ab = icmp sgt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_ca = icmp sge <4 x i32> %x, %z + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab + ret <4 x i32> %r +} + +define <4 x i32> @notted_smax_bc_ba_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_smax_bc_ba_eq_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smax v0.4s, v1.4s, v0.4s +; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, + %b = xor <4 x i32> %y, + %c = xor <4 x i32> %z, + %cmp_bc = icmp sgt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ba = icmp sgt <4 x i32> %b, %a + %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a + 
%cmp_ca = icmp sge <4 x i32> %x, %z + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba + ret <4 x i32> %r +} + +define <4 x i32> @notted_smax_ab_bc_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_smax_ab_bc_eq_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smax v1.4s, v1.4s, v2.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, + %b = xor <4 x i32> %y, + %c = xor <4 x i32> %z, + %cmp_ab = icmp sgt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_bc = icmp sgt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ac = icmp sle <4 x i32> %x, %z + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc + ret <4 x i32> %r +} + +define <4 x i32> @notted_smax_ab_cb_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_smax_ab_cb_eq_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smax v1.4s, v2.4s, v1.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, + %b = xor <4 x i32> %y, + %c = xor <4 x i32> %z, + %cmp_ab = icmp sgt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_cb = icmp sgt <4 x i32> %c, %b + %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b + %cmp_ac = icmp sle <4 x i32> %x, %z + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb + ret <4 x i32> %r +} + +define <4 x i32> @notted_smax_bc_ab_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_smax_bc_ab_eq_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, + %b = xor <4 x i32> %y, + %c = xor <4 x i32> %z, + %cmp_bc = icmp sgt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ab = icmp sgt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_ca = icmp sle <4 x i32> %z, %x + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab + ret <4 x i32> %r +} + +define <4 x i32> @notted_smax_bc_ba_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_smax_bc_ba_eq_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: smax v0.4s, v1.4s, v0.4s +; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, + %b = xor <4 x i32> %y, + %c = xor <4 x i32> %z, + %cmp_bc = icmp sgt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ba = icmp sgt <4 x i32> %b, %a + %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a + %cmp_ca = icmp sle <4 x i32> %z, %x + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba + ret <4 x i32> %r +} + +define <4 x i32> @notted_umin_ab_bc(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_umin_ab_bc: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; 
CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umin v1.4s, v1.4s, v2.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, + %b = xor <4 x i32> %y, + %c = xor <4 x i32> %z, + %cmp_ab = icmp ult <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_bc = icmp ult <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ac = icmp ult <4 x i32> %z, %x + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc + ret <4 x i32> %r +} + +define <4 x i32> @notted_umin_ab_cb(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_umin_ab_cb: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umin v1.4s, v2.4s, v1.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, + %b = xor <4 x i32> %y, + %c = xor <4 x i32> %z, + %cmp_ab = icmp ult <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_cb = icmp ult <4 x i32> %c, %b + %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b + %cmp_ac = icmp ult <4 x i32> %z, %x + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb + ret <4 x i32> %r +} + +define <4 x i32> @notted_umin_bc_ab(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_umin_bc_ab: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, + %b = xor <4 x i32> %y, + %c = xor <4 x i32> %z, + %cmp_bc = icmp ult <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ab = icmp ult <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_ca = icmp ult <4 x i32> %x, %z + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab + ret <4 x i32> %r +} + +define <4 x i32> @notted_umin_bc_ba(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_umin_bc_ba: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umin v0.4s, v1.4s, v0.4s +; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, + %b = xor <4 x i32> %y, + %c = xor <4 x i32> %z, + %cmp_bc = icmp ult <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ba = icmp ult <4 x i32> %b, %a + %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a + %cmp_ca = icmp ult <4 x i32> %x, %z + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba + ret <4 x i32> %r +} + +define <4 x i32> @notted_umin_ab_bc_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_umin_ab_bc_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umin v1.4s, v1.4s, v2.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, + %b = xor <4 x i32> %y, + %c = xor <4 x i32> %z, + %cmp_ab = icmp ult <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + 
%cmp_bc = icmp ult <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ac = icmp ugt <4 x i32> %x, %z + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc + ret <4 x i32> %r +} + +define <4 x i32> @notted_umin_ab_cb_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_umin_ab_cb_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umin v1.4s, v2.4s, v1.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, + %b = xor <4 x i32> %y, + %c = xor <4 x i32> %z, + %cmp_ab = icmp ult <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_cb = icmp ult <4 x i32> %c, %b + %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b + %cmp_ac = icmp ugt <4 x i32> %x, %z + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb + ret <4 x i32> %r +} + +define <4 x i32> @notted_umin_bc_ab_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_umin_bc_ab_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, + %b = xor <4 x i32> %y, + %c = xor <4 x i32> %z, + %cmp_bc = icmp ult <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ab = icmp ult <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_ca = icmp ugt <4 x i32> %z, %x + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab + ret <4 x i32> %r +} + +define <4 x i32> @notted_umin_bc_ba_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_umin_bc_ba_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umin v0.4s, v1.4s, v0.4s +; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, + %b = xor <4 x i32> %y, + %c = xor <4 x i32> %z, + %cmp_bc = icmp ult <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ba = icmp ult <4 x i32> %b, %a + %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a + %cmp_ca = icmp ugt <4 x i32> %z, %x + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba + ret <4 x i32> %r +} + +define <4 x i32> @notted_umin_ab_bc_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_umin_ab_bc_eq_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umin v1.4s, v1.4s, v2.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, + %b = xor <4 x i32> %y, + %c = xor <4 x i32> %z, + %cmp_ab = icmp ult <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_bc = icmp ult <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ac = icmp ule <4 x i32> %z, %x + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc + ret <4 x i32> %r +} + +define <4 x i32> @notted_umin_ab_cb_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; 
CHECK-LABEL: notted_umin_ab_cb_eq_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umin v1.4s, v2.4s, v1.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, + %b = xor <4 x i32> %y, + %c = xor <4 x i32> %z, + %cmp_ab = icmp ult <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_cb = icmp ult <4 x i32> %c, %b + %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b + %cmp_ac = icmp ule <4 x i32> %z, %x + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb + ret <4 x i32> %r +} + +define <4 x i32> @notted_umin_bc_ab_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_umin_bc_ab_eq_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, + %b = xor <4 x i32> %y, + %c = xor <4 x i32> %z, + %cmp_bc = icmp ult <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ab = icmp ult <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_ca = icmp ule <4 x i32> %x, %z + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab + ret <4 x i32> %r +} + +define <4 x i32> @notted_umin_bc_ba_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_umin_bc_ba_eq_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umin v0.4s, v1.4s, v0.4s +; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, + %b = xor <4 x i32> %y, + %c = xor <4 x i32> %z, + %cmp_bc = icmp ult <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ba = icmp ult <4 x i32> %b, %a + %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a + %cmp_ca = icmp ule <4 x i32> %x, %z + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba + ret <4 x i32> %r +} + +define <4 x i32> @notted_umin_ab_bc_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_umin_ab_bc_eq_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umin v1.4s, v1.4s, v2.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, + %b = xor <4 x i32> %y, + %c = xor <4 x i32> %z, + %cmp_ab = icmp ult <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_bc = icmp ult <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ac = icmp uge <4 x i32> %x, %z + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc + ret <4 x i32> %r +} + +define <4 x i32> @notted_umin_ab_cb_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_umin_ab_cb_eq_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umin v1.4s, v2.4s, v1.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> 
%x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_ab = icmp ult <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_cb = icmp ult <4 x i32> %c, %b + %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b + %cmp_ac = icmp uge <4 x i32> %x, %z + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb + ret <4 x i32> %r +} + +define <4 x i32> @notted_umin_bc_ab_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_umin_bc_ab_eq_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_bc = icmp ult <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ab = icmp ult <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_ca = icmp uge <4 x i32> %z, %x + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab + ret <4 x i32> %r +} + +define <4 x i32> @notted_umin_bc_ba_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_umin_bc_ba_eq_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umin v0.4s, v1.4s, v0.4s +; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_bc = icmp ult <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ba = icmp ult <4 x i32> %b, %a + %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a + %cmp_ca = icmp uge <4 x i32> %z, %x + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba + ret <4 x i32> %r +} + +define <4 x i32> @notted_umax_ab_bc(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_umax_ab_bc: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umax v1.4s, v1.4s, v2.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_ab = icmp ugt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_bc = icmp ugt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ac = icmp ugt <4 x i32> %z, %x + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc + ret <4 x i32> %r +} + +define <4 x i32> @notted_umax_ab_cb(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_umax_ab_cb: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umax v1.4s, v2.4s, v1.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_ab = icmp ugt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_cb = icmp ugt <4 x i32> %c, %b + %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b + %cmp_ac = icmp ugt <4 x i32> %z, %x + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x 
i32> %min_cb + ret <4 x i32> %r +} + +define <4 x i32> @notted_umax_bc_ab(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_umax_bc_ab: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_bc = icmp ugt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ab = icmp ugt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_ca = icmp ugt <4 x i32> %x, %z + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab + ret <4 x i32> %r +} + +define <4 x i32> @notted_umax_bc_ba(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_umax_bc_ba: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umax v0.4s, v1.4s, v0.4s +; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_bc = icmp ugt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ba = icmp ugt <4 x i32> %b, %a + %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a + %cmp_ca = icmp ugt <4 x i32> %x, %z + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba + ret <4 x i32> %r +} + +define <4 x i32> @notted_umax_ab_bc_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_umax_ab_bc_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umax v1.4s, v1.4s, v2.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_ab = icmp ugt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_bc = icmp ugt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ac = icmp ult <4 x i32> %x, %z + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc + ret <4 x i32> %r +} + +define <4 x i32> @notted_umax_ab_cb_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_umax_ab_cb_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umax v1.4s, v2.4s, v1.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_ab = icmp ugt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_cb = icmp ugt <4 x i32> %c, %b + %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b + %cmp_ac = icmp ult <4 x i32> %x, %z + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb + ret <4 x i32> %r +} + +define <4 x i32> @notted_umax_bc_ab_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_umax_bc_ab_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umax v0.4s, 
v0.4s, v1.4s +; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_bc = icmp ugt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ab = icmp ugt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_ca = icmp ult <4 x i32> %z, %x + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab + ret <4 x i32> %r +} + +define <4 x i32> @notted_umax_bc_ba_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_umax_bc_ba_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umax v0.4s, v1.4s, v0.4s +; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_bc = icmp ugt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ba = icmp ugt <4 x i32> %b, %a + %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a + %cmp_ca = icmp ult <4 x i32> %z, %x + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba + ret <4 x i32> %r +} + +define <4 x i32> @notted_umax_ab_bc_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_umax_ab_bc_eq_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umax v1.4s, v1.4s, v2.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_ab = icmp ugt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_bc = icmp ugt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ac = icmp uge <4 x i32> %z, %x + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc + ret <4 x i32> %r +} + +define <4 x i32> @notted_umax_ab_cb_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_umax_ab_cb_eq_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umax v1.4s, v2.4s, v1.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_ab = icmp ugt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_cb = icmp ugt <4 x i32> %c, %b + %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b + %cmp_ac = icmp uge <4 x i32> %z, %x + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb + ret <4 x i32> %r +} + +define <4 x i32> @notted_umax_bc_ab_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_umax_bc_ab_eq_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_bc = icmp ugt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ab = icmp ugt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, 
<4 x i32> %b + %cmp_ca = icmp uge <4 x i32> %x, %z + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab + ret <4 x i32> %r +} + +define <4 x i32> @notted_umax_bc_ba_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_umax_bc_ba_eq_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umax v0.4s, v1.4s, v0.4s +; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_bc = icmp ugt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ba = icmp ugt <4 x i32> %b, %a + %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a + %cmp_ca = icmp uge <4 x i32> %x, %z + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba + ret <4 x i32> %r +} + +define <4 x i32> @notted_umax_ab_bc_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_umax_ab_bc_eq_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umax v1.4s, v1.4s, v2.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_ab = icmp ugt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_bc = icmp ugt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ac = icmp ule <4 x i32> %x, %z + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_bc + ret <4 x i32> %r +} + +define <4 x i32> @notted_umax_ab_cb_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_umax_ab_cb_eq_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umax v1.4s, v2.4s, v1.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_ab = icmp ugt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_cb = icmp ugt <4 x i32> %c, %b + %min_cb = select <4 x i1> %cmp_cb, <4 x i32> %c, <4 x i32> %b + %cmp_ac = icmp ule <4 x i32> %x, %z + %r = select <4 x i1> %cmp_ac, <4 x i32> %min_ab, <4 x i32> %min_cb + ret <4 x i32> %r +} + +define <4 x i32> @notted_umax_bc_ab_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_umax_bc_ab_eq_swap_pred: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_bc = icmp ugt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ab = icmp ugt <4 x i32> %a, %b + %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b + %cmp_ca = icmp ule <4 x i32> %z, %x + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ab + ret <4 x i32> %r +} + +define <4 x i32> @notted_umax_bc_ba_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: notted_umax_bc_ba_eq_swap_pred: +; CHECK: // %bb.0: +; 
CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s +; CHECK-NEXT: umax v0.4s, v1.4s, v0.4s +; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ret + %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> + %b = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> + %c = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1> + %cmp_bc = icmp ugt <4 x i32> %b, %c + %min_bc = select <4 x i1> %cmp_bc, <4 x i32> %b, <4 x i32> %c + %cmp_ba = icmp ugt <4 x i32> %b, %a + %min_ba = select <4 x i1> %cmp_ba, <4 x i32> %b, <4 x i32> %a + %cmp_ca = icmp ule <4 x i32> %z, %x + %r = select <4 x i1> %cmp_ca, <4 x i32> %min_bc, <4 x i32> %min_ba + ret <4 x i32> %r +} + diff --git a/test/CodeGen/AArch64/misched-stp.ll b/test/CodeGen/AArch64/misched-stp.ll index 1c9ea68834c2..1afec40f1921 100644 --- a/test/CodeGen/AArch64/misched-stp.ll +++ b/test/CodeGen/AArch64/misched-stp.ll @@ -30,7 +30,7 @@ entry: ret void } -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) %struct.tree_common = type { i8*, i8*, i32 } ; CHECK-LABEL: test_zero @@ -41,7 +41,7 @@ declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) define void @test_zero(%struct.tree_common* %t, i32 %code, i8* %type) { entry: %0 = bitcast %struct.tree_common* %t to i8* - tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 24, i32 8, i1 false) + tail call void @llvm.memset.p0i8.i64(i8* align 8 %0, i8 0, i64 24, i1 false) %code1 = getelementptr inbounds %struct.tree_common, %struct.tree_common* %t, i64 0, i32 2 store i32 %code, i32* %code1, align 8 %type2 = getelementptr inbounds %struct.tree_common, %struct.tree_common* %t, i64 0, i32 1 diff --git a/test/CodeGen/AArch64/neon-bitcast.ll b/test/CodeGen/AArch64/neon-bitcast.ll index 61099d48fdd2..8f67ff83ae12 100644 --- a/test/CodeGen/AArch64/neon-bitcast.ll +++ b/test/CodeGen/AArch64/neon-bitcast.ll @@ -4,7 +4,7 @@ define <1 x i64> @test_v8i8_to_v1i64(<8 x i8> %in) nounwind { ; CHECK: test_v8i8_to_v1i64: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ret %val = bitcast <8 x i8> %in to <1 x i64> @@ -13,7 +13,7 @@ define <1 x i64> @test_v8i8_to_v1i64(<8 x i8> %in) nounwind { define <2 x i32> @test_v8i8_to_v2i32(<8 x i8> %in) nounwind { ; CHECK: test_v8i8_to_v2i32: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ret %val = bitcast <8 x i8> %in to <2 x i32> @@ -22,7 +22,7 @@ define <2 x i32> @test_v8i8_to_v2i32(<8 x i8> %in) nounwind { define <2 x float> @test_v8i8_to_v2f32(<8 x i8> %in) nounwind{ ; CHECK: test_v8i8_to_v2f32: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ret %val = bitcast <8 x i8> %in to <2 x float> @@ -31,7 +31,7 @@ define <2 x float> @test_v8i8_to_v2f32(<8 x i8> %in) nounwind{ define <4 x i16> @test_v8i8_to_v4i16(<8 x i8> %in) nounwind{ ; CHECK: test_v8i8_to_v4i16: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ret %val = bitcast <8 x i8> %in to <4 x i16> @@ -40,7 +40,7 @@ define <4 x i16> @test_v8i8_to_v4i16(<8 x i8> %in) nounwind{ define <8 x i8> @test_v8i8_to_v8i8(<8 x i8> %in) nounwind{ ; CHECK: test_v8i8_to_v8i8: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ret %val = bitcast <8 x i8> %in to <8 x i8> @@ -51,7 +51,7 @@ define <8 x i8> @test_v8i8_to_v8i8(<8 x i8> %in) nounwind{ define <1 x i64> @test_v4i16_to_v1i64(<4 x i16> %in) nounwind { ; CHECK: test_v4i16_to_v1i64: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ret %val = bitcast <4 x i16> %in to <1 x i64> @@ -60,7 +60,7 @@ define <1 x i64> 
@test_v4i16_to_v1i64(<4 x i16> %in) nounwind { define <2 x i32> @test_v4i16_to_v2i32(<4 x i16> %in) nounwind { ; CHECK: test_v4i16_to_v2i32: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ret %val = bitcast <4 x i16> %in to <2 x i32> @@ -69,7 +69,7 @@ define <2 x i32> @test_v4i16_to_v2i32(<4 x i16> %in) nounwind { define <2 x float> @test_v4i16_to_v2f32(<4 x i16> %in) nounwind{ ; CHECK: test_v4i16_to_v2f32: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ret %val = bitcast <4 x i16> %in to <2 x float> @@ -78,7 +78,7 @@ define <2 x float> @test_v4i16_to_v2f32(<4 x i16> %in) nounwind{ define <4 x i16> @test_v4i16_to_v4i16(<4 x i16> %in) nounwind{ ; CHECK: test_v4i16_to_v4i16: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ret %val = bitcast <4 x i16> %in to <4 x i16> @@ -87,7 +87,7 @@ define <4 x i16> @test_v4i16_to_v4i16(<4 x i16> %in) nounwind{ define <8 x i8> @test_v4i16_to_v8i8(<4 x i16> %in) nounwind{ ; CHECK: test_v4i16_to_v8i8: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ret %val = bitcast <4 x i16> %in to <8 x i8> @@ -98,7 +98,7 @@ define <8 x i8> @test_v4i16_to_v8i8(<4 x i16> %in) nounwind{ define <1 x i64> @test_v2i32_to_v1i64(<2 x i32> %in) nounwind { ; CHECK: test_v2i32_to_v1i64: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ret %val = bitcast <2 x i32> %in to <1 x i64> @@ -107,7 +107,7 @@ define <1 x i64> @test_v2i32_to_v1i64(<2 x i32> %in) nounwind { define <2 x i32> @test_v2i32_to_v2i32(<2 x i32> %in) nounwind { ; CHECK: test_v2i32_to_v2i32: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ret %val = bitcast <2 x i32> %in to <2 x i32> @@ -116,7 +116,7 @@ define <2 x i32> @test_v2i32_to_v2i32(<2 x i32> %in) nounwind { define <2 x float> @test_v2i32_to_v2f32(<2 x i32> %in) nounwind{ ; CHECK: test_v2i32_to_v2f32: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ret %val = bitcast <2 x i32> %in to <2 x float> @@ -125,7 +125,7 @@ define <2 x float> @test_v2i32_to_v2f32(<2 x i32> %in) nounwind{ define <4 x i16> @test_v2i32_to_v4i16(<2 x i32> %in) nounwind{ ; CHECK: test_v2i32_to_v4i16: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ret %val = bitcast <2 x i32> %in to <4 x i16> @@ -134,7 +134,7 @@ define <4 x i16> @test_v2i32_to_v4i16(<2 x i32> %in) nounwind{ define <8 x i8> @test_v2i32_to_v8i8(<2 x i32> %in) nounwind{ ; CHECK: test_v2i32_to_v8i8: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ret %val = bitcast <2 x i32> %in to <8 x i8> @@ -145,7 +145,7 @@ define <8 x i8> @test_v2i32_to_v8i8(<2 x i32> %in) nounwind{ define <1 x i64> @test_v2f32_to_v1i64(<2 x float> %in) nounwind { ; CHECK: test_v2f32_to_v1i64: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ret %val = bitcast <2 x float> %in to <1 x i64> @@ -154,7 +154,7 @@ define <1 x i64> @test_v2f32_to_v1i64(<2 x float> %in) nounwind { define <2 x i32> @test_v2f32_to_v2i32(<2 x float> %in) nounwind { ; CHECK: test_v2f32_to_v2i32: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ret %val = bitcast <2 x float> %in to <2 x i32> @@ -163,7 +163,7 @@ define <2 x i32> @test_v2f32_to_v2i32(<2 x float> %in) nounwind { define <2 x float> @test_v2f32_to_v2f32(<2 x float> %in) nounwind{ ; CHECK: test_v2f32_to_v2f32: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ret %val = bitcast <2 x float> %in to <2 x float> @@ -172,7 +172,7 @@ define <2 x float> @test_v2f32_to_v2f32(<2 x float> %in) nounwind{ define <4 x i16> @test_v2f32_to_v4i16(<2 x float> %in) 
nounwind{ ; CHECK: test_v2f32_to_v4i16: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ret %val = bitcast <2 x float> %in to <4 x i16> @@ -181,7 +181,7 @@ define <4 x i16> @test_v2f32_to_v4i16(<2 x float> %in) nounwind{ define <8 x i8> @test_v2f32_to_v8i8(<2 x float> %in) nounwind{ ; CHECK: test_v2f32_to_v8i8: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ret %val = bitcast <2 x float> %in to <8 x i8> @@ -192,7 +192,7 @@ define <8 x i8> @test_v2f32_to_v8i8(<2 x float> %in) nounwind{ define <1 x i64> @test_v1i64_to_v1i64(<1 x i64> %in) nounwind { ; CHECK: test_v1i64_to_v1i64: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ret %val = bitcast <1 x i64> %in to <1 x i64> @@ -201,7 +201,7 @@ define <1 x i64> @test_v1i64_to_v1i64(<1 x i64> %in) nounwind { define <2 x i32> @test_v1i64_to_v2i32(<1 x i64> %in) nounwind { ; CHECK: test_v1i64_to_v2i32: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ret %val = bitcast <1 x i64> %in to <2 x i32> @@ -210,7 +210,7 @@ define <2 x i32> @test_v1i64_to_v2i32(<1 x i64> %in) nounwind { define <2 x float> @test_v1i64_to_v2f32(<1 x i64> %in) nounwind{ ; CHECK: test_v1i64_to_v2f32: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ret %val = bitcast <1 x i64> %in to <2 x float> @@ -219,7 +219,7 @@ define <2 x float> @test_v1i64_to_v2f32(<1 x i64> %in) nounwind{ define <4 x i16> @test_v1i64_to_v4i16(<1 x i64> %in) nounwind{ ; CHECK: test_v1i64_to_v4i16: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ret %val = bitcast <1 x i64> %in to <4 x i16> @@ -228,7 +228,7 @@ define <4 x i16> @test_v1i64_to_v4i16(<1 x i64> %in) nounwind{ define <8 x i8> @test_v1i64_to_v8i8(<1 x i64> %in) nounwind{ ; CHECK: test_v1i64_to_v8i8: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ret %val = bitcast <1 x i64> %in to <8 x i8> @@ -240,7 +240,7 @@ define <8 x i8> @test_v1i64_to_v8i8(<1 x i64> %in) nounwind{ define <2 x double> @test_v16i8_to_v2f64(<16 x i8> %in) nounwind { ; CHECK: test_v16i8_to_v2f64: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ret %val = bitcast <16 x i8> %in to <2 x double> @@ -249,7 +249,7 @@ define <2 x double> @test_v16i8_to_v2f64(<16 x i8> %in) nounwind { define <2 x i64> @test_v16i8_to_v2i64(<16 x i8> %in) nounwind { ; CHECK: test_v16i8_to_v2i64: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ret %val = bitcast <16 x i8> %in to <2 x i64> @@ -258,7 +258,7 @@ define <2 x i64> @test_v16i8_to_v2i64(<16 x i8> %in) nounwind { define <4 x i32> @test_v16i8_to_v4i32(<16 x i8> %in) nounwind { ; CHECK: test_v16i8_to_v4i32: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ret %val = bitcast <16 x i8> %in to <4 x i32> @@ -267,7 +267,7 @@ define <4 x i32> @test_v16i8_to_v4i32(<16 x i8> %in) nounwind { define <4 x float> @test_v16i8_to_v2f32(<16 x i8> %in) nounwind{ ; CHECK: test_v16i8_to_v2f32: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ret %val = bitcast <16 x i8> %in to <4 x float> @@ -276,7 +276,7 @@ define <4 x float> @test_v16i8_to_v2f32(<16 x i8> %in) nounwind{ define <8 x i16> @test_v16i8_to_v8i16(<16 x i8> %in) nounwind{ ; CHECK: test_v16i8_to_v8i16: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ret %val = bitcast <16 x i8> %in to <8 x i16> @@ -285,7 +285,7 @@ define <8 x i16> @test_v16i8_to_v8i16(<16 x i8> %in) nounwind{ define <16 x i8> @test_v16i8_to_v16i8(<16 x i8> %in) nounwind{ ; CHECK: test_v16i8_to_v16i8: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: 
ret %val = bitcast <16 x i8> %in to <16 x i8> @@ -296,7 +296,7 @@ define <16 x i8> @test_v16i8_to_v16i8(<16 x i8> %in) nounwind{ define <2 x double> @test_v8i16_to_v2f64(<8 x i16> %in) nounwind { ; CHECK: test_v8i16_to_v2f64: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ret %val = bitcast <8 x i16> %in to <2 x double> @@ -305,7 +305,7 @@ define <2 x double> @test_v8i16_to_v2f64(<8 x i16> %in) nounwind { define <2 x i64> @test_v8i16_to_v2i64(<8 x i16> %in) nounwind { ; CHECK: test_v8i16_to_v2i64: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ret %val = bitcast <8 x i16> %in to <2 x i64> @@ -314,7 +314,7 @@ define <2 x i64> @test_v8i16_to_v2i64(<8 x i16> %in) nounwind { define <4 x i32> @test_v8i16_to_v4i32(<8 x i16> %in) nounwind { ; CHECK: test_v8i16_to_v4i32: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ret %val = bitcast <8 x i16> %in to <4 x i32> @@ -323,7 +323,7 @@ define <4 x i32> @test_v8i16_to_v4i32(<8 x i16> %in) nounwind { define <4 x float> @test_v8i16_to_v2f32(<8 x i16> %in) nounwind{ ; CHECK: test_v8i16_to_v2f32: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ret %val = bitcast <8 x i16> %in to <4 x float> @@ -332,7 +332,7 @@ define <4 x float> @test_v8i16_to_v2f32(<8 x i16> %in) nounwind{ define <8 x i16> @test_v8i16_to_v8i16(<8 x i16> %in) nounwind{ ; CHECK: test_v8i16_to_v8i16: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ret %val = bitcast <8 x i16> %in to <8 x i16> @@ -341,7 +341,7 @@ define <8 x i16> @test_v8i16_to_v8i16(<8 x i16> %in) nounwind{ define <16 x i8> @test_v8i16_to_v16i8(<8 x i16> %in) nounwind{ ; CHECK: test_v8i16_to_v16i8: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ret %val = bitcast <8 x i16> %in to <16 x i8> @@ -352,7 +352,7 @@ define <16 x i8> @test_v8i16_to_v16i8(<8 x i16> %in) nounwind{ define <2 x double> @test_v4i32_to_v2f64(<4 x i32> %in) nounwind { ; CHECK: test_v4i32_to_v2f64: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ret %val = bitcast <4 x i32> %in to <2 x double> @@ -361,7 +361,7 @@ define <2 x double> @test_v4i32_to_v2f64(<4 x i32> %in) nounwind { define <2 x i64> @test_v4i32_to_v2i64(<4 x i32> %in) nounwind { ; CHECK: test_v4i32_to_v2i64: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ret %val = bitcast <4 x i32> %in to <2 x i64> @@ -370,7 +370,7 @@ define <2 x i64> @test_v4i32_to_v2i64(<4 x i32> %in) nounwind { define <4 x i32> @test_v4i32_to_v4i32(<4 x i32> %in) nounwind { ; CHECK: test_v4i32_to_v4i32: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ret %val = bitcast <4 x i32> %in to <4 x i32> @@ -379,7 +379,7 @@ define <4 x i32> @test_v4i32_to_v4i32(<4 x i32> %in) nounwind { define <4 x float> @test_v4i32_to_v2f32(<4 x i32> %in) nounwind{ ; CHECK: test_v4i32_to_v2f32: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ret %val = bitcast <4 x i32> %in to <4 x float> @@ -388,7 +388,7 @@ define <4 x float> @test_v4i32_to_v2f32(<4 x i32> %in) nounwind{ define <8 x i16> @test_v4i32_to_v8i16(<4 x i32> %in) nounwind{ ; CHECK: test_v4i32_to_v8i16: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ret %val = bitcast <4 x i32> %in to <8 x i16> @@ -397,7 +397,7 @@ define <8 x i16> @test_v4i32_to_v8i16(<4 x i32> %in) nounwind{ define <16 x i8> @test_v4i32_to_v16i8(<4 x i32> %in) nounwind{ ; CHECK: test_v4i32_to_v16i8: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ret %val = bitcast <4 x i32> %in to <16 x i8> @@ -408,7 +408,7 @@ define <16 x i8> 
@test_v4i32_to_v16i8(<4 x i32> %in) nounwind{ define <2 x double> @test_v4f32_to_v2f64(<4 x float> %in) nounwind { ; CHECK: test_v4f32_to_v2f64: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ret %val = bitcast <4 x float> %in to <2 x double> @@ -417,7 +417,7 @@ define <2 x double> @test_v4f32_to_v2f64(<4 x float> %in) nounwind { define <2 x i64> @test_v4f32_to_v2i64(<4 x float> %in) nounwind { ; CHECK: test_v4f32_to_v2i64: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ret %val = bitcast <4 x float> %in to <2 x i64> @@ -426,7 +426,7 @@ define <2 x i64> @test_v4f32_to_v2i64(<4 x float> %in) nounwind { define <4 x i32> @test_v4f32_to_v4i32(<4 x float> %in) nounwind { ; CHECK: test_v4f32_to_v4i32: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ret %val = bitcast <4 x float> %in to <4 x i32> @@ -435,7 +435,7 @@ define <4 x i32> @test_v4f32_to_v4i32(<4 x float> %in) nounwind { define <4 x float> @test_v4f32_to_v4f32(<4 x float> %in) nounwind{ ; CHECK: test_v4f32_to_v4f32: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ret %val = bitcast <4 x float> %in to <4 x float> @@ -444,7 +444,7 @@ define <4 x float> @test_v4f32_to_v4f32(<4 x float> %in) nounwind{ define <8 x i16> @test_v4f32_to_v8i16(<4 x float> %in) nounwind{ ; CHECK: test_v4f32_to_v8i16: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ret %val = bitcast <4 x float> %in to <8 x i16> @@ -453,7 +453,7 @@ define <8 x i16> @test_v4f32_to_v8i16(<4 x float> %in) nounwind{ define <16 x i8> @test_v4f32_to_v16i8(<4 x float> %in) nounwind{ ; CHECK: test_v4f32_to_v16i8: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ret %val = bitcast <4 x float> %in to <16 x i8> @@ -464,7 +464,7 @@ define <16 x i8> @test_v4f32_to_v16i8(<4 x float> %in) nounwind{ define <2 x double> @test_v2i64_to_v2f64(<2 x i64> %in) nounwind { ; CHECK: test_v2i64_to_v2f64: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ret %val = bitcast <2 x i64> %in to <2 x double> @@ -473,7 +473,7 @@ define <2 x double> @test_v2i64_to_v2f64(<2 x i64> %in) nounwind { define <2 x i64> @test_v2i64_to_v2i64(<2 x i64> %in) nounwind { ; CHECK: test_v2i64_to_v2i64: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ret %val = bitcast <2 x i64> %in to <2 x i64> @@ -482,7 +482,7 @@ define <2 x i64> @test_v2i64_to_v2i64(<2 x i64> %in) nounwind { define <4 x i32> @test_v2i64_to_v4i32(<2 x i64> %in) nounwind { ; CHECK: test_v2i64_to_v4i32: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ret %val = bitcast <2 x i64> %in to <4 x i32> @@ -491,7 +491,7 @@ define <4 x i32> @test_v2i64_to_v4i32(<2 x i64> %in) nounwind { define <4 x float> @test_v2i64_to_v4f32(<2 x i64> %in) nounwind{ ; CHECK: test_v2i64_to_v4f32: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ret %val = bitcast <2 x i64> %in to <4 x float> @@ -500,7 +500,7 @@ define <4 x float> @test_v2i64_to_v4f32(<2 x i64> %in) nounwind{ define <8 x i16> @test_v2i64_to_v8i16(<2 x i64> %in) nounwind{ ; CHECK: test_v2i64_to_v8i16: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ret %val = bitcast <2 x i64> %in to <8 x i16> @@ -509,7 +509,7 @@ define <8 x i16> @test_v2i64_to_v8i16(<2 x i64> %in) nounwind{ define <16 x i8> @test_v2i64_to_v16i8(<2 x i64> %in) nounwind{ ; CHECK: test_v2i64_to_v16i8: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ret %val = bitcast <2 x i64> %in to <16 x i8> @@ -520,7 +520,7 @@ define <16 x i8> @test_v2i64_to_v16i8(<2 x i64> %in) nounwind{ define <2 
x double> @test_v2f64_to_v2f64(<2 x double> %in) nounwind { ; CHECK: test_v2f64_to_v2f64: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ret %val = bitcast <2 x double> %in to <2 x double> @@ -529,7 +529,7 @@ define <2 x double> @test_v2f64_to_v2f64(<2 x double> %in) nounwind { define <2 x i64> @test_v2f64_to_v2i64(<2 x double> %in) nounwind { ; CHECK: test_v2f64_to_v2i64: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ret %val = bitcast <2 x double> %in to <2 x i64> @@ -538,7 +538,7 @@ define <2 x i64> @test_v2f64_to_v2i64(<2 x double> %in) nounwind { define <4 x i32> @test_v2f64_to_v4i32(<2 x double> %in) nounwind { ; CHECK: test_v2f64_to_v4i32: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ret %val = bitcast <2 x double> %in to <4 x i32> @@ -547,7 +547,7 @@ define <4 x i32> @test_v2f64_to_v4i32(<2 x double> %in) nounwind { define <4 x float> @test_v2f64_to_v4f32(<2 x double> %in) nounwind{ ; CHECK: test_v2f64_to_v4f32: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ret %val = bitcast <2 x double> %in to <4 x float> @@ -556,7 +556,7 @@ define <4 x float> @test_v2f64_to_v4f32(<2 x double> %in) nounwind{ define <8 x i16> @test_v2f64_to_v8i16(<2 x double> %in) nounwind{ ; CHECK: test_v2f64_to_v8i16: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ret %val = bitcast <2 x double> %in to <8 x i16> @@ -565,7 +565,7 @@ define <8 x i16> @test_v2f64_to_v8i16(<2 x double> %in) nounwind{ define <16 x i8> @test_v2f64_to_v16i8(<2 x double> %in) nounwind{ ; CHECK: test_v2f64_to_v16i8: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ret %val = bitcast <2 x double> %in to <16 x i8> diff --git a/test/CodeGen/AArch64/nest-register.ll b/test/CodeGen/AArch64/nest-register.ll index cc42913e10a6..b8651714be34 100644 --- a/test/CodeGen/AArch64/nest-register.ll +++ b/test/CodeGen/AArch64/nest-register.ll @@ -5,7 +5,7 @@ define i8* @nest_receiver(i8* nest %arg) nounwind { ; CHECK-LABEL: nest_receiver: -; CHECK-NEXT: // BB#0: +; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: mov x0, x18 ; CHECK-NEXT: ret diff --git a/test/CodeGen/AArch64/nontemporal.ll b/test/CodeGen/AArch64/nontemporal.ll index d8785f845c29..adb3faa91ba3 100644 --- a/test/CodeGen/AArch64/nontemporal.ll +++ b/test/CodeGen/AArch64/nontemporal.ll @@ -313,8 +313,8 @@ declare void @dummy(<4 x float>*) define void @test_stnp_v4f32_offset_alloca(<4 x float> %v) #0 { ; CHECK-LABEL: test_stnp_v4f32_offset_alloca: -; CHECK: stnp d0, d{{.*}}, [sp] -; CHECK-NEXT: mov x0, sp +; CHECK: mov x0, sp +; CHECK-NEXT: stnp d0, d{{.*}}, [sp] ; CHECK-NEXT: bl _dummy %tmp0 = alloca <4 x float> store <4 x float> %v, <4 x float>* %tmp0, align 1, !nontemporal !0 @@ -324,8 +324,8 @@ define void @test_stnp_v4f32_offset_alloca(<4 x float> %v) #0 { define void @test_stnp_v4f32_offset_alloca_2(<4 x float> %v) #0 { ; CHECK-LABEL: test_stnp_v4f32_offset_alloca_2: -; CHECK: stnp d0, d{{.*}}, [sp, #16] -; CHECK-NEXT: mov x0, sp +; CHECK: mov x0, sp +; CHECK-NEXT: stnp d0, d{{.*}}, [sp, #16] ; CHECK-NEXT: bl _dummy %tmp0 = alloca <4 x float>, i32 2 %tmp1 = getelementptr <4 x float>, <4 x float>* %tmp0, i32 1 diff --git a/test/CodeGen/AArch64/phi-dbg.ll b/test/CodeGen/AArch64/phi-dbg.ll index a1adf0f50d9b..80bc885afa5c 100644 --- a/test/CodeGen/AArch64/phi-dbg.ll +++ b/test/CodeGen/AArch64/phi-dbg.ll @@ -30,7 +30,7 @@ define i32 @func(i32) #0 !dbg !8 { ; CHECK: ldr w[[REG:[0-9]+]], [sp, #8] ; CHECK-NEXT: .Ltmp call void @llvm.dbg.value(metadata i32 %.0, i64 0, metadata !15, metadata !13), !dbg !16 -; 
CHECK-NEXT: //DEBUG_VALUE: func:c <- %W[[REG]] +; CHECK-NEXT: //DEBUG_VALUE: func:c <- %w[[REG]] %5 = add nsw i32 %.0, %0, !dbg !22 call void @llvm.dbg.value(metadata i32 %5, i64 0, metadata !15, metadata !13), !dbg !16 ret i32 %5, !dbg !23 diff --git a/test/CodeGen/AArch64/pr33172.ll b/test/CodeGen/AArch64/pr33172.ll index 1e1da78b28ff..098d5358b02d 100644 --- a/test/CodeGen/AArch64/pr33172.ll +++ b/test/CodeGen/AArch64/pr33172.ll @@ -21,12 +21,12 @@ entry: %wide.load8291059.4 = load i64, i64* bitcast (float* getelementptr inbounds ([200 x float], [200 x float]* @main.b, i64 0, i64 18) to i64*), align 8 store i64 %wide.load8281058.4, i64* bitcast (float* getelementptr inbounds ([200 x float], [200 x float]* @main.x, i64 0, i64 16) to i64*), align 8 store i64 %wide.load8291059.4, i64* bitcast (float* getelementptr inbounds ([200 x float], [200 x float]* @main.x, i64 0, i64 18) to i64*), align 8 - tail call void @llvm.memset.p0i8.i64(i8* bitcast ([200 x float]* @main.b to i8*), i8 0, i64 undef, i32 8, i1 false) #2 + tail call void @llvm.memset.p0i8.i64(i8* align 8 bitcast ([200 x float]* @main.b to i8*), i8 0, i64 undef, i1 false) #2 unreachable } ; Function Attrs: argmemonly nounwind -declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1) #1 +declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) #1 attributes #1 = { argmemonly nounwind } attributes #2 = { nounwind } diff --git a/test/CodeGen/AArch64/preferred-alignment.ll b/test/CodeGen/AArch64/preferred-alignment.ll index c032e83d268f..b39a5e8703d7 100644 --- a/test/CodeGen/AArch64/preferred-alignment.ll +++ b/test/CodeGen/AArch64/preferred-alignment.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=aarch64 -O0 < %s | FileCheck %s +; RUN: llc -mtriple=aarch64 -O0 -fast-isel < %s | FileCheck %s ; Function Attrs: nounwind define i32 @foo() #0 { diff --git a/test/CodeGen/AArch64/recp-fastmath.ll b/test/CodeGen/AArch64/recp-fastmath.ll index 4776931cf062..9f00621eff6b 100644 --- a/test/CodeGen/AArch64/recp-fastmath.ll +++ b/test/CodeGen/AArch64/recp-fastmath.ll @@ -5,7 +5,7 @@ define float @frecp0(float %x) #0 { ret float %div ; CHECK-LABEL: frecp0: -; CHECK-NEXT: BB#0 +; CHECK-NEXT: %bb.0 ; CHECK-NEXT: fmov ; CHECK-NEXT: fdiv } @@ -15,7 +15,7 @@ define float @frecp1(float %x) #1 { ret float %div ; CHECK-LABEL: frecp1: -; CHECK-NEXT: BB#0 +; CHECK-NEXT: %bb.0 ; CHECK-NEXT: frecpe [[R:s[0-7]]] ; CHECK-NEXT: frecps {{s[0-7](, s[0-7])?}}, [[R]] ; CHECK: frecps {{s[0-7]}}, {{s[0-7]}}, {{s[0-7]}} @@ -27,7 +27,7 @@ define <2 x float> @f2recp0(<2 x float> %x) #0 { ret <2 x float> %div ; CHECK-LABEL: f2recp0: -; CHECK-NEXT: BB#0 +; CHECK-NEXT: %bb.0 ; CHECK-NEXT: fmov ; CHECK-NEXT: fdiv } @@ -37,7 +37,7 @@ define <2 x float> @f2recp1(<2 x float> %x) #1 { ret <2 x float> %div ; CHECK-LABEL: f2recp1: -; CHECK-NEXT: BB#0 +; CHECK-NEXT: %bb.0 ; CHECK-NEXT: frecpe [[R:v[0-7]\.2s]] ; CHECK-NEXT: frecps {{v[0-7]\.2s(, v[0-7].2s)?}}, [[R]] ; CHECK: frecps {{v[0-7]\.2s}}, {{v[0-7]\.2s}}, {{v[0-7]\.2s}} @@ -49,7 +49,7 @@ define <4 x float> @f4recp0(<4 x float> %x) #0 { ret <4 x float> %div ; CHECK-LABEL: f4recp0: -; CHECK-NEXT: BB#0 +; CHECK-NEXT: %bb.0 ; CHECK-NEXT: fmov ; CHECK-NEXT: fdiv } @@ -59,7 +59,7 @@ define <4 x float> @f4recp1(<4 x float> %x) #1 { ret <4 x float> %div ; CHECK-LABEL: f4recp1: -; CHECK-NEXT: BB#0 +; CHECK-NEXT: %bb.0 ; CHECK-NEXT: frecpe [[R:v[0-7]\.4s]] ; CHECK-NEXT: frecps {{v[0-7]\.4s(, v[0-7].4s)?}}, [[R]] ; CHECK: frecps {{v[0-7]\.4s}}, {{v[0-7]\.4s}}, {{v[0-7]\.4s}} @@ -71,7 +71,7 @@ define <8 x 
float> @f8recp0(<8 x float> %x) #0 { ret <8 x float> %div ; CHECK-LABEL: f8recp0: -; CHECK-NEXT: BB#0 +; CHECK-NEXT: %bb.0 ; CHECK-NEXT: fmov ; CHECK-NEXT: fdiv ; CHECK-NEXT: fdiv @@ -82,7 +82,7 @@ define <8 x float> @f8recp1(<8 x float> %x) #1 { ret <8 x float> %div ; CHECK-LABEL: f8recp1: -; CHECK-NEXT: BB#0 +; CHECK-NEXT: %bb.0 ; CHECK-NEXT: frecpe [[R:v[0-7]\.4s]] ; CHECK: frecps {{v[0-7]\.4s(, v[0-7].4s)?}}, [[R]] ; CHECK: frecps {{v[0-7]\.4s(, v[0-7].4s)?}}, {{v[0-7]\.4s}} @@ -96,7 +96,7 @@ define double @drecp0(double %x) #0 { ret double %div ; CHECK-LABEL: drecp0: -; CHECK-NEXT: BB#0 +; CHECK-NEXT: %bb.0 ; CHECK-NEXT: fmov ; CHECK-NEXT: fdiv } @@ -106,7 +106,7 @@ define double @drecp1(double %x) #1 { ret double %div ; CHECK-LABEL: drecp1: -; CHECK-NEXT: BB#0 +; CHECK-NEXT: %bb.0 ; CHECK-NEXT: frecpe [[R:d[0-7]]] ; CHECK-NEXT: frecps {{d[0-7](, d[0-7])?}}, [[R]] ; CHECK: frecps {{d[0-7]}}, {{d[0-7]}}, {{d[0-7]}} @@ -119,7 +119,7 @@ define <2 x double> @d2recp0(<2 x double> %x) #0 { ret <2 x double> %div ; CHECK-LABEL: d2recp0: -; CHECK-NEXT: BB#0 +; CHECK-NEXT: %bb.0 ; CHECK-NEXT: fmov ; CHECK-NEXT: fdiv } @@ -129,7 +129,7 @@ define <2 x double> @d2recp1(<2 x double> %x) #1 { ret <2 x double> %div ; CHECK-LABEL: d2recp1: -; CHECK-NEXT: BB#0 +; CHECK-NEXT: %bb.0 ; CHECK-NEXT: frecpe [[R:v[0-7]\.2d]] ; CHECK-NEXT: frecps {{v[0-7]\.2d(, v[0-7].2d)?}}, [[R]] ; CHECK: frecps {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, {{v[0-7]\.2d}} @@ -142,7 +142,7 @@ define <4 x double> @d4recp0(<4 x double> %x) #0 { ret <4 x double> %div ; CHECK-LABEL: d4recp0: -; CHECK-NEXT: BB#0 +; CHECK-NEXT: %bb.0 ; CHECK-NEXT: fmov ; CHECK-NEXT: fdiv ; CHECK-NEXT: fdiv @@ -153,7 +153,7 @@ define <4 x double> @d4recp1(<4 x double> %x) #1 { ret <4 x double> %div ; CHECK-LABEL: d4recp1: -; CHECK-NEXT: BB#0 +; CHECK-NEXT: %bb.0 ; CHECK-NEXT: frecpe [[R:v[0-7]\.2d]] ; CHECK: frecps {{v[0-7]\.2d(, v[0-7].2d)?}}, [[R]] ; CHECK: frecps {{v[0-7]\.2d}}, {{v[0-7]\.2d}}, {{v[0-7]\.2d}} diff --git a/test/CodeGen/AArch64/scheduledag-constreg.mir b/test/CodeGen/AArch64/scheduledag-constreg.mir index 6b83dc715e0a..013f59f52a9c 100644 --- a/test/CodeGen/AArch64/scheduledag-constreg.mir +++ b/test/CodeGen/AArch64/scheduledag-constreg.mir @@ -7,16 +7,16 @@ # Check that the instructions are not dependent on each other, even though # they all read/write to the zero register. 
# CHECK-LABEL: MI Scheduling -# CHECK: SU(0): %WZR = SUBSWri %W1, 0, 0, %NZCV +# CHECK: SU(0): dead %wzr = SUBSWri %w1, 0, 0, implicit-def dead %nzcv # CHECK: # succs left : 0 # CHECK-NOT: Successors: -# CHECK: SU(1): %W2 = COPY %WZR +# CHECK: SU(1): %w2 = COPY %wzr # CHECK: # succs left : 0 # CHECK-NOT: Successors: -# CHECK: SU(2): %WZR = SUBSWri %W3, 0, 0, %NZCV +# CHECK: SU(2): dead %wzr = SUBSWri %w3, 0, 0, implicit-def dead %nzcv # CHECK: # succs left : 0 # CHECK-NOT: Successors: -# CHECK: SU(3): %W4 = COPY %WZR +# CHECK: SU(3): %w4 = COPY %wzr # CHECK: # succs left : 0 # CHECK-NOT: Successors: name: func diff --git a/test/CodeGen/AArch64/selectcc-to-shiftand.ll b/test/CodeGen/AArch64/selectcc-to-shiftand.ll index 0d89cdedfa8a..99190633547c 100644 --- a/test/CodeGen/AArch64/selectcc-to-shiftand.ll +++ b/test/CodeGen/AArch64/selectcc-to-shiftand.ll @@ -4,7 +4,7 @@ define i32 @neg_sel_constants(i32 %a) { ; CHECK-LABEL: neg_sel_constants: -; CHECK: // BB#0: +; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #5 ; CHECK-NEXT: and w0, w8, w0, asr #31 ; CHECK-NEXT: ret @@ -18,7 +18,7 @@ define i32 @neg_sel_constants(i32 %a) { define i32 @neg_sel_special_constant(i32 %a) { ; CHECK-LABEL: neg_sel_special_constant: -; CHECK: // BB#0: +; CHECK: // %bb.0: ; CHECK-NEXT: lsr w8, w0, #22 ; CHECK-NEXT: and w0, w8, #0x200 ; CHECK-NEXT: ret @@ -32,7 +32,7 @@ define i32 @neg_sel_special_constant(i32 %a) { define i32 @neg_sel_variable_and_zero(i32 %a, i32 %b) { ; CHECK-LABEL: neg_sel_variable_and_zero: -; CHECK: // BB#0: +; CHECK: // %bb.0: ; CHECK-NEXT: and w0, w1, w0, asr #31 ; CHECK-NEXT: ret ; @@ -45,7 +45,7 @@ define i32 @neg_sel_variable_and_zero(i32 %a, i32 %b) { define i32 @not_pos_sel_same_variable(i32 %a) { ; CHECK-LABEL: not_pos_sel_same_variable: -; CHECK: // BB#0: +; CHECK: // %bb.0: ; CHECK-NEXT: and w0, w0, w0, asr #31 ; CHECK-NEXT: ret ; @@ -60,7 +60,7 @@ define i32 @not_pos_sel_same_variable(i32 %a) { define i32 @pos_sel_constants(i32 %a) { ; CHECK-LABEL: pos_sel_constants: -; CHECK: // BB#0: +; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #5 ; CHECK-NEXT: bic w0, w8, w0, asr #31 ; CHECK-NEXT: ret @@ -74,7 +74,7 @@ define i32 @pos_sel_constants(i32 %a) { define i32 @pos_sel_special_constant(i32 %a) { ; CHECK-LABEL: pos_sel_special_constant: -; CHECK: // BB#0: +; CHECK: // %bb.0: ; CHECK-NEXT: orr w8, wzr, #0x200 ; CHECK-NEXT: bic w0, w8, w0, lsr #22 ; CHECK-NEXT: ret @@ -88,7 +88,7 @@ define i32 @pos_sel_special_constant(i32 %a) { define i32 @pos_sel_variable_and_zero(i32 %a, i32 %b) { ; CHECK-LABEL: pos_sel_variable_and_zero: -; CHECK: // BB#0: +; CHECK: // %bb.0: ; CHECK-NEXT: bic w0, w1, w0, asr #31 ; CHECK-NEXT: ret ; @@ -101,7 +101,7 @@ define i32 @pos_sel_variable_and_zero(i32 %a, i32 %b) { define i32 @not_neg_sel_same_variable(i32 %a) { ; CHECK-LABEL: not_neg_sel_same_variable: -; CHECK: // BB#0: +; CHECK: // %bb.0: ; CHECK-NEXT: bic w0, w0, w0, asr #31 ; CHECK-NEXT: ret ; @@ -115,7 +115,7 @@ define i32 @not_neg_sel_same_variable(i32 %a) { ; ret = (x-y) > 0 ? 
x-y : 0 define i32 @PR31175(i32 %x, i32 %y) { ; CHECK-LABEL: PR31175: -; CHECK: // BB#0: +; CHECK: // %bb.0: ; CHECK-NEXT: sub w8, w0, w1 ; CHECK-NEXT: bic w0, w8, w8, asr #31 ; CHECK-NEXT: ret diff --git a/test/CodeGen/AArch64/sibling-call.ll b/test/CodeGen/AArch64/sibling-call.ll index 9a44b43d14e6..be59f27fa858 100644 --- a/test/CodeGen/AArch64/sibling-call.ll +++ b/test/CodeGen/AArch64/sibling-call.ll @@ -6,7 +6,7 @@ declare void @callee_stack16([8 x i32], i64, i64) define void @caller_to0_from0() nounwind { ; CHECK-LABEL: caller_to0_from0: -; CHECK-NEXT: // BB +; CHECK-NEXT: // %bb. tail call void @callee_stack0() ret void ; CHECK-NEXT: b callee_stack0 @@ -14,7 +14,7 @@ define void @caller_to0_from0() nounwind { define void @caller_to0_from8([8 x i32], i64) nounwind{ ; CHECK-LABEL: caller_to0_from8: -; CHECK-NEXT: // BB +; CHECK-NEXT: // %bb. tail call void @callee_stack0() ret void diff --git a/test/CodeGen/MIR/AArch64/spill-fold.mir b/test/CodeGen/AArch64/spill-fold.mir similarity index 94% rename from test/CodeGen/MIR/AArch64/spill-fold.mir rename to test/CodeGen/AArch64/spill-fold.mir index f812bc710aaf..b9406e54068e 100644 --- a/test/CodeGen/MIR/AArch64/spill-fold.mir +++ b/test/CodeGen/AArch64/spill-fold.mir @@ -16,13 +16,13 @@ body: | bb.0: ; CHECK: STRXui %xzr, %stack.0, 0 :: (store 8 into %stack.0) undef %0.sub_32 = COPY %wzr - INLINEASM $nop, 1, 12, implicit-def dead %x0, 12, implicit-def dead %x1, 12, implicit-def dead %x2, 12, implicit-def dead %x3, 12, implicit-def dead %x4, 12, implicit-def dead %x5, 12, implicit-def dead %x6, 12, implicit-def dead %x7, 12, implicit-def dead %x8, 12, implicit-def dead %x9, 12, implicit-def dead %x10, 12, implicit-def dead %x11, 12, implicit-def dead %x12, 12, implicit-def dead %x13, 12, implicit-def dead %x14, 12, implicit-def dead %x15, 12, implicit-def dead %x16, 12, implicit-def dead %x17, 12, implicit-def dead %x18, 12, implicit-def dead %x19, 12, implicit-def dead %x20, 12, implicit-def dead %x21, 12, implicit-def dead %x22, 12, implicit-def dead %x23, 12, implicit-def dead %x24, 12, implicit-def dead %x25, 12, implicit-def dead %x26, 12, implicit-def dead %x27, 12, implicit-def dead %x28, 12, implicit-def dead %fp, 12, implicit-def dead %lr, 12, implicit-def %sp + INLINEASM &nop, 1, 12, implicit-def dead %x0, 12, implicit-def dead %x1, 12, implicit-def dead %x2, 12, implicit-def dead %x3, 12, implicit-def dead %x4, 12, implicit-def dead %x5, 12, implicit-def dead %x6, 12, implicit-def dead %x7, 12, implicit-def dead %x8, 12, implicit-def dead %x9, 12, implicit-def dead %x10, 12, implicit-def dead %x11, 12, implicit-def dead %x12, 12, implicit-def dead %x13, 12, implicit-def dead %x14, 12, implicit-def dead %x15, 12, implicit-def dead %x16, 12, implicit-def dead %x17, 12, implicit-def dead %x18, 12, implicit-def dead %x19, 12, implicit-def dead %x20, 12, implicit-def dead %x21, 12, implicit-def dead %x22, 12, implicit-def dead %x23, 12, implicit-def dead %x24, 12, implicit-def dead %x25, 12, implicit-def dead %x26, 12, implicit-def dead %x27, 12, implicit-def dead %x28, 12, implicit-def dead %fp, 12, implicit-def dead %lr, 12, implicit-def %sp %x0 = COPY %0 RET_ReallyLR implicit %x0 ... --- # CHECK-LABEL: name: test_subreg_spill_fold2 -# Similar to test_subreg_spill_fold, but with a vreg0 register class not containing %WZR. +# Similar to test_subreg_spill_fold, but with a %0 register class not containing %WZR. 
name: test_subreg_spill_fold2 registers: - { id: 0, class: gpr64sp } @@ -30,7 +30,7 @@ body: | bb.0: ; CHECK: STRXui %xzr, %stack.0, 0 :: (store 8 into %stack.0) undef %0.sub_32 = COPY %wzr - INLINEASM $nop, 1, 12, implicit-def dead %x0, 12, implicit-def dead %x1, 12, implicit-def dead %x2, 12, implicit-def dead %x3, 12, implicit-def dead %x4, 12, implicit-def dead %x5, 12, implicit-def dead %x6, 12, implicit-def dead %x7, 12, implicit-def dead %x8, 12, implicit-def dead %x9, 12, implicit-def dead %x10, 12, implicit-def dead %x11, 12, implicit-def dead %x12, 12, implicit-def dead %x13, 12, implicit-def dead %x14, 12, implicit-def dead %x15, 12, implicit-def dead %x16, 12, implicit-def dead %x17, 12, implicit-def dead %x18, 12, implicit-def dead %x19, 12, implicit-def dead %x20, 12, implicit-def dead %x21, 12, implicit-def dead %x22, 12, implicit-def dead %x23, 12, implicit-def dead %x24, 12, implicit-def dead %x25, 12, implicit-def dead %x26, 12, implicit-def dead %x27, 12, implicit-def dead %x28, 12, implicit-def dead %fp, 12, implicit-def dead %lr, 12, implicit-def %sp + INLINEASM &nop, 1, 12, implicit-def dead %x0, 12, implicit-def dead %x1, 12, implicit-def dead %x2, 12, implicit-def dead %x3, 12, implicit-def dead %x4, 12, implicit-def dead %x5, 12, implicit-def dead %x6, 12, implicit-def dead %x7, 12, implicit-def dead %x8, 12, implicit-def dead %x9, 12, implicit-def dead %x10, 12, implicit-def dead %x11, 12, implicit-def dead %x12, 12, implicit-def dead %x13, 12, implicit-def dead %x14, 12, implicit-def dead %x15, 12, implicit-def dead %x16, 12, implicit-def dead %x17, 12, implicit-def dead %x18, 12, implicit-def dead %x19, 12, implicit-def dead %x20, 12, implicit-def dead %x21, 12, implicit-def dead %x22, 12, implicit-def dead %x23, 12, implicit-def dead %x24, 12, implicit-def dead %x25, 12, implicit-def dead %x26, 12, implicit-def dead %x27, 12, implicit-def dead %x28, 12, implicit-def dead %fp, 12, implicit-def dead %lr, 12, implicit-def %sp %x0 = ADDXri %0, 1, 0 RET_ReallyLR implicit %x0 ... 
@@ -44,7 +44,7 @@ body: | bb.0: ; CHECK: STRXui %xzr, %stack.0, 0 :: (store 8 into %stack.0) undef %0.ssub = COPY %wzr - INLINEASM $nop, 1, 12, implicit-def dead %d0, 12, implicit-def dead %d1, 12, implicit-def dead %d2, 12, implicit-def dead %d3, 12, implicit-def dead %d4, 12, implicit-def dead %d5, 12, implicit-def dead %d6, 12, implicit-def dead %d7, 12, implicit-def dead %d8, 12, implicit-def dead %d9, 12, implicit-def dead %d10, 12, implicit-def dead %d11, 12, implicit-def dead %d12, 12, implicit-def dead %d13, 12, implicit-def dead %d14, 12, implicit-def dead %d15, 12, implicit-def dead %d16, 12, implicit-def dead %d17, 12, implicit-def dead %d18, 12, implicit-def dead %d19, 12, implicit-def dead %d20, 12, implicit-def dead %d21, 12, implicit-def dead %d22, 12, implicit-def dead %d23, 12, implicit-def dead %d24, 12, implicit-def dead %d25, 12, implicit-def dead %d26, 12, implicit-def dead %d27, 12, implicit-def dead %d28, 12, implicit-def dead %d29, 12, implicit-def dead %d30, 12, implicit-def %d31 + INLINEASM &nop, 1, 12, implicit-def dead %d0, 12, implicit-def dead %d1, 12, implicit-def dead %d2, 12, implicit-def dead %d3, 12, implicit-def dead %d4, 12, implicit-def dead %d5, 12, implicit-def dead %d6, 12, implicit-def dead %d7, 12, implicit-def dead %d8, 12, implicit-def dead %d9, 12, implicit-def dead %d10, 12, implicit-def dead %d11, 12, implicit-def dead %d12, 12, implicit-def dead %d13, 12, implicit-def dead %d14, 12, implicit-def dead %d15, 12, implicit-def dead %d16, 12, implicit-def dead %d17, 12, implicit-def dead %d18, 12, implicit-def dead %d19, 12, implicit-def dead %d20, 12, implicit-def dead %d21, 12, implicit-def dead %d22, 12, implicit-def dead %d23, 12, implicit-def dead %d24, 12, implicit-def dead %d25, 12, implicit-def dead %d26, 12, implicit-def dead %d27, 12, implicit-def dead %d28, 12, implicit-def dead %d29, 12, implicit-def dead %d30, 12, implicit-def %d31 %x0 = COPY %0 RET_ReallyLR implicit %x0 ... 
@@ -58,7 +58,7 @@ registers: body: | bb.0: %0 = COPY %wzr - INLINEASM $nop, 1, 12, implicit-def dead %x0, 12, implicit-def dead %x1, 12, implicit-def dead %x2, 12, implicit-def dead %x3, 12, implicit-def dead %x4, 12, implicit-def dead %x5, 12, implicit-def dead %x6, 12, implicit-def dead %x7, 12, implicit-def dead %x8, 12, implicit-def dead %x9, 12, implicit-def dead %x10, 12, implicit-def dead %x11, 12, implicit-def dead %x12, 12, implicit-def dead %x13, 12, implicit-def dead %x14, 12, implicit-def dead %x15, 12, implicit-def dead %x16, 12, implicit-def dead %x17, 12, implicit-def dead %x18, 12, implicit-def dead %x19, 12, implicit-def dead %x20, 12, implicit-def dead %x21, 12, implicit-def dead %x22, 12, implicit-def dead %x23, 12, implicit-def dead %x24, 12, implicit-def dead %x25, 12, implicit-def dead %x26, 12, implicit-def dead %x27, 12, implicit-def dead %x28, 12, implicit-def dead %fp, 12, implicit-def dead %lr, 12, implicit-def %sp + INLINEASM &nop, 1, 12, implicit-def dead %x0, 12, implicit-def dead %x1, 12, implicit-def dead %x2, 12, implicit-def dead %x3, 12, implicit-def dead %x4, 12, implicit-def dead %x5, 12, implicit-def dead %x6, 12, implicit-def dead %x7, 12, implicit-def dead %x8, 12, implicit-def dead %x9, 12, implicit-def dead %x10, 12, implicit-def dead %x11, 12, implicit-def dead %x12, 12, implicit-def dead %x13, 12, implicit-def dead %x14, 12, implicit-def dead %x15, 12, implicit-def dead %x16, 12, implicit-def dead %x17, 12, implicit-def dead %x18, 12, implicit-def dead %x19, 12, implicit-def dead %x20, 12, implicit-def dead %x21, 12, implicit-def dead %x22, 12, implicit-def dead %x23, 12, implicit-def dead %x24, 12, implicit-def dead %x25, 12, implicit-def dead %x26, 12, implicit-def dead %x27, 12, implicit-def dead %x28, 12, implicit-def dead %fp, 12, implicit-def dead %lr, 12, implicit-def %sp ; CHECK: undef %1.sub_32:gpr64 = LDRWui %stack.0, 0 :: (load 4 from %stack.0) undef %1.sub_32 = COPY %0 %x0 = COPY %1 @@ -74,7 +74,7 @@ registers: body: | bb.0: %0 = COPY %wzr - INLINEASM $nop, 1, 12, implicit-def dead %x0, 12, implicit-def dead %x1, 12, implicit-def dead %x2, 12, implicit-def dead %x3, 12, implicit-def dead %x4, 12, implicit-def dead %x5, 12, implicit-def dead %x6, 12, implicit-def dead %x7, 12, implicit-def dead %x8, 12, implicit-def dead %x9, 12, implicit-def dead %x10, 12, implicit-def dead %x11, 12, implicit-def dead %x12, 12, implicit-def dead %x13, 12, implicit-def dead %x14, 12, implicit-def dead %x15, 12, implicit-def dead %x16, 12, implicit-def dead %x17, 12, implicit-def dead %x18, 12, implicit-def dead %x19, 12, implicit-def dead %x20, 12, implicit-def dead %x21, 12, implicit-def dead %x22, 12, implicit-def dead %x23, 12, implicit-def dead %x24, 12, implicit-def dead %x25, 12, implicit-def dead %x26, 12, implicit-def dead %x27, 12, implicit-def dead %x28, 12, implicit-def dead %fp, 12, implicit-def dead %lr, 12, implicit-def %sp + INLINEASM &nop, 1, 12, implicit-def dead %x0, 12, implicit-def dead %x1, 12, implicit-def dead %x2, 12, implicit-def dead %x3, 12, implicit-def dead %x4, 12, implicit-def dead %x5, 12, implicit-def dead %x6, 12, implicit-def dead %x7, 12, implicit-def dead %x8, 12, implicit-def dead %x9, 12, implicit-def dead %x10, 12, implicit-def dead %x11, 12, implicit-def dead %x12, 12, implicit-def dead %x13, 12, implicit-def dead %x14, 12, implicit-def dead %x15, 12, implicit-def dead %x16, 12, implicit-def dead %x17, 12, implicit-def dead %x18, 12, implicit-def dead %x19, 12, implicit-def dead %x20, 12, implicit-def dead %x21, 
12, implicit-def dead %x22, 12, implicit-def dead %x23, 12, implicit-def dead %x24, 12, implicit-def dead %x25, 12, implicit-def dead %x26, 12, implicit-def dead %x27, 12, implicit-def dead %x28, 12, implicit-def dead %fp, 12, implicit-def dead %lr, 12, implicit-def %sp ; CHECK: undef %1.ssub:fpr64 = LDRSui %stack.0, 0 :: (load 4 from %stack.0) undef %1.ssub = COPY %0 %d0 = COPY %1 diff --git a/test/CodeGen/AArch64/spill-undef.mir b/test/CodeGen/AArch64/spill-undef.mir index c4f589b5cc49..ddd02d1a86de 100644 --- a/test/CodeGen/AArch64/spill-undef.mir +++ b/test/CodeGen/AArch64/spill-undef.mir @@ -54,10 +54,10 @@ body: | bb.1: %4 = ADRP target-flags(aarch64-page) @g %8 = LDRWui %4, target-flags(aarch64-pageoff, aarch64-nc) @g :: (volatile dereferenceable load 4 from @g) - INLINEASM $nop, 1, 12, implicit-def dead early-clobber %x0, 12, implicit-def dead early-clobber %x1, 12, implicit-def dead early-clobber %x2, 12, implicit-def dead early-clobber %x3, 12, implicit-def dead early-clobber %x4, 12, implicit-def dead early-clobber %x5, 12, implicit-def dead early-clobber %x6, 12, implicit-def dead early-clobber %x7, 12, implicit-def dead early-clobber %x8, 12, implicit-def dead early-clobber %x9, 12, implicit-def dead early-clobber %x10, 12, implicit-def dead early-clobber %x11, 12, implicit-def dead early-clobber %x12, 12, implicit-def dead early-clobber %x13, 12, implicit-def dead early-clobber %x14, 12, implicit-def dead early-clobber %x15, 12, implicit-def dead early-clobber %x16, 12, implicit-def dead early-clobber %x17, 12, implicit-def dead early-clobber %x18, 12, implicit-def dead early-clobber %x19, 12, implicit-def dead early-clobber %x20, 12, implicit-def dead early-clobber %x21, 12, implicit-def dead early-clobber %x22, 12, implicit-def dead early-clobber %x23, 12, implicit-def dead early-clobber %x24, 12, implicit-def dead early-clobber %x25, 12, implicit-def dead early-clobber %x26, 12, implicit-def dead early-clobber %x27, 12, implicit-def dead early-clobber %x28, 12, implicit-def dead early-clobber %fp, 12, implicit-def dead early-clobber %lr + INLINEASM &nop, 1, 12, implicit-def dead early-clobber %x0, 12, implicit-def dead early-clobber %x1, 12, implicit-def dead early-clobber %x2, 12, implicit-def dead early-clobber %x3, 12, implicit-def dead early-clobber %x4, 12, implicit-def dead early-clobber %x5, 12, implicit-def dead early-clobber %x6, 12, implicit-def dead early-clobber %x7, 12, implicit-def dead early-clobber %x8, 12, implicit-def dead early-clobber %x9, 12, implicit-def dead early-clobber %x10, 12, implicit-def dead early-clobber %x11, 12, implicit-def dead early-clobber %x12, 12, implicit-def dead early-clobber %x13, 12, implicit-def dead early-clobber %x14, 12, implicit-def dead early-clobber %x15, 12, implicit-def dead early-clobber %x16, 12, implicit-def dead early-clobber %x17, 12, implicit-def dead early-clobber %x18, 12, implicit-def dead early-clobber %x19, 12, implicit-def dead early-clobber %x20, 12, implicit-def dead early-clobber %x21, 12, implicit-def dead early-clobber %x22, 12, implicit-def dead early-clobber %x23, 12, implicit-def dead early-clobber %x24, 12, implicit-def dead early-clobber %x25, 12, implicit-def dead early-clobber %x26, 12, implicit-def dead early-clobber %x27, 12, implicit-def dead early-clobber %x28, 12, implicit-def dead early-clobber %fp, 12, implicit-def dead early-clobber %lr bb.2: - INLINEASM $nop, 1, 12, implicit-def dead early-clobber %x0, 12, implicit-def dead early-clobber %x1, 12, implicit-def dead early-clobber %x2, 12, 
implicit-def dead early-clobber %x3, 12, implicit-def dead early-clobber %x4, 12, implicit-def dead early-clobber %x5, 12, implicit-def dead early-clobber %x6, 12, implicit-def dead early-clobber %x7, 12, implicit-def dead early-clobber %x8, 12, implicit-def dead early-clobber %x9, 12, implicit-def dead early-clobber %x10, 12, implicit-def dead early-clobber %x11, 12, implicit-def dead early-clobber %x12, 12, implicit-def dead early-clobber %x13, 12, implicit-def dead early-clobber %x14, 12, implicit-def dead early-clobber %x15, 12, implicit-def dead early-clobber %x16, 12, implicit-def dead early-clobber %x17, 12, implicit-def dead early-clobber %x18, 12, implicit-def dead early-clobber %x19, 12, implicit-def dead early-clobber %x20, 12, implicit-def dead early-clobber %x21, 12, implicit-def dead early-clobber %x22, 12, implicit-def dead early-clobber %x23, 12, implicit-def dead early-clobber %x24, 12, implicit-def dead early-clobber %x25, 12, implicit-def dead early-clobber %x26, 12, implicit-def dead early-clobber %x27, 12, implicit-def dead early-clobber %x28, 12, implicit-def dead early-clobber %fp, 12, implicit-def dead early-clobber %lr + INLINEASM &nop, 1, 12, implicit-def dead early-clobber %x0, 12, implicit-def dead early-clobber %x1, 12, implicit-def dead early-clobber %x2, 12, implicit-def dead early-clobber %x3, 12, implicit-def dead early-clobber %x4, 12, implicit-def dead early-clobber %x5, 12, implicit-def dead early-clobber %x6, 12, implicit-def dead early-clobber %x7, 12, implicit-def dead early-clobber %x8, 12, implicit-def dead early-clobber %x9, 12, implicit-def dead early-clobber %x10, 12, implicit-def dead early-clobber %x11, 12, implicit-def dead early-clobber %x12, 12, implicit-def dead early-clobber %x13, 12, implicit-def dead early-clobber %x14, 12, implicit-def dead early-clobber %x15, 12, implicit-def dead early-clobber %x16, 12, implicit-def dead early-clobber %x17, 12, implicit-def dead early-clobber %x18, 12, implicit-def dead early-clobber %x19, 12, implicit-def dead early-clobber %x20, 12, implicit-def dead early-clobber %x21, 12, implicit-def dead early-clobber %x22, 12, implicit-def dead early-clobber %x23, 12, implicit-def dead early-clobber %x24, 12, implicit-def dead early-clobber %x25, 12, implicit-def dead early-clobber %x26, 12, implicit-def dead early-clobber %x27, 12, implicit-def dead early-clobber %x28, 12, implicit-def dead early-clobber %fp, 12, implicit-def dead early-clobber %lr %6 = ADRP target-flags(aarch64-page) @g %w0 = MOVi32imm 42 STRWui %8, %6, target-flags(aarch64-pageoff, aarch64-nc) @g :: (volatile store 4 into @g) diff --git a/test/CodeGen/AArch64/sqrt-fastmath.ll b/test/CodeGen/AArch64/sqrt-fastmath.ll index 4dd0516faf0c..ade9e3d8df32 100644 --- a/test/CodeGen/AArch64/sqrt-fastmath.ll +++ b/test/CodeGen/AArch64/sqrt-fastmath.ll @@ -14,11 +14,11 @@ define float @fsqrt(float %a) #0 { ret float %1 ; FAULT-LABEL: fsqrt: -; FAULT-NEXT: BB#0 +; FAULT-NEXT: %bb.0 ; FAULT-NEXT: fsqrt ; CHECK-LABEL: fsqrt: -; CHECK-NEXT: BB#0 +; CHECK-NEXT: %bb.0 ; CHECK-NEXT: frsqrte [[RA:s[0-7]]] ; CHECK-NEXT: fmul [[RB:s[0-7]]], [[RA]], [[RA]] ; CHECK-NEXT: frsqrts {{s[0-7](, s[0-7])?}}, [[RB]] @@ -32,11 +32,11 @@ define <2 x float> @f2sqrt(<2 x float> %a) #0 { ret <2 x float> %1 ; FAULT-LABEL: f2sqrt: -; FAULT-NEXT: BB#0 +; FAULT-NEXT: %bb.0 ; FAULT-NEXT: fsqrt ; CHECK-LABEL: f2sqrt: -; CHECK-NEXT: BB#0 +; CHECK-NEXT: %bb.0 ; CHECK-NEXT: frsqrte [[RA:v[0-7]\.2s]] ; CHECK-NEXT: fmul [[RB:v[0-7]\.2s]], [[RA]], [[RA]] ; CHECK-NEXT: frsqrts 
{{v[0-7]\.2s(, v[0-7]\.2s)?}}, [[RB]] @@ -50,11 +50,11 @@ define <4 x float> @f4sqrt(<4 x float> %a) #0 { ret <4 x float> %1 ; FAULT-LABEL: f4sqrt: -; FAULT-NEXT: BB#0 +; FAULT-NEXT: %bb.0 ; FAULT-NEXT: fsqrt ; CHECK-LABEL: f4sqrt: -; CHECK-NEXT: BB#0 +; CHECK-NEXT: %bb.0 ; CHECK-NEXT: frsqrte [[RA:v[0-7]\.4s]] ; CHECK-NEXT: fmul [[RB:v[0-7]\.4s]], [[RA]], [[RA]] ; CHECK-NEXT: frsqrts {{v[0-7]\.4s(, v[0-7]\.4s)?}}, [[RB]] @@ -68,12 +68,12 @@ define <8 x float> @f8sqrt(<8 x float> %a) #0 { ret <8 x float> %1 ; FAULT-LABEL: f8sqrt: -; FAULT-NEXT: BB#0 +; FAULT-NEXT: %bb.0 ; FAULT-NEXT: fsqrt ; FAULT-NEXT: fsqrt ; CHECK-LABEL: f8sqrt: -; CHECK-NEXT: BB#0 +; CHECK-NEXT: %bb.0 ; CHECK-NEXT: frsqrte [[RA:v[0-7]\.4s]] ; CHECK-NEXT: fmul [[RB:v[0-7]\.4s]], [[RA]], [[RA]] ; CHECK-NEXT: frsqrts {{v[0-7]\.4s(, v[0-7]\.4s)?}}, [[RB]] @@ -92,11 +92,11 @@ define double @dsqrt(double %a) #0 { ret double %1 ; FAULT-LABEL: dsqrt: -; FAULT-NEXT: BB#0 +; FAULT-NEXT: %bb.0 ; FAULT-NEXT: fsqrt ; CHECK-LABEL: dsqrt: -; CHECK-NEXT: BB#0 +; CHECK-NEXT: %bb.0 ; CHECK-NEXT: frsqrte [[RA:d[0-7]]] ; CHECK-NEXT: fmul [[RB:d[0-7]]], [[RA]], [[RA]] ; CHECK-NEXT: frsqrts {{d[0-7](, d[0-7])?}}, [[RB]] @@ -111,11 +111,11 @@ define <2 x double> @d2sqrt(<2 x double> %a) #0 { ret <2 x double> %1 ; FAULT-LABEL: d2sqrt: -; FAULT-NEXT: BB#0 +; FAULT-NEXT: %bb.0 ; FAULT-NEXT: fsqrt ; CHECK-LABEL: d2sqrt: -; CHECK-NEXT: BB#0 +; CHECK-NEXT: %bb.0 ; CHECK-NEXT: frsqrte [[RA:v[0-7]\.2d]] ; CHECK-NEXT: fmul [[RB:v[0-7]\.2d]], [[RA]], [[RA]] ; CHECK-NEXT: frsqrts {{v[0-7]\.2d(, v[0-7]\.2d)?}}, [[RB]] @@ -130,12 +130,12 @@ define <4 x double> @d4sqrt(<4 x double> %a) #0 { ret <4 x double> %1 ; FAULT-LABEL: d4sqrt: -; FAULT-NEXT: BB#0 +; FAULT-NEXT: %bb.0 ; FAULT-NEXT: fsqrt ; FAULT-NEXT: fsqrt ; CHECK-LABEL: d4sqrt: -; CHECK-NEXT: BB#0 +; CHECK-NEXT: %bb.0 ; CHECK-NEXT: frsqrte [[RA:v[0-7]\.2d]] ; CHECK-NEXT: fmul [[RB:v[0-7]\.2d]], [[RA]], [[RA]] ; CHECK-NEXT: frsqrts {{v[0-7]\.2d(, v[0-7]\.2d)?}}, [[RB]] @@ -158,11 +158,11 @@ define float @frsqrt(float %a) #0 { ret float %2 ; FAULT-LABEL: frsqrt: -; FAULT-NEXT: BB#0 +; FAULT-NEXT: %bb.0 ; FAULT-NEXT: fsqrt ; CHECK-LABEL: frsqrt: -; CHECK-NEXT: BB#0 +; CHECK-NEXT: %bb.0 ; CHECK-NEXT: frsqrte [[RA:s[0-7]]] ; CHECK-NEXT: fmul [[RB:s[0-7]]], [[RA]], [[RA]] ; CHECK-NEXT: frsqrts {{s[0-7](, s[0-7])?}}, [[RB]] @@ -177,11 +177,11 @@ define <2 x float> @f2rsqrt(<2 x float> %a) #0 { ret <2 x float> %2 ; FAULT-LABEL: f2rsqrt: -; FAULT-NEXT: BB#0 +; FAULT-NEXT: %bb.0 ; FAULT-NEXT: fsqrt ; CHECK-LABEL: f2rsqrt: -; CHECK-NEXT: BB#0 +; CHECK-NEXT: %bb.0 ; CHECK-NEXT: frsqrte [[RA:v[0-7]\.2s]] ; CHECK-NEXT: fmul [[RB:v[0-7]\.2s]], [[RA]], [[RA]] ; CHECK-NEXT: frsqrts {{v[0-7]\.2s(, v[0-7]\.2s)?}}, [[RB]] @@ -196,11 +196,11 @@ define <4 x float> @f4rsqrt(<4 x float> %a) #0 { ret <4 x float> %2 ; FAULT-LABEL: f4rsqrt: -; FAULT-NEXT: BB#0 +; FAULT-NEXT: %bb.0 ; FAULT-NEXT: fsqrt ; CHECK-LABEL: f4rsqrt: -; CHECK-NEXT: BB#0 +; CHECK-NEXT: %bb.0 ; CHECK-NEXT: frsqrte [[RA:v[0-7]\.4s]] ; CHECK-NEXT: fmul [[RB:v[0-7]\.4s]], [[RA]], [[RA]] ; CHECK-NEXT: frsqrts {{v[0-7]\.4s(, v[0-7]\.4s)?}}, [[RB]] @@ -215,12 +215,12 @@ define <8 x float> @f8rsqrt(<8 x float> %a) #0 { ret <8 x float> %2 ; FAULT-LABEL: f8rsqrt: -; FAULT-NEXT: BB#0 +; FAULT-NEXT: %bb.0 ; FAULT-NEXT: fsqrt ; FAULT-NEXT: fsqrt ; CHECK-LABEL: f8rsqrt: -; CHECK-NEXT: BB#0 +; CHECK-NEXT: %bb.0 ; CHECK-NEXT: frsqrte [[RA:v[0-7]\.4s]] ; CHECK: fmul [[RB:v[0-7]\.4s]], [[RA]], [[RA]] ; CHECK: frsqrts {{v[0-7]\.4s(, v[0-7]\.4s)?}}, [[RB]] @@ -237,11 
+237,11 @@ define double @drsqrt(double %a) #0 { ret double %2 ; FAULT-LABEL: drsqrt: -; FAULT-NEXT: BB#0 +; FAULT-NEXT: %bb.0 ; FAULT-NEXT: fsqrt ; CHECK-LABEL: drsqrt: -; CHECK-NEXT: BB#0 +; CHECK-NEXT: %bb.0 ; CHECK-NEXT: frsqrte [[RA:d[0-7]]] ; CHECK-NEXT: fmul [[RB:d[0-7]]], [[RA]], [[RA]] ; CHECK-NEXT: frsqrts {{d[0-7](, d[0-7])?}}, [[RB]] @@ -257,11 +257,11 @@ define <2 x double> @d2rsqrt(<2 x double> %a) #0 { ret <2 x double> %2 ; FAULT-LABEL: d2rsqrt: -; FAULT-NEXT: BB#0 +; FAULT-NEXT: %bb.0 ; FAULT-NEXT: fsqrt ; CHECK-LABEL: d2rsqrt: -; CHECK-NEXT: BB#0 +; CHECK-NEXT: %bb.0 ; CHECK-NEXT: frsqrte [[RA:v[0-7]\.2d]] ; CHECK-NEXT: fmul [[RB:v[0-7]\.2d]], [[RA]], [[RA]] ; CHECK-NEXT: frsqrts {{v[0-7]\.2d(, v[0-7]\.2d)?}}, [[RB]] @@ -277,12 +277,12 @@ define <4 x double> @d4rsqrt(<4 x double> %a) #0 { ret <4 x double> %2 ; FAULT-LABEL: d4rsqrt: -; FAULT-NEXT: BB#0 +; FAULT-NEXT: %bb.0 ; FAULT-NEXT: fsqrt ; FAULT-NEXT: fsqrt ; CHECK-LABEL: d4rsqrt: -; CHECK-NEXT: BB#0 +; CHECK-NEXT: %bb.0 ; CHECK-NEXT: frsqrte [[RA:v[0-7]\.2d]] ; CHECK: fmul [[RB:v[0-7]\.2d]], [[RA]], [[RA]] ; CHECK: frsqrts {{v[0-7]\.2d(, v[0-7]\.2d)?}}, [[RB]] diff --git a/test/CodeGen/AArch64/swift-return.ll b/test/CodeGen/AArch64/swift-return.ll index 15c19ce36196..b909482dc0bf 100644 --- a/test/CodeGen/AArch64/swift-return.ll +++ b/test/CodeGen/AArch64/swift-return.ll @@ -1,5 +1,5 @@ ; RUN: llc -verify-machineinstrs -mtriple=aarch64-apple-ios -o - %s | FileCheck %s -; RUN: llc -O0 -verify-machineinstrs -mtriple=aarch64-apple-ios -o - %s | FileCheck %s --check-prefix=CHECK-O0 +; RUN: llc -O0 -fast-isel -verify-machineinstrs -mtriple=aarch64-apple-ios -o - %s | FileCheck %s --check-prefix=CHECK-O0 ; CHECK-LABEL: test1 ; CHECK: bl _gen diff --git a/test/CodeGen/AArch64/swifterror.ll b/test/CodeGen/AArch64/swifterror.ll index bcad19e391d0..ae218a7e97ec 100644 --- a/test/CodeGen/AArch64/swifterror.ll +++ b/test/CodeGen/AArch64/swifterror.ll @@ -1,5 +1,5 @@ ; RUN: llc -verify-machineinstrs -disable-fp-elim -enable-shrink-wrap=false < %s -mtriple=aarch64-apple-ios -disable-post-ra | FileCheck --check-prefix=CHECK-APPLE %s -; RUN: llc -verify-machineinstrs -disable-fp-elim -O0 < %s -mtriple=aarch64-apple-ios -disable-post-ra | FileCheck --check-prefix=CHECK-O0 %s +; RUN: llc -verify-machineinstrs -disable-fp-elim -O0 -fast-isel < %s -mtriple=aarch64-apple-ios -disable-post-ra | FileCheck --check-prefix=CHECK-O0 %s declare i8* @malloc(i64) declare void @free(i8*) @@ -223,8 +223,8 @@ bb_end: ; parameter. 
define void @foo_sret(%struct.S* sret %agg.result, i32 %val1, %swift_error** swifterror %error_ptr_ref) { ; CHECK-APPLE-LABEL: foo_sret: -; CHECK-APPLE: mov [[SRET:x[0-9]+]], x8 ; CHECK-APPLE: orr w0, wzr, #0x10 +; CHECK-APPLE: mov [[SRET:x[0-9]+]], x8 ; CHECK-APPLE: malloc ; CHECK-APPLE: orr [[ID:w[0-9]+]], wzr, #0x1 ; CHECK-APPLE: strb [[ID]], [x0, #8] @@ -406,7 +406,7 @@ entry: ret float %0 } -; CHECK-APPLE-LABEL: swifterror_clobber +; CHECK-APPLE-LABEL: swifterror_clobber: ; CHECK-APPLE: mov [[REG:x[0-9]+]], x21 ; CHECK-APPLE: nop ; CHECK-APPLE: mov x21, [[REG]] @@ -415,7 +415,7 @@ define swiftcc void @swifterror_clobber(%swift_error** nocapture swifterror %err ret void } -; CHECK-APPLE-LABEL: swifterror_reg_clobber +; CHECK-APPLE-LABEL: swifterror_reg_clobber: ; CHECK-APPLE: stp {{.*}}x21 ; CHECK-APPLE: nop ; CHECK-APPLE: ldp {{.*}}x21 @@ -423,7 +423,7 @@ define swiftcc void @swifterror_reg_clobber(%swift_error** nocapture %err) { call void asm sideeffect "nop", "~{x21}"() ret void } -; CHECK-APPLE-LABEL: params_in_reg +; CHECK-APPLE-LABEL: params_in_reg: ; Save callee saved registers and swifterror since it will be clobbered by the first call to params_in_reg2. ; CHECK-APPLE: stp x21, x28, [sp ; CHECK-APPLE: stp x27, x26, [sp @@ -431,16 +431,15 @@ define swiftcc void @swifterror_reg_clobber(%swift_error** nocapture %err) { ; CHECK-APPLE: stp x23, x22, [sp ; CHECK-APPLE: stp x20, x19, [sp ; CHECK-APPLE: stp x29, x30, [sp -; CHECK-APPLE: str x20, [sp +; CHECK-APPLE: str x7, [sp ; Store argument registers. -; CHECK-APPLE: mov x23, x7 -; CHECK-APPLE: mov x24, x6 -; CHECK-APPLE: mov x25, x5 -; CHECK-APPLE: mov x26, x4 -; CHECK-APPLE: mov x27, x3 -; CHECK-APPLE: mov x28, x2 -; CHECK-APPLE: mov x19, x1 -; CHECK-APPLE: mov x22, x0 +; CHECK-APPLE: mov x23, x6 +; CHECK-APPLE: mov x24, x5 +; CHECK-APPLE: mov x25, x4 +; CHECK-APPLE: mov x26, x3 +; CHECK-APPLE: mov x27, x2 +; CHECK-APPLE: mov x28, x1 +; CHECK-APPLE: mov x19, x0 ; Setup call. ; CHECK-APPLE: orr w0, wzr, #0x1 ; CHECK-APPLE: orr w1, wzr, #0x2 @@ -450,20 +449,20 @@ define swiftcc void @swifterror_reg_clobber(%swift_error** nocapture %err) { ; CHECK-APPLE: orr w5, wzr, #0x6 ; CHECK-APPLE: orr w6, wzr, #0x7 ; CHECK-APPLE: orr w7, wzr, #0x8 +; CHECK-APPLE: mov x22, x20 ; CHECK-APPLE: mov x20, xzr ; CHECK-APPLE: mov x21, xzr ; CHECK-APPLE: bl _params_in_reg2 ; Restore original arguments for next call. -; CHECK-APPLE: mov x0, x22 -; CHECK-APPLE: mov x1, x19 -; CHECK-APPLE: mov x2, x28 -; CHECK-APPLE: mov x3, x27 -; CHECK-APPLE: mov x4, x26 -; CHECK-APPLE: mov x5, x25 -; CHECK-APPLE: mov x6, x24 -; CHECK-APPLE: mov x7, x23 +; CHECK-APPLE: mov x0, x19 +; CHECK-APPLE: mov x1, x28 +; CHECK-APPLE: mov x2, x27 +; CHECK-APPLE: mov x3, x26 +; CHECK-APPLE: mov x4, x25 +; CHECK-APPLE: mov x5, x24 ; Restore original swiftself argument and swifterror %err. -; CHECK-APPLE: ldp x20, x21, [sp +; CHECK-APPLE: ldp x7, x21, [sp +; CHECK-APPLE: mov x20, x22 ; CHECK-APPLE: bl _params_in_reg2 ; Restore calle save registers but don't clober swifterror x21. ; CHECK-APPLE-NOT: x21 @@ -489,9 +488,9 @@ define swiftcc void @params_in_reg(i64, i64, i64, i64, i64, i64, i64, i64, i8* s } declare swiftcc void @params_in_reg2(i64, i64, i64, i64, i64, i64, i64, i64, i8* swiftself, %swift_error** nocapture swifterror %err) -; CHECK-APPLE-LABEL: params_and_return_in_reg +; CHECK-APPLE-LABEL: params_and_return_in_reg: ; Store callee saved registers. 
-; CHECK-APPLE: stp x20, x28, [sp, #24 +; CHECK-APPLE: stp x7, x28, [sp, #24 ; CHECK-APPLE: stp x27, x26, [sp ; CHECK-APPLE: stp x25, x24, [sp ; CHECK-APPLE: stp x23, x22, [sp @@ -499,14 +498,13 @@ declare swiftcc void @params_in_reg2(i64, i64, i64, i64, i64, i64, i64, i64, i8* ; CHECK-APPLE: stp x29, x30, [sp ; Save original arguments. ; CHECK-APPLE: mov x23, x21 -; CHECK-APPLE: str x7, [sp, #16] -; CHECK-APPLE: mov x24, x6 -; CHECK-APPLE: mov x25, x5 -; CHECK-APPLE: mov x26, x4 -; CHECK-APPLE: mov x27, x3 -; CHECK-APPLE: mov x28, x2 -; CHECK-APPLE: mov x19, x1 -; CHECK-APPLE: mov x22, x0 +; CHECK-APPLE: str x6, [sp, #16] +; CHECK-APPLE: mov x24, x5 +; CHECK-APPLE: mov x25, x4 +; CHECK-APPLE: mov x26, x3 +; CHECK-APPLE: mov x27, x2 +; CHECK-APPLE: mov x28, x1 +; CHECK-APPLE: mov x19, x0 ; Setup call arguments. ; CHECK-APPLE: orr w0, wzr, #0x1 ; CHECK-APPLE: orr w1, wzr, #0x2 @@ -516,24 +514,26 @@ declare swiftcc void @params_in_reg2(i64, i64, i64, i64, i64, i64, i64, i64, i8* ; CHECK-APPLE: orr w5, wzr, #0x6 ; CHECK-APPLE: orr w6, wzr, #0x7 ; CHECK-APPLE: orr w7, wzr, #0x8 +; CHECK-APPLE: mov x22, x20 ; CHECK-APPLE: mov x20, xzr ; CHECK-APPLE: mov x21, xzr ; CHECK-APPLE: bl _params_in_reg2 ; Store swifterror %error_ptr_ref. ; CHECK-APPLE: str x21, [sp, #8] ; Setup call arguments from original arguments. -; CHECK-APPLE: mov x0, x22 -; CHECK-APPLE: mov x1, x19 -; CHECK-APPLE: mov x2, x28 -; CHECK-APPLE: mov x3, x27 -; CHECK-APPLE: mov x4, x26 -; CHECK-APPLE: mov x5, x25 -; CHECK-APPLE: mov x6, x24 -; CHECK-APPLE: ldp x7, x20, [sp, #16] +; CHECK-APPLE: mov x0, x19 +; CHECK-APPLE: mov x1, x28 +; CHECK-APPLE: mov x2, x27 +; CHECK-APPLE: mov x3, x26 +; CHECK-APPLE: mov x4, x25 +; CHECK-APPLE: mov x5, x24 +; CHECK-APPLE: ldp x6, x7, [sp, #16] +; CHECK-APPLE: mov x20, x22 ; CHECK-APPLE: mov x21, x23 ; CHECK-APPLE: bl _params_and_return_in_reg2 +; Save swifterror %err. +; CHECK-APPLE: str x0, [sp, #24] ; Store return values. -; CHECK-APPLE: mov x19, x0 ; CHECK-APPLE: mov x22, x1 ; CHECK-APPLE: mov x24, x2 ; CHECK-APPLE: mov x25, x3 @@ -541,8 +541,6 @@ declare swiftcc void @params_in_reg2(i64, i64, i64, i64, i64, i64, i64, i64, i8* ; CHECK-APPLE: mov x27, x5 ; CHECK-APPLE: mov x28, x6 ; CHECK-APPLE: mov x23, x7 -; Save swifterror %err. -; CHECK-APPLE: str x21, [sp, #24] ; Setup call. ; CHECK-APPLE: orr w0, wzr, #0x1 ; CHECK-APPLE: orr w1, wzr, #0x2 @@ -552,12 +550,12 @@ declare swiftcc void @params_in_reg2(i64, i64, i64, i64, i64, i64, i64, i64, i8* ; CHECK-APPLE: orr w5, wzr, #0x6 ; CHECK-APPLE: orr w6, wzr, #0x7 ; CHECK-APPLE: orr w7, wzr, #0x8 +; CHECK-APPLE: mov x19, x21 ; CHECK-APPLE: mov x20, xzr ; ... setup call with swiferror %error_ptr_ref. ; CHECK-APPLE: ldr x21, [sp, #8] ; CHECK-APPLE: bl _params_in_reg2 ; Restore return values for return from this function. -; CHECK-APPLE: mov x0, x19 ; CHECK-APPLE: mov x1, x22 ; CHECK-APPLE: mov x2, x24 ; CHECK-APPLE: mov x3, x25 @@ -565,13 +563,14 @@ declare swiftcc void @params_in_reg2(i64, i64, i64, i64, i64, i64, i64, i64, i8* ; CHECK-APPLE: mov x5, x27 ; CHECK-APPLE: mov x6, x28 ; CHECK-APPLE: mov x7, x23 +; CHECK-APPLE: mov x21, x19 ; Restore swifterror %err and callee save registers. 
-; CHECK-APPLE: ldp x21, x28, [sp, #24 ; CHECK-APPLE: ldp x29, x30, [sp ; CHECK-APPLE: ldp x20, x19, [sp ; CHECK-APPLE: ldp x23, x22, [sp ; CHECK-APPLE: ldp x25, x24, [sp ; CHECK-APPLE: ldp x27, x26, [sp +; CHECK-APPLE: ldp x0, x28, [sp, #24 ; CHECK-APPLE: ret define swiftcc { i64, i64, i64, i64, i64, i64, i64, i64 } @params_and_return_in_reg(i64, i64, i64, i64, i64, i64, i64, i64, i8* swiftself, %swift_error** nocapture swifterror %err) { %error_ptr_ref = alloca swifterror %swift_error*, align 8 @@ -601,14 +600,14 @@ entry: declare swiftcc void @foo2(%swift_error** swifterror) ; Make sure we properly assign registers during fast-isel. -; CHECK-O0-LABEL: testAssign +; CHECK-O0-LABEL: testAssign: ; CHECK-O0: mov [[TMP:x.*]], xzr ; CHECK-O0: mov x21, [[TMP]] ; CHECK-O0: bl _foo2 ; CHECK-O0: str x21, [s[[STK:.*]]] ; CHECK-O0: ldr x0, [s[[STK]]] -; CHECK-APPLE-LABEL: testAssign +; CHECK-APPLE-LABEL: testAssign: ; CHECK-APPLE: mov x21, xzr ; CHECK-APPLE: bl _foo2 ; CHECK-APPLE: mov x0, x21 diff --git a/test/CodeGen/AArch64/swiftself.ll b/test/CodeGen/AArch64/swiftself.ll index 33a49198430e..f19c852cb9b1 100644 --- a/test/CodeGen/AArch64/swiftself.ll +++ b/test/CodeGen/AArch64/swiftself.ll @@ -1,5 +1,5 @@ ; RUN: llc -verify-machineinstrs -mtriple=aarch64-apple-ios -o - %s | FileCheck --check-prefix=CHECK --check-prefix=OPT %s -; RUN: llc -O0 -verify-machineinstrs -mtriple=aarch64-apple-ios -o - %s | FileCheck %s +; RUN: llc -O0 -fast-isel -verify-machineinstrs -mtriple=aarch64-apple-ios -o - %s | FileCheck %s ; RUN: llc -verify-machineinstrs -mtriple=aarch64-unknown-linux-gnu -o - %s | FileCheck --check-prefix=CHECK --check-prefix=OPT %s ; Parameter with swiftself should be allocated to x20. diff --git a/test/CodeGen/AArch64/tail-call.ll b/test/CodeGen/AArch64/tail-call.ll index fa5d8b943b6b..ab63413bd3f1 100644 --- a/test/CodeGen/AArch64/tail-call.ll +++ b/test/CodeGen/AArch64/tail-call.ll @@ -7,7 +7,7 @@ declare extern_weak fastcc void @callee_weak() define fastcc void @caller_to0_from0() nounwind { ; CHECK-LABEL: caller_to0_from0: -; CHECK-NEXT: // BB +; CHECK-NEXT: // %bb. 
tail call fastcc void @callee_stack0() ret void diff --git a/test/CodeGen/AArch64/tailcall-explicit-sret.ll b/test/CodeGen/AArch64/tailcall-explicit-sret.ll index b60958b5a25d..2ce4f44d0f94 100644 --- a/test/CodeGen/AArch64/tailcall-explicit-sret.ll +++ b/test/CodeGen/AArch64/tailcall-explicit-sret.ll @@ -36,9 +36,9 @@ define void @test_tailcall_explicit_sret_alloca_unused() #0 { ; CHECK-LABEL: _test_tailcall_explicit_sret_alloca_dummyusers: ; CHECK: ldr [[PTRLOAD1:q[0-9]+]], [x0] -; CHECK: str [[PTRLOAD1]], [sp] ; CHECK: mov x8, sp -; CHECK-NEXT: bl _test_explicit_sret +; CHECK: str [[PTRLOAD1]], [sp] +; CHECK: bl _test_explicit_sret ; CHECK: ret define void @test_tailcall_explicit_sret_alloca_dummyusers(i1024* %ptr) #0 { %l = alloca i1024, align 8 @@ -75,10 +75,10 @@ define i1024 @test_tailcall_explicit_sret_alloca_returned() #0 { } ; CHECK-LABEL: _test_indirect_tailcall_explicit_sret_nosret_arg: -; CHECK-DAG: mov x[[CALLERX8NUM:[0-9]+]], x8 -; CHECK-DAG: mov [[FPTR:x[0-9]+]], x0 +; CHECK: mov [[FPTR:x[0-9]+]], x0 ; CHECK: mov x0, sp -; CHECK-NEXT: blr [[FPTR]] +; CHECK: mov x[[CALLERX8NUM:[0-9]+]], x8 +; CHECK: blr [[FPTR]] ; CHECK: ldr [[CALLERSRET1:q[0-9]+]], [sp] ; CHECK: str [[CALLERSRET1:q[0-9]+]], [x[[CALLERX8NUM]]] ; CHECK: ret diff --git a/test/CodeGen/AArch64/tailcall-fastisel.ll b/test/CodeGen/AArch64/tailcall-fastisel.ll index 3ba639183161..ea173de274ed 100644 --- a/test/CodeGen/AArch64/tailcall-fastisel.ll +++ b/test/CodeGen/AArch64/tailcall-fastisel.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=arm64-apple-darwin -O0 | FileCheck %s +; RUN: llc < %s -mtriple=arm64-apple-darwin -O0 -fast-isel | FileCheck %s ; CHECK: b _foo0 diff --git a/test/CodeGen/AArch64/tailcall-mem-intrinsics.ll b/test/CodeGen/AArch64/tailcall-mem-intrinsics.ll index b970fb124151..c780d15b58db 100644 --- a/test/CodeGen/AArch64/tailcall-mem-intrinsics.ll +++ b/test/CodeGen/AArch64/tailcall-mem-intrinsics.ll @@ -4,7 +4,7 @@ ; CHECK: b memcpy define void @tail_memcpy(i8* nocapture %p, i8* nocapture readonly %q, i32 %n) #0 { entry: - tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i1 false) ret void } @@ -12,7 +12,7 @@ entry: ; CHECK: b memmove define void @tail_memmove(i8* nocapture %p, i8* nocapture readonly %q, i32 %n) #0 { entry: - tail call void @llvm.memmove.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i32 1, i1 false) + tail call void @llvm.memmove.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i1 false) ret void } @@ -20,12 +20,12 @@ entry: ; CHECK: b memset define void @tail_memset(i8* nocapture %p, i8 %c, i32 %n) #0 { entry: - tail call void @llvm.memset.p0i8.i32(i8* %p, i8 %c, i32 %n, i32 1, i1 false) + tail call void @llvm.memset.p0i8.i32(i8* %p, i8 %c, i32 %n, i1 false) ret void } -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #0 -declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #0 -declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) #0 +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i1) #0 +declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i1) #0 +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) #0 attributes #0 = { nounwind } diff --git a/test/CodeGen/AArch64/tailcall-string-rvo.ll b/test/CodeGen/AArch64/tailcall-string-rvo.ll index bdc09235afd9..d9d2180b5ef0 100644 --- a/test/CodeGen/AArch64/tailcall-string-rvo.ll +++ 
b/test/CodeGen/AArch64/tailcall-string-rvo.ll @@ -32,7 +32,7 @@ bb: %tmp1 = bitcast %class.basic_string.11.42.73* %arg to %union.anon.8.39.70** store %union.anon.8.39.70* %tmp, %union.anon.8.39.70** %tmp1, align 8 %tmp2 = bitcast %union.anon.8.39.70* %tmp to i8* - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp2, i8* nonnull undef, i64 13, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp2, i8* nonnull undef, i64 13, i1 false) %tmp3 = getelementptr inbounds %class.basic_string.11.42.73, %class.basic_string.11.42.73* %arg, i64 0, i32 0, i32 0, i32 1 store i64 13, i64* %tmp3, align 8 %tmp4 = getelementptr inbounds %class.basic_string.11.42.73, %class.basic_string.11.42.73* %arg, i64 0, i32 0, i32 0, i32 2, i32 1, i64 5 @@ -42,6 +42,6 @@ bb: } ; Function Attrs: argmemonly nounwind -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #0 +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1) #0 attributes #0 = { argmemonly nounwind } diff --git a/test/CodeGen/AArch64/tailcall_misched_graph.ll b/test/CodeGen/AArch64/tailcall_misched_graph.ll index 7e76dac214a1..b926594e4504 100644 --- a/test/CodeGen/AArch64/tailcall_misched_graph.ll +++ b/test/CodeGen/AArch64/tailcall_misched_graph.ll @@ -26,19 +26,19 @@ declare void @callee2(i8*, i8*, i8*, i8*, i8*, ; CHECK: fi#-2: {{.*}} fixed, at location [SP+8] ; CHECK: fi#-1: {{.*}} fixed, at location [SP] -; CHECK: [[VRA:%vreg.*]] = LDRXui -; CHECK: [[VRB:%vreg.*]] = LDRXui -; CHECK: STRXui %vreg{{.*}}, -; CHECK: STRXui [[VRB]], +; CHECK: [[VRA:%.*]]:gpr64 = LDRXui %fixed-stack.3 +; CHECK: [[VRB:%.*]]:gpr64 = LDRXui %fixed-stack.2 +; CHECK: STRXui %{{.*}}, %fixed-stack.0 +; CHECK: STRXui [[VRB]], %fixed-stack.1 ; Make sure that there is an dependence edge between fi#-2 and fi#-4. ; Without this edge the scheduler would be free to move the store accross the load. 
-; CHECK: SU({{.*}}): [[VRB]] = LDRXui +; CHECK: SU({{.*}}): [[VRB]]:gpr64 = LDRXui %fixed-stack.2 ; CHECK-NOT: SU ; CHECK: Successors: ; CHECK: SU([[DEPSTOREB:.*]]): Ord Latency=0 ; CHECK: SU([[DEPSTOREA:.*]]): Ord Latency=0 -; CHECK: SU([[DEPSTOREA]]): STRXui %vreg{{.*}}, -; CHECK: SU([[DEPSTOREB]]): STRXui %vreg{{.*}}, +; CHECK: SU([[DEPSTOREA]]): STRXui %{{.*}}, %fixed-stack.0 +; CHECK: SU([[DEPSTOREB]]): STRXui %{{.*}}, %fixed-stack.1 diff --git a/test/CodeGen/AMDGPU/add.ll b/test/CodeGen/AMDGPU/add.ll index 6dcd7c234dc6..ec3bf6dca57f 100644 --- a/test/CodeGen/AMDGPU/add.ll +++ b/test/CodeGen/AMDGPU/add.ll @@ -1,14 +1,15 @@ -; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SIVI,FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SIVI,FUNC %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,FUNC %s ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s -;FUNC-LABEL: {{^}}test1: -;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; FUNC-LABEL: {{^}}s_add_i32: +; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;SI: s_add_i32 s[[REG:[0-9]+]], {{s[0-9]+, s[0-9]+}} -;SI: v_mov_b32_e32 v[[REG]], s[[REG]] -;SI: buffer_store_dword v[[REG]], -define amdgpu_kernel void @test1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { +; GCN: s_add_i32 s[[REG:[0-9]+]], {{s[0-9]+, s[0-9]+}} +; GCN: v_mov_b32_e32 v[[REG]], s[[REG]] +; GCN: buffer_store_dword v[[REG]], +define amdgpu_kernel void @s_add_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1 %a = load i32, i32 addrspace(1)* %in %b = load i32, i32 addrspace(1)* %b_ptr @@ -17,14 +18,13 @@ define amdgpu_kernel void @test1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) ret void } -;FUNC-LABEL: {{^}}test2: -;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; FUNC-LABEL: {{^}}s_add_v2i32: +; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;SI: s_add_i32 s{{[0-9]+, s[0-9]+, s[0-9]+}} -;SI: s_add_i32 s{{[0-9]+, s[0-9]+, s[0-9]+}} - -define amdgpu_kernel void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { +; GCN: s_add_i32 s{{[0-9]+, s[0-9]+, s[0-9]+}} +; GCN: s_add_i32 s{{[0-9]+, s[0-9]+, s[0-9]+}} +define amdgpu_kernel void @s_add_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1 %a = load <2 x i32>, <2 x i32> addrspace(1)* %in %b = load <2 x i32>, <2 x i32> addrspace(1)* %b_ptr @@ -33,18 +33,17 @@ define amdgpu_kernel void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspa ret void } -;FUNC-LABEL: {{^}}test4: -;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} 
-;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} - -;SI: s_add_i32 s{{[0-9]+, s[0-9]+, s[0-9]+}} -;SI: s_add_i32 s{{[0-9]+, s[0-9]+, s[0-9]+}} -;SI: s_add_i32 s{{[0-9]+, s[0-9]+, s[0-9]+}} -;SI: s_add_i32 s{{[0-9]+, s[0-9]+, s[0-9]+}} - -define amdgpu_kernel void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { +; FUNC-LABEL: {{^}}s_add_v4i32: +; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +; GCN: s_add_i32 s{{[0-9]+, s[0-9]+, s[0-9]+}} +; GCN: s_add_i32 s{{[0-9]+, s[0-9]+, s[0-9]+}} +; GCN: s_add_i32 s{{[0-9]+, s[0-9]+, s[0-9]+}} +; GCN: s_add_i32 s{{[0-9]+, s[0-9]+, s[0-9]+}} +define amdgpu_kernel void @s_add_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1 %a = load <4 x i32>, <4 x i32> addrspace(1)* %in %b = load <4 x i32>, <4 x i32> addrspace(1)* %b_ptr @@ -53,7 +52,7 @@ define amdgpu_kernel void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspa ret void } -; FUNC-LABEL: {{^}}test8: +; FUNC-LABEL: {{^}}s_add_v8i32: ; EG: ADD_INT ; EG: ADD_INT ; EG: ADD_INT @@ -63,22 +62,22 @@ define amdgpu_kernel void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspa ; EG: ADD_INT ; EG: ADD_INT -; SI: s_add_i32 -; SI: s_add_i32 -; SI: s_add_i32 -; SI: s_add_i32 -; SI: s_add_i32 -; SI: s_add_i32 -; SI: s_add_i32 -; SI: s_add_i32 -define amdgpu_kernel void @test8(<8 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x i32> %b) { +; GCN: s_add_i32 +; GCN: s_add_i32 +; GCN: s_add_i32 +; GCN: s_add_i32 +; GCN: s_add_i32 +; GCN: s_add_i32 +; GCN: s_add_i32 +; GCN: s_add_i32 +define amdgpu_kernel void @s_add_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x i32> %b) { entry: %0 = add <8 x i32> %a, %b store <8 x i32> %0, <8 x i32> addrspace(1)* %out ret void } -; FUNC-LABEL: {{^}}test16: +; FUNC-LABEL: {{^}}s_add_v16i32: ; EG: ADD_INT ; EG: ADD_INT ; EG: ADD_INT @@ -96,32 +95,62 @@ entry: ; EG: ADD_INT ; EG: ADD_INT -; SI: s_add_i32 -; SI: s_add_i32 -; SI: s_add_i32 -; SI: s_add_i32 -; SI: s_add_i32 -; SI: s_add_i32 -; SI: s_add_i32 -; SI: s_add_i32 -; SI: s_add_i32 -; SI: s_add_i32 -; SI: s_add_i32 -; SI: s_add_i32 -; SI: s_add_i32 -; SI: s_add_i32 -; SI: s_add_i32 -; SI: s_add_i32 -define amdgpu_kernel void @test16(<16 x i32> addrspace(1)* %out, <16 x i32> %a, <16 x i32> %b) { +; GCN: s_add_i32 +; GCN: s_add_i32 +; GCN: s_add_i32 +; GCN: s_add_i32 +; GCN: s_add_i32 +; GCN: s_add_i32 +; GCN: s_add_i32 +; GCN: s_add_i32 +; GCN: s_add_i32 +; GCN: s_add_i32 +; GCN: s_add_i32 +; GCN: s_add_i32 +; GCN: s_add_i32 +; GCN: s_add_i32 +; GCN: s_add_i32 +; GCN: s_add_i32 +define amdgpu_kernel void @s_add_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> %a, <16 x i32> %b) { entry: %0 = add <16 x i32> %a, %b store <16 x i32> %0, <16 x i32> addrspace(1)* %out ret void } +; FUNC-LABEL: {{^}}v_add_i32: +; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]] +; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc, [[B]], [[A]] +; GFX9: v_add_u32_e32 v{{[0-9]+}}, [[A]], [[B]] +define amdgpu_kernel void @v_add_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { + %tid = call i32 @llvm.r600.read.tidig.x() + %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 %tid + 
%b_ptr = getelementptr i32, i32 addrspace(1)* %gep, i32 1 + %a = load volatile i32, i32 addrspace(1)* %gep + %b = load volatile i32, i32 addrspace(1)* %b_ptr + %result = add i32 %a, %b + store i32 %result, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}v_add_imm_i32: +; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]] +; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc, 0x7b, [[A]] +; GFX9: v_add_u32_e32 v{{[0-9]+}}, 0x7b, [[A]] +define amdgpu_kernel void @v_add_imm_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { + %tid = call i32 @llvm.r600.read.tidig.x() + %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 %tid + %b_ptr = getelementptr i32, i32 addrspace(1)* %gep, i32 1 + %a = load volatile i32, i32 addrspace(1)* %gep + %result = add i32 %a, 123 + store i32 %result, i32 addrspace(1)* %out + ret void +} + ; FUNC-LABEL: {{^}}add64: -; SI: s_add_u32 -; SI: s_addc_u32 +; GCN: s_add_u32 +; GCN: s_addc_u32 ; EG: MEM_RAT_CACHELESS STORE_RAW [[LO:T[0-9]+\.XY]] ; EG-DAG: ADD_INT {{[* ]*}} @@ -131,8 +160,8 @@ entry: ; EG-NOT: SUB define amdgpu_kernel void @add64(i64 addrspace(1)* %out, i64 %a, i64 %b) { entry: - %0 = add i64 %a, %b - store i64 %0, i64 addrspace(1)* %out + %add = add i64 %a, %b + store i64 %add, i64 addrspace(1)* %out ret void } @@ -142,7 +171,7 @@ entry: ; to a VGPR before doing the add. ; FUNC-LABEL: {{^}}add64_sgpr_vgpr: -; SI-NOT: v_addc_u32_e32 s +; GCN-NOT: v_addc_u32_e32 s ; EG: MEM_RAT_CACHELESS STORE_RAW [[LO:T[0-9]+\.XY]] ; EG-DAG: ADD_INT {{[* ]*}} @@ -160,8 +189,8 @@ entry: ; Test i64 add inside a branch. ; FUNC-LABEL: {{^}}add64_in_branch: -; SI: s_add_u32 -; SI: s_addc_u32 +; GCN: s_add_u32 +; GCN: s_addc_u32 ; EG: MEM_RAT_CACHELESS STORE_RAW [[LO:T[0-9]+\.XY]] ; EG-DAG: ADD_INT {{[* ]*}} @@ -187,3 +216,8 @@ endif: store i64 %3, i64 addrspace(1)* %out ret void } + +declare i32 @llvm.r600.read.tidig.x() #1 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone speculatable } diff --git a/test/CodeGen/AMDGPU/add.v2i16.ll b/test/CodeGen/AMDGPU/add.v2i16.ll index a89c1c21493b..a6c2901bd422 100644 --- a/test/CodeGen/AMDGPU/add.v2i16.ll +++ b/test/CodeGen/AMDGPU/add.v2i16.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; FIXME: Need to handle non-uniform case for function below (load without gep). 
diff --git a/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll b/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll index 8cabc7dae133..b40fcb3e4920 100644 --- a/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll +++ b/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll @@ -1,6 +1,6 @@ ; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefix=HSA %s -declare void @llvm.memcpy.p1i32.p4i32.i32(i32 addrspace(1)* nocapture, i32 addrspace(4)* nocapture, i32, i32, i1) #0 +declare void @llvm.memcpy.p1i32.p4i32.i32(i32 addrspace(1)* nocapture, i32 addrspace(4)* nocapture, i32, i1) #0 @lds.i32 = unnamed_addr addrspace(3) global i32 undef, align 4 @lds.arr = unnamed_addr addrspace(3) global [256 x i32] undef, align 4 @@ -68,7 +68,7 @@ define amdgpu_kernel void @cmpxchg_constant_cast_group_gv_gep_to_flat(i32 addrsp ; HSA: @memcpy_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #2 define amdgpu_kernel void @memcpy_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #1 { - call void @llvm.memcpy.p1i32.p4i32.i32(i32 addrspace(1)* %out, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 32, i32 4, i1 false) + call void @llvm.memcpy.p1i32.p4i32.i32(i32 addrspace(1)* align 4 %out, i32 addrspace(4)* align 4 getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 32, i1 false) ret void } diff --git a/test/CodeGen/AMDGPU/addrspacecast.ll b/test/CodeGen/AMDGPU/addrspacecast.ll index 901b197b1b8f..27426fb3aebc 100644 --- a/test/CodeGen/AMDGPU/addrspacecast.ll +++ b/test/CodeGen/AMDGPU/addrspacecast.ll @@ -17,7 +17,7 @@ ; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7 ; GFX9-DAG: s_load_dword [[PTR:s[0-9]+]], s[4:5], 0x0{{$}} -; GFX9-DAG: s_getreg_b32 [[SSRC_SHARED:s[0-9]+]], hwreg(15, 16, 16) +; GFX9-DAG: s_getreg_b32 [[SSRC_SHARED:s[0-9]+]], hwreg(HW_REG_SH_MEM_BASES, 16, 16) ; GFX9-DAG: s_lshl_b32 [[SSRC_SHARED_BASE:s[0-9]+]], [[SSRC_SHARED]], 16 ; GFX9-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[SSRC_SHARED_BASE]] @@ -57,7 +57,7 @@ define amdgpu_kernel void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %pt ; CI-DAG: v_cndmask_b32_e32 v[[LO:[0-9]+]], 0, [[VPTR]] ; GFX9-DAG: s_load_dword [[PTR:s[0-9]+]], s[4:5], 0x0{{$}} -; GFX9-DAG: s_getreg_b32 [[SSRC_PRIVATE:s[0-9]+]], hwreg(15, 0, 16) +; GFX9-DAG: s_getreg_b32 [[SSRC_PRIVATE:s[0-9]+]], hwreg(HW_REG_SH_MEM_BASES, 0, 16) ; GFX9-DAG: s_lshl_b32 [[SSRC_PRIVATE_BASE:s[0-9]+]], [[SSRC_PRIVATE]], 16 ; GFX9-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[SSRC_PRIVATE_BASE]] @@ -168,7 +168,7 @@ define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32 addrspace(4)* ; HSA-LABEL: {{^}}cast_0_group_to_flat_addrspacecast: ; CI: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x10 ; CI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE]] -; GFX9-DAG: s_getreg_b32 [[SSRC_SHARED:s[0-9]+]], hwreg(15, 16, 16) +; GFX9-DAG: s_getreg_b32 [[SSRC_SHARED:s[0-9]+]], hwreg(HW_REG_SH_MEM_BASES, 16, 16) ; GFX9-DAG: s_lshl_b32 [[SSRC_SHARED_BASE:s[0-9]+]], [[SSRC_SHARED]], 16 ; GFX9-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[SSRC_SHARED_BASE]] diff --git a/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll b/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll new file mode 100644 index 000000000000..e967723384bf --- /dev/null +++ b/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll @@ -0,0 +1,84 @@ +; RUN: llc 
-march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s + +; GCN-LABEL: {{^}}adjust_writemask_crash_0_nochain: +; GCN: image_get_lod v0, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x2 +; GCN-NOT: v1 +; GCN-NOT: v0 +; GCN: buffer_store_dword v0 +define amdgpu_ps void @adjust_writemask_crash_0_nochain() #0 { +main_body: + %tmp = call <2 x float> @llvm.amdgcn.image.getlod.v2f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 3, i1 false, i1 false, i1 false, i1 false, i1 false) + %tmp1 = bitcast <2 x float> %tmp to <2 x i32> + %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> + %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float> + %tmp4 = extractelement <4 x float> %tmp3, i32 0 + store volatile float %tmp4, float addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}adjust_writemask_crash_1_nochain: +; GCN: image_get_lod v0, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x1 +; GCN-NOT: v1 +; GCN-NOT: v0 +; GCN: buffer_store_dword v0 +define amdgpu_ps void @adjust_writemask_crash_1_nochain() #0 { +main_body: + %tmp = call <2 x float> @llvm.amdgcn.image.getlod.v2f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 3, i1 false, i1 false, i1 false, i1 false, i1 false) + %tmp1 = bitcast <2 x float> %tmp to <2 x i32> + %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> + %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float> + %tmp4 = extractelement <4 x float> %tmp3, i32 1 + store volatile float %tmp4, float addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}adjust_writemask_crash_0_chain: +; GCN: image_sample v0, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x2 +; GCN-NOT: v1 +; GCN-NOT: v0 +; GCN: buffer_store_dword v0 +define amdgpu_ps void @adjust_writemask_crash_0_chain() #0 { +main_body: + %tmp = call <2 x float> @llvm.amdgcn.image.sample.v2f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 3, i1 false, i1 false, i1 false, i1 false, i1 false) + %tmp1 = bitcast <2 x float> %tmp to <2 x i32> + %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> + %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float> + %tmp4 = extractelement <4 x float> %tmp3, i32 0 + store volatile float %tmp4, float addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}adjust_writemask_crash_1_chain: +; GCN: image_sample v0, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x1 +; GCN-NOT: v1 +; GCN-NOT: v0 +; GCN: buffer_store_dword v0 +define amdgpu_ps void @adjust_writemask_crash_1_chain() #0 { +main_body: + %tmp = call <2 x float> @llvm.amdgcn.image.sample.v2f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 3, i1 false, i1 false, i1 false, i1 false, i1 false) + %tmp1 = bitcast <2 x float> %tmp to <2 x i32> + %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> + %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float> + %tmp4 = extractelement <4 x float> %tmp3, i32 1 + store volatile float %tmp4, float addrspace(1)* undef + ret void +} + +define amdgpu_ps void @adjust_writemask_crash_0_v4() #0 { +main_body: + %tmp = call <4 x float> @llvm.amdgcn.image.getlod.v4f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 5, i1 false, i1 false, i1 false, i1 false, i1 false) + %tmp1 = bitcast <4 x float> %tmp to <4 x i32> + %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> + %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float> + %tmp4 = 
extractelement <4 x float> %tmp3, i32 0 + store volatile float %tmp4, float addrspace(1)* undef + ret void +} + + +declare <2 x float> @llvm.amdgcn.image.sample.v2f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 +declare <2 x float> @llvm.amdgcn.image.getlod.v2f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 +declare <4 x float> @llvm.amdgcn.image.getlod.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readonly } diff --git a/test/CodeGen/AMDGPU/amdgpu.private-memory.ll b/test/CodeGen/AMDGPU/amdgpu.private-memory.ll index 228d3c7d4306..71c4c83c28f9 100644 --- a/test/CodeGen/AMDGPU/amdgpu.private-memory.ll +++ b/test/CodeGen/AMDGPU/amdgpu.private-memory.ll @@ -251,7 +251,8 @@ entry: ; R600: MOVA_INT -; SI-PROMOTE-DAG: buffer_store_short v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:4 ; encoding: +; SI-PROMOTE-DAG: buffer_store_byte v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:4 ; encoding: +; SI-PROMOTE-DAG: buffer_store_byte v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:5 ; encoding: ; SI-ALLOCA-DAG: buffer_store_byte v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:4 ; encoding: [0x04,0x00,0x60,0xe0 ; SI-ALLOCA-DAG: buffer_store_byte v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:5 ; encoding: [0x05,0x00,0x60,0xe0 diff --git a/test/CodeGen/AMDGPU/ashr.v2i16.ll b/test/CodeGen/AMDGPU/ashr.v2i16.ll index 073d71ebad05..77bb582fee92 100644 --- a/test/CodeGen/AMDGPU/ashr.v2i16.ll +++ b/test/CodeGen/AMDGPU/ashr.v2i16.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=CIVI %s ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=CIVI %s diff --git a/test/CodeGen/AMDGPU/atomic_cmp_swap_local.ll b/test/CodeGen/AMDGPU/atomic_cmp_swap_local.ll index 4f9526ddab55..836ba764a5b6 100644 --- a/test/CodeGen/AMDGPU/atomic_cmp_swap_local.ll +++ b/test/CodeGen/AMDGPU/atomic_cmp_swap_local.ll @@ -1,13 +1,17 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=SICI -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=SICI -check-prefix=CIVI -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=CIVI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=SI,SICI,SICIVI,GCN %s +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=SICI,CIVI,SICIVI,GCN %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI,CIVI,SICIVI,GFX89,GCN %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck 
-enable-var-scope -check-prefixes=GFX9,GFX89,GCN %s -; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_ret_i32_offset: -; SICI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb -; SICI: s_load_dword [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc -; VI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c -; VI: s_load_dword [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30 -; GCN: v_mov_b32_e32 [[VCMP:v[0-9]+]], 7 +; GCN-LABEL: {{^}}lds_atomic_cmpxchg_ret_i32_offset: +; GFX9-NOT: m0 +; SICIVI-DAG: s_mov_b32 m0 + +; SICI-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb +; SICI-DAG: s_load_dword [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc +; GFX89-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c +; GFX89-DAG: s_load_dword [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30 +; GCN-DAG: v_mov_b32_e32 [[VCMP:v[0-9]+]], 7 ; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]] ; GCN-DAG: v_mov_b32_e32 [[VSWAP:v[0-9]+]], [[SWAP]] ; GCN: ds_cmpst_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[VCMP]], [[VSWAP]] offset:16 @@ -20,18 +24,21 @@ define amdgpu_kernel void @lds_atomic_cmpxchg_ret_i32_offset(i32 addrspace(1)* % ret void } -; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_ret_i64_offset: +; GCN-LABEL: {{^}}lds_atomic_cmpxchg_ret_i64_offset: +; GFX9-NOT: m0 +; SICIVI-DAG: s_mov_b32 m0 + ; SICI-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb ; SICI-DAG: s_load_dwordx2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd -; VI-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c -; VI-DAG: s_load_dwordx2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x34 +; GFX89-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c +; GFX89-DAG: s_load_dwordx2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x34 ; GCN-DAG: v_mov_b32_e32 v[[LOVCMP:[0-9]+]], 7 ; GCN-DAG: v_mov_b32_e32 v[[HIVCMP:[0-9]+]], 0 ; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]] ; GCN-DAG: v_mov_b32_e32 v[[LOSWAPV:[0-9]+]], s[[LOSWAP]] ; GCN-DAG: v_mov_b32_e32 v[[HISWAPV:[0-9]+]], s[[HISWAP]] ; GCN: ds_cmpst_rtn_b64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVCMP]]:[[HIVCMP]]{{\]}}, v{{\[}}[[LOSWAPV]]:[[HISWAPV]]{{\]}} offset:32 -; GCN: buffer_store_dwordx2 [[RESULT]], +; GCN: [[RESULT]] ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_cmpxchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr, i64 %swap) nounwind { %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4 @@ -41,9 +48,11 @@ define amdgpu_kernel void @lds_atomic_cmpxchg_ret_i64_offset(i64 addrspace(1)* % ret void } -; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_ret_i32_bad_si_offset +; GCN-LABEL: {{^}}lds_atomic_cmpxchg_ret_i32_bad_si_offset +; GFX9-NOT: m0 ; SI: ds_cmpst_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} ; CIVI: ds_cmpst_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 +; GFX9: ds_cmpst_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_cmpxchg_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %swap, i32 %a, i32 %b) nounwind { %sub = sub i32 %a, %b @@ -55,11 +64,15 @@ define amdgpu_kernel void @lds_atomic_cmpxchg_ret_i32_bad_si_offset(i32 addrspac ret void } -; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_noret_i32_offset: -; SICI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9 -; SICI: s_load_dword [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xa -; VI: s_load_dword [[PTR:s[0-9]+]], 
s{{\[[0-9]+:[0-9]+\]}}, 0x24 -; VI: s_load_dword [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x28 +; GCN-LABEL: {{^}}lds_atomic_cmpxchg_noret_i32_offset: +; GFX9-NOT: m0 +; SICIVI-DAG: s_mov_b32 m0 + + +; SICI-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9 +; SICI-DAG: s_load_dword [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xa +; GFX89-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x24 +; GFX89-DAG: s_load_dword [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x28 ; GCN-DAG: v_mov_b32_e32 [[VCMP:v[0-9]+]], 7 ; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]] ; GCN-DAG: v_mov_b32_e32 [[VSWAP:v[0-9]+]], [[SWAP]] @@ -72,11 +85,14 @@ define amdgpu_kernel void @lds_atomic_cmpxchg_noret_i32_offset(i32 addrspace(3)* ret void } -; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_noret_i64_offset: -; SICI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9 -; SICI: s_load_dwordx2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb -; VI-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x24 -; VI-DAG: s_load_dwordx2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x2c +; GCN-LABEL: {{^}}lds_atomic_cmpxchg_noret_i64_offset: +; GFX9-NOT: m0 +; SICIVI-DAG: s_mov_b32 m0 + +; SICI-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9 +; SICI-DAG: s_load_dwordx2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb +; GFX89-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x24 +; GFX89-DAG: s_load_dwordx2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x2c ; GCN-DAG: v_mov_b32_e32 v[[LOVCMP:[0-9]+]], 7 ; GCN-DAG: v_mov_b32_e32 v[[HIVCMP:[0-9]+]], 0 ; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]] diff --git a/test/CodeGen/AMDGPU/atomic_load_add.ll b/test/CodeGen/AMDGPU/atomic_load_add.ll index e0fe6641fa11..6b18fe0200d6 100644 --- a/test/CodeGen/AMDGPU/atomic_load_add.ll +++ b/test/CodeGen/AMDGPU/atomic_load_add.ll @@ -1,18 +1,24 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,FUNC %s +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefixes=R600,FUNC %s ; FUNC-LABEL: {{^}}atomic_add_local: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 ; R600: LDS_ADD * -; SI: ds_add_u32 +; GCN: ds_add_u32 define amdgpu_kernel void @atomic_add_local(i32 addrspace(3)* %local) { %unused = atomicrmw volatile add i32 addrspace(3)* %local, i32 5 seq_cst ret void } ; FUNC-LABEL: {{^}}atomic_add_local_const_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; R600: LDS_ADD * -; SI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 +; GCN: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 define amdgpu_kernel void @atomic_add_local_const_offset(i32 addrspace(3)* %local) { %gep = getelementptr i32, i32 addrspace(3)* %local, i32 4 %val = atomicrmw volatile add i32 addrspace(3)* %gep, i32 5 seq_cst @@ -20,8 +26,11 @@ define amdgpu_kernel void 
@atomic_add_local_const_offset(i32 addrspace(3)* %loca } ; FUNC-LABEL: {{^}}atomic_add_ret_local: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; R600: LDS_ADD_RET * -; SI: ds_add_rtn_u32 +; GCN: ds_add_rtn_u32 define amdgpu_kernel void @atomic_add_ret_local(i32 addrspace(1)* %out, i32 addrspace(3)* %local) { %val = atomicrmw volatile add i32 addrspace(3)* %local, i32 5 seq_cst store i32 %val, i32 addrspace(1)* %out @@ -29,8 +38,11 @@ define amdgpu_kernel void @atomic_add_ret_local(i32 addrspace(1)* %out, i32 addr } ; FUNC-LABEL: {{^}}atomic_add_ret_local_const_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; R600: LDS_ADD_RET * -; SI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:20 +; GCN: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:20 define amdgpu_kernel void @atomic_add_ret_local_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %local) { %gep = getelementptr i32, i32 addrspace(3)* %local, i32 5 %val = atomicrmw volatile add i32 addrspace(3)* %gep, i32 5 seq_cst diff --git a/test/CodeGen/AMDGPU/atomic_load_sub.ll b/test/CodeGen/AMDGPU/atomic_load_sub.ll index a0275893919a..1d6fe169e109 100644 --- a/test/CodeGen/AMDGPU/atomic_load_sub.ll +++ b/test/CodeGen/AMDGPU/atomic_load_sub.ll @@ -1,18 +1,25 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SICIVI,FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SICIVI,FUNC %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,FUNC %s +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -enable-var-scope -check-prefixes=R600,FUNC %s ; FUNC-LABEL: {{^}}atomic_sub_local: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; R600: LDS_SUB * -; SI: ds_sub_u32 +; GCN: ds_sub_u32 define amdgpu_kernel void @atomic_sub_local(i32 addrspace(3)* %local) { %unused = atomicrmw volatile sub i32 addrspace(3)* %local, i32 5 seq_cst ret void } ; FUNC-LABEL: {{^}}atomic_sub_local_const_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; R600: LDS_SUB * -; SI: ds_sub_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 +; GCN: ds_sub_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 define amdgpu_kernel void @atomic_sub_local_const_offset(i32 addrspace(3)* %local) { %gep = getelementptr i32, i32 addrspace(3)* %local, i32 4 %val = atomicrmw volatile sub i32 addrspace(3)* %gep, i32 5 seq_cst @@ -20,8 +27,11 @@ define amdgpu_kernel void @atomic_sub_local_const_offset(i32 addrspace(3)* %loca } ; FUNC-LABEL: {{^}}atomic_sub_ret_local: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; R600: LDS_SUB_RET * -; SI: ds_sub_rtn_u32 +; GCN: ds_sub_rtn_u32 define amdgpu_kernel void @atomic_sub_ret_local(i32 addrspace(1)* %out, i32 addrspace(3)* %local) { %val = atomicrmw volatile sub i32 addrspace(3)* %local, i32 5 seq_cst store i32 %val, i32 addrspace(1)* %out @@ -29,8 +39,11 @@ define amdgpu_kernel void @atomic_sub_ret_local(i32 addrspace(1)* %out, i32 addr } ; FUNC-LABEL: {{^}}atomic_sub_ret_local_const_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; R600: LDS_SUB_RET * -; SI: ds_sub_rtn_u32 
v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:20 +; GCN: ds_sub_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:20 define amdgpu_kernel void @atomic_sub_ret_local_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %local) { %gep = getelementptr i32, i32 addrspace(3)* %local, i32 5 %val = atomicrmw volatile sub i32 addrspace(3)* %gep, i32 5 seq_cst diff --git a/test/CodeGen/AMDGPU/branch-relaxation.ll b/test/CodeGen/AMDGPU/branch-relaxation.ll index 9edf439b5863..ba632f97cda6 100644 --- a/test/CodeGen/AMDGPU/branch-relaxation.ll +++ b/test/CodeGen/AMDGPU/branch-relaxation.ll @@ -24,7 +24,7 @@ declare i32 @llvm.amdgcn.workitem.id.x() #1 ; GCN-NEXT: s_cbranch_scc1 [[BB3:BB[0-9]+_[0-9]+]] -; GCN-NEXT: ; BB#1: ; %bb2 +; GCN-NEXT: ; %bb.1: ; %bb2 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: v_nop_e64 ; GCN-NEXT: v_nop_e64 @@ -100,7 +100,8 @@ bb3: ; GCN-LABEL: {{^}}uniform_conditional_min_long_forward_vcnd_branch: ; GCN: s_load_dword [[CND:s[0-9]+]] ; GCN-DAG: v_mov_b32_e32 [[V_CND:v[0-9]+]], [[CND]] -; GCN-DAG: v_cmp_eq_f32_e64 vcc, [[CND]], 0 +; GCN-DAG: v_cmp_eq_f32_e64 [[UNMASKED:s\[[0-9]+:[0-9]+\]]], [[CND]], 0 +; GCN-DAG: s_and_b64 vcc, exec, [[UNMASKED]] ; GCN: s_cbranch_vccz [[LONGBB:BB[0-9]+_[0-9]+]] ; GCN-NEXT: [[LONG_JUMP:BB[0-9]+_[0-9]+]]: ; %bb0 @@ -275,7 +276,7 @@ bb4: } ; GCN-LABEL: {{^}}uniform_unconditional_min_long_backward_branch: -; GCN-NEXT: ; BB#0: ; %entry +; GCN-NEXT: ; %bb.0: ; %entry ; GCN-NEXT: [[LOOP:BB[0-9]_[0-9]+]]: ; %loop ; GCN-NEXT: ; =>This Inner Loop Header: Depth=1 @@ -311,7 +312,7 @@ loop: ; branch from %bb0 to %bb2 ; GCN-LABEL: {{^}}expand_requires_expand: -; GCN-NEXT: ; BB#0: ; %bb0 +; GCN-NEXT: ; %bb.0: ; %bb0 ; GCN: s_load_dword ; GCN: s_cmp_lt_i32 s{{[0-9]+}}, 0{{$}} ; GCN-NEXT: s_cbranch_scc0 [[BB1:BB[0-9]+_[0-9]+]] @@ -398,7 +399,7 @@ bb3: ; GCN: s_cmp_lg_u32 ; GCN: s_cbranch_scc1 [[ENDIF]] -; GCN-NEXT: ; BB#2: ; %if_uniform +; GCN-NEXT: ; %bb.2: ; %if_uniform ; GCN: buffer_store_dword ; GCN-NEXT: [[ENDIF]]: ; %endif @@ -500,8 +501,7 @@ ret: ; GCN: s_setpc_b64 ; GCN: [[LONG_BR_DEST0]] -; GCN: v_cmp_ne_u32_e32 -; GCN-NEXT: s_cbranch_vccz +; GCN: s_cbranch_vccz ; GCN: s_setpc_b64 ; GCN: s_endpgm @@ -520,6 +520,11 @@ bb9: ; preds = %bb br i1 %tmp12, label %bb19, label %bb14 bb13: ; preds = %bb + call void asm sideeffect + "v_nop_e64 + v_nop_e64 + v_nop_e64 + v_nop_e64", ""() #0 br i1 %tmp6, label %bb19, label %bb14 bb14: ; preds = %bb13, %bb9 diff --git a/test/CodeGen/AMDGPU/byval-frame-setup.ll b/test/CodeGen/AMDGPU/byval-frame-setup.ll index 103b8c3a3039..732142c12083 100644 --- a/test/CodeGen/AMDGPU/byval-frame-setup.ll +++ b/test/CodeGen/AMDGPU/byval-frame-setup.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s %struct.ByValStruct = type { [4 x i32] } diff --git a/test/CodeGen/AMDGPU/call-graph-register-usage.ll b/test/CodeGen/AMDGPU/call-graph-register-usage.ll index 0a9468fcbc9d..58e549ef04a6 100644 --- a/test/CodeGen/AMDGPU/call-graph-register-usage.ll +++ b/test/CodeGen/AMDGPU/call-graph-register-usage.ll @@ -1,6 +1,6 @@ -; 
RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CI %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,VI-NOBUG %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=iceland -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,VI-BUG %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CI %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,VI-NOBUG %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=iceland -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,VI-BUG %s ; Make sure to run a GPU with the SGPR allocation bug. diff --git a/test/CodeGen/AMDGPU/call-preserved-registers.ll b/test/CodeGen/AMDGPU/call-preserved-registers.ll index 98a4f1320849..093dd68d9f39 100644 --- a/test/CodeGen/AMDGPU/call-preserved-registers.ll +++ b/test/CodeGen/AMDGPU/call-preserved-registers.ll @@ -1,6 +1,6 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s declare void @external_void_func_void() #0 diff --git a/test/CodeGen/AMDGPU/callee-frame-setup.ll b/test/CodeGen/AMDGPU/callee-frame-setup.ll index 9e01267150e5..88d165144f9d 100644 --- a/test/CodeGen/AMDGPU/callee-frame-setup.ll +++ b/test/CodeGen/AMDGPU/callee-frame-setup.ll @@ -2,7 +2,7 @@ ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX9 %s ; GCN-LABEL: {{^}}callee_no_stack: -; GCN: ; BB#0: +; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt ; GCN-NEXT: s_setpc_b64 define void @callee_no_stack() #0 { @@ -10,7 +10,7 @@ define void @callee_no_stack() #0 { } ; GCN-LABEL: {{^}}callee_no_stack_no_fp_elim: -; GCN: ; BB#0: +; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt ; GCN-NEXT: s_setpc_b64 define void @callee_no_stack_no_fp_elim() #1 { @@ -20,7 +20,7 @@ define void @callee_no_stack_no_fp_elim() #1 { ; Requires frame pointer for access to local regular object. 
; GCN-LABEL: {{^}}callee_with_stack: -; GCN: ; BB#0: +; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt ; GCN-NEXT: s_mov_b32 s5, s32 ; GCN-NEXT: v_mov_b32_e32 v0, 0{{$}} @@ -34,7 +34,7 @@ define void @callee_with_stack() #0 { } ; GCN-LABEL: {{^}}callee_with_stack_and_call: -; GCN: ; BB#0: +; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt ; GCN: s_mov_b32 s5, s32 ; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:8 diff --git a/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll b/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll index 1518c0e503eb..8082a095a084 100644 --- a/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll +++ b/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s ; GCN-LABEL: {{^}}use_dispatch_ptr: ; GCN: s_load_dword s{{[0-9]+}}, s[6:7], 0x0 diff --git a/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll b/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll index 9b3bb69dc9ce..f5e68d188feb 100644 --- a/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll +++ b/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s ; GCN-LABEL: {{^}}use_workitem_id_x: ; GCN: s_waitcnt diff --git a/test/CodeGen/AMDGPU/cf-loop-on-constant.ll b/test/CodeGen/AMDGPU/cf-loop-on-constant.ll index 697f26b83a4d..1e04544d2cbc 100644 --- a/test/CodeGen/AMDGPU/cf-loop-on-constant.ll +++ b/test/CodeGen/AMDGPU/cf-loop-on-constant.ll @@ -95,14 +95,14 @@ for.body: ; GCN-LABEL: {{^}}loop_arg_0: ; GCN: v_and_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}} -; GCN: v_cmp_eq_u32_e32 vcc, 1, +; GCN: v_cmp_eq_u32{{[^,]*}}, 1, ; GCN: [[LOOPBB:BB[0-9]+_[0-9]+]] ; GCN: s_add_i32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80 ; GCN: s_add_i32 s{{[0-9]+}}, s{{[0-9]+}}, 4 ; GCN: s_cbranch_vccnz [[LOOPBB]] -; GCN-NEXT: ; BB#2 +; GCN-NEXT: ; %bb.2 ; GCN-NEXT: s_endpgm define amdgpu_kernel void @loop_arg_0(float addrspace(3)* %ptr, i32 %n, i1 %cond) nounwind { entry: diff --git a/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll b/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll index 3eef06950a48..071bcbcf81bf 100644 --- a/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll +++ b/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll @@ -13,7 +13,7 @@ ; VGPR: workitem_private_segment_byte_size = 12{{$}} -; GCN: {{^}}; BB#0: +; GCN: {{^}}; %bb.0: ; GCN: s_mov_b32 m0, -1 ; GCN: ds_read_b32 [[LOAD0:v[0-9]+]] @@ -89,9 +89,9 @@ endif: } ; GCN-LABEL: {{^}}divergent_loop: -; VGPR: workitem_private_segment_byte_size = 16{{$}} +; VGPR: workitem_private_segment_byte_size = 12{{$}} -; GCN: {{^}}; BB#0: +; GCN: {{^}}; %bb.0: ; GCN: s_mov_b32 m0, -1 ; GCN: ds_read_b32 [[LOAD0:v[0-9]+]] @@ -123,10 +123,9 @@ endif: ; GCN: [[LOOP:BB[0-9]+_[0-9]+]]: ; GCN: buffer_load_dword v[[VAL_LOOP_RELOAD:[0-9]+]], off, s[0:3], s7 offset:4 ; 4-byte 
Folded Reload ; GCN: v_subrev_i32_e32 [[VAL_LOOP:v[0-9]+]], vcc, v{{[0-9]+}}, v[[VAL_LOOP_RELOAD]] -; GCN: v_cmp_ne_u32_e32 vcc, -; GCN: s_and_b64 vcc, exec, vcc +; GCN: s_cmp_lg_u32 s{{[0-9]+}}, s{{[0-9]+}} ; GCN: buffer_store_dword [[VAL_LOOP]], off, s[0:3], s7 offset:[[VAL_SUB_OFFSET:[0-9]+]] ; 4-byte Folded Spill -; GCN-NEXT: s_cbranch_vccnz [[LOOP]] +; GCN-NEXT: s_cbranch_scc1 [[LOOP]] ; GCN: [[END]]: @@ -168,7 +167,7 @@ end: } ; GCN-LABEL: {{^}}divergent_if_else_endif: -; GCN: {{^}}; BB#0: +; GCN: {{^}}; %bb.0: ; GCN: s_mov_b32 m0, -1 ; GCN: ds_read_b32 [[LOAD0:v[0-9]+]] diff --git a/test/CodeGen/AMDGPU/convergent-inlineasm.ll b/test/CodeGen/AMDGPU/convergent-inlineasm.ll index 0074a41e44cf..80907bf1c1be 100644 --- a/test/CodeGen/AMDGPU/convergent-inlineasm.ll +++ b/test/CodeGen/AMDGPU/convergent-inlineasm.ll @@ -2,7 +2,7 @@ declare i32 @llvm.amdgcn.workitem.id.x() #0 ; GCN-LABEL: {{^}}convergent_inlineasm: -; GCN: BB#0: +; GCN: %bb.0: ; GCN: v_cmp_ne_u32_e64 ; GCN: ; mask branch ; GCN: BB{{[0-9]+_[0-9]+}}: diff --git a/test/CodeGen/AMDGPU/debug-value.ll b/test/CodeGen/AMDGPU/debug-value.ll new file mode 100644 index 000000000000..30c134233b53 --- /dev/null +++ b/test/CodeGen/AMDGPU/debug-value.ll @@ -0,0 +1,106 @@ +; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgizcl -verify-machineinstrs < %s | FileCheck %s + +%struct.wombat = type { [4 x i32], [4 x i32], [4 x i32] } + +define amdgpu_kernel void @wobble(i8 addrspace(1)* nocapture readonly %arg) #0 !dbg !4 { +bb: + %tmp = load i32, i32 addrspace(1)* undef, align 4 + %tmp1 = load <4 x float>, <4 x float> addrspace(1)* undef, align 16 + %tmp2 = sext i32 %tmp to i64 + %tmp3 = shufflevector <4 x float> undef, <4 x float> %tmp1, <2 x i32> + %tmp4 = call float @barney() #2 + %tmp5 = getelementptr inbounds i8, i8 addrspace(1)* %arg, i64 0 + %tmp6 = bitcast i8 addrspace(1)* %tmp5 to <2 x float> addrspace(1)* + %tmp7 = getelementptr inbounds i8, i8 addrspace(1)* %arg, i64 0 + %tmp8 = bitcast i8 addrspace(1)* %tmp7 to %struct.wombat addrspace(1)* + %tmp9 = getelementptr inbounds %struct.wombat, %struct.wombat addrspace(1)* %tmp8, i64 %tmp2, i32 2, i64 0 + %tmp10 = load i32, i32 addrspace(1)* %tmp9, align 4 + %tmp11 = sext i32 %tmp10 to i64 + %tmp12 = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %tmp6, i64 %tmp11 + %tmp13 = bitcast <2 x float> addrspace(1)* %tmp12 to i64 addrspace(1)* + %tmp14 = getelementptr inbounds i8, i8 addrspace(1)* %arg, i64 undef + %tmp15 = bitcast i8 addrspace(1)* %tmp14 to <4 x float> addrspace(1)* + %tmp16 = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %tmp15, i64 undef + %tmp17 = load <4 x float>, <4 x float> addrspace(1)* %tmp16, align 16 + %tmp18 = fsub <4 x float> undef, %tmp17 + %tmp19 = fadd float undef, 0.000000e+00 + %tmp20 = fcmp oeq float %tmp19, 0.000000e+00 + br i1 %tmp20, label %bb21, label %bb25 + +bb21: ; preds = %bb + %tmp22 = fmul <4 x float> %tmp18, undef + %tmp23 = fadd <4 x float> undef, %tmp22 + %tmp24 = fmul <4 x float> undef, undef + br label %bb28 + +bb25: ; preds = %bb + %tmp26 = insertelement <4 x float> undef, float 0.000000e+00, i32 1 + %tmp27 = insertelement <4 x float> %tmp26, float undef, i32 2 + br label %bb28 + +bb28: ; preds = %bb25, %bb21 + %tmp29 = phi <4 x float> [ %tmp27, %bb25 ], [ %tmp24, %bb21 ] + store <4 x float> %tmp29, <4 x float> addrspace(5)* undef, align 16 + %tmp30 = getelementptr inbounds %struct.wombat, %struct.wombat addrspace(1)* %tmp8, i64 %tmp2, i32 2, i64 2 + %tmp31 = load i32, i32 addrspace(1)* %tmp30, align 4 + %tmp32 = sext i32 
%tmp31 to i64 + %tmp33 = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %tmp6, i64 %tmp32 + %tmp34 = bitcast <2 x float> addrspace(1)* %tmp33 to i64 addrspace(1)* + %tmp35 = load i64, i64 addrspace(1)* %tmp34, align 8 + %tmp36 = load i32, i32 addrspace(1)* undef, align 4 + %tmp37 = sext i32 %tmp36 to i64 + %tmp38 = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* null, i64 %tmp37 + %tmp39 = load <4 x float>, <4 x float> addrspace(1)* %tmp38, align 16 + %tmp40 = load <4 x float>, <4 x float> addrspace(1)* undef, align 16 + %tmp41 = fsub <4 x float> zeroinitializer, %tmp40 + %tmp42 = fsub <4 x float> %tmp39, %tmp40 + %tmp43 = extractelement <4 x float> %tmp40, i32 1 + %tmp44 = fsub float %tmp43, undef + %tmp45 = fadd float undef, undef + %tmp46 = fdiv float %tmp44, %tmp45 + %tmp47 = insertelement <4 x float> undef, float %tmp46, i32 0 + %tmp48 = shufflevector <4 x float> %tmp47, <4 x float> undef, <4 x i32> zeroinitializer + %tmp49 = fsub <4 x float> %tmp48, %tmp40 + %tmp50 = extractelement <4 x float> %tmp41, i32 1 + %tmp51 = extractelement <4 x float> %tmp42, i32 2 + %tmp52 = fmul float undef, undef + %tmp53 = fadd float %tmp52, undef + %tmp54 = fadd float %tmp51, %tmp53 + %tmp55 = extractelement <4 x float> %tmp49, i32 1 + %tmp56 = fmul float %tmp55, %tmp50 + %tmp57 = fmul float %tmp54, %tmp56 + %tmp58 = fdiv float %tmp57, 0.000000e+00 + ; CHECK: ;DEBUG_VALUE: foo:var <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef] + call void @llvm.dbg.value(metadata <4 x float> %tmp29, metadata !3, metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)) #2, !dbg !5 + %tmp59 = bitcast i64 %tmp35 to <2 x float> + %tmp60 = insertelement <2 x float> undef, float %tmp58, i32 0 + %tmp61 = shufflevector <2 x float> %tmp60, <2 x float> undef, <2 x i32> zeroinitializer + %tmp62 = fmul <2 x float> %tmp61, undef + %tmp63 = fsub <2 x float> %tmp62, %tmp59 + %tmp64 = extractelement <2 x float> %tmp63, i64 0 + call void @eggs(float %tmp64) #2 + store <2 x float> %tmp3, <2 x float> addrspace(1)* undef, align 8 + store float 0.000000e+00, float addrspace(1)* undef, align 4 + ret void +} + +declare float @barney() #2 +declare void @eggs(float) #2 +declare void @llvm.dbg.value(metadata, metadata, metadata) #1 + +attributes #0 = { convergent nounwind "target-cpu"="gfx900" "target-features"="+fp32-denormals" } +attributes #1 = { nounwind readnone speculatable } +attributes #2 = { nounwind } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug) +!1 = !DIFile(filename: "foo.cl", directory: "/tmp") +!2 = !{i32 2, !"Debug Info Version", i32 3} +!3 = !DILocalVariable(name: "var", arg: 8, scope: !4) +!4 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, type: !12, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, unit: !0) +!5 = !DILocation(line: 69, scope: !4) +!12 = !DISubroutineType(types: !13) +!13 = !{null, !14} +!14 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) diff --git a/test/CodeGen/AMDGPU/ds-combine-large-stride.ll b/test/CodeGen/AMDGPU/ds-combine-large-stride.ll index cf6c297906b3..6ae36cc1fbb1 100644 --- a/test/CodeGen/AMDGPU/ds-combine-large-stride.ll +++ b/test/CodeGen/AMDGPU/ds-combine-large-stride.ll @@ -1,15 +1,18 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx900 -verify-machineinstrs < %s 
| FileCheck -check-prefix=GFX9 -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s ; GCN-LABEL: ds_read32_combine_stride_400: ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0 ; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] -; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]] -; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]] -; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]] -; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]] -; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]] -; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]] + +; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] +; VI-DAG: v_add_u32_e32 [[B2:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] +; VI-DAG: v_add_u32_e32 [[B3:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] + +; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 0x320, [[BASE]] +; GFX9-DAG: v_add_u32_e32 [[B2:v[0-9]+]], 0x640, [[BASE]] +; GFX9-DAG: v_add_u32_e32 [[B3:v[0-9]+]], 0x960, [[BASE]] + ; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset1:100 ; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B1]] offset1:100 ; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B2]] offset1:100 @@ -46,12 +49,15 @@ bb: ; GCN-LABEL: ds_read32_combine_stride_400_back: ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0 ; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] -; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]] -; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]] -; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]] -; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]] -; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]] -; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]] + +; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] +; VI-DAG: v_add_u32_e32 [[B2:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] +; VI-DAG: v_add_u32_e32 [[B3:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] + +; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 0x320, [[BASE]] +; GFX9-DAG: v_add_u32_e32 [[B2:v[0-9]+]], 0x640, [[BASE]] +; GFX9-DAG: v_add_u32_e32 [[B3:v[0-9]+]], 0x960, [[BASE]] + ; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset1:100 ; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B1]] offset1:100 ; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B2]] offset1:100 @@ -124,12 +130,15 @@ bb: ; GCN-LABEL: ds_read32_combine_stride_8192_shifted: ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0 ; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] -; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]] -; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x4008, [[BASE]] -; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x8008, [[BASE]] -; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]] -; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x4008, [[BASE]] -; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x8008, [[BASE]] + +; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]] +; VI-DAG: v_add_u32_e32 [[B2:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] +; VI-DAG: v_add_u32_e32 [[B3:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] + +; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 8, [[BASE]] +; GFX9-DAG: 
v_add_u32_e32 [[B2:v[0-9]+]], 0x4008, [[BASE]] +; GFX9-DAG: v_add_u32_e32 [[B3:v[0-9]+]], 0x8008, [[BASE]] + ; GCN-DAG: ds_read2st64_b32 v[{{[0-9]+:[0-9]+}}], [[B1]] offset1:32 ; GCN-DAG: ds_read2st64_b32 v[{{[0-9]+:[0-9]+}}], [[B2]] offset1:32 ; GCN-DAG: ds_read2st64_b32 v[{{[0-9]+:[0-9]+}}], [[B3]] offset1:32 @@ -160,8 +169,10 @@ bb: ; GCN-LABEL: ds_read64_combine_stride_400: ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0 ; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] -; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 0x960, [[BASE]] -; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 0x960, [[BASE]] + +; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] +; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 0x960, [[BASE]] + ; GCN-DAG: ds_read2_b64 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset1:50 ; GCN-DAG: ds_read2_b64 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset0:100 offset1:150 ; GCN-DAG: ds_read2_b64 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset0:200 offset1:250 @@ -198,12 +209,15 @@ bb: ; GCN-LABEL: ds_read64_combine_stride_8192_shifted: ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0 ; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] -; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]] -; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x4008, [[BASE]] -; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x8008, [[BASE]] -; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]] -; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x4008, [[BASE]] -; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x8008, [[BASE]] + +; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]] +; VI-DAG: v_add_u32_e32 [[B2:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] +; VI-DAG: v_add_u32_e32 [[B3:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] + +; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 8, [[BASE]] +; GFX9-DAG: v_add_u32_e32 [[B2:v[0-9]+]], 0x4008, [[BASE]] +; GFX9-DAG: v_add_u32_e32 [[B3:v[0-9]+]], 0x8008, [[BASE]] + ; GCN-DAG: ds_read2st64_b64 v[{{[0-9]+:[0-9]+}}], [[B1]] offset1:16 ; GCN-DAG: ds_read2st64_b64 v[{{[0-9]+:[0-9]+}}], [[B2]] offset1:16 ; GCN-DAG: ds_read2st64_b64 v[{{[0-9]+:[0-9]+}}], [[B3]] offset1:16 @@ -234,12 +248,15 @@ bb: ; GCN-LABEL: ds_write32_combine_stride_400: ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0 ; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] -; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]] -; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]] -; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]] -; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]] -; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]] -; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]] + +; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] +; VI-DAG: v_add_u32_e32 [[B2:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] +; VI-DAG: v_add_u32_e32 [[B3:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] + +; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 0x320, [[BASE]] +; GFX9-DAG: v_add_u32_e32 [[B2:v[0-9]+]], 0x640, [[BASE]] +; GFX9-DAG: v_add_u32_e32 [[B3:v[0-9]+]], 0x960, [[BASE]] + ; GCN-DAG: ds_write2_b32 [[BASE]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100 ; GCN-DAG: ds_write2_b32 [[B1]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100 ; GCN-DAG: ds_write2_b32 [[B2]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100 @@ -267,12 +284,15 @@ bb: ; GCN-LABEL: ds_write32_combine_stride_400_back: ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0 ; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] -; 
GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]] -; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]] -; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]] -; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]] -; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]] -; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]] + +; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] +; VI-DAG: v_add_u32_e32 [[B2:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] +; VI-DAG: v_add_u32_e32 [[B3:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] + +; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 0x320, [[BASE]] +; GFX9-DAG: v_add_u32_e32 [[B2:v[0-9]+]], 0x640, [[BASE]] +; GFX9-DAG: v_add_u32_e32 [[B3:v[0-9]+]], 0x960, [[BASE]] + ; GCN-DAG: ds_write2_b32 [[BASE]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100 ; GCN-DAG: ds_write2_b32 [[B1]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100 ; GCN-DAG: ds_write2_b32 [[B2]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100 @@ -327,12 +347,15 @@ bb: ; GCN-LABEL: ds_write32_combine_stride_8192_shifted: ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0 ; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] -; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 4, [[BASE]] -; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x4004, [[BASE]] -; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x8004, [[BASE]] -; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 4, [[BASE]] -; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x4004, [[BASE]] -; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x8004, [[BASE]] + +; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, 4, [[BASE]] +; VI-DAG: v_add_u32_e32 [[B2:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] +; VI-DAG: v_add_u32_e32 [[B3:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] + +; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 4, [[BASE]] +; GFX9-DAG: v_add_u32_e32 [[B2:v[0-9]+]], 0x4004, [[BASE]] +; GFX9-DAG: v_add_u32_e32 [[B3:v[0-9]+]], 0x8004, [[BASE]] + ; GCN-DAG: ds_write2st64_b32 [[B1]], v{{[0-9]+}}, v{{[0-9]+}} offset1:32 ; GCN-DAG: ds_write2st64_b32 [[B2]], v{{[0-9]+}}, v{{[0-9]+}} offset1:32 ; GCN-DAG: ds_write2st64_b32 [[B3]], v{{[0-9]+}}, v{{[0-9]+}} offset1:32 @@ -356,8 +379,10 @@ bb: ; GCN-LABEL: ds_write64_combine_stride_400: ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0 ; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] -; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 0x960, [[BASE]] -; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 0x960, [[BASE]] + +; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] +; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 0x960, [[BASE]] + ; GCN-DAG: ds_write2_b64 [[BASE]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset1:50 ; GCN-DAG: ds_write2_b64 [[BASE]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset0:100 offset1:150 ; GCN-DAG: ds_write2_b64 [[BASE]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset0:200 offset1:250 @@ -385,12 +410,15 @@ bb: ; GCN-LABEL: ds_write64_combine_stride_8192_shifted: ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0 ; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] -; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]] -; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x4008, [[BASE]] -; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x8008, [[BASE]] -; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]] -; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x4008, [[BASE]] -; GFX9-DAG: v_add_{{[_co]*}}u32_e32 
[[B3:v[0-9]+]], vcc, 0x8008, [[BASE]] + +; VI-DAG: v_add_u32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]] +; VI-DAG: v_add_u32_e32 [[B2:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] +; VI-DAG: v_add_u32_e32 [[B3:v[0-9]+]], vcc, {{s[0-9]+}}, [[BASE]] + +; GFX9-DAG: v_add_u32_e32 [[B1:v[0-9]+]], 8, [[BASE]] +; GFX9-DAG: v_add_u32_e32 [[B2:v[0-9]+]], 0x4008, [[BASE]] +; GFX9-DAG: v_add_u32_e32 [[B3:v[0-9]+]], 0x8008, [[BASE]] + ; GCN-DAG: ds_write2st64_b64 [[B1]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset1:16 ; GCN-DAG: ds_write2st64_b64 [[B2]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset1:16 ; GCN-DAG: ds_write2st64_b64 [[B3]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset1:16 diff --git a/test/CodeGen/AMDGPU/ds-sub-offset.ll b/test/CodeGen/AMDGPU/ds-sub-offset.ll index d74bd5aa15ac..05d8dc0b9a04 100644 --- a/test/CodeGen/AMDGPU/ds-sub-offset.ll +++ b/test/CodeGen/AMDGPU/ds-sub-offset.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s declare i32 @llvm.amdgcn.workitem.id.x() #0 @@ -6,7 +7,8 @@ declare i32 @llvm.amdgcn.workitem.id.x() #0 ; GCN-LABEL: {{^}}write_ds_sub0_offset0_global: ; GCN: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], 2, v0 -; GCN: v_sub_i32_e32 [[BASEPTR:v[0-9]+]], vcc, 0, [[SHL]] +; CI: v_sub_i32_e32 [[BASEPTR:v[0-9]+]], vcc, 0, [[SHL]] +; GFX9: v_sub_u32_e32 [[BASEPTR:v[0-9]+]], 0, [[SHL]] ; GCN: v_mov_b32_e32 [[VAL:v[0-9]+]], 0x7b ; GCN: ds_write_b32 [[BASEPTR]], [[VAL]] offset:12 define amdgpu_kernel void @write_ds_sub0_offset0_global() #0 { @@ -21,7 +23,8 @@ entry: ; GCN-LABEL: {{^}}add_x_shl_neg_to_sub_max_offset: ; GCN-DAG: v_lshlrev_b32_e32 [[SCALED:v[0-9]+]], 2, v0 -; GCN-DAG: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SCALED]] +; CI-DAG: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SCALED]] +; GFX9-DAG: v_sub_u32_e32 [[NEG:v[0-9]+]], 0, [[SCALED]] ; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 13 ; GCN: ds_write_b8 [[NEG]], [[K]] offset:65535 define amdgpu_kernel void @add_x_shl_neg_to_sub_max_offset() #1 { @@ -36,7 +39,8 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_max_offset() #1 { ; GCN-LABEL: {{^}}add_x_shl_neg_to_sub_max_offset_p1: ; GCN-DAG: v_lshlrev_b32_e32 [[SCALED:v[0-9]+]], 2, v0 -; GCN-DAG: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0x10000, [[SCALED]] +; CI-DAG: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0x10000, [[SCALED]] +; GFX9-DAG: v_sub_u32_e32 [[NEG:v[0-9]+]], 0x10000, [[SCALED]] ; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 13 ; GCN: ds_write_b8 [[NEG]], [[K]]{{$}} define amdgpu_kernel void @add_x_shl_neg_to_sub_max_offset_p1() #1 { @@ -51,7 +55,8 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_max_offset_p1() #1 { ; GCN-LABEL: {{^}}add_x_shl_neg_to_sub_multi_use: ; GCN-DAG: v_lshlrev_b32_e32 [[SCALED:v[0-9]+]], 2, v0 -; GCN-DAG: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SCALED]] +; CI-DAG: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SCALED]] +; GFX9-DAG: v_sub_u32_e32 [[NEG:v[0-9]+]], 0, [[SCALED]] ; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 13 ; GCN-NOT: v_sub ; GCN: ds_write_b32 [[NEG]], [[K]] offset:123{{$}} @@ -73,7 +78,8 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_multi_use() #1 { ; GCN-LABEL: {{^}}add_x_shl_neg_to_sub_multi_use_same_offset: ; GCN-DAG: v_lshlrev_b32_e32 [[SCALED:v[0-9]+]], 2, v0 -; GCN-DAG: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 
0, [[SCALED]] +; CI-DAG: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SCALED]] +; GFX9-DAG: v_sub_u32_e32 [[NEG:v[0-9]+]], 0, [[SCALED]] ; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 13 ; GCN-NOT: v_sub ; GCN: ds_write_b32 [[NEG]], [[K]] offset:123{{$}} @@ -93,7 +99,8 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_multi_use_same_offset() #1 { ; GCN-LABEL: {{^}}add_x_shl_neg_to_sub_misaligned_i64_max_offset: ; GCN-DAG: v_lshlrev_b32_e32 [[SCALED:v[0-9]+]], 2, v0 -; GCN-DAG: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SCALED]] +; CI-DAG: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SCALED]] +; GFX9-DAG: v_sub_u32_e32 [[NEG:v[0-9]+]], 0, [[SCALED]] ; GCN: ds_write2_b32 [[NEG]], {{v[0-9]+}}, {{v[0-9]+}} offset0:254 offset1:255 define amdgpu_kernel void @add_x_shl_neg_to_sub_misaligned_i64_max_offset() #1 { %x.i = call i32 @llvm.amdgcn.workitem.id.x() #0 @@ -107,7 +114,8 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_misaligned_i64_max_offset() #1 { ; GCN-LABEL: {{^}}add_x_shl_neg_to_sub_misaligned_i64_max_offset_p1: ; GCN-DAG: v_lshlrev_b32_e32 [[SCALED:v[0-9]+]], 2, v0 -; GCN-DAG: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0x3fc, [[SCALED]] +; CI-DAG: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0x3fc, [[SCALED]] +; GFX9-DAG: v_sub_u32_e32 [[NEG:v[0-9]+]], 0x3fc, [[SCALED]] ; GCN: ds_write2_b32 [[NEG]], {{v[0-9]+}}, {{v[0-9]+}} offset1:1{{$}} define amdgpu_kernel void @add_x_shl_neg_to_sub_misaligned_i64_max_offset_p1() #1 { %x.i = call i32 @llvm.amdgcn.workitem.id.x() #0 diff --git a/test/CodeGen/AMDGPU/ds_read2.ll b/test/CodeGen/AMDGPU/ds_read2.ll index deb90df99dcf..131afb0c6aec 100644 --- a/test/CodeGen/AMDGPU/ds_read2.ll +++ b/test/CodeGen/AMDGPU/ds_read2.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -strict-whitespace -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -enable-var-scope -strict-whitespace -check-prefixes=GCN,CI %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -mattr=+load-store-opt,+flat-for-global < %s | FileCheck -enable-var-scope -strict-whitespace -check-prefixes=GCN,GFX9 %s ; FIXME: We don't get cases where the address was an SGPR because we ; get a copy to the address register for each one. 
@@ -6,12 +7,16 @@ @lds = addrspace(3) global [512 x float] undef, align 4 @lds.f64 = addrspace(3) global [512 x double] undef, align 8 -; SI-LABEL: @simple_read2_f32 -; SI: ds_read2_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:8 -; SI: s_waitcnt lgkmcnt(0) -; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[LO_VREG]], v[[HI_VREG]] -; SI: buffer_store_dword [[RESULT]] -; SI: s_endpgm +; GCN-LABEL: {{^}}simple_read2_f32: +; CI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN: ds_read2_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:8 +; GCN: s_waitcnt lgkmcnt(0) +; GCN: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[LO_VREG]], v[[HI_VREG]] +; CI: buffer_store_dword [[RESULT]] +; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] +; GCN: s_endpgm define amdgpu_kernel void @simple_read2_f32(float addrspace(1)* %out) #0 { %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i @@ -25,12 +30,16 @@ define amdgpu_kernel void @simple_read2_f32(float addrspace(1)* %out) #0 { ret void } -; SI-LABEL: @simple_read2_f32_max_offset -; SI: ds_read2_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:255 -; SI: s_waitcnt lgkmcnt(0) -; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[LO_VREG]], v[[HI_VREG]] -; SI: buffer_store_dword [[RESULT]] -; SI: s_endpgm +; GCN-LABEL: {{^}}simple_read2_f32_max_offset: +; CI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN: ds_read2_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:255 +; GCN: s_waitcnt lgkmcnt(0) +; GCN: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[LO_VREG]], v[[HI_VREG]] + +; CI: buffer_store_dword [[RESULT]] +; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] define amdgpu_kernel void @simple_read2_f32_max_offset(float addrspace(1)* %out) #0 { %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i @@ -44,11 +53,14 @@ define amdgpu_kernel void @simple_read2_f32_max_offset(float addrspace(1)* %out) ret void } -; SI-LABEL: @simple_read2_f32_too_far -; SI-NOT ds_read2_b32 -; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} -; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:1028 -; SI: s_endpgm +; GCN-LABEL: @simple_read2_f32_too_far +; CI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-NOT ds_read2_b32 +; GCN: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} +; GCN: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:1028 +; GCN: s_endpgm define amdgpu_kernel void @simple_read2_f32_too_far(float addrspace(1)* %out) #0 { %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i @@ -62,10 +74,13 @@ define amdgpu_kernel void @simple_read2_f32_too_far(float addrspace(1)* %out) #0 ret void } -; SI-LABEL: @simple_read2_f32_x2 -; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset1:8 -; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27 -; SI: s_endpgm +; GCN-LABEL: @simple_read2_f32_x2 +; CI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset1:8 +; GCN: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27 +; GCN: s_endpgm define amdgpu_kernel void @simple_read2_f32_x2(float addrspace(1)* %out) #0 { %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %idx.0 = add nsw i32 %tid.x, 0 @@ -93,11 +108,14 @@ 
define amdgpu_kernel void @simple_read2_f32_x2(float addrspace(1)* %out) #0 { } ; Make sure there is an instruction between the two sets of reads. -; SI-LABEL: @simple_read2_f32_x2_barrier -; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset1:8 -; SI: s_barrier -; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27 -; SI: s_endpgm +; GCN-LABEL: @simple_read2_f32_x2_barrier +; CI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset1:8 +; GCN: s_barrier +; GCN: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27 +; GCN: s_endpgm define amdgpu_kernel void @simple_read2_f32_x2_barrier(float addrspace(1)* %out) #0 { %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %idx.0 = add nsw i32 %tid.x, 0 @@ -129,10 +147,13 @@ define amdgpu_kernel void @simple_read2_f32_x2_barrier(float addrspace(1)* %out) ; For some reason adding something to the base address for the first ; element results in only folding the inner pair. -; SI-LABEL: @simple_read2_f32_x2_nonzero_base -; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset0:2 offset1:8 -; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27 -; SI: s_endpgm +; GCN-LABEL: @simple_read2_f32_x2_nonzero_base +; CI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset0:2 offset1:8 +; GCN: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27 +; GCN: s_endpgm define amdgpu_kernel void @simple_read2_f32_x2_nonzero_base(float addrspace(1)* %out) #0 { %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %idx.0 = add nsw i32 %tid.x, 2 @@ -165,11 +186,14 @@ define amdgpu_kernel void @simple_read2_f32_x2_nonzero_base(float addrspace(1)* ; Base pointers come from different subregister of same super ; register. We can't safely merge this. -; SI-LABEL: @read2_ptr_is_subreg_arg_f32 -; SI-NOT: ds_read2_b32 -; SI: ds_read_b32 -; SI: ds_read_b32 -; SI: s_endpgm +; GCN-LABEL: @read2_ptr_is_subreg_arg_f32 +; CI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-NOT: ds_read2_b32 +; GCN: ds_read_b32 +; GCN: ds_read_b32 +; GCN: s_endpgm define amdgpu_kernel void @read2_ptr_is_subreg_arg_f32(float addrspace(1)* %out, <2 x float addrspace(3)*> %lds.ptr) #0 { %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %index.0 = insertelement <2 x i32> undef, i32 %x.i, i32 0 @@ -191,11 +215,14 @@ define amdgpu_kernel void @read2_ptr_is_subreg_arg_f32(float addrspace(1)* %out, ; sure we are really rejecting it because of the different ; subregisters. 
-; SI-LABEL: @read2_ptr_is_subreg_arg_offset_f32 -; SI-NOT: ds_read2_b32 -; SI: ds_read_b32 -; SI: ds_read_b32 -; SI: s_endpgm +; GCN-LABEL: @read2_ptr_is_subreg_arg_offset_f32 +; CI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-NOT: ds_read2_b32 +; GCN: ds_read_b32 +; GCN: ds_read_b32 +; GCN: s_endpgm define amdgpu_kernel void @read2_ptr_is_subreg_arg_offset_f32(float addrspace(1)* %out, <2 x float addrspace(3)*> %lds.ptr) #0 { %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %index.0 = insertelement <2 x i32> undef, i32 %x.i, i32 0 @@ -216,9 +243,12 @@ define amdgpu_kernel void @read2_ptr_is_subreg_arg_offset_f32(float addrspace(1) ret void } -; SI-LABEL: {{^}}read2_ptr_is_subreg_f32: -; SI: ds_read2_b32 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset1:8{{$}} -; SI: s_endpgm +; GCN-LABEL: {{^}}read2_ptr_is_subreg_f32: +; CI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN: ds_read2_b32 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset1:8{{$}} +; GCN: s_endpgm define amdgpu_kernel void @read2_ptr_is_subreg_f32(float addrspace(1)* %out) #0 { %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %ptr.0 = insertelement <2 x [512 x float] addrspace(3)*> undef, [512 x float] addrspace(3)* @lds, i32 0 @@ -238,11 +268,14 @@ define amdgpu_kernel void @read2_ptr_is_subreg_f32(float addrspace(1)* %out) #0 ret void } -; SI-LABEL: @simple_read2_f32_volatile_0 -; SI-NOT ds_read2_b32 -; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} -; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:32 -; SI: s_endpgm +; GCN-LABEL: @simple_read2_f32_volatile_0 +; CI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-NOT ds_read2_b32 +; GCN: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} +; GCN: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:32 +; GCN: s_endpgm define amdgpu_kernel void @simple_read2_f32_volatile_0(float addrspace(1)* %out) #0 { %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i @@ -256,11 +289,14 @@ define amdgpu_kernel void @simple_read2_f32_volatile_0(float addrspace(1)* %out) ret void } -; SI-LABEL: @simple_read2_f32_volatile_1 -; SI-NOT ds_read2_b32 -; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} -; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:32 -; SI: s_endpgm +; GCN-LABEL: @simple_read2_f32_volatile_1 +; CI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-NOT ds_read2_b32 +; GCN: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} +; GCN: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:32 +; GCN: s_endpgm define amdgpu_kernel void @simple_read2_f32_volatile_1(float addrspace(1)* %out) #0 { %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i @@ -277,9 +313,12 @@ define amdgpu_kernel void @simple_read2_f32_volatile_1(float addrspace(1)* %out) ; Can't fold since not correctly aligned. ; XXX: This isn't really testing anything useful now. I think CI ; allows unaligned LDS accesses, which would be a problem here. 
-; SI-LABEL: @unaligned_read2_f32 -; SI-NOT: ds_read2_b32 -; SI: s_endpgm +; GCN-LABEL: @unaligned_read2_f32 +; CI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-NOT: ds_read2_b32 +; GCN: s_endpgm define amdgpu_kernel void @unaligned_read2_f32(float addrspace(1)* %out, float addrspace(3)* %lds) #0 { %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %arrayidx0 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %x.i @@ -293,9 +332,12 @@ define amdgpu_kernel void @unaligned_read2_f32(float addrspace(1)* %out, float a ret void } -; SI-LABEL: @misaligned_2_simple_read2_f32 -; SI-NOT: ds_read2_b32 -; SI: s_endpgm +; GCN-LABEL: @misaligned_2_simple_read2_f32 +; CI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-NOT: ds_read2_b32 +; GCN: s_endpgm define amdgpu_kernel void @misaligned_2_simple_read2_f32(float addrspace(1)* %out, float addrspace(3)* %lds) #0 { %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %arrayidx0 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %x.i @@ -309,12 +351,16 @@ define amdgpu_kernel void @misaligned_2_simple_read2_f32(float addrspace(1)* %ou ret void } -; SI-LABEL: @simple_read2_f64 -; SI: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, {{v[0-9]+}} -; SI: ds_read2_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, [[VPTR]] offset1:8 -; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}} -; SI: buffer_store_dwordx2 [[RESULT]] -; SI: s_endpgm +; GCN-LABEL: @simple_read2_f64 +; CI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, {{v[0-9]+}} +; GCN: ds_read2_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, [[VPTR]] offset1:8 +; GCN: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}} + +; CI: buffer_store_dwordx2 [[RESULT]] +; GFX9: global_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] define amdgpu_kernel void @simple_read2_f64(double addrspace(1)* %out) #0 { %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %arrayidx0 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i @@ -328,9 +374,12 @@ define amdgpu_kernel void @simple_read2_f64(double addrspace(1)* %out) #0 { ret void } -; SI-LABEL: @simple_read2_f64_max_offset -; SI: ds_read2_b64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:255 -; SI: s_endpgm +; GCN-LABEL: @simple_read2_f64_max_offset +; CI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN: ds_read2_b64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:255 +; GCN: s_endpgm define amdgpu_kernel void @simple_read2_f64_max_offset(double addrspace(1)* %out) #0 { %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %arrayidx0 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i @@ -344,11 +393,14 @@ define amdgpu_kernel void @simple_read2_f64_max_offset(double addrspace(1)* %out ret void } -; SI-LABEL: @simple_read2_f64_too_far -; SI-NOT ds_read2_b64 -; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} -; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset:2056 -; SI: s_endpgm +; GCN-LABEL: @simple_read2_f64_too_far +; CI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-NOT ds_read2_b64 +; GCN: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} +; GCN: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset:2056 +; GCN: s_endpgm define amdgpu_kernel void @simple_read2_f64_too_far(double addrspace(1)* %out) #0 { %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %arrayidx0 = getelementptr inbounds [512 x double], [512 x 
double] addrspace(3)* @lds.f64, i32 0, i32 %x.i @@ -363,10 +415,13 @@ define amdgpu_kernel void @simple_read2_f64_too_far(double addrspace(1)* %out) # } ; Alignment only 4 -; SI-LABEL: @misaligned_read2_f64 -; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset1:1 -; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:14 offset1:15 -; SI: s_endpgm +; GCN-LABEL: @misaligned_read2_f64 +; CI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset1:1 +; GCN: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:14 offset1:15 +; GCN: s_endpgm define amdgpu_kernel void @misaligned_read2_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 { %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %x.i @@ -382,9 +437,12 @@ define amdgpu_kernel void @misaligned_read2_f64(double addrspace(1)* %out, doubl @foo = addrspace(3) global [4 x i32] undef, align 4 -; SI-LABEL: @load_constant_adjacent_offsets -; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}} -; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset1:1 +; GCN-LABEL: @load_constant_adjacent_offsets +; CI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}} +; GCN: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset1:1 define amdgpu_kernel void @load_constant_adjacent_offsets(i32 addrspace(1)* %out) { %val0 = load i32, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4 %val1 = load i32, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 1), align 4 @@ -393,9 +451,12 @@ define amdgpu_kernel void @load_constant_adjacent_offsets(i32 addrspace(1)* %out ret void } -; SI-LABEL: @load_constant_disjoint_offsets -; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}} -; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset1:2 +; GCN-LABEL: @load_constant_disjoint_offsets +; CI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}} +; GCN: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset1:2 define amdgpu_kernel void @load_constant_disjoint_offsets(i32 addrspace(1)* %out) { %val0 = load i32, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4 %val1 = load i32, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 2), align 4 @@ -406,10 +467,13 @@ define amdgpu_kernel void @load_constant_disjoint_offsets(i32 addrspace(1)* %out @bar = addrspace(3) global [4 x i64] undef, align 4 -; SI-LABEL: @load_misaligned64_constant_offsets -; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}} -; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset1:1 -; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:2 offset1:3 +; GCN-LABEL: @load_misaligned64_constant_offsets +; CI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}} +; GCN: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset1:1 +; GCN: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:2 offset1:3 define amdgpu_kernel void @load_misaligned64_constant_offsets(i64 addrspace(1)* %out) { %val0 = load i64, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @bar, i32 0, i32 0), align 4 %val1 = load i64, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @bar, i32 0, i32 1), align 4 @@ -420,12 +484,15 @@ define amdgpu_kernel void 
@load_misaligned64_constant_offsets(i64 addrspace(1)* @bar.large = addrspace(3) global [4096 x i64] undef, align 4 -; SI-LABEL: @load_misaligned64_constant_large_offsets -; SI-DAG: v_mov_b32_e32 [[BASE0:v[0-9]+]], 0x7ff8{{$}} -; SI-DAG: v_mov_b32_e32 [[BASE1:v[0-9]+]], 0x4000 -; SI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASE0]] offset1:1 -; SI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASE1]] offset1:1 -; SI: s_endpgm +; GCN-LABEL: @load_misaligned64_constant_large_offsets +; CI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-DAG: v_mov_b32_e32 [[BASE0:v[0-9]+]], 0x7ff8{{$}} +; GCN-DAG: v_mov_b32_e32 [[BASE1:v[0-9]+]], 0x4000 +; GCN-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASE0]] offset1:1 +; GCN-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASE1]] offset1:1 +; GCN: s_endpgm define amdgpu_kernel void @load_misaligned64_constant_large_offsets(i64 addrspace(1)* %out) { %val0 = load i64, i64 addrspace(3)* getelementptr inbounds ([4096 x i64], [4096 x i64] addrspace(3)* @bar.large, i32 0, i32 2048), align 4 %val1 = load i64, i64 addrspace(3)* getelementptr inbounds ([4096 x i64], [4096 x i64] addrspace(3)* @bar.large, i32 0, i32 4095), align 4 @@ -437,6 +504,10 @@ define amdgpu_kernel void @load_misaligned64_constant_large_offsets(i64 addrspac @sgemm.lA = internal unnamed_addr addrspace(3) global [264 x float] undef, align 4 @sgemm.lB = internal unnamed_addr addrspace(3) global [776 x float] undef, align 4 +; GCN-LABEL: {{^}}sgemm_inner_loop_read2_sequence: +; CI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + define amdgpu_kernel void @sgemm_inner_loop_read2_sequence(float addrspace(1)* %C, i32 %lda, i32 %ldb) #0 { %x.i = tail call i32 @llvm.amdgcn.workgroup.id.x() #1 %y.i = tail call i32 @llvm.amdgcn.workitem.id.y() #1 @@ -481,20 +552,29 @@ define amdgpu_kernel void @sgemm_inner_loop_read2_sequence(float addrspace(1)* % ret void } +; GCN-LABEL: {{^}}misaligned_read2_v2i32: +; CI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 define amdgpu_kernel void @misaligned_read2_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(3)* %in) #0 { %load = load <2 x i32>, <2 x i32> addrspace(3)* %in, align 4 store <2 x i32> %load, <2 x i32> addrspace(1)* %out, align 8 ret void } +; GCN-LABEL: {{^}}misaligned_read2_i64: +; CI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 define amdgpu_kernel void @misaligned_read2_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %in) #0 { %load = load i64, i64 addrspace(3)* %in, align 4 store i64 %load, i64 addrspace(1)* %out, align 8 ret void } -; SI-LABEL: ds_read_diff_base_interleaving -; SI-NOT: ds_read_b32 +; GCN-LABEL: ds_read_diff_base_interleaving +; CI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-NOT: ds_read_b32 define amdgpu_kernel void @ds_read_diff_base_interleaving( float addrspace(1)* nocapture %arg, [4 x [4 x float]] addrspace(3)* %arg1, @@ -533,21 +613,13 @@ bb: ret void } -; Function Attrs: nounwind readnone declare i32 @llvm.amdgcn.workgroup.id.x() #1 - -; Function Attrs: nounwind readnone declare i32 @llvm.amdgcn.workgroup.id.y() #1 - -; Function Attrs: nounwind readnone declare i32 @llvm.amdgcn.workitem.id.x() #1 - -; Function Attrs: nounwind readnone declare i32 @llvm.amdgcn.workitem.id.y() #1 -; Function Attrs: convergent nounwind declare void @llvm.amdgcn.s.barrier() #2 attributes #0 = { nounwind } -attributes #1 = { nounwind readnone } +attributes #1 = { nounwind readnone speculatable } attributes #2 = { convergent nounwind } diff --git a/test/CodeGen/AMDGPU/ds_read2st64.ll b/test/CodeGen/AMDGPU/ds_read2st64.ll index b1fba8c240d7..c09dca4635f9 100644 --- 
a/test/CodeGen/AMDGPU/ds_read2st64.ll +++ b/test/CodeGen/AMDGPU/ds_read2st64.ll @@ -1,15 +1,19 @@ -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s @lds = addrspace(3) global [512 x float] undef, align 4 @lds.f64 = addrspace(3) global [512 x double] undef, align 8 -; SI-LABEL: @simple_read2st64_f32_0_1 -; SI: ds_read2st64_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:1 -; SI: s_waitcnt lgkmcnt(0) -; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[LO_VREG]], v[[HI_VREG]] -; SI: buffer_store_dword [[RESULT]] -; SI: s_endpgm +; GCN-LABEL: @simple_read2st64_f32_0_1 +; CI: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN: ds_read2st64_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:1 +; GCN: s_waitcnt lgkmcnt(0) +; GCN: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[LO_VREG]], v[[HI_VREG]] +; CI: buffer_store_dword [[RESULT]] +; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] define amdgpu_kernel void @simple_read2st64_f32_0_1(float addrspace(1)* %out) #0 { %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i @@ -23,12 +27,15 @@ define amdgpu_kernel void @simple_read2st64_f32_0_1(float addrspace(1)* %out) #0 ret void } -; SI-LABEL: @simple_read2st64_f32_1_2 -; SI: ds_read2st64_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1 offset1:2 -; SI: s_waitcnt lgkmcnt(0) -; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[LO_VREG]], v[[HI_VREG]] -; SI: buffer_store_dword [[RESULT]] -; SI: s_endpgm +; GCN-LABEL: @simple_read2st64_f32_1_2 +; CI: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN: ds_read2st64_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1 offset1:2 +; GCN: s_waitcnt lgkmcnt(0) +; GCN: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[LO_VREG]], v[[HI_VREG]] +; CI: buffer_store_dword [[RESULT]] +; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] define amdgpu_kernel void @simple_read2st64_f32_1_2(float addrspace(1)* %out, float addrspace(3)* %lds) #0 { %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %add.x.0 = add nsw i32 %x.i, 64 @@ -43,12 +50,15 @@ define amdgpu_kernel void @simple_read2st64_f32_1_2(float addrspace(1)* %out, fl ret void } -; SI-LABEL: @simple_read2st64_f32_max_offset -; SI: ds_read2st64_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1 offset1:255 -; SI: s_waitcnt lgkmcnt(0) -; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[LO_VREG]], v[[HI_VREG]] -; SI: buffer_store_dword [[RESULT]] -; SI: s_endpgm +; GCN-LABEL: @simple_read2st64_f32_max_offset +; CI: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN: ds_read2st64_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1 offset1:255 +; GCN: s_waitcnt lgkmcnt(0) +; GCN: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[LO_VREG]], v[[HI_VREG]] +; CI: buffer_store_dword [[RESULT]] +; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] define amdgpu_kernel void @simple_read2st64_f32_max_offset(float addrspace(1)* %out, float addrspace(3)* %lds) #0 { %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %add.x.0 = add nsw i32 %x.i, 64 @@ -63,12 +73,15 @@ define 
amdgpu_kernel void @simple_read2st64_f32_max_offset(float addrspace(1)* % ret void } -; SI-LABEL: @simple_read2st64_f32_over_max_offset -; SI-NOT: ds_read2st64_b32 -; SI-DAG: v_add_i32_e32 [[BIGADD:v[0-9]+]], vcc, 0x10000, {{v[0-9]+}} -; SI-DAG: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:256 -; SI-DAG: ds_read_b32 {{v[0-9]+}}, [[BIGADD]]{{$}} -; SI: s_endpgm +; GCN-LABEL: @simple_read2st64_f32_over_max_offset +; CI: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-NOT: ds_read2st64_b32 +; GCN-DAG: v_add_{{i|u}}32_e32 [[BIGADD:v[0-9]+]], {{(vcc, )?}}0x10000, {{v[0-9]+}} +; GCN-DAG: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:256 +; GCN-DAG: ds_read_b32 {{v[0-9]+}}, [[BIGADD]]{{$}} +; GCN: s_endpgm define amdgpu_kernel void @simple_read2st64_f32_over_max_offset(float addrspace(1)* %out, float addrspace(3)* %lds) #0 { %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %add.x.0 = add nsw i32 %x.i, 64 @@ -83,9 +96,12 @@ define amdgpu_kernel void @simple_read2st64_f32_over_max_offset(float addrspace( ret void } -; SI-LABEL: @odd_invalid_read2st64_f32_0 -; SI-NOT: ds_read2st64_b32 -; SI: s_endpgm +; GCN-LABEL: @odd_invalid_read2st64_f32_0 +; CI: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-NOT: ds_read2st64_b32 +; GCN: s_endpgm define amdgpu_kernel void @odd_invalid_read2st64_f32_0(float addrspace(1)* %out) #0 { %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i @@ -99,9 +115,12 @@ define amdgpu_kernel void @odd_invalid_read2st64_f32_0(float addrspace(1)* %out) ret void } -; SI-LABEL: @odd_invalid_read2st64_f32_1 -; SI-NOT: ds_read2st64_b32 -; SI: s_endpgm +; GCN-LABEL: @odd_invalid_read2st64_f32_1 +; CI: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-NOT: ds_read2st64_b32 +; GCN: s_endpgm define amdgpu_kernel void @odd_invalid_read2st64_f32_1(float addrspace(1)* %out) #0 { %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %add.x.0 = add nsw i32 %x.i, 64 @@ -116,12 +135,15 @@ define amdgpu_kernel void @odd_invalid_read2st64_f32_1(float addrspace(1)* %out) ret void } -; SI-LABEL: @simple_read2st64_f64_0_1 -; SI: ds_read2st64_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:1 -; SI: s_waitcnt lgkmcnt(0) -; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}} -; SI: buffer_store_dwordx2 [[RESULT]] -; SI: s_endpgm +; GCN-LABEL: @simple_read2st64_f64_0_1 +; CI: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN: ds_read2st64_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:1 +; GCN: s_waitcnt lgkmcnt(0) +; GCN: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}} +; CI: buffer_store_dwordx2 [[RESULT]] +; GFX9: global_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] define amdgpu_kernel void @simple_read2st64_f64_0_1(double addrspace(1)* %out) #0 { %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %arrayidx0 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i @@ -135,12 +157,16 @@ define amdgpu_kernel void @simple_read2st64_f64_0_1(double addrspace(1)* %out) # ret void } -; SI-LABEL: @simple_read2st64_f64_1_2 -; SI: ds_read2st64_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1 offset1:2 -; SI: s_waitcnt lgkmcnt(0) -; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}} -; SI: buffer_store_dwordx2 [[RESULT]] -; SI: s_endpgm +; GCN-LABEL: 
@simple_read2st64_f64_1_2 +; CI: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN: ds_read2st64_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1 offset1:2 +; GCN: s_waitcnt lgkmcnt(0) +; GCN: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}} + +; CI: buffer_store_dwordx2 [[RESULT]] +; GFX9: global_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] define amdgpu_kernel void @simple_read2st64_f64_1_2(double addrspace(1)* %out, double addrspace(3)* %lds) #0 { %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %add.x.0 = add nsw i32 %x.i, 64 @@ -157,10 +183,13 @@ define amdgpu_kernel void @simple_read2st64_f64_1_2(double addrspace(1)* %out, d ; Alignment only -; SI-LABEL: @misaligned_read2st64_f64 -; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset1:1 -; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:128 offset1:129 -; SI: s_endpgm +; GCN-LABEL: @misaligned_read2st64_f64 +; CI: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset1:1 +; GCN: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:128 offset1:129 +; GCN: s_endpgm define amdgpu_kernel void @misaligned_read2st64_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 { %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %x.i @@ -175,12 +204,16 @@ define amdgpu_kernel void @misaligned_read2st64_f64(double addrspace(1)* %out, d } ; The maximum is not the usual 0xff because 0xff * 8 * 64 > 0xffff -; SI-LABEL: @simple_read2st64_f64_max_offset -; SI: ds_read2st64_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:4 offset1:127 -; SI: s_waitcnt lgkmcnt(0) -; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}} -; SI: buffer_store_dwordx2 [[RESULT]] -; SI: s_endpgm +; GCN-LABEL: @simple_read2st64_f64_max_offset +; CI: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN: ds_read2st64_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:4 offset1:127 +; GCN: s_waitcnt lgkmcnt(0) +; GCN: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}} + +; CI: buffer_store_dwordx2 [[RESULT]] +; GFX9: global_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] define amdgpu_kernel void @simple_read2st64_f64_max_offset(double addrspace(1)* %out, double addrspace(3)* %lds) #0 { %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %add.x.0 = add nsw i32 %x.i, 256 @@ -195,12 +228,15 @@ define amdgpu_kernel void @simple_read2st64_f64_max_offset(double addrspace(1)* ret void } -; SI-LABEL: @simple_read2st64_f64_over_max_offset -; SI-NOT: ds_read2st64_b64 -; SI-DAG: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset:512 -; SI-DAG: v_add_i32_e32 [[BIGADD:v[0-9]+]], vcc, 0x10000, {{v[0-9]+}} -; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, [[BIGADD]] -; SI: s_endpgm +; GCN-LABEL: @simple_read2st64_f64_over_max_offset +; CI: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-NOT: ds_read2st64_b64 +; GCN-DAG: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset:512 +; GCN-DAG: v_add_{{i|u}}32_e32 [[BIGADD:v[0-9]+]], {{(vcc, )?}}0x10000, {{v[0-9]+}} +; GCN: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, [[BIGADD]] +; GCN: s_endpgm define amdgpu_kernel void @simple_read2st64_f64_over_max_offset(double addrspace(1)* %out, double addrspace(3)* %lds) #0 { %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %add.x.0 = add nsw i32 %x.i, 
64 @@ -215,9 +251,12 @@ define amdgpu_kernel void @simple_read2st64_f64_over_max_offset(double addrspace ret void } -; SI-LABEL: @invalid_read2st64_f64_odd_offset -; SI-NOT: ds_read2st64_b64 -; SI: s_endpgm +; GCN-LABEL: @invalid_read2st64_f64_odd_offset +; CI: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-NOT: ds_read2st64_b64 +; GCN: s_endpgm define amdgpu_kernel void @invalid_read2st64_f64_odd_offset(double addrspace(1)* %out, double addrspace(3)* %lds) #0 { %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %add.x.0 = add nsw i32 %x.i, 64 @@ -235,10 +274,13 @@ define amdgpu_kernel void @invalid_read2st64_f64_odd_offset(double addrspace(1)* ; The stride of 8 elements is 8 * 8 bytes. We need to make sure the ; stride in elements, not bytes, is a multiple of 64. -; SI-LABEL: @byte_size_only_divisible_64_read2_f64 -; SI-NOT: ds_read2st_b64 -; SI: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:8 -; SI: s_endpgm +; GCN-LABEL: @byte_size_only_divisible_64_read2_f64 +; CI: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-NOT: ds_read2st_b64 +; GCN: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:8 +; GCN: s_endpgm define amdgpu_kernel void @byte_size_only_divisible_64_read2_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 { %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %x.i @@ -252,10 +294,7 @@ define amdgpu_kernel void @byte_size_only_divisible_64_read2_f64(double addrspac ret void } -; Function Attrs: nounwind readnone declare i32 @llvm.amdgcn.workitem.id.x() #1 - -; Function Attrs: nounwind readnone declare i32 @llvm.amdgcn.workitem.id.y() #1 attributes #0 = { nounwind } diff --git a/test/CodeGen/AMDGPU/ds_write2.ll b/test/CodeGen/AMDGPU/ds_write2.ll index 0f49919a1d10..7cb070c12b65 100644 --- a/test/CodeGen/AMDGPU/ds_write2.ll +++ b/test/CodeGen/AMDGPU/ds_write2.ll @@ -1,14 +1,17 @@ -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -strict-whitespace -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -enable-var-scope -strict-whitespace -check-prefixes=GCN,CI %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -mattr=+load-store-opt,+flat-for-global < %s | FileCheck -enable-var-scope -strict-whitespace -check-prefixes=GCN,GFX9 %s @lds = addrspace(3) global [512 x float] undef, align 4 @lds.f64 = addrspace(3) global [512 x double] undef, align 8 +; GCN-LABEL: {{^}}simple_write2_one_val_f32: +; CI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 -; SI-LABEL: @simple_write2_one_val_f32 -; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]] -; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}} -; SI: ds_write2_b32 [[VPTR]], [[VAL]], [[VAL]] offset1:8 -; SI: s_endpgm +; GCN-DAG: {{buffer|flat|global}}_load_dword [[VAL:v[0-9]+]] +; GCN-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}} +; GCN: ds_write2_b32 [[VPTR]], [[VAL]], [[VAL]] offset1:8 +; GCN: s_endpgm define amdgpu_kernel void @simple_write2_one_val_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 { %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %in.gep = getelementptr float, float addrspace(1)* %in, i32 %x.i @@ -21,12 +24,19 @@ define amdgpu_kernel void @simple_write2_one_val_f32(float addrspace(1)* %C, flo ret void } -; SI-LABEL: @simple_write2_two_val_f32 -; SI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} -; SI-DAG: buffer_load_dword 
[[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 -; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}} -; SI: ds_write2_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset1:8 -; SI: s_endpgm +; GCN-LABEL: {{^}}simple_write2_two_val_f32: +; CI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; CI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; CI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 + +; GFX9-DAG: global_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, off{{$}} +; GFX9-DAG: global_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, off offset:4 + +; GCN-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}} +; GCN: ds_write2_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset1:8 +; GCN: s_endpgm define amdgpu_kernel void @simple_write2_two_val_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 { %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %x.i @@ -41,11 +51,14 @@ define amdgpu_kernel void @simple_write2_two_val_f32(float addrspace(1)* %C, flo ret void } -; SI-LABEL: @simple_write2_two_val_f32_volatile_0 -; SI-NOT: ds_write2_b32 -; SI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} -; SI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:32 -; SI: s_endpgm +; GCN-LABEL: @simple_write2_two_val_f32_volatile_0 +; CI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-NOT: ds_write2_b32 +; GCN: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} +; GCN: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:32 +; GCN: s_endpgm define amdgpu_kernel void @simple_write2_two_val_f32_volatile_0(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 { %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %x.i @@ -60,11 +73,14 @@ define amdgpu_kernel void @simple_write2_two_val_f32_volatile_0(float addrspace( ret void } -; SI-LABEL: @simple_write2_two_val_f32_volatile_1 -; SI-NOT: ds_write2_b32 -; SI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} -; SI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:32 -; SI: s_endpgm +; GCN-LABEL: @simple_write2_two_val_f32_volatile_1 +; CI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-NOT: ds_write2_b32 +; GCN: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} +; GCN: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:32 +; GCN: s_endpgm define amdgpu_kernel void @simple_write2_two_val_f32_volatile_1(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 { %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %x.i @@ -80,12 +96,20 @@ define amdgpu_kernel void @simple_write2_two_val_f32_volatile_1(float addrspace( } ; 2 data subregisters from different super registers. 
-; SI-LABEL: @simple_write2_two_val_subreg2_mixed_f32 -; SI: buffer_load_dwordx2 v{{\[}}[[VAL0:[0-9]+]]:{{[0-9]+\]}} -; SI: buffer_load_dwordx2 v{{\[[0-9]+}}:[[VAL1:[0-9]+]]{{\]}} -; SI: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}} -; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset1:8 -; SI: s_endpgm +; GCN-LABEL: {{^}}simple_write2_two_val_subreg2_mixed_f32: +; GFX9-NOT: m0 + +; CI: buffer_load_dwordx2 v{{\[}}[[VAL0:[0-9]+]]:{{[0-9]+\]}} +; CI: buffer_load_dwordx2 v{{\[[0-9]+}}:[[VAL1:[0-9]+]]{{\]}} +; CI-DAG: s_mov_b32 m0 + +; GCN-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}} + +; GFX9: global_load_dwordx2 v{{\[}}[[VAL0:[0-9]+]]:{{[0-9]+\]}} +; GFX9: global_load_dwordx2 v{{\[[0-9]+}}:[[VAL1:[0-9]+]]{{\]}} + +; GCN: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset1:8 +; GCN: s_endpgm define amdgpu_kernel void @simple_write2_two_val_subreg2_mixed_f32(float addrspace(1)* %C, <2 x float> addrspace(1)* %in) #0 { %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %in.gep.0 = getelementptr <2 x float>, <2 x float> addrspace(1)* %in, i32 %x.i @@ -102,11 +126,14 @@ define amdgpu_kernel void @simple_write2_two_val_subreg2_mixed_f32(float addrspa ret void } -; SI-LABEL: @simple_write2_two_val_subreg2_f32 -; SI-DAG: buffer_load_dwordx2 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}} -; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}} -; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset1:8 -; SI: s_endpgm +; GCN-LABEL: @simple_write2_two_val_subreg2_f32 +; CI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-DAG: {{buffer|global}}_load_dwordx2 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}} +; GCN-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}} +; GCN: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset1:8 +; GCN: s_endpgm define amdgpu_kernel void @simple_write2_two_val_subreg2_f32(float addrspace(1)* %C, <2 x float> addrspace(1)* %in) #0 { %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %in.gep = getelementptr <2 x float>, <2 x float> addrspace(1)* %in, i32 %x.i @@ -121,11 +148,14 @@ define amdgpu_kernel void @simple_write2_two_val_subreg2_f32(float addrspace(1)* ret void } -; SI-LABEL: @simple_write2_two_val_subreg4_f32 -; SI-DAG: buffer_load_dwordx4 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}} -; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}} -; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset1:8 -; SI: s_endpgm +; GCN-LABEL: @simple_write2_two_val_subreg4_f32 +; CI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-DAG: {{buffer|global}}_load_dwordx4 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}} +; GCN-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}} +; GCN: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset1:8 +; GCN: s_endpgm define amdgpu_kernel void @simple_write2_two_val_subreg4_f32(float addrspace(1)* %C, <4 x float> addrspace(1)* %in) #0 { %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %in.gep = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 %x.i @@ -140,12 +170,19 @@ define amdgpu_kernel void @simple_write2_two_val_subreg4_f32(float addrspace(1)* ret void } -; SI-LABEL: @simple_write2_two_val_max_offset_f32 -; SI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} -; SI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 -; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}} -; SI: ds_write2_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset1:255 -; SI: s_endpgm +; GCN-LABEL: 
@simple_write2_two_val_max_offset_f32 +; CI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; CI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; CI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 + +; GFX9-DAG: global_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, off{{$}} +; GFX9-DAG: global_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, off offset:4 + +; GCN-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}} +; GCN: ds_write2_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset1:255 +; GCN: s_endpgm define amdgpu_kernel void @simple_write2_two_val_max_offset_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 { %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %x.i @@ -160,10 +197,13 @@ define amdgpu_kernel void @simple_write2_two_val_max_offset_f32(float addrspace( ret void } -; SI-LABEL: @simple_write2_two_val_too_far_f32 -; SI: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}} -; SI: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:1028 -; SI: s_endpgm +; GCN-LABEL: @simple_write2_two_val_too_far_f32 +; CI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}} +; GCN: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:1028 +; GCN: s_endpgm define amdgpu_kernel void @simple_write2_two_val_too_far_f32(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 { %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %x.i @@ -178,10 +218,13 @@ define amdgpu_kernel void @simple_write2_two_val_too_far_f32(float addrspace(1)* ret void } -; SI-LABEL: @simple_write2_two_val_f32_x2 -; SI: ds_write2_b32 [[BASEADDR:v[0-9]+]], [[VAL0:v[0-9]+]], [[VAL1:v[0-9]+]] offset1:8 -; SI: ds_write2_b32 [[BASEADDR:v[0-9]+]], [[VAL0]], [[VAL1]] offset0:11 offset1:27 -; SI: s_endpgm +; GCN-LABEL: @simple_write2_two_val_f32_x2 +; CI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN: ds_write2_b32 [[BASEADDR:v[0-9]+]], [[VAL0:v[0-9]+]], [[VAL1:v[0-9]+]] offset1:8 +; GCN: ds_write2_b32 [[BASEADDR:v[0-9]+]], [[VAL0]], [[VAL1]] offset0:11 offset1:27 +; GCN: s_endpgm define amdgpu_kernel void @simple_write2_two_val_f32_x2(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 { %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %tid.x @@ -208,10 +251,13 @@ define amdgpu_kernel void @simple_write2_two_val_f32_x2(float addrspace(1)* %C, ret void } -; SI-LABEL: @simple_write2_two_val_f32_x2_nonzero_base -; SI: ds_write2_b32 [[BASEADDR:v[0-9]+]], [[VAL0:v[0-9]+]], [[VAL1:v[0-9]+]] offset0:3 offset1:8 -; SI: ds_write2_b32 [[BASEADDR:v[0-9]+]], [[VAL0]], [[VAL1]] offset0:11 offset1:27 -; SI: s_endpgm +; GCN-LABEL: @simple_write2_two_val_f32_x2_nonzero_base +; CI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN: ds_write2_b32 [[BASEADDR:v[0-9]+]], [[VAL0:v[0-9]+]], [[VAL1:v[0-9]+]] offset0:3 offset1:8 +; GCN: ds_write2_b32 [[BASEADDR:v[0-9]+]], [[VAL0]], [[VAL1]] offset0:11 offset1:27 +; GCN: s_endpgm define amdgpu_kernel void @simple_write2_two_val_f32_x2_nonzero_base(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 { %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %tid.x @@ -238,11 +284,14 @@ define amdgpu_kernel void @simple_write2_two_val_f32_x2_nonzero_base(float addrs ret void } -; 
SI-LABEL: @write2_ptr_subreg_arg_two_val_f32 -; SI-NOT: ds_write2_b32 -; SI: ds_write_b32 -; SI: ds_write_b32 -; SI: s_endpgm +; GCN-LABEL: @write2_ptr_subreg_arg_two_val_f32 +; CI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-NOT: ds_write2_b32 +; GCN: ds_write_b32 +; GCN: ds_write_b32 +; GCN: s_endpgm define amdgpu_kernel void @write2_ptr_subreg_arg_two_val_f32(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1, <2 x float addrspace(3)*> %lds.ptr) #0 { %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %x.i @@ -265,11 +314,14 @@ define amdgpu_kernel void @write2_ptr_subreg_arg_two_val_f32(float addrspace(1)* ret void } -; SI-LABEL: @simple_write2_one_val_f64 -; SI-DAG: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]], -; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}} -; SI: ds_write2_b64 [[VPTR]], [[VAL]], [[VAL]] offset1:8 -; SI: s_endpgm +; GCN-LABEL: @simple_write2_one_val_f64 +; CI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-DAG: {{buffer|global}}_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]], +; GCN-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}} +; GCN: ds_write2_b64 [[VPTR]], [[VAL]], [[VAL]] offset1:8 +; GCN: s_endpgm define amdgpu_kernel void @simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace(1)* %in) #0 { %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %in.gep = getelementptr double, double addrspace(1)* %in, i32 %x.i @@ -282,12 +334,15 @@ define amdgpu_kernel void @simple_write2_one_val_f64(double addrspace(1)* %C, do ret void } -; SI-LABEL: @misaligned_simple_write2_one_val_f64 -; SI-DAG: buffer_load_dwordx2 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}} -; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}} -; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset1:1 -; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:14 offset1:15 -; SI: s_endpgm +; GCN-LABEL: @misaligned_simple_write2_one_val_f64 +; CI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-DAG: {{buffer|global}}_load_dwordx2 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}} +; GCN-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}} +; GCN: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset1:1 +; GCN: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:14 offset1:15 +; GCN: s_endpgm define amdgpu_kernel void @misaligned_simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 { %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %in.gep = getelementptr double, double addrspace(1)* %in, i32 %x.i @@ -300,12 +355,20 @@ define amdgpu_kernel void @misaligned_simple_write2_one_val_f64(double addrspace ret void } -; SI-LABEL: @simple_write2_two_val_f64 -; SI-DAG: buffer_load_dwordx2 [[VAL0:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} -; SI-DAG: buffer_load_dwordx2 [[VAL1:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8 -; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}} -; SI: ds_write2_b64 [[VPTR]], [[VAL0]], [[VAL1]] offset1:8 -; SI: s_endpgm +; GCN-LABEL: @simple_write2_two_val_f64 +; CI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; CI-DAG: buffer_load_dwordx2 [[VAL0:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; CI-DAG: buffer_load_dwordx2 [[VAL1:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8 + +; GFX9-DAG: global_load_dwordx2 [[VAL0:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, 
off{{$}} +; GFX9-DAG: global_load_dwordx2 [[VAL1:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, off offset:8 + + +; GCN-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}} +; GCN: ds_write2_b64 [[VPTR]], [[VAL0]], [[VAL1]] offset1:8 +; GCN: s_endpgm define amdgpu_kernel void @simple_write2_two_val_f64(double addrspace(1)* %C, double addrspace(1)* %in) #0 { %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %in.gep.0 = getelementptr double, double addrspace(1)* %in, i32 %x.i @@ -322,19 +385,25 @@ define amdgpu_kernel void @simple_write2_two_val_f64(double addrspace(1)* %C, do @foo = addrspace(3) global [4 x i32] undef, align 4 -; SI-LABEL: @store_constant_adjacent_offsets -; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}} -; SI: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset1:1 +; GCN-LABEL: @store_constant_adjacent_offsets +; CI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}} +; GCN: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset1:1 define amdgpu_kernel void @store_constant_adjacent_offsets() { store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4 store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 1), align 4 ret void } -; SI-LABEL: @store_constant_disjoint_offsets -; SI-DAG: v_mov_b32_e32 [[VAL:v[0-9]+]], 0x7b{{$}} -; SI-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}} -; SI: ds_write2_b32 [[ZERO]], [[VAL]], [[VAL]] offset1:2 +; GCN-LABEL: @store_constant_disjoint_offsets +; CI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-DAG: v_mov_b32_e32 [[VAL:v[0-9]+]], 0x7b{{$}} +; GCN-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}} +; GCN: ds_write2_b32 [[ZERO]], [[VAL]], [[VAL]] offset1:2 define amdgpu_kernel void @store_constant_disjoint_offsets() { store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4 store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 2), align 4 @@ -343,11 +412,14 @@ define amdgpu_kernel void @store_constant_disjoint_offsets() { @bar = addrspace(3) global [4 x i64] undef, align 4 -; SI-LABEL: @store_misaligned64_constant_offsets -; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}} -; SI-DAG: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset1:1 -; SI-DAG: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:2 offset1:3 -; SI: s_endpgm +; GCN-LABEL: @store_misaligned64_constant_offsets +; CI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}} +; GCN-DAG: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset1:1 +; GCN-DAG: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:2 offset1:3 +; GCN: s_endpgm define amdgpu_kernel void @store_misaligned64_constant_offsets() { store i64 123, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @bar, i32 0, i32 0), align 4 store i64 123, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @bar, i32 0, i32 1), align 4 @@ -356,12 +428,15 @@ define amdgpu_kernel void @store_misaligned64_constant_offsets() { @bar.large = addrspace(3) global [4096 x i64] undef, align 4 -; SI-LABEL: @store_misaligned64_constant_large_offsets -; SI-DAG: v_mov_b32_e32 [[BASE0:v[0-9]+]], 0x7ff8{{$}} -; SI-DAG: v_mov_b32_e32 [[BASE1:v[0-9]+]], 0x4000{{$}} -; SI-DAG: ds_write2_b32 [[BASE0]], v{{[0-9]+}}, v{{[0-9]+}} offset1:1 -; SI-DAG: ds_write2_b32 [[BASE1]], v{{[0-9]+}}, v{{[0-9]+}} 
offset1:1 -; SI: s_endpgm +; GCN-LABEL: @store_misaligned64_constant_large_offsets +; CI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-DAG: v_mov_b32_e32 [[BASE0:v[0-9]+]], 0x7ff8{{$}} +; GCN-DAG: v_mov_b32_e32 [[BASE1:v[0-9]+]], 0x4000{{$}} +; GCN-DAG: ds_write2_b32 [[BASE0]], v{{[0-9]+}}, v{{[0-9]+}} offset1:1 +; GCN-DAG: ds_write2_b32 [[BASE1]], v{{[0-9]+}}, v{{[0-9]+}} offset1:1 +; GCN: s_endpgm define amdgpu_kernel void @store_misaligned64_constant_large_offsets() { store i64 123, i64 addrspace(3)* getelementptr inbounds ([4096 x i64], [4096 x i64] addrspace(3)* @bar.large, i32 0, i32 2048), align 4 store i64 123, i64 addrspace(3)* getelementptr inbounds ([4096 x i64], [4096 x i64] addrspace(3)* @bar.large, i32 0, i32 4095), align 4 @@ -406,10 +481,12 @@ define amdgpu_kernel void @write2_sgemm_sequence(float addrspace(1)* %C, i32 %ld ret void } -; CI-LABEL: {{^}}simple_write2_v4f32_superreg_align4: -; CI: ds_write2_b32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} offset0:3 offset1:2{{$}} -; CI: ds_write2_b32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} offset0:1{{$}} -; CI: s_endpgm +; GCN-LABEL: {{^}}simple_write2_v4f32_superreg_align4: +; CI: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN: ds_write2_b32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} offset0:2 offset1:3{{$}} +; GCN: ds_write2_b32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} offset1:1{{$}} define amdgpu_kernel void @simple_write2_v4f32_superreg_align4(<4 x float> addrspace(3)* %out, <4 x float> addrspace(1)* %in) #0 { %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %in.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %in @@ -419,18 +496,11 @@ define amdgpu_kernel void @simple_write2_v4f32_superreg_align4(<4 x float> addrs ret void } -; Function Attrs: nounwind readnone declare i32 @llvm.amdgcn.workgroup.id.x() #1 - -; Function Attrs: nounwind readnone declare i32 @llvm.amdgcn.workgroup.id.y() #1 - -; Function Attrs: nounwind readnone declare i32 @llvm.amdgcn.workitem.id.x() #1 - -; Function Attrs: nounwind readnone declare i32 @llvm.amdgcn.workitem.id.y() #1 attributes #0 = { nounwind } -attributes #1 = { nounwind readnone } +attributes #1 = { nounwind readnone speculatable } attributes #2 = { convergent nounwind } diff --git a/test/CodeGen/AMDGPU/ds_write2st64.ll b/test/CodeGen/AMDGPU/ds_write2st64.ll index a395af34b67b..54f2500afab4 100644 --- a/test/CodeGen/AMDGPU/ds_write2st64.ll +++ b/test/CodeGen/AMDGPU/ds_write2st64.ll @@ -1,12 +1,16 @@ -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s @lds = addrspace(3) global [512 x float] undef, align 4 -; SI-LABEL: @simple_write2st64_one_val_f32_0_1 -; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]] -; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}} -; SI: ds_write2st64_b32 [[VPTR]], [[VAL]], [[VAL]] offset1:1 -; SI: s_endpgm +; GCN-LABEL: @simple_write2st64_one_val_f32_0_1 +; CI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-DAG: {{buffer|global}}_load_dword [[VAL:v[0-9]+]] +; GCN-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}} +; GCN: ds_write2st64_b32 [[VPTR]], [[VAL]], [[VAL]] offset1:1 +; GCN: s_endpgm define amdgpu_kernel void @simple_write2st64_one_val_f32_0_1(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %in.gep = getelementptr float, float addrspace(1)* %in, i32 %x.i @@ -19,12 +23,20 @@ define amdgpu_kernel void @simple_write2st64_one_val_f32_0_1(float addrspace(1)* ret void } -; SI-LABEL: @simple_write2st64_two_val_f32_2_5 -; SI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} -; SI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 -; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}} -; SI: ds_write2st64_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:2 offset1:5 -; SI: s_endpgm +; GCN-LABEL: @simple_write2st64_two_val_f32_2_5 +; CI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; CI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; CI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 + +; GFX9-DAG: global_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, off{{$}} +; GFX9-DAG: global_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, off offset:4 + + +; GCN-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}} +; GCN: ds_write2st64_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:2 offset1:5 +; GCN: s_endpgm define amdgpu_kernel void @simple_write2st64_two_val_f32_2_5(float addrspace(1)* %C, float addrspace(1)* %in) #0 { %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %x.i @@ -40,12 +52,20 @@ define amdgpu_kernel void @simple_write2st64_two_val_f32_2_5(float addrspace(1)* ret void } -; SI-LABEL: @simple_write2st64_two_val_max_offset_f32 -; SI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} -; SI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 -; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}} -; SI: ds_write2st64_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset1:255 -; SI: s_endpgm +; GCN-LABEL: @simple_write2st64_two_val_max_offset_f32 +; CI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; CI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; CI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 + +; GFX9-DAG: global_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, off{{$}} +; GFX9-DAG: global_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, off offset:4 + +; GCN-DAG: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], 2, v{{[0-9]+}} +; GCN: v_add_{{i|u}}32_e32 [[VPTR:v[0-9]+]], {{(vcc, )?}}s{{[0-9]+}}, [[SHL]] +; GCN: ds_write2st64_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset1:255 +; GCN: s_endpgm define amdgpu_kernel void @simple_write2st64_two_val_max_offset_f32(float addrspace(1)* %C, float addrspace(1)* %in, float addrspace(3)* %lds) #0 { %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %x.i @@ -60,12 +80,20 @@ define amdgpu_kernel void @simple_write2st64_two_val_max_offset_f32(float addrsp ret void } -; SI-LABEL: @simple_write2st64_two_val_max_offset_f64 -; SI-DAG: buffer_load_dwordx2 [[VAL0:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} -; SI-DAG: buffer_load_dwordx2 [[VAL1:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8 -; SI-DAG: v_add_i32_e32 [[VPTR:v[0-9]+]], -; SI: ds_write2st64_b64 [[VPTR]], [[VAL0]], 
[[VAL1]] offset0:4 offset1:127 -; SI: s_endpgm +; GCN-LABEL: @simple_write2st64_two_val_max_offset_f64 +; CI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; CI-DAG: buffer_load_dwordx2 [[VAL0:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; CI-DAG: buffer_load_dwordx2 [[VAL1:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8 + +; GFX9-DAG: global_load_dwordx2 [[VAL0:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, off{{$}} +; GFX9-DAG: global_load_dwordx2 [[VAL1:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, off offset:8 + +; GCN-DAG: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], 3, v{{[0-9]+}} +; GCN: v_add_{{i|u}}32_e32 [[VPTR:v[0-9]+]], {{(vcc, )?}}s{{[0-9]+}}, [[SHL]] +; GCN: ds_write2st64_b64 [[VPTR]], [[VAL0]], [[VAL1]] offset0:4 offset1:127 +; GCN: s_endpgm define amdgpu_kernel void @simple_write2st64_two_val_max_offset_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 { %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %in.gep.0 = getelementptr double, double addrspace(1)* %in, i32 %x.i @@ -81,10 +109,13 @@ define amdgpu_kernel void @simple_write2st64_two_val_max_offset_f64(double addrs ret void } -; SI-LABEL: @byte_size_only_divisible_64_write2st64_f64 -; SI-NOT: ds_write2st64_b64 -; SI: ds_write2_b64 {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset1:8 -; SI: s_endpgm +; GCN-LABEL: @byte_size_only_divisible_64_write2st64_f64 +; CI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-NOT: ds_write2st64_b64 +; GCN: ds_write2_b64 {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset1:8 +; GCN: s_endpgm define amdgpu_kernel void @byte_size_only_divisible_64_write2st64_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 { %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %in.gep = getelementptr double, double addrspace(1)* %in, i32 %x.i @@ -97,10 +128,7 @@ define amdgpu_kernel void @byte_size_only_divisible_64_write2st64_f64(double add ret void } -; Function Attrs: nounwind readnone declare i32 @llvm.amdgcn.workitem.id.x() #1 - -; Function Attrs: nounwind readnone declare i32 @llvm.amdgcn.workitem.id.y() #1 attributes #0 = { nounwind } diff --git a/test/CodeGen/AMDGPU/early-if-convert.ll b/test/CodeGen/AMDGPU/early-if-convert.ll index 792f0b1eaef4..d129ca5c140a 100644 --- a/test/CodeGen/AMDGPU/early-if-convert.ll +++ b/test/CodeGen/AMDGPU/early-if-convert.ll @@ -382,7 +382,7 @@ done: } ; GCN-LABEL: {{^}}ifcvt_undef_scc: -; GCN: {{^}}; BB#0: +; GCN: {{^}}; %bb.0: ; GCN-NEXT: s_load_dwordx2 ; GCN-NEXT: s_cselect_b32 s{{[0-9]+}}, 1, 0 define amdgpu_kernel void @ifcvt_undef_scc(i32 %cond, i32 addrspace(1)* %out) { diff --git a/test/CodeGen/AMDGPU/else.ll b/test/CodeGen/AMDGPU/else.ll index 22338e4f50e5..c73ea936e8be 100644 --- a/test/CodeGen/AMDGPU/else.ll +++ b/test/CodeGen/AMDGPU/else.ll @@ -25,7 +25,7 @@ end: } ; CHECK-LABEL: {{^}}else_execfix_leave_wqm: -; CHECK: ; BB#0: +; CHECK: ; %bb.0: ; CHECK-NEXT: s_mov_b64 [[INIT_EXEC:s\[[0-9]+:[0-9]+\]]], exec ; CHECK: ; %Flow ; CHECK-NEXT: s_or_saveexec_b64 [[DST:s\[[0-9]+:[0-9]+\]]], diff --git a/test/CodeGen/AMDGPU/enqueue-kernel.ll b/test/CodeGen/AMDGPU/enqueue-kernel.ll index a54453541ded..c04b9b1e8cab 100644 --- a/test/CodeGen/AMDGPU/enqueue-kernel.ll +++ b/test/CodeGen/AMDGPU/enqueue-kernel.ll @@ -65,7 +65,7 @@ entry: ret void } -; CHECK: define amdgpu_kernel void @__test_block_invoke_kernel({{.*}}) #[[AT1:[0-9]+]] +; CHECK: define dso_local amdgpu_kernel void 
@__test_block_invoke_kernel({{.*}}) #[[AT1:[0-9]+]] define internal amdgpu_kernel void @__test_block_invoke_kernel(<{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i8 }> %arg) #0 !kernel_arg_addr_space !14 !kernel_arg_access_qual !15 !kernel_arg_type !16 !kernel_arg_base_type !16 !kernel_arg_type_qual !17 { entry: @@ -77,7 +77,7 @@ entry: declare i32 @__enqueue_kernel_basic(%opencl.queue_t addrspace(1)*, i32, %struct.ndrange_t*, i8 addrspace(4)*) local_unnamed_addr -; CHECK: define amdgpu_kernel void @__test_block_invoke_2_kernel({{.*}}) #[[AT2:[0-9]+]] +; CHECK: define dso_local amdgpu_kernel void @__test_block_invoke_2_kernel({{.*}}) #[[AT2:[0-9]+]] define internal amdgpu_kernel void @__test_block_invoke_2_kernel(<{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }> %arg) #0 !kernel_arg_addr_space !14 !kernel_arg_access_qual !15 !kernel_arg_type !16 !kernel_arg_base_type !16 !kernel_arg_type_qual !17 { diff --git a/test/CodeGen/AMDGPU/extload-align.ll b/test/CodeGen/AMDGPU/extload-align.ll index 12cf27b918af..4df4b265b23b 100644 --- a/test/CodeGen/AMDGPU/extload-align.ll +++ b/test/CodeGen/AMDGPU/extload-align.ll @@ -1,4 +1,5 @@ -; RUN: llc -debug-only=machine-scheduler -march=amdgcn -verify-machineinstrs %s -o - 2>&1| FileCheck -check-prefix=SI-NOHSA -check-prefix=FUNC -check-prefix=DEBUG %s +; RUN: llc -debug-only=machine-scheduler -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs %s -o - 2>&1| FileCheck -check-prefix=SI-NOHSA -check-prefix=FUNC -check-prefix=DEBUG %s +target datalayout = "A5" ; REQUIRES: asserts ; Verify that the extload generated from %eval has the default @@ -6,18 +7,18 @@ ; size and not 4 corresponding to the sign-extended size (i32). ; DEBUG: {{^}}# Machine code for function extload_align: -; DEBUG: mem:LD2[]{{[^(]}} +; DEBUG: mem:LD2[(addrspace=5)] ; DEBUG: {{^}}# End machine code for function extload_align. 
-define amdgpu_kernel void @extload_align(i32* %out, i32 %index) #0 { - %v0 = alloca [4 x i16] - %a1 = getelementptr inbounds [4 x i16], [4 x i16]* %v0, i32 0, i32 0 - %a2 = getelementptr inbounds [4 x i16], [4 x i16]* %v0, i32 0, i32 1 - store i16 0, i16* %a1 - store i16 1, i16* %a2 - %a = getelementptr inbounds [4 x i16], [4 x i16]* %v0, i32 0, i32 %index - %val = load i16, i16* %a +define amdgpu_kernel void @extload_align(i32 addrspace(5)* %out, i32 %index) #0 { + %v0 = alloca [4 x i16], addrspace(5) + %a1 = getelementptr inbounds [4 x i16], [4 x i16] addrspace(5)* %v0, i32 0, i32 0 + %a2 = getelementptr inbounds [4 x i16], [4 x i16] addrspace(5)* %v0, i32 0, i32 1 + store i16 0, i16 addrspace(5)* %a1 + store i16 1, i16 addrspace(5)* %a2 + %a = getelementptr inbounds [4 x i16], [4 x i16] addrspace(5)* %v0, i32 0, i32 %index + %val = load i16, i16 addrspace(5)* %a %eval = sext i16 %val to i32 - store i32 %eval, i32* %out + store i32 %eval, i32 addrspace(5)* %out ret void } diff --git a/test/CodeGen/AMDGPU/extract_vector_elt-f16.ll b/test/CodeGen/AMDGPU/extract_vector_elt-f16.ll index 1f567ae05081..1e19ddf2b0ac 100644 --- a/test/CodeGen/AMDGPU/extract_vector_elt-f16.ll +++ b/test/CodeGen/AMDGPU/extract_vector_elt-f16.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; GCN-LABEL: {{^}}extract_vector_elt_v2f16: ; GCN: s_load_dword [[VEC:s[0-9]+]] diff --git a/test/CodeGen/AMDGPU/extract_vector_elt-f64.ll b/test/CodeGen/AMDGPU/extract_vector_elt-f64.ll index db5bf0b4e808..2f13f63fa888 100644 --- a/test/CodeGen/AMDGPU/extract_vector_elt-f64.ll +++ b/test/CodeGen/AMDGPU/extract_vector_elt-f64.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}extract_vector_elt_v3f64_2: ; GCN: buffer_load_dwordx4 diff --git a/test/CodeGen/AMDGPU/extract_vector_elt-i16.ll b/test/CodeGen/AMDGPU/extract_vector_elt-i16.ll index 4dee500c8429..06a0e2c7b653 100644 --- a/test/CodeGen/AMDGPU/extract_vector_elt-i16.ll +++ b/test/CodeGen/AMDGPU/extract_vector_elt-i16.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SICIVI %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=SICIVI %s -; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SICIVI %s +; RUN: llc -march=amdgcn 
-mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=SICIVI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s ; GCN-LABEL: {{^}}extract_vector_elt_v2i16: ; GCN: s_load_dword [[VEC:s[0-9]+]] diff --git a/test/CodeGen/AMDGPU/extract_vector_elt-i64.ll b/test/CodeGen/AMDGPU/extract_vector_elt-i64.ll index a8d127879a32..99019e2a83e6 100644 --- a/test/CodeGen/AMDGPU/extract_vector_elt-i64.ll +++ b/test/CodeGen/AMDGPU/extract_vector_elt-i64.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; How the replacement of i64 stores with v2i32 stores resulted in ; breaking other users of the bitcast if they already existed diff --git a/test/CodeGen/AMDGPU/extract_vector_elt-i8.ll b/test/CodeGen/AMDGPU/extract_vector_elt-i8.ll index b7d768fd5525..a5ff0932c622 100644 --- a/test/CodeGen/AMDGPU/extract_vector_elt-i8.ll +++ b/test/CodeGen/AMDGPU/extract_vector_elt-i8.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}extract_vector_elt_v1i8: ; GCN: buffer_load_ubyte diff --git a/test/CodeGen/AMDGPU/fabs.f16.ll b/test/CodeGen/AMDGPU/fabs.f16.ll index 3e2b44fe905c..4bbaf0ea3b62 100644 --- a/test/CodeGen/AMDGPU/fabs.f16.ll +++ b/test/CodeGen/AMDGPU/fabs.f16.ll @@ -1,6 +1,6 @@ ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=CI %s ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx901 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX9 %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX9 %s ; DAGCombiner will transform: ; (fabs (f16 bitcast (i16 a))) => (f16 bitcast (and (i16 a), 0x7FFFFFFF)) @@ -127,8 +127,7 @@ define amdgpu_kernel void @fabs_free_v2f16(<2 x half> addrspace(1)* %out, i32 %i ; CI: v_mul_f32_e64 v{{[0-9]+}}, |v{{[0-9]+}}|, v{{[0-9]+}} ; CI: v_cvt_f16_f32 -; VI: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, -; VI: v_mul_f16_sdwa v{{[0-9]+}}, |v{{[0-9]+}}|, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; VI: v_mul_f16_sdwa v{{[0-9]+}}, |v{{[0-9]+}}|, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 ; VI: v_mul_f16_e64 v{{[0-9]+}}, 
|v{{[0-9]+}}|, v{{[0-9]+}} ; GFX9: v_and_b32_e32 [[FABS:v[0-9]+]], 0x7fff7fff, [[VAL]] diff --git a/test/CodeGen/AMDGPU/fadd.ll b/test/CodeGen/AMDGPU/fadd.ll index 621a0de281db..a2f1f7195f27 100644 --- a/test/CodeGen/AMDGPU/fadd.ll +++ b/test/CodeGen/AMDGPU/fadd.ll @@ -72,4 +72,4 @@ define amdgpu_kernel void @fadd_0_nsz_attr_f32(float addrspace(1)* %out, float % } attributes #0 = { nounwind } -attributes #1 = { nounwind "no-signed-zeros-fp-math"="true" } \ No newline at end of file +attributes #1 = { nounwind "no-signed-zeros-fp-math"="true" } diff --git a/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll b/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll index f64c27d0c03d..48e4828557e5 100644 --- a/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll +++ b/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll @@ -1,7 +1,7 @@ ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -mattr=-fp32-denormals < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,GCN-FLUSH %s ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -mattr=-fp32-denormals,+fp-exceptions < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-EXCEPT,VI,GCN-FLUSH %s -; RUN: llc -march=amdgcn -mcpu=gfx901 -verify-machineinstrs -mattr=+fp32-denormals < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-DENORM,GCN-DENORM %s -; RUN: llc -march=amdgcn -mcpu=gfx901 -verify-machineinstrs -mattr=-fp32-denormals < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-FLUSH,GCN-FLUSH %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -mattr=+fp32-denormals < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-DENORM,GCN-DENORM %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -mattr=-fp32-denormals < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX9-FLUSH,GCN-FLUSH %s ; GCN-LABEL: {{^}}test_no_fold_canonicalize_loaded_value_f32: ; GCN-FLUSH: v_mul_f32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}} diff --git a/test/CodeGen/AMDGPU/fcanonicalize.f16.ll b/test/CodeGen/AMDGPU/fcanonicalize.f16.ll index 80f802bdce5b..24195660e576 100644 --- a/test/CodeGen/AMDGPU/fcanonicalize.f16.ll +++ b/test/CodeGen/AMDGPU/fcanonicalize.f16.ll @@ -1,5 +1,5 @@ ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s -; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s declare half @llvm.fabs.f16(half) #0 declare half @llvm.canonicalize.f16(half) #0 @@ -207,7 +207,7 @@ define amdgpu_kernel void @test_fold_canonicalize_snan3_value_f16(half addrspace } ; GCN-LABEL: {{^}}v_test_canonicalize_var_v2f16: -; VI-DAG: v_max_f16_sdwa [[REG0:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; VI-DAG: v_max_f16_sdwa [[REG0:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 ; VI-DAG: v_max_f16_e32 [[REG1:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}} ; VI-NOT: v_and_b32 @@ -246,7 +246,7 @@ define amdgpu_kernel void @v_test_canonicalize_fabs_var_v2f16(<2 x half> addrspa ; GCN-LABEL: {{^}}v_test_canonicalize_fneg_fabs_var_v2f16: ; VI-DAG: v_or_b32_e32 v{{[0-9]+}}, 0x80008000, v{{[0-9]+}} -; VI-DAG: v_max_f16_sdwa [[REG0:v[0-9]+]], 
v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; VI-DAG: v_max_f16_sdwa [[REG0:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 ; VI-DAG: v_max_f16_e32 [[REG1:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} ; VI: v_or_b32 @@ -266,8 +266,7 @@ define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_v2f16(<2 x half> ad ; GCN-LABEL: {{^}}v_test_canonicalize_fneg_var_v2f16: ; VI: v_xor_b32_e32 [[FNEG:v[0-9]+]], 0x80008000, v{{[0-9]+}} -; VI: v_lshrrev_b32_e32 [[FNEGHI:v[0-9]+]], 16, [[FNEG]] -; VI-DAG: v_max_f16_sdwa [[REG1:v[0-9]+]], [[FNEG]], [[FNEGHI]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; VI-DAG: v_max_f16_sdwa [[REG1:v[0-9]+]], [[FNEG]], [[FNEG]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 ; VI-DAG: v_max_f16_e32 [[REG0:v[0-9]+]], [[FNEG]], [[FNEG]] ; VI-NOT: 0xffff diff --git a/test/CodeGen/AMDGPU/fcopysign.f16.ll b/test/CodeGen/AMDGPU/fcopysign.f16.ll index b14f4c85ba61..15d4d2a36676 100644 --- a/test/CodeGen/AMDGPU/fcopysign.f16.ll +++ b/test/CodeGen/AMDGPU/fcopysign.f16.ll @@ -1,6 +1,6 @@ ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX89 -check-prefix=GFX8 %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX89 -check-prefix=GFX9 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX89 -check-prefix=GFX9 %s declare half @llvm.copysign.f16(half, half) declare float @llvm.copysign.f32(float, float) diff --git a/test/CodeGen/AMDGPU/fence-amdgiz.ll b/test/CodeGen/AMDGPU/fence-amdgiz.ll index 3055f325f3fa..0dd2a9241b23 100644 --- a/test/CodeGen/AMDGPU/fence-amdgiz.ll +++ b/test/CodeGen/AMDGPU/fence-amdgiz.ll @@ -3,7 +3,7 @@ target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" ; CHECK-LABEL: atomic_fence -; CHECK: BB#0: +; CHECK: %bb.0: ; CHECK-NOT: ATOMIC_FENCE ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: buffer_wbinvl1_vol diff --git a/test/CodeGen/AMDGPU/fma.ll b/test/CodeGen/AMDGPU/fma.ll index 952bd1f29544..8e51f82112ff 100644 --- a/test/CodeGen/AMDGPU/fma.ll +++ b/test/CodeGen/AMDGPU/fma.ll @@ -1,5 +1,12 @@ ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: not llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=cedar -verify-machineinstrs < %s +; RUN: not llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=juniper -verify-machineinstrs < %s +; RUN: not llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=redwood -verify-machineinstrs < %s +; RUN: not llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=sumo -verify-machineinstrs < %s +; RUN: 
not llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=barts -verify-machineinstrs < %s +; RUN: not llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=caicos -verify-machineinstrs < %s +; RUN: not llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=turks -verify-machineinstrs < %s declare float @llvm.fma.f32(float, float, float) nounwind readnone declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) nounwind readnone diff --git a/test/CodeGen/AMDGPU/fmed3.ll b/test/CodeGen/AMDGPU/fmed3.ll index da21cc409fc6..934d20716c40 100644 --- a/test/CodeGen/AMDGPU/fmed3.ll +++ b/test/CodeGen/AMDGPU/fmed3.ll @@ -2,8 +2,8 @@ ; RUN: llc -march=amdgcn -mattr=+fp-exceptions -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SNAN -check-prefix=GCN -check-prefix=SI %s ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=NOSNAN -check-prefix=GCN -check-prefix=VI -check-prefix=GFX89 %s ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+fp-exceptions -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SNAN -check-prefix=GCN -check-prefix=VI -check-prefix=GFX89 %s -; RUN: llc -march=amdgcn -mcpu=gfx901 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=NOSNAN -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 %s -; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=+fp-exceptions -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SNAN -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=NOSNAN -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+fp-exceptions -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SNAN -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 %s ; GCN-LABEL: {{^}}v_test_nnan_input_fmed3_r_i_i_f32: diff --git a/test/CodeGen/AMDGPU/fmuladd.v2f16.ll b/test/CodeGen/AMDGPU/fmuladd.v2f16.ll index 925216d28450..4a9beef85840 100644 --- a/test/CodeGen/AMDGPU/fmuladd.v2f16.ll +++ b/test/CodeGen/AMDGPU/fmuladd.v2f16.ll @@ -1,12 +1,12 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx901 -mattr=-fp64-fp16-denormals -fp-contract=on -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-FLUSH,GFX9 %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx901 -mattr=-fp64-fp16-denormals -fp-contract=on -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-FLUSH,GFX9 %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx901 -mattr=-fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-FLUSH,GFX9 %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx901 -mattr=-fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-FLUSH,GFX9 %s - -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx901 -mattr=+fp64-fp16-denormals -fp-contract=on -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-DENORM-STRICT,GFX9-DENORM,GFX9 %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx901 
-mattr=+fp64-fp16-denormals -fp-contract=on -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-DENORM-STRICT,GFX9-DENORM,GFX9 %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx901 -mattr=+fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-DENORM-CONTRACT,GFX9-DENORM,GFX9 %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx901 -mattr=+fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-DENORM-CONTRACT,GFX9-DENORM,GFX9 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -mattr=-fp64-fp16-denormals -fp-contract=on -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-FLUSH,GFX9 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -mattr=-fp64-fp16-denormals -fp-contract=on -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-FLUSH,GFX9 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -mattr=-fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-FLUSH,GFX9 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -mattr=-fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-FLUSH,GFX9 %s + +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -mattr=+fp64-fp16-denormals -fp-contract=on -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-DENORM-STRICT,GFX9-DENORM,GFX9 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -mattr=+fp64-fp16-denormals -fp-contract=on -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-DENORM-STRICT,GFX9-DENORM,GFX9 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -mattr=+fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-DENORM-CONTRACT,GFX9-DENORM,GFX9 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -mattr=+fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-DENORM-CONTRACT,GFX9-DENORM,GFX9 %s declare i32 @llvm.amdgcn.workitem.id.x() #1 declare <2 x half> @llvm.fmuladd.v2f16(<2 x half>, <2 x half>, <2 x half>) #1 diff --git a/test/CodeGen/AMDGPU/fneg-fabs.f16.ll b/test/CodeGen/AMDGPU/fneg-fabs.f16.ll index df0dfc696d96..4d502b1af87f 100644 --- a/test/CodeGen/AMDGPU/fneg-fabs.f16.ll +++ b/test/CodeGen/AMDGPU/fneg-fabs.f16.ll @@ -1,6 +1,6 @@ ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN -check-prefix=CIVI %s ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GFX89 -check-prefix=GCN -check-prefix=CIVI %s -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx901 -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck 
-check-prefix=GFX89 -check-prefix=GFX9 -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx900 -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GFX89 -check-prefix=GFX9 -check-prefix=GCN %s ; GCN-LABEL: {{^}}fneg_fabs_fadd_f16: ; CI: v_cvt_f32_f16_e32 diff --git a/test/CodeGen/AMDGPU/fneg.f16.ll b/test/CodeGen/AMDGPU/fneg.f16.ll index a27a0b444ae9..5649ddfc6e39 100644 --- a/test/CodeGen/AMDGPU/fneg.f16.ll +++ b/test/CodeGen/AMDGPU/fneg.f16.ll @@ -1,6 +1,6 @@ ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=kaveri -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,CIVI %s ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,CIVI,GFX89 %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx901 -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,GFX9 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,GFX9 %s ; FIXME: Should be able to do scalar op ; GCN-LABEL: {{^}}s_fneg_f16: @@ -116,8 +116,7 @@ define amdgpu_kernel void @fneg_free_v2f16(<2 x half> addrspace(1)* %out, i32 %i ; CI: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} ; CI: v_cvt_f16_f32 -; VI: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, -; VI: v_mul_f16_sdwa v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; VI: v_mul_f16_sdwa v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 ; VI: v_mul_f16_e64 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}} ; GFX9: v_pk_mul_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} neg_lo:[1,0] neg_hi:[1,0]{{$}} diff --git a/test/CodeGen/MIR/AMDGPU/fold-imm-f16-f32.mir b/test/CodeGen/AMDGPU/fold-imm-f16-f32.mir similarity index 100% rename from test/CodeGen/MIR/AMDGPU/fold-imm-f16-f32.mir rename to test/CodeGen/AMDGPU/fold-imm-f16-f32.mir diff --git a/test/CodeGen/MIR/AMDGPU/fold-multiple.mir b/test/CodeGen/AMDGPU/fold-multiple.mir similarity index 100% rename from test/CodeGen/MIR/AMDGPU/fold-multiple.mir rename to test/CodeGen/AMDGPU/fold-multiple.mir diff --git a/test/CodeGen/AMDGPU/fpext.f16.ll b/test/CodeGen/AMDGPU/fpext.f16.ll index 56da7f1f2371..84ff7e020922 100644 --- a/test/CodeGen/AMDGPU/fpext.f16.ll +++ b/test/CodeGen/AMDGPU/fpext.f16.ll @@ -1,6 +1,6 @@ ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI -check-prefix=SIVI %s ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI -check-prefix=GFX89 %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX9 
-check-prefix=GFX89 %s ; GCN-LABEL: {{^}}fpext_f16_to_f32 ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]] diff --git a/test/CodeGen/AMDGPU/fptrunc.f16.ll b/test/CodeGen/AMDGPU/fptrunc.f16.ll index eb7196714bab..3aff885b7b5f 100644 --- a/test/CodeGen/AMDGPU/fptrunc.f16.ll +++ b/test/CodeGen/AMDGPU/fptrunc.f16.ll @@ -1,6 +1,6 @@ ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI -check-prefix=SIVI %s ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI -check-prefix=SIVI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global,-fp64-fp16-denormals -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX9 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global,-fp64-fp16-denormals -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX9 %s ; GCN-LABEL: {{^}}fptrunc_f32_to_f16: ; GCN: buffer_load_dword v[[A_F32:[0-9]+]] diff --git a/test/CodeGen/AMDGPU/frame-index-elimination.ll b/test/CodeGen/AMDGPU/frame-index-elimination.ll index 9bc46434d59e..9b75c44eac05 100644 --- a/test/CodeGen/AMDGPU/frame-index-elimination.ll +++ b/test/CodeGen/AMDGPU/frame-index-elimination.ll @@ -1,4 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -mattr=-promote-alloca -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -mattr=-promote-alloca -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-promote-alloca -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s ; Test that non-entry function frame indices are expanded properly to ; give an index relative to the scratch wave offset register @@ -7,8 +8,13 @@ ; GCN-LABEL: {{^}}func_mov_fi_i32: ; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN: s_sub_u32 s6, s5, s4 -; GCN-NEXT: v_lshr_b32_e64 [[SCALED:v[0-9]+]], s6, 6 -; GCN-NEXT: v_add_i32_e64 v0, s[6:7], 4, [[SCALED]] + +; CI-NEXT: v_lshr_b32_e64 [[SCALED:v[0-9]+]], s6, 6 +; CI-NEXT: v_add_i32_e64 v0, s[6:7], 4, [[SCALED]] + +; GFX9-NEXT: v_lshrrev_b32_e64 [[SCALED:v[0-9]+]], 6, s6 +; GFX9-NEXT: v_add_u32_e32 v0, 4, [[SCALED]] + ; GCN-NOT: v_mov ; GCN: ds_write_b32 v0, v0 define void @func_mov_fi_i32() #0 { @@ -23,9 +29,16 @@ define void @func_mov_fi_i32() #0 { ; GCN-LABEL: {{^}}func_add_constant_to_fi_i32: ; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN: s_sub_u32 s6, s5, s4 -; GCN-NEXT: v_lshr_b32_e64 [[SCALED:v[0-9]+]], s6, 6 -; GCN-NEXT: v_add_i32_e64 v0, s[6:7], 4, [[SCALED]] -; GCN-NEXT: v_add_i32_e32 v0, vcc, 4, v0 + +; CI-NEXT: v_lshr_b32_e64 [[SCALED:v[0-9]+]], s6, 6 +; CI-NEXT: v_add_i32_e64 v0, s[6:7], 4, [[SCALED]] +; CI-NEXT: v_add_i32_e32 v0, vcc, 4, v0 + +; GFX9-NEXT: v_lshrrev_b32_e64 [[SCALED:v[0-9]+]], 6, s6 +; GFX9-NEXT: v_add_u32_e32 v0, 4, [[SCALED]] +; GFX9-NEXT: v_add_u32_e32 v0, 4, v0 + + ; GCN-NOT: v_mov ; GCN: ds_write_b32 v0, v0 define void @func_add_constant_to_fi_i32() #0 { @@ -40,8 +53,13 @@ define void @func_add_constant_to_fi_i32() #0 { ; 
GCN-LABEL: {{^}}func_other_fi_user_i32: ; GCN: s_sub_u32 s6, s5, s4 -; GCN-NEXT: v_lshr_b32_e64 [[SCALED:v[0-9]+]], s6, 6 -; GCN-NEXT: v_add_i32_e64 v0, s[6:7], 4, [[SCALED]] + +; CI-NEXT: v_lshr_b32_e64 [[SCALED:v[0-9]+]], s6, 6 +; CI-NEXT: v_add_i32_e64 v0, s[6:7], 4, [[SCALED]] + +; GFX9-NEXT: v_lshrrev_b32_e64 [[SCALED:v[0-9]+]], 6, s6 +; GFX9-NEXT: v_add_u32_e32 v0, 4, [[SCALED]] + ; GCN-NEXT: v_mul_lo_i32 v0, v0, 9 ; GCN-NOT: v_mov ; GCN: ds_write_b32 v0, v0 @@ -73,9 +91,15 @@ define void @func_load_private_arg_i32_ptr(i32* %ptr) #0 { ; GCN: s_waitcnt ; GCN-NEXT: s_mov_b32 s5, s32 ; GCN-NEXT: s_sub_u32 [[SUB_OFFSET:s[0-9]+]], s5, s4 -; GCN-NEXT: v_lshr_b32_e64 [[SHIFT:v[0-9]+]], [[SUB_OFFSET]], 6 -; GCN-NEXT: v_add_i32_e64 [[ADD:v[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 4, [[SHIFT]] -; GCN-NEXT: v_add_i32_e32 v0, vcc, 4, [[ADD]] + +; CI-NEXT: v_lshr_b32_e64 [[SHIFT:v[0-9]+]], [[SUB_OFFSET]], 6 +; CI-NEXT: v_add_i32_e64 [[ADD:v[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 4, [[SHIFT]] +; CI-NEXT: v_add_i32_e32 v0, vcc, 4, [[ADD]] + +; GFX9-NEXT: v_lshrrev_b32_e64 [[SHIFT:v[0-9]+]], 6, [[SUB_OFFSET]] +; GFX9-NEXT: v_add_u32_e32 [[ADD:v[0-9]+]], 4, [[SHIFT]] +; GFX9-NEXT: v_add_u32_e32 v0, 4, [[ADD]] + ; GCN-NOT: v_mov ; GCN: ds_write_b32 v0, v0 define void @void_func_byval_struct_i8_i32_ptr({ i8, i32 }* byval %arg0) #0 { @@ -106,12 +130,21 @@ define void @void_func_byval_struct_i8_i32_ptr_value({ i8, i32 }* byval %arg0) # ; GCN-LABEL: {{^}}void_func_byval_struct_i8_i32_ptr_nonentry_block: ; GCN: s_sub_u32 [[SUB_OFFSET:s[0-9]+]], s5, s4 -; GCN: v_lshr_b32_e64 [[SHIFT:v[0-9]+]], [[SUB_OFFSET]], 6 -; GCN: v_add_i32_e64 [[ADD:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 4, [[SHIFT]] + +; CI: v_lshr_b32_e64 [[SHIFT:v[0-9]+]], [[SUB_OFFSET]], 6 +; CI: v_add_i32_e64 [[ADD:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 4, [[SHIFT]] + +; GFX9: v_lshrrev_b32_e64 [[SHIFT:v[0-9]+]], 6, [[SUB_OFFSET]] +; GFX9: v_add_u32_e32 [[ADD:v[0-9]+]], 4, [[SHIFT]] + ; GCN: s_and_saveexec_b64 -; GCN: v_add_i32_e32 v0, vcc, 4, [[ADD]] -; GCN: buffer_load_dword v1, v0, s[0:3], s4 offen{{$}} +; CI: v_add_i32_e32 v0, vcc, 4, [[ADD]] +; CI: buffer_load_dword v1, v0, s[0:3], s4 offen{{$}} + +; GFX9: v_add_u32_e32 v0, 4, [[ADD]] +; GFX9: buffer_load_dword v1, v{{[0-9]+}}, s[0:3], s4 offen offset:4{{$}} + ; GCN: ds_write_b32 define void @void_func_byval_struct_i8_i32_ptr_nonentry_block({ i8, i32 }* byval %arg0, i32 %arg2) #0 { %cmp = icmp eq i32 %arg2, 0 @@ -131,9 +164,14 @@ ret: ; Added offset can't be used with VOP3 add ; GCN-LABEL: {{^}}func_other_fi_user_non_inline_imm_offset_i32: ; GCN: s_sub_u32 s6, s5, s4 -; GCN-DAG: v_lshr_b32_e64 [[SCALED:v[0-9]+]], s6, 6 ; GCN-DAG: s_movk_i32 s6, 0x204 -; GCN: v_add_i32_e64 v0, s[6:7], s6, [[SCALED]] + +; CI-DAG: v_lshr_b32_e64 [[SCALED:v[0-9]+]], s6, 6 +; CI: v_add_i32_e64 v0, s[6:7], s6, [[SCALED]] + +; GFX9-DAG: v_lshrrev_b32_e64 [[SCALED:v[0-9]+]], 6, s6 +; GFX9: v_add_u32_e32 v0, s6, [[SCALED]] + ; GCN: v_mul_lo_i32 v0, v0, 9 ; GCN: ds_write_b32 v0, v0 define void @func_other_fi_user_non_inline_imm_offset_i32() #0 { @@ -150,9 +188,14 @@ define void @func_other_fi_user_non_inline_imm_offset_i32() #0 { ; GCN-LABEL: {{^}}func_other_fi_user_non_inline_imm_offset_i32_vcc_live: ; GCN: s_sub_u32 [[DIFF:s[0-9]+]], s5, s4 -; GCN-DAG: v_lshr_b32_e64 [[SCALED:v[0-9]+]], [[DIFF]], 6 ; GCN-DAG: s_movk_i32 [[OFFSET:s[0-9]+]], 0x204 -; GCN: v_add_i32_e64 v0, s{{\[[0-9]+:[0-9]+\]}}, [[OFFSET]], [[SCALED]] + +; CI-DAG: v_lshr_b32_e64 [[SCALED:v[0-9]+]], [[DIFF]], 6 +; CI: v_add_i32_e64 v0, s{{\[[0-9]+:[0-9]+\]}}, [[OFFSET]], 
[[SCALED]] + +; GFX9-DAG: v_lshrrev_b32_e64 [[SCALED:v[0-9]+]], 6, [[DIFF]] +; GFX9: v_add_u32_e32 v0, [[OFFSET]], [[SCALED]] + ; GCN: v_mul_lo_i32 v0, v0, 9 ; GCN: ds_write_b32 v0, v0 define void @func_other_fi_user_non_inline_imm_offset_i32_vcc_live() #0 { diff --git a/test/CodeGen/AMDGPU/fsub.f16.ll b/test/CodeGen/AMDGPU/fsub.f16.ll index 5e3668a699f3..fc055a58e757 100644 --- a/test/CodeGen/AMDGPU/fsub.f16.ll +++ b/test/CodeGen/AMDGPU/fsub.f16.ll @@ -1,6 +1,6 @@ ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SIVI %s ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX89 -check-prefix=VI -check-prefix=SIVI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX89 -check-prefix=GFX9 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX89 -check-prefix=GFX9 %s ; GCN-LABEL: {{^}}fsub_f16: ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]] diff --git a/test/CodeGen/AMDGPU/function-args.ll b/test/CodeGen/AMDGPU/function-args.ll index 604619a69c23..ca36732540b2 100644 --- a/test/CodeGen/AMDGPU/function-args.ll +++ b/test/CodeGen/AMDGPU/function-args.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=CI %s -; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,CIVI %s +; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,CIVI,GFX89 %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,GFX89 %s ; GCN-LABEL: {{^}}void_func_i1: ; GCN: v_and_b32_e32 v0, 1, v0 @@ -24,7 +24,7 @@ define void @void_func_i1_zeroext(i1 zeroext %arg0) #0 { ; GCN-LABEL: {{^}}void_func_i1_signext: ; GCN: s_waitcnt -; GCN-NEXT: v_add_{{[_coiu]*}}32_e32 v0, vcc, 12, v0 +; GCN-NEXT: v_add_{{i|u}}32_e32 v0, {{(vcc, )?}}12, v0 ; GCN-NOT: v0 ; GCN: buffer_store_dword v0, off define void @void_func_i1_signext(i1 signext %arg0) #0 { @@ -60,7 +60,7 @@ define void @void_func_i8(i8 %arg0) #0 { ; GCN-LABEL: {{^}}void_func_i8_zeroext: ; GCN-NOT: and_b32 -; GCN: v_add_{{[_coiu]*}}32_e32 v0, vcc, 12, v0 +; GCN: v_add_{{i|u}}32_e32 v0, {{(vcc, )?}}12, v0 define void @void_func_i8_zeroext(i8 zeroext %arg0) #0 { %ext = zext i8 %arg0 to i32 %add = add i32 %ext, 12 @@ -70,7 +70,7 @@ define void @void_func_i8_zeroext(i8 zeroext %arg0) #0 { ; GCN-LABEL: {{^}}void_func_i8_signext: ; GCN-NOT: v_bfe_i32 -; GCN: v_add_{{[_coiu]*}}32_e32 v0, vcc, 12, v0 +; GCN: v_add_{{i|u}}32_e32 v0, {{(vcc, )?}}12, v0 define void @void_func_i8_signext(i8 signext %arg0) #0 { %ext = sext i8 %arg0 to i32 %add = add i32 %ext, 12 @@ -87,7 +87,7 @@ define void 
@void_func_i16(i16 %arg0) #0 { ; GCN-LABEL: {{^}}void_func_i16_zeroext: ; GCN-NOT: v0 -; GCN: v_add_{{[_coiu]*}}32_e32 v0, vcc, 12, v0 +; GCN: v_add_{{i|u}}32_e32 v0, {{(vcc, )?}}12, v0 define void @void_func_i16_zeroext(i16 zeroext %arg0) #0 { %ext = zext i16 %arg0 to i32 %add = add i32 %ext, 12 @@ -97,7 +97,7 @@ define void @void_func_i16_zeroext(i16 zeroext %arg0) #0 { ; GCN-LABEL: {{^}}void_func_i16_signext: ; GCN-NOT: v0 -; GCN: v_add_{{[_coiu]*}}32_e32 v0, vcc, 12, v0 +; GCN: v_add_{{i|u}}32_e32 v0, {{(vcc, )?}}12, v0 define void @void_func_i16_signext(i16 signext %arg0) #0 { %ext = sext i16 %arg0 to i32 %add = add i32 %ext, 12 @@ -582,7 +582,7 @@ define void @void_func_v32i32_i32_i64(<32 x i32> %arg0, i32 %arg1, i64 %arg2) #0 ; GCN: buffer_store_byte [[TRUNC_ARG1_I1]], off ; GCN: buffer_store_byte [[LOAD_ARG2]], off ; GCN: buffer_store_short [[LOAD_ARG3]], off -; VI: buffer_store_short [[LOAD_ARG4]], off +; GFX89: buffer_store_short [[LOAD_ARG4]], off ; CI: buffer_store_short [[CVT_ARG4]], off define void @void_func_v32i32_i1_i8_i16(<32 x i32> %arg0, i1 %arg1, i8 %arg2, i16 %arg3, half %arg4) #0 { diff --git a/test/CodeGen/AMDGPU/function-returns.ll b/test/CodeGen/AMDGPU/function-returns.ll index 28406e16219a..4a24f5e285b9 100644 --- a/test/CodeGen/AMDGPU/function-returns.ll +++ b/test/CodeGen/AMDGPU/function-returns.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=CI %s -; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89 %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,GFX9 %s ; GCN-LABEL: {{^}}i1_func_void: ; GCN: buffer_load_ubyte v0, off @@ -283,8 +283,9 @@ define <2 x i16> @v2i16_func_void() #0 { ; GCN-LABEL: {{^}}v3i16_func_void: ; GFX9: buffer_load_dwordx2 v[0:1], off -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 +; GFX9: s_waitcnt vmcnt(0) +; GFX9: v_lshrrev_b32 +; GFX9: s_setpc_b64 define <3 x i16> @v3i16_func_void() #0 { %val = load <3 x i16>, <3 x i16> addrspace(1)* undef ret <3 x i16> %val @@ -305,7 +306,7 @@ define <4 x i16> @v4i16_func_void() #0 { ; GFX9: buffer_load_ushort v4 ; GFX9: v_lshrrev_b32_e32 v3, 16, v1 ; GFX9: v_mov_b32_e32 v2, v1 -; GFX9: v_lshrrev_b32_e32 v3, 16, v0 +; GFX9: v_lshrrev_b32_e32 v1, 16, v0 ; GCN: s_setpc_b64 define <5 x i16> @v5i16_func_void() #0 { %ptr = load volatile <5 x i16> addrspace(1)*, <5 x i16> addrspace(1)* addrspace(2)* undef @@ -352,7 +353,7 @@ define <16 x i8> @v16i8_func_void() #0 { ; GCN-DAG: v_lshrrev_b32_e32 v2, 16, v0 ; GCN-DAG: v_lshrrev_b32_e32 v3, 24, v0 ; CI-DAG: v_bfe_u32 v1, v0, 8, 8 -; VI-DAG: v_lshrrev_b16_e32 v1, 8, v0 +; GFX89-DAG: v_lshrrev_b16_e32 v1, 8, v0 ; GCN: s_setpc_b64 define <4 x i8> @v4i8_func_void() #0 { %ptr = load volatile <4 x i8> addrspace(1)*, <4 x i8> addrspace(1)* addrspace(2)* undef @@ -375,128
+376,56 @@ define {i8, i32} @struct_i8_i32_func_void() #0 { ; GCN: buffer_load_dword [[VAL1:v[0-9]+]] ; GCN: buffer_store_byte [[VAL0]], v0, s[0:3], s4 offen{{$}} ; GCN: buffer_store_dword [[VAL1]], v0, s[0:3], s4 offen offset:4{{$}} -define void @void_func_sret_struct_i8_i32({ i8, i32 }* sret %arg0) #0 { +define void @void_func_sret_struct_i8_i32({ i8, i32 } addrspace(5)* sret %arg0) #0 { %val0 = load volatile i8, i8 addrspace(1)* undef %val1 = load volatile i32, i32 addrspace(1)* undef - %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 }* %arg0, i32 0, i32 0 - %gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 }* %arg0, i32 0, i32 1 - store i8 %val0, i8* %gep0 - store i32 %val1, i32* %gep1 + %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 0 + %gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 1 + store i8 %val0, i8 addrspace(5)* %gep0 + store i32 %val1, i32 addrspace(5)* %gep1 ret void } -; FIXME: Should be able to fold offsets in all of these. Call lowering -; introduces an extra CopyToReg/CopyFromReg obscuring the AssertZext -; inserted. Not using it introduces the spills. +; FIXME: Should be able to fold offsets in all of these pre-gfx9. Call +; lowering introduces an extra CopyToReg/CopyFromReg obscuring the +; AssertZext inserted. Not using it introduces the spills. ; GCN-LABEL: {{^}}v33i32_func_void: -; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:12 ; 4-byte Folded Spill -; GCN: buffer_store_dword v33, off, s[0:3], s5 offset:8 ; 4-byte Folded Spill -; GCN: buffer_store_dword v34, off, s[0:3], s5 offset:4 ; 4-byte Folded Spill - -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen{{$}} - -; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_4:v[0-9]+]], vcc, 4, v0 -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_4]], s[0:3], s4 offen{{$}} - -; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_8:v[0-9]+]], vcc, 8, v0 -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_8]], s[0:3], s4 offen{{$}} - -; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_12:v[0-9]+]], vcc, 12, v0 -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_12]], s[0:3], s4 offen{{$}} - -; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_16:v[0-9]+]], vcc, 16, v0 -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_16]], s[0:3], s4 offen{{$}} - -; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_20:v[0-9]+]], vcc, 20, v0 -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_20]], s[0:3], s4 offen{{$}} - -; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_24:v[0-9]+]], vcc, 24, v0 -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_24]], s[0:3], s4 offen{{$}} - -; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_28:v[0-9]+]], vcc, 28, v0 -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_28]], s[0:3], s4 offen{{$}} - -; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_32:v[0-9]+]], vcc, 32, v0 -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_32]], s[0:3], s4 offen{{$}} - -; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_36:v[0-9]+]], vcc, 36, v0 -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_36]], s[0:3], s4 offen{{$}} - -; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_40:v[0-9]+]], vcc, 40, v0 -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_40]], s[0:3], s4 offen{{$}} - -; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_44:v[0-9]+]], vcc, 44, v0 -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_44]], s[0:3], s4 offen{{$}} - -; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_48:v[0-9]+]], vcc, 48, v0 -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_48]], s[0:3], s4 offen{{$}} - -; GCN-DAG: v_add_{{[_coiu]*}}32_e32 
[[ADD_52:v[0-9]+]], vcc, 52, v0 -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_52]], s[0:3], s4 offen{{$}} - -; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_56:v[0-9]+]], vcc, 56, v0 -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_56]], s[0:3], s4 offen{{$}} - -; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_60:v[0-9]+]], vcc, 60, v0 -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_60]], s[0:3], s4 offen{{$}} - -; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_64:v[0-9]+]], vcc, 64, v0 -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_64]], s[0:3], s4 offen{{$}} - -; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_68:v[0-9]+]], vcc, 0x44, v0 -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_68]], s[0:3], s4 offen{{$}} - -; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_72:v[0-9]+]], vcc, 0x48, v0 -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_72]], s[0:3], s4 offen{{$}} - -; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_76:v[0-9]+]], vcc, 0x4c, v0 -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_76]], s[0:3], s4 offen{{$}} - -; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_80:v[0-9]+]], vcc, 0x50, v0 -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_80]], s[0:3], s4 offen{{$}} - -; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_84:v[0-9]+]], vcc, 0x54, v0 -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_84]], s[0:3], s4 offen{{$}} - -; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_88:v[0-9]+]], vcc, 0x58, v0 -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_88]], s[0:3], s4 offen{{$}} - -; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_92:v[0-9]+]], vcc, 0x5c, v0 -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_92]], s[0:3], s4 offen{{$}} - -; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_96:v[0-9]+]], vcc, 0x60, v0 -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_96]], s[0:3], s4 offen{{$}} - -; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_100:v[0-9]+]], vcc, 0x64, v0 -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_100]], s[0:3], s4 offen{{$}} - -; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_104:v[0-9]+]], vcc, 0x68, v0 -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_104]], s[0:3], s4 offen{{$}} - -; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_108:v[0-9]+]], vcc, 0x6c, v0 -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_108]], s[0:3], s4 offen{{$}} - -; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_112:v[0-9]+]], vcc, 0x70, v0 -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_112]], s[0:3], s4 offen{{$}} - -; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_116:v[0-9]+]], vcc, 0x74, v0 -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_116]], s[0:3], s4 offen{{$}} - -; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_120:v[0-9]+]], vcc, 0x78, v0 -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_120]], s[0:3], s4 offen{{$}} - -; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_124:v[0-9]+]], vcc, 0x7c, v0 -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_124]], s[0:3], s4 offen{{$}} - -; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_128:v[0-9]+]], vcc, 0x80, v0 -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_128]], s[0:3], s4 offen{{$}} - -; GCN: buffer_load_dword v34 -; GCN: buffer_load_dword v33 -; GCN: buffer_load_dword v32 -; GCN: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:4{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:8{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:12{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:16{{$}} +; GFX9-DAG: 
buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:20{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:24{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:28{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:32{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:36{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:40{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:44{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:48{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:52{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:56{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:60{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:64{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:68{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:72{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:76{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:80{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:84{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:88{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:92{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:96{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:100{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:104{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:108{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:112{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:116{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:120{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:124{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:128{{$}} +; GFX9: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 define <33 x i32> @v33i32_func_void() #0 { %ptr = load volatile <33 x i32> addrspace(1)*, <33 x i32> addrspace(1)* addrspace(2)* undef %val = load <33 x i32>, <33 x i32> addrspace(1)* %ptr @@ -504,113 +433,41 @@ define <33 x i32> @v33i32_func_void() #0 { } ; GCN-LABEL: {{^}}struct_v32i32_i32_func_void: -; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:12 ; 4-byte Folded Spill -; GCN: buffer_store_dword v33, off, s[0:3], s5 offset:8 ; 4-byte Folded Spill -; GCN: buffer_store_dword v34, off, s[0:3], s5 offset:4 ; 4-byte Folded Spill - -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen{{$}} - -; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_4:v[0-9]+]], vcc, 4, v0 -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_4]], s[0:3], s4 offen{{$}} - -; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_8:v[0-9]+]], vcc, 8, v0 -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_8]], s[0:3], s4 offen{{$}} - -; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_12:v[0-9]+]], vcc, 12, v0 -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_12]], s[0:3], s4 offen{{$}} - -; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_16:v[0-9]+]], vcc, 16, v0 -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_16]], s[0:3], s4 offen{{$}} - -; GCN-DAG: 
v_add_{{[_coiu]*}}32_e32 [[ADD_20:v[0-9]+]], vcc, 20, v0 -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_20]], s[0:3], s4 offen{{$}} - -; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_24:v[0-9]+]], vcc, 24, v0 -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_24]], s[0:3], s4 offen{{$}} - -; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_28:v[0-9]+]], vcc, 28, v0 -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_28]], s[0:3], s4 offen{{$}} - -; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_32:v[0-9]+]], vcc, 32, v0 -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_32]], s[0:3], s4 offen{{$}} - -; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_36:v[0-9]+]], vcc, 36, v0 -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_36]], s[0:3], s4 offen{{$}} - -; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_40:v[0-9]+]], vcc, 40, v0 -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_40]], s[0:3], s4 offen{{$}} - -; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_44:v[0-9]+]], vcc, 44, v0 -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_44]], s[0:3], s4 offen{{$}} - -; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_48:v[0-9]+]], vcc, 48, v0 -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_48]], s[0:3], s4 offen{{$}} - -; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_52:v[0-9]+]], vcc, 52, v0 -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_52]], s[0:3], s4 offen{{$}} - -; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_56:v[0-9]+]], vcc, 56, v0 -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_56]], s[0:3], s4 offen{{$}} - -; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_60:v[0-9]+]], vcc, 60, v0 -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_60]], s[0:3], s4 offen{{$}} - -; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_64:v[0-9]+]], vcc, 64, v0 -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_64]], s[0:3], s4 offen{{$}} - -; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_68:v[0-9]+]], vcc, 0x44, v0 -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_68]], s[0:3], s4 offen{{$}} - -; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_72:v[0-9]+]], vcc, 0x48, v0 -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_72]], s[0:3], s4 offen{{$}} - -; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_76:v[0-9]+]], vcc, 0x4c, v0 -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_76]], s[0:3], s4 offen{{$}} - -; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_80:v[0-9]+]], vcc, 0x50, v0 -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_80]], s[0:3], s4 offen{{$}} - -; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_84:v[0-9]+]], vcc, 0x54, v0 -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_84]], s[0:3], s4 offen{{$}} - -; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_88:v[0-9]+]], vcc, 0x58, v0 -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_88]], s[0:3], s4 offen{{$}} - -; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_92:v[0-9]+]], vcc, 0x5c, v0 -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_92]], s[0:3], s4 offen{{$}} - -; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_96:v[0-9]+]], vcc, 0x60, v0 -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_96]], s[0:3], s4 offen{{$}} - -; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_100:v[0-9]+]], vcc, 0x64, v0 -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_100]], s[0:3], s4 offen{{$}} - -; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_104:v[0-9]+]], vcc, 0x68, v0 -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_104]], s[0:3], s4 offen{{$}} - -; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_108:v[0-9]+]], vcc, 0x6c, v0 -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_108]], s[0:3], s4 offen{{$}} - -; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_112:v[0-9]+]], vcc, 0x70, v0 -; GCN-DAG: buffer_store_dword 
v{{[0-9]+}}, [[ADD_112]], s[0:3], s4 offen{{$}} - -; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_116:v[0-9]+]], vcc, 0x74, v0 -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_116]], s[0:3], s4 offen{{$}} - -; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_120:v[0-9]+]], vcc, 0x78, v0 -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_120]], s[0:3], s4 offen{{$}} - -; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_124:v[0-9]+]], vcc, 0x7c, v0 -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_124]], s[0:3], s4 offen{{$}} - -; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_128:v[0-9]+]], vcc, 0x80, v0 -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_128]], s[0:3], s4 offen{{$}} - -; GCN: buffer_load_dword v34 -; GCN: buffer_load_dword v33 -; GCN: buffer_load_dword v32 -; GCN: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:4{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:8{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:12{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:16{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:20{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:24{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:28{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:32{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:36{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:40{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:44{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:48{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:52{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:56{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:60{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:64{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:68{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:72{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:76{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:80{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:84{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:88{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:92{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:96{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:100{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:104{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:108{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:112{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:116{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:120{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:124{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:128{{$}} +; GFX9: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 define { <32 x i32>, i32 } 
@struct_v32i32_i32_func_void() #0 { %ptr = load volatile { <32 x i32>, i32 } addrspace(1)*, { <32 x i32>, i32 } addrspace(1)* addrspace(2)* undef %val = load { <32 x i32>, i32 }, { <32 x i32>, i32 } addrspace(1)* %ptr @@ -618,22 +475,41 @@ define { <32 x i32>, i32 } @struct_v32i32_i32_func_void() #0 { } ; GCN-LABEL: {{^}}struct_i32_v32i32_func_void: -; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:8 ; 4-byte Folded Spill -; GCN: buffer_store_dword v33, off, s[0:3], s5 offset:4 ; 4-byte Folded Spill - -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen{{$}} - -; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_128:v[0-9]+]], vcc, 0x80, v0 -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_128]], s[0:3], s4 offen{{$}} - - -; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_256:v[0-9]+]], vcc, 0xfc, v0 -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_256]], s[0:3], s4 offen{{$}} - -; GCN: buffer_load_dword v33 -; GCN: buffer_load_dword v32 -; GCN: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:128{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:132{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:136{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:140{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:144{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:148{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:152{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:156{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:160{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:164{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:168{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:172{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:176{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:180{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:184{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:188{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:192{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:196{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:200{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:204{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:208{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:212{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:216{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:220{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:224{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:228{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:232{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:236{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:240{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:244{{$}} +; GFX9-DAG: 
buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:248{{$}} +; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:252{{$}} +; GFX9: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 define { i32, <32 x i32> } @struct_i32_v32i32_func_void() #0 { %ptr = load volatile { i32, <32 x i32> } addrspace(1)*, { i32, <32 x i32> } addrspace(1)* addrspace(2)* undef %val = load { i32, <32 x i32> }, { i32, <32 x i32> } addrspace(1)* %ptr diff --git a/test/CodeGen/AMDGPU/hazard-inlineasm.mir b/test/CodeGen/AMDGPU/hazard-inlineasm.mir new file mode 100644 index 000000000000..6f09bb8f55f9 --- /dev/null +++ b/test/CodeGen/AMDGPU/hazard-inlineasm.mir @@ -0,0 +1,24 @@ +# RUN: llc -mcpu=gfx900 -march=amdgcn -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck %s + +# If an INLINEASM statement is preceded by a vmem store of more than 8 bytes *and* +# the INLINEASM defs the vregs holding the data-to-be-stored by that preceding store, +# then the hazard recognizer should insert an s_nop in between them. + +... + +# CHECK-LABEL: name: hazard-inlineasm +# CHECK: FLAT_STORE_DWORDX4 +# CHECK-NEXT: S_NOP 0 +# CHECK-NEXT: INLINEASM + +--- +name: hazard-inlineasm + +body: | + bb.0: + FLAT_STORE_DWORDX4 %vgpr49_vgpr50, %vgpr26_vgpr27_vgpr28_vgpr29, 0, 0, 0, implicit %exec, implicit %flat_scr + INLINEASM &"v_mad_u64_u32 $0, $1, $2, $3, $4", 0, 2621450, def %vgpr26_vgpr27, 2818058, def dead %sgpr14_sgpr15, 589833, %sgpr12, 327689, killed %vgpr51, 2621449, %vgpr46_vgpr47 + S_ENDPGM +... + + diff --git a/test/CodeGen/AMDGPU/hazard.mir b/test/CodeGen/AMDGPU/hazard.mir index 80afcbf197aa..d0caacdb9451 100644 --- a/test/CodeGen/AMDGPU/hazard.mir +++ b/test/CodeGen/AMDGPU/hazard.mir @@ -54,7 +54,7 @@ body: | liveins: %sgpr7, %vgpr4 %m0 = S_MOV_B32 killed %sgpr7 - INLINEASM $"; no-op", 1, 327690, def %vgpr5 + INLINEASM &"; no-op", 1, 327690, def %vgpr5 %vgpr0 = V_INTERP_P1_F32 killed %vgpr4, 0, 0, implicit %m0, implicit %exec SI_RETURN_TO_EPILOG killed %vgpr5, killed %vgpr0 ...
diff --git a/test/CodeGen/AMDGPU/hsa-metadata-deduce-ro-arg.ll b/test/CodeGen/AMDGPU/hsa-metadata-deduce-ro-arg.ll index c07c5556ce38..76545b0377e7 100644 --- a/test/CodeGen/AMDGPU/hsa-metadata-deduce-ro-arg.ll +++ b/test/CodeGen/AMDGPU/hsa-metadata-deduce-ro-arg.ll @@ -3,7 +3,8 @@ ; CHECK: - Name: test_ro_arg ; CHECK-NEXT: SymbolName: 'test_ro_arg@kd' ; CHECK-NEXT: Args: -; CHECK-NEXT: - TypeName: 'float*' +; CHECK-NEXT: - Name: in +; CHECK-NEXT: TypeName: 'float*' ; CHECK-NEXT: Size: 8 ; CHECK-NEXT: Align: 8 ; CHECK-NEXT: ValueKind: GlobalBuffer @@ -12,7 +13,8 @@ ; CHECK-NEXT: AccQual: ReadOnly ; CHECK-NEXT: IsConst: true ; CHECK-NEXT: IsRestrict: true -; CHECK-NEXT: - TypeName: 'float*' +; CHECK-NEXT: - Name: out +; CHECK-NEXT: TypeName: 'float*' ; CHECK-NEXT: Size: 8 ; CHECK-NEXT: Align: 8 ; CHECK-NEXT: ValueKind: GlobalBuffer diff --git a/test/CodeGen/AMDGPU/hsa-metadata-enqueu-kernel.ll b/test/CodeGen/AMDGPU/hsa-metadata-enqueu-kernel.ll index c5121a7fd3b2..83baf5be57b1 100644 --- a/test/CodeGen/AMDGPU/hsa-metadata-enqueu-kernel.ll +++ b/test/CodeGen/AMDGPU/hsa-metadata-enqueu-kernel.ll @@ -11,7 +11,8 @@ ; CHECK-NEXT: Language: OpenCL C ; CHECK-NEXT: LanguageVersion: [ 2, 0 ] ; CHECK-NEXT: Args: -; CHECK-NEXT: - TypeName: char +; CHECK-NEXT: - Name: a +; CHECK-NEXT: TypeName: char ; CHECK-NEXT: Size: 1 ; CHECK-NEXT: Align: 1 ; CHECK-NEXT: ValueKind: ByValue @@ -43,7 +44,8 @@ define amdgpu_kernel void @test_non_enqueue_kernel_caller(i8 %a) ; CHECK-NEXT: Language: OpenCL C ; CHECK-NEXT: LanguageVersion: [ 2, 0 ] ; CHECK-NEXT: Args: -; CHECK-NEXT: - TypeName: char +; CHECK-NEXT: - Name: a +; CHECK-NEXT: TypeName: char ; CHECK-NEXT: Size: 1 ; CHECK-NEXT: Align: 1 ; CHECK-NEXT: ValueKind: ByValue diff --git a/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll b/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll index ea47f83aef3e..06fc5eae0616 100644 --- a/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll +++ b/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll @@ -28,7 +28,8 @@ ; CHECK-NEXT: Language: OpenCL C ; CHECK-NEXT: LanguageVersion: [ 2, 0 ] ; CHECK-NEXT: Args: -; CHECK-NEXT: - TypeName: char +; CHECK-NEXT: - Name: a +; CHECK-NEXT: TypeName: char ; CHECK-NEXT: Size: 1 ; CHECK-NEXT: Align: 1 ; CHECK-NEXT: ValueKind: ByValue @@ -64,7 +65,8 @@ define amdgpu_kernel void @test_char(i8 %a) ; CHECK-NEXT: Language: OpenCL C ; CHECK-NEXT: LanguageVersion: [ 2, 0 ] ; CHECK-NEXT: Args: -; CHECK-NEXT: - TypeName: ushort2 +; CHECK-NEXT: - Name: a +; CHECK-NEXT: TypeName: ushort2 ; CHECK-NEXT: Size: 4 ; CHECK-NEXT: Align: 4 ; CHECK-NEXT: ValueKind: ByValue @@ -98,7 +100,8 @@ define amdgpu_kernel void @test_ushort2(<2 x i16> %a) ; CHECK-NEXT: Language: OpenCL C ; CHECK-NEXT: LanguageVersion: [ 2, 0 ] ; CHECK-NEXT: Args: -; CHECK-NEXT: - TypeName: int3 +; CHECK-NEXT: - Name: a +; CHECK-NEXT: TypeName: int3 ; CHECK-NEXT: Size: 16 ; CHECK-NEXT: Align: 16 ; CHECK-NEXT: ValueKind: ByValue @@ -132,7 +135,8 @@ define amdgpu_kernel void @test_int3(<3 x i32> %a) ; CHECK-NEXT: Language: OpenCL C ; CHECK-NEXT: LanguageVersion: [ 2, 0 ] ; CHECK-NEXT: Args: -; CHECK-NEXT: - TypeName: ulong4 +; CHECK-NEXT: - Name: a +; CHECK-NEXT: TypeName: ulong4 ; CHECK-NEXT: Size: 32 ; CHECK-NEXT: Align: 32 ; CHECK-NEXT: ValueKind: ByValue @@ -166,7 +170,8 @@ define amdgpu_kernel void @test_ulong4(<4 x i64> %a) ; CHECK-NEXT: Language: OpenCL C ; CHECK-NEXT: LanguageVersion: [ 2, 0 ] ; CHECK-NEXT: Args: -; CHECK-NEXT: - TypeName: half8 +; CHECK-NEXT: - Name: a +; CHECK-NEXT: TypeName: half8 ; CHECK-NEXT: Size: 
16 ; CHECK-NEXT: Align: 16 ; CHECK-NEXT: ValueKind: ByValue @@ -200,7 +205,8 @@ define amdgpu_kernel void @test_half8(<8 x half> %a) ; CHECK-NEXT: Language: OpenCL C ; CHECK-NEXT: LanguageVersion: [ 2, 0 ] ; CHECK-NEXT: Args: -; CHECK-NEXT: - TypeName: float16 +; CHECK-NEXT: - Name: a +; CHECK-NEXT: TypeName: float16 ; CHECK-NEXT: Size: 64 ; CHECK-NEXT: Align: 64 ; CHECK-NEXT: ValueKind: ByValue @@ -234,7 +240,8 @@ define amdgpu_kernel void @test_float16(<16 x float> %a) ; CHECK-NEXT: Language: OpenCL C ; CHECK-NEXT: LanguageVersion: [ 2, 0 ] ; CHECK-NEXT: Args: -; CHECK-NEXT: - TypeName: double16 +; CHECK-NEXT: - Name: a +; CHECK-NEXT: TypeName: double16 ; CHECK-NEXT: Size: 128 ; CHECK-NEXT: Align: 128 ; CHECK-NEXT: ValueKind: ByValue @@ -268,7 +275,8 @@ define amdgpu_kernel void @test_double16(<16 x double> %a) ; CHECK-NEXT: Language: OpenCL C ; CHECK-NEXT: LanguageVersion: [ 2, 0 ] ; CHECK-NEXT: Args: -; CHECK-NEXT: - TypeName: 'int *' +; CHECK-NEXT: - Name: a +; CHECK-NEXT: TypeName: 'int *' ; CHECK-NEXT: Size: 8 ; CHECK-NEXT: Align: 8 ; CHECK-NEXT: ValueKind: GlobalBuffer @@ -303,7 +311,8 @@ define amdgpu_kernel void @test_pointer(i32 addrspace(1)* %a) ; CHECK-NEXT: Language: OpenCL C ; CHECK-NEXT: LanguageVersion: [ 2, 0 ] ; CHECK-NEXT: Args: -; CHECK-NEXT: - TypeName: image2d_t +; CHECK-NEXT: - Name: a +; CHECK-NEXT: TypeName: image2d_t ; CHECK-NEXT: Size: 8 ; CHECK-NEXT: Align: 8 ; CHECK-NEXT: ValueKind: Image @@ -338,7 +347,8 @@ define amdgpu_kernel void @test_image(%opencl.image2d_t addrspace(1)* %a) ; CHECK-NEXT: Language: OpenCL C ; CHECK-NEXT: LanguageVersion: [ 2, 0 ] ; CHECK-NEXT: Args: -; CHECK-NEXT: - TypeName: sampler_t +; CHECK-NEXT: - Name: a +; CHECK-NEXT: TypeName: sampler_t ; CHECK-NEXT: Size: 4 ; CHECK-NEXT: Align: 4 ; CHECK-NEXT: ValueKind: Sampler @@ -372,7 +382,8 @@ define amdgpu_kernel void @test_sampler(i32 %a) ; CHECK-NEXT: Language: OpenCL C ; CHECK-NEXT: LanguageVersion: [ 2, 0 ] ; CHECK-NEXT: Args: -; CHECK-NEXT: - TypeName: queue_t +; CHECK-NEXT: - Name: a +; CHECK-NEXT: TypeName: queue_t ; CHECK-NEXT: Size: 8 ; CHECK-NEXT: Align: 8 ; CHECK-NEXT: ValueKind: Queue @@ -407,7 +418,8 @@ define amdgpu_kernel void @test_queue(%opencl.queue_t addrspace(1)* %a) ; CHECK-NEXT: Language: OpenCL C ; CHECK-NEXT: LanguageVersion: [ 2, 0 ] ; CHECK-NEXT: Args: -; CHECK-NEXT: - TypeName: struct A +; CHECK-NEXT: - Name: a +; CHECK-NEXT: TypeName: struct A ; CHECK-NEXT: Size: 4 ; CHECK-NEXT: Align: 4 ; CHECK-NEXT: ValueKind: GlobalBuffer @@ -442,7 +454,8 @@ define amdgpu_kernel void @test_struct(%struct.A* byval %a) ; CHECK-NEXT: Language: OpenCL C ; CHECK-NEXT: LanguageVersion: [ 2, 0 ] ; CHECK-NEXT: Args: -; CHECK-NEXT: - TypeName: i128 +; CHECK-NEXT: - Name: a +; CHECK-NEXT: TypeName: i128 ; CHECK-NEXT: Size: 16 ; CHECK-NEXT: Align: 8 ; CHECK-NEXT: ValueKind: ByValue @@ -476,19 +489,22 @@ define amdgpu_kernel void @test_i128(i128 %a) ; CHECK-NEXT: Language: OpenCL C ; CHECK-NEXT: LanguageVersion: [ 2, 0 ] ; CHECK-NEXT: Args: -; CHECK-NEXT: - TypeName: int +; CHECK-NEXT: - Name: a +; CHECK-NEXT: TypeName: int ; CHECK-NEXT: Size: 4 ; CHECK-NEXT: Align: 4 ; CHECK-NEXT: ValueKind: ByValue ; CHECK-NEXT: ValueType: I32 ; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: - TypeName: short2 +; CHECK-NEXT: - Name: b +; CHECK-NEXT: TypeName: short2 ; CHECK-NEXT: Size: 4 ; CHECK-NEXT: Align: 4 ; CHECK-NEXT: ValueKind: ByValue ; CHECK-NEXT: ValueType: I16 ; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: - TypeName: char3 +; CHECK-NEXT: - Name: c +; CHECK-NEXT: TypeName: char3 ; CHECK-NEXT: 
Size: 4 ; CHECK-NEXT: Align: 4 ; CHECK-NEXT: ValueKind: ByValue @@ -522,21 +538,24 @@ define amdgpu_kernel void @test_multi_arg(i32 %a, <2 x i16> %b, <3 x i8> %c) ; CHECK-NEXT: Language: OpenCL C ; CHECK-NEXT: LanguageVersion: [ 2, 0 ] ; CHECK-NEXT: Args: -; CHECK-NEXT: - TypeName: 'int *' +; CHECK-NEXT: - Name: g +; CHECK-NEXT: TypeName: 'int *' ; CHECK-NEXT: Size: 8 ; CHECK-NEXT: Align: 8 ; CHECK-NEXT: ValueKind: GlobalBuffer ; CHECK-NEXT: ValueType: I32 ; CHECK-NEXT: AddrSpaceQual: Global ; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: - TypeName: 'int *' +; CHECK-NEXT: - Name: c +; CHECK-NEXT: TypeName: 'int *' ; CHECK-NEXT: Size: 8 ; CHECK-NEXT: Align: 8 ; CHECK-NEXT: ValueKind: GlobalBuffer ; CHECK-NEXT: ValueType: I32 ; CHECK-NEXT: AddrSpaceQual: Constant ; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: - TypeName: 'int *' +; CHECK-NEXT: - Name: l +; CHECK-NEXT: TypeName: 'int *' ; CHECK-NEXT: Size: 4 ; CHECK-NEXT: Align: 4 ; CHECK-NEXT: ValueKind: DynamicSharedPointer @@ -574,7 +593,8 @@ define amdgpu_kernel void @test_addr_space(i32 addrspace(1)* %g, ; CHECK-NEXT: Language: OpenCL C ; CHECK-NEXT: LanguageVersion: [ 2, 0 ] ; CHECK-NEXT: Args: -; CHECK-NEXT: - TypeName: 'int *' +; CHECK-NEXT: - Name: a +; CHECK-NEXT: TypeName: 'int *' ; CHECK-NEXT: Size: 8 ; CHECK-NEXT: Align: 8 ; CHECK-NEXT: ValueKind: GlobalBuffer @@ -582,7 +602,8 @@ define amdgpu_kernel void @test_addr_space(i32 addrspace(1)* %g, ; CHECK-NEXT: AddrSpaceQual: Global ; CHECK-NEXT: AccQual: Default ; CHECK-NEXT: IsVolatile: true -; CHECK-NEXT: - TypeName: 'int *' +; CHECK-NEXT: - Name: b +; CHECK-NEXT: TypeName: 'int *' ; CHECK-NEXT: Size: 8 ; CHECK-NEXT: Align: 8 ; CHECK-NEXT: ValueKind: GlobalBuffer @@ -591,7 +612,8 @@ define amdgpu_kernel void @test_addr_space(i32 addrspace(1)* %g, ; CHECK-NEXT: AccQual: Default ; CHECK-NEXT: IsConst: true ; CHECK-NEXT: IsRestrict: true -; CHECK-NEXT: - TypeName: 'int *' +; CHECK-NEXT: - Name: c +; CHECK-NEXT: TypeName: 'int *' ; CHECK-NEXT: Size: 8 ; CHECK-NEXT: Align: 8 ; CHECK-NEXT: ValueKind: Pipe @@ -629,21 +651,24 @@ define amdgpu_kernel void @test_type_qual(i32 addrspace(1)* %a, ; CHECK-NEXT: Language: OpenCL C ; CHECK-NEXT: LanguageVersion: [ 2, 0 ] ; CHECK-NEXT: Args: -; CHECK-NEXT: - TypeName: image1d_t +; CHECK-NEXT: - Name: ro +; CHECK-NEXT: TypeName: image1d_t ; CHECK-NEXT: Size: 8 ; CHECK-NEXT: Align: 8 ; CHECK-NEXT: ValueKind: Image ; CHECK-NEXT: ValueType: Struct ; CHECK-NEXT: AddrSpaceQual: Global ; CHECK-NEXT: AccQual: ReadOnly -; CHECK-NEXT: - TypeName: image2d_t +; CHECK-NEXT: - Name: wo +; CHECK-NEXT: TypeName: image2d_t ; CHECK-NEXT: Size: 8 ; CHECK-NEXT: Align: 8 ; CHECK-NEXT: ValueKind: Image ; CHECK-NEXT: ValueType: Struct ; CHECK-NEXT: AddrSpaceQual: Global ; CHECK-NEXT: AccQual: WriteOnly -; CHECK-NEXT: - TypeName: image3d_t +; CHECK-NEXT: - Name: rw +; CHECK-NEXT: TypeName: image3d_t ; CHECK-NEXT: Size: 8 ; CHECK-NEXT: Align: 8 ; CHECK-NEXT: ValueKind: Image @@ -682,7 +707,8 @@ define amdgpu_kernel void @test_access_qual(%opencl.image1d_t addrspace(1)* %ro, ; CHECK-NEXT: Attrs: ; CHECK-NEXT: VecTypeHint: half ; CHECK-NEXT: Args: -; CHECK-NEXT: - TypeName: int +; CHECK-NEXT: - Name: a +; CHECK-NEXT: TypeName: int ; CHECK-NEXT: Size: 4 ; CHECK-NEXT: Align: 4 ; CHECK-NEXT: ValueKind: ByValue @@ -718,7 +744,8 @@ define amdgpu_kernel void @test_vec_type_hint_half(i32 %a) ; CHECK-NEXT: Attrs: ; CHECK-NEXT: VecTypeHint: float ; CHECK-NEXT: Args: -; CHECK-NEXT: - TypeName: int +; CHECK-NEXT: - Name: a +; CHECK-NEXT: TypeName: int ; CHECK-NEXT: Size: 4 ; CHECK-NEXT: 
Align: 4 ; CHECK-NEXT: ValueKind: ByValue @@ -754,7 +781,8 @@ define amdgpu_kernel void @test_vec_type_hint_float(i32 %a) ; CHECK-NEXT: Attrs: ; CHECK-NEXT: VecTypeHint: double ; CHECK-NEXT: Args: -; CHECK-NEXT: - TypeName: int +; CHECK-NEXT: - Name: a +; CHECK-NEXT: TypeName: int ; CHECK-NEXT: Size: 4 ; CHECK-NEXT: Align: 4 ; CHECK-NEXT: ValueKind: ByValue @@ -790,7 +818,8 @@ define amdgpu_kernel void @test_vec_type_hint_double(i32 %a) ; CHECK-NEXT: Attrs: ; CHECK-NEXT: VecTypeHint: char ; CHECK-NEXT: Args: -; CHECK-NEXT: - TypeName: int +; CHECK-NEXT: - Name: a +; CHECK-NEXT: TypeName: int ; CHECK-NEXT: Size: 4 ; CHECK-NEXT: Align: 4 ; CHECK-NEXT: ValueKind: ByValue @@ -826,7 +855,8 @@ define amdgpu_kernel void @test_vec_type_hint_char(i32 %a) ; CHECK-NEXT: Attrs: ; CHECK-NEXT: VecTypeHint: short ; CHECK-NEXT: Args: -; CHECK-NEXT: - TypeName: int +; CHECK-NEXT: - Name: a +; CHECK-NEXT: TypeName: int ; CHECK-NEXT: Size: 4 ; CHECK-NEXT: Align: 4 ; CHECK-NEXT: ValueKind: ByValue @@ -862,7 +892,8 @@ define amdgpu_kernel void @test_vec_type_hint_short(i32 %a) ; CHECK-NEXT: Attrs: ; CHECK-NEXT: VecTypeHint: long ; CHECK-NEXT: Args: -; CHECK-NEXT: - TypeName: int +; CHECK-NEXT: - Name: a +; CHECK-NEXT: TypeName: int ; CHECK-NEXT: Size: 4 ; CHECK-NEXT: Align: 4 ; CHECK-NEXT: ValueKind: ByValue @@ -898,7 +929,8 @@ define amdgpu_kernel void @test_vec_type_hint_long(i32 %a) ; CHECK-NEXT: Attrs: ; CHECK-NEXT: VecTypeHint: unknown ; CHECK-NEXT: Args: -; CHECK-NEXT: - TypeName: int +; CHECK-NEXT: - Name: a +; CHECK-NEXT: TypeName: int ; CHECK-NEXT: Size: 4 ; CHECK-NEXT: Align: 4 ; CHECK-NEXT: ValueKind: ByValue @@ -935,7 +967,8 @@ define amdgpu_kernel void @test_vec_type_hint_unknown(i32 %a) ; CHECK-NEXT: ReqdWorkGroupSize: [ 1, 2, 4 ] ; CHECK-NEXT: VecTypeHint: int ; CHECK-NEXT: Args: -; CHECK-NEXT: - TypeName: int +; CHECK-NEXT: - Name: a +; CHECK-NEXT: TypeName: int ; CHECK-NEXT: Size: 4 ; CHECK-NEXT: Align: 4 ; CHECK-NEXT: ValueKind: ByValue @@ -973,7 +1006,8 @@ define amdgpu_kernel void @test_reqd_wgs_vec_type_hint(i32 %a) ; CHECK-NEXT: WorkGroupSizeHint: [ 8, 16, 32 ] ; CHECK-NEXT: VecTypeHint: uint4 ; CHECK-NEXT: Args: -; CHECK-NEXT: - TypeName: int +; CHECK-NEXT: - Name: a +; CHECK-NEXT: TypeName: int ; CHECK-NEXT: Size: 4 ; CHECK-NEXT: Align: 4 ; CHECK-NEXT: ValueKind: ByValue @@ -1008,7 +1042,8 @@ define amdgpu_kernel void @test_wgs_hint_vec_type_hint(i32 %a) ; CHECK-NEXT: Language: OpenCL C ; CHECK-NEXT: LanguageVersion: [ 2, 0 ] ; CHECK-NEXT: Args: -; CHECK-NEXT: - TypeName: 'int **' +; CHECK-NEXT: - Name: a +; CHECK-NEXT: TypeName: 'int **' ; CHECK-NEXT: Size: 8 ; CHECK-NEXT: Align: 8 ; CHECK-NEXT: ValueKind: GlobalBuffer @@ -1043,7 +1078,8 @@ define amdgpu_kernel void @test_arg_ptr_to_ptr(i32* addrspace(1)* %a) ; CHECK-NEXT: Language: OpenCL C ; CHECK-NEXT: LanguageVersion: [ 2, 0 ] ; CHECK-NEXT: Args: -; CHECK-NEXT: - TypeName: struct B +; CHECK-NEXT: - Name: a +; CHECK-NEXT: TypeName: struct B ; CHECK-NEXT: Size: 4 ; CHECK-NEXT: Align: 4 ; CHECK-NEXT: ValueKind: GlobalBuffer @@ -1078,7 +1114,8 @@ define amdgpu_kernel void @test_arg_struct_contains_ptr(%struct.B* byval %a) ; CHECK-NEXT: Language: OpenCL C ; CHECK-NEXT: LanguageVersion: [ 2, 0 ] ; CHECK-NEXT: Args: -; CHECK-NEXT: - TypeName: 'global int* __attribute__((ext_vector_type(2)))' +; CHECK-NEXT: - Name: a +; CHECK-NEXT: TypeName: 'global int* __attribute__((ext_vector_type(2)))' ; CHECK-NEXT: Size: 16 ; CHECK-NEXT: Align: 16 ; CHECK-NEXT: ValueKind: ByValue @@ -1112,7 +1149,8 @@ define amdgpu_kernel void 
@test_arg_vector_of_ptr(<2 x i32 addrspace(1)*> %a) ; CHECK-NEXT: Language: OpenCL C ; CHECK-NEXT: LanguageVersion: [ 2, 0 ] ; CHECK-NEXT: Args: -; CHECK-NEXT: - TypeName: clk_event_t +; CHECK-NEXT: - Name: a +; CHECK-NEXT: TypeName: clk_event_t ; CHECK-NEXT: Size: 8 ; CHECK-NEXT: Align: 8 ; CHECK-NEXT: ValueKind: GlobalBuffer @@ -1148,14 +1186,16 @@ define amdgpu_kernel void @test_arg_unknown_builtin_type( ; CHECK-NEXT: Language: OpenCL C ; CHECK-NEXT: LanguageVersion: [ 2, 0 ] ; CHECK-NEXT: Args: -; CHECK-NEXT: - TypeName: 'long *' +; CHECK-NEXT: - Name: a +; CHECK-NEXT: TypeName: 'long *' ; CHECK-NEXT: Size: 8 ; CHECK-NEXT: Align: 8 ; CHECK-NEXT: ValueKind: GlobalBuffer ; CHECK-NEXT: ValueType: I64 ; CHECK-NEXT: AddrSpaceQual: Global ; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: - TypeName: 'char *' +; CHECK-NEXT: - Name: b +; CHECK-NEXT: TypeName: 'char *' ; CHECK-NEXT: Size: 4 ; CHECK-NEXT: Align: 4 ; CHECK-NEXT: ValueKind: DynamicSharedPointer @@ -1163,7 +1203,8 @@ define amdgpu_kernel void @test_arg_unknown_builtin_type( ; CHECK-NEXT: PointeeAlign: 1 ; CHECK-NEXT: AddrSpaceQual: Local ; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: - TypeName: 'char2 *' +; CHECK-NEXT: - Name: c +; CHECK-NEXT: TypeName: 'char2 *' ; CHECK-NEXT: Size: 4 ; CHECK-NEXT: Align: 4 ; CHECK-NEXT: ValueKind: DynamicSharedPointer @@ -1171,7 +1212,8 @@ define amdgpu_kernel void @test_arg_unknown_builtin_type( ; CHECK-NEXT: PointeeAlign: 2 ; CHECK-NEXT: AddrSpaceQual: Local ; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: - TypeName: 'char3 *' +; CHECK-NEXT: - Name: d +; CHECK-NEXT: TypeName: 'char3 *' ; CHECK-NEXT: Size: 4 ; CHECK-NEXT: Align: 4 ; CHECK-NEXT: ValueKind: DynamicSharedPointer @@ -1179,7 +1221,8 @@ define amdgpu_kernel void @test_arg_unknown_builtin_type( ; CHECK-NEXT: PointeeAlign: 4 ; CHECK-NEXT: AddrSpaceQual: Local ; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: - TypeName: 'char4 *' +; CHECK-NEXT: - Name: e +; CHECK-NEXT: TypeName: 'char4 *' ; CHECK-NEXT: Size: 4 ; CHECK-NEXT: Align: 4 ; CHECK-NEXT: ValueKind: DynamicSharedPointer @@ -1187,7 +1230,8 @@ define amdgpu_kernel void @test_arg_unknown_builtin_type( ; CHECK-NEXT: PointeeAlign: 4 ; CHECK-NEXT: AddrSpaceQual: Local ; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: - TypeName: 'char8 *' +; CHECK-NEXT: - Name: f +; CHECK-NEXT: TypeName: 'char8 *' ; CHECK-NEXT: Size: 4 ; CHECK-NEXT: Align: 4 ; CHECK-NEXT: ValueKind: DynamicSharedPointer @@ -1195,7 +1239,8 @@ define amdgpu_kernel void @test_arg_unknown_builtin_type( ; CHECK-NEXT: PointeeAlign: 8 ; CHECK-NEXT: AddrSpaceQual: Local ; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: - TypeName: 'char16 *' +; CHECK-NEXT: - Name: g +; CHECK-NEXT: TypeName: 'char16 *' ; CHECK-NEXT: Size: 4 ; CHECK-NEXT: Align: 4 ; CHECK-NEXT: ValueKind: DynamicSharedPointer @@ -1239,7 +1284,8 @@ define amdgpu_kernel void @test_pointee_align(i64 addrspace(1)* %a, ; CHECK-NEXT: Attrs: ; CHECK-NEXT: RuntimeHandle: __test_block_invoke_kernel_runtime_handle ; CHECK-NEXT: Args: -; CHECK-NEXT: - TypeName: __block_literal +; CHECK-NEXT: - Name: arg +; CHECK-NEXT: TypeName: __block_literal ; CHECK-NEXT: Size: 25 ; CHECK-NEXT: Align: 1 ; CHECK-NEXT: ValueKind: ByValue @@ -1274,7 +1320,8 @@ define amdgpu_kernel void @__test_block_invoke_kernel( ; CHECK-NEXT: Language: OpenCL C ; CHECK-NEXT: LanguageVersion: [ 2, 0 ] ; CHECK-NEXT: Args: -; CHECK-NEXT: - TypeName: char +; CHECK-NEXT: - Name: a +; CHECK-NEXT: TypeName: char ; CHECK-NEXT: Size: 1 ; CHECK-NEXT: Align: 1 ; CHECK-NEXT: ValueKind: ByValue diff --git 
a/test/CodeGen/AMDGPU/hsa-metadata-images.ll b/test/CodeGen/AMDGPU/hsa-metadata-images.ll index 286f57399b71..f6290d41f690 100644 --- a/test/CodeGen/AMDGPU/hsa-metadata-images.ll +++ b/test/CodeGen/AMDGPU/hsa-metadata-images.ll @@ -22,40 +22,52 @@ ; CHECK: - Name: test ; CHECK: SymbolName: 'test@kd' ; CHECK: Args: -; CHECK: - TypeName: image1d_t +; CHECK: - Name: a +; CHECK: TypeName: image1d_t ; CHECK: Size: 8 ; CHECK: ValueKind: Image -; CHECK: - TypeName: image1d_array_t +; CHECK: - Name: b +; CHECK: TypeName: image1d_array_t ; CHECK: Size: 8 ; CHECK: ValueKind: Image -; CHECK: - TypeName: image1d_buffer_t +; CHECK: - Name: c +; CHECK: TypeName: image1d_buffer_t ; CHECK: Size: 8 ; CHECK: ValueKind: Image -; CHECK: - TypeName: image2d_t +; CHECK: - Name: d +; CHECK: TypeName: image2d_t ; CHECK: Size: 8 ; CHECK: ValueKind: Image -; CHECK: - TypeName: image2d_array_t +; CHECK: - Name: e +; CHECK: TypeName: image2d_array_t ; CHECK: Size: 8 ; CHECK: ValueKind: Image -; CHECK: - TypeName: image2d_array_depth_t +; CHECK: - Name: f +; CHECK: TypeName: image2d_array_depth_t ; CHECK: Size: 8 ; CHECK: ValueKind: Image -; CHECK: - TypeName: image2d_array_msaa_t +; CHECK: - Name: g +; CHECK: TypeName: image2d_array_msaa_t ; CHECK: Size: 8 ; CHECK: ValueKind: Image -; CHECK: - TypeName: image2d_array_msaa_depth_t +; CHECK: - Name: h +; CHECK: TypeName: image2d_array_msaa_depth_t ; CHECK: Size: 8 ; CHECK: ValueKind: Image -; CHECK: - TypeName: image2d_depth_t +; CHECK: - Name: i +; CHECK: TypeName: image2d_depth_t ; CHECK: Size: 8 ; CHECK: ValueKind: Image -; CHECK: - TypeName: image2d_msaa_t +; CHECK: - Name: j +; CHECK: TypeName: image2d_msaa_t ; CHECK: Size: 8 ; CHECK: ValueKind: Image -; CHECK: - TypeName: image2d_msaa_depth_t +; CHECK: - Name: k +; CHECK: TypeName: image2d_msaa_depth_t ; CHECK: Size: 8 ; CHECK: ValueKind: Image -; CHECK: - TypeName: image3d_t +; CHECK: - Name: l +; CHECK: TypeName: image3d_t ; CHECK: Size: 8 ; CHECK: ValueKind: Image define amdgpu_kernel void @test(%opencl.image1d_t addrspace(1)* %a, diff --git a/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props.ll b/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props.ll index 2d02b46e479d..f4a914adddbf 100644 --- a/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props.ll +++ b/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props.ll @@ -1,26 +1,26 @@ ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=NOTES %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx800 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX800 --check-prefix=NOTES %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX803 --check-prefix=NOTES %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s +@var = addrspace(1) global float 0.0 + ; CHECK: --- ; CHECK: Version: [ 1, 0 ] - ; CHECK: Kernels: -; CHECK: - Name: test -; CHECK: SymbolName: 'test@kd' -; CHECK: CodeProps: -; CHECK: KernargSegmentSize: 24 -; CHECK: GroupSegmentFixedSize: 0 -; CHECK: PrivateSegmentFixedSize: 0 -; CHECK: KernargSegmentAlign: 8 -; CHECK: WavefrontSize: 64 -; GFX700: NumSGPRs: 6 -; GFX800: NumSGPRs: 96 -; GFX900: NumSGPRs: 6 
-; GFX700: NumVGPRs: 4 -; GFX800: NumVGPRs: 6 -; GFX900: NumVGPRs: 6 -; CHECK: MaxFlatWorkGroupSize: 256 + +; CHECK: - Name: test +; CHECK: SymbolName: 'test@kd' +; CHECK: CodeProps: +; CHECK: KernargSegmentSize: 24 +; CHECK: GroupSegmentFixedSize: 0 +; CHECK: PrivateSegmentFixedSize: 0 +; CHECK: KernargSegmentAlign: 8 +; CHECK: WavefrontSize: 64 +; CHECK: NumSGPRs: 6 +; GFX700: NumVGPRs: 4 +; GFX803: NumVGPRs: 6 +; GFX900: NumVGPRs: 6 +; CHECK: MaxFlatWorkGroupSize: 256 define amdgpu_kernel void @test( half addrspace(1)* %r, half addrspace(1)* %a, @@ -32,3 +32,111 @@ entry: store half %r.val, half addrspace(1)* %r ret void } + +; CHECK: - Name: num_spilled_sgprs +; CHECK: SymbolName: 'num_spilled_sgprs@kd' +; CHECK: CodeProps: +; CHECK: NumSpilledSGPRs: 41 +define amdgpu_kernel void @num_spilled_sgprs( + i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(1)* %out2, + i32 addrspace(1)* %out3, i32 addrspace(1)* %out4, i32 addrspace(1)* %out5, + i32 addrspace(1)* %out6, i32 addrspace(1)* %out7, i32 addrspace(1)* %out8, + i32 addrspace(1)* %out9, i32 addrspace(1)* %outa, i32 addrspace(1)* %outb, + i32 addrspace(1)* %outc, i32 addrspace(1)* %outd, i32 addrspace(1)* %oute, + i32 addrspace(1)* %outf, i32 %in0, i32 %in1, i32 %in2, i32 %in3, i32 %in4, + i32 %in5, i32 %in6, i32 %in7, i32 %in8, i32 %in9, i32 %ina, i32 %inb, + i32 %inc, i32 %ind, i32 %ine, i32 %inf) #0 { +entry: + store i32 %in0, i32 addrspace(1)* %out0 + store i32 %in1, i32 addrspace(1)* %out1 + store i32 %in2, i32 addrspace(1)* %out2 + store i32 %in3, i32 addrspace(1)* %out3 + store i32 %in4, i32 addrspace(1)* %out4 + store i32 %in5, i32 addrspace(1)* %out5 + store i32 %in6, i32 addrspace(1)* %out6 + store i32 %in7, i32 addrspace(1)* %out7 + store i32 %in8, i32 addrspace(1)* %out8 + store i32 %in9, i32 addrspace(1)* %out9 + store i32 %ina, i32 addrspace(1)* %outa + store i32 %inb, i32 addrspace(1)* %outb + store i32 %inc, i32 addrspace(1)* %outc + store i32 %ind, i32 addrspace(1)* %outd + store i32 %ine, i32 addrspace(1)* %oute + store i32 %inf, i32 addrspace(1)* %outf + ret void +} + +; CHECK: - Name: num_spilled_vgprs +; CHECK: SymbolName: 'num_spilled_vgprs@kd' +; CHECK: CodeProps: +; CHECK: NumSpilledVGPRs: 14 +define amdgpu_kernel void @num_spilled_vgprs() #1 { + %val0 = load volatile float, float addrspace(1)* @var + %val1 = load volatile float, float addrspace(1)* @var + %val2 = load volatile float, float addrspace(1)* @var + %val3 = load volatile float, float addrspace(1)* @var + %val4 = load volatile float, float addrspace(1)* @var + %val5 = load volatile float, float addrspace(1)* @var + %val6 = load volatile float, float addrspace(1)* @var + %val7 = load volatile float, float addrspace(1)* @var + %val8 = load volatile float, float addrspace(1)* @var + %val9 = load volatile float, float addrspace(1)* @var + %val10 = load volatile float, float addrspace(1)* @var + %val11 = load volatile float, float addrspace(1)* @var + %val12 = load volatile float, float addrspace(1)* @var + %val13 = load volatile float, float addrspace(1)* @var + %val14 = load volatile float, float addrspace(1)* @var + %val15 = load volatile float, float addrspace(1)* @var + %val16 = load volatile float, float addrspace(1)* @var + %val17 = load volatile float, float addrspace(1)* @var + %val18 = load volatile float, float addrspace(1)* @var + %val19 = load volatile float, float addrspace(1)* @var + %val20 = load volatile float, float addrspace(1)* @var + %val21 = load volatile float, float addrspace(1)* @var + %val22 = load volatile float, 
float addrspace(1)* @var + %val23 = load volatile float, float addrspace(1)* @var + %val24 = load volatile float, float addrspace(1)* @var + %val25 = load volatile float, float addrspace(1)* @var + %val26 = load volatile float, float addrspace(1)* @var + %val27 = load volatile float, float addrspace(1)* @var + %val28 = load volatile float, float addrspace(1)* @var + %val29 = load volatile float, float addrspace(1)* @var + %val30 = load volatile float, float addrspace(1)* @var + + store volatile float %val0, float addrspace(1)* @var + store volatile float %val1, float addrspace(1)* @var + store volatile float %val2, float addrspace(1)* @var + store volatile float %val3, float addrspace(1)* @var + store volatile float %val4, float addrspace(1)* @var + store volatile float %val5, float addrspace(1)* @var + store volatile float %val6, float addrspace(1)* @var + store volatile float %val7, float addrspace(1)* @var + store volatile float %val8, float addrspace(1)* @var + store volatile float %val9, float addrspace(1)* @var + store volatile float %val10, float addrspace(1)* @var + store volatile float %val11, float addrspace(1)* @var + store volatile float %val12, float addrspace(1)* @var + store volatile float %val13, float addrspace(1)* @var + store volatile float %val14, float addrspace(1)* @var + store volatile float %val15, float addrspace(1)* @var + store volatile float %val16, float addrspace(1)* @var + store volatile float %val17, float addrspace(1)* @var + store volatile float %val18, float addrspace(1)* @var + store volatile float %val19, float addrspace(1)* @var + store volatile float %val20, float addrspace(1)* @var + store volatile float %val21, float addrspace(1)* @var + store volatile float %val22, float addrspace(1)* @var + store volatile float %val23, float addrspace(1)* @var + store volatile float %val24, float addrspace(1)* @var + store volatile float %val25, float addrspace(1)* @var + store volatile float %val26, float addrspace(1)* @var + store volatile float %val27, float addrspace(1)* @var + store volatile float %val28, float addrspace(1)* @var + store volatile float %val29, float addrspace(1)* @var + store volatile float %val30, float addrspace(1)* @var + + ret void +} + +attributes #0 = { "amdgpu-num-sgpr"="14" } +attributes #1 = { "amdgpu-num-vgpr"="20" } diff --git a/test/CodeGen/AMDGPU/hsa-metadata-kernel-debug-props.ll b/test/CodeGen/AMDGPU/hsa-metadata-kernel-debug-props.ll index 8583c00caede..80d033d3c1b0 100644 --- a/test/CodeGen/AMDGPU/hsa-metadata-kernel-debug-props.ll +++ b/test/CodeGen/AMDGPU/hsa-metadata-kernel-debug-props.ll @@ -16,7 +16,7 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) ; CHECK: ReservedNumVGPRs: 4 ; GFX700: ReservedFirstVGPR: 8 ; GFX800: ReservedFirstVGPR: 8 -; GFX900: ReservedFirstVGPR: 11 +; GFX900: ReservedFirstVGPR: 10 ; CHECK: PrivateSegmentBufferSGPR: 0 ; CHECK: WavefrontPrivateSegmentOffsetSGPR: 11 define amdgpu_kernel void @test(i32 addrspace(1)* %A) #0 !dbg !7 !kernel_arg_addr_space !12 !kernel_arg_access_qual !13 !kernel_arg_type !14 !kernel_arg_base_type !14 !kernel_arg_type_qual !15 { diff --git a/test/CodeGen/AMDGPU/hsa-note-no-func.ll b/test/CodeGen/AMDGPU/hsa-note-no-func.ll index 81d9ed2eba8c..bfe8737b4ff2 100644 --- a/test/CodeGen/AMDGPU/hsa-note-no-func.ll +++ b/test/CodeGen/AMDGPU/hsa-note-no-func.ll @@ -4,6 +4,8 @@ ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx701 | FileCheck --check-prefix=HSA --check-prefix=HSA-CI701 %s ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx702 | FileCheck 
--check-prefix=HSA --check-prefix=HSA-CI702 %s ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx703 | FileCheck --check-prefix=HSA --check-prefix=HSA-CI703 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx704 | FileCheck --check-prefix=HSA --check-prefix=HSA-CI704 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=bonaire | FileCheck --check-prefix=HSA --check-prefix=HSA-CI704 %s ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=mullins | FileCheck --check-prefix=HSA --check-prefix=HSA-CI703 %s ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=hawaii | FileCheck --check-prefix=HSA --check-prefix=HSA-CI701 %s ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kabini | FileCheck --check-prefix=HSA --check-prefix=HSA-CI703 %s @@ -17,12 +19,9 @@ ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx801 | FileCheck --check-prefix=HSA --check-prefix=HSA-VI801 %s ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx802 | FileCheck --check-prefix=HSA --check-prefix=HSA-VI802 %s ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx803 | FileCheck --check-prefix=HSA --check-prefix=HSA-VI803 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx804 | FileCheck --check-prefix=HSA --check-prefix=HSA-VI804 %s ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx810 | FileCheck --check-prefix=HSA --check-prefix=HSA-VI810 %s ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx900 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX900 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx901 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX901 %s ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx902 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX902 %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx903 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX903 %s ; HSA: .hsa_code_object_version 2,1 ; HSA-SI600: .hsa_code_object_isa 6,0,0,"AMD","AMDGPU" @@ -31,13 +30,11 @@ ; HSA-CI701: .hsa_code_object_isa 7,0,1,"AMD","AMDGPU" ; HSA-CI702: .hsa_code_object_isa 7,0,2,"AMD","AMDGPU" ; HSA-CI703: .hsa_code_object_isa 7,0,3,"AMD","AMDGPU" +; HSA-CI704: .hsa_code_object_isa 7,0,4,"AMD","AMDGPU" ; HSA-VI800: .hsa_code_object_isa 8,0,0,"AMD","AMDGPU" ; HSA-VI801: .hsa_code_object_isa 8,0,1,"AMD","AMDGPU" ; HSA-VI802: .hsa_code_object_isa 8,0,2,"AMD","AMDGPU" ; HSA-VI803: .hsa_code_object_isa 8,0,3,"AMD","AMDGPU" -; HSA-VI804: .hsa_code_object_isa 8,0,4,"AMD","AMDGPU" ; HSA-VI810: .hsa_code_object_isa 8,1,0,"AMD","AMDGPU" ; HSA-GFX900: .hsa_code_object_isa 9,0,0,"AMD","AMDGPU" -; HSA-GFX901: .hsa_code_object_isa 9,0,1,"AMD","AMDGPU" ; HSA-GFX902: .hsa_code_object_isa 9,0,2,"AMD","AMDGPU" -; HSA-GFX903: .hsa_code_object_isa 9,0,3,"AMD","AMDGPU" diff --git a/test/CodeGen/AMDGPU/i1-copy-implicit-def.ll b/test/CodeGen/AMDGPU/i1-copy-implicit-def.ll index f6bf0b09486e..37d05c7ac414 100644 --- a/test/CodeGen/AMDGPU/i1-copy-implicit-def.ll +++ b/test/CodeGen/AMDGPU/i1-copy-implicit-def.ll @@ -3,7 +3,7 @@ ; SILowerI1Copies was not handling IMPLICIT_DEF ; SI-LABEL: {{^}}br_implicit_def: -; SI: BB#0: +; SI: %bb.0: ; SI-NEXT: s_cbranch_scc1 define amdgpu_kernel void @br_implicit_def(i32 addrspace(1)* %out, i32 %arg) #0 { bb: diff --git a/test/CodeGen/AMDGPU/image-schedule.ll b/test/CodeGen/AMDGPU/image-schedule.ll new file mode 100644 index 000000000000..856ba04a7913 --- /dev/null +++ b/test/CodeGen/AMDGPU/image-schedule.ll @@ -0,0 +1,56 @@ +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s + +target datalayout = 
"e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" +target triple = "amdgcn--amdpal" + +; The first image store and the second image load use the same descriptor and +; the same coordinate. Check that they do not get swapped by the machine +; instruction scheduler. + +; GCN-LABEL: {{^}}_amdgpu_cs_main: +; GCN: image_load +; GCN: image_store +; GCN: image_load +; GCN: image_store + +define dllexport amdgpu_cs void @_amdgpu_cs_main(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <3 x i32> inreg %arg3, i32 inreg %arg4, <3 x i32> %arg5) local_unnamed_addr #0 { +.entry: + %tmp = call i64 @llvm.amdgcn.s.getpc() #1 + %tmp6 = bitcast i64 %tmp to <2 x i32> + %.0.vec.insert = insertelement <2 x i32> undef, i32 %arg2, i32 0 + %.4.vec.insert = shufflevector <2 x i32> %.0.vec.insert, <2 x i32> %tmp6, <2 x i32> + %tmp7 = bitcast <2 x i32> %.4.vec.insert to i64 + %tmp8 = inttoptr i64 %tmp7 to [4294967295 x i8] addrspace(2)* + %tmp9 = add <3 x i32> %arg3, %arg5 + %tmp10 = getelementptr [4294967295 x i8], [4294967295 x i8] addrspace(2)* %tmp8, i64 0, i64 32 + %tmp11 = bitcast i8 addrspace(2)* %tmp10 to <8 x i32> addrspace(2)*, !amdgpu.uniform !0 + %tmp12 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp11, align 16 + %tmp13 = shufflevector <3 x i32> %tmp9, <3 x i32> undef, <2 x i32> + %tmp14 = call <4 x float> @llvm.amdgcn.image.load.v4f32.v2i32.v8i32(<2 x i32> %tmp13, <8 x i32> %tmp12, i32 15, i1 false, i1 false, i1 false, i1 false) #0 + %tmp15 = inttoptr i64 %tmp7 to <8 x i32> addrspace(2)* + %tmp16 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp15, align 16 + call void @llvm.amdgcn.image.store.v4f32.v2i32.v8i32(<4 x float> %tmp14, <2 x i32> %tmp13, <8 x i32> %tmp16, i32 15, i1 false, i1 false, i1 false, i1 false) #0 + %tmp17 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp15, align 16 + %tmp18 = call <4 x float> @llvm.amdgcn.image.load.v4f32.v2i32.v8i32(<2 x i32> %tmp13, <8 x i32> %tmp17, i32 15, i1 false, i1 false, i1 false, i1 false) #0 + %tmp19 = getelementptr [4294967295 x i8], [4294967295 x i8] addrspace(2)* %tmp8, i64 0, i64 64 + %tmp20 = bitcast i8 addrspace(2)* %tmp19 to <8 x i32> addrspace(2)*, !amdgpu.uniform !0 + %tmp21 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp20, align 16 + call void @llvm.amdgcn.image.store.v4f32.v2i32.v8i32(<4 x float> %tmp18, <2 x i32> %tmp13, <8 x i32> %tmp21, i32 15, i1 false, i1 false, i1 false, i1 false) #0 + ret void +} + +; Function Attrs: nounwind readnone speculatable +declare i64 @llvm.amdgcn.s.getpc() #1 + +; Function Attrs: nounwind readonly +declare <4 x float> @llvm.amdgcn.image.load.v4f32.v2i32.v8i32(<2 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #2 + +; Function Attrs: nounwind writeonly +declare void @llvm.amdgcn.image.store.v4f32.v2i32.v8i32(<4 x float>, <2 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #3 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone speculatable } +attributes #2 = { nounwind readonly } +attributes #3 = { nounwind writeonly } + +!0 = !{} diff --git a/test/CodeGen/AMDGPU/immv216.ll b/test/CodeGen/AMDGPU/immv216.ll index 431c41482004..1135ab03ca3c 100644 --- a/test/CodeGen/AMDGPU/immv216.ll +++ b/test/CodeGen/AMDGPU/immv216.ll @@ -1,4 +1,4 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s +; RUN: llc -amdgpu-scalarize-global-loads=false 
-mtriple=amdgcn--amdhsa -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s ; FIXME: Merge into imm.ll diff --git a/test/CodeGen/AMDGPU/indirect-addressing-si-noopt.ll b/test/CodeGen/AMDGPU/indirect-addressing-si-noopt.ll index b2873402da43..63384f5e4450 100644 --- a/test/CodeGen/AMDGPU/indirect-addressing-si-noopt.ll +++ b/test/CodeGen/AMDGPU/indirect-addressing-si-noopt.ll @@ -17,3 +17,48 @@ entry: ret void } +; Make sure we don't hit use of undefined register errors when expanding an +; extract with undef index. + +; CHECK-LABEL: {{^}}extract_adjacent_blocks: +; CHECK: s_load_dword [[ARG:s[0-9]+]] +; CHECK: s_cmp_lg_u32 +; CHECK: s_cbranch_scc1 [[BB4:BB[0-9]+_[0-9]+]] + +; CHECK: buffer_load_dwordx4 +; CHECK: s_mov_b32 m0, +; CHECK: v_movrels_b32_e32 + +; CHECK: s_branch [[ENDBB:BB[0-9]+_[0-9]+]] + +; CHECK: [[BB4]]: +; CHECK: buffer_load_dwordx4 +; CHECK: s_mov_b32 m0, +; CHECK: v_movrels_b32_e32 + +; CHECK: [[ENDBB]]: +; CHECK: buffer_store_dword +; CHECK: s_endpgm + +define amdgpu_kernel void @extract_adjacent_blocks(i32 %arg) #0 { +bb: + %tmp = icmp eq i32 %arg, 0 + br i1 %tmp, label %bb1, label %bb4 + +bb1: + %tmp2 = load volatile <4 x float>, <4 x float> addrspace(1)* undef + %tmp3 = extractelement <4 x float> %tmp2, i32 undef + call void asm sideeffect "; reg use $0", "v"(<4 x float> %tmp2) #0 ; Prevent block optimize out + br label %bb7 + +bb4: + %tmp5 = load volatile <4 x float>, <4 x float> addrspace(1)* undef + %tmp6 = extractelement <4 x float> %tmp5, i32 undef + call void asm sideeffect "; reg use $0", "v"(<4 x float> %tmp5) #0 ; Prevent block optimize out + br label %bb7 + +bb7: + %tmp8 = phi float [ %tmp3, %bb1 ], [ %tmp6, %bb4 ] + store volatile float %tmp8, float addrspace(1)* undef + ret void +} diff --git a/test/CodeGen/AMDGPU/indirect-addressing-si.ll b/test/CodeGen/AMDGPU/indirect-addressing-si.ll index 906a1f113cc7..5218c7845861 100644 --- a/test/CodeGen/AMDGPU/indirect-addressing-si.ll +++ b/test/CodeGen/AMDGPU/indirect-addressing-si.ll @@ -1,7 +1,7 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=MOVREL %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=MOVREL %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -amdgpu-vgpr-index-mode -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=IDXMODE %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=IDXMODE %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MOVREL,PREGFX9 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MOVREL,PREGFX9 %s +; RUN: llc 
-amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -amdgpu-vgpr-index-mode -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,IDXMODE,PREGFX9 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,IDXMODE,GFX9 %s ; Tests for indirect addressing on SI, which is implemented using dynamic ; indexing of vectors. @@ -146,6 +146,7 @@ entry: } ; GCN-LABEL: {{^}}extract_undef_offset_sgpr: +; undefined behavior, but shouldn't crash compiler define amdgpu_kernel void @extract_undef_offset_sgpr(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { entry: %ld = load volatile <4 x i32>, <4 x i32> addrspace(1)* %in @@ -155,9 +156,7 @@ entry: } ; GCN-LABEL: {{^}}insert_undef_offset_sgpr_vector_src: -; GCN-DAG: buffer_load_dwordx4 -; MOVREL-DAG: s_mov_b32 m0, -; MOVREL: v_movreld_b32 +; undefined behavior, but shouldn't crash compiler define amdgpu_kernel void @insert_undef_offset_sgpr_vector_src(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { entry: %ld = load <4 x i32>, <4 x i32> addrspace(1)* %in @@ -469,83 +468,8 @@ bb2: ret void } -; GCN-LABEL: {{^}}extract_adjacent_blocks: -; GCN: s_load_dword [[ARG:s[0-9]+]] -; GCN: s_cmp_lg_u32 -; GCN: s_cbranch_scc0 [[BB4:BB[0-9]+_[0-9]+]] - -; GCN: buffer_load_dwordx4 -; MOVREL: s_mov_b32 m0, -; MOVREL: v_movrels_b32_e32 - -; IDXMODE: s_set_gpr_idx_on s{{[0-9]+}}, src0 -; IDXMODE: v_mov_b32_e32 -; IDXMODE: s_set_gpr_idx_off - -; GCN: s_branch [[ENDBB:BB[0-9]+_[0-9]+]] - -; GCN: [[BB4]]: -; GCN: buffer_load_dwordx4 -; MOVREL: s_mov_b32 m0, -; MOVREL: v_movrels_b32_e32 - -; IDXMODE: s_set_gpr_idx_on -; IDXMODE: v_mov_b32_e32 -; IDXMODE: s_set_gpr_idx_off - -; GCN: [[ENDBB]]: -; GCN: buffer_store_dword -; GCN: s_endpgm -define amdgpu_kernel void @extract_adjacent_blocks(i32 %arg) #0 { -bb: - %tmp = icmp eq i32 %arg, 0 - br i1 %tmp, label %bb1, label %bb4 - -bb1: - %tmp2 = load volatile <4 x float>, <4 x float> addrspace(1)* undef - %tmp3 = extractelement <4 x float> %tmp2, i32 undef - call void asm sideeffect "; reg use $0", "v"(<4 x float> %tmp2) #0 ; Prevent block optimize out - br label %bb7 - -bb4: - %tmp5 = load volatile <4 x float>, <4 x float> addrspace(1)* undef - %tmp6 = extractelement <4 x float> %tmp5, i32 undef - call void asm sideeffect "; reg use $0", "v"(<4 x float> %tmp5) #0 ; Prevent block optimize out - br label %bb7 - -bb7: - %tmp8 = phi float [ %tmp3, %bb1 ], [ %tmp6, %bb4 ] - store volatile float %tmp8, float addrspace(1)* undef - ret void -} ; GCN-LABEL: {{^}}insert_adjacent_blocks: -; GCN: s_load_dword [[ARG:s[0-9]+]] -; GCN: s_cmp_lg_u32 -; GCN: s_cbranch_scc0 [[BB4:BB[0-9]+_[0-9]+]] - -; GCN: buffer_load_dwordx4 -; MOVREL: s_mov_b32 m0, -; MOVREL: v_movreld_b32_e32 - -; IDXMODE: s_set_gpr_idx_on s{{[0-9]+}}, dst -; IDXMODE: v_mov_b32_e32 -; IDXMODE: s_set_gpr_idx_off - -; GCN: s_branch [[ENDBB:BB[0-9]+_[0-9]+]] - -; GCN: [[BB4]]: -; GCN: buffer_load_dwordx4 -; MOVREL: s_mov_b32 m0, -; MOVREL: v_movreld_b32_e32 - -; IDXMODE: s_set_gpr_idx_on s{{[0-9]+}}, dst -; IDXMODE: v_mov_b32_e32 -; IDXMODE: s_set_gpr_idx_off - -; GCN: [[ENDBB]]: -; GCN: buffer_store_dword -; GCN: s_endpgm define amdgpu_kernel void @insert_adjacent_blocks(i32 %arg, float %val0) #0 { bb: %tmp = icmp eq i32 %arg, 0 @@ -603,7 +527,8 @@ bb7: ; preds = %bb4, %bb1 ; IDXMODE: v_mov_b32_e32 v[[VEC0_ELT2]], -4.0 ; IDXMODE: s_set_gpr_idx_off -; GCN: s_mov_b32 m0, -1 +; PREGFX9: s_mov_b32 m0, -1 +; GFX9-NOT: s_mov_b32 
m0 ; GCN: ds_write_b32 ; GCN: ds_write_b32 ; GCN: s_endpgm diff --git a/test/CodeGen/AMDGPU/inlineasm-packed.ll b/test/CodeGen/AMDGPU/inlineasm-packed.ll index 3c6c7e1d1b42..027435421095 100644 --- a/test/CodeGen/AMDGPU/inlineasm-packed.ll +++ b/test/CodeGen/AMDGPU/inlineasm-packed.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=gfx901 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s ; GCN-LABEL: {{^}}inline_asm_input_v2i16: ; GCN: s_mov_b32 s{{[0-9]+}}, s{{[0-9]+}} diff --git a/test/CodeGen/AMDGPU/insert_vector_elt.ll b/test/CodeGen/AMDGPU/insert_vector_elt.ll index 463e7ba53ddd..79e1943f8fb7 100644 --- a/test/CodeGen/AMDGPU/insert_vector_elt.ll +++ b/test/CodeGen/AMDGPU/insert_vector_elt.ll @@ -1,5 +1,5 @@ -; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tahiti -mattr=+max-private-element-size-16 < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=GCN-NO-TONGA %s -; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -mattr=+max-private-element-size-16 < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=GCN-TONGA %s +; RUN: llc -verify-machineinstrs -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -mattr=+max-private-element-size-16 < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=GCN-NO-TONGA %s +; RUN: llc -verify-machineinstrs -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -mattr=+max-private-element-size-16 < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=GCN-TONGA %s ; FIXME: Broken on evergreen ; FIXME: For some reason the 8 and 16 vectors are being stored as diff --git a/test/CodeGen/AMDGPU/inserted-wait-states.mir b/test/CodeGen/AMDGPU/inserted-wait-states.mir index 16d9070849b9..698f2c3ebc47 100644 --- a/test/CodeGen/AMDGPU/inserted-wait-states.mir +++ b/test/CodeGen/AMDGPU/inserted-wait-states.mir @@ -548,7 +548,7 @@ body: | %flat_scr_lo = S_ADD_U32 %sgpr6, %sgpr9, implicit-def %scc %flat_scr_hi = S_ADDC_U32 %sgpr7, 0, implicit-def %scc, implicit %scc - DBG_VALUE _, 2, !5, !11, debug-location !12 + DBG_VALUE %noreg, 2, !5, !11, debug-location !12 %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed %sgpr4_sgpr5, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`) dead %sgpr6_sgpr7 = KILL %sgpr4_sgpr5 %sgpr8 = S_MOV_B32 %sgpr5 diff --git a/test/CodeGen/AMDGPU/invert-br-undef-vcc.mir b/test/CodeGen/AMDGPU/invert-br-undef-vcc.mir index 67642282f75b..61aa39fcc25d 100644 --- a/test/CodeGen/AMDGPU/invert-br-undef-vcc.mir +++ b/test/CodeGen/AMDGPU/invert-br-undef-vcc.mir @@ -26,7 +26,7 @@ ... 
--- # CHECK-LABEL: name: invert_br_undef_vcc -# CHECK: S_CBRANCH_VCCZ %bb.1.else, implicit undef %vcc +# CHECK: S_CBRANCH_VCCZ %bb.1, implicit undef %vcc name: invert_br_undef_vcc alignment: 0 @@ -58,7 +58,7 @@ body: | %sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed %sgpr0_sgpr1, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`) %sgpr7 = S_MOV_B32 61440 %sgpr6 = S_MOV_B32 -1 - S_CBRANCH_VCCNZ %bb.2.if, implicit undef %vcc + S_CBRANCH_VCCNZ %bb.2, implicit undef %vcc bb.1.else: liveins: %sgpr6, %sgpr7, %sgpr0_sgpr1_sgpr2_sgpr3:0x00000003 @@ -66,7 +66,7 @@ body: | %vgpr0 = V_MOV_B32_e32 100, implicit %exec BUFFER_STORE_DWORD_OFFSET killed %vgpr0, killed %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into `i32 addrspace(1)* undef`) %vgpr0 = V_MOV_B32_e32 1, implicit %exec - S_BRANCH %bb.3.done + S_BRANCH %bb.3 bb.2.if: liveins: %sgpr6, %sgpr7, %sgpr0_sgpr1_sgpr2_sgpr3:0x00000003 diff --git a/test/CodeGen/AMDGPU/ipra.ll b/test/CodeGen/AMDGPU/ipra.ll index 9615ddd07cdb..276de14612d7 100644 --- a/test/CodeGen/AMDGPU/ipra.ll +++ b/test/CodeGen/AMDGPU/ipra.ll @@ -1,4 +1,5 @@ ; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -enable-ipra -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN %s ; Kernels are not called, so there is no call preserved mask. ; GCN-LABEL: {{^}}kernel: diff --git a/test/CodeGen/AMDGPU/lds-alignment.ll b/test/CodeGen/AMDGPU/lds-alignment.ll index c23dea2b6b76..84c8d9b778c5 100644 --- a/test/CodeGen/AMDGPU/lds-alignment.ll +++ b/test/CodeGen/AMDGPU/lds-alignment.ll @@ -9,16 +9,16 @@ @lds.missing.align.0 = internal unnamed_addr addrspace(3) global [39 x i32] undef @lds.missing.align.1 = internal unnamed_addr addrspace(3) global [7 x i64] undef -declare void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* nocapture, i8 addrspace(1)* nocapture readonly, i32, i32, i1) #0 -declare void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* nocapture, i8 addrspace(3)* nocapture readonly, i32, i32, i1) #0 +declare void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* nocapture, i8 addrspace(1)* nocapture readonly, i32, i1) #0 +declare void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* nocapture, i8 addrspace(3)* nocapture readonly, i32, i1) #0 ; HSA-LABEL: {{^}}test_no_round_size_1: ; HSA: workgroup_group_segment_byte_size = 38 define amdgpu_kernel void @test_no_round_size_1(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #1 { %lds.align16.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align16.0 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align16.0.bc, i8 addrspace(1)* %in, i32 38, i32 4, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align16.0.bc, i32 38, i32 4, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 4 %lds.align16.0.bc, i8 addrspace(1)* align 4 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 4 %out, i8 addrspace(3)* align 4 %lds.align16.0.bc, i32 38, i1 false) ret void } @@ -36,12 +36,12 @@ define amdgpu_kernel void @test_no_round_size_1(i8 addrspace(1)* %out, i8 addrsp ; HSA: group_segment_alignment = 4 define amdgpu_kernel void @test_round_size_2(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #1 { %lds.align16.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align16.0 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align16.0.bc, i8 addrspace(1)* 
%in, i32 38, i32 4, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align16.0.bc, i32 38, i32 4, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 4 %lds.align16.0.bc, i8 addrspace(1)* align 4 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 4 %out, i8 addrspace(3)* align 4 %lds.align16.0.bc, i32 38, i1 false) %lds.align16.1.bc = bitcast [38 x i8] addrspace(3)* @lds.align16.1 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align16.1.bc, i8 addrspace(1)* %in, i32 38, i32 4, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align16.1.bc, i32 38, i32 4, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 4 %lds.align16.1.bc, i8 addrspace(1)* align 4 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 4 %out, i8 addrspace(3)* align 4 %lds.align16.1.bc, i32 38, i1 false) ret void } @@ -52,12 +52,12 @@ define amdgpu_kernel void @test_round_size_2(i8 addrspace(1)* %out, i8 addrspace ; HSA: group_segment_alignment = 4 define amdgpu_kernel void @test_round_size_2_align_8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #1 { %lds.align16.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align16.0 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align16.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align16.0.bc, i32 38, i32 8, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align16.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align16.0.bc, i32 38, i1 false) %lds.align8.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align8.0 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align8.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align8.0.bc, i32 38, i32 8, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align8.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align8.0.bc, i32 38, i1 false) ret void } @@ -67,11 +67,11 @@ define amdgpu_kernel void @test_round_size_2_align_8(i8 addrspace(1)* %out, i8 a ; HSA: group_segment_alignment = 4 define amdgpu_kernel void @test_round_local_lds_and_arg(i8 addrspace(1)* %out, i8 addrspace(1)* %in, i8 addrspace(3)* %lds.arg) #1 { %lds.align16.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align16.0 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align16.0.bc, i8 addrspace(1)* %in, i32 38, i32 4, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 4 %lds.align16.0.bc, i8 addrspace(1)* align 4 %in, i32 38, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align16.0.bc, i32 38, i32 4, i1 false) - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.arg, i8 addrspace(1)* %in, i32 38, i32 4, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.arg, i32 38, i32 4, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 4 %out, i8 addrspace(3)* align 4 %lds.align16.0.bc, i32 38, i1 false) + call 
void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 4 %lds.arg, i8 addrspace(1)* align 4 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 4 %out, i8 addrspace(3)* align 4 %lds.arg, i32 38, i1 false) ret void } @@ -79,8 +79,8 @@ define amdgpu_kernel void @test_round_local_lds_and_arg(i8 addrspace(1)* %out, i ; HSA: workgroup_group_segment_byte_size = 0 ; HSA: group_segment_alignment = 4 define amdgpu_kernel void @test_round_lds_arg(i8 addrspace(1)* %out, i8 addrspace(1)* %in, i8 addrspace(3)* %lds.arg) #1 { - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.arg, i8 addrspace(1)* %in, i32 38, i32 4, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.arg, i32 38, i32 4, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 4 %lds.arg, i8 addrspace(1)* align 4 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 4 %out, i8 addrspace(3)* align 4 %lds.arg, i32 38, i1 false) ret void } @@ -89,8 +89,8 @@ define amdgpu_kernel void @test_round_lds_arg(i8 addrspace(1)* %out, i8 addrspac ; HSA: workgroup_group_segment_byte_size = 0 ; HSA: group_segment_alignment = 4 define amdgpu_kernel void @test_high_align_lds_arg(i8 addrspace(1)* %out, i8 addrspace(1)* %in, i8 addrspace(3)* align 64 %lds.arg) #1 { - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.arg, i8 addrspace(1)* %in, i32 38, i32 64, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.arg, i32 38, i32 64, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 64 %lds.arg, i8 addrspace(1)* align 64 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 64 %out, i8 addrspace(3)* align 64 %lds.arg, i32 38, i1 false) ret void } @@ -100,12 +100,12 @@ define amdgpu_kernel void @test_high_align_lds_arg(i8 addrspace(1)* %out, i8 add ; HSA: group_segment_alignment = 4 define amdgpu_kernel void @test_missing_alignment_size_2_order0(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #1 { %lds.missing.align.0.bc = bitcast [39 x i32] addrspace(3)* @lds.missing.align.0 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.missing.align.0.bc, i8 addrspace(1)* %in, i32 160, i32 4, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.missing.align.0.bc, i32 160, i32 4, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 4 %lds.missing.align.0.bc, i8 addrspace(1)* align 4 %in, i32 160, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 4 %out, i8 addrspace(3)* align 4 %lds.missing.align.0.bc, i32 160, i1 false) %lds.missing.align.1.bc = bitcast [7 x i64] addrspace(3)* @lds.missing.align.1 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.missing.align.1.bc, i8 addrspace(1)* %in, i32 56, i32 8, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.missing.align.1.bc, i32 56, i32 8, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.missing.align.1.bc, i8 addrspace(1)* align 8 %in, i32 56, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.missing.align.1.bc, i32 56, i1 false) ret void } @@ -116,12 +116,12 @@ define amdgpu_kernel void @test_missing_alignment_size_2_order0(i8 addrspace(1)* ; HSA: group_segment_alignment = 4 define amdgpu_kernel void 
@test_missing_alignment_size_2_order1(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #1 { %lds.missing.align.1.bc = bitcast [7 x i64] addrspace(3)* @lds.missing.align.1 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.missing.align.1.bc, i8 addrspace(1)* %in, i32 56, i32 8, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.missing.align.1.bc, i32 56, i32 8, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.missing.align.1.bc, i8 addrspace(1)* align 8 %in, i32 56, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.missing.align.1.bc, i32 56, i1 false) %lds.missing.align.0.bc = bitcast [39 x i32] addrspace(3)* @lds.missing.align.0 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.missing.align.0.bc, i8 addrspace(1)* %in, i32 160, i32 4, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.missing.align.0.bc, i32 160, i32 4, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 4 %lds.missing.align.0.bc, i8 addrspace(1)* align 4 %in, i32 160, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 4 %out, i8 addrspace(3)* align 4 %lds.missing.align.0.bc, i32 160, i1 false) ret void } @@ -144,16 +144,16 @@ define amdgpu_kernel void @test_missing_alignment_size_2_order1(i8 addrspace(1)* ; HSA: group_segment_alignment = 4 define amdgpu_kernel void @test_round_size_3_order0(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #1 { %lds.align32.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align32.0 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align32.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align32.0.bc, i32 38, i32 8, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align32.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align32.0.bc, i32 38, i1 false) %lds.align16.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align16.0 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align16.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align16.0.bc, i32 38, i32 8, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align16.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align16.0.bc, i32 38, i1 false) %lds.align8.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align8.0 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align8.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align8.0.bc, i32 38, i32 8, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align8.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align8.0.bc, i32 38, i1 false) ret void } @@ -165,16 +165,16 @@ define amdgpu_kernel void @test_round_size_3_order0(i8 addrspace(1)* %out, i8 ad ; HSA: group_segment_alignment = 4 define amdgpu_kernel 
void @test_round_size_3_order1(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #1 { %lds.align32.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align32.0 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align32.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align32.0.bc, i32 38, i32 8, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align32.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align32.0.bc, i32 38, i1 false) %lds.align8.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align8.0 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align8.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align8.0.bc, i32 38, i32 8, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align8.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align8.0.bc, i32 38, i1 false) %lds.align16.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align16.0 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align16.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align16.0.bc, i32 38, i32 8, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align16.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align16.0.bc, i32 38, i1 false) ret void } @@ -186,16 +186,16 @@ define amdgpu_kernel void @test_round_size_3_order1(i8 addrspace(1)* %out, i8 ad ; HSA: group_segment_alignment = 4 define amdgpu_kernel void @test_round_size_3_order2(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #1 { %lds.align16.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align16.0 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align16.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align16.0.bc, i32 38, i32 8, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align16.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align16.0.bc, i32 38, i1 false) %lds.align32.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align32.0 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align32.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align32.0.bc, i32 38, i32 8, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align32.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align32.0.bc, i32 38, i1 false) %lds.align8.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align8.0 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align8.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 
addrspace(1)* %out, i8 addrspace(3)* %lds.align8.0.bc, i32 38, i32 8, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align8.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align8.0.bc, i32 38, i1 false) ret void } @@ -207,16 +207,16 @@ define amdgpu_kernel void @test_round_size_3_order2(i8 addrspace(1)* %out, i8 ad ; HSA: group_segment_alignment = 4 define amdgpu_kernel void @test_round_size_3_order3(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #1 { %lds.align16.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align16.0 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align16.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align16.0.bc, i32 38, i32 8, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align16.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align16.0.bc, i32 38, i1 false) %lds.align8.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align8.0 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align8.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align8.0.bc, i32 38, i32 8, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align8.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align8.0.bc, i32 38, i1 false) %lds.align32.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align32.0 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align32.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align32.0.bc, i32 38, i32 8, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align32.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align32.0.bc, i32 38, i1 false) ret void } @@ -228,16 +228,16 @@ define amdgpu_kernel void @test_round_size_3_order3(i8 addrspace(1)* %out, i8 ad ; HSA: group_segment_alignment = 4 define amdgpu_kernel void @test_round_size_3_order4(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #1 { %lds.align8.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align8.0 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align8.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align8.0.bc, i32 38, i32 8, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align8.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align8.0.bc, i32 38, i1 false) %lds.align32.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align32.0 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align32.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align32.0.bc, i32 38, i32 8, i1 false) + call void 
@llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align32.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align32.0.bc, i32 38, i1 false) %lds.align16.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align16.0 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align16.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align16.0.bc, i32 38, i32 8, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align16.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align16.0.bc, i32 38, i1 false) ret void } @@ -249,16 +249,16 @@ define amdgpu_kernel void @test_round_size_3_order4(i8 addrspace(1)* %out, i8 ad ; HSA: group_segment_alignment = 4 define amdgpu_kernel void @test_round_size_3_order5(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #1 { %lds.align8.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align8.0 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align8.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align8.0.bc, i32 38, i32 8, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align8.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align8.0.bc, i32 38, i1 false) %lds.align16.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align16.0 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align16.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align16.0.bc, i32 38, i32 8, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align16.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align16.0.bc, i32 38, i1 false) %lds.align32.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align32.0 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align32.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align32.0.bc, i32 38, i32 8, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align32.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align32.0.bc, i32 38, i1 false) ret void } diff --git a/test/CodeGen/AMDGPU/lds-output-queue.ll b/test/CodeGen/AMDGPU/lds-output-queue.ll index 8b7e9e6d6aa8..f8fb12eefa62 100644 --- a/test/CodeGen/AMDGPU/lds-output-queue.ll +++ b/test/CodeGen/AMDGPU/lds-output-queue.ll @@ -45,21 +45,21 @@ declare void @llvm.r600.group.barrier() nounwind convergent ; %2 = load i32, i32 addrspace(1)* %in ; ; The instruction selection phase will generate ISA that looks like this: -; %OQAP = LDS_READ_RET -; %vreg0 = MOV %OQAP -; %vreg1 = VTX_READ_32 -; %vreg2 = ADD_INT %vreg1, %vreg0 +; %oqap = LDS_READ_RET +; %0 = MOV %oqap +; %1 = VTX_READ_32 +; %2 = ADD_INT %1, %0 ; ; The bottom scheduler will schedule the two ALU 
instructions first: ; ; UNSCHEDULED: -; %OQAP = LDS_READ_RET -; %vreg1 = VTX_READ_32 +; %oqap = LDS_READ_RET +; %1 = VTX_READ_32 ; ; SCHEDULED: ; -; vreg0 = MOV %OQAP -; vreg2 = ADD_INT %vreg1, %vreg2 +; %0 = MOV %oqap +; %2 = ADD_INT %1, %2 ; ; The lack of proper aliasing results in the local memory read (LDS_READ_RET) ; to consider the global memory read (VTX_READ_32) has a chain dependency, so @@ -67,14 +67,14 @@ declare void @llvm.r600.group.barrier() nounwind convergent ; final program which looks like this: ; ; Alu clause: -; %OQAP = LDS_READ_RET +; %oqap = LDS_READ_RET ; VTX clause: -; %vreg1 = VTX_READ_32 +; %1 = VTX_READ_32 ; Alu clause: -; vreg0 = MOV %OQAP -; vreg2 = ADD_INT %vreg1, %vreg2 +; %0 = MOV %oqap +; %2 = ADD_INT %1, %2 ; -; This is an illegal program because the OQAP def and use know occur in +; This is an illegal program because the oqap def and use know occur in ; different ALU clauses. ; ; This test checks this scenario and makes sure it doesn't result in an diff --git a/test/CodeGen/AMDGPU/lds_atomic_f32.ll b/test/CodeGen/AMDGPU/lds_atomic_f32.ll new file mode 100644 index 000000000000..18aebe12e7f4 --- /dev/null +++ b/test/CodeGen/AMDGPU/lds_atomic_f32.ll @@ -0,0 +1,69 @@ +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s + +declare float @llvm.amdgcn.atomic.fadd.f32(float addrspace(3)* nocapture, float, i32, i32, i1) +declare float @llvm.amdgcn.atomic.fmin.f32(float addrspace(3)* nocapture, float, i32, i32, i1) +declare float @llvm.amdgcn.atomic.fmax.f32(float addrspace(3)* nocapture, float, i32, i32, i1) + +; GCN-LABEL: {{^}}lds_atomic_fadd_f32: +; VI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 +; GCN-DAG: v_mov_b32_e32 [[V0:v[0-9]+]], 0x42280000 +; GCN: ds_add_rtn_f32 [[V2:v[0-9]+]], [[V1:v[0-9]+]], [[V0]] offset:32 +; GCN: ds_add_f32 [[V3:v[0-9]+]], [[V0]] offset:64 +; GCN: s_waitcnt lgkmcnt(1) +; GCN: ds_add_rtn_f32 {{v[0-9]+}}, {{v[0-9]+}}, [[V2]] +define amdgpu_kernel void @lds_atomic_fadd_f32(float addrspace(1)* %out, float addrspace(3)* %ptrf, i32 %idx) { + %idx.add = add nuw i32 %idx, 4 + %shl0 = shl i32 %idx.add, 3 + %shl1 = shl i32 %idx.add, 4 + %ptr0 = inttoptr i32 %shl0 to float addrspace(3)* + %ptr1 = inttoptr i32 %shl1 to float addrspace(3)* + %a1 = call float @llvm.amdgcn.atomic.fadd.f32(float addrspace(3)* %ptr0, float 4.2e+1, i32 0, i32 0, i1 false) + %a2 = call float @llvm.amdgcn.atomic.fadd.f32(float addrspace(3)* %ptr1, float 4.2e+1, i32 0, i32 0, i1 false) + %a3 = call float @llvm.amdgcn.atomic.fadd.f32(float addrspace(3)* %ptrf, float %a1, i32 0, i32 0, i1 false) + store float %a3, float addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}lds_atomic_fmin_f32: +; VI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 +; GCN-DAG: v_mov_b32_e32 [[V0:v[0-9]+]], 0x42280000 +; GCN: ds_min_rtn_f32 [[V2:v[0-9]+]], [[V1:v[0-9]+]], [[V0]] offset:32 +; GCN: ds_min_f32 [[V3:v[0-9]+]], [[V0]] offset:64 +; GCN: s_waitcnt lgkmcnt(1) +; GCN: ds_min_rtn_f32 {{v[0-9]+}}, {{v[0-9]+}}, [[V2]] +define amdgpu_kernel void @lds_atomic_fmin_f32(float addrspace(1)* %out, float addrspace(3)* %ptrf, i32 %idx) { + %idx.add = add nuw i32 %idx, 4 + %shl0 = shl i32 %idx.add, 3 + %shl1 = shl i32 %idx.add, 4 + %ptr0 = inttoptr i32 %shl0 to float addrspace(3)* + %ptr1 = inttoptr i32 %shl1 to float addrspace(3)* + %a1 = call float @llvm.amdgcn.atomic.fmin.f32(float addrspace(3)* %ptr0, float 4.2e+1, i32 0, i32 
0, i1 false) + %a2 = call float @llvm.amdgcn.atomic.fmin.f32(float addrspace(3)* %ptr1, float 4.2e+1, i32 0, i32 0, i1 false) + %a3 = call float @llvm.amdgcn.atomic.fmin.f32(float addrspace(3)* %ptrf, float %a1, i32 0, i32 0, i1 false) + store float %a3, float addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}lds_atomic_fmax_f32: +; VI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 +; GCN-DAG: v_mov_b32_e32 [[V0:v[0-9]+]], 0x42280000 +; GCN: ds_max_rtn_f32 [[V2:v[0-9]+]], [[V1:v[0-9]+]], [[V0]] offset:32 +; GCN: ds_max_f32 [[V3:v[0-9]+]], [[V0]] offset:64 +; GCN: s_waitcnt lgkmcnt(1) +; GCN: ds_max_rtn_f32 {{v[0-9]+}}, {{v[0-9]+}}, [[V2]] +define amdgpu_kernel void @lds_atomic_fmax_f32(float addrspace(1)* %out, float addrspace(3)* %ptrf, i32 %idx) { + %idx.add = add nuw i32 %idx, 4 + %shl0 = shl i32 %idx.add, 3 + %shl1 = shl i32 %idx.add, 4 + %ptr0 = inttoptr i32 %shl0 to float addrspace(3)* + %ptr1 = inttoptr i32 %shl1 to float addrspace(3)* + %a1 = call float @llvm.amdgcn.atomic.fmax.f32(float addrspace(3)* %ptr0, float 4.2e+1, i32 0, i32 0, i1 false) + %a2 = call float @llvm.amdgcn.atomic.fmax.f32(float addrspace(3)* %ptr1, float 4.2e+1, i32 0, i32 0, i1 false) + %a3 = call float @llvm.amdgcn.atomic.fmax.f32(float addrspace(3)* %ptrf, float %a1, i32 0, i32 0, i1 false) + store float %a3, float addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/AMDGPU/liveness.mir b/test/CodeGen/AMDGPU/liveness.mir index 6fd8466492d0..8bb946da9ad2 100644 --- a/test/CodeGen/AMDGPU/liveness.mir +++ b/test/CodeGen/AMDGPU/liveness.mir @@ -6,7 +6,7 @@ # liveranges needed it. # # Should see three distinct value numbers: -# CHECK: %vreg0 [{{.*}}:0)[{{.*}}:1)[{{.*}}:2) 0@{{[0-9]+[Berd]}} 1@{{[0-9]+[Berd]}} 2@{{[0-9]+B-phi}} +# CHECK: %0 [{{.*}}:0)[{{.*}}:1)[{{.*}}:2) 0@{{[0-9]+[Berd]}} 1@{{[0-9]+[Berd]}} 2@{{[0-9]+B-phi}} --- | define amdgpu_kernel void @test0() { ret void } ... 
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll index 534824d8c113..80a08acfc98e 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll @@ -14,6 +14,8 @@ declare i32 @llvm.amdgcn.workitem.id.x() #1 ; Make sure no crash on invalid non-constant ; GCN-LABEL: {{^}}invalid_variable_order_lds_atomic_dec_ret_i32: +; CIVI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 define amdgpu_kernel void @invalid_variable_order_lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %order.var) #0 { %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 %order.var, i32 0, i1 false) store i32 %result, i32 addrspace(1)* %out @@ -22,6 +24,8 @@ define amdgpu_kernel void @invalid_variable_order_lds_atomic_dec_ret_i32(i32 add ; Make sure no crash on invalid non-constant ; GCN-LABEL: {{^}}invalid_variable_scope_lds_atomic_dec_ret_i32: +; CIVI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 define amdgpu_kernel void @invalid_variable_scope_lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %scope.var) #0 { %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 %scope.var, i1 false) store i32 %result, i32 addrspace(1)* %out @@ -37,7 +41,10 @@ define amdgpu_kernel void @invalid_variable_volatile_lds_atomic_dec_ret_i32(i32 } ; GCN-LABEL: {{^}}lds_atomic_dec_ret_i32: -; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42 +; CIVI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 42 ; GCN: ds_dec_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]] define amdgpu_kernel void @lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) #0 { %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false) @@ -46,7 +53,10 @@ define amdgpu_kernel void @lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 ad } ; GCN-LABEL: {{^}}lds_atomic_dec_ret_i32_offset: -; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42 +; CIVI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 42 ; GCN: ds_dec_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]] offset:16 define amdgpu_kernel void @lds_atomic_dec_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) #0 { %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 @@ -56,9 +66,12 @@ define amdgpu_kernel void @lds_atomic_dec_ret_i32_offset(i32 addrspace(1)* %out, } ; GCN-LABEL: {{^}}lds_atomic_dec_noret_i32: -; GCN: s_load_dword [[SPTR:s[0-9]+]], -; GCN: v_mov_b32_e32 [[DATA:v[0-9]+]], 4 -; GCN: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]] +; CIVI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-DAG: s_load_dword [[SPTR:s[0-9]+]], +; GCN-DAG: v_mov_b32_e32 [[DATA:v[0-9]+]], 4 +; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]] ; GCN: ds_dec_u32 [[VPTR]], [[DATA]] define amdgpu_kernel void @lds_atomic_dec_noret_i32(i32 addrspace(3)* %ptr) nounwind { %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false) @@ -66,7 +79,10 @@ define amdgpu_kernel void @lds_atomic_dec_noret_i32(i32 addrspace(3)* %ptr) noun } ; GCN-LABEL: {{^}}lds_atomic_dec_noret_i32_offset: -; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42 +; CIVI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 42 ; GCN: ds_dec_u32 v{{[0-9]+}}, [[K]] offset:16 define amdgpu_kernel void @lds_atomic_dec_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 @@ -277,7 
+293,10 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset_addr64(i64 addrspace @lds0 = addrspace(3) global [512 x i32] undef ; GCN-LABEL: {{^}}atomic_dec_shl_base_lds_0: -; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}} +; CIVI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-DAG: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}} ; GCN: ds_dec_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8 define amdgpu_kernel void @atomic_dec_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 { %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1 @@ -290,6 +309,9 @@ define amdgpu_kernel void @atomic_dec_shl_base_lds_0(i32 addrspace(1)* %out, i32 } ; GCN-LABEL: {{^}}lds_atomic_dec_ret_i64: +; CIVI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}} ; GCN: ds_dec_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}} @@ -300,6 +322,9 @@ define amdgpu_kernel void @lds_atomic_dec_ret_i64(i64 addrspace(1)* %out, i64 ad } ; GCN-LABEL: {{^}}lds_atomic_dec_ret_i64_offset: +; CIVI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}} ; GCN: ds_dec_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32 @@ -311,6 +336,9 @@ define amdgpu_kernel void @lds_atomic_dec_ret_i64_offset(i64 addrspace(1)* %out, } ; GCN-LABEL: {{^}}lds_atomic_dec_noret_i64: +; CIVI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}} ; GCN: ds_dec_u64 v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}} @@ -320,6 +348,9 @@ define amdgpu_kernel void @lds_atomic_dec_noret_i64(i64 addrspace(3)* %ptr) noun } ; GCN-LABEL: {{^}}lds_atomic_dec_noret_i64_offset: +; CIVI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}} ; GCN: ds_dec_u64 v{{[0-9]+}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} offset:32{{$}} @@ -406,7 +437,10 @@ define amdgpu_kernel void @global_atomic_dec_noret_i64_offset_addr64(i64 addrspa @lds1 = addrspace(3) global [512 x i64] undef, align 8 ; GCN-LABEL: {{^}}atomic_dec_shl_base_lds_0_i64: -; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 3, {{v[0-9]+}} +; CIVI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-DAG: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 3, {{v[0-9]+}} ; GCN: ds_dec_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, [[PTR]], v{{\[[0-9]+:[0-9]+\]}} offset:16 define amdgpu_kernel void @atomic_dec_shl_base_lds_0_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 { %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1 diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll index 726c3e2f4aae..75ce7f54ae39 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll @@ -13,7 +13,10 @@ declare i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* nocapture, i64, declare i32 @llvm.amdgcn.workitem.id.x() #1 ; GCN-LABEL: {{^}}lds_atomic_inc_ret_i32: -; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42 +; CIVI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 42 ; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]] define amdgpu_kernel void @lds_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) #0 { %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false) @@ -22,7 +25,10 @@ define amdgpu_kernel 
void @lds_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 ad } ; GCN-LABEL: {{^}}lds_atomic_inc_ret_i32_offset: -; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42 +; CIVI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 42 ; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]] offset:16 define amdgpu_kernel void @lds_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) #0 { %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 @@ -32,9 +38,12 @@ define amdgpu_kernel void @lds_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out, } ; GCN-LABEL: {{^}}lds_atomic_inc_noret_i32: -; GCN: s_load_dword [[SPTR:s[0-9]+]], -; GCN: v_mov_b32_e32 [[DATA:v[0-9]+]], 4 -; GCN: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]] +; CIVI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-DAG: s_load_dword [[SPTR:s[0-9]+]], +; GCN-DAG: v_mov_b32_e32 [[DATA:v[0-9]+]], 4 +; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]] ; GCN: ds_inc_u32 [[VPTR]], [[DATA]] define amdgpu_kernel void @lds_atomic_inc_noret_i32(i32 addrspace(3)* %ptr) nounwind { %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false) @@ -42,7 +51,10 @@ define amdgpu_kernel void @lds_atomic_inc_noret_i32(i32 addrspace(3)* %ptr) noun } ; GCN-LABEL: {{^}}lds_atomic_inc_noret_i32_offset: -; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42 +; CIVI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 42 ; GCN: ds_inc_u32 v{{[0-9]+}}, [[K]] offset:16 define amdgpu_kernel void @lds_atomic_inc_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 @@ -395,6 +407,19 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset_addr64(i64 addrspace ret void } +; GCN-LABEL: {{^}}nocse_lds_atomic_inc_ret_i32: +; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42 +; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]] +; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]] +define amdgpu_kernel void @nocse_lds_atomic_inc_ret_i32(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(3)* %ptr) #0 { + %result0 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false) + %result1 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false) + + store i32 %result0, i32 addrspace(1)* %out0 + store i32 %result1, i32 addrspace(1)* %out1 + ret void +} + attributes #0 = { nounwind } attributes #1 = { nounwind readnone } attributes #2 = { nounwind argmemonly } diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.format.d16.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.format.d16.ll new file mode 100644 index 000000000000..43776728d5c1 --- /dev/null +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.format.d16.ll @@ -0,0 +1,41 @@ +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=UNPACKED %s +; RUN: llc < %s -march=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=PACKED %s +; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=PACKED %s + +; GCN-LABEL: {{^}}buffer_load_format_d16_x: +; GCN: buffer_load_format_d16_x v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], 0 +define amdgpu_ps half @buffer_load_format_d16_x(<4 x i32> inreg %rsrc) { +main_body: + %data = call half @llvm.amdgcn.buffer.load.format.f16(<4 x i32> %rsrc, i32 0, i32 0, i1 0, i1 0) + ret half %data +} + 
+; GCN-LABEL: {{^}}buffer_load_format_d16_xy: +; UNPACKED: buffer_load_format_d16_xy v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], 0 +; UNPACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]] + +; PACKED: buffer_load_format_d16_xy v[[FULL:[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], 0 +; PACKED: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, v[[FULL]] +define amdgpu_ps half @buffer_load_format_d16_xy(<4 x i32> inreg %rsrc) { +main_body: + %data = call <2 x half> @llvm.amdgcn.buffer.load.format.v2f16(<4 x i32> %rsrc, i32 0, i32 0, i1 0, i1 0) + %elt = extractelement <2 x half> %data, i32 1 + ret half %elt +} + +; GCN-LABEL: {{^}}buffer_load_format_d16_xyzw: +; UNPACKED: buffer_load_format_d16_xyzw v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], 0 +; UNPACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]] + +; PACKED: buffer_load_format_d16_xyzw v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], 0 +; PACKED: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, v[[HI]] +define amdgpu_ps half @buffer_load_format_d16_xyzw(<4 x i32> inreg %rsrc) { +main_body: + %data = call <4 x half> @llvm.amdgcn.buffer.load.format.v4f16(<4 x i32> %rsrc, i32 0, i32 0, i1 0, i1 0) + %elt = extractelement <4 x half> %data, i32 3 + ret half %elt +} + +declare half @llvm.amdgcn.buffer.load.format.f16(<4 x i32>, i32, i32, i1, i1) +declare <2 x half> @llvm.amdgcn.buffer.load.format.v2f16(<4 x i32>, i32, i32, i1, i1) +declare <4 x half> @llvm.amdgcn.buffer.load.format.v4f16(<4 x i32>, i32, i32, i1, i1) diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.ll index 4f8c61912248..49ca7d405724 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.ll @@ -127,7 +127,7 @@ entry: } ;CHECK-LABEL: {{^}}buffer_load_x1_offen_merged: -;CHECK-NEXT: BB# +;CHECK-NEXT: %bb. ;CHECK-NEXT: buffer_load_dwordx4 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:4 ;CHECK-NEXT: buffer_load_dwordx2 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:28 ;CHECK: s_waitcnt @@ -151,7 +151,7 @@ main_body: } ;CHECK-LABEL: {{^}}buffer_load_x1_offen_merged_glc_slc: -;CHECK-NEXT: BB# +;CHECK-NEXT: %bb. ;CHECK-NEXT: buffer_load_dwordx2 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:4{{$}} ;CHECK-NEXT: buffer_load_dwordx2 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:12 glc{{$}} ;CHECK-NEXT: buffer_load_dwordx2 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:28 glc slc{{$}} @@ -176,7 +176,7 @@ main_body: } ;CHECK-LABEL: {{^}}buffer_load_x2_offen_merged: -;CHECK-NEXT: BB# +;CHECK-NEXT: %bb. ;CHECK-NEXT: buffer_load_dwordx4 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:4 ;CHECK: s_waitcnt define amdgpu_ps void @buffer_load_x2_offen_merged(<4 x i32> inreg %rsrc, i32 %a) { @@ -194,7 +194,7 @@ main_body: } ;CHECK-LABEL: {{^}}buffer_load_x1_offset_merged: -;CHECK-NEXT: BB# +;CHECK-NEXT: %bb. ;CHECK-NEXT: buffer_load_dwordx4 v[{{[0-9]}}:{{[0-9]}}], off, s[0:3], 0 offset:4 ;CHECK-NEXT: buffer_load_dwordx2 v[{{[0-9]}}:{{[0-9]}}], off, s[0:3], 0 offset:28 ;CHECK: s_waitcnt @@ -212,7 +212,7 @@ main_body: } ;CHECK-LABEL: {{^}}buffer_load_x2_offset_merged: -;CHECK-NEXT: BB# +;CHECK-NEXT: %bb. 
;CHECK-NEXT: buffer_load_dwordx4 v[{{[0-9]}}:{{[0-9]}}], off, s[0:3], 0 offset:4 ;CHECK: s_waitcnt define amdgpu_ps void @buffer_load_x2_offset_merged(<4 x i32> inreg %rsrc) { diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.format.d16.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.format.d16.ll new file mode 100644 index 000000000000..bcaa600a483b --- /dev/null +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.format.d16.ll @@ -0,0 +1,50 @@ +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=UNPACKED %s +; RUN: llc < %s -march=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=PACKED -check-prefix=GFX81 %s +; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=PACKED -check-prefix=GFX9 %s + +; GCN-LABEL: {{^}}buffer_store_format_d16_x: +; GCN: v_trunc_f16_e32 v[[LO:[0-9]+]], s{{[0-9]+}} +; GCN: buffer_store_format_d16_x v[[LO]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 idxen +define amdgpu_kernel void @buffer_store_format_d16_x(<4 x i32> %rsrc, half %data, i32 %index) { +main_body: + call void @llvm.amdgcn.buffer.store.format.f16(half %data, <4 x i32> %rsrc, i32 %index, i32 0, i1 0, i1 0) + ret void +} + +; GCN-LABEL: {{^}}buffer_store_format_d16_xy: + +; UNPACKED: flat_load_ushort v[[HI:[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc slc +; UNPACKED: flat_load_ushort v[[LO:[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc slc +; UNPACKED: buffer_store_format_d16_xy v{{\[}}[[LO]]:[[HI]]{{\]}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 idxen + +; PACKED: buffer_store_format_d16_xy v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 idxen +define amdgpu_kernel void @buffer_store_format_d16_xy(<4 x i32> %rsrc, <2 x half> %data, i32 %index) { +main_body: + call void @llvm.amdgcn.buffer.store.format.v2f16(<2 x half> %data, <4 x i32> %rsrc, i32 %index, i32 0, i1 0, i1 0) + ret void +} + +; GCN-LABEL: {{^}}buffer_store_format_d16_xyzw: + +; UNPACKED: flat_load_ushort v[[HI:[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc slc +; UNPACKED: flat_load_ushort v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] glc slc +; UNPACKED: flat_load_ushort v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] glc slc +; UNPACKED: flat_load_ushort v[[LO:[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc slc +; UNPACKED: buffer_store_format_d16_xyzw v{{\[}}[[LO]]:[[HI]]{{\]}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 idxen + +; GFX81: v_or_b32_e32 v[[HI:[0-9]+]] +; GFX81: v_or_b32_e32 v[[LO:[0-9]+]] + +; GFX9: v_mov_b32_e32 v[[LO:[0-9]+]] +; GFX9: v_mov_b32_e32 v[[HI:[0-9]+]] + +; PACKED: buffer_store_format_d16_xyzw v{{\[}}[[LO]]:[[HI]]{{\]}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 idxen +define amdgpu_kernel void @buffer_store_format_d16_xyzw(<4 x i32> %rsrc, <4 x half> %data, i32 %index) { +main_body: + call void @llvm.amdgcn.buffer.store.format.v4f16(<4 x half> %data, <4 x i32> %rsrc, i32 %index, i32 0, i1 0, i1 0) + ret void +} + +declare void @llvm.amdgcn.buffer.store.format.f16(half, <4 x i32>, i32, i32, i1, i1) +declare void @llvm.amdgcn.buffer.store.format.v2f16(<2 x half>, <4 x i32>, i32, i32, i1, i1) +declare void @llvm.amdgcn.buffer.store.format.v4f16(<4 x half>, <4 x i32>, i32, i32, i1, i1) diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.ll index 10bea8ea63b0..69de9555035b 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.ll @@ -4,7 +4,7 @@ declare void 
@llvm.amdgcn.buffer.wbinvl1() #0 ; GCN-LABEL: {{^}}test_buffer_wbinvl1: -; GCN-NEXT: ; BB#0: +; GCN-NEXT: ; %bb.0: ; SI-NEXT: buffer_wbinvl1 ; encoding: [0x00,0x00,0xc4,0xe1,0x00,0x00,0x00,0x00] ; VI-NEXT: buffer_wbinvl1 ; encoding: [0x00,0x00,0xf8,0xe0,0x00,0x00,0x00,0x00] ; GCN-NEXT: s_endpgm diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.sc.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.sc.ll index fe60d16d90f7..d1c8f37b3d85 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.sc.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.sc.ll @@ -3,7 +3,7 @@ declare void @llvm.amdgcn.buffer.wbinvl1.sc() #0 ; SI-LABEL: {{^}}test_buffer_wbinvl1_sc: -; SI-NEXT: ; BB#0: +; SI-NEXT: ; %bb.0: ; SI-NEXT: buffer_wbinvl1_sc ; encoding: [0x00,0x00,0xc0,0xe1,0x00,0x00,0x00,0x00] ; SI-NEXT: s_endpgm define amdgpu_kernel void @test_buffer_wbinvl1_sc() #0 { diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.vol.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.vol.ll index 061c1469ed4d..4dc938c9b0a2 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.vol.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.vol.ll @@ -4,7 +4,7 @@ declare void @llvm.amdgcn.buffer.wbinvl1.vol() #0 ; GCN-LABEL: {{^}}test_buffer_wbinvl1_vol: -; GCN-NEXT: ; BB#0: +; GCN-NEXT: ; %bb.0: ; CI-NEXT: buffer_wbinvl1_vol ; encoding: [0x00,0x00,0xc0,0xe1,0x00,0x00,0x00,0x00] ; VI-NEXT: buffer_wbinvl1_vol ; encoding: [0x00,0x00,0xfc,0xe0,0x00,0x00,0x00,0x00] ; GCN: s_endpgm diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pkrtz.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pkrtz.ll index 7b1cfa18721d..0aa64e2290dc 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pkrtz.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pkrtz.ll @@ -1,6 +1,6 @@ ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s ; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX89 -check-prefix=VI %s -; RUN: llc -march=amdgcn -mcpu=gfx901 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX89 -check-prefix=GFX9 %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX89 -check-prefix=GFX9 %s ; GCN-LABEL: {{^}}s_cvt_pkrtz_v2f16_f32: ; GCN-DAG: s_load_dword [[X:s[0-9]+]], s[0:1], 0x{{b|2c}} @@ -25,7 +25,7 @@ define amdgpu_kernel void @s_cvt_pkrtz_samereg_v2f16_f32(<2 x half> addrspace(1) ; FIXME: Folds to 0 on gfx9 ; GCN-LABEL: {{^}}s_cvt_pkrtz_undef_undef: -; GCN-NEXT: ; BB#0 +; GCN-NEXT: ; %bb.0 ; SI-NEXT: s_endpgm ; VI-NEXT: s_endpgm ; GFX9: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}} diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.exp.compr.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.exp.compr.ll index b972ddb8cb77..f08f896fe384 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.exp.compr.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.exp.compr.ll @@ -1,6 +1,6 @@ ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=GCN %s ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=gfx901 -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=GCN %s declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0 declare 
void @llvm.amdgcn.exp.compr.v2i16(i32, i32, <2 x i16>, <2 x i16>, i1, i1) #0 diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.f16.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.f16.ll index a4ae37b23c5f..91d1857f306b 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.f16.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.f16.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=gfx901 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}test_fmed3_f16: ; GCN: v_med3_f16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.image.d16.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.image.d16.ll new file mode 100644 index 000000000000..71fc76952c24 --- /dev/null +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.image.d16.ll @@ -0,0 +1,125 @@ +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=UNPACKED %s +; RUN: llc < %s -march=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=PACKED -check-prefix=GFX81 %s +; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=PACKED -check-prefix=GFX9 %s + +; GCN-LABEL: {{^}}image_load_f16 +; GCN: image_load v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x1 unorm d16 +define amdgpu_ps half @image_load_f16(<4 x i32> %coords, <8 x i32> inreg %rsrc) { +main_body: + %tex = call half @llvm.amdgcn.image.load.f16.v4i32.v8i32(<4 x i32> %coords, <8 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false) + ret half %tex +} + +; GCN-LABEL: {{^}}image_load_v2f16: +; UNPACKED: image_load v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x3 unorm d16 +; UNPACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]] + +; PACKED: image_load v[[HI:[0-9]+]], v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x3 unorm d16 +; PACKED: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, v[[HI]] +define amdgpu_ps half @image_load_v2f16(<4 x i32> %coords, <8 x i32> inreg %rsrc) { +main_body: + %tex = call <2 x half> @llvm.amdgcn.image.load.v2f16.v4i32.v8i32(<4 x i32> %coords, <8 x i32> %rsrc, i32 3, i1 false, i1 false, i1 false, i1 false) + %elt = extractelement <2 x half> %tex, i32 1 + ret half %elt +} + +; GCN-LABEL: {{^}}image_load_v4f16: +; UNPACKED: image_load v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf unorm d16 +; UNPACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]] + +; PACKED: image_load v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf unorm d16 +; PACKED: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, v[[HI]] +define amdgpu_ps half @image_load_v4f16(<4 x i32> %coords, <8 x i32> inreg %rsrc) { +main_body: + %tex = call <4 x half> @llvm.amdgcn.image.load.v4f16.v4i32.v8i32(<4 x i32> %coords, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false) + %elt = extractelement <4 x half> %tex, i32 3 + ret half %elt +} + +; GCN-LABEL: {{^}}image_load_mip_v4f16: +; UNPACKED: image_load_mip v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf unorm d16 +; UNPACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]] + +; PACKED: image_load_mip v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf unorm d16 +; PACKED: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, v[[HI]] +define amdgpu_ps half @image_load_mip_v4f16(<4 x i32> %coords, <8 x i32> inreg %rsrc) { 
+main_body: + %tex = call <4 x half> @llvm.amdgcn.image.load.mip.v4f16.v4i32.v8i32(<4 x i32> %coords, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false) + %elt = extractelement <4 x half> %tex, i32 3 + ret half %elt +} + +; GCN-LABEL: {{^}}image_store_f16 +; GCN: v_trunc_f16_e32 v[[LO:[0-9]+]], s{{[0-9]+}} +; GCN: image_store v[[LO]], v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x1 unorm d16 +define amdgpu_kernel void @image_store_f16(half %data, <4 x i32> %coords, <8 x i32> inreg %rsrc) { +main_body: + call void @llvm.amdgcn.image.store.f16.v4i32.v8i32(half %data, <4 x i32> %coords, <8 x i32> %rsrc, i32 1, i1 false, i1 false, i1 false, i1 false) + ret void +} + +; GCN-LABEL: {{^}}image_store_v2f16 + +; UNPACKED: flat_load_ushort v[[HI:[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc slc +; UNPACKED: flat_load_ushort v[[LO:[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc slc +; UNPACKED: image_store v{{\[}}[[LO]]:[[HI]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x3 unorm d16 + +; PACKED: image_store v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x3 unorm d16 +define amdgpu_kernel void @image_store_v2f16(<2 x half> %data, <4 x i32> %coords, <8 x i32> inreg %rsrc) { +main_body: + call void @llvm.amdgcn.image.store.v2f16.v4i32.v8i32(<2 x half> %data, <4 x i32> %coords, <8 x i32> %rsrc, i32 3, i1 false, i1 false, i1 false, i1 false) + ret void +} + +; GCN-LABEL: {{^}}image_store_v4f16 + +; UNPACKED: flat_load_ushort v[[HI:[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc slc +; UNPACKED: flat_load_ushort v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] glc slc +; UNPACKED: flat_load_ushort v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] glc slc +; UNPACKED: flat_load_ushort v[[LO:[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc slc +; UNPACKED: image_store v{{\[}}[[LO]]:[[HI]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf unorm d16 + +; GFX81: v_or_b32_e32 v[[HI:[0-9]+]] +; GFX81: v_or_b32_e32 v[[LO:[0-9]+]] + +; GFX9: v_mov_b32_e32 v[[LO:[0-9]+]] +; GFX9: v_mov_b32_e32 v[[HI:[0-9]+]] + +; PACKED: image_store v{{\[}}[[LO]]:[[HI]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf unorm d16 +define amdgpu_kernel void @image_store_v4f16(<4 x half> %data, <4 x i32> %coords, <8 x i32> inreg %rsrc) { +main_body: + call void @llvm.amdgcn.image.store.v4f16.v4i32.v8i32(<4 x half> %data, <4 x i32> %coords, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false) + ret void +} + +; GCN-LABEL: {{^}}image_store_mip_v4f16 + +; UNPACKED: flat_load_ushort v[[HI:[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc slc +; UNPACKED: flat_load_ushort v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] glc slc +; UNPACKED: flat_load_ushort v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] glc slc +; UNPACKED: flat_load_ushort v[[LO:[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc slc +; UNPACKED: image_store_mip v{{\[}}[[LO]]:[[HI]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf unorm d16 + +; GFX81: v_or_b32_e32 v[[HI:[0-9]+]] +; GFX81: v_or_b32_e32 v[[LO:[0-9]+]] + +; GFX9: v_mov_b32_e32 v[[LO:[0-9]+]] +; GFX9: v_mov_b32_e32 v[[HI:[0-9]+]] + +; PACKED: image_store_mip v{{\[}}[[LO]]:[[HI]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf unorm d16 +define amdgpu_kernel void @image_store_mip_v4f16(<4 x half> %data, <4 x i32> %coords, <8 x i32> inreg %rsrc) { +main_body: + call void @llvm.amdgcn.image.store.mip.v4f16.v4i32.v8i32(<4 x half> %data, <4 x i32> %coords, <8 x i32> %rsrc, i32 15, i1 false, i1 false, i1 false, i1 false) + ret void +} + + +declare half @llvm.amdgcn.image.load.f16.v4i32.v8i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) +declare <2 x half> 
@llvm.amdgcn.image.load.v2f16.v4i32.v8i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) +declare <4 x half> @llvm.amdgcn.image.load.v4f16.v4i32.v8i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) +declare <4 x half> @llvm.amdgcn.image.load.mip.v4f16.v4i32.v8i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) + +declare void @llvm.amdgcn.image.store.f16.v4i32.v8i32(half, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) +declare void @llvm.amdgcn.image.store.v2f16.v4i32.v8i32(<2 x half>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) +declare void @llvm.amdgcn.image.store.v4f16.v4i32.v8i32(<4 x half>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) +declare void @llvm.amdgcn.image.store.mip.v4f16.v4i32.v8i32(<4 x half>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.ll new file mode 100644 index 000000000000..f0451c21fe84 --- /dev/null +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.ll @@ -0,0 +1,137 @@ +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=UNPACKED %s +; RUN: llc < %s -march=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=PACKED -check-prefix=GFX81 %s +; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=PACKED -check-prefix=GFX9 %s + + +; GCN-LABEL: {{^}}image_gather4_f16: +; GCN: image_gather4 v[[HALF:[0-9]+]], v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x1 d16 + +; UNPACKED: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]] + +; GFX81: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]] + +; GFX9: global_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]], off +define amdgpu_kernel void @image_gather4_f16(<4 x float> %coords, <8 x i32> inreg %rsrc, <4 x i32> inreg %sample, half addrspace(1)* %out) { +main_body: + %tex = call half @llvm.amdgcn.image.gather4.f16.v4f32.v8i32(<4 x float> %coords, <8 x i32> %rsrc, <4 x i32> %sample, i32 1, i1 0, i1 0, i1 0, i1 0, i1 0) + store half %tex, half addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}image_gather4_v2f16: +; UNPACKED: image_gather4 v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x3 d16 +; UNPACKED: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HI]] + +; PACKED: image_gather4 v[[DATA:[0-9]+]], v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x3 d16 + +; GFX81: v_lshrrev_b32_e32 v[[HI:[0-9]+]], 16, v[[DATA]] +; GFX81: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HI]] + +; GFX9: global_store_short_d16_hi v[{{[0-9]+:[0-9]+}}], v[[DATA]], off +define amdgpu_kernel void @image_gather4_v2f16(<4 x float> %coords, <8 x i32> inreg %rsrc, <4 x i32> inreg %sample, half addrspace(1)* %out) { +main_body: + %tex = call <2 x half> @llvm.amdgcn.image.gather4.v2f16.v4f32.v8i32(<4 x float> %coords, <8 x i32> %rsrc, <4 x i32> %sample, i32 3, i1 0, i1 0, i1 0, i1 0, i1 0) + %elt = extractelement <2 x half> %tex, i32 1 + store half %elt, half addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}image_gather4_v4f16: +; UNPACKED: image_gather4 v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16 +; UNPACKED: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HI]] + +; PACKED: image_gather4 v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16 +; PACKED: v_lshrrev_b32_e32 
v[[HALF:[0-9]+]], 16, v[[HI]] + +; GFX81: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]] + +; GFX9: global_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]], off +define amdgpu_kernel void @image_gather4_v4f16(<4 x float> %coords, <8 x i32> inreg %rsrc, <4 x i32> inreg %sample, half addrspace(1)* %out) { +main_body: + %tex = call <4 x half> @llvm.amdgcn.image.gather4.v4f16.v4f32.v8i32(<4 x float> %coords, <8 x i32> %rsrc, <4 x i32> %sample, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) + %elt = extractelement <4 x half> %tex, i32 3 + store half %elt, half addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}image_gather4_cl_v4f16: +; UNPACKED: image_gather4_cl v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16 +; UNPACKED: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HI]] + +; PACKED: image_gather4_cl v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16 +; PACKED: v_lshrrev_b32_e32 v[[HALF:[0-9]+]], 16, v[[HI]] + +; GFX81: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]] + +; GFX9: global_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]], off +define amdgpu_kernel void @image_gather4_cl_v4f16(<4 x float> %coords, <8 x i32> inreg %rsrc, <4 x i32> inreg %sample, half addrspace(1)* %out) { +main_body: + %tex = call <4 x half> @llvm.amdgcn.image.gather4.cl.v4f16.v4f32.v8i32(<4 x float> %coords, <8 x i32> %rsrc, <4 x i32> %sample, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) + %elt = extractelement <4 x half> %tex, i32 3 + store half %elt, half addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}image_gather4_c_v4f16: +; UNPACKED: image_gather4_c v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16 +; UNPACKED: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HI]] + +; PACKED: image_gather4_c v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16 +; PACKED: v_lshrrev_b32_e32 v[[HALF:[0-9]+]], 16, v[[HI]] + +; GFX81: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]] + +; GFX9: global_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]], off +define amdgpu_kernel void @image_gather4_c_v4f16(<4 x float> %coords, <8 x i32> inreg %rsrc, <4 x i32> inreg %sample, half addrspace(1)* %out) { +main_body: + %tex = call <4 x half> @llvm.amdgcn.image.gather4.c.v4f16.v4f32.v8i32(<4 x float> %coords, <8 x i32> %rsrc, <4 x i32> %sample, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) + %elt = extractelement <4 x half> %tex, i32 3 + store half %elt, half addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}image_gather4_o_v4f16: +; UNPACKED: image_gather4_o v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16 +; UNPACKED: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HI]] + +; PACKED: image_gather4_o v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16 +; PACKED: v_lshrrev_b32_e32 v[[HALF:[0-9]+]], 16, v[[HI]] + +; GFX81: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]] + +; GFX9: global_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]], off +define amdgpu_kernel void @image_gather4_o_v4f16(<4 x float> %coords, <8 x i32> inreg %rsrc, <4 x i32> inreg %sample, half addrspace(1)* %out) { +main_body: + %tex = call <4 x half> @llvm.amdgcn.image.gather4.o.v4f16.v4f32.v8i32(<4 x float> %coords, <8 x i32> %rsrc, <4 x i32> %sample, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) + %elt = extractelement <4 x 
half> %tex, i32 3 + store half %elt, half addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}image_gather4_c_o_v4f16: +; UNPACKED: image_gather4_c_o v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16 +; UNPACKED: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HI]] + +; PACKED: image_gather4_c_o v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16 +; PACKED: v_lshrrev_b32_e32 v[[HALF:[0-9]+]], 16, v[[HI]] + +; GFX81: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]] + +; GFX9: global_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]], off +define amdgpu_kernel void @image_gather4_c_o_v4f16(<4 x float> %coords, <8 x i32> inreg %rsrc, <4 x i32> inreg %sample, half addrspace(1)* %out) { +main_body: + %tex = call <4 x half> @llvm.amdgcn.image.gather4.c.o.v4f16.v4f32.v8i32(<4 x float> %coords, <8 x i32> %rsrc, <4 x i32> %sample, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) + %elt = extractelement <4 x half> %tex, i32 3 + store half %elt, half addrspace(1)* %out + ret void +} + +declare half @llvm.amdgcn.image.gather4.f16.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) +declare <2 x half> @llvm.amdgcn.image.gather4.v2f16.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) +declare <4 x half> @llvm.amdgcn.image.gather4.v4f16.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) + + +declare <4 x half> @llvm.amdgcn.image.gather4.cl.v4f16.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) +declare <4 x half> @llvm.amdgcn.image.gather4.c.v4f16.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) +declare <4 x half> @llvm.amdgcn.image.gather4.o.v4f16.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) +declare <4 x half> @llvm.amdgcn.image.gather4.c.o.v4f16.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.image.getlod.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.image.getlod.ll index 2e78e2a4c6f5..dfe4aff7bc1c 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.image.getlod.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.image.getlod.ll @@ -3,6 +3,8 @@ ; GCN-LABEL: {{^}}getlod: ; GCN: image_get_lod {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf da +; GCN: s_waitcnt vmcnt(0) +; GCN: store_dwordx4 define amdgpu_kernel void @getlod(<4 x float> addrspace(1)* %out) { main_body: %r = call <4 x float> @llvm.amdgcn.image.getlod.v4f32.f32.v8i32(float undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 1) @@ -12,6 +14,8 @@ main_body: ; GCN-LABEL: {{^}}getlod_v2: ; GCN: image_get_lod {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf da +; GCN: s_waitcnt vmcnt(0) +; GCN: store_dwordx4 define amdgpu_kernel void @getlod_v2(<4 x float> addrspace(1)* %out) { main_body: %r = call <4 x float> @llvm.amdgcn.image.getlod.v4f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 1) @@ -21,6 +25,8 @@ main_body: ; GCN-LABEL: {{^}}getlod_v4: ; GCN: image_get_lod {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf da +; GCN: s_waitcnt vmcnt(0) +; GCN: store_dwordx4 define amdgpu_kernel void @getlod_v4(<4 x float> addrspace(1)* %out) { main_body: %r = call <4 x float> @llvm.amdgcn.image.getlod.v4f32.v4f32.v8i32(<4 x float> 
undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 1) diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.image.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.image.ll index 42c870567463..d9be4a4d0191 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.image.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.image.ll @@ -129,6 +129,8 @@ main_body: ; GCN-LABEL: {{^}}getresinfo: ; GCN-NOT: s_waitcnt ; GCN: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf +; GCN: s_waitcnt vmcnt(0) +; GCN: exp define amdgpu_ps void @getresinfo() #0 { main_body: %r = call <4 x float> @llvm.amdgcn.image.getresinfo.v4f32.i32.v8i32(i32 undef, <8 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false) @@ -140,6 +142,19 @@ main_body: ret void } +; GCN-LABEL: {{^}}getresinfo_dmask0: +; GCN-NOT: image_get_resinfo +define amdgpu_ps void @getresinfo_dmask0() #0 { +main_body: + %r = call <4 x float> @llvm.amdgcn.image.getresinfo.v4f32.i32.v8i32(i32 undef, <8 x i32> undef, i32 0, i1 false, i1 false, i1 false, i1 false) + %r0 = extractelement <4 x float> %r, i32 0 + %r1 = extractelement <4 x float> %r, i32 1 + %r2 = extractelement <4 x float> %r, i32 2 + %r3 = extractelement <4 x float> %r, i32 3 + call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r0, float %r1, float %r2, float %r3, i1 true, i1 true) #0 + ret void +} + ; Ideally, the register allocator would avoid the wait here ; ; GCN-LABEL: {{^}}image_store_wait: @@ -186,9 +201,10 @@ declare <4 x float> @llvm.amdgcn.image.load.v4f32.i32.v8i32(i32, <8 x i32>, i32, declare <4 x float> @llvm.amdgcn.image.load.v4f32.v2i32.v8i32(<2 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1 declare <4 x float> @llvm.amdgcn.image.load.v4f32.v4i32.v8i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1 declare <4 x float> @llvm.amdgcn.image.load.mip.v4f32.v4i32.v8i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1 -declare <4 x float> @llvm.amdgcn.image.getresinfo.v4f32.i32.v8i32(i32, <8 x i32>, i32, i1, i1, i1, i1) #1 +declare <4 x float> @llvm.amdgcn.image.getresinfo.v4f32.i32.v8i32(i32, <8 x i32>, i32, i1, i1, i1, i1) #2 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0 attributes #0 = { nounwind } attributes #1 = { nounwind readonly } +attributes #2 = { nounwind readnone } diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.ll new file mode 100644 index 000000000000..b5f8da64628f --- /dev/null +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.ll @@ -0,0 +1,135 @@ +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=UNPACKED %s +; RUN: llc < %s -march=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=PACKED -check-prefix=GFX81 %s +; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=PACKED -check-prefix=GFX9 %s + + +; GCN-LABEL: {{^}}image_sample_f16: +; GCN: image_sample v[[HALF:[0-9]+]], v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x1 d16 + +; UNPACKED: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]] + +; GFX81: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]] + +; GFX9: global_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]], off +define amdgpu_kernel void @image_sample_f16(<4 x float> %coords, <8 x i32> inreg %rsrc, <4 x i32> inreg %sample, half addrspace(1)* %out) { +main_body: + %tex = call half @llvm.amdgcn.image.sample.f16.v4f32.v8i32(<4 x float> %coords, <8 x i32> %rsrc, <4 x i32> 
%sample, i32 1, i1 0, i1 0, i1 0, i1 0, i1 0) + store half %tex, half addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}image_sample_v2f16: +; UNPACKED: image_sample v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x3 d16 +; UNPACKED: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HI]] + +; PACKED: image_sample v[[DATA:[0-9]+]], v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x3 d16 + +; GFX81: v_lshrrev_b32_e32 v[[HI:[0-9]+]], 16, v[[DATA]] +; GFX81: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HI]] + +; GFX9: global_store_short_d16_hi v[{{[0-9]+:[0-9]+}}], v[[DATA]], off +define amdgpu_kernel void @image_sample_v2f16(<4 x float> %coords, <8 x i32> inreg %rsrc, <4 x i32> inreg %sample, half addrspace(1)* %out) { +main_body: + %tex = call <2 x half> @llvm.amdgcn.image.sample.v2f16.v4f32.v8i32(<4 x float> %coords, <8 x i32> %rsrc, <4 x i32> %sample, i32 3, i1 0, i1 0, i1 0, i1 0, i1 0) + %elt = extractelement <2 x half> %tex, i32 1 + store half %elt, half addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}image_sample_v4f16: +; UNPACKED: image_sample v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16 +; UNPACKED: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HI]] + +; PACKED: image_sample v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16 +; PACKED: v_lshrrev_b32_e32 v[[HALF:[0-9]+]], 16, v[[HI]] + +; GFX81: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]] + +; GFX9: global_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]], off +define amdgpu_kernel void @image_sample_v4f16(<4 x float> %coords, <8 x i32> inreg %rsrc, <4 x i32> inreg %sample, half addrspace(1)* %out) { +main_body: + %tex = call <4 x half> @llvm.amdgcn.image.sample.v4f16.v4f32.v8i32(<4 x float> %coords, <8 x i32> %rsrc, <4 x i32> %sample, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) + %elt = extractelement <4 x half> %tex, i32 3 + store half %elt, half addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}image_sample_cl_v4f16: +; UNPACKED: image_sample_cl v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16 +; UNPACKED: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HI]] + +; PACKED: image_sample_cl v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16 +; PACKED: v_lshrrev_b32_e32 v[[HALF:[0-9]+]], 16, v[[HI]] + +; GFX81: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]] + +; GFX9: global_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]], off +define amdgpu_kernel void @image_sample_cl_v4f16(<4 x float> %coords, <8 x i32> inreg %rsrc, <4 x i32> inreg %sample, half addrspace(1)* %out) { +main_body: + %tex = call <4 x half> @llvm.amdgcn.image.sample.cl.v4f16.v4f32.v8i32(<4 x float> %coords, <8 x i32> %rsrc, <4 x i32> %sample, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) + %elt = extractelement <4 x half> %tex, i32 3 + store half %elt, half addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}image_sample_c_v4f16: +; UNPACKED: image_sample_c v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16 +; UNPACKED: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HI]] + +; PACKED: image_sample_c v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16 +; PACKED: v_lshrrev_b32_e32 v[[HALF:[0-9]+]], 16, v[[HI]] + 
+; GFX81: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]] + +; GFX9: global_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]], off +define amdgpu_kernel void @image_sample_c_v4f16(<4 x float> %coords, <8 x i32> inreg %rsrc, <4 x i32> inreg %sample, half addrspace(1)* %out) { +main_body: + %tex = call <4 x half> @llvm.amdgcn.image.sample.c.v4f16.v4f32.v8i32(<4 x float> %coords, <8 x i32> %rsrc, <4 x i32> %sample, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) + %elt = extractelement <4 x half> %tex, i32 3 + store half %elt, half addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}image_sample_o_v4f16: +; UNPACKED: image_sample_o v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16 +; UNPACKED: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HI]] + +; PACKED: image_sample_o v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16 +; PACKED: v_lshrrev_b32_e32 v[[HALF:[0-9]+]], 16, v[[HI]] + +; GFX81: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]] +; GFX9: global_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]], off +define amdgpu_kernel void @image_sample_o_v4f16(<4 x float> %coords, <8 x i32> inreg %rsrc, <4 x i32> inreg %sample, half addrspace(1)* %out) { +main_body: + %tex = call <4 x half> @llvm.amdgcn.image.sample.o.v4f16.v4f32.v8i32(<4 x float> %coords, <8 x i32> %rsrc, <4 x i32> %sample, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) + %elt = extractelement <4 x half> %tex, i32 3 + store half %elt, half addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}image_sample_c_o_v4f16: +; UNPACKED: image_sample_c_o v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16 +; UNPACKED: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HI]] + +; PACKED: image_sample_c_o v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0xf d16 +; PACKED: v_lshrrev_b32_e32 v[[HALF:[0-9]+]], 16, v[[HI]] + +; GFX81: flat_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]] +; GFX9: global_store_short v[{{[0-9]+:[0-9]+}}], v[[HALF]], off +define amdgpu_kernel void @image_sample_c_o_v4f16(<4 x float> %coords, <8 x i32> inreg %rsrc, <4 x i32> inreg %sample, half addrspace(1)* %out) { +main_body: + %tex = call <4 x half> @llvm.amdgcn.image.sample.c.o.v4f16.v4f32.v8i32(<4 x float> %coords, <8 x i32> %rsrc, <4 x i32> %sample, i32 15, i1 0, i1 0, i1 0, i1 0, i1 0) + %elt = extractelement <4 x half> %tex, i32 3 + store half %elt, half addrspace(1)* %out + ret void +} + +declare half @llvm.amdgcn.image.sample.f16.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) +declare <2 x half> @llvm.amdgcn.image.sample.v2f16.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) +declare <4 x half> @llvm.amdgcn.image.sample.v4f16.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) + + +declare <4 x half> @llvm.amdgcn.image.sample.cl.v4f16.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) +declare <4 x half> @llvm.amdgcn.image.sample.c.v4f16.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) +declare <4 x half> @llvm.amdgcn.image.sample.o.v4f16.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) +declare <4 x half> @llvm.amdgcn.image.sample.c.o.v4f16.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.ll 
b/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.ll index a379f86e200e..5c4dafa38f53 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.ll @@ -51,7 +51,7 @@ main_body: ; GCN: s_bfm_b64 exec, s1, 0 ; GCN: s_cmp_eq_u32 s1, 64 ; GCN: s_cmov_b64 exec, -1 -; GCN: v_add_co_u32_e32 v0, vcc, s0, v0 +; GCN: v_add_u32_e32 v0, s0, v0 define amdgpu_ps float @reuse_input(i32 inreg %count, i32 %a) { main_body: call void @llvm.amdgcn.init.exec.from.input(i32 %count, i32 19) @@ -65,7 +65,7 @@ main_body: ; GCN: s_bfm_b64 exec, s1, 0 ; GCN: s_cmp_eq_u32 s1, 64 ; GCN: s_cmov_b64 exec, -1 -; GCN: v_add_co_u32_e32 v0, vcc, s0, v0 +; GCN: v_add_u32_e32 v0, s0, v0 define amdgpu_ps float @reuse_input2(i32 inreg %count, i32 %a) { main_body: %s = add i32 %a, %count diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll index a1ecb7f750c7..d6b0628956a0 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll @@ -31,8 +31,8 @@ define amdgpu_ps void @vcc_implicit_def(float %arg13, float %arg14) { } ; SI-LABEL: {{^}}true: -; SI-NEXT: BB# -; SI-NEXT: BB# +; SI-NEXT: %bb. +; SI-NEXT: %bb. ; SI-NEXT: s_endpgm define amdgpu_gs void @true() { call void @llvm.amdgcn.kill(i1 true) diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.ll index 224b2ed72e3b..b7fb96a2d1a5 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.ll @@ -5,7 +5,7 @@ declare void @llvm.amdgcn.s.dcache.inv() #0 declare void @llvm.amdgcn.s.waitcnt(i32) #0 ; GCN-LABEL: {{^}}test_s_dcache_inv: -; GCN-NEXT: ; BB#0: +; GCN-NEXT: ; %bb.0: ; SI-NEXT: s_dcache_inv ; encoding: [0x00,0x00,0xc0,0xc7] ; VI-NEXT: s_dcache_inv ; encoding: [0x00,0x00,0x80,0xc0,0x00,0x00,0x00,0x00] ; GCN-NEXT: s_endpgm @@ -15,7 +15,7 @@ define amdgpu_kernel void @test_s_dcache_inv() #0 { } ; GCN-LABEL: {{^}}test_s_dcache_inv_insert_wait: -; GCN-NEXT: ; BB#0: +; GCN-NEXT: ; %bb.0: ; GCN: s_dcache_inv ; GCN: s_waitcnt lgkmcnt(0) ; encoding define amdgpu_kernel void @test_s_dcache_inv_insert_wait() #0 { diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.vol.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.vol.ll index f96d5db5794a..e8a363adde73 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.vol.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.vol.ll @@ -5,7 +5,7 @@ declare void @llvm.amdgcn.s.dcache.inv.vol() #0 declare void @llvm.amdgcn.s.waitcnt(i32) #0 ; GCN-LABEL: {{^}}test_s_dcache_inv_vol: -; GCN-NEXT: ; BB#0: +; GCN-NEXT: ; %bb.0: ; CI-NEXT: s_dcache_inv_vol ; encoding: [0x00,0x00,0x40,0xc7] ; VI-NEXT: s_dcache_inv_vol ; encoding: [0x00,0x00,0x88,0xc0,0x00,0x00,0x00,0x00] ; GCN-NEXT: s_endpgm @@ -15,7 +15,7 @@ define amdgpu_kernel void @test_s_dcache_inv_vol() #0 { } ; GCN-LABEL: {{^}}test_s_dcache_inv_vol_insert_wait: -; GCN-NEXT: ; BB#0: +; GCN-NEXT: ; %bb.0: ; GCN-NEXT: s_dcache_inv_vol ; GCN: s_waitcnt lgkmcnt(0) ; encoding define amdgpu_kernel void @test_s_dcache_inv_vol_insert_wait() #0 { diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.ll index 99b651350439..254a0fae3c3b 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.ll @@ -4,7 +4,7 @@ declare void @llvm.amdgcn.s.dcache.wb() #0 declare void @llvm.amdgcn.s.waitcnt(i32) #0 ; VI-LABEL: {{^}}test_s_dcache_wb: -; VI-NEXT: ; BB#0: +; VI-NEXT: ; 
%bb.0: ; VI-NEXT: s_dcache_wb ; encoding: [0x00,0x00,0x84,0xc0,0x00,0x00,0x00,0x00] ; VI-NEXT: s_endpgm define amdgpu_kernel void @test_s_dcache_wb() #0 { @@ -13,7 +13,7 @@ define amdgpu_kernel void @test_s_dcache_wb() #0 { } ; VI-LABEL: {{^}}test_s_dcache_wb_insert_wait: -; VI-NEXT: ; BB#0: +; VI-NEXT: ; %bb.0: ; VI-NEXT: s_dcache_wb ; VI: s_waitcnt lgkmcnt(0) ; encoding define amdgpu_kernel void @test_s_dcache_wb_insert_wait() #0 { diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.vol.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.vol.ll index 844fcecdb48b..929cd1c5f0bb 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.vol.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.vol.ll @@ -4,7 +4,7 @@ declare void @llvm.amdgcn.s.dcache.wb.vol() #0 declare void @llvm.amdgcn.s.waitcnt(i32) #0 ; VI-LABEL: {{^}}test_s_dcache_wb_vol: -; VI-NEXT: ; BB#0: +; VI-NEXT: ; %bb.0: ; VI-NEXT: s_dcache_wb_vol ; encoding: [0x00,0x00,0x8c,0xc0,0x00,0x00,0x00,0x00] ; VI-NEXT: s_endpgm define amdgpu_kernel void @test_s_dcache_wb_vol() #0 { @@ -13,7 +13,7 @@ define amdgpu_kernel void @test_s_dcache_wb_vol() #0 { } ; VI-LABEL: {{^}}test_s_dcache_wb_vol_insert_wait: -; VI-NEXT: ; BB#0: +; VI-NEXT: ; %bb.0: ; VI-NEXT: s_dcache_wb_vol ; VI: s_waitcnt lgkmcnt(0) ; encoding define amdgpu_kernel void @test_s_dcache_wb_vol_insert_wait() #0 { diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.s.waitcnt.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.s.waitcnt.ll index f6c2cb44c993..61c287a896fe 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.s.waitcnt.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.s.waitcnt.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck %s ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s ; CHECK-LABEL: {{^}}test1: @@ -20,6 +20,7 @@ define amdgpu_ps void @test1(<8 x i32> inreg %rsrc, <4 x float> %d0, <4 x float> ; CHECK-LABEL: {{^}}test2: ; CHECK-NOT: s_waitcnt ; CHECK: image_load +; CHECK-NEXT: v_lshlrev_b32 ; CHECK-NEXT: s_waitcnt ; CHECK: s_waitcnt vmcnt(0){{$}} ; CHECK-NEXT: image_store diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.load.d16.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.load.d16.ll new file mode 100644 index 000000000000..96d698ee51cd --- /dev/null +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.load.d16.ll @@ -0,0 +1,41 @@ +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=UNPACKED %s +; RUN: llc < %s -march=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=PACKED %s +; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=PACKED %s + +; GCN-LABEL: {{^}}tbuffer_load_d16_x: +; GCN: tbuffer_load_format_d16_x v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], dfmt:6, nfmt:1, 0 +define amdgpu_ps half @tbuffer_load_d16_x(<4 x i32> inreg %rsrc) { +main_body: + %data = call half @llvm.amdgcn.tbuffer.load.f16(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0, i32 6, i32 1, i1 0, i1 0) + ret half %data +} + +; GCN-LABEL: {{^}}tbuffer_load_d16_xy: +; UNPACKED: tbuffer_load_format_d16_xy v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], dfmt:6, nfmt:1, 0 +; UNPACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]] + +; PACKED: tbuffer_load_format_d16_xy v[[FULL:[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], dfmt:6, nfmt:1, 0 +; PACKED: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, v[[FULL]] 
+define amdgpu_ps half @tbuffer_load_d16_xy(<4 x i32> inreg %rsrc) { +main_body: + %data = call <2 x half> @llvm.amdgcn.tbuffer.load.v2f16(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0, i32 6, i32 1, i1 0, i1 0) + %elt = extractelement <2 x half> %data, i32 1 + ret half %elt +} + +; GCN-LABEL: {{^}}tbuffer_load_d16_xyzw: +; UNPACKED: tbuffer_load_format_d16_xyzw v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], dfmt:6, nfmt:1, 0 +; UNPACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]] + +; PACKED: tbuffer_load_format_d16_xyzw v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], dfmt:6, nfmt:1, 0 +; PACKED: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, v[[HI]] +define amdgpu_ps half @tbuffer_load_d16_xyzw(<4 x i32> inreg %rsrc) { +main_body: + %data = call <4 x half> @llvm.amdgcn.tbuffer.load.v4f16(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0, i32 6, i32 1, i1 0, i1 0) + %elt = extractelement <4 x half> %data, i32 3 + ret half %elt +} + +declare half @llvm.amdgcn.tbuffer.load.f16(<4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1) +declare <2 x half> @llvm.amdgcn.tbuffer.load.v2f16(<4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1) +declare <4 x half> @llvm.amdgcn.tbuffer.load.v4f16(<4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1) diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.store.d16.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.store.d16.ll new file mode 100644 index 000000000000..6ccdc2d7f2cf --- /dev/null +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.store.d16.ll @@ -0,0 +1,53 @@ +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=UNPACKED %s +; RUN: llc < %s -march=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=PACKED -check-prefix=GFX81 %s +; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=PACKED -check-prefix=GFX9 %s + + +; GCN-LABEL: {{^}}tbuffer_store_d16_x: +; GCN: v_trunc_f16_e32 v[[LO:[0-9]+]], s{{[0-9]+}} +; GCN: tbuffer_store_format_d16_x v[[LO]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], dfmt:1, nfmt:2, 0 idxen +define amdgpu_kernel void @tbuffer_store_d16_x(<4 x i32> %rsrc, half %data, i32 %vindex) { +main_body: + call void @llvm.amdgcn.tbuffer.store.f16(half %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0, i32 1, i32 2, i1 0, i1 0) + ret void +} + + +; GCN-LABEL: {{^}}tbuffer_store_d16_xy: + +; UNPACKED: flat_load_ushort v[[HI:[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc slc +; UNPACKED: flat_load_ushort v[[LO:[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc slc +; UNPACKED: tbuffer_store_format_d16_xy v{{\[}}[[LO]]:[[HI]]{{\]}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], dfmt:1, nfmt:2, 0 idxen + +; PACKED: tbuffer_store_format_d16_xy v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], dfmt:1, nfmt:2, 0 idxen +define amdgpu_kernel void @tbuffer_store_d16_xy(<4 x i32> %rsrc, <2 x half> %data, i32 %vindex) { +main_body: + call void @llvm.amdgcn.tbuffer.store.v2f16(<2 x half> %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0, i32 1, i32 2, i1 0, i1 0) + ret void +} + + +; GCN-LABEL: {{^}}tbuffer_store_d16_xyzw: + +; UNPACKED: flat_load_ushort v[[HI:[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc slc +; UNPACKED: flat_load_ushort v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] glc slc +; UNPACKED: flat_load_ushort v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] glc slc +; UNPACKED: flat_load_ushort v[[LO:[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc slc +; UNPACKED: tbuffer_store_format_d16_xyzw v{{\[}}[[LO]]:[[HI]]{{\]}}, v{{[0-9]+}}, 
s[{{[0-9]+:[0-9]+}}], dfmt:1, nfmt:2, 0 idxen + +; GFX81: v_or_b32_e32 v[[HI:[0-9]+]] +; GFX81: v_or_b32_e32 v[[LO:[0-9]+]] + +; GFX9: v_mov_b32_e32 v[[LO:[0-9]+]] +; GFX9: v_mov_b32_e32 v[[HI:[0-9]+]] + +; PACKED: tbuffer_store_format_d16_xyzw v{{\[}}[[LO]]:[[HI]]{{\]}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], dfmt:1, nfmt:2, 0 idxen +define amdgpu_kernel void @tbuffer_store_d16_xyzw(<4 x i32> %rsrc, <4 x half> %data, i32 %vindex) { +main_body: + call void @llvm.amdgcn.tbuffer.store.v4f16(<4 x half> %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0, i32 1, i32 2, i1 0, i1 0) + ret void +} + +declare void @llvm.amdgcn.tbuffer.store.f16(half, <4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1) +declare void @llvm.amdgcn.tbuffer.store.v2f16(<2 x half>, <4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1) +declare void @llvm.amdgcn.tbuffer.store.v4f16(<4 x half>, <4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1) diff --git a/test/CodeGen/AMDGPU/llvm.dbg.value.ll b/test/CodeGen/AMDGPU/llvm.dbg.value.ll index c4a76de5989c..ace859c95752 100644 --- a/test/CodeGen/AMDGPU/llvm.dbg.value.ll +++ b/test/CodeGen/AMDGPU/llvm.dbg.value.ll @@ -1,22 +1,37 @@ -; RUN: llc -O0 -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -verify-machineinstrs -mattr=-flat-for-global < %s | FileCheck %s +; RUN: llc -O0 -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,NOOPT %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,OPT %s -; CHECK-LABEL: {{^}}test_debug_value: -; CHECK: s_load_dwordx2 s[4:5] +; GCN-LABEL: {{^}}test_debug_value: +; NOOPT: s_load_dwordx2 s[4:5] ; FIXME: Why is the SGPR4_SGPR5 reference being removed from DBG_VALUE? -; CHECK: ; kill: %SGPR4_SGPR5 %SGPR4_SGPR5 -; CHECK-NEXT: ;DEBUG_VALUE: test_debug_value:globalptr_arg <- undef +; NOOPT: ; kill: def %sgpr8_sgpr9 killed %sgpr4_sgpr5 +; NOOPT-NEXT: ;DEBUG_VALUE: test_debug_value:globalptr_arg <- undef -; CHECK: buffer_store_dword -; CHECK: s_endpgm +; GCN: flat_store_dword +; GCN: s_endpgm define amdgpu_kernel void @test_debug_value(i32 addrspace(1)* nocapture %globalptr_arg) #0 !dbg !4 { entry: - tail call void @llvm.dbg.value(metadata i32 addrspace(1)* %globalptr_arg, i64 0, metadata !10, metadata !13), !dbg !14 + tail call void @llvm.dbg.value(metadata i32 addrspace(1)* %globalptr_arg, metadata !10, metadata !13), !dbg !14 store i32 123, i32 addrspace(1)* %globalptr_arg, align 4 ret void } -declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1 +; Check for infinite loop in some cases with dbg_value in +; SIOptimizeExecMaskingPreRA (somehow related to undef argument). 
+ +; GCN-LABEL: {{^}}only_undef_dbg_value: +; NOOPT: ;DEBUG_VALUE: test_debug_value:globalptr_arg <- [DW_OP_constu 1, DW_OP_swap, DW_OP_xderef] undef +; NOOPT-NEXT: s_endpgm + +; OPT: s_endpgm +define amdgpu_kernel void @only_undef_dbg_value() #1 { +bb: + call void @llvm.dbg.value(metadata <4 x float> undef, metadata !10, metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)) #2, !dbg !14 + ret void +} + +declare void @llvm.dbg.value(metadata, metadata, metadata) #1 attributes #0 = { nounwind } attributes #1 = { nounwind readnone } diff --git a/test/CodeGen/AMDGPU/llvm.memcpy.ll b/test/CodeGen/AMDGPU/llvm.memcpy.ll index 4068c020e705..77eb4900ea52 100644 --- a/test/CodeGen/AMDGPU/llvm.memcpy.ll +++ b/test/CodeGen/AMDGPU/llvm.memcpy.ll @@ -1,9 +1,9 @@ ; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -declare void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* nocapture, i8 addrspace(3)* nocapture, i32, i32, i1) nounwind -declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture, i64, i32, i1) nounwind -declare void @llvm.memcpy.p1i8.p2i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(2)* nocapture, i64, i32, i1) nounwind +declare void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* nocapture, i8 addrspace(3)* nocapture, i32, i1) nounwind +declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture, i64, i1) nounwind +declare void @llvm.memcpy.p1i8.p2i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(2)* nocapture, i64, i1) nounwind ; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align1: @@ -83,7 +83,7 @@ declare void @llvm.memcpy.p1i8.p2i8.i64(i8 addrspace(1)* nocapture, i8 addrspace define amdgpu_kernel void @test_small_memcpy_i64_lds_to_lds_align1(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind { %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)* %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 1, i1 false) nounwind + call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i1 false) nounwind ret void } @@ -128,7 +128,7 @@ define amdgpu_kernel void @test_small_memcpy_i64_lds_to_lds_align1(i64 addrspace define amdgpu_kernel void @test_small_memcpy_i64_lds_to_lds_align2(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind { %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)* %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 2, i1 false) nounwind + call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* align 2 %bcout, i8 addrspace(3)* align 2 %bcin, i32 32, i1 false) nounwind ret void } @@ -147,7 +147,7 @@ define amdgpu_kernel void @test_small_memcpy_i64_lds_to_lds_align2(i64 addrspace define amdgpu_kernel void @test_small_memcpy_i64_lds_to_lds_align4(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind { %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)* %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 4, i1 false) nounwind + call void @llvm.memcpy.p3i8.p3i8.i32(i8 
addrspace(3)* align 4 %bcout, i8 addrspace(3)* align 4 %bcin, i32 32, i1 false) nounwind ret void } @@ -164,7 +164,7 @@ define amdgpu_kernel void @test_small_memcpy_i64_lds_to_lds_align4(i64 addrspace define amdgpu_kernel void @test_small_memcpy_i64_lds_to_lds_align8(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind { %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)* %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 8, i1 false) nounwind + call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* align 8 %bcout, i8 addrspace(3)* align 8 %bcin, i32 32, i1 false) nounwind ret void } @@ -241,7 +241,7 @@ define amdgpu_kernel void @test_small_memcpy_i64_lds_to_lds_align8(i64 addrspace define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align1(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind { %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)* %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)* - call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 1, i1 false) nounwind + call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i1 false) nounwind ret void } @@ -284,7 +284,7 @@ define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align1(i64 add define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align2(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind { %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)* %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)* - call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 2, i1 false) nounwind + call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 2 %bcout, i8 addrspace(1)* align 2 %bcin, i64 32, i1 false) nounwind ret void } @@ -297,7 +297,7 @@ define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align2(i64 add define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align4(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind { %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)* %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)* - call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 4, i1 false) nounwind + call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 4 %bcout, i8 addrspace(1)* align 4 %bcin, i64 32, i1 false) nounwind ret void } @@ -310,7 +310,7 @@ define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align4(i64 add define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align8(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind { %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)* %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)* - call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 8, i1 false) nounwind + call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 8 %bcout, i8 addrspace(1)* align 8 %bcin, i64 32, i1 false) nounwind ret void } @@ -323,7 +323,7 @@ define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align8(i64 add define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align16(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind { %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)* %bcout = bitcast 
i64 addrspace(1)* %out to i8 addrspace(1)* - call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 16, i1 false) nounwind + call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 16 %bcout, i8 addrspace(1)* align 16 %bcin, i64 32, i1 false) nounwind ret void } @@ -342,7 +342,7 @@ define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align16(i64 ad ; SI-DAG: buffer_store_dwordx4 define amdgpu_kernel void @test_memcpy_const_string_align4(i8 addrspace(1)* noalias %out) nounwind { %str = bitcast [16 x i8] addrspace(2)* @hello.align4 to i8 addrspace(2)* - call void @llvm.memcpy.p1i8.p2i8.i64(i8 addrspace(1)* %out, i8 addrspace(2)* %str, i64 32, i32 4, i1 false) + call void @llvm.memcpy.p1i8.p2i8.i64(i8 addrspace(1)* align 4 %out, i8 addrspace(2)* align 4 %str, i64 32, i1 false) ret void } @@ -367,6 +367,6 @@ define amdgpu_kernel void @test_memcpy_const_string_align4(i8 addrspace(1)* noal ; SI: buffer_store_byte define amdgpu_kernel void @test_memcpy_const_string_align1(i8 addrspace(1)* noalias %out) nounwind { %str = bitcast [16 x i8] addrspace(2)* @hello.align1 to i8 addrspace(2)* - call void @llvm.memcpy.p1i8.p2i8.i64(i8 addrspace(1)* %out, i8 addrspace(2)* %str, i64 32, i32 1, i1 false) + call void @llvm.memcpy.p1i8.p2i8.i64(i8 addrspace(1)* %out, i8 addrspace(2)* %str, i64 32, i1 false) ret void } diff --git a/test/CodeGen/AMDGPU/load-hi16.ll b/test/CodeGen/AMDGPU/load-hi16.ll index e972dac84ea5..8039ec372e45 100644 --- a/test/CodeGen/AMDGPU/load-hi16.ll +++ b/test/CodeGen/AMDGPU/load-hi16.ll @@ -69,7 +69,6 @@ entry: ; FIXME: Remove m0 initialization ; GCN-LABEL: {{^}}load_local_hi_v2i16_zerolo_shift: ; GCN: s_waitcnt -; GFX9-NEXT: s_mov_b32 m0, -1 ; GFX9-NEXT: ds_read_u16 v0, v0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0 @@ -563,7 +562,6 @@ entry: ; FIXME: Is there a cost to using the extload over not? ; GCN-LABEL: {{^}}load_local_v2i16_split: ; GCN: s_waitcnt -; GFX9-NEXT: s_mov_b32 m0, -1 ; GFX9-NEXT: ds_read_u16 v1, v0 ; GFX9-NEXT: s_waitcnt ; GFX9-NEXT: ds_read_u16_d16_hi v1, v0 offset:2 diff --git a/test/CodeGen/AMDGPU/load-local-f32.ll b/test/CodeGen/AMDGPU/load-local-f32.ll index 09d7145424de..f035d22018a7 100644 --- a/test/CodeGen/AMDGPU/load-local-f32.ll +++ b/test/CodeGen/AMDGPU/load-local-f32.ll @@ -1,9 +1,10 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefixes=EG,FUNC %s ; FUNC-LABEL: {{^}}load_f32_local: -; GCN: s_mov_b32 m0 +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 ; GCN: ds_read_b32 ; EG: LDS_READ_RET @@ -15,7 +16,9 @@ entry: } ; FUNC-LABEL: {{^}}load_v2f32_local: -; GCN: s_mov_b32 m0 +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_read_b64 ; EG: LDS_READ_RET @@ -29,6 +32,9 @@ entry: ; FIXME: should this do a read2_b64? 
; FUNC-LABEL: {{^}}local_load_v3f32: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN-DAG: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:8 ; GCN-DAG: ds_read_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+$}} ; GCN: s_waitcnt @@ -46,6 +52,9 @@ entry: } ; FUNC-LABEL: {{^}}local_load_v4f32: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_read2_b64 ; EG: LDS_READ_RET @@ -60,6 +69,9 @@ entry: } ; FUNC-LABEL: {{^}}local_load_v8f32: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_read2_b64 ; GCN: ds_read2_b64 @@ -79,6 +91,9 @@ entry: } ; FUNC-LABEL: {{^}}local_load_v16f32: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_read2_b64 ; GCN: ds_read2_b64 ; GCN: ds_read2_b64 diff --git a/test/CodeGen/AMDGPU/load-local-f64.ll b/test/CodeGen/AMDGPU/load-local-f64.ll index 9ad6c087bf2e..ffb67101fd78 100644 --- a/test/CodeGen/AMDGPU/load-local-f64.ll +++ b/test/CodeGen/AMDGPU/load-local-f64.ll @@ -1,9 +1,13 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,FUNC %s +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefixes=EG,FUNC %s ; FUNC-LABEL: {{^}}local_load_f64: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_read_b64 [[VAL:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}{{$}} ; GCN: ds_write_b64 v{{[0-9]+}}, [[VAL]] @@ -16,6 +20,9 @@ define amdgpu_kernel void @local_load_f64(double addrspace(3)* %out, double addr } ; FUNC-LABEL: {{^}}local_load_v2f64: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_read2_b64 ; EG: LDS_READ_RET @@ -30,6 +37,9 @@ entry: } ; FUNC-LABEL: {{^}}local_load_v3f64: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN-DAG: ds_read2_b64 ; GCN-DAG: ds_read_b64 @@ -47,6 +57,9 @@ entry: } ; FUNC-LABEL: {{^}}local_load_v4f64: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_read2_b64 ; GCN: ds_read2_b64 @@ -67,6 +80,9 @@ entry: } ; FUNC-LABEL: {{^}}local_load_v8f64: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_read2_b64 ; GCN: ds_read2_b64 ; GCN: ds_read2_b64 @@ -96,6 +112,9 @@ entry: } ; FUNC-LABEL: {{^}}local_load_v16f64: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_read2_b64 ; GCN: ds_read2_b64 ; GCN: ds_read2_b64 diff --git a/test/CodeGen/AMDGPU/load-local-i1.ll b/test/CodeGen/AMDGPU/load-local-i1.ll index 089ac3711698..0320debc828d 100644 --- a/test/CodeGen/AMDGPU/load-local-i1.ll +++ b/test/CodeGen/AMDGPU/load-local-i1.ll @@ -1,8 +1,12 @@ -; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=amdgcn
-mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cypress < %s | FileCheck -check-prefixes=EG,FUNC %s ; FUNC-LABEL: {{^}}local_load_i1: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_read_u8 ; GCN: v_and_b32_e32 v{{[0-9]+}}, 1 ; GCN: ds_write_b8 @@ -17,6 +21,8 @@ define amdgpu_kernel void @local_load_i1(i1 addrspace(3)* %out, i1 addrspace(3)* } ; FUNC-LABEL: {{^}}local_load_v2i1: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 define amdgpu_kernel void @local_load_v2i1(<2 x i1> addrspace(3)* %out, <2 x i1> addrspace(3)* %in) #0 { %load = load <2 x i1>, <2 x i1> addrspace(3)* %in store <2 x i1> %load, <2 x i1> addrspace(3)* %out @@ -24,6 +30,8 @@ define amdgpu_kernel void @local_load_v2i1(<2 x i1> addrspace(3)* %out, <2 x i1> } ; FUNC-LABEL: {{^}}local_load_v3i1: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 define amdgpu_kernel void @local_load_v3i1(<3 x i1> addrspace(3)* %out, <3 x i1> addrspace(3)* %in) #0 { %load = load <3 x i1>, <3 x i1> addrspace(3)* %in store <3 x i1> %load, <3 x i1> addrspace(3)* %out @@ -31,6 +39,8 @@ define amdgpu_kernel void @local_load_v3i1(<3 x i1> addrspace(3)* %out, <3 x i1> } ; FUNC-LABEL: {{^}}local_load_v4i1: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 define amdgpu_kernel void @local_load_v4i1(<4 x i1> addrspace(3)* %out, <4 x i1> addrspace(3)* %in) #0 { %load = load <4 x i1>, <4 x i1> addrspace(3)* %in store <4 x i1> %load, <4 x i1> addrspace(3)* %out @@ -38,6 +48,8 @@ define amdgpu_kernel void @local_load_v4i1(<4 x i1> addrspace(3)* %out, <4 x i1> } ; FUNC-LABEL: {{^}}local_load_v8i1: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 define amdgpu_kernel void @local_load_v8i1(<8 x i1> addrspace(3)* %out, <8 x i1> addrspace(3)* %in) #0 { %load = load <8 x i1>, <8 x i1> addrspace(3)* %in store <8 x i1> %load, <8 x i1> addrspace(3)* %out @@ -45,6 +57,8 @@ define amdgpu_kernel void @local_load_v8i1(<8 x i1> addrspace(3)* %out, <8 x i1> } ; FUNC-LABEL: {{^}}local_load_v16i1: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 define amdgpu_kernel void @local_load_v16i1(<16 x i1> addrspace(3)* %out, <16 x i1> addrspace(3)* %in) #0 { %load = load <16 x i1>, <16 x i1> addrspace(3)* %in store <16 x i1> %load, <16 x i1> addrspace(3)* %out @@ -52,6 +66,8 @@ define amdgpu_kernel void @local_load_v16i1(<16 x i1> addrspace(3)* %out, <16 x } ; FUNC-LABEL: {{^}}local_load_v32i1: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 define amdgpu_kernel void @local_load_v32i1(<32 x i1> addrspace(3)* %out, <32 x i1> addrspace(3)* %in) #0 { %load = load <32 x i1>, <32 x i1> addrspace(3)* %in store <32 x i1> %load, <32 x i1> addrspace(3)* %out @@ -59,6 +75,8 @@ define amdgpu_kernel void @local_load_v32i1(<32 x i1> addrspace(3)* %out, <32 x } ; FUNC-LABEL: {{^}}local_load_v64i1: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 define amdgpu_kernel void @local_load_v64i1(<64 x i1> addrspace(3)* %out, <64 x i1> addrspace(3)* %in) #0 { %load = load <64 x i1>, <64 x i1> addrspace(3)* %in store <64 x i1> %load, <64 x i1> addrspace(3)* %out @@ -66,6 +84,9 @@ define amdgpu_kernel void @local_load_v64i1(<64 x i1> addrspace(3)* %out, <64 x } ; FUNC-LABEL: {{^}}local_zextload_i1_to_i32: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_read_u8 ; GCN: ds_write_b32 define 
amdgpu_kernel void @local_zextload_i1_to_i32(i32 addrspace(3)* %out, i1 addrspace(3)* %in) #0 { @@ -76,6 +97,9 @@ define amdgpu_kernel void @local_zextload_i1_to_i32(i32 addrspace(3)* %out, i1 a } ; FUNC-LABEL: {{^}}local_sextload_i1_to_i32: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_read_u8 ; GCN: v_bfe_i32 {{v[0-9]+}}, {{v[0-9]+}}, 0, 1{{$}} ; GCN: ds_write_b32 @@ -90,6 +114,8 @@ define amdgpu_kernel void @local_sextload_i1_to_i32(i32 addrspace(3)* %out, i1 a } ; FUNC-LABEL: {{^}}local_zextload_v1i1_to_v1i32: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 define amdgpu_kernel void @local_zextload_v1i1_to_v1i32(<1 x i32> addrspace(3)* %out, <1 x i1> addrspace(3)* %in) #0 { %load = load <1 x i1>, <1 x i1> addrspace(3)* %in %ext = zext <1 x i1> %load to <1 x i32> @@ -98,6 +124,8 @@ define amdgpu_kernel void @local_zextload_v1i1_to_v1i32(<1 x i32> addrspace(3)* } ; FUNC-LABEL: {{^}}local_sextload_v1i1_to_v1i32: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 define amdgpu_kernel void @local_sextload_v1i1_to_v1i32(<1 x i32> addrspace(3)* %out, <1 x i1> addrspace(3)* %in) #0 { %load = load <1 x i1>, <1 x i1> addrspace(3)* %in %ext = sext <1 x i1> %load to <1 x i32> @@ -106,6 +134,8 @@ define amdgpu_kernel void @local_sextload_v1i1_to_v1i32(<1 x i32> addrspace(3)* } ; FUNC-LABEL: {{^}}local_zextload_v2i1_to_v2i32: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 define amdgpu_kernel void @local_zextload_v2i1_to_v2i32(<2 x i32> addrspace(3)* %out, <2 x i1> addrspace(3)* %in) #0 { %load = load <2 x i1>, <2 x i1> addrspace(3)* %in %ext = zext <2 x i1> %load to <2 x i32> @@ -114,6 +144,8 @@ define amdgpu_kernel void @local_zextload_v2i1_to_v2i32(<2 x i32> addrspace(3)* } ; FUNC-LABEL: {{^}}local_sextload_v2i1_to_v2i32: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 define amdgpu_kernel void @local_sextload_v2i1_to_v2i32(<2 x i32> addrspace(3)* %out, <2 x i1> addrspace(3)* %in) #0 { %load = load <2 x i1>, <2 x i1> addrspace(3)* %in %ext = sext <2 x i1> %load to <2 x i32> @@ -122,6 +154,8 @@ define amdgpu_kernel void @local_sextload_v2i1_to_v2i32(<2 x i32> addrspace(3)* } ; FUNC-LABEL: {{^}}local_zextload_v3i1_to_v3i32: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 define amdgpu_kernel void @local_zextload_v3i1_to_v3i32(<3 x i32> addrspace(3)* %out, <3 x i1> addrspace(3)* %in) #0 { %load = load <3 x i1>, <3 x i1> addrspace(3)* %in %ext = zext <3 x i1> %load to <3 x i32> @@ -130,6 +164,8 @@ define amdgpu_kernel void @local_zextload_v3i1_to_v3i32(<3 x i32> addrspace(3)* } ; FUNC-LABEL: {{^}}local_sextload_v3i1_to_v3i32: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 define amdgpu_kernel void @local_sextload_v3i1_to_v3i32(<3 x i32> addrspace(3)* %out, <3 x i1> addrspace(3)* %in) #0 { %load = load <3 x i1>, <3 x i1> addrspace(3)* %in %ext = sext <3 x i1> %load to <3 x i32> @@ -138,6 +174,8 @@ define amdgpu_kernel void @local_sextload_v3i1_to_v3i32(<3 x i32> addrspace(3)* } ; FUNC-LABEL: {{^}}local_zextload_v4i1_to_v4i32: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 define amdgpu_kernel void @local_zextload_v4i1_to_v4i32(<4 x i32> addrspace(3)* %out, <4 x i1> addrspace(3)* %in) #0 { %load = load <4 x i1>, <4 x i1> addrspace(3)* %in %ext = zext <4 x i1> %load to <4 x i32> @@ -146,6 +184,8 @@ define amdgpu_kernel void @local_zextload_v4i1_to_v4i32(<4 x i32> addrspace(3)* } ; FUNC-LABEL: {{^}}local_sextload_v4i1_to_v4i32: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 define amdgpu_kernel void @local_sextload_v4i1_to_v4i32(<4 x i32> addrspace(3)* %out, <4 x i1> addrspace(3)* %in) #0 { %load = load <4 x i1>, <4 x i1> addrspace(3)* %in %ext = sext <4 x i1> 
%load to <4 x i32> @@ -154,6 +194,8 @@ define amdgpu_kernel void @local_sextload_v4i1_to_v4i32(<4 x i32> addrspace(3)* } ; FUNC-LABEL: {{^}}local_zextload_v8i1_to_v8i32: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 define amdgpu_kernel void @local_zextload_v8i1_to_v8i32(<8 x i32> addrspace(3)* %out, <8 x i1> addrspace(3)* %in) #0 { %load = load <8 x i1>, <8 x i1> addrspace(3)* %in %ext = zext <8 x i1> %load to <8 x i32> @@ -162,6 +204,8 @@ define amdgpu_kernel void @local_zextload_v8i1_to_v8i32(<8 x i32> addrspace(3)* } ; FUNC-LABEL: {{^}}local_sextload_v8i1_to_v8i32: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 define amdgpu_kernel void @local_sextload_v8i1_to_v8i32(<8 x i32> addrspace(3)* %out, <8 x i1> addrspace(3)* %in) #0 { %load = load <8 x i1>, <8 x i1> addrspace(3)* %in %ext = sext <8 x i1> %load to <8 x i32> @@ -170,6 +214,8 @@ define amdgpu_kernel void @local_sextload_v8i1_to_v8i32(<8 x i32> addrspace(3)* } ; FUNC-LABEL: {{^}}local_zextload_v16i1_to_v16i32: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 define amdgpu_kernel void @local_zextload_v16i1_to_v16i32(<16 x i32> addrspace(3)* %out, <16 x i1> addrspace(3)* %in) #0 { %load = load <16 x i1>, <16 x i1> addrspace(3)* %in %ext = zext <16 x i1> %load to <16 x i32> @@ -178,6 +224,8 @@ define amdgpu_kernel void @local_zextload_v16i1_to_v16i32(<16 x i32> addrspace(3 } ; FUNC-LABEL: {{^}}local_sextload_v16i1_to_v16i32: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 define amdgpu_kernel void @local_sextload_v16i1_to_v16i32(<16 x i32> addrspace(3)* %out, <16 x i1> addrspace(3)* %in) #0 { %load = load <16 x i1>, <16 x i1> addrspace(3)* %in %ext = sext <16 x i1> %load to <16 x i32> @@ -186,6 +234,8 @@ define amdgpu_kernel void @local_sextload_v16i1_to_v16i32(<16 x i32> addrspace(3 } ; FUNC-LABEL: {{^}}local_zextload_v32i1_to_v32i32: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 define amdgpu_kernel void @local_zextload_v32i1_to_v32i32(<32 x i32> addrspace(3)* %out, <32 x i1> addrspace(3)* %in) #0 { %load = load <32 x i1>, <32 x i1> addrspace(3)* %in %ext = zext <32 x i1> %load to <32 x i32> @@ -194,6 +244,8 @@ define amdgpu_kernel void @local_zextload_v32i1_to_v32i32(<32 x i32> addrspace(3 } ; FUNC-LABEL: {{^}}local_sextload_v32i1_to_v32i32: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 define amdgpu_kernel void @local_sextload_v32i1_to_v32i32(<32 x i32> addrspace(3)* %out, <32 x i1> addrspace(3)* %in) #0 { %load = load <32 x i1>, <32 x i1> addrspace(3)* %in %ext = sext <32 x i1> %load to <32 x i32> @@ -202,6 +254,8 @@ define amdgpu_kernel void @local_sextload_v32i1_to_v32i32(<32 x i32> addrspace(3 } ; FUNC-LABEL: {{^}}local_zextload_v64i1_to_v64i32: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 define amdgpu_kernel void @local_zextload_v64i1_to_v64i32(<64 x i32> addrspace(3)* %out, <64 x i1> addrspace(3)* %in) #0 { %load = load <64 x i1>, <64 x i1> addrspace(3)* %in %ext = zext <64 x i1> %load to <64 x i32> @@ -210,6 +264,8 @@ define amdgpu_kernel void @local_zextload_v64i1_to_v64i32(<64 x i32> addrspace(3 } ; FUNC-LABEL: {{^}}local_sextload_v64i1_to_v64i32: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 define amdgpu_kernel void @local_sextload_v64i1_to_v64i32(<64 x i32> addrspace(3)* %out, <64 x i1> addrspace(3)* %in) #0 { %load = load <64 x i1>, <64 x i1> addrspace(3)* %in %ext = sext <64 x i1> %load to <64 x i32> @@ -218,6 +274,9 @@ define amdgpu_kernel void @local_sextload_v64i1_to_v64i32(<64 x i32> addrspace(3 } ; FUNC-LABEL: {{^}}local_zextload_i1_to_i64: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN-DAG: ds_read_u8 [[LOAD:v[0-9]+]], ; GCN-DAG: v_mov_b32_e32 {{v[0-9]+}}, 
0{{$}} ; GCN: ds_write_b64 @@ -229,6 +288,9 @@ define amdgpu_kernel void @local_zextload_i1_to_i64(i64 addrspace(3)* %out, i1 a } ; FUNC-LABEL: {{^}}local_sextload_i1_to_i64: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_read_u8 [[LOAD:v[0-9]+]], ; GCN: v_bfe_i32 [[BFE:v[0-9]+]], {{v[0-9]+}}, 0, 1{{$}} ; GCN: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[BFE]] @@ -241,6 +303,8 @@ define amdgpu_kernel void @local_sextload_i1_to_i64(i64 addrspace(3)* %out, i1 a } ; FUNC-LABEL: {{^}}local_zextload_v1i1_to_v1i64: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 define amdgpu_kernel void @local_zextload_v1i1_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i1> addrspace(3)* %in) #0 { %load = load <1 x i1>, <1 x i1> addrspace(3)* %in %ext = zext <1 x i1> %load to <1 x i64> @@ -249,6 +313,8 @@ define amdgpu_kernel void @local_zextload_v1i1_to_v1i64(<1 x i64> addrspace(3)* } ; FUNC-LABEL: {{^}}local_sextload_v1i1_to_v1i64: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 define amdgpu_kernel void @local_sextload_v1i1_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i1> addrspace(3)* %in) #0 { %load = load <1 x i1>, <1 x i1> addrspace(3)* %in %ext = sext <1 x i1> %load to <1 x i64> @@ -257,6 +323,8 @@ define amdgpu_kernel void @local_sextload_v1i1_to_v1i64(<1 x i64> addrspace(3)* } ; FUNC-LABEL: {{^}}local_zextload_v2i1_to_v2i64: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 define amdgpu_kernel void @local_zextload_v2i1_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i1> addrspace(3)* %in) #0 { %load = load <2 x i1>, <2 x i1> addrspace(3)* %in %ext = zext <2 x i1> %load to <2 x i64> @@ -265,6 +333,8 @@ define amdgpu_kernel void @local_zextload_v2i1_to_v2i64(<2 x i64> addrspace(3)* } ; FUNC-LABEL: {{^}}local_sextload_v2i1_to_v2i64: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 define amdgpu_kernel void @local_sextload_v2i1_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i1> addrspace(3)* %in) #0 { %load = load <2 x i1>, <2 x i1> addrspace(3)* %in %ext = sext <2 x i1> %load to <2 x i64> @@ -273,6 +343,8 @@ define amdgpu_kernel void @local_sextload_v2i1_to_v2i64(<2 x i64> addrspace(3)* } ; FUNC-LABEL: {{^}}local_zextload_v3i1_to_v3i64: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 define amdgpu_kernel void @local_zextload_v3i1_to_v3i64(<3 x i64> addrspace(3)* %out, <3 x i1> addrspace(3)* %in) #0 { %load = load <3 x i1>, <3 x i1> addrspace(3)* %in %ext = zext <3 x i1> %load to <3 x i64> @@ -281,6 +353,8 @@ define amdgpu_kernel void @local_zextload_v3i1_to_v3i64(<3 x i64> addrspace(3)* } ; FUNC-LABEL: {{^}}local_sextload_v3i1_to_v3i64: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 define amdgpu_kernel void @local_sextload_v3i1_to_v3i64(<3 x i64> addrspace(3)* %out, <3 x i1> addrspace(3)* %in) #0 { %load = load <3 x i1>, <3 x i1> addrspace(3)* %in %ext = sext <3 x i1> %load to <3 x i64> @@ -289,6 +363,8 @@ define amdgpu_kernel void @local_sextload_v3i1_to_v3i64(<3 x i64> addrspace(3)* } ; FUNC-LABEL: {{^}}local_zextload_v4i1_to_v4i64: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 define amdgpu_kernel void @local_zextload_v4i1_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i1> addrspace(3)* %in) #0 { %load = load <4 x i1>, <4 x i1> addrspace(3)* %in %ext = zext <4 x i1> %load to <4 x i64> @@ -297,6 +373,8 @@ define amdgpu_kernel void @local_zextload_v4i1_to_v4i64(<4 x i64> addrspace(3)* } ; FUNC-LABEL: {{^}}local_sextload_v4i1_to_v4i64: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 define amdgpu_kernel void @local_sextload_v4i1_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i1> addrspace(3)* %in) #0 { %load = load <4 x i1>, <4 x i1> addrspace(3)* %in %ext = sext <4 x i1> %load to <4 
x i64> @@ -305,6 +383,8 @@ define amdgpu_kernel void @local_sextload_v4i1_to_v4i64(<4 x i64> addrspace(3)* } ; FUNC-LABEL: {{^}}local_zextload_v8i1_to_v8i64: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 define amdgpu_kernel void @local_zextload_v8i1_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i1> addrspace(3)* %in) #0 { %load = load <8 x i1>, <8 x i1> addrspace(3)* %in %ext = zext <8 x i1> %load to <8 x i64> @@ -313,6 +393,8 @@ define amdgpu_kernel void @local_zextload_v8i1_to_v8i64(<8 x i64> addrspace(3)* } ; FUNC-LABEL: {{^}}local_sextload_v8i1_to_v8i64: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 define amdgpu_kernel void @local_sextload_v8i1_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i1> addrspace(3)* %in) #0 { %load = load <8 x i1>, <8 x i1> addrspace(3)* %in %ext = sext <8 x i1> %load to <8 x i64> @@ -321,6 +403,8 @@ define amdgpu_kernel void @local_sextload_v8i1_to_v8i64(<8 x i64> addrspace(3)* } ; FUNC-LABEL: {{^}}local_zextload_v16i1_to_v16i64: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 define amdgpu_kernel void @local_zextload_v16i1_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i1> addrspace(3)* %in) #0 { %load = load <16 x i1>, <16 x i1> addrspace(3)* %in %ext = zext <16 x i1> %load to <16 x i64> @@ -329,6 +413,8 @@ define amdgpu_kernel void @local_zextload_v16i1_to_v16i64(<16 x i64> addrspace(3 } ; FUNC-LABEL: {{^}}local_sextload_v16i1_to_v16i64: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 define amdgpu_kernel void @local_sextload_v16i1_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i1> addrspace(3)* %in) #0 { %load = load <16 x i1>, <16 x i1> addrspace(3)* %in %ext = sext <16 x i1> %load to <16 x i64> @@ -337,6 +423,8 @@ define amdgpu_kernel void @local_sextload_v16i1_to_v16i64(<16 x i64> addrspace(3 } ; FUNC-LABEL: {{^}}local_zextload_v32i1_to_v32i64: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 define amdgpu_kernel void @local_zextload_v32i1_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i1> addrspace(3)* %in) #0 { %load = load <32 x i1>, <32 x i1> addrspace(3)* %in %ext = zext <32 x i1> %load to <32 x i64> @@ -345,6 +433,8 @@ define amdgpu_kernel void @local_zextload_v32i1_to_v32i64(<32 x i64> addrspace(3 } ; FUNC-LABEL: {{^}}local_sextload_v32i1_to_v32i64: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 define amdgpu_kernel void @local_sextload_v32i1_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i1> addrspace(3)* %in) #0 { %load = load <32 x i1>, <32 x i1> addrspace(3)* %in %ext = sext <32 x i1> %load to <32 x i64> @@ -353,6 +443,8 @@ define amdgpu_kernel void @local_sextload_v32i1_to_v32i64(<32 x i64> addrspace(3 } ; FUNC-LABEL: {{^}}local_zextload_v64i1_to_v64i64: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 define amdgpu_kernel void @local_zextload_v64i1_to_v64i64(<64 x i64> addrspace(3)* %out, <64 x i1> addrspace(3)* %in) #0 { %load = load <64 x i1>, <64 x i1> addrspace(3)* %in %ext = zext <64 x i1> %load to <64 x i64> @@ -361,6 +453,8 @@ define amdgpu_kernel void @local_zextload_v64i1_to_v64i64(<64 x i64> addrspace(3 } ; FUNC-LABEL: {{^}}local_sextload_v64i1_to_v64i64: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 define amdgpu_kernel void @local_sextload_v64i1_to_v64i64(<64 x i64> addrspace(3)* %out, <64 x i1> addrspace(3)* %in) #0 { %load = load <64 x i1>, <64 x i1> addrspace(3)* %in %ext = sext <64 x i1> %load to <64 x i64> diff --git a/test/CodeGen/AMDGPU/load-local-i16.ll b/test/CodeGen/AMDGPU/load-local-i16.ll index 875af807ad4b..d3557c14540c 100644 --- a/test/CodeGen/AMDGPU/load-local-i16.ll +++ b/test/CodeGen/AMDGPU/load-local-i16.ll @@ -1,8 +1,12 @@ -; RUN: llc -march=amdgcn 
-verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI,FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,FUNC %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI,SICIVI,FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,GFX89,FUNC %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,GFX89,FUNC %s ; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}local_load_i16: +; GFX9-NOT: m0 +; SICIVI: s_mov_b32 m0 + ; GCN: ds_read_u16 v{{[0-9]+}} ; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z @@ -18,6 +22,9 @@ entry: } ; FUNC-LABEL: {{^}}local_load_v2i16: +; GFX9-NOT: m0 +; SICIVI: s_mov_b32 m0 + ; GCN: ds_read_b32 ; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z @@ -33,6 +40,9 @@ entry: } ; FUNC-LABEL: {{^}}local_load_v3i16: +; GFX9-NOT: m0 +; SICIVI: s_mov_b32 m0 + ; GCN: ds_read_b64 ; GCN-DAG: ds_write_b32 ; GCN-DAG: ds_write_b16 @@ -47,6 +57,9 @@ entry: } ; FUNC-LABEL: {{^}}local_load_v4i16: +; GFX9-NOT: m0 +; SICIVI: s_mov_b32 m0 + ; GCN: ds_read_b64 ; EG: LDS_READ_RET @@ -59,6 +72,9 @@ entry: } ; FUNC-LABEL: {{^}}local_load_v8i16: +; GFX9-NOT: m0 +; SICIVI: s_mov_b32 m0 + ; GCN: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}} ; EG: LDS_READ_RET @@ -73,6 +89,9 @@ entry: } ; FUNC-LABEL: {{^}}local_load_v16i16: +; GFX9-NOT: m0 +; SICIVI: s_mov_b32 m0 + ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:3{{$}} ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:1 offset1:2{{$}} @@ -94,6 +113,9 @@ entry: } ; FUNC-LABEL: {{^}}local_zextload_i16_to_i32: +; GFX9-NOT: m0 +; SICIVI: s_mov_b32 m0 + ; GCN: ds_read_u16 ; GCN: ds_write_b32 @@ -111,7 +133,10 @@ define amdgpu_kernel void @local_zextload_i16_to_i32(i32 addrspace(3)* %out, i16 ; FUNC-LABEL: {{^}}local_sextload_i16_to_i32: ; GCN-NOT: s_wqm_b64 -; GCN: s_mov_b32 m0 + +; GFX9-NOT: m0 +; SICIVI: s_mov_b32 m0 + ; GCN: ds_read_i16 ; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z @@ -129,6 +154,9 @@ define amdgpu_kernel void @local_sextload_i16_to_i32(i32 addrspace(3)* %out, i16 } ; FUNC-LABEL: {{^}}local_zextload_v1i16_to_v1i32: +; GFX9-NOT: m0 +; SICIVI: s_mov_b32 m0 + ; GCN: ds_read_u16 ; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z @@ -144,6 +172,9 @@ define amdgpu_kernel void @local_zextload_v1i16_to_v1i32(<1 x i32> addrspace(3)* } ; FUNC-LABEL: {{^}}local_sextload_v1i16_to_v1i32: +; GFX9-NOT: m0 +; SICIVI: s_mov_b32 m0 + ; GCN: ds_read_i16 ; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z @@ -162,7 +193,9 @@ define amdgpu_kernel void @local_sextload_v1i16_to_v1i32(<1 x i32> addrspace(3)* ; FUNC-LABEL: {{^}}local_zextload_v2i16_to_v2i32: ; GCN-NOT: s_wqm_b64 -; GCN: s_mov_b32 m0 +; GFX9-NOT: m0 +; SICIVI: s_mov_b32 m0 + ; GCN: ds_read_b32 ; EG: LDS_READ_RET @@ -175,7 +208,9 @@ define amdgpu_kernel void @local_zextload_v2i16_to_v2i32(<2 x i32> addrspace(3)* ; FUNC-LABEL: {{^}}local_sextload_v2i16_to_v2i32: ; GCN-NOT: s_wqm_b64 -; GCN: s_mov_b32 m0 +; GFX9-NOT: m0 +; SICIVI: s_mov_b32 m0 + ; GCN: ds_read_b32 ; EG: LDS_READ_RET @@ -189,6 +224,9 @@ define amdgpu_kernel void @local_sextload_v2i16_to_v2i32(<2 x i32> addrspace(3)* } ; FUNC-LABEL: {{^}}local_local_zextload_v3i16_to_v3i32: +; GFX9-NOT: m0 +; SICIVI: s_mov_b32 m0 + ; GCN: ds_read_b64 ; GCN-DAG: ds_write_b32 ; GCN-DAG: 
ds_write_b64 @@ -203,6 +241,9 @@ entry: } ; FUNC-LABEL: {{^}}local_local_sextload_v3i16_to_v3i32: +; GFX9-NOT: m0 +; SICIVI: s_mov_b32 m0 + ; GCN: ds_read_b64 ; GCN-DAG: ds_write_b32 ; GCN-DAG: ds_write_b64 @@ -221,7 +262,9 @@ entry: ; FUNC-LABEL: {{^}}local_local_zextload_v4i16_to_v4i32: ; GCN-NOT: s_wqm_b64 -; GCN: s_mov_b32 m0 +; GFX9-NOT: m0 +; SICIVI: s_mov_b32 m0 + ; GCN: ds_read_b64 ; EG: LDS_READ_RET @@ -235,7 +278,9 @@ define amdgpu_kernel void @local_local_zextload_v4i16_to_v4i32(<4 x i32> addrspa ; FUNC-LABEL: {{^}}local_sextload_v4i16_to_v4i32: ; GCN-NOT: s_wqm_b64 -; GCN: s_mov_b32 m0 +; GFX9-NOT: m0 +; SICIVI: s_mov_b32 m0 + ; GCN: ds_read_b64 ; EG: LDS_READ_RET @@ -252,6 +297,9 @@ define amdgpu_kernel void @local_sextload_v4i16_to_v4i32(<4 x i32> addrspace(3)* } ; FUNC-LABEL: {{^}}local_zextload_v8i16_to_v8i32: +; GFX9-NOT: m0 +; SICIVI: s_mov_b32 m0 + ; GCN: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}} ; EG: LDS_READ_RET @@ -266,6 +314,9 @@ define amdgpu_kernel void @local_zextload_v8i16_to_v8i32(<8 x i32> addrspace(3)* } ; FUNC-LABEL: {{^}}local_sextload_v8i16_to_v8i32: +; GFX9-NOT: m0 +; SICIVI: s_mov_b32 m0 + ; GCN: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}} ; EG: LDS_READ_RET @@ -288,6 +339,9 @@ define amdgpu_kernel void @local_sextload_v8i16_to_v8i32(<8 x i32> addrspace(3)* } ; FUNC-LABEL: {{^}}local_zextload_v16i16_to_v16i32: +; GFX9-NOT: m0 +; SICIVI: s_mov_b32 m0 + ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}} ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3{{$}} @@ -312,6 +366,9 @@ define amdgpu_kernel void @local_zextload_v16i16_to_v16i32(<16 x i32> addrspace( } ; FUNC-LABEL: {{^}}local_sextload_v16i16_to_v16i32: +; GFX9-NOT: m0 +; SICIVI: s_mov_b32 m0 + ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}} ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3{{$}} @@ -348,6 +405,9 @@ define amdgpu_kernel void @local_sextload_v16i16_to_v16i32(<16 x i32> addrspace( } ; FUNC-LABEL: {{^}}local_zextload_v32i16_to_v32i32: +; GFX9-NOT: m0 +; SICIVI: s_mov_b32 m0 + ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}} ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:4 offset1:5 @@ -377,6 +437,9 @@ define amdgpu_kernel void @local_zextload_v32i16_to_v32i32(<32 x i32> addrspace( } ; FUNC-LABEL: {{^}}local_sextload_v32i16_to_v32i32: +; GFX9-NOT: m0 +; SICIVI: s_mov_b32 m0 + ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3{{$}} ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:4 offset1:5 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}} @@ -414,6 +477,9 @@ define amdgpu_kernel void @local_sextload_v32i16_to_v32i32(<32 x i32> addrspace( } ; FUNC-LABEL: {{^}}local_zextload_v64i16_to_v64i32: +; GFX9-NOT: m0 +; SICIVI: s_mov_b32 m0 + ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:14 offset1:15 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}} ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3 @@ -479,6 +545,8 @@ define amdgpu_kernel void @local_zextload_v64i16_to_v64i32(<64 x i32> addrspace( } ; FUNC-LABEL: {{^}}local_sextload_v64i16_to_v64i32: +; GFX9-NOT: m0 +; SICIVI: s_mov_b32 m0 ; EG: LDS_READ_RET ; EG: LDS_READ_RET @@ -520,6 +588,9 @@ define amdgpu_kernel void 
@local_sextload_v64i16_to_v64i32(<64 x i32> addrspace( } ; FUNC-LABEL: {{^}}local_zextload_i16_to_i64: +; GFX9-NOT: m0 +; SICIVI: s_mov_b32 m0 + ; GCN-DAG: ds_read_u16 v[[LO:[0-9]+]], ; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}} @@ -538,13 +609,16 @@ define amdgpu_kernel void @local_zextload_i16_to_i64(i64 addrspace(3)* %out, i16 } ; FUNC-LABEL: {{^}}local_sextload_i16_to_i64: +; GFX9-NOT: m0 +; SICIVI: s_mov_b32 m0 + ; FIXME: Need to optimize this sequence to avoid an extra shift. ; t25: i32,ch = load t12, t10, undef:i32 ; t28: i64 = any_extend t25 ; t30: i64 = sign_extend_inreg t28, ValueType:ch:i16 ; SI: ds_read_i16 v[[LO:[0-9]+]], -; VI: ds_read_u16 v[[ULO:[0-9]+]] -; VI: v_bfe_i32 v[[LO:[0-9]+]], v[[ULO]], 0, 16 +; GFX89: ds_read_u16 v[[ULO:[0-9]+]] +; GFX89: v_bfe_i32 v[[LO:[0-9]+]], v[[ULO]], 0, 16 ; GCN-DAG: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]] ; GCN: ds_write_b64 v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]] @@ -565,6 +639,9 @@ define amdgpu_kernel void @local_sextload_i16_to_i64(i64 addrspace(3)* %out, i16 } ; FUNC-LABEL: {{^}}local_zextload_v1i16_to_v1i64: +; GFX9-NOT: m0 +; SICIVI: s_mov_b32 m0 + ; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z ; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]] @@ -579,6 +656,9 @@ define amdgpu_kernel void @local_zextload_v1i16_to_v1i64(<1 x i64> addrspace(3)* } ; FUNC-LABEL: {{^}}local_sextload_v1i16_to_v1i64: +; GFX9-NOT: m0 +; SICIVI: s_mov_b32 m0 + ; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z ; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]] @@ -596,6 +676,9 @@ define amdgpu_kernel void @local_sextload_v1i16_to_v1i64(<1 x i64> addrspace(3)* } ; FUNC-LABEL: {{^}}local_zextload_v2i16_to_v2i64: +; GFX9-NOT: m0 +; SICIVI: s_mov_b32 m0 + ; EG: LDS_READ_RET define amdgpu_kernel void @local_zextload_v2i16_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i16> addrspace(3)* %in) #0 { @@ -606,6 +689,9 @@ define amdgpu_kernel void @local_zextload_v2i16_to_v2i64(<2 x i64> addrspace(3)* } ; FUNC-LABEL: {{^}}local_sextload_v2i16_to_v2i64: +; GFX9-NOT: m0 +; SICIVI: s_mov_b32 m0 + ; EG: LDS_READ_RET ; EG-DAG: BFE_INT @@ -618,6 +704,9 @@ define amdgpu_kernel void @local_sextload_v2i16_to_v2i64(<2 x i64> addrspace(3)* } ; FUNC-LABEL: {{^}}local_zextload_v4i16_to_v4i64: +; GFX9-NOT: m0 +; SICIVI: s_mov_b32 m0 + ; EG: LDS_READ_RET ; EG: LDS_READ_RET @@ -629,6 +718,9 @@ define amdgpu_kernel void @local_zextload_v4i16_to_v4i64(<4 x i64> addrspace(3)* } ; FUNC-LABEL: {{^}}local_sextload_v4i16_to_v4i64: +; GFX9-NOT: m0 +; SICIVI: s_mov_b32 m0 + ; EG: LDS_READ_RET ; EG: LDS_READ_RET @@ -644,6 +736,9 @@ define amdgpu_kernel void @local_sextload_v4i16_to_v4i64(<4 x i64> addrspace(3)* } ; FUNC-LABEL: {{^}}local_zextload_v8i16_to_v8i64: +; GFX9-NOT: m0 +; SICIVI: s_mov_b32 m0 + ; EG: LDS_READ_RET ; EG: LDS_READ_RET @@ -657,6 +752,9 @@ define amdgpu_kernel void @local_zextload_v8i16_to_v8i64(<8 x i64> addrspace(3)* } ; FUNC-LABEL: {{^}}local_sextload_v8i16_to_v8i64: +; GFX9-NOT: m0 +; SICIVI: s_mov_b32 m0 + ; EG: LDS_READ_RET ; EG: LDS_READ_RET @@ -678,6 +776,9 @@ define amdgpu_kernel void @local_sextload_v8i16_to_v8i64(<8 x i64> addrspace(3)* } ; FUNC-LABEL: {{^}}local_zextload_v16i16_to_v16i64: +; GFX9-NOT: m0 +; SICIVI: s_mov_b32 m0 + ; EG: LDS_READ_RET ; EG: LDS_READ_RET @@ -695,6 +796,9 @@ define amdgpu_kernel void @local_zextload_v16i16_to_v16i64(<16 x i64> addrspace( } ; FUNC-LABEL: {{^}}local_sextload_v16i16_to_v16i64: +; GFX9-NOT: m0 +; SICIVI: s_mov_b32 m0 + ; EG: LDS_READ_RET ; EG: LDS_READ_RET @@ -728,6 +832,9 @@ define amdgpu_kernel void 
@local_sextload_v16i16_to_v16i64(<16 x i64> addrspace( } ; FUNC-LABEL: {{^}}local_zextload_v32i16_to_v32i64: +; GFX9-NOT: m0 +; SICIVI: s_mov_b32 m0 + ; EG: LDS_READ_RET ; EG: LDS_READ_RET @@ -753,6 +860,9 @@ define amdgpu_kernel void @local_zextload_v32i16_to_v32i64(<32 x i64> addrspace( } ; FUNC-LABEL: {{^}}local_sextload_v32i16_to_v32i64: +; GFX9-NOT: m0 +; SICIVI: s_mov_b32 m0 + ; EG: LDS_READ_RET ; EG: LDS_READ_RET diff --git a/test/CodeGen/AMDGPU/load-local-i32.ll b/test/CodeGen/AMDGPU/load-local-i32.ll index 86055413d2cf..c736586fa217 100644 --- a/test/CodeGen/AMDGPU/load-local-i32.ll +++ b/test/CodeGen/AMDGPU/load-local-i32.ll @@ -1,11 +1,12 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,FUNC %s ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s - ; FUNC-LABEL: {{^}}local_load_i32: ; GCN-NOT: s_wqm_b64 -; GCN: s_mov_b32 m0, -1 +; SICIVI: s_mov_b32 m0, -1 +; GFX9-NOT: m0 ; GCN: ds_read_b32 ; EG: LDS_READ_RET @@ -17,6 +18,9 @@ entry: } ; FUNC-LABEL: {{^}}local_load_v2i32: +; SICIVI: s_mov_b32 m0, -1 +; GFX9-NOT: m0 + ; GCN: ds_read_b64 define amdgpu_kernel void @local_load_v2i32(<2 x i32> addrspace(3)* %out, <2 x i32> addrspace(3)* %in) #0 { entry: @@ -26,6 +30,9 @@ entry: } ; FUNC-LABEL: {{^}}local_load_v3i32: +; SICIVI: s_mov_b32 m0, -1 +; GFX9-NOT: m0 + ; GCN-DAG: ds_read_b64 ; GCN-DAG: ds_read_b32 define amdgpu_kernel void @local_load_v3i32(<3 x i32> addrspace(3)* %out, <3 x i32> addrspace(3)* %in) #0 { @@ -36,6 +43,9 @@ entry: } ; FUNC-LABEL: {{^}}local_load_v4i32: +; SICIVI: s_mov_b32 m0, -1 +; GFX9-NOT: m0 + ; GCN: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}} define amdgpu_kernel void @local_load_v4i32(<4 x i32> addrspace(3)* %out, <4 x i32> addrspace(3)* %in) #0 { @@ -46,6 +56,9 @@ entry: } ; FUNC-LABEL: {{^}}local_load_v8i32: +; SICIVI: s_mov_b32 m0, -1 +; GFX9-NOT: m0 + ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3{{$}} ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}} define amdgpu_kernel void @local_load_v8i32(<8 x i32> addrspace(3)* %out, <8 x i32> addrspace(3)* %in) #0 { @@ -56,6 +69,9 @@ entry: } ; FUNC-LABEL: {{^}}local_load_v16i32: +; SICIVI: s_mov_b32 m0, -1 +; GFX9-NOT: m0 + ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:6 offset1:7{{$}} ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:4 offset1:5{{$}} ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3{{$}} @@ -72,6 +88,9 @@ entry: } ; FUNC-LABEL: {{^}}local_zextload_i32_to_i64: +; SICIVI: s_mov_b32 m0, -1 +; GFX9-NOT: m0 + define amdgpu_kernel void @local_zextload_i32_to_i64(i64 addrspace(3)* %out, i32 addrspace(3)* %in) #0 { %ld = load i32, i32 addrspace(3)* %in %ext = zext i32 %ld to i64 @@ -80,6 +99,9 @@ define amdgpu_kernel void @local_zextload_i32_to_i64(i64 addrspace(3)* %out, i32 } ; FUNC-LABEL: {{^}}local_sextload_i32_to_i64: +; SICIVI: s_mov_b32 m0, -1 +; GFX9-NOT: m0
+ define amdgpu_kernel void @local_sextload_i32_to_i64(i64 addrspace(3)* %out, i32 addrspace(3)* %in) #0 { %ld = load i32, i32 addrspace(3)* %in %ext = sext i32 %ld to i64 @@ -88,6 +110,9 @@ define amdgpu_kernel void @local_sextload_i32_to_i64(i64 addrspace(3)* %out, i32 } ; FUNC-LABEL: {{^}}local_zextload_v1i32_to_v1i64: +; SICIVI: s_mov_b32 m0, -1 +; GFX9-NOT: m0 + define amdgpu_kernel void @local_zextload_v1i32_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i32> addrspace(3)* %in) #0 { %ld = load <1 x i32>, <1 x i32> addrspace(3)* %in %ext = zext <1 x i32> %ld to <1 x i64> @@ -96,6 +121,9 @@ define amdgpu_kernel void @local_zextload_v1i32_to_v1i64(<1 x i64> addrspace(3)* } ; FUNC-LABEL: {{^}}local_sextload_v1i32_to_v1i64: +; SICIVI: s_mov_b32 m0, -1 +; GFX9-NOT: m0 + define amdgpu_kernel void @local_sextload_v1i32_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i32> addrspace(3)* %in) #0 { %ld = load <1 x i32>, <1 x i32> addrspace(3)* %in %ext = sext <1 x i32> %ld to <1 x i64> @@ -104,6 +132,9 @@ define amdgpu_kernel void @local_sextload_v1i32_to_v1i64(<1 x i64> addrspace(3)* } ; FUNC-LABEL: {{^}}local_zextload_v2i32_to_v2i64: +; SICIVI: s_mov_b32 m0, -1 +; GFX9-NOT: m0 + define amdgpu_kernel void @local_zextload_v2i32_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i32> addrspace(3)* %in) #0 { %ld = load <2 x i32>, <2 x i32> addrspace(3)* %in %ext = zext <2 x i32> %ld to <2 x i64> @@ -112,6 +143,9 @@ define amdgpu_kernel void @local_zextload_v2i32_to_v2i64(<2 x i64> addrspace(3)* } ; FUNC-LABEL: {{^}}local_sextload_v2i32_to_v2i64: +; SICIVI: s_mov_b32 m0, -1 +; GFX9-NOT: m0 + define amdgpu_kernel void @local_sextload_v2i32_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i32> addrspace(3)* %in) #0 { %ld = load <2 x i32>, <2 x i32> addrspace(3)* %in %ext = sext <2 x i32> %ld to <2 x i64> @@ -120,6 +154,9 @@ define amdgpu_kernel void @local_sextload_v2i32_to_v2i64(<2 x i64> addrspace(3)* } ; FUNC-LABEL: {{^}}local_zextload_v4i32_to_v4i64: +; SICIVI: s_mov_b32 m0, -1 +; GFX9-NOT: m0 + define amdgpu_kernel void @local_zextload_v4i32_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i32> addrspace(3)* %in) #0 { %ld = load <4 x i32>, <4 x i32> addrspace(3)* %in %ext = zext <4 x i32> %ld to <4 x i64> @@ -128,6 +165,9 @@ define amdgpu_kernel void @local_zextload_v4i32_to_v4i64(<4 x i64> addrspace(3)* } ; FUNC-LABEL: {{^}}local_sextload_v4i32_to_v4i64: +; SICIVI: s_mov_b32 m0, -1 +; GFX9-NOT: m0 + define amdgpu_kernel void @local_sextload_v4i32_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i32> addrspace(3)* %in) #0 { %ld = load <4 x i32>, <4 x i32> addrspace(3)* %in %ext = sext <4 x i32> %ld to <4 x i64> @@ -136,6 +176,9 @@ define amdgpu_kernel void @local_sextload_v4i32_to_v4i64(<4 x i64> addrspace(3)* } ; FUNC-LABEL: {{^}}local_zextload_v8i32_to_v8i64: +; SICIVI: s_mov_b32 m0, -1 +; GFX9-NOT: m0 + define amdgpu_kernel void @local_zextload_v8i32_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i32> addrspace(3)* %in) #0 { %ld = load <8 x i32>, <8 x i32> addrspace(3)* %in %ext = zext <8 x i32> %ld to <8 x i64> @@ -144,6 +187,9 @@ define amdgpu_kernel void @local_zextload_v8i32_to_v8i64(<8 x i64> addrspace(3)* } ; FUNC-LABEL: {{^}}local_sextload_v8i32_to_v8i64: +; SICIVI: s_mov_b32 m0, -1 +; GFX9-NOT: m0 + define amdgpu_kernel void @local_sextload_v8i32_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i32> addrspace(3)* %in) #0 { %ld = load <8 x i32>, <8 x i32> addrspace(3)* %in %ext = sext <8 x i32> %ld to <8 x i64> @@ -152,6 +198,9 @@ define amdgpu_kernel void @local_sextload_v8i32_to_v8i64(<8 x i64> addrspace(3)* } ; 
FUNC-LABEL: {{^}}local_sextload_v16i32_to_v16i64: +; SICIVI: s_mov_b32 m0, -1 +; GFX9-NOT: m0 + define amdgpu_kernel void @local_sextload_v16i32_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i32> addrspace(3)* %in) #0 { %ld = load <16 x i32>, <16 x i32> addrspace(3)* %in %ext = sext <16 x i32> %ld to <16 x i64> @@ -160,6 +209,9 @@ define amdgpu_kernel void @local_sextload_v16i32_to_v16i64(<16 x i64> addrspace( } ; FUNC-LABEL: {{^}}local_zextload_v16i32_to_v16i64 +; SICIVI: s_mov_b32 m0, -1 +; GFX9-NOT: m0 + define amdgpu_kernel void @local_zextload_v16i32_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i32> addrspace(3)* %in) #0 { %ld = load <16 x i32>, <16 x i32> addrspace(3)* %in %ext = zext <16 x i32> %ld to <16 x i64> @@ -168,6 +220,9 @@ define amdgpu_kernel void @local_zextload_v16i32_to_v16i64(<16 x i64> addrspace( } ; FUNC-LABEL: {{^}}local_sextload_v32i32_to_v32i64: +; SICIVI: s_mov_b32 m0, -1 +; GFX9-NOT: m0 + define amdgpu_kernel void @local_sextload_v32i32_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i32> addrspace(3)* %in) #0 { %ld = load <32 x i32>, <32 x i32> addrspace(3)* %in %ext = sext <32 x i32> %ld to <32 x i64> @@ -176,6 +231,9 @@ define amdgpu_kernel void @local_sextload_v32i32_to_v32i64(<32 x i64> addrspace( } ; FUNC-LABEL: {{^}}local_zextload_v32i32_to_v32i64: +; SICIVI: s_mov_b32 m0, -1 +; GFX9-NOT: m0 + define amdgpu_kernel void @local_zextload_v32i32_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i32> addrspace(3)* %in) #0 { %ld = load <32 x i32>, <32 x i32> addrspace(3)* %in %ext = zext <32 x i32> %ld to <32 x i64> diff --git a/test/CodeGen/AMDGPU/load-local-i64.ll b/test/CodeGen/AMDGPU/load-local-i64.ll index 0c719a9e0bf9..376f6f513c3a 100644 --- a/test/CodeGen/AMDGPU/load-local-i64.ll +++ b/test/CodeGen/AMDGPU/load-local-i64.ll @@ -1,9 +1,13 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,FUNC %s +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefixes=EG,FUNC %s ; FUNC-LABEL: {{^}}local_load_i64: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_read_b64 [[VAL:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}{{$}} ; GCN: ds_write_b64 v{{[0-9]+}}, [[VAL]] @@ -16,6 +20,9 @@ define amdgpu_kernel void @local_load_i64(i64 addrspace(3)* %out, i64 addrspace( } ; FUNC-LABEL: {{^}}local_load_v2i64: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_read2_b64 ; EG: LDS_READ_RET @@ -30,6 +37,9 @@ entry: } ; FUNC-LABEL: {{^}}local_load_v3i64: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN-DAG: ds_read2_b64 ; GCN-DAG: ds_read_b64 @@ -47,6 +57,9 @@ entry: } ; FUNC-LABEL: {{^}}local_load_v4i64: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_read2_b64 ; GCN: ds_read2_b64 @@ -67,6 +80,9 @@ entry: } ; FUNC-LABEL: {{^}}local_load_v8i64: +; SICIVI: 
s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_read2_b64 ; GCN: ds_read2_b64 ; GCN: ds_read2_b64 @@ -96,6 +112,9 @@ entry: } ; FUNC-LABEL: {{^}}local_load_v16i64: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_read2_b64 ; GCN: ds_read2_b64 ; GCN: ds_read2_b64 diff --git a/test/CodeGen/AMDGPU/load-local-i8.ll b/test/CodeGen/AMDGPU/load-local-i8.ll index b20f6ba55a76..72f5408675fc 100644 --- a/test/CodeGen/AMDGPU/load-local-i8.ll +++ b/test/CodeGen/AMDGPU/load-local-i8.ll @@ -1,11 +1,13 @@ -; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI,FUNC %s -; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI,SICIVI,FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,SICIVI,FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,FUNC %s ; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}local_load_i8: ; GCN-NOT: s_wqm_b64 -; GCN: s_mov_b32 m0 +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 ; GCN: ds_read_u8 ; EG: LDS_UBYTE_READ_RET @@ -18,7 +20,8 @@ entry: ; FUNC-LABEL: {{^}}local_load_v2i8: ; GCN-NOT: s_wqm_b64 -; GCN: s_mov_b32 m0 +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 ; GCN: ds_read_u16 ; EG: LDS_USHORT_READ_RET @@ -30,6 +33,7 @@ entry: } ; FUNC-LABEL: {{^}}local_load_v3i8: +; GFX9-NOT: m0 ; GCN: ds_read_b32 ; EG: DS_READ_RET @@ -41,6 +45,7 @@ entry: } ; FUNC-LABEL: {{^}}local_load_v4i8: +; GFX9-NOT: m0 ; GCN: ds_read_b32 ; EG: LDS_READ_RET @@ -52,6 +57,7 @@ entry: } ; FUNC-LABEL: {{^}}local_load_v8i8: +; GFX9-NOT: m0 ; GCN: ds_read_b64 ; EG: LDS_READ_RET @@ -64,6 +70,7 @@ entry: } ; FUNC-LABEL: {{^}}local_load_v16i8: +; GFX9-NOT: m0 ; GCN: ds_read2_b64 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:1{{$}} ; GCN: ds_write2_b64 v{{[0-9]+}}, v{{\[}}[[LO]]:{{[0-9]+}}], v[{{[0-9]+}}:[[HI]]{{\]}} offset1:1{{$}} @@ -79,8 +86,9 @@ entry: } ; FUNC-LABEL: {{^}}local_zextload_i8_to_i32: +; GFX9-NOT: m0 ; GCN-NOT: s_wqm_b64 -; GCN: s_mov_b32 m0 +; SICIVI: s_mov_b32 m0 ; GCN: ds_read_u8 ; EG: LDS_UBYTE_READ_RET @@ -93,7 +101,8 @@ define amdgpu_kernel void @local_zextload_i8_to_i32(i32 addrspace(3)* %out, i8 a ; FUNC-LABEL: {{^}}local_sextload_i8_to_i32: ; GCN-NOT: s_wqm_b64 -; GCN: s_mov_b32 m0 +; GFX9-NOT: m0 +; SICIVI: s_mov_b32 m0 ; GCN: ds_read_i8 ; EG: LDS_UBYTE_READ_RET @@ -116,6 +125,7 @@ define amdgpu_kernel void @local_zextload_v1i8_to_v1i32(<1 x i32> addrspace(3)* } ; FUNC-LABEL: {{^}}local_sextload_v1i8_to_v1i32: +; GFX9-NOT: m0 ; EG: LDS_UBYTE_READ_RET ; EG: BFE_INT @@ -127,6 +137,7 @@ define amdgpu_kernel void @local_sextload_v1i8_to_v1i32(<1 x i32> addrspace(3)* } ; FUNC-LABEL: {{^}}local_zextload_v2i8_to_v2i32: +; GFX9-NOT: m0 ; GCN: ds_read_u16 ; EG: LDS_USHORT_READ_RET @@ -139,7 +150,8 @@ define amdgpu_kernel void @local_zextload_v2i8_to_v2i32(<2 x i32> addrspace(3)* ; FUNC-LABEL: {{^}}local_sextload_v2i8_to_v2i32: ; GCN-NOT: s_wqm_b64 -; GCN: s_mov_b32 m0 +; GFX9-NOT: m0 +; SICIVI: s_mov_b32 m0 ; GCN: ds_read_u16 ; FIXME: Need to optimize this sequence to avoid extra shift on VI. 
; t23: i16 = srl t39, Constant:i32<8> @@ -164,6 +176,7 @@ define amdgpu_kernel void @local_sextload_v2i8_to_v2i32(<2 x i32> addrspace(3)* } ; FUNC-LABEL: {{^}}local_zextload_v3i8_to_v3i32: +; GFX9-NOT: m0 ; GCN: ds_read_b32 ; SI-DAG: v_bfe_u32 v{{[0-9]+}}, v{{[0-9]+}}, 8, 8 @@ -182,7 +195,8 @@ entry: ; FUNC-LABEL: {{^}}local_sextload_v3i8_to_v3i32: ; GCN-NOT: s_wqm_b64 -; GCN: s_mov_b32 m0 +; GFX9-NOT: m0 +; SICIVI: s_mov_b32 m0 ; GCN: ds_read_b32 ; GCN-DAG: v_bfe_i32 @@ -207,7 +221,8 @@ entry: ; FUNC-LABEL: {{^}}local_zextload_v4i8_to_v4i32: ; GCN-NOT: s_wqm_b64 -; GCN: s_mov_b32 m0 +; GFX9-NOT: m0 +; SICIVI: s_mov_b32 m0 ; GCN: ds_read_b32 ; EG: LDS_READ_RET @@ -223,7 +238,8 @@ define amdgpu_kernel void @local_zextload_v4i8_to_v4i32(<4 x i32> addrspace(3)* ; FUNC-LABEL: {{^}}local_sextload_v4i8_to_v4i32: ; GCN-NOT: s_wqm_b64 -; GCN: s_mov_b32 m0 +; GFX9-NOT: m0 +; SICIVI: s_mov_b32 m0 ; GCN: ds_read_b32 ; EG-DAG: LDS_READ_RET @@ -239,6 +255,8 @@ define amdgpu_kernel void @local_sextload_v4i8_to_v4i32(<4 x i32> addrspace(3)* } ; FUNC-LABEL: {{^}}local_zextload_v8i8_to_v8i32: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 ; EG-DAG: LDS_READ_RET ; EG-DAG: LDS_READ_RET @@ -256,6 +274,8 @@ define amdgpu_kernel void @local_zextload_v8i8_to_v8i32(<8 x i32> addrspace(3)* } ; FUNC-LABEL: {{^}}local_sextload_v8i8_to_v8i32: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 ; EG-DAG: LDS_READ_RET ; EG-DAG: LDS_READ_RET @@ -275,6 +295,8 @@ define amdgpu_kernel void @local_sextload_v8i8_to_v8i32(<8 x i32> addrspace(3)* } ; FUNC-LABEL: {{^}}local_zextload_v16i8_to_v16i32: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 ; EG-DAG: LDS_READ_RET ; EG-DAG: LDS_READ_RET @@ -300,6 +322,8 @@ define amdgpu_kernel void @local_zextload_v16i8_to_v16i32(<16 x i32> addrspace(3 } ; FUNC-LABEL: {{^}}local_sextload_v16i8_to_v16i32: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 ; EG-DAG: LDS_READ_RET ; EG-DAG: LDS_READ_RET @@ -329,6 +353,8 @@ define amdgpu_kernel void @local_sextload_v16i8_to_v16i32(<16 x i32> addrspace(3 } ; FUNC-LABEL: {{^}}local_zextload_v32i8_to_v32i32: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 ; EG-DAG: LDS_READ_RET ; EG-DAG: LDS_READ_RET @@ -346,6 +372,8 @@ define amdgpu_kernel void @local_zextload_v32i8_to_v32i32(<32 x i32> addrspace(3 } ; FUNC-LABEL: {{^}}local_sextload_v32i8_to_v32i32: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 ; EG-DAG: LDS_READ_RET ; EG-DAG: LDS_READ_RET @@ -363,6 +391,8 @@ define amdgpu_kernel void @local_sextload_v32i8_to_v32i32(<32 x i32> addrspace(3 } ; FUNC-LABEL: {{^}}local_zextload_v64i8_to_v64i32: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 ; EG-DAG: LDS_READ_RET ; EG-DAG: LDS_READ_RET @@ -388,6 +418,8 @@ define amdgpu_kernel void @local_zextload_v64i8_to_v64i32(<64 x i32> addrspace(3 } ; FUNC-LABEL: {{^}}local_sextload_v64i8_to_v64i32: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 ; EG-DAG: LDS_READ_RET ; EG-DAG: LDS_READ_RET @@ -413,6 +445,9 @@ define amdgpu_kernel void @local_sextload_v64i8_to_v64i32(<64 x i32> addrspace(3 } ; FUNC-LABEL: {{^}}local_zextload_i8_to_i64: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}} ; GCN-DAG: ds_read_u8 v[[LO:[0-9]+]], ; GCN: ds_write_b64 v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]] @@ -428,6 +463,9 @@ define amdgpu_kernel void @local_zextload_i8_to_i64(i64 addrspace(3)* %out, i8 a } ; FUNC-LABEL: {{^}}local_sextload_i8_to_i64: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_read_i8 v[[LO:[0-9]+]], ; GCN: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]] @@ -445,6 +483,8 @@ define amdgpu_kernel void @local_sextload_i8_to_i64(i64 addrspace(3)* 
%out, i8 a } ; FUNC-LABEL: {{^}}local_zextload_v1i8_to_v1i64: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 ; EG: LDS_UBYTE_READ_RET ; EG: MOV {{.*}}, literal @@ -458,6 +498,8 @@ define amdgpu_kernel void @local_zextload_v1i8_to_v1i64(<1 x i64> addrspace(3)* } ; FUNC-LABEL: {{^}}local_sextload_v1i8_to_v1i64: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 ; EG: LDS_UBYTE_READ_RET ; EG: ASHR @@ -471,6 +513,8 @@ define amdgpu_kernel void @local_sextload_v1i8_to_v1i64(<1 x i64> addrspace(3)* } ; FUNC-LABEL: {{^}}local_zextload_v2i8_to_v2i64: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 ; EG: LDS_USHORT_READ_RET define amdgpu_kernel void @local_zextload_v2i8_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 { @@ -481,6 +525,8 @@ define amdgpu_kernel void @local_zextload_v2i8_to_v2i64(<2 x i64> addrspace(3)* } ; FUNC-LABEL: {{^}}local_sextload_v2i8_to_v2i64: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 ; EG: LDS_USHORT_READ_RET ; EG: BFE_INT @@ -493,6 +539,8 @@ define amdgpu_kernel void @local_sextload_v2i8_to_v2i64(<2 x i64> addrspace(3)* } ; FUNC-LABEL: {{^}}local_zextload_v4i8_to_v4i64: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 ; EG: LDS_READ_RET define amdgpu_kernel void @local_zextload_v4i8_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 { @@ -503,6 +551,8 @@ define amdgpu_kernel void @local_zextload_v4i8_to_v4i64(<4 x i64> addrspace(3)* } ; FUNC-LABEL: {{^}}local_sextload_v4i8_to_v4i64: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 ; EG: LDS_READ_RET define amdgpu_kernel void @local_sextload_v4i8_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 { @@ -513,6 +563,8 @@ define amdgpu_kernel void @local_sextload_v4i8_to_v4i64(<4 x i64> addrspace(3)* } ; FUNC-LABEL: {{^}}local_zextload_v8i8_to_v8i64: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 ; EG: LDS_READ_RET ; EG: LDS_READ_RET @@ -524,6 +576,8 @@ define amdgpu_kernel void @local_zextload_v8i8_to_v8i64(<8 x i64> addrspace(3)* } ; FUNC-LABEL: {{^}}local_sextload_v8i8_to_v8i64: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 ; EG: LDS_READ_RET ; EG: LDS_READ_RET @@ -544,6 +598,8 @@ define amdgpu_kernel void @local_sextload_v8i8_to_v8i64(<8 x i64> addrspace(3)* } ; FUNC-LABEL: {{^}}local_zextload_v16i8_to_v16i64: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 ; EG: LDS_READ_RET ; EG: LDS_READ_RET @@ -557,6 +613,8 @@ define amdgpu_kernel void @local_zextload_v16i8_to_v16i64(<16 x i64> addrspace(3 } ; FUNC-LABEL: {{^}}local_sextload_v16i8_to_v16i64: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 ; EG: LDS_READ_RET ; EG: LDS_READ_RET @@ -570,6 +628,8 @@ define amdgpu_kernel void @local_sextload_v16i8_to_v16i64(<16 x i64> addrspace(3 } ; FUNC-LABEL: {{^}}local_zextload_v32i8_to_v32i64: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 ; EG: LDS_READ_RET ; EG: LDS_READ_RET @@ -587,6 +647,8 @@ define amdgpu_kernel void @local_zextload_v32i8_to_v32i64(<32 x i64> addrspace(3 } ; FUNC-LABEL: {{^}}local_sextload_v32i8_to_v32i64: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 ; EG: LDS_READ_RET ; EG: LDS_READ_RET @@ -620,6 +682,8 @@ define amdgpu_kernel void @local_sextload_v32i8_to_v32i64(<32 x i64> addrspace(3 ; } ; FUNC-LABEL: {{^}}local_zextload_i8_to_i16: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 ; GCN: ds_read_u8 v[[VAL:[0-9]+]], ; GCN: ds_write_b16 v[[VAL:[0-9]+]] @@ -633,6 +697,8 @@ define amdgpu_kernel void @local_zextload_i8_to_i16(i16 addrspace(3)* %out, i8 a } ; FUNC-LABEL: {{^}}local_sextload_i8_to_i16: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 ; GCN: ds_read_i8 v[[VAL:[0-9]+]], ; GCN: ds_write_b16 v{{[0-9]+}}, v[[VAL]] @@ -647,6 +713,8 @@ define 
amdgpu_kernel void @local_sextload_i8_to_i16(i16 addrspace(3)* %out, i8 a } ; FUNC-LABEL: {{^}}local_zextload_v1i8_to_v1i16: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 ; EG: LDS_UBYTE_READ_RET ; EG: LDS_SHORT_WRITE @@ -658,6 +726,8 @@ define amdgpu_kernel void @local_zextload_v1i8_to_v1i16(<1 x i16> addrspace(3)* } ; FUNC-LABEL: {{^}}local_sextload_v1i8_to_v1i16: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 ; EG: LDS_UBYTE_READ_RET ; EG: BFE_INT @@ -670,6 +740,8 @@ define amdgpu_kernel void @local_sextload_v1i8_to_v1i16(<1 x i16> addrspace(3)* } ; FUNC-LABEL: {{^}}local_zextload_v2i8_to_v2i16: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 ; EG: LDS_USHORT_READ_RET ; EG: LDS_WRITE @@ -681,6 +753,8 @@ define amdgpu_kernel void @local_zextload_v2i8_to_v2i16(<2 x i16> addrspace(3)* } ; FUNC-LABEL: {{^}}local_sextload_v2i8_to_v2i16: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 ; EG: LDS_USHORT_READ_RET ; EG: BFE_INT @@ -694,6 +768,8 @@ define amdgpu_kernel void @local_sextload_v2i8_to_v2i16(<2 x i16> addrspace(3)* } ; FUNC-LABEL: {{^}}local_zextload_v4i8_to_v4i16: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 ; EG: LDS_READ_RET ; EG: LDS_WRITE @@ -706,6 +782,8 @@ define amdgpu_kernel void @local_zextload_v4i8_to_v4i16(<4 x i16> addrspace(3)* } ; FUNC-LABEL: {{^}}local_sextload_v4i8_to_v4i16: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 ; EG: LDS_READ_RET ; TODO: these do LSHR + BFE_INT, instead of just BFE_INT/ASHR @@ -723,6 +801,8 @@ define amdgpu_kernel void @local_sextload_v4i8_to_v4i16(<4 x i16> addrspace(3)* } ; FUNC-LABEL: {{^}}local_zextload_v8i8_to_v8i16: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 ; EG: LDS_READ_RET ; EG: LDS_READ_RET @@ -738,6 +818,8 @@ define amdgpu_kernel void @local_zextload_v8i8_to_v8i16(<8 x i16> addrspace(3)* } ; FUNC-LABEL: {{^}}local_sextload_v8i8_to_v8i16: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 ; EG: LDS_READ_RET ; EG: LDS_READ_RET @@ -762,6 +844,8 @@ define amdgpu_kernel void @local_sextload_v8i8_to_v8i16(<8 x i16> addrspace(3)* } ; FUNC-LABEL: {{^}}local_zextload_v16i8_to_v16i16: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 ; EG: LDS_READ_RET ; EG: LDS_READ_RET @@ -783,6 +867,8 @@ define amdgpu_kernel void @local_zextload_v16i8_to_v16i16(<16 x i16> addrspace(3 } ; FUNC-LABEL: {{^}}local_sextload_v16i8_to_v16i16: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 ; EG: LDS_READ_RET ; EG: LDS_READ_RET @@ -821,6 +907,8 @@ define amdgpu_kernel void @local_sextload_v16i8_to_v16i16(<16 x i16> addrspace(3 } ; FUNC-LABEL: {{^}}local_zextload_v32i8_to_v32i16: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 ; EG: LDS_READ_RET ; EG: LDS_READ_RET @@ -854,6 +942,8 @@ define amdgpu_kernel void @local_zextload_v32i8_to_v32i16(<32 x i16> addrspace(3 } ; FUNC-LABEL: {{^}}local_sextload_v32i8_to_v32i16: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 ; EG: LDS_READ_RET ; EG: LDS_READ_RET diff --git a/test/CodeGen/AMDGPU/local-64.ll b/test/CodeGen/AMDGPU/local-64.ll index bf4a93237bd4..87c18a7fc449 100644 --- a/test/CodeGen/AMDGPU/local-64.ll +++ b/test/CodeGen/AMDGPU/local-64.ll @@ -1,10 +1,14 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs< %s | FileCheck --check-prefix=SI --check-prefix=BOTH %s -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs< %s | FileCheck --check-prefix=CI --check-prefix=BOTH %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs< %s | FileCheck --check-prefix=CI --check-prefix=BOTH %s +; RUN: llc -march=amdgcn -verify-machineinstrs< %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI,SICIVI %s +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs< %s | 
FileCheck -enable-var-scope -check-prefixes=GCN,CI,SICIVI %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs< %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,SICIVI %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs< %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s -; BOTH-LABEL: {{^}}local_i32_load -; BOTH: ds_read_b32 [[REG:v[0-9]+]], v{{[0-9]+}} offset:28 -; BOTH: buffer_store_dword [[REG]], +; GCN-LABEL: {{^}}local_i32_load +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN: ds_read_b32 [[REG:v[0-9]+]], v{{[0-9]+}} offset:28 +; GCN: buffer_store_dword [[REG]], define amdgpu_kernel void @local_i32_load(i32 addrspace(1)* %out, i32 addrspace(3)* %in) nounwind { %gep = getelementptr i32, i32 addrspace(3)* %in, i32 7 %val = load i32, i32 addrspace(3)* %gep, align 4 @@ -12,19 +16,25 @@ define amdgpu_kernel void @local_i32_load(i32 addrspace(1)* %out, i32 addrspace( ret void } -; BOTH-LABEL: {{^}}local_i32_load_0_offset -; BOTH: ds_read_b32 [[REG:v[0-9]+]], v{{[0-9]+}} -; BOTH: buffer_store_dword [[REG]], +; GCN-LABEL: {{^}}local_i32_load_0_offset +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN: ds_read_b32 [[REG:v[0-9]+]], v{{[0-9]+}} +; GCN: buffer_store_dword [[REG]], define amdgpu_kernel void @local_i32_load_0_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %in) nounwind { %val = load i32, i32 addrspace(3)* %in, align 4 store i32 %val, i32 addrspace(1)* %out, align 4 ret void } -; BOTH-LABEL: {{^}}local_i8_load_i16_max_offset: -; BOTH-NOT: ADD -; BOTH: ds_read_u8 [[REG:v[0-9]+]], {{v[0-9]+}} offset:65535 -; BOTH: buffer_store_byte [[REG]], +; GCN-LABEL: {{^}}local_i8_load_i16_max_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-NOT: add +; GCN: ds_read_u8 [[REG:v[0-9]+]], {{v[0-9]+}} offset:65535 +; GCN: buffer_store_byte [[REG]], define amdgpu_kernel void @local_i8_load_i16_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)* %in) nounwind { %gep = getelementptr i8, i8 addrspace(3)* %in, i32 65535 %val = load i8, i8 addrspace(3)* %gep, align 4 @@ -32,14 +42,20 @@ define amdgpu_kernel void @local_i8_load_i16_max_offset(i8 addrspace(1)* %out, i ret void } -; BOTH-LABEL: {{^}}local_i8_load_over_i16_max_offset: +; GCN-LABEL: {{^}}local_i8_load_over_i16_max_offset: +; SICIVI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + ; The LDS offset will be 65536 bytes, which is larger than the size of LDS on ; SI, which is why it is being OR'd with the base pointer. 
-; SI: s_or_b32 [[ADDR:s[0-9]+]], s{{[0-9]+}}, 0x10000 -; CI: s_add_i32 [[ADDR:s[0-9]+]], s{{[0-9]+}}, 0x10000 -; BOTH: v_mov_b32_e32 [[VREGADDR:v[0-9]+]], [[ADDR]] -; BOTH: ds_read_u8 [[REG:v[0-9]+]], [[VREGADDR]] -; BOTH: buffer_store_byte [[REG]], +; SI-DAG: s_or_b32 [[ADDR:s[0-9]+]], s{{[0-9]+}}, 0x10000 +; CI-DAG: s_add_i32 [[ADDR:s[0-9]+]], s{{[0-9]+}}, 0x10000 +; VI-DAG: s_add_i32 [[ADDR:s[0-9]+]], s{{[0-9]+}}, 0x10000 +; GFX9-DAG: s_add_i32 [[ADDR:s[0-9]+]], s{{[0-9]+}}, 0x10000 + +; GCN-DAG: v_mov_b32_e32 [[VREGADDR:v[0-9]+]], [[ADDR]] +; GCN: ds_read_u8 [[REG:v[0-9]+]], [[VREGADDR]] +; GCN: buffer_store_byte [[REG]], define amdgpu_kernel void @local_i8_load_over_i16_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)* %in) nounwind { %gep = getelementptr i8, i8 addrspace(3)* %in, i32 65536 %val = load i8, i8 addrspace(3)* %gep, align 4 @@ -47,10 +63,13 @@ define amdgpu_kernel void @local_i8_load_over_i16_max_offset(i8 addrspace(1)* %o ret void } -; BOTH-LABEL: {{^}}local_i64_load: -; BOTH-NOT: ADD -; BOTH: ds_read_b64 [[REG:v[[0-9]+:[0-9]+]]], v{{[0-9]+}} offset:56 -; BOTH: buffer_store_dwordx2 [[REG]], +; GCN-LABEL: {{^}}local_i64_load: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-NOT: add +; GCN: ds_read_b64 [[REG:v[[0-9]+:[0-9]+]]], v{{[0-9]+}} offset:56 +; GCN: buffer_store_dwordx2 [[REG]], define amdgpu_kernel void @local_i64_load(i64 addrspace(1)* %out, i64 addrspace(3)* %in) nounwind { %gep = getelementptr i64, i64 addrspace(3)* %in, i32 7 %val = load i64, i64 addrspace(3)* %gep, align 8 @@ -58,19 +77,25 @@ define amdgpu_kernel void @local_i64_load(i64 addrspace(1)* %out, i64 addrspace( ret void } -; BOTH-LABEL: {{^}}local_i64_load_0_offset -; BOTH: ds_read_b64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}} -; BOTH: buffer_store_dwordx2 [[REG]], +; GCN-LABEL: {{^}}local_i64_load_0_offset +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN: ds_read_b64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}} +; GCN: buffer_store_dwordx2 [[REG]], define amdgpu_kernel void @local_i64_load_0_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %in) nounwind { %val = load i64, i64 addrspace(3)* %in, align 8 store i64 %val, i64 addrspace(1)* %out, align 8 ret void } -; BOTH-LABEL: {{^}}local_f64_load: -; BOTH-NOT: ADD -; BOTH: ds_read_b64 [[REG:v[[0-9]+:[0-9]+]]], v{{[0-9]+}} offset:56 -; BOTH: buffer_store_dwordx2 [[REG]], +; GCN-LABEL: {{^}}local_f64_load: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-NOT: add +; GCN: ds_read_b64 [[REG:v[[0-9]+:[0-9]+]]], v{{[0-9]+}} offset:56 +; GCN: buffer_store_dwordx2 [[REG]], define amdgpu_kernel void @local_f64_load(double addrspace(1)* %out, double addrspace(3)* %in) nounwind { %gep = getelementptr double, double addrspace(3)* %in, i32 7 %val = load double, double addrspace(3)* %gep, align 8 @@ -78,83 +103,110 @@ define amdgpu_kernel void @local_f64_load(double addrspace(1)* %out, double addr ret void } -; BOTH-LABEL: {{^}}local_f64_load_0_offset -; BOTH: ds_read_b64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}} -; BOTH: buffer_store_dwordx2 [[REG]], +; GCN-LABEL: {{^}}local_f64_load_0_offset +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN: ds_read_b64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}} +; GCN: buffer_store_dwordx2 [[REG]], define amdgpu_kernel void @local_f64_load_0_offset(double addrspace(1)* %out, double addrspace(3)* %in) nounwind { %val = load double, double addrspace(3)* %in, align 8 store double %val, double addrspace(1)* %out, align 8 ret void } -; BOTH-LABEL: {{^}}local_i64_store: -; BOTH-NOT: ADD -; BOTH: ds_write_b64 v{{[0-9]+}}, 
{{v\[[0-9]+:[0-9]+\]}} offset:56 +; GCN-LABEL: {{^}}local_i64_store: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-NOT: add +; GCN: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:56 define amdgpu_kernel void @local_i64_store(i64 addrspace(3)* %out) nounwind { %gep = getelementptr i64, i64 addrspace(3)* %out, i32 7 store i64 5678, i64 addrspace(3)* %gep, align 8 ret void } -; BOTH-LABEL: {{^}}local_i64_store_0_offset: -; BOTH-NOT: ADD -; BOTH: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} +; GCN-LABEL: {{^}}local_i64_store_0_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-NOT: add +; GCN: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} define amdgpu_kernel void @local_i64_store_0_offset(i64 addrspace(3)* %out) nounwind { store i64 1234, i64 addrspace(3)* %out, align 8 ret void } -; BOTH-LABEL: {{^}}local_f64_store: -; BOTH-NOT: ADD -; BOTH: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:56 +; GCN-LABEL: {{^}}local_f64_store: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-NOT: add +; GCN: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:56 define amdgpu_kernel void @local_f64_store(double addrspace(3)* %out) nounwind { %gep = getelementptr double, double addrspace(3)* %out, i32 7 store double 16.0, double addrspace(3)* %gep, align 8 ret void } -; BOTH-LABEL: {{^}}local_f64_store_0_offset -; BOTH: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} +; GCN-LABEL: {{^}}local_f64_store_0_offset +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} define amdgpu_kernel void @local_f64_store_0_offset(double addrspace(3)* %out) nounwind { store double 20.0, double addrspace(3)* %out, align 8 ret void } -; BOTH-LABEL: {{^}}local_v2i64_store: -; BOTH-NOT: ADD -; BOTH: ds_write2_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset0:14 offset1:15 -; BOTH: s_endpgm +; GCN-LABEL: {{^}}local_v2i64_store: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-NOT: add +; GCN: ds_write2_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset0:14 offset1:15 +; GCN: s_endpgm define amdgpu_kernel void @local_v2i64_store(<2 x i64> addrspace(3)* %out) nounwind { %gep = getelementptr <2 x i64>, <2 x i64> addrspace(3)* %out, i32 7 store <2 x i64> <i64 5678, i64 5678>, <2 x i64> addrspace(3)* %gep, align 16 ret void } -; BOTH-LABEL: {{^}}local_v2i64_store_0_offset: -; BOTH-NOT: ADD -; BOTH: ds_write2_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset1:1 -; BOTH: s_endpgm +; GCN-LABEL: {{^}}local_v2i64_store_0_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-NOT: add +; GCN: ds_write2_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset1:1 +; GCN: s_endpgm define amdgpu_kernel void @local_v2i64_store_0_offset(<2 x i64> addrspace(3)* %out) nounwind { store <2 x i64> <i64 1234, i64 1234>, <2 x i64> addrspace(3)* %out, align 16 ret void } -; BOTH-LABEL: {{^}}local_v4i64_store: -; BOTH-NOT: ADD -; BOTH-DAG: ds_write2_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset0:30 offset1:31 -; BOTH-DAG: ds_write2_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset0:28 offset1:29 -; BOTH: s_endpgm +; GCN-LABEL: {{^}}local_v4i64_store: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-NOT: add +; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset0:30 offset1:31 +; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset0:28 offset1:29 +; GCN: s_endpgm define amdgpu_kernel void @local_v4i64_store(<4 x i64>
addrspace(3)* %out) nounwind { %gep = getelementptr <4 x i64>, <4 x i64> addrspace(3)* %out, i32 7 store <4 x i64> <i64 5678, i64 5678, i64 5678, i64 5678>, <4 x i64> addrspace(3)* %gep, align 16 ret void } -; BOTH-LABEL: {{^}}local_v4i64_store_0_offset: -; BOTH-NOT: ADD -; BOTH-DAG: ds_write2_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset0:2 offset1:3 -; BOTH-DAG: ds_write2_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset1:1 -; BOTH: s_endpgm +; GCN-LABEL: {{^}}local_v4i64_store_0_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-NOT: add +; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset0:2 offset1:3 +; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset1:1 +; GCN: s_endpgm define amdgpu_kernel void @local_v4i64_store_0_offset(<4 x i64> addrspace(3)* %out) nounwind { store <4 x i64> <i64 1234, i64 1234, i64 1234, i64 1234>, <4 x i64> addrspace(3)* %out, align 16 ret void } diff --git a/test/CodeGen/AMDGPU/local-atomics.ll b/test/CodeGen/AMDGPU/local-atomics.ll index de029d964b0d..d2167f5a730a 100644 --- a/test/CodeGen/AMDGPU/local-atomics.ll +++ b/test/CodeGen/AMDGPU/local-atomics.ll @@ -1,13 +1,18 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CIVI -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=CIVI -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI,SICIVI,FUNC %s +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,SICIVI,FUNC %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,FUNC %s +; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=EG,FUNC %s ; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i32: ; EG: LDS_WRXCHG_RET * -; GCN: s_load_dword [[SPTR:s[0-9]+]], -; GCN: v_mov_b32_e32 [[DATA:v[0-9]+]], 4 -; GCN: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]] + +; SICIVI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-DAG: s_load_dword [[SPTR:s[0-9]+]], +; GCN-DAG: v_mov_b32_e32 [[DATA:v[0-9]+]], 4 +; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]] ; GCN: ds_wrxchg_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]] ; GCN: buffer_store_dword [[RESULT]], ; GCN: s_endpgm @@ -18,6 +23,9 @@ define amdgpu_kernel void @lds_atomic_xchg_ret_i32(i32 addrspace(1)* %out, i32 a } ; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i32_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; EG: LDS_WRXCHG_RET * ; GCN: ds_wrxchg_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 ; GCN: s_endpgm @@ -31,9 +39,13 @@ define amdgpu_kernel void @lds_atomic_xchg_ret_i32_offset(i32 addrspace(1)* %out ; XXX - Is it really necessary to load 4 into VGPR? 
; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32: ; EG: LDS_ADD_RET * -; GCN: s_load_dword [[SPTR:s[0-9]+]], -; GCN: v_mov_b32_e32 [[DATA:v[0-9]+]], 4 -; GCN: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]] + +; SICIVI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-DAG: s_load_dword [[SPTR:s[0-9]+]], +; GCN-DAG: v_mov_b32_e32 [[DATA:v[0-9]+]], 4 +; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]] ; GCN: ds_add_rtn_u32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]] ; GCN: buffer_store_dword [[RESULT]], ; GCN: s_endpgm @@ -44,6 +56,9 @@ define amdgpu_kernel void @lds_atomic_add_ret_i32(i32 addrspace(1)* %out, i32 ad } ; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; EG: LDS_ADD_RET * ; GCN: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 ; GCN: s_endpgm @@ -55,6 +70,9 @@ define amdgpu_kernel void @lds_atomic_add_ret_i32_offset(i32 addrspace(1)* %out, } ; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32_bad_si_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; EG: LDS_ADD_RET * ; SI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} ; CIVI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 @@ -70,7 +88,11 @@ define amdgpu_kernel void @lds_atomic_add_ret_i32_bad_si_offset(i32 addrspace(1) ; FUNC-LABEL: {{^}}lds_atomic_add1_ret_i32: ; EG: LDS_ADD_RET * -; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}} + +; SICIVI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}} ; GCN: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[ONE]] ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_add1_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { @@ -81,7 +103,11 @@ define amdgpu_kernel void @lds_atomic_add1_ret_i32(i32 addrspace(1)* %out, i32 a ; FUNC-LABEL: {{^}}lds_atomic_add1_ret_i32_offset: ; EG: LDS_ADD_RET * -; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}} + +; SICIVI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}} ; GCN: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[ONE]] offset:16 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_add1_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { @@ -92,6 +118,9 @@ define amdgpu_kernel void @lds_atomic_add1_ret_i32_offset(i32 addrspace(1)* %out } ; FUNC-LABEL: {{^}}lds_atomic_add1_ret_i32_bad_si_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; EG: LDS_ADD_RET * ; SI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} ; CIVI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 @@ -107,6 +136,10 @@ define amdgpu_kernel void @lds_atomic_add1_ret_i32_bad_si_offset(i32 addrspace(1 ; FUNC-LABEL: {{^}}lds_atomic_sub_ret_i32: ; EG: LDS_SUB_RET * + +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_sub_rtn_u32 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_sub_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { @@ -117,6 +150,10 @@ define amdgpu_kernel void @lds_atomic_sub_ret_i32(i32 addrspace(1)* %out, i32 ad ; FUNC-LABEL: {{^}}lds_atomic_sub_ret_i32_offset: ; EG: LDS_SUB_RET * + +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_sub_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_sub_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { @@ -128,7 +165,11 @@ define amdgpu_kernel void @lds_atomic_sub_ret_i32_offset(i32 addrspace(1)* %out, ; FUNC-LABEL: {{^}}lds_atomic_sub1_ret_i32: ; EG: LDS_SUB_RET * -; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}} + +; SICIVI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-DAG: 
v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}} ; GCN: ds_sub_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[ONE]] ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_sub1_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { @@ -139,7 +180,11 @@ define amdgpu_kernel void @lds_atomic_sub1_ret_i32(i32 addrspace(1)* %out, i32 a ; FUNC-LABEL: {{^}}lds_atomic_sub1_ret_i32_offset: ; EG: LDS_SUB_RET * -; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}} + +; SICIVI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}} ; GCN: ds_sub_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[ONE]] offset:16 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_sub1_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { @@ -151,6 +196,10 @@ define amdgpu_kernel void @lds_atomic_sub1_ret_i32_offset(i32 addrspace(1)* %out ; FUNC-LABEL: {{^}}lds_atomic_and_ret_i32: ; EG: LDS_AND_RET * + +; SICIVI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_and_rtn_b32 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_and_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { @@ -160,6 +209,9 @@ define amdgpu_kernel void @lds_atomic_and_ret_i32(i32 addrspace(1)* %out, i32 ad } ; FUNC-LABEL: {{^}}lds_atomic_and_ret_i32_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; EG: LDS_AND_RET * ; GCN: ds_and_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 ; GCN: s_endpgm @@ -171,6 +223,9 @@ define amdgpu_kernel void @lds_atomic_and_ret_i32_offset(i32 addrspace(1)* %out, } ; FUNC-LABEL: {{^}}lds_atomic_or_ret_i32: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; EG: LDS_OR_RET * ; GCN: ds_or_rtn_b32 ; GCN: s_endpgm @@ -181,6 +236,9 @@ define amdgpu_kernel void @lds_atomic_or_ret_i32(i32 addrspace(1)* %out, i32 add } ; FUNC-LABEL: {{^}}lds_atomic_or_ret_i32_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; EG: LDS_OR_RET * ; GCN: ds_or_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 ; GCN: s_endpgm @@ -192,6 +250,9 @@ define amdgpu_kernel void @lds_atomic_or_ret_i32_offset(i32 addrspace(1)* %out, } ; FUNC-LABEL: {{^}}lds_atomic_xor_ret_i32: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; EG: LDS_XOR_RET * ; GCN: ds_xor_rtn_b32 ; GCN: s_endpgm @@ -202,6 +263,9 @@ define amdgpu_kernel void @lds_atomic_xor_ret_i32(i32 addrspace(1)* %out, i32 ad } ; FUNC-LABEL: {{^}}lds_atomic_xor_ret_i32_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; EG: LDS_XOR_RET * ; GCN: ds_xor_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 ; GCN: s_endpgm @@ -221,6 +285,9 @@ define amdgpu_kernel void @lds_atomic_xor_ret_i32_offset(i32 addrspace(1)* %out, ; } ; FUNC-LABEL: {{^}}lds_atomic_min_ret_i32: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; EG: LDS_MIN_INT_RET * ; GCN: ds_min_rtn_i32 ; GCN: s_endpgm @@ -231,6 +298,9 @@ define amdgpu_kernel void @lds_atomic_min_ret_i32(i32 addrspace(1)* %out, i32 ad } ; FUNC-LABEL: {{^}}lds_atomic_min_ret_i32_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; EG: LDS_MIN_INT_RET * ; GCN: ds_min_rtn_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 ; GCN: s_endpgm @@ -242,6 +312,9 @@ define amdgpu_kernel void @lds_atomic_min_ret_i32_offset(i32 addrspace(1)* %out, } ; FUNC-LABEL: {{^}}lds_atomic_max_ret_i32: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; EG: LDS_MAX_INT_RET * ; GCN: ds_max_rtn_i32 ; GCN: s_endpgm @@ -252,6 +325,9 @@ define amdgpu_kernel void @lds_atomic_max_ret_i32(i32 addrspace(1)* %out, i32 ad } ; FUNC-LABEL: {{^}}lds_atomic_max_ret_i32_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; EG: LDS_MAX_INT_RET * ; GCN: ds_max_rtn_i32 v{{[0-9]+}}, 
v{{[0-9]+}}, v{{[0-9]+}} offset:16 ; GCN: s_endpgm @@ -263,6 +339,9 @@ define amdgpu_kernel void @lds_atomic_max_ret_i32_offset(i32 addrspace(1)* %out, } ; FUNC-LABEL: {{^}}lds_atomic_umin_ret_i32: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; EG: LDS_MIN_UINT_RET * ; GCN: ds_min_rtn_u32 ; GCN: s_endpgm @@ -273,6 +352,9 @@ define amdgpu_kernel void @lds_atomic_umin_ret_i32(i32 addrspace(1)* %out, i32 a } ; FUNC-LABEL: {{^}}lds_atomic_umin_ret_i32_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; EG: LDS_MIN_UINT_RET * ; GCN: ds_min_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 ; GCN: s_endpgm @@ -284,6 +366,9 @@ define amdgpu_kernel void @lds_atomic_umin_ret_i32_offset(i32 addrspace(1)* %out } ; FUNC-LABEL: {{^}}lds_atomic_umax_ret_i32: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; EG: LDS_MAX_UINT_RET * ; GCN: ds_max_rtn_u32 ; GCN: s_endpgm @@ -294,6 +379,9 @@ define amdgpu_kernel void @lds_atomic_umax_ret_i32(i32 addrspace(1)* %out, i32 a } ; FUNC-LABEL: {{^}}lds_atomic_umax_ret_i32_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; EG: LDS_MAX_UINT_RET * ; GCN: ds_max_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 ; GCN: s_endpgm @@ -305,9 +393,12 @@ define amdgpu_kernel void @lds_atomic_umax_ret_i32_offset(i32 addrspace(1)* %out } ; FUNC-LABEL: {{^}}lds_atomic_xchg_noret_i32: -; GCN: s_load_dword [[SPTR:s[0-9]+]], -; GCN: v_mov_b32_e32 [[DATA:v[0-9]+]], 4 -; GCN: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]] +; SICIVI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-DAG: s_load_dword [[SPTR:s[0-9]+]], +; GCN-DAG: v_mov_b32_e32 [[DATA:v[0-9]+]], 4 +; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]] ; GCN: ds_wrxchg_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]] ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_xchg_noret_i32(i32 addrspace(3)* %ptr) nounwind { @@ -316,6 +407,9 @@ define amdgpu_kernel void @lds_atomic_xchg_noret_i32(i32 addrspace(3)* %ptr) nou } ; FUNC-LABEL: {{^}}lds_atomic_xchg_noret_i32_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_wrxchg_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_xchg_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { @@ -325,9 +419,12 @@ define amdgpu_kernel void @lds_atomic_xchg_noret_i32_offset(i32 addrspace(3)* %p } ; FUNC-LABEL: {{^}}lds_atomic_add_noret_i32: -; GCN: s_load_dword [[SPTR:s[0-9]+]], -; GCN: v_mov_b32_e32 [[DATA:v[0-9]+]], 4 -; GCN: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]] +; SICIVI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-DAG: s_load_dword [[SPTR:s[0-9]+]], +; GCN-DAG: v_mov_b32_e32 [[DATA:v[0-9]+]], 4 +; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]] ; GCN: ds_add_u32 [[VPTR]], [[DATA]] ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_add_noret_i32(i32 addrspace(3)* %ptr) nounwind { @@ -336,6 +433,9 @@ define amdgpu_kernel void @lds_atomic_add_noret_i32(i32 addrspace(3)* %ptr) noun } ; FUNC-LABEL: {{^}}lds_atomic_add_noret_i32_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_add_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { @@ -345,6 +445,9 @@ define amdgpu_kernel void @lds_atomic_add_noret_i32_offset(i32 addrspace(3)* %pt } ; FUNC-LABEL: {{^}}lds_atomic_add_noret_i32_bad_si_offset +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; SI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} ; CIVI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 ; GCN: s_endpgm @@ -357,7 +460,10 @@ define amdgpu_kernel void 
@lds_atomic_add_noret_i32_bad_si_offset(i32 addrspace( } ; FUNC-LABEL: {{^}}lds_atomic_add1_noret_i32: -; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}} +; SICIVI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}} ; GCN: ds_add_u32 v{{[0-9]+}}, [[ONE]] ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_add1_noret_i32(i32 addrspace(3)* %ptr) nounwind { @@ -366,7 +472,10 @@ define amdgpu_kernel void @lds_atomic_add1_noret_i32(i32 addrspace(3)* %ptr) nou } ; FUNC-LABEL: {{^}}lds_atomic_add1_noret_i32_offset: -; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}} +; SICIVI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}} ; GCN: ds_add_u32 v{{[0-9]+}}, [[ONE]] offset:16 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_add1_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { @@ -376,6 +485,9 @@ define amdgpu_kernel void @lds_atomic_add1_noret_i32_offset(i32 addrspace(3)* %p } ; FUNC-LABEL: {{^}}lds_atomic_add1_noret_i32_bad_si_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; SI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} ; CIVI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 ; GCN: s_endpgm @@ -388,6 +500,9 @@ define amdgpu_kernel void @lds_atomic_add1_noret_i32_bad_si_offset(i32 addrspace } ; FUNC-LABEL: {{^}}lds_atomic_sub_noret_i32: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_sub_u32 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_sub_noret_i32(i32 addrspace(3)* %ptr) nounwind { @@ -396,6 +511,9 @@ define amdgpu_kernel void @lds_atomic_sub_noret_i32(i32 addrspace(3)* %ptr) noun } ; FUNC-LABEL: {{^}}lds_atomic_sub_noret_i32_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_sub_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_sub_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { @@ -405,7 +523,10 @@ define amdgpu_kernel void @lds_atomic_sub_noret_i32_offset(i32 addrspace(3)* %pt } ; FUNC-LABEL: {{^}}lds_atomic_sub1_noret_i32: -; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}} +; SICIVI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}} ; GCN: ds_sub_u32 v{{[0-9]+}}, [[ONE]] ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_sub1_noret_i32(i32 addrspace(3)* %ptr) nounwind { @@ -414,7 +535,10 @@ define amdgpu_kernel void @lds_atomic_sub1_noret_i32(i32 addrspace(3)* %ptr) nou } ; FUNC-LABEL: {{^}}lds_atomic_sub1_noret_i32_offset: -; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}} +; SICIVI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}} ; GCN: ds_sub_u32 v{{[0-9]+}}, [[ONE]] offset:16 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_sub1_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { @@ -424,6 +548,9 @@ define amdgpu_kernel void @lds_atomic_sub1_noret_i32_offset(i32 addrspace(3)* %p } ; FUNC-LABEL: {{^}}lds_atomic_and_noret_i32: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_and_b32 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_and_noret_i32(i32 addrspace(3)* %ptr) nounwind { @@ -432,6 +559,9 @@ define amdgpu_kernel void @lds_atomic_and_noret_i32(i32 addrspace(3)* %ptr) noun } ; FUNC-LABEL: {{^}}lds_atomic_and_noret_i32_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_and_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_and_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { @@ -441,6 +571,9 @@ define amdgpu_kernel void @lds_atomic_and_noret_i32_offset(i32 addrspace(3)* %pt } ; FUNC-LABEL: {{^}}lds_atomic_or_noret_i32: +; 
SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_or_b32 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_or_noret_i32(i32 addrspace(3)* %ptr) nounwind { @@ -449,6 +582,9 @@ define amdgpu_kernel void @lds_atomic_or_noret_i32(i32 addrspace(3)* %ptr) nounw } ; FUNC-LABEL: {{^}}lds_atomic_or_noret_i32_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_or_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_or_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { @@ -458,6 +594,9 @@ define amdgpu_kernel void @lds_atomic_or_noret_i32_offset(i32 addrspace(3)* %ptr } ; FUNC-LABEL: {{^}}lds_atomic_xor_noret_i32: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_xor_b32 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_xor_noret_i32(i32 addrspace(3)* %ptr) nounwind { @@ -466,6 +605,9 @@ define amdgpu_kernel void @lds_atomic_xor_noret_i32(i32 addrspace(3)* %ptr) noun } ; FUNC-LABEL: {{^}}lds_atomic_xor_noret_i32_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_xor_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_xor_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { @@ -482,6 +624,9 @@ define amdgpu_kernel void @lds_atomic_xor_noret_i32_offset(i32 addrspace(3)* %pt ; } ; FUNC-LABEL: {{^}}lds_atomic_min_noret_i32: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_min_i32 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_min_noret_i32(i32 addrspace(3)* %ptr) nounwind { @@ -490,6 +635,9 @@ define amdgpu_kernel void @lds_atomic_min_noret_i32(i32 addrspace(3)* %ptr) noun } ; FUNC-LABEL: {{^}}lds_atomic_min_noret_i32_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_min_i32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_min_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { @@ -499,6 +647,9 @@ define amdgpu_kernel void @lds_atomic_min_noret_i32_offset(i32 addrspace(3)* %pt } ; FUNC-LABEL: {{^}}lds_atomic_max_noret_i32: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_max_i32 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_max_noret_i32(i32 addrspace(3)* %ptr) nounwind { @@ -507,6 +658,9 @@ define amdgpu_kernel void @lds_atomic_max_noret_i32(i32 addrspace(3)* %ptr) noun } ; FUNC-LABEL: {{^}}lds_atomic_max_noret_i32_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_max_i32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_max_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { @@ -516,6 +670,9 @@ define amdgpu_kernel void @lds_atomic_max_noret_i32_offset(i32 addrspace(3)* %pt } ; FUNC-LABEL: {{^}}lds_atomic_umin_noret_i32: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_min_u32 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_umin_noret_i32(i32 addrspace(3)* %ptr) nounwind { @@ -524,6 +681,9 @@ define amdgpu_kernel void @lds_atomic_umin_noret_i32(i32 addrspace(3)* %ptr) nou } ; FUNC-LABEL: {{^}}lds_atomic_umin_noret_i32_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_min_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_umin_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { @@ -533,6 +693,9 @@ define amdgpu_kernel void @lds_atomic_umin_noret_i32_offset(i32 addrspace(3)* %p } ; FUNC-LABEL: {{^}}lds_atomic_umax_noret_i32: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_max_u32 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_umax_noret_i32(i32 addrspace(3)* %ptr) nounwind { @@ -541,6 +704,9 @@ define 
amdgpu_kernel void @lds_atomic_umax_noret_i32(i32 addrspace(3)* %ptr) nou } ; FUNC-LABEL: {{^}}lds_atomic_umax_noret_i32_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_max_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_umax_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { diff --git a/test/CodeGen/AMDGPU/local-atomics64.ll b/test/CodeGen/AMDGPU/local-atomics64.ll index 6572a7bcd4fe..6155bfcf1aef 100644 --- a/test/CodeGen/AMDGPU/local-atomics64.ll +++ b/test/CodeGen/AMDGPU/local-atomics64.ll @@ -1,7 +1,11 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=SI -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=VI -check-prefix=GCN %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -strict-whitespace -check-prefixes=GCN,SI,SICIVI %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -strict-whitespace -check-prefixes=GCN,VI,SICIVI,GFX89 %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -strict-whitespace -check-prefixes=GCN,GFX9,GFX89 %s ; GCN-LABEL: {{^}}lds_atomic_xchg_ret_i64: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_wrxchg_rtn_b64 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_xchg_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { @@ -11,6 +15,9 @@ define amdgpu_kernel void @lds_atomic_xchg_ret_i64(i64 addrspace(1)* %out, i64 a } ; GCN-LABEL: {{^}}lds_atomic_xchg_ret_i64_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_wrxchg_rtn_b64 {{.*}} offset:32 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_xchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { @@ -21,6 +28,9 @@ define amdgpu_kernel void @lds_atomic_xchg_ret_i64_offset(i64 addrspace(1)* %out } ; GCN-LABEL: {{^}}lds_atomic_add_ret_i64: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_add_rtn_u64 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_add_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { @@ -30,10 +40,13 @@ define amdgpu_kernel void @lds_atomic_add_ret_i64(i64 addrspace(1)* %out, i64 ad } ; GCN-LABEL: {{^}}lds_atomic_add_ret_i64_offset: -; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb -; VI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c -; GCN: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9 -; GCN: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0 +; SICIVI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; SI-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb +; GFX89-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c +; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9 +; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0 ; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]] ; GCN: ds_add_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32 ; GCN: buffer_store_dwordx2 [[RESULT]], @@ -46,9 +59,12 @@ define amdgpu_kernel void @lds_atomic_add_ret_i64_offset(i64 addrspace(1)* %out, } ; GCN-LABEL: {{^}}lds_atomic_add1_ret_i64: -; GCN: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 1{{$}} -; GCN: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0{{$}} -; GCN: ds_add_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} +; SICIVI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-DAG: v_mov_b32_e32 
v[[LOVDATA:[0-9]+]], 1{{$}} +; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0{{$}} +; GCN: ds_add_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} ; GCN: buffer_store_dwordx2 [[RESULT]], ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_add1_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { @@ -58,6 +74,9 @@ define amdgpu_kernel void @lds_atomic_add1_ret_i64(i64 addrspace(1)* %out, i64 a } ; GCN-LABEL: {{^}}lds_atomic_add1_ret_i64_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_add_rtn_u64 {{.*}} offset:32 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_add1_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { @@ -68,6 +87,9 @@ define amdgpu_kernel void @lds_atomic_add1_ret_i64_offset(i64 addrspace(1)* %out } ; GCN-LABEL: {{^}}lds_atomic_sub_ret_i64: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_sub_rtn_u64 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_sub_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { @@ -77,6 +99,9 @@ define amdgpu_kernel void @lds_atomic_sub_ret_i64(i64 addrspace(1)* %out, i64 ad } ; GCN-LABEL: {{^}}lds_atomic_sub_ret_i64_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_sub_rtn_u64 {{.*}} offset:32 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_sub_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { @@ -87,9 +112,12 @@ define amdgpu_kernel void @lds_atomic_sub_ret_i64_offset(i64 addrspace(1)* %out, } ; GCN-LABEL: {{^}}lds_atomic_sub1_ret_i64: -; GCN: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 1{{$}} -; GCN: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0{{$}} -; GCN: ds_sub_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} +; SICIVI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 1{{$}} +; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0{{$}} +; GCN: ds_sub_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} ; GCN: buffer_store_dwordx2 [[RESULT]], ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_sub1_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { @@ -99,6 +127,9 @@ define amdgpu_kernel void @lds_atomic_sub1_ret_i64(i64 addrspace(1)* %out, i64 a } ; GCN-LABEL: {{^}}lds_atomic_sub1_ret_i64_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_sub_rtn_u64 {{.*}} offset:32 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_sub1_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { @@ -109,6 +140,9 @@ define amdgpu_kernel void @lds_atomic_sub1_ret_i64_offset(i64 addrspace(1)* %out } ; GCN-LABEL: {{^}}lds_atomic_and_ret_i64: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_and_rtn_b64 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_and_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { @@ -118,6 +152,9 @@ define amdgpu_kernel void @lds_atomic_and_ret_i64(i64 addrspace(1)* %out, i64 ad } ; GCN-LABEL: {{^}}lds_atomic_and_ret_i64_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_and_rtn_b64 {{.*}} offset:32 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_and_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { @@ -128,6 +165,9 @@ define amdgpu_kernel void @lds_atomic_and_ret_i64_offset(i64 addrspace(1)* %out, } ; GCN-LABEL: {{^}}lds_atomic_or_ret_i64: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_or_rtn_b64 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_or_ret_i64(i64 addrspace(1)* %out, i64 
addrspace(3)* %ptr) nounwind { @@ -137,6 +177,9 @@ define amdgpu_kernel void @lds_atomic_or_ret_i64(i64 addrspace(1)* %out, i64 add } ; GCN-LABEL: {{^}}lds_atomic_or_ret_i64_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_or_rtn_b64 {{.*}} offset:32 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_or_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { @@ -147,6 +190,9 @@ define amdgpu_kernel void @lds_atomic_or_ret_i64_offset(i64 addrspace(1)* %out, } ; GCN-LABEL: {{^}}lds_atomic_xor_ret_i64: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_xor_rtn_b64 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_xor_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { @@ -156,6 +202,9 @@ define amdgpu_kernel void @lds_atomic_xor_ret_i64(i64 addrspace(1)* %out, i64 ad } ; GCN-LABEL: {{^}}lds_atomic_xor_ret_i64_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_xor_rtn_b64 {{.*}} offset:32 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_xor_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { @@ -174,6 +223,9 @@ define amdgpu_kernel void @lds_atomic_xor_ret_i64_offset(i64 addrspace(1)* %out, ; } ; GCN-LABEL: {{^}}lds_atomic_min_ret_i64: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_min_rtn_i64 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_min_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { @@ -183,6 +235,9 @@ define amdgpu_kernel void @lds_atomic_min_ret_i64(i64 addrspace(1)* %out, i64 ad } ; GCN-LABEL: {{^}}lds_atomic_min_ret_i64_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_min_rtn_i64 {{.*}} offset:32 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_min_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { @@ -193,6 +248,9 @@ define amdgpu_kernel void @lds_atomic_min_ret_i64_offset(i64 addrspace(1)* %out, } ; GCN-LABEL: {{^}}lds_atomic_max_ret_i64: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_max_rtn_i64 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_max_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { @@ -202,6 +260,9 @@ define amdgpu_kernel void @lds_atomic_max_ret_i64(i64 addrspace(1)* %out, i64 ad } ; GCN-LABEL: {{^}}lds_atomic_max_ret_i64_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_max_rtn_i64 {{.*}} offset:32 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_max_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { @@ -212,6 +273,9 @@ define amdgpu_kernel void @lds_atomic_max_ret_i64_offset(i64 addrspace(1)* %out, } ; GCN-LABEL: {{^}}lds_atomic_umin_ret_i64: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_min_rtn_u64 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_umin_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { @@ -221,6 +285,9 @@ define amdgpu_kernel void @lds_atomic_umin_ret_i64(i64 addrspace(1)* %out, i64 a } ; GCN-LABEL: {{^}}lds_atomic_umin_ret_i64_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_min_rtn_u64 {{.*}} offset:32 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_umin_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { @@ -231,6 +298,9 @@ define amdgpu_kernel void @lds_atomic_umin_ret_i64_offset(i64 addrspace(1)* %out } ; GCN-LABEL: {{^}}lds_atomic_umax_ret_i64: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_max_rtn_u64 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_umax_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { @@ -240,6 +310,9 @@ 
define amdgpu_kernel void @lds_atomic_umax_ret_i64(i64 addrspace(1)* %out, i64 a } ; GCN-LABEL: {{^}}lds_atomic_umax_ret_i64_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_max_rtn_u64 {{.*}} offset:32 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_umax_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind { @@ -250,6 +323,9 @@ define amdgpu_kernel void @lds_atomic_umax_ret_i64_offset(i64 addrspace(1)* %out } ; GCN-LABEL: {{^}}lds_atomic_xchg_noret_i64: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_wrxchg_rtn_b64 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_xchg_noret_i64(i64 addrspace(3)* %ptr) nounwind { @@ -258,6 +334,9 @@ define amdgpu_kernel void @lds_atomic_xchg_noret_i64(i64 addrspace(3)* %ptr) nou } ; GCN-LABEL: {{^}}lds_atomic_xchg_noret_i64_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_wrxchg_rtn_b64 {{.*}} offset:32 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_xchg_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { @@ -267,6 +346,9 @@ define amdgpu_kernel void @lds_atomic_xchg_noret_i64_offset(i64 addrspace(3)* %p } ; GCN-LABEL: {{^}}lds_atomic_add_noret_i64: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_add_u64 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_add_noret_i64(i64 addrspace(3)* %ptr) nounwind { @@ -275,12 +357,15 @@ define amdgpu_kernel void @lds_atomic_add_noret_i64(i64 addrspace(3)* %ptr) noun } ; GCN-LABEL: {{^}}lds_atomic_add_noret_i64_offset: -; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9 -; VI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x24 -; GCN: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9 -; GCN: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0 +; SICIVI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; SI-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9 +; GFX89-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x24 +; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9 +; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0 ; GCN: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]] -; GCN: ds_add_u64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32 +; GCN: ds_add_u64 {{v[0-9]+}}, v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_add_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { %gep = getelementptr i64, i64 addrspace(3)* %ptr, i64 4 @@ -289,9 +374,12 @@ define amdgpu_kernel void @lds_atomic_add_noret_i64_offset(i64 addrspace(3)* %pt } ; GCN-LABEL: {{^}}lds_atomic_add1_noret_i64: +; SICIVI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 1{{$}} ; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0{{$}} -; GCN: ds_add_u64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} +; GCN: ds_add_u64 {{v[0-9]+}}, v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_add1_noret_i64(i64 addrspace(3)* %ptr) nounwind { %result = atomicrmw add i64 addrspace(3)* %ptr, i64 1 seq_cst @@ -299,6 +387,9 @@ define amdgpu_kernel void @lds_atomic_add1_noret_i64(i64 addrspace(3)* %ptr) nou } ; GCN-LABEL: {{^}}lds_atomic_add1_noret_i64_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_add_u64 {{.*}} offset:32 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_add1_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { @@ -308,6 +399,9 @@ define amdgpu_kernel void @lds_atomic_add1_noret_i64_offset(i64 addrspace(3)* %p } ; GCN-LABEL: {{^}}lds_atomic_sub_noret_i64: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_sub_u64 ; GCN: s_endpgm define amdgpu_kernel void 
@lds_atomic_sub_noret_i64(i64 addrspace(3)* %ptr) nounwind { @@ -316,6 +410,9 @@ define amdgpu_kernel void @lds_atomic_sub_noret_i64(i64 addrspace(3)* %ptr) noun } ; GCN-LABEL: {{^}}lds_atomic_sub_noret_i64_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_sub_u64 {{.*}} offset:32 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_sub_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { @@ -325,9 +422,12 @@ define amdgpu_kernel void @lds_atomic_sub_noret_i64_offset(i64 addrspace(3)* %pt } ; GCN-LABEL: {{^}}lds_atomic_sub1_noret_i64: -; GCN: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 1{{$}} -; GCN: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0{{$}} -; GCN: ds_sub_u64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} +; SICIVI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 1{{$}} +; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0{{$}} +; GCN: ds_sub_u64 {{v[0-9]+}}, v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_sub1_noret_i64(i64 addrspace(3)* %ptr) nounwind { %result = atomicrmw sub i64 addrspace(3)* %ptr, i64 1 seq_cst @@ -335,6 +435,9 @@ define amdgpu_kernel void @lds_atomic_sub1_noret_i64(i64 addrspace(3)* %ptr) nou } ; GCN-LABEL: {{^}}lds_atomic_sub1_noret_i64_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_sub_u64 {{.*}} offset:32 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_sub1_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { @@ -344,6 +447,9 @@ define amdgpu_kernel void @lds_atomic_sub1_noret_i64_offset(i64 addrspace(3)* %p } ; GCN-LABEL: {{^}}lds_atomic_and_noret_i64: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_and_b64 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_and_noret_i64(i64 addrspace(3)* %ptr) nounwind { @@ -352,6 +458,9 @@ define amdgpu_kernel void @lds_atomic_and_noret_i64(i64 addrspace(3)* %ptr) noun } ; GCN-LABEL: {{^}}lds_atomic_and_noret_i64_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_and_b64 {{.*}} offset:32 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_and_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { @@ -361,6 +470,9 @@ define amdgpu_kernel void @lds_atomic_and_noret_i64_offset(i64 addrspace(3)* %pt } ; GCN-LABEL: {{^}}lds_atomic_or_noret_i64: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_or_b64 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_or_noret_i64(i64 addrspace(3)* %ptr) nounwind { @@ -369,6 +481,9 @@ define amdgpu_kernel void @lds_atomic_or_noret_i64(i64 addrspace(3)* %ptr) nounw } ; GCN-LABEL: {{^}}lds_atomic_or_noret_i64_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_or_b64 {{.*}} offset:32 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_or_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { @@ -378,6 +493,9 @@ define amdgpu_kernel void @lds_atomic_or_noret_i64_offset(i64 addrspace(3)* %ptr } ; GCN-LABEL: {{^}}lds_atomic_xor_noret_i64: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_xor_b64 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_xor_noret_i64(i64 addrspace(3)* %ptr) nounwind { @@ -386,6 +504,9 @@ define amdgpu_kernel void @lds_atomic_xor_noret_i64(i64 addrspace(3)* %ptr) noun } ; GCN-LABEL: {{^}}lds_atomic_xor_noret_i64_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_xor_b64 {{.*}} offset:32 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_xor_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { @@ -402,6 +523,9 @@ define amdgpu_kernel void @lds_atomic_xor_noret_i64_offset(i64 addrspace(3)* %pt ; } ; GCN-LABEL: {{^}}lds_atomic_min_noret_i64: +; 
SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_min_i64 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_min_noret_i64(i64 addrspace(3)* %ptr) nounwind { @@ -410,6 +534,9 @@ define amdgpu_kernel void @lds_atomic_min_noret_i64(i64 addrspace(3)* %ptr) noun } ; GCN-LABEL: {{^}}lds_atomic_min_noret_i64_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_min_i64 {{.*}} offset:32 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_min_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { @@ -419,6 +546,9 @@ define amdgpu_kernel void @lds_atomic_min_noret_i64_offset(i64 addrspace(3)* %pt } ; GCN-LABEL: {{^}}lds_atomic_max_noret_i64: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_max_i64 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_max_noret_i64(i64 addrspace(3)* %ptr) nounwind { @@ -427,6 +557,9 @@ define amdgpu_kernel void @lds_atomic_max_noret_i64(i64 addrspace(3)* %ptr) noun } ; GCN-LABEL: {{^}}lds_atomic_max_noret_i64_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_max_i64 {{.*}} offset:32 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_max_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { @@ -436,6 +569,9 @@ define amdgpu_kernel void @lds_atomic_max_noret_i64_offset(i64 addrspace(3)* %pt } ; GCN-LABEL: {{^}}lds_atomic_umin_noret_i64: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_min_u64 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_umin_noret_i64(i64 addrspace(3)* %ptr) nounwind { @@ -444,6 +580,9 @@ define amdgpu_kernel void @lds_atomic_umin_noret_i64(i64 addrspace(3)* %ptr) nou } ; GCN-LABEL: {{^}}lds_atomic_umin_noret_i64_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_min_u64 {{.*}} offset:32 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_umin_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { @@ -453,6 +592,9 @@ define amdgpu_kernel void @lds_atomic_umin_noret_i64_offset(i64 addrspace(3)* %p } ; GCN-LABEL: {{^}}lds_atomic_umax_noret_i64: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_max_u64 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_umax_noret_i64(i64 addrspace(3)* %ptr) nounwind { @@ -461,6 +603,9 @@ define amdgpu_kernel void @lds_atomic_umax_noret_i64(i64 addrspace(3)* %ptr) nou } ; GCN-LABEL: {{^}}lds_atomic_umax_noret_i64_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_max_u64 {{.*}} offset:32 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_umax_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { diff --git a/test/CodeGen/AMDGPU/loop_break.ll b/test/CodeGen/AMDGPU/loop_break.ll index 4acd1b247957..b2641cd4d2e4 100644 --- a/test/CodeGen/AMDGPU/loop_break.ll +++ b/test/CodeGen/AMDGPU/loop_break.ll @@ -31,7 +31,7 @@ ; GCN: s_and_b64 vcc, exec, vcc ; GCN-NEXT: s_cbranch_vccnz [[FLOW:BB[0-9]+_[0-9]+]] -; GCN: ; BB#2: ; %bb4 +; GCN: ; %bb.2: ; %bb4 ; GCN: buffer_load_dword ; GCN: v_cmp_ge_i32_e32 vcc, ; GCN: s_or_b64 [[MASK]], vcc, [[INITMASK]] @@ -41,7 +41,7 @@ ; GCN: s_andn2_b64 exec, exec, [[MASK]] ; GCN-NEXT: s_cbranch_execnz [[LOOP_ENTRY]] -; GCN: ; BB#4: ; %bb9 +; GCN: ; %bb.4: ; %bb9 ; GCN-NEXT: s_endpgm define amdgpu_kernel void @break_loop(i32 %arg) #0 { bb: diff --git a/test/CodeGen/AMDGPU/lower-mem-intrinsics.ll b/test/CodeGen/AMDGPU/lower-mem-intrinsics.ll index e1a2af6c7ef9..498a65dc0a64 100644 --- a/test/CodeGen/AMDGPU/lower-mem-intrinsics.ll +++ b/test/CodeGen/AMDGPU/lower-mem-intrinsics.ll @@ -1,47 +1,39 @@ ; RUN: opt -S -amdgpu-lower-intrinsics %s | FileCheck -check-prefix=OPT %s -; RUN: opt -S -amdgpu-lower-intrinsics -use-wide-memcpy-loop-lowering=true %s 
| FileCheck -check-prefix=WOPT %s -declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture readonly, i64, i32, i1) #1 -declare void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* nocapture, i8 addrspace(3)* nocapture readonly, i32, i32, i1) #1 +declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture readonly, i64, i1) #1 +declare void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* nocapture, i8 addrspace(3)* nocapture readonly, i32, i1) #1 -declare void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture readonly, i64, i32, i1) #1 -declare void @llvm.memset.p1i8.i64(i8 addrspace(1)* nocapture, i8, i64, i32, i1) #1 +declare void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture readonly, i64, i1) #1 +declare void @llvm.memset.p1i8.i64(i8 addrspace(1)* nocapture, i8, i64, i1) #1 ; Test the upper bound for sizes to leave ; OPT-LABEL: @max_size_small_static_memcpy_caller0( -; OPT: call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1024, i32 1, i1 false) +; OPT: call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1024, i1 false) define amdgpu_kernel void @max_size_small_static_memcpy_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 { - call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1024, i32 1, i1 false) + call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1024, i1 false) ret void } ; Smallest static size which will be expanded ; OPT-LABEL: @min_size_large_static_memcpy_caller0( ; OPT-NOT: call -; OPT: getelementptr -; OPT-NEXT: load i8 -; OPT: getelementptr -; OPT-NEXT: store i8 - -; WOPT-LABEL: @min_size_large_static_memcpy_caller0( -; WOPT-NOT: call -; WOPT: br label %load-store-loop -; WOPT: [[T1:%[0-9]+]] = getelementptr inbounds i8, i8 addrspace(1)* %src, i64 %loop-index -; WOPT-NEXT: [[T2:%[0-9]+]] = load i8, i8 addrspace(1)* [[T1]] -; WOPT-NEXT: [[T3:%[0-9]+]] = getelementptr inbounds i8, i8 addrspace(1)* %dst, i64 %loop-index -; WOPT-NEXT: store i8 [[T2]], i8 addrspace(1)* [[T3]] -; WOPT-NEXT: [[T4:%[0-9]+]] = add i64 %loop-index, 1 -; WOPT-NEXT: [[T5:%[0-9]+]] = icmp ult i64 [[T4]], 1025 -; WOPT-NEXT: br i1 [[T5]], label %load-store-loop, label %memcpy-split +; OPT: br label %load-store-loop +; OPT: [[T1:%[0-9]+]] = getelementptr inbounds i8, i8 addrspace(1)* %src, i64 %loop-index +; OPT-NEXT: [[T2:%[0-9]+]] = load i8, i8 addrspace(1)* [[T1]] +; OPT-NEXT: [[T3:%[0-9]+]] = getelementptr inbounds i8, i8 addrspace(1)* %dst, i64 %loop-index +; OPT-NEXT: store i8 [[T2]], i8 addrspace(1)* [[T3]] +; OPT-NEXT: [[T4:%[0-9]+]] = add i64 %loop-index, 1 +; OPT-NEXT: [[T5:%[0-9]+]] = icmp ult i64 [[T4]], 1025 +; OPT-NEXT: br i1 [[T5]], label %load-store-loop, label %memcpy-split define amdgpu_kernel void @min_size_large_static_memcpy_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 { - call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1025, i32 1, i1 false) + call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1025, i1 false) ret void } ; OPT-LABEL: @max_size_small_static_memmove_caller0( -; OPT: call void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1024, i32 1, i1 false) +; OPT: call void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1024, i1 false) define 
amdgpu_kernel void @max_size_small_static_memmove_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 { - call void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1024, i32 1, i1 false) + call void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1024, i1 false) ret void } @@ -52,14 +44,14 @@ define amdgpu_kernel void @max_size_small_static_memmove_caller0(i8 addrspace(1) ; OPT: getelementptr ; OPT-NEXT: store i8 define amdgpu_kernel void @min_size_large_static_memmove_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 { - call void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1025, i32 1, i1 false) + call void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1025, i1 false) ret void } ; OPT-LABEL: @max_size_small_static_memset_caller0( -; OPT: call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %dst, i8 %val, i64 1024, i32 1, i1 false) +; OPT: call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %dst, i8 %val, i64 1024, i1 false) define amdgpu_kernel void @max_size_small_static_memset_caller0(i8 addrspace(1)* %dst, i8 %val) #0 { - call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %dst, i8 %val, i64 1024, i32 1, i1 false) + call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %dst, i8 %val, i64 1024, i1 false) ret void } @@ -68,7 +60,7 @@ define amdgpu_kernel void @max_size_small_static_memset_caller0(i8 addrspace(1)* ; OPT: getelementptr ; OPT: store i8 define amdgpu_kernel void @min_size_large_static_memset_caller0(i8 addrspace(1)* %dst, i8 %val) #0 { - call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %dst, i8 %val, i64 1025, i32 1, i1 false) + call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %dst, i8 %val, i64 1025, i1 false) ret void } @@ -76,7 +68,7 @@ define amdgpu_kernel void @min_size_large_static_memset_caller0(i8 addrspace(1)* ; OPT-NOT: call ; OPT: phi define amdgpu_kernel void @variable_memcpy_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %n) #0 { - call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %n, i32 1, i1 false) + call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %n, i1 false) ret void } @@ -84,7 +76,7 @@ define amdgpu_kernel void @variable_memcpy_caller0(i8 addrspace(1)* %dst, i8 add ; OPT-NOT: call ; OPT: phi define amdgpu_kernel void @variable_memcpy_caller1(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %n) #0 { - call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %n, i32 1, i1 false) + call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %n, i1 false) ret void } @@ -95,8 +87,8 @@ define amdgpu_kernel void @variable_memcpy_caller1(i8 addrspace(1)* %dst, i8 add ; OPT: phi ; OPT-NOT: call define amdgpu_kernel void @memcpy_multi_use_one_function(i8 addrspace(1)* %dst0, i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 %n, i64 %m) #0 { - call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst0, i8 addrspace(1)* %src, i64 %n, i32 1, i1 false) - call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 %m, i32 1, i1 false) + call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst0, i8 addrspace(1)* %src, i64 %n, i1 false) + call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 %m, i1 false) ret void } @@ -107,7 +99,7 @@ define amdgpu_kernel void @memcpy_multi_use_one_function(i8 addrspace(1)* %dst0, ; OPT: getelementptr inbounds 
i8, i8 addrspace(1)* ; OPT: store i8 define amdgpu_kernel void @memcpy_alt_type(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i32 %n) #0 { - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i32 %n, i32 1, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i32 %n, i1 false) ret void } @@ -118,10 +110,10 @@ define amdgpu_kernel void @memcpy_alt_type(i8 addrspace(1)* %dst, i8 addrspace(3 ; OPT: getelementptr inbounds i8, i8 addrspace(1)* ; OPT: store i8 -; OPT: call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 102, i32 1, i1 false) +; OPT: call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 102, i1 false) define amdgpu_kernel void @memcpy_multi_use_one_function_keep_small(i8 addrspace(1)* %dst0, i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 %n) #0 { - call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst0, i8 addrspace(1)* %src, i64 %n, i32 1, i1 false) - call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 102, i32 1, i1 false) + call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst0, i8 addrspace(1)* %src, i64 %n, i1 false) + call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 102, i1 false) ret void } diff --git a/test/CodeGen/AMDGPU/lshr.v2i16.ll b/test/CodeGen/AMDGPU/lshr.v2i16.ll index 72aac2322a43..f6b6a79ae6d2 100644 --- a/test/CodeGen/AMDGPU/lshr.v2i16.ll +++ b/test/CodeGen/AMDGPU/lshr.v2i16.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=gfx901 -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=CIVI %s ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=CIVI %s diff --git a/test/CodeGen/AMDGPU/max.i16.ll b/test/CodeGen/AMDGPU/max.i16.ll index abd75258c4d4..8bfb616daa62 100644 --- a/test/CodeGen/AMDGPU/max.i16.ll +++ b/test/CodeGen/AMDGPU/max.i16.ll @@ -1,5 +1,5 @@ ; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=VIPLUS %s -; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=VIPLUS %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=VIPLUS %s ; FIXME: Need to handle non-uniform case for function below (load without gep). 
; GCN-LABEL: {{^}}v_test_imax_sge_i16: diff --git a/test/CodeGen/AMDGPU/memory-legalizer-atomic-fence.ll b/test/CodeGen/AMDGPU/memory-legalizer-atomic-fence.ll index a563cfd02831..c8e920a1854a 100644 --- a/test/CodeGen/AMDGPU/memory-legalizer-atomic-fence.ll +++ b/test/CodeGen/AMDGPU/memory-legalizer-atomic-fence.ll @@ -3,7 +3,7 @@ ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=GCN -check-prefix=GFX8 %s ; FUNC-LABEL: {{^}}system_acquire -; GCN: BB#0 +; GCN: %bb.0 ; GCN-NOT: ATOMIC_FENCE ; GFX6: s_waitcnt vmcnt(0){{$}} ; GFX6-NEXT: buffer_wbinvl1{{$}} @@ -17,7 +17,7 @@ entry: } ; FUNC-LABEL: {{^}}system_release -; GCN: BB#0 +; GCN: %bb.0 ; GCN-NOT: ATOMIC_FENCE ; GCN: s_waitcnt vmcnt(0){{$}} ; GCN: s_endpgm @@ -28,7 +28,7 @@ entry: } ; FUNC-LABEL: {{^}}system_acq_rel -; GCN: BB#0 +; GCN: %bb.0 ; GCN-NOT: ATOMIC_FENCE ; GCN: s_waitcnt vmcnt(0){{$}} ; GFX6: buffer_wbinvl1{{$}} @@ -41,7 +41,7 @@ entry: } ; FUNC-LABEL: {{^}}system_seq_cst -; GCN: BB#0 +; GCN: %bb.0 ; GCN-NOT: ATOMIC_FENCE ; GCN: s_waitcnt vmcnt(0){{$}} ; GFX6: buffer_wbinvl1{{$}} @@ -54,7 +54,7 @@ entry: } ; FUNC-LABEL: {{^}}singlethread_acquire -; GCN: BB#0 +; GCN: %bb.0 ; GCN-NOT: ATOMIC_FENCE ; GCN: s_endpgm define amdgpu_kernel void @singlethread_acquire() { @@ -64,7 +64,7 @@ entry: } ; FUNC-LABEL: {{^}}singlethread_release -; GCN: BB#0 +; GCN: %bb.0 ; GCN-NOT: ATOMIC_FENCE ; GCN: s_endpgm define amdgpu_kernel void @singlethread_release() { @@ -74,7 +74,7 @@ entry: } ; FUNC-LABEL: {{^}}singlethread_acq_rel -; GCN: BB#0 +; GCN: %bb.0 ; GCN-NOT: ATOMIC_FENCE ; GCN: s_endpgm define amdgpu_kernel void @singlethread_acq_rel() { @@ -84,7 +84,7 @@ entry: } ; FUNC-LABEL: {{^}}singlethread_seq_cst -; GCN: BB#0 +; GCN: %bb.0 ; GCN-NOT: ATOMIC_FENCE ; GCN: s_endpgm define amdgpu_kernel void @singlethread_seq_cst() { @@ -94,7 +94,7 @@ entry: } ; FUNC-LABEL: {{^}}agent_acquire -; GCN: BB#0 +; GCN: %bb.0 ; GCN-NOT: ATOMIC_FENCE ; GFX6: s_waitcnt vmcnt(0){{$}} ; GFX6-NEXT: buffer_wbinvl1{{$}} @@ -108,7 +108,7 @@ entry: } ; FUNC-LABEL: {{^}}agent_release -; GCN: BB#0 +; GCN: %bb.0 ; GCN-NOT: ATOMIC_FENCE ; GCN: s_waitcnt vmcnt(0){{$}} ; GCN: s_endpgm @@ -119,7 +119,7 @@ entry: } ; FUNC-LABEL: {{^}}agent_acq_rel -; GCN: BB#0 +; GCN: %bb.0 ; GCN-NOT: ATOMIC_FENCE ; GCN: s_waitcnt vmcnt(0){{$}} ; GFX6: buffer_wbinvl1{{$}} @@ -132,7 +132,7 @@ entry: } ; FUNC-LABEL: {{^}}agent_seq_cst -; GCN: BB#0 +; GCN: %bb.0 ; GCN-NOT: ATOMIC_FENCE ; GCN: s_waitcnt vmcnt(0){{$}} ; GFX6: buffer_wbinvl1{{$}} @@ -145,7 +145,7 @@ entry: } ; FUNC-LABEL: {{^}}workgroup_acquire -; GCN: BB#0 +; GCN: %bb.0 ; GCN-NOT: ATOMIC_FENCE ; GCN: s_endpgm define amdgpu_kernel void @workgroup_acquire() { @@ -155,7 +155,7 @@ entry: } ; FUNC-LABEL: {{^}}workgroup_release -; GCN: BB#0 +; GCN: %bb.0 ; GCN-NOT: ATOMIC_FENCE ; GCN: s_endpgm define amdgpu_kernel void @workgroup_release() { @@ -165,7 +165,7 @@ entry: } ; FUNC-LABEL: {{^}}workgroup_acq_rel -; GCN: BB#0 +; GCN: %bb.0 ; GCN-NOT: ATOMIC_FENCE ; GCN: s_endpgm define amdgpu_kernel void @workgroup_acq_rel() { @@ -175,7 +175,7 @@ entry: } ; FUNC-LABEL: {{^}}workgroup_seq_cst -; GCN: BB#0 +; GCN: %bb.0 ; GCN-NOT: ATOMIC_FENCE ; GCN: s_endpgm define amdgpu_kernel void @workgroup_seq_cst() { @@ -185,7 +185,7 @@ entry: } ; FUNC-LABEL: {{^}}wavefront_acquire -; GCN: BB#0 +; GCN: %bb.0 ; GCN-NOT: ATOMIC_FENCE ; GCN: s_endpgm define amdgpu_kernel void @wavefront_acquire() { @@ -195,7 +195,7 @@ entry: } ; FUNC-LABEL: {{^}}wavefront_release -; GCN: BB#0 +; GCN: 
%bb.0 ; GCN-NOT: ATOMIC_FENCE ; GCN: s_endpgm define amdgpu_kernel void @wavefront_release() { @@ -205,7 +205,7 @@ entry: } ; FUNC-LABEL: {{^}}wavefront_acq_rel -; GCN: BB#0 +; GCN: %bb.0 ; GCN-NOT: ATOMIC_FENCE ; GCN: s_endpgm define amdgpu_kernel void @wavefront_acq_rel() { @@ -215,7 +215,7 @@ entry: } ; FUNC-LABEL: {{^}}wavefront_seq_cst -; GCN: BB#0 +; GCN: %bb.0 ; GCN-NOT: ATOMIC_FENCE ; GCN: s_endpgm define amdgpu_kernel void @wavefront_seq_cst() { diff --git a/test/CodeGen/MIR/AMDGPU/memory-legalizer-atomic-insert-end.mir b/test/CodeGen/AMDGPU/memory-legalizer-atomic-insert-end.mir similarity index 100% rename from test/CodeGen/MIR/AMDGPU/memory-legalizer-atomic-insert-end.mir rename to test/CodeGen/AMDGPU/memory-legalizer-atomic-insert-end.mir diff --git a/test/CodeGen/MIR/AMDGPU/memory-legalizer-multiple-mem-operands-atomics.mir b/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-atomics.mir similarity index 98% rename from test/CodeGen/MIR/AMDGPU/memory-legalizer-multiple-mem-operands-atomics.mir rename to test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-atomics.mir index c5598bf3b1e0..2f3095c777a3 100644 --- a/test/CodeGen/MIR/AMDGPU/memory-legalizer-multiple-mem-operands-atomics.mir +++ b/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-atomics.mir @@ -115,9 +115,9 @@ body: | liveins: %sgpr0_sgpr1, %sgpr3 %sgpr2 = S_LOAD_DWORD_IMM %sgpr0_sgpr1, 44, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(2)* undef`) - %sgpr8 = S_MOV_B32 $SCRATCH_RSRC_DWORD0, implicit-def %sgpr8_sgpr9_sgpr10_sgpr11 + %sgpr8 = S_MOV_B32 &SCRATCH_RSRC_DWORD0, implicit-def %sgpr8_sgpr9_sgpr10_sgpr11 %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM %sgpr0_sgpr1, 36, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`) - %sgpr9 = S_MOV_B32 $SCRATCH_RSRC_DWORD1, implicit-def %sgpr8_sgpr9_sgpr10_sgpr11 + %sgpr9 = S_MOV_B32 &SCRATCH_RSRC_DWORD1, implicit-def %sgpr8_sgpr9_sgpr10_sgpr11 %sgpr10 = S_MOV_B32 4294967295, implicit-def %sgpr8_sgpr9_sgpr10_sgpr11 %sgpr11 = S_MOV_B32 15204352, implicit-def %sgpr8_sgpr9_sgpr10_sgpr11 %vgpr0 = V_MOV_B32_e32 1, implicit %exec diff --git a/test/CodeGen/MIR/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-1.mir b/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-1.mir similarity index 98% rename from test/CodeGen/MIR/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-1.mir rename to test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-1.mir index ed701563f66f..263bbeb05966 100644 --- a/test/CodeGen/MIR/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-1.mir +++ b/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-1.mir @@ -113,9 +113,9 @@ body: | liveins: %sgpr0_sgpr1, %sgpr3 %sgpr2 = S_LOAD_DWORD_IMM %sgpr0_sgpr1, 44, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(2)* undef`) - %sgpr8 = S_MOV_B32 $SCRATCH_RSRC_DWORD0, implicit-def %sgpr8_sgpr9_sgpr10_sgpr11 + %sgpr8 = S_MOV_B32 &SCRATCH_RSRC_DWORD0, implicit-def %sgpr8_sgpr9_sgpr10_sgpr11 %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM %sgpr0_sgpr1, 36, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`) - %sgpr9 = S_MOV_B32 $SCRATCH_RSRC_DWORD1, implicit-def %sgpr8_sgpr9_sgpr10_sgpr11 + %sgpr9 = S_MOV_B32 &SCRATCH_RSRC_DWORD1, implicit-def %sgpr8_sgpr9_sgpr10_sgpr11 %sgpr10 = S_MOV_B32 4294967295, implicit-def %sgpr8_sgpr9_sgpr10_sgpr11 %sgpr11 = S_MOV_B32 15204352, implicit-def %sgpr8_sgpr9_sgpr10_sgpr11 %vgpr0 = V_MOV_B32_e32 1, 
implicit %exec diff --git a/test/CodeGen/MIR/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-2.mir b/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-2.mir similarity index 98% rename from test/CodeGen/MIR/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-2.mir rename to test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-2.mir index 26e6df90d1ad..7e0c9e44e374 100644 --- a/test/CodeGen/MIR/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-2.mir +++ b/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-2.mir @@ -113,9 +113,9 @@ body: | liveins: %sgpr0_sgpr1, %sgpr3 %sgpr2 = S_LOAD_DWORD_IMM %sgpr0_sgpr1, 44, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(2)* undef`) - %sgpr8 = S_MOV_B32 $SCRATCH_RSRC_DWORD0, implicit-def %sgpr8_sgpr9_sgpr10_sgpr11 + %sgpr8 = S_MOV_B32 &SCRATCH_RSRC_DWORD0, implicit-def %sgpr8_sgpr9_sgpr10_sgpr11 %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM %sgpr0_sgpr1, 36, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`) - %sgpr9 = S_MOV_B32 $SCRATCH_RSRC_DWORD1, implicit-def %sgpr8_sgpr9_sgpr10_sgpr11 + %sgpr9 = S_MOV_B32 &SCRATCH_RSRC_DWORD1, implicit-def %sgpr8_sgpr9_sgpr10_sgpr11 %sgpr10 = S_MOV_B32 4294967295, implicit-def %sgpr8_sgpr9_sgpr10_sgpr11 %sgpr11 = S_MOV_B32 15204352, implicit-def %sgpr8_sgpr9_sgpr10_sgpr11 %vgpr0 = V_MOV_B32_e32 1, implicit %exec diff --git a/test/CodeGen/AMDGPU/memory-legalizer-store-infinite-loop.ll b/test/CodeGen/AMDGPU/memory-legalizer-store-infinite-loop.ll new file mode 100644 index 000000000000..f97785beab6f --- /dev/null +++ b/test/CodeGen/AMDGPU/memory-legalizer-store-infinite-loop.ll @@ -0,0 +1,32 @@ +; RUN: llc -mtriple=amdgcn--amdhsa-amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s + +; Effectively, check that the compile finishes; in the case +; of an infinite loop, llc toggles between merging 2 ST4s +; ( MergeConsecutiveStores() ) and breaking the resulting ST8 +; apart ( LegalizeStoreOps() ). + +target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" + +; GCN-LABEL: {{^}}_Z6brokenPd: +; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} +; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} +define amdgpu_kernel void @_Z6brokenPd(double* %arg) { +bb: + %tmp = alloca double, align 8, addrspace(5) + %tmp1 = alloca double, align 8, addrspace(5) + %tmp2 = load double, double* %arg, align 8 + br i1 1, label %bb6, label %bb4 + +bb3: ; No predecessors! 
+ br label %bb4 + +bb4: ; preds = %bb3, %bb + %tmp5 = phi double addrspace(5)* [ %tmp1, %bb3 ], [ %tmp, %bb ] + store double %tmp2, double addrspace(5)* %tmp5, align 8 + br label %bb6 + +bb6: ; preds = %bb4, %bb + %tmp7 = phi double [ 0x7FF8123000000000, %bb4 ], [ 0x7FF8000000000000, %bb ] + store double %tmp7, double* %arg, align 8 + ret void +} diff --git a/test/CodeGen/AMDGPU/merge-load-store-vreg.mir b/test/CodeGen/AMDGPU/merge-load-store-vreg.mir new file mode 100644 index 000000000000..fbd5611b3fcf --- /dev/null +++ b/test/CodeGen/AMDGPU/merge-load-store-vreg.mir @@ -0,0 +1,60 @@ +# RUN: llc -march=amdgcn -mcpu=gfx803 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck -check-prefixes=GCN,VI %s +# RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck -check-prefixes=GCN,GFX9 %s + +# If there's a base offset, check that SILoadStoreOptimizer creates +# V_ADD_{I|U}32_e64 for that offset; _e64 uses a vreg for the carry (rather than +# %vcc, which is used in _e32); this ensures that %vcc is not inadvertently +# clobbered. + +# GCN-LABEL: name: kernel + +# VI: V_ADD_I32_e64 %6, %0, +# VI-NEXT: DS_WRITE2_B32 killed %7, %0, %3, 0, 8, +# VI: V_ADD_I32_e64 %10, %3, +# VI-NEXT: DS_READ2_B32 killed %11, 0, 8, + +# GFX9: V_ADD_U32_e64 %6, %0, +# GFX9-NEXT: DS_WRITE2_B32_gfx9 killed %7, %0, %3, 0, 8, +# GFX9: V_ADD_U32_e64 %9, %3, +# GFX9-NEXT: DS_READ2_B32_gfx9 killed %10, 0, 8, + +--- | + @0 = internal unnamed_addr addrspace(3) global [256 x float] undef, align 4 + + define amdgpu_kernel void @kernel() { + bb.0: + br label %bb2 + + bb1: + ret void + + bb2: + %tmp = getelementptr inbounds [256 x float], [256 x float] addrspace(3)* @0, i32 0, i32 0 + %tmp1 = getelementptr inbounds float, float addrspace(3)* %tmp, i32 8 + %tmp2 = getelementptr inbounds float, float addrspace(3)* %tmp, i32 16 + %tmp3 = getelementptr inbounds float, float addrspace(3)* %tmp, i32 24 + br label %bb1 + } +--- +name: kernel +body: | + bb.0: + %0:vgpr_32 = IMPLICIT_DEF + S_BRANCH %bb.2 + + bb.1: + S_ENDPGM + + bb.2: + %1:sreg_64_xexec = V_CMP_NE_U32_e64 %0, 0, implicit %exec + %2:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %1, implicit %exec + V_CMP_NE_U32_e32 1, %2, implicit-def %vcc, implicit %exec + DS_WRITE_B32 %0, %0, 1024, 0, implicit %m0, implicit %exec :: (store 4 into %ir.tmp) + %3:vgpr_32 = V_MOV_B32_e32 0, implicit %exec + DS_WRITE_B32 %0, %3, 1056, 0, implicit %m0, implicit %exec :: (store 4 into %ir.tmp1) + %4:vgpr_32 = DS_READ_B32 %3, 1088, 0, implicit %m0, implicit %exec :: (load 4 from %ir.tmp2) + %5:vgpr_32 = DS_READ_B32 %3, 1120, 0, implicit %m0, implicit %exec :: (load 4 from %ir.tmp3) + %vcc = S_AND_B64 %exec, %vcc, implicit-def %scc + S_CBRANCH_VCCNZ %bb.1, implicit %vcc + S_BRANCH %bb.1 +... 
diff --git a/test/CodeGen/AMDGPU/min.ll b/test/CodeGen/AMDGPU/min.ll index 76a613dd58f5..6a1cb68d130e 100644 --- a/test/CodeGen/AMDGPU/min.ll +++ b/test/CodeGen/AMDGPU/min.ll @@ -1,6 +1,6 @@ ; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=GFX89 -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 -check-prefix=FUNC %s ; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}v_test_imin_sle_i32: diff --git a/test/CodeGen/AMDGPU/mubuf-offset-private.ll b/test/CodeGen/AMDGPU/mubuf-offset-private.ll index 742c4f8af85d..23bd2e4bc823 100644 --- a/test/CodeGen/AMDGPU/mubuf-offset-private.ll +++ b/test/CodeGen/AMDGPU/mubuf-offset-private.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -mattr=+max-private-element-size-16 < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+max-private-element-size-16 < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+max-private-element-size-16 < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mattr=+max-private-element-size-16 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SICIVI %s +; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+max-private-element-size-16 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SICIVI %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+max-private-element-size-16 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s ; Test addressing modes when the scratch base is not a frame index. @@ -133,4 +133,23 @@ define amdgpu_kernel void @store_private_offset_i8_max_offset_plus2() #0 { ret void } +; MUBUF used for stack access has bounds checking enabled before gfx9, +; so a possibly negative base index can't be used for the vgpr offset. 
+ +; GCN-LABEL: {{^}}store_private_unknown_bits_vaddr: +; SICIVI: v_add_{{i|u}}32_e32 [[ADDR0:v[0-9]+]], vcc, 4 +; SICIVI: v_add_{{i|u}}32_e32 [[ADDR1:v[0-9]+]], vcc, 32, [[ADDR0]] +; SICIVI: buffer_store_dword v{{[0-9]+}}, [[ADDR1]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}} + +; GFX9: v_add_u32_e32 [[ADDR:v[0-9]+]], 4, +; GFX9: buffer_store_dword v{{[0-9]+}}, [[ADDR]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen offset:32 +define amdgpu_kernel void @store_private_unknown_bits_vaddr() #0 { + %alloca = alloca [16 x i32], align 4 + %vaddr = load volatile i32, i32 addrspace(1)* undef + %vaddr.off = add i32 %vaddr, 8 + %gep = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %vaddr.off + store volatile i32 9, i32* %gep + ret void +} + attributes #0 = { nounwind } diff --git a/test/CodeGen/AMDGPU/multilevel-break.ll b/test/CodeGen/AMDGPU/multilevel-break.ll index 15de689b953e..5b556f12f0d6 100644 --- a/test/CodeGen/AMDGPU/multilevel-break.ll +++ b/test/CodeGen/AMDGPU/multilevel-break.ll @@ -34,7 +34,7 @@ ; GCN-NEXT: s_andn2_b64 exec, exec, [[OR_BREAK]] ; GCN-NEXT: s_cbranch_execnz [[INNER_LOOP]] -; GCN: ; BB#{{[0-9]+}}: ; %Flow1{{$}} +; GCN: ; %bb.{{[0-9]+}}: ; %Flow1{{$}} ; GCN-NEXT: ; in Loop: Header=[[OUTER_LOOP]] Depth=1 ; Ensure copy is eliminated @@ -66,9 +66,10 @@ ENDIF: ; preds = %LOOP ; OPT-LABEL: define amdgpu_kernel void @multi_if_break_loop( ; OPT: llvm.amdgcn.break -; OPT: llvm.amdgcn.loop +; OPT: llvm.amdgcn.break ; OPT: llvm.amdgcn.if.break ; OPT: llvm.amdgcn.if.break +; OPT: llvm.amdgcn.loop ; OPT: llvm.amdgcn.end.cf ; GCN-LABEL: {{^}}multi_if_break_loop: diff --git a/test/CodeGen/AMDGPU/nested-loop-conditions.ll b/test/CodeGen/AMDGPU/nested-loop-conditions.ll index 672549c8ea63..ce2e86827ad0 100644 --- a/test/CodeGen/AMDGPU/nested-loop-conditions.ll +++ b/test/CodeGen/AMDGPU/nested-loop-conditions.ll @@ -63,8 +63,7 @@ ; GCN-NEXT: s_cbranch_scc1 ; FIXME: Should fold to unconditional branch? -; GCN: s_mov_b64 vcc, -1 -; GCN-NEXT: ; implicit-def +; GCN: ; implicit-def ; GCN: s_cbranch_vccz ; GCN: ds_read_b32 @@ -124,55 +123,100 @@ bb23: ; preds = %bb10 ; Earlier version of above, before a run of the structurizer. 
; IR-LABEL: @nested_loop_conditions( -; IR: Flow7: -; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %17) -; IR-NEXT: %0 = call { i1, i64 } @llvm.amdgcn.if(i1 %15) -; IR-NEXT: %1 = extractvalue { i1, i64 } %0, 0 -; IR-NEXT: %2 = extractvalue { i1, i64 } %0, 1 -; IR-NEXT: br i1 %1, label %bb4.bb13_crit_edge, label %Flow8 +; IR: %tmp1235 = icmp slt i32 %tmp1134, 9 +; IR: br i1 %tmp1235, label %bb14.lr.ph, label %Flow + +; IR: bb14.lr.ph: +; IR: br label %bb14 + +; IR: Flow3: +; IR: call void @llvm.amdgcn.end.cf(i64 %18) +; IR: %0 = call { i1, i64 } @llvm.amdgcn.if(i1 %17) +; IR: %1 = extractvalue { i1, i64 } %0, 0 +; IR: %2 = extractvalue { i1, i64 } %0, 1 +; IR: br i1 %1, label %bb4.bb13_crit_edge, label %Flow4 + +; IR: bb4.bb13_crit_edge: +; IR: br label %Flow4 + +; IR: Flow4: +; IR: %3 = phi i1 [ true, %bb4.bb13_crit_edge ], [ false, %Flow3 ] +; IR: call void @llvm.amdgcn.end.cf(i64 %2) +; IR: br label %Flow + +; IR: bb13: +; IR: br label %bb31 + +; IR: Flow: +; IR: %4 = phi i1 [ %3, %Flow4 ], [ true, %bb ] +; IR: %5 = call { i1, i64 } @llvm.amdgcn.if(i1 %4) +; IR: %6 = extractvalue { i1, i64 } %5, 0 +; IR: %7 = extractvalue { i1, i64 } %5, 1 +; IR: br i1 %6, label %bb13, label %bb31 + +; IR: bb14: +; IR: %phi.broken = phi i64 [ %18, %Flow2 ], [ 0, %bb14.lr.ph ] +; IR: %tmp1037 = phi i32 [ %tmp1033, %bb14.lr.ph ], [ %16, %Flow2 ] +; IR: %tmp936 = phi <4 x i32> [ %tmp932, %bb14.lr.ph ], [ %15, %Flow2 ] +; IR: %tmp15 = icmp eq i32 %tmp1037, 1 +; IR: %8 = xor i1 %tmp15, true +; IR: %9 = call { i1, i64 } @llvm.amdgcn.if(i1 %8) +; IR: %10 = extractvalue { i1, i64 } %9, 0 +; IR: %11 = extractvalue { i1, i64 } %9, 1 +; IR: br i1 %10, label %bb31.loopexit, label %Flow1 ; IR: Flow1: -; IR-NEXT: %loop.phi = phi i64 [ %loop.phi9, %Flow6 ], [ %phi.broken, %bb14 ] -; IR-NEXT: %13 = phi <4 x i32> [ %29, %Flow6 ], [ undef, %bb14 ] -; IR-NEXT: %14 = phi i32 [ %30, %Flow6 ], [ undef, %bb14 ] -; IR-NEXT: %15 = phi i1 [ %31, %Flow6 ], [ false, %bb14 ] -; IR-NEXT: %16 = phi i1 [ false, %Flow6 ], [ %8, %bb14 ] -; IR-NEXT: %17 = call i64 @llvm.amdgcn.else.break(i64 %11, i64 %loop.phi) -; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %11) -; IR-NEXT: %18 = call i1 @llvm.amdgcn.loop(i64 %17) -; IR-NEXT: br i1 %18, label %Flow7, label %bb14 +; IR: %12 = call { i1, i64 } @llvm.amdgcn.else(i64 %11) +; IR: %13 = extractvalue { i1, i64 } %12, 0 +; IR: %14 = extractvalue { i1, i64 } %12, 1 +; IR: br i1 %13, label %bb16, label %Flow2 + +; IR: bb16: +; IR: %tmp17 = bitcast i64 %tmp3 to <2 x i32> +; IR: br label %bb18 ; IR: Flow2: -; IR-NEXT: %loop.phi10 = phi i64 [ %loop.phi11, %Flow5 ], [ %12, %bb16 ] -; IR-NEXT: %19 = phi <4 x i32> [ %29, %Flow5 ], [ undef, %bb16 ] -; IR-NEXT: %20 = phi i32 [ %30, %Flow5 ], [ undef, %bb16 ] -; IR-NEXT: %21 = phi i1 [ %31, %Flow5 ], [ false, %bb16 ] -; IR-NEXT: %22 = phi i1 [ false, %Flow5 ], [ false, %bb16 ] -; IR-NEXT: %23 = phi i1 [ false, %Flow5 ], [ %8, %bb16 ] -; IR-NEXT: %24 = call { i1, i64 } @llvm.amdgcn.if(i1 %23) -; IR-NEXT: %25 = extractvalue { i1, i64 } %24, 0 -; IR-NEXT: %26 = extractvalue { i1, i64 } %24, 1 -; IR-NEXT: br i1 %25, label %bb21, label %Flow3 +; IR: %loop.phi = phi i64 [ %21, %bb21 ], [ %phi.broken, %Flow1 ] +; IR: %15 = phi <4 x i32> [ %tmp9, %bb21 ], [ undef, %Flow1 ] +; IR: %16 = phi i32 [ %tmp10, %bb21 ], [ undef, %Flow1 ] +; IR: %17 = phi i1 [ %20, %bb21 ], [ false, %Flow1 ] +; IR: %18 = call i64 @llvm.amdgcn.else.break(i64 %14, i64 %loop.phi) +; IR: call void @llvm.amdgcn.end.cf(i64 %14) +; IR: %19 = call i1 @llvm.amdgcn.loop(i64 %18) +; IR: br i1 %19, label 
%Flow3, label %bb14 + +; IR: bb18: +; IR: %tmp19 = load volatile i32, i32 addrspace(1)* undef +; IR: %tmp20 = icmp slt i32 %tmp19, 9 +; IR: br i1 %tmp20, label %bb21, label %bb18 ; IR: bb21: -; IR: %tmp12 = icmp slt i32 %tmp11, 9 -; IR-NEXT: %27 = xor i1 %tmp12, true -; IR-NEXT: %28 = call i64 @llvm.amdgcn.if.break(i1 %27, i64 %phi.broken) -; IR-NEXT: br label %Flow3 - -; IR: Flow3: -; IR-NEXT: %loop.phi11 = phi i64 [ %phi.broken, %bb21 ], [ %phi.broken, %Flow2 ] -; IR-NEXT: %loop.phi9 = phi i64 [ %28, %bb21 ], [ %loop.phi10, %Flow2 ] -; IR-NEXT: %29 = phi <4 x i32> [ %tmp9, %bb21 ], [ %19, %Flow2 ] -; IR-NEXT: %30 = phi i32 [ %tmp10, %bb21 ], [ %20, %Flow2 ] -; IR-NEXT: %31 = phi i1 [ %27, %bb21 ], [ %21, %Flow2 ] -; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %26) -; IR-NEXT: br i1 %22, label %bb31.loopexit, label %Flow4 +; IR: %tmp22 = extractelement <2 x i32> %tmp17, i64 1 +; IR: %tmp23 = lshr i32 %tmp22, 16 +; IR: %tmp24 = select i1 undef, i32 undef, i32 %tmp23 +; IR: %tmp25 = uitofp i32 %tmp24 to float +; IR: %tmp26 = fmul float %tmp25, 0x3EF0001000000000 +; IR: %tmp27 = fsub float %tmp26, undef +; IR: %tmp28 = fcmp olt float %tmp27, 5.000000e-01 +; IR: %tmp29 = select i1 %tmp28, i64 1, i64 2 +; IR: %tmp30 = extractelement <4 x i32> %tmp936, i64 %tmp29 +; IR: %tmp7 = zext i32 %tmp30 to i64 +; IR: %tmp8 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* undef, i64 %tmp7 +; IR: %tmp9 = load <4 x i32>, <4 x i32> addrspace(1)* %tmp8, align 16 +; IR: %tmp10 = extractelement <4 x i32> %tmp9, i64 0 +; IR: %tmp11 = load volatile i32, i32 addrspace(1)* undef +; IR: %tmp12 = icmp slt i32 %tmp11, 9 +; IR: %20 = xor i1 %tmp12, true +; IR: %21 = call i64 @llvm.amdgcn.if.break(i1 %20, i64 %phi.broken) +; IR: br label %Flow2 + +; IR: bb31.loopexit: +; IR: br label %Flow1 ; IR: bb31: -; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %7) -; IR-NEXT: store volatile i32 0, i32 addrspace(1)* undef -; IR-NEXT: ret void +; IR: call void @llvm.amdgcn.end.cf(i64 %7) +; IR: store volatile i32 0, i32 addrspace(1)* undef +; IR: ret void ; GCN-LABEL: {{^}}nested_loop_conditions: diff --git a/test/CodeGen/AMDGPU/optimize-if-exec-masking.mir b/test/CodeGen/AMDGPU/optimize-if-exec-masking.mir index b5dc9d9dac84..24e8ed8e29cb 100644 --- a/test/CodeGen/AMDGPU/optimize-if-exec-masking.mir +++ b/test/CodeGen/AMDGPU/optimize-if-exec-masking.mir @@ -184,8 +184,8 @@ body: | %sgpr2_sgpr3 = S_AND_B64 %sgpr0_sgpr1, killed %vcc, implicit-def %scc %sgpr0_sgpr1 = S_XOR_B64 %sgpr2_sgpr3, killed %sgpr0_sgpr1, implicit-def %scc %exec = S_MOV_B64_term killed %sgpr2_sgpr3 - SI_MASK_BRANCH %bb.2.end, implicit %exec - S_BRANCH %bb.1.if + SI_MASK_BRANCH %bb.2, implicit %exec + S_BRANCH %bb.1 bb.1.if: liveins: %sgpr0_sgpr1 @@ -241,8 +241,8 @@ body: | %vgpr0 = V_MOV_B32_e32 4, implicit %exec %sgpr2_sgpr3 = S_AND_B64 %sgpr0_sgpr1, killed %vcc, implicit-def %scc %exec = S_MOV_B64_term killed %sgpr2_sgpr3 - SI_MASK_BRANCH %bb.2.end, implicit %exec - S_BRANCH %bb.1.if + SI_MASK_BRANCH %bb.2, implicit %exec + S_BRANCH %bb.1 bb.1.if: liveins: %sgpr0_sgpr1 @@ -298,8 +298,8 @@ body: | %vgpr0 = V_MOV_B32_e32 4, implicit %exec %sgpr2_sgpr3 = S_OR_B64 %sgpr0_sgpr1, killed %vcc, implicit-def %scc %exec = S_MOV_B64_term killed %sgpr2_sgpr3 - SI_MASK_BRANCH %bb.2.end, implicit %exec - S_BRANCH %bb.1.if + SI_MASK_BRANCH %bb.2, implicit %exec + S_BRANCH %bb.1 bb.1.if: liveins: %sgpr0_sgpr1 @@ -359,8 +359,8 @@ body: | BUFFER_STORE_DWORD_OFFSET %vgpr0, undef %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into `i32 addrspace(1)* 
undef`) %sgpr0_sgpr1 = S_XOR_B64 %sgpr2_sgpr3, killed %sgpr0_sgpr1, implicit-def %scc %exec = S_MOV_B64_term killed %sgpr2_sgpr3 - SI_MASK_BRANCH %bb.2.end, implicit %exec - S_BRANCH %bb.1.if + SI_MASK_BRANCH %bb.2, implicit %exec + S_BRANCH %bb.1 bb.1.if: liveins: %sgpr0_sgpr1 @@ -384,7 +384,7 @@ body: | # CHECK: %sgpr0_sgpr1 = S_AND_B64 %sgpr0_sgpr1, killed %vcc, implicit-def %scc # CHECK-NEXT: %sgpr0_sgpr1 = S_XOR_B64 undef %sgpr2_sgpr3, killed %sgpr0_sgpr1, implicit-def %scc # CHECK-NEXT: %exec = COPY %sgpr0_sgpr1 -# CHECK-NEXT: SI_MASK_BRANCH %bb.2.end, implicit %exec +# CHECK-NEXT: SI_MASK_BRANCH %bb.2, implicit %exec name: optimize_if_and_saveexec_xor_wrong_reg alignment: 0 exposesReturnsTwice: false @@ -420,8 +420,8 @@ body: | %sgpr0_sgpr1 = S_AND_B64 %sgpr0_sgpr1, killed %vcc, implicit-def %scc %sgpr0_sgpr1 = S_XOR_B64 undef %sgpr2_sgpr3, killed %sgpr0_sgpr1, implicit-def %scc %exec = S_MOV_B64_term %sgpr0_sgpr1 - SI_MASK_BRANCH %bb.2.end, implicit %exec - S_BRANCH %bb.1.if + SI_MASK_BRANCH %bb.2, implicit %exec + S_BRANCH %bb.1 bb.1.if: liveins: %sgpr0_sgpr1 , %sgpr4_sgpr5_sgpr6_sgpr7 @@ -443,7 +443,7 @@ body: | # CHECK-NEXT: %sgpr2_sgpr3 = S_OR_B64 killed %sgpr2_sgpr3, 1, implicit-def %scc # CHECK-NEXT: %sgpr0_sgpr1 = S_XOR_B64 %sgpr2_sgpr3, killed %sgpr0_sgpr1, implicit-def %scc # CHECK-NEXT: %exec = COPY killed %sgpr2_sgpr3 -# CHECK-NEXT: SI_MASK_BRANCH %bb.2.end, implicit %exec +# CHECK-NEXT: SI_MASK_BRANCH %bb.2, implicit %exec name: optimize_if_and_saveexec_xor_modify_copy_to_exec alignment: 0 @@ -479,8 +479,8 @@ body: | %sgpr2_sgpr3 = S_OR_B64 killed %sgpr2_sgpr3, 1, implicit-def %scc %sgpr0_sgpr1 = S_XOR_B64 %sgpr2_sgpr3, killed %sgpr0_sgpr1, implicit-def %scc %exec = S_MOV_B64_term killed %sgpr2_sgpr3 - SI_MASK_BRANCH %bb.2.end, implicit %exec - S_BRANCH %bb.1.if + SI_MASK_BRANCH %bb.2, implicit %exec + S_BRANCH %bb.1 bb.1.if: liveins: %sgpr0_sgpr1 @@ -540,8 +540,8 @@ body: | %sgpr2_sgpr3 = S_AND_B64 %sgpr0_sgpr1, killed %vcc, implicit-def %scc %sgpr0_sgpr1 = S_XOR_B64 %sgpr2_sgpr3, killed %sgpr0_sgpr1, implicit-def %scc %exec = S_MOV_B64_term %sgpr2_sgpr3 - SI_MASK_BRANCH %bb.2.end, implicit %exec - S_BRANCH %bb.1.if + SI_MASK_BRANCH %bb.2, implicit %exec + S_BRANCH %bb.1 bb.1.if: liveins: %sgpr0_sgpr1, %sgpr2_sgpr3 @@ -565,7 +565,7 @@ body: | # CHECK: %sgpr0_sgpr1 = COPY %exec # CHECK: %sgpr2_sgpr3 = S_LSHR_B64 %sgpr0_sgpr1, killed %vcc_lo, implicit-def %scc # CHECK-NEXT: %exec = COPY killed %sgpr2_sgpr3 -# CHECK-NEXT: SI_MASK_BRANCH %bb.2.end, implicit %exec +# CHECK-NEXT: SI_MASK_BRANCH %bb.2, implicit %exec name: optimize_if_unknown_saveexec alignment: 0 @@ -599,8 +599,8 @@ body: | %vgpr0 = V_MOV_B32_e32 4, implicit %exec %sgpr2_sgpr3 = S_LSHR_B64 %sgpr0_sgpr1, killed %vcc_lo, implicit-def %scc %exec = S_MOV_B64_term killed %sgpr2_sgpr3 - SI_MASK_BRANCH %bb.2.end, implicit %exec - S_BRANCH %bb.1.if + SI_MASK_BRANCH %bb.2, implicit %exec + S_BRANCH %bb.1 bb.1.if: liveins: %sgpr0_sgpr1 @@ -656,8 +656,8 @@ body: | %vgpr0 = V_MOV_B32_e32 4, implicit %exec %sgpr2_sgpr3 = S_ANDN2_B64 %sgpr0_sgpr1, killed %vcc, implicit-def %scc %exec = S_MOV_B64_term killed %sgpr2_sgpr3 - SI_MASK_BRANCH %bb.2.end, implicit %exec - S_BRANCH %bb.1.if + SI_MASK_BRANCH %bb.2, implicit %exec + S_BRANCH %bb.1 bb.1.if: liveins: %sgpr0_sgpr1 @@ -680,7 +680,7 @@ body: | # CHECK-LABEL: name: optimize_if_andn2_saveexec_no_commute{{$}} # CHECK: %sgpr2_sgpr3 = S_ANDN2_B64 killed %vcc, %sgpr0_sgpr1, implicit-def %scc # CHECK-NEXT: %exec = COPY killed %sgpr2_sgpr3 -# CHECK-NEXT: SI_MASK_BRANCH 
%bb.2.end, implicit %exec +# CHECK-NEXT: SI_MASK_BRANCH %bb.2, implicit %exec name: optimize_if_andn2_saveexec_no_commute alignment: 0 exposesReturnsTwice: false @@ -713,8 +713,8 @@ body: | %vgpr0 = V_MOV_B32_e32 4, implicit %exec %sgpr2_sgpr3 = S_ANDN2_B64 killed %vcc, %sgpr0_sgpr1, implicit-def %scc %exec = S_MOV_B64_term killed %sgpr2_sgpr3 - SI_MASK_BRANCH %bb.2.end, implicit %exec - S_BRANCH %bb.1.if + SI_MASK_BRANCH %bb.2, implicit %exec + S_BRANCH %bb.1 bb.1.if: liveins: %sgpr0_sgpr1 diff --git a/test/CodeGen/AMDGPU/pack.v2f16.ll b/test/CodeGen/AMDGPU/pack.v2f16.ll index 82875f8ddcdc..c50d3f7010a4 100644 --- a/test/CodeGen/AMDGPU/pack.v2f16.ll +++ b/test/CodeGen/AMDGPU/pack.v2f16.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx901 -mattr=-flat-for-global,-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx900 -mattr=-flat-for-global,-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s @@ -87,7 +87,7 @@ define amdgpu_kernel void @v_pack_v2f16(i32 addrspace(1)* %in0, i32 addrspace(1) ; GFX9: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VAL0]] ; GFX9: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, [[ELT0]] -; GFX9: v_add_{{[_coiu]*}}32_e32 v{{[0-9]+}}, vcc, 9, [[PACKED]] +; GFX9: v_add_u32_e32 v{{[0-9]+}}, 9, [[PACKED]] define amdgpu_kernel void @v_pack_v2f16_user(i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 diff --git a/test/CodeGen/AMDGPU/pack.v2i16.ll b/test/CodeGen/AMDGPU/pack.v2i16.ll index d211999ada13..343b94b06bd0 100644 --- a/test/CodeGen/AMDGPU/pack.v2i16.ll +++ b/test/CodeGen/AMDGPU/pack.v2i16.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx901 -mattr=-flat-for-global,-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX9 %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx900 -mattr=-flat-for-global,-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX9 %s ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=CI %s @@ -81,7 +81,7 @@ define amdgpu_kernel void @v_pack_v2i16(i32 addrspace(1)* %in0, i32 addrspace(1) ; GFX9: v_and_b32_e32 [[MASKED:v[0-9]+]], 0xffff, [[VAL0]] ; GFX9: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, [[MASKED]] -; GFX9: v_add_co_u32_e32 v{{[0-9]+}}, vcc, 9, [[PACKED]] +; GFX9: v_add_u32_e32 v{{[0-9]+}}, 9, [[PACKED]] define amdgpu_kernel void @v_pack_v2i16_user(i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 diff --git a/test/CodeGen/AMDGPU/packed-op-sel.ll b/test/CodeGen/AMDGPU/packed-op-sel.ll index 69675a3351ce..a9d7f43a67d9 100644 --- a/test/CodeGen/AMDGPU/packed-op-sel.ll +++ b/test/CodeGen/AMDGPU/packed-op-sel.ll @@ -233,7 +233,7 @@ bb: ; 
GCN: ds_read_b32 [[VEC0:v[0-9]+]] ; GCN: ds_read_b32 [[VEC1:v[0-9]+]] ; GCN: ds_read_u16 [[PACKED:v[0-9]+]] -; GCN-NEXT: s_waitcnt +; GCN: s_waitcnt ; GCN: ds_read_u16_d16_hi [[PACKED]] ; GCN: v_pk_fma_f16 v{{[0-9]+}}, [[VEC0]], [[VEC1]], [[PACKED]] neg_lo:[0,0,1] neg_hi:[0,0,1]{{$}} diff --git a/test/CodeGen/AMDGPU/promote-alloca-mem-intrinsics.ll b/test/CodeGen/AMDGPU/promote-alloca-mem-intrinsics.ll index 7343dd6bbdad..fcf64ce8016d 100644 --- a/test/CodeGen/AMDGPU/promote-alloca-mem-intrinsics.ll +++ b/test/CodeGen/AMDGPU/promote-alloca-mem-intrinsics.ll @@ -1,52 +1,52 @@ ; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -amdgpu-promote-alloca < %s | FileCheck %s -declare void @llvm.memcpy.p0i8.p1i8.i32(i8* nocapture, i8 addrspace(1)* nocapture, i32, i32, i1) #0 -declare void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* nocapture, i8* nocapture, i32, i32, i1) #0 +declare void @llvm.memcpy.p0i8.p1i8.i32(i8* nocapture, i8 addrspace(1)* nocapture, i32, i1) #0 +declare void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* nocapture, i8* nocapture, i32, i1) #0 -declare void @llvm.memmove.p0i8.p1i8.i32(i8* nocapture, i8 addrspace(1)* nocapture, i32, i32, i1) #0 -declare void @llvm.memmove.p1i8.p0i8.i32(i8 addrspace(1)* nocapture, i8* nocapture, i32, i32, i1) #0 +declare void @llvm.memmove.p0i8.p1i8.i32(i8* nocapture, i8 addrspace(1)* nocapture, i32, i1) #0 +declare void @llvm.memmove.p1i8.p0i8.i32(i8 addrspace(1)* nocapture, i8* nocapture, i32, i1) #0 -declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) #0 +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) #0 declare i32 @llvm.objectsize.i32.p0i8(i8*, i1, i1) #1 ; CHECK-LABEL: @promote_with_memcpy( ; CHECK: getelementptr inbounds [64 x [17 x i32]], [64 x [17 x i32]] addrspace(3)* @promote_with_memcpy.alloca, i32 0, i32 %{{[0-9]+}} -; CHECK: call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %alloca.bc, i8 addrspace(1)* %in.bc, i32 68, i32 4, i1 false) -; CHECK: call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out.bc, i8 addrspace(3)* %alloca.bc, i32 68, i32 4, i1 false) +; CHECK: call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 4 %alloca.bc, i8 addrspace(1)* align 4 %in.bc, i32 68, i1 false) +; CHECK: call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 4 %out.bc, i8 addrspace(3)* align 4 %alloca.bc, i32 68, i1 false) define amdgpu_kernel void @promote_with_memcpy(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { %alloca = alloca [17 x i32], align 4 %alloca.bc = bitcast [17 x i32]* %alloca to i8* %in.bc = bitcast i32 addrspace(1)* %in to i8 addrspace(1)* %out.bc = bitcast i32 addrspace(1)* %out to i8 addrspace(1)* - call void @llvm.memcpy.p0i8.p1i8.i32(i8* %alloca.bc, i8 addrspace(1)* %in.bc, i32 68, i32 4, i1 false) - call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* %out.bc, i8* %alloca.bc, i32 68, i32 4, i1 false) + call void @llvm.memcpy.p0i8.p1i8.i32(i8* align 4 %alloca.bc, i8 addrspace(1)* align 4 %in.bc, i32 68, i1 false) + call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* align 4 %out.bc, i8* align 4 %alloca.bc, i32 68, i1 false) ret void } ; CHECK-LABEL: @promote_with_memmove( ; CHECK: getelementptr inbounds [64 x [17 x i32]], [64 x [17 x i32]] addrspace(3)* @promote_with_memmove.alloca, i32 0, i32 %{{[0-9]+}} -; CHECK: call void @llvm.memmove.p3i8.p1i8.i32(i8 addrspace(3)* %alloca.bc, i8 addrspace(1)* %in.bc, i32 68, i32 4, i1 false) -; CHECK: call void @llvm.memmove.p1i8.p3i8.i32(i8 addrspace(1)* %out.bc, i8 addrspace(3)* %alloca.bc, i32 68, i32 4, i1 false) +; 
CHECK: call void @llvm.memmove.p3i8.p1i8.i32(i8 addrspace(3)* align 4 %alloca.bc, i8 addrspace(1)* align 4 %in.bc, i32 68, i1 false) +; CHECK: call void @llvm.memmove.p1i8.p3i8.i32(i8 addrspace(1)* align 4 %out.bc, i8 addrspace(3)* align 4 %alloca.bc, i32 68, i1 false) define amdgpu_kernel void @promote_with_memmove(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { %alloca = alloca [17 x i32], align 4 %alloca.bc = bitcast [17 x i32]* %alloca to i8* %in.bc = bitcast i32 addrspace(1)* %in to i8 addrspace(1)* %out.bc = bitcast i32 addrspace(1)* %out to i8 addrspace(1)* - call void @llvm.memmove.p0i8.p1i8.i32(i8* %alloca.bc, i8 addrspace(1)* %in.bc, i32 68, i32 4, i1 false) - call void @llvm.memmove.p1i8.p0i8.i32(i8 addrspace(1)* %out.bc, i8* %alloca.bc, i32 68, i32 4, i1 false) + call void @llvm.memmove.p0i8.p1i8.i32(i8* align 4 %alloca.bc, i8 addrspace(1)* align 4 %in.bc, i32 68, i1 false) + call void @llvm.memmove.p1i8.p0i8.i32(i8 addrspace(1)* align 4 %out.bc, i8* align 4 %alloca.bc, i32 68, i1 false) ret void } ; CHECK-LABEL: @promote_with_memset( ; CHECK: getelementptr inbounds [64 x [17 x i32]], [64 x [17 x i32]] addrspace(3)* @promote_with_memset.alloca, i32 0, i32 %{{[0-9]+}} -; CHECK: call void @llvm.memset.p3i8.i32(i8 addrspace(3)* %alloca.bc, i8 7, i32 68, i32 4, i1 false) +; CHECK: call void @llvm.memset.p3i8.i32(i8 addrspace(3)* align 4 %alloca.bc, i8 7, i32 68, i1 false) define amdgpu_kernel void @promote_with_memset(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { %alloca = alloca [17 x i32], align 4 %alloca.bc = bitcast [17 x i32]* %alloca to i8* %in.bc = bitcast i32 addrspace(1)* %in to i8 addrspace(1)* %out.bc = bitcast i32 addrspace(1)* %out to i8 addrspace(1)* - call void @llvm.memset.p0i8.i32(i8* %alloca.bc, i8 7, i32 68, i32 4, i1 false) + call void @llvm.memset.p0i8.i32(i8* align 4 %alloca.bc, i8 7, i32 68, i1 false) ret void } diff --git a/test/CodeGen/AMDGPU/promote-alloca-to-lds-select.ll b/test/CodeGen/AMDGPU/promote-alloca-to-lds-select.ll index 55c2229fb6bd..ebeed0dd4435 100644 --- a/test/CodeGen/AMDGPU/promote-alloca-to-lds-select.ll +++ b/test/CodeGen/AMDGPU/promote-alloca-to-lds-select.ll @@ -130,4 +130,4 @@ bb: } attributes #0 = { norecurse nounwind "amdgpu-waves-per-eu"="1,1" } -attributes #1 = { norecurse nounwind } \ No newline at end of file +attributes #1 = { norecurse nounwind } diff --git a/test/CodeGen/AMDGPU/regcoalesce-dbg.mir b/test/CodeGen/AMDGPU/regcoalesce-dbg.mir index c5a9a0ad01ab..69538d8b7382 100644 --- a/test/CodeGen/AMDGPU/regcoalesce-dbg.mir +++ b/test/CodeGen/AMDGPU/regcoalesce-dbg.mir @@ -63,7 +63,7 @@ body: | %19.sub1 = COPY killed %18 %10 = S_MOV_B32 61440 %11 = S_MOV_B32 0 - DBG_VALUE debug-use %11, debug-use _, !1, !8, debug-location !9 + DBG_VALUE debug-use %11, debug-use %noreg, !1, !8, debug-location !9 undef %12.sub0 = COPY killed %11 %12.sub1 = COPY killed %10 undef %13.sub0_sub1 = COPY killed %4 diff --git a/test/CodeGen/AMDGPU/ret_jump.ll b/test/CodeGen/AMDGPU/ret_jump.ll index 7c2e28108df8..f87e8cbea4fc 100644 --- a/test/CodeGen/AMDGPU/ret_jump.ll +++ b/test/CodeGen/AMDGPU/ret_jump.ll @@ -57,7 +57,7 @@ ret.bb: ; preds = %else, %main_body ; GCN-LABEL: {{^}}uniform_br_nontrivial_ret_divergent_br_nontrivial_unreachable: ; GCN: s_cbranch_vccnz [[RET_BB:BB[0-9]+_[0-9]+]] -; GCN: ; BB#{{[0-9]+}}: ; %else +; GCN: ; %bb.{{[0-9]+}}: ; %else ; GCN: s_and_saveexec_b64 [[SAVE_EXEC:s\[[0-9]+:[0-9]+\]]], vcc ; GCN-NEXT: ; mask branch [[FLOW1:BB[0-9]+_[0-9]+]] diff --git a/test/CodeGen/AMDGPU/saddo.ll b/test/CodeGen/AMDGPU/saddo.ll 
index c92ea657be0f..0b52821f72cc 100644 --- a/test/CodeGen/AMDGPU/saddo.ll +++ b/test/CodeGen/AMDGPU/saddo.ll @@ -1,5 +1,6 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs< %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -verify-machineinstrs< %s | FileCheck -check-prefixes=GCN,GFX9,FUNC %s ; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=cypress -verify-machineinstrs< %s declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone @@ -49,8 +50,11 @@ define amdgpu_kernel void @s_saddo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* } ; FUNC-LABEL: {{^}}v_saddo_i64: -; SI: v_add_{{[iu]}}32 -; SI: v_addc_u32 +; SICIVI: v_add_{{[iu]}}32_e32 v{{[0-9]+}}, vcc +; SICIVI: v_addc_u32_e32 v{{[0-9]+}}, vcc + +; GFX9: v_add_co_u32_e32 v{{[0-9]+}}, vcc +; GFX9: v_addc_co_u32_e32 v{{[0-9]+}}, vcc define amdgpu_kernel void @v_saddo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind { %a = load i64, i64 addrspace(1)* %aptr, align 4 %b = load i64, i64 addrspace(1)* %bptr, align 4 diff --git a/test/CodeGen/AMDGPU/scalar-branch-missing-and-exec.ll b/test/CodeGen/AMDGPU/scalar-branch-missing-and-exec.ll new file mode 100644 index 000000000000..70ee24f0b22c --- /dev/null +++ b/test/CodeGen/AMDGPU/scalar-branch-missing-and-exec.ll @@ -0,0 +1,54 @@ +; RUN: llc -march=amdgcn -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mcpu=gfx800 -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s + +; This checks for a bug where uniform control flow can result in multiple +; v_cmp results being combined together with s_and_b64, s_or_b64 and s_xor_b64, +; using the resulting mask in s_cbranch_vccnz +; without ensuring that the resulting mask has bits clear for inactive lanes. +; The problematic case is s_xor_b64, as, unlike the other ops, it can actually +; set bits for inactive lanes. +; +; The check for an s_xor_b64 is just to check that this test tests what it is +; supposed to test. If the s_xor_b64 disappears due to some other case, it does +; not necessarily mean that the bug has reappeared. +; +; The check for "s_and_b64 vcc, exec, something" checks that the bug is fixed. 
+ +; CHECK: {{^}}main: +; CHECK: s_xor_b64 +; CHECK: s_and_b64 vcc, exec, + +define amdgpu_cs void @main(i32 inreg %arg) { +.entry: + %tmp44 = load volatile <2 x float>, <2 x float> addrspace(1)* undef + %tmp16 = load volatile float, float addrspace(1)* undef + %tmp22 = load volatile float, float addrspace(1)* undef + %tmp25 = load volatile float, float addrspace(1)* undef + %tmp31 = fcmp olt float %tmp16, 0x3FA99999A0000000 + br i1 %tmp31, label %bb, label %.exit.thread + +bb: ; preds = %.entry + %tmp42 = fcmp olt float %tmp25, 0x3FA99999A0000000 + br i1 %tmp42, label %bb43, label %.exit.thread + +bb43: + %tmp46 = fcmp olt <2 x float> %tmp44, + %tmp47 = extractelement <2 x i1> %tmp46, i32 0 + %tmp48 = extractelement <2 x i1> %tmp46, i32 1 + %tmp49 = and i1 %tmp47, %tmp48 + br i1 %tmp49, label %bb50, label %.exit.thread + +bb50: + %tmp53 = fcmp olt float %tmp22, 0x3FA99999A0000000 + br i1 %tmp53, label %.exit3.i, label %.exit.thread + +.exit3.i: + store volatile i32 0, i32 addrspace(1)* undef + br label %.exit.thread + +.exit.thread: + ret void +} + diff --git a/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir b/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir new file mode 100644 index 000000000000..a89011a0cce0 --- /dev/null +++ b/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir @@ -0,0 +1,333 @@ +# RUN: llc -mtriple=amdgcn-amd-amdhsa-opencl -verify-machineinstrs -run-pass=machine-scheduler -o - %s | FileCheck %s + +--- | + %struct.widget.0 = type { float, i32, i32 } + %struct.baz = type { <4 x float>, <4 x float>, <2 x float>, i32, i32 } + %struct.snork = type { float, float, float, i32, float, float, float, float, %struct.spam } + %struct.spam = type { %struct.zot, [16 x i8] } + %struct.zot = type { float, float, float, float, <4 x float> } + %struct.wombat = type { <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, [2 x i16], [2 x i16] } + %struct.wombat.1 = type { [4 x i32], [4 x i32], [4 x i32], [4 x i32], i32, i32, i32, i32 } + + @sched_dbg_value_crash.tmp6 = internal unnamed_addr addrspace(3) global [256 x [16 x i8]] undef, align 16 + + define amdgpu_kernel void @sched_dbg_value_crash(i8 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture readonly %arg1, %struct.widget.0 addrspace(1)* nocapture readonly %arg2, %struct.baz addrspace(1)* nocapture readonly %arg3, %struct.snork addrspace(1)* nocapture %arg4) local_unnamed_addr #2 { + bb: + %0 = getelementptr i32, i32 addrspace(1)* %arg1, i64 0, !amdgpu.uniform !3, !amdgpu.noclobber !3 + %tmp5 = alloca %struct.wombat, align 16 + %1 = call noalias nonnull dereferenceable(64) i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() + %2 = bitcast i8 addrspace(2)* %1 to i32 addrspace(2)* + %3 = getelementptr inbounds i32, i32 addrspace(2)* %2, i64 1 + %4 = bitcast i32 addrspace(2)* %3 to <2 x i32> addrspace(2)*, !amdgpu.uniform !3, !amdgpu.noclobber !3 + %5 = load <2 x i32>, <2 x i32> addrspace(2)* %4, align 4, !invariant.load !3 + %6 = extractelement <2 x i32> %5, i32 0 + %7 = extractelement <2 x i32> %5, i32 1 + %8 = lshr i32 %6, 16 + %9 = call i32 @llvm.amdgcn.workitem.id.x(), !range !4 + %10 = call i32 @llvm.amdgcn.workitem.id.y(), !range !4 + %11 = call i32 @llvm.amdgcn.workitem.id.z(), !range !4 + %12 = mul nuw nsw i32 %8, %7 + %13 = mul i32 %12, %9 + %14 = mul nuw nsw i32 %10, %7 + %15 = add i32 %13, %14 + %16 = add i32 %15, %11 + %17 = getelementptr inbounds [256 x [16 x i8]], [256 x [16 x i8]] addrspace(3)* @sched_dbg_value_crash.tmp6, i32 0, i32 %16 + %tmp7 = load i64, i64 addrspace(2)* null, 
align 536870912 + %tmp8 = tail call i32 @llvm.amdgcn.workitem.id.x() #3, !range !4 + %tmp9 = zext i32 %tmp8 to i64 + %tmp10 = add i64 %tmp7, %tmp9 + %tmp11 = shl i64 %tmp10, 32 + %tmp12 = ashr exact i64 %tmp11, 32 + %tmp13 = getelementptr inbounds %struct.widget.0, %struct.widget.0 addrspace(1)* %arg2, i64 %tmp12, i32 1 + %tmp14 = load i32, i32 addrspace(1)* %tmp13, align 4 + %tmp15 = getelementptr inbounds %struct.baz, %struct.baz addrspace(1)* %arg3, i64 %tmp12, i32 1 + %tmp16 = load <4 x float>, <4 x float> addrspace(1)* %tmp15, align 16 + %tmp17 = sext i32 %tmp14 to i64 + %tmp18 = load i32, i32 addrspace(1)* %0, align 4 + %tmp19 = zext i32 %tmp18 to i64 + %tmp20 = shl nuw nsw i64 %tmp19, 2 + %tmp21 = getelementptr inbounds i8, i8 addrspace(1)* %arg, i64 %tmp20 + %tmp22 = bitcast i8 addrspace(1)* %tmp21 to %struct.wombat.1 addrspace(1)* + %tmp23 = bitcast %struct.wombat* %tmp5 to i8* + call void @llvm.lifetime.start.p0i8(i64 144, i8* nonnull %tmp23) #3 + %tmp24 = getelementptr inbounds %struct.wombat, %struct.wombat* %tmp5, i32 0, i32 6 + %tmp25 = getelementptr i32, i32 addrspace(1)* %arg1, i64 3, !amdgpu.uniform !3, !amdgpu.noclobber !3 + %tmp26 = load i32, i32 addrspace(1)* %tmp25, align 4 + %tmp27 = zext i32 %tmp26 to i64 + %tmp28 = shl nuw nsw i64 %tmp27, 2 + %tmp29 = getelementptr inbounds i8, i8 addrspace(1)* %arg, i64 %tmp28 + %tmp30 = bitcast i8 addrspace(1)* %tmp29 to <2 x float> addrspace(1)* + %tmp31 = getelementptr inbounds %struct.wombat.1, %struct.wombat.1 addrspace(1)* %tmp22, i64 %tmp17, i32 2, i64 0 + %18 = bitcast i32 addrspace(1)* %tmp31 to <3 x i32> addrspace(1)* + %19 = load <3 x i32>, <3 x i32> addrspace(1)* %18, align 4 + %tmp325 = extractelement <3 x i32> %19, i32 0 + %tmp386 = extractelement <3 x i32> %19, i32 1 + %tmp447 = extractelement <3 x i32> %19, i32 2 + %tmp33 = sext i32 %tmp325 to i64 + %tmp34 = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %tmp30, i64 %tmp33 + %tmp35 = load <2 x float>, <2 x float> addrspace(1)* %tmp34, align 8 + %tmp36 = extractelement <2 x float> %tmp35, i32 1 + %tmp39 = sext i32 %tmp386 to i64 + %tmp40 = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %tmp30, i64 %tmp39 + %tmp41 = load <2 x float>, <2 x float> addrspace(1)* %tmp40, align 8 + %tmp42 = extractelement <2 x float> %tmp41, i32 1 + %tmp45 = sext i32 %tmp447 to i64 + %tmp46 = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %tmp30, i64 %tmp45 + %tmp47 = load <2 x float>, <2 x float> addrspace(1)* %tmp46, align 8 + %tmp48 = extractelement <2 x float> %tmp47, i32 1 + %tmp49 = getelementptr i32, i32 addrspace(1)* %arg1, i64 1, !amdgpu.uniform !3, !amdgpu.noclobber !3 + %tmp50 = load i32, i32 addrspace(1)* %tmp49, align 4 + %tmp51 = zext i32 %tmp50 to i64 + %tmp52 = shl nuw nsw i64 %tmp51, 2 + %tmp53 = getelementptr inbounds i8, i8 addrspace(1)* %arg, i64 %tmp52 + %tmp54 = bitcast i8 addrspace(1)* %tmp53 to <4 x float> addrspace(1)* + %tmp55 = getelementptr inbounds %struct.wombat.1, %struct.wombat.1 addrspace(1)* %tmp22, i64 %tmp17, i32 0, i64 0 + %20 = bitcast i32 addrspace(1)* %tmp55 to <2 x i32> addrspace(1)* + %21 = load <2 x i32>, <2 x i32> addrspace(1)* %20, align 4 + %tmp568 = extractelement <2 x i32> %21, i32 0 + %tmp639 = extractelement <2 x i32> %21, i32 1 + %tmp57 = sext i32 %tmp568 to i64 + %tmp58 = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %tmp54, i64 %tmp57 + %tmp59 = load <4 x float>, <4 x float> addrspace(1)* %tmp58, align 16 + %tmp60 = extractelement <4 x float> %tmp59, i32 0 + %tmp61 = 
extractelement <4 x float> %tmp59, i32 1
+ %tmp64 = sext i32 %tmp639 to i64
+ %tmp65 = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %tmp54, i64 %tmp64
+ %tmp66 = load <4 x float>, <4 x float> addrspace(1)* %tmp65, align 16
+ %tmp67 = extractelement <4 x float> %tmp16, i64 0
+ %tmp69 = fsub fast float -0.000000e+00, %tmp67
+ %tmp70 = fmul float %tmp67, 0.000000e+00
+ %tmp = fmul fast float %tmp67, undef
+ %tmp71 = fsub fast float %tmp, %tmp70
+ %tmp73 = fadd fast float %tmp, undef
+ %tmp74 = insertelement <4 x float> , float %tmp69, i32 0
+ %tmp75 = insertelement <4 x float> %tmp74, float %tmp71, i32 1
+ %tmp76 = insertelement <4 x float> %tmp75, float %tmp73, i32 2
+ store <4 x float> %tmp76, <4 x float>* %tmp24, align 16
+ %tmp77 = fsub float undef, %tmp60
+ %tmp78 = fsub float undef, %tmp61
+ %tmp79 = extractelement <4 x float> %tmp66, i32 2
+ %tmp80 = extractelement <4 x float> %tmp59, i32 2
+ %tmp81 = fsub float %tmp79, %tmp80
+ %tmp82 = fmul fast float %tmp81, undef
+ %tmp83 = fmul fast float %tmp78, undef
+ %tmp84 = fadd fast float %tmp83, %tmp77
+ %tmp85 = fadd fast float %tmp84, undef
+ %tmp86 = fmul float %tmp82, %tmp82
+ %tmp87 = fdiv float 1.000000e+00, %tmp86
+ tail call void @llvm.dbg.value(metadata float %tmp87, metadata !5, metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)) #3, !dbg !8
+ %tmp88 = fmul float %tmp82, 0.000000e+00
+ %tmp89 = fsub fast float %tmp85, %tmp88
+ %tmp90 = fdiv float %tmp89, %tmp86
+ %tmp91 = fsub float 1.000000e+00, %tmp87
+ %tmp92 = fsub float %tmp91, %tmp90
+ %tmp93 = fmul float %tmp42, %tmp87
+ %tmp94 = call float @llvm.fmuladd.f32(float %tmp92, float %tmp36, float %tmp93)
+ %tmp95 = call float @llvm.fmuladd.f32(float %tmp48, float undef, float %tmp94)
+ %tmp96 = fsub float extractelement (<2 x float> fadd (<2 x float> fmul (<2 x float> undef, <2 x float> undef), <2 x float> undef), i64 1), %tmp95
+ %tmp97 = getelementptr inbounds %struct.wombat, %struct.wombat* %tmp5, i32 0, i32 8, i32 1
+ call void @func(float %tmp96, i64 0, i16* nonnull %tmp97) #3
+ %tmp984 = bitcast [16 x i8] addrspace(3)* %17 to i8 addrspace(3)*
+ %tmp99 = getelementptr inbounds %struct.snork, %struct.snork addrspace(1)* %arg4, i64 %tmp12, i32 8, i32 1, i64 0
+ call void @llvm.memcpy.p1i8.p3i8.i64(i8 addrspace(1)* %tmp99, i8 addrspace(3)* %tmp984, i64 16, i32 16, i1 false)
+ call void @llvm.lifetime.end.p0i8(i64 144, i8* nonnull %tmp23) #3
+ ret void
+ }
+
+ declare void @func(float, i64, i16*)
+ declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #0
+ declare float @llvm.fmuladd.f32(float, float, float) #1
+ declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #0
+ declare <2 x float> @llvm.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x float>) #1
+ declare i32 @llvm.amdgcn.workitem.id.x() #1
+ declare void @llvm.dbg.value(metadata, metadata, metadata) #1
+ declare i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #1
+ declare i32 @llvm.amdgcn.workitem.id.y() #1
+ declare i32 @llvm.amdgcn.workitem.id.z() #1
+ declare void @llvm.memcpy.p1i8.p0i8.i64(i8 addrspace(1)* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) #0
+ declare void @llvm.memcpy.p1i8.p3i8.i64(i8 addrspace(1)* nocapture writeonly, i8 addrspace(3)* nocapture readonly, i64, i32, i1) #0
+
+ attributes #0 = { argmemonly nounwind }
+ attributes #1 = { nounwind readnone speculatable }
+ attributes #2 = { convergent nounwind "amdgpu-dispatch-ptr" "amdgpu-flat-scratch" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "target-cpu"="gfx900" "target-features"="+fp32-denormals" }
+ attributes #3 = { nounwind }
+
+ !llvm.dbg.cu = !{!0}
+ !llvm.module.flags = !{!2}
+
+ !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug)
+ !1 = !DIFile(filename: "foo.cl", directory: "/dev/null")
+ !2 = !{i32 2, !"Debug Info Version", i32 3}
+ !3 = !{}
+ !4 = !{i32 0, i32 256}
+ !5 = !DILocalVariable(name: "bar", scope: !6, file: !1, line: 102, type: !7)
+ !6 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 81, isLocal: false, isDefinition: true, scopeLine: 86, flags: DIFlagPrototyped, isOptimized: true, unit: !0)
+ !7 = !DIBasicType(name: "float", size: 32, encoding: DW_ATE_float)
+ !8 = !DILocation(line: 102, column: 8, scope: !6)
+
+...
+---
+
+# CHECK: name: sched_dbg_value_crash
+# CHECK: DBG_VALUE debug-use %99, debug-use %noreg, !5, !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef), debug-location !8
+
+name: sched_dbg_value_crash
+alignment: 0
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+liveins:
+ - { reg: '%vgpr0', virtual-reg: '%0' }
+ - { reg: '%vgpr1', virtual-reg: '%1' }
+ - { reg: '%vgpr2', virtual-reg: '%2' }
+ - { reg: '%sgpr4_sgpr5', virtual-reg: '%3' }
+ - { reg: '%sgpr6_sgpr7', virtual-reg: '%4' }
+fixedStack:
+stack:
+ - { id: 0, name: tmp5, type: default, offset: 0, size: 128, alignment: 16,
+ stack-id: 0, callee-saved-register: '', callee-saved-restored: true,
+ local-offset: 0, di-variable: '', di-expression: '', di-location: '' }
+constants:
+body: |
+ bb.0.bb:
+ liveins: %vgpr0, %vgpr1, %vgpr2, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4_sgpr5, %sgpr6_sgpr7, %sgpr32, %sgpr101
+
+ %4:sgpr_64 = COPY %sgpr6_sgpr7
+ %3:sgpr_64 = COPY %sgpr4_sgpr5
+ %2:vgpr_32 = COPY %vgpr2
+ %1:vgpr_32 = COPY %vgpr1
+ %0:vgpr_32 = COPY %vgpr0
+ %5:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+ %6:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 8, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+ %7:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 16, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+ %8:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 24, 0
+ %9:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 32, 0
+ %10:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3, 4, 0
+ %11:sreg_32_xm0 = S_LSHR_B32 %10.sub0, 16, implicit-def dead %scc
+ %12:sreg_32_xm0 = S_MUL_I32 %11, %10.sub1
+ %13:vgpr_32 = V_MUL_LO_I32 0, %0, implicit %exec
+ %14:vgpr_32 = V_MUL_LO_I32 %1, %10.sub1, implicit %exec
+ %15:vgpr_32 = V_ADD_I32_e32 0, %13, implicit-def dead %vcc, implicit %exec
+ %16:vgpr_32 = V_ADD_I32_e32 0, %15, implicit-def dead %vcc, implicit %exec
+ %17:vgpr_32 = IMPLICIT_DEF
+ %18:sreg_64 = S_MOV_B64 0
+ %19:sreg_32_xm0_xexec = IMPLICIT_DEF
+ %20:vgpr_32 = V_ADD_I32_e32 %19, %0, implicit-def dead %vcc, implicit %exec
+ %21:vreg_64, dead %22:sreg_64 = V_MAD_I64_I32 %20, 12, %7, 0, implicit %exec
+ %23:vgpr_32 = GLOBAL_LOAD_DWORD %21, 4, 0, 0, implicit %exec
+ %24:vreg_64, dead %25:sreg_64 = V_MAD_I64_I32 %20, 48, %8, 0, implicit %exec
+ %26:vreg_128 = IMPLICIT_DEF
+ undef %27.sub0:sreg_64_xexec = S_LOAD_DWORD_IMM %6, 0, 0
+ %27.sub1:sreg_64_xexec = S_MOV_B32 0
+ %28:sreg_64 = S_LSHL_B64 %27, 2, implicit-def dead %scc
+ undef %29.sub0:sreg_64 = S_ADD_U32 %5.sub0, %28.sub0, implicit-def %scc
+ %29.sub1:sreg_64 = S_ADDC_U32 %5.sub1, %28.sub1, implicit-def dead %scc, implicit killed %scc
+ undef %30.sub0:sreg_64_xexec = S_LOAD_DWORD_IMM %6, 4, 0
+ %27.sub0:sreg_64_xexec = IMPLICIT_DEF
+ %31:sreg_64 = S_LSHL_B64 %27, 2, implicit-def dead %scc
+ %32:sreg_32_xm0 = S_ADD_U32 0, %31.sub0, implicit-def %scc
+ %33:sgpr_32 = S_ADDC_U32 %5.sub1, %31.sub1, implicit-def dead %scc, implicit killed %scc
+ %34:vgpr_32 = IMPLICIT_DEF
+ %35:vreg_64, dead %36:sreg_64 = V_MAD_I64_I32 %23, %34, 0, 0, implicit %exec
+ %37:vreg_64 = GLOBAL_LOAD_DWORDX2 %35, 32, 0, 0, implicit %exec
+ undef %38.sub1:vreg_64 = V_ASHRREV_I32_e32 31, %37.sub0, implicit %exec
+ %38.sub0:vreg_64 = COPY %37.sub0
+ %39:vreg_64 = V_LSHLREV_B64 3, %38, implicit %exec
+ undef %40.sub0:vreg_64, %41:sreg_64_xexec = V_ADD_I32_e64 0, %39.sub0, implicit %exec
+ %42:vgpr_32 = COPY %33
+ %40.sub1:vreg_64, dead %43:sreg_64_xexec = V_ADDC_U32_e64 %42, %39.sub1, %41, implicit %exec
+ %44:vreg_64 = GLOBAL_LOAD_DWORDX2 %40, 0, 0, 0, implicit %exec :: (load 8 from %ir.tmp34)
+ undef %45.sub1:vreg_64 = IMPLICIT_DEF
+ %45.sub0:vreg_64 = COPY %37.sub1
+ %46:vreg_64 = V_LSHLREV_B64 3, %45, implicit %exec
+ undef %47.sub0:vreg_64, %48:sreg_64_xexec = V_ADD_I32_e64 %32, %46.sub0, implicit %exec
+ %49:vgpr_32 = COPY %33
+ %47.sub1:vreg_64, dead %50:sreg_64_xexec = V_ADDC_U32_e64 %49, %46.sub1, %48, implicit %exec
+ %51:vreg_64 = IMPLICIT_DEF
+ undef %52.sub0:vreg_64 = GLOBAL_LOAD_DWORD %35, 40, 0, 0, implicit %exec :: (load 4 from %ir.18 + 8)
+ %52.sub1:vreg_64 = IMPLICIT_DEF
+ %53:vreg_64 = V_LSHLREV_B64 3, %52, implicit %exec
+ undef %54.sub0:vreg_64, %55:sreg_64_xexec = V_ADD_I32_e64 0, %53.sub0, implicit %exec
+ %56:vgpr_32 = COPY %33
+ %54.sub1:vreg_64, dead %57:sreg_64_xexec = V_ADDC_U32_e64 0, %53.sub1, %55, implicit %exec
+ %58:vreg_64 = IMPLICIT_DEF
+ %30.sub1:sreg_64_xexec = IMPLICIT_DEF
+ %59:sreg_64 = IMPLICIT_DEF
+ %60:sreg_32_xm0 = S_ADD_U32 %5.sub0, %59.sub0, implicit-def %scc
+ %61:sgpr_32 = S_ADDC_U32 %5.sub1, %59.sub1, implicit-def dead %scc, implicit killed %scc
+ %62:vreg_64 = GLOBAL_LOAD_DWORDX2 %35, 0, 0, 0, implicit %exec :: (load 8 from %ir.20, align 4)
+ undef %63.sub1:vreg_64 = V_ASHRREV_I32_e32 31, %62.sub0, implicit %exec
+ %63.sub0:vreg_64 = COPY %62.sub0
+ %64:vreg_64 = IMPLICIT_DEF
+ undef %65.sub0:vreg_64, %66:sreg_64_xexec = V_ADD_I32_e64 %60, %64.sub0, implicit %exec
+ %67:vgpr_32 = COPY %61
+ %65.sub1:vreg_64, dead %68:sreg_64_xexec = V_ADDC_U32_e64 %67, %64.sub1, %66, implicit %exec
+ %69:vreg_128 = GLOBAL_LOAD_DWORDX4 %65, 0, 0, 0, implicit %exec :: (load 16 from %ir.tmp58)
+ undef %70.sub1:vreg_64 = IMPLICIT_DEF
+ %70.sub0:vreg_64 = IMPLICIT_DEF
+ %71:vreg_64 = IMPLICIT_DEF
+ undef %72.sub0:vreg_64, %73:sreg_64_xexec = V_ADD_I32_e64 %60, %71.sub0, implicit %exec
+ %74:vgpr_32 = COPY %61
+ %72.sub1:vreg_64, dead %75:sreg_64_xexec = V_ADDC_U32_e64 0, %71.sub1, %73, implicit %exec
+ %76:vreg_128 = GLOBAL_LOAD_DWORDX4 %72, 0, 0, 0, implicit %exec
+ %77:vgpr_32 = IMPLICIT_DEF
+ %78:vgpr_32 = IMPLICIT_DEF
+ %79:vgpr_32 = V_MUL_F32_e32 0, %77, implicit %exec
+ %80:vgpr_32 = IMPLICIT_DEF
+ %81:vgpr_32 = IMPLICIT_DEF
+ %84:vgpr_32 = IMPLICIT_DEF
+ BUFFER_STORE_DWORD_OFFEN %84, %stack.0.tmp5, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr101, 108, 0, 0, 0, implicit %exec
+ BUFFER_STORE_DWORD_OFFEN %81, %stack.0.tmp5, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr101, 104, 0, 0, 0, implicit %exec
+ BUFFER_STORE_DWORD_OFFEN %80, %stack.0.tmp5, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr101, 100, 0, 0, 0, implicit %exec
+ BUFFER_STORE_DWORD_OFFEN %78, %stack.0.tmp5, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr101, 96, 0, 0, 0, implicit %exec
+ %85:vgpr_32 = IMPLICIT_DEF
+ %86:vgpr_32 = IMPLICIT_DEF
+ %87:vgpr_32 = IMPLICIT_DEF
+ %88:vgpr_32 = IMPLICIT_DEF
+ %90:vgpr_32 = IMPLICIT_DEF
+ %91:vgpr_32, dead %92:sreg_64 = V_DIV_SCALE_F32 %90, %90, 1065353216, implicit %exec
+ %95:vgpr_32 = V_FMA_F32 0, 0, 0, 0, 0, undef %93:vgpr_32, 0, 0, implicit %exec
+ %96:vgpr_32, %97:sreg_64 = V_DIV_SCALE_F32 1065353216, %90, 1065353216, implicit %exec
+ %98:vgpr_32 = IMPLICIT_DEF
+ %99:vgpr_32 = IMPLICIT_DEF
+ %100:vgpr_32 = IMPLICIT_DEF
+ %101:vgpr_32 = IMPLICIT_DEF
+ %102:vgpr_32 = IMPLICIT_DEF
+ %103:vgpr_32 = IMPLICIT_DEF
+ %104:vgpr_32 = IMPLICIT_DEF
+ %105:vgpr_32 = IMPLICIT_DEF
+ %106:vgpr_32, dead %107:sreg_64 = V_DIV_SCALE_F32 %90, %90, %105, implicit %exec
+ %108:vgpr_32 = V_RCP_F32_e32 0, implicit %exec
+ %109:vgpr_32 = IMPLICIT_DEF
+ %110:vgpr_32 = V_FMA_F32 0, 0, 0, 0, 0, 0, 0, 0, implicit %exec
+ %111:vgpr_32, %112:sreg_64 = V_DIV_SCALE_F32 0, 0, 0, implicit %exec
+ %113:vgpr_32 = V_MUL_F32_e32 0, %110, implicit %exec
+ %114:vgpr_32 = IMPLICIT_DEF
+ %115:vgpr_32 = IMPLICIT_DEF
+ %116:vgpr_32 = IMPLICIT_DEF
+ %vcc = IMPLICIT_DEF
+ %117:vgpr_32 = V_DIV_FMAS_F32 0, %116, 0, %110, 0, %115, 0, 0, implicit killed %vcc, implicit %exec
+ %118:vgpr_32 = V_DIV_FIXUP_F32 0, %117, 0, %90, 0, %105, 0, 0, implicit %exec
+ %119:vgpr_32 = IMPLICIT_DEF
+ %120:vgpr_32 = IMPLICIT_DEF
+ %121:vgpr_32 = IMPLICIT_DEF
+ %122:vgpr_32 = IMPLICIT_DEF
+ %123:vgpr_32 = IMPLICIT_DEF
+ %124:vgpr_32 = IMPLICIT_DEF
+ %125:vgpr_32 = IMPLICIT_DEF
+ %126:vgpr_32 = IMPLICIT_DEF
+ DBG_VALUE debug-use %103, debug-use _, !5, !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef), debug-location !8
+ ADJCALLSTACKUP 0, 0, implicit-def %sgpr32, implicit %sgpr32
+ %127:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @func + 4, target-flags(amdgpu-rel32-hi) @func + 4, implicit-def dead %scc
+ %sgpr4 = COPY %sgpr101
+ %vgpr0 = COPY %124
+ %vgpr1_vgpr2 = IMPLICIT_DEF
+ %vgpr3 = COPY %126
+ dead %sgpr30_sgpr31 = SI_CALL %127, @func, csr_amdgpu_highregs, implicit %sgpr0_sgpr1_sgpr2_sgpr3, implicit %sgpr4, implicit %vgpr0, implicit %vgpr1_vgpr2, implicit killed %vgpr3
+ ADJCALLSTACKDOWN 0, 0, implicit-def %sgpr32, implicit %sgpr32
+ %128:vreg_64, dead %129:sreg_64 = V_MAD_I64_I32 %20, %34, 0, 0, implicit %exec
+ S_ENDPGM
+
+...
diff --git a/test/CodeGen/AMDGPU/schedule-regpressure.mir b/test/CodeGen/AMDGPU/schedule-regpressure.mir index 3a20ec732e5b..afc2fab08f87 100644 --- a/test/CodeGen/AMDGPU/schedule-regpressure.mir +++ b/test/CodeGen/AMDGPU/schedule-regpressure.mir @@ -4,7 +4,7 @@ # Check there is no SReg_32 pressure created by DS_* instructions because of M0 use # CHECK: ScheduleDAGMILive::schedule starting -# CHECK: SU({{.*}} = DS_READ_B32 {{.*}} %M0, %EXEC +# CHECK: SU({{.*}} = DS_READ_B32 {{.*}} implicit %m0, implicit %exec # CHECK: Pressure Diff : {{$}} # CHECK: SU({{.*}} DS_WRITE_B32 diff --git a/test/CodeGen/AMDGPU/scratch-simple.ll b/test/CodeGen/AMDGPU/scratch-simple.ll index 5e0178072e5e..238bcc54928f 100644 --- a/test/CodeGen/AMDGPU/scratch-simple.ll +++ b/test/CodeGen/AMDGPU/scratch-simple.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -mcpu=verde -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=gfx804 -mattr=-flat-for-global -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX9 %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx803 -mattr=-flat-for-global -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -mattr=-flat-for-global -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX9 %s ; This used to fail due to a v_add_i32 instruction with an illegal immediate ; operand that was created during Local Stack Slot Allocation. Test case derived diff --git a/test/CodeGen/AMDGPU/sdiv.ll b/test/CodeGen/AMDGPU/sdiv.ll index f75bec411d28..b79bca54bbdd 100644 --- a/test/CodeGen/AMDGPU/sdiv.ll +++ b/test/CodeGen/AMDGPU/sdiv.ll @@ -1,5 +1,6 @@ ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; The code generated by sdiv is long and complex and may frequently change. 
diff --git a/test/CodeGen/AMDGPU/sdwa-peephole-instr.mir b/test/CodeGen/AMDGPU/sdwa-peephole-instr.mir index 77c231c584a2..0d1534e3f4e8 100644 --- a/test/CodeGen/AMDGPU/sdwa-peephole-instr.mir +++ b/test/CodeGen/AMDGPU/sdwa-peephole-instr.mir @@ -148,13 +148,13 @@ body: | # GCN-LABEL: {{^}}name: vop2_instructions -# VI: %{{[0-9]+}}:vgpr_32 = V_AND_B32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 0, 6, 5, implicit %exec +# VI: %{{[0-9]+}}:vgpr_32 = V_AND_B32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 5, 0, 6, 5, implicit %exec # VI: %{{[0-9]+}}:vgpr_32 = V_ADD_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 5, 0, 5, 1, implicit %exec # VI: %{{[0-9]+}}:vgpr_32 = V_SUB_F16_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 6, 0, 5, 1, implicit %exec # VI: %{{[0-9]+}}:vgpr_32 = V_MAC_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, %{{[0-9]+}}, 0, 0, 6, 0, 6, 1, implicit %exec # VI: %{{[0-9]+}}:vgpr_32 = V_MAC_F16_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, %{{[0-9]+}}, 0, 0, 6, 0, 5, 1, implicit %exec -# GFX9: %{{[0-9]+}}:vgpr_32 = V_AND_B32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 0, 6, 5, implicit %exec +# GFX9: %{{[0-9]+}}:vgpr_32 = V_AND_B32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 5, 0, 6, 5, implicit %exec # GFX9: %{{[0-9]+}}:vgpr_32 = V_ADD_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 5, 0, 5, 1, implicit %exec # GFX9: %{{[0-9]+}}:vgpr_32 = V_SUB_F16_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 6, 0, 5, 1, implicit %exec # GFX9: %{{[0-9]+}}:vgpr_32 = V_MAC_F32_e32 %{{[0-9]+}}, %{{[0-9]+}}, %{{[0-9]+}}, implicit %exec diff --git a/test/CodeGen/AMDGPU/sdwa-peephole.ll b/test/CodeGen/AMDGPU/sdwa-peephole.ll index d7d2c43e6cf0..de5229e0550a 100644 --- a/test/CodeGen/AMDGPU/sdwa-peephole.ll +++ b/test/CodeGen/AMDGPU/sdwa-peephole.ll @@ -5,9 +5,10 @@ ; GCN-LABEL: {{^}}add_shr_i32: ; NOSDWA: v_lshrrev_b32_e32 v[[DST:[0-9]+]], 16, v{{[0-9]+}} ; NOSDWA: v_add_u32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v[[DST]] -; NOSDWA-NOT: v_add_{{[_cou]*}}32_sdwa +; NOSDWA-NOT: v_add_{{(_co)?}}_u32_sdwa -; SDWA: v_add_{{[_cou]*}}32_sdwa v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; VI: v_add_u32_sdwa v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX9: v_add_u32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 define amdgpu_kernel void @add_shr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { %a = load i32, i32 addrspace(1)* %in, align 4 @@ -20,10 +21,10 @@ define amdgpu_kernel void @add_shr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* ; GCN-LABEL: {{^}}sub_shr_i32: ; NOSDWA: v_lshrrev_b32_e32 v[[DST:[0-9]+]], 16, v{{[0-9]+}} ; NOSDWA: v_subrev_u32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v[[DST]] -; NOSDWA-NOT: v_subrev_{{[_cou]*}}32_sdwa - -; SDWA: v_subrev_{{[_cou]*}}32_sdwa v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; NOSDWA-NOT: v_subrev_{{(_co)?}}_u32_sdwa +; VI: v_subrev_u32_sdwa v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX9: v_sub_u32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD define amdgpu_kernel void @sub_shr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { %a = load i32, i32 addrspace(1)* %in, align 4 %shr = lshr i32 %a, 16 @@ -426,7 +427,7 @@ entry: } ; GCN-LABEL: {{^}}add_bb_v2i16: -; NOSDWA-NOT: v_add_{{[_cou]*}}32_sdwa +; 
NOSDWA-NOT: v_add_{{(_co)?}}_u32_sdwa ; VI: v_add_u32_sdwa v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 @@ -496,3 +497,26 @@ entry: store <8 x i8> %tmp19, <8 x i8> addrspace(1)* %arrayidx5, align 8 ret void } + +; GCN-LABEL: {{^}}sdwa_crash_inlineasm_de +; GCN: s_mov_b32 s{{[0-9]+}}, 0xffff +; GCN: v_and_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} +; GCN: v_or_b32_e32 v{{[0-9]+}}, 0x10000, +define amdgpu_kernel void @sdwa_crash_inlineasm_def() #0 { +bb: + br label %bb1 + +bb1: ; preds = %bb11, %bb + %tmp = phi <2 x i32> [ %tmp12, %bb11 ], [ undef, %bb ] + br i1 true, label %bb2, label %bb11 + +bb2: ; preds = %bb1 + %tmp3 = call i32 asm "v_and_b32_e32 $0, $1, $2", "=v,s,v"(i32 65535, i32 undef) #1 + %tmp5 = or i32 %tmp3, 65536 + %tmp6 = insertelement <2 x i32> %tmp, i32 %tmp5, i64 0 + br label %bb11 + +bb11: ; preds = %bb10, %bb2 + %tmp12 = phi <2 x i32> [ %tmp6, %bb2 ], [ %tmp, %bb1 ] + br label %bb1 +} diff --git a/test/CodeGen/AMDGPU/sdwa-preserve.mir b/test/CodeGen/AMDGPU/sdwa-preserve.mir new file mode 100644 index 000000000000..99a000cbd39a --- /dev/null +++ b/test/CodeGen/AMDGPU/sdwa-preserve.mir @@ -0,0 +1,56 @@ +# RUN: llc -march=amdgcn -mcpu=fiji -start-before=si-peephole-sdwa -verify-machineinstrs -o - %s | FileCheck -check-prefix=SDWA %s +# RUN: llc -march=amdgcn -mcpu=gfx900 -start-before=si-peephole-sdwa -verify-machineinstrs -o - %s | FileCheck -check-prefix=SDWA %s + +# SDWA-LABEL: {{^}}add_f16_u32_preserve + +# SDWA: flat_load_dword [[FIRST:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] +# SDWA: flat_load_dword [[SECOND:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] + +# SDWA: v_mul_f32_sdwa [[RES:v[0-9]+]], [[FIRST]], [[SECOND]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:BYTE_3 +# SDWA: v_add_f16_sdwa [[RES:v[0-9]+]], [[FIRST]], [[SECOND]] dst_sel:BYTE_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 src1_sel:WORD_1 + +# SDWA: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], [[RES]] + +--- +name: add_f16_u32_preserve +tracksRegLiveness: true +registers: + - { id: 0, class: vreg_64 } + - { id: 1, class: vreg_64 } + - { id: 2, class: sreg_64 } + - { id: 3, class: vgpr_32 } + - { id: 4, class: vgpr_32 } + - { id: 5, class: vgpr_32 } + - { id: 6, class: vgpr_32 } + - { id: 7, class: vgpr_32 } + - { id: 8, class: vgpr_32 } + - { id: 9, class: vgpr_32 } + - { id: 10, class: vgpr_32 } + - { id: 11, class: vgpr_32 } + - { id: 12, class: vgpr_32 } + - { id: 13, class: vgpr_32 } +body: | + bb.0: + liveins: %vgpr0_vgpr1, %vgpr2_vgpr3, %sgpr30_sgpr31 + + %2 = COPY %sgpr30_sgpr31 + %1 = COPY %vgpr2_vgpr3 + %0 = COPY %vgpr0_vgpr1 + %3 = FLAT_LOAD_DWORD %0, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 4) + %4 = FLAT_LOAD_DWORD %1, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 4) + + %5 = V_AND_B32_e32 65535, %3, implicit %exec + %6 = V_LSHRREV_B32_e64 16, %4, implicit %exec + %7 = V_BFE_U32 %3, 8, 8, implicit %exec + %8 = V_LSHRREV_B32_e32 24, %4, implicit %exec + + %9 = V_ADD_F16_e64 0, %5, 0, %6, 0, 0, implicit %exec + %10 = V_LSHLREV_B16_e64 8, %9, implicit %exec + %11 = V_MUL_F32_e64 0, %7, 0, %8, 0, 0, implicit %exec + %12 = V_LSHLREV_B32_e64 16, %11, implicit %exec + + %13 = V_OR_B32_e64 %10, %12, implicit %exec + + FLAT_STORE_DWORD %0, %13, 0, 0, 0, implicit %exec, implicit %flat_scr :: (store 4) + %sgpr30_sgpr31 = COPY %2 + S_SETPC_B64_return %sgpr30_sgpr31 diff --git a/test/CodeGen/AMDGPU/select-opt.ll b/test/CodeGen/AMDGPU/select-opt.ll index d56b952118b5..540eb9ca93b2 100644 --- 
a/test/CodeGen/AMDGPU/select-opt.ll +++ b/test/CodeGen/AMDGPU/select-opt.ll @@ -134,8 +134,8 @@ define amdgpu_kernel void @opt_select_i64_or_cmp_f32(i64 addrspace(1)* %out, flo } ; GCN-LABEL: {{^}}regression: -; GCN: v_cmp_neq_f32_e64 vcc -; GCN: v_cmp_neq_f32_e64 vcc, s{{[0-9]+}}, 0 +; GCN: v_cmp_neq_f32_e64 +; GCN: v_cmp_neq_f32_e64 {{[^,]*}}, s{{[0-9]+}}, 0 ; GCN: v_cmp_ne_u32_e32 vcc, 0, v{{[0-9]+}} define amdgpu_kernel void @regression(float addrspace(1)* %out, float %c0, float %c1) #0 { diff --git a/test/CodeGen/AMDGPU/sext-in-reg.ll b/test/CodeGen/AMDGPU/sext-in-reg.ll index d3f33e896784..4cf284630c22 100644 --- a/test/CodeGen/AMDGPU/sext-in-reg.ll +++ b/test/CodeGen/AMDGPU/sext-in-reg.ll @@ -1,6 +1,6 @@ ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI -check-prefix=GFX89 -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 -check-prefix=FUNC %s ; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mtriple=r600---amdgiz -mcpu=cypress < %s | FileCheck -enable-var-scope -check-prefix=EG -check-prefix=FUNC %s ; FIXME: i16 promotion pass ruins the scalar cases when legal. 
diff --git a/test/CodeGen/AMDGPU/sgpr-control-flow.ll b/test/CodeGen/AMDGPU/sgpr-control-flow.ll index 8e18ab5554e4..575938b5a5cb 100644 --- a/test/CodeGen/AMDGPU/sgpr-control-flow.ll +++ b/test/CodeGen/AMDGPU/sgpr-control-flow.ll @@ -37,7 +37,7 @@ endif: ; SI: s_cmp_lg_u32 ; SI: s_cbranch_scc0 [[IF:BB[0-9]+_[0-9]+]] -; SI: ; BB#1: ; %else +; SI: ; %bb.1: ; %else ; SI: s_load_dword [[LOAD0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xe ; SI: s_load_dword [[LOAD1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xf ; SI-NOT: add diff --git a/test/CodeGen/AMDGPU/shl.v2i16.ll b/test/CodeGen/AMDGPU/shl.v2i16.ll index 6bbf9363888f..157808b39651 100644 --- a/test/CodeGen/AMDGPU/shl.v2i16.ll +++ b/test/CodeGen/AMDGPU/shl.v2i16.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=CIVI %s ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=CIVI %s diff --git a/test/CodeGen/AMDGPU/shrink-carry.mir b/test/CodeGen/AMDGPU/shrink-carry.mir index cf000ffa7747..d499b2192e97 100644 --- a/test/CodeGen/AMDGPU/shrink-carry.mir +++ b/test/CodeGen/AMDGPU/shrink-carry.mir @@ -1,7 +1,7 @@ # RUN: llc -march=amdgcn -verify-machineinstrs -start-before si-shrink-instructions -stop-before si-insert-skips -o - %s | FileCheck -check-prefix=GCN %s # GCN-LABEL: name: subbrev{{$}} -# GCN: V_SUBBREV_U32_e64 0, undef %vgpr0, killed %vcc, implicit %exec +# GCN: V_SUBBREV_U32_e64 0, undef %vgpr0, killed renamable %vcc, implicit %exec --- name: subbrev @@ -25,7 +25,7 @@ body: | ... # GCN-LABEL: name: subb{{$}} -# GCN: V_SUBB_U32_e64 undef %vgpr0, 0, killed %vcc, implicit %exec +# GCN: V_SUBB_U32_e64 undef %vgpr0, 0, killed renamable %vcc, implicit %exec --- name: subb @@ -49,7 +49,7 @@ body: | ... # GCN-LABEL: name: addc{{$}} -# GCN: V_ADDC_U32_e32 0, undef %vgpr0, implicit-def %vcc, implicit killed %vcc, implicit %exec +# GCN: V_ADDC_U32_e32 0, undef renamable %vgpr0, implicit-def %vcc, implicit killed %vcc, implicit %exec --- name: addc @@ -73,7 +73,7 @@ body: | ... 
# GCN-LABEL: name: addc2{{$}} -# GCN: V_ADDC_U32_e32 0, undef %vgpr0, implicit-def %vcc, implicit killed %vcc, implicit %exec +# GCN: V_ADDC_U32_e32 0, undef renamable %vgpr0, implicit-def %vcc, implicit killed %vcc, implicit %exec --- name: addc2 diff --git a/test/CodeGen/AMDGPU/si-lower-control-flow-unreachable-block.ll b/test/CodeGen/AMDGPU/si-lower-control-flow-unreachable-block.ll index 7423a4a27538..ce85a6663404 100644 --- a/test/CodeGen/AMDGPU/si-lower-control-flow-unreachable-block.ll +++ b/test/CodeGen/AMDGPU/si-lower-control-flow-unreachable-block.ll @@ -55,7 +55,7 @@ unreachable: ; GCN: s_cmp_lg_u32 ; GCN: s_cbranch_scc0 [[UNREACHABLE:BB[0-9]+_[0-9]+]] -; GCN-NEXT: BB#{{[0-9]+}}: ; %ret +; GCN-NEXT: %bb.{{[0-9]+}}: ; %ret ; GCN-NEXT: s_endpgm ; GCN: [[UNREACHABLE]]: diff --git a/test/CodeGen/AMDGPU/sibling-call.ll b/test/CodeGen/AMDGPU/sibling-call.ll index 35254130cad9..f7e8a1d80e9b 100644 --- a/test/CodeGen/AMDGPU/sibling-call.ll +++ b/test/CodeGen/AMDGPU/sibling-call.ll @@ -1,11 +1,13 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=fiji -mattr=-flat-for-global -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,MESA %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=hawaii -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,MESA %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=gfx900 -mattr=-flat-for-global -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,VI,MESA %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=fiji -mattr=-flat-for-global -enable-ipra=0 -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,CIVI,MESA %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=hawaii -enable-ipra=0 -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,CIVI,MESA %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=gfx900 -mattr=-flat-for-global -enable-ipra=0 -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,MESA %s target datalayout = "A5" +; FIXME: Why is this commuted only sometimes? 
; GCN-LABEL: {{^}}i32_fastcc_i32_i32: ; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_add_{{[_coiu]*}}32_e32 v0, vcc, v1, v0 +; CIVI-NEXT: v_add_{{i|u}}32_e32 v0, vcc, v1, v0 +; GFX9-NEXT: v_add_u32_e32 v0, v0, v1 ; GCN-NEXT: s_setpc_b64 define fastcc i32 @i32_fastcc_i32_i32(i32 %arg0, i32 %arg1) #1 { %add0 = add i32 %arg0, %arg1 @@ -14,7 +16,8 @@ define fastcc i32 @i32_fastcc_i32_i32(i32 %arg0, i32 %arg1) #1 { ; GCN-LABEL: {{^}}i32_fastcc_i32_i32_stack_object: ; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN: v_add_{{[_coiu]*}}32_e32 v0, vcc, v1, v +; CIVI-NEXT: v_add_{{i|u}}32_e32 v0, vcc, v1, v0 +; GFX9-NEXT: v_add_u32_e32 v0, v0, v1 ; GCN: s_mov_b32 s5, s32 ; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s5 offset:24 ; GCN: s_waitcnt vmcnt(0) @@ -84,7 +87,10 @@ entry: ; GCN-NEXT: s_mov_b32 s5, s32 ; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s5 offset:4 ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_add_{{[_coiu]*}}32_e32 v0, vcc, v1, v0 + +; CIVI-NEXT: v_add_{{i|u}}32_e32 v0, vcc, v1, v0 +; GFX9-NEXT: v_add_u32_e32 v0, v0, v1 + ; GCN-NEXT: s_setpc_b64 s[30:31] define fastcc i32 @i32_fastcc_i32_byval_i32(i32 %arg0, i32 addrspace(5)* byval align 4 %arg1) #1 { %arg1.load = load i32, i32 addrspace(5)* %arg1, align 4 @@ -123,9 +129,16 @@ entry: ; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-DAG: buffer_load_dword [[LOAD_0:v[0-9]+]], off, s[0:3], s5 offset:4 ; GCN-DAG: buffer_load_dword [[LOAD_1:v[0-9]+]], off, s[0:3], s5 offset:8 -; GCN-DAG: v_add_{{[_coiu]*}}32_e32 v0, vcc, v1, v0 -; GCN: v_add_{{[_coiu]*}}32_e32 v0, vcc, [[LOAD_0]], v0 -; GCN: v_add_{{[_coiu]*}}32_e32 v0, vcc, [[LOAD_1]], v0 + +; CIVI-NEXT: v_add_{{i|u}}32_e32 v0, vcc, v1, v0 +; CIVI: v_add_{{i|u}}32_e32 v0, vcc, [[LOAD_0]], v0 +; CIVI: v_add_{{i|u}}32_e32 v0, vcc, [[LOAD_1]], v0 + + +; GFX9-NEXT: v_add_u32_e32 v0, v0, v1 +; GFX9: v_add_u32_e32 v0, v0, [[LOAD_0]] +; GFX9: v_add_u32_e32 v0, v0, [[LOAD_1]] + ; GCN-NEXT: s_setpc_b64 define fastcc i32 @i32_fastcc_i32_i32_a32i32(i32 %arg0, i32 %arg1, [32 x i32] %large) #1 { %val_firststack = extractvalue [32 x i32] %large, 30 diff --git a/test/CodeGen/AMDGPU/skip-if-dead.ll b/test/CodeGen/AMDGPU/skip-if-dead.ll index ed7e06ee4e24..54fa93ae9c8e 100644 --- a/test/CodeGen/AMDGPU/skip-if-dead.ll +++ b/test/CodeGen/AMDGPU/skip-if-dead.ll @@ -1,7 +1,7 @@ ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s ; CHECK-LABEL: {{^}}test_kill_depth_0_imm_pos: -; CHECK-NEXT: ; BB#0: +; CHECK-NEXT: ; %bb.0: ; CHECK-NEXT: s_endpgm define amdgpu_ps void @test_kill_depth_0_imm_pos() #0 { call void @llvm.AMDGPU.kill(float 0.0) @@ -9,9 +9,9 @@ define amdgpu_ps void @test_kill_depth_0_imm_pos() #0 { } ; CHECK-LABEL: {{^}}test_kill_depth_0_imm_neg: -; CHECK-NEXT: ; BB#0: +; CHECK-NEXT: ; %bb.0: ; CHECK-NEXT: s_mov_b64 exec, 0 -; CHECK-NEXT: ; BB#1: +; CHECK-NEXT: ; %bb.1: ; CHECK-NEXT: s_endpgm define amdgpu_ps void @test_kill_depth_0_imm_neg() #0 { call void @llvm.AMDGPU.kill(float -0.0) @@ -20,11 +20,11 @@ define amdgpu_ps void @test_kill_depth_0_imm_neg() #0 { ; FIXME: Ideally only one would be emitted ; CHECK-LABEL: {{^}}test_kill_depth_0_imm_neg_x2: -; CHECK-NEXT: ; BB#0: +; CHECK-NEXT: ; %bb.0: ; CHECK-NEXT: s_mov_b64 exec, 0 -; CHECK-NEXT: ; BB#1: +; CHECK-NEXT: ; %bb.1: ; CHECK-NEXT: s_mov_b64 exec, 0 -; CHECK-NEXT: ; BB#2: +; CHECK-NEXT: ; %bb.2: ; CHECK-NEXT: s_endpgm define amdgpu_ps void @test_kill_depth_0_imm_neg_x2() #0 { call void @llvm.AMDGPU.kill(float -0.0) @@ -33,9 +33,9 @@ define amdgpu_ps void @test_kill_depth_0_imm_neg_x2() #0 { } ; 
CHECK-LABEL: {{^}}test_kill_depth_var: -; CHECK-NEXT: ; BB#0: +; CHECK-NEXT: ; %bb.0: ; CHECK-NEXT: v_cmpx_le_f32_e32 vcc, 0, v0 -; CHECK-NEXT: ; BB#1: +; CHECK-NEXT: ; %bb.1: ; CHECK-NEXT: s_endpgm define amdgpu_ps void @test_kill_depth_var(float %x) #0 { call void @llvm.AMDGPU.kill(float %x) @@ -44,11 +44,11 @@ define amdgpu_ps void @test_kill_depth_var(float %x) #0 { ; FIXME: Ideally only one would be emitted ; CHECK-LABEL: {{^}}test_kill_depth_var_x2_same: -; CHECK-NEXT: ; BB#0: +; CHECK-NEXT: ; %bb.0: ; CHECK-NEXT: v_cmpx_le_f32_e32 vcc, 0, v0 -; CHECK-NEXT: ; BB#1: +; CHECK-NEXT: ; %bb.1: ; CHECK-NEXT: v_cmpx_le_f32_e32 vcc, 0, v0 -; CHECK-NEXT: ; BB#2: +; CHECK-NEXT: ; %bb.2: ; CHECK-NEXT: s_endpgm define amdgpu_ps void @test_kill_depth_var_x2_same(float %x) #0 { call void @llvm.AMDGPU.kill(float %x) @@ -57,11 +57,11 @@ define amdgpu_ps void @test_kill_depth_var_x2_same(float %x) #0 { } ; CHECK-LABEL: {{^}}test_kill_depth_var_x2: -; CHECK-NEXT: ; BB#0: +; CHECK-NEXT: ; %bb.0: ; CHECK-NEXT: v_cmpx_le_f32_e32 vcc, 0, v0 -; CHECK-NEXT: ; BB#1: +; CHECK-NEXT: ; %bb.1: ; CHECK-NEXT: v_cmpx_le_f32_e32 vcc, 0, v1 -; CHECK-NEXT: ; BB#2: +; CHECK-NEXT: ; %bb.2: ; CHECK-NEXT: s_endpgm define amdgpu_ps void @test_kill_depth_var_x2(float %x, float %y) #0 { call void @llvm.AMDGPU.kill(float %x) @@ -70,12 +70,12 @@ define amdgpu_ps void @test_kill_depth_var_x2(float %x, float %y) #0 { } ; CHECK-LABEL: {{^}}test_kill_depth_var_x2_instructions: -; CHECK-NEXT: ; BB#0: +; CHECK-NEXT: ; %bb.0: ; CHECK-NEXT: v_cmpx_le_f32_e32 vcc, 0, v0 -; CHECK-NEXT: ; BB#1: +; CHECK-NEXT: ; %bb.1: ; CHECK: v_mov_b32_e64 v7, -1 ; CHECK: v_cmpx_le_f32_e32 vcc, 0, v7 -; CHECK-NEXT: ; BB#2: +; CHECK-NEXT: ; %bb.2: ; CHECK-NEXT: s_endpgm define amdgpu_ps void @test_kill_depth_var_x2_instructions(float %x) #0 { call void @llvm.AMDGPU.kill(float %x) @@ -90,7 +90,7 @@ define amdgpu_ps void @test_kill_depth_var_x2_instructions(float %x) #0 { ; CHECK: s_cmp_lg_u32 s{{[0-9]+}}, 0 ; CHECK: s_cbranch_scc1 [[RETURN_BB:BB[0-9]+_[0-9]+]] -; CHECK-NEXT: ; BB#1: +; CHECK-NEXT: ; %bb.1: ; CHECK: v_mov_b32_e64 v7, -1 ; CHECK: v_nop_e64 ; CHECK: v_nop_e64 @@ -105,7 +105,7 @@ define amdgpu_ps void @test_kill_depth_var_x2_instructions(float %x) #0 { ; CHECK: v_cmpx_le_f32_e32 vcc, 0, v7 ; CHECK-NEXT: s_cbranch_execnz [[SPLIT_BB:BB[0-9]+_[0-9]+]] -; CHECK-NEXT: ; BB#2: +; CHECK-NEXT: ; %bb.2: ; CHECK-NEXT: exp null off, off, off, off done vm ; CHECK-NEXT: s_endpgm @@ -141,7 +141,7 @@ exit: ; CHECK-NEXT: v_mov_b32_e32 v{{[0-9]+}}, 0 ; CHECK-NEXT: s_cbranch_scc1 [[RETURN_BB:BB[0-9]+_[0-9]+]] -; CHECK-NEXT: ; BB#1: ; %bb +; CHECK-NEXT: ; %bb.1: ; %bb ; CHECK: v_mov_b32_e64 v7, -1 ; CHECK: v_nop_e64 ; CHECK: v_nop_e64 @@ -157,7 +157,7 @@ exit: ; CHECK: v_cmpx_le_f32_e32 vcc, 0, v7 ; CHECK-NEXT: s_cbranch_execnz [[SPLIT_BB:BB[0-9]+_[0-9]+]] -; CHECK-NEXT: ; BB#2: +; CHECK-NEXT: ; %bb.2: ; CHECK-NEXT: exp null off, off, off, off done vm ; CHECK-NEXT: s_endpgm @@ -215,7 +215,7 @@ exit: ; CHECK: v_nop_e64 ; CHECK: v_cmpx_le_f32_e32 vcc, 0, v7 -; CHECK-NEXT: ; BB#3: +; CHECK-NEXT: ; %bb.3: ; CHECK: buffer_load_dword [[LOAD:v[0-9]+]] ; CHECK: v_cmp_eq_u32_e32 vcc, 0, [[LOAD]] ; CHECK-NEXT: s_and_b64 vcc, exec, vcc @@ -267,7 +267,7 @@ exit: ; CHECK: [[PHIBB]]: ; CHECK: v_cmp_eq_f32_e32 vcc, 0, [[PHIREG]] -; CHECK-NEXT: s_cbranch_vccz [[ENDBB:BB[0-9]+_[0-9]+]] +; CHECK: s_cbranch_vccz [[ENDBB:BB[0-9]+_[0-9]+]] ; CHECK: ; %bb10 ; CHECK: v_mov_b32_e32 v{{[0-9]+}}, 9 @@ -302,14 +302,14 @@ end: ; CHECK-LABEL: {{^}}no_skip_no_successors: ; CHECK: 
v_cmp_nge_f32 -; CHECK-NEXT: s_cbranch_vccz [[SKIPKILL:BB[0-9]+_[0-9]+]] +; CHECK: s_cbranch_vccz [[SKIPKILL:BB[0-9]+_[0-9]+]] ; CHECK: ; %bb6 ; CHECK: s_mov_b64 exec, 0 ; CHECK: [[SKIPKILL]]: ; CHECK: v_cmp_nge_f32_e32 vcc -; CHECK-NEXT: BB#3: ; %bb5 +; CHECK: %bb.3: ; %bb5 ; CHECK-NEXT: .Lfunc_end{{[0-9]+}} define amdgpu_ps void @no_skip_no_successors(float inreg %arg, float inreg %arg1) #0 { bb: @@ -335,7 +335,7 @@ bb7: ; preds = %bb4 } ; CHECK-LABEL: {{^}}if_after_kill_block: -; CHECK: ; BB#0: +; CHECK: ; %bb.0: ; CHECK: s_and_saveexec_b64 ; CHECK: s_xor_b64 ; CHECK-NEXT: mask branch [[BB4:BB[0-9]+_[0-9]+]] diff --git a/test/CodeGen/AMDGPU/smed3.ll b/test/CodeGen/AMDGPU/smed3.ll index 8665ab697265..9e10f049c607 100644 --- a/test/CodeGen/AMDGPU/smed3.ll +++ b/test/CodeGen/AMDGPU/smed3.ll @@ -1,6 +1,6 @@ ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SICIVI -check-prefix=SI %s ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SICIVI -check-prefix=VI %s -; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s declare i32 @llvm.amdgcn.workitem.id.x() #0 diff --git a/test/CodeGen/AMDGPU/sminmax.ll b/test/CodeGen/AMDGPU/sminmax.ll index 10b85d3a94f9..96a318fef024 100644 --- a/test/CodeGen/AMDGPU/sminmax.ll +++ b/test/CodeGen/AMDGPU/sminmax.ll @@ -1,6 +1,7 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SIVI,FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SIVI,FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefixes=EG,FUNC %s ; FUNC-LABEL: {{^}}s_abs_i32: ; GCN: s_abs_i32 @@ -17,9 +18,13 @@ define amdgpu_kernel void @s_abs_i32(i32 addrspace(1)* %out, i32 %val) nounwind } ; FUNC-LABEL: {{^}}v_abs_i32: -; GCN: v_sub_{{[iu]}}32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SRC:v[0-9]+]] +; SIVI: v_sub_{{i|u}}32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SRC:v[0-9]+]] +; GFX9: v_sub_u32_e32 [[NEG:v[0-9]+]], 0, [[SRC:v[0-9]+]] + ; GCN: v_max_i32_e32 {{v[0-9]+}}, [[SRC]], [[NEG]] -; GCN: v_add_{{[iu]}}32 + +; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc +; GFX9: v_add_u32_e32 v{{[0-9]+}}, 2 ; EG: MAX_INT define amdgpu_kernel void @v_abs_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %src) nounwind { @@ -33,7 +38,8 @@ define amdgpu_kernel void @v_abs_i32(i32 addrspace(1)* %out, i32 addrspace(1)* % } ; GCN-LABEL: {{^}}v_abs_i32_repeat_user: -; GCN: v_sub_{{[iu]}}32_e32 [[NEG:v[0-9]+]], 
vcc, 0, [[SRC:v[0-9]+]] +; SIVI: v_sub_{{i|u}}32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SRC:v[0-9]+]] +; GFX9: v_sub_u32_e32 [[NEG:v[0-9]+]], 0, [[SRC:v[0-9]+]] ; GCN: v_max_i32_e32 [[MAX:v[0-9]+]], [[SRC]], [[NEG]] ; GCN: v_mul_lo_i32 v{{[0-9]+}}, [[MAX]], [[MAX]] define amdgpu_kernel void @v_abs_i32_repeat_user(i32 addrspace(1)* %out, i32 addrspace(1)* %src) nounwind { @@ -68,14 +74,20 @@ define amdgpu_kernel void @s_abs_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> % } ; FUNC-LABEL: {{^}}v_abs_v2i32: -; GCN-DAG: v_sub_{{[iu]}}32_e32 [[NEG0:v[0-9]+]], vcc, 0, [[SRC0:v[0-9]+]] -; GCN-DAG: v_sub_{{[iu]}}32_e32 [[NEG1:v[0-9]+]], vcc, 0, [[SRC1:v[0-9]+]] +; SIVI-DAG: v_sub_{{i|u}}32_e32 [[NEG0:v[0-9]+]], vcc, 0, [[SRC0:v[0-9]+]] +; SIVI-DAG: v_sub_{{i|u}}32_e32 [[NEG1:v[0-9]+]], vcc, 0, [[SRC1:v[0-9]+]] + +; GFX9-DAG: v_sub_u32_e32 [[NEG0:v[0-9]+]], 0, [[SRC0:v[0-9]+]] +; GFX9-DAG: v_sub_u32_e32 [[NEG1:v[0-9]+]], 0, [[SRC1:v[0-9]+]] ; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC0]], [[NEG0]] ; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC1]], [[NEG1]] -; GCN: v_add_{{[iu]}}32 -; GCN: v_add_{{[iu]}}32 +; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc +; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc + +; GFX9: v_add_u32_e32 v{{[0-9]+}}, 2, +; GFX9: v_add_u32_e32 v{{[0-9]+}}, 2, ; EG: MAX_INT ; EG: MAX_INT @@ -127,20 +139,31 @@ define amdgpu_kernel void @s_abs_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> % } ; FUNC-LABEL: {{^}}v_abs_v4i32: -; GCN-DAG: v_sub_{{[iu]}}32_e32 [[NEG0:v[0-9]+]], vcc, 0, [[SRC0:v[0-9]+]] -; GCN-DAG: v_sub_{{[iu]}}32_e32 [[NEG1:v[0-9]+]], vcc, 0, [[SRC1:v[0-9]+]] -; GCN-DAG: v_sub_{{[iu]}}32_e32 [[NEG2:v[0-9]+]], vcc, 0, [[SRC2:v[0-9]+]] -; GCN-DAG: v_sub_{{[iu]}}32_e32 [[NEG3:v[0-9]+]], vcc, 0, [[SRC3:v[0-9]+]] + +; SIVI-DAG: v_sub_{{i|u}}32_e32 [[NEG0:v[0-9]+]], vcc, 0, [[SRC0:v[0-9]+]] +; SIVI-DAG: v_sub_{{i|u}}32_e32 [[NEG1:v[0-9]+]], vcc, 0, [[SRC1:v[0-9]+]] +; SIVI-DAG: v_sub_{{i|u}}32_e32 [[NEG2:v[0-9]+]], vcc, 0, [[SRC2:v[0-9]+]] +; SIVI-DAG: v_sub_{{i|u}}32_e32 [[NEG3:v[0-9]+]], vcc, 0, [[SRC3:v[0-9]+]] + +; GFX9-DAG: v_sub_u32_e32 [[NEG0:v[0-9]+]], 0, [[SRC0:v[0-9]+]] +; GFX9-DAG: v_sub_u32_e32 [[NEG1:v[0-9]+]], 0, [[SRC1:v[0-9]+]] +; GFX9-DAG: v_sub_u32_e32 [[NEG2:v[0-9]+]], 0, [[SRC2:v[0-9]+]] +; GFX9-DAG: v_sub_u32_e32 [[NEG3:v[0-9]+]], 0, [[SRC3:v[0-9]+]] ; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC0]], [[NEG0]] ; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC1]], [[NEG1]] ; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC2]], [[NEG2]] ; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC3]], [[NEG3]] -; GCN: v_add_{{[iu]}}32 -; GCN: v_add_{{[iu]}}32 -; GCN: v_add_{{[iu]}}32 -; GCN: v_add_{{[iu]}}32 +; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc, +; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc, +; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc, +; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc, + +; GFX9: v_add_u32_e32 v{{[0-9]+}}, 2, +; GFX9: v_add_u32_e32 v{{[0-9]+}}, 2, +; GFX9: v_add_u32_e32 v{{[0-9]+}}, 2, +; GFX9: v_add_u32_e32 v{{[0-9]+}}, 2, ; EG: MAX_INT ; EG: MAX_INT @@ -181,8 +204,8 @@ define amdgpu_kernel void @s_min_max_i32(i32 addrspace(1)* %out0, i32 addrspace( } ; FUNC-LABEL: {{^}}v_min_max_i32: -; GCN: {{buffer|flat}}_load_dword [[VAL0:v[0-9]+]] -; GCN: {{buffer|flat}}_load_dword [[VAL1:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[VAL0:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[VAL1:v[0-9]+]] ; GCN-DAG: v_min_i32_e32 v{{[0-9]+}}, [[VAL0]], [[VAL1]] ; GCN-DAG: v_max_i32_e32 v{{[0-9]+}}, [[VAL0]], [[VAL1]] diff --git a/test/CodeGen/AMDGPU/sminmax.v2i16.ll 
b/test/CodeGen/AMDGPU/sminmax.v2i16.ll index afa273bb7b47..ffad4f6a4969 100644 --- a/test/CodeGen/AMDGPU/sminmax.v2i16.ll +++ b/test/CodeGen/AMDGPU/sminmax.v2i16.ll @@ -1,4 +1,4 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=CIVI -check-prefix=GCN %s ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=CIVI -check-prefix=GCN %s diff --git a/test/CodeGen/AMDGPU/smrd-vccz-bug.ll b/test/CodeGen/AMDGPU/smrd-vccz-bug.ll index 333113e8a9b6..0eaa28b39bc1 100644 --- a/test/CodeGen/AMDGPU/smrd-vccz-bug.ll +++ b/test/CodeGen/AMDGPU/smrd-vccz-bug.ll @@ -4,7 +4,7 @@ ; GCN-FUNC: {{^}}vccz_workaround: ; GCN: s_load_dword s{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0x0 -; GCN: v_cmp_neq_f32_e64 vcc, s{{[0-9]+}}, 0{{$}} +; GCN: v_cmp_neq_f32_e64 {{[^,]*}}, s{{[0-9]+}}, 0{{$}} ; VCCZ-BUG: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VCCZ-BUG: s_mov_b64 vcc, vcc ; NOVCCZ-BUG-NOT: s_mov_b64 vcc, vcc diff --git a/test/CodeGen/AMDGPU/smrd.ll b/test/CodeGen/AMDGPU/smrd.ll index 5220c26803c3..9fd20fd67b8c 100644 --- a/test/CodeGen/AMDGPU/smrd.ll +++ b/test/CodeGen/AMDGPU/smrd.ll @@ -193,8 +193,12 @@ main_body: } ; GCN-LABEL: {{^}}smrd_vgpr_offset_imm: -; GCN-NEXT: BB# -; GCN-NEXT: buffer_load_dword v{{[0-9]}}, v0, s[0:3], 0 offen offset:4095 ; +; GCN-NEXT: %bb. + +; SICIVI-NEXT: buffer_load_dword v{{[0-9]}}, v0, s[0:3], 0 offen offset:4095 ; + +; GFX9-NEXT: v_add_u32_e32 [[ADD:v[0-9]+]], 0xfff, v0 +; GFX9-NEXT: buffer_load_dword v{{[0-9]}}, [[ADD]], s[0:3], 0 offen ; define amdgpu_ps float @smrd_vgpr_offset_imm(<4 x i32> inreg %desc, i32 %offset) #0 { main_body: %off = add i32 %offset, 4095 @@ -203,8 +207,8 @@ main_body: } ; GCN-LABEL: {{^}}smrd_vgpr_offset_imm_too_large: -; GCN-NEXT: BB# -; GCN-NEXT: v_add_{{[_coiu]*}}32_e32 v0, vcc, 0x1000, v0 +; GCN-NEXT: %bb. +; GCN-NEXT: v_add_{{i|u}}32_e32 v0, {{(vcc, )?}}0x1000, v0 ; GCN-NEXT: buffer_load_dword v{{[0-9]}}, v0, s[0:3], 0 offen ; define amdgpu_ps float @smrd_vgpr_offset_imm_too_large(<4 x i32> inreg %desc, i32 %offset) #0 { main_body: @@ -214,7 +218,7 @@ main_body: } ; GCN-LABEL: {{^}}smrd_imm_merged: -; GCN-NEXT: BB# +; GCN-NEXT: %bb. ; SICI-NEXT: s_buffer_load_dwordx4 s[{{[0-9]}}:{{[0-9]}}], s[0:3], 0x1 ; SICI-NEXT: s_buffer_load_dwordx2 s[{{[0-9]}}:{{[0-9]}}], s[0:3], 0x7 ; VI-NEXT: s_buffer_load_dwordx4 s[{{[0-9]}}:{{[0-9]}}], s[0:3], 0x4 @@ -239,9 +243,17 @@ main_body: } ; GCN-LABEL: {{^}}smrd_vgpr_merged: -; GCN-NEXT: BB# -; GCN-NEXT: buffer_load_dwordx4 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:4 -; GCN-NEXT: buffer_load_dwordx2 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:28 +; GCN-NEXT: %bb. 
+ +; SICIVI-NEXT: buffer_load_dwordx4 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:4 +; SICIVI-NEXT: buffer_load_dwordx2 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:28 + +; GFX9: buffer_load_dword +; GFX9: buffer_load_dword +; GFX9: buffer_load_dword +; GFX9: buffer_load_dword +; GFX9: buffer_load_dword +; GFX9: buffer_load_dword define amdgpu_ps void @smrd_vgpr_merged(<4 x i32> inreg %desc, i32 %a) #0 { main_body: %a1 = add i32 %a, 4 diff --git a/test/CodeGen/AMDGPU/spill-empty-live-interval.mir b/test/CodeGen/AMDGPU/spill-empty-live-interval.mir index aceac34f286a..1e9b6b5dd8d2 100644 --- a/test/CodeGen/AMDGPU/spill-empty-live-interval.mir +++ b/test/CodeGen/AMDGPU/spill-empty-live-interval.mir @@ -2,7 +2,7 @@ # https://bugs.llvm.org/show_bug.cgi?id=33620 --- -# This would assert due to the empty live interval created for %vreg9 +# This would assert due to the empty live interval created for %9 # on the last S_NOP with an undef subreg use. # CHECK-LABEL: name: expecting_non_empty_interval diff --git a/test/CodeGen/AMDGPU/splitkit.mir b/test/CodeGen/AMDGPU/splitkit.mir index 41782af40e3c..45a9c41c3815 100644 --- a/test/CodeGen/AMDGPU/splitkit.mir +++ b/test/CodeGen/AMDGPU/splitkit.mir @@ -37,13 +37,13 @@ body: | # CHECK: [[REG0:%sgpr[0-9]+]] = COPY %sgpr0 # CHECK: [[REG1:%sgpr[0-9]+]] = COPY %sgpr2 # CHECK: S_NOP 0 -# CHECK: S_NOP 0, implicit [[REG0]] -# CHECK: S_NOP 0, implicit [[REG1]] -# CHECK: %sgpr0 = COPY [[REG0]] -# CHECK: %sgpr2 = COPY [[REG1]] +# CHECK: S_NOP 0, implicit renamable [[REG0]] +# CHECK: S_NOP 0, implicit renamable [[REG1]] +# CHECK: %sgpr0 = COPY renamable [[REG0]] +# CHECK: %sgpr2 = COPY renamable [[REG1]] # CHECK: S_NOP -# CHECK: S_NOP 0, implicit %sgpr0 -# CHECK: S_NOP 0, implicit %sgpr2 +# CHECK: S_NOP 0, implicit renamable %sgpr0 +# CHECK: S_NOP 0, implicit renamable %sgpr2 name: func1 tracksRegLiveness: true body: | @@ -67,8 +67,8 @@ body: | # Check that copy hoisting out of loops works. This mainly should not crash the # compiler when it hoists a subreg copy sequence. 
# CHECK-LABEL: name: splitHoist -# CHECK: S_NOP 0, implicit-def %sgpr0 -# CHECK: S_NOP 0, implicit-def %sgpr3 +# CHECK: S_NOP 0, implicit-def renamable %sgpr0 +# CHECK: S_NOP 0, implicit-def renamable %sgpr3 # CHECK-NEXT: SI_SPILL_S128_SAVE name: splitHoist tracksRegLiveness: true diff --git a/test/CodeGen/AMDGPU/ssubo.ll b/test/CodeGen/AMDGPU/ssubo.ll index d4b22d605503..fee14b48b44a 100644 --- a/test/CodeGen/AMDGPU/ssubo.ll +++ b/test/CodeGen/AMDGPU/ssubo.ll @@ -1,5 +1,6 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs< %s | FileCheck -check-prefixes=GCN,SI,FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefixes=GCN,VI,FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -verify-machineinstrs< %s | FileCheck -check-prefixes=GCN,GFX9,FUNC %s ; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=cypress -verify-machineinstrs< %s declare { i32, i1 } @llvm.ssub.with.overflow.i32(i32, i32) nounwind readnone @@ -39,8 +40,8 @@ define amdgpu_kernel void @v_ssubo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* } ; FUNC-LABEL: {{^}}s_ssubo_i64: -; SI: s_sub_u32 -; SI: s_subb_u32 +; GCN: s_sub_u32 +; GCN: s_subb_u32 define amdgpu_kernel void @s_ssubo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 %a, i64 %b) nounwind { %ssub = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) nounwind %val = extractvalue { i64, i1 } %ssub, 0 @@ -51,8 +52,14 @@ define amdgpu_kernel void @s_ssubo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* } ; FUNC-LABEL: {{^}}v_ssubo_i64: -; SI: v_sub_{{[iu]}}32_e32 -; SI: v_subb_u32_e32 +; SI: v_sub_i32_e32 v{{[0-9]+}}, vcc, +; SI: v_subb_u32_e32 v{{[0-9]+}}, vcc, + +; VI: v_sub_u32_e32 v{{[0-9]+}}, vcc, +; VI: v_subb_u32_e32 v{{[0-9]+}}, vcc, + +; GFX9: v_sub_co_u32_e32 v{{[0-9]+}}, vcc, +; GFX9: v_subb_co_u32_e32 v{{[0-9]+}}, vcc, define amdgpu_kernel void @v_ssubo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind { %a = load i64, i64 addrspace(1)* %aptr, align 4 %b = load i64, i64 addrspace(1)* %bptr, align 4 diff --git a/test/CodeGen/AMDGPU/stack-size-overflow.ll b/test/CodeGen/AMDGPU/stack-size-overflow.ll index 45a399b058cc..322e5ca62199 100644 --- a/test/CodeGen/AMDGPU/stack-size-overflow.ll +++ b/test/CodeGen/AMDGPU/stack-size-overflow.ll @@ -1,7 +1,7 @@ ; RUN: not llc -march=amdgcn < %s 2>&1 | FileCheck -check-prefix=ERROR %s ; RUN: not llc -march=amdgcn < %s | FileCheck -check-prefix=GCN %s -declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) #1 +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) #1 ; ERROR: error: stack size limit exceeded (4294967296) in stack_size_limit ; GCN: ; ScratchSize: 4294967296 @@ -9,6 +9,6 @@ define amdgpu_kernel void @stack_size_limit() #0 { entry: %alloca = alloca [1073741823 x i32], align 4 %bc = bitcast [1073741823 x i32]* %alloca to i8* - call void @llvm.memset.p0i8.i32(i8* %bc, i8 9, i32 1073741823, i32 1, i1 true) + call void @llvm.memset.p0i8.i32(i8* %bc, i8 9, i32 1073741823, i1 true) ret void } diff --git 
a/test/CodeGen/AMDGPU/stack-slot-color-sgpr-vgpr-spills.mir b/test/CodeGen/AMDGPU/stack-slot-color-sgpr-vgpr-spills.mir index b41e6ac6fd50..d5bf6a1eb8c9 100644 --- a/test/CodeGen/AMDGPU/stack-slot-color-sgpr-vgpr-spills.mir +++ b/test/CodeGen/AMDGPU/stack-slot-color-sgpr-vgpr-spills.mir @@ -9,10 +9,10 @@ # CHECK: - { id: 1, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4, # CHECK-NEXT: stack-id: 1, -# CHECK: SI_SPILL_V32_SAVE killed %vgpr0, %stack.0, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr5, 0, implicit %exec :: (store 4 into %stack.0) +# CHECK: SI_SPILL_V32_SAVE killed renamable %vgpr0, %stack.0, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr5, 0, implicit %exec :: (store 4 into %stack.0) # CHECK: %vgpr0 = SI_SPILL_V32_RESTORE %stack.0, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr5, 0, implicit %exec :: (load 4 from %stack.0) -# CHECK: SI_SPILL_S32_SAVE killed %sgpr6, %stack.1, implicit %exec, implicit %sgpr0_sgpr1_sgpr2_sgpr3, implicit %sgpr5, implicit-def dead %m0 :: (store 4 into %stack.1) +# CHECK: SI_SPILL_S32_SAVE killed renamable %sgpr6, %stack.1, implicit %exec, implicit %sgpr0_sgpr1_sgpr2_sgpr3, implicit %sgpr5, implicit-def dead %m0 :: (store 4 into %stack.1) # CHECK: %sgpr6 = SI_SPILL_S32_RESTORE %stack.1, implicit %exec, implicit %sgpr0_sgpr1_sgpr2_sgpr3, implicit %sgpr5, implicit-def dead %m0 :: (load 4 from %stack.1) name: no_merge_sgpr_vgpr_spill_slot diff --git a/test/CodeGen/AMDGPU/store-hi16.ll b/test/CodeGen/AMDGPU/store-hi16.ll index 85cc00ad93d6..d988ea3549c1 100644 --- a/test/CodeGen/AMDGPU/store-hi16.ll +++ b/test/CodeGen/AMDGPU/store-hi16.ll @@ -289,7 +289,7 @@ entry: ; GCN-LABEL: {{^}}store_flat_hi_v2i16_neg_offset: ; GCN: s_waitcnt -; GCN: v_add_{{[_cou]*}}32_e32 +; GCN: v_add{{(_co)?}}_{{i|u}}32_e32 ; VI: v_addc_u32_e32 ; GFX9: v_addc_co_u32_e32 @@ -328,7 +328,7 @@ entry: ; GCN-LABEL: {{^}}store_flat_hi_v2i16_i8_neg_offset: ; GCN: s_waitcnt -; GCN-DAG: v_add_{{[_cou]*}}32_e32 +; GCN-DAG: v_add{{(_co)?}}_{{i|u}}32_e32 ; VI-DAG: v_addc_u32_e32 ; GFX9-DAG: v_addc_co_u32_e32 diff --git a/test/CodeGen/AMDGPU/store-local.ll b/test/CodeGen/AMDGPU/store-local.ll index 53fc250bc84d..96d5e06a9e96 100644 --- a/test/CodeGen/AMDGPU/store-local.ll +++ b/test/CodeGen/AMDGPU/store-local.ll @@ -1,9 +1,13 @@ -; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s -; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cayman < %s | FileCheck -check-prefix=CM -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck -check-prefixes=EG,FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cayman < %s | FileCheck -check-prefixes=CM,FUNC %s ; FUNC-LABEL: {{^}}store_local_i1: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; EG: LDS_BYTE_WRITE ; CM: LDS_BYTE_WRITE @@ -16,6 +20,9 @@ entry: } ; FUNC-LABEL: {{^}}store_local_i8: +; 
SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; EG: LDS_BYTE_WRITE ; CM: LDS_BYTE_WRITE @@ -27,6 +34,9 @@ define amdgpu_kernel void @store_local_i8(i8 addrspace(3)* %out, i8 %in) { } ; FUNC-LABEL: {{^}}store_local_i16: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; EG: LDS_SHORT_WRITE ; CM: LDS_SHORT_WRITE @@ -38,6 +48,9 @@ define amdgpu_kernel void @store_local_i16(i16 addrspace(3)* %out, i16 %in) { } ; FUNC-LABEL: {{^}}store_local_v2i16: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; EG: LDS_WRITE ; CM: LDS_WRITE @@ -50,6 +63,9 @@ entry: } ; FUNC-LABEL: {{^}}store_local_v4i8: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; EG: LDS_WRITE ; CM: LDS_WRITE @@ -62,6 +78,9 @@ entry: } ; FUNC-LABEL: {{^}}store_local_v4i8_unaligned: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; EG: LDS_BYTE_WRITE ; EG: LDS_BYTE_WRITE ; EG: LDS_BYTE_WRITE @@ -85,6 +104,9 @@ entry: } ; FUNC-LABEL: {{^}}store_local_v4i8_halfaligned: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; EG: LDS_SHORT_WRITE ; EG: LDS_SHORT_WRITE ; EG-NOT: LDS_WRITE @@ -102,6 +124,9 @@ entry: } ; FUNC-LABEL: {{^}}store_local_v2i32: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; EG: LDS_WRITE ; EG: LDS_WRITE ; EG-NOT: LDS_WRITE @@ -118,6 +143,9 @@ entry: } ; FUNC-LABEL: {{^}}store_local_v4i32: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; EG: LDS_WRITE ; EG: LDS_WRITE ; EG: LDS_WRITE @@ -136,6 +164,9 @@ entry: } ; FUNC-LABEL: {{^}}store_local_v4i32_align4: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; EG: LDS_WRITE ; EG: LDS_WRITE ; EG: LDS_WRITE @@ -155,6 +186,9 @@ entry: } ; FUNC-LABEL: {{^}}store_local_i64_i8: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; EG: LDS_BYTE_WRITE ; GCN: ds_write_b8 define amdgpu_kernel void @store_local_i64_i8(i8 addrspace(3)* %out, i64 %in) { @@ -165,6 +199,9 @@ entry: } ; FUNC-LABEL: {{^}}store_local_i64_i16: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; EG: LDS_SHORT_WRITE ; GCN: ds_write_b16 define amdgpu_kernel void @store_local_i64_i16(i16 addrspace(3)* %out, i64 %in) { diff --git a/test/CodeGen/AMDGPU/store-weird-sizes.ll b/test/CodeGen/AMDGPU/store-weird-sizes.ll new file mode 100644 index 000000000000..fd82dccb0fac --- /dev/null +++ b/test/CodeGen/AMDGPU/store-weird-sizes.ll @@ -0,0 +1,56 @@ +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s + +; GCN-LABEL: {{^}}local_store_i56: +; GCN-DAG: ds_write_b8 v0, v{{[0-9]+}} offset:6 +; GCN-DAG: ds_write_b16 v0, v{{[0-9]+}} offset:4 +; GCN-DAG: ds_write_b32 v0, v{{[0-9]+$}} +define void @local_store_i56(i56 addrspace(3)* %ptr, i56 %arg) #0 { + store i56 %arg, i56 addrspace(3)* %ptr, align 8 + ret void +} + +; GCN-LABEL: {{^}}local_store_i55: +; GCN-DAG: ds_write_b8 v0, v{{[0-9]+}} offset:6 +; GCN-DAG: ds_write_b16 v0, v{{[0-9]+}} offset:4 +; GCN-DAG: ds_write_b32 v0, v{{[0-9]+$}} +define amdgpu_kernel void @local_store_i55(i55 addrspace(3)* %ptr, i55 %arg) #0 { + store i55 %arg, i55 addrspace(3)* %ptr, align 8 + ret void +} + +; GCN-LABEL: {{^}}local_store_i48: +; GCN-DAG: ds_write_b16 v0, v{{[0-9]+}} offset:4 +; GCN-DAG: ds_write_b32 v0, v{{[0-9]+$}} +define amdgpu_kernel void @local_store_i48(i48 addrspace(3)* %ptr, i48 %arg) #0 { + store i48 %arg, i48 addrspace(3)* %ptr, align 8 + ret void +} + +; GCN-LABEL: {{^}}local_store_i65: +; 
GCN-DAG: ds_write_b8 v{{[0-9]+}}, v{{[0-9]+}} offset:8 +; GCN-DAG: ds_write_b64 +define amdgpu_kernel void @local_store_i65(i65 addrspace(3)* %ptr, i65 %arg) #0 { + store i65 %arg, i65 addrspace(3)* %ptr, align 8 + ret void +} + +; GCN-LABEL: {{^}}local_store_i13: +; GCN: v_and_b32_e32 [[TRUNC:v[0-9]+]], 0x1fff, v1 +; GCN: ds_write_b16 v0, [[TRUNC]] +define void @local_store_i13(i13 addrspace(3)* %ptr, i13 %arg) #0 { + store i13 %arg, i13 addrspace(3)* %ptr, align 8 + ret void +} + +; GCN-LABEL: {{^}}local_store_i17: +; GCN: ds_write_b16 v0 +; CIVI: ds_write_b8 v0, v{{[0-9]+}} offset:2 +; GFX9: ds_write_b8_d16_hi v0, v{{[0-9]+}} offset:2 +define void @local_store_i17(i17 addrspace(3)* %ptr, i17 %arg) #0 { + store i17 %arg, i17 addrspace(3)* %ptr, align 8 + ret void +} + +attributes #0 = { nounwind } diff --git a/test/CodeGen/AMDGPU/sub.ll b/test/CodeGen/AMDGPU/sub.ll index 4c573acdbab5..908d13eb0178 100644 --- a/test/CodeGen/AMDGPU/sub.ll +++ b/test/CodeGen/AMDGPU/sub.ll @@ -1,13 +1,34 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s - +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI,FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,GFX89,FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX89,FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=EG,FUNC %s declare i32 @llvm.r600.read.tidig.x() readnone +; FUNC-LABEL: {{^}}s_sub_i32: +; GCN: s_load_dword [[A:s[0-9]+]] +; GCN: s_load_dword [[B:s[0-9]+]] +; GCN: s_sub_i32 s{{[0-9]+}}, [[A]], [[B]] +define amdgpu_kernel void @s_sub_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) { + %result = sub i32 %a, %b + store i32 %result, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}s_sub_imm_i32: +; GCN: s_load_dword [[A:s[0-9]+]] +; GCN: s_sub_i32 s{{[0-9]+}}, 0x4d2, [[A]] +define amdgpu_kernel void @s_sub_imm_i32(i32 addrspace(1)* %out, i32 %a) { + %result = sub i32 1234, %a + store i32 %result, i32 addrspace(1)* %out + ret void +} + ; FUNC-LABEL: {{^}}test_sub_i32: ; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; SI: v_subrev_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}} +; GFX9: v_sub_u32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} define amdgpu_kernel void @test_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1 %a = load i32, i32 addrspace(1)* %in @@ -17,6 +38,17 @@ define amdgpu_kernel void @test_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1) ret void } +; FUNC-LABEL: {{^}}test_sub_imm_i32: +; EG: SUB_INT + +; SI: v_sub_i32_e32 v{{[0-9]+}}, vcc, 0x7b, v{{[0-9]+}} +; GFX9: v_sub_u32_e32 v{{[0-9]+}}, 0x7b, v{{[0-9]+}} +define amdgpu_kernel void @test_sub_imm_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { + %a = load i32, i32 addrspace(1)* %in + %result = sub i32 123, %a + store i32 %result, i32 addrspace(1)* %out + ret void +} ; FUNC-LABEL: {{^}}test_sub_v2i32: ; EG: 
SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} @@ -25,6 +57,8 @@ define amdgpu_kernel void @test_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1) ; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}} ; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}} +; GFX9: v_sub_u32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +; GFX9: v_sub_u32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} define amdgpu_kernel void @test_sub_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1 %a = load <2 x i32>, <2 x i32> addrspace(1) * %in @@ -45,6 +79,10 @@ define amdgpu_kernel void @test_sub_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32 ; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}} ; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}} +; GFX9: v_sub_u32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +; GFX9: v_sub_u32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +; GFX9: v_sub_u32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +; GFX9: v_sub_u32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} define amdgpu_kernel void @test_sub_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1 %a = load <4 x i32>, <4 x i32> addrspace(1) * %in @@ -54,49 +92,58 @@ define amdgpu_kernel void @test_sub_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32 ret void } -; VI: v_sub_i16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +; FUNC-LABEL: {{^}}test_sub_i16: +; SI: v_subrev_i32_e32 v{{[0-9]+}}, vcc, +; GFX89: v_sub_u16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} define amdgpu_kernel void @test_sub_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) { - %b_ptr = getelementptr i16, i16 addrspace(1)* %in, i16 1 - %a = load i16, i16 addrspace(1)* %in - %b = load i16, i16 addrspace(1)* %b_ptr - %result = sub i16 %a, %b - store i16 %result, i16 addrspace(1)* %out - ret void + %tid = call i32 @llvm.r600.read.tidig.x() + %gep = getelementptr i16, i16 addrspace(1)* %in, i32 %tid + %b_ptr = getelementptr i16, i16 addrspace(1)* %gep, i32 1 + %a = load volatile i16, i16 addrspace(1)* %gep + %b = load volatile i16, i16 addrspace(1)* %b_ptr + %result = sub i16 %a, %b + store i16 %result, i16 addrspace(1)* %out + ret void } ; FUNC-LABEL: {{^}}test_sub_v2i16: +; VI: v_sub_u16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +; VI: v_sub_u16_sdwa v{{[0-9]+, v[0-9]+, v[0-9]+}} -; VI: v_sub_i16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} -; VI: v_sub_i16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} - +; GFX9: v_pk_sub_i16 define amdgpu_kernel void @test_sub_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) { - %b_ptr = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %in, i16 1 - %a = load <2 x i16>, <2 x i16> addrspace(1) * %in - %b = load <2 x i16>, <2 x i16> addrspace(1) * %b_ptr - %result = sub <2 x i16> %a, %b - store <2 x i16> %result, <2 x i16> addrspace(1)* %out - ret void + %tid = call i32 @llvm.r600.read.tidig.x() + %gep = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %in, i32 %tid + %b_ptr = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %gep, i16 1 + %a = load <2 x i16>, <2 x i16> addrspace(1)* %gep + %b = load <2 x i16>, <2 x i16> addrspace(1)* %b_ptr + %result = sub <2 x i16> %a, %b + store <2 x i16> %result, <2 x i16> addrspace(1)* %out + ret void } ; FUNC-LABEL: {{^}}test_sub_v4i16: +; VI: v_sub_u16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +; VI: v_sub_u16_sdwa v{{[0-9]+, v[0-9]+, v[0-9]+}} +; VI: v_sub_u16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +; VI: v_sub_u16_sdwa v{{[0-9]+, v[0-9]+, v[0-9]+}} -; VI: v_sub_i16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} -; VI: 
v_sub_i16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} -; VI: v_sub_i16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} -; VI: v_sub_i16_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} - +; GFX9: v_pk_sub_i16 +; GFX9: v_pk_sub_i16 define amdgpu_kernel void @test_sub_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) { - %b_ptr = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %in, i16 1 - %a = load <4 x i16>, <4 x i16> addrspace(1) * %in - %b = load <4 x i16>, <4 x i16> addrspace(1) * %b_ptr - %result = sub <4 x i16> %a, %b - store <4 x i16> %result, <4 x i16> addrspace(1)* %out - ret void + %tid = call i32 @llvm.r600.read.tidig.x() + %gep = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %in, i32 %tid + %b_ptr = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %gep, i16 1 + %a = load <4 x i16>, <4 x i16> addrspace(1) * %gep + %b = load <4 x i16>, <4 x i16> addrspace(1) * %b_ptr + %result = sub <4 x i16> %a, %b + store <4 x i16> %result, <4 x i16> addrspace(1)* %out + ret void } ; FUNC-LABEL: {{^}}s_sub_i64: -; SI: s_sub_u32 -; SI: s_subb_u32 +; GCN: s_sub_u32 +; GCN: s_subb_u32 ; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XY ; EG-DAG: SUB_INT {{[* ]*}} @@ -113,6 +160,12 @@ define amdgpu_kernel void @s_sub_i64(i64 addrspace(1)* noalias %out, i64 %a, i64 ; SI: v_sub_i32_e32 ; SI: v_subb_u32_e32 +; VI: v_sub_u32_e32 +; VI: v_subb_u32_e32 + +; GFX9: v_sub_co_u32_e32 +; GFX9: v_subb_co_u32_e32 + ; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XY ; EG-DAG: SUB_INT {{[* ]*}} ; EG-DAG: SUBB_UINT @@ -130,10 +183,20 @@ define amdgpu_kernel void @v_sub_i64(i64 addrspace(1)* noalias %out, i64 addrspa } ; FUNC-LABEL: {{^}}v_test_sub_v2i64: -; SI: v_sub_i32_e32 -; SI: v_subb_u32_e32 -; SI: v_sub_i32_e32 -; SI: v_subb_u32_e32 +; SI: v_sub_i32_e32 v{{[0-9]+}}, vcc, +; SI: v_subb_u32_e32 v{{[0-9]+}}, vcc, +; SI: v_sub_i32_e32 v{{[0-9]+}}, vcc, +; SI: v_subb_u32_e32 v{{[0-9]+}}, vcc, + +; VI: v_sub_u32_e32 v{{[0-9]+}}, vcc, +; VI: v_subb_u32_e32 v{{[0-9]+}}, vcc, +; VI: v_sub_u32_e32 v{{[0-9]+}}, vcc, +; VI: v_subb_u32_e32 v{{[0-9]+}}, vcc, + +; GFX9: v_sub_co_u32_e32 v{{[0-9]+}}, vcc, +; GFX9: v_subb_co_u32_e32 v{{[0-9]+}}, vcc, +; GFX9: v_sub_co_u32_e32 v{{[0-9]+}}, vcc, +; GFX9: v_subb_co_u32_e32 v{{[0-9]+}}, vcc, define amdgpu_kernel void @v_test_sub_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* noalias %inA, <2 x i64> addrspace(1)* noalias %inB) { %tid = call i32 @llvm.r600.read.tidig.x() readnone %a_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %inA, i32 %tid @@ -146,14 +209,32 @@ define amdgpu_kernel void @v_test_sub_v2i64(<2 x i64> addrspace(1)* %out, <2 x i } ; FUNC-LABEL: {{^}}v_test_sub_v4i64: -; SI: v_sub_i32_e32 -; SI: v_subb_u32_e32 -; SI: v_sub_i32_e32 -; SI: v_subb_u32_e32 -; SI: v_sub_i32_e32 -; SI: v_subb_u32_e32 -; SI: v_sub_i32_e32 -; SI: v_subb_u32_e32 +; SI: v_sub_i32_e32 v{{[0-9]+}}, vcc, +; SI: v_subb_u32_e32 v{{[0-9]+}}, vcc, +; SI: v_sub_i32_e32 v{{[0-9]+}}, vcc, +; SI: v_subb_u32_e32 v{{[0-9]+}}, vcc, +; SI: v_sub_i32_e32 v{{[0-9]+}}, vcc, +; SI: v_subb_u32_e32 v{{[0-9]+}}, vcc, +; SI: v_sub_i32_e32 v{{[0-9]+}}, vcc, +; SI: v_subb_u32_e32 v{{[0-9]+}}, vcc, + +; VI: v_sub_u32_e32 v{{[0-9]+}}, vcc, +; VI: v_subb_u32_e32 v{{[0-9]+}}, vcc, +; VI: v_sub_u32_e32 v{{[0-9]+}}, vcc, +; VI: v_subb_u32_e32 v{{[0-9]+}}, vcc, +; VI: v_sub_u32_e32 v{{[0-9]+}}, vcc, +; VI: v_subb_u32_e32 v{{[0-9]+}}, vcc, +; VI: v_sub_u32_e32 v{{[0-9]+}}, vcc, +; VI: v_subb_u32_e32 v{{[0-9]+}}, vcc, + +; GFX9: v_sub_co_u32_e32 v{{[0-9]+}}, vcc, +; GFX9: v_subb_co_u32_e32 v{{[0-9]+}}, vcc, +; GFX9: 
v_sub_co_u32_e32 v{{[0-9]+}}, vcc, +; GFX9: v_subb_co_u32_e32 v{{[0-9]+}}, vcc, +; GFX9: v_sub_co_u32_e32 v{{[0-9]+}}, vcc, +; GFX9: v_subb_co_u32_e32 v{{[0-9]+}}, vcc, +; GFX9: v_sub_co_u32_e32 v{{[0-9]+}}, vcc, +; GFX9: v_subb_co_u32_e32 v{{[0-9]+}}, vcc, define amdgpu_kernel void @v_test_sub_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* noalias %inA, <4 x i64> addrspace(1)* noalias %inB) { %tid = call i32 @llvm.r600.read.tidig.x() readnone %a_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %inA, i32 %tid diff --git a/test/CodeGen/AMDGPU/sub.v2i16.ll b/test/CodeGen/AMDGPU/sub.v2i16.ll index b3f8b10c2f68..998cfdf395c4 100644 --- a/test/CodeGen/AMDGPU/sub.v2i16.ll +++ b/test/CodeGen/AMDGPU/sub.v2i16.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s ; FIXME: Need to handle non-uniform case for function below (load without gep). diff --git a/test/CodeGen/AMDGPU/subreg-intervals.mir b/test/CodeGen/AMDGPU/subreg-intervals.mir index 62816da25b2c..2d353b8138e3 100644 --- a/test/CodeGen/AMDGPU/subreg-intervals.mir +++ b/test/CodeGen/AMDGPU/subreg-intervals.mir @@ -2,11 +2,11 @@ # REQUIRES: asserts # CHECK: INTERVALS -# CHECK: vreg0 +# CHECK: %0 # CHECK-LABEL: Machine code for function test0: # CHECK: INTERVALS -# CHECK: vreg0 +# CHECK: %0 # CHECK-LABEL: Machine code for function test1: --- | diff --git a/test/CodeGen/AMDGPU/subreg_interference.mir b/test/CodeGen/AMDGPU/subreg_interference.mir index 6fc22c8d189f..3575e41c2b78 100644 --- a/test/CodeGen/AMDGPU/subreg_interference.mir +++ b/test/CodeGen/AMDGPU/subreg_interference.mir @@ -12,12 +12,12 @@ # sgpr0-sgpr3. 
# # CHECK-LABEL: func0 -# CHECK: S_NOP 0, implicit-def %sgpr0 -# CHECK: S_NOP 0, implicit-def %sgpr3 -# CHECK: S_NOP 0, implicit-def %sgpr1 -# CHECK: S_NOP 0, implicit-def %sgpr2 -# CHECK: S_NOP 0, implicit %sgpr0, implicit %sgpr3 -# CHECK: S_NOP 0, implicit %sgpr1, implicit %sgpr2 +# CHECK: S_NOP 0, implicit-def renamable %sgpr0 +# CHECK: S_NOP 0, implicit-def renamable %sgpr3 +# CHECK: S_NOP 0, implicit-def renamable %sgpr1 +# CHECK: S_NOP 0, implicit-def renamable %sgpr2 +# CHECK: S_NOP 0, implicit renamable %sgpr0, implicit renamable %sgpr3 +# CHECK: S_NOP 0, implicit renamable %sgpr1, implicit renamable %sgpr2 name: func0 body: | bb.0: diff --git a/test/CodeGen/AMDGPU/syncscopes.ll b/test/CodeGen/AMDGPU/syncscopes.ll index 6e356f69e05b..5cea1588d4bb 100644 --- a/test/CodeGen/AMDGPU/syncscopes.ll +++ b/test/CodeGen/AMDGPU/syncscopes.ll @@ -1,9 +1,9 @@ ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -stop-before=si-debugger-insert-nops < %s | FileCheck --check-prefix=GCN %s ; GCN-LABEL: name: syncscopes -; GCN: FLAT_STORE_DWORD killed %vgpr1_vgpr2, killed %vgpr0, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store syncscope("agent") seq_cst 4 into %ir.agent_out) -; GCN: FLAT_STORE_DWORD killed %vgpr4_vgpr5, killed %vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store syncscope("workgroup") seq_cst 4 into %ir.workgroup_out) -; GCN: FLAT_STORE_DWORD killed %vgpr7_vgpr8, killed %vgpr6, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store syncscope("wavefront") seq_cst 4 into %ir.wavefront_out) +; GCN: FLAT_STORE_DWORD killed renamable %vgpr1_vgpr2, killed renamable %vgpr0, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store syncscope("agent") seq_cst 4 into %ir.agent_out) +; GCN: FLAT_STORE_DWORD killed renamable %vgpr4_vgpr5, killed renamable %vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store syncscope("workgroup") seq_cst 4 into %ir.workgroup_out) +; GCN: FLAT_STORE_DWORD killed renamable %vgpr7_vgpr8, killed renamable %vgpr6, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store syncscope("wavefront") seq_cst 4 into %ir.wavefront_out) define void @syncscopes( i32 %agent, i32 addrspace(4)* %agent_out, diff --git a/test/CodeGen/AMDGPU/uaddo.ll b/test/CodeGen/AMDGPU/uaddo.ll index 26b47dc75a88..0cb2487dd4ac 100644 --- a/test/CodeGen/AMDGPU/uaddo.ll +++ b/test/CodeGen/AMDGPU/uaddo.ll @@ -1,6 +1,7 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI,FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefixes=EG,FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI,FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefixes=EG,FUNC %s ; FUNC-LABEL: {{^}}s_uaddo_i64_zext: ; GCN: s_add_u32 @@ -22,7 +23,10 @@ define amdgpu_kernel void @s_uaddo_i64_zext(i64 addrspace(1)* %out, i64 
%a, i64 ; FIXME: Could do scalar ; FUNC-LABEL: {{^}}s_uaddo_i32: -; GCN: v_add_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}} +; SI: v_add_i32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}} +; VI: v_add_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}} +; GFX9: v_add_co_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}} + ; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc ; EG: ADDC_UINT @@ -37,7 +41,10 @@ define amdgpu_kernel void @s_uaddo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* } ; FUNC-LABEL: {{^}}v_uaddo_i32: -; GCN: v_add_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} +; SI: v_add_i32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} +; VI: v_add_u32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} +; GFX9: v_add_co_u32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} + ; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc ; EG: ADDC_UINT @@ -58,7 +65,10 @@ define amdgpu_kernel void @v_uaddo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* } ; FUNC-LABEL: {{^}}v_uaddo_i32_novcc: -; GCN: v_add_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} +; SI: v_add_i32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} +; VI: v_add_u32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} +; GFX9: v_add_co_u32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} + ; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc ; EG: ADDC_UINT @@ -95,8 +105,14 @@ define amdgpu_kernel void @s_uaddo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* } ; FUNC-LABEL: {{^}}v_uaddo_i64: -; GCN: v_add_{{[iu]}}32 -; GCN: v_addc_u32 +; SI: v_add_i32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} +; SI: v_addc_u32_e32 v{{[0-9]+}}, vcc, + +; VI: v_add_u32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} +; VI: v_addc_u32_e32 v{{[0-9]+}}, vcc, + +; GFX9: v_add_co_u32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} +; GFX9: v_addc_co_u32_e32 v{{[0-9]+}}, vcc, ; EG: ADDC_UINT ; EG: ADD_INT @@ -118,6 +134,9 @@ define amdgpu_kernel void @v_uaddo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* ; FUNC-LABEL: {{^}}v_uaddo_i16: ; VI: v_add_u16_e32 ; VI: v_cmp_lt_u16_e32 + +; GFX9: v_add_u16_e32 +; GFX9: v_cmp_lt_u16_e32 define amdgpu_kernel void @v_uaddo_i16(i16 addrspace(1)* %out, i1 addrspace(1)* %carryout, i16 addrspace(1)* %a.ptr, i16 addrspace(1)* %b.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 diff --git a/test/CodeGen/AMDGPU/udivrem64.ll b/test/CodeGen/AMDGPU/udivrem64.ll index 91c27b09b5fa..408fd01c2922 100644 --- a/test/CodeGen/AMDGPU/udivrem64.ll +++ b/test/CodeGen/AMDGPU/udivrem64.ll @@ -1,5 +1,6 @@ ;RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=GCN --check-prefix=FUNC %s ;RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefix=VI --check-prefix=GCN --check-prefix=FUNC %s +;RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefix=VI --check-prefix=GCN --check-prefix=FUNC %s ;RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=EG --check-prefix=FUNC %s ;FUNC-LABEL: {{^}}test_udiv: diff --git a/test/CodeGen/AMDGPU/umed3.ll b/test/CodeGen/AMDGPU/umed3.ll index 5a579f3575fd..350be19d6e0d 100644 --- a/test/CodeGen/AMDGPU/umed3.ll +++ b/test/CodeGen/AMDGPU/umed3.ll @@ -1,6 +1,6 @@ ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SICIVI -check-prefix=SI %s ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN 
-check-prefix=SICIVI -check-prefix=VI %s -; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s declare i32 @llvm.amdgcn.workitem.id.x() #0 diff --git a/test/CodeGen/AMDGPU/uniform-PHI.ll b/test/CodeGen/AMDGPU/uniform-PHI.ll new file mode 100644 index 000000000000..3cb86b39a65f --- /dev/null +++ b/test/CodeGen/AMDGPU/uniform-PHI.ll @@ -0,0 +1,39 @@ +; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s + +; GCN-LABEL: BB0_2 +; GCN-NOT: v_readfirstlane + + +target triple = "amdgcn--amdhsa" +define amdgpu_kernel void @uniform-PHI(i32 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture %arg1, i32 %arg2) { +bb: + %tmp = sext i32 %arg2 to i64 + %tmp3 = tail call i64 @_Z13get_global_idj(i32 0) #2 + %tmp4 = icmp ugt i64 %tmp3, %tmp + %tmp5 = icmp sgt i32 %arg2, 0 + %tmp6 = and i1 %tmp4, %tmp5 + br i1 %tmp6, label %bb7, label %bb17 + +bb7: ; preds = %bb + br label %bb8 + +bb8: ; preds = %bb8, %bb7 + %tmp9 = phi i32 [ %tmp15, %bb8 ], [ 0, %bb7 ] + %tmp10 = phi i32 [ %tmp14, %bb8 ], [ 0, %bb7 ] + %tmp11 = zext i32 %tmp9 to i64 + %tmp12 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp11 + %tmp13 = load i32, i32 addrspace(1)* %tmp12, align 4 + %tmp14 = add nsw i32 %tmp13, %tmp10 + %tmp15 = add nuw nsw i32 %tmp9, 1 + %tmp16 = icmp eq i32 %tmp15, %arg2 + br i1 %tmp16, label %bb17, label %bb8 + +bb17: ; preds = %bb8, %bb + %tmp18 = phi i32 [ 0, %bb ], [ %tmp14, %bb8 ] + store i32 %tmp18, i32 addrspace(1)* %arg1, align 4 + ret void +} + +declare i64 @_Z13get_global_idj(i32) local_unnamed_addr #1 +attributes #1 = { convergent nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="fiji" "target-features"="+16-bit-insts,+dpp,+fp64-fp16-denormals,+s-memrealtime,-fp32-denormals" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { convergent nounwind readnone } diff --git a/test/CodeGen/AMDGPU/uniform-cfg.ll b/test/CodeGen/AMDGPU/uniform-cfg.ll index 247b9691aff5..33a420227053 100644 --- a/test/CodeGen/AMDGPU/uniform-cfg.ll +++ b/test/CodeGen/AMDGPU/uniform-cfg.ll @@ -251,7 +251,7 @@ ENDIF: ; preds = %IF, %main_body ; GCN: s_load_dword [[COND:s[0-9]+]] ; GCN: s_cmp_lt_i32 [[COND]], 1 ; GCN: s_cbranch_scc1 [[EXIT:[A-Za-z0-9_]+]] -; GCN: v_cmp_gt_i32_e64 vcc, [[COND]], 0{{$}} +; GCN: v_cmp_gt_i32_e64 {{[^,]*}}, [[COND]], 0{{$}} ; GCN: s_cbranch_vccz [[BODY:[A-Za-z0-9_]+]] ; GCN: {{^}}[[EXIT]]: ; GCN: s_endpgm @@ -401,7 +401,7 @@ exit: ; GCN: s_cmp_lt_i32 [[COND]], 1 ; GCN: s_cbranch_scc1 BB[[FNNUM:[0-9]+]]_3 -; GCN: BB#1: +; GCN: %bb.1: ; GCN-NOT: cmp ; GCN: buffer_load_dword ; GCN: buffer_store_dword diff --git a/test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll b/test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll index 82283f39792e..1bbda66fddb1 100644 --- a/test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll +++ b/test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll @@ -6,11 +6,10 @@ ; CHECK: v_cmp_ne_u32_e32 vcc, 0 ; CHECK: s_and_saveexec_b64 ; CHECK-NEXT: ; mask branch -; CHECK-NEXT: 
s_cbranch_execz BB{{[0-9]+_[0-9]+}} ; CHECK-NEXT: BB{{[0-9]+_[0-9]+}}: ; %loop_body.preheader ; CHECK: [[LOOP_BODY_LABEL:BB[0-9]+_[0-9]+]]: -; CHECK: s_cbranch_vccz [[LOOP_BODY_LABEL]] +; CHECK: s_cbranch_scc0 [[LOOP_BODY_LABEL]] ; CHECK: s_endpgm define amdgpu_ps void @test1(<8 x i32> inreg %rsrc, <2 x i32> %addr.base, i32 %y, i32 %p) { @@ -35,7 +34,6 @@ out: ; CHECK-LABEL: {{^}}test2: ; CHECK: s_and_saveexec_b64 ; CHECK-NEXT: ; mask branch -; CHECK-NEXT: s_cbranch_execz define amdgpu_kernel void @test2(i32 addrspace(1)* %out, i32 %a, i32 %b) { main_body: %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 diff --git a/test/CodeGen/AMDGPU/unpack-half.ll b/test/CodeGen/AMDGPU/unpack-half.ll new file mode 100644 index 000000000000..b2133986ba5b --- /dev/null +++ b/test/CodeGen/AMDGPU/unpack-half.ll @@ -0,0 +1,26 @@ +; RUN: llc -march=amdgcn -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck %s + +; On gfx6 and gfx7, this test shows a bug in SelectionDAG where scalarizing the +; extension of a vector of f16 generates an illegal node that errors later. + +; CHECK-LABEL: {{^}}main: +; CHECK: v_cvt_f32_f16 + +define amdgpu_gs void @main(i32 inreg %arg) local_unnamed_addr #0 { +.entry: + %tmp = load volatile float, float addrspace(1)* undef + %tmp1 = bitcast float %tmp to i32 + %im0.i = lshr i32 %tmp1, 16 + %tmp2 = insertelement <2 x i32> undef, i32 %im0.i, i32 1 + %tmp3 = trunc <2 x i32> %tmp2 to <2 x i16> + %tmp4 = bitcast <2 x i16> %tmp3 to <2 x half> + %tmp5 = fpext <2 x half> %tmp4 to <2 x float> + %bc = bitcast <2 x float> %tmp5 to <2 x i32> + %tmp6 = extractelement <2 x i32> %bc, i32 1 + store volatile i32 %tmp6, i32 addrspace(1)* undef + ret void +} + +attributes #0 = { nounwind } + diff --git a/test/CodeGen/AMDGPU/usubo.ll b/test/CodeGen/AMDGPU/usubo.ll index 10c0d8640f5d..eeb19f86f384 100644 --- a/test/CodeGen/AMDGPU/usubo.ll +++ b/test/CodeGen/AMDGPU/usubo.ll @@ -1,6 +1,7 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI,FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefixes=EG,FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI,SICIVI,FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,SICIVI,FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefixes=EG,FUNC %s ; FUNC-LABEL: {{^}}s_usubo_i64_zext: ; GCN: s_sub_u32 @@ -22,7 +23,10 @@ define amdgpu_kernel void @s_usubo_i64_zext(i64 addrspace(1)* %out, i64 %a, i64 ; FIXME: Could do scalar ; FUNC-LABEL: {{^}}s_usubo_i32: -; GCN: v_sub_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}} +; SI: v_sub_i32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}} +; VI: v_sub_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}} +; GFX9: v_sub_co_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}} + ; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc ; EG-DAG: SUBB_UINT @@ -37,7 
+41,10 @@ define amdgpu_kernel void @s_usubo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* } ; FUNC-LABEL: {{^}}v_usubo_i32: -; GCN: v_sub_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} +; SI: v_sub_i32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} +; VI: v_sub_u32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} +; GFX9: v_sub_co_u32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} + ; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc ; EG-DAG: SUBB_UINT @@ -58,7 +65,10 @@ define amdgpu_kernel void @v_usubo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* } ; FUNC-LABEL: {{^}}v_usubo_i32_novcc: -; GCN: v_sub_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} +; SI: v_sub_i32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} +; VI: v_sub_u32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} +; GFX9: v_sub_co_u32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} + ; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc ; EG-DAG: SUBB_UINT @@ -97,8 +107,13 @@ define amdgpu_kernel void @s_usubo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* } ; FUNC-LABEL: {{^}}v_usubo_i64: -; GCN: v_sub_{{[iu]}}32 -; GCN: v_subb_u32 +; SI: v_sub_i32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} +; SI: v_subb_u32 +; VI: v_sub_u32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} +; VI: v_subb_u32 + +; GFX9: v_sub_co_u32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} +; GFX9: v_subb_co_u32 ; EG-DAG: SUBB_UINT ; EG-DAG: SUB_INT @@ -120,8 +135,15 @@ define amdgpu_kernel void @v_usubo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* } ; FUNC-LABEL: {{^}}v_usubo_i16: +; SI: v_subrev_i32_e32 +; SI: v_and_b32 +; SI: v_cmp_ne_u32_e32 + ; VI: v_sub_u16_e32 ; VI: v_cmp_gt_u16_e32 + +; GFX9: v_sub_u16_e32 +; GFX9: v_cmp_gt_u16_e32 define amdgpu_kernel void @v_usubo_i16(i16 addrspace(1)* %out, i1 addrspace(1)* %carryout, i16 addrspace(1)* %a.ptr, i16 addrspace(1)* %b.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 diff --git a/test/CodeGen/AMDGPU/valu-i1.ll b/test/CodeGen/AMDGPU/valu-i1.ll index 7162e818d49f..4a3937e44f36 100644 --- a/test/CodeGen/AMDGPU/valu-i1.ll +++ b/test/CodeGen/AMDGPU/valu-i1.ll @@ -162,8 +162,8 @@ exit: ; SI: [[LABEL_LOOP:BB[0-9]+_[0-9]+]]: ; SI: buffer_load_dword ; SI-DAG: buffer_store_dword -; SI-DAG: v_cmp_eq_u32_e32 vcc, 0x100 -; SI: s_cbranch_vccz [[LABEL_LOOP]] +; SI-DAG: s_cmpk_eq_i32 s{{[0-9]+}}, 0x100 +; SI: s_cbranch_scc0 [[LABEL_LOOP]] ; SI: [[LABEL_EXIT]]: ; SI: s_endpgm @@ -192,7 +192,7 @@ exit: ; Load loop limit from buffer ; Branch to exit if uniformly not taken -; SI: ; BB#0: +; SI: ; %bb.0: ; SI: buffer_load_dword [[VBOUND:v[0-9]+]] ; SI: v_cmp_lt_i32_e32 vcc ; SI: s_and_saveexec_b64 [[OUTER_CMP_SREG:s\[[0-9]+:[0-9]+\]]], vcc diff --git a/test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir b/test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir index 54991d3d953c..ff9826baf48c 100644 --- a/test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir +++ b/test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir @@ -48,7 +48,7 @@ # CHECK-LABEL: name: vccz_corrupt_workaround # CHECK: %vcc = V_CMP_EQ_F32 # CHECK-NEXT: %vcc = S_MOV_B64 %vcc -# CHECK-NEXT: S_CBRANCH_VCCZ %bb.2.else, implicit killed %vcc +# CHECK-NEXT: S_CBRANCH_VCCZ %bb.2, implicit killed %vcc name: vccz_corrupt_workaround alignment: 0 @@ -82,7 +82,7 @@ body: | %sgpr7 = S_MOV_B32 61440 %sgpr6 = S_MOV_B32 -1 %vcc = V_CMP_EQ_F32_e64 0, 0, 0, %sgpr2, 0, implicit %exec - S_CBRANCH_VCCZ %bb.1.else, implicit killed %vcc + S_CBRANCH_VCCZ %bb.1, implicit killed %vcc bb.2.if: liveins: %sgpr6, %sgpr7, 
%sgpr0_sgpr1_sgpr2_sgpr3:0x00000003 @@ -90,7 +90,7 @@ body: | %vgpr0 = V_MOV_B32_e32 9, implicit %exec BUFFER_STORE_DWORD_OFFSET killed %vgpr0, killed %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into `i32 addrspace(1)* undef`) %vgpr0 = V_MOV_B32_e32 0, implicit %exec - S_BRANCH %bb.3.done + S_BRANCH %bb.3 bb.1.else: liveins: %sgpr6, %sgpr7, %sgpr0_sgpr1_sgpr2_sgpr3:0x00000003 @@ -111,7 +111,7 @@ body: | --- # CHECK-LABEL: name: vccz_corrupt_undef_vcc # CHECK: S_WAITCNT -# CHECK-NEXT: S_CBRANCH_VCCZ %bb.2.else, implicit undef %vcc +# CHECK-NEXT: S_CBRANCH_VCCZ %bb.2, implicit undef %vcc name: vccz_corrupt_undef_vcc alignment: 0 @@ -143,7 +143,7 @@ body: | %sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed %sgpr0_sgpr1, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`) %sgpr7 = S_MOV_B32 61440 %sgpr6 = S_MOV_B32 -1 - S_CBRANCH_VCCZ %bb.1.else, implicit undef %vcc + S_CBRANCH_VCCZ %bb.1, implicit undef %vcc bb.2.if: liveins: %sgpr6, %sgpr7, %sgpr0_sgpr1_sgpr2_sgpr3:0x00000003 @@ -151,7 +151,7 @@ body: | %vgpr0 = V_MOV_B32_e32 9, implicit %exec BUFFER_STORE_DWORD_OFFSET killed %vgpr0, killed %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into `i32 addrspace(1)* undef`) %vgpr0 = V_MOV_B32_e32 0, implicit %exec - S_BRANCH %bb.3.done + S_BRANCH %bb.3 bb.1.else: liveins: %sgpr6, %sgpr7, %sgpr0_sgpr1_sgpr2_sgpr3:0x00000003 diff --git a/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll b/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll index feae5e9f3792..a0242ec958b3 100644 --- a/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll +++ b/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll @@ -1,8 +1,8 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCNMESA -check-prefix=SIMESA %s -; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+vgpr-spilling,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCNMESA -check-prefix=VIMESA %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+vgpr-spilling,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCNMESA -check-prefix=GFX9MESA %s -; RUN: llc -march=amdgcn -mcpu=hawaii -mtriple=amdgcn-unknown-amdhsa -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CIHSA -check-prefix=HSA %s -; RUN: llc -march=amdgcn -mcpu=fiji -mtriple=amdgcn-unknown-amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VIHSA -check-prefix=HSA %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCNMESA -check-prefix=SIMESA %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -mattr=+vgpr-spilling,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCNMESA -check-prefix=VIMESA %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -mattr=+vgpr-spilling,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCNMESA -check-prefix=GFX9MESA %s +; RUN: llc -march=amdgcn -mcpu=hawaii -mtriple=amdgcn-unknown-amdhsa-amdgiz -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CIHSA -check-prefix=HSA %s +; RUN: llc -march=amdgcn -mcpu=fiji -mtriple=amdgcn-unknown-amdhsa-amdgiz -verify-machineinstrs < %s | 
FileCheck -check-prefix=GCN -check-prefix=VIHSA -check-prefix=HSA %s ; This ends up using all 256 registers and requires register ; scavenging which will fail to find an unused register. diff --git a/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll b/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll index afbd06a00fae..89327fb8f80d 100644 --- a/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll +++ b/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tahiti -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s ; This ends up using all 255 registers and requires register ; scavenging which will fail to find an unused register. diff --git a/test/CodeGen/ARM/2009-03-07-SpillerBug.ll b/test/CodeGen/ARM/2009-03-07-SpillerBug.ll index 567400318ee0..62a9aa23f29f 100644 --- a/test/CodeGen/ARM/2009-03-07-SpillerBug.ll +++ b/test/CodeGen/ARM/2009-03-07-SpillerBug.ll @@ -59,7 +59,7 @@ bb3: ; preds = %entry %34 = fadd double %31, 0.000000e+00 %35 = fadd double %32, 0.000000e+00 %36 = bitcast %struct.ggPoint3* %x to i8* - call void @llvm.memcpy.p0i8.p0i8.i32(i8* null, i8* %36, i32 24, i32 4, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 null, i8* align 4 %36, i32 24, i1 false) store double %33, double* null, align 8 br i1 false, label %_Z20ggRaySphereIntersectRK6ggRay3RK8ggSphereddRd.exit, label %bb5.i.i.i @@ -76,4 +76,4 @@ bb7: ; preds = %entry ret i32 0 } -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind diff --git a/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll b/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll index 130221d38c23..c0b94134bec9 100644 --- a/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll +++ b/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll @@ -4,8 +4,8 @@ target triple = "thumbv7-apple-darwin10" ; This tests the fast register allocator's handling of partial redefines: ; -; %reg1028:dsub_0, %reg1028:dsub_1 = VLD1q64 %reg1025... -; %reg1030:dsub_1 = COPY %reg1028:dsub_0 +; %reg1028:dsub_0, %reg1028:dsub_1 = VLD1q64 %reg1025... +; %reg1030:dsub_1 = COPY killed %reg1028:dsub_0 ; ; %reg1028 gets allocated %Q0, and if %reg1030 is reloaded for the partial ; redef, it cannot also get %Q0.
diff --git a/test/CodeGen/ARM/2011-03-10-DAGCombineCrash.ll b/test/CodeGen/ARM/2011-03-10-DAGCombineCrash.ll index c447a1f25b65..30a388bb5877 100644 --- a/test/CodeGen/ARM/2011-03-10-DAGCombineCrash.ll +++ b/test/CodeGen/ARM/2011-03-10-DAGCombineCrash.ll @@ -16,7 +16,7 @@ bb: ; preds = %entry bb1: ; preds = %entry %0 = call %struct.ui* @vn_pp_to_ui(i32* undef) nounwind - call void @llvm.memset.p0i8.i32(i8* undef, i8 0, i32 40, i32 4, i1 false) + call void @llvm.memset.p0i8.i32(i8* align 4 undef, i8 0, i32 40, i1 false) %1 = getelementptr inbounds %struct.ui, %struct.ui* %0, i32 0, i32 0 store %struct.mo* undef, %struct.mo** %1, align 4 %2 = getelementptr inbounds %struct.ui, %struct.ui* %0, i32 0, i32 5 @@ -40,7 +40,7 @@ bb6: ; preds = %bb3 declare %struct.ui* @vn_pp_to_ui(i32*) -declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind declare i32 @mo_create_nnm(%struct.mo*, i64, i32**) diff --git a/test/CodeGen/ARM/2011-10-26-memset-inline.ll b/test/CodeGen/ARM/2011-10-26-memset-inline.ll index c3b7c4ea86c7..8d6ce34c26d9 100644 --- a/test/CodeGen/ARM/2011-10-26-memset-inline.ll +++ b/test/CodeGen/ARM/2011-10-26-memset-inline.ll @@ -14,8 +14,8 @@ target triple = "thumbv7-apple-ios5.0.0" ; CHECK-UNALIGNED: str define void @foo(i8* nocapture %c) nounwind optsize { entry: - call void @llvm.memset.p0i8.i64(i8* %c, i8 -1, i64 5, i32 1, i1 false) + call void @llvm.memset.p0i8.i64(i8* %c, i8 -1, i64 5, i1 false) ret void } -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind diff --git a/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll b/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll index c8e08c22ab19..7024a653b6c9 100644 --- a/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll +++ b/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll @@ -5,8 +5,8 @@ ; CHECK: vst1.64 define void @f_0_40(i8* nocapture %c) nounwind optsize { entry: - call void @llvm.memset.p0i8.i64(i8* %c, i8 0, i64 40, i32 16, i1 false) + call void @llvm.memset.p0i8.i64(i8* align 16 %c, i8 0, i64 40, i1 false) ret void } -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind diff --git a/test/CodeGen/ARM/2011-11-14-EarlyClobber.ll b/test/CodeGen/ARM/2011-11-14-EarlyClobber.ll index 7f4057143a07..38fc3bcd8873 100644 --- a/test/CodeGen/ARM/2011-11-14-EarlyClobber.ll +++ b/test/CodeGen/ARM/2011-11-14-EarlyClobber.ll @@ -5,11 +5,11 @@ target triple = "thumbv7-apple-ios" ; This test calls shrinkToUses with an early-clobber redefined live range during ; spilling. ; -; Shrink: %vreg47,1.158257e-02 = [384r,400e:0)[400e,420r:1) 0@384r 1@400e +; Shrink: %47,1.158257e-02 = [384r,400e:0)[400e,420r:1) 0@384r 1@400e ; ; The early-clobber instruction is an str: ; -; %vreg12 = t2STR_PRE %vreg6, %vreg12, 32, pred:14, pred:%noreg +; early-clobber %12 = t2STR_PRE %6, %12, 32, 14, %noreg ; ; This tests that shrinkToUses handles the EC redef correctly. 
diff --git a/test/CodeGen/ARM/2012-04-24-SplitEHCriticalEdge.ll b/test/CodeGen/ARM/2012-04-24-SplitEHCriticalEdge.ll index ce0dcc709522..ef33b2f50184 100644 --- a/test/CodeGen/ARM/2012-04-24-SplitEHCriticalEdge.ll +++ b/test/CodeGen/ARM/2012-04-24-SplitEHCriticalEdge.ll @@ -19,7 +19,7 @@ declare i32 @llvm.eh.typeid.for(i8*) nounwind readnone declare i8* @__cxa_begin_catch(i8*) -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind declare void @__cxa_end_catch() diff --git a/test/CodeGen/ARM/2014-01-09-pseudo_expand_implicit_reg.ll b/test/CodeGen/ARM/2014-01-09-pseudo_expand_implicit_reg.ll index be87a2fb1c89..279917afaa6c 100644 --- a/test/CodeGen/ARM/2014-01-09-pseudo_expand_implicit_reg.ll +++ b/test/CodeGen/ARM/2014-01-09-pseudo_expand_implicit_reg.ll @@ -4,7 +4,7 @@ define void @vst(i8* %m, [4 x i64] %v) { entry: ; CHECK: vst: -; CHECK: VST1d64Q %R{{[0-9]+}}, 8, %D{{[0-9]+}}, pred:14, pred:%noreg, %Q{{[0-9]+}}_Q{{[0-9]+}} +; CHECK: VST1d64Q killed %r{{[0-9]+}}, 8, %d{{[0-9]+}}, 14, %noreg, implicit killed %q{{[0-9]+}}_q{{[0-9]+}} %v0 = extractvalue [4 x i64] %v, 0 %v1 = extractvalue [4 x i64] %v, 1 @@ -37,7 +37,7 @@ entry: %struct.__neon_int8x8x4_t = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } define <8 x i8> @vtbx4(<8 x i8>* %A, %struct.__neon_int8x8x4_t* %B, <8 x i8>* %C) nounwind { ; CHECK: vtbx4: -; CHECK: VTBX4 {{.*}}, pred:14, pred:%noreg, %Q{{[0-9]+}}_Q{{[0-9]+}} +; CHECK: VTBX4 {{.*}}, 14, %noreg, implicit %q{{[0-9]+}}_q{{[0-9]+}} %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = load %struct.__neon_int8x8x4_t, %struct.__neon_int8x8x4_t* %B %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0 diff --git a/test/CodeGen/ARM/ARMLoadStoreDBG.mir b/test/CodeGen/ARM/ARMLoadStoreDBG.mir index 1ff3bffd3877..86d09ce7b097 100644 --- a/test/CodeGen/ARM/ARMLoadStoreDBG.mir +++ b/test/CodeGen/ARM/ARMLoadStoreDBG.mir @@ -120,40 +120,40 @@ body: | bb.0.entry: liveins: %r0, %r1, %r2, %r3, %lr, %r7 - DBG_VALUE debug-use %r0, debug-use _, !18, !27, debug-location !28 - DBG_VALUE debug-use %r1, debug-use _, !19, !27, debug-location !28 - DBG_VALUE debug-use %r2, debug-use _, !20, !27, debug-location !28 - DBG_VALUE debug-use %r3, debug-use _, !21, !27, debug-location !28 - t2CMPri %r3, 4, 14, _, implicit-def %cpsr, debug-location !31 + DBG_VALUE debug-use %r0, debug-use %noreg, !18, !27, debug-location !28 + DBG_VALUE debug-use %r1, debug-use %noreg, !19, !27, debug-location !28 + DBG_VALUE debug-use %r2, debug-use %noreg, !20, !27, debug-location !28 + DBG_VALUE debug-use %r3, debug-use %noreg, !21, !27, debug-location !28 + t2CMPri %r3, 4, 14, %noreg, implicit-def %cpsr, debug-location !31 t2Bcc %bb.2.if.end, 2, killed %cpsr bb.1: liveins: %lr, %r7 - DBG_VALUE debug-use %r1, debug-use _, !19, !27, debug-location !28 - %r0 = t2MOVi -1, 14, _, _ - DBG_VALUE debug-use %r1, debug-use _, !19, !27, debug-location !28 - tBX_RET 14, _, implicit %r0, debug-location !34 + DBG_VALUE debug-use %r1, debug-use %noreg, !19, !27, debug-location !28 + %r0 = t2MOVi -1, 14, %noreg, %noreg + DBG_VALUE debug-use %r1, debug-use %noreg, !19, !27, debug-location !28 + tBX_RET 14, %noreg, implicit %r0, debug-location !34 bb.2.if.end: liveins: %r0, %r2, %r3, %r7, %lr - %sp = frame-setup t2STMDB_UPD %sp, 14, _, killed %r7, killed %lr + %sp = frame-setup t2STMDB_UPD %sp, 14, %noreg, killed %r7, killed %lr frame-setup CFI_INSTRUCTION def_cfa_offset 8 frame-setup CFI_INSTRUCTION offset %lr, -4 
frame-setup CFI_INSTRUCTION offset %r7, -8 - DBG_VALUE debug-use %r0, debug-use _, !18, !27, debug-location !28 - DBG_VALUE debug-use %r1, debug-use _, !19, !27, debug-location !28 - DBG_VALUE debug-use %r2, debug-use _, !20, !27, debug-location !28 - DBG_VALUE debug-use %r3, debug-use _, !21, !27, debug-location !28 + DBG_VALUE debug-use %r0, debug-use %noreg, !18, !27, debug-location !28 + DBG_VALUE debug-use %r1, debug-use %noreg, !19, !27, debug-location !28 + DBG_VALUE debug-use %r2, debug-use %noreg, !20, !27, debug-location !28 + DBG_VALUE debug-use %r3, debug-use %noreg, !21, !27, debug-location !28 %r1 = COPY killed %r2, debug-location !32 - DBG_VALUE debug-use %r1, debug-use _, !19, !27, debug-location !28 + DBG_VALUE debug-use %r1, debug-use %noreg, !19, !27, debug-location !28 %r2 = COPY killed %r3, debug-location !32 - tBL 14, _, @g, csr_aapcs, implicit-def dead %lr, implicit %sp, implicit %r0, implicit %r1, implicit %r2, implicit-def %sp, debug-location !32 - %r0 = t2MOVi 0, 14, _, _ - %sp = t2LDMIA_UPD %sp, 14, _, def %r7, def %lr - tBX_RET 14, _, implicit %r0, debug-location !34 + tBL 14, %noreg, @g, csr_aapcs, implicit-def dead %lr, implicit %sp, implicit %r0, implicit %r1, implicit %r2, implicit-def %sp, debug-location !32 + %r0 = t2MOVi 0, 14, %noreg, %noreg + %sp = t2LDMIA_UPD %sp, 14, %noreg, def %r7, def %lr + tBX_RET 14, %noreg, implicit %r0, debug-location !34 # Verify that the DBG_VALUE is ignored. -# CHECK: %sp = t2LDMIA_RET %sp, 14, _, def %r7, def %pc, implicit %r0 +# CHECK: %sp = t2LDMIA_RET %sp, 14, %noreg, def %r7, def %pc, implicit %r0 ... diff --git a/test/CodeGen/ARM/GlobalISel/arm-call-lowering.ll b/test/CodeGen/ARM/GlobalISel/arm-call-lowering.ll index c1dd9276ddd8..ec6ea632591e 100644 --- a/test/CodeGen/ARM/GlobalISel/arm-call-lowering.ll +++ b/test/CodeGen/ARM/GlobalISel/arm-call-lowering.ll @@ -7,11 +7,11 @@ define arm_aapcscc void @test_indirect_call(void() *%fptr) { ; V5T: %[[FPTR:[0-9]+]]:gpr(p0) = COPY %r0 ; V4T: %[[FPTR:[0-9]+]]:tgpr(p0) = COPY %r0 ; NOV4T: %[[FPTR:[0-9]+]]:tgpr(p0) = COPY %r0 -; CHECK: ADJCALLSTACKDOWN 0, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: ADJCALLSTACKDOWN 0, 0, 14, %noreg, implicit-def %sp, implicit %sp ; V5T: BLX %[[FPTR]](p0), csr_aapcs, implicit-def %lr, implicit %sp ; V4T: BX_CALL %[[FPTR]](p0), csr_aapcs, implicit-def %lr, implicit %sp ; NOV4T: BMOVPCRX_CALL %[[FPTR]](p0), csr_aapcs, implicit-def %lr, implicit %sp -; CHECK: ADJCALLSTACKUP 0, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: ADJCALLSTACKUP 0, 0, 14, %noreg, implicit-def %sp, implicit %sp entry: notail call arm_aapcscc void %fptr() ret void @@ -21,9 +21,9 @@ declare arm_aapcscc void @call_target() define arm_aapcscc void @test_direct_call() { ; CHECK-LABEL: name: test_direct_call -; CHECK: ADJCALLSTACKDOWN 0, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: ADJCALLSTACKDOWN 0, 0, 14, %noreg, implicit-def %sp, implicit %sp ; CHECK: BL @call_target, csr_aapcs, implicit-def %lr, implicit %sp -; CHECK: ADJCALLSTACKUP 0, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: ADJCALLSTACKUP 0, 0, 14, %noreg, implicit-def %sp, implicit %sp entry: notail call arm_aapcscc void @call_target() ret void diff --git a/test/CodeGen/ARM/GlobalISel/arm-instruction-select-cmp.mir b/test/CodeGen/ARM/GlobalISel/arm-instruction-select-cmp.mir index e2b6f878e6bf..c8ed142903bb 100644 --- a/test/CodeGen/ARM/GlobalISel/arm-instruction-select-cmp.mir +++ b/test/CodeGen/ARM/GlobalISel/arm-instruction-select-cmp.mir @@ -69,18 +69,18 @@ body: | ; CHECK-LABEL: name: 
test_icmp_eq_s32 ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY %r0 ; CHECK: [[COPY1:%[0-9]+]]:gpr = COPY %r1 - ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ - ; CHECK: CMPrr [[COPY]], [[COPY1]], 14, _, implicit-def %cpsr + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, %noreg, %noreg + ; CHECK: CMPrr [[COPY]], [[COPY1]], 14, %noreg, implicit-def %cpsr ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 0, %cpsr - ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, _, _ + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, %noreg, %noreg ; CHECK: %r0 = COPY [[ANDri]] - ; CHECK: BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 %0(s32) = COPY %r0 %1(s32) = COPY %r1 %2(s1) = G_ICMP intpred(eq), %0(s32), %1 %3(s32) = G_ZEXT %2(s1) %r0 = COPY %3(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_icmp_ne_s32 @@ -99,18 +99,18 @@ body: | ; CHECK-LABEL: name: test_icmp_ne_s32 ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY %r0 ; CHECK: [[COPY1:%[0-9]+]]:gpr = COPY %r1 - ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ - ; CHECK: CMPrr [[COPY]], [[COPY1]], 14, _, implicit-def %cpsr + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, %noreg, %noreg + ; CHECK: CMPrr [[COPY]], [[COPY1]], 14, %noreg, implicit-def %cpsr ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 1, %cpsr - ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, _, _ + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, %noreg, %noreg ; CHECK: %r0 = COPY [[ANDri]] - ; CHECK: BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 %0(s32) = COPY %r0 %1(s32) = COPY %r1 %2(s1) = G_ICMP intpred(ne), %0(s32), %1 %3(s32) = G_ZEXT %2(s1) %r0 = COPY %3(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_icmp_ugt_s32 @@ -129,18 +129,18 @@ body: | ; CHECK-LABEL: name: test_icmp_ugt_s32 ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY %r0 ; CHECK: [[COPY1:%[0-9]+]]:gpr = COPY %r1 - ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ - ; CHECK: CMPrr [[COPY]], [[COPY1]], 14, _, implicit-def %cpsr + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, %noreg, %noreg + ; CHECK: CMPrr [[COPY]], [[COPY1]], 14, %noreg, implicit-def %cpsr ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 8, %cpsr - ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, _, _ + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, %noreg, %noreg ; CHECK: %r0 = COPY [[ANDri]] - ; CHECK: BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 %0(s32) = COPY %r0 %1(s32) = COPY %r1 %2(s1) = G_ICMP intpred(ugt), %0(s32), %1 %3(s32) = G_ZEXT %2(s1) %r0 = COPY %3(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... 
--- name: test_icmp_uge_s32 @@ -159,18 +159,18 @@ body: | ; CHECK-LABEL: name: test_icmp_uge_s32 ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY %r0 ; CHECK: [[COPY1:%[0-9]+]]:gpr = COPY %r1 - ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ - ; CHECK: CMPrr [[COPY]], [[COPY1]], 14, _, implicit-def %cpsr + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, %noreg, %noreg + ; CHECK: CMPrr [[COPY]], [[COPY1]], 14, %noreg, implicit-def %cpsr ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 2, %cpsr - ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, _, _ + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, %noreg, %noreg ; CHECK: %r0 = COPY [[ANDri]] - ; CHECK: BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 %0(s32) = COPY %r0 %1(s32) = COPY %r1 %2(s1) = G_ICMP intpred(uge), %0(s32), %1 %3(s32) = G_ZEXT %2(s1) %r0 = COPY %3(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_icmp_ult_s32 @@ -189,18 +189,18 @@ body: | ; CHECK-LABEL: name: test_icmp_ult_s32 ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY %r0 ; CHECK: [[COPY1:%[0-9]+]]:gpr = COPY %r1 - ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ - ; CHECK: CMPrr [[COPY]], [[COPY1]], 14, _, implicit-def %cpsr + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, %noreg, %noreg + ; CHECK: CMPrr [[COPY]], [[COPY1]], 14, %noreg, implicit-def %cpsr ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 3, %cpsr - ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, _, _ + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, %noreg, %noreg ; CHECK: %r0 = COPY [[ANDri]] - ; CHECK: BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 %0(s32) = COPY %r0 %1(s32) = COPY %r1 %2(s1) = G_ICMP intpred(ult), %0(s32), %1 %3(s32) = G_ZEXT %2(s1) %r0 = COPY %3(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_icmp_ule_s32 @@ -219,18 +219,18 @@ body: | ; CHECK-LABEL: name: test_icmp_ule_s32 ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY %r0 ; CHECK: [[COPY1:%[0-9]+]]:gpr = COPY %r1 - ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ - ; CHECK: CMPrr [[COPY]], [[COPY1]], 14, _, implicit-def %cpsr + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, %noreg, %noreg + ; CHECK: CMPrr [[COPY]], [[COPY1]], 14, %noreg, implicit-def %cpsr ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 9, %cpsr - ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, _, _ + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, %noreg, %noreg ; CHECK: %r0 = COPY [[ANDri]] - ; CHECK: BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 %0(s32) = COPY %r0 %1(s32) = COPY %r1 %2(s1) = G_ICMP intpred(ule), %0(s32), %1 %3(s32) = G_ZEXT %2(s1) %r0 = COPY %3(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... 
--- name: test_icmp_sgt_s32 @@ -249,18 +249,18 @@ body: | ; CHECK-LABEL: name: test_icmp_sgt_s32 ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY %r0 ; CHECK: [[COPY1:%[0-9]+]]:gpr = COPY %r1 - ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ - ; CHECK: CMPrr [[COPY]], [[COPY1]], 14, _, implicit-def %cpsr + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, %noreg, %noreg + ; CHECK: CMPrr [[COPY]], [[COPY1]], 14, %noreg, implicit-def %cpsr ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 12, %cpsr - ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, _, _ + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, %noreg, %noreg ; CHECK: %r0 = COPY [[ANDri]] - ; CHECK: BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 %0(s32) = COPY %r0 %1(s32) = COPY %r1 %2(s1) = G_ICMP intpred(sgt), %0(s32), %1 %3(s32) = G_ZEXT %2(s1) %r0 = COPY %3(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_icmp_sge_s32 @@ -279,18 +279,18 @@ body: | ; CHECK-LABEL: name: test_icmp_sge_s32 ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY %r0 ; CHECK: [[COPY1:%[0-9]+]]:gpr = COPY %r1 - ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ - ; CHECK: CMPrr [[COPY]], [[COPY1]], 14, _, implicit-def %cpsr + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, %noreg, %noreg + ; CHECK: CMPrr [[COPY]], [[COPY1]], 14, %noreg, implicit-def %cpsr ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 10, %cpsr - ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, _, _ + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, %noreg, %noreg ; CHECK: %r0 = COPY [[ANDri]] - ; CHECK: BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 %0(s32) = COPY %r0 %1(s32) = COPY %r1 %2(s1) = G_ICMP intpred(sge), %0(s32), %1 %3(s32) = G_ZEXT %2(s1) %r0 = COPY %3(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_icmp_slt_s32 @@ -309,18 +309,18 @@ body: | ; CHECK-LABEL: name: test_icmp_slt_s32 ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY %r0 ; CHECK: [[COPY1:%[0-9]+]]:gpr = COPY %r1 - ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ - ; CHECK: CMPrr [[COPY]], [[COPY1]], 14, _, implicit-def %cpsr + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, %noreg, %noreg + ; CHECK: CMPrr [[COPY]], [[COPY1]], 14, %noreg, implicit-def %cpsr ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 11, %cpsr - ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, _, _ + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, %noreg, %noreg ; CHECK: %r0 = COPY [[ANDri]] - ; CHECK: BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 %0(s32) = COPY %r0 %1(s32) = COPY %r1 %2(s1) = G_ICMP intpred(slt), %0(s32), %1 %3(s32) = G_ZEXT %2(s1) %r0 = COPY %3(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... 
--- name: test_icmp_sle_s32 @@ -339,18 +339,18 @@ body: | ; CHECK-LABEL: name: test_icmp_sle_s32 ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY %r0 ; CHECK: [[COPY1:%[0-9]+]]:gpr = COPY %r1 - ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ - ; CHECK: CMPrr [[COPY]], [[COPY1]], 14, _, implicit-def %cpsr + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, %noreg, %noreg + ; CHECK: CMPrr [[COPY]], [[COPY1]], 14, %noreg, implicit-def %cpsr ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 13, %cpsr - ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, _, _ + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, %noreg, %noreg ; CHECK: %r0 = COPY [[ANDri]] - ; CHECK: BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 %0(s32) = COPY %r0 %1(s32) = COPY %r1 %2(s1) = G_ICMP intpred(sle), %0(s32), %1 %3(s32) = G_ZEXT %2(s1) %r0 = COPY %3(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_fcmp_true_s32 @@ -367,16 +367,16 @@ body: | liveins: %s0, %s1 ; CHECK-LABEL: name: test_fcmp_true_s32 - ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 1, 14, _, _ - ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVi]], 1, 14, _, _ + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 1, 14, %noreg, %noreg + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVi]], 1, 14, %noreg, %noreg ; CHECK: %r0 = COPY [[ANDri]] - ; CHECK: BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 %0(s32) = COPY %s0 %1(s32) = COPY %s1 %2(s1) = G_FCMP floatpred(true), %0(s32), %1 %3(s32) = G_ZEXT %2(s1) %r0 = COPY %3(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_fcmp_false_s32 @@ -393,16 +393,16 @@ body: | liveins: %s0, %s1 ; CHECK-LABEL: name: test_fcmp_false_s32 - ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ - ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVi]], 1, 14, _, _ + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, %noreg, %noreg + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVi]], 1, 14, %noreg, %noreg ; CHECK: %r0 = COPY [[ANDri]] - ; CHECK: BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 %0(s32) = COPY %s0 %1(s32) = COPY %s1 %2(s1) = G_FCMP floatpred(false), %0(s32), %1 %3(s32) = G_ZEXT %2(s1) %r0 = COPY %3(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_fcmp_oeq_s32 @@ -421,19 +421,19 @@ body: | ; CHECK-LABEL: name: test_fcmp_oeq_s32 ; CHECK: [[COPY:%[0-9]+]]:spr = COPY %s0 ; CHECK: [[COPY1:%[0-9]+]]:spr = COPY %s1 - ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ - ; CHECK: VCMPS [[COPY]], [[COPY1]], 14, _, implicit-def %fpscr_nzcv - ; CHECK: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, %noreg, %noreg + ; CHECK: VCMPS [[COPY]], [[COPY1]], 14, %noreg, implicit-def %fpscr_nzcv + ; CHECK: FMSTAT 14, %noreg, implicit-def %cpsr, implicit %fpscr_nzcv ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 0, %cpsr - ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, _, _ + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, %noreg, %noreg ; CHECK: %r0 = COPY [[ANDri]] - ; CHECK: BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 %0(s32) = COPY %s0 %1(s32) = COPY %s1 %2(s1) = G_FCMP floatpred(oeq), %0(s32), %1 %3(s32) = G_ZEXT %2(s1) %r0 = COPY %3(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... 
--- name: test_fcmp_ogt_s32 @@ -452,19 +452,19 @@ body: | ; CHECK-LABEL: name: test_fcmp_ogt_s32 ; CHECK: [[COPY:%[0-9]+]]:spr = COPY %s0 ; CHECK: [[COPY1:%[0-9]+]]:spr = COPY %s1 - ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ - ; CHECK: VCMPS [[COPY]], [[COPY1]], 14, _, implicit-def %fpscr_nzcv - ; CHECK: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, %noreg, %noreg + ; CHECK: VCMPS [[COPY]], [[COPY1]], 14, %noreg, implicit-def %fpscr_nzcv + ; CHECK: FMSTAT 14, %noreg, implicit-def %cpsr, implicit %fpscr_nzcv ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 12, %cpsr - ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, _, _ + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, %noreg, %noreg ; CHECK: %r0 = COPY [[ANDri]] - ; CHECK: BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 %0(s32) = COPY %s0 %1(s32) = COPY %s1 %2(s1) = G_FCMP floatpred(ogt), %0(s32), %1 %3(s32) = G_ZEXT %2(s1) %r0 = COPY %3(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_fcmp_oge_s32 @@ -483,19 +483,19 @@ body: | ; CHECK-LABEL: name: test_fcmp_oge_s32 ; CHECK: [[COPY:%[0-9]+]]:spr = COPY %s0 ; CHECK: [[COPY1:%[0-9]+]]:spr = COPY %s1 - ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ - ; CHECK: VCMPS [[COPY]], [[COPY1]], 14, _, implicit-def %fpscr_nzcv - ; CHECK: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, %noreg, %noreg + ; CHECK: VCMPS [[COPY]], [[COPY1]], 14, %noreg, implicit-def %fpscr_nzcv + ; CHECK: FMSTAT 14, %noreg, implicit-def %cpsr, implicit %fpscr_nzcv ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 10, %cpsr - ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, _, _ + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, %noreg, %noreg ; CHECK: %r0 = COPY [[ANDri]] - ; CHECK: BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 %0(s32) = COPY %s0 %1(s32) = COPY %s1 %2(s1) = G_FCMP floatpred(oge), %0(s32), %1 %3(s32) = G_ZEXT %2(s1) %r0 = COPY %3(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_fcmp_olt_s32 @@ -514,19 +514,19 @@ body: | ; CHECK-LABEL: name: test_fcmp_olt_s32 ; CHECK: [[COPY:%[0-9]+]]:spr = COPY %s0 ; CHECK: [[COPY1:%[0-9]+]]:spr = COPY %s1 - ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ - ; CHECK: VCMPS [[COPY]], [[COPY1]], 14, _, implicit-def %fpscr_nzcv - ; CHECK: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, %noreg, %noreg + ; CHECK: VCMPS [[COPY]], [[COPY1]], 14, %noreg, implicit-def %fpscr_nzcv + ; CHECK: FMSTAT 14, %noreg, implicit-def %cpsr, implicit %fpscr_nzcv ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 4, %cpsr - ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, _, _ + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, %noreg, %noreg ; CHECK: %r0 = COPY [[ANDri]] - ; CHECK: BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 %0(s32) = COPY %s0 %1(s32) = COPY %s1 %2(s1) = G_FCMP floatpred(olt), %0(s32), %1 %3(s32) = G_ZEXT %2(s1) %r0 = COPY %3(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... 
--- name: test_fcmp_ole_s32 @@ -545,19 +545,19 @@ body: | ; CHECK-LABEL: name: test_fcmp_ole_s32 ; CHECK: [[COPY:%[0-9]+]]:spr = COPY %s0 ; CHECK: [[COPY1:%[0-9]+]]:spr = COPY %s1 - ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ - ; CHECK: VCMPS [[COPY]], [[COPY1]], 14, _, implicit-def %fpscr_nzcv - ; CHECK: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, %noreg, %noreg + ; CHECK: VCMPS [[COPY]], [[COPY1]], 14, %noreg, implicit-def %fpscr_nzcv + ; CHECK: FMSTAT 14, %noreg, implicit-def %cpsr, implicit %fpscr_nzcv ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 9, %cpsr - ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, _, _ + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, %noreg, %noreg ; CHECK: %r0 = COPY [[ANDri]] - ; CHECK: BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 %0(s32) = COPY %s0 %1(s32) = COPY %s1 %2(s1) = G_FCMP floatpred(ole), %0(s32), %1 %3(s32) = G_ZEXT %2(s1) %r0 = COPY %3(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_fcmp_ord_s32 @@ -576,19 +576,19 @@ body: | ; CHECK-LABEL: name: test_fcmp_ord_s32 ; CHECK: [[COPY:%[0-9]+]]:spr = COPY %s0 ; CHECK: [[COPY1:%[0-9]+]]:spr = COPY %s1 - ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ - ; CHECK: VCMPS [[COPY]], [[COPY1]], 14, _, implicit-def %fpscr_nzcv - ; CHECK: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, %noreg, %noreg + ; CHECK: VCMPS [[COPY]], [[COPY1]], 14, %noreg, implicit-def %fpscr_nzcv + ; CHECK: FMSTAT 14, %noreg, implicit-def %cpsr, implicit %fpscr_nzcv ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 7, %cpsr - ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, _, _ + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, %noreg, %noreg ; CHECK: %r0 = COPY [[ANDri]] - ; CHECK: BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 %0(s32) = COPY %s0 %1(s32) = COPY %s1 %2(s1) = G_FCMP floatpred(ord), %0(s32), %1 %3(s32) = G_ZEXT %2(s1) %r0 = COPY %3(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_fcmp_ugt_s32 @@ -607,19 +607,19 @@ body: | ; CHECK-LABEL: name: test_fcmp_ugt_s32 ; CHECK: [[COPY:%[0-9]+]]:spr = COPY %s0 ; CHECK: [[COPY1:%[0-9]+]]:spr = COPY %s1 - ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ - ; CHECK: VCMPS [[COPY]], [[COPY1]], 14, _, implicit-def %fpscr_nzcv - ; CHECK: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, %noreg, %noreg + ; CHECK: VCMPS [[COPY]], [[COPY1]], 14, %noreg, implicit-def %fpscr_nzcv + ; CHECK: FMSTAT 14, %noreg, implicit-def %cpsr, implicit %fpscr_nzcv ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 8, %cpsr - ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, _, _ + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, %noreg, %noreg ; CHECK: %r0 = COPY [[ANDri]] - ; CHECK: BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 %0(s32) = COPY %s0 %1(s32) = COPY %s1 %2(s1) = G_FCMP floatpred(ugt), %0(s32), %1 %3(s32) = G_ZEXT %2(s1) %r0 = COPY %3(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... 
--- name: test_fcmp_uge_s32 @@ -638,19 +638,19 @@ body: | ; CHECK-LABEL: name: test_fcmp_uge_s32 ; CHECK: [[COPY:%[0-9]+]]:spr = COPY %s0 ; CHECK: [[COPY1:%[0-9]+]]:spr = COPY %s1 - ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ - ; CHECK: VCMPS [[COPY]], [[COPY1]], 14, _, implicit-def %fpscr_nzcv - ; CHECK: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, %noreg, %noreg + ; CHECK: VCMPS [[COPY]], [[COPY1]], 14, %noreg, implicit-def %fpscr_nzcv + ; CHECK: FMSTAT 14, %noreg, implicit-def %cpsr, implicit %fpscr_nzcv ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 5, %cpsr - ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, _, _ + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, %noreg, %noreg ; CHECK: %r0 = COPY [[ANDri]] - ; CHECK: BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 %0(s32) = COPY %s0 %1(s32) = COPY %s1 %2(s1) = G_FCMP floatpred(uge), %0(s32), %1 %3(s32) = G_ZEXT %2(s1) %r0 = COPY %3(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_fcmp_ult_s32 @@ -669,19 +669,19 @@ body: | ; CHECK-LABEL: name: test_fcmp_ult_s32 ; CHECK: [[COPY:%[0-9]+]]:spr = COPY %s0 ; CHECK: [[COPY1:%[0-9]+]]:spr = COPY %s1 - ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ - ; CHECK: VCMPS [[COPY]], [[COPY1]], 14, _, implicit-def %fpscr_nzcv - ; CHECK: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, %noreg, %noreg + ; CHECK: VCMPS [[COPY]], [[COPY1]], 14, %noreg, implicit-def %fpscr_nzcv + ; CHECK: FMSTAT 14, %noreg, implicit-def %cpsr, implicit %fpscr_nzcv ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 11, %cpsr - ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, _, _ + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, %noreg, %noreg ; CHECK: %r0 = COPY [[ANDri]] - ; CHECK: BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 %0(s32) = COPY %s0 %1(s32) = COPY %s1 %2(s1) = G_FCMP floatpred(ult), %0(s32), %1 %3(s32) = G_ZEXT %2(s1) %r0 = COPY %3(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_fcmp_ule_s32 @@ -700,19 +700,19 @@ body: | ; CHECK-LABEL: name: test_fcmp_ule_s32 ; CHECK: [[COPY:%[0-9]+]]:spr = COPY %s0 ; CHECK: [[COPY1:%[0-9]+]]:spr = COPY %s1 - ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ - ; CHECK: VCMPS [[COPY]], [[COPY1]], 14, _, implicit-def %fpscr_nzcv - ; CHECK: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, %noreg, %noreg + ; CHECK: VCMPS [[COPY]], [[COPY1]], 14, %noreg, implicit-def %fpscr_nzcv + ; CHECK: FMSTAT 14, %noreg, implicit-def %cpsr, implicit %fpscr_nzcv ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 13, %cpsr - ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, _, _ + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, %noreg, %noreg ; CHECK: %r0 = COPY [[ANDri]] - ; CHECK: BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 %0(s32) = COPY %s0 %1(s32) = COPY %s1 %2(s1) = G_FCMP floatpred(ule), %0(s32), %1 %3(s32) = G_ZEXT %2(s1) %r0 = COPY %3(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... 
--- name: test_fcmp_une_s32 @@ -731,19 +731,19 @@ body: | ; CHECK-LABEL: name: test_fcmp_une_s32 ; CHECK: [[COPY:%[0-9]+]]:spr = COPY %s0 ; CHECK: [[COPY1:%[0-9]+]]:spr = COPY %s1 - ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ - ; CHECK: VCMPS [[COPY]], [[COPY1]], 14, _, implicit-def %fpscr_nzcv - ; CHECK: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, %noreg, %noreg + ; CHECK: VCMPS [[COPY]], [[COPY1]], 14, %noreg, implicit-def %fpscr_nzcv + ; CHECK: FMSTAT 14, %noreg, implicit-def %cpsr, implicit %fpscr_nzcv ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 1, %cpsr - ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, _, _ + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, %noreg, %noreg ; CHECK: %r0 = COPY [[ANDri]] - ; CHECK: BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 %0(s32) = COPY %s0 %1(s32) = COPY %s1 %2(s1) = G_FCMP floatpred(une), %0(s32), %1 %3(s32) = G_ZEXT %2(s1) %r0 = COPY %3(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_fcmp_uno_s32 @@ -762,19 +762,19 @@ body: | ; CHECK-LABEL: name: test_fcmp_uno_s32 ; CHECK: [[COPY:%[0-9]+]]:spr = COPY %s0 ; CHECK: [[COPY1:%[0-9]+]]:spr = COPY %s1 - ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ - ; CHECK: VCMPS [[COPY]], [[COPY1]], 14, _, implicit-def %fpscr_nzcv - ; CHECK: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, %noreg, %noreg + ; CHECK: VCMPS [[COPY]], [[COPY1]], 14, %noreg, implicit-def %fpscr_nzcv + ; CHECK: FMSTAT 14, %noreg, implicit-def %cpsr, implicit %fpscr_nzcv ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 6, %cpsr - ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, _, _ + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, %noreg, %noreg ; CHECK: %r0 = COPY [[ANDri]] - ; CHECK: BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 %0(s32) = COPY %s0 %1(s32) = COPY %s1 %2(s1) = G_FCMP floatpred(uno), %0(s32), %1 %3(s32) = G_ZEXT %2(s1) %r0 = COPY %3(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... 
--- name: test_fcmp_one_s32 @@ -793,22 +793,22 @@ body: | ; CHECK-LABEL: name: test_fcmp_one_s32 ; CHECK: [[COPY:%[0-9]+]]:spr = COPY %s0 ; CHECK: [[COPY1:%[0-9]+]]:spr = COPY %s1 - ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ - ; CHECK: VCMPS [[COPY]], [[COPY1]], 14, _, implicit-def %fpscr_nzcv - ; CHECK: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, %noreg, %noreg + ; CHECK: VCMPS [[COPY]], [[COPY1]], 14, %noreg, implicit-def %fpscr_nzcv + ; CHECK: FMSTAT 14, %noreg, implicit-def %cpsr, implicit %fpscr_nzcv ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 12, %cpsr - ; CHECK: VCMPS [[COPY]], [[COPY1]], 14, _, implicit-def %fpscr_nzcv - ; CHECK: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv + ; CHECK: VCMPS [[COPY]], [[COPY1]], 14, %noreg, implicit-def %fpscr_nzcv + ; CHECK: FMSTAT 14, %noreg, implicit-def %cpsr, implicit %fpscr_nzcv ; CHECK: [[MOVCCi1:%[0-9]+]]:gpr = MOVCCi [[MOVCCi]], 1, 4, %cpsr - ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi1]], 1, 14, _, _ + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi1]], 1, 14, %noreg, %noreg ; CHECK: %r0 = COPY [[ANDri]] - ; CHECK: BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 %0(s32) = COPY %s0 %1(s32) = COPY %s1 %2(s1) = G_FCMP floatpred(one), %0(s32), %1 %3(s32) = G_ZEXT %2(s1) %r0 = COPY %3(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_fcmp_ueq_s32 @@ -827,22 +827,22 @@ body: | ; CHECK-LABEL: name: test_fcmp_ueq_s32 ; CHECK: [[COPY:%[0-9]+]]:spr = COPY %s0 ; CHECK: [[COPY1:%[0-9]+]]:spr = COPY %s1 - ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ - ; CHECK: VCMPS [[COPY]], [[COPY1]], 14, _, implicit-def %fpscr_nzcv - ; CHECK: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, %noreg, %noreg + ; CHECK: VCMPS [[COPY]], [[COPY1]], 14, %noreg, implicit-def %fpscr_nzcv + ; CHECK: FMSTAT 14, %noreg, implicit-def %cpsr, implicit %fpscr_nzcv ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 0, %cpsr - ; CHECK: VCMPS [[COPY]], [[COPY1]], 14, _, implicit-def %fpscr_nzcv - ; CHECK: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv + ; CHECK: VCMPS [[COPY]], [[COPY1]], 14, %noreg, implicit-def %fpscr_nzcv + ; CHECK: FMSTAT 14, %noreg, implicit-def %cpsr, implicit %fpscr_nzcv ; CHECK: [[MOVCCi1:%[0-9]+]]:gpr = MOVCCi [[MOVCCi]], 1, 6, %cpsr - ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi1]], 1, 14, _, _ + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi1]], 1, 14, %noreg, %noreg ; CHECK: %r0 = COPY [[ANDri]] - ; CHECK: BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 %0(s32) = COPY %s0 %1(s32) = COPY %s1 %2(s1) = G_FCMP floatpred(ueq), %0(s32), %1 %3(s32) = G_ZEXT %2(s1) %r0 = COPY %3(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_fcmp_true_s64 @@ -859,16 +859,16 @@ body: | liveins: %d0, %d1 ; CHECK-LABEL: name: test_fcmp_true_s64 - ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 1, 14, _, _ - ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVi]], 1, 14, _, _ + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 1, 14, %noreg, %noreg + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVi]], 1, 14, %noreg, %noreg ; CHECK: %r0 = COPY [[ANDri]] - ; CHECK: BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 %0(s64) = COPY %d0 %1(s64) = COPY %d1 %2(s1) = G_FCMP floatpred(true), %0(s64), %1 %3(s32) = G_ZEXT %2(s1) %r0 = COPY %3(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... 
--- name: test_fcmp_false_s64 @@ -885,16 +885,16 @@ body: | liveins: %d0, %d1 ; CHECK-LABEL: name: test_fcmp_false_s64 - ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ - ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVi]], 1, 14, _, _ + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, %noreg, %noreg + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVi]], 1, 14, %noreg, %noreg ; CHECK: %r0 = COPY [[ANDri]] - ; CHECK: BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 %0(s64) = COPY %d0 %1(s64) = COPY %d1 %2(s1) = G_FCMP floatpred(false), %0(s64), %1 %3(s32) = G_ZEXT %2(s1) %r0 = COPY %3(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_fcmp_oeq_s64 @@ -913,19 +913,19 @@ body: | ; CHECK-LABEL: name: test_fcmp_oeq_s64 ; CHECK: [[COPY:%[0-9]+]]:dpr = COPY %d0 ; CHECK: [[COPY1:%[0-9]+]]:dpr = COPY %d1 - ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ - ; CHECK: VCMPD [[COPY]], [[COPY1]], 14, _, implicit-def %fpscr_nzcv - ; CHECK: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, %noreg, %noreg + ; CHECK: VCMPD [[COPY]], [[COPY1]], 14, %noreg, implicit-def %fpscr_nzcv + ; CHECK: FMSTAT 14, %noreg, implicit-def %cpsr, implicit %fpscr_nzcv ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 0, %cpsr - ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, _, _ + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, %noreg, %noreg ; CHECK: %r0 = COPY [[ANDri]] - ; CHECK: BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 %0(s64) = COPY %d0 %1(s64) = COPY %d1 %2(s1) = G_FCMP floatpred(oeq), %0(s64), %1 %3(s32) = G_ZEXT %2(s1) %r0 = COPY %3(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_fcmp_ogt_s64 @@ -944,19 +944,19 @@ body: | ; CHECK-LABEL: name: test_fcmp_ogt_s64 ; CHECK: [[COPY:%[0-9]+]]:dpr = COPY %d0 ; CHECK: [[COPY1:%[0-9]+]]:dpr = COPY %d1 - ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ - ; CHECK: VCMPD [[COPY]], [[COPY1]], 14, _, implicit-def %fpscr_nzcv - ; CHECK: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, %noreg, %noreg + ; CHECK: VCMPD [[COPY]], [[COPY1]], 14, %noreg, implicit-def %fpscr_nzcv + ; CHECK: FMSTAT 14, %noreg, implicit-def %cpsr, implicit %fpscr_nzcv ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 12, %cpsr - ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, _, _ + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, %noreg, %noreg ; CHECK: %r0 = COPY [[ANDri]] - ; CHECK: BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 %0(s64) = COPY %d0 %1(s64) = COPY %d1 %2(s1) = G_FCMP floatpred(ogt), %0(s64), %1 %3(s32) = G_ZEXT %2(s1) %r0 = COPY %3(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... 
--- name: test_fcmp_oge_s64 @@ -975,19 +975,19 @@ body: | ; CHECK-LABEL: name: test_fcmp_oge_s64 ; CHECK: [[COPY:%[0-9]+]]:dpr = COPY %d0 ; CHECK: [[COPY1:%[0-9]+]]:dpr = COPY %d1 - ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ - ; CHECK: VCMPD [[COPY]], [[COPY1]], 14, _, implicit-def %fpscr_nzcv - ; CHECK: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, %noreg, %noreg + ; CHECK: VCMPD [[COPY]], [[COPY1]], 14, %noreg, implicit-def %fpscr_nzcv + ; CHECK: FMSTAT 14, %noreg, implicit-def %cpsr, implicit %fpscr_nzcv ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 10, %cpsr - ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, _, _ + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, %noreg, %noreg ; CHECK: %r0 = COPY [[ANDri]] - ; CHECK: BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 %0(s64) = COPY %d0 %1(s64) = COPY %d1 %2(s1) = G_FCMP floatpred(oge), %0(s64), %1 %3(s32) = G_ZEXT %2(s1) %r0 = COPY %3(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_fcmp_olt_s64 @@ -1006,19 +1006,19 @@ body: | ; CHECK-LABEL: name: test_fcmp_olt_s64 ; CHECK: [[COPY:%[0-9]+]]:dpr = COPY %d0 ; CHECK: [[COPY1:%[0-9]+]]:dpr = COPY %d1 - ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ - ; CHECK: VCMPD [[COPY]], [[COPY1]], 14, _, implicit-def %fpscr_nzcv - ; CHECK: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, %noreg, %noreg + ; CHECK: VCMPD [[COPY]], [[COPY1]], 14, %noreg, implicit-def %fpscr_nzcv + ; CHECK: FMSTAT 14, %noreg, implicit-def %cpsr, implicit %fpscr_nzcv ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 4, %cpsr - ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, _, _ + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, %noreg, %noreg ; CHECK: %r0 = COPY [[ANDri]] - ; CHECK: BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 %0(s64) = COPY %d0 %1(s64) = COPY %d1 %2(s1) = G_FCMP floatpred(olt), %0(s64), %1 %3(s32) = G_ZEXT %2(s1) %r0 = COPY %3(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_fcmp_ole_s64 @@ -1037,19 +1037,19 @@ body: | ; CHECK-LABEL: name: test_fcmp_ole_s64 ; CHECK: [[COPY:%[0-9]+]]:dpr = COPY %d0 ; CHECK: [[COPY1:%[0-9]+]]:dpr = COPY %d1 - ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ - ; CHECK: VCMPD [[COPY]], [[COPY1]], 14, _, implicit-def %fpscr_nzcv - ; CHECK: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, %noreg, %noreg + ; CHECK: VCMPD [[COPY]], [[COPY1]], 14, %noreg, implicit-def %fpscr_nzcv + ; CHECK: FMSTAT 14, %noreg, implicit-def %cpsr, implicit %fpscr_nzcv ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 9, %cpsr - ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, _, _ + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, %noreg, %noreg ; CHECK: %r0 = COPY [[ANDri]] - ; CHECK: BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 %0(s64) = COPY %d0 %1(s64) = COPY %d1 %2(s1) = G_FCMP floatpred(ole), %0(s64), %1 %3(s32) = G_ZEXT %2(s1) %r0 = COPY %3(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... 
--- name: test_fcmp_ord_s64 @@ -1068,19 +1068,19 @@ body: | ; CHECK-LABEL: name: test_fcmp_ord_s64 ; CHECK: [[COPY:%[0-9]+]]:dpr = COPY %d0 ; CHECK: [[COPY1:%[0-9]+]]:dpr = COPY %d1 - ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ - ; CHECK: VCMPD [[COPY]], [[COPY1]], 14, _, implicit-def %fpscr_nzcv - ; CHECK: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, %noreg, %noreg + ; CHECK: VCMPD [[COPY]], [[COPY1]], 14, %noreg, implicit-def %fpscr_nzcv + ; CHECK: FMSTAT 14, %noreg, implicit-def %cpsr, implicit %fpscr_nzcv ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 7, %cpsr - ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, _, _ + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, %noreg, %noreg ; CHECK: %r0 = COPY [[ANDri]] - ; CHECK: BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 %0(s64) = COPY %d0 %1(s64) = COPY %d1 %2(s1) = G_FCMP floatpred(ord), %0(s64), %1 %3(s32) = G_ZEXT %2(s1) %r0 = COPY %3(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_fcmp_ugt_s64 @@ -1099,19 +1099,19 @@ body: | ; CHECK-LABEL: name: test_fcmp_ugt_s64 ; CHECK: [[COPY:%[0-9]+]]:dpr = COPY %d0 ; CHECK: [[COPY1:%[0-9]+]]:dpr = COPY %d1 - ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ - ; CHECK: VCMPD [[COPY]], [[COPY1]], 14, _, implicit-def %fpscr_nzcv - ; CHECK: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, %noreg, %noreg + ; CHECK: VCMPD [[COPY]], [[COPY1]], 14, %noreg, implicit-def %fpscr_nzcv + ; CHECK: FMSTAT 14, %noreg, implicit-def %cpsr, implicit %fpscr_nzcv ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 8, %cpsr - ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, _, _ + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, %noreg, %noreg ; CHECK: %r0 = COPY [[ANDri]] - ; CHECK: BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 %0(s64) = COPY %d0 %1(s64) = COPY %d1 %2(s1) = G_FCMP floatpred(ugt), %0(s64), %1 %3(s32) = G_ZEXT %2(s1) %r0 = COPY %3(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_fcmp_uge_s64 @@ -1130,19 +1130,19 @@ body: | ; CHECK-LABEL: name: test_fcmp_uge_s64 ; CHECK: [[COPY:%[0-9]+]]:dpr = COPY %d0 ; CHECK: [[COPY1:%[0-9]+]]:dpr = COPY %d1 - ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ - ; CHECK: VCMPD [[COPY]], [[COPY1]], 14, _, implicit-def %fpscr_nzcv - ; CHECK: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, %noreg, %noreg + ; CHECK: VCMPD [[COPY]], [[COPY1]], 14, %noreg, implicit-def %fpscr_nzcv + ; CHECK: FMSTAT 14, %noreg, implicit-def %cpsr, implicit %fpscr_nzcv ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 5, %cpsr - ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, _, _ + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, %noreg, %noreg ; CHECK: %r0 = COPY [[ANDri]] - ; CHECK: BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 %0(s64) = COPY %d0 %1(s64) = COPY %d1 %2(s1) = G_FCMP floatpred(uge), %0(s64), %1 %3(s32) = G_ZEXT %2(s1) %r0 = COPY %3(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... 
--- name: test_fcmp_ult_s64 @@ -1161,19 +1161,19 @@ body: | ; CHECK-LABEL: name: test_fcmp_ult_s64 ; CHECK: [[COPY:%[0-9]+]]:dpr = COPY %d0 ; CHECK: [[COPY1:%[0-9]+]]:dpr = COPY %d1 - ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ - ; CHECK: VCMPD [[COPY]], [[COPY1]], 14, _, implicit-def %fpscr_nzcv - ; CHECK: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, %noreg, %noreg + ; CHECK: VCMPD [[COPY]], [[COPY1]], 14, %noreg, implicit-def %fpscr_nzcv + ; CHECK: FMSTAT 14, %noreg, implicit-def %cpsr, implicit %fpscr_nzcv ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 11, %cpsr - ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, _, _ + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, %noreg, %noreg ; CHECK: %r0 = COPY [[ANDri]] - ; CHECK: BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 %0(s64) = COPY %d0 %1(s64) = COPY %d1 %2(s1) = G_FCMP floatpred(ult), %0(s64), %1 %3(s32) = G_ZEXT %2(s1) %r0 = COPY %3(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_fcmp_ule_s64 @@ -1192,19 +1192,19 @@ body: | ; CHECK-LABEL: name: test_fcmp_ule_s64 ; CHECK: [[COPY:%[0-9]+]]:dpr = COPY %d0 ; CHECK: [[COPY1:%[0-9]+]]:dpr = COPY %d1 - ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ - ; CHECK: VCMPD [[COPY]], [[COPY1]], 14, _, implicit-def %fpscr_nzcv - ; CHECK: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, %noreg, %noreg + ; CHECK: VCMPD [[COPY]], [[COPY1]], 14, %noreg, implicit-def %fpscr_nzcv + ; CHECK: FMSTAT 14, %noreg, implicit-def %cpsr, implicit %fpscr_nzcv ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 13, %cpsr - ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, _, _ + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, %noreg, %noreg ; CHECK: %r0 = COPY [[ANDri]] - ; CHECK: BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 %0(s64) = COPY %d0 %1(s64) = COPY %d1 %2(s1) = G_FCMP floatpred(ule), %0(s64), %1 %3(s32) = G_ZEXT %2(s1) %r0 = COPY %3(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_fcmp_une_s64 @@ -1223,19 +1223,19 @@ body: | ; CHECK-LABEL: name: test_fcmp_une_s64 ; CHECK: [[COPY:%[0-9]+]]:dpr = COPY %d0 ; CHECK: [[COPY1:%[0-9]+]]:dpr = COPY %d1 - ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ - ; CHECK: VCMPD [[COPY]], [[COPY1]], 14, _, implicit-def %fpscr_nzcv - ; CHECK: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, %noreg, %noreg + ; CHECK: VCMPD [[COPY]], [[COPY1]], 14, %noreg, implicit-def %fpscr_nzcv + ; CHECK: FMSTAT 14, %noreg, implicit-def %cpsr, implicit %fpscr_nzcv ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 1, %cpsr - ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, _, _ + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, %noreg, %noreg ; CHECK: %r0 = COPY [[ANDri]] - ; CHECK: BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 %0(s64) = COPY %d0 %1(s64) = COPY %d1 %2(s1) = G_FCMP floatpred(une), %0(s64), %1 %3(s32) = G_ZEXT %2(s1) %r0 = COPY %3(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... 
--- name: test_fcmp_uno_s64 @@ -1254,19 +1254,19 @@ body: | ; CHECK-LABEL: name: test_fcmp_uno_s64 ; CHECK: [[COPY:%[0-9]+]]:dpr = COPY %d0 ; CHECK: [[COPY1:%[0-9]+]]:dpr = COPY %d1 - ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ - ; CHECK: VCMPD [[COPY]], [[COPY1]], 14, _, implicit-def %fpscr_nzcv - ; CHECK: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, %noreg, %noreg + ; CHECK: VCMPD [[COPY]], [[COPY1]], 14, %noreg, implicit-def %fpscr_nzcv + ; CHECK: FMSTAT 14, %noreg, implicit-def %cpsr, implicit %fpscr_nzcv ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 6, %cpsr - ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, _, _ + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, %noreg, %noreg ; CHECK: %r0 = COPY [[ANDri]] - ; CHECK: BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 %0(s64) = COPY %d0 %1(s64) = COPY %d1 %2(s1) = G_FCMP floatpred(uno), %0(s64), %1 %3(s32) = G_ZEXT %2(s1) %r0 = COPY %3(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_fcmp_one_s64 @@ -1285,22 +1285,22 @@ body: | ; CHECK-LABEL: name: test_fcmp_one_s64 ; CHECK: [[COPY:%[0-9]+]]:dpr = COPY %d0 ; CHECK: [[COPY1:%[0-9]+]]:dpr = COPY %d1 - ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ - ; CHECK: VCMPD [[COPY]], [[COPY1]], 14, _, implicit-def %fpscr_nzcv - ; CHECK: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, %noreg, %noreg + ; CHECK: VCMPD [[COPY]], [[COPY1]], 14, %noreg, implicit-def %fpscr_nzcv + ; CHECK: FMSTAT 14, %noreg, implicit-def %cpsr, implicit %fpscr_nzcv ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 12, %cpsr - ; CHECK: VCMPD [[COPY]], [[COPY1]], 14, _, implicit-def %fpscr_nzcv - ; CHECK: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv + ; CHECK: VCMPD [[COPY]], [[COPY1]], 14, %noreg, implicit-def %fpscr_nzcv + ; CHECK: FMSTAT 14, %noreg, implicit-def %cpsr, implicit %fpscr_nzcv ; CHECK: [[MOVCCi1:%[0-9]+]]:gpr = MOVCCi [[MOVCCi]], 1, 4, %cpsr - ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi1]], 1, 14, _, _ + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi1]], 1, 14, %noreg, %noreg ; CHECK: %r0 = COPY [[ANDri]] - ; CHECK: BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 %0(s64) = COPY %d0 %1(s64) = COPY %d1 %2(s1) = G_FCMP floatpred(one), %0(s64), %1 %3(s32) = G_ZEXT %2(s1) %r0 = COPY %3(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... 
--- name: test_fcmp_ueq_s64 @@ -1319,20 +1319,20 @@ body: | ; CHECK-LABEL: name: test_fcmp_ueq_s64 ; CHECK: [[COPY:%[0-9]+]]:dpr = COPY %d0 ; CHECK: [[COPY1:%[0-9]+]]:dpr = COPY %d1 - ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ - ; CHECK: VCMPD [[COPY]], [[COPY1]], 14, _, implicit-def %fpscr_nzcv - ; CHECK: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, %noreg, %noreg + ; CHECK: VCMPD [[COPY]], [[COPY1]], 14, %noreg, implicit-def %fpscr_nzcv + ; CHECK: FMSTAT 14, %noreg, implicit-def %cpsr, implicit %fpscr_nzcv ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 0, %cpsr - ; CHECK: VCMPD [[COPY]], [[COPY1]], 14, _, implicit-def %fpscr_nzcv - ; CHECK: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv + ; CHECK: VCMPD [[COPY]], [[COPY1]], 14, %noreg, implicit-def %fpscr_nzcv + ; CHECK: FMSTAT 14, %noreg, implicit-def %cpsr, implicit %fpscr_nzcv ; CHECK: [[MOVCCi1:%[0-9]+]]:gpr = MOVCCi [[MOVCCi]], 1, 6, %cpsr - ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi1]], 1, 14, _, _ + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi1]], 1, 14, %noreg, %noreg ; CHECK: %r0 = COPY [[ANDri]] - ; CHECK: BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 %0(s64) = COPY %d0 %1(s64) = COPY %d1 %2(s1) = G_FCMP floatpred(ueq), %0(s64), %1 %3(s32) = G_ZEXT %2(s1) %r0 = COPY %3(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... diff --git a/test/CodeGen/ARM/GlobalISel/arm-instruction-select-combos.mir b/test/CodeGen/ARM/GlobalISel/arm-instruction-select-combos.mir index d8da96103fba..3227febb7ead 100644 --- a/test/CodeGen/ARM/GlobalISel/arm-instruction-select-combos.mir +++ b/test/CodeGen/ARM/GlobalISel/arm-instruction-select-combos.mir @@ -20,9 +20,33 @@ define void @test_bicri_commutative_and() { ret void } define void @test_bicri_commutative_both() { ret void } + define void @test_pkhbt() #0 { ret void } + define void @test_pkhbt_commutative() #0 { ret void } + define void @test_pkhbt_imm16_31() #0 { ret void } + define void @test_pkhbt_unshifted() #0 { ret void } + + define void @test_pkhtb_imm16() #0 { ret void } + define void @test_pkhtb_imm1_15() #0 { ret void } + + define void @test_movti16_0xffff() #2 { ret void } + + define void @test_vnmuls() #3 { ret void } + define void @test_vnmuls_reassociate() #3 { ret void } + define void @test_vnmuld() #3 { ret void } + + define void @test_vfnmas() #4 { ret void } + define void @test_vfnmad() #4 { ret void } + + define void @test_vfmss() #4 { ret void } + define void @test_vfmsd() #4 { ret void } + + define void @test_vfnmss() #4 { ret void } + attributes #0 = { "target-features"="+v6" } attributes #1 = { "target-features"="-v6" } attributes #2 = { "target-features"="+v6t2" } + attributes #3 = { "target-features"="+vfp2" } + attributes #4 = { "target-features"="+vfp4" } ... --- name: test_mla @@ -50,13 +74,13 @@ body: | %3(s32) = G_MUL %0, %1 %4(s32) = G_ADD %3, %2 - ; CHECK: [[VREGR:%[0-9]+]]:gprnopc = MLA [[VREGX]], [[VREGY]], [[VREGZ]], 14, _, _ + ; CHECK: [[VREGR:%[0-9]+]]:gprnopc = MLA [[VREGX]], [[VREGY]], [[VREGZ]], 14, %noreg, %noreg %r0 = COPY %4(s32) ; CHECK: %r0 = COPY [[VREGR]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 ... 
--- name: test_mla_commutative @@ -84,13 +108,13 @@ body: | %3(s32) = G_MUL %0, %1 %4(s32) = G_ADD %2, %3 - ; CHECK: [[VREGR:%[0-9]+]]:gprnopc = MLA [[VREGX]], [[VREGY]], [[VREGZ]], 14, _, _ + ; CHECK: [[VREGR:%[0-9]+]]:gprnopc = MLA [[VREGX]], [[VREGY]], [[VREGZ]], 14, %noreg, %noreg %r0 = COPY %4(s32) ; CHECK: %r0 = COPY [[VREGR]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 ... --- name: test_mla_v5 @@ -118,13 +142,13 @@ body: | %3(s32) = G_MUL %0, %1 %4(s32) = G_ADD %3, %2 - ; CHECK: [[VREGR:%[0-9]+]]:gprnopc = MLAv5 [[VREGX]], [[VREGY]], [[VREGZ]], 14, _, _ + ; CHECK: [[VREGR:%[0-9]+]]:gprnopc = MLAv5 [[VREGX]], [[VREGY]], [[VREGZ]], 14, %noreg, %noreg %r0 = COPY %4(s32) ; CHECK: %r0 = COPY [[VREGR]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 ... --- name: test_mls @@ -152,13 +176,13 @@ body: | %3(s32) = G_MUL %0, %1 %4(s32) = G_SUB %2, %3 - ; CHECK: [[VREGR:%[0-9]+]]:gpr = MLS [[VREGX]], [[VREGY]], [[VREGZ]], 14, _ + ; CHECK: [[VREGR:%[0-9]+]]:gpr = MLS [[VREGX]], [[VREGY]], [[VREGZ]], 14, %noreg %r0 = COPY %4(s32) ; CHECK: %r0 = COPY [[VREGR]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 ... --- name: test_no_mls @@ -186,14 +210,14 @@ body: | %3(s32) = G_MUL %0, %1 %4(s32) = G_SUB %2, %3 - ; CHECK: [[VREGM:%[0-9]+]]:gprnopc = MULv5 [[VREGX]], [[VREGY]], 14, _, _ - ; CHECK: [[VREGR:%[0-9]+]]:gpr = SUBrr [[VREGZ]], [[VREGM]], 14, _, _ + ; CHECK: [[VREGM:%[0-9]+]]:gprnopc = MULv5 [[VREGX]], [[VREGY]], 14, %noreg, %noreg + ; CHECK: [[VREGR:%[0-9]+]]:gpr = SUBrr [[VREGZ]], [[VREGM]], 14, %noreg, %noreg %r0 = COPY %4(s32) ; CHECK: %r0 = COPY [[VREGR]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 ... --- name: test_shifts_to_revsh @@ -238,8 +262,8 @@ body: | %r0 = COPY %9(s32) ; CHECK: %r0 = COPY [[VREGR]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 ... --- name: test_shifts_to_revsh_commutative @@ -284,8 +308,8 @@ body: | %r0 = COPY %9(s32) ; CHECK: %r0 = COPY [[VREGR]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 ... --- name: test_shifts_no_revsh_features @@ -329,7 +353,7 @@ body: | %r0 = COPY %9(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_shifts_no_revsh_constants @@ -373,7 +397,7 @@ body: | %r0 = COPY %9(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_bicrr @@ -400,13 +424,13 @@ body: | %2(s32) = G_CONSTANT i32 -1 %3(s32) = G_XOR %1, %2 %4(s32) = G_AND %0, %3 - ; CHECK: [[VREGR:%[0-9]+]]:gpr = BICrr [[VREGX]], [[VREGY]], 14, _, _ + ; CHECK: [[VREGR:%[0-9]+]]:gpr = BICrr [[VREGX]], [[VREGY]], 14, %noreg, %noreg %r0 = COPY %4(s32) ; CHECK: %r0 = COPY [[VREGR]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 ... 
--- name: test_bicrr_commutative @@ -433,13 +457,13 @@ body: | %2(s32) = G_CONSTANT i32 -1 %3(s32) = G_XOR %1, %2 %4(s32) = G_AND %3, %0 - ; CHECK: [[VREGR:%[0-9]+]]:gpr = BICrr [[VREGX]], [[VREGY]], 14, _, _ + ; CHECK: [[VREGR:%[0-9]+]]:gpr = BICrr [[VREGX]], [[VREGY]], 14, %noreg, %noreg %r0 = COPY %4(s32) ; CHECK: %r0 = COPY [[VREGR]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 ... --- name: test_bicri @@ -471,13 +495,13 @@ body: | %2(s32) = G_CONSTANT i32 -1 %3(s32) = G_XOR %1, %2 %4(s32) = G_AND %0, %3 - ; CHECK: [[VREGR:%[0-9]+]]:gpr = BICri [[VREGX]], 192, 14, _, _ + ; CHECK: [[VREGR:%[0-9]+]]:gpr = BICri [[VREGX]], 192, 14, %noreg, %noreg %r0 = COPY %4(s32) ; CHECK: %r0 = COPY [[VREGR]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 ... --- name: test_bicri_commutative_xor @@ -504,13 +528,13 @@ body: | %2(s32) = G_CONSTANT i32 -1 %3(s32) = G_XOR %2, %1 %4(s32) = G_AND %0, %3 - ; CHECK: [[VREGR:%[0-9]+]]:gpr = BICri [[VREGX]], 192, 14, _, _ + ; CHECK: [[VREGR:%[0-9]+]]:gpr = BICri [[VREGX]], 192, 14, %noreg, %noreg %r0 = COPY %4(s32) ; CHECK: %r0 = COPY [[VREGR]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 ... --- name: test_bicri_commutative_and @@ -537,13 +561,13 @@ body: | %2(s32) = G_CONSTANT i32 -1 %3(s32) = G_XOR %1, %2 %4(s32) = G_AND %3, %0 - ; CHECK: [[VREGR:%[0-9]+]]:gpr = BICri [[VREGX]], 192, 14, _, _ + ; CHECK: [[VREGR:%[0-9]+]]:gpr = BICri [[VREGX]], 192, 14, %noreg, %noreg %r0 = COPY %4(s32) ; CHECK: %r0 = COPY [[VREGR]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 ... --- name: test_bicri_commutative_both @@ -570,11 +594,551 @@ body: | %2(s32) = G_CONSTANT i32 -1 %3(s32) = G_XOR %2, %1 %4(s32) = G_AND %3, %0 - ; CHECK: [[VREGR:%[0-9]+]]:gpr = BICri [[VREGX]], 192, 14, _, _ + ; CHECK: [[VREGR:%[0-9]+]]:gpr = BICri [[VREGX]], 192, 14, %noreg, %noreg %r0 = COPY %4(s32) ; CHECK: %r0 = COPY [[VREGR]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 +... +--- +name: test_pkhbt +# CHECK-LABEL: name: test_pkhbt +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: gprb } + - { id: 1, class: gprb } + - { id: 2, class: gprb } + - { id: 3, class: gprb } + - { id: 4, class: gprb } + - { id: 5, class: gprb } + - { id: 6, class: gprb } + - { id: 7, class: gprb } + - { id: 8, class: gprb } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s32) = COPY %r0 + %1(s32) = COPY %r1 + ; CHECK-DAG: [[VREGX:%[0-9]+]]:gprnopc = COPY %r0 + ; CHECK-DAG: [[VREGY:%[0-9]+]]:gprnopc = COPY %r1 + + %2(s32) = G_CONSTANT i32 65535 ; 0xFFFF + %3(s32) = G_AND %0, %2 + + %4(s32) = G_CONSTANT i32 7 + %5(s32) = G_SHL %1, %4 + %6(s32) = G_CONSTANT i32 4294901760 ; 0xFFFF0000 + %7(s32) = G_AND %5, %6 + + %8(s32) = G_OR %3, %7 + ; CHECK: [[VREGR:%[0-9]+]]:gprnopc = PKHBT [[VREGX]], [[VREGY]], 7, 14, %noreg + + %r0 = COPY %8(s32) + ; CHECK: %r0 = COPY [[VREGR]] + + BX_RET 14, %noreg, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 +... 
+--- +name: test_pkhbt_commutative +# CHECK-LABEL: name: test_pkhbt_commutative +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: gprb } + - { id: 1, class: gprb } + - { id: 2, class: gprb } + - { id: 3, class: gprb } + - { id: 4, class: gprb } + - { id: 5, class: gprb } + - { id: 6, class: gprb } + - { id: 7, class: gprb } + - { id: 8, class: gprb } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s32) = COPY %r0 + %1(s32) = COPY %r1 + ; CHECK-DAG: [[VREGX:%[0-9]+]]:gprnopc = COPY %r0 + ; CHECK-DAG: [[VREGY:%[0-9]+]]:gprnopc = COPY %r1 + + %2(s32) = G_CONSTANT i32 65535 ; 0xFFFF + %3(s32) = G_AND %0, %2 + + %4(s32) = G_CONSTANT i32 7 + %5(s32) = G_SHL %1, %4 + %6(s32) = G_CONSTANT i32 4294901760 ; 0xFFFF0000 + %7(s32) = G_AND %5, %6 + + %8(s32) = G_OR %7, %3 + ; CHECK: [[VREGR:%[0-9]+]]:gprnopc = PKHBT [[VREGX]], [[VREGY]], 7, 14, %noreg + + %r0 = COPY %8(s32) + ; CHECK: %r0 = COPY [[VREGR]] + + BX_RET 14, %noreg, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 +... +--- +name: test_pkhbt_imm16_31 +# CHECK-LABEL: name: test_pkhbt_imm16_31 +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: gprb } + - { id: 1, class: gprb } + - { id: 2, class: gprb } + - { id: 3, class: gprb } + - { id: 4, class: gprb } + - { id: 5, class: gprb } + - { id: 6, class: gprb } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s32) = COPY %r0 + %1(s32) = COPY %r1 + ; CHECK-DAG: [[VREGX:%[0-9]+]]:gprnopc = COPY %r0 + ; CHECK-DAG: [[VREGY:%[0-9]+]]:gprnopc = COPY %r1 + + %2(s32) = G_CONSTANT i32 65535 ; 0xFFFF + %3(s32) = G_AND %0, %2 + + %4(s32) = G_CONSTANT i32 17 + %5(s32) = G_SHL %1, %4 + + %6(s32) = G_OR %3, %5 + ; CHECK: [[VREGR:%[0-9]+]]:gprnopc = PKHBT [[VREGX]], [[VREGY]], 17, 14, %noreg + + %r0 = COPY %6(s32) + ; CHECK: %r0 = COPY [[VREGR]] + + BX_RET 14, %noreg, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 +... +--- +name: test_pkhbt_unshifted +# CHECK-LABEL: name: test_pkhbt_unshifted +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: gprb } + - { id: 1, class: gprb } + - { id: 2, class: gprb } + - { id: 3, class: gprb } + - { id: 4, class: gprb } + - { id: 5, class: gprb } + - { id: 6, class: gprb } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s32) = COPY %r0 + %1(s32) = COPY %r1 + ; CHECK-DAG: [[VREGX:%[0-9]+]]:gprnopc = COPY %r0 + ; CHECK-DAG: [[VREGY:%[0-9]+]]:gprnopc = COPY %r1 + + %2(s32) = G_CONSTANT i32 65535 ; 0xFFFF + %3(s32) = G_AND %0, %2 + + %4(s32) = G_CONSTANT i32 4294901760 ; 0xFFFF0000 + %5(s32) = G_AND %1, %4 + + %6(s32) = G_OR %3, %5 + ; CHECK: [[VREGR:%[0-9]+]]:gprnopc = PKHBT [[VREGX]], [[VREGY]], 0, 14, %noreg + + %r0 = COPY %6(s32) + ; CHECK: %r0 = COPY [[VREGR]] + + BX_RET 14, %noreg, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 +... 
+--- +name: test_pkhtb_imm16 +# CHECK-LABEL: name: test_pkhtb_imm16 +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: gprb } + - { id: 1, class: gprb } + - { id: 2, class: gprb } + - { id: 3, class: gprb } + - { id: 4, class: gprb } + - { id: 5, class: gprb } + - { id: 6, class: gprb } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s32) = COPY %r0 + %1(s32) = COPY %r1 + ; CHECK-DAG: [[VREGX:%[0-9]+]]:gprnopc = COPY %r0 + ; CHECK-DAG: [[VREGY:%[0-9]+]]:gprnopc = COPY %r1 + + %2(s32) = G_CONSTANT i32 4294901760 ; 0xFFFF0000 + %3(s32) = G_AND %0, %2 + + %4(s32) = G_CONSTANT i32 16 + %5(s32) = G_LSHR %1, %4 + + %6(s32) = G_OR %3, %5 + ; CHECK: [[VREGR:%[0-9]+]]:gprnopc = PKHTB [[VREGX]], [[VREGY]], 16, 14, %noreg + + %r0 = COPY %6(s32) + ; CHECK: %r0 = COPY [[VREGR]] + + BX_RET 14, %noreg, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 +... +--- +name: test_pkhtb_imm1_15 +# CHECK-LABEL: name: test_pkhtb_imm1_15 +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: gprb } + - { id: 1, class: gprb } + - { id: 2, class: gprb } + - { id: 3, class: gprb } + - { id: 4, class: gprb } + - { id: 5, class: gprb } + - { id: 6, class: gprb } + - { id: 7, class: gprb } + - { id: 8, class: gprb } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s32) = COPY %r0 + %1(s32) = COPY %r1 + ; CHECK-DAG: [[VREGX:%[0-9]+]]:gprnopc = COPY %r0 + ; CHECK-DAG: [[VREGY:%[0-9]+]]:gprnopc = COPY %r1 + + %2(s32) = G_CONSTANT i32 4294901760 ; 0xFFFF0000 + %3(s32) = G_AND %0, %2 + + %4(s32) = G_CONSTANT i32 7 + %5(s32) = G_LSHR %1, %4 + %6(s32) = G_CONSTANT i32 65535 ; 0xFFFF + %7(s32) = G_AND %5, %6 + + %8(s32) = G_OR %3, %7 + ; CHECK: [[VREGR:%[0-9]+]]:gprnopc = PKHTB [[VREGX]], [[VREGY]], 7, 14, %noreg + + %r0 = COPY %8(s32) + ; CHECK: %r0 = COPY [[VREGR]] + + BX_RET 14, %noreg, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 +... +--- +name: test_movti16_0xffff +# CHECK-LABEL: name: test_movti16_0xffff +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: gprb } + - { id: 1, class: gprb } + - { id: 2, class: gprb } +body: | + bb.0: + liveins: %r0 + + %0(s32) = COPY %r0 + ; CHECK: [[VREGX:%[0-9]+]]:gpr = COPY %r0 + + %1(s32) = G_CONSTANT i32 4294901760 ; 0xFFFF0000 + + %2(s32) = G_OR %0, %1 + ; CHECK: [[VREGR:%[0-9]+]]:gprnopc = MOVTi16 [[VREGX]], 65535, 14, %noreg + + %r0 = COPY %2(s32) + ; CHECK: %r0 = COPY [[VREGR]] + + BX_RET 14, %noreg, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 +... +--- +name: test_vnmuls +# CHECK-LABEL: name: test_vnmuls +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: fprb } + - { id: 1, class: fprb } + - { id: 2, class: fprb } + - { id: 3, class: fprb } +body: | + bb.0: + liveins: %s0, %s1 + + %0(s32) = COPY %s0 + %1(s32) = COPY %s1 + ; CHECK-DAG: [[VREGX:%[0-9]+]]:spr = COPY %s0 + ; CHECK-DAG: [[VREGY:%[0-9]+]]:spr = COPY %s1 + + %2(s32) = G_FMUL %0, %1 + %3(s32) = G_FNEG %2 + ; CHECK: [[VREGR:%[0-9]+]]:spr = VNMULS [[VREGX]], [[VREGY]], 14, %noreg + + %s0 = COPY %3(s32) + ; CHECK: %s0 = COPY [[VREGR]] + + BX_RET 14, %noreg, implicit %s0 + ; CHECK: BX_RET 14, %noreg, implicit %s0 +... 
+--- +name: test_vnmuls_reassociate +# CHECK-LABEL: name: test_vnmuls_reassociate +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: fprb } + - { id: 1, class: fprb } + - { id: 2, class: fprb } + - { id: 3, class: fprb } +body: | + bb.0: + liveins: %s0, %s1 + + %0(s32) = COPY %s0 + %1(s32) = COPY %s1 + ; CHECK-DAG: [[VREGX:%[0-9]+]]:spr = COPY %s0 + ; CHECK-DAG: [[VREGY:%[0-9]+]]:spr = COPY %s1 + + %2(s32) = G_FNEG %0 + %3(s32) = G_FMUL %1, %2 + ; CHECK: [[VREGR:%[0-9]+]]:spr = VNMULS [[VREGX]], [[VREGY]], 14, %noreg + + %s0 = COPY %3(s32) + ; CHECK: %s0 = COPY [[VREGR]] + + BX_RET 14, %noreg, implicit %s0 + ; CHECK: BX_RET 14, %noreg, implicit %s0 +... +--- +name: test_vnmuld +# CHECK-LABEL: name: test_vnmuld +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: fprb } + - { id: 1, class: fprb } + - { id: 2, class: fprb } + - { id: 3, class: fprb } +body: | + bb.0: + liveins: %d0, %d1 + + %0(s64) = COPY %d0 + %1(s64) = COPY %d1 + ; CHECK-DAG: [[VREGX:%[0-9]+]]:dpr = COPY %d0 + ; CHECK-DAG: [[VREGY:%[0-9]+]]:dpr = COPY %d1 + + %2(s64) = G_FMUL %0, %1 + %3(s64) = G_FNEG %2 + ; CHECK: [[VREGR:%[0-9]+]]:dpr = VNMULD [[VREGX]], [[VREGY]], 14, %noreg + + %d0 = COPY %3(s64) + ; CHECK: %d0 = COPY [[VREGR]] + + BX_RET 14, %noreg, implicit %d0 + ; CHECK: BX_RET 14, %noreg, implicit %d0 +... +--- +name: test_vfnmas +# CHECK-LABEL: name: test_vfnmas +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: fprb } + - { id: 1, class: fprb } + - { id: 2, class: fprb } + - { id: 3, class: fprb } + - { id: 4, class: fprb } +body: | + bb.0: + liveins: %s0, %s1, %s2 + + %0(s32) = COPY %s0 + %1(s32) = COPY %s1 + %2(s32) = COPY %s2 + ; CHECK-DAG: [[VREGX:%[0-9]+]]:spr = COPY %s0 + ; CHECK-DAG: [[VREGY:%[0-9]+]]:spr = COPY %s1 + ; CHECK-DAG: [[VREGZ:%[0-9]+]]:spr = COPY %s2 + + %3(s32) = G_FMA %0, %1, %2 + %4(s32) = G_FNEG %3 + ; CHECK: [[VREGR:%[0-9]+]]:spr = VFNMAS [[VREGZ]], [[VREGX]], [[VREGY]], 14, %noreg + + %s0 = COPY %4(s32) + ; CHECK: %s0 = COPY [[VREGR]] + + BX_RET 14, %noreg, implicit %s0 + ; CHECK: BX_RET 14, %noreg, implicit %s0 +... +--- +name: test_vfnmad +# CHECK-LABEL: name: test_vfnmad +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: fprb } + - { id: 1, class: fprb } + - { id: 2, class: fprb } + - { id: 3, class: fprb } + - { id: 4, class: fprb } + - { id: 5, class: fprb } +body: | + bb.0: + liveins: %d0, %d1, %d2 + + %0(s64) = COPY %d0 + %1(s64) = COPY %d1 + %2(s64) = COPY %d2 + ; CHECK-DAG: [[VREGX:%[0-9]+]]:dpr = COPY %d0 + ; CHECK-DAG: [[VREGY:%[0-9]+]]:dpr = COPY %d1 + ; CHECK-DAG: [[VREGZ:%[0-9]+]]:dpr = COPY %d2 + + %3(s64) = G_FNEG %0 + %4(s64) = G_FNEG %2 + %5(s64) = G_FMA %3, %1, %4 + ; CHECK: [[VREGR:%[0-9]+]]:dpr = VFNMAD [[VREGZ]], [[VREGX]], [[VREGY]], 14, %noreg + + %d0 = COPY %5(s64) + ; CHECK: %d0 = COPY [[VREGR]] + + BX_RET 14, %noreg, implicit %d0 + ; CHECK: BX_RET 14, %noreg, implicit %d0 +... 
+--- +name: test_vfmss +# CHECK-LABEL: name: test_vfmss +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: fprb } + - { id: 1, class: fprb } + - { id: 2, class: fprb } + - { id: 3, class: fprb } + - { id: 4, class: fprb } +body: | + bb.0: + liveins: %s0, %s1, %s2 + + %0(s32) = COPY %s0 + %1(s32) = COPY %s1 + %2(s32) = COPY %s2 + ; CHECK-DAG: [[VREGX:%[0-9]+]]:spr = COPY %s0 + ; CHECK-DAG: [[VREGY:%[0-9]+]]:spr = COPY %s1 + ; CHECK-DAG: [[VREGZ:%[0-9]+]]:spr = COPY %s2 + + %3(s32) = G_FNEG %0 + %4(s32) = G_FMA %3, %1, %2 + ; CHECK: [[VREGR:%[0-9]+]]:spr = VFMSS [[VREGZ]], [[VREGX]], [[VREGY]], 14, %noreg + + %s0 = COPY %4(s32) + ; CHECK: %s0 = COPY [[VREGR]] + + BX_RET 14, %noreg, implicit %s0 + ; CHECK: BX_RET 14, %noreg, implicit %s0 +... +--- +name: test_vfmsd +# CHECK-LABEL: name: test_vfmsd +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: fprb } + - { id: 1, class: fprb } + - { id: 2, class: fprb } + - { id: 3, class: fprb } + - { id: 4, class: fprb } +body: | + bb.0: + liveins: %d0, %d1, %d2 + + %0(s64) = COPY %d0 + %1(s64) = COPY %d1 + %2(s64) = COPY %d2 + ; CHECK-DAG: [[VREGX:%[0-9]+]]:dpr = COPY %d0 + ; CHECK-DAG: [[VREGY:%[0-9]+]]:dpr = COPY %d1 + ; CHECK-DAG: [[VREGZ:%[0-9]+]]:dpr = COPY %d2 + + %3(s64) = G_FNEG %1 + %4(s64) = G_FMA %0, %3, %2 + ; CHECK: [[VREGR:%[0-9]+]]:dpr = VFMSD [[VREGZ]], [[VREGX]], [[VREGY]], 14, %noreg + + %d0 = COPY %4(s64) + ; CHECK: %d0 = COPY [[VREGR]] + + BX_RET 14, %noreg, implicit %d0 + ; CHECK: BX_RET 14, %noreg, implicit %d0 +... +--- +name: test_vfnmss +# CHECK-LABEL: name: test_vfnmss +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: fprb } + - { id: 1, class: fprb } + - { id: 2, class: fprb } + - { id: 3, class: fprb } + - { id: 4, class: fprb } +body: | + bb.0: + liveins: %s0, %s1, %s2 + + %0(s32) = COPY %s0 + %1(s32) = COPY %s1 + %2(s32) = COPY %s2 + ; CHECK-DAG: [[VREGX:%[0-9]+]]:spr = COPY %s0 + ; CHECK-DAG: [[VREGY:%[0-9]+]]:spr = COPY %s1 + ; CHECK-DAG: [[VREGZ:%[0-9]+]]:spr = COPY %s2 + + %3(s32) = G_FNEG %2 + %4(s32) = G_FMA %0, %1, %3 + ; CHECK: [[VREGR:%[0-9]+]]:spr = VFNMSS [[VREGZ]], [[VREGX]], [[VREGY]], 14, %noreg + + %s0 = COPY %4(s32) + ; CHECK: %s0 = COPY [[VREGR]] + + BX_RET 14, %noreg, implicit %s0 + ; CHECK: BX_RET 14, %noreg, implicit %s0 ... 
diff --git a/test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir b/test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir index a54430878bed..c55b86485152 100644 --- a/test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir +++ b/test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir @@ -6,6 +6,7 @@ define void @test_trunc_and_zext_s16() { ret void } define void @test_trunc_and_anyext_s8() { ret void } define void @test_trunc_and_anyext_s16() { ret void } + define void @test_trunc_s64() #0 { ret void } define void @test_add_s32() { ret void } define void @test_add_fold_imm_s32() { ret void } @@ -23,6 +24,15 @@ define void @test_fdiv_s32() #0 { ret void } define void @test_fdiv_s64() #0 { ret void } + define void @test_fneg_s32() #0 { ret void } + define void @test_fneg_s64() #0 { ret void } + + define void @test_fma_s32() #4 { ret void } + define void @test_fma_s64() #4 { ret void } + + define void @test_fpext_s32_to_s64() #0 { ret void } + define void @test_fptrunc_s64_to_s32() #0 {ret void } + define void @test_sub_s32() { ret void } define void @test_sub_imm_s32() { ret void } define void @test_sub_rev_imm_s32() { ret void } @@ -46,18 +56,27 @@ define void @test_gep() { ret void } define void @test_constant_imm() { ret void } define void @test_constant_cimm() { ret void } + define void @test_pointer_constant_unconstrained() { ret void } + define void @test_pointer_constant_constrained() { ret void } + + define void @test_inttoptr_s32() { ret void } + define void @test_ptrtoint_s32() { ret void } define void @test_select_s32() { ret void } define void @test_select_ptr() { ret void } define void @test_br() { ret void } + define void @test_phi_s32() { ret void } + define void @test_phi_s64() #0 { ret void } + define void @test_soft_fp_double() #0 { ret void } attributes #0 = { "target-features"="+vfp2,-neonfp" } attributes #1 = { "target-features"="+v6" } attributes #2 = { "target-features"="+hwdiv-arm" } attributes #3 = { "target-features"="+v6t2" } + attributes #4 = { "target-features"="+vfp4,-neonfp" } ... --- name: test_trunc_and_zext_s1 @@ -81,13 +100,13 @@ body: | ; CHECK: [[VREGTRUNC:%[0-9]+]]:gpr = COPY [[VREG]] %2(s32) = G_ZEXT %1(s1) - ; CHECK: [[VREGEXT:%[0-9]+]]:gpr = ANDri [[VREGTRUNC]], 1, 14, _, _ + ; CHECK: [[VREGEXT:%[0-9]+]]:gpr = ANDri [[VREGTRUNC]], 1, 14, %noreg, %noreg %r0 = COPY %2(s32) ; CHECK: %r0 = COPY [[VREGEXT]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 ... --- name: test_trunc_and_sext_s1 @@ -111,14 +130,14 @@ body: | ; CHECK: [[VREGTRUNC:%[0-9]+]]:gpr = COPY [[VREG]] %2(s32) = G_SEXT %1(s1) - ; CHECK: [[VREGAND:%[0-9]+]]:gpr = ANDri [[VREGTRUNC]], 1, 14, _, _ - ; CHECK: [[VREGEXT:%[0-9]+]]:gpr = RSBri [[VREGAND]], 0, 14, _, _ + ; CHECK: [[VREGAND:%[0-9]+]]:gpr = ANDri [[VREGTRUNC]], 1, 14, %noreg, %noreg + ; CHECK: [[VREGEXT:%[0-9]+]]:gpr = RSBri [[VREGAND]], 0, 14, %noreg, %noreg %r0 = COPY %2(s32) ; CHECK: %r0 = COPY [[VREGEXT]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 ... 
--- name: test_trunc_and_sext_s8 @@ -142,13 +161,13 @@ body: | ; CHECK: [[VREGTRUNC:%[0-9]+]]:gprnopc = COPY [[VREG]] %2(s32) = G_SEXT %1(s8) - ; CHECK: [[VREGEXT:%[0-9]+]]:gprnopc = SXTB [[VREGTRUNC]], 0, 14, _ + ; CHECK: [[VREGEXT:%[0-9]+]]:gprnopc = SXTB [[VREGTRUNC]], 0, 14, %noreg %r0 = COPY %2(s32) ; CHECK: %r0 = COPY [[VREGEXT]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 ... --- name: test_trunc_and_zext_s16 @@ -172,13 +191,13 @@ body: | ; CHECK: [[VREGTRUNC:%[0-9]+]]:gprnopc = COPY [[VREG]] %2(s32) = G_ZEXT %1(s16) - ; CHECK: [[VREGEXT:%[0-9]+]]:gprnopc = UXTH [[VREGTRUNC]], 0, 14, _ + ; CHECK: [[VREGEXT:%[0-9]+]]:gprnopc = UXTH [[VREGTRUNC]], 0, 14, %noreg %r0 = COPY %2(s32) ; CHECK: %r0 = COPY [[VREGEXT]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 ... --- name: test_trunc_and_anyext_s8 @@ -207,8 +226,8 @@ body: | %r0 = COPY %2(s32) ; CHECK: %r0 = COPY [[VREGEXT]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 ... --- name: test_trunc_and_anyext_s16 @@ -237,8 +256,38 @@ body: | %r0 = COPY %2(s32) ; CHECK: %r0 = COPY [[VREGEXT]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 +... +--- +name: test_trunc_s64 +# CHECK-LABEL: name: test_trunc_s64 +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: fprb } + - { id: 1, class: gprb } + - { id: 2, class: gprb } +body: | + bb.0: + liveins: %r0, %d0 + + %0(s64) = COPY %d0 + ; CHECK: [[VREG:%[0-9]+]]:dpr = COPY %d0 + + %2(p0) = COPY %r0 + ; CHECK: [[PTR:%[0-9]+]]:gpr = COPY %r0 + + %1(s32) = G_TRUNC %0(s64) + ; CHECK: [[VREGTRUNC:%[0-9]+]]:gpr, [[UNINTERESTING:%[0-9]+]]:gpr = VMOVRRD [[VREG]] + + G_STORE %1(s32), %2 :: (store 4) + ; CHECK: STRi12 [[VREGTRUNC]], [[PTR]], 0, 14, %noreg + + BX_RET 14, %noreg + ; CHECK: BX_RET 14, %noreg ... --- name: test_add_s32 @@ -262,13 +311,13 @@ body: | ; CHECK: [[VREGY:%[0-9]+]]:gpr = COPY %r1 %2(s32) = G_ADD %0, %1 - ; CHECK: [[VREGSUM:%[0-9]+]]:gpr = ADDrr [[VREGX]], [[VREGY]], 14, _, _ + ; CHECK: [[VREGSUM:%[0-9]+]]:gpr = ADDrr [[VREGX]], [[VREGY]], 14, %noreg, %noreg %r0 = COPY %2(s32) ; CHECK: %r0 = COPY [[VREGSUM]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 ... --- name: test_add_fold_imm_s32 @@ -290,13 +339,13 @@ body: | %1(s32) = G_CONSTANT i32 255 %2(s32) = G_ADD %0, %1 - ; CHECK: [[VREGSUM:%[0-9]+]]:gpr = ADDri [[VREGX]], 255, 14, _, _ + ; CHECK: [[VREGSUM:%[0-9]+]]:gpr = ADDri [[VREGX]], 255, 14, %noreg, %noreg %r0 = COPY %2(s32) ; CHECK: %r0 = COPY [[VREGSUM]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 ... 
--- name: test_add_no_fold_imm_s32 @@ -317,16 +366,16 @@ body: | ; CHECK: [[VREGX:%[0-9]+]]:gpr = COPY %r0 %1(s32) = G_CONSTANT i32 65535 - ; CHECK: [[VREGY:%[0-9]+]]:gpr = MOVi16 65535, 14, _ + ; CHECK: [[VREGY:%[0-9]+]]:gpr = MOVi16 65535, 14, %noreg %2(s32) = G_ADD %0, %1 - ; CHECK: [[VREGSUM:%[0-9]+]]:gpr = ADDrr [[VREGX]], [[VREGY]], 14, _, _ + ; CHECK: [[VREGSUM:%[0-9]+]]:gpr = ADDrr [[VREGX]], [[VREGY]], 14, %noreg, %noreg %r0 = COPY %2(s32) ; CHECK: %r0 = COPY [[VREGSUM]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 ... --- name: test_fadd_s32 @@ -350,13 +399,13 @@ body: | ; CHECK: [[VREGY:%[0-9]+]]:spr = COPY %s1 %2(s32) = G_FADD %0, %1 - ; CHECK: [[VREGSUM:%[0-9]+]]:spr = VADDS [[VREGX]], [[VREGY]], 14, _ + ; CHECK: [[VREGSUM:%[0-9]+]]:spr = VADDS [[VREGX]], [[VREGY]], 14, %noreg %s0 = COPY %2(s32) ; CHECK: %s0 = COPY [[VREGSUM]] - BX_RET 14, _, implicit %s0 - ; CHECK: BX_RET 14, _, implicit %s0 + BX_RET 14, %noreg, implicit %s0 + ; CHECK: BX_RET 14, %noreg, implicit %s0 ... --- name: test_fadd_s64 @@ -380,13 +429,13 @@ body: | ; CHECK: [[VREGY:%[0-9]+]]:dpr = COPY %d1 %2(s64) = G_FADD %0, %1 - ; CHECK: [[VREGSUM:%[0-9]+]]:dpr = VADDD [[VREGX]], [[VREGY]], 14, _ + ; CHECK: [[VREGSUM:%[0-9]+]]:dpr = VADDD [[VREGX]], [[VREGY]], 14, %noreg %d0 = COPY %2(s64) ; CHECK: %d0 = COPY [[VREGSUM]] - BX_RET 14, _, implicit %d0 - ; CHECK: BX_RET 14, _, implicit %d0 + BX_RET 14, %noreg, implicit %d0 + ; CHECK: BX_RET 14, %noreg, implicit %d0 ... --- name: test_fsub_s32 @@ -410,13 +459,13 @@ body: | ; CHECK: [[VREGY:%[0-9]+]]:spr = COPY %s1 %2(s32) = G_FSUB %0, %1 - ; CHECK: [[VREGSUM:%[0-9]+]]:spr = VSUBS [[VREGX]], [[VREGY]], 14, _ + ; CHECK: [[VREGSUM:%[0-9]+]]:spr = VSUBS [[VREGX]], [[VREGY]], 14, %noreg %s0 = COPY %2(s32) ; CHECK: %s0 = COPY [[VREGSUM]] - BX_RET 14, _, implicit %s0 - ; CHECK: BX_RET 14, _, implicit %s0 + BX_RET 14, %noreg, implicit %s0 + ; CHECK: BX_RET 14, %noreg, implicit %s0 ... --- name: test_fsub_s64 @@ -440,13 +489,13 @@ body: | ; CHECK: [[VREGY:%[0-9]+]]:dpr = COPY %d1 %2(s64) = G_FSUB %0, %1 - ; CHECK: [[VREGSUM:%[0-9]+]]:dpr = VSUBD [[VREGX]], [[VREGY]], 14, _ + ; CHECK: [[VREGSUM:%[0-9]+]]:dpr = VSUBD [[VREGX]], [[VREGY]], 14, %noreg %d0 = COPY %2(s64) ; CHECK: %d0 = COPY [[VREGSUM]] - BX_RET 14, _, implicit %d0 - ; CHECK: BX_RET 14, _, implicit %d0 + BX_RET 14, %noreg, implicit %d0 + ; CHECK: BX_RET 14, %noreg, implicit %d0 ... --- name: test_fmul_s32 @@ -470,13 +519,13 @@ body: | ; CHECK: [[VREGY:%[0-9]+]]:spr = COPY %s1 %2(s32) = G_FMUL %0, %1 - ; CHECK: [[VREGSUM:%[0-9]+]]:spr = VMULS [[VREGX]], [[VREGY]], 14, _ + ; CHECK: [[VREGSUM:%[0-9]+]]:spr = VMULS [[VREGX]], [[VREGY]], 14, %noreg %s0 = COPY %2(s32) ; CHECK: %s0 = COPY [[VREGSUM]] - BX_RET 14, _, implicit %s0 - ; CHECK: BX_RET 14, _, implicit %s0 + BX_RET 14, %noreg, implicit %s0 + ; CHECK: BX_RET 14, %noreg, implicit %s0 ... --- name: test_fmul_s64 @@ -500,13 +549,13 @@ body: | ; CHECK: [[VREGY:%[0-9]+]]:dpr = COPY %d1 %2(s64) = G_FMUL %0, %1 - ; CHECK: [[VREGSUM:%[0-9]+]]:dpr = VMULD [[VREGX]], [[VREGY]], 14, _ + ; CHECK: [[VREGSUM:%[0-9]+]]:dpr = VMULD [[VREGX]], [[VREGY]], 14, %noreg %d0 = COPY %2(s64) ; CHECK: %d0 = COPY [[VREGSUM]] - BX_RET 14, _, implicit %d0 - ; CHECK: BX_RET 14, _, implicit %d0 + BX_RET 14, %noreg, implicit %d0 + ; CHECK: BX_RET 14, %noreg, implicit %d0 ... 
--- name: test_fdiv_s32 @@ -530,13 +579,13 @@ body: | ; CHECK: [[VREGY:%[0-9]+]]:spr = COPY %s1 %2(s32) = G_FDIV %0, %1 - ; CHECK: [[VREGSUM:%[0-9]+]]:spr = VDIVS [[VREGX]], [[VREGY]], 14, _ + ; CHECK: [[VREGSUM:%[0-9]+]]:spr = VDIVS [[VREGX]], [[VREGY]], 14, %noreg %s0 = COPY %2(s32) ; CHECK: %s0 = COPY [[VREGSUM]] - BX_RET 14, _, implicit %s0 - ; CHECK: BX_RET 14, _, implicit %s0 + BX_RET 14, %noreg, implicit %s0 + ; CHECK: BX_RET 14, %noreg, implicit %s0 ... --- name: test_fdiv_s64 @@ -560,13 +609,186 @@ body: | ; CHECK: [[VREGY:%[0-9]+]]:dpr = COPY %d1 %2(s64) = G_FDIV %0, %1 - ; CHECK: [[VREGSUM:%[0-9]+]]:dpr = VDIVD [[VREGX]], [[VREGY]], 14, _ + ; CHECK: [[VREGSUM:%[0-9]+]]:dpr = VDIVD [[VREGX]], [[VREGY]], 14, %noreg %d0 = COPY %2(s64) ; CHECK: %d0 = COPY [[VREGSUM]] - BX_RET 14, _, implicit %d0 - ; CHECK: BX_RET 14, _, implicit %d0 + BX_RET 14, %noreg, implicit %d0 + ; CHECK: BX_RET 14, %noreg, implicit %d0 +... +--- +name: test_fneg_s32 +# CHECK-LABEL: name: test_fneg_s32 +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: fprb } + - { id: 1, class: fprb } +body: | + bb.0: + liveins: %s0 + + %0(s32) = COPY %s0 + ; CHECK: [[VREGX:%[0-9]+]]:spr = COPY %s0 + + %1(s32) = G_FNEG %0 + ; CHECK: [[VREGSUM:%[0-9]+]]:spr = VNEGS [[VREGX]], 14, %noreg + + %s0 = COPY %1(s32) + ; CHECK: %s0 = COPY [[VREGSUM]] + + BX_RET 14, %noreg, implicit %s0 + ; CHECK: BX_RET 14, %noreg, implicit %s0 +... +--- +name: test_fneg_s64 +# CHECK-LABEL: name: test_fneg_s64 +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: fprb } + - { id: 1, class: fprb } + - { id: 2, class: fprb } +body: | + bb.0: + liveins: %d0 + + %0(s64) = COPY %d0 + ; CHECK: [[VREGX:%[0-9]+]]:dpr = COPY %d0 + + %1(s64) = G_FNEG %0 + ; CHECK: [[VREGSUM:%[0-9]+]]:dpr = VNEGD [[VREGX]], 14, %noreg + + %d0 = COPY %1(s64) + ; CHECK: %d0 = COPY [[VREGSUM]] + + BX_RET 14, %noreg, implicit %d0 + ; CHECK: BX_RET 14, %noreg, implicit %d0 +... +--- +name: test_fma_s32 +# CHECK-LABEL: name: test_fma_s32 +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: fprb } + - { id: 1, class: fprb } + - { id: 2, class: fprb } + - { id: 3, class: fprb } +body: | + bb.0: + liveins: %s0, %s1, %s2 + + %0(s32) = COPY %s0 + ; CHECK: [[VREGX:%[0-9]+]]:spr = COPY %s0 + + %1(s32) = COPY %s1 + ; CHECK: [[VREGY:%[0-9]+]]:spr = COPY %s1 + + %2(s32) = COPY %s2 + ; CHECK: [[VREGZ:%[0-9]+]]:spr = COPY %s2 + + %3(s32) = G_FMA %0, %1, %2 + ; CHECK: [[VREGR:%[0-9]+]]:spr = VFMAS [[VREGZ]], [[VREGX]], [[VREGY]], 14, %noreg + + %s0 = COPY %3(s32) + ; CHECK: %s0 = COPY [[VREGR]] + + BX_RET 14, %noreg, implicit %s0 + ; CHECK: BX_RET 14, %noreg, implicit %s0 +... 
+--- +name: test_fma_s64 +# CHECK-LABEL: name: test_fma_s64 +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: fprb } + - { id: 1, class: fprb } + - { id: 2, class: fprb } + - { id: 3, class: fprb } +body: | + bb.0: + liveins: %d0, %d1, %d2 + + %0(s64) = COPY %d0 + ; CHECK: [[VREGX:%[0-9]+]]:dpr = COPY %d0 + + %1(s64) = COPY %d1 + ; CHECK: [[VREGY:%[0-9]+]]:dpr = COPY %d1 + + %2(s64) = COPY %d2 + ; CHECK: [[VREGZ:%[0-9]+]]:dpr = COPY %d2 + + %3(s64) = G_FMA %0, %1, %2 + ; CHECK: [[VREGR:%[0-9]+]]:dpr = VFMAD [[VREGZ]], [[VREGX]], [[VREGY]], 14, %noreg + + %d0 = COPY %3(s64) + ; CHECK: %d0 = COPY [[VREGR]] + + BX_RET 14, %noreg, implicit %d0 + ; CHECK: BX_RET 14, %noreg, implicit %d0 +... +--- +name: test_fpext_s32_to_s64 +# CHECK-LABEL: name: test_fpext_s32_to_s64 +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: fprb } + - { id: 1, class: fprb } +body: | + bb.0: + liveins: %s0 + + %0(s32) = COPY %s0 + ; CHECK: [[VREGX:%[0-9]+]]:spr = COPY %s0 + + %1(s64) = G_FPEXT %0(s32) + ; CHECK: [[VREGR:%[0-9]+]]:dpr = VCVTDS [[VREGX]], 14, %noreg + + %d0 = COPY %1(s64) + ; CHECK: %d0 = COPY [[VREGR]] + + BX_RET 14, %noreg, implicit %d0 + ; CHECK: BX_RET 14, %noreg, implicit %d0 +... +--- +name: test_fptrunc_s64_to_s32 +# CHECK-LABEL: name: test_fptrunc_s64_to_s32 +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: fprb } + - { id: 1, class: fprb } +body: | + bb.0: + liveins: %d0 + + %0(s64) = COPY %d0 + ; CHECK: [[VREGX:%[0-9]+]]:dpr = COPY %d0 + + %1(s32) = G_FPTRUNC %0(s64) + ; CHECK: [[VREGR:%[0-9]+]]:spr = VCVTSD [[VREGX]], 14, %noreg + + %s0 = COPY %1(s32) + ; CHECK: %s0 = COPY [[VREGR]] + + BX_RET 14, %noreg, implicit %s0 + ; CHECK: BX_RET 14, %noreg, implicit %s0 ... --- name: test_sub_s32 @@ -590,13 +812,13 @@ body: | ; CHECK: [[VREGY:%[0-9]+]]:gpr = COPY %r1 %2(s32) = G_SUB %0, %1 - ; CHECK: [[VREGRES:%[0-9]+]]:gpr = SUBrr [[VREGX]], [[VREGY]], 14, _, _ + ; CHECK: [[VREGRES:%[0-9]+]]:gpr = SUBrr [[VREGX]], [[VREGY]], 14, %noreg, %noreg %r0 = COPY %2(s32) ; CHECK: %r0 = COPY [[VREGRES]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 ... --- name: test_sub_imm_s32 @@ -618,13 +840,13 @@ body: | %1(s32) = G_CONSTANT i32 17 %2(s32) = G_SUB %0, %1 - ; CHECK: [[VREGRES:%[0-9]+]]:gpr = SUBri [[VREGX]], 17, 14, _, _ + ; CHECK: [[VREGRES:%[0-9]+]]:gpr = SUBri [[VREGX]], 17, 14, %noreg, %noreg %r0 = COPY %2(s32) ; CHECK: %r0 = COPY [[VREGRES]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 ... --- name: test_sub_rev_imm_s32 @@ -646,13 +868,13 @@ body: | %1(s32) = G_CONSTANT i32 17 %2(s32) = G_SUB %1, %0 - ; CHECK: [[VREGRES:%[0-9]+]]:gpr = RSBri [[VREGX]], 17, 14, _, _ + ; CHECK: [[VREGRES:%[0-9]+]]:gpr = RSBri [[VREGX]], 17, 14, %noreg, %noreg %r0 = COPY %2(s32) ; CHECK: %r0 = COPY [[VREGRES]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 ... 
--- name: test_mul_s32 @@ -676,13 +898,13 @@ body: | ; CHECK: [[VREGY:%[0-9]+]]:gprnopc = COPY %r1 %2(s32) = G_MUL %0, %1 - ; CHECK: [[VREGRES:%[0-9]+]]:gprnopc = MUL [[VREGX]], [[VREGY]], 14, _, _ + ; CHECK: [[VREGRES:%[0-9]+]]:gprnopc = MUL [[VREGX]], [[VREGY]], 14, %noreg, %noreg %r0 = COPY %2(s32) ; CHECK: %r0 = COPY [[VREGRES]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 ... --- name: test_mulv5_s32 @@ -706,13 +928,13 @@ body: | ; CHECK: [[VREGY:%[0-9]+]]:gprnopc = COPY %r1 %2(s32) = G_MUL %0, %1 - ; CHECK: early-clobber [[VREGRES:%[0-9]+]]:gprnopc = MULv5 [[VREGX]], [[VREGY]], 14, _, _ + ; CHECK: early-clobber [[VREGRES:%[0-9]+]]:gprnopc = MULv5 [[VREGX]], [[VREGY]], 14, %noreg, %noreg %r0 = COPY %2(s32) ; CHECK: %r0 = COPY [[VREGRES]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 ... --- name: test_sdiv_s32 @@ -736,13 +958,13 @@ body: | ; CHECK: [[VREGY:%[0-9]+]]:gpr = COPY %r1 %2(s32) = G_SDIV %0, %1 - ; CHECK: [[VREGRES:%[0-9]+]]:gpr = SDIV [[VREGX]], [[VREGY]], 14, _ + ; CHECK: [[VREGRES:%[0-9]+]]:gpr = SDIV [[VREGX]], [[VREGY]], 14, %noreg %r0 = COPY %2(s32) ; CHECK: %r0 = COPY [[VREGRES]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 ... --- name: test_udiv_s32 @@ -766,13 +988,13 @@ body: | ; CHECK: [[VREGY:%[0-9]+]]:gpr = COPY %r1 %2(s32) = G_UDIV %0, %1 - ; CHECK: [[VREGRES:%[0-9]+]]:gpr = UDIV [[VREGX]], [[VREGY]], 14, _ + ; CHECK: [[VREGRES:%[0-9]+]]:gpr = UDIV [[VREGX]], [[VREGY]], 14, %noreg %r0 = COPY %2(s32) ; CHECK: %r0 = COPY [[VREGRES]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 ... --- name: test_lshr_s32 @@ -796,13 +1018,13 @@ body: | ; CHECK: [[VREGY:%[0-9]+]]:gpr = COPY %r1 %2(s32) = G_LSHR %0, %1 - ; CHECK: [[VREGRES:%[0-9]+]]:gprnopc = MOVsr [[VREGX]], [[VREGY]], 3, 14, _, _ + ; CHECK: [[VREGRES:%[0-9]+]]:gprnopc = MOVsr [[VREGX]], [[VREGY]], 3, 14, %noreg, %noreg %r0 = COPY %2(s32) ; CHECK: %r0 = COPY [[VREGRES]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 ... --- name: test_ashr_s32 @@ -826,13 +1048,13 @@ body: | ; CHECK: [[VREGY:%[0-9]+]]:gpr = COPY %r1 %2(s32) = G_ASHR %0, %1 - ; CHECK: [[VREGRES:%[0-9]+]]:gprnopc = MOVsr [[VREGX]], [[VREGY]], 1, 14, _, _ + ; CHECK: [[VREGRES:%[0-9]+]]:gprnopc = MOVsr [[VREGX]], [[VREGY]], 1, 14, %noreg, %noreg %r0 = COPY %2(s32) ; CHECK: %r0 = COPY [[VREGRES]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 ... --- name: test_shl_s32 @@ -856,13 +1078,13 @@ body: | ; CHECK: [[VREGY:%[0-9]+]]:gpr = COPY %r1 %2(s32) = G_SHL %0, %1 - ; CHECK: [[VREGRES:%[0-9]+]]:gprnopc = MOVsr [[VREGX]], [[VREGY]], 2, 14, _, _ + ; CHECK: [[VREGRES:%[0-9]+]]:gprnopc = MOVsr [[VREGX]], [[VREGY]], 2, 14, %noreg, %noreg %r0 = COPY %2(s32) ; CHECK: %r0 = COPY [[VREGRES]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 ... 
--- name: test_load_from_stack @@ -888,19 +1110,19 @@ body: | liveins: %r0, %r1, %r2, %r3 %0(p0) = G_FRAME_INDEX %fixed-stack.2 - ; CHECK: [[FI32VREG:%[0-9]+]]:gpr = ADDri %fixed-stack.[[FI32]], 0, 14, _, _ + ; CHECK: [[FI32VREG:%[0-9]+]]:gpr = ADDri %fixed-stack.[[FI32]], 0, 14, %noreg, %noreg %1(s32) = G_LOAD %0(p0) :: (load 4) - ; CHECK: [[LD32VREG:%[0-9]+]]:gpr = LDRi12 [[FI32VREG]], 0, 14, _ + ; CHECK: [[LD32VREG:%[0-9]+]]:gpr = LDRi12 [[FI32VREG]], 0, 14, %noreg %r0 = COPY %1 ; CHECK: %r0 = COPY [[LD32VREG]] %2(p0) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[FI1VREG:%[0-9]+]]:gpr = ADDri %fixed-stack.[[FI1]], 0, 14, _, _ + ; CHECK: [[FI1VREG:%[0-9]+]]:gpr = ADDri %fixed-stack.[[FI1]], 0, 14, %noreg, %noreg %3(s1) = G_LOAD %2(p0) :: (load 1) - ; CHECK: [[LD1VREG:%[0-9]+]]:gprnopc = LDRBi12 [[FI1VREG]], 0, 14, _ + ; CHECK: [[LD1VREG:%[0-9]+]]:gprnopc = LDRBi12 [[FI1VREG]], 0, 14, %noreg %4(s32) = G_ANYEXT %3(s1) ; CHECK: [[RES:%[0-9]+]]:gpr = COPY [[LD1VREG]] @@ -908,8 +1130,8 @@ body: | %r0 = COPY %4 ; CHECK: %r0 = COPY [[RES]] - BX_RET 14, _ - ; CHECK: BX_RET 14, _ + BX_RET 14, %noreg + ; CHECK: BX_RET 14, %noreg ... --- name: test_load_f32 @@ -929,13 +1151,13 @@ body: | ; CHECK: %[[P:[0-9]+]]:gpr = COPY %r0 %1(s32) = G_LOAD %0(p0) :: (load 4) - ; CHECK: %[[V:[0-9]+]]:spr = VLDRS %[[P]], 0, 14, _ + ; CHECK: %[[V:[0-9]+]]:spr = VLDRS %[[P]], 0, 14, %noreg %s0 = COPY %1 ; CHECK: %s0 = COPY %[[V]] - BX_RET 14, _, implicit %s0 - ; CHECK: BX_RET 14, _, implicit %s0 + BX_RET 14, %noreg, implicit %s0 + ; CHECK: BX_RET 14, %noreg, implicit %s0 ... --- name: test_load_f64 @@ -955,13 +1177,13 @@ body: | ; CHECK: %[[P:[0-9]+]]:gpr = COPY %r0 %1(s64) = G_LOAD %0(p0) :: (load 8) - ; CHECK: %[[V:[0-9]+]]:dpr = VLDRD %[[P]], 0, 14, _ + ; CHECK: %[[V:[0-9]+]]:dpr = VLDRD %[[P]], 0, 14, %noreg %d0 = COPY %1 ; CHECK: %d0 = COPY %[[V]] - BX_RET 14, _, implicit %d0 - ; CHECK: BX_RET 14, _, implicit %d0 + BX_RET 14, %noreg, implicit %d0 + ; CHECK: BX_RET 14, %noreg, implicit %d0 ... --- name: test_stores @@ -995,21 +1217,21 @@ body: | %2(s16) = G_TRUNC %3(s32) G_STORE %1(s8), %0(p0) :: (store 1) - ; CHECK: STRBi12 %[[I8]], %[[P]], 0, 14, _ + ; CHECK: STRBi12 %[[I8]], %[[P]], 0, 14, %noreg G_STORE %2(s16), %0(p0) :: (store 2) - ; CHECK: STRH %[[I16]], %[[P]], _, 0, 14, _ + ; CHECK: STRH %[[I16]], %[[P]], %noreg, 0, 14, %noreg G_STORE %3(s32), %0(p0) :: (store 4) - ; CHECK: STRi12 %[[I32]], %[[P]], 0, 14, _ + ; CHECK: STRi12 %[[I32]], %[[P]], 0, 14, %noreg G_STORE %4(s32), %0(p0) :: (store 4) - ; CHECK: VSTRS %[[F32]], %[[P]], 0, 14, _ + ; CHECK: VSTRS %[[F32]], %[[P]], 0, 14, %noreg G_STORE %5(s64), %0(p0) :: (store 8) - ; CHECK: VSTRD %[[F64]], %[[P]], 0, 14, _ + ; CHECK: VSTRD %[[F64]], %[[P]], 0, 14, %noreg - BX_RET 14, _ + BX_RET 14, %noreg ... --- name: test_gep @@ -1033,10 +1255,10 @@ body: | ; CHECK: %[[OFF:[0-9]+]]:gpr = COPY %r1 %2(p0) = G_GEP %0, %1(s32) - ; CHECK: %[[GEP:[0-9]+]]:gpr = ADDrr %[[PTR]], %[[OFF]], 14, _, _ + ; CHECK: %[[GEP:[0-9]+]]:gpr = ADDrr %[[PTR]], %[[OFF]], 14, %noreg, %noreg %r0 = COPY %2(p0) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_constant_imm @@ -1050,10 +1272,10 @@ registers: body: | bb.0: %0(s32) = G_CONSTANT 42 - ; CHECK: %[[C:[0-9]+]]:gpr = MOVi 42, 14, _, _ + ; CHECK: %[[C:[0-9]+]]:gpr = MOVi 42, 14, %noreg, %noreg %r0 = COPY %0(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... 
--- name: test_constant_cimm @@ -1069,10 +1291,93 @@ body: | ; Adding a type on G_CONSTANT changes its operand from an Imm into a CImm. ; We still want to see the same thing in the output though. %0(s32) = G_CONSTANT i32 42 - ; CHECK: %[[C:[0-9]+]]:gpr = MOVi 42, 14, _, _ + ; CHECK: %[[C:[0-9]+]]:gpr = MOVi 42, 14, %noreg, %noreg %r0 = COPY %0(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 +... +--- +name: test_pointer_constant_unconstrained +# CHECK-LABEL: name: test_pointer_constant_unconstrained +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: gprb } +body: | + bb.0: + %0(p0) = G_CONSTANT i32 0 + ; CHECK: %[[C:[0-9]+]]:gpr = MOVi 0, 14, %noreg, %noreg + + ; This leaves %0 unconstrained before the G_CONSTANT is selected. + %r0 = COPY %0(p0) + BX_RET 14, %noreg, implicit %r0 +... +--- +name: test_pointer_constant_constrained +# CHECK-LABEL: name: test_pointer_constant_constrained +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: gprb } +body: | + bb.0: + %0(p0) = G_CONSTANT i32 0 + ; CHECK: %[[C:[0-9]+]]:gpr = MOVi 0, 14, %noreg, %noreg + + ; This constrains %0 before the G_CONSTANT is selected. + G_STORE %0(p0), %0(p0) :: (store 4) +... +--- +name: test_inttoptr_s32 +# CHECK-LABEL: name: test_inttoptr_s32 +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: gprb } + - { id: 1, class: gprb } +body: | + bb.0: + liveins: %r0 + + %0(s32) = COPY %r0 + %1(p0) = G_INTTOPTR %0(s32) + ; CHECK: [[INT:%[0-9]+]]:gpr = COPY %r0 + ; CHECK: [[PTR:%[0-9]+]]:gpr = COPY [[INT]] + + %r0 = COPY %1(p0) + ; CHECK: %r0 = COPY [[PTR]] + + BX_RET 14, %noreg, implicit %r0 +... +--- +name: test_ptrtoint_s32 +# CHECK-LABEL: name: test_ptrtoint_s32 +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: gprb } + - { id: 1, class: gprb } +body: | + bb.0: + liveins: %r0 + + %0(p0) = COPY %r0 + %1(s32) = G_PTRTOINT %0(p0) + ; CHECK: [[PTR:%[0-9]+]]:gpr = COPY %r0 + ; CHECK: [[INT:%[0-9]+]]:gpr = COPY [[PTR]] + + %r0 = COPY %1(s32) + ; CHECK: %r0 = COPY [[INT]] + + BX_RET 14, %noreg, implicit %r0 ... --- name: test_select_s32 @@ -1100,14 +1405,14 @@ body: | ; CHECK: [[VREGC:%[0-9]+]]:gpr = COPY [[VREGY]] %3(s32) = G_SELECT %2(s1), %0, %1 - ; CHECK: CMPri [[VREGC]], 0, 14, _, implicit-def %cpsr + ; CHECK: CMPri [[VREGC]], 0, 14, %noreg, implicit-def %cpsr ; CHECK: [[RES:%[0-9]+]]:gpr = MOVCCr [[VREGX]], [[VREGY]], 0, %cpsr %r0 = COPY %3(s32) ; CHECK: %r0 = COPY [[RES]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 ... --- name: test_select_ptr @@ -1139,14 +1444,14 @@ body: | ; CHECK: [[VREGD:%[0-9]+]]:gpr = COPY [[VREGC]] %4(p0) = G_SELECT %3(s1), %0, %1 - ; CHECK: CMPri [[VREGD]], 0, 14, _, implicit-def %cpsr + ; CHECK: CMPri [[VREGD]], 0, 14, %noreg, implicit-def %cpsr ; CHECK: [[RES:%[0-9]+]]:gpr = MOVCCr [[VREGX]], [[VREGY]], 0, %cpsr %r0 = COPY %4(p0) ; CHECK: %r0 = COPY [[RES]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 ... 
--- name: test_br @@ -1170,8 +1475,8 @@ body: | ; CHECK: [[COND:%[0-9]+]]:gpr = COPY [[COND32]] G_BRCOND %1(s1), %bb.1 - ; CHECK: TSTri [[COND]], 1, 14, _, implicit-def %cpsr - ; CHECK: Bcc %bb.1, 0, %cpsr + ; CHECK: TSTri [[COND]], 1, 14, %noreg, implicit-def %cpsr + ; CHECK: Bcc %bb.1, 1, %cpsr G_BR %bb.2 ; CHECK: B %bb.2 @@ -1185,8 +1490,100 @@ body: | bb.2: ; CHECK: bb.2 - BX_RET 14, _ - ; CHECK: BX_RET 14, _ + BX_RET 14, %noreg + ; CHECK: BX_RET 14, %noreg +... +--- +name: test_phi_s32 +# CHECK-LABEL: name: test_phi_s32 +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +tracksRegLiveness: true +registers: + - { id: 0, class: gprb } + - { id: 1, class: gprb } + - { id: 2, class: gprb } + - { id: 3, class: gprb } + - { id: 4, class: gprb } +body: | + bb.0: + ; CHECK: [[BB1:bb.0]]: + successors: %bb.1(0x40000000), %bb.2(0x40000000) + liveins: %r0, %r1, %r2 + + %0(s32) = COPY %r0 + %1(s1) = G_TRUNC %0(s32) + + %2(s32) = COPY %r1 + %3(s32) = COPY %r2 + ; CHECK: [[V1:%[0-9]+]]:gpr = COPY %r1 + ; CHECK: [[V2:%[0-9]+]]:gpr = COPY %r2 + + G_BRCOND %1(s1), %bb.1 + G_BR %bb.2 + + bb.1: + ; CHECK: [[BB2:bb.1]]: + successors: %bb.2(0x80000000) + + G_BR %bb.2 + ; CHECK: B %bb.2 + + bb.2: + ; CHECK: bb.2 + %4(s32) = G_PHI %2(s32), %bb.0, %3(s32), %bb.1 + ; CHECK: {{%[0-9]+}}:gpr = PHI [[V1]], %[[BB1]], [[V2]], %[[BB2]] + + %r0 = COPY %4(s32) + BX_RET 14, %noreg, implicit %r0 +... +--- +name: test_phi_s64 +# CHECK-LABEL: name: test_phi_s64 +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +tracksRegLiveness: true +registers: + - { id: 0, class: gprb } + - { id: 1, class: gprb } + - { id: 2, class: fprb } + - { id: 3, class: fprb } + - { id: 4, class: fprb } +body: | + bb.0: + ; CHECK: [[BB1:bb.0]]: + successors: %bb.1(0x40000000), %bb.2(0x40000000) + liveins: %r0, %d0, %d1 + + %0(s32) = COPY %r0 + %1(s1) = G_TRUNC %0(s32) + + %2(s64) = COPY %d0 + %3(s64) = COPY %d1 + ; CHECK: [[V1:%[0-9]+]]:dpr = COPY %d0 + ; CHECK: [[V2:%[0-9]+]]:dpr = COPY %d1 + + G_BRCOND %1(s1), %bb.1 + G_BR %bb.2 + + bb.1: + ; CHECK: [[BB2:bb.1]]: + successors: %bb.2(0x80000000) + + G_BR %bb.2 + ; CHECK: B %bb.2 + + bb.2: + ; CHECK: bb.2 + %4(s64) = G_PHI %2(s64), %bb.0, %3(s64), %bb.1 + ; CHECK: {{%[0-9]+}}:dpr = PHI [[V1]], %[[BB1]], [[V2]], %[[BB2]] + + %d0 = COPY %4(s64) + BX_RET 14, %noreg, implicit %d0 ... --- name: test_soft_fp_double @@ -1223,6 +1620,6 @@ body: | %r1 = COPY %4 ; CHECK: %r1 = COPY [[OUT2]] - BX_RET 14, _, implicit %r0, implicit %r1 - ; CHECK: BX_RET 14, _, implicit %r0, implicit %r1 + BX_RET 14, %noreg, implicit %r0, implicit %r1 + ; CHECK: BX_RET 14, %noreg, implicit %r0, implicit %r1 ... 
diff --git a/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll b/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll index 0994455916ed..9c070e858b90 100644 --- a/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll +++ b/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll @@ -1,9 +1,10 @@ ; RUN: llc -mtriple arm-unknown -mattr=+vfp2,+v4t -global-isel -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=LITTLE -; RUN: llc -mtriple armeb-unknown -mattr=+vfp2,+v4t -global-isel -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=BIG +; RUN: llc -mtriple armeb-unknown -mattr=+vfp2,+v4t -global-isel -global-isel-abort=0 -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=BIG +; XFAIL: armeb define void @test_void_return() { ; CHECK-LABEL: name: test_void_return -; CHECK: BX_RET 14, _ +; CHECK: BX_RET 14, %noreg entry: ret void } @@ -18,7 +19,7 @@ define signext i1 @test_add_i1(i1 %x, i1 %y) { ; CHECK: [[SUM:%[0-9]+]]:_(s1) = G_ADD [[VREGX]], [[VREGY]] ; CHECK: [[EXT:%[0-9]+]]:_(s32) = G_SEXT [[SUM]] ; CHECK: %r0 = COPY [[EXT]](s32) -; CHECK: BX_RET 14, _, implicit %r0 +; CHECK: BX_RET 14, %noreg, implicit %r0 entry: %sum = add i1 %x, %y ret i1 %sum @@ -34,7 +35,7 @@ define i8 @test_add_i8(i8 %x, i8 %y) { ; CHECK: [[SUM:%[0-9]+]]:_(s8) = G_ADD [[VREGX]], [[VREGY]] ; CHECK: [[SUM_EXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SUM]] ; CHECK: %r0 = COPY [[SUM_EXT]](s32) -; CHECK: BX_RET 14, _, implicit %r0 +; CHECK: BX_RET 14, %noreg, implicit %r0 entry: %sum = add i8 %x, %y ret i8 %sum @@ -50,7 +51,7 @@ define i8 @test_sub_i8(i8 %x, i8 %y) { ; CHECK: [[RES:%[0-9]+]]:_(s8) = G_SUB [[VREGX]], [[VREGY]] ; CHECK: [[RES_EXT:%[0-9]+]]:_(s32) = G_ANYEXT [[RES]] ; CHECK: %r0 = COPY [[RES_EXT]](s32) -; CHECK: BX_RET 14, _, implicit %r0 +; CHECK: BX_RET 14, %noreg, implicit %r0 entry: %res = sub i8 %x, %y ret i8 %res @@ -63,7 +64,7 @@ define signext i8 @test_return_sext_i8(i8 %x) { ; CHECK: [[VREG:%[0-9]+]]:_(s8) = G_TRUNC [[VREGR0]] ; CHECK: [[VREGEXT:%[0-9]+]]:_(s32) = G_SEXT [[VREG]] ; CHECK: %r0 = COPY [[VREGEXT]](s32) -; CHECK: BX_RET 14, _, implicit %r0 +; CHECK: BX_RET 14, %noreg, implicit %r0 entry: ret i8 %x } @@ -78,7 +79,7 @@ define i16 @test_add_i16(i16 %x, i16 %y) { ; CHECK: [[SUM:%[0-9]+]]:_(s16) = G_ADD [[VREGX]], [[VREGY]] ; CHECK: [[SUM_EXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SUM]] ; CHECK: %r0 = COPY [[SUM_EXT]](s32) -; CHECK: BX_RET 14, _, implicit %r0 +; CHECK: BX_RET 14, %noreg, implicit %r0 entry: %sum = add i16 %x, %y ret i16 %sum @@ -94,7 +95,7 @@ define i16 @test_sub_i16(i16 %x, i16 %y) { ; CHECK: [[RES:%[0-9]+]]:_(s16) = G_SUB [[VREGX]], [[VREGY]] ; CHECK: [[RES_EXT:%[0-9]+]]:_(s32) = G_ANYEXT [[RES]] ; CHECK: %r0 = COPY [[RES_EXT]](s32) -; CHECK: BX_RET 14, _, implicit %r0 +; CHECK: BX_RET 14, %noreg, implicit %r0 entry: %res = sub i16 %x, %y ret i16 %res @@ -107,7 +108,7 @@ define zeroext i16 @test_return_zext_i16(i16 %x) { ; CHECK: [[VREG:%[0-9]+]]:_(s16) = G_TRUNC [[VREGR0]] ; CHECK: [[VREGEXT:%[0-9]+]]:_(s32) = G_ZEXT [[VREG]] ; CHECK: %r0 = COPY [[VREGEXT]](s32) -; CHECK: BX_RET 14, _, implicit %r0 +; CHECK: BX_RET 14, %noreg, implicit %r0 entry: ret i16 %x } @@ -119,7 +120,7 @@ define i32 @test_add_i32(i32 %x, i32 %y) { ; CHECK-DAG: [[VREGY:%[0-9]+]]:_(s32) = COPY %r1 ; CHECK: [[SUM:%[0-9]+]]:_(s32) = G_ADD [[VREGX]], [[VREGY]] ; CHECK: %r0 = COPY [[SUM]](s32) -; CHECK: BX_RET 14, _, implicit %r0 +; CHECK: BX_RET 14, %noreg, implicit %r0 entry: %sum = add 
i32 %x, %y ret i32 %sum @@ -132,7 +133,7 @@ define i32 @test_sub_i32(i32 %x, i32 %y) { ; CHECK-DAG: [[VREGY:%[0-9]+]]:_(s32) = COPY %r1 ; CHECK: [[RES:%[0-9]+]]:_(s32) = G_SUB [[VREGX]], [[VREGY]] ; CHECK: %r0 = COPY [[RES]](s32) -; CHECK: BX_RET 14, _, implicit %r0 +; CHECK: BX_RET 14, %noreg, implicit %r0 entry: %res = sub i32 %x, %y ret i32 %res @@ -149,7 +150,7 @@ define i32 @test_stack_args(i32 %p0, i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5 ; CHECK: [[VREGP5:%[0-9]+]]:_(s32) = G_LOAD [[FIP5]]{{.*}}load 4 ; CHECK: [[SUM:%[0-9]+]]:_(s32) = G_ADD [[VREGP2]], [[VREGP5]] ; CHECK: %r0 = COPY [[SUM]] -; CHECK: BX_RET 14, _, implicit %r0 +; CHECK: BX_RET 14, %noreg, implicit %r0 entry: %sum = add i32 %p2, %p5 ret i32 %sum @@ -170,7 +171,7 @@ define i16 @test_stack_args_signext(i32 %p0, i16 %p1, i8 %p2, i1 %p3, ; CHECK: [[SUM:%[0-9]+]]:_(s16) = G_ADD [[VREGP1]], [[VREGP5]] ; CHECK: [[SUM_EXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SUM]] ; CHECK: %r0 = COPY [[SUM_EXT]](s32) -; CHECK: BX_RET 14, _, implicit %r0 +; CHECK: BX_RET 14, %noreg, implicit %r0 entry: %sum = add i16 %p1, %p5 ret i16 %sum @@ -191,7 +192,7 @@ define i8 @test_stack_args_zeroext(i32 %p0, i16 %p1, i8 %p2, i1 %p3, ; CHECK: [[SUM:%[0-9]+]]:_(s8) = G_ADD [[VREGP2]], [[VREGP4]] ; CHECK: [[SUM_EXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SUM]] ; CHECK: %r0 = COPY [[SUM_EXT]](s32) -; CHECK: BX_RET 14, _, implicit %r0 +; CHECK: BX_RET 14, %noreg, implicit %r0 entry: %sum = add i8 %p2, %p4 ret i8 %sum @@ -211,7 +212,7 @@ define i8 @test_stack_args_noext(i32 %p0, i16 %p1, i8 %p2, i1 %p3, ; CHECK: [[SUM:%[0-9]+]]:_(s8) = G_ADD [[VREGP2]], [[VREGP4]] ; CHECK: [[SUM_EXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SUM]] ; CHECK: %r0 = COPY [[SUM_EXT]](s32) -; CHECK: BX_RET 14, _, implicit %r0 +; CHECK: BX_RET 14, %noreg, implicit %r0 entry: %sum = add i8 %p2, %p4 ret i8 %sum @@ -229,7 +230,7 @@ define zeroext i16 @test_stack_args_extend_the_extended(i32 %p0, i16 %p1, i8 %p2 ; CHECK: [[VREGP5:%[0-9]+]]:_(s16) = G_TRUNC [[VREGP5SEXT]] ; CHECK: [[VREGP5ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[VREGP5]] ; CHECK: %r0 = COPY [[VREGP5ZEXT]] -; CHECK: BX_RET 14, _, implicit %r0 +; CHECK: BX_RET 14, %noreg, implicit %r0 entry: ret i16 %p5 } @@ -251,7 +252,7 @@ define i32* @test_ptr_ret(i32** %p) { ; CHECK: [[VREGP:%[0-9]+]]:_(p0) = COPY %r0 ; CHECK: [[VREGV:%[0-9]+]]:_(p0) = G_LOAD [[VREGP]](p0){{.*}}load 4 ; CHECK: %r0 = COPY [[VREGV]] -; CHECK: BX_RET 14, _, implicit %r0 +; CHECK: BX_RET 14, %noreg, implicit %r0 entry: %v = load i32*, i32** %p ret i32* %v @@ -266,7 +267,7 @@ define i32 @test_ptr_arg_on_stack(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32* %p) { ; CHECK: [[VREGP:%[0-9]+]]:_(p0) = G_LOAD [[FIP]](p0){{.*}}load 4 ; CHECK: [[VREGV:%[0-9]+]]:_(s32) = G_LOAD [[VREGP]](p0){{.*}}load 4 ; CHECK: %r0 = COPY [[VREGV]] -; CHECK: BX_RET 14, _, implicit %r0 +; CHECK: BX_RET 14, %noreg, implicit %r0 entry: %v = load i32, i32* %p ret i32 %v @@ -284,7 +285,7 @@ define arm_aapcscc float @test_float_aapcscc(float %p0, float %p1, float %p2, ; CHECK: [[VREGP5:%[0-9]+]]:_(s32) = G_LOAD [[FIP5]](p0){{.*}}load 4 ; CHECK: [[VREGV:%[0-9]+]]:_(s32) = G_FADD [[VREGP1]], [[VREGP5]] ; CHECK: %r0 = COPY [[VREGV]] -; CHECK: BX_RET 14, _, implicit %r0 +; CHECK: BX_RET 14, %noreg, implicit %r0 entry: %v = fadd float %p1, %p5 ret float %v @@ -313,7 +314,7 @@ define arm_aapcs_vfpcc float @test_float_vfpcc(float %p0, float %p1, float %p2, ; CHECK: [[VREGQ1:%[0-9]+]]:_(s32) = G_LOAD [[FIQ1]](p0){{.*}}load 4 ; CHECK: [[VREGV:%[0-9]+]]:_(s32) = G_FADD [[VREGP1]], [[VREGQ1]] ; CHECK: %s0 = COPY [[VREGV]] -; CHECK: 
BX_RET 14, _, implicit %s0 +; CHECK: BX_RET 14, %noreg, implicit %s0 entry: %v = fadd float %p1, %q1 ret float %v @@ -334,7 +335,7 @@ define arm_aapcs_vfpcc double @test_double_vfpcc(double %p0, double %p1, double ; CHECK: [[VREGQ1:%[0-9]+]]:_(s64) = G_LOAD [[FIQ1]](p0){{.*}}load 8 ; CHECK: [[VREGV:%[0-9]+]]:_(s64) = G_FADD [[VREGP1]], [[VREGQ1]] ; CHECK: %d0 = COPY [[VREGV]] -; CHECK: BX_RET 14, _, implicit %d0 +; CHECK: BX_RET 14, %noreg, implicit %d0 entry: %v = fadd double %p1, %q1 ret double %v @@ -360,7 +361,7 @@ define arm_aapcscc double @test_double_aapcscc(double %p0, double %p1, double %p ; BIG: [[VREGVHI:%[0-9]+]]:_(s32), [[VREGVLO:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[VREGV]](s64) ; CHECK-DAG: %r0 = COPY [[VREGVLO]] ; CHECK-DAG: %r1 = COPY [[VREGVHI]] -; CHECK: BX_RET 14, _, implicit %r0, implicit %r1 +; CHECK: BX_RET 14, %noreg, implicit %r0, implicit %r1 entry: %v = fadd double %p1, %p5 ret double %v @@ -382,7 +383,7 @@ define arm_aapcs_vfpcc double @test_double_gap_vfpcc(double %p0, float %filler, ; CHECK: [[VREGQ1:%[0-9]+]]:_(s64) = G_LOAD [[FIQ1]](p0){{.*}}load 8 ; CHECK: [[VREGV:%[0-9]+]]:_(s64) = G_FADD [[VREGP1]], [[VREGQ1]] ; CHECK: %d0 = COPY [[VREGV]] -; CHECK: BX_RET 14, _, implicit %d0 +; CHECK: BX_RET 14, %noreg, implicit %d0 entry: %v = fadd double %p1, %q1 ret double %v @@ -405,7 +406,7 @@ define arm_aapcscc double @test_double_gap_aapcscc(float %filler, double %p0, ; BIG: [[VREGVHI:%[0-9]+]]:_(s32), [[VREGVLO:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[VREGV]](s64) ; CHECK-DAG: %r0 = COPY [[VREGVLO]] ; CHECK-DAG: %r1 = COPY [[VREGVHI]] -; CHECK: BX_RET 14, _, implicit %r0, implicit %r1 +; CHECK: BX_RET 14, %noreg, implicit %r0, implicit %r1 entry: %v = fadd double %p0, %p1 ret double %v @@ -428,7 +429,7 @@ define arm_aapcscc double @test_double_gap2_aapcscc(double %p0, float %filler, ; BIG: [[VREGVHI:%[0-9]+]]:_(s32), [[VREGVLO:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[VREGV]](s64) ; CHECK-DAG: %r0 = COPY [[VREGVLO]] ; CHECK-DAG: %r1 = COPY [[VREGVHI]] -; CHECK: BX_RET 14, _, implicit %r0, implicit %r1 +; CHECK: BX_RET 14, %noreg, implicit %r0, implicit %r1 entry: %v = fadd double %p0, %p1 ret double %v diff --git a/test/CodeGen/ARM/GlobalISel/arm-isel.ll b/test/CodeGen/ARM/GlobalISel/arm-isel.ll index 50c4e7232518..7162815a7f70 100644 --- a/test/CodeGen/ARM/GlobalISel/arm-isel.ll +++ b/test/CodeGen/ARM/GlobalISel/arm-isel.ll @@ -35,7 +35,7 @@ entry: define zeroext i8 @test_ext_i8(i8 %x) { ; CHECK-LABEL: test_ext_i8: -; CHECK: and r0, r0, #255 +; CHECK: uxtb r0, r0 ; CHECK: bx lr entry: @@ -442,7 +442,7 @@ define arm_aapcscc void @test_brcond(i32 %n) { ; CHECK: cmp r0 ; CHECK-NEXT: movgt [[RCMP:r[0-9]+]], #1 ; CHECK: tst [[RCMP]], #1 -; CHECK-NEXT: bne [[FALSE:.L[[:alnum:]_]+]] +; CHECK-NEXT: beq [[FALSE:.L[[:alnum:]_]+]] ; CHECK: bl brcond1 ; CHECK: [[FALSE]]: ; CHECK: bl brcond2 diff --git a/test/CodeGen/ARM/GlobalISel/arm-legalize-divmod.mir b/test/CodeGen/ARM/GlobalISel/arm-legalize-divmod.mir index 6596036ab693..941b7aa55d68 100644 --- a/test/CodeGen/ARM/GlobalISel/arm-legalize-divmod.mir +++ b/test/CodeGen/ARM/GlobalISel/arm-legalize-divmod.mir @@ -46,16 +46,16 @@ body: | ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X]] ; SOFT-DAG: %r1 = COPY [[Y]] - ; SOFT-AEABI: BL $__aeabi_idiv, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-AEABI: BL &__aeabi_idiv, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 ; SOFT-AEABI: [[R:%[0-9]+]]:_(s32) = COPY %r0 - ; SOFT-DEFAULT: BL $__divsi3, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; 
SOFT-DEFAULT: BL &__divsi3, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 ; SOFT-DEFAULT: [[R:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP ; SOFT-NOT: G_SDIV %2(s32) = G_SDIV %0, %1 ; CHECK: %r0 = COPY [[R]] %r0 = COPY %2(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_udiv_i32 @@ -82,16 +82,16 @@ body: | ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X]] ; SOFT-DAG: %r1 = COPY [[Y]] - ; SOFT-AEABI: BL $__aeabi_uidiv, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-AEABI: BL &__aeabi_uidiv, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 ; SOFT-AEABI: [[R:%[0-9]+]]:_(s32) = COPY %r0 - ; SOFT-DEFAULT: BL $__udivsi3, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-DEFAULT: BL &__udivsi3, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 ; SOFT-DEFAULT: [[R:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP ; SOFT-NOT: G_UDIV %2(s32) = G_UDIV %0, %1 ; CHECK: %r0 = COPY [[R]] %r0 = COPY %2(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_sdiv_i16 @@ -133,9 +133,9 @@ body: | ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X32]] ; SOFT-DAG: %r1 = COPY [[Y32]] - ; SOFT-AEABI: BL $__aeabi_idiv, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-AEABI: BL &__aeabi_idiv, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 ; SOFT-AEABI: [[R32:%[0-9]+]]:_(s32) = COPY %r0 - ; SOFT-DEFAULT: BL $__divsi3, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-DEFAULT: BL &__divsi3, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 ; SOFT-DEFAULT: [[R32:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP ; SOFT-NOT: G_SDIV @@ -145,7 +145,7 @@ body: | ; CHECK: %r0 = COPY [[R]] %5(s32) = G_SEXT %4(s16) %r0 = COPY %5(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_udiv_i16 @@ -185,9 +185,9 @@ body: | ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X32]] ; SOFT-DAG: %r1 = COPY [[Y32]] - ; SOFT-AEABI: BL $__aeabi_uidiv, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-AEABI: BL &__aeabi_uidiv, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 ; SOFT-AEABI: [[R32:%[0-9]+]]:_(s32) = COPY %r0 - ; SOFT-DEFAULT: BL $__udivsi3, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-DEFAULT: BL &__udivsi3, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 ; SOFT-DEFAULT: [[R32:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP ; SOFT-NOT: G_UDIV @@ -197,7 +197,7 @@ body: | ; CHECK: %r0 = COPY [[R]] %5(s32) = G_ZEXT %4(s16) %r0 = COPY %5(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_sdiv_i8 @@ -239,9 +239,9 @@ body: | ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X32]] ; SOFT-DAG: %r1 = COPY [[Y32]] - ; SOFT-AEABI: BL $__aeabi_idiv, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-AEABI: BL &__aeabi_idiv, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 ; SOFT-AEABI: [[R32:%[0-9]+]]:_(s32) = COPY %r0 - ; SOFT-DEFAULT: BL $__divsi3, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-DEFAULT: BL &__divsi3, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 ; SOFT-DEFAULT: [[R32:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP ; SOFT-NOT: G_SDIV @@ -251,7 +251,7 @@ body: | ; CHECK: %r0 = COPY [[R]] %5(s32) = G_SEXT %4(s8) %r0 = COPY %5(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... 
--- name: test_udiv_i8 @@ -291,9 +291,9 @@ body: | ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X32]] ; SOFT-DAG: %r1 = COPY [[Y32]] - ; SOFT-AEABI: BL $__aeabi_uidiv, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-AEABI: BL &__aeabi_uidiv, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 ; SOFT-AEABI: [[R32:%[0-9]+]]:_(s32) = COPY %r0 - ; SOFT-DEFAULT: BL $__udivsi3, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-DEFAULT: BL &__udivsi3, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 ; SOFT-DEFAULT: [[R32:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP ; SOFT-NOT: G_UDIV @@ -303,7 +303,7 @@ body: | ; CHECK: %r0 = COPY [[R]] %5(s32) = G_ZEXT %4(s8) %r0 = COPY %5(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_srem_i32 @@ -332,16 +332,16 @@ body: | ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X]] ; SOFT-DAG: %r1 = COPY [[Y]] - ; SOFT-AEABI: BL $__aeabi_idivmod, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0, implicit-def %r1 + ; SOFT-AEABI: BL &__aeabi_idivmod, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0, implicit-def %r1 ; SOFT-AEABI: [[R:%[0-9]+]]:_(s32) = COPY %r1 - ; SOFT-DEFAULT: BL $__modsi3, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-DEFAULT: BL &__modsi3, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 ; SOFT-DEFAULT: [[R:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP ; SOFT-NOT: G_SREM %2(s32) = G_SREM %0, %1 ; CHECK: %r0 = COPY [[R]] %r0 = COPY %2(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_urem_i32 @@ -370,16 +370,16 @@ body: | ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X]] ; SOFT-DAG: %r1 = COPY [[Y]] - ; SOFT-AEABI: BL $__aeabi_uidivmod, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0, implicit-def %r1 + ; SOFT-AEABI: BL &__aeabi_uidivmod, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0, implicit-def %r1 ; SOFT-AEABI: [[R:%[0-9]+]]:_(s32) = COPY %r1 - ; SOFT-DEFAULT: BL $__umodsi3, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-DEFAULT: BL &__umodsi3, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 ; SOFT-DEFAULT: [[R:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP ; SOFT-NOT: G_UREM %2(s32) = G_UREM %0, %1 ; CHECK: %r0 = COPY [[R]] %r0 = COPY %2(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_srem_i16 @@ -423,9 +423,9 @@ body: | ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X32]] ; SOFT-DAG: %r1 = COPY [[Y32]] - ; SOFT-AEABI: BL $__aeabi_idivmod, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-AEABI: BL &__aeabi_idivmod, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 ; SOFT-AEABI: [[R32:%[0-9]+]]:_(s32) = COPY %r1 - ; SOFT-DEFAULT: BL $__modsi3, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-DEFAULT: BL &__modsi3, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 ; SOFT-DEFAULT: [[R32:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP ; SOFT-NOT: G_SREM @@ -435,7 +435,7 @@ body: | ; CHECK: %r0 = COPY [[R]] %5(s32) = G_SEXT %4(s16) %r0 = COPY %5(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... 
--- name: test_urem_i16 @@ -477,9 +477,9 @@ body: | ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X32]] ; SOFT-DAG: %r1 = COPY [[Y32]] - ; SOFT-AEABI: BL $__aeabi_uidivmod, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-AEABI: BL &__aeabi_uidivmod, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 ; SOFT-AEABI: [[R32:%[0-9]+]]:_(s32) = COPY %r1 - ; SOFT-DEFAULT: BL $__umodsi3, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-DEFAULT: BL &__umodsi3, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 ; SOFT-DEFAULT: [[R32:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP ; SOFT-NOT: G_UREM @@ -489,7 +489,7 @@ body: | ; CHECK: %r0 = COPY [[R]] %5(s32) = G_ZEXT %4(s16) %r0 = COPY %5(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_srem_i8 @@ -533,9 +533,9 @@ body: | ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X32]] ; SOFT-DAG: %r1 = COPY [[Y32]] - ; SOFT-AEABI: BL $__aeabi_idivmod, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-AEABI: BL &__aeabi_idivmod, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 ; SOFT-AEABI: [[R32:%[0-9]+]]:_(s32) = COPY %r1 - ; SOFT-DEFAULT: BL $__modsi3, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-DEFAULT: BL &__modsi3, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 ; SOFT-DEFAULT: [[R32:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP ; SOFT-NOT: G_SREM @@ -545,7 +545,7 @@ body: | ; CHECK: %r0 = COPY [[R]] %5(s32) = G_SEXT %4(s8) %r0 = COPY %5(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_urem_i8 @@ -587,9 +587,9 @@ body: | ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X32]] ; SOFT-DAG: %r1 = COPY [[Y32]] - ; SOFT-AEABI: BL $__aeabi_uidivmod, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-AEABI: BL &__aeabi_uidivmod, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 ; SOFT-AEABI: [[R32:%[0-9]+]]:_(s32) = COPY %r1 - ; SOFT-DEFAULT: BL $__umodsi3, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-DEFAULT: BL &__umodsi3, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 ; SOFT-DEFAULT: [[R32:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP ; SOFT-NOT: G_UREM @@ -599,5 +599,5 @@ body: | ; CHECK: %r0 = COPY [[R]] %5(s32) = G_ZEXT %4(s8) %r0 = COPY %5(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... 
diff --git a/test/CodeGen/ARM/GlobalISel/arm-legalize-fp.mir b/test/CodeGen/ARM/GlobalISel/arm-legalize-fp.mir index cd02da286d2a..297eb6f28f6d 100644 --- a/test/CodeGen/ARM/GlobalISel/arm-legalize-fp.mir +++ b/test/CodeGen/ARM/GlobalISel/arm-legalize-fp.mir @@ -20,6 +20,15 @@ define void @test_fdiv_float() { ret void } define void @test_fdiv_double() { ret void } + define void @test_fconstant_float() { ret void } + define void @test_fconstant_double() { ret void } + + define void @test_fneg_float() { ret void } + define void @test_fneg_double() { ret void } + + define void @test_fpext_float_to_double() { ret void } + define void @test_fptrunc_double_to_float() { ret void } + define void @test_fcmp_true_s32() { ret void } define void @test_fcmp_false_s32() { ret void } @@ -84,8 +93,8 @@ body: | ; SOFT-DAG: %r1 = COPY [[Y]] ; HARD-DAG: %s0 = COPY [[X]] ; HARD-DAG: %s1 = COPY [[Y]] - ; SOFT: BL $fmodf, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; HARD: BL $fmodf, {{.*}}, implicit %s0, implicit %s1, implicit-def %s0 + ; SOFT: BL &fmodf, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; HARD: BL &fmodf, {{.*}}, implicit %s0, implicit %s1, implicit-def %s0 ; SOFT: [[R:%[0-9]+]]:_(s32) = COPY %r0 ; HARD: [[R:%[0-9]+]]:_(s32) = COPY %s0 ; CHECK: ADJCALLSTACKUP @@ -93,7 +102,7 @@ body: | %2(s32) = G_FREM %0, %1 ; CHECK: %r0 = COPY [[R]] %r0 = COPY %2(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_frem_double @@ -143,15 +152,15 @@ body: | ; SOFT-DAG: %r{{[2-3]}} = COPY [[Y1]] ; HARD-DAG: %d0 = COPY [[X]] ; HARD-DAG: %d1 = COPY [[Y]] - ; SOFT: BL $fmod, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0, implicit-def %r1 - ; HARD: BL $fmod, {{.*}}, implicit %d0, implicit %d1, implicit-def %d0 + ; SOFT: BL &fmod, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0, implicit-def %r1 + ; HARD: BL &fmod, {{.*}}, implicit %d0, implicit %d1, implicit-def %d0 ; CHECK: ADJCALLSTACKUP ; CHECK-NOT: G_FREM %6(s64) = G_FREM %4, %5 %7(s32), %8(s32) = G_UNMERGE_VALUES %6(s64) %r0 = COPY %7(s32) %r1 = COPY %8(s32) - BX_RET 14, _, implicit %r0, implicit %r1 + BX_RET 14, %noreg, implicit %r0, implicit %r1 ... --- name: test_fpow_float @@ -179,8 +188,8 @@ body: | ; SOFT-DAG: %r1 = COPY [[Y]] ; HARD-DAG: %s0 = COPY [[X]] ; HARD-DAG: %s1 = COPY [[Y]] - ; SOFT: BL $powf, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; HARD: BL $powf, {{.*}}, implicit %s0, implicit %s1, implicit-def %s0 + ; SOFT: BL &powf, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; HARD: BL &powf, {{.*}}, implicit %s0, implicit %s1, implicit-def %s0 ; SOFT: [[R:%[0-9]+]]:_(s32) = COPY %r0 ; HARD: [[R:%[0-9]+]]:_(s32) = COPY %s0 ; CHECK: ADJCALLSTACKUP @@ -188,7 +197,7 @@ body: | %2(s32) = G_FPOW %0, %1 ; CHECK: %r0 = COPY [[R]] %r0 = COPY %2(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... 
--- name: test_fpow_double @@ -238,15 +247,15 @@ body: | ; SOFT-DAG: %r{{[2-3]}} = COPY [[Y1]] ; HARD-DAG: %d0 = COPY [[X]] ; HARD-DAG: %d1 = COPY [[Y]] - ; SOFT: BL $pow, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0, implicit-def %r1 - ; HARD: BL $pow, {{.*}}, implicit %d0, implicit %d1, implicit-def %d0 + ; SOFT: BL &pow, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0, implicit-def %r1 + ; HARD: BL &pow, {{.*}}, implicit %d0, implicit %d1, implicit-def %d0 ; CHECK: ADJCALLSTACKUP ; CHECK-NOT: G_FPOW %6(s64) = G_FPOW %4, %5 %7(s32), %8(s32) = G_UNMERGE_VALUES %6(s64) %r0 = COPY %7(s32) %r1 = COPY %8(s32) - BX_RET 14, _, implicit %r0, implicit %r1 + BX_RET 14, %noreg, implicit %r0, implicit %r1 ... --- name: test_fadd_float @@ -273,15 +282,15 @@ body: | ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X]] ; SOFT-DAG: %r1 = COPY [[Y]] - ; SOFT-AEABI: BL $__aeabi_fadd, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT-DEFAULT: BL $__addsf3, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-AEABI: BL &__aeabi_fadd, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-DEFAULT: BL &__addsf3, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 ; SOFT: [[R:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP ; SOFT-NOT: G_FADD %2(s32) = G_FADD %0, %1 ; CHECK: %r0 = COPY [[R]] %r0 = COPY %2(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_fadd_double @@ -324,8 +333,8 @@ body: | ; SOFT-DAG: %r{{[0-1]}} = COPY [[X1]] ; SOFT-DAG: %r{{[2-3]}} = COPY [[Y0]] ; SOFT-DAG: %r{{[2-3]}} = COPY [[Y1]] - ; SOFT-AEABI: BL $__aeabi_dadd, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0, implicit-def %r1 - ; SOFT-DEFAULT: BL $__adddf3, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0, implicit-def %r1 + ; SOFT-AEABI: BL &__aeabi_dadd, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0, implicit-def %r1 + ; SOFT-DEFAULT: BL &__adddf3, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0, implicit-def %r1 ; SOFT: ADJCALLSTACKUP ; SOFT-NOT: G_FADD %6(s64) = G_FADD %4, %5 @@ -333,7 +342,7 @@ body: | %7(s32),%8(s32) = G_UNMERGE_VALUES %6(s64) %r0 = COPY %7(s32) %r1 = COPY %8(s32) - BX_RET 14, _, implicit %r0, implicit %r1 + BX_RET 14, %noreg, implicit %r0, implicit %r1 ... --- name: test_fsub_float @@ -360,15 +369,15 @@ body: | ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X]] ; SOFT-DAG: %r1 = COPY [[Y]] - ; SOFT-AEABI: BL $__aeabi_fsub, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT-DEFAULT: BL $__subsf3, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-AEABI: BL &__aeabi_fsub, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-DEFAULT: BL &__subsf3, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 ; SOFT: [[R:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP ; SOFT-NOT: G_FSUB %2(s32) = G_FSUB %0, %1 ; CHECK: %r0 = COPY [[R]] %r0 = COPY %2(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... 
--- name: test_fsub_double @@ -411,8 +420,8 @@ body: | ; SOFT-DAG: %r{{[0-1]}} = COPY [[X1]] ; SOFT-DAG: %r{{[2-3]}} = COPY [[Y0]] ; SOFT-DAG: %r{{[2-3]}} = COPY [[Y1]] - ; SOFT-AEABI: BL $__aeabi_dsub, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0, implicit-def %r1 - ; SOFT-DEFAULT: BL $__subdf3, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0, implicit-def %r1 + ; SOFT-AEABI: BL &__aeabi_dsub, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0, implicit-def %r1 + ; SOFT-DEFAULT: BL &__subdf3, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0, implicit-def %r1 ; SOFT: ADJCALLSTACKUP ; SOFT-NOT: G_FSUB %6(s64) = G_FSUB %4, %5 @@ -420,7 +429,7 @@ body: | %7(s32),%8(s32) = G_UNMERGE_VALUES %6(s64) %r0 = COPY %7(s32) %r1 = COPY %8(s32) - BX_RET 14, _, implicit %r0, implicit %r1 + BX_RET 14, %noreg, implicit %r0, implicit %r1 ... --- name: test_fmul_float @@ -447,15 +456,15 @@ body: | ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X]] ; SOFT-DAG: %r1 = COPY [[Y]] - ; SOFT-AEABI: BL $__aeabi_fmul, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT-DEFAULT: BL $__mulsf3, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-AEABI: BL &__aeabi_fmul, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-DEFAULT: BL &__mulsf3, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 ; SOFT: [[R:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP ; SOFT-NOT: G_FMUL %2(s32) = G_FMUL %0, %1 ; CHECK: %r0 = COPY [[R]] %r0 = COPY %2(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_fmul_double @@ -498,8 +507,8 @@ body: | ; SOFT-DAG: %r{{[0-1]}} = COPY [[X1]] ; SOFT-DAG: %r{{[2-3]}} = COPY [[Y0]] ; SOFT-DAG: %r{{[2-3]}} = COPY [[Y1]] - ; SOFT-AEABI: BL $__aeabi_dmul, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0, implicit-def %r1 - ; SOFT-DEFAULT: BL $__muldf3, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0, implicit-def %r1 + ; SOFT-AEABI: BL &__aeabi_dmul, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0, implicit-def %r1 + ; SOFT-DEFAULT: BL &__muldf3, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0, implicit-def %r1 ; SOFT: ADJCALLSTACKUP ; SOFT-NOT: G_FMUL %6(s64) = G_FMUL %4, %5 @@ -507,7 +516,7 @@ body: | %7(s32),%8(s32) = G_UNMERGE_VALUES %6(s64) %r0 = COPY %7(s32) %r1 = COPY %8(s32) - BX_RET 14, _, implicit %r0, implicit %r1 + BX_RET 14, %noreg, implicit %r0, implicit %r1 ... --- name: test_fdiv_float @@ -534,15 +543,15 @@ body: | ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X]] ; SOFT-DAG: %r1 = COPY [[Y]] - ; SOFT-AEABI: BL $__aeabi_fdiv, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT-DEFAULT: BL $__divsf3, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-AEABI: BL &__aeabi_fdiv, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-DEFAULT: BL &__divsf3, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 ; SOFT: [[R:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP ; SOFT-NOT: G_FDIV %2(s32) = G_FDIV %0, %1 ; CHECK: %r0 = COPY [[R]] %r0 = COPY %2(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... 
--- name: test_fdiv_double @@ -585,8 +594,8 @@ body: | ; SOFT-DAG: %r{{[0-1]}} = COPY [[X1]] ; SOFT-DAG: %r{{[2-3]}} = COPY [[Y0]] ; SOFT-DAG: %r{{[2-3]}} = COPY [[Y1]] - ; SOFT-AEABI: BL $__aeabi_ddiv, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0, implicit-def %r1 - ; SOFT-DEFAULT: BL $__divdf3, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0, implicit-def %r1 + ; SOFT-AEABI: BL &__aeabi_ddiv, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0, implicit-def %r1 + ; SOFT-DEFAULT: BL &__divdf3, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0, implicit-def %r1 ; SOFT: ADJCALLSTACKUP ; SOFT-NOT: G_FDIV %6(s64) = G_FDIV %4, %5 @@ -594,9 +603,216 @@ body: | %7(s32),%8(s32) = G_UNMERGE_VALUES %6(s64) %r0 = COPY %7(s32) %r1 = COPY %8(s32) - BX_RET 14, _, implicit %r0, implicit %r1 + BX_RET 14, %noreg, implicit %r0, implicit %r1 +... +--- +name: test_fconstant_float +# CHECK-LABEL: name: test_fconstant_float +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } +body: | + bb.0: + liveins: + + ; HARD: [[R:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.25 + ; SOFT-NOT: G_FCONSTANT + ; SOFT: [[R:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1080033280 + ; SOFT-NOT: G_FCONSTANT + %0(s32) = G_FCONSTANT float -1.25 + ; CHECK: %r0 = COPY [[R]] + %r0 = COPY %0(s32) + BX_RET 14, %noreg, implicit %r0 +... +--- +name: test_fconstant_double +# CHECK-LABEL: name: test_fconstant_double +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: + + ; HARD: [[R:%[0-9]+]]:_(s64) = G_FCONSTANT double -2.4 + ; SOFT-NOT: G_FCONSTANT + ; SOFT-DAG: [[HI:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1073532109 + ; SOFT-DAG: [[LO:%[0-9]+]]:_(s32) = G_CONSTANT i32 858993459 + ; SOFT-NOT: G_FCONSTANT + %0(s64) = G_FCONSTANT double -2.4 + ; HARD-DAG: G_UNMERGE_VALUES [[R]](s64) + ; SOFT-DAG: %r0 = COPY [[HI]] + ; SOFT-DAG: %r1 = COPY [[LO]] + %1(s32),%2(s32) = G_UNMERGE_VALUES %0(s64) + %r0 = COPY %2(s32) + %r1 = COPY %1(s32) + BX_RET 14, %noreg, implicit %r0, implicit %r1 ... --- +name: test_fneg_float +# CHECK-LABEL: name: test_fneg_float +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } +body: | + bb.0: + liveins: %r0 + + ; CHECK-DAG: [[X:%[0-9]+]]:_(s32) = COPY %r0 + %0(s32) = COPY %r0 + ; HARD: [[R:%[0-9]+]]:_(s32) = G_FNEG [[X]] + ; SOFT-NOT: G_FNEG + ; SOFT-DAG: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; SOFT: ADJCALLSTACKDOWN + ; SOFT-DAG: %r0 = COPY [[ZERO]] + ; SOFT-DAG: %r1 = COPY [[X]] + ; SOFT-AEABI: BL &__aeabi_fsub, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-DEFAULT: BL &__subsf3, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT: [[R:%[0-9]+]]:_(s32) = COPY %r0 + ; SOFT: ADJCALLSTACKUP + ; SOFT-NOT: G_FNEG + %1(s32) = G_FNEG %0 + ; CHECK: %r0 = COPY [[R]] + %r0 = COPY %1(s32) + BX_RET 14, %noreg, implicit %r0 +... 
+--- +name: test_fneg_double +# CHECK-LABEL: name: test_fneg_double +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } + - { id: 3, class: _ } + - { id: 4, class: _ } + - { id: 5, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + ; CHECK-DAG: [[X0:%[0-9]+]]:_(s32) = COPY %r0 + ; CHECK-DAG: [[X1:%[0-9]+]]:_(s32) = COPY %r1 + %0(s32) = COPY %r0 + %1(s32) = COPY %r1 + ; HARD-DAG: [[X:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[X0]] + %2(s64) = G_MERGE_VALUES %0(s32), %1(s32) + ; HARD: [[R:%[0-9]+]]:_(s64) = G_FNEG [[X]] + ; SOFT-NOT: G_FNEG + ; SOFT-DAG: [[NEGATIVE_ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; SOFT-DAG: [[POSITIVE_ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SOFT: ADJCALLSTACKDOWN + ; SOFT-DAG: %r{{[0-1]}} = COPY [[NEGATIVE_ZERO]] + ; SOFT-DAG: %r{{[0-1]}} = COPY [[POSITIVE_ZERO]] + ; SOFT-DAG: %r{{[2-3]}} = COPY [[X0]] + ; SOFT-DAG: %r{{[2-3]}} = COPY [[X1]] + ; SOFT-AEABI: BL &__aeabi_dsub, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0, implicit-def %r1 + ; SOFT-DEFAULT: BL &__subdf3, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0, implicit-def %r1 + ; SOFT: ADJCALLSTACKUP + ; SOFT-NOT: G_FNEG + %3(s64) = G_FNEG %2 + ; HARD-DAG: G_UNMERGE_VALUES [[R]](s64) + %4(s32),%5(s32) = G_UNMERGE_VALUES %3(s64) + %r0 = COPY %4(s32) + %r1 = COPY %5(s32) + BX_RET 14, %noreg, implicit %r0, implicit %r1 +... +--- +name: test_fpext_float_to_double +# CHECK-LABEL: name: test_fpext_float_to_double +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } + - { id: 3, class: _ } +body: | + bb.0: + liveins: %r0 + + ; CHECK-DAG: [[X:%[0-9]+]]:_(s32) = COPY %r0 + %0(s32) = COPY %r0 + ; HARD: [[R:%[0-9]+]]:_(s64) = G_FPEXT [[X]] + ; SOFT-NOT: G_FPEXT + ; SOFT: ADJCALLSTACKDOWN + ; SOFT-DAG: %r0 = COPY [[X]] + ; SOFT-AEABI: BL &__aeabi_f2d, {{.*}}, implicit %r0, implicit-def %r0, implicit-def %r1 + ; SOFT-DEFAULT: BL &__extendsfdf2, {{.*}}, implicit %r0, implicit-def %r0, implicit-def %r1 + ; SOFT: [[R0:%[0-9]+]]:_(s32) = COPY %r0 + ; SOFT: [[R1:%[0-9]+]]:_(s32) = COPY %r1 + ; SOFT: ADJCALLSTACKUP + ; SOFT-NOT: G_FPEXT + %1(s64) = G_FPEXT %0(s32) + ; HARD: G_UNMERGE_VALUES [[R]](s64) + ; SOFT-DAG: %r{{[0-1]}} = COPY [[R0]] + ; SOFT-DAG: %r{{[0-1]}} = COPY [[R1]] + %2(s32), %3(s32) = G_UNMERGE_VALUES %1(s64) + %r0 = COPY %2(s32) + %r1 = COPY %3(s32) + BX_RET 14, %noreg, implicit %r0, implicit %r1 +... 
+--- +name: test_fptrunc_double_to_float +# CHECK-LABEL: name: test_fptrunc_double_to_float +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } + - { id: 3, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + ; CHECK-DAG: [[X0:%[0-9]+]]:_(s32) = COPY %r0 + ; CHECK-DAG: [[X1:%[0-9]+]]:_(s32) = COPY %r1 + ; HARD: [[X:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[X0]] + %0(s32) = COPY %r0 + %1(s32) = COPY %r1 + %2(s64) = G_MERGE_VALUES %0(s32), %1(s32) + ; HARD: [[R:%[0-9]+]]:_(s32) = G_FPTRUNC [[X]] + ; SOFT-NOT: G_FPTRUNC + ; SOFT: ADJCALLSTACKDOWN + ; SOFT-DAG: %r0 = COPY [[X0]] + ; SOFT-DAG: %r1 = COPY [[X1]] + ; SOFT-AEABI: BL &__aeabi_d2f, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-DEFAULT: BL &__truncdfsf2, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT: [[R:%[0-9]+]]:_(s32) = COPY %r0 + ; SOFT: ADJCALLSTACKUP + ; SOFT-NOT: G_FPTRUNC + %3(s32) = G_FPTRUNC %2(s64) + ; CHECK: %r0 = COPY [[R]] + %r0 = COPY %3(s32) + BX_RET 14, %noreg, implicit %r0 +--- +... name: test_fcmp_true_s32 # CHECK-LABEL: name: test_fcmp_true_s32 legalized: false @@ -618,7 +834,7 @@ body: | %2(s1) = G_FCMP floatpred(true), %0(s32), %1 %3(s32) = G_ZEXT %2(s1) %r0 = COPY %3(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ; HARD-DAG: [[X:%[0-9]+]]:_(s32) = COPY %r0 ; HARD-DAG: [[Y:%[0-9]+]]:_(s32) = COPY %r1 ; HARD: [[R:%[0-9]+]]:_(s1) = G_FCMP floatpred(true), [[X]](s32), [[Y]] @@ -655,7 +871,7 @@ body: | %2(s1) = G_FCMP floatpred(false), %0(s32), %1 %3(s32) = G_ZEXT %2(s1) %r0 = COPY %3(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ; HARD-DAG: [[X:%[0-9]+]]:_(s32) = COPY %r0 ; HARD-DAG: [[Y:%[0-9]+]]:_(s32) = COPY %r1 ; HARD: [[R:%[0-9]+]]:_(s1) = G_FCMP floatpred(false), [[X]](s32), [[Y]] @@ -698,8 +914,8 @@ body: | ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X]] ; SOFT-DAG: %r1 = COPY [[Y]] - ; SOFT-AEABI: BL $__aeabi_fcmpeq, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT-DEFAULT: BL $__eqsf2, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-AEABI: BL &__aeabi_fcmpeq, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-DEFAULT: BL &__eqsf2, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 ; SOFT: [[RET:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP ; For aeabi, we just need to truncate the result. The combiner changes the @@ -714,7 +930,7 @@ body: | %3(s32) = G_ZEXT %2(s1) %r0 = COPY %3(s32) ; CHECK: %r0 = COPY [[REXT]] - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_fcmp_ogt_s32 @@ -744,8 +960,8 @@ body: | ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X]] ; SOFT-DAG: %r1 = COPY [[Y]] - ; SOFT-AEABI: BL $__aeabi_fcmpgt, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT-DEFAULT: BL $__gtsf2, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-AEABI: BL &__aeabi_fcmpgt, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-DEFAULT: BL &__gtsf2, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 ; SOFT: [[RET:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP ; For aeabi, we just need to truncate the result. The combiner changes the @@ -760,7 +976,7 @@ body: | %3(s32) = G_ZEXT %2(s1) %r0 = COPY %3(s32) ; CHECK: %r0 = COPY [[REXT]] - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... 
--- name: test_fcmp_oge_s32 @@ -790,8 +1006,8 @@ body: | ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X]] ; SOFT-DAG: %r1 = COPY [[Y]] - ; SOFT-AEABI: BL $__aeabi_fcmpge, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT-DEFAULT: BL $__gesf2, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-AEABI: BL &__aeabi_fcmpge, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-DEFAULT: BL &__gesf2, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 ; SOFT: [[RET:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP ; For aeabi, we just need to truncate the result. The combiner changes the @@ -806,7 +1022,7 @@ body: | %3(s32) = G_ZEXT %2(s1) %r0 = COPY %3(s32) ; CHECK: %r0 = COPY [[REXT]] - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_fcmp_olt_s32 @@ -836,8 +1052,8 @@ body: | ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X]] ; SOFT-DAG: %r1 = COPY [[Y]] - ; SOFT-AEABI: BL $__aeabi_fcmplt, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT-DEFAULT: BL $__ltsf2, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-AEABI: BL &__aeabi_fcmplt, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-DEFAULT: BL &__ltsf2, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 ; SOFT: [[RET:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP ; For aeabi, we just need to truncate the result. The combiner changes the @@ -852,7 +1068,7 @@ body: | %3(s32) = G_ZEXT %2(s1) %r0 = COPY %3(s32) ; CHECK: %r0 = COPY [[REXT]] - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_fcmp_ole_s32 @@ -882,8 +1098,8 @@ body: | ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X]] ; SOFT-DAG: %r1 = COPY [[Y]] - ; SOFT-AEABI: BL $__aeabi_fcmple, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT-DEFAULT: BL $__lesf2, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-AEABI: BL &__aeabi_fcmple, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-DEFAULT: BL &__lesf2, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 ; SOFT: [[RET:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP ; For aeabi, we just need to truncate the result. The combiner changes the @@ -898,7 +1114,7 @@ body: | %3(s32) = G_ZEXT %2(s1) %r0 = COPY %3(s32) ; CHECK: %r0 = COPY [[REXT]] - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_fcmp_ord_s32 @@ -927,8 +1143,8 @@ body: | ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X]] ; SOFT-DAG: %r1 = COPY [[Y]] - ; SOFT-AEABI: BL $__aeabi_fcmpun, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT-DEFAULT: BL $__unordsf2, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-AEABI: BL &__aeabi_fcmpun, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-DEFAULT: BL &__unordsf2, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 ; SOFT: [[RET:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP ; SOFT: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 @@ -938,7 +1154,7 @@ body: | ; CHECK: [[REXT:%[0-9]+]]:_(s32) = G_ZEXT [[R]](s1) %r0 = COPY %3(s32) ; CHECK: %r0 = COPY [[REXT]] - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... 
--- name: test_fcmp_ugt_s32 @@ -967,8 +1183,8 @@ body: | ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X]] ; SOFT-DAG: %r1 = COPY [[Y]] - ; SOFT-AEABI: BL $__aeabi_fcmple, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT-DEFAULT: BL $__lesf2, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-AEABI: BL &__aeabi_fcmple, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-DEFAULT: BL &__lesf2, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 ; SOFT: [[RET:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP ; SOFT: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 @@ -979,7 +1195,7 @@ body: | ; CHECK: [[REXT:%[0-9]+]]:_(s32) = G_ZEXT [[R]](s1) %r0 = COPY %3(s32) ; CHECK: %r0 = COPY [[REXT]] - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_fcmp_uge_s32 @@ -1008,8 +1224,8 @@ body: | ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X]] ; SOFT-DAG: %r1 = COPY [[Y]] - ; SOFT-AEABI: BL $__aeabi_fcmplt, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT-DEFAULT: BL $__ltsf2, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-AEABI: BL &__aeabi_fcmplt, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-DEFAULT: BL &__ltsf2, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 ; SOFT: [[RET:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP ; SOFT: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 @@ -1020,7 +1236,7 @@ body: | ; CHECK: [[REXT:%[0-9]+]]:_(s32) = G_ZEXT [[R]](s1) %r0 = COPY %3(s32) ; CHECK: %r0 = COPY [[REXT]] - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_fcmp_ult_s32 @@ -1049,8 +1265,8 @@ body: | ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X]] ; SOFT-DAG: %r1 = COPY [[Y]] - ; SOFT-AEABI: BL $__aeabi_fcmpge, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT-DEFAULT: BL $__gesf2, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-AEABI: BL &__aeabi_fcmpge, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-DEFAULT: BL &__gesf2, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 ; SOFT: [[RET:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP ; SOFT: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 @@ -1061,7 +1277,7 @@ body: | ; CHECK: [[REXT:%[0-9]+]]:_(s32) = G_ZEXT [[R]](s1) %r0 = COPY %3(s32) ; CHECK: %r0 = COPY [[REXT]] - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_fcmp_ule_s32 @@ -1090,8 +1306,8 @@ body: | ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X]] ; SOFT-DAG: %r1 = COPY [[Y]] - ; SOFT-AEABI: BL $__aeabi_fcmpgt, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT-DEFAULT: BL $__gtsf2, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-AEABI: BL &__aeabi_fcmpgt, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-DEFAULT: BL &__gtsf2, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 ; SOFT: [[RET:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP ; SOFT: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 @@ -1102,7 +1318,7 @@ body: | ; CHECK: [[REXT:%[0-9]+]]:_(s32) = G_ZEXT [[R]](s1) %r0 = COPY %3(s32) ; CHECK: %r0 = COPY [[REXT]] - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... 
--- name: test_fcmp_une_s32 @@ -1131,8 +1347,8 @@ body: | ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X]] ; SOFT-DAG: %r1 = COPY [[Y]] - ; SOFT-AEABI: BL $__aeabi_fcmpeq, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT-DEFAULT: BL $__nesf2, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-AEABI: BL &__aeabi_fcmpeq, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-DEFAULT: BL &__nesf2, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 ; SOFT: [[RET:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP ; SOFT: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 @@ -1143,7 +1359,7 @@ body: | ; CHECK: [[REXT:%[0-9]+]]:_(s32) = G_ZEXT [[R]](s1) %r0 = COPY %3(s32) ; CHECK: %r0 = COPY [[REXT]] - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_fcmp_uno_s32 @@ -1173,8 +1389,8 @@ body: | ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X]] ; SOFT-DAG: %r1 = COPY [[Y]] - ; SOFT-AEABI: BL $__aeabi_fcmpun, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT-DEFAULT: BL $__unordsf2, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-AEABI: BL &__aeabi_fcmpun, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-DEFAULT: BL &__unordsf2, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 ; SOFT: [[RET:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP ; For aeabi, we just need to truncate the result. The combiner changes the @@ -1189,7 +1405,7 @@ body: | %3(s32) = G_ZEXT %2(s1) %r0 = COPY %3(s32) ; CHECK: %r0 = COPY [[REXT]] - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_fcmp_one_s32 @@ -1219,8 +1435,8 @@ body: | ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X]] ; SOFT-DAG: %r1 = COPY [[Y]] - ; SOFT-AEABI: BL $__aeabi_fcmpgt, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT-DEFAULT: BL $__gtsf2, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-AEABI: BL &__aeabi_fcmpgt, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-DEFAULT: BL &__gtsf2, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 ; SOFT: [[RET1:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP ; SOFT-DEFAULT: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 @@ -1229,8 +1445,8 @@ body: | ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X]] ; SOFT-DAG: %r1 = COPY [[Y]] - ; SOFT-AEABI: BL $__aeabi_fcmplt, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT-DEFAULT: BL $__ltsf2, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-AEABI: BL &__aeabi_fcmplt, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-DEFAULT: BL &__ltsf2, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 ; SOFT: [[RET2:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP ; SOFT-DEFAULT: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 @@ -1249,7 +1465,7 @@ body: | %3(s32) = G_ZEXT %2(s1) %r0 = COPY %3(s32) ; CHECK: %r0 = COPY [[REXT]] - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... 
--- name: test_fcmp_ueq_s32 @@ -1279,8 +1495,8 @@ body: | ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X]] ; SOFT-DAG: %r1 = COPY [[Y]] - ; SOFT-AEABI: BL $__aeabi_fcmpeq, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT-DEFAULT: BL $__eqsf2, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-AEABI: BL &__aeabi_fcmpeq, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-DEFAULT: BL &__eqsf2, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 ; SOFT: [[RET1:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP ; SOFT-DEFAULT: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 @@ -1289,8 +1505,8 @@ body: | ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X]] ; SOFT-DAG: %r1 = COPY [[Y]] - ; SOFT-AEABI: BL $__aeabi_fcmpun, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT-DEFAULT: BL $__unordsf2, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-AEABI: BL &__aeabi_fcmpun, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-DEFAULT: BL &__unordsf2, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 ; SOFT: [[RET2:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP ; SOFT-DEFAULT: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 @@ -1309,7 +1525,7 @@ body: | %3(s32) = G_ZEXT %2(s1) %r0 = COPY %3(s32) ; CHECK: %r0 = COPY [[REXT]] - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_fcmp_true_s64 @@ -1358,7 +1574,7 @@ body: | %7(s32) = G_ZEXT %6(s1) %r0 = COPY %7(s32) ; CHECK: %r0 = COPY [[REXT]] - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_fcmp_false_s64 @@ -1408,7 +1624,7 @@ body: | %7(s32) = G_ZEXT %6(s1) %r0 = COPY %7(s32) ; CHECK: %r0 = COPY [[REXT]] - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_fcmp_oeq_s64 @@ -1452,8 +1668,8 @@ body: | ; SOFT-DAG: %r1 = COPY [[X1]] ; SOFT-DAG: %r2 = COPY [[Y0]] ; SOFT-DAG: %r3 = COPY [[Y1]] - ; SOFT-AEABI: BL $__aeabi_dcmpeq, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 - ; SOFT-DEFAULT: BL $__eqdf2, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 + ; SOFT-AEABI: BL &__aeabi_dcmpeq, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 + ; SOFT-DEFAULT: BL &__eqdf2, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 ; SOFT: [[RET:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP ; For aeabi, we just need to truncate the result. The combiner changes the @@ -1468,7 +1684,7 @@ body: | %7(s32) = G_ZEXT %6(s1) %r0 = COPY %7(s32) ; CHECK: %r0 = COPY [[REXT]] - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_fcmp_ogt_s64 @@ -1512,8 +1728,8 @@ body: | ; SOFT-DAG: %r1 = COPY [[X1]] ; SOFT-DAG: %r2 = COPY [[Y0]] ; SOFT-DAG: %r3 = COPY [[Y1]] - ; SOFT-AEABI: BL $__aeabi_dcmpgt, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 - ; SOFT-DEFAULT: BL $__gtdf2, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 + ; SOFT-AEABI: BL &__aeabi_dcmpgt, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 + ; SOFT-DEFAULT: BL &__gtdf2, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 ; SOFT: [[RET:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP ; For aeabi, we just need to truncate the result. 
The combiner changes the @@ -1528,7 +1744,7 @@ body: | %7(s32) = G_ZEXT %6(s1) %r0 = COPY %7(s32) ; CHECK: %r0 = COPY [[REXT]] - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_fcmp_oge_s64 @@ -1572,8 +1788,8 @@ body: | ; SOFT-DAG: %r1 = COPY [[X1]] ; SOFT-DAG: %r2 = COPY [[Y0]] ; SOFT-DAG: %r3 = COPY [[Y1]] - ; SOFT-AEABI: BL $__aeabi_dcmpge, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 - ; SOFT-DEFAULT: BL $__gedf2, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 + ; SOFT-AEABI: BL &__aeabi_dcmpge, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 + ; SOFT-DEFAULT: BL &__gedf2, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 ; SOFT: [[RET:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP ; For aeabi, we just need to truncate the result. The combiner changes the @@ -1588,7 +1804,7 @@ body: | %7(s32) = G_ZEXT %6(s1) %r0 = COPY %7(s32) ; CHECK: %r0 = COPY [[REXT]] - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_fcmp_olt_s64 @@ -1632,8 +1848,8 @@ body: | ; SOFT-DAG: %r1 = COPY [[X1]] ; SOFT-DAG: %r2 = COPY [[Y0]] ; SOFT-DAG: %r3 = COPY [[Y1]] - ; SOFT-AEABI: BL $__aeabi_dcmplt, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 - ; SOFT-DEFAULT: BL $__ltdf2, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 + ; SOFT-AEABI: BL &__aeabi_dcmplt, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 + ; SOFT-DEFAULT: BL &__ltdf2, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 ; SOFT: [[RET:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP ; For aeabi, we just need to truncate the result. The combiner changes the @@ -1648,7 +1864,7 @@ body: | %7(s32) = G_ZEXT %6(s1) %r0 = COPY %7(s32) ; CHECK: %r0 = COPY [[REXT]] - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_fcmp_ole_s64 @@ -1692,8 +1908,8 @@ body: | ; SOFT-DAG: %r1 = COPY [[X1]] ; SOFT-DAG: %r2 = COPY [[Y0]] ; SOFT-DAG: %r3 = COPY [[Y1]] - ; SOFT-AEABI: BL $__aeabi_dcmple, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 - ; SOFT-DEFAULT: BL $__ledf2, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 + ; SOFT-AEABI: BL &__aeabi_dcmple, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 + ; SOFT-DEFAULT: BL &__ledf2, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 ; SOFT: [[RET:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP ; For aeabi, we just need to truncate the result. The combiner changes the @@ -1708,7 +1924,7 @@ body: | %7(s32) = G_ZEXT %6(s1) %r0 = COPY %7(s32) ; CHECK: %r0 = COPY [[REXT]] - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... 
--- name: test_fcmp_ord_s64 @@ -1751,8 +1967,8 @@ body: | ; SOFT-DAG: %r1 = COPY [[X1]] ; SOFT-DAG: %r2 = COPY [[Y0]] ; SOFT-DAG: %r3 = COPY [[Y1]] - ; SOFT-AEABI: BL $__aeabi_dcmpun, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 - ; SOFT-DEFAULT: BL $__unorddf2, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 + ; SOFT-AEABI: BL &__aeabi_dcmpun, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 + ; SOFT-DEFAULT: BL &__unorddf2, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 ; SOFT: [[RET:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP ; SOFT: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 @@ -1762,7 +1978,7 @@ body: | ; CHECK: [[REXT:%[0-9]+]]:_(s32) = G_ZEXT [[R]](s1) %r0 = COPY %7(s32) ; CHECK: %r0 = COPY [[REXT]] - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_fcmp_ugt_s64 @@ -1805,8 +2021,8 @@ body: | ; SOFT-DAG: %r1 = COPY [[X1]] ; SOFT-DAG: %r2 = COPY [[Y0]] ; SOFT-DAG: %r3 = COPY [[Y1]] - ; SOFT-AEABI: BL $__aeabi_dcmple, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 - ; SOFT-DEFAULT: BL $__ledf2, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 + ; SOFT-AEABI: BL &__aeabi_dcmple, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 + ; SOFT-DEFAULT: BL &__ledf2, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 ; SOFT: [[RET:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP ; SOFT: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 @@ -1817,7 +2033,7 @@ body: | ; CHECK: [[REXT:%[0-9]+]]:_(s32) = G_ZEXT [[R]](s1) %r0 = COPY %7(s32) ; CHECK: %r0 = COPY [[REXT]] - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_fcmp_uge_s64 @@ -1860,8 +2076,8 @@ body: | ; SOFT-DAG: %r1 = COPY [[X1]] ; SOFT-DAG: %r2 = COPY [[Y0]] ; SOFT-DAG: %r3 = COPY [[Y1]] - ; SOFT-AEABI: BL $__aeabi_dcmplt, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 - ; SOFT-DEFAULT: BL $__ltdf2, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 + ; SOFT-AEABI: BL &__aeabi_dcmplt, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 + ; SOFT-DEFAULT: BL &__ltdf2, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 ; SOFT: [[RET:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP ; SOFT: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 @@ -1872,7 +2088,7 @@ body: | ; CHECK: [[REXT:%[0-9]+]]:_(s32) = G_ZEXT [[R]](s1) %r0 = COPY %7(s32) ; CHECK: %r0 = COPY [[REXT]] - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... 
--- name: test_fcmp_ult_s64 @@ -1915,8 +2131,8 @@ body: | ; SOFT-DAG: %r1 = COPY [[X1]] ; SOFT-DAG: %r2 = COPY [[Y0]] ; SOFT-DAG: %r3 = COPY [[Y1]] - ; SOFT-AEABI: BL $__aeabi_dcmpge, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 - ; SOFT-DEFAULT: BL $__gedf2, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 + ; SOFT-AEABI: BL &__aeabi_dcmpge, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 + ; SOFT-DEFAULT: BL &__gedf2, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 ; SOFT: [[RET:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP ; SOFT: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 @@ -1927,7 +2143,7 @@ body: | ; CHECK: [[REXT:%[0-9]+]]:_(s32) = G_ZEXT [[R]](s1) %r0 = COPY %7(s32) ; CHECK: %r0 = COPY [[REXT]] - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_fcmp_ule_s64 @@ -1970,8 +2186,8 @@ body: | ; SOFT-DAG: %r1 = COPY [[X1]] ; SOFT-DAG: %r2 = COPY [[Y0]] ; SOFT-DAG: %r3 = COPY [[Y1]] - ; SOFT-AEABI: BL $__aeabi_dcmpgt, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 - ; SOFT-DEFAULT: BL $__gtdf2, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 + ; SOFT-AEABI: BL &__aeabi_dcmpgt, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 + ; SOFT-DEFAULT: BL &__gtdf2, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 ; SOFT: [[RET:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP ; SOFT: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 @@ -1982,7 +2198,7 @@ body: | ; CHECK: [[REXT:%[0-9]+]]:_(s32) = G_ZEXT [[R]](s1) %r0 = COPY %7(s32) ; CHECK: %r0 = COPY [[REXT]] - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_fcmp_une_s64 @@ -2025,8 +2241,8 @@ body: | ; SOFT-DAG: %r1 = COPY [[X1]] ; SOFT-DAG: %r2 = COPY [[Y0]] ; SOFT-DAG: %r3 = COPY [[Y1]] - ; SOFT-AEABI: BL $__aeabi_dcmpeq, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 - ; SOFT-DEFAULT: BL $__nedf2, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 + ; SOFT-AEABI: BL &__aeabi_dcmpeq, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 + ; SOFT-DEFAULT: BL &__nedf2, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 ; SOFT: [[RET:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP ; SOFT: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 @@ -2037,7 +2253,7 @@ body: | ; CHECK: [[REXT:%[0-9]+]]:_(s32) = G_ZEXT [[R]](s1) %r0 = COPY %7(s32) ; CHECK: %r0 = COPY [[REXT]] - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_fcmp_uno_s64 @@ -2081,8 +2297,8 @@ body: | ; SOFT-DAG: %r1 = COPY [[X1]] ; SOFT-DAG: %r2 = COPY [[Y0]] ; SOFT-DAG: %r3 = COPY [[Y1]] - ; SOFT-AEABI: BL $__aeabi_dcmpun, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 - ; SOFT-DEFAULT: BL $__unorddf2, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 + ; SOFT-AEABI: BL &__aeabi_dcmpun, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 + ; SOFT-DEFAULT: BL &__unorddf2, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 ; SOFT: [[RET:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP ; For aeabi, we just need to truncate the result. 
The combiner changes the @@ -2097,7 +2313,7 @@ body: | %7(s32) = G_ZEXT %6(s1) %r0 = COPY %7(s32) ; CHECK: %r0 = COPY [[REXT]] - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_fcmp_one_s64 @@ -2141,8 +2357,8 @@ body: | ; SOFT-DAG: %r1 = COPY [[X1]] ; SOFT-DAG: %r2 = COPY [[Y0]] ; SOFT-DAG: %r3 = COPY [[Y1]] - ; SOFT-AEABI: BL $__aeabi_dcmpgt, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 - ; SOFT-DEFAULT: BL $__gtdf2, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 + ; SOFT-AEABI: BL &__aeabi_dcmpgt, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 + ; SOFT-DEFAULT: BL &__gtdf2, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 ; SOFT: [[RET1:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP ; SOFT-DEFAULT: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 @@ -2153,8 +2369,8 @@ body: | ; SOFT-DAG: %r1 = COPY [[X1]] ; SOFT-DAG: %r2 = COPY [[Y0]] ; SOFT-DAG: %r3 = COPY [[Y1]] - ; SOFT-AEABI: BL $__aeabi_dcmplt, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 - ; SOFT-DEFAULT: BL $__ltdf2, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 + ; SOFT-AEABI: BL &__aeabi_dcmplt, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 + ; SOFT-DEFAULT: BL &__ltdf2, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 ; SOFT: [[RET2:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP ; SOFT-DEFAULT: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 @@ -2173,7 +2389,7 @@ body: | %7(s32) = G_ZEXT %6(s1) %r0 = COPY %7(s32) ; CHECK: %r0 = COPY [[REXT]] - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_fcmp_ueq_s64 @@ -2217,8 +2433,8 @@ body: | ; SOFT-DAG: %r1 = COPY [[X1]] ; SOFT-DAG: %r2 = COPY [[Y0]] ; SOFT-DAG: %r3 = COPY [[Y1]] - ; SOFT-AEABI: BL $__aeabi_dcmpeq, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 - ; SOFT-DEFAULT: BL $__eqdf2, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 + ; SOFT-AEABI: BL &__aeabi_dcmpeq, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 + ; SOFT-DEFAULT: BL &__eqdf2, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 ; SOFT: [[RET1:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP ; SOFT-DEFAULT: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 @@ -2229,8 +2445,8 @@ body: | ; SOFT-DAG: %r1 = COPY [[X1]] ; SOFT-DAG: %r2 = COPY [[Y0]] ; SOFT-DAG: %r3 = COPY [[Y1]] - ; SOFT-AEABI: BL $__aeabi_dcmpun, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 - ; SOFT-DEFAULT: BL $__unorddf2, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 + ; SOFT-AEABI: BL &__aeabi_dcmpun, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 + ; SOFT-DEFAULT: BL &__unorddf2, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 ; SOFT: [[RET2:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP ; SOFT-DEFAULT: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 @@ -2249,5 +2465,5 @@ body: | %7(s32) = G_ZEXT %6(s1) %r0 = COPY %7(s32) ; CHECK: %r0 = COPY [[REXT]] - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... 
diff --git a/test/CodeGen/ARM/GlobalISel/arm-legalize-vfp4.mir b/test/CodeGen/ARM/GlobalISel/arm-legalize-vfp4.mir new file mode 100644 index 000000000000..5fe0d86b2b4c --- /dev/null +++ b/test/CodeGen/ARM/GlobalISel/arm-legalize-vfp4.mir @@ -0,0 +1,121 @@ +# RUN: llc -mtriple arm-linux-gnueabihf -mattr=+vfp4 -float-abi=hard -global-isel -run-pass=legalizer %s -o - | FileCheck %s -check-prefix CHECK -check-prefix HARD +# RUN: llc -mtriple arm-linux-gnueabihf -mattr=+vfp2 -float-abi=hard -global-isel -run-pass=legalizer %s -o - | FileCheck %s -check-prefix CHECK -check-prefix SOFT -check-prefix HARD-ABI +# RUN: llc -mtriple arm-linux-gnueabi -mattr=+vfp4,+soft-float -float-abi=soft -global-isel -run-pass=legalizer %s -o - | FileCheck %s -check-prefix CHECK -check-prefix SOFT -check-prefix SOFT-ABI +# RUN: llc -mtriple arm-linux-gnu -mattr=+vfp4,+soft-float -float-abi=soft -global-isel -run-pass=legalizer %s -o - | FileCheck %s -check-prefix CHECK -check-prefix SOFT -check-prefix SOFT-ABI +--- | + define void @test_fma_float() { ret void } + define void @test_fma_double() { ret void } +... +--- +name: test_fma_float +# CHECK-LABEL: name: test_fma_float +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } + - { id: 3, class: _ } +body: | + bb.0: + liveins: %r0, %r1, %r2 + + ; CHECK-DAG: [[X:%[0-9]+]]:_(s32) = COPY %r0 + ; CHECK-DAG: [[Y:%[0-9]+]]:_(s32) = COPY %r1 + ; CHECK-DAG: [[Z:%[0-9]+]]:_(s32) = COPY %r2 + %0(s32) = COPY %r0 + %1(s32) = COPY %r1 + %2(s32) = COPY %r2 + ; HARD: [[R:%[0-9]+]]:_(s32) = G_FMA [[X]], [[Y]], [[Z]] + ; SOFT-NOT: G_FMA + ; SOFT: ADJCALLSTACKDOWN + ; SOFT-ABI-DAG: %r0 = COPY [[X]] + ; SOFT-ABI-DAG: %r1 = COPY [[Y]] + ; SOFT-ABI-DAG: %r2 = COPY [[Z]] + ; SOFT-ABI: BL &fmaf, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit-def %r0 + ; SOFT-ABI: [[R:%[0-9]+]]:_(s32) = COPY %r0 + ; HARD-ABI-DAG: %s0 = COPY [[X]] + ; HARD-ABI-DAG: %s1 = COPY [[Y]] + ; HARD-ABI-DAG: %s2 = COPY [[Z]] + ; HARD-ABI: BL &fmaf, {{.*}}, implicit %s0, implicit %s1, implicit %s2, implicit-def %s0 + ; HARD-ABI: [[R:%[0-9]+]]:_(s32) = COPY %s0 + ; SOFT: ADJCALLSTACKUP + ; SOFT-NOT: G_FMA + %3(s32) = G_FMA %0, %1, %2 + ; CHECK: %r0 = COPY [[R]] + %r0 = COPY %3(s32) + BX_RET 14, %noreg, implicit %r0 +... 
+--- +name: test_fma_double +# CHECK-LABEL: name: test_fma_double +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } + - { id: 3, class: _ } + - { id: 4, class: _ } + - { id: 5, class: _ } + - { id: 6, class: _ } + - { id: 7, class: _ } + - { id: 8, class: _ } +body: | + bb.0: + liveins: %r0, %r1, %r2, %r3 + + ; CHECK-DAG: [[X0:%[0-9]+]]:_(s32) = COPY %r0 + ; CHECK-DAG: [[X1:%[0-9]+]]:_(s32) = COPY %r1 + ; CHECK-DAG: [[Y0:%[0-9]+]]:_(s32) = COPY %r2 + ; CHECK-DAG: [[Y1:%[0-9]+]]:_(s32) = COPY %r3 + %0(s32) = COPY %r0 + %1(s32) = COPY %r1 + %2(s32) = COPY %r2 + %3(s32) = COPY %r3 + ; HARD-DAG: [[X:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[X0]] + ; HARD-DAG: [[Y:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[Y0]] + ; HARD-ABI-DAG: [[X:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[X0]] + ; HARD-ABI-DAG: [[Y:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[Y0]] + %4(s64) = G_MERGE_VALUES %0(s32), %1(s32) + %5(s64) = G_MERGE_VALUES %2(s32), %3(s32) + ; HARD: [[R:%[0-9]+]]:_(s64) = G_FMA [[X]], [[X]], [[Y]] + ; SOFT-NOT: G_FMA + ; SOFT: ADJCALLSTACKDOWN + ; SOFT-ABI-DAG: %r{{[0-1]}} = COPY [[X0]] + ; SOFT-ABI-DAG: %r{{[0-1]}} = COPY [[X1]] + ; SOFT-ABI-DAG: %r{{[2-3]}} = COPY [[X0]] + ; SOFT-ABI-DAG: %r{{[2-3]}} = COPY [[X1]] + ; SOFT-ABI: [[SP1:%[0-9]+]]:_(p0) = COPY %sp + ; SOFT-ABI: [[OFF1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SOFT-ABI: [[FI1:%[0-9]+]]:_(p0) = G_GEP [[SP1]], [[OFF1]](s32) + ; SOFT-ABI: G_STORE [[Y0]](s32), [[FI1]](p0){{.*}}store 8 into stack + ; SOFT-ABI: [[OFF2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; SOFT-ABI: [[FI2:%[0-9]+]]:_(p0) = G_GEP [[FI1]], [[OFF2]](s32) + ; SOFT-ABI: G_STORE [[Y1]](s32), [[FI2]](p0){{.*}}store 8 into stack + ; SOFT-ABI: BL &fma, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0, implicit-def %r1 + ; SOFT-ABI-DAG: [[R0:%[0-9]+]]:_(s32) = COPY %r0 + ; SOFT-ABI-DAG: [[R1:%[0-9]+]]:_(s32) = COPY %r1 + ; HARD-ABI-DAG: %d0 = COPY [[X]] + ; HARD-ABI-DAG: %d1 = COPY [[X]] + ; HARD-ABI-DAG: %d2 = COPY [[Y]] + ; HARD-ABI: BL &fma, {{.*}}, implicit %d0, implicit %d1, implicit %d2, implicit-def %d0 + ; HARD-ABI: [[R:%[0-9]+]]:_(s64) = COPY %d0 + ; SOFT: ADJCALLSTACKUP + ; SOFT-NOT: G_FMA + %6(s64) = G_FMA %4, %4, %5 + ; HARD: [[R0:%[0-9]+]]:_(s32), [[R1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[R]](s64) + ; HARD-ABI: [[R0:%[0-9]+]]:_(s32), [[R1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[R]](s64) + %7(s32),%8(s32) = G_UNMERGE_VALUES %6(s64) + ; CHECK-DAG: %r0 = COPY [[R0]] + ; CHECK-DAG: %r1 = COPY [[R1]] + %r0 = COPY %7(s32) + %r1 = COPY %8(s32) + BX_RET 14, %noreg, implicit %r0, implicit %r1 +... 
diff --git a/test/CodeGen/ARM/GlobalISel/arm-legalizer.mir b/test/CodeGen/ARM/GlobalISel/arm-legalizer.mir index 816c042a6d5b..d88f48c2654e 100644 --- a/test/CodeGen/ARM/GlobalISel/arm-legalizer.mir +++ b/test/CodeGen/ARM/GlobalISel/arm-legalizer.mir @@ -3,6 +3,9 @@ define void @test_sext_s8() { ret void } define void @test_zext_s16() { ret void } + define void @test_inttoptr_s32() { ret void } + define void @test_ptrtoint_s32() { ret void } + define void @test_add_s8() { ret void } define void @test_add_s16() { ret void } define void @test_add_s32() { ret void } @@ -47,6 +50,11 @@ define void @test_brcond() { ret void } + define void @test_phi_s32() { ret void } + define void @test_phi_p0() { ret void } + define void @test_phi_s64() #0 { ret void } + define void @test_phi_s8() { ret void } + @a_global = global i32 42 define void @test_global_variable() { ret void } @@ -74,7 +82,7 @@ body: | ; G_SEXT with s8 is legal, so we should find it unchanged in the output ; CHECK: {{%[0-9]+}}:_(s32) = G_SEXT {{%[0-9]+}} %r0 = COPY %2(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_zext_s16 @@ -98,7 +106,51 @@ body: | ; G_ZEXT with s16 is legal, so we should find it unchanged in the output ; CHECK: {{%[0-9]+}}:_(s32) = G_ZEXT {{%[0-9]+}} %r0 = COPY %2(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 +... +--- +name: test_inttoptr_s32 +# CHECK-LABEL: name: test_inttoptr_s32 +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } +body: | + bb.0: + liveins: %r0 + + %0(s32) = COPY %r0 + %1(p0) = G_INTTOPTR %0(s32) + ; G_INTTOPTR with s32 is legal, so we should find it unchanged in the output + ; CHECK: {{%[0-9]+}}:_(p0) = G_INTTOPTR {{%[0-9]+}} + %r0 = COPY %1(p0) + BX_RET 14, %noreg, implicit %r0 +... +--- +name: test_ptrtoint_s32 +# CHECK-LABEL: name: test_ptrtoint_s32 +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } +body: | + bb.0: + liveins: %r0 + + %0(p0) = COPY %r0 + %1(s32) = G_PTRTOINT %0(p0) + ; G_PTRTOINT with s32 is legal, so we should find it unchanged in the output + ; CHECK: {{%[0-9]+}}:_(s32) = G_PTRTOINT {{%[0-9]+}} + %r0 = COPY %1(s32) + BX_RET 14, %noreg, implicit %r0 ... --- name: test_add_s8 @@ -130,7 +182,7 @@ body: | ; CHECK-NOT: {{%[0-9]+}}:_(s8) = G_ADD {{%[0-9]+, %[0-9]+}} %5(s32) = G_SEXT %4(s8) %r0 = COPY %5(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_add_s16 @@ -162,7 +214,7 @@ body: | ; CHECK-NOT: {{%[0-9]+}}:_(s16) = G_ADD {{%[0-9]+, %[0-9]+}} %5(s32) = G_SEXT %4(s16) %r0 = COPY %5(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_add_s32 @@ -186,7 +238,7 @@ body: | ; G_ADD with s32 is legal, so we should find it unchanged in the output ; CHECK: {{%[0-9]+}}:_(s32) = G_ADD {{%[0-9]+, %[0-9]+}} %r0 = COPY %2(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- @@ -219,7 +271,7 @@ body: | ; CHECK-NOT: {{%[0-9]+}}:_(s8) = G_SUB {{%[0-9]+, %[0-9]+}} %5(s32) = G_SEXT %4(s8) %r0 = COPY %5(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_sub_s16 @@ -251,7 +303,7 @@ body: | ; CHECK-NOT: {{%[0-9]+}}:_(s16) = G_SUB {{%[0-9]+, %[0-9]+}} %5(s32) = G_SEXT %4(s16) %r0 = COPY %5(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... 
--- name: test_sub_s32 @@ -275,7 +327,7 @@ body: | ; G_SUB with s32 is legal, so we should find it unchanged in the output ; CHECK: {{%[0-9]+}}:_(s32) = G_SUB {{%[0-9]+, %[0-9]+}} %r0 = COPY %2(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- @@ -308,7 +360,7 @@ body: | ; CHECK-NOT: {{%[0-9]+}}:_(s8) = G_MUL {{%[0-9]+, %[0-9]+}} %5(s32) = G_SEXT %4(s8) %r0 = COPY %5(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_mul_s16 @@ -340,7 +392,7 @@ body: | ; CHECK-NOT: {{%[0-9]+}}:_(s16) = G_MUL {{%[0-9]+, %[0-9]+}} %5(s32) = G_SEXT %4(s16) %r0 = COPY %5(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_mul_s32 @@ -364,7 +416,7 @@ body: | ; G_MUL with s32 is legal, so we should find it unchanged in the output ; CHECK: {{%[0-9]+}}:_(s32) = G_MUL {{%[0-9]+, %[0-9]+}} %r0 = COPY %2(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- @@ -397,7 +449,7 @@ body: | ; CHECK-NOT: {{%[0-9]+}}:_(s8) = G_AND {{%[0-9]+, %[0-9]+}} %5(s32) = G_SEXT %4(s8) %r0 = COPY %5(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_and_s16 @@ -429,7 +481,7 @@ body: | ; CHECK-NOT: {{%[0-9]+}}:_(s16) = G_AND {{%[0-9]+, %[0-9]+}} %5(s32) = G_SEXT %4(s16) %r0 = COPY %5(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_and_s32 @@ -453,7 +505,7 @@ body: | ; G_AND with s32 is legal, so we should find it unchanged in the output ; CHECK: {{%[0-9]+}}:_(s32) = G_AND {{%[0-9]+, %[0-9]+}} %r0 = COPY %2(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- @@ -486,7 +538,7 @@ body: | ; CHECK-NOT: {{%[0-9]+}}:_(s8) = G_OR {{%[0-9]+, %[0-9]+}} %5(s32) = G_SEXT %4(s8) %r0 = COPY %5(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_or_s16 @@ -518,7 +570,7 @@ body: | ; CHECK-NOT: {{%[0-9]+}}:_(s16) = G_OR {{%[0-9]+, %[0-9]+}} %5(s32) = G_SEXT %4(s16) %r0 = COPY %5(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_or_s32 @@ -542,7 +594,7 @@ body: | ; G_OR with s32 is legal, so we should find it unchanged in the output ; CHECK: {{%[0-9]+}}:_(s32) = G_OR {{%[0-9]+, %[0-9]+}} %r0 = COPY %2(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- @@ -575,7 +627,7 @@ body: | ; CHECK-NOT: {{%[0-9]+}}:_(s8) = G_XOR {{%[0-9]+, %[0-9]+}} %5(s32) = G_SEXT %4(s8) %r0 = COPY %5(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_xor_s16 @@ -607,7 +659,7 @@ body: | ; CHECK-NOT: {{%[0-9]+}}:_(s16) = G_XOR {{%[0-9]+, %[0-9]+}} %5(s32) = G_SEXT %4(s16) %r0 = COPY %5(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_xor_s32 @@ -631,7 +683,7 @@ body: | ; G_XOR with s32 is legal, so we should find it unchanged in the output ; CHECK: {{%[0-9]+}}:_(s32) = G_XOR {{%[0-9]+, %[0-9]+}} %r0 = COPY %2(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- @@ -656,7 +708,7 @@ body: | ; G_LSHR with s32 is legal, so we should find it unchanged in the output ; CHECK: {{%[0-9]+}}:_(s32) = G_LSHR {{%[0-9]+, %[0-9]+}} %r0 = COPY %2(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- @@ -681,7 +733,7 @@ body: | ; G_ASHR with s32 is legal, so we should find it unchanged in the output ; CHECK: {{%[0-9]+}}:_(s32) = G_ASHR {{%[0-9]+, %[0-9]+}} %r0 = COPY %2(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... 
--- @@ -706,7 +758,7 @@ body: | ; G_SHL with s32 is legal, so we should find it unchanged in the output ; CHECK: {{%[0-9]+}}:_(s32) = G_SHL {{%[0-9]+, %[0-9]+}} %r0 = COPY %2(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- @@ -737,7 +789,7 @@ body: | %0(p0) = G_FRAME_INDEX %fixed-stack.2 %1(s32) = G_LOAD %0(p0) :: (load 4) %r0 = COPY %1(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_legal_loads_stores @@ -785,7 +837,7 @@ body: | G_STORE %5(s1), %0(p0) :: (store 1) %6(p0) = G_LOAD %0(p0) :: (load 4) G_STORE %6(p0), %0(p0) :: (store 4) - BX_RET 14, _ + BX_RET 14, %noreg ... --- name: test_gep @@ -810,7 +862,7 @@ body: | %2(p0) = G_GEP %0, %1(s32) %r0 = COPY %2(p0) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_constants @@ -826,6 +878,10 @@ registers: - { id: 2, class: _ } - { id: 3, class: _ } - { id: 4, class: _ } + - { id: 5, class: _ } + - { id: 6, class: _ } + - { id: 7, class: _ } + - { id: 8, class: _ } body: | bb.0: liveins: %r0 @@ -856,8 +912,20 @@ body: | ; CHECK: {{%[0-9]+}}:_(s1) = G_TRUNC [[EXT]](s32) ; CHECK-NOT: G_CONSTANT i1 + %5(p0) = G_CONSTANT 0 + G_STORE %5(p0), %4(p0) :: (store 4) + ; CHECK: {{%[0-9]+}}:_(p0) = G_CONSTANT 0 + + %6(s64) = G_CONSTANT i64 17179869200 ; = 4 * 2 ^ 32 + 16 + %7(s32), %8(s32) = G_UNMERGE_VALUES %6(s64) + G_STORE %7(s32), %4(p0) :: (store 4) + G_STORE %8(s32), %4(p0) :: (store 4) + ; CHECK-DAG: {{%[0-9]+}}:_(s32) = G_CONSTANT i32 4 + ; CHECK-DAG: {{%[0-9]+}}:_(s32) = G_CONSTANT i32 16 + ; CHECK-NOT: G_CONSTANT i64 + %r0 = COPY %0(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_icmp_s8 @@ -888,7 +956,7 @@ body: | ; CHECK-NOT: {{%[0-9]+}}:_(s1) = G_ICMP intpred(ne), {{%[0-9]+}}(s8), {{%[0-9]+}} %5(s32) = G_ZEXT %4(s1) %r0 = COPY %5(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_icmp_s16 @@ -919,7 +987,7 @@ body: | ; CHECK-NOT: {{%[0-9]+}}:_(s1) = G_ICMP intpred(slt), {{%[0-9]+}}(s16), {{%[0-9]+}} %5(s32) = G_ZEXT %4(s1) %r0 = COPY %5(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_icmp_s32 @@ -945,7 +1013,7 @@ body: | ; CHECK: {{%[0-9]+}}:_(s1) = G_ICMP intpred(eq), {{%[0-9]+}}(s32), {{%[0-9]+}} %3(s32) = G_ZEXT %2(s1) %r0 = COPY %3(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_select_s32 @@ -971,7 +1039,7 @@ body: | ; G_SELECT with s32 is legal, so we should find it unchanged in the output ; CHECK: {{%[0-9]+}}:_(s32) = G_SELECT {{%[0-9]+}}(s1), {{%[0-9]+}}, {{%[0-9]+}} %r0 = COPY %3(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_select_ptr @@ -997,7 +1065,7 @@ body: | ; G_SELECT with p0 is legal, so we should find it unchanged in the output ; CHECK: {{%[0-9]+}}:_(p0) = G_SELECT {{%[0-9]+}}(s1), {{%[0-9]+}}, {{%[0-9]+}} %r0 = COPY %3(p0) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_brcond @@ -1026,12 +1094,177 @@ body: | bb.1: %r0 = COPY %1(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 bb.2: %r0 = COPY %0(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 + +... 
+--- +name: test_phi_s32 +# CHECK-LABEL: name: test_phi_s32 +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } + - { id: 3, class: _ } + - { id: 4, class: _ } +body: | + bb.0: + liveins: %r0, %r1, %r2 + + %0(s32) = COPY %r0 + %1(s1) = G_TRUNC %0(s32) + + %2(s32) = COPY %r1 + %3(s32) = COPY %r2 + + G_BRCOND %1(s1), %bb.1 + G_BR %bb.2 + + bb.1: + G_BR %bb.2 + + bb.2: + %4(s32) = G_PHI %2(s32), %bb.0, %3(s32), %bb.1 + ; G_PHI with s32 is legal, so we should find it unchanged in the output + ; CHECK: G_PHI {{%[0-9]+}}(s32), %bb.0, {{%[0-9]+}}(s32), %bb.1 + %r0 = COPY %4(s32) + BX_RET 14, %noreg, implicit %r0 +... +--- +name: test_phi_p0 +# CHECK-LABEL: name: test_phi_p0 +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } + - { id: 3, class: _ } + - { id: 4, class: _ } +body: | + bb.0: + liveins: %r0, %r1, %r2 + + %0(s32) = COPY %r0 + %1(s1) = G_TRUNC %0(s32) + + %2(p0) = COPY %r1 + %3(p0) = COPY %r2 + + G_BRCOND %1(s1), %bb.1 + G_BR %bb.2 + + bb.1: + G_BR %bb.2 + + bb.2: + %4(p0) = G_PHI %2(p0), %bb.0, %3(p0), %bb.1 + ; G_PHI with p0 is legal, so we should find it unchanged in the output + ; CHECK: G_PHI {{%[0-9]+}}(p0), %bb.0, {{%[0-9]+}}(p0), %bb.1 + %r0 = COPY %4(p0) + BX_RET 14, %noreg, implicit %r0 +... +--- +name: test_phi_s64 +# CHECK-LABEL: name: test_phi_s64 +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } + - { id: 3, class: _ } + - { id: 4, class: _ } +body: | + bb.0: + liveins: %r0, %d0, %d1 + + %0(s32) = COPY %r0 + %1(s1) = G_TRUNC %0(s32) + + %2(s64) = COPY %d0 + %3(s64) = COPY %d1 + G_BRCOND %1(s1), %bb.1 + G_BR %bb.2 + + bb.1: + G_BR %bb.2 + + bb.2: + %4(s64) = G_PHI %2(s64), %bb.0, %3(s64), %bb.1 + ; G_PHI with s64 is legal when we have floating point support, so we should + ; find it unchanged in the output + ; CHECK: G_PHI {{%[0-9]+}}(s64), %bb.0, {{%[0-9]+}}(s64), %bb.1 + %d0 = COPY %4(s64) + BX_RET 14, %noreg, implicit %d0 +... +--- +name: test_phi_s8 +# CHECK-LABEL: name: test_phi_s8 +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } + - { id: 3, class: _ } + - { id: 4, class: _ } + - { id: 5, class: _ } + - { id: 6, class: _ } + - { id: 7, class: _ } +body: | + bb.0: + liveins: %r0, %r1, %r2 + + %0(s32) = COPY %r0 + %1(s1) = G_TRUNC %0(s32) + + %2(s32) = COPY %r1 + %3(s8) = G_TRUNC %2(s32) + ; CHECK: [[R1:%[0-9]+]]:_(s32) = COPY %r1 + + %4(s32) = COPY %r2 + %5(s8) = G_TRUNC %4(s32) + ; CHECK: [[R2:%[0-9]+]]:_(s32) = COPY %r2 + + ; CHECK: [[V1:%[0-9]+]]:_(s32) = COPY [[R1]] + + G_BRCOND %1(s1), %bb.1 + G_BR %bb.2 + + bb.1: + ; CHECK: [[V2:%[0-9]+]]:_(s32) = COPY [[R2]] + G_BR %bb.2 + + bb.2: + %6(s8) = G_PHI %3(s8), %bb.0, %5(s8), %bb.1 + ; G_PHI with s8 should widen, and all the truncs and exts should be combined + ; away into a bunch of redundant copies + ; CHECK: [[V:%[0-9]+]]:_(s32) = G_PHI [[V1]](s32), %bb.0, [[V2]](s32), %bb.1 + + %7(s32) = G_ANYEXT %6(s8) + %r0 = COPY %7(s32) + ; CHECK: [[R:%[0-9]+]]:_(s32) = COPY [[V]] + ; CHECK: %r0 = COPY [[R]](s32) + BX_RET 14, %noreg, implicit %r0 ... 
--- name: test_global_variable @@ -1053,6 +1286,6 @@ body: | ; G_GLOBAL_VALUE is legal, so we should find it unchanged in the output ; CHECK: {{%[0-9]+}}:_(p0) = G_GLOBAL_VALUE @a_global %r0 = COPY %1(p0) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... diff --git a/test/CodeGen/ARM/GlobalISel/arm-param-lowering.ll b/test/CodeGen/ARM/GlobalISel/arm-param-lowering.ll index 92c4e2905d88..e7aaa74fb982 100644 --- a/test/CodeGen/ARM/GlobalISel/arm-param-lowering.ll +++ b/test/CodeGen/ARM/GlobalISel/arm-param-lowering.ll @@ -1,5 +1,6 @@ ; RUN: llc -mtriple arm-unknown -mattr=+vfp2,+v4t -global-isel -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=LITTLE -; RUN: llc -mtriple armeb-unknown -mattr=+vfp2,+v4t -global-isel -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=BIG +; RUN: llc -mtriple armeb-unknown -mattr=+vfp2,+v4t -global-isel -global-isel-abort=0 -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=BIG +; XFAIL: armeb declare arm_aapcscc i32* @simple_reg_params_target(i32, i32*) @@ -7,14 +8,14 @@ define arm_aapcscc i32* @test_call_simple_reg_params(i32 *%a, i32 %b) { ; CHECK-LABEL: name: test_call_simple_reg_params ; CHECK-DAG: [[AVREG:%[0-9]+]]:_(p0) = COPY %r0 ; CHECK-DAG: [[BVREG:%[0-9]+]]:_(s32) = COPY %r1 -; CHECK: ADJCALLSTACKDOWN 0, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: ADJCALLSTACKDOWN 0, 0, 14, %noreg, implicit-def %sp, implicit %sp ; CHECK-DAG: %r0 = COPY [[BVREG]] ; CHECK-DAG: %r1 = COPY [[AVREG]] ; CHECK: BL @simple_reg_params_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %r0, implicit %r1, implicit-def %r0 ; CHECK: [[RVREG:%[0-9]+]]:_(p0) = COPY %r0 -; CHECK: ADJCALLSTACKUP 0, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: ADJCALLSTACKUP 0, 0, 14, %noreg, implicit-def %sp, implicit %sp ; CHECK: %r0 = COPY [[RVREG]] -; CHECK: BX_RET 14, _, implicit %r0 +; CHECK: BX_RET 14, %noreg, implicit %r0 entry: %r = notail call arm_aapcscc i32 *@simple_reg_params_target(i32 %b, i32 *%a) ret i32 *%r @@ -26,7 +27,7 @@ define arm_aapcscc i32* @test_call_simple_stack_params(i32 *%a, i32 %b) { ; CHECK-LABEL: name: test_call_simple_stack_params ; CHECK-DAG: [[AVREG:%[0-9]+]]:_(p0) = COPY %r0 ; CHECK-DAG: [[BVREG:%[0-9]+]]:_(s32) = COPY %r1 -; CHECK: ADJCALLSTACKDOWN 8, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: ADJCALLSTACKDOWN 8, 0, 14, %noreg, implicit-def %sp, implicit %sp ; CHECK-DAG: %r0 = COPY [[BVREG]] ; CHECK-DAG: %r1 = COPY [[AVREG]] ; CHECK-DAG: %r2 = COPY [[BVREG]] @@ -41,9 +42,9 @@ define arm_aapcscc i32* @test_call_simple_stack_params(i32 *%a, i32 %b) { ; CHECK: G_STORE [[AVREG]](p0), [[FI2]](p0){{.*}}store 4 ; CHECK: BL @simple_stack_params_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 ; CHECK: [[RVREG:%[0-9]+]]:_(p0) = COPY %r0 -; CHECK: ADJCALLSTACKUP 8, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: ADJCALLSTACKUP 8, 0, 14, %noreg, implicit-def %sp, implicit %sp ; CHECK: %r0 = COPY [[RVREG]] -; CHECK: BX_RET 14, _, implicit %r0 +; CHECK: BX_RET 14, %noreg, implicit %r0 entry: %r = notail call arm_aapcscc i32 *@simple_stack_params_target(i32 %b, i32 *%a, i32 %b, i32 *%a, i32 %b, i32 *%a) ret i32 *%r @@ -59,7 +60,7 @@ define arm_aapcscc signext i16 @test_call_ext_params(i8 %a, i16 %b, i1 %c) { ; CHECK-DAG: [[BVREG:%[0-9]+]]:_(s16) = G_TRUNC [[R1VREG]] ; CHECK-DAG: 
[[R2VREG:%[0-9]+]]:_(s32) = COPY %r2 ; CHECK-DAG: [[CVREG:%[0-9]+]]:_(s1) = G_TRUNC [[R2VREG]] -; CHECK: ADJCALLSTACKDOWN 20, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: ADJCALLSTACKDOWN 20, 0, 14, %noreg, implicit-def %sp, implicit %sp ; CHECK: [[SEXTA:%[0-9]+]]:_(s32) = G_SEXT [[AVREG]](s8) ; CHECK: %r0 = COPY [[SEXTA]] ; CHECK: [[ZEXTA:%[0-9]+]]:_(s32) = G_ZEXT [[AVREG]](s8) @@ -96,10 +97,10 @@ define arm_aapcscc signext i16 @test_call_ext_params(i8 %a, i16 %b, i1 %c) { ; CHECK: BL @ext_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 ; CHECK: [[R0VREG:%[0-9]+]]:_(s32) = COPY %r0 ; CHECK: [[RVREG:%[0-9]+]]:_(s16) = G_TRUNC [[R0VREG]] -; CHECK: ADJCALLSTACKUP 20, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: ADJCALLSTACKUP 20, 0, 14, %noreg, implicit-def %sp, implicit %sp ; CHECK: [[RExtVREG:%[0-9]+]]:_(s32) = G_SEXT [[RVREG]] ; CHECK: %r0 = COPY [[RExtVREG]] -; CHECK: BX_RET 14, _, implicit %r0 +; CHECK: BX_RET 14, %noreg, implicit %r0 entry: %r = notail call arm_aapcscc signext i16 @ext_target(i8 signext %a, i8 zeroext %a, i16 signext %b, i16 zeroext %b, i8 signext %a, i8 zeroext %a, i16 signext %b, i16 zeroext %b, i1 zeroext %c) ret i16 %r @@ -111,14 +112,14 @@ define arm_aapcs_vfpcc double @test_call_vfpcc_fp_params(double %a, float %b) { ; CHECK-LABEL: name: test_call_vfpcc_fp_params ; CHECK-DAG: [[AVREG:%[0-9]+]]:_(s64) = COPY %d0 ; CHECK-DAG: [[BVREG:%[0-9]+]]:_(s32) = COPY %s2 -; CHECK: ADJCALLSTACKDOWN 0, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: ADJCALLSTACKDOWN 0, 0, 14, %noreg, implicit-def %sp, implicit %sp ; CHECK-DAG: %s0 = COPY [[BVREG]] ; CHECK-DAG: %d1 = COPY [[AVREG]] ; CHECK: BL @vfpcc_fp_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %s0, implicit %d1, implicit-def %d0 ; CHECK: [[RVREG:%[0-9]+]]:_(s64) = COPY %d0 -; CHECK: ADJCALLSTACKUP 0, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: ADJCALLSTACKUP 0, 0, 14, %noreg, implicit-def %sp, implicit %sp ; CHECK: %d0 = COPY [[RVREG]] -; CHECK: BX_RET 14, _, implicit %d0 +; CHECK: BX_RET 14, %noreg, implicit %d0 entry: %r = notail call arm_aapcs_vfpcc double @vfpcc_fp_target(float %b, double %a) ret double %r @@ -133,7 +134,7 @@ define arm_aapcscc double @test_call_aapcs_fp_params(double %a, float %b) { ; LITTLE-DAG: [[AVREG:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[A1]](s32), [[A2]](s32) ; BIG-DAG: [[AVREG:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[A2]](s32), [[A1]](s32) ; CHECK-DAG: [[BVREG:%[0-9]+]]:_(s32) = COPY %r2 -; CHECK: ADJCALLSTACKDOWN 16, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: ADJCALLSTACKDOWN 16, 0, 14, %noreg, implicit-def %sp, implicit %sp ; CHECK-DAG: %r0 = COPY [[BVREG]] ; CHECK-DAG: [[A1:%[0-9]+]]:_(s32), [[A2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AVREG]](s64) ; LITTLE-DAG: %r2 = COPY [[A1]] @@ -153,13 +154,13 @@ define arm_aapcscc double @test_call_aapcs_fp_params(double %a, float %b) { ; CHECK-DAG: [[R2:%[0-9]+]]:_(s32) = COPY %r1 ; LITTLE: [[RVREG:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[R1]](s32), [[R2]](s32) ; BIG: [[RVREG:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[R2]](s32), [[R1]](s32) -; CHECK: ADJCALLSTACKUP 16, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: ADJCALLSTACKUP 16, 0, 14, %noreg, implicit-def %sp, implicit %sp ; CHECK: [[R1:%[0-9]+]]:_(s32), [[R2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[RVREG]](s64) ; LITTLE-DAG: %r0 = COPY [[R1]] ; LITTLE-DAG: %r1 = COPY [[R2]] ; BIG-DAG: %r0 = COPY [[R2]] ; BIG-DAG: %r1 = COPY [[R1]] -; CHECK: BX_RET 14, _, implicit %r0, implicit %r1 +; CHECK: BX_RET 
14, %noreg, implicit %r0, implicit %r1 entry: %r = notail call arm_aapcscc double @aapcscc_fp_target(float %b, double %a, float %b, double %a) ret double %r @@ -170,13 +171,13 @@ declare arm_aapcscc float @different_call_conv_target(float) define arm_aapcs_vfpcc float @test_call_different_call_conv(float %x) { ; CHECK-LABEL: name: test_call_different_call_conv ; CHECK: [[X:%[0-9]+]]:_(s32) = COPY %s0 -; CHECK: ADJCALLSTACKDOWN 0, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: ADJCALLSTACKDOWN 0, 0, 14, %noreg, implicit-def %sp, implicit %sp ; CHECK: %r0 = COPY [[X]] ; CHECK: BL @different_call_conv_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %r0, implicit-def %r0 ; CHECK: [[R:%[0-9]+]]:_(s32) = COPY %r0 -; CHECK: ADJCALLSTACKUP 0, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: ADJCALLSTACKUP 0, 0, 14, %noreg, implicit-def %sp, implicit %sp ; CHECK: %s0 = COPY [[R]] -; CHECK: BX_RET 14, _, implicit %s0 +; CHECK: BX_RET 14, %noreg, implicit %s0 entry: %r = notail call arm_aapcscc float @different_call_conv_target(float %x) ret float %r @@ -190,7 +191,7 @@ define arm_aapcscc [3 x i32] @test_tiny_int_arrays([2 x i32] %arr) { ; CHECK: [[R0:%[0-9]+]]:_(s32) = COPY %r0 ; CHECK: [[R1:%[0-9]+]]:_(s32) = COPY %r1 ; CHECK: [[ARG_ARR:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[R0]](s32), [[R1]](s32) -; CHECK: ADJCALLSTACKDOWN 0, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: ADJCALLSTACKDOWN 0, 0, 14, %noreg, implicit-def %sp, implicit %sp ; CHECK: [[R0:%[0-9]+]]:_(s32), [[R1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ARG_ARR]](s64) ; CHECK: %r0 = COPY [[R0]] ; CHECK: %r1 = COPY [[R1]] @@ -199,7 +200,7 @@ define arm_aapcscc [3 x i32] @test_tiny_int_arrays([2 x i32] %arr) { ; CHECK: [[R1:%[0-9]+]]:_(s32) = COPY %r1 ; CHECK: [[R2:%[0-9]+]]:_(s32) = COPY %r2 ; CHECK: [[RES_ARR:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[R0]](s32), [[R1]](s32), [[R2]](s32) -; CHECK: ADJCALLSTACKUP 0, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: ADJCALLSTACKUP 0, 0, 14, %noreg, implicit-def %sp, implicit %sp ; CHECK: [[R0:%[0-9]+]]:_(s32), [[R1:%[0-9]+]]:_(s32), [[R2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[RES_ARR]](s96) ; FIXME: This doesn't seem correct with regard to the AAPCS docs (which say ; that composite types larger than 4 bytes should be passed through memory), @@ -207,7 +208,7 @@ define arm_aapcscc [3 x i32] @test_tiny_int_arrays([2 x i32] %arr) { ; CHECK: %r0 = COPY [[R0]] ; CHECK: %r1 = COPY [[R1]] ; CHECK: %r2 = COPY [[R2]] -; CHECK: BX_RET 14, _, implicit %r0, implicit %r1, implicit %r2 +; CHECK: BX_RET 14, %noreg, implicit %r0, implicit %r1, implicit %r2 entry: %r = notail call arm_aapcscc [3 x i32] @tiny_int_arrays_target([2 x i32] %arr) ret [3 x i32] %r @@ -224,7 +225,7 @@ define arm_aapcscc void @test_multiple_int_arrays([2 x i32] %arr0, [2 x i32] %ar ; CHECK: [[R3:%[0-9]+]]:_(s32) = COPY %r3 ; CHECK: [[ARG_ARR0:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[R0]](s32), [[R1]](s32) ; CHECK: [[ARG_ARR1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[R2]](s32), [[R3]](s32) -; CHECK: ADJCALLSTACKDOWN 0, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: ADJCALLSTACKDOWN 0, 0, 14, %noreg, implicit-def %sp, implicit %sp ; CHECK: [[R0:%[0-9]+]]:_(s32), [[R1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ARG_ARR0]](s64) ; CHECK: [[R2:%[0-9]+]]:_(s32), [[R3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ARG_ARR1]](s64) ; CHECK: %r0 = COPY [[R0]] @@ -232,8 +233,8 @@ define arm_aapcscc void @test_multiple_int_arrays([2 x i32] %arr0, [2 x i32] %ar ; CHECK: %r2 = COPY [[R2]] ; CHECK: %r3 = COPY [[R3]] ; CHECK: BL @multiple_int_arrays_target, 
csr_aapcs, implicit-def %lr, implicit %sp, implicit %r0, implicit %r1, implicit %r2, implicit %r3 -; CHECK: ADJCALLSTACKUP 0, 0, 14, _, implicit-def %sp, implicit %sp -; CHECK: BX_RET 14, _ +; CHECK: ADJCALLSTACKUP 0, 0, 14, %noreg, implicit-def %sp, implicit %sp +; CHECK: BX_RET 14, %noreg entry: notail call arm_aapcscc void @multiple_int_arrays_target([2 x i32] %arr0, [2 x i32] %arr1) ret void @@ -258,7 +259,7 @@ define arm_aapcscc void @test_large_int_arrays([20 x i32] %arr) { ; CHECK: [[LAST_STACK_ELEMENT_FI:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[LAST_STACK_ID]] ; CHECK: [[LAST_STACK_ELEMENT:%[0-9]+]]:_(s32) = G_LOAD [[LAST_STACK_ELEMENT_FI]]{{.*}}load 4 from %fixed-stack.[[LAST_STACK_ID]] ; CHECK: [[ARG_ARR:%[0-9]+]]:_(s640) = G_MERGE_VALUES [[R0]](s32), [[R1]](s32), [[R2]](s32), [[R3]](s32), [[FIRST_STACK_ELEMENT]](s32), {{.*}}, [[LAST_STACK_ELEMENT]](s32) -; CHECK: ADJCALLSTACKDOWN 64, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: ADJCALLSTACKDOWN 64, 0, 14, %noreg, implicit-def %sp, implicit %sp ; CHECK: [[R0:%[0-9]+]]:_(s32), [[R1:%[0-9]+]]:_(s32), [[R2:%[0-9]+]]:_(s32), [[R3:%[0-9]+]]:_(s32), [[FIRST_STACK_ELEMENT:%[0-9]+]]:_(s32), {{.*}}, [[LAST_STACK_ELEMENT:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ARG_ARR]](s640) ; CHECK: %r0 = COPY [[R0]] ; CHECK: %r1 = COPY [[R1]] @@ -275,8 +276,8 @@ define arm_aapcscc void @test_large_int_arrays([20 x i32] %arr) { ; CHECK: [[LAST_STACK_ARG_ADDR:%[0-9]+]]:_(p0) = G_GEP [[SP]], [[OFF_LAST_ELEMENT]](s32) ; CHECK: G_STORE [[LAST_STACK_ELEMENT]](s32), [[LAST_STACK_ARG_ADDR]]{{.*}}store 4 ; CHECK: BL @large_int_arrays_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %r0, implicit %r1, implicit %r2, implicit %r3 -; CHECK: ADJCALLSTACKUP 64, 0, 14, _, implicit-def %sp, implicit %sp -; CHECK: BX_RET 14, _ +; CHECK: ADJCALLSTACKUP 64, 0, 14, %noreg, implicit-def %sp, implicit %sp +; CHECK: BX_RET 14, %noreg entry: notail call arm_aapcscc void @large_int_arrays_target([20 x i32] %arr) ret void @@ -300,7 +301,7 @@ define arm_aapcscc [2 x float] @test_fp_arrays_aapcs([3 x double] %arr) { ; CHECK: [[ARR2_FI:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[ARR2_ID]] ; CHECK: [[ARR2:%[0-9]+]]:_(s64) = G_LOAD [[ARR2_FI]]{{.*}}load 8 from %fixed-stack.[[ARR2_ID]] ; CHECK: [[ARR_MERGED:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[ARR0]](s64), [[ARR1]](s64), [[ARR2]](s64) -; CHECK: ADJCALLSTACKDOWN 8, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: ADJCALLSTACKDOWN 8, 0, 14, %noreg, implicit-def %sp, implicit %sp ; CHECK: [[ARR0:%[0-9]+]]:_(s64), [[ARR1:%[0-9]+]]:_(s64), [[ARR2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[ARR_MERGED]](s192) ; CHECK: [[ARR0_0:%[0-9]+]]:_(s32), [[ARR0_1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ARR0]](s64) ; LITTLE: %r0 = COPY [[ARR0_0]](s32) @@ -320,11 +321,11 @@ define arm_aapcscc [2 x float] @test_fp_arrays_aapcs([3 x double] %arr) { ; CHECK: [[R0:%[0-9]+]]:_(s32) = COPY %r0 ; CHECK: [[R1:%[0-9]+]]:_(s32) = COPY %r1 ; CHECK: [[R_MERGED:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[R0]](s32), [[R1]](s32) -; CHECK: ADJCALLSTACKUP 8, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: ADJCALLSTACKUP 8, 0, 14, %noreg, implicit-def %sp, implicit %sp ; CHECK: [[R0:%[0-9]+]]:_(s32), [[R1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[R_MERGED]](s64) ; CHECK: %r0 = COPY [[R0]] ; CHECK: %r1 = COPY [[R1]] -; CHECK: BX_RET 14, _, implicit %r0, implicit %r1 +; CHECK: BX_RET 14, %noreg, implicit %r0, implicit %r1 entry: %r = notail call arm_aapcscc [2 x float] @fp_arrays_aapcs_target([3 x double] %arr) ret [2 x float] %r @@ -357,7 +358,7 @@ define 
arm_aapcs_vfpcc [4 x float] @test_fp_arrays_aapcs_vfp([3 x double] %x, [3 ; CHECK: [[X_ARR:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[X0]](s64), [[X1]](s64), [[X2]](s64) ; CHECK: [[Y_ARR:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[Y0]](s32), [[Y1]](s32), [[Y2]](s32) ; CHECK: [[Z_ARR:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[Z0]](s64), [[Z1]](s64), [[Z2]](s64), [[Z3]](s64) -; CHECK: ADJCALLSTACKDOWN 32, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: ADJCALLSTACKDOWN 32, 0, 14, %noreg, implicit-def %sp, implicit %sp ; CHECK: [[X0:%[0-9]+]]:_(s64), [[X1:%[0-9]+]]:_(s64), [[X2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[X_ARR]](s192) ; CHECK: [[Y0:%[0-9]+]]:_(s32), [[Y1:%[0-9]+]]:_(s32), [[Y2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[Y_ARR]](s96) ; CHECK: [[Z0:%[0-9]+]]:_(s64), [[Z1:%[0-9]+]]:_(s64), [[Z2:%[0-9]+]]:_(s64), [[Z3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[Z_ARR]](s256) @@ -389,13 +390,13 @@ define arm_aapcs_vfpcc [4 x float] @test_fp_arrays_aapcs_vfp([3 x double] %x, [3 ; CHECK: [[R2:%[0-9]+]]:_(s32) = COPY %s2 ; CHECK: [[R3:%[0-9]+]]:_(s32) = COPY %s3 ; CHECK: [[R_MERGED:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[R0]](s32), [[R1]](s32), [[R2]](s32), [[R3]](s32) -; CHECK: ADJCALLSTACKUP 32, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: ADJCALLSTACKUP 32, 0, 14, %noreg, implicit-def %sp, implicit %sp ; CHECK: [[R0:%[0-9]+]]:_(s32), [[R1:%[0-9]+]]:_(s32), [[R2:%[0-9]+]]:_(s32), [[R3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[R_MERGED]](s128) ; CHECK: %s0 = COPY [[R0]] ; CHECK: %s1 = COPY [[R1]] ; CHECK: %s2 = COPY [[R2]] ; CHECK: %s3 = COPY [[R3]] -; CHECK: BX_RET 14, _, implicit %s0, implicit %s1, implicit %s2, implicit %s3 +; CHECK: BX_RET 14, %noreg, implicit %s0, implicit %s1, implicit %s2, implicit %s3 entry: %r = notail call arm_aapcs_vfpcc [4 x float] @fp_arrays_aapcs_vfp_target([3 x double] %x, [3 x float] %y, [4 x double] %z) ret [4 x float] %r @@ -420,7 +421,7 @@ define arm_aapcscc [2 x i32*] @test_tough_arrays([6 x [4 x i32]] %arr) { ; CHECK: [[LAST_STACK_ELEMENT_FI:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[LAST_STACK_ID]] ; CHECK: [[LAST_STACK_ELEMENT:%[0-9]+]]:_(s32) = G_LOAD [[LAST_STACK_ELEMENT_FI]]{{.*}}load 4 from %fixed-stack.[[LAST_STACK_ID]] ; CHECK: [[ARG_ARR:%[0-9]+]]:_(s768) = G_MERGE_VALUES [[R0]](s32), [[R1]](s32), [[R2]](s32), [[R3]](s32), [[FIRST_STACK_ELEMENT]](s32), {{.*}}, [[LAST_STACK_ELEMENT]](s32) -; CHECK: ADJCALLSTACKDOWN 80, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: ADJCALLSTACKDOWN 80, 0, 14, %noreg, implicit-def %sp, implicit %sp ; CHECK: [[R0:%[0-9]+]]:_(s32), [[R1:%[0-9]+]]:_(s32), [[R2:%[0-9]+]]:_(s32), [[R3:%[0-9]+]]:_(s32), [[FIRST_STACK_ELEMENT:%[0-9]+]]:_(s32), {{.*}}, [[LAST_STACK_ELEMENT:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ARG_ARR]](s768) ; CHECK: %r0 = COPY [[R0]] ; CHECK: %r1 = COPY [[R1]] @@ -440,11 +441,11 @@ define arm_aapcscc [2 x i32*] @test_tough_arrays([6 x [4 x i32]] %arr) { ; CHECK: [[R0:%[0-9]+]]:_(s32) = COPY %r0 ; CHECK: [[R1:%[0-9]+]]:_(s32) = COPY %r1 ; CHECK: [[RES_ARR:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[R0]](s32), [[R1]](s32) -; CHECK: ADJCALLSTACKUP 80, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: ADJCALLSTACKUP 80, 0, 14, %noreg, implicit-def %sp, implicit %sp ; CHECK: [[R0:%[0-9]+]]:_(s32), [[R1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[RES_ARR]](s64) ; CHECK: %r0 = COPY [[R0]] ; CHECK: %r1 = COPY [[R1]] -; CHECK: BX_RET 14, _, implicit %r0, implicit %r1 +; CHECK: BX_RET 14, %noreg, implicit %r0, implicit %r1 entry: %r = notail call arm_aapcscc [2 x i32*] @tough_arrays_target([6 x [4 x i32]] %arr) ret [2 x i32*] %r @@ -458,7 +459,7 
@@ define arm_aapcscc {i32, i32} @test_structs({i32, i32} %x) { ; CHECK-DAG: [[X0:%[0-9]+]]:_(s32) = COPY %r0 ; CHECK-DAG: [[X1:%[0-9]+]]:_(s32) = COPY %r1 ; CHECK: [[X:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[X0]](s32), [[X1]](s32) -; CHECK: ADJCALLSTACKDOWN 0, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: ADJCALLSTACKDOWN 0, 0, 14, %noreg, implicit-def %sp, implicit %sp ; CHECK: [[X0:%[0-9]+]]:_(s32), [[X1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[X]](s64) ; CHECK-DAG: %r0 = COPY [[X0]](s32) ; CHECK-DAG: %r1 = COPY [[X1]](s32) @@ -466,11 +467,11 @@ define arm_aapcscc {i32, i32} @test_structs({i32, i32} %x) { ; CHECK: [[R0:%[0-9]+]]:_(s32) = COPY %r0 ; CHECK: [[R1:%[0-9]+]]:_(s32) = COPY %r1 ; CHECK: [[R:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[R0]](s32), [[R1]](s32) -; CHECK: ADJCALLSTACKUP 0, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: ADJCALLSTACKUP 0, 0, 14, %noreg, implicit-def %sp, implicit %sp ; CHECK: [[R0:%[0-9]+]]:_(s32), [[R1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[R]](s64) ; CHECK: %r0 = COPY [[R0]](s32) ; CHECK: %r1 = COPY [[R1]](s32) -; CHECK: BX_RET 14, _, implicit %r0, implicit %r1 +; CHECK: BX_RET 14, %noreg, implicit %r0, implicit %r1 %r = notail call arm_aapcscc {i32, i32} @structs_target({i32, i32} %x) ret {i32, i32} %r } diff --git a/test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir b/test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir index 986f4a5ae489..6273e7a72c31 100644 --- a/test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir +++ b/test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir @@ -24,6 +24,9 @@ define void @test_constants() { ret void } + define void @test_inttoptr_s32() { ret void } + define void @test_ptrtoint_s32() { ret void } + @a_global = global float 1.0 define void @test_globals() { ret void } @@ -31,6 +34,7 @@ define void @test_anyext_s16_32() { ret void } define void @test_trunc_s32_16() { ret void } + define void @test_trunc_s64_32() #0 { ret void } define void @test_icmp_eq_s32() { ret void } define void @test_fcmp_one_s32() #0 { ret void } @@ -40,6 +44,9 @@ define void @test_br() { ret void } + define void @test_phi_s32() { ret void } + define void @test_phi_s64() #0 { ret void } + define void @test_fadd_s32() #0 { ret void } define void @test_fadd_s64() #0 { ret void } @@ -52,10 +59,20 @@ define void @test_fdiv_s32() #0 { ret void } define void @test_fdiv_s64() #0 { ret void } + define void @test_fneg_s32() #0 { ret void } + define void @test_fneg_s64() #0 { ret void } + + define void @test_fma_s32() #2 { ret void } + define void @test_fma_s64() #2 { ret void } + + define void @test_fpext_s32_to_s64() #0 { ret void } + define void @test_fptrunc_s64_to_s32() #0 { ret void } + define void @test_soft_fp_s64() #0 { ret void } attributes #0 = { "target-features"="+vfp2"} attributes #1 = { "target-features"="+hwdiv-arm" } + attributes #2 = { "target-features"="+vfp4"} ... --- name: test_add_s32 @@ -80,7 +97,7 @@ body: | %1(s32) = COPY %r1 %2(s32) = G_ADD %0, %1 %r0 = COPY %2(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- @@ -106,7 +123,7 @@ body: | %1(s32) = COPY %r1 %2(s32) = G_SUB %0, %1 %r0 = COPY %2(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- @@ -132,7 +149,7 @@ body: | %1(s32) = COPY %r1 %2(s32) = G_MUL %0, %1 %r0 = COPY %2(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- @@ -158,7 +175,7 @@ body: | %1(s32) = COPY %r1 %2(s32) = G_SDIV %0, %1 %r0 = COPY %2(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... 
--- @@ -184,7 +201,7 @@ body: | %1(s32) = COPY %r1 %2(s32) = G_UDIV %0, %1 %r0 = COPY %2(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- @@ -210,7 +227,7 @@ body: | %1(s32) = COPY %r1 %2(s32) = G_AND %0, %1 %r0 = COPY %2(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- @@ -236,7 +253,7 @@ body: | %1(s32) = COPY %r1 %2(s32) = G_OR %0, %1 %r0 = COPY %2(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- @@ -262,7 +279,7 @@ body: | %1(s32) = COPY %r1 %2(s32) = G_XOR %0, %1 %r0 = COPY %2(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- @@ -288,7 +305,7 @@ body: | %1(s32) = COPY %r1 %2(s32) = G_LSHR %0, %1 %r0 = COPY %2(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- @@ -314,7 +331,7 @@ body: | %1(s32) = COPY %r1 %2(s32) = G_ASHR %0, %1 %r0 = COPY %2(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- @@ -340,7 +357,7 @@ body: | %1(s32) = COPY %r1 %2(s32) = G_SHL %0, %1 %r0 = COPY %2(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- @@ -376,7 +393,7 @@ body: | %3(s8) = G_LOAD %0 :: (load 1) %4(s1) = G_LOAD %0 :: (load 1) %5(p0) = G_LOAD %0 :: (load 4) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- @@ -418,7 +435,7 @@ body: | G_STORE %5(p0), %0 :: (store 4) %6(s64) = COPY %d6 G_STORE %6(s64), %0 :: (store 8) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- @@ -451,7 +468,7 @@ body: | %4(p0) = G_GEP %2, %3(s32) G_STORE %1(s32), %4(p0) :: (store 4) - BX_RET 14, _ + BX_RET 14, %noreg ... --- @@ -477,7 +494,7 @@ body: | %1(s32) = COPY %r1 %2(p0) = G_GEP %0, %1(s32) %r0 = COPY %2(p0) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_constants @@ -493,7 +510,45 @@ body: | bb.0: %0(s32) = G_CONSTANT 42 %r0 = COPY %0(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 +... +--- +name: test_inttoptr_s32 +# CHECK-LABEL: name: test_inttoptr_s32 +legalized: true +regBankSelected: false +selected: false +# CHECK: registers: +# CHECK: - { id: 0, class: gprb, preferred-register: '' } +# CHECK: - { id: 1, class: gprb, preferred-register: '' } +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } +body: | + bb.0: + %0(s32) = COPY %r0 + %1(p0) = G_INTTOPTR %0(s32) + %r0 = COPY %1(p0) + BX_RET 14, %noreg, implicit %r0 +... +--- +name: test_ptrtoint_s32 +# CHECK-LABEL: name: test_ptrtoint_s32 +legalized: true +regBankSelected: false +selected: false +# CHECK: registers: +# CHECK: - { id: 0, class: gprb, preferred-register: '' } +# CHECK: - { id: 1, class: gprb, preferred-register: '' } +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } +body: | + bb.0: + %0(p0) = COPY %r0 + %1(s32) = G_PTRTOINT %0(p0) + %r0 = COPY %1(s32) + BX_RET 14, %noreg, implicit %r0 ... --- name: test_globals @@ -509,7 +564,7 @@ body: | bb.0: %0(p0) = G_GLOBAL_VALUE @a_global %r0 = COPY %0(p0) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_anyext_s8_32 @@ -533,7 +588,7 @@ body: | %1(s8) = G_TRUNC %0(s32) %2(s32) = G_ANYEXT %1(s8) %r0 = COPY %2(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- name: test_anyext_s16_32 @@ -557,7 +612,7 @@ body: | %1(s16) = G_TRUNC %0(s32) %2(s32) = G_ANYEXT %1(s16) %r0 = COPY %2(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... 
--- name: test_trunc_s32_16 @@ -581,7 +636,31 @@ body: | %2(p0) = COPY %r1 %1(s16) = G_TRUNC %0(s32) G_STORE %1(s16), %2 :: (store 2) - BX_RET 14, _ + BX_RET 14, %noreg +... +--- +name: test_trunc_s64_32 +# CHECK-LABEL: name: test_trunc_s64_32 +legalized: true +regBankSelected: false +selected: false +# CHECK: registers: +# CHECK: - { id: 0, class: fprb, preferred-register: '' } +# CHECK: - { id: 1, class: gprb, preferred-register: '' } +# CHECK: - { id: 2, class: gprb, preferred-register: '' } +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: %r0, %d0 + + %0(s64) = COPY %d0 + %2(p0) = COPY %r0 + %1(s32) = G_TRUNC %0(s64) + G_STORE %1(s32), %2 :: (store 4) + BX_RET 14, %noreg ... --- name: test_icmp_eq_s32 @@ -609,7 +688,7 @@ body: | %2(s1) = G_ICMP intpred(eq), %0(s32), %1 %3(s32) = G_ZEXT %2(s1) %r0 = COPY %3(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- @@ -638,7 +717,7 @@ body: | %2(s1) = G_FCMP floatpred(one), %0(s32), %1 %3(s32) = G_ZEXT %2(s1) %r0 = COPY %3(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- @@ -667,7 +746,7 @@ body: | %2(s1) = G_FCMP floatpred(ugt), %0(s64), %1 %3(s32) = G_ZEXT %2(s1) %r0 = COPY %3(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- @@ -699,7 +778,7 @@ body: | %3(s1) = G_TRUNC %2(s32) %4(s32) = G_SELECT %3(s1), %0, %1 %r0 = COPY %4(s32) - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... --- @@ -727,11 +806,93 @@ body: | G_BR %bb.2 bb.1: - BX_RET 14, _ + BX_RET 14, %noreg + + bb.2: + BX_RET 14, %noreg + +... +--- +name: test_phi_s32 +# CHECK-LABEL: name: test_phi_s32 +legalized: true +regBankSelected: false +# CHECK: regBankSelected: true +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } + - { id: 3, class: _ } + - { id: 4, class: _ } +# CHECK: { id: 0, class: gprb, preferred-register: '' } +# CHECK: { id: 1, class: gprb, preferred-register: '' } +# CHECK: { id: 2, class: gprb, preferred-register: '' } +# CHECK: { id: 3, class: gprb, preferred-register: '' } +# CHECK: { id: 4, class: gprb, preferred-register: '' } +body: | + bb.0: + successors: %bb.1(0x40000000), %bb.2(0x40000000) + liveins: %r0, %r1, %r2 + + %0(s32) = COPY %r0 + %1(s1) = G_TRUNC %0(s32) + + %2(s32) = COPY %r1 + %3(s32) = COPY %r2 + + G_BRCOND %1(s1), %bb.1 + G_BR %bb.2 + + bb.1: + successors: %bb.2(0x80000000) bb.2: - BX_RET 14, _ + %4(s32) = G_PHI %2(s32), %bb.0, %3(s32), %bb.1 + %r0 = COPY %4(s32) + BX_RET 14, %noreg, implicit %r0 +... 
+--- +name: test_phi_s64 +# CHECK-LABEL: name: test_phi_s64 +legalized: true +regBankSelected: false +# CHECK: regBankSelected: true +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } + - { id: 3, class: _ } + - { id: 4, class: _ } +# CHECK: { id: 0, class: gprb, preferred-register: '' } +# CHECK: { id: 1, class: gprb, preferred-register: '' } +# CHECK: { id: 2, class: fprb, preferred-register: '' } +# CHECK: { id: 3, class: fprb, preferred-register: '' } +# CHECK: { id: 4, class: fprb, preferred-register: '' } +body: | + bb.0: + successors: %bb.1(0x40000000), %bb.2(0x40000000) + liveins: %r0, %d0, %d1 + + %0(s32) = COPY %r0 + %1(s1) = G_TRUNC %0(s32) + + %2(s64) = COPY %d0 + %3(s64) = COPY %d1 + + G_BRCOND %1(s1), %bb.1 + G_BR %bb.2 + + bb.1: + successors: %bb.2(0x80000000) + bb.2: + %4(s64) = G_PHI %2(s64), %bb.0, %3(s64), %bb.1 + %d0 = COPY %4(s64) + BX_RET 14, %noreg, implicit %d0 ... --- name: test_fadd_s32 @@ -756,7 +917,7 @@ body: | %1(s32) = COPY %s1 %2(s32) = G_FADD %0, %1 %s0 = COPY %2(s32) - BX_RET 14, _, implicit %s0 + BX_RET 14, %noreg, implicit %s0 ... --- @@ -782,7 +943,7 @@ body: | %1(s64) = COPY %d1 %2(s64) = G_FADD %0, %1 %d0 = COPY %2(s64) - BX_RET 14, _, implicit %d0 + BX_RET 14, %noreg, implicit %d0 ... --- @@ -808,7 +969,7 @@ body: | %1(s32) = COPY %s1 %2(s32) = G_FSUB %0, %1 %s0 = COPY %2(s32) - BX_RET 14, _, implicit %s0 + BX_RET 14, %noreg, implicit %s0 ... --- @@ -834,7 +995,7 @@ body: | %1(s64) = COPY %d1 %2(s64) = G_FSUB %0, %1 %d0 = COPY %2(s64) - BX_RET 14, _, implicit %d0 + BX_RET 14, %noreg, implicit %d0 ... --- @@ -860,7 +1021,7 @@ body: | %1(s32) = COPY %s1 %2(s32) = G_FMUL %0, %1 %s0 = COPY %2(s32) - BX_RET 14, _, implicit %s0 + BX_RET 14, %noreg, implicit %s0 ... --- @@ -886,7 +1047,7 @@ body: | %1(s64) = COPY %d1 %2(s64) = G_FMUL %0, %1 %d0 = COPY %2(s64) - BX_RET 14, _, implicit %d0 + BX_RET 14, %noreg, implicit %d0 ... --- @@ -912,7 +1073,7 @@ body: | %1(s32) = COPY %s1 %2(s32) = G_FDIV %0, %1 %s0 = COPY %2(s32) - BX_RET 14, _, implicit %s0 + BX_RET 14, %noreg, implicit %s0 ... --- @@ -938,8 +1099,148 @@ body: | %1(s64) = COPY %d1 %2(s64) = G_FDIV %0, %1 %d0 = COPY %2(s64) - BX_RET 14, _, implicit %d0 + BX_RET 14, %noreg, implicit %d0 + +... +--- +name: test_fneg_s32 +# CHECK-LABEL: name: test_fneg_s32 +legalized: true +regBankSelected: false +selected: false +# CHECK: registers: +# CHECK: - { id: 0, class: fprb, preferred-register: '' } +# CHECK: - { id: 1, class: fprb, preferred-register: '' } +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } +body: | + bb.0: + liveins: %s0 + + %0(s32) = COPY %s0 + %1(s32) = G_FNEG %0 + %s0 = COPY %1(s32) + BX_RET 14, %noreg, implicit %s0 + +... +--- +name: test_fneg_s64 +# CHECK-LABEL: name: test_fneg_s64 +legalized: true +regBankSelected: false +selected: false +# CHECK: registers: +# CHECK: - { id: 0, class: fprb, preferred-register: '' } +# CHECK: - { id: 1, class: fprb, preferred-register: '' } +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } +body: | + bb.0: + liveins: %d0 + + %0(s64) = COPY %d0 + %1(s64) = G_FNEG %0 + %d0 = COPY %1(s64) + BX_RET 14, %noreg, implicit %d0 + +... 
+--- +name: test_fma_s32 +# CHECK-LABEL: name: test_fma_s32 +legalized: true +regBankSelected: false +selected: false +# CHECK: registers: +# CHECK: - { id: 0, class: fprb, preferred-register: '' } +# CHECK: - { id: 1, class: fprb, preferred-register: '' } +# CHECK: - { id: 2, class: fprb, preferred-register: '' } +# CHECK: - { id: 3, class: fprb, preferred-register: '' } +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } + - { id: 3, class: _ } +body: | + bb.0: + liveins: %s0, %s1, %s2 + + %0(s32) = COPY %s0 + %1(s32) = COPY %s1 + %2(s32) = COPY %s2 + %3(s32) = G_FMA %0, %1, %2 + %s0 = COPY %3(s32) + BX_RET 14, %noreg, implicit %s0 +... +--- +name: test_fma_s64 +# CHECK-LABEL: name: test_fma_s64 +legalized: true +regBankSelected: false +selected: false +# CHECK: registers: +# CHECK: - { id: 0, class: fprb, preferred-register: '' } +# CHECK: - { id: 1, class: fprb, preferred-register: '' } +# CHECK: - { id: 2, class: fprb, preferred-register: '' } +# CHECK: - { id: 3, class: fprb, preferred-register: '' } +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } + - { id: 3, class: _ } +body: | + bb.0: + liveins: %d0, %d1, %d2 + + %0(s64) = COPY %d0 + %1(s64) = COPY %d1 + %2(s64) = COPY %d2 + %3(s64) = G_FMA %0, %1, %2 + %d0 = COPY %3(s64) + BX_RET 14, %noreg, implicit %d0 +... +--- +name: test_fpext_s32_to_s64 +# CHECK-LABEL: name: test_fpext_s32_to_s64 +legalized: true +regBankSelected: false +selected: false +# CHECK: registers: +# CHECK: - { id: 0, class: fprb, preferred-register: '' } +# CHECK: - { id: 1, class: fprb, preferred-register: '' } +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } +body: | + bb.0: + liveins: %s0 + + %0(s32) = COPY %s0 + %1(s64) = G_FPEXT %0 + %d0 = COPY %1(s64) + BX_RET 14, %noreg, implicit %d0 +... +--- +name: test_fptrunc_s64_to_s32 +# CHECK-LABEL: name: test_fptrunc_s64_to_s32 +legalized: true +regBankSelected: false +selected: false +# CHECK: registers: +# CHECK: - { id: 0, class: fprb, preferred-register: '' } +# CHECK: - { id: 1, class: fprb, preferred-register: '' } +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } +body: | + bb.0: + liveins: %d0 + %0(s64) = COPY %d0 + %1(s32) = G_FPTRUNC %0 + %s0 = COPY %1(s32) + BX_RET 14, %noreg, implicit %s0 ... --- name: test_soft_fp_s64 @@ -970,6 +1271,6 @@ body: | %3(s32), %4(s32) = G_UNMERGE_VALUES %2(s64) %r0 = COPY %3(s32) %r1 = COPY %4(s32) - BX_RET 14, _, implicit %r0, implicit %r1 + BX_RET 14, %noreg, implicit %r0, implicit %r1 ... diff --git a/test/CodeGen/ARM/GlobalISel/arm-select-globals-pic.mir b/test/CodeGen/ARM/GlobalISel/arm-select-globals-pic.mir index 448a7f86e889..60568d5bbeb6 100644 --- a/test/CodeGen/ARM/GlobalISel/arm-select-globals-pic.mir +++ b/test/CodeGen/ARM/GlobalISel/arm-select-globals-pic.mir @@ -33,13 +33,13 @@ body: | ; ELF: [[G:%[0-9]+]]:gpr = LDRLIT_ga_pcrel {{.*}}@internal_global %1(s32) = G_LOAD %0(p0) :: (load 4 from @internal_global) - ; CHECK: [[V:%[0-9]+]]:gpr = LDRi12 [[G]], 0, 14, _ :: (load 4 from @internal_global) + ; CHECK: [[V:%[0-9]+]]:gpr = LDRi12 [[G]], 0, 14, %noreg :: (load 4 from @internal_global) %r0 = COPY %1(s32) ; CHECK: %r0 = COPY [[V]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 ... 
--- name: test_external_global @@ -56,16 +56,16 @@ body: | %0(p0) = G_GLOBAL_VALUE @external_global ; DARWIN-MOVT: [[G:%[0-9]+]]:gpr = MOV_ga_pcrel_ldr {{.*}} @external_global :: (load 4 from got) ; DARWIN-NOMOVT: [[G:%[0-9]+]]:gpr = LDRLIT_ga_pcrel_ldr {{.*}}@external_global :: (load 4 from got) - ; ELF: [[G:%[0-9]+]]:gpr = LDRLIT_ga_pcrel_ldr target-flags() @external_global :: (load 4 from got) + ; ELF: [[G:%[0-9]+]]:gpr = LDRLIT_ga_pcrel_ldr target-flags(arm-got) @external_global :: (load 4 from got) %1(s32) = G_LOAD %0(p0) :: (load 4 from @external_global) - ; CHECK: [[V:%[0-9]+]]:gpr = LDRi12 [[G]], 0, 14, _ :: (load 4 from @external_global) + ; CHECK: [[V:%[0-9]+]]:gpr = LDRi12 [[G]], 0, 14, %noreg :: (load 4 from @external_global) %r0 = COPY %1(s32) ; CHECK: %r0 = COPY [[V]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 ... --- name: test_internal_constant @@ -85,13 +85,13 @@ body: | ; ELF: [[G:%[0-9]+]]:gpr = LDRLIT_ga_pcrel {{.*}}@internal_constant %1(s32) = G_LOAD %0(p0) :: (load 4 from @internal_constant) - ; CHECK: [[V:%[0-9]+]]:gpr = LDRi12 [[G]], 0, 14, _ :: (load 4 from @internal_constant) + ; CHECK: [[V:%[0-9]+]]:gpr = LDRi12 [[G]], 0, 14, %noreg :: (load 4 from @internal_constant) %r0 = COPY %1(s32) ; CHECK: %r0 = COPY [[V]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 ... --- name: test_external_constant @@ -108,14 +108,14 @@ body: | %0(p0) = G_GLOBAL_VALUE @external_constant ; DARWIN-MOVT: [[G:%[0-9]+]]:gpr = MOV_ga_pcrel_ldr {{.*}} @external_constant :: (load 4 from got) ; DARWIN-NOMOVT: [[G:%[0-9]+]]:gpr = LDRLIT_ga_pcrel_ldr {{.*}}@external_constant :: (load 4 from got) - ; ELF: [[G:%[0-9]+]]:gpr = LDRLIT_ga_pcrel_ldr target-flags() @external_constant :: (load 4 from got) + ; ELF: [[G:%[0-9]+]]:gpr = LDRLIT_ga_pcrel_ldr target-flags(arm-got) @external_constant :: (load 4 from got) %1(s32) = G_LOAD %0(p0) :: (load 4 from @external_constant) - ; CHECK: [[V:%[0-9]+]]:gpr = LDRi12 [[G]], 0, 14, _ :: (load 4 from @external_constant) + ; CHECK: [[V:%[0-9]+]]:gpr = LDRi12 [[G]], 0, 14, %noreg :: (load 4 from @external_constant) %r0 = COPY %1(s32) ; CHECK: %r0 = COPY [[V]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 ... 
diff --git a/test/CodeGen/ARM/GlobalISel/arm-select-globals-ropi-rwpi.mir b/test/CodeGen/ARM/GlobalISel/arm-select-globals-ropi-rwpi.mir index e80700317e00..dc48dee00c88 100644 --- a/test/CodeGen/ARM/GlobalISel/arm-select-globals-ropi-rwpi.mir +++ b/test/CodeGen/ARM/GlobalISel/arm-select-globals-ropi-rwpi.mir @@ -37,19 +37,19 @@ body: | bb.0: %0(p0) = G_GLOBAL_VALUE @internal_global ; RW-DEFAULT-MOVT: [[G:%[0-9]+]]:gpr = MOVi32imm @internal_global - ; RW-DEFAULT-NOMOVT: [[G:%[0-9]+]]:gpr = LDRi12 %const.0, 0, 14, _ :: (load 4 from constant-pool) + ; RW-DEFAULT-NOMOVT: [[G:%[0-9]+]]:gpr = LDRi12 %const.0, 0, 14, %noreg :: (load 4 from constant-pool) ; RWPI-MOVT: [[OFF:%[0-9]+]]:gpr = MOVi32imm {{.*}} @internal_global - ; RWPI-NOMOVT: [[OFF:%[0-9]+]]:gpr = LDRi12 %const.0, 0, 14, _ :: (load 4 from constant-pool) - ; RWPI: [[G:%[0-9]+]]:gpr = ADDrr %r9, [[OFF]], 14, _, _ + ; RWPI-NOMOVT: [[OFF:%[0-9]+]]:gpr = LDRi12 %const.0, 0, 14, %noreg :: (load 4 from constant-pool) + ; RWPI: [[G:%[0-9]+]]:gpr = ADDrr %r9, [[OFF]], 14, %noreg, %noreg %1(s32) = G_LOAD %0(p0) :: (load 4 from @internal_global) - ; CHECK: [[V:%[0-9]+]]:gpr = LDRi12 [[G]], 0, 14, _ :: (load 4 from @internal_global) + ; CHECK: [[V:%[0-9]+]]:gpr = LDRi12 [[G]], 0, 14, %noreg :: (load 4 from @internal_global) %r0 = COPY %1(s32) ; CHECK: %r0 = COPY [[V]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 ... --- name: test_external_global @@ -71,19 +71,19 @@ body: | bb.0: %0(p0) = G_GLOBAL_VALUE @external_global ; RW-DEFAULT-MOVT: [[G:%[0-9]+]]:gpr = MOVi32imm @external_global - ; RW-DEFAULT-NOMOVT: [[G:%[0-9]+]]:gpr = LDRi12 %const.0, 0, 14, _ :: (load 4 from constant-pool) + ; RW-DEFAULT-NOMOVT: [[G:%[0-9]+]]:gpr = LDRi12 %const.0, 0, 14, %noreg :: (load 4 from constant-pool) ; RWPI-MOVT: [[OFF:%[0-9]+]]:gpr = MOVi32imm {{.*}} @external_global - ; RWPI-NOMOVT: [[OFF:%[0-9]+]]:gpr = LDRi12 %const.0, 0, 14, _ :: (load 4 from constant-pool) - ; RWPI: [[G:%[0-9]+]]:gpr = ADDrr %r9, [[OFF]], 14, _, _ + ; RWPI-NOMOVT: [[OFF:%[0-9]+]]:gpr = LDRi12 %const.0, 0, 14, %noreg :: (load 4 from constant-pool) + ; RWPI: [[G:%[0-9]+]]:gpr = ADDrr %r9, [[OFF]], 14, %noreg, %noreg %1(s32) = G_LOAD %0(p0) :: (load 4 from @external_global) - ; CHECK: [[V:%[0-9]+]]:gpr = LDRi12 [[G]], 0, 14, _ :: (load 4 from @external_global) + ; CHECK: [[V:%[0-9]+]]:gpr = LDRi12 [[G]], 0, 14, %noreg :: (load 4 from @external_global) %r0 = COPY %1(s32) ; CHECK: %r0 = COPY [[V]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 ... 
--- name: test_internal_constant @@ -104,16 +104,16 @@ body: | ; ROPI-MOVT: [[G:%[0-9]+]]:gpr = MOV_ga_pcrel @internal_constant ; ROPI-NOMOVT: [[G:%[0-9]+]]:gpr = LDRLIT_ga_pcrel @internal_constant ; RO-DEFAULT-MOVT: [[G:%[0-9]+]]:gpr = MOVi32imm @internal_constant - ; RO-DEFAULT-NOMOVT: [[G:%[0-9]+]]:gpr = LDRi12 %const.0, 0, 14, _ :: (load 4 from constant-pool) + ; RO-DEFAULT-NOMOVT: [[G:%[0-9]+]]:gpr = LDRi12 %const.0, 0, 14, %noreg :: (load 4 from constant-pool) %1(s32) = G_LOAD %0(p0) :: (load 4 from @internal_constant) - ; CHECK: [[V:%[0-9]+]]:gpr = LDRi12 [[G]], 0, 14, _ :: (load 4 from @internal_constant) + ; CHECK: [[V:%[0-9]+]]:gpr = LDRi12 [[G]], 0, 14, %noreg :: (load 4 from @internal_constant) %r0 = COPY %1(s32) ; CHECK: %r0 = COPY [[V]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 ... --- name: test_external_constant @@ -134,14 +134,14 @@ body: | ; ROPI-MOVT: [[G:%[0-9]+]]:gpr = MOV_ga_pcrel @external_constant ; ROPI-NOMOVT: [[G:%[0-9]+]]:gpr = LDRLIT_ga_pcrel @external_constant ; RO-DEFAULT-MOVT: [[G:%[0-9]+]]:gpr = MOVi32imm @external_constant - ; RO-DEFAULT-NOMOVT: [[G:%[0-9]+]]:gpr = LDRi12 %const.0, 0, 14, _ :: (load 4 from constant-pool) + ; RO-DEFAULT-NOMOVT: [[G:%[0-9]+]]:gpr = LDRi12 %const.0, 0, 14, %noreg :: (load 4 from constant-pool) %1(s32) = G_LOAD %0(p0) :: (load 4 from @external_constant) - ; CHECK: [[V:%[0-9]+]]:gpr = LDRi12 [[G]], 0, 14, _ :: (load 4 from @external_constant) + ; CHECK: [[V:%[0-9]+]]:gpr = LDRi12 [[G]], 0, 14, %noreg :: (load 4 from @external_constant) %r0 = COPY %1(s32) ; CHECK: %r0 = COPY [[V]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 ... diff --git a/test/CodeGen/ARM/GlobalISel/arm-select-globals-static.mir b/test/CodeGen/ARM/GlobalISel/arm-select-globals-static.mir index 034b88296dc1..cd03d42e4a54 100644 --- a/test/CodeGen/ARM/GlobalISel/arm-select-globals-static.mir +++ b/test/CodeGen/ARM/GlobalISel/arm-select-globals-static.mir @@ -26,18 +26,18 @@ body: | bb.0: %0(p0) = G_GLOBAL_VALUE @internal_global ; ELF-MOVT: [[G:%[0-9]+]]:gpr = MOVi32imm @internal_global - ; ELF-NOMOVT: [[G:%[0-9]+]]:gpr = LDRi12 %const.0, 0, 14, _ :: (load 4 from constant-pool) + ; ELF-NOMOVT: [[G:%[0-9]+]]:gpr = LDRi12 %const.0, 0, 14, %noreg :: (load 4 from constant-pool) ; DARWIN-MOVT: [[G:%[0-9]+]]:gpr = MOVi32imm @internal_global ; DARWIN-NOMOVT: [[G:%[0-9]+]]:gpr = LDRLIT_ga_abs @internal_global %1(s32) = G_LOAD %0(p0) :: (load 4 from @internal_global) - ; CHECK: [[V:%[0-9]+]]:gpr = LDRi12 [[G]], 0, 14, _ + ; CHECK: [[V:%[0-9]+]]:gpr = LDRi12 [[G]], 0, 14, %noreg %r0 = COPY %1(s32) ; CHECK: %r0 = COPY [[V]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 ... 
--- name: test_external_global @@ -56,16 +56,16 @@ body: | bb.0: %0(p0) = G_GLOBAL_VALUE @external_global ; ELF-MOVT: [[G:%[0-9]+]]:gpr = MOVi32imm @external_global - ; ELF-NOMOVT: [[G:%[0-9]+]]:gpr = LDRi12 %const.0, 0, 14, _ :: (load 4 from constant-pool) + ; ELF-NOMOVT: [[G:%[0-9]+]]:gpr = LDRi12 %const.0, 0, 14, %noreg :: (load 4 from constant-pool) ; DARWIN-MOVT: [[G:%[0-9]+]]:gpr = MOVi32imm @external_global ; DARWIN-NOMOVT: [[G:%[0-9]+]]:gpr = LDRLIT_ga_abs @external_global %1(s32) = G_LOAD %0(p0) :: (load 4 from @external_global) - ; CHECK: [[V:%[0-9]+]]:gpr = LDRi12 [[G]], 0, 14, _ + ; CHECK: [[V:%[0-9]+]]:gpr = LDRi12 [[G]], 0, 14, %noreg %r0 = COPY %1(s32) ; CHECK: %r0 = COPY [[V]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 + ; CHECK: BX_RET 14, %noreg, implicit %r0 ... diff --git a/test/CodeGen/ARM/GlobalISel/arm-unsupported.ll b/test/CodeGen/ARM/GlobalISel/arm-unsupported.ll index bdba53563905..f9d41d9a38f0 100644 --- a/test/CodeGen/ARM/GlobalISel/arm-unsupported.ll +++ b/test/CodeGen/ARM/GlobalISel/arm-unsupported.ll @@ -113,4 +113,19 @@ define i32 @test_thread_local_global() { ret i32 %v } +%byval.class = type { i32 } + +define void @test_byval_arg(%byval.class* byval %x) { +; CHECK: remark: {{.*}} unable to lower arguments: void (%byval.class*)* +; CHECK-LABEL: warning: Instruction selection used fallback path for test_byval + ret void +} + +define void @test_byval_param(%byval.class* %x) { +; CHECK: remark: {{.*}} unable to translate instruction: call +; CHECK-LABEL: warning: Instruction selection used fallback path for test_byval_param + call void @test_byval_arg(%byval.class* byval %x) + ret void +} + attributes #0 = { "target-features"="+thumb-mode" } diff --git a/test/CodeGen/ARM/GlobalISel/pr35375.ll b/test/CodeGen/ARM/GlobalISel/pr35375.ll new file mode 100644 index 000000000000..ebef54542bc7 --- /dev/null +++ b/test/CodeGen/ARM/GlobalISel/pr35375.ll @@ -0,0 +1,12 @@ +; RUN: llc -O0 -mtriple armv7-- -stop-before=expand-isel-pseudos < %s +; RUN: llc -O0 -mtriple armv7-- -stop-before=expand-isel-pseudos -global-isel < %s + +; CHECK: PKHBT + +define arm_aapcscc i32 @pkh(i32 %x, i32 %y) { + %andx = and i32 %x, 65535 + %shl = shl i32 %y, 1 + %andy = and i32 %shl, 4294901760 ; same as -65536 + %or = or i32 %andx, %andy + ret i32 %or +} diff --git a/test/CodeGen/ARM/GlobalISel/select-pr35926.mir b/test/CodeGen/ARM/GlobalISel/select-pr35926.mir new file mode 100644 index 000000000000..d2b4ffa893c5 --- /dev/null +++ b/test/CodeGen/ARM/GlobalISel/select-pr35926.mir @@ -0,0 +1,40 @@ +# RUN: llc -mtriple arm-gnueabihf -mattr=+vfp4 -run-pass=instruction-select -global-isel -o - %s | FileCheck %s +--- | + declare double @llvm.fma.f64(double, double, double) #0 + + define double @vfnmsd(double %x, double %y, double %z) #1 { + %minus.y = fsub double -0.000000e+00, %y + %fma = tail call double @llvm.fma.f64(double %x, double %minus.y, double %z) + %minus.fma = fsub double -0.000000e+00, %fma + ret double %minus.fma + } + + ; Function Attrs: nounwind + declare void @llvm.stackprotector(i8*, i8**) #2 + + attributes #0 = { nounwind readnone speculatable "target-features"="+vfp4" } + attributes #1 = { "target-features"="+vfp4" } + attributes #2 = { nounwind } + +... 
+--- +name: vfnmsd +legalized: true +regBankSelected: true +selected: false +body: | + bb.1 (%ir-block.0): + liveins: %d0, %d1, %d2 + + %0:fprb(s64) = COPY %d0 + %1:fprb(s64) = COPY %d1 + %2:fprb(s64) = COPY %d2 + %3:fprb(s64) = G_FNEG %1 + %4:fprb(s64) = G_FMA %0, %3, %2 + %5:fprb(s64) = G_FNEG %4 + %d0 = COPY %5(s64) + MOVPCLR 14, %noreg, implicit %d0 + +# CHECK: %{{[0-9]+}}:dpr = VFNMSD %{{[0-9]+}}, %{{[0-9]+}}, %{{[0-9]+}}, 14, %noreg + +... diff --git a/test/CodeGen/MIR/ARM/PR32721_ifcvt_triangle_unanalyzable.mir b/test/CodeGen/ARM/PR32721_ifcvt_triangle_unanalyzable.mir similarity index 100% rename from test/CodeGen/MIR/ARM/PR32721_ifcvt_triangle_unanalyzable.mir rename to test/CodeGen/ARM/PR32721_ifcvt_triangle_unanalyzable.mir diff --git a/test/CodeGen/ARM/PR35379.ll b/test/CodeGen/ARM/PR35379.ll new file mode 100644 index 000000000000..b99ca40e29ef --- /dev/null +++ b/test/CodeGen/ARM/PR35379.ll @@ -0,0 +1,52 @@ +; RUN: llc -mtriple=armv7a-eabi < %s | FileCheck %s --check-prefix=CHECK-ARM +; RUN: llc -mtriple=armv6m-eabi < %s | FileCheck %s --check-prefix=CHECK-THM + +; Function Attrs: minsize optsize +declare void @g(i32*) local_unnamed_addr #0 + +; Function Attrs: minsize optsize +define void @f() local_unnamed_addr #0 { +entry: + %i = alloca i32, align 4 + %0 = bitcast i32* %i to i8* + store i32 1, i32* %i, align 4 + call void @g(i32* nonnull %i) + ret void +} + +; Check unwind info does not mention the registers used for padding, and +; the amount of stack adjustment is the same as in the actual +; instructions. + +; CHECK-ARM: .save {r11, lr} +; CHECK-ARM-NEXT: .pad #8 +; CHECK-ARM-NEXT: push {r9, r10, r11, lr} +; CHECK-ARM: pop {r2, r3, r11, pc} + +; CHECK-THM: .save {r7, lr} +; CHECK-THM-NEXT: .pad #8 +; CHECK-THM-NEXT: push {r5, r6, r7, lr} +; CHECK-THM: pop {r2, r3, r7, pc} + + +define void @f1() local_unnamed_addr #1 { +entry: + %i = alloca i32, align 4 + %0 = bitcast i32* %i to i8* + store i32 1, i32* %i, align 4 + call void @g(i32* nonnull %i) + ret void +} + +; Check that unwind info is the same whether or not using -Os (minsize attr) + +; CHECK-ARM: .save {r11, lr} +; CHECK-ARM-NEXT: push {r11, lr} +; CHECK-ARM-NEXT: .pad #8 + +; CHECK-THM: .save {r7, lr} +; CHECK-THM-NEXT: push {r7, lr} +; CHECK-THM-NEXT: .pad #8 + +attributes #0 = { minsize optsize } +attributes #1 = { optsize } diff --git a/test/CodeGen/ARM/Windows/dbzchk.ll b/test/CodeGen/ARM/Windows/dbzchk.ll index aea37992de4e..18e6e5280579 100644 --- a/test/CodeGen/ARM/Windows/dbzchk.ll +++ b/test/CodeGen/ARM/Windows/dbzchk.ll @@ -32,13 +32,13 @@ return: ret i32 %2 } -; CHECK-DIV-DAG: BB#0 -; CHECK-DIV-DAG: Successors according to CFG: BB#1({{.*}}) BB#2 -; CHECK-DIV-DAG: BB#1 -; CHECK-DIV-DAG: Successors according to CFG: BB#3 -; CHECK-DIV-DAG: BB#2 -; CHECK-DIV-DAG: Successors according to CFG: BB#3 -; CHECK-DIV-DAG: BB#3 +; CHECK-DIV-DAG: %bb.0 +; CHECK-DIV-DAG: Successors according to CFG: %bb.1({{.*}}) %bb.2 +; CHECK-DIV-DAG: %bb.1 +; CHECK-DIV-DAG: Successors according to CFG: %bb.3 +; CHECK-DIV-DAG: %bb.2 +; CHECK-DIV-DAG: Successors according to CFG: %bb.3 +; CHECK-DIV-DAG: %bb.3 ; RUN: llc -mtriple thumbv7--windows-itanium -print-machineinstrs=expand-isel-pseudos -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck %s -check-prefix CHECK-MOD @@ -66,13 +66,13 @@ return: ret i32 %retval.0 } -; CHECK-MOD-DAG: BB#0 -; CHECK-MOD-DAG: Successors according to CFG: BB#2({{.*}}) BB#1 -; CHECK-MOD-DAG: BB#1 -; CHECK-MOD-DAG: Successors according to CFG: BB#3 -; CHECK-MOD-DAG: BB#3 -; CHECK-MOD-DAG: Successors 
according to CFG: BB#2 -; CHECK-MOD-DAG: BB#2 +; CHECK-MOD-DAG: %bb.0 +; CHECK-MOD-DAG: Successors according to CFG: %bb.2({{.*}}) %bb.1 +; CHECK-MOD-DAG: %bb.1 +; CHECK-MOD-DAG: Successors according to CFG: %bb.3 +; CHECK-MOD-DAG: %bb.3 +; CHECK-MOD-DAG: Successors according to CFG: %bb.2 +; CHECK-MOD-DAG: %bb.2 ; RUN: llc -mtriple thumbv7--windows-itanium -print-machineinstrs=expand-isel-pseudos -verify-machineinstrs -filetype asm -o /dev/null %s 2>&1 | FileCheck %s -check-prefix CHECK-CFG ; RUN: llc -mtriple thumbv7--windows-itanium -verify-machineinstrs -filetype asm -o - %s | FileCheck %s -check-prefix CHECK-CFG-ASM @@ -111,23 +111,23 @@ if.end: attributes #0 = { optsize } -; CHECK-CFG-DAG: BB#0 -; CHECK-CFG-DAG: t2Bcc -; CHECK-CFG-DAG: t2B +; CHECK-CFG-DAG: %bb.0 +; CHECK-CFG-DAG: t2Bcc %bb.2 +; CHECK-CFG-DAG: t2B %bb.1 -; CHECK-CFG-DAG: BB#1 -; CHECK-CFG-DAG: t2B +; CHECK-CFG-DAG: %bb.1 +; CHECK-CFG-DAG: t2B %bb.3 -; CHECK-CFG-DAG: BB#2 -; CHECK-CFG-DAG: tCMPi8 %vreg{{[0-9]}}, 0 -; CHECK-CFG-DAG: t2Bcc +; CHECK-CFG-DAG: %bb.2 +; CHECK-CFG-DAG: tCMPi8 %{{[0-9]}}, 0 +; CHECK-CFG-DAG: t2Bcc %bb.5 -; CHECK-CFG-DAG: BB#4 +; CHECK-CFG-DAG: %bb.4 -; CHECK-CFG-DAG: BB#3 +; CHECK-CFG-DAG: %bb.3 ; CHECK-CFG-DAG: tBX_RET -; CHECK-CFG-DAG: BB#5 +; CHECK-CFG-DAG: %bb.5 ; CHECK-CFG-DAG: t__brkdiv0 ; CHECK-CFG-ASM-LABEL: h: diff --git a/test/CodeGen/ARM/Windows/dllexport.ll b/test/CodeGen/ARM/Windows/dllexport.ll index 27496208862e..4f2e21baeb90 100644 --- a/test/CodeGen/ARM/Windows/dllexport.ll +++ b/test/CodeGen/ARM/Windows/dllexport.ll @@ -41,35 +41,34 @@ define weak_odr dllexport void @l() { ; CHECK: .section .drectve ; CHECK-GNU-NOT: -export:f -; CHECK-GNU: -export:g -; CHECK-GNU-SAME: -export:h +; CHECK-GNU: .ascii " -export:g" +; CHECK-GNU: .ascii " -export:h" ; CHECK-GNU-NOT: -export:i -; CHECK-GNU-SAME: -export:j -; CHECK-GNU-SAME: -export:k -; CHECK-GNU-SAME: -export:l -; CHECK-GNU-SAME: -export:m,data -; CHECK-GNU-SAME: -export:n,data -; CHECK-GNU-SAME: -export:o,data -; CHECK-GNU-SAME: -export:p,data -; CHECK-GNU-SAME: -export:q,data -; CHECK-GNU-SAME: -export:r -; CHECK-GNU-SAME: -export:s -; CHECK-GNU-SAME: -export:t -; CHECK-GNU-SAME: -export:u +; CHECK-GNU: .ascii " -export:j" +; CHECK-GNU: .ascii " -export:k" +; CHECK-GNU: .ascii " -export:l" +; CHECK-GNU: .ascii " -export:m,data" +; CHECK-GNU: .ascii " -export:n,data" +; CHECK-GNU: .ascii " -export:o,data" +; CHECK-GNU: .ascii " -export:p,data" +; CHECK-GNU: .ascii " -export:q,data" +; CHECK-GNU: .ascii " -export:r" +; CHECK-GNU: .ascii " -export:s" +; CHECK-GNU: .ascii " -export:t" +; CHECK-GNU: .ascii " -export:u" ; CHECK-MSVC-NOT: /EXPORT:f -; CHECK-MSVC: /EXPORT:g -; CHECK-MSVC-SAME: /EXPORT:h +; CHECK-MSVC: .ascii " /EXPORT:g" +; CHECK-MSVC: .ascii " /EXPORT:h" ; CHECK-MSVC-NOT: /EXPORT:i -; CHECK-MSVC-SAME: /EXPORT:j -; CHECK-MSVC-SAME: /EXPORT:k -; CHECK-MSVC-SAME: /EXPORT:l -; CHECK-MSVC-SAME: /EXPORT:m,DATA -; CHECK-MSVC-SAME: /EXPORT:n,DATA -; CHECK-MSVC-SAME: /EXPORT:o,DATA -; CHECK-MSVC-SAME: /EXPORT:p,DATA -; CHECK-MSVC-SAME: /EXPORT:q,DATA -; CHECK-MSVC-SAME: /EXPORT:r -; CHECK-MSVC-SAME: /EXPORT:s -; CHECK-MSVC-SAME: /EXPORT:t -; CHECK-MSVC-SAME: /EXPORT:u - +; CHECK-MSVC: .ascii " /EXPORT:j" +; CHECK-MSVC: .ascii " /EXPORT:k" +; CHECK-MSVC: .ascii " /EXPORT:l" +; CHECK-MSVC: .ascii " /EXPORT:m,DATA" +; CHECK-MSVC: .ascii " /EXPORT:n,DATA" +; CHECK-MSVC: .ascii " /EXPORT:o,DATA" +; CHECK-MSVC: .ascii " /EXPORT:p,DATA" +; CHECK-MSVC: .ascii " /EXPORT:q,DATA" +; CHECK-MSVC: .ascii " /EXPORT:r" +; CHECK-MSVC: .ascii " 
/EXPORT:s" +; CHECK-MSVC: .ascii " /EXPORT:t" +; CHECK-MSVC: .ascii " /EXPORT:u" diff --git a/test/CodeGen/ARM/Windows/memset.ll b/test/CodeGen/ARM/Windows/memset.ll index 500e25e259c6..c9b22f47a152 100644 --- a/test/CodeGen/ARM/Windows/memset.ll +++ b/test/CodeGen/ARM/Windows/memset.ll @@ -2,11 +2,11 @@ @source = common global [512 x i8] zeroinitializer, align 4 -declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind define void @function() { entry: - call void @llvm.memset.p0i8.i32(i8* bitcast ([512 x i8]* @source to i8*), i8 0, i32 512, i32 0, i1 false) + call void @llvm.memset.p0i8.i32(i8* bitcast ([512 x i8]* @source to i8*), i8 0, i32 512, i1 false) unreachable } diff --git a/test/CodeGen/ARM/Windows/no-aeabi.ll b/test/CodeGen/ARM/Windows/no-aeabi.ll index a4103b0a676e..a5f7fc8daf6e 100644 --- a/test/CodeGen/ARM/Windows/no-aeabi.ll +++ b/test/CodeGen/ARM/Windows/no-aeabi.ll @@ -1,14 +1,14 @@ ; RUN: llc -mtriple=thumbv7-windows-itanium -mcpu=cortex-a9 -verify-machineinstrs -o - %s | FileCheck %s -declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind +declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind @source = common global [512 x i8] zeroinitializer, align 4 @target = common global [512 x i8] zeroinitializer, align 4 define void @move() nounwind { entry: - call void @llvm.memmove.p0i8.p0i8.i32(i8* bitcast ([512 x i8]* @target to i8*), i8* bitcast ([512 x i8]* @source to i8*), i32 512, i32 0, i1 false) + call void @llvm.memmove.p0i8.p0i8.i32(i8* bitcast ([512 x i8]* @target to i8*), i8* bitcast ([512 x i8]* @source to i8*), i32 512, i1 false) unreachable } @@ -16,7 +16,7 @@ entry: define void @copy() nounwind { entry: - call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([512 x i8]* @target to i8*), i8* bitcast ([512 x i8]* @source to i8*), i32 512, i32 0, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([512 x i8]* @target to i8*), i8* bitcast ([512 x i8]* @source to i8*), i32 512, i1 false) unreachable } diff --git a/test/CodeGen/ARM/Windows/vla-cpsr.ll b/test/CodeGen/ARM/Windows/vla-cpsr.ll index de0f0b68a4d2..0ec20c8b5490 100644 --- a/test/CodeGen/ARM/Windows/vla-cpsr.ll +++ b/test/CodeGen/ARM/Windows/vla-cpsr.ll @@ -9,5 +9,5 @@ entry: ret void } -; CHECK: tBL pred:14, pred:%noreg, , %LR, %SP, %R4, %R4, %R12, %CPSR +; CHECK: tBL 14, %noreg, &__chkstk, implicit-def %lr, implicit %sp, implicit killed %r4, implicit-def %r4, implicit-def dead %r12, implicit-def dead %cpsr diff --git a/test/CodeGen/ARM/a15-SD-dep.ll b/test/CodeGen/ARM/a15-SD-dep.ll index 5e5ca4b873f3..625c40eb4162 100644 --- a/test/CodeGen/ARM/a15-SD-dep.ll +++ b/test/CodeGen/ARM/a15-SD-dep.ll @@ -114,4 +114,4 @@ sw.bb1: ; preds = %entry, %sw.bb sw.epilog: ; preds = %entry, %sw.bb1 ret void -} \ No newline at end of file +} diff --git a/test/CodeGen/ARM/addsubcarry-promotion.ll b/test/CodeGen/ARM/addsubcarry-promotion.ll new file mode 100644 index 000000000000..8b99b2ada7c3 --- /dev/null +++ b/test/CodeGen/ARM/addsubcarry-promotion.ll @@ -0,0 +1,60 @@ +; RUN: llc -O2 -mtriple armv7a < %s | FileCheck --check-prefix=ARM %s + +; RUN: llc -O2 -mtriple thumbv6m < %s | FileCheck --check-prefix=THUMB1 %s +; RUN: llc -O2 -mtriple thumbv8m.base < %s | FileCheck 
--check-prefix=THUMB1 %s + +; RUN: llc -O2 -mtriple thumbv7a < %s | FileCheck --check-prefix=THUMB %s +; RUN: llc -O2 -mtriple thumbv8m.main < %s | FileCheck --check-prefix=THUMB %s + +define void @fn1(i32 %a, i32 %b, i32 %c) local_unnamed_addr #0 { +entry: + +; ARM: rsb r2, r2, #1 +; ARM: adds r0, r1, r0 +; ARM: movw r1, #65535 +; ARM: sxth r2, r2 +; ARM: adc r0, r2, #0 +; ARM: tst r0, r1 +; ARM: bxeq lr +; ARM: .LBB0_1: +; ARM: b .LBB0_1 + +; THUMB1: movs r3, #1 +; THUMB1: subs r2, r3, r2 +; THUMB1: sxth r2, r2 +; THUMB1: movs r3, #0 +; THUMB1: adds r0, r1, r0 +; THUMB1: adcs r3, r2 +; THUMB1: lsls r0, r3, #16 +; THUMB1: beq .LBB0_2 +; THUMB1: .LBB0_1: +; THUMB1: b .LBB0_1 + +; THUMB: rsb.w r2, r2, #1 +; THUMB: adds r0, r0, r1 +; THUMB: sxth r2, r2 +; THUMB: adc r0, r2, #0 +; THUMB: lsls r0, r0, #16 +; THUMB: it eq +; THUMB: bxeq lr +; THUMB: .LBB0_1: +; THUMB: b .LBB0_1 + + %add = add i32 %b, %a + %cmp = icmp ult i32 %add, %b + %conv = zext i1 %cmp to i32 + %sub = sub i32 1, %c + %add1 = add i32 %sub, %conv + %conv2 = trunc i32 %add1 to i16 + %tobool = icmp eq i16 %conv2, 0 + br i1 %tobool, label %if.end, label %for.cond.preheader + +for.cond.preheader: ; preds = %entry + br label %for.cond + +for.cond: ; preds = %for.cond.preheader, %for.cond + br label %for.cond + +if.end: ; preds = %entry + ret void +} diff --git a/test/CodeGen/ARM/and-load-combine.ll b/test/CodeGen/ARM/and-load-combine.ll new file mode 100644 index 000000000000..69b00ed4853a --- /dev/null +++ b/test/CodeGen/ARM/and-load-combine.ll @@ -0,0 +1,1065 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=armv7 %s -o - | FileCheck %s --check-prefix=ARM +; RUN: llc -mtriple=armv7eb %s -o - | FileCheck %s --check-prefix=ARMEB +; RUN: llc -mtriple=armv6m %s -o - | FileCheck %s --check-prefix=THUMB1 +; RUN: llc -mtriple=thumbv8m.main %s -o - | FileCheck %s --check-prefix=THUMB2 + +define arm_aapcscc zeroext i1 @cmp_xor8_short_short(i16* nocapture readonly %a, + i16* nocapture readonly %b) { +; ARM-LABEL: cmp_xor8_short_short: +; ARM: ldrb r2, [r0] +; ARM-NEXT: mov r0, #0 +; ARM-NEXT: ldrb r1, [r1] +; ARM-NEXT: teq r1, r2 +; ARM-NEXT: movweq r0, #1 +; ARM-NEXT: bx lr +; +; ARMEB-LABEL: cmp_xor8_short_short: +; ARMEB: ldrb r2, [r0, #1] +; ARMEB-NEXT: mov r0, #0 +; ARMEB-NEXT: ldrb r1, [r1, #1] +; ARMEB-NEXT: teq r1, r2 +; ARMEB-NEXT: movweq r0, #1 +; ARMEB-NEXT: bx lr +; +; THUMB1-LABEL: cmp_xor8_short_short: +; THUMB1: ldrb r0, [r0] +; THUMB1-NEXT: ldrb r2, [r1] +; THUMB1-NEXT: eors r2, r0 +; THUMB1-NEXT: movs r0, #1 +; THUMB1-NEXT: movs r1, #0 +; THUMB1-NEXT: cmp r2, #0 +; THUMB1-NEXT: beq .LBB0_2 +; THUMB1-NEXT: @ %bb.1: @ %entry +; THUMB1-NEXT: mov r0, r1 +; THUMB1-NEXT: .LBB0_2: @ %entry +; THUMB1-NEXT: bx lr +; +; THUMB2-LABEL: cmp_xor8_short_short: +; THUMB2: ldrb r2, [r0] +; THUMB2-NEXT: movs r0, #0 +; THUMB2-NEXT: ldrb r1, [r1] +; THUMB2-NEXT: teq.w r1, r2 +; THUMB2-NEXT: it eq +; THUMB2-NEXT: moveq r0, #1 +; THUMB2-NEXT: bx lr +entry: + %0 = load i16, i16* %a, align 2 + %1 = load i16, i16* %b, align 2 + %xor2 = xor i16 %1, %0 + %2 = and i16 %xor2, 255 + %cmp = icmp eq i16 %2, 0 + ret i1 %cmp +} + +define arm_aapcscc zeroext i1 @cmp_xor8_short_int(i16* nocapture readonly %a, + i32* nocapture readonly %b) { +; ARM-LABEL: cmp_xor8_short_int: +; ARM: ldrb r2, [r0] +; ARM-NEXT: mov r0, #0 +; ARM-NEXT: ldrb r1, [r1] +; ARM-NEXT: teq r1, r2 +; ARM-NEXT: movweq r0, #1 +; ARM-NEXT: bx lr +; +; ARMEB-LABEL: cmp_xor8_short_int: +; ARMEB: ldrb r2, [r0, #1] +; ARMEB-NEXT: mov r0, #0 
+; ARMEB-NEXT: ldrb r1, [r1, #3] +; ARMEB-NEXT: teq r1, r2 +; ARMEB-NEXT: movweq r0, #1 +; ARMEB-NEXT: bx lr +; +; THUMB1-LABEL: cmp_xor8_short_int: +; THUMB1: ldrb r0, [r0] +; THUMB1-NEXT: ldrb r2, [r1] +; THUMB1-NEXT: eors r2, r0 +; THUMB1-NEXT: movs r0, #1 +; THUMB1-NEXT: movs r1, #0 +; THUMB1-NEXT: cmp r2, #0 +; THUMB1-NEXT: beq .LBB1_2 +; THUMB1-NEXT: @ %bb.1: @ %entry +; THUMB1-NEXT: mov r0, r1 +; THUMB1-NEXT: .LBB1_2: @ %entry +; THUMB1-NEXT: bx lr +; +; THUMB2-LABEL: cmp_xor8_short_int: +; THUMB2: ldrb r2, [r0] +; THUMB2-NEXT: movs r0, #0 +; THUMB2-NEXT: ldrb r1, [r1] +; THUMB2-NEXT: teq.w r1, r2 +; THUMB2-NEXT: it eq +; THUMB2-NEXT: moveq r0, #1 +; THUMB2-NEXT: bx lr +entry: + %0 = load i16, i16* %a, align 2 + %conv = zext i16 %0 to i32 + %1 = load i32, i32* %b, align 4 + %xor = xor i32 %1, %conv + %and = and i32 %xor, 255 + %cmp = icmp eq i32 %and, 0 + ret i1 %cmp +} + +define arm_aapcscc zeroext i1 @cmp_xor8_int_int(i32* nocapture readonly %a, + i32* nocapture readonly %b) { +; ARM-LABEL: cmp_xor8_int_int: +; ARM: ldrb r2, [r0] +; ARM-NEXT: mov r0, #0 +; ARM-NEXT: ldrb r1, [r1] +; ARM-NEXT: teq r1, r2 +; ARM-NEXT: movweq r0, #1 +; ARM-NEXT: bx lr +; +; ARMEB-LABEL: cmp_xor8_int_int: +; ARMEB: ldrb r2, [r0, #3] +; ARMEB-NEXT: mov r0, #0 +; ARMEB-NEXT: ldrb r1, [r1, #3] +; ARMEB-NEXT: teq r1, r2 +; ARMEB-NEXT: movweq r0, #1 +; ARMEB-NEXT: bx lr +; +; THUMB1-LABEL: cmp_xor8_int_int: +; THUMB1: ldrb r0, [r0] +; THUMB1-NEXT: ldrb r2, [r1] +; THUMB1-NEXT: eors r2, r0 +; THUMB1-NEXT: movs r0, #1 +; THUMB1-NEXT: movs r1, #0 +; THUMB1-NEXT: cmp r2, #0 +; THUMB1-NEXT: beq .LBB2_2 +; THUMB1-NEXT: @ %bb.1: @ %entry +; THUMB1-NEXT: mov r0, r1 +; THUMB1-NEXT: .LBB2_2: @ %entry +; THUMB1-NEXT: bx lr +; +; THUMB2-LABEL: cmp_xor8_int_int: +; THUMB2: ldrb r2, [r0] +; THUMB2-NEXT: movs r0, #0 +; THUMB2-NEXT: ldrb r1, [r1] +; THUMB2-NEXT: teq.w r1, r2 +; THUMB2-NEXT: it eq +; THUMB2-NEXT: moveq r0, #1 +; THUMB2-NEXT: bx lr +entry: + %0 = load i32, i32* %a, align 4 + %1 = load i32, i32* %b, align 4 + %xor = xor i32 %1, %0 + %and = and i32 %xor, 255 + %cmp = icmp eq i32 %and, 0 + ret i1 %cmp +} + +define arm_aapcscc zeroext i1 @cmp_xor16(i32* nocapture readonly %a, + i32* nocapture readonly %b) { +; ARM-LABEL: cmp_xor16: +; ARM: ldrh r2, [r0] +; ARM-NEXT: mov r0, #0 +; ARM-NEXT: ldrh r1, [r1] +; ARM-NEXT: teq r1, r2 +; ARM-NEXT: movweq r0, #1 +; ARM-NEXT: bx lr +; +; ARMEB-LABEL: cmp_xor16: +; ARMEB: ldrh r2, [r0, #2] +; ARMEB-NEXT: mov r0, #0 +; ARMEB-NEXT: ldrh r1, [r1, #2] +; ARMEB-NEXT: teq r1, r2 +; ARMEB-NEXT: movweq r0, #1 +; ARMEB-NEXT: bx lr +; +; THUMB1-LABEL: cmp_xor16: +; THUMB1: ldrh r0, [r0] +; THUMB1-NEXT: ldrh r2, [r1] +; THUMB1-NEXT: eors r2, r0 +; THUMB1-NEXT: movs r0, #1 +; THUMB1-NEXT: movs r1, #0 +; THUMB1-NEXT: cmp r2, #0 +; THUMB1-NEXT: beq .LBB3_2 +; THUMB1-NEXT: @ %bb.1: @ %entry +; THUMB1-NEXT: mov r0, r1 +; THUMB1-NEXT: .LBB3_2: @ %entry +; THUMB1-NEXT: bx lr +; +; THUMB2-LABEL: cmp_xor16: +; THUMB2: ldrh r2, [r0] +; THUMB2-NEXT: movs r0, #0 +; THUMB2-NEXT: ldrh r1, [r1] +; THUMB2-NEXT: teq.w r1, r2 +; THUMB2-NEXT: it eq +; THUMB2-NEXT: moveq r0, #1 +; THUMB2-NEXT: bx lr +entry: + %0 = load i32, i32* %a, align 4 + %1 = load i32, i32* %b, align 4 + %xor = xor i32 %1, %0 + %and = and i32 %xor, 65535 + %cmp = icmp eq i32 %and, 0 + ret i1 %cmp +} + +define arm_aapcscc zeroext i1 @cmp_or8_short_short(i16* nocapture readonly %a, + i16* nocapture readonly %b) { +; ARM-LABEL: cmp_or8_short_short: +; ARM: ldrb r0, [r0] +; ARM-NEXT: ldrb r1, [r1] +; ARM-NEXT: orrs r0, r1, r0 +; 
ARM-NEXT: mov r0, #0 +; ARM-NEXT: movweq r0, #1 +; ARM-NEXT: bx lr +; +; ARMEB-LABEL: cmp_or8_short_short: +; ARMEB: ldrb r0, [r0, #1] +; ARMEB-NEXT: ldrb r1, [r1, #1] +; ARMEB-NEXT: orrs r0, r1, r0 +; ARMEB-NEXT: mov r0, #0 +; ARMEB-NEXT: movweq r0, #1 +; ARMEB-NEXT: bx lr +; +; THUMB1-LABEL: cmp_or8_short_short: +; THUMB1: ldrb r0, [r0] +; THUMB1-NEXT: ldrb r2, [r1] +; THUMB1-NEXT: orrs r2, r0 +; THUMB1-NEXT: movs r0, #1 +; THUMB1-NEXT: movs r1, #0 +; THUMB1-NEXT: cmp r2, #0 +; THUMB1-NEXT: beq .LBB4_2 +; THUMB1-NEXT: @ %bb.1: @ %entry +; THUMB1-NEXT: mov r0, r1 +; THUMB1-NEXT: .LBB4_2: @ %entry +; THUMB1-NEXT: bx lr +; +; THUMB2-LABEL: cmp_or8_short_short: +; THUMB2: ldrb r0, [r0] +; THUMB2-NEXT: ldrb r1, [r1] +; THUMB2-NEXT: orrs r0, r1 +; THUMB2-NEXT: mov.w r0, #0 +; THUMB2-NEXT: it eq +; THUMB2-NEXT: moveq r0, #1 +; THUMB2-NEXT: bx lr +entry: + %0 = load i16, i16* %a, align 2 + %1 = load i16, i16* %b, align 2 + %or2 = or i16 %1, %0 + %2 = and i16 %or2, 255 + %cmp = icmp eq i16 %2, 0 + ret i1 %cmp +} + +define arm_aapcscc zeroext i1 @cmp_or8_short_int(i16* nocapture readonly %a, + i32* nocapture readonly %b) { +; ARM-LABEL: cmp_or8_short_int: +; ARM: ldrb r0, [r0] +; ARM-NEXT: ldrb r1, [r1] +; ARM-NEXT: orrs r0, r1, r0 +; ARM-NEXT: mov r0, #0 +; ARM-NEXT: movweq r0, #1 +; ARM-NEXT: bx lr +; +; ARMEB-LABEL: cmp_or8_short_int: +; ARMEB: ldrb r0, [r0, #1] +; ARMEB-NEXT: ldrb r1, [r1, #3] +; ARMEB-NEXT: orrs r0, r1, r0 +; ARMEB-NEXT: mov r0, #0 +; ARMEB-NEXT: movweq r0, #1 +; ARMEB-NEXT: bx lr +; +; THUMB1-LABEL: cmp_or8_short_int: +; THUMB1: ldrb r0, [r0] +; THUMB1-NEXT: ldrb r2, [r1] +; THUMB1-NEXT: orrs r2, r0 +; THUMB1-NEXT: movs r0, #1 +; THUMB1-NEXT: movs r1, #0 +; THUMB1-NEXT: cmp r2, #0 +; THUMB1-NEXT: beq .LBB5_2 +; THUMB1-NEXT: @ %bb.1: @ %entry +; THUMB1-NEXT: mov r0, r1 +; THUMB1-NEXT: .LBB5_2: @ %entry +; THUMB1-NEXT: bx lr +; +; THUMB2-LABEL: cmp_or8_short_int: +; THUMB2: ldrb r0, [r0] +; THUMB2-NEXT: ldrb r1, [r1] +; THUMB2-NEXT: orrs r0, r1 +; THUMB2-NEXT: mov.w r0, #0 +; THUMB2-NEXT: it eq +; THUMB2-NEXT: moveq r0, #1 +; THUMB2-NEXT: bx lr +entry: + %0 = load i16, i16* %a, align 2 + %conv = zext i16 %0 to i32 + %1 = load i32, i32* %b, align 4 + %or = or i32 %1, %conv + %and = and i32 %or, 255 + %cmp = icmp eq i32 %and, 0 + ret i1 %cmp +} + +define arm_aapcscc zeroext i1 @cmp_or8_int_int(i32* nocapture readonly %a, + i32* nocapture readonly %b) { +; ARM-LABEL: cmp_or8_int_int: +; ARM: ldrb r0, [r0] +; ARM-NEXT: ldrb r1, [r1] +; ARM-NEXT: orrs r0, r1, r0 +; ARM-NEXT: mov r0, #0 +; ARM-NEXT: movweq r0, #1 +; ARM-NEXT: bx lr +; +; ARMEB-LABEL: cmp_or8_int_int: +; ARMEB: ldrb r0, [r0, #3] +; ARMEB-NEXT: ldrb r1, [r1, #3] +; ARMEB-NEXT: orrs r0, r1, r0 +; ARMEB-NEXT: mov r0, #0 +; ARMEB-NEXT: movweq r0, #1 +; ARMEB-NEXT: bx lr +; +; THUMB1-LABEL: cmp_or8_int_int: +; THUMB1: ldrb r0, [r0] +; THUMB1-NEXT: ldrb r2, [r1] +; THUMB1-NEXT: orrs r2, r0 +; THUMB1-NEXT: movs r0, #1 +; THUMB1-NEXT: movs r1, #0 +; THUMB1-NEXT: cmp r2, #0 +; THUMB1-NEXT: beq .LBB6_2 +; THUMB1-NEXT: @ %bb.1: @ %entry +; THUMB1-NEXT: mov r0, r1 +; THUMB1-NEXT: .LBB6_2: @ %entry +; THUMB1-NEXT: bx lr +; +; THUMB2-LABEL: cmp_or8_int_int: +; THUMB2: ldrb r0, [r0] +; THUMB2-NEXT: ldrb r1, [r1] +; THUMB2-NEXT: orrs r0, r1 +; THUMB2-NEXT: mov.w r0, #0 +; THUMB2-NEXT: it eq +; THUMB2-NEXT: moveq r0, #1 +; THUMB2-NEXT: bx lr +entry: + %0 = load i32, i32* %a, align 4 + %1 = load i32, i32* %b, align 4 + %or = or i32 %1, %0 + %and = and i32 %or, 255 + %cmp = icmp eq i32 %and, 0 + ret i1 %cmp +} + +define arm_aapcscc 
zeroext i1 @cmp_or16(i32* nocapture readonly %a, + i32* nocapture readonly %b) { +; ARM-LABEL: cmp_or16: +; ARM: ldrh r0, [r0] +; ARM-NEXT: ldrh r1, [r1] +; ARM-NEXT: orrs r0, r1, r0 +; ARM-NEXT: mov r0, #0 +; ARM-NEXT: movweq r0, #1 +; ARM-NEXT: bx lr +; +; ARMEB-LABEL: cmp_or16: +; ARMEB: ldrh r0, [r0, #2] +; ARMEB-NEXT: ldrh r1, [r1, #2] +; ARMEB-NEXT: orrs r0, r1, r0 +; ARMEB-NEXT: mov r0, #0 +; ARMEB-NEXT: movweq r0, #1 +; ARMEB-NEXT: bx lr +; +; THUMB1-LABEL: cmp_or16: +; THUMB1: ldrh r0, [r0] +; THUMB1-NEXT: ldrh r2, [r1] +; THUMB1-NEXT: orrs r2, r0 +; THUMB1-NEXT: movs r0, #1 +; THUMB1-NEXT: movs r1, #0 +; THUMB1-NEXT: cmp r2, #0 +; THUMB1-NEXT: beq .LBB7_2 +; THUMB1-NEXT: @ %bb.1: @ %entry +; THUMB1-NEXT: mov r0, r1 +; THUMB1-NEXT: .LBB7_2: @ %entry +; THUMB1-NEXT: bx lr +; +; THUMB2-LABEL: cmp_or16: +; THUMB2: ldrh r0, [r0] +; THUMB2-NEXT: ldrh r1, [r1] +; THUMB2-NEXT: orrs r0, r1 +; THUMB2-NEXT: mov.w r0, #0 +; THUMB2-NEXT: it eq +; THUMB2-NEXT: moveq r0, #1 +; THUMB2-NEXT: bx lr +entry: + %0 = load i32, i32* %a, align 4 + %1 = load i32, i32* %b, align 4 + %or = or i32 %1, %0 + %and = and i32 %or, 65535 + %cmp = icmp eq i32 %and, 0 + ret i1 %cmp +} + +define arm_aapcscc zeroext i1 @cmp_and8_short_short(i16* nocapture readonly %a, + i16* nocapture readonly %b) { +; ARM-LABEL: cmp_and8_short_short: +; ARM: ldrb r2, [r0] +; ARM-NEXT: mov r0, #0 +; ARM-NEXT: ldrb r1, [r1] +; ARM-NEXT: tst r2, r1 +; ARM-NEXT: movweq r0, #1 +; ARM-NEXT: bx lr +; +; ARMEB-LABEL: cmp_and8_short_short: +; ARMEB: ldrb r2, [r0, #1] +; ARMEB-NEXT: mov r0, #0 +; ARMEB-NEXT: ldrb r1, [r1, #1] +; ARMEB-NEXT: tst r2, r1 +; ARMEB-NEXT: movweq r0, #1 +; ARMEB-NEXT: bx lr +; +; THUMB1-LABEL: cmp_and8_short_short: +; THUMB1: ldrb r2, [r1] +; THUMB1-NEXT: ldrb r3, [r0] +; THUMB1-NEXT: movs r0, #1 +; THUMB1-NEXT: movs r1, #0 +; THUMB1-NEXT: tst r3, r2 +; THUMB1-NEXT: beq .LBB8_2 +; THUMB1-NEXT: @ %bb.1: @ %entry +; THUMB1-NEXT: mov r0, r1 +; THUMB1-NEXT: .LBB8_2: @ %entry +; THUMB1-NEXT: bx lr +; +; THUMB2-LABEL: cmp_and8_short_short: +; THUMB2: ldrb r2, [r0] +; THUMB2-NEXT: movs r0, #0 +; THUMB2-NEXT: ldrb r1, [r1] +; THUMB2-NEXT: tst r2, r1 +; THUMB2-NEXT: it eq +; THUMB2-NEXT: moveq r0, #1 +; THUMB2-NEXT: bx lr +entry: + %0 = load i16, i16* %a, align 2 + %1 = load i16, i16* %b, align 2 + %and3 = and i16 %0, 255 + %2 = and i16 %and3, %1 + %cmp = icmp eq i16 %2, 0 + ret i1 %cmp +} + +define arm_aapcscc zeroext i1 @cmp_and8_short_int(i16* nocapture readonly %a, + i32* nocapture readonly %b) { +; ARM-LABEL: cmp_and8_short_int: +; ARM: ldrb r2, [r0] +; ARM-NEXT: mov r0, #0 +; ARM-NEXT: ldrb r1, [r1] +; ARM-NEXT: tst r1, r2 +; ARM-NEXT: movweq r0, #1 +; ARM-NEXT: bx lr +; +; ARMEB-LABEL: cmp_and8_short_int: +; ARMEB: ldrb r2, [r0, #1] +; ARMEB-NEXT: mov r0, #0 +; ARMEB-NEXT: ldrb r1, [r1, #3] +; ARMEB-NEXT: tst r1, r2 +; ARMEB-NEXT: movweq r0, #1 +; ARMEB-NEXT: bx lr +; +; THUMB1-LABEL: cmp_and8_short_int: +; THUMB1: ldrb r2, [r0] +; THUMB1-NEXT: ldrb r3, [r1] +; THUMB1-NEXT: movs r0, #1 +; THUMB1-NEXT: movs r1, #0 +; THUMB1-NEXT: tst r3, r2 +; THUMB1-NEXT: beq .LBB9_2 +; THUMB1-NEXT: @ %bb.1: @ %entry +; THUMB1-NEXT: mov r0, r1 +; THUMB1-NEXT: .LBB9_2: @ %entry +; THUMB1-NEXT: bx lr +; +; THUMB2-LABEL: cmp_and8_short_int: +; THUMB2: ldrb r2, [r0] +; THUMB2-NEXT: movs r0, #0 +; THUMB2-NEXT: ldrb r1, [r1] +; THUMB2-NEXT: tst r1, r2 +; THUMB2-NEXT: it eq +; THUMB2-NEXT: moveq r0, #1 +; THUMB2-NEXT: bx lr +entry: + %0 = load i16, i16* %a, align 2 + %1 = load i32, i32* %b, align 4 + %2 = and i16 %0, 255 + %and = zext i16 %2 
to i32 + %and1 = and i32 %1, %and + %cmp = icmp eq i32 %and1, 0 + ret i1 %cmp +} + +define arm_aapcscc zeroext i1 @cmp_and8_int_int(i32* nocapture readonly %a, + i32* nocapture readonly %b) { +; ARM-LABEL: cmp_and8_int_int: +; ARM: ldrb r2, [r0] +; ARM-NEXT: mov r0, #0 +; ARM-NEXT: ldrb r1, [r1] +; ARM-NEXT: tst r2, r1 +; ARM-NEXT: movweq r0, #1 +; ARM-NEXT: bx lr +; +; ARMEB-LABEL: cmp_and8_int_int: +; ARMEB: ldrb r2, [r0, #3] +; ARMEB-NEXT: mov r0, #0 +; ARMEB-NEXT: ldrb r1, [r1, #3] +; ARMEB-NEXT: tst r2, r1 +; ARMEB-NEXT: movweq r0, #1 +; ARMEB-NEXT: bx lr +; +; THUMB1-LABEL: cmp_and8_int_int: +; THUMB1: ldrb r2, [r1] +; THUMB1-NEXT: ldrb r3, [r0] +; THUMB1-NEXT: movs r0, #1 +; THUMB1-NEXT: movs r1, #0 +; THUMB1-NEXT: tst r3, r2 +; THUMB1-NEXT: beq .LBB10_2 +; THUMB1-NEXT: @ %bb.1: @ %entry +; THUMB1-NEXT: mov r0, r1 +; THUMB1-NEXT: .LBB10_2: @ %entry +; THUMB1-NEXT: bx lr +; +; THUMB2-LABEL: cmp_and8_int_int: +; THUMB2: ldrb r2, [r0] +; THUMB2-NEXT: movs r0, #0 +; THUMB2-NEXT: ldrb r1, [r1] +; THUMB2-NEXT: tst r2, r1 +; THUMB2-NEXT: it eq +; THUMB2-NEXT: moveq r0, #1 +; THUMB2-NEXT: bx lr +entry: + %0 = load i32, i32* %a, align 4 + %1 = load i32, i32* %b, align 4 + %and = and i32 %0, 255 + %and1 = and i32 %and, %1 + %cmp = icmp eq i32 %and1, 0 + ret i1 %cmp +} + +define arm_aapcscc zeroext i1 @cmp_and16(i32* nocapture readonly %a, + i32* nocapture readonly %b) { +; ARM-LABEL: cmp_and16: +; ARM: ldrh r2, [r0] +; ARM-NEXT: mov r0, #0 +; ARM-NEXT: ldrh r1, [r1] +; ARM-NEXT: tst r2, r1 +; ARM-NEXT: movweq r0, #1 +; ARM-NEXT: bx lr +; +; ARMEB-LABEL: cmp_and16: +; ARMEB: ldrh r2, [r0, #2] +; ARMEB-NEXT: mov r0, #0 +; ARMEB-NEXT: ldrh r1, [r1, #2] +; ARMEB-NEXT: tst r2, r1 +; ARMEB-NEXT: movweq r0, #1 +; ARMEB-NEXT: bx lr +; +; THUMB1-LABEL: cmp_and16: +; THUMB1: ldrh r2, [r1] +; THUMB1-NEXT: ldrh r3, [r0] +; THUMB1-NEXT: movs r0, #1 +; THUMB1-NEXT: movs r1, #0 +; THUMB1-NEXT: tst r3, r2 +; THUMB1-NEXT: beq .LBB11_2 +; THUMB1-NEXT: @ %bb.1: @ %entry +; THUMB1-NEXT: mov r0, r1 +; THUMB1-NEXT: .LBB11_2: @ %entry +; THUMB1-NEXT: bx lr +; +; THUMB2-LABEL: cmp_and16: +; THUMB2: ldrh r2, [r0] +; THUMB2-NEXT: movs r0, #0 +; THUMB2-NEXT: ldrh r1, [r1] +; THUMB2-NEXT: tst r2, r1 +; THUMB2-NEXT: it eq +; THUMB2-NEXT: moveq r0, #1 +; THUMB2-NEXT: bx lr +entry: + %0 = load i32, i32* %a, align 4 + %1 = load i32, i32* %b, align 4 + %and = and i32 %0, 65535 + %and1 = and i32 %and, %1 + %cmp = icmp eq i32 %and1, 0 + ret i1 %cmp +} + +define arm_aapcscc i32 @add_and16(i32* nocapture readonly %a, i32 %y, i32 %z) { +; ARM-LABEL: add_and16: +; ARM: add r1, r1, r2 +; ARM-NEXT: ldrh r0, [r0] +; ARM-NEXT: uxth r1, r1 +; ARM-NEXT: orr r0, r0, r1 +; ARM-NEXT: bx lr +; +; ARMEB-LABEL: add_and16: +; ARMEB: add r1, r1, r2 +; ARMEB-NEXT: ldrh r0, [r0, #2] +; ARMEB-NEXT: uxth r1, r1 +; ARMEB-NEXT: orr r0, r0, r1 +; ARMEB-NEXT: bx lr +; +; THUMB1-LABEL: add_and16: +; THUMB1: adds r1, r1, r2 +; THUMB1-NEXT: uxth r1, r1 +; THUMB1-NEXT: ldrh r0, [r0] +; THUMB1-NEXT: orrs r0, r1 +; THUMB1-NEXT: bx lr +; +; THUMB2-LABEL: add_and16: +; THUMB2: add r1, r2 +; THUMB2-NEXT: ldrh r0, [r0] +; THUMB2-NEXT: uxth r1, r1 +; THUMB2-NEXT: orrs r0, r1 +; THUMB2-NEXT: bx lr +entry: + %x = load i32, i32* %a, align 4 + %add = add i32 %y, %z + %or = or i32 %x, %add + %and = and i32 %or, 65535 + ret i32 %and +} + +define arm_aapcscc i32 @test1(i32* %a, i32* %b, i32 %x, i32 %y) { +; ARM-LABEL: test1: +; ARM: mul r2, r2, r3 +; ARM-NEXT: ldrh r1, [r1] +; ARM-NEXT: ldrh r0, [r0] +; ARM-NEXT: eor r0, r0, r1 +; ARM-NEXT: uxth r1, r2 +; ARM-NEXT: orr 
r0, r0, r1 +; ARM-NEXT: bx lr +; +; ARMEB-LABEL: test1: +; ARMEB: mul r2, r2, r3 +; ARMEB-NEXT: ldrh r1, [r1, #2] +; ARMEB-NEXT: ldrh r0, [r0, #2] +; ARMEB-NEXT: eor r0, r0, r1 +; ARMEB-NEXT: uxth r1, r2 +; ARMEB-NEXT: orr r0, r0, r1 +; ARMEB-NEXT: bx lr +; +; THUMB1-LABEL: test1: +; THUMB1: ldrh r1, [r1] +; THUMB1-NEXT: ldrh r4, [r0] +; THUMB1-NEXT: eors r4, r1 +; THUMB1-NEXT: muls r2, r3, r2 +; THUMB1-NEXT: uxth r0, r2 +; THUMB1-NEXT: orrs r0, r4 +; THUMB1-NEXT: pop +; +; THUMB2-LABEL: test1: +; THUMB2: ldrh r1, [r1] +; THUMB2-NEXT: ldrh r0, [r0] +; THUMB2-NEXT: eors r0, r1 +; THUMB2-NEXT: mul r1, r2, r3 +; THUMB2-NEXT: uxth r1, r1 +; THUMB2-NEXT: orrs r0, r1 +; THUMB2-NEXT: bx lr +entry: + %0 = load i32, i32* %a, align 4 + %1 = load i32, i32* %b, align 4 + %mul = mul i32 %x, %y + %xor = xor i32 %0, %1 + %or = or i32 %xor, %mul + %and = and i32 %or, 65535 + ret i32 %and +} + +define arm_aapcscc i32 @test2(i32* %a, i32* %b, i32 %x, i32 %y) { +; ARM-LABEL: test2: +; ARM: ldr r1, [r1] +; ARM-NEXT: ldr r0, [r0] +; ARM-NEXT: mul r1, r2, r1 +; ARM-NEXT: eor r0, r0, r3 +; ARM-NEXT: orr r0, r0, r1 +; ARM-NEXT: uxth r0, r0 +; ARM-NEXT: bx lr +; +; ARMEB-LABEL: test2: +; ARMEB: ldr r1, [r1] +; ARMEB-NEXT: ldr r0, [r0] +; ARMEB-NEXT: mul r1, r2, r1 +; ARMEB-NEXT: eor r0, r0, r3 +; ARMEB-NEXT: orr r0, r0, r1 +; ARMEB-NEXT: uxth r0, r0 +; ARMEB-NEXT: bx lr +; +; THUMB1-LABEL: test2: +; THUMB1: ldr r1, [r1] +; THUMB1-NEXT: muls r1, r2, r1 +; THUMB1-NEXT: ldr r0, [r0] +; THUMB1-NEXT: eors r0, r3 +; THUMB1-NEXT: orrs r0, r1 +; THUMB1-NEXT: uxth r0, r0 +; THUMB1-NEXT: bx lr +; +; THUMB2-LABEL: test2: +; THUMB2: ldr r1, [r1] +; THUMB2-NEXT: ldr r0, [r0] +; THUMB2-NEXT: muls r1, r2, r1 +; THUMB2-NEXT: eors r0, r3 +; THUMB2-NEXT: orrs r0, r1 +; THUMB2-NEXT: uxth r0, r0 +; THUMB2-NEXT: bx lr +entry: + %0 = load i32, i32* %a, align 4 + %1 = load i32, i32* %b, align 4 + %mul = mul i32 %x, %1 + %xor = xor i32 %0, %y + %or = or i32 %xor, %mul + %and = and i32 %or, 65535 + ret i32 %and +} + +define arm_aapcscc i32 @test3(i32* %a, i32* %b, i32 %x, i16* %y) { +; ARM-LABEL: test3: +; ARM: ldr r0, [r0] +; ARM-NEXT: mul r1, r2, r0 +; ARM-NEXT: ldrh r2, [r3] +; ARM-NEXT: eor r0, r0, r2 +; ARM-NEXT: orr r0, r0, r1 +; ARM-NEXT: uxth r0, r0 +; ARM-NEXT: bx lr +; +; ARMEB-LABEL: test3: +; ARMEB: ldr r0, [r0] +; ARMEB-NEXT: mul r1, r2, r0 +; ARMEB-NEXT: ldrh r2, [r3] +; ARMEB-NEXT: eor r0, r0, r2 +; ARMEB-NEXT: orr r0, r0, r1 +; ARMEB-NEXT: uxth r0, r0 +; ARMEB-NEXT: bx lr +; +; THUMB1-LABEL: test3: +; THUMB1: ldr r0, [r0] +; THUMB1-NEXT: muls r2, r0, r2 +; THUMB1-NEXT: ldrh r1, [r3] +; THUMB1-NEXT: eors r1, r0 +; THUMB1-NEXT: orrs r1, r2 +; THUMB1-NEXT: uxth r0, r1 +; THUMB1-NEXT: bx lr +; +; THUMB2-LABEL: test3: +; THUMB2: ldr r0, [r0] +; THUMB2-NEXT: mul r1, r2, r0 +; THUMB2-NEXT: ldrh r2, [r3] +; THUMB2-NEXT: eors r0, r2 +; THUMB2-NEXT: orrs r0, r1 +; THUMB2-NEXT: uxth r0, r0 +; THUMB2-NEXT: bx lr +entry: + %0 = load i32, i32* %a, align 4 + %1 = load i16, i16* %y, align 4 + %2 = zext i16 %1 to i32 + %mul = mul i32 %x, %0 + %xor = xor i32 %0, %2 + %or = or i32 %xor, %mul + %and = and i32 %or, 65535 + ret i32 %and +} + +define arm_aapcscc i32 @test4(i32* %a, i32* %b, i32 %x, i32 %y) { +; ARM-LABEL: test4: +; ARM: mul r2, r2, r3 +; ARM-NEXT: ldrh r1, [r1] +; ARM-NEXT: ldrh r0, [r0] +; ARM-NEXT: eor r0, r0, r1 +; ARM-NEXT: uxth r1, r2 +; ARM-NEXT: orr r0, r0, r1 +; ARM-NEXT: bx lr +; +; ARMEB-LABEL: test4: +; ARMEB: mul r2, r2, r3 +; ARMEB-NEXT: ldrh r1, [r1, #2] +; ARMEB-NEXT: ldrh r0, [r0, #2] +; ARMEB-NEXT: eor r0, r0, 
r1 +; ARMEB-NEXT: uxth r1, r2 +; ARMEB-NEXT: orr r0, r0, r1 +; ARMEB-NEXT: bx lr +; +; THUMB1-LABEL: test4: +; THUMB1: ldrh r1, [r1] +; THUMB1-NEXT: ldrh r4, [r0] +; THUMB1-NEXT: eors r4, r1 +; THUMB1-NEXT: muls r2, r3, r2 +; THUMB1-NEXT: uxth r0, r2 +; THUMB1-NEXT: orrs r0, r4 +; THUMB1-NEXT: pop +; +; THUMB2-LABEL: test4: +; THUMB2: ldrh r1, [r1] +; THUMB2-NEXT: ldrh r0, [r0] +; THUMB2-NEXT: eors r0, r1 +; THUMB2-NEXT: mul r1, r2, r3 +; THUMB2-NEXT: uxth r1, r1 +; THUMB2-NEXT: orrs r0, r1 +; THUMB2-NEXT: bx lr +entry: + %0 = load i32, i32* %a, align 4 + %1 = load i32, i32* %b, align 4 + %mul = mul i32 %x, %y + %xor = xor i32 %0, %1 + %or = or i32 %xor, %mul + %and = and i32 %or, 65535 + ret i32 %and +} + +define arm_aapcscc i32 @test5(i32* %a, i32* %b, i32 %x, i16 zeroext %y) { +; ARM-LABEL: test5: +; ARM: ldr r1, [r1] +; ARM-NEXT: ldrh r0, [r0] +; ARM-NEXT: mul r1, r2, r1 +; ARM-NEXT: eor r0, r0, r3 +; ARM-NEXT: uxth r1, r1 +; ARM-NEXT: orr r0, r0, r1 +; ARM-NEXT: bx lr +; +; ARMEB-LABEL: test5: +; ARMEB: ldr r1, [r1] +; ARMEB-NEXT: ldrh r0, [r0, #2] +; ARMEB-NEXT: mul r1, r2, r1 +; ARMEB-NEXT: eor r0, r0, r3 +; ARMEB-NEXT: uxth r1, r1 +; ARMEB-NEXT: orr r0, r0, r1 +; ARMEB-NEXT: bx lr +; +; THUMB1-LABEL: test5: +; THUMB1: ldrh r4, [r0] +; THUMB1-NEXT: eors r4, r3 +; THUMB1-NEXT: ldr r0, [r1] +; THUMB1-NEXT: muls r0, r2, r0 +; THUMB1-NEXT: uxth r0, r0 +; THUMB1-NEXT: orrs r0, r4 +; THUMB1-NEXT: pop +; +; THUMB2-LABEL: test5: +; THUMB2: ldr r1, [r1] +; THUMB2-NEXT: ldrh r0, [r0] +; THUMB2-NEXT: muls r1, r2, r1 +; THUMB2-NEXT: eors r0, r3 +; THUMB2-NEXT: uxth r1, r1 +; THUMB2-NEXT: orrs r0, r1 +; THUMB2-NEXT: bx lr +entry: + %0 = load i32, i32* %a, align 4 + %1 = load i32, i32* %b, align 4 + %mul = mul i32 %x, %1 + %ext = zext i16 %y to i32 + %xor = xor i32 %0, %ext + %or = or i32 %xor, %mul + %and = and i32 %or, 65535 + ret i32 %and +} + +define arm_aapcscc i1 @test6(i8* %x, i8 %y, i8 %z) { +; ARM-LABEL: test6: +; ARM: @ %bb.0: @ %entry +; ARM-NEXT: ldrb r0, [r0] +; ARM-NEXT: uxtb r2, r2 +; ARM-NEXT: and r1, r0, r1 +; ARM-NEXT: mov r0, #0 +; ARM-NEXT: cmp r1, r2 +; ARM-NEXT: movweq r0, #1 +; ARM-NEXT: bx lr +; +; ARMEB-LABEL: test6: +; ARMEB: @ %bb.0: @ %entry +; ARMEB-NEXT: ldrb r0, [r0] +; ARMEB-NEXT: uxtb r2, r2 +; ARMEB-NEXT: and r1, r0, r1 +; ARMEB-NEXT: mov r0, #0 +; ARMEB-NEXT: cmp r1, r2 +; ARMEB-NEXT: movweq r0, #1 +; ARMEB-NEXT: bx lr +; +; THUMB1-LABEL: test6: +; THUMB1: @ %bb.0: @ %entry +; THUMB1-NEXT: ldrb r3, [r0] +; THUMB1-NEXT: ands r3, r1 +; THUMB1-NEXT: uxtb r2, r2 +; THUMB1-NEXT: movs r0, #1 +; THUMB1-NEXT: movs r1, #0 +; THUMB1-NEXT: cmp r3, r2 +; THUMB1-NEXT: beq .LBB18_2 +; THUMB1-NEXT: @ %bb.1: @ %entry +; THUMB1-NEXT: mov r0, r1 +; THUMB1-NEXT: .LBB18_2: @ %entry +; THUMB1-NEXT: bx lr +; +; THUMB2-LABEL: test6: +; THUMB2: @ %bb.0: @ %entry +; THUMB2-NEXT: ldrb r0, [r0] +; THUMB2-NEXT: uxtb r2, r2 +; THUMB2-NEXT: ands r1, r0 +; THUMB2-NEXT: movs r0, #0 +; THUMB2-NEXT: cmp r1, r2 +; THUMB2-NEXT: it eq +; THUMB2-NEXT: moveq r0, #1 +; THUMB2-NEXT: bx lr +entry: + %0 = load i8, i8* %x, align 4 + %1 = and i8 %0, %y + %2 = icmp eq i8 %1, %z + ret i1 %2 +} + +define arm_aapcscc i1 @test7(i16* %x, i16 %y, i8 %z) { +; ARM-LABEL: test7: +; ARM: @ %bb.0: @ %entry +; ARM-NEXT: ldrb r0, [r0] +; ARM-NEXT: uxtb r2, r2 +; ARM-NEXT: and r1, r0, r1 +; ARM-NEXT: mov r0, #0 +; ARM-NEXT: cmp r1, r2 +; ARM-NEXT: movweq r0, #1 +; ARM-NEXT: bx lr +; +; ARMEB-LABEL: test7: +; ARMEB: @ %bb.0: @ %entry +; ARMEB-NEXT: ldrb r0, [r0, #1] +; ARMEB-NEXT: uxtb r2, r2 +; ARMEB-NEXT: and r1, r0, r1 
+; ARMEB-NEXT: mov r0, #0 +; ARMEB-NEXT: cmp r1, r2 +; ARMEB-NEXT: movweq r0, #1 +; ARMEB-NEXT: bx lr +; +; THUMB1-LABEL: test7: +; THUMB1: @ %bb.0: @ %entry +; THUMB1-NEXT: ldrb r3, [r0] +; THUMB1-NEXT: ands r3, r1 +; THUMB1-NEXT: uxtb r2, r2 +; THUMB1-NEXT: movs r0, #1 +; THUMB1-NEXT: movs r1, #0 +; THUMB1-NEXT: cmp r3, r2 +; THUMB1-NEXT: beq .LBB19_2 +; THUMB1-NEXT: @ %bb.1: @ %entry +; THUMB1-NEXT: mov r0, r1 +; THUMB1-NEXT: .LBB19_2: @ %entry +; THUMB1-NEXT: bx lr +; +; THUMB2-LABEL: test7: +; THUMB2: @ %bb.0: @ %entry +; THUMB2-NEXT: ldrb r0, [r0] +; THUMB2-NEXT: uxtb r2, r2 +; THUMB2-NEXT: ands r1, r0 +; THUMB2-NEXT: movs r0, #0 +; THUMB2-NEXT: cmp r1, r2 +; THUMB2-NEXT: it eq +; THUMB2-NEXT: moveq r0, #1 +; THUMB2-NEXT: bx lr +entry: + %0 = load i16, i16* %x, align 4 + %1 = and i16 %0, %y + %2 = trunc i16 %1 to i8 + %3 = icmp eq i8 %2, %z + ret i1 %3 +} + +define arm_aapcscc void @test8(i32* nocapture %p) { +; ARM-LABEL: test8: +; ARM: @ %bb.0: @ %entry +; ARM-NEXT: ldrb r1, [r0] +; ARM-NEXT: eor r1, r1, #255 +; ARM-NEXT: str r1, [r0] +; ARM-NEXT: bx lr +; +; ARMEB-LABEL: test8: +; ARMEB: @ %bb.0: @ %entry +; ARMEB-NEXT: ldrb r1, [r0, #3] +; ARMEB-NEXT: eor r1, r1, #255 +; ARMEB-NEXT: str r1, [r0] +; ARMEB-NEXT: bx lr +; +; THUMB1-LABEL: test8: +; THUMB1: @ %bb.0: @ %entry +; THUMB1-NEXT: ldrb r1, [r0] +; THUMB1-NEXT: movs r2, #255 +; THUMB1-NEXT: eors r2, r1 +; THUMB1-NEXT: str r2, [r0] +; THUMB1-NEXT: bx lr +; +; THUMB2-LABEL: test8: +; THUMB2: @ %bb.0: @ %entry +; THUMB2-NEXT: ldrb r1, [r0] +; THUMB2-NEXT: eor r1, r1, #255 +; THUMB2-NEXT: str r1, [r0] +; THUMB2-NEXT: bx lr +entry: + %0 = load i32, i32* %p, align 4 + %neg = and i32 %0, 255 + %and = xor i32 %neg, 255 + store i32 %and, i32* %p, align 4 + ret void +} + +define arm_aapcscc void @test9(i32* nocapture %p) { +; ARM-LABEL: test9: +; ARM: @ %bb.0: @ %entry +; ARM-NEXT: ldrb r1, [r0] +; ARM-NEXT: eor r1, r1, #255 +; ARM-NEXT: str r1, [r0] +; ARM-NEXT: bx lr +; +; ARMEB-LABEL: test9: +; ARMEB: @ %bb.0: @ %entry +; ARMEB-NEXT: ldrb r1, [r0, #3] +; ARMEB-NEXT: eor r1, r1, #255 +; ARMEB-NEXT: str r1, [r0] +; ARMEB-NEXT: bx lr +; +; THUMB1-LABEL: test9: +; THUMB1: @ %bb.0: @ %entry +; THUMB1-NEXT: ldrb r1, [r0] +; THUMB1-NEXT: movs r2, #255 +; THUMB1-NEXT: eors r2, r1 +; THUMB1-NEXT: str r2, [r0] +; THUMB1-NEXT: bx lr +; +; THUMB2-LABEL: test9: +; THUMB2: @ %bb.0: @ %entry +; THUMB2-NEXT: ldrb r1, [r0] +; THUMB2-NEXT: eor r1, r1, #255 +; THUMB2-NEXT: str r1, [r0] +; THUMB2-NEXT: bx lr +entry: + %0 = load i32, i32* %p, align 4 + %neg = xor i32 %0, -1 + %and = and i32 %neg, 255 + store i32 %and, i32* %p, align 4 + ret void +} + +; ARM-LABEL: test10: +; ARM: @ %bb.0: @ %entry +; ARM-NEXT: ldrb r1, [r0] +; ARM-NEXT: eor r1, r1, #255 +; ARM-NEXT: str r1, [r0] +; ARM-NEXT: bx lr +; +; ARMEB-LABEL: test10: +; ARMEB: @ %bb.0: @ %entry +; ARMEB-NEXT: ldrb r1, [r0, #3] +; ARMEB-NEXT: eor r1, r1, #255 +; ARMEB-NEXT: str r1, [r0] +; ARMEB-NEXT: bx lr +; +; THUMB1-LABEL: test10: +; THUMB1: @ %bb.0: @ %entry +; THUMB1-NEXT: ldrb r1, [r0] +; THUMB1-NEXT: movs r2, #255 +; THUMB1-NEXT: eors r2, r1 +; THUMB1-NEXT: str r2, [r0] +; THUMB1-NEXT: bx lr +; +; THUMB2-LABEL: test10: +; THUMB2: @ %bb.0: @ %entry +; THUMB2-NEXT: ldrb r1, [r0] +; THUMB2-NEXT: eor r1, r1, #255 +; THUMB2-NEXT: str r1, [r0] +; THUMB2-NEXT: bx lr +define arm_aapcscc void @test10(i32* nocapture %p) { +entry: + %0 = load i32, i32* %p, align 4 + %neg = and i32 %0, 255 + %and = xor i32 %neg, 255 + store i32 %and, i32* %p, align 4 + ret void +} + diff --git 
a/test/CodeGen/ARM/arm-and-tst-peephole.ll b/test/CodeGen/ARM/arm-and-tst-peephole.ll index a24808004ef1..c6ca6a624b11 100644 --- a/test/CodeGen/ARM/arm-and-tst-peephole.ll +++ b/test/CodeGen/ARM/arm-and-tst-peephole.ll @@ -142,27 +142,27 @@ return: ; preds = %bb2, %bb, %entry define i32 @test_tst_assessment(i32 %a, i32 %b) { ; ARM-LABEL: test_tst_assessment: -; ARM: @ BB#0: +; ARM: @ %bb.0: ; ARM-NEXT: and r0, r0, #1 ; ARM-NEXT: tst r1, #1 ; ARM-NEXT: subne r0, r0, #1 ; ARM-NEXT: mov pc, lr ; ; THUMB-LABEL: test_tst_assessment: -; THUMB: @ BB#0: +; THUMB: @ %bb.0: ; THUMB-NEXT: movs r2, r0 ; THUMB-NEXT: movs r0, #1 ; THUMB-NEXT: ands r0, r2 ; THUMB-NEXT: subs r2, r0, #1 ; THUMB-NEXT: lsls r1, r1, #31 ; THUMB-NEXT: beq .LBB2_2 -; THUMB-NEXT: @ BB#1: +; THUMB-NEXT: @ %bb.1: ; THUMB-NEXT: movs r0, r2 ; THUMB-NEXT: .LBB2_2: ; THUMB-NEXT: bx lr ; ; T2-LABEL: test_tst_assessment: -; T2: @ BB#0: +; T2: @ %bb.0: ; T2-NEXT: lsls r1, r1, #31 ; T2-NEXT: and r0, r0, #1 ; T2-NEXT: it ne @@ -170,7 +170,7 @@ define i32 @test_tst_assessment(i32 %a, i32 %b) { ; T2-NEXT: bx lr ; ; V8-LABEL: test_tst_assessment: -; V8: @ BB#0: +; V8: @ %bb.0: ; V8-NEXT: and r0, r0, #1 ; V8-NEXT: lsls r1, r1, #31 ; V8-NEXT: it ne diff --git a/test/CodeGen/ARM/arm-eabi.ll b/test/CodeGen/ARM/arm-eabi.ll index 898055dd1092..c2f364ab92b2 100644 --- a/test/CodeGen/ARM/arm-eabi.ll +++ b/test/CodeGen/ARM/arm-eabi.ll @@ -39,7 +39,7 @@ define void @foo(i32* %t) { %4 = bitcast %struct.my_s* %3 to i8* ; CHECK-EABI: bl __aeabi_memcpy ; CHECK-GNUEABI: bl memcpy - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %4, i8* inttoptr (i32 1 to i8*), i32 72, i32 4, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %4, i8* align 4 inttoptr (i32 1 to i8*), i32 72, i1 false) ret void } @@ -50,22 +50,22 @@ entry: ; memmove ; CHECK-EABI: bl __aeabi_memmove ; CHECK-GNUEABI: bl memmove - call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i32 0, i1 false) + call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i1 false) ; memcpy ; CHECK-EABI: bl __aeabi_memcpy ; CHECK-GNUEABI: bl memcpy - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i32 0, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i1 false) ; memset ; CHECK-EABI: mov r2, #1 ; CHECK-EABI: bl __aeabi_memset ; CHECK-GNUEABI: mov r1, #1 ; CHECK-GNUEABI: bl memset - call void @llvm.memset.p0i8.i32(i8* %dest, i8 1, i32 500, i32 0, i1 false) + call void @llvm.memset.p0i8.i32(i8* %dest, i8 1, i32 500, i1 false) ret void } -declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind -declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind +declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind diff --git a/test/CodeGen/ARM/arm-storebytesmerge.ll b/test/CodeGen/ARM/arm-storebytesmerge.ll new file mode 100644 index 000000000000..edc25302f7c5 --- /dev/null +++ b/test/CodeGen/ARM/arm-storebytesmerge.ll @@ -0,0 +1,347 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "thumbv7em-arm-none-eabi" + +; 
Function Attrs: nounwind +define arm_aapcs_vfpcc void @test(i8* %v50) #0 { +; CHECK-LABEL: test: +; CHECK: @ %bb.0: +; CHECK-NEXT: movw r1, #35722 +; CHECK-NEXT: movt r1, #36236 +; CHECK-NEXT: str.w r1, [r0, #394] +; CHECK-NEXT: movw r1, #36750 +; CHECK-NEXT: movt r1, #37264 +; CHECK-NEXT: str.w r1, [r0, #398] +; CHECK-NEXT: movw r1, #37778 +; CHECK-NEXT: movt r1, #38292 +; CHECK-NEXT: str.w r1, [r0, #402] +; CHECK-NEXT: movw r1, #38806 +; CHECK-NEXT: movt r1, #39320 +; CHECK-NEXT: str.w r1, [r0, #406] +; CHECK-NEXT: movw r1, #39834 +; CHECK-NEXT: strh.w r1, [r0, #410] +; CHECK-NEXT: movw r1, #40348 +; CHECK-NEXT: movt r1, #40862 +; CHECK-NEXT: str.w r1, [r0, #412] +; CHECK-NEXT: movw r1, #41376 +; CHECK-NEXT: movt r1, #41890 +; CHECK-NEXT: str.w r1, [r0, #416] +; CHECK-NEXT: movw r1, #42404 +; CHECK-NEXT: movt r1, #42918 +; CHECK-NEXT: str.w r1, [r0, #420] +; CHECK-NEXT: movw r1, #43432 +; CHECK-NEXT: movt r1, #43946 +; CHECK-NEXT: str.w r1, [r0, #424] +; CHECK-NEXT: movw r1, #44460 +; CHECK-NEXT: movt r1, #44974 +; CHECK-NEXT: str.w r1, [r0, #428] +; CHECK-NEXT: movw r1, #45488 +; CHECK-NEXT: strh.w r1, [r0, #432] +; CHECK-NEXT: movw r1, #46002 +; CHECK-NEXT: movt r1, #46516 +; CHECK-NEXT: str.w r1, [r0, #434] +; CHECK-NEXT: movw r1, #47030 +; CHECK-NEXT: strh.w r1, [r0, #438] +; CHECK-NEXT: movw r1, #47544 +; CHECK-NEXT: movt r1, #48058 +; CHECK-NEXT: str.w r1, [r0, #440] +; CHECK-NEXT: movw r1, #48572 +; CHECK-NEXT: movt r1, #49086 +; CHECK-NEXT: str.w r1, [r0, #444] +; CHECK-NEXT: movw r1, #49600 +; CHECK-NEXT: strh.w r1, [r0, #448] +; CHECK-NEXT: movs r1, #194 +; CHECK-NEXT: strb.w r1, [r0, #450] +; CHECK-NEXT: movw r1, #50371 +; CHECK-NEXT: movt r1, #50885 +; CHECK-NEXT: str.w r1, [r0, #451] +; CHECK-NEXT: movw r1, #51399 +; CHECK-NEXT: movt r1, #51913 +; CHECK-NEXT: str.w r1, [r0, #455] +; CHECK-NEXT: movw r1, #52427 +; CHECK-NEXT: movt r1, #52941 +; CHECK-NEXT: str.w r1, [r0, #459] +; CHECK-NEXT: movw r1, #53455 +; CHECK-NEXT: movt r1, #53969 +; CHECK-NEXT: str.w r1, [r0, #463] +; CHECK-NEXT: movw r1, #54483 +; CHECK-NEXT: strh.w r1, [r0, #467] +; CHECK-NEXT: movw r1, #54997 +; CHECK-NEXT: movt r1, #55511 +; CHECK-NEXT: str.w r1, [r0, #469] +; CHECK-NEXT: movw r1, #56025 +; CHECK-NEXT: movt r1, #56539 +; CHECK-NEXT: str.w r1, [r0, #473] +; CHECK-NEXT: movw r1, #57053 +; CHECK-NEXT: movt r1, #57567 +; CHECK-NEXT: str.w r1, [r0, #477] +; CHECK-NEXT: movw r1, #58081 +; CHECK-NEXT: movt r1, #58595 +; CHECK-NEXT: str.w r1, [r0, #481] +; CHECK-NEXT: movw r1, #59109 +; CHECK-NEXT: movt r1, #59623 +; CHECK-NEXT: str.w r1, [r0, #485] +; CHECK-NEXT: movw r1, #60137 +; CHECK-NEXT: strh.w r1, [r0, #489] +; CHECK-NEXT: movw r1, #60651 +; CHECK-NEXT: movt r1, #61165 +; CHECK-NEXT: str.w r1, [r0, #491] +; CHECK-NEXT: movw r1, #61679 +; CHECK-NEXT: strh.w r1, [r0, #495] +; CHECK-NEXT: movw r1, #62193 +; CHECK-NEXT: movt r1, #62707 +; CHECK-NEXT: str.w r1, [r0, #497] +; CHECK-NEXT: movw r1, #63221 +; CHECK-NEXT: movt r1, #63735 +; CHECK-NEXT: str.w r1, [r0, #501] +; CHECK-NEXT: movw r1, #64249 +; CHECK-NEXT: strh.w r1, [r0, #505] +; CHECK-NEXT: movs r1, #251 +; CHECK-NEXT: strb.w r1, [r0, #507] +; CHECK-NEXT: movw r1, #65020 +; CHECK-NEXT: movt r1, #65534 +; CHECK-NEXT: str.w r1, [r0, #508] +; CHECK-NEXT: bx lr + %v190 = getelementptr inbounds i8, i8* %v50, i32 394 + store i8 -118, i8* %v190, align 1 + %v191 = getelementptr inbounds i8, i8* %v50, i32 395 + store i8 -117, i8* %v191, align 1 + %v192 = getelementptr inbounds i8, i8* %v50, i32 396 + store i8 -116, i8* %v192, align 1 + %v193 = 
getelementptr inbounds i8, i8* %v50, i32 397 + store i8 -115, i8* %v193, align 1 + %v194 = getelementptr inbounds i8, i8* %v50, i32 398 + store i8 -114, i8* %v194, align 1 + %v195 = getelementptr inbounds i8, i8* %v50, i32 399 + store i8 -113, i8* %v195, align 1 + %v196 = getelementptr inbounds i8, i8* %v50, i32 400 + store i8 -112, i8* %v196, align 1 + %v197 = getelementptr inbounds i8, i8* %v50, i32 401 + store i8 -111, i8* %v197, align 1 + %v198 = getelementptr inbounds i8, i8* %v50, i32 402 + store i8 -110, i8* %v198, align 1 + %v199 = getelementptr inbounds i8, i8* %v50, i32 403 + store i8 -109, i8* %v199, align 1 + %v200 = getelementptr inbounds i8, i8* %v50, i32 404 + store i8 -108, i8* %v200, align 1 + %v201 = getelementptr inbounds i8, i8* %v50, i32 405 + store i8 -107, i8* %v201, align 1 + %v202 = getelementptr inbounds i8, i8* %v50, i32 406 + store i8 -106, i8* %v202, align 1 + %v203 = getelementptr inbounds i8, i8* %v50, i32 407 + store i8 -105, i8* %v203, align 1 + %v204 = getelementptr inbounds i8, i8* %v50, i32 408 + store i8 -104, i8* %v204, align 1 + %v205 = getelementptr inbounds i8, i8* %v50, i32 409 + store i8 -103, i8* %v205, align 1 + %v206 = getelementptr inbounds i8, i8* %v50, i32 410 + store i8 -102, i8* %v206, align 1 + %v207 = getelementptr inbounds i8, i8* %v50, i32 411 + store i8 -101, i8* %v207, align 1 + %v208 = getelementptr inbounds i8, i8* %v50, i32 412 + store i8 -100, i8* %v208, align 1 + %v209 = getelementptr inbounds i8, i8* %v50, i32 413 + store i8 -99, i8* %v209, align 1 + %v210 = getelementptr inbounds i8, i8* %v50, i32 414 + store i8 -98, i8* %v210, align 1 + %v211 = getelementptr inbounds i8, i8* %v50, i32 415 + store i8 -97, i8* %v211, align 1 + %v212 = getelementptr inbounds i8, i8* %v50, i32 416 + store i8 -96, i8* %v212, align 1 + %v213 = getelementptr inbounds i8, i8* %v50, i32 417 + store i8 -95, i8* %v213, align 1 + %v214 = getelementptr inbounds i8, i8* %v50, i32 418 + store i8 -94, i8* %v214, align 1 + %v215 = getelementptr inbounds i8, i8* %v50, i32 419 + store i8 -93, i8* %v215, align 1 + %v216 = getelementptr inbounds i8, i8* %v50, i32 420 + store i8 -92, i8* %v216, align 1 + %v217 = getelementptr inbounds i8, i8* %v50, i32 421 + store i8 -91, i8* %v217, align 1 + %v218 = getelementptr inbounds i8, i8* %v50, i32 422 + store i8 -90, i8* %v218, align 1 + %v219 = getelementptr inbounds i8, i8* %v50, i32 423 + store i8 -89, i8* %v219, align 1 + %v220 = getelementptr inbounds i8, i8* %v50, i32 424 + store i8 -88, i8* %v220, align 1 + %v221 = getelementptr inbounds i8, i8* %v50, i32 425 + store i8 -87, i8* %v221, align 1 + %v222 = getelementptr inbounds i8, i8* %v50, i32 426 + store i8 -86, i8* %v222, align 1 + %v223 = getelementptr inbounds i8, i8* %v50, i32 427 + store i8 -85, i8* %v223, align 1 + %v224 = getelementptr inbounds i8, i8* %v50, i32 428 + store i8 -84, i8* %v224, align 1 + %v225 = getelementptr inbounds i8, i8* %v50, i32 429 + store i8 -83, i8* %v225, align 1 + %v226 = getelementptr inbounds i8, i8* %v50, i32 430 + store i8 -82, i8* %v226, align 1 + %v227 = getelementptr inbounds i8, i8* %v50, i32 431 + store i8 -81, i8* %v227, align 1 + %v228 = getelementptr inbounds i8, i8* %v50, i32 432 + store i8 -80, i8* %v228, align 1 + %v229 = getelementptr inbounds i8, i8* %v50, i32 433 + store i8 -79, i8* %v229, align 1 + %v230 = getelementptr inbounds i8, i8* %v50, i32 434 + store i8 -78, i8* %v230, align 1 + %v231 = getelementptr inbounds i8, i8* %v50, i32 435 + store i8 -77, i8* %v231, align 1 + %v232 = getelementptr inbounds i8, 
i8* %v50, i32 436 + store i8 -76, i8* %v232, align 1 + %v233 = getelementptr inbounds i8, i8* %v50, i32 437 + store i8 -75, i8* %v233, align 1 + %v234 = getelementptr inbounds i8, i8* %v50, i32 438 + store i8 -74, i8* %v234, align 1 + %v235 = getelementptr inbounds i8, i8* %v50, i32 439 + store i8 -73, i8* %v235, align 1 + %v236 = getelementptr inbounds i8, i8* %v50, i32 440 + store i8 -72, i8* %v236, align 1 + %v237 = getelementptr inbounds i8, i8* %v50, i32 441 + store i8 -71, i8* %v237, align 1 + %v238 = getelementptr inbounds i8, i8* %v50, i32 442 + store i8 -70, i8* %v238, align 1 + %v239 = getelementptr inbounds i8, i8* %v50, i32 443 + store i8 -69, i8* %v239, align 1 + %v240 = getelementptr inbounds i8, i8* %v50, i32 444 + store i8 -68, i8* %v240, align 1 + %v241 = getelementptr inbounds i8, i8* %v50, i32 445 + store i8 -67, i8* %v241, align 1 + %v242 = getelementptr inbounds i8, i8* %v50, i32 446 + store i8 -66, i8* %v242, align 1 + %v243 = getelementptr inbounds i8, i8* %v50, i32 447 + store i8 -65, i8* %v243, align 1 + %v244 = getelementptr inbounds i8, i8* %v50, i32 448 + store i8 -64, i8* %v244, align 1 + %v245 = getelementptr inbounds i8, i8* %v50, i32 449 + store i8 -63, i8* %v245, align 1 + %v246 = getelementptr inbounds i8, i8* %v50, i32 450 + store i8 -62, i8* %v246, align 1 + %v247 = getelementptr inbounds i8, i8* %v50, i32 451 + store i8 -61, i8* %v247, align 1 + %v248 = getelementptr inbounds i8, i8* %v50, i32 452 + store i8 -60, i8* %v248, align 1 + %v249 = getelementptr inbounds i8, i8* %v50, i32 453 + store i8 -59, i8* %v249, align 1 + %v250 = getelementptr inbounds i8, i8* %v50, i32 454 + store i8 -58, i8* %v250, align 1 + %v251 = getelementptr inbounds i8, i8* %v50, i32 455 + store i8 -57, i8* %v251, align 1 + %v252 = getelementptr inbounds i8, i8* %v50, i32 456 + store i8 -56, i8* %v252, align 1 + %v253 = getelementptr inbounds i8, i8* %v50, i32 457 + store i8 -55, i8* %v253, align 1 + %v254 = getelementptr inbounds i8, i8* %v50, i32 458 + store i8 -54, i8* %v254, align 1 + %v255 = getelementptr inbounds i8, i8* %v50, i32 459 + store i8 -53, i8* %v255, align 1 + %v256 = getelementptr inbounds i8, i8* %v50, i32 460 + store i8 -52, i8* %v256, align 1 + %v257 = getelementptr inbounds i8, i8* %v50, i32 461 + store i8 -51, i8* %v257, align 1 + %v258 = getelementptr inbounds i8, i8* %v50, i32 462 + store i8 -50, i8* %v258, align 1 + %v259 = getelementptr inbounds i8, i8* %v50, i32 463 + store i8 -49, i8* %v259, align 1 + %v260 = getelementptr inbounds i8, i8* %v50, i32 464 + store i8 -48, i8* %v260, align 1 + %v261 = getelementptr inbounds i8, i8* %v50, i32 465 + store i8 -47, i8* %v261, align 1 + %v262 = getelementptr inbounds i8, i8* %v50, i32 466 + store i8 -46, i8* %v262, align 1 + %v263 = getelementptr inbounds i8, i8* %v50, i32 467 + store i8 -45, i8* %v263, align 1 + %v264 = getelementptr inbounds i8, i8* %v50, i32 468 + store i8 -44, i8* %v264, align 1 + %v265 = getelementptr inbounds i8, i8* %v50, i32 469 + store i8 -43, i8* %v265, align 1 + %v266 = getelementptr inbounds i8, i8* %v50, i32 470 + store i8 -42, i8* %v266, align 1 + %v267 = getelementptr inbounds i8, i8* %v50, i32 471 + store i8 -41, i8* %v267, align 1 + %v268 = getelementptr inbounds i8, i8* %v50, i32 472 + store i8 -40, i8* %v268, align 1 + %v269 = getelementptr inbounds i8, i8* %v50, i32 473 + store i8 -39, i8* %v269, align 1 + %v270 = getelementptr inbounds i8, i8* %v50, i32 474 + store i8 -38, i8* %v270, align 1 + %v271 = getelementptr inbounds i8, i8* %v50, i32 475 + store i8 -37, i8* %v271, 
align 1 + %v272 = getelementptr inbounds i8, i8* %v50, i32 476 + store i8 -36, i8* %v272, align 1 + %v273 = getelementptr inbounds i8, i8* %v50, i32 477 + store i8 -35, i8* %v273, align 1 + %v274 = getelementptr inbounds i8, i8* %v50, i32 478 + store i8 -34, i8* %v274, align 1 + %v275 = getelementptr inbounds i8, i8* %v50, i32 479 + store i8 -33, i8* %v275, align 1 + %v276 = getelementptr inbounds i8, i8* %v50, i32 480 + store i8 -32, i8* %v276, align 1 + %v277 = getelementptr inbounds i8, i8* %v50, i32 481 + store i8 -31, i8* %v277, align 1 + %v278 = getelementptr inbounds i8, i8* %v50, i32 482 + store i8 -30, i8* %v278, align 1 + %v279 = getelementptr inbounds i8, i8* %v50, i32 483 + store i8 -29, i8* %v279, align 1 + %v280 = getelementptr inbounds i8, i8* %v50, i32 484 + store i8 -28, i8* %v280, align 1 + %v281 = getelementptr inbounds i8, i8* %v50, i32 485 + store i8 -27, i8* %v281, align 1 + %v282 = getelementptr inbounds i8, i8* %v50, i32 486 + store i8 -26, i8* %v282, align 1 + %v283 = getelementptr inbounds i8, i8* %v50, i32 487 + store i8 -25, i8* %v283, align 1 + %v284 = getelementptr inbounds i8, i8* %v50, i32 488 + store i8 -24, i8* %v284, align 1 + %v285 = getelementptr inbounds i8, i8* %v50, i32 489 + store i8 -23, i8* %v285, align 1 + %v286 = getelementptr inbounds i8, i8* %v50, i32 490 + store i8 -22, i8* %v286, align 1 + %v287 = getelementptr inbounds i8, i8* %v50, i32 491 + store i8 -21, i8* %v287, align 1 + %v288 = getelementptr inbounds i8, i8* %v50, i32 492 + store i8 -20, i8* %v288, align 1 + %v289 = getelementptr inbounds i8, i8* %v50, i32 493 + store i8 -19, i8* %v289, align 1 + %v290 = getelementptr inbounds i8, i8* %v50, i32 494 + store i8 -18, i8* %v290, align 1 + %v291 = getelementptr inbounds i8, i8* %v50, i32 495 + store i8 -17, i8* %v291, align 1 + %v292 = getelementptr inbounds i8, i8* %v50, i32 496 + store i8 -16, i8* %v292, align 1 + %v293 = getelementptr inbounds i8, i8* %v50, i32 497 + store i8 -15, i8* %v293, align 1 + %v294 = getelementptr inbounds i8, i8* %v50, i32 498 + store i8 -14, i8* %v294, align 1 + %v295 = getelementptr inbounds i8, i8* %v50, i32 499 + store i8 -13, i8* %v295, align 1 + %v296 = getelementptr inbounds i8, i8* %v50, i32 500 + store i8 -12, i8* %v296, align 1 + %v297 = getelementptr inbounds i8, i8* %v50, i32 501 + store i8 -11, i8* %v297, align 1 + %v298 = getelementptr inbounds i8, i8* %v50, i32 502 + store i8 -10, i8* %v298, align 1 + %v299 = getelementptr inbounds i8, i8* %v50, i32 503 + store i8 -9, i8* %v299, align 1 + %v300 = getelementptr inbounds i8, i8* %v50, i32 504 + store i8 -8, i8* %v300, align 1 + %v301 = getelementptr inbounds i8, i8* %v50, i32 505 + store i8 -7, i8* %v301, align 1 + %v302 = getelementptr inbounds i8, i8* %v50, i32 506 + store i8 -6, i8* %v302, align 1 + %v303 = getelementptr inbounds i8, i8* %v50, i32 507 + store i8 -5, i8* %v303, align 1 + %v304 = getelementptr inbounds i8, i8* %v50, i32 508 + store i8 -4, i8* %v304, align 1 + %v305 = getelementptr inbounds i8, i8* %v50, i32 509 + store i8 -3, i8* %v305, align 1 + %v306 = getelementptr inbounds i8, i8* %v50, i32 510 + store i8 -2, i8* %v306, align 1 + %v307 = getelementptr inbounds i8, i8* %v50, i32 511 + store i8 -1, i8* %v307, align 1 + ret void + } + +attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="preserve-sign" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" 
"no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-m7" "target-features"="+d16,+dsp,+fp-armv8,+hwdiv,+thumb-mode,-crc,-crypto,-dotprod,-fp-only-sp,-fullfp16,-hwdiv-arm,-neon,-ras" "unsafe-fp-math"="false" "use-soft-float"="false" } + diff --git a/test/CodeGen/ARM/atomic-cmpxchg.ll b/test/CodeGen/ARM/atomic-cmpxchg.ll index a136e44fc196..fec116677085 100644 --- a/test/CodeGen/ARM/atomic-cmpxchg.ll +++ b/test/CodeGen/ARM/atomic-cmpxchg.ll @@ -49,9 +49,10 @@ entry: ; CHECK-THUMBV6: mov [[EXPECTED:r[0-9]+]], r1 ; CHECK-THUMBV6-NEXT: bl __sync_val_compare_and_swap_1 ; CHECK-THUMBV6-NEXT: mov [[RES:r[0-9]+]], r0 +; CHECK-THUMBV6-NEXT: uxtb [[EXPECTED_ZEXT:r[0-9]+]], [[EXPECTED]] ; CHECK-THUMBV6-NEXT: movs r0, #1 ; CHECK-THUMBV6-NEXT: movs [[ZERO:r[0-9]+]], #0 -; CHECK-THUMBV6-NEXT: cmp [[RES]], [[EXPECTED]] +; CHECK-THUMBV6-NEXT: cmp [[RES]], [[EXPECTED_ZEXT]] ; CHECK-THUMBV6-NEXT: beq [[END:.LBB[0-9_]+]] ; CHECK-THUMBV6-NEXT: mov r0, [[ZERO]] ; CHECK-THUMBV6-NEXT: [[END]]: diff --git a/test/CodeGen/ARM/atomic-ops-v8.ll b/test/CodeGen/ARM/atomic-ops-v8.ll index d1575ed12e4e..192ed8f8db7e 100644 --- a/test/CodeGen/ARM/atomic-ops-v8.ll +++ b/test/CodeGen/ARM/atomic-ops-v8.ll @@ -1046,7 +1046,7 @@ define i8 @test_atomic_cmpxchg_i8(i8 zeroext %wanted, i8 zeroext %new) nounwind ; CHECK-ARM-NEXT: cmp r[[OLD]], r0 ; CHECK-THUMB-NEXT: cmp r[[OLD]], r[[WANTED]] ; CHECK-NEXT: bne .LBB{{[0-9]+}}_4 -; CHECK-NEXT: BB#2: +; CHECK-NEXT: %bb.2: ; As above, r1 is a reasonable guess. ; CHECK: strexb [[STATUS:r[0-9]+]], r1, [r[[ADDR]]] ; CHECK-NEXT: cmp [[STATUS]], #0 @@ -1080,7 +1080,7 @@ define i16 @test_atomic_cmpxchg_i16(i16 zeroext %wanted, i16 zeroext %new) nounw ; CHECK-ARM-NEXT: cmp r[[OLD]], r0 ; CHECK-THUMB-NEXT: cmp r[[OLD]], r[[WANTED]] ; CHECK-NEXT: bne .LBB{{[0-9]+}}_4 -; CHECK-NEXT: BB#2: +; CHECK-NEXT: %bb.2: ; As above, r1 is a reasonable guess. ; CHECK: stlexh [[STATUS:r[0-9]+]], r1, [r[[ADDR]]] ; CHECK-NEXT: cmp [[STATUS]], #0 @@ -1113,7 +1113,7 @@ define void @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind { ; function there. ; CHECK-NEXT: cmp r[[OLD]], r0 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_4 -; CHECK-NEXT: BB#2: +; CHECK-NEXT: %bb.2: ; As above, r1 is a reasonable guess. ; CHECK: stlex [[STATUS:r[0-9]+]], r1, [r[[ADDR]]] ; CHECK-NEXT: cmp [[STATUS]], #0 @@ -1152,7 +1152,7 @@ define void @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind { ; CHECK-ARM-BE: orrs{{(\.w)?}} {{r[0-9]+}}, [[MISMATCH_HI]], [[MISMATCH_LO]] ; CHECK-THUMB-BE: orrs{{(\.w)?}} {{(r[0-9]+, )?}}[[MISMATCH_LO]], [[MISMATCH_HI]] ; CHECK-NEXT: bne .LBB{{[0-9]+}}_4 -; CHECK-NEXT: BB#2: +; CHECK-NEXT: %bb.2: ; As above, r2, r3 is a reasonable guess. 
; CHECK: strexd [[STATUS:r[0-9]+]], r2, r3, [r[[ADDR]]] ; CHECK-NEXT: cmp [[STATUS]], #0 diff --git a/test/CodeGen/ARM/avoid-cpsr-rmw.ll b/test/CodeGen/ARM/avoid-cpsr-rmw.ll index 78d3ebf371a4..9373c5d44210 100644 --- a/test/CodeGen/ARM/avoid-cpsr-rmw.ll +++ b/test/CodeGen/ARM/avoid-cpsr-rmw.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a9 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-CORTEX -; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=swift | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-SWIFT +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a9 -simplifycfg-sink-common=false | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-CORTEX +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=swift -simplifycfg-sink-common=false | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-SWIFT ; Avoid some 's' 16-bit instruction which partially update CPSR (and add false ; dependency) when it isn't dependent on last CPSR defining instruction. ; rdar://8928208 diff --git a/test/CodeGen/ARM/bool-ext-inc.ll b/test/CodeGen/ARM/bool-ext-inc.ll index ca9c9ab079db..00a7fcdee3ca 100644 --- a/test/CodeGen/ARM/bool-ext-inc.ll +++ b/test/CodeGen/ARM/bool-ext-inc.ll @@ -3,7 +3,7 @@ define i32 @sext_inc(i1 zeroext %x) { ; CHECK-LABEL: sext_inc: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: eor r0, r0, #1 ; CHECK-NEXT: mov pc, lr %ext = sext i1 %x to i32 @@ -13,7 +13,7 @@ define i32 @sext_inc(i1 zeroext %x) { define <4 x i32> @sext_inc_vec(<4 x i1> %x) { ; CHECK-LABEL: sext_inc_vec: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vmov.i16 d16, #0x1 ; CHECK-NEXT: vmov d17, r0, r1 ; CHECK-NEXT: veor d16, d17, d16 @@ -30,7 +30,7 @@ define <4 x i32> @sext_inc_vec(<4 x i1> %x) { define <4 x i32> @cmpgt_sext_inc_vec(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: cmpgt_sext_inc_vec: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vmov d17, r2, r3 ; CHECK-NEXT: vmov d16, r0, r1 ; CHECK-NEXT: mov r0, sp @@ -49,7 +49,7 @@ define <4 x i32> @cmpgt_sext_inc_vec(<4 x i32> %x, <4 x i32> %y) { define <4 x i32> @cmpne_sext_inc_vec(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: cmpne_sext_inc_vec: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vmov d17, r2, r3 ; CHECK-NEXT: mov r12, sp ; CHECK-NEXT: vld1.64 {d18, d19}, [r12] diff --git a/test/CodeGen/ARM/build-attributes.ll b/test/CodeGen/ARM/build-attributes.ll index bef7bbe01bff..a84bc9deecdd 100644 --- a/test/CodeGen/ARM/build-attributes.ll +++ b/test/CodeGen/ARM/build-attributes.ll @@ -191,6 +191,9 @@ ; ARMv7r ; RUN: llc < %s -mtriple=armv7r-none-linux-gnueabi -mcpu=cortex-r5 | FileCheck %s --check-prefix=NO-STRICT-ALIGN ; RUN: llc < %s -mtriple=armv7r-none-linux-gnueabi -mcpu=cortex-r5 -mattr=+strict-align | FileCheck %s --check-prefix=STRICT-ALIGN +; ARMv7em +; RUN: llc < %s -mtriple=thumbv7em-none-linux-gnueabi -mcpu=cortex-m4 | FileCheck %s --check-prefix=NO-STRICT-ALIGN +; RUN: llc < %s -mtriple=thumbv7em-none-linux-gnueabi -mcpu=cortex-m4 -mattr=+strict-align | FileCheck %s --check-prefix=STRICT-ALIGN ; ARMv7m ; RUN: llc < %s -mtriple=thumbv7m-none-linux-gnueabi -mcpu=cortex-m3 | FileCheck %s --check-prefix=NO-STRICT-ALIGN ; RUN: llc < %s -mtriple=thumbv7m-none-linux-gnueabi -mcpu=cortex-m3 -mattr=+strict-align | FileCheck %s --check-prefix=STRICT-ALIGN diff --git a/test/CodeGen/ARM/cmp1-peephole-thumb.mir b/test/CodeGen/ARM/cmp1-peephole-thumb.mir index 3e87ced0ee57..62675b4a77c8 100644 --- a/test/CodeGen/ARM/cmp1-peephole-thumb.mir +++ b/test/CodeGen/ARM/cmp1-peephole-thumb.mir @@ 
-49,9 +49,9 @@ frameInfo: hasVAStart: false hasMustTailInVarArgFunc: false -# CHECK: tMOVi8 1, 14, _ -# CHECK: tMOVi8 0, 14, _ -# CHECK: tMUL %1, %0, 14, _ +# CHECK: tMOVi8 1, 14, %noreg +# CHECK: tMOVi8 0, 14, %noreg +# CHECK: tMUL %1, %0, 14, %noreg # CHECK-NOT: tCMPi8 body: | bb.0.entry: @@ -59,10 +59,10 @@ body: | %1 = COPY %r1 %0 = COPY %r0 - %2, %cpsr = tMUL %1, %0, 14, _ - %3, %cpsr = tMOVi8 1, 14, _ - %4, %cpsr = tMOVi8 0, 14, _ - tCMPi8 killed %2, 0, 14, _, implicit-def %cpsr + %2, %cpsr = tMUL %1, %0, 14, %noreg + %3, %cpsr = tMOVi8 1, 14, %noreg + %4, %cpsr = tMOVi8 0, 14, %noreg + tCMPi8 killed %2, 0, 14, %noreg, implicit-def %cpsr tBcc %bb.2.entry, 0, %cpsr bb.1.entry: @@ -70,6 +70,6 @@ body: | bb.2.entry: %5 = PHI %4, %bb.1.entry, %3, %bb.0.entry %r0 = COPY %5 - tBX_RET 14, _, implicit %r0 + tBX_RET 14, %noreg, implicit %r0 ... diff --git a/test/CodeGen/ARM/cmp2-peephole-thumb.mir b/test/CodeGen/ARM/cmp2-peephole-thumb.mir index a31086d2113e..12569b53fde1 100644 --- a/test/CodeGen/ARM/cmp2-peephole-thumb.mir +++ b/test/CodeGen/ARM/cmp2-peephole-thumb.mir @@ -80,24 +80,24 @@ body: | %1 = COPY %r1 %0 = COPY %r0 - %2, %cpsr = tMUL %0, %1, 14, _ - tSTRspi %2, %stack.1.mul, 0, 14, _ :: (store 4 into %ir.mul) - tCMPi8 %2, 0, 14, _, implicit-def %cpsr + %2, %cpsr = tMUL %0, %1, 14, %noreg + tSTRspi %2, %stack.1.mul, 0, 14, %noreg :: (store 4 into %ir.mul) + tCMPi8 %2, 0, 14, %noreg, implicit-def %cpsr tBcc %bb.2.if.end, 12, %cpsr - tB %bb.1.if.then, 14, _ + tB %bb.1.if.then, 14, %noreg bb.1.if.then: - %4, %cpsr = tMOVi8 42, 14, _ - tSTRspi killed %4, %stack.0.retval, 0, 14, _ :: (store 4 into %ir.retval) - tB %bb.3.return, 14, _ + %4, %cpsr = tMOVi8 42, 14, %noreg + tSTRspi killed %4, %stack.0.retval, 0, 14, %noreg :: (store 4 into %ir.retval) + tB %bb.3.return, 14, %noreg bb.2.if.end: - %3, %cpsr = tMOVi8 1, 14, _ - tSTRspi killed %3, %stack.0.retval, 0, 14, _ :: (store 4 into %ir.retval) + %3, %cpsr = tMOVi8 1, 14, %noreg + tSTRspi killed %3, %stack.0.retval, 0, 14, %noreg :: (store 4 into %ir.retval) bb.3.return: - %5 = tLDRspi %stack.0.retval, 0, 14, _ :: (dereferenceable load 4 from %ir.retval) + %5 = tLDRspi %stack.0.retval, 0, 14, %noreg :: (dereferenceable load 4 from %ir.retval) %r0 = COPY %5 - tBX_RET 14, _, implicit %r0 + tBX_RET 14, %noreg, implicit %r0 ... 
diff --git a/test/CodeGen/ARM/cmpxchg-O0.ll b/test/CodeGen/ARM/cmpxchg-O0.ll index f8ad2bbbbe0e..b49378d6702e 100644 --- a/test/CodeGen/ARM/cmpxchg-O0.ll +++ b/test/CodeGen/ARM/cmpxchg-O0.ll @@ -17,7 +17,8 @@ define { i8, i1 } @test_cmpxchg_8(i8* %addr, i8 %desired, i8 %new) nounwind { ; CHECK: cmp{{(\.w)?}} [[STATUS]], #0 ; CHECK: bne [[RETRY]] ; CHECK: [[DONE]]: -; CHECK: cmp{{(\.w)?}} [[OLD]], [[DESIRED]] +; CHECK: uxtb [[DESIRED_ZEXT:r[0-9]+]], [[DESIRED]] +; CHECK: cmp{{(\.w)?}} [[OLD]], [[DESIRED_ZEXT]] ; CHECK: {{moveq|movweq}} {{r[0-9]+}}, #1 ; CHECK: dmb ish %res = cmpxchg i8* %addr, i8 %desired, i8 %new seq_cst monotonic @@ -36,7 +37,8 @@ define { i16, i1 } @test_cmpxchg_16(i16* %addr, i16 %desired, i16 %new) nounwind ; CHECK: cmp{{(\.w)?}} [[STATUS]], #0 ; CHECK: bne [[RETRY]] ; CHECK: [[DONE]]: -; CHECK: cmp{{(\.w)?}} [[OLD]], [[DESIRED]] +; CHECK: uxth [[DESIRED_ZEXT:r[0-9]+]], [[DESIRED]] +; CHECK: cmp{{(\.w)?}} [[OLD]], [[DESIRED_ZEXT]] ; CHECK: {{moveq|movweq}} {{r[0-9]+}}, #1 ; CHECK: dmb ish %res = cmpxchg i16* %addr, i16 %desired, i16 %new seq_cst monotonic diff --git a/test/CodeGen/ARM/cmpxchg-weak.ll b/test/CodeGen/ARM/cmpxchg-weak.ll index 29d97fef0606..5ee07828526c 100644 --- a/test/CodeGen/ARM/cmpxchg-weak.ll +++ b/test/CodeGen/ARM/cmpxchg-weak.ll @@ -5,16 +5,16 @@ define void @test_cmpxchg_weak(i32 *%addr, i32 %desired, i32 %new) { %pair = cmpxchg weak i32* %addr, i32 %desired, i32 %new seq_cst monotonic %oldval = extractvalue { i32, i1 } %pair, 0 -; CHECK-NEXT: BB#0: +; CHECK-NEXT: %bb.0: ; CHECK-NEXT: ldrex [[LOADED:r[0-9]+]], [r0] ; CHECK-NEXT: cmp [[LOADED]], r1 ; CHECK-NEXT: bne [[LDFAILBB:LBB[0-9]+_[0-9]+]] -; CHECK-NEXT: BB#1: +; CHECK-NEXT: %bb.1: ; CHECK-NEXT: dmb ish ; CHECK-NEXT: strex [[SUCCESS:r[0-9]+]], r2, [r0] ; CHECK-NEXT: cmp [[SUCCESS]], #0 ; CHECK-NEXT: beq [[SUCCESSBB:LBB[0-9]+_[0-9]+]] -; CHECK-NEXT: BB#2: +; CHECK-NEXT: %bb.2: ; CHECK-NEXT: str r3, [r0] ; CHECK-NEXT: bx lr ; CHECK-NEXT: [[LDFAILBB]]: @@ -37,11 +37,11 @@ define i1 @test_cmpxchg_weak_to_bool(i32, i32 *%addr, i32 %desired, i32 %new) { %pair = cmpxchg weak i32* %addr, i32 %desired, i32 %new seq_cst monotonic %success = extractvalue { i32, i1 } %pair, 1 -; CHECK-NEXT: BB#0: +; CHECK-NEXT: %bb.0: ; CHECK-NEXT: ldrex [[LOADED:r[0-9]+]], [r1] ; CHECK-NEXT: cmp [[LOADED]], r2 ; CHECK-NEXT: bne [[LDFAILBB:LBB[0-9]+_[0-9]+]] -; CHECK-NEXT: BB#1: +; CHECK-NEXT: %bb.1: ; CHECK-NEXT: dmb ish ; CHECK-NEXT: mov r0, #0 ; CHECK-NEXT: strex [[SUCCESS:r[0-9]+]], r3, [r1] diff --git a/test/CodeGen/ARM/coff-no-dead-strip.ll b/test/CodeGen/ARM/coff-no-dead-strip.ll new file mode 100644 index 000000000000..def81644bd5d --- /dev/null +++ b/test/CodeGen/ARM/coff-no-dead-strip.ll @@ -0,0 +1,13 @@ +; RUN: llc -mtriple thumbv7-windows-msvc -filetype asm -o - %s | FileCheck %s + +@i = global i32 0 +@j = weak global i32 0 +@k = internal global i32 0 + +@llvm.used = appending global [3 x i8*] [i8* bitcast (i32* @i to i8*), i8* bitcast (i32* @j to i8*), i8* bitcast (i32* @k to i8*)] + +; CHECK: .section .drectve +; CHECK: .ascii " /INCLUDE:i" +; CHECK: .ascii " /INCLUDE:j" +; CHECK-NOT: .ascii " /INCLUDE:k" + diff --git a/test/CodeGen/ARM/constant-islands-cfg.mir b/test/CodeGen/ARM/constant-islands-cfg.mir index 66d854393b54..140ef727e432 100644 --- a/test/CodeGen/ARM/constant-islands-cfg.mir +++ b/test/CodeGen/ARM/constant-islands-cfg.mir @@ -48,17 +48,17 @@ fixedStack: body: | bb.0: liveins: %r0 - tCMPi8 killed %r0, 0, 14, _, implicit-def %cpsr + tCMPi8 killed %r0, 0, 14, %noreg, implicit-def %cpsr tBcc 
%bb.2, 1, killed %cpsr - tB %bb.3, 14, _ + tB %bb.3, 14, %noreg bb.1: dead %r0 = SPACE 256, undef %r0 bb.2: - tPOP_RET 14, _, def %pc + tPOP_RET 14, %noreg, def %pc bb.3: - tPOP_RET 14, _, def %pc + tPOP_RET 14, %noreg, def %pc ... diff --git a/test/CodeGen/ARM/constantpool-promote-ldrh.ll b/test/CodeGen/ARM/constantpool-promote-ldrh.ll index 59970495874b..0767d729a0ae 100644 --- a/test/CodeGen/ARM/constantpool-promote-ldrh.ll +++ b/test/CodeGen/ARM/constantpool-promote-ldrh.ll @@ -12,10 +12,10 @@ target triple = "thumbv6m-arm-linux-gnueabi" ; CHECK: ldrh r{{[0-9]+}}, {{\[}}[[base]]] define hidden i32 @fn1() #0 { entry: - call void @llvm.memcpy.p0i8.p0i8.i32(i8* undef, i8* bitcast ([4 x i16]* @fn1.a to i8*), i32 8, i32 2, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 undef, i8* align 2 bitcast ([4 x i16]* @fn1.a to i8*), i32 8, i1 false) ret i32 undef } ; Function Attrs: argmemonly nounwind -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i32, i1) +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i1) attributes #0 = { "target-features"="+strict-align" } diff --git a/test/CodeGen/ARM/constantpool-promote.ll b/test/CodeGen/ARM/constantpool-promote.ll index d5361f33a98b..ccd86257dd36 100644 --- a/test/CodeGen/ARM/constantpool-promote.ll +++ b/test/CodeGen/ARM/constantpool-promote.ll @@ -120,7 +120,7 @@ define void @fn1() "target-features"="+strict-align" { entry: %a = alloca [4 x i16], align 2 %0 = bitcast [4 x i16]* %a to i8* - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* bitcast ([4 x i16]* @fn1.a to i8*), i32 8, i32 2, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %0, i8* align 2 bitcast ([4 x i16]* @fn1.a to i8*), i32 8, i1 false) ret void } @@ -128,7 +128,7 @@ define void @fn2() "target-features"="+strict-align" { entry: %a = alloca [8 x i8], align 2 %0 = bitcast [8 x i8]* %a to i8* - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* bitcast ([8 x i8]* @fn2.a to i8*), i32 16, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* bitcast ([8 x i8]* @fn2.a to i8*), i32 16, i1 false) ret void } @@ -156,7 +156,7 @@ define void @pr32130() #0 { ; CHECK-V7: [[x]]: ; CHECK-V7: .asciz "s\000\000" define void @test10(i8* %a) local_unnamed_addr #0 { - call void @llvm.memmove.p0i8.p0i8.i32(i8* %a, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str, i32 0, i32 0), i32 1, i32 1, i1 false) + call void @llvm.memmove.p0i8.p0i8.i32(i8* align 1 %a, i8* align 1 getelementptr inbounds ([2 x i8], [2 x i8]* @.str, i32 0, i32 0), i32 1, i1 false) ret void } @@ -174,16 +174,16 @@ define void @test10(i8* %a) local_unnamed_addr #0 { ; CHECK-V7ARM: .short 3 ; CHECK-V7ARM: .short 4 define void @test11(i16* %a) local_unnamed_addr #0 { - call void @llvm.memmove.p0i16.p0i16.i32(i16* %a, i16* getelementptr inbounds ([2 x i16], [2 x i16]* @.arr1, i32 0, i32 0), i32 2, i32 2, i1 false) + call void @llvm.memmove.p0i16.p0i16.i32(i16* align 2 %a, i16* align 2 getelementptr inbounds ([2 x i16], [2 x i16]* @.arr1, i32 0, i32 0), i32 2, i1 false) ret void } declare void @b(i8*) #1 declare void @c(i16*) #1 -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i32, i1) -declare void @llvm.memmove.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1) local_unnamed_addr -declare void @llvm.memmove.p0i16.p0i16.i32(i16*, i16*, i32, i32, i1) local_unnamed_addr +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i1) +declare void 
@llvm.memmove.p0i8.p0i8.i32(i8*, i8*, i32, i1) local_unnamed_addr +declare void @llvm.memmove.p0i16.p0i16.i32(i16*, i16*, i32, i1) local_unnamed_addr attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/test/CodeGen/ARM/cortex-a57-misched-alu.ll b/test/CodeGen/ARM/cortex-a57-misched-alu.ll index 2ced60fbf0d3..7d50a2023ed8 100644 --- a/test/CodeGen/ARM/cortex-a57-misched-alu.ll +++ b/test/CodeGen/ARM/cortex-a57-misched-alu.ll @@ -5,7 +5,7 @@ ; Check the latency for ALU shifted operand variants. ; ; CHECK: ********** MI Scheduling ********** -; CHECK: foo:BB#0 entry +; CHECK: foo:%bb.0 entry ; ALU, basic - 1 cyc I0/I1 ; CHECK: EORrr diff --git a/test/CodeGen/ARM/cortex-a57-misched-basic.ll b/test/CodeGen/ARM/cortex-a57-misched-basic.ll index cfbef7bd4293..ad729c2ff2a3 100644 --- a/test/CodeGen/ARM/cortex-a57-misched-basic.ll +++ b/test/CodeGen/ARM/cortex-a57-misched-basic.ll @@ -6,7 +6,7 @@ ; SDIV should be scheduled at the block's begin (20 cyc of independent M unit). ; ; CHECK: ********** MI Scheduling ********** -; CHECK: foo:BB#0 entry +; CHECK: foo:%bb.0 entry ; GENERIC: LDRi12 ; GENERIC: Latency : 1 @@ -30,7 +30,7 @@ ; A57_SCHED: SUBrr ; A57_SCHED: Latency : 1 -; CHECK: ** Final schedule for BB#0 *** +; CHECK: ** Final schedule for %bb.0 *** ; GENERIC: LDRi12 ; GENERIC: SDIV ; A57_SCHED: SDIV diff --git a/test/CodeGen/ARM/cortex-a57-misched-vadd.ll b/test/CodeGen/ARM/cortex-a57-misched-vadd.ll index eb8d1c85523f..cb7490856aba 100644 --- a/test/CodeGen/ARM/cortex-a57-misched-vadd.ll +++ b/test/CodeGen/ARM/cortex-a57-misched-vadd.ll @@ -1,7 +1,7 @@ ; REQUIRES: asserts ; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -misched-postra -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s -; CHECK-LABEL: addv_i32:BB#0 +; CHECK-LABEL: addv_i32:%bb.0 ; CHECK: SU(8): {{.*}} VADDv4i32 ; CHECK-NEXT: # preds left ; CHECK-NEXT: # succs left @@ -13,7 +13,7 @@ define <4 x i32> @addv_i32(<4 x i32>, <4 x i32>) { ret <4 x i32> %3 } -; CHECK-LABEL: addv_f32:BB#0 +; CHECK-LABEL: addv_f32:%bb.0 ; CHECK: SU(8): {{.*}} VADDfq ; CHECK-NEXT: # preds left ; CHECK-NEXT: # succs left diff --git a/test/CodeGen/ARM/cortex-a57-misched-vfma.ll b/test/CodeGen/ARM/cortex-a57-misched-vfma.ll index 372b2e2f5dc9..a3e07ba17b9a 100644 --- a/test/CodeGen/ARM/cortex-a57-misched-vfma.ll +++ b/test/CodeGen/ARM/cortex-a57-misched-vfma.ll @@ -5,7 +5,7 @@ define float @Test1(float %f1, float %f2, float %f3, float %f4, float %f5, float %f6) { ; CHECK: ********** MI Scheduling ********** -; CHECK: Test1:BB#0 +; CHECK: Test1:%bb.0 ; CHECK: VMULS ; > VMULS common latency = 5 @@ -44,7 +44,7 @@ define float @Test1(float %f1, float %f2, float %f3, float %f4, float %f5, float ; ASIMD form define <2 x float> @Test2(<2 x float> %f1, <2 x float> %f2, <2 x float> %f3, <2 x float> %f4, <2 x float> %f5, <2 x float> %f6) { ; CHECK: ********** MI Scheduling ********** -; CHECK: Test2:BB#0 +; CHECK: Test2:%bb.0 ; CHECK: VMULfd ; > VMULfd common latency = 5 @@ -82,7 +82,7 @@ define <2 x float> @Test2(<2 x float> %f1, <2 x float> %f2, <2 x 
float> %f3, <2 define float @Test3(float %f1, float %f2, float %f3, float %f4, float %f5, float %f6) { ; CHECK: ********** MI Scheduling ********** -; CHECK: Test3:BB#0 +; CHECK: Test3:%bb.0 ; CHECK: VMULS ; > VMULS common latency = 5 @@ -121,7 +121,7 @@ define float @Test3(float %f1, float %f2, float %f3, float %f4, float %f5, float ; ASIMD form define <2 x float> @Test4(<2 x float> %f1, <2 x float> %f2, <2 x float> %f3, <2 x float> %f4, <2 x float> %f5, <2 x float> %f6) { ; CHECK: ********** MI Scheduling ********** -; CHECK: Test4:BB#0 +; CHECK: Test4:%bb.0 ; CHECK: VMULfd ; > VMULfd common latency = 5 @@ -159,7 +159,7 @@ define <2 x float> @Test4(<2 x float> %f1, <2 x float> %f2, <2 x float> %f3, <2 define float @Test5(float %f1, float %f2, float %f3) { ; CHECK: ********** MI Scheduling ********** -; CHECK: Test5:BB#0 +; CHECK: Test5:%bb.0 ; CHECK-DEFAULT: VNMLS ; CHECK-FAST: VFNMS @@ -178,7 +178,7 @@ define float @Test5(float %f1, float %f2, float %f3) { define float @Test6(float %f1, float %f2, float %f3) { ; CHECK: ********** MI Scheduling ********** -; CHECK: Test6:BB#0 +; CHECK: Test6:%bb.0 ; CHECK-DEFAULT: VNMLA ; CHECK-FAST: VFNMA diff --git a/test/CodeGen/ARM/cortex-a57-misched-vsub.ll b/test/CodeGen/ARM/cortex-a57-misched-vsub.ll index c3c445d3f0e1..fe14c861f8e6 100644 --- a/test/CodeGen/ARM/cortex-a57-misched-vsub.ll +++ b/test/CodeGen/ARM/cortex-a57-misched-vsub.ll @@ -1,7 +1,7 @@ ; REQUIRES: asserts ; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -misched-postra -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s -; CHECK-LABEL: subv_i32:BB#0 +; CHECK-LABEL: subv_i32:%bb.0 ; CHECK: SU(8): {{.*}} VSUBv4i32 ; CHECK-NEXT: # preds left ; CHECK-NEXT: # succs left @@ -13,7 +13,7 @@ define <4 x i32> @subv_i32(<4 x i32>, <4 x i32>) { ret <4 x i32> %3 } -; CHECK-LABEL: subv_f32:BB#0 +; CHECK-LABEL: subv_f32:%bb.0 ; CHECK: SU(8): {{.*}} VSUBfq ; CHECK-NEXT: # preds left ; CHECK-NEXT: # succs left diff --git a/test/CodeGen/ARM/cortexr52-misched-basic.ll b/test/CodeGen/ARM/cortexr52-misched-basic.ll index 614157eb0e10..0edc6653a033 100644 --- a/test/CodeGen/ARM/cortexr52-misched-basic.ll +++ b/test/CodeGen/ARM/cortexr52-misched-basic.ll @@ -7,7 +7,7 @@ ; as div takes more cycles to compute than eor. 
; ; CHECK: ********** MI Scheduling ********** -; CHECK: foo:BB#0 entry +; CHECK: foo:%bb.0 entry ; CHECK: EORrr ; GENERIC: Latency : 1 ; R52_SCHED: Latency : 3 @@ -17,7 +17,7 @@ ; CHECK: SDIV ; GENERIC: Latency : 0 ; R52_SCHED: Latency : 8 -; CHECK: ** Final schedule for BB#0 *** +; CHECK: ** Final schedule for %bb.0 *** ; GENERIC: EORrr ; GENERIC: SDIV ; R52_SCHED: SDIV diff --git a/test/CodeGen/ARM/crash-O0.ll b/test/CodeGen/ARM/crash-O0.ll index f92af999be51..bfbab8a99336 100644 --- a/test/CodeGen/ARM/crash-O0.ll +++ b/test/CodeGen/ARM/crash-O0.ll @@ -12,7 +12,7 @@ entry: } @.str523 = private constant [256 x i8] c"\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00", align 4 ; <[256 x i8]*> [#uses=1] -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind ; This function uses the scavenger for an ADDri instruction. ; ARMBaseRegisterInfo::estimateRSStackSizeLimit must return a 255 limit. @@ -21,8 +21,8 @@ entry: %letter = alloca i8 ; [#uses=0] %prodvers = alloca [256 x i8] ; <[256 x i8]*> [#uses=1] %buildver = alloca [256 x i8] ; <[256 x i8]*> [#uses=0] - call void @llvm.memcpy.p0i8.p0i8.i32(i8* undef, i8* getelementptr inbounds ([256 x i8], [256 x i8]* @.str523, i32 0, i32 0), i32 256, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 undef, i8* align 1 getelementptr inbounds ([256 x i8], [256 x i8]* @.str523, i32 0, i32 0), i32 256, i1 false) %prodvers2 = bitcast [256 x i8]* %prodvers to i8* ; [#uses=1] - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %prodvers2, i8* getelementptr inbounds ([256 x i8], [256 x i8]* @.str523, i32 0, i32 0), i32 256, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %prodvers2, i8* align 1 getelementptr inbounds ([256 x i8], [256 x i8]* @.str523, i32 0, i32 0), i32 256, i1 false) unreachable } diff --git a/test/CodeGen/ARM/crash-greedy.ll b/test/CodeGen/ARM/crash-greedy.ll index 6a58bb871d35..5320a163c0b8 100644 --- a/test/CodeGen/ARM/crash-greedy.ll +++ b/test/CodeGen/ARM/crash-greedy.ll @@ -61,7 +61,7 @@ for.end: ; preds = %cond.end ; CHECK: insert_elem ; This test has a sub-register copy with a kill flag: -; %vreg6:ssub_3 = COPY %vreg6:ssub_2; QPR_VFP2:%vreg6 +; %6:ssub_3 = COPY killed %6:ssub_2; QPR_VFP2:%6 ; The rewriter must do something sensible with that, or the scavenger crashes. 
define void @insert_elem() nounwind { entry: diff --git a/test/CodeGen/ARM/crash-on-pow2-shufflevector.ll b/test/CodeGen/ARM/crash-on-pow2-shufflevector.ll index 8395674e880d..4f6055dee62a 100644 --- a/test/CodeGen/ARM/crash-on-pow2-shufflevector.ll +++ b/test/CodeGen/ARM/crash-on-pow2-shufflevector.ll @@ -6,7 +6,7 @@ define i32 @foo(%struct.desc* %descs, i32 %num, i32 %cw) local_unnamed_addr #0 { ; CHECK-LABEL: foo: -; CHECK: @ BB#0: @ %entry +; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: mov r1, #32 ; CHECK-NEXT: vld1.32 {d16, d17}, [r0], r1 ; CHECK-NEXT: vld1.32 {d18, d19}, [r0] diff --git a/test/CodeGen/ARM/dbg-range-extension.mir b/test/CodeGen/ARM/dbg-range-extension.mir index a79607705c1c..02105eabc6df 100644 --- a/test/CodeGen/ARM/dbg-range-extension.mir +++ b/test/CodeGen/ARM/dbg-range-extension.mir @@ -23,37 +23,37 @@ # CHECK: [[VAR_I:![0-9]+]] = !DILocalVariable(name: "i", # CHECK: bb.0.entry -# CHECK: DBG_VALUE debug-use %r0, debug-use _, [[VAR_A]] -# CHECK: DBG_VALUE debug-use [[REG_A:%r[0-9]+]], debug-use _, [[VAR_A]] -# CHECK: DBG_VALUE debug-use [[REG_B:%r[0-9]+]], debug-use _, [[VAR_B]] +# CHECK: DBG_VALUE debug-use %r0, debug-use %noreg, [[VAR_A]] +# CHECK: DBG_VALUE debug-use [[REG_A:%r[0-9]+]], debug-use %noreg, [[VAR_A]] +# CHECK: DBG_VALUE debug-use [[REG_B:%r[0-9]+]], debug-use %noreg, [[VAR_B]] # CHECK: bb.1.if.then -# CHECK: DBG_VALUE debug-use [[REG_B]], debug-use _, [[VAR_B]] -# CHECK: DBG_VALUE debug-use [[REG_A]], debug-use _, [[VAR_A]] -# CHECK: DBG_VALUE debug-use [[REG_C:%r[0-9]+]], debug-use _, [[VAR_C]] +# CHECK: DBG_VALUE debug-use [[REG_B]], debug-use %noreg, [[VAR_B]] +# CHECK: DBG_VALUE debug-use [[REG_A]], debug-use %noreg, [[VAR_A]] +# CHECK: DBG_VALUE debug-use [[REG_C:%r[0-9]+]], debug-use %noreg, [[VAR_C]] # CHECK: DBG_VALUE 1, 0, [[VAR_I]] # CHECK: bb.2.for.body -# CHECK: DBG_VALUE debug-use [[REG_I:%r[0-9]+]], debug-use _, [[VAR_I]] -# CHECK: DBG_VALUE debug-use [[REG_C]], debug-use _, [[VAR_C]] -# CHECK: DBG_VALUE debug-use [[REG_B]], debug-use _, [[VAR_B]] -# CHECK: DBG_VALUE debug-use [[REG_A]], debug-use _, [[VAR_A]] -# CHECK: DBG_VALUE debug-use [[REG_I]], debug-use _, [[VAR_I]] +# CHECK: DBG_VALUE debug-use [[REG_I:%r[0-9]+]], debug-use %noreg, [[VAR_I]] +# CHECK: DBG_VALUE debug-use [[REG_C]], debug-use %noreg, [[VAR_C]] +# CHECK: DBG_VALUE debug-use [[REG_B]], debug-use %noreg, [[VAR_B]] +# CHECK: DBG_VALUE debug-use [[REG_A]], debug-use %noreg, [[VAR_A]] +# CHECK: DBG_VALUE debug-use [[REG_I]], debug-use %noreg, [[VAR_I]] # CHECK: bb.3.for.cond -# CHECK: DBG_VALUE debug-use [[REG_C]], debug-use _, [[VAR_C]] -# CHECK: DBG_VALUE debug-use [[REG_B]], debug-use _, [[VAR_B]] -# CHECK: DBG_VALUE debug-use [[REG_A]], debug-use _, [[VAR_A]] -# CHECK: DBG_VALUE debug-use [[REG_I]], debug-use _, [[VAR_I]] +# CHECK: DBG_VALUE debug-use [[REG_C]], debug-use %noreg, [[VAR_C]] +# CHECK: DBG_VALUE debug-use [[REG_B]], debug-use %noreg, [[VAR_B]] +# CHECK: DBG_VALUE debug-use [[REG_A]], debug-use %noreg, [[VAR_A]] +# CHECK: DBG_VALUE debug-use [[REG_I]], debug-use %noreg, [[VAR_I]] # CHECK: bb.4.for.cond.cleanup -# CHECK: DBG_VALUE debug-use [[REG_C]], debug-use _, [[VAR_C]] -# CHECK: DBG_VALUE debug-use [[REG_B]], debug-use _, [[VAR_B]] -# CHECK: DBG_VALUE debug-use [[REG_A]], debug-use _, [[VAR_A]] +# CHECK: DBG_VALUE debug-use [[REG_C]], debug-use %noreg, [[VAR_C]] +# CHECK: DBG_VALUE debug-use [[REG_B]], debug-use %noreg, [[VAR_B]] +# CHECK: DBG_VALUE debug-use [[REG_A]], debug-use %noreg, [[VAR_A]] # CHECK: bb.5.if.end -# CHECK: DBG_VALUE debug-use 
[[REG_B]], debug-use _, [[VAR_B]] -# CHECK: DBG_VALUE debug-use [[REG_A]], debug-use _, [[VAR_A]] +# CHECK: DBG_VALUE debug-use [[REG_B]], debug-use %noreg, [[VAR_B]] +# CHECK: DBG_VALUE debug-use [[REG_A]], debug-use %noreg, [[VAR_A]] --- | ; ModuleID = '/data/kwalker/work/OpenSource-llvm/llvm/test/CodeGen/ARM/dbg-range-extension.ll' source_filename = "/data/kwalker/work/OpenSource-llvm/llvm/test/CodeGen/ARM/dbg-range-extension.ll" @@ -211,7 +211,7 @@ body: | bb.0.entry: liveins: %r0, %r4, %r5, %r6, %r7, %r11, %lr - %sp = frame-setup STMDB_UPD %sp, 14, _, killed %r4, killed %r5, killed %r6, killed %r7, killed %r11, killed %lr + %sp = frame-setup STMDB_UPD %sp, 14, %noreg, killed %r4, killed %r5, killed %r6, killed %r7, killed %r11, killed %lr frame-setup CFI_INSTRUCTION def_cfa_offset 24 frame-setup CFI_INSTRUCTION offset %lr, -4 frame-setup CFI_INSTRUCTION offset %r11, -8 @@ -219,58 +219,58 @@ body: | frame-setup CFI_INSTRUCTION offset %r6, -16 frame-setup CFI_INSTRUCTION offset %r5, -20 frame-setup CFI_INSTRUCTION offset %r4, -24 - DBG_VALUE debug-use %r0, debug-use _, !13, !20, debug-location !21 - %r4 = MOVr killed %r0, 14, _, _ - DBG_VALUE debug-use %r4, debug-use _, !13, !20, debug-location !21 - %r0 = MOVi 10, 14, _, _, debug-location !22 - %r1 = MOVi 11, 14, _, _, debug-location !22 + DBG_VALUE debug-use %r0, debug-use %noreg, !13, !20, debug-location !21 + %r4 = MOVr killed %r0, 14, %noreg, %noreg + DBG_VALUE debug-use %r4, debug-use %noreg, !13, !20, debug-location !21 + %r0 = MOVi 10, 14, %noreg, _, debug-location !22 + %r1 = MOVi 11, 14, %noreg, _, debug-location !22 BL @func2, csr_aapcs, implicit-def dead %lr, implicit %sp, implicit killed %r0, implicit killed %r1, implicit-def %sp, implicit-def %r0, debug-location !22 - %r5 = MOVr killed %r0, 14, _, _, debug-location !22 - DBG_VALUE debug-use %r5, debug-use _, !14, !20, debug-location !23 - CMPri %r4, 0, 14, _, implicit-def %cpsr, debug-location !25 + %r5 = MOVr killed %r0, 14, %noreg, _, debug-location !22 + DBG_VALUE debug-use %r5, debug-use %noreg, !14, !20, debug-location !23 + CMPri %r4, 0, 14, %noreg, implicit-def %cpsr, debug-location !25 Bcc %bb.5.if.end, 0, killed %cpsr bb.1.if.then: liveins: %r4, %r5 - %r0 = MOVi 12, 14, _, _, debug-location !26 - %r1 = MOVi 13, 14, _, _, debug-location !26 + %r0 = MOVi 12, 14, %noreg, _, debug-location !26 + %r1 = MOVi 13, 14, %noreg, _, debug-location !26 BL @func2, csr_aapcs, implicit-def dead %lr, implicit %sp, implicit killed %r0, implicit killed %r1, implicit-def %sp, implicit-def %r0, debug-location !26 - %r6 = MOVr killed %r0, 14, _, _, debug-location !26 - DBG_VALUE debug-use %r6, debug-use _, !15, !20, debug-location !27 - %r7 = MOVi 1, 14, _, _ + %r6 = MOVr killed %r0, 14, %noreg, _, debug-location !26 + DBG_VALUE debug-use %r6, debug-use %noreg, !15, !20, debug-location !27 + %r7 = MOVi 1, 14, %noreg, %noreg DBG_VALUE 1, 0, !18, !20, debug-location !28 B %bb.3.for.cond bb.2.for.body: liveins: %r4, %r5, %r6, %r7 - %r1 = ADDrr %r5, %r7, 14, _, _, debug-location !36 - %r0 = MOVr %r7, 14, _, _, debug-location !36 + %r1 = ADDrr %r5, %r7, 14, %noreg, _, debug-location !36 + %r0 = MOVr %r7, 14, %noreg, _, debug-location !36 BL @func2, csr_aapcs, implicit-def dead %lr, implicit %sp, implicit killed %r0, implicit killed %r1, implicit-def %sp, implicit-def dead %r0, debug-location !36 - %r7 = ADDri killed %r7, 1, 14, _, _, debug-location !38 - DBG_VALUE debug-use %r7, debug-use _, !18, !20, debug-location !28 + %r7 = ADDri killed %r7, 1, 14, %noreg, _, debug-location !38 + 
DBG_VALUE debug-use %r7, debug-use %noreg, !18, !20, debug-location !28 bb.3.for.cond: liveins: %r4, %r5, %r6, %r7 - DBG_VALUE debug-use %r7, debug-use _, !18, !20, debug-location !28 - CMPrr %r7, %r4, 14, _, implicit-def %cpsr, debug-location !33 + DBG_VALUE debug-use %r7, debug-use %noreg, !18, !20, debug-location !28 + CMPrr %r7, %r4, 14, %noreg, implicit-def %cpsr, debug-location !33 Bcc %bb.2.for.body, 11, killed %cpsr, debug-location !33 bb.4.for.cond.cleanup: liveins: %r4, %r5, %r6 - %r0 = MOVr %r5, 14, _, _, debug-location !34 - %r1 = MOVr killed %r6, 14, _, _, debug-location !34 + %r0 = MOVr %r5, 14, %noreg, _, debug-location !34 + %r1 = MOVr killed %r6, 14, %noreg, _, debug-location !34 BL @func2, csr_aapcs, implicit-def dead %lr, implicit %sp, implicit killed %r0, implicit killed %r1, implicit-def %sp, implicit-def dead %r0, debug-location !34 bb.5.if.end: liveins: %r4, %r5 - %r0 = MOVr killed %r5, 14, _, _, debug-location !43 - %r1 = MOVr killed %r4, 14, _, _, debug-location !43 - %sp = LDMIA_UPD %sp, 14, _, def %r4, def %r5, def %r6, def %r7, def %r11, def %lr, debug-location !43 + %r0 = MOVr killed %r5, 14, %noreg, _, debug-location !43 + %r1 = MOVr killed %r4, 14, %noreg, _, debug-location !43 + %sp = LDMIA_UPD %sp, 14, %noreg, def %r4, def %r5, def %r6, def %r7, def %r11, def %lr, debug-location !43 TAILJMPd @func2, implicit %sp, implicit %sp, implicit killed %r0, implicit killed %r1, debug-location !43 ... diff --git a/test/CodeGen/ARM/debug-info-arg.ll b/test/CodeGen/ARM/debug-info-arg.ll index 026d45853d7d..b72dc5f1d74c 100644 --- a/test/CodeGen/ARM/debug-info-arg.ll +++ b/test/CodeGen/ARM/debug-info-arg.ll @@ -11,7 +11,7 @@ define void @foo(%struct.tag_s* nocapture %this, %struct.tag_s* %c, i64 %x, i64 tail call void @llvm.dbg.value(metadata %struct.tag_s* %c, metadata !13, metadata !DIExpression()), !dbg !21 tail call void @llvm.dbg.value(metadata i64 %x, metadata !14, metadata !DIExpression()), !dbg !22 tail call void @llvm.dbg.value(metadata i64 %y, metadata !17, metadata !DIExpression()), !dbg !23 -;CHECK: @DEBUG_VALUE: foo:y <- [DW_OP_plus_uconst 8] [%R7+0] +;CHECK: @DEBUG_VALUE: foo:y <- [DW_OP_plus_uconst 8] [%r7+0] tail call void @llvm.dbg.value(metadata %struct.tag_s* %ptr1, metadata !18, metadata !DIExpression()), !dbg !24 tail call void @llvm.dbg.value(metadata %struct.tag_s* %ptr2, metadata !19, metadata !DIExpression()), !dbg !25 %1 = icmp eq %struct.tag_s* %c, null, !dbg !26 diff --git a/test/CodeGen/ARM/debug-info-blocks.ll b/test/CodeGen/ARM/debug-info-blocks.ll index 4ddb08a215bc..cc1a45f23da0 100644 --- a/test/CodeGen/ARM/debug-info-blocks.ll +++ b/test/CodeGen/ARM/debug-info-blocks.ll @@ -6,8 +6,8 @@ ; CHECK: DW_TAG_variable ; CHECK-NOT: DW_TAG ; CHECK-NEXT: DW_AT_location [DW_FORM_sec_offset] -; CHECK-NEXT: 0x{{.*}} - 0x{{.*}}: {{.*}} DW_OP_plus_uconst 0x4, DW_OP_deref, DW_OP_plus_uconst 0x18 -; CHECK-NEXT: 0x{{.*}} - 0x{{.*}}: {{.*}} DW_OP_plus_uconst 0x4, DW_OP_deref, DW_OP_plus_uconst 0x18 +; CHECK-NEXT: [0x{{.*}}, 0x{{.*}}): {{.*}} DW_OP_plus_uconst 0x4, DW_OP_deref, DW_OP_plus_uconst 0x18 +; CHECK-NEXT: [0x{{.*}}, 0x{{.*}}): {{.*}} DW_OP_plus_uconst 0x4, DW_OP_deref, DW_OP_plus_uconst 0x18 ; CHECK-NEXT: DW_AT_name {{.*}} "mydata" ; Radar 9331779 @@ -35,7 +35,7 @@ declare i8* @objc_msgSend(i8*, i8*, ...) 
declare void @llvm.dbg.value(metadata, metadata, metadata) nounwind readnone -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind define hidden void @foobar_func_block_invoke_0(i8* %.block_descriptor, %0* %loadedMydata, [4 x i32] %bounds.coerce0, [4 x i32] %data.coerce0) ssp !dbg !23 { %1 = alloca %0*, align 4 @@ -77,7 +77,7 @@ define hidden void @foobar_func_block_invoke_0(i8* %.block_descriptor, %0* %load %24 = bitcast i8* %23 to %struct.CR*, !dbg !143 %25 = bitcast %struct.CR* %24 to i8*, !dbg !143 %26 = bitcast %struct.CR* %data to i8*, !dbg !143 - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %25, i8* %26, i32 16, i32 4, i1 false), !dbg !143 + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %25, i8* align 4 %26, i32 16, i1 false), !dbg !143 %27 = getelementptr inbounds %2, %2* %6, i32 0, i32 6, !dbg !144 %28 = load %3*, %3** %27, align 4, !dbg !144 %29 = load i32, i32* @"OBJC_IVAR_$_MyWork._bounds", !dbg !144 @@ -86,7 +86,7 @@ define hidden void @foobar_func_block_invoke_0(i8* %.block_descriptor, %0* %load %32 = bitcast i8* %31 to %struct.CR*, !dbg !144 %33 = bitcast %struct.CR* %32 to i8*, !dbg !144 %34 = bitcast %struct.CR* %bounds to i8*, !dbg !144 - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %33, i8* %34, i32 16, i32 4, i1 false), !dbg !144 + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %33, i8* align 4 %34, i32 16, i1 false), !dbg !144 %35 = getelementptr inbounds %2, %2* %6, i32 0, i32 6, !dbg !145 %36 = load %3*, %3** %35, align 4, !dbg !145 %37 = getelementptr inbounds %2, %2* %6, i32 0, i32 5, !dbg !145 diff --git a/test/CodeGen/ARM/debug-info-branch-folding.ll b/test/CodeGen/ARM/debug-info-branch-folding.ll index 336fc27caacc..15c153b720bd 100644 --- a/test/CodeGen/ARM/debug-info-branch-folding.ll +++ b/test/CodeGen/ARM/debug-info-branch-folding.ll @@ -5,8 +5,8 @@ target triple = "thumbv7-apple-macosx10.6.7" ;CHECK: vadd.f32 q4, q8, q8 ;CHECK-NEXT: LBB0_1 -;CHECK: @DEBUG_VALUE: x <- %Q4{{$}} -;CHECK-NEXT: @DEBUG_VALUE: y <- %Q4{{$}} +;CHECK: @DEBUG_VALUE: x <- %q4{{$}} +;CHECK-NEXT: @DEBUG_VALUE: y <- %q4{{$}} ;CHECK: beq LBB0_1 diff --git a/test/CodeGen/ARM/debug-info-qreg.ll b/test/CodeGen/ARM/debug-info-qreg.ll index 658e730bddd2..44d30f871b11 100644 --- a/test/CodeGen/ARM/debug-info-qreg.ll +++ b/test/CodeGen/ARM/debug-info-qreg.ll @@ -4,10 +4,12 @@ target triple = "thumbv7-apple-macosx10.6.7" ;CHECK: sub-register DW_OP_regx ;CHECK-NEXT: 256 +;CHECK-NEXT: @ ;CHECK-NEXT: DW_OP_piece ;CHECK-NEXT: 8 ;CHECK-NEXT: sub-register DW_OP_regx ;CHECK-NEXT: 257 +;CHECK-NEXT: @ ;CHECK-NEXT: DW_OP_piece ;CHECK-NEXT: 8 diff --git a/test/CodeGen/ARM/debug-info-sreg2.ll b/test/CodeGen/ARM/debug-info-sreg2.ll index 34bc938fab5b..02e6d8e47016 100644 --- a/test/CodeGen/ARM/debug-info-sreg2.ll +++ b/test/CodeGen/ARM/debug-info-sreg2.ll @@ -7,7 +7,7 @@ target triple = "thumbv7-apple-macosx10.6.7" ; of the size of the location description. 
; CHECK: 0x00000000: -; CHECK-NEXT: 0x{{[0-9]*[a-f]*}} - 0x{{[0-9]*[a-f]*}}: DW_OP_regx D8 +; CHECK-NEXT: [0x{{[0-9]*[a-f]*}}, 0x{{[0-9]*[a-f]*}}): DW_OP_regx D8 define void @_Z3foov() optsize ssp !dbg !1 { entry: diff --git a/test/CodeGen/ARM/deprecated-asm.s b/test/CodeGen/ARM/deprecated-asm.s index 7318e6a68c5a..465da40c1c14 100644 --- a/test/CodeGen/ARM/deprecated-asm.s +++ b/test/CodeGen/ARM/deprecated-asm.s @@ -25,7 +25,7 @@ .type foo,%function foo: @ @foo .fnstart -@ BB#0: @ %entry +@ %bb.0: @ %entry mov r0, #0 bx lr stmia r4!, {r12-r14} diff --git a/test/CodeGen/ARM/deps-fix.ll b/test/CodeGen/ARM/deps-fix.ll index 527d2393345d..99ed85376a7f 100644 --- a/test/CodeGen/ARM/deps-fix.ll +++ b/test/CodeGen/ARM/deps-fix.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -mcpu=cortex-a9 -mattr=+neon,+neonfp -float-abi=hard -mtriple armv7-linux-gnueabi | FileCheck %s -;; This test checks that the ExecutionDepsFix pass performs the domain changes +;; This test checks that the ExecutionDomainFix pass performs the domain changes ;; even when some dependencies are propagated through implicit definitions. ; CHECK: fun_a diff --git a/test/CodeGen/ARM/dsp-mlal.ll b/test/CodeGen/ARM/dsp-mlal.ll new file mode 100644 index 000000000000..04968e475861 --- /dev/null +++ b/test/CodeGen/ARM/dsp-mlal.ll @@ -0,0 +1,171 @@ +; RUN: llc -mtriple=thumbv7m -mattr=+dsp %s -o - | FileCheck %s +; RUN: llc -mtriple=armv7a %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv7m -mattr=-dsp %s -o - | FileCheck --check-prefix=NODSP %s + +define hidden i32 @SMMULR_SMMLAR(i32 %a, i32 %b0, i32 %b1, i32 %Xn, i32 %Xn1) local_unnamed_addr { +entry: +; CHECK-LABEL: SMMULR_SMMLAR: +; CHECK: ldr r0, [sp] +; CHECK-NEXT: smmulr r0, {{(r0, r2|r2, r0)}} +; CHECK-NEXT: smmlar r0, {{(r1, r3|r3, r1)}}, r0 +; NODSP-LABEL: SMMULR_SMMLAR: +; NODSP-NOT: smmulr +; NODSP-NOT: smmlar + %conv = sext i32 %b1 to i64 + %conv1 = sext i32 %Xn1 to i64 + %mul = mul nsw i64 %conv1, %conv + %add = add nsw i64 %mul, 2147483648 + %0 = and i64 %add, -4294967296 + %conv4 = sext i32 %b0 to i64 + %conv5 = sext i32 %Xn to i64 + %mul6 = mul nsw i64 %conv5, %conv4 + %add7 = add i64 %mul6, 2147483648 + %add8 = add i64 %add7, %0 + %1 = lshr i64 %add8, 32 + %conv10 = trunc i64 %1 to i32 + ret i32 %conv10 +} + +define hidden i32 @SMMULR(i32 %a, i32 %b) local_unnamed_addr { +entry: +; CHECK-LABEL: SMMULR: +; CHECK: smmulr r0, {{(r0, r1|r1, r0)}} +; NODSP-LABEL: SMMULR: +; NODSP-NOT: smmulr + %conv = sext i32 %a to i64 + %conv1 = sext i32 %b to i64 + %mul = mul nsw i64 %conv1, %conv + %add = add nsw i64 %mul, 2147483648 + %0 = lshr i64 %add, 32 + %conv2 = trunc i64 %0 to i32 + ret i32 %conv2 +} + +define hidden i32 @SMMUL(i32 %a, i32 %b) local_unnamed_addr { +entry: +; CHECK-LABEL: SMMUL: +; CHECK: smmul r0, {{(r0, r1|r1, r0)}} +; NODSP-LABEL: SMMUL: +; NODSP-NOT: smmul + %conv = sext i32 %a to i64 + %conv1 = sext i32 %b to i64 + %mul = mul nsw i64 %conv1, %conv + %0 = lshr i64 %mul, 32 + %conv2 = trunc i64 %0 to i32 + ret i32 %conv2 +} + +define hidden i32 @SMMLSR(i32 %a, i32 %b, i32 %c) local_unnamed_addr { +entry: +; CHECK-LABEL: SMMLSR: +; CHECK: smmlsr r0, {{(r1, r2|r2, r1)}}, r0 +; NODSP-LABEL: SMMLSR: +; NODSP-NOT: smmlsr + %conv6 = zext i32 %a to i64 + %shl = shl nuw i64 %conv6, 32 + %conv1 = sext i32 %b to i64 + %conv2 = sext i32 %c to i64 + %mul = mul nsw i64 %conv2, %conv1 + %sub = or i64 %shl, 2147483648 + %add = sub i64 %sub, %mul + %0 = lshr i64 %add, 32 + %conv3 = trunc i64 %0 to i32 + ret i32 %conv3 +} + +define hidden i32 @NOT_SMMLSR(i32 %a, i32 %b, i32 %c) 
local_unnamed_addr { +entry: +; CHECK-LABEL: NOT_SMMLSR: +; CHECK-NOT: smmlsr +; NODSP-LABEL: NOT_SMMLSR: +; NODSP-NOT: smmlsr + %conv = sext i32 %b to i64 + %conv1 = sext i32 %c to i64 + %mul = mul nsw i64 %conv1, %conv + %add = add nsw i64 %mul, 2147483648 + %0 = lshr i64 %add, 32 + %conv2 = trunc i64 %0 to i32 + %sub = sub nsw i32 %a, %conv2 + ret i32 %sub +} + +define hidden i32 @SMMLS(i32 %a, i32 %b, i32 %c) local_unnamed_addr { +entry: +; CHECK-LABEL: SMMLS: +; CHECK: smmls r0, {{(r1, r2|r2, r1)}}, r0 +; NODSP-LABEL: SMMLS: +; NODSP-NOT: smmls + %conv5 = zext i32 %a to i64 + %shl = shl nuw i64 %conv5, 32 + %conv1 = sext i32 %b to i64 + %conv2 = sext i32 %c to i64 + %mul = mul nsw i64 %conv2, %conv1 + %sub = sub nsw i64 %shl, %mul + %0 = lshr i64 %sub, 32 + %conv3 = trunc i64 %0 to i32 + ret i32 %conv3 +} + +define hidden i32 @NOT_SMMLS(i32 %a, i32 %b, i32 %c) local_unnamed_addr { +entry: +; CHECK-LABEL: NOT_SMMLS: +; CHECK-NOT: smmls +; NODSP-LABEL: NOT_SMMLS: +; NODSP-NOT: smmls + %conv = sext i32 %b to i64 + %conv1 = sext i32 %c to i64 + %mul = mul nsw i64 %conv1, %conv + %0 = lshr i64 %mul, 32 + %conv2 = trunc i64 %0 to i32 + %sub = sub nsw i32 %a, %conv2 + ret i32 %sub +} + +define hidden i32 @SMMLA(i32 %a, i32 %b, i32 %c) local_unnamed_addr { +entry: +; CHECK-LABEL: SMMLA: +; CHECK: smmla r0, {{(r1, r2|r2, r1)}}, r0 +; NODSP-LABEL: SMMLA: +; NODSP-NOT: smmla + %conv = sext i32 %b to i64 + %conv1 = sext i32 %c to i64 + %mul = mul nsw i64 %conv1, %conv + %0 = lshr i64 %mul, 32 + %conv2 = trunc i64 %0 to i32 + %add = add nsw i32 %conv2, %a + ret i32 %add +} + +define hidden i32 @SMMLAR(i32 %a, i32 %b, i32 %c) local_unnamed_addr { +entry: +; CHECK-LABEL: SMMLAR: +; CHECK: smmlar r0, {{(r1, r2|r2, r1)}}, r0 +; NODSP-LABEL: SMMLAR: +; NODSP-NOT: smmlar + %conv7 = zext i32 %a to i64 + %shl = shl nuw i64 %conv7, 32 + %conv1 = sext i32 %b to i64 + %conv2 = sext i32 %c to i64 + %mul = mul nsw i64 %conv2, %conv1 + %add = or i64 %shl, 2147483648 + %add3 = add i64 %add, %mul + %0 = lshr i64 %add3, 32 + %conv4 = trunc i64 %0 to i32 + ret i32 %conv4 +} + +define hidden i32 @NOT_SMMLA(i32 %a, i32 %b, i32 %c) local_unnamed_addr { +entry: +; CHECK-LABEL: NOT_SMMLA: +; CHECK-NOT: smmla +; NODSP-LABEL: NOT_SMMLA: +; NODSP-NOT: smmla + %conv = sext i32 %b to i64 + %conv1 = sext i32 %c to i64 + %mul = mul nsw i64 %conv1, %conv + %0 = lshr i64 %mul, 32 + %conv2 = trunc i64 %0 to i32 + %add = xor i32 %conv2, -2147483648 + %add3 = add i32 %add, %a + ret i32 %add3 +} diff --git a/test/CodeGen/ARM/dyn-stackalloc.ll b/test/CodeGen/ARM/dyn-stackalloc.ll index 5b963fd64dea..b653acbd6a7f 100644 --- a/test/CodeGen/ARM/dyn-stackalloc.ll +++ b/test/CodeGen/ARM/dyn-stackalloc.ll @@ -51,7 +51,7 @@ define void @t2(%struct.comment* %vc, i8* %tag, i8* %contents) { %tmp9 = call i8* @strcpy(i8* %tmp6, i8* %tag) %tmp6.len = call i32 @strlen(i8* %tmp6) %tmp6.indexed = getelementptr i8, i8* %tmp6, i32 %tmp6.len - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp6.indexed, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @str215, i32 0, i32 0), i32 2, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %tmp6.indexed, i8* align 1 getelementptr inbounds ([2 x i8], [2 x i8]* @str215, i32 0, i32 0), i32 2, i1 false) %tmp15 = call i8* @strcat(i8* %tmp6, i8* %contents) call fastcc void @comment_add(%struct.comment* %vc, i8* %tmp6) ret void @@ -65,4 +65,4 @@ declare fastcc void @comment_add(%struct.comment*, i8*) declare i8* @strcpy(i8*, i8*) -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, 
i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind diff --git a/test/CodeGen/ARM/emutls_generic.ll b/test/CodeGen/ARM/emutls_generic.ll index f5633dc23bcd..8c1c40dd6eac 100644 --- a/test/CodeGen/ARM/emutls_generic.ll +++ b/test/CodeGen/ARM/emutls_generic.ll @@ -6,6 +6,10 @@ ; RUN: | FileCheck -check-prefix=ARM_32 %s ; RUN: llc < %s -emulated-tls -mtriple=arm-linux-androidabi -O3 \ ; RUN: | FileCheck -check-prefix=ARM_32 %s +; RUN: llc < %s -emulated-tls -mtriple=arm-apple-darwin -O3 \ +; RUN: | FileCheck -check-prefix=DARWIN %s +; RUN: llc < %s -emulated-tls -mtriple=thumbv7-windows-gnu -O3 \ +; RUN: | FileCheck -check-prefix=WIN %s ; Make sure that TLS symbols are emitted in expected order. @@ -61,3 +65,74 @@ entry: ; ARM_32-LABEL: __emutls_t.internal_y: ; ARM_32-NEXT: .long 9 ; ARM_32-NEXT: .long 0 + +; WIN-LABEL: get_external_x: +; WIN: movw r0, :lower16:__emutls_v.external_x +; WIN: movt r0, :upper16:__emutls_v.external_x +; WIN: bl __emutls_get_address +; WIN-LABEL: get_external_y: +; WIN: movw r0, :lower16:__emutls_v.external_y +; WIN: movt r0, :upper16:__emutls_v.external_y +; WIN: bl __emutls_get_address +; WIN-LABEL: get_internal_y: +; WIN: movw r0, :lower16:__emutls_v.internal_y +; WIN: movt r0, :upper16:__emutls_v.internal_y +; WIN: bl __emutls_get_address +; WIN-NOT: __emutls_t.external_x +; WIN-NOT: __emutls_v.external_x: +; WIN: .data{{$}} +; WIN: .globl __emutls_v.external_y +; WIN: .p2align 2 +; WIN-LABEL: __emutls_v.external_y: +; WIN-NEXT: .long 1 +; WIN-NEXT: .long 2 +; WIN-NEXT: .long 0 +; WIN-NEXT: .long __emutls_t.external_y +; WIN: .section .rdata, +; WIN-LABEL: __emutls_t.external_y: +; WIN-NEXT: .byte 7 +; WIN: .data{{$}} +; WIN-NOT: .globl +; WIN: .p2align 2 +; WIN-LABEL: __emutls_v.internal_y: +; WIN-NEXT: .long 8 +; WIN-NEXT: .long 16 +; WIN-NEXT: .long 0 +; WIN-NEXT: .long __emutls_t.internal_y +; WIN-LABEL: __emutls_t.internal_y: +; .quad 9 is equivalent to .long 9 .long 0 +; WIN-NEXT: .quad 9 + +; DARWIN-LABEL: _get_external_x: +; DARWIN: bl ___emutls_get_address +; DARWIN: .long L___emutls_v.external_x$non_lazy_ptr-(LPC0_0+8) +; DARWIN-LABEL: _get_external_y: +; DARWIN: bl ___emutls_get_address +; DARWIN: .long ___emutls_v.external_y-(LPC1_0+8) +; DARWIN-LABEL: _get_internal_y: +; DARWIN: bl ___emutls_get_address +; DARWIN: .long ___emutls_v.internal_y-(LPC2_0+8) +; DARWIN-NOT: ___emutls_t.external_x +; DARWIN-NOT: ___emutls_v.external_x: +; DARWIN: .section __DATA,__data +; DARWIN: .globl ___emutls_v.external_y +; DARWIN: .p2align 2 +; DARWIN-LABEL: ___emutls_v.external_y: +; DARWIN-NEXT: .long 1 +; DARWIN-NEXT: .long 2 +; DARWIN-NEXT: .long 0 +; DARWIN-NEXT: .long ___emutls_t.external_y +; DARWIN: .section __TEXT,__const +; DARWIN-LABEL: ___emutls_t.external_y: +; DARWIN-NEXT: .byte 7 +; DARWIN: .section __DATA,__data +; DARWIN-NOT: .globl +; DARWIN: .p2align 2 +; DARWIN-LABEL: ___emutls_v.internal_y: +; DARWIN-NEXT: .long 8 +; DARWIN-NEXT: .long 16 +; DARWIN-NEXT: .long 0 +; DARWIN-NEXT: .long ___emutls_t.internal_y +; DARWIN-LABEL: ___emutls_t.internal_y: +; DARWIN-NEXT: .long 9 +; DARWIN-NEXT: .long 0 diff --git a/test/CodeGen/ARM/expand-pseudos.mir b/test/CodeGen/ARM/expand-pseudos.mir index 1cc46bc0f55d..b35c2dce66da 100644 --- a/test/CodeGen/ARM/expand-pseudos.mir +++ b/test/CodeGen/ARM/expand-pseudos.mir @@ -25,11 +25,11 @@ body: | bb.0.entry: liveins: %r0 - %r1 = MOVi 2, 14, _, _ - CMPri killed %r0, 0, 14, _, implicit-def %cpsr + %r1 = MOVi 2, 14, %noreg, %noreg + CMPri killed %r0, 0, 14, 
%noreg, implicit-def %cpsr %r1 = MOVCCi16 killed %r1, 500, 0, killed %cpsr - %r0 = MOVr killed %r1, 14, _, _ - BX_RET 14, _, implicit %r0 + %r0 = MOVr killed %r1, 14, %noreg, %noreg + BX_RET 14, %noreg, implicit %r0 ... --- @@ -42,11 +42,11 @@ body: | bb.0.entry: liveins: %r0 - %r1 = MOVi 2, 14, _, _ - CMPri killed %r0, 0, 14, _, implicit-def %cpsr + %r1 = MOVi 2, 14, %noreg, %noreg + CMPri killed %r0, 0, 14, %noreg, implicit-def %cpsr %r1 = MOVCCi32imm killed %r1, 500500500, 0, killed %cpsr - %r0 = MOVr killed %r1, 14, _, _ - BX_RET 14, _, implicit %r0 + %r0 = MOVr killed %r1, 14, %noreg, %noreg + BX_RET 14, %noreg, implicit %r0 ... --- @@ -60,9 +60,9 @@ body: | bb.0.entry: liveins: %r0, %r1 - CMPri %r1, 500, 14, _, implicit-def %cpsr + CMPri %r1, 500, 14, %noreg, implicit-def %cpsr %r0 = MOVCCr killed %r0, killed %r1, 12, killed %cpsr - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... @@ -72,4 +72,4 @@ body: | # CHECK: %r1 = MOVi16 2068, 0, %cpsr, implicit killed %r1 # CHECK: %r1 = MOVTi16 %r1, 7637, 0, %cpsr # CHECK-LABEL: name: test3 -# CHECK: %r0 = MOVr killed %r1, 12, killed %cpsr, _, implicit killed %r0 +# CHECK: %r0 = MOVr killed %r1, 12, killed %cpsr, %noreg, implicit killed %r0 diff --git a/test/CodeGen/ARM/fast-isel-intrinsic.ll b/test/CodeGen/ARM/fast-isel-intrinsic.ll index 277461aa566b..8d9c27b6f22c 100644 --- a/test/CodeGen/ARM/fast-isel-intrinsic.ll +++ b/test/CodeGen/ARM/fast-isel-intrinsic.ll @@ -44,11 +44,11 @@ define void @t1() nounwind ssp { ; THUMB-LONG: movt r3, :upper16:L_memset$non_lazy_ptr ; THUMB-LONG: ldr r3, [r3] ; THUMB-LONG: blx r3 - call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @message1, i32 0, i32 5), i8 64, i32 10, i32 4, i1 false) + call void @llvm.memset.p0i8.i32(i8* align 4 getelementptr inbounds ([60 x i8], [60 x i8]* @message1, i32 0, i32 5), i8 64, i32 10, i1 false) ret void } -declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind define void @t2() nounwind ssp { ; ARM-LABEL: t2: @@ -93,11 +93,11 @@ define void @t2() nounwind ssp { ; THUMB-LONG: movt r3, :upper16:L_memcpy$non_lazy_ptr ; THUMB-LONG: ldr r3, [r3] ; THUMB-LONG: blx r3 - call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 17, i32 4, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* align 4 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 17, i1 false) ret void } -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind define void @t3() nounwind ssp { ; ARM-LABEL: t3: @@ -141,7 +141,7 @@ define void @t3() nounwind ssp { ; THUMB-LONG: movt r3, :upper16:L_memmove$non_lazy_ptr ; THUMB-LONG: ldr r3, [r3] ; THUMB-LONG: blx r3 - call void @llvm.memmove.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 10, i32 1, i1 false) + call void @llvm.memmove.p0i8.p0i8.i32(i8* align 1 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* align 1 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 10, i1 false) ret void } @@ -173,11 +173,11 @@ define 
void @t4() nounwind ssp { ; THUMB: ldrh r1, [r0, #24] ; THUMB: strh r1, [r0, #12] ; THUMB: bx lr - call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 10, i32 4, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* align 4 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 10, i1 false) ret void } -declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind +declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind define void @t5() nounwind ssp { ; ARM-LABEL: t5: @@ -215,7 +215,7 @@ define void @t5() nounwind ssp { ; THUMB: ldrh r1, [r0, #24] ; THUMB: strh r1, [r0, #12] ; THUMB: bx lr - call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 10, i32 2, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* align 2 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 10, i1 false) ret void } @@ -275,14 +275,14 @@ define void @t6() nounwind ssp { ; THUMB: ldrb r1, [r0, #25] ; THUMB: strb r1, [r0, #13] ; THUMB: bx lr - call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 10, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* align 1 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 10, i1 false) ret void } ; rdar://13202135 define void @t7() nounwind ssp { ; Just make sure this doesn't assert when we have an odd length and an alignment of 2. - call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 3, i32 2, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* align 2 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 3, i1 false) ret void } diff --git a/test/CodeGen/ARM/fpoffset_overflow.mir b/test/CodeGen/ARM/fpoffset_overflow.mir index 4f3524bf7d11..59d981a436eb 100644 --- a/test/CodeGen/ARM/fpoffset_overflow.mir +++ b/test/CodeGen/ARM/fpoffset_overflow.mir @@ -3,10 +3,10 @@ # This should trigger an emergency spill in the register scavenger because the # frame offset into the large argument is too large. 
# CHECK-LABEL: name: func0 -# CHECK: t2STRi12 killed [[SPILLED:%r[0-9]+]], %sp, 0, 14, _ :: (store 4 into %stack.0) -# CHECK: [[SPILLED]] = t2ADDri killed %sp, 4096, 14, _, _ -# CHECK: %sp = t2LDRi12 killed [[SPILLED]], 40, 14, _ :: (load 4) -# CHECK: [[SPILLED]] = t2LDRi12 %sp, 0, 14, _ :: (load 4 from %stack.0) +# CHECK: t2STRi12 killed [[SPILLED:%r[0-9]+]], %sp, 0, 14, %noreg :: (store 4 into %stack.0) +# CHECK: [[SPILLED]] = t2ADDri killed %sp, 4096, 14, %noreg, %noreg +# CHECK: %sp = t2LDRi12 killed [[SPILLED]], 40, 14, %noreg :: (load 4) +# CHECK: [[SPILLED]] = t2LDRi12 %sp, 0, 14, %noreg :: (load 4 from %stack.0) name: func0 tracksRegLiveness: true fixedStack: @@ -31,7 +31,7 @@ body: | %r12 = IMPLICIT_DEF %lr = IMPLICIT_DEF - %sp = t2LDRi12 %fixed-stack.0, 0, 14, _ :: (load 4) + %sp = t2LDRi12 %fixed-stack.0, 0, 14, %noreg :: (load 4) KILL %r0 KILL %r1 @@ -53,7 +53,7 @@ body: | # CHECK-LABEL: name: func1 # CHECK-NOT: t2STRi12 # CHECK-NOT: t2ADDri -# CHECK: %r11 = t2LDRi12 %sp, 4092, 14, _ :: (load 4) +# CHECK: %r11 = t2LDRi12 %sp, 4092, 14, %noreg :: (load 4) # CHECK-NOT: t2LDRi12 name: func1 tracksRegLiveness: true @@ -78,7 +78,7 @@ body: | %r12 = IMPLICIT_DEF %lr = IMPLICIT_DEF - %r11 = t2LDRi12 %fixed-stack.0, 0, 14, _ :: (load 4) + %r11 = t2LDRi12 %fixed-stack.0, 0, 14, %noreg :: (load 4) KILL %r0 KILL %r1 diff --git a/test/CodeGen/ARM/global-merge-external.ll b/test/CodeGen/ARM/global-merge-external.ll index 03c977614320..f8d77afb983e 100644 --- a/test/CodeGen/ARM/global-merge-external.ll +++ b/test/CodeGen/ARM/global-merge-external.ll @@ -1,8 +1,8 @@ -; RUN: llc < %s -mtriple=arm-eabi -arm-global-merge | FileCheck %s --check-prefix=CHECK-MERGE -; RUN: llc < %s -mtriple=arm-eabi -arm-global-merge -global-merge-on-external=true | FileCheck %s --check-prefix=CHECK-MERGE -; RUN: llc < %s -mtriple=arm-eabi -arm-global-merge -global-merge-on-external=false | FileCheck %s --check-prefix=CHECK-NO-MERGE -; RUN: llc < %s -mtriple=arm-macho -arm-global-merge | FileCheck %s --check-prefix=CHECK-NO-MERGE -; RUN: llc < %s -mtriple=arm-eabi -arm-global-merge -relocation-model=pic | FileCheck %s --check-prefix=CHECK-NO-MERGE +; RUN: llc < %s -mtriple=arm-eabi -arm-global-merge | FileCheck %s --check-prefixes=CHECK,CHECK-MERGE +; RUN: llc < %s -mtriple=arm-eabi -arm-global-merge -global-merge-on-external=true | FileCheck %s --check-prefixes=CHECK,CHECK-MERGE +; RUN: llc < %s -mtriple=arm-eabi -arm-global-merge -global-merge-on-external=false | FileCheck %s --check-prefixes=CHECK,CHECK-NO-MERGE +; RUN: llc < %s -mtriple=arm-macho -arm-global-merge | FileCheck %s --check-prefixes=CHECK,CHECK-NO-MERGE +; RUN: llc < %s -mtriple=arm-eabi -arm-global-merge -relocation-model=pic | FileCheck %s --check-prefixes=CHECK,CHECK-NO-MERGE @x = global i32 0, align 4 @y = global i32 0, align 4 @@ -10,7 +10,7 @@ define void @f1(i32 %a1, i32 %a2) { ;CHECK: f1: -;CHECK: ldr {{r[0-9]+}}, [[LABEL1:\.LCPI[0-9]+_[0-9]]] +;CHECK: ldr {{r[0-9]+}}, [[LABEL1:\.?LCPI[0-9]+_[0-9]]] ;CHECK: [[LABEL1]]: ;CHECK-MERGE: .long .L_MergedGlobals ;CHECK-NO-MERGE: .long {{_?x}} @@ -21,7 +21,7 @@ define void @f1(i32 %a1, i32 %a2) { define void @g1(i32 %a1, i32 %a2) { ;CHECK: g1: -;CHECK: ldr {{r[0-9]+}}, [[LABEL2:\.LCPI[0-9]+_[0-9]]] +;CHECK: ldr {{r[0-9]+}}, [[LABEL2:\.?LCPI[0-9]+_[0-9]]] ;CHECK: [[LABEL2]]: ;CHECK-MERGE: .long .L_MergedGlobals ;CHECK-NO-MERGE: .long {{_?y}} diff --git a/test/CodeGen/ARM/ifcvt-branch-weight-bug.ll b/test/CodeGen/ARM/ifcvt-branch-weight-bug.ll index 1c8142e5ddd5..b69f121d10ce 100644 --- 
a/test/CodeGen/ARM/ifcvt-branch-weight-bug.ll +++ b/test/CodeGen/ARM/ifcvt-branch-weight-bug.ll @@ -21,8 +21,8 @@ entry: ; Afer if conversion, we have ; for.body -> for.cond.backedge (100%) ; -> cond.false.i (0%) -; CHECK: BB#1: derived from LLVM BB %for.body -; CHECK: Successors according to CFG: BB#2(0x80000000 / 0x80000000 = 100.00%) BB#4(0x00000001 / 0x80000000 = 0.00%) +; CHECK: %bb.1: derived from LLVM BB %for.body +; CHECK: Successors according to CFG: %bb.2(0x80000000 / 0x80000000 = 100.00%) %bb.4(0x00000001 / 0x80000000 = 0.00%) for.body: br i1 undef, label %for.cond.backedge, label %lor.lhs.false.i, !prof !1 diff --git a/test/CodeGen/ARM/ifcvt-branch-weight.ll b/test/CodeGen/ARM/ifcvt-branch-weight.ll index 5c39d63fda10..6f6f8bc1834a 100644 --- a/test/CodeGen/ARM/ifcvt-branch-weight.ll +++ b/test/CodeGen/ARM/ifcvt-branch-weight.ll @@ -18,8 +18,8 @@ bb: %9 = icmp eq i32 %8, 0 br i1 %9, label %return, label %bb2 -; CHECK: BB#2: derived from LLVM BB %bb2 -; CHECK: Successors according to CFG: BB#4({{[0-9a-fx/= ]+}}50.00%) BB#3({{[0-9a-fx/= ]+}}50.00%) +; CHECK: %bb.2: derived from LLVM BB %bb2 +; CHECK: Successors according to CFG: %bb.4({{[0-9a-fx/= ]+}}50.00%) %bb.3({{[0-9a-fx/= ]+}}50.00%) bb2: %v10 = icmp eq i32 %3, 16 diff --git a/test/CodeGen/ARM/ifcvt-dead-def.ll b/test/CodeGen/ARM/ifcvt-dead-def.ll index 77a3f5c0961f..fedbcfb09ebd 100644 --- a/test/CodeGen/ARM/ifcvt-dead-def.ll +++ b/test/CodeGen/ARM/ifcvt-dead-def.ll @@ -8,7 +8,7 @@ target triple = "thumbv7-unknown-unknown" %struct.gs_color_s = type { i16, i16, i16, i16, i8, i8 } ; In this case, the if converter was cloning the return instruction so that we had -; r2 = ... +; r2 = ... ; return [pred] r2 ; ldr ; return diff --git a/test/CodeGen/ARM/ifcvt-iter-indbr.ll b/test/CodeGen/ARM/ifcvt-iter-indbr.ll index 734962573061..ccc6ded49f13 100644 --- a/test/CodeGen/ARM/ifcvt-iter-indbr.ll +++ b/test/CodeGen/ARM/ifcvt-iter-indbr.ll @@ -30,10 +30,10 @@ declare i8* @bar(i32, i8*, i8*) ; CHECK-NEXT: [[FOOCALL]]: ; CHECK-NEXT: bl _foo ; -; CHECK-PROB: BB#0: -; CHECK-PROB: Successors according to CFG: BB#1({{[0-9a-fx/= ]+}}50.00%) BB#3({{[0-9a-fx/= ]+}}25.00%) BB#5({{[0-9a-fx/= ]+}}25.00%) -; CHECK-PROB: BB#2: -; CHECK-PROB: Successors according to CFG: BB#3({{[0-9a-fx/= ]+}}50.00%) BB#5({{[0-9a-fx/= ]+}}50.00%) +; CHECK-PROB: %bb.0: +; CHECK-PROB: Successors according to CFG: %bb.1({{[0-9a-fx/= ]+}}50.00%) %bb.3({{[0-9a-fx/= ]+}}25.00%) %bb.5({{[0-9a-fx/= ]+}}25.00%) +; CHECK-PROB: %bb.2: +; CHECK-PROB: Successors according to CFG: %bb.3({{[0-9a-fx/= ]+}}50.00%) %bb.5({{[0-9a-fx/= ]+}}50.00%) define i32 @test(i32 %a, i32 %a2, i32* %p, i32* %p2) "no-frame-pointer-elim"="true" { entry: diff --git a/test/CodeGen/MIR/ARM/ifcvt_canFallThroughTo.mir b/test/CodeGen/ARM/ifcvt_canFallThroughTo.mir similarity index 100% rename from test/CodeGen/MIR/ARM/ifcvt_canFallThroughTo.mir rename to test/CodeGen/ARM/ifcvt_canFallThroughTo.mir diff --git a/test/CodeGen/MIR/ARM/ifcvt_diamond_unanalyzable.mir b/test/CodeGen/ARM/ifcvt_diamond_unanalyzable.mir similarity index 88% rename from test/CodeGen/MIR/ARM/ifcvt_diamond_unanalyzable.mir rename to test/CodeGen/ARM/ifcvt_diamond_unanalyzable.mir index a6e5521fd2cb..6b7ad20aa12f 100644 --- a/test/CodeGen/MIR/ARM/ifcvt_diamond_unanalyzable.mir +++ b/test/CodeGen/ARM/ifcvt_diamond_unanalyzable.mir @@ -26,5 +26,5 @@ body: | # CHECK: bb.0: # CHECK: %sp = tADDspi %sp, 2, 1, %cpsr # CHECK: %sp = tADDspi %sp, 1, 0, %cpsr, implicit %sp -# CHECK: %sp = tADDspi %sp, 3, 14, _ -# CHECK: BX_RET 14, _ +# CHECK: %sp 
= tADDspi %sp, 3, 14, %noreg +# CHECK: BX_RET 14, %noreg diff --git a/test/CodeGen/MIR/ARM/ifcvt_forked_diamond_unanalyzable.mir b/test/CodeGen/ARM/ifcvt_forked_diamond_unanalyzable.mir similarity index 86% rename from test/CodeGen/MIR/ARM/ifcvt_forked_diamond_unanalyzable.mir rename to test/CodeGen/ARM/ifcvt_forked_diamond_unanalyzable.mir index 652c333c523c..f5f09a8ec4a9 100644 --- a/test/CodeGen/MIR/ARM/ifcvt_forked_diamond_unanalyzable.mir +++ b/test/CodeGen/ARM/ifcvt_forked_diamond_unanalyzable.mir @@ -40,9 +40,9 @@ body: | # CHECK: Bcc %bb.2, 1, %cpsr # CHECK: bb.1: -# CHECK: %sp = tADDspi %sp, 4, 14, _ -# CHECK: BX_RET 14, _ +# CHECK: %sp = tADDspi %sp, 4, 14, %noreg +# CHECK: BX_RET 14, %noreg # CHECK: bb.2: -# CHECK: %sp = tADDspi %sp, 3, 14, _ -# CHECK: BX_RET 14, _ +# CHECK: %sp = tADDspi %sp, 3, 14, %noreg +# CHECK: BX_RET 14, %noreg diff --git a/test/CodeGen/MIR/ARM/ifcvt_simple_bad_zero_prob_succ.mir b/test/CodeGen/ARM/ifcvt_simple_bad_zero_prob_succ.mir similarity index 100% rename from test/CodeGen/MIR/ARM/ifcvt_simple_bad_zero_prob_succ.mir rename to test/CodeGen/ARM/ifcvt_simple_bad_zero_prob_succ.mir diff --git a/test/CodeGen/MIR/ARM/ifcvt_simple_unanalyzable.mir b/test/CodeGen/ARM/ifcvt_simple_unanalyzable.mir similarity index 93% rename from test/CodeGen/MIR/ARM/ifcvt_simple_unanalyzable.mir rename to test/CodeGen/ARM/ifcvt_simple_unanalyzable.mir index d0c6ffdb3fa0..8d1c71ac98fb 100644 --- a/test/CodeGen/MIR/ARM/ifcvt_simple_unanalyzable.mir +++ b/test/CodeGen/ARM/ifcvt_simple_unanalyzable.mir @@ -21,5 +21,5 @@ body: | # CHECK: bb.0: # CHECK: %sp = tADDspi %sp, 2, 0, %cpsr # CHECK: BX_RET 0, %cpsr -# CHECK: BX_RET 14, _ +# CHECK: BX_RET 14, %noreg diff --git a/test/CodeGen/MIR/ARM/ifcvt_triangleWoCvtToNextEdge.mir b/test/CodeGen/ARM/ifcvt_triangleWoCvtToNextEdge.mir similarity index 92% rename from test/CodeGen/MIR/ARM/ifcvt_triangleWoCvtToNextEdge.mir rename to test/CodeGen/ARM/ifcvt_triangleWoCvtToNextEdge.mir index 981752654fc3..92ecbc8dbbe8 100644 --- a/test/CodeGen/MIR/ARM/ifcvt_triangleWoCvtToNextEdge.mir +++ b/test/CodeGen/ARM/ifcvt_triangleWoCvtToNextEdge.mir @@ -47,6 +47,6 @@ body: | # CHECK: bb.2: # CHECK-NOT: successors: %bb # CHECK: tBL 1, %cpsr, @__stack_chk_fail -# CHECK: %sp = tADDspi %sp, 2, 14, _ -# CHECK: %sp = tADDspi %sp, 2, 14, _ +# CHECK: %sp = tADDspi %sp, 2, 14, %noreg +# CHECK: %sp = tADDspi %sp, 2, 14, %noreg # CHECK: tTAILJMPdND @bar, 14, %cpsr diff --git a/test/CodeGen/ARM/illegal-bitfield-loadstore.ll b/test/CodeGen/ARM/illegal-bitfield-loadstore.ll index 6d62fd31f978..6f1e18ffdfca 100644 --- a/test/CodeGen/ARM/illegal-bitfield-loadstore.ll +++ b/test/CodeGen/ARM/illegal-bitfield-loadstore.ll @@ -4,14 +4,14 @@ define void @i24_or(i24* %a) { ; LE-LABEL: i24_or: -; LE: @ BB#0: +; LE: @ %bb.0: ; LE-NEXT: ldrh r1, [r0] ; LE-NEXT: orr r1, r1, #384 ; LE-NEXT: strh r1, [r0] ; LE-NEXT: mov pc, lr ; ; BE-LABEL: i24_or: -; BE: @ BB#0: +; BE: @ %bb.0: ; BE-NEXT: ldrh r1, [r0] ; BE-NEXT: ldrb r2, [r0, #2] ; BE-NEXT: orr r1, r2, r1, lsl #8 @@ -28,7 +28,7 @@ define void @i24_or(i24* %a) { define void @i24_and_or(i24* %a) { ; LE-LABEL: i24_and_or: -; LE: @ BB#0: +; LE: @ %bb.0: ; LE-NEXT: ldrh r1, [r0] ; LE-NEXT: mov r2, #16256 ; LE-NEXT: orr r2, r2, #49152 @@ -38,7 +38,7 @@ define void @i24_and_or(i24* %a) { ; LE-NEXT: mov pc, lr ; ; BE-LABEL: i24_and_or: -; BE: @ BB#0: +; BE: @ %bb.0: ; BE-NEXT: mov r1, #128 ; BE-NEXT: strb r1, [r0, #2] ; BE-NEXT: ldrh r1, [r0] @@ -54,7 +54,7 @@ define void @i24_and_or(i24* %a) { define void @i24_insert_bit(i24* %a, i1 
zeroext %bit) { ; LE-LABEL: i24_insert_bit: -; LE: @ BB#0: +; LE: @ %bb.0: ; LE-NEXT: mov r3, #255 ; LE-NEXT: ldrh r2, [r0] ; LE-NEXT: orr r3, r3, #57088 @@ -64,7 +64,7 @@ define void @i24_insert_bit(i24* %a, i1 zeroext %bit) { ; LE-NEXT: mov pc, lr ; ; BE-LABEL: i24_insert_bit: -; BE: @ BB#0: +; BE: @ %bb.0: ; BE-NEXT: ldrh r2, [r0] ; BE-NEXT: mov r3, #57088 ; BE-NEXT: orr r3, r3, #16711680 @@ -84,14 +84,14 @@ define void @i24_insert_bit(i24* %a, i1 zeroext %bit) { define void @i56_or(i56* %a) { ; LE-LABEL: i56_or: -; LE: @ BB#0: +; LE: @ %bb.0: ; LE-NEXT: ldr r1, [r0] ; LE-NEXT: orr r1, r1, #384 ; LE-NEXT: str r1, [r0] ; LE-NEXT: mov pc, lr ; ; BE-LABEL: i56_or: -; BE: @ BB#0: +; BE: @ %bb.0: ; BE-NEXT: mov r1, r0 ; BE-NEXT: ldr r12, [r0] ; BE-NEXT: ldrh r2, [r1, #4]! @@ -114,7 +114,7 @@ define void @i56_or(i56* %a) { define void @i56_and_or(i56* %a) { ; LE-LABEL: i56_and_or: -; LE: @ BB#0: +; LE: @ %bb.0: ; LE-NEXT: ldr r1, [r0] ; LE-NEXT: orr r1, r1, #384 ; LE-NEXT: bic r1, r1, #127 @@ -122,7 +122,7 @@ define void @i56_and_or(i56* %a) { ; LE-NEXT: mov pc, lr ; ; BE-LABEL: i56_and_or: -; BE: @ BB#0: +; BE: @ %bb.0: ; BE-NEXT: mov r1, r0 ; BE-NEXT: ldr r12, [r0] ; BE-NEXT: ldrh r2, [r1, #4]! @@ -147,7 +147,7 @@ define void @i56_and_or(i56* %a) { define void @i56_insert_bit(i56* %a, i1 zeroext %bit) { ; LE-LABEL: i56_insert_bit: -; LE: @ BB#0: +; LE: @ %bb.0: ; LE-NEXT: ldr r2, [r0] ; LE-NEXT: bic r2, r2, #8192 ; LE-NEXT: orr r1, r2, r1, lsl #13 @@ -155,7 +155,7 @@ define void @i56_insert_bit(i56* %a, i1 zeroext %bit) { ; LE-NEXT: mov pc, lr ; ; BE-LABEL: i56_insert_bit: -; BE: @ BB#0: +; BE: @ %bb.0: ; BE-NEXT: .save {r11, lr} ; BE-NEXT: push {r11, lr} ; BE-NEXT: mov r2, r0 diff --git a/test/CodeGen/ARM/imm-peephole-arm.mir b/test/CodeGen/ARM/imm-peephole-arm.mir index 95ae58ff9bdb..0457507eb448 100644 --- a/test/CodeGen/ARM/imm-peephole-arm.mir +++ b/test/CodeGen/ARM/imm-peephole-arm.mir @@ -42,18 +42,18 @@ body: | %0 = COPY %r0 %1 = MOVi32imm -25733 - %2 = SUBrr %0, killed %1, 14, _, _ + %2 = SUBrr %0, killed %1, 14, %noreg, %noreg %3 = MOVi32imm 25733 - %4 = SUBrr %0, killed %3, 14, _, _ + %4 = SUBrr %0, killed %3, 14, %noreg, %noreg %5 = MOVi32imm -25733 - %6 = ADDrr %0, killed %5, 14, _, _ + %6 = ADDrr %0, killed %5, 14, %noreg, %noreg %7 = MOVi32imm 25733 - %8 = ADDrr killed %0, killed %7, 14, _, _ + %8 = ADDrr killed %0, killed %7, 14, %noreg, %noreg %r0 = COPY killed %8 - BX_RET 14, _, implicit %r0 + BX_RET 14, %noreg, implicit %r0 ... diff --git a/test/CodeGen/ARM/imm-peephole-thumb.mir b/test/CodeGen/ARM/imm-peephole-thumb.mir index 553717ba74ac..04e2b193e96c 100644 --- a/test/CodeGen/ARM/imm-peephole-thumb.mir +++ b/test/CodeGen/ARM/imm-peephole-thumb.mir @@ -41,18 +41,18 @@ body: | liveins: %r0 %0 = COPY %r0 %1 = t2MOVi32imm -25733 - %2 = t2SUBrr %0, killed %1, 14, _, _ + %2 = t2SUBrr %0, killed %1, 14, %noreg, %noreg %3 = t2MOVi32imm 25733 - %4 = t2SUBrr %0, killed %3, 14, _, _ + %4 = t2SUBrr %0, killed %3, 14, %noreg, %noreg %5 = t2MOVi32imm -25733 - %6= t2ADDrr %0, killed %5, 14, _, _ + %6= t2ADDrr %0, killed %5, 14, %noreg, %noreg %7 = t2MOVi32imm 25733 - %8 = t2ADDrr killed %0, killed %7, 14, _, _ + %8 = t2ADDrr killed %0, killed %7, 14, %noreg, %noreg %r0 = COPY killed %8 - tBX_RET 14, _, implicit %r0 + tBX_RET 14, %noreg, implicit %r0 ... 
diff --git a/test/CodeGen/ARM/indirect-hidden.ll b/test/CodeGen/ARM/indirect-hidden.ll index ae1c505bb683..eb0302834879 100644 --- a/test/CodeGen/ARM/indirect-hidden.ll +++ b/test/CodeGen/ARM/indirect-hidden.ll @@ -19,4 +19,4 @@ define i32* @get_var_hidden() { ; CHECK-NOT: __DATA,__data ; CHECK: .indirect_symbol _var_hidden -; CHECK-NEXT: .long 0 \ No newline at end of file +; CHECK-NEXT: .long 0 diff --git a/test/CodeGen/ARM/interval-update-remat.ll b/test/CodeGen/ARM/interval-update-remat.ll index 524e8a0aa491..216f7e915a80 100644 --- a/test/CodeGen/ARM/interval-update-remat.ll +++ b/test/CodeGen/ARM/interval-update-remat.ll @@ -85,7 +85,7 @@ _ZN7MessageD1Ev.exit33: ; preds = %delete.notnull.i.i. if.end: ; preds = %_ZN7MessageD1Ev.exit33, %entry %message_.i.i = getelementptr inbounds %class.AssertionResult.24.249.299.1324.2349, %class.AssertionResult.24.249.299.1324.2349* %gtest_ar, i32 0, i32 1 %call.i.i.i = call %class.scoped_ptr.23.248.298.1323.2348* @_ZN10scoped_ptrI25Trans_NS___1_basic_stringIciiEED2Ev(%class.scoped_ptr.23.248.298.1323.2348* %message_.i.i) - call void @llvm.memset.p0i8.i32(i8* null, i8 0, i32 12, i32 4, i1 false) + call void @llvm.memset.p0i8.i32(i8* align 4 null, i8 0, i32 12, i1 false) call void @_ZN25Trans_NS___1_basic_stringIciiE5m_fn2Ev(%class.Trans_NS___1_basic_string.18.243.293.1318.2343* nonnull %ref.tmp) call void @_Z19CreateSOCKSv5Paramsv(%class.scoped_refptr.19.244.294.1319.2344* nonnull sret %agg.tmp16) %callback_.i = getelementptr inbounds %class.TestCompletionCallback.9.234.284.1309.2334, %class.TestCompletionCallback.9.234.284.1309.2334* %callback, i32 0, i32 1 @@ -137,7 +137,7 @@ declare void @_ZN18ClientSocketHandle5m_fn3IPiEEvRK25Trans_NS___1_basic_stringIc declare void @_Z19CreateSOCKSv5Paramsv(%class.scoped_refptr.19.244.294.1319.2344* sret) ; Function Attrs: argmemonly nounwind -declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) #0 +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) #0 declare %class.BoundNetLog.20.245.295.1320.2345* @_ZN11BoundNetLogD1Ev(%class.BoundNetLog.20.245.295.1320.2345* returned) unnamed_addr diff --git a/test/CodeGen/ARM/intrinsics-overflow.ll b/test/CodeGen/ARM/intrinsics-overflow.ll index af3dd9dd4117..5f78b13c18d1 100644 --- a/test/CodeGen/ARM/intrinsics-overflow.ll +++ b/test/CodeGen/ARM/intrinsics-overflow.ll @@ -1,4 +1,6 @@ -; RUN: llc < %s -mtriple=arm-linux -mcpu=generic | FileCheck %s +; RUN: llc < %s -mtriple=arm-linux -mcpu=generic -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=ARM +; RUN: llc < %s -mtriple=thumbv6m-eabi -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=THUMBV6 +; RUN: llc < %s -mtriple=thumbv7-eabi -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=THUMBV7 define i32 @uadd_overflow(i32 %a, i32 %b) #0 { %sadd = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) @@ -7,10 +9,19 @@ define i32 @uadd_overflow(i32 %a, i32 %b) #0 { ret i32 %2 ; CHECK-LABEL: uadd_overflow: - ; CHECK: add r[[R2:[0-9]+]], r[[R0:[0-9]+]], r[[R1:[0-9]+]] - ; CHECK: mov r[[R1]], #1 - ; CHECK: cmp r[[R2]], r[[R0]] - ; CHECK: movhs r[[R1]], #0 + + ; ARM: adds r[[R0:[0-9]+]], r[[R0]], r[[R1:[0-9]+]] + ; ARM: mov r[[R2:[0-9]+]], #0 + ; ARM: adc r[[R0]], r[[R2]], #0 + + ; THUMBV6: movs r[[R2:[0-9]+]], #0 + ; THUMBV6: adds r[[R0:[0-9]+]], r[[R0]], r[[R1:[0-9]+]] + ; THUMBV6: adcs r[[R2]], r[[R2]] + ; THUMBV6: mov r[[R0]], r[[R2]] + + ; THUMBV7: adds r[[R0:[0-9]+]], r[[R0]], r[[R1:[0-9]+]] + ; THUMBV7: mov.w 
r[[R2:[0-9]+]], #0 + ; THUMBV7: adc r[[R0]], r[[R2]], #0 } @@ -21,10 +32,25 @@ define i32 @sadd_overflow(i32 %a, i32 %b) #0 { ret i32 %2 ; CHECK-LABEL: sadd_overflow: - ; CHECK: add r[[R2:[0-9]+]], r[[R0:[0-9]+]], r[[R1:[0-9]+]] - ; CHECK: mov r[[R1]], #1 - ; CHECK: cmp r[[R2]], r[[R0]] - ; CHECK: movvc r[[R1]], #0 + + ; ARM: adds r[[R2:[0-9]+]], r[[R0:[0-9]+]], r[[R1:[0-9]+]] + ; ARM: mov r[[R0]], #1 + ; ARM: movvc r[[R0]], #0 + ; ARM: mov pc, lr + + ; THUMBV6: mov r[[R2:[0-9]+]], r[[R0:[0-9]+]] + ; THUMBV6: adds r[[R3:[0-9]+]], r[[R2]], r[[R1:[0-9]+]] + ; THUMBV6: movs r[[R0]], #0 + ; THUMBV6: movs r[[R1]], #1 + ; THUMBV6: cmp r[[R3]], r[[R2]] + ; THUMBV6: bvc .L[[LABEL:.*]] + ; THUMBV6: mov r[[R0]], r[[R1]] + ; THUMBV6: .L[[LABEL]]: + + ; THUMBV7: adds r[[R2:[0-9]+]], r[[R0]], r[[R1:[0-9]+]] + ; THUMBV7: mov.w r[[R0:[0-9]+]], #1 + ; THUMBV7: it vc + ; THUMBV7: movvc r[[R0]], #0 } define i32 @usub_overflow(i32 %a, i32 %b) #0 { @@ -34,9 +60,26 @@ define i32 @usub_overflow(i32 %a, i32 %b) #0 { ret i32 %2 ; CHECK-LABEL: usub_overflow: - ; CHECK: mov r[[R2]], #1 - ; CHECK: cmp r[[R0]], r[[R1]] - ; CHECK: movhs r[[R2]], #0 + + ; ARM: subs r[[R0:[0-9]+]], r[[R0]], r[[R1:[0-9]+]] + ; ARM: mov r[[R2:[0-9]+]], #0 + ; ARM: adc r[[R0]], r[[R2]], #0 + ; ARM: rsb r[[R0]], r[[R0]], #1 + + ; THUMBV6: movs r[[R2:[0-9]+]], #0 + ; THUMBV6: subs r[[R0:[0-9]+]], r[[R0]], r[[R1:[0-9]+]] + ; THUMBV6: adcs r[[R2]], r[[R2]] + ; THUMBV6: movs r[[R0]], #1 + ; THUMBV6: subs r[[R0]], r[[R0]], r[[R2]] + + ; THUMBV7: subs r[[R0:[0-9]+]], r[[R0]], r[[R1:[0-9]+]] + ; THUMBV7: mov.w r[[R2:[0-9]+]], #0 + ; THUMBV7: adc r[[R0]], r[[R2]], #0 + ; THUMBV7: rsb.w r[[R0]], r[[R0]], #1 + + ; We should know that the overflow is just 1 bit, + ; no need to clear any other bit + ; CHECK-NOT: and } define i32 @ssub_overflow(i32 %a, i32 %b) #0 { @@ -46,9 +89,23 @@ define i32 @ssub_overflow(i32 %a, i32 %b) #0 { ret i32 %2 ; CHECK-LABEL: ssub_overflow: - ; CHECK: mov r[[R2]], #1 - ; CHECK: cmp r[[R0]], r[[R1]] - ; CHECK: movvc r[[R2]], #0 + + ; ARM: mov r[[R2]], #1 + ; ARM: cmp r[[R0]], r[[R1]] + ; ARM: movvc r[[R2]], #0 + + ; THUMBV6: movs r[[R0]], #0 + ; THUMBV6: movs r[[R3:[0-9]+]], #1 + ; THUMBV6: cmp r[[R2]], r[[R1:[0-9]+]] + ; THUMBV6: bvc .L[[LABEL:.*]] + ; THUMBV6: mov r[[R0]], r[[R3]] + ; THUMBV6: .L[[LABEL]]: + + ; THUMBV7: movs r[[R2:[0-9]+]], #1 + ; THUMBV7: cmp r[[R0:[0-9]+]], r[[R1:[0-9]+]] + ; THUMBV7: it vc + ; THUMBV7: movvc r[[R2]], #0 + ; THUMBV7: mov r[[R0]], r[[R2]] } declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) #1 diff --git a/test/CodeGen/ARM/jump-table-tbh.ll b/test/CodeGen/ARM/jump-table-tbh.ll index b3ee68ea0758..ab2c579e514e 100644 --- a/test/CodeGen/ARM/jump-table-tbh.ll +++ b/test/CodeGen/ARM/jump-table-tbh.ll @@ -10,7 +10,7 @@ define i32 @test_tbh(i1 %tst, i32 %sw, i32 %l) { ; T2-LABEL: test_tbh: ; T2: [[ANCHOR:.LCPI[0-9_]+]]: ; T2: tbh [pc, r{{[0-9]+}}, lsl #1] -; T2-NEXT: @ BB#{{[0-9]+}} +; T2-NEXT: @ %bb.{{[0-9]+}} ; T2-NEXT: LJTI ; T2-NEXT: .short (.LBB0_[[x:[0-9]+]]-([[ANCHOR]]+4))/2 ; T2-NEXT: .short (.LBB0_{{[0-9]+}}-([[ANCHOR]]+4))/2 @@ -24,7 +24,7 @@ define i32 @test_tbh(i1 %tst, i32 %sw, i32 %l) { ; T1: lsls [[x]], [[x]], #1 ; T1: [[ANCHOR:.LCPI[0-9_]+]]: ; T1: add pc, [[x]] -; T1-NEXT: @ BB#2 +; T1-NEXT: @ %bb.2 ; T1-NEXT: .p2align 2 ; T1-NEXT: LJTI ; T1-NEXT: .short (.LBB0_[[x:[0-9]+]]-([[ANCHOR]]+4))/2 diff --git a/test/CodeGen/ARM/ldm-stm-base-materialization.ll b/test/CodeGen/ARM/ldm-stm-base-materialization.ll index a3231f95f478..755619e8b3ee 100644 --- 
a/test/CodeGen/ARM/ldm-stm-base-materialization.ll +++ b/test/CodeGen/ARM/ldm-stm-base-materialization.ll @@ -22,7 +22,7 @@ entry: %2 = load i32*, i32** @b, align 4 %arrayidx1 = getelementptr inbounds i32, i32* %2, i32 1 %3 = bitcast i32* %arrayidx1 to i8* - tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* %3, i32 24, i32 4, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %1, i8* align 4 %3, i32 24, i1 false) ret void } @@ -43,7 +43,7 @@ entry: %2 = load i32*, i32** @b, align 4 %arrayidx1 = getelementptr inbounds i32, i32* %2, i32 1 %3 = bitcast i32* %arrayidx1 to i8* - tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* %3, i32 28, i32 4, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %1, i8* align 4 %3, i32 28, i1 false) ret void } @@ -64,7 +64,7 @@ entry: %2 = load i32*, i32** @b, align 4 %arrayidx1 = getelementptr inbounds i32, i32* %2, i32 1 %3 = bitcast i32* %arrayidx1 to i8* - tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* %3, i32 32, i32 4, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %1, i8* align 4 %3, i32 32, i1 false) ret void } @@ -85,9 +85,9 @@ entry: %2 = load i32*, i32** @b, align 4 %arrayidx1 = getelementptr inbounds i32, i32* %2, i32 1 %3 = bitcast i32* %arrayidx1 to i8* - tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* %3, i32 36, i32 4, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %1, i8* align 4 %3, i32 36, i1 false) ret void } ; Function Attrs: nounwind -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #1 +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i1) #1 diff --git a/test/CodeGen/ARM/litpool-licm.ll b/test/CodeGen/ARM/litpool-licm.ll index dc6b37feaf05..923971d1afe1 100644 --- a/test/CodeGen/ARM/litpool-licm.ll +++ b/test/CodeGen/ARM/litpool-licm.ll @@ -43,4 +43,4 @@ done: ret void } -declare void @foo(i32*) \ No newline at end of file +declare void @foo(i32*) diff --git a/test/CodeGen/ARM/load_store_multiple.ll b/test/CodeGen/ARM/load_store_multiple.ll index 5ea1f8cf6ffa..a636a8d12b22 100644 --- a/test/CodeGen/ARM/load_store_multiple.ll +++ b/test/CodeGen/ARM/load_store_multiple.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=armv7-eabi -mattr=+neon %s -o - | FileCheck %s --check-prefix=CHECK-LE -; RUN: llc -mtriple=armv7eb-eabi -mattr=+neon %s -o - | FileCheck %s --check-prefix=CHECK-BE +; RUN: llc -verify-machineinstrs -mtriple=armv7-eabi -mattr=+neon %s -o - | FileCheck %s --check-prefix=CHECK-LE +; RUN: llc -verify-machineinstrs -mtriple=armv7eb-eabi -mattr=+neon %s -o - | FileCheck %s --check-prefix=CHECK-BE define void @ld_st_vec_i8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK-LE-LABEL: ld_st_vec_i8: diff --git a/test/CodeGen/ARM/load_store_opt_kill.mir b/test/CodeGen/ARM/load_store_opt_kill.mir index 4c210eaf8e9f..85cc5953d1dc 100644 --- a/test/CodeGen/ARM/load_store_opt_kill.mir +++ b/test/CodeGen/ARM/load_store_opt_kill.mir @@ -3,8 +3,8 @@ # CHECK-LABEL: name: f name: f # Make sure the load into %r0 doesn't clobber the base register before the second load uses it. 
-# CHECK: %r3 = LDRi12 %r0, 12, 14, _ -# CHECK-NEXT: %r0 = LDRi12 %r0, 8, 14, _ +# CHECK: %r3 = LDRi12 %r0, 12, 14, %noreg +# CHECK-NEXT: %r0 = LDRi12 %r0, 8, 14, %noreg body: | bb.0: liveins: %r0, %r3 diff --git a/test/CodeGen/ARM/local-call.ll b/test/CodeGen/ARM/local-call.ll index a38df62ff905..c07294685e92 100644 --- a/test/CodeGen/ARM/local-call.ll +++ b/test/CodeGen/ARM/local-call.ll @@ -17,4 +17,4 @@ define i64 @test_local_call(i64 %a, i64 %b) { %res = udiv i64 %a, %b ret i64 %res -} \ No newline at end of file +} diff --git a/test/CodeGen/ARM/long-setcc.ll b/test/CodeGen/ARM/long-setcc.ll index 1fbc3f2c0838..8aee8b9da709 100644 --- a/test/CodeGen/ARM/long-setcc.ll +++ b/test/CodeGen/ARM/long-setcc.ll @@ -1,20 +1,27 @@ ; RUN: llc -mtriple=arm-eabi < %s | FileCheck %s define i1 @t1(i64 %x) { +; CHECK-LABEL: t1: +; CHECK: lsr r0, r1, #31 %B = icmp slt i64 %x, 0 ret i1 %B } define i1 @t2(i64 %x) { +; CHECK-LABEL: t2: +; CHECK: mov r0, #0 +; CHECK: cmp r1, #0 +; CHECK: moveq r0, #1 %tmp = icmp ult i64 %x, 4294967296 ret i1 %tmp } define i1 @t3(i32 %x) { +; CHECK-LABEL: t3: +; CHECK: mov r0, #0 %tmp = icmp ugt i32 %x, -1 ret i1 %tmp } -; CHECK: cmp ; CHECK-NOT: cmp diff --git a/test/CodeGen/ARM/machine-copyprop.mir b/test/CodeGen/ARM/machine-copyprop.mir index 9be595f690db..bb9c3478d8b4 100644 --- a/test/CodeGen/ARM/machine-copyprop.mir +++ b/test/CodeGen/ARM/machine-copyprop.mir @@ -3,20 +3,20 @@ # Test that machine copy prop recognizes the implicit-def operands on a COPY # as clobbering the register. # CHECK-LABEL: name: func -# CHECK: %d2 = VMOVv2i32 2, 14, _ +# CHECK: %d2 = VMOVv2i32 2, 14, %noreg # CHECK: %s5 = COPY %s0, implicit %q1, implicit-def %q1 -# CHECK: VST1q32 %r0, 0, %q1, 14, _ +# CHECK: VST1q32 %r0, 0, %q1, 14, %noreg # The following two COPYs must not be removed # CHECK: %s4 = COPY %s20, implicit-def %q1 # CHECK: %s5 = COPY %s0, implicit killed %d0, implicit %q1, implicit-def %q1 -# CHECK: VST1q32 %r2, 0, %q1, 14, _ +# CHECK: VST1q32 %r2, 0, %q1, 14, %noreg name: func body: | bb.0: - %d2 = VMOVv2i32 2, 14, _ + %d2 = VMOVv2i32 2, 14, %noreg %s5 = COPY %s0, implicit %q1, implicit-def %q1 - VST1q32 %r0, 0, %q1, 14, _ + VST1q32 %r0, 0, %q1, 14, %noreg %s4 = COPY %s20, implicit-def %q1 %s5 = COPY %s0, implicit killed %d0, implicit %q1, implicit-def %q1 - VST1q32 %r2, 0, %q1, 14, _ + VST1q32 %r2, 0, %q1, 14, %noreg ... 
diff --git a/test/CodeGen/ARM/machine-cse-cmp.ll b/test/CodeGen/ARM/machine-cse-cmp.ll index 611cba6ed1fc..10e56a346a2a 100644 --- a/test/CodeGen/ARM/machine-cse-cmp.ll +++ b/test/CodeGen/ARM/machine-cse-cmp.ll @@ -37,14 +37,14 @@ entry: for.body.lr.ph: ; preds = %entry %1 = icmp sgt i32 %0, 1 %smax = select i1 %1, i32 %0, i32 1 - call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([250 x i8], [250 x i8]* @bar, i32 0, i32 0), i8 0, i32 %smax, i32 1, i1 false) + call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([250 x i8], [250 x i8]* @bar, i32 0, i32 0), i8 0, i32 %smax, i1 false) unreachable for.cond1.preheader: ; preds = %entry ret void } -declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind ; rdar://12462006 define i8* @f3(i8* %base, i32* nocapture %offset, i32 %size) nounwind { diff --git a/test/CodeGen/ARM/machine-licm.ll b/test/CodeGen/ARM/machine-licm.ll index a1eec78e453f..9ed1a57616c9 100644 --- a/test/CodeGen/ARM/machine-licm.ll +++ b/test/CodeGen/ARM/machine-licm.ll @@ -31,7 +31,7 @@ bb.nph: ; preds = %entry ; ARM-NOT: LCPI0_1: ; ARM: .section -; THUMB: BB#1 +; THUMB: %bb.1 ; THUMB: ldr r2, LCPI0_0 ; THUMB: add r2, pc ; THUMB: ldr r{{[0-9]+}}, [r2] diff --git a/test/CodeGen/ARM/memcpy-inline.ll b/test/CodeGen/ARM/memcpy-inline.ll index b447497b270a..1dccf0b99058 100644 --- a/test/CodeGen/ARM/memcpy-inline.ll +++ b/test/CodeGen/ARM/memcpy-inline.ll @@ -23,7 +23,7 @@ entry: ; CHECK-T1: strb [[TREG1]], ; CHECK-T1: ldrh [[TREG2:r[0-9]]], ; CHECK-T1: strh [[TREG2]] - call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds (%struct.x, %struct.x* @dst, i32 0, i32 0), i8* getelementptr inbounds (%struct.x, %struct.x* @src, i32 0, i32 0), i32 11, i32 8, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 getelementptr inbounds (%struct.x, %struct.x* @dst, i32 0, i32 0), i8* align 8 getelementptr inbounds (%struct.x, %struct.x* @src, i32 0, i32 0), i32 11, i1 false) ret i32 0 } @@ -37,7 +37,7 @@ entry: ; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0] ; CHECK-T1-LABEL: t1: ; CHECK-T1: bl _memcpy - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([31 x i8], [31 x i8]* @.str1, i64 0, i64 0), i64 31, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([31 x i8], [31 x i8]* @.str1, i64 0, i64 0), i64 31, i1 false) ret void } @@ -55,7 +55,7 @@ entry: ; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r3] ; CHECK-T1-LABEL: t2: ; CHECK-T1: bl _memcpy - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([36 x i8], [36 x i8]* @.str2, i64 0, i64 0), i64 36, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([36 x i8], [36 x i8]* @.str2, i64 0, i64 0), i64 36, i1 false) ret void } @@ -68,7 +68,7 @@ entry: ; CHECK: vst1.8 {d{{[0-9]+}}}, [r0] ; CHECK-T1-LABEL: t3: ; CHECK-T1: bl _memcpy - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str3, i64 0, i64 0), i64 24, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str3, i64 0, i64 0), i64 24, i1 false) ret void } @@ -80,7 +80,7 @@ entry: ; CHECK: strh [[REG5:r[0-9]+]], [r0] ; CHECK-T1-LABEL: t4: ; CHECK-T1: bl _memcpy - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([18 x i8], [18 x i8]* @.str4, i64 0, i64 0), i64 18, i32 1, i1 
false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([18 x i8], [18 x i8]* @.str4, i64 0, i64 0), i64 18, i1 false) ret void } @@ -96,7 +96,7 @@ entry: ; CHECK: str [[REG7]] ; CHECK-T1-LABEL: t5: ; CHECK-T1: bl _memcpy - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str5, i64 0, i64 0), i64 7, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str5, i64 0, i64 0), i64 7, i1 false) ret void } @@ -114,7 +114,7 @@ entry: ; CHECK-T1: strh [[TREG5]], ; CHECK-T1: ldr [[TREG6:r[0-9]]], ; CHECK-T1: str [[TREG6]] - call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([512 x i8], [512 x i8]* @spool.splbuf, i64 0, i64 0), i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str6, i64 0, i64 0), i64 14, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([512 x i8], [512 x i8]* @spool.splbuf, i64 0, i64 0), i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str6, i64 0, i64 0), i64 14, i1 false) ret void } @@ -130,9 +130,9 @@ entry: ; CHECK-T1: str %0 = bitcast %struct.Foo* %a to i8* %1 = bitcast %struct.Foo* %b to i8* - tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* %1, i32 16, i32 4, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %0, i8* align 4 %1, i32 16, i1 false) ret void } -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind diff --git a/test/CodeGen/ARM/memcpy-ldm-stm.ll b/test/CodeGen/ARM/memcpy-ldm-stm.ll index 2ebe7ed5b146..314f559e357a 100644 --- a/test/CodeGen/ARM/memcpy-ldm-stm.ll +++ b/test/CodeGen/ARM/memcpy-ldm-stm.ll @@ -24,7 +24,7 @@ entry: ; Think of the monstrosity '{{\[}}[[LB]]]' as '[ [[LB]] ]' without the spaces. 
; CHECK-NEXT: ldrb{{(\.w)?}} {{.*}}, {{\[}}[[LB]]] ; CHECK-NEXT: strb{{(\.w)?}} {{.*}}, {{\[}}[[SB]]] - tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([64 x i32]* @s to i8*), i8* bitcast ([64 x i32]* @d to i8*), i32 17, i32 4, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 bitcast ([64 x i32]* @s to i8*), i8* align 4 bitcast ([64 x i32]* @d to i8*), i32 17, i1 false) ret void } @@ -42,7 +42,7 @@ entry: ; CHECK-NEXT: ldrb{{(\.w)?}} {{.*}}, {{\[}}[[LB]], #2] ; CHECK-NEXT: strb{{(\.w)?}} {{.*}}, {{\[}}[[SB]], #2] ; CHECK-NEXT: strh{{(\.w)?}} {{.*}}, {{\[}}[[SB]]] - tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([64 x i32]* @s to i8*), i8* bitcast ([64 x i32]* @d to i8*), i32 15, i32 4, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 bitcast ([64 x i32]* @s to i8*), i8* align 4 bitcast ([64 x i32]* @d to i8*), i32 15, i1 false) ret void } @@ -54,13 +54,13 @@ entry: define void @t3() { call void @llvm.memcpy.p0i8.p0i8.i32( - i8* getelementptr inbounds (%struct.T, %struct.T* @copy, i32 0, i32 0), - i8* getelementptr inbounds (%struct.T, %struct.T* @etest, i32 0, i32 0), - i32 24, i32 8, i1 false) + i8* align 8 getelementptr inbounds (%struct.T, %struct.T* @copy, i32 0, i32 0), + i8* align 8 getelementptr inbounds (%struct.T, %struct.T* @etest, i32 0, i32 0), + i32 24, i1 false) call void @llvm.memcpy.p0i8.p0i8.i32( - i8* getelementptr inbounds (%struct.T, %struct.T* @copy, i32 0, i32 0), - i8* getelementptr inbounds (%struct.T, %struct.T* @etest, i32 0, i32 0), - i32 24, i32 8, i1 false) + i8* align 8 getelementptr inbounds (%struct.T, %struct.T* @copy, i32 0, i32 0), + i8* align 8 getelementptr inbounds (%struct.T, %struct.T* @etest, i32 0, i32 0), + i32 24, i1 false) ret void } @@ -70,7 +70,7 @@ define void @t3() { define void @test3(%struct.S* %d, %struct.S* %s) #0 { %1 = bitcast %struct.S* %d to i8* %2 = bitcast %struct.S* %s to i8* - tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* %2, i32 48, i32 4, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %1, i8* align 4 %2, i32 48, i1 false) ; 3 ldm/stm pairs in v6; 2 in v7 ; CHECK: ldm{{(\.w)?}} {{[rl0-9]+!?}}, [[REGLIST1:{.*}]] ; CHECK: stm{{(\.w)?}} {{[rl0-9]+!?}}, [[REGLIST1]] @@ -91,4 +91,4 @@ declare void @g(i32*) attributes #0 = { "no-frame-pointer-elim"="true" } ; Function Attrs: nounwind -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #1 +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i1) #1 diff --git a/test/CodeGen/ARM/memcpy-no-inline.ll b/test/CodeGen/ARM/memcpy-no-inline.ll index 126546095e1f..7aaac19eee3f 100644 --- a/test/CodeGen/ARM/memcpy-no-inline.ll +++ b/test/CodeGen/ARM/memcpy-no-inline.ll @@ -14,7 +14,7 @@ entry: ; CHECK-NOT: ldm %mystring = alloca [31 x i8], align 1 %0 = getelementptr inbounds [31 x i8], [31 x i8]* %mystring, i32 0, i32 0 - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* getelementptr inbounds ([31 x i8], [31 x i8]* @.str, i32 0, i32 0), i32 31, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %0, i8* align 1 getelementptr inbounds ([31 x i8], [31 x i8]* @.str, i32 0, i32 0), i32 31, i1 false) ret void } @@ -24,10 +24,10 @@ entry: ; CHECK-NOT: __aeabi_memcpy %mystring = alloca [31 x i8], align 1 %0 = getelementptr inbounds [31 x i8], [31 x i8]* %mystring, i32 0, i32 0 - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* getelementptr inbounds ([21 x i8], [21 x i8]* @.str.1, i32 0, i32 0), i32 21, i32 1, i1 false) + call void 
@llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %0, i8* align 1 getelementptr inbounds ([21 x i8], [21 x i8]* @.str.1, i32 0, i32 0), i32 21, i1 false) ret void } -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #1 +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i1) #1 attributes #0 = { minsize noinline nounwind optsize } diff --git a/test/CodeGen/ARM/memfunc.ll b/test/CodeGen/ARM/memfunc.ll index ed6746290b75..882091b67f09 100644 --- a/test/CodeGen/ARM/memfunc.ll +++ b/test/CodeGen/ARM/memfunc.ll @@ -16,13 +16,13 @@ entry: ; CHECK-DARWIN: bl _memmove ; CHECK-EABI: bl __aeabi_memmove ; CHECK-GNUEABI: bl memmove - call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i32 0, i1 false) + call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i1 false) ; CHECK-IOS: bl _memcpy ; CHECK-DARWIN: bl _memcpy ; CHECK-EABI: bl __aeabi_memcpy ; CHECK-GNUEABI: bl memcpy - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i32 0, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i1 false) ; EABI memset swaps arguments ; CHECK-IOS: mov r1, #1 @@ -33,7 +33,7 @@ entry: ; CHECK-EABI: bl __aeabi_memset ; CHECK-GNUEABI: mov r1, #1 ; CHECK-GNUEABI: bl memset - call void @llvm.memset.p0i8.i32(i8* %dest, i8 1, i32 500, i32 0, i1 false) + call void @llvm.memset.p0i8.i32(i8* %dest, i8 1, i32 500, i1 false) ; EABI uses memclr if value set to 0 ; CHECK-IOS: mov r1, #0 @@ -42,7 +42,7 @@ entry: ; CHECK-DARWIN: bl _memset ; CHECK-EABI: bl __aeabi_memclr ; CHECK-GNUEABI: bl memset - call void @llvm.memset.p0i8.i32(i8* %dest, i8 0, i32 500, i32 0, i1 false) + call void @llvm.memset.p0i8.i32(i8* %dest, i8 0, i32 500, i1 false) ; EABI uses aligned function variants if possible @@ -50,49 +50,49 @@ entry: ; CHECK-DARWIN: bl _memmove ; CHECK-EABI: bl __aeabi_memmove4 ; CHECK-GNUEABI: bl memmove - call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i32 4, i1 false) + call void @llvm.memmove.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 500, i1 false) ; CHECK-IOS: bl _memcpy ; CHECK-DARWIN: bl _memcpy ; CHECK-EABI: bl __aeabi_memcpy4 ; CHECK-GNUEABI: bl memcpy - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i32 4, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 500, i1 false) ; CHECK-IOS: bl _memset ; CHECK-DARWIN: bl _memset ; CHECK-EABI: bl __aeabi_memset4 ; CHECK-GNUEABI: bl memset - call void @llvm.memset.p0i8.i32(i8* %dest, i8 1, i32 500, i32 4, i1 false) + call void @llvm.memset.p0i8.i32(i8* align 4 %dest, i8 1, i32 500, i1 false) ; CHECK-IOS: bl _memset ; CHECK-DARWIN: bl _memset ; CHECK-EABI: bl __aeabi_memclr4 ; CHECK-GNUEABI: bl memset - call void @llvm.memset.p0i8.i32(i8* %dest, i8 0, i32 500, i32 4, i1 false) + call void @llvm.memset.p0i8.i32(i8* align 4 %dest, i8 0, i32 500, i1 false) ; CHECK-IOS: bl _memmove ; CHECK-DARWIN: bl _memmove ; CHECK-EABI: bl __aeabi_memmove8 ; CHECK-GNUEABI: bl memmove - call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i32 8, i1 false) + call void @llvm.memmove.p0i8.p0i8.i32(i8* align 8 %dest, i8* align 8 %src, i32 500, i1 false) ; CHECK-IOS: bl _memcpy ; CHECK-DARWIN: bl _memcpy ; CHECK-EABI: bl __aeabi_memcpy8 ; CHECK-GNUEABI: bl memcpy - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i32 8, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 %dest, i8* align 8 %src, i32 500, i1 false) ; CHECK-IOS: bl _memset 
; CHECK-DARWIN: bl _memset ; CHECK-EABI: bl __aeabi_memset8 ; CHECK-GNUEABI: bl memset - call void @llvm.memset.p0i8.i32(i8* %dest, i8 1, i32 500, i32 8, i1 false) + call void @llvm.memset.p0i8.i32(i8* align 8 %dest, i8 1, i32 500, i1 false) ; CHECK-IOS: bl _memset ; CHECK-DARWIN: bl _memset ; CHECK-EABI: bl __aeabi_memclr8 ; CHECK-GNUEABI: bl memset - call void @llvm.memset.p0i8.i32(i8* %dest, i8 0, i32 500, i32 8, i1 false) + call void @llvm.memset.p0i8.i32(i8* align 8 %dest, i8 0, i32 500, i1 false) unreachable } @@ -113,7 +113,7 @@ entry: ; CHECK-GNUEABI: bl memmove %arr0 = alloca [9 x i8], align 1 %0 = bitcast [9 x i8]* %arr0 to i8* - call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i32 0, i1 false) + call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i1 false) ; CHECK: add r1, sp, #16 ; CHECK-IOS: bl _memcpy @@ -122,7 +122,7 @@ entry: ; CHECK-GNUEABI: bl memcpy %arr1 = alloca [9 x i8], align 1 %1 = bitcast [9 x i8]* %arr1 to i8* - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i1 false) ; CHECK-IOS: mov r0, sp ; CHECK-IOS: mov r1, #1 @@ -138,7 +138,7 @@ entry: ; CHECK-GNUEABI: bl memset %arr2 = alloca [9 x i8], align 1 %2 = bitcast [9 x i8]* %arr2 to i8* - call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i32 0, i1 false) + call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i1 false) unreachable } @@ -155,7 +155,7 @@ entry: ; CHECK-GNUEABI: bl memmove %arr0 = alloca [7 x i8], align 1 %0 = bitcast [7 x i8]* %arr0 to i8* - call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i32 0, i1 false) + call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i1 false) ; CHECK: {{add(.w)? r1, sp, #10|sub(.w)? r1, r(7|11), #22}} ; CHECK-IOS: bl _memcpy @@ -164,7 +164,7 @@ entry: ; CHECK-GNUEABI: bl memcpy %arr1 = alloca [7 x i8], align 1 %1 = bitcast [7 x i8]* %arr1 to i8* - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i1 false) ; CHECK: {{add(.w)? r0, sp, #3|sub(.w)? r0, r(7|11), #29}} ; CHECK-IOS: mov r1, #1 @@ -177,7 +177,7 @@ entry: ; CHECK-GNUEABI: bl memset %arr2 = alloca [7 x i8], align 1 %2 = bitcast [7 x i8]* %arr2 to i8* - call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i32 0, i1 false) + call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i1 false) unreachable } @@ -194,7 +194,7 @@ entry: ; CHECK-GNUEABI: bl memmove %arr0 = alloca [9 x i8], align 1 %0 = getelementptr inbounds [9 x i8], [9 x i8]* %arr0, i32 0, i32 4 - call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i32 0, i1 false) + call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i1 false) ; CHECK: {{add(.w)? r., sp, #(10|14)|sub(.w) r., r(7|11), #26}} ; CHECK-IOS: bl _memcpy @@ -203,7 +203,7 @@ entry: ; CHECK-GNUEABI: bl memcpy %arr1 = alloca [9 x i8], align 1 %1 = getelementptr inbounds [9 x i8], [9 x i8]* %arr1, i32 0, i32 4 - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i1 false) ; CHECK: {{add(.w)? 
r., sp, #(1|5)|sub(.w) r., r(7|11), #35}} ; CHECK-IOS: mov r1, #1 @@ -216,7 +216,7 @@ entry: ; CHECK-GNUEABI: bl memset %arr2 = alloca [9 x i8], align 1 %2 = getelementptr inbounds [9 x i8], [9 x i8]* %arr2, i32 0, i32 4 - call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i32 0, i1 false) + call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i1 false) unreachable } @@ -233,7 +233,7 @@ entry: ; CHECK-GNUEABI: bl memmove %arr0 = alloca [13 x i8], align 1 %0 = getelementptr inbounds [13 x i8], [13 x i8]* %arr0, i32 0, i32 1 - call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i32 0, i1 false) + call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i1 false) ; CHECK: {{add(.w)? r., sp, #(10|14)|sub(.w)? r., r(7|11), #34}} ; CHECK-IOS: bl _memcpy @@ -242,7 +242,7 @@ entry: ; CHECK-GNUEABI: bl memcpy %arr1 = alloca [13 x i8], align 1 %1 = getelementptr inbounds [13 x i8], [13 x i8]* %arr1, i32 0, i32 1 - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i1 false) ; CHECK: {{add(.w)? r., sp, #(1|5)|sub(.w)? r., r(7|11), #47}} ; CHECK-IOS: mov r1, #1 @@ -255,7 +255,7 @@ entry: ; CHECK-GNUEABI: bl memset %arr2 = alloca [13 x i8], align 1 %2 = getelementptr inbounds [13 x i8], [13 x i8]* %arr2, i32 0, i32 1 - call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i32 0, i1 false) + call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i1 false) unreachable } @@ -272,7 +272,7 @@ entry: ; CHECK-GNUEABI: bl memmove %arr0 = alloca [13 x i8], align 1 %0 = getelementptr inbounds [13 x i8], [13 x i8]* %arr0, i32 0, i32 %i - call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i32 0, i1 false) + call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i1 false) ; CHECK: {{add(.w)? r., sp, #(10|14)|sub(.w)? r., r(7|11), #42}} ; CHECK-IOS: bl _memcpy @@ -281,7 +281,7 @@ entry: ; CHECK-GNUEABI: bl memcpy %arr1 = alloca [13 x i8], align 1 %1 = getelementptr inbounds [13 x i8], [13 x i8]* %arr1, i32 0, i32 %i - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i1 false) ; CHECK: {{add(.w)? r., sp, #(1|5)|sub(.w)? r., r(7|11), #55}} ; CHECK-IOS: mov r1, #1 @@ -294,7 +294,7 @@ entry: ; CHECK-GNUEABI: bl memset %arr2 = alloca [13 x i8], align 1 %2 = getelementptr inbounds [13 x i8], [13 x i8]* %arr2, i32 0, i32 %i - call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i32 0, i1 false) + call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i1 false) unreachable } @@ -311,7 +311,7 @@ entry: ; CHECK-GNUEABI: bl memmove %arr0 = alloca [13 x i8], align 1 %0 = getelementptr [13 x i8], [13 x i8]* %arr0, i32 0, i32 4 - call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i32 0, i1 false) + call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i1 false) ; CHECK: {{add(.w)? r., sp, #(10|14)|sub(.w)? r., r(7|11), #34}} ; CHECK-IOS: bl _memcpy @@ -320,7 +320,7 @@ entry: ; CHECK-GNUEABI: bl memcpy %arr1 = alloca [13 x i8], align 1 %1 = getelementptr [13 x i8], [13 x i8]* %arr1, i32 0, i32 4 - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i1 false) ; CHECK: {{add(.w)? r., sp, #(1|5)|sub(.w)? 
r., r(7|11), #47}} ; CHECK-IOS: mov r1, #1 @@ -333,7 +333,7 @@ entry: ; CHECK-GNUEABI: bl memset %arr2 = alloca [13 x i8], align 1 %2 = getelementptr [13 x i8], [13 x i8]* %arr2, i32 0, i32 4 - call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i32 0, i1 false) + call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i1 false) unreachable } @@ -350,7 +350,7 @@ entry: ; CHECK-GNUEABI: bl memmove %arr0 = alloca [13 x i8], align 1 %0 = getelementptr inbounds [13 x i8], [13 x i8]* %arr0, i32 0, i32 16 - call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i32 0, i1 false) + call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i1 false) ; CHECK: {{add(.w)? r., sp, #(10|14)|sub(.w)? r., r(7|11), #34}} ; CHECK-IOS: bl _memcpy @@ -359,7 +359,7 @@ entry: ; CHECK-GNUEABI: bl memcpy %arr1 = alloca [13 x i8], align 1 %1 = getelementptr inbounds [13 x i8], [13 x i8]* %arr1, i32 0, i32 16 - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i1 false) ; CHECK: {{add(.w)? r., sp, #(1|5)|sub(.w)? r., r(7|11), #47}} ; CHECK-IOS: mov r1, #1 @@ -372,7 +372,7 @@ entry: ; CHECK-GNUEABI: bl memset %arr2 = alloca [13 x i8], align 1 %2 = getelementptr inbounds [13 x i8], [13 x i8]* %arr2, i32 0, i32 16 - call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i32 0, i1 false) + call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i1 false) unreachable } @@ -390,15 +390,15 @@ entry: @arr9 = weak_odr global [128 x i8] undef define void @f9(i8* %dest, i32 %n) "no-frame-pointer-elim"="true" { entry: - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @arr1, i32 0, i32 0), i32 %n, i32 1, i1 false) - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([8 x i8], [8 x i8]* @arr2, i32 0, i32 0), i32 %n, i32 1, i1 false) - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @arr3, i32 0, i32 0), i32 %n, i32 1, i1 false) - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([8 x i8], [8 x i8]* @arr4, i32 0, i32 0), i32 %n, i32 1, i1 false) - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @arr5, i32 0, i32 0), i32 %n, i32 1, i1 false) - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @arr6, i32 0, i32 0), i32 %n, i32 1, i1 false) - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @arr7, i32 0, i32 0), i32 %n, i32 1, i1 false) - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([128 x i8], [128 x i8]* @arr8, i32 0, i32 0), i32 %n, i32 1, i1 false) - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([128 x i8], [128 x i8]* @arr9, i32 0, i32 0), i32 %n, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @arr1, i32 0, i32 0), i32 %n, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([8 x i8], [8 x i8]* @arr2, i32 0, i32 0), i32 %n, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @arr3, i32 0, i32 0), i32 %n, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([8 x i8], [8 x i8]* @arr4, i32 0, i32 0), i32 %n, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr 
inbounds ([7 x i8], [7 x i8]* @arr5, i32 0, i32 0), i32 %n, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @arr6, i32 0, i32 0), i32 %n, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @arr7, i32 0, i32 0), i32 %n, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([128 x i8], [128 x i8]* @arr8, i32 0, i32 0), i32 %n, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([128 x i8], [128 x i8]* @arr9, i32 0, i32 0), i32 %n, i1 false) unreachable } @@ -428,6 +428,6 @@ entry: ; CHECK-NOT: arr7: -declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind -declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind +declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind diff --git a/test/CodeGen/ARM/memset-inline.ll b/test/CodeGen/ARM/memset-inline.ll index b2bd257701d3..01b21e9d3870 100644 --- a/test/CodeGen/ARM/memset-inline.ll +++ b/test/CodeGen/ARM/memset-inline.ll @@ -12,7 +12,7 @@ entry: ; CHECK-6M: str r1, [r0] ; CHECK-6M: str r1, [r0, #4] ; CHECK-6M: str r1, [r0, #8] - call void @llvm.memset.p0i8.i64(i8* %c, i8 0, i64 12, i32 8, i1 false) + call void @llvm.memset.p0i8.i64(i8* align 8 %c, i8 0, i64 12, i1 false) ret void } @@ -33,7 +33,7 @@ entry: ; CHECK-6M: str [[REG]], [sp] %buf = alloca [26 x i8], align 1 %0 = getelementptr inbounds [26 x i8], [26 x i8]* %buf, i32 0, i32 0 - call void @llvm.memset.p0i8.i32(i8* %0, i8 0, i32 26, i32 1, i1 false) + call void @llvm.memset.p0i8.i32(i8* %0, i8 0, i32 26, i1 false) call void @something(i8* %0) nounwind ret void } @@ -54,7 +54,7 @@ entry: for.body: %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] %0 = trunc i32 %i to i8 - call void @llvm.memset.p0i8.i32(i8* %p, i8 %0, i32 4, i32 1, i1 false) + call void @llvm.memset.p0i8.i32(i8* %p, i8 %0, i32 4, i1 false) call void @something(i8* %p) %inc = add nuw nsw i32 %i, 1 %exitcond = icmp eq i32 %inc, 255 @@ -78,7 +78,7 @@ entry: for.body: %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] %0 = trunc i32 %i to i8 - call void @llvm.memset.p0i8.i32(i8* %p, i8 %0, i32 4, i32 2, i1 false) + call void @llvm.memset.p0i8.i32(i8* align 2 %p, i8 %0, i32 4, i1 false) call void @something(i8* %p) %inc = add nuw nsw i32 %i, 1 %exitcond = icmp eq i32 %inc, 255 @@ -89,5 +89,5 @@ for.end: } declare void @something(i8*) nounwind -declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind diff --git a/test/CodeGen/ARM/misched-copy-arm.ll b/test/CodeGen/ARM/misched-copy-arm.ll index 53f8b8d15042..dbed4650c392 100644 --- a/test/CodeGen/ARM/misched-copy-arm.ll +++ b/test/CodeGen/ARM/misched-copy-arm.ll @@ -4,7 +4,7 @@ ; Loop counter copies should be eliminated. ; There is also a MUL here, but we don't care where it is scheduled. 
; CHECK: postinc -; CHECK: *** Final schedule for BB#2 *** +; CHECK: *** Final schedule for %bb.2 *** ; CHECK: t2LDRs ; CHECK: t2ADDrr ; CHECK: t2CMPrr @@ -32,10 +32,10 @@ for.end: ; preds = %for.body, %entry ; This case was a crasher in constrainLocalCopy. ; The problem was the t2LDR_PRE defining both the global and local lrg. -; CHECK-LABEL: *** Final schedule for BB#5 *** -; CHECK: %[[R4:vreg[0-9]+]], %[[R1:vreg[0-9]+]] = t2LDR_PRE %[[R1]] -; CHECK: %vreg{{[0-9]+}} = COPY %[[R1]] -; CHECK: %vreg{{[0-9]+}} = COPY %[[R4]] +; CHECK-LABEL: *** Final schedule for %bb.5 *** +; CHECK: %[[R4:[0-9]+]]:gpr, %[[R1:[0-9]+]]:gpr = t2LDR_PRE %[[R1]] +; CHECK: %{{[0-9]+}}:gpr = COPY %[[R1]] +; CHECK: %{{[0-9]+}}:gpr = COPY %[[R4]] ; CHECK-LABEL: MACHINEINSTRS %struct.rtx_def = type { [4 x i8], [1 x %union.rtunion_def] } %union.rtunion_def = type { i64 } diff --git a/test/CodeGen/ARM/misched-int-basic-thumb2.mir b/test/CodeGen/ARM/misched-int-basic-thumb2.mir index 32d1e03d9a1b..8b8f3f0771ff 100644 --- a/test/CodeGen/ARM/misched-int-basic-thumb2.mir +++ b/test/CodeGen/ARM/misched-int-basic-thumb2.mir @@ -37,62 +37,62 @@ } # # CHECK: ********** MI Scheduling ********** -# CHECK: SU(2): %vreg2 = t2MOVi32imm ; rGPR:%vreg2 +# CHECK: SU(2): %2:rgpr = t2MOVi32imm @g1 # CHECK_A9: Latency : 2 # CHECK_SWIFT: Latency : 2 # CHECK_R52: Latency : 2 # -# CHECK: SU(3): %vreg3 = t2LDRi12 %vreg2, 0, pred:14, pred:%noreg; mem:LD4[@g1](dereferenceable) rGPR:%vreg3,%vreg2 +# CHECK: SU(3): %3:rgpr = t2LDRi12 %2:rgpr, 0, 14, %noreg; mem:LD4[@g1](dereferenceable) # CHECK_A9: Latency : 1 # CHECK_SWIFT: Latency : 3 # CHECK_R52: Latency : 4 # -# CHECK : SU(6): %vreg6 = t2ADDrr %vreg3, %vreg3, pred:14, pred:%noreg, opt:%noreg; rGPR:%vreg6,%vreg3,%vreg3 +# CHECK : SU(6): %6 = t2ADDrr %3:rgpr, %3:rgpr, 14, %noreg, %noreg # CHECK_A9: Latency : 1 # CHECK_SWIFT: Latency : 1 # CHECK_R52: Latency : 3 -# CHECK: SU(7): %vreg7 = t2SDIV %vreg6, %vreg5, pred:14, pred:%noreg; rGPR:%vreg7,%vreg6,%vreg5 +# CHECK: SU(7): %7:rgpr = t2SDIV %6:rgpr, %5:rgpr, 14, %noreg # CHECK_A9: Latency : 0 # CHECK_SWIFT: Latency : 14 # CHECK_R52: Latency : 8 -# CHECK: SU(8): t2STRi12 %vreg7, %vreg2, 0, pred:14, pred:%noreg; mem:ST4[@g1] rGPR:%vreg7,%vreg2 +# CHECK: SU(8): t2STRi12 %7:rgpr, %2:rgpr, 0, 14, %noreg; mem:ST4[@g1] # CHECK_A9: Latency : 1 # CHECK_SWIFT: Latency : 0 # CHECK_R52: Latency : 4 # -# CHECK: SU(9): %vreg8 = t2SMULBB %vreg1, %vreg1, pred:14, pred:%noreg; rGPR:%vreg8,%vreg1,%vreg1 +# CHECK: SU(9): %8:rgpr = t2SMULBB %1:rgpr, %1:rgpr, 14, %noreg # CHECK_A9: Latency : 2 # CHECK_SWIFT: Latency : 4 # CHECK_R52: Latency : 4 # -# CHECK: SU(10): %vreg9 = t2SMLABB %vreg0, %vreg0, %vreg8, pred:14, pred:%noreg; rGPR:%vreg9,%vreg0,%vreg0,%vreg8 +# CHECK: SU(10): %9:rgpr = t2SMLABB %0:rgpr, %0:rgpr, %8:rgpr, 14, %noreg # CHECK_A9: Latency : 2 # CHECK_SWIFT: Latency : 4 # CHECK_R52: Latency : 4 # -# CHECK: SU(11): %vreg10 = t2UXTH %vreg9, 0, pred:14, pred:%noreg; rGPR:%vreg10,%vreg9 +# CHECK: SU(11): %10:rgpr = t2UXTH %9:rgpr, 0, 14, %noreg # CHECK_A9: Latency : 1 # CHECK_SWIFT: Latency : 1 # CHECK_R52: Latency : 3 # -# CHECK: SU(12): %vreg11 = t2MUL %vreg10, %vreg7, pred:14, pred:%noreg; rGPR:%vreg11,%vreg10,%vreg7 +# CHECK: SU(12): %11:rgpr = t2MUL %10:rgpr, %7:rgpr, 14, %noreg # CHECK_A9: Latency : 2 # CHECK_SWIFT: Latency : 4 # CHECK_R52: Latency : 4 # -# CHECK: SU(13): %vreg12 = t2MLA %vreg11, %vreg11, %vreg11, pred:14, pred:%noreg; rGPR:%vreg12,%vreg11,%vreg11,%vreg11 +# CHECK: SU(13): %12:rgpr = t2MLA %11:rgpr, %11:rgpr, %11:rgpr, 14, %noreg # 
CHECK_A9: Latency : 2 # CHECK_SWIFT: Latency : 4 # CHECK_R52: Latency : 4 # -# CHECK: SU(14): %vreg13, %vreg14 = t2UMULL %vreg12, %vreg12, pred:14, pred:%noreg; rGPR:%vreg13,%vreg14,%vreg12,%vreg12 +# CHECK: SU(14): %13:rgpr, %14:rgpr = t2UMULL %12:rgpr, %12:rgpr, 14, %noreg # CHECK_A9: Latency : 3 # CHECK_SWIFT: Latency : 5 # CHECK_R52: Latency : 4 # -# CHECK: SU(18): %vreg19, %vreg20 = t2UMLAL %vreg12, %vreg12, %vreg19, %vreg20, pred:14, pred:%noreg; rGPR:%vreg19,%vreg20,%vreg12,%vreg12,%vreg20 +# CHECK: SU(18): %19:rgpr, %20:rgpr = t2UMLAL %12:rgpr, %12:rgpr, %19:rgpr, %20:rgpr, 14, %noreg # CHECK_A9: Latency : 3 # CHECK_SWIFT: Latency : 7 # CHECK_R52: Latency : 4 @@ -152,24 +152,24 @@ body: | %1 = COPY %r1 %0 = COPY %r0 %2 = t2MOVi32imm @g1 - %3 = t2LDRi12 %2, 0, 14, _ :: (dereferenceable load 4 from @g1) + %3 = t2LDRi12 %2, 0, 14, %noreg :: (dereferenceable load 4 from @g1) %4 = t2MOVi32imm @g2 - %5 = t2LDRi12 %4, 0, 14, _ :: (dereferenceable load 4 from @g2) - %6 = t2ADDrr %3, %3, 14, _, _ - %7 = t2SDIV %6, %5, 14, _ - t2STRi12 %7, %2, 0, 14, _ :: (store 4 into @g1) - %8 = t2SMULBB %1, %1, 14, _ - %9 = t2SMLABB %0, %0, %8, 14, _ - %10 = t2UXTH %9, 0, 14, _ - %11 = t2MUL %10, %7, 14, _ - %12 = t2MLA %11, %11, %11, 14, _ - %13, %14 = t2UMULL %12, %12, 14, _ - %19, %16 = t2UMULL %13, %13, 14, _ - %17 = t2MLA %13, %14, %16, 14, _ - %20 = t2MLA %13, %14, %17, 14, _ - %19, %20 = t2UMLAL %12, %12, %19, %20, 14, _ + %5 = t2LDRi12 %4, 0, 14, %noreg :: (dereferenceable load 4 from @g2) + %6 = t2ADDrr %3, %3, 14, %noreg, %noreg + %7 = t2SDIV %6, %5, 14, %noreg + t2STRi12 %7, %2, 0, 14, %noreg :: (store 4 into @g1) + %8 = t2SMULBB %1, %1, 14, %noreg + %9 = t2SMLABB %0, %0, %8, 14, %noreg + %10 = t2UXTH %9, 0, 14, %noreg + %11 = t2MUL %10, %7, 14, %noreg + %12 = t2MLA %11, %11, %11, 14, %noreg + %13, %14 = t2UMULL %12, %12, 14, %noreg + %19, %16 = t2UMULL %13, %13, 14, %noreg + %17 = t2MLA %13, %14, %16, 14, %noreg + %20 = t2MLA %13, %14, %17, 14, %noreg + %19, %20 = t2UMLAL %12, %12, %19, %20, 14, %noreg %r0 = COPY %19 %r1 = COPY %20 - tBX_RET 14, _, implicit %r0, implicit %r1 + tBX_RET 14, %noreg, implicit %r0, implicit %r1 ... 
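Editor's note on the memcpy/memset/memmove test updates above (memcpy-ldm-stm.ll, memcpy-no-inline.ll, memfunc.ll, memset-inline.ll and the file preceding them): every hunk applies the same mechanical change, dropping the separate i32 alignment argument of the memory intrinsics and expressing alignment as an align attribute on the pointer operands instead. A minimal sketch of the post-change form follows; the function and value names are illustrative and not taken from the patch.

; Post-change intrinsic signatures: no i32 alignment parameter remains.
declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i1) nounwind
declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind

define void @copy16(i8* %dst, i8* %src) {
entry:
  ; 16 bytes, both pointers known to be 4-byte aligned.
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %dst, i8* align 4 %src, i32 16, i1 false)
  ; No align attribute means no alignment assumption (the old align-1 case).
  call void @llvm.memset.p0i8.i32(i8* %dst, i8 0, i32 16, i1 false)
  ret void
}

The trailing i1 argument that remains is the volatile flag, which the patch leaves untouched.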
diff --git a/test/CodeGen/ARM/misched-int-basic.mir b/test/CodeGen/ARM/misched-int-basic.mir index d5231269d732..0428ea99c803 100644 --- a/test/CodeGen/ARM/misched-int-basic.mir +++ b/test/CodeGen/ARM/misched-int-basic.mir @@ -28,37 +28,37 @@ } # CHECK: ********** MI Scheduling ********** -# CHECK: SU(2): %vreg2 = SMULBB %vreg1, %vreg1, pred:14, pred:%noreg; GPR:%vreg2,%vreg1,%vreg1 +# CHECK: SU(2): %2:gpr = SMULBB %1:gpr, %1:gpr, 14, %noreg # CHECK_A9: Latency : 2 # CHECK_SWIFT: Latency : 4 # CHECK_R52: Latency : 4 # -# CHECK: SU(3): %vreg3 = SMLABB %vreg0, %vreg0, %vreg2, pred:14, pred:%noreg; GPRnopc:%vreg3,%vreg0,%vreg0 GPR:%vreg2 +# CHECK: SU(3): %3:gprnopc = SMLABB %0:gprnopc, %0:gprnopc, %2:gpr, 14, %noreg # CHECK_A9: Latency : 2 # CHECK_SWIFT: Latency : 4 # CHECK_R52: Latency : 4 # -# CHECK: SU(4): %vreg4 = UXTH %vreg3, 0, pred:14, pred:%noreg; GPRnopc:%vreg4,%vreg3 +# CHECK: SU(4): %4:gprnopc = UXTH %3:gprnopc, 0, 14, %noreg # CHECK_A9: Latency : 1 # CHECK_SWIFT: Latency : 1 # CHECK_R52: Latency : 3 # -# CHECK: SU(5): %vreg5 = MUL %vreg4, %vreg4, pred:14, pred:%noreg, opt:%noreg; GPRnopc:%vreg5,%vreg4,%vreg4 +# CHECK: SU(5): %5:gprnopc = MUL %4:gprnopc, %4:gprnopc, 14, %noreg, %noreg # CHECK_A9: Latency : 2 # CHECK_SWIFT: Latency : 4 # CHECK_R52: Latency : 4 # -# CHECK: SU(6): %vreg6 = MLA %vreg5, %vreg5, %vreg5, pred:14, pred:%noreg, opt:%noreg; GPRnopc:%vreg6,%vreg5,%vreg5,%vreg5 +# CHECK: SU(6): %6:gprnopc = MLA %5:gprnopc, %5:gprnopc, %5:gprnopc, 14, %noreg, %noreg # CHECK_A9: Latency : 2 # CHECK_SWIFT: Latency : 4 # CHECK_R52: Latency : 4 # -# CHECK: SU(7): %vreg7, %vreg8 = UMULL %vreg6, %vreg6, pred:14, pred:%noreg, opt:%noreg; GPRnopc:%vreg7,%vreg8,%vreg6,%vreg6 +# CHECK: SU(7): %7:gprnopc, %8:gprnopc = UMULL %6:gprnopc, %6:gprnopc, 14, %noreg, %noreg # CHECK_A9: Latency : 3 # CHECK_SWIFT: Latency : 5 # CHECK_R52: Latency : 4 # -# CHECK: SU(11): %vreg13, %vreg14 = UMLAL %vreg6, %vreg6, %vreg13, %vreg14, pred:14, pred:%noreg, opt:%noreg; GPR:%vreg13 GPRnopc:%vreg14,%vreg6,%vreg6 +# CHECK: SU(11): %13:gpr, %14:gprnopc = UMLAL %6:gprnopc, %6:gprnopc, %13:gpr, %14:gprnopc, 14, %noreg, %noreg # CHECK_SWIFT: Latency : 7 # CHECK_A9: Latency : 3 # CHECK_R52: Latency : 4 @@ -111,18 +111,18 @@ body: | %1 = COPY %r1 %0 = COPY %r0 - %2 = SMULBB %1, %1, 14, _ - %3 = SMLABB %0, %0, %2, 14, _ - %4 = UXTH %3, 0, 14, _ - %5 = MUL %4, %4, 14, _, _ - %6 = MLA %5, %5, %5, 14, _, _ - %7, %8 = UMULL %6, %6, 14, _, _ - %13, %10 = UMULL %7, %7, 14, _, _ - %11 = MLA %7, %8, %10, 14, _, _ - %14 = MLA %7, %8, %11, 14, _, _ - %13, %14 = UMLAL %6, %6, %13, %14, 14, _, _ + %2 = SMULBB %1, %1, 14, %noreg + %3 = SMLABB %0, %0, %2, 14, %noreg + %4 = UXTH %3, 0, 14, %noreg + %5 = MUL %4, %4, 14, %noreg, %noreg + %6 = MLA %5, %5, %5, 14, %noreg, %noreg + %7, %8 = UMULL %6, %6, 14, %noreg, %noreg + %13, %10 = UMULL %7, %7, 14, %noreg, %noreg + %11 = MLA %7, %8, %10, 14, %noreg, %noreg + %14 = MLA %7, %8, %11, 14, %noreg, %noreg + %13, %14 = UMLAL %6, %6, %13, %14, 14, %noreg, %noreg %r0 = COPY %13 %r1 = COPY %14 - BX_RET 14, _, implicit %r0, implicit %r1 + BX_RET 14, %noreg, implicit %r0, implicit %r1 ... 
diff --git a/test/CodeGen/ARM/negate-i1.ll b/test/CodeGen/ARM/negate-i1.ll index 0503763e674f..493b26a5a840 100644 --- a/test/CodeGen/ARM/negate-i1.ll +++ b/test/CodeGen/ARM/negate-i1.ll @@ -4,7 +4,7 @@ define i32 @select_i32_neg1_or_0(i1 %a) { ; CHECK-LABEL: select_i32_neg1_or_0: -; CHECK-NEXT: @ BB#0: +; CHECK-NEXT: @ %bb.0: ; CHECK-NEXT: and r0, r0, #1 ; CHECK-NEXT: rsb r0, r0, #0 ; CHECK-NEXT: mov pc, lr @@ -15,7 +15,7 @@ define i32 @select_i32_neg1_or_0(i1 %a) { define i32 @select_i32_neg1_or_0_zeroext(i1 zeroext %a) { ; CHECK-LABEL: select_i32_neg1_or_0_zeroext: -; CHECK-NEXT: @ BB#0: +; CHECK-NEXT: @ %bb.0: ; CHECK-NEXT: rsb r0, r0, #0 ; CHECK-NEXT: mov pc, lr ; diff --git a/test/CodeGen/ARM/neon_vabs.ll b/test/CodeGen/ARM/neon_vabs.ll index 109d09582afd..4064aae65f66 100644 --- a/test/CodeGen/ARM/neon_vabs.ll +++ b/test/CodeGen/ARM/neon_vabs.ll @@ -3,7 +3,7 @@ define <4 x i32> @test1(<4 x i32> %a) nounwind { ; CHECK-LABEL: test1: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vmov d17, r2, r3 ; CHECK-NEXT: vmov d16, r0, r1 ; CHECK-NEXT: vabs.s32 q8, q8 @@ -18,7 +18,7 @@ define <4 x i32> @test1(<4 x i32> %a) nounwind { define <4 x i32> @test2(<4 x i32> %a) nounwind { ; CHECK-LABEL: test2: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vmov d17, r2, r3 ; CHECK-NEXT: vmov d16, r0, r1 ; CHECK-NEXT: vabs.s32 q8, q8 @@ -33,7 +33,7 @@ define <4 x i32> @test2(<4 x i32> %a) nounwind { define <8 x i16> @test3(<8 x i16> %a) nounwind { ; CHECK-LABEL: test3: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vmov d17, r2, r3 ; CHECK-NEXT: vmov d16, r0, r1 ; CHECK-NEXT: vabs.s16 q8, q8 @@ -48,7 +48,7 @@ define <8 x i16> @test3(<8 x i16> %a) nounwind { define <16 x i8> @test4(<16 x i8> %a) nounwind { ; CHECK-LABEL: test4: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vmov d17, r2, r3 ; CHECK-NEXT: vmov d16, r0, r1 ; CHECK-NEXT: vabs.s8 q8, q8 @@ -63,7 +63,7 @@ define <16 x i8> @test4(<16 x i8> %a) nounwind { define <4 x i32> @test5(<4 x i32> %a) nounwind { ; CHECK-LABEL: test5: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vmov d17, r2, r3 ; CHECK-NEXT: vmov d16, r0, r1 ; CHECK-NEXT: vabs.s32 q8, q8 @@ -78,7 +78,7 @@ define <4 x i32> @test5(<4 x i32> %a) nounwind { define <2 x i32> @test6(<2 x i32> %a) nounwind { ; CHECK-LABEL: test6: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vmov d16, r0, r1 ; CHECK-NEXT: vabs.s32 d16, d16 ; CHECK-NEXT: vmov r0, r1, d16 @@ -91,7 +91,7 @@ define <2 x i32> @test6(<2 x i32> %a) nounwind { define <2 x i32> @test7(<2 x i32> %a) nounwind { ; CHECK-LABEL: test7: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vmov d16, r0, r1 ; CHECK-NEXT: vabs.s32 d16, d16 ; CHECK-NEXT: vmov r0, r1, d16 @@ -104,7 +104,7 @@ define <2 x i32> @test7(<2 x i32> %a) nounwind { define <4 x i16> @test8(<4 x i16> %a) nounwind { ; CHECK-LABEL: test8: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vmov d16, r0, r1 ; CHECK-NEXT: vabs.s16 d16, d16 ; CHECK-NEXT: vmov r0, r1, d16 @@ -117,7 +117,7 @@ define <4 x i16> @test8(<4 x i16> %a) nounwind { define <8 x i8> @test9(<8 x i8> %a) nounwind { ; CHECK-LABEL: test9: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vmov d16, r0, r1 ; CHECK-NEXT: vabs.s8 d16, d16 ; CHECK-NEXT: vmov r0, r1, d16 @@ -130,7 +130,7 @@ define <8 x i8> @test9(<8 x i8> %a) nounwind { define <2 x i32> @test10(<2 x i32> %a) nounwind { ; CHECK-LABEL: test10: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vmov d16, r0, r1 ; CHECK-NEXT: vabs.s32 d16, d16 ; CHECK-NEXT: vmov r0, r1, d16 @@ -146,7 +146,7 @@ define <2 x i32> @test10(<2 x i32> %a) 
nounwind { define <4 x i32> @test11(<4 x i16> %a, <4 x i16> %b) nounwind { ; CHECK-LABEL: test11: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vmov d16, r2, r3 ; CHECK-NEXT: vmov d17, r0, r1 ; CHECK-NEXT: vabdl.u16 q8, d17, d16 @@ -163,7 +163,7 @@ define <4 x i32> @test11(<4 x i16> %a, <4 x i16> %b) nounwind { } define <8 x i16> @test12(<8 x i8> %a, <8 x i8> %b) nounwind { ; CHECK-LABEL: test12: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vmov d16, r2, r3 ; CHECK-NEXT: vmov d17, r0, r1 ; CHECK-NEXT: vabdl.u8 q8, d17, d16 @@ -181,7 +181,7 @@ define <8 x i16> @test12(<8 x i8> %a, <8 x i8> %b) nounwind { define <2 x i64> @test13(<2 x i32> %a, <2 x i32> %b) nounwind { ; CHECK-LABEL: test13: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vmov d16, r2, r3 ; CHECK-NEXT: vmov d17, r0, r1 ; CHECK-NEXT: vabdl.u32 q8, d17, d16 diff --git a/test/CodeGen/ARM/nest-register.ll b/test/CodeGen/ARM/nest-register.ll index 6b8c3dc47db1..ac7afe0007cd 100644 --- a/test/CodeGen/ARM/nest-register.ll +++ b/test/CodeGen/ARM/nest-register.ll @@ -5,7 +5,7 @@ define i8* @nest_receiver(i8* nest %arg) nounwind { ; CHECK-LABEL: nest_receiver: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: mov r0, r12 ; CHECK-NEXT: mov pc, lr ret i8* %arg diff --git a/test/CodeGen/ARM/noopt-dmb-v7.ll b/test/CodeGen/ARM/noopt-dmb-v7.ll index 56a29c8a17e8..86b27600eb4b 100644 --- a/test/CodeGen/ARM/noopt-dmb-v7.ll +++ b/test/CodeGen/ARM/noopt-dmb-v7.ll @@ -9,7 +9,7 @@ entry: ret i32 0 } -; CHECK: @ BB#0: @ %entry +; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: dmb ish ; CHECK-NEXT: dmb ish ; CHECK-NEXT: dmb ish diff --git a/test/CodeGen/ARM/overflow-intrinsic-optimizations.ll b/test/CodeGen/ARM/overflow-intrinsic-optimizations.ll new file mode 100644 index 000000000000..cff5b8998e45 --- /dev/null +++ b/test/CodeGen/ARM/overflow-intrinsic-optimizations.ll @@ -0,0 +1,238 @@ +; RUN: llc < %s -mtriple=arm-eabi -mcpu=generic | FileCheck %s + +define i32 @sadd(i32 %a, i32 %b) local_unnamed_addr #0 { +; CHECK-LABEL: sadd: +; CHECK: adds r0, r0, r1 +; CHECK-NEXT: movvc pc, lr +entry: + %0 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) + %1 = extractvalue { i32, i1 } %0, 1 + br i1 %1, label %trap, label %cont + +trap: + tail call void @llvm.trap() #2 + unreachable + +cont: + %2 = extractvalue { i32, i1 } %0, 0 + ret i32 %2 + +} + +define i32 @uadd(i32 %a, i32 %b) local_unnamed_addr #0 { +; CHECK-LABEL: uadd: +; CHECK: adds r0, r0, r1 +; CHECK-NEXT: movlo pc, lr +entry: + %0 = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) + %1 = extractvalue { i32, i1 } %0, 1 + br i1 %1, label %trap, label %cont + +trap: + tail call void @llvm.trap() #2 + unreachable + +cont: + %2 = extractvalue { i32, i1 } %0, 0 + ret i32 %2 + +} + +define i32 @ssub(i32 %a, i32 %b) local_unnamed_addr #0 { +; CHECK-LABEL: ssub: +; CHECK: subs r0, r0, r1 +; CHECK-NEXT: movvc pc, lr +entry: + %0 = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) + %1 = extractvalue { i32, i1 } %0, 1 + br i1 %1, label %trap, label %cont + +trap: + tail call void @llvm.trap() #2 + unreachable + +cont: + %2 = extractvalue { i32, i1 } %0, 0 + ret i32 %2 + +} + +define i32 @usub(i32 %a, i32 %b) local_unnamed_addr #0 { +; CHECK-LABEL: usub: +; CHECK: subs r0, r0, r1 +; CHECK-NEXT: movhs pc, lr +entry: + %0 = tail call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %a, i32 %b) + %1 = extractvalue { i32, i1 } %0, 1 + br i1 %1, label %trap, label %cont + +trap: + tail call void @llvm.trap() #2 + unreachable + +cont: + %2 = extractvalue { 
i32, i1 } %0, 0 + ret i32 %2 + +} + +define i32 @smul(i32 %a, i32 %b) local_unnamed_addr #0 { +; CHECK-LABEL: smul: +; CHECK: smull r0, r[[RHI:[0-9]+]], {{r[0-9]+}}, {{r[0-9]+}} +; CHECK-NEXT: cmp r[[RHI]], r0, asr #31 +; CHECK-NEXT: moveq pc, lr +entry: + %0 = tail call { i32, i1 } @llvm.smul.with.overflow.i32(i32 %a, i32 %b) + %1 = extractvalue { i32, i1 } %0, 1 + br i1 %1, label %trap, label %cont + +trap: + tail call void @llvm.trap() #2 + unreachable + +cont: + %2 = extractvalue { i32, i1 } %0, 0 + ret i32 %2 +} + +define i32 @umul(i32 %a, i32 %b) local_unnamed_addr #0 { +; CHECK-LABEL: umul: +; CHECK: umull r0, r[[RHI:[0-9]+]], {{r[0-9]+}}, {{r[0-9]+}} +; CHECK-NEXT: cmp r[[RHI]], #0 +; CHECK-NEXT: moveq pc, lr +entry: + %0 = tail call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %a, i32 %b) + %1 = extractvalue { i32, i1 } %0, 1 + br i1 %1, label %trap, label %cont + +trap: + tail call void @llvm.trap() #2 + unreachable + +cont: + %2 = extractvalue { i32, i1 } %0, 0 + ret i32 %2 +} + +define void @sum(i32* %a, i32* %b, i32 %n) local_unnamed_addr #0 { +; CHECK-LABEL: sum: +; CHECK: ldr [[R0:r[0-9]+]], +; CHECK-NEXT: ldr [[R1:r[0-9]+|lr]], +; CHECK-NEXT: adds [[R2:r[0-9]+]], [[R1]], [[R0]] +; CHECK-NEXT: strvc [[R2]], +; CHECK-NEXT: addsvc +; CHECK-NEXT: bvs +entry: + %cmp7 = icmp eq i32 %n, 0 + br i1 %cmp7, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret void + +for.body: + %i.08 = phi i32 [ %7, %cont2 ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32, i32* %b, i32 %i.08 + %0 = load i32, i32* %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds i32, i32* %a, i32 %i.08 + %1 = load i32, i32* %arrayidx1, align 4 + %2 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %1, i32 %0) + %3 = extractvalue { i32, i1 } %2, 1 + br i1 %3, label %trap, label %cont + +trap: + tail call void @llvm.trap() #2 + unreachable + +cont: + %4 = extractvalue { i32, i1 } %2, 0 + store i32 %4, i32* %arrayidx1, align 4 + %5 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %i.08, i32 1) + %6 = extractvalue { i32, i1 } %5, 1 + br i1 %6, label %trap, label %cont2 + +cont2: + %7 = extractvalue { i32, i1 } %5, 0 + %cmp = icmp eq i32 %7, %n + br i1 %cmp, label %for.cond.cleanup, label %for.body + +} + +define void @extern_loop(i32 %n) local_unnamed_addr #0 { +; Do not replace the compare around the clobbering call. +; CHECK: add {{r[0-9]+}}, {{r[0-9]+}}, #1 +; CHECK-NEXT: bl external_fn +; CHECK: cmp +entry: + %0 = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %n, i32 1) + %1 = extractvalue { i32, i1 } %0, 1 + br i1 %1, label %trap, label %cont.lr.ph + +cont.lr.ph: + %2 = extractvalue { i32, i1 } %0, 0 + %cmp5 = icmp sgt i32 %2, 0 + br i1 %cmp5, label %for.body.preheader, label %for.cond.cleanup + +for.body.preheader: + br label %for.body + +trap: + tail call void @llvm.trap() #2 + unreachable + +for.cond.cleanup: + ret void + +for.body: + %i.046 = phi i32 [ %5, %cont1 ], [ 0, %for.body.preheader ] + tail call void bitcast (void (...)* @external_fn to void ()*)() #4 + %3 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %i.046, i32 1) + %4 = extractvalue { i32, i1 } %3, 1 + br i1 %4, label %trap, label %cont1 + +cont1: + %5 = extractvalue { i32, i1 } %3, 0 + %cmp = icmp slt i32 %5, %2 + br i1 %cmp, label %for.body, label %for.cond.cleanup +} + +declare void @external_fn(...) 
local_unnamed_addr #0 + +define i32 @are_equal(i32* nocapture readonly %a1, i32* nocapture readonly %a2, i32 %n) local_unnamed_addr #0 { +; CHECK-LABEL: are_equal +; CHECK: subs r{{[0-9]+}}, r{{[0-9]+}}, #1 +; CHECK-NEXT: bne +entry: + %tobool7 = icmp eq i32 %n, 0 + br i1 %tobool7, label %while.end, label %land.rhs.preheader + +land.rhs.preheader: + br label %land.rhs + +while.cond: + %tobool = icmp eq i32 %dec9, 0 + br i1 %tobool, label %while.end, label %land.rhs + +land.rhs: + %dec9.in = phi i32 [ %dec9, %while.cond ], [ %n, %land.rhs.preheader ] + %dec9 = add nsw i32 %dec9.in, -1 + %arrayidx = getelementptr inbounds i32, i32* %a1, i32 %dec9 + %0 = load i32, i32* %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds i32, i32* %a2, i32 %dec9 + %1 = load i32, i32* %arrayidx1, align 4 + %cmp = icmp eq i32 %0, %1 + br i1 %cmp, label %while.cond, label %while.end + +while.end: + %n.addr.0.lcssa = phi i32 [ 0, %entry ], [ 0, %while.cond ], [ %dec9.in, %land.rhs ] + %cmp2 = icmp slt i32 %n.addr.0.lcssa, 1 + %conv = zext i1 %cmp2 to i32 + ret i32 %conv +} + +declare void @llvm.trap() #2 +declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) #1 +declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) #1 +declare { i32, i1 } @llvm.ssub.with.overflow.i32(i32, i32) #1 +declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) #1 +declare { i32, i1 } @llvm.smul.with.overflow.i32(i32, i32) #1 +declare { i32, i1 } @llvm.umul.with.overflow.i32(i32, i32) #1 diff --git a/test/CodeGen/ARM/peephole-phi.mir b/test/CodeGen/ARM/peephole-phi.mir new file mode 100644 index 000000000000..54ae0115840b --- /dev/null +++ b/test/CodeGen/ARM/peephole-phi.mir @@ -0,0 +1,103 @@ +# RUN: llc -o - %s -mtriple=armv7-- -verify-machineinstrs -run-pass=peephole-opt | FileCheck %s +# +# Make sure we do not crash on this input. +# Note that this input could in principle be optimized, but right now we don't +# have this case implemented so the output should simply be unchanged. +# +# CHECK-LABEL: name: func +# CHECK: body: | +# CHECK: bb.0: +# CHECK: Bcc %bb.2, 1, undef %cpsr +# +# CHECK: bb.1: +# CHECK: %0:dpr = IMPLICIT_DEF +# CHECK: %1:gpr, %2:gpr = VMOVRRD %0, 14, %noreg +# CHECK: B %bb.3 +# +# CHECK: bb.2: +# CHECK: %3:spr = IMPLICIT_DEF +# CHECK: %4:gpr = VMOVRS %3, 14, %noreg +# +# CHECK: bb.3: +# CHECK: %5:gpr = PHI %1, %bb.1, %4, %bb.2 +# CHECK: %6:spr = VMOVSR %5, 14, %noreg +--- +name: func0 +tracksRegLiveness: true +body: | + bb.0: + Bcc %bb.2, 1, undef %cpsr + + bb.1: + %0:dpr = IMPLICIT_DEF + %1:gpr, %2:gpr = VMOVRRD %0:dpr, 14, %noreg + B %bb.3 + + bb.2: + %3:spr = IMPLICIT_DEF + %4:gpr = VMOVRS %3:spr, 14, %noreg + + bb.3: + %5:gpr = PHI %1, %bb.1, %4, %bb.2 + %6:spr = VMOVSR %5, 14, %noreg +... + +# CHECK-LABEL: name: func1 +# CHECK: %6:spr = PHI %0, %bb.1, %2, %bb.2 +# CHEKC: %7:spr = COPY %6 +--- +name: func1 +tracksRegLiveness: true +body: | + bb.0: + Bcc %bb.2, 1, undef %cpsr + + bb.1: + %1:spr = IMPLICIT_DEF + %0:gpr = VMOVRS %1, 14, %noreg + B %bb.3 + + bb.2: + %3:spr = IMPLICIT_DEF + %2:gpr = VMOVRS %3:spr, 14, %noreg + + bb.3: + %4:gpr = PHI %0, %bb.1, %2, %bb.2 + %5:spr = VMOVSR %4, 14, %noreg +... + +# The current implementation doesn't perform any transformations if undef +# operands are involved. 
+# CHECK-LABEL: name: func-undefops +# CHECK: body: | +# CHECK: bb.0: +# CHECK: Bcc %bb.2, 1, undef %cpsr +# +# CHECK: bb.1: +# CHECK: %0:gpr = VMOVRS undef %1:spr, 14, %noreg +# CHECK: B %bb.3 +# +# CHECK: bb.2: +# CHECK: %2:gpr = VMOVRS undef %3:spr, 14, %noreg +# +# CHECK: bb.3: +# CHECK: %4:gpr = PHI %0, %bb.1, %2, %bb.2 +# CHECK: %5:spr = VMOVSR %4, 14, %noreg +--- +name: func-undefops +tracksRegLiveness: true +body: | + bb.0: + Bcc %bb.2, 1, undef %cpsr + + bb.1: + %0:gpr = VMOVRS undef %1:spr, 14, %noreg + B %bb.3 + + bb.2: + %2:gpr = VMOVRS undef %3:spr, 14, %noreg + + bb.3: + %4:gpr = PHI %0, %bb.1, %2, %bb.2 + %5:spr = VMOVSR %4, 14, %noreg +... diff --git a/test/CodeGen/ARM/pei-swiftself.mir b/test/CodeGen/ARM/pei-swiftself.mir index 055efeea3289..d2d3469458b7 100644 --- a/test/CodeGen/ARM/pei-swiftself.mir +++ b/test/CodeGen/ARM/pei-swiftself.mir @@ -39,7 +39,7 @@ body: | ; not just use %r10 for that. ; CHECK-NOT: STRi12 %1,{{.*}}%r10 - STRi12 %r1, %stack.0, 0, 14, _ :: (store 4) + STRi12 %r1, %stack.0, 0, 14, %noreg :: (store 4) ; use the swiftself parameter value. KILL %r10 diff --git a/test/CodeGen/ARM/pr25317.ll b/test/CodeGen/ARM/pr25317.ll index 6770c6f84ecd..679b5a0299af 100644 --- a/test/CodeGen/ARM/pr25317.ll +++ b/test/CodeGen/ARM/pr25317.ll @@ -8,4 +8,4 @@ target triple = "armv7--linux-gnueabihf" define void @f(i32* %p) { call void asm sideeffect "str lr, $0", "=*o"(i32* %p) ret void -} \ No newline at end of file +} diff --git a/test/CodeGen/ARM/pr34045-2.ll b/test/CodeGen/ARM/pr34045-2.ll new file mode 100644 index 000000000000..94bc3ea3e4fc --- /dev/null +++ b/test/CodeGen/ARM/pr34045-2.ll @@ -0,0 +1,25 @@ +; RUN: llc < %s -mtriple thumbv7 | FileCheck %s + +define hidden void @foo(i32* %ptr, i1 zeroext %long_blocks) { +entry: +; This test is actually checking that no cycle is introduced but at least we +; want to see one umull. 
+; CHECK: umull + %0 = load i32, i32* %ptr, align 4 + %conv.i.i13.i = zext i32 %0 to i64 + %mul.i.i14.i = mul nuw nsw i64 %conv.i.i13.i, 18782 + %1 = load i32, i32* undef, align 4 + %conv4.i.i16.i = zext i32 %1 to i64 + %add5.i.i17.i = add nuw nsw i64 %mul.i.i14.i, %conv4.i.i16.i + %shr.i.i18.i = lshr i64 %add5.i.i17.i, 32 + %add10.i.i20.i = add nuw nsw i64 %shr.i.i18.i, %add5.i.i17.i + %conv11.i.i21.i = trunc i64 %add10.i.i20.i to i32 + %x.0.neg.i.i26.i = sub i32 -2, %conv11.i.i21.i + %sub.i.i27.i = add i32 %x.0.neg.i.i26.i, 0 + store i32 %sub.i.i27.i, i32* %ptr, align 4 + br label %while.body.i + +while.body.i: ; preds = %while.body.i, %entry + br label %while.body.i +} + diff --git a/test/CodeGen/ARM/pr34045.ll b/test/CodeGen/ARM/pr34045.ll new file mode 100644 index 000000000000..5d52bfe591b7 --- /dev/null +++ b/test/CodeGen/ARM/pr34045.ll @@ -0,0 +1,53 @@ +; RUN: llc < %s -mtriple thumbv7 | FileCheck %s + +; ModuleID = 'bugpoint-reduced-simplified.bc' +define hidden void @bn_mul_comba8(i32* nocapture %r, i32* nocapture readonly %a, i32* nocapture readonly %b) local_unnamed_addr { +entry: +; This test is actually checking that no cycle is introduced but at least we +; want to see a couple of umull and one umlal in the output +; CHECK: umull +; CHECK: umull +; CHECK: umlal + %0 = load i32, i32* %a, align 4 + %conv = zext i32 %0 to i64 + %1 = load i32, i32* %b, align 4 + %conv2 = zext i32 %1 to i64 + %mul = mul nuw i64 %conv2, %conv + %shr = lshr i64 %mul, 32 + %2 = load i32, i32* %a, align 4 + %conv13 = zext i32 %2 to i64 + %3 = load i32, i32* undef, align 4 + %conv15 = zext i32 %3 to i64 + %mul16 = mul nuw i64 %conv15, %conv13 + %add18 = add i64 %mul16, %shr + %shr20 = lshr i64 %add18, 32 + %conv21 = trunc i64 %shr20 to i32 + %4 = load i32, i32* undef, align 4 + %conv34 = zext i32 %4 to i64 + %5 = load i32, i32* %b, align 4 + %conv36 = zext i32 %5 to i64 + %mul37 = mul nuw i64 %conv36, %conv34 + %conv38 = and i64 %add18, 4294967295 + %add39 = add i64 %mul37, %conv38 + %shr41 = lshr i64 %add39, 32 + %conv42 = trunc i64 %shr41 to i32 + %add43 = add i32 %conv42, %conv21 + %cmp44 = icmp ult i32 %add43, %conv42 + %c1.1 = zext i1 %cmp44 to i32 + %add65 = add i32 0, %c1.1 + %add86 = add i32 %add65, 0 + %add107 = add i32 %add86, 0 + %conv124 = zext i32 %add107 to i64 + %add125 = add i64 0, %conv124 + %conv145 = and i64 %add125, 4294967295 + %add146 = add i64 %conv145, 0 + %conv166 = and i64 %add146, 4294967295 + %add167 = add i64 %conv166, 0 + %conv187 = and i64 %add167, 4294967295 + %add188 = add i64 %conv187, 0 + %conv189 = trunc i64 %add188 to i32 + %arrayidx200 = getelementptr inbounds i32, i32* %r, i32 3 + store i32 %conv189, i32* %arrayidx200, align 4 + ret void +} + diff --git a/test/CodeGen/ARM/pr35103.ll b/test/CodeGen/ARM/pr35103.ll new file mode 100644 index 000000000000..4f0392f45fe4 --- /dev/null +++ b/test/CodeGen/ARM/pr35103.ll @@ -0,0 +1,43 @@ +; RUN: llc -O2 -mtriple arm < %s | FileCheck %s + +; Function Attrs: norecurse nounwind readnone +define i32 @foo(i32 %vreg0, i32 %vreg1, i32 %vreg2, i32 %vreg3, i32 %vreg4) local_unnamed_addr { +entry: + %conv = zext i32 %vreg2 to i64 + %conv1 = zext i32 %vreg0 to i64 + %add2 = add nuw nsw i64 %conv, %conv1 + %shr = lshr i64 %add2, 32 + %conv4 = trunc i64 %shr to i32 + %conv5 = and i64 %add2, 4294967295 + %add8 = add nuw nsw i64 %conv5, %conv1 + %shr9 = lshr i64 %add8, 32 + %conv10 = trunc i64 %shr9 to i32 + %add11 = add nuw nsw i32 %conv10, %conv4 + %conv12 = zext i32 %vreg3 to i64 + %conv14 = zext i32 %vreg1 to i64 + %add15 = add nuw 
nsw i64 %conv12, %conv14 + %shr16 = lshr i64 %add15, 32 + %conv19 = zext i32 %vreg4 to i64 + %add20 = add nuw nsw i64 %shr16, %conv19 + %shr22 = lshr i64 %add20, 32 + %conv23 = trunc i64 %shr22 to i32 + %add24 = add nuw nsw i32 %add11, %conv23 + ret i32 %add24 + +; CHECK: push {r11, lr} +; CHECK-NEXT: adds r2, r2, r0 +; CHECK-NEXT: mov r12, #0 +; CHECK-NEXT: adc lr, r12, #0 +; CHECK-NEXT: adds r0, r2, r0 +; CHECK-NEXT: ldr r2, [sp, #8] +; CHECK-NEXT: adc r0, r12, #0 +; CHECK-NEXT: adds r1, r3, r1 +; The interesting bit is the next instruction which looks +; like is computing a dead r1 but is actually computing a carry +; for the final adc. +; CHECK-NEXT: adcs r1, r2, #0 +; CHECK-NEXT: adc r0, r0, lr +; CHECK-NEXT: pop {r11, lr} +; CHECK-NEXT: mov pc, lr + +} diff --git a/test/CodeGen/ARM/preferred-align.ll b/test/CodeGen/ARM/preferred-align.ll index a9a17229e064..26dbb1cbd546 100644 --- a/test/CodeGen/ARM/preferred-align.ll +++ b/test/CodeGen/ARM/preferred-align.ll @@ -18,4 +18,4 @@ @var16 = global i16 zeroinitializer ; CHECK: .globl var16 -; CHECK-NEXT: .p2align 1 \ No newline at end of file +; CHECK-NEXT: .p2align 1 diff --git a/test/CodeGen/ARM/prera-ldst-aliasing.mir b/test/CodeGen/ARM/prera-ldst-aliasing.mir index ce37106ed8d2..cc3200860796 100644 --- a/test/CodeGen/ARM/prera-ldst-aliasing.mir +++ b/test/CodeGen/ARM/prera-ldst-aliasing.mir @@ -26,15 +26,15 @@ body: | %1 : gpr = COPY %r1 %0 : gpr = COPY %r0 - %2 : gpr = t2LDRi12 %1, 0, 14, _ :: (load 4 from %ir.y) - t2STRi12 killed %2, %0, 0, 14, _ :: (store 4 into %ir.x) - %3 : gpr = t2LDRi12 %1, 4, 14, _ :: (load 4 from %ir.arrayidx2) - t2STRi12 killed %3, %0, 4, 14, _ :: (store 4 into %ir.arrayidx3) + %2 : gpr = t2LDRi12 %1, 0, 14, %noreg :: (load 4 from %ir.y) + t2STRi12 killed %2, %0, 0, 14, %noreg :: (store 4 into %ir.x) + %3 : gpr = t2LDRi12 %1, 4, 14, %noreg :: (load 4 from %ir.arrayidx2) + t2STRi12 killed %3, %0, 4, 14, %noreg :: (store 4 into %ir.arrayidx3) ; CHECK: t2LDRi12 ; CHECK-NEXT: t2LDRi12 ; CHECK-NEXT: t2STRi12 ; CHECK-NEXT: t2STRi12 - tBX_RET 14, _ + tBX_RET 14, %noreg ... diff --git a/test/CodeGen/ARM/prera-ldst-insertpt.mir b/test/CodeGen/ARM/prera-ldst-insertpt.mir index eafcc7c36d33..c0202eb84faf 100644 --- a/test/CodeGen/ARM/prera-ldst-insertpt.mir +++ b/test/CodeGen/ARM/prera-ldst-insertpt.mir @@ -28,14 +28,14 @@ body: | %2 : rgpr = COPY %r2 %1 : rgpr = COPY %r1 %0 : gpr = COPY %r0 - %3 : rgpr = t2MUL %2, %2, 14, _ - %4 : rgpr = t2MUL %1, %1, 14, _ + %3 : rgpr = t2MUL %2, %2, 14, %noreg + %4 : rgpr = t2MUL %1, %1, 14, %noreg %5 : rgpr = t2MOVi32imm -858993459 - %6 : rgpr, %7 : rgpr = t2UMULL killed %3, %5, 14, _ - %8 : rgpr, %9 : rgpr = t2UMULL killed %4, %5, 14, _ - t2STRi12 %1, %0, 0, 14, _ :: (store 4) - %10 : rgpr = t2LSLri %2, 1, 14, _, _ - t2STRi12 killed %10, %0, 4, 14, _ :: (store 4) + %6 : rgpr, %7 : rgpr = t2UMULL killed %3, %5, 14, %noreg + %8 : rgpr, %9 : rgpr = t2UMULL killed %4, %5, 14, %noreg + t2STRi12 %1, %0, 0, 14, %noreg :: (store 4) + %10 : rgpr = t2LSLri %2, 1, 14, %noreg, %noreg + t2STRi12 killed %10, %0, 4, 14, %noreg :: (store 4) ; Make sure we move the paired stores next to each other, and ; insert them in an appropriate location. 
@@ -44,17 +44,17 @@ body: | ; CHECK-NEXT: t2MOVi ; CHECK-NEXT: t2ADDrs - %11 : rgpr = t2MOVi 55, 14, _, _ - %12 : gprnopc = t2ADDrs %11, killed %7, 19, 14, _, _ - t2STRi12 killed %12, %0, 16, 14, _ :: (store 4) - %13 : gprnopc = t2ADDrs %11, killed %9, 19, 14, _, _ - t2STRi12 killed %13, %0, 20, 14, _ :: (store 4) + %11 : rgpr = t2MOVi 55, 14, %noreg, %noreg + %12 : gprnopc = t2ADDrs %11, killed %7, 19, 14, %noreg, %noreg + t2STRi12 killed %12, %0, 16, 14, %noreg :: (store 4) + %13 : gprnopc = t2ADDrs %11, killed %9, 19, 14, %noreg, %noreg + t2STRi12 killed %13, %0, 20, 14, %noreg :: (store 4) ; Make sure we move the paired stores next to each other. ; CHECK: t2STRi12 killed %12, ; CHECK-NEXT: t2STRi12 killed %13, - tBX_RET 14, _ + tBX_RET 14, %noreg --- # CHECK-LABEL: name: b name: b @@ -71,11 +71,11 @@ body: | %2 : rgpr = COPY %r2 %1 : rgpr = COPY %r1 %0 : gpr = COPY %r0 - t2STRi12 %1, %0, 0, 14, _ :: (store 4) - %10 : rgpr = t2LSLri %2, 1, 14, _, _ - t2STRi12 killed %10, %0, 4, 14, _ :: (store 4) - %3 : rgpr = t2MUL %2, %2, 14, _ - t2STRi12 %3, %0, 8, 14, _ :: (store 4) + t2STRi12 %1, %0, 0, 14, %noreg :: (store 4) + %10 : rgpr = t2LSLri %2, 1, 14, %noreg, %noreg + t2STRi12 killed %10, %0, 4, 14, %noreg :: (store 4) + %3 : rgpr = t2MUL %2, %2, 14, %noreg + t2STRi12 %3, %0, 8, 14, %noreg :: (store 4) ; Make sure we move the paired stores next to each other, and ; insert them in an appropriate location. @@ -85,21 +85,21 @@ body: | ; CHECK-NEXT: t2MUL ; CHECK-NEXT: t2MOVi32imm - %4 : rgpr = t2MUL %1, %1, 14, _ + %4 : rgpr = t2MUL %1, %1, 14, %noreg %5 : rgpr = t2MOVi32imm -858993459 - %6 : rgpr, %7 : rgpr = t2UMULL killed %3, %5, 14, _ - %8 : rgpr, %9 : rgpr = t2UMULL killed %4, %5, 14, _ - %10 : rgpr = t2LSLri %2, 1, 14, _, _ - %11 : rgpr = t2MOVi 55, 14, _, _ - %12 : gprnopc = t2ADDrs %11, killed %7, 19, 14, _, _ - t2STRi12 killed %12, %0, 16, 14, _ :: (store 4) - %13 : gprnopc = t2ADDrs %11, killed %9, 19, 14, _, _ - t2STRi12 killed %13, %0, 20, 14, _ :: (store 4) + %6 : rgpr, %7 : rgpr = t2UMULL killed %3, %5, 14, %noreg + %8 : rgpr, %9 : rgpr = t2UMULL killed %4, %5, 14, %noreg + %10 : rgpr = t2LSLri %2, 1, 14, %noreg, %noreg + %11 : rgpr = t2MOVi 55, 14, %noreg, %noreg + %12 : gprnopc = t2ADDrs %11, killed %7, 19, 14, %noreg, %noreg + t2STRi12 killed %12, %0, 16, 14, %noreg :: (store 4) + %13 : gprnopc = t2ADDrs %11, killed %9, 19, 14, %noreg, %noreg + t2STRi12 killed %13, %0, 20, 14, %noreg :: (store 4) ; Make sure we move the paired stores next to each other. ; CHECK: t2STRi12 {{.*}}, 16 ; CHECK-NEXT: t2STRi12 {{.*}}, 20 - tBX_RET 14, _ + tBX_RET 14, %noreg ... 
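Editor's note on the overflow-intrinsic-optimizations.ll test introduced earlier in this patch: it checks that the i1 overflow result of the llvm.*.with.overflow intrinsics is consumed straight from the ARM condition flags (adds/subs followed by a predicated return in its CHECK lines) rather than through a separate compare. A minimal sketch of the IR shape being exercised, using illustrative names rather than the test's own:

declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32)
declare void @llvm.trap()

define i32 @checked_add(i32 %a, i32 %b) {
entry:
  ; The returned pair is the wrapped sum plus an overflow flag.
  %res = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
  %ov = extractvalue { i32, i1 } %res, 1
  br i1 %ov, label %trap, label %cont

trap:
  ; Overflow path: abort.
  call void @llvm.trap()
  unreachable

cont:
  %sum = extractvalue { i32, i1 } %res, 0
  ret i32 %sum
}

For the signed-add case the test expects this shape to lower to adds r0, r0, r1 followed by movvc pc, lr.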
diff --git a/test/CodeGen/ARM/scavenging.mir b/test/CodeGen/ARM/scavenging.mir index dfd02fbee75c..c7fb7b3e86c7 100644 --- a/test/CodeGen/ARM/scavenging.mir +++ b/test/CodeGen/ARM/scavenging.mir @@ -25,36 +25,36 @@ body: | %r7 = IMPLICIT_DEF %0 : tgpr = IMPLICIT_DEF - %0 = tADDhirr %0, %sp, 14, _ - tSTRi %r0, %0, 0, 14, _ + %0 = tADDhirr %0, %sp, 14, %noreg + tSTRi %r0, %0, 0, 14, %noreg %1 : tgpr = IMPLICIT_DEF - %1 = tADDhirr %1, %sp, 14, _ - tSTRi %r1, %1, 0, 14, _ + %1 = tADDhirr %1, %sp, 14, %noreg + tSTRi %r1, %1, 0, 14, %noreg %2 : tgpr = IMPLICIT_DEF - %2 = tADDhirr %2, %sp, 14, _ - tSTRi %r2, %2, 0, 14, _ + %2 = tADDhirr %2, %sp, 14, %noreg + tSTRi %r2, %2, 0, 14, %noreg %3 : tgpr = IMPLICIT_DEF - %3 = tADDhirr %3, %sp, 14, _ - tSTRi %r3, %3, 0, 14, _ + %3 = tADDhirr %3, %sp, 14, %noreg + tSTRi %r3, %3, 0, 14, %noreg %4 : tgpr = IMPLICIT_DEF - %4 = tADDhirr %4, %sp, 14, _ - tSTRi %r4, %4, 0, 14, _ + %4 = tADDhirr %4, %sp, 14, %noreg + tSTRi %r4, %4, 0, 14, %noreg %5 : tgpr = IMPLICIT_DEF - %5 = tADDhirr %5, %sp, 14, _ - tSTRi %r5, %5, 0, 14, _ + %5 = tADDhirr %5, %sp, 14, %noreg + tSTRi %r5, %5, 0, 14, %noreg %6 : tgpr = IMPLICIT_DEF - %6 = tADDhirr %6, %sp, 14, _ - tSTRi %r6, %6, 0, 14, _ + %6 = tADDhirr %6, %sp, 14, %noreg + tSTRi %r6, %6, 0, 14, %noreg %7 : tgpr = IMPLICIT_DEF - %7 = tADDhirr %7, %sp, 14, _ - tSTRi %r7, %7, 0, 14, _ + %7 = tADDhirr %7, %sp, 14, %noreg + tSTRi %r7, %7, 0, 14, %noreg KILL %r0 KILL %r1 diff --git a/test/CodeGen/ARM/sched-it-debug-nodes.mir b/test/CodeGen/ARM/sched-it-debug-nodes.mir index c055508e6c7e..c09c2db7ef70 100644 --- a/test/CodeGen/ARM/sched-it-debug-nodes.mir +++ b/test/CodeGen/ARM/sched-it-debug-nodes.mir @@ -32,9 +32,9 @@ ; debug value as KILL'ed, resulting in a DEBUG_VALUE node changing codegen! (or ; hopefully, triggering an assert). 
- ; CHECK: BUNDLE %ITSTATE - ; CHECK: * DBG_VALUE %R1, %noreg, !"u" - ; CHECK-NOT: * DBG_VALUE %R1, %noreg, !"u" + ; CHECK: BUNDLE implicit-def dead %itstate + ; CHECK: * DBG_VALUE debug-use %r1, debug-use %noreg, !"u" + ; CHECK-NOT: * DBG_VALUE killed %r1, %noreg, !"u" declare arm_aapcscc void @g(%struct.s*, i8*, i32) #1 @@ -131,27 +131,27 @@ body: | bb.0.entry: liveins: %r0, %r1, %r2, %r3, %lr, %r7 - DBG_VALUE debug-use %r0, debug-use _, !18, !27, debug-location !28 - DBG_VALUE debug-use %r1, debug-use _, !19, !27, debug-location !28 - DBG_VALUE debug-use %r2, debug-use _, !20, !27, debug-location !28 - DBG_VALUE debug-use %r3, debug-use _, !21, !27, debug-location !28 - t2CMPri %r3, 4, 14, _, implicit-def %cpsr, debug-location !31 - DBG_VALUE debug-use %r1, debug-use _, !19, !27, debug-location !28 - %r0 = t2MOVi -1, 3, %cpsr, _, implicit undef %r0 - DBG_VALUE debug-use %r1, debug-use _, !19, !27, debug-location !28 + DBG_VALUE debug-use %r0, debug-use %noreg, !18, !27, debug-location !28 + DBG_VALUE debug-use %r1, debug-use %noreg, !19, !27, debug-location !28 + DBG_VALUE debug-use %r2, debug-use %noreg, !20, !27, debug-location !28 + DBG_VALUE debug-use %r3, debug-use %noreg, !21, !27, debug-location !28 + t2CMPri %r3, 4, 14, %noreg, implicit-def %cpsr, debug-location !31 + DBG_VALUE debug-use %r1, debug-use %noreg, !19, !27, debug-location !28 + %r0 = t2MOVi -1, 3, %cpsr, %noreg, implicit undef %r0 + DBG_VALUE debug-use %r1, debug-use %noreg, !19, !27, debug-location !28 tBX_RET 3, %cpsr, implicit %r0, debug-location !34 - %sp = frame-setup t2STMDB_UPD %sp, 14, _, killed %r7, killed %lr + %sp = frame-setup t2STMDB_UPD %sp, 14, %noreg, killed %r7, killed %lr frame-setup CFI_INSTRUCTION def_cfa_offset 8 frame-setup CFI_INSTRUCTION offset %lr, -4 frame-setup CFI_INSTRUCTION offset %r7, -8 - DBG_VALUE debug-use %r0, debug-use _, !18, !27, debug-location !28 - DBG_VALUE debug-use %r1, debug-use _, !19, !27, debug-location !28 - DBG_VALUE debug-use %r2, debug-use _, !20, !27, debug-location !28 - DBG_VALUE debug-use %r3, debug-use _, !21, !27, debug-location !28 - %r1 = tMOVr killed %r2, 14, _, debug-location !32 - %r2 = tMOVr killed %r3, 14, _, debug-location !32 - tBL 14, _, @g, csr_aapcs, implicit-def dead %lr, implicit %sp, implicit %r0, implicit %r1, implicit %r2, implicit-def %sp, debug-location !32 - %r0 = t2MOVi 0, 14, _, _ - %sp = t2LDMIA_RET %sp, 14, _, def %r7, def %pc, implicit %r0 + DBG_VALUE debug-use %r0, debug-use %noreg, !18, !27, debug-location !28 + DBG_VALUE debug-use %r1, debug-use %noreg, !19, !27, debug-location !28 + DBG_VALUE debug-use %r2, debug-use %noreg, !20, !27, debug-location !28 + DBG_VALUE debug-use %r3, debug-use %noreg, !21, !27, debug-location !28 + %r1 = tMOVr killed %r2, 14, %noreg, debug-location !32 + %r2 = tMOVr killed %r3, 14, %noreg, debug-location !32 + tBL 14, %noreg, @g, csr_aapcs, implicit-def dead %lr, implicit %sp, implicit %r0, implicit %r1, implicit %r2, implicit-def %sp, debug-location !32 + %r0 = t2MOVi 0, 14, %noreg, %noreg + %sp = t2LDMIA_RET %sp, 14, %noreg, def %r7, def %pc, implicit %r0 ... 
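Editor's note on the new pr34045.ll, pr34045-2.ll and pr35103.ll tests above: they are regression tests for instruction-selection cycles triggered by 64-bit multiplies of zero-extended 32-bit values, and the only concrete expectation is that umull (plus one umlal in pr34045.ll) is still selected. A reduced sketch of the underlying pattern, with illustrative names not taken from the tests:

define i32 @mulhi32(i32 %a, i32 %b) {
entry:
  ; Widen both operands and multiply in 64 bits; the high 32 bits of the
  ; product are what the second destination register of umull carries.
  %wa = zext i32 %a to i64
  %wb = zext i32 %b to i64
  %prod = mul nuw i64 %wa, %wb
  %hi = lshr i64 %prod, 32
  %res = trunc i64 %hi to i32
  ret i32 %res
}

On an ARM target this widening multiply is expected to select a single umull rather than a libcall or a chain of 32-bit multiplies.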
diff --git a/test/CodeGen/ARM/select-imm.ll b/test/CodeGen/ARM/select-imm.ll index e2dc5542df04..b608a200c5e1 100644 --- a/test/CodeGen/ARM/select-imm.ll +++ b/test/CodeGen/ARM/select-imm.ll @@ -3,9 +3,15 @@ ; RUN: llc -mtriple=arm-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - \ ; RUN: | FileCheck %s --check-prefix=ARMT2 +; RUN: llc -mtriple=thumb-eabi -mcpu=cortex-m0 %s -o - \ +; RUN: | FileCheck %s --check-prefix=THUMB1 + ; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - \ ; RUN: | FileCheck %s --check-prefix=THUMB2 +; RUN: llc -mtriple=thumbv8m.base-eabi %s -o - \ +; RUN: | FileCheck %s --check-prefix=V8MBASE + define i32 @t1(i32 %c) nounwind readnone { entry: ; ARM-LABEL: t1: @@ -17,6 +23,14 @@ entry: ; ARMT2: movw [[R:r[0-1]]], #357 ; ARMT2: movwgt [[R]], #123 +; THUMB1-LABEL: t1: +; THUMB1: mov r1, r0 +; THUMB1: movs r2, #255 +; THUMB1: adds r2, #102 +; THUMB1: movs r0, #123 +; THUMB1: cmp r1, #1 +; THUMB1: bgt + ; THUMB2-LABEL: t1: ; THUMB2: movw [[R:r[0-1]]], #357 ; THUMB2: movgt [[R]], #123 @@ -37,6 +51,10 @@ entry: ; ARMT2: mov [[R:r[0-1]]], #123 ; ARMT2: movwgt [[R]], #357 +; THUMB1-LABEL: t2: +; THUMB1: cmp r{{[0-9]+}}, #1 +; THUMB1: bgt + ; THUMB2-LABEL: t2: ; THUMB2: mov{{(s|\.w)}} [[R:r[0-1]]], #123 ; THUMB2: movwgt [[R]], #357 @@ -56,6 +74,13 @@ entry: ; ARMT2: mov [[R:r[0-1]]], #0 ; ARMT2: movweq [[R]], #1 +; THUMB1-LABEL: t3: +; THUMB1: mov r1, r0 +; THUMB1: movs r0, #1 +; THUMB1: movs r2, #0 +; THUMB1: cmp r1, #160 +; THUMB1: beq + ; THUMB2-LABEL: t3: ; THUMB2: mov{{(s|\.w)}} [[R:r[0-1]]], #0 ; THUMB2: moveq [[R]], #1 @@ -74,6 +99,10 @@ entry: ; ARMT2: movwlt [[R0:r[0-9]+]], #65365 ; ARMT2: movtlt [[R0]], #65365 +; THUMB1-LABEL: t4: +; THUMB1: cmp r{{[0-9]+}}, r{{[0-9]+}} +; THUMB1: b{{lt|ge}} + ; THUMB2-LABEL: t4: ; THUMB2: mvnlt [[R0:r[0-9]+]], #11141290 %0 = icmp slt i32 %a, %b @@ -90,6 +119,12 @@ entry: ; ARM-NOT: mov ; ARM: movne r0, #0 +; THUMB1-LABEL: t5: +; THUMB1: mov r1, r0 +; THUMB1: movs r0, #0 +; THUMB1: cmp r1, #1 +; THUMB1: bne + ; THUMB2-LABEL: t5: ; THUMB2-NOT: mov ; THUMB2: cmp r0, #1 @@ -107,6 +142,10 @@ entry: ; ARM: cmp r0, #0 ; ARM: movne r0, #1 +; THUMB1-LABEL: t6: +; THUMB1: cmp r{{[0-9]+}}, #0 +; THUMB1: bne + ; THUMB2-LABEL: t6: ; THUMB2-NOT: mov ; THUMB2: cmp r0, #0 @@ -116,3 +155,202 @@ entry: %lnot.ext = zext i1 %tobool to i32 ret i32 %lnot.ext } + +define i32 @t7(i32 %a, i32 %b) nounwind readnone { +entry: +; ARM-LABEL: t7: +; ARM: mov r2, #0 +; ARM: cmp r0, r1 +; ARM: movne r2, #1 +; ARM: lsl r0, r2, #2 + +; ARMT2-LABEL: t7: +; ARMT2: mov r2, #0 +; ARMT2: cmp r0, r1 +; ARMT2: movwne r2, #1 +; ARMT2: lsl r0, r2, #2 + +; THUMB1-LABEL: t7: +; THUMB1: movs r2, #1 +; THUMB1: movs r3, #0 +; THUMB1: cmp r0, r1 +; THUMB1: bne .LBB6_2 +; THUMB1: mov r2, r3 +; THUMB1: .LBB6_2: +; THUMB1: lsls r0, r2, #2 + +; THUMB2-LABEL: t7: +; THUMB2: movs r2, #0 +; THUMB2: cmp r0, r1 +; THUMB2: it ne +; THUMB2: movne r2, #1 +; THUMB2: lsls r0, r2, #2 + %0 = icmp ne i32 %a, %b + %1 = select i1 %0, i32 4, i32 0 + ret i32 %1 +} + +define void @t8(i32 %a) { +entry: + +; ARM scheduler emits icmp/zext before both calls, so isn't relevant + +; ARMT2-LABEL: t8: +; ARMT2: mov r1, r0 +; ARMT2: mov r0, #9 +; ARMT2: mov r4, #0 +; ARMT2: cmp r1, #5 +; ARMT2: movweq r4, #1 +; ARMT2: bl t7 + +; THUMB1-LABEL: t8: +; THUMB1: mov r1, r0 +; THUMB1: movs r4, #1 +; THUMB1: movs r0, #0 +; THUMB1: cmp r1, #5 +; THUMB1: beq .LBB7_2 +; THUMB1: mov r4, r0 + +; THUMB2-LABEL: t8: +; THUMB2: mov r1, r0 +; THUMB2: movs r4, #0 +; THUMB2: cmp r1, #5 +; THUMB2: it eq +; 
THUMB2: moveq r4, #1 + %cmp = icmp eq i32 %a, 5 + %conv = zext i1 %cmp to i32 + %call = tail call i32 @t7(i32 9, i32 %a) + tail call i32 @t7(i32 %conv, i32 %call) + ret void +} + +define void @t9(i8* %a, i8 %b) { +entry: + +; ARM scheduler emits icmp/zext before both calls, so isn't relevant + +; ARMT2-LABEL: t9: +; ARMT2: cmp r4, r4 +; ARMT2: movweq r0, #1 + +; THUMB1-LABEL: t9: +; THUMB1: cmp r4, r4 +; THUMB1: beq .LBB8_2 +; THUMB1: mov r0, r1 + +; THUMB2-LABEL: t9: +; THUMB2: cmp r4, r4 +; THUMB2: it eq +; THUMB2: moveq r0, #1 + + %0 = load i8, i8* %a + %conv = sext i8 %0 to i32 + %conv119 = zext i8 %0 to i32 + %conv522 = and i32 %conv, 255 + %cmp723 = icmp eq i32 %conv522, %conv119 + tail call void @f(i1 zeroext %cmp723) + br i1 %cmp723, label %while.body, label %while.end + +while.body: ; preds = %entry, %while.body + %ref.025 = phi i8 [ %inc9, %while.body ], [ %0, %entry ] + %in.024 = phi i32 [ %inc, %while.body ], [ %conv, %entry ] + %inc = add i32 %in.024, 1 + %inc9 = add i8 %ref.025, 1 + %conv1 = zext i8 %inc9 to i32 + %cmp = icmp slt i32 %conv1, %conv119 + %conv5 = and i32 %inc, 255 + br i1 %cmp, label %while.body, label %while.end + +while.end: + ret void +} + +declare void @f(i1 zeroext) + + +define i1 @t10() { +entry: + %q = alloca i32 + %p = alloca i32 + store i32 -3, i32* %q + store i32 -8, i32* %p + %0 = load i32, i32* %q + %1 = load i32, i32* %p + %div = sdiv i32 %0, %1 + %mul = mul nsw i32 %div, %1 + %rem = srem i32 %0, %1 + %add = add nsw i32 %mul, %rem + %cmp = icmp eq i32 %add, %0 + ret i1 %cmp + +; ARM-LABEL: t10: +; ARM: mov r0, #0 +; ARM: cmn r1, #3 +; ARM: moveq r0, #1 + +; ARMT2-LABEL: t10: +; ARMT2: mov r0, #0 +; ARMT2: cmn r1, #3 +; ARMT2: movweq r0, #1 + +; THUMB1-LABEL: t10: +; THUMB1: movs r0, #1 +; THUMB1: movs r1, #0 +; THUMB1: cmp r2, r5 +; THUMB1: beq .LBB9_2 +; THUMB1: mov r0, r1 + +; THUMB2-LABEL: t10: +; THUMB2: adds r0, #3 +; THUMB2: mov.w r0, #0 +; THUMB2: it eq +; THUMB2: moveq r0, #1 + +; V8MBASE-LABEL: t10: +; V8MBASE-NOT: movs r0, #0 +; V8MBASE: movs r0, #7 +} + +define i1 @t11() { +entry: + %bit = alloca i32 + %load = load i32, i32* %bit + %clear = and i32 %load, -4096 + %set = or i32 %clear, 33 + store i32 %set, i32* %bit + %load1 = load i32, i32* %bit + %clear2 = and i32 %load1, -33550337 + %set3 = or i32 %clear2, 40960 + %clear5 = and i32 %set3, 4095 + %rem = srem i32 %clear5, 10 + %clear9 = and i32 %set3, -4096 + %set10 = or i32 %clear9, %rem + store i32 %set10, i32* %bit + %clear12 = and i32 %set10, 4095 + %cmp = icmp eq i32 %clear12, 3 + ret i1 %cmp + +; ARM-LABEL: t11: +; ARM: mov r0, #0 +; ARM: cmp r1, #3 +; ARM: moveq r0, #1 + +; ARMT2-LABEL: t11: +; ARMT2: mov r0, #0 +; ARMT2: cmp r1, #3 +; ARMT2: movweq r0, #1 + +; THUMB1-LABEL: t11: +; THUMB1-NOT: movs r0, #0 +; THUMB1: movs r0, #5 + +; THUMB2-LABEL: t11: +; THUMB2: movs r0, #0 +; THUMB2: cmp r1, #3 +; THUMB2: it eq +; THUMB2: moveq r0, #1 + +; V8MBASE-LABEL: t11: +; V8MBASE-NOT: movs r0, #0 +; V8MBASE: movw r0, #40960 +} diff --git a/test/CodeGen/ARM/select_const.ll b/test/CodeGen/ARM/select_const.ll index 23de9c35a5b8..7cce0b082037 100644 --- a/test/CodeGen/ARM/select_const.ll +++ b/test/CodeGen/ARM/select_const.ll @@ -8,7 +8,7 @@ define i32 @select_0_or_1(i1 %cond) { ; CHECK-LABEL: select_0_or_1: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: mov r1, #1 ; CHECK-NEXT: bic r0, r1, r0 ; CHECK-NEXT: mov pc, lr @@ -18,7 +18,7 @@ define i32 @select_0_or_1(i1 %cond) { define i32 @select_0_or_1_zeroext(i1 zeroext %cond) { ; CHECK-LABEL: select_0_or_1_zeroext: -; CHECK: @ BB#0: +; 
CHECK: @ %bb.0: ; CHECK-NEXT: eor r0, r0, #1 ; CHECK-NEXT: mov pc, lr %sel = select i1 %cond, i32 0, i32 1 @@ -27,7 +27,7 @@ define i32 @select_0_or_1_zeroext(i1 zeroext %cond) { define i32 @select_0_or_1_signext(i1 signext %cond) { ; CHECK-LABEL: select_0_or_1_signext: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: mov r1, #1 ; CHECK-NEXT: bic r0, r1, r0 ; CHECK-NEXT: mov pc, lr @@ -39,7 +39,7 @@ define i32 @select_0_or_1_signext(i1 signext %cond) { define i32 @select_1_or_0(i1 %cond) { ; CHECK-LABEL: select_1_or_0: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: and r0, r0, #1 ; CHECK-NEXT: mov pc, lr %sel = select i1 %cond, i32 1, i32 0 @@ -48,7 +48,7 @@ define i32 @select_1_or_0(i1 %cond) { define i32 @select_1_or_0_zeroext(i1 zeroext %cond) { ; CHECK-LABEL: select_1_or_0_zeroext: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: mov pc, lr %sel = select i1 %cond, i32 1, i32 0 ret i32 %sel @@ -56,7 +56,7 @@ define i32 @select_1_or_0_zeroext(i1 zeroext %cond) { define i32 @select_1_or_0_signext(i1 signext %cond) { ; CHECK-LABEL: select_1_or_0_signext: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: and r0, r0, #1 ; CHECK-NEXT: mov pc, lr %sel = select i1 %cond, i32 1, i32 0 @@ -67,7 +67,7 @@ define i32 @select_1_or_0_signext(i1 signext %cond) { define i32 @select_0_or_neg1(i1 %cond) { ; CHECK-LABEL: select_0_or_neg1: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: mov r1, #1 ; CHECK-NEXT: bic r0, r1, r0 ; CHECK-NEXT: rsb r0, r0, #0 @@ -78,7 +78,7 @@ define i32 @select_0_or_neg1(i1 %cond) { define i32 @select_0_or_neg1_zeroext(i1 zeroext %cond) { ; CHECK-LABEL: select_0_or_neg1_zeroext: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: eor r0, r0, #1 ; CHECK-NEXT: rsb r0, r0, #0 ; CHECK-NEXT: mov pc, lr @@ -88,7 +88,7 @@ define i32 @select_0_or_neg1_zeroext(i1 zeroext %cond) { define i32 @select_0_or_neg1_signext(i1 signext %cond) { ; CHECK-LABEL: select_0_or_neg1_signext: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: mvn r0, r0 ; CHECK-NEXT: mov pc, lr %sel = select i1 %cond, i32 0, i32 -1 @@ -97,7 +97,7 @@ define i32 @select_0_or_neg1_signext(i1 signext %cond) { define i32 @select_0_or_neg1_alt(i1 %cond) { ; CHECK-LABEL: select_0_or_neg1_alt: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: and r0, r0, #1 ; CHECK-NEXT: sub r0, r0, #1 ; CHECK-NEXT: mov pc, lr @@ -108,7 +108,7 @@ define i32 @select_0_or_neg1_alt(i1 %cond) { define i32 @select_0_or_neg1_alt_zeroext(i1 zeroext %cond) { ; CHECK-LABEL: select_0_or_neg1_alt_zeroext: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: sub r0, r0, #1 ; CHECK-NEXT: mov pc, lr %z = zext i1 %cond to i32 @@ -118,7 +118,7 @@ define i32 @select_0_or_neg1_alt_zeroext(i1 zeroext %cond) { define i32 @select_0_or_neg1_alt_signext(i1 signext %cond) { ; CHECK-LABEL: select_0_or_neg1_alt_signext: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: mvn r0, r0 ; CHECK-NEXT: mov pc, lr %z = zext i1 %cond to i32 @@ -130,7 +130,7 @@ define i32 @select_0_or_neg1_alt_signext(i1 signext %cond) { define i32 @select_neg1_or_0(i1 %cond) { ; CHECK-LABEL: select_neg1_or_0: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: and r0, r0, #1 ; CHECK-NEXT: rsb r0, r0, #0 ; CHECK-NEXT: mov pc, lr @@ -140,7 +140,7 @@ define i32 @select_neg1_or_0(i1 %cond) { define i32 @select_neg1_or_0_zeroext(i1 zeroext %cond) { ; CHECK-LABEL: select_neg1_or_0_zeroext: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: rsb r0, r0, #0 ; CHECK-NEXT: mov pc, lr %sel = select i1 %cond, i32 -1, i32 0 @@ -149,7 +149,7 @@ define i32 @select_neg1_or_0_zeroext(i1 zeroext %cond) { 
define i32 @select_neg1_or_0_signext(i1 signext %cond) { ; CHECK-LABEL: select_neg1_or_0_signext: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: mov pc, lr %sel = select i1 %cond, i32 -1, i32 0 ret i32 %sel @@ -159,7 +159,7 @@ define i32 @select_neg1_or_0_signext(i1 signext %cond) { define i32 @select_Cplus1_C(i1 %cond) { ; CHECK-LABEL: select_Cplus1_C: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: mov r1, #41 ; CHECK-NEXT: tst r0, #1 ; CHECK-NEXT: movne r1, #42 @@ -171,7 +171,7 @@ define i32 @select_Cplus1_C(i1 %cond) { define i32 @select_Cplus1_C_zeroext(i1 zeroext %cond) { ; CHECK-LABEL: select_Cplus1_C_zeroext: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: mov r1, #41 ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: movne r1, #42 @@ -183,7 +183,7 @@ define i32 @select_Cplus1_C_zeroext(i1 zeroext %cond) { define i32 @select_Cplus1_C_signext(i1 signext %cond) { ; CHECK-LABEL: select_Cplus1_C_signext: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: mov r1, #41 ; CHECK-NEXT: tst r0, #1 ; CHECK-NEXT: movne r1, #42 @@ -197,7 +197,7 @@ define i32 @select_Cplus1_C_signext(i1 signext %cond) { define i32 @select_C_Cplus1(i1 %cond) { ; CHECK-LABEL: select_C_Cplus1: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: mov r1, #42 ; CHECK-NEXT: tst r0, #1 ; CHECK-NEXT: movne r1, #41 @@ -209,7 +209,7 @@ define i32 @select_C_Cplus1(i1 %cond) { define i32 @select_C_Cplus1_zeroext(i1 zeroext %cond) { ; CHECK-LABEL: select_C_Cplus1_zeroext: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: mov r1, #42 ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: movne r1, #41 @@ -221,7 +221,7 @@ define i32 @select_C_Cplus1_zeroext(i1 zeroext %cond) { define i32 @select_C_Cplus1_signext(i1 signext %cond) { ; CHECK-LABEL: select_C_Cplus1_signext: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: mov r1, #42 ; CHECK-NEXT: tst r0, #1 ; CHECK-NEXT: movne r1, #41 @@ -236,7 +236,7 @@ define i32 @select_C_Cplus1_signext(i1 signext %cond) { define i32 @select_C1_C2(i1 %cond) { ; CHECK-LABEL: select_C1_C2: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: mov r1, #165 ; CHECK-NEXT: tst r0, #1 ; CHECK-NEXT: orr r1, r1, #256 @@ -249,7 +249,7 @@ define i32 @select_C1_C2(i1 %cond) { define i32 @select_C1_C2_zeroext(i1 zeroext %cond) { ; CHECK-LABEL: select_C1_C2_zeroext: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: mov r1, #165 ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: orr r1, r1, #256 @@ -262,7 +262,7 @@ define i32 @select_C1_C2_zeroext(i1 zeroext %cond) { define i32 @select_C1_C2_signext(i1 signext %cond) { ; CHECK-LABEL: select_C1_C2_signext: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: mov r1, #165 ; CHECK-NEXT: tst r0, #1 ; CHECK-NEXT: orr r1, r1, #256 @@ -278,7 +278,7 @@ define i32 @select_C1_C2_signext(i1 signext %cond) { define i64 @opaque_constant1(i1 %cond, i64 %x) { ; CHECK-LABEL: opaque_constant1: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: .save {r4, lr} ; CHECK-NEXT: push {r4, lr} ; CHECK-NEXT: mov lr, #1 @@ -310,7 +310,7 @@ define i64 @opaque_constant1(i1 %cond, i64 %x) { define i64 @opaque_constant2(i1 %cond, i64 %x) { ; CHECK-LABEL: opaque_constant2: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: mov r1, #1 ; CHECK-NEXT: tst r0, #1 ; CHECK-NEXT: orr r1, r1, #65536 diff --git a/test/CodeGen/ARM/setcc-logic.ll b/test/CodeGen/ARM/setcc-logic.ll index 79bae1facb3e..c48636dffa7f 100644 --- a/test/CodeGen/ARM/setcc-logic.ll +++ b/test/CodeGen/ARM/setcc-logic.ll @@ -3,7 +3,7 @@ define zeroext i1 @ne_neg1_and_ne_zero(i32 %x) nounwind { ; CHECK-LABEL: ne_neg1_and_ne_zero: -; CHECK: @ BB#0: +; CHECK: @ 
%bb.0: ; CHECK-NEXT: add r1, r0, #1 ; CHECK-NEXT: mov r0, #0 ; CHECK-NEXT: cmp r1, #1 @@ -19,7 +19,7 @@ define zeroext i1 @ne_neg1_and_ne_zero(i32 %x) nounwind { define zeroext i1 @and_eq(i32 %a, i32 %b, i32 %c, i32 %d) nounwind { ; CHECK-LABEL: and_eq: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: eor r2, r2, r3 ; CHECK-NEXT: eor r0, r0, r1 ; CHECK-NEXT: orrs r0, r0, r2 @@ -34,7 +34,7 @@ define zeroext i1 @and_eq(i32 %a, i32 %b, i32 %c, i32 %d) nounwind { define zeroext i1 @or_ne(i32 %a, i32 %b, i32 %c, i32 %d) nounwind { ; CHECK-LABEL: or_ne: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: eor r2, r2, r3 ; CHECK-NEXT: eor r0, r0, r1 ; CHECK-NEXT: orrs r0, r0, r2 @@ -48,7 +48,7 @@ define zeroext i1 @or_ne(i32 %a, i32 %b, i32 %c, i32 %d) nounwind { define <4 x i1> @and_eq_vec(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) nounwind { ; CHECK-LABEL: and_eq_vec: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: .save {r11, lr} ; CHECK-NEXT: push {r11, lr} ; CHECK-NEXT: vmov d19, r2, r3 diff --git a/test/CodeGen/ARM/shift-combine.ll b/test/CodeGen/ARM/shift-combine.ll index cfda54f6b05b..f6892f36a430 100644 --- a/test/CodeGen/ARM/shift-combine.ll +++ b/test/CodeGen/ARM/shift-combine.ll @@ -217,3 +217,60 @@ entry: ret i32 %conv } +; CHECK-LABEL: test_shift8_mask8 +; CHECK-BE: ldr r1, [r0] +; CHECK-COMMON: ldr r1, [r0] +; CHECK-COMMON: ubfx r1, r1, #8, #8 +; CHECK-COMMON: str r1, [r0] +define arm_aapcscc void @test_shift8_mask8(i32* nocapture %p) { +entry: + %0 = load i32, i32* %p, align 4 + %shl = lshr i32 %0, 8 + %and = and i32 %shl, 255 + store i32 %and, i32* %p, align 4 + ret void +} + +; CHECK-LABEL: test_shift8_mask16 +; CHECK-BE: ldr r1, [r0] +; CHECK-COMMON: ldr r1, [r0] +; CHECK-COMMON: ubfx r1, r1, #8, #16 +; CHECK-COMMON: str r1, [r0] +define arm_aapcscc void @test_shift8_mask16(i32* nocapture %p) { +entry: + %0 = load i32, i32* %p, align 4 + %shl = lshr i32 %0, 8 + %and = and i32 %shl, 65535 + store i32 %and, i32* %p, align 4 + ret void +} + +; CHECK-LABEL: test_sext_shift8_mask8 +; CHECK-BE: ldrb r0, [r0] +; CHECK-COMMON: ldrb r0, [r0, #1] +; CHECK-COMMON: str r0, [r1] +define arm_aapcscc void @test_sext_shift8_mask8(i16* %p, i32* %q) { +entry: + %0 = load i16, i16* %p, align 4 + %1 = sext i16 %0 to i32 + %shl = lshr i32 %1, 8 + %and = and i32 %shl, 255 + store i32 %and, i32* %q, align 4 + ret void +} + +; CHECK-LABEL: test_sext_shift8_mask16 +; CHECK-ARM: ldrsh r0, [r0] +; CHECK-BE: ldrsh r0, [r0] +; CHECK-THUMB: ldrsh.w r0, [r0] +; CHECK-COMMON: ubfx r0, r0, #8, #16 +; CHECK-COMMON: str r0, [r1] +define arm_aapcscc void @test_sext_shift8_mask16(i16* %p, i32* %q) { +entry: + %0 = load i16, i16* %p, align 4 + %1 = sext i16 %0 to i32 + %shl = lshr i32 %1, 8 + %and = and i32 %shl, 65535 + store i32 %and, i32* %q, align 4 + ret void +} diff --git a/test/CodeGen/ARM/single-issue-r52.mir b/test/CodeGen/ARM/single-issue-r52.mir index 1eba074dafb3..22751592ff7e 100644 --- a/test/CodeGen/ARM/single-issue-r52.mir +++ b/test/CodeGen/ARM/single-issue-r52.mir @@ -20,22 +20,22 @@ # CHECK: ********** MI Scheduling ********** # CHECK: ScheduleDAGMILive::schedule starting -# CHECK: SU(1): %vreg1 = VLD4d8Pseudo %vreg0, 8, pred:14, pred:%noreg; mem:LD32[%A](align=8) QQPR:%vreg1 GPR:%vreg0 +# CHECK: SU(1): %1:qqpr = VLD4d8Pseudo %0:gpr, 8, 14, %noreg; mem:LD32[%A](align=8) # CHECK: Latency : 8 # CHECK: Single Issue : true; -# CHECK: SU(2): %vreg4 = VADDv8i8 %vreg1:dsub_0, %vreg1:dsub_1, pred:14, pred:%noreg; DPR:%vreg4 QQPR:%vreg1 +# CHECK: SU(2): %4:dpr = VADDv8i8 %1.dsub_0:qqpr, 
%1.dsub_1:qqpr, 14, %noreg # CHECK: Latency : 5 # CHECK: Single Issue : false; -# CHECK: SU(3): %vreg5, %vreg6 = VMOVRRD %vreg4, pred:14, pred:%noreg; GPR:%vreg5,%vreg6 DPR:%vreg4 +# CHECK: SU(3): %5:gpr, %6:gpr = VMOVRRD %4:dpr, 14, %noreg # CHECK: Latency : 4 # CHECK: Single Issue : false; -# TOPDOWN: Scheduling SU(1) %vreg1 = VLD4d8Pseudo +# TOPDOWN: Scheduling SU(1) %1:qqpr = VLD4d8Pseudo # TOPDOWN: Bump cycle to end group -# TOPDOWN: Scheduling SU(2) %vreg4 = VADDv8i8 +# TOPDOWN: Scheduling SU(2) %4:dpr = VADDv8i8 -# BOTTOMUP: Scheduling SU(2) %vreg4 = VADDv8i8 -# BOTTOMUP: Scheduling SU(1) %vreg1 = VLD4d8Pseudo +# BOTTOMUP: Scheduling SU(2) %4:dpr = VADDv8i8 +# BOTTOMUP: Scheduling SU(1) %1:qqpr = VLD4d8Pseudo # BOTTOMUP: Bump cycle to begin group ... @@ -76,11 +76,11 @@ body: | liveins: %r0 %0 = COPY %r0 - %1 = VLD4d8Pseudo %0, 8, 14, _ :: (load 32 from %ir.A, align 8) - %4 = VADDv8i8 %1.dsub_0, %1.dsub_1, 14, _ - %5, %6 = VMOVRRD %4, 14, _ + %1 = VLD4d8Pseudo %0, 8, 14, %noreg :: (load 32 from %ir.A, align 8) + %4 = VADDv8i8 %1.dsub_0, %1.dsub_1, 14, %noreg + %5, %6 = VMOVRRD %4, 14, %noreg %r0 = COPY %5 %r1 = COPY %6 - BX_RET 14, _, implicit %r0, implicit killed %r1 + BX_RET 14, %noreg, implicit %r0, implicit killed %r1 ... diff --git a/test/CodeGen/ARM/smml.ll b/test/CodeGen/ARM/smml.ll index 4788644cf195..2e0c3bc1b862 100644 --- a/test/CodeGen/ARM/smml.ll +++ b/test/CodeGen/ARM/smml.ll @@ -5,6 +5,8 @@ ; RUN: llc -mtriple=thumbv6-eabi %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-THUMBV6 ; RUN: llc -mtriple=thumbv6t2-eabi %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-THUMBV6T2 ; RUN: llc -mtriple=thumbv7-eabi %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-THUMBV6T2 +; RUN: llc -mtriple=thumbv7m-eabi %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-V4 +; RUN: llc -mtriple=thumbv7em-eabi %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-V6T2 define i32 @Test0(i32 %a, i32 %b, i32 %c) nounwind readnone ssp { entry: diff --git a/test/CodeGen/ARM/stack-protector-bmovpcb_call.ll b/test/CodeGen/ARM/stack-protector-bmovpcb_call.ll index 2a7a82da8f69..84bf7ac826eb 100644 --- a/test/CodeGen/ARM/stack-protector-bmovpcb_call.ll +++ b/test/CodeGen/ARM/stack-protector-bmovpcb_call.ll @@ -15,13 +15,13 @@ define i32 @main() #0 { entry: %title = alloca [15 x i8], align 1 %0 = getelementptr inbounds [15 x i8], [15 x i8]* %title, i32 0, i32 0 - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* getelementptr inbounds ([15 x i8], [15 x i8]* @main.title, i32 0, i32 0), i32 15, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %0, i8* align 1 getelementptr inbounds ([15 x i8], [15 x i8]* @main.title, i32 0, i32 0), i32 15, i1 false) %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i8* %0) #3 ret i32 0 } ; Function Attrs: nounwind -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #1 +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i1) #1 ; Function Attrs: nounwind optsize declare i32 @printf(i8* nocapture readonly, ...) 
#2 diff --git a/test/CodeGen/ARM/stack-size-section.ll b/test/CodeGen/ARM/stack-size-section.ll new file mode 100644 index 000000000000..851433468b17 --- /dev/null +++ b/test/CodeGen/ARM/stack-size-section.ll @@ -0,0 +1,30 @@ +; RUN: llc < %s -mtriple=armv7-linux -stack-size-section | FileCheck %s + +; CHECK-LABEL: func1: +; CHECK: .section .stack_sizes,"",%progbits +; CHECK-NEXT: .long func1 +; CHECK-NEXT: .byte 8 +define void @func1(i32, i32) #0 { + alloca i32, align 4 + alloca i32, align 4 + ret void +} + +; CHECK-LABEL: func2: +; CHECK: .section .stack_sizes,"",%progbits +; CHECK-NEXT: .long func2 +; CHECK-NEXT: .byte 16 +define void @func2() #0 { + alloca i32, align 4 + call void @func1(i32 1, i32 2) + ret void +} + +; CHECK-LABEL: dynalloc: +; CHECK-NOT: .section .stack_sizes +define void @dynalloc(i32 %N) #0 { + alloca i32, i32 %N + ret void +} + +attributes #0 = { "no-frame-pointer-elim"="true" } diff --git a/test/CodeGen/ARM/struct-byval-frame-index.ll b/test/CodeGen/ARM/struct-byval-frame-index.ll index b3ed5de857be..c6509cfe9cf8 100644 --- a/test/CodeGen/ARM/struct-byval-frame-index.ll +++ b/test/CodeGen/ARM/struct-byval-frame-index.ll @@ -60,10 +60,10 @@ target triple = "armv7l-unknown-linux-gnueabihf" @brefframe = external global [4 x [4 x i8]], align 1 ; Function Attrs: nounwind -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) #0 +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) #0 ; Function Attrs: nounwind -declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) #0 +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) #0 ; Function Attrs: nounwind declare void @SetMotionVectorsMB(%structK* nocapture, i32) #1 @@ -122,10 +122,10 @@ for.cond210.preheader: ; preds = %if.then169 unreachable if.end230: ; preds = %if.end164 - tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* undef, i8* bitcast ([4 x i32]* @b8mode to i8*), i32 16, i32 4, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 undef, i8* align 4 bitcast ([4 x i32]* @b8mode to i8*), i32 16, i1 false) %b8pdir = getelementptr inbounds %structK, %structK* %2, i32 %1, i32 15 %3 = bitcast [4 x i32]* %b8pdir to i8* - tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %3, i8* bitcast ([4 x i32]* @b8pdir to i8*), i32 16, i32 4, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %3, i8* align 4 bitcast ([4 x i32]* @b8pdir to i8*), i32 16, i1 false) br i1 undef, label %if.end236, label %if.then233 if.then233: ; preds = %if.end230 diff --git a/test/CodeGen/ARM/subreg-remat.ll b/test/CodeGen/ARM/subreg-remat.ll index d5abfc0af51b..1b406103d118 100644 --- a/test/CodeGen/ARM/subreg-remat.ll +++ b/test/CodeGen/ARM/subreg-remat.ll @@ -4,10 +4,10 @@ target triple = "thumbv7-apple-ios" ; ; The vector %v2 is built like this: ; -; %vreg6:ssub_1 = ... -; %vreg6:ssub_0 = VLDRS , 0, pred:14, pred:%noreg; mem:LD4[ConstantPool] DPR_VFP2:%vreg6 +; %6:ssub_1 = ... +; %6:ssub_0 = VLDRS %const.0, 0, 14, %noreg; mem:LD4[ConstantPool] DPR_VFP2:%6 ; -; When %vreg6 spills, the VLDRS constant pool load cannot be rematerialized +; When %6 spills, the VLDRS constant pool load cannot be rematerialized ; since it implicitly reads the ssub_1 sub-register. ; ; CHECK: f1 @@ -31,7 +31,7 @@ define void @f1(float %x, <2 x float>* %p) { ; because the bits are undef, we should rematerialize. 
The vector is now built ; like this: ; -; %vreg2:ssub_0 = VLDRS , 0, pred:14, pred:%noreg, %vreg2; mem:LD4[ConstantPool] +; %2:ssub_0 = VLDRS %const.0, 0, 14, %noreg, implicit-def %2; mem:LD4[ConstantPool] ; ; The extra operand indicates that the instruction fully defines the ; virtual register. It doesn't read the old value. diff --git a/test/CodeGen/ARM/tail-dup-bundle.mir b/test/CodeGen/ARM/tail-dup-bundle.mir index 67c1cb5a6b94..719d616f26bc 100644 --- a/test/CodeGen/ARM/tail-dup-bundle.mir +++ b/test/CodeGen/ARM/tail-dup-bundle.mir @@ -19,7 +19,7 @@ body: | bb.1: liveins: %r0 - t2CMPri %r0, 32, 14, _, implicit-def %cpsr + t2CMPri %r0, 32, 14, %noreg, implicit-def %cpsr BUNDLE implicit-def dead %itstate, implicit-def %cpsr, implicit killed %r0, implicit killed %cpsr { t2IT 1, 24, implicit-def %itstate t2CMPri killed %r0, 9, 1, killed %cpsr, implicit-def %cpsr, implicit internal killed %itstate @@ -28,9 +28,9 @@ body: | bb.2: %r0 = IMPLICIT_DEF - t2B %bb.1, 14, _ + t2B %bb.1, 14, %noreg bb.3: %r0 = IMPLICIT_DEF - t2B %bb.1, 14, _ + t2B %bb.1, 14, %noreg ... diff --git a/test/CodeGen/ARM/tail-merge-branch-weight.ll b/test/CodeGen/ARM/tail-merge-branch-weight.ll index f83f28815793..f03906b6bf51 100644 --- a/test/CodeGen/ARM/tail-merge-branch-weight.ll +++ b/test/CodeGen/ARM/tail-merge-branch-weight.ll @@ -9,9 +9,9 @@ ; = 0.2 * 0.4 + 0.8 * 0.7 = 0.64 ; CHECK: # Machine code for function test0: -; CHECK: Successors according to CFG: BB#{{[0-9]+}}({{[0-9a-fx/= ]+}}20.00%) BB#{{[0-9]+}}({{[0-9a-fx/= ]+}}80.00%) -; CHECK: BB#{{[0-9]+}}: -; CHECK: BB#{{[0-9]+}}: +; CHECK: Successors according to CFG: %bb.{{[0-9]+}}({{[0-9a-fx/= ]+}}20.00%) %bb.{{[0-9]+}}({{[0-9a-fx/= ]+}}80.00%) +; CHECK: %bb.{{[0-9]+}}: +; CHECK: %bb.{{[0-9]+}}: ; CHECK: # End machine code for function test0. 
define i32 @test0(i32 %n, i32 %m, i32* nocapture %a, i32* nocapture %b) { diff --git a/test/CodeGen/ARM/tailcall-mem-intrinsics.ll b/test/CodeGen/ARM/tailcall-mem-intrinsics.ll index 6744efa8ab89..08370f2bf12a 100644 --- a/test/CodeGen/ARM/tailcall-mem-intrinsics.ll +++ b/test/CodeGen/ARM/tailcall-mem-intrinsics.ll @@ -4,7 +4,7 @@ ; CHECK: bl __aeabi_memcpy define i8* @tail_memcpy_ret(i8* nocapture %p, i8* nocapture readonly %q, i32 %n) #0 { entry: - tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i1 false) ret i8* %p } @@ -12,7 +12,7 @@ entry: ; CHECK: bl __aeabi_memmove define i8* @tail_memmove_ret(i8* nocapture %p, i8* nocapture readonly %q, i32 %n) #0 { entry: - tail call void @llvm.memmove.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i32 1, i1 false) + tail call void @llvm.memmove.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i1 false) ret i8* %p } @@ -20,12 +20,12 @@ entry: ; CHECK: bl __aeabi_memset define i8* @tail_memset_ret(i8* nocapture %p, i8 %c, i32 %n) #0 { entry: - tail call void @llvm.memset.p0i8.i32(i8* %p, i8 %c, i32 %n, i32 1, i1 false) + tail call void @llvm.memset.p0i8.i32(i8* %p, i8 %c, i32 %n, i1 false) ret i8* %p } -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #0 -declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #0 -declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) #0 +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i1) #0 +declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i1) #0 +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) #0 attributes #0 = { nounwind } diff --git a/test/CodeGen/ARM/taildup-branch-weight.ll b/test/CodeGen/ARM/taildup-branch-weight.ll index 6f8d245e74a0..5b7ba0ae51b0 100644 --- a/test/CodeGen/ARM/taildup-branch-weight.ll +++ b/test/CodeGen/ARM/taildup-branch-weight.ll @@ -3,7 +3,7 @@ ; RUN: | FileCheck %s ; CHECK: Machine code for function test0: -; CHECK: Successors according to CFG: BB#1({{[0-9a-fx/= ]+}}3.12%) BB#2({{[0-9a-fx/= ]+}}96.88%) +; CHECK: Successors according to CFG: %bb.1({{[0-9a-fx/= ]+}}3.12%) %bb.2({{[0-9a-fx/= ]+}}96.88%) define void @test0(i32 %a, i32 %b, i32* %c, i32* %d) { entry: @@ -30,7 +30,7 @@ B4: !0 = !{!"branch_weights", i32 4, i32 124} ; CHECK: Machine code for function test1: -; CHECK: Successors according to CFG: BB#2(0x7c000000 / 0x80000000 = 96.88%) BB#1(0x04000000 / 0x80000000 = 3.12%) +; CHECK: Successors according to CFG: %bb.2(0x7c000000 / 0x80000000 = 96.88%) %bb.1(0x04000000 / 0x80000000 = 3.12%) @g0 = common global i32 0, align 4 diff --git a/test/CodeGen/ARM/thumb-litpool.ll b/test/CodeGen/ARM/thumb-litpool.ll index f68fdb6fdc0f..bd8829c22bce 100644 --- a/test/CodeGen/ARM/thumb-litpool.ll +++ b/test/CodeGen/ARM/thumb-litpool.ll @@ -12,4 +12,4 @@ define void @foo() minsize { call void asm sideeffect "", "~{r0},~{r1},~{r2},~{r3},~{r4},~{r5},~{r6},~{r7}"() call void @callee(i8* @var) ret void -} \ No newline at end of file +} diff --git a/test/CodeGen/ARM/usat.ll b/test/CodeGen/ARM/usat.ll new file mode 100644 index 000000000000..8f19d11ef7bb --- /dev/null +++ b/test/CodeGen/ARM/usat.ll @@ -0,0 +1,214 @@ +; RUN: llc -mtriple=armv4t-eabi %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=V4T +; RUN: llc -mtriple=armv6-eabi %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=V6 +; RUN: llc -mtriple=armv6t2-eabi %s 
-o - | FileCheck %s --check-prefix=CHECK --check-prefix=V6T2 + +; Check for several conditions that should result in USAT. +; For example, the base test is equivalent to +; x < 0 ? 0 : (x > k ? k : x) in C. All patterns that bound x +; to the interval [0, k] where k + 1 is a power of 2 can be +; transformed into USAT. At the end there are some tests +; checking that conditionals are not transformed if they don't +; match the right pattern. + +; +; Base tests with different bit widths +; + +; x < 0 ? 0 : (x > k ? k : x) +; 32-bit base test +define i32 @unsigned_sat_base_32bit(i32 %x) #0 { +; CHECK-LABEL: unsigned_sat_base_32bit: +; V6: usat r0, #23, r0 +; V6T2: usat r0, #23, r0 +; V4T-NOT: usat +entry: + %cmpLow = icmp slt i32 %x, 0 + %cmpUp = icmp sgt i32 %x, 8388607 + %saturateUp = select i1 %cmpUp, i32 8388607, i32 %x + %saturateLow = select i1 %cmpLow, i32 0, i32 %saturateUp + ret i32 %saturateLow +} + +; x < 0 ? 0 : (x > k ? k : x) +; 16-bit base test +define i16 @unsigned_sat_base_16bit(i16 %x) #0 { +; CHECK-LABEL: unsigned_sat_base_16bit: +; V6: usat r0, #11, r0 +; V6T2: usat r0, #11, r0 +; V4T-NOT: usat +entry: + %cmpLow = icmp slt i16 %x, 0 + %cmpUp = icmp sgt i16 %x, 2047 + %saturateUp = select i1 %cmpUp, i16 2047, i16 %x + %saturateLow = select i1 %cmpLow, i16 0, i16 %saturateUp + ret i16 %saturateLow +} + +; x < 0 ? 0 : (x > k ? k : x) +; 8-bit base test +define i8 @unsigned_sat_base_8bit(i8 %x) #0 { +; CHECK-LABEL: unsigned_sat_base_8bit: +; V6: usat r0, #5, r0 +; V6T2: usat r0, #5, r0 +; V4T-NOT: usat +entry: + %cmpLow = icmp slt i8 %x, 0 + %cmpUp = icmp sgt i8 %x, 31 + %saturateUp = select i1 %cmpUp, i8 31, i8 %x + %saturateLow = select i1 %cmpLow, i8 0, i8 %saturateUp + ret i8 %saturateLow +} + +; +; Tests where the conditionals that check for upper and lower bounds, +; or the < and > operators, are arranged in different ways. Only some +; of the possible combinations that lead to USAT are tested. +; +; x < 0 ? 0 : (x < k ? x : k) +define i32 @unsigned_sat_lower_upper_1(i32 %x) #0 { +; CHECK-LABEL: unsigned_sat_lower_upper_1: +; V6: usat r0, #23, r0 +; V6T2: usat r0, #23, r0 +; V4T-NOT: usat +entry: + %cmpLow = icmp slt i32 %x, 0 + %cmpUp = icmp slt i32 %x, 8388607 + %saturateUp = select i1 %cmpUp, i32 %x, i32 8388607 + %saturateLow = select i1 %cmpLow, i32 0, i32 %saturateUp + ret i32 %saturateLow +} + +; x > 0 ? (x > k ? k : x) : 0 +define i32 @unsigned_sat_lower_upper_2(i32 %x) #0 { +; CHECK-LABEL: unsigned_sat_lower_upper_2: +; V6: usat r0, #23, r0 +; V6T2: usat r0, #23, r0 +; V4T-NOT: usat +entry: + %cmpLow = icmp sgt i32 %x, 0 + %cmpUp = icmp sgt i32 %x, 8388607 + %saturateUp = select i1 %cmpUp, i32 8388607, i32 %x + %saturateLow = select i1 %cmpLow, i32 %saturateUp, i32 0 + ret i32 %saturateLow +} + +; x < k ? (x < 0 ? 0 : x) : k +define i32 @unsigned_sat_upper_lower_1(i32 %x) #0 { +; CHECK-LABEL: unsigned_sat_upper_lower_1: +; V6: usat r0, #23, r0 +; V6T2: usat r0, #23, r0 +; V4T-NOT: usat +entry: + %cmpUp = icmp slt i32 %x, 8388607 + %cmpLow = icmp slt i32 %x, 0 + %saturateLow = select i1 %cmpLow, i32 0, i32 %x + %saturateUp = select i1 %cmpUp, i32 %saturateLow, i32 8388607 + ret i32 %saturateUp +} + +; x > k ? k : (x < 0 ? 
0 : x) +define i32 @unsigned_sat_upper_lower_2(i32 %x) #0 { +; CHECK-LABEL: unsigned_sat_upper_lower_2: +; V6: usat r0, #23, r0 +; V6T2: usat r0, #23, r0 +; V4T-NOT: usat +entry: + %cmpUp = icmp sgt i32 %x, 8388607 + %cmpLow = icmp slt i32 %x, 0 + %saturateLow = select i1 %cmpLow, i32 0, i32 %x + %saturateUp = select i1 %cmpUp, i32 8388607, i32 %saturateLow + ret i32 %saturateUp +} + +; k < x ? k : (x > 0 ? x : 0) +define i32 @unsigned_sat_upper_lower_3(i32 %x) #0 { +; CHECK-LABEL: unsigned_sat_upper_lower_3: +; V6: usat r0, #23, r0 +; V6T2: usat r0, #23, r0 +; V4T-NOT: usat +entry: + %cmpUp = icmp slt i32 8388607, %x + %cmpLow = icmp sgt i32 %x, 0 + %saturateLow = select i1 %cmpLow, i32 %x, i32 0 + %saturateUp = select i1 %cmpUp, i32 8388607, i32 %saturateLow + ret i32 %saturateUp +} + +; +; The following tests check for patterns that should not transform +; into USAT but are similar enough that could confuse the selector. +; +; x > k ? k : (x > 0 ? 0 : x) +; First condition upper-saturates, second doesn't lower-saturate. +define i32 @no_unsigned_sat_missing_lower(i32 %x) #0 { +; CHECK-LABEL: no_unsigned_sat_missing_lower +; CHECK-NOT: usat +entry: + %cmpUp = icmp sgt i32 %x, 8388607 + %cmpLow = icmp sgt i32 %x, 0 + %saturateLow = select i1 %cmpLow, i32 0, i32 %x + %saturateUp = select i1 %cmpUp, i32 8388607, i32 %saturateLow + ret i32 %saturateUp +} + +; x < k ? k : (x < 0 ? 0 : x) +; Second condition lower-saturates, first doesn't upper-saturate. +define i32 @no_unsigned_sat_missing_upper(i32 %x) #0 { +; CHECK-LABEL: no_unsigned_sat_missing_upper: +; CHECK-NOT: usat +entry: + %cmpUp = icmp slt i32 %x, 8388607 + %cmpLow = icmp slt i32 %x, 0 + %saturateLow = select i1 %cmpLow, i32 0, i32 %x + %saturateUp = select i1 %cmpUp, i32 8388607, i32 %saturateLow + ret i32 %saturateUp +} + +; Lower constant is different in the select and in the compare +define i32 @no_unsigned_sat_incorrect_constant(i32 %x) #0 { +; CHECK-LABEL: no_unsigned_sat_incorrect_constant: +; CHECK-NOT: usat +entry: + %cmpUp = icmp sgt i32 %x, 8388607 + %cmpLow = icmp slt i32 %x, 0 + %saturateLow = select i1 %cmpLow, i32 -1, i32 %x + %saturateUp = select i1 %cmpUp, i32 8388607, i32 %saturateLow + ret i32 %saturateUp +} + +; The interval is not [0, k] +define i32 @no_unsigned_sat_incorrect_interval(i32 %x) #0 { +; CHECK-LABEL: no_unsigned_sat_incorrect_interval: +; CHECK-NOT: usat +entry: + %cmpUp = icmp sgt i32 %x, 8388607 + %cmpLow = icmp slt i32 %x, -4 + %saturateLow = select i1 %cmpLow, i32 -4, i32 %x + %saturateUp = select i1 %cmpUp, i32 8388607, i32 %saturateLow + ret i32 %saturateUp +} + +; The returned value (y) is not the same as the tested value (x). +define i32 @no_unsigned_sat_incorrect_return(i32 %x, i32 %y) #0 { +; CHECK-LABEL: no_unsigned_sat_incorrect_return: +; CHECK-NOT: usat +entry: + %cmpUp = icmp sgt i32 %x, 8388607 + %cmpLow = icmp slt i32 %x, 0 + %saturateLow = select i1 %cmpLow, i32 0, i32 %y + %saturateUp = select i1 %cmpUp, i32 8388607, i32 %saturateLow + ret i32 %saturateUp +} + +; One of the values in a compare (y) is not the same as the rest +; of the compare and select values (x). 
+define i32 @no_unsigned_sat_incorrect_compare(i32 %x, i32 %y) #0 { +; CHECK-LABEL: no_unsigned_sat_incorrect_compare: +; CHECK-NOT: usat +entry: + %cmpUp = icmp sgt i32 %x, 8388607 + %cmpLow = icmp slt i32 %y, 0 + %saturateLow = select i1 %cmpLow, i32 0, i32 %x + %saturateUp = select i1 %cmpUp, i32 8388607, i32 %saturateLow + ret i32 %saturateUp +} diff --git a/test/CodeGen/ARM/v6-jumptable-clobber.mir b/test/CodeGen/ARM/v6-jumptable-clobber.mir index ba25ac2cfe46..52a39ffc5e1d 100644 --- a/test/CodeGen/ARM/v6-jumptable-clobber.mir +++ b/test/CodeGen/ARM/v6-jumptable-clobber.mir @@ -231,21 +231,21 @@ body: | successors: %bb.2.d1(0x03c3c3c4), %bb.1(0x7c3c3c3c) liveins: %r0, %r1 - %r2 = tLDRpci %const.0, 14, _ - tSTRi killed %r2, killed %r1, 0, 14, _ :: (store 4 into %ir.addr) + %r2 = tLDRpci %const.0, 14, %noreg + tSTRi killed %r2, killed %r1, 0, 14, %noreg :: (store 4 into %ir.addr) dead %r1 = SPACE 980, undef %r0 - %r0 = tUXTB killed %r0, 14, _ - %r1, dead %cpsr = tSUBi3 killed %r0, 1, 14, _ - tCMPi8 %r1, 25, 14, _, implicit-def %cpsr + %r0 = tUXTB killed %r0, 14, %noreg + %r1, dead %cpsr = tSUBi3 killed %r0, 1, 14, %noreg + tCMPi8 %r1, 25, 14, %noreg, implicit-def %cpsr tBcc %bb.2.d1, 8, killed %cpsr bb.1 (%ir-block.0): successors: %bb.3.d2(0x07c549d2), %bb.9.d8(0x07c549d2), %bb.4.d3(0x07c549d2), %bb.5.d4(0x07c549d2), %bb.6.d5(0x07c549d2), %bb.7.d6(0x07c549d2), %bb.8.d7(0x07c549d2), %bb.10.d9(0x07c549d2), %bb.11.d10(0x07c549d2), %bb.2.d1(0x03ab62db), %bb.12.d11(0x07c549d2), %bb.13.d12(0x07c549d2), %bb.14.d13(0x07c549d2), %bb.15.d14(0x07c549d2), %bb.16.d15(0x07c549d2), %bb.17.d16(0x07c549d2), %bb.18.d17(0x07c549d2) liveins: %r1 - %r0, dead %cpsr = tLSLri killed %r1, 2, 14, _ - %r1 = tLEApcrelJT %jump-table.0, 14, _ - %r0 = tLDRr killed %r1, killed %r0, 14, _ :: (load 4 from jump-table) + %r0, dead %cpsr = tLSLri killed %r1, 2, 14, %noreg + %r1 = tLEApcrelJT %jump-table.0, 14, %noreg + %r0 = tLDRr killed %r1, killed %r0, 14, %noreg :: (load 4 from jump-table) tBR_JTr killed %r0, %jump-table.0 bb.3.d2: @@ -329,20 +329,20 @@ body: | successors: %bb.2.d1(0x03c3c3c4), %bb.1(0x7c3c3c3c) liveins: %r0, %r1 - %r2 = tLDRpci %const.0, 14, _ - tSTRi killed %r2, killed %r1, 0, 14, _ :: (store 4 into %ir.addr) - %r0 = tUXTB killed %r0, 14, _ - %r1, dead %cpsr = tSUBi3 killed %r0, 1, 14, _ - tCMPi8 %r1, 25, 14, _, implicit-def %cpsr + %r2 = tLDRpci %const.0, 14, %noreg + tSTRi killed %r2, killed %r1, 0, 14, %noreg :: (store 4 into %ir.addr) + %r0 = tUXTB killed %r0, 14, %noreg + %r1, dead %cpsr = tSUBi3 killed %r0, 1, 14, %noreg + tCMPi8 %r1, 25, 14, %noreg, implicit-def %cpsr tBcc %bb.2.d1, 8, killed %cpsr bb.1 (%ir-block.0): successors: %bb.3.d2(0x07c549d2), %bb.9.d8(0x07c549d2), %bb.4.d3(0x07c549d2), %bb.5.d4(0x07c549d2), %bb.6.d5(0x07c549d2), %bb.7.d6(0x07c549d2), %bb.8.d7(0x07c549d2), %bb.10.d9(0x07c549d2), %bb.11.d10(0x07c549d2), %bb.2.d1(0x03ab62db), %bb.12.d11(0x07c549d2), %bb.13.d12(0x07c549d2), %bb.14.d13(0x07c549d2), %bb.15.d14(0x07c549d2), %bb.16.d15(0x07c549d2), %bb.17.d16(0x07c549d2), %bb.18.d17(0x07c549d2) liveins: %r1 - %r0, dead %cpsr = tLSLri killed %r1, 2, 14, _ - %r1 = tLEApcrelJT %jump-table.0, 14, _ - %r0 = tLDRr killed %r1, killed %r0, 14, _ :: (load 4 from jump-table) + %r0, dead %cpsr = tLSLri killed %r1, 2, 14, %noreg + %r1 = tLEApcrelJT %jump-table.0, 14, %noreg + %r0 = tLDRr killed %r1, killed %r0, 14, %noreg :: (load 4 from jump-table) tBR_JTr killed %r0, %jump-table.0 bb.3.d2: diff --git a/test/CodeGen/ARM/v8m-tail-call.ll b/test/CodeGen/ARM/v8m-tail-call.ll index 
c369df0c0222..96438dc5e647 100644 --- a/test/CodeGen/ARM/v8m-tail-call.ll +++ b/test/CodeGen/ARM/v8m-tail-call.ll @@ -45,3 +45,61 @@ define hidden i32 @f2(i32, i32, i32, i32, i32) { ; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: b h2 } + +; Make sure that tail calls to function pointers that require r0-r3 for argument +; passing do not break the compiler. +@fnptr = global i32 (i32, i32, i32, i32)* null +define i32 @test3() { +; CHECK-LABEL: test3: +; CHECK: blx {{r[0-9]+}} + %1 = load i32 (i32, i32, i32, i32)*, i32 (i32, i32, i32, i32)** @fnptr + %2 = tail call i32 %1(i32 1, i32 2, i32 3, i32 4) + ret i32 %2 +} + +@fnptr2 = global i32 (i32, i32, i64)* null +define i32 @test4() { +; CHECK-LABEL: test4: +; CHECK: blx {{r[0-9]+}} + %1 = load i32 (i32, i32, i64)*, i32 (i32, i32, i64)** @fnptr2 + %2 = tail call i32 %1(i32 1, i32 2, i64 3) + ret i32 %2 +} + +; Check that tail calls to function pointers where not all of r0-r3 are used for +; parameter passing are tail-call optimized. +; test5: params in r0, r1. r2 & r3 are free. +@fnptr3 = global i32 (i32, i32)* null +define i32 @test5() { +; CHECK-LABEL: test5: +; CHECK: ldr [[REG:r[0-9]+]] +; CHECK: bx [[REG]] +; CHECK-NOT: blx [[REG]] + %1 = load i32 (i32, i32)*, i32 (i32, i32)** @fnptr3 + %2 = tail call i32 %1(i32 1, i32 2) + ret i32 %2 +} + +; test6: params in r0 and r2-r3. r1 is free. +@fnptr4 = global i32 (i32, i64)* null +define i32 @test6() { +; CHECK-LABEL: test6: +; CHECK: ldr [[REG:r[0-9]+]] +; CHECK: bx [[REG]] +; CHECK-NOT: blx [[REG]] + %1 = load i32 (i32, i64)*, i32 (i32, i64)** @fnptr4 + %2 = tail call i32 %1(i32 1, i64 2) + ret i32 %2 +} + +; Check that tail calls to functions other than function pointers are +; tail-call optimized. +define i32 @test7() { +; CHECK-LABEL: test7: +; CHECK: b bar +; CHECK-NOT: bl bar + %tail = tail call i32 @bar(i32 1, i32 2, i32 3, i32 4) + ret i32 %tail +} + +declare i32 @bar(i32, i32, i32, i32) diff --git a/test/CodeGen/ARM/v8m.base-jumptable_alignment.ll b/test/CodeGen/ARM/v8m.base-jumptable_alignment.ll index 673e04687a10..73189fe69dbe 100644 --- a/test/CodeGen/ARM/v8m.base-jumptable_alignment.ll +++ b/test/CodeGen/ARM/v8m.base-jumptable_alignment.ll @@ -30,7 +30,7 @@ for.cond7.preheader.i.us.i.i: ; preds = %for.cond7.preheader unreachable for.cond14.preheader.us.i.i.i: ; preds = %for.inc459.us.i.i.i, %for.cond7.preheader.i.i.preheader.i -; CHECK: @ BB#4 +; CHECK: @ %bb.4 ; CHECK-NEXT: .p2align 2 switch i4 undef, label %func_1.exit.loopexit [ i4 0, label %for.inc459.us.i.i.i diff --git a/test/CodeGen/ARM/vbits.ll b/test/CodeGen/ARM/vbits.ll index 0a7f7698fa88..2997750ccb1a 100644 --- a/test/CodeGen/ARM/vbits.ll +++ b/test/CodeGen/ARM/vbits.ll @@ -3,7 +3,7 @@ define <8 x i8> @v_andi8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ; CHECK-LABEL: v_andi8: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d16, [r1] ; CHECK-NEXT: vldr d17, [r0] ; CHECK-NEXT: vand d16, d17, d16 @@ -17,7 +17,7 @@ define <8 x i8> @v_andi8(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <4 x i16> @v_andi16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ; CHECK-LABEL: v_andi16: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d16, [r1] ; CHECK-NEXT: vldr d17, [r0] ; CHECK-NEXT: vand d16, d17, d16 @@ -31,7 +31,7 @@ define <4 x i16> @v_andi16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <2 x i32> @v_andi32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ; CHECK-LABEL: v_andi32: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d16, [r1] ; CHECK-NEXT: vldr d17, [r0] ; CHECK-NEXT: vand d16, d17, d16 @@ -45,7 +45,7 @@ define <2 x 
i32> @v_andi32(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <1 x i64> @v_andi64(<1 x i64>* %A, <1 x i64>* %B) nounwind { ; CHECK-LABEL: v_andi64: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d16, [r1] ; CHECK-NEXT: vldr d17, [r0] ; CHECK-NEXT: vand d16, d17, d16 @@ -59,7 +59,7 @@ define <1 x i64> @v_andi64(<1 x i64>* %A, <1 x i64>* %B) nounwind { define <16 x i8> @v_andQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ; CHECK-LABEL: v_andQi8: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-NEXT: vld1.64 {d18, d19}, [r0] ; CHECK-NEXT: vand q8, q9, q8 @@ -74,7 +74,7 @@ define <16 x i8> @v_andQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { define <8 x i16> @v_andQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ; CHECK-LABEL: v_andQi16: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-NEXT: vld1.64 {d18, d19}, [r0] ; CHECK-NEXT: vand q8, q9, q8 @@ -89,7 +89,7 @@ define <8 x i16> @v_andQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { define <4 x i32> @v_andQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ; CHECK-LABEL: v_andQi32: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-NEXT: vld1.64 {d18, d19}, [r0] ; CHECK-NEXT: vand q8, q9, q8 @@ -104,7 +104,7 @@ define <4 x i32> @v_andQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { define <2 x i64> @v_andQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind { ; CHECK-LABEL: v_andQi64: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-NEXT: vld1.64 {d18, d19}, [r0] ; CHECK-NEXT: vand q8, q9, q8 @@ -119,7 +119,7 @@ define <2 x i64> @v_andQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind { define <8 x i8> @v_bici8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ; CHECK-LABEL: v_bici8: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d16, [r1] ; CHECK-NEXT: vldr d17, [r0] ; CHECK-NEXT: vbic d16, d17, d16 @@ -134,7 +134,7 @@ define <8 x i8> @v_bici8(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <4 x i16> @v_bici16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ; CHECK-LABEL: v_bici16: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d16, [r1] ; CHECK-NEXT: vldr d17, [r0] ; CHECK-NEXT: vbic d16, d17, d16 @@ -149,7 +149,7 @@ define <4 x i16> @v_bici16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <2 x i32> @v_bici32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ; CHECK-LABEL: v_bici32: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d16, [r1] ; CHECK-NEXT: vldr d17, [r0] ; CHECK-NEXT: vbic d16, d17, d16 @@ -164,7 +164,7 @@ define <2 x i32> @v_bici32(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <1 x i64> @v_bici64(<1 x i64>* %A, <1 x i64>* %B) nounwind { ; CHECK-LABEL: v_bici64: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d16, [r1] ; CHECK-NEXT: vldr d17, [r0] ; CHECK-NEXT: vbic d16, d17, d16 @@ -179,7 +179,7 @@ define <1 x i64> @v_bici64(<1 x i64>* %A, <1 x i64>* %B) nounwind { define <16 x i8> @v_bicQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ; CHECK-LABEL: v_bicQi8: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-NEXT: vld1.64 {d18, d19}, [r0] ; CHECK-NEXT: vbic q8, q9, q8 @@ -195,7 +195,7 @@ define <16 x i8> @v_bicQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { define <8 x i16> @v_bicQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ; CHECK-LABEL: v_bicQi16: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-NEXT: vld1.64 {d18, d19}, [r0] ; CHECK-NEXT: vbic q8, q9, q8 @@ -211,7 +211,7 @@ define <8 x i16> @v_bicQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { 
define <4 x i32> @v_bicQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ; CHECK-LABEL: v_bicQi32: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-NEXT: vld1.64 {d18, d19}, [r0] ; CHECK-NEXT: vbic q8, q9, q8 @@ -227,7 +227,7 @@ define <4 x i32> @v_bicQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { define <2 x i64> @v_bicQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind { ; CHECK-LABEL: v_bicQi64: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-NEXT: vld1.64 {d18, d19}, [r0] ; CHECK-NEXT: vbic q8, q9, q8 @@ -243,7 +243,7 @@ define <2 x i64> @v_bicQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind { define <8 x i8> @v_eori8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ; CHECK-LABEL: v_eori8: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d16, [r1] ; CHECK-NEXT: vldr d17, [r0] ; CHECK-NEXT: veor d16, d17, d16 @@ -257,7 +257,7 @@ define <8 x i8> @v_eori8(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <4 x i16> @v_eori16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ; CHECK-LABEL: v_eori16: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d16, [r1] ; CHECK-NEXT: vldr d17, [r0] ; CHECK-NEXT: veor d16, d17, d16 @@ -271,7 +271,7 @@ define <4 x i16> @v_eori16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <2 x i32> @v_eori32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ; CHECK-LABEL: v_eori32: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d16, [r1] ; CHECK-NEXT: vldr d17, [r0] ; CHECK-NEXT: veor d16, d17, d16 @@ -285,7 +285,7 @@ define <2 x i32> @v_eori32(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <1 x i64> @v_eori64(<1 x i64>* %A, <1 x i64>* %B) nounwind { ; CHECK-LABEL: v_eori64: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d16, [r1] ; CHECK-NEXT: vldr d17, [r0] ; CHECK-NEXT: veor d16, d17, d16 @@ -299,7 +299,7 @@ define <1 x i64> @v_eori64(<1 x i64>* %A, <1 x i64>* %B) nounwind { define <16 x i8> @v_eorQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ; CHECK-LABEL: v_eorQi8: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-NEXT: vld1.64 {d18, d19}, [r0] ; CHECK-NEXT: veor q8, q9, q8 @@ -314,7 +314,7 @@ define <16 x i8> @v_eorQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { define <8 x i16> @v_eorQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ; CHECK-LABEL: v_eorQi16: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-NEXT: vld1.64 {d18, d19}, [r0] ; CHECK-NEXT: veor q8, q9, q8 @@ -329,7 +329,7 @@ define <8 x i16> @v_eorQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { define <4 x i32> @v_eorQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ; CHECK-LABEL: v_eorQi32: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-NEXT: vld1.64 {d18, d19}, [r0] ; CHECK-NEXT: veor q8, q9, q8 @@ -344,7 +344,7 @@ define <4 x i32> @v_eorQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { define <2 x i64> @v_eorQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind { ; CHECK-LABEL: v_eorQi64: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-NEXT: vld1.64 {d18, d19}, [r0] ; CHECK-NEXT: veor q8, q9, q8 @@ -359,7 +359,7 @@ define <2 x i64> @v_eorQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind { define <8 x i8> @v_mvni8(<8 x i8>* %A) nounwind { ; CHECK-LABEL: v_mvni8: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d16, [r0] ; CHECK-NEXT: vmvn d16, d16 ; CHECK-NEXT: vmov r0, r1, d16 @@ -371,7 +371,7 @@ define <8 x i8> @v_mvni8(<8 x i8>* %A) nounwind { define <4 x i16> @v_mvni16(<4 x i16>* %A) nounwind { ; CHECK-LABEL: v_mvni16: -; CHECK: 
@ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d16, [r0] ; CHECK-NEXT: vmvn d16, d16 ; CHECK-NEXT: vmov r0, r1, d16 @@ -383,7 +383,7 @@ define <4 x i16> @v_mvni16(<4 x i16>* %A) nounwind { define <2 x i32> @v_mvni32(<2 x i32>* %A) nounwind { ; CHECK-LABEL: v_mvni32: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d16, [r0] ; CHECK-NEXT: vmvn d16, d16 ; CHECK-NEXT: vmov r0, r1, d16 @@ -395,7 +395,7 @@ define <2 x i32> @v_mvni32(<2 x i32>* %A) nounwind { define <1 x i64> @v_mvni64(<1 x i64>* %A) nounwind { ; CHECK-LABEL: v_mvni64: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d16, [r0] ; CHECK-NEXT: vmvn d16, d16 ; CHECK-NEXT: vmov r0, r1, d16 @@ -407,7 +407,7 @@ define <1 x i64> @v_mvni64(<1 x i64>* %A) nounwind { define <16 x i8> @v_mvnQi8(<16 x i8>* %A) nounwind { ; CHECK-LABEL: v_mvnQi8: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r0] ; CHECK-NEXT: vmvn q8, q8 ; CHECK-NEXT: vmov r0, r1, d16 @@ -420,7 +420,7 @@ define <16 x i8> @v_mvnQi8(<16 x i8>* %A) nounwind { define <8 x i16> @v_mvnQi16(<8 x i16>* %A) nounwind { ; CHECK-LABEL: v_mvnQi16: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r0] ; CHECK-NEXT: vmvn q8, q8 ; CHECK-NEXT: vmov r0, r1, d16 @@ -433,7 +433,7 @@ define <8 x i16> @v_mvnQi16(<8 x i16>* %A) nounwind { define <4 x i32> @v_mvnQi32(<4 x i32>* %A) nounwind { ; CHECK-LABEL: v_mvnQi32: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r0] ; CHECK-NEXT: vmvn q8, q8 ; CHECK-NEXT: vmov r0, r1, d16 @@ -446,7 +446,7 @@ define <4 x i32> @v_mvnQi32(<4 x i32>* %A) nounwind { define <2 x i64> @v_mvnQi64(<2 x i64>* %A) nounwind { ; CHECK-LABEL: v_mvnQi64: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r0] ; CHECK-NEXT: vmvn q8, q8 ; CHECK-NEXT: vmov r0, r1, d16 @@ -459,7 +459,7 @@ define <2 x i64> @v_mvnQi64(<2 x i64>* %A) nounwind { define <8 x i8> @v_orri8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ; CHECK-LABEL: v_orri8: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d16, [r1] ; CHECK-NEXT: vldr d17, [r0] ; CHECK-NEXT: vorr d16, d17, d16 @@ -473,7 +473,7 @@ define <8 x i8> @v_orri8(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <4 x i16> @v_orri16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ; CHECK-LABEL: v_orri16: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d16, [r1] ; CHECK-NEXT: vldr d17, [r0] ; CHECK-NEXT: vorr d16, d17, d16 @@ -487,7 +487,7 @@ define <4 x i16> @v_orri16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <2 x i32> @v_orri32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ; CHECK-LABEL: v_orri32: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d16, [r1] ; CHECK-NEXT: vldr d17, [r0] ; CHECK-NEXT: vorr d16, d17, d16 @@ -501,7 +501,7 @@ define <2 x i32> @v_orri32(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <1 x i64> @v_orri64(<1 x i64>* %A, <1 x i64>* %B) nounwind { ; CHECK-LABEL: v_orri64: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d16, [r1] ; CHECK-NEXT: vldr d17, [r0] ; CHECK-NEXT: vorr d16, d17, d16 @@ -515,7 +515,7 @@ define <1 x i64> @v_orri64(<1 x i64>* %A, <1 x i64>* %B) nounwind { define <16 x i8> @v_orrQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ; CHECK-LABEL: v_orrQi8: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-NEXT: vld1.64 {d18, d19}, [r0] ; CHECK-NEXT: vorr q8, q9, q8 @@ -530,7 +530,7 @@ define <16 x i8> @v_orrQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { define <8 x i16> @v_orrQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ; CHECK-LABEL: v_orrQi16: -; CHECK: @ BB#0: +; 
CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-NEXT: vld1.64 {d18, d19}, [r0] ; CHECK-NEXT: vorr q8, q9, q8 @@ -545,7 +545,7 @@ define <8 x i16> @v_orrQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { define <4 x i32> @v_orrQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ; CHECK-LABEL: v_orrQi32: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-NEXT: vld1.64 {d18, d19}, [r0] ; CHECK-NEXT: vorr q8, q9, q8 @@ -560,7 +560,7 @@ define <4 x i32> @v_orrQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { define <2 x i64> @v_orrQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind { ; CHECK-LABEL: v_orrQi64: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-NEXT: vld1.64 {d18, d19}, [r0] ; CHECK-NEXT: vorr q8, q9, q8 @@ -575,7 +575,7 @@ define <2 x i64> @v_orrQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind { define <8 x i8> @v_orni8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ; CHECK-LABEL: v_orni8: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d16, [r1] ; CHECK-NEXT: vldr d17, [r0] ; CHECK-NEXT: vorn d16, d17, d16 @@ -590,7 +590,7 @@ define <8 x i8> @v_orni8(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <4 x i16> @v_orni16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ; CHECK-LABEL: v_orni16: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d16, [r1] ; CHECK-NEXT: vldr d17, [r0] ; CHECK-NEXT: vorn d16, d17, d16 @@ -605,7 +605,7 @@ define <4 x i16> @v_orni16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <2 x i32> @v_orni32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ; CHECK-LABEL: v_orni32: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d16, [r1] ; CHECK-NEXT: vldr d17, [r0] ; CHECK-NEXT: vorn d16, d17, d16 @@ -620,7 +620,7 @@ define <2 x i32> @v_orni32(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <1 x i64> @v_orni64(<1 x i64>* %A, <1 x i64>* %B) nounwind { ; CHECK-LABEL: v_orni64: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d16, [r1] ; CHECK-NEXT: vldr d17, [r0] ; CHECK-NEXT: vorn d16, d17, d16 @@ -635,7 +635,7 @@ define <1 x i64> @v_orni64(<1 x i64>* %A, <1 x i64>* %B) nounwind { define <16 x i8> @v_ornQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ; CHECK-LABEL: v_ornQi8: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-NEXT: vld1.64 {d18, d19}, [r0] ; CHECK-NEXT: vorn q8, q9, q8 @@ -651,7 +651,7 @@ define <16 x i8> @v_ornQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { define <8 x i16> @v_ornQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ; CHECK-LABEL: v_ornQi16: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-NEXT: vld1.64 {d18, d19}, [r0] ; CHECK-NEXT: vorn q8, q9, q8 @@ -667,7 +667,7 @@ define <8 x i16> @v_ornQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { define <4 x i32> @v_ornQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ; CHECK-LABEL: v_ornQi32: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-NEXT: vld1.64 {d18, d19}, [r0] ; CHECK-NEXT: vorn q8, q9, q8 @@ -683,7 +683,7 @@ define <4 x i32> @v_ornQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { define <2 x i64> @v_ornQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind { ; CHECK-LABEL: v_ornQi64: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-NEXT: vld1.64 {d18, d19}, [r0] ; CHECK-NEXT: vorn q8, q9, q8 @@ -699,7 +699,7 @@ define <2 x i64> @v_ornQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind { define <8 x i8> @vtsti8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ; CHECK-LABEL: vtsti8: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr 
d16, [r1] ; CHECK-NEXT: vldr d17, [r0] ; CHECK-NEXT: vtst.8 d16, d17, d16 @@ -715,7 +715,7 @@ define <8 x i8> @vtsti8(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <4 x i16> @vtsti16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ; CHECK-LABEL: vtsti16: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d16, [r1] ; CHECK-NEXT: vldr d17, [r0] ; CHECK-NEXT: vtst.16 d16, d17, d16 @@ -731,7 +731,7 @@ define <4 x i16> @vtsti16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <2 x i32> @vtsti32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ; CHECK-LABEL: vtsti32: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d16, [r1] ; CHECK-NEXT: vldr d17, [r0] ; CHECK-NEXT: vtst.32 d16, d17, d16 @@ -747,7 +747,7 @@ define <2 x i32> @vtsti32(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <16 x i8> @vtstQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ; CHECK-LABEL: vtstQi8: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-NEXT: vld1.64 {d18, d19}, [r0] ; CHECK-NEXT: vtst.8 q8, q9, q8 @@ -764,7 +764,7 @@ define <16 x i8> @vtstQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { define <8 x i16> @vtstQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ; CHECK-LABEL: vtstQi16: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-NEXT: vld1.64 {d18, d19}, [r0] ; CHECK-NEXT: vtst.16 q8, q9, q8 @@ -781,7 +781,7 @@ define <8 x i16> @vtstQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { define <4 x i32> @vtstQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ; CHECK-LABEL: vtstQi32: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-NEXT: vld1.64 {d18, d19}, [r0] ; CHECK-NEXT: vtst.32 q8, q9, q8 @@ -798,7 +798,7 @@ define <4 x i32> @vtstQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { define <8 x i8> @v_orrimm(<8 x i8>* %A) nounwind { ; CHECK-LABEL: v_orrimm: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d16, [r0] ; CHECK-NEXT: vorr.i32 d16, #0x1000000 ; CHECK-NEXT: vmov r0, r1, d16 @@ -810,7 +810,7 @@ define <8 x i8> @v_orrimm(<8 x i8>* %A) nounwind { define <16 x i8> @v_orrimmQ(<16 x i8>* %A) nounwind { ; CHECK-LABEL: v_orrimmQ: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r0] ; CHECK-NEXT: vorr.i32 q8, #0x1000000 ; CHECK-NEXT: vmov r0, r1, d16 @@ -823,7 +823,7 @@ define <16 x i8> @v_orrimmQ(<16 x i8>* %A) nounwind { define <8 x i8> @v_bicimm(<8 x i8>* %A) nounwind { ; CHECK-LABEL: v_bicimm: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d16, [r0] ; CHECK-NEXT: vbic.i32 d16, #0xff000000 ; CHECK-NEXT: vmov r0, r1, d16 @@ -835,7 +835,7 @@ define <8 x i8> @v_bicimm(<8 x i8>* %A) nounwind { define <16 x i8> @v_bicimmQ(<16 x i8>* %A) nounwind { ; CHECK-LABEL: v_bicimmQ: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r0] ; CHECK-NEXT: vbic.i32 q8, #0xff000000 ; CHECK-NEXT: vmov r0, r1, d16 @@ -848,7 +848,7 @@ define <16 x i8> @v_bicimmQ(<16 x i8>* %A) nounwind { define <4 x i32> @hidden_not_v4i32(<4 x i32> %x) nounwind { ; CHECK-LABEL: hidden_not_v4i32: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vmov d19, r2, r3 ; CHECK-NEXT: vmov.i32 q8, #0x6 ; CHECK-NEXT: vmov d18, r0, r1 diff --git a/test/CodeGen/ARM/vcvt.ll b/test/CodeGen/ARM/vcvt.ll index 5f470d60707c..7052607bf80f 100644 --- a/test/CodeGen/ARM/vcvt.ll +++ b/test/CodeGen/ARM/vcvt.ll @@ -3,7 +3,7 @@ define <2 x i32> @vcvt_f32tos32(<2 x float>* %A) nounwind { ; CHECK-LABEL: vcvt_f32tos32: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d16, [r0] ; CHECK-NEXT: vcvt.s32.f32 d16, d16 ; CHECK-NEXT: vmov r0, r1, d16 
@@ -15,7 +15,7 @@ define <2 x i32> @vcvt_f32tos32(<2 x float>* %A) nounwind { define <2 x i32> @vcvt_f32tou32(<2 x float>* %A) nounwind { ; CHECK-LABEL: vcvt_f32tou32: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d16, [r0] ; CHECK-NEXT: vcvt.u32.f32 d16, d16 ; CHECK-NEXT: vmov r0, r1, d16 @@ -27,7 +27,7 @@ define <2 x i32> @vcvt_f32tou32(<2 x float>* %A) nounwind { define <2 x float> @vcvt_s32tof32(<2 x i32>* %A) nounwind { ; CHECK-LABEL: vcvt_s32tof32: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d16, [r0] ; CHECK-NEXT: vcvt.f32.s32 d16, d16 ; CHECK-NEXT: vmov r0, r1, d16 @@ -39,7 +39,7 @@ define <2 x float> @vcvt_s32tof32(<2 x i32>* %A) nounwind { define <2 x float> @vcvt_u32tof32(<2 x i32>* %A) nounwind { ; CHECK-LABEL: vcvt_u32tof32: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d16, [r0] ; CHECK-NEXT: vcvt.f32.u32 d16, d16 ; CHECK-NEXT: vmov r0, r1, d16 @@ -51,7 +51,7 @@ define <2 x float> @vcvt_u32tof32(<2 x i32>* %A) nounwind { define <4 x i32> @vcvtQ_f32tos32(<4 x float>* %A) nounwind { ; CHECK-LABEL: vcvtQ_f32tos32: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r0] ; CHECK-NEXT: vcvt.s32.f32 q8, q8 ; CHECK-NEXT: vmov r0, r1, d16 @@ -64,7 +64,7 @@ define <4 x i32> @vcvtQ_f32tos32(<4 x float>* %A) nounwind { define <4 x i32> @vcvtQ_f32tou32(<4 x float>* %A) nounwind { ; CHECK-LABEL: vcvtQ_f32tou32: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r0] ; CHECK-NEXT: vcvt.u32.f32 q8, q8 ; CHECK-NEXT: vmov r0, r1, d16 @@ -77,7 +77,7 @@ define <4 x i32> @vcvtQ_f32tou32(<4 x float>* %A) nounwind { define <4 x float> @vcvtQ_s32tof32(<4 x i32>* %A) nounwind { ; CHECK-LABEL: vcvtQ_s32tof32: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r0] ; CHECK-NEXT: vcvt.f32.s32 q8, q8 ; CHECK-NEXT: vmov r0, r1, d16 @@ -90,7 +90,7 @@ define <4 x float> @vcvtQ_s32tof32(<4 x i32>* %A) nounwind { define <4 x float> @vcvtQ_u32tof32(<4 x i32>* %A) nounwind { ; CHECK-LABEL: vcvtQ_u32tof32: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r0] ; CHECK-NEXT: vcvt.f32.u32 q8, q8 ; CHECK-NEXT: vmov r0, r1, d16 @@ -103,7 +103,7 @@ define <4 x float> @vcvtQ_u32tof32(<4 x i32>* %A) nounwind { define <2 x i32> @vcvt_n_f32tos32(<2 x float>* %A) nounwind { ; CHECK-LABEL: vcvt_n_f32tos32: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d16, [r0] ; CHECK-NEXT: vcvt.s32.f32 d16, d16, #1 ; CHECK-NEXT: vmov r0, r1, d16 @@ -115,7 +115,7 @@ define <2 x i32> @vcvt_n_f32tos32(<2 x float>* %A) nounwind { define <2 x i32> @vcvt_n_f32tou32(<2 x float>* %A) nounwind { ; CHECK-LABEL: vcvt_n_f32tou32: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d16, [r0] ; CHECK-NEXT: vcvt.u32.f32 d16, d16, #1 ; CHECK-NEXT: vmov r0, r1, d16 @@ -127,7 +127,7 @@ define <2 x i32> @vcvt_n_f32tou32(<2 x float>* %A) nounwind { define <2 x float> @vcvt_n_s32tof32(<2 x i32>* %A) nounwind { ; CHECK-LABEL: vcvt_n_s32tof32: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d16, [r0] ; CHECK-NEXT: vcvt.f32.s32 d16, d16, #1 ; CHECK-NEXT: vmov r0, r1, d16 @@ -139,7 +139,7 @@ define <2 x float> @vcvt_n_s32tof32(<2 x i32>* %A) nounwind { define <2 x float> @vcvt_n_u32tof32(<2 x i32>* %A) nounwind { ; CHECK-LABEL: vcvt_n_u32tof32: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d16, [r0] ; CHECK-NEXT: vcvt.f32.u32 d16, d16, #1 ; CHECK-NEXT: vmov r0, r1, d16 @@ -156,7 +156,7 @@ declare <2 x float> @llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32>, i32) nounwi define <4 x i32> @vcvtQ_n_f32tos32(<4 x float>* %A) 
nounwind { ; CHECK-LABEL: vcvtQ_n_f32tos32: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r0] ; CHECK-NEXT: vcvt.s32.f32 q8, q8, #1 ; CHECK-NEXT: vmov r0, r1, d16 @@ -169,7 +169,7 @@ define <4 x i32> @vcvtQ_n_f32tos32(<4 x float>* %A) nounwind { define <4 x i32> @vcvtQ_n_f32tou32(<4 x float>* %A) nounwind { ; CHECK-LABEL: vcvtQ_n_f32tou32: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r0] ; CHECK-NEXT: vcvt.u32.f32 q8, q8, #1 ; CHECK-NEXT: vmov r0, r1, d16 @@ -182,7 +182,7 @@ define <4 x i32> @vcvtQ_n_f32tou32(<4 x float>* %A) nounwind { define <4 x float> @vcvtQ_n_s32tof32(<4 x i32>* %A) nounwind { ; CHECK-LABEL: vcvtQ_n_s32tof32: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r0] ; CHECK-NEXT: vcvt.f32.s32 q8, q8, #1 ; CHECK-NEXT: vmov r0, r1, d16 @@ -195,7 +195,7 @@ define <4 x float> @vcvtQ_n_s32tof32(<4 x i32>* %A) nounwind { define <4 x float> @vcvtQ_n_u32tof32(<4 x i32>* %A) nounwind { ; CHECK-LABEL: vcvtQ_n_u32tof32: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r0] ; CHECK-NEXT: vcvt.f32.u32 q8, q8, #1 ; CHECK-NEXT: vmov r0, r1, d16 @@ -213,7 +213,7 @@ declare <4 x float> @llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32>, i32) nounwi define <4 x float> @vcvt_f16tof32(<4 x i16>* %A) nounwind { ; CHECK-LABEL: vcvt_f16tof32: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d16, [r0] ; CHECK-NEXT: vcvt.f32.f16 q8, d16 ; CHECK-NEXT: vmov r0, r1, d16 @@ -226,7 +226,7 @@ define <4 x float> @vcvt_f16tof32(<4 x i16>* %A) nounwind { define <4 x i16> @vcvt_f32tof16(<4 x float>* %A) nounwind { ; CHECK-LABEL: vcvt_f32tof16: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r0] ; CHECK-NEXT: vcvt.f16.f32 d16, q8 ; CHECK-NEXT: vmov r0, r1, d16 @@ -242,7 +242,7 @@ declare <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float>) nounwind readnone define <4 x i16> @fix_float_to_i16(<4 x float> %in) { ; CHECK-LABEL: fix_float_to_i16: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vmov d17, r2, r3 ; CHECK-NEXT: vmov d16, r0, r1 ; CHECK-NEXT: vcvt.u32.f32 q8, q8, #1 @@ -257,7 +257,7 @@ define <4 x i16> @fix_float_to_i16(<4 x float> %in) { define <2 x i64> @fix_float_to_i64(<2 x float> %in) { ; CHECK-LABEL: fix_float_to_i64: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: .save {r4, lr} ; CHECK-NEXT: push {r4, lr} ; CHECK-NEXT: .vsave {d8, d9} @@ -287,7 +287,7 @@ define <2 x i64> @fix_float_to_i64(<2 x float> %in) { define <4 x i16> @fix_double_to_i16(<4 x double> %in) { ; CHECK-LABEL: fix_double_to_i16: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vmov d18, r0, r1 ; CHECK-NEXT: mov r12, sp ; CHECK-NEXT: vld1.64 {d16, d17}, [r12] @@ -319,7 +319,7 @@ define <4 x i16> @fix_double_to_i16(<4 x double> %in) { define <2 x i64> @fix_double_to_i64(<2 x double> %in) { ; CHECK-LABEL: fix_double_to_i64: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: .save {r4, lr} ; CHECK-NEXT: push {r4, lr} ; CHECK-NEXT: .vsave {d8, d9} @@ -352,7 +352,7 @@ define <2 x i64> @fix_double_to_i64(<2 x double> %in) { define i32 @multi_sint(double %c, i32* nocapture %p, i32* nocapture %q) { ; CHECK-LABEL: multi_sint: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vmov d16, r0, r1 ; CHECK-NEXT: vcvt.s32.f64 s0, d16 ; CHECK-NEXT: vstr s0, [r2] @@ -369,7 +369,7 @@ define i32 @multi_sint(double %c, i32* nocapture %p, i32* nocapture %q) { define i32 @multi_uint(double %c, i32* nocapture %p, i32* nocapture %q) { ; CHECK-LABEL: multi_uint: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vmov d16, r0, 
r1 ; CHECK-NEXT: vcvt.u32.f64 s0, d16 ; CHECK-NEXT: vstr s0, [r2] @@ -386,7 +386,7 @@ define i32 @multi_uint(double %c, i32* nocapture %p, i32* nocapture %q) { define void @double_to_sint_store(double %c, i32* nocapture %p) { ; CHECK-LABEL: double_to_sint_store: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vmov d16, r0, r1 ; CHECK-NEXT: vcvt.s32.f64 s0, d16 ; CHECK-NEXT: vstr s0, [r2] @@ -398,7 +398,7 @@ define void @double_to_sint_store(double %c, i32* nocapture %p) { define void @double_to_uint_store(double %c, i32* nocapture %p) { ; CHECK-LABEL: double_to_uint_store: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vmov d16, r0, r1 ; CHECK-NEXT: vcvt.u32.f64 s0, d16 ; CHECK-NEXT: vstr s0, [r2] @@ -410,7 +410,7 @@ define void @double_to_uint_store(double %c, i32* nocapture %p) { define void @float_to_sint_store(float %c, i32* nocapture %p) { ; CHECK-LABEL: float_to_sint_store: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vmov s0, r0 ; CHECK-NEXT: vcvt.s32.f32 s0, s0 ; CHECK-NEXT: vstr s0, [r1] @@ -422,7 +422,7 @@ define void @float_to_sint_store(float %c, i32* nocapture %p) { define void @float_to_uint_store(float %c, i32* nocapture %p) { ; CHECK-LABEL: float_to_uint_store: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vmov s0, r0 ; CHECK-NEXT: vcvt.u32.f32 s0, s0 ; CHECK-NEXT: vstr s0, [r1] diff --git a/test/CodeGen/ARM/vcvt_combine.ll b/test/CodeGen/ARM/vcvt_combine.ll index 11bed5a1a474..326c5f761a98 100644 --- a/test/CodeGen/ARM/vcvt_combine.ll +++ b/test/CodeGen/ARM/vcvt_combine.ll @@ -69,4 +69,4 @@ define <3 x i32> @test_illegal_fp_to_int(<3 x float> %in) { %scale = fmul <3 x float> %in, %val = fptosi <3 x float> %scale to <3 x i32> ret <3 x i32> %val -} \ No newline at end of file +} diff --git a/test/CodeGen/ARM/vdiv_combine.ll b/test/CodeGen/ARM/vdiv_combine.ll index 4a6c36b42772..d88fe31a59d2 100644 --- a/test/CodeGen/ARM/vdiv_combine.ll +++ b/test/CodeGen/ARM/vdiv_combine.ll @@ -160,4 +160,4 @@ define <3 x float> @test_illegal_int_to_fp(<3 x i32> %in) { %conv = sitofp <3 x i32> %in to <3 x float> %res = fdiv <3 x float> %conv, ret <3 x float> %res -} \ No newline at end of file +} diff --git a/test/CodeGen/ARM/vext.ll b/test/CodeGen/ARM/vext.ll index 5b524145be76..397680c5b0cf 100644 --- a/test/CodeGen/ARM/vext.ll +++ b/test/CodeGen/ARM/vext.ll @@ -3,7 +3,7 @@ define <8 x i8> @test_vextd(<8 x i8>* %A, <8 x i8>* %B) nounwind { ; CHECK-LABEL: test_vextd: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d16, [r1] ; CHECK-NEXT: vldr d17, [r0] ; CHECK-NEXT: vext.8 d16, d17, d16, #3 @@ -17,7 +17,7 @@ define <8 x i8> @test_vextd(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <8 x i8> @test_vextRd(<8 x i8>* %A, <8 x i8>* %B) nounwind { ; CHECK-LABEL: test_vextRd: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d16, [r0] ; CHECK-NEXT: vldr d17, [r1] ; CHECK-NEXT: vext.8 d16, d17, d16, #5 @@ -31,7 +31,7 @@ define <8 x i8> @test_vextRd(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <16 x i8> @test_vextq(<16 x i8>* %A, <16 x i8>* %B) nounwind { ; CHECK-LABEL: test_vextq: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-NEXT: vld1.64 {d18, d19}, [r0] ; CHECK-NEXT: vext.8 q8, q9, q8, #3 @@ -46,7 +46,7 @@ define <16 x i8> @test_vextq(<16 x i8>* %A, <16 x i8>* %B) nounwind { define <16 x i8> @test_vextRq(<16 x i8>* %A, <16 x i8>* %B) nounwind { ; CHECK-LABEL: test_vextRq: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r0] ; CHECK-NEXT: vld1.64 {d18, d19}, [r1] ; CHECK-NEXT: vext.8 q8, q9, q8, #7 @@ 
-61,7 +61,7 @@ define <16 x i8> @test_vextRq(<16 x i8>* %A, <16 x i8>* %B) nounwind { define <4 x i16> @test_vextd16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ; CHECK-LABEL: test_vextd16: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d16, [r1] ; CHECK-NEXT: vldr d17, [r0] ; CHECK-NEXT: vext.16 d16, d17, d16, #3 @@ -75,7 +75,7 @@ define <4 x i16> @test_vextd16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <4 x i32> @test_vextq32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ; CHECK-LABEL: test_vextq32: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-NEXT: vld1.64 {d18, d19}, [r0] ; CHECK-NEXT: vext.32 q8, q9, q8, #3 @@ -92,7 +92,7 @@ define <4 x i32> @test_vextq32(<4 x i32>* %A, <4 x i32>* %B) nounwind { define <8 x i8> @test_vextd_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind { ; CHECK-LABEL: test_vextd_undef: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d16, [r1] ; CHECK-NEXT: vldr d17, [r0] ; CHECK-NEXT: vext.8 d16, d17, d16, #3 @@ -106,7 +106,7 @@ define <8 x i8> @test_vextd_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <16 x i8> @test_vextRq_undef(<16 x i8>* %A, <16 x i8>* %B) nounwind { ; CHECK-LABEL: test_vextRq_undef: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r0] ; CHECK-NEXT: vld1.64 {d18, d19}, [r1] ; CHECK-NEXT: vext.8 q8, q9, q8, #7 @@ -121,7 +121,7 @@ define <16 x i8> @test_vextRq_undef(<16 x i8>* %A, <16 x i8>* %B) nounwind { define <16 x i8> @test_vextq_undef_op2(<16 x i8> %a) nounwind { ; CHECK-LABEL: test_vextq_undef_op2: -; CHECK: @ BB#0: @ %entry +; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmov d17, r2, r3 ; CHECK-NEXT: vmov d16, r0, r1 ; CHECK-NEXT: vext.8 q8, q8, q8, #2 @@ -135,7 +135,7 @@ entry: define <8 x i8> @test_vextd_undef_op2(<8 x i8> %a) nounwind { ; CHECK-LABEL: test_vextd_undef_op2: -; CHECK: @ BB#0: @ %entry +; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmov d16, r0, r1 ; CHECK-NEXT: vext.8 d16, d16, d16, #2 ; CHECK-NEXT: vmov r0, r1, d16 @@ -148,7 +148,7 @@ entry: define <16 x i8> @test_vextq_undef_op2_undef(<16 x i8> %a) nounwind { ; CHECK-LABEL: test_vextq_undef_op2_undef: -; CHECK: @ BB#0: @ %entry +; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmov d17, r2, r3 ; CHECK-NEXT: vmov d16, r0, r1 ; CHECK-NEXT: vext.8 q8, q8, q8, #2 @@ -162,7 +162,7 @@ entry: define <8 x i8> @test_vextd_undef_op2_undef(<8 x i8> %a) nounwind { ; CHECK-LABEL: test_vextd_undef_op2_undef: -; CHECK: @ BB#0: @ %entry +; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmov d16, r0, r1 ; CHECK-NEXT: vext.8 d16, d16, d16, #2 ; CHECK-NEXT: vmov r0, r1, d16 @@ -180,7 +180,7 @@ entry: ; Essence: a vext is used on %A and something saner than stack load/store for final result. define <4 x i16> @test_interleaved(<8 x i16>* %A, <8 x i16>* %B) nounwind { ; CHECK-LABEL: test_interleaved: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r0] ; CHECK-NEXT: vext.16 d16, d16, d17, #3 ; CHECK-NEXT: vorr d17, d16, d16 @@ -198,7 +198,7 @@ define <4 x i16> @test_interleaved(<8 x i16>* %A, <8 x i16>* %B) nounwind { ; An undef in the shuffle list should still be optimizable define <4 x i16> @test_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind { ; CHECK-LABEL: test_undef: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d16, [r1] ; CHECK-NEXT: vldr d17, [r0, #8] ; CHECK-NEXT: vzip.16 d17, d16 @@ -215,7 +215,7 @@ define <4 x i16> @test_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind { ; Try to look for fallback to by-element inserts. 
define <4 x i16> @test_multisource(<32 x i16>* %B) nounwind { ; CHECK-LABEL: test_multisource: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: mov r1, r0 ; CHECK-NEXT: add r2, r0, #48 ; CHECK-NEXT: add r0, r0, #32 @@ -240,7 +240,7 @@ define <4 x i16> @test_multisource(<32 x i16>* %B) nounwind { ; Again, test for fallback to by-element inserts. define <4 x i16> @test_largespan(<8 x i16>* %B) nounwind { ; CHECK-LABEL: test_largespan: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r0] ; CHECK-NEXT: vorr d18, d16, d16 ; CHECK-NEXT: vuzp.16 d18, d17 @@ -258,7 +258,7 @@ define <4 x i16> @test_largespan(<8 x i16>* %B) nounwind { ; really important.) define <8 x i16> @test_illegal(<8 x i16>* %A, <8 x i16>* %B) nounwind { ; CHECK-LABEL: test_illegal: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r0] ; CHECK-NEXT: vorr d22, d16, d16 ; CHECK-NEXT: vmov.u16 r0, d16[0] @@ -287,7 +287,7 @@ define <8 x i16> @test_illegal(<8 x i16>* %A, <8 x i16>* %B) nounwind { ; Make sure this doesn't crash define arm_aapcscc void @test_elem_mismatch(<2 x i64>* nocapture %src, <4 x i16>* nocapture %dest) nounwind { ; CHECK-LABEL: test_elem_mismatch: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r0:128] ; CHECK-NEXT: vmov.32 r0, d16[0] ; CHECK-NEXT: vmov.32 r2, d17[0] @@ -309,7 +309,7 @@ define arm_aapcscc void @test_elem_mismatch(<2 x i64>* nocapture %src, <4 x i16> define <4 x i32> @test_reverse_and_extract(<2 x i32>* %A) { ; CHECK-LABEL: test_reverse_and_extract: -; CHECK: @ BB#0: @ %entry +; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldr d16, [r0] ; CHECK-NEXT: vrev64.32 q9, q8 ; CHECK-NEXT: vext.32 q8, q8, q9, #2 @@ -324,7 +324,7 @@ entry: define <4 x i32> @test_dup_and_extract(<2 x i32>* %A) { ; CHECK-LABEL: test_dup_and_extract: -; CHECK: @ BB#0: @ %entry +; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldr d16, [r0] ; CHECK-NEXT: vdup.32 q9, d16[0] ; CHECK-NEXT: vext.32 q8, q9, q8, #2 @@ -339,7 +339,7 @@ entry: define <4 x i32> @test_zip_and_extract(<2 x i32>* %A) { ; CHECK-LABEL: test_zip_and_extract: -; CHECK: @ BB#0: @ %entry +; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldr d16, [r0] ; CHECK-NEXT: vorr q9, q8, q8 ; CHECK-NEXT: vorr q10, q8, q8 diff --git a/test/CodeGen/ARM/virtregrewriter-subregliveness.mir b/test/CodeGen/ARM/virtregrewriter-subregliveness.mir index 83335a3ccffd..44bc856c914d 100644 --- a/test/CodeGen/ARM/virtregrewriter-subregliveness.mir +++ b/test/CodeGen/ARM/virtregrewriter-subregliveness.mir @@ -33,7 +33,7 @@ body: | ; CHECK-NEXT: %r1 = KILL %r1, implicit killed %r0_r1 undef %0.gsub_0 = COPY %r0 %0.gsub_1 = COPY %r1 - tBX_RET 14, _, implicit %0 + tBX_RET 14, %noreg, implicit %0 ... @@ -55,7 +55,7 @@ body: | ; CHECK: %r0 = KILL %r0, implicit-def %r0_r1 ; CHECK-NEXT: tBX_RET undef %0.gsub_0 = COPY %r0 - tBX_RET 14, _, implicit %0 + tBX_RET 14, %noreg, implicit %0 ... @@ -78,7 +78,7 @@ body: | ; CHECK: %r0 = KILL %r0, implicit-def %r1, implicit-def %r0_r1 ; CHECK-NEXT: tBX_RET undef %0.gsub_0 = COPY %r0, implicit-def %r1 - tBX_RET 14, _, implicit %0 + tBX_RET 14, %noreg, implicit %0 ... 
diff --git a/test/CodeGen/ARM/vldm-liveness.mir b/test/CodeGen/ARM/vldm-liveness.mir index a85a018a8b1a..c06342c687dd 100644 --- a/test/CodeGen/ARM/vldm-liveness.mir +++ b/test/CodeGen/ARM/vldm-liveness.mir @@ -1,9 +1,9 @@ # RUN: llc -run-pass arm-ldst-opt -verify-machineinstrs %s -o - | FileCheck %s # ARM load store optimizer was dealing with a sequence like: -# s1 = VLDRS [r0, 1], Q0 -# s3 = VLDRS [r0, 2], Q0, Q0 -# s0 = VLDRS [r0, 0], Q0, Q0 -# s2 = VLDRS [r0, 4], Q0, Q0 +# s1 = VLDRS [r0, 1], implicit-def Q0 +# s3 = VLDRS [r0, 2], implicit killed Q0, implicit-def Q0 +# s0 = VLDRS [r0, 0], implicit killed Q0, implicit-def Q0 +# s2 = VLDRS [r0, 4], implicit killed Q0, implicit-def Q0 # # It decided to combine the {s0, s1} loads into a single instruction in the # third position. However, this leaves the instruction defining s3 with a stray @@ -26,15 +26,15 @@ body: | bb.0 (%ir-block.0): liveins: %r0 - %s1 = VLDRS %r0, 1, 14, _, implicit-def %q0 :: (load 4) - %s3 = VLDRS %r0, 2, 14, _, implicit killed %q0, implicit-def %q0 :: (load 4) - ; CHECK: %s3 = VLDRS %r0, 2, 14, _, implicit killed undef %q0, implicit-def %q0 :: (load 4) + %s1 = VLDRS %r0, 1, 14, %noreg, implicit-def %q0 :: (load 4) + %s3 = VLDRS %r0, 2, 14, %noreg, implicit killed %q0, implicit-def %q0 :: (load 4) + ; CHECK: %s3 = VLDRS %r0, 2, 14, %noreg, implicit killed undef %q0, implicit-def %q0 :: (load 4) - %s0 = VLDRS %r0, 0, 14, _, implicit killed %q0, implicit-def %q0 :: (load 4) - ; CHECK: VLDMSIA %r0, 14, _, def %s0, def %s1, implicit-def _ + %s0 = VLDRS %r0, 0, 14, %noreg, implicit killed %q0, implicit-def %q0 :: (load 4) + ; CHECK: VLDMSIA %r0, 14, %noreg, def %s0, def %s1, implicit-def %noreg - %s2 = VLDRS killed %r0, 4, 14, _, implicit killed %q0, implicit-def %q0 :: (load 4) - ; CHECK: %s2 = VLDRS killed %r0, 4, 14, _, implicit killed %q0, implicit-def %q0 :: (load 4) + %s2 = VLDRS killed %r0, 4, 14, %noreg, implicit killed %q0, implicit-def %q0 :: (load 4) + ; CHECK: %s2 = VLDRS killed %r0, 4, 14, %noreg, implicit killed %q0, implicit-def %q0 :: (load 4) - tBX_RET 14, _, implicit %q0 + tBX_RET 14, %noreg, implicit %q0 ... 
diff --git a/test/CodeGen/ARM/vpadd.ll b/test/CodeGen/ARM/vpadd.ll index 3fa93bb43f03..731bc373aaa6 100644 --- a/test/CodeGen/ARM/vpadd.ll +++ b/test/CodeGen/ARM/vpadd.ll @@ -3,7 +3,7 @@ define <8 x i8> @vpaddi8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ; CHECK-LABEL: vpaddi8: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d16, [r1] ; CHECK-NEXT: vldr d17, [r0] ; CHECK-NEXT: vpadd.i8 d16, d17, d16 @@ -17,7 +17,7 @@ define <8 x i8> @vpaddi8(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <4 x i16> @vpaddi16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ; CHECK-LABEL: vpaddi16: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d16, [r1] ; CHECK-NEXT: vldr d17, [r0] ; CHECK-NEXT: vpadd.i16 d16, d17, d16 @@ -31,7 +31,7 @@ define <4 x i16> @vpaddi16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <2 x i32> @vpaddi32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ; CHECK-LABEL: vpaddi32: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d16, [r1] ; CHECK-NEXT: vldr d17, [r0] ; CHECK-NEXT: vpadd.i32 d16, d17, d16 @@ -45,7 +45,7 @@ define <2 x i32> @vpaddi32(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <2 x float> @vpaddf32(<2 x float>* %A, <2 x float>* %B) nounwind { ; CHECK-LABEL: vpaddf32: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d16, [r1] ; CHECK-NEXT: vldr d17, [r0] ; CHECK-NEXT: vpadd.f32 d16, d17, d16 @@ -65,7 +65,7 @@ declare <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float>, <2 x float>) nounwin define <4 x i16> @vpaddls8(<8 x i8>* %A) nounwind { ; CHECK-LABEL: vpaddls8: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d16, [r0] ; CHECK-NEXT: vpaddl.s8 d16, d16 ; CHECK-NEXT: vmov r0, r1, d16 @@ -77,7 +77,7 @@ define <4 x i16> @vpaddls8(<8 x i8>* %A) nounwind { define <2 x i32> @vpaddls16(<4 x i16>* %A) nounwind { ; CHECK-LABEL: vpaddls16: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d16, [r0] ; CHECK-NEXT: vpaddl.s16 d16, d16 ; CHECK-NEXT: vmov r0, r1, d16 @@ -89,7 +89,7 @@ define <2 x i32> @vpaddls16(<4 x i16>* %A) nounwind { define <1 x i64> @vpaddls32(<2 x i32>* %A) nounwind { ; CHECK-LABEL: vpaddls32: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d16, [r0] ; CHECK-NEXT: vpaddl.s32 d16, d16 ; CHECK-NEXT: vmov r0, r1, d16 @@ -101,7 +101,7 @@ define <1 x i64> @vpaddls32(<2 x i32>* %A) nounwind { define <4 x i16> @vpaddlu8(<8 x i8>* %A) nounwind { ; CHECK-LABEL: vpaddlu8: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d16, [r0] ; CHECK-NEXT: vpaddl.u8 d16, d16 ; CHECK-NEXT: vmov r0, r1, d16 @@ -113,7 +113,7 @@ define <4 x i16> @vpaddlu8(<8 x i8>* %A) nounwind { define <2 x i32> @vpaddlu16(<4 x i16>* %A) nounwind { ; CHECK-LABEL: vpaddlu16: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d16, [r0] ; CHECK-NEXT: vpaddl.u16 d16, d16 ; CHECK-NEXT: vmov r0, r1, d16 @@ -125,7 +125,7 @@ define <2 x i32> @vpaddlu16(<4 x i16>* %A) nounwind { define <1 x i64> @vpaddlu32(<2 x i32>* %A) nounwind { ; CHECK-LABEL: vpaddlu32: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d16, [r0] ; CHECK-NEXT: vpaddl.u32 d16, d16 ; CHECK-NEXT: vmov r0, r1, d16 @@ -137,7 +137,7 @@ define <1 x i64> @vpaddlu32(<2 x i32>* %A) nounwind { define <8 x i16> @vpaddlQs8(<16 x i8>* %A) nounwind { ; CHECK-LABEL: vpaddlQs8: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r0] ; CHECK-NEXT: vpaddl.s8 q8, q8 ; CHECK-NEXT: vmov r0, r1, d16 @@ -150,7 +150,7 @@ define <8 x i16> @vpaddlQs8(<16 x i8>* %A) nounwind { define <4 x i32> @vpaddlQs16(<8 x i16>* %A) nounwind { ; CHECK-LABEL: vpaddlQs16: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; 
CHECK-NEXT: vld1.64 {d16, d17}, [r0] ; CHECK-NEXT: vpaddl.s16 q8, q8 ; CHECK-NEXT: vmov r0, r1, d16 @@ -163,7 +163,7 @@ define <4 x i32> @vpaddlQs16(<8 x i16>* %A) nounwind { define <2 x i64> @vpaddlQs32(<4 x i32>* %A) nounwind { ; CHECK-LABEL: vpaddlQs32: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r0] ; CHECK-NEXT: vpaddl.s32 q8, q8 ; CHECK-NEXT: vmov r0, r1, d16 @@ -176,7 +176,7 @@ define <2 x i64> @vpaddlQs32(<4 x i32>* %A) nounwind { define <8 x i16> @vpaddlQu8(<16 x i8>* %A) nounwind { ; CHECK-LABEL: vpaddlQu8: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r0] ; CHECK-NEXT: vpaddl.u8 q8, q8 ; CHECK-NEXT: vmov r0, r1, d16 @@ -189,7 +189,7 @@ define <8 x i16> @vpaddlQu8(<16 x i8>* %A) nounwind { define <4 x i32> @vpaddlQu16(<8 x i16>* %A) nounwind { ; CHECK-LABEL: vpaddlQu16: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r0] ; CHECK-NEXT: vpaddl.u16 q8, q8 ; CHECK-NEXT: vmov r0, r1, d16 @@ -202,7 +202,7 @@ define <4 x i32> @vpaddlQu16(<8 x i16>* %A) nounwind { define <2 x i64> @vpaddlQu32(<4 x i32>* %A) nounwind { ; CHECK-LABEL: vpaddlQu32: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r0] ; CHECK-NEXT: vpaddl.u32 q8, q8 ; CHECK-NEXT: vmov r0, r1, d16 @@ -216,7 +216,7 @@ define <2 x i64> @vpaddlQu32(<4 x i32>* %A) nounwind { ; Combine vuzp+vadd->vpadd. define void @addCombineToVPADD_i8(<16 x i8> *%cbcr, <8 x i8> *%X) nounwind ssp { ; CHECK-LABEL: addCombineToVPADD_i8: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r0] ; CHECK-NEXT: vpadd.i8 d16, d16, d17 ; CHECK-NEXT: vstr d16, [r1] @@ -233,7 +233,7 @@ define void @addCombineToVPADD_i8(<16 x i8> *%cbcr, <8 x i8> *%X) nounwind ssp { ; Combine vuzp+vadd->vpadd. define void @addCombineToVPADD_i16(<8 x i16> *%cbcr, <4 x i16> *%X) nounwind ssp { ; CHECK-LABEL: addCombineToVPADD_i16: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r0] ; CHECK-NEXT: vpadd.i16 d16, d16, d17 ; CHECK-NEXT: vstr d16, [r1] @@ -249,7 +249,7 @@ define void @addCombineToVPADD_i16(<8 x i16> *%cbcr, <4 x i16> *%X) nounwind ssp ; Combine vtrn+vadd->vpadd. define void @addCombineToVPADD_i32(<4 x i32> *%cbcr, <2 x i32> *%X) nounwind ssp { ; CHECK-LABEL: addCombineToVPADD_i32: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r0] ; CHECK-NEXT: vpadd.i32 d16, d16, d17 ; CHECK-NEXT: vstr d16, [r1] @@ -265,7 +265,7 @@ define void @addCombineToVPADD_i32(<4 x i32> *%cbcr, <2 x i32> *%X) nounwind ssp ; Combine vuzp+vaddl->vpaddl define void @addCombineToVPADDLq_s8(<16 x i8> *%cbcr, <8 x i16> *%X) nounwind ssp { ; CHECK-LABEL: addCombineToVPADDLq_s8: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r0] ; CHECK-NEXT: vpaddl.s8 q8, q8 ; CHECK-NEXT: vst1.64 {d16, d17}, [r1] @@ -284,7 +284,7 @@ define void @addCombineToVPADDLq_s8(<16 x i8> *%cbcr, <8 x i16> *%X) nounwind ss ; FIXME: Legalization butchers the shuffles. 
define void @addCombineToVPADDL_s8(<16 x i8> *%cbcr, <4 x i16> *%X) nounwind ssp { ; CHECK-LABEL: addCombineToVPADDL_s8: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vmov.i16 d16, #0x8 ; CHECK-NEXT: vld1.64 {d18, d19}, [r0] ; CHECK-NEXT: vext.8 d17, d18, d16, #1 @@ -309,7 +309,7 @@ define void @addCombineToVPADDL_s8(<16 x i8> *%cbcr, <4 x i16> *%X) nounwind ssp ; Combine vuzp+vaddl->vpaddl define void @addCombineToVPADDLq_u8(<16 x i8> *%cbcr, <8 x i16> *%X) nounwind ssp { ; CHECK-LABEL: addCombineToVPADDLq_u8: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r0] ; CHECK-NEXT: vpaddl.u8 q8, q8 ; CHECK-NEXT: vst1.64 {d16, d17}, [r1] @@ -328,7 +328,7 @@ define void @addCombineToVPADDLq_u8(<16 x i8> *%cbcr, <8 x i16> *%X) nounwind ss ; shuffle is awkward, so this doesn't match at the moment. define void @addCombineToVPADDLq_u8_early_zext(<16 x i8> *%cbcr, <8 x i16> *%X) nounwind ssp { ; CHECK-LABEL: addCombineToVPADDLq_u8_early_zext: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r0] ; CHECK-NEXT: vmovl.u8 q9, d17 ; CHECK-NEXT: vmovl.u8 q8, d16 @@ -349,7 +349,7 @@ define void @addCombineToVPADDLq_u8_early_zext(<16 x i8> *%cbcr, <8 x i16> *%X) ; FIXME: Legalization butchers the shuffle. define void @addCombineToVPADDL_u8(<16 x i8> *%cbcr, <4 x i16> *%X) nounwind ssp { ; CHECK-LABEL: addCombineToVPADDL_u8: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r0] ; CHECK-NEXT: vext.8 d18, d16, d16, #1 ; CHECK-NEXT: vbic.i16 d16, #0xff00 @@ -370,7 +370,7 @@ define void @addCombineToVPADDL_u8(<16 x i8> *%cbcr, <4 x i16> *%X) nounwind ssp ; Matching to vpaddl.8 requires matching shuffle(zext()). define void @addCombineToVPADDL_u8_early_zext(<16 x i8> *%cbcr, <4 x i16> *%X) nounwind ssp { ; CHECK-LABEL: addCombineToVPADDL_u8_early_zext: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r0] ; CHECK-NEXT: vmovl.u8 q8, d16 ; CHECK-NEXT: vpadd.i16 d16, d16, d17 @@ -388,7 +388,7 @@ define void @addCombineToVPADDL_u8_early_zext(<16 x i8> *%cbcr, <4 x i16> *%X) n ; Combine vuzp+vaddl->vpaddl define void @addCombineToVPADDLq_s16(<8 x i16> *%cbcr, <4 x i32> *%X) nounwind ssp { ; CHECK-LABEL: addCombineToVPADDLq_s16: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r0] ; CHECK-NEXT: vpaddl.s16 q8, q8 ; CHECK-NEXT: vst1.64 {d16, d17}, [r1] @@ -406,7 +406,7 @@ define void @addCombineToVPADDLq_s16(<8 x i16> *%cbcr, <4 x i32> *%X) nounwind s ; Combine vuzp+vaddl->vpaddl define void @addCombineToVPADDLq_u16(<8 x i16> *%cbcr, <4 x i32> *%X) nounwind ssp { ; CHECK-LABEL: addCombineToVPADDLq_u16: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r0] ; CHECK-NEXT: vpaddl.u16 q8, q8 ; CHECK-NEXT: vst1.64 {d16, d17}, [r1] @@ -424,7 +424,7 @@ define void @addCombineToVPADDLq_u16(<8 x i16> *%cbcr, <4 x i32> *%X) nounwind s ; Combine vtrn+vaddl->vpaddl define void @addCombineToVPADDLq_s32(<4 x i32> *%cbcr, <2 x i64> *%X) nounwind ssp { ; CHECK-LABEL: addCombineToVPADDLq_s32: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r0] ; CHECK-NEXT: vpaddl.s32 q8, q8 ; CHECK-NEXT: vst1.64 {d16, d17}, [r1] @@ -442,7 +442,7 @@ define void @addCombineToVPADDLq_s32(<4 x i32> *%cbcr, <2 x i64> *%X) nounwind s ; Combine vtrn+vaddl->vpaddl define void @addCombineToVPADDLq_u32(<4 x i32> *%cbcr, <2 x i64> *%X) nounwind ssp { ; CHECK-LABEL: addCombineToVPADDLq_u32: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r0] ; CHECK-NEXT: vpaddl.u32 q8, q8 ; 
CHECK-NEXT: vst1.64 {d16, d17}, [r1] @@ -460,7 +460,7 @@ define void @addCombineToVPADDLq_u32(<4 x i32> *%cbcr, <2 x i64> *%X) nounwind s ; Legalization promotes the <4 x i8> to <4 x i16>. define <4 x i8> @fromExtendingExtractVectorElt_i8(<8 x i8> %in) { ; CHECK-LABEL: fromExtendingExtractVectorElt_i8: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vmov d16, r0, r1 ; CHECK-NEXT: vpaddl.s8 d16, d16 ; CHECK-NEXT: vmov r0, r1, d16 @@ -474,7 +474,7 @@ define <4 x i8> @fromExtendingExtractVectorElt_i8(<8 x i8> %in) { ; Legalization promotes the <2 x i16> to <2 x i32>. define <2 x i16> @fromExtendingExtractVectorElt_i16(<4 x i16> %in) { ; CHECK-LABEL: fromExtendingExtractVectorElt_i16: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vmov d16, r0, r1 ; CHECK-NEXT: vpaddl.s16 d16, d16 ; CHECK-NEXT: vmov r0, r1, d16 diff --git a/test/CodeGen/ARM/vtrn.ll b/test/CodeGen/ARM/vtrn.ll index df6336043fdf..12cb504eda79 100644 --- a/test/CodeGen/ARM/vtrn.ll +++ b/test/CodeGen/ARM/vtrn.ll @@ -2,7 +2,7 @@ define <8 x i8> @vtrni8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ; CHECK-LABEL: vtrni8: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d16, [r1] ; CHECK-NEXT: vldr d17, [r0] ; CHECK-NEXT: vtrn.8 d17, d16 @@ -19,7 +19,7 @@ define <8 x i8> @vtrni8(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <16 x i8> @vtrni8_Qres(<8 x i8>* %A, <8 x i8>* %B) nounwind { ; CHECK-LABEL: vtrni8_Qres: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr [[LDR1:d[0-9]+]], [r1] ; CHECK-NEXT: vldr [[LDR0:d[0-9]+]], [r0] ; CHECK-NEXT: vtrn.8 [[LDR0]], [[LDR1]] @@ -34,7 +34,7 @@ define <16 x i8> @vtrni8_Qres(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <4 x i16> @vtrni16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ; CHECK-LABEL: vtrni16: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d16, [r1] ; CHECK-NEXT: vldr d17, [r0] ; CHECK-NEXT: vtrn.16 d17, d16 @@ -51,7 +51,7 @@ define <4 x i16> @vtrni16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <8 x i16> @vtrni16_Qres(<4 x i16>* %A, <4 x i16>* %B) nounwind { ; CHECK-LABEL: vtrni16_Qres: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr [[LDR1:d[0-9]+]], [r1] ; CHECK-NEXT: vldr [[LDR0:d[0-9]+]], [r0] ; CHECK-NEXT: vtrn.16 [[LDR0]], [[LDR1]] @@ -66,7 +66,7 @@ define <8 x i16> @vtrni16_Qres(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <2 x i32> @vtrni32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ; CHECK-LABEL: vtrni32: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d16, [r1] ; CHECK-NEXT: vldr d17, [r0] ; CHECK-NEXT: vtrn.32 d17, d16 @@ -83,7 +83,7 @@ define <2 x i32> @vtrni32(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <4 x i32> @vtrni32_Qres(<2 x i32>* %A, <2 x i32>* %B) nounwind { ; CHECK-LABEL: vtrni32_Qres: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr [[LDR1:d[0-9]+]], [r1] ; CHECK-NEXT: vldr [[LDR0:d[0-9]+]], [r0] ; CHECK-NEXT: vtrn.32 [[LDR0]], [[LDR1]] @@ -98,7 +98,7 @@ define <4 x i32> @vtrni32_Qres(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <2 x float> @vtrnf(<2 x float>* %A, <2 x float>* %B) nounwind { ; CHECK-LABEL: vtrnf: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d16, [r1] ; CHECK-NEXT: vldr d17, [r0] ; CHECK-NEXT: vtrn.32 d17, d16 @@ -115,7 +115,7 @@ define <2 x float> @vtrnf(<2 x float>* %A, <2 x float>* %B) nounwind { define <4 x float> @vtrnf_Qres(<2 x float>* %A, <2 x float>* %B) nounwind { ; CHECK-LABEL: vtrnf_Qres: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr [[LDR1:d[0-9]+]], [r1] ; CHECK-NEXT: vldr [[LDR0:d[0-9]+]], [r0] ; CHECK-NEXT: vtrn.32 [[LDR0]], [[LDR1]] @@ -130,7 +130,7 @@ 
define <4 x float> @vtrnf_Qres(<2 x float>* %A, <2 x float>* %B) nounwind { define <16 x i8> @vtrnQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ; CHECK-LABEL: vtrnQi8: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-NEXT: vld1.64 {d18, d19}, [r0] ; CHECK-NEXT: vtrn.8 q9, q8 @@ -148,7 +148,7 @@ define <16 x i8> @vtrnQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { define <32 x i8> @vtrnQi8_QQres(<16 x i8>* %A, <16 x i8>* %B) nounwind { ; CHECK-LABEL: vtrnQi8_QQres: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r2] ; CHECK-NEXT: vld1.64 {d18, d19}, [r1] ; CHECK-NEXT: vtrn.8 q9, q8 @@ -163,7 +163,7 @@ define <32 x i8> @vtrnQi8_QQres(<16 x i8>* %A, <16 x i8>* %B) nounwind { define <8 x i16> @vtrnQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ; CHECK-LABEL: vtrnQi16: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-NEXT: vld1.64 {d18, d19}, [r0] ; CHECK-NEXT: vtrn.16 q9, q8 @@ -181,7 +181,7 @@ define <8 x i16> @vtrnQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { define <16 x i16> @vtrnQi16_QQres(<8 x i16>* %A, <8 x i16>* %B) nounwind { ; CHECK-LABEL: vtrnQi16_QQres: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r2] ; CHECK-NEXT: vld1.64 {d18, d19}, [r1] ; CHECK-NEXT: vtrn.16 q9, q8 @@ -196,7 +196,7 @@ define <16 x i16> @vtrnQi16_QQres(<8 x i16>* %A, <8 x i16>* %B) nounwind { define <4 x i32> @vtrnQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ; CHECK-LABEL: vtrnQi32: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-NEXT: vld1.64 {d18, d19}, [r0] ; CHECK-NEXT: vtrn.32 q9, q8 @@ -214,7 +214,7 @@ define <4 x i32> @vtrnQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { define <8 x i32> @vtrnQi32_QQres(<4 x i32>* %A, <4 x i32>* %B) nounwind { ; CHECK-LABEL: vtrnQi32_QQres: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r2] ; CHECK-NEXT: vld1.64 {d18, d19}, [r1] ; CHECK-NEXT: vtrn.32 q9, q8 @@ -229,7 +229,7 @@ define <8 x i32> @vtrnQi32_QQres(<4 x i32>* %A, <4 x i32>* %B) nounwind { define <4 x float> @vtrnQf(<4 x float>* %A, <4 x float>* %B) nounwind { ; CHECK-LABEL: vtrnQf: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-NEXT: vld1.64 {d18, d19}, [r0] ; CHECK-NEXT: vtrn.32 q9, q8 @@ -247,7 +247,7 @@ define <4 x float> @vtrnQf(<4 x float>* %A, <4 x float>* %B) nounwind { define <8 x float> @vtrnQf_QQres(<4 x float>* %A, <4 x float>* %B) nounwind { ; CHECK-LABEL: vtrnQf_QQres: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r2] ; CHECK-NEXT: vld1.64 {d18, d19}, [r1] ; CHECK-NEXT: vtrn.32 q9, q8 @@ -263,7 +263,7 @@ define <8 x float> @vtrnQf_QQres(<4 x float>* %A, <4 x float>* %B) nounwind { define <8 x i8> @vtrni8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind { ; CHECK-LABEL: vtrni8_undef: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d16, [r1] ; CHECK-NEXT: vldr d17, [r0] ; CHECK-NEXT: vtrn.8 d17, d16 @@ -280,7 +280,7 @@ define <8 x i8> @vtrni8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <16 x i8> @vtrni8_undef_Qres(<8 x i8>* %A, <8 x i8>* %B) nounwind { ; CHECK-LABEL: vtrni8_undef_Qres: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr [[LDR1:d[0-9]+]], [r1] ; CHECK-NEXT: vldr [[LDR0:d[0-9]+]], [r0] ; CHECK-NEXT: vtrn.8 [[LDR0]], [[LDR1]] @@ -295,7 +295,7 @@ define <16 x i8> @vtrni8_undef_Qres(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <8 x i16> @vtrnQi16_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind { ; CHECK-LABEL: vtrnQi16_undef: -; CHECK: @ BB#0: 
+; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-NEXT: vld1.64 {d18, d19}, [r0] ; CHECK-NEXT: vtrn.16 q9, q8 @@ -313,7 +313,7 @@ define <8 x i16> @vtrnQi16_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind { define <16 x i16> @vtrnQi16_undef_QQres(<8 x i16>* %A, <8 x i16>* %B) nounwind { ; CHECK-LABEL: vtrnQi16_undef_QQres: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r2] ; CHECK-NEXT: vld1.64 {d18, d19}, [r1] ; CHECK-NEXT: vtrn.16 q9, q8 @@ -375,7 +375,7 @@ define <8 x i8> @vtrn_mismatched_builvector1(<8 x i8> %tr0, <8 x i8> %tr1, define void @lower_twice_no_vtrn(<4 x i16>* %A, <4 x i16>* %B, <8 x i16>* %C) { entry: ; CHECK-LABEL: lower_twice_no_vtrn: - ; CHECK: @ BB#0: + ; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d16, [r1] ; CHECK-NEXT: vldr d18, [r0] ; CHECK-NEXT: vtrn.16 d18, d16 @@ -394,7 +394,7 @@ entry: define void @upper_twice_no_vtrn(<4 x i16>* %A, <4 x i16>* %B, <8 x i16>* %C) { entry: ; CHECK-LABEL: upper_twice_no_vtrn: - ; CHECK: @ BB#0: + ; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d16, [r1] ; CHECK-NEXT: vldr d18, [r0] ; CHECK-NEXT: vtrn.16 d18, d16 diff --git a/test/CodeGen/ARM/vuzp.ll b/test/CodeGen/ARM/vuzp.ll index 24090cfd6c65..0ac366be3fea 100644 --- a/test/CodeGen/ARM/vuzp.ll +++ b/test/CodeGen/ARM/vuzp.ll @@ -3,7 +3,7 @@ define <8 x i8> @vuzpi8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ; CHECK-LABEL: vuzpi8: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d16, [r1] ; CHECK-NEXT: vldr d17, [r0] ; CHECK-NEXT: vuzp.8 d17, d16 @@ -20,7 +20,7 @@ define <8 x i8> @vuzpi8(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <16 x i8> @vuzpi8_Qres(<8 x i8>* %A, <8 x i8>* %B) nounwind { ; CHECK-LABEL: vuzpi8_Qres: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d17, [r1] ; CHECK-NEXT: vldr d16, [r0] ; CHECK-NEXT: vuzp.8 d16, d17 @@ -35,7 +35,7 @@ define <16 x i8> @vuzpi8_Qres(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <4 x i16> @vuzpi16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ; CHECK-LABEL: vuzpi16: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d16, [r1] ; CHECK-NEXT: vldr d17, [r0] ; CHECK-NEXT: vuzp.16 d17, d16 @@ -52,7 +52,7 @@ define <4 x i16> @vuzpi16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <8 x i16> @vuzpi16_Qres(<4 x i16>* %A, <4 x i16>* %B) nounwind { ; CHECK-LABEL: vuzpi16_Qres: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d17, [r1] ; CHECK-NEXT: vldr d16, [r0] ; CHECK-NEXT: vuzp.16 d16, d17 @@ -69,7 +69,7 @@ define <8 x i16> @vuzpi16_Qres(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <16 x i8> @vuzpQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ; CHECK-LABEL: vuzpQi8: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-NEXT: vld1.64 {d18, d19}, [r0] ; CHECK-NEXT: vuzp.8 q9, q8 @@ -87,7 +87,7 @@ define <16 x i8> @vuzpQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { define <32 x i8> @vuzpQi8_QQres(<16 x i8>* %A, <16 x i8>* %B) nounwind { ; CHECK-LABEL: vuzpQi8_QQres: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r2] ; CHECK-NEXT: vld1.64 {d18, d19}, [r1] ; CHECK-NEXT: vuzp.8 q9, q8 @@ -102,7 +102,7 @@ define <32 x i8> @vuzpQi8_QQres(<16 x i8>* %A, <16 x i8>* %B) nounwind { define <8 x i16> @vuzpQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ; CHECK-LABEL: vuzpQi16: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-NEXT: vld1.64 {d18, d19}, [r0] ; CHECK-NEXT: vuzp.16 q9, q8 @@ -120,7 +120,7 @@ define <8 x i16> @vuzpQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { define <16 x i16> @vuzpQi16_QQres(<8 x i16>* %A, <8 x i16>* 
%B) nounwind { ; CHECK-LABEL: vuzpQi16_QQres: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r2] ; CHECK-NEXT: vld1.64 {d18, d19}, [r1] ; CHECK-NEXT: vuzp.16 q9, q8 @@ -135,7 +135,7 @@ define <16 x i16> @vuzpQi16_QQres(<8 x i16>* %A, <8 x i16>* %B) nounwind { define <4 x i32> @vuzpQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ; CHECK-LABEL: vuzpQi32: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-NEXT: vld1.64 {d18, d19}, [r0] ; CHECK-NEXT: vuzp.32 q9, q8 @@ -153,7 +153,7 @@ define <4 x i32> @vuzpQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { define <8 x i32> @vuzpQi32_QQres(<4 x i32>* %A, <4 x i32>* %B) nounwind { ; CHECK-LABEL: vuzpQi32_QQres: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r2] ; CHECK-NEXT: vld1.64 {d18, d19}, [r1] ; CHECK-NEXT: vuzp.32 q9, q8 @@ -168,7 +168,7 @@ define <8 x i32> @vuzpQi32_QQres(<4 x i32>* %A, <4 x i32>* %B) nounwind { define <4 x float> @vuzpQf(<4 x float>* %A, <4 x float>* %B) nounwind { ; CHECK-LABEL: vuzpQf: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-NEXT: vld1.64 {d18, d19}, [r0] ; CHECK-NEXT: vuzp.32 q9, q8 @@ -186,7 +186,7 @@ define <4 x float> @vuzpQf(<4 x float>* %A, <4 x float>* %B) nounwind { define <8 x float> @vuzpQf_QQres(<4 x float>* %A, <4 x float>* %B) nounwind { ; CHECK-LABEL: vuzpQf_QQres: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r2] ; CHECK-NEXT: vld1.64 {d18, d19}, [r1] ; CHECK-NEXT: vuzp.32 q9, q8 @@ -203,7 +203,7 @@ define <8 x float> @vuzpQf_QQres(<4 x float>* %A, <4 x float>* %B) nounwind { define <8 x i8> @vuzpi8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind { ; CHECK-LABEL: vuzpi8_undef: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d16, [r1] ; CHECK-NEXT: vldr d17, [r0] ; CHECK-NEXT: vuzp.8 d17, d16 @@ -220,7 +220,7 @@ define <8 x i8> @vuzpi8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <16 x i8> @vuzpi8_undef_Qres(<8 x i8>* %A, <8 x i8>* %B) nounwind { ; CHECK-LABEL: vuzpi8_undef_Qres: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d17, [r1] ; CHECK-NEXT: vldr d16, [r0] ; CHECK-NEXT: vuzp.8 d16, d17 @@ -235,7 +235,7 @@ define <16 x i8> @vuzpi8_undef_Qres(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <8 x i16> @vuzpQi16_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind { ; CHECK-LABEL: vuzpQi16_undef: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-NEXT: vld1.64 {d18, d19}, [r0] ; CHECK-NEXT: vuzp.16 q9, q8 @@ -253,7 +253,7 @@ define <8 x i16> @vuzpQi16_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind { define <16 x i16> @vuzpQi16_undef_QQres(<8 x i16>* %A, <8 x i16>* %B) nounwind { ; CHECK-LABEL: vuzpQi16_undef_QQres: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r2] ; CHECK-NEXT: vld1.64 {d18, d19}, [r1] ; CHECK-NEXT: vuzp.16 q9, q8 @@ -268,7 +268,7 @@ define <16 x i16> @vuzpQi16_undef_QQres(<8 x i16>* %A, <8 x i16>* %B) nounwind { define <8 x i16> @vuzp_lower_shufflemask_undef(<4 x i16>* %A, <4 x i16>* %B) { ; CHECK-LABEL: vuzp_lower_shufflemask_undef: -; CHECK: @ BB#0: @ %entry +; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldr d17, [r1] ; CHECK-NEXT: vldr d16, [r0] ; CHECK-NEXT: vorr q9, q8, q8 @@ -285,7 +285,7 @@ entry: define <4 x i32> @vuzp_lower_shufflemask_zeroed(<2 x i32>* %A, <2 x i32>* %B) { ; CHECK-LABEL: vuzp_lower_shufflemask_zeroed: -; CHECK: @ BB#0: @ %entry +; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldr d17, [r1] ; CHECK-NEXT: vldr d16, [r0] ; CHECK-NEXT: vdup.32 q9, d16[0] @@ 
-303,7 +303,7 @@ entry: define void @vuzp_rev_shufflemask_vtrn(<2 x i32>* %A, <2 x i32>* %B, <4 x i32>* %C) { ; CHECK-LABEL: vuzp_rev_shufflemask_vtrn: -; CHECK: @ BB#0: @ %entry +; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldr d17, [r1] ; CHECK-NEXT: vldr d16, [r0] ; CHECK-NEXT: vrev64.32 q9, q8 @@ -323,7 +323,7 @@ define <8 x i8> @cmpsel_trunc(<8 x i8> %in0, <8 x i8> %in1, <8 x i32> %cmp0, <8 ; This results in a build_vector with mismatched types. We will generate two vmovn.i32 instructions to ; truncate from i32 to i16 and one vmovn.i16 to perform the final truncation for i8. ; CHECK-LABEL: cmpsel_trunc: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: add r12, sp, #16 ; CHECK-NEXT: vld1.64 {d16, d17}, [r12] ; CHECK-NEXT: mov r12, sp @@ -352,7 +352,7 @@ define <8 x i8> @cmpsel_trunc(<8 x i8> %in0, <8 x i8> %in1, <8 x i32> %cmp0, <8 ; to perform the vuzp and get the vbsl mask. define <8 x i8> @vuzp_trunc_and_shuffle(<8 x i8> %tr0, <8 x i8> %tr1, ; CHECK-LABEL: vuzp_trunc_and_shuffle: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: .save {r11, lr} ; CHECK-NEXT: push {r11, lr} ; CHECK-NEXT: add r12, sp, #8 @@ -388,7 +388,7 @@ define <8 x i8> @vuzp_trunc_and_shuffle(<8 x i8> %tr0, <8 x i8> %tr1, ; This produces a build_vector with some of the operands undefs. define <8 x i8> @vuzp_trunc_and_shuffle_undef_right(<8 x i8> %tr0, <8 x i8> %tr1, ; CHECK-LABEL: vuzp_trunc_and_shuffle_undef_right: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: mov r12, sp ; CHECK-NEXT: vld1.64 {d16, d17}, [r12] ; CHECK-NEXT: add r12, sp, #16 @@ -416,7 +416,7 @@ define <8 x i8> @vuzp_trunc_and_shuffle_undef_right(<8 x i8> %tr0, <8 x i8> %tr1 define <8 x i8> @vuzp_trunc_and_shuffle_undef_left(<8 x i8> %tr0, <8 x i8> %tr1, ; CHECK-LABEL: vuzp_trunc_and_shuffle_undef_left: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: mov r12, sp ; CHECK-NEXT: vld1.64 {d16, d17}, [r12] ; CHECK-NEXT: add r12, sp, #16 @@ -435,7 +435,7 @@ define <8 x i8> @vuzp_trunc_and_shuffle_undef_left(<8 x i8> %tr0, <8 x i8> %tr1, ; CHECK-NEXT: vmov r0, r1, d16 ; CHECK-NEXT: mov pc, lr ; CHECK-NEXT: .p2align 3 -; CHECK-NEXT: @ BB#1: +; CHECK-NEXT: @ %bb.1: ; CHECK-NEXT: .LCPI22_0: ; CHECK-NEXT: .byte 255 @ 0xff ; CHECK-NEXT: .byte 255 @ 0xff @@ -458,7 +458,7 @@ define <8 x i8> @vuzp_trunc_and_shuffle_undef_left(<8 x i8> %tr0, <8 x i8> %tr1, ; get some vector size that we can represent. 
define <10 x i8> @vuzp_wide_type(<10 x i8> %tr0, <10 x i8> %tr1, ; CHECK-LABEL: vuzp_wide_type: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: .save {r4, r10, r11, lr} ; CHECK-NEXT: push {r4, r10, r11, lr} ; CHECK-NEXT: .setfp r11, sp, #8 @@ -517,7 +517,7 @@ define <10 x i8> @vuzp_wide_type(<10 x i8> %tr0, <10 x i8> %tr1, ; CHECK-NEXT: pop {r4, r10, r11, lr} ; CHECK-NEXT: mov pc, lr ; CHECK-NEXT: .p2align 3 -; CHECK-NEXT: @ BB#1: +; CHECK-NEXT: @ %bb.1: ; CHECK-NEXT: .LCPI23_0: ; CHECK-NEXT: .byte 0 @ 0x0 ; CHECK-NEXT: .byte 1 @ 0x1 @@ -539,7 +539,7 @@ define <10 x i8> @vuzp_wide_type(<10 x i8> %tr0, <10 x i8> %tr1, %struct.uint8x8x2_t = type { [2 x <8 x i8>] } define %struct.uint8x8x2_t @vuzp_extract_subvector(<16 x i8> %t) #0 { ; CHECK-LABEL: vuzp_extract_subvector: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vmov d17, r2, r3 ; CHECK-NEXT: vmov d16, r0, r1 ; CHECK-NEXT: vorr d18, d17, d17 diff --git a/test/CodeGen/ARM/vzip.ll b/test/CodeGen/ARM/vzip.ll index 06b49ab94053..5047b3e087ad 100644 --- a/test/CodeGen/ARM/vzip.ll +++ b/test/CodeGen/ARM/vzip.ll @@ -3,7 +3,7 @@ define <8 x i8> @vzipi8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ; CHECK-LABEL: vzipi8: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d16, [r1] ; CHECK-NEXT: vldr d17, [r0] ; CHECK-NEXT: vzip.8 d17, d16 @@ -20,7 +20,7 @@ define <8 x i8> @vzipi8(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <16 x i8> @vzipi8_Qres(<8 x i8>* %A, <8 x i8>* %B) nounwind { ; CHECK-LABEL: vzipi8_Qres: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d17, [r1] ; CHECK-NEXT: vldr d16, [r0] ; CHECK-NEXT: vzip.8 d16, d17 @@ -35,7 +35,7 @@ define <16 x i8> @vzipi8_Qres(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <4 x i16> @vzipi16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ; CHECK-LABEL: vzipi16: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d16, [r1] ; CHECK-NEXT: vldr d17, [r0] ; CHECK-NEXT: vzip.16 d17, d16 @@ -52,7 +52,7 @@ define <4 x i16> @vzipi16(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <8 x i16> @vzipi16_Qres(<4 x i16>* %A, <4 x i16>* %B) nounwind { ; CHECK-LABEL: vzipi16_Qres: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d17, [r1] ; CHECK-NEXT: vldr d16, [r0] ; CHECK-NEXT: vzip.16 d16, d17 @@ -69,7 +69,7 @@ define <8 x i16> @vzipi16_Qres(<4 x i16>* %A, <4 x i16>* %B) nounwind { define <16 x i8> @vzipQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ; CHECK-LABEL: vzipQi8: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-NEXT: vld1.64 {d18, d19}, [r0] ; CHECK-NEXT: vzip.8 q9, q8 @@ -87,7 +87,7 @@ define <16 x i8> @vzipQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { define <32 x i8> @vzipQi8_QQres(<16 x i8>* %A, <16 x i8>* %B) nounwind { ; CHECK-LABEL: vzipQi8_QQres: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r2] ; CHECK-NEXT: vld1.64 {d18, d19}, [r1] ; CHECK-NEXT: vzip.8 q9, q8 @@ -102,7 +102,7 @@ define <32 x i8> @vzipQi8_QQres(<16 x i8>* %A, <16 x i8>* %B) nounwind { define <8 x i16> @vzipQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ; CHECK-LABEL: vzipQi16: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-NEXT: vld1.64 {d18, d19}, [r0] ; CHECK-NEXT: vzip.16 q9, q8 @@ -120,7 +120,7 @@ define <8 x i16> @vzipQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { define <16 x i16> @vzipQi16_QQres(<8 x i16>* %A, <8 x i16>* %B) nounwind { ; CHECK-LABEL: vzipQi16_QQres: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r2] ; CHECK-NEXT: vld1.64 {d18, d19}, [r1] ; CHECK-NEXT: vzip.16 q9, q8 @@ 
-135,7 +135,7 @@ define <16 x i16> @vzipQi16_QQres(<8 x i16>* %A, <8 x i16>* %B) nounwind { define <4 x i32> @vzipQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ; CHECK-LABEL: vzipQi32: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-NEXT: vld1.64 {d18, d19}, [r0] ; CHECK-NEXT: vzip.32 q9, q8 @@ -153,7 +153,7 @@ define <4 x i32> @vzipQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { define <8 x i32> @vzipQi32_QQres(<4 x i32>* %A, <4 x i32>* %B) nounwind { ; CHECK-LABEL: vzipQi32_QQres: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r2] ; CHECK-NEXT: vld1.64 {d18, d19}, [r1] ; CHECK-NEXT: vzip.32 q9, q8 @@ -168,7 +168,7 @@ define <8 x i32> @vzipQi32_QQres(<4 x i32>* %A, <4 x i32>* %B) nounwind { define <4 x float> @vzipQf(<4 x float>* %A, <4 x float>* %B) nounwind { ; CHECK-LABEL: vzipQf: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-NEXT: vld1.64 {d18, d19}, [r0] ; CHECK-NEXT: vzip.32 q9, q8 @@ -186,7 +186,7 @@ define <4 x float> @vzipQf(<4 x float>* %A, <4 x float>* %B) nounwind { define <8 x float> @vzipQf_QQres(<4 x float>* %A, <4 x float>* %B) nounwind { ; CHECK-LABEL: vzipQf_QQres: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r2] ; CHECK-NEXT: vld1.64 {d18, d19}, [r1] ; CHECK-NEXT: vzip.32 q9, q8 @@ -203,7 +203,7 @@ define <8 x float> @vzipQf_QQres(<4 x float>* %A, <4 x float>* %B) nounwind { define <8 x i8> @vzipi8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind { ; CHECK-LABEL: vzipi8_undef: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d16, [r1] ; CHECK-NEXT: vldr d17, [r0] ; CHECK-NEXT: vzip.8 d17, d16 @@ -220,7 +220,7 @@ define <8 x i8> @vzipi8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <16 x i8> @vzipi8_undef_Qres(<8 x i8>* %A, <8 x i8>* %B) nounwind { ; CHECK-LABEL: vzipi8_undef_Qres: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d17, [r1] ; CHECK-NEXT: vldr d16, [r0] ; CHECK-NEXT: vzip.8 d16, d17 @@ -235,7 +235,7 @@ define <16 x i8> @vzipi8_undef_Qres(<8 x i8>* %A, <8 x i8>* %B) nounwind { define <16 x i8> @vzipQi8_undef(<16 x i8>* %A, <16 x i8>* %B) nounwind { ; CHECK-LABEL: vzipQi8_undef: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r1] ; CHECK-NEXT: vld1.64 {d18, d19}, [r0] ; CHECK-NEXT: vzip.8 q9, q8 @@ -253,7 +253,7 @@ define <16 x i8> @vzipQi8_undef(<16 x i8>* %A, <16 x i8>* %B) nounwind { define <32 x i8> @vzipQi8_undef_QQres(<16 x i8>* %A, <16 x i8>* %B) nounwind { ; CHECK-LABEL: vzipQi8_undef_QQres: -; CHECK: @ BB#0: +; CHECK: @ %bb.0: ; CHECK-NEXT: vld1.64 {d16, d17}, [r2] ; CHECK-NEXT: vld1.64 {d18, d19}, [r1] ; CHECK-NEXT: vzip.8 q9, q8 @@ -268,7 +268,7 @@ define <32 x i8> @vzipQi8_undef_QQres(<16 x i8>* %A, <16 x i8>* %B) nounwind { define <8 x i16> @vzip_lower_shufflemask_undef(<4 x i16>* %A, <4 x i16>* %B) { ; CHECK-LABEL: vzip_lower_shufflemask_undef: -; CHECK: @ BB#0: @ %entry +; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldr d17, [r1] ; CHECK-NEXT: vldr d16, [r0] ; CHECK-NEXT: vzip.16 d16, d17 @@ -287,7 +287,7 @@ entry: ; as a vtrn. 
define <8 x i16> @vzip_lower_shufflemask_undef_rev(<4 x i16>* %A, <4 x i16>* %B) { ; CHECK-LABEL: vzip_lower_shufflemask_undef_rev: -; CHECK: @ BB#0: @ %entry +; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldr d16, [r1] ; CHECK-NEXT: vldr d19, [r0] ; CHECK-NEXT: vtrn.16 d19, d16 @@ -303,7 +303,7 @@ entry: define <4 x i32> @vzip_lower_shufflemask_zeroed(<2 x i32>* %A) { ; CHECK-LABEL: vzip_lower_shufflemask_zeroed: -; CHECK: @ BB#0: @ %entry +; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldr d16, [r0] ; CHECK-NEXT: vdup.32 q9, d16[0] ; CHECK-NEXT: vzip.32 q8, q9 @@ -318,7 +318,7 @@ entry: define <4 x i32> @vzip_lower_shufflemask_vuzp(<2 x i32>* %A) { ; CHECK-LABEL: vzip_lower_shufflemask_vuzp: -; CHECK: @ BB#0: @ %entry +; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldr d16, [r0] ; CHECK-NEXT: vdup.32 q9, d16[0] ; CHECK-NEXT: vzip.32 q8, q9 @@ -333,7 +333,7 @@ entry: define void @vzip_undef_rev_shufflemask_vtrn(<2 x i32>* %A, <4 x i32>* %B) { ; CHECK-LABEL: vzip_undef_rev_shufflemask_vtrn: -; CHECK: @ BB#0: @ %entry +; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldr d16, [r0] ; CHECK-NEXT: vorr q9, q8, q8 ; CHECK-NEXT: vzip.32 q8, q9 @@ -349,7 +349,7 @@ entry: define void @vzip_vext_factor(<8 x i16>* %A, <4 x i16>* %B) { ; CHECK-LABEL: vzip_vext_factor: -; CHECK: @ BB#0: @ %entry +; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vld1.64 {d16, d17}, [r0] ; CHECK-NEXT: vext.16 d18, d16, d17, #1 ; CHECK-NEXT: vext.16 d16, d18, d17, #2 @@ -365,7 +365,7 @@ entry: define <8 x i8> @vdup_zip(i8* nocapture readonly %x, i8* nocapture readonly %y) { ; CHECK-LABEL: vdup_zip: -; CHECK: @ BB#0: @ %entry +; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vld1.8 {d16[]}, [r1] ; CHECK-NEXT: vld1.8 {d17[]}, [r0] ; CHECK-NEXT: vzip.8 d17, d16 diff --git a/test/CodeGen/AVR/atomics/fence.ll b/test/CodeGen/AVR/atomics/fence.ll index 6ea49bc7e3fc..b4cd215f3a26 100644 --- a/test/CodeGen/AVR/atomics/fence.ll +++ b/test/CodeGen/AVR/atomics/fence.ll @@ -4,7 +4,7 @@ ; AVR is always singlethreaded so fences do nothing. 
; CHECK_LABEL: atomic_fence8 -; CHECK: ; BB#0: +; CHECK: ; %bb.0: ; CHECK-NEXT: ret define void @atomic_fence8() { fence acquire diff --git a/test/CodeGen/AVR/return.ll b/test/CodeGen/AVR/return.ll index 1f80576af288..6d81faa6e8d4 100644 --- a/test/CodeGen/AVR/return.ll +++ b/test/CodeGen/AVR/return.ll @@ -5,7 +5,7 @@ define void @return_void() { ; CHECK: return_void:{{[a-zA-Z0-9 #@]*}} -; CHECK-NEXT: #{{[a-zA-Z0-9 #@]*}} +; CHECK-NEXT: {{.*}}: ; CHECK-NEXT: ret ret void } @@ -18,7 +18,7 @@ define i8 @return8_imm() { define i8 @return8_arg(i8 %x) { ; CHECK: return8_arg:{{[a-zA-Z0-9 #@]*}} -; CHECK-NEXT: #{{[a-zA-Z0-9 #@]*}} +; CHECK-NEXT: {{.*}}: ; CHECK-NEXT: ret ret i8 %x } @@ -38,7 +38,7 @@ define i16 @return16_imm() { define i16 @return16_arg(i16 %x) { ; CHECK: return16_arg:{{[a-zA-Z0-9 #@]*}} -; CHECK-NEXT: #{{[a-zA-Z0-9 #@]*}} +; CHECK-NEXT: {{.*}}: ; CHECK-NEXT: ret ret i16 %x } @@ -60,7 +60,7 @@ define i32 @return32_imm() { define i32 @return32_arg(i32 %x) { ; CHECK: return32_arg:{{[a-zA-Z0-9 #@]*}} -; CHECK-NEXT: #{{[a-zA-Z0-9 #@]*}} +; CHECK-NEXT: {{.*}}: ; CHECK-NEXT: ret ret i32 %x } @@ -87,7 +87,7 @@ define i64 @return64_imm() { define i64 @return64_arg(i64 %x) { ; CHECK: return64_arg:{{[a-zA-Z0-9 #@]*}} -; CHECK-NEXT: #{{[a-zA-Z0-9 #@]*}} +; CHECK-NEXT: {{.*}}: ; CHECK-NEXT: ret ret i64 %x } diff --git a/test/CodeGen/AVR/select-must-add-unconditional-jump.ll b/test/CodeGen/AVR/select-must-add-unconditional-jump.ll index e6344dfc6927..22caecf05c01 100644 --- a/test/CodeGen/AVR/select-must-add-unconditional-jump.ll +++ b/test/CodeGen/AVR/select-must-add-unconditional-jump.ll @@ -9,18 +9,18 @@ ; ; This issue manifests in a CFG that looks something like this: ; -; BB#2: derived from LLVM BB %finish -; Predecessors according to CFG: BB#0 BB#1 -; %vreg0 = PHI %vreg3, , %vreg5, -; %vreg7 = LDIRdK 2 -; %vreg8 = LDIRdK 1 -; CPRdRr %vreg2, %vreg0, %SREG -; BREQk , %SREG -; Successors according to CFG: BB#5(?%) BB#6(?%) +; %bb.2: derived from LLVM BB %finish +; Predecessors according to CFG: %bb.0 %bb.1 +; %0 = PHI %3, <%bb.0>, %5, <%bb.1> +; %7 = LDIRdK 2 +; %8 = LDIRdK 1 +; CPRdRr %2, %0, implicit-def %SREG +; BREQk <%bb.6>, implicit %SREG +; Successors according to CFG: %bb.5(?%) %bb.6(?%) ; -; The code assumes it the fallthrough block after this is BB#5, but -; it's actually BB#3! To be proper, there should be an unconditional -; jump tying this block to BB#5. +; The code assumes it the fallthrough block after this is %bb.5, but +; it's actually %bb.3! To be proper, there should be an unconditional +; jump tying this block to %bb.5. define i8 @select_must_add_unconditional_jump(i8 %arg0, i8 %arg1) unnamed_addr { entry-block: @@ -49,10 +49,10 @@ dead: ; basic block containing `select` needs to contain explicit jumps to ; both successors. 
-; CHECK: BB#2: derived from LLVM BB %finish
-; CHECK: BREQk <[[BRANCHED:BB#[0-9]+]]>
-; CHECK: RJMPk <[[DIRECT:BB#[0-9]+]]>
+; CHECK: %bb.2: derived from LLVM BB %finish
+; CHECK: BREQk [[BRANCHED:%bb.[0-9]+]]
+; CHECK: RJMPk [[DIRECT:%bb.[0-9]+]]
 ; CHECK: Successors according to CFG
 ; CHECK-SAME-DAG: {{.*}}[[BRANCHED]]
 ; CHECK-SAME-DAG: {{.*}}[[DIRECT]]
-; CHECK: BB#3: derived from LLVM BB
+; CHECK: %bb.3: derived from LLVM BB
diff --git a/test/CodeGen/AVR/std-ldd-immediate-overflow.ll b/test/CodeGen/AVR/std-ldd-immediate-overflow.ll
index 290e349c5342..5580e3ae9731 100644
--- a/test/CodeGen/AVR/std-ldd-immediate-overflow.ll
+++ b/test/CodeGen/AVR/std-ldd-immediate-overflow.ll
@@ -8,11 +8,11 @@ define i32 @std_ldd_overflow() {
 store i32 0, i32 *%1
 %2 = bitcast [4 x i8]* %dst to i8*
 %3 = bitcast [4 x i8]* %src to i8*
- call void @llvm.memcpy.p0i8.p0i8.i16(i8* %2, i8* %3, i16 4, i32 1, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i16(i8* %2, i8* %3, i16 4, i1 false)
 ; CHECK-NOT: std {{[XYZ]}}+64, {{r[0-9]+}}
 ; CHECK-NOT: ldd {{r[0-9]+}}, {{[XYZ]}}+64
 ret i32 0
 }
-declare void @llvm.memcpy.p0i8.p0i8.i16(i8* nocapture writeonly, i8* nocapture readonly, i16, i32, i1)
+declare void @llvm.memcpy.p0i8.p0i8.i16(i8* nocapture writeonly, i8* nocapture readonly, i16, i1)
diff --git a/test/CodeGen/AVR/unaligned-atomic-loads.ll b/test/CodeGen/AVR/unaligned-atomic-loads.ll
new file mode 100644
index 000000000000..db1ab33fa883
--- /dev/null
+++ b/test/CodeGen/AVR/unaligned-atomic-loads.ll
@@ -0,0 +1,19 @@
+; RUN: llc -mattr=addsubiw < %s -march=avr | FileCheck %s
+
+; This verifies that the middle end can handle an unaligned atomic load.
+;
+; In the past, the SelectionDAGBuilder would always
+; hit an assertion for unaligned loads and stores.
+
+%AtomicI16 = type { %CellI16, [0 x i8] }
+%CellI16 = type { i16, [0 x i8] }
+
+; CHECK-LABEL: foo
+; CHECK: ret
+define void @foo(%AtomicI16* %self) {
+start:
+ %a = getelementptr inbounds %AtomicI16, %AtomicI16* %self, i16 0, i32 0, i32 0
+ load atomic i16, i16* %a seq_cst, align 1
+ ret void
+}
+
diff --git a/test/CodeGen/BPF/byval.ll b/test/CodeGen/BPF/byval.ll
index 25ba909d9cd7..2d2e8d289d6f 100644
--- a/test/CodeGen/BPF/byval.ll
+++ b/test/CodeGen/BPF/byval.ll
@@ -16,7 +16,7 @@ entry:
 store i32 3, i32* %arrayinit.element2, align 8
 %arrayinit.start = getelementptr inbounds %struct.S, %struct.S* %.compoundliteral, i64 0, i32 0, i64 3
 %scevgep4 = bitcast i32* %arrayinit.start to i8*
- call void @llvm.memset.p0i8.i64(i8* %scevgep4, i8 0, i64 28, i32 4, i1 false)
+ call void @llvm.memset.p0i8.i64(i8* align 4 %scevgep4, i8 0, i64 28, i1 false)
 call void @foo(i32 %a, %struct.S* byval align 8 %.compoundliteral) #3
 ret void
 }
@@ -24,4 +24,4 @@ entry:
 declare void @foo(i32, %struct.S* byval align 8) #1
 ; Function Attrs: nounwind
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) #3
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) #3
diff --git a/test/CodeGen/BPF/ex1.ll b/test/CodeGen/BPF/ex1.ll
index 97cc7e07ab9b..de9599b54d20 100644
--- a/test/CodeGen/BPF/ex1.ll
+++ b/test/CodeGen/BPF/ex1.ll
@@ -12,7 +12,7 @@ define i32 @bpf_prog1(%struct.bpf_context* nocapture %ctx) #0 section "events/ne
 %devname = alloca [3 x i8], align 1
 %fmt = alloca [15 x i8], align 1
 %1 = getelementptr inbounds [3 x i8], [3 x i8]* %devname, i64 0, i64 0
- call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @bpf_prog1.devname, i64 0, i64 0), i64 3, i32 1, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @bpf_prog1.devname, i64 0, i64 0), i64 3, i1 false)
 %2 = getelementptr inbounds %struct.bpf_context, %struct.bpf_context* %ctx, i64 0, i32 0
 %3 = load i64, i64* %2, align 8
 %4 = inttoptr i64 %3 to %struct.sk_buff*
@@ -25,7 +25,7 @@ define i32 @bpf_prog1(%struct.bpf_context* nocapture %ctx) #0 section "events/ne
 ;